summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorYong He <yonghe@outlook.com>2024-10-17 20:14:22 -0700
committerGitHub <noreply@github.com>2024-10-17 20:14:22 -0700
commita618b8c5e249b0f20e6c0c95f9da1b5cbfdbf08b (patch)
treed583c373d574a265fefe7f288a96c4b382e259b8
parent11e1ecafa09396a3559fe245d729b40ce4f25d52 (diff)
Cleanup atomic intrinsics. (#5324)
* Cleanup atomic intrinsics. * Fix. * Fix glsl. * Remove hacky intrinsic expansion logic for glsl image atomics. * Fix all tests. * Fix. * Add `InterlockedAddF16Emulated`. * Fix glsl intrinsic. * Fix.
-rw-r--r--prelude/slang-cuda-prelude.h18
-rw-r--r--source/slang/core.meta.slang20
-rw-r--r--source/slang/hlsl.meta.slang2417
-rw-r--r--source/slang/slang-diagnostic-defs.h1
-rw-r--r--source/slang/slang-emit-c-like.cpp10
-rw-r--r--source/slang/slang-emit-c-like.h1
-rw-r--r--source/slang/slang-emit-cuda.cpp20
-rw-r--r--source/slang/slang-emit-glsl.cpp226
-rw-r--r--source/slang/slang-emit-glsl.h2
-rw-r--r--source/slang/slang-emit-hlsl.cpp47
-rw-r--r--source/slang/slang-emit-metal.cpp356
-rw-r--r--source/slang/slang-emit-metal.h5
-rw-r--r--source/slang/slang-emit-spirv.cpp30
-rw-r--r--source/slang/slang-emit.cpp7
-rw-r--r--source/slang/slang-intrinsic-expand.cpp106
-rw-r--r--source/slang/slang-ir-legalize-extract-from-texture-access.cpp136
-rw-r--r--source/slang/slang-ir-legalize-extract-from-texture-access.h11
-rw-r--r--source/slang/slang-ir-legalize-is-texture-access.cpp79
-rw-r--r--source/slang/slang-ir-legalize-is-texture-access.h11
-rw-r--r--source/slang/slang-ir-use-uninitialized-values.cpp5
-rw-r--r--source/slang/slang-ir.cpp2
-rw-r--r--tests/bugs/gh-3997.slang2
-rw-r--r--tests/compute/atomics-invalid-dest-type.slang5
-rw-r--r--tests/compute/nonuniformres-atomic.slang2
-rw-r--r--tests/hlsl-intrinsic/texture/float-atomics.slang2
-rw-r--r--tests/metal/atomic-byteaddressbuffer.slang57
-rw-r--r--tests/metal/atomic-intrinsics.slang11
-rw-r--r--tests/metal/atomic-texture-buffer.slang2
-rw-r--r--tests/slang-extension/atomic-int64-byte-address-buffer.slang2
-rw-r--r--tests/slang-extension/atomic-min-max-u64-byte-address-buffer.slang2
-rw-r--r--tests/slang-extension/cas-int64-byte-address-buffer.slang2
-rw-r--r--tests/slang-extension/exchange-int64-byte-address-buffer.slang5
-rw-r--r--tests/spirv/ref-this.slang2
33 files changed, 1040 insertions, 2564 deletions
diff --git a/prelude/slang-cuda-prelude.h b/prelude/slang-cuda-prelude.h
index 96ef22dd1..a6c8fd17b 100644
--- a/prelude/slang-cuda-prelude.h
+++ b/prelude/slang-cuda-prelude.h
@@ -1261,7 +1261,14 @@ struct ByteAddressBuffer
memcpy(&data, ((const char*)this->data) + index, sizeof(T));
return data;
}
-
+ template<typename T>
+ SLANG_CUDA_CALL StructuredBuffer<T> asStructuredBuffer() const
+ {
+ StructuredBuffer<T> rs;
+ rs.data = (T*)data;
+ rs.count = sizeInBytes / sizeof(T);
+ return rs;
+ }
const uint32_t* data;
size_t sizeInBytes; //< Must be multiple of 4
};
@@ -1348,7 +1355,14 @@ struct RWByteAddressBuffer
SLANG_BOUND_CHECK_BYTE_ADDRESS(index, sizeof(T), sizeInBytes);
return (T*)(((char*)data) + index);
}
-
+ template<typename T>
+ SLANG_CUDA_CALL RWStructuredBuffer<T> asStructuredBuffer() const
+ {
+ RWStructuredBuffer<T> rs;
+ rs.data = (T*)data;
+ rs.count = sizeInBytes / sizeof(T);
+ return rs;
+ }
uint32_t* data;
size_t sizeInBytes; //< Must be multiple of 4
};
diff --git a/source/slang/core.meta.slang b/source/slang/core.meta.slang
index 084654d0f..67ec91cf6 100644
--- a/source/slang/core.meta.slang
+++ b/source/slang/core.meta.slang
@@ -299,6 +299,18 @@ interface __BuiltinSignedArithmeticType : __BuiltinArithmeticType {}
interface __BuiltinIntegerType : __BuiltinArithmeticType, IInteger
{}
+/// Represent an `int` or `uint` type.
+[sealed]
+[builtin]
+interface __BuiltinInt32Type : __BuiltinIntegerType
+{}
+
+/// Represent an `int64_t` or `uint64_t` type.
+[sealed]
+[builtin]
+interface __BuiltinInt64Type : __BuiltinIntegerType
+{}
+
/// Represent builtin types that can represent a real number.
[sealed]
[builtin]
@@ -603,6 +615,14 @@ ${{{{
, __BuiltinArithmeticType
, __BuiltinIntegerType
${{{{
+ if (kBaseTypes[tt].tag == BaseType::Int || kBaseTypes[tt].tag == BaseType::UInt)
+}}}}
+ , __BuiltinInt32Type
+${{{{
+ if (kBaseTypes[tt].tag == BaseType::Int64 || kBaseTypes[tt].tag == BaseType::UInt64)
+}}}}
+ , __BuiltinInt64Type
+${{{{
; // fall through
case BaseType::Bool:
}}}}
diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang
index 191fa3195..1c01c2f6b 100644
--- a/source/slang/hlsl.meta.slang
+++ b/source/slang/hlsl.meta.slang
@@ -3923,475 +3923,36 @@ ${{{{
}
}}}}
-// AtomicAdd
-// Make the GLSL atomicAdd available.
-// We have separate int/float implementations, as the float version requires some specific extensions
-// https://www.khronos.org/registry/OpenGL/extensions/NV/NV_shader_atomic_float.txt
-
-__glsl_version(430)
-__glsl_extension(GL_EXT_shader_atomic_float)
-[ForceInline]
-[require(glsl_spirv, atomic_glsl_float1)]
-float __atomicAdd(__ref float value, float amount)
-{
- __target_switch
- {
- case glsl: __intrinsic_asm "atomicAdd($0, $1)";
- case spirv:
- return spirv_asm
- {
- OpExtension "SPV_EXT_shader_atomic_float_add";
- OpCapability AtomicFloat32AddEXT;
- result:$$float = OpAtomicFAddEXT &value Device None $amount
- };
- }
-}
-
-__glsl_version(430)
-__glsl_extension(GL_NV_shader_atomic_fp16_vector)
-[ForceInline]
-[require(glsl_spirv, atomic_glsl_halfvec)]
-half2 __atomicAdd(__ref half2 value, half2 amount)
-{
- __target_switch
- {
- case glsl: __intrinsic_asm "atomicAdd($0, $1)";
- case spirv:
- return spirv_asm
- {
- OpExtension "SPV_EXT_shader_atomic_float_add";
- OpCapability AtomicFloat32AddEXT;
- result:$$half2 = OpAtomicFAddEXT &value Device None $amount
- };
- }
-}
-
-// Helper for hlsl, using NVAPI
-[__requiresNVAPI]
-[require(hlsl, atomic_hlsl_nvapi)]
-uint2 __atomicAdd(RWByteAddressBuffer buf, uint offset, uint2)
-{
- __target_switch
- {
- case hlsl: __intrinsic_asm "NvInterlockedAddUint64($0, $1, $2)";
- }
-}
-
-// atomic add for hlsl using SM6.6
-[require(hlsl, atomic_hlsl_sm_6_6)]
-void __atomicAdd(RWByteAddressBuffer buf, uint offset, int64_t value, out int64_t originalValue)
-{
- __target_switch
- {
- case hlsl: __intrinsic_asm "$0.InterlockedAdd64($1, $2, $3)";
- }
-}
-
-[require(hlsl, atomic_hlsl_sm_6_6)]
-void __atomicAdd(RWByteAddressBuffer buf, uint offset, uint64_t value, out uint64_t originalValue)
-{
- __target_switch
- {
- case hlsl: __intrinsic_asm "$0.InterlockedAdd64($1, $2, $3)";
- }
-}
-
-// Int versions require glsl 4.30
-// https://www.khronos.org/registry/OpenGL-Refpages/gl4/html/atomicAdd.xhtml
-
-__glsl_version(430)
-[ForceInline]
-[require(glsl_spirv, atomic_glsl)]
-int __atomicAdd(__ref int value, int amount)
-{
- __target_switch
- {
- case glsl: __intrinsic_asm "atomicAdd($0, $1)";
- case spirv:
- return spirv_asm
- {
- result:$$int = OpAtomicIAdd &value Device None $amount;
- };
- }
-}
-
-__glsl_version(430)
-[ForceInline]
-[require(glsl_spirv, atomic_glsl)]
-uint __atomicAdd(__ref uint value, uint amount)
-{
- __target_switch
- {
- case glsl: __intrinsic_asm "atomicAdd($0, $1)";
- case spirv:
- return spirv_asm
- {
- result:$$uint = OpAtomicIAdd &value Device None $amount;
- };
- }
-}
-
-__glsl_version(430)
-__glsl_extension(GL_EXT_shader_atomic_int64)
-[ForceInline]
-[require(glsl_spirv, atomic_glsl_int64)]
-int64_t __atomicAdd(__ref int64_t value, int64_t amount)
-{
- __target_switch
- {
- case glsl: __intrinsic_asm "atomicAdd($0, $1)";
- case spirv:
- return spirv_asm
- {
- OpCapability Int64Atomics;
- result:$$int64_t = OpAtomicIAdd &value Device None $amount
- };
- }
-}
-
-__glsl_version(430)
-__glsl_extension(GL_EXT_shader_atomic_int64)
-[ForceInline]
-[require(glsl_spirv, atomic_glsl_int64)]
-uint64_t __atomicAdd(__ref uint64_t value, uint64_t amount)
-{
- __target_switch
- {
- case glsl: __intrinsic_asm "atomicAdd($0, $1)";
- case spirv:
- return spirv_asm
- {
- OpCapability Int64Atomics;
- result:$$uint64_t = OpAtomicIAdd &value Device None $amount
- };
- }
-}
-
-// Cas - Compare and swap
-
-// Helper for HLSL, using NVAPI
-
-[__requiresNVAPI]
-[require(hlsl, atomic_hlsl_nvapi)]
-uint2 __cas(RWByteAddressBuffer buf, uint offset, uint2 compareValue, uint2 value)
-{
- __target_switch
- {
- case hlsl: __intrinsic_asm "NvInterlockedCompareExchangeUint64($0, $1, $2, $3)";
- }
-}
-
-// CAS using SM6.6
-[require(hlsl, atomic_hlsl_sm_6_6)]
-void __cas(RWByteAddressBuffer buf, uint offset, in int64_t compare_value, in int64_t value, out int64_t original_value)
-{
- __target_switch
- {
- case hlsl: __intrinsic_asm "$0.InterlockedCompareExchange64($1, $2, $3, $4)";
- }
-}
-
-[require(hlsl, atomic_hlsl_sm_6_6)]
-void __cas(RWByteAddressBuffer buf, uint offset, in uint64_t compare_value, in uint64_t value, out uint64_t original_value)
-{
- __target_switch
- {
- case hlsl: __intrinsic_asm "$0.InterlockedCompareExchange64($1, $2, $3, $4)";
- }
-}
-
-__glsl_version(430)
-__glsl_extension(GL_EXT_shader_atomic_int64)
-[ForceInline]
-[require(glsl_spirv, atomic_glsl_int64)]
-int64_t __cas(__ref int64_t ioValue, int64_t compareValue, int64_t newValue)
-{
- __target_switch
- {
- case glsl: __intrinsic_asm "atomicCompSwap($0, $1, $2)";
- case spirv:
- return spirv_asm
- {
- OpCapability Int64Atomics;
- result:$$int64_t = OpAtomicCompareExchange &ioValue Device None None $newValue $compareValue
- };
- }
-}
-
-__glsl_version(430)
-__glsl_extension(GL_EXT_shader_atomic_int64)
-[ForceInline]
-[require(glsl_spirv, atomic_glsl_int64)]
-uint64_t __cas(__ref uint64_t ioValue, uint64_t compareValue, uint64_t newValue)
-{
- __target_switch
- {
- case glsl: __intrinsic_asm "atomicCompSwap($0, $1, $2)";
- case spirv:
- return spirv_asm
- {
- OpCapability Int64Atomics;
- result:$$uint64_t = OpAtomicCompareExchange &ioValue Device None None $newValue $compareValue
- };
- }
-}
-
-// Max
-
-[__requiresNVAPI]
-[require(hlsl, atomic_hlsl_nvapi)]
-uint2 __atomicMax(RWByteAddressBuffer buf, uint offset, uint2 value)
-{
- __target_switch
- {
- case hlsl: __intrinsic_asm "NvInterlockedMaxUint64($0, $1, $2)";
- }
-}
-
-__glsl_version(430)
-__glsl_extension(GL_EXT_shader_atomic_int64)
-[ForceInline]
-[require(glsl_spirv, atomic_glsl_int64)]
-uint64_t __atomicMax(__ref uint64_t ioValue, uint64_t value)
-{
- __target_switch
- {
- case glsl: __intrinsic_asm "atomicMax($0, $1)";
- case spirv:
- return spirv_asm
- {
- OpCapability Int64Atomics;
- result:$$uint64_t = OpAtomicUMax &ioValue Device None $value
- };
- }
-}
-
-__glsl_version(430)
-__glsl_extension(GL_EXT_shader_atomic_float2)
-[ForceInline]
-[require(glsl_spirv, atomic_glsl_float2)]
-float __atomicMax(__ref float ioValue, float value)
-{
- __target_switch
- {
- case glsl: __intrinsic_asm "atomicMax($0, $1)";
- case spirv:
- return spirv_asm
- {
- OpExtension "SPV_EXT_shader_atomic_float_min_max";
- OpCapability AtomicFloat32MinMaxEXT;
- result:$$float = OpAtomicFMaxEXT &ioValue Device None $value
- };
- }
-}
-
-__glsl_version(430)
-__glsl_extension(GL_EXT_shader_atomic_float2)
-[ForceInline]
-[require(glsl_spirv, atomic_glsl_float2)]
-half __atomicMax(__ref half ioValue, half value)
-{
- __target_switch
- {
- case glsl: __intrinsic_asm "atomicMax($0, $1)";
- case spirv:
- return spirv_asm
- {
- OpExtension "SPV_EXT_shader_atomic_float_min_max";
- OpCapability AtomicFloat16MinMaxEXT;
- result:$$half = OpAtomicFMaxEXT &ioValue Device None $value
- };
- }
-}
-
-// Min
-
-[__requiresNVAPI]
-[require(hlsl, atomic_hlsl_nvapi)]
-uint2 __atomicMin(RWByteAddressBuffer buf, uint offset, uint2 value)
-{
- __target_switch
- {
- case hlsl: __intrinsic_asm "NvInterlockedMinUint64($0, $1, $2)";
- }
-}
-
-__glsl_version(430)
-__glsl_extension(GL_EXT_shader_atomic_int64)
-[ForceInline]
-[require(glsl_spirv, atomic_glsl_int64)]
-uint64_t __atomicMin(__ref uint64_t ioValue, uint64_t value)
-{
- __target_switch
- {
- case glsl: __intrinsic_asm "atomicMin($0, $1)";
- case spirv:
- return spirv_asm
- {
- OpCapability Int64Atomics;
- result:$$uint64_t = OpAtomicUMin &ioValue Device None $value
- };
- }
-}
-
-__glsl_version(430)
-__glsl_extension(GL_EXT_shader_atomic_float2)
-[ForceInline]
-[require(glsl_spirv, atomic_glsl_float2)]
-float __atomicMin(__ref float ioValue, float value)
-{
- __target_switch
- {
- case glsl: __intrinsic_asm "atomicMin($0, $1)";
- case spirv:
- return spirv_asm
- {
- OpExtension "SPV_EXT_shader_atomic_float_min_max";
- OpCapability AtomicFloat32MinMaxEXT;
- result:$$float = OpAtomicFMinEXT &ioValue Device None $value
- };
- }
-}
-
-__glsl_version(430)
-__glsl_extension(GL_EXT_shader_atomic_float2)
-[ForceInline]
-[require(glsl_spirv, atomic_glsl_float2)]
-half __atomicMin(__ref half ioValue, half value)
-{
- __target_switch
- {
- case glsl: __intrinsic_asm "atomicMin($0, $1)";
- case spirv:
- return spirv_asm
- {
- OpExtension "SPV_EXT_shader_atomic_float_min_max";
- OpCapability AtomicFloat16MinMaxEXT;
- result:$$half = OpAtomicFMinEXT &ioValue Device None $value
- };
- }
-}
-
-// And
-
-[__requiresNVAPI]
-[require(hlsl, atomic_hlsl_nvapi)]
-uint2 __atomicAnd(RWByteAddressBuffer buf, uint offset, uint2 value)
-{
- __target_switch
- {
- case hlsl: __intrinsic_asm "NvInterlockedAndUint64($0, $1, $2)";
- }
-}
-
-__glsl_version(430)
-__glsl_extension(GL_EXT_shader_atomic_int64)
-[ForceInline]
-[require(glsl_spirv, atomic_glsl_int64)]
-uint64_t __atomicAnd(__ref uint64_t ioValue, uint64_t value)
-{
- __target_switch
- {
- case glsl: __intrinsic_asm "atomicAnd($0, $1)";
- case spirv:
- return spirv_asm
- {
- OpCapability Int64Atomics;
- result:$$uint64_t = OpAtomicAnd &ioValue Device None $value
- };
- }
-}
-
-// Or
-
-[__requiresNVAPI]
-[require(hlsl, atomic_hlsl_nvapi)]
-uint2 __atomicOr(RWByteAddressBuffer buf, uint offset, uint2 value)
-{
- __target_switch
- {
- case hlsl: __intrinsic_asm "NvInterlockedOrUint64($0, $1, $2)";
- }
-}
-
-__glsl_version(430)
-__glsl_extension(GL_EXT_shader_atomic_int64)
-[ForceInline]
-[require(glsl_spirv, atomic_glsl_int64)]
-uint64_t __atomicOr(__ref uint64_t ioValue, uint64_t value)
-{
- __target_switch
- {
- case glsl: __intrinsic_asm "atomicOr($0, $1)";
- case spirv:
- return spirv_asm
- {
- OpCapability Int64Atomics;
- result:$$uint64_t = OpAtomicOr &ioValue Device None $value
- };
- }
-}
-
-// Xor
-
-[__requiresNVAPI]
-[require(hlsl, atomic_hlsl_nvapi)]
-uint2 __atomicXor(RWByteAddressBuffer buf, uint offset, uint2 value)
-{
- __target_switch
- {
- case hlsl: __intrinsic_asm "NvInterlockedXorUint64($0, $1, $2)";
- }
-}
-
-__glsl_version(430)
-__glsl_extension(GL_EXT_shader_atomic_int64)
-[ForceInline]
-[require(glsl_spirv, atomic_glsl_int64)]
-uint64_t __atomicXor(__ref uint64_t ioValue, uint64_t value)
-{
- __target_switch
- {
- case glsl: __intrinsic_asm "atomicXor($0, $1)";
- case spirv:
- return spirv_asm
- {
- OpCapability Int64Atomics;
- result:$$uint64_t = OpAtomicXor &ioValue Device None $value
- };
- }
-}
-
-// Exchange
-
-[__requiresNVAPI]
-[require(hlsl, atomic_hlsl_nvapi)]
-uint2 __atomicExchange(RWByteAddressBuffer buf, uint offset, uint2 value)
-{
- __target_switch
- {
- case hlsl: __intrinsic_asm "NvInterlockedExchangeUint64($0, $1, $2)";
- }
-}
-
-__glsl_version(430)
-__glsl_extension(GL_EXT_shader_atomic_int64)
-[ForceInline]
-[require(glsl_spirv, atomic_glsl_int64)]
-uint64_t __atomicExchange(__ref uint64_t ioValue, uint64_t value)
-{
- __target_switch
- {
- case glsl: __intrinsic_asm "atomicExchange($0, $1)";
- case spirv:
- return spirv_asm
- {
- OpCapability Int64Atomics;
- result:$$uint64_t = OpAtomicExchange &ioValue Device None $value
- };
- }
-}
+// Atomic intrinsic insts.
+
+__intrinsic_op($(kIROp_AtomicExchange))
+T __atomic_exchange<T>(__ref T val, T newValue, MemoryOrder order = MemoryOrder.Relaxed);
+__intrinsic_op($(kIROp_AtomicCompareExchange))
+T __atomic_compare_exchange<T>(
+ __ref T val,
+ T compareValue,
+ T newValue,
+ MemoryOrder successOrder = MemoryOrder.Relaxed,
+ MemoryOrder failOrder = MemoryOrder.Relaxed);
+__intrinsic_op($(kIROp_AtomicAdd))
+T __atomic_add<T>(__ref T val, T value, MemoryOrder order = MemoryOrder.Relaxed);
+__intrinsic_op($(kIROp_AtomicSub))
+T __atomic_sub<T>(__ref T val, T value, MemoryOrder order = MemoryOrder.Relaxed);
+__intrinsic_op($(kIROp_AtomicMax))
+T __atomic_max<T>(__ref T val, T value, MemoryOrder order = MemoryOrder.Relaxed);
+__intrinsic_op($(kIROp_AtomicMin))
+T __atomic_min<T>(__ref T val, T value, MemoryOrder order = MemoryOrder.Relaxed);
+__intrinsic_op($(kIROp_AtomicAnd))
+T __atomic_and<T>(__ref T val, T value, MemoryOrder order = MemoryOrder.Relaxed);
+__intrinsic_op($(kIROp_AtomicOr))
+T __atomic_or<T>(__ref T val, T value, MemoryOrder order = MemoryOrder.Relaxed);
+__intrinsic_op($(kIROp_AtomicXor))
+T __atomic_xor<T>(__ref T val, T value, MemoryOrder order = MemoryOrder.Relaxed);
+__intrinsic_op($(kIROp_AtomicInc))
+T __atomic_increment<T>(__ref T val, MemoryOrder order = MemoryOrder.Relaxed);
+__intrinsic_op($(kIROp_AtomicDec))
+T __atomic_decrement<T>(__ref T val, MemoryOrder order = MemoryOrder.Relaxed);
// Conversion between uint64_t and uint2
@@ -4802,6 +4363,20 @@ struct $(item.name)
}
${{{{
+ struct BufferAtomicOps
+ {
+ const char* name;
+ const char* internalName;
+ };
+ const BufferAtomicOps bufferAtomicOps[] = {
+ {"Max", "max"},
+ {"Min", "min"},
+ {"Add", "add"},
+ {"And", "and"},
+ {"Or", "or"},
+ {"Xor", "xor"},
+ {"Exchange", "exchange"}
+ };
if (item.op == kIROp_HLSLRWByteAddressBufferType)
{
}}}}
@@ -4822,6 +4397,13 @@ ${{{{
// F32 Add
+ /// Perform a 32-bit floating point atomic add operation at `byteAddress`.
+ /// @param byteAddress The address at which to perform the atomic add operation.
+ /// @param valueToAdd The value to add to the value at `byteAddress`.
+ /// @param originalValue The original value at `byteAddress` before the add operation.
+ /// @remarks For SPIR-V, this function maps to `OpAtomicFAddEXT`. For HLSL, this function translates to an NVAPI call
+ /// due to lack of native HLSL intrinsic for floating point atomic add. For CUDA, this function
+ /// maps to `atomicAdd`.
__cuda_sm_version(2.0)
[__requiresNVAPI]
[ForceInline]
@@ -4832,35 +4414,45 @@ ${{{{
{
case hlsl: __intrinsic_asm "($3 = NvInterlockedAddFp32($0, $1, $2))";
case cuda: __intrinsic_asm "(*$3 = atomicAdd($0._getPtrAt<float>($1), $2))";
- case metal:
- {
- let buf = __getEquivalentStructuredBuffer<float>(this);
- __metalInterlocked_add(__getMetalAtomicRef(buf[byteAddress / 4]), valueToAdd, originalValue);
- return;
- }
- case glsl:
- case spirv:
+ default:
{
let buf = __getEquivalentStructuredBuffer<float>(this);
- originalValue = __atomicAdd(buf[byteAddress / 4], valueToAdd);
+ originalValue = __atomic_add(buf[byteAddress / 4], valueToAdd);
return;
}
}
}
// FP16x2
+
+ /// @internal
+ /// Maps to the `NvInterlockedAddFp16x2` NVAPI function.
+ ///
[__requiresNVAPI]
[ForceInline]
- [require(hlsl, atomic_hlsl_nvapi)]
+ [require(cuda_hlsl_spirv)]
uint _NvInterlockedAddFp16x2(uint byteAddress, uint fp16x2Value)
{
__target_switch
{
case hlsl:
__intrinsic_asm "NvInterlockedAddFp16x2($0, $1, $2)";
+ default:
+ let buf = __getEquivalentStructuredBuffer<half2>(this);
+ return bit_cast<uint>(__atomic_add(buf[byteAddress / 4], bit_cast<half2>(fp16x2Value)));
}
}
+
+ /// Perform a 16-bit floating point atomic add operation at `byteAddress`.
+ /// @param byteAddress The address at which to perform the atomic add operation.
+ /// @param valueToAdd The value to add to the value at `byteAddress`.
+ /// @param originalValue The original value at `byteAddress` before the add operation.
+ /// @remarks For SPIR-V, this function maps to `OpAtomicFAddEXT` and requires the `SPV_EXT_shader_atomic_float16_add` extension.
+ ///
+ /// For HLSL, this function translates to an NVAPI call
+ /// due to lack of native HLSL intrinsic for floating point atomic add. For CUDA, this function
+ /// maps to `atomicAdd`.
[__requiresNVAPI]
[ForceInline]
void InterlockedAddF16(uint byteAddress, half value, out half originalValue)
@@ -4880,17 +4472,55 @@ ${{{{
originalValue = asfloat16((uint16_t)(_NvInterlockedAddFp16x2(byteAddress, packedInput) >> 16));
}
return;
- case glsl:
- case spirv:
+ default:
+ {
+ let buf = __getEquivalentStructuredBuffer<half>(this);
+ originalValue = __atomic_add(buf[byteAddress/2], value);
+ return;
+ }
+ }
+ }
+
+ /// Perform a 16-bit floating point atomic add operation at `byteAddress` through emulation using `half2` atomics.
+ /// @param byteAddress The address at which to perform the atomic add operation.
+ /// @param value The value to add to the value at `byteAddress`.
+ /// @param originalValue The original value at `byteAddress` before the add operation.
+ /// @remarks For SPIR-V, this function maps to `OpAtomicFAddEXT` on a `half2` vector with the correct part set to `value`
+ /// and the remaining part set to 0. This requires the `AtomicFloat16VectorNV` capability introduced by the `SPV_NV_shader_atomic_fp16_vector`
+ /// extension.
+ ///
+ /// For HLSL, this function translates to an equivalent NVAPI call
+ /// due to lack of native HLSL intrinsic for floating point atomic add. For CUDA, this function
+ /// maps to `atomicAdd`.
+ [__requiresNVAPI]
+ [ForceInline]
+ void InterlockedAddF16Emulated(uint byteAddress, half value, out half originalValue)
+ {
+ __target_switch
+ {
+ case hlsl:
+ if ((byteAddress & 2) == 0)
+ {
+ uint packedInput = asuint16(value);
+ originalValue = asfloat16((uint16_t)_NvInterlockedAddFp16x2(byteAddress, packedInput));
+ }
+ else
+ {
+ byteAddress = byteAddress & ~3;
+ uint packedInput = ((uint)asuint16(value)) << 16;
+ originalValue = asfloat16((uint16_t)(_NvInterlockedAddFp16x2(byteAddress, packedInput) >> 16));
+ }
+ return;
+ default:
{
let buf = __getEquivalentStructuredBuffer<half2>(this);
if ((byteAddress & 2) == 0)
{
- originalValue = __atomicAdd(buf[byteAddress/4], half2(value, half(0.0))).x;
+ originalValue = __atomic_add(buf[byteAddress/4], half2(value, half(0.0))).x;
}
else
{
- originalValue = __atomicAdd(buf[byteAddress/4], half2(half(0.0), value)).y;
+ originalValue = __atomic_add(buf[byteAddress/4], half2(half(0.0), value)).y;
}
return;
}
@@ -4908,484 +4538,207 @@ ${{{{
__target_switch
{
case hlsl: __intrinsic_asm "(NvInterlockedAddFp32($0, $1, $2))";
- case cuda: __intrinsic_asm "atomicAdd($0._getPtrAt<float>($1), $2)";
- case metal:
- {
- let buf = __getEquivalentStructuredBuffer<float>(this);
- __metalInterlocked_add(__getMetalAtomicRef(buf[byteAddress / 4]), valueToAdd);
- return;
- }
- case glsl:
- case spirv:
+ default:
{
let buf = __getEquivalentStructuredBuffer<float>(this);
- __atomicAdd(buf[byteAddress / 4], valueToAdd);
+ __atomic_add(buf[byteAddress / 4], valueToAdd);
return;
}
}
}
// Int64 Add
+
+ /// Perform a 64-bit integer atomic add operation at `byteAddress`.
+ /// @param byteAddress The address at which to perform the atomic add operation.
+ /// @param valueToAdd The value to add to the value at `byteAddress`.
+ /// @param originalValue The original value at `byteAddress` before the add operation.
+ /// @remarks For SPIR-V, this function maps to `OpAtomicIAdd`. For HLSL, this function
+ /// translates to `InterlockedAdd64` and requires shader model 6.6.
+ /// For CUDA, this function maps to `atomicAdd`.
[ForceInline]
- __cuda_sm_version(6.0)
- [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_nvapi_cuda6_int64)]
+ [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda9_int64)]
void InterlockedAddI64(uint byteAddress, int64_t valueToAdd, out int64_t originalValue)
{
- __target_switch
- {
- case cuda: __intrinsic_asm "(*$3 = atomicAdd($0._getPtrAt<uint64_t>($1), $2))";
- case hlsl:
- originalValue = __asuint64(__atomicAdd(this, byteAddress, __asuint2(valueToAdd)));
- case glsl:
- case spirv:
- {
- let buf = __getEquivalentStructuredBuffer<int64_t>(this);
- originalValue = __atomicAdd(buf[byteAddress / 8], valueToAdd);
- }
- }
+ InterlockedAdd64(byteAddress, valueToAdd, originalValue);
}
// Without returning original value
- __cuda_sm_version(6.0)
- [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_nvapi_cuda6_int64)]
+ [ForceInline]
+ [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda9_int64)]
void InterlockedAddI64(uint byteAddress, int64_t valueToAdd)
{
- __target_switch
- {
- case cuda: __intrinsic_asm "atomicAdd($0._getPtrAt<uint64_t>($1), $2)";
- case hlsl:
- __atomicAdd(this, byteAddress, __asuint2(valueToAdd));
- case glsl:
- case spirv:
- let buf = __getEquivalentStructuredBuffer<int64_t>(this);
- __atomicAdd(buf[byteAddress / 8], valueToAdd);
- }
+ InterlockedAdd64(byteAddress, valueToAdd);
}
// Cas uint64_t
- [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_nvapi_cuda9_int64)]
+ /// Perform a 64-bit integer atomic compare-and-exchange operation at `byteAddress`.
+ /// @param byteAddress The address at which to perform the atomic compare-and-exchange operation.
+ /// @param compareValue The value to compare to the value at `byteAddress`.
+ /// @param value The value to store at `byteAddress` if the comparison is successful.
+ /// @param outOriginalValue The original value at `byteAddress` before the compare-and-exchange operation.
+ /// @remarks For SPIR-V, this function maps to `OpAtomicCompareExchange`. For HLSL, this function
+ /// translates to `InterlockedCompareExchange64` and requires shader model 6.6.
+ /// For CUDA, this function maps to `atomicCAS`.
+ [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda9_int64)]
void InterlockedCompareExchangeU64(uint byteAddress, uint64_t compareValue, uint64_t value, out uint64_t outOriginalValue)
{
__target_switch
{
case cuda: __intrinsic_asm "(*$4 = atomicCAS($0._getPtrAt<uint64_t>($1), $2, $3))";
case hlsl:
- outOriginalValue = __asuint64(__cas(this, byteAddress, __asuint2(compareValue), __asuint2(value)));
- case glsl:
- case spirv:
- let buf = __getEquivalentStructuredBuffer<uint64_t>(this);
- outOriginalValue = __cas(buf[byteAddress / 8], compareValue, value);
- }
- }
-
- // Max
-
- __cuda_sm_version(5.0)
- [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_nvapi_cuda5_int64)]
- uint64_t InterlockedMaxU64(uint byteAddress, uint64_t value)
- {
- __target_switch
- {
- case cuda: __intrinsic_asm "atomicMax($0._getPtrAt<uint64_t>($1), $2)";
- case hlsl:
- return __asuint64(__atomicMax(this, byteAddress, __asuint2(value)));
- case glsl:
- case spirv:
- let buf = __getEquivalentStructuredBuffer<uint64_t>(this);
- return __atomicMax(buf[byteAddress / 8], value);
- }
- }
-
- [ForceInline]
- [require(hlsl, atomic_hlsl_sm_6_6)]
- void InterlockedMax64(uint byteAddress, int64_t value)
- {
- __target_switch
- {
- case hlsl: __intrinsic_asm ".InterlockedMax64";
- }
- }
-
- [ForceInline]
- [require(hlsl, atomic_hlsl_sm_6_6)]
- void InterlockedMax64(uint byteAddress, int64_t value, out int64_t outOriginalValue)
- {
- __target_switch
- {
- case hlsl: __intrinsic_asm ".InterlockedMax64";
- }
- }
-
- [ForceInline]
- [require(hlsl, atomic_hlsl_sm_6_6)]
- void InterlockedMax64(uint byteAddress, uint64_t value)
- {
- __target_switch
- {
- case hlsl: __intrinsic_asm ".InterlockedMax64";
- }
- }
-
- [ForceInline]
- [require(hlsl, atomic_hlsl_sm_6_6)]
- void InterlockedMax64(uint byteAddress, uint64_t value, out uint64_t outOriginalValue)
- {
- __target_switch
- {
- case hlsl: __intrinsic_asm ".InterlockedMax64";
- }
- }
-
- // Min
-
- __cuda_sm_version(5.0)
- [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_nvapi_cuda5_int64)]
- uint64_t InterlockedMinU64(uint byteAddress, uint64_t value)
- {
- __target_switch
- {
- case cuda: __intrinsic_asm "atomicMin($0._getPtrAt<uint64_t>($1), $2)";
- case hlsl:
- return __asuint64(__atomicMin(this, byteAddress, __asuint2(value)));
- case glsl:
- case spirv:
+ __intrinsic_asm ".InterlockedCompareExchange64";
+ default:
let buf = __getEquivalentStructuredBuffer<uint64_t>(this);
- return __atomicMin(buf[byteAddress / 8], value);
- }
- }
-
- [ForceInline]
- [require(hlsl, atomic_hlsl_sm_6_6)]
- void InterlockedMin64(uint byteAddress, int64_t value)
- {
- __target_switch
- {
- case hlsl: __intrinsic_asm ".InterlockedMin64";
- }
- }
-
- [ForceInline]
- [require(hlsl, atomic_hlsl_sm_6_6)]
- void InterlockedMin64(uint byteAddress, int64_t value, out int64_t outOriginalValue)
- {
- __target_switch
- {
- case hlsl: __intrinsic_asm ".InterlockedMin64";
- }
- }
-
- [ForceInline]
- [require(hlsl, atomic_hlsl_sm_6_6)]
- void InterlockedMin64(uint byteAddress, uint64_t value)
- {
- __target_switch
- {
- case hlsl: __intrinsic_asm ".InterlockedMin64";
+ outOriginalValue = __atomic_compare_exchange(buf[byteAddress / 8], compareValue, value);
}
}
- [ForceInline]
- [require(hlsl, atomic_hlsl_sm_6_6)]
- void InterlockedMin64(uint byteAddress, uint64_t value, out uint64_t outOriginalValue)
- {
- __target_switch
- {
- case hlsl: __intrinsic_asm ".InterlockedMin64";
- }
- }
-
- // And
-
- __cuda_sm_version(5.0)
- [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_nvapi_cuda5_int64)]
- uint64_t InterlockedAndU64(uint byteAddress, uint64_t value)
- {
- __target_switch
- {
- case cuda: __intrinsic_asm "atomicAnd($0._getPtrAt<uint64_t>($1), $2)";
- case hlsl:
- return __asuint64(__atomicAnd(this, byteAddress, __asuint2(value)));
- case glsl:
- case spirv:
- let buf = __getEquivalentStructuredBuffer<uint64_t>(this);
- return __atomicAnd(buf[byteAddress / 8], value);
- }
- }
+ // SM6.6 64-bit atomics.
+ // InterlockedMax64, InterlockedMin64, InterlockedAdd64, InterlockedAnd64, InterlockedOr64, InterlockedXor64, InterlockedExchange64
+${{{{
+ for (auto op : bufferAtomicOps) {
+}}}}
[ForceInline]
- [require(hlsl, atomic_hlsl_sm_6_6)]
- void InterlockedAnd64(uint byteAddress, uint64_t value)
+ [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda9_int64)]
+ uint64_t Interlocked$(op.name)U64(uint byteAddress, uint64_t value)
{
- __target_switch
- {
- case hlsl: __intrinsic_asm ".InterlockedAnd64";
- }
+ uint64_t originalValue;
+ Interlocked$(op.name)64(byteAddress, value, originalValue);
+ return originalValue;
}
[ForceInline]
- [require(hlsl, atomic_hlsl_sm_6_6)]
- void InterlockedAnd64(uint byteAddress, uint64_t value, out uint64_t outOriginalValue)
+ [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda9_int64)]
+ void Interlocked$(op.name)64(uint byteAddress, int64_t value)
{
- __target_switch
- {
- case hlsl: __intrinsic_asm ".InterlockedAnd64";
- }
- }
-
- // Or
-
- __cuda_sm_version(5.0)
- [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_nvapi_cuda5_int64)]
- uint64_t InterlockedOrU64(uint byteAddress, uint64_t value)
- {
- __target_switch
- {
- case cuda: __intrinsic_asm "atomicOr($0._getPtrAt<uint64_t>($1), $2)";
- case hlsl:
- return __asuint64(__atomicOr(this, byteAddress, __asuint2(value)));
- case glsl:
- case spirv:
- let buf = __getEquivalentStructuredBuffer<uint64_t>(this);
- return __atomicOr(buf[byteAddress / 8], value);
- }
+ int64_t oldValue;
+ Interlocked$(op.name)64(byteAddress, value, oldValue);
}
+ /// Perform a 64-bit integer atomic $(op.internalName) operation at `byteAddress`.
+ /// @param byteAddress The address at which to perform the atomic $(op.internalName) operation.
+ /// @param value The operand for the $(op.internalName) operation.
+ /// @param outOriginalValue The original value at `byteAddress` before the $(op.internalName) operation.
[ForceInline]
- [require(hlsl, atomic_hlsl_sm_6_6)]
- void InterlockedOr64(uint byteAddress, uint64_t value)
+ [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda9_int64)]
+ void Interlocked$(op.name)64<T:__BuiltinInt64Type>(uint byteAddress, T value, out T outOriginalValue)
{
__target_switch
{
- case hlsl: __intrinsic_asm ".InterlockedOr64";
+ case hlsl: __intrinsic_asm ".Interlocked$(op.name)64";
+ default:
+ let buf = __getEquivalentStructuredBuffer<T>(this);
+ outOriginalValue = __atomic_$(op.internalName)(buf[byteAddress / 8], value);
+ return;
}
}
+${{{{
+} // for (each bufferOps)
+}}}}
+ /// Perform a 64-bit integer atomic compare-and-exchange operation at `byteAddress`.
+ /// @param byteAddress The address at which to perform the atomic compare-and-exchange operation.
+ /// @param compareValue The value to compare to the value at `byteAddress`.
+ /// @param value The value to store at `byteAddress` if the comparison is successful.
+ /// @param outOriginalValue The original value at `byteAddress` before the compare-and-exchange operation.
+ /// @remarks For SPIR-V, this function maps to `OpAtomicCompareExchange`. For HLSL, this function
+ /// translates to `InterlockedCompareExchange64` and requires shader model 6.6.
+ /// For CUDA, this function maps to `atomicCAS`.
[ForceInline]
- [require(hlsl, atomic_hlsl_sm_6_6)]
- void InterlockedOr64(uint byteAddress, uint64_t value, out uint64_t outOriginalValue)
+ [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda9_int64)]
+ void InterlockedCompareExchange64<T:__BuiltinInt64Type>(uint byteAddress, T compareValue, T value, out T outOriginalValue)
{
__target_switch
{
- case hlsl: __intrinsic_asm ".InterlockedOr64";
- }
- }
-
- // Xor
-
- __cuda_sm_version(5.0)
- [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_nvapi_cuda5_int64)]
- uint64_t InterlockedXorU64(uint byteAddress, uint64_t value)
- {
- __target_switch
- {
- case cuda: __intrinsic_asm "atomicXor($0._getPtrAt<uint64_t>($1), $2)";
case hlsl:
- return __asuint64(__atomicXor(this, byteAddress, __asuint2(value)));
- case glsl:
- case spirv:
- let buf = __getEquivalentStructuredBuffer<uint64_t>(this);
- return __atomicXor(buf[byteAddress / 8], value);
- }
- }
-
- [ForceInline]
- [require(hlsl, atomic_hlsl_sm_6_6)]
- void InterlockedXor64(uint byteAddress, uint64_t value)
- {
- __target_switch
- {
- case hlsl: __intrinsic_asm ".InterlockedXor64";
+ __intrinsic_asm ".InterlockedCompareExchange64";
+ default:
+ let buf = __getEquivalentStructuredBuffer<T>(this);
+ outOriginalValue = __atomic_compare_exchange(buf[byteAddress / 8], compareValue, value);
+ return;
}
}
[ForceInline]
- [require(hlsl, atomic_hlsl_sm_6_6)]
- void InterlockedXor64(uint byteAddress, uint64_t value, out uint64_t outOriginalValue)
- {
- __target_switch
- {
- case hlsl: __intrinsic_asm ".InterlockedXor64";
- }
- }
-
- // Exchange
-
- [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_nvapi_cuda9_int64)]
- uint64_t InterlockedExchangeU64(uint byteAddress, uint64_t value)
+ [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda9_int64)]
+ void InterlockedCompareExchangeFloatBitwise(uint byteAddress, float compareValue, float value, out float outOriginalValue)
{
__target_switch
{
- case cuda: __intrinsic_asm "atomicExch($0._getPtrAt<uint64_t>($1), $2)";
- case hlsl:
- return __asuint64(__atomicExchange(this, byteAddress, __asuint2(value)));
- case glsl:
- case spirv:
- let buf = __getEquivalentStructuredBuffer<uint64_t>(this);
- return __atomicExchange(buf[byteAddress / 8], value);
+ case hlsl: __intrinsic_asm ".InterlockedCompareExchangeFloatBitwise";
+ default:
+ let buf = __getEquivalentStructuredBuffer<float>(this);
+ outOriginalValue = __atomic_compare_exchange(buf[byteAddress / 4], compareValue, value);
+ return;
}
}
+ /// Perform a floating-point atomic bitwise exchange operation at `byteAddress`.
+ /// @param byteAddress The address at which to perform the atomic exchange operation.
+ /// @param value The value to store at `byteAddress`.
+ /// @param [out] outOriginalValue The original value at `byteAddress` before the exchange operation.
+ /// @remarks For SPIR-V, this function maps to `OpAtomicExchange`. For HLSL, this function
+ /// translates to `InterlockedExchangeFloat` and requires shader model 6.6.
+ /// For CUDA, this function maps to `atomicExch`.
[ForceInline]
- [require(hlsl, atomic_hlsl_sm_6_6)]
+ [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda9_int64)]
void InterlockedExchangeFloat(uint byteAddress, float value, out float outOriginalValue)
{
__target_switch
{
case hlsl: __intrinsic_asm ".InterlockedExchangeFloat";
- }
- }
-
- [ForceInline]
- [require(hlsl, atomic_hlsl_sm_6_6)]
- void InterlockedExchange64(uint byteAddress, int64_t value, out int64_t outOriginalValue)
- {
- __target_switch
- {
- case hlsl: __intrinsic_asm ".InterlockedExchange64";
- }
- }
-
- [ForceInline]
- [require(hlsl, atomic_hlsl_sm_6_6)]
- void InterlockedExchange64(uint byteAddress, uint64_t value, out uint64_t outOriginalValue)
- {
- __target_switch
- {
- case hlsl: __intrinsic_asm ".InterlockedExchange64";
- }
- }
-
- // SM6.6 6 64bit atomics.
- [ForceInline]
- [require(glsl_hlsl_spirv, atomic_glsl_hlsl_cuda9_int64)]
- void InterlockedAdd64(uint byteAddress, int64_t valueToAdd)
- {
- __target_switch
- {
- case hlsl: __intrinsic_asm ".InterlockedAdd64";
- case glsl:
- case spirv:
- let buf = __getEquivalentStructuredBuffer<int64_t>(this);
- __atomicAdd(buf[byteAddress / 8], valueToAdd);
- }
- }
-
- [ForceInline]
- [require(glsl_hlsl_spirv, atomic_glsl_hlsl_cuda9_int64)]
- void InterlockedAdd64(uint byteAddress, int64_t valueToAdd, out int64_t outOriginalValue)
- {
- __target_switch
- {
- case hlsl: __intrinsic_asm ".InterlockedAdd64";
- case glsl:
- case spirv:
- let buf = __getEquivalentStructuredBuffer<int64_t>(this);
- outOriginalValue = __atomicAdd(buf[byteAddress / 8], valueToAdd);
- return;
- }
- }
-
- [ForceInline]
- [require(glsl_hlsl_spirv, atomic_glsl_hlsl_cuda9_int64)]
- void InterlockedAdd64(uint byteAddress, uint64_t valueToAdd)
- {
- __target_switch
- {
- case hlsl: __intrinsic_asm ".InterlockedAdd64";
- case glsl:
- case spirv:
- let buf = __getEquivalentStructuredBuffer<uint64_t>(this);
- __atomicAdd(buf[byteAddress / 8], valueToAdd);
- }
- }
-
- [ForceInline]
- [require(glsl_hlsl_spirv, atomic_glsl_hlsl_cuda9_int64)]
- void InterlockedAdd64(uint byteAddress, uint64_t valueToAdd, out uint64_t outOriginalValue)
- {
- __target_switch
- {
- case hlsl: __intrinsic_asm ".InterlockedAdd64";
- case glsl:
- case spirv:
- let buf = __getEquivalentStructuredBuffer<uint64_t>(this);
- outOriginalValue = __atomicAdd(buf[byteAddress / 8], valueToAdd);
+ default:
+ let buf = __getEquivalentStructuredBuffer<float>(this);
+ outOriginalValue = __atomic_exchange(buf[byteAddress / 4], value);
return;
}
}
+ /// Perform a 64-bit integer atomic compare-and-store operation at `byteAddress`.
+ /// Unlike `InterlockedCompareExchange64`, the original value at `byteAddress` is not returned.
+ /// @param byteAddress The byte address at which to perform the atomic store operation; on non-HLSL targets it is divided by 8 to index 64-bit elements.
+ /// @param compareValue The value to compare to the value at `byteAddress`.
+ /// @param value The value to store at `byteAddress` if the value at address is equal to `compareValue`.
+ /// @remarks For SPIR-V, this function maps to `OpAtomicCompareExchange`. For HLSL, this function
+ /// translates to `InterlockedCompareStore64` and requires shader model 6.6.
+ /// For CUDA, this function maps to `atomicCAS`.
[ForceInline]
- [require(glsl_hlsl_spirv, atomic_glsl_hlsl_cuda9_int64)]
- void InterlockedCompareExchange64(uint byteAddress, int64_t compareValue, int64_t value, out int64_t outOriginalValue)
- {
- __target_switch
- {
- case hlsl:
- __cas(this, byteAddress, compareValue, value, outOriginalValue);
- return;
- case glsl:
- case spirv:
- let buf = __getEquivalentStructuredBuffer<int64_t>(this);
- outOriginalValue = __cas(buf[byteAddress / 8], compareValue, value);
- return;
- }
- }
-
[ForceInline]
- [require(glsl_hlsl_spirv, atomic_glsl_hlsl_cuda9_int64)]
- void InterlockedCompareExchange64(uint byteAddress, uint64_t compareValue, uint64_t value, out uint64_t outOriginalValue)
+ [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda9_int64)]
+ void InterlockedCompareStore64<T:__BuiltinInt64Type>(uint byteAddress, T compareValue, T value)
{
__target_switch
{
- case hlsl:
- __cas(this, byteAddress, compareValue, value, outOriginalValue);
- return;
- case glsl:
- case spirv:
- let buf = __getEquivalentStructuredBuffer<uint64_t>(this);
- outOriginalValue = __cas(buf[byteAddress / 8], compareValue, value);
+ case hlsl: __intrinsic_asm ".InterlockedCompareStore64";
+ default:
+ let buf = __getEquivalentStructuredBuffer<T>(this);
+ __atomic_compare_exchange(buf[byteAddress / 8], compareValue, value);
return;
}
}
-
+
+ /// Perform a floating-point atomic bitwise compare-and-store operation at `byteAddress`.
+ /// Unlike `InterlockedCompareExchangeFloatBitwise`, the original value at `byteAddress` is not returned.
+ /// @param byteAddress The address at which to perform the atomic compare-and-store operation.
+ /// @param compareValue The value to perform bitwise comparison to the value at `byteAddress`.
+ /// @param value The value to store at `byteAddress` if the comparison is successful.
+ /// @remarks For SPIR-V, this function maps to `OpAtomicCompareExchange`. For HLSL, this function
+ /// translates to `InterlockedCompareStoreFloatBitwise` and requires shader model 6.6.
+ /// For CUDA, this function maps to `atomicCAS`.
[ForceInline]
- [require(hlsl, atomic_hlsl_sm_6_6)]
+ [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda9_int64)]
void InterlockedCompareStoreFloatBitwise(uint byteAddress, float compareValue, float value)
{
__target_switch
{
case hlsl: __intrinsic_asm ".InterlockedCompareStoreFloatBitwise";
- }
- }
-
- [ForceInline]
- [require(hlsl, atomic_hlsl_sm_6_6)]
- void InterlockedCompareExchangeFloatBitwise(uint byteAddress, float compareValue, float value, out float outOriginalValue)
- {
- __target_switch
- {
- case hlsl: __intrinsic_asm ".InterlockedCompareExchangeFloatBitwise";
- }
- }
-
- [ForceInline]
- [require(hlsl, atomic_hlsl_sm_6_6)]
- void InterlockedCompareStore64(uint byteAddress, int64_t compareValue, int64_t value)
- {
- __target_switch
- {
- case hlsl: __intrinsic_asm ".InterlockedCompareStore64";
- }
- }
-
- [ForceInline]
- [require(hlsl, atomic_hlsl_sm_6_6)]
- void InterlockedCompareStore64(uint byteAddress, uint64_t compareValue, uint64_t value)
- {
- __target_switch
- {
- case hlsl: __intrinsic_asm ".InterlockedCompareStore64";
+ default:
+ let buf = __getEquivalentStructuredBuffer<float>(this);
+ __atomic_compare_exchange(buf[byteAddress / 4], compareValue, value);
+ return;
}
}
@@ -5393,103 +4746,62 @@ ${{{{
} // endif (type == RWByteAddressBuffer)
}}}}
- // Added operations:
- [ForceInline]
- [require(cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)]
- void InterlockedAdd(
- UINT dest,
- UINT value,
- out UINT original_value)
- {
- __target_switch
- {
- case glsl: __intrinsic_asm "($3 = atomicAdd($0._data[$1/4], $2))";
- case cuda: __intrinsic_asm "(*$3 = atomicAdd($0._getPtrAt<uint32_t>($1), $2))";
- case hlsl: __intrinsic_asm ".InterlockedAdd";
- case metal:
- {
- let buf = __getEquivalentStructuredBuffer<uint>(this);
- __metalInterlocked_add(__getMetalAtomicRef(buf[dest / 4]), value, original_value);
- return;
- }
- case spirv:
- let buf = __getEquivalentStructuredBuffer<uint>(this);
- ::InterlockedAdd(buf[dest / 4], value, original_value);
- }
- }
-
- [ForceInline]
- [require(cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)]
- void InterlockedAdd(
- UINT dest,
- UINT value)
- {
- __target_switch
- {
- case glsl: __intrinsic_asm "atomicAdd($0._data[$1/4], $2)";
- case cuda: __intrinsic_asm "atomicAdd($0._getPtrAt<uint32_t>($1), $2)";
- case hlsl: __intrinsic_asm ".InterlockedAdd";
- case metal:
- {
- let buf = __getEquivalentStructuredBuffer<uint>(this);
- __metalInterlocked_add(__getMetalAtomicRef(buf[dest / 4]), value);
- return;
- }
- case spirv:
- let buf = __getEquivalentStructuredBuffer<uint>(this);
- ::InterlockedAdd(buf[dest / 4], value);
- }
- }
+ // 32-bit atomic operations:
+ // InterlockedMax, InterlockedMin, InterlockedAdd, InterlockedAnd, InterlockedOr, InterlockedXor, InterlockedExchange
+${{{{
+ for (auto op : bufferAtomicOps) {
+}}}}
+ /// Perform an atomic $(op.internalName) operation at the specified byte
+ /// location of the byte address buffer.
+ /// @param dest The byte address at which to perform the atomic $(op.internalName) operation.
+ /// @param value The operand of the atomic operation.
+ /// @param original_value The original value at `dest` before the $(op.internalName) operation.
[ForceInline]
- [require(cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)]
- void InterlockedAnd(
+ [require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda_metal, byteaddressbuffer_rw)]
+ void Interlocked$(op.name)(
UINT dest,
UINT value,
out UINT original_value)
{
__target_switch
{
- case glsl: __intrinsic_asm "$3 = atomicAnd($0._data[$1/4], $2)";
- case cuda: __intrinsic_asm "(*$3 = atomicAnd($0._getPtrAt<uint32_t>($1), $2))";
- case hlsl: __intrinsic_asm ".InterlockedAnd";
- case metal:
- {
- let buf = __getEquivalentStructuredBuffer<uint>(this);
- __metalInterlocked_and(__getMetalAtomicRef(buf[dest / 4]), value, original_value);
- return;
- }
- case spirv:
+ case hlsl: __intrinsic_asm ".Interlocked$(op.name)";
+ default:
let buf = __getEquivalentStructuredBuffer<uint>(this);
- ::InterlockedAnd(buf[dest / 4], value, original_value);
+ ::Interlocked$(op.name)(buf[dest / 4], value, original_value);
}
}
[ForceInline]
- [require(cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)]
- void InterlockedAnd(
+ [require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda_metal, byteaddressbuffer_rw)]
+ void Interlocked$(op.name)(
UINT dest,
UINT value)
{
__target_switch
{
- case glsl: __intrinsic_asm "atomicAnd($0._data[$1/4], $2)";
- case cuda: __intrinsic_asm "atomicAnd($0._getPtrAt<uint32_t>($1), $2)";
- case hlsl: __intrinsic_asm ".InterlockedAnd";
- case metal:
- {
- let buf = __getEquivalentStructuredBuffer<uint>(this);
- __metalInterlocked_and(__getMetalAtomicRef(buf[dest / 4]), value);
- return;
- }
- case spirv:
+ case hlsl: __intrinsic_asm ".Interlocked$(op.name)";
+ default:
let buf = __getEquivalentStructuredBuffer<uint>(this);
- ::InterlockedAnd(buf[dest / 4], value);
+ ::Interlocked$(op.name)(buf[dest / 4], value);
}
}
+${{{{
+} // for (buffer atomic ops)
+}}}}
+ /// Perform a 32-bit integer atomic compare-and-exchange operation at
+ /// the specified byte address within the `RWByteAddressBuffer`.
+ /// @param dest The address at which to perform the atomic compare-and-exchange operation.
+ /// @param compare_value The value to perform bitwise comparison to the value at `dest`.
+ /// @param value The value to store at `dest` if the comparison is successful.
+ /// @param original_value The original value at `dest` before the compare-and-exchange operation.
+ /// @remarks For SPIR-V, this function maps to `OpAtomicCompareExchange`. For HLSL, this function
+ /// translates to `InterlockedCompareExchange`.
+ /// For CUDA, this function maps to `atomicCAS`.
[ForceInline]
- [require(cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)]
+ [require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda_metal, byteaddressbuffer_rw)]
void InterlockedCompareExchange(
UINT dest,
UINT compare_value,
@@ -5498,23 +4810,23 @@ ${{{{
{
__target_switch
{
- case glsl: __intrinsic_asm "($4 = atomicCompSwap($0._data[$1/4], $2, $3))";
- case cuda: __intrinsic_asm "(*$4 = atomicCAS($0._getPtrAt<uint32_t>($1), $2, $3))";
case hlsl: __intrinsic_asm ".InterlockedCompareExchange";
- case metal:
- {
- let buf = __getEquivalentStructuredBuffer<uint>(this);
- __metalInterlocked_compare_exchange(__getMetalAtomicRef(buf[dest / 4]), compare_value, value, original_value);
- return;
- }
- case spirv:
+ default:
let buf = __getEquivalentStructuredBuffer<uint>(this);
::InterlockedCompareExchange(buf[dest / 4], compare_value, value, original_value);
}
}
+ /// Perform a 32-bit integer atomic compare-and-store operation at
+ /// the specified byte address within the `RWByteAddressBuffer`.
+ /// @param dest The address at which to perform the atomic compare-and-store operation.
+ /// @param compare_value The value to perform comparison to the value at `dest`.
+ /// @param value The value to store at `dest` if the comparison is successful.
+ /// @remarks For SPIR-V, this function maps to `OpAtomicCompareExchange`. For HLSL, this function
+ /// translates to `InterlockedCompareStore`.
+ /// For CUDA, this function maps to `atomicCAS`.
[ForceInline]
- [require(cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)]
+ [require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda_metal, byteaddressbuffer_rw)]
void InterlockedCompareStore(
UINT dest,
UINT compare_value,
@@ -5522,232 +4834,13 @@ ${{{{
{
__target_switch
{
- case glsl: __intrinsic_asm "atomicCompSwap($0._data[$1/4], $2, $3)";
- case cuda: __intrinsic_asm "atomicCAS($0._getPtrAt<uint32_t>($1), $2, $3)";
case hlsl: __intrinsic_asm ".InterlockedCompareStore";
- case metal:
- {
- let buf = __getEquivalentStructuredBuffer<uint>(this);
- __metalInterlocked_compare_exchange(__getMetalAtomicRef(buf[dest / 4]), compare_value, value);
- return;
- }
- case spirv:
+ default:
let buf = __getEquivalentStructuredBuffer<uint>(this);
::InterlockedCompareStore(buf[dest / 4], compare_value, value);
}
}
- [ForceInline]
- [require(cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)]
- void InterlockedExchange(
- UINT dest,
- UINT value,
- out UINT original_value)
- {
- __target_switch
- {
- case glsl: __intrinsic_asm "($3 = atomicExchange($0._data[$1/4], $2))";
- case cuda: __intrinsic_asm "(*$3 = atomicExch($0._getPtrAt<uint32_t>($1), $2))";
- case hlsl: __intrinsic_asm ".InterlockedExchange";
- case metal:
- {
- let buf = __getEquivalentStructuredBuffer<uint>(this);
- __metalInterlocked_exchange(__getMetalAtomicRef(buf[dest / 4]), value, original_value);
- return;
- }
- case spirv:
- let buf = __getEquivalentStructuredBuffer<uint>(this);
- ::InterlockedExchange(buf[dest / 4], value, original_value);
- }
- }
-
- [ForceInline]
- [require(cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)]
- void InterlockedMax(
- UINT dest,
- UINT value,
- out UINT original_value)
- {
- __target_switch
- {
- case glsl: __intrinsic_asm "($3 = atomicMax($0._data[$1/4], $2))";
- case cuda: __intrinsic_asm "(*$3 = atomicMax($0._getPtrAt<uint32_t>($1), $2))";
- case hlsl: __intrinsic_asm ".InterlockedMax";
- case metal:
- {
- let buf = __getEquivalentStructuredBuffer<uint>(this);
- __metalInterlocked_max(__getMetalAtomicRef(buf[dest / 4]), value, original_value);
- return;
- }
- case spirv:
- let buf = __getEquivalentStructuredBuffer<uint>(this);
- ::InterlockedMax(buf[dest / 4], value, original_value);
- }
- }
-
- [ForceInline]
- [require(cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)]
- void InterlockedMax(
- UINT dest,
- UINT value)
- {
- __target_switch
- {
- case glsl: __intrinsic_asm "atomicMax($0._data[$1/4], $2)";
- case cuda: __intrinsic_asm "atomicMax($0._getPtrAt<uint32_t>($1), $2)";
- case hlsl: __intrinsic_asm ".InterlockedMax";
- case metal:
- {
- let buf = __getEquivalentStructuredBuffer<uint>(this);
- __metalInterlocked_max(__getMetalAtomicRef(buf[dest / 4]), value);
- return;
- }
- case spirv:
- let buf = __getEquivalentStructuredBuffer<uint>(this);
- ::InterlockedMax(buf[dest / 4], value);
- }
- }
-
- [ForceInline]
- [require(cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)]
- void InterlockedMin(
- UINT dest,
- UINT value,
- out UINT original_value)
- {
- __target_switch
- {
- case glsl: __intrinsic_asm "($3 = atomicMin($0._data[$1/4], $2))";
- case cuda: __intrinsic_asm "(*$3 = atomicMin($0._getPtrAt<uint32_t>($1), $2))";
- case hlsl: __intrinsic_asm ".InterlockedMin";
- case metal:
- {
- let buf = __getEquivalentStructuredBuffer<uint>(this);
- __metalInterlocked_min(__getMetalAtomicRef(buf[dest / 4]), value, original_value);
- return;
- }
- case spirv:
- let buf = __getEquivalentStructuredBuffer<uint>(this);
- ::InterlockedMin(buf[dest / 4], value, original_value);
- }
- }
-
- [ForceInline]
- [require(cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)]
- void InterlockedMin(
- UINT dest,
- UINT value)
- {
- __target_switch
- {
- case glsl: __intrinsic_asm "atomicMin($0._data[$1/4], $2)";
- case cuda: __intrinsic_asm "atomicMin($0._getPtrAt<uint32_t>($1), $2)";
- case hlsl: __intrinsic_asm ".InterlockedMin";
- case metal:
- {
- let buf = __getEquivalentStructuredBuffer<uint>(this);
- __metalInterlocked_min(__getMetalAtomicRef(buf[dest / 4]), value);
- return;
- }
- case spirv:
- let buf = __getEquivalentStructuredBuffer<uint>(this);
- ::InterlockedMin(buf[dest / 4], value);
- }
- }
-
- [ForceInline]
- [require(cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)]
- void InterlockedOr(
- UINT dest,
- UINT value,
- out UINT original_value)
- {
- __target_switch
- {
- case glsl: __intrinsic_asm "($3 = atomicOr($0._data[$1/4], $2))";
- case cuda: __intrinsic_asm "(*$3 = atomicOr($0._getPtrAt<uint32_t>($1), $2))";
- case hlsl: __intrinsic_asm ".InterlockedOr";
- case metal:
- {
- let buf = __getEquivalentStructuredBuffer<uint>(this);
- __metalInterlocked_or(__getMetalAtomicRef(buf[dest / 4]), value, original_value);
- return;
- }
- case spirv:
- let buf = __getEquivalentStructuredBuffer<uint>(this);
- ::InterlockedOr(buf[dest / 4], value, original_value);
- }
- }
-
- [ForceInline]
- [require(cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)]
- void InterlockedOr(
- UINT dest,
- UINT value)
- {
- __target_switch
- {
- case glsl: __intrinsic_asm "atomicOr($0._data[$1/4], $2)";
- case cuda: __intrinsic_asm "atomicOr($0._getPtrAt<uint32_t>($1), $2)";
- case hlsl: __intrinsic_asm ".InterlockedOr";
- case metal:
- {
- let buf = __getEquivalentStructuredBuffer<uint>(this);
- __metalInterlocked_or(__getMetalAtomicRef(buf[dest / 4]), value);
- return;
- }
- case spirv:
- let buf = __getEquivalentStructuredBuffer<uint>(this);
- ::InterlockedOr(buf[dest / 4], value);
- }
- }
-
- [ForceInline]
- [require(cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)]
- void InterlockedXor(
- UINT dest,
- UINT value,
- out UINT original_value)
- {
- __target_switch
- {
- case glsl: __intrinsic_asm "($3 = atomicXor($0._data[$1/4], $2))";
- case cuda: __intrinsic_asm "(*$3 = atomicXor($0._getPtrAt<uint32_t>($1), $2))";
- case hlsl: __intrinsic_asm ".InterlockedXor";
- case metal:
- {
- let buf = __getEquivalentStructuredBuffer<uint>(this);
- __metalInterlocked_xor(__getMetalAtomicRef(buf[dest / 4]), value, original_value);
- return;
- }
- case spirv:
- let buf = __getEquivalentStructuredBuffer<uint>(this);
- ::InterlockedXor(buf[dest / 4], value, original_value);
- }
- }
-
- [ForceInline]
- [require(cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)]
- void InterlockedXor(
- UINT dest,
- UINT value)
- {
- __target_switch
- {
- case glsl: __intrinsic_asm "atomicXor($0._data[$1/4], $2)";
- case cuda: __intrinsic_asm "atomicXor($0._getPtrAt<uint32_t>($1), $2)";
- case hlsl: __intrinsic_asm ".InterlockedXor";
- case metal:
- {
- let buf = __getEquivalentStructuredBuffer<uint>(this);
- __metalInterlocked_xor(__getMetalAtomicRef(buf[dest / 4]), value);
- return;
- }
- case spirv:
- let buf = __getEquivalentStructuredBuffer<uint>(this);
- ::InterlockedXor(buf[dest / 4], value);
- }
- }
[ForceInline]
[require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)]
@@ -9699,26 +8792,6 @@ void GroupMemoryBarrierWithGroupSync()
// Atomics
-__generic<T>
-__intrinsic_op($(kIROp_MetalAtomicCast))
-[require(metal)]
-T* __getMetalAtomicRef(__ref T x);
-
-// Checks if input is a ImageSubscript
-__generic<T>
-__intrinsic_op($(kIROp_IsTextureAccess))
-bool __isTextureAccess(__ref T x);
-
-// Checks if input is a texture of T type scalar
-__generic<T>
-__intrinsic_op($(kIROp_IsTextureScalarAccess))
-bool __isTextureScalarAccess(__ref T x);
-
-// Checks if input is a texture array
-__generic<T>
-__intrinsic_op($(kIROp_IsTextureArrayAccess))
-bool __isTextureArrayAccess(__ref T x);
-
// Accepts an ImageSubscript
// Gets Texture used with ImageSubscript.
__generic<TextureAccess>
@@ -9738,414 +8811,6 @@ __intrinsic_op($(kIROp_ExtractArrayCoordFromTextureAccess))
uint __extractArrayCoordFromTextureAccess(__ref TextureAccess x);
${{{{
-for (bool isArray : {false, true})
-{
- StringBuilder coordBuilder;
- StringBuilder coordFetchBuilder;
-
- StringBuilder threeParamsASMBuilder;
- StringBuilder threeParamsOutputParamASMBuilder;
-
- StringBuilder fourParamsASMBuilder;
-
- coordBuilder << "Coord coord";
- coordFetchBuilder << "coord";
-
- threeParamsASMBuilder << "$1, $2";
-
- fourParamsASMBuilder << "$1, $2, $3";
- if(isArray)
- {
- coordBuilder << ", uint arrayCoord";
- coordFetchBuilder << ", arrayCoord";
- threeParamsASMBuilder << ", $3";
- fourParamsASMBuilder << ", $4";
- threeParamsOutputParamASMBuilder << "$4";
- }
- else
- {
- threeParamsOutputParamASMBuilder << "$3";
- }
- auto coordString = coordBuilder.toString();
- auto coordFetchString = coordFetchBuilder.toString();
-
- auto threeParamsASMString = threeParamsASMBuilder.toString();
- auto threeParamsOutputParamASMString = threeParamsOutputParamASMBuilder.toString();
-
- auto fourParamsASMString = fourParamsASMBuilder.toString();
-}}}}
-
-${{{{
- for (const char* atomicOperation : {"add", "and", "max", "min", "or", "sub", "xor"})
- {
-}}}}
- __generic<TextureType, T, Coord>
- [ForceInline]
- [require(metal)]
- vector<T, 4> __metalImageInterlocked_$(atomicOperation)(TextureType tex, $(coordString), vector<T, 4> value)
- {
- static_assert(T is int || T is uint, "__metalImageInterlocked only allows 'int'/'uint' textures");
- static_assert(Coord is uint || Coord is vector<uint,2> || Coord is vector<uint,3> || Coord is vector<uint,4>,
- "__metalImageInterlocked implementation only allows 'uint' coordinates");
- __intrinsic_asm "$0.atomic_fetch_$(atomicOperation)($(threeParamsASMString))";
- }
-
- __generic<TextureType, T, Coord>
- [ForceInline]
- [require(metal)]
- void __metalImageInterlocked_$(atomicOperation)(TextureType tex, $(coordString), vector<T, 4> value, out T original_value)
- {
- static_assert(T is int || T is uint, "__metalImageInterlocked only allows 'int'/'uint' textures");
- static_assert(Coord is uint || Coord is vector<uint,2> || Coord is vector<uint,3> || Coord is vector<uint,4>,
- "__metalImageInterlocked implementation only allows 'uint' coordinates");
- original_value = __metalImageInterlocked_$(atomicOperation)(tex, $(coordFetchString), value)[0];
- }
-${{{{
- } // atomicOperation
-}}}}
-
- __generic<TextureType, T, Coord>
- [ForceInline]
- [require(metal)]
- vector<T, 4> __metalImageInterlocked_exchange(TextureType tex, $(coordString), vector<T, 4> value)
- {
- static_assert(T is int || T is uint, "__metalImageInterlocked only allows 'int'/'uint' textures");
- static_assert(Coord is uint || Coord is vector<uint,2> || Coord is vector<uint,3> || Coord is vector<uint,4>,
- "__metalImageInterlocked implementation only allows 'uint' coordinates");
- __intrinsic_asm "($0.atomic_exchange($(threeParamsASMString)))";
- }
- __generic<TextureType, T, Coord>
- [ForceInline]
- [require(metal)]
- void __metalImageInterlocked_exchange(TextureType tex, $(coordString), vector<T, 4> value, out T original_value)
- {
- static_assert(T is int || T is uint, "Metal atomic texture operations only allow 'int'/'uint' textures");
- static_assert(Coord is uint || Coord is vector<uint,2> || Coord is vector<uint,3> || Coord is vector<uint,4>,
- "__metalImageInterlocked implementation only allows 'uint' coordinates");
- original_value = __metalImageInterlocked_exchange(tex, $(coordFetchString), value)[0];
- }
-
- __generic<TextureType, T, Coord>
- [ForceInline]
- [require(metal)]
- void __metalImageInterlocked_compare_exchange(TextureType tex, $(coordString), __ref vector<T, 4> compare_value, vector<T, 4> value)
- {
- static_assert(T is int || T is uint, "__metalImageInterlocked only allows 'int'/'uint' textures");
- static_assert(Coord is uint || Coord is vector<uint,2> || Coord is vector<uint,3> || Coord is vector<uint,4>,
- "__metalImageInterlocked implementation only allows 'uint' coordinates");
- __intrinsic_asm "($0.atomic_compare_exchange_weak($(fourParamsASMString)))";
- }
- __generic<TextureType, T, Coord>
- [ForceInline]
- [require(metal)]
- void __metalImageInterlocked_compare_exchange(TextureType tex, $(coordString), vector<T, 4> compare_value, vector<T, 4> value, out T original_value)
- {
- static_assert(T is int || T is uint, "__metalImageInterlocked only allows 'int'/'uint' textures");
- static_assert(Coord is uint || Coord is vector<uint,2> || Coord is vector<uint,3> || Coord is vector<uint,4>,
- "__metalImageInterlocked implementation only allows 'uint' coordinates");
- __metalImageInterlocked_compare_exchange(tex, $(coordFetchString), compare_value, value);
- original_value = compare_value[0];
- }
-
-${{{{
-} // isArray
-}}}}
-
-${{{{
-
-// Generated functions:
-
-// atomicAdd, InterlockedAdd, atomic_fetch_add_explicit, OpAtomicIAdd, OpAtomicFAddEXT
-// __cudaInterlocked_add, __glslInterlocked_add, __hlslInterlocked_add, __metalInterlocked_add, __spirvInterlocked_add
-
-// atomicAnd, InterlockedAnd, atomic_fetch_and_explicit, OpAtomicAnd
-// __cudaInterlocked_and, __glslInterlocked_and, __hlslInterlocked_and, __metalInterlocked_and, __spirvInterlocked_and
-
-// atomicMax, InterlockedMax, atomic_fetch_max_explicit, OpAtomicUMax, OpAtomicSMax, OpAtomicFMaxEXT
-// __cudaInterlocked_max, __glslInterlocked_max, __hlslInterlocked_max, __metalInterlocked_max, __spirvInterlocked_max
-
-// atomicMin, InterlockedMin, atomic_fetch_min_explicit, OpAtomicUMin, OpAtomicSMin, OpAtomicFMinEXT
-// __cudaInterlocked_min, __glslInterlocked_min, __hlslInterlocked_min, __metalInterlocked_min, __spirvInterlocked_min
-
-// atomicOr, InterlockedOr, atomic_fetch_or_explicit, OpAtomicOr
-// __cudaInterlocked_or, __glslInterlocked_or, __hlslInterlocked_or, __metalInterlocked_or, __spirvInterlocked_or
-
-// atomicXor, InterlockedXor, atomic_fetch_xor_explicit, OpAtomicXor
-// __cudaInterlocked_xor, __glslInterlocked_xor, __hlslInterlocked_xor, __metalInterlocked_xor, __spirvInterlocked_xor
-
-// atomicExchange, atomicExch, InterlockedExchange, atomic_exchange_explicit, OpAtomicExchange
-// __cudaInterlocked_exchange, __glslInterlocked_exchange, __hlslInterlocked_exchange, __metalInterlocked_exchange, __spirvInterlocked_exchange
-
-struct InternalAtomicOperationInfo
-{
- const char* slangSuffix;
- const char* cudaSuffix;
- const char* glslSuffix;
- const char* hlslSuffix;
- const char* metalSuffix;
- const char* spirvFloatSuffix;
- const char* spirvUIntSuffix;
- const char* spirvIntSuffix;
-
- const char* assertExpr;
-};
-
-InternalAtomicOperationInfo internalAtomicOperationInfo[7] = {
- { "add", "Add", "Add", "Add", "fetch_add", "FAddEXT", "IAdd", "IAdd", "true" },
- { "and", "And", "And", "And", "fetch_and", "And", "And", "And", "!__isFloat<T>()" },
- { "max", "Max", "Max", "Max", "fetch_max", "FMaxEXT", "UMax", "SMax", "true" },
- { "min", "Min", "Min", "Min", "fetch_min", "FMinEXT", "UMin", "SMin", "true" },
- { "or", "Or", "Or", "Or", "fetch_or", "Or", "Or", "Or", "!__isFloat<T>()" },
- { "xor", "Xor", "Xor", "Xor", "fetch_xor", "Xor", "Xor", "Xor", "!__isFloat<T>()" },
- { "exchange", "Exch", "Exchange", "Exchange", "exchange", "Exchange", "Exchange", "Exchange", "true" },
-};
-
-for (InternalAtomicOperationInfo atomicOp : internalAtomicOperationInfo)
-{
-}}}}
- __generic<AtomicType, T>
- [ForceInline]
- [require(metal)]
- void __metalInterlocked_$(atomicOp.slangSuffix)(AtomicType dest, T value)
- {
- static_assert($(atomicOp.assertExpr), "Unable to use float with Atomic$(atomicOp.slangSuffix)");
- __intrinsic_asm "atomic_$(atomicOp.metalSuffix)_explicit($0, $1, memory_order_relaxed)";
- }
-
- __generic<AtomicType, T>
- [ForceInline]
- [require(metal)]
- void __metalInterlocked_$(atomicOp.slangSuffix)(AtomicType dest, T value, out T original_value)
- {
- static_assert($(atomicOp.assertExpr), "Unable to use float with Atomic$(atomicOp.slangSuffix)");
- __intrinsic_asm "((*($2)) = (atomic_$(atomicOp.metalSuffix)_explicit($0, $1, memory_order_relaxed)))";
- }
-
- __generic<T>
- [ForceInline]
- [require(cuda)]
- void __cudaInterlocked_$(atomicOp.slangSuffix)(__ref T dest, T value)
- {
- static_assert($(atomicOp.assertExpr), "Unable to use float with Atomic$(atomicOp.slangSuffix)");
- __intrinsic_asm "atomic$(atomicOp.cudaSuffix)((int*)$0, $1)";
- }
-
- __generic<T>
- [ForceInline]
- [require(cuda)]
- void __cudaInterlocked_$(atomicOp.slangSuffix)(__ref T dest, T value, out T original_value)
- {
- static_assert($(atomicOp.assertExpr), "Unable to use float with Atomic$(atomicOp.slangSuffix)");
- __intrinsic_asm "(*$2 = atomic$(atomicOp.cudaSuffix)((int*)$0, $1))";
- }
-
- __generic<T>
- [ForceInline]
- [require(glsl)]
- void __glslInterlocked_$(atomicOp.slangSuffix)(__ref T dest, T value)
- {
- static_assert($(atomicOp.assertExpr), "Unable to use float with Atomic$(atomicOp.slangSuffix)");
- __intrinsic_asm "$atomic$(atomicOp.glslSuffix)($A, $1)";
- }
-
- __generic<T>
- [ForceInline]
- [require(glsl)]
- void __glslInterlocked_$(atomicOp.slangSuffix)(__ref T dest, T value, out T original_value)
- {
- static_assert($(atomicOp.assertExpr), "Unable to use float with Atomic$(atomicOp.slangSuffix)");
- __intrinsic_asm "($2 = $atomic$(atomicOp.glslSuffix)($A, $1))";
- }
-
- __generic<T>
- [ForceInline]
- [require(hlsl)]
- void __hlslInterlocked_$(atomicOp.slangSuffix)(__ref T dest, T value)
- {
- static_assert($(atomicOp.assertExpr), "Unable to use float with Atomic$(atomicOp.slangSuffix)");
- __intrinsic_asm "Interlocked$(atomicOp.hlslSuffix)";
- }
-
- __generic<T>
- [ForceInline]
- [require(hlsl)]
- void __hlslInterlocked_$(atomicOp.slangSuffix)(__ref T dest, T value, out T original_value)
- {
- static_assert($(atomicOp.assertExpr), "Unable to use float with Atomic$(atomicOp.slangSuffix)");
- __intrinsic_asm "Interlocked$(atomicOp.hlslSuffix)";
- }
-
- __generic<T>
- [ForceInline]
- [require(spirv)]
- void __spirvInterlocked_$(atomicOp.slangSuffix)(__ref T dest, T value)
- {
- static_assert($(atomicOp.assertExpr), "Unable to use float with Atomic$(atomicOp.slangSuffix)");
- if (__isFloat<T>())
- {
- spirv_asm
- {
- result:$$T = OpAtomic$(atomicOp.spirvFloatSuffix) &dest Device None $value
- };
- }
- else if (__isUnsignedInt<T>())
- {
- spirv_asm
- {
- result:$$T = OpAtomic$(atomicOp.spirvUIntSuffix) &dest Device None $value
- };
- }
- else if (__isInt<T>())
- {
- spirv_asm
- {
- result:$$T = OpAtomic$(atomicOp.spirvIntSuffix) &dest Device None $value
- };
- }
- }
-
- __generic<T>
- [ForceInline]
- [require(spirv)]
- void __spirvInterlocked_$(atomicOp.slangSuffix)(__ref T dest, T value, out T original_value)
- {
- static_assert($(atomicOp.assertExpr), "Unable to use float with Atomic$(atomicOp.slangSuffix)");
- if (__isFloat<T>())
- {
- spirv_asm
- {
- %original:$$T = OpAtomic$(atomicOp.spirvFloatSuffix) &dest Device None $value;
- OpStore &original_value %original
- };
- }
- else if (__isUnsignedInt<T>())
- {
- spirv_asm
- {
- %original:$$T = OpAtomic$(atomicOp.spirvUIntSuffix) &dest Device None $value;
- OpStore &original_value %original
- };
- }
- else if (__isInt<T>())
- {
- spirv_asm
- {
- %original:$$T = OpAtomic$(atomicOp.spirvIntSuffix) &dest Device None $value;
- OpStore &original_value %original
- };
- }
- }
-
-${{{{
-} // fetchAndModify
-}}}}
-
-__generic<AtomicType, T>
-[ForceInline]
-[require(metal)]
-void __metalInterlocked_compare_exchange(AtomicType dest, __ref T compare_value, T value)
-{
- __intrinsic_asm "atomic_compare_exchange_weak_explicit($0, $1, $2, memory_order_relaxed, memory_order_relaxed)";
-}
-
-__generic<AtomicType, T>
-[ForceInline]
-[require(metal)]
-void __metalInterlocked_compare_exchange(AtomicType dest, T compare_value, T value, out T original_value)
-{
- __metalInterlocked_compare_exchange(dest, compare_value, value);
- original_value = compare_value;
-}
-
-__generic<T>
-__glsl_version(430)
-[ForceInline]
-[require(cuda)]
-void __cudaInterlocked_compare_exchange(__ref T dest, __ref T compare_value, T value)
-{
- __intrinsic_asm "atomicCAS($0, $1, $2)";
-}
-
-__generic<T>
-[ForceInline]
-[require(cuda)]
-void __cudaInterlocked_compare_exchange(__ref T dest, T compare_value, T value, out T original_value)
-{
- __intrinsic_asm "*$3 = atomicCAS($0, $1, $2)";
-}
-
-__generic<T>
-[ForceInline]
-[require(glsl)]
-void __glslInterlocked_compare_exchange(__ref T dest, __ref T compare_value, T value)
-{
- __intrinsic_asm "$atomicCompSwap($A, $1, $2)";
-}
-
-__generic<T>
-[ForceInline]
-[require(glsl)]
-void __glslInterlocked_compare_exchange(__ref T dest, T compare_value, T value, out T original_value)
-{
- __intrinsic_asm "($3 = $atomicCompSwap($A, $1, $2))";
-}
-
-__generic<T>
-[ForceInline]
-[require(hlsl)]
-void __hlslInterlocked_compare_exchange(__ref T dest, __ref T compare_value, T value)
-{
- __intrinsic_asm "InterlockedCompareExchange";
-}
-
-__generic<T>
-[ForceInline]
-[require(hlsl)]
-void __hlslInterlocked_compare_exchange(__ref T dest, T compare_value, T value, out T original_value)
-{
- __intrinsic_asm "InterlockedCompareExchange";
-}
-
-__generic<T>
-[ForceInline]
-[require(spirv)]
-void __spirvInterlocked_compare_exchange(__ref T dest, __ref T compare_value, T value)
-{
- spirv_asm
- {
- %result:$$T = OpAtomicCompareExchange &dest Device None None $value $compare_value;
- };
-}
-
-__generic<T>
-[ForceInline]
-[require(spirv)]
-void __spirvInterlocked_compare_exchange(__ref T dest, T compare_value, T value, out T original_value)
-{
- spirv_asm
- {
- %original:$$T = OpAtomicCompareExchange &dest Device None None $value $compare_value;
- OpStore &original_value %original
- };
-}
-
-__generic<T>
-[ForceInline]
-[require(hlsl)]
-void __hlslInterlocked_compare_exchange_float_bitwise(__ref T dest, T compare_value, T value)
-{
- __intrinsic_asm "InterlockedCompareExchangeFloatBitwise";
-}
-
-__generic<T>
-[ForceInline]
-[require(hlsl)]
-void __hlslInterlocked_compare_exchange_float_bitwise(__ref T dest, T compare_value, T value, out T original_value)
-{
- __intrinsic_asm "InterlockedCompareExchangeFloatBitwise";
-}
-
-${{{{
// Generates code for:
// InterlockedAdd, InterlockedAnd, InterlockedOr, InterlockedXor,
// InterlockedMax, InterlockedMin, InterlockedExchange
@@ -10153,516 +8818,166 @@ struct SlangAtomicOperationInfo
{
const char* slangCallSuffix;
const char* internalCallSuffix;
+ const char* interface;
};
SlangAtomicOperationInfo slangAtomicOperationInfo[7] = {
- { "Add", "add" },
- { "And", "and" },
- { "Or", "or" },
- { "Xor", "xor" },
- { "Max", "max" },
- { "Min", "min" },
- { "Exchange", "exchange" },
+ { "Add", "add", "IArithmeticAtomicable" },
+ { "And", "and", "IArithmeticAtomicable" },
+ { "Or", "or", "IArithmeticAtomicable" },
+ { "Xor", "xor", "IArithmeticAtomicable" },
+ { "Max", "max", "IArithmeticAtomicable" },
+ { "Min", "min", "IArithmeticAtomicable" },
+ { "Exchange", "exchange", "IAtomicable" },
};
for (SlangAtomicOperationInfo atomicOp : slangAtomicOperationInfo)
{
- for(const char* T : {"int", "uint"})
- {
}}}}
+/// Perform an atomic $(atomicOp.internalCallSuffix) operation on `dest`.
+/// @param T The type of the value to perform the atomic operation on.
+/// @param dest The value to perform the atomic operation on.
+/// @param value The operand to the atomic operation.
+/// @param original_value The value of `dest` before the operation.
+/// @remarks When targeting HLSL, it is invalid to call this function with `T` being a floating-point type, since
+/// HLSL does not allow atomic operations on floating point types. For `InterlockedAdd`, consider using
+/// `RWByteAddressBuffer.InterlockedAddF32` or `RWByteAddressBuffer.InterlockedAddF16` instead when NVAPI is available.
+/// On SPIR-V (Vulkan), all integer and floating point types are supported.
+/// On Metal and WGSL, floating-point types are not supported.
+/// @category atomic Atomic functions
[ForceInline]
__glsl_version(430)
[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda_metal)]
-void Interlocked$(atomicOp.slangCallSuffix)(__ref $(T) dest, $(T) value)
+void Interlocked$(atomicOp.slangCallSuffix)<T:$(atomicOp.interface)>(__ref T dest, T value)
{
- static_assert(__isTextureScalarAccess(dest) || !__isTextureAccess(dest), "Atomic must be applied to scalar texture or non-texture");
- __target_switch
- {
- case hlsl: __hlslInterlocked_$(atomicOp.internalCallSuffix)(dest, value);
- case cuda: __cudaInterlocked_$(atomicOp.internalCallSuffix)(dest, value);
- case glsl: __glslInterlocked_$(atomicOp.internalCallSuffix)(dest, value);
- case spirv: __spirvInterlocked_$(atomicOp.internalCallSuffix)(dest, value);
- case metal:
- if (__isTextureAccess(dest))
- {
- if(__isTextureArrayAccess(dest))
- {
- __metalImageInterlocked_$(atomicOp.internalCallSuffix)(__extractTextureFromTextureAccess(dest),
- __extractCoordFromTextureAccess(dest), __extractArrayCoordFromTextureAccess(dest), vector<$(T), 4>(value));
- }
- else
- {
- __metalImageInterlocked_$(atomicOp.internalCallSuffix)(__extractTextureFromTextureAccess(dest),
- __extractCoordFromTextureAccess(dest), vector<$(T), 4>(value));
- }
- }
- else
- {
- __metalInterlocked_$(atomicOp.internalCallSuffix)(__getMetalAtomicRef(dest), value);
- }
- return;
- }
+ __atomic_$(atomicOp.internalCallSuffix)(dest, value);
}
[ForceInline]
__glsl_version(430)
[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda_metal)]
-void Interlocked$(atomicOp.slangCallSuffix)(__ref $(T) dest, $(T) value, out $(T) original_value)
+void Interlocked$(atomicOp.slangCallSuffix)<T:$(atomicOp.interface)>(__ref T dest, T value, out T original_value)
{
- static_assert(__isTextureScalarAccess(dest) || !__isTextureAccess(dest), "Atomic must be applied to a scalar texture or non-texture");
- __target_switch
- {
- case hlsl: __hlslInterlocked_$(atomicOp.internalCallSuffix)(dest, value, original_value);
- case cuda: __cudaInterlocked_$(atomicOp.internalCallSuffix)(dest, value, original_value);
- case glsl: __glslInterlocked_$(atomicOp.internalCallSuffix)(dest, value, original_value);
- case spirv: __spirvInterlocked_$(atomicOp.internalCallSuffix)(dest, value, original_value);
- case metal:
- if (__isTextureAccess(dest))
- if(__isTextureArrayAccess(dest))
- {
- __metalImageInterlocked_$(atomicOp.internalCallSuffix)(__extractTextureFromTextureAccess(dest),
- __extractCoordFromTextureAccess(dest), __extractArrayCoordFromTextureAccess(dest), vector<$(T),4>(value), original_value);
- }
- else
- {
- __metalImageInterlocked_$(atomicOp.internalCallSuffix)(__extractTextureFromTextureAccess(dest),
- __extractCoordFromTextureAccess(dest), vector<$(T),4>(value), original_value);
- }
- else
- __metalInterlocked_$(atomicOp.internalCallSuffix)(__getMetalAtomicRef(dest), value, original_value);
- return;
- }
+ original_value = __atomic_$(atomicOp.internalCallSuffix)(dest, value);
}
-${{{{
- } // for(const char* T : {"int64_t", "uint64_t"})
-}}}}
-
[ForceInline]
+__glsl_version(430)
+[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda_metal)]
void Interlocked$(atomicOp.slangCallSuffix)(__ref uint dest, int value)
{
- Interlocked$(atomicOp.slangCallSuffix)(dest, (uint)value);
+ __atomic_$(atomicOp.internalCallSuffix)(dest, (uint)value);
}
${{{{
} // for (SlangAtomicOperationInfo atomicOp : slangAtomicOperationInfo)
}}}}
-${{{{
-for(const char* T : {"int64_t", "uint64_t"})
-{
-}}}}
-/// @category atomic Atomic functions
-[ForceInline]
-[require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda_metal)]
-void InterlockedAdd(__ref $(T) dest, $(T) value)
-{
- __target_switch
- {
- case hlsl: __hlslInterlocked_add(dest, value);
- case cuda: __cudaInterlocked_add(dest, value);
- case glsl:
- __requireGLSLExtension("GL_EXT_shader_atomic_int64");
- __glslInterlocked_add(dest, value);
- case spirv:
- spirv_asm
- {
- OpCapability Int64Atomics;
- result:$$$(T) = OpAtomicIAdd &dest Device None $value;
- };
- }
-}
-
-[ForceInline]
-void InterlockedAdd(__ref $(T) dest, $(T) value, out $(T) original_value)
-{
- __target_switch
- {
- case hlsl: __hlslInterlocked_add(dest, value, original_value);
- case cuda: __cudaInterlocked_add(dest, value, original_value);
- case glsl:
- __requireGLSLExtension("GL_EXT_shader_atomic_int64");
- __glslInterlocked_add(dest, value, original_value);
- case spirv:
- spirv_asm
- {
- OpCapability Int64Atomics;
- %origin:$$$(T) = OpAtomicIAdd &dest Device None $value;
- OpStore &original_value %origin
- };
- }
-}
-
-/// @category atomic
-[ForceInline]
-void InterlockedAnd(__ref $(T) dest, $(T) value)
-{
- __target_switch
- {
- case hlsl: __hlslInterlocked_and(dest, value);
- }
-}
-
-[ForceInline]
-void InterlockedAnd(__ref $(T) dest, $(T) value, out $(T) original_value)
-{
- __target_switch
- {
- case hlsl: __hlslInterlocked_and(dest, value, original_value);
- }
-}
-
-/// @category atomic
-[ForceInline]
-void InterlockedCompareExchange(__ref $(T) dest, $(T) compare_value, $(T) value)
-{
- __target_switch
- {
- case hlsl: __hlslInterlocked_compare_exchange(dest, compare_value, value);
- }
-}
-
-[ForceInline]
-void InterlockedCompareExchange(__ref $(T) dest, $(T) compare_value, $(T) value, out $(T) original_value)
-{
- __target_switch
- {
- case hlsl: __hlslInterlocked_compare_exchange(dest, compare_value, value, original_value);
- }
-}
-
-[ForceInline]
-void InterlockedCompareStore(__ref $(T) dest, $(T) compare_value, $(T) value);
-{
- __target_switch
- {
- case hlsl: __intrinsic_asm "InterlockedCompareStore";
- }
-}
-
-/// @category atomic
-[ForceInline]
-void InterlockedExchange(__ref $(T) dest, $(T) value)
-{
- __target_switch
- {
- case hlsl: __intrinsic_asm "InterlockedExchange";
- }
-}
-
-[ForceInline]
-void InterlockedExchange(__ref $(T) dest, $(T) value, out $(T) original_value)
-{
- __target_switch
- {
- case hlsl: __intrinsic_asm "InterlockedExchange";
- }
-}
-
-/// @category atomic
-[ForceInline]
-void InterlockedMax(__ref $(T) dest, $(T) value)
-{
- __target_switch
- {
- case hlsl: __intrinsic_asm "InterlockedMax";
- }
-}
-
-[ForceInline]
-void InterlockedMax(__ref $(T) dest, $(T) value, out $(T) original_value)
-{
- __target_switch
- {
- case hlsl: __intrinsic_asm "InterlockedMax";
- }
-}
-
-/// @category atomic
-[ForceInline]
-void InterlockedMin(__ref $(T) dest, $(T) value)
-{
- __target_switch
- {
- case hlsl: __intrinsic_asm "InterlockedMin";
- }
-}
-
-[ForceInline]
-void InterlockedMin(__ref $(T) dest, $(T) value, out $(T) original_value)
-{
- __target_switch
- {
- case hlsl: __intrinsic_asm "InterlockedMin";
- }
-}
-
-/// @category atomic
-[ForceInline]
-void InterlockedOr(__ref $(T) dest, $(T) value)
-{
- __target_switch
- {
- case hlsl: __intrinsic_asm "InterlockedOr";
- }
-}
-
-[ForceInline]
-void InterlockedOr(__ref $(T) dest, $(T) value, out $(T) original_value)
-{
- __target_switch
- {
- case hlsl: __intrinsic_asm "InterlockedOr";
- }
-}
-
-/// @category atomic
-[ForceInline]
-void InterlockedXor(__ref $(T) dest, $(T) value)
-{
- __target_switch
- {
- case hlsl: __intrinsic_asm "InterlockedXor";
- }
-}
-
-[ForceInline]
-void InterlockedXor(__ref $(T) dest, $(T) value, out $(T) original_value)
-{
- __target_switch
- {
- case hlsl: __intrinsic_asm "InterlockedXor";
- }
-}
-
-${{{{
-} // for(const char* T : {"int64_t", "uint64_t"})
-}}}}
-
+/// Perform an atomic compare and exchange operation on `dest`.
+/// @param T The type of the value to perform the atomic operation on.
+/// @param dest The value to perform the atomic operation on.
+/// @param compare_value The value to compare `dest` with.
+/// @param value The value to store into `dest` if the compare result is equal.
+/// @param original_value The value of `dest` before the operation.
+/// @remarks When targeting HLSL, a call to this function with `T` being `float` will translate to a call to
+/// `InterlockedCompareExchangeFloatBitwise`, which means the comparison is done as a bitwise comparison.
+///
+/// On SPIR-V (Vulkan), this function maps to `OpAtomicCompareExchange`.
+///
+/// On Metal and WGSL, floating-point types are not supported.
+///
+/// On CUDA, this function maps to `atomicCAS`.
/// @category atomic
[ForceInline]
-__glsl_version(430)
[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda_metal)]
-void InterlockedCompareExchange(__ref int dest, int compare_value, int value, out int original_value)
+void InterlockedCompareExchange<T:IAtomicable>(__ref T dest, T compare_value, T value, out T original_value)
{
- static_assert(__isTextureScalarAccess(dest) || !__isTextureAccess(dest), "Atomic must be applied to scalar texture or non-texture");
- __target_switch
- {
- case hlsl: __hlslInterlocked_compare_exchange(dest, compare_value, value, original_value);
- case glsl: __glslInterlocked_compare_exchange(dest, compare_value, value, original_value);
- case cuda: __cudaInterlocked_compare_exchange(dest, compare_value, value, original_value);
- case spirv: __spirvInterlocked_compare_exchange(dest, compare_value, value, original_value);
- case metal:
- if (__isTextureAccess(dest))
- {
- vector<int, 4> vec_compare_value = vector<int, 4>(compare_value);
- if(__isTextureArrayAccess(dest))
- {
- __metalImageInterlocked_compare_exchange(__extractTextureFromTextureAccess(dest),
- __extractCoordFromTextureAccess(dest), __extractArrayCoordFromTextureAccess(dest), vec_compare_value, vector<int, 4>(value), original_value);
- }
- else
- {
- __metalImageInterlocked_compare_exchange(__extractTextureFromTextureAccess(dest),
- __extractCoordFromTextureAccess(dest), vec_compare_value, vector<int, 4>(value), original_value);
- }
- }
- else
- {
- __metalInterlocked_compare_exchange(__getMetalAtomicRef(dest), compare_value, value, original_value);
- }
- return;
- }
-}
-
-[ForceInline]
-__glsl_version(430)
-[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda_metal)]
-void InterlockedCompareExchange(__ref uint dest, uint compare_value, uint value, out uint original_value)
-{
- static_assert(__isTextureScalarAccess(dest) || !__isTextureAccess(dest), "Atomic must be applied to scalar texture or non-texture");
- __target_switch
- {
- case hlsl: __hlslInterlocked_compare_exchange(dest, compare_value, value, original_value);
- case cuda: __cudaInterlocked_compare_exchange(dest, compare_value, value, original_value);
- case glsl: __glslInterlocked_compare_exchange(dest, compare_value, value, original_value);
- case spirv: __spirvInterlocked_compare_exchange(dest, compare_value, value, original_value);
- case metal:
- if (__isTextureAccess(dest))
- {
- vector<uint, 4> vec_compare_value = vector<uint, 4>(compare_value);
- if(__isTextureArrayAccess(dest))
- {
- __metalImageInterlocked_compare_exchange(__extractTextureFromTextureAccess(dest),
- __extractCoordFromTextureAccess(dest), __extractArrayCoordFromTextureAccess(dest), vec_compare_value, vector<uint, 4>(value), original_value);
- }
- else
- {
- __metalImageInterlocked_compare_exchange(__extractTextureFromTextureAccess(dest),
- __extractCoordFromTextureAccess(dest), vec_compare_value, vector<uint, 4>(value), original_value);
- }
- }
- else
- {
- __metalInterlocked_compare_exchange(__getMetalAtomicRef(dest), compare_value, value, original_value);
- }
- return;
- }
+ original_value = __atomic_compare_exchange(dest, compare_value, value);
}
+/// Perform an atomic compare and exchange operation on `dest`.
+/// @param T The type of the value to perform the atomic operation on.
+/// @param dest The value to perform the atomic operation on.
+/// @param compare_value The value to compare `dest` with.
+/// @param value The value to store into `dest` if the compare result is equal.
+/// @param original_value The value of `dest` before the operation.
+/// @remarks When targeting HLSL, a call to this function will translate to a call to
+/// `InterlockedCompareExchangeFloatBitwise`, which means the comparison is done as a bitwise comparison.
+///
+/// On SPIR-V (Vulkan), this function maps to `OpAtomicCompareExchange`.
+///
+/// On Metal and WGSL, this function is not available.
+///
+/// On CUDA, this function maps to `atomicCAS`.
/// @category atomic
[ForceInline]
void InterlockedCompareExchangeFloatBitwise(__ref float dest, float compare_value, float value)
{
- static_assert(__isTextureScalarAccess(dest) || !__isTextureAccess(dest), "Atomic must be applied to scalar texture or non-texture");
- __target_switch
- {
- case hlsl: __hlslInterlocked_compare_exchange_float_bitwise(dest, compare_value, value);
- case metal:
- static_assert(!__isTextureAccess(dest), "float atomic texture operations are disallowed with Metal target's");
- __metalInterlocked_compare_exchange(__getMetalAtomicRef(dest), compare_value, value);
- return;
- }
+ __atomic_compare_exchange(dest, compare_value, value);
}
[ForceInline]
void InterlockedCompareExchangeFloatBitwise(__ref float dest, float compare_value, float value, out float original_value)
{
- static_assert(__isTextureScalarAccess(dest) || !__isTextureAccess(dest), "Atomic must be applied to scalar texture or non-texture");
- __target_switch
- {
- case hlsl: __hlslInterlocked_compare_exchange_float_bitwise(dest, compare_value, value, original_value);
- case metal:
- static_assert(!__isTextureAccess(dest), "float atomic texture operations are disallowed with Metal target's");
- __metalInterlocked_compare_exchange(__getMetalAtomicRef(dest), compare_value, value, original_value);
- return;
- }
+ original_value = __atomic_compare_exchange(dest, compare_value, value);
}
+/// Perform an atomic compare and store operation on `dest`.
+/// @param T The type of the value to perform the atomic operation on.
+/// @param dest The value to perform the atomic operation on.
+/// @param compare_value The value to compare `dest` with.
+/// @param value The value to store into `dest` if the compare result is equal.
+/// @remarks When targeting HLSL, a call to this function with `T` being `float` will translate to a call to
+/// `InterlockedCompareStoreFloatBitwise`, which means the comparison is done as a bitwise comparison.
+///
+/// On SPIR-V (Vulkan), this function maps to `OpAtomicCompareExchange`.
+///
+/// On Metal and WGSL, this function is not available.
+///
+/// On CUDA, this function maps to `atomicCAS`.
/// @category atomic
[ForceInline]
__glsl_version(430)
[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda_metal)]
-void InterlockedCompareStore(__ref int dest, int compare_value, int value)
+void InterlockedCompareStore<T:IAtomicable>(__ref T dest, T compare_value, T value)
{
__target_switch
{
case hlsl: __intrinsic_asm "InterlockedCompareStore";
- case glsl: __intrinsic_asm "$atomicCompSwap($A, $1, $2)";
- case cuda: __intrinsic_asm "atomicCAS($0, $1, $2)";
- case spirv:
- {
- spirv_asm
- {
- result:$$int = OpAtomicCompareExchange &dest Device None None $value $compare_value;
- };
- return;
- }
- case metal:
- {
- if (__isTextureAccess(dest))
- {
- vector<int, 4> vec_compare_value = vector<int, 4>(compare_value);
- if(__isTextureArrayAccess(dest))
- {
- __metalImageInterlocked_compare_exchange(__extractTextureFromTextureAccess(dest),
- __extractCoordFromTextureAccess(dest), __extractArrayCoordFromTextureAccess(dest), vec_compare_value, vector<int, 4>(value));
- }
- else
- {
- __metalImageInterlocked_compare_exchange(__extractTextureFromTextureAccess(dest),
- __extractCoordFromTextureAccess(dest), vec_compare_value, vector<int, 4>(value));
- }
- }
- else
- {
- __metalInterlocked_compare_exchange(__getMetalAtomicRef(dest), compare_value, value);
- }
- return;
- }
- }
-}
-
-[ForceInline]
-__glsl_version(430)
-[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda_metal)]
-void InterlockedCompareStore(__ref uint dest, uint compare_value, uint value)
-{
- __target_switch
- {
- case hlsl: __intrinsic_asm "InterlockedCompareStore";
- case glsl: __intrinsic_asm "$atomicCompSwap($A, $1, $2)";
- case cuda: __intrinsic_asm "atomicCAS((int*)$0, $1, $2)";
- case spirv:
- spirv_asm
- {
- result:$$uint = OpAtomicCompareExchange &dest Device None None $value $compare_value;
- };
- case metal:
- if (__isTextureAccess(dest))
- {
- vector<uint, 4> vec_compare_value = vector<uint, 4>(compare_value);
- if(__isTextureArrayAccess(dest))
- {
- __metalImageInterlocked_compare_exchange(__extractTextureFromTextureAccess(dest),
- __extractCoordFromTextureAccess(dest), __extractArrayCoordFromTextureAccess(dest), vec_compare_value, vector<uint, 4>(value));
- }
- else
- {
- __metalImageInterlocked_compare_exchange(__extractTextureFromTextureAccess(dest),
- __extractCoordFromTextureAccess(dest), vec_compare_value, vector<uint, 4>(value));
- }
- }
- else
- {
- __metalInterlocked_compare_exchange(__getMetalAtomicRef(dest), compare_value, value);
- }
+ default:
+ __atomic_compare_exchange(dest, compare_value, value);
return;
}
}
+/// Perform an atomic compare and store operation on `dest`.
+/// @param T The type of the value to perform the atomic operation on.
+/// @param dest The value to perform the atomic operation on.
+/// @param compare_value The value to compare `dest` with.
+/// @param value The value to store into `dest` if the compare result is equal.
+/// @remarks When targeting HLSL, a call to this function will translate to a call to
+/// `InterlockedCompareStoreFloatBitwise`, which means the comparison is done as a bitwise comparison.
+///
+/// On SPIR-V (Vulkan), this function maps to `OpAtomicCompareExchange`.
+///
+/// On Metal and WGSL, this function is not available.
+///
+/// On CUDA, this function maps to `atomicCAS`.
/// @category atomic
[ForceInline]
-void InterlockedCompareStoreFloatBitwise(__ref float dest, float compare_value, float value)
+void InterlockedCompareStoreFloatBitwise<T:IAtomicable>(__ref T dest, T compare_value, T value)
{
__target_switch
{
case hlsl: __intrinsic_asm "InterlockedCompareStoreFloatBitwise";
- }
-}
-
-/// @category atomic
-[ForceInline]
-void InterlockedExchange(__ref float dest, float value)
-{
- static_assert(__isTextureScalarAccess(dest) || !__isTextureAccess(dest), "Atomic must be applied to scalar texture or non-texture");
- __target_switch
- {
- case hlsl: __hlslInterlocked_exchange(dest, value);
- case metal:
- static_assert(!__isTextureAccess(dest), "'float' atomic texture operations are disallowed with Metal target's");
- __metalInterlocked_exchange(__getMetalAtomicRef(dest), value);
- return;
- }
-}
-
-[ForceInline]
-void InterlockedExchange(__ref float dest, float value, out float original_value)
-{
- static_assert(__isTextureScalarAccess(dest) || !__isTextureAccess(dest), "Atomic must be applied to scalar texture or non-texture");
- __target_switch
- {
- case hlsl: __hlslInterlocked_exchange(dest, value, original_value);
- case metal:
- static_assert(!__isTextureAccess(dest), "'float' atomic texture operations are disallowed with Metal target's");
- __metalInterlocked_exchange(__getMetalAtomicRef(dest), value, original_value);
+ default:
+ __atomic_compare_exchange(dest, compare_value, value);
return;
}
}
-
/// Test if a floating-point value is finite.
/// @category math
__generic<T : __BuiltinFloatingPointType>
@@ -21245,13 +19560,13 @@ extension _Texture<float, Shape, 0, 0, 0, $(kStdlibResourceAccessReadWrite), 0,
{
__target_switch
{
- case spirv:
- originalValue = __atomicAdd(this[coord], value);
+ default:
+ originalValue = __atomic_add(this[coord], value);
return;
- case glsl:
- __intrinsic_asm "$3 = imageAtomicAdd($0, $1, $2)";
case hlsl:
__intrinsic_asm "$3 = NvInterlockedAddFp32($0, $1, $2)";
+ case glsl:
+ __intrinsic_asm "$3 = imageAtomicAdd($0, $1, $2)";
}
}
diff --git a/source/slang/slang-diagnostic-defs.h b/source/slang/slang-diagnostic-defs.h
index 298c79f7e..48b296ce3 100644
--- a/source/slang/slang-diagnostic-defs.h
+++ b/source/slang/slang-diagnostic-defs.h
@@ -883,6 +883,7 @@ DIAGNOSTIC(55200, Error, unsupportedBuiltinType, "'$0' is not a supported builti
DIAGNOSTIC(55201, Error, unsupportedRecursion, "recursion detected in call to '$0', but the current code generation target does not allow recursion.")
DIAGNOSTIC(55202, Error, systemValueAttributeNotSupported, "system value semantic '$0' is not supported for the current target.")
DIAGNOSTIC(55203, Error, systemValueTypeIncompatible, "system value semantic '$0' should have type '$1' or be convertible to type '$1'.")
+DIAGNOSTIC(55204, Error, unsupportedTargetIntrinsic, "intrinsic operation '$0' is not supported for the current target.")
DIAGNOSTIC(56001, Error, unableToAutoMapCUDATypeToHostType, "Could not automatically map '$0' to a host type. Automatic binding generation failed for '$1'")
DIAGNOSTIC(56002, Error, attemptToQuerySizeOfUnsizedArray, "cannot obtain the size of an unsized array.")
diff --git a/source/slang/slang-emit-c-like.cpp b/source/slang/slang-emit-c-like.cpp
index b113f726e..79a9b1a56 100644
--- a/source/slang/slang-emit-c-like.cpp
+++ b/source/slang/slang-emit-c-like.cpp
@@ -2472,6 +2472,16 @@ void CLikeSourceEmitter::defaultEmitInstExpr(IRInst* inst, const EmitOpInfo& inO
}
break;
+ case kIROp_GetEquivalentStructuredBuffer:
+ {
+ auto base = inst->getOperand(0);
+ emitOperand(base, outerPrec);
+ m_writer->emit(".asStructuredBuffer<");
+ emitType(as<IRHLSLStructuredBufferTypeBase>(inst->getDataType())->getElementType());
+ m_writer->emit(">()");
+ }
+ break;
+
case kIROp_RWStructuredBufferStore:
{
auto base = inst->getOperand(0);
diff --git a/source/slang/slang-emit-c-like.h b/source/slang/slang-emit-c-like.h
index 3cccad9e6..f0d703b40 100644
--- a/source/slang/slang-emit-c-like.h
+++ b/source/slang/slang-emit-c-like.h
@@ -260,7 +260,6 @@ public:
bool hasExplicitConstantBufferOffset(IRInst* cbufferType);
bool isSingleElementConstantBuffer(IRInst* cbufferType);
bool shouldForceUnpackConstantBufferElements(IRInst* cbufferType);
-
//
// Expressions
//
diff --git a/source/slang/slang-emit-cuda.cpp b/source/slang/slang-emit-cuda.cpp
index 81bcafeb3..7d104ff1b 100644
--- a/source/slang/slang-emit-cuda.cpp
+++ b/source/slang/slang-emit-cuda.cpp
@@ -515,7 +515,17 @@ bool CUDASourceEmitter::tryEmitInstStmtImpl(IRInst* inst)
{
emitInstResultDecl(inst);
m_writer->emit("atomicAdd(");
+ bool needCloseTypeCast = false;
+ if (inst->getDataType()->getOp() == kIROp_Int64Type)
+ {
+ m_writer->emit("(unsigned long long*)(");
+ needCloseTypeCast = true;
+ }
emitOperand(inst->getOperand(0), getInfo(EmitOp::General));
+ if (needCloseTypeCast)
+ {
+ m_writer->emit(")");
+ }
m_writer->emit(", ");
emitOperand(inst->getOperand(1), getInfo(EmitOp::General));
m_writer->emit(");\n");
@@ -525,7 +535,17 @@ bool CUDASourceEmitter::tryEmitInstStmtImpl(IRInst* inst)
{
emitInstResultDecl(inst);
m_writer->emit("atomicAdd(");
+ bool needCloseTypeCast = false;
+ if (inst->getDataType()->getOp() == kIROp_Int64Type)
+ {
+ m_writer->emit("(unsigned long long*)(");
+ needCloseTypeCast = true;
+ }
emitOperand(inst->getOperand(0), getInfo(EmitOp::General));
+ if (needCloseTypeCast)
+ {
+ m_writer->emit(")");
+ }
m_writer->emit(", -(");
emitOperand(inst->getOperand(1), getInfo(EmitOp::General));
m_writer->emit("));\n");
diff --git a/source/slang/slang-emit-glsl.cpp b/source/slang/slang-emit-glsl.cpp
index ca5569602..7f8bc14b4 100644
--- a/source/slang/slang-emit-glsl.cpp
+++ b/source/slang/slang-emit-glsl.cpp
@@ -2153,8 +2153,50 @@ bool GLSLSourceEmitter::tryEmitInstExprImpl(IRInst* inst, const EmitOpInfo& inOu
return false;
}
+// Looks at the root address of `inst`'s first operand and returns it as an
+// `IRImageSubscript` when it is one; otherwise returns whatever `as<>` yields
+// for a failed cast. The result is meant to be tested directly in an `if`.
+static IRImageSubscript* isTextureAccess(IRInst* inst)
+{
+ return as<IRImageSubscript>(getRootAddr(inst->getOperand(0)));
+}
+
+// Emits the leading arguments of an image call for the image subscript `inst`:
+// the image operand, then the coordinate wrapped in an integer constructor,
+// then (only if present) the sample coordinate. The caller is responsible for
+// emitting the function name with its opening parenthesis beforehand and any
+// remaining arguments plus the closing parenthesis afterwards.
+void GLSLSourceEmitter::emitAtomicImageCoord(IRImageSubscript* inst)
+{
+ emitOperand(inst->getImage(), getInfo(EmitOp::General));
+ m_writer->emit(", ");
+ // Pick the constructor from the coordinate's type: `ivecN` for an N-element
+ // vector coordinate, plain `int` for anything else.
+ if (auto vecType = as<IRVectorType>(inst->getCoord()->getDataType()))
+ {
+ m_writer->emit("ivec");
+ m_writer->emit(getIntVal(vecType->getElementCount()));
+ }
+ else
+ {
+ m_writer->emit("int");
+ }
+ m_writer->emit("(");
+ emitOperand(inst->getCoord(), getInfo(EmitOp::General));
+ m_writer->emit(")");
+ // The sample coordinate is optional and is appended as an extra argument.
+ if (inst->hasSampleCoord())
+ {
+ m_writer->emit(", ");
+ emitOperand(inst->getSampleCoord(), getInfo(EmitOp::General));
+ }
+}
+
bool GLSLSourceEmitter::tryEmitInstStmtImpl(IRInst* inst)
{
+ auto requireAtomicExtIfNeeded = [&]()
+ {
+ if (isFloatingType(inst->getDataType()))
+ {
+ _requireGLSLExtension(toSlice("GL_EXT_shader_atomic_float"));
+ }
+ if (isIntegralType(inst->getDataType()))
+ {
+ if (getIntTypeInfo(inst->getDataType()).width == 64)
+ {
+ _requireGLSLExtension(toSlice("GL_EXT_shader_atomic_int64"));
+ }
+ }
+ };
switch (inst->getOp())
{
case kIROp_StructuredBufferGetDimensions:
@@ -2176,24 +2218,52 @@ bool GLSLSourceEmitter::tryEmitInstStmtImpl(IRInst* inst)
case kIROp_AtomicLoad:
{
emitInstResultDecl(inst);
- emitDereferenceOperand(inst->getOperand(0), getInfo(EmitOp::General));
+ if (auto imageSubscript = isTextureAccess(inst))
+ {
+ m_writer->emit("imageLoad(");
+ emitAtomicImageCoord(imageSubscript);
+ m_writer->emit(")");
+ }
+ else
+ {
+ emitDereferenceOperand(inst->getOperand(0), getInfo(EmitOp::General));
+ }
m_writer->emit(";\n");
return true;
}
case kIROp_AtomicStore:
{
- emitInstResultDecl(inst);
- emitDereferenceOperand(inst->getOperand(0), getInfo(EmitOp::General));
- m_writer->emit(" = ");
- emitOperand(inst->getOperand(1), getInfo(EmitOp::General));
- m_writer->emit(";\n");
+ // An atomic store produces no result, so no result declaration is emitted.
+ // Stores whose destination is rooted at an image subscript become an
+ // `imageStore(image, coord[, sample], value)` call; every other destination
+ // is emitted as a plain assignment through the dereferenced operand.
+ if (auto imageSubscript = isTextureAccess(inst))
+ {
+ m_writer->emit("imageStore(");
+ emitAtomicImageCoord(imageSubscript);
+ m_writer->emit(", ");
+ // NOTE(review): GLSL `imageStore` takes a 4-component data argument;
+ // confirm operand 1 is already in a suitable form for the image format.
+ emitOperand(inst->getOperand(1), getInfo(EmitOp::General));
+ // Terminate the statement like the non-image branch does; without the
+ // ";\n" the call would run into whatever is emitted next.
+ m_writer->emit(");\n");
+ }
+ else
+ {
+ emitDereferenceOperand(inst->getOperand(0), getInfo(EmitOp::General));
+ m_writer->emit(" = ");
+ emitOperand(inst->getOperand(1), getInfo(EmitOp::General));
+ m_writer->emit(";\n");
+ }
return true;
}
case kIROp_AtomicExchange:
{
+ requireAtomicExtIfNeeded();
emitInstResultDecl(inst);
- m_writer->emit("atomicExchange(");
- emitOperand(inst->getOperand(0), getInfo(EmitOp::General));
+ if (auto imageSubscript = isTextureAccess(inst))
+ {
+ m_writer->emit("imageAtomicExchange(");
+ emitAtomicImageCoord(imageSubscript);
+ }
+ else
+ {
+ m_writer->emit("atomicExchange(");
+ emitOperand(inst->getOperand(0), getInfo(EmitOp::General));
+ }
m_writer->emit(", ");
emitOperand(inst->getOperand(1), getInfo(EmitOp::General));
m_writer->emit(");\n");
@@ -2201,9 +2271,19 @@ bool GLSLSourceEmitter::tryEmitInstStmtImpl(IRInst* inst)
}
case kIROp_AtomicCompareExchange:
{
+ requireAtomicExtIfNeeded();
+
emitInstResultDecl(inst);
- m_writer->emit("atomicCompSwap(");
- emitOperand(inst->getOperand(0), getInfo(EmitOp::General));
+ if (auto imageSubscript = isTextureAccess(inst))
+ {
+ m_writer->emit("imageAtomicCompSwap(");
+ emitAtomicImageCoord(imageSubscript);
+ }
+ else
+ {
+ m_writer->emit("atomicCompSwap(");
+ emitOperand(inst->getOperand(0), getInfo(EmitOp::General));
+ }
m_writer->emit(", ");
emitOperand(inst->getOperand(1), getInfo(EmitOp::General));
m_writer->emit(", ");
@@ -2213,9 +2293,19 @@ bool GLSLSourceEmitter::tryEmitInstStmtImpl(IRInst* inst)
}
case kIROp_AtomicAdd:
{
+ requireAtomicExtIfNeeded();
+
emitInstResultDecl(inst);
- m_writer->emit("atomicAdd(");
- emitOperand(inst->getOperand(0), getInfo(EmitOp::General));
+ if (auto imageSubscript = isTextureAccess(inst))
+ {
+ m_writer->emit("imageAtomicAdd(");
+ emitAtomicImageCoord(imageSubscript);
+ }
+ else
+ {
+ m_writer->emit("atomicAdd(");
+ emitOperand(inst->getOperand(0), getInfo(EmitOp::General));
+ }
m_writer->emit(", ");
emitOperand(inst->getOperand(1), getInfo(EmitOp::General));
m_writer->emit(");\n");
@@ -2223,9 +2313,19 @@ bool GLSLSourceEmitter::tryEmitInstStmtImpl(IRInst* inst)
}
case kIROp_AtomicSub:
{
+ requireAtomicExtIfNeeded();
+
emitInstResultDecl(inst);
- m_writer->emit("atomicAdd(");
- emitOperand(inst->getOperand(0), getInfo(EmitOp::General));
+ if (auto imageSubscript = isTextureAccess(inst))
+ {
+ m_writer->emit("imageAtomicAdd(");
+ emitAtomicImageCoord(imageSubscript);
+ }
+ else
+ {
+ m_writer->emit("atomicAdd(");
+ emitOperand(inst->getOperand(0), getInfo(EmitOp::General));
+ }
m_writer->emit(", -(");
emitOperand(inst->getOperand(1), getInfo(EmitOp::General));
m_writer->emit("));\n");
@@ -2233,9 +2333,19 @@ bool GLSLSourceEmitter::tryEmitInstStmtImpl(IRInst* inst)
}
case kIROp_AtomicAnd:
{
+ requireAtomicExtIfNeeded();
+
emitInstResultDecl(inst);
- m_writer->emit("atomicAnd(");
- emitOperand(inst->getOperand(0), getInfo(EmitOp::General));
+ if (auto imageSubscript = isTextureAccess(inst))
+ {
+ m_writer->emit("imageAtomicAnd(");
+ emitAtomicImageCoord(imageSubscript);
+ }
+ else
+ {
+ m_writer->emit("atomicAnd(");
+ emitOperand(inst->getOperand(0), getInfo(EmitOp::General));
+ }
m_writer->emit(", ");
emitOperand(inst->getOperand(1), getInfo(EmitOp::General));
m_writer->emit(");\n");
@@ -2243,9 +2353,19 @@ bool GLSLSourceEmitter::tryEmitInstStmtImpl(IRInst* inst)
}
case kIROp_AtomicOr:
{
+ requireAtomicExtIfNeeded();
+
emitInstResultDecl(inst);
- m_writer->emit("atomicOr(");
- emitOperand(inst->getOperand(0), getInfo(EmitOp::General));
+ if (auto imageSubscript = isTextureAccess(inst))
+ {
+ m_writer->emit("imageAtomicOr(");
+ emitAtomicImageCoord(imageSubscript);
+ }
+ else
+ {
+ m_writer->emit("atomicOr(");
+ emitOperand(inst->getOperand(0), getInfo(EmitOp::General));
+ }
m_writer->emit(", ");
emitOperand(inst->getOperand(1), getInfo(EmitOp::General));
m_writer->emit(");\n");
@@ -2253,9 +2373,19 @@ bool GLSLSourceEmitter::tryEmitInstStmtImpl(IRInst* inst)
}
case kIROp_AtomicXor:
{
+ requireAtomicExtIfNeeded();
+
emitInstResultDecl(inst);
- m_writer->emit("atomicXor(");
- emitOperand(inst->getOperand(0), getInfo(EmitOp::General));
+ if (auto imageSubscript = isTextureAccess(inst))
+ {
+ m_writer->emit("imageAtomicXor(");
+ emitAtomicImageCoord(imageSubscript);
+ }
+ else
+ {
+ m_writer->emit("atomicXor(");
+ emitOperand(inst->getOperand(0), getInfo(EmitOp::General));
+ }
m_writer->emit(", ");
emitOperand(inst->getOperand(1), getInfo(EmitOp::General));
m_writer->emit(");\n");
@@ -2263,9 +2393,19 @@ bool GLSLSourceEmitter::tryEmitInstStmtImpl(IRInst* inst)
}
case kIROp_AtomicMin:
{
+ requireAtomicExtIfNeeded();
+
emitInstResultDecl(inst);
- m_writer->emit("atomicMin(");
- emitOperand(inst->getOperand(0), getInfo(EmitOp::General));
+ if (auto imageSubscript = isTextureAccess(inst))
+ {
+ m_writer->emit("imageAtomicMin(");
+ emitAtomicImageCoord(imageSubscript);
+ }
+ else
+ {
+ m_writer->emit("atomicMin(");
+ emitOperand(inst->getOperand(0), getInfo(EmitOp::General));
+ }
m_writer->emit(", ");
emitOperand(inst->getOperand(1), getInfo(EmitOp::General));
m_writer->emit(");\n");
@@ -2273,9 +2413,19 @@ bool GLSLSourceEmitter::tryEmitInstStmtImpl(IRInst* inst)
}
case kIROp_AtomicMax:
{
+ requireAtomicExtIfNeeded();
+
emitInstResultDecl(inst);
- m_writer->emit("atomicMax(");
- emitOperand(inst->getOperand(0), getInfo(EmitOp::General));
+ if (auto imageSubscript = isTextureAccess(inst))
+ {
+ m_writer->emit("imageAtomicMax(");
+ emitAtomicImageCoord(imageSubscript);
+ }
+ else
+ {
+ m_writer->emit("atomicMax(");
+ emitOperand(inst->getOperand(0), getInfo(EmitOp::General));
+ }
m_writer->emit(", ");
emitOperand(inst->getOperand(1), getInfo(EmitOp::General));
m_writer->emit(");\n");
@@ -2283,9 +2433,19 @@ bool GLSLSourceEmitter::tryEmitInstStmtImpl(IRInst* inst)
}
case kIROp_AtomicInc:
{
+ requireAtomicExtIfNeeded();
+
emitInstResultDecl(inst);
- m_writer->emit("atomicAdd(");
- emitOperand(inst->getOperand(0), getInfo(EmitOp::General));
+ if (auto imageSubscript = isTextureAccess(inst))
+ {
+ m_writer->emit("imageAtomicAdd(");
+ emitAtomicImageCoord(imageSubscript);
+ }
+ else
+ {
+ m_writer->emit("atomicAdd(");
+ emitOperand(inst->getOperand(0), getInfo(EmitOp::General));
+ }
m_writer->emit(", ");
emitType(inst->getDataType());
m_writer->emit("(1)");
@@ -2294,9 +2454,19 @@ bool GLSLSourceEmitter::tryEmitInstStmtImpl(IRInst* inst)
}
case kIROp_AtomicDec:
{
+ requireAtomicExtIfNeeded();
+
emitInstResultDecl(inst);
- m_writer->emit("atomicAdd(");
- emitOperand(inst->getOperand(0), getInfo(EmitOp::General));
+ if (auto imageSubscript = isTextureAccess(inst))
+ {
+ m_writer->emit("imageAtomicAdd(");
+ emitAtomicImageCoord(imageSubscript);
+ }
+ else
+ {
+ m_writer->emit("atomicAdd(");
+ emitOperand(inst->getOperand(0), getInfo(EmitOp::General));
+ }
m_writer->emit(", ");
emitType(inst->getDataType());
m_writer->emit("(-1)");
diff --git a/source/slang/slang-emit-glsl.h b/source/slang/slang-emit-glsl.h
index 8958c7608..12ab60e46 100644
--- a/source/slang/slang-emit-glsl.h
+++ b/source/slang/slang-emit-glsl.h
@@ -133,6 +133,8 @@ protected:
void _emitSpecialFloatImpl(IRType* type, const char* valueExpr);
+ void emitAtomicImageCoord(IRImageSubscript* operand);
+
Dictionary<IRInst*, HashSet<IRFunc*>> m_referencingEntryPoints;
RefPtr<GLSLExtensionTracker> m_glslExtensionTracker;
diff --git a/source/slang/slang-emit-hlsl.cpp b/source/slang/slang-emit-hlsl.cpp
index b45b4c575..ae87fd6d5 100644
--- a/source/slang/slang-emit-hlsl.cpp
+++ b/source/slang/slang-emit-hlsl.cpp
@@ -498,6 +498,10 @@ void HLSLSourceEmitter::emitEntryPointAttributesImpl(IRFunc* irFunc, IREntryPoin
bool HLSLSourceEmitter::tryEmitInstStmtImpl(IRInst* inst)
{
+ auto diagnoseFloatAtommic = [&]()
+ {
+ getSink()->diagnose(inst, Diagnostics::unsupportedTargetIntrinsic, "floating point atomic operation");
+ };
switch (inst->getOp())
{
case kIROp_AtomicLoad:
@@ -519,7 +523,8 @@ bool HLSLSourceEmitter::tryEmitInstStmtImpl(IRInst* inst)
{
emitType(inst->getDataType(), getName(inst));
m_writer->emit(";\n");
- m_writer->emit("InterlockedExchange(");
+ m_writer->emit("InterlockedExchange");
+ m_writer->emit("(");
emitOperand(inst->getOperand(0), getInfo(EmitOp::General));
m_writer->emit(", ");
emitOperand(inst->getOperand(1), getInfo(EmitOp::General));
@@ -532,7 +537,10 @@ bool HLSLSourceEmitter::tryEmitInstStmtImpl(IRInst* inst)
{
emitType(inst->getDataType(), getName(inst));
m_writer->emit(";\n");
- m_writer->emit("InterlockedCompareExchange(");
+ m_writer->emit("InterlockedCompareExchange");
+ if (inst->getDataType()->getOp() == kIROp_FloatType)
+ m_writer->emit("FloatBitwise");
+ m_writer->emit("(");
emitOperand(inst->getOperand(0), getInfo(EmitOp::General));
m_writer->emit(", ");
emitOperand(inst->getOperand(1), getInfo(EmitOp::General));
@@ -547,7 +555,12 @@ bool HLSLSourceEmitter::tryEmitInstStmtImpl(IRInst* inst)
{
emitType(inst->getDataType(), getName(inst));
m_writer->emit(";\n");
- m_writer->emit("InterlockedAdd(");
+ if (inst->getDataType()->getOp() == kIROp_FloatType)
+ {
+ diagnoseFloatAtommic();
+ }
+ m_writer->emit("InterlockedAdd");
+ m_writer->emit("(");
emitOperand(inst->getOperand(0), getInfo(EmitOp::General));
m_writer->emit(", ");
emitOperand(inst->getOperand(1), getInfo(EmitOp::General));
@@ -560,7 +573,12 @@ bool HLSLSourceEmitter::tryEmitInstStmtImpl(IRInst* inst)
{
emitType(inst->getDataType(), getName(inst));
m_writer->emit(";\n");
- m_writer->emit("InterlockedAdd(");
+ if (inst->getDataType()->getOp() == kIROp_FloatType)
+ {
+ diagnoseFloatAtommic();
+ }
+ m_writer->emit("InterlockedAdd");
+ m_writer->emit("(");
emitOperand(inst->getOperand(0), getInfo(EmitOp::General));
m_writer->emit(", -(");
emitOperand(inst->getOperand(1), getInfo(EmitOp::General));
@@ -573,7 +591,8 @@ bool HLSLSourceEmitter::tryEmitInstStmtImpl(IRInst* inst)
{
emitType(inst->getDataType(), getName(inst));
m_writer->emit(";\n");
- m_writer->emit("InterlockedAnd(");
+ m_writer->emit("InterlockedAnd");
+ m_writer->emit("(");
emitOperand(inst->getOperand(0), getInfo(EmitOp::General));
m_writer->emit(", ");
emitOperand(inst->getOperand(1), getInfo(EmitOp::General));
@@ -586,7 +605,8 @@ bool HLSLSourceEmitter::tryEmitInstStmtImpl(IRInst* inst)
{
emitType(inst->getDataType(), getName(inst));
m_writer->emit(";\n");
- m_writer->emit("InterlockedOr(");
+ m_writer->emit("InterlockedOr");
+ m_writer->emit("(");
emitOperand(inst->getOperand(0), getInfo(EmitOp::General));
m_writer->emit(", ");
emitOperand(inst->getOperand(1), getInfo(EmitOp::General));
@@ -599,7 +619,8 @@ bool HLSLSourceEmitter::tryEmitInstStmtImpl(IRInst* inst)
{
emitType(inst->getDataType(), getName(inst));
m_writer->emit(";\n");
- m_writer->emit("InterlockedXor(");
+ m_writer->emit("InterlockedXor");
+ m_writer->emit("(");
emitOperand(inst->getOperand(0), getInfo(EmitOp::General));
m_writer->emit(", ");
emitOperand(inst->getOperand(1), getInfo(EmitOp::General));
@@ -612,7 +633,8 @@ bool HLSLSourceEmitter::tryEmitInstStmtImpl(IRInst* inst)
{
emitType(inst->getDataType(), getName(inst));
m_writer->emit(";\n");
- m_writer->emit("InterlockedMin(");
+ m_writer->emit("InterlockedMin");
+ m_writer->emit("(");
emitOperand(inst->getOperand(0), getInfo(EmitOp::General));
m_writer->emit(", ");
emitOperand(inst->getOperand(1), getInfo(EmitOp::General));
@@ -625,7 +647,8 @@ bool HLSLSourceEmitter::tryEmitInstStmtImpl(IRInst* inst)
{
emitType(inst->getDataType(), getName(inst));
m_writer->emit(";\n");
- m_writer->emit("InterlockedMax(");
+ m_writer->emit("InterlockedMax");
+ m_writer->emit("(");
emitOperand(inst->getOperand(0), getInfo(EmitOp::General));
m_writer->emit(", ");
emitOperand(inst->getOperand(1), getInfo(EmitOp::General));
@@ -638,7 +661,8 @@ bool HLSLSourceEmitter::tryEmitInstStmtImpl(IRInst* inst)
{
emitType(inst->getDataType(), getName(inst));
m_writer->emit(";\n");
- m_writer->emit("InterlockedAdd(");
+ m_writer->emit("InterlockedAdd");
+ m_writer->emit("(");
emitOperand(inst->getOperand(0), getInfo(EmitOp::General));
m_writer->emit(", 1, ");
m_writer->emit(getName(inst));
@@ -649,7 +673,8 @@ bool HLSLSourceEmitter::tryEmitInstStmtImpl(IRInst* inst)
{
emitType(inst->getDataType(), getName(inst));
m_writer->emit(";\n");
- m_writer->emit("InterlockedAdd(");
+ m_writer->emit("InterlockedAdd");
+ m_writer->emit("(");
emitOperand(inst->getOperand(0), getInfo(EmitOp::General));
m_writer->emit(", -1, ");
m_writer->emit(getName(inst));
diff --git a/source/slang/slang-emit-metal.cpp b/source/slang/slang-emit-metal.cpp
index 2d5a7d56b..abd4d670a 100644
--- a/source/slang/slang-emit-metal.cpp
+++ b/source/slang/slang-emit-metal.cpp
@@ -260,8 +260,118 @@ void MetalSourceEmitter::emitMemoryOrderOperand(IRInst* inst)
}
}
+static IRImageSubscript* isTextureAccess(IRInst* inst)
+{
+ return as<IRImageSubscript>(getRootAddr(inst->getOperand(0)));
+}
+
+void MetalSourceEmitter::emitAtomicImageCoord(IRImageSubscript* inst)
+{
+ auto resourceType = as<IRResourceTypeBase>(inst->getImage()->getDataType());
+ if (auto textureType = as<IRTextureType>(resourceType))
+ {
+ if (as<IRVectorType>(textureType->getElementType()))
+ {
+ getSink()->diagnose(inst, Diagnostics::unsupportedTargetIntrinsic, "atomic operation on non-scalar texture");
+ }
+ }
+ bool isArray = getIntVal(resourceType->getIsArrayInst()) != 0;
+ if (isArray)
+ {
+ emitOperand(inst->getCoord(), getInfo(EmitOp::Postfix));
+ if (auto coordType = as<IRVectorType>(inst->getCoord()->getDataType()))
+ {
+ m_writer->emit(".");
+ const char* elements[] = { "x", "y", "z", "w" };
+ for (IRIntegerValue i = 0; i < getIntVal(coordType->getElementCount()) - 1; i++)
+ m_writer->emit(elements[Math::Min(3, (int)i)]);
+ m_writer->emit(", ");
+ emitOperand(inst->getCoord(), getInfo(EmitOp::Postfix));
+ m_writer->emit(".");
+ m_writer->emit(elements[Math::Min(3, (int)getIntVal(coordType->getElementCount()) - 1)]);
+ }
+ else
+ {
+ getSink()->diagnose(inst, Diagnostics::unsupportedTargetIntrinsic, "invalid image coordinate for atomic operation");
+ }
+ }
+ else
+ {
+ emitOperand(inst->getCoord(), getInfo(EmitOp::General));
+ }
+}
+
+void MetalSourceEmitter::emitAtomicDestOperand(IRInst* inst)
+{
+ // If operand is already an atomic type, we can emit it
+ // as is.
+ auto ptrType = as<IRPtrTypeBase>(inst->getDataType());
+ if (ptrType && as<IRAtomicType>(ptrType->getValueType()))
+ {
+ emitOperand(inst, getInfo(EmitOp::General));
+ return;
+ }
+ // Otherwise, we need to emit a cast.
+ m_writer->emit("((atomic_");
+ emitType(inst->getDataType());
+ m_writer->emit(")(");
+ emitOperand(inst, getInfo(EmitOp::General));
+ m_writer->emit("))");
+}
+
+void MetalSourceEmitter::emitAtomicSrcOperand(bool isImage, IRInst* inst)
+{
+ if (!isImage)
+ {
+ emitOperand(inst, getInfo(EmitOp::General));
+ return;
+ }
+ // If we are emitting a source operand for an atomic image operation,
+ // we need to convert it into a 4-vector.
+ m_writer->emit("vec<");
+ emitType(inst->getDataType());
+ m_writer->emit(", 4>(");
+ emitOperand(inst, getInfo(EmitOp::General));
+ m_writer->emit(")");
+}
+
bool MetalSourceEmitter::tryEmitInstStmtImpl(IRInst* inst)
{
+ auto emitAtomicOp = [&](const char* imageFunc, const char* bufferFunc)
+ {
+ emitInstResultDecl(inst);
+ bool isImageOp = false;
+ if (auto imageSubscript = isTextureAccess(inst))
+ {
+ emitOperand(imageSubscript->getImage(), getInfo(EmitOp::Postfix));
+ m_writer->emit(".");
+ m_writer->emit(imageFunc);
+ m_writer->emit("(");
+ emitAtomicImageCoord(imageSubscript);
+ isImageOp = true;
+ }
+ else
+ {
+ m_writer->emit(bufferFunc);
+ m_writer->emit("(");
+ emitAtomicDestOperand(inst->getOperand(0));
+ }
+ m_writer->emit(", ");
+ emitAtomicSrcOperand(isImageOp, inst->getOperand(1));
+ if (!isImageOp)
+ {
+ m_writer->emit(", ");
+ emitMemoryOrderOperand(inst->getOperand(inst->getOperandCount() - 1));
+ }
+ if (isImageOp)
+ m_writer->emit(").x;\n");
+ else
+ m_writer->emit(");\n");
+ };
+ auto diagnoseFloatAtommic = [&]()
+ {
+ getSink()->diagnose(inst, Diagnostics::unsupportedTargetIntrinsic, "floating point atomic operation");
+ };
switch (inst->getOp())
{
case kIROp_discard:
@@ -287,160 +397,216 @@ bool MetalSourceEmitter::tryEmitInstStmtImpl(IRInst* inst)
}
case kIROp_AtomicLoad:
{
+ if (isFloatingType(inst->getDataType()))
+ diagnoseFloatAtommic();
+
emitInstResultDecl(inst);
- m_writer->emit("atomic_load_explicit(");
- emitOperand(inst->getOperand(0), getInfo(EmitOp::General));
- m_writer->emit(", ");
- emitMemoryOrderOperand(inst->getOperand(1));
- m_writer->emit(");\n");
+ bool isImageOp = false;
+ if (auto imageSubscript = isTextureAccess(inst))
+ {
+ emitOperand(imageSubscript->getImage(), getInfo(EmitOp::Postfix));
+ m_writer->emit(".atomic_load(");
+ emitAtomicImageCoord(imageSubscript);
+ isImageOp = true;
+ }
+ else
+ {
+ m_writer->emit("atomic_load_explicit(");
+ emitAtomicDestOperand(inst->getOperand(0));
+ }
+ if (!isImageOp)
+ {
+ m_writer->emit(", ");
+ emitMemoryOrderOperand(inst->getOperand(1));
+ }
+ if (isImageOp)
+ m_writer->emit(").x;\n");
+ else
+ m_writer->emit(");\n");
return true;
}
case kIROp_AtomicStore:
{
- m_writer->emit("atomic_store_explicit(");
- emitOperand(inst->getOperand(0), getInfo(EmitOp::General));
- m_writer->emit(", ");
- emitOperand(inst->getOperand(1), getInfo(EmitOp::General));
+ bool isImageOp = false;
+ if (auto imageSubscript = isTextureAccess(inst))
+ {
+ emitOperand(imageSubscript->getImage(), getInfo(EmitOp::Postfix));
+ m_writer->emit(".atomic_store(");
+ emitAtomicImageCoord(imageSubscript);
+ isImageOp = true;
+ }
+ else
+ {
+ m_writer->emit("atomic_store_explicit(");
+ emitAtomicDestOperand(inst->getOperand(0));
+ }
m_writer->emit(", ");
- emitMemoryOrderOperand(inst->getOperand(2));
+ emitAtomicSrcOperand(isImageOp, inst->getOperand(1));
+ if (!isImageOp)
+ {
+ m_writer->emit(", ");
+ emitMemoryOrderOperand(inst->getOperand(2));
+ }
m_writer->emit(");\n");
return true;
}
case kIROp_AtomicExchange:
{
- emitInstResultDecl(inst);
- m_writer->emit("atomic_exchange_explicit(");
- emitOperand(inst->getOperand(0), getInfo(EmitOp::General));
- m_writer->emit(", ");
- emitOperand(inst->getOperand(1), getInfo(EmitOp::General));
- m_writer->emit(", ");
- emitMemoryOrderOperand(inst->getOperand(2));
- m_writer->emit(");\n");
+ if (isFloatingType(inst->getDataType()))
+ diagnoseFloatAtommic();
+
+ emitAtomicOp("atomic_exchange", "atomic_exchange_explicit");
return true;
}
case kIROp_AtomicCompareExchange:
{
+ if (isFloatingType(inst->getDataType()))
+ diagnoseFloatAtommic();
+
+ bool isImageOp = false;
+ auto imageSubscript = isTextureAccess(inst);
+ isImageOp = (imageSubscript != nullptr);
+
emitType(inst->getDataType(), getName(inst));
m_writer->emit(";\n{\n");
- emitType(inst->getDataType(), "_metal_cas_comparand");
+ if (isImageOp)
+ m_writer->emit("vec<");
+ emitType(inst->getDataType());
+ if (isImageOp)
+ m_writer->emit(", 4>");
+ m_writer->emit(" _metal_cas_comparand");
m_writer->emit(" = ");
emitOperand(inst->getOperand(1), getInfo(EmitOp::General));
m_writer->emit(";\n");
-
- m_writer->emit(getName(inst));
- m_writer->emit(" = atomic_compare_exchange_weak_explicit(");
- emitOperand(inst->getOperand(0), getInfo(EmitOp::General));
+ if (imageSubscript)
+ {
+ emitOperand(imageSubscript->getImage(), getInfo(EmitOp::Postfix));
+ m_writer->emit(".atomic_compare_exchange_weak(");
+ emitAtomicImageCoord(imageSubscript);
+ }
+ else
+ {
+ m_writer->emit("atomic_compare_exchange_weak_explicit(");
+ emitAtomicDestOperand(inst->getOperand(0));
+ }
m_writer->emit(", &_metal_cas_comparand, ");
- emitOperand(inst->getOperand(2), getInfo(EmitOp::General));
- m_writer->emit(", ");
- emitMemoryOrderOperand(inst->getOperand(3));
- m_writer->emit(", ");
- emitMemoryOrderOperand(inst->getOperand(4));
- m_writer->emit(");\n}\n");
+ emitAtomicSrcOperand(isImageOp, inst->getOperand(2));
+ if (!isImageOp)
+ {
+ m_writer->emit(", ");
+ emitMemoryOrderOperand(inst->getOperand(3));
+ m_writer->emit(", ");
+ emitMemoryOrderOperand(inst->getOperand(4));
+ }
+ m_writer->emit(");\n");
+ m_writer->emit(getName(inst));
+ m_writer->emit(" = _metal_cas_comparand");
+ if (isImageOp)
+ m_writer->emit(".x");
+ m_writer->emit(";\n}\n");
return true;
}
case kIROp_AtomicAdd:
{
- emitInstResultDecl(inst);
- m_writer->emit("atomic_fetch_add_explicit(");
- emitOperand(inst->getOperand(0), getInfo(EmitOp::General));
- m_writer->emit(", ");
- emitOperand(inst->getOperand(1), getInfo(EmitOp::General));
- m_writer->emit(", ");
- emitMemoryOrderOperand(inst->getOperand(2));
- m_writer->emit(");\n");
+ if (isFloatingType(inst->getDataType()))
+ diagnoseFloatAtommic();
+
+ emitAtomicOp("atomic_fetch_add", "atomic_fetch_add_explicit");
return true;
}
case kIROp_AtomicSub:
{
- emitInstResultDecl(inst);
- m_writer->emit("atomic_fetch_sub_explicit(");
- emitOperand(inst->getOperand(0), getInfo(EmitOp::General));
- m_writer->emit(", ");
- emitOperand(inst->getOperand(1), getInfo(EmitOp::General));
- m_writer->emit(", ");
- emitMemoryOrderOperand(inst->getOperand(2));
- m_writer->emit(");\n");
+ if (isFloatingType(inst->getDataType()))
+ diagnoseFloatAtommic();
+
+ emitAtomicOp("atomic_fetch_sub", "atomic_fetch_sub_explicit");
return true;
}
case kIROp_AtomicAnd:
{
- emitInstResultDecl(inst);
- m_writer->emit("atomic_fetch_and_explicit(");
- emitOperand(inst->getOperand(0), getInfo(EmitOp::General));
- m_writer->emit(", ");
- emitOperand(inst->getOperand(1), getInfo(EmitOp::General));
- m_writer->emit(", ");
- emitMemoryOrderOperand(inst->getOperand(2));
- m_writer->emit(");\n");
+ emitAtomicOp("atomic_fetch_and", "atomic_fetch_and_explicit");
return true;
}
case kIROp_AtomicOr:
{
- emitInstResultDecl(inst);
- m_writer->emit("atomic_fetch_or_explicit(");
- emitOperand(inst->getOperand(0), getInfo(EmitOp::General));
- m_writer->emit(", ");
- emitOperand(inst->getOperand(1), getInfo(EmitOp::General));
- m_writer->emit(", ");
- emitMemoryOrderOperand(inst->getOperand(2));
- m_writer->emit(");\n");
+ emitAtomicOp("atomic_fetch_or", "atomic_fetch_or_explicit");
return true;
}
case kIROp_AtomicXor:
{
- emitInstResultDecl(inst);
- m_writer->emit("atomic_fetch_xor_explicit(");
- emitOperand(inst->getOperand(0), getInfo(EmitOp::General));
- m_writer->emit(", ");
- emitOperand(inst->getOperand(1), getInfo(EmitOp::General));
- m_writer->emit(", ");
- emitMemoryOrderOperand(inst->getOperand(2));
- m_writer->emit(");\n");
+ emitAtomicOp("atomic_fetch_xor", "atomic_fetch_xor_explicit");
return true;
}
case kIROp_AtomicMin:
{
- emitInstResultDecl(inst);
- m_writer->emit("atomic_fetch_min_explicit(");
- emitOperand(inst->getOperand(0), getInfo(EmitOp::General));
- m_writer->emit(", ");
- emitOperand(inst->getOperand(1), getInfo(EmitOp::General));
- m_writer->emit(", ");
- emitMemoryOrderOperand(inst->getOperand(2));
- m_writer->emit(");\n");
+ if (isFloatingType(inst->getDataType()))
+ diagnoseFloatAtommic();
+
+ emitAtomicOp("atomic_fetch_min", "atomic_fetch_min_explicit");
return true;
}
case kIROp_AtomicMax:
{
- emitInstResultDecl(inst);
- m_writer->emit("atomic_fetch_max_explicit(");
- emitOperand(inst->getOperand(0), getInfo(EmitOp::General));
- m_writer->emit(", ");
- emitOperand(inst->getOperand(1), getInfo(EmitOp::General));
- m_writer->emit(", ");
- emitMemoryOrderOperand(inst->getOperand(2));
- m_writer->emit(");\n");
+ if (isFloatingType(inst->getDataType()))
+ diagnoseFloatAtommic();
+
+ emitAtomicOp("atomic_fetch_max", "atomic_fetch_max_explicit");
return true;
}
case kIROp_AtomicInc:
{
emitInstResultDecl(inst);
- m_writer->emit("atomic_fetch_add_explicit(");
- emitOperand(inst->getOperand(0), getInfo(EmitOp::General));
- m_writer->emit(", 1, ");
- emitMemoryOrderOperand(inst->getOperand(1));
- m_writer->emit(");\n");
+ bool isImageOp = false;
+ if (auto imageSubscript = isTextureAccess(inst))
+ {
+ emitOperand(imageSubscript->getImage(), getInfo(EmitOp::Postfix));
+ m_writer->emit(".atomic_fetch_add(");
+ emitAtomicImageCoord(imageSubscript);
+ isImageOp = true;
+ }
+ else
+ {
+ m_writer->emit("atomic_fetch_add_explicit(");
+ emitAtomicDestOperand(inst->getOperand(0));
+ }
+ m_writer->emit(", 1");
+ if (!isImageOp)
+ {
+ m_writer->emit(", ");
+ emitMemoryOrderOperand(inst->getOperand(1));
+ }
+ if (isImageOp)
+ m_writer->emit(").x;\n");
+ else
+ m_writer->emit(");\n");
return true;
}
case kIROp_AtomicDec:
{
emitInstResultDecl(inst);
- m_writer->emit("atomic_fetch_sub_explicit(");
- emitOperand(inst->getOperand(0), getInfo(EmitOp::General));
- m_writer->emit(", 1, ");
- emitMemoryOrderOperand(inst->getOperand(1));
- m_writer->emit(");\n");
+ bool isImageOp = false;
+ if (auto imageSubscript = isTextureAccess(inst))
+ {
+ emitOperand(imageSubscript->getImage(), getInfo(EmitOp::Postfix));
+ m_writer->emit(".atomic_fetch_sub(");
+ emitAtomicImageCoord(imageSubscript);
+ isImageOp = true;
+ }
+ else
+ {
+ m_writer->emit("atomic_fetch_sub_explicit(");
+ emitAtomicDestOperand(inst->getOperand(0));
+ }
+ m_writer->emit(", 1");
+ if (!isImageOp)
+ {
+ m_writer->emit(", ");
+ emitMemoryOrderOperand(inst->getOperand(1));
+ }
+ if (isImageOp)
+ m_writer->emit(").x;\n");
+ else
+ m_writer->emit(");\n");
return true;
}
}
diff --git a/source/slang/slang-emit-metal.h b/source/slang/slang-emit-metal.h
index 8e33eddef..e0fe1f1c8 100644
--- a/source/slang/slang-emit-metal.h
+++ b/source/slang/slang-emit-metal.h
@@ -79,6 +79,11 @@ protected:
void _emitStageAccessSemantic(IRStageAccessDecoration* decoration, const char* name);
bool _emitUserSemantic(UnownedStringSlice semanticName, IRIntegerValue semanticIndex);
bool maybeEmitSystemSemantic(IRInst* inst);
+
+ void emitAtomicImageCoord(IRImageSubscript* subscript);
+ void emitAtomicDestOperand(IRInst* operand);
+ void emitAtomicSrcOperand(bool isImage, IRInst* operand);
+ void emitAtomicSemanticOperand(IRInst* inst);
};
}
diff --git a/source/slang/slang-emit-spirv.cpp b/source/slang/slang-emit-spirv.cpp
index 0f123b8fd..62819e6d5 100644
--- a/source/slang/slang-emit-spirv.cpp
+++ b/source/slang/slang-emit-spirv.cpp
@@ -2929,11 +2929,11 @@ struct SPIRVEmitContext
void ensureAtomicCapability(IRInst* atomicInst, SpvOp op)
{
+ auto typeOp = atomicInst->getDataType()->getOp();
switch (op)
{
case SpvOpAtomicFAddEXT:
{
- auto typeOp = getVectorElementType(atomicInst->getDataType())->getOp();
switch (typeOp)
{
case kIROp_FloatType:
@@ -2948,13 +2948,19 @@ struct SPIRVEmitContext
ensureExtensionDeclaration(toSlice("SPV_EXT_shader_atomic_float16_add"));
requireSPIRVCapability(SpvCapabilityAtomicFloat16AddEXT);
break;
+ case kIROp_VectorType:
+ if (as<IRVectorType>(atomicInst->getDataType())->getElementType()->getOp() == kIROp_HalfType)
+ {
+ ensureExtensionDeclaration(toSlice("VK_NV_shader_atomic_float16_vector"));
+ requireSPIRVCapability(SpvCapabilityAtomicFloat16VectorNV);
+ }
+ break;
}
}
break;
case SpvOpAtomicFMinEXT:
case SpvOpAtomicFMaxEXT:
{
- auto typeOp = getVectorElementType(atomicInst->getDataType())->getOp();
switch (typeOp)
{
case kIROp_FloatType:
@@ -2969,10 +2975,24 @@ struct SPIRVEmitContext
ensureExtensionDeclaration(toSlice("SPV_EXT_shader_atomic_float_min_max"));
requireSPIRVCapability(SpvCapabilityAtomicFloat16MinMaxEXT);
break;
+ case kIROp_VectorType:
+ if (as<IRVectorType>(atomicInst->getDataType())->getElementType()->getOp() == kIROp_HalfType)
+ {
+ ensureExtensionDeclaration(toSlice("VK_NV_shader_atomic_float16_vector"));
+ requireSPIRVCapability(SpvCapabilityAtomicFloat16VectorNV);
+ }
+ break;
}
}
break;
}
+ switch (typeOp)
+ {
+ case kIROp_UInt64Type:
+ case kIROp_Int64Type:
+ requireSPIRVCapability(SpvCapabilityInt64Atomics);
+ break;
+ }
}
// The instructions that appear inside the basic blocks of
@@ -3321,6 +3341,7 @@ struct SPIRVEmitContext
const auto memoryScope = emitIntConstant(IRIntegerValue{SpvScopeDevice}, builder.getUIntType());
const auto memorySemantics = emitMemorySemanticMask(inst->getOperand(1));
result = emitOpAtomicIIncrement(parent, inst, inst->getFullType(), inst->getOperand(0), memoryScope, memorySemantics);
+ ensureAtomicCapability(inst, SpvOpAtomicIIncrement);
}
break;
case kIROp_AtomicDec:
@@ -3329,6 +3350,7 @@ struct SPIRVEmitContext
const auto memoryScope = emitIntConstant(IRIntegerValue{ SpvScopeDevice }, builder.getUIntType());
const auto memorySemantics = emitMemorySemanticMask(inst->getOperand(1));
result = emitOpAtomicIDecrement(parent, inst, inst->getFullType(), inst->getOperand(0), memoryScope, memorySemantics);
+ ensureAtomicCapability(inst, SpvOpAtomicIDecrement);
}
break;
case kIROp_AtomicLoad:
@@ -3337,6 +3359,7 @@ struct SPIRVEmitContext
const auto memoryScope = emitIntConstant(IRIntegerValue{ SpvScopeDevice }, builder.getUIntType());
const auto memorySemantics = emitMemorySemanticMask(inst->getOperand(1));
result = emitOpAtomicLoad(parent, inst, inst->getFullType(), inst->getOperand(0), memoryScope, memorySemantics);
+ ensureAtomicCapability(inst, SpvOpAtomicLoad);
}
break;
case kIROp_AtomicStore:
@@ -3345,6 +3368,7 @@ struct SPIRVEmitContext
const auto memoryScope = emitIntConstant(IRIntegerValue{ SpvScopeDevice }, builder.getUIntType());
const auto memorySemantics = emitMemorySemanticMask(inst->getOperand(2));
result = emitOpAtomicStore(parent, inst, inst->getOperand(0), memoryScope, memorySemantics, inst->getOperand(1));
+ ensureAtomicCapability(inst, SpvOpAtomicStore);
}
break;
case kIROp_AtomicExchange:
@@ -3353,6 +3377,7 @@ struct SPIRVEmitContext
const auto memoryScope = emitIntConstant(IRIntegerValue{ SpvScopeDevice }, builder.getUIntType());
const auto memorySemantics = emitMemorySemanticMask(inst->getOperand(2));
result = emitOpAtomicExchange(parent, inst, inst->getFullType(), inst->getOperand(0), memoryScope, memorySemantics, inst->getOperand(1));
+ ensureAtomicCapability(inst, SpvOpAtomicExchange);
}
break;
case kIROp_AtomicCompareExchange:
@@ -3365,6 +3390,7 @@ struct SPIRVEmitContext
parent, inst, inst->getFullType(), inst->getOperand(0),
memoryScope, memorySemanticsEqual, memorySemanticsUnequal,
inst->getOperand(2), inst->getOperand(1));
+ ensureAtomicCapability(inst, SpvOpAtomicCompareExchange);
}
break;
case kIROp_AtomicAdd:
diff --git a/source/slang/slang-emit.cpp b/source/slang/slang-emit.cpp
index c9319a13b..2206d29cf 100644
--- a/source/slang/slang-emit.cpp
+++ b/source/slang/slang-emit.cpp
@@ -53,9 +53,7 @@
#include "slang-ir-lower-l-value-cast.h"
#include "slang-ir-lower-reinterpret.h"
#include "slang-ir-loop-unroll.h"
-#include "slang-ir-legalize-extract-from-texture-access.h"
#include "slang-ir-legalize-image-subscript.h"
-#include "slang-ir-legalize-is-texture-access.h"
#include "slang-ir-legalize-vector-types.h"
#include "slang-ir-metadata.h"
#include "slang-ir-optix-entry-point-uniforms.h"
@@ -1058,9 +1056,6 @@ Result linkAndOptimizeIR(
legalizeVectorTypes(irModule, sink);
- // Legalize `__isTextureAccess` and related.
- legalizeIsTextureAccess(irModule, sink);
-
// Once specialization and type legalization have been performed,
// we should perform some of our basic optimization steps again,
// to see if we can clean up any temporaries created by legalization.
@@ -1335,8 +1330,6 @@ Result linkAndOptimizeIR(
// Create aliases for all dynamic resource parameters.
if(requiredLoweringPassSet.dynamicResource && isKhronosTarget(targetRequest))
legalizeDynamicResourcesForGLSL(codeGenContext, irModule);
-
- legalizeExtractFromTextureAccess(irModule);
// Legalize `ImageSubscript` loads.
switch (target)
diff --git a/source/slang/slang-intrinsic-expand.cpp b/source/slang/slang-intrinsic-expand.cpp
index 7cde70777..aabc193dd 100644
--- a/source/slang/slang-intrinsic-expand.cpp
+++ b/source/slang/slang-intrinsic-expand.cpp
@@ -653,112 +653,6 @@ const char* IntrinsicExpandContext::_emitSpecial(const char* cursor)
}
}
break;
-
- case 'a':
- {
- // We have an operation that needs to lower to either
- // `atomic*` or `imageAtomic*` for GLSL, depending on
- // whether its first operand is a subscript into an
- // array. This `$a` is the first `a` in `atomic`,
- // so we will replace it accordingly.
- //
- // TODO: This distinction should be made earlier,
- // with the front-end picking the right overload
- // based on the "address space" of the argument.
-
- Index argIndex = 0;
- SLANG_RELEASE_ASSERT(m_argCount > argIndex);
-
- auto arg = m_args[argIndex].get();
- if (arg->getOp() == kIROp_ImageSubscript)
- {
- m_writer->emit("imageA");
- }
- else
- {
- m_writer->emit("a");
- }
- }
- break;
-
- case 'A':
- {
- // We have an operand that represents the destination
- // of an atomic operation in GLSL, and it should
- // be lowered based on whether it is an ordinary l-value,
- // or an image subscript. In the image subscript case
- // this operand will turn into multiple arguments
- // to the `imageAtomic*` function.
- //
-
- Index argIndex = 0;
- SLANG_RELEASE_ASSERT(m_argCount > argIndex);
-
- auto arg = m_args[argIndex].get();
- if (arg->getOp() == kIROp_ImageSubscript)
- {
- if (m_emitter->getSourceLanguage() == SourceLanguage::GLSL)
- {
- // TODO: we don't handle the multisample
- // case correctly here, where the last
- // component of the image coordinate needs
- // to be broken out into its own argument.
- //
- m_writer->emit("(");
- m_emitter->emitOperand(arg->getOperand(0), getInfo(EmitOp::General));
- m_writer->emit("), ");
-
- // The coordinate argument will have been computed
- // as a `vector<uint, N>` because that is how the
- // HLSL image subscript operations are defined.
- // In contrast, the GLSL `imageAtomic*` operations
- // expect `vector<int, N>` coordinates, so we
- // will hackily insert the conversion here as
- // part of the intrinsic op.
- //
- auto coords = arg->getOperand(1);
- auto coordsType = coords->getDataType();
-
- auto coordsVecType = as<IRVectorType>(coordsType);
- IRIntegerValue elementCount = 1;
- if (coordsVecType)
- {
- coordsType = coordsVecType->getElementType();
- elementCount = getIntVal(coordsVecType->getElementCount());
- }
-
- SLANG_ASSERT(coordsType->getOp() == kIROp_UIntType);
-
- if (elementCount > 1)
- {
- m_writer->emit("ivec");
- m_writer->emit(elementCount);
- }
- else
- {
- m_writer->emit("int");
- }
-
- m_writer->emit("(");
- m_emitter->emitOperand(arg->getOperand(1), getInfo(EmitOp::General));
- m_writer->emit(")");
- }
- else
- {
- m_writer->emit("(");
- m_emitter->emitOperand(arg, getInfo(EmitOp::General));
- m_writer->emit(")");
- }
- }
- else
- {
- m_writer->emit("(");
- m_emitter->emitOperand(arg, getInfo(EmitOp::General));
- m_writer->emit(")");
- }
- }
- break;
-
case 'P':
// Type-based prefix as used for CUDA and C++ targets
{
diff --git a/source/slang/slang-ir-legalize-extract-from-texture-access.cpp b/source/slang/slang-ir-legalize-extract-from-texture-access.cpp
deleted file mode 100644
index de1e244a8..000000000
--- a/source/slang/slang-ir-legalize-extract-from-texture-access.cpp
+++ /dev/null
@@ -1,136 +0,0 @@
-#include "slang-ir-legalize-extract-from-texture-access.h"
-
-#include "slang-ir.h"
-#include "slang-ir-insts.h"
-#include "slang-ir-util.h"
-#include "slang-ir-clone.h"
-#include "slang-ir-specialize-address-space.h"
-#include "slang-parameter-binding.h"
-#include "slang-ir-legalize-image-subscript.h"
-#include "slang-ir-legalize-varying-params.h"
-#include "slang-ir-simplify-cfg.h"
-
-namespace Slang
-{
- void legalizeExtractTextureFromTextureAccess(IRBuilder& builder, IRInst* inst)
- {
- SLANG_ASSERT(inst);
-
- builder.setInsertBefore(inst);
- IRImageSubscript* imageSubscript = as<IRImageSubscript>(getRootAddr(inst->getOperand(0)));
- SLANG_ASSERT(imageSubscript);
- SLANG_ASSERT(imageSubscript->getImage());
- inst->replaceUsesWith(imageSubscript->getImage());
- inst->removeAndDeallocate();
- // Ensure we are done processing the imageSubscript before we remove it
- if (!imageSubscript->hasUses())
- imageSubscript->removeAndDeallocate();
- }
-
- void legalizeExtractArrayCoordFromTextureAccess(IRBuilder& builder, IRInst* inst)
- {
- SLANG_ASSERT(inst);
-
- builder.setInsertBefore(inst);
- IRImageSubscript* imageSubscript = as<IRImageSubscript>(getRootAddr(inst->getOperand(0)));
- SLANG_ASSERT(imageSubscript);
- SLANG_ASSERT(imageSubscript->getImage());
-
- auto image = as<IRTextureType>(imageSubscript->getImage()->getDataType());
- IRInst* coord = imageSubscript->getCoord();
- if(image->isArray())
- {
- // Extract final element which is 'ArrayCoord'
- IRVectorType* coordType = as<IRVectorType>(imageSubscript->getCoord()->getDataType());
- SLANG_ASSERT(coordType);
- auto coordSize = getIRVectorElementSize(coordType);
-
- IRType* newArrayCoordType = coordType->getElementType();
- auto arrayCoordLocation = coordSize - 1;
- List<UInt> swizzleIndicies = { (UInt)arrayCoordLocation };
-
- coord = builder.emitSwizzle(newArrayCoordType, coord, 1, swizzleIndicies.getBuffer());
- }
- else
- coord = builder.getIntValue(builder.getUIntType(), 0);
-
-
- inst->replaceUsesWith(coord);
- inst->removeAndDeallocate();
- // Ensure we are done processing the imageSubscript completly before we remove it
- if (!imageSubscript->hasUses())
- imageSubscript->removeAndDeallocate();
- }
-
- void legalizeExtractCoordFromTextureAccess(IRBuilder& builder, IRInst* inst)
- {
- SLANG_ASSERT(inst);
-
- builder.setInsertBefore(inst);
- IRImageSubscript* imageSubscript = as<IRImageSubscript>(getRootAddr(inst->getOperand(0)));
- SLANG_ASSERT(imageSubscript);
- SLANG_ASSERT(imageSubscript->getImage());
-
- auto image = as<IRTextureType>(imageSubscript->getImage()->getDataType());
- IRInst* coord = imageSubscript->getCoord();
- if(image->isArray())
- {
- // Extract all but final element which is 'ArrayCoord'
- IRVectorType* coordType = as<IRVectorType>(imageSubscript->getCoord()->getDataType());
- auto coordSize = getIRVectorElementSize(coordType);
- SLANG_ASSERT(coordType);
-
- IRType* newCoordType = nullptr;
- auto newCoordSize = coordSize - 1;
- if(newCoordSize != 1)
- newCoordType = builder.getVectorType(coordType->getElementType(), newCoordSize);
- else
- newCoordType = coordType->getElementType();
- List<UInt> swizzleIndicies = {1, 2, 3, 4};
-
- coord = builder.emitSwizzle(newCoordType, coord, newCoordSize, swizzleIndicies.getBuffer());
- }
-
- inst->replaceUsesWith(coord);
- inst->removeAndDeallocate();
- // Ensure we are done processing the imageSubscript completly before we remove it
- if (!imageSubscript->hasUses())
- imageSubscript->removeAndDeallocate();
- }
-
- void legalizeExtractFromTextureAccess(IRModule* module)
- {
- IRBuilder builder(module);
- for (auto globalInst : module->getModuleInst()->getChildren())
- {
- auto func = as<IRFunc>(globalInst);
- if (!func)
- continue;
- for (auto block : func->getBlocks())
- {
- auto inst = block->getFirstInst();
- IRInst* next;
- for ( ; inst; inst = next)
- {
- next = inst->getNextInst();
- switch (inst->getOp())
- {
- case kIROp_ExtractArrayCoordFromTextureAccess:
- if (as<IRImageSubscript>(getRootAddr(inst->getOperand(0))))
- legalizeExtractArrayCoordFromTextureAccess(builder, inst);
- continue;
- case kIROp_ExtractCoordFromTextureAccess:
- if (as<IRImageSubscript>(getRootAddr(inst->getOperand(0))))
- legalizeExtractCoordFromTextureAccess(builder, inst);
- continue;
- case kIROp_ExtractTextureFromTextureAccess:
- if (as<IRImageSubscript>(getRootAddr(inst->getOperand(0))))
- legalizeExtractTextureFromTextureAccess(builder, inst);
- continue;
- }
- }
- }
- }
- }
-}
-
diff --git a/source/slang/slang-ir-legalize-extract-from-texture-access.h b/source/slang/slang-ir-legalize-extract-from-texture-access.h
deleted file mode 100644
index 016c86def..000000000
--- a/source/slang/slang-ir-legalize-extract-from-texture-access.h
+++ /dev/null
@@ -1,11 +0,0 @@
-#pragma once
-
-#include "slang-ir.h"
-#include "slang-compiler.h"
-
-namespace Slang
-{
- class DiagnosticSink;
-
- void legalizeExtractFromTextureAccess(IRModule* module);
-}
diff --git a/source/slang/slang-ir-legalize-is-texture-access.cpp b/source/slang/slang-ir-legalize-is-texture-access.cpp
deleted file mode 100644
index b9a0a7772..000000000
--- a/source/slang/slang-ir-legalize-is-texture-access.cpp
+++ /dev/null
@@ -1,79 +0,0 @@
-#include "slang-ir-legalize-is-texture-access.h"
-
-#include "slang-ir.h"
-#include "slang-ir-insts.h"
-#include "slang-ir-util.h"
-#include "slang-ir-clone.h"
-#include "slang-ir-specialize-address-space.h"
-#include "slang-parameter-binding.h"
-#include "slang-ir-legalize-image-subscript.h"
-#include "slang-ir-legalize-varying-params.h"
-#include "slang-ir-sccp.h"
-
-namespace Slang
-{
- IRImageSubscript* getTextureAccess(IRInst* inst)
- {
- return as<IRImageSubscript>(getRootAddr(inst->getOperand(0)));
- }
-
- void legalizeIsTextureAccess(IRModule* module, DiagnosticSink* sink)
- {
- HashSet<IRFunc*> functionsToSCCP;
- IRBuilder builder(module);
- for (auto globalInst : module->getModuleInst()->getChildren())
- {
- auto func = as<IRFunc>(globalInst);
- if (!func)
- continue;
- for (auto block : func->getBlocks())
- {
- auto inst = block->getFirstInst();
- IRInst* next;
- for ( ; inst; inst = next)
- {
- next = inst->getNextInst();
- switch (inst->getOp())
- {
- case kIROp_IsTextureAccess:
- if (getTextureAccess(inst))
- inst->replaceUsesWith(builder.getBoolValue(true));
- else
- inst->replaceUsesWith(builder.getBoolValue(false));
- inst->removeAndDeallocate();
- functionsToSCCP.add(func);
- continue;
- case kIROp_IsTextureArrayAccess:
- {
- auto textureAccess = getTextureAccess(inst);
- if (textureAccess && as<IRTextureType>(textureAccess->getImage()->getDataType())->isArray())
- inst->replaceUsesWith(builder.getBoolValue(true));
- else
- inst->replaceUsesWith(builder.getBoolValue(false));
- inst->removeAndDeallocate();
- functionsToSCCP.add(func);
- continue;
- }
- case kIROp_IsTextureScalarAccess:
- {
- auto textureAccess = getTextureAccess(inst);
- if (textureAccess && !as<IRVectorType>(as<IRTextureType>(textureAccess->getImage()->getDataType())->getElementType()))
- inst->replaceUsesWith(builder.getBoolValue(true));
- else
- inst->replaceUsesWith(builder.getBoolValue(false));
- inst->removeAndDeallocate();
- functionsToSCCP.add(func);
- continue;
- }
- }
- }
- }
- }
- // Requires a SCCP to ensure Slang does not evaluate 'IRTextureType' code path
- // and unresolved 'isTextureAccess' operations for when 'inst' is not a
- // 'IRTextureType'/`TextureAccessor`
- for (auto func : functionsToSCCP)
- applySparseConditionalConstantPropagation(func, sink);
- }
-}
-
diff --git a/source/slang/slang-ir-legalize-is-texture-access.h b/source/slang/slang-ir-legalize-is-texture-access.h
deleted file mode 100644
index 9b9e1cca0..000000000
--- a/source/slang/slang-ir-legalize-is-texture-access.h
+++ /dev/null
@@ -1,11 +0,0 @@
-#pragma once
-
-#include "slang-ir.h"
-#include "slang-compiler.h"
-
-namespace Slang
-{
- class DiagnosticSink;
-
- void legalizeIsTextureAccess(IRModule* module, DiagnosticSink* sink);
-}
diff --git a/source/slang/slang-ir-use-uninitialized-values.cpp b/source/slang/slang-ir-use-uninitialized-values.cpp
index 98fd9841a..fea55de8d 100644
--- a/source/slang/slang-ir-use-uninitialized-values.cpp
+++ b/source/slang/slang-ir-use-uninitialized-values.cpp
@@ -315,8 +315,11 @@ namespace Slang
case kIROp_Unmodified:
return Store;
- // ... and the rest will load/use them
default:
+ // Default case is that if the instruction is a pointer, it
+ // is considered a store, otherwise a load.
+ if (as<IRPtrTypeBase>(user->getDataType()))
+ return Store;
return Load;
}
}
diff --git a/source/slang/slang-ir.cpp b/source/slang/slang-ir.cpp
index d0dcfd4fb..e0998779a 100644
--- a/source/slang/slang-ir.cpp
+++ b/source/slang/slang-ir.cpp
@@ -5092,7 +5092,7 @@ namespace Slang
auto inst = createInst<IRAtomicStore>(
this,
kIROp_AtomicStore,
- nullptr,
+ getVoidType(),
dstPtr,
srcVal,
memoryOrder);
diff --git a/tests/bugs/gh-3997.slang b/tests/bugs/gh-3997.slang
index 8c75da426..d42e65e39 100644
--- a/tests/bugs/gh-3997.slang
+++ b/tests/bugs/gh-3997.slang
@@ -10,7 +10,7 @@ float atomicAdd(__ref float value, float amount)
__requirePrelude("#include <atomic>");
__intrinsic_asm "std::atomic_ref(*$0).fetch_add($1)";
case spirv:
- return __atomicAdd(value, amount);
+ return __atomic_add(value, amount);
}
}
diff --git a/tests/compute/atomics-invalid-dest-type.slang b/tests/compute/atomics-invalid-dest-type.slang
index 864debaee..5ae03a5c7 100644
--- a/tests/compute/atomics-invalid-dest-type.slang
+++ b/tests/compute/atomics-invalid-dest-type.slang
@@ -1,11 +1,8 @@
// atomics-buffer.slang
-//TEST:SIMPLE(filecheck=CHECK): -target spirv -stage compute -entry computeMain
-//TEST:SIMPLE(filecheck=CHECK): -target hlsl -stage compute -entry computeMain
-//TEST:SIMPLE(filecheck=CHECK): -target glsl -stage compute -entry computeMain
//TEST:SIMPLE(filecheck=CHECK): -target metal -stage compute -entry computeMain
-//CHECK: Atomic must be applied to a scalar texture or non-texture
+//CHECK: atomic operation on non-scalar texture
RWBuffer<uint2> outputBuffer;
diff --git a/tests/compute/nonuniformres-atomic.slang b/tests/compute/nonuniformres-atomic.slang
index 95ae502dc..10dd30cb0 100644
--- a/tests/compute/nonuniformres-atomic.slang
+++ b/tests/compute/nonuniformres-atomic.slang
@@ -9,7 +9,7 @@ RWTexture2D<uint> texArray[2];
void main( uint2 dispatchThreadID : SV_DispatchThreadID, uint2 groupThreadID : SV_GroupThreadID )
{
- // CHECK0: imageAtomicAdd((texArray_{{.*}}[nonuniformEXT({{.*}})]
+ // CHECK0: {{.*}}imageAtomicAdd(texArray_{{.*}}[nonuniformEXT({{.*}})]
// CHECK1: InterlockedAdd(texArray_{{.*}}[NonUniformResourceIndex({{.*}})]
diff --git a/tests/hlsl-intrinsic/texture/float-atomics.slang b/tests/hlsl-intrinsic/texture/float-atomics.slang
index 02cb5570c..913380416 100644
--- a/tests/hlsl-intrinsic/texture/float-atomics.slang
+++ b/tests/hlsl-intrinsic/texture/float-atomics.slang
@@ -24,6 +24,6 @@ void computeMain(uint3 tid : SV_DispatchThreadID)
AllMemoryBarrier();
// CHECK: 4.0
- outputBuffer[0] = t[uint2(1, 0)];
+ outputBuffer[0] = t[uint2(1, 0)] + originalValue;
}
diff --git a/tests/metal/atomic-byteaddressbuffer.slang b/tests/metal/atomic-byteaddressbuffer.slang
new file mode 100644
index 000000000..677f80dbf
--- /dev/null
+++ b/tests/metal/atomic-byteaddressbuffer.slang
@@ -0,0 +1,57 @@
+//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=CHK):-slang -compute -dx12 -profile cs_6_0 -use-dxil -shaderobj -output-using-type
+//TEST(compute, vulkan):COMPARE_COMPUTE_EX(filecheck-buffer=CHK):-vk -compute -shaderobj -output-using-type
+//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=CHK):-cuda -compute -shaderobj -output-using-type
+//TEST:SIMPLE(filecheck=LIB):-target metallib -entry computeMain -stage compute -DMETAL
+
+//TEST_INPUT:ubuffer(data=[0 0 0 0 0]):name=uintBuffer
+RWByteAddressBuffer uintBuffer;
+
+//TEST_INPUT: ubuffer(data=[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ], stride=4):out,name outputBuffer
+RWStructuredBuffer<float> outputBuffer;
+
+[numthreads(1,1,1)]
+void computeMain()
+{
+ uintBuffer.InterlockedAdd(0, 1);
+ int oldValue;
+ //LIB: call {{.*}}.atomic.global.add.u.i32
+ uintBuffer.InterlockedAdd(0, 1, oldValue);
+ // CHK: 1
+ outputBuffer[0] = oldValue;
+
+ uintBuffer.InterlockedAdd(0, 1, oldValue);
+ // CHK: 2
+ outputBuffer[1] = (int)oldValue;
+
+ uintBuffer.InterlockedCompareExchange(0, 3, 4, oldValue);
+ // CHK: 3
+ outputBuffer[2] = (int)oldValue;
+
+ uintBuffer.InterlockedOr(0, 3, oldValue);
+ // CHK: 4
+ outputBuffer[3] = oldValue; // 4
+
+ uintBuffer.InterlockedExchange(0, 4, oldValue);
+ // CHK: 7
+ outputBuffer[4] = oldValue; // 7
+
+ uintBuffer.InterlockedMin(0, 3, oldValue);
+ // CHK: 4
+ outputBuffer[5] = oldValue; // 4
+
+ uintBuffer.InterlockedMax(0, 4, oldValue);
+ // CHK: 3
+ outputBuffer[6] = oldValue; // 3
+
+ uintBuffer.InterlockedAnd(0, 7, oldValue);
+ // CHK: 4
+ outputBuffer[7] = oldValue; // 4
+
+ uintBuffer.InterlockedXor(0, 7, oldValue);
+ // CHK: 4
+ outputBuffer[8] = oldValue; // 4
+
+ // CHK: 3
+ outputBuffer[9] = uintBuffer.Load(0);
+
+}
\ No newline at end of file
diff --git a/tests/metal/atomic-intrinsics.slang b/tests/metal/atomic-intrinsics.slang
index 5d47db913..afa0e5365 100644
--- a/tests/metal/atomic-intrinsics.slang
+++ b/tests/metal/atomic-intrinsics.slang
@@ -1,8 +1,7 @@
//TEST:SIMPLE(filecheck=MTL):-target metal -entry computeMain -stage compute -DMETAL
//TEST:SIMPLE(filecheck=LIB):-target metallib -entry computeMain -stage compute -DMETAL
//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=CHK):-slang -compute -dx12 -profile cs_6_0 -use-dxil -shaderobj -output-using-type
-//TEST(compute, vulkan):COMPARE_COMPUTE_EX(filecheck-buffer=CHK):-vk -emit-spirv-directly -compute -shaderobj -output-using-type
-//TEST(compute, vulkan):COMPARE_COMPUTE_EX(filecheck-buffer=CHK):-vk -emit-spirv-via-glsl -compute -shaderobj -output-using-type
+//TEST(compute, vulkan):COMPARE_COMPUTE_EX(filecheck-buffer=CHK):-vk -compute -shaderobj -output-using-type
//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-cpu -compute -shaderobj -output-using-type
//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute -shaderobj -output-using-type
@@ -36,22 +35,22 @@ void computeMain(uint groupIndex : SV_GroupIndex)
float val = 0.0f;
// InterlockedAdd
- //MTL: atomic_uint threadgroup* {{.*}}shareMemUI
+ //MTL: atomic_uint threadgroup*{{.*}}shareMemUI
//LIB: call {{.*}}.atomic.local.add.u.i32
InterlockedAdd(shareMemUI[idx], uint(1));
val += shareMemUI[idx];
- //MTL: atomic_int threadgroup* {{.*}}shareMemI
+ //MTL: atomic_int threadgroup*{{.*}}shareMemI
//LIB: call {{.*}}.atomic.local.add.s.i32
InterlockedAdd(shareMemI[idx], 2);
val += shareMemI[idx];
- //MTL: atomic_uint device* {{.*}}uintBuffer
+ //MTL: atomic_uint device*{{.*}}uintBuffer
//LIB: call {{.*}}.atomic.global.add.u.i32
InterlockedAdd(uintBuffer[idx], 1);
val += uintBuffer[idx];
- //MTL: atomic_int device* {{.*}}intBuffer
+ //MTL: atomic_int device*{{.*}}intBuffer
//LIB: call {{.*}}.atomic.global.add.s.i32
InterlockedAdd(intBuffer[idx], 2);
val += intBuffer[idx];
diff --git a/tests/metal/atomic-texture-buffer.slang b/tests/metal/atomic-texture-buffer.slang
index 3e4eda94b..1db156364 100644
--- a/tests/metal/atomic-texture-buffer.slang
+++ b/tests/metal/atomic-texture-buffer.slang
@@ -2,7 +2,7 @@
//TEST:SIMPLE(filecheck=METAL_FLOAT): -target metal -stage compute -entry computeMain -DFLOAT
//TEST:SIMPLE(filecheck=METALLIB): -target metallib -stage compute -entry computeMain
-// METAL_FLOAT: 'float' atomic texture operations are disallowed with Metal target's
+// METAL_FLOAT: floating point atomic operation
//METALLIB: @computeMain
diff --git a/tests/slang-extension/atomic-int64-byte-address-buffer.slang b/tests/slang-extension/atomic-int64-byte-address-buffer.slang
index 9a7ae3b61..61e38069d 100644
--- a/tests/slang-extension/atomic-int64-byte-address-buffer.slang
+++ b/tests/slang-extension/atomic-int64-byte-address-buffer.slang
@@ -5,7 +5,7 @@
// No support for int64_t on fxc - we need SM6.0 and dxil
// https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/hlsl-shader-model-6-0-features-for-direct3d-12
//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 -nvapi-slot u0 -shaderobj
-//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 -profile cs_6_0 -use-dxil -render-features atomic-int64 -nvapi-slot u0 -compile-arg -O2 -shaderobj
+//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 -profile cs_6_0 -use-dxil -render-features atomic-int64 -compile-arg -O2 -shaderobj
//TEST(compute, vulkan):COMPARE_COMPUTE_EX:-vk -compute -render-features atomic-int64 -shaderobj
//TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute -shaderobj
diff --git a/tests/slang-extension/atomic-min-max-u64-byte-address-buffer.slang b/tests/slang-extension/atomic-min-max-u64-byte-address-buffer.slang
index 4ab67df8e..2fce9788a 100644
--- a/tests/slang-extension/atomic-min-max-u64-byte-address-buffer.slang
+++ b/tests/slang-extension/atomic-min-max-u64-byte-address-buffer.slang
@@ -5,7 +5,7 @@
// No support for int64_t on fxc - we need SM6.0 and dxil
// https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/hlsl-shader-model-6-0-features-for-direct3d-12
//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 -nvapi-slot u0 -shaderobj
-//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 -profile cs_6_0 -use-dxil -render-features atomic-int64 -nvapi-slot u0 -compile-arg -O2 -shaderobj
+//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 -profile cs_6_0 -use-dxil -render-features atomic-int64 -compile-arg -O2 -shaderobj
//TEST(compute, vulkan):COMPARE_COMPUTE_EX:-vk -compute -render-features atomic-int64 -shaderobj
// For some reason this doesn't work correctly on CUDA? That it behaves as if always does Min. Min and Max do appropriate
// things tho, because if I force the condition I do get the right answer
diff --git a/tests/slang-extension/cas-int64-byte-address-buffer.slang b/tests/slang-extension/cas-int64-byte-address-buffer.slang
index 873f6ab4b..2d3189215 100644
--- a/tests/slang-extension/cas-int64-byte-address-buffer.slang
+++ b/tests/slang-extension/cas-int64-byte-address-buffer.slang
@@ -5,7 +5,7 @@
// No support for int64_t on fxc - we need SM6.0 and dxil
// https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/hlsl-shader-model-6-0-features-for-direct3d-12
//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 -nvapi-slot u0 -shaderobj
-//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 -profile cs_6_0 -use-dxil -render-features atomic-int64 -nvapi-slot u0 -compile-arg -O2 -shaderobj
+//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 -profile cs_6_0 -use-dxil -render-features atomic-int64 -compile-arg -O2 -shaderobj
//TEST(compute, vulkan):COMPARE_COMPUTE_EX:-vk -compute -render-features atomic-int64 -shaderobj
//TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute -shaderobj
diff --git a/tests/slang-extension/exchange-int64-byte-address-buffer.slang b/tests/slang-extension/exchange-int64-byte-address-buffer.slang
index 84654ab80..a6c1277ac 100644
--- a/tests/slang-extension/exchange-int64-byte-address-buffer.slang
+++ b/tests/slang-extension/exchange-int64-byte-address-buffer.slang
@@ -2,10 +2,7 @@
//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-cpu -compute -shaderobj
// No support for int64_t on DX11
//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -shaderobj
-// No support for int64_t on fxc - we need SM6.0 and dxil
-// https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/hlsl-shader-model-6-0-features-for-direct3d-12
-//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 -nvapi-slot u0 -shaderobj
-//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 -profile cs_6_0 -use-dxil -render-features atomic-int64 -nvapi-slot u0 -compile-arg -O2 -shaderobj
+//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 -profile cs_6_0 -use-dxil -render-features atomic-int64 -compile-arg -O2 -shaderobj
//TEST(compute, vulkan):COMPARE_COMPUTE_EX:-vk -compute -render-features atomic-int64 -shaderobj
//TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute -shaderobj
diff --git a/tests/spirv/ref-this.slang b/tests/spirv/ref-this.slang
index 5eaa7f3a1..de4263975 100644
--- a/tests/spirv/ref-this.slang
+++ b/tests/spirv/ref-this.slang
@@ -1,7 +1,7 @@
//TEST:SIMPLE(filecheck=CHECK): -target spirv
// CHECK: %[[PTR:[0-9a-zA-Z_]+]] = OpAccessChain %_ptr_PhysicalStorageBuffer_uint %{{.*}} %int_0
-// CHECK: %original = OpAtomicIAdd %uint %[[PTR]] %uint_1 %uint_0 %uint_1
+// CHECK: %{{.*}} = OpAtomicIAdd %uint %[[PTR]] %uint_1 %uint_0 %uint_1
struct Buf
{