diff options
Diffstat (limited to 'source')
| -rw-r--r-- | source/slang/hlsl.meta.slang | 297 | ||||
| -rw-r--r-- | source/slang/slang-emit-c-like.cpp | 1 | ||||
| -rw-r--r-- | source/slang/slang-emit-metal.cpp | 18 | ||||
| -rw-r--r-- | source/slang/slang-ir-inst-defs.h | 1 |
4 files changed, 243 insertions, 74 deletions
diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang index 82ef5837e..597e4dc06 100644 --- a/source/slang/hlsl.meta.slang +++ b/source/slang/hlsl.meta.slang @@ -4633,7 +4633,7 @@ ${{{{ // Added operations: [ForceInline] - [require(cuda_glsl_hlsl_spirv, byteaddressbuffer_rw)] + [require(cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)] void InterlockedAdd( UINT dest, UINT value, @@ -4644,6 +4644,7 @@ ${{{{ case glsl: __intrinsic_asm "($3 = atomicAdd($0._data[$1/4], $2))"; case cuda: __intrinsic_asm "(*$3 = atomicAdd($0._getPtrAt<uint32_t>($1), $2))"; case hlsl: __intrinsic_asm ".InterlockedAdd"; + case metal: case spirv: let buf = __getEquivalentStructuredBuffer<uint>(this); ::InterlockedAdd(buf[dest / 4], value, original_value); @@ -4651,7 +4652,7 @@ ${{{{ } [ForceInline] - [require(cuda_glsl_hlsl_spirv, byteaddressbuffer_rw)] + [require(cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)] void InterlockedAdd( UINT dest, UINT value) @@ -4661,6 +4662,7 @@ ${{{{ case glsl: __intrinsic_asm "atomicAdd($0._data[$1/4], $2)"; case cuda: __intrinsic_asm "atomicAdd($0._getPtrAt<uint32_t>($1), $2)"; case hlsl: __intrinsic_asm ".InterlockedAdd"; + case metal: case spirv: let buf = __getEquivalentStructuredBuffer<uint>(this); ::InterlockedAdd(buf[dest / 4], value); @@ -4668,7 +4670,7 @@ ${{{{ } [ForceInline] - [require(cuda_glsl_hlsl_spirv, byteaddressbuffer_rw)] + [require(cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)] void InterlockedAnd( UINT dest, UINT value, @@ -4679,6 +4681,7 @@ ${{{{ case glsl: __intrinsic_asm "$3 = atomicAnd($0._data[$1/4], $2)"; case cuda: __intrinsic_asm "(*$3 = atomicAnd($0._getPtrAt<uint32_t>($1), $2))"; case hlsl: __intrinsic_asm ".InterlockedAnd"; + case metal: case spirv: let buf = __getEquivalentStructuredBuffer<uint>(this); ::InterlockedAnd(buf[dest / 4], value, original_value); @@ -4686,7 +4689,7 @@ ${{{{ } [ForceInline] - [require(cuda_glsl_hlsl_spirv, byteaddressbuffer_rw)] + [require(cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)] void InterlockedAnd( UINT dest, UINT value) @@ -4696,6 +4699,7 @@ ${{{{ case glsl: __intrinsic_asm "atomicAnd($0._data[$1/4], $2)"; case cuda: __intrinsic_asm "atomicAnd($0._getPtrAt<uint32_t>($1), $2)"; case hlsl: __intrinsic_asm ".InterlockedAnd"; + case metal: case spirv: let buf = __getEquivalentStructuredBuffer<uint>(this); ::InterlockedAnd(buf[dest / 4], value); @@ -4703,7 +4707,7 @@ ${{{{ } [ForceInline] - [require(cuda_glsl_hlsl_spirv, byteaddressbuffer_rw)] + [require(cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)] void InterlockedCompareExchange( UINT dest, UINT compare_value, @@ -4715,6 +4719,7 @@ ${{{{ case glsl: __intrinsic_asm "($4 = atomicCompSwap($0._data[$1/4], $2, $3))"; case cuda: __intrinsic_asm "(*$4 = atomicCAS($0._getPtrAt<uint32_t>($1), $2, $3))"; case hlsl: __intrinsic_asm ".InterlockedCompareExchange"; + case metal: case spirv: let buf = __getEquivalentStructuredBuffer<uint>(this); ::InterlockedCompareExchange(buf[dest / 4], compare_value, value, original_value); @@ -4740,7 +4745,7 @@ ${{{{ } [ForceInline] - [require(cuda_glsl_hlsl_spirv, byteaddressbuffer_rw)] + [require(cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)] void InterlockedExchange( UINT dest, UINT value, @@ -4751,6 +4756,7 @@ ${{{{ case glsl: __intrinsic_asm "($3 = atomicExchange($0._data[$1/4], $2))"; case cuda: __intrinsic_asm "(*$3 = atomicExch($0._getPtrAt<uint32_t>($1), $2))"; case hlsl: __intrinsic_asm ".InterlockedExchange"; + case metal: case spirv: let buf = __getEquivalentStructuredBuffer<uint>(this); ::InterlockedExchange(buf[dest / 4], value, original_value); @@ -4758,7 +4764,7 @@ ${{{{ } [ForceInline] - [require(cuda_glsl_hlsl_spirv, byteaddressbuffer_rw)] + [require(cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)] void InterlockedMax( UINT dest, UINT value, @@ -4769,6 +4775,7 @@ ${{{{ case glsl: __intrinsic_asm "($3 = atomicMax($0._data[$1/4], $2))"; case cuda: __intrinsic_asm "(*$3 = atomicMax($0._getPtrAt<uint32_t>($1), $2))"; case hlsl: __intrinsic_asm ".InterlockedMax"; + case metal: case spirv: let buf = __getEquivalentStructuredBuffer<uint>(this); ::InterlockedMax(buf[dest / 4], value, original_value); @@ -4776,7 +4783,7 @@ ${{{{ } [ForceInline] - [require(cuda_glsl_hlsl_spirv, byteaddressbuffer_rw)] + [require(cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)] void InterlockedMax( UINT dest, UINT value) @@ -4786,6 +4793,7 @@ ${{{{ case glsl: __intrinsic_asm "atomicMax($0._data[$1/4], $2)"; case cuda: __intrinsic_asm "atomicMax($0._getPtrAt<uint32_t>($1), $2)"; case hlsl: __intrinsic_asm ".InterlockedMax"; + case metal: case spirv: let buf = __getEquivalentStructuredBuffer<uint>(this); ::InterlockedMax(buf[dest / 4], value); @@ -4793,7 +4801,7 @@ ${{{{ } [ForceInline] - [require(cuda_glsl_hlsl_spirv, byteaddressbuffer_rw)] + [require(cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)] void InterlockedMin( UINT dest, UINT value, @@ -4804,6 +4812,7 @@ ${{{{ case glsl: __intrinsic_asm "($3 = atomicMin($0._data[$1/4], $2))"; case cuda: __intrinsic_asm "(*$3 = atomicMin($0._getPtrAt<uint32_t>($1), $2))"; case hlsl: __intrinsic_asm ".InterlockedMin"; + case metal: case spirv: let buf = __getEquivalentStructuredBuffer<uint>(this); ::InterlockedMin(buf[dest / 4], value, original_value); @@ -4811,7 +4820,7 @@ ${{{{ } [ForceInline] - [require(cuda_glsl_hlsl_spirv, byteaddressbuffer_rw)] + [require(cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)] void InterlockedMin( UINT dest, UINT value) @@ -4821,6 +4830,7 @@ ${{{{ case glsl: __intrinsic_asm "atomicMin($0._data[$1/4], $2)"; case cuda: __intrinsic_asm "atomicMin($0._getPtrAt<uint32_t>($1), $2)"; case hlsl: __intrinsic_asm ".InterlockedMin"; + case metal: case spirv: let buf = __getEquivalentStructuredBuffer<uint>(this); ::InterlockedMin(buf[dest / 4], value); @@ -4828,7 +4838,7 @@ ${{{{ } [ForceInline] - [require(cuda_glsl_hlsl_spirv, byteaddressbuffer_rw)] + [require(cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)] void InterlockedOr( UINT dest, UINT value, @@ -4839,6 +4849,7 @@ ${{{{ case glsl: __intrinsic_asm "($3 = atomicOr($0._data[$1/4], $2))"; case cuda: __intrinsic_asm "(*$3 = atomicOr($0._getPtrAt<uint32_t>($1), $2))"; case hlsl: __intrinsic_asm ".InterlockedOr"; + case metal: case spirv: let buf = __getEquivalentStructuredBuffer<uint>(this); ::InterlockedOr(buf[dest / 4], value, original_value); @@ -4846,7 +4857,7 @@ ${{{{ } [ForceInline] - [require(cuda_glsl_hlsl_spirv, byteaddressbuffer_rw)] + [require(cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)] void InterlockedOr( UINT dest, UINT value) @@ -4856,6 +4867,7 @@ ${{{{ case glsl: __intrinsic_asm "atomicOr($0._data[$1/4], $2)"; case cuda: __intrinsic_asm "atomicOr($0._getPtrAt<uint32_t>($1), $2)"; case hlsl: __intrinsic_asm ".InterlockedOr"; + case metal: case spirv: let buf = __getEquivalentStructuredBuffer<uint>(this); ::InterlockedOr(buf[dest / 4], value); @@ -4863,7 +4875,7 @@ ${{{{ } [ForceInline] - [require(cuda_glsl_hlsl_spirv, byteaddressbuffer_rw)] + [require(cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)] void InterlockedXor( UINT dest, UINT value, @@ -4874,6 +4886,7 @@ ${{{{ case glsl: __intrinsic_asm "($3 = atomicXor($0._data[$1/4], $2))"; case cuda: __intrinsic_asm "(*$3 = atomicXor($0._getPtrAt<uint32_t>($1), $2))"; case hlsl: __intrinsic_asm ".InterlockedXor"; + case metal: case spirv: let buf = __getEquivalentStructuredBuffer<uint>(this); ::InterlockedXor(buf[dest / 4], value, original_value); @@ -4881,7 +4894,7 @@ ${{{{ } [ForceInline] - [require(cuda_glsl_hlsl_spirv, byteaddressbuffer_rw)] + [require(cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)] void InterlockedXor( UINT dest, UINT value) @@ -4891,6 +4904,7 @@ ${{{{ case glsl: __intrinsic_asm "atomicXor($0._data[$1/4], $2)"; case cuda: __intrinsic_asm "atomicXor($0._getPtrAt<uint32_t>($1), $2)"; case hlsl: __intrinsic_asm ".InterlockedXor"; + case metal: case spirv: let buf = __getEquivalentStructuredBuffer<uint>(this); ::InterlockedXor(buf[dest / 4], value); @@ -8596,9 +8610,62 @@ void GroupMemoryBarrierWithGroupSync() // Atomics +__generic<T> +__intrinsic_op($(kIROp_MetalAtomicCast)) +[require(metal)] +T* __getMetalAtomicRef(__ref T x); + +${{{{ +for (const char* fetchAndModify : {"add", "and", "max", "min", "or", "sub", "xor"}) +{ +}}}} + __generic<AtomicType, T> + [ForceInline] + [require(metal)] + void __metalInterlocked_$(fetchAndModify)(AtomicType dest, T value) + { + __intrinsic_asm "atomic_fetch_$(fetchAndModify)_explicit($0, $1, memory_order_relaxed)"; + } + + __generic<AtomicType, T> + [ForceInline] + [require(metal)] + void __metalInterlocked_$(fetchAndModify)(AtomicType dest, T value, out T original_value) + { + __intrinsic_asm "((*($2)) = (($[0])(atomic_fetch_$(fetchAndModify)_explicit($0, $1, memory_order_relaxed))))", T; + } +${{{{ +} // fetchAndModify +}}}} + +__generic<AtomicType, T> [ForceInline] +[require(metal)] +void __metalInterlocked_exchange(AtomicType dest, T value, out T original_value) +{ + __intrinsic_asm "((*($2)) = (($[0])(atomic_exchange_explicit($0, $1, memory_order_relaxed))))", T; +} + +__generic<AtomicType, T> +[ForceInline] +[require(metal)] +void __metalInterlocked_compare_exchange(AtomicType dest, __ref T compare_value, T value) +{ + __intrinsic_asm "atomic_compare_exchange_weak_explicit($0, $1, $2, memory_order_relaxed, memory_order_relaxed)"; +} + +__generic<AtomicType, T> +[ForceInline] +[require(metal)] +void __metalInterlocked_compare_exchange(AtomicType dest, T compare_value, T value, out T original_value) +{ + __metalInterlocked_compare_exchange(dest, compare_value, value); + original_value = compare_value; +} + __glsl_version(430) -[require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda)] +[ForceInline] +[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda)] void InterlockedAdd(__ref int dest, int value) { __target_switch @@ -8606,6 +8673,9 @@ void InterlockedAdd(__ref int dest, int value) case hlsl: __intrinsic_asm "InterlockedAdd"; case cuda: __intrinsic_asm "atomicAdd($0, $1)"; case glsl: __intrinsic_asm "$atomicAdd($A, $1)"; + case metal: + __metalInterlocked_add(__getMetalAtomicRef(dest), value); + return; case spirv: spirv_asm { @@ -8616,7 +8686,7 @@ void InterlockedAdd(__ref int dest, int value) [ForceInline] __glsl_version(430) -[require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda)] +[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda)] void InterlockedAdd(__ref uint dest, uint value) { __target_switch @@ -8624,6 +8694,9 @@ void InterlockedAdd(__ref uint dest, uint value) case hlsl: __intrinsic_asm "InterlockedAdd"; case cuda: __intrinsic_asm "atomicAdd((int*)$0, $1)"; case glsl: __intrinsic_asm "$atomicAdd($A, $1)"; + case metal: + __metalInterlocked_add(__getMetalAtomicRef(dest), value); + return; case spirv: spirv_asm { @@ -8640,14 +8713,17 @@ void InterlockedAdd(__ref uint dest, int value) [ForceInline] __glsl_version(430) -[require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda)] -void InterlockedAdd(__ref int dest, int value, out int original_value) +[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda)] +void InterlockedAdd(__ref int dest, int value, out int original_value) { __target_switch { case hlsl: __intrinsic_asm "InterlockedAdd"; case cuda: __intrinsic_asm "(*$2 = atomicAdd($0, $1))"; case glsl: __intrinsic_asm "($2 = $atomicAdd($A, $1))"; + case metal: + __metalInterlocked_add(__getMetalAtomicRef(dest), value, original_value); + return; case spirv: spirv_asm { @@ -8659,7 +8735,7 @@ void InterlockedAdd(__ref int dest, int value, out int original_value) [ForceInline] __glsl_version(430) -[require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda)] +[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda)] void InterlockedAdd(__ref uint dest, uint value, out uint original_value) { __target_switch @@ -8667,6 +8743,9 @@ void InterlockedAdd(__ref uint dest, uint value, out uint original_value) case hlsl: __intrinsic_asm "InterlockedAdd"; case cuda: __intrinsic_asm "(*$2 = (uint)atomicAdd((int*)$0, $1))"; case glsl: __intrinsic_asm "($2 = $atomicAdd($A, $1))"; + case metal: + __metalInterlocked_add(__getMetalAtomicRef(dest), value, original_value); + return; case spirv: spirv_asm { @@ -8758,14 +8837,17 @@ void InterlockedAdd(__ref uint64_t dest, uint64_t value, out uint64_t original_v __glsl_version(430) -[require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda)] -void InterlockedAnd(__ref int dest, int value) +[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda)] +void InterlockedAnd(__ref int dest, int value) { __target_switch { case hlsl: __intrinsic_asm "InterlockedAnd"; case cuda: __intrinsic_asm "atomicAnd($0, $1)"; case glsl: __intrinsic_asm "$atomicAnd($A, $1)"; + case metal: + __metalInterlocked_and(__getMetalAtomicRef(dest), value); + return; case spirv: spirv_asm { @@ -8775,7 +8857,7 @@ void InterlockedAnd(__ref int dest, int value) } __glsl_version(430) -[require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda)] +[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda)] void InterlockedAnd(__ref uint dest, uint value) { __target_switch @@ -8783,6 +8865,9 @@ void InterlockedAnd(__ref uint dest, uint value) case hlsl: __intrinsic_asm "InterlockedAnd"; case cuda: __intrinsic_asm "atomicAnd((int*)$0, $1)"; case glsl: __intrinsic_asm "$atomicAnd($A, $1)"; + case metal: + __metalInterlocked_and(__getMetalAtomicRef(dest), value); + return; case spirv: spirv_asm { @@ -8792,14 +8877,17 @@ void InterlockedAnd(__ref uint dest, uint value) } __glsl_version(430) -[require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda)] -void InterlockedAnd(__ref int dest, int value, out int original_value) +[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda)] +void InterlockedAnd(__ref int dest, int value, out int original_value) { __target_switch { case hlsl: __intrinsic_asm "InterlockedAnd"; case cuda: __intrinsic_asm "(*$2 = atomicAnd($0, $1))"; case glsl: __intrinsic_asm "($2 = $atomicAnd($A, $1))"; + case metal: + __metalInterlocked_and(__getMetalAtomicRef(dest), value, original_value); + return; case spirv: spirv_asm { @@ -8810,7 +8898,7 @@ void InterlockedAnd(__ref int dest, int value, out int original_value) } __glsl_version(430) -[require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda)] +[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda)] void InterlockedAnd(__ref uint dest, uint value, out uint original_value) { __target_switch @@ -8818,6 +8906,9 @@ void InterlockedAnd(__ref uint dest, uint value, out uint original_value) case hlsl: __intrinsic_asm "InterlockedAnd"; case glsl: __intrinsic_asm "($2 = atomicAnd($0, $1))"; case cuda: __intrinsic_asm "(*$2 = atomicAnd((int*)$0, $1))"; + case metal: + __metalInterlocked_and(__getMetalAtomicRef(dest), value, original_value); + return; case spirv: spirv_asm { @@ -8846,7 +8937,7 @@ void InterlockedAnd(__ref uint64_t dest, uint64_t value, out uint64_t origina } __glsl_version(430) -[require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda)] +[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda)] void InterlockedCompareExchange(__ref int dest, int compare_value, int value, out int original_value) { __target_switch @@ -8854,6 +8945,9 @@ void InterlockedCompareExchange(__ref int dest, int compare_value, int value, case hlsl: __intrinsic_asm "InterlockedCompareExchange"; case glsl: __intrinsic_asm "($3 = $atomicCompSwap($A, $1, $2))"; case cuda: __intrinsic_asm "(*$3 = atomicCAS($0, $1, $2))"; + case metal: + __metalInterlocked_compare_exchange(__getMetalAtomicRef(dest), compare_value, value, original_value); + return; case spirv: spirv_asm { @@ -8864,7 +8958,7 @@ void InterlockedCompareExchange(__ref int dest, int compare_value, int value, } __glsl_version(430) -[require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda)] +[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda)] void InterlockedCompareExchange(__ref uint dest, uint compare_value, uint value, out uint original_value) { __target_switch @@ -8872,6 +8966,9 @@ void InterlockedCompareExchange(__ref uint dest, uint compare_value, uint value, case hlsl: __intrinsic_asm "InterlockedCompareExchange"; case glsl: __intrinsic_asm "($3 = $atomicCompSwap($A, $1, $2))"; case cuda: __intrinsic_asm "(*$3 = (uint)atomicCAS((int*)$0, $1, $2))"; + case metal: + __metalInterlocked_compare_exchange(__getMetalAtomicRef(dest), compare_value, value, original_value); + return; case spirv: spirv_asm { @@ -8887,6 +8984,9 @@ void InterlockedCompareExchangeFloatBitwise(__ref float dest, float compare_val __target_switch { case hlsl: __intrinsic_asm "InterlockedCompareExchangeFloatBitwise"; + case metal: + __metalInterlocked_compare_exchange(__getMetalAtomicRef(dest), compare_value, value); + return; } } @@ -8896,44 +8996,36 @@ void InterlockedCompareExchangeFloatBitwise(__ref float dest, float compare_val __target_switch { case hlsl: __intrinsic_asm "InterlockedCompareExchangeFloatBitwise"; + case metal: + __metalInterlocked_compare_exchange(__getMetalAtomicRef(dest), compare_value, value, original_value); + return; } } -[ForceInline] -void InterlockedCompareExchange(__ref int64_t dest, int64_t compare_value, int64_t value) -{ - __target_switch - { - case hlsl: __intrinsic_asm "InterlockedCompareExchange"; - } -} - -[ForceInline] -void InterlockedCompareExchange(__ref int64_t dest, int64_t compare_value, int64_t value, out int64_t original_value) -{ - __target_switch - { - case hlsl: __intrinsic_asm "InterlockedCompareExchange"; - } -} - -[ForceInline] -void InterlockedCompareExchange(__ref uint64_t dest, uint64_t compare_value, uint64_t value) +${{{{ +for (const char* T : {"int64_t", "uint64_t"}) { - __target_switch +}}}} + [ForceInline] + void InterlockedCompareExchange(__ref $(T) dest, $(T) compare_value, $(T) value) { - case hlsl: __intrinsic_asm "InterlockedCompareExchange"; + __target_switch + { + case hlsl: __intrinsic_asm "InterlockedCompareExchange"; + } } -} -[ForceInline] -void InterlockedCompareExchange(__ref uint64_t dest, uint64_t compare_value, uint64_t value, out uint64_t original_value) -{ - __target_switch + [ForceInline] + void InterlockedCompareExchange(__ref $(T) dest, $(T) compare_value, $(T) value, out $(T) original_value) { - case hlsl: __intrinsic_asm "InterlockedCompareExchange"; + __target_switch + { + case hlsl: __intrinsic_asm "InterlockedCompareExchange"; + } } -} +${{{{ +} // T +}}}} __glsl_version(430) [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda)] @@ -8997,7 +9089,7 @@ void InterlockedCompareStore(__ref uint64_t dest, uint64_t compare_value, uint64 } __glsl_version(430) -[require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda)] +[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda)] void InterlockedExchange(__ref int dest, int value, out int original_value) { __target_switch @@ -9005,6 +9097,9 @@ void InterlockedExchange(__ref int dest, int value, out int original_value) case hlsl: __intrinsic_asm "InterlockedExchange"; case glsl: __intrinsic_asm "($2 = $atomicExchange($A, $1))"; case cuda: __intrinsic_asm "(*$2 = atomicExch($0, $1))"; + case metal: + __metalInterlocked_exchange(__getMetalAtomicRef(dest), value, original_value); + return; case spirv: spirv_asm { @@ -9015,7 +9110,7 @@ void InterlockedExchange(__ref int dest, int value, out int original_value) } __glsl_version(430) -[require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda)] +[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda)] void InterlockedExchange(__ref uint dest, uint value, out uint original_value) { __target_switch @@ -9023,6 +9118,9 @@ void InterlockedExchange(__ref uint dest, uint value, out uint original_value) case hlsl: __intrinsic_asm "InterlockedExchange"; case glsl: __intrinsic_asm "($2 = $atomicExchange($A, $1))"; case cuda: __intrinsic_asm "(*$2 = (uint)atomicExch((int*)$0, $1))"; + case metal: + __metalInterlocked_exchange(__getMetalAtomicRef(dest), value, original_value); + return; case spirv: spirv_asm { @@ -9047,6 +9145,9 @@ void InterlockedExchange(__ref float dest, float value, out float original_va __target_switch { case hlsl: __intrinsic_asm "InterlockedExchange"; + case metal: + __metalInterlocked_exchange(__getMetalAtomicRef(dest), value, original_value); + return; } } @@ -9087,7 +9188,7 @@ void InterlockedExchange(__ref uint64_t dest, uint64_t value, out uint64_t or } __glsl_version(430) -[require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda)] +[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda)] void InterlockedMax(__ref int dest, int value) { __target_switch @@ -9095,6 +9196,9 @@ void InterlockedMax(__ref int dest, int value) case hlsl: __intrinsic_asm "InterlockedMax"; case glsl: __intrinsic_asm "$atomicMax($A, $1)"; case cuda: __intrinsic_asm "atomicMax($0, $1)"; + case metal: + __metalInterlocked_max(__getMetalAtomicRef(dest), value); + return; case spirv: spirv_asm { @@ -9104,7 +9208,7 @@ void InterlockedMax(__ref int dest, int value) } __glsl_version(430) -[require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda)] +[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda)] void InterlockedMax(__ref uint dest, uint value) { __target_switch @@ -9112,6 +9216,9 @@ void InterlockedMax(__ref uint dest, uint value) case hlsl: __intrinsic_asm "InterlockedMax"; case glsl: __intrinsic_asm "$atomicMax($A, $1)"; case cuda: __intrinsic_asm "atomicMax((int*)$0, $1)"; + case metal: + __metalInterlocked_max(__getMetalAtomicRef(dest), value); + return; case spirv: spirv_asm { @@ -9121,7 +9228,7 @@ void InterlockedMax(__ref uint dest, uint value) } __glsl_version(430) -[require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda)] +[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda)] void InterlockedMax(__ref int dest, int value, out int original_value) { __target_switch @@ -9129,6 +9236,9 @@ void InterlockedMax(__ref int dest, int value, out int original_value) case hlsl: __intrinsic_asm "InterlockedMax"; case glsl: __intrinsic_asm "($2 = $atomicMax($A, $1))"; case cuda: __intrinsic_asm "(*$2 = atomicMax($0, $1))"; + case metal: + __metalInterlocked_max(__getMetalAtomicRef(dest), value, original_value); + return; case spirv: spirv_asm { @@ -9139,7 +9249,7 @@ void InterlockedMax(__ref int dest, int value, out int original_value) } __glsl_version(430) -[require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda)] +[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda)] void InterlockedMax(__ref uint dest, uint value, out uint original_value) { __target_switch @@ -9147,6 +9257,9 @@ void InterlockedMax(__ref uint dest, uint value, out uint original_value) case hlsl: __intrinsic_asm "InterlockedMax"; case glsl: __intrinsic_asm "($2 = $atomicMax($A, $1))"; case cuda: __intrinsic_asm "(*$2 = (uint)atomicMax((int*)$0, $1))"; + case metal: + __metalInterlocked_max(__getMetalAtomicRef(dest), value, original_value); + return; case spirv: spirv_asm { @@ -9193,7 +9306,7 @@ void InterlockedMax(__ref uint64_t dest, uint64_t value, out uint64_t origina } __glsl_version(430) -[require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda)] +[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda)] void InterlockedMin(__ref int dest, int value) { __target_switch @@ -9201,6 +9314,9 @@ void InterlockedMin(__ref int dest, int value) case hlsl: __intrinsic_asm "InterlockedMin"; case glsl: __intrinsic_asm "$atomicMin($A, $1)"; case cuda: __intrinsic_asm "atomicMin($0, $1)"; + case metal: + __metalInterlocked_min(__getMetalAtomicRef(dest), value); + return; case spirv: spirv_asm { @@ -9210,7 +9326,7 @@ void InterlockedMin(__ref int dest, int value) } __glsl_version(430) -[require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda)] +[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda)] void InterlockedMin(__ref uint dest, uint value) { __target_switch @@ -9218,6 +9334,9 @@ void InterlockedMin(__ref uint dest, uint value) case hlsl: __intrinsic_asm "InterlockedMin"; case glsl: __intrinsic_asm "$atomicMin($A, $1)"; case cuda: __intrinsic_asm "atomicMin((int*)$0, $1)"; + case metal: + __metalInterlocked_min(__getMetalAtomicRef(dest), value); + return; case spirv: spirv_asm { @@ -9227,7 +9346,7 @@ void InterlockedMin(__ref uint dest, uint value) } __glsl_version(430) -[require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda)] +[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda)] void InterlockedMin(__ref int dest, int value, out int original_value) { __target_switch @@ -9235,6 +9354,9 @@ void InterlockedMin(__ref int dest, int value, out int original_value) case hlsl: __intrinsic_asm "InterlockedMin"; case glsl: __intrinsic_asm "($2 = $atomicMin($A, $1))"; case cuda: __intrinsic_asm "(*$2 = atomicMin($0, $1))"; + case metal: + __metalInterlocked_min(__getMetalAtomicRef(dest), value, original_value); + return; case spirv: spirv_asm { @@ -9245,7 +9367,7 @@ void InterlockedMin(__ref int dest, int value, out int original_value) } __glsl_version(430) -[require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda)] +[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda)] void InterlockedMin(__ref uint dest, uint value, out uint original_value) { __target_switch @@ -9253,6 +9375,9 @@ void InterlockedMin(__ref uint dest, uint value, out uint original_value) case hlsl: __intrinsic_asm "InterlockedMin"; case glsl: __intrinsic_asm "($2 = $atomicMin($A, $1))"; case cuda: __intrinsic_asm "(*$2 = (uint)atomicMin((int*)$0, $1))"; + case metal: + __metalInterlocked_min(__getMetalAtomicRef(dest), value, original_value); + return; case spirv: spirv_asm { @@ -9299,7 +9424,7 @@ void InterlockedMin(__ref uint64_t dest, uint64_t value, out uint64_t origina } __glsl_version(430) -[require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda)] +[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda)] void InterlockedOr(__ref int dest, int value) { __target_switch @@ -9307,6 +9432,9 @@ void InterlockedOr(__ref int dest, int value) case hlsl: __intrinsic_asm "InterlockedOr"; case cuda: __intrinsic_asm "atomicOr((int*)$0, $1)"; case glsl: __intrinsic_asm "$atomicOr($A, $1)"; + case metal: + __metalInterlocked_or(__getMetalAtomicRef(dest), value); + return; case spirv: spirv_asm { @@ -9316,7 +9444,7 @@ void InterlockedOr(__ref int dest, int value) } __glsl_version(430) -[require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda)] +[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda)] void InterlockedOr(__ref uint dest, uint value) { __target_switch @@ -9324,6 +9452,9 @@ void InterlockedOr(__ref uint dest, uint value) case hlsl: __intrinsic_asm "InterlockedOr"; case cuda: __intrinsic_asm "atomicOr((int*)$0, $1)"; case glsl: __intrinsic_asm "$atomicOr($A, $1)"; + case metal: + __metalInterlocked_or(__getMetalAtomicRef(dest), value); + return; case spirv: spirv_asm { @@ -9333,7 +9464,7 @@ void InterlockedOr(__ref uint dest, uint value) } __glsl_version(430) -[require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda)] +[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda)] void InterlockedOr(__ref int dest, int value, out int original_value) { __target_switch @@ -9341,6 +9472,9 @@ void InterlockedOr(__ref int dest, int value, out int original_value) case hlsl: __intrinsic_asm "InterlockedOr"; case glsl: __intrinsic_asm "($2 = atomicOr($0, $1))"; case cuda: __intrinsic_asm "(*$2 = atomicOr($0, $1))"; + case metal: + __metalInterlocked_or(__getMetalAtomicRef(dest), value, original_value); + return; case spirv: spirv_asm { @@ -9351,7 +9485,7 @@ void InterlockedOr(__ref int dest, int value, out int original_value) } __glsl_version(430) -[require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda)] +[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda)] void InterlockedOr(__ref uint dest, uint value, out uint original_value) { __target_switch @@ -9359,6 +9493,9 @@ void InterlockedOr(__ref uint dest, uint value, out uint original_value) case hlsl: __intrinsic_asm "InterlockedOr"; case glsl: __intrinsic_asm "($2 = atomicOr($0, $1))"; case cuda: __intrinsic_asm "(*$2 = atomicOr((int*)$0, $1))"; + case metal: + __metalInterlocked_or(__getMetalAtomicRef(dest), value, original_value); + return; case spirv: spirv_asm { @@ -9387,7 +9524,7 @@ void InterlockedOr(__ref uint64_t dest, uint64_t value, out uint64_t original } __glsl_version(430) -[require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda)] +[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda)] void InterlockedXor(__ref int dest, int value) { __target_switch @@ -9395,6 +9532,9 @@ void InterlockedXor(__ref int dest, int value) case hlsl: __intrinsic_asm "InterlockedXor"; case cuda: __intrinsic_asm "atomicXor((int*)$0, $1)"; case glsl: __intrinsic_asm "$atomicXor($A, $1)"; + case metal: + __metalInterlocked_xor(__getMetalAtomicRef(dest), value); + return; case spirv: spirv_asm { @@ -9404,7 +9544,7 @@ void InterlockedXor(__ref int dest, int value) } __glsl_version(430) -[require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda)] +[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda)] void InterlockedXor(__ref uint dest, uint value) { __target_switch @@ -9412,6 +9552,9 @@ void InterlockedXor(__ref uint dest, uint value) case hlsl: __intrinsic_asm "InterlockedXor"; case cuda: __intrinsic_asm "atomicXor((int*)$0, $1)"; case glsl: __intrinsic_asm "$atomicXor($A, $1)"; + case metal: + __metalInterlocked_xor(__getMetalAtomicRef(dest), value); + return; case spirv: spirv_asm { @@ -9421,7 +9564,7 @@ void InterlockedXor(__ref uint dest, uint value) } __glsl_version(430) -[require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda)] +[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda)] void InterlockedXor(__ref int dest, int value, out int original_value) { __target_switch @@ -9429,6 +9572,9 @@ void InterlockedXor(__ref int dest, int value, out int original_value) case hlsl: __intrinsic_asm "InterlockedXor"; case glsl: __intrinsic_asm "($2 = atomicXor($0, $1))"; case cuda: __intrinsic_asm "(*$2 = atomicXor($0, $1))"; + case metal: + __metalInterlocked_xor(__getMetalAtomicRef(dest), value, original_value); + return; case spirv: spirv_asm { @@ -9439,7 +9585,7 @@ void InterlockedXor(__ref int dest, int value, out int original_value) } __glsl_version(430) -[require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda)] +[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda)] void InterlockedXor(__ref uint dest, uint value, out uint original_value) { __target_switch @@ -9447,6 +9593,9 @@ void InterlockedXor(__ref uint dest, uint value, out uint original_value) case hlsl: __intrinsic_asm "InterlockedXor"; case glsl: __intrinsic_asm "($2 = atomicXor($0, $1))"; case cuda: __intrinsic_asm "(*$2 = (uint)atomicXor((int*)$0, $1))"; + case metal: + __metalInterlocked_xor(__getMetalAtomicRef(dest), value, original_value); + return; case spirv: spirv_asm { diff --git a/source/slang/slang-emit-c-like.cpp b/source/slang/slang-emit-c-like.cpp index e9ab58bca..6062875b3 100644 --- a/source/slang/slang-emit-c-like.cpp +++ b/source/slang/slang-emit-c-like.cpp @@ -2889,6 +2889,7 @@ void CLikeSourceEmitter::_emitInst(IRInst* inst) case kIROp_AtomicCounterIncrement: case kIROp_AtomicCounterDecrement: case kIROp_StructuredBufferGetDimensions: + case kIROp_MetalAtomicCast: emitInstStmt(inst); break; diff --git a/source/slang/slang-emit-metal.cpp b/source/slang/slang-emit-metal.cpp index 0a5506776..d38c3de9b 100644 --- a/source/slang/slang-emit-metal.cpp +++ b/source/slang/slang-emit-metal.cpp @@ -253,6 +253,24 @@ bool MetalSourceEmitter::tryEmitInstStmtImpl(IRInst* inst) case kIROp_discard: m_writer->emit("discard_fragment();\n"); return true; + case kIROp_MetalAtomicCast: + { + auto oldValName = getName(inst); + auto op0 = inst->getOperand(0); + + m_writer->emit("atomic_"); + emitType(op0->getDataType()); + m_writer->emit(" "); + m_writer->emit(oldValName); + m_writer->emit(" = "); + + m_writer->emit("((atomic_"); + emitType(op0->getDataType()); + m_writer->emit(")("); + emitOperand(op0, getInfo(EmitOp::General)); + m_writer->emit("));\n"); + return true; + } } return false; } diff --git a/source/slang/slang-ir-inst-defs.h b/source/slang/slang-ir-inst-defs.h index f639d3343..19117c00e 100644 --- a/source/slang/slang-ir-inst-defs.h +++ b/source/slang/slang-ir-inst-defs.h @@ -685,6 +685,7 @@ INST(GetLegalizedSPIRVGlobalParamAddr, GetLegalizedSPIRVGlobalParamAddr, 1, 0) INST(GetPerVertexInputArray, GetPerVertexInputArray, 1, 0) INST(ForceVarIntoStructTemporarily, ForceVarIntoStructTemporarily, 1, 0) +INST(MetalAtomicCast, MetalAtomicCast, 1, 0) INST(MakeArrayList, makeArrayList, 0, 0) INST(MakeTensorView, makeTensorView, 0, 0) |
