diff options
| author | ArielG-NV <159081215+ArielG-NV@users.noreply.github.com> | 2024-07-19 02:05:33 -0400 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-07-18 23:05:33 -0700 |
| commit | a00d603519d395d41b2f68c5874e8a708335a31a (patch) | |
| tree | 114e3da71d3d95034e944edb0ffd1510f192418d | |
| parent | 59dd133f1c52fb0a7a388f4a8f42234f4556a28a (diff) | |
Metal: `Interlocked` (atomic) member function support for buffers (#4655)
* Metal: `Interlocked` (atomic) member function support for buffers
fixes: #4654
fixes: #4481
1. Add `Interlocked` (atomic) member function support for buffers to Metal
2. Fix `__getEquivalentStructuredBuffer` so it works with CPP/Metal targets
* add `CompareStore` support
* legalize RWByteAddressBuffer to fully replace StructuredBuffer
* destroy replaced byte-addr buffer
* clean up as per review and add a comment explaining why certain code exists
* fix flow of byte-address-buffer replacement
* toggle on option to translate byteAddrBuffer to StructuredBuffer
* cleanup unused buffers
* add `treatGetEquivalentStructuredBufferAsGetThis` flag so that `__getEquivalentStructuredBuffer` returns the byte-address buffer itself (`this`) instead of a structured buffer
* add a comment explaining `treatGetEquivalentStructuredBufferAsGetThis`
---------
Co-authored-by: Yong He <yonghe@outlook.com>
| -rw-r--r-- | source/slang/hlsl.meta.slang | 155 | ||||
| -rw-r--r-- | source/slang/slang-capabilities.capdef | 4 | ||||
| -rw-r--r-- | source/slang/slang-emit.cpp | 3 | ||||
| -rw-r--r-- | source/slang/slang-ir-byte-address-legalize.cpp | 7 | ||||
| -rw-r--r-- | source/slang/slang-ir-byte-address-legalize.h | 5 | ||||
| -rw-r--r-- | tests/hlsl-intrinsic/byte-address-buffer-atomics.slang | 4 | ||||
| -rw-r--r-- | tests/metal/atomic-texture-buffer.slang | 4 | ||||
| -rw-r--r-- | tests/metal/atomic-texture-texture1d.slang | 10 | ||||
| -rw-r--r-- | tests/metal/atomic-texture-texture2d.slang | 8 | ||||
| -rw-r--r-- | tests/metal/atomic-texture-texture3d.slang | 4 |
10 files changed, 190 insertions, 14 deletions
diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang index 2639c1e88..9760f974a 100644 --- a/source/slang/hlsl.meta.slang +++ b/source/slang/hlsl.meta.slang @@ -4193,13 +4193,19 @@ ${{{{ __cuda_sm_version(2.0) [__requiresNVAPI] [ForceInline] - [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_nvapi_cuda_float1)] + [require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_nvapi_cuda_metal_float1)] void InterlockedAddF32(uint byteAddress, float valueToAdd, out float originalValue) { __target_switch { case hlsl: __intrinsic_asm "($3 = NvInterlockedAddFp32($0, $1, $2))"; case cuda: __intrinsic_asm "(*$3 = atomicAdd($0._getPtrAt<float>($1), $2))"; + case metal: + { + let buf = __getEquivalentStructuredBuffer<float>(this); + __metalInterlocked_add(__getMetalAtomicRef(buf[byteAddress / 4]), valueToAdd, originalValue); + return; + } case glsl: case spirv: { @@ -4264,13 +4270,19 @@ ${{{{ [__requiresNVAPI] [ForceInline] __cuda_sm_version(2.0) - [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_nvapi_cuda_float1)] + [require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_nvapi_cuda_metal_float1)] void InterlockedAddF32(uint byteAddress, float valueToAdd) { __target_switch { case hlsl: __intrinsic_asm "(NvInterlockedAddFp32($0, $1, $2))"; case cuda: __intrinsic_asm "atomicAdd($0._getPtrAt<float>($1), $2)"; + case metal: + { + let buf = __getEquivalentStructuredBuffer<float>(this); + __metalInterlocked_add(__getMetalAtomicRef(buf[byteAddress / 4]), valueToAdd); + return; + } case glsl: case spirv: { @@ -4763,6 +4775,11 @@ ${{{{ case cuda: __intrinsic_asm "(*$3 = atomicAdd($0._getPtrAt<uint32_t>($1), $2))"; case hlsl: __intrinsic_asm ".InterlockedAdd"; case metal: + { + let buf = __getEquivalentStructuredBuffer<uint>(this); + __metalInterlocked_add(__getMetalAtomicRef(buf[dest / 4]), value, original_value); + return; + } case spirv: let buf = __getEquivalentStructuredBuffer<uint>(this); ::InterlockedAdd(buf[dest / 4], value, original_value); @@ -4781,6 +4798,11 @@ 
${{{{ case cuda: __intrinsic_asm "atomicAdd($0._getPtrAt<uint32_t>($1), $2)"; case hlsl: __intrinsic_asm ".InterlockedAdd"; case metal: + { + let buf = __getEquivalentStructuredBuffer<uint>(this); + __metalInterlocked_add(__getMetalAtomicRef(buf[dest / 4]), value); + return; + } case spirv: let buf = __getEquivalentStructuredBuffer<uint>(this); ::InterlockedAdd(buf[dest / 4], value); @@ -4800,6 +4822,11 @@ ${{{{ case cuda: __intrinsic_asm "(*$3 = atomicAnd($0._getPtrAt<uint32_t>($1), $2))"; case hlsl: __intrinsic_asm ".InterlockedAnd"; case metal: + { + let buf = __getEquivalentStructuredBuffer<uint>(this); + __metalInterlocked_and(__getMetalAtomicRef(buf[dest / 4]), value, original_value); + return; + } case spirv: let buf = __getEquivalentStructuredBuffer<uint>(this); ::InterlockedAnd(buf[dest / 4], value, original_value); @@ -4818,6 +4845,11 @@ ${{{{ case cuda: __intrinsic_asm "atomicAnd($0._getPtrAt<uint32_t>($1), $2)"; case hlsl: __intrinsic_asm ".InterlockedAnd"; case metal: + { + let buf = __getEquivalentStructuredBuffer<uint>(this); + __metalInterlocked_and(__getMetalAtomicRef(buf[dest / 4]), value); + return; + } case spirv: let buf = __getEquivalentStructuredBuffer<uint>(this); ::InterlockedAnd(buf[dest / 4], value); @@ -4838,6 +4870,11 @@ ${{{{ case cuda: __intrinsic_asm "(*$4 = atomicCAS($0._getPtrAt<uint32_t>($1), $2, $3))"; case hlsl: __intrinsic_asm ".InterlockedCompareExchange"; case metal: + { + let buf = __getEquivalentStructuredBuffer<uint>(this); + __metalInterlocked_compare_exchange(__getMetalAtomicRef(buf[dest / 4]), compare_value, value, original_value); + return; + } case spirv: let buf = __getEquivalentStructuredBuffer<uint>(this); ::InterlockedCompareExchange(buf[dest / 4], compare_value, value, original_value); @@ -4845,7 +4882,7 @@ ${{{{ } [ForceInline] - [require(cuda_glsl_hlsl_spirv, byteaddressbuffer_rw)] + [require(cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)] void InterlockedCompareStore( UINT dest, UINT compare_value, @@ 
-4856,6 +4893,12 @@ ${{{{ case glsl: __intrinsic_asm "atomicCompSwap($0._data[$1/4], $2, $3)"; case cuda: __intrinsic_asm "atomicCAS($0._getPtrAt<uint32_t>($1), $2, $3)"; case hlsl: __intrinsic_asm ".InterlockedCompareStore"; + case metal: + { + let buf = __getEquivalentStructuredBuffer<uint>(this); + __metalInterlocked_compare_exchange(__getMetalAtomicRef(buf[dest / 4]), compare_value, value); + return; + } case spirv: let buf = __getEquivalentStructuredBuffer<uint>(this); ::InterlockedCompareStore(buf[dest / 4], compare_value, value); @@ -4875,6 +4918,11 @@ ${{{{ case cuda: __intrinsic_asm "(*$3 = atomicExch($0._getPtrAt<uint32_t>($1), $2))"; case hlsl: __intrinsic_asm ".InterlockedExchange"; case metal: + { + let buf = __getEquivalentStructuredBuffer<uint>(this); + __metalInterlocked_exchange(__getMetalAtomicRef(buf[dest / 4]), value, original_value); + return; + } case spirv: let buf = __getEquivalentStructuredBuffer<uint>(this); ::InterlockedExchange(buf[dest / 4], value, original_value); @@ -4894,6 +4942,11 @@ ${{{{ case cuda: __intrinsic_asm "(*$3 = atomicMax($0._getPtrAt<uint32_t>($1), $2))"; case hlsl: __intrinsic_asm ".InterlockedMax"; case metal: + { + let buf = __getEquivalentStructuredBuffer<uint>(this); + __metalInterlocked_max(__getMetalAtomicRef(buf[dest / 4]), value, original_value); + return; + } case spirv: let buf = __getEquivalentStructuredBuffer<uint>(this); ::InterlockedMax(buf[dest / 4], value, original_value); @@ -4912,6 +4965,11 @@ ${{{{ case cuda: __intrinsic_asm "atomicMax($0._getPtrAt<uint32_t>($1), $2)"; case hlsl: __intrinsic_asm ".InterlockedMax"; case metal: + { + let buf = __getEquivalentStructuredBuffer<uint>(this); + __metalInterlocked_max(__getMetalAtomicRef(buf[dest / 4]), value); + return; + } case spirv: let buf = __getEquivalentStructuredBuffer<uint>(this); ::InterlockedMax(buf[dest / 4], value); @@ -4931,6 +4989,11 @@ ${{{{ case cuda: __intrinsic_asm "(*$3 = atomicMin($0._getPtrAt<uint32_t>($1), $2))"; case hlsl: 
__intrinsic_asm ".InterlockedMin"; case metal: + { + let buf = __getEquivalentStructuredBuffer<uint>(this); + __metalInterlocked_min(__getMetalAtomicRef(buf[dest / 4]), value, original_value); + return; + } case spirv: let buf = __getEquivalentStructuredBuffer<uint>(this); ::InterlockedMin(buf[dest / 4], value, original_value); @@ -4949,6 +5012,11 @@ ${{{{ case cuda: __intrinsic_asm "atomicMin($0._getPtrAt<uint32_t>($1), $2)"; case hlsl: __intrinsic_asm ".InterlockedMin"; case metal: + { + let buf = __getEquivalentStructuredBuffer<uint>(this); + __metalInterlocked_min(__getMetalAtomicRef(buf[dest / 4]), value); + return; + } case spirv: let buf = __getEquivalentStructuredBuffer<uint>(this); ::InterlockedMin(buf[dest / 4], value); @@ -4968,6 +5036,11 @@ ${{{{ case cuda: __intrinsic_asm "(*$3 = atomicOr($0._getPtrAt<uint32_t>($1), $2))"; case hlsl: __intrinsic_asm ".InterlockedOr"; case metal: + { + let buf = __getEquivalentStructuredBuffer<uint>(this); + __metalInterlocked_or(__getMetalAtomicRef(buf[dest / 4]), value, original_value); + return; + } case spirv: let buf = __getEquivalentStructuredBuffer<uint>(this); ::InterlockedOr(buf[dest / 4], value, original_value); @@ -4986,6 +5059,11 @@ ${{{{ case cuda: __intrinsic_asm "atomicOr($0._getPtrAt<uint32_t>($1), $2)"; case hlsl: __intrinsic_asm ".InterlockedOr"; case metal: + { + let buf = __getEquivalentStructuredBuffer<uint>(this); + __metalInterlocked_or(__getMetalAtomicRef(buf[dest / 4]), value); + return; + } case spirv: let buf = __getEquivalentStructuredBuffer<uint>(this); ::InterlockedOr(buf[dest / 4], value); @@ -5005,6 +5083,11 @@ ${{{{ case cuda: __intrinsic_asm "(*$3 = atomicXor($0._getPtrAt<uint32_t>($1), $2))"; case hlsl: __intrinsic_asm ".InterlockedXor"; case metal: + { + let buf = __getEquivalentStructuredBuffer<uint>(this); + __metalInterlocked_xor(__getMetalAtomicRef(buf[dest / 4]), value, original_value); + return; + } case spirv: let buf = __getEquivalentStructuredBuffer<uint>(this); 
::InterlockedXor(buf[dest / 4], value, original_value); @@ -5023,6 +5106,11 @@ ${{{{ case cuda: __intrinsic_asm "atomicXor($0._getPtrAt<uint32_t>($1), $2)"; case hlsl: __intrinsic_asm ".InterlockedXor"; case metal: + { + let buf = __getEquivalentStructuredBuffer<uint>(this); + __metalInterlocked_xor(__getMetalAtomicRef(buf[dest / 4]), value); + return; + } case spirv: let buf = __getEquivalentStructuredBuffer<uint>(this); ::InterlockedXor(buf[dest / 4], value); @@ -9254,7 +9342,7 @@ for (SlangAtomicOperationInfo atomicOp : slangAtomicOperationInfo) [ForceInline] __glsl_version(430) -[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda)] +[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda_metal)] void Interlocked$(atomicOp.slangCallSuffix)(__ref $(T) dest, $(T) value) { static_assert(__isTextureScalarAccess(dest) || !__isTextureAccess(dest), "Atomic must be applied to scalar texture or non-texture"); @@ -9288,7 +9376,7 @@ void Interlocked$(atomicOp.slangCallSuffix)(__ref $(T) dest, $(T) value) [ForceInline] __glsl_version(430) -[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda)] +[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda_metal)] void Interlocked$(atomicOp.slangCallSuffix)(__ref $(T) dest, $(T) value, out $(T) original_value) { static_assert(__isTextureScalarAccess(dest) || !__isTextureAccess(dest), "Atomic must be applied to a scalar texture or non-texture"); @@ -9335,7 +9423,7 @@ for(const char* T : {"int64_t", "uint64_t"}) { }}}} [ForceInline] -[require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda)] +[require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda_metal)] void InterlockedAdd(__ref $(T) dest, $(T) value) { __target_switch @@ -9515,7 +9603,7 @@ ${{{{ [ForceInline] __glsl_version(430) -[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda)] +[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda_metal)] void InterlockedCompareExchange(__ref int dest, int compare_value, int value, out int original_value) { 
static_assert(__isTextureScalarAccess(dest) || !__isTextureAccess(dest), "Atomic must be applied to scalar texture or non-texture"); @@ -9550,7 +9638,7 @@ void InterlockedCompareExchange(__ref int dest, int compare_value, int value, ou [ForceInline] __glsl_version(430) -[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda)] +[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda_metal)] void InterlockedCompareExchange(__ref uint dest, uint compare_value, uint value, out uint original_value) { static_assert(__isTextureScalarAccess(dest) || !__isTextureAccess(dest), "Atomic must be applied to scalar texture or non-texture"); @@ -9613,7 +9701,7 @@ void InterlockedCompareExchangeFloatBitwise(__ref float dest, float compare_val [ForceInline] __glsl_version(430) -[require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda)] +[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda_metal)] void InterlockedCompareStore(__ref int dest, int compare_value, int value) { __target_switch @@ -9622,16 +9710,41 @@ void InterlockedCompareStore(__ref int dest, int compare_value, int value) case glsl: __intrinsic_asm "$atomicCompSwap($A, $1, $2)"; case cuda: __intrinsic_asm "atomicCAS($0, $1, $2)"; case spirv: + { spirv_asm { result:$$int = OpAtomicCompareExchange &dest Device None None $value $compare_value; }; + return; + } + case metal: + { + if (__isTextureAccess(dest)) + { + vector<int, 4> vec_compare_value = vector<int, 4>(compare_value); + if(__isTextureArrayAccess(dest)) + { + __metalImageInterlocked_compare_exchange(__extractTextureFromTextureAccess(dest), + __extractCoordFromTextureAccess(dest), __extractArrayCoordFromTextureAccess(dest), vec_compare_value, vector<int, 4>(value)); + } + else + { + __metalImageInterlocked_compare_exchange(__extractTextureFromTextureAccess(dest), + __extractCoordFromTextureAccess(dest), vec_compare_value, vector<int, 4>(value)); + } + } + else + { + __metalInterlocked_compare_exchange(__getMetalAtomicRef(dest), compare_value, value); + } 
+ return; + } } } [ForceInline] __glsl_version(430) -[require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda)] +[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda_metal)] void InterlockedCompareStore(__ref uint dest, uint compare_value, uint value) { __target_switch @@ -9644,6 +9757,26 @@ void InterlockedCompareStore(__ref uint dest, uint compare_value, uint value) { result:$$uint = OpAtomicCompareExchange &dest Device None None $value $compare_value; }; + case metal: + if (__isTextureAccess(dest)) + { + vector<uint, 4> vec_compare_value = vector<uint, 4>(compare_value); + if(__isTextureArrayAccess(dest)) + { + __metalImageInterlocked_compare_exchange(__extractTextureFromTextureAccess(dest), + __extractCoordFromTextureAccess(dest), __extractArrayCoordFromTextureAccess(dest), vec_compare_value, vector<uint, 4>(value)); + } + else + { + __metalImageInterlocked_compare_exchange(__extractTextureFromTextureAccess(dest), + __extractCoordFromTextureAccess(dest), vec_compare_value, vector<uint, 4>(value)); + } + } + else + { + __metalInterlocked_compare_exchange(__getMetalAtomicRef(dest), compare_value, value); + } + return; } } @@ -20006,7 +20139,7 @@ extension __TextureImpl<float, Shape, 0, 0, 0, $(kStdlibResourceAccessReadWrite) [__requiresNVAPI] [ForceInline] __glsl_extension(GL_EXT_shader_atomic_float) - [require(glsl_hlsl_spirv, atomic_glsl_hlsl_nvapi_cuda_float1)] + [require(glsl_hlsl_spirv, atomic_glsl_hlsl_nvapi_cuda_metal_float1)] void InterlockedAddF32(vector<uint, Shape.dimensions> coord, float value, out float originalValue) { __target_switch diff --git a/source/slang/slang-capabilities.capdef b/source/slang/slang-capabilities.capdef index a27146bf9..b13571d18 100644 --- a/source/slang/slang-capabilities.capdef +++ b/source/slang/slang-capabilities.capdef @@ -820,12 +820,12 @@ alias subgroup_clustered = GL_KHR_shader_subgroup_clustered | _sm_6_0 | _cuda_sm alias subgroup_quad = GL_KHR_shader_subgroup_quad | _sm_6_0 | _cuda_sm_7_0; alias 
subgroup_partitioned = GL_NV_shader_subgroup_partitioned + subgroup_ballot_activemask | _sm_6_5 | _cuda_sm_7_0; -alias atomic_glsl_hlsl_nvapi_cuda_float1 = atomic_glsl_float1 | hlsl_nvapi + _sm_4_0 | _cuda_sm_2_0; +alias atomic_glsl_hlsl_nvapi_cuda_metal_float1 = atomic_glsl_float1 | hlsl_nvapi + _sm_4_0 | _cuda_sm_2_0 | metal; alias atomic_glsl_hlsl_nvapi_cuda5_int64 = atomic_glsl_int64 | hlsl_nvapi + _sm_4_0 | _cuda_sm_6_0; alias atomic_glsl_hlsl_nvapi_cuda6_int64 = atomic_glsl_int64 | hlsl_nvapi + _sm_4_0 | _cuda_sm_6_0; alias atomic_glsl_hlsl_nvapi_cuda9_int64 = atomic_glsl_int64 | hlsl_nvapi + _sm_4_0 | _cuda_sm_9_0; -alias atomic_glsl_hlsl_cuda = atomic_glsl | _sm_5_0 | _cuda_sm_2_0 | metal; +alias atomic_glsl_hlsl_cuda_metal = atomic_glsl | _sm_5_0 | _cuda_sm_2_0 | metal; alias atomic_glsl_hlsl_cuda9_int64 = atomic_glsl_int64 | _sm_6_6 | _cuda_sm_9_0 | metal; alias helper_lane = _sm_6_0 + fragment diff --git a/source/slang/slang-emit.cpp b/source/slang/slang-emit.cpp index 678b4137a..b690b7c38 100644 --- a/source/slang/slang-emit.cpp +++ b/source/slang/slang-emit.cpp @@ -1039,6 +1039,7 @@ Result linkAndOptimizeIR( case CodeGenTarget::MetalLib: case CodeGenTarget::MetalLibAssembly: byteAddressBufferOptions.scalarizeVectorLoadStore = true; + byteAddressBufferOptions.treatGetEquivalentStructuredBufferAsGetThis = true; byteAddressBufferOptions.translateToStructuredBufferOps = false; byteAddressBufferOptions.lowerBasicTypeOps = true; break; @@ -1135,6 +1136,8 @@ Result linkAndOptimizeIR( } break; case CodeGenTarget::Metal: + case CodeGenTarget::MetalLib: + case CodeGenTarget::MetalLibAssembly: { legalizeIRForMetal(irModule, sink); } diff --git a/source/slang/slang-ir-byte-address-legalize.cpp b/source/slang/slang-ir-byte-address-legalize.cpp index dba3ab5f5..d5685bad6 100644 --- a/source/slang/slang-ir-byte-address-legalize.cpp +++ b/source/slang/slang-ir-byte-address-legalize.cpp @@ -38,6 +38,8 @@ struct ByteAddressBufferLegalizationContext IRModule* m_module; 
IRBuilder m_builder; + Dictionary<IRInst*, IRType*> byteAddrBufferToReplace; + // Everything starts with a request to process a module, // which delegates to the central recrusive walk of the IR. // @@ -787,10 +789,15 @@ struct ByteAddressBufferLegalizationContext IRInst* getEquivalentStructuredBuffer(IRType* elementType, IRInst* byteAddressBuffer) { + if (this->m_options.treatGetEquivalentStructuredBufferAsGetThis) + return byteAddressBuffer; + if (!elementType) { return nullptr; } + if (as<IRHLSLStructuredBufferTypeBase>(byteAddressBuffer->getDataType())) + return byteAddressBuffer; // The simple case for replacement is when the byte-address buffer to // be replaced is a global shader parameter. That path will get its // own routine. diff --git a/source/slang/slang-ir-byte-address-legalize.h b/source/slang/slang-ir-byte-address-legalize.h index 1ae69070e..8a92bcf33 100644 --- a/source/slang/slang-ir-byte-address-legalize.h +++ b/source/slang/slang-ir-byte-address-legalize.h @@ -14,6 +14,11 @@ struct ByteAddressBufferLegalizationOptions bool useBitCastFromUInt = false; bool translateToStructuredBufferOps = false; bool lowerBasicTypeOps = false; + + /// Causes all calls to `getEquivlentStructuredBuffer` to return a `ByteAddressBuffer` (this) instead of a `StructuredBuffer`. + /// This option is used for targets that do not distinctly define `ByteAddressBuffer`/`StructuredBuffer` and introduce + /// operations which prevent DCE from destroying old definitions of `ByteAddressBuffer` after variable replacement. + bool treatGetEquivalentStructuredBufferAsGetThis = false; }; /// Legalize byte-address buffer `Load()` and `Store()` operations. 
diff --git a/tests/hlsl-intrinsic/byte-address-buffer-atomics.slang b/tests/hlsl-intrinsic/byte-address-buffer-atomics.slang index f133bb372..f621cc621 100644 --- a/tests/hlsl-intrinsic/byte-address-buffer-atomics.slang +++ b/tests/hlsl-intrinsic/byte-address-buffer-atomics.slang @@ -1,6 +1,10 @@ //TEST(compute):COMPARE_COMPUTE(filecheck-buffer=CHECK): -dx12 -use-dxil -output-using-type //TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=CHECK):-vk -output-using-type //TEST(compute):COMPARE_COMPUTE(filecheck-buffer=CHECK):-cuda -output-using-type +//TEST(compute):COMPARE_COMPUTE(filecheck-buffer=CHECK): -metal -output-using-type +//TEST:SIMPLE(filecheck=METALLIB): -target metallib -entry computeMain -stage compute + +// METALLIB: computeMain //TEST_INPUT:ubuffer(data=[0 0 0 0 0 0 0 0 0 0 0 0], stride=4):out,name outputBuffer RWStructuredBuffer<int> outputBuffer; diff --git a/tests/metal/atomic-texture-buffer.slang b/tests/metal/atomic-texture-buffer.slang index b1a5bcf25..3e4eda94b 100644 --- a/tests/metal/atomic-texture-buffer.slang +++ b/tests/metal/atomic-texture-buffer.slang @@ -44,6 +44,7 @@ void test() // METAL: .atomic_fetch_xor // METAL: .atomic_exchange // METAL: .atomic_compare_exchange_weak +// METAL: .atomic_compare_exchange_weak // METAL: .atomic_fetch_add // METAL: .atomic_fetch_and @@ -59,6 +60,7 @@ void test() // METAL: .atomic_fetch_xor // METAL: .atomic_exchange // METAL: .atomic_compare_exchange_weak +// METAL: .atomic_compare_exchange_weak InterlockedAdd(intBuffer[0], valInt); InterlockedAnd(intBuffer[0], valInt); InterlockedMax(intBuffer[0], valInt); @@ -73,6 +75,7 @@ void test() InterlockedXor(intBuffer[0], valInt, originalValueInt); InterlockedExchange(intBuffer[0], valInt, originalValueInt); InterlockedCompareExchange(intBuffer[0], valInt, compareValueInt, originalValueInt); + InterlockedCompareStore(intBuffer[0], valUInt, compareValueUInt); InterlockedAdd(uintBuffer[0], valUInt); InterlockedAnd(uintBuffer[0], valUInt); @@ -88,6 +91,7 
@@ void test() InterlockedXor(uintBuffer[0], valUInt, originalValueUInt); InterlockedExchange(uintBuffer[0], valUInt, originalValueUInt); InterlockedCompareExchange(uintBuffer[0], valUInt, compareValueUInt, originalValueUInt); + InterlockedCompareStore(uintBuffer[0], valUInt, compareValueUInt); } [numthreads(1, 1, 1)] diff --git a/tests/metal/atomic-texture-texture1d.slang b/tests/metal/atomic-texture-texture1d.slang index 52d3d15fd..70f639cb5 100644 --- a/tests/metal/atomic-texture-texture1d.slang +++ b/tests/metal/atomic-texture-texture1d.slang @@ -34,6 +34,7 @@ void test() // METAL: .atomic_fetch_xor // METAL: .atomic_exchange // METAL: .atomic_compare_exchange_weak +// METAL: .atomic_compare_exchange_weak // METAL: .atomic_fetch_add // METAL: .atomic_fetch_and @@ -49,6 +50,7 @@ void test() // METAL: .atomic_fetch_xor // METAL: .atomic_exchange // METAL: .atomic_compare_exchange_weak +// METAL: .atomic_compare_exchange_weak InterlockedAdd(intTexture1D[0], valInt); InterlockedAnd(intTexture1D[0], valInt); InterlockedMax(intTexture1D[0], valInt); @@ -63,6 +65,7 @@ void test() InterlockedXor(intTexture1D[0], valInt, originalValueInt); InterlockedExchange(intTexture1D[0], valInt, originalValueInt); InterlockedCompareExchange(intTexture1D[0], valInt, compareValueInt, originalValueInt); + InterlockedCompareStore(intTexture1D[0], valUInt, compareValueUInt); InterlockedAdd(uintTexture1D[0], valUInt); InterlockedAnd(uintTexture1D[0], valUInt); @@ -78,6 +81,7 @@ void test() InterlockedXor(uintTexture1D[0], valUInt, originalValueUInt); InterlockedExchange(uintTexture1D[0], valUInt, originalValueUInt); InterlockedCompareExchange(uintTexture1D[0], valUInt, compareValueUInt, originalValueUInt); + InterlockedCompareStore(uintTexture1D[0], valUInt, compareValueUInt); // Texture1DArray // METAL: .atomic_fetch_add @@ -94,6 +98,7 @@ void test() // METAL: .atomic_fetch_xor // METAL: .atomic_exchange // METAL: .atomic_compare_exchange_weak +// METAL: .atomic_compare_exchange_weak // 
METAL: .atomic_fetch_add // METAL: .atomic_fetch_and @@ -109,6 +114,7 @@ void test() // METAL: .atomic_fetch_xor // METAL: .atomic_exchange // METAL: .atomic_compare_exchange_weak +// METAL: .atomic_compare_exchange_weak InterlockedAdd(intTexture1DArray[0], valInt); InterlockedAnd(intTexture1DArray[0], valInt); InterlockedMax(intTexture1DArray[0], valInt); @@ -122,7 +128,8 @@ void test() InterlockedOr(intTexture1DArray[0], valInt, originalValueInt); InterlockedXor(intTexture1DArray[0], valInt, originalValueInt); InterlockedExchange(intTexture1DArray[0], valInt, originalValueInt); - InterlockedCompareExchange(intTexture1DArray[0], valInt, compareValueInt, originalValueInt); + InterlockedCompareExchange(intTexture1DArray[0], valInt, compareValueInt, originalValueUInt); + InterlockedCompareStore(intTexture1DArray[0], valUInt, compareValueUInt); InterlockedAdd(uintTexture1DArray[0], valUInt); InterlockedAnd(uintTexture1DArray[0], valUInt); @@ -138,6 +145,7 @@ void test() InterlockedXor(uintTexture1DArray[0], valUInt, originalValueUInt); InterlockedExchange(uintTexture1DArray[0], valUInt, originalValueUInt); InterlockedCompareExchange(uintTexture1DArray[0], valUInt, compareValueUInt, originalValueUInt); + InterlockedCompareStore(uintTexture1DArray[0], valUInt, compareValueUInt); } [numthreads(1, 1, 1)] diff --git a/tests/metal/atomic-texture-texture2d.slang b/tests/metal/atomic-texture-texture2d.slang index e147eae22..b18c96168 100644 --- a/tests/metal/atomic-texture-texture2d.slang +++ b/tests/metal/atomic-texture-texture2d.slang @@ -34,6 +34,7 @@ void test() // METAL: .atomic_fetch_xor // METAL: .atomic_exchange // METAL: .atomic_compare_exchange_weak +// METAL: .atomic_compare_exchange_weak // METAL: .atomic_fetch_add // METAL: .atomic_fetch_and @@ -49,6 +50,7 @@ void test() // METAL: .atomic_fetch_xor // METAL: .atomic_exchange // METAL: .atomic_compare_exchange_weak +// METAL: .atomic_compare_exchange_weak InterlockedAdd(intTexture2D[0], valInt); 
InterlockedAnd(intTexture2D[0], valInt); InterlockedMax(intTexture2D[0], valInt); @@ -63,6 +65,7 @@ void test() InterlockedXor(intTexture2D[0], valInt, originalValueInt); InterlockedExchange(intTexture2D[0], valInt, originalValueInt); InterlockedCompareExchange(intTexture2D[0], valInt, compareValueInt, originalValueInt); + InterlockedCompareStore(intTexture2D[0], valUInt, compareValueUInt); InterlockedAdd(uintTexture2D[0], valUInt); InterlockedAnd(uintTexture2D[0], valUInt); @@ -78,6 +81,7 @@ void test() InterlockedXor(uintTexture2D[0], valUInt, originalValueUInt); InterlockedExchange(uintTexture2D[0], valUInt, originalValueUInt); InterlockedCompareExchange(uintTexture2D[0], valUInt, compareValueUInt, originalValueUInt); + InterlockedCompareStore(uintTexture2D[0], valUInt, compareValueUInt); // Texture2DArray // METAL: .atomic_fetch_add @@ -94,6 +98,7 @@ void test() // METAL: .atomic_fetch_xor // METAL: .atomic_exchange // METAL: .atomic_compare_exchange_weak +// METAL: .atomic_compare_exchange_weak // METAL: .atomic_fetch_add // METAL: .atomic_fetch_and @@ -109,6 +114,7 @@ void test() // METAL: .atomic_fetch_xor // METAL: .atomic_exchange // METAL: .atomic_compare_exchange_weak +// METAL: .atomic_compare_exchange_weak InterlockedAdd(intTexture2DArray[0], valInt); InterlockedAnd(intTexture2DArray[0], valInt); InterlockedMax(intTexture2DArray[0], valInt); @@ -123,6 +129,7 @@ void test() InterlockedXor(intTexture2DArray[0], valInt, originalValueInt); InterlockedExchange(intTexture2DArray[0], valInt, originalValueInt); InterlockedCompareExchange(intTexture2DArray[0], valInt, compareValueInt, originalValueInt); + InterlockedCompareStore(intTexture2DArray[0], valUInt, compareValueUInt); InterlockedAdd(uintTexture2DArray[0], valUInt); InterlockedAnd(uintTexture2DArray[0], valUInt); @@ -138,6 +145,7 @@ void test() InterlockedXor(uintTexture2DArray[0], valUInt, originalValueUInt); InterlockedExchange(uintTexture2DArray[0], valUInt, originalValueUInt); 
InterlockedCompareExchange(uintTexture2DArray[0], valUInt, compareValueUInt, originalValueUInt); + InterlockedCompareStore(uintTexture2DArray[0], valUInt, compareValueUInt); } [numthreads(1, 1, 1)] diff --git a/tests/metal/atomic-texture-texture3d.slang b/tests/metal/atomic-texture-texture3d.slang index 5a97cc44f..755b941da 100644 --- a/tests/metal/atomic-texture-texture3d.slang +++ b/tests/metal/atomic-texture-texture3d.slang @@ -30,6 +30,7 @@ void test() // METAL: .atomic_fetch_xor // METAL: .atomic_exchange // METAL: .atomic_compare_exchange_weak +// METAL: .atomic_compare_exchange_weak // METAL: .atomic_fetch_add // METAL: .atomic_fetch_and @@ -45,6 +46,7 @@ void test() // METAL: .atomic_fetch_xor // METAL: .atomic_exchange // METAL: .atomic_compare_exchange_weak +// METAL: .atomic_compare_exchange_weak InterlockedAdd(intTexture3D[0], valInt); InterlockedAnd(intTexture3D[0], valInt); InterlockedMax(intTexture3D[0], valInt); @@ -59,6 +61,7 @@ void test() InterlockedXor(intTexture3D[0], valInt, originalValueInt); InterlockedExchange(intTexture3D[0], valInt, originalValueInt); InterlockedCompareExchange(intTexture3D[0], valInt, compareValueInt, originalValueInt); + InterlockedCompareStore(intTexture3D[0], valUInt, compareValueUInt); InterlockedAdd(uintTexture3D[0], valUInt); InterlockedAnd(uintTexture3D[0], valUInt); @@ -74,6 +77,7 @@ void test() InterlockedXor(uintTexture3D[0], valUInt, originalValueUInt); InterlockedExchange(uintTexture3D[0], valUInt, originalValueUInt); InterlockedCompareExchange(uintTexture3D[0], valUInt, compareValueUInt, originalValueUInt); + InterlockedCompareStore(uintTexture3D[0], valUInt, compareValueUInt); } |
