summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorArielG-NV <159081215+ArielG-NV@users.noreply.github.com>2024-07-19 02:05:33 -0400
committerGitHub <noreply@github.com>2024-07-18 23:05:33 -0700
commita00d603519d395d41b2f68c5874e8a708335a31a (patch)
tree114e3da71d3d95034e944edb0ffd1510f192418d
parent59dd133f1c52fb0a7a388f4a8f42234f4556a28a (diff)
Metal: `Interlocked` (atomic) member function support for buffers (#4655)
* Metal: `Interlocked` (atomic) member function support for buffers fixes: #4654 fixes: #4481 1. Add `Interlocked` (atomic) member function support for buffers to Metal 2. Fix `__getEquivalentStructuredBuffer` so it works with CPP/Metal targets * add `CompareStore` support * legalize RWByteAddressBuffer to fully replace StructuredBuffer * destroy replaced byte-addr buffer * cleanup as per review and add comment to explain why certain code exists * fix flow of byte-address-buffer replacement * toggle on option to translate byteAddrBuffer to StructuredBuffer * cleanup unused buffers * add treatGetEquivalentStructuredBufferAsGetThis flag to treat getEquivStructuredBuffer as a byteAddressBuffer * comment to explain `treatGetEquivalentStructuredBufferAsGetThis` --------- Co-authored-by: Yong He <yonghe@outlook.com>
-rw-r--r--source/slang/hlsl.meta.slang155
-rw-r--r--source/slang/slang-capabilities.capdef4
-rw-r--r--source/slang/slang-emit.cpp3
-rw-r--r--source/slang/slang-ir-byte-address-legalize.cpp7
-rw-r--r--source/slang/slang-ir-byte-address-legalize.h5
-rw-r--r--tests/hlsl-intrinsic/byte-address-buffer-atomics.slang4
-rw-r--r--tests/metal/atomic-texture-buffer.slang4
-rw-r--r--tests/metal/atomic-texture-texture1d.slang10
-rw-r--r--tests/metal/atomic-texture-texture2d.slang8
-rw-r--r--tests/metal/atomic-texture-texture3d.slang4
10 files changed, 190 insertions, 14 deletions
diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang
index 2639c1e88..9760f974a 100644
--- a/source/slang/hlsl.meta.slang
+++ b/source/slang/hlsl.meta.slang
@@ -4193,13 +4193,19 @@ ${{{{
__cuda_sm_version(2.0)
[__requiresNVAPI]
[ForceInline]
- [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_nvapi_cuda_float1)]
+ [require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_nvapi_cuda_metal_float1)]
void InterlockedAddF32(uint byteAddress, float valueToAdd, out float originalValue)
{
__target_switch
{
case hlsl: __intrinsic_asm "($3 = NvInterlockedAddFp32($0, $1, $2))";
case cuda: __intrinsic_asm "(*$3 = atomicAdd($0._getPtrAt<float>($1), $2))";
+ case metal:
+ {
+ let buf = __getEquivalentStructuredBuffer<float>(this);
+ __metalInterlocked_add(__getMetalAtomicRef(buf[byteAddress / 4]), valueToAdd, originalValue);
+ return;
+ }
case glsl:
case spirv:
{
@@ -4264,13 +4270,19 @@ ${{{{
[__requiresNVAPI]
[ForceInline]
__cuda_sm_version(2.0)
- [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_nvapi_cuda_float1)]
+ [require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_nvapi_cuda_metal_float1)]
void InterlockedAddF32(uint byteAddress, float valueToAdd)
{
__target_switch
{
case hlsl: __intrinsic_asm "(NvInterlockedAddFp32($0, $1, $2))";
case cuda: __intrinsic_asm "atomicAdd($0._getPtrAt<float>($1), $2)";
+ case metal:
+ {
+ let buf = __getEquivalentStructuredBuffer<float>(this);
+ __metalInterlocked_add(__getMetalAtomicRef(buf[byteAddress / 4]), valueToAdd);
+ return;
+ }
case glsl:
case spirv:
{
@@ -4763,6 +4775,11 @@ ${{{{
case cuda: __intrinsic_asm "(*$3 = atomicAdd($0._getPtrAt<uint32_t>($1), $2))";
case hlsl: __intrinsic_asm ".InterlockedAdd";
case metal:
+ {
+ let buf = __getEquivalentStructuredBuffer<uint>(this);
+ __metalInterlocked_add(__getMetalAtomicRef(buf[dest / 4]), value, original_value);
+ return;
+ }
case spirv:
let buf = __getEquivalentStructuredBuffer<uint>(this);
::InterlockedAdd(buf[dest / 4], value, original_value);
@@ -4781,6 +4798,11 @@ ${{{{
case cuda: __intrinsic_asm "atomicAdd($0._getPtrAt<uint32_t>($1), $2)";
case hlsl: __intrinsic_asm ".InterlockedAdd";
case metal:
+ {
+ let buf = __getEquivalentStructuredBuffer<uint>(this);
+ __metalInterlocked_add(__getMetalAtomicRef(buf[dest / 4]), value);
+ return;
+ }
case spirv:
let buf = __getEquivalentStructuredBuffer<uint>(this);
::InterlockedAdd(buf[dest / 4], value);
@@ -4800,6 +4822,11 @@ ${{{{
case cuda: __intrinsic_asm "(*$3 = atomicAnd($0._getPtrAt<uint32_t>($1), $2))";
case hlsl: __intrinsic_asm ".InterlockedAnd";
case metal:
+ {
+ let buf = __getEquivalentStructuredBuffer<uint>(this);
+ __metalInterlocked_and(__getMetalAtomicRef(buf[dest / 4]), value, original_value);
+ return;
+ }
case spirv:
let buf = __getEquivalentStructuredBuffer<uint>(this);
::InterlockedAnd(buf[dest / 4], value, original_value);
@@ -4818,6 +4845,11 @@ ${{{{
case cuda: __intrinsic_asm "atomicAnd($0._getPtrAt<uint32_t>($1), $2)";
case hlsl: __intrinsic_asm ".InterlockedAnd";
case metal:
+ {
+ let buf = __getEquivalentStructuredBuffer<uint>(this);
+ __metalInterlocked_and(__getMetalAtomicRef(buf[dest / 4]), value);
+ return;
+ }
case spirv:
let buf = __getEquivalentStructuredBuffer<uint>(this);
::InterlockedAnd(buf[dest / 4], value);
@@ -4838,6 +4870,11 @@ ${{{{
case cuda: __intrinsic_asm "(*$4 = atomicCAS($0._getPtrAt<uint32_t>($1), $2, $3))";
case hlsl: __intrinsic_asm ".InterlockedCompareExchange";
case metal:
+ {
+ let buf = __getEquivalentStructuredBuffer<uint>(this);
+ __metalInterlocked_compare_exchange(__getMetalAtomicRef(buf[dest / 4]), compare_value, value, original_value);
+ return;
+ }
case spirv:
let buf = __getEquivalentStructuredBuffer<uint>(this);
::InterlockedCompareExchange(buf[dest / 4], compare_value, value, original_value);
@@ -4845,7 +4882,7 @@ ${{{{
}
[ForceInline]
- [require(cuda_glsl_hlsl_spirv, byteaddressbuffer_rw)]
+ [require(cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)]
void InterlockedCompareStore(
UINT dest,
UINT compare_value,
@@ -4856,6 +4893,12 @@ ${{{{
case glsl: __intrinsic_asm "atomicCompSwap($0._data[$1/4], $2, $3)";
case cuda: __intrinsic_asm "atomicCAS($0._getPtrAt<uint32_t>($1), $2, $3)";
case hlsl: __intrinsic_asm ".InterlockedCompareStore";
+ case metal:
+ {
+ let buf = __getEquivalentStructuredBuffer<uint>(this);
+ __metalInterlocked_compare_exchange(__getMetalAtomicRef(buf[dest / 4]), compare_value, value);
+ return;
+ }
case spirv:
let buf = __getEquivalentStructuredBuffer<uint>(this);
::InterlockedCompareStore(buf[dest / 4], compare_value, value);
@@ -4875,6 +4918,11 @@ ${{{{
case cuda: __intrinsic_asm "(*$3 = atomicExch($0._getPtrAt<uint32_t>($1), $2))";
case hlsl: __intrinsic_asm ".InterlockedExchange";
case metal:
+ {
+ let buf = __getEquivalentStructuredBuffer<uint>(this);
+ __metalInterlocked_exchange(__getMetalAtomicRef(buf[dest / 4]), value, original_value);
+ return;
+ }
case spirv:
let buf = __getEquivalentStructuredBuffer<uint>(this);
::InterlockedExchange(buf[dest / 4], value, original_value);
@@ -4894,6 +4942,11 @@ ${{{{
case cuda: __intrinsic_asm "(*$3 = atomicMax($0._getPtrAt<uint32_t>($1), $2))";
case hlsl: __intrinsic_asm ".InterlockedMax";
case metal:
+ {
+ let buf = __getEquivalentStructuredBuffer<uint>(this);
+ __metalInterlocked_max(__getMetalAtomicRef(buf[dest / 4]), value, original_value);
+ return;
+ }
case spirv:
let buf = __getEquivalentStructuredBuffer<uint>(this);
::InterlockedMax(buf[dest / 4], value, original_value);
@@ -4912,6 +4965,11 @@ ${{{{
case cuda: __intrinsic_asm "atomicMax($0._getPtrAt<uint32_t>($1), $2)";
case hlsl: __intrinsic_asm ".InterlockedMax";
case metal:
+ {
+ let buf = __getEquivalentStructuredBuffer<uint>(this);
+ __metalInterlocked_max(__getMetalAtomicRef(buf[dest / 4]), value);
+ return;
+ }
case spirv:
let buf = __getEquivalentStructuredBuffer<uint>(this);
::InterlockedMax(buf[dest / 4], value);
@@ -4931,6 +4989,11 @@ ${{{{
case cuda: __intrinsic_asm "(*$3 = atomicMin($0._getPtrAt<uint32_t>($1), $2))";
case hlsl: __intrinsic_asm ".InterlockedMin";
case metal:
+ {
+ let buf = __getEquivalentStructuredBuffer<uint>(this);
+ __metalInterlocked_min(__getMetalAtomicRef(buf[dest / 4]), value, original_value);
+ return;
+ }
case spirv:
let buf = __getEquivalentStructuredBuffer<uint>(this);
::InterlockedMin(buf[dest / 4], value, original_value);
@@ -4949,6 +5012,11 @@ ${{{{
case cuda: __intrinsic_asm "atomicMin($0._getPtrAt<uint32_t>($1), $2)";
case hlsl: __intrinsic_asm ".InterlockedMin";
case metal:
+ {
+ let buf = __getEquivalentStructuredBuffer<uint>(this);
+ __metalInterlocked_min(__getMetalAtomicRef(buf[dest / 4]), value);
+ return;
+ }
case spirv:
let buf = __getEquivalentStructuredBuffer<uint>(this);
::InterlockedMin(buf[dest / 4], value);
@@ -4968,6 +5036,11 @@ ${{{{
case cuda: __intrinsic_asm "(*$3 = atomicOr($0._getPtrAt<uint32_t>($1), $2))";
case hlsl: __intrinsic_asm ".InterlockedOr";
case metal:
+ {
+ let buf = __getEquivalentStructuredBuffer<uint>(this);
+ __metalInterlocked_or(__getMetalAtomicRef(buf[dest / 4]), value, original_value);
+ return;
+ }
case spirv:
let buf = __getEquivalentStructuredBuffer<uint>(this);
::InterlockedOr(buf[dest / 4], value, original_value);
@@ -4986,6 +5059,11 @@ ${{{{
case cuda: __intrinsic_asm "atomicOr($0._getPtrAt<uint32_t>($1), $2)";
case hlsl: __intrinsic_asm ".InterlockedOr";
case metal:
+ {
+ let buf = __getEquivalentStructuredBuffer<uint>(this);
+ __metalInterlocked_or(__getMetalAtomicRef(buf[dest / 4]), value);
+ return;
+ }
case spirv:
let buf = __getEquivalentStructuredBuffer<uint>(this);
::InterlockedOr(buf[dest / 4], value);
@@ -5005,6 +5083,11 @@ ${{{{
case cuda: __intrinsic_asm "(*$3 = atomicXor($0._getPtrAt<uint32_t>($1), $2))";
case hlsl: __intrinsic_asm ".InterlockedXor";
case metal:
+ {
+ let buf = __getEquivalentStructuredBuffer<uint>(this);
+ __metalInterlocked_xor(__getMetalAtomicRef(buf[dest / 4]), value, original_value);
+ return;
+ }
case spirv:
let buf = __getEquivalentStructuredBuffer<uint>(this);
::InterlockedXor(buf[dest / 4], value, original_value);
@@ -5023,6 +5106,11 @@ ${{{{
case cuda: __intrinsic_asm "atomicXor($0._getPtrAt<uint32_t>($1), $2)";
case hlsl: __intrinsic_asm ".InterlockedXor";
case metal:
+ {
+ let buf = __getEquivalentStructuredBuffer<uint>(this);
+ __metalInterlocked_xor(__getMetalAtomicRef(buf[dest / 4]), value);
+ return;
+ }
case spirv:
let buf = __getEquivalentStructuredBuffer<uint>(this);
::InterlockedXor(buf[dest / 4], value);
@@ -9254,7 +9342,7 @@ for (SlangAtomicOperationInfo atomicOp : slangAtomicOperationInfo)
[ForceInline]
__glsl_version(430)
-[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda)]
+[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda_metal)]
void Interlocked$(atomicOp.slangCallSuffix)(__ref $(T) dest, $(T) value)
{
static_assert(__isTextureScalarAccess(dest) || !__isTextureAccess(dest), "Atomic must be applied to scalar texture or non-texture");
@@ -9288,7 +9376,7 @@ void Interlocked$(atomicOp.slangCallSuffix)(__ref $(T) dest, $(T) value)
[ForceInline]
__glsl_version(430)
-[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda)]
+[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda_metal)]
void Interlocked$(atomicOp.slangCallSuffix)(__ref $(T) dest, $(T) value, out $(T) original_value)
{
static_assert(__isTextureScalarAccess(dest) || !__isTextureAccess(dest), "Atomic must be applied to a scalar texture or non-texture");
@@ -9335,7 +9423,7 @@ for(const char* T : {"int64_t", "uint64_t"})
{
}}}}
[ForceInline]
-[require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda)]
+[require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda_metal)]
void InterlockedAdd(__ref $(T) dest, $(T) value)
{
__target_switch
@@ -9515,7 +9603,7 @@ ${{{{
[ForceInline]
__glsl_version(430)
-[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda)]
+[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda_metal)]
void InterlockedCompareExchange(__ref int dest, int compare_value, int value, out int original_value)
{
static_assert(__isTextureScalarAccess(dest) || !__isTextureAccess(dest), "Atomic must be applied to scalar texture or non-texture");
@@ -9550,7 +9638,7 @@ void InterlockedCompareExchange(__ref int dest, int compare_value, int value, ou
[ForceInline]
__glsl_version(430)
-[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda)]
+[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda_metal)]
void InterlockedCompareExchange(__ref uint dest, uint compare_value, uint value, out uint original_value)
{
static_assert(__isTextureScalarAccess(dest) || !__isTextureAccess(dest), "Atomic must be applied to scalar texture or non-texture");
@@ -9613,7 +9701,7 @@ void InterlockedCompareExchangeFloatBitwise(__ref float dest, float compare_val
[ForceInline]
__glsl_version(430)
-[require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda)]
+[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda_metal)]
void InterlockedCompareStore(__ref int dest, int compare_value, int value)
{
__target_switch
@@ -9622,16 +9710,41 @@ void InterlockedCompareStore(__ref int dest, int compare_value, int value)
case glsl: __intrinsic_asm "$atomicCompSwap($A, $1, $2)";
case cuda: __intrinsic_asm "atomicCAS($0, $1, $2)";
case spirv:
+ {
spirv_asm
{
result:$$int = OpAtomicCompareExchange &dest Device None None $value $compare_value;
};
+ return;
+ }
+ case metal:
+ {
+ if (__isTextureAccess(dest))
+ {
+ vector<int, 4> vec_compare_value = vector<int, 4>(compare_value);
+ if(__isTextureArrayAccess(dest))
+ {
+ __metalImageInterlocked_compare_exchange(__extractTextureFromTextureAccess(dest),
+ __extractCoordFromTextureAccess(dest), __extractArrayCoordFromTextureAccess(dest), vec_compare_value, vector<int, 4>(value));
+ }
+ else
+ {
+ __metalImageInterlocked_compare_exchange(__extractTextureFromTextureAccess(dest),
+ __extractCoordFromTextureAccess(dest), vec_compare_value, vector<int, 4>(value));
+ }
+ }
+ else
+ {
+ __metalInterlocked_compare_exchange(__getMetalAtomicRef(dest), compare_value, value);
+ }
+ return;
+ }
}
}
[ForceInline]
__glsl_version(430)
-[require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda)]
+[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda_metal)]
void InterlockedCompareStore(__ref uint dest, uint compare_value, uint value)
{
__target_switch
@@ -9644,6 +9757,26 @@ void InterlockedCompareStore(__ref uint dest, uint compare_value, uint value)
{
result:$$uint = OpAtomicCompareExchange &dest Device None None $value $compare_value;
};
+ case metal:
+ if (__isTextureAccess(dest))
+ {
+ vector<uint, 4> vec_compare_value = vector<uint, 4>(compare_value);
+ if(__isTextureArrayAccess(dest))
+ {
+ __metalImageInterlocked_compare_exchange(__extractTextureFromTextureAccess(dest),
+ __extractCoordFromTextureAccess(dest), __extractArrayCoordFromTextureAccess(dest), vec_compare_value, vector<uint, 4>(value));
+ }
+ else
+ {
+ __metalImageInterlocked_compare_exchange(__extractTextureFromTextureAccess(dest),
+ __extractCoordFromTextureAccess(dest), vec_compare_value, vector<uint, 4>(value));
+ }
+ }
+ else
+ {
+ __metalInterlocked_compare_exchange(__getMetalAtomicRef(dest), compare_value, value);
+ }
+ return;
}
}
@@ -20006,7 +20139,7 @@ extension __TextureImpl<float, Shape, 0, 0, 0, $(kStdlibResourceAccessReadWrite)
[__requiresNVAPI]
[ForceInline]
__glsl_extension(GL_EXT_shader_atomic_float)
- [require(glsl_hlsl_spirv, atomic_glsl_hlsl_nvapi_cuda_float1)]
+ [require(glsl_hlsl_spirv, atomic_glsl_hlsl_nvapi_cuda_metal_float1)]
void InterlockedAddF32(vector<uint, Shape.dimensions> coord, float value, out float originalValue)
{
__target_switch
diff --git a/source/slang/slang-capabilities.capdef b/source/slang/slang-capabilities.capdef
index a27146bf9..b13571d18 100644
--- a/source/slang/slang-capabilities.capdef
+++ b/source/slang/slang-capabilities.capdef
@@ -820,12 +820,12 @@ alias subgroup_clustered = GL_KHR_shader_subgroup_clustered | _sm_6_0 | _cuda_sm
alias subgroup_quad = GL_KHR_shader_subgroup_quad | _sm_6_0 | _cuda_sm_7_0;
alias subgroup_partitioned = GL_NV_shader_subgroup_partitioned + subgroup_ballot_activemask | _sm_6_5 | _cuda_sm_7_0;
-alias atomic_glsl_hlsl_nvapi_cuda_float1 = atomic_glsl_float1 | hlsl_nvapi + _sm_4_0 | _cuda_sm_2_0;
+alias atomic_glsl_hlsl_nvapi_cuda_metal_float1 = atomic_glsl_float1 | hlsl_nvapi + _sm_4_0 | _cuda_sm_2_0 | metal;
alias atomic_glsl_hlsl_nvapi_cuda5_int64 = atomic_glsl_int64 | hlsl_nvapi + _sm_4_0 | _cuda_sm_6_0;
alias atomic_glsl_hlsl_nvapi_cuda6_int64 = atomic_glsl_int64 | hlsl_nvapi + _sm_4_0 | _cuda_sm_6_0;
alias atomic_glsl_hlsl_nvapi_cuda9_int64 = atomic_glsl_int64 | hlsl_nvapi + _sm_4_0 | _cuda_sm_9_0;
-alias atomic_glsl_hlsl_cuda = atomic_glsl | _sm_5_0 | _cuda_sm_2_0 | metal;
+alias atomic_glsl_hlsl_cuda_metal = atomic_glsl | _sm_5_0 | _cuda_sm_2_0 | metal;
alias atomic_glsl_hlsl_cuda9_int64 = atomic_glsl_int64 | _sm_6_6 | _cuda_sm_9_0 | metal;
alias helper_lane = _sm_6_0 + fragment
diff --git a/source/slang/slang-emit.cpp b/source/slang/slang-emit.cpp
index 678b4137a..b690b7c38 100644
--- a/source/slang/slang-emit.cpp
+++ b/source/slang/slang-emit.cpp
@@ -1039,6 +1039,7 @@ Result linkAndOptimizeIR(
case CodeGenTarget::MetalLib:
case CodeGenTarget::MetalLibAssembly:
byteAddressBufferOptions.scalarizeVectorLoadStore = true;
+ byteAddressBufferOptions.treatGetEquivalentStructuredBufferAsGetThis = true;
byteAddressBufferOptions.translateToStructuredBufferOps = false;
byteAddressBufferOptions.lowerBasicTypeOps = true;
break;
@@ -1135,6 +1136,8 @@ Result linkAndOptimizeIR(
}
break;
case CodeGenTarget::Metal:
+ case CodeGenTarget::MetalLib:
+ case CodeGenTarget::MetalLibAssembly:
{
legalizeIRForMetal(irModule, sink);
}
diff --git a/source/slang/slang-ir-byte-address-legalize.cpp b/source/slang/slang-ir-byte-address-legalize.cpp
index dba3ab5f5..d5685bad6 100644
--- a/source/slang/slang-ir-byte-address-legalize.cpp
+++ b/source/slang/slang-ir-byte-address-legalize.cpp
@@ -38,6 +38,8 @@ struct ByteAddressBufferLegalizationContext
IRModule* m_module;
IRBuilder m_builder;
+ Dictionary<IRInst*, IRType*> byteAddrBufferToReplace;
+
// Everything starts with a request to process a module,
// which delegates to the central recursive walk of the IR.
//
@@ -787,10 +789,15 @@ struct ByteAddressBufferLegalizationContext
IRInst* getEquivalentStructuredBuffer(IRType* elementType, IRInst* byteAddressBuffer)
{
+ if (this->m_options.treatGetEquivalentStructuredBufferAsGetThis)
+ return byteAddressBuffer;
+
if (!elementType)
{
return nullptr;
}
+ if (as<IRHLSLStructuredBufferTypeBase>(byteAddressBuffer->getDataType()))
+ return byteAddressBuffer;
// The simple case for replacement is when the byte-address buffer to
// be replaced is a global shader parameter. That path will get its
// own routine.
diff --git a/source/slang/slang-ir-byte-address-legalize.h b/source/slang/slang-ir-byte-address-legalize.h
index 1ae69070e..8a92bcf33 100644
--- a/source/slang/slang-ir-byte-address-legalize.h
+++ b/source/slang/slang-ir-byte-address-legalize.h
@@ -14,6 +14,11 @@ struct ByteAddressBufferLegalizationOptions
bool useBitCastFromUInt = false;
bool translateToStructuredBufferOps = false;
bool lowerBasicTypeOps = false;
+
+ /// Causes all calls to `getEquivalentStructuredBuffer` to return a `ByteAddressBuffer` (this) instead of a `StructuredBuffer`.
+ /// This option is used for targets that do not distinctly define `ByteAddressBuffer`/`StructuredBuffer` and introduce
+ /// operations which prevent DCE from destroying old definitions of `ByteAddressBuffer` after variable replacement.
+ bool treatGetEquivalentStructuredBufferAsGetThis = false;
};
/// Legalize byte-address buffer `Load()` and `Store()` operations.
diff --git a/tests/hlsl-intrinsic/byte-address-buffer-atomics.slang b/tests/hlsl-intrinsic/byte-address-buffer-atomics.slang
index f133bb372..f621cc621 100644
--- a/tests/hlsl-intrinsic/byte-address-buffer-atomics.slang
+++ b/tests/hlsl-intrinsic/byte-address-buffer-atomics.slang
@@ -1,6 +1,10 @@
//TEST(compute):COMPARE_COMPUTE(filecheck-buffer=CHECK): -dx12 -use-dxil -output-using-type
//TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=CHECK):-vk -output-using-type
//TEST(compute):COMPARE_COMPUTE(filecheck-buffer=CHECK):-cuda -output-using-type
+//TEST(compute):COMPARE_COMPUTE(filecheck-buffer=CHECK): -metal -output-using-type
+//TEST:SIMPLE(filecheck=METALLIB): -target metallib -entry computeMain -stage compute
+
+// METALLIB: computeMain
//TEST_INPUT:ubuffer(data=[0 0 0 0 0 0 0 0 0 0 0 0], stride=4):out,name outputBuffer
RWStructuredBuffer<int> outputBuffer;
diff --git a/tests/metal/atomic-texture-buffer.slang b/tests/metal/atomic-texture-buffer.slang
index b1a5bcf25..3e4eda94b 100644
--- a/tests/metal/atomic-texture-buffer.slang
+++ b/tests/metal/atomic-texture-buffer.slang
@@ -44,6 +44,7 @@ void test()
// METAL: .atomic_fetch_xor
// METAL: .atomic_exchange
// METAL: .atomic_compare_exchange_weak
+// METAL: .atomic_compare_exchange_weak
// METAL: .atomic_fetch_add
// METAL: .atomic_fetch_and
@@ -59,6 +60,7 @@ void test()
// METAL: .atomic_fetch_xor
// METAL: .atomic_exchange
// METAL: .atomic_compare_exchange_weak
+// METAL: .atomic_compare_exchange_weak
InterlockedAdd(intBuffer[0], valInt);
InterlockedAnd(intBuffer[0], valInt);
InterlockedMax(intBuffer[0], valInt);
@@ -73,6 +75,7 @@ void test()
InterlockedXor(intBuffer[0], valInt, originalValueInt);
InterlockedExchange(intBuffer[0], valInt, originalValueInt);
InterlockedCompareExchange(intBuffer[0], valInt, compareValueInt, originalValueInt);
+ InterlockedCompareStore(intBuffer[0], valUInt, compareValueUInt);
InterlockedAdd(uintBuffer[0], valUInt);
InterlockedAnd(uintBuffer[0], valUInt);
@@ -88,6 +91,7 @@ void test()
InterlockedXor(uintBuffer[0], valUInt, originalValueUInt);
InterlockedExchange(uintBuffer[0], valUInt, originalValueUInt);
InterlockedCompareExchange(uintBuffer[0], valUInt, compareValueUInt, originalValueUInt);
+ InterlockedCompareStore(uintBuffer[0], valUInt, compareValueUInt);
}
[numthreads(1, 1, 1)]
diff --git a/tests/metal/atomic-texture-texture1d.slang b/tests/metal/atomic-texture-texture1d.slang
index 52d3d15fd..70f639cb5 100644
--- a/tests/metal/atomic-texture-texture1d.slang
+++ b/tests/metal/atomic-texture-texture1d.slang
@@ -34,6 +34,7 @@ void test()
// METAL: .atomic_fetch_xor
// METAL: .atomic_exchange
// METAL: .atomic_compare_exchange_weak
+// METAL: .atomic_compare_exchange_weak
// METAL: .atomic_fetch_add
// METAL: .atomic_fetch_and
@@ -49,6 +50,7 @@ void test()
// METAL: .atomic_fetch_xor
// METAL: .atomic_exchange
// METAL: .atomic_compare_exchange_weak
+// METAL: .atomic_compare_exchange_weak
InterlockedAdd(intTexture1D[0], valInt);
InterlockedAnd(intTexture1D[0], valInt);
InterlockedMax(intTexture1D[0], valInt);
@@ -63,6 +65,7 @@ void test()
InterlockedXor(intTexture1D[0], valInt, originalValueInt);
InterlockedExchange(intTexture1D[0], valInt, originalValueInt);
InterlockedCompareExchange(intTexture1D[0], valInt, compareValueInt, originalValueInt);
+ InterlockedCompareStore(intTexture1D[0], valUInt, compareValueUInt);
InterlockedAdd(uintTexture1D[0], valUInt);
InterlockedAnd(uintTexture1D[0], valUInt);
@@ -78,6 +81,7 @@ void test()
InterlockedXor(uintTexture1D[0], valUInt, originalValueUInt);
InterlockedExchange(uintTexture1D[0], valUInt, originalValueUInt);
InterlockedCompareExchange(uintTexture1D[0], valUInt, compareValueUInt, originalValueUInt);
+ InterlockedCompareStore(uintTexture1D[0], valUInt, compareValueUInt);
// Texture1DArray
// METAL: .atomic_fetch_add
@@ -94,6 +98,7 @@ void test()
// METAL: .atomic_fetch_xor
// METAL: .atomic_exchange
// METAL: .atomic_compare_exchange_weak
+// METAL: .atomic_compare_exchange_weak
// METAL: .atomic_fetch_add
// METAL: .atomic_fetch_and
@@ -109,6 +114,7 @@ void test()
// METAL: .atomic_fetch_xor
// METAL: .atomic_exchange
// METAL: .atomic_compare_exchange_weak
+// METAL: .atomic_compare_exchange_weak
InterlockedAdd(intTexture1DArray[0], valInt);
InterlockedAnd(intTexture1DArray[0], valInt);
InterlockedMax(intTexture1DArray[0], valInt);
@@ -122,7 +128,8 @@ void test()
InterlockedOr(intTexture1DArray[0], valInt, originalValueInt);
InterlockedXor(intTexture1DArray[0], valInt, originalValueInt);
InterlockedExchange(intTexture1DArray[0], valInt, originalValueInt);
- InterlockedCompareExchange(intTexture1DArray[0], valInt, compareValueInt, originalValueInt);
+ InterlockedCompareExchange(intTexture1DArray[0], valInt, compareValueInt, originalValueUInt);
+ InterlockedCompareStore(intTexture1DArray[0], valUInt, compareValueUInt);
InterlockedAdd(uintTexture1DArray[0], valUInt);
InterlockedAnd(uintTexture1DArray[0], valUInt);
@@ -138,6 +145,7 @@ void test()
InterlockedXor(uintTexture1DArray[0], valUInt, originalValueUInt);
InterlockedExchange(uintTexture1DArray[0], valUInt, originalValueUInt);
InterlockedCompareExchange(uintTexture1DArray[0], valUInt, compareValueUInt, originalValueUInt);
+ InterlockedCompareStore(uintTexture1DArray[0], valUInt, compareValueUInt);
}
[numthreads(1, 1, 1)]
diff --git a/tests/metal/atomic-texture-texture2d.slang b/tests/metal/atomic-texture-texture2d.slang
index e147eae22..b18c96168 100644
--- a/tests/metal/atomic-texture-texture2d.slang
+++ b/tests/metal/atomic-texture-texture2d.slang
@@ -34,6 +34,7 @@ void test()
// METAL: .atomic_fetch_xor
// METAL: .atomic_exchange
// METAL: .atomic_compare_exchange_weak
+// METAL: .atomic_compare_exchange_weak
// METAL: .atomic_fetch_add
// METAL: .atomic_fetch_and
@@ -49,6 +50,7 @@ void test()
// METAL: .atomic_fetch_xor
// METAL: .atomic_exchange
// METAL: .atomic_compare_exchange_weak
+// METAL: .atomic_compare_exchange_weak
InterlockedAdd(intTexture2D[0], valInt);
InterlockedAnd(intTexture2D[0], valInt);
InterlockedMax(intTexture2D[0], valInt);
@@ -63,6 +65,7 @@ void test()
InterlockedXor(intTexture2D[0], valInt, originalValueInt);
InterlockedExchange(intTexture2D[0], valInt, originalValueInt);
InterlockedCompareExchange(intTexture2D[0], valInt, compareValueInt, originalValueInt);
+ InterlockedCompareStore(intTexture2D[0], valUInt, compareValueUInt);
InterlockedAdd(uintTexture2D[0], valUInt);
InterlockedAnd(uintTexture2D[0], valUInt);
@@ -78,6 +81,7 @@ void test()
InterlockedXor(uintTexture2D[0], valUInt, originalValueUInt);
InterlockedExchange(uintTexture2D[0], valUInt, originalValueUInt);
InterlockedCompareExchange(uintTexture2D[0], valUInt, compareValueUInt, originalValueUInt);
+ InterlockedCompareStore(uintTexture2D[0], valUInt, compareValueUInt);
// Texture2DArray
// METAL: .atomic_fetch_add
@@ -94,6 +98,7 @@ void test()
// METAL: .atomic_fetch_xor
// METAL: .atomic_exchange
// METAL: .atomic_compare_exchange_weak
+// METAL: .atomic_compare_exchange_weak
// METAL: .atomic_fetch_add
// METAL: .atomic_fetch_and
@@ -109,6 +114,7 @@ void test()
// METAL: .atomic_fetch_xor
// METAL: .atomic_exchange
// METAL: .atomic_compare_exchange_weak
+// METAL: .atomic_compare_exchange_weak
InterlockedAdd(intTexture2DArray[0], valInt);
InterlockedAnd(intTexture2DArray[0], valInt);
InterlockedMax(intTexture2DArray[0], valInt);
@@ -123,6 +129,7 @@ void test()
InterlockedXor(intTexture2DArray[0], valInt, originalValueInt);
InterlockedExchange(intTexture2DArray[0], valInt, originalValueInt);
InterlockedCompareExchange(intTexture2DArray[0], valInt, compareValueInt, originalValueInt);
+ InterlockedCompareStore(intTexture2DArray[0], valUInt, compareValueUInt);
InterlockedAdd(uintTexture2DArray[0], valUInt);
InterlockedAnd(uintTexture2DArray[0], valUInt);
@@ -138,6 +145,7 @@ void test()
InterlockedXor(uintTexture2DArray[0], valUInt, originalValueUInt);
InterlockedExchange(uintTexture2DArray[0], valUInt, originalValueUInt);
InterlockedCompareExchange(uintTexture2DArray[0], valUInt, compareValueUInt, originalValueUInt);
+ InterlockedCompareStore(uintTexture2DArray[0], valUInt, compareValueUInt);
}
[numthreads(1, 1, 1)]
diff --git a/tests/metal/atomic-texture-texture3d.slang b/tests/metal/atomic-texture-texture3d.slang
index 5a97cc44f..755b941da 100644
--- a/tests/metal/atomic-texture-texture3d.slang
+++ b/tests/metal/atomic-texture-texture3d.slang
@@ -30,6 +30,7 @@ void test()
// METAL: .atomic_fetch_xor
// METAL: .atomic_exchange
// METAL: .atomic_compare_exchange_weak
+// METAL: .atomic_compare_exchange_weak
// METAL: .atomic_fetch_add
// METAL: .atomic_fetch_and
@@ -45,6 +46,7 @@ void test()
// METAL: .atomic_fetch_xor
// METAL: .atomic_exchange
// METAL: .atomic_compare_exchange_weak
+// METAL: .atomic_compare_exchange_weak
InterlockedAdd(intTexture3D[0], valInt);
InterlockedAnd(intTexture3D[0], valInt);
InterlockedMax(intTexture3D[0], valInt);
@@ -59,6 +61,7 @@ void test()
InterlockedXor(intTexture3D[0], valInt, originalValueInt);
InterlockedExchange(intTexture3D[0], valInt, originalValueInt);
InterlockedCompareExchange(intTexture3D[0], valInt, compareValueInt, originalValueInt);
+ InterlockedCompareStore(intTexture3D[0], valUInt, compareValueUInt);
InterlockedAdd(uintTexture3D[0], valUInt);
InterlockedAnd(uintTexture3D[0], valUInt);
@@ -74,6 +77,7 @@ void test()
InterlockedXor(uintTexture3D[0], valUInt, originalValueUInt);
InterlockedExchange(uintTexture3D[0], valUInt, originalValueUInt);
InterlockedCompareExchange(uintTexture3D[0], valUInt, compareValueUInt, originalValueUInt);
+ InterlockedCompareStore(uintTexture3D[0], valUInt, compareValueUInt);
}