summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author venkataram-nv <vedavamadath@nvidia.com> 2025-08-09 00:46:09 -0700
committer GitHub <noreply@github.com> 2025-08-09 07:46:09 +0000
commit b7df3c7aa27301f88e31ed0a7bbf230688adab6a (patch)
tree c8a123c32d8a54ed49728c9cf504e42f0eec1d74
parent 06b8e69740a40e5cc9fdaac472a9242534876e5b (diff)
Fix atomics error diagnostics (#8117)
Fixes #8116

---------

Co-authored-by: Jay Kwak <82421531+jkwak-work@users.noreply.github.com>
-rw-r--r-- source/slang/hlsl.meta.slang 11
-rw-r--r-- source/slang/slang-emit-metal.cpp 20
-rw-r--r-- tests/bugs/byte-address-buffer-interlocked-add-f32.slang 9
-rw-r--r-- tests/hlsl-intrinsic/texture/float-atomics.slang 2
-rw-r--r-- tests/slang-extension/atomic-float-byte-address-buffer.slang 1
5 files changed, 22 insertions, 21 deletions
diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang
index 07f59ac46..b8243d6e4 100644
--- a/source/slang/hlsl.meta.slang
+++ b/source/slang/hlsl.meta.slang
@@ -22582,18 +22582,21 @@ extension _Texture<float, Shape, 0, 0, 0, $(kCoreModule_ResourceAccessReadWrite)
[__requiresNVAPI]
[ForceInline]
__glsl_extension(GL_EXT_shader_atomic_float)
- [require(glsl_hlsl_spirv, atomic_glsl_hlsl_nvapi_cuda_metal_float1)]
+ [require(glsl_hlsl_metal_spirv, atomic_glsl_hlsl_nvapi_cuda_metal_float1)]
void InterlockedAddF32(vector<uint, Shape.dimensions> coord, float value, out float originalValue)
{
__target_switch
{
- default:
- originalValue = __atomic_add(this[coord], value);
- return;
case hlsl:
__intrinsic_asm "$3 = NvInterlockedAddFp32($0, $1, $2)";
case glsl:
__intrinsic_asm "$3 = imageAtomicAdd($0, $1, $2)";
+ case metal:
+ originalValue = __atomic_add(this[coord], value);
+ return;
+ default:
+ originalValue = __atomic_add(this[coord], value);
+ return;
}
}
diff --git a/source/slang/slang-emit-metal.cpp b/source/slang/slang-emit-metal.cpp
index 7dd0c19ad..1915be58b 100644
--- a/source/slang/slang-emit-metal.cpp
+++ b/source/slang/slang-emit-metal.cpp
@@ -400,12 +400,12 @@ bool MetalSourceEmitter::tryEmitInstStmtImpl(IRInst* inst)
else
m_writer->emit(");\n");
};
- auto diagnoseFloatAtommic = [&]()
+ auto diagnoseFloatAtomic = [&]()
{
getSink()->diagnose(
inst,
Diagnostics::unsupportedTargetIntrinsic,
- "floating point atomic operation");
+ "Unsupported floating point atomic operation");
};
switch (inst->getOp())
{
@@ -433,7 +433,7 @@ bool MetalSourceEmitter::tryEmitInstStmtImpl(IRInst* inst)
case kIROp_AtomicLoad:
{
if (isFloatingType(inst->getDataType()))
- diagnoseFloatAtommic();
+ diagnoseFloatAtomic();
emitInstResultDecl(inst);
bool isImageOp = false;
@@ -488,7 +488,7 @@ bool MetalSourceEmitter::tryEmitInstStmtImpl(IRInst* inst)
case kIROp_AtomicExchange:
{
if (isFloatingType(inst->getDataType()))
- diagnoseFloatAtommic();
+ diagnoseFloatAtomic();
emitAtomicOp("atomic_exchange", "atomic_exchange_explicit");
return true;
@@ -496,7 +496,7 @@ bool MetalSourceEmitter::tryEmitInstStmtImpl(IRInst* inst)
case kIROp_AtomicCompareExchange:
{
if (isFloatingType(inst->getDataType()))
- diagnoseFloatAtommic();
+ diagnoseFloatAtomic();
bool isImageOp = false;
auto imageSubscript = isTextureAccess(inst);
@@ -543,17 +543,11 @@ bool MetalSourceEmitter::tryEmitInstStmtImpl(IRInst* inst)
}
case kIROp_AtomicAdd:
{
- if (isFloatingType(inst->getDataType()))
- diagnoseFloatAtommic();
-
emitAtomicOp("atomic_fetch_add", "atomic_fetch_add_explicit");
return true;
}
case kIROp_AtomicSub:
{
- if (isFloatingType(inst->getDataType()))
- diagnoseFloatAtommic();
-
emitAtomicOp("atomic_fetch_sub", "atomic_fetch_sub_explicit");
return true;
}
@@ -575,7 +569,7 @@ bool MetalSourceEmitter::tryEmitInstStmtImpl(IRInst* inst)
case kIROp_AtomicMin:
{
if (isFloatingType(inst->getDataType()))
- diagnoseFloatAtommic();
+ diagnoseFloatAtomic();
emitAtomicOp("atomic_fetch_min", "atomic_fetch_min_explicit");
return true;
@@ -583,7 +577,7 @@ bool MetalSourceEmitter::tryEmitInstStmtImpl(IRInst* inst)
case kIROp_AtomicMax:
{
if (isFloatingType(inst->getDataType()))
- diagnoseFloatAtommic();
+ diagnoseFloatAtomic();
emitAtomicOp("atomic_fetch_max", "atomic_fetch_max_explicit");
return true;
diff --git a/tests/bugs/byte-address-buffer-interlocked-add-f32.slang b/tests/bugs/byte-address-buffer-interlocked-add-f32.slang
index f1fccdcf3..71b95ed51 100644
--- a/tests/bugs/byte-address-buffer-interlocked-add-f32.slang
+++ b/tests/bugs/byte-address-buffer-interlocked-add-f32.slang
@@ -1,7 +1,8 @@
//TEST(compute, vulkan):COMPARE_COMPUTE_EX:-vk -compute -shaderobj -output-using-type
+//TEST(compute):COMPARE_COMPUTE_EX:-mtl -compute -shaderobj -output-using-type
-//TEST_INPUT: set buffer = [ubuffer(data=[0]), ubuffer(data=[1])]
-RWByteAddressBuffer buffer[2];
+//TEST_INPUT: ubuffer(data=[0]):name=buffer
+RWByteAddressBuffer buffer;
//TEST_INPUT:ubuffer(data=[0], stride=4):out,name outputBuffer
RWStructuredBuffer<float> outputBuffer;
@@ -9,6 +10,6 @@ RWStructuredBuffer<float> outputBuffer;
[numthreads(1,1,1)]
void computeMain( uint2 dispatchThreadID : SV_DispatchThreadID )
{
- buffer[0].InterlockedAddF32(0, 1.0f);
- outputBuffer[dispatchThreadID.x] = buffer[0].Load<float>(0);
+ buffer.InterlockedAddF32(0, 1.0f);
+ outputBuffer[dispatchThreadID.x] = buffer.Load<float>(0);
}
\ No newline at end of file
diff --git a/tests/hlsl-intrinsic/texture/float-atomics.slang b/tests/hlsl-intrinsic/texture/float-atomics.slang
index 5fce2c3b4..eefef73d3 100644
--- a/tests/hlsl-intrinsic/texture/float-atomics.slang
+++ b/tests/hlsl-intrinsic/texture/float-atomics.slang
@@ -2,6 +2,7 @@
//TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=CHECK):-vk -compute -shaderobj -output-using-type -emit-spirv-directly -render-feature hardware-device
//TEST(compute):SIMPLE(filecheck=HLSL):-target hlsl -profile cs_6_6 -entry computeMain
+//TEST(compute):SIMPLE(filecheck=MTL):-target metal -entry computeMain -stage compute
// Test atomics on a RWTexture2D<float>.
@@ -19,6 +20,7 @@ void computeMain(uint3 tid : SV_DispatchThreadID)
float originalValue;
// HLSL: {{.*}}originalValue{{.*}} = NvInterlockedAddFp32({{.*}}t{{.*}}, {{.*}}, {{.*}}1.0{{.*}});
+ // MTL: atomic_fetch_add
t.InterlockedAddF32(uint2(1, 0), 1.0, originalValue);
AllMemoryBarrier();
diff --git a/tests/slang-extension/atomic-float-byte-address-buffer.slang b/tests/slang-extension/atomic-float-byte-address-buffer.slang
index 48e209431..d0ff99908 100644
--- a/tests/slang-extension/atomic-float-byte-address-buffer.slang
+++ b/tests/slang-extension/atomic-float-byte-address-buffer.slang
@@ -6,6 +6,7 @@
//TEST(compute):COMPARE_COMPUTE_EX:-d3d12 -compute -render-features atomic-float -output-using-type -compile-arg -O2 -nvapi-slot u0 -shaderobj
//TEST(compute):COMPARE_COMPUTE_EX:-d3d12 -compute -use-dxil -render-features atomic-float -output-using-type -compile-arg -O2 -nvapi-slot u0 -shaderobj
//TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute -output-using-type -shaderobj
+//TEST(compute):COMPARE_COMPUTE_EX:-mtl -compute -output-using-type -shaderobj
// The test doesn't directly use this, but having this defined makes the 0 slot available if NVAPI is going to be used
// Only strictly necessary on the D3D11/D3D12 paths