diff options
| -rw-r--r-- | source/slang/hlsl.meta.slang | 11 | ||||
| -rw-r--r-- | source/slang/slang-emit-metal.cpp | 20 | ||||
| -rw-r--r-- | tests/bugs/byte-address-buffer-interlocked-add-f32.slang | 9 | ||||
| -rw-r--r-- | tests/hlsl-intrinsic/texture/float-atomics.slang | 2 | ||||
| -rw-r--r-- | tests/slang-extension/atomic-float-byte-address-buffer.slang | 1 |
5 files changed, 22 insertions, 21 deletions
diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang index 07f59ac46..b8243d6e4 100644 --- a/source/slang/hlsl.meta.slang +++ b/source/slang/hlsl.meta.slang @@ -22582,18 +22582,21 @@ extension _Texture<float, Shape, 0, 0, 0, $(kCoreModule_ResourceAccessReadWrite) [__requiresNVAPI] [ForceInline] __glsl_extension(GL_EXT_shader_atomic_float) - [require(glsl_hlsl_spirv, atomic_glsl_hlsl_nvapi_cuda_metal_float1)] + [require(glsl_hlsl_metal_spirv, atomic_glsl_hlsl_nvapi_cuda_metal_float1)] void InterlockedAddF32(vector<uint, Shape.dimensions> coord, float value, out float originalValue) { __target_switch { - default: - originalValue = __atomic_add(this[coord], value); - return; case hlsl: __intrinsic_asm "$3 = NvInterlockedAddFp32($0, $1, $2)"; case glsl: __intrinsic_asm "$3 = imageAtomicAdd($0, $1, $2)"; + case metal: + originalValue = __atomic_add(this[coord], value); + return; + default: + originalValue = __atomic_add(this[coord], value); + return; } } diff --git a/source/slang/slang-emit-metal.cpp b/source/slang/slang-emit-metal.cpp index 7dd0c19ad..1915be58b 100644 --- a/source/slang/slang-emit-metal.cpp +++ b/source/slang/slang-emit-metal.cpp @@ -400,12 +400,12 @@ bool MetalSourceEmitter::tryEmitInstStmtImpl(IRInst* inst) else m_writer->emit(");\n"); }; - auto diagnoseFloatAtommic = [&]() + auto diagnoseFloatAtomic = [&]() { getSink()->diagnose( inst, Diagnostics::unsupportedTargetIntrinsic, - "floating point atomic operation"); + "Unsupported floating point atomic operation"); }; switch (inst->getOp()) { @@ -433,7 +433,7 @@ bool MetalSourceEmitter::tryEmitInstStmtImpl(IRInst* inst) case kIROp_AtomicLoad: { if (isFloatingType(inst->getDataType())) - diagnoseFloatAtommic(); + diagnoseFloatAtomic(); emitInstResultDecl(inst); bool isImageOp = false; @@ -488,7 +488,7 @@ bool MetalSourceEmitter::tryEmitInstStmtImpl(IRInst* inst) case kIROp_AtomicExchange: { if (isFloatingType(inst->getDataType())) - diagnoseFloatAtommic(); + diagnoseFloatAtomic(); emitAtomicOp("atomic_exchange", "atomic_exchange_explicit"); return true; @@ -496,7 +496,7 @@ bool MetalSourceEmitter::tryEmitInstStmtImpl(IRInst* inst) case kIROp_AtomicCompareExchange: { if (isFloatingType(inst->getDataType())) - diagnoseFloatAtommic(); + diagnoseFloatAtomic(); bool isImageOp = false; auto imageSubscript = isTextureAccess(inst); @@ -543,17 +543,11 @@ bool MetalSourceEmitter::tryEmitInstStmtImpl(IRInst* inst) } case kIROp_AtomicAdd: { - if (isFloatingType(inst->getDataType())) - diagnoseFloatAtommic(); - emitAtomicOp("atomic_fetch_add", "atomic_fetch_add_explicit"); return true; } case kIROp_AtomicSub: { - if (isFloatingType(inst->getDataType())) - diagnoseFloatAtommic(); - emitAtomicOp("atomic_fetch_sub", "atomic_fetch_sub_explicit"); return true; } @@ -575,7 +569,7 @@ bool MetalSourceEmitter::tryEmitInstStmtImpl(IRInst* inst) case kIROp_AtomicMin: { if (isFloatingType(inst->getDataType())) - diagnoseFloatAtommic(); + diagnoseFloatAtomic(); emitAtomicOp("atomic_fetch_min", "atomic_fetch_min_explicit"); return true; @@ -583,7 +577,7 @@ bool MetalSourceEmitter::tryEmitInstStmtImpl(IRInst* inst) case kIROp_AtomicMax: { if (isFloatingType(inst->getDataType())) - diagnoseFloatAtommic(); + diagnoseFloatAtomic(); emitAtomicOp("atomic_fetch_max", "atomic_fetch_max_explicit"); return true; diff --git a/tests/bugs/byte-address-buffer-interlocked-add-f32.slang b/tests/bugs/byte-address-buffer-interlocked-add-f32.slang index f1fccdcf3..71b95ed51 100644 --- a/tests/bugs/byte-address-buffer-interlocked-add-f32.slang +++ b/tests/bugs/byte-address-buffer-interlocked-add-f32.slang @@ -1,7 +1,8 @@ //TEST(compute, vulkan):COMPARE_COMPUTE_EX:-vk -compute -shaderobj -output-using-type +//TEST(compute):COMPARE_COMPUTE_EX:-mtl -compute -shaderobj -output-using-type -//TEST_INPUT: set buffer = [ubuffer(data=[0]), ubuffer(data=[1])] -RWByteAddressBuffer buffer[2]; +//TEST_INPUT: ubuffer(data=[0]):name=buffer +RWByteAddressBuffer buffer; //TEST_INPUT:ubuffer(data=[0], stride=4):out,name outputBuffer RWStructuredBuffer<float> outputBuffer; @@ -9,6 +10,6 @@ RWStructuredBuffer<float> outputBuffer; [numthreads(1,1,1)] void computeMain( uint2 dispatchThreadID : SV_DispatchThreadID ) { - buffer[0].InterlockedAddF32(0, 1.0f); - outputBuffer[dispatchThreadID.x] = buffer[0].Load<float>(0); + buffer.InterlockedAddF32(0, 1.0f); + outputBuffer[dispatchThreadID.x] = buffer.Load<float>(0); }
\ No newline at end of file diff --git a/tests/hlsl-intrinsic/texture/float-atomics.slang b/tests/hlsl-intrinsic/texture/float-atomics.slang index 5fce2c3b4..eefef73d3 100644 --- a/tests/hlsl-intrinsic/texture/float-atomics.slang +++ b/tests/hlsl-intrinsic/texture/float-atomics.slang @@ -2,6 +2,7 @@ //TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=CHECK):-vk -compute -shaderobj -output-using-type -emit-spirv-directly -render-feature hardware-device //TEST(compute):SIMPLE(filecheck=HLSL):-target hlsl -profile cs_6_6 -entry computeMain +//TEST(compute):SIMPLE(filecheck=MTL):-target metal -entry computeMain -stage compute // Test atomics on a RWTexture2D<float>. @@ -19,6 +20,7 @@ void computeMain(uint3 tid : SV_DispatchThreadID) float originalValue; // HLSL: {{.*}}originalValue{{.*}} = NvInterlockedAddFp32({{.*}}t{{.*}}, {{.*}}, {{.*}}1.0{{.*}}); + // MTL: atomic_fetch_add t.InterlockedAddF32(uint2(1, 0), 1.0, originalValue); AllMemoryBarrier(); diff --git a/tests/slang-extension/atomic-float-byte-address-buffer.slang b/tests/slang-extension/atomic-float-byte-address-buffer.slang index 48e209431..d0ff99908 100644 --- a/tests/slang-extension/atomic-float-byte-address-buffer.slang +++ b/tests/slang-extension/atomic-float-byte-address-buffer.slang @@ -6,6 +6,7 @@ //TEST(compute):COMPARE_COMPUTE_EX:-d3d12 -compute -render-features atomic-float -output-using-type -compile-arg -O2 -nvapi-slot u0 -shaderobj //TEST(compute):COMPARE_COMPUTE_EX:-d3d12 -compute -use-dxil -render-features atomic-float -output-using-type -compile-arg -O2 -nvapi-slot u0 -shaderobj //TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute -output-using-type -shaderobj +//TEST(compute):COMPARE_COMPUTE_EX:-mtl -compute -output-using-type -shaderobj // The test doesn't directly use this, but having this defined makes the 0 slot available if NVAPI is going to be used // Only strictly necessary on the D3D11/D3D12 paths |
