From 030d7f45726187b5b23a3cfb9743166aa60fae30 Mon Sep 17 00:00:00 2001
From: Jay Kwak <82421531+jkwak-work@users.noreply.github.com>
Date: Mon, 15 Apr 2024 19:47:23 -0700
Subject: Support 64bit HLSL atomic functions (#3957)

Resolves #3951

This adds a few atomic functions for SM6.6.
The spec can be found here:
https://microsoft.github.io/DirectX-Specs/d3d/HLSL_SM_6_6_Int64_and_Float_Atomics.html

The new functions are:
void InterlockedAdd(inout XXX dest, in int64_t value, out int64_t original_value);
void InterlockedAdd(inout XXX dest, in uint64_t value, out uint64_t original_value);
void InterlockedAnd(inout XXX dest, in uint64_t value, out uint64_t original_value);
void InterlockedOr(inout XXX dest, in uint64_t value, out uint64_t original_value);
void InterlockedXor(inout XXX dest, in uint64_t value, out uint64_t original_value);
void InterlockedMin(inout XXX dest, in int64_t value, out int64_t original_value);
void InterlockedMin(inout XXX dest, in uint64_t value, out uint64_t original_value);
void InterlockedMax(inout XXX dest, in int64_t value, out int64_t original_value);
void InterlockedMax(inout XXX dest, in uint64_t value, out uint64_t original_value);
void InterlockedExchange(inout XXX dest, in float value, out float original_value);
void InterlockedExchange(inout XXX dest, in int64_t value, out int64_t original_value);
void InterlockedExchange(inout XXX dest, in uint64_t value, out uint64_t original_value);
void InterlockedCompareStore(inout XXX dest, in int64_t compare_value, in int64_t value);
void InterlockedCompareStore(inout XXX dest, in uint64_t compare_value, in uint64_t value);
void InterlockedCompareStoreFloatBitwise(inout XXX dest, in float compare_value, in float value);
void InterlockedCompareExchange(inout XXX dest, in int64_t compare_value, in int64_t value, out int64_t original_value);
void InterlockedCompareExchange(inout XXX dest, in uint64_t compare_value, in uint64_t value, out uint64_t original_value);
void InterlockedCompareExchangeFloatBitwise(inout XXX dest, in float compare_value, in float value, out float original_value);
void RWByteAddressBuffer::InterlockedAnd64(in uint dest_offset, in uint64_t value, out uint64_t original_value);
void RWByteAddressBuffer::InterlockedOr64(in uint dest_offset, in uint64_t value, out uint64_t original_value);
void RWByteAddressBuffer::InterlockedXor64(in uint dest_offset, in uint64_t value, out uint64_t original_value);
void RWByteAddressBuffer::InterlockedMin64(in uint dest_offset, in int64_t value, out int64_t original_value);
void RWByteAddressBuffer::InterlockedMin64(in uint dest_offset, in uint64_t value, out uint64_t original_value);
void RWByteAddressBuffer::InterlockedMax64(in uint dest_offset, in int64_t value, out int64_t original_value);
void RWByteAddressBuffer::InterlockedMax64(in uint dest_offset, in uint64_t value, out uint64_t original_value);
void RWByteAddressBuffer::InterlockedExchangeFloat(in uint dest_offset, in float value, out float original_value);
void RWByteAddressBuffer::InterlockedExchange64(in uint dest_offset, in int64_t value, out int64_t original_value);
void RWByteAddressBuffer::InterlockedExchange64(in uint dest_offset, in uint64_t value, out uint64_t original_value);
void RWByteAddressBuffer::InterlockedCompareStore64(in uint dest_offset, in int64_t compare_value, in int64_t value);
void RWByteAddressBuffer::InterlockedCompareStore64(in uint dest_offset, in uint64_t compare_value, in uint64_t value);
void RWByteAddressBuffer::InterlockedCompareStoreFloatBitwise(in uint dest_offset, in float compare_value, in float value);
void RWByteAddressBuffer::InterlockedCompareExchangeFloatBitwise(in uint dest_offset, in float compare_value, in float value, out float original_value);
---
 .../atomic/atomic-intrinsics-64bit.slang | 336 +++++++++++++++++++++
 1 file changed, 336 insertions(+)
 create mode 100644 tests/hlsl-intrinsic/atomic/atomic-intrinsics-64bit.slang

(limited to 'tests')

diff --git
a/tests/hlsl-intrinsic/atomic/atomic-intrinsics-64bit.slang b/tests/hlsl-intrinsic/atomic/atomic-intrinsics-64bit.slang
new file mode 100644
index 000000000..aa05f9750
--- /dev/null
+++ b/tests/hlsl-intrinsic/atomic/atomic-intrinsics-64bit.slang
@@ -0,0 +1,336 @@
+//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=DX12):-slang -compute -dx12 -profile cs_6_6 -use-dxil -shaderobj -output-using-type
+
+// Exercises the 64-bit and float `Interlocked*` functions defined for HLSL SM6.6
+// https://microsoft.github.io/DirectX-Specs/d3d/HLSL_SM_6_6_Int64_and_Float_Atomics.html
+
+//TEST_INPUT:ubuffer(data=[0 0 0 0], stride=4):name=f32Buffer
+RWStructuredBuffer<float> f32Buffer;
+//TEST_INPUT:ubuffer(data=[0 0 0 0 0 0 0 0], stride=8):name=u64Buffer
+RWStructuredBuffer<uint64_t> u64Buffer;
+//TEST_INPUT:ubuffer(data=[0 0 0 0 0 0 0 0], stride=8):name=i64Buffer
+RWStructuredBuffer<int64_t> i64Buffer;
+
+//TEST_INPUT:ubuffer(data=[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]):name=fBuf
+RWByteAddressBuffer fBuf;
+//TEST_INPUT:ubuffer(data=[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]):name=uBuf
+RWByteAddressBuffer uBuf;
+//TEST_INPUT:ubuffer(data=[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]):name=iBuf
+RWByteAddressBuffer iBuf;
+// NOTE(review): the first checks below rely on these groupshared initializers being applied; confirm for the D3D12 target.
+groupshared float f32Shared[4] = { 0.f, 0.f, 0.f, 0.f };
+groupshared uint64_t u64Shared[4] = { 0, 0, 0, 0 };
+groupshared int64_t i64Shared[4] = { 0, 0, 0, 0 };
+groupshared uint64_t indexAlloc = 0;
+
+//TEST_INPUT: ubuffer(data=[0 0 0 0], stride=4):out,name outputBuffer
+RWStructuredBuffer<int> outputBuffer;
+// computeMain: each thread runs every atomic on its own slot `idx` and writes 1 to outputBuffer[idx] only if all checks passed.
+[numthreads(4, 1, 1)]
+void computeMain(uint groupIndex : SV_GroupIndex, int3 dispatchThreadID: SV_DispatchThreadID)
+{
+    int idx = dispatchThreadID.x;
+    bool result = true;
+    // Scratch storage for the original_value outputs of the atomics below.
+    uint64_t u64Value[9];
+    int64_t i64Value[9];
+    float f32Value[9];
+
+    // Add: every 64-bit destination goes 0 -> 1
+    InterlockedAdd(u64Shared[idx], uint64_t(1));
+    InterlockedAdd(i64Shared[idx], int64_t(1));
+    InterlockedAdd(u64Buffer[idx], uint64_t(1));
+    InterlockedAdd(i64Buffer[idx], int64_t(1));
+    uBuf.InterlockedAdd64(idx * 8, uint64_t(1));
+    iBuf.InterlockedAdd64(idx * 8, int64_t(1));
+
+    result = result
+        && (u64Shared[idx] == 1)
+        && (i64Shared[idx] == 1)
+        && (u64Buffer[idx] == 1)
+        && (i64Buffer[idx] == 1)
+        && (uBuf.Load<uint64_t>(idx * 8) == 1)
+        && (iBuf.Load<int64_t>(idx * 8) == 1)
+        ;
+
+    // Add - original_value: returns 1, destinations become 2
+    InterlockedAdd(u64Shared[idx], uint64_t(1), u64Value[0]);
+    InterlockedAdd(i64Shared[idx], int64_t(1), i64Value[1]);
+    InterlockedAdd(u64Buffer[idx], uint64_t(1), u64Value[2]);
+    InterlockedAdd(i64Buffer[idx], int64_t(1), i64Value[3]);
+    uBuf.InterlockedAdd64(idx * 8, uint64_t(1), u64Value[4]);
+    iBuf.InterlockedAdd64(idx * 8, int64_t(1), i64Value[5]);
+
+    result = result
+        && (u64Value[0] == 1)
+        && (i64Value[1] == 1)
+        && (u64Value[2] == 1)
+        && (i64Value[3] == 1)
+        && (u64Value[4] == 1)
+        && (i64Value[5] == 1)
+        && (u64Shared[idx] == 2)
+        && (i64Shared[idx] == 2)
+        && (u64Buffer[idx] == 2)
+        && (i64Buffer[idx] == 2)
+        && (uBuf.Load<uint64_t>(idx * 8) == 2)
+        && (iBuf.Load<int64_t>(idx * 8) == 2)
+        ;
+
+    // Bitwise-And: 2 & 3 == 2
+    InterlockedAnd(u64Shared[idx], uint64_t(3));
+    InterlockedAnd(u64Buffer[idx], uint64_t(3));
+    uBuf.InterlockedAnd64(idx * 8, uint64_t(3));
+
+    result = result
+        && (u64Shared[idx] == 2)
+        && (u64Buffer[idx] == 2)
+        && (uBuf.Load<uint64_t>(idx * 8) == 2)
+        ;
+
+    // And - original_value: returns 2, then 2 & 1 == 0
+    InterlockedAnd(u64Shared[idx], uint64_t(1), u64Value[0]);
+    InterlockedAnd(u64Buffer[idx], uint64_t(1), u64Value[1]);
+    uBuf.InterlockedAnd64(idx * 8, uint64_t(1), u64Value[2]);
+
+    result = result
+        && (u64Value[0] == 2)
+        && (u64Value[1] == 2)
+        && (u64Value[2] == 2)
+        && (u64Shared[idx] == 0)
+        && (u64Buffer[idx] == 0)
+        && (uBuf.Load<uint64_t>(idx * 8) == 0)
+        ;
+
+    // Bitwise-Or: 0 | 1 == 1
+    InterlockedOr(u64Shared[idx], uint64_t(1));
+    InterlockedOr(u64Buffer[idx], uint64_t(1));
+    uBuf.InterlockedOr64(idx * 8, uint64_t(1));
+
+    result = result
+        && (u64Shared[idx] == 1)
+        && (u64Buffer[idx] == 1)
+        && (uBuf.Load<uint64_t>(idx * 8) == 1)
+        ;
+
+    // Or - original_value: returns 1, then 1 | 2 == 3
+    InterlockedOr(u64Shared[idx], uint64_t(2), u64Value[0]);
+    InterlockedOr(u64Buffer[idx], uint64_t(2), u64Value[1]);
+    uBuf.InterlockedOr64(idx * 8, uint64_t(2), u64Value[2]);
+
+    result = result
+        && (u64Value[0] == 1)
+        && (u64Value[1] == 1)
+        && (u64Value[2] == 1)
+        && (u64Shared[idx] == 3)
+        && (u64Buffer[idx] == 3)
+        && (uBuf.Load<uint64_t>(idx * 8) == 3)
+        ;
+
+    // Bitwise-Xor: 3 ^ 5 == 6
+    InterlockedXor(u64Shared[idx], uint64_t(5));
+    InterlockedXor(u64Buffer[idx], uint64_t(5));
+    uBuf.InterlockedXor64(idx * 8, uint64_t(5));
+
+    result = result
+        && (u64Shared[idx] == 6)
+        && (u64Buffer[idx] == 6)
+        && (uBuf.Load<uint64_t>(idx * 8) == 6)
+        ;
+
+    // Xor - original_value: returns 6, then 6 ^ 1 == 7
+    InterlockedXor(u64Shared[idx], uint64_t(1), u64Value[0]);
+    InterlockedXor(u64Buffer[idx], uint64_t(1), u64Value[1]);
+    uBuf.InterlockedXor64(idx * 8, uint64_t(1), u64Value[2]);
+
+    result = result
+        && (u64Value[0] == 6)
+        && (u64Value[1] == 6)
+        && (u64Value[2] == 6)
+        && (u64Shared[idx] == 7)
+        && (u64Buffer[idx] == 7)
+        && (uBuf.Load<uint64_t>(idx * 8) == 7)
+        ;
+
+    // Min: unsigned slots hold 7 and signed slots hold 2 (last touched by Add); both become 1
+    InterlockedMin(u64Shared[idx], uint64_t(1));
+    InterlockedMin(i64Shared[idx], int64_t(1));
+    InterlockedMin(u64Buffer[idx], uint64_t(1));
+    InterlockedMin(i64Buffer[idx], int64_t(1));
+    uBuf.InterlockedMin64(idx * 8, uint64_t(1));
+    iBuf.InterlockedMin64(idx * 8, int64_t(1));
+
+    result = result
+        && (u64Shared[idx] == 1)
+        && (i64Shared[idx] == 1)
+        && (u64Buffer[idx] == 1)
+        && (i64Buffer[idx] == 1)
+        && (uBuf.Load<uint64_t>(idx * 8) == 1)
+        && (iBuf.Load<int64_t>(idx * 8) == 1)
+        ;
+
+    // Min - original_value: returns 1, min(1, 2) leaves 1
+    InterlockedMin(u64Shared[idx], uint64_t(2), u64Value[0]);
+    InterlockedMin(i64Shared[idx], int64_t(2), i64Value[1]);
+    InterlockedMin(u64Buffer[idx], uint64_t(2), u64Value[2]);
+    InterlockedMin(i64Buffer[idx], int64_t(2), i64Value[3]);
+    uBuf.InterlockedMin64(idx * 8, uint64_t(2), u64Value[4]);
+    iBuf.InterlockedMin64(idx * 8, int64_t(2), i64Value[5]);
+
+    result = result
+        && (u64Value[0] == 1)
+        && (i64Value[1] == 1)
+        && (u64Value[2] == 1)
+        && (i64Value[3] == 1)
+        && (u64Value[4] == 1)
+        && (i64Value[5] == 1)
+        && (u64Shared[idx] == 1)
+        && (i64Shared[idx] == 1)
+        && (u64Buffer[idx] == 1)
+        && (i64Buffer[idx] == 1)
+        && (uBuf.Load<uint64_t>(idx * 8) == 1)
+        && (iBuf.Load<int64_t>(idx * 8) == 1)
+        ;
+
+    // Max: max(1, 2) == 2
+    InterlockedMax(u64Shared[idx], uint64_t(2));
+    InterlockedMax(i64Shared[idx], int64_t(2));
+    InterlockedMax(u64Buffer[idx], uint64_t(2));
+    InterlockedMax(i64Buffer[idx], int64_t(2));
+    uBuf.InterlockedMax64(idx * 8, uint64_t(2));
+    iBuf.InterlockedMax64(idx * 8, int64_t(2));
+
+    result = result
+        && (u64Shared[idx] == 2)
+        && (i64Shared[idx] == 2)
+        && (u64Buffer[idx] == 2)
+        && (i64Buffer[idx] == 2)
+        && (uBuf.Load<uint64_t>(idx * 8) == 2)
+        && (iBuf.Load<int64_t>(idx * 8) == 2)
+        ;
+
+    // Max - original_value: returns 2, max(2, 0) leaves 2
+    InterlockedMax(u64Shared[idx], uint64_t(0), u64Value[0]);
+    InterlockedMax(i64Shared[idx], int64_t(0), i64Value[1]);
+    InterlockedMax(u64Buffer[idx], uint64_t(0), u64Value[2]);
+    InterlockedMax(i64Buffer[idx], int64_t(0), i64Value[3]);
+    uBuf.InterlockedMax64(idx * 8, uint64_t(0), u64Value[4]);
+    iBuf.InterlockedMax64(idx * 8, int64_t(0), i64Value[5]);
+
+    result = result
+        && (u64Value[0] == 2)
+        && (i64Value[1] == 2)
+        && (u64Value[2] == 2)
+        && (i64Value[3] == 2)
+        && (u64Value[4] == 2)
+        && (i64Value[5] == 2)
+        && (u64Shared[idx] == 2)
+        && (i64Shared[idx] == 2)
+        && (u64Buffer[idx] == 2)
+        && (i64Buffer[idx] == 2)
+        && (uBuf.Load<uint64_t>(idx * 8) == 2)
+        && (iBuf.Load<int64_t>(idx * 8) == 2)
+        ;
+
+    // Exchange: float slots return 0, 64-bit slots return 2; everything becomes 1
+    InterlockedExchange(f32Shared[idx], float(1), f32Value[0]);
+    InterlockedExchange(u64Shared[idx], uint64_t(1), u64Value[1]);
+    InterlockedExchange(i64Shared[idx], int64_t(1), i64Value[2]);
+    InterlockedExchange(f32Buffer[idx], float(1), f32Value[3]);
+    InterlockedExchange(u64Buffer[idx], uint64_t(1), u64Value[4]);
+    InterlockedExchange(i64Buffer[idx], int64_t(1), i64Value[5]);
+    fBuf.InterlockedExchangeFloat(idx * 8, float(1), f32Value[6]);
+    uBuf.InterlockedExchange64(idx * 8, uint64_t(1), u64Value[7]);
+    iBuf.InterlockedExchange64(idx * 8, int64_t(1), i64Value[8]);
+
+    result = result
+        && (f32Value[0] == 0)
+        && (u64Value[1] == 2)
+        && (i64Value[2] == 2)
+        && (f32Value[3] == 0)
+        && (u64Value[4] == 2)
+        && (i64Value[5] == 2)
+        && (f32Value[6] == 0)
+        && (u64Value[7] == 2)
+        && (i64Value[8] == 2)
+        && (f32Shared[idx] == 1.f)
+        && (u64Shared[idx] == 1)
+        && (i64Shared[idx] == 1)
+        && (f32Buffer[idx] == 1.f)
+        && (u64Buffer[idx] == 1)
+        && (i64Buffer[idx] == 1)
+        && (fBuf.Load<float>(idx * 8) == 1.f)
+        && (uBuf.Load<uint64_t>(idx * 8) == 1)
+        && (iBuf.Load<int64_t>(idx * 8) == 1)
+        ;
+
+    // CompareStore: slots equal the compare value 1, so 0 is stored
+    InterlockedCompareStore(u64Shared[idx], uint64_t(1), uint64_t(0));
+    InterlockedCompareStore(i64Shared[idx], int64_t(1), int64_t(0));
+    InterlockedCompareStore(u64Buffer[idx], uint64_t(1), uint64_t(0));
+    InterlockedCompareStore(i64Buffer[idx], int64_t(1), int64_t(0));
+    uBuf.InterlockedCompareStore64(idx * 8, uint64_t(1), uint64_t(0));
+    iBuf.InterlockedCompareStore64(idx * 8, int64_t(1), int64_t(0));
+
+    result = result
+        && (u64Shared[idx] == 0)
+        && (i64Shared[idx] == 0)
+        && (u64Buffer[idx] == 0)
+        && (i64Buffer[idx] == 0)
+        && (uBuf.Load<uint64_t>(idx * 8) == 0)
+        && (iBuf.Load<int64_t>(idx * 8) == 0)
+        ;
+
+    // CompareStoreFloatBitwise: float slots hold 1.0, so 0.0 is stored
+    InterlockedCompareStoreFloatBitwise(f32Shared[idx], float(1), float(0));
+    InterlockedCompareStoreFloatBitwise(f32Buffer[idx], float(1), float(0));
+    fBuf.InterlockedCompareStoreFloatBitwise(idx * 8, float(1), float(0));
+
+    result = result
+        && (f32Shared[idx] == float(0))
+        && (f32Buffer[idx] == float(0))
+        && (fBuf.Load<float>(idx * 8) == float(0))
+        ;
+
+    // CompareExchange: slots equal the compare value 0, so 1 is stored and 0 is returned
+    InterlockedCompareExchange(u64Shared[idx], uint64_t(0), uint64_t(1), u64Value[0]);
+    InterlockedCompareExchange(i64Shared[idx], int64_t(0), int64_t(1), i64Value[1]);
+    InterlockedCompareExchange(u64Buffer[idx], uint64_t(0), uint64_t(1), u64Value[2]);
+    InterlockedCompareExchange(i64Buffer[idx], int64_t(0), int64_t(1), i64Value[3]);
+    uBuf.InterlockedCompareExchange64(idx * 8, uint64_t(0), uint64_t(1), u64Value[4]);
+    iBuf.InterlockedCompareExchange64(idx * 8, int64_t(0), int64_t(1), i64Value[5]);
+
+    result = result
+        && (u64Value[0] == 0)
+        && (i64Value[1] == 0)
+        && (u64Value[2] == 0)
+        && (i64Value[3] == 0)
+        && (u64Value[4] == 0)
+        && (i64Value[5] == 0)
+        && (u64Shared[idx] == 1)
+        && (i64Shared[idx] == 1)
+        && (u64Buffer[idx] == 1)
+        && (i64Buffer[idx] == 1)
+        && (uBuf.Load<uint64_t>(idx * 8) == 1)
+        && (iBuf.Load<int64_t>(idx * 8) == 1)
+        ;
+
+    // CompareExchangeFloatBitwise: float slots hold 0.0, so 1.0 is stored and 0.0 is returned
+    InterlockedCompareExchangeFloatBitwise(f32Shared[idx], float(0), float(1), f32Value[0]);
+    InterlockedCompareExchangeFloatBitwise(f32Buffer[idx], float(0), float(1), f32Value[1]);
+    fBuf.InterlockedCompareExchangeFloatBitwise(idx * 8, float(0), float(1), f32Value[2]);
+
+    result = result
+        && (f32Value[0] == float(0))
+        && (f32Value[1] == float(0))
+        && (f32Value[2] == float(0))
+        && (f32Shared[idx] == float(1))
+        && (f32Buffer[idx] == float(1))
+        && (fBuf.Load<float>(idx * 8) == float(1))
+        ;
+
+    outputBuffer[idx] = int(result);
+}
+
+// DX12: 1
+// DX12-NEXT: 1
+// DX12-NEXT: 1
+// DX12-NEXT: 1
-- 
cgit v1.2.3