diff options
| author | Jay Kwak <82421531+jkwak-work@users.noreply.github.com> | 2024-04-15 19:47:23 -0700 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-04-15 19:47:23 -0700 |
| commit | 030d7f45726187b5b23a3cfb9743166aa60fae30 (patch) | |
| tree | a5618abd8d30034458778543db4122d2df9c7e1b /tests | |
| parent | 54745ac9aff75c579f886980dd3397c79d0f3e00 (diff) | |
Support 64bit HLSL atomic functions (#3957)
Resolves #3951
This adds a few atomic functions for SM6.6.
The spec can be found here:
https://microsoft.github.io/DirectX-Specs/d3d/HLSL_SM_6_6_Int64_and_Float_Atomics.html
The new functions are:
void InterlockedAdd(inout XXX dest, in int64_t value, out int64_t original_value);
void InterlockedAdd(inout XXX dest, in uint64_t value, out uint64_t original_value);
void InterlockedAnd(inout XXX dest, in uint64_t value, out uint64_t original_value);
void InterlockedOr(inout XXX dest, in uint64_t value, out uint64_t original_value);
void InterlockedXor(inout XXX dest, in uint64_t value, out uint64_t original_value);
void InterlockedMin(inout XXX dest, in int64_t value, out int64_t original_value);
void InterlockedMin(inout XXX dest, in uint64_t value, out uint64_t original_value);
void InterlockedMax(inout XXX dest, in int64_t value, out int64_t original_value);
void InterlockedMax(inout XXX dest, in uint64_t value, out uint64_t original_value);
void InterlockedExchange(inout XXX dest, in float value, out float original_value);
void InterlockedExchange(inout XXX dest, in int64_t value, out int64_t original_value);
void InterlockedExchange(inout XXX dest, in uint64_t value, out uint64_t original_value);
void InterlockedCompareStore(inout XXX dest, in int64_t compare_value, in int64_t value);
void InterlockedCompareStore(inout XXX dest, in uint64_t compare_value, in uint64_t value);
void InterlockedCompareStoreFloatBitwise(inout XXX dest, in float compare_value, in float value);
void InterlockedCompareExchange(inout XXX dest, in int64_t compare_value, in int64_t value, out int64_t original_value);
void InterlockedCompareExchange(inout XXX dest, in uint64_t compare_value, in uint64_t value, out uint64_t original_value);
void InterlockedCompareExchangeFloatBitwise(inout XXX dest, in float compare_value, in float value, out float original_value);
void RWByteAddressBuffer::InterlockedAnd64(in uint dest_offset, in uint64_t value, out uint64_t original_value);
void RWByteAddressBuffer::InterlockedOr64(in uint dest_offset, in uint64_t value, out uint64_t original_value);
void RWByteAddressBuffer::InterlockedXor64(in uint dest_offset, in uint64_t value, out uint64_t original_value);
void RWByteAddressBuffer::InterlockedMin64(in uint dest_offset, in int64_t value, out int64_t original_value);
void RWByteAddressBuffer::InterlockedMin64(in uint dest_offset, in uint64_t value, out uint64_t original_value);
void RWByteAddressBuffer::InterlockedMax64(in uint dest_offset, in int64_t value, out int64_t original_value);
void RWByteAddressBuffer::InterlockedMax64(in uint dest_offset, in uint64_t value, out uint64_t original_value);
void RWByteAddressBuffer::InterlockedExchangeFloat(in uint dest_offset, in float value, out float original_value);
void RWByteAddressBuffer::InterlockedExchange64(in uint dest_offset, in int64_t value, out int64_t original_value);
void RWByteAddressBuffer::InterlockedExchange64(in uint dest_offset, in uint64_t value, out uint64_t original_value);
void RWByteAddressBuffer::InterlockedCompareStore64(in uint dest_offset, in int64_t compare_value, in int64_t value);
void RWByteAddressBuffer::InterlockedCompareStore64(in uint dest_offset, in uint64_t compare_value, in uint64_t value);
void RWByteAddressBuffer::InterlockedCompareStoreFloatBitwise(in uint dest_offset, in float compare_value, in float value);
void RWByteAddressBuffer::InterlockedCompareExchangeFloatBitwise(in uint dest_offset, in float compare_value, in float value, out float original_value);
Diffstat (limited to 'tests')
| -rw-r--r-- | tests/hlsl-intrinsic/atomic/atomic-intrinsics-64bit.slang | 336 |
1 files changed, 336 insertions, 0 deletions
diff --git a/tests/hlsl-intrinsic/atomic/atomic-intrinsics-64bit.slang b/tests/hlsl-intrinsic/atomic/atomic-intrinsics-64bit.slang
new file mode 100644
index 000000000..aa05f9750
--- /dev/null
+++ b/tests/hlsl-intrinsic/atomic/atomic-intrinsics-64bit.slang
@@ -0,0 +1,336 @@
+//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=DX12):-slang -compute -dx12 -profile cs_6_6 -use-dxil -shaderobj -output-using-type
+
+// Exercises the 64-bit integer and float `Interlocked*` functions defined for HLSL SM6.6
+// https://microsoft.github.io/DirectX-Specs/d3d/HLSL_SM_6_6_Int64_and_Float_Atomics.html
+
+//TEST_INPUT:ubuffer(data=[0 0 0 0], stride=4):name=f32Buffer
+RWStructuredBuffer<float> f32Buffer;
+//TEST_INPUT:ubuffer(data=[0 0 0 0 0 0 0 0], stride=8):name=u64Buffer
+RWStructuredBuffer<uint64_t> u64Buffer;
+//TEST_INPUT:ubuffer(data=[0 0 0 0 0 0 0 0], stride=8):name=i64Buffer
+RWStructuredBuffer<int64_t> i64Buffer;
+
+//TEST_INPUT:ubuffer(data=[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]):name=fBuf
+RWByteAddressBuffer fBuf;
+//TEST_INPUT:ubuffer(data=[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]):name=uBuf
+RWByteAddressBuffer uBuf;
+//TEST_INPUT:ubuffer(data=[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]):name=iBuf
+RWByteAddressBuffer iBuf;
+
+groupshared float f32Shared[4] = { 0.f, 0.f, 0.f, 0.f };
+groupshared uint64_t u64Shared[4] = { 0, 0, 0, 0 };
+groupshared int64_t i64Shared[4] = { 0, 0, 0, 0 };
+groupshared uint64_t indexAlloc = 0;
+
+//TEST_INPUT: ubuffer(data=[0 0 0 0], stride=4):out,name outputBuffer
+RWStructuredBuffer<int> outputBuffer;
+
+[numthreads(4, 1, 1)]
+void computeMain(uint groupIndex : SV_GroupIndex, int3 dispatchThreadID: SV_DispatchThreadID)
+{
+    int idx = dispatchThreadID.x;
+    bool result = true;
+
+    uint64_t u64Value[9];
+    int64_t i64Value[9];
+    float f32Value[9];
+
+    // Add (destinations start at 0, so each becomes 1)
+    InterlockedAdd(u64Shared[idx], uint64_t(1));
+    InterlockedAdd(i64Shared[idx], int64_t(1));
+    InterlockedAdd(u64Buffer[idx], uint64_t(1));
+    InterlockedAdd(i64Buffer[idx], int64_t(1));
+    uBuf.InterlockedAdd64(idx * 8, uint64_t(1));
+    iBuf.InterlockedAdd64(idx * 8, int64_t(1));
+
+    result = result
+        && (u64Shared[idx] == 1)
+        && (i64Shared[idx] == 1)
+        && (u64Buffer[idx] == 1)
+        && (i64Buffer[idx] == 1)
+        && (uBuf.Load<uint64_t>(idx * 8) == 1)
+        && (iBuf.Load< int64_t>(idx * 8) == 1)
+        ;
+
+    // Add - original_value (returns the previous 1; destinations become 2)
+    InterlockedAdd(u64Shared[idx], uint64_t(1), u64Value[0]);
+    InterlockedAdd(i64Shared[idx], int64_t(1), i64Value[1]);
+    InterlockedAdd(u64Buffer[idx], uint64_t(1), u64Value[2]);
+    InterlockedAdd(i64Buffer[idx], int64_t(1), i64Value[3]);
+    uBuf.InterlockedAdd64(idx * 8, uint64_t(1), u64Value[4]);
+    iBuf.InterlockedAdd64(idx * 8, int64_t(1), i64Value[5]);
+
+    result = result
+        && (u64Value[0] == 1)
+        && (i64Value[1] == 1)
+        && (u64Value[2] == 1)
+        && (i64Value[3] == 1)
+        && (u64Value[4] == 1)
+        && (i64Value[5] == 1)
+        && (u64Shared[idx] == 2)
+        && (i64Shared[idx] == 2)
+        && (u64Buffer[idx] == 2)
+        && (i64Buffer[idx] == 2)
+        && (uBuf.Load<uint64_t>(idx * 8) == 2)
+        && (iBuf.Load< int64_t>(idx * 8) == 2)
+        ;
+
+    // Bitwise-And (2 & 3 leaves 2 unchanged)
+    InterlockedAnd(u64Shared[idx], uint64_t(3));
+    InterlockedAnd(u64Buffer[idx], uint64_t(3));
+    uBuf.InterlockedAnd64(idx * 8, uint64_t(3));
+
+    result = result
+        && (u64Shared[idx] == 2)
+        && (u64Buffer[idx] == 2)
+        && (uBuf.Load<uint64_t>(idx * 8) == 2)
+        ;
+
+    // And - original_value (returns 2; 2 & 1 clears the destination to 0)
+    InterlockedAnd(u64Shared[idx], uint64_t(1), u64Value[0]);
+    InterlockedAnd(u64Buffer[idx], uint64_t(1), u64Value[1]);
+    uBuf.InterlockedAnd64(idx * 8, uint64_t(1), u64Value[2]);
+
+    result = result
+        && (u64Value[0] == 2)
+        && (u64Value[1] == 2)
+        && (u64Value[2] == 2)
+        && (u64Shared[idx] == 0)
+        && (u64Buffer[idx] == 0)
+        && (uBuf.Load<uint64_t>(idx * 8) == 0)
+        ;
+
+    // Bitwise-Or (0 | 1 == 1)
+    InterlockedOr(u64Shared[idx], uint64_t(1));
+    InterlockedOr(u64Buffer[idx], uint64_t(1));
+    uBuf.InterlockedOr64(idx * 8, uint64_t(1));
+
+    result = result
+        && (u64Shared[idx] == 1)
+        && (u64Buffer[idx] == 1)
+        && (uBuf.Load<uint64_t>(idx * 8) == 1)
+        ;
+
+    // Or - original_value (returns 1; 1 | 2 == 3)
+    InterlockedOr(u64Shared[idx], uint64_t(2), u64Value[0]);
+    InterlockedOr(u64Buffer[idx], uint64_t(2), u64Value[1]);
+    uBuf.InterlockedOr64(idx * 8, uint64_t(2), u64Value[2]);
+
+    result = result
+        && (u64Value[0] == 1)
+        && (u64Value[1] == 1)
+        && (u64Value[2] == 1)
+        && (u64Shared[idx] == 3)
+        && (u64Buffer[idx] == 3)
+        && (uBuf.Load<uint64_t>(idx * 8) == 3)
+        ;
+
+    // Bitwise-Xor (3 ^ 5 == 6)
+    InterlockedXor(u64Shared[idx], uint64_t(5));
+    InterlockedXor(u64Buffer[idx], uint64_t(5));
+    uBuf.InterlockedXor64(idx * 8, uint64_t(5));
+
+    result = result
+        && (u64Shared[idx] == 6)
+        && (u64Buffer[idx] == 6)
+        && (uBuf.Load<uint64_t>(idx * 8) == 6)
+        ;
+
+    // Xor - original_value (returns 6; 6 ^ 1 == 7)
+    InterlockedXor(u64Shared[idx], uint64_t(1), u64Value[0]);
+    InterlockedXor(u64Buffer[idx], uint64_t(1), u64Value[1]);
+    uBuf.InterlockedXor64(idx * 8, uint64_t(1), u64Value[2]);
+
+    result = result
+        && (u64Value[0] == 6)
+        && (u64Value[1] == 6)
+        && (u64Value[2] == 6)
+        && (u64Shared[idx] == 7)
+        && (u64Buffer[idx] == 7)
+        && (uBuf.Load<uint64_t>(idx * 8) == 7)
+        ;
+
+    // Min (unsigned destinations hold 7, signed ones still hold 2; all become 1)
+    InterlockedMin(u64Shared[idx], uint64_t(1));
+    InterlockedMin(i64Shared[idx], int64_t(1));
+    InterlockedMin(u64Buffer[idx], uint64_t(1));
+    InterlockedMin(i64Buffer[idx], int64_t(1));
+    uBuf.InterlockedMin64(idx * 8, uint64_t(1));
+    iBuf.InterlockedMin64(idx * 8, int64_t(1));
+
+    result = result
+        && (u64Shared[idx] == 1)
+        && (i64Shared[idx] == 1)
+        && (u64Buffer[idx] == 1)
+        && (i64Buffer[idx] == 1)
+        && (uBuf.Load<uint64_t>(idx * 8) == 1)
+        && (iBuf.Load< int64_t>(idx * 8) == 1)
+        ;
+
+    // Min - original_value (returns 1; min(1, 2) leaves 1)
+    InterlockedMin(u64Shared[idx], uint64_t(2), u64Value[0]);
+    InterlockedMin(i64Shared[idx], int64_t(2), i64Value[1]);
+    InterlockedMin(u64Buffer[idx], uint64_t(2), u64Value[2]);
+    InterlockedMin(i64Buffer[idx], int64_t(2), i64Value[3]);
+    uBuf.InterlockedMin64(idx * 8, uint64_t(2), u64Value[4]);
+    iBuf.InterlockedMin64(idx * 8, int64_t(2), i64Value[5]);
+
+    result = result
+        && (u64Value[0] == 1)
+        && (i64Value[1] == 1)
+        && (u64Value[2] == 1)
+        && (i64Value[3] == 1)
+        && (u64Value[4] == 1)
+        && (i64Value[5] == 1)
+        && (u64Shared[idx] == 1)
+        && (i64Shared[idx] == 1)
+        && (u64Buffer[idx] == 1)
+        && (i64Buffer[idx] == 1)
+        && (uBuf.Load<uint64_t>(idx * 8) == 1)
+        && (iBuf.Load< int64_t>(idx * 8) == 1)
+        ;
+
+    // Max (max(1, 2) == 2)
+    InterlockedMax(u64Shared[idx], uint64_t(2));
+    InterlockedMax(i64Shared[idx], int64_t(2));
+    InterlockedMax(u64Buffer[idx], uint64_t(2));
+    InterlockedMax(i64Buffer[idx], int64_t(2));
+    uBuf.InterlockedMax64(idx * 8, uint64_t(2));
+    iBuf.InterlockedMax64(idx * 8, int64_t(2));
+
+    result = result
+        && (u64Shared[idx] == 2)
+        && (i64Shared[idx] == 2)
+        && (u64Buffer[idx] == 2)
+        && (i64Buffer[idx] == 2)
+        && (uBuf.Load<uint64_t>(idx * 8) == 2)
+        && (iBuf.Load< int64_t>(idx * 8) == 2)
+        ;
+
+    // Max - original_value (returns 2; max(2, 0) leaves 2)
+    InterlockedMax(u64Shared[idx], uint64_t(0), u64Value[0]);
+    InterlockedMax(i64Shared[idx], int64_t(0), i64Value[1]);
+    InterlockedMax(u64Buffer[idx], uint64_t(0), u64Value[2]);
+    InterlockedMax(i64Buffer[idx], int64_t(0), i64Value[3]);
+    uBuf.InterlockedMax64(idx * 8, uint64_t(0), u64Value[4]);
+    iBuf.InterlockedMax64(idx * 8, int64_t(0), i64Value[5]);
+
+    result = result
+        && (u64Value[0] == 2)
+        && (i64Value[1] == 2)
+        && (u64Value[2] == 2)
+        && (i64Value[3] == 2)
+        && (u64Value[4] == 2)
+        && (i64Value[5] == 2)
+        && (u64Shared[idx] == 2)
+        && (i64Shared[idx] == 2)
+        && (u64Buffer[idx] == 2)
+        && (i64Buffer[idx] == 2)
+        && (uBuf.Load<uint64_t>(idx * 8) == 2)
+        && (iBuf.Load< int64_t>(idx * 8) == 2)
+        ;
+
+    // Exchange (float destinations return their initial 0, integer ones return 2; all become 1)
+    InterlockedExchange(f32Shared[idx], float(1), f32Value[0]);
+    InterlockedExchange(u64Shared[idx], uint64_t(1), u64Value[1]);
+    InterlockedExchange(i64Shared[idx], int64_t(1), i64Value[2]);
+    InterlockedExchange(f32Buffer[idx], float(1), f32Value[3]);
+    InterlockedExchange(u64Buffer[idx], uint64_t(1), u64Value[4]);
+    InterlockedExchange(i64Buffer[idx], int64_t(1), i64Value[5]);
+    fBuf.InterlockedExchangeFloat(idx * 8, float(1), f32Value[6]);
+    uBuf.InterlockedExchange64(idx * 8, uint64_t(1), u64Value[7]);
+    iBuf.InterlockedExchange64(idx * 8, int64_t(1), i64Value[8]);
+
+    result = result
+        && (f32Value[0] == 0)
+        && (u64Value[1] == 2)
+        && (i64Value[2] == 2)
+        && (f32Value[3] == 0)
+        && (u64Value[4] == 2)
+        && (i64Value[5] == 2)
+        && (f32Value[6] == 0)
+        && (u64Value[7] == 2)
+        && (i64Value[8] == 2)
+        && (f32Shared[idx] == 1.f)
+        && (u64Shared[idx] == 1)
+        && (i64Shared[idx] == 1)
+        && (f32Buffer[idx] == 1.f)
+        && (u64Buffer[idx] == 1)
+        && (i64Buffer[idx] == 1)
+        && (fBuf.Load< float>(idx * 8) == 1.f)
+        && (uBuf.Load<uint64_t>(idx * 8) == 1)
+        && (iBuf.Load< int64_t>(idx * 8) == 1)
+        ;
+
+    // CompareStore (destinations equal the compare value 1, so 0 is stored)
+    InterlockedCompareStore(u64Shared[idx], uint64_t(1), uint64_t(0));
+    InterlockedCompareStore(i64Shared[idx], int64_t(1), int64_t(0));
+    InterlockedCompareStore(u64Buffer[idx], uint64_t(1), uint64_t(0));
+    InterlockedCompareStore(i64Buffer[idx], int64_t(1), int64_t(0));
+    uBuf.InterlockedCompareStore64(idx * 8, uint64_t(1), uint64_t(0));
+    iBuf.InterlockedCompareStore64(idx * 8, int64_t(1), int64_t(0));
+
+    result = result
+        && (u64Shared[idx] == 0)
+        && (i64Shared[idx] == 0)
+        && (u64Buffer[idx] == 0)
+        && (i64Buffer[idx] == 0)
+        && (uBuf.Load<uint64_t>(idx * 8) == 0)
+        && (iBuf.Load< int64_t>(idx * 8) == 0)
+        ;
+
+    // CompareStoreFloatBitwise (float destinations hold 1 from the exchange, so 0 is stored)
+    InterlockedCompareStoreFloatBitwise(f32Shared[idx], float(1), float(0));
+    InterlockedCompareStoreFloatBitwise(f32Buffer[idx], float(1), float(0));
+    fBuf.InterlockedCompareStoreFloatBitwise(idx * 8, float(1), float(0));
+
+    result = result
+        && (f32Shared[idx] == float(0))
+        && (f32Buffer[idx] == float(0))
+        && (fBuf.Load<float>(idx * 8) == float(0))
+        ;
+
+    // CompareExchange (destinations equal the compare value 0, so 1 is stored and 0 is returned)
+    InterlockedCompareExchange(u64Shared[idx], uint64_t(0), uint64_t(1), u64Value[0]);
+    InterlockedCompareExchange(i64Shared[idx], int64_t(0), int64_t(1), i64Value[1]);
+    InterlockedCompareExchange(u64Buffer[idx], uint64_t(0), uint64_t(1), u64Value[2]);
+    InterlockedCompareExchange(i64Buffer[idx], int64_t(0), int64_t(1), i64Value[3]);
+    uBuf.InterlockedCompareExchange64(idx * 8, uint64_t(0), uint64_t(1), u64Value[4]);
+    iBuf.InterlockedCompareExchange64(idx * 8, int64_t(0), int64_t(1), i64Value[5]);
+
+    result = result
+        && (u64Value[0] == 0)
+        && (i64Value[1] == 0)
+        && (u64Value[2] == 0)
+        && (i64Value[3] == 0)
+        && (u64Value[4] == 0)
+        && (i64Value[5] == 0)
+        && (u64Shared[idx] == 1)
+        && (i64Shared[idx] == 1)
+        && (u64Buffer[idx] == 1)
+        && (i64Buffer[idx] == 1)
+        && (uBuf.Load<uint64_t>(idx * 8) == 1)
+        && (iBuf.Load< int64_t>(idx * 8) == 1)
+        ;
+
+    // CompareExchangeFloatBitwise (float destinations equal the compare value 0, so 1 is stored and 0 is returned)
+    InterlockedCompareExchangeFloatBitwise(f32Shared[idx], float(0), float(1), f32Value[0]);
+    InterlockedCompareExchangeFloatBitwise(f32Buffer[idx], float(0), float(1), f32Value[1]);
+    fBuf.InterlockedCompareExchangeFloatBitwise(idx * 8, float(0), float(1), f32Value[2]);
+
+    result = result
+        && (f32Value[0] == float(0))
+        && (f32Value[1] == float(0))
+        && (f32Value[2] == float(0))
+        && (f32Shared[idx] == float(1))
+        && (f32Buffer[idx] == float(1))
+        && (fBuf.Load<float>(idx * 8) == float(1))
+        ;
+
+    outputBuffer[idx] = int(result);
+}
+
+// DX12: 1
+// DX12-NEXT: 1
+// DX12-NEXT: 1
+// DX12-NEXT: 1
