diff options
| -rw-r--r-- | source/slang/hlsl.meta.slang | 588 | ||||
| -rw-r--r-- | tests/hlsl-intrinsic/atomic/atomic-intrinsics-64bit.slang | 336 |
2 files changed, 908 insertions, 16 deletions
diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang index ef5c3ae5d..3d712559d 100644 --- a/source/slang/hlsl.meta.slang +++ b/source/slang/hlsl.meta.slang @@ -2707,6 +2707,46 @@ ${{{{ return __atomicMax(buf[byteAddress / 8], value); } + [require(hlsl)] + [ForceInline] + void InterlockedMax64(uint byteAddress, int64_t value) + { + __target_switch + { + case hlsl: __intrinsic_asm ".InterlockedMax64"; + } + } + + [require(hlsl)] + [ForceInline] + void InterlockedMax64(uint byteAddress, int64_t value, out int64_t outOriginalValue) + { + __target_switch + { + case hlsl: __intrinsic_asm ".InterlockedMax64"; + } + } + + [require(hlsl)] + [ForceInline] + void InterlockedMax64(uint byteAddress, uint64_t value) + { + __target_switch + { + case hlsl: __intrinsic_asm ".InterlockedMax64"; + } + } + + [require(hlsl)] + [ForceInline] + void InterlockedMax64(uint byteAddress, uint64_t value, out uint64_t outOriginalValue) + { + __target_switch + { + case hlsl: __intrinsic_asm ".InterlockedMax64"; + } + } + // Min __cuda_sm_version(3.5) @@ -2724,6 +2764,46 @@ ${{{{ return __atomicMin(buf[byteAddress / 8], value); } + [require(hlsl)] + [ForceInline] + void InterlockedMin64(uint byteAddress, int64_t value) + { + __target_switch + { + case hlsl: __intrinsic_asm ".InterlockedMin64"; + } + } + + [require(hlsl)] + [ForceInline] + void InterlockedMin64(uint byteAddress, int64_t value, out int64_t outOriginalValue) + { + __target_switch + { + case hlsl: __intrinsic_asm ".InterlockedMin64"; + } + } + + [require(hlsl)] + [ForceInline] + void InterlockedMin64(uint byteAddress, uint64_t value) + { + __target_switch + { + case hlsl: __intrinsic_asm ".InterlockedMin64"; + } + } + + [require(hlsl)] + [ForceInline] + void InterlockedMin64(uint byteAddress, uint64_t value, out uint64_t outOriginalValue) + { + __target_switch + { + case hlsl: __intrinsic_asm ".InterlockedMin64"; + } + } + // And __target_intrinsic(cuda, "atomicAnd($0._getPtrAt<uint64_t>($1), $2)") @@ -2740,6 
+2820,26 @@ ${{{{ return __atomicAnd(buf[byteAddress / 8], value); } + [require(hlsl)] + [ForceInline] + void InterlockedAnd64(uint byteAddress, uint64_t value) + { + __target_switch + { + case hlsl: __intrinsic_asm ".InterlockedAnd64"; + } + } + + [require(hlsl)] + [ForceInline] + void InterlockedAnd64(uint byteAddress, uint64_t value, out uint64_t outOriginalValue) + { + __target_switch + { + case hlsl: __intrinsic_asm ".InterlockedAnd64"; + } + } + // Or __target_intrinsic(cuda, "atomicOr($0._getPtrAt<uint64_t>($1), $2)") @@ -2756,6 +2856,26 @@ ${{{{ return __atomicOr(buf[byteAddress / 8], value); } + [require(hlsl)] + [ForceInline] + void InterlockedOr64(uint byteAddress, uint64_t value) + { + __target_switch + { + case hlsl: __intrinsic_asm ".InterlockedOr64"; + } + } + + [require(hlsl)] + [ForceInline] + void InterlockedOr64(uint byteAddress, uint64_t value, out uint64_t outOriginalValue) + { + __target_switch + { + case hlsl: __intrinsic_asm ".InterlockedOr64"; + } + } + // Xor __target_intrinsic(cuda, "atomicXor($0._getPtrAt<uint64_t>($1), $2)") @@ -2772,6 +2892,26 @@ ${{{{ return __atomicXor(buf[byteAddress / 8], value); } + [require(hlsl)] + [ForceInline] + void InterlockedXor64(uint byteAddress, uint64_t value) + { + __target_switch + { + case hlsl: __intrinsic_asm ".InterlockedXor64"; + } + } + + [require(hlsl)] + [ForceInline] + void InterlockedXor64(uint byteAddress, uint64_t value, out uint64_t outOriginalValue) + { + __target_switch + { + case hlsl: __intrinsic_asm ".InterlockedXor64"; + } + } + // Exchange __target_intrinsic(cuda, "atomicExch($0._getPtrAt<uint64_t>($1), $2)") @@ -2788,31 +2928,89 @@ ${{{{ return __atomicExchange(buf[byteAddress / 8], value); } + [require(hlsl)] + [ForceInline] + void InterlockedExchangeFloat(uint byteAddress, float value, out float outOriginalValue) + { + __target_switch + { + case hlsl: __intrinsic_asm ".InterlockedExchangeFloat"; + } + } + + [require(hlsl)] + [ForceInline] + void InterlockedExchange64(uint 
byteAddress, int64_t value, out int64_t outOriginalValue) + { + __target_switch + { + case hlsl: __intrinsic_asm ".InterlockedExchange64"; + } + } + + [require(hlsl)] + [ForceInline] + void InterlockedExchange64(uint byteAddress, uint64_t value, out uint64_t outOriginalValue) + { + __target_switch + { + case hlsl: __intrinsic_asm ".InterlockedExchange64"; + } + } + // SM6.6 6 64bit atomics. - __specialized_for_target(hlsl) - void InterlockedAdd64(uint byteAddress, int64_t valueToAdd, out int64_t outOriginalValue) + [ForceInline] + void InterlockedAdd64(uint byteAddress, int64_t valueToAdd) { - __atomicAdd(this, byteAddress, valueToAdd, outOriginalValue); + __target_switch + { + case hlsl: __intrinsic_asm ".InterlockedAdd64"; + case glsl: + case spirv: + let buf = __getEquivalentStructuredBuffer<int64_t>(this); + __atomicAdd(buf[byteAddress / 8], valueToAdd); + } } - __specialized_for_target(glsl) - __specialized_for_target(spirv) - void InterlockedAdd64(uint byteAddress, int64_t valueToAdd, out int64_t originalValue) + + [ForceInline] + void InterlockedAdd64(uint byteAddress, int64_t valueToAdd, out int64_t outOriginalValue) { - let buf = __getEquivalentStructuredBuffer<int64_t>(this); - originalValue = __atomicAdd(buf[byteAddress / 8], valueToAdd); + __target_switch + { + case hlsl: __intrinsic_asm ".InterlockedAdd64"; + case glsl: + case spirv: + let buf = __getEquivalentStructuredBuffer<int64_t>(this); + outOriginalValue = __atomicAdd(buf[byteAddress / 8], valueToAdd); + } } - __specialized_for_target(hlsl) - void InterlockedAdd64(uint byteAddress, uint64_t valueToAdd, out uint64_t outOriginalValue) + + [ForceInline] + void InterlockedAdd64(uint byteAddress, uint64_t valueToAdd) { - __atomicAdd(this, byteAddress, valueToAdd, outOriginalValue); + __target_switch + { + case hlsl: __intrinsic_asm ".InterlockedAdd64"; + case glsl: + case spirv: + let buf = __getEquivalentStructuredBuffer<uint64_t>(this); + __atomicAdd(buf[byteAddress / 8], valueToAdd); + } } - 
__specialized_for_target(glsl) - __specialized_for_target(spirv) - void InterlockedAdd64(uint byteAddress, uint64_t valueToAdd, out uint64_t originalValue) + + [ForceInline] + void InterlockedAdd64(uint byteAddress, uint64_t valueToAdd, out uint64_t outOriginalValue) { - let buf = __getEquivalentStructuredBuffer<uint64_t>(this); - originalValue = __atomicAdd(buf[byteAddress / 8], valueToAdd); + __target_switch + { + case hlsl: __intrinsic_asm ".InterlockedAdd64"; + case glsl: + case spirv: + let buf = __getEquivalentStructuredBuffer<uint64_t>(this); + outOriginalValue = __atomicAdd(buf[byteAddress / 8], valueToAdd); + } } + __specialized_for_target(hlsl) void InterlockedCompareExchange64(uint byteAddress, int64_t compareValue, int64_t value, out int64_t outOriginalValue) { @@ -2837,6 +3035,66 @@ ${{{{ let buf = __getEquivalentStructuredBuffer<uint64_t>(this); outOriginalValue = __cas(buf[byteAddress / 8], compareValue, value); } + [require(hlsl)] + [ForceInline] + void InterlockedCompareStoreFloatBitwise(uint byteAddress, float compareValue, float value) + { + __target_switch + { + case hlsl: __intrinsic_asm ".InterlockedCompareStoreFloatBitwise"; + } + } + + [require(hlsl)] + [ForceInline] + void InterlockedCompareExchangeFloatBitwise(uint byteAddress, float compareValue, float value, out float outOriginalValue) + { + __target_switch + { + case hlsl: __intrinsic_asm ".InterlockedCompareExchangeFloatBitwise"; + } + } + + [require(hlsl)] + [ForceInline] + void InterlockedCompareStore64(uint byteAddress, int64_t compareValue, int64_t value) + { + __target_switch + { + case hlsl: __intrinsic_asm ".InterlockedCompareStore64"; + } + } + + [require(hlsl)] + [ForceInline] + void InterlockedCompareExchange64(uint byteAddress, int64_t compareValue, int64_t value, out int64_t outOriginalValue) + { + __target_switch + { + case hlsl: __intrinsic_asm ".InterlockedCompareExchange64"; + } + } + + [require(hlsl)] + [ForceInline] + void InterlockedCompareStore64(uint byteAddress, 
uint64_t compareValue, uint64_t value) + { + __target_switch + { + case hlsl: __intrinsic_asm ".InterlockedCompareStore64"; + } + } + + [require(hlsl)] + [ForceInline] + void InterlockedCompareExchange64(uint byteAddress, uint64_t compareValue, uint64_t value, out uint64_t outOriginalValue) + { + __target_switch + { + case hlsl: __intrinsic_asm ".InterlockedCompareExchange64"; + } + } + ${{{{ } // endif (type == RWByteAddressBuffer) }}}} @@ -5200,6 +5458,42 @@ void InterlockedAdd(__ref uint dest, uint value, out uint original_value) } } +[ForceInline] +void InterlockedAdd(__ref int64_t dest, int64_t value) +{ + __target_switch + { + case hlsl: __intrinsic_asm "InterlockedAdd"; + } +} + +[ForceInline] +void InterlockedAdd(__ref int64_t dest, int64_t value, out int64_t original_value) +{ + __target_switch + { + case hlsl: __intrinsic_asm "InterlockedAdd"; + } +} + +[ForceInline] +void InterlockedAdd(__ref uint64_t dest, uint64_t value) +{ + __target_switch + { + case hlsl: __intrinsic_asm "InterlockedAdd"; + } +} + +[ForceInline] +void InterlockedAdd(__ref uint64_t dest, uint64_t value, out uint64_t original_value) +{ + __target_switch + { + case hlsl: __intrinsic_asm "InterlockedAdd"; + } +} + __glsl_version(430) void InterlockedAnd(__ref int dest, int value) { @@ -5266,6 +5560,24 @@ void InterlockedAnd(__ref uint dest, uint value, out uint original_value) } } +[ForceInline] +void InterlockedAnd(__ref uint64_t dest, uint64_t value) +{ + __target_switch + { + case hlsl: __intrinsic_asm "InterlockedAnd"; + } +} + +[ForceInline] +void InterlockedAnd(__ref uint64_t dest, uint64_t value, out uint64_t original_value) +{ + __target_switch + { + case hlsl: __intrinsic_asm "InterlockedAnd"; + } +} + __glsl_version(430) void InterlockedCompareExchange(__ref int dest, int compare_value, int value, out int original_value) { @@ -5300,6 +5612,60 @@ void InterlockedCompareExchange(__ref uint dest, uint compare_value, uint value, } } +[ForceInline] +void 
InterlockedCompareExchangeFloatBitwise(__ref float dest, float compare_value, float value) +{ + __target_switch + { + case hlsl: __intrinsic_asm "InterlockedCompareExchangeFloatBitwise"; + } +} + +[ForceInline] +void InterlockedCompareExchangeFloatBitwise(__ref float dest, float compare_value, float value, out float original_value) +{ + __target_switch + { + case hlsl: __intrinsic_asm "InterlockedCompareExchangeFloatBitwise"; + } +} + +[ForceInline] +void InterlockedCompareExchange(__ref int64_t dest, int64_t compare_value, int64_t value) +{ + __target_switch + { + case hlsl: __intrinsic_asm "InterlockedCompareExchange"; + } +} + +[ForceInline] +void InterlockedCompareExchange(__ref int64_t dest, int64_t compare_value, int64_t value, out int64_t original_value) +{ + __target_switch + { + case hlsl: __intrinsic_asm "InterlockedCompareExchange"; + } +} + +[ForceInline] +void InterlockedCompareExchange(__ref uint64_t dest, uint64_t compare_value, uint64_t value) +{ + __target_switch + { + case hlsl: __intrinsic_asm "InterlockedCompareExchange"; + } +} + +[ForceInline] +void InterlockedCompareExchange(__ref uint64_t dest, uint64_t compare_value, uint64_t value, out uint64_t original_value) +{ + __target_switch + { + case hlsl: __intrinsic_asm "InterlockedCompareExchange"; + } +} + __glsl_version(430) void InterlockedCompareStore(__ref int dest, int compare_value, int value) { @@ -5332,6 +5698,33 @@ void InterlockedCompareStore(__ref uint dest, uint compare_value, uint value) } } +[ForceInline] +void InterlockedCompareStoreFloatBitwise(__ref float dest, float compare_value, float value) +{ + __target_switch + { + case hlsl: __intrinsic_asm "InterlockedCompareStoreFloatBitwise"; + } +} + +[ForceInline] +void InterlockedCompareStore(__ref int64_t dest, int64_t compare_value, int64_t value) +{ + __target_switch + { + case hlsl: __intrinsic_asm "InterlockedCompareStore"; + } +} + +[ForceInline] +void InterlockedCompareStore(__ref uint64_t dest, uint64_t compare_value, 
uint64_t value) +{ + __target_switch + { + case hlsl: __intrinsic_asm "InterlockedCompareStore"; + } +} + __glsl_version(430) void InterlockedExchange(__ref int dest, int value, out int original_value) { @@ -5366,6 +5759,60 @@ void InterlockedExchange(__ref uint dest, uint value, out uint original_value) } } +[ForceInline] +void InterlockedExchange(__ref float dest, float value) +{ + __target_switch + { + case hlsl: __intrinsic_asm "InterlockedExchange"; + } +} + +[ForceInline] +void InterlockedExchange(__ref float dest, float value, out float original_value) +{ + __target_switch + { + case hlsl: __intrinsic_asm "InterlockedExchange"; + } +} + +[ForceInline] +void InterlockedExchange(__ref int64_t dest, int64_t value) +{ + __target_switch + { + case hlsl: __intrinsic_asm "InterlockedExchange"; + } +} + +[ForceInline] +void InterlockedExchange(__ref int64_t dest, int64_t value, out int64_t original_value) +{ + __target_switch + { + case hlsl: __intrinsic_asm "InterlockedExchange"; + } +} + +[ForceInline] +void InterlockedExchange(__ref uint64_t dest, uint64_t value) +{ + __target_switch + { + case hlsl: __intrinsic_asm "InterlockedExchange"; + } +} + +[ForceInline] +void InterlockedExchange(__ref uint64_t dest, uint64_t value, out uint64_t original_value) +{ + __target_switch + { + case hlsl: __intrinsic_asm "InterlockedExchange"; + } +} + __glsl_version(430) void InterlockedMax(__ref int dest, int value) { @@ -5432,6 +5879,42 @@ void InterlockedMax(__ref uint dest, uint value, out uint original_value) } } +[ForceInline] +void InterlockedMax(__ref int64_t dest, int64_t value) +{ + __target_switch + { + case hlsl: __intrinsic_asm "InterlockedMax"; + } +} + +[ForceInline] +void InterlockedMax(__ref int64_t dest, int64_t value, out int64_t original_value) +{ + __target_switch + { + case hlsl: __intrinsic_asm "InterlockedMax"; + } +} + +[ForceInline] +void InterlockedMax(__ref uint64_t dest, uint64_t value) +{ + __target_switch + { + case hlsl: __intrinsic_asm 
"InterlockedMax"; + } +} + +[ForceInline] +void InterlockedMax(__ref uint64_t dest, uint64_t value, out uint64_t original_value) +{ + __target_switch + { + case hlsl: __intrinsic_asm "InterlockedMax"; + } +} + __glsl_version(430) void InterlockedMin(__ref int dest, int value) { @@ -5498,6 +5981,42 @@ void InterlockedMin(__ref uint dest, uint value, out uint original_value) } } +[ForceInline] +void InterlockedMin(__ref int64_t dest, int64_t value) +{ + __target_switch + { + case hlsl: __intrinsic_asm "InterlockedMin"; + } +} + +[ForceInline] +void InterlockedMin(__ref int64_t dest, int64_t value, out int64_t original_value) +{ + __target_switch + { + case hlsl: __intrinsic_asm "InterlockedMin"; + } +} + +[ForceInline] +void InterlockedMin(__ref uint64_t dest, uint64_t value) +{ + __target_switch + { + case hlsl: __intrinsic_asm "InterlockedMin"; + } +} + +[ForceInline] +void InterlockedMin(__ref uint64_t dest, uint64_t value, out uint64_t original_value) +{ + __target_switch + { + case hlsl: __intrinsic_asm "InterlockedMin"; + } +} + __glsl_version(430) void InterlockedOr(__ref int dest, int value) { @@ -5564,6 +6083,24 @@ void InterlockedOr(__ref uint dest, uint value, out uint original_value) } } +[ForceInline] +void InterlockedOr(__ref uint64_t dest, uint64_t value) +{ + __target_switch + { + case hlsl: __intrinsic_asm "InterlockedOr"; + } +} + +[ForceInline] +void InterlockedOr(__ref uint64_t dest, uint64_t value, out uint64_t original_value) +{ + __target_switch + { + case hlsl: __intrinsic_asm "InterlockedOr"; + } +} + __glsl_version(430) void InterlockedXor(__ref int dest, int value) { @@ -5630,6 +6167,25 @@ void InterlockedXor(__ref uint dest, uint value, out uint original_value) } } +[ForceInline] +void InterlockedXor(__ref uint64_t dest, uint64_t value) +{ + __target_switch + { + case hlsl: __intrinsic_asm "InterlockedXor"; + } +} + +[ForceInline] +void InterlockedXor(__ref uint64_t dest, uint64_t value, out uint64_t original_value) +{ + __target_switch + 
{ + case hlsl: __intrinsic_asm "InterlockedXor"; + } +} + + // Is floating-point value finite? __generic<T : __BuiltinFloatingPointType> diff --git a/tests/hlsl-intrinsic/atomic/atomic-intrinsics-64bit.slang b/tests/hlsl-intrinsic/atomic/atomic-intrinsics-64bit.slang new file mode 100644 index 000000000..aa05f9750 --- /dev/null +++ b/tests/hlsl-intrinsic/atomic/atomic-intrinsics-64bit.slang @@ -0,0 +1,336 @@ +//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=DX12):-slang -compute -dx12 -profile cs_6_6 -use-dxil -shaderobj -output-using-type + +// This is to support 64-bit `Interlocked*` functions defined for HLSL SM6.6 +// https://microsoft.github.io/DirectX-Specs/d3d/HLSL_SM_6_6_Int64_and_Float_Atomics.html + +//TEST_INPUT:ubuffer(data=[0 0 0 0], stride=4):name=f32Buffer +RWStructuredBuffer<float> f32Buffer; +//TEST_INPUT:ubuffer(data=[0 0 0 0 0 0 0 0], stride=8):name=u64Buffer +RWStructuredBuffer<uint64_t> u64Buffer; +//TEST_INPUT:ubuffer(data=[0 0 0 0 0 0 0 0], stride=8):name=i64Buffer +RWStructuredBuffer<int64_t> i64Buffer; + +//TEST_INPUT:ubuffer(data=[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]):name=fBuf +RWByteAddressBuffer fBuf; +//TEST_INPUT:ubuffer(data=[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]):name=uBuf +RWByteAddressBuffer uBuf; +//TEST_INPUT:ubuffer(data=[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]):name=iBuf +RWByteAddressBuffer iBuf; + +groupshared float f32Shared[4] = { 0.f, 0.f, 0.f, 0.f }; +groupshared uint64_t u64Shared[4] = { 0, 0, 0, 0 }; +groupshared int64_t i64Shared[4] = { 0, 0, 0, 0 }; +groupshared uint64_t indexAlloc = 0; + +//TEST_INPUT: ubuffer(data=[0 0 0 0], stride=4):out,name outputBuffer +RWStructuredBuffer<int> outputBuffer; + +[numthreads(4, 1, 1)] +void computeMain(uint groupIndex : SV_GroupIndex, int3 dispatchThreadID: SV_DispatchThreadID) +{ + int idx = dispatchThreadID.x; + bool result = true; + + uint64_t u64Value[9]; + int64_t i64Value[9]; + float 
f32Value[9]; + + // Add + InterlockedAdd(u64Shared[idx], uint64_t(1)); + InterlockedAdd(i64Shared[idx], int64_t(1)); + InterlockedAdd(u64Buffer[idx], uint64_t(1)); + InterlockedAdd(i64Buffer[idx], int64_t(1)); + uBuf.InterlockedAdd64(idx * 8, uint64_t(1)); + iBuf.InterlockedAdd64(idx * 8, int64_t(1)); + + result = result + && (u64Shared[idx] == 1) + && (i64Shared[idx] == 1) + && (u64Buffer[idx] == 1) + && (i64Buffer[idx] == 1) + && (uBuf.Load<uint64_t>(idx * 8) == 1) + && (iBuf.Load< int64_t>(idx * 8) == 1) + ; + + // Add - original_value + InterlockedAdd(u64Shared[idx], uint64_t(1), u64Value[0]); + InterlockedAdd(i64Shared[idx], int64_t(1), i64Value[1]); + InterlockedAdd(u64Buffer[idx], uint64_t(1), u64Value[2]); + InterlockedAdd(i64Buffer[idx], int64_t(1), i64Value[3]); + uBuf.InterlockedAdd64(idx * 8, uint64_t(1), u64Value[4]); + iBuf.InterlockedAdd64(idx * 8, int64_t(1), i64Value[5]); + + result = result + && (u64Value[0] == 1) + && (i64Value[1] == 1) + && (u64Value[2] == 1) + && (i64Value[3] == 1) + && (u64Value[4] == 1) + && (i64Value[5] == 1) + && (u64Shared[idx] == 2) + && (i64Shared[idx] == 2) + && (u64Buffer[idx] == 2) + && (i64Buffer[idx] == 2) + && (uBuf.Load<uint64_t>(idx * 8) == 2) + && (iBuf.Load< int64_t>(idx * 8) == 2) + ; + + // Bitwise-And + InterlockedAnd(u64Shared[idx], uint64_t(3)); + InterlockedAnd(u64Buffer[idx], uint64_t(3)); + uBuf.InterlockedAnd64(idx * 8, uint64_t(3)); + + result = result + && (u64Shared[idx] == 2) + && (u64Buffer[idx] == 2) + && (uBuf.Load<uint64_t>(idx * 8) == 2) + ; + + // And - original_value + InterlockedAnd(u64Shared[idx], uint64_t(1), u64Value[0]); + InterlockedAnd(u64Buffer[idx], uint64_t(1), u64Value[1]); + uBuf.InterlockedAnd64(idx * 8, uint64_t(1), u64Value[2]); + + result = result + && (u64Value[0] == 2) + && (u64Value[1] == 2) + && (u64Value[2] == 2) + && (u64Shared[idx] == 0) + && (u64Buffer[idx] == 0) + && (uBuf.Load<uint64_t>(idx * 8) == 0) + ; + + // Bitwise-Or + InterlockedOr(u64Shared[idx], 
uint64_t(1)); + InterlockedOr(u64Buffer[idx], uint64_t(1)); + uBuf.InterlockedOr64(idx * 8, uint64_t(1)); + + result = result + && (u64Shared[idx] == 1) + && (u64Buffer[idx] == 1) + && (uBuf.Load<uint64_t>(idx * 8) == 1) + ; + + // Or - original_value + InterlockedOr(u64Shared[idx], uint64_t(2), u64Value[0]); + InterlockedOr(u64Buffer[idx], uint64_t(2), u64Value[1]); + uBuf.InterlockedOr64(idx * 8, uint64_t(2), u64Value[2]); + + result = result + && (u64Value[0] == 1) + && (u64Value[1] == 1) + && (u64Value[2] == 1) + && (u64Shared[idx] == 3) + && (u64Buffer[idx] == 3) + && (uBuf.Load<uint64_t>(idx * 8) == 3) + ; + + // Bitwise-Xor + InterlockedXor(u64Shared[idx], uint64_t(5)); + InterlockedXor(u64Buffer[idx], uint64_t(5)); + uBuf.InterlockedXor64(idx * 8, uint64_t(5)); + + result = result + && (u64Shared[idx] == 6) + && (u64Buffer[idx] == 6) + && (uBuf.Load<uint64_t>(idx * 8) == 6) + ; + + // Xor - original_value + InterlockedXor(u64Shared[idx], uint64_t(1), u64Value[0]); + InterlockedXor(u64Buffer[idx], uint64_t(1), u64Value[1]); + uBuf.InterlockedXor64(idx * 8, uint64_t(1), u64Value[2]); + + result = result + && (u64Value[0] == 6) + && (u64Value[1] == 6) + && (u64Value[2] == 6) + && (u64Shared[idx] == 7) + && (u64Buffer[idx] == 7) + && (uBuf.Load<uint64_t>(idx * 8) == 7) + ; + + // Min + InterlockedMin(u64Shared[idx], uint64_t(1)); + InterlockedMin(i64Shared[idx], int64_t(1)); + InterlockedMin(u64Buffer[idx], uint64_t(1)); + InterlockedMin(i64Buffer[idx], int64_t(1)); + uBuf.InterlockedMin64(idx * 8, uint64_t(1)); + iBuf.InterlockedMin64(idx * 8, int64_t(1)); + + result = result + && (u64Shared[idx] == 1) + && (i64Shared[idx] == 1) + && (u64Buffer[idx] == 1) + && (i64Buffer[idx] == 1) + && (uBuf.Load<uint64_t>(idx * 8) == 1) + && (iBuf.Load< int64_t>(idx * 8) == 1) + ; + + // Min - original_value + InterlockedMin(u64Shared[idx], uint64_t(2), u64Value[0]); + InterlockedMin(i64Shared[idx], int64_t(2), i64Value[1]); + InterlockedMin(u64Buffer[idx], uint64_t(2), 
u64Value[2]); + InterlockedMin(i64Buffer[idx], int64_t(2), i64Value[3]); + uBuf.InterlockedMin64(idx * 8, uint64_t(2), u64Value[4]); + iBuf.InterlockedMin64(idx * 8, int64_t(2), i64Value[5]); + + result = result + && (u64Value[0] == 1) + && (i64Value[1] == 1) + && (u64Value[2] == 1) + && (i64Value[3] == 1) + && (u64Value[4] == 1) + && (i64Value[5] == 1) + && (u64Shared[idx] == 1) + && (i64Shared[idx] == 1) + && (u64Buffer[idx] == 1) + && (i64Buffer[idx] == 1) + && (uBuf.Load<uint64_t>(idx * 8) == 1) + && (iBuf.Load< int64_t>(idx * 8) == 1) + ; + + // Max + InterlockedMax(u64Shared[idx], uint64_t(2)); + InterlockedMax(i64Shared[idx], int64_t(2)); + InterlockedMax(u64Buffer[idx], uint64_t(2)); + InterlockedMax(i64Buffer[idx], int64_t(2)); + uBuf.InterlockedMax64(idx * 8, uint64_t(2)); + iBuf.InterlockedMax64(idx * 8, int64_t(2)); + + result = result + && (u64Shared[idx] == 2) + && (i64Shared[idx] == 2) + && (u64Buffer[idx] == 2) + && (i64Buffer[idx] == 2) + && (uBuf.Load<uint64_t>(idx * 8) == 2) + && (iBuf.Load< int64_t>(idx * 8) == 2) + ; + + // Max - original_value + InterlockedMax(u64Shared[idx], uint64_t(0), u64Value[0]); + InterlockedMax(i64Shared[idx], int64_t(0), i64Value[1]); + InterlockedMax(u64Buffer[idx], uint64_t(0), u64Value[2]); + InterlockedMax(i64Buffer[idx], int64_t(0), i64Value[3]); + uBuf.InterlockedMax64(idx * 8, uint64_t(0), u64Value[4]); + iBuf.InterlockedMax64(idx * 8, int64_t(0), i64Value[5]); + + result = result + && (u64Value[0] == 2) + && (i64Value[1] == 2) + && (u64Value[2] == 2) + && (i64Value[3] == 2) + && (u64Value[4] == 2) + && (i64Value[5] == 2) + && (u64Shared[idx] == 2) + && (i64Shared[idx] == 2) + && (u64Buffer[idx] == 2) + && (i64Buffer[idx] == 2) + && (uBuf.Load<uint64_t>(idx * 8) == 2) + && (iBuf.Load< int64_t>(idx * 8) == 2) + ; + + // Exchange + InterlockedExchange(f32Shared[idx], float(1), f32Value[0]); + InterlockedExchange(u64Shared[idx], uint64_t(1), u64Value[1]); + InterlockedExchange(i64Shared[idx], int64_t(1), 
i64Value[2]); + InterlockedExchange(f32Buffer[idx], float(1), f32Value[3]); + InterlockedExchange(u64Buffer[idx], uint64_t(1), u64Value[4]); + InterlockedExchange(i64Buffer[idx], int64_t(1), i64Value[5]); + fBuf.InterlockedExchangeFloat(idx * 8, float(1), f32Value[6]); + uBuf.InterlockedExchange64(idx * 8, uint64_t(1), u64Value[7]); + iBuf.InterlockedExchange64(idx * 8, int64_t(1), i64Value[8]); + + result = result + && (f32Value[0] == 0) + && (u64Value[1] == 2) + && (i64Value[2] == 2) + && (f32Value[3] == 0) + && (u64Value[4] == 2) + && (i64Value[5] == 2) + && (f32Value[6] == 0) + && (u64Value[7] == 2) + && (i64Value[8] == 2) + && (f32Shared[idx] == 1.f) + && (u64Shared[idx] == 1) + && (i64Shared[idx] == 1) + && (f32Buffer[idx] == 1.f) + && (u64Buffer[idx] == 1) + && (i64Buffer[idx] == 1) + && (fBuf.Load< float>(idx * 8) == 1.f) + && (uBuf.Load<uint64_t>(idx * 8) == 1) + && (iBuf.Load< int64_t>(idx * 8) == 1) + ; + + // CompareStore + InterlockedCompareStore(u64Shared[idx], uint64_t(1), uint64_t(0)); + InterlockedCompareStore(i64Shared[idx], int64_t(1), int64_t(0)); + InterlockedCompareStore(u64Buffer[idx], uint64_t(1), uint64_t(0)); + InterlockedCompareStore(i64Buffer[idx], int64_t(1), int64_t(0)); + uBuf.InterlockedCompareStore64(idx * 8, uint64_t(1), uint64_t(0)); + iBuf.InterlockedCompareStore64(idx * 8, int64_t(1), int64_t(0)); + + result = result + && (u64Shared[idx] == 0) + && (i64Shared[idx] == 0) + && (u64Buffer[idx] == 0) + && (i64Buffer[idx] == 0) + && (uBuf.Load<uint64_t>(idx * 8) == 0) + && (iBuf.Load< int64_t>(idx * 8) == 0) + ; + + // CompareStoreFloatBitwise + InterlockedCompareStoreFloatBitwise(f32Shared[idx], float(1), float(0)); + InterlockedCompareStoreFloatBitwise(f32Buffer[idx], float(1), float(0)); + fBuf.InterlockedCompareStoreFloatBitwise(idx * 8, float(1), float(0)); + + result = result + && (f32Shared[idx] == float(0)) + && (f32Buffer[idx] == float(0)) + && (fBuf.Load<float>(idx * 8) == float(0)) + ; + + // CompareExchange + 
InterlockedCompareExchange(u64Shared[idx], uint64_t(0), uint64_t(1), u64Value[0]); + InterlockedCompareExchange(i64Shared[idx], int64_t(0), int64_t(1), i64Value[1]); + InterlockedCompareExchange(u64Buffer[idx], uint64_t(0), uint64_t(1), u64Value[2]); + InterlockedCompareExchange(i64Buffer[idx], int64_t(0), int64_t(1), i64Value[3]); + uBuf.InterlockedCompareExchange64(idx * 8, uint64_t(0), uint64_t(1), u64Value[4]); + iBuf.InterlockedCompareExchange64(idx * 8, int64_t(0), int64_t(1), i64Value[5]); + + result = result + && (u64Value[0] == 0) + && (i64Value[1] == 0) + && (u64Value[2] == 0) + && (i64Value[3] == 0) + && (u64Value[4] == 0) + && (i64Value[5] == 0) + && (u64Shared[idx] == 1) + && (i64Shared[idx] == 1) + && (u64Buffer[idx] == 1) + && (i64Buffer[idx] == 1) + && (uBuf.Load<uint64_t>(idx * 8) == 1) + && (iBuf.Load< int64_t>(idx * 8) == 1) + ; + + // CompareExchangeFloatBitwise + InterlockedCompareExchangeFloatBitwise(f32Shared[idx], float(0), float(1), f32Value[0]); + InterlockedCompareExchangeFloatBitwise(f32Buffer[idx], float(0), float(1), f32Value[1]); + fBuf.InterlockedCompareExchangeFloatBitwise(idx * 8, float(0), float(1), f32Value[2]); + + result = result + && (f32Value[0] == float(0)) + && (f32Value[1] == float(0)) + && (f32Value[2] == float(0)) + && (f32Shared[idx] == float(1)) + && (f32Buffer[idx] == float(1)) + && (fBuf.Load<float>(idx * 8) == float(1)) + ; + + outputBuffer[idx] = int(result); +} + +// DX12: 1 +// DX12-NEXT: 1 +// DX12-NEXT: 1 +// DX12-NEXT: 1 |
