From 2dc1f89fb069decb93dbe950fed9665453303550 Mon Sep 17 00:00:00 2001 From: jsmall-nvidia Date: Wed, 26 Aug 2020 14:38:24 -0400 Subject: Added more Atomic support for int64 types on RWByteAddressBuffer (#1515) * Support for more 64 bit atomics on ByteAddressBuffer. * min max 64bit test. * Disable CUDA version of min max 64 bit test - as produces the wrong output. * Update target-compatibility.md with added 64 bit atomics. Co-authored-by: Yong He --- .../atomic-int64-byte-address-buffer.slang | 5 +++ ...ic-int64-byte-address-buffer.slang.expected.txt | 12 ++--- .../atomic-min-max-u64-byte-address-buffer.slang | 52 ++++++++++++++++++++++ ...-max-u64-byte-address-buffer.slang.expected.txt | 8 ++++ 4 files changed, 71 insertions(+), 6 deletions(-) create mode 100644 tests/slang-extension/atomic-min-max-u64-byte-address-buffer.slang create mode 100644 tests/slang-extension/atomic-min-max-u64-byte-address-buffer.slang.expected.txt (limited to 'tests') diff --git a/tests/slang-extension/atomic-int64-byte-address-buffer.slang b/tests/slang-extension/atomic-int64-byte-address-buffer.slang index 628c675a2..80ce2150c 100644 --- a/tests/slang-extension/atomic-int64-byte-address-buffer.slang +++ b/tests/slang-extension/atomic-int64-byte-address-buffer.slang @@ -28,5 +28,10 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) int anotherIdx = tid >> 2; outputBuffer.InterlockedAddI64(anotherIdx << 3, 3); + + // Bit logical + outputBuffer.InterlockedOrU64((idx << 3), (uint64_t(2) << 32) | (tid << 4)); + outputBuffer.InterlockedXorU64((idx << 3), tid << 8); + outputBuffer.InterlockedAndU64((idx << 3), (uint64_t(tid | 2) << 32) | 0xffffffff); } diff --git a/tests/slang-extension/atomic-int64-byte-address-buffer.slang.expected.txt b/tests/slang-extension/atomic-int64-byte-address-buffer.slang.expected.txt index 811dc1584..67fea32ae 100644 --- a/tests/slang-extension/atomic-int64-byte-address-buffer.slang.expected.txt +++ 
b/tests/slang-extension/atomic-int64-byte-address-buffer.slang.expected.txt @@ -1,8 +1,8 @@ -10 -1 -12 +F0 3 -14 -5 -16 +F2 +3 +F4 +7 +F6 7 diff --git a/tests/slang-extension/atomic-min-max-u64-byte-address-buffer.slang b/tests/slang-extension/atomic-min-max-u64-byte-address-buffer.slang new file mode 100644 index 000000000..bfe7fb2a4 --- /dev/null +++ b/tests/slang-extension/atomic-min-max-u64-byte-address-buffer.slang @@ -0,0 +1,52 @@ +// No atomic support on CPU +//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-cpu -compute +// No support for int64_t on DX11 +//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-slang -compute +// No support for int64_t on fxc - we need SM6.0 and dxil +// https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/hlsl-shader-model-6-0-features-for-direct3d-12 +//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 -nvapi-slot u0 +//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 -profile cs_6_0 -use-dxil -render-features atomic-int64 -nvapi-slot u0 -compile-arg -O2 +//TEST(compute, vulkan):COMPARE_COMPUTE_EX:-vk -compute -render-features atomic-int64 +// For some reason this doesn't work correctly on CUDA? That it behaves as if always does Min. 
Min and Max each do the appropriate +// things though, because if I force the condition I do get the right answer +//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute + +// The test doesn't directly use this, but having this defined makes the 0 slot available if NVAPI is going to be used +// Only strictly necessary on the D3D12 path +//TEST_INPUT:ubuffer(data=[0 0 0 0 ], stride=4):name=nvapiBuffer +RWStructuredBuffer nvapiBuffer; + +//TEST_INPUT:ubuffer(data=[2 2 2 2 2 2 2 2], stride=4):out,name=outputBuffer +RWByteAddressBuffer outputBuffer; + +//TEST_INPUT:ubuffer(data=[0 1 2 3 4 5 6 7], stride=4):name=inputBuffer +RWStructuredBuffer inputBuffer; + +[numthreads(16, 1, 1)] +void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) +{ + uint tid = dispatchThreadID.x; + + // Produces a different result on CUDA? + + uint64_t value; + { + int idx = tid & 3; + // Do 64 bit load that works on CUDA + value = (uint64_t(inputBuffer[idx * 2 + 1]) << 32) | inputBuffer[idx * 2]; + } + + { + int idx = (tid & 3) ^ (tid >> 2); + if (idx & 1) + { + outputBuffer.InterlockedMaxU64((idx << 3), value); + } + else + { + outputBuffer.InterlockedMinU64((idx << 3), value); + } + } +} + + diff --git a/tests/slang-extension/atomic-min-max-u64-byte-address-buffer.slang.expected.txt b/tests/slang-extension/atomic-min-max-u64-byte-address-buffer.slang.expected.txt new file mode 100644 index 000000000..d33a683ed --- /dev/null +++ b/tests/slang-extension/atomic-min-max-u64-byte-address-buffer.slang.expected.txt @@ -0,0 +1,8 @@ +0 +1 +6 +7 +0 +1 +6 +7 -- cgit v1.2.3