diff options
| author | jsmall-nvidia <jsmall@nvidia.com> | 2020-08-24 15:23:40 -0400 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2020-08-24 15:23:40 -0400 |
| commit | 4804753d4a2ec389cc6ecd759f7ea712848fddf0 (patch) | |
| tree | 0ac88b3da2aae7842cb8f71f55b79412716e8a60 /tests/slang-extension | |
| parent | 67ca54997d445e15891965b8d77561b9d10bb18c (diff) | |
RWByteAddressBuffer::InterlockedCompareExchangeU64 (#1513)
* First pass at incorporating nvapi into test harness.
* D3D12 Atomic Float Add via NVAPI working
* Dx12 atomic float appears to work.
* Atomic float add on Dx12.
* Added atomic64 feature addition to vk.
Fix correct output for atomic-float-byte-address.slang
* Disable atomic float failing tests.
* Upgraded VK headers.
* Detect atomic float availability on VK.
* Try to get test working for int64 atomic.
* Made HLSL prelude controlled via the render-test requirements.
* Added -enable-nvapi to premake.
* Fix D3D12Renderer when NVAPI is not available.
* Small improvements to VKRenderer.
* Improve atomic documentation in target-compatibility.md.
* Fixed NVAPI working on D3D12.
* Test for specific NVAPI features.
* Remove requiredFeatures from Renderer::Desc as it was ignored. Tried to document more around nvapiExtnSlot.
* Re-added requiredFeatures to Renderer::Desc
* Improve comments in the tests.
* Rename Fp32 -> F32
Added cas-int64-byte-address-buffer.slang test
Co-authored-by: Tim Foley <tfoleyNV@users.noreply.github.com>
Diffstat (limited to 'tests/slang-extension')
5 files changed, 62 insertions, 9 deletions
diff --git a/tests/slang-extension/atomic-float-byte-address-buffer-cross.slang b/tests/slang-extension/atomic-float-byte-address-buffer-cross.slang index e99494d5f..584dcada1 100644 --- a/tests/slang-extension/atomic-float-byte-address-buffer-cross.slang +++ b/tests/slang-extension/atomic-float-byte-address-buffer-cross.slang @@ -19,9 +19,9 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) const float delta = anotherBuffer[idx & 3]; float previousValue = 0; - outputBuffer.InterlockedAddFp32((idx << 2), 1.0f, previousValue); + outputBuffer.InterlockedAddF32((idx << 2), 1.0f, previousValue); // The sum of values in anotherBuffer should also be added int anotherIdx = tid >> 2; - outputBuffer.InterlockedAddFp32(anotherIdx << 2, delta); + outputBuffer.InterlockedAddF32(anotherIdx << 2, delta); }
\ No newline at end of file diff --git a/tests/slang-extension/atomic-float-byte-address-buffer-cross.slang.glsl b/tests/slang-extension/atomic-float-byte-address-buffer-cross.slang.glsl index 9fd2f18f9..6e21156aa 100644 --- a/tests/slang-extension/atomic-float-byte-address-buffer-cross.slang.glsl +++ b/tests/slang-extension/atomic-float-byte-address-buffer-cross.slang.glsl @@ -14,7 +14,7 @@ layout(std430, binding = 0) buffer _S2 { } _S3; #line 18 "tests/slang-extension/atomic-float-byte-address-buffer-cross.slang" -void RWByteAddressBuffer_InterlockedAddFp32_0(uint _S4, float _S5, out float _S6) +void RWByteAddressBuffer_InterlockedAddF32_0(uint _S4, float _S5, out float _S6) { uint _S7 = _S4 / uint(4); float _S8 = (atomicAdd((((_S3)._data[(_S7)])), (_S5))); @@ -22,7 +22,7 @@ void RWByteAddressBuffer_InterlockedAddFp32_0(uint _S4, float _S5, out float _S6 return; } -void RWByteAddressBuffer_InterlockedAddFp32_1(uint _S9, float _S10) +void RWByteAddressBuffer_InterlockedAddF32_1(uint _S9, float _S10) { uint _S11 = _S9 / uint(4); float _S12 = (atomicAdd((((_S3)._data[(_S11)])), (_S10))); @@ -43,8 +43,8 @@ layout(local_size_x = 16, local_size_y = 1, local_size_z = 1) in;void main() #line 21 float _S14; - RWByteAddressBuffer_InterlockedAddFp32_0(_S13, 1.00000000000000000000, _S14); - RWByteAddressBuffer_InterlockedAddFp32_1(uint(int(tid_0 >> 2) << 2), delta_0); + RWByteAddressBuffer_InterlockedAddF32_0(_S13, 1.00000000000000000000, _S14); + RWByteAddressBuffer_InterlockedAddF32_1(uint(int(tid_0 >> 2) << 2), delta_0); #line 13 return; diff --git a/tests/slang-extension/atomic-float-byte-address-buffer.slang b/tests/slang-extension/atomic-float-byte-address-buffer.slang index 603b92d65..910fefdfe 100644 --- a/tests/slang-extension/atomic-float-byte-address-buffer.slang +++ b/tests/slang-extension/atomic-float-byte-address-buffer.slang @@ -30,12 +30,12 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) //const float delta = anotherBuffer[idx & 3]; float 
previousValue = 0; - workBuffer.InterlockedAddFp32((idx << 2), 1.0f, previousValue); - //workBuffer.InterlockedAddFp32((idx ^ 2) << 2, 2.0f + delta); + workBuffer.InterlockedAddF32((idx << 2), 1.0f, previousValue); + //workBuffer.InterlockedAddF32((idx ^ 2) << 2, 2.0f + delta); // The sum of values in anotherBuffer should also be added //int anotherIdx = tid >> 2; - //workBuffer.InterlockedAddFp32(anotherIdx << 2, delta); + //workBuffer.InterlockedAddF32(anotherIdx << 2, delta); GroupMemoryBarrierWithGroupSync(); diff --git a/tests/slang-extension/cas-int64-byte-address-buffer.slang b/tests/slang-extension/cas-int64-byte-address-buffer.slang new file mode 100644 index 000000000..b75a9fa04 --- /dev/null +++ b/tests/slang-extension/cas-int64-byte-address-buffer.slang @@ -0,0 +1,45 @@ +// No atomic support on CPU +//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-cpu -compute +// No support for int64_t on DX11 +//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-slang -compute +// No support for int64_t on fxc - we need SM6.0 and dxil +// https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/hlsl-shader-model-6-0-features-for-direct3d-12 +//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 -nvapi-slot u0 +//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 -profile cs_6_0 -use-dxil -render-features atomic-int64 -nvapi-slot u0 -compile-arg -O2 +//TEST(compute, vulkan):COMPARE_COMPUTE_EX:-vk -compute -render-features atomic-int64 +//TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute + +// The test doesn't directly use this, but having this defined makes the 0 slot available if NVAPI is going to be used +// Only strictly necessary on the D3D12 path +//TEST_INPUT:ubuffer(data=[0 0 0 0 ], stride=4):name=nvapiBuffer +RWStructuredBuffer<int> nvapiBuffer; + +//TEST_INPUT:ubuffer(data=[0 1 2 3 4 5 6 7]):out,name=outputBuffer +RWByteAddressBuffer outputBuffer; + +[numthreads(16, 1, 1)] +void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) +{ + uint tid = 
dispatchThreadID.x; + int idx = (tid & 3) ^ (tid >> 2); + + // Try directly reading + uint2 currentValue2 = outputBuffer.Load2(idx << 8); + uint64_t currentValue = uint64_t(currentValue2.y) | currentValue2.x; + + while (true) + { + // This is probably not a great way to do this - InterlockedAddI64 would be better + // but we are doing this to test CAS. + + uint64_t readValue; + outputBuffer.InterlockedCompareExchangeU64(idx << 3, currentValue, currentValue + 1, readValue); + + if (readValue == currentValue) + { + break; + } + + currentValue = readValue; + } +}
\ No newline at end of file diff --git a/tests/slang-extension/cas-int64-byte-address-buffer.slang.expected.txt b/tests/slang-extension/cas-int64-byte-address-buffer.slang.expected.txt new file mode 100644 index 000000000..78d24c356 --- /dev/null +++ b/tests/slang-extension/cas-int64-byte-address-buffer.slang.expected.txt @@ -0,0 +1,8 @@ +4 +1 +6 +3 +8 +5 +A +7 |
