From c15e7ade4e27e1649d5b98f5854e9e52bb9e60ae Mon Sep 17 00:00:00 2001 From: Yong He Date: Fri, 2 Feb 2024 22:04:40 -0800 Subject: Atomics+Wave ops intrinsics fixes. (#3542) * Fix atomics intrinsics, increase kMaxDescriptorSets. * Add SPIRVASM to known non-differentiable insts. * Support fp16 wave ops when targeting glsl. * Fixes. * Fix vk validation errors. * Fix. * Add to allowed failures. --- tests/expected-failure.txt | 1 + .../byte-address-half-atomics.slang | 8 +++----- tests/hlsl-intrinsic/texture/float-atomics.slang | 24 ++++++++-------------- tests/hlsl-intrinsic/wave-prefix-sum-fp16.slang | 18 ++++++++++++++++ 4 files changed, 31 insertions(+), 20 deletions(-) create mode 100644 tests/hlsl-intrinsic/wave-prefix-sum-fp16.slang (limited to 'tests') diff --git a/tests/expected-failure.txt b/tests/expected-failure.txt index 64d116ec9..7283c8d97 100644 --- a/tests/expected-failure.txt +++ b/tests/expected-failure.txt @@ -4,3 +4,4 @@ tests/language-feature/saturated-cooperation/fuse-product.slang (vk) tests/language-feature/saturated-cooperation/fuse.slang (vk) tests/bugs/byte-address-buffer-interlocked-add-f32.slang (vk) tests/ir/loop-unroll-0.slang.1 (vk) +tests/hlsl-intrinsic/texture/float-atomics.slang (vk) diff --git a/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics.slang b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics.slang index a89c7c3fa..e8cd266d3 100644 --- a/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics.slang +++ b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics.slang @@ -1,11 +1,9 @@ // byte-address-half-atomics.slang // test the atomics on half types. -// Disabled because our current driver doesn't support half atomics yet. -//DISABLED_TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=CHECK): -vk -compute -profile cs_6_2 -render-features half -shaderobj - -//TEST:SIMPLE(filecheck=SPIRV):-target spirv -entry computeMain -stage compute -//TEST:SIMPLE(filecheck=SPIRV):-target spirv -entry computeMain -stage compute -emit-spirv-directly +// Disabled because validation layer doesn't like vector atomics, although nv driver does allow it. +//DISABLED_TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=CHECK): -vk -compute -profile cs_6_2 -render-features half -shaderobj -emit-spirv-directly -output-using-type +//TEST:SIMPLE(filecheck=SPIRV):-target spirv -entry computeMain -stage compute -emit-spirv-directly -skip-spirv-validation //TEST_INPUT:set tmpBuffer = ubuffer(data=[0 0 0 0], stride=4) RWByteAddressBuffer tmpBuffer; diff --git a/tests/hlsl-intrinsic/texture/float-atomics.slang b/tests/hlsl-intrinsic/texture/float-atomics.slang index 4e7405f3d..02cb5570c 100644 --- a/tests/hlsl-intrinsic/texture/float-atomics.slang +++ b/tests/hlsl-intrinsic/texture/float-atomics.slang @@ -7,29 +7,23 @@ //TEST_INPUT: set t = RWTexture2D(format=R32_FLOAT, size=4, content = zero, mipMaps = 1) [format("r32f")] -RWTexture2D t; +globallycoherent RWTexture2D t; //TEST_INPUT:ubuffer(data=[0 0 0 0], stride=4):out,name outputBuffer RWStructuredBuffer outputBuffer; [shader("compute")] -[numthreads(1, 1, 1)] +[numthreads(4, 1, 1)] void computeMain(uint3 tid : SV_DispatchThreadID) { float originalValue; + // HLSL: {{.*}}originalValue{{.*}} = NvInterlockedAddFp32({{.*}}t{{.*}}, {{.*}}, {{.*}}1.0{{.*}}); - // HLSL: {{.*}}originalValue{{.*}} = NvInterlockedAddFp32({{.*}}t{{.*}}, {{.*}}, {{.*}}2.0{{.*}}); - // HLSL: {{.*}}originalValue{{.*}} = NvInterlockedAddFp32({{.*}}t{{.*}}, {{.*}}, {{.*}}3.0{{.*}}); - t.InterlockedAddF32(uint2(0, 0), 1.0, originalValue); - t.InterlockedAddF32(uint2(1, 0), 2.0, originalValue); - t.InterlockedAddF32(uint2(1, 1), 3.0, originalValue); - // CHECK: 1.0 - outputBuffer[0] = t[uint2(0, 0)]; - // CHECK: 2.0 - outputBuffer[1] = t[uint2(1, 0)]; - // CHECK: 3.0 - outputBuffer[2] = t[uint2(1, 1)]; - // CHECK: 0.0 - outputBuffer[3] = originalValue; + t.InterlockedAddF32(uint2(1, 0), 1.0, originalValue); + + AllMemoryBarrier(); + + // CHECK: 4.0 + outputBuffer[0] = t[uint2(1, 0)]; } diff --git a/tests/hlsl-intrinsic/wave-prefix-sum-fp16.slang b/tests/hlsl-intrinsic/wave-prefix-sum-fp16.slang new file mode 100644 index 000000000..617dd8e43 --- /dev/null +++ b/tests/hlsl-intrinsic/wave-prefix-sum-fp16.slang @@ -0,0 +1,18 @@ +//TEST:SIMPLE(filecheck=CHECK):-target spirv -entry computeMain -stage compute -emit-spirv-directly +//TEST:SIMPLE(filecheck=CHECK):-target spirv -entry computeMain -stage compute + +//TEST_INPUT:ubuffer(data=[0 0 0 0 0 0 0 0], stride=4):out,name outputBuffer +RWStructuredBuffer outputBuffer; + +[numthreads(8, 1, 1)] +void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) +{ + int idx = int(dispatchThreadID.x); + + half2 v1 = half2(1.0h, half(1 << idx)); + + // CHECK: OpGroupNonUniformFAdd + float2 r1 = WavePrefixSum(v1); + + outputBuffer[idx] = (int)r1.x; +} \ No newline at end of file -- cgit v1.2.3