From e0c20a076f2ec84586b6508664df4f59273c6aaf Mon Sep 17 00:00:00 2001 From: "Harsh Aggarwal (NVIDIA)" Date: Wed, 20 Aug 2025 14:41:06 +0530 Subject: Updated support to enable batch3 (#8219) Enable CUDA support for batch 3 tests - Enhanced wave operations with exclusive support - Added proper identity values for min/max operations - Fixed intrinsic name mapping issues - Updated test configurations Co-authored-by: Ellie Hermaszewska --- tests/autodiff-dstdlib/determinant.slang | 1 + .../byte-address-buffer/byte-address-half-atomics.slang | 1 + tests/hlsl-intrinsic/debug-break.slang | 5 +++-- tests/hlsl-intrinsic/scalar-double-vk-intrinsic.slang | 5 ++--- tests/hlsl-intrinsic/wave-active-count-bits.slang | 3 ++- tests/hlsl-intrinsic/wave-broadcast-lane-at-vk.slang | 1 + tests/hlsl-intrinsic/wave-is-first-lane.slang | 2 +- tests/hlsl-intrinsic/wave-multi/wave-multi-prefix-max.slang | 2 +- tests/hlsl-intrinsic/wave-multi/wave-multi-prefix-min.slang | 2 +- .../wave-multi/wave-multi-prefix-scalar-functional.slang | 2 +- tests/hlsl-intrinsic/wave-prefix-product.slang | 2 +- tests/hlsl-intrinsic/wave-prefix-sum.slang | 2 +- tests/hlsl-intrinsic/wave-vector.slang | 2 +- 13 files changed, 17 insertions(+), 13 deletions(-) (limited to 'tests') diff --git a/tests/autodiff-dstdlib/determinant.slang b/tests/autodiff-dstdlib/determinant.slang index d2e699551..f73d3cdfa 100644 --- a/tests/autodiff-dstdlib/determinant.slang +++ b/tests/autodiff-dstdlib/determinant.slang @@ -1,5 +1,6 @@ //TEST(compute, vulkan):COMPARE_COMPUTE_EX:-vk -compute -shaderobj -output-using-type //TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -shaderobj -output-using-type +//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -shaderobj -output-using-type -cuda //TEST_INPUT:ubuffer(data=[0 0 0 0], stride=4):out,name=outputBuffer RWStructuredBuffer outputBuffer; diff --git a/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics.slang b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics.slang index f53d38d74..d23a675b2 100644 --- a/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics.slang +++ b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics.slang @@ -3,6 +3,7 @@ // Disabled because validation layer doesn't like vector atomics, although nv driver does allow it. //DISABLED_TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=CHECK): -vk -compute -profile cs_6_2 -render-features half -shaderobj -emit-spirv-directly -output-using-type +//TEST(compute):COMPARE_COMPUTE(filecheck-buffer=CHECK): -cuda -compute -profile cs_6_2 -render-features half -shaderobj -emit-spirv-directly -output-using-type //TEST:SIMPLE(filecheck=SPIRV):-target spirv -entry computeMain -stage compute -emit-spirv-directly -skip-spirv-validation //TEST:SIMPLE(filecheck=HLSL):-target hlsl -entry computeMain -profile cs_6_3 //TEST_INPUT:set tmpBuffer = ubuffer(data=[0 0 0 0], stride=4) diff --git a/tests/hlsl-intrinsic/debug-break.slang b/tests/hlsl-intrinsic/debug-break.slang index c16139f72..8fd4567dc 100644 --- a/tests/hlsl-intrinsic/debug-break.slang +++ b/tests/hlsl-intrinsic/debug-break.slang @@ -1,7 +1,7 @@ //TEST:SIMPLE(filecheck=CHECK_GLSL):-stage compute -entry computeMain -target glsl -line-directive-mode none // We can't enable because output source includes path to prelude. //DISABLE_TEST:SIMPLE:-stage compute -entry computeMain -target cpp -line-directive-mode none -//DISABLE_TEST:SIMPLE:-stage compute -entry computeMain -target cuda -line-directive-mode none +//TEST:SIMPLE(filecheck=CHECK_CUDA):-stage compute -entry computeMain -target cuda -line-directive-mode none // Not currently supported on HLSL //DISABLE_TEST:SIMPLE:-stage compute -entry computeMain -target hlsl -line-directive-mode none // With `slang-llvm` this will crash, but the call stack isn't really usable. @@ -15,6 +15,7 @@ // CHECK_GLSL: void main // CHECK_GLSL: glslDebugBreak{{.*}}(); +// CHECK_CUDA:__brkpt() RWStructuredBuffer outputBuffer; [numthreads(4, 1, 1)] @@ -25,4 +26,4 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) debugBreak(); outputBuffer[idx] = idx; -} \ No newline at end of file +} diff --git a/tests/hlsl-intrinsic/scalar-double-vk-intrinsic.slang b/tests/hlsl-intrinsic/scalar-double-vk-intrinsic.slang index 128c1c1cd..042c0b62e 100644 --- a/tests/hlsl-intrinsic/scalar-double-vk-intrinsic.slang +++ b/tests/hlsl-intrinsic/scalar-double-vk-intrinsic.slang @@ -3,8 +3,7 @@ //TEST(compute):COMPARE_COMPUTE_EX:-cpu -compute -output-using-type -shaderobj //TEST(compute, vulkan):COMPARE_COMPUTE_EX:-vk -compute -output-using-type -shaderobj -render-feature double -// We don't want to run a cuda test here... -//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute -shaderobj +//TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute -shaderobj -output-using-type //TEST_INPUT:ubuffer(data=[0 0 0 0 0 0 0 0], stride=4):out,name outputBuffer RWStructuredBuffer outputBuffer; @@ -54,4 +53,4 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) } outputBuffer[idx] = ft; -} \ No newline at end of file +} diff --git a/tests/hlsl-intrinsic/wave-active-count-bits.slang b/tests/hlsl-intrinsic/wave-active-count-bits.slang index 105b95a6f..a7aa48687 100644 --- a/tests/hlsl-intrinsic/wave-active-count-bits.slang +++ b/tests/hlsl-intrinsic/wave-active-count-bits.slang @@ -2,6 +2,7 @@ //DISABLE_TEST:COMPARE_COMPUTE_EX:-cpu -compute -shaderobj //DISABLE_TEST:COMPARE_COMPUTE_EX:-slang -compute -shaderobj //TEST:COMPARE_COMPUTE_EX:-slang -compute -dx12 -use-dxil -profile cs_6_0 -shaderobj -render-feature hardware-device +//TEST:COMPARE_COMPUTE_EX:-slang -compute -cuda -profile cs_6_0 -shaderobj -render-feature hardware-device //TEST(vulkan):COMPARE_COMPUTE_EX:-vk -compute -shaderobj -render-feature hardware-device //TEST:COMPARE_COMPUTE_EX:-cuda -compute -render-features cuda_sm_7_0 -shaderobj @@ -14,4 +15,4 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) int idx = int(dispatchThreadID.x); outputBuffer[idx] = int(WaveActiveCountBits(bool(idx & 5))); -} \ No newline at end of file +} diff --git a/tests/hlsl-intrinsic/wave-broadcast-lane-at-vk.slang b/tests/hlsl-intrinsic/wave-broadcast-lane-at-vk.slang index b0cff08a9..4a32ab736 100644 --- a/tests/hlsl-intrinsic/wave-broadcast-lane-at-vk.slang +++ b/tests/hlsl-intrinsic/wave-broadcast-lane-at-vk.slang @@ -1,5 +1,6 @@ //TEST_CATEGORY(wave, compute) //TEST:COMPARE_COMPUTE_EX:-slang -compute -dx12 -use-dxil -profile cs_6_0 -shaderobj +//TEST:COMPARE_COMPUTE_EX:-slang -compute -cuda -profile cs_6_0 -shaderobj //TEST(vulkan):COMPARE_COMPUTE_EX:-vk -compute -shaderobj //TEST:COMPARE_COMPUTE_EX:-wgpu -compute -shaderobj //TEST:COMPARE_COMPUTE_EX:-metal -compute -shaderobj diff --git a/tests/hlsl-intrinsic/wave-is-first-lane.slang b/tests/hlsl-intrinsic/wave-is-first-lane.slang index 220a5758b..ae986c7b3 100644 --- a/tests/hlsl-intrinsic/wave-is-first-lane.slang +++ b/tests/hlsl-intrinsic/wave-is-first-lane.slang @@ -3,7 +3,7 @@ //DISABLE_TEST:COMPARE_COMPUTE_EX:-slang -compute -shaderobj //TEST:COMPARE_COMPUTE_EX:-slang -compute -dx12 -use-dxil -profile cs_6_0 -shaderobj -render-feature hardware-device //TEST(vulkan):COMPARE_COMPUTE_EX:-vk -compute -shaderobj -render-feature hardware-device -//TEST:COMPARE_COMPUTE_EX:-cuda -compute -render-features cuda_sm_7_0 -shaderobj +//TEST:COMPARE_COMPUTE_EX:-cuda -compute -capability cuda_sm_7_0 -shaderobj //TEST:COMPARE_COMPUTE_EX:-wgpu -compute -shaderobj //TEST:COMPARE_COMPUTE_EX:-metal -compute -shaderobj diff --git a/tests/hlsl-intrinsic/wave-multi/wave-multi-prefix-max.slang b/tests/hlsl-intrinsic/wave-multi/wave-multi-prefix-max.slang index 654fd6130..0aca11f38 100644 --- a/tests/hlsl-intrinsic/wave-multi/wave-multi-prefix-max.slang +++ b/tests/hlsl-intrinsic/wave-multi/wave-multi-prefix-max.slang @@ -1,7 +1,7 @@ //TEST_CATEGORY(wave, compute) //TEST:COMPARE_COMPUTE_EX(filecheck-buffer=CHECK):-vk -compute -shaderobj -emit-spirv-directly //TEST:COMPARE_COMPUTE_EX(filecheck-buffer=CHECK):-vk -compute -shaderobj -emit-spirv-via-glsl - +//TEST:COMPARE_COMPUTE_EX(filecheck-buffer=CHECK):-cuda -capability cuda_sm_7_0 -compute -shaderobj //TEST:COMPARE_COMPUTE_EX(filecheck-buffer=CHECK):-vk -compute -shaderobj -emit-spirv-directly -xslang -DUSE_GLSL_SYNTAX -allow-glsl //TEST_INPUT:ubuffer(data=[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0], stride=4):out,name outputBuffer diff --git a/tests/hlsl-intrinsic/wave-multi/wave-multi-prefix-min.slang b/tests/hlsl-intrinsic/wave-multi/wave-multi-prefix-min.slang index 68e1e9c05..321b99a0e 100644 --- a/tests/hlsl-intrinsic/wave-multi/wave-multi-prefix-min.slang +++ b/tests/hlsl-intrinsic/wave-multi/wave-multi-prefix-min.slang @@ -1,7 +1,7 @@ //TEST_CATEGORY(wave, compute) //TEST:COMPARE_COMPUTE_EX(filecheck-buffer=CHECK):-vk -compute -shaderobj -emit-spirv-directly //TEST:COMPARE_COMPUTE_EX(filecheck-buffer=CHECK):-vk -compute -shaderobj -emit-spirv-via-glsl - +//TEST:COMPARE_COMPUTE_EX(filecheck-buffer=CHECK):-cuda -capability cuda_sm_7_0 -compute -shaderobj //TEST:COMPARE_COMPUTE_EX(filecheck-buffer=CHECK):-vk -compute -shaderobj -emit-spirv-directly -xslang -DUSE_GLSL_SYNTAX -allow-glsl //TEST_INPUT:ubuffer(data=[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0], stride=4):out,name outputBuffer diff --git a/tests/hlsl-intrinsic/wave-multi/wave-multi-prefix-scalar-functional.slang b/tests/hlsl-intrinsic/wave-multi/wave-multi-prefix-scalar-functional.slang index 5de34b20a..67367f264 100644 --- a/tests/hlsl-intrinsic/wave-multi/wave-multi-prefix-scalar-functional.slang +++ b/tests/hlsl-intrinsic/wave-multi/wave-multi-prefix-scalar-functional.slang @@ -4,7 +4,7 @@ //TEST:COMPARE_COMPUTE_EX:-slang -compute -dx12 -use-dxil -profile sm_6_5 -shaderobj //TEST:COMPARE_COMPUTE_EX:-vk -compute -shaderobj -//TEST:COMPARE_COMPUTE_EX:-cuda -compute -render-features cuda_sm_7_0 -shaderobj +//TEST:COMPARE_COMPUTE_EX:-cuda -compute -capability cuda_sm_7_0 -shaderobj //TEST_INPUT:ubuffer(data=[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0], stride=4):out,name outputBuffer RWStructuredBuffer outputBuffer; diff --git a/tests/hlsl-intrinsic/wave-prefix-product.slang b/tests/hlsl-intrinsic/wave-prefix-product.slang index 774f5996e..f8c0ed57a 100644 --- a/tests/hlsl-intrinsic/wave-prefix-product.slang +++ b/tests/hlsl-intrinsic/wave-prefix-product.slang @@ -3,7 +3,7 @@ //DISABLE_TEST:COMPARE_COMPUTE_EX:-slang -compute -shaderobj //TEST:COMPARE_COMPUTE_EX:-slang -compute -dx12 -use-dxil -profile cs_6_0 -shaderobj -render-feature hardware-device //TEST(vulkan):COMPARE_COMPUTE_EX:-vk -compute -shaderobj -render-feature hardware-device -//TEST:COMPARE_COMPUTE_EX:-cuda -compute -render-features cuda_sm_7_0 -shaderobj +//TEST:COMPARE_COMPUTE_EX:-cuda -compute -capability cuda_sm_7_0 -shaderobj //TEST:COMPARE_COMPUTE_EX:-wgpu -compute -shaderobj //TEST:COMPARE_COMPUTE_EX:-metal -compute -shaderobj diff --git a/tests/hlsl-intrinsic/wave-prefix-sum.slang b/tests/hlsl-intrinsic/wave-prefix-sum.slang index 4f7c2912d..8a092e20d 100644 --- a/tests/hlsl-intrinsic/wave-prefix-sum.slang +++ b/tests/hlsl-intrinsic/wave-prefix-sum.slang @@ -3,7 +3,7 @@ //DISABLE_TEST:COMPARE_COMPUTE_EX:-slang -compute -shaderobj //TEST:COMPARE_COMPUTE_EX:-slang -compute -dx12 -use-dxil -profile cs_6_0 -shaderobj -render-feature hardware-device //TEST(vulkan):COMPARE_COMPUTE_EX:-vk -compute -shaderobj -render-feature hardware-device -//TEST:COMPARE_COMPUTE_EX:-cuda -compute -render-features cuda_sm_7_0 -shaderobj +//TEST:COMPARE_COMPUTE_EX:-cuda -compute -capability cuda_sm_7_0 -shaderobj //TEST:COMPARE_COMPUTE_EX:-wgpu -compute -shaderobj //TEST:COMPARE_COMPUTE_EX:-metal -compute -shaderobj diff --git a/tests/hlsl-intrinsic/wave-vector.slang b/tests/hlsl-intrinsic/wave-vector.slang index f786794ec..4c3b7ccb6 100644 --- a/tests/hlsl-intrinsic/wave-vector.slang +++ b/tests/hlsl-intrinsic/wave-vector.slang @@ -3,7 +3,7 @@ //DISABLE_TEST:COMPARE_COMPUTE_EX:-slang -compute -shaderobj //TEST:COMPARE_COMPUTE_EX:-slang -compute -dx12 -use-dxil -profile cs_6_0 -shaderobj -render-feature hardware-device //TEST(vulkan):COMPARE_COMPUTE_EX:-vk -compute -shaderobj -render-feature hardware-device -//TEST:COMPARE_COMPUTE_EX:-cuda -compute -render-features cuda_sm_7_0 -shaderobj +//TEST:COMPARE_COMPUTE_EX:-cuda -compute -capability cuda_sm_7_0 -shaderobj //TEST:COMPARE_COMPUTE_EX:-wgpu -compute -shaderobj //TEST:COMPARE_COMPUTE_EX:-metal -compute -shaderobj -- cgit v1.2.3