diff options
| author | Harsh Aggarwal (NVIDIA) <haaggarwal@nvidia.com> | 2025-09-04 10:58:02 +0530 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-09-04 05:28:02 +0000 |
| commit | 5ec41675d817f82a7ce3c4d79c68548db0bd4227 (patch) | |
| tree | 57abff17713b5d9ea876be29e3b451c9abe8c49d /tests | |
| parent | b45706b3f532f85525de5746f1f607ba2e57fc88 (diff) | |
Enable CUDA support for additional HLSL intrinsic tests (#8293)
Enable CUDA support for additional HLSL intrinsic tests by implementing
missing functionality and fixing compiler bugs affecting CUDA targets.
- Fix critical bug in InterlockedCompareStore64 where division used /4
instead of /8 for 64-bit types, causing incorrect memory addressing for
all signed int64_t atomics
- Add signed int64_t atomic wrappers (atomicExch, atomicCAS) to CUDA
prelude that properly cast to/from unsigned types as required by CUDA's
atomic API
- Enable tests: atomic-intrinsics-64bit.slang
- Implement CUDA support for QuadAny and QuadAll operations using warp
shuffle primitives (__shfl_sync with quad-level lane masking)
- Add CUDA to quad_control capability definition in
slang-capabilities.capdef
- Add _slang_quadAny/_slang_quadAll helper functions to CUDA prelude
- Enable tests: quad-control-comp-functionality.slang,
subgroup-quad.slang
---------
Co-authored-by: szihs <675653+szihs@users.noreply.github.com>
Diffstat (limited to 'tests')
6 files changed, 77 insertions, 5 deletions
diff --git a/tests/hlsl-intrinsic/atomic/atomic-intrinsics-64bit.slang b/tests/hlsl-intrinsic/atomic/atomic-intrinsics-64bit.slang index 355729d93..da5af8a5c 100644 --- a/tests/hlsl-intrinsic/atomic/atomic-intrinsics-64bit.slang +++ b/tests/hlsl-intrinsic/atomic/atomic-intrinsics-64bit.slang @@ -1,4 +1,5 @@ -//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=DX12):-slang -compute -dx12 -profile cs_6_6 -shaderobj -output-using-type +//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=CHK):-slang -compute -dx12 -profile cs_6_6 -shaderobj -output-using-type +//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=CHK):-slang -compute -cuda -profile cs_6_6 -shaderobj -output-using-type // This is to support 64-bit `Interlocked*` functions defined for HLSL SM6.6 // https://microsoft.github.io/DirectX-Specs/d3d/HLSL_SM_6_6_Int64_and_Float_Atomics.html @@ -336,7 +337,7 @@ void computeMain(uint groupIndex : SV_GroupIndex, int3 dispatchThreadID: SV_Disp outputBuffer[idx] = int(result); } -// DX12: 1 -// DX12-NEXT: 1 -// DX12-NEXT: 1 -// DX12-NEXT: 1 +// CHK: 1 +// CHK-NEXT: 1 +// CHK-NEXT: 1 +// CHK-NEXT: 1 diff --git a/tests/hlsl-intrinsic/quad-control/quad-control-comp-functionality.slang b/tests/hlsl-intrinsic/quad-control/quad-control-comp-functionality.slang index 20c36c2be..6dfd1d883 100644 --- a/tests/hlsl-intrinsic/quad-control/quad-control-comp-functionality.slang +++ b/tests/hlsl-intrinsic/quad-control/quad-control-comp-functionality.slang @@ -1,6 +1,7 @@ //TEST(compute):COMPARE_COMPUTE_EX:-vk -compute -shaderobj -emit-spirv-directly //TEST(compute):COMPARE_COMPUTE_EX:-vk -compute -shaderobj -emit-spirv-via-glsl //TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -profile cs_6_7 -dx12 -shaderobj -render-feature hardware-device +//TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute -profile cs_6_7 -shaderobj -render-feature hardware-device //TEST(compute):COMPARE_COMPUTE_EX:-metal -compute -shaderobj -xslang -DMETAL //TEST_INPUT:ubuffer(data=[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0], stride=4):out,name outputBuffer diff --git a/tests/hlsl-intrinsic/subgroup-quad.slang b/tests/hlsl-intrinsic/subgroup-quad.slang index 1cfbffb49..ec5a80e56 100644 --- a/tests/hlsl-intrinsic/subgroup-quad.slang +++ b/tests/hlsl-intrinsic/subgroup-quad.slang @@ -2,6 +2,7 @@ //TEST:SIMPLE(filecheck=SPIRV): -entry main -stage compute -target spirv -emit-spirv-directly //TEST:SIMPLE(filecheck=HLSL): -entry main -stage compute -target hlsl //TEST:SIMPLE(filecheck=METAL): -entry main -stage compute -target metal +//TEST:SIMPLE(filecheck=CUDA): -entry main -stage compute -target cuda RWStructuredBuffer<float> output; @@ -51,4 +52,13 @@ void main() // METAL: ^ 3 // METAL: quad_shuffle // METAL: quad_shuffle + + // CUDA: _waveShuffleMultiple({{.*}}, {{.*}}, (_getLaneId() & 0xFFFFFFFC) | ((1U) & 3)) + // CUDA: _waveShuffleMultiple({{.*}}, {{.*}}, (_getLaneId() & 0xFFFFFFFC) | ((1U) & 3)) + // CUDA: _waveShuffleMultiple({{.*}}, {{.*}}, _getLaneId() ^ 1) + // CUDA: _waveShuffleMultiple({{.*}}, {{.*}}, _getLaneId() ^ 1) + // CUDA: _waveShuffleMultiple({{.*}}, {{.*}}, _getLaneId() ^ 2) + // CUDA: _waveShuffleMultiple({{.*}}, {{.*}}, _getLaneId() ^ 2) + // CUDA: _waveShuffleMultiple({{.*}}, {{.*}}, _getLaneId() ^ 3) + // CUDA: _waveShuffleMultiple({{.*}}, {{.*}}, _getLaneId() ^ 3) } diff --git a/tests/hlsl-intrinsic/texture-2d-gather.slang b/tests/hlsl-intrinsic/texture-2d-gather.slang new file mode 100644 index 000000000..329041f4d --- /dev/null +++ b/tests/hlsl-intrinsic/texture-2d-gather.slang @@ -0,0 +1,58 @@ +//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=CHECK):-cuda -compute -shaderobj -output-using-type +//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=CHECK):-dx12 -compute -profile cs_6_0 -shaderobj -output-using-type + +// Test CUDA Gather runtime behavior - compare with known gather pattern +// tex2Dgather samples 4 texels in 2x2 pattern around coordinate + +//TEST_INPUT: Texture2D(size=4, content = one):name testTexture +// Create a 4x4 texture with 1.0 
values - simple but non-zero to verify gather works +Texture2D<float4> testTexture; + +//TEST_INPUT: Sampler:name samplerState +SamplerState samplerState; + +//TEST_INPUT: ubuffer(data=[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0], stride=4):out,name outputBuffer +RWStructuredBuffer<float> outputBuffer; + +[numthreads(1, 1, 1)] +void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) +{ + // Simple gather test - sample at center of 2x2 region + // This should gather from texels (0,0), (1,0), (0,1), (1,1) + float2 coords = float2(0.75, 0.75); // Between texels for gather + + // Test basic gather - should return 4 values in specific order + float4 gathered = testTexture.GatherRed(samplerState, coords); + + // Store the gathered values + outputBuffer[0] = gathered.x; // Should be consistent pattern + outputBuffer[1] = gathered.y; + outputBuffer[2] = gathered.z; + outputBuffer[3] = gathered.w; + + // Also test that gather actually works by using texture coordinates + // as the texture values (coord-based pattern) + int2 texelCoord = int2(dispatchThreadID.xy); + float coordValue = float(texelCoord.x + texelCoord.y * 4); // Create pattern: 0,1,2,3,4,5,6,7... 
+ + // Store marker value like CUDA reference (42) + outputBuffer[4] = 42.0; // Marker to verify test is working + + // Test another gather position + float4 gathered2 = testTexture.GatherRed(samplerState, float2(1.25, 1.25)); + outputBuffer[5] = gathered2.x; + outputBuffer[6] = gathered2.y; + outputBuffer[7] = gathered2.z; + outputBuffer[8] = gathered2.w; +} + +// Test results - texture filled with 1.0 values +// CHECK: 1.0 +// CHECK-NEXT: 1.0 +// CHECK-NEXT: 1.0 +// CHECK-NEXT: 1.0 +// CHECK-NEXT: 42.0 +// CHECK-NEXT: 1.0 +// CHECK-NEXT: 1.0 +// CHECK-NEXT: 1.0 +// CHECK-NEXT: 1.0 diff --git a/tests/hlsl-intrinsic/wave-rotate/wave-rotate-clustered.slang b/tests/hlsl-intrinsic/wave-rotate/wave-rotate-clustered.slang index 81601e9be..a5be09b0b 100644 --- a/tests/hlsl-intrinsic/wave-rotate/wave-rotate-clustered.slang +++ b/tests/hlsl-intrinsic/wave-rotate/wave-rotate-clustered.slang @@ -1,5 +1,6 @@ //TEST_CATEGORY(wave, compute) //TEST:COMPARE_COMPUTE_EX(filecheck-buffer=CHECK):-vk -compute -shaderobj -emit-spirv-directly +//TEST:COMPARE_COMPUTE_EX(filecheck-buffer=CHECK):-cuda -compute -shaderobj -profile sm_6_0 //TEST:COMPARE_COMPUTE_EX(filecheck-buffer=CHECK):-vk -compute -shaderobj -emit-spirv-via-glsl -profile sm_6_0 -Xslang... -capability GL_KHR_shader_subgroup_rotate -X. //TEST:COMPARE_COMPUTE_EX(filecheck-buffer=CHECK):-vk -compute -shaderobj -emit-spirv-directly -xslang -DUSE_GLSL_SYNTAX -allow-glsl //TEST:COMPARE_COMPUTE_EX(filecheck-buffer=CHECK):-vk -compute -shaderobj -emit-spirv-via-glsl -profile sm_6_0 -allow-glsl -Xslang... -DUSE_GLSL_SYNTAX -capability GL_KHR_shader_subgroup_rotate -X. 
diff --git a/tests/hlsl-intrinsic/wave-rotate/wave-rotate.slang b/tests/hlsl-intrinsic/wave-rotate/wave-rotate.slang index 353afbb35..f67005078 100644 --- a/tests/hlsl-intrinsic/wave-rotate/wave-rotate.slang +++ b/tests/hlsl-intrinsic/wave-rotate/wave-rotate.slang @@ -1,5 +1,6 @@ // TEST_CATEGORY(wave, compute) // TEST:COMPARE_COMPUTE_EX(filecheck-buffer=CHECK):-vk -compute -shaderobj -emit-spirv-directly +//TEST:COMPARE_COMPUTE_EX(filecheck-buffer=CHECK):-cuda -compute -shaderobj -emit-spirv-directly // TEST:COMPARE_COMPUTE_EX(filecheck-buffer=CHECK):-slang -compute -vk -shaderobj -emit-spirv-via-glsl -profile sm_6_0 -Xslang... -capability GL_KHR_shader_subgroup_rotate -X. //TEST:COMPARE_COMPUTE_EX(filecheck-buffer=CHECK):-metal -compute -shaderobj -xslang -DMETAL |
