summaryrefslogtreecommitdiff
path: root/tests
diff options
context:
space:
mode:
authorHarsh Aggarwal (NVIDIA) <haaggarwal@nvidia.com>2025-09-04 10:58:02 +0530
committerGitHub <noreply@github.com>2025-09-04 05:28:02 +0000
commit5ec41675d817f82a7ce3c4d79c68548db0bd4227 (patch)
tree57abff17713b5d9ea876be29e3b451c9abe8c49d /tests
parentb45706b3f532f85525de5746f1f607ba2e57fc88 (diff)
Enable CUDA support for additional HLSL intrinsic tests (#8293)
Enable CUDA support for additional HLSL intrinsic tests by implementing missing functionality and fixing compiler bugs affecting CUDA targets. - Fix critical bug in InterlockedCompareStore64 where division used /4 instead of /8 for 64-bit types, causing incorrect memory addressing for all signed int 64_t atomics - Add signed int64_t atomic wrappers (atomicExch, atomicCAS) to CUDA prelu de that properly cast to/from unsigned types as required by CUDA's atomic API - Enable tests: atomic-intrinsics-64bit.slang - Implement CUDA support for QuadAny and QuadAll operations using warp shu ffle primitives (__shfl_sync with quad-level lane masking) - Add CUDA to quad_control capability definition in slang-capabilities.capdef - Add _slang_quadAny/_slang_quadAll helper functions to CUDA prelude - Enable tests: quad-control-comp-functionality.slang, subgroup-quad.slang --------- Co-authored-by: szihs <675653+szihs@users.noreply.github.com>
Diffstat (limited to 'tests')
-rw-r--r--tests/hlsl-intrinsic/atomic/atomic-intrinsics-64bit.slang11
-rw-r--r--tests/hlsl-intrinsic/quad-control/quad-control-comp-functionality.slang1
-rw-r--r--tests/hlsl-intrinsic/subgroup-quad.slang10
-rw-r--r--tests/hlsl-intrinsic/texture-2d-gather.slang58
-rw-r--r--tests/hlsl-intrinsic/wave-rotate/wave-rotate-clustered.slang1
-rw-r--r--tests/hlsl-intrinsic/wave-rotate/wave-rotate.slang1
6 files changed, 77 insertions, 5 deletions
diff --git a/tests/hlsl-intrinsic/atomic/atomic-intrinsics-64bit.slang b/tests/hlsl-intrinsic/atomic/atomic-intrinsics-64bit.slang
index 355729d93..da5af8a5c 100644
--- a/tests/hlsl-intrinsic/atomic/atomic-intrinsics-64bit.slang
+++ b/tests/hlsl-intrinsic/atomic/atomic-intrinsics-64bit.slang
@@ -1,4 +1,5 @@
-//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=DX12):-slang -compute -dx12 -profile cs_6_6 -shaderobj -output-using-type
+//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=CHK):-slang -compute -dx12 -profile cs_6_6 -shaderobj -output-using-type
+//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=CHK):-slang -compute -cuda -profile cs_6_6 -shaderobj -output-using-type
// This is to support 64-bit `Interlocked*` functions defined for HLSL SM6.6
// https://microsoft.github.io/DirectX-Specs/d3d/HLSL_SM_6_6_Int64_and_Float_Atomics.html
@@ -336,7 +337,7 @@ void computeMain(uint groupIndex : SV_GroupIndex, int3 dispatchThreadID: SV_Disp
outputBuffer[idx] = int(result);
}
-// DX12: 1
-// DX12-NEXT: 1
-// DX12-NEXT: 1
-// DX12-NEXT: 1
+// CHK: 1
+// CHK-NEXT: 1
+// CHK-NEXT: 1
+// CHK-NEXT: 1
diff --git a/tests/hlsl-intrinsic/quad-control/quad-control-comp-functionality.slang b/tests/hlsl-intrinsic/quad-control/quad-control-comp-functionality.slang
index 20c36c2be..6dfd1d883 100644
--- a/tests/hlsl-intrinsic/quad-control/quad-control-comp-functionality.slang
+++ b/tests/hlsl-intrinsic/quad-control/quad-control-comp-functionality.slang
@@ -1,6 +1,7 @@
//TEST(compute):COMPARE_COMPUTE_EX:-vk -compute -shaderobj -emit-spirv-directly
//TEST(compute):COMPARE_COMPUTE_EX:-vk -compute -shaderobj -emit-spirv-via-glsl
//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -profile cs_6_7 -dx12 -shaderobj -render-feature hardware-device
+//TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute -profile cs_6_7 -shaderobj -render-feature hardware-device
//TEST(compute):COMPARE_COMPUTE_EX:-metal -compute -shaderobj -xslang -DMETAL
//TEST_INPUT:ubuffer(data=[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0], stride=4):out,name outputBuffer
diff --git a/tests/hlsl-intrinsic/subgroup-quad.slang b/tests/hlsl-intrinsic/subgroup-quad.slang
index 1cfbffb49..ec5a80e56 100644
--- a/tests/hlsl-intrinsic/subgroup-quad.slang
+++ b/tests/hlsl-intrinsic/subgroup-quad.slang
@@ -2,6 +2,7 @@
//TEST:SIMPLE(filecheck=SPIRV): -entry main -stage compute -target spirv -emit-spirv-directly
//TEST:SIMPLE(filecheck=HLSL): -entry main -stage compute -target hlsl
//TEST:SIMPLE(filecheck=METAL): -entry main -stage compute -target metal
+//TEST:SIMPLE(filecheck=CUDA): -entry main -stage compute -target cuda
RWStructuredBuffer<float> output;
@@ -51,4 +52,13 @@ void main()
// METAL: ^ 3
// METAL: quad_shuffle
// METAL: quad_shuffle
+
+ // CUDA: _waveShuffleMultiple({{.*}}, {{.*}}, (_getLaneId() & 0xFFFFFFFC) | ((1U) & 3))
+ // CUDA: _waveShuffleMultiple({{.*}}, {{.*}}, (_getLaneId() & 0xFFFFFFFC) | ((1U) & 3))
+ // CUDA: _waveShuffleMultiple({{.*}}, {{.*}}, _getLaneId() ^ 1)
+ // CUDA: _waveShuffleMultiple({{.*}}, {{.*}}, _getLaneId() ^ 1)
+ // CUDA: _waveShuffleMultiple({{.*}}, {{.*}}, _getLaneId() ^ 2)
+ // CUDA: _waveShuffleMultiple({{.*}}, {{.*}}, _getLaneId() ^ 2)
+ // CUDA: _waveShuffleMultiple({{.*}}, {{.*}}, _getLaneId() ^ 3)
+ // CUDA: _waveShuffleMultiple({{.*}}, {{.*}}, _getLaneId() ^ 3)
}
diff --git a/tests/hlsl-intrinsic/texture-2d-gather.slang b/tests/hlsl-intrinsic/texture-2d-gather.slang
new file mode 100644
index 000000000..329041f4d
--- /dev/null
+++ b/tests/hlsl-intrinsic/texture-2d-gather.slang
@@ -0,0 +1,58 @@
+//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=CHECK):-cuda -compute -shaderobj -output-using-type
+//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=CHECK):-dx12 -compute -profile cs_6_0 -shaderobj -output-using-type
+
+// Test CUDA Gather runtime behavior - compare with known gather pattern
+// tex2Dgather samples 4 texels in 2x2 pattern around coordinate
+
+//TEST_INPUT: Texture2D(size=4, content = one):name testTexture
+// Create a 4x4 texture with 1.0 values - simple but non-zero to verify gather works
+Texture2D<float4> testTexture;
+
+//TEST_INPUT: Sampler:name samplerState
+SamplerState samplerState;
+
+//TEST_INPUT: ubuffer(data=[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0], stride=4):out,name outputBuffer
+RWStructuredBuffer<float> outputBuffer;
+
+[numthreads(1, 1, 1)]
+void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID)
+{
+ // Simple gather test - sample at center of 2x2 region
+ // This should gather from texels (0,0), (1,0), (0,1), (1,1)
+ float2 coords = float2(0.75, 0.75); // Between texels for gather
+
+ // Test basic gather - should return 4 values in specific order
+ float4 gathered = testTexture.GatherRed(samplerState, coords);
+
+ // Store the gathered values
+ outputBuffer[0] = gathered.x; // Should be consistent pattern
+ outputBuffer[1] = gathered.y;
+ outputBuffer[2] = gathered.z;
+ outputBuffer[3] = gathered.w;
+
+ // Also test that gather actually works by using texture coordinates
+ // as the texture values (coord-based pattern)
+ int2 texelCoord = int2(dispatchThreadID.xy);
+ float coordValue = float(texelCoord.x + texelCoord.y * 4); // Create pattern: 0,1,2,3,4,5,6,7...
+
+ // Store marker value like CUDA reference (42)
+ outputBuffer[4] = 42.0; // Marker to verify test is working
+
+ // Test another gather position
+ float4 gathered2 = testTexture.GatherRed(samplerState, float2(1.25, 1.25));
+ outputBuffer[5] = gathered2.x;
+ outputBuffer[6] = gathered2.y;
+ outputBuffer[7] = gathered2.z;
+ outputBuffer[8] = gathered2.w;
+}
+
+// Test results - texture filled with 1.0 values
+// CHECK: 1.0
+// CHECK-NEXT: 1.0
+// CHECK-NEXT: 1.0
+// CHECK-NEXT: 1.0
+// CHECK-NEXT: 42.0
+// CHECK-NEXT: 1.0
+// CHECK-NEXT: 1.0
+// CHECK-NEXT: 1.0
+// CHECK-NEXT: 1.0
diff --git a/tests/hlsl-intrinsic/wave-rotate/wave-rotate-clustered.slang b/tests/hlsl-intrinsic/wave-rotate/wave-rotate-clustered.slang
index 81601e9be..a5be09b0b 100644
--- a/tests/hlsl-intrinsic/wave-rotate/wave-rotate-clustered.slang
+++ b/tests/hlsl-intrinsic/wave-rotate/wave-rotate-clustered.slang
@@ -1,5 +1,6 @@
//TEST_CATEGORY(wave, compute)
//TEST:COMPARE_COMPUTE_EX(filecheck-buffer=CHECK):-vk -compute -shaderobj -emit-spirv-directly
+//TEST:COMPARE_COMPUTE_EX(filecheck-buffer=CHECK):-cuda -compute -shaderobj -profile sm_6_0
//TEST:COMPARE_COMPUTE_EX(filecheck-buffer=CHECK):-vk -compute -shaderobj -emit-spirv-via-glsl -profile sm_6_0 -Xslang... -capability GL_KHR_shader_subgroup_rotate -X.
//TEST:COMPARE_COMPUTE_EX(filecheck-buffer=CHECK):-vk -compute -shaderobj -emit-spirv-directly -xslang -DUSE_GLSL_SYNTAX -allow-glsl
//TEST:COMPARE_COMPUTE_EX(filecheck-buffer=CHECK):-vk -compute -shaderobj -emit-spirv-via-glsl -profile sm_6_0 -allow-glsl -Xslang... -DUSE_GLSL_SYNTAX -capability GL_KHR_shader_subgroup_rotate -X.
diff --git a/tests/hlsl-intrinsic/wave-rotate/wave-rotate.slang b/tests/hlsl-intrinsic/wave-rotate/wave-rotate.slang
index 353afbb35..f67005078 100644
--- a/tests/hlsl-intrinsic/wave-rotate/wave-rotate.slang
+++ b/tests/hlsl-intrinsic/wave-rotate/wave-rotate.slang
@@ -1,5 +1,6 @@
// TEST_CATEGORY(wave, compute)
// TEST:COMPARE_COMPUTE_EX(filecheck-buffer=CHECK):-vk -compute -shaderobj -emit-spirv-directly
+//TEST:COMPARE_COMPUTE_EX(filecheck-buffer=CHECK):-cuda -compute -shaderobj -emit-spirv-directly
// TEST:COMPARE_COMPUTE_EX(filecheck-buffer=CHECK):-slang -compute -vk -shaderobj -emit-spirv-via-glsl -profile sm_6_0 -Xslang... -capability GL_KHR_shader_subgroup_rotate -X.
//TEST:COMPARE_COMPUTE_EX(filecheck-buffer=CHECK):-metal -compute -shaderobj -xslang -DMETAL