Add countbits 16-bit and 8-bit support (#6433) (#6897)

This change adds 16-bit and 8-bit support for the countbits intrinsic. In cases where a backend's native countbits lacks support, support is emulated. New tests are added for 16-bit and 8-bit support. Additional testing is added for 32-bit, and minor updates are made to 64-bit countbits.
author: sricker-nvidia <115114531+sricker-nvidia@users.noreply.github.com> 2025-05-05 15:30:33 -0700
committer: GitHub <noreply@github.com> 2025-05-05 22:30:33 +0000
commit: 50d9781b7387b0f7f56d19c72afcf390cca72b72 (patch)
tree: 7b6f1401f7a8257fa378930a052ca63f0fda91f4 /tests
parent: 698e43372cefe0fff13150925aeb7f389c21a938 (diff)
4 files changed, 100 insertions, 1 deletions
diff --git a/tests/hlsl-intrinsic/countbits.slang b/tests/hlsl-intrinsic/countbits.slang
index da6828e87..060ad98f4 100644
--- a/tests/hlsl-intrinsic/countbits.slang
+++ b/tests/hlsl-intrinsic/countbits.slang
@@ -2,8 +2,10 @@
 //TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=CHK):-slang -compute -dx11
 //TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=CHK):-slang -compute -dx12
 //TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=CHK):-slang -vk -compute
-//DISABLE_TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=CHK):-slang -cuda -compute
+//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=CHK):-slang -cuda -compute
 //TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=CHK):-slang -mtl -compute
+//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=CHK):-slang -wgpu -compute
+// TODO: test GLSL pathway once emit-spirv-via-glsl is fixed and shader output reading is fixed for GLSL
 
 //CHK:1
 
diff --git a/tests/hlsl-intrinsic/countbits16.slang b/tests/hlsl-intrinsic/countbits16.slang
new file mode 100644
index 000000000..dbfdc9217
--- /dev/null
+++ b/tests/hlsl-intrinsic/countbits16.slang
@@ -0,0 +1,47 @@
+//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=CHK):-slang -compute -cpu
+//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=CHK):-slang -vk -compute
+//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=CHK):-slang -cuda -compute
+//TODO: metal is currently failing even with emulation, investigate.
+//DISABLE_TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=CHK):-slang -mtl -compute -profile metallib_2_4
+// No support for uint16_t on fxc - we need SM6.2 and dxil to use uint16_t with d3d12
+// https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/hlsl-shader-model-6-0-features-for-direct3d-12
+//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=CHK):-slang -compute -dx12 -profile cs_6_2 -use-dxil -shaderobj -render-feature hardware-device
+// wgpu only has 32-bit support, so we do not try and test it here
+// TODO: test GLSL pathway once emit-spirv-via-glsl is fixed and shader output reading is fixed for GLSL
+
+//CHK:1
+
+//TEST_INPUT:ubuffer(data=[0], stride=4):out,name=outputBuffer
+RWStructuredBuffer<uint> outputBuffer;
+
+[numthreads(1, 1, 1)]
+void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID)
+{
+    uint r1 = countbits(uint16_t(0b1U) << 8);
+    uint2 r2 = countbits(uint16_t2(uint16_t(0b0U) << 8, uint16_t(0b1U) << 8));
+    uint3 r3 = countbits(uint16_t3(uint16_t(0b0U) << 8, uint16_t(0b1U) << 8, uint16_t(0b11U) << 8));
+    uint4 r4 = countbits(uint16_t4(uint16_t(0b0U) << 8, uint16_t(0b1U) << 8, uint16_t(0b11U) << 8, uint16_t(0b111U) << 8));
+
+    uint r5 = countbits(int16_t(0b1) << 8);
+    uint2 r6 = countbits(int16_t2(int16_t(0b0) << 8, int16_t(0b1) << 8));
+    uint3 r7 = countbits(int16_t3(int16_t(0b0) << 8, int16_t(0b1) << 8, int16_t(0b11) << 8));
+    uint4 r8 = countbits(int16_t4(int16_t(0b0) << 8, int16_t(0b1) << 8, int16_t(0b11) << 8, int16_t(0b111) << 8));
+
+    uint16_t smallShiftU16 = uint16_t(0b111) << 16;
+    int16_t smallShiftI16 = int16_t(0b1111) << 16;
+
+    uint bitCountBigShiftU16 = countbits(smallShiftU16);
+    uint bitCountBigShiftI16 = countbits(smallShiftI16);
+
+    outputBuffer[0] = true
+        && (r1 == 1)
+        && (r2.x == 0 && r2.y == 1)
+        && (r3.x == 0 && r3.y == 1 && r3.z == 2)
+        && (r4.x == 0 && r4.y == 1 && r4.z == 2 && r4.w == 3)
+        && (r5 == 1)
+        && (r6.x == 0 && r6.y == 1)
+        && (r7.x == 0 && r7.y == 1 && r7.z == 2)
+        && (r8.x == 0 && r8.y == 1 && r8.z == 2 && r8.w == 3)
+        && (bitCountBigShiftU16 == 0 && bitCountBigShiftI16 == 0)
+        ;
+}
diff --git a/tests/hlsl-intrinsic/countbits64.slang b/tests/hlsl-intrinsic/countbits64.slang
index a24b31477..90799e411 100644
--- a/tests/hlsl-intrinsic/countbits64.slang
+++ b/tests/hlsl-intrinsic/countbits64.slang
@@ -1,10 +1,14 @@
 //TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=CHK):-slang -compute -cpu
 //TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=CHK):-slang -vk -compute -render-feature int64
+// emit-spirv-via-glsl is currently ignored, but even working around this, output does not appear to be captured for GLSL
+// No support for uint64_t in GLSL without an extension like GL_EXT_shader_explicit_arithmetic_types_int64
+//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-slang -vk -compute -render-feature int64 -emit-spirv-via-glsl -profile GLSL_400 -Xslang... -capability GL_EXT_shader_explicit_arithmetic_types_int64.
 //TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=CHK):-slang -cuda -compute
 //TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=CHK):-slang -mtl -compute
 // No support for uint64_t on fxc - we need SM6.0 and dxil to use uint64_t with d3d12
 // https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/hlsl-shader-model-6-0-features-for-direct3d-12
 //TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=CHK):-slang -compute -dx12 -profile cs_6_0 -use-dxil -shaderobj -render-feature hardware-device
+// wgpu only has 32-bit support, so we do not try and test it here
 
 //CHK:1
 
diff --git a/tests/hlsl-intrinsic/countbits8.slang b/tests/hlsl-intrinsic/countbits8.slang
new file mode 100644
index 000000000..1db8e805c
--- /dev/null
+++ b/tests/hlsl-intrinsic/countbits8.slang
@@ -0,0 +1,46 @@
+//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=CHK):-slang -compute -cpu
+//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=CHK):-slang -vk -compute
+//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=CHK):-slang -cuda -compute
+//TODO: metal is currently failing even with emulation, investigate.
+//DISABLE_TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=CHK):-slang -mtl -compute -profile metallib_2_4
+// Not testing the following:
+// -dx12/hlsl, No support for uint8_t with hlsl
+// -wgpu, only has 32-bit support
+// -vk/glsl, No support for uint8_t with glsl
+
+//CHK:1
+
+//TEST_INPUT:ubuffer(data=[0], stride=4):out,name=outputBuffer
+RWStructuredBuffer<uint> outputBuffer;
+
+[numthreads(1, 1, 1)]
+void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID)
+{
+    uint r1 = countbits(uint8_t(0b1U) << 4);
+    uint2 r2 = countbits(uint8_t2(uint8_t(0b0U) << 4, uint8_t(0b1U) << 4));
+    uint3 r3 = countbits(uint8_t3(uint8_t(0b0U) << 4, uint8_t(0b1U) << 4, uint8_t(0b11U) << 4));
+    uint4 r4 = countbits(uint8_t4(uint8_t(0b0U) << 4, uint8_t(0b1U) << 4, uint8_t(0b11U) << 4, uint8_t(0b111U) << 4));
+
+    uint r5 = countbits(int8_t(0b1) << 4);
+    uint2 r6 = countbits(int8_t2(int8_t(0b0) << 4, int8_t(0b1) << 4));
+    uint3 r7 = countbits(int8_t3(int8_t(0b0) << 4, int8_t(0b1) << 4, int8_t(0b11) << 4));
+    uint4 r8 = countbits(int8_t4(int8_t(0b0) << 4, int8_t(0b1) << 4, int8_t(0b11) << 4, int8_t(0b111) << 4));
+
+    uint8_t smallShiftU8 = uint8_t(0b111) << 8;
+    int8_t smallShiftI8 = int8_t(0b1111) << 8;
+
+    uint bitCountBigShiftU8 = countbits(smallShiftU8);
+    uint bitCountBigShiftI8 = countbits(smallShiftI8);
+
+    outputBuffer[0] = true
+        && (r1 == 1)
+        && (r2.x == 0 && r2.y == 1)
+        && (r3.x == 0 && r3.y == 1 && r3.z == 2)
+        && (r4.x == 0 && r4.y == 1 && r4.z == 2 && r4.w == 3)
+        && (r5 == 1)
+        && (r6.x == 0 && r6.y == 1)
+        && (r7.x == 0 && r7.y == 1 && r7.z == 2)
+        && (r8.x == 0 && r8.y == 1 && r8.z == 2 && r8.w == 3)
+        && (bitCountBigShiftU8 == 0 && bitCountBigShiftI8 == 0)
+        ;
+}
author	sricker-nvidia <115114531+sricker-nvidia@users.noreply.github.com>	2025-05-05 15:30:33 -0700
committer	GitHub <noreply@github.com>	2025-05-05 22:30:33 +0000
commit	50d9781b7387b0f7f56d19c72afcf390cca72b72 (patch)
tree	7b6f1401f7a8257fa378930a052ca63f0fda91f4 /tests
parent	698e43372cefe0fff13150925aeb7f389c21a938 (diff)