diff options
| author | sricker-nvidia <115114531+sricker-nvidia@users.noreply.github.com> | 2025-04-19 04:33:27 -0700 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-04-19 11:33:27 +0000 |
| commit | 043278a527ab5744674417a08d924c67a60a486b (patch) | |
| tree | 19c3ead87def94f2d418926d5f15b9eab1ced440 /tests | |
| parent | 6bfabfee317887e678eed9cd6768df2ffd3b9704 (diff) | |
Implement 64bit countbits intrinsic (#6433) (#6845)
Change modifies the countbits intrinsic to use generics in order to
support 64bit countbits on select platforms where this is supported.
On platforms where this is not natively supported, we emulate by
converting the 64-bit type into a uint2 (metal and spir-v).
This should align with the implementation of other uint64_t
intrinsics such as abs, min, max and clamp.
Added new countbits64 test to verify changes.
Updated documentation for 64bit-type-support.html
Diffstat (limited to 'tests')
| -rw-r--r-- | tests/hlsl-intrinsic/countbits64.slang | 44 |
1 files changed, 44 insertions, 0 deletions
diff --git a/tests/hlsl-intrinsic/countbits64.slang b/tests/hlsl-intrinsic/countbits64.slang new file mode 100644 index 000000000..a24b31477 --- /dev/null +++ b/tests/hlsl-intrinsic/countbits64.slang @@ -0,0 +1,44 @@ +//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=CHK):-slang -compute -cpu +//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=CHK):-slang -vk -compute -render-feature int64 +//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=CHK):-slang -cuda -compute +//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=CHK):-slang -mtl -compute +// No support for uint64_t on fxc - we need SM6.0 and dxil to use uint64_t with d3d12 +// https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/hlsl-shader-model-6-0-features-for-direct3d-12 +//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=CHK):-slang -compute -dx12 -profile cs_6_0 -use-dxil -shaderobj -render-feature hardware-device + +//CHK:1 + +//TEST_INPUT:ubuffer(data=[0], stride=4):out,name=outputBuffer +RWStructuredBuffer<uint> outputBuffer; + +[numthreads(1, 1, 1)] +void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) +{ + uint r1 = countbits(0b1ULL << 32); + uint2 r2 = countbits(uint64_t2(0b0ULL << 32, 0b1ULL << 32)); + uint3 r3 = countbits(uint64_t3(0b0ULL << 32, 0b1ULL << 32, 0b11ULL << 32)); + uint4 r4 = countbits(uint64_t4(0b0ULL << 32, 0b1ULL << 32, 0b11ULL << 32, 0b111ULL << 32)); + + uint r5 = countbits(0b1LL << 32); + uint2 r6 = countbits(int64_t2(0b0LL << 32, 0b1LL << 32)); + uint3 r7 = countbits(int64_t3(0b0LL << 32, 0b1LL << 32, 0b11LL << 32)); + uint4 r8 = countbits(int64_t4(0b0LL << 32, 0b1LL << 32, 0b11LL << 32, 0b111LL << 32)); + + uint bigShiftU32 = 0b111U << 32; + int bigShiftI32 = 0b1111 << 32; + + uint bitCountBigShiftU32 = countbits(bigShiftU32); + uint bitCountBigShiftI32 = countbits(bigShiftI32); + + outputBuffer[0] = true + && (r1 == 1) + && (r2.x == 0 && r2.y == 1) + && (r3.x == 0 && r3.y == 1 && r3.z == 2) + && (r4.x == 0 && r4.y == 1 && r4.z == 2 && r4.w == 3) + && (r5 == 1) + && (r6.x == 0 && r6.y == 1) + && (r7.x == 0 && r7.y == 1 && r7.z == 2) + && (r8.x == 0 && r8.y == 1 && r8.z == 2 && r8.w == 3) + && (bitCountBigShiftU32 == 0 && bitCountBigShiftI32 == 0) + ; +} |
