diff options
| author | sricker-nvidia <115114531+sricker-nvidia@users.noreply.github.com> | 2025-05-05 15:30:33 -0700 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-05-05 22:30:33 +0000 |
| commit | 50d9781b7387b0f7f56d19c72afcf390cca72b72 (patch) | |
| tree | 7b6f1401f7a8257fa378930a052ca63f0fda91f4 | |
| parent | 698e43372cefe0fff13150925aeb7f389c21a938 (diff) | |
Add countbits 16-bit and 8-bit support (#6433) (#6897)
This change adds 16-bit and 8-bit support for the countbits intrinsic. In
cases where a backend's native countbits lacks support, support
is emulated.
New tests are added for 16-bit and 8-bit support. Additional testing
is added for 32-bit, and minor updates are made to 64-bit countbits.
| -rw-r--r-- | prelude/slang-cpp-scalar-intrinsics.h | 97 | ||||
| -rw-r--r-- | prelude/slang-cuda-prelude.h | 97 | ||||
| -rw-r--r-- | source/slang/hlsl.meta.slang | 81 | ||||
| -rw-r--r-- | tests/hlsl-intrinsic/countbits.slang | 4 | ||||
| -rw-r--r-- | tests/hlsl-intrinsic/countbits16.slang | 47 | ||||
| -rw-r--r-- | tests/hlsl-intrinsic/countbits64.slang | 4 | ||||
| -rw-r--r-- | tests/hlsl-intrinsic/countbits8.slang | 46 |
7 files changed, 290 insertions, 86 deletions
diff --git a/prelude/slang-cpp-scalar-intrinsics.h b/prelude/slang-cpp-scalar-intrinsics.h index 0a19eb327..9b045941a 100644 --- a/prelude/slang-cpp-scalar-intrinsics.h +++ b/prelude/slang-cpp-scalar-intrinsics.h @@ -628,45 +628,13 @@ SLANG_FORCE_INLINE double F64_calcSafeRadians(double radians) return (a * (SLANG_PRELUDE_PI * 2)); } -// ----------------------------- I32 ----------------------------------------- - -SLANG_FORCE_INLINE int32_t I32_abs(int32_t f) -{ - return (f < 0) ? -f : f; -} - -SLANG_FORCE_INLINE int32_t I32_min(int32_t a, int32_t b) -{ - return a < b ? a : b; -} -SLANG_FORCE_INLINE int32_t I32_max(int32_t a, int32_t b) -{ - return a > b ? a : b; -} - -SLANG_FORCE_INLINE float I32_asfloat(int32_t x) -{ - Union32 u; - u.i = x; - return u.f; -} -SLANG_FORCE_INLINE uint32_t I32_asuint(int32_t x) -{ - return uint32_t(x); -} -SLANG_FORCE_INLINE double I32_asdouble(int32_t low, int32_t hi) -{ - Union64 u; - u.u = (uint64_t(hi) << 32) | uint32_t(low); - return u.d; -} - -SLANG_FORCE_INLINE uint32_t I32_countbits(int32_t v) +// ----------------------------- U16 ----------------------------------------- +SLANG_FORCE_INLINE uint32_t U16_countbits(uint16_t v) { #if SLANG_GCC_FAMILY && !defined(SLANG_LLVM) return __builtin_popcount(uint32_t(v)); #elif SLANG_PROCESSOR_X86_64 && SLANG_VC - return __popcnt(uint32_t(v)); + return __popcnt16(v); #else uint32_t c = 0; while (v) @@ -678,6 +646,25 @@ SLANG_FORCE_INLINE uint32_t I32_countbits(int32_t v) #endif } +// ----------------------------- I16 ----------------------------------------- +SLANG_FORCE_INLINE uint32_t I16_countbits(int16_t v) +{ + return U16_countbits(uint16_t(v)); +} + +// ----------------------------- U8 ----------------------------------------- +SLANG_FORCE_INLINE uint32_t U8_countbits(uint8_t v) +{ + // No native 8bit __popcnt yet, just cast and use 16bit variant + return U16_countbits(uint16_t(v)); +} + +// ----------------------------- I8 ----------------------------------------- 
+SLANG_FORCE_INLINE uint32_t I8_countbits(int16_t v) +{ + return U8_countbits(uint8_t(v)); +} + // ----------------------------- U32 ----------------------------------------- SLANG_FORCE_INLINE uint32_t U32_abs(uint32_t f) @@ -730,6 +717,44 @@ SLANG_FORCE_INLINE uint32_t U32_countbits(uint32_t v) #endif } +// ----------------------------- I32 ----------------------------------------- + +SLANG_FORCE_INLINE int32_t I32_abs(int32_t f) +{ + return (f < 0) ? -f : f; +} + +SLANG_FORCE_INLINE int32_t I32_min(int32_t a, int32_t b) +{ + return a < b ? a : b; +} +SLANG_FORCE_INLINE int32_t I32_max(int32_t a, int32_t b) +{ + return a > b ? a : b; +} + +SLANG_FORCE_INLINE float I32_asfloat(int32_t x) +{ + Union32 u; + u.i = x; + return u.f; +} +SLANG_FORCE_INLINE uint32_t I32_asuint(int32_t x) +{ + return uint32_t(x); +} +SLANG_FORCE_INLINE double I32_asdouble(int32_t low, int32_t hi) +{ + Union64 u; + u.u = (uint64_t(hi) << 32) | uint32_t(low); + return u.d; +} + +SLANG_FORCE_INLINE uint32_t I32_countbits(int32_t v) +{ + return U32_countbits(uint32_t(v)); +} + // ----------------------------- U64 ----------------------------------------- SLANG_FORCE_INLINE uint64_t U64_abs(uint64_t f) @@ -749,7 +774,7 @@ SLANG_FORCE_INLINE uint64_t U64_max(uint64_t a, uint64_t b) SLANG_FORCE_INLINE uint32_t U64_countbits(uint64_t v) { #if SLANG_GCC_FAMILY && !defined(SLANG_LLVM) - return uint32_t(__builtin_popcountl(v)); + return uint32_t(__builtin_popcountll(v)); #elif SLANG_PROCESSOR_X86_64 && SLANG_VC return uint32_t(__popcnt64(v)); #else diff --git a/prelude/slang-cuda-prelude.h b/prelude/slang-cuda-prelude.h index 738f2fa16..91ff98a17 100644 --- a/prelude/slang-cuda-prelude.h +++ b/prelude/slang-cuda-prelude.h @@ -1788,44 +1788,34 @@ SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_fma(double a, double b, double c) return ::fma(a, b, c); } -// ----------------------------- I32 ----------------------------------------- +// ----------------------------- U8 
----------------------------------------- -// Unary -SLANG_FORCE_INLINE SLANG_CUDA_CALL int32_t I32_abs(int32_t f) +SLANG_FORCE_INLINE SLANG_CUDA_CALL uint32_t U8_countbits(uint8_t v) { - return (f < 0) ? -f : f; + // No native 8bit popc yet, just cast and use 32bit variant + return __popc(uint32_t(v)); } -// Binary -SLANG_FORCE_INLINE SLANG_CUDA_CALL int32_t I32_min(int32_t a, int32_t b) -{ - return a < b ? a : b; -} -SLANG_FORCE_INLINE SLANG_CUDA_CALL int32_t I32_max(int32_t a, int32_t b) -{ - return a > b ? a : b; -} +// ----------------------------- I8 ----------------------------------------- -SLANG_FORCE_INLINE SLANG_CUDA_CALL float I32_asfloat(int32_t x) +SLANG_FORCE_INLINE SLANG_CUDA_CALL uint32_t I8_countbits(int8_t v) { - Union32 u; - u.i = x; - return u.f; + return U8_countbits(uint8_t(v)); } -SLANG_FORCE_INLINE SLANG_CUDA_CALL uint32_t I32_asuint(int32_t x) -{ - return uint32_t(x); -} -SLANG_FORCE_INLINE SLANG_CUDA_CALL double I32_asdouble(int32_t low, int32_t hi) + +// ----------------------------- U16 ----------------------------------------- + +SLANG_FORCE_INLINE SLANG_CUDA_CALL uint32_t U16_countbits(uint16_t v) { - Union64 u; - u.u = (uint64_t(hi) << 32) | uint32_t(low); - return u.d; + // No native 16bit popc yet, just cast and use 32bit variant + return __popc(uint32_t(v)); } -SLANG_FORCE_INLINE SLANG_CUDA_CALL uint32_t I32_countbits(int32_t v) +// ----------------------------- I16 ----------------------------------------- + +SLANG_FORCE_INLINE SLANG_CUDA_CALL uint32_t I16_countbits(int16_t v) { - return __popc(uint32_t(v)); + return U16_countbits(uint16_t(v)); } // ----------------------------- U32 ----------------------------------------- @@ -1870,26 +1860,44 @@ SLANG_FORCE_INLINE SLANG_CUDA_CALL uint32_t U32_countbits(uint32_t v) return __popc(v); } +// ----------------------------- I32 ----------------------------------------- -// ----------------------------- I64 ----------------------------------------- - -SLANG_FORCE_INLINE SLANG_CUDA_CALL 
int64_t I64_abs(int64_t f) +// Unary +SLANG_FORCE_INLINE SLANG_CUDA_CALL int32_t I32_abs(int32_t f) { return (f < 0) ? -f : f; } -SLANG_FORCE_INLINE SLANG_CUDA_CALL int64_t I64_min(int64_t a, int64_t b) +// Binary +SLANG_FORCE_INLINE SLANG_CUDA_CALL int32_t I32_min(int32_t a, int32_t b) { return a < b ? a : b; } -SLANG_FORCE_INLINE SLANG_CUDA_CALL int64_t I64_max(int64_t a, int64_t b) +SLANG_FORCE_INLINE SLANG_CUDA_CALL int32_t I32_max(int32_t a, int32_t b) { return a > b ? a : b; } -SLANG_FORCE_INLINE SLANG_CUDA_CALL uint32_t I64_countbits(int64_t v) +SLANG_FORCE_INLINE SLANG_CUDA_CALL float I32_asfloat(int32_t x) +{ + Union32 u; + u.i = x; + return u.f; +} +SLANG_FORCE_INLINE SLANG_CUDA_CALL uint32_t I32_asuint(int32_t x) +{ + return uint32_t(x); +} +SLANG_FORCE_INLINE SLANG_CUDA_CALL double I32_asdouble(int32_t low, int32_t hi) +{ + Union64 u; + u.u = (uint64_t(hi) << 32) | uint32_t(low); + return u.d; +} + +SLANG_FORCE_INLINE SLANG_CUDA_CALL uint32_t I32_countbits(int32_t v) { - return __popcll(uint64_t(v)); + return U32_countbits(uint32_t(v)); } // ----------------------------- U64 ----------------------------------------- @@ -1914,6 +1922,27 @@ SLANG_FORCE_INLINE SLANG_CUDA_CALL uint32_t U64_countbits(uint64_t v) return __popcll(v); } +// ----------------------------- I64 ----------------------------------------- + +SLANG_FORCE_INLINE SLANG_CUDA_CALL int64_t I64_abs(int64_t f) +{ + return (f < 0) ? -f : f; +} + +SLANG_FORCE_INLINE SLANG_CUDA_CALL int64_t I64_min(int64_t a, int64_t b) +{ + return a < b ? a : b; +} +SLANG_FORCE_INLINE SLANG_CUDA_CALL int64_t I64_max(int64_t a, int64_t b) +{ + return a > b ? 
a : b; +} + +SLANG_FORCE_INLINE SLANG_CUDA_CALL uint32_t I64_countbits(int64_t v) +{ + return U64_countbits(uint64_t(v)); +} + // ----------------------------- IPTR ----------------------------------------- SLANG_FORCE_INLINE SLANG_CUDA_CALL intptr_t IPTR_abs(intptr_t f) diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang index 0f04006e5..07160ae9d 100644 --- a/source/slang/hlsl.meta.slang +++ b/source/slang/hlsl.meta.slang @@ -8047,6 +8047,16 @@ vector<T,N> cospi(vector<T,N> x) } } +// emulate 64-bit countbits when not natively supported. +[__readNone] +[ForceInline] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)] +internal uint __emulatedCountbits64(uint64_t value) +{ + uint2 value_uint2 = bit_cast<uint2>(value); + uint2 counted_bits_uint2 = countbits(value_uint2); + return counted_bits_uint2.x + counted_bits_uint2.y; +} /// Population count. /// Counts the number of set bits in the binary representation of a value. @@ -8060,19 +8070,32 @@ vector<T,N> cospi(vector<T,N> x) __generic<T : __BuiltinIntegerType> uint countbits(T value) { + // Emulate 8-bit support + // 8-bit support is not currently supported anywhere natively + if (T is int8_t || T is uint8_t) + { + return countbits(__intCast<uint32_t>(value)); + } + __target_switch { case hlsl: + // 64-bit support dependent on SM6.0 and dxil + // 16-bit support dependent on SM6.2 and dxil __intrinsic_asm "countbits"; case glsl: + // 64-bit support dependent on GL_ARB_gpu_shader_int64 + // 16-bit support dependent on GL_EXT_shader_16bit_storage __intrinsic_asm "bitCount"; case metal: - if(T is int64_t || T is uint64_t) + if (T is int64_t || T is uint64_t) + { + return __emulatedCountbits64(__intCast<uint64_t>(value)); + } + else if (T is int16_t || T is uint16_t) { - // emulate 64-bit - uint2 value_uint2 = bit_cast<uint2>(value); - uint2 counted_bits_uint2 = countbits(value_uint2); - return counted_bits_uint2.x + counted_bits_uint2.y; + // emulate 16-bit + return 
countbits(__intCast<uint32_t>(value)); } else { @@ -8084,17 +8107,28 @@ uint countbits(T value) case spirv: if(T is int64_t || T is uint64_t) { - // emulate 64-bit - uint2 value_uint2 = bit_cast<uint2>(value); - uint2 counted_bits_uint2 = countbits(value_uint2); - return counted_bits_uint2.x + counted_bits_uint2.y; + return __emulatedCountbits64(__intCast<uint64_t>(value)); + } + else if (T is int16_t || T is uint16_t) + { + // emulate 16-bit + return countbits(__intCast<uint32_t>(value)); } else { + // OpBitCount only supports 32-bit return spirv_asm {OpBitCount $$uint result $value}; } case wgsl: - __intrinsic_asm "countOneBits"; + // wgsl only supports 32-bit integers + if (T is int32_t) + { + // wgsl countOneBits returns the same type as the + // one it was given. Cast signed ints to unsigned + // so we can provide the correct return value. + return countbits(__intCast<uint32_t>(value)); + } + __intrinsic_asm "countOneBits"; } } @@ -8104,6 +8138,13 @@ __generic<T : __BuiltinIntegerType, let N : int> [require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)] vector<uint, N> countbits(vector<T, N> value) { + // Emulate 8-bit support + // 8-bit support is not currently supported anywhere natively + if (T is int8_t || T is uint8_t) + { + VECTOR_MAP_UNARY(uint, N, countbits, value); + } + __target_switch { case hlsl: @@ -8111,9 +8152,9 @@ vector<uint, N> countbits(vector<T, N> value) case glsl: __intrinsic_asm "bitCount"; case metal: - if(T is int64_t || T is uint64_t) + if(T is int64_t || T is uint64_t || T is int16_t || T is uint16_t) { - // emulate 64-bit + // Emulate 64-bit and 16-bit VECTOR_MAP_UNARY(uint, N, countbits, value); } else @@ -8121,9 +8162,9 @@ vector<uint, N> countbits(vector<T, N> value) __intrinsic_asm "popcount"; } case spirv: - if(T is int64_t || T is uint64_t) + if(T is int64_t || T is uint64_t || T is int16_t || T is uint16_t) { - // emulate 64-bit + // Emulate 64-bit and 16-bit VECTOR_MAP_UNARY(uint, N, countbits, value); } else @@ 
-8131,7 +8172,17 @@ vector<uint, N> countbits(vector<T, N> value) return spirv_asm {OpBitCount $$vector<uint, N> result $value}; } case wgsl: - __intrinsic_asm "countOneBits"; + // wgsl only supports 32-bit integers + if (T is int32_t) + { + vector<uint32_t, N> ret; + for (int i = 0; i < N; i++) + { + ret[i] = countbits(__intCast<uint32_t>(value[i])); + } + return ret; + } + __intrinsic_asm "countOneBits"; default: VECTOR_MAP_UNARY(uint, N, countbits, value); } diff --git a/tests/hlsl-intrinsic/countbits.slang b/tests/hlsl-intrinsic/countbits.slang index da6828e87..060ad98f4 100644 --- a/tests/hlsl-intrinsic/countbits.slang +++ b/tests/hlsl-intrinsic/countbits.slang @@ -2,8 +2,10 @@ //TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=CHK):-slang -compute -dx11 //TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=CHK):-slang -compute -dx12 //TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=CHK):-slang -vk -compute -//DISABLE_TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=CHK):-slang -cuda -compute +//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=CHK):-slang -cuda -compute //TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=CHK):-slang -mtl -compute +//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=CHK):-slang -wgpu -compute +// TODO: test GLSL pathway once emit-spirv-via-glsl is fixed and shader output reading is fixed for GLSL //CHK:1 diff --git a/tests/hlsl-intrinsic/countbits16.slang b/tests/hlsl-intrinsic/countbits16.slang new file mode 100644 index 000000000..dbfdc9217 --- /dev/null +++ b/tests/hlsl-intrinsic/countbits16.slang @@ -0,0 +1,47 @@ +//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=CHK):-slang -compute -cpu +//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=CHK):-slang -vk -compute +//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=CHK):-slang -cuda -compute +//TODO: metal is currently failing even with emulation, investigate. 
+//DISABLE_TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=CHK):-slang -mtl -compute -profile metallib_2_4 +// No support for uint16_t on fxc - we need SM6.2 and dxil to use uint16_t with d3d12 +// https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/hlsl-shader-model-6-0-features-for-direct3d-12 +//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=CHK):-slang -compute -dx12 -profile cs_6_2 -use-dxil -shaderobj -render-feature hardware-device +// wgpu only has 32-bit support, so we do not try and test it here +// TODO: test GLSL pathway once emit-spirv-via-glsl is fixed and shader output reading is fixed for GLSL + +//CHK:1 + +//TEST_INPUT:ubuffer(data=[0], stride=4):out,name=outputBuffer +RWStructuredBuffer<uint> outputBuffer; + +[numthreads(1, 1, 1)] +void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) +{ + uint r1 = countbits(uint16_t(0b1U) << 8); + uint2 r2 = countbits(uint16_t2(uint16_t(0b0U) << 8, uint16_t(0b1U) << 8)); + uint3 r3 = countbits(uint16_t3(uint16_t(0b0U) << 8, uint16_t(0b1U) << 8, uint16_t(0b11U) << 8)); + uint4 r4 = countbits(uint16_t4(uint16_t(0b0U) << 8, uint16_t(0b1U) << 8, uint16_t(0b11U) << 8, uint16_t(0b111U) << 8)); + + uint r5 = countbits(int16_t(0b1) << 8); + uint2 r6 = countbits(int16_t2(int16_t(0b0) << 8, int16_t(0b1) << 8)); + uint3 r7 = countbits(int16_t3(int16_t(0b0) << 8, int16_t(0b1) << 8, int16_t(0b11) << 8)); + uint4 r8 = countbits(int16_t4(int16_t(0b0) << 8, int16_t(0b1) << 8, int16_t(0b11) << 8, int16_t(0b111) << 8)); + + uint16_t smallShiftU16 = uint16_t(0b111) << 16; + int16_t smallShiftI16 = int16_t(0b1111) << 16; + + uint bitCountBigShiftU16 = countbits(smallShiftU16); + uint bitCountBigShiftI16 = countbits(smallShiftI16); + + outputBuffer[0] = true + && (r1 == 1) + && (r2.x == 0 && r2.y == 1) + && (r3.x == 0 && r3.y == 1 && r3.z == 2) + && (r4.x == 0 && r4.y == 1 && r4.z == 2 && r4.w == 3) + && (r5 == 1) + && (r6.x == 0 && r6.y == 1) + && (r7.x == 0 && r7.y == 1 && r7.z == 2) + && (r8.x == 0 && r8.y 
== 1 && r8.z == 2 && r8.w == 3) + && (bitCountBigShiftU16 == 0 && bitCountBigShiftI16 == 0) + ; +} diff --git a/tests/hlsl-intrinsic/countbits64.slang b/tests/hlsl-intrinsic/countbits64.slang index a24b31477..90799e411 100644 --- a/tests/hlsl-intrinsic/countbits64.slang +++ b/tests/hlsl-intrinsic/countbits64.slang @@ -1,10 +1,14 @@ //TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=CHK):-slang -compute -cpu //TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=CHK):-slang -vk -compute -render-feature int64 +// emit-spirv-via-glsl is currently ignored, but even working around this, output does not appear to be captured for GLSL +// No support for uint64_t in GLSL without an extension like GL_EXT_shader_explicit_arithmetic_types_int64 +//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-slang -vk -compute -render-feature int64 -emit-spirv-via-glsl -profile GLSL_400 -Xslang... -capability GL_EXT_shader_explicit_arithmetic_types_int64. //TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=CHK):-slang -cuda -compute //TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=CHK):-slang -mtl -compute // No support for uint64_t on fxc - we need SM6.0 and dxil to use uint64_t with d3d12 // https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/hlsl-shader-model-6-0-features-for-direct3d-12 //TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=CHK):-slang -compute -dx12 -profile cs_6_0 -use-dxil -shaderobj -render-feature hardware-device +// wgpu only has 32-bit support, so we do not try and test it here //CHK:1 diff --git a/tests/hlsl-intrinsic/countbits8.slang b/tests/hlsl-intrinsic/countbits8.slang new file mode 100644 index 000000000..1db8e805c --- /dev/null +++ b/tests/hlsl-intrinsic/countbits8.slang @@ -0,0 +1,46 @@ +//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=CHK):-slang -compute -cpu +//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=CHK):-slang -vk -compute +//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=CHK):-slang -cuda -compute +//TODO: metal is currently 
failing even with emulation, investigate. +//DISABLE_TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=CHK):-slang -mtl -compute -profile metallib_2_4 +// Not testing the following: +// -dx12/hlsl, No support for uint8_t with hlsl +// -wgpu, only has 32-bit support +// -vk/glsl, No support for uint8_t with glsl + +//CHK:1 + +//TEST_INPUT:ubuffer(data=[0], stride=4):out,name=outputBuffer +RWStructuredBuffer<uint> outputBuffer; + +[numthreads(1, 1, 1)] +void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) +{ + uint r1 = countbits(uint8_t(0b1U) << 4); + uint2 r2 = countbits(uint8_t2(uint8_t(0b0U) << 4, uint8_t(0b1U) << 4)); + uint3 r3 = countbits(uint8_t3(uint8_t(0b0U) << 4, uint8_t(0b1U) << 4, uint8_t(0b11U) << 4)); + uint4 r4 = countbits(uint8_t4(uint8_t(0b0U) << 4, uint8_t(0b1U) << 4, uint8_t(0b11U) << 4, uint8_t(0b111U) << 4)); + + uint r5 = countbits(int8_t(0b1) << 4); + uint2 r6 = countbits(int8_t2(int8_t(0b0) << 4, int8_t(0b1) << 4)); + uint3 r7 = countbits(int8_t3(int8_t(0b0) << 4, int8_t(0b1) << 4, int8_t(0b11) << 4)); + uint4 r8 = countbits(int8_t4(int8_t(0b0) << 4, int8_t(0b1) << 4, int8_t(0b11) << 4, int8_t(0b111) << 4)); + + uint8_t smallShiftU8 = uint8_t(0b111) << 8; + int8_t smallShiftI8 = int8_t(0b1111) << 8; + + uint bitCountBigShiftU8 = countbits(smallShiftU8); + uint bitCountBigShiftI8 = countbits(smallShiftI8); + + outputBuffer[0] = true + && (r1 == 1) + && (r2.x == 0 && r2.y == 1) + && (r3.x == 0 && r3.y == 1 && r3.z == 2) + && (r4.x == 0 && r4.y == 1 && r4.z == 2 && r4.w == 3) + && (r5 == 1) + && (r6.x == 0 && r6.y == 1) + && (r7.x == 0 && r7.y == 1 && r7.z == 2) + && (r8.x == 0 && r8.y == 1 && r8.z == 2 && r8.w == 3) + && (bitCountBigShiftU8 == 0 && bitCountBigShiftI8 == 0) + ; +} |
