summary | refs | log | tree | commit | diff | stats
path: root/source
diff options
context:
space:
mode:
author sricker-nvidia <115114531+sricker-nvidia@users.noreply.github.com> 2025-04-19 04:33:27 -0700
committer GitHub <noreply@github.com> 2025-04-19 11:33:27 +0000
commit 043278a527ab5744674417a08d924c67a60a486b (patch)
tree 19c3ead87def94f2d418926d5f15b9eab1ced440 /source
parent 6bfabfee317887e678eed9cd6768df2ffd3b9704 (diff)
Implement 64bit countbits intrinsic (#6433) (#6845)
This change makes the countbits intrinsic generic so that it accepts 64-bit integers on platforms that support them natively. On platforms without native support (Metal and SPIR-V), the 64-bit case is emulated by bit-casting the value to a uint2, counting the bits in each 32-bit half, and summing the two counts. This aligns with the implementation of other uint64_t intrinsics such as abs, min, max and clamp. Adds a new countbits64 test to verify the changes and updates the documentation in 64bit-type-support.html.
Diffstat (limited to 'source')
-rw-r--r-- source/slang/hlsl.meta.slang 51
1 files changed, 44 insertions, 7 deletions
diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang
index e71997c6c..6b1a4579f 100644
--- a/source/slang/hlsl.meta.slang
+++ b/source/slang/hlsl.meta.slang
@@ -8028,7 +8028,8 @@ vector<T,N> cospi(vector<T,N> x)
[__readNone]
[ForceInline]
[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)]
-uint countbits(uint value)
+__generic<T : __BuiltinIntegerType>
+uint countbits(T value)
{
__target_switch
{
@@ -8037,22 +8038,42 @@ uint countbits(uint value)
case glsl:
__intrinsic_asm "bitCount";
case metal:
- __intrinsic_asm "popcount";
+ if(T is int64_t || T is uint64_t)
+ {
+ // emulate 64-bit
+ uint2 value_uint2 = bit_cast<uint2>(value);
+ uint2 counted_bits_uint2 = countbits(value_uint2);
+ return counted_bits_uint2.x + counted_bits_uint2.y;
+ }
+ else
+ {
+ __intrinsic_asm "popcount";
+ }
case cuda:
case cpp:
__intrinsic_asm "$P_countbits($0)";
case spirv:
- return spirv_asm {OpBitCount $$uint result $value};
+ if(T is int64_t || T is uint64_t)
+ {
+ // emulate 64-bit
+ uint2 value_uint2 = bit_cast<uint2>(value);
+ uint2 counted_bits_uint2 = countbits(value_uint2);
+ return counted_bits_uint2.x + counted_bits_uint2.y;
+ }
+ else
+ {
+ return spirv_asm {OpBitCount $$uint result $value};
+ }
case wgsl:
__intrinsic_asm "countOneBits";
}
}
-__generic <let N : int>
+__generic<T : __BuiltinIntegerType, let N : int>
[__readNone]
[ForceInline]
[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)]
-vector<uint, N> countbits(vector<uint, N> value)
+vector<uint, N> countbits(vector<T, N> value)
{
__target_switch
{
@@ -8061,9 +8082,25 @@ vector<uint, N> countbits(vector<uint, N> value)
case glsl:
__intrinsic_asm "bitCount";
case metal:
- __intrinsic_asm "popcount";
+ if(T is int64_t || T is uint64_t)
+ {
+ // emulate 64-bit
+ VECTOR_MAP_UNARY(uint, N, countbits, value);
+ }
+ else
+ {
+ __intrinsic_asm "popcount";
+ }
case spirv:
- return spirv_asm {OpBitCount $$vector<uint, N> result $value};
+ if(T is int64_t || T is uint64_t)
+ {
+ // emulate 64-bit
+ VECTOR_MAP_UNARY(uint, N, countbits, value);
+ }
+ else
+ {
+ return spirv_asm {OpBitCount $$vector<uint, N> result $value};
+ }
case wgsl:
__intrinsic_asm "countOneBits";
default: