summaryrefslogtreecommitdiffstats
path: root/source
diff options
context:
space:
mode:
author: James Helferty (NVIDIA) <jhelferty@nvidia.com> 2025-08-15 09:21:48 -0700
committer: GitHub <noreply@github.com> 2025-08-15 16:21:48 +0000
commit: dcda42e7dcdb5e260013757763bf5dbf67d69568 (patch)
tree: 93426c9741db6c7aea9aaa2976159d05de4bc235 /source
parent: f75bf474ef87737c87ef6dcb431bd0b87faee0a8 (diff)
Use 64bit int instead of emulation on metal (#8180)
Metal's `popcount` prototype is `T popcount(T x)`, but we want to use it to implement `countbits`, whose prototype always returns `uint`. Using `popcount` directly works in most cases because its result converts implicitly to the 32-bit return type; the one exception is when the argument is a 64-bit type. This change therefore always explicitly casts the result to `$TR`, which should be one of the `uint[N]` types and should always be able to hold the number of bits in the argument type. Addresses #6877.
Diffstat (limited to 'source')
-rw-r--r-- source/slang/hlsl.meta.slang 24
1 files changed, 2 insertions, 22 deletions
diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang
index 78f6a4eb8..0d5b8cb1f 100644
--- a/source/slang/hlsl.meta.slang
+++ b/source/slang/hlsl.meta.slang
@@ -8190,19 +8190,7 @@ uint countbits(T value)
__intrinsic_asm "bitCount";
}
case metal:
- if (T is int64_t || T is uint64_t)
- {
- return __emulatedCountbits64(__intCast<uint64_t>(value));
- }
- else if (T is int16_t || T is uint16_t)
- {
- // emulate 16-bit
- return countbits(__intCast<uint32_t>(value));
- }
- else
- {
- __intrinsic_asm "popcount";
- }
+ __intrinsic_asm "($TR)popcount($0)";
case cuda:
case cpp:
__intrinsic_asm "$P_countbits($0)";
@@ -8262,15 +8250,7 @@ vector<uint, N> countbits(vector<T, N> value)
__intrinsic_asm "bitCount";
}
case metal:
- if(T is int64_t || T is uint64_t || T is int16_t || T is uint16_t)
- {
- // Emulate 64-bit and 16-bit
- VECTOR_MAP_UNARY(uint, N, countbits, value);
- }
- else
- {
- __intrinsic_asm "popcount";
- }
+ __intrinsic_asm "($TR)popcount($0)";
case spirv:
if(T is int64_t || T is uint64_t || T is int16_t || T is uint16_t)
{