From dcda42e7dcdb5e260013757763bf5dbf67d69568 Mon Sep 17 00:00:00 2001 From: "James Helferty (NVIDIA)" Date: Fri, 15 Aug 2025 09:21:48 -0700 Subject: Use 64bit int instead of emulation on metal (#8180) Metal's popcount prototype is `T popcount(T x)` but we want to use it to implement `countbits` where the prototype always returns `uint`. Using `popcount` directly would implicitly cast successfully to the 32-bit return value in all cases except when the argument is a 64-bit type. Thus, this change always explicitly casts the result to `$TR`, which should be one of the `uint[N]` types, and should always be able to hold the number of bits in the type. Addresses #6877 --- tests/hlsl-intrinsic/scalar-int64.slang | 3 ++- tests/hlsl-intrinsic/scalar-uint64.slang | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'tests') diff --git a/tests/hlsl-intrinsic/scalar-int64.slang b/tests/hlsl-intrinsic/scalar-int64.slang index f4518f198..f029d5da8 100644 --- a/tests/hlsl-intrinsic/scalar-int64.slang +++ b/tests/hlsl-intrinsic/scalar-int64.slang @@ -6,6 +6,7 @@ //TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -profile cs_6_0 -dx12 -use-dxil -shaderobj -render-feature hardware-device //TEST(compute, vulkan):COMPARE_COMPUTE_EX:-vk -compute -shaderobj -render-feature int64 //TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute -shaderobj +//TEST(compute, metal):COMPARE_COMPUTE_EX:-slang -compute -mtl //TEST_INPUT:ubuffer(data=[0 0 0 0], stride=4):out,name outputBuffer RWStructuredBuffer<int> outputBuffer; @@ -38,4 +39,4 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) int64_t v = (ti * 0x400010035435435ll) / 3ll + 7ll - 9ll; outputBuffer[uint(idx)] = int(v) ^ int(((v >> 32) & 0xffffffff)); -} \ No newline at end of file +} diff --git a/tests/hlsl-intrinsic/scalar-uint64.slang b/tests/hlsl-intrinsic/scalar-uint64.slang index f75dd8acc..e790452a6 100644 --- a/tests/hlsl-intrinsic/scalar-uint64.slang +++ b/tests/hlsl-intrinsic/scalar-uint64.slang @@ -7,6 +7,7 
@@ //TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 -profile cs_6_0 -use-dxil -shaderobj -render-feature hardware-device //TEST(compute, vulkan):COMPARE_COMPUTE_EX:-vk -compute -shaderobj -render-feature int64 //TEST(compute, vulkan):COMPARE_COMPUTE_EX:-cuda -compute -shaderobj +//TEST(compute, metal):COMPARE_COMPUTE_EX:-slang -compute -mtl //TEST_INPUT:ubuffer(data=[0 0 0 0], stride=4):out,name outputBuffer RWStructuredBuffer<int> outputBuffer; @@ -44,4 +45,4 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) v = max(u, v); outputBuffer[dispatchThreadID.x] = int(v) ^ int(v >> 32); -} \ No newline at end of file +} -- cgit v1.2.3