From 61c9eb3a9117b6227cf40d970e57cf3d746b3926 Mon Sep 17 00:00:00 2001
From: "James Helferty (NVIDIA)"
Date: Mon, 30 Jun 2025 11:59:14 -0400
Subject: Implement IFloat.scale for vectors, matrices (#7545)

* Test for IFloat.scale usage

Test that using IFloat.scale doesn't cause an internal compiler error.

* Generic implementation of IFloat.scale()

Fixes #7156

* Implement IFloat.scale for matrix

Adds matrix implementation and test coverage.

* Avoid explicitly constructing a matrix

* Remove intrinsicOp from IFloat.scale impls

Updates IFloat.scale implementations:
- Remove __intrinsic_op($(kIROp_Mul)) since we're providing an implementation
- Add [__unsafeForceInlineEarly] where missing
---
 tests/bugs/gh-7156-ifloat-scale.slang | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)
 create mode 100644 tests/bugs/gh-7156-ifloat-scale.slang

(limited to 'tests/bugs')

diff --git a/tests/bugs/gh-7156-ifloat-scale.slang b/tests/bugs/gh-7156-ifloat-scale.slang
new file mode 100644
index 000000000..6002524c3
--- /dev/null
+++ b/tests/bugs/gh-7156-ifloat-scale.slang
@@ -0,0 +1,26 @@
+//TEST(compute):COMPARE_COMPUTE(filecheck-buffer=CHECK):-vk -compute -output-using-type
+//TEST(compute):COMPARE_COMPUTE(filecheck-buffer=CHECK):-hlsl -compute -output-using-type
+//TEST(compute):COMPARE_COMPUTE(filecheck-buffer=CHECK):-metal -compute -output-using-type
+//TEST(compute):COMPARE_COMPUTE(filecheck-buffer=CHECK):-wgpu -compute -output-using-type
+
+// CHECK: 1210.0
+
+//TEST_INPUT:ubuffer(data=[0 0 0], stride=4):out,name=outputBuffer
+RWStructuredBuffer<float> outputBuffer;
+
+// Use of IFloat.scale used to cause an internal compiler error
+func double_it<T: IFloat>(value: T) -> T {
+    return value.scale(2.0);
+}
+
+[numthreads(1, 1, 1)]
+[shader("compute")]
+void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID)
+{
+    let mat = matrix<float, 4, 4>(1,0,0,1, 0,1,1,0, 0,0,1,0, 0,0,0,1);
+    let vi = vector<float, 4>(100,100,100,100);
+    let dm = double_it(mat);
+    let dm_sum = dot(dm[0], vi) + dot(dm[1], vi) + dot(dm[2], vi) + dot(dm[3], vi);
+    outputBuffer[dispatchThreadID.x] = double_it(float2(1.0, 2.0)).x + double_it(4.0) + dm_sum;
+}
+
-- 
cgit v1.2.3