Implement IFloat.scale for vectors, matrices (#7545)

* Test for IFloat.scale usage: test that using IFloat.scale doesn't cause an internal compiler error.
* Generic implementation of IFloat.scale(). Fixes #7156.
* Implement IFloat.scale for matrix: adds matrix implementation and test coverage.
* Avoid explicitly constructing a matrix.
* Remove intrinsicOp from IFloat.scale impls. Updates IFloat.scale implementations:
  - Remove __intrinsic_op($(kIROp_Mul)) since we're providing an implementation
  - Add [__unsafeForceInlineEarly] where missing
author: James Helferty (NVIDIA) <jhelferty@nvidia.com> 2025-06-30 11:59:14 -0400
committer: GitHub <noreply@github.com> 2025-06-30 15:59:14 +0000
commit: 61c9eb3a9117b6227cf40d970e57cf3d746b3926 (patch)
tree: 740b42289cbdf1baa76f841c8debc4fa14c8b045 /tests
parent: 67af8c718ce5e3f95a25e5188840f63b41a33ecc (diff)
1 files changed, 26 insertions, 0 deletions
diff --git a/tests/bugs/gh-7156-ifloat-scale.slang b/tests/bugs/gh-7156-ifloat-scale.slang
new file mode 100644
index 000000000..6002524c3
--- /dev/null
+++ b/tests/bugs/gh-7156-ifloat-scale.slang
@@ -0,0 +1,26 @@
+//TEST(compute):COMPARE_COMPUTE(filecheck-buffer=CHECK):-vk -compute -output-using-type
+//TEST(compute):COMPARE_COMPUTE(filecheck-buffer=CHECK):-hlsl -compute -output-using-type
+//TEST(compute):COMPARE_COMPUTE(filecheck-buffer=CHECK):-metal -compute -output-using-type
+//TEST(compute):COMPARE_COMPUTE(filecheck-buffer=CHECK):-wgpu -compute -output-using-type
+
+// CHECK: 1210.0
+
+//TEST_INPUT:ubuffer(data=[0 0 0], stride=4):out,name=outputBuffer
+RWStructuredBuffer<float> outputBuffer; // result buffer; computeMain stores its sum at index dispatchThreadID.x
+
+// Regression for #7156: calling IFloat.scale on a generic T used to cause an internal compiler error
+func double_it<T: IFloat>(value: T) -> T { // generic over any type conforming to IFloat
+    return value.scale(2.0); // scales by 2.0 through the IFloat.scale interface method
+}
+
+[numthreads(1, 1, 1)]
+[shader("compute")]
+void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID)
+{
+    let mat = matrix<float,4,4>(1,0,0,1, 0,1,1,0, 0,0,1,0, 0,0,0,1); // six entries are 1, the rest are 0
+    let vi = vector<float,4>(100,100,100,100);
+    let dm = double_it(mat); // matrix instantiation of the generic helper
+    let dm_sum = dot(dm[0], vi) + dot(dm[1], vi) + dot(dm[2], vi) + dot(dm[3], vi); // = 100 * (sum of all dm entries); expected 100 * 12 = 1200
+    outputBuffer[dispatchThreadID.x] = double_it(float2(1.0, 2.0)).x + double_it(4.0) + dm_sum; // vector + scalar + matrix cases: 2 + 8 + 1200 = 1210
+}
+
author	James Helferty (NVIDIA) <jhelferty@nvidia.com>	2025-06-30 11:59:14 -0400
committer	GitHub <noreply@github.com>	2025-06-30 15:59:14 +0000
commit	61c9eb3a9117b6227cf40d970e57cf3d746b3926 (patch)
tree	740b42289cbdf1baa76f841c8debc4fa14c8b045 /tests
parent	67af8c718ce5e3f95a25e5188840f63b41a33ecc (diff)