From 2fb95f99c3efbe54f92f6338ab8c6970f1ec35ee Mon Sep 17 00:00:00 2001
From: "James Helferty (NVIDIA)" <jhelferty@nvidia.com>
Date: Mon, 12 May 2025 21:03:59 -0400
Subject: Add half-precision matrix type aliases in GLSL (#7066)

Fixes #6708

This commit adds type aliases for half-precision matrices, including
f16mat3x2, f16mat3x3, f16mat3x4, f16mat4x2, f16mat4x3, and f16mat4x4.
Convenience aliases for square matrices (f16mat2, f16mat3, f16mat4) are
also added.

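This commit introduces a new test file, tests/glsl/float16_types.slang,
that exercises f16mat4, f16mat3x4 and f16mat2x3 (together with
f16vec2/f16vec3/f16vec4) in a compute shader, checking the generated
GLSL, SPIR-V, HLSL and CUDA code as well as the Vulkan compute results.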
---
 source/slang/glsl.meta.slang   | 12 ++++++
 tests/glsl/float16_types.slang | 85 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 97 insertions(+)
 create mode 100644 tests/glsl/float16_types.slang

diff --git a/source/slang/glsl.meta.slang b/source/slang/glsl.meta.slang
index 05cef836c..2d9078855 100644
--- a/source/slang/glsl.meta.slang
+++ b/source/slang/glsl.meta.slang
@@ -101,6 +101,18 @@ public typealias f16mat2x2 = matrix<half, 2, 2>;
 public typealias f16mat2x3 = matrix<half, 2, 3>;
 public typealias f16mat2x4 = matrix<half, 2, 4>;
 
+public typealias f16mat3x2 = matrix<half, 3, 2>;
+public typealias f16mat3x3 = matrix<half, 3, 3>;
+public typealias f16mat3x4 = matrix<half, 3, 4>;
+
+public typealias f16mat4x2 = matrix<half, 4, 2>;
+public typealias f16mat4x3 = matrix<half, 4, 3>;
+public typealias f16mat4x4 = matrix<half, 4, 4>;
+
+// Convenience aliases: f16matN is shorthand for the square f16matNxN.
+public typealias f16mat2 = f16mat2x2;
+public typealias f16mat3 = f16mat3x3;
+public typealias f16mat4 = f16mat4x4;
 
 public out float4 gl_Position : SV_Position;
 public out float gl_PointSize : SV_PointSize;
diff --git a/tests/glsl/float16_types.slang b/tests/glsl/float16_types.slang
new file mode 100644
index 000000000..507ed3113
--- /dev/null
+++ b/tests/glsl/float16_types.slang
@@ -0,0 +1,85 @@
+//TEST:SIMPLE(filecheck=CHECK_GLSL): -stage compute -entry computeMain -allow-glsl -target glsl
+//TEST:SIMPLE(filecheck=CHECK_SPV): -stage compute -entry computeMain -allow-glsl -target spirv -emit-spirv-directly
+//TEST:SIMPLE(filecheck=CHECK_HLSL): -stage compute -entry computeMain -target hlsl -allow-glsl
+//TEST:SIMPLE(filecheck=CHECK_CUDA): -stage compute -entry computeMain -target cuda -allow-glsl
+//TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=BUF):-vk -compute -entry computeMain -allow-glsl
+//TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=BUF):-vk -compute -entry computeMain -allow-glsl -emit-spirv-directly
+
+#version 450
+
+//TEST_INPUT:ubuffer(data=[0 1 2 3  4 5 6 7  8 9 10 11  12 13 14 15], stride=4):name=inputBuffer
+layout(binding = 0) buffer MyBlockNameIn
+{
+    uvec4 a[4]; // 16 uints in total, matching the 16 values supplied by the test-input directive above
+} inputBuffer;
+
+//TEST_INPUT:ubuffer(data=[0 0 0 0  0 0 0 0  0 0 0 0], stride=4):out,name=outputBuffer
+layout(binding = 1) buffer MyBlockName
+{
+    uvec4 a; // receives the f16mat4 result in computeMain
+    uvec4 b; // receives the f16mat3x4 result in computeMain
+    uvec3 c; // receives the f16mat2x3 result in computeMain
+} outputBuffer;
+
+layout(local_size_x = 1) in;
+void computeMain() // Multiplies three half-precision matrices by all-ones vectors and stores the results.
+{
+    {   // Case 1: f16mat4 built from the four input vectors.
+        f16vec4 m0 = f16vec4(inputBuffer.a[0]);
+        f16vec4 m1 = f16vec4(inputBuffer.a[1]);
+        f16vec4 m2 = f16vec4(inputBuffer.a[2]);
+        f16vec4 m3 = f16vec4(inputBuffer.a[3]);
+        f16mat4 m = f16mat4(m0, m1, m2, m3);
+        f16vec4 res = m * f16vec4(1, 1, 1, 1);
+        outputBuffer.a = uvec4(res); // expected values below (hex) equal m0+m1+m2+m3 per component
+        // BUF: 18
+        // BUF-NEXT: 1C
+        // BUF-NEXT: 20
+        // BUF-NEXT: 24
+    }
+
+    {   // Case 2: f16mat3x4 built from the first three input vectors.
+        f16vec4 m0 = f16vec4(inputBuffer.a[0]);
+        f16vec4 m1 = f16vec4(inputBuffer.a[1]);
+        f16vec4 m2 = f16vec4(inputBuffer.a[2]);
+        f16mat3x4 m = f16mat3x4(m0, m1, m2);
+        f16vec4 res = m * f16vec3(1, 1, 1);
+        outputBuffer.b = uvec4(res); // expected values below (hex) equal m0+m1+m2 per component
+        // BUF-NEXT: C
+        // BUF-NEXT: F
+        // BUF-NEXT: 12
+        // BUF-NEXT: 15
+    }
+
+    {   // Case 3: f16mat2x3 built from the xyz parts of the first two input vectors.
+        f16vec3 m0 = f16vec3(inputBuffer.a[0].xyz);
+        f16vec3 m1 = f16vec3(inputBuffer.a[1].xyz);
+        f16mat2x3 m = f16mat2x3(m0, m1);
+        f16vec3 res = m * f16vec2(1, 1);
+        outputBuffer.c = uvec3(res); // expected values below equal m0+m1 per component
+        // BUF-NEXT: 4
+        // BUF-NEXT: 6
+        // BUF-NEXT: 8
+    }
+
+
+    // The patterns below name types expected in the generated code, one group per target declared at the top of this file.
+    // CHECK_GLSL: f16mat4x4
+    // CHECK_GLSL: f16vec4
+    // CHECK_GLSL: f16mat3x4
+    // CHECK_GLSL: f16mat2x3
+    // CHECK_GLSL: f16vec3
+    // CHECK_GLSL: f16vec2
+
+    // CHECK_SPV: OpTypeFloat 16
+
+    // CHECK_HLSL: vector<half,4>
+    // CHECK_HLSL: matrix<half,int(4),int(4)>
+    // CHECK_HLSL: matrix<half,int(3),int(4)>
+    // CHECK_HLSL: matrix<half,int(2),int(3)>
+
+    // CHECK_CUDA: __half4
+    // CHECK_CUDA: makeMatrix<__half, 4, 4>
+    // CHECK_CUDA: makeMatrix<__half, 3, 4>
+    // CHECK_CUDA: makeMatrix<__half, 2, 3>
+}
-- 
cgit v1.2.3