Add Slang-specific intrinsics for integer pack/unpack (#6459)

* update hlsl meta
* update test
* use slang syntax in meta file
* improve meta file
* fix pack clamp u8
* remove builtin packed types, use typealias instead
* fix wgsl pack clamp
* fix formatting

---------

Co-authored-by: Yong He <yonghe@outlook.com>
author: Darren Wihandi <65404740+fairywreath@users.noreply.github.com> 2025-02-28 16:23:29 -0500
committer: GitHub <noreply@github.com> 2025-02-28 13:23:29 -0800
commit: efbfa7832afff7e6285713086259abda2456ed55 (patch)
tree: d5a94eb66867d7f9dc01e4c1a25502443bf71040 /source/slang/hlsl.meta.slang
parent: 618b4c7657f539e66f032cd40554798bc0d68f6d (diff)
1 files changed, 227 insertions, 184 deletions
diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang
index c9f3fb533..a671a3dc4 100644
--- a/source/slang/hlsl.meta.slang
+++ b/source/slang/hlsl.meta.slang
@@ -17309,8 +17309,8 @@ uint dot4add_u8packed(uint x, uint y, uint acc)
             result:$$uint = OpIAdd %dotResult $acc;
         };
     default:
-        uint4 vecX = unpack_u8u32(uint8_t4_packed(x));
-        uint4 vecY = unpack_u8u32(uint8_t4_packed(y));
+        uint4 vecX = unpackUint4x8ToUint32(x);
+        uint4 vecY = unpackUint4x8ToUint32(y);
         return dot(vecX, vecY) + acc;
     }
 }
@@ -17337,8 +17337,8 @@ int dot4add_i8packed(uint x, uint y, int acc)
             result:$$int = OpIAdd %dotResult $acc;
         };
     default:
-        int4 vecX = unpack_s8s32(int8_t4_packed(x));
-        int4 vecY = unpack_s8s32(int8_t4_packed(y));
+        int4 vecX = unpackInt4x8ToInt32(x);
+        int4 vecY = unpackInt4x8ToInt32(y);
         return dot(vecX, vecY) + acc;
     }
 }
@@ -24035,383 +24035,426 @@ T workgroupUniformLoad<T>(__ref T v)
 }
 
 //
-// Pack/Unpack Math Intrinsics
+// HLSL Pack/Unpack Math Intrinsics
 //
 // These were introduced in SM 6.6 but requirements are dropped to SM 5.0 here
 // to expose these intrinsics on targets that do not have SM 6.6 features.
 //
 
-//@hidden:
+//@public:
+
+typealias uint8_t4_packed = uint;
+typealias int8_t4_packed = uint;
+
+/// Unpack 4 signed 8-bit values into a vector of 16 bit integers.
 [__readNone]
 [ForceInline]
-uint16_t __lsb_as_u16(uint32_t val)
+[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)]
+int16_t4 unpack_s8s16(int8_t4_packed packed)
 {
-    return uint16_t(val & 0xFFU);
+    return unpackInt4x8ToInt16(packed);
 }
 
-//@hidden:
+/// Unpack 4 unsigned 8-bit values into a vector of 16 bit integers.
 [__readNone]
 [ForceInline]
-uint32_t __lsb_as_u32(uint32_t val)
+[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)]
+uint16_t4 unpack_u8u16(uint8_t4_packed packed)
 {
-    return (val & 0xFFU);
+    return unpackUint4x8ToUint16(packed);
 }
 
-//@hidden:
+/// Unpack 4 signed 8-bit values into a vector of 32 bit integers.
 [__readNone]
 [ForceInline]
-int8_t __lsb_as_s8(uint32_t val)
+[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)]
+int32_t4 unpack_s8s32(int8_t4_packed packed)
 {
-    return int8_t(val & 0xFFU);
+    return unpackInt4x8ToInt32(packed);
 }
 
-//@hidden:
+/// Unpack 4 unsigned 8-bit values into a vector of 32 bit integers.
 [__readNone]
 [ForceInline]
-int16_t __lsb_as_s16(uint32_t val)
+[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)]
+uint32_t4 unpack_u8u32(uint8_t4_packed packed)
 {
-    return int16_t(__lsb_as_s8(val));
+    return unpackUint4x8ToUint32(packed);
 }
 
-//@hidden:
+/// Pack a vector of 4 unsigned 32 bit integers into a packed value of 4 8-bit integers, dropping unused bits.
 [__readNone]
 [ForceInline]
-int32_t __lsb_as_s32(uint32_t val)
+[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)]
+uint8_t4_packed pack_u8(uint32_t4 unpackedValue)
 {
-    return int32_t(__lsb_as_s8(val));
+    return packUint4x8(unpackedValue);
 }
 
-//@hidden:
+/// Pack a vector of 4 signed 32 bit integers into a packed value of 4 8-bit integers, dropping unused bits.
+[__readNone]
+[ForceInline]
+[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)]
+int8_t4_packed pack_s8(int32_t4 unpackedValue)
+{
+    return packInt4x8(unpackedValue);
+}
+
+/// Pack a vector of 4 unsigned 16 bit integers into a packed value of 4 8-bit integers, dropping unused bits.
+[__readNone]
+[ForceInline]
+[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)]
+uint8_t4_packed pack_u8(uint16_t4 unpackedValue)
+{
+    return packUint4x8(unpackedValue);
+}
+
+/// Pack a vector of 4 signed 16 bit integers into a packed value of 4 8-bit integers, dropping unused bits.
+[__readNone]
+[ForceInline]
+[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)]
+int8_t4_packed pack_s8(int16_t4 unpackedValue)
+{
+    return packInt4x8(unpackedValue);
+}
+
+/// Pack a vector of 4 signed 32 bit integers into a packed value of 4 8-bit integers,
+/// clamping each value to the range [0, 255] to ensure it fits within 8 bits.
+[__readNone]
+[ForceInline]
+[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)]
+uint8_t4_packed pack_clamp_u8(int32_t4 unpackedValue)
+{
+    return packUint4x8Clamp(unpackedValue);
+}
+
+/// Pack a vector of 4 signed 32 bit integers into a packed value of 4 8-bit integers,
+/// clamping each value to the range [-128, 127] to ensure it fits within 8 bits.
+[__readNone]
+[ForceInline]
+[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)]
+int8_t4_packed pack_clamp_s8(int32_t4 unpackedValue)
+{
+    return packInt4x8Clamp(unpackedValue);
+}
+
+/// Pack a vector of 4 signed 16 bit integers into a packed value of 4 8-bit integers,
+/// clamping each value to the range [0, 255] to ensure it fits within 8 bits.
 [__readNone]
 [ForceInline]
-uint32_t __lsb_clamp_u8_as_u32(int32_t val)
+[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)]
+uint8_t4_packed pack_clamp_u8(int16_t4 unpackedValue)
 {
-    return clamp(val, 0, 255);
+    return packUint4x8Clamp(unpackedValue);
 }
 
+/// Pack a vector of 4 signed 16 bit integers into a packed value of 4 8-bit integers,
+/// clamping each value to the range [-128, 127] to ensure it fits within 8 bits.
+[__readNone]
+[ForceInline]
+[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)]
+int8_t4_packed pack_clamp_s8(int16_t4 unpackedValue)
+{
+    return packInt4x8Clamp(unpackedValue);
+}
+
+// Work-graphs
+
+//@public:
+/// Read-only input to a broadcasting launch node.
+__generic<T>
+//TODO: DispatchNodeInputRecord should be available only for broadcasting node shader.
+//[require(broadcasting_node)]
+[require(spirv)]
+struct DispatchNodeInputRecord
+{
+    /// Provides access to a record object that only holds a single record.
+    NodePayloadPtr<T> Get()
+    {
+        int index = 0;
+        __target_switch
+        {
+        case spirv:
+            return spirv_asm
+            {
+                %in_payload_t = OpTypeNodePayloadArrayAMDX $$T;
+                %in_payload_ptr_t = OpTypePointer NodePayloadAMDX %in_payload_t;
+                %var = OpVariable %in_payload_ptr_t NodePayloadAMDX;
+                result : $$NodePayloadPtr<T> = OpAccessChain %var $index;
+            };
+        }
+    }
+};
+
+//
+// Pack/Unpack Intrinsics
+//
+
 //@hidden:
+
+[__readNone]
+[ForceInline]
+uint16_t __lsbAsUint16(uint32_t val)
+{
+    return uint16_t(val & 0xFFU);
+}
+
+[__readNone]
+[ForceInline]
+uint32_t __lsbAsUint32(uint32_t val)
+{
+    return (val & 0xFFU);
+}
+
+[__readNone]
+[ForceInline]
+int8_t __lsbAsInt8(uint32_t val)
+{
+    return int8_t(val);
+}
+
 [__readNone]
 [ForceInline]
-uint32_t __lsb_clamp_s8_as_u32(int32_t val)
+int16_t __lsbAsInt16(uint32_t val)
 {
-    return (uint32_t(clamp(val, -128, 127)) & 0xFFU);
+    return int16_t(__lsbAsInt8(val));
+}
+
+[__readNone]
+[ForceInline]
+int32_t __lsbAsInt32(uint32_t val)
+{
+    return int32_t(__lsbAsInt8(val));
 }
 
 //@public:
-/// Unpack 4 signed 8-bit values into a vector of 16 bit integers.
+
+/// Unpack 4 unsigned 8-bit values into a vector of 32 bit integers.
 [__readNone]
 [ForceInline]
 [require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)]
-int16_t4 unpack_s8s16(int8_t4_packed packed)
+[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)]
+uint32_t4 unpackUint4x8ToUint32(uint packedValue)
 {
     __target_switch
     {
-    case hlsl: __intrinsic_asm "unpack_s8s16";
-    case spirv: 
+    case hlsl: __intrinsic_asm "unpack_u8u32";
+    case wgsl: __intrinsic_asm "unpack4xU8";
+    case spirv:
         return spirv_asm
         {
-            %s8Vec = OpBitcast $$vector<int8_t, 4> $packed;
-            result:$$vector<int16_t, 4> = OpSConvert %s8Vec
+            %u8Vec = OpBitcast $$vector<uint8_t, 4> $packedValue;
+            result:$$vector<uint32_t, 4> = OpUConvert %u8Vec
         };
     default:
-        uint32_t packedValue = uint32_t(packed);
-        return int16_t4
+        return uint32_t4
         (
-            __lsb_as_s16(packedValue),
-            __lsb_as_s16(packedValue >> 8U),
-            __lsb_as_s16(packedValue >> 16U),
-            __lsb_as_s16(packedValue >> 24U),
+            __lsbAsUint32(packedValue),
+            __lsbAsUint32(packedValue >> 8U),
+            __lsbAsUint32(packedValue >> 16U),
+            uint32_t(packedValue >> 24U),
         );
     }
 }
 
-//@public:
 /// Unpack 4 unsigned 8-bit values into a vector of 16 bit integers.
 [__readNone]
 [ForceInline]
 [require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)]
-uint16_t4 unpack_u8u16(uint8_t4_packed packed)
+uint16_t4 unpackUint4x8ToUint16(uint packedValue)
 {
     __target_switch
     {
     case hlsl: __intrinsic_asm "unpack_u8u16";
-    case spirv: 
+    case spirv:
         return spirv_asm
         {
-            %u8Vec = OpBitcast $$vector<uint8_t, 4> $packed;
+            %u8Vec = OpBitcast $$vector<uint8_t, 4> $packedValue;
             result:$$vector<uint16_t, 4> = OpUConvert %u8Vec
         };
     default:
-        uint32_t packedValue = uint32_t(packed);
         return uint16_t4
         (
-            __lsb_as_u16(packedValue),
-            __lsb_as_u16(packedValue >> 8U),
-            __lsb_as_u16(packedValue >> 16U),
-            __lsb_as_u16(packedValue >> 24U),
+            __lsbAsUint16(packedValue),
+            __lsbAsUint16(packedValue >> 8U),
+            __lsbAsUint16(packedValue >> 16U),
+            uint16_t(packedValue >> 24U),
         );
     }
 }
 
-//@public:
 /// Unpack 4 signed 8-bit values into a vector of 32 bit integers.
 [__readNone]
 [ForceInline]
 [require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)]
-int32_t4 unpack_s8s32(int8_t4_packed packed)
+int32_t4 unpackInt4x8ToInt32(uint packedValue)
 {
     __target_switch
     {
     case hlsl: __intrinsic_asm "unpack_s8s32";
     case wgsl: __intrinsic_asm "unpack4xI8";
-    case spirv: 
+    case spirv:
         return spirv_asm
         {
-            %s8Vec = OpBitcast $$vector<int8_t, 4> $packed;
+            %s8Vec = OpBitcast $$vector<int8_t, 4> $packedValue;
             result:$$vector<int32_t, 4> = OpSConvert %s8Vec
         };
     default:
-        uint32_t packedValue = uint32_t(packed);
         return int32_t4
         (
-            __lsb_as_s32(packedValue),
-            __lsb_as_s32(packedValue >> 8U),
-            __lsb_as_s32(packedValue >> 16U),
-            __lsb_as_s32(packedValue >> 24U),
+            __lsbAsInt32(packedValue),
+            __lsbAsInt32(packedValue >> 8U),
+            __lsbAsInt32(packedValue >> 16U),
+            int32_t(int8_t(packedValue >> 24U)),
         );
     }
 }
 
-//@public:
-/// Unpack 4 unsigned 8-bit values into a vector of 32 bit integers.
+/// Unpack 4 signed 8-bit values into a vector of 16 bit integers.
 [__readNone]
 [ForceInline]
 [require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)]
-uint32_t4 unpack_u8u32(uint8_t4_packed packed)
+int16_t4 unpackInt4x8ToInt16(uint packedValue)
 {
     __target_switch
     {
-    case hlsl: __intrinsic_asm "unpack_u8u32";
-    case wgsl: __intrinsic_asm "unpack4xU8";
-    case spirv: 
+    case hlsl: __intrinsic_asm "unpack_s8s16";
+    case spirv:
         return spirv_asm
         {
-            %u8Vec = OpBitcast $$vector<uint8_t, 4> $packed;
-            result:$$vector<uint32_t, 4> = OpUConvert %u8Vec
+            %s8Vec = OpBitcast $$vector<int8_t, 4> $packedValue;
+            result:$$vector<int16_t, 4> = OpSConvert %s8Vec
         };
     default:
-        uint32_t packedValue = uint32_t(packed);
-        return uint32_t4
+        return int16_t4
         (
-            __lsb_as_u32(packedValue),
-            __lsb_as_u32(packedValue >> 8U),
-            __lsb_as_u32(packedValue >> 16U),
-            __lsb_as_u32(packedValue >> 24U),
+            __lsbAsInt16(packedValue),
+            __lsbAsInt16(packedValue >> 8U),
+            __lsbAsInt16(packedValue >> 16U),
+            int16_t(int8_t(packedValue >> 24U)),
         );
     }
 }
 
-//@public:
 /// Pack a vector of 4 unsigned 32 bit integers into a packed value of 4 8-bit integers, dropping unused bits.
 [__readNone]
 [ForceInline]
 [require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)]
-uint8_t4_packed pack_u8(uint32_t4 unpackedValue)
+uint packUint4x8(uint32_t4 unpackedValue)
 {
     __target_switch
     {
     case hlsl: __intrinsic_asm "pack_u8";
     case wgsl: __intrinsic_asm "pack4xU8";
     default:
-        return uint8_t4_packed
-        (
-            __lsb_as_u32(unpackedValue.x)
-            | (__lsb_as_u32(unpackedValue.y) << 8U)
-            | (__lsb_as_u32(unpackedValue.z) << 16U)
-            | (__lsb_as_u32(unpackedValue.w) << 24U)
-        );
+        return __lsbAsUint32(unpackedValue.x)
+            | (__lsbAsUint32(unpackedValue.y) << 8U)
+            | (__lsbAsUint32(unpackedValue.z) << 16U)
+            | (unpackedValue.w << 24U);
     }
 }
 
-//@public:
-/// Pack a vector of 4 signed 32 bit integers into a packed value of 4 8-bit integers, dropping unused bits.
+/// Pack a vector of 4 unsigned 16 bit integers into a packed value of 4 8-bit integers, dropping unused bits.
 [__readNone]
 [ForceInline]
 [require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)]
-int8_t4_packed pack_s8(int32_t4 unpackedValue)
+uint packUint4x8(uint16_t4 unpackedValue)
 {
     __target_switch
     {
-    case hlsl: __intrinsic_asm "pack_s8";
-    case wgsl: __intrinsic_asm "pack4xI8";
+    case hlsl: __intrinsic_asm "pack_u8";
     default:
-        return int8_t4_packed
-        (
-            __lsb_as_u32(unpackedValue.x)
-            | (__lsb_as_u32(unpackedValue.y) << 8U)
-            | (__lsb_as_u32(unpackedValue.z) << 16U)
-            | (__lsb_as_u32(unpackedValue.w) << 24U)
-        );
+        return packUint4x8(uint32_t4(unpackedValue));
     }
 }
 
-//@public:
-/// Pack a vector of 4 unsigned 16 bit integers into a packed value of 4 8-bit integers, dropping unused bits.
+/// Pack a vector of 4 signed 32 bit integers into a packed value of 4 8-bit integers, dropping unused bits.
 [__readNone]
 [ForceInline]
 [require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)]
-uint8_t4_packed pack_u8(uint16_t4 unpackedValue)
+uint packInt4x8(int32_t4 unpackedValue)
 {
     __target_switch
     {
-    case hlsl: __intrinsic_asm "pack_u8";
+    case hlsl: __intrinsic_asm "pack_s8";
+    case wgsl: __intrinsic_asm "pack4xI8";
     default:
-        return uint8_t4_packed
-        (
-            __lsb_as_u32(unpackedValue.x)
-            | (__lsb_as_u32(unpackedValue.y) << 8U)
-            | (__lsb_as_u32(unpackedValue.z) << 16U)
-            | (__lsb_as_u32(unpackedValue.w) << 24U)
-        );
+        return packUint4x8(uint32_t4(unpackedValue));
     }
 }
 
-//@public:
 /// Pack a vector of 4 signed 16 bit integers into a packed value of 4 8-bit integers, dropping unused bits.
 [__readNone]
 [ForceInline]
 [require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)]
-int8_t4_packed pack_s8(int16_t4 unpackedValue)
+uint packInt4x8(int16_t4 unpackedValue)
 {
     __target_switch
     {
     case hlsl: __intrinsic_asm "pack_s8";
     default:
-        return int8_t4_packed
-        (
-            __lsb_as_u32(unpackedValue.x)
-            | (__lsb_as_u32(unpackedValue.y) << 8U)
-            | (__lsb_as_u32(unpackedValue.z) << 16U)
-            | (__lsb_as_u32(unpackedValue.w) << 24U)
-        );
+        return packUint4x8(uint32_t4(unpackedValue));
     }
 }
 
-//@public:
-/// Pack a vector of 4 unsigned 32 bit integers into a packed value of 4 8-bit integers,
-/// clamping each value to the range [0, 255] to ensure it fits within 8 bits.
+/// Pack a vector of 4 signed 32 bit integers into a packed value of 4 8-bit integers,
+/// clamping each value to the range [0, 255] to ensure it fits within 8 bits.
 [__readNone]
 [ForceInline]
 [require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)]
-uint8_t4_packed pack_clamp_u8(int32_t4 unpackedValue)
+uint packUint4x8Clamp(int32_t4 unpackedValue)
 {
     __target_switch
     {
     case hlsl: __intrinsic_asm "pack_clamp_u8";
     case wgsl: __intrinsic_asm "pack4xU8Clamp(vec4<u32>($0))";
     default:
-        return uint8_t4_packed
-        (
-            __lsb_clamp_u8_as_u32(unpackedValue.x)
-            | (__lsb_clamp_u8_as_u32(unpackedValue.y) << 8U)
-            | (__lsb_clamp_u8_as_u32(unpackedValue.z) << 16U)
-            | (__lsb_clamp_u8_as_u32(unpackedValue.w) << 24U)
-        );
+        return packInt4x8(clamp(unpackedValue, 0, 255));
     }
 }
 
-//@public:
-/// Pack a vector of 4 signed 32 bit integers into a packed value of 4 8-bit integers,
-/// clamping each value to the range [-128, 127] to ensure it fits within 8 bits.
+/// Pack a vector of 4 signed 16 bit integers into a packed value of 4 8-bit integers,
+/// clamping each value to the range [0, 255] to ensure it fits within 8 bits.
 [__readNone]
 [ForceInline]
 [require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)]
-int8_t4_packed pack_clamp_s8(int32_t4 unpackedValue)
+uint packUint4x8Clamp(int16_t4 unpackedValue)
 {
     __target_switch
     {
-    case hlsl: __intrinsic_asm "pack_clamp_s8";
-    case wgsl: __intrinsic_asm "pack4xI8Clamp";
+    case hlsl: __intrinsic_asm "pack_clamp_u8";
     default:
-        return int8_t4_packed
-        (
-            __lsb_clamp_s8_as_u32(unpackedValue.x)
-            | (__lsb_clamp_s8_as_u32(unpackedValue.y) << 8U)
-            | (__lsb_clamp_s8_as_u32(unpackedValue.z) << 16U)
-            | (__lsb_clamp_s8_as_u32(unpackedValue.w) << 24U)
-        );
+        return packInt4x8(clamp(unpackedValue, 0, 255));
     }
 }
 
-//@public:
-/// Pack a vector of 4 unsigned 16 bit integers into a packed value of 4 8-bit integers,
-/// clamping each value to the range [0, 255] to ensure it fits within 8 bits.
+/// Pack a vector of 4 signed 32 bit integers into a packed value of 4 8-bit integers,
+/// clamping each value to the range [-128, 127] to ensure it fits within 8 bits.
 [__readNone]
 [ForceInline]
 [require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)]
-uint8_t4_packed pack_clamp_u8(int16_t4 unpackedValue)
+uint packInt4x8Clamp(int32_t4 unpackedValue)
 {
     __target_switch
     {
-    case hlsl: __intrinsic_asm "pack_clamp_u8";
+    case hlsl: __intrinsic_asm "pack_clamp_s8";
+    case wgsl: __intrinsic_asm "pack4xI8Clamp";
     default:
-        return uint8_t4_packed
-        (
-            __lsb_clamp_u8_as_u32(unpackedValue.x)
-            | (__lsb_clamp_u8_as_u32(unpackedValue.y) << 8U)
-            | (__lsb_clamp_u8_as_u32(unpackedValue.z) << 16U)
-            | (__lsb_clamp_u8_as_u32(unpackedValue.w) << 24U)
-        );
+        return packInt4x8(clamp(unpackedValue, -128, 127));
     }
 }
 
-//@public:
 /// Pack a vector of 4 signed 16 bit integers into a packed value of 4 8-bit integers,
 /// clamping each value to the range [-128, 127] to ensure it fits within 8 bits.
 [__readNone]
 [ForceInline]
 [require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)]
-int8_t4_packed pack_clamp_s8(int16_t4 unpackedValue)
+uint packInt4x8Clamp(int16_t4 unpackedValue)
 {
     __target_switch
     {
     case hlsl: __intrinsic_asm "pack_clamp_s8";
     default:
-        return int8_t4_packed
-        (
-            __lsb_clamp_s8_as_u32(unpackedValue.x)
-            | (__lsb_clamp_s8_as_u32(unpackedValue.y) << 8U)
-            | (__lsb_clamp_s8_as_u32(unpackedValue.z) << 16U)
-            | (__lsb_clamp_s8_as_u32(unpackedValue.w) << 24U)
-        );
+        return packInt4x8(clamp(unpackedValue, -128, 127));
     }
 }
-
-// Work-graphs
-
-//@public:
-/// read-only input to Broadcasting launch node.
-__generic<T>
-//TODO: DispatchNodeInputRecord should be available only for broadcasting node shader.
-//[require(broadcasting_node)]
-[require(spirv)]
-struct DispatchNodeInputRecord
-{
-    /// Provide an access to a record object that only holds a single record.
-    NodePayloadPtr<T> Get()
-    {
-        int index = 0;
-        __target_switch
-        {
-        case spirv:
-            return spirv_asm
-            {
-                %in_payload_t = OpTypeNodePayloadArrayAMDX $$T;
-                %in_payload_ptr_t = OpTypePointer NodePayloadAMDX %in_payload_t;
-                %var = OpVariable %in_payload_ptr_t NodePayloadAMDX;
-                result : $$NodePayloadPtr<T> = OpAccessChain %var $index;
-            };
-        }
-    }
-};
-
author	Darren Wihandi <65404740+fairywreath@users.noreply.github.com>	2025-02-28 16:23:29 -0500
committer	GitHub <noreply@github.com>	2025-02-28 13:23:29 -0800
commit	efbfa7832afff7e6285713086259abda2456ed55 (patch)
tree	d5a94eb66867d7f9dc01e4c1a25502443bf71040 /source/slang/hlsl.meta.slang
parent	618b4c7657f539e66f032cd40554798bc0d68f6d (diff)