summaryrefslogtreecommitdiffstats
path: root/source
diff options
context:
space:
mode:
Diffstat (limited to 'source')
-rw-r--r--source/slang/hlsl.meta.slang354
1 files changed, 354 insertions, 0 deletions
diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang
index 85a81eabf..774c6a247 100644
--- a/source/slang/hlsl.meta.slang
+++ b/source/slang/hlsl.meta.slang
@@ -20806,3 +20806,357 @@ T workgroupUniformLoad<T>(__ref T v)
return v;
}
}
+
+//
+// Pack/Unpack Math Intrinsics
+//
+// These were introduced in SM 6.6 but requirements are dropped to SM 5.0 here
+// to expose these intrinsics on targets that do not have SM 6.6 features.
+//
+
+//@hidden:
+/// Returns the least-significant byte of `val`, zero-extended to an unsigned 16-bit integer.
+[__readNone]
+[ForceInline]
+uint16_t __lsb_as_u16(uint32_t val)
+{
+    // Mask first so the narrowing conversion only ever sees a value in [0, 255].
+    const uint32_t lowByte = val & 0xFFU;
+    return uint16_t(lowByte);
+}
+
+//@hidden:
+/// Returns the least-significant byte of `val` as an unsigned 32-bit integer (bits 8-31 cleared).
+[__readNone]
+[ForceInline]
+uint32_t __lsb_as_u32(uint32_t val)
+{
+    const uint32_t byteMask = 0xFFU;
+    return val & byteMask;
+}
+
+//@hidden:
+/// Returns the least-significant byte of `val` converted to a signed 8-bit integer.
+[__readNone]
+[ForceInline]
+int8_t __lsb_as_s8(uint32_t val)
+{
+    // NOTE(review): masked values above 127 are expected to wrap into the negative
+    // two's-complement range when narrowed to int8_t; confirm for every target.
+    const uint32_t lowByte = val & 0xFFU;
+    return int8_t(lowByte);
+}
+
+//@hidden:
+/// Returns the least-significant byte of `val`, read as a signed 8-bit value via
+/// `__lsb_as_s8` and then widened to a signed 16-bit integer.
+[__readNone]
+[ForceInline]
+int16_t __lsb_as_s16(uint32_t val)
+{
+    const int8_t signedByte = __lsb_as_s8(val);
+    return int16_t(signedByte);
+}
+
+//@hidden:
+/// Returns the least-significant byte of `val`, read as a signed 8-bit value via
+/// `__lsb_as_s8` and then widened to a signed 32-bit integer.
+[__readNone]
+[ForceInline]
+int32_t __lsb_as_s32(uint32_t val)
+{
+    const int8_t signedByte = __lsb_as_s8(val);
+    return int32_t(signedByte);
+}
+
+//@hidden:
+/// Clamps `val` to the range [0, 255] and returns the result as an unsigned 32-bit integer.
+[__readNone]
+[ForceInline]
+uint32_t __lsb_clamp_u8_as_u32(int32_t val)
+{
+    // The clamped value is never negative, so the conversion to unsigned keeps its value.
+    const int32_t clamped = clamp(val, 0, 255);
+    return uint32_t(clamped);
+}
+
+//@hidden:
+/// Clamps `val` to the range [-128, 127] and returns the low 8 bits of the clamped value
+/// in the least-significant byte of an unsigned 32-bit integer (bits 8-31 cleared).
+[__readNone]
+[ForceInline]
+uint32_t __lsb_clamp_s8_as_u32(int32_t val)
+{
+    const int32_t clamped = clamp(val, -128, 127);
+    // Mask after the conversion so that negative results do not leak set bits above bit 7.
+    return uint32_t(clamped) & 0xFFU;
+}
+
+//@public:
+/// Unpack the 4 signed 8-bit values held in `packed` into a vector of signed 16-bit integers.
+[__readNone]
+[ForceInline]
+[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)]
+int16_t4 unpack_s8s16(int8_t4_packed packed)
+{
+    __target_switch
+    {
+    case hlsl: __intrinsic_asm "unpack_s8s16";
+    case spirv:
+        // Reinterpret the packed value as four signed 8-bit lanes, then widen each lane.
+        return spirv_asm
+        {
+            %s8Vec = OpBitcast $$vector<int8_t, 4> $packed;
+            result:$$vector<int16_t, 4> = OpSConvert %s8Vec
+        };
+    default:
+        // Fallback for the remaining targets: component x comes from bits 0-7, y from
+        // bits 8-15, z from bits 16-23 and w from bits 24-31.
+        const uint32_t bits = uint32_t(packed);
+        const int16_t lane0 = __lsb_as_s16(bits);
+        const int16_t lane1 = __lsb_as_s16(bits >> 8U);
+        const int16_t lane2 = __lsb_as_s16(bits >> 16U);
+        const int16_t lane3 = __lsb_as_s16(bits >> 24U);
+        return int16_t4(lane0, lane1, lane2, lane3);
+    }
+}
+
+//@public:
+/// Unpack the 4 unsigned 8-bit values held in `packed` into a vector of unsigned 16-bit integers.
+[__readNone]
+[ForceInline]
+[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)]
+uint16_t4 unpack_u8u16(uint8_t4_packed packed)
+{
+    __target_switch
+    {
+    case hlsl: __intrinsic_asm "unpack_u8u16";
+    case spirv:
+        // Reinterpret the packed value as four unsigned 8-bit lanes, then widen each lane.
+        return spirv_asm
+        {
+            %u8Vec = OpBitcast $$vector<uint8_t, 4> $packed;
+            result:$$vector<uint16_t, 4> = OpUConvert %u8Vec
+        };
+    default:
+        // Fallback for the remaining targets: component x comes from bits 0-7, y from
+        // bits 8-15, z from bits 16-23 and w from bits 24-31.
+        const uint32_t bits = uint32_t(packed);
+        const uint16_t lane0 = __lsb_as_u16(bits);
+        const uint16_t lane1 = __lsb_as_u16(bits >> 8U);
+        const uint16_t lane2 = __lsb_as_u16(bits >> 16U);
+        const uint16_t lane3 = __lsb_as_u16(bits >> 24U);
+        return uint16_t4(lane0, lane1, lane2, lane3);
+    }
+}
+
+//@public:
+/// Unpack the 4 signed 8-bit values held in `packed` into a vector of signed 32-bit integers.
+[__readNone]
+[ForceInline]
+[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)]
+int32_t4 unpack_s8s32(int8_t4_packed packed)
+{
+    __target_switch
+    {
+    case hlsl: __intrinsic_asm "unpack_s8s32";
+    case wgsl: __intrinsic_asm "unpack4xI8";
+    case spirv:
+        // Reinterpret the packed value as four signed 8-bit lanes, then widen each lane.
+        return spirv_asm
+        {
+            %s8Vec = OpBitcast $$vector<int8_t, 4> $packed;
+            result:$$vector<int32_t, 4> = OpSConvert %s8Vec
+        };
+    default:
+        // Fallback for the remaining targets: component x comes from bits 0-7, y from
+        // bits 8-15, z from bits 16-23 and w from bits 24-31.
+        const uint32_t bits = uint32_t(packed);
+        const int32_t lane0 = __lsb_as_s32(bits);
+        const int32_t lane1 = __lsb_as_s32(bits >> 8U);
+        const int32_t lane2 = __lsb_as_s32(bits >> 16U);
+        const int32_t lane3 = __lsb_as_s32(bits >> 24U);
+        return int32_t4(lane0, lane1, lane2, lane3);
+    }
+}
+
+//@public:
+/// Unpack the 4 unsigned 8-bit values held in `packed` into a vector of unsigned 32-bit integers.
+[__readNone]
+[ForceInline]
+[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)]
+uint32_t4 unpack_u8u32(uint8_t4_packed packed)
+{
+    __target_switch
+    {
+    case hlsl: __intrinsic_asm "unpack_u8u32";
+    case wgsl: __intrinsic_asm "unpack4xU8";
+    case spirv:
+        // Reinterpret the packed value as four unsigned 8-bit lanes, then widen each lane.
+        return spirv_asm
+        {
+            %u8Vec = OpBitcast $$vector<uint8_t, 4> $packed;
+            result:$$vector<uint32_t, 4> = OpUConvert %u8Vec
+        };
+    default:
+        // Fallback for the remaining targets: component x comes from bits 0-7, y from
+        // bits 8-15, z from bits 16-23 and w from bits 24-31.
+        const uint32_t bits = uint32_t(packed);
+        const uint32_t lane0 = __lsb_as_u32(bits);
+        const uint32_t lane1 = __lsb_as_u32(bits >> 8U);
+        const uint32_t lane2 = __lsb_as_u32(bits >> 16U);
+        const uint32_t lane3 = __lsb_as_u32(bits >> 24U);
+        return uint32_t4(lane0, lane1, lane2, lane3);
+    }
+}
+
+//@public:
+/// Pack a vector of 4 unsigned 32-bit integers into a packed value of 4 unsigned 8-bit integers.
+/// Only the low 8 bits of each component are kept; the remaining bits are discarded.
+[__readNone]
+[ForceInline]
+[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)]
+uint8_t4_packed pack_u8(uint32_t4 unpackedValue)
+{
+    __target_switch
+    {
+    case hlsl: __intrinsic_asm "pack_u8";
+    case wgsl: __intrinsic_asm "pack4xU8";
+    default:
+        // Fallback for the remaining targets: x lands in bits 0-7, y in bits 8-15,
+        // z in bits 16-23 and w in bits 24-31.
+        const uint32_t byte0 = __lsb_as_u32(unpackedValue.x);
+        const uint32_t byte1 = __lsb_as_u32(unpackedValue.y) << 8U;
+        const uint32_t byte2 = __lsb_as_u32(unpackedValue.z) << 16U;
+        const uint32_t byte3 = __lsb_as_u32(unpackedValue.w) << 24U;
+        return uint8_t4_packed(byte0 | byte1 | byte2 | byte3);
+    }
+}
+
+//@public:
+/// Pack a vector of 4 signed 32-bit integers into a packed value of 4 signed 8-bit integers.
+/// Only the low 8 bits of each component are kept; the remaining bits are discarded.
+[__readNone]
+[ForceInline]
+[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)]
+int8_t4_packed pack_s8(int32_t4 unpackedValue)
+{
+    __target_switch
+    {
+    case hlsl: __intrinsic_asm "pack_s8";
+    case wgsl: __intrinsic_asm "pack4xI8";
+    default:
+        // Fallback for the remaining targets: x lands in bits 0-7, y in bits 8-15,
+        // z in bits 16-23 and w in bits 24-31.
+        const uint32_t byte0 = __lsb_as_u32(unpackedValue.x);
+        const uint32_t byte1 = __lsb_as_u32(unpackedValue.y) << 8U;
+        const uint32_t byte2 = __lsb_as_u32(unpackedValue.z) << 16U;
+        const uint32_t byte3 = __lsb_as_u32(unpackedValue.w) << 24U;
+        return int8_t4_packed(byte0 | byte1 | byte2 | byte3);
+    }
+}
+
+//@public:
+/// Pack a vector of 4 unsigned 16-bit integers into a packed value of 4 unsigned 8-bit integers.
+/// Only the low 8 bits of each component are kept; the remaining bits are discarded.
+[__readNone]
+[ForceInline]
+[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)]
+uint8_t4_packed pack_u8(uint16_t4 unpackedValue)
+{
+    __target_switch
+    {
+    case hlsl: __intrinsic_asm "pack_u8";
+    default:
+        // Fallback for the remaining targets: x lands in bits 0-7, y in bits 8-15,
+        // z in bits 16-23 and w in bits 24-31.
+        const uint32_t byte0 = __lsb_as_u32(unpackedValue.x);
+        const uint32_t byte1 = __lsb_as_u32(unpackedValue.y) << 8U;
+        const uint32_t byte2 = __lsb_as_u32(unpackedValue.z) << 16U;
+        const uint32_t byte3 = __lsb_as_u32(unpackedValue.w) << 24U;
+        return uint8_t4_packed(byte0 | byte1 | byte2 | byte3);
+    }
+}
+
+//@public:
+/// Pack a vector of 4 signed 16-bit integers into a packed value of 4 signed 8-bit integers.
+/// Only the low 8 bits of each component are kept; the remaining bits are discarded.
+[__readNone]
+[ForceInline]
+[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)]
+int8_t4_packed pack_s8(int16_t4 unpackedValue)
+{
+    __target_switch
+    {
+    case hlsl: __intrinsic_asm "pack_s8";
+    default:
+        // Fallback for the remaining targets: x lands in bits 0-7, y in bits 8-15,
+        // z in bits 16-23 and w in bits 24-31.
+        const uint32_t byte0 = __lsb_as_u32(unpackedValue.x);
+        const uint32_t byte1 = __lsb_as_u32(unpackedValue.y) << 8U;
+        const uint32_t byte2 = __lsb_as_u32(unpackedValue.z) << 16U;
+        const uint32_t byte3 = __lsb_as_u32(unpackedValue.w) << 24U;
+        return int8_t4_packed(byte0 | byte1 | byte2 | byte3);
+    }
+}
+
+//@public:
+/// Pack a vector of 4 signed 32-bit integers into a packed value of 4 unsigned 8-bit integers,
+/// clamping each value to the range [0, 255] to ensure it fits within 8 bits.
+/// Negative components therefore pack as 0 and components above 255 pack as 255.
+[__readNone]
+[ForceInline]
+[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)]
+uint8_t4_packed pack_clamp_u8(int32_t4 unpackedValue)
+{
+    __target_switch
+    {
+    case hlsl: __intrinsic_asm "pack_clamp_u8";
+    // WGSL's pack4xU8Clamp operates on vec4<u32>. Converting a negative i32 component
+    // straight to u32 reinterprets its bits as a very large value, which would then clamp
+    // to 255 instead of 0. Raise negative components to 0 before the conversion so this
+    // path agrees with the fallback below.
+    case wgsl: __intrinsic_asm "pack4xU8Clamp(vec4<u32>(max($0, vec4<i32>(0))))";
+    default:
+        // Fallback for the remaining targets: clamp each component, then place x in
+        // bits 0-7, y in bits 8-15, z in bits 16-23 and w in bits 24-31.
+        const uint32_t byte0 = __lsb_clamp_u8_as_u32(unpackedValue.x);
+        const uint32_t byte1 = __lsb_clamp_u8_as_u32(unpackedValue.y) << 8U;
+        const uint32_t byte2 = __lsb_clamp_u8_as_u32(unpackedValue.z) << 16U;
+        const uint32_t byte3 = __lsb_clamp_u8_as_u32(unpackedValue.w) << 24U;
+        return uint8_t4_packed(byte0 | byte1 | byte2 | byte3);
+    }
+}
+
+//@public:
+/// Pack a vector of 4 signed 32-bit integers into a packed value of 4 signed 8-bit integers,
+/// clamping each value to the range [-128, 127] to ensure it fits within 8 bits.
+[__readNone]
+[ForceInline]
+[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)]
+int8_t4_packed pack_clamp_s8(int32_t4 unpackedValue)
+{
+    __target_switch
+    {
+    case hlsl: __intrinsic_asm "pack_clamp_s8";
+    case wgsl: __intrinsic_asm "pack4xI8Clamp";
+    default:
+        // Fallback for the remaining targets: clamp each component, then place x in
+        // bits 0-7, y in bits 8-15, z in bits 16-23 and w in bits 24-31.
+        const uint32_t byte0 = __lsb_clamp_s8_as_u32(unpackedValue.x);
+        const uint32_t byte1 = __lsb_clamp_s8_as_u32(unpackedValue.y) << 8U;
+        const uint32_t byte2 = __lsb_clamp_s8_as_u32(unpackedValue.z) << 16U;
+        const uint32_t byte3 = __lsb_clamp_s8_as_u32(unpackedValue.w) << 24U;
+        return int8_t4_packed(byte0 | byte1 | byte2 | byte3);
+    }
+}
+
+//@public:
+/// Pack a vector of 4 signed 16-bit integers into a packed value of 4 unsigned 8-bit integers,
+/// clamping each value to the range [0, 255] to ensure it fits within 8 bits.
+[__readNone]
+[ForceInline]
+[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)]
+uint8_t4_packed pack_clamp_u8(int16_t4 unpackedValue)
+{
+    __target_switch
+    {
+    case hlsl: __intrinsic_asm "pack_clamp_u8";
+    default:
+        // Fallback for the remaining targets: clamp each component, then place x in
+        // bits 0-7, y in bits 8-15, z in bits 16-23 and w in bits 24-31.
+        const uint32_t byte0 = __lsb_clamp_u8_as_u32(unpackedValue.x);
+        const uint32_t byte1 = __lsb_clamp_u8_as_u32(unpackedValue.y) << 8U;
+        const uint32_t byte2 = __lsb_clamp_u8_as_u32(unpackedValue.z) << 16U;
+        const uint32_t byte3 = __lsb_clamp_u8_as_u32(unpackedValue.w) << 24U;
+        return uint8_t4_packed(byte0 | byte1 | byte2 | byte3);
+    }
+}
+
+//@public:
+/// Pack a vector of 4 signed 16-bit integers into a packed value of 4 signed 8-bit integers,
+/// clamping each value to the range [-128, 127] to ensure it fits within 8 bits.
+[__readNone]
+[ForceInline]
+[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)]
+int8_t4_packed pack_clamp_s8(int16_t4 unpackedValue)
+{
+    __target_switch
+    {
+    case hlsl: __intrinsic_asm "pack_clamp_s8";
+    default:
+        // Fallback for the remaining targets: clamp each component, then place x in
+        // bits 0-7, y in bits 8-15, z in bits 16-23 and w in bits 24-31.
+        const uint32_t byte0 = __lsb_clamp_s8_as_u32(unpackedValue.x);
+        const uint32_t byte1 = __lsb_clamp_s8_as_u32(unpackedValue.y) << 8U;
+        const uint32_t byte2 = __lsb_clamp_s8_as_u32(unpackedValue.z) << 16U;
+        const uint32_t byte3 = __lsb_clamp_s8_as_u32(unpackedValue.w) << 24U;
+        return int8_t4_packed(byte0 | byte1 | byte2 | byte3);
+    }
+}
+