diff options
| author | Darren Wihandi <65404740+fairywreath@users.noreply.github.com> | 2025-03-18 13:29:29 -0400 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-03-18 10:29:29 -0700 |
| commit | eee974d74617944ca2b6f6ac424e98a12a51b82c (patch) | |
| tree | 9067715066a6118052409176350422444320795d /source | |
| parent | 336b5d56d197fc64be75611c9dcaac71c56996d8 (diff) | |
Implement floating-point pack/unpack intrinsics for all targets (#6503)
* Implement floating-point pack/unpack intrinsics
* remove unused functions and update caps in glsl meta file
* rename pack capability
Diffstat (limited to 'source')
| -rw-r--r-- | source/slang/glsl.meta.slang | 224 | ||||
| -rw-r--r-- | source/slang/hlsl.meta.slang | 472 | ||||
| -rw-r--r-- | source/slang/slang-capabilities.capdef | 10 |
3 files changed, 467 insertions, 239 deletions
diff --git a/source/slang/glsl.meta.slang b/source/slang/glsl.meta.slang index 2a89f2b66..4412ae460 100644 --- a/source/slang/glsl.meta.slang +++ b/source/slang/glsl.meta.slang @@ -617,230 +617,34 @@ public vector<float, N> uintBitsToFloat(highp vector<uint, N> x) [__readNone] [ForceInline] -uint packUnorm1x16(float c) -{ - return uint(round(clamp(c, 0.0, 1.0) * 65535.0)); -} - -[__readNone] -[ForceInline] -uint packSnorm1x16(float v) -{ - return uint(round(clamp(v ,-1.0, 1.0) * 32767.0)); -} - -[__readNone] -[ForceInline] -uint packUnorm1x8(float c) -{ - return uint(round(clamp(c, 0.0, 1.0) * 255.0)); -} - -[__readNone] -[ForceInline] -uint packSnorm1x8(float c) -{ - return uint(round(clamp(c, -1.0, 1.0) * 127.0)); -} - -[__readNone] -[ForceInline] -float unpackUnorm1x16(uint p) -{ - const uint wordMask = 0xffff; - return float(p & wordMask) / 65535.0; -} - -[__readNone] -[ForceInline] -float unpackSnorm1x16(uint p) -{ - const uint wordMask = 0xffff; - return clamp(float(p & wordMask) / 32767.0, -1.0, 1.0); -} - -[__readNone] -[ForceInline] -float unpackUnorm1x8(uint p) -{ - const uint byteMask = 0xff; - return float(p & byteMask) / 255.0; -} - -[__readNone] -[ForceInline] -float unpackSnorm1x8(uint p) -{ - const uint byteMask = 0xff; - return clamp(float(p & byteMask) / 127.0, -1.0, 1.0); -} - -[__readNone] -[ForceInline] -[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)] -uint float2half(float f) -{ - uint u = floatBitsToUint(f); - uint s = ((u >> uint(16)) & uint(0x8000)); - uint e = 0; - uint m = ((u >> uint(13)) & uint(0x03ff)); - if (m != 0) - { - e = ((((u & uint(0x7f800000)) - uint(0x38000000)) >> uint(13)) & uint(0x7c00)); - } - return (s | e | m); -} - -[__readNone] -[ForceInline] -[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)] -public uint packUnorm2x16(vec2 v) -{ - __target_switch - { - case glsl: __intrinsic_asm "packUnorm2x16"; - case spirv: return spirv_asm { - result:$$uint = OpExtInst glsl450 PackUnorm2x16 $v - }; - default: - return 
packUnorm1x16(v.x) | (packUnorm1x16(v.y) << uint(16)); - } -} - -[__readNone] -[ForceInline] -[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)] -public uint packSnorm2x16(vec2 v) -{ - __target_switch - { - case glsl: __intrinsic_asm "packSnorm2x16"; - case spirv: return spirv_asm { - result:$$uint = OpExtInst glsl450 PackSnorm2x16 $v - }; - default: - return packSnorm1x16(v.x) | (packSnorm1x16(v.y) << uint(16)); - } -} - -[__readNone] -[ForceInline] -[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)] -public uint packUnorm4x8(vec4 v) -{ - __target_switch - { - case glsl: __intrinsic_asm "packUnorm4x8"; - case spirv: return spirv_asm { - result:$$uint = OpExtInst glsl450 PackUnorm4x8 $v - }; - default: - return packUnorm1x8(v.x) | (packUnorm1x8(v.y) << uint(8)) | (packUnorm1x8(v.z) << uint(16)) | (packUnorm1x8(v.w) << uint(24)); - } -} - -[__readNone] -[ForceInline] -[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)] -public uint packSnorm4x8(vec4 v) -{ - __target_switch - { - case glsl: __intrinsic_asm "packSnorm4x8"; - case spirv: return spirv_asm { - result:$$uint = OpExtInst glsl450 PackSnorm4x8 $v - }; - default: - return packSnorm1x8(v.x) | (packSnorm1x8(v.y) << uint(8)) | (packSnorm1x8(v.z) << uint(16)) | (packSnorm1x8(v.w) << uint(24)); - } -} - -[__readNone] -[ForceInline] -[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)] public vec2 unpackUnorm2x16(uint p) { - __target_switch - { - case glsl: __intrinsic_asm "unpackUnorm2x16"; - case spirv: return spirv_asm { - result:$$vec2 = OpExtInst glsl450 UnpackUnorm2x16 $p - }; - default: - return vec2(unpackUnorm1x16(p & uint(0xffff)), unpackUnorm1x16(p >> uint(16))); - } + return unpackUnorm2x16ToFloat(p); } [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)] public vec2 unpackSnorm2x16(uint p) { - __target_switch - { - case glsl: __intrinsic_asm 
"unpackSnorm2x16"; - case spirv: return spirv_asm { - result:$$vec2 = OpExtInst glsl450 UnpackSnorm2x16 $p - }; - default: - return vec2(unpackSnorm1x16(p & uint(0xffff)), unpackSnorm1x16(p >> uint(16))); - } + return unpackSnorm2x16ToFloat(p); } [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)] public vec4 unpackUnorm4x8(highp uint p) { - __target_switch - { - case glsl: __intrinsic_asm "unpackUnorm4x8"; - case spirv: return spirv_asm { - result:$$vec4 = OpExtInst glsl450 UnpackUnorm4x8 $p - }; - default: - return vec4( - unpackUnorm1x8(p), - unpackUnorm1x8(p >> 8), - unpackUnorm1x8(p >> 16), - unpackUnorm1x8(p >> 24)); - } + return unpackUnorm4x8ToFloat(p); } [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)] public vec4 unpackSnorm4x8(highp uint p) { - __target_switch - { - case glsl: __intrinsic_asm "unpackSnorm4x8"; - case spirv: return spirv_asm { - result:$$vec4 = OpExtInst glsl450 UnpackSnorm4x8 $p - }; - default: - return vec4( - unpackSnorm1x8(p), - unpackSnorm1x8(p >> 8), - unpackSnorm1x8(p >> 16), - unpackSnorm1x8(p >> 24)); - } -} - -[__readNone] -[ForceInline] -[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)] -public uint packHalf2x16(vec2 v) -{ - __target_switch - { - case glsl: __intrinsic_asm "packHalf2x16"; - case spirv: return spirv_asm { - result:$$uint = OpExtInst glsl450 PackHalf2x16 $v - }; - default: - return float2half(v.x) | (float2half(v.y) << uint(16)); - } + return unpackSnorm4x8ToFloat(p); } [__readNone] @@ -865,18 +669,10 @@ public float half2float(uint h) [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)] public vec2 unpackHalf2x16(uint p) { - __target_switch - { - case glsl: __intrinsic_asm "unpackHalf2x16"; - case spirv: return spirv_asm { - result:$$vec2 = 
OpExtInst glsl450 UnpackHalf2x16 $p - }; - default: - return vec2(half2float(p & uint(0xffff)), half2float(p >> uint(16))); - } + return unpackHalf2x16ToFloat(p); } [__readNone] diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang index c7e4925be..558ac6301 100644 --- a/source/slang/hlsl.meta.slang +++ b/source/slang/hlsl.meta.slang @@ -24169,7 +24169,7 @@ typealias int8_t4_packed = uint; /// Unpack 4 signed 8-bit values into a vector of 16 bit integers. [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)] int16_t4 unpack_s8s16(int8_t4_packed packed) { return unpackInt4x8ToInt16(packed); @@ -24178,7 +24178,7 @@ int16_t4 unpack_s8s16(int8_t4_packed packed) /// Unpack 4 unsigned 8-bit values into a vector of 16 bit integers. [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)] uint16_t4 unpack_u8u16(uint8_t4_packed packed) { return unpackUint4x8ToUint16(packed); @@ -24187,7 +24187,7 @@ uint16_t4 unpack_u8u16(uint8_t4_packed packed) /// Unpack 4 signed 8-bit values into a vector of 32 bit integers. [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)] int32_t4 unpack_s8s32(int8_t4_packed packed) { return unpackInt4x8ToInt32(packed); @@ -24196,7 +24196,7 @@ int32_t4 unpack_s8s32(int8_t4_packed packed) /// Unpack 4 unsigned 8-bit values into a vector of 32 bit integers. 
[__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)] uint32_t4 unpack_u8u32(uint8_t4_packed packed) { return unpackUint4x8ToUint32(packed); @@ -24205,7 +24205,7 @@ uint32_t4 unpack_u8u32(uint8_t4_packed packed) /// Pack a vector of 4 unsigned 32 bit integers into a packed value of 4 8-bit integers, dropping unused bits. [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)] uint8_t4_packed pack_u8(uint32_t4 unpackedValue) { return packUint4x8(unpackedValue); @@ -24214,7 +24214,7 @@ uint8_t4_packed pack_u8(uint32_t4 unpackedValue) /// Pack a vector of 4 signed 32 bit integers into a packed value of 4 8-bit integers, dropping unused bits. [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)] int8_t4_packed pack_s8(int32_t4 unpackedValue) { return packInt4x8(unpackedValue); @@ -24223,7 +24223,7 @@ int8_t4_packed pack_s8(int32_t4 unpackedValue) /// Pack a vector of 4 unsigned 16 bit integers into a packed value of 4 8-bit integers, dropping unused bits. [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)] uint8_t4_packed pack_u8(uint16_t4 unpackedValue) { return packUint4x8(unpackedValue); @@ -24232,7 +24232,7 @@ uint8_t4_packed pack_u8(uint16_t4 unpackedValue) /// Pack a vector of 4 signed 16 bit integers into a packed value of 4 8-bit integers, dropping unused bits. 
[__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)] int8_t4_packed pack_s8(int16_t4 unpackedValue) { return packInt4x8(unpackedValue); @@ -24242,7 +24242,7 @@ int8_t4_packed pack_s8(int16_t4 unpackedValue) /// clamping each value to the range [0, 255] to ensure it fits within 8 bits. [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)] uint8_t4_packed pack_clamp_u8(int32_t4 unpackedValue) { return packUint4x8Clamp(unpackedValue); @@ -24252,7 +24252,7 @@ uint8_t4_packed pack_clamp_u8(int32_t4 unpackedValue) /// clamping each value to the range [-128, 127] to ensure it fits within 8 bits. [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)] int8_t4_packed pack_clamp_s8(int32_t4 unpackedValue) { return packInt4x8Clamp(unpackedValue); @@ -24262,7 +24262,7 @@ int8_t4_packed pack_clamp_s8(int32_t4 unpackedValue) /// clamping each value to the range [0, 255] to ensure it fits within 8 bits. [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)] uint8_t4_packed pack_clamp_u8(int16_t4 unpackedValue) { return packUint4x8Clamp(unpackedValue); @@ -24272,7 +24272,7 @@ uint8_t4_packed pack_clamp_u8(int16_t4 unpackedValue) /// clamping each value to the range [-128, 127] to ensure it fits within 8 bits. 
[__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)] int8_t4_packed pack_clamp_s8(int16_t4 unpackedValue) { return packInt4x8Clamp(unpackedValue); @@ -24347,13 +24347,41 @@ int32_t __lsbAsInt32(uint32_t val) return int32_t(__lsbAsInt8(val)); } +[__readNone] +[ForceInline] +uint32_t2 __unpackUint2x16ToUint32(uint packedValue) +{ + return uint32_t2(packedValue & 0xFFFFU, packedValue >> 16U); +} + +[__readNone] +[ForceInline] +int32_t2 __unpackInt2x16ToInt32(uint packedValue) +{ + int signedValue = int(packedValue); + return int32_t2(signedValue << 16U, signedValue) >> 16U; +} + +[__readNone] +[ForceInline] +uint __packUint2x16(uint32_t2 unpackedValue) +{ + return unpackedValue.x | (unpackedValue.y << 16U); +} + +[__readNone] +[ForceInline] +uint __packInt2x16(int32_t2 unpackedValue) +{ + return uint(unpackedValue.x | (unpackedValue.y << 16U)); +} + //@public: /// Unpack 4 unsigned 8-bit values into a vector of 32 bit integers. [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)] -[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)] uint32_t4 unpackUint4x8ToUint32(uint packedValue) { __target_switch @@ -24380,7 +24408,7 @@ uint32_t4 unpackUint4x8ToUint32(uint packedValue) /// Unpack 4 unsigned 8-bit values into a vector of 16 bit integers. [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)] uint16_t4 unpackUint4x8ToUint16(uint packedValue) { __target_switch @@ -24406,7 +24434,7 @@ uint16_t4 unpackUint4x8ToUint16(uint packedValue) /// Unpack 4 signed 8-bit values into a vector of 32 bit integers. 
[__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)] int32_t4 unpackInt4x8ToInt32(uint packedValue) { __target_switch @@ -24433,7 +24461,7 @@ int32_t4 unpackInt4x8ToInt32(uint packedValue) /// Unpack 4 signed 8-bit values into a vector of 16 bit integers. [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)] int16_t4 unpackInt4x8ToInt16(uint packedValue) { __target_switch @@ -24459,7 +24487,7 @@ int16_t4 unpackInt4x8ToInt16(uint packedValue) /// Pack a vector of 4 unsigned 32 bit integers into a packed value of 4 8-bit integers, dropping unused bits. [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)] uint packUint4x8(uint32_t4 unpackedValue) { __target_switch @@ -24477,7 +24505,7 @@ uint packUint4x8(uint32_t4 unpackedValue) /// Pack a vector of 4 unsigned 16 bit integers into a packed value of 4 8-bit integers, dropping unused bits. [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)] uint packUint4x8(uint16_t4 unpackedValue) { __target_switch @@ -24491,7 +24519,7 @@ uint packUint4x8(uint16_t4 unpackedValue) /// Pack a vector of 4 signed 32 bit integers into a packed value of 4 8-bit integers, dropping unused bits. [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)] uint packInt4x8(int32_t4 unpackedValue) { __target_switch @@ -24506,7 +24534,7 @@ uint packInt4x8(int32_t4 unpackedValue) /// Pack a vector of 4 signed 16 bit integers into a packed value of 4 8-bit integers, dropping unused bits. 
[__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)] uint packInt4x8(int16_t4 unpackedValue) { __target_switch @@ -24521,7 +24549,7 @@ uint packInt4x8(int16_t4 unpackedValue) /// clamping each value to the range [-128, 127] to ensure it fits within 8 bits. [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)] uint packUint4x8Clamp(int32_t4 unpackedValue) { __target_switch @@ -24537,7 +24565,7 @@ uint packUint4x8Clamp(int32_t4 unpackedValue) /// clamping each value to the range [0, 255] to ensure it fits within 8 bits. [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)] uint packUint4x8Clamp(int16_t4 unpackedValue) { __target_switch @@ -24552,7 +24580,7 @@ uint packUint4x8Clamp(int16_t4 unpackedValue) /// clamping each value to the range [-128, 127] to ensure it fits within 8 bits. [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)] uint packInt4x8Clamp(int32_t4 unpackedValue) { __target_switch @@ -24568,7 +24596,7 @@ uint packInt4x8Clamp(int32_t4 unpackedValue) /// clamping each value to the range [-128, 127] to ensure it fits within 8 bits. [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)] uint packInt4x8Clamp(int16_t4 unpackedValue) { __target_switch @@ -24578,3 +24606,397 @@ uint packInt4x8Clamp(int16_t4 unpackedValue) return packInt4x8(clamp(unpackedValue, -128, 127)); } } + +// +// Floating-point Pack/Unpack Intrinsics +// + +// @public: + +/// Unpack a 32-bit unsigned integer into four 8-bit unsigned integers. 
+/// Then, each 8-bit value is converted to a normalized single-precision +/// floating-point value to generate a 4-component vector. +[__readNone] +[ForceInline] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)] +float4 unpackUnorm4x8ToFloat(uint packedValue) +{ + __target_switch + { + case glsl: __intrinsic_asm "unpackUnorm4x8"; + case metal: __intrinsic_asm "unpack_unorm4x8_to_float"; + case spirv: + return spirv_asm + { + result:$$float4 = OpExtInst glsl450 UnpackUnorm4x8 $packedValue; + }; + case wgsl: __intrinsic_asm "unpack4x8unorm"; + default: + uint4 unpackedIntegers = unpackUint4x8ToUint32(packedValue); + return float4(unpackedIntegers) / 255.0; + } +} + +/// Unpack a 32-bit unsigned integer into four 8-bit unsigned integers. +/// Then, each 8-bit value is converted to a normalized half-precision +/// floating-point value to generate a 4-component vector. +[__readNone] +[ForceInline] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)] +half4 unpackUnorm4x8ToHalf(uint packedValue) +{ + __target_switch + { + case metal: __intrinsic_asm "unpack_unorm4x8_to_half"; + default: + return half4(unpackUnorm4x8ToFloat(packedValue)); + } +} + +/// Unpack a 32-bit unsigned integer into four 8-bit signed integers. +/// Then, each 8-bit value is converted to a normalized single-precision +/// floating-point value to generate a 4-component vector. 
+[__readNone] +[ForceInline] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)] +float4 unpackSnorm4x8ToFloat(uint packedValue) +{ + __target_switch + { + case glsl: __intrinsic_asm "unpackSnorm4x8"; + case metal: __intrinsic_asm "unpack_snorm4x8_to_float"; + case spirv: + return spirv_asm + { + result:$$float4 = OpExtInst glsl450 UnpackSnorm4x8 $packedValue; + }; + case wgsl: __intrinsic_asm "unpack4x8snorm"; + default: + int4 unpackedIntegers = unpackInt4x8ToInt32(packedValue); + return clamp(float4(unpackedIntegers) / 127.0, -1.0, 1.0); + } +} + +/// Unpack a 32-bit unsigned integer into four 8-bit signed integers. +/// Then, each 8-bit value is converted to a normalized half-precision +/// floating-point value to generate a 4-component vector. +[__readNone] +[ForceInline] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)] +half4 unpackSnorm4x8ToHalf(uint packedValue) +{ + __target_switch + { + case metal: __intrinsic_asm "unpack_snorm4x8_to_half"; + default: + return half4(unpackSnorm4x8ToFloat(packedValue)); + } +} + +/// Unpack a 32-bit unsigned integer into two 16-bit unsigned integers. +/// Then, each 16-bit value is converted to a normalized single-precision +/// floating-point value to generate a 2-component vector. +[__readNone] +[ForceInline] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)] +float2 unpackUnorm2x16ToFloat(uint packedValue) +{ + __target_switch + { + case glsl: __intrinsic_asm "unpackUnorm2x16"; + case metal: __intrinsic_asm "unpack_unorm2x16_to_float"; + case spirv: + return spirv_asm + { + result:$$float2 = OpExtInst glsl450 UnpackUnorm2x16 $packedValue; + }; + case wgsl: __intrinsic_asm "unpack2x16unorm"; + default: + uint2 unpackedIntegers = __unpackUint2x16ToUint32(packedValue); + return float2(unpackedIntegers) / 65535.0; + } +} + +/// Unpack a 32-bit unsigned integer into two 16-bit unsigned integers. 
+/// Then, each 16-bit value is converted to a normalized half-precision +/// floating-point value to generate a 2-component vector. +[__readNone] +[ForceInline] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)] +half2 unpackUnorm2x16ToHalf(uint packedValue) +{ + __target_switch + { + case metal: __intrinsic_asm "unpack_unorm2x16_to_half"; + default: + return half2(unpackUnorm2x16ToFloat(packedValue)); + } +} + +/// Unpack a 32-bit unsigned integer into two 16-bit signed integers. +/// Then, each 16-bit value is converted to a normalized single-precision +/// floating-point value to generate a 2-component vector. +[__readNone] +[ForceInline] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)] +float2 unpackSnorm2x16ToFloat(uint packedValue) +{ + __target_switch + { + case glsl: __intrinsic_asm "unpackSnorm2x16"; + case metal: __intrinsic_asm "unpack_snorm2x16_to_float"; + case spirv: + return spirv_asm + { + result:$$float2 = OpExtInst glsl450 UnpackSnorm2x16 $packedValue; + }; + case wgsl: __intrinsic_asm "unpack2x16snorm"; + default: + int2 unpackedIntegers = __unpackInt2x16ToInt32(packedValue); + return clamp(float2(unpackedIntegers) / 32767.0, -1.0, 1.0); + } +} + +/// Unpack a 32-bit unsigned integer into two 16-bit signed integers. +/// Then, each 16-bit value is converted to a normalized half-precision +/// floating-point value to generate a 2-component vector. +[__readNone] +[ForceInline] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)] +half2 unpackSnorm2x16ToHalf(uint packedValue) +{ + __target_switch + { + case metal: __intrinsic_asm "unpack_snorm2x16_to_half"; + default: + return half2(unpackSnorm2x16ToFloat(packedValue)); + } +} + +/// Unpack a 32-bit unsigned integer into two 16-bit values. +/// Then, each 16-bit value is interpreted as an IEEE-754 binary16 number and converted to a single-precision +/// floating-point value to generate a 2-component vector. 
+[__readNone] +[ForceInline] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)] +float2 unpackHalf2x16ToFloat(uint packedValue) +{ + __target_switch + { + case glsl: __intrinsic_asm "unpackHalf2x16"; + case spirv: + return spirv_asm + { + result:$$float2 = OpExtInst glsl450 UnpackHalf2x16 $packedValue; + }; + case wgsl: __intrinsic_asm "unpack2x16float"; + default: + uint2 unpackedIntegers = __unpackUint2x16ToUint32(packedValue); + return f16tof32(unpackedIntegers); + } +} + +/// Unpack a 32-bit unsigned integer into two 16-bit values. +/// Then, each 16-bit value is interpreted as an IEEE-754 binary16 half-precision +/// floating-point value to generate a 2-component vector. +[__readNone] +[ForceInline] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)] +half2 unpackHalf2x16ToHalf(uint packedValue) +{ + return half2(unpackHalf2x16ToFloat(packedValue)); +} + +/// Convert a 4-component vector of normalized unsigned single-precision floating-point +/// values to four 8-bit integer values, then pack these 8-bit values into a +/// 32-bit unsigned integer. +[__readNone] +[ForceInline] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)] +uint packUnorm4x8(float4 unpackedValue) +{ + __target_switch + { + case glsl: __intrinsic_asm "packUnorm4x8"; + case metal: __intrinsic_asm "pack_float_to_unorm4x8"; + case spirv: + return spirv_asm + { + result:$$uint = OpExtInst glsl450 PackUnorm4x8 $unpackedValue + }; + case wgsl: __intrinsic_asm "pack4x8unorm"; + default: + uint4 values = uint4(round(saturate(unpackedValue) * 255.0)); + return packUint4x8(values); + } +} + +/// Convert a 4-component vector of normalized unsigned half-precision floating-point +/// values to four 8-bit integer values, then pack these 8-bit values into a +/// 32-bit unsigned integer. 
+[__readNone] +[ForceInline] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)] +uint packUnorm4x8(half4 unpackedValue) +{ + __target_switch + { + case metal: __intrinsic_asm "pack_half_to_unorm4x8"; + default: + return packUnorm4x8(float4(unpackedValue)); + } +} + +/// Convert a 4-component vector of normalized signed single-precision floating-point +/// values to four 8-bit integer values, then pack these 8-bit values into a +/// 32-bit unsigned integer. +[__readNone] +[ForceInline] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)] +uint packSnorm4x8(float4 unpackedValue) +{ + __target_switch + { + case glsl: __intrinsic_asm "packSnorm4x8"; + case metal: __intrinsic_asm "pack_float_to_snorm4x8"; + case spirv: + return spirv_asm + { + result:$$uint = OpExtInst glsl450 PackSnorm4x8 $unpackedValue + }; + case wgsl: __intrinsic_asm "pack4x8snorm"; + default: + int4 values = int4(round(clamp(unpackedValue, -1.0, 1.0) * 127.0)) & 0xFF; + return packInt4x8(values); + } +} + +/// Convert a 4-component vector of normalized signed half-precision floating-point +/// values to four 8-bit integer values, then pack these 8-bit values into a +/// 32-bit unsigned integer. +[__readNone] +[ForceInline] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)] +uint packSnorm4x8(half4 unpackedValue) +{ + __target_switch + { + case metal: __intrinsic_asm "pack_half_to_snorm4x8"; + default: + return packSnorm4x8(float4(unpackedValue)); + } +} + +/// Convert a 2-component vector of normalized unsigned single-precision floating-point +/// values to two 16-bit integer values, then pack these 16-bit values into a +/// 32-bit unsigned integer. 
+[__readNone] +[ForceInline] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)] +uint packUnorm2x16(float2 unpackedValue) +{ + __target_switch + { + case glsl: __intrinsic_asm "packUnorm2x16"; + case metal: __intrinsic_asm "pack_float_to_unorm2x16"; + case spirv: + return spirv_asm + { + result:$$uint = OpExtInst glsl450 PackUnorm2x16 $unpackedValue; + }; + case wgsl: __intrinsic_asm "pack2x16unorm"; + default: + uint2 values = uint2(round(saturate(unpackedValue) * 65535.0)); + return __packUint2x16(values); + } +} + +/// Convert a 2-component vector of normalized unsigned half-precision floating-point +/// values to two 16-bit integer values, then pack these 16-bit values into a +/// 32-bit unsigned integer. +[__readNone] +[ForceInline] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)] +uint packUnorm2x16(half2 unpackedValue) +{ + __target_switch + { + case metal: __intrinsic_asm "pack_half_to_unorm2x16"; + default: + return packUnorm2x16(float2(unpackedValue)); + } +} + +/// Convert a 2-component vector of normalized signed single-precision floating-point +/// values to two 16-bit integer values, then pack these 16-bit values into a +/// 32-bit unsigned integer. +[__readNone] +[ForceInline] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)] +uint packSnorm2x16(float2 unpackedValue) +{ + __target_switch + { + case glsl: __intrinsic_asm "packSnorm2x16"; + case metal: __intrinsic_asm "pack_float_to_snorm2x16"; + case spirv: + return spirv_asm + { + result:$$uint = OpExtInst glsl450 PackSnorm2x16 $unpackedValue; + }; + case wgsl: __intrinsic_asm "pack2x16snorm"; + default: + int2 values = int2(round(clamp(unpackedValue, -1.0, 1.0) * 32767.0)) & 0xFFFF; + return __packInt2x16(values); + } +} + +/// Convert a 2-component vector of normalized signed half-precision floating-point +/// values to two 16-bit integer values, then pack these 16-bit values into a +/// 32-bit unsigned integer. 
+[__readNone] +[ForceInline] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)] +uint packSnorm2x16(half2 unpackedValue) +{ + __target_switch + { + case metal: __intrinsic_asm "pack_half_to_snorm2x16"; + default: + return packSnorm2x16(float2(unpackedValue)); + } +} + +/// Convert a 2-component vector of IEEE-754 binary16 single-precision floating-point +/// values to two 16-bit integer values, then pack these 16-bit values into a +/// 32-bit unsigned integer. +[__readNone] +[ForceInline] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)] +uint packHalf2x16(float2 unpackedValue) +{ + __target_switch + { + case glsl: __intrinsic_asm "packHalf2x16"; + case spirv: + return spirv_asm + { + result:$$uint = OpExtInst glsl450 PackHalf2x16 $unpackedValue; + }; + case wgsl: __intrinsic_asm "pack2x16float"; + default: + uint2 values = f32tof16(unpackedValue); + return __packUint2x16(values); + } +} + +/// Convert a 2-component vector of IEEE-754 binary16 half-precision floating-point +/// values to two 16-bit integer values, then pack these 16-bit values into a +/// 32-bit unsigned integer. 
+[__readNone] +[ForceInline] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, pack_vector)] +uint packHalf2x16(half2 unpackedValue) +{ + return packHalf2x16(float2(unpackedValue)); +} diff --git a/source/slang/slang-capabilities.capdef b/source/slang/slang-capabilities.capdef index 130439fe1..2285bd2e5 100644 --- a/source/slang/slang-capabilities.capdef +++ b/source/slang/slang-capabilities.capdef @@ -1937,6 +1937,16 @@ alias shader5_sm_4_0 = GL_ARB_gpu_shader5 | sm_4_0_version; /// [Compound] alias shader5_sm_5_0 = GL_ARB_gpu_shader5 | sm_5_0_version; +/// Capabilities required to use pack/unpack intrinsics on packed vector data +/// [Compound] +alias pack_vector = GL_ARB_gpu_shader5 + | _sm_6_6 + | _cuda_sm_9_0 + | wgsl + | metal + | cpp + ; + /// Capabilities required to use GLSL-style subgroup operations 'subgroup_basic' /// [Compound] alias subgroup_basic = GL_KHR_shader_subgroup_basic |
