diff options
| author | Jay Kwak <82421531+jkwak-work@users.noreply.github.com> | 2024-05-02 09:59:45 -0700 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-05-02 09:59:45 -0700 |
| commit | 679a457940027420817a85070b3fdb9bfc0cca2e (patch) | |
| tree | 51463c1f19035108e4e1b3a76354b27beae07669 /source | |
| parent | d53d793db6f4d82358ada700e1bd98b497384cdc (diff) | |
Implement SPIR-V target for GLSL functions (#4083)
Fixes #4051
This commit implements SPIR-V target for GLSL functions.
It also fixes a few problems in the GLSL targeting implementation.
Diffstat (limited to 'source')
| -rw-r--r-- | source/slang/glsl.meta.slang | 598 | ||||
| -rw-r--r-- | source/slang/hlsl.meta.slang | 26 |
2 files changed, 538 insertions, 86 deletions
diff --git a/source/slang/glsl.meta.slang b/source/slang/glsl.meta.slang index 98770293c..0ba6c17aa 100644 --- a/source/slang/glsl.meta.slang +++ b/source/slang/glsl.meta.slang @@ -4,10 +4,10 @@ #define lowp #define VECTOR_MAP_UNARY(TYPE, COUNT, FUNC, VALUE) \ - vector<TYPE,COUNT> result; for(int i = 0; i < COUNT; ++i) { result[i] = FUNC(VALUE[i]); } return result + vector<TYPE,COUNT> result; [ForceUnroll] for(int i = 0; i < COUNT; ++i) { result[i] = FUNC(VALUE[i]); } return result #define VECTOR_MAP_TRINARY(TYPE, COUNT, FUNC, A, B, C) \ - vector<TYPE,COUNT> result; for(int i = 0; i < COUNT; ++i) { result[i] = FUNC(A[i], B[i], C[i]); } return result + vector<TYPE,COUNT> result; [ForceUnroll] for(int i = 0; i < COUNT; ++i) { result[i] = FUNC(A[i], B[i], C[i]); } return result // // OpenGL 4.60 spec @@ -331,6 +331,10 @@ public T asinh(T x) { case cpp: __intrinsic_asm "$P_asinh($0)"; case cuda: __intrinsic_asm "$P_asinh($0)"; + case glsl: __intrinsic_asm "asinh"; + case spirv: return spirv_asm { + OpExtInst $$T result glsl450 Asinh $x + }; default: return log(x + sqrt(x * x + T(1))); } @@ -342,7 +346,15 @@ __generic<T : __BuiltinFloatingPointType, let N:int> [require(cpp_cuda_glsl_hlsl_spirv, GLSL_130)] public vector<T,N> asinh(vector<T,N> x) { - VECTOR_MAP_UNARY(T, N, asinh, x); + __target_switch + { + case glsl: __intrinsic_asm "asinh"; + case spirv: return spirv_asm { + OpExtInst $$vector<T,N> result glsl450 Asinh $x + }; + default: + VECTOR_MAP_UNARY(T, N, asinh, x); + } } __generic<T : __BuiltinFloatingPointType> @@ -355,6 +367,10 @@ public T acosh(T x) { case cpp: __intrinsic_asm "$P_acosh($0)"; case cuda: __intrinsic_asm "$P_acosh($0)"; + case glsl: __intrinsic_asm "acosh"; + case spirv: return spirv_asm { + OpExtInst $$T result glsl450 Acosh $x + }; default: return log(x + sqrt( x * x - T(1))); } @@ -366,7 +382,15 @@ __generic<T : __BuiltinFloatingPointType, let N:int> [require(cpp_cuda_glsl_hlsl_spirv, GLSL_130)] public vector<T,N> acosh(vector<T,N> x) { - 
VECTOR_MAP_UNARY(T, N, acosh, x); + __target_switch + { + case glsl: __intrinsic_asm "acosh"; + case spirv: return spirv_asm { + OpExtInst $$vector<T,N> result glsl450 Acosh $x + }; + default: + VECTOR_MAP_UNARY(T, N, acosh, x); + } } __generic<T : __BuiltinFloatingPointType> @@ -379,6 +403,10 @@ public T atanh(T x) { case cpp: __intrinsic_asm "$P_atanh($0)"; case cuda: __intrinsic_asm "$P_atanh($0)"; + case glsl: __intrinsic_asm "atanh"; + case spirv: return spirv_asm { + OpExtInst $$T result glsl450 Atanh $x + }; default: return T(0.5) * log((T(1) + x) / (T(1) - x)); } @@ -390,7 +418,15 @@ __generic<T : __BuiltinFloatingPointType, let N:int> [require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] public vector<T,N> atanh(vector<T,N> x) { - VECTOR_MAP_UNARY(T, N, atanh, x); + __target_switch + { + case glsl: __intrinsic_asm "atanh"; + case spirv: return spirv_asm { + OpExtInst $$vector<T,N> result glsl450 Atanh $x + }; + default: + VECTOR_MAP_UNARY(T, N, atanh, x); + } } // @@ -491,6 +527,45 @@ public vector<T, N> mod(vector<T, N> x, vector<T, N> y) return fmod(x, y); } +__generic<T : __BuiltinFloatingPointType, let N : int> +[__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +public vector<T,N> min(vector<T,N> x, T y) +{ + __target_switch + { + case glsl: __intrinsic_asm "min"; + default: + return min(x, vector<T,N>(y)); + } +} + +__generic<T : __BuiltinFloatingPointType, let N : int> +[__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +public vector<T,N> max(vector<T,N> x, T y) +{ + __target_switch + { + case glsl: __intrinsic_asm "max"; + default: + return max(x, vector<T,N>(y)); + } +} + +__generic<T : __BuiltinFloatingPointType, let N : int> +[__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +public vector<T,N> clamp(vector<T,N> x, T minBound, T maxBound) +{ + __target_switch + { + case glsl: __intrinsic_asm "clamp"; + default: + return clamp(x, vector<T,N>(minBound), vector<T,N>(maxBound)); + } +} + __generic<T : 
__BuiltinFloatingPointType> [__readNone] [ForceInline] @@ -506,7 +581,12 @@ __generic<T : __BuiltinFloatingPointType, let N:int> [require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] public vector<T, N> mix(vector<T, N> x, vector<T, N> y, T a) { - return lerp(x, y, vector<T, N>(a)); + __target_switch + { + case glsl: __intrinsic_asm "mix"; + default: + return mix(x, y, vector<T, N>(a)); + } } __generic<T : __BuiltinFloatingPointType, let N:int> @@ -524,7 +604,15 @@ __generic<T> [require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] public T mix(T x, T y, bool a) { - return (a ? y : x); + __target_switch + { + case glsl: __intrinsic_asm "mix"; + case spirv: return spirv_asm { + result:$$T = OpSelect $a $x $y + }; + default: + return (a ? y : x); + } } __generic<T, let N:int> @@ -533,12 +621,21 @@ __generic<T, let N:int> [require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] public vector<T, N> mix(vector<T, N> x, vector<T, N> y, vector<bool, N> a) { - vector<T, N> result; - for (int i = 0; i < N; i++) + __target_switch { - result[i] = (a[i] ? y[i] : x[i]); + case glsl: __intrinsic_asm "mix"; + case spirv: return spirv_asm { + result:$$vector<T,N> = OpSelect $a $x $y + }; + default: + vector<T, N> result; + [ForceUnroll] + for (int i = 0; i < N; i++) + { + result[i] = (a[i] ? 
y[i] : x[i]); + } + return result; } - return result; } [__readNone] @@ -617,28 +714,28 @@ public vector<float, N> uintBitsToFloat(highp vector<uint, N> x) [ForceInline] uint packUnorm1x16(float c) { - return uint(clamp(c, 0.0, 1.0) * 65535.0 + 0.5); + return uint(round(clamp(c, 0.0, 1.0) * 65535.0)); } [__readNone] [ForceInline] uint packSnorm1x16(float v) { - return uint(clamp(v ,-1.0, 1.0) * 32767.0 + 32767.5); + return uint(round(clamp(v ,-1.0, 1.0) * 32767.0)); } [__readNone] [ForceInline] uint packUnorm1x8(float c) { - return uint(clamp(c, 0.0, 1.0) * 255.0 + 0.5); + return uint(round(clamp(c, 0.0, 1.0) * 255.0)); } [__readNone] [ForceInline] uint packSnorm1x8(float c) { - return uint(clamp(c, -1.0, 1.0) * 127.0 + 127.5); + return uint(round(clamp(c, -1.0, 1.0) * 127.0)); } [__readNone] @@ -654,7 +751,7 @@ float unpackUnorm1x16(uint p) float unpackSnorm1x16(uint p) { const uint wordMask = 0xffff; - return clamp((float(p & wordMask) - 32767.0) / 32767.0, -1.0, 1.0); + return clamp(float(p & wordMask) / 32767.0, -1.0, 1.0); } [__readNone] @@ -670,7 +767,7 @@ float unpackUnorm1x8(uint p) float unpackSnorm1x8(uint p) { const uint byteMask = 0xff; - return clamp((float(p & byteMask) - 127.0) / 127.0, -1.0, 1.0); + return clamp(float(p & byteMask) / 127.0, -1.0, 1.0); } [__readNone] @@ -689,6 +786,44 @@ uint float2half(float f) return (s | e | m); } +__generic<T : __BuiltinFloatingPointType, E : __BuiltinIntegerType> +[__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +public T ldexp(T x, E exp) +{ + __target_switch + { + case hlsl: __intrinsic_asm "ldexp"; + case glsl: __intrinsic_asm "ldexp"; + case spirv: return spirv_asm { + OpExtInst $$T result glsl450 Ldexp $x $exp + }; + default: + return ldexp(x, __floatCast<T>(exp)); + } +} + +__generic<T : __BuiltinFloatingPointType, E : __BuiltinIntegerType, let N : int> +[__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +public vector<T, N> ldexp(vector<T, N> x, vector<E, N> exp) +{ + 
__target_switch + { + case hlsl: __intrinsic_asm "ldexp"; + case glsl: __intrinsic_asm "ldexp"; + case spirv: return spirv_asm { + OpExtInst $$vector<T,N> result glsl450 Ldexp $x $exp + }; + default: + vector<T,N> temp; + [ForceUnroll] + for (int i = 0; i < N; ++i) + temp[i] = __floatCast<T>(exp[i]); + return ldexp(x, temp); + } +} + [__readNone] [ForceInline] [require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)] @@ -697,6 +832,9 @@ public uint packUnorm2x16(vec2 v) __target_switch { case glsl: __intrinsic_asm "packUnorm2x16"; + case spirv: return spirv_asm { + result:$$uint = OpExtInst glsl450 PackUnorm2x16 $v + }; default: return packUnorm1x16(v.x) | (packUnorm1x16(v.y) << uint(16)); } @@ -710,6 +848,9 @@ public uint packSnorm2x16(vec2 v) __target_switch { case glsl: __intrinsic_asm "packSnorm2x16"; + case spirv: return spirv_asm { + result:$$uint = OpExtInst glsl450 PackSnorm2x16 $v + }; default: return packSnorm1x16(v.x) | (packSnorm1x16(v.y) << uint(16)); } @@ -723,6 +864,9 @@ public uint packUnorm4x8(vec4 v) __target_switch { case glsl: __intrinsic_asm "packUnorm4x8"; + case spirv: return spirv_asm { + result:$$uint = OpExtInst glsl450 PackUnorm4x8 $v + }; default: return packUnorm1x8(v.x) | (packUnorm1x8(v.y) << uint(8)) | (packUnorm1x8(v.z) << uint(16)) | (packUnorm1x8(v.w) << uint(24)); } @@ -736,6 +880,9 @@ public uint packSnorm4x8(vec4 v) __target_switch { case glsl: __intrinsic_asm "packSnorm4x8"; + case spirv: return spirv_asm { + result:$$uint = OpExtInst glsl450 PackSnorm4x8 $v + }; default: return packSnorm1x8(v.x) | (packSnorm1x8(v.y) << uint(8)) | (packSnorm1x8(v.z) << uint(16)) | (packSnorm1x8(v.w) << uint(24)); } @@ -749,6 +896,9 @@ public vec2 unpackUnorm2x16(uint p) __target_switch { case glsl: __intrinsic_asm "unpackUnorm2x16"; + case spirv: return spirv_asm { + result:$$vec2 = OpExtInst glsl450 UnpackUnorm2x16 $p + }; default: return vec2(unpackUnorm1x16(p & uint(0xffff)), unpackUnorm1x16(p >> uint(16))); } @@ -762,6 +912,9 @@ public vec2 
unpackSnorm2x16(uint p) __target_switch { case glsl: __intrinsic_asm "unpackSnorm2x16"; + case spirv: return spirv_asm { + result:$$vec2 = OpExtInst glsl450 UnpackSnorm2x16 $p + }; default: return vec2(unpackSnorm1x16(p & uint(0xffff)), unpackSnorm1x16(p >> uint(16))); } @@ -775,6 +928,9 @@ public vec4 unpackUnorm4x8(highp uint p) __target_switch { case glsl: __intrinsic_asm "unpackUnorm4x8"; + case spirv: return spirv_asm { + result:$$vec4 = OpExtInst glsl450 UnpackUnorm4x8 $p + }; default: return vec4( unpackUnorm1x8(p), @@ -792,6 +948,9 @@ public vec4 unpackSnorm4x8(highp uint p) __target_switch { case glsl: __intrinsic_asm "unpackSnorm4x8"; + case spirv: return spirv_asm { + result:$$vec4 = OpExtInst glsl450 UnpackSnorm4x8 $p + }; default: return vec4( unpackSnorm1x8(p), @@ -809,6 +968,9 @@ public uint packHalf2x16(vec2 v) __target_switch { case glsl: __intrinsic_asm "packHalf2x16"; + case spirv: return spirv_asm { + result:$$uint = OpExtInst glsl450 PackHalf2x16 $v + }; default: return float2half(v.x) | (float2half(v.y) << uint(16)); } @@ -842,6 +1004,9 @@ public vec2 unpackHalf2x16(uint p) __target_switch { case glsl: __intrinsic_asm "unpackHalf2x16"; + case spirv: return spirv_asm { + result:$$vec2 = OpExtInst glsl450 UnpackHalf2x16 $p + }; default: return vec2(half2float(p & uint(0xffff)), half2float(p >> uint(16))); } @@ -855,6 +1020,9 @@ public double packDouble2x32(uvec2 v) __target_switch { case glsl: __intrinsic_asm "packDouble2x32"; + case spirv: return spirv_asm { + result:$$double = OpExtInst glsl450 PackDouble2x32 $v + }; default: // TODO: there is no "asdouble()" //return asdouble(uint64_t(v.x) | (uint64_t(v.y) << 32)); @@ -870,6 +1038,9 @@ public uvec2 unpackDouble2x32(double v) __target_switch { case glsl: __intrinsic_asm "unpackDouble2x32"; + case spirv: return spirv_asm { + result:$$uvec2 = OpExtInst glsl450 UnpackDouble2x32 $v + }; default: // TODO: there is no "asuint64()" uint64_t u = 0; // asuint64(v); @@ -887,7 +1058,15 @@ __generic<T : 
__BuiltinFloatingPointType> [require(cpp_cuda_glsl_hlsl_spirv, GLSL_400)] public T faceforward(T n, T i, T ng) { - return dot(ng, i) < T(0.0f) ? n : -n; + __target_switch + { + case glsl: __intrinsic_asm "faceforward"; + case spirv: return spirv_asm { + OpExtInst $$T result glsl450 FaceForward $n $i $ng + }; + default: + return dot(ng, i) < T(0.0f) ? n : -n; + } } // @@ -904,6 +1083,15 @@ public matrix<T, R, C> outerProduct(vector<T, C> c, vector<T, R> r) __target_switch { case glsl: __intrinsic_asm "outerProduct"; + + // Note: SPIR-V takes the input arguments in an opposite order + // compared to GLSL. SPIR-V spec document says, + // "Its (second argument) number of components must equal the + // number of columns in Result Type." + // + case spirv: return spirv_asm { + result:$$matrix<T,R,C> = OpOuterProduct $c $r + }; default: matrix<T, R, C> result; for (int j = 0; j < R; ++j) @@ -918,13 +1106,15 @@ public matrix<T, R, C> outerProduct(vector<T, C> c, vector<T, R> r) } __generic<T : __BuiltinFloatingPointType, let N : int> -[require(cpp_cuda_glsl_hlsl_spirv, GLSL_400)] -matrix<T,N,N> inverse(matrix<T,N,N> m) +[require(glsl_spirv, GLSL_400)] +public matrix<T,N,N> inverse(matrix<T,N,N> m) { __target_switch { case glsl: __intrinsic_asm "inverse"; - case hlsl: __intrinsic_asm "inverse"; + case spirv: return spirv_asm { + OpExtInst $$matrix<T,N,N> result glsl450 MatrixInverse $m + }; } } @@ -934,201 +1124,423 @@ matrix<T,N,N> inverse(matrix<T,N,N> m) [__readNone] [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, GLSL_400)] public uint uaddCarry(highp uint x, highp uint y, out lowp uint carry) { - let result = x * y; - carry = ((result < x || result < y) ? 
1 : 0); - return result; + __target_switch + { + case glsl: __intrinsic_asm "uaddCarry"; + case spirv: return spirv_asm { + %ResType = OpTypeStruct $$uint $$uint; + %temp:%ResType = OpIAddCarry $x $y; + %carry:$$uint = OpCompositeExtract %temp 1; + OpStore &carry %carry; + result:$$uint = OpCompositeExtract %temp 0 + }; + default: + let result = x * y; + carry = ((result < x || result < y) ? 1 : 0); + return result; + } } __generic<let N:int> [__readNone] [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, GLSL_400)] public vector<uint,N> uaddCarry(highp vector<uint,N> x, highp vector<uint,N> y, out lowp vector<uint,N> carry) { - VECTOR_MAP_TRINARY(uint, N, uaddCarry, x, y, carry); + __target_switch + { + case glsl: __intrinsic_asm "uaddCarry"; + case spirv: return spirv_asm { + %ResType = OpTypeStruct $$vector<uint,N> $$vector<uint,N>; + %temp:%ResType = OpIAddCarry $x $y; + %carry:$$vector<uint,N> = OpCompositeExtract %temp 1; + OpStore &carry %carry; + result:$$vector<uint,N> = OpCompositeExtract %temp 0 + }; + default: + VECTOR_MAP_TRINARY(uint, N, uaddCarry, x, y, carry); + } } [__readNone] [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, GLSL_400)] public uint usubBorrow(highp uint x, highp uint y, out lowp uint borrow) { - borrow = (y > x) ? 1 : 0; - return x - y; + __target_switch + { + case glsl: __intrinsic_asm "usubBorrow"; + case spirv: return spirv_asm { + %ResType = OpTypeStruct $$uint $$uint; + %temp:%ResType = OpISubBorrow $x $y; + %borrow:$$uint = OpCompositeExtract %temp 1; + OpStore &borrow %borrow; + result:$$uint = OpCompositeExtract %temp 0 + }; + default: + borrow = (y > x) ? 
1 : 0; + return x - y; + } } __generic<let N:int> [__readNone] [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, GLSL_400)] public vector<uint,N> usubBorrow(highp vector<uint,N> x, highp vector<uint,N> y, out lowp vector<uint,N> borrow) { - VECTOR_MAP_TRINARY(uint, N, usubBorrow, x, y, borrow); + __target_switch + { + case glsl: __intrinsic_asm "usubBorrow"; + case spirv: return spirv_asm { + %ResType = OpTypeStruct $$vector<uint,N> $$vector<uint,N>; + %temp:%ResType = OpISubBorrow $x $y; + %borrow:$$vector<uint,N> = OpCompositeExtract %temp 1; + OpStore &borrow %borrow; + result:$$vector<uint,N> = OpCompositeExtract %temp 0 + }; + default: + VECTOR_MAP_TRINARY(uint, N, usubBorrow, x, y, borrow); + } } [__readNone] [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, GLSL_400)] public void umulExtended(highp uint x, highp uint y, out highp uint msb, out highp uint lsb) { - uint64_t result = x * y; - msb = uint(result >> 32); - lsb = uint(result); + __target_switch + { + case glsl: __intrinsic_asm "umulExtended"; + case spirv: spirv_asm { + %ResType = OpTypeStruct $$uint $$uint; + %temp:%ResType = OpUMulExtended $x $y; + %lsb:$$uint = OpCompositeExtract %temp 0; + %msb:$$uint = OpCompositeExtract %temp 1; + OpStore &lsb %lsb; + OpStore &msb %msb; + }; + default: + uint64_t result = x * y; + msb = uint(result >> 32); + lsb = uint(result); + } } __generic<let N:int> [__readNone] [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, GLSL_400)] public void umulExtended(highp vector<uint,N> x, highp vector<uint,N> y, out highp vector<uint,N> msb, out highp vector<uint,N> lsb) { - for(int i = 0; i < N; ++i) + __target_switch { - umulExtended(x[i], y[i], msb[i], lsb[i]); + case glsl: __intrinsic_asm "umulExtended"; + case spirv: spirv_asm { + %ResType = OpTypeStruct $$vector<uint,N> $$vector<uint,N>; + %temp:%ResType = OpUMulExtended $x $y; + %lsb:$$vector<uint,N> = OpCompositeExtract %temp 0; + %msb:$$vector<uint,N> = OpCompositeExtract %temp 1; + OpStore &lsb %lsb; + OpStore 
&msb %msb; + }; + default: + [ForceUnroll] + for(int i = 0; i < N; ++i) + { + umulExtended(x[i], y[i], msb[i], lsb[i]); + } } } [__readNone] [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, GLSL_400)] public void imulExtended(highp int x, highp int y, out highp int msb, out highp int lsb) { - int64_t result = x * y; - msb = int(result >> 32); - lsb = int(result); + __target_switch + { + case glsl: __intrinsic_asm "imulExtended"; + case spirv: spirv_asm { + %ResType = OpTypeStruct $$int $$int; + %temp:%ResType = OpSMulExtended $x $y; + %lsb:$$int = OpCompositeExtract %temp 0; + %msb:$$int = OpCompositeExtract %temp 1; + OpStore &lsb %lsb; + OpStore &msb %msb; + }; + default: + int64_t result = x * y; + msb = int(result >> 32); + lsb = int(result); + } } __generic<let N:int> [__readNone] [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, GLSL_400)] public void imulExtended(highp vector<int,N> x, highp vector<int,N> y, out highp vector<int,N> msb, out highp vector<int,N> lsb) { - for(int i = 0; i < N; ++i) + __target_switch { - imulExtended(x[i], y[i], msb[i], lsb[i]); + case glsl: __intrinsic_asm "imulExtended"; + case spirv: spirv_asm { + %ResType = OpTypeStruct $$vector<int,N> $$vector<int,N>; + %temp:%ResType = OpSMulExtended $x $y; + %lsb:$$vector<int,N> = OpCompositeExtract %temp 0; + %msb:$$vector<int,N> = OpCompositeExtract %temp 1; + OpStore &lsb %lsb; + OpStore &msb %msb; + }; + default: + [ForceUnroll] + for(int i = 0; i < N; ++i) + { + imulExtended(x[i], y[i], msb[i], lsb[i]); + } } } [__readNone] [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, GLSL_400)] public int bitfieldExtract(int value, int offset, int bits) { - return int(uint(value >> offset) & ((1u << bits) - 1)); + __target_switch + { + case glsl: __intrinsic_asm "bitfieldExtract"; + case spirv: return spirv_asm { + result:$$int = OpBitFieldSExtract $value $offset $bits + }; + default: + return int(uint(value >> offset) & ((1u << bits) - 1)); + } } __generic<let N:int> [__readNone] 
[ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, GLSL_400)] public vector<int,N> bitfieldExtract(vector<int,N> value, int offset, int bits) { - vector<int,N> result; - for (int i = 0; i < N; ++i) + __target_switch { - result[i] = bitfieldExtract(value[i], offset, bits); + case glsl: __intrinsic_asm "bitfieldExtract"; + case spirv: return spirv_asm { + result:$$vector<int,N> = OpBitFieldSExtract $value $offset $bits + }; + default: + vector<int,N> result; + [ForceUnroll] + for (int i = 0; i < N; ++i) + { + result[i] = bitfieldExtract(value[i], offset, bits); + } + return result; } - return result; } [__readNone] [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, GLSL_400)] public uint bitfieldExtract(uint value, int offset, int bits) { - return (value >> offset) & ((1u << bits) - 1); + __target_switch + { + case glsl: __intrinsic_asm "bitfieldExtract"; + case spirv: return spirv_asm { + result:$$uint = OpBitFieldUExtract $value $offset $bits + }; + default: + return (value >> offset) & ((1u << bits) - 1); + } } __generic<let N:int> [__readNone] [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, GLSL_400)] public vector<uint,N> bitfieldExtract(vector<uint,N> value, int offset, int bits) { - vector<uint,N> result; - for (int i = 0; i < N; ++i) + __target_switch { - result[i] = bitfieldExtract(value[i], offset, bits); + case glsl: __intrinsic_asm "bitfieldExtract"; + case spirv: return spirv_asm { + result:$$vector<uint,N> = OpBitFieldUExtract $value $offset $bits + }; + default: + vector<uint,N> result; + [ForceUnroll] + for (int i = 0; i < N; ++i) + { + result[i] = bitfieldExtract(value[i], offset, bits); + } + return result; } - return result; } [__readNone] [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, GLSL_400)] public uint bitfieldInsert(uint base, uint insert, int offset, int bits) { - uint clearMask = ~(((1u << bits) - 1u) << offset); - uint clearedBase = base & clearMask; - uint maskedInsert = (insert & ((1u << bits) - 1u)) << offset; - return clearedBase | 
maskedInsert; + __target_switch + { + case glsl: __intrinsic_asm "bitfieldInsert"; + case spirv: return spirv_asm { + result:$$uint = OpBitFieldInsert $base $insert $offset $bits + }; + default: + uint clearMask = ~(((1u << bits) - 1u) << offset); + uint clearedBase = base & clearMask; + uint maskedInsert = (insert & ((1u << bits) - 1u)) << offset; + return clearedBase | maskedInsert; + } } __generic<let N:int> [__readNone] [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, GLSL_400)] public vector<uint,N> bitfieldInsert(vector<uint,N> base, vector<uint,N> insert, int offset, int bits) { - vector<uint,N> result; - for (int i = 0; i < N; ++i) + __target_switch { - result[i] = bitfieldInsert(base[i], insert[i], offset, bits); + case glsl: __intrinsic_asm "bitfieldInsert"; + case spirv: return spirv_asm { + result:$$vector<uint,N> = OpBitFieldInsert $base $insert $offset $bits + }; + default: + vector<uint,N> result; + [ForceUnroll] + for (int i = 0; i < N; ++i) + { + result[i] = bitfieldInsert(base[i], insert[i], offset, bits); + } + return result; } - return result; } [__readNone] [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, GLSL_400)] public int bitfieldInsert(int base, int insert, int offset, int bits) { - uint clearMask = ~(((1u << bits) - 1u) << offset); - uint clearedBase = base & clearMask; - uint maskedInsert = (insert & ((1u << bits) - 1u)) << offset; - return clearedBase | maskedInsert; + __target_switch + { + case glsl: __intrinsic_asm "bitfieldInsert"; + case spirv: return spirv_asm { + result:$$int = OpBitFieldInsert $base $insert $offset $bits + }; + default: + uint clearMask = ~(((1u << bits) - 1u) << offset); + uint clearedBase = base & clearMask; + uint maskedInsert = (insert & ((1u << bits) - 1u)) << offset; + return clearedBase | maskedInsert; + } } __generic<let N:int> [__readNone] [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, GLSL_400)] public vector<int,N> bitfieldInsert(vector<int,N> base, vector<int,N> insert, int offset, int bits) { - 
vector<int,N> result; - for (int i = 0; i < N; ++i) + __target_switch { - result[i] = bitfieldInsert(base[i], insert[i], offset, bits); + case glsl: __intrinsic_asm "bitfieldInsert"; + case spirv: return spirv_asm { + result:$$vector<int,N> = OpBitFieldInsert $base $insert $offset $bits + }; + default: + vector<int,N> result; + [ForceUnroll] + for (int i = 0; i < N; ++i) + { + result[i] = bitfieldInsert(base[i], insert[i], offset, bits); + } + return result; } - return result; } [__readNone] [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, GLSL_400)] public int bitfieldReverse(highp int value) { - value = ((value & 0xAAAAAAAA) >> 1) | ((value & 0x55555555) << 1); - value = ((value & 0xCCCCCCCC) >> 2) | ((value & 0x33333333) << 2); - value = ((value & 0xF0F0F0F0) >> 4) | ((value & 0x0F0F0F0F) << 4); - value = ((value & 0xFF00FF00) >> 8) | ((value & 0x00FF00FF) << 8); - value = ((value & 0xFFFF0000) >> 16) | ((value & 0x0000FFFF) << 16); - return value; + __target_switch + { + case glsl: __intrinsic_asm "bitfieldReverse"; + case spirv: return spirv_asm { + result:$$int = OpBitReverse $value + }; + default: + value = ((value & 0xAAAAAAAA) >> 1) | ((value & 0x55555555) << 1); + value = ((value & 0xCCCCCCCC) >> 2) | ((value & 0x33333333) << 2); + value = ((value & 0xF0F0F0F0) >> 4) | ((value & 0x0F0F0F0F) << 4); + value = ((value & 0xFF00FF00) >> 8) | ((value & 0x00FF00FF) << 8); + value = ((value & 0xFFFF0000) >> 16) | ((value & 0x0000FFFF) << 16); + return value; + } } __generic<let N:int> [__readNone] [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, GLSL_400)] public vector<int,N> bitfieldReverse(highp vector<int,N> value) { - VECTOR_MAP_UNARY(int, N, bitfieldReverse, value); + __target_switch + { + case glsl: __intrinsic_asm "bitfieldReverse"; + case spirv: return spirv_asm { + result:$$vector<int,N> = OpBitReverse $value + }; + default: + VECTOR_MAP_UNARY(int, N, bitfieldReverse, value); + } } [__readNone] [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, 
GLSL_400)] public uint bitfieldReverse(highp uint value) { - value = ((value & 0xAAAAAAAA) >> 1) | ((value & 0x55555555) << 1); - value = ((value & 0xCCCCCCCC) >> 2) | ((value & 0x33333333) << 2); - value = ((value & 0xF0F0F0F0) >> 4) | ((value & 0x0F0F0F0F) << 4); - value = ((value & 0xFF00FF00) >> 8) | ((value & 0x00FF00FF) << 8); - value = ((value & 0xFFFF0000) >> 16) | ((value & 0x0000FFFF) << 16); - return value; + __target_switch + { + case glsl: __intrinsic_asm "bitfieldReverse"; + case spirv: return spirv_asm { + result:$$uint = OpBitReverse $value + }; + default: + value = ((value & 0xAAAAAAAA) >> 1) | ((value & 0x55555555) << 1); + value = ((value & 0xCCCCCCCC) >> 2) | ((value & 0x33333333) << 2); + value = ((value & 0xF0F0F0F0) >> 4) | ((value & 0x0F0F0F0F) << 4); + value = ((value & 0xFF00FF00) >> 8) | ((value & 0x00FF00FF) << 8); + value = ((value & 0xFFFF0000) >> 16) | ((value & 0x0000FFFF) << 16); + return value; + } } __generic<let N:int> [__readNone] [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, GLSL_400)] public vector<uint,N> bitfieldReverse(highp vector<uint,N> value) { - VECTOR_MAP_UNARY(int, N, bitfieldReverse, value); + __target_switch + { + case glsl: __intrinsic_asm "bitfieldReverse"; + case spirv: return spirv_asm { + result:$$vector<uint,N> = OpBitReverse $value + }; + default: + VECTOR_MAP_UNARY(int, N, bitfieldReverse, value); + } } [__readNone] @@ -1145,7 +1557,15 @@ __generic<let N:int> [require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)] public vector<uint,N> bitCount(vector<uint,N> value) { - VECTOR_MAP_UNARY(uint, N, countbits, value); + __target_switch + { + case glsl: __intrinsic_asm "bitCount"; + case spirv: return spirv_asm { + result:$$vector<uint,N> = OpBitCount $value + }; + default: + VECTOR_MAP_UNARY(uint, N, countbits, value); + } } [__readNone] @@ -1153,7 +1573,15 @@ public vector<uint,N> bitCount(vector<uint,N> value) [require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)] public int bitCount(int value) { - return 
countbits(uint(value)); + __target_switch + { + case glsl: __intrinsic_asm "bitCount"; + case spirv: return spirv_asm { + result:$$int = OpBitCount $value + }; + default: + return countbits(uint(value)); + } } __generic<let N:int> @@ -1162,7 +1590,15 @@ __generic<let N:int> [require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)] public vector<int,N> bitCount(vector<int,N> value) { - VECTOR_MAP_UNARY(int, N, countbits, value); + __target_switch + { + case glsl: __intrinsic_asm "bitCount"; + case spirv: return spirv_asm { + result:$$vector<int,N> = OpBitCount $value + }; + default: + VECTOR_MAP_UNARY(int, N, countbits, value); + } } [__readNone] diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang index 2250ed6d4..7cafe764f 100644 --- a/source/slang/hlsl.meta.slang +++ b/source/slang/hlsl.meta.slang @@ -5741,7 +5741,15 @@ __generic<T : __BuiltinFloatingPointType> [require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] T distance(T x, T y) { - return length(x - y); + __target_switch + { + case glsl: __intrinsic_asm "distance"; + case spirv: return spirv_asm { + OpExtInst $$T result glsl450 Distance $x $y + }; + default: + return length(x - y); + } } // Vector dot product @@ -6028,9 +6036,7 @@ T exp2(T x) __target_switch { case glsl: - if (__isHalf<T>()) - __intrinsic_asm "exp2($0)"; - __intrinsic_asm "exp2(float($0))"; + __intrinsic_asm "exp2($0)"; case spirv: if (__isHalf<T>()) { @@ -6060,6 +6066,8 @@ vector<T,N> exp2(vector<T,N> x) { __target_switch { + case glsl: + __intrinsic_asm "exp2($0)"; case hlsl: __intrinsic_asm "exp2"; case spirv: return spirv_asm { OpExtInst $$vector<T,N> result glsl450 Exp2 $x @@ -7878,7 +7886,15 @@ __generic<T : __BuiltinFloatingPointType> [require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] T length(T x) { - return abs(x); + __target_switch + { + case glsl: __intrinsic_asm "length"; + case spirv: return spirv_asm { + OpExtInst $$T result glsl450 Length $x + }; + default: + return abs(x); + } } // Linear interpolation |
