diff options
Diffstat (limited to 'source')
| -rw-r--r-- | source/slang/core.meta.slang | 2 | ||||
| -rw-r--r-- | source/slang/glsl.meta.slang | 201 | ||||
| -rw-r--r-- | source/slang/hlsl.meta.slang | 1209 | ||||
| -rw-r--r-- | source/slang/slang-emit-metal.cpp | 43 |
4 files changed, 1125 insertions, 330 deletions
diff --git a/source/slang/core.meta.slang b/source/slang/core.meta.slang index 3fc2fc570..22822196c 100644 --- a/source/slang/core.meta.slang +++ b/source/slang/core.meta.slang @@ -421,6 +421,8 @@ __generic<T, let N : int> __intrinsic_op(select) vector<T,N> select(vector<bool, // Allow real-number types to be cast into each other __intrinsic_op($(kIROp_FloatCast)) T __realCast<T : __BuiltinRealType, U : __BuiltinRealType>(U val); +__intrinsic_op($(kIROp_CastIntToFloat)) + T __realCast<T : __BuiltinRealType, U : __BuiltinIntegerType>(U val); __intrinsic_op($(kIROp_IntCast)) T __intCast<T : __BuiltinType, U : __BuiltinType>(U val); ${{{{ diff --git a/source/slang/glsl.meta.slang b/source/slang/glsl.meta.slang index 9715a44ce..bacc8958e 100644 --- a/source/slang/glsl.meta.slang +++ b/source/slang/glsl.meta.slang @@ -321,114 +321,6 @@ public vector<T,N> atan(vector<T,N> y, vector<T,N> x) return atan2(y, x); } -__generic<T : __BuiltinFloatingPointType> -[__readNone] -[ForceInline] -[require(cpp_cuda_glsl_hlsl_spirv, GLSL_130)] -public T asinh(T x) -{ - __target_switch - { - case cpp: __intrinsic_asm "$P_asinh($0)"; - case cuda: __intrinsic_asm "$P_asinh($0)"; - case glsl: __intrinsic_asm "asinh"; - case spirv: return spirv_asm { - OpExtInst $$T result glsl450 Asinh $x - }; - default: - return log(x + sqrt(x * x + T(1))); - } -} - -__generic<T : __BuiltinFloatingPointType, let N:int> -[__readNone] -[ForceInline] -[require(cpp_cuda_glsl_hlsl_spirv, GLSL_130)] -public vector<T,N> asinh(vector<T,N> x) -{ - __target_switch - { - case glsl: __intrinsic_asm "asinh"; - case spirv: return spirv_asm { - OpExtInst $$vector<T,N> result glsl450 Asinh $x - }; - default: - VECTOR_MAP_UNARY(T, N, asinh, x); - } -} - -__generic<T : __BuiltinFloatingPointType> -[__readNone] -[ForceInline] -[require(cpp_cuda_glsl_hlsl_spirv, GLSL_130)] -public T acosh(T x) -{ - __target_switch - { - case cpp: __intrinsic_asm "$P_acosh($0)"; - case cuda: __intrinsic_asm "$P_acosh($0)"; - case glsl: 
__intrinsic_asm "acosh"; - case spirv: return spirv_asm { - OpExtInst $$T result glsl450 Acosh $x - }; - default: - return log(x + sqrt( x * x - T(1))); - } -} - -__generic<T : __BuiltinFloatingPointType, let N:int> -[__readNone] -[ForceInline] -[require(cpp_cuda_glsl_hlsl_spirv, GLSL_130)] -public vector<T,N> acosh(vector<T,N> x) -{ - __target_switch - { - case glsl: __intrinsic_asm "acosh"; - case spirv: return spirv_asm { - OpExtInst $$vector<T,N> result glsl450 Acosh $x - }; - default: - VECTOR_MAP_UNARY(T, N, acosh, x); - } -} - -__generic<T : __BuiltinFloatingPointType> -[__readNone] -[ForceInline] -[require(cpp_cuda_glsl_hlsl_spirv, GLSL_130)] -public T atanh(T x) -{ - __target_switch - { - case cpp: __intrinsic_asm "$P_atanh($0)"; - case cuda: __intrinsic_asm "$P_atanh($0)"; - case glsl: __intrinsic_asm "atanh"; - case spirv: return spirv_asm { - OpExtInst $$T result glsl450 Atanh $x - }; - default: - return T(0.5) * log((T(1) + x) / (T(1) - x)); - } -} - -__generic<T : __BuiltinFloatingPointType, let N:int> -[__readNone] -[ForceInline] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] -public vector<T,N> atanh(vector<T,N> x) -{ - __target_switch - { - case glsl: __intrinsic_asm "atanh"; - case spirv: return spirv_asm { - OpExtInst $$vector<T,N> result glsl450 Atanh $x - }; - default: - VECTOR_MAP_UNARY(T, N, atanh, x); - } -} - // // Section 8.2. 
Exponential Functions // @@ -458,66 +350,19 @@ public vector<T, N> inversesqrt(vector<T, N> x) __generic<T : __BuiltinFloatingPointType> [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_spirv, GLSL_130)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, GLSL_130)] public T roundEven(T x) { - __target_switch - { - case glsl: __intrinsic_asm "roundEven"; - case spirv: return spirv_asm { - OpExtInst $$T result glsl450 RoundEven $x - }; - default: - T nearest = round(x); - - // Check if the value is exactly halfway between two integers - if (abs(x - nearest) == T(0.5)) - { - // If halfway, choose the even number - if (mod(nearest, T(2)) != T(0)) - { - // If the nearest number is odd, - // move to the closest even number - nearest -= ((x < nearest) ? T(1) : T(-1)); - } - } - return nearest; - } + return rint(x); } __generic<T : __BuiltinFloatingPointType, let N:int> [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_spirv, GLSL_130)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, GLSL_130)] public vector<T,N> roundEven(vector<T,N> x) { - __target_switch - { - case glsl: __intrinsic_asm "roundEven"; - case spirv: return spirv_asm { - OpExtInst $$vector<T,N> result glsl450 RoundEven $x - }; - default: - VECTOR_MAP_UNARY(T, N, roundEven, x); - } -} - -__generic<T : __BuiltinFloatingPointType> -[__readNone] -[ForceInline] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] -public T fract(T x) -{ - return frac(x); -} - -__generic<T : __BuiltinFloatingPointType, let N:int> -[__readNone] -[ForceInline] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] -public vector<T, N> fract(vector<T, N> x) -{ - return frac(x); + return rint(x); } __generic<T : __BuiltinFloatingPointType> @@ -824,44 +669,6 @@ uint float2half(float f) return (s | e | m); } -__generic<T : __BuiltinFloatingPointType, E : __BuiltinIntegerType> -[__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] -public T ldexp(T x, E exp) -{ - __target_switch - { - case hlsl: __intrinsic_asm "ldexp"; - 
case glsl: __intrinsic_asm "ldexp"; - case spirv: return spirv_asm { - OpExtInst $$T result glsl450 Ldexp $x $exp - }; - default: - return ldexp(x, __floatCast<T>(exp)); - } -} - -__generic<T : __BuiltinFloatingPointType, E : __BuiltinIntegerType, let N : int> -[__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] -public vector<T, N> ldexp(vector<T, N> x, vector<E, N> exp) -{ - __target_switch - { - case hlsl: __intrinsic_asm "ldexp"; - case glsl: __intrinsic_asm "ldexp"; - case spirv: return spirv_asm { - OpExtInst $$vector<T,N> result glsl450 Ldexp $x $exp - }; - default: - vector<T,N> temp; - [ForceUnroll] - for (int i = 0; i < N; ++i) - temp[i] = __floatCast<T>(exp[i]); - return ldexp(x, temp); - } -} - [__readNone] [ForceInline] [require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)] diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang index ca1fb0af3..6b3c5db59 100644 --- a/source/slang/hlsl.meta.slang +++ b/source/slang/hlsl.meta.slang @@ -5,9 +5,6 @@ typedef uint UINT; __intrinsic_op($(kIROp_RequireGLSLExtension)) void __requireGLSLExtension(String extensionName); -__intrinsic_op($(kIROp_FloatCast)) -T __floatCast<T, U>(U v); - [sealed] interface IBufferDataLayout { @@ -4093,12 +4090,13 @@ matrix<T,N,M> abs(matrix<T,N,M> x) __generic<T : __BuiltinFloatingPointType> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] T abs(T x) { __target_switch { case hlsl: __intrinsic_asm "abs"; + case metal: __intrinsic_asm "abs"; case glsl: __intrinsic_asm "abs"; case cuda: __intrinsic_asm "$P_abs($0)"; case cpp: __intrinsic_asm "$P_abs($0)"; @@ -4110,12 +4108,13 @@ T abs(T x) __generic<T : __BuiltinFloatingPointType, let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] vector<T, N> abs(vector<T, N> x) { __target_switch { case hlsl: __intrinsic_asm "abs"; + case metal: 
__intrinsic_asm "abs"; case glsl: __intrinsic_asm "abs"; case spirv: return spirv_asm { result:$$vector<T,N> = OpExtInst glsl450 FAbs $x; @@ -4127,7 +4126,7 @@ vector<T, N> abs(vector<T, N> x) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] matrix<T,N,M> abs(matrix<T,N,M> x) { __target_switch @@ -4138,11 +4137,40 @@ matrix<T,N,M> abs(matrix<T,N,M> x) } } +__generic<T : __BuiltinFloatingPointType> +[__readNone] +[ForceInline] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +T fabs(T x) +{ + __target_switch + { + case metal: __intrinsic_asm "fabs"; + default: + return abs(x); + } +} + +__generic<T : __BuiltinFloatingPointType, let N : int> +[__readNone] +[ForceInline] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +vector<T, N> fabs(vector<T, N> x) +{ + __target_switch + { + case metal: __intrinsic_asm "fabs"; + default: + return abs(x); + } +} + + // Inverse cosine (HLSL SM 1.0) __generic<T : __BuiltinFloatingPointType> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] T acos(T x) { __target_switch @@ -4151,6 +4179,7 @@ T acos(T x) case cuda: __intrinsic_asm "$P_acos($0)"; case glsl: __intrinsic_asm "acos"; case hlsl: __intrinsic_asm "acos"; + case metal: __intrinsic_asm "acos"; case spirv: return spirv_asm { OpExtInst $$T result glsl450 Acos $x }; @@ -4159,13 +4188,14 @@ T acos(T x) __generic<T : __BuiltinFloatingPointType, let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] vector<T, N> acos(vector<T, N> x) { __target_switch { case glsl: __intrinsic_asm "acos"; case hlsl: __intrinsic_asm "acos"; + case metal: __intrinsic_asm "acos"; case spirv: return spirv_asm { OpExtInst $$vector<T, N> result glsl450 Acos $x }; @@ -4176,7 +4206,7 
@@ vector<T, N> acos(vector<T, N> x) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] matrix<T, N, M> acos(matrix<T, N, M> x) { __target_switch @@ -4187,9 +4217,51 @@ matrix<T, N, M> acos(matrix<T, N, M> x) } } +// Inverse hyperbolic cosine + +__generic<T : __BuiltinFloatingPointType> +[__readNone] +[ForceInline] +[require(cpp_cuda_glsl_hlsl_metal_spirv, GLSL_130)] +T acosh(T x) +{ + __target_switch + { + case cpp: __intrinsic_asm "$P_acosh($0)"; + case cuda: __intrinsic_asm "$P_acosh($0)"; + case glsl: __intrinsic_asm "acosh"; + case metal: __intrinsic_asm "acosh"; + case spirv: return spirv_asm { + OpExtInst $$T result glsl450 Acosh $x + }; + default: + return log(x + sqrt( x * x - T(1))); + } +} + +__generic<T : __BuiltinFloatingPointType, let N:int> +[__readNone] +[ForceInline] +[require(cpp_cuda_glsl_hlsl_metal_spirv, GLSL_130)] +vector<T,N> acosh(vector<T,N> x) +{ + __target_switch + { + case glsl: __intrinsic_asm "acosh"; + case metal: __intrinsic_asm "acosh"; + case spirv: return spirv_asm { + OpExtInst $$vector<T,N> result glsl450 Acosh $x + }; + default: + VECTOR_MAP_UNARY(T, N, acosh, x); + } +} + + // Test if all components are non-zero (HLSL SM 1.0) __generic<T : __BuiltinType> [__readNone] +[require(cpp_cuda_glsl_hlsl_metal_spirv)] bool all(T x) { __target_switch @@ -4198,6 +4270,8 @@ bool all(T x) __intrinsic_asm "bool($0)"; case hlsl: __intrinsic_asm "all"; + case metal: + __intrinsic_asm "all"; case spirv: let zero = __default<T>(); if (__isInt<T>()) @@ -4219,12 +4293,15 @@ bool all(T x) __generic<T : __BuiltinType, let N : int> [__readNone] +[require(cpp_cuda_glsl_hlsl_metal_spirv)] bool all(vector<T,N> x) { __target_switch { case hlsl: __intrinsic_asm "all"; + case metal: + __intrinsic_asm "all"; case glsl: __intrinsic_asm "all(bvec$N0($0))"; case spirv: @@ -4261,7 +4338,7 @@ bool all(vector<T,N> x) 
__generic<T : __BuiltinType, let N : int, let M : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv)] +[require(cpp_cuda_glsl_hlsl_metal_spirv)] bool all(matrix<T,N,M> x) { __target_switch @@ -4318,6 +4395,7 @@ int3 WorkgroupSize(); __generic<T : __BuiltinType> [__readNone] +[require(cpp_cuda_glsl_hlsl_metal_spirv)] bool any(T x) { __target_switch @@ -4326,6 +4404,8 @@ bool any(T x) __intrinsic_asm "bool($0)"; case hlsl: __intrinsic_asm "any"; + case metal: + __intrinsic_asm "any"; case spirv: let zero = __default<T>(); if (__isInt<T>()) @@ -4346,12 +4426,15 @@ bool any(T x) __generic<T : __BuiltinType, let N : int> [__readNone] +[require(cpp_cuda_glsl_hlsl_metal_spirv)] bool any(vector<T, N> x) { __target_switch { case hlsl: __intrinsic_asm "any"; + case metal: + __intrinsic_asm "any"; case glsl: __intrinsic_asm "any(bvec$N0($0))"; case spirv: @@ -4541,7 +4624,7 @@ matrix<float,N,M> asfloat(matrix<float,N,M> x) // Inverse sine (HLSL SM 1.0) __generic<T : __BuiltinFloatingPointType> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] T asin(T x) { __target_switch @@ -4550,6 +4633,7 @@ T asin(T x) case cuda: __intrinsic_asm "$P_asin($0)"; case glsl: __intrinsic_asm "asin"; case hlsl: __intrinsic_asm "asin"; + case metal: __intrinsic_asm "asin"; case spirv: return spirv_asm { OpExtInst $$T result glsl450 Asin $x }; @@ -4558,13 +4642,14 @@ T asin(T x) __generic<T : __BuiltinFloatingPointType, let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] vector<T, N> asin(vector<T, N> x) { __target_switch { case glsl: __intrinsic_asm "asin"; case hlsl: __intrinsic_asm "asin"; + case metal: __intrinsic_asm "asin"; case spirv: return spirv_asm { OpExtInst $$vector<T, N> result glsl450 Asin $x }; @@ -4575,7 +4660,7 @@ vector<T, N> asin(vector<T, N> x) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> 
[__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] matrix<T, N, M> asin(matrix<T, N, M> x) { __target_switch @@ -4586,6 +4671,46 @@ matrix<T, N, M> asin(matrix<T, N, M> x) } } +// Inverse hyperbolic sine + +__generic<T : __BuiltinFloatingPointType> +[__readNone] +[ForceInline] +[require(cpp_cuda_glsl_hlsl_metal_spirv, GLSL_130)] +T asinh(T x) +{ + __target_switch + { + case cpp: __intrinsic_asm "$P_asinh($0)"; + case cuda: __intrinsic_asm "$P_asinh($0)"; + case glsl: __intrinsic_asm "asinh"; + case metal: __intrinsic_asm "asinh"; + case spirv: return spirv_asm { + OpExtInst $$T result glsl450 Asinh $x + }; + default: + return log(x + sqrt(x * x + T(1))); + } +} + +__generic<T : __BuiltinFloatingPointType, let N:int> +[__readNone] +[ForceInline] +[require(cpp_cuda_glsl_hlsl_metal_spirv, GLSL_130)] +vector<T,N> asinh(vector<T,N> x) +{ + __target_switch + { + case glsl: __intrinsic_asm "asinh"; + case metal: __intrinsic_asm "asinh"; + case spirv: return spirv_asm { + OpExtInst $$vector<T,N> result glsl450 Asinh $x + }; + default: + VECTOR_MAP_UNARY(T, N, asinh, x); + } +} + // Reinterpret bits as an int (HLSL SM 4.0) [__readNone] @@ -5029,7 +5154,7 @@ matrix<float16_t,R,C> asfloat16<let R : int, let C : int>(matrix<int16_t,R,C> va // Inverse tangent (HLSL SM 1.0) __generic<T : __BuiltinFloatingPointType> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] T atan(T x) { __target_switch @@ -5038,6 +5163,7 @@ T atan(T x) case cuda: __intrinsic_asm "$P_atan($0)"; case glsl: __intrinsic_asm "atan"; case hlsl: __intrinsic_asm "atan"; + case metal: __intrinsic_asm "atan"; case spirv: return spirv_asm { OpExtInst $$T result glsl450 Atan $x }; @@ -5046,13 +5172,14 @@ T atan(T x) __generic<T : __BuiltinFloatingPointType, let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] 
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] vector<T, N> atan(vector<T, N> x) { __target_switch { case glsl: __intrinsic_asm "atan"; case hlsl: __intrinsic_asm "atan"; + case metal: __intrinsic_asm "atan"; case spirv: return spirv_asm { OpExtInst $$vector<T, N> result glsl450 Atan $x }; @@ -5063,7 +5190,7 @@ vector<T, N> atan(vector<T, N> x) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] matrix<T, N, M> atan(matrix<T, N, M> x) { __target_switch @@ -5076,7 +5203,7 @@ matrix<T, N, M> atan(matrix<T, N, M> x) __generic<T : __BuiltinFloatingPointType> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] T atan2(T y, T x) { __target_switch @@ -5085,6 +5212,7 @@ T atan2(T y, T x) case cuda: __intrinsic_asm "$P_atan2($0, $1)"; case glsl: __intrinsic_asm "atan($0,$1)"; case hlsl: __intrinsic_asm "atan2"; + case metal: __intrinsic_asm "atan2"; case spirv: return spirv_asm { OpExtInst $$T result glsl450 Atan2 $y $x }; @@ -5093,13 +5221,14 @@ T atan2(T y, T x) __generic<T : __BuiltinFloatingPointType, let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] vector<T, N> atan2(vector<T, N> y, vector<T, N> x) { __target_switch { case glsl: __intrinsic_asm "atan($0,$1)"; case hlsl: __intrinsic_asm "atan2"; + case metal: __intrinsic_asm "atan2"; case spirv: return spirv_asm { OpExtInst $$vector<T, N> result glsl450 Atan2 $y $x }; @@ -5110,7 +5239,7 @@ vector<T, N> atan2(vector<T, N> y, vector<T, N> x) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] matrix<T,N,M> atan2(matrix<T,N,M> y, matrix<T,N,M> x) { __target_switch @@ 
-5121,10 +5250,50 @@ matrix<T,N,M> atan2(matrix<T,N,M> y, matrix<T,N,M> x) } } +// Hyperbolic inverse tangent + +__generic<T : __BuiltinFloatingPointType> +[__readNone] +[ForceInline] +[require(cpp_cuda_glsl_hlsl_metal_spirv, GLSL_130)] +T atanh(T x) +{ + __target_switch + { + case cpp: __intrinsic_asm "$P_atanh($0)"; + case cuda: __intrinsic_asm "$P_atanh($0)"; + case glsl: __intrinsic_asm "atanh"; + case metal: __intrinsic_asm "atanh"; + case spirv: return spirv_asm { + OpExtInst $$T result glsl450 Atanh $x + }; + default: + return T(0.5) * log((T(1) + x) / (T(1) - x)); + } +} + +__generic<T : __BuiltinFloatingPointType, let N:int> +[__readNone] +[ForceInline] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +vector<T,N> atanh(vector<T,N> x) +{ + __target_switch + { + case glsl: __intrinsic_asm "atanh"; + case metal: __intrinsic_asm "atanh"; + case spirv: return spirv_asm { + OpExtInst $$vector<T,N> result glsl450 Atanh $x + }; + default: + VECTOR_MAP_UNARY(T, N, atanh, x); + } +} + // Ceiling (HLSL SM 1.0) __generic<T : __BuiltinFloatingPointType> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] T ceil(T x) { __target_switch @@ -5133,6 +5302,7 @@ T ceil(T x) case cuda: __intrinsic_asm "$P_ceil($0)"; case glsl: __intrinsic_asm "ceil"; case hlsl: __intrinsic_asm "ceil"; + case metal: __intrinsic_asm "ceil"; case spirv: return spirv_asm { OpExtInst $$T result glsl450 Ceil $x }; @@ -5141,13 +5311,14 @@ T ceil(T x) __generic<T : __BuiltinFloatingPointType, let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] vector<T, N> ceil(vector<T, N> x) { __target_switch { case glsl: __intrinsic_asm "ceil"; case hlsl: __intrinsic_asm "ceil"; + case metal: __intrinsic_asm "ceil"; case spirv: return spirv_asm { OpExtInst $$vector<T, N> result glsl450 Ceil $x }; @@ -5158,7 +5329,7 @@ vector<T, N> 
ceil(vector<T, N> x) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] matrix<T, N, M> ceil(matrix<T, N, M> x) { __target_switch @@ -5169,6 +5340,87 @@ matrix<T, N, M> ceil(matrix<T, N, M> x) } } +// Copy-sign + +__generic<let N: int> +[__readNone] +[require(cpp_cuda_glsl_hlsl_metal_spirv)] +vector<half,N> copysign_half(vector<half,N> x, vector<half,N> y) +{ + let ux = reinterpret<vector<uint16_t,N>>(x); + let uy = reinterpret<vector<uint16_t,N>>(y); + vector<uint16_t,N> signY = (uy & (uint16_t(1) << uint16_t(15))); + vector<uint16_t,N> newX = (ux & ((uint16_t(1) << uint16_t(15)) - uint16_t(1))) + signY; + return reinterpret<vector<half,N>>(newX); +} + +__generic<let N: int> +[__readNone] +[require(cpp_cuda_glsl_hlsl_metal_spirv)] +vector<float,N> copysign_float(vector<float,N> x, vector<float,N> y) +{ + let ux = reinterpret<vector<uint32_t,N>>(x); + let uy = reinterpret<vector<uint32_t,N>>(y); + vector<uint32_t,N> signY = (uy & (uint32_t(1) << uint32_t(31))); + vector<uint32_t,N> newX = (ux & ((uint32_t(1) << uint32_t(31)) - uint32_t(1))) + signY; + return reinterpret<vector<float,N>>(newX); +} + +__generic<let N: int> +[__readNone] +[require(cpp_cuda_glsl_hlsl_metal_spirv)] +vector<double,N> copysign_double(vector<double,N> x, vector<double,N> y) +{ + let ux = reinterpret<vector<uint64_t,N>>(x); + let uy = reinterpret<vector<uint64_t,N>>(y); + vector<uint64_t,N> signY = (uy & (uint64_t(1) << uint64_t(63))); + vector<uint64_t,N> newX = (ux & ((uint64_t(1) << uint64_t(63)) - uint64_t(1))) + signY; + return reinterpret<vector<double,N>>(newX); +} + +__generic<T:__BuiltinFloatingPointType, U:__BuiltinFloatingPointType, let N : int> +__intrinsic_op($(kIROp_FloatCast)) +vector<T,N> __real_cast(vector<U,N> val); + +__generic<T : __BuiltinFloatingPointType, let N: int> +[__readNone] +[require(cpp_cuda_glsl_hlsl_metal_spirv)] 
+vector<T,N> copysign(vector<T,N> x, vector<T,N> y) +{ + __target_switch + { + case metal: __intrinsic_asm "copysign"; + default: + { + // sign of -0.0 needs to be respected. + if (T is half) + return __real_cast<T>(copysign_half( + __real_cast<half>(x), + __real_cast<half>(y))); + if (T is float) + return __real_cast<T>(copysign_float( + __real_cast<float>(x), + __real_cast<float>(y))); + return __real_cast<T>(copysign_double( + __real_cast<double>(x), + __real_cast<double>(y))); + } + } +} + +__generic<T : __BuiltinFloatingPointType> +[__readNone] +[require(cpp_cuda_glsl_hlsl_metal_spirv)] +T copysign(T x, T y) +{ + __target_switch + { + case metal: __intrinsic_asm "copysign"; + default: + return copysign(vector<T,1>(x), vector<T,1>(y))[0]; + } +} + // Check access status to tiled resource bool CheckAccessFullyMapped(uint status); @@ -5320,7 +5572,7 @@ void clip(matrix<T,N,M> x) // Cosine __generic<T : __BuiltinFloatingPointType> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] T cos(T x) { __target_switch @@ -5329,6 +5581,7 @@ T cos(T x) case cuda: __intrinsic_asm "$P_cos($0)"; case glsl: __intrinsic_asm "cos"; case hlsl: __intrinsic_asm "cos"; + case metal: __intrinsic_asm "cos"; case spirv: return spirv_asm { OpExtInst $$T result glsl450 Cos $x }; @@ -5337,13 +5590,14 @@ T cos(T x) __generic<T : __BuiltinFloatingPointType, let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] vector<T, N> cos(vector<T, N> x) { __target_switch { case glsl: __intrinsic_asm "cos"; case hlsl: __intrinsic_asm "cos"; + case metal: __intrinsic_asm "cos"; case spirv: return spirv_asm { OpExtInst $$vector<T, N> result glsl450 Cos $x }; @@ -5354,7 +5608,7 @@ vector<T, N> cos(vector<T, N> x) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] 
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] matrix<T, N, M> cos(matrix<T, N, M> x) { __target_switch @@ -5368,7 +5622,7 @@ matrix<T, N, M> cos(matrix<T, N, M> x) // Hyperbolic cosine __generic<T : __BuiltinFloatingPointType> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv)] +[require(cpp_cuda_glsl_hlsl_metal_spirv)] T cosh(T x) { __target_switch @@ -5377,6 +5631,7 @@ T cosh(T x) case cuda: __intrinsic_asm "$P_cosh($0)"; case glsl: __intrinsic_asm "cosh"; case hlsl: __intrinsic_asm "cosh"; + case metal: __intrinsic_asm "cosh"; case spirv: return spirv_asm { OpExtInst $$T result glsl450 Cosh $x }; @@ -5385,13 +5640,14 @@ T cosh(T x) __generic<T : __BuiltinFloatingPointType, let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv)] +[require(cpp_cuda_glsl_hlsl_metal_spirv)] vector<T,N> cosh(vector<T,N> x) { __target_switch { case glsl: __intrinsic_asm "cosh"; case hlsl: __intrinsic_asm "cosh"; + case metal: __intrinsic_asm "cosh"; case spirv: return spirv_asm { OpExtInst $$vector<T,N> result glsl450 Cosh $x }; @@ -5402,7 +5658,7 @@ vector<T,N> cosh(vector<T,N> x) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv)] +[require(cpp_cuda_glsl_hlsl_metal_spirv)] matrix<T, N, M> cosh(matrix<T, N, M> x) { __target_switch @@ -5413,6 +5669,35 @@ matrix<T, N, M> cosh(matrix<T, N, M> x) } } +// Cosine degree + +__generic<T : __BuiltinFloatingPointType> +[__readNone] +[require(cpp_cuda_glsl_hlsl_metal_spirv)] +T cospi(T x) +{ + __target_switch + { + case metal: __intrinsic_asm "cospi"; + default: + return cos(T.getPi() * x); + } +} + +__generic<T : __BuiltinFloatingPointType, let N: int> +[__readNone] +[require(cpp_cuda_glsl_hlsl_metal_spirv)] +vector<T,N> cospi(vector<T,N> x) +{ + __target_switch + { + case metal: __intrinsic_asm "cospi"; + default: + return cos(T.getPi() * x); + } +} + + // Population count [__readNone] [require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)] @@ -5776,6 +6061,63 @@ 
T distance(T x, T y) } } +// fdim + +__generic<T : __BuiltinFloatingPointType> +[__readNone] +[require(cpp_cuda_glsl_hlsl_spirv)] +T fdim(T x, T y) +{ + __target_switch + { + case metal: __intrinsic_asm "fdim"; + default: + return max(T(0), x - y); + } +} + +__generic<T : __BuiltinFloatingPointType, let N : int> +[__readNone] +[require(cpp_cuda_glsl_hlsl_spirv)] +vector<T,N> fdim(vector<T,N> x, vector<T,N> y) +{ + __target_switch + { + case metal: __intrinsic_asm "fdim"; + default: + return max(T(0), x - y); + } +} + +// divide + +__generic<T : __BuiltinFloatingPointType> +[__readNone] +[require(cpp_cuda_glsl_hlsl_metal_spirv)] +T divide(T x, T y) +{ + __target_switch + { + case metal: __intrinsic_asm "divide"; + default: + return x / y; + } +} + +__generic<T : __BuiltinFloatingPointType, let N: int> +[__readNone] +[require(cpp_cuda_glsl_hlsl_metal_spirv)] +vector<T,N> divide(vector<T,N> x, vector<T,N> y) +{ + __target_switch + { + case metal: __intrinsic_asm "divide"; + default: + return x / y; + } +} + + // Vector dot product __generic<T : __BuiltinFloatingPointType> @@ -6005,7 +6347,7 @@ matrix<T,N,M> EvaluateAttributeSnapped(matrix<T,N,M> x, int2 offset) __generic<T : __BuiltinFloatingPointType> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] T exp(T x) { __target_switch @@ -6014,6 +6356,7 @@ T exp(T x) case cuda: __intrinsic_asm "$P_exp($0)"; case glsl: __intrinsic_asm "exp"; case hlsl: __intrinsic_asm "exp"; + case metal: __intrinsic_asm "exp"; case spirv: return spirv_asm { OpExtInst $$T result glsl450 Exp $x }; @@ -6022,13 +6365,14 @@ T exp(T x) __generic<T : __BuiltinFloatingPointType, let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] vector<T, N> exp(vector<T, N> x) { __target_switch { case glsl: __intrinsic_asm "exp"; case hlsl: __intrinsic_asm "exp"; + case metal: __intrinsic_asm "exp"; 
case spirv: return spirv_asm { OpExtInst $$vector<T, N> result glsl450 Exp $x }; @@ -6039,7 +6383,7 @@ vector<T, N> exp(vector<T, N> x) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] matrix<T, N, M> exp(matrix<T, N, M> x) { __target_switch @@ -6054,7 +6398,7 @@ matrix<T, N, M> exp(matrix<T, N, M> x) __generic<T : __BuiltinFloatingPointType> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] T exp2(T x) { __target_switch @@ -6068,13 +6412,14 @@ T exp2(T x) } else { - float xf = __floatCast<float>(x); + float xf = __realCast<float>(x); return T(spirv_asm { result:$$float = OpExtInst glsl450 Exp2 $xf }); } case hlsl: __intrinsic_asm "exp2($0)"; + case metal: __intrinsic_asm "exp2"; case cpp: __intrinsic_asm "$P_exp2($0)"; case cuda: @@ -6085,7 +6430,7 @@ T exp2(T x) __generic<T : __BuiltinFloatingPointType, let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] vector<T,N> exp2(vector<T,N> x) { __target_switch @@ -6093,6 +6438,7 @@ vector<T,N> exp2(vector<T,N> x) case glsl: __intrinsic_asm "exp2($0)"; case hlsl: __intrinsic_asm "exp2"; + case metal: __intrinsic_asm "exp2"; case spirv: return spirv_asm { OpExtInst $$vector<T,N> result glsl450 Exp2 $x }; @@ -6103,7 +6449,7 @@ vector<T,N> exp2(vector<T,N> x) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] matrix<T,N,M> exp2(matrix<T,N,M> x) { __target_switch @@ -6114,6 +6460,36 @@ matrix<T,N,M> exp2(matrix<T,N,M> x) } } +// Base-10 exponent + +__generic<T : __BuiltinFloatingPointType> +[__readNone] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +T exp10(T x) +{ 
+ __target_switch + { + case metal: __intrinsic_asm "exp10"; + default: + const T ln10 = T(2.302585092994045901); // ln(10) + return exp(x * ln10); + } +} + +__generic<T : __BuiltinFloatingPointType, let N: int> +[__readNone] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +vector<T,N> exp10(vector<T,N> x) +{ + __target_switch + { + case metal: __intrinsic_asm "exp10"; + default: + const T ln10 = T(2.30258509299); // ln(10) + return exp(x * ln10); + } +} + // Convert 16-bit float stored in low bits of integer __glsl_version(420) @@ -6439,7 +6815,7 @@ vector<uint,N> firstbitlow(vector<uint,N> value) __generic<T : __BuiltinFloatingPointType> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] T floor(T x) { __target_switch @@ -6448,6 +6824,7 @@ T floor(T x) case cuda: __intrinsic_asm "$P_floor($0)"; case glsl: __intrinsic_asm "floor"; case hlsl: __intrinsic_asm "floor"; + case metal: __intrinsic_asm "floor"; case spirv: return spirv_asm { OpExtInst $$T result glsl450 Floor $x }; @@ -6456,13 +6833,14 @@ T floor(T x) __generic<T : __BuiltinFloatingPointType, let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] vector<T, N> floor(vector<T, N> x) { __target_switch { case glsl: __intrinsic_asm "floor"; case hlsl: __intrinsic_asm "floor"; + case metal: __intrinsic_asm "floor"; case spirv: return spirv_asm { OpExtInst $$vector<T, N> result glsl450 Floor $x }; @@ -6473,7 +6851,7 @@ vector<T, N> floor(vector<T, N> x) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] matrix<T, N, M> floor(matrix<T, N, M> x) { __target_switch @@ -6487,7 +6865,7 @@ matrix<T, N, M> floor(matrix<T, N, M> x) // Fused multiply-add __generic<T : __BuiltinFloatingPointType> [__readNone] 
-[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_5_0)] T fma(T a, T b, T c) { __target_switch @@ -6500,6 +6878,7 @@ T fma(T a, T b, T c) return mad(a, b, c); else __intrinsic_asm "fma($0, $1, $2)"; + case metal: __intrinsic_asm "fma"; case spirv: return spirv_asm { OpExtInst $$T result glsl450 Fma $a $b $c }; @@ -6510,13 +6889,14 @@ T fma(T a, T b, T c) __generic<T : __BuiltinFloatingPointType, let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_5_0)] vector<T, N> fma(vector<T, N> a, vector<T, N> b, vector<T, N> c) { __target_switch { case glsl: __intrinsic_asm "fma"; case hlsl: __intrinsic_asm "fma"; + case metal: __intrinsic_asm "fma"; case spirv: return spirv_asm { OpExtInst $$vector<T, N> result glsl450 Fma $a $b $c }; @@ -6527,7 +6907,7 @@ vector<T, N> fma(vector<T, N> a, vector<T, N> b, vector<T, N> c) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_5_0)] matrix<T, N, M> fma(matrix<T, N, M> a, matrix<T, N, M> b, matrix<T, N, M> c) { __target_switch @@ -6541,19 +6921,24 @@ matrix<T, N, M> fma(matrix<T, N, M> a, matrix<T, N, M> b, matrix<T, N, M> c) // Floating point remainder of x/y __generic<T : __BuiltinFloatingPointType> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[ForceInline] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] T fmod(T x, T y) { - // In HLSL, fmod returns a remainder. + // In HLSL, `fmod` returns a remainder. // Definition of `fmod` in HLSL is, // "The floating-point remainder is calculated such that x = i * y + f, // where i is an integer, f has the same sign as x, and the absolute value // of f is less than the absolute value of y." // - // In GLSL, mod is a Modulus function. + // In GLSL, `mod` is a Modulus function. 
// OpenGL document defines "Modulus" as "Returns x - y * floor(x / y)". // The use of "Floor()" makes the difference. // + // In Metal, `fmod` is a Modulus function. + // The Metal documentation defines it as "Returns x - y * trunc(x/y)". + // Note that the function name is the same as in HLSL, but it behaves differently. NOTE(review): a trunc-based result keeps the sign of x, which looks like the HLSL remainder described above - confirm the two really differ. + // // The tricky ones are when x or y is a negative value. // // | Remainder | Modulus @@ -6588,10 +6973,13 @@ T fmod(T x, T y) { case cpp: __intrinsic_asm "$P_fmod($0, $1)"; case cuda: __intrinsic_asm "$P_fmod($0, $1)"; - case hlsl: __intrinsic_asm "fmod"; case glsl: // GLSL doesn't have a function for remainder. - __intrinsic_asm "(($0 < 0) ? -mod(-$0,abs($1)) : mod($0,abs($1)))"; + __intrinsic_asm "(($0 < 0.0) ? -mod(-$0,abs($1)) : mod($0,abs($1)))"; + case hlsl: __intrinsic_asm "fmod"; + case metal: + // Metal doesn't have a function for remainder. + __intrinsic_asm "(($0 < 0.0) ? -fmod(-$0,abs($1)) : fmod($0,abs($1)))"; case spirv: // OpFRem return "The floating-point remainder whose sign // matches the sign of Operand 1", where Operand 1 is "x". 
@@ -6604,7 +6992,8 @@ T fmod(T x, T y) __generic<T : __BuiltinFloatingPointType, let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[ForceInline] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] vector<T, N> fmod(vector<T, N> x, vector<T, N> y) { __target_switch @@ -6620,7 +7009,8 @@ vector<T, N> fmod(vector<T, N> x, vector<T, N> y) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[ForceInline] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] matrix<T, N, M> fmod(matrix<T, N, M> x, matrix<T, N, M> y) { __target_switch @@ -6634,7 +7024,7 @@ matrix<T, N, M> fmod(matrix<T, N, M> x, matrix<T, N, M> y) // Fractional part __generic<T : __BuiltinFloatingPointType> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] T frac(T x) { __target_switch @@ -6643,6 +7033,7 @@ T frac(T x) case cuda: __intrinsic_asm "$P_frac($0)"; case glsl: __intrinsic_asm "fract"; case hlsl: __intrinsic_asm "frac"; + case metal: __intrinsic_asm "fract"; case spirv: return spirv_asm { OpExtInst $$T result glsl450 Fract $x }; @@ -6651,13 +7042,14 @@ T frac(T x) __generic<T : __BuiltinFloatingPointType, let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] vector<T, N> frac(vector<T, N> x) { __target_switch { case glsl: __intrinsic_asm "fract"; case hlsl: __intrinsic_asm "frac"; + case metal: __intrinsic_asm "fract"; case spirv: return spirv_asm { OpExtInst $$vector<T, N> result glsl450 Fract $x }; @@ -6673,10 +7065,29 @@ matrix<T, N, M> frac(matrix<T, N, M> x) MATRIX_MAP_UNARY(T, N, M, frac, x); } +__generic<T : __BuiltinFloatingPointType> +[__readNone] +[ForceInline] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +T fract(T x) +{ + return frac(x); +} + +__generic<T : 
__BuiltinFloatingPointType, let N:int> +[__readNone] +[ForceInline] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +vector<T, N> fract(vector<T, N> x) +{ + return frac(x); +} + + // Split float into mantissa and exponent __generic<T : __BuiltinFloatingPointType> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] T frexp(T x, out int exp) { __target_switch @@ -6685,6 +7096,7 @@ T frexp(T x, out int exp) case cuda: __intrinsic_asm "$P_frexp($0, $1)"; case glsl: __intrinsic_asm "frexp"; case hlsl: __intrinsic_asm "frexp"; + case metal: __intrinsic_asm "frexp($0, *($1))"; case spirv: return spirv_asm { result:$$T = OpExtInst glsl450 Frexp $x &exp }; @@ -6693,12 +7105,14 @@ T frexp(T x, out int exp) __generic<T : __BuiltinFloatingPointType, let N : int> [__readNone] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] vector<T, N> frexp(vector<T, N> x, out vector<int, N> exp) { __target_switch { - case hlsl: __intrinsic_asm "frexp"; case glsl: __intrinsic_asm "frexp"; + case hlsl: __intrinsic_asm "frexp"; + case metal: __intrinsic_asm "frexp($0, *($1))"; case spirv: return spirv_asm { result:$$vector<T, N> = OpExtInst glsl450 Frexp $x &exp }; @@ -6709,7 +7123,7 @@ vector<T, N> frexp(vector<T, N> x, out vector<int, N> exp) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int, let L : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] matrix<T, N, M> frexp(matrix<T, N, M> x, out matrix<int, N, M, L> exp) { __target_switch @@ -7920,7 +8334,7 @@ matrix<bool, N, M> isnan(matrix<T, N, M> x) __generic<T : __BuiltinFloatingPointType> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] T ldexp(T x, T exp) { __target_switch @@ -7933,7 +8347,7 @@ T ldexp(T x, T exp) __generic<T : __BuiltinFloatingPointType, let N : int> 
[__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] vector<T, N> ldexp(vector<T, N> x, vector<T, N> exp) { __target_switch @@ -7946,7 +8360,7 @@ vector<T, N> ldexp(vector<T, N> x, vector<T, N> exp) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] matrix<T, N, M> ldexp(matrix<T, N, M> x, matrix<T, N, M> exp) { __target_switch @@ -7957,6 +8371,47 @@ matrix<T, N, M> ldexp(matrix<T, N, M> x, matrix<T, N, M> exp) } } +__generic<T : __BuiltinFloatingPointType, E : __BuiltinIntegerType> +[__readNone] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +T ldexp(T x, E exp) +{ + __target_switch + { + case glsl: __intrinsic_asm "ldexp"; + case hlsl: __intrinsic_asm "ldexp"; + case metal: __intrinsic_asm "ldexp"; + case spirv: return spirv_asm { + OpExtInst $$T result glsl450 Ldexp $x $exp + }; + default: + return ldexp(x, __realCast<T>(exp)); + } +} + +__generic<T : __BuiltinFloatingPointType, E : __BuiltinIntegerType, let N : int> +[__readNone] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +vector<T, N> ldexp(vector<T, N> x, vector<E, N> exp) +{ + __target_switch + { + case glsl: __intrinsic_asm "ldexp"; + case hlsl: __intrinsic_asm "ldexp"; + case metal: __intrinsic_asm "ldexp"; + case spirv: return spirv_asm { + OpExtInst $$vector<T,N> result glsl450 Ldexp $x $exp + }; + default: + vector<T,N> temp; + [ForceUnroll] + for (int i = 0; i < N; ++i) + temp[i] = __realCast<T>(exp[i]); + return ldexp(x, temp); + } +} + + // Vector length __generic<T : __BuiltinFloatingPointType, let N : int> [__readNone] @@ -8058,7 +8513,7 @@ float4 lit(float n_dot_l, float n_dot_h, float m) // Base-e logarithm __generic<T : __BuiltinFloatingPointType> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] 
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] T log(T x) { __target_switch @@ -8067,6 +8522,7 @@ T log(T x) case cuda: __intrinsic_asm "$P_log($0)"; case glsl: __intrinsic_asm "log"; case hlsl: __intrinsic_asm "log"; + case metal: __intrinsic_asm "log"; case spirv: return spirv_asm { OpExtInst $$T result glsl450 Log $x }; @@ -8075,13 +8531,14 @@ T log(T x) __generic<T : __BuiltinFloatingPointType, let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] vector<T, N> log(vector<T, N> x) { __target_switch { case glsl: __intrinsic_asm "log"; case hlsl: __intrinsic_asm "log"; + case metal: __intrinsic_asm "log"; case spirv: return spirv_asm { OpExtInst $$vector<T, N> result glsl450 Log $x }; @@ -8092,7 +8549,7 @@ vector<T, N> log(vector<T, N> x) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] matrix<T, N, M> log(matrix<T, N, M> x) { __target_switch @@ -8106,12 +8563,13 @@ matrix<T, N, M> log(matrix<T, N, M> x) // Base-10 logarithm __generic<T : __BuiltinFloatingPointType> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] T log10(T x) { __target_switch { case hlsl: __intrinsic_asm "log10"; + case metal: __intrinsic_asm "log10"; case glsl: __intrinsic_asm "(log( $0 ) * $S0( 0.43429448190325182765112891891661) )"; case cuda: __intrinsic_asm "$P_log10($0)"; case cpp: __intrinsic_asm "$P_log10($0)"; @@ -8128,12 +8586,13 @@ T log10(T x) __generic<T : __BuiltinFloatingPointType, let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] vector<T,N> log10(vector<T,N> x) { __target_switch { case hlsl: __intrinsic_asm "log10"; + case metal: __intrinsic_asm "log10"; case glsl: 
__intrinsic_asm "(log( $0 ) * $S0(0.43429448190325182765112891891661) )"; case spirv: { @@ -8150,7 +8609,7 @@ vector<T,N> log10(vector<T,N> x) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] matrix<T,N,M> log10(matrix<T,N,M> x) { __target_switch @@ -8164,7 +8623,7 @@ matrix<T,N,M> log10(matrix<T,N,M> x) // Base-2 logarithm __generic<T : __BuiltinFloatingPointType> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] T log2(T x) { __target_switch @@ -8173,6 +8632,7 @@ T log2(T x) case cuda: __intrinsic_asm "$P_log2($0)"; case glsl: __intrinsic_asm "log2"; case hlsl: __intrinsic_asm "log2"; + case metal: __intrinsic_asm "log2"; case spirv: return spirv_asm { OpExtInst $$T result glsl450 Log2 $x }; @@ -8181,13 +8641,14 @@ T log2(T x) __generic<T : __BuiltinFloatingPointType, let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] vector<T,N> log2(vector<T,N> x) { __target_switch { case glsl: __intrinsic_asm "log2"; case hlsl: __intrinsic_asm "log2"; + case metal: __intrinsic_asm "log2"; case spirv: return spirv_asm { OpExtInst $$vector<T,N> result glsl450 Log2 $x }; @@ -8198,7 +8659,7 @@ vector<T,N> log2(vector<T,N> x) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] matrix<T,N,M> log2(matrix<T,N,M> x) { __target_switch @@ -8213,7 +8674,7 @@ matrix<T,N,M> log2(matrix<T,N,M> x) __generic<T : __BuiltinFloatingPointType> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_5_0)] T mad(T mvalue, T avalue, T bvalue) { __target_switch @@ -8222,6 +8683,7 @@ T mad(T 
mvalue, T avalue, T bvalue) case cuda: __intrinsic_asm "$P_fma($0, $1, $2)"; case glsl: __intrinsic_asm "fma"; case hlsl: __intrinsic_asm "mad"; + case metal: __intrinsic_asm "fma"; case spirv: return spirv_asm { OpExtInst $$T result glsl450 Fma $mvalue $avalue $bvalue }; @@ -8230,13 +8692,14 @@ T mad(T mvalue, T avalue, T bvalue) __generic<T : __BuiltinFloatingPointType, let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_5_0)] vector<T, N> mad(vector<T, N> mvalue, vector<T, N> avalue, vector<T, N> bvalue) { __target_switch { case glsl: __intrinsic_asm "fma"; case hlsl: __intrinsic_asm "mad"; + case metal: __intrinsic_asm "fma"; case spirv: return spirv_asm { OpExtInst $$vector<T, N> result glsl450 Fma $mvalue $avalue $bvalue }; @@ -8247,7 +8710,7 @@ vector<T, N> mad(vector<T, N> mvalue, vector<T, N> avalue, vector<T, N> bvalue) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_5_0)] matrix<T, N, M> mad(matrix<T, N, M> mvalue, matrix<T, N, M> avalue, matrix<T, N, M> bvalue) { __target_switch @@ -8385,12 +8848,13 @@ matrix<T, N, M> max(matrix<T, N, M> x, matrix<T, N, M> y) __generic<T : __BuiltinFloatingPointType> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] T max(T x, T y) { __target_switch { case hlsl: __intrinsic_asm "max"; + case metal: __intrinsic_asm "max"; case glsl: __intrinsic_asm "max"; case cuda: __intrinsic_asm "$P_max($0, $1)"; case cpp: __intrinsic_asm "$P_max($0, $1)"; @@ -8402,12 +8866,13 @@ T max(T x, T y) __generic<T : __BuiltinFloatingPointType, let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] vector<T, N> max(vector<T, N> x, vector<T, N> y) { __target_switch { 
case hlsl: __intrinsic_asm "max"; + case metal: __intrinsic_asm "max"; case glsl: __intrinsic_asm "max"; case spirv: return spirv_asm { result:$$vector<T, N> = OpExtInst glsl450 FMax $x $y @@ -8419,7 +8884,7 @@ vector<T, N> max(vector<T, N> x, vector<T, N> y) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] matrix<T, N, M> max(matrix<T, N, M> x, matrix<T, N, M> y) { __target_switch @@ -8430,6 +8895,107 @@ matrix<T, N, M> max(matrix<T, N, M> x, matrix<T, N, M> y) } } +__generic<T : __BuiltinFloatingPointType> +[__readNone] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +T max3(T x, T y, T z) +{ + __target_switch + { + case metal: __intrinsic_asm "max3"; + default: + return max(x, max(y, z)); + } +} + +__generic<T : __BuiltinFloatingPointType, let N: int> +[__readNone] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +vector<T,N> max3(vector<T,N> x, vector<T,N> y, vector<T,N> z) +{ + __target_switch + { + case metal: __intrinsic_asm "max3"; + default: + return max(x, max(y, z)); + } +} + +__generic<T : __BuiltinFloatingPointType> +[__readNone] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +T fmax(T x, T y) +{ + __target_switch + { + case metal: __intrinsic_asm "fmax"; + default: + if (isnan(x)) return y; // NaN-aware maximum: a NaN operand selects the other operand + return isnan(y) ? x : max(x, y); // guard y as well instead of handing a NaN to max() + } +} + +__generic<T : __BuiltinFloatingPointType, let N: int> +[__readNone] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +vector<T,N> fmax(vector<T,N> x, vector<T,N> y) +{ + __target_switch + { + case metal: __intrinsic_asm "fmax"; + default: + VECTOR_MAP_BINARY(T, N, fmax, x, y); + } +} + +__generic<T : __BuiltinFloatingPointType> +[__readNone] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +T fmax3(T x, T y, T z) +{ + __target_switch + { + case metal: __intrinsic_asm "fmax3"; + default: + { + bool isnanX = isnan(x); + bool isnanY = 
isnan(y); + bool isnanZ = isnan(z); + + if (isnanX) + { + return isnanY ? z : (isnanZ ? y : max(y, z)); // x is NaN: pick the larger of the operands that are numbers + } + else if (isnanY) + { + if (isnanZ) + return x; + return max(x, z); + } + else if (isnanZ) + { + return max(x, y); + } + + return max(y, max(x, z)); + } + } +} + +__generic<T : __BuiltinFloatingPointType, let N: int> +[__readNone] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +vector<T,N> fmax3(vector<T,N> x, vector<T,N> y, vector<T,N> z) +{ + __target_switch + { + case metal: __intrinsic_asm "fmax3"; + default: + VECTOR_MAP_TRINARY(T, N, fmax3, x, y, z); + } +} + + // minimum __generic<T : __BuiltinIntegerType> __target_intrinsic(hlsl) @@ -8481,12 +9047,13 @@ matrix<T,N,M> min(matrix<T,N,M> x, matrix<T,N,M> y) __generic<T : __BuiltinFloatingPointType> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] T min(T x, T y) { __target_switch { case hlsl: __intrinsic_asm "min"; + case metal: __intrinsic_asm "min"; case glsl: __intrinsic_asm "min"; case cuda: __intrinsic_asm "$P_min($0, $1)"; case cpp: __intrinsic_asm "$P_min($0, $1)"; @@ -8498,12 +9065,13 @@ T min(T x, T y) __generic<T : __BuiltinFloatingPointType, let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] vector<T,N> min(vector<T,N> x, vector<T,N> y) { __target_switch { case hlsl: __intrinsic_asm "min"; + case metal: __intrinsic_asm "min"; case glsl: __intrinsic_asm "min"; case spirv: return spirv_asm { result:$$vector<T,N> = OpExtInst glsl450 FMin $x $y @@ -8515,7 +9083,7 @@ vector<T,N> min(vector<T,N> x, vector<T,N> y) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] matrix<T,N,M> min(matrix<T,N,M> x, matrix<T,N,M> y) { __target_switch @@ -8526,16 +9094,212 @@ matrix<T,N,M> min(matrix<T,N,M> x, 
matrix<T,N,M> y) } } +__generic<T : __BuiltinFloatingPointType> +[__readNone] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +T min3(T x, T y, T z) +{ + __target_switch + { + case metal: __intrinsic_asm "min3"; + default: + return min(x, min(y, z)); + } +} + +__generic<T : __BuiltinFloatingPointType, let N : int> +[__readNone] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +vector<T,N> min3(vector<T,N> x, vector<T,N> y, vector<T,N> z) +{ + __target_switch + { + case metal: __intrinsic_asm "min3"; + default: + return min(x, min(y, z)); + } +} + +__generic<T : __BuiltinFloatingPointType> +[__readNone] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +T fmin(T x, T y) +{ + __target_switch + { + case metal: __intrinsic_asm "fmin"; + default: + if (isnan(x)) return y; // NaN-aware minimum: a NaN operand selects the other operand + return isnan(y) ? x : min(x, y); // guard y as well instead of handing a NaN to min() + } +} + +__generic<T : __BuiltinFloatingPointType, let N : int> +[__readNone] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +vector<T,N> fmin(vector<T,N> x, vector<T,N> y) +{ + __target_switch + { + case metal: __intrinsic_asm "fmin"; + default: + VECTOR_MAP_BINARY(T, N, fmin, x, y); + } +} + + +__generic<T : __BuiltinFloatingPointType> +[__readNone] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +T fmin3(T x, T y, T z) +{ + __target_switch + { + case metal: __intrinsic_asm "fmin3"; + default: + { + bool isnanX = isnan(x); + bool isnanY = isnan(y); + bool isnanZ = isnan(z); + + if (isnanX) + { + return isnanY ? 
z : (isnanZ ? y : min(y, z)); // x is NaN: pick the smaller of the operands that are numbers + } + else if (isnanY) + { + if (isnanZ) + return x; + return min(x, z); + } + else if (isnanZ) + { + return min(x, y); + } + + return min(x, min(y, z)); + } + } +} + +__generic<T : __BuiltinFloatingPointType, let N : int> +[__readNone] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +vector<T,N> fmin3(vector<T,N> x, vector<T,N> y, vector<T,N> z) +{ + __target_switch + { + case metal: __intrinsic_asm "fmin3"; + default: + VECTOR_MAP_TRINARY(T, N, fmin3, x, y, z); + } +} + + +// Median +__generic<T : __BuiltinFloatingPointType> +[__readNone] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +T median3(T x, T y, T z) +{ + __target_switch + { + case metal: __intrinsic_asm "median3"; + default: + { + // | a | b | c | m | + // ----------+---+---+---+---+ + // x > y > z | z | y | x | y | + // x > z > y | y | z | x | z | + // y > x > z | z | y | x | x | + // y > z > x | z | y | z | z | + // z > x > y | y | z | x | x | + // z > y > x | y | z | y | y | + + T a = min(y, z); + T b = max(y, z); + T c = max(x, a); + T m = min(b, c); + return m; + } + } +} + +__generic<T : __BuiltinFloatingPointType, let N: int> +[__readNone] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +vector<T,N> median3(vector<T,N> x, vector<T,N> y, vector<T,N> z) +{ + __target_switch + { + case metal: __intrinsic_asm "median3"; + default: + { + vector<T,N> a = min(y, z); + vector<T,N> b = max(y, z); + vector<T,N> c = max(x, a); + vector<T,N> m = min(b, c); + return m; + } + } +} + +__generic<T : __BuiltinFloatingPointType> +[__readNone] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +T fmedian3(T x, T y, T z) +{ + __target_switch + { + case metal: __intrinsic_asm "fmedian3"; + default: + { + bool isnanX = isnan(x); + bool isnanY = isnan(y); + bool isnanZ = isnan(z); + + if (isnanX) + { + return isnanY ? 
z : y; + } + else if (isnanY || isnanZ) + { + // "the function can return either non-NaN value" + return x; + } + + return median3(x, y, z); + } + } +} + +__generic<T : __BuiltinFloatingPointType, let N: int> +[__readNone] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +vector<T,N> fmedian3(vector<T,N> x, vector<T,N> y, vector<T,N> z) +{ + __target_switch + { + case metal: __intrinsic_asm "fmedian3"; + default: + VECTOR_MAP_TRINARY(T, N, fmedian3, x, y, z); + } +} + + // split into integer and fractional parts (both with same sign) __generic<T : __BuiltinFloatingPointType> [__readNone] -[require(glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] T modf(T x, out T ip) { __target_switch { + case cpp: __intrinsic_asm "$P_modf($0, $1)"; + case cuda: __intrinsic_asm "$P_modf($0, $1)"; case hlsl: __intrinsic_asm "modf"; case glsl: __intrinsic_asm "modf"; + case metal: __intrinsic_asm "modf($0, *($1))"; case spirv: return spirv_asm { result:$$T = OpExtInst glsl450 Modf $x &ip }; @@ -8544,13 +9308,14 @@ T modf(T x, out T ip) __generic<T : __BuiltinFloatingPointType, let N : int> [__readNone] -[require(glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] vector<T,N> modf(vector<T,N> x, out vector<T,N> ip) { __target_switch { case hlsl: __intrinsic_asm "modf"; case glsl: __intrinsic_asm "modf"; + case metal: __intrinsic_asm "modf($0, *($1))"; case spirv: return spirv_asm { result:$$vector<T,N> = OpExtInst glsl450 Modf $x &ip }; @@ -8561,7 +9326,7 @@ vector<T,N> modf(vector<T,N> x, out vector<T,N> ip) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int, let L : int> [__readNone] -[require(glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] matrix<T,N,M> modf(matrix<T,N,M> x, out matrix<T,N,M,L> ip) { __target_switch @@ -8883,6 +9648,50 @@ matrix<T,R,C> mul(matrix<T,R,N> left, matrix<T,N,C> right) } } +// next-after: next 
representable floating-point value +// after x in the direction of y + +__generic<T : __BuiltinFloatingPointType> +[__readNone] +[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_4_0)] +T nextafter(T x, T y) +{ + __target_switch + { + case metal: __intrinsic_asm "nextafter"; + default: + if (isnan(x)) return x; + if (isnan(y)) return y; + if (x == y) return y; // NOTE(review): the steps below add a fixed delta whose bit pattern is 1 (presumably the smallest subnormal); for normal-range x that sum likely rounds back to x - confirm this fallback really yields the adjacent value + if (T is half) + { + T delta = __realCast<T>(bit_cast<half>(uint16_t(1))); + return x + ((x < y) ? delta : -delta); + } + if (T is float) + { + T delta = __realCast<T>(bit_cast<float>(uint32_t(1))); + return x + ((x < y) ? delta : -delta); + } + T delta = __realCast<T>(bit_cast<double>(uint64_t(1))); + return x + ((x < y) ? delta : -delta); + } +} + +__generic<T : __BuiltinFloatingPointType, let N : int> +[__readNone] +[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_4_0)] +vector<T,N> nextafter(vector<T,N> x, vector<T,N> y) +{ + __target_switch + { + case metal: __intrinsic_asm "nextafter"; + default: + VECTOR_MAP_BINARY(T, N, nextafter, x, y); + } +} + + // noise (deprecated) [__readNone] @@ -8981,7 +9790,7 @@ T normalize(T x) // Raise to a power __generic<T : __BuiltinFloatingPointType> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] T pow(T x, T y) { __target_switch @@ -8990,6 +9799,7 @@ T pow(T x, T y) case cuda: __intrinsic_asm "$P_pow($0, $1)"; case glsl: __intrinsic_asm "pow"; case hlsl: __intrinsic_asm "pow"; + case metal: __intrinsic_asm "pow"; case spirv: return spirv_asm { OpExtInst $$T result glsl450 Pow $x $y }; @@ -8998,13 +9808,14 @@ T pow(T x, T y) __generic<T : __BuiltinFloatingPointType, let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] vector<T, N> pow(vector<T, N> x, vector<T, N> y) { __target_switch { case glsl: __intrinsic_asm "pow"; case hlsl: __intrinsic_asm "pow"; + case metal: __intrinsic_asm "pow"; case 
spirv: return spirv_asm { OpExtInst $$vector<T, N> result glsl450 Pow $x $y }; @@ -9015,7 +9826,7 @@ vector<T, N> pow(vector<T, N> x, vector<T, N> y) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] matrix<T,N,M> pow(matrix<T,N,M> x, matrix<T,N,M> y) { __target_switch @@ -9026,6 +9837,32 @@ matrix<T,N,M> pow(matrix<T,N,M> x, matrix<T,N,M> y) } } +__generic<T : __BuiltinFloatingPointType> +[__readNone] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +T powr(T x, T y) +{ + __target_switch + { + case metal: __intrinsic_asm "powr"; + default: + return pow(abs(x), y); + } +} + +__generic<T : __BuiltinFloatingPointType, let N : int> +[__readNone] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +vector<T, N> powr(vector<T, N> x, vector<T, N> y) +{ + __target_switch + { + case metal: __intrinsic_asm "powr"; + default: + return pow(abs(x), y); + } +} + // Output message // TODO: add check to ensure format is const literal. @@ -9360,10 +10197,60 @@ vector<uint, N> reversebits(vector<uint, N> value) } } +// Round to the nearest integer, ties to even +__generic<T : __BuiltinFloatingPointType> +[__readNone] +[ForceInline] +[require(cpp_cuda_glsl_hlsl_metal_spirv, GLSL_130)] +T rint(T x) +{ + __target_switch + { + case glsl: __intrinsic_asm "roundEven"; + case metal: __intrinsic_asm "rint"; + case spirv: return spirv_asm { + OpExtInst $$T result glsl450 RoundEven $x + }; + default: + T nearest = round(x); + + // Check if the value is exactly halfway between two integers + if (abs(x - nearest) == T(0.5)) + { + // If halfway, choose the even number + if (floor(nearest / T(2)) * T(2) != nearest) // floor() makes this an oddness test; without it, halving then doubling a float just reproduces `nearest` + { + // If the nearest number is odd, + // move to the closest even number + nearest -= ((x < nearest) ? 
T(1) : T(-1)); + } + } + return nearest; + } +} + +__generic<T : __BuiltinFloatingPointType, let N:int> +[__readNone] +[ForceInline] +[require(cpp_cuda_glsl_hlsl_metal_spirv, GLSL_130)] +vector<T,N> rint(vector<T,N> x) +{ + __target_switch + { + case glsl: __intrinsic_asm "roundEven"; + case metal: __intrinsic_asm "rint"; + case spirv: return spirv_asm { + OpExtInst $$vector<T,N> result glsl450 RoundEven $x + }; + default: + VECTOR_MAP_UNARY(T, N, rint, x); + } +} + // Round-to-nearest __generic<T : __BuiltinFloatingPointType> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] T round(T x) { __target_switch @@ -9372,6 +10259,7 @@ T round(T x) case cuda: __intrinsic_asm "$P_round($0)"; case glsl: __intrinsic_asm "round"; case hlsl: __intrinsic_asm "round"; + case metal: __intrinsic_asm "round"; case spirv: return spirv_asm { OpExtInst $$T result glsl450 Round $x }; @@ -9380,13 +10268,14 @@ T round(T x) __generic<T : __BuiltinFloatingPointType, let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] vector<T, N> round(vector<T, N> x) { __target_switch { case glsl: __intrinsic_asm "round"; case hlsl: __intrinsic_asm "round"; + case metal: __intrinsic_asm "round"; case spirv: return spirv_asm { OpExtInst $$vector<T, N> result glsl450 Round $x }; @@ -9397,7 +10286,7 @@ vector<T, N> round(vector<T, N> x) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] matrix<T,N,M> round(matrix<T,N,M> x) { __target_switch @@ -9411,7 +10300,7 @@ matrix<T,N,M> round(matrix<T,N,M> x) // Reciprocal of square root __generic<T : __BuiltinFloatingPointType> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] T rsqrt(T 
x) { __target_switch @@ -9420,6 +10309,7 @@ T rsqrt(T x) case cuda: __intrinsic_asm "$P_rsqrt($0)"; case glsl: __intrinsic_asm "inversesqrt($0)"; case hlsl: __intrinsic_asm "rsqrt"; + case metal: __intrinsic_asm "rsqrt"; case spirv: return spirv_asm { OpExtInst $$T result glsl450 InverseSqrt $x }; @@ -9430,13 +10320,14 @@ T rsqrt(T x) __generic<T : __BuiltinFloatingPointType, let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] vector<T, N> rsqrt(vector<T, N> x) { __target_switch { case glsl: __intrinsic_asm "inversesqrt($0)"; case hlsl: __intrinsic_asm "rsqrt"; + case metal: __intrinsic_asm "rsqrt"; case spirv: return spirv_asm { OpExtInst $$vector<T, N> result glsl450 InverseSqrt $x }; @@ -9447,7 +10338,7 @@ vector<T, N> rsqrt(vector<T, N> x) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] matrix<T, N, M> rsqrt(matrix<T, N, M> x) { __target_switch @@ -9568,12 +10459,11 @@ matrix<int, N, M> sign(matrix<T, N, M> x) } } - // Sine __generic<T : __BuiltinFloatingPointType> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] T sin(T x) { __target_switch @@ -9582,6 +10472,7 @@ T sin(T x) case cuda: __intrinsic_asm "$P_sin($0)"; case glsl: __intrinsic_asm "sin"; case hlsl: __intrinsic_asm "sin"; + case metal: __intrinsic_asm "sin"; case spirv: return spirv_asm { OpExtInst $$T result glsl450 Sin $x }; @@ -9590,13 +10481,14 @@ T sin(T x) __generic<T : __BuiltinFloatingPointType, let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] vector<T, N> sin(vector<T, N> x) { __target_switch { case glsl: __intrinsic_asm "sin"; case hlsl: __intrinsic_asm "sin"; + case metal: __intrinsic_asm 
"sin"; case spirv: return spirv_asm { OpExtInst $$vector<T, N> result glsl450 Sin $x }; @@ -9607,7 +10499,7 @@ vector<T, N> sin(vector<T, N> x) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] matrix<T, N, M> sin(matrix<T, N, M> x) { __target_switch @@ -9621,13 +10513,40 @@ matrix<T, N, M> sin(matrix<T, N, M> x) // Sine and cosine __generic<T : __BuiltinFloatingPointType> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(metal)] +T __sincos_metal(T x, out T c) +{ + __target_switch + { + case metal: __intrinsic_asm "sincos($0, *$1)"; + } +} + +__generic<T : __BuiltinFloatingPointType, let N : int> +[__readNone] +[require(metal)] +vector<T,N> __sincos_metal(vector<T,N> x, out vector<T,N> c) +{ + __target_switch + { + case metal: __intrinsic_asm "sincos($0, *$1)"; + } +} + +__generic<T : __BuiltinFloatingPointType> +[__readNone] +[ForceInline] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] void sincos(T x, out T s, out T c) { __target_switch { case cuda: __intrinsic_asm "$P_sincos($0, $1, $2)"; case hlsl: __intrinsic_asm "sincos"; + case metal: + //__intrinsic_asm "*($1) = sincos($0, *($2))"; + s = __sincos_metal(x, c); + return; default: s = sin(x); c = cos(x); @@ -9636,12 +10555,17 @@ void sincos(T x, out T s, out T c) __generic<T : __BuiltinFloatingPointType, let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[ForceInline] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] void sincos(vector<T,N> x, out vector<T,N> s, out vector<T,N> c) { __target_switch { case hlsl: __intrinsic_asm "sincos"; + case metal: + //__intrinsic_asm "*($1) = sincos($0, *($2))"; + s = __sincos_metal(x, c); + return; default: s = sin(x); c = cos(x); @@ -9650,7 +10574,8 @@ void sincos(vector<T,N> x, out vector<T,N> s, out vector<T,N> c) __generic<T : 
__BuiltinFloatingPointType, let N : int, let M : int, let L1: int, let L2 : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[ForceInline] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] void sincos(matrix<T,N,M> x, out matrix<T,N,M,L1> s, out matrix<T,N,M,L2> c) { __target_switch @@ -9665,7 +10590,7 @@ void sincos(matrix<T,N,M> x, out matrix<T,N,M,L1> s, out matrix<T,N,M,L2> c) // Hyperbolic Sine __generic<T : __BuiltinFloatingPointType> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] T sinh(T x) { __target_switch @@ -9674,6 +10599,7 @@ T sinh(T x) case cuda: __intrinsic_asm "$P_sinh($0)"; case glsl: __intrinsic_asm "sinh"; case hlsl: __intrinsic_asm "sinh"; + case metal: __intrinsic_asm "sinh"; case spirv: return spirv_asm { OpExtInst $$T result glsl450 Sinh $x }; @@ -9682,13 +10608,14 @@ T sinh(T x) __generic<T : __BuiltinFloatingPointType, let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] vector<T, N> sinh(vector<T, N> x) { __target_switch { case glsl: __intrinsic_asm "sinh"; case hlsl: __intrinsic_asm "sinh"; + case metal: __intrinsic_asm "sinh"; case spirv: return spirv_asm { OpExtInst $$vector<T, N> result glsl450 Sinh $x }; @@ -9699,7 +10626,7 @@ vector<T, N> sinh(vector<T, N> x) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] matrix<T, N, M> sinh(matrix<T, N, M> x) { __target_switch @@ -9710,6 +10637,35 @@ matrix<T, N, M> sinh(matrix<T, N, M> x) } } +// Sine degree + +__generic<T : __BuiltinFloatingPointType> +[__readNone] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +T sinpi(T x) +{ + __target_switch + { + case metal: __intrinsic_asm "sinpi"; + default: + return sin(T.getPi() * x); + } +} + 
+__generic<T : __BuiltinFloatingPointType, let N : int> +[__readNone] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +vector<T,N> sinpi(vector<T,N> x) +{ + __target_switch + { + case metal: __intrinsic_asm "sinpi"; + default: + return sin(T.getPi() * x); + } +} + + // Smooth step (Hermite interpolation) __generic<T : __BuiltinFloatingPointType> [__readNone] @@ -9762,7 +10718,7 @@ matrix<T, N, M> smoothstep(matrix<T, N, M> min, matrix<T, N, M> max, matrix<T, N // Square root __generic<T : __BuiltinFloatingPointType> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] T sqrt(T x) { __target_switch @@ -9771,6 +10727,7 @@ T sqrt(T x) case cuda: __intrinsic_asm "$P_sqrt($0)"; case glsl: __intrinsic_asm "sqrt"; case hlsl: __intrinsic_asm "sqrt"; + case metal: __intrinsic_asm "sqrt"; case spirv: return spirv_asm { OpExtInst $$T result glsl450 Sqrt $x }; @@ -9779,13 +10736,14 @@ T sqrt(T x) __generic<T : __BuiltinFloatingPointType, let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] vector<T, N> sqrt(vector<T, N> x) { __target_switch { case glsl: __intrinsic_asm "sqrt"; case hlsl: __intrinsic_asm "sqrt"; + case metal: __intrinsic_asm "sqrt"; case spirv: return spirv_asm { OpExtInst $$vector<T, N> result glsl450 Sqrt $x }; @@ -9796,7 +10754,7 @@ vector<T, N> sqrt(vector<T, N> x) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] matrix<T, N, M> sqrt(matrix<T, N, M> x) { __target_switch @@ -9858,7 +10816,7 @@ matrix<T, N, M> step(matrix<T, N, M> y, matrix<T, N, M> x) // Tangent __generic<T : __BuiltinFloatingPointType> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] T tan(T x) { 
__target_switch @@ -9867,6 +10825,7 @@ T tan(T x) case cuda: __intrinsic_asm "$P_tan($0)"; case glsl: __intrinsic_asm "tan"; case hlsl: __intrinsic_asm "tan"; + case metal: __intrinsic_asm "tan"; case spirv: return spirv_asm { OpExtInst $$T result glsl450 Tan $x }; @@ -9875,13 +10834,14 @@ T tan(T x) __generic<T : __BuiltinFloatingPointType, let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] vector<T, N> tan(vector<T, N> x) { __target_switch { case glsl: __intrinsic_asm "tan"; case hlsl: __intrinsic_asm "tan"; + case metal: __intrinsic_asm "tan"; case spirv: return spirv_asm { OpExtInst $$vector<T, N> result glsl450 Tan $x }; @@ -9892,7 +10852,7 @@ vector<T, N> tan(vector<T, N> x) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] matrix<T, N, M> tan(matrix<T, N, M> x) { __target_switch @@ -9906,7 +10866,7 @@ matrix<T, N, M> tan(matrix<T, N, M> x) // Hyperbolic tangent __generic<T : __BuiltinFloatingPointType> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] T tanh(T x) { __target_switch @@ -9915,6 +10875,7 @@ T tanh(T x) case cuda: __intrinsic_asm "$P_tanh($0)"; case glsl: __intrinsic_asm "tanh"; case hlsl: __intrinsic_asm "tanh"; + case metal: __intrinsic_asm "tanh"; case spirv: return spirv_asm { OpExtInst $$T result glsl450 Tanh $x }; @@ -9923,13 +10884,14 @@ T tanh(T x) __generic<T : __BuiltinFloatingPointType, let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] vector<T,N> tanh(vector<T,N> x) { __target_switch { case glsl: __intrinsic_asm "tanh"; case hlsl: __intrinsic_asm "tanh"; + case metal: __intrinsic_asm "tanh"; case spirv: return spirv_asm { OpExtInst 
$$vector<T,N> result glsl450 Tanh $x }; @@ -9940,7 +10902,7 @@ vector<T,N> tanh(vector<T,N> x) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] matrix<T,N,M> tanh(matrix<T,N,M> x) { __target_switch @@ -9951,6 +10913,35 @@ matrix<T,N,M> tanh(matrix<T,N,M> x) } } +// Tangent degree + +__generic<T : __BuiltinFloatingPointType> +[__readNone] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +T tanpi(T x) +{ + __target_switch + { + case metal: __intrinsic_asm "tanpi"; + default: + return tan(T.getPi() * x); + } +} + +__generic<T : __BuiltinFloatingPointType, let N : int> +[__readNone] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +vector<T,N> tanpi(vector<T,N> x) +{ + __target_switch + { + case metal: __intrinsic_asm "tanpi"; + default: + return tan(T.getPi() * x); + } +} + + // Matrix transpose __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> [__readNone] @@ -10020,7 +11011,7 @@ matrix<T, M, N> transpose(matrix<T, N, M> x) // Truncate to integer __generic<T : __BuiltinFloatingPointType> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] T trunc(T x) { __target_switch @@ -10029,6 +11020,7 @@ T trunc(T x) case cuda: __intrinsic_asm "$P_trunc($0)"; case glsl: __intrinsic_asm "trunc"; case hlsl: __intrinsic_asm "trunc"; + case metal: __intrinsic_asm "trunc"; case spirv: return spirv_asm { OpExtInst $$T result glsl450 Trunc $x }; @@ -10037,13 +11029,14 @@ T trunc(T x) __generic<T : __BuiltinFloatingPointType, let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] vector<T, N> trunc(vector<T, N> x) { __target_switch { case glsl: __intrinsic_asm "trunc"; case hlsl: __intrinsic_asm "trunc"; + case metal: __intrinsic_asm "trunc"; case 
spirv: return spirv_asm { OpExtInst $$vector<T, N> result glsl450 Trunc $x }; @@ -10054,7 +11047,7 @@ vector<T, N> trunc(vector<T, N> x) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] matrix<T, N, M> trunc(matrix<T, N, M> x) { __target_switch diff --git a/source/slang/slang-emit-metal.cpp b/source/slang/slang-emit-metal.cpp index 2c327b613..7da48cac1 100644 --- a/source/slang/slang-emit-metal.cpp +++ b/source/slang/slang-emit-metal.cpp @@ -298,35 +298,27 @@ bool MetalSourceEmitter::tryEmitInstExprImpl(IRInst* inst, const EmitOpInfo& inO void MetalSourceEmitter::emitVectorTypeNameImpl(IRType* elementType, IRIntegerValue elementCount) { - // In some cases we *need* to use the built-in syntax sugar for vector types, - // so we will try to emit those whenever possible. - // - if( elementCount >= 1 && elementCount <= 4 ) - { - switch( elementType->getOp() ) + emitSimpleTypeImpl(elementType); + + switch (elementType->getOp()) + { + case kIROp_FloatType: + case kIROp_HalfType: + case kIROp_BoolType: + case kIROp_Int8Type: + case kIROp_UInt8Type: + case kIROp_Int16Type: + case kIROp_UInt16Type: + case kIROp_IntType: + case kIROp_UIntType: + case kIROp_Int64Type: + case kIROp_UInt64Type: + if (elementCount > 1) { - case kIROp_FloatType: - case kIROp_IntType: - case kIROp_UIntType: - // TODO: There are more types that need to be covered here - emitType(elementType); m_writer->emit(elementCount); - return; - - default: - break; } + break; } - - // As a fallback, we will use the `vector<...>` type constructor, - // although we should not expect to run into types that don't - // have a sugared form. 
- // - m_writer->emit("vector<"); - emitType(elementType); - m_writer->emit(","); - m_writer->emit(elementCount); - m_writer->emit(">"); } void MetalSourceEmitter::emitLoopControlDecorationImpl(IRLoopControlDecoration* decl) @@ -855,6 +847,7 @@ void MetalSourceEmitter::handleRequiredCapabilitiesImpl(IRInst* inst) void MetalSourceEmitter::emitFrontMatterImpl(TargetRequest*) { m_writer->emit("#include <metal_stdlib>\n"); + m_writer->emit("#include <metal_math>\n"); m_writer->emit("using namespace metal;\n"); } |
