summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--source/slang/core.meta.slang2
-rw-r--r--source/slang/glsl.meta.slang201
-rw-r--r--source/slang/hlsl.meta.slang1209
-rw-r--r--source/slang/slang-emit-metal.cpp43
-rw-r--r--tests/metal/math.slang513
5 files changed, 1638 insertions, 330 deletions
diff --git a/source/slang/core.meta.slang b/source/slang/core.meta.slang
index 3fc2fc570..22822196c 100644
--- a/source/slang/core.meta.slang
+++ b/source/slang/core.meta.slang
@@ -421,6 +421,8 @@ __generic<T, let N : int> __intrinsic_op(select) vector<T,N> select(vector<bool,
// Allow real-number types to be cast into each other
__intrinsic_op($(kIROp_FloatCast))
T __realCast<T : __BuiltinRealType, U : __BuiltinRealType>(U val);
+// Overload of `__realCast` for integer sources: takes a `__BuiltinIntegerType`
+// value and yields a `__BuiltinRealType`, mapped to the `kIROp_CastIntToFloat` op.
+__intrinsic_op($(kIROp_CastIntToFloat))
+ T __realCast<T : __BuiltinRealType, U : __BuiltinIntegerType>(U val);
__intrinsic_op($(kIROp_IntCast))
T __intCast<T : __BuiltinType, U : __BuiltinType>(U val);
${{{{
diff --git a/source/slang/glsl.meta.slang b/source/slang/glsl.meta.slang
index 9715a44ce..bacc8958e 100644
--- a/source/slang/glsl.meta.slang
+++ b/source/slang/glsl.meta.slang
@@ -321,114 +321,6 @@ public vector<T,N> atan(vector<T,N> y, vector<T,N> x)
return atan2(y, x);
}
-__generic<T : __BuiltinFloatingPointType>
-[__readNone]
-[ForceInline]
-[require(cpp_cuda_glsl_hlsl_spirv, GLSL_130)]
-public T asinh(T x)
-{
- __target_switch
- {
- case cpp: __intrinsic_asm "$P_asinh($0)";
- case cuda: __intrinsic_asm "$P_asinh($0)";
- case glsl: __intrinsic_asm "asinh";
- case spirv: return spirv_asm {
- OpExtInst $$T result glsl450 Asinh $x
- };
- default:
- return log(x + sqrt(x * x + T(1)));
- }
-}
-
-__generic<T : __BuiltinFloatingPointType, let N:int>
-[__readNone]
-[ForceInline]
-[require(cpp_cuda_glsl_hlsl_spirv, GLSL_130)]
-public vector<T,N> asinh(vector<T,N> x)
-{
- __target_switch
- {
- case glsl: __intrinsic_asm "asinh";
- case spirv: return spirv_asm {
- OpExtInst $$vector<T,N> result glsl450 Asinh $x
- };
- default:
- VECTOR_MAP_UNARY(T, N, asinh, x);
- }
-}
-
-__generic<T : __BuiltinFloatingPointType>
-[__readNone]
-[ForceInline]
-[require(cpp_cuda_glsl_hlsl_spirv, GLSL_130)]
-public T acosh(T x)
-{
- __target_switch
- {
- case cpp: __intrinsic_asm "$P_acosh($0)";
- case cuda: __intrinsic_asm "$P_acosh($0)";
- case glsl: __intrinsic_asm "acosh";
- case spirv: return spirv_asm {
- OpExtInst $$T result glsl450 Acosh $x
- };
- default:
- return log(x + sqrt( x * x - T(1)));
- }
-}
-
-__generic<T : __BuiltinFloatingPointType, let N:int>
-[__readNone]
-[ForceInline]
-[require(cpp_cuda_glsl_hlsl_spirv, GLSL_130)]
-public vector<T,N> acosh(vector<T,N> x)
-{
- __target_switch
- {
- case glsl: __intrinsic_asm "acosh";
- case spirv: return spirv_asm {
- OpExtInst $$vector<T,N> result glsl450 Acosh $x
- };
- default:
- VECTOR_MAP_UNARY(T, N, acosh, x);
- }
-}
-
-__generic<T : __BuiltinFloatingPointType>
-[__readNone]
-[ForceInline]
-[require(cpp_cuda_glsl_hlsl_spirv, GLSL_130)]
-public T atanh(T x)
-{
- __target_switch
- {
- case cpp: __intrinsic_asm "$P_atanh($0)";
- case cuda: __intrinsic_asm "$P_atanh($0)";
- case glsl: __intrinsic_asm "atanh";
- case spirv: return spirv_asm {
- OpExtInst $$T result glsl450 Atanh $x
- };
- default:
- return T(0.5) * log((T(1) + x) / (T(1) - x));
- }
-}
-
-__generic<T : __BuiltinFloatingPointType, let N:int>
-[__readNone]
-[ForceInline]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
-public vector<T,N> atanh(vector<T,N> x)
-{
- __target_switch
- {
- case glsl: __intrinsic_asm "atanh";
- case spirv: return spirv_asm {
- OpExtInst $$vector<T,N> result glsl450 Atanh $x
- };
- default:
- VECTOR_MAP_UNARY(T, N, atanh, x);
- }
-}
-
//
// Section 8.2. Exponential Functions
//
@@ -458,66 +350,19 @@ public vector<T, N> inversesqrt(vector<T, N> x)
__generic<T : __BuiltinFloatingPointType>
[__readNone]
[ForceInline]
-[require(cpp_cuda_glsl_hlsl_spirv, GLSL_130)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, GLSL_130)]
public T roundEven(T x)
{
- __target_switch
- {
- case glsl: __intrinsic_asm "roundEven";
- case spirv: return spirv_asm {
- OpExtInst $$T result glsl450 RoundEven $x
- };
- default:
- T nearest = round(x);
-
- // Check if the value is exactly halfway between two integers
- if (abs(x - nearest) == T(0.5))
- {
- // If halfway, choose the even number
- if (mod(nearest, T(2)) != T(0))
- {
- // If the nearest number is odd,
- // move to the closest even number
- nearest -= ((x < nearest) ? T(1) : T(-1));
- }
- }
- return nearest;
- }
+ return rint(x);
}
__generic<T : __BuiltinFloatingPointType, let N:int>
[__readNone]
[ForceInline]
-[require(cpp_cuda_glsl_hlsl_spirv, GLSL_130)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, GLSL_130)]
public vector<T,N> roundEven(vector<T,N> x)
{
- __target_switch
- {
- case glsl: __intrinsic_asm "roundEven";
- case spirv: return spirv_asm {
- OpExtInst $$vector<T,N> result glsl450 RoundEven $x
- };
- default:
- VECTOR_MAP_UNARY(T, N, roundEven, x);
- }
-}
-
-__generic<T : __BuiltinFloatingPointType>
-[__readNone]
-[ForceInline]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
-public T fract(T x)
-{
- return frac(x);
-}
-
-__generic<T : __BuiltinFloatingPointType, let N:int>
-[__readNone]
-[ForceInline]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
-public vector<T, N> fract(vector<T, N> x)
-{
- return frac(x);
+ return rint(x);
}
__generic<T : __BuiltinFloatingPointType>
@@ -824,44 +669,6 @@ uint float2half(float f)
return (s | e | m);
}
-__generic<T : __BuiltinFloatingPointType, E : __BuiltinIntegerType>
-[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
-public T ldexp(T x, E exp)
-{
- __target_switch
- {
- case hlsl: __intrinsic_asm "ldexp";
- case glsl: __intrinsic_asm "ldexp";
- case spirv: return spirv_asm {
- OpExtInst $$T result glsl450 Ldexp $x $exp
- };
- default:
- return ldexp(x, __floatCast<T>(exp));
- }
-}
-
-__generic<T : __BuiltinFloatingPointType, E : __BuiltinIntegerType, let N : int>
-[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
-public vector<T, N> ldexp(vector<T, N> x, vector<E, N> exp)
-{
- __target_switch
- {
- case hlsl: __intrinsic_asm "ldexp";
- case glsl: __intrinsic_asm "ldexp";
- case spirv: return spirv_asm {
- OpExtInst $$vector<T,N> result glsl450 Ldexp $x $exp
- };
- default:
- vector<T,N> temp;
- [ForceUnroll]
- for (int i = 0; i < N; ++i)
- temp[i] = __floatCast<T>(exp[i]);
- return ldexp(x, temp);
- }
-}
-
[__readNone]
[ForceInline]
[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)]
diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang
index ca1fb0af3..6b3c5db59 100644
--- a/source/slang/hlsl.meta.slang
+++ b/source/slang/hlsl.meta.slang
@@ -5,9 +5,6 @@ typedef uint UINT;
__intrinsic_op($(kIROp_RequireGLSLExtension))
void __requireGLSLExtension(String extensionName);
-__intrinsic_op($(kIROp_FloatCast))
-T __floatCast<T, U>(U v);
-
[sealed]
interface IBufferDataLayout
{
@@ -4093,12 +4090,13 @@ matrix<T,N,M> abs(matrix<T,N,M> x)
__generic<T : __BuiltinFloatingPointType>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
T abs(T x)
{
__target_switch
{
case hlsl: __intrinsic_asm "abs";
+ case metal: __intrinsic_asm "abs";
case glsl: __intrinsic_asm "abs";
case cuda: __intrinsic_asm "$P_abs($0)";
case cpp: __intrinsic_asm "$P_abs($0)";
@@ -4110,12 +4108,13 @@ T abs(T x)
__generic<T : __BuiltinFloatingPointType, let N : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
vector<T, N> abs(vector<T, N> x)
{
__target_switch
{
case hlsl: __intrinsic_asm "abs";
+ case metal: __intrinsic_asm "abs";
case glsl: __intrinsic_asm "abs";
case spirv: return spirv_asm {
result:$$vector<T,N> = OpExtInst glsl450 FAbs $x;
@@ -4127,7 +4126,7 @@ vector<T, N> abs(vector<T, N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
matrix<T,N,M> abs(matrix<T,N,M> x)
{
__target_switch
@@ -4138,11 +4137,40 @@ matrix<T,N,M> abs(matrix<T,N,M> x)
}
}
+// Scalar `fabs`: Metal emits its own `fabs` call; every other target
+// forwards to `abs`.
+__generic<T : __BuiltinFloatingPointType>
+[__readNone]
+[ForceInline]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
+T fabs(T x)
+{
+ __target_switch
+ {
+ case metal: __intrinsic_asm "fabs";
+ default:
+ return abs(x);
+ }
+}
+
+// Vector `fabs`: Metal emits its own `fabs` call; every other target
+// forwards to the vector `abs`.
+__generic<T : __BuiltinFloatingPointType, let N : int>
+[__readNone]
+[ForceInline]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
+vector<T, N> fabs(vector<T, N> x)
+{
+ __target_switch
+ {
+ case metal: __intrinsic_asm "fabs";
+ default:
+ return abs(x);
+ }
+}
+
+
// Inverse cosine (HLSL SM 1.0)
__generic<T : __BuiltinFloatingPointType>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
T acos(T x)
{
__target_switch
@@ -4151,6 +4179,7 @@ T acos(T x)
case cuda: __intrinsic_asm "$P_acos($0)";
case glsl: __intrinsic_asm "acos";
case hlsl: __intrinsic_asm "acos";
+ case metal: __intrinsic_asm "acos";
case spirv: return spirv_asm {
OpExtInst $$T result glsl450 Acos $x
};
@@ -4159,13 +4188,14 @@ T acos(T x)
__generic<T : __BuiltinFloatingPointType, let N : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
vector<T, N> acos(vector<T, N> x)
{
__target_switch
{
case glsl: __intrinsic_asm "acos";
case hlsl: __intrinsic_asm "acos";
+ case metal: __intrinsic_asm "acos";
case spirv: return spirv_asm {
OpExtInst $$vector<T, N> result glsl450 Acos $x
};
@@ -4176,7 +4206,7 @@ vector<T, N> acos(vector<T, N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
matrix<T, N, M> acos(matrix<T, N, M> x)
{
__target_switch
@@ -4187,9 +4217,51 @@ matrix<T, N, M> acos(matrix<T, N, M> x)
}
}
+// Inverse hyperbolic cosine (scalar).
+// cpp/cuda emit a `$P_acosh` call, glsl/metal emit `acosh`, spirv uses the
+// `glsl450` extended instruction `Acosh`, and any remaining target evaluates
+// log(x + sqrt(x*x - 1)) directly.
+
+__generic<T : __BuiltinFloatingPointType>
+[__readNone]
+[ForceInline]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, GLSL_130)]
+T acosh(T x)
+{
+ __target_switch
+ {
+ case cpp: __intrinsic_asm "$P_acosh($0)";
+ case cuda: __intrinsic_asm "$P_acosh($0)";
+ case glsl: __intrinsic_asm "acosh";
+ case metal: __intrinsic_asm "acosh";
+ case spirv: return spirv_asm {
+ OpExtInst $$T result glsl450 Acosh $x
+ };
+ default:
+ return log(x + sqrt( x * x - T(1)));
+ }
+}
+
+// Inverse hyperbolic cosine (vector).
+// glsl/metal emit `acosh` on the whole vector and spirv uses the `glsl450`
+// `Acosh` instruction; other targets go through VECTOR_MAP_UNARY with `acosh`
+// (presumably applying the scalar overload per component - confirm against the macro).
+__generic<T : __BuiltinFloatingPointType, let N:int>
+[__readNone]
+[ForceInline]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, GLSL_130)]
+vector<T,N> acosh(vector<T,N> x)
+{
+ __target_switch
+ {
+ case glsl: __intrinsic_asm "acosh";
+ case metal: __intrinsic_asm "acosh";
+ case spirv: return spirv_asm {
+ OpExtInst $$vector<T,N> result glsl450 Acosh $x
+ };
+ default:
+ VECTOR_MAP_UNARY(T, N, acosh, x);
+ }
+}
+
+
// Test if all components are non-zero (HLSL SM 1.0)
__generic<T : __BuiltinType>
[__readNone]
+[require(cpp_cuda_glsl_hlsl_metal_spirv)]
bool all(T x)
{
__target_switch
@@ -4198,6 +4270,8 @@ bool all(T x)
__intrinsic_asm "bool($0)";
case hlsl:
__intrinsic_asm "all";
+ case metal:
+ __intrinsic_asm "all";
case spirv:
let zero = __default<T>();
if (__isInt<T>())
@@ -4219,12 +4293,15 @@ bool all(T x)
__generic<T : __BuiltinType, let N : int>
[__readNone]
+[require(cpp_cuda_glsl_hlsl_metal_spirv)]
bool all(vector<T,N> x)
{
__target_switch
{
case hlsl:
__intrinsic_asm "all";
+ case metal:
+ __intrinsic_asm "all";
case glsl:
__intrinsic_asm "all(bvec$N0($0))";
case spirv:
@@ -4261,7 +4338,7 @@ bool all(vector<T,N> x)
__generic<T : __BuiltinType, let N : int, let M : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv)]
bool all(matrix<T,N,M> x)
{
__target_switch
@@ -4318,6 +4395,7 @@ int3 WorkgroupSize();
__generic<T : __BuiltinType>
[__readNone]
+[require(cpp_cuda_glsl_hlsl_metal_spirv)]
bool any(T x)
{
__target_switch
@@ -4326,6 +4404,8 @@ bool any(T x)
__intrinsic_asm "bool($0)";
case hlsl:
__intrinsic_asm "any";
+ case metal:
+ __intrinsic_asm "any";
case spirv:
let zero = __default<T>();
if (__isInt<T>())
@@ -4346,12 +4426,15 @@ bool any(T x)
__generic<T : __BuiltinType, let N : int>
[__readNone]
+[require(cpp_cuda_glsl_hlsl_metal_spirv)]
bool any(vector<T, N> x)
{
__target_switch
{
case hlsl:
__intrinsic_asm "any";
+ case metal:
+ __intrinsic_asm "any";
case glsl:
__intrinsic_asm "any(bvec$N0($0))";
case spirv:
@@ -4541,7 +4624,7 @@ matrix<float,N,M> asfloat(matrix<float,N,M> x)
// Inverse sine (HLSL SM 1.0)
__generic<T : __BuiltinFloatingPointType>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
T asin(T x)
{
__target_switch
@@ -4550,6 +4633,7 @@ T asin(T x)
case cuda: __intrinsic_asm "$P_asin($0)";
case glsl: __intrinsic_asm "asin";
case hlsl: __intrinsic_asm "asin";
+ case metal: __intrinsic_asm "asin";
case spirv: return spirv_asm {
OpExtInst $$T result glsl450 Asin $x
};
@@ -4558,13 +4642,14 @@ T asin(T x)
__generic<T : __BuiltinFloatingPointType, let N : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
vector<T, N> asin(vector<T, N> x)
{
__target_switch
{
case glsl: __intrinsic_asm "asin";
case hlsl: __intrinsic_asm "asin";
+ case metal: __intrinsic_asm "asin";
case spirv: return spirv_asm {
OpExtInst $$vector<T, N> result glsl450 Asin $x
};
@@ -4575,7 +4660,7 @@ vector<T, N> asin(vector<T, N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
matrix<T, N, M> asin(matrix<T, N, M> x)
{
__target_switch
@@ -4586,6 +4671,46 @@ matrix<T, N, M> asin(matrix<T, N, M> x)
}
}
+// Inverse hyperbolic sine (scalar).
+// cpp/cuda emit a `$P_asinh` call, glsl/metal emit `asinh`, spirv uses the
+// `glsl450` extended instruction `Asinh`, and any remaining target evaluates
+// log(x + sqrt(x*x + 1)) directly.
+
+__generic<T : __BuiltinFloatingPointType>
+[__readNone]
+[ForceInline]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, GLSL_130)]
+T asinh(T x)
+{
+ __target_switch
+ {
+ case cpp: __intrinsic_asm "$P_asinh($0)";
+ case cuda: __intrinsic_asm "$P_asinh($0)";
+ case glsl: __intrinsic_asm "asinh";
+ case metal: __intrinsic_asm "asinh";
+ case spirv: return spirv_asm {
+ OpExtInst $$T result glsl450 Asinh $x
+ };
+ default:
+ return log(x + sqrt(x * x + T(1)));
+ }
+}
+
+// Inverse hyperbolic sine (vector).
+// glsl/metal emit `asinh` on the whole vector and spirv uses the `glsl450`
+// `Asinh` instruction; other targets go through VECTOR_MAP_UNARY with `asinh`
+// (presumably applying the scalar overload per component - confirm against the macro).
+__generic<T : __BuiltinFloatingPointType, let N:int>
+[__readNone]
+[ForceInline]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, GLSL_130)]
+vector<T,N> asinh(vector<T,N> x)
+{
+ __target_switch
+ {
+ case glsl: __intrinsic_asm "asinh";
+ case metal: __intrinsic_asm "asinh";
+ case spirv: return spirv_asm {
+ OpExtInst $$vector<T,N> result glsl450 Asinh $x
+ };
+ default:
+ VECTOR_MAP_UNARY(T, N, asinh, x);
+ }
+}
+
// Reinterpret bits as an int (HLSL SM 4.0)
[__readNone]
@@ -5029,7 +5154,7 @@ matrix<float16_t,R,C> asfloat16<let R : int, let C : int>(matrix<int16_t,R,C> va
// Inverse tangent (HLSL SM 1.0)
__generic<T : __BuiltinFloatingPointType>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
T atan(T x)
{
__target_switch
@@ -5038,6 +5163,7 @@ T atan(T x)
case cuda: __intrinsic_asm "$P_atan($0)";
case glsl: __intrinsic_asm "atan";
case hlsl: __intrinsic_asm "atan";
+ case metal: __intrinsic_asm "atan";
case spirv: return spirv_asm {
OpExtInst $$T result glsl450 Atan $x
};
@@ -5046,13 +5172,14 @@ T atan(T x)
__generic<T : __BuiltinFloatingPointType, let N : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
vector<T, N> atan(vector<T, N> x)
{
__target_switch
{
case glsl: __intrinsic_asm "atan";
case hlsl: __intrinsic_asm "atan";
+ case metal: __intrinsic_asm "atan";
case spirv: return spirv_asm {
OpExtInst $$vector<T, N> result glsl450 Atan $x
};
@@ -5063,7 +5190,7 @@ vector<T, N> atan(vector<T, N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
matrix<T, N, M> atan(matrix<T, N, M> x)
{
__target_switch
@@ -5076,7 +5203,7 @@ matrix<T, N, M> atan(matrix<T, N, M> x)
__generic<T : __BuiltinFloatingPointType>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
T atan2(T y, T x)
{
__target_switch
@@ -5085,6 +5212,7 @@ T atan2(T y, T x)
case cuda: __intrinsic_asm "$P_atan2($0, $1)";
case glsl: __intrinsic_asm "atan($0,$1)";
case hlsl: __intrinsic_asm "atan2";
+ case metal: __intrinsic_asm "atan2";
case spirv: return spirv_asm {
OpExtInst $$T result glsl450 Atan2 $y $x
};
@@ -5093,13 +5221,14 @@ T atan2(T y, T x)
__generic<T : __BuiltinFloatingPointType, let N : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
vector<T, N> atan2(vector<T, N> y, vector<T, N> x)
{
__target_switch
{
case glsl: __intrinsic_asm "atan($0,$1)";
case hlsl: __intrinsic_asm "atan2";
+ case metal: __intrinsic_asm "atan2";
case spirv: return spirv_asm {
OpExtInst $$vector<T, N> result glsl450 Atan2 $y $x
};
@@ -5110,7 +5239,7 @@ vector<T, N> atan2(vector<T, N> y, vector<T, N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
matrix<T,N,M> atan2(matrix<T,N,M> y, matrix<T,N,M> x)
{
__target_switch
@@ -5121,10 +5250,50 @@ matrix<T,N,M> atan2(matrix<T,N,M> y, matrix<T,N,M> x)
}
}
+// Inverse hyperbolic tangent (scalar).
+// cpp/cuda emit a `$P_atanh` call, glsl/metal emit `atanh`, spirv uses the
+// `glsl450` extended instruction `Atanh`, and any remaining target evaluates
+// 0.5 * log((1 + x) / (1 - x)) directly.
+
+__generic<T : __BuiltinFloatingPointType>
+[__readNone]
+[ForceInline]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, GLSL_130)]
+T atanh(T x)
+{
+ __target_switch
+ {
+ case cpp: __intrinsic_asm "$P_atanh($0)";
+ case cuda: __intrinsic_asm "$P_atanh($0)";
+ case glsl: __intrinsic_asm "atanh";
+ case metal: __intrinsic_asm "atanh";
+ case spirv: return spirv_asm {
+ OpExtInst $$T result glsl450 Atanh $x
+ };
+ default:
+ return T(0.5) * log((T(1) + x) / (T(1) - x));
+ }
+}
+
+// Inverse hyperbolic tangent (vector).
+// glsl/metal emit `atanh` on the whole vector and spirv uses the `glsl450`
+// `Atanh` instruction; other targets go through VECTOR_MAP_UNARY with `atanh`.
+// The capability is GLSL_130, the same one the scalar `atanh` and the vector
+// `asinh`/`acosh` overloads declare, so all six functions are available together.
+__generic<T : __BuiltinFloatingPointType, let N:int>
+[__readNone]
+[ForceInline]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, GLSL_130)]
+vector<T,N> atanh(vector<T,N> x)
+{
+ __target_switch
+ {
+ case glsl: __intrinsic_asm "atanh";
+ case metal: __intrinsic_asm "atanh";
+ case spirv: return spirv_asm {
+ OpExtInst $$vector<T,N> result glsl450 Atanh $x
+ };
+ default:
+ VECTOR_MAP_UNARY(T, N, atanh, x);
+ }
+}
+
// Ceiling (HLSL SM 1.0)
__generic<T : __BuiltinFloatingPointType>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
T ceil(T x)
{
__target_switch
@@ -5133,6 +5302,7 @@ T ceil(T x)
case cuda: __intrinsic_asm "$P_ceil($0)";
case glsl: __intrinsic_asm "ceil";
case hlsl: __intrinsic_asm "ceil";
+ case metal: __intrinsic_asm "ceil";
case spirv: return spirv_asm {
OpExtInst $$T result glsl450 Ceil $x
};
@@ -5141,13 +5311,14 @@ T ceil(T x)
__generic<T : __BuiltinFloatingPointType, let N : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
vector<T, N> ceil(vector<T, N> x)
{
__target_switch
{
case glsl: __intrinsic_asm "ceil";
case hlsl: __intrinsic_asm "ceil";
+ case metal: __intrinsic_asm "ceil";
case spirv: return spirv_asm {
OpExtInst $$vector<T, N> result glsl450 Ceil $x
};
@@ -5158,7 +5329,7 @@ vector<T, N> ceil(vector<T, N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
matrix<T, N, M> ceil(matrix<T, N, M> x)
{
__target_switch
@@ -5169,6 +5340,87 @@ matrix<T, N, M> ceil(matrix<T, N, M> x)
}
}
+// Copy-sign
+
+// Bitwise copysign for half vectors: keeps the low 15 bits of `x` and takes
+// bit 15 (the sign) from `y`.
+__generic<let N: int>
+[__readNone]
+[require(cpp_cuda_glsl_hlsl_metal_spirv)]
+vector<half,N> copysign_half(vector<half,N> x, vector<half,N> y)
+{
+ let signBit = uint16_t(1) << uint16_t(15);
+ vector<uint16_t,N> magnitudeOfX = reinterpret<vector<uint16_t,N>>(x) & (signBit - uint16_t(1));
+ vector<uint16_t,N> signOfY = reinterpret<vector<uint16_t,N>>(y) & signBit;
+ // The two operands have no set bits in common, so OR yields the same pattern as adding them.
+ vector<uint16_t,N> merged = magnitudeOfX | signOfY;
+ return reinterpret<vector<half,N>>(merged);
+}
+
+// Bitwise copysign for float vectors: keeps the low 31 bits of `x` and takes
+// bit 31 (the sign) from `y`.
+__generic<let N: int>
+[__readNone]
+[require(cpp_cuda_glsl_hlsl_metal_spirv)]
+vector<float,N> copysign_float(vector<float,N> x, vector<float,N> y)
+{
+ let signBit = uint32_t(1) << uint32_t(31);
+ vector<uint32_t,N> magnitudeOfX = reinterpret<vector<uint32_t,N>>(x) & (signBit - uint32_t(1));
+ vector<uint32_t,N> signOfY = reinterpret<vector<uint32_t,N>>(y) & signBit;
+ // The two operands have no set bits in common, so OR yields the same pattern as adding them.
+ vector<uint32_t,N> merged = magnitudeOfX | signOfY;
+ return reinterpret<vector<float,N>>(merged);
+}
+
+// Bitwise copysign for double vectors: keeps the low 63 bits of `x` and takes
+// bit 63 (the sign) from `y`.
+__generic<let N: int>
+[__readNone]
+[require(cpp_cuda_glsl_hlsl_metal_spirv)]
+vector<double,N> copysign_double(vector<double,N> x, vector<double,N> y)
+{
+ let signBit = uint64_t(1) << uint64_t(63);
+ vector<uint64_t,N> magnitudeOfX = reinterpret<vector<uint64_t,N>>(x) & (signBit - uint64_t(1));
+ vector<uint64_t,N> signOfY = reinterpret<vector<uint64_t,N>>(y) & signBit;
+ // The two operands have no set bits in common, so OR yields the same pattern as adding them.
+ vector<uint64_t,N> merged = magnitudeOfX | signOfY;
+ return reinterpret<vector<double,N>>(merged);
+}
+
+// Converts a vector with floating-point element type U into a vector of the
+// same length N with floating-point element type T, mapped to `kIROp_FloatCast`.
+__generic<T:__BuiltinFloatingPointType, U:__BuiltinFloatingPointType, let N : int>
+__intrinsic_op($(kIROp_FloatCast))
+vector<T,N> __real_cast(vector<U,N> val);
+
+// Vector copysign: the result has the magnitude bits of `x` and the sign bit of `y`.
+// Metal emits its own `copysign`; other targets pick the copysign_* helper that
+// matches T (half, float, otherwise double) and cast in and out with __real_cast.
+__generic<T : __BuiltinFloatingPointType, let N: int>
+[__readNone]
+[require(cpp_cuda_glsl_hlsl_metal_spirv)]
+vector<T,N> copysign(vector<T,N> x, vector<T,N> y)
+{
+ __target_switch
+ {
+ case metal: __intrinsic_asm "copysign";
+ default:
+ {
+ // The sign of -0.0 needs to be respected, so the sign is moved on the raw
+ // bit pattern by the copysign_* helpers.
+ if (T is half)
+ return __real_cast<T>(copysign_half(
+ __real_cast<half>(x),
+ __real_cast<half>(y)));
+ if (T is float)
+ return __real_cast<T>(copysign_float(
+ __real_cast<float>(x),
+ __real_cast<float>(y)));
+ // Any T that is neither half nor float goes through the double helper.
+ return __real_cast<T>(copysign_double(
+ __real_cast<double>(x),
+ __real_cast<double>(y)));
+ }
+ }
+}
+
+// Scalar copysign: Metal emits its own `copysign`; other targets wrap both
+// arguments in 1-element vectors, call the vector overload and return element 0.
+__generic<T : __BuiltinFloatingPointType>
+[__readNone]
+[require(cpp_cuda_glsl_hlsl_metal_spirv)]
+T copysign(T x, T y)
+{
+ __target_switch
+ {
+ case metal: __intrinsic_asm "copysign";
+ default:
+ return copysign(vector<T,1>(x), vector<T,1>(y))[0];
+ }
+}
+
// Check access status to tiled resource
bool CheckAccessFullyMapped(uint status);
@@ -5320,7 +5572,7 @@ void clip(matrix<T,N,M> x)
// Cosine
__generic<T : __BuiltinFloatingPointType>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
T cos(T x)
{
__target_switch
@@ -5329,6 +5581,7 @@ T cos(T x)
case cuda: __intrinsic_asm "$P_cos($0)";
case glsl: __intrinsic_asm "cos";
case hlsl: __intrinsic_asm "cos";
+ case metal: __intrinsic_asm "cos";
case spirv: return spirv_asm {
OpExtInst $$T result glsl450 Cos $x
};
@@ -5337,13 +5590,14 @@ T cos(T x)
__generic<T : __BuiltinFloatingPointType, let N : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
vector<T, N> cos(vector<T, N> x)
{
__target_switch
{
case glsl: __intrinsic_asm "cos";
case hlsl: __intrinsic_asm "cos";
+ case metal: __intrinsic_asm "cos";
case spirv: return spirv_asm {
OpExtInst $$vector<T, N> result glsl450 Cos $x
};
@@ -5354,7 +5608,7 @@ vector<T, N> cos(vector<T, N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
matrix<T, N, M> cos(matrix<T, N, M> x)
{
__target_switch
@@ -5368,7 +5622,7 @@ matrix<T, N, M> cos(matrix<T, N, M> x)
// Hyperbolic cosine
__generic<T : __BuiltinFloatingPointType>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv)]
T cosh(T x)
{
__target_switch
@@ -5377,6 +5631,7 @@ T cosh(T x)
case cuda: __intrinsic_asm "$P_cosh($0)";
case glsl: __intrinsic_asm "cosh";
case hlsl: __intrinsic_asm "cosh";
+ case metal: __intrinsic_asm "cosh";
case spirv: return spirv_asm {
OpExtInst $$T result glsl450 Cosh $x
};
@@ -5385,13 +5640,14 @@ T cosh(T x)
__generic<T : __BuiltinFloatingPointType, let N : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv)]
vector<T,N> cosh(vector<T,N> x)
{
__target_switch
{
case glsl: __intrinsic_asm "cosh";
case hlsl: __intrinsic_asm "cosh";
+ case metal: __intrinsic_asm "cosh";
case spirv: return spirv_asm {
OpExtInst $$vector<T,N> result glsl450 Cosh $x
};
@@ -5402,7 +5658,7 @@ vector<T,N> cosh(vector<T,N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv)]
matrix<T, N, M> cosh(matrix<T, N, M> x)
{
__target_switch
@@ -5413,6 +5669,35 @@ matrix<T, N, M> cosh(matrix<T, N, M> x)
}
}
+// Cosine of pi * x (scalar).
+// Metal emits its own `cospi`; other targets compute cos(T.getPi() * x).
+
+__generic<T : __BuiltinFloatingPointType>
+[__readNone]
+[require(cpp_cuda_glsl_hlsl_metal_spirv)]
+T cospi(T x)
+{
+ __target_switch
+ {
+ case metal: __intrinsic_asm "cospi";
+ default:
+ return cos(T.getPi() * x);
+ }
+}
+
+// Cosine of pi * x (vector).
+// Metal emits its own `cospi`; other targets scale the vector by the scalar
+// T.getPi() and call the vector `cos`.
+__generic<T : __BuiltinFloatingPointType, let N: int>
+[__readNone]
+[require(cpp_cuda_glsl_hlsl_metal_spirv)]
+vector<T,N> cospi(vector<T,N> x)
+{
+ __target_switch
+ {
+ case metal: __intrinsic_asm "cospi";
+ default:
+ return cos(T.getPi() * x);
+ }
+}
+
+
// Population count
[__readNone]
[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)]
@@ -5776,6 +6061,63 @@ T distance(T x, T y)
}
}
+// fdim (scalar): positive difference of x and y.
+// Metal emits its own `fdim`; other targets compute max(0, x - y).
+// The capability set must include `metal`, otherwise the metal case below can
+// never be selected.
+
+__generic<T : __BuiltinFloatingPointType>
+[__readNone]
+[require(cpp_cuda_glsl_hlsl_metal_spirv)]
+T fdim(T x, T y)
+{
+ __target_switch
+ {
+ case metal: __intrinsic_asm "fdim";
+ default:
+ return max(T(0), x - y);
+ }
+}
+
+// fdim (vector): positive difference of x and y.
+// Metal emits its own `fdim`; other targets compute max(0, x - y).
+// The capability set must include `metal`, otherwise the metal case below can
+// never be selected.
+__generic<T : __BuiltinFloatingPointType, let N : int>
+[__readNone]
+[require(cpp_cuda_glsl_hlsl_metal_spirv)]
+vector<T,N> fdim(vector<T,N> x, vector<T,N> y)
+{
+ __target_switch
+ {
+ case metal: __intrinsic_asm "fdim";
+ default:
+ return max(T(0), x - y);
+ }
+}
+
+// divide (scalar): Metal emits its own `divide`; other targets use the `/` operator.
+
+__generic<T : __BuiltinFloatingPointType>
+[__readNone]
+[require(cpp_cuda_glsl_hlsl_metal_spirv)]
+T divide(T x, T y)
+{
+ __target_switch
+ {
+ case metal: __intrinsic_asm "divide";
+ default:
+ return x / y;
+ }
+}
+
+// divide (vector): Metal emits its own `divide`; other targets use the `/`
+// operator on the two vectors.
+__generic<T : __BuiltinFloatingPointType, let N: int>
+[__readNone]
+[require(cpp_cuda_glsl_hlsl_metal_spirv)]
+vector<T,N> divide(vector<T,N> x, vector<T,N> y)
+{
+ __target_switch
+ {
+ case metal: __intrinsic_asm "divide";
+ default:
+ return x / y;
+ }
+}
+
+
// Vector dot product
__generic<T : __BuiltinFloatingPointType>
@@ -6005,7 +6347,7 @@ matrix<T,N,M> EvaluateAttributeSnapped(matrix<T,N,M> x, int2 offset)
__generic<T : __BuiltinFloatingPointType>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
T exp(T x)
{
__target_switch
@@ -6014,6 +6356,7 @@ T exp(T x)
case cuda: __intrinsic_asm "$P_exp($0)";
case glsl: __intrinsic_asm "exp";
case hlsl: __intrinsic_asm "exp";
+ case metal: __intrinsic_asm "exp";
case spirv: return spirv_asm {
OpExtInst $$T result glsl450 Exp $x
};
@@ -6022,13 +6365,14 @@ T exp(T x)
__generic<T : __BuiltinFloatingPointType, let N : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
vector<T, N> exp(vector<T, N> x)
{
__target_switch
{
case glsl: __intrinsic_asm "exp";
case hlsl: __intrinsic_asm "exp";
+ case metal: __intrinsic_asm "exp";
case spirv: return spirv_asm {
OpExtInst $$vector<T, N> result glsl450 Exp $x
};
@@ -6039,7 +6383,7 @@ vector<T, N> exp(vector<T, N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
matrix<T, N, M> exp(matrix<T, N, M> x)
{
__target_switch
@@ -6054,7 +6398,7 @@ matrix<T, N, M> exp(matrix<T, N, M> x)
__generic<T : __BuiltinFloatingPointType>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
T exp2(T x)
{
__target_switch
@@ -6068,13 +6412,14 @@ T exp2(T x)
}
else
{
- float xf = __floatCast<float>(x);
+ float xf = __realCast<float>(x);
return T(spirv_asm {
result:$$float = OpExtInst glsl450 Exp2 $xf
});
}
case hlsl:
__intrinsic_asm "exp2($0)";
+ case metal: __intrinsic_asm "exp2";
case cpp:
__intrinsic_asm "$P_exp2($0)";
case cuda:
@@ -6085,7 +6430,7 @@ T exp2(T x)
__generic<T : __BuiltinFloatingPointType, let N : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
vector<T,N> exp2(vector<T,N> x)
{
__target_switch
@@ -6093,6 +6438,7 @@ vector<T,N> exp2(vector<T,N> x)
case glsl:
__intrinsic_asm "exp2($0)";
case hlsl: __intrinsic_asm "exp2";
+ case metal: __intrinsic_asm "exp2";
case spirv: return spirv_asm {
OpExtInst $$vector<T,N> result glsl450 Exp2 $x
};
@@ -6103,7 +6449,7 @@ vector<T,N> exp2(vector<T,N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
matrix<T,N,M> exp2(matrix<T,N,M> x)
{
__target_switch
@@ -6114,6 +6460,36 @@ matrix<T,N,M> exp2(matrix<T,N,M> x)
}
}
+// Base-10 exponent (scalar): 10 raised to the power x.
+// Metal emits its own `exp10`; other targets use exp(x * ln(10)).
+
+__generic<T : __BuiltinFloatingPointType>
+[__readNone]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
+T exp10(T x)
+{
+ __target_switch
+ {
+ case metal: __intrinsic_asm "exp10";
+ default:
+ const T ln10 = T(2.302585092994045901); // ln(10)
+ return exp(x * ln10);
+ }
+}
+
+// Base-10 exponent (vector): 10 raised to the power x.
+// Metal emits its own `exp10`; other targets use exp(x * ln(10)).
+// ln(10) is written with the same digits as in the scalar overload so that
+// vector and scalar results agree for every element type, including double.
+__generic<T : __BuiltinFloatingPointType, let N: int>
+[__readNone]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
+vector<T,N> exp10(vector<T,N> x)
+{
+ __target_switch
+ {
+ case metal: __intrinsic_asm "exp10";
+ default:
+ const T ln10 = T(2.302585092994045901); // ln(10)
+ return exp(x * ln10);
+ }
+}
+
// Convert 16-bit float stored in low bits of integer
__glsl_version(420)
@@ -6439,7 +6815,7 @@ vector<uint,N> firstbitlow(vector<uint,N> value)
__generic<T : __BuiltinFloatingPointType>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
T floor(T x)
{
__target_switch
@@ -6448,6 +6824,7 @@ T floor(T x)
case cuda: __intrinsic_asm "$P_floor($0)";
case glsl: __intrinsic_asm "floor";
case hlsl: __intrinsic_asm "floor";
+ case metal: __intrinsic_asm "floor";
case spirv: return spirv_asm {
OpExtInst $$T result glsl450 Floor $x
};
@@ -6456,13 +6833,14 @@ T floor(T x)
__generic<T : __BuiltinFloatingPointType, let N : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
vector<T, N> floor(vector<T, N> x)
{
__target_switch
{
case glsl: __intrinsic_asm "floor";
case hlsl: __intrinsic_asm "floor";
+ case metal: __intrinsic_asm "floor";
case spirv: return spirv_asm {
OpExtInst $$vector<T, N> result glsl450 Floor $x
};
@@ -6473,7 +6851,7 @@ vector<T, N> floor(vector<T, N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
matrix<T, N, M> floor(matrix<T, N, M> x)
{
__target_switch
@@ -6487,7 +6865,7 @@ matrix<T, N, M> floor(matrix<T, N, M> x)
// Fused multiply-add
__generic<T : __BuiltinFloatingPointType>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_5_0)]
T fma(T a, T b, T c)
{
__target_switch
@@ -6500,6 +6878,7 @@ T fma(T a, T b, T c)
return mad(a, b, c);
else
__intrinsic_asm "fma($0, $1, $2)";
+ case metal: __intrinsic_asm "fma";
case spirv: return spirv_asm {
OpExtInst $$T result glsl450 Fma $a $b $c
};
@@ -6510,13 +6889,14 @@ T fma(T a, T b, T c)
__generic<T : __BuiltinFloatingPointType, let N : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_5_0)]
vector<T, N> fma(vector<T, N> a, vector<T, N> b, vector<T, N> c)
{
__target_switch
{
case glsl: __intrinsic_asm "fma";
case hlsl: __intrinsic_asm "fma";
+ case metal: __intrinsic_asm "fma";
case spirv: return spirv_asm {
OpExtInst $$vector<T, N> result glsl450 Fma $a $b $c
};
@@ -6527,7 +6907,7 @@ vector<T, N> fma(vector<T, N> a, vector<T, N> b, vector<T, N> c)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_5_0)]
matrix<T, N, M> fma(matrix<T, N, M> a, matrix<T, N, M> b, matrix<T, N, M> c)
{
__target_switch
@@ -6541,19 +6921,24 @@ matrix<T, N, M> fma(matrix<T, N, M> a, matrix<T, N, M> b, matrix<T, N, M> c)
// Floating point remainder of x/y
__generic<T : __BuiltinFloatingPointType>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[ForceInline]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
T fmod(T x, T y)
{
- // In HLSL, fmod returns a remainder.
+ // In HLSL, `fmod` returns a remainder.
// Definition of `fmod` in HLSL is,
// "The floating-point remainder is calculated such that x = i * y + f,
// where i is an integer, f has the same sign as x, and the absolute value
// of f is less than the absolute value of y."
//
- // In GLSL, mod is a Modulus function.
+ // In GLSL, `mod` is a Modulus function.
// OpenGL document defines "Modulus" as "Returns x - y * floor(x / y)".
// The use of "Floor()" makes the difference.
//
+ // In Metal, `fmod` is a Modulus function.
+ // The Metal documentation defines it as "Returns x - y * trunc(x/y)".
+ // Note that the function name is the same as in HLSL, but it behaves differently.
+ //
// The tricky ones are when x or y is a negative value.
//
// | Remainder | Modulus
@@ -6588,10 +6973,13 @@ T fmod(T x, T y)
{
case cpp: __intrinsic_asm "$P_fmod($0, $1)";
case cuda: __intrinsic_asm "$P_fmod($0, $1)";
- case hlsl: __intrinsic_asm "fmod";
case glsl:
// GLSL doesn't have a function for remainder.
- __intrinsic_asm "(($0 < 0) ? -mod(-$0,abs($1)) : mod($0,abs($1)))";
+ __intrinsic_asm "(($0 < 0.0) ? -mod(-$0,abs($1)) : mod($0,abs($1)))";
+ case hlsl: __intrinsic_asm "fmod";
+ case metal:
+ // Metal doesn't have a function for remainder.
+ __intrinsic_asm "(($0 < 0.0) ? -fmod(-$0,abs($1)) : fmod($0,abs($1)))";
case spirv:
// OpFRem return "The floating-point remainder whose sign
// matches the sign of Operand 1", where Operand 1 is "x".
@@ -6604,7 +6992,8 @@ T fmod(T x, T y)
__generic<T : __BuiltinFloatingPointType, let N : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[ForceInline]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
vector<T, N> fmod(vector<T, N> x, vector<T, N> y)
{
__target_switch
@@ -6620,7 +7009,8 @@ vector<T, N> fmod(vector<T, N> x, vector<T, N> y)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[ForceInline]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
matrix<T, N, M> fmod(matrix<T, N, M> x, matrix<T, N, M> y)
{
__target_switch
@@ -6634,7 +7024,7 @@ matrix<T, N, M> fmod(matrix<T, N, M> x, matrix<T, N, M> y)
// Fractional part
__generic<T : __BuiltinFloatingPointType>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
T frac(T x)
{
__target_switch
@@ -6643,6 +7033,7 @@ T frac(T x)
case cuda: __intrinsic_asm "$P_frac($0)";
case glsl: __intrinsic_asm "fract";
case hlsl: __intrinsic_asm "frac";
+ case metal: __intrinsic_asm "fract";
case spirv: return spirv_asm {
OpExtInst $$T result glsl450 Fract $x
};
@@ -6651,13 +7042,14 @@ T frac(T x)
__generic<T : __BuiltinFloatingPointType, let N : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
vector<T, N> frac(vector<T, N> x)
{
__target_switch
{
case glsl: __intrinsic_asm "fract";
case hlsl: __intrinsic_asm "frac";
+ case metal: __intrinsic_asm "fract";
case spirv: return spirv_asm {
OpExtInst $$vector<T, N> result glsl450 Fract $x
};
@@ -6673,10 +7065,29 @@ matrix<T, N, M> frac(matrix<T, N, M> x)
MATRIX_MAP_UNARY(T, N, M, frac, x);
}
+// `fract` is an alternate spelling of `frac`: it forwards to `frac(x)` and
+// returns that result unchanged.
+__generic<T : __BuiltinFloatingPointType>
+[__readNone]
+[ForceInline]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
+T fract(T x)
+{
+    const T fractionalPart = frac(x);
+    return fractionalPart;
+}
+
+// Vector overload of `fract`: forwards to the vector `frac` and returns its
+// result unchanged.
+__generic<T : __BuiltinFloatingPointType, let N:int>
+[__readNone]
+[ForceInline]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
+vector<T, N> fract(vector<T, N> x)
+{
+    const vector<T, N> fractionalPart = frac(x);
+    return fractionalPart;
+}
+
+
// Split float into mantissa and exponent
__generic<T : __BuiltinFloatingPointType>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
T frexp(T x, out int exp)
{
__target_switch
@@ -6685,6 +7096,7 @@ T frexp(T x, out int exp)
case cuda: __intrinsic_asm "$P_frexp($0, $1)";
case glsl: __intrinsic_asm "frexp";
case hlsl: __intrinsic_asm "frexp";
+ case metal: __intrinsic_asm "frexp($0, *($1))";
case spirv: return spirv_asm {
result:$$T = OpExtInst glsl450 Frexp $x &exp
};
@@ -6693,12 +7105,14 @@ T frexp(T x, out int exp)
__generic<T : __BuiltinFloatingPointType, let N : int>
[__readNone]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
vector<T, N> frexp(vector<T, N> x, out vector<int, N> exp)
{
__target_switch
{
- case hlsl: __intrinsic_asm "frexp";
case glsl: __intrinsic_asm "frexp";
+ case hlsl: __intrinsic_asm "frexp";
+ case metal: __intrinsic_asm "frexp($0, *($1))";
case spirv: return spirv_asm {
result:$$vector<T, N> = OpExtInst glsl450 Frexp $x &exp
};
@@ -6709,7 +7123,7 @@ vector<T, N> frexp(vector<T, N> x, out vector<int, N> exp)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int, let L : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
matrix<T, N, M> frexp(matrix<T, N, M> x, out matrix<int, N, M, L> exp)
{
__target_switch
@@ -7920,7 +8334,7 @@ matrix<bool, N, M> isnan(matrix<T, N, M> x)
__generic<T : __BuiltinFloatingPointType>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
T ldexp(T x, T exp)
{
__target_switch
@@ -7933,7 +8347,7 @@ T ldexp(T x, T exp)
__generic<T : __BuiltinFloatingPointType, let N : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
vector<T, N> ldexp(vector<T, N> x, vector<T, N> exp)
{
__target_switch
@@ -7946,7 +8360,7 @@ vector<T, N> ldexp(vector<T, N> x, vector<T, N> exp)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
matrix<T, N, M> ldexp(matrix<T, N, M> x, matrix<T, N, M> exp)
{
__target_switch
@@ -7957,6 +8371,47 @@ matrix<T, N, M> ldexp(matrix<T, N, M> x, matrix<T, N, M> exp)
}
}
+// `ldexp` overload that accepts an integer exponent.
+//  - glsl / hlsl / metal: emit the target's own `ldexp`.
+//  - spirv: emit the GLSL.std.450 `Ldexp` extended instruction.
+//  - anything else: convert the exponent to `T` and defer to the overload
+//    whose exponent has the same floating-point type as `x`.
+__generic<T : __BuiltinFloatingPointType, E : __BuiltinIntegerType>
+[__readNone]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
+T ldexp(T x, E exp)
+{
+    __target_switch
+    {
+    case glsl: __intrinsic_asm "ldexp";
+    case hlsl: __intrinsic_asm "ldexp";
+    case metal: __intrinsic_asm "ldexp";
+    case spirv: return spirv_asm {
+        OpExtInst $$T result glsl450 Ldexp $x $exp
+    };
+    default:
+        {
+            // Integer -> real conversion of the exponent.
+            T exponentAsReal = __realCast<T>(exp);
+            return ldexp(x, exponentAsReal);
+        }
+    }
+}
+
+// Vector `ldexp` overload that accepts a vector of integer exponents.
+// glsl / hlsl / metal emit `ldexp`, spirv emits GLSL.std.450 `Ldexp`; other
+// targets convert every exponent element to `T` and defer to the overload
+// whose exponent vector has the same element type as `x`.
+__generic<T : __BuiltinFloatingPointType, E : __BuiltinIntegerType, let N : int>
+[__readNone]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
+vector<T, N> ldexp(vector<T, N> x, vector<E, N> exp)
+{
+    __target_switch
+    {
+    case glsl: __intrinsic_asm "ldexp";
+    case hlsl: __intrinsic_asm "ldexp";
+    case metal: __intrinsic_asm "ldexp";
+    case spirv: return spirv_asm {
+        OpExtInst $$vector<T,N> result glsl450 Ldexp $x $exp
+    };
+    default:
+        // Element-by-element integer -> real conversion of the exponents.
+        vector<T,N> exponentAsReal;
+        [ForceUnroll]
+        for (int lane = 0; lane < N; ++lane)
+        {
+            exponentAsReal[lane] = __realCast<T>(exp[lane]);
+        }
+        return ldexp(x, exponentAsReal);
+    }
+}
+
+
// Vector length
__generic<T : __BuiltinFloatingPointType, let N : int>
[__readNone]
@@ -8058,7 +8513,7 @@ float4 lit(float n_dot_l, float n_dot_h, float m)
// Base-e logarithm
__generic<T : __BuiltinFloatingPointType>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
T log(T x)
{
__target_switch
@@ -8067,6 +8522,7 @@ T log(T x)
case cuda: __intrinsic_asm "$P_log($0)";
case glsl: __intrinsic_asm "log";
case hlsl: __intrinsic_asm "log";
+ case metal: __intrinsic_asm "log";
case spirv: return spirv_asm {
OpExtInst $$T result glsl450 Log $x
};
@@ -8075,13 +8531,14 @@ T log(T x)
__generic<T : __BuiltinFloatingPointType, let N : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
vector<T, N> log(vector<T, N> x)
{
__target_switch
{
case glsl: __intrinsic_asm "log";
case hlsl: __intrinsic_asm "log";
+ case metal: __intrinsic_asm "log";
case spirv: return spirv_asm {
OpExtInst $$vector<T, N> result glsl450 Log $x
};
@@ -8092,7 +8549,7 @@ vector<T, N> log(vector<T, N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
matrix<T, N, M> log(matrix<T, N, M> x)
{
__target_switch
@@ -8106,12 +8563,13 @@ matrix<T, N, M> log(matrix<T, N, M> x)
// Base-10 logarithm
__generic<T : __BuiltinFloatingPointType>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
T log10(T x)
{
__target_switch
{
case hlsl: __intrinsic_asm "log10";
+ case metal: __intrinsic_asm "log10";
case glsl: __intrinsic_asm "(log( $0 ) * $S0( 0.43429448190325182765112891891661) )";
case cuda: __intrinsic_asm "$P_log10($0)";
case cpp: __intrinsic_asm "$P_log10($0)";
@@ -8128,12 +8586,13 @@ T log10(T x)
__generic<T : __BuiltinFloatingPointType, let N : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
vector<T,N> log10(vector<T,N> x)
{
__target_switch
{
case hlsl: __intrinsic_asm "log10";
+ case metal: __intrinsic_asm "log10";
case glsl: __intrinsic_asm "(log( $0 ) * $S0(0.43429448190325182765112891891661) )";
case spirv:
{
@@ -8150,7 +8609,7 @@ vector<T,N> log10(vector<T,N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
matrix<T,N,M> log10(matrix<T,N,M> x)
{
__target_switch
@@ -8164,7 +8623,7 @@ matrix<T,N,M> log10(matrix<T,N,M> x)
// Base-2 logarithm
__generic<T : __BuiltinFloatingPointType>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
T log2(T x)
{
__target_switch
@@ -8173,6 +8632,7 @@ T log2(T x)
case cuda: __intrinsic_asm "$P_log2($0)";
case glsl: __intrinsic_asm "log2";
case hlsl: __intrinsic_asm "log2";
+ case metal: __intrinsic_asm "log2";
case spirv: return spirv_asm {
OpExtInst $$T result glsl450 Log2 $x
};
@@ -8181,13 +8641,14 @@ T log2(T x)
__generic<T : __BuiltinFloatingPointType, let N : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
vector<T,N> log2(vector<T,N> x)
{
__target_switch
{
case glsl: __intrinsic_asm "log2";
case hlsl: __intrinsic_asm "log2";
+ case metal: __intrinsic_asm "log2";
case spirv: return spirv_asm {
OpExtInst $$vector<T,N> result glsl450 Log2 $x
};
@@ -8198,7 +8659,7 @@ vector<T,N> log2(vector<T,N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
matrix<T,N,M> log2(matrix<T,N,M> x)
{
__target_switch
@@ -8213,7 +8674,7 @@ matrix<T,N,M> log2(matrix<T,N,M> x)
__generic<T : __BuiltinFloatingPointType>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_5_0)]
T mad(T mvalue, T avalue, T bvalue)
{
__target_switch
@@ -8222,6 +8683,7 @@ T mad(T mvalue, T avalue, T bvalue)
case cuda: __intrinsic_asm "$P_fma($0, $1, $2)";
case glsl: __intrinsic_asm "fma";
case hlsl: __intrinsic_asm "mad";
+ case metal: __intrinsic_asm "fma";
case spirv: return spirv_asm {
OpExtInst $$T result glsl450 Fma $mvalue $avalue $bvalue
};
@@ -8230,13 +8692,14 @@ T mad(T mvalue, T avalue, T bvalue)
__generic<T : __BuiltinFloatingPointType, let N : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_5_0)]
vector<T, N> mad(vector<T, N> mvalue, vector<T, N> avalue, vector<T, N> bvalue)
{
__target_switch
{
case glsl: __intrinsic_asm "fma";
case hlsl: __intrinsic_asm "mad";
+ case metal: __intrinsic_asm "fma";
case spirv: return spirv_asm {
OpExtInst $$vector<T, N> result glsl450 Fma $mvalue $avalue $bvalue
};
@@ -8247,7 +8710,7 @@ vector<T, N> mad(vector<T, N> mvalue, vector<T, N> avalue, vector<T, N> bvalue)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_5_0)]
matrix<T, N, M> mad(matrix<T, N, M> mvalue, matrix<T, N, M> avalue, matrix<T, N, M> bvalue)
{
__target_switch
@@ -8385,12 +8848,13 @@ matrix<T, N, M> max(matrix<T, N, M> x, matrix<T, N, M> y)
__generic<T : __BuiltinFloatingPointType>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
T max(T x, T y)
{
__target_switch
{
case hlsl: __intrinsic_asm "max";
+ case metal: __intrinsic_asm "max";
case glsl: __intrinsic_asm "max";
case cuda: __intrinsic_asm "$P_max($0, $1)";
case cpp: __intrinsic_asm "$P_max($0, $1)";
@@ -8402,12 +8866,13 @@ T max(T x, T y)
__generic<T : __BuiltinFloatingPointType, let N : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
vector<T, N> max(vector<T, N> x, vector<T, N> y)
{
__target_switch
{
case hlsl: __intrinsic_asm "max";
+ case metal: __intrinsic_asm "max";
case glsl: __intrinsic_asm "max";
case spirv: return spirv_asm {
result:$$vector<T, N> = OpExtInst glsl450 FMax $x $y
@@ -8419,7 +8884,7 @@ vector<T, N> max(vector<T, N> x, vector<T, N> y)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
matrix<T, N, M> max(matrix<T, N, M> x, matrix<T, N, M> y)
{
__target_switch
@@ -8430,6 +8895,107 @@ matrix<T, N, M> max(matrix<T, N, M> x, matrix<T, N, M> y)
}
}
+// Largest of three values. Metal emits `max3`; every other target nests two
+// calls to `max`.
+__generic<T : __BuiltinFloatingPointType>
+[__readNone]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
+T max3(T x, T y, T z)
+{
+    __target_switch
+    {
+    case metal: __intrinsic_asm "max3";
+    default:
+        {
+            T largerOfYZ = max(y, z);
+            return max(x, largerOfYZ);
+        }
+    }
+}
+
+// Vector overload of `max3`. Metal emits `max3`; every other target nests two
+// calls to the vector `max`.
+__generic<T : __BuiltinFloatingPointType, let N: int>
+[__readNone]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
+vector<T,N> max3(vector<T,N> x, vector<T,N> y, vector<T,N> z)
+{
+    __target_switch
+    {
+    case metal: __intrinsic_asm "max3";
+    default:
+        {
+            vector<T,N> largerOfYZ = max(y, z);
+            return max(x, largerOfYZ);
+        }
+    }
+}
+
+// NaN-aware maximum of two values.
+// Metal emits `fmax`. On every other target the fallback returns the other
+// operand when exactly one operand is NaN, and `max(x, y)` otherwise.
+__generic<T : __BuiltinFloatingPointType>
+[__readNone]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
+T fmax(T x, T y)
+{
+    __target_switch
+    {
+    case metal: __intrinsic_asm "fmax";
+    default:
+        if (isnan(x)) return y;
+        // Guard `y` as well: `max` is not required to ignore a NaN operand
+        // on every target, so never hand it one.
+        if (isnan(y)) return x;
+        return max(x, y);
+    }
+}
+
+// Vector overload of `fmax`. Metal emits `fmax`; other targets go through
+// VECTOR_MAP_BINARY with the scalar `fmax` (macro defined outside this view,
+// presumably applying it element by element -- confirm).
+__generic<T : __BuiltinFloatingPointType, let N: int>
+[__readNone]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
+vector<T,N> fmax(vector<T,N> x, vector<T,N> y)
+{
+ __target_switch
+ {
+ case metal: __intrinsic_asm "fmax";
+ default:
+ VECTOR_MAP_BINARY(T, N, fmax, x, y);
+ }
+}
+
+// NaN-aware maximum of three values.
+// Metal emits `fmax3`. The fallback ignores NaN operands: it returns the
+// maximum of the operands that are not NaN, and only yields NaN when all
+// three operands are NaN.
+__generic<T : __BuiltinFloatingPointType>
+[__readNone]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
+T fmax3(T x, T y, T z)
+{
+    __target_switch
+    {
+    case metal: __intrinsic_asm "fmax3";
+    default:
+        {
+            bool isnanX = isnan(x);
+            bool isnanY = isnan(y);
+            bool isnanZ = isnan(z);
+
+            if (isnanX)
+            {
+                // x is out; if y is out too, z is all that is left
+                // (and is the NaN result when everything is NaN).
+                if (isnanY)
+                    return z;
+                // y is valid: it still has to be compared against z
+                // unless z is NaN.
+                return isnanZ ? y : max(y, z);
+            }
+            else if (isnanY)
+            {
+                if (isnanZ)
+                    return x;
+                return max(x, z);
+            }
+            else if (isnanZ)
+            {
+                return max(x, y);
+            }
+
+            // No NaN operands.
+            return max(y, max(x, z));
+        }
+    }
+}
+
+// Vector overload of `fmax3`. Metal emits `fmax3`; other targets go through
+// VECTOR_MAP_TRINARY with the scalar `fmax3` (macro defined outside this
+// view, presumably applying it element by element -- confirm).
+__generic<T : __BuiltinFloatingPointType, let N: int>
+[__readNone]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
+vector<T,N> fmax3(vector<T,N> x, vector<T,N> y, vector<T,N> z)
+{
+ __target_switch
+ {
+ case metal: __intrinsic_asm "fmax3";
+ default:
+ VECTOR_MAP_TRINARY(T, N, fmax3, x, y, z);
+ }
+}
+
+
// minimum
__generic<T : __BuiltinIntegerType>
__target_intrinsic(hlsl)
@@ -8481,12 +9047,13 @@ matrix<T,N,M> min(matrix<T,N,M> x, matrix<T,N,M> y)
__generic<T : __BuiltinFloatingPointType>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
T min(T x, T y)
{
__target_switch
{
case hlsl: __intrinsic_asm "min";
+ case metal: __intrinsic_asm "min";
case glsl: __intrinsic_asm "min";
case cuda: __intrinsic_asm "$P_min($0, $1)";
case cpp: __intrinsic_asm "$P_min($0, $1)";
@@ -8498,12 +9065,13 @@ T min(T x, T y)
__generic<T : __BuiltinFloatingPointType, let N : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
vector<T,N> min(vector<T,N> x, vector<T,N> y)
{
__target_switch
{
case hlsl: __intrinsic_asm "min";
+ case metal: __intrinsic_asm "min";
case glsl: __intrinsic_asm "min";
case spirv: return spirv_asm {
result:$$vector<T,N> = OpExtInst glsl450 FMin $x $y
@@ -8515,7 +9083,7 @@ vector<T,N> min(vector<T,N> x, vector<T,N> y)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
matrix<T,N,M> min(matrix<T,N,M> x, matrix<T,N,M> y)
{
__target_switch
@@ -8526,16 +9094,212 @@ matrix<T,N,M> min(matrix<T,N,M> x, matrix<T,N,M> y)
}
}
+// Smallest of three values. Metal emits `min3`; every other target nests two
+// calls to `min`.
+__generic<T : __BuiltinFloatingPointType>
+[__readNone]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
+T min3(T x, T y, T z)
+{
+    __target_switch
+    {
+    case metal: __intrinsic_asm "min3";
+    default:
+        {
+            T smallerOfYZ = min(y, z);
+            return min(x, smallerOfYZ);
+        }
+    }
+}
+
+// Vector overload of `min3`. Metal emits `min3`; every other target nests two
+// calls to the vector `min`.
+__generic<T : __BuiltinFloatingPointType, let N : int>
+[__readNone]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
+vector<T,N> min3(vector<T,N> x, vector<T,N> y, vector<T,N> z)
+{
+    __target_switch
+    {
+    case metal: __intrinsic_asm "min3";
+    default:
+        {
+            vector<T,N> smallerOfYZ = min(y, z);
+            return min(x, smallerOfYZ);
+        }
+    }
+}
+
+// NaN-aware minimum of two values.
+// Metal emits `fmin`. On every other target the fallback returns the other
+// operand when exactly one operand is NaN, and `min(x, y)` otherwise.
+__generic<T : __BuiltinFloatingPointType>
+[__readNone]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
+T fmin(T x, T y)
+{
+    __target_switch
+    {
+    case metal: __intrinsic_asm "fmin";
+    default:
+        if (isnan(x)) return y;
+        // Guard `y` as well: `min` is not required to ignore a NaN operand
+        // on every target, so never hand it one.
+        if (isnan(y)) return x;
+        return min(x, y);
+    }
+}
+
+// Vector overload of `fmin`. Metal emits `fmin`; other targets go through
+// VECTOR_MAP_BINARY with the scalar `fmin` (macro defined outside this view,
+// presumably applying it element by element -- confirm).
+__generic<T : __BuiltinFloatingPointType, let N : int>
+[__readNone]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
+vector<T,N> fmin(vector<T,N> x, vector<T,N> y)
+{
+ __target_switch
+ {
+ case metal: __intrinsic_asm "fmin";
+ default:
+ VECTOR_MAP_BINARY(T, N, fmin, x, y);
+ }
+}
+
+
+// NaN-aware minimum of three values.
+// Metal emits `fmin3`. The fallback ignores NaN operands: it returns the
+// minimum of the operands that are not NaN, and only yields NaN when all
+// three operands are NaN.
+__generic<T : __BuiltinFloatingPointType>
+[__readNone]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
+T fmin3(T x, T y, T z)
+{
+    __target_switch
+    {
+    case metal: __intrinsic_asm "fmin3";
+    default:
+        {
+            bool isnanX = isnan(x);
+            bool isnanY = isnan(y);
+            bool isnanZ = isnan(z);
+
+            if (isnanX)
+            {
+                // x is out; if y is out too, z is all that is left
+                // (and is the NaN result when everything is NaN).
+                if (isnanY)
+                    return z;
+                // y is valid: it still has to be compared against z
+                // unless z is NaN.
+                return isnanZ ? y : min(y, z);
+            }
+            else if (isnanY)
+            {
+                if (isnanZ)
+                    return x;
+                return min(x, z);
+            }
+            else if (isnanZ)
+            {
+                return min(x, y);
+            }
+
+            // No NaN operands.
+            return min(x, min(y, z));
+        }
+    }
+}
+
+// Vector overload of `fmin3`. Metal emits `fmin3`; other targets go through
+// VECTOR_MAP_TRINARY with the scalar `fmin3` (macro defined outside this
+// view, presumably applying it element by element -- confirm).
+__generic<T : __BuiltinFloatingPointType, let N : int>
+[__readNone]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
+vector<T,N> fmin3(vector<T,N> x, vector<T,N> y, vector<T,N> z)
+{
+ __target_switch
+ {
+ case metal: __intrinsic_asm "fmin3";
+ default:
+ VECTOR_MAP_TRINARY(T, N, fmin3, x, y, z);
+ }
+}
+
+
+// Median
+// Middle value of three. Metal emits `median3`; other targets build it from
+// two `min` and two `max` calls.
+__generic<T : __BuiltinFloatingPointType>
+[__readNone]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
+T median3(T x, T y, T z)
+{
+    __target_switch
+    {
+    case metal: __intrinsic_asm "median3";
+    default:
+        {
+            // lo  = min(y, z)
+            // hi  = max(y, z)
+            // cap = max(x, lo)
+            // result = min(hi, cap)
+            //
+            //           | lo | hi | cap | result |
+            // ----------+----+----+-----+--------+
+            // x > y > z | z  | y  | x   | y      |
+            // x > z > y | y  | z  | x   | z      |
+            // y > x > z | z  | y  | x   | x      |
+            // y > z > x | z  | y  | z   | z      |
+            // z > x > y | y  | z  | x   | x      |
+            // z > y > x | y  | z  | y   | y      |
+
+            T lo = min(y, z);
+            T hi = max(y, z);
+            T cap = max(x, lo);
+            return min(hi, cap);
+        }
+    }
+}
+
+// Vector overload of `median3`. Metal emits `median3`; other targets build
+// the result from the vector `min` / `max` overloads:
+// min(max(y, z), max(x, min(y, z))).
+__generic<T : __BuiltinFloatingPointType, let N: int>
+[__readNone]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
+vector<T,N> median3(vector<T,N> x, vector<T,N> y, vector<T,N> z)
+{
+    __target_switch
+    {
+    case metal: __intrinsic_asm "median3";
+    default:
+        {
+            vector<T,N> lo = min(y, z);
+            vector<T,N> hi = max(y, z);
+            vector<T,N> cap = max(x, lo);
+            return min(hi, cap);
+        }
+    }
+}
+
+// NaN-aware middle value of three.
+// Metal emits `fmedian3`. The fallback first deals with NaN operands and
+// only calls `median3` when none of the operands is NaN.
+__generic<T : __BuiltinFloatingPointType>
+[__readNone]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
+T fmedian3(T x, T y, T z)
+{
+    __target_switch
+    {
+    case metal: __intrinsic_asm "fmedian3";
+    default:
+        {
+            bool xIsNan = isnan(x);
+            bool yIsNan = isnan(y);
+            bool zIsNan = isnan(z);
+
+            // x unusable: fall back to y, or to z when y is NaN as well.
+            if (xIsNan)
+            {
+                if (yIsNan)
+                    return z;
+                return y;
+            }
+
+            // x is valid but y or z is not:
+            // "the function can return either non-NaN value"
+            if (yIsNan || zIsNan)
+            {
+                return x;
+            }
+
+            return median3(x, y, z);
+        }
+    }
+}
+
+// Vector overload of `fmedian3`. Metal emits `fmedian3`; other targets go
+// through VECTOR_MAP_TRINARY with the scalar `fmedian3` (macro defined
+// outside this view, presumably applying it element by element -- confirm).
+__generic<T : __BuiltinFloatingPointType, let N: int>
+[__readNone]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
+vector<T,N> fmedian3(vector<T,N> x, vector<T,N> y, vector<T,N> z)
+{
+ __target_switch
+ {
+ case metal: __intrinsic_asm "fmedian3";
+ default:
+ VECTOR_MAP_TRINARY(T, N, fmedian3, x, y, z);
+ }
+}
+
+
// split into integer and fractional parts (both with same sign)
__generic<T : __BuiltinFloatingPointType>
[__readNone]
-[require(glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
T modf(T x, out T ip)
{
__target_switch
{
+ case cpp: __intrinsic_asm "$P_modf($0, $1)";
+ case cuda: __intrinsic_asm "$P_modf($0, $1)";
case hlsl: __intrinsic_asm "modf";
case glsl: __intrinsic_asm "modf";
+ case metal: __intrinsic_asm "modf($0, *($1))";
case spirv: return spirv_asm {
result:$$T = OpExtInst glsl450 Modf $x &ip
};
@@ -8544,13 +9308,14 @@ T modf(T x, out T ip)
__generic<T : __BuiltinFloatingPointType, let N : int>
[__readNone]
-[require(glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
vector<T,N> modf(vector<T,N> x, out vector<T,N> ip)
{
__target_switch
{
case hlsl: __intrinsic_asm "modf";
case glsl: __intrinsic_asm "modf";
+ case metal: __intrinsic_asm "modf($0, *($1))";
case spirv: return spirv_asm {
result:$$vector<T,N> = OpExtInst glsl450 Modf $x &ip
};
@@ -8561,7 +9326,7 @@ vector<T,N> modf(vector<T,N> x, out vector<T,N> ip)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int, let L : int>
[__readNone]
-[require(glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
matrix<T,N,M> modf(matrix<T,N,M> x, out matrix<T,N,M,L> ip)
{
__target_switch
@@ -8883,6 +9648,50 @@ matrix<T,R,C> mul(matrix<T,R,N> left, matrix<T,N,C> right)
}
}
+// next-after: next representable floating-point value
+// after x in the direction of y
+
+// Next representable value after `x` in the direction of `y`.
+// Metal emits `nextafter`. The fallback:
+//  - returns the NaN operand if either operand is NaN,
+//  - returns `y` when `x == y`,
+//  - otherwise steps the bit pattern of `x` by one unit in the last place.
+// Adding the smallest subnormal to `x` is not enough: for any `x` of normal
+// magnitude that sum rounds straight back to `x`.
+__generic<T : __BuiltinFloatingPointType>
+[__readNone]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_4_0)]
+T nextafter(T x, T y)
+{
+    __target_switch
+    {
+    case metal: __intrinsic_asm "nextafter";
+    default:
+        if (isnan(x)) return x;
+        if (isnan(y)) return y;
+        if (x == y) return y;
+
+        const bool stepUp = x < y;
+        // +0 and -0 both step to the smallest subnormal, signed toward `y`.
+        const bool isZero = (x == T(0));
+        // For sign-magnitude encodings, moving away from zero increments the
+        // bit pattern and moving toward zero decrements it.
+        const bool growMagnitude = (x > T(0)) == stepUp;
+
+        if (T is half)
+        {
+            uint16_t bits = bit_cast<uint16_t>(__realCast<half>(x));
+            if (isZero)
+                bits = stepUp ? uint16_t(0x0001) : uint16_t(0x8001);
+            else if (growMagnitude)
+                bits = uint16_t(bits + uint16_t(1));
+            else
+                bits = uint16_t(bits - uint16_t(1));
+            return __realCast<T>(bit_cast<half>(bits));
+        }
+        if (T is float)
+        {
+            uint32_t bits = bit_cast<uint32_t>(__realCast<float>(x));
+            if (isZero)
+                bits = stepUp ? uint32_t(0x00000001) : uint32_t(0x80000001);
+            else if (growMagnitude)
+                bits = bits + uint32_t(1);
+            else
+                bits = bits - uint32_t(1);
+            return __realCast<T>(bit_cast<float>(bits));
+        }
+
+        // Remaining case is handled as double.
+        uint64_t bits = bit_cast<uint64_t>(__realCast<double>(x));
+        if (isZero)
+            bits = stepUp ? uint64_t(1) : ((uint64_t(1) << 63) | uint64_t(1));
+        else if (growMagnitude)
+            bits = bits + uint64_t(1);
+        else
+            bits = bits - uint64_t(1);
+        return __realCast<T>(bit_cast<double>(bits));
+    }
+}
+
+// Vector overload of `nextafter`. Metal emits `nextafter`; other targets go
+// through VECTOR_MAP_BINARY with the scalar `nextafter` (macro defined
+// outside this view, presumably applying it element by element -- confirm).
+__generic<T : __BuiltinFloatingPointType, let N : int>
+[__readNone]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_4_0)]
+vector<T,N> nextafter(vector<T,N> x, vector<T,N> y)
+{
+ __target_switch
+ {
+ case metal: __intrinsic_asm "nextafter";
+ default:
+ VECTOR_MAP_BINARY(T, N, nextafter, x, y);
+ }
+}
+
+
// noise (deprecated)
[__readNone]
@@ -8981,7 +9790,7 @@ T normalize(T x)
// Raise to a power
__generic<T : __BuiltinFloatingPointType>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
T pow(T x, T y)
{
__target_switch
@@ -8990,6 +9799,7 @@ T pow(T x, T y)
case cuda: __intrinsic_asm "$P_pow($0, $1)";
case glsl: __intrinsic_asm "pow";
case hlsl: __intrinsic_asm "pow";
+ case metal: __intrinsic_asm "pow";
case spirv: return spirv_asm {
OpExtInst $$T result glsl450 Pow $x $y
};
@@ -8998,13 +9808,14 @@ T pow(T x, T y)
__generic<T : __BuiltinFloatingPointType, let N : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
vector<T, N> pow(vector<T, N> x, vector<T, N> y)
{
__target_switch
{
case glsl: __intrinsic_asm "pow";
case hlsl: __intrinsic_asm "pow";
+ case metal: __intrinsic_asm "pow";
case spirv: return spirv_asm {
OpExtInst $$vector<T, N> result glsl450 Pow $x $y
};
@@ -9015,7 +9826,7 @@ vector<T, N> pow(vector<T, N> x, vector<T, N> y)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
matrix<T,N,M> pow(matrix<T,N,M> x, matrix<T,N,M> y)
{
__target_switch
@@ -9026,6 +9837,32 @@ matrix<T,N,M> pow(matrix<T,N,M> x, matrix<T,N,M> y)
}
}
+// `powr`: Metal emits `powr`; every other target evaluates `pow` on the
+// absolute value of `x`.
+__generic<T : __BuiltinFloatingPointType>
+[__readNone]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
+T powr(T x, T y)
+{
+    __target_switch
+    {
+    case metal: __intrinsic_asm "powr";
+    default:
+        {
+            T magnitude = abs(x);
+            return pow(magnitude, y);
+        }
+    }
+}
+
+// Vector overload of `powr`: Metal emits `powr`; every other target
+// evaluates the vector `pow` on the absolute value of `x`.
+__generic<T : __BuiltinFloatingPointType, let N : int>
+[__readNone]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
+vector<T, N> powr(vector<T, N> x, vector<T, N> y)
+{
+    __target_switch
+    {
+    case metal: __intrinsic_asm "powr";
+    default:
+        {
+            vector<T, N> magnitude = abs(x);
+            return pow(magnitude, y);
+        }
+    }
+}
+
// Output message
// TODO: add check to ensure format is const literal.
@@ -9360,10 +10197,60 @@ vector<uint, N> reversebits(vector<uint, N> value)
}
}
+// round even
+// Round to the nearest integer, resolving exact halfway cases to the even
+// neighbour.
+// glsl emits `roundEven`, metal emits `rint`, spirv emits GLSL.std.450
+// `RoundEven`; every other target uses the fallback below, which starts from
+// `round(x)` and corrects the halfway cases.
+__generic<T : __BuiltinFloatingPointType>
+[__readNone]
+[ForceInline]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, GLSL_130)]
+T rint(T x)
+{
+    __target_switch
+    {
+    case glsl: __intrinsic_asm "roundEven";
+    case metal: __intrinsic_asm "rint";
+    case spirv: return spirv_asm {
+        OpExtInst $$T result glsl450 RoundEven $x
+    };
+    default:
+        T nearest = round(x);
+
+        // Check if the value is exactly halfway between two integers
+        if (abs(x - nearest) == T(0.5))
+        {
+            // Parity test. In floating point `(n / 2) * 2` reproduces `n`
+            // for odd `n` too, so the half has to be floored before it is
+            // doubled again.
+            if (floor(nearest * T(0.5)) * T(2) != nearest)
+            {
+                // The rounded value is odd: move one step back toward `x`,
+                // which lands on the even neighbour.
+                nearest -= ((x < nearest) ? T(1) : T(-1));
+            }
+        }
+        return nearest;
+    }
+}
+
+// Vector overload of `rint`. glsl emits `roundEven`, metal emits `rint`,
+// spirv emits GLSL.std.450 `RoundEven`; other targets go through
+// VECTOR_MAP_UNARY with the scalar `rint` (macro defined outside this view,
+// presumably applying it element by element -- confirm).
+__generic<T : __BuiltinFloatingPointType, let N:int>
+[__readNone]
+[ForceInline]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, GLSL_130)]
+vector<T,N> rint(vector<T,N> x)
+{
+ __target_switch
+ {
+ case glsl: __intrinsic_asm "roundEven";
+ case metal: __intrinsic_asm "rint";
+ case spirv: return spirv_asm {
+ OpExtInst $$vector<T,N> result glsl450 RoundEven $x
+ };
+ default:
+ VECTOR_MAP_UNARY(T, N, rint, x);
+ }
+}
+
// Round-to-nearest
__generic<T : __BuiltinFloatingPointType>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
T round(T x)
{
__target_switch
@@ -9372,6 +10259,7 @@ T round(T x)
case cuda: __intrinsic_asm "$P_round($0)";
case glsl: __intrinsic_asm "round";
case hlsl: __intrinsic_asm "round";
+ case metal: __intrinsic_asm "round";
case spirv: return spirv_asm {
OpExtInst $$T result glsl450 Round $x
};
@@ -9380,13 +10268,14 @@ T round(T x)
__generic<T : __BuiltinFloatingPointType, let N : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
vector<T, N> round(vector<T, N> x)
{
__target_switch
{
case glsl: __intrinsic_asm "round";
case hlsl: __intrinsic_asm "round";
+ case metal: __intrinsic_asm "round";
case spirv: return spirv_asm {
OpExtInst $$vector<T, N> result glsl450 Round $x
};
@@ -9397,7 +10286,7 @@ vector<T, N> round(vector<T, N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
matrix<T,N,M> round(matrix<T,N,M> x)
{
__target_switch
@@ -9411,7 +10300,7 @@ matrix<T,N,M> round(matrix<T,N,M> x)
// Reciprocal of square root
__generic<T : __BuiltinFloatingPointType>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
T rsqrt(T x)
{
__target_switch
@@ -9420,6 +10309,7 @@ T rsqrt(T x)
case cuda: __intrinsic_asm "$P_rsqrt($0)";
case glsl: __intrinsic_asm "inversesqrt($0)";
case hlsl: __intrinsic_asm "rsqrt";
+ case metal: __intrinsic_asm "rsqrt";
case spirv: return spirv_asm {
OpExtInst $$T result glsl450 InverseSqrt $x
};
@@ -9430,13 +10320,14 @@ T rsqrt(T x)
__generic<T : __BuiltinFloatingPointType, let N : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
vector<T, N> rsqrt(vector<T, N> x)
{
__target_switch
{
case glsl: __intrinsic_asm "inversesqrt($0)";
case hlsl: __intrinsic_asm "rsqrt";
+ case metal: __intrinsic_asm "rsqrt";
case spirv: return spirv_asm {
OpExtInst $$vector<T, N> result glsl450 InverseSqrt $x
};
@@ -9447,7 +10338,7 @@ vector<T, N> rsqrt(vector<T, N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
matrix<T, N, M> rsqrt(matrix<T, N, M> x)
{
__target_switch
@@ -9568,12 +10459,11 @@ matrix<int, N, M> sign(matrix<T, N, M> x)
}
}
-
// Sine
__generic<T : __BuiltinFloatingPointType>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
T sin(T x)
{
__target_switch
@@ -9582,6 +10472,7 @@ T sin(T x)
case cuda: __intrinsic_asm "$P_sin($0)";
case glsl: __intrinsic_asm "sin";
case hlsl: __intrinsic_asm "sin";
+ case metal: __intrinsic_asm "sin";
case spirv: return spirv_asm {
OpExtInst $$T result glsl450 Sin $x
};
@@ -9590,13 +10481,14 @@ T sin(T x)
__generic<T : __BuiltinFloatingPointType, let N : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
vector<T, N> sin(vector<T, N> x)
{
__target_switch
{
case glsl: __intrinsic_asm "sin";
case hlsl: __intrinsic_asm "sin";
+ case metal: __intrinsic_asm "sin";
case spirv: return spirv_asm {
OpExtInst $$vector<T, N> result glsl450 Sin $x
};
@@ -9607,7 +10499,7 @@ vector<T, N> sin(vector<T, N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
matrix<T, N, M> sin(matrix<T, N, M> x)
{
__target_switch
@@ -9621,13 +10513,40 @@ matrix<T, N, M> sin(matrix<T, N, M> x)
// Sine and cosine
__generic<T : __BuiltinFloatingPointType>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(metal)]
+T __sincos_metal(T x, out T c)
+{
+ __target_switch
+ {
+ case metal: __intrinsic_asm "sincos($0, *$1)";
+ }
+}
+
+__generic<T : __BuiltinFloatingPointType, let N : int>
+[__readNone]
+[require(metal)]
+vector<T,N> __sincos_metal(vector<T,N> x, out vector<T,N> c)
+{
+ __target_switch
+ {
+ case metal: __intrinsic_asm "sincos($0, *$1)";
+ }
+}
+
+__generic<T : __BuiltinFloatingPointType>
+[__readNone]
+[ForceInline]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
void sincos(T x, out T s, out T c)
{
__target_switch
{
case cuda: __intrinsic_asm "$P_sincos($0, $1, $2)";
case hlsl: __intrinsic_asm "sincos";
+ case metal:
+ //__intrinsic_asm "*($1) = sincos($0, *($2))";
+ s = __sincos_metal(x, c);
+ return;
default:
s = sin(x);
c = cos(x);
@@ -9636,12 +10555,17 @@ void sincos(T x, out T s, out T c)
__generic<T : __BuiltinFloatingPointType, let N : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[ForceInline]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
void sincos(vector<T,N> x, out vector<T,N> s, out vector<T,N> c)
{
__target_switch
{
case hlsl: __intrinsic_asm "sincos";
+ case metal:
+ //__intrinsic_asm "*($1) = sincos($0, *($2))";
+ s = __sincos_metal(x, c);
+ return;
default:
s = sin(x);
c = cos(x);
@@ -9650,7 +10574,8 @@ void sincos(vector<T,N> x, out vector<T,N> s, out vector<T,N> c)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int, let L1: int, let L2 : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[ForceInline]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
void sincos(matrix<T,N,M> x, out matrix<T,N,M,L1> s, out matrix<T,N,M,L2> c)
{
__target_switch
@@ -9665,7 +10590,7 @@ void sincos(matrix<T,N,M> x, out matrix<T,N,M,L1> s, out matrix<T,N,M,L2> c)
// Hyperbolic Sine
__generic<T : __BuiltinFloatingPointType>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
T sinh(T x)
{
__target_switch
@@ -9674,6 +10599,7 @@ T sinh(T x)
case cuda: __intrinsic_asm "$P_sinh($0)";
case glsl: __intrinsic_asm "sinh";
case hlsl: __intrinsic_asm "sinh";
+ case metal: __intrinsic_asm "sinh";
case spirv: return spirv_asm {
OpExtInst $$T result glsl450 Sinh $x
};
@@ -9682,13 +10608,14 @@ T sinh(T x)
__generic<T : __BuiltinFloatingPointType, let N : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
vector<T, N> sinh(vector<T, N> x)
{
__target_switch
{
case glsl: __intrinsic_asm "sinh";
case hlsl: __intrinsic_asm "sinh";
+ case metal: __intrinsic_asm "sinh";
case spirv: return spirv_asm {
OpExtInst $$vector<T, N> result glsl450 Sinh $x
};
@@ -9699,7 +10626,7 @@ vector<T, N> sinh(vector<T, N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
matrix<T, N, M> sinh(matrix<T, N, M> x)
{
__target_switch
@@ -9710,6 +10637,35 @@ matrix<T, N, M> sinh(matrix<T, N, M> x)
}
}
+// Sine of pi times x: sinpi(x) == sin(pi * x)
+
+__generic<T : __BuiltinFloatingPointType>
+[__readNone]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
+T sinpi(T x)
+{
+ __target_switch
+ {
+ case metal: __intrinsic_asm "sinpi";
+ default:
+ return sin(T.getPi() * x);
+ }
+}
+
+__generic<T : __BuiltinFloatingPointType, let N : int>
+[__readNone]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
+vector<T,N> sinpi(vector<T,N> x)
+{
+ __target_switch
+ {
+ case metal: __intrinsic_asm "sinpi";
+ default:
+ return sin(T.getPi() * x);
+ }
+}
+
+
// Smooth step (Hermite interpolation)
__generic<T : __BuiltinFloatingPointType>
[__readNone]
@@ -9762,7 +10718,7 @@ matrix<T, N, M> smoothstep(matrix<T, N, M> min, matrix<T, N, M> max, matrix<T, N
// Square root
__generic<T : __BuiltinFloatingPointType>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
T sqrt(T x)
{
__target_switch
@@ -9771,6 +10727,7 @@ T sqrt(T x)
case cuda: __intrinsic_asm "$P_sqrt($0)";
case glsl: __intrinsic_asm "sqrt";
case hlsl: __intrinsic_asm "sqrt";
+ case metal: __intrinsic_asm "sqrt";
case spirv: return spirv_asm {
OpExtInst $$T result glsl450 Sqrt $x
};
@@ -9779,13 +10736,14 @@ T sqrt(T x)
__generic<T : __BuiltinFloatingPointType, let N : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
vector<T, N> sqrt(vector<T, N> x)
{
__target_switch
{
case glsl: __intrinsic_asm "sqrt";
case hlsl: __intrinsic_asm "sqrt";
+ case metal: __intrinsic_asm "sqrt";
case spirv: return spirv_asm {
OpExtInst $$vector<T, N> result glsl450 Sqrt $x
};
@@ -9796,7 +10754,7 @@ vector<T, N> sqrt(vector<T, N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
matrix<T, N, M> sqrt(matrix<T, N, M> x)
{
__target_switch
@@ -9858,7 +10816,7 @@ matrix<T, N, M> step(matrix<T, N, M> y, matrix<T, N, M> x)
// Tangent
__generic<T : __BuiltinFloatingPointType>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
T tan(T x)
{
__target_switch
@@ -9867,6 +10825,7 @@ T tan(T x)
case cuda: __intrinsic_asm "$P_tan($0)";
case glsl: __intrinsic_asm "tan";
case hlsl: __intrinsic_asm "tan";
+ case metal: __intrinsic_asm "tan";
case spirv: return spirv_asm {
OpExtInst $$T result glsl450 Tan $x
};
@@ -9875,13 +10834,14 @@ T tan(T x)
__generic<T : __BuiltinFloatingPointType, let N : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
vector<T, N> tan(vector<T, N> x)
{
__target_switch
{
case glsl: __intrinsic_asm "tan";
case hlsl: __intrinsic_asm "tan";
+ case metal: __intrinsic_asm "tan";
case spirv: return spirv_asm {
OpExtInst $$vector<T, N> result glsl450 Tan $x
};
@@ -9892,7 +10852,7 @@ vector<T, N> tan(vector<T, N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
matrix<T, N, M> tan(matrix<T, N, M> x)
{
__target_switch
@@ -9906,7 +10866,7 @@ matrix<T, N, M> tan(matrix<T, N, M> x)
// Hyperbolic tangent
__generic<T : __BuiltinFloatingPointType>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
T tanh(T x)
{
__target_switch
@@ -9915,6 +10875,7 @@ T tanh(T x)
case cuda: __intrinsic_asm "$P_tanh($0)";
case glsl: __intrinsic_asm "tanh";
case hlsl: __intrinsic_asm "tanh";
+ case metal: __intrinsic_asm "tanh";
case spirv: return spirv_asm {
OpExtInst $$T result glsl450 Tanh $x
};
@@ -9923,13 +10884,14 @@ T tanh(T x)
__generic<T : __BuiltinFloatingPointType, let N : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
vector<T,N> tanh(vector<T,N> x)
{
__target_switch
{
case glsl: __intrinsic_asm "tanh";
case hlsl: __intrinsic_asm "tanh";
+ case metal: __intrinsic_asm "tanh";
case spirv: return spirv_asm {
OpExtInst $$vector<T,N> result glsl450 Tanh $x
};
@@ -9940,7 +10902,7 @@ vector<T,N> tanh(vector<T,N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
matrix<T,N,M> tanh(matrix<T,N,M> x)
{
__target_switch
@@ -9951,6 +10913,35 @@ matrix<T,N,M> tanh(matrix<T,N,M> x)
}
}
+// Tangent of pi times x: tanpi(x) == tan(pi * x)
+
+__generic<T : __BuiltinFloatingPointType>
+[__readNone]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
+T tanpi(T x)
+{
+ __target_switch
+ {
+ case metal: __intrinsic_asm "tanpi";
+ default:
+ return tan(T.getPi() * x);
+ }
+}
+
+__generic<T : __BuiltinFloatingPointType, let N : int>
+[__readNone]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
+vector<T,N> tanpi(vector<T,N> x)
+{
+ __target_switch
+ {
+ case metal: __intrinsic_asm "tanpi";
+ default:
+ return tan(T.getPi() * x);
+ }
+}
+
+
// Matrix transpose
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
[__readNone]
@@ -10020,7 +11011,7 @@ matrix<T, M, N> transpose(matrix<T, N, M> x)
// Truncate to integer
__generic<T : __BuiltinFloatingPointType>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
T trunc(T x)
{
__target_switch
@@ -10029,6 +11020,7 @@ T trunc(T x)
case cuda: __intrinsic_asm "$P_trunc($0)";
case glsl: __intrinsic_asm "trunc";
case hlsl: __intrinsic_asm "trunc";
+ case metal: __intrinsic_asm "trunc";
case spirv: return spirv_asm {
OpExtInst $$T result glsl450 Trunc $x
};
@@ -10037,13 +11029,14 @@ T trunc(T x)
__generic<T : __BuiltinFloatingPointType, let N : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
vector<T, N> trunc(vector<T, N> x)
{
__target_switch
{
case glsl: __intrinsic_asm "trunc";
case hlsl: __intrinsic_asm "trunc";
+ case metal: __intrinsic_asm "trunc";
case spirv: return spirv_asm {
OpExtInst $$vector<T, N> result glsl450 Trunc $x
};
@@ -10054,7 +11047,7 @@ vector<T, N> trunc(vector<T, N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
matrix<T, N, M> trunc(matrix<T, N, M> x)
{
__target_switch
diff --git a/source/slang/slang-emit-metal.cpp b/source/slang/slang-emit-metal.cpp
index 2c327b613..7da48cac1 100644
--- a/source/slang/slang-emit-metal.cpp
+++ b/source/slang/slang-emit-metal.cpp
@@ -298,35 +298,27 @@ bool MetalSourceEmitter::tryEmitInstExprImpl(IRInst* inst, const EmitOpInfo& inO
void MetalSourceEmitter::emitVectorTypeNameImpl(IRType* elementType, IRIntegerValue elementCount)
{
- // In some cases we *need* to use the built-in syntax sugar for vector types,
- // so we will try to emit those whenever possible.
- //
- if( elementCount >= 1 && elementCount <= 4 )
- {
- switch( elementType->getOp() )
+ emitSimpleTypeImpl(elementType);
+
+ switch (elementType->getOp())
+ {
+ case kIROp_FloatType:
+ case kIROp_HalfType:
+ case kIROp_BoolType:
+ case kIROp_Int8Type:
+ case kIROp_UInt8Type:
+ case kIROp_Int16Type:
+ case kIROp_UInt16Type:
+ case kIROp_IntType:
+ case kIROp_UIntType:
+ case kIROp_Int64Type:
+ case kIROp_UInt64Type:
+ if (elementCount > 1)
{
- case kIROp_FloatType:
- case kIROp_IntType:
- case kIROp_UIntType:
- // TODO: There are more types that need to be covered here
- emitType(elementType);
m_writer->emit(elementCount);
- return;
-
- default:
- break;
}
+ break;
}
-
- // As a fallback, we will use the `vector<...>` type constructor,
- // although we should not expect to run into types that don't
- // have a sugared form.
- //
- m_writer->emit("vector<");
- emitType(elementType);
- m_writer->emit(",");
- m_writer->emit(elementCount);
- m_writer->emit(">");
}
void MetalSourceEmitter::emitLoopControlDecorationImpl(IRLoopControlDecoration* decl)
@@ -855,6 +847,7 @@ void MetalSourceEmitter::handleRequiredCapabilitiesImpl(IRInst* inst)
void MetalSourceEmitter::emitFrontMatterImpl(TargetRequest*)
{
m_writer->emit("#include <metal_stdlib>\n");
+ m_writer->emit("#include <metal_math>\n");
m_writer->emit("using namespace metal;\n");
}
diff --git a/tests/metal/math.slang b/tests/metal/math.slang
new file mode 100644
index 000000000..288d6137c
--- /dev/null
+++ b/tests/metal/math.slang
@@ -0,0 +1,513 @@
+//TEST:SIMPLE(filecheck=METAL): -stage compute -entry computeMain -target metal
+//TEST:SIMPLE(filecheck=GLSL): -stage compute -entry computeMain -target glsl
+//TEST:SIMPLE(filecheck=GLSL_SPIRV): -stage compute -entry computeMain -target spirv -emit-spirv-via-glsl
+//TEST:SIMPLE(filecheck=SPIR): -stage compute -entry computeMain -target spirv -emit-spirv-directly
+//TEST:SIMPLE(filecheck=HLSL): -stage compute -entry computeMain -target hlsl
+//TEST:SIMPLE(filecheck=CUDA): -stage compute -entry computeMain -target cuda
+//TEST:SIMPLE(filecheck=CPP): -stage compute -entry computeMain -target cpp
+
+//TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=BUF):-vk -compute -entry computeMain -output-using-type -emit-spirv-via-glsl
+//TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=BUF):-vk -compute -entry computeMain -output-using-type -emit-spirv-directly
+//TEST:SIMPLE(filecheck=METALLIB): -target metallib
+
+//TEST_INPUT:ubuffer(data=[0 1 -1], stride=4):name=inputBuffer
+RWStructuredBuffer<int> inputBuffer;
+
+//TEST_INPUT: ubuffer(data=[0 0 0 0], stride=4):out,name outputBuffer
+RWStructuredBuffer<int> outputBuffer;
+
+// METALLIB: define void @computeMain
+
+// It is unclear why "nextafter" is not working for Metal.
+#define TEST_WHEN_nextafter_WORKS 0
+
+__generic<T:__BuiltinFloatingPointType>
+bool Test_Scalar()
+{
+ // METAL-LABEL: Test_Scalar
+ const T zero = T(inputBuffer[0]);
+ const T one = T(inputBuffer[1]);
+
+ const int zeroInt = int(inputBuffer[0]);
+
+ T outFloat1, outFloat2;
+ int outInt;
+
+ bool voidResult = true;
+
+ // METAL: sincos(
+ // METAL-NOT: sincos(
+ sincos<T>(zero, outFloat1, outFloat2);
+ voidResult = voidResult && zero == outFloat1 && one == outFloat2;
+
+ return voidResult
+ // METAL: acos(
+ // METALLIB: acos.f32
+ && zero == acos<T>(one)
+
+ // METAL: acosh(
+ // METALLIB: acosh.f32
+ && zero == acosh<T>(one)
+
+ // METAL: asin(
+ // METALLIB: asin.f32
+ && zero == asin<T>(zero)
+
+ // METAL: asinh(
+ // METALLIB: asinh.f32
+ && zero == asinh<T>(zero)
+
+ // METAL: atan(
+ // METALLIB: atan.f32
+ && zero == atan<T>(zero)
+
+ // METAL: atan2(
+ // METALLIB: atan2.f32
+ && zero == atan2<T>(zero, zero)
+
+ // METAL: atanh(
+ // METALLIB: atanh.f32
+ && zero == atanh<T>(zero)
+
+ // METAL: ceil(
+ // METALLIB: ceil.f32
+ && zero == ceil<T>(zero)
+
+ // METAL: copysign(
+ // METALLIB: bitcast float
+ && zero == copysign<T>(zero, zero)
+
+ // METAL: cos(
+ // METALLIB: cos.f32
+ && one == cos<T>(zero)
+
+ // METAL: cosh(
+ // METALLIB: cosh.f32
+ && one == cosh<T>(zero)
+
+ // METAL: cospi(
+ // METALLIB: cospi.f32
+ && one == cospi<T>(zero)
+
+ // METAL: divide(
+ // METALLIB: fdiv
+ && zero == divide<T>(zero, one)
+
+ // METAL: exp(
+ // METALLIB: exp.f32
+ && one == exp<T>(zero)
+
+ // METAL: exp2(
+ // METALLIB: exp2.f32
+ && one == exp2<T>(zero)
+
+ // METAL: exp10(
+ // METALLIB: exp10.f32
+ && one == exp10<T>(zero)
+
+ // METAL: fabs(
+ // METALLIB: fabs.f32
+ && zero == fabs<T>(zero)
+
+ // METAL: abs(
+ && zero == abs<T>(zero)
+
+ // METAL: fdim(
+ && zero == fdim<T>(zero, zero)
+
+ // METAL: floor(
+ // METALLIB: floor.f32
+ && zero == floor<T>(zero)
+
+ // METAL: fma(
+ // METALLIB: fma.f32
+ && zero == fma(zero, zero, zero)
+
+ // METAL: fmax(
+ // METALLIB: fmax.f32
+ && zero == fmax<T>(zero, zero)
+
+ // METAL: max(
+ && zero == max<T>(zero, zero)
+
+ // METAL: fmax3(
+ // METALLIB: fmax3.f32
+ && zero == fmax3<T>(zero, zero, zero)
+
+ // METAL: max3(
+ && zero == max3<T>(zero, zero, zero)
+
+ // METAL: fmedian3(
+ // METALLIB: fmedian3.f32
+ && zero == fmedian3<T>(zero, zero, zero)
+
+ // METAL: median3(
+ && zero == median3<T>(zero, zero, zero)
+
+ // METAL: fmin(
+ // METALLIB: fmin.f32
+ && zero == fmin<T>(zero, zero)
+
+ // METAL: min(
+ && zero == min<T>(zero, zero)
+
+ // METAL: fmin3(
+ // METALLIB: fmin3.f32
+ && zero == fmin3<T>(zero, zero, zero)
+
+ // METAL: min3(
+ && zero == min3<T>(zero, zero, zero)
+
+ // METAL-COUNT-2: fmod(
+ // METALLIB-COUNT-2: fmod.f32
+ && zero == fmod<T>(zero, one)
+
+ // METAL: fract(
+ // METALLIB: fract.f32
+ && zero == fract<T>(zero)
+
+ // METAL: frexp(
+ // METALLIB: frexp_float
+ && zero == frexp<T>(zero, outInt) && zeroInt == outInt
+
+ // METAL: ldexp(
+ // METALLIB: ldexp.f32
+ && zero == ldexp<T>(zero, zeroInt)
+
+ // METAL: log(
+ // METALLIB: log.f32
+ && zero == log<T>(one)
+
+ // METAL: log2(
+ // METALLIB: log2.f32
+ && zero == log2<T>(one)
+
+ // METAL: log10(
+ // METALLIB: log10.f32
+ && zero == log10<T>(one)
+
+ // METAL: modf(
+ && zero == modf<T>(zero, outFloat1)
+
+#if TEST_WHEN_nextafter_WORKS
+ // M-ETAL: nextafter(
+ && zero == nextafter<T>(zero, zero)
+#endif
+
+ // METAL: pow(
+ // METALLIB: pow.f32
+ && zero == pow<T>(zero, one)
+
+ // METAL: powr(
+ // METALLIB: powr.f32
+ && zero == powr<T>(zero, one)
+
+ // METAL: rint(
+ // METALLIB: rint.f32
+ && zero == rint<T>(zero)
+
+ // METAL: round(
+ // METALLIB: round.f32
+ && zero == round<T>(zero)
+
+ // METAL: rsqrt(
+ // METALLIB: rsqrt.f32
+ && one == rsqrt<T>(one)
+
+ // METAL: sin(
+ // METALLIB: sin.f32
+ && zero == sin<T>(zero)
+
+ // METAL: sinh(
+ // METALLIB: sinh.f32
+ && zero == sinh<T>(zero)
+
+ // METAL: sinpi(
+ // METALLIB: sinpi.f32
+ && zero == sinpi<T>(zero)
+
+ // METAL: sqrt(
+ // METALLIB: sqrt.f32
+ && zero == sqrt<T>(zero)
+
+ // METAL: tan(
+ // METALLIB: tan.f32
+ && zero == tan<T>(zero)
+
+ // METAL: tanh(
+ // METALLIB: tanh.f32
+ && zero == tanh<T>(zero)
+
+ // METAL: tanpi(
+ // METALLIB: tanpi.f32
+ && zero == tanpi<T>(zero)
+
+ // METAL: trunc(
+ && zero == trunc<T>(zero)
+ ;
+
+ // METALLIB: ret
+}
+
+__generic<T:__BuiltinFloatingPointType, let N : int>
+bool Test_Vector()
+{
+ // METAL-LABEL: Test_Vector_0
+ const vector<T,N> zero = T(inputBuffer[0]);
+ const vector<T,N> one = T(inputBuffer[1]);
+
+ const vector<int,N> zeroInt = int(inputBuffer[0]);
+
+ vector<T,N> outFloat1, outFloat2;
+ vector<int,N> outInt;
+
+ bool voidResult = true;
+
+ // METAL: sincos(
+ // METAL-NOT: sincos(
+ sincos<T>(zero, outFloat1, outFloat2);
+ voidResult = voidResult && zero == outFloat1 && one == outFloat2;
+
+ return voidResult
+ // METAL: acos(
+ // METAL-NOT: acos(
+ && zero == acos<T>(one)
+
+ // METAL: acosh(
+ // METAL-NOT: acosh(
+ && zero == acosh<T>(one)
+
+ // METAL: asin(
+ // METAL-NOT: asin(
+ && zero == asin<T>(zero)
+
+ // METAL: asinh(
+ // METAL-NOT: asinh(
+ && zero == asinh<T>(zero)
+
+ // METAL: atan(
+ // METAL-NOT: atan(
+ && zero == atan<T>(zero)
+
+ // METAL: atan2(
+ // METAL-NOT: atan2(
+ && zero == atan2<T>(zero, zero)
+
+ // METAL: atanh(
+ // METAL-NOT: atanh(
+ && zero == atanh<T>(zero)
+
+ // METAL: ceil(
+ // METAL-NOT: ceil(
+ && zero == ceil<T>(zero)
+
+ // METAL: copysign(
+ // METAL-NOT: copysign(
+ && zero == copysign<T>(zero, zero)
+
+ // METAL: cos(
+ // METAL-NOT: cos(
+ && one == cos<T>(zero)
+
+ // METAL: cosh(
+ // METAL-NOT: cosh(
+ && one == cosh<T>(zero)
+
+ // METAL: cospi(
+ // METAL-NOT: cospi(
+ && one == cospi<T>(zero)
+
+ // METAL: divide(
+ // METAL-NOT: divide(
+ && zero == divide<T>(zero, one)
+
+ // METAL: exp(
+ // METAL-NOT: exp(
+ && one == exp<T>(zero)
+
+ // METAL: exp2(
+ // METAL-NOT: exp2(
+ && one == exp2<T>(zero)
+
+ // METAL: exp10(
+ // METAL-NOT: exp10(
+ && one == exp10<T>(zero)
+
+ // METAL: fabs(
+ // METAL-NOT: fabs(
+ && zero == fabs<T>(zero)
+
+ // METAL: abs(
+ // METAL-NOT: abs(
+ && zero == abs<T>(zero)
+
+ // METAL: fdim(
+ // METAL-NOT: fdim(
+ && zero == fdim<T>(zero, zero)
+
+ // METAL: floor(
+ // METAL-NOT: floor(
+ && zero == floor<T>(zero)
+
+ // METAL: fma(
+ // METAL-NOT: fma(
+ && zero == fma(zero, zero, zero)
+
+ // METAL: fmax(
+ // METAL-NOT: fmax(
+ && zero == fmax<T>(zero, zero)
+
+ // METAL: max(
+ // METAL-NOT: max(
+ && zero == max<T>(zero, zero)
+
+ // METAL: fmax3(
+ // METAL-NOT: fmax3(
+ && zero == fmax3<T>(zero, zero, zero)
+
+ // METAL: max3(
+ // METAL-NOT: max3(
+ && zero == max3<T>(zero, zero, zero)
+
+ // METAL: fmedian3(
+ // METAL-NOT: fmedian3(
+ && zero == fmedian3<T>(zero, zero, zero)
+
+ // METAL: median3(
+ // METAL-NOT: median3(
+ && zero == median3<T>(zero, zero, zero)
+
+ // METAL: fmin(
+ // METAL-NOT: fmin(
+ && zero == fmin<T>(zero, zero)
+
+ // METAL: min(
+ // METAL-NOT: min(
+ && zero == min<T>(zero, zero)
+
+ // METAL: fmin3(
+ // METAL-NOT: fmin3(
+ && zero == fmin3<T>(zero, zero, zero)
+
+ // METAL: min3(
+ // METAL-NOT: min3(
+ && zero == min3<T>(zero, zero, zero)
+
+ // METAL-COUNT-2: fmod(
+ // METAL-NOT: fmod(
+ && zero == fmod<T>(zero, one)
+
+ // METAL: fract(
+ // METAL-NOT: fract(
+ && zero == fract<T>(zero)
+
+ // METAL: frexp(
+ // METAL-NOT: frexp(
+ && zero == frexp<T>(zero, outInt) && all(zeroInt == outInt)
+
+ // METAL: ldexp(
+ // METAL-NOT: ldexp(
+ && zero == ldexp<T>(zero, zeroInt)
+
+ // METAL: log(
+ // METAL-NOT: log(
+ && zero == log<T>(one)
+
+ // METAL: log2(
+ // METAL-NOT: log2(
+ && zero == log2<T>(one)
+
+ // METAL: log10(
+ // METAL-NOT: log10(
+ && zero == log10<T>(one)
+
+ // METAL: modf(
+ // METAL-NOT: modf(
+ && zero == modf<T>(zero, outFloat1)
+
+#if TEST_WHEN_nextafter_WORKS
+ // M-ETAL: nextafter(
+ // METAL-NOT: nextafter(
+ && zero == nextafter<T>(zero, zero)
+#endif
+
+ // METAL: pow(
+ // METAL-NOT: pow(
+ && zero == pow<T>(zero, one)
+
+ // METAL: powr(
+ // METAL-NOT: powr(
+ && zero == powr<T>(zero, one)
+
+ // METAL: rint(
+ // METAL-NOT: rint(
+ && zero == rint<T>(zero)
+
+ // METAL: round(
+ // METAL-NOT: round(
+ && zero == round<T>(zero)
+
+ // METAL: rsqrt(
+ // METAL-NOT: rsqrt(
+ && one == rsqrt<T>(one)
+
+ // METAL: sin(
+ // METAL-NOT: sin(
+ && zero == sin<T>(zero)
+
+ // METAL: sinh(
+ // METAL-NOT: sinh(
+ && zero == sinh<T>(zero)
+
+ // METAL: sinpi(
+ // METAL-NOT: sinpi(
+ && zero == sinpi<T>(zero)
+
+ // METAL: sqrt(
+ // METAL-NOT: sqrt(
+ && zero == sqrt<T>(zero)
+
+ // METAL: tan(
+ // METAL-NOT: tan(
+ && zero == tan<T>(zero)
+
+ // METAL: tanh(
+ // METAL-NOT: tanh(
+ && zero == tanh<T>(zero)
+
+ // METAL: tanpi(
+ // METAL-NOT: tanpi(
+ && zero == tanpi<T>(zero)
+
+ // METAL: trunc(
+ // METAL-NOT: trunc(
+ && zero == trunc<T>(zero)
+ ;
+
+ // METAL-LABEL: Test_Vector_1
+}
+
+[numthreads(1,1,1)]
+void computeMain()
+{
+ // GLSL: void main(
+ // GLSL_SPIRV: OpEntryPoint
+ // SPIR: OpEntryPoint
+ // HLSL: void computeMain(
+ // CUDA: void computeMain(
+ // CPP: void _computeMain(
+
+ bool result = true
+ && Test_Scalar<float>()
+ && Test_Vector<float, 2>()
+ && Test_Vector<float, 3>()
+ && Test_Vector<float, 4>()
+ && Test_Scalar<half>()
+ && Test_Vector<half, 2>()
+ && Test_Vector<half, 3>()
+ && Test_Vector<half, 4>()
+ ;
+
+ // BUF: 1
+ outputBuffer[0] = int(result);
+}