summaryrefslogtreecommitdiff
path: root/source
diff options
context:
space:
mode:
Diffstat (limited to 'source')
-rw-r--r--source/slang/core.meta.slang2
-rw-r--r--source/slang/glsl.meta.slang201
-rw-r--r--source/slang/hlsl.meta.slang1209
-rw-r--r--source/slang/slang-emit-metal.cpp43
4 files changed, 1125 insertions, 330 deletions
diff --git a/source/slang/core.meta.slang b/source/slang/core.meta.slang
index 3fc2fc570..22822196c 100644
--- a/source/slang/core.meta.slang
+++ b/source/slang/core.meta.slang
@@ -421,6 +421,8 @@ __generic<T, let N : int> __intrinsic_op(select) vector<T,N> select(vector<bool,
// Allow real-number types to be cast into each other
__intrinsic_op($(kIROp_FloatCast))
T __realCast<T : __BuiltinRealType, U : __BuiltinRealType>(U val);
+__intrinsic_op($(kIROp_CastIntToFloat))
+ T __realCast<T : __BuiltinRealType, U : __BuiltinIntegerType>(U val);
__intrinsic_op($(kIROp_IntCast))
T __intCast<T : __BuiltinType, U : __BuiltinType>(U val);
${{{{
diff --git a/source/slang/glsl.meta.slang b/source/slang/glsl.meta.slang
index 9715a44ce..bacc8958e 100644
--- a/source/slang/glsl.meta.slang
+++ b/source/slang/glsl.meta.slang
@@ -321,114 +321,6 @@ public vector<T,N> atan(vector<T,N> y, vector<T,N> x)
return atan2(y, x);
}
-__generic<T : __BuiltinFloatingPointType>
-[__readNone]
-[ForceInline]
-[require(cpp_cuda_glsl_hlsl_spirv, GLSL_130)]
-public T asinh(T x)
-{
- __target_switch
- {
- case cpp: __intrinsic_asm "$P_asinh($0)";
- case cuda: __intrinsic_asm "$P_asinh($0)";
- case glsl: __intrinsic_asm "asinh";
- case spirv: return spirv_asm {
- OpExtInst $$T result glsl450 Asinh $x
- };
- default:
- return log(x + sqrt(x * x + T(1)));
- }
-}
-
-__generic<T : __BuiltinFloatingPointType, let N:int>
-[__readNone]
-[ForceInline]
-[require(cpp_cuda_glsl_hlsl_spirv, GLSL_130)]
-public vector<T,N> asinh(vector<T,N> x)
-{
- __target_switch
- {
- case glsl: __intrinsic_asm "asinh";
- case spirv: return spirv_asm {
- OpExtInst $$vector<T,N> result glsl450 Asinh $x
- };
- default:
- VECTOR_MAP_UNARY(T, N, asinh, x);
- }
-}
-
-__generic<T : __BuiltinFloatingPointType>
-[__readNone]
-[ForceInline]
-[require(cpp_cuda_glsl_hlsl_spirv, GLSL_130)]
-public T acosh(T x)
-{
- __target_switch
- {
- case cpp: __intrinsic_asm "$P_acosh($0)";
- case cuda: __intrinsic_asm "$P_acosh($0)";
- case glsl: __intrinsic_asm "acosh";
- case spirv: return spirv_asm {
- OpExtInst $$T result glsl450 Acosh $x
- };
- default:
- return log(x + sqrt( x * x - T(1)));
- }
-}
-
-__generic<T : __BuiltinFloatingPointType, let N:int>
-[__readNone]
-[ForceInline]
-[require(cpp_cuda_glsl_hlsl_spirv, GLSL_130)]
-public vector<T,N> acosh(vector<T,N> x)
-{
- __target_switch
- {
- case glsl: __intrinsic_asm "acosh";
- case spirv: return spirv_asm {
- OpExtInst $$vector<T,N> result glsl450 Acosh $x
- };
- default:
- VECTOR_MAP_UNARY(T, N, acosh, x);
- }
-}
-
-__generic<T : __BuiltinFloatingPointType>
-[__readNone]
-[ForceInline]
-[require(cpp_cuda_glsl_hlsl_spirv, GLSL_130)]
-public T atanh(T x)
-{
- __target_switch
- {
- case cpp: __intrinsic_asm "$P_atanh($0)";
- case cuda: __intrinsic_asm "$P_atanh($0)";
- case glsl: __intrinsic_asm "atanh";
- case spirv: return spirv_asm {
- OpExtInst $$T result glsl450 Atanh $x
- };
- default:
- return T(0.5) * log((T(1) + x) / (T(1) - x));
- }
-}
-
-__generic<T : __BuiltinFloatingPointType, let N:int>
-[__readNone]
-[ForceInline]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
-public vector<T,N> atanh(vector<T,N> x)
-{
- __target_switch
- {
- case glsl: __intrinsic_asm "atanh";
- case spirv: return spirv_asm {
- OpExtInst $$vector<T,N> result glsl450 Atanh $x
- };
- default:
- VECTOR_MAP_UNARY(T, N, atanh, x);
- }
-}
-
//
// Section 8.2. Exponential Functions
//
@@ -458,66 +350,19 @@ public vector<T, N> inversesqrt(vector<T, N> x)
__generic<T : __BuiltinFloatingPointType>
[__readNone]
[ForceInline]
-[require(cpp_cuda_glsl_hlsl_spirv, GLSL_130)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, GLSL_130)]
public T roundEven(T x)
{
- __target_switch
- {
- case glsl: __intrinsic_asm "roundEven";
- case spirv: return spirv_asm {
- OpExtInst $$T result glsl450 RoundEven $x
- };
- default:
- T nearest = round(x);
-
- // Check if the value is exactly halfway between two integers
- if (abs(x - nearest) == T(0.5))
- {
- // If halfway, choose the even number
- if (mod(nearest, T(2)) != T(0))
- {
- // If the nearest number is odd,
- // move to the closest even number
- nearest -= ((x < nearest) ? T(1) : T(-1));
- }
- }
- return nearest;
- }
+ return rint(x);
}
__generic<T : __BuiltinFloatingPointType, let N:int>
[__readNone]
[ForceInline]
-[require(cpp_cuda_glsl_hlsl_spirv, GLSL_130)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, GLSL_130)]
public vector<T,N> roundEven(vector<T,N> x)
{
- __target_switch
- {
- case glsl: __intrinsic_asm "roundEven";
- case spirv: return spirv_asm {
- OpExtInst $$vector<T,N> result glsl450 RoundEven $x
- };
- default:
- VECTOR_MAP_UNARY(T, N, roundEven, x);
- }
-}
-
-__generic<T : __BuiltinFloatingPointType>
-[__readNone]
-[ForceInline]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
-public T fract(T x)
-{
- return frac(x);
-}
-
-__generic<T : __BuiltinFloatingPointType, let N:int>
-[__readNone]
-[ForceInline]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
-public vector<T, N> fract(vector<T, N> x)
-{
- return frac(x);
+ return rint(x);
}
__generic<T : __BuiltinFloatingPointType>
@@ -824,44 +669,6 @@ uint float2half(float f)
return (s | e | m);
}
-__generic<T : __BuiltinFloatingPointType, E : __BuiltinIntegerType>
-[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
-public T ldexp(T x, E exp)
-{
- __target_switch
- {
- case hlsl: __intrinsic_asm "ldexp";
- case glsl: __intrinsic_asm "ldexp";
- case spirv: return spirv_asm {
- OpExtInst $$T result glsl450 Ldexp $x $exp
- };
- default:
- return ldexp(x, __floatCast<T>(exp));
- }
-}
-
-__generic<T : __BuiltinFloatingPointType, E : __BuiltinIntegerType, let N : int>
-[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
-public vector<T, N> ldexp(vector<T, N> x, vector<E, N> exp)
-{
- __target_switch
- {
- case hlsl: __intrinsic_asm "ldexp";
- case glsl: __intrinsic_asm "ldexp";
- case spirv: return spirv_asm {
- OpExtInst $$vector<T,N> result glsl450 Ldexp $x $exp
- };
- default:
- vector<T,N> temp;
- [ForceUnroll]
- for (int i = 0; i < N; ++i)
- temp[i] = __floatCast<T>(exp[i]);
- return ldexp(x, temp);
- }
-}
-
[__readNone]
[ForceInline]
[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)]
diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang
index ca1fb0af3..6b3c5db59 100644
--- a/source/slang/hlsl.meta.slang
+++ b/source/slang/hlsl.meta.slang
@@ -5,9 +5,6 @@ typedef uint UINT;
__intrinsic_op($(kIROp_RequireGLSLExtension))
void __requireGLSLExtension(String extensionName);
-__intrinsic_op($(kIROp_FloatCast))
-T __floatCast<T, U>(U v);
-
[sealed]
interface IBufferDataLayout
{
@@ -4093,12 +4090,13 @@ matrix<T,N,M> abs(matrix<T,N,M> x)
__generic<T : __BuiltinFloatingPointType>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
T abs(T x)
{
__target_switch
{
case hlsl: __intrinsic_asm "abs";
+ case metal: __intrinsic_asm "abs";
case glsl: __intrinsic_asm "abs";
case cuda: __intrinsic_asm "$P_abs($0)";
case cpp: __intrinsic_asm "$P_abs($0)";
@@ -4110,12 +4108,13 @@ T abs(T x)
__generic<T : __BuiltinFloatingPointType, let N : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
vector<T, N> abs(vector<T, N> x)
{
__target_switch
{
case hlsl: __intrinsic_asm "abs";
+ case metal: __intrinsic_asm "abs";
case glsl: __intrinsic_asm "abs";
case spirv: return spirv_asm {
result:$$vector<T,N> = OpExtInst glsl450 FAbs $x;
@@ -4127,7 +4126,7 @@ vector<T, N> abs(vector<T, N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
matrix<T,N,M> abs(matrix<T,N,M> x)
{
__target_switch
@@ -4138,11 +4137,40 @@ matrix<T,N,M> abs(matrix<T,N,M> x)
}
}
+__generic<T : __BuiltinFloatingPointType>
+[__readNone]
+[ForceInline]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
+T fabs(T x)
+{
+ __target_switch
+ {
+ case metal: __intrinsic_asm "fabs";
+ default:
+ return abs(x);
+ }
+}
+
+__generic<T : __BuiltinFloatingPointType, let N : int>
+[__readNone]
+[ForceInline]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
+vector<T, N> fabs(vector<T, N> x)
+{
+ __target_switch
+ {
+ case metal: __intrinsic_asm "fabs";
+ default:
+ return abs(x);
+ }
+}
+
+
// Inverse cosine (HLSL SM 1.0)
__generic<T : __BuiltinFloatingPointType>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
T acos(T x)
{
__target_switch
@@ -4151,6 +4179,7 @@ T acos(T x)
case cuda: __intrinsic_asm "$P_acos($0)";
case glsl: __intrinsic_asm "acos";
case hlsl: __intrinsic_asm "acos";
+ case metal: __intrinsic_asm "acos";
case spirv: return spirv_asm {
OpExtInst $$T result glsl450 Acos $x
};
@@ -4159,13 +4188,14 @@ T acos(T x)
__generic<T : __BuiltinFloatingPointType, let N : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
vector<T, N> acos(vector<T, N> x)
{
__target_switch
{
case glsl: __intrinsic_asm "acos";
case hlsl: __intrinsic_asm "acos";
+ case metal: __intrinsic_asm "acos";
case spirv: return spirv_asm {
OpExtInst $$vector<T, N> result glsl450 Acos $x
};
@@ -4176,7 +4206,7 @@ vector<T, N> acos(vector<T, N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
matrix<T, N, M> acos(matrix<T, N, M> x)
{
__target_switch
@@ -4187,9 +4217,51 @@ matrix<T, N, M> acos(matrix<T, N, M> x)
}
}
+// Inverse hyperbolic cosine
+
+__generic<T : __BuiltinFloatingPointType>
+[__readNone]
+[ForceInline]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, GLSL_130)]
+T acosh(T x)
+{
+ __target_switch
+ {
+ case cpp: __intrinsic_asm "$P_acosh($0)";
+ case cuda: __intrinsic_asm "$P_acosh($0)";
+ case glsl: __intrinsic_asm "acosh";
+ case metal: __intrinsic_asm "acosh";
+ case spirv: return spirv_asm {
+ OpExtInst $$T result glsl450 Acosh $x
+ };
+ default:
+ return log(x + sqrt( x * x - T(1)));
+ }
+}
+
+__generic<T : __BuiltinFloatingPointType, let N:int>
+[__readNone]
+[ForceInline]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, GLSL_130)]
+vector<T,N> acosh(vector<T,N> x)
+{
+ __target_switch
+ {
+ case glsl: __intrinsic_asm "acosh";
+ case metal: __intrinsic_asm "acosh";
+ case spirv: return spirv_asm {
+ OpExtInst $$vector<T,N> result glsl450 Acosh $x
+ };
+ default:
+ VECTOR_MAP_UNARY(T, N, acosh, x);
+ }
+}
+
+
// Test if all components are non-zero (HLSL SM 1.0)
__generic<T : __BuiltinType>
[__readNone]
+[require(cpp_cuda_glsl_hlsl_metal_spirv)]
bool all(T x)
{
__target_switch
@@ -4198,6 +4270,8 @@ bool all(T x)
__intrinsic_asm "bool($0)";
case hlsl:
__intrinsic_asm "all";
+ case metal:
+ __intrinsic_asm "all";
case spirv:
let zero = __default<T>();
if (__isInt<T>())
@@ -4219,12 +4293,15 @@ bool all(T x)
__generic<T : __BuiltinType, let N : int>
[__readNone]
+[require(cpp_cuda_glsl_hlsl_metal_spirv)]
bool all(vector<T,N> x)
{
__target_switch
{
case hlsl:
__intrinsic_asm "all";
+ case metal:
+ __intrinsic_asm "all";
case glsl:
__intrinsic_asm "all(bvec$N0($0))";
case spirv:
@@ -4261,7 +4338,7 @@ bool all(vector<T,N> x)
__generic<T : __BuiltinType, let N : int, let M : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv)]
bool all(matrix<T,N,M> x)
{
__target_switch
@@ -4318,6 +4395,7 @@ int3 WorkgroupSize();
__generic<T : __BuiltinType>
[__readNone]
+[require(cpp_cuda_glsl_hlsl_metal_spirv)]
bool any(T x)
{
__target_switch
@@ -4326,6 +4404,8 @@ bool any(T x)
__intrinsic_asm "bool($0)";
case hlsl:
__intrinsic_asm "any";
+ case metal:
+ __intrinsic_asm "any";
case spirv:
let zero = __default<T>();
if (__isInt<T>())
@@ -4346,12 +4426,15 @@ bool any(T x)
__generic<T : __BuiltinType, let N : int>
[__readNone]
+[require(cpp_cuda_glsl_hlsl_metal_spirv)]
bool any(vector<T, N> x)
{
__target_switch
{
case hlsl:
__intrinsic_asm "any";
+ case metal:
+ __intrinsic_asm "any";
case glsl:
__intrinsic_asm "any(bvec$N0($0))";
case spirv:
@@ -4541,7 +4624,7 @@ matrix<float,N,M> asfloat(matrix<float,N,M> x)
// Inverse sine (HLSL SM 1.0)
__generic<T : __BuiltinFloatingPointType>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
T asin(T x)
{
__target_switch
@@ -4550,6 +4633,7 @@ T asin(T x)
case cuda: __intrinsic_asm "$P_asin($0)";
case glsl: __intrinsic_asm "asin";
case hlsl: __intrinsic_asm "asin";
+ case metal: __intrinsic_asm "asin";
case spirv: return spirv_asm {
OpExtInst $$T result glsl450 Asin $x
};
@@ -4558,13 +4642,14 @@ T asin(T x)
__generic<T : __BuiltinFloatingPointType, let N : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
vector<T, N> asin(vector<T, N> x)
{
__target_switch
{
case glsl: __intrinsic_asm "asin";
case hlsl: __intrinsic_asm "asin";
+ case metal: __intrinsic_asm "asin";
case spirv: return spirv_asm {
OpExtInst $$vector<T, N> result glsl450 Asin $x
};
@@ -4575,7 +4660,7 @@ vector<T, N> asin(vector<T, N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
matrix<T, N, M> asin(matrix<T, N, M> x)
{
__target_switch
@@ -4586,6 +4671,46 @@ matrix<T, N, M> asin(matrix<T, N, M> x)
}
}
+// Inverse hyperbolic sine
+
+__generic<T : __BuiltinFloatingPointType>
+[__readNone]
+[ForceInline]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, GLSL_130)]
+T asinh(T x)
+{
+ __target_switch
+ {
+ case cpp: __intrinsic_asm "$P_asinh($0)";
+ case cuda: __intrinsic_asm "$P_asinh($0)";
+ case glsl: __intrinsic_asm "asinh";
+ case metal: __intrinsic_asm "asinh";
+ case spirv: return spirv_asm {
+ OpExtInst $$T result glsl450 Asinh $x
+ };
+ default:
+ return log(x + sqrt(x * x + T(1)));
+ }
+}
+
+__generic<T : __BuiltinFloatingPointType, let N:int>
+[__readNone]
+[ForceInline]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, GLSL_130)]
+vector<T,N> asinh(vector<T,N> x)
+{
+ __target_switch
+ {
+ case glsl: __intrinsic_asm "asinh";
+ case metal: __intrinsic_asm "asinh";
+ case spirv: return spirv_asm {
+ OpExtInst $$vector<T,N> result glsl450 Asinh $x
+ };
+ default:
+ VECTOR_MAP_UNARY(T, N, asinh, x);
+ }
+}
+
// Reinterpret bits as an int (HLSL SM 4.0)
[__readNone]
@@ -5029,7 +5154,7 @@ matrix<float16_t,R,C> asfloat16<let R : int, let C : int>(matrix<int16_t,R,C> va
// Inverse tangent (HLSL SM 1.0)
__generic<T : __BuiltinFloatingPointType>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
T atan(T x)
{
__target_switch
@@ -5038,6 +5163,7 @@ T atan(T x)
case cuda: __intrinsic_asm "$P_atan($0)";
case glsl: __intrinsic_asm "atan";
case hlsl: __intrinsic_asm "atan";
+ case metal: __intrinsic_asm "atan";
case spirv: return spirv_asm {
OpExtInst $$T result glsl450 Atan $x
};
@@ -5046,13 +5172,14 @@ T atan(T x)
__generic<T : __BuiltinFloatingPointType, let N : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
vector<T, N> atan(vector<T, N> x)
{
__target_switch
{
case glsl: __intrinsic_asm "atan";
case hlsl: __intrinsic_asm "atan";
+ case metal: __intrinsic_asm "atan";
case spirv: return spirv_asm {
OpExtInst $$vector<T, N> result glsl450 Atan $x
};
@@ -5063,7 +5190,7 @@ vector<T, N> atan(vector<T, N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
matrix<T, N, M> atan(matrix<T, N, M> x)
{
__target_switch
@@ -5076,7 +5203,7 @@ matrix<T, N, M> atan(matrix<T, N, M> x)
__generic<T : __BuiltinFloatingPointType>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
T atan2(T y, T x)
{
__target_switch
@@ -5085,6 +5212,7 @@ T atan2(T y, T x)
case cuda: __intrinsic_asm "$P_atan2($0, $1)";
case glsl: __intrinsic_asm "atan($0,$1)";
case hlsl: __intrinsic_asm "atan2";
+ case metal: __intrinsic_asm "atan2";
case spirv: return spirv_asm {
OpExtInst $$T result glsl450 Atan2 $y $x
};
@@ -5093,13 +5221,14 @@ T atan2(T y, T x)
__generic<T : __BuiltinFloatingPointType, let N : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
vector<T, N> atan2(vector<T, N> y, vector<T, N> x)
{
__target_switch
{
case glsl: __intrinsic_asm "atan($0,$1)";
case hlsl: __intrinsic_asm "atan2";
+ case metal: __intrinsic_asm "atan2";
case spirv: return spirv_asm {
OpExtInst $$vector<T, N> result glsl450 Atan2 $y $x
};
@@ -5110,7 +5239,7 @@ vector<T, N> atan2(vector<T, N> y, vector<T, N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
matrix<T,N,M> atan2(matrix<T,N,M> y, matrix<T,N,M> x)
{
__target_switch
@@ -5121,10 +5250,50 @@ matrix<T,N,M> atan2(matrix<T,N,M> y, matrix<T,N,M> x)
}
}
+// Inverse hyperbolic tangent
+
+__generic<T : __BuiltinFloatingPointType>
+[__readNone]
+[ForceInline]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, GLSL_130)]
+T atanh(T x)
+{
+ __target_switch
+ {
+ case cpp: __intrinsic_asm "$P_atanh($0)";
+ case cuda: __intrinsic_asm "$P_atanh($0)";
+ case glsl: __intrinsic_asm "atanh";
+ case metal: __intrinsic_asm "atanh";
+ case spirv: return spirv_asm {
+ OpExtInst $$T result glsl450 Atanh $x
+ };
+ default:
+ return T(0.5) * log((T(1) + x) / (T(1) - x));
+ }
+}
+
+// Vector overload of the inverse hyperbolic tangent.
+// GLSL and Metal emit their `atanh` intrinsic, SPIR-V emits the `glsl450`
+// extended instruction `Atanh`, and every other target falls back to
+// `VECTOR_MAP_UNARY` over the scalar `atanh`.
+// The capability requirement matches the scalar `atanh` and the vector
+// `asinh`/`acosh` overloads (GLSL_130).
+__generic<T : __BuiltinFloatingPointType, let N:int>
+[__readNone]
+[ForceInline]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, GLSL_130)]
+vector<T,N> atanh(vector<T,N> x)
+{
+    __target_switch
+    {
+    case glsl: __intrinsic_asm "atanh";
+    case metal: __intrinsic_asm "atanh";
+    case spirv: return spirv_asm {
+        OpExtInst $$vector<T,N> result glsl450 Atanh $x
+    };
+    default:
+        VECTOR_MAP_UNARY(T, N, atanh, x);
+    }
+}
+
// Ceiling (HLSL SM 1.0)
__generic<T : __BuiltinFloatingPointType>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
T ceil(T x)
{
__target_switch
@@ -5133,6 +5302,7 @@ T ceil(T x)
case cuda: __intrinsic_asm "$P_ceil($0)";
case glsl: __intrinsic_asm "ceil";
case hlsl: __intrinsic_asm "ceil";
+ case metal: __intrinsic_asm "ceil";
case spirv: return spirv_asm {
OpExtInst $$T result glsl450 Ceil $x
};
@@ -5141,13 +5311,14 @@ T ceil(T x)
__generic<T : __BuiltinFloatingPointType, let N : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
vector<T, N> ceil(vector<T, N> x)
{
__target_switch
{
case glsl: __intrinsic_asm "ceil";
case hlsl: __intrinsic_asm "ceil";
+ case metal: __intrinsic_asm "ceil";
case spirv: return spirv_asm {
OpExtInst $$vector<T, N> result glsl450 Ceil $x
};
@@ -5158,7 +5329,7 @@ vector<T, N> ceil(vector<T, N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
matrix<T, N, M> ceil(matrix<T, N, M> x)
{
__target_switch
@@ -5169,6 +5340,87 @@ matrix<T, N, M> ceil(matrix<T, N, M> x)
}
}
+// Copy-sign
+
+__generic<let N: int>
+[__readNone]
+[require(cpp_cuda_glsl_hlsl_metal_spirv)]
+vector<half,N> copysign_half(vector<half,N> x, vector<half,N> y)
+{
+ let ux = reinterpret<vector<uint16_t,N>>(x);
+ let uy = reinterpret<vector<uint16_t,N>>(y);
+ vector<uint16_t,N> signY = (uy & (uint16_t(1) << uint16_t(15)));
+ vector<uint16_t,N> newX = (ux & ((uint16_t(1) << uint16_t(15)) - uint16_t(1))) + signY;
+ return reinterpret<vector<half,N>>(newX);
+}
+
+__generic<let N: int>
+[__readNone]
+[require(cpp_cuda_glsl_hlsl_metal_spirv)]
+vector<float,N> copysign_float(vector<float,N> x, vector<float,N> y)
+{
+ let ux = reinterpret<vector<uint32_t,N>>(x);
+ let uy = reinterpret<vector<uint32_t,N>>(y);
+ vector<uint32_t,N> signY = (uy & (uint32_t(1) << uint32_t(31)));
+ vector<uint32_t,N> newX = (ux & ((uint32_t(1) << uint32_t(31)) - uint32_t(1))) + signY;
+ return reinterpret<vector<float,N>>(newX);
+}
+
+__generic<let N: int>
+[__readNone]
+[require(cpp_cuda_glsl_hlsl_metal_spirv)]
+vector<double,N> copysign_double(vector<double,N> x, vector<double,N> y)
+{
+ let ux = reinterpret<vector<uint64_t,N>>(x);
+ let uy = reinterpret<vector<uint64_t,N>>(y);
+ vector<uint64_t,N> signY = (uy & (uint64_t(1) << uint64_t(63)));
+ vector<uint64_t,N> newX = (ux & ((uint64_t(1) << uint64_t(63)) - uint64_t(1))) + signY;
+ return reinterpret<vector<double,N>>(newX);
+}
+
+__generic<T:__BuiltinFloatingPointType, U:__BuiltinFloatingPointType, let N : int>
+__intrinsic_op($(kIROp_FloatCast))
+vector<T,N> __real_cast(vector<U,N> val);
+
+__generic<T : __BuiltinFloatingPointType, let N: int>
+[__readNone]
+[require(cpp_cuda_glsl_hlsl_metal_spirv)]
+vector<T,N> copysign(vector<T,N> x, vector<T,N> y)
+{
+ __target_switch
+ {
+ case metal: __intrinsic_asm "copysign";
+ default:
+ {
+ // sign of -0.0 needs to be respected.
+ if (T is half)
+ return __real_cast<T>(copysign_half(
+ __real_cast<half>(x),
+ __real_cast<half>(y)));
+ if (T is float)
+ return __real_cast<T>(copysign_float(
+ __real_cast<float>(x),
+ __real_cast<float>(y)));
+ return __real_cast<T>(copysign_double(
+ __real_cast<double>(x),
+ __real_cast<double>(y)));
+ }
+ }
+}
+
+__generic<T : __BuiltinFloatingPointType>
+[__readNone]
+[require(cpp_cuda_glsl_hlsl_metal_spirv)]
+T copysign(T x, T y)
+{
+ __target_switch
+ {
+ case metal: __intrinsic_asm "copysign";
+ default:
+ return copysign(vector<T,1>(x), vector<T,1>(y))[0];
+ }
+}
+
// Check access status to tiled resource
bool CheckAccessFullyMapped(uint status);
@@ -5320,7 +5572,7 @@ void clip(matrix<T,N,M> x)
// Cosine
__generic<T : __BuiltinFloatingPointType>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
T cos(T x)
{
__target_switch
@@ -5329,6 +5581,7 @@ T cos(T x)
case cuda: __intrinsic_asm "$P_cos($0)";
case glsl: __intrinsic_asm "cos";
case hlsl: __intrinsic_asm "cos";
+ case metal: __intrinsic_asm "cos";
case spirv: return spirv_asm {
OpExtInst $$T result glsl450 Cos $x
};
@@ -5337,13 +5590,14 @@ T cos(T x)
__generic<T : __BuiltinFloatingPointType, let N : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
vector<T, N> cos(vector<T, N> x)
{
__target_switch
{
case glsl: __intrinsic_asm "cos";
case hlsl: __intrinsic_asm "cos";
+ case metal: __intrinsic_asm "cos";
case spirv: return spirv_asm {
OpExtInst $$vector<T, N> result glsl450 Cos $x
};
@@ -5354,7 +5608,7 @@ vector<T, N> cos(vector<T, N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
matrix<T, N, M> cos(matrix<T, N, M> x)
{
__target_switch
@@ -5368,7 +5622,7 @@ matrix<T, N, M> cos(matrix<T, N, M> x)
// Hyperbolic cosine
__generic<T : __BuiltinFloatingPointType>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv)]
T cosh(T x)
{
__target_switch
@@ -5377,6 +5631,7 @@ T cosh(T x)
case cuda: __intrinsic_asm "$P_cosh($0)";
case glsl: __intrinsic_asm "cosh";
case hlsl: __intrinsic_asm "cosh";
+ case metal: __intrinsic_asm "cosh";
case spirv: return spirv_asm {
OpExtInst $$T result glsl450 Cosh $x
};
@@ -5385,13 +5640,14 @@ T cosh(T x)
__generic<T : __BuiltinFloatingPointType, let N : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv)]
vector<T,N> cosh(vector<T,N> x)
{
__target_switch
{
case glsl: __intrinsic_asm "cosh";
case hlsl: __intrinsic_asm "cosh";
+ case metal: __intrinsic_asm "cosh";
case spirv: return spirv_asm {
OpExtInst $$vector<T,N> result glsl450 Cosh $x
};
@@ -5402,7 +5658,7 @@ vector<T,N> cosh(vector<T,N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv)]
matrix<T, N, M> cosh(matrix<T, N, M> x)
{
__target_switch
@@ -5413,6 +5669,35 @@ matrix<T, N, M> cosh(matrix<T, N, M> x)
}
}
+// Cosine of pi times x: cospi(x) == cos(pi * x)
+
+__generic<T : __BuiltinFloatingPointType>
+[__readNone]
+[require(cpp_cuda_glsl_hlsl_metal_spirv)]
+T cospi(T x)
+{
+ __target_switch
+ {
+ case metal: __intrinsic_asm "cospi";
+ default:
+ return cos(T.getPi() * x);
+ }
+}
+
+__generic<T : __BuiltinFloatingPointType, let N: int>
+[__readNone]
+[require(cpp_cuda_glsl_hlsl_metal_spirv)]
+vector<T,N> cospi(vector<T,N> x)
+{
+ __target_switch
+ {
+ case metal: __intrinsic_asm "cospi";
+ default:
+ return cos(T.getPi() * x);
+ }
+}
+
+
// Population count
[__readNone]
[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)]
@@ -5776,6 +6061,63 @@ T distance(T x, T y)
}
}
+// fdim
+
+// Positive difference of two scalars.
+// On Metal this emits the `fdim` intrinsic; on every other target it is
+// computed as `max(T(0), x - y)`, i.e. `x - y` when that is positive and
+// zero otherwise.
+// `metal` is listed in the capability requirement because the body has a
+// dedicated `case metal` branch.
+__generic<T : __BuiltinFloatingPointType>
+[__readNone]
+[require(cpp_cuda_glsl_hlsl_metal_spirv)]
+T fdim(T x, T y)
+{
+    __target_switch
+    {
+    case metal: __intrinsic_asm "fdim";
+    default:
+        return max(T(0), x - y);
+    }
+}
+
+// Positive difference of two vectors.
+// On Metal this emits the `fdim` intrinsic; on every other target it is
+// computed as `max(T(0), x - y)`.
+// `metal` is listed in the capability requirement because the body has a
+// dedicated `case metal` branch.
+__generic<T : __BuiltinFloatingPointType, let N : int>
+[__readNone]
+[require(cpp_cuda_glsl_hlsl_metal_spirv)]
+vector<T,N> fdim(vector<T,N> x, vector<T,N> y)
+{
+    __target_switch
+    {
+    case metal: __intrinsic_asm "fdim";
+    default:
+        return max(T(0), x - y);
+    }
+}
+
+// divide
+
+__generic<T : __BuiltinFloatingPointType>
+[__readNone]
+[require(cpp_cuda_glsl_hlsl_metal_spirv)]
+T divide(T x, T y)
+{
+ __target_switch
+ {
+ case metal: __intrinsic_asm "divide";
+ default:
+ return x / y;
+ }
+}
+
+__generic<T : __BuiltinFloatingPointType, let N: int>
+[__readNone]
+[require(cpp_cuda_glsl_hlsl_metal_spirv)]
+vector<T,N> divide(vector<T,N> x, vector<T,N> y)
+{
+ __target_switch
+ {
+ case metal: __intrinsic_asm "divide";
+ default:
+ return x / y;
+ }
+}
+
+
// Vector dot product
__generic<T : __BuiltinFloatingPointType>
@@ -6005,7 +6347,7 @@ matrix<T,N,M> EvaluateAttributeSnapped(matrix<T,N,M> x, int2 offset)
__generic<T : __BuiltinFloatingPointType>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
T exp(T x)
{
__target_switch
@@ -6014,6 +6356,7 @@ T exp(T x)
case cuda: __intrinsic_asm "$P_exp($0)";
case glsl: __intrinsic_asm "exp";
case hlsl: __intrinsic_asm "exp";
+ case metal: __intrinsic_asm "exp";
case spirv: return spirv_asm {
OpExtInst $$T result glsl450 Exp $x
};
@@ -6022,13 +6365,14 @@ T exp(T x)
__generic<T : __BuiltinFloatingPointType, let N : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
vector<T, N> exp(vector<T, N> x)
{
__target_switch
{
case glsl: __intrinsic_asm "exp";
case hlsl: __intrinsic_asm "exp";
+ case metal: __intrinsic_asm "exp";
case spirv: return spirv_asm {
OpExtInst $$vector<T, N> result glsl450 Exp $x
};
@@ -6039,7 +6383,7 @@ vector<T, N> exp(vector<T, N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
matrix<T, N, M> exp(matrix<T, N, M> x)
{
__target_switch
@@ -6054,7 +6398,7 @@ matrix<T, N, M> exp(matrix<T, N, M> x)
__generic<T : __BuiltinFloatingPointType>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
T exp2(T x)
{
__target_switch
@@ -6068,13 +6412,14 @@ T exp2(T x)
}
else
{
- float xf = __floatCast<float>(x);
+ float xf = __realCast<float>(x);
return T(spirv_asm {
result:$$float = OpExtInst glsl450 Exp2 $xf
});
}
case hlsl:
__intrinsic_asm "exp2($0)";
+ case metal: __intrinsic_asm "exp2";
case cpp:
__intrinsic_asm "$P_exp2($0)";
case cuda:
@@ -6085,7 +6430,7 @@ T exp2(T x)
__generic<T : __BuiltinFloatingPointType, let N : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
vector<T,N> exp2(vector<T,N> x)
{
__target_switch
@@ -6093,6 +6438,7 @@ vector<T,N> exp2(vector<T,N> x)
case glsl:
__intrinsic_asm "exp2($0)";
case hlsl: __intrinsic_asm "exp2";
+ case metal: __intrinsic_asm "exp2";
case spirv: return spirv_asm {
OpExtInst $$vector<T,N> result glsl450 Exp2 $x
};
@@ -6103,7 +6449,7 @@ vector<T,N> exp2(vector<T,N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
matrix<T,N,M> exp2(matrix<T,N,M> x)
{
__target_switch
@@ -6114,6 +6460,36 @@ matrix<T,N,M> exp2(matrix<T,N,M> x)
}
}
+// Base-10 exponent
+
+__generic<T : __BuiltinFloatingPointType>
+[__readNone]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
+T exp10(T x)
+{
+ __target_switch
+ {
+ case metal: __intrinsic_asm "exp10";
+ default:
+ const T ln10 = T(2.302585092994045901); // ln(10)
+ return exp(x * ln10);
+ }
+}
+
+// Base-10 exponential for vectors.
+// On Metal this emits the `exp10` intrinsic; on every other target it is
+// computed as `exp(x * ln(10))`.
+__generic<T : __BuiltinFloatingPointType, let N: int>
+[__readNone]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
+vector<T,N> exp10(vector<T,N> x)
+{
+    __target_switch
+    {
+    case metal: __intrinsic_asm "exp10";
+    default:
+        // Use the same full-precision literal as the scalar overload so both
+        // overloads scale by an identical constant, including for `double`.
+        const T ln10 = T(2.302585092994045901); // ln(10)
+        return exp(x * ln10);
+    }
+}
+
// Convert 16-bit float stored in low bits of integer
__glsl_version(420)
@@ -6439,7 +6815,7 @@ vector<uint,N> firstbitlow(vector<uint,N> value)
__generic<T : __BuiltinFloatingPointType>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
T floor(T x)
{
__target_switch
@@ -6448,6 +6824,7 @@ T floor(T x)
case cuda: __intrinsic_asm "$P_floor($0)";
case glsl: __intrinsic_asm "floor";
case hlsl: __intrinsic_asm "floor";
+ case metal: __intrinsic_asm "floor";
case spirv: return spirv_asm {
OpExtInst $$T result glsl450 Floor $x
};
@@ -6456,13 +6833,14 @@ T floor(T x)
__generic<T : __BuiltinFloatingPointType, let N : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
vector<T, N> floor(vector<T, N> x)
{
__target_switch
{
case glsl: __intrinsic_asm "floor";
case hlsl: __intrinsic_asm "floor";
+ case metal: __intrinsic_asm "floor";
case spirv: return spirv_asm {
OpExtInst $$vector<T, N> result glsl450 Floor $x
};
@@ -6473,7 +6851,7 @@ vector<T, N> floor(vector<T, N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
matrix<T, N, M> floor(matrix<T, N, M> x)
{
__target_switch
@@ -6487,7 +6865,7 @@ matrix<T, N, M> floor(matrix<T, N, M> x)
// Fused multiply-add
__generic<T : __BuiltinFloatingPointType>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_5_0)]
T fma(T a, T b, T c)
{
__target_switch
@@ -6500,6 +6878,7 @@ T fma(T a, T b, T c)
return mad(a, b, c);
else
__intrinsic_asm "fma($0, $1, $2)";
+ case metal: __intrinsic_asm "fma";
case spirv: return spirv_asm {
OpExtInst $$T result glsl450 Fma $a $b $c
};
@@ -6510,13 +6889,14 @@ T fma(T a, T b, T c)
__generic<T : __BuiltinFloatingPointType, let N : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_5_0)]
vector<T, N> fma(vector<T, N> a, vector<T, N> b, vector<T, N> c)
{
__target_switch
{
case glsl: __intrinsic_asm "fma";
case hlsl: __intrinsic_asm "fma";
+ case metal: __intrinsic_asm "fma";
case spirv: return spirv_asm {
OpExtInst $$vector<T, N> result glsl450 Fma $a $b $c
};
@@ -6527,7 +6907,7 @@ vector<T, N> fma(vector<T, N> a, vector<T, N> b, vector<T, N> c)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_5_0)]
matrix<T, N, M> fma(matrix<T, N, M> a, matrix<T, N, M> b, matrix<T, N, M> c)
{
__target_switch
@@ -6541,19 +6921,24 @@ matrix<T, N, M> fma(matrix<T, N, M> a, matrix<T, N, M> b, matrix<T, N, M> c)
// Floating point remainder of x/y
__generic<T : __BuiltinFloatingPointType>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[ForceInline]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
T fmod(T x, T y)
{
- // In HLSL, fmod returns a remainder.
+ // In HLSL, `fmod` returns a remainder.
// Definition of `fmod` in HLSL is,
// "The floating-point remainder is calculated such that x = i * y + f,
// where i is an integer, f has the same sign as x, and the absolute value
// of f is less than the absolute value of y."
//
- // In GLSL, mod is a Modulus function.
+ // In GLSL, `mod` is a Modulus function.
// OpenGL document defines "Modulus" as "Returns x - y * floor(x / y)".
// The use of "Floor()" makes the difference.
//
+ // In Metal, `fmod` is a Modulus function.
+ // The Metal documentation defines it as "Returns x - y * trunc(x/y)".
+ // Note that the function name is the same as in HLSL, but it behaves differently.
+ //
// The tricky ones are when x or y is a negative value.
//
// | Remainder | Modulus
@@ -6588,10 +6973,13 @@ T fmod(T x, T y)
{
case cpp: __intrinsic_asm "$P_fmod($0, $1)";
case cuda: __intrinsic_asm "$P_fmod($0, $1)";
- case hlsl: __intrinsic_asm "fmod";
case glsl:
// GLSL doesn't have a function for remainder.
- __intrinsic_asm "(($0 < 0) ? -mod(-$0,abs($1)) : mod($0,abs($1)))";
+ __intrinsic_asm "(($0 < 0.0) ? -mod(-$0,abs($1)) : mod($0,abs($1)))";
+ case hlsl: __intrinsic_asm "fmod";
+ case metal:
+ // Metal doesn't have a function for remainder.
+ __intrinsic_asm "(($0 < 0.0) ? -fmod(-$0,abs($1)) : fmod($0,abs($1)))";
case spirv:
// OpFRem return "The floating-point remainder whose sign
// matches the sign of Operand 1", where Operand 1 is "x".
@@ -6604,7 +6992,8 @@ T fmod(T x, T y)
__generic<T : __BuiltinFloatingPointType, let N : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[ForceInline]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
vector<T, N> fmod(vector<T, N> x, vector<T, N> y)
{
__target_switch
@@ -6620,7 +7009,8 @@ vector<T, N> fmod(vector<T, N> x, vector<T, N> y)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[ForceInline]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
matrix<T, N, M> fmod(matrix<T, N, M> x, matrix<T, N, M> y)
{
__target_switch
@@ -6634,7 +7024,7 @@ matrix<T, N, M> fmod(matrix<T, N, M> x, matrix<T, N, M> y)
// Fractional part
__generic<T : __BuiltinFloatingPointType>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
T frac(T x)
{
__target_switch
@@ -6643,6 +7033,7 @@ T frac(T x)
case cuda: __intrinsic_asm "$P_frac($0)";
case glsl: __intrinsic_asm "fract";
case hlsl: __intrinsic_asm "frac";
+ case metal: __intrinsic_asm "fract";
case spirv: return spirv_asm {
OpExtInst $$T result glsl450 Fract $x
};
@@ -6651,13 +7042,14 @@ T frac(T x)
__generic<T : __BuiltinFloatingPointType, let N : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
vector<T, N> frac(vector<T, N> x)
{
__target_switch
{
case glsl: __intrinsic_asm "fract";
case hlsl: __intrinsic_asm "frac";
+ case metal: __intrinsic_asm "fract";
case spirv: return spirv_asm {
OpExtInst $$vector<T, N> result glsl450 Fract $x
};
@@ -6673,10 +7065,29 @@ matrix<T, N, M> frac(matrix<T, N, M> x)
MATRIX_MAP_UNARY(T, N, M, frac, x);
}
+// Scalar `fract`: an alias of `frac` under the name that the glsl and metal
+// cases of `frac` emit. The argument is forwarded unchanged.
+__generic<T : __BuiltinFloatingPointType>
+[__readNone]
+[ForceInline]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
+T fract(T x)
+{
+    T fractionalPart = frac(x);
+    return fractionalPart;
+}
+
+// Vector `fract`: an alias of the vector `frac` overload.
+// The argument is forwarded unchanged and the result returned as-is.
+__generic<T : __BuiltinFloatingPointType, let N:int>
+[__readNone]
+[ForceInline]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
+vector<T, N> fract(vector<T, N> x)
+{
+    vector<T, N> fractionalPart = frac(x);
+    return fractionalPart;
+}
+
+
// Split float into mantissa and exponent
__generic<T : __BuiltinFloatingPointType>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
T frexp(T x, out int exp)
{
__target_switch
@@ -6685,6 +7096,7 @@ T frexp(T x, out int exp)
case cuda: __intrinsic_asm "$P_frexp($0, $1)";
case glsl: __intrinsic_asm "frexp";
case hlsl: __intrinsic_asm "frexp";
+ case metal: __intrinsic_asm "frexp($0, *($1))";
case spirv: return spirv_asm {
result:$$T = OpExtInst glsl450 Frexp $x &exp
};
@@ -6693,12 +7105,14 @@ T frexp(T x, out int exp)
__generic<T : __BuiltinFloatingPointType, let N : int>
[__readNone]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
vector<T, N> frexp(vector<T, N> x, out vector<int, N> exp)
{
__target_switch
{
- case hlsl: __intrinsic_asm "frexp";
case glsl: __intrinsic_asm "frexp";
+ case hlsl: __intrinsic_asm "frexp";
+ case metal: __intrinsic_asm "frexp($0, *($1))";
case spirv: return spirv_asm {
result:$$vector<T, N> = OpExtInst glsl450 Frexp $x &exp
};
@@ -6709,7 +7123,7 @@ vector<T, N> frexp(vector<T, N> x, out vector<int, N> exp)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int, let L : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
matrix<T, N, M> frexp(matrix<T, N, M> x, out matrix<int, N, M, L> exp)
{
__target_switch
@@ -7920,7 +8334,7 @@ matrix<bool, N, M> isnan(matrix<T, N, M> x)
__generic<T : __BuiltinFloatingPointType>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
T ldexp(T x, T exp)
{
__target_switch
@@ -7933,7 +8347,7 @@ T ldexp(T x, T exp)
__generic<T : __BuiltinFloatingPointType, let N : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
vector<T, N> ldexp(vector<T, N> x, vector<T, N> exp)
{
__target_switch
@@ -7946,7 +8360,7 @@ vector<T, N> ldexp(vector<T, N> x, vector<T, N> exp)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
matrix<T, N, M> ldexp(matrix<T, N, M> x, matrix<T, N, M> exp)
{
__target_switch
@@ -7957,6 +8371,47 @@ matrix<T, N, M> ldexp(matrix<T, N, M> x, matrix<T, N, M> exp)
}
}
+// Scalar `ldexp` overload whose exponent is an integer type `E`.
+// glsl, hlsl and metal emit a call to `ldexp`; spirv emits the `glsl450`
+// `Ldexp` extended instruction with the exponent passed as-is.
+__generic<T : __BuiltinFloatingPointType, E : __BuiltinIntegerType>
+[__readNone]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
+T ldexp(T x, E exp)
+{
+ __target_switch
+ {
+ case glsl: __intrinsic_asm "ldexp";
+ case hlsl: __intrinsic_asm "ldexp";
+ case metal: __intrinsic_asm "ldexp";
+ case spirv: return spirv_asm {
+ OpExtInst $$T result glsl450 Ldexp $x $exp
+ };
+ default:
+ // Remaining targets: convert the integer exponent to `T` and forward
+ // to the `ldexp` overload that takes the exponent as `T`.
+ return ldexp(x, __realCast<T>(exp));
+ }
+}
+
+// Vector `ldexp` overload whose exponent is a vector of an integer type `E`.
+// glsl, hlsl and metal emit a call to `ldexp`; spirv emits the `glsl450`
+// `Ldexp` extended instruction. Every other target converts the exponent
+// lane by lane to `T` and forwards to the overload taking vector<T,N>.
+__generic<T : __BuiltinFloatingPointType, E : __BuiltinIntegerType, let N : int>
+[__readNone]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
+vector<T, N> ldexp(vector<T, N> x, vector<E, N> exp)
+{
+    __target_switch
+    {
+    case glsl: __intrinsic_asm "ldexp";
+    case hlsl: __intrinsic_asm "ldexp";
+    case metal: __intrinsic_asm "ldexp";
+    case spirv: return spirv_asm {
+        OpExtInst $$vector<T,N> result glsl450 Ldexp $x $exp
+    };
+    default:
+        // Build a real-typed copy of the exponent, one lane at a time.
+        vector<T,N> realExp;
+        [ForceUnroll]
+        for (int lane = 0; lane < N; ++lane)
+        {
+            realExp[lane] = __realCast<T>(exp[lane]);
+        }
+        return ldexp(x, realExp);
+    }
+}
+
+
// Vector length
__generic<T : __BuiltinFloatingPointType, let N : int>
[__readNone]
@@ -8058,7 +8513,7 @@ float4 lit(float n_dot_l, float n_dot_h, float m)
// Base-e logarithm
__generic<T : __BuiltinFloatingPointType>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
T log(T x)
{
__target_switch
@@ -8067,6 +8522,7 @@ T log(T x)
case cuda: __intrinsic_asm "$P_log($0)";
case glsl: __intrinsic_asm "log";
case hlsl: __intrinsic_asm "log";
+ case metal: __intrinsic_asm "log";
case spirv: return spirv_asm {
OpExtInst $$T result glsl450 Log $x
};
@@ -8075,13 +8531,14 @@ T log(T x)
__generic<T : __BuiltinFloatingPointType, let N : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
vector<T, N> log(vector<T, N> x)
{
__target_switch
{
case glsl: __intrinsic_asm "log";
case hlsl: __intrinsic_asm "log";
+ case metal: __intrinsic_asm "log";
case spirv: return spirv_asm {
OpExtInst $$vector<T, N> result glsl450 Log $x
};
@@ -8092,7 +8549,7 @@ vector<T, N> log(vector<T, N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
matrix<T, N, M> log(matrix<T, N, M> x)
{
__target_switch
@@ -8106,12 +8563,13 @@ matrix<T, N, M> log(matrix<T, N, M> x)
// Base-10 logarithm
__generic<T : __BuiltinFloatingPointType>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
T log10(T x)
{
__target_switch
{
case hlsl: __intrinsic_asm "log10";
+ case metal: __intrinsic_asm "log10";
case glsl: __intrinsic_asm "(log( $0 ) * $S0( 0.43429448190325182765112891891661) )";
case cuda: __intrinsic_asm "$P_log10($0)";
case cpp: __intrinsic_asm "$P_log10($0)";
@@ -8128,12 +8586,13 @@ T log10(T x)
__generic<T : __BuiltinFloatingPointType, let N : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
vector<T,N> log10(vector<T,N> x)
{
__target_switch
{
case hlsl: __intrinsic_asm "log10";
+ case metal: __intrinsic_asm "log10";
case glsl: __intrinsic_asm "(log( $0 ) * $S0(0.43429448190325182765112891891661) )";
case spirv:
{
@@ -8150,7 +8609,7 @@ vector<T,N> log10(vector<T,N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
matrix<T,N,M> log10(matrix<T,N,M> x)
{
__target_switch
@@ -8164,7 +8623,7 @@ matrix<T,N,M> log10(matrix<T,N,M> x)
// Base-2 logarithm
__generic<T : __BuiltinFloatingPointType>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
T log2(T x)
{
__target_switch
@@ -8173,6 +8632,7 @@ T log2(T x)
case cuda: __intrinsic_asm "$P_log2($0)";
case glsl: __intrinsic_asm "log2";
case hlsl: __intrinsic_asm "log2";
+ case metal: __intrinsic_asm "log2";
case spirv: return spirv_asm {
OpExtInst $$T result glsl450 Log2 $x
};
@@ -8181,13 +8641,14 @@ T log2(T x)
__generic<T : __BuiltinFloatingPointType, let N : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
vector<T,N> log2(vector<T,N> x)
{
__target_switch
{
case glsl: __intrinsic_asm "log2";
case hlsl: __intrinsic_asm "log2";
+ case metal: __intrinsic_asm "log2";
case spirv: return spirv_asm {
OpExtInst $$vector<T,N> result glsl450 Log2 $x
};
@@ -8198,7 +8659,7 @@ vector<T,N> log2(vector<T,N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
matrix<T,N,M> log2(matrix<T,N,M> x)
{
__target_switch
@@ -8213,7 +8674,7 @@ matrix<T,N,M> log2(matrix<T,N,M> x)
__generic<T : __BuiltinFloatingPointType>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_5_0)]
T mad(T mvalue, T avalue, T bvalue)
{
__target_switch
@@ -8222,6 +8683,7 @@ T mad(T mvalue, T avalue, T bvalue)
case cuda: __intrinsic_asm "$P_fma($0, $1, $2)";
case glsl: __intrinsic_asm "fma";
case hlsl: __intrinsic_asm "mad";
+ case metal: __intrinsic_asm "fma";
case spirv: return spirv_asm {
OpExtInst $$T result glsl450 Fma $mvalue $avalue $bvalue
};
@@ -8230,13 +8692,14 @@ T mad(T mvalue, T avalue, T bvalue)
__generic<T : __BuiltinFloatingPointType, let N : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_5_0)]
vector<T, N> mad(vector<T, N> mvalue, vector<T, N> avalue, vector<T, N> bvalue)
{
__target_switch
{
case glsl: __intrinsic_asm "fma";
case hlsl: __intrinsic_asm "mad";
+ case metal: __intrinsic_asm "fma";
case spirv: return spirv_asm {
OpExtInst $$vector<T, N> result glsl450 Fma $mvalue $avalue $bvalue
};
@@ -8247,7 +8710,7 @@ vector<T, N> mad(vector<T, N> mvalue, vector<T, N> avalue, vector<T, N> bvalue)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_5_0)]
matrix<T, N, M> mad(matrix<T, N, M> mvalue, matrix<T, N, M> avalue, matrix<T, N, M> bvalue)
{
__target_switch
@@ -8385,12 +8848,13 @@ matrix<T, N, M> max(matrix<T, N, M> x, matrix<T, N, M> y)
__generic<T : __BuiltinFloatingPointType>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
T max(T x, T y)
{
__target_switch
{
case hlsl: __intrinsic_asm "max";
+ case metal: __intrinsic_asm "max";
case glsl: __intrinsic_asm "max";
case cuda: __intrinsic_asm "$P_max($0, $1)";
case cpp: __intrinsic_asm "$P_max($0, $1)";
@@ -8402,12 +8866,13 @@ T max(T x, T y)
__generic<T : __BuiltinFloatingPointType, let N : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
vector<T, N> max(vector<T, N> x, vector<T, N> y)
{
__target_switch
{
case hlsl: __intrinsic_asm "max";
+ case metal: __intrinsic_asm "max";
case glsl: __intrinsic_asm "max";
case spirv: return spirv_asm {
result:$$vector<T, N> = OpExtInst glsl450 FMax $x $y
@@ -8419,7 +8884,7 @@ vector<T, N> max(vector<T, N> x, vector<T, N> y)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
matrix<T, N, M> max(matrix<T, N, M> x, matrix<T, N, M> y)
{
__target_switch
@@ -8430,6 +8895,107 @@ matrix<T, N, M> max(matrix<T, N, M> x, matrix<T, N, M> y)
}
}
+// Maximum of three scalar values.
+// Metal emits a call to `max3`; every other target evaluates
+// max(x, max(y, z)) using the two-argument `max`.
+__generic<T : __BuiltinFloatingPointType>
+[__readNone]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
+T max3(T x, T y, T z)
+{
+ __target_switch
+ {
+ case metal: __intrinsic_asm "max3";
+ default:
+ return max(x, max(y, z));
+ }
+}
+
+// Maximum of three vectors.
+// Metal emits a call to `max3`; every other target evaluates
+// max(x, max(y, z)) using the two-argument vector `max`.
+__generic<T : __BuiltinFloatingPointType, let N: int>
+[__readNone]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
+vector<T,N> max3(vector<T,N> x, vector<T,N> y, vector<T,N> z)
+{
+ __target_switch
+ {
+ case metal: __intrinsic_asm "max3";
+ default:
+ return max(x, max(y, z));
+ }
+}
+
+// NaN-aware maximum of two scalar values.
+// If exactly one argument is NaN the other argument is returned; if both are
+// NaN the result is NaN. Metal emits a call to `fmax`; every other target
+// uses the fallback below.
+__generic<T : __BuiltinFloatingPointType>
+[__readNone]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
+T fmax(T x, T y)
+{
+    __target_switch
+    {
+    case metal: __intrinsic_asm "fmax";
+    default:
+        // Filter NaN on BOTH sides before calling `max`, so the result never
+        // depends on how a target's `max` treats a NaN operand.
+        if (isnan(x)) return y;
+        if (isnan(y)) return x;
+        return max(x, y);
+    }
+}
+
+// Vector overload of `fmax`.
+// Metal emits a call to `fmax`; other targets go through the
+// VECTOR_MAP_BINARY macro with `fmax` (the macro is defined outside this
+// view; presumably it applies the scalar function per component).
+__generic<T : __BuiltinFloatingPointType, let N: int>
+[__readNone]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
+vector<T,N> fmax(vector<T,N> x, vector<T,N> y)
+{
+ __target_switch
+ {
+ case metal: __intrinsic_asm "fmax";
+ default:
+ VECTOR_MAP_BINARY(T, N, fmax, x, y);
+ }
+}
+
+// NaN-aware maximum of three scalar values.
+// NaN arguments are ignored: the result is the maximum of the non-NaN
+// arguments, or NaN only when all three are NaN. Metal emits a call to
+// `fmax3`; every other target uses the fallback below.
+__generic<T : __BuiltinFloatingPointType>
+[__readNone]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
+T fmax3(T x, T y, T z)
+{
+    __target_switch
+    {
+    case metal: __intrinsic_asm "fmax3";
+    default:
+        {
+            bool isnanX = isnan(x);
+            bool isnanY = isnan(y);
+            bool isnanZ = isnan(z);
+
+            if (isnanX)
+            {
+                // x is out of the picture: decide between y and z.
+                // When both are numbers they must still be compared;
+                // returning y unconditionally would ignore a larger z.
+                if (isnanY)
+                    return z;
+                if (isnanZ)
+                    return y;
+                return max(y, z);
+            }
+            else if (isnanY)
+            {
+                if (isnanZ)
+                    return x;
+                return max(x, z);
+            }
+            else if (isnanZ)
+            {
+                return max(x, y);
+            }
+
+            // No NaN involved: plain three-way maximum.
+            return max(y, max(x, z));
+        }
+    }
+}
+
+// Vector overload of `fmax3`.
+// Metal emits a call to `fmax3`; other targets go through the
+// VECTOR_MAP_TRINARY macro with `fmax3` (the macro is defined outside this
+// view; presumably it applies the scalar function per component).
+__generic<T : __BuiltinFloatingPointType, let N: int>
+[__readNone]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
+vector<T,N> fmax3(vector<T,N> x, vector<T,N> y, vector<T,N> z)
+{
+ __target_switch
+ {
+ case metal: __intrinsic_asm "fmax3";
+ default:
+ VECTOR_MAP_TRINARY(T, N, fmax3, x, y, z);
+ }
+}
+
+
// minimum
__generic<T : __BuiltinIntegerType>
__target_intrinsic(hlsl)
@@ -8481,12 +9047,13 @@ matrix<T,N,M> min(matrix<T,N,M> x, matrix<T,N,M> y)
__generic<T : __BuiltinFloatingPointType>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
T min(T x, T y)
{
__target_switch
{
case hlsl: __intrinsic_asm "min";
+ case metal: __intrinsic_asm "min";
case glsl: __intrinsic_asm "min";
case cuda: __intrinsic_asm "$P_min($0, $1)";
case cpp: __intrinsic_asm "$P_min($0, $1)";
@@ -8498,12 +9065,13 @@ T min(T x, T y)
__generic<T : __BuiltinFloatingPointType, let N : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
vector<T,N> min(vector<T,N> x, vector<T,N> y)
{
__target_switch
{
case hlsl: __intrinsic_asm "min";
+ case metal: __intrinsic_asm "min";
case glsl: __intrinsic_asm "min";
case spirv: return spirv_asm {
result:$$vector<T,N> = OpExtInst glsl450 FMin $x $y
@@ -8515,7 +9083,7 @@ vector<T,N> min(vector<T,N> x, vector<T,N> y)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
matrix<T,N,M> min(matrix<T,N,M> x, matrix<T,N,M> y)
{
__target_switch
@@ -8526,16 +9094,212 @@ matrix<T,N,M> min(matrix<T,N,M> x, matrix<T,N,M> y)
}
}
+// Minimum of three scalar values.
+// Metal emits a call to `min3`; every other target evaluates
+// min(x, min(y, z)) using the two-argument `min`.
+__generic<T : __BuiltinFloatingPointType>
+[__readNone]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
+T min3(T x, T y, T z)
+{
+ __target_switch
+ {
+ case metal: __intrinsic_asm "min3";
+ default:
+ return min(x, min(y, z));
+ }
+}
+
+// Minimum of three vectors.
+// Metal emits a call to `min3`; every other target evaluates
+// min(x, min(y, z)) using the two-argument vector `min`.
+__generic<T : __BuiltinFloatingPointType, let N : int>
+[__readNone]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
+vector<T,N> min3(vector<T,N> x, vector<T,N> y, vector<T,N> z)
+{
+ __target_switch
+ {
+ case metal: __intrinsic_asm "min3";
+ default:
+ return min(x, min(y, z));
+ }
+}
+
+// NaN-aware minimum of two scalar values.
+// If exactly one argument is NaN the other argument is returned; if both are
+// NaN the result is NaN. Metal emits a call to `fmin`; every other target
+// uses the fallback below.
+__generic<T : __BuiltinFloatingPointType>
+[__readNone]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
+T fmin(T x, T y)
+{
+    __target_switch
+    {
+    case metal: __intrinsic_asm "fmin";
+    default:
+        // Filter NaN on BOTH sides before calling `min`, so the result never
+        // depends on how a target's `min` treats a NaN operand.
+        if (isnan(x)) return y;
+        if (isnan(y)) return x;
+        return min(x, y);
+    }
+}
+
+// Vector overload of `fmin`.
+// Metal emits a call to `fmin`; other targets go through the
+// VECTOR_MAP_BINARY macro with `fmin` (the macro is defined outside this
+// view; presumably it applies the scalar function per component).
+__generic<T : __BuiltinFloatingPointType, let N : int>
+[__readNone]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
+vector<T,N> fmin(vector<T,N> x, vector<T,N> y)
+{
+ __target_switch
+ {
+ case metal: __intrinsic_asm "fmin";
+ default:
+ VECTOR_MAP_BINARY(T, N, fmin, x, y);
+ }
+}
+
+
+// NaN-aware minimum of three scalar values.
+// NaN arguments are ignored: the result is the minimum of the non-NaN
+// arguments, or NaN only when all three are NaN. Metal emits a call to
+// `fmin3`; every other target uses the fallback below.
+__generic<T : __BuiltinFloatingPointType>
+[__readNone]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
+T fmin3(T x, T y, T z)
+{
+    __target_switch
+    {
+    case metal: __intrinsic_asm "fmin3";
+    default:
+        {
+            bool isnanX = isnan(x);
+            bool isnanY = isnan(y);
+            bool isnanZ = isnan(z);
+
+            if (isnanX)
+            {
+                // x is out of the picture: decide between y and z.
+                // When both are numbers they must still be compared;
+                // returning y unconditionally would ignore a smaller z.
+                if (isnanY)
+                    return z;
+                if (isnanZ)
+                    return y;
+                return min(y, z);
+            }
+            else if (isnanY)
+            {
+                if (isnanZ)
+                    return x;
+                return min(x, z);
+            }
+            else if (isnanZ)
+            {
+                return min(x, y);
+            }
+
+            // No NaN involved: plain three-way minimum.
+            return min(x, min(y, z));
+        }
+    }
+}
+
+// Vector overload of `fmin3`.
+// Metal emits a call to `fmin3`; other targets go through the
+// VECTOR_MAP_TRINARY macro with `fmin3` (the macro is defined outside this
+// view; presumably it applies the scalar function per component).
+__generic<T : __BuiltinFloatingPointType, let N : int>
+[__readNone]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
+vector<T,N> fmin3(vector<T,N> x, vector<T,N> y, vector<T,N> z)
+{
+ __target_switch
+ {
+ case metal: __intrinsic_asm "fmin3";
+ default:
+ VECTOR_MAP_TRINARY(T, N, fmin3, x, y, z);
+ }
+}
+
+
+// Median
+// Returns the middle one of three scalar values.
+// Metal emits a call to `median3`; every other target builds the result
+// from the two-argument `min` and `max`.
+__generic<T : __BuiltinFloatingPointType>
+[__readNone]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
+T median3(T x, T y, T z)
+{
+    __target_switch
+    {
+    case metal: __intrinsic_asm "median3";
+    default:
+        {
+            // With lo = min(y, z) and hi = max(y, z), the median is x
+            // raised to at least lo and then capped at hi.
+            //
+            //           | lo | hi | max(x, lo) | result |
+            // ----------+----+----+------------+--------+
+            // x > y > z | z  | y  | x          | y      |
+            // x > z > y | y  | z  | x          | z      |
+            // y > x > z | z  | y  | x          | x      |
+            // y > z > x | z  | y  | z          | z      |
+            // z > x > y | y  | z  | x          | x      |
+            // z > y > x | y  | z  | y          | y      |
+
+            T lo = min(y, z);
+            T hi = max(y, z);
+            return min(hi, max(x, lo));
+        }
+    }
+}
+
+// Median of three vectors.
+// Metal emits a call to `median3`; every other target builds the result
+// from the two-argument vector `min` and `max`.
+__generic<T : __BuiltinFloatingPointType, let N: int>
+[__readNone]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
+vector<T,N> median3(vector<T,N> x, vector<T,N> y, vector<T,N> z)
+{
+    __target_switch
+    {
+    case metal: __intrinsic_asm "median3";
+    default:
+        {
+            // lo/hi bracket y and z; x is raised to at least lo,
+            // then capped at hi.
+            vector<T,N> lo = min(y, z);
+            vector<T,N> hi = max(y, z);
+            return min(hi, max(x, lo));
+        }
+    }
+}
+
+// NaN-aware median of three scalar values.
+// Metal emits a call to `fmedian3`; every other target screens out NaN
+// arguments first and otherwise defers to `median3`.
+__generic<T : __BuiltinFloatingPointType>
+[__readNone]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
+T fmedian3(T x, T y, T z)
+{
+    __target_switch
+    {
+    case metal: __intrinsic_asm "fmedian3";
+    default:
+        {
+            // x is NaN: answer with y, unless y is NaN too, then with z.
+            if (isnan(x))
+                return isnan(y) ? z : y;
+
+            // x is a number but y or z is not.
+            // "the function can return either non-NaN value"
+            if (isnan(y) || isnan(z))
+                return x;
+
+            // All three are numbers.
+            return median3(x, y, z);
+        }
+    }
+}
+
+// Vector overload of `fmedian3`.
+// Metal emits a call to `fmedian3`; other targets go through the
+// VECTOR_MAP_TRINARY macro with `fmedian3` (the macro is defined outside
+// this view; presumably it applies the scalar function per component).
+__generic<T : __BuiltinFloatingPointType, let N: int>
+[__readNone]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
+vector<T,N> fmedian3(vector<T,N> x, vector<T,N> y, vector<T,N> z)
+{
+ __target_switch
+ {
+ case metal: __intrinsic_asm "fmedian3";
+ default:
+ VECTOR_MAP_TRINARY(T, N, fmedian3, x, y, z);
+ }
+}
+
+
// split into integer and fractional parts (both with same sign)
__generic<T : __BuiltinFloatingPointType>
[__readNone]
-[require(glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
T modf(T x, out T ip)
{
__target_switch
{
+ case cpp: __intrinsic_asm "$P_modf($0, $1)";
+ case cuda: __intrinsic_asm "$P_modf($0, $1)";
case hlsl: __intrinsic_asm "modf";
case glsl: __intrinsic_asm "modf";
+ case metal: __intrinsic_asm "modf($0, *($1))";
case spirv: return spirv_asm {
result:$$T = OpExtInst glsl450 Modf $x &ip
};
@@ -8544,13 +9308,14 @@ T modf(T x, out T ip)
__generic<T : __BuiltinFloatingPointType, let N : int>
[__readNone]
-[require(glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
vector<T,N> modf(vector<T,N> x, out vector<T,N> ip)
{
__target_switch
{
case hlsl: __intrinsic_asm "modf";
case glsl: __intrinsic_asm "modf";
+ case metal: __intrinsic_asm "modf($0, *($1))";
case spirv: return spirv_asm {
result:$$vector<T,N> = OpExtInst glsl450 Modf $x &ip
};
@@ -8561,7 +9326,7 @@ vector<T,N> modf(vector<T,N> x, out vector<T,N> ip)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int, let L : int>
[__readNone]
-[require(glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
matrix<T,N,M> modf(matrix<T,N,M> x, out matrix<T,N,M,L> ip)
{
__target_switch
@@ -8883,6 +9648,50 @@ matrix<T,R,C> mul(matrix<T,R,N> left, matrix<T,N,C> right)
}
}
+// next-after: next representable floating-point value
+// after x in the direction of y
+//
+// Returns x if x is NaN, y if y is NaN, and y if x == y.
+// Metal emits a call to `nextafter`. Every other target steps the bit
+// pattern of x by one: for a non-zero finite or infinite value, adding 1 to
+// the bits moves one representable value away from zero and subtracting 1
+// moves one toward zero. Adding a fixed smallest-subnormal delta to x would
+// be rounded away for any normal-magnitude x, so it is not used here.
+__generic<T : __BuiltinFloatingPointType>
+[__readNone]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_4_0)]
+T nextafter(T x, T y)
+{
+    __target_switch
+    {
+    case metal: __intrinsic_asm "nextafter";
+    default:
+        if (isnan(x)) return x;
+        if (isnan(y)) return y;
+        if (x == y) return y;
+
+        // Direction of travel along the number line.
+        const bool towardLarger = (x < y);
+        // +0 and -0 both step to the smallest subnormal carrying the
+        // sign of the direction of travel.
+        const bool isZero = (x == T(0));
+        // For non-zero x: true when the step increases |x|.
+        const bool growMagnitude = ((x > T(0)) == towardLarger);
+
+        if (T is half)
+        {
+            uint16_t bits = bit_cast<uint16_t>(__realCast<half>(x));
+            if (isZero)
+                bits = towardLarger ? uint16_t(0x0001) : uint16_t(0x8001);
+            else if (growMagnitude)
+                bits = uint16_t(bits + 1);
+            else
+                bits = uint16_t(bits - 1);
+            return __realCast<T>(bit_cast<half>(bits));
+        }
+        if (T is float)
+        {
+            uint32_t bits = bit_cast<uint32_t>(__realCast<float>(x));
+            if (isZero)
+                bits = towardLarger ? uint32_t(0x00000001u) : uint32_t(0x80000001u);
+            else if (growMagnitude)
+                bits = bits + uint32_t(1);
+            else
+                bits = bits - uint32_t(1);
+            return __realCast<T>(bit_cast<float>(bits));
+        }
+
+        // Remaining case: double.
+        uint64_t bits = bit_cast<uint64_t>(__realCast<double>(x));
+        if (isZero)
+            bits = towardLarger ? uint64_t(1) : ((uint64_t(1) << 63) | uint64_t(1));
+        else if (growMagnitude)
+            bits = bits + uint64_t(1);
+        else
+            bits = bits - uint64_t(1);
+        return __realCast<T>(bit_cast<double>(bits));
+    }
+}
+
+// Vector overload of `nextafter`.
+// Metal emits a call to `nextafter`; other targets go through the
+// VECTOR_MAP_BINARY macro with `nextafter` (the macro is defined outside
+// this view; presumably it applies the scalar function per component).
+__generic<T : __BuiltinFloatingPointType, let N : int>
+[__readNone]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_4_0)]
+vector<T,N> nextafter(vector<T,N> x, vector<T,N> y)
+{
+ __target_switch
+ {
+ case metal: __intrinsic_asm "nextafter";
+ default:
+ VECTOR_MAP_BINARY(T, N, nextafter, x, y);
+ }
+}
+
+
// noise (deprecated)
[__readNone]
@@ -8981,7 +9790,7 @@ T normalize(T x)
// Raise to a power
__generic<T : __BuiltinFloatingPointType>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
T pow(T x, T y)
{
__target_switch
@@ -8990,6 +9799,7 @@ T pow(T x, T y)
case cuda: __intrinsic_asm "$P_pow($0, $1)";
case glsl: __intrinsic_asm "pow";
case hlsl: __intrinsic_asm "pow";
+ case metal: __intrinsic_asm "pow";
case spirv: return spirv_asm {
OpExtInst $$T result glsl450 Pow $x $y
};
@@ -8998,13 +9808,14 @@ T pow(T x, T y)
__generic<T : __BuiltinFloatingPointType, let N : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
vector<T, N> pow(vector<T, N> x, vector<T, N> y)
{
__target_switch
{
case glsl: __intrinsic_asm "pow";
case hlsl: __intrinsic_asm "pow";
+ case metal: __intrinsic_asm "pow";
case spirv: return spirv_asm {
OpExtInst $$vector<T, N> result glsl450 Pow $x $y
};
@@ -9015,7 +9826,7 @@ vector<T, N> pow(vector<T, N> x, vector<T, N> y)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
matrix<T,N,M> pow(matrix<T,N,M> x, matrix<T,N,M> y)
{
__target_switch
@@ -9026,6 +9837,32 @@ matrix<T,N,M> pow(matrix<T,N,M> x, matrix<T,N,M> y)
}
}
+// Scalar `powr`.
+// Metal emits a call to `powr`; every other target evaluates
+// pow(abs(x), y), i.e. the base is made non-negative before `pow` is called.
+__generic<T : __BuiltinFloatingPointType>
+[__readNone]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
+T powr(T x, T y)
+{
+ __target_switch
+ {
+ case metal: __intrinsic_asm "powr";
+ default:
+ return pow(abs(x), y);
+ }
+}
+
+// Vector `powr`.
+// Metal emits a call to `powr`; every other target evaluates
+// pow(abs(x), y) with the vector overloads of `abs` and `pow`.
+__generic<T : __BuiltinFloatingPointType, let N : int>
+[__readNone]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
+vector<T, N> powr(vector<T, N> x, vector<T, N> y)
+{
+ __target_switch
+ {
+ case metal: __intrinsic_asm "powr";
+ default:
+ return pow(abs(x), y);
+ }
+}
+
// Output message
// TODO: add check to ensure format is const literal.
@@ -9360,10 +10197,60 @@ vector<uint, N> reversebits(vector<uint, N> value)
}
}
+// round even
+// Rounds x to the nearest integer value, resolving exact ties toward the
+// even neighbour. glsl emits `roundEven`, metal emits `rint`, and spirv emits
+// the `glsl450` `RoundEven` extended instruction. Other targets start from
+// `round` and correct the result when x lies exactly halfway.
+__generic<T : __BuiltinFloatingPointType>
+[__readNone]
+[ForceInline]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, GLSL_130)]
+T rint(T x)
+{
+    __target_switch
+    {
+    case glsl: __intrinsic_asm "roundEven";
+    case metal: __intrinsic_asm "rint";
+    case spirv: return spirv_asm {
+        OpExtInst $$T result glsl450 RoundEven $x
+    };
+    default:
+        T nearest = round(x);
+
+        // Check if the value is exactly halfway between two integers
+        if (abs(x - nearest) == T(0.5))
+        {
+            // Parity test: `nearest` is odd exactly when halving it and
+            // flooring drops something. The `floor` is essential: in
+            // floating point, (nearest / 2) * 2 reproduces `nearest` and
+            // would never detect an odd value.
+            if (floor(nearest / T(2)) * T(2) != nearest)
+            {
+                // If the nearest number is odd,
+                // move to the closest even number, which lies on the
+                // other side of x.
+                nearest -= ((x < nearest) ? T(1) : T(-1));
+            }
+        }
+        return nearest;
+    }
+}
+
+// Vector overload of `rint`.
+// glsl emits `roundEven`, metal emits `rint`, and spirv emits the `glsl450`
+// `RoundEven` extended instruction on the whole vector. Other targets go
+// through the VECTOR_MAP_UNARY macro with `rint` (the macro is defined
+// outside this view; presumably it applies the scalar function per component).
+__generic<T : __BuiltinFloatingPointType, let N:int>
+[__readNone]
+[ForceInline]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, GLSL_130)]
+vector<T,N> rint(vector<T,N> x)
+{
+ __target_switch
+ {
+ case glsl: __intrinsic_asm "roundEven";
+ case metal: __intrinsic_asm "rint";
+ case spirv: return spirv_asm {
+ OpExtInst $$vector<T,N> result glsl450 RoundEven $x
+ };
+ default:
+ VECTOR_MAP_UNARY(T, N, rint, x);
+ }
+}
+
// Round-to-nearest
__generic<T : __BuiltinFloatingPointType>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
T round(T x)
{
__target_switch
@@ -9372,6 +10259,7 @@ T round(T x)
case cuda: __intrinsic_asm "$P_round($0)";
case glsl: __intrinsic_asm "round";
case hlsl: __intrinsic_asm "round";
+ case metal: __intrinsic_asm "round";
case spirv: return spirv_asm {
OpExtInst $$T result glsl450 Round $x
};
@@ -9380,13 +10268,14 @@ T round(T x)
__generic<T : __BuiltinFloatingPointType, let N : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
vector<T, N> round(vector<T, N> x)
{
__target_switch
{
case glsl: __intrinsic_asm "round";
case hlsl: __intrinsic_asm "round";
+ case metal: __intrinsic_asm "round";
case spirv: return spirv_asm {
OpExtInst $$vector<T, N> result glsl450 Round $x
};
@@ -9397,7 +10286,7 @@ vector<T, N> round(vector<T, N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
matrix<T,N,M> round(matrix<T,N,M> x)
{
__target_switch
@@ -9411,7 +10300,7 @@ matrix<T,N,M> round(matrix<T,N,M> x)
// Reciprocal of square root
__generic<T : __BuiltinFloatingPointType>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
T rsqrt(T x)
{
__target_switch
@@ -9420,6 +10309,7 @@ T rsqrt(T x)
case cuda: __intrinsic_asm "$P_rsqrt($0)";
case glsl: __intrinsic_asm "inversesqrt($0)";
case hlsl: __intrinsic_asm "rsqrt";
+ case metal: __intrinsic_asm "rsqrt";
case spirv: return spirv_asm {
OpExtInst $$T result glsl450 InverseSqrt $x
};
@@ -9430,13 +10320,14 @@ T rsqrt(T x)
__generic<T : __BuiltinFloatingPointType, let N : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
vector<T, N> rsqrt(vector<T, N> x)
{
__target_switch
{
case glsl: __intrinsic_asm "inversesqrt($0)";
case hlsl: __intrinsic_asm "rsqrt";
+ case metal: __intrinsic_asm "rsqrt";
case spirv: return spirv_asm {
OpExtInst $$vector<T, N> result glsl450 InverseSqrt $x
};
@@ -9447,7 +10338,7 @@ vector<T, N> rsqrt(vector<T, N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
matrix<T, N, M> rsqrt(matrix<T, N, M> x)
{
__target_switch
@@ -9568,12 +10459,11 @@ matrix<int, N, M> sign(matrix<T, N, M> x)
}
}
-
// Sine
__generic<T : __BuiltinFloatingPointType>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
T sin(T x)
{
__target_switch
@@ -9582,6 +10472,7 @@ T sin(T x)
case cuda: __intrinsic_asm "$P_sin($0)";
case glsl: __intrinsic_asm "sin";
case hlsl: __intrinsic_asm "sin";
+ case metal: __intrinsic_asm "sin";
case spirv: return spirv_asm {
OpExtInst $$T result glsl450 Sin $x
};
@@ -9590,13 +10481,14 @@ T sin(T x)
__generic<T : __BuiltinFloatingPointType, let N : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
vector<T, N> sin(vector<T, N> x)
{
__target_switch
{
case glsl: __intrinsic_asm "sin";
case hlsl: __intrinsic_asm "sin";
+ case metal: __intrinsic_asm "sin";
case spirv: return spirv_asm {
OpExtInst $$vector<T, N> result glsl450 Sin $x
};
@@ -9607,7 +10499,7 @@ vector<T, N> sin(vector<T, N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
matrix<T, N, M> sin(matrix<T, N, M> x)
{
__target_switch
@@ -9621,13 +10513,40 @@ matrix<T, N, M> sin(matrix<T, N, M> x)
// Sine and cosine
__generic<T : __BuiltinFloatingPointType>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(metal)] // This helper is only available for the metal capability
+T __sincos_metal(T x, out T c) // Scalar helper for sincos: sincos stores the return value as its sine output and passes its cosine output as `c`
+{
+ __target_switch
+ {
+ case metal: __intrinsic_asm "sincos($0, *$1)"; // Only a metal case exists; the second argument is emitted with a leading '*'
+ }
+}
+
+__generic<T : __BuiltinFloatingPointType, let N : int>
+[__readNone]
+[require(metal)] // This helper is only available for the metal capability
+vector<T,N> __sincos_metal(vector<T,N> x, out vector<T,N> c) // Vector helper for sincos: sincos stores the return value as its sine output and passes its cosine output as `c`
+{
+ __target_switch
+ {
+ case metal: __intrinsic_asm "sincos($0, *$1)"; // Only a metal case exists; the second argument is emitted with a leading '*'
+ }
+}
+
+__generic<T : __BuiltinFloatingPointType>
+[__readNone]
+[ForceInline]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
void sincos(T x, out T s, out T c)
{
__target_switch
{
case cuda: __intrinsic_asm "$P_sincos($0, $1, $2)";
case hlsl: __intrinsic_asm "sincos";
+ case metal:
+ //__intrinsic_asm "*($1) = sincos($0, *($2))";
+ s = __sincos_metal(x, c);
+ return;
default:
s = sin(x);
c = cos(x);
@@ -9636,12 +10555,17 @@ void sincos(T x, out T s, out T c)
__generic<T : __BuiltinFloatingPointType, let N : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[ForceInline]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
void sincos(vector<T,N> x, out vector<T,N> s, out vector<T,N> c)
{
__target_switch
{
case hlsl: __intrinsic_asm "sincos";
+ case metal:
+ //__intrinsic_asm "*($1) = sincos($0, *($2))";
+ s = __sincos_metal(x, c);
+ return;
default:
s = sin(x);
c = cos(x);
@@ -9650,7 +10574,8 @@ void sincos(vector<T,N> x, out vector<T,N> s, out vector<T,N> c)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int, let L1: int, let L2 : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[ForceInline]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
void sincos(matrix<T,N,M> x, out matrix<T,N,M,L1> s, out matrix<T,N,M,L2> c)
{
__target_switch
@@ -9665,7 +10590,7 @@ void sincos(matrix<T,N,M> x, out matrix<T,N,M,L1> s, out matrix<T,N,M,L2> c)
// Hyperbolic Sine
__generic<T : __BuiltinFloatingPointType>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
T sinh(T x)
{
__target_switch
@@ -9674,6 +10599,7 @@ T sinh(T x)
case cuda: __intrinsic_asm "$P_sinh($0)";
case glsl: __intrinsic_asm "sinh";
case hlsl: __intrinsic_asm "sinh";
+ case metal: __intrinsic_asm "sinh";
case spirv: return spirv_asm {
OpExtInst $$T result glsl450 Sinh $x
};
@@ -9682,13 +10608,14 @@ T sinh(T x)
__generic<T : __BuiltinFloatingPointType, let N : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
vector<T, N> sinh(vector<T, N> x)
{
__target_switch
{
case glsl: __intrinsic_asm "sinh";
case hlsl: __intrinsic_asm "sinh";
+ case metal: __intrinsic_asm "sinh";
case spirv: return spirv_asm {
OpExtInst $$vector<T, N> result glsl450 Sinh $x
};
@@ -9699,7 +10626,7 @@ vector<T, N> sinh(vector<T, N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
matrix<T, N, M> sinh(matrix<T, N, M> x)
{
__target_switch
@@ -9710,6 +10637,35 @@ matrix<T, N, M> sinh(matrix<T, N, M> x)
}
}
+// Sine of pi times x (sinpi)
+
+__generic<T : __BuiltinFloatingPointType>
+[__readNone]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
+T sinpi(T x) // Scalar sinpi: evaluates sin(pi * x), as spelled out in the default branch
+{
+ __target_switch
+ {
+ case metal: __intrinsic_asm "sinpi"; // metal emits a call to a function named sinpi
+ default: // every other target uses the sin-based expression below
+ return sin(T.getPi() * x);
+ }
+}
+
+__generic<T : __BuiltinFloatingPointType, let N : int>
+[__readNone]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
+vector<T,N> sinpi(vector<T,N> x) // Vector sinpi: evaluates sin(pi * x), as spelled out in the default branch
+{
+ __target_switch
+ {
+ case metal: __intrinsic_asm "sinpi"; // metal emits a call to a function named sinpi
+ default: // every other target uses the sin-based expression below
+ return sin(T.getPi() * x);
+ }
+}
+
+
// Smooth step (Hermite interpolation)
__generic<T : __BuiltinFloatingPointType>
[__readNone]
@@ -9762,7 +10718,7 @@ matrix<T, N, M> smoothstep(matrix<T, N, M> min, matrix<T, N, M> max, matrix<T, N
// Square root
__generic<T : __BuiltinFloatingPointType>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
T sqrt(T x)
{
__target_switch
@@ -9771,6 +10727,7 @@ T sqrt(T x)
case cuda: __intrinsic_asm "$P_sqrt($0)";
case glsl: __intrinsic_asm "sqrt";
case hlsl: __intrinsic_asm "sqrt";
+ case metal: __intrinsic_asm "sqrt";
case spirv: return spirv_asm {
OpExtInst $$T result glsl450 Sqrt $x
};
@@ -9779,13 +10736,14 @@ T sqrt(T x)
__generic<T : __BuiltinFloatingPointType, let N : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
vector<T, N> sqrt(vector<T, N> x)
{
__target_switch
{
case glsl: __intrinsic_asm "sqrt";
case hlsl: __intrinsic_asm "sqrt";
+ case metal: __intrinsic_asm "sqrt";
case spirv: return spirv_asm {
OpExtInst $$vector<T, N> result glsl450 Sqrt $x
};
@@ -9796,7 +10754,7 @@ vector<T, N> sqrt(vector<T, N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
matrix<T, N, M> sqrt(matrix<T, N, M> x)
{
__target_switch
@@ -9858,7 +10816,7 @@ matrix<T, N, M> step(matrix<T, N, M> y, matrix<T, N, M> x)
// Tangent
__generic<T : __BuiltinFloatingPointType>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
T tan(T x)
{
__target_switch
@@ -9867,6 +10825,7 @@ T tan(T x)
case cuda: __intrinsic_asm "$P_tan($0)";
case glsl: __intrinsic_asm "tan";
case hlsl: __intrinsic_asm "tan";
+ case metal: __intrinsic_asm "tan";
case spirv: return spirv_asm {
OpExtInst $$T result glsl450 Tan $x
};
@@ -9875,13 +10834,14 @@ T tan(T x)
__generic<T : __BuiltinFloatingPointType, let N : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
vector<T, N> tan(vector<T, N> x)
{
__target_switch
{
case glsl: __intrinsic_asm "tan";
case hlsl: __intrinsic_asm "tan";
+ case metal: __intrinsic_asm "tan";
case spirv: return spirv_asm {
OpExtInst $$vector<T, N> result glsl450 Tan $x
};
@@ -9892,7 +10852,7 @@ vector<T, N> tan(vector<T, N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
matrix<T, N, M> tan(matrix<T, N, M> x)
{
__target_switch
@@ -9906,7 +10866,7 @@ matrix<T, N, M> tan(matrix<T, N, M> x)
// Hyperbolic tangent
__generic<T : __BuiltinFloatingPointType>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
T tanh(T x)
{
__target_switch
@@ -9915,6 +10875,7 @@ T tanh(T x)
case cuda: __intrinsic_asm "$P_tanh($0)";
case glsl: __intrinsic_asm "tanh";
case hlsl: __intrinsic_asm "tanh";
+ case metal: __intrinsic_asm "tanh";
case spirv: return spirv_asm {
OpExtInst $$T result glsl450 Tanh $x
};
@@ -9923,13 +10884,14 @@ T tanh(T x)
__generic<T : __BuiltinFloatingPointType, let N : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
vector<T,N> tanh(vector<T,N> x)
{
__target_switch
{
case glsl: __intrinsic_asm "tanh";
case hlsl: __intrinsic_asm "tanh";
+ case metal: __intrinsic_asm "tanh";
case spirv: return spirv_asm {
OpExtInst $$vector<T,N> result glsl450 Tanh $x
};
@@ -9940,7 +10902,7 @@ vector<T,N> tanh(vector<T,N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
matrix<T,N,M> tanh(matrix<T,N,M> x)
{
__target_switch
@@ -9951,6 +10913,35 @@ matrix<T,N,M> tanh(matrix<T,N,M> x)
}
}
+// Tangent of pi times x (tanpi)
+
+__generic<T : __BuiltinFloatingPointType>
+[__readNone]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
+T tanpi(T x) // Scalar tanpi: evaluates tan(pi * x), as spelled out in the default branch
+{
+ __target_switch
+ {
+ case metal: __intrinsic_asm "tanpi"; // metal emits a call to a function named tanpi
+ default: // every other target uses the tan-based expression below
+ return tan(T.getPi() * x);
+ }
+}
+
+__generic<T : __BuiltinFloatingPointType, let N : int>
+[__readNone]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
+vector<T,N> tanpi(vector<T,N> x) // Vector tanpi: evaluates tan(pi * x), as spelled out in the default branch
+{
+ __target_switch
+ {
+ case metal: __intrinsic_asm "tanpi"; // metal emits a call to a function named tanpi
+ default: // every other target uses the tan-based expression below
+ return tan(T.getPi() * x);
+ }
+}
+
+
// Matrix transpose
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
[__readNone]
@@ -10020,7 +11011,7 @@ matrix<T, M, N> transpose(matrix<T, N, M> x)
// Truncate to integer
__generic<T : __BuiltinFloatingPointType>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
T trunc(T x)
{
__target_switch
@@ -10029,6 +11020,7 @@ T trunc(T x)
case cuda: __intrinsic_asm "$P_trunc($0)";
case glsl: __intrinsic_asm "trunc";
case hlsl: __intrinsic_asm "trunc";
+ case metal: __intrinsic_asm "trunc";
case spirv: return spirv_asm {
OpExtInst $$T result glsl450 Trunc $x
};
@@ -10037,13 +11029,14 @@ T trunc(T x)
__generic<T : __BuiltinFloatingPointType, let N : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
vector<T, N> trunc(vector<T, N> x)
{
__target_switch
{
case glsl: __intrinsic_asm "trunc";
case hlsl: __intrinsic_asm "trunc";
+ case metal: __intrinsic_asm "trunc";
case spirv: return spirv_asm {
OpExtInst $$vector<T, N> result glsl450 Trunc $x
};
@@ -10054,7 +11047,7 @@ vector<T, N> trunc(vector<T, N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
[__readNone]
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)]
matrix<T, N, M> trunc(matrix<T, N, M> x)
{
__target_switch
diff --git a/source/slang/slang-emit-metal.cpp b/source/slang/slang-emit-metal.cpp
index 2c327b613..7da48cac1 100644
--- a/source/slang/slang-emit-metal.cpp
+++ b/source/slang/slang-emit-metal.cpp
@@ -298,35 +298,27 @@ bool MetalSourceEmitter::tryEmitInstExprImpl(IRInst* inst, const EmitOpInfo& inO
void MetalSourceEmitter::emitVectorTypeNameImpl(IRType* elementType, IRIntegerValue elementCount)
{
- // In some cases we *need* to use the built-in syntax sugar for vector types,
- // so we will try to emit those whenever possible.
- //
- if( elementCount >= 1 && elementCount <= 4 )
- {
- switch( elementType->getOp() )
+ emitSimpleTypeImpl(elementType); // New version: the element type name is always emitted first
+
+ switch (elementType->getOp()) // NOTE(review): no default case, so element types not listed below get no count suffix; confirm intended
+ {
+ case kIROp_FloatType:
+ case kIROp_HalfType:
+ case kIROp_BoolType:
+ case kIROp_Int8Type:
+ case kIROp_UInt8Type:
+ case kIROp_Int16Type:
+ case kIROp_UInt16Type:
+ case kIROp_IntType:
+ case kIROp_UIntType:
+ case kIROp_Int64Type:
+ case kIROp_UInt64Type:
+ if (elementCount > 1) // A count of 1 leaves just the element type name, with no numeric suffix
{
- case kIROp_FloatType:
- case kIROp_IntType:
- case kIROp_UIntType:
- // TODO: There are more types that need to be covered here
- emitType(elementType);
m_writer->emit(elementCount);
- return;
-
- default:
- break;
}
+ break;
}
-
- // As a fallback, we will use the `vector<...>` type constructor,
- // although we should not expect to run into types that don't
- // have a sugared form.
- //
- m_writer->emit("vector<");
- emitType(elementType);
- m_writer->emit(",");
- m_writer->emit(elementCount);
- m_writer->emit(">");
}
void MetalSourceEmitter::emitLoopControlDecorationImpl(IRLoopControlDecoration* decl)
@@ -855,6 +847,7 @@ void MetalSourceEmitter::handleRequiredCapabilitiesImpl(IRInst* inst)
void MetalSourceEmitter::emitFrontMatterImpl(TargetRequest*)
{
m_writer->emit("#include <metal_stdlib>\n");
+ m_writer->emit("#include <metal_math>\n"); // NOTE(review): confirm <metal_stdlib> does not already provide the math declarations, which would make this include redundant
m_writer->emit("using namespace metal;\n");
}