summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--prelude/slang-cpp-scalar-intrinsics.h82
-rw-r--r--prelude/slang-cuda-prelude.h133
-rw-r--r--source/slang/hlsl.meta.slang207
-rw-r--r--source/slang/slang-emit-c-like.cpp47
-rw-r--r--source/slang/slang-emit-cpp.cpp176
-rw-r--r--source/slang/slang-emit-cpp.h5
-rw-r--r--source/slang/slang-emit-cuda.cpp55
-rw-r--r--source/slang/slang-emit-glsl.cpp25
-rw-r--r--source/slang/slang-hlsl-intrinsic-set.cpp41
-rw-r--r--source/slang/slang-hlsl-intrinsic-set.h55
-rw-r--r--source/slang/slang-ir-inst-defs.h4
-rw-r--r--source/slang/slang-ir.cpp3
12 files changed, 351 insertions, 482 deletions
diff --git a/prelude/slang-cpp-scalar-intrinsics.h b/prelude/slang-cpp-scalar-intrinsics.h
index 95acd9335..c814365c6 100644
--- a/prelude/slang-cpp-scalar-intrinsics.h
+++ b/prelude/slang-cpp-scalar-intrinsics.h
@@ -46,12 +46,16 @@ SLANG_FORCE_INLINE float F32_calcSafeRadians(float radians)
// Unary
SLANG_FORCE_INLINE float F32_ceil(float f) { return ::ceilf(f); }
SLANG_FORCE_INLINE float F32_floor(float f) { return ::floorf(f); }
+SLANG_FORCE_INLINE float F32_round(float f) { return ::roundf(f); }
SLANG_FORCE_INLINE float F32_sin(float f) { return ::sinf(f); }
SLANG_FORCE_INLINE float F32_cos(float f) { return ::cosf(f); }
SLANG_FORCE_INLINE float F32_tan(float f) { return ::tanf(f); }
SLANG_FORCE_INLINE float F32_asin(float f) { return ::asinf(f); }
SLANG_FORCE_INLINE float F32_acos(float f) { return ::acosf(f); }
SLANG_FORCE_INLINE float F32_atan(float f) { return ::atanf(f); }
+SLANG_FORCE_INLINE float F32_sinh(float f) { return ::sinhf(f); }
+SLANG_FORCE_INLINE float F32_cosh(float f) { return ::coshf(f); }
+SLANG_FORCE_INLINE float F32_tanh(float f) { return ::tanhf(f); }
SLANG_FORCE_INLINE float F32_log2(float f) { return ::log2f(f); }
SLANG_FORCE_INLINE float F32_log(float f) { return ::logf(f); }
SLANG_FORCE_INLINE float F32_log10(float f) { return ::log10f(f); }
@@ -61,42 +65,39 @@ SLANG_FORCE_INLINE float F32_abs(float f) { return ::fabsf(f); }
SLANG_FORCE_INLINE float F32_trunc(float f) { return ::truncf(f); }
SLANG_FORCE_INLINE float F32_sqrt(float f) { return ::sqrtf(f); }
SLANG_FORCE_INLINE float F32_rsqrt(float f) { return 1.0f / F32_sqrt(f); }
-SLANG_FORCE_INLINE float F32_rcp(float f) { return 1.0f / f; }
SLANG_FORCE_INLINE float F32_sign(float f) { return ( f == 0.0f) ? f : (( f < 0.0f) ? -1.0f : 1.0f); }
-SLANG_FORCE_INLINE float F32_saturate(float f) { return (f < 0.0f) ? 0.0f : (f > 1.0f) ? 1.0f : f; }
SLANG_FORCE_INLINE float F32_frac(float f) { return f - F32_floor(f); }
-SLANG_FORCE_INLINE float F32_radians(float f) { return f * 0.01745329222f; }
SLANG_FORCE_INLINE bool F32_isnan(float f) { return isnan(f); }
SLANG_FORCE_INLINE bool F32_isfinite(float f) { return isfinite(f); }
SLANG_FORCE_INLINE bool F32_isinf(float f) { return isinf(f); }
// Binary
-SLANG_FORCE_INLINE float F32_min(float a, float b) { return a < b ? a : b; }
-SLANG_FORCE_INLINE float F32_max(float a, float b) { return a > b ? a : b; }
+SLANG_FORCE_INLINE float F32_min(float a, float b) { return ::fminf(a, b); }
+SLANG_FORCE_INLINE float F32_max(float a, float b) { return ::fmaxf(a, b); }
SLANG_FORCE_INLINE float F32_pow(float a, float b) { return ::powf(a, b); }
SLANG_FORCE_INLINE float F32_fmod(float a, float b) { return ::fmodf(a, b); }
SLANG_FORCE_INLINE float F32_remainder(float a, float b) { return ::remainderf(a, b); }
-SLANG_FORCE_INLINE float F32_step(float a, float b) { return float(b >= a); }
// atan2 with `a` as the y operand and `b` as the x operand. Calls the single-precision
// ::atan2f directly, like the other F32_* wrappers, instead of ::atan2 plus a cast.
SLANG_FORCE_INLINE float F32_atan2(float a, float b) { return ::atan2f(a, b); }
-// TODO(JS):
-// Note C++ has ldexp, but it takes an integer for the exponent, it seems HLSL takes both as float
-SLANG_FORCE_INLINE float F32_ldexp(float m, float e) { return m * ::powf(2.0f, e); }
-
-// Ternary
-SLANG_FORCE_INLINE float F32_smoothstep(float min, float max, float x)
-{
- const float t = x < min ? 0.0f : ((x > max) ? 1.0f : (x - min) / (max - min));
- return t * t * (3.0 - 2.0 * t);
+SLANG_FORCE_INLINE float F32_frexp(float x, float& e)
+{
+ int ei;
+ float m = ::frexpf(x, &ei);
+ e = ei;
+ return m;
+}
+SLANG_FORCE_INLINE float F32_modf(float x, float& ip)
+{
+ return ::modff(x, &ip);
}
-SLANG_FORCE_INLINE float F32_lerp(float x, float y, float s) { return x + s * (y - x); }
-SLANG_FORCE_INLINE float F32_clamp(float x, float min, float max) { return ( x < min) ? min : ((x > max) ? max : x); }
-SLANG_FORCE_INLINE void F32_sincos(float f, float& outSin, float& outCos) { outSin = F32_sin(f); outCos = F32_cos(f); }
SLANG_FORCE_INLINE uint32_t F32_asuint(float f) { Union32 u; u.f = f; return u.u; }
SLANG_FORCE_INLINE int32_t F32_asint(float f) { Union32 u; u.f = f; return u.i; }
+// Ternary
+SLANG_FORCE_INLINE float F32_fma(float a, float b, float c) { return ::fmaf(a, b, c); }
+
// ----------------------------- F64 -----------------------------------------
SLANG_FORCE_INLINE double F64_calcSafeRadians(double radians)
@@ -112,12 +113,16 @@ SLANG_FORCE_INLINE double F64_calcSafeRadians(double radians)
// Unary
SLANG_FORCE_INLINE double F64_ceil(double f) { return ::ceil(f); }
SLANG_FORCE_INLINE double F64_floor(double f) { return ::floor(f); }
+SLANG_FORCE_INLINE double F64_round(double f) { return ::round(f); }
SLANG_FORCE_INLINE double F64_sin(double f) { return ::sin(f); }
SLANG_FORCE_INLINE double F64_cos(double f) { return ::cos(f); }
SLANG_FORCE_INLINE double F64_tan(double f) { return ::tan(f); }
SLANG_FORCE_INLINE double F64_asin(double f) { return ::asin(f); }
SLANG_FORCE_INLINE double F64_acos(double f) { return ::acos(f); }
SLANG_FORCE_INLINE double F64_atan(double f) { return ::atan(f); }
+SLANG_FORCE_INLINE double F64_sinh(double f) { return ::sinh(f); }
+SLANG_FORCE_INLINE double F64_cosh(double f) { return ::cosh(f); }
+SLANG_FORCE_INLINE double F64_tanh(double f) { return ::tanh(f); }
SLANG_FORCE_INLINE double F64_log2(double f) { return ::log2(f); }
SLANG_FORCE_INLINE double F64_log(double f) { return ::log(f); }
// Base-10 logarithm in double precision. The parameter is `double` (not `float`) so the
// argument is not narrowed to single precision before the logarithm is taken.
SLANG_FORCE_INLINE double F64_log10(double f) { return ::log10(f); }
@@ -127,38 +132,32 @@ SLANG_FORCE_INLINE double F64_abs(double f) { return ::fabs(f); }
SLANG_FORCE_INLINE double F64_trunc(double f) { return ::trunc(f); }
SLANG_FORCE_INLINE double F64_sqrt(double f) { return ::sqrt(f); }
SLANG_FORCE_INLINE double F64_rsqrt(double f) { return 1.0 / F64_sqrt(f); }
-SLANG_FORCE_INLINE double F64_rcp(double f) { return 1.0 / f; }
SLANG_FORCE_INLINE double F64_sign(double f) { return (f == 0.0) ? f : ((f < 0.0) ? -1.0 : 1.0); }
-SLANG_FORCE_INLINE double F64_saturate(double f) { return (f < 0.0) ? 0.0 : (f > 1.0) ? 1.0 : f; }
SLANG_FORCE_INLINE double F64_frac(double f) { return f - F64_floor(f); }
-SLANG_FORCE_INLINE double F64_radians(double f) { return f * 0.01745329222; }
SLANG_FORCE_INLINE bool F64_isnan(double f) { return isnan(f); }
SLANG_FORCE_INLINE bool F64_isfinite(double f) { return isfinite(f); }
SLANG_FORCE_INLINE bool F64_isinf(double f) { return isinf(f); }
// Binary
-SLANG_FORCE_INLINE double F64_min(double a, double b) { return a < b ? a : b; }
-SLANG_FORCE_INLINE double F64_max(double a, double b) { return a > b ? a : b; }
+SLANG_FORCE_INLINE double F64_min(double a, double b) { return ::fmin(a, b); }
+SLANG_FORCE_INLINE double F64_max(double a, double b) { return ::fmax(a, b); }
SLANG_FORCE_INLINE double F64_pow(double a, double b) { return ::pow(a, b); }
SLANG_FORCE_INLINE double F64_fmod(double a, double b) { return ::fmod(a, b); }
SLANG_FORCE_INLINE double F64_remainder(double a, double b) { return ::remainder(a, b); }
-SLANG_FORCE_INLINE double F64_step(double a, double b) { return double(b >= a); }
SLANG_FORCE_INLINE double F64_atan2(double a, double b) { return ::atan2(a, b); }
-// TODO(JS):
-// Note C++ has ldexp, but it takes an integer for the exponent, it seems HLSL takes both as float
-SLANG_FORCE_INLINE double F64_ldexp(double m, double e) { return m * ::pow(2.0, e); }
-
-// Ternary
-SLANG_FORCE_INLINE double F64_smoothstep(double min, double max, double x)
-{
- const double t = x < min ? 0.0 : ((x > max) ? 1.0 : (x - min) / (max - min));
- return t * t * (3.0 - 2.0 * t);
+SLANG_FORCE_INLINE double F64_frexp(double x, double& e)
+{
+ int ei;
+ double m = ::frexp(x, &ei);
+ e = ei;
+ return m;
+}
+SLANG_FORCE_INLINE double F64_modf(double x, double& ip)
+{
+ return ::modf(x, &ip);
}
-SLANG_FORCE_INLINE double F64_lerp(double x, double y, double s) { return x + s * (y - x); }
-SLANG_FORCE_INLINE double F64_clamp(double x, double min, double max) { return (x < min) ? min : ((x > max) ? max : x); }
-SLANG_FORCE_INLINE void F64_sincos(double f, double& outSin, double& outCos) { outSin = F64_sin(f); outCos = F64_cos(f); }
SLANG_FORCE_INLINE void F64_asuint(double d, uint32_t& low, uint32_t& hi)
{
@@ -176,6 +175,9 @@ SLANG_FORCE_INLINE void F64_asint(double d, int32_t& low, int32_t& hi)
hi = int32_t(u.u >> 32);
}
+// Ternary
+SLANG_FORCE_INLINE double F64_fma(double a, double b, double c) { return ::fma(a, b, c); }
+
// ----------------------------- I32 -----------------------------------------
SLANG_FORCE_INLINE int32_t I32_abs(int32_t f) { return (f < 0) ? -f : f; }
@@ -183,8 +185,6 @@ SLANG_FORCE_INLINE int32_t I32_abs(int32_t f) { return (f < 0) ? -f : f; }
SLANG_FORCE_INLINE int32_t I32_min(int32_t a, int32_t b) { return a < b ? a : b; }
SLANG_FORCE_INLINE int32_t I32_max(int32_t a, int32_t b) { return a > b ? a : b; }
-SLANG_FORCE_INLINE int32_t I32_clamp(int32_t x, int32_t min, int32_t max) { return ( x < min) ? min : ((x > max) ? max : x); }
-
SLANG_FORCE_INLINE float I32_asfloat(int32_t x) { Union32 u; u.i = x; return u.f; }
SLANG_FORCE_INLINE uint32_t I32_asuint(int32_t x) { return uint32_t(x); }
SLANG_FORCE_INLINE double I32_asdouble(int32_t low, int32_t hi )
@@ -201,8 +201,6 @@ SLANG_FORCE_INLINE uint32_t U32_abs(uint32_t f) { return f; }
SLANG_FORCE_INLINE uint32_t U32_min(uint32_t a, uint32_t b) { return a < b ? a : b; }
SLANG_FORCE_INLINE uint32_t U32_max(uint32_t a, uint32_t b) { return a > b ? a : b; }
-SLANG_FORCE_INLINE uint32_t U32_clamp(uint32_t x, uint32_t min, uint32_t max) { return ( x < min) ? min : ((x > max) ? max : x); }
-
SLANG_FORCE_INLINE float U32_asfloat(uint32_t x) { Union32 u; u.u = x; return u.f; }
// Converts an unsigned 32-bit value to a signed 32-bit integer; the counterpart of
// I32_asuint. Takes uint32_t and returns int32_t.
SLANG_FORCE_INLINE int32_t U32_asint(uint32_t x) { return int32_t(x); }
@@ -238,8 +236,6 @@ SLANG_FORCE_INLINE uint64_t U64_abs(uint64_t f) { return f; }
SLANG_FORCE_INLINE uint64_t U64_min(uint64_t a, uint64_t b) { return a < b ? a : b; }
SLANG_FORCE_INLINE uint64_t U64_max(uint64_t a, uint64_t b) { return a > b ? a : b; }
-SLANG_FORCE_INLINE uint64_t U64_clamp(uint64_t x, uint64_t min, uint64_t max) { return ( x < min) ? min : ((x > max) ? max : x); }
-
SLANG_FORCE_INLINE uint32_t U64_countbits(uint64_t v)
{
#if SLANG_GCC_FAMILY
@@ -264,8 +260,6 @@ SLANG_FORCE_INLINE int64_t I64_abs(int64_t f) { return (f < 0) ? -f : f; }
SLANG_FORCE_INLINE int64_t I64_min(int64_t a, int64_t b) { return a < b ? a : b; }
SLANG_FORCE_INLINE int64_t I64_max(int64_t a, int64_t b) { return a > b ? a : b; }
-SLANG_FORCE_INLINE int64_t I64_clamp(int64_t x, int64_t min, int64_t max) { return ( x < min) ? min : ((x > max) ? max : x); }
-
#ifdef SLANG_PRELUDE_NAMESPACE
}
#endif
diff --git a/prelude/slang-cuda-prelude.h b/prelude/slang-cuda-prelude.h
index 457fb4246..0a2ec088b 100644
--- a/prelude/slang-cuda-prelude.h
+++ b/prelude/slang-cuda-prelude.h
@@ -131,67 +131,113 @@ union Union64
// ----------------------------- F32 -----------------------------------------
// Unary
-SLANG_CUDA_CALL float F32_rcp(float f) { return 1.0f / f; }
+SLANG_CUDA_CALL float F32_ceil(float f) { return ::ceilf(f); }
+SLANG_CUDA_CALL float F32_floor(float f) { return ::floorf(f); }
+SLANG_CUDA_CALL float F32_round(float f) { return ::roundf(f); }
+SLANG_CUDA_CALL float F32_sin(float f) { return ::sinf(f); }
+SLANG_CUDA_CALL float F32_cos(float f) { return ::cosf(f); }
+SLANG_CUDA_CALL void F32_sincos(float f, float& s, float& c) { ::sincosf(f, &s, &c); }
+SLANG_CUDA_CALL float F32_tan(float f) { return ::tanf(f); }
+SLANG_CUDA_CALL float F32_asin(float f) { return ::asinf(f); }
+SLANG_CUDA_CALL float F32_acos(float f) { return ::acosf(f); }
+SLANG_CUDA_CALL float F32_atan(float f) { return ::atanf(f); }
+SLANG_CUDA_CALL float F32_sinh(float f) { return ::sinhf(f); }
+SLANG_CUDA_CALL float F32_cosh(float f) { return ::coshf(f); }
+SLANG_CUDA_CALL float F32_tanh(float f) { return ::tanhf(f); }
+SLANG_CUDA_CALL float F32_log2(float f) { return ::log2f(f); }
+SLANG_CUDA_CALL float F32_log(float f) { return ::logf(f); }
+SLANG_CUDA_CALL float F32_log10(float f) { return ::log10f(f); }
+SLANG_CUDA_CALL float F32_exp2(float f) { return ::exp2f(f); }
+SLANG_CUDA_CALL float F32_exp(float f) { return ::expf(f); }
+SLANG_CUDA_CALL float F32_abs(float f) { return ::fabsf(f); }
+SLANG_CUDA_CALL float F32_trunc(float f) { return ::truncf(f); }
+SLANG_CUDA_CALL float F32_sqrt(float f) { return ::sqrtf(f); }
+SLANG_CUDA_CALL float F32_rsqrt(float f) { return ::rsqrtf(f); }
SLANG_CUDA_CALL float F32_sign(float f) { return ( f == 0.0f) ? f : (( f < 0.0f) ? -1.0f : 1.0f); }
-SLANG_CUDA_CALL float F32_saturate(float f) { return (f < 0.0f) ? 0.0f : (f > 1.0f) ? 1.0f : f; }
-SLANG_CUDA_CALL float F32_frac(float f) { return f - floorf(f); }
+SLANG_CUDA_CALL float F32_frac(float f) { return f - F32_floor(f); }
SLANG_CUDA_CALL bool F32_isnan(float f) { return isnan(f); }
SLANG_CUDA_CALL bool F32_isfinite(float f) { return isfinite(f); }
SLANG_CUDA_CALL bool F32_isinf(float f) { return isinf(f); }
// Binary
-SLANG_CUDA_CALL float F32_min(float a, float b) { return a < b ? a : b; }
-SLANG_CUDA_CALL float F32_max(float a, float b) { return a > b ? a : b; }
-SLANG_CUDA_CALL float F32_step(float a, float b) { return float(b >= a); }
-
-// TODO(JS):
-// Note CUDA has ldexp, but it takes an integer for the exponent, it seems HLSL takes both as float
-SLANG_CUDA_CALL float F32_ldexp(float m, float e) { return m * powf(2.0f, e); }
-
-// Ternary
-SLANG_CUDA_CALL float F32_lerp(float x, float y, float s) { return x + s * (y - x); }
-SLANG_CUDA_CALL void F32_sincos(float f, float& outSin, float& outCos) { sincosf(f, &outSin, &outCos); }
-SLANG_CUDA_CALL float F32_smoothstep(float min, float max, float x)
+SLANG_CUDA_CALL float F32_min(float a, float b) { return ::fminf(a, b); }
+SLANG_CUDA_CALL float F32_max(float a, float b) { return ::fmaxf(a, b); }
+SLANG_CUDA_CALL float F32_pow(float a, float b) { return ::powf(a, b); }
+SLANG_CUDA_CALL float F32_fmod(float a, float b) { return ::fmodf(a, b); }
+SLANG_CUDA_CALL float F32_remainder(float a, float b) { return ::remainderf(a, b); }
+// atan2 with `a` as the y operand and `b` as the x operand. Calls the single-precision
+// ::atan2f directly, like the other F32_* wrappers, instead of ::atan2 plus a cast.
+SLANG_CUDA_CALL float F32_atan2(float a, float b) { return ::atan2f(a, b); }
+
+SLANG_CUDA_CALL float F32_frexp(float x, float& e)
+{
+ int ei;
+ float m = ::frexpf(x, &ei);
+ e = ei;
+ return m;
+}
+SLANG_CUDA_CALL float F32_modf(float x, float& ip)
{
- const float t = x < min ? 0.0f : ((x > max) ? 1.0f : (x - min) / (max - min));
- return t * t * (3.0 - 2.0 * t);
+ return ::modff(x, &ip);
}
-SLANG_CUDA_CALL float F32_clamp(float x, float min, float max) { return ( x < min) ? min : ((x > max) ? max : x); }
SLANG_CUDA_CALL uint32_t F32_asuint(float f) { Union32 u; u.f = f; return u.u; }
SLANG_CUDA_CALL int32_t F32_asint(float f) { Union32 u; u.f = f; return u.i; }
+// Ternary
+SLANG_CUDA_CALL float F32_fma(float a, float b, float c) { return ::fmaf(a, b, c); }
+
+
// ----------------------------- F64 -----------------------------------------
// Unary
-SLANG_CUDA_CALL double F64_rcp(double f) { return 1.0 / f; }
+SLANG_CUDA_CALL double F64_ceil(double f) { return ::ceil(f); }
+SLANG_CUDA_CALL double F64_floor(double f) { return ::floor(f); }
+SLANG_CUDA_CALL double F64_round(double f) { return ::round(f); }
+SLANG_CUDA_CALL double F64_sin(double f) { return ::sin(f); }
+SLANG_CUDA_CALL double F64_cos(double f) { return ::cos(f); }
+SLANG_CUDA_CALL void F64_sincos(double f, double& s, double& c) { ::sincos(f, &s, &c); }
+SLANG_CUDA_CALL double F64_tan(double f) { return ::tan(f); }
+SLANG_CUDA_CALL double F64_asin(double f) { return ::asin(f); }
+SLANG_CUDA_CALL double F64_acos(double f) { return ::acos(f); }
+SLANG_CUDA_CALL double F64_atan(double f) { return ::atan(f); }
+SLANG_CUDA_CALL double F64_sinh(double f) { return ::sinh(f); }
+SLANG_CUDA_CALL double F64_cosh(double f) { return ::cosh(f); }
+SLANG_CUDA_CALL double F64_tanh(double f) { return ::tanh(f); }
+SLANG_CUDA_CALL double F64_log2(double f) { return ::log2(f); }
+SLANG_CUDA_CALL double F64_log(double f) { return ::log(f); }
+// Base-10 logarithm in double precision. The parameter is `double` (not `float`) so the
+// argument is not narrowed to single precision before the logarithm is taken.
+SLANG_CUDA_CALL double F64_log10(double f) { return ::log10(f); }
+SLANG_CUDA_CALL double F64_exp2(double f) { return ::exp2(f); }
+SLANG_CUDA_CALL double F64_exp(double f) { return ::exp(f); }
+SLANG_CUDA_CALL double F64_abs(double f) { return ::fabs(f); }
+SLANG_CUDA_CALL double F64_trunc(double f) { return ::trunc(f); }
+SLANG_CUDA_CALL double F64_sqrt(double f) { return ::sqrt(f); }
+SLANG_CUDA_CALL double F64_rsqrt(double f) { return ::rsqrt(f); }
SLANG_CUDA_CALL double F64_sign(double f) { return (f == 0.0) ? f : ((f < 0.0) ? -1.0 : 1.0); }
-SLANG_CUDA_CALL double F64_saturate(double f) { return (f < 0.0) ? 0.0 : (f > 1.0) ? 1.0 : f; }
-SLANG_CUDA_CALL double F64_frac(double f) { return f - floor(f); }
+SLANG_CUDA_CALL double F64_frac(double f) { return f - F64_floor(f); }
SLANG_CUDA_CALL bool F64_isnan(double f) { return isnan(f); }
SLANG_CUDA_CALL bool F64_isfinite(double f) { return isfinite(f); }
SLANG_CUDA_CALL bool F64_isinf(double f) { return isinf(f); }
// Binary
-SLANG_CUDA_CALL double F64_min(double a, double b) { return a < b ? a : b; }
-SLANG_CUDA_CALL double F64_max(double a, double b) { return a > b ? a : b; }
-SLANG_CUDA_CALL double F64_step(double a, double b) { return double(b >= a); }
-
-// TODO(JS):
-// Note CUDA has ldexp, but it takes an integer for the exponent, it seems HLSL takes both as float
-SLANG_CUDA_CALL double F64_ldexp(double m, double e) { return m * pow(2.0, e); }
-
-// Ternary
-SLANG_CUDA_CALL double F64_lerp(double x, double y, double s) { return x + s * (y - x); }
-SLANG_CUDA_CALL void F64_sincos(double f, double& outSin, double& outCos) { sincos(f, &outSin, &outCos); }
-SLANG_CUDA_CALL double F64_smoothstep(double min, double max, double x)
-{
- const double t = x < min ? 0.0 : ((x > max) ? 1.0 : (x - min) / (max - min));
- return t * t * (3.0 - 2.0 * t);
+SLANG_CUDA_CALL double F64_min(double a, double b) { return ::fmin(a, b); }
+SLANG_CUDA_CALL double F64_max(double a, double b) { return ::fmax(a, b); }
+SLANG_CUDA_CALL double F64_pow(double a, double b) { return ::pow(a, b); }
+SLANG_CUDA_CALL double F64_fmod(double a, double b) { return ::fmod(a, b); }
+SLANG_CUDA_CALL double F64_remainder(double a, double b) { return ::remainder(a, b); }
+SLANG_CUDA_CALL double F64_atan2(double a, double b) { return ::atan2(a, b); }
+
+SLANG_CUDA_CALL double F64_frexp(double x, double& e)
+{
+ int ei;
+ double m = ::frexp(x, &ei);
+ e = ei;
+ return m;
+}
+SLANG_CUDA_CALL double F64_modf(double x, double& ip)
+{
+ return ::modf(x, &ip);
}
-SLANG_CUDA_CALL double F64_clamp(double x, double min, double max) { return (x < min) ? min : ((x > max) ? max : x); }
SLANG_CUDA_CALL void F64_asuint(double d, uint32_t& low, uint32_t& hi)
{
@@ -209,6 +255,9 @@ SLANG_CUDA_CALL void F64_asint(double d, int32_t& low, int32_t& hi)
hi = int32_t(u.u >> 32);
}
+// Ternary
+SLANG_CUDA_CALL double F64_fma(double a, double b, double c) { return ::fma(a, b, c); }
+
// ----------------------------- I32 -----------------------------------------
// Unary
@@ -218,9 +267,6 @@ SLANG_CUDA_CALL int32_t I32_abs(int32_t f) { return (f < 0) ? -f : f; }
SLANG_CUDA_CALL int32_t I32_min(int32_t a, int32_t b) { return a < b ? a : b; }
SLANG_CUDA_CALL int32_t I32_max(int32_t a, int32_t b) { return a > b ? a : b; }
-// Ternary
-SLANG_CUDA_CALL int32_t I32_clamp(int32_t x, int32_t min, int32_t max) { return ( x < min) ? min : ((x > max) ? max : x); }
-
SLANG_CUDA_CALL float I32_asfloat(int32_t x) { Union32 u; u.i = x; return u.f; }
SLANG_CUDA_CALL uint32_t I32_asuint(int32_t x) { return uint32_t(x); }
SLANG_CUDA_CALL double I32_asdouble(int32_t low, int32_t hi )
@@ -239,9 +285,6 @@ SLANG_CUDA_CALL uint32_t U32_abs(uint32_t f) { return f; }
SLANG_CUDA_CALL uint32_t U32_min(uint32_t a, uint32_t b) { return a < b ? a : b; }
SLANG_CUDA_CALL uint32_t U32_max(uint32_t a, uint32_t b) { return a > b ? a : b; }
-// Ternary
-SLANG_CUDA_CALL uint32_t U32_clamp(uint32_t x, uint32_t min, uint32_t max) { return ( x < min) ? min : ((x > max) ? max : x); }
-
SLANG_CUDA_CALL float U32_asfloat(uint32_t x) { Union32 u; u.u = x; return u.f; }
// Converts an unsigned 32-bit value to a signed 32-bit integer; the counterpart of
// I32_asuint. Takes uint32_t and returns int32_t.
SLANG_CUDA_CALL int32_t U32_asint(uint32_t x) { return int32_t(x); }
@@ -266,8 +309,6 @@ SLANG_CUDA_CALL int64_t I64_abs(int64_t f) { return (f < 0) ? -f : f; }
SLANG_CUDA_CALL int64_t I64_min(int64_t a, int64_t b) { return a < b ? a : b; }
SLANG_CUDA_CALL int64_t I64_max(int64_t a, int64_t b) { return a > b ? a : b; }
-SLANG_CUDA_CALL int64_t I64_clamp(int64_t x, int64_t min, int64_t max) { return ( x < min) ? min : ((x > max) ? max : x); }
-
// ----------------------------- U64 -----------------------------------------
// Absolute value of an unsigned 64-bit integer is the value itself. Returns uint64_t so
// inputs above INT64_MAX are not turned into negative numbers by a signed return type.
SLANG_CUDA_CALL uint64_t U64_abs(uint64_t f) { return f; }
@@ -275,8 +316,6 @@ SLANG_CUDA_CALL int64_t U64_abs(uint64_t f) { return f; }
// Minimum / maximum of two unsigned 64-bit integers. Both return uint64_t so results
// above INT64_MAX stay positive rather than being reinterpreted as signed.
SLANG_CUDA_CALL uint64_t U64_min(uint64_t a, uint64_t b) { return a < b ? a : b; }
SLANG_CUDA_CALL uint64_t U64_max(uint64_t a, uint64_t b) { return a > b ? a : b; }
-SLANG_CUDA_CALL int64_t U64_clamp(uint64_t x, uint64_t min, uint64_t max) { return ( x < min) ? min : ((x > max) ? max : x); }
-
SLANG_CUDA_CALL uint32_t U64_countbits(uint64_t v)
{
// https://docs.nvidia.com/cuda/cuda-math-api/group__CUDA__MATH__INTRINSIC__INT.html#group__CUDA__MATH__INTRINSIC__INT_1g43c9c7d2b9ebf202ff1ef5769989be46
diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang
index 20158c1b1..03496ccc8 100644
--- a/source/slang/hlsl.meta.slang
+++ b/source/slang/hlsl.meta.slang
@@ -349,8 +349,13 @@ void abort();
// Absolute value (HLSL SM 1.0)
__generic<T : __BuiltinSignedArithmeticType>
+__target_intrinsic(hlsl)
+__target_intrinsic(glsl)
+__target_intrinsic(cuda, "$P_abs($0)")
+__target_intrinsic(cpp, "$P_abs($0)")
T abs(T x);
/*{
+ // Note: this simple definition may not be appropriate for floating-point inputs
return x < 0 ? -x : x;
}*/
@@ -372,6 +377,10 @@ matrix<T,N,M> abs(matrix<T,N,M> x)
// Inverse cosine (HLSL SM 1.0)
__generic<T : __BuiltinFloatingPointType>
+__target_intrinsic(hlsl)
+__target_intrinsic(glsl)
+__target_intrinsic(cuda, "$P_acos($0)")
+__target_intrinsic(cpp, "$P_acos($0)")
T acos(T x);
__generic<T : __BuiltinFloatingPointType, let N : int>
@@ -530,6 +539,8 @@ matrix<float,N,M> asfloat(matrix<float,N,M> x);
__generic<T : __BuiltinFloatingPointType>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
+__target_intrinsic(cuda, "$P_asin($0)")
+__target_intrinsic(cpp, "$P_asin($0)")
T asin(T x);
__generic<T : __BuiltinFloatingPointType, let N : int>
@@ -659,6 +670,10 @@ matrix<uint,N,M> asuint(matrix<uint,N,M> x);
// Inverse tangent (HLSL SM 1.0)
__generic<T : __BuiltinFloatingPointType>
+__target_intrinsic(hlsl)
+__target_intrinsic(glsl)
+__target_intrinsic(cuda, "$P_atan($0)")
+__target_intrinsic(cpp, "$P_atan($0)")
T atan(T x);
__generic<T : __BuiltinFloatingPointType, let N : int>
@@ -679,6 +694,8 @@ matrix<T, N, M> atan(matrix<T, N, M> x)
__generic<T : __BuiltinFloatingPointType>
__target_intrinsic(hlsl)
__target_intrinsic(glsl,"atan($0,$1)")
+__target_intrinsic(cuda, "$P_atan2($0, $1)")
+__target_intrinsic(cpp, "$P_atan2($0, $1)")
T atan2(T y, T x);
__generic<T : __BuiltinFloatingPointType, let N : int>
@@ -698,6 +715,10 @@ matrix<T,N,M> atan2(matrix<T,N,M> y, matrix<T,N,M> x)
// Ceiling (HLSL SM 1.0)
__generic<T : __BuiltinFloatingPointType>
+__target_intrinsic(hlsl)
+__target_intrinsic(glsl)
+__target_intrinsic(cuda, "$P_ceil($0)")
+__target_intrinsic(cpp, "$P_ceil($0)")
T ceil(T x);
__generic<T : __BuiltinFloatingPointType, let N : int>
@@ -767,6 +788,10 @@ void clip(matrix<T,N,M> x)
// Cosine
__generic<T : __BuiltinFloatingPointType>
+__target_intrinsic(hlsl)
+__target_intrinsic(glsl)
+__target_intrinsic(cuda, "$P_cos($0)")
+__target_intrinsic(cpp, "$P_cos($0)")
T cos(T x);
__generic<T : __BuiltinFloatingPointType, let N : int>
@@ -786,6 +811,10 @@ matrix<T, N, M> cos(matrix<T, N, M> x)
// Hyperbolic cosine
__generic<T : __BuiltinFloatingPointType>
+__target_intrinsic(hlsl)
+__target_intrinsic(glsl)
+__target_intrinsic(cuda, "$P_cosh($0)")
+__target_intrinsic(cpp, "$P_cosh($0)")
T cosh(T x);
__generic<T : __BuiltinFloatingPointType, let N : int>
@@ -804,7 +833,10 @@ matrix<T, N, M> cosh(matrix<T, N, M> x)
}
// Population count
+__target_intrinsic(hlsl)
__target_intrinsic(glsl, "bitCount")
+__target_intrinsic(cuda, "$P_countbits($0)")
+__target_intrinsic(cpp, "$P_countbits($0)")
uint countbits(uint value);
// Cross product
@@ -1070,6 +1102,10 @@ matrix<T,N,M> EvaluateAttributeSnapped(matrix<T,N,M> x, int2 offset);
// Base-e exponent
__generic<T : __BuiltinFloatingPointType>
+__target_intrinsic(hlsl)
+__target_intrinsic(glsl)
+__target_intrinsic(cuda, "$P_exp($0)")
+__target_intrinsic(cpp, "$P_exp($0)")
T exp(T x);
__generic<T : __BuiltinFloatingPointType, let N : int>
@@ -1090,6 +1126,10 @@ matrix<T, N, M> exp(matrix<T, N, M> x)
// Base-2 exponent
__generic<T : __BuiltinFloatingPointType>
+__target_intrinsic(hlsl)
+__target_intrinsic(glsl)
+__target_intrinsic(cuda, "$P_exp2($0)")
+__target_intrinsic(cpp, "$P_exp2($0)")
T exp2(T x);
__generic<T : __BuiltinFloatingPointType, let N : int>
@@ -1133,7 +1173,10 @@ vector<T,N> faceforward(vector<T,N> n, vector<T,N> i, vector<T,N> ng)
}
// Find first set bit starting at high bit and working down
+__target_intrinsic(hlsl)
__target_intrinsic(glsl,"findMSB")
+__target_intrinsic(cuda, "$P_firstbithigh($0)")
+__target_intrinsic(cpp, "$P_firstbithigh($0)")
int firstbithigh(int value);
__target_intrinsic(hlsl)
@@ -1144,7 +1187,10 @@ vector<int, N> firstbithigh(vector<int, N> value)
VECTOR_MAP_UNARY(int, N, firstbithigh, value);
}
+__target_intrinsic(hlsl)
__target_intrinsic(glsl,"findMSB")
+__target_intrinsic(cuda, "$P_firstbithigh($0)")
+__target_intrinsic(cpp, "$P_firstbithigh($0)")
uint firstbithigh(uint value);
__target_intrinsic(hlsl)
@@ -1156,7 +1202,10 @@ vector<uint,N> firstbithigh(vector<uint,N> value)
}
// Find first set bit starting at low bit and working up
+__target_intrinsic(hlsl)
__target_intrinsic(glsl,"findLSB")
+__target_intrinsic(cuda, "$P_firstbitlow($0)")
+__target_intrinsic(cpp, "$P_firstbitlow($0)")
int firstbitlow(int value);
__target_intrinsic(hlsl)
@@ -1167,7 +1216,10 @@ vector<int,N> firstbitlow(vector<int,N> value)
VECTOR_MAP_UNARY(int, N, firstbitlow, value);
}
+__target_intrinsic(hlsl)
__target_intrinsic(glsl,"findLSB")
+__target_intrinsic(cuda, "$P_firstbitlow($0)")
+__target_intrinsic(cpp, "$P_firstbitlow($0)")
uint firstbitlow(uint value);
__target_intrinsic(hlsl)
@@ -1181,6 +1233,10 @@ vector<uint,N> firstbitlow(vector<uint,N> value)
// Floor (HLSL SM 1.0)
__generic<T : __BuiltinFloatingPointType>
+__target_intrinsic(hlsl)
+__target_intrinsic(glsl)
+__target_intrinsic(cuda, "$P_floor($0)")
+__target_intrinsic(cpp, "$P_floor($0)")
T floor(T x);
__generic<T : __BuiltinFloatingPointType, let N : int>
@@ -1201,6 +1257,8 @@ matrix<T, N, M> floor(matrix<T, N, M> x)
// Fused multiply-add for doubles
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
+__target_intrinsic(cuda, "$P_fma($0, $1, $2)")
+__target_intrinsic(cpp, "$P_fma($0, $1, $2)")
double fma(double a, double b, double c);
__generic<let N : int>
@@ -1220,6 +1278,10 @@ matrix<double, N, M> fma(matrix<double, N, M> a, matrix<double, N, M> b, matrix<
// Floating point remainder of x/y
__generic<T : __BuiltinFloatingPointType>
+__target_intrinsic(hlsl)
+__target_intrinsic(glsl)
+__target_intrinsic(cuda, "$P_fmod($0, $1)")
+__target_intrinsic(cpp, "$P_fmod($0, $1)")
T fmod(T x, T y);
__generic<T : __BuiltinFloatingPointType, let N : int>
@@ -1239,7 +1301,10 @@ matrix<T, N, M> fmod(matrix<T, N, M> x, matrix<T, N, M> y)
// Fractional part
__generic<T : __BuiltinFloatingPointType>
+__target_intrinsic(hlsl)
__target_intrinsic(glsl, fract)
+__target_intrinsic(cuda, "$P_frac($0)")
+__target_intrinsic(cpp, "$P_frac($0)")
T frac(T x);
__generic<T : __BuiltinFloatingPointType, let N : int>
@@ -1438,8 +1503,8 @@ void InterlockedXor(__ref uint dest, uint value, out uint original_value);
__generic<T : __BuiltinFloatingPointType>
__target_intrinsic(hlsl)
-__target_intrinsic(cpu)
-__target_intrinsic(cuda)
+__target_intrinsic(cuda, "$P_isfinite($0)")
+__target_intrinsic(cpp, "$P_isfinite($0)")
bool isfinite(T x)
{
return !(isinf(x) || isnan(x));
@@ -1461,6 +1526,10 @@ matrix<bool, N, M> isfinite(matrix<T, N, M> x)
// Is floating-point value infinite?
__generic<T : __BuiltinFloatingPointType>
+__target_intrinsic(hlsl)
+__target_intrinsic(glsl)
+__target_intrinsic(cuda, "$P_isinf($0)")
+__target_intrinsic(cpp, "$P_isinf($0)")
bool isinf(T x);
__generic<T : __BuiltinFloatingPointType, let N : int>
@@ -1480,6 +1549,10 @@ matrix<bool, N, M> isinf(matrix<T, N, M> x)
// Is floating-point value not-a-number?
__generic<T : __BuiltinFloatingPointType>
+__target_intrinsic(hlsl)
+__target_intrinsic(glsl)
+__target_intrinsic(cuda, "$P_isnan($0)")
+__target_intrinsic(cpp, "$P_isnan($0)")
bool isnan(T x);
__generic<T : __BuiltinFloatingPointType, let N : int>
@@ -1565,6 +1638,10 @@ float4 lit(float n_dot_l, float n_dot_h, float m)
// Base-e logarithm
__generic<T : __BuiltinFloatingPointType>
+__target_intrinsic(hlsl)
+__target_intrinsic(glsl)
+__target_intrinsic(cuda, "$P_log($0)")
+__target_intrinsic(cpp, "$P_log($0)")
T log(T x);
__generic<T : __BuiltinFloatingPointType, let N : int>
@@ -1586,6 +1663,8 @@ matrix<T, N, M> log(matrix<T, N, M> x)
__generic<T : __BuiltinFloatingPointType>
__target_intrinsic(hlsl)
__target_intrinsic(glsl, "(log( $0 ) * $S0( 0.43429448190325182765112891891661) )" )
+__target_intrinsic(cuda, "$P_log10($0)")
+__target_intrinsic(cpp, "$P_log10($0)")
T log10(T x);
__generic<T : __BuiltinFloatingPointType, let N : int>
@@ -1605,6 +1684,10 @@ matrix<T,N,M> log10(matrix<T,N,M> x)
// Base-2 logarithm
__generic<T : __BuiltinFloatingPointType>
+__target_intrinsic(hlsl)
+__target_intrinsic(glsl)
+__target_intrinsic(cuda, "$P_log2($0)")
+__target_intrinsic(cpp, "$P_log2($0)")
T log2(T x);
__generic<T : __BuiltinFloatingPointType, let N : int>
@@ -1627,6 +1710,8 @@ matrix<T,N,M> log2(matrix<T,N,M> x)
__generic<T : __BuiltinArithmeticType>
__target_intrinsic(hlsl)
__target_intrinsic(glsl, fma)
+__target_intrinsic(cuda, "$P_fma($0, $1, $2)")
+__target_intrinsic(cpp, "$P_fma($0, $1, $2)")
T mad(T mvalue, T avalue, T bvalue);
__generic<T : __BuiltinArithmeticType, let N : int>
@@ -1646,6 +1731,10 @@ matrix<T, N, M> mad(matrix<T, N, M> mvalue, matrix<T, N, M> avalue, matrix<T, N,
// maximum
__generic<T : __BuiltinArithmeticType>
+__target_intrinsic(hlsl)
+__target_intrinsic(glsl)
+__target_intrinsic(cuda, "$P_max($0, $1)")
+__target_intrinsic(cpp, "$P_max($0, $1)")
T max(T x, T y);
// Note: a stdlib implementation of `max` (or `min`) will require splitting
// floating-point and integer cases apart, because the floating-point
@@ -1669,6 +1758,10 @@ matrix<T, N, M> max(matrix<T, N, M> x, matrix<T, N, M> y)
// minimum
__generic<T : __BuiltinArithmeticType>
+__target_intrinsic(hlsl)
+__target_intrinsic(glsl)
+__target_intrinsic(cuda, "$P_min($0, $1)")
+__target_intrinsic(cpp, "$P_min($0, $1)")
T min(T x, T y);
__generic<T : __BuiltinArithmeticType, let N : int>
@@ -1757,28 +1850,64 @@ T mul(vector<T, N> x, vector<T, N> y)
return dot(x, y);
}
-${{{{
-// TODO: The following functions could conceivably be defined
-// in the stdlib for the benefit of targets without direct
-// support for matrices, but the use of `__intrinsic_op` to
-// map them to a dedicated IR instruction interferes with
-// that choice.
-}}}}
-
// vector-matrix
+// Multiplies an N-element vector by an N x M matrix, producing an M-element
+// vector: result[j] = sum over i of left[i] * right[i][j].
+// The GLSL expansion swaps the operands ("$1 * $0") because the notion of
+// rows vs. columns is reversed between HLSL/Slang and GLSL.
+// NOTE(review): the body below looks like the fallback used for targets that
+// have no `__target_intrinsic` mapping listed here - confirm.
__generic<T : __BuiltinArithmeticType, let N : int, let M : int>
-__intrinsic_op(mulVectorMatrix)
-vector<T,M> mul(vector<T,N> x, matrix<T,N,M> y);
+__target_intrinsic(hlsl)
+__target_intrinsic(glsl, "($1 * $0)")
+vector<T, M> mul(vector<T, N> left, matrix<T, N, M> right)
+{
+ vector<T,M> result;
+ for( int j = 0; j < M; ++j )
+ {
+ T sum = T(0);
+ for( int i = 0; i < N; ++i )
+ {
+ sum += left[i] * right[i][j];
+ }
+ result[j] = sum;
+ }
+ return result;
+}
// matrix-vector
+// Multiplies an N x M matrix by an M-element vector, producing an N-element
+// vector: result[i] = sum over j of left[i][j] * right[j].
+// The GLSL expansion swaps the operands ("$1 * $0") because the notion of
+// rows vs. columns is reversed between HLSL/Slang and GLSL.
+// NOTE(review): the body below looks like the fallback used for targets that
+// have no `__target_intrinsic` mapping listed here - confirm.
__generic<T : __BuiltinArithmeticType, let N : int, let M : int>
-__intrinsic_op(mulMatrixVector)
-vector<T,N> mul(matrix<T,N,M> x, vector<T,M> y);
+__target_intrinsic(hlsl)
+__target_intrinsic(glsl, "($1 * $0)")
+vector<T,N> mul(matrix<T,N,M> left, vector<T,M> right)
+{
+ vector<T,N> result;
+ for( int i = 0; i < N; ++i )
+ {
+ T sum = T(0);
+ for( int j = 0; j < M; ++j )
+ {
+ sum += left[i][j] * right[j];
+ }
+ result[i] = sum;
+ }
+ return result;
+}
+
// matrix-matrix
+// Multiplies an R x N matrix by an N x C matrix, producing an R x C matrix:
+//   result[r][c] = sum over i of left[r][i] * right[i][c]
+// `left` is the first (R x N) operand and `right` the second (N x C) one,
+// in the same positional order as the vector-matrix and matrix-vector
+// overloads. The indexing below relies on that order: with the operands
+// exchanged it would read outside the matrices whenever R, N and C differ.
+// The GLSL expansion swaps the operands ("$1 * $0") because the notion of
+// rows vs. columns is reversed between HLSL/Slang and GLSL.
__generic<T : __BuiltinArithmeticType, let R : int, let N : int, let C : int>
-__intrinsic_op(mulMatrixMatrix)
-matrix<T,R,C> mul(matrix<T,R,N> x, matrix<T,N,C> y);
+__target_intrinsic(hlsl)
+__target_intrinsic(glsl, "($1 * $0)")
+matrix<T,R,C> mul(matrix<T,R,N> left, matrix<T,N,C> right)
+{
+ matrix<T,R,C> result;
+ for( int r = 0; r < R; ++r)
+ for( int c = 0; c < C; ++c)
+ {
+ T sum = T(0);
+ for( int i = 0; i < N; ++i )
+ {
+ // Row r of the left operand dotted with column c of the right one.
+ sum += left[r][i] * right[i][c];
+ }
+ result[r][c] = sum;
+ }
+ return result;
+}
// noise (deprecated)
@@ -1839,6 +1968,10 @@ vector<T,N> normalize(vector<T,N> x)
// Raise to a power
__generic<T : __BuiltinFloatingPointType>
+__target_intrinsic(hlsl)
+__target_intrinsic(glsl)
+__target_intrinsic(cuda, "$P_pow($0, $1)")
+__target_intrinsic(cpp, "$P_pow($0, $1)")
T pow(T x, T y);
__generic<T : __BuiltinFloatingPointType, let N : int>
@@ -1999,7 +2132,10 @@ vector<T,N> refract(vector<T,N> i, vector<T,N> n, T eta)
}
// Reverse order of bits
+__target_intrinsic(hlsl)
__target_intrinsic(glsl, "bitfieldReverse")
+__target_intrinsic(cuda, "$P_reversebits($0)")
+__target_intrinsic(cpp, "$P_reversebits($0)")
uint reversebits(uint value);
__target_intrinsic(glsl, "bitfieldReverse")
@@ -2011,6 +2147,10 @@ vector<uint, N> reversebits(vector<uint, N> value)
// Round-to-nearest
__generic<T : __BuiltinFloatingPointType>
+__target_intrinsic(hlsl)
+__target_intrinsic(glsl)
+__target_intrinsic(cuda, "$P_round($0)")
+__target_intrinsic(cpp, "$P_round($0)")
T round(T x);
__generic<T : __BuiltinFloatingPointType, let N : int>
@@ -2032,7 +2172,12 @@ matrix<T,N,M> round(matrix<T,N,M> x)
__generic<T : __BuiltinFloatingPointType>
__target_intrinsic(hlsl)
__target_intrinsic(glsl, "inversesqrt($0)")
-T rsqrt(T x);
+__target_intrinsic(cuda, "$P_rsqrt($0)")
+__target_intrinsic(cpp, "$P_rsqrt($0)")
+T rsqrt(T x)
+{
+ // Reciprocal square root expressed through `sqrt`.
+ // NOTE(review): presumably only reached on targets that have no
+ // `__target_intrinsic` mapping above - confirm.
+ return T(1.0) / sqrt(x);
+}
__generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
@@ -2076,7 +2221,10 @@ matrix<T,N,M> saturate(matrix<T,N,M> x)
// Extract sign of value
__generic<T : __BuiltinSignedArithmeticType>
+__target_intrinsic(hlsl)
__target_intrinsic(glsl, "int(sign($0))")
+__target_intrinsic(cuda, "$P_sign($0)")
+__target_intrinsic(cpp, "$P_sign($0)")
int sign(T x);
__generic<T : __BuiltinSignedArithmeticType, let N : int>
@@ -2098,6 +2246,10 @@ matrix<int, N, M> sign(matrix<T, N, M> x)
// Sine
__generic<T : __BuiltinFloatingPointType>
+__target_intrinsic(hlsl)
+__target_intrinsic(glsl)
+__target_intrinsic(cuda, "$P_sin($0)")
+__target_intrinsic(cpp, "$P_sin($0)")
T sin(T x);
__generic<T : __BuiltinFloatingPointType, let N : int>
@@ -2118,6 +2270,7 @@ matrix<T, N, M> sin(matrix<T, N, M> x)
// Sine and cosine
__generic<T : __BuiltinFloatingPointType>
__target_intrinsic(hlsl)
+__target_intrinsic(cuda, "$P_sincos($0, $1, $2)")
void sincos(T x, out T s, out T c)
{
s = sin(x);
@@ -2142,6 +2295,10 @@ void sincos(matrix<T,N,M> x, out matrix<T,N,M> s, out matrix<T,N,M> c)
// Hyperbolic Sine
__generic<T : __BuiltinFloatingPointType>
+__target_intrinsic(hlsl)
+__target_intrinsic(glsl)
+__target_intrinsic(cuda, "$P_sinh($0)")
+__target_intrinsic(cpp, "$P_sinh($0)")
T sinh(T x);
__generic<T : __BuiltinFloatingPointType, let N : int>
@@ -2186,6 +2343,10 @@ matrix<T, N, M> smoothstep(matrix<T, N, M> min, matrix<T, N, M> max, matrix<T, N
// Square root
__generic<T : __BuiltinFloatingPointType>
+__target_intrinsic(hlsl)
+__target_intrinsic(glsl)
+__target_intrinsic(cuda, "$P_sqrt($0)")
+__target_intrinsic(cpp, "$P_sqrt($0)")
T sqrt(T x);
__generic<T : __BuiltinFloatingPointType, let N : int>
@@ -2229,6 +2390,10 @@ matrix<T, N, M> step(matrix<T, N, M> y, matrix<T, N, M> x)
// Tangent
__generic<T : __BuiltinFloatingPointType>
+__target_intrinsic(hlsl)
+__target_intrinsic(glsl)
+__target_intrinsic(cuda, "$P_tan($0)")
+__target_intrinsic(cpp, "$P_tan($0)")
T tan(T x);
__generic<T : __BuiltinFloatingPointType, let N : int>
@@ -2248,6 +2413,10 @@ matrix<T, N, M> tan(matrix<T, N, M> x)
// Hyperbolic tangent
__generic<T : __BuiltinFloatingPointType>
+__target_intrinsic(hlsl)
+__target_intrinsic(glsl)
+__target_intrinsic(cuda, "$P_tanh($0)")
+__target_intrinsic(cpp, "$P_tanh($0)")
T tanh(T x);
__generic<T : __BuiltinFloatingPointType, let N : int>
@@ -2280,6 +2449,10 @@ matrix<T, M, N> transpose(matrix<T, N, M> x)
// Truncate to integer
__generic<T : __BuiltinFloatingPointType>
+__target_intrinsic(hlsl)
+__target_intrinsic(glsl)
+__target_intrinsic(cuda, "$P_trunc($0)")
+__target_intrinsic(cpp, "$P_trunc($0)")
T trunc(T x);
__generic<T : __BuiltinFloatingPointType, let N : int>
diff --git a/source/slang/slang-emit-c-like.cpp b/source/slang/slang-emit-c-like.cpp
index 55f251565..3631040b8 100644
--- a/source/slang/slang-emit-c-like.cpp
+++ b/source/slang/slang-emit-c-like.cpp
@@ -1777,6 +1777,42 @@ void CLikeSourceEmitter::emitIntrinsicCallExprImpl(
}
break;
+ case 'P':
+ // Type-based prefix as used for CUDA and C++ targets
+ //
+ // Emits a short type tag (`I8` ... `U64`, `F16`, `F32`, `F64`) selected from
+ // the data type of the first call argument, so that an intrinsic template
+ // such as "$P_sin($0)" expands to a type-prefixed name like `F32_sin(...)`.
+ {
+ // The prefix is always derived from argument 0; the call must have one.
+ Index argIndex = 0;
+ SLANG_RELEASE_ASSERT(argCount > argIndex);
+ auto arg = args[argIndex].get();
+ auto argType = arg->getDataType();
+
+ const char* str = "";
+ switch(argType->op)
+ {
+ #define CASE(OP, STR) \
+ case kIROp_##OP: str = #STR; break
+
+ CASE(Int8Type, I8);
+ CASE(Int16Type, I16);
+ CASE(IntType, I32);
+ CASE(Int64Type, I64);
+ CASE(UInt8Type, U8);
+ CASE(UInt16Type, U16);
+ CASE(UIntType, U32);
+ CASE(UInt64Type, U64);
+ CASE(HalfType, F16);
+ CASE(FloatType, F32);
+ CASE(DoubleType, F64);
+
+ #undef CASE
+
+ default:
+ // Any type not listed above has no prefix and is reported as an error.
+ SLANG_UNEXPECTED("unexpected type in intrinsic definition");
+ break;
+ }
+ m_writer->emit(str);
+ }
+ break;
+
+
default:
SLANG_UNEXPECTED("bad format in intrinsic definition");
break;
@@ -2059,17 +2095,6 @@ void CLikeSourceEmitter::defaultEmitInstExpr(IRInst* inst, const EmitOpInfo& inO
}
break;
- case kIROp_Mul_Vector_Matrix:
- case kIROp_Mul_Matrix_Vector:
- case kIROp_Mul_Matrix_Matrix:
- // Default impl
- m_writer->emit("mul(");
- emitOperand(inst->getOperand(0), getInfo(EmitOp::General));
- m_writer->emit(", ");
- emitOperand(inst->getOperand(1), getInfo(EmitOp::General));
- m_writer->emit(")");
- break;
-
case kIROp_swizzle:
{
auto prec = getInfo(EmitOp::Postfix);
diff --git a/source/slang/slang-emit-cpp.cpp b/source/slang/slang-emit-cpp.cpp
index 7fb04c33b..bece6c2d0 100644
--- a/source/slang/slang-emit-cpp.cpp
+++ b/source/slang/slang-emit-cpp.cpp
@@ -901,56 +901,6 @@ void CPPSourceEmitter::_emitSignature(const UnownedStringSlice& funcName, const
writer->emit(")");
}
-void CPPSourceEmitter::_emitVecMatMulDefinition(const UnownedStringSlice& funcName, const HLSLIntrinsic* specOp)
-{
- IRFuncType* funcType = specOp->signatureType;
- SLANG_ASSERT(funcType->getParamCount() == 2);
- IRType* paramType0 = funcType->getParamType(0);
- IRType* paramType1 = funcType->getParamType(1);
- IRType* retType = specOp->returnType;
-
- SourceWriter* writer = getSourceWriter();
-
- _emitSignature(funcName, specOp);
-
- writer->emit("\n{\n");
- writer->indent();
-
- emitType(retType);
- writer->emit(" r;\n");
-
- TypeDimension dimA = _getTypeDimension(paramType0, false);
- TypeDimension dimB = _getTypeDimension(paramType1, true);
- TypeDimension resultDim = _getTypeDimension(retType, paramType1->op == kIROp_VectorType);
-
- for (int i = 0; i < resultDim.rowCount; ++i)
- {
- for (int j = 0; j < resultDim.colCount; ++j)
- {
- _emitAccess(UnownedStringSlice::fromLiteral("r"), resultDim, i, j, writer);
- writer->emit(" = ");
-
- for (int k = 0; k < dimA.colCount; k++)
- {
- if (k > 0)
- {
- writer->emit(" + ");
- }
- _emitAccess(UnownedStringSlice::fromLiteral("a"), dimA, i, k, writer);
- writer->emit(" * ");
- _emitAccess(UnownedStringSlice::fromLiteral("b"), dimB, k, j, writer);
- }
-
- writer->emit(";\n");
- }
- }
-
- writer->emit("return r;\n");
-
- writer->dedent();
- writer->emit("}\n\n");
-}
-
UnownedStringSlice CPPSourceEmitter::_getAndEmitSpecializedOperationDefinition(HLSLIntrinsic::Op op, IRType*const* argTypes, Int argCount, IRType* retType)
{
HLSLIntrinsic intrinsic;
@@ -960,38 +910,6 @@ UnownedStringSlice CPPSourceEmitter::_getAndEmitSpecializedOperationDefinition(H
return _getFuncName(specOp);
}
-void CPPSourceEmitter::_emitLengthDefinition(const UnownedStringSlice& funcName, const HLSLIntrinsic* specOp)
-{
- SourceWriter* writer = getSourceWriter();
-
- IRFuncType* funcType = specOp->signatureType;
- SLANG_ASSERT(funcType->getParamCount() == 1);
- IRType* paramType0 = funcType->getParamType(0);
-
- SLANG_ASSERT(paramType0->op == kIROp_VectorType);
-
- IRBasicType* elementType = as<IRBasicType>(static_cast<IRVectorType*>(paramType0)->getElementType());
-
- IRType* dotArgs[] = { paramType0, paramType0 };
- UnownedStringSlice dotFuncName = _getAndEmitSpecializedOperationDefinition(HLSLIntrinsic::Op::Dot, dotArgs, SLANG_COUNT_OF(dotArgs), elementType);
-
- UnownedStringSlice sqrtName = _getScalarFuncName(HLSLIntrinsic::Op::Sqrt, elementType);
-
- _emitSignature(funcName, specOp);
-
- writer->emit("\n{\n");
- writer->indent();
-
- writer->emit("return ");
- writer->emit(sqrtName);
- writer->emit("(");
- writer->emit(dotFuncName);
- writer->emit("(a, a));\n");
-
- writer->dedent();
- writer->emit("}\n\n");
-}
-
void CPPSourceEmitter::_emitGetAtDefinition(const UnownedStringSlice& funcName, const HLSLIntrinsic* specOp)
{
SourceWriter* writer = getSourceWriter();
@@ -1049,47 +967,6 @@ void CPPSourceEmitter::_emitGetAtDefinition(const UnownedStringSlice& funcName,
}
}
-void CPPSourceEmitter::_emitNormalizeDefinition(const UnownedStringSlice& funcName, const HLSLIntrinsic* specOp)
-{
- SourceWriter* writer = getSourceWriter();
-
- IRFuncType* funcType = specOp->signatureType;
- SLANG_ASSERT(funcType->getParamCount() == 1);
- IRType* paramType0 = funcType->getParamType(0);
-
- SLANG_ASSERT(paramType0->op == kIROp_VectorType);
-
- IRBasicType* elementType = as<IRBasicType>(static_cast<IRVectorType*>(paramType0)->getElementType());
-
- IRType* dotArgs[] = { paramType0, paramType0 };
- UnownedStringSlice dotFuncName = _getAndEmitSpecializedOperationDefinition(HLSLIntrinsic::Op::Dot, dotArgs, SLANG_COUNT_OF(dotArgs), elementType);
- UnownedStringSlice rsqrtName = _getScalarFuncName(HLSLIntrinsic::Op::RecipSqrt, elementType);
- IRType* vecMulScalarArgs[] = { paramType0, elementType };
- UnownedStringSlice vecMulScalarName = _getAndEmitSpecializedOperationDefinition(HLSLIntrinsic::Op::Mul, vecMulScalarArgs, SLANG_COUNT_OF(vecMulScalarArgs), paramType0);
-
- TypeDimension dimA = _getTypeDimension(paramType0, false);
-
- // Assumes C++
-
- _emitSignature(funcName, specOp);
-
- writer->emit("\n{\n");
- writer->indent();
-
- writer->emit("return ");
-
- // Assumes C++ here
- writer->emit("a * ");
- writer->emit(rsqrtName);
- writer->emit("(");
- writer->emit(dotFuncName);
- writer->emit("(a, a));\n");
-
- writer->dedent();
- writer->emit("}\n\n");
-}
-
-
void CPPSourceEmitter::_emitConstructConvertDefinition(const UnownedStringSlice& funcName, const HLSLIntrinsic* specOp)
{
SourceWriter* writer = getSourceWriter();
@@ -1329,42 +1206,6 @@ void CPPSourceEmitter::_emitConstructFromScalarDefinition(const UnownedStringSli
writer->emit("}\n\n");
}
-void CPPSourceEmitter::_emitReflectDefinition(const UnownedStringSlice& funcName, const HLSLIntrinsic* specOp)
-{
- SourceWriter* writer = getSourceWriter();
-
- IRFuncType* funcType = specOp->signatureType;
- SLANG_ASSERT(funcType->getParamCount() == 2);
- IRType* paramType0 = funcType->getParamType(0);
-
- SLANG_ASSERT(paramType0->op == kIROp_VectorType);
-
- IRBasicType* elementType = as<IRBasicType>(static_cast<IRVectorType*>(paramType0)->getElementType());
-
- // Make sure we have all these functions defined before emitting
- IRType* dotArgs[] = { paramType0, paramType0 };
- UnownedStringSlice dotFuncName = _getAndEmitSpecializedOperationDefinition(HLSLIntrinsic::Op::Dot, dotArgs, SLANG_COUNT_OF(dotArgs), elementType);
-
- IRType* subArgs[] = { paramType0, paramType0};
- UnownedStringSlice subFuncName = _getAndEmitSpecializedOperationDefinition(HLSLIntrinsic::Op::Sub, subArgs, SLANG_COUNT_OF(subArgs), paramType0);
-
- IRType* vecMulScalarArgs[] = { paramType0, elementType };
- UnownedStringSlice vecMulScalarFuncName = _getAndEmitSpecializedOperationDefinition(HLSLIntrinsic::Op::Mul, vecMulScalarArgs, SLANG_COUNT_OF(vecMulScalarArgs), paramType0);
-
- // Assumes C++
-
- _emitSignature(funcName, specOp);
- writer->emit("\n{\n");
- writer->indent();
-
- writer->emit("return a - b * 2.0 * ");
- writer->emit(dotFuncName);
- writer->emit("(a, b);\n");
-
- writer->dedent();
- writer->emit("}\n\n");
-}
-
void CPPSourceEmitter::_maybeEmitSpecializedOperationDefinition(const HLSLIntrinsic* specOp)
{
// Check if it's been emitted already, if not add it.
@@ -1385,28 +1226,11 @@ void CPPSourceEmitter::emitSpecializedOperationDefinition(const HLSLIntrinsic* s
{
return _emitInitDefinition(_getFuncName(specOp), specOp);
}
- case Op::VecMatMul:
- case Op::Dot:
- {
- return _emitVecMatMulDefinition(_getFuncName(specOp), specOp);
- }
case Op::Any:
case Op::All:
{
return _emitAnyAllDefinition(_getFuncName(specOp), specOp);
}
- case Op::Normalize:
- {
- return _emitNormalizeDefinition(_getFuncName(specOp), specOp);
- }
- case Op::Length:
- {
- return _emitLengthDefinition(_getFuncName(specOp), specOp);
- }
- case Op::Reflect:
- {
- return _emitReflectDefinition(_getFuncName(specOp), specOp);
- }
case Op::ConstructConvert:
{
return _emitConstructConvertDefinition(_getFuncName(specOp), specOp);
diff --git a/source/slang/slang-emit-cpp.h b/source/slang/slang-emit-cpp.h
index 7f9046643..99f180850 100644
--- a/source/slang/slang-emit-cpp.h
+++ b/source/slang/slang-emit-cpp.h
@@ -91,15 +91,10 @@ protected:
void _calcGlobalParams(const List<EmitAction>& actions, List<GlobalParamInfo>& outParams, IRGlobalParam** outEntryPointGlobalParams);
void _emitUniformStateMembers(const List<EmitAction>& actions, IRGlobalParam** outEntryPointGlobalParams);
- void _emitVecMatMulDefinition(const UnownedStringSlice& funcName, const HLSLIntrinsic* specOp);
-
void _emitAryDefinition(const HLSLIntrinsic* specOp);
// Really we don't want any of these defined like they are here, they should be defined in slang stdlib
void _emitAnyAllDefinition(const UnownedStringSlice& funcName, const HLSLIntrinsic* specOp);
- void _emitLengthDefinition(const UnownedStringSlice& funcName, const HLSLIntrinsic* specOp);
- void _emitNormalizeDefinition(const UnownedStringSlice& funcName, const HLSLIntrinsic* specOp);
- void _emitReflectDefinition(const UnownedStringSlice& funcName, const HLSLIntrinsic* specOp);
void _emitConstructConvertDefinition(const UnownedStringSlice& funcName, const HLSLIntrinsic* specOp);
void _emitConstructFromScalarDefinition(const UnownedStringSlice& funcName, const HLSLIntrinsic* specOp);
void _emitGetAtDefinition(const UnownedStringSlice& funcName, const HLSLIntrinsic* specOp);
diff --git a/source/slang/slang-emit-cuda.cpp b/source/slang/slang-emit-cuda.cpp
index 3531d55db..91439d5d3 100644
--- a/source/slang/slang-emit-cuda.cpp
+++ b/source/slang/slang-emit-cuda.cpp
@@ -112,26 +112,7 @@ SlangResult CUDASourceEmitter::calcScalarFuncName(HLSLIntrinsic::Op op, IRBasicT
switch (op)
{
- case Op::Sin:
- case Op::Cos:
- case Op::Tan:
- case Op::ArcSin:
- case Op::ArcCos:
- case Op::ArcTan:
- case Op::ArcTan2:
- case Op::Floor:
- case Op::Ceil:
- case Op::FMod:
- case Op::Exp2:
- case Op::Exp:
- case Op::Log:
- case Op::Log2:
- case Op::Log10:
case Op::FRem:
- case Op::Sqrt:
- case Op::RecipSqrt:
- case Op::Pow:
- case Op::Trunc:
{
if (type->op == kIROp_FloatType || type->op == kIROp_DoubleType)
{
@@ -139,25 +120,6 @@ SlangResult CUDASourceEmitter::calcScalarFuncName(HLSLIntrinsic::Op op, IRBasicT
}
break;
}
- case Op::Max:
- case Op::Min:
- case Op::Abs:
- {
- // There are only floating point built in versions of these, prefixed with f
- if (type->op == kIROp_FloatType || type->op == kIROp_DoubleType)
- {
- outBuilder << "f";
- outBuilder << HLSLIntrinsic::getInfo(op).funcName;
-
- if (type->op == kIROp_FloatType)
- {
- outBuilder << "f";
- }
- return SLANG_OK;
- }
- break;
- }
-
default: break;
}
@@ -171,23 +133,6 @@ SlangResult CUDASourceEmitter::calcScalarFuncName(HLSLIntrinsic::Op op, IRBasicT
return SLANG_OK;
}
- // Missing ones:
- //
- // sincos - the built in uses pointer, so we'll just define in prelude
- // rcp
- // sign
- // saturate
- // frac
- // smoothstep
- // lerp
- // clamp
- // step
- //
- // For integer types
- // abs
- // min
- // max
-
// Defer to the supers impl
return Super::calcScalarFuncName(op, type, outBuilder);
}
diff --git a/source/slang/slang-emit-glsl.cpp b/source/slang/slang-emit-glsl.cpp
index 155b86a9c..b433b4d94 100644
--- a/source/slang/slang-emit-glsl.cpp
+++ b/source/slang/slang-emit-glsl.cpp
@@ -1096,31 +1096,6 @@ bool GLSLSourceEmitter::tryEmitInstExprImpl(IRInst* inst, const EmitOpInfo& inOu
}
break;
}
- case kIROp_Mul_Vector_Matrix:
- case kIROp_Mul_Matrix_Vector:
- case kIROp_Mul_Matrix_Matrix:
- {
- EmitOpInfo outerPrec = inOuterPrec;
- bool needClose = false;
-
- // GLSL expresses inner-product multiplications
- // with the ordinary infix `*` operator.
- //
- // Note that the order of the operands is reversed
- // compared to HLSL (and Slang's internal representation)
- // because the notion of what is a "row" vs. a "column"
- // is reversed between HLSL/Slang and GLSL.
- //
- auto prec = getInfo(EmitOp::Mul);
- needClose = maybeEmitParens(outerPrec, prec);
-
- emitOperand(inst->getOperand(1), leftSide(outerPrec, prec));
- m_writer->emit(" * ");
- emitOperand(inst->getOperand(0), rightSide(prec, outerPrec));
-
- maybeCloseParens(needClose);
- return true;
- }
case kIROp_Select:
{
if (inst->getOperand(0)->getDataType()->op != kIROp_BoolType)
diff --git a/source/slang/slang-hlsl-intrinsic-set.cpp b/source/slang/slang-hlsl-intrinsic-set.cpp
index 82a8851e0..27871141d 100644
--- a/source/slang/slang-hlsl-intrinsic-set.cpp
+++ b/source/slang/slang-hlsl-intrinsic-set.cpp
@@ -220,42 +220,9 @@ SlangResult HLSLIntrinsicSet::makeIntrinsic(IRInst* inst, HLSLIntrinsic& out)
{
default: break;
- case Op::Sin:
- case Op::Cos:
- case Op::Tan:
- case Op::ArcSin:
- case Op::ArcCos:
- case Op::ArcTan:
- case Op::ArcTan2:
- case Op::Rcp:
- case Op::Sign:
- case Op::Frac:
- case Op::Ceil:
- case Op::Floor:
- case Op::Trunc:
- case Op::Sqrt:
- case Op::RecipSqrt:
- case Op::Exp2:
- case Op::Exp:
- case Op::Log:
- case Op::Log2:
- case Op::Log10:
- case Op::Abs:
- case Op::Min:
- case Op::Max:
- case Op::Pow:
- case Op::FMod:
- case Op::SmoothStep:
- case Op::Lerp:
- case Op::Clamp:
- case Op::Step:
case Op::AsFloat:
case Op::AsInt:
case Op::AsUInt:
- case Op::IsInfinite:
- case Op::IsFinite:
- case Op::IsNan:
- case Op::LdExp:
// Note: the `any()`/`all()` case can't be handled via a stdlib definition
// right now because `bool` vectors map to `int` vectors on the CUDA
// path, so that the generated `geAt` operation is incorrect.
@@ -605,14 +572,6 @@ HLSLIntrinsic::Op HLSLIntrinsicOpLookup::getOpForIROp(IRInst* inst)
case kIROp_constructVectorFromScalar: return Op::ConstructFromScalar;
- case kIROp_Mul_Matrix_Matrix:
- case kIROp_Mul_Matrix_Vector:
- case kIROp_Mul_Vector_Matrix:
- {
- return Op::VecMatMul;
- }
- case kIROp_Dot: return Op::Dot;
-
default: return Op::Invalid;
}
}
diff --git a/source/slang/slang-hlsl-intrinsic-set.h b/source/slang/slang-hlsl-intrinsic-set.h
index 6ab5480b3..ca3fced50 100644
--- a/source/slang/slang-hlsl-intrinsic-set.h
+++ b/source/slang/slang-hlsl-intrinsic-set.h
@@ -64,53 +64,6 @@ just constructXXXFromScalar. Would be good if there was a suitable name to encom
\
x(Swizzle, "", -1) \
\
- x(Dot, "dot", 2) \
- x(VecMatMul, "mul", 2) \
- \
- x(Normalize, "normalize", 1) \
- x(Length, "length", 1) \
- \
- x(Sin, "sin", 1) \
- x(Cos, "cos", 1) \
- x(Tan, "tan", 1) \
- \
- x(ArcSin, "asin", 1) \
- x(ArcCos, "acos", 1) \
- x(ArcTan, "atan", 1) \
- \
- x(ArcTan2, "atan2", 2) \
- \
- x(Rcp, "rcp", 1) \
- x(Sign, "sign", 1) \
- x(Frac, "frac", 1) \
- \
- x(Ceil, "ceil", 1) \
- x(Floor, "floor", 1) \
- x(Trunc, "trunc", 1) \
- \
- x(Sqrt, "sqrt", 1) \
- x(RecipSqrt, "rsqrt", 1) \
- \
- x(Exp2, "exp2", 1) \
- x(Exp, "exp", 1) \
- \
- x(Log, "log", 1) \
- x(Log2, "log2", 1) \
- x(Log10, "log10", 1) \
- \
- x(Abs, "abs", 1) \
- \
- x(Min, "min", 2) \
- x(Max, "max", 2) \
- x(Pow, "pow", 2) \
- x(FMod, "fmod", 2) \
- x(Reflect, "reflect", 2) \
- \
- x(SmoothStep, "smoothstep", 3) \
- x(Lerp, "lerp", 3) \
- x(Clamp, "clamp", 3) \
- x(Step, "step", 2) \
- \
x(AsFloat, "asfloat", 1) \
x(AsInt, "asint", -1) \
x(AsUInt, "asuint", -1) \
@@ -120,13 +73,7 @@ just constructXXXFromScalar. Would be good if there was a suitable name to encom
x(ConstructFromScalar, "", 1) \
\
x(GetAt, "", 2) \
- \
- x(CountBits, "countbits", 1) \
- \
- x(IsInfinite, "isinf", 1) \
- x(IsFinite, "isfinite", 1) \
- x(IsNan, "isnan", 1) \
- x(LdExp, "ldexp", 2)
+ /* end */
struct HLSLIntrinsic
{
diff --git a/source/slang/slang-ir-inst-defs.h b/source/slang/slang-ir-inst-defs.h
index 89fec618c..3fdf9f113 100644
--- a/source/slang/slang-ir-inst-defs.h
+++ b/source/slang/slang-ir-inst-defs.h
@@ -373,10 +373,6 @@ INST(Dot, dot, 2, 0)
INST(GetStringHash, getStringHash, 1, 0)
-INST(Mul_Vector_Matrix, mulVectorMatrix, 2, 0)
-INST(Mul_Matrix_Vector, mulMatrixVector, 2, 0)
-INST(Mul_Matrix_Matrix, mulMatrixMatrix, 2, 0)
-
// Texture sampling operation of the form `t.Sample(s,u)`
INST(Sample, sample, 3, 0)
diff --git a/source/slang/slang-ir.cpp b/source/slang/slang-ir.cpp
index 6e1b6fe83..f84300327 100644
--- a/source/slang/slang-ir.cpp
+++ b/source/slang/slang-ir.cpp
@@ -4951,9 +4951,6 @@ namespace Slang
case kIROp_BitNot:
case kIROp_Select:
case kIROp_Dot:
- case kIROp_Mul_Vector_Matrix:
- case kIROp_Mul_Matrix_Vector:
- case kIROp_Mul_Matrix_Matrix:
case kIROp_MakeExistential:
case kIROp_ExtractExistentialType:
case kIROp_ExtractExistentialValue: