From 6f31eae79d5b4297d0099c5779a9806a786cf9f8 Mon Sep 17 00:00:00 2001 From: Yong He Date: Wed, 1 Mar 2023 13:19:33 -0800 Subject: Implement derivatives for HLSL intrinsics. (#2684) * Implement derivatives for HLSL intrinsics. * Vector intrinsics. * Add all intrinsics. --------- Co-authored-by: Yong He --- docs/user-guide/07-autodiff.md | 13 +- source/slang/diff.meta.slang | 932 ++++++++++++++------- source/slang/hlsl.meta.slang | 75 +- tests/autodiff-dstdlib/vector-cross.slang | 40 + .../vector-cross.slang.expected.txt | 13 + tests/autodiff-dstdlib/vector-length.slang | 36 + .../vector-length.slang.expected.txt | 4 + 7 files changed, 799 insertions(+), 314 deletions(-) create mode 100644 tests/autodiff-dstdlib/vector-cross.slang create mode 100644 tests/autodiff-dstdlib/vector-cross.slang.expected.txt create mode 100644 tests/autodiff-dstdlib/vector-length.slang create mode 100644 tests/autodiff-dstdlib/vector-length.slang.expected.txt diff --git a/docs/user-guide/07-autodiff.md b/docs/user-guide/07-autodiff.md index 5ca073983..244ebb47b 100644 --- a/docs/user-guide/07-autodiff.md +++ b/docs/user-guide/07-autodiff.md @@ -477,12 +477,17 @@ void back_prop( The following builtin functions are backward differentiable and both their forward-derivative and backward-propagation functions are already defined in the builtin library: -- Arithmetic functions: `abs`, `max`, `min`, `sqrt` -- Trigonometric functions: `sin`, `cos`, `tan` -- Exponential and logarithmic functions: `exp`, `pow`, `log`, `log2` -- Vector: `dot`, `cross` +- Arithmetic functions: `abs`, `max`, `min`, `sqrt`, `rcp`, `rsqrt`, `fma`, `mad`, `fmod`, `frac`, `radians`, `degrees` +- Interpolation and clamping functions: `lerp`, `smoothstep`, `clamp`, `saturate` +- Trigonometric functions: `sin`, `cos`, `sincos`, `tan`, `asin`, `acos`, `atan`, `atan2` +- Hyperbolic functions: `sinh`, `cosh`, `tanh` +- Exponential and logarithmic functions: `exp`, `exp2`, `pow`, `log`, `log2`, `log10` +- Vector functions: 
`dot`, `cross`, `length`, `distance`, `normalize`, `reflect`, `refract` - Matrix transform: `mul(matrix, vector)`, `mul(vector, matrix)`, `mul(matrix, matrix)`, `transpose` +Derivatives for the following legacy HLSL intrinsic functions are not implemented: +- `dst`, `lit`, + ## Excluding Parameters From Differentiation Sometimes we do not wish a parameter to be considered differentiable despite it has a differentiable type. We can use the `no_diff` modifier on the parameter to inform the compiler to treat the parameter as non-differentiable and skip generating differentiation code for the parameter. The syntax is: diff --git a/source/slang/diff.meta.slang b/source/slang/diff.meta.slang index 8931cccdd..c303b39d9 100644 --- a/source/slang/diff.meta.slang +++ b/source/slang/diff.meta.slang @@ -162,6 +162,23 @@ extension Array : IDifferentiable } } +// Matrix transpose +__generic +[ForceInline] +[ForwardDerivativeOf(transpose)] +DifferentialPair> __d_transpose(DifferentialPair> m) +{ + return DifferentialPair>(transpose(m.p), transpose(m.d)); +} + +__generic +[ForceInline] +[BackwardDerivativeOf(transpose)] +void __d_transpose(inout DifferentialPair> m, matrix.Differential dOut) +{ + m = diffPair(m.p, transpose(dOut)); +} + // vector-matrix __generic [ForceInline] @@ -174,7 +191,6 @@ DifferentialPair> mul(DifferentialPair> left, Differen } __generic -[ForceInline] [BackwardDerivativeOf(mul)] void __d_mul(inout DifferentialPair> left, inout DifferentialPair> right, vector.Differential dOut) { @@ -206,7 +222,6 @@ DifferentialPair> mul(DifferentialPair> left, Differen } __generic -[ForceInline] [BackwardDerivativeOf(mul)] void __d_mul(inout DifferentialPair> left, inout DifferentialPair> right, vector.Differential dOut) { @@ -238,7 +253,6 @@ DifferentialPair> mul(DifferentialPair> left, Differ } __generic -[ForceInline] [BackwardDerivativeOf(mul)] void mul(inout DifferentialPair> left, inout DifferentialPair> right, matrix.Differential dOut) { @@ -267,442 +281,750 @@ 
void mul(inout DifferentialPair> left, inout DifferentialPair result; \ - vector.Differential d_result; \ - [ForceUnroll]\ - for (int i = 0; i < N; ++i) \ - { \ - DifferentialPair dp_elem = D_FUNC(DifferentialPair(VALUE.p[i], __slang_noop_cast(VALUE.d[i]))); \ - result[i] = dp_elem.p; \ - d_result[i] = __slang_noop_cast(dp_elem.d); \ - } \ - return DifferentialPair>(result, d_result) - -#define VECTOR_MAP_D_BINARY(TYPE, COUNT, D_FUNC, LEFT, RIGHT) \ - vector result; \ - vector.Differential d_result; \ - [ForceUnroll] \ - for (int i = 0; i < N; ++i) \ - { \ - DifferentialPair dp_elem = D_FUNC(DifferentialPair(LEFT.p[i], __slang_noop_cast(LEFT.d[i])), \ - DifferentialPair(RIGHT.p[i], __slang_noop_cast(RIGHT.d[i]))); \ - result[i] = dp_elem.p; \ - d_result[i] = __slang_noop_cast(dp_elem.d); \ - } \ - return DifferentialPair>(result, d_result) - -#define VECTOR_MAP_BWD_D_UNARY(TYPE, COUNT, D_FUNC, VALUE, D_OUT) \ - vector.Differential d_result; \ - [ForceUnroll] \ - for (int i = 0; i < N; ++i) \ - { \ - DifferentialPair dp_elem = diffPair(VALUE.p[i], TYPE.dzero()); \ - D_FUNC(dp_elem, __slang_noop_cast(D_OUT[i])); \ - d_result[i] = __slang_noop_cast(dp_elem.d); \ - } \ - VALUE = diffPair(VALUE.p, d_result) - -#define VECTOR_MAP_BWD_D_BINARY(TYPE, COUNT, D_FUNC, LEFT, RIGHT, D_OUT) \ - vector.Differential left_d_result, right_d_result; \ - [ForceUnroll] \ - for (int i = 0; i < N; ++i) \ - { \ - DifferentialPair left_dp = diffPair(LEFT.p[i], TYPE.dzero()); \ - DifferentialPair right_dp = diffPair(RIGHT.p[i], TYPE.dzero()); \ - D_FUNC(left_dp, right_dp, __slang_noop_cast(D_OUT[i])); \ - left_d_result[i] = __slang_noop_cast(left_dp.d); \ - right_d_result[i] = __slang_noop_cast(right_dp.d); \ - } \ - LEFT = diffPair(LEFT.p, left_d_result); \ - RIGHT = diffPair(RIGHT.p, right_d_result) +// Vector dot product +__generic +[ForwardDerivativeOf(dot)] +DifferentialPair __d_dot(DifferentialPair> dpx, DifferentialPair> dpy) +{ + T result = T(0); + T.Differential d_result = 
T.dzero(); + [ForceUnroll] + for (int i = 0; i < N; ++i) + { + result = result + dpx.p[i] * dpy.p[i]; + d_result = T.dadd(d_result, T.dmul(dpx.p[i], __slang_noop_cast(dpy.d[i]))); + d_result = T.dadd(d_result, T.dmul(dpy.p[i], __slang_noop_cast(dpx.d[i]))); + } + return DifferentialPair(result, d_result); +} -// Detach and set derivatives to zero +__generic +[BackwardDerivativeOf(dot)] +void __d_dot(inout DifferentialPair> dpx, inout DifferentialPair> dpy, T.Differential dOut) +{ + vector.Differential x_d_result, y_d_result; + [ForceUnroll] + for (int i = 0; i < N; ++i) + { + x_d_result[i] = dpy.p[i] * __slang_noop_cast(dOut); + y_d_result[i] = dpx.p[i] * __slang_noop_cast(dOut); + } + dpx = diffPair(dpx.p, x_d_result); + dpy = diffPair(dpy.p, y_d_result); +} + +// Cross product: d(a x b) = da x b + a x db (product rule applied per component) +__generic +[ForwardDerivativeOf(cross)] +DifferentialPair> __d_cross(DifferentialPair> a, DifferentialPair> b) +{ + /* + cx = ay * bz − az * by + cy = az * bx − ax * bz + cz = ax * by − ay * bx + */ + T aybz = a.p.y * b.p.z; + T azby = a.p.z * b.p.y; + T px = aybz - azby; + T dx = b.p.z * a.d.y + a.p.y * b.d.z - b.p.y * a.d.z - a.p.z * b.d.y; + + T azbx = a.p.z * b.p.x; + T axbz = a.p.x * b.p.z; + T py = azbx - axbz; + T dy = b.p.x * a.d.z + a.p.z * b.d.x - b.p.z * a.d.x - a.p.x * b.d.z; + + T axby = a.p.x * b.p.y; + T aybx = a.p.y * b.p.x; + T pz = axby - aybx; + T dz = b.p.y * a.d.x + a.p.x * b.d.y - b.p.x * a.d.y - a.p.y * b.d.x; + + return DifferentialPair>(vector(px, py, pz), vector.Differential(dx, dy, dz)); +} + +__generic +[BackwardDerivativeOf(cross)] +void __d_cross(inout DifferentialPair> a, inout DifferentialPair> b, vector.Differential dOut) +{ + /* + cx = ay * bz − az * by + cy = az * bx − ax * bz + cz = ax * by − ay * bx + */ + T dax = (-b.p.z * dOut.y) + (b.p.y * dOut.z); + T day = (b.p.z * dOut.x) + (-b.p.x * dOut.z); + T daz = (-b.p.y * dOut.x) + (b.p.x * dOut.y); +
+ T dbx = (a.p.z * dOut.y) + (-a.p.y * dOut.z); + T dby = (-a.p.z * dOut.x) + (a.p.x * dOut.z); + T dbz = (a.p.y * dOut.x) + (-a.p.x * dOut.y); + + a = diffPair(a.p, vector.Differential(dax, day, daz)); + b = diffPair(b.p, vector.Differential(dbx, dby, dbz)); +} + +#define VECTOR_MATRIX_BINARY_DIFF_IMPL(NAME) \ + __generic \ + [ForwardDerivativeOf(NAME)] \ + DifferentialPair> __d_##NAME##_vector( \ + DifferentialPair> dpx, DifferentialPair> dpy) \ + { \ + vector result; \ + vector.Differential d_result; \ + [ForceUnroll] for (int i = 0; i < N; ++i) \ + { \ + DifferentialPair dp_elem = __d_##NAME( \ + DifferentialPair(dpx.p[i], __slang_noop_cast(dpx.d[i])), \ + DifferentialPair(dpy.p[i], __slang_noop_cast(dpy.d[i]))); \ + result[i] = dp_elem.p; \ + d_result[i] = __slang_noop_cast(dp_elem.d); \ + } \ + return DifferentialPair>(result, d_result); \ + } \ + __generic \ + [BackwardDerivativeOf(NAME)] \ + void __d_##NAME##_vector( \ + inout DifferentialPair> dpx, \ + inout DifferentialPair> dpy, \ + vector.Differential dOut) \ + { \ + vector.Differential left_d_result, right_d_result; \ + [ForceUnroll] for (int i = 0; i < N; ++i) \ + { \ + DifferentialPair left_dp = diffPair(dpx.p[i], T.dzero()); \ + DifferentialPair right_dp = diffPair(dpy.p[i], T.dzero()); \ + __d_##NAME(left_dp, right_dp, __slang_noop_cast(dOut[i])); \ + left_d_result[i] = __slang_noop_cast(left_dp.d); \ + right_d_result[i] = __slang_noop_cast(right_dp.d); \ + } \ + dpx = diffPair(dpx.p, left_d_result); \ + dpy = diffPair(dpy.p, right_d_result); \ + } +#define VECTOR_MATRIX_TERNARY_DIFF_IMPL(NAME) \ + __generic \ + [ForwardDerivativeOf(NAME)] \ + DifferentialPair> __d_##NAME##_vector( \ + DifferentialPair> dpx, \ + DifferentialPair> dpy, \ + DifferentialPair> dpz) \ +{ \ + vector result; \ + vector.Differential d_result; \ + [ForceUnroll] for (int i = 0; i < N; ++i) \ + { \ + DifferentialPair dp_elem = __d_##NAME( \ + DifferentialPair(dpx.p[i], __slang_noop_cast(dpx.d[i])), \ + 
DifferentialPair(dpy.p[i], __slang_noop_cast(dpy.d[i])), \ + DifferentialPair(dpz.p[i], __slang_noop_cast(dpz.d[i]))); \ + result[i] = dp_elem.p; \ + d_result[i] = __slang_noop_cast(dp_elem.d); \ + } \ + return DifferentialPair>(result, d_result); \ + } \ + __generic \ + [BackwardDerivativeOf(NAME)] \ + void __d_##NAME##_vector( \ + inout DifferentialPair> dpx, \ + inout DifferentialPair> dpy, \ + inout DifferentialPair> dpz, \ + vector.Differential dOut) \ + { \ + vector.Differential left_d_result, middle_d_result, right_d_result; \ + [ForceUnroll] for (int i = 0; i < N; ++i) \ + { \ + DifferentialPair left_dp = diffPair(dpx.p[i], T.dzero()); \ + DifferentialPair middle_dp = diffPair(dpy.p[i], T.dzero()); \ + DifferentialPair right_dp = diffPair(dpz.p[i], T.dzero()); \ + __d_##NAME(left_dp, middle_dp, right_dp, \ + __slang_noop_cast(dOut[i])); \ + left_d_result[i] = __slang_noop_cast(left_dp.d); \ + middle_d_result[i] = __slang_noop_cast(middle_dp.d); \ + right_d_result[i] = __slang_noop_cast(right_dp.d); \ + } \ + dpx = diffPair(dpx.p, left_d_result); \ + dpy = diffPair(dpy.p, middle_d_result); \ + dpz = diffPair(dpz.p, right_d_result); \ + } + +#define UNARY_DERIVATIVE_IMPL(NAME, FWD_DIFF_FUNC, BWD_DIFF_FUNC) \ + __generic \ + [ForwardDerivativeOf(NAME)] \ + DifferentialPair __d_##NAME(DifferentialPair dpx) \ + { \ + return DifferentialPair(NAME(dpx.p), FWD_DIFF_FUNC); \ + } \ + __generic \ + [ForwardDerivativeOf(NAME)] \ + DifferentialPair> __d_##NAME##_vector(DifferentialPair> dpx) \ + { \ + vector result; \ + vector.Differential d_result; \ + [ForceUnroll] for (int i = 0; i < N; ++i) \ + { \ + DifferentialPair dp_elem = __d_##NAME( \ + DifferentialPair(dpx.p[i], __slang_noop_cast(dpx.d[i]))); \ + result[i] = dp_elem.p; \ + d_result[i] = __slang_noop_cast(dp_elem.d); \ + } \ + return DifferentialPair>(result, d_result); \ + } \ + __generic \ + [ForwardDerivativeOf(NAME)] \ + DifferentialPair> __d_##NAME##_m(DifferentialPair> dpx) \ + { \ + matrix result; \ + 
matrix.Differential d_result; \ + [ForceUnroll] for (int i = 0; i < M; ++i) \ + [ForceUnroll] for (int j = 0; j < N; ++j) \ + { \ + DifferentialPair dp_elem = __d_##NAME( \ + DifferentialPair(dpx.p[i][j], \ + __slang_noop_cast(dpx.d[i][j]))); \ + result[i][j] = dp_elem.p; \ + d_result[i][j] = __slang_noop_cast(dp_elem.d); \ + } \ + return DifferentialPair>(result, d_result); \ + } \ + __generic \ + [BackwardDerivativeOf(NAME)] \ + void __d_##NAME(inout DifferentialPair dpx, T.Differential dOut) \ + { \ + dpx = diffPair(dpx.p, BWD_DIFF_FUNC); \ + } \ + __generic \ + [BackwardDerivativeOf(NAME)] \ + void __d_##NAME##_vector( \ + inout DifferentialPair> dpx, vector.Differential dOut) \ + { \ + vector.Differential d_result; \ + [ForceUnroll] for (int i = 0; i < N; ++i) \ + { \ + DifferentialPair dp_elem = diffPair(dpx.p[i], T.dzero()); \ + __d_##NAME(dp_elem, __slang_noop_cast(dOut[i])); \ + d_result[i] = __slang_noop_cast(dp_elem.d); \ + } \ + dpx = diffPair(dpx.p, d_result); \ + } \ + __generic \ + [BackwardDerivativeOf(NAME)] \ + void __d_##NAME##_matrix( \ + inout DifferentialPair> dpx, matrix.Differential dOut) \ + { \ + matrix.Differential d_result; \ + [ForceUnroll] for (int i = 0; i < M; ++i) \ + [ForceUnroll] for (int j = 0; j < N; ++j) \ + { \ + DifferentialPair dp_elem = diffPair(dpx.p[i][j], T.dzero()); \ + __d_##NAME(dp_elem, __slang_noop_cast(dOut[i][j])); \ + d_result[i][j] = __slang_noop_cast(dp_elem.d); \ + } \ + dpx = diffPair(dpx.p, d_result); \ + } +#define SIMPLE_UNARY_DERIVATIVE_IMPL(NAME, DIFF_FUNC) UNARY_DERIVATIVE_IMPL(NAME, T.dmul(DIFF_FUNC, dpx.d), T.dmul(DIFF_FUNC, dOut)) + +// Detach and set derivatives to zero __generic __intrinsic_op($(kIROp_DetachDerivative)) T detach(T x); -// Natural Exponent +#define SLANG_SQR(x) ((x)*(x)) +// Absolute value +UNARY_DERIVATIVE_IMPL(abs, (dpx.p > T(0.0) ? 
dpx.d : T.dmul(T(-1.0), dpx.d)), (T.dmul(__slang_noop_cast(sign(dpx.p)), dOut))) +// Saturate +UNARY_DERIVATIVE_IMPL(saturate, (dpx.p < T(0.0) || dpx.p > T(1.0) ? T.dzero() : dpx.d), (dpx.p < T(0.0) || dpx.p > T(1.0) ? T.dzero() : dOut)) +// frac +UNARY_DERIVATIVE_IMPL(frac, dpx.d, dOut) +// radians, degrees (scale factors pi/180 and 180/pi) +SIMPLE_UNARY_DERIVATIVE_IMPL(radians, T(0.01745329251994329576923690768489)) +SIMPLE_UNARY_DERIVATIVE_IMPL(degrees, T(57.295779513082320876798154814105)) +// Exponent (d/dx 2^x = 2^x * ln 2, with ln 2 = 0.6931...) +SIMPLE_UNARY_DERIVATIVE_IMPL(exp, exp(dpx.p)) +SIMPLE_UNARY_DERIVATIVE_IMPL(exp2, exp2(dpx.p) * T(0.69314718055994530941723212145818)) +// sin, sinh +SIMPLE_UNARY_DERIVATIVE_IMPL(sin, cos(dpx.p)) +SIMPLE_UNARY_DERIVATIVE_IMPL(sinh, cosh(dpx.p)) +// cos, cosh +SIMPLE_UNARY_DERIVATIVE_IMPL(cos, -sin(dpx.p)) +SIMPLE_UNARY_DERIVATIVE_IMPL(cosh, sinh(dpx.p)) +// tan, tanh +SIMPLE_UNARY_DERIVATIVE_IMPL(tan, T(1.0) / (cos(dpx.p) * cos(dpx.p))) +SIMPLE_UNARY_DERIVATIVE_IMPL(tanh, T(1.0) / (cosh(dpx.p) * cosh(dpx.p))) +// Logarithm (log10 and log2 divide by x * ln 10 and x * ln 2 respectively) +SIMPLE_UNARY_DERIVATIVE_IMPL(log, T(1.0) / dpx.p) +SIMPLE_UNARY_DERIVATIVE_IMPL(log10, T(1.0) / (dpx.p * T(2.3025850929940456840179914546844))) +SIMPLE_UNARY_DERIVATIVE_IMPL(log2, T(1.0) / (dpx.p * T(0.69314718055994530941723212145818))) +// Square root +SIMPLE_UNARY_DERIVATIVE_IMPL(sqrt, (dpx.p < T(1e-7) ? T(0.0) : T(0.5) / sqrt(dpx.p))) +// Reciprocal (guard only against division by ~0; negative inputs keep their -1/x^2 derivative) +SIMPLE_UNARY_DERIVATIVE_IMPL(rcp, (abs(dpx.p) < T(1e-7) ?
T(0.0) : T(-1.0) / (dpx.p * dpx.p))) +// rsqrt +SIMPLE_UNARY_DERIVATIVE_IMPL(rsqrt, T(-0.5) / (dpx.p * sqrt(dpx.p))) +// Arc-sin +SIMPLE_UNARY_DERIVATIVE_IMPL(asin, T(1.0) / sqrt(T(1.0) - dpx.p * dpx.p)) +// Arc-cos +SIMPLE_UNARY_DERIVATIVE_IMPL(acos, T(-1.0) / sqrt(T(1.0) - dpx.p * dpx.p)) +// Arc-tan +SIMPLE_UNARY_DERIVATIVE_IMPL(atan, T(1.0) / (T(1.0) + dpx.p * dpx.p)) + +// Atan2(y, x): d/dx = -y / (x^2 + y^2), d/dy = x / (x^2 + y^2) __generic -[ForwardDerivativeOf(exp)] -DifferentialPair __d_exp(DifferentialPair dpx) +[ForwardDerivativeOf(atan2)] +DifferentialPair __d_atan2(DifferentialPair dpy, DifferentialPair dpx) { + T.Differential dx = T.dmul(-dpy.p / (dpx.p * dpx.p + dpy.p * dpy.p), dpx.d); + T.Differential dy = T.dmul(dpx.p / (dpx.p * dpx.p + dpy.p * dpy.p), dpy.d); return DifferentialPair( - exp(dpx.p), - T.dmul(exp(dpx.p), dpx.d)); + atan2(dpy.p, dpx.p), + T.dadd(dx, dy)); } -__generic -[ForwardDerivativeOf(exp)] -DifferentialPair> __d_exp_vector(DifferentialPair> dpx) +__generic +[BackwardDerivativeOf(atan2)] +void __d_atan2(inout DifferentialPair dpy, inout DifferentialPair dpx, T.Differential dOut) { - VECTOR_MAP_D_UNARY(T, N, __d_exp, dpx); + dpx = diffPair(dpx.p, T.dmul(-dpy.p / (dpx.p * dpx.p + dpy.p * dpy.p), dOut)); + dpy = diffPair(dpy.p, T.dmul(dpx.p / (dpx.p * dpx.p + dpy.p * dpy.p), dOut)); } +VECTOR_MATRIX_BINARY_DIFF_IMPL(atan2) + +// fmod __generic -[BackwardDerivativeOf(exp)] -void __d_exp(inout DifferentialPair dpx, T.Differential dOut) +[ForwardDerivativeOf(fmod)] +DifferentialPair __d_fmod(DifferentialPair x, DifferentialPair y) { - dpx = diffPair( - dpx.p, - T.dmul(exp(dpx.p), dOut)); + return DifferentialPair(fmod(x.p, y.p), x.d); } - -__generic -[BackwardDerivativeOf(exp)] -void __d_exp_vector(inout DifferentialPair> dpx, vector.Differential dOut) +__generic +[BackwardDerivativeOf(fmod)] +void __d_fmod(inout DifferentialPair x, inout DifferentialPair y, T.Differential dOut) { - dpx = diffPair( - dpx.p, - vector.dmul(exp(dpx.p), dOut)); + x = diffPair(x.p, dOut); + y =
diffPair(y.p); } +VECTOR_MATRIX_BINARY_DIFF_IMPL(fmod) -// Absolute value - +// Raise to a power __generic -[ForwardDerivativeOf(abs)] -DifferentialPair __d_abs(DifferentialPair dpx) +[ForwardDerivativeOf(pow)] +DifferentialPair __d_pow(DifferentialPair dpx, DifferentialPair dpy) { + // Special case + if (dpx.p < T(1e-6)) + { + return DifferentialPair(T(0.0), T.dzero()); + } + + T val = pow(dpx.p, dpy.p); + T.Differential d1 = T.dmul(val * log(dpx.p), dpy.d); + T.Differential d2 = T.dmul(val * dpy.p / dpx.p, dpx.d); return DifferentialPair( - abs(dpx.p), - dpx.p > T(0.0) ? dpx.d : T.dmul(T(-1.0), dpx.d) + val, + T.dadd(d1, d2) ); } -__generic -[ForwardDerivativeOf(abs)] -DifferentialPair> __d_abs_vector(DifferentialPair> dpx) +__generic +[BackwardDerivativeOf(pow)] +void __d_pow(inout DifferentialPair dpx, inout DifferentialPair dpy, T.Differential dOut) { - VECTOR_MAP_D_UNARY(T, N, __d_abs, dpx); + // Special case + if (dpx.p < T(1e-6)) + { + dpx = diffPair(dpx.p, T.dzero()); + dpy = diffPair(dpy.p, T.dzero()); + } + else + { + T val = pow(dpx.p, dpy.p); + dpx = diffPair( + dpx.p, + T.dmul(val * dpy.p / dpx.p, dOut)); + dpy = diffPair( + dpy.p, + T.dmul(val * log(dpx.p), dOut)); + } } +VECTOR_MATRIX_BINARY_DIFF_IMPL(pow) + +// Maximum __generic -[BackwardDerivativeOf(abs)] -void __d_abs(inout DifferentialPair dpx, T.Differential dOut) +[ForwardDerivativeOf(max)] +DifferentialPair __d_max(DifferentialPair dpx, DifferentialPair dpy) { - dpx = diffPair( - dpx.p, - T.dmul(__slang_noop_cast(sign(dpx.p)), dOut)); + return DifferentialPair( + max(dpx.p, dpy.p), + dpx.p > dpy.p ? dpx.d : dpy.d + ); } -__generic -[BackwardDerivativeOf(abs)] -void __d_abs_vector(inout DifferentialPair> dpx, vector.Differential dOut) +__generic +[BackwardDerivativeOf(max)] +void __d_max(inout DifferentialPair dpx, inout DifferentialPair dpy, T.Differential dOut) { - VECTOR_MAP_BWD_D_UNARY(T, N, __d_abs, dpx, dOut); + dpx = diffPair(dpx.p, dpx.p > dpy.p ? 
dOut : T.dzero()); + dpy = diffPair(dpy.p, dpy.p > dpx.p ? dOut : T.dzero()); } -// Sine +VECTOR_MATRIX_BINARY_DIFF_IMPL(max) +// Minimum __generic -[ForwardDerivativeOf(sin)] -DifferentialPair __d_sin(DifferentialPair dpx) +[ForwardDerivativeOf(min)] +DifferentialPair __d_min(DifferentialPair dpx, DifferentialPair dpy) { return DifferentialPair( - sin(dpx.p), - T.dmul(cos(dpx.p), dpx.d)); + min(dpx.p, dpy.p), + dpx.p < dpy.p ? dpx.d : dpy.d + ); } -__generic -[ForwardDerivativeOf(sin)] -DifferentialPair> __d_sin_vector(DifferentialPair> dpx) +__generic +[BackwardDerivativeOf(min)] +void __d_min(inout DifferentialPair dpx, inout DifferentialPair dpy, T.Differential dOut) { - VECTOR_MAP_D_UNARY(T, N, __d_sin, dpx); + dpx = diffPair(dpx.p, dpx.p < dpy.p ? dOut : T.dzero()); + dpy = diffPair(dpy.p, dpy.p < dpx.p ? dOut : T.dzero()); } +VECTOR_MATRIX_BINARY_DIFF_IMPL(min) + +// Lerp: x * (1 - s) + y * s, so d/dx = 1 - s, d/dy = s, d/ds = y - x __generic -[BackwardDerivativeOf(sin)] -void __d_sin(inout DifferentialPair dpx, T.Differential dOut) +[ForwardDerivativeOf(lerp)] +DifferentialPair __d_lerp(DifferentialPair dpx, DifferentialPair dpy, DifferentialPair dps) { - dpx = diffPair( - dpx.p, - T.dmul(cos(dpx.p), dOut)); + return DifferentialPair( + lerp(dpx.p, dpy.p, dps.p), + T.dadd(T.dadd(T.dmul((T(1.0) - dps.p), dpx.d), T.dmul(dps.p, dpy.d)), T.dmul(dpy.p - dpx.p, dps.d)) + ); } - -__generic -[BackwardDerivativeOf(sin)] -void __d_sin_vector(inout DifferentialPair> dpx, vector.Differential dOut) +__generic +[BackwardDerivativeOf(lerp)] +void __d_lerp(inout DifferentialPair dpx, inout DifferentialPair dpy, inout DifferentialPair dps, T.Differential dOut) { - dpx = diffPair( - dpx.p, - vector.dmul(cos(dpx.p), dOut)); + dpx = diffPair(dpx.p, T.dmul(T(1.0) - dps.p, dOut)); // each pair is rebuilt with its own primal + dpy = diffPair(dpy.p, T.dmul(dps.p, dOut)); + dps = diffPair(dps.p, T.dmul((dpy.p - dpx.p), dOut)); } +VECTOR_MATRIX_TERNARY_DIFF_IMPL(lerp) -// Cosine - +// Clamp __generic -[ForwardDerivativeOf(cos)] -DifferentialPair __d_cos(DifferentialPair dpx)
+[ForwardDerivativeOf(clamp)] +DifferentialPair __d_clamp(DifferentialPair dpx, DifferentialPair dpMin, DifferentialPair dpMax) { return DifferentialPair( - cos(dpx.p), - T.dmul(-sin(dpx.p), dpx.d)); + clamp(dpx.p, dpMin.p, dpMax.p), // the differential follows whichever operand clamp selects + dpx.p < dpMin.p ? dpMin.d : (dpx.p > dpMax.p ? dpMax.d : dpx.d)); } - -__generic -[ForwardDerivativeOf(cos)] -DifferentialPair> __d_cos_vector(DifferentialPair> dpx) +__generic +[BackwardDerivativeOf(clamp)] +void __d_clamp(inout DifferentialPair dpx, inout DifferentialPair dpMin, inout DifferentialPair dpMax, T.Differential dOut) { - VECTOR_MAP_D_UNARY(T, N, __d_cos, dpx); + dpx = diffPair(dpx.p, dpx.p > dpMin.p && dpx.p < dpMax.p ? dOut : T.dzero()); // dOut goes to the selected operand; every pair keeps its own primal + dpMin = diffPair(dpMin.p, dpx.p <= dpMin.p ? dOut : T.dzero()); + dpMax = diffPair(dpMax.p, dpx.p >= dpMax.p ? dOut : T.dzero()); } +VECTOR_MATRIX_TERNARY_DIFF_IMPL(clamp) -__generic -[BackwardDerivativeOf(cos)] -void __d_cos(inout DifferentialPair dpx, T.Differential dOut) +// fma +[ForwardDerivativeOf(fma)] +DifferentialPair __d_fma(DifferentialPair dpx, DifferentialPair dpy, DifferentialPair dpz) { - dpx = diffPair( - dpx.p, - T.dmul(-sin(dpx.p), dOut)); + return DifferentialPair( + fma(dpx.p, dpy.p, dpz.p), + dpy.p * dpx.d + dpx.p * dpy.d + dpz.d); } - -__generic -[BackwardDerivativeOf(cos)] -void __d_cos_vector(inout DifferentialPair> dpx, vector.Differential dOut) +[BackwardDerivativeOf(fma)] +void __d_fma(inout DifferentialPair dpx, inout DifferentialPair dpy, inout DifferentialPair dpz, double dOut) { - dpx = diffPair( - dpx.p, - vector.dmul(-sin(dpx.p), dOut)); + dpx = diffPair(dpx.p, dpy.p * dOut); + dpy = diffPair(dpy.p, dpx.p * dOut); + dpz = diffPair(dpz.p, dOut); +} +__generic +[ForwardDerivativeOf(fma)] +DifferentialPair> __d_fma_vector( + DifferentialPair> dpx, + DifferentialPair> dpy, + DifferentialPair> dpz) +{ + vector result; + vector.Differential d_result; + [ForceUnroll] for (int i = 0; i < N; ++i) + { + DifferentialPair dp_elem = __d_fma( +
DifferentialPair(dpx.p[i], dpx.d[i]), + DifferentialPair(dpy.p[i], dpy.d[i]), + DifferentialPair(dpz.p[i], dpz.d[i])); + result[i] = dp_elem.p; + d_result[i] = dp_elem.d; + } + return DifferentialPair>(result, d_result); +} +__generic +[BackwardDerivativeOf(fma)] +void __d_fma_vector( + inout DifferentialPair> dpx, + inout DifferentialPair> dpy, + inout DifferentialPair> dpz, + vector dOut) +{ + vector.Differential x_d_result, y_d_result, z_d_result; + [ForceUnroll] for (int i = 0; i < N; ++i) + { + DifferentialPair x_dp = diffPair(dpx.p[i], 0.0); + DifferentialPair y_dp = diffPair(dpy.p[i], 0.0); + DifferentialPair z_dp = diffPair(dpz.p[i], 0.0); + __d_fma(x_dp, y_dp, z_dp, dOut[i]); + x_d_result[i] = x_dp.d; + y_d_result[i] = y_dp.d; + z_d_result[i] = z_dp.d; + } + dpx = diffPair(dpx.p, x_d_result); + dpy = diffPair(dpy.p, y_d_result); + dpz = diffPair(dpz.p, z_d_result); } -// Base-e logarithm - +// mad __generic -[ForwardDerivativeOf(log)] -DifferentialPair __d_log(DifferentialPair dpx) +[ForwardDerivativeOf(mad)] +DifferentialPair __d_mad(DifferentialPair dpx, DifferentialPair dpy, DifferentialPair dpz) { return DifferentialPair( - log(dpx.p), - T.dmul(T(1.0) / dpx.p, dpx.d) - ); + mad(dpx.p, dpy.p, dpz.p), + T.dadd(T.dadd(T.dmul(dpy.p, dpx.d), T.dmul(dpx.p, dpy.d)), dpz.d)); } - -__generic -[ForwardDerivativeOf(log)] -DifferentialPair> __d_log_vector(DifferentialPair> dpx) +__generic +[BackwardDerivativeOf(mad)] +void __d_mad(inout DifferentialPair dpx, inout DifferentialPair dpy, inout DifferentialPair dpz, T.Differential dOut) { - VECTOR_MAP_D_UNARY(T, N, __d_log, dpx); + dpx = diffPair(dpx.p, T.dmul(dpy.p, dOut)); + dpy = diffPair(dpy.p, T.dmul(dpx.p, dOut)); + dpz = diffPair(dpz.p, dOut); } +VECTOR_MATRIX_TERNARY_DIFF_IMPL(mad) +// Smoothstep __generic -[BackwardDerivativeOf(log)] -void __d_log(inout DifferentialPair dpx, T.Differential dOut) +[BackwardDifferentiable] +T __smoothstep_impl(T minVal, T maxVal, T x) { - dpx = diffPair(dpx.p, T.dmul(T(1.0) / 
dpx.p, dOut)); + let t = saturate((x - minVal) / (maxVal - minVal)); + return t * t * (T(3.0) - T(2.0) * t); } - -__generic -[BackwardDerivativeOf(log)] -void __d_log_vector(inout DifferentialPair> dpx, vector.Differential dOut) +__generic +[ForwardDerivativeOf(smoothstep)] +DifferentialPair __d_smoothstep(DifferentialPair minVal, DifferentialPair maxVal, DifferentialPair x) { - VECTOR_MAP_BWD_D_UNARY(T, N, __d_log, dpx, dOut); + return __fwd_diff(__smoothstep_impl)(minVal, maxVal, x); } - -// Square root - __generic -[ForwardDerivativeOf(sqrt)] -DifferentialPair __d_sqrt(DifferentialPair dpx) +[BackwardDerivativeOf(smoothstep)] +void __d_smoothstep(inout DifferentialPair minVal, inout DifferentialPair maxVal, inout DifferentialPair x, T.Differential dOut) { - // Special case - if (dpx.p < T(1e-6)) + __bwd_diff(__smoothstep_impl)(minVal, maxVal, x, dOut); +} +VECTOR_MATRIX_TERNARY_DIFF_IMPL(smoothstep) + +// Vector length +__generic +[BackwardDifferentiable] +T __length_impl(vector x) +{ + T len = T(0.0); + [ForceUnroll] for (int i = 0; i < N; i++) { - return DifferentialPair(T(0.0), T.dzero()); + len += x[i] * x[i]; } - - T val = sqrt(dpx.p); - return DifferentialPair( - val, - T.dmul(T(0.5) / val, dpx.d) - ); + return sqrt(len); } -__generic -[ForwardDerivativeOf(sqrt)] -DifferentialPair> __d_sqrt_vector(DifferentialPair> dpx) +__generic +[ForwardDerivativeOf(length)] +[ForceInline] +DifferentialPair __d_length(DifferentialPair> x) { - VECTOR_MAP_D_UNARY(T, N, __d_sqrt, dpx); + return __fwd_diff(__length_impl)(x); } -__generic -[BackwardDerivativeOf(sqrt)] -void __d_sqrt(inout DifferentialPair dpx, T.Differential dOut) +__generic +[BackwardDerivativeOf(length)] +[ForceInline] +void __d_length(inout DifferentialPair> x, T.Differential dOut) { - // Special case - if (dpx.p < T(1e-6)) - { - dpx = diffPair(dpx.p, T.dzero()); - } - else - { - dpx = diffPair( - dpx.p, - T.dmul(T(0.5) / sqrt(dpx.p), dOut)); - } + return __bwd_diff(__length_impl)(x, dOut); } +// Vector 
distance __generic -[BackwardDerivativeOf(sqrt)] -void __d_sqrt_vector(inout DifferentialPair> dpx, vector.Differential dOut) +[BackwardDifferentiable] +T __distance_impl(vector x, vector y) { - VECTOR_MAP_BWD_D_UNARY(T, N, __d_sqrt, dpx, dOut); + return length(y - x); +} +__generic +[ForwardDerivativeOf(distance)] +[ForceInline] +DifferentialPair __d_distance(DifferentialPair> x, DifferentialPair> y) +{ + return __fwd_diff(__distance_impl)(x, y); } -// Maximum - -__generic -[ForwardDerivativeOf(max)] -DifferentialPair __d_max(DifferentialPair dpx, DifferentialPair dpy) +__generic +[BackwardDerivativeOf(distance)] +[ForceInline] +void __d_distance(inout DifferentialPair> x, inout DifferentialPair> y, T.Differential dOut) { - return DifferentialPair( - max(dpx.p, dpy.p), - dpx.p > dpy.p ? dpx.d : dpy.d - ); + return __bwd_diff(__distance_impl)(x, y, dOut); } +// Vector normalize __generic -[ForwardDerivativeOf(max)] -DifferentialPair> __d_max_vector(DifferentialPair> dpx, DifferentialPair> dpy) +[BackwardDifferentiable] +vector __normalize_impl(vector x) { - VECTOR_MAP_D_BINARY(T, N, __d_max, dpx, dpy); + let r = T(1.0) / length(x); + return x * r; } - -__generic -[BackwardDerivativeOf(max)] -void __d_max(inout DifferentialPair dpx, inout DifferentialPair dpy, T.Differential dOut) +__generic +[ForwardDerivativeOf(normalize)] +[ForceInline] +DifferentialPair> __d_normalize(DifferentialPair> x) { - dpx = diffPair(dpx.p, dpx.p > dpy.p ? dOut : T.dzero()); - dpy = diffPair(dpy.p, dpy.p > dpx.p ? 
dOut : T.dzero()); + return __fwd_diff(__normalize_impl)(x); +} +__generic +[BackwardDerivativeOf(normalize)] +[ForceInline] +void __d_distance(inout DifferentialPair> x, vector.Differential dOut) +{ + return __bwd_diff(__normalize_impl)(x, dOut); } +// Vector reflect __generic -[BackwardDerivativeOf(max)] -void __d_max_vector(inout DifferentialPair> dpx, inout DifferentialPair> dpy, vector.Differential dOut) +[BackwardDifferentiable] +vector __reflect_impl(vector i, vector n) { - VECTOR_MAP_BWD_D_BINARY(T, N, __d_max, dpx, dpy, dOut); + return i - n * (T(2.0) * dot(i, n)); } - -// Minimum - -__generic -[ForwardDerivativeOf(min)] -DifferentialPair __d_min(DifferentialPair dpx, DifferentialPair dpy) +__generic +[ForwardDerivativeOf(reflect)] +[ForceInline] +DifferentialPair> __d_reflect(DifferentialPair> i, DifferentialPair> n) { - return DifferentialPair( - min(dpx.p, dpy.p), - dpx.p < dpy.p ? dpx.d : dpy.d - ); + return __fwd_diff(__reflect_impl)(i, n); +} +__generic +[BackwardDerivativeOf(reflect)] +[ForceInline] +void __d_reflect(inout DifferentialPair> i, inout DifferentialPair> n, vector.Differential dOut) +{ + return __bwd_diff(__reflect_impl)(i, n, dOut); } +// Vector refract __generic -[ForwardDerivativeOf(min)] -DifferentialPair> __d_min_vector(DifferentialPair> dpx, DifferentialPair> dpy) +[BackwardDifferentiable] +vector __refract_impl(vector i, vector n, T eta) +{ + let k = T(1.0) - eta * eta * (T(1.0) - dot(n, i) * dot(n, i)); + return (k < T(0.0)) ? 
vector(T(0.0)) : eta * i - (eta * dot(n, i) + sqrt(max(T(0.0),k))) * n; +} +__generic +[ForwardDerivativeOf(refract)] +[ForceInline] +DifferentialPair> __d_refract(DifferentialPair> i, DifferentialPair> n, DifferentialPair eta) { - VECTOR_MAP_D_BINARY(T, N, __d_min, dpx, dpy); + return __fwd_diff(__refract_impl)(i, n, eta); +} +__generic +[BackwardDerivativeOf(refract)] +[ForceInline] +void __d_refract(inout DifferentialPair> i, inout DifferentialPair> n, inout DifferentialPair eta, vector.Differential dOut) +{ + return __bwd_diff(__refract_impl)(i, n, eta, dOut); } +// Sine and cosine __generic -[BackwardDerivativeOf(min)] -void __d_min(inout DifferentialPair dpx, inout DifferentialPair dpy, T.Differential dOut) +[BackwardDifferentiable] +void __sincos_impl(T x, out T s, out T c) { - dpx = diffPair(dpx.p, dpx.p < dpy.p ? dOut : T.dzero()); - dpy = diffPair(dpy.p, dpy.p < dpx.p ? dOut : T.dzero()); + s = sin(x); + c = cos(x); } __generic -[BackwardDerivativeOf(min)] -void __d_min_vector(inout DifferentialPair> dpx, inout DifferentialPair> dpy, vector.Differential dOut) +[BackwardDifferentiable] +void __sincos_impl(vector x, out vector s, out vector c) { - VECTOR_MAP_BWD_D_BINARY(T, N, __d_min, dpx, dpy, dOut); + s = sin(x); + c = cos(x); } -// Raise to a power - -__generic -[ForwardDerivativeOf(pow)] -DifferentialPair __d_pow(DifferentialPair dpx, DifferentialPair dpy) +__generic +[BackwardDifferentiable] +void __sincos_impl(matrix x, out matrix s, out matrix c) { - // Special case - if (dpx.p < T(1e-6)) - { - return DifferentialPair(T(0.0), T.dzero()); - } - - T val = pow(dpx.p, dpy.p); - T.Differential d1 = T.dmul(val * log(dpx.p), dpy.d); - T.Differential d2 = T.dmul(val * dpy.p / dpx.p, dpx.d); - return DifferentialPair( - val, - T.dadd(d1, d2) - ); + s = sin(x); + c = cos(x); } -__generic -[ForwardDerivativeOf(pow)] -DifferentialPair> __d_pow_vector(DifferentialPair> dpx, DifferentialPair> dpy) +__generic +[ForwardDerivativeOf(sincos)] +[ForceInline] +void 
__d_sincos(DifferentialPair x, out DifferentialPair s, out DifferentialPair c) { - VECTOR_MAP_D_BINARY(T, N, __d_pow, dpx, dpy); + __fwd_diff(__sincos_impl)(x, s, c); } -__generic -[BackwardDerivativeOf(pow)] -void __d_pow(inout DifferentialPair dpx, inout DifferentialPair dpy, T.Differential dOut) +__generic +[ForwardDerivativeOf(sincos)] +[ForceInline] +void __d_sincos(DifferentialPair> x, out DifferentialPair> s, out DifferentialPair> c) { - // Special case - if (dpx.p < T(1e-6)) - { - dpx = diffPair(dpx.p, T.dzero()); - dpy = diffPair(dpy.p, T.dzero()); - } - else - { - T val = pow(dpx.p, dpy.p); - dpx = diffPair( - dpx.p, - T.dmul(val * dpy.p / dpx.p, dOut)); - dpy = diffPair( - dpy.p, - T.dmul(val * log(dpx.p), dOut)); - } + __fwd_diff(__sincos_impl)(x, s, c); } -__generic -[BackwardDerivativeOf(pow)] -void __d_pow_vector(inout DifferentialPair> dpx, inout DifferentialPair> dpy, vector.Differential dOut) +__generic +[ForwardDerivativeOf(sincos)] +[ForceInline] +void __d_sincos(DifferentialPair> x, out DifferentialPair> s, out DifferentialPair> c) { - VECTOR_MAP_BWD_D_BINARY(T, N, __d_pow, dpx, dpy, dOut); + __fwd_diff(__sincos_impl)(x, s, c); } -// Vector dot product +#if 0 +// TODO: this is not working right now since our type system can't resolve +// the overload to `sincos` in `[BackwardDerivativeOf]` attribute. We need to implement +// a proper overload resolver for custom backward derivatives. 
-__generic -[ForwardDerivativeOf(dot)] -DifferentialPair __d_dot(DifferentialPair> dpx, DifferentialPair> dpy) +__generic +[BackwardDerivativeOf(sincos)] +[ForceInline] +void __d_sincos(inout DifferentialPair x, T.Differential dS, T.Differential dC) { - T result = T(0); - T.Differential d_result = T.dzero(); - [ForceUnroll] - for (int i = 0; i < N; ++i) - { - result = result + dpx.p[i] * dpy.p[i]; - d_result = T.dadd(d_result, T.dmul(dpx.p[i], __slang_noop_cast(dpy.d[i]))); - d_result = T.dadd(d_result, T.dmul(dpy.p[i], __slang_noop_cast(dpx.d[i]))); - } - return DifferentialPair(result, d_result); + __bwd_diff(__sincos_impl)(x, s, c); +} +__generic +[BackwardDerivativeOf(sincos)] +[ForceInline] +void __d_sincos(inout DifferentialPair> x, vector.Differential dS, vector.Differential dC) +{ + __bwd_diff(__sincos_impl)(x, s, c); } -__generic -[BackwardDerivativeOf(dot)] -void __d_dot(inout DifferentialPair> dpx, inout DifferentialPair> dpy, T.Differential dOut) +__generic +[BackwardDerivativeOf(sincos)] +[ForceInline] +void __d_sincos(inout DifferentialPair> x, matrix.Differential dS, matrix.Differential dC) { - vector.Differential x_d_result, y_d_result; - [ForceUnroll] - for (int i = 0; i < N; ++i) - { - x_d_result[i] = dpy.p[i] * __slang_noop_cast(dOut); - y_d_result[i] = dpx.p[i] * __slang_noop_cast(dOut); - } - dpx = diffPair(dpx.p, x_d_result); - dpy = diffPair(dpy.p, y_d_result); + __bwd_diff(__sincos_impl)(x, s, c); } + +#endif \ No newline at end of file diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang index 0b7ca535b..5a01bc132 100644 --- a/source/slang/hlsl.meta.slang +++ b/source/slang/hlsl.meta.slang @@ -1526,7 +1526,7 @@ uint countbits(uint value); // Cross product // TODO: SPIRV does not support integer vectors. 
-__generic +__generic __target_intrinsic(hlsl) __target_intrinsic(glsl) __target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Cross _0 _1") @@ -1539,6 +1539,19 @@ vector cross(vector left, vector right) left.x * right.y - left.y * right.x); } +__generic +__target_intrinsic(hlsl) +__target_intrinsic(glsl) +__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Cross _0 _1") +[__readNone] +vector cross(vector left, vector right) +{ + return vector( + left.y * right.z - left.z * right.y, + left.z * right.x - left.x * right.z, + left.x * right.y - left.y * right.x); +} + // Convert encoded color __target_intrinsic(hlsl) [__readNone] @@ -2696,7 +2709,7 @@ matrix log2(matrix x) // multiply-add -__generic +__generic __target_intrinsic(hlsl) __target_intrinsic(glsl, fma) __target_intrinsic(cuda, "$P_fma($0, $1, $2)") @@ -2705,7 +2718,7 @@ __target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Fma _0 _ [__readNone] T mad(T mvalue, T avalue, T bvalue); -__generic +__generic __target_intrinsic(hlsl) __target_intrinsic(glsl, fma) __target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Fma _0 _1 _2") @@ -2715,7 +2728,7 @@ vector mad(vector mvalue, vector avalue, vector bvalue) VECTOR_MAP_TRINARY(T, N, mad, mvalue, avalue, bvalue); } -__generic +__generic __target_intrinsic(hlsl) [__readNone] matrix mad(matrix mvalue, matrix avalue, matrix bvalue) @@ -2723,6 +2736,34 @@ matrix mad(matrix mvalue, matrix avalue, matrix +__target_intrinsic(hlsl) +__target_intrinsic(glsl, fma) +__target_intrinsic(cuda, "$P_fma($0, $1, $2)") +__target_intrinsic(cpp, "$P_fma($0, $1, $2)") +__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Fma _0 _1 _2") +[__readNone] +T mad(T mvalue, T avalue, T bvalue); + +__generic +__target_intrinsic(hlsl) +__target_intrinsic(glsl, fma) +__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Fma _0 _1 _2") +[__readNone] +vector mad(vector mvalue, vector 
avalue, vector bvalue) +{ + VECTOR_MAP_TRINARY(T, N, mad, mvalue, avalue, bvalue); +} + +__generic +__target_intrinsic(hlsl) +[__readNone] +matrix mad(matrix mvalue, matrix avalue, matrix bvalue) +{ + MATRIX_MAP_TRINARY(T, N, M, mad, mvalue, avalue, bvalue); +} + + // maximum __generic __target_intrinsic(hlsl) @@ -3763,7 +3804,7 @@ matrix tanh(matrix x) } // Matrix transpose -__generic +__generic __target_intrinsic(hlsl) __target_intrinsic(glsl) [__readNone] @@ -3775,6 +3816,30 @@ matrix transpose(matrix x) result[r][c] = x[c][r]; return result; } +__generic +__target_intrinsic(hlsl) +__target_intrinsic(glsl) +[__readNone] +matrix transpose(matrix x) +{ + matrix result; + for (int r = 0; r < M; ++r) + for (int c = 0; c < N; ++c) + result[r][c] = x[c][r]; + return result; +} +__generic +__target_intrinsic(hlsl) +__target_intrinsic(glsl) +[__readNone] +matrix transpose(matrix x) +{ + matrix result; + for (int r = 0; r < M; ++r) + for (int c = 0; c < N; ++c) + result[r][c] = x[c][r]; + return result; +} // Truncate to integer __generic diff --git a/tests/autodiff-dstdlib/vector-cross.slang b/tests/autodiff-dstdlib/vector-cross.slang new file mode 100644 index 000000000..be08894cb --- /dev/null +++ b/tests/autodiff-dstdlib/vector-cross.slang @@ -0,0 +1,40 @@ +//TEST(compute, vulkan):COMPARE_COMPUTE_EX:-vk -compute -shaderobj -output-using-type +//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -shaderobj -output-using-type + +//TEST_INPUT:ubuffer(data=[0 0 0 0 0 0 0 0 0 0 0 0], stride=4):out,name=outputBuffer +RWStructuredBuffer outputBuffer; + +[BackwardDifferentiable] +float3 crossImpl(float3 x, float3 y) +{ + return float3(x.y * y.z - y.y * x.z, + x.z * y.x - y.z * x.x, + x.x * y.y - y.x * x.y); +} + +[numthreads(1, 1, 1)] +void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) +{ + { + let x = float3(-0.51, 0.74, 0.86); + let y = float3(1.43, -0.92, 4.36); + let dOut = float3(3.41, 6.55, 2.39); + var dpx = diffPair(x); + var dpy = diffPair(y); + 
__bwd_diff(cross)(dpx, dpy, dOut); + outputBuffer[0] = dpx.d[0]; + outputBuffer[1] = dpx.d[1]; + outputBuffer[2] = dpx.d[2]; + outputBuffer[3] = dpy.d[0]; + outputBuffer[4] = dpy.d[1]; + outputBuffer[5] = dpy.d[2]; + + __bwd_diff(crossImpl)(dpx, dpy, dOut); + outputBuffer[6] = dpx.d[0]; + outputBuffer[7] = dpx.d[1]; + outputBuffer[8] = dpx.d[2]; + outputBuffer[9] = dpy.d[0]; + outputBuffer[10] = dpy.d[1]; + outputBuffer[11] = dpy.d[2]; + } +} diff --git a/tests/autodiff-dstdlib/vector-cross.slang.expected.txt b/tests/autodiff-dstdlib/vector-cross.slang.expected.txt new file mode 100644 index 000000000..9d472f078 --- /dev/null +++ b/tests/autodiff-dstdlib/vector-cross.slang.expected.txt @@ -0,0 +1,13 @@ +type: float +-30.756802 +11.449901 +12.503700 +3.864400 +-4.151500 +5.863900 +-30.756804 +11.449901 +12.503700 +3.864400 +-4.151500 +5.863900 diff --git a/tests/autodiff-dstdlib/vector-length.slang b/tests/autodiff-dstdlib/vector-length.slang new file mode 100644 index 000000000..c5064e54e --- /dev/null +++ b/tests/autodiff-dstdlib/vector-length.slang @@ -0,0 +1,36 @@ +//TEST(compute, vulkan):COMPARE_COMPUTE_EX:-vk -compute -shaderobj -output-using-type +//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -shaderobj -output-using-type + +//TEST_INPUT:ubuffer(data=[0 0 0], stride=4):out,name=outputBuffer +RWStructuredBuffer outputBuffer; + +float lengthDiffX(float3 x) +{ + return (length(float3(x.x + 0.001, x.yz)) - length(float3(x.x - 0.001, x.yz))) / 0.002; +} +float lengthDiffY(float3 x) +{ + return (length(float3(x.x, x.y + 0.001, x.z)) - length(float3(x.x, x.y - 0.001, x.z))) / 0.002; +} +float lengthDiffZ(float3 x) +{ + return (length(float3(x.xy, x.z + 0.001)) - length(float3(x.xy, x.z - 0.001))) / 0.002; +} + +[numthreads(1, 1, 1)] +void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) +{ + { + let x = float3(12, 23, 31); + var dpx = diffPair(x); + __bwd_diff(length)(dpx, 1.0); + outputBuffer[0] = dpx.d[0]; + outputBuffer[1] = dpx.d[1]; + 
outputBuffer[2] = dpx.d[2]; + + // for reference: + //outputBuffer[3] = lengthDiffX(x); + //outputBuffer[4] = lengthDiffY(x); + //outputBuffer[5] = lengthDiffZ(x); + } +} diff --git a/tests/autodiff-dstdlib/vector-length.slang.expected.txt b/tests/autodiff-dstdlib/vector-length.slang.expected.txt new file mode 100644 index 000000000..3c3f3727d --- /dev/null +++ b/tests/autodiff-dstdlib/vector-length.slang.expected.txt @@ -0,0 +1,4 @@ +type: float +0.296862 +0.568986 +0.766895 -- cgit v1.2.3