summaryrefslogtreecommitdiffstats
path: root/vertex_deformation.slang
blob: b02d1544f6025c90b031eaee01c2787907cc9882 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
#ifndef __CUSTOM31_INC
#define __CUSTOM31_INC

#define PI      3.14159265f
#define PI_RCP  0.31830988f
#define SQRT_2      1.41421356f
#define RCP_SQRT_2  0.70710678f
#define TAU     (2.0f * PI)
#define HALF_PI (0.5f * PI)
#define RCP_PI  (1.0f / PI)
#define RCP_TAU (1.0f / TAU)

#define glsl_mod(x,y) (((x)-(y)*floor((x)/(y))))

// Differentiable versions of common operators.
#define dabs(x) sqrt((x) * (x) + 1e-6f)
#define dmin(a, b) (0.5f * ((a) + (b) - dabs((a) - (b))))
#define dmax(a, b) (0.5f * ((a) + (b) + dabs((a) - (b))))
#define dlerp(x, y, t) ((x) * (1.0f-t) + (y) * t)
// This was derived using fourier analysis. See Scripts/approximate.py.
#define dfrac(x) \
  4.997559e-01 - \
  3.183096e-01 * sin(6.283185e+00*(x)) - \
  1.591544e-01 * sin(1.256637e+01*(x)) - \
  1.061025e-01 * sin(1.884956e+01*(x)) - \
  7.957647e-02 * sin(2.513274e+01*(x))

// Quintic interpolation for C2 continuity (smoother derivatives)
#define quintic(t) ((t) * (t) * (t) * ((t) * ((t) * 6.0f - 15.0f) + 10.0f))
#define d_quintic(t) (30.0f * (t) * (t) * ((t) * ((t) - 2.0f) + 1.0f))

// Macros for transforming normal and tangent using autodiff.
// r3r3 refers to "r3 to r3 transform", aka a mapping between 3d real-valued
// spaces.
#define R3R3_DECLARE_BASIS_VECTORS(xyz) \
  DifferentialPair<float3> dp_x = diffPair(xyz, float3(1, 0, 0)); \
  DifferentialPair<float3> dp_y = diffPair(xyz, float3(0, 1, 0)); \
  DifferentialPair<float3> dp_z = diffPair(xyz, float3(0, 0, 1))

#define R3R3_AUTODIFF_BASIS_VECTORS(fun, ...) \
  DifferentialPair<float3> dp_x_out = fwd_diff(fun)(dp_x, __VA_ARGS__); \
  DifferentialPair<float3> dp_y_out = fwd_diff(fun)(dp_y, __VA_ARGS__); \
  DifferentialPair<float3> dp_z_out = fwd_diff(fun)(dp_z, __VA_ARGS__)

#define R3R3_DEFORM_NORMAL_AND_TANGENT(normal, tangent)   \
  float3x3 jacobian = float3x3(                           \
    float3(dp_x_out.d.x, dp_y_out.d.x, dp_z_out.d.x),     \
    float3(dp_x_out.d.y, dp_y_out.d.y, dp_z_out.d.y),     \
    float3(dp_x_out.d.z, dp_y_out.d.z, dp_z_out.d.z)      \
  );                                                      \
  float jac_det = determinant(jacobian);                  \
  float3x3 itjac = inverse(transpose(jacobian), jac_det); \
  normal = mul(itjac, normal) * jac_det;            \
  tangent = mul(jacobian, tangent) * jac_det

// Syntactic sugar - wraps the previous three macros.
#define R3R3_NORMALS(xyz, normal, tangent, fun, ...)  \
  R3R3_DECLARE_BASIS_VECTORS(xyz);                    \
  R3R3_AUTODIFF_BASIS_VECTORS(fun, __VA_ARGS__);      \
  R3R3_DEFORM_NORMAL_AND_TANGENT(normal, tangent);    \
  xyz = fun(xyz, __VA_ARGS__)

[Differentiable]
float3x3 inverse(no_diff float3x3 m, no_diff float det) {
  det = (det < 0.0f ? -1.0f : 1.0f) * max(1e-6f, abs(det));

  float invDet = 1.0f / det;
  float3x3 inv;

  inv._11 = (m._22 * m._33 - m._23 * m._32) * invDet;
  inv._12 = (m._13 * m._32 - m._12 * m._33) * invDet;
  inv._13 = (m._12 * m._23 - m._13 * m._22) * invDet;
  inv._21 = (m._23 * m._31 - m._21 * m._33) * invDet;
  inv._22 = (m._11 * m._33 - m._13 * m._31) * invDet;
  inv._23 = (m._13 * m._21 - m._11 * m._23) * invDet;
  inv._31 = (m._21 * m._32 - m._22 * m._31) * invDet;
  inv._32 = (m._31 * m._12 - m._11 * m._32) * invDet;
  inv._33 = (m._11 * m._22 - m._12 * m._21) * invDet;

  return inv;
}

float2 project_x_onto_y(float2 x, float2 y) {
  return (dot(x, y) / dot(y, y)) * y;
}

// Maps a 2x2 quad to a tube.
// `s_cart` is the axis along which the tube is rolled.
// `r_cart` is an axis along which points are not transformed. It is assumed to
// be orthogonal `s_cart`.
[Differentiable]
public float3 plane_to_tube(float3 xyz,
    no_diff float3 p, no_diff float3 r_cart, no_diff float3 s_cart,
    no_diff float t) {
  // Convert from cartesian to (r, s, r x s) space.
  // Ensure orthonormal basis vectors.
  // TODO remove normalize, do at higher level of stack.
  r_cart = normalize(r_cart);
  s_cart = normalize(s_cart);
  float3 rxs_cart = cross(s_cart, r_cart);
  float3x3 to_rsrxs = float3x3(r_cart, s_cart, rxs_cart);
  // Inverse of orthonormal matrix is just the transpose.
  float3x3 to_cart  = transpose(to_rsrxs);

  // Translate origin to `p` then change into (r, s, r x s) basis.
  xyz = mul(to_rsrxs, xyz - p);

  // Components in pivot basis: n (neutral axis), b (tangential), h (axial s).
  float epsilon = 1e-4f;
  float r = xyz.x;     // Lr
  float s = xyz.y;     // Lv0 x Lr
  float rxs = xyz.z;   // Lv0

  // Blend between two vectors:
  //  v0: vector of length ||Lr + Lv0|| at angle atan2(Lv0, Lr)
  //  v1: vector of length ||Lr|| at angle ||Lv0|| / ||Lr||
  // Interpolate in polar coordinates to make it wrap nicely.
  float r0 = length(float2(r, rxs));
  float r1 = max(abs(r), epsilon);

  float theta0 = atan2(rxs, r);
  float theta1 = rxs / r1;

  // Interpolate polar coordinates.
  float radius = dlerp(r0, r1, t);
  float theta = dlerp(theta0, theta1, t);

  // Map into (r, rxs) basis.
  float2 nb_t = float2(cos(theta), sin(theta)) * radius;

  // Un-project from (r, rxs) plane to full (r, s, rxs) basis.
  xyz = float3(nb_t.x, s, nb_t.y);

  // Map back to cartesian basis.
  xyz = mul(to_cart, xyz) + p;

  return xyz;
}

public void plane_to_tube_normal(inout float3 xyz, inout float3 normal,
    inout float3 tangent, float3 p, float3 r, float3 s, float t) {
  R3R3_NORMALS(xyz, normal, tangent, plane_to_tube, p, r, s, t);
}

[Differentiable]
public float3 axis_align(float3 xyz, no_diff float3 po, no_diff float3 pp, no_diff float3 r, no_diff float t) {
  const float3 xyz0 = xyz;

  // We assume that `s` is orthogonal to `r`, and that `r` is normalized.
  float3 s = normalize(pp - po);
  float3 sxr = cross(s, r);

  float3x3 to_rsrxs = float3x3(r, s, sxr);
  // Inverse of orthonormal matrix is just the transpos.
  float3x3 to_cart  = transpose(to_rsrxs);

  // Move key vectors into (r, s, sxr) basis centered at `po`, per the derivation.
  float3 xyz_rsrxs = mul(to_rsrxs, xyz - po);

  float vr = xyz_rsrxs[0];
  float vs = xyz_rsrxs[1];
  float2 v0 = float2(vr, vs);
  float3 pp_rsrxs = mul(to_rsrxs, pp - po);
  float qr = pp_rsrxs[1] * vr / vs;  // TODO epsilon
  float2 q = float2(qr, pp_rsrxs[1]);

  // Translate to `q`.
  v0 -= q;

  // Project onto `s`.
  v0.x = 0;

  // Translate back to `po`.
  v0 += q;

  xyz_rsrxs[0] = v0[0];
  xyz_rsrxs[1] = v0[1];

  // Move back into cartesian space.
  xyz = mul(to_cart, xyz_rsrxs) + po;

  return dlerp(xyz0, xyz, t);
}

public void axis_align_normal(inout float3 xyz, inout float3 normal,
    inout float3 tangent, float3 po, float3 pp, float3 r, float t) {
  R3R3_NORMALS(xyz, normal, tangent, axis_align, po, pp, r, t);
}

// Maps a tube with circular cross section on the xz plane to a quad on the xy
// plane.
[Differentiable]
public float3 tube_to_plane(float3 xyz,
    no_diff float3 p, no_diff float3 r_cart, no_diff float3 s_cart,
    no_diff float t) {
  // Convert from cartesian to (r, s, r x s) space.
  // Ensure orthonormal basis vectors.
  // TODO remove normalize, do at higher level of stack.
  r_cart = normalize(r_cart);
  s_cart = normalize(s_cart);
  float3 rxs_cart = cross(s_cart, r_cart);
  float3x3 to_rsrxs = float3x3(r_cart, s_cart, rxs_cart);
  float3x3 to_cart  = inverse(to_rsrxs, determinant(to_rsrxs));

  // Translate origin to `p` then change into (r, s, r x s) basis.
  xyz = mul(to_rsrxs, xyz - p);

  // Components in pivot basis: n (neutral axis), b (tangential), h (axial s).
  float epsilon = 1e-4f;
  float r = xyz.x;     // Lr
  float s = xyz.y;     // Lv0 x Lr
  float rxs = xyz.z;   // Lv0
  float2 v0 = float2(r, rxs);

  float theta0 = atan2(rxs, r);
  float Lr  = length(v0);
  float Lv0 = theta0 * Lr;
  float phi = atan2(Lv0, Lr);
  float rr = length(float2(Lr, Lv0));

  // Blend between two vectors:
  //  v0: vector of length ||Lr + Lv0|| at angle atan2(Lv0, Lr)
  //  v1: vector of length ||Lr|| at angle ||Lv0|| / ||Lr||
  // Interpolate in polar coordinates to make it wrap nicely.
  float r0 = Lr;
  float r1 = rr;

  float theta1 = phi;

  // Interpolate polar coordinates.
  float radius = dlerp(r0, r1, t);
  float theta = dlerp(theta0, theta1, t);

  // Map into (r, rxs) basis.
  float2 nb_t = float2(cos(theta), sin(theta)) * radius;

  // Un-project from (r, rxs) plane to full (r, s, rxs) basis.
  xyz = float3(nb_t.x, s, nb_t.y);

  // Map back to cartesian basis.
  xyz = mul(to_cart, xyz) + p;

  return xyz;
}

public void tube_to_plane_normal(inout float3 xyz, inout float3 normal,
    inout float3 tangent, float3 p, float3 r, float3 s, float t) {
  R3R3_NORMALS(xyz, normal, tangent, tube_to_plane, p, r, s, t);
}

[Differentiable]
public float3 point_align(float3 xyz, no_diff float3 po, no_diff float3 pp, no_diff float3 r, no_diff float t) {
  const float3 xyz0 = xyz;

  // We assume that `s` is orthogonal to `r`, and that `r` is normalized.
  float3 s = normalize(pp - po);
  float3 sxr = cross(s, r);

  float3x3 to_rsrxs = float3x3(r, s, sxr);
  // Inverse of orthonormal matrix is just the transpose.
  float3x3 to_cart  = transpose(to_rsrxs);

  // Move key vectors into (r, s, sxr) basis centered at `po`, per the derivation.
  float3 xyz_rsrxs = mul(to_rsrxs, xyz - po);
  float3 pp_rsrxs  = mul(to_rsrxs, pp - po);

  float2 v1 = xyz_rsrxs.xy;
  float v1r = v1.x;
  float v1s = v1.y;

  float vs = v1.y;
  float qs = pp_rsrxs.y;
  float qr = v1.x;
  float2 q = float2(qr, qs);
  q *= vs / qs;

  xyz_rsrxs.xy = q;

  // Move back into cartesian space.
  xyz = mul(to_cart, xyz_rsrxs) + po;

  return dlerp(xyz0, xyz, t);
}

public void point_align_normal(inout float3 xyz, inout float3 normal, inout float3 tangent, float3 po, float3 pp, float3 r, float t) {
  R3R3_NORMALS(xyz, normal, tangent, point_align, po, pp, r, t);
}

[Differentiable]
public float3 seal(float3 xyz, no_diff float A, no_diff float k, no_diff float t) {
  float x = xyz.x;
  float y = xyz.y;
  float z = xyz.z;

  float x0 = x + sin(y * k) * 0.1;
  float y0 = y + sin(x * k * 2) * 0.5 + cos(z);
  float z0 = (z + sin(x * y * k * PI + t)) * A * (1.0 + sin(y * k) * sin(x * k));

  x0 += z0 * 0.1 * sin(z0 * PI + 1.5);
  y0 += z0 * 0.1 * sin(z0 * PI + 1.5);

  x0 -= 0.0;
  y0 -= 1.2;

  return float3(
      x0, y0, z0
  );
}

public void seal_normal(inout float3 xyz, inout float3 normal,
    inout float3 tangent, float A, float k, float t) {
  R3R3_NORMALS(xyz, normal, tangent, seal, A, k, t);
}

[Differentiable]
public float3 sine_wave(float3 xyz,
    no_diff float3 amplitude,
    no_diff float3 direction,
    no_diff float3 k,
    no_diff float3 omega,
    no_diff float t) {
  xyz += amplitude * sin(k * dot(xyz, direction) + omega * t);
  return xyz;
}

public void sine_wave_normal(inout float3 xyz, inout float3 normal, inout float3 tangent,
    float3 amplitude, float3 direction, float3 k, float3 omega, float t) {
  R3R3_NORMALS(xyz, normal, tangent, sine_wave, amplitude, direction, k, omega, t);
}

[Differentiable]
public float3 norm_conversion(float3 xyz, no_diff float input_k, no_diff float output_k, no_diff float t) {
  float3 xyz_abs = dabs(xyz)+1e-4f;
  float lin = pow(pow(xyz_abs.x, input_k) + pow(xyz_abs.y, input_k) + pow(xyz_abs.z, input_k), 1.0f / input_k);
  float lout = pow(pow(xyz_abs.x, output_k) + pow(xyz_abs.y, output_k) + pow(xyz_abs.z, output_k), 1.0f / output_k);
  float scale = dlerp(1.0f, lout / lin, t);
  return xyz * scale;
}

public void norm_conversion_normal(inout float3 xyz, inout float3 normal, inout float3 tangent, float input_k, float output_k, float t) {
  R3R3_NORMALS(xyz, normal, tangent, norm_conversion, input_k, output_k, t);
}

[Differentiable]
float3 rand3_hash3(float3 p)
{
    // Improved Murmurhash3 by Squirrel Eiserloh (GDC 2017)
    p = float3(dot(p, float3(127.1, 311.7, 74.7)),
               dot(p, float3(269.5, 183.3, 246.1)),
               dot(p, float3(113.5, 271.9, 124.6)));
    return frac(sin(p) * 43758.5453123);
}

[Differentiable]
float rand3_hash1(float3 p)
{
    // Improved Murmurhash3 by Squirrel Eiserloh (GDC 2017)
    float p0 = dot(p, float3(127.1, 311.7, 74.7));
    return frac(sin(p0) * 43758.5453123);
}

// Calculate value noise and jacobian in one shot.
// Based on https://iquilezles.org/articles/morenoise/
float3 value_noise(float3 xyz, out float3 dx, out float3 dy, out float3 dz) {
  float3 cell = floor(xyz);
  float3 f = xyz - cell;

  float ux = quintic(f.x);
  float uy = quintic(f.y);
  float uz = quintic(f.z);

  float dux = d_quintic(f.x);
  float duy = d_quintic(f.y);
  float duz = d_quintic(f.z);

  float3 n000 = rand3_hash3(cell + float3(0.0f, 0.0f, 0.0f));
  float3 n001 = rand3_hash3(cell + float3(0.0f, 0.0f, 1.0f));
  float3 n010 = rand3_hash3(cell + float3(0.0f, 1.0f, 0.0f));
  float3 n011 = rand3_hash3(cell + float3(0.0f, 1.0f, 1.0f));
  float3 n100 = rand3_hash3(cell + float3(1.0f, 0.0f, 0.0f));
  float3 n101 = rand3_hash3(cell + float3(1.0f, 0.0f, 1.0f));
  float3 n110 = rand3_hash3(cell + float3(1.0f, 1.0f, 0.0f));
  float3 n111 = rand3_hash3(cell + float3(1.0f, 1.0f, 1.0f));

  float3 n00 = lerp(n000, n001, uz);
  float3 n01 = lerp(n010, n011, uz);
  float3 n10 = lerp(n100, n101, uz);
  float3 n11 = lerp(n110, n111, uz);

  float3 n0 = lerp(n00, n01, uy);
  float3 n1 = lerp(n10, n11, uy);

  float oneMinusUx = 1.0f - ux;
  float oneMinusUy = 1.0f - uy;

  float3 noise_half = lerp(n0, n1, ux);

  float3 dnoise_half_dx = (n1 - n0) * dux;

  float3 dn0_dy = (n01 - n00) * duy;
  float3 dn1_dy = (n11 - n10) * duy;
  float3 dnoise_half_dy = dn0_dy * oneMinusUx + dn1_dy * ux;

  float3 dn00_dz = (n001 - n000) * duz;
  float3 dn01_dz = (n011 - n010) * duz;
  float3 dn10_dz = (n101 - n100) * duz;
  float3 dn11_dz = (n111 - n110) * duz;
  float3 dn0_dz = dn00_dz * oneMinusUy + dn01_dz * uy;
  float3 dn1_dz = dn10_dz * oneMinusUy + dn11_dz * uy;
  float3 dnoise_half_dz = dn0_dz * oneMinusUx + dn1_dz * ux;

  dx = 2.0f * dnoise_half_dx;
  dy = 2.0f * dnoise_half_dy;
  dz = 2.0f * dnoise_half_dz;

  return -1.0f + 2.0f * noise_half;
}

float3 fbm_with_derivatives(float3 xyz,
    no_diff float t,
    no_diff float3 amplitude,
    no_diff float gain,
    no_diff float lacunarity,
    no_diff float3 period,
    no_diff float octaves,
    no_diff float3 velocity,
    out float3 dx,
    out float3 dy,
    out float3 dz) {
  float3 noise = float3(0.0f, 0.0f, 0.0f);
  float3 gain_i = amplitude;
  float3 freq_i = 1.0f / period;

  dx = float3(0.0f, 0.0f, 0.0f);
  dy = float3(0.0f, 0.0f, 0.0f);
  dz = float3(0.0f, 0.0f, 0.0f);

  for (uint i = 0; i < octaves; ++i) {
    float3 dx_i, dy_i, dz_i;
    float3 octave_noise = value_noise((xyz - velocity * t) * freq_i, dx_i, dy_i, dz_i);
    noise += gain_i * octave_noise;
    dx += gain_i * freq_i.x * dx_i;
    dy += gain_i * freq_i.y * dy_i;
    dz += gain_i * freq_i.z * dz_i;
    freq_i *= lacunarity;
    gain_i *= gain;
  }

  dx += float3(1.0f, 0.0f, 0.0f);
  dy += float3(0.0f, 1.0f, 0.0f);
  dz += float3(0.0f, 0.0f, 1.0f);

  return xyz + noise;
}

public float3 fbm(float3 xyz,
    no_diff float t,
    no_diff float3 amplitude,
    no_diff float gain,
    no_diff float lacunarity,
    no_diff float3 period,
    no_diff float octaves,
    no_diff float3 velocity) {
  float3 dx_unused, dy_unused, dz_unused;
  return fbm_with_derivatives(xyz, t, amplitude, gain, lacunarity, period, octaves,
      velocity, dx_unused, dy_unused, dz_unused);
}

public void fbm_normal(inout float3 xyz, inout float3 normal, inout float3 tangent,
    no_diff float t,
    no_diff float3 amplitude, no_diff float gain, no_diff float lacunarity,
    no_diff float3 period, no_diff float octaves, no_diff float3 velocity) {
  float3 dx, dy, dz;
  xyz = fbm_with_derivatives(xyz, t, amplitude, gain, lacunarity, period, octaves,
      velocity, dx, dy, dz);
  float3x3 jac = float3x3(
      dx.x, dy.x, dz.x,
      dx.y, dy.y, dz.y,
      dx.z, dy.z, dz.z);
  float jac_det = determinant(jac);
  float3x3 itjac = inverse(transpose(jac), jac_det);
  normal = mul(itjac, normal) * jac_det;
  tangent = mul(jac, tangent);
}

// Maps a plane on [-1, 1] on xz plane to a hemi-octahedron with radius 1.
[Differentiable]
public float3 plane_to_hemi_octahedron(float3 xyz,
    no_diff float3 p, no_diff float3 r_cart, no_diff float3 s_cart,
    no_diff float t) {
  // Convert from cartesian to (r, s, r x s) space.
  r_cart = normalize(r_cart);
  s_cart = normalize(s_cart);
  float3 rxs_cart = cross(s_cart, r_cart);
  float3x3 to_rsrxs = float3x3(r_cart, s_cart, rxs_cart);
  float3x3 to_cart = transpose(to_rsrxs);

  // Translate origin to `p` then change into (r, s, r x s) basis.
  xyz = mul(to_rsrxs, xyz - p);

  float3 xyz0 = xyz;

  // Extract planar coordinates: x and z form the 2D plane
  float x = xyz.x;
  float z = xyz.z;

  // Rotate 45° and scale to fit square into diamond
  float x_rot = (x + z) * 0.5;
  float z_rot = (z - x) * 0.5;

  // Octahedral decode: y = 1 - |x| - |z|, clamped to hemisphere
  // Use differentiable abs and max for smooth autodiff
  float y = dmax(0.0f, 1.0f - dabs(x_rot) - dabs(z_rot));

  float3 oct_pos = float3(x_rot, y, z_rot);
  float len = dot(oct_pos, oct_pos);
  oct_pos = oct_pos * (1.0f + xyz.y);

  // Rotate back by -45° around y to undo input rotation
  float x_unrot = (oct_pos.x - oct_pos.z) * RCP_SQRT_2;
  float z_unrot = (oct_pos.x + oct_pos.z) * RCP_SQRT_2;
  oct_pos = float3(x_unrot, oct_pos.y, z_unrot);

  // Interpolate between original position and sphere position
  float3 result = dlerp(xyz0, oct_pos, t);

  // Map back to cartesian basis
  xyz = mul(to_cart, result) + p;

  return xyz;
}

public void plane_to_hemi_octahedron_normal(inout float3 xyz, inout float3 normal,
    inout float3 tangent, float3 p, float3 r, float3 s, float t) {
  R3R3_NORMALS(xyz, normal, tangent, plane_to_hemi_octahedron, p, r, s, t);
}

// Maps a hemi-octahedron with raidus 1 to a quad on [-1, 1] on the (r, rxs) plane.
[Differentiable]
public float3 hemi_octahedron_to_plane(float3 xyz,
    no_diff float3 p, no_diff float3 r_cart, no_diff float3 s_cart,
    no_diff float t) {
  // Convert from cartesian to (r, s, r x s) space.
  r_cart = normalize(r_cart);
  s_cart = normalize(s_cart);
  float3 rxs_cart = cross(s_cart, r_cart);
  float3x3 to_rsrxs = float3x3(r_cart, s_cart, rxs_cart);
  float3x3 to_cart = transpose(to_rsrxs);

  // Translate origin to `p` then change into (r, s, r x s) basis.
  xyz = mul(to_rsrxs, xyz - p);

  float3 xyz0 = xyz;

  // Undo the -45° unrotation from forward pass (rotate by +45°)
  float x_rot = (xyz.x + xyz.z) * RCP_SQRT_2;
  float z_rot = (xyz.z - xyz.x) * RCP_SQRT_2;

  // The forward pass scales by (1 + s_original), and the unscaled hemi-octahedron
  // has L1 norm = 1, so L1 of the scaled point recovers s_original.
  float L1_scaled = dabs(x_rot) + dabs(xyz.y) + dabs(z_rot);
  float s_original = L1_scaled - 1.0f;

  // Remove (1 + s) scale to get the unit hemi-octahedron point, then decode.
  float L1_inv = 1.0f / L1_scaled;
  float x_oct = x_rot * L1_inv;
  float z_oct = z_rot * L1_inv;

  // Undo the initial 45° rotation (x_rot = (x+z)*0.5, z_rot = (z-x)*0.5)
  // Inverse: x = x_rot - z_rot, z = x_rot + z_rot
  float x_plane = x_oct - z_oct;
  float z_plane = x_oct + z_oct;

  float3 plane_pos = float3(x_plane, s_original, z_plane);

  // Interpolate between original position and plane position
  float3 result = dlerp(xyz0, plane_pos, t);

  // Map back to cartesian basis
  xyz = mul(to_cart, result) + p;

  return xyz;
}

public void hemi_octahedron_to_plane_normal(inout float3 xyz, inout float3 normal,
    inout float3 tangent, float3 p, float3 r, float3 s, float t) {
  R3R3_NORMALS(xyz, normal, tangent, hemi_octahedron_to_plane, p, r, s, t);
}

// Maps [-1, 1] on (r, rxs) plane to a unit sphere using octahedral mapping.
[Differentiable]
public float3 plane_to_octahedron(float3 xyz,
    no_diff float3 p, no_diff float3 r_cart, no_diff float3 s_cart,
    no_diff float t) {
  r_cart = normalize(r_cart);
  s_cart = normalize(s_cart);
  float3 rxs_cart = cross(s_cart, r_cart);
  float3x3 to_rsrxs = float3x3(r_cart, s_cart, rxs_cart);
  float3x3 to_cart = transpose(to_rsrxs);

  xyz = mul(to_rsrxs, xyz - p);
  float3 xyz0 = xyz;

  float l1_norm = dabs(xyz.x) + dabs(xyz.z);
  if (l1_norm > 1) {
    xyz.x = sign(xyz0.x) * (1 - dabs(xyz0.z));
    xyz.z = sign(xyz0.z) * (1 - dabs(xyz0.x));
  }
  xyz.y = 1 - l1_norm;

  xyz *= (1 + xyz0.y);

  float3 result = dlerp(xyz0, xyz, t);

  xyz = mul(to_cart, result) + p;
  return xyz;
}

public void plane_to_octahedron_normal(inout float3 xyz, inout float3 normal,
    inout float3 tangent, float3 p, float3 r, float3 s, float t) {
  R3R3_NORMALS(xyz, normal, tangent, plane_to_octahedron, p, r, s, t);
}

// Maps a unit sphere to a plane using octahedral mapping.
[Differentiable]
public float3 octahedron_to_plane(float3 xyz,
    no_diff float3 p, no_diff float3 r_cart, no_diff float3 s_cart,
    no_diff float t) {
  r_cart = normalize(r_cart);
  s_cart = normalize(s_cart);
  float3 rxs_cart = cross(s_cart, r_cart);
  float3x3 to_rsrxs = float3x3(r_cart, s_cart, rxs_cart);
  float3x3 to_cart = transpose(to_rsrxs);

  xyz = mul(to_rsrxs, xyz - p);
  float3 xyz0 = xyz;

  // The forward pass scales by (1 + s_original), and the unscaled octahedron
  // has L1 norm = 1, so L1 of the scaled point recovers s_original.
  float l1_norm = dabs(xyz.x) + dabs(xyz.y) + dabs(xyz.z);
  float s_original = l1_norm - 1.0f;
  xyz /= l1_norm;
  float2 xz_tmp = xyz.xz;
  if (xyz.y < 0) {
    xyz.x = sign(xz_tmp[0]) * (1 - dabs(xz_tmp[1]));
    xyz.z = sign(xz_tmp[1]) * (1 - dabs(xz_tmp[0]));
  }
  xyz.y = s_original;

  float3 result = dlerp(xyz0, xyz, t);
  xyz = mul(to_cart, result) + p;
  return xyz;
}

public void octahedron_to_plane_normal(inout float3 xyz, inout float3 normal,
    inout float3 tangent, float3 p, float3 r, float3 s, float t) {
  R3R3_NORMALS(xyz, normal, tangent, octahedron_to_plane, p, r, s, t);
}

[Differentiable]
public float3 scale(float3 xyz,
    no_diff float3 k, no_diff float t) {
  return dlerp(xyz, xyz * k, t);
}

public void scale_normal(inout float3 xyz, inout float3 normal,
    inout float3 tangent, float3 k, float t) {
  R3R3_NORMALS(xyz, normal, tangent, scale, k, t);
}

[Differentiable]
public float3 translate(float3 xyz,
    no_diff float3 offset, no_diff float t) {
  return xyz + offset * t;
}

public void translate_normal(inout float3 xyz, inout float3 normal,
    inout float3 tangent, float3 offset, float t) {
  R3R3_NORMALS(xyz, normal, tangent, translate, offset, t);
}

[Differentiable]
public float3 rotate(float3 xyz,
    no_diff float3 p, no_diff float3 axis, no_diff float angle, no_diff float t) {
  float theta = angle * t;
  float3 a = normalize(axis);
  float c = cos(theta);
  float s = sin(theta);
  float3 v = xyz - p;
  // Rodrigues' rotation formula
  float3 rotated = v * c + cross(a, v) * s + a * dot(a, v) * (1.0f - c);
  return rotated + p;
}

public void rotate_normal(inout float3 xyz, inout float3 normal,
    inout float3 tangent, float3 p, float3 axis, float angle, float t) {
  R3R3_NORMALS(xyz, normal, tangent, rotate, p, axis, angle, t);
}

#endif  // __CUSTOM31_INC