Add faster 3-in 1-out hasher for domain warping

Goes from ~1.7 ms/frame to ~1.1 ms/frame in 10-octave microbenchmark.
author: yum <yum.food.vr@gmail.com> 2026-03-17 15:49:21 -0700
committer: yum <yum.food.vr@gmail.com> 2026-03-17 15:49:25 -0700
commit: 1784064c7a39a69203e8975167addf1915f940bd (patch)
tree: 4adc272435efcf54ac7ed8399aa33acbf422a959 /math.cginc
parent: 019c24186c87fd747aae1512abf4d4690e3aca07 (diff)
1 files changed, 51 insertions, 16 deletions
diff --git a/math.cginc b/math.cginc
index 7c6db21..5803be5 100755
--- a/math.cginc
+++ b/math.cginc
@@ -19,6 +19,9 @@
 
 #define F1_TO_F3(x) float3((x), (x), (x))
 
+// Scale factor 2^-32: remaps a 32-bit uint in [0, UINT_MAX] to [0, 1].
+#define UINT_TO_UNIT (1.0 / 4294967296.0)
+
 float sin_noise_3d(float3 uvw) {
   return sin(uvw[0]) * sin(uvw[1]) * sin(uvw[2]);
 }
@@ -111,24 +114,48 @@ float4 alpha_blend(float4 front, float4 back) {
   return float4(front.rgb * front.a + back.rgb * (1 - front.a), front.a + back.a * (1 - front.a));
 }
 
-// Cheap procedural 3D hash -> [0,1]^3. Based on the "hashwithoutsine" family.
+// 3 in 3 out hash -> [0,1)^3. Based on the "hashwithoutsine" family.
 float3 hash33_fast(float3 p) {
   p = frac(p * float3(0.1031, 0.1030, 0.0973));
   p += dot(p, p.yxz + 33.33);
   return frac((p.xxy + p.yxx) * p.zyx);
 }
 
+// O'Neill-style PCG 32-bit hash: one LCG step on the input, then the RXS-M-XS output permutation.
+uint pcg32(uint input) {
+  uint state = input * 747796405u + 2891336453u;  // LCG step; uint math wraps mod 2^32
+  uint word = ((state >> ((state >> 28u) + 4u)) ^ state) * 277803737u;  // shift amount (4..19) comes from the top 4 bits
+  return (word >> 22u) ^ word;  // final xorshift
+}
+
+// 3 in 1 out integer hash: XOR-combine the per-lane products, then finalize with a single pcg32 call.
+uint hash31_u32(uint3 p) {
+  uint seed = p.x * 0x2c1b3c6du;  // each lane is scaled by its own odd multiplier
+  seed ^= p.y * 0x297a2d39u;
+  seed ^= p.z * 0x1b56c4e9u;
+  return pcg32(seed);  // full 32-bit result; callers scale it to a float
+}
+
+// Float wrapper for arbitrary inputs: hashes the raw bit patterns of p and returns a value in [0, 1].
+float hash31_ff(float3 p) {
+  return hash31_u32(asuint(p)) * UINT_TO_UNIT;  // asuint reinterprets the float bits (no numeric conversion)
+}
+
+float hash31_if(int3 p) {  // Integer-coordinate wrapper around hash31_u32; returns a value in [0, 1].
+  return hash31_u32(p) * UINT_TO_UNIT;  // p is implicitly converted int3 -> uint3 at the call
+}
+
 // Procedural value noise in [0,1]^3 — trilinear interpolation of hashed corners.
 float3 value_noise3(float3 p) {
-  float3 i = floor(p);
+  int3 i = (int3)floor(p);  // NOTE(review): hash31_if is scalar, so the returned float3 has x == y == z - confirm intended
   float3 f = frac(p);
   float3 u = f * f * (3.0 - 2.0 * f);
 
   return lerp(
-    lerp(lerp(hash33_fast(i + float3(0, 0, 0)), hash33_fast(i + float3(1, 0, 0)), u.x),
-         lerp(hash33_fast(i + float3(0, 1, 0)), hash33_fast(i + float3(1, 1, 0)), u.x), u.y),
-    lerp(lerp(hash33_fast(i + float3(0, 0, 1)), hash33_fast(i + float3(1, 0, 1)), u.x),
-         lerp(hash33_fast(i + float3(0, 1, 1)), hash33_fast(i + float3(1, 1, 1)), u.x), u.y),
+    lerp(lerp(hash31_if(i + int3(0, 0, 0)), hash31_if(i + int3(1, 0, 0)), u.x),
+         lerp(hash31_if(i + int3(0, 1, 0)), hash31_if(i + int3(1, 1, 0)), u.x), u.y),
+    lerp(lerp(hash31_if(i + int3(0, 0, 1)), hash31_if(i + int3(1, 0, 1)), u.x),
+         lerp(hash31_if(i + int3(0, 1, 1)), hash31_if(i + int3(1, 1, 1)), u.x), u.y),
     u.z);
 }
 
@@ -149,22 +176,31 @@ float3 domain_warp_procedural(float3 uvw, float strength,
   return noise;
 }
 
+float3 value_noise_3d_tex(Texture3D tex, SamplerState s, float3 p) {  // Samples tex at p with smoothstep-eased texel fractions.
+  float w, h, d;
+  tex.GetDimensions(w, h, d);
+  float3 res = float3(w, h, d);  // texture size in texels
+  // Work in texel units: i is the integer texel index, f the offset within that texel.
+  p *= res;
+  float3 i = floor(p);
+  float3 f = frac(p);
+  float3 u = f * f * (3.0 - 2.0 * f);  // smoothstep fade of the fractional part
+  // Sample between the centres of texel i and i+1 at eased offset u; the blend relies on s filtering linearly (assumed - set by caller).
+  return tex.Sample(s, (i + 0.5 + u) / res).rgb;
+}
+
 // Domain warping using a 3D noise texture. Texture should have an EV of
-// 0.5.
-float3 domain_warp_3d_tex(texture3D noise_tex, float3 uvw, float strength,
-    uint octaves, float lacunarity, float gain) {
+// 0.5. Blends lattice points with smoothstep (cubic Hermite) weights (same
+// semantics as domain_warp_procedural / value_noise3).
+float3 domain_warp_3d_tex(Texture3D noise_tex, SamplerState s, float3 uvw,
+    float strength, uint octaves, float lacunarity, float gain) {
   float3 noise = 0;
   float g = 1;
 
-  float3 uvw_dx = ddx(uvw);
-  float3 uvw_dy = ddy(uvw);
-
   for (uint ii = 0; ii < octaves; ++ii) {
-    noise += noise_tex.SampleGrad(aniso4_trilinear_repeat_s, uvw + noise * strength, uvw_dx, uvw_dy).rgb * g;
+    noise += value_noise_3d_tex(noise_tex, s, uvw + noise * strength) * g;
     uvw *= lacunarity;
     g *= gain;
-    uvw_dx *= lacunarity;
-    uvw_dy *= lacunarity;
   }
 
   // Normalize: geometric series 1 + r + ... + r^{n-1} = (1 - r^n) / (1 - r)
@@ -204,4 +240,3 @@ float voronoi_edge_distance(float3 x) {
 }
 
 #endif  // __MATH_INC
-
author	yum <yum.food.vr@gmail.com>	2026-03-17 15:49:21 -0700
committer	yum <yum.food.vr@gmail.com>	2026-03-17 15:49:25 -0700
commit	1784064c7a39a69203e8975167addf1915f940bd (patch)
tree	4adc272435efcf54ac7ed8399aa33acbf422a959 /math.cginc
parent	019c24186c87fd747aae1512abf4d4690e3aca07 (diff)