From b5197bed4cad2a8452bcbfa8e116497760edf1ba Mon Sep 17 00:00:00 2001
From: yum <yum.food.vr@gmail.com>
Date: Sun, 29 Mar 2026 22:43:01 -0700
Subject: Refactor & credit glitter

---
 glitter.cginc | 238 +++++++++++++++++++++++++++++++---------------------------
 1 file changed, 129 insertions(+), 109 deletions(-)

(limited to 'glitter.cginc')

diff --git a/glitter.cginc b/glitter.cginc
index 2744d6b..2ed0ef4 100644
--- a/glitter.cginc
+++ b/glitter.cginc
@@ -1,183 +1,203 @@
 #ifndef __GLITTER_INC
 #define __GLITTER_INC
 
-#include "math.cginc"
-
 /*
-@article{KPT:2025:Glinty,
-  title = {Evaluating and Sampling Glinty NDFs in Constant Time},
-  author = {Kemppinen, Pauli and Paulin, LoÏs and Thonat, Théo and Thiery, Jean-Marc and Lehtinen, Jaakko and Boubekeur, Tamy},
-  year = {2025},
-  journal = {ACM Transactions on Graphics (Proc. SIGGRAPH Asia 2025)},
-  volume = {44},
-  number = {6},
-  articleno = {255},
-}
-*/
-// Ported from: https://www.shadertoy.com/view/tcdGDl
+ * This is an implementation of Kemppinen et al.'s "Evaluating and Sampling
+ * Glinty NDFs in Constant Time".
+ * It is ported from: https://www.shadertoy.com/view/tcdGDl
+ * Since no license terms are listed in the shader body, it is protected by
+ * the default Shadertoy license (per https://www.shadertoy.com/terms),
+ * which is the Creative Commons Attribution-NonCommercial-ShareAlike 3.0
+ * Unported License: https://creativecommons.org/licenses/by-nc-sa/3.0/deed.en
+ *
+ * I have made changes to this code. They are:
+ *   1. Syntax changes required to translate GLSL to HLSL.
+ *   2. Stylistic preferences, like using "1" or "1.0" instead of "1.".
+ *
+ * @article{KPT:2025:Glinty,
+ *   title = {Evaluating and Sampling Glinty NDFs in Constant Time},
+ *   author = {Kemppinen, Pauli and Paulin, Loïs and Thonat,
+ *       Théo and Thiery, Jean-Marc and Lehtinen, Jaakko and Boubekeur,
+ *       Tamy},
+ *   year = {2025},
+ *   journal = {ACM Transactions on Graphics (Proc. SIGGRAPH Asia 2025)},
+ *   volume = {44},
+ *   number = {6},
+ *   articleno = {255},
+ * }
+ */
+
+#define PI 3.1415926535897932384626433832795028841971
+// Remaps [0, UINT_MAX] to [0, 1]
+#define UINT_TO_UNIT (1.0 / 4294967296.0)
 
 // Lambert azimuthal equal area projection
 float2 lambert(float3 v) {
   return v.xy / sqrt(1 + v.z);
 }
 
-// Rebuild GLSL mat2 column semantics explicitly in HLSL.
-float2 mat2_col(float2x2 m, uint i) {
-  return float2(m[0][i], m[1][i]);
-}
-
-float2x2 mat2_from_cols(float2 c0, float2 c1) {
-  return float2x2(c0.x, c1.x,
-                  c0.y, c1.y);
-}
-
 // v is a microfacet normal that has been squished according to alpha, a
 // roughness parameter.
 float3 ndf_to_disk_ggx(float3 v, float alpha) {
-    // Map `v` onto a hemisphere.
-    float3 hemi = float3(v.xy / alpha, v.z);
-    float denom = dot(hemi, hemi);
-    // Project onto circle with equal area projection, and remap from [-1, 1]
-    // to [0, 1].
-    float2 v_disk = lambert(normalize(hemi)) * 0.5 + 0.5;
-    float jacobian_determinant = 1.0 / (alpha * alpha * denom * denom);
-    return float3(v_disk, jacobian_determinant);
+  // Map `v` onto a hemisphere.
+  float3 hemi = float3(v.xy / alpha, v.z);
+  float denom = dot(hemi, hemi);
+  // Project onto circle with equal area projection, and remap from [-1, 1]
+  // to [0, 1].
+  float2 v_disk = lambert(normalize(hemi)) * 0.5 + 0.5;
+  float jacobian_determinant = 1.0 / (alpha * alpha * denom * denom);
+  return float3(v_disk, jacobian_determinant);
 }
 
 // Computes (M^T M)^-1
 float2x2 inv_quadratic(float2x2 M) {
-	float D = determinant(M);
-	float2 c0 = mat2_col(M, 0) / D;
-	float2 c1 = mat2_col(M, 1) / D;
-	float A = dot(c0, c0);
-	float B = -dot(c0, c1);
-	float C = dot(c1, c1);
-	return mat2_from_cols(float2(C, B), float2(B, A));
+  float D = determinant(M);
+  float2 c0 = transpose(M)[0] / D;
+  float2 c1 = transpose(M)[1] / D;
+  float A = dot(c0, c0);
+  float B = -dot(c0, c1);
+  float C = dot(c1, c1);
+  return transpose(float2x2(float2(C, B), float2(B, A)));
 }
 
 float2x2 uv_ellipsoid(float2x2 uv_J) {
-	float2x2 Q = inv_quadratic(transpose(uv_J));
-	float2 q0 = mat2_col(Q, 0);
-	float2 q1 = mat2_col(Q, 1);
-	float tr = 0.5 * (q0.x + q1.y);
-	float  D = sqrt(max(0.0, tr * tr - determinant(Q)));
-	float l1 = tr - D;
-	float l2 = tr + D;
-	float2 v1 = float2(l1 - q1.y, q0.y);
-	float2 v2 = float2(q1.x, l2 - q0.x);
-	float2 n = 1.f/sqrt(float2(l1, l2));
-	return mat2_from_cols(normalize(v1) * n.x, normalize(v2) * n.y);
+  float2x2 Q = inv_quadratic(transpose(uv_J));
+  float2 q0 = transpose(Q)[0];
+  float2 q1 = transpose(Q)[1];
+  float tr = 0.5 * (q0.x + q1.y);
+  float  D = sqrt(max(0.0, tr * tr - determinant(Q)));
+  float l1 = tr - D;
+  float l2 = tr + D;
+  float2 v1 = float2(l1 - q1.y, q0.y);
+  float2 v2 = float2(q1.x, l2 - q0.x);
+  float2 n = 1.0/sqrt(float2(l1, l2));
+  return transpose(float2x2(normalize(v1) * n.x, normalize(v2) * n.y));
 }
 
 float QueryLod(float2x2 uv_J, float filter_size) {
-    float s0 = length(mat2_col(uv_J, 0));
-    float s1 = length(mat2_col(uv_J, 1));
-    return log2(max(s0, s1) * filter_size) + pow(2.0, filter_size);
+  float s0 = length(transpose(uv_J)[0]);
+  float s1 = length(transpose(uv_J)[1]);
+  return log2(max(s0, s1) * filter_size) + pow(2.0, filter_size);
+}
+
+float2x2 inverse(float2x2 m) {
+  float det = (m[0][0] * m[1][1]) - (m[0][1] * m[1][0]);
+
+  return float2x2(
+      m[1][1], -m[0][1],
+      -m[1][0],  m[0][0]
+      ) / det;
 }
 
 float normal(float2x2 cov, float2 x) {
-    return exp(-.5 * dot(x, mul(inverse(cov), x))) / (sqrt(determinant(cov)) * 2.0 * PI);
+  return exp(-.5 * dot(x, mul(inverse(cov), x))) / (sqrt(determinant(cov)) * 2.0 * PI);
 }
 
 uint2 shuffle(uint2 v) {
-    v = v * 1664525u + 1013904223u;
-	v.x += v.y * 1664525u;
-	v.y += v.x * 1664525u;
+  v = v * 1664525u + 1013904223u;
+  v.x += v.y * 1664525u;
+  v.y += v.x * 1664525u;
 
-	v = v ^ (v>>16u);
+  v = v ^ (v>>16u);
 
-	v.x += v.y * 1664525u;
-	v.y += v.x * 1664525u;
-	v = v ^ (v>>16u);
-    return v;
+  v.x += v.y * 1664525u;
+  v.y += v.x * 1664525u;
+  v = v ^ (v>>16u);
+  return v;
 }
 
 float2 rand(uint2 v) {
-	return float2(shuffle(v)) * pow(0.5, 32.0);
+  return float2(shuffle(v)) * UINT_TO_UNIT;
 }
 
 float2 Rand2D(float2 x, float2 y, float l, uint i) {
-	uint2 ux = asuint(x);
-	uint2 uy = asuint(y);
-	uint  ul = asuint(l);
-	return rand((ux>>16|ux<<16) ^ uy ^ ul ^ (i*0x124u));
+  uint2 ux = asuint(x);
+  uint2 uy = asuint(y);
+  uint  ul = asuint(l);
+  // The commented-out hash22_fast variant below is broken, but looks cool.
+  //return hash22_fast(asfloat((ux>>16|ux<<16) ^ uy ^ ul ^ (i*0x124u)));
+  return rand((ux>>16|ux<<16) ^ uy ^ ul ^ (i*0x124u));
 }
 
 float Rand1D(float2 x, float2 y, float l, uint i) {
-	return Rand2D(x, y, l, i).x;
+  return Rand2D(x, y, l, i).x;
 }
 
 // Bürmann series, see https://en.wikipedia.org/wiki/Error_function
 float erf(float x) {
-    float e = exp(-x*x);
-		return sign(x) * 2.0 * sqrt((1.0 - e) / PI) *
-			(sqrt(PI) * 0.5 + 31./200. * e - 341.0/8000.0 * e * e);
+  float e = exp(-x*x);
+  return sign(x) * 2.0 * sqrt((1.0 - e) / PI) *
+    (sqrt(PI) * 0.5 + 31.0/200.0 * e - 341.0/8000.0 * e * e);
 }
 
 float cdf(float x, float mu, float sigma) {
-	return 0.5 + 0.5 * erf((x-mu)/(sigma*sqrt(2.0)));
+  return 0.5 + 0.5 * erf((x-mu)/(sigma*sqrt(2.0)));
 }
 
 float integrate_interval(float x, float size, float mu, float stdev, float lower_limit, float upper_limit) {
-	return cdf(min(x+size, upper_limit), mu, stdev) - cdf(max(x-size, lower_limit), mu, stdev);
+  return cdf(min(x+size, upper_limit), mu, stdev) - cdf(max(x-size, lower_limit), mu, stdev);
 }
 
 float integrate_box(float2 x, float2 size, float2 mu, float2x2 sigma, float2 lower_limit, float2 upper_limit) {
-	return
-		integrate_interval(x.x, size.x, mu.x, sqrt(sigma[0][0]), lower_limit.x, upper_limit.x) *
-		integrate_interval(x.y, size.y, mu.y, sqrt(sigma[1][1]), lower_limit.y, upper_limit.y);
+  return
+    integrate_interval(x.x, size.x, mu.x, sqrt(sigma[0][0]), lower_limit.x, upper_limit.x) *
+    integrate_interval(x.y, size.y, mu.y, sqrt(sigma[1][1]), lower_limit.y, upper_limit.y);
 }
 
 float compensation(float2 x_a, float2x2 sigma_a, float res_a) {
-	float containing = integrate_box(0.5, 0.5, x_a, sigma_a, 0.0, 1.0);
-	float explicitly_evaluated = integrate_box(round(x_a*res_a)/res_a, 1.0/res_a, x_a, sigma_a, 0, 1);
-	return containing - explicitly_evaluated;
+  float containing = integrate_box(0.5, 0.5, x_a, sigma_a, 0.0, 1.0);
+  float explicitly_evaluated = integrate_box(round(x_a*res_a)/res_a, 1.0/res_a, x_a, sigma_a, 0, 1);
+  return containing - explicitly_evaluated;
 }
 
 float D_Kemppinen(float3 h, float alpha, float glint_alpha, float2 uv, float2x2 uv_J, float N, float filter_size) {
-	float res = sqrt(N);
-	float2 x_s = uv;
-	float3 x_a_and_d = ndf_to_disk_ggx(h, alpha);
-	float2 x_a = x_a_and_d.xy;
-	float d = x_a_and_d.z;
+  float res = sqrt(N);
+  float2 x_s = uv;
+  float3 x_a_and_d = ndf_to_disk_ggx(h, alpha);
+  float2 x_a = x_a_and_d.xy;
+  float d = x_a_and_d.z;
 
-	float lambda = QueryLod(res * uv_J, filter_size);
+  float lambda = QueryLod(res * uv_J, filter_size);
 
-	float D_filter = .0;
+  float D_filter = 0;
 
-	for(float m = .0; m<2.; m += 1.) {
-		float l = floor(lambda) + m;
+  [loop]
+    for (float m = 0; m < 2; m += 1) {
+      float l = floor(lambda) + m;
 
-		float w_lambda = 1. - abs(lambda - l);
-		float res_s = res * pow(2., -l);
-		float res_a = pow(2., l);
+      float w_lambda = 1.0 - abs(lambda - l);
+      float res_s = res * pow(2, -l);
+      float res_a = pow(2, l);
 
-		float2x2 uv_J2 = filter_size * uv_J;
-		float2x2 sigma_s = mul(uv_J2, transpose(uv_J2));
+      float2x2 uv_J2 = filter_size * uv_J;
+      float2x2 sigma_s = mul(uv_J2, transpose(uv_J2));
 
-		float2x2 sigma_a = d * pow(glint_alpha, 2.) * float2x2(1., .0, .0, 1.);
+      float2x2 sigma_a = d * pow(glint_alpha, 2) * float2x2(1, 0, 0, 1);
 
-		float2 base_i_a = clamp(round(x_a * res_a), 1., res_a-1.);
-		for(int j_a = 0; j_a < 4; ++j_a) {
-			float2 i_a = base_i_a + float2(int2(j_a, j_a/2)%2)-.5;
+      float2 base_i_a = clamp(round(x_a * res_a), 1, res_a-1);
+      [loop]
+        for (uint j_a = 0; j_a < 4; ++j_a) {
+          float2 i_a = base_i_a + float2(int2(j_a, j_a/2)%2)-.5;
 
-			float2 base_i_s = round(x_s * res_s);
-			for(int j_s = 0; j_s < 4; ++j_s) {
-				float2 i_s = base_i_s + float2(int2(j_s, j_s/2)%2)-.5;
+          float2 base_i_s = round(x_s * res_s);
+          [loop]
+            for (uint j_s = 0; j_s < 4; ++j_s) {
+              float2 i_s = base_i_s + float2(int2(j_s, j_s/2)%2)-.5;
 
-				float2 g_s = (i_s + Rand2D(i_s, i_a, l, 1u) - .5) / res_s;
-				float2 g_a = (i_a + Rand2D(i_s, i_a, l, 2u) - .5) / res_a;
+              float2 g_s = (i_s + Rand2D(i_s, i_a, l, 1u) - .5) / res_s;
+              float2 g_a = (i_a + Rand2D(i_s, i_a, l, 2u) - .5) / res_a;
 
-				float r = Rand1D(i_s, i_a, l, 4u);
-				float roulette = smoothstep(max(.0, r-.1), min(1.0, r+.1), w_lambda);
+              float r = Rand1D(i_s, i_a, l, 4u);
+              float roulette = smoothstep(max(.0, r-.1), min(1.0, r+.1), w_lambda);
 
-				D_filter += roulette * normal(sigma_a, x_a - g_a) * normal(sigma_s, x_s - g_s) / N;
-			}
-		}
-		D_filter += w_lambda * compensation(x_a, sigma_a, res_a);
-	}
+              D_filter += roulette * normal(sigma_a, x_a - g_a) * normal(sigma_s, x_s - g_s) / N;
+            }
+        }
+      D_filter += w_lambda * compensation(x_a, sigma_a, res_a);
+    }
 
-	return D_filter * d / PI;
+  return D_filter * d / PI;
 }
 
 #endif  // __GLITTER_INC
+
-- 
cgit v1.2.3