glitter.cginc


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152

#ifndef __GLITTER_INC
#define __GLITTER_INC

#include "math.cginc"

/*
@article{KPT:2025:Glinty,
  title = {Evaluating and Sampling Glinty NDFs in Constant Time},
  author = {Kemppinen, Pauli and Paulin, LoÏs and Thonat, Théo and Thiery, Jean-Marc and Lehtinen, Jaakko and Boubekeur, Tamy},
  year = {2025},
  journal = {ACM Transactions on Graphics (Proc. SIGGRAPH Asia 2025)},
  volume = {44},
  number = {6},
  articleno = {255},
}
*/
// Ported from: https://www.shadertoy.com/view/tcdGDl

// Lambert azimuthal equal area projection
float2 lambert(float3 v) {
  return v.xy / sqrt(1 + v.z);
}

// v is a microfacet normal that has been squished according to alpha, a
// roughness parameter.
float3 ndf_to_disk_ggx(float3 v, float alpha) {
    // Map `v` onto a hemisphere.
    float3 hemi = float3(v.xy / alpha, v.z);
    float denom = dot(hemi, hemi);
    // Project onto circle with equal area projection, and remap from [-1, 1]
    // to [0, 1].
    float2 v_disk = lambert(normalize(hemi)) * 0.5 + 0.5;
    float jacobian_determinant = 1.0 / (alpha * alpha * denom * denom);
    return float3(v_disk, jacobian_determinant);
}

// Computes (M^T M)^-1
float2x2 inv_quadratic(float2x2 M) {
	float D = determinant(M);
	float A = dot(M[0] / D, M[0] / D);
	float B = -dot(M[0] / D, M[1] / D);
	float C = dot(M[1] / D, M[1] / D);
	return float2x2(C, B, B, A);
}

float2x2 uv_ellipsoid(float2x2 uv_J) {
	float2x2 Q = inv_quadratic(transpose(uv_J));
	float tr = 0.5 * (Q[0][0] + Q[1][1]);
	float  D = sqrt(max(0.0, tr * tr - determinant(Q)));
	float l1 = tr - D;
	float l2 = tr + D;
	float2 v1 = float2(l1 - Q[1][1], Q[0][1]);
	float2 v2 = float2(Q[1][0], l2 - Q[0][0]);
	float2 n = 1.f/sqrt(float2(l1, l2));
	return float2x2(normalize(v1)*n.x, normalize(v2)*n.y);
}

float QueryLod(float2x2 uv_J, float filter_size) {
    float s0 = length(uv_J[0]), s1 = length(uv_J[1]);
    return log2(max(s0, s1) * filter_size) + pow(2.0, filter_size);
}

float normal(float2x2 cov, float2 x) {
    return exp(-.5 * dot(x, mul(inverse(cov), x))) / (sqrt(determinant(cov)) * 2.0 * PI);
}

float Rand2D(float2 x, float2 y, float l, uint i) {
	uint2 ux = asuint(x);
	uint2 uy = asuint(y);
	uint  ul = asuint(l);
	return hash22_fast((ux>>16|ux<<16) ^ uy ^ ul ^ (i*0x124u));
}

float Rand1D(float2 x, float2 y, float l, uint i) {
	return Rand2D(x, y, l, i).x;
}

// Bürmann series, see https://en.wikipedia.org/wiki/Error_function
float erf(float x) {
    float e = exp(-x*x);
		return sign(x) * 2.0 * sqrt((1.0 - e) / PI) *
			(sqrt(PI) * 0.5 + 31./200. * e - 341.0/8000.0 * e * e);
}

float cdf(float x, float mu, float sigma) {
	return 0.5 + 0.5 * erf((x-mu)/(sigma*sqrt(2.0)));
}

float integrate_interval(float x, float size, float mu, float stdev, float lower_limit, float upper_limit) {
	return cdf(min(x+size, upper_limit), mu, stdev) - cdf(max(x-size, lower_limit), mu, stdev);
}

float integrate_box(float2 x, float2 size, float2 mu, float2x2 sigma, float2 lower_limit, float2 upper_limit) {
	return
		integrate_interval(x.x, size.x, mu.x, sqrt(sigma[0][0]), lower_limit.x, upper_limit.x) *
		integrate_interval(x.y, size.y, mu.y, sqrt(sigma[1][1]), lower_limit.y, upper_limit.y);
}

float compensation(float2 x_a, float2x2 sigma_a, float res_a) {
	float containing = integrate_box(0.5, 0.5, x_a, sigma_a, 0.0, 1.0);
	float explicitly_evaluated = integrate_box(round(x_a*res_a)/res_a, 1.0/res_a, x_a, sigma_a, 0, 1);
	return containing - explicitly_evaluated;
}

float ndf(float3 h, float alpha, float glint_alpha, float2 uv, float2x2 uv_J, float N, float filter_size) {
	float res = sqrt(N);
	float2 x_s = uv;
	float3 x_a_and_d = ndf_to_disk_ggx(h, alpha);
	float2 x_a = x_a_and_d.xy;
	float d = x_a_and_d.z;

	float lambda = QueryLod(res * uv_J, filter_size);

	float D_filter = .0;

	for(float m = .0; m<2.; m += 1.) {
		float l = floor(lambda) + m;

		float w_lambda = 1. - abs(lambda - l);
		float res_s = res * pow(2., -l);
		float res_a = pow(2., l);

		float2x2 uv_J2 = filter_size * uv_J;
		float2x2 sigma_s = uv_J2 * transpose(uv_J2);

		float2x2 sigma_a = d * pow(glint_alpha, 2.) * float2x2(1., .0, .0, 1.);

		float2 base_i_a = clamp(round(x_a * res_a), 1., res_a-1.);
		for(int j_a = 0; j_a < 4; ++j_a) {
			float2 i_a = base_i_a + float2(int2(j_a, j_a/2)%2)-.5;

			float2 base_i_s = round(x_s * res_s);
			for(int j_s = 0; j_s < 4; ++j_s) {
				float2 i_s = base_i_s + float2(int2(j_s, j_s/2)%2)-.5;

				float2 g_s = (i_s + Rand2D(i_s, i_a, l, 1u) - .5) / res_s;
				float2 g_a = (i_a + Rand2D(i_s, i_a, l, 2u) - .5) / res_a;

				float r = Rand1D(i_s, i_a, l, 4u);
				float roulette = smoothstep(max(.0, r-.1), min(1.0, r+.1), w_lambda);

				D_filter += roulette * normal(sigma_a, x_a - g_a) * normal(sigma_s, x_s - g_s) / N;
			}
		}
		D_filter += w_lambda * compensation(x_a, sigma_a, res_a);
	}

	return D_filter * d / PI;
}

#endif  // __GLITTER_INC