glitter.cginc


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203

#ifndef __GLITTER_INC
#define __GLITTER_INC

/*
 * This is an implementation of Kemppinen et. al.'s "Evaluating and Sampling
 * Glinty NDFs in Constant Time".
 * It is ported from: https://www.shadertoy.com/view/tcdGDl
 * Since no license terms are listed in the shader body, it is protected by
 * the default Shadertoy license (per https://www.shadertoy.com/terms),
 * which is the Creative Commons Attribution-NonCommercial-ShareAlike 3.0
 * Unported License: https://creativecommons.org/licenses/by-nc-sa/3.0/deed.en
 *
 * I have made changes to this code. They are:
 *   1. Syntax changes required to translate GLSL to HLSL.
 *   2. Stylistic preferences, like using "1" or "1.0" instead of "1.".
 *
 * @article{KPT:2025:Glinty,
 *   title = {Evaluating and Sampling Glinty NDFs in Constant Time},
 *   author = {Kemppinen, Pauli and Paulin, LoÏs and Thonat,
 *       Théo and Thiery, Jean-Marc and Lehtinen, Jaakko and Boubekeur,
 *       Tamy},
 *   year = {2025},
 *   journal = {ACM Transactions on Graphics (Proc. SIGGRAPH Asia 2025)},
 *   volume = {44},
 *   number = {6},
 *   articleno = {255},
 * }
 */

#define PI 3.1415926535897932384626433832795028841971
// Remaps [0, UINT_MAX] to [0, 1]
#define UINT_TO_UNIT (1.0 / 4294967296.0)

// Lambert azimuthal equal area projection
float2 lambert(float3 v) {
  return v.xy / sqrt(1 + v.z);
}

// v is a microfacet normal that has been squished according to alpha, a
// roughness parameter.
float3 ndf_to_disk_ggx(float3 v, float alpha) {
  // Map `v` onto a hemisphere.
  float3 hemi = float3(v.xy / alpha, v.z);
  float denom = dot(hemi, hemi);
  // Project onto circle with equal area projection, and remap from [-1, 1]
  // to [0, 1].
  float2 v_disk = lambert(normalize(hemi)) * 0.5 + 0.5;
  float jacobian_determinant = 1.0 / (alpha * alpha * denom * denom);
  return float3(v_disk, jacobian_determinant);
}

// Computes (M^T M)^-1
float2x2 inv_quadratic(float2x2 M) {
  float D = determinant(M);
  float2 c0 = transpose(M)[0] / D;
  float2 c1 = transpose(M)[1] / D;
  float A = dot(c0, c0);
  float B = -dot(c0, c1);
  float C = dot(c1, c1);
  return transpose(float2x2(float2(C, B), float2(B, A)));
}

float2x2 uv_ellipsoid(float2x2 uv_J) {
  float2x2 Q = inv_quadratic(transpose(uv_J));
  float2 q0 = transpose(Q)[0];
  float2 q1 = transpose(Q)[1];
  float tr = 0.5 * (q0.x + q1.y);
  float  D = sqrt(max(0.0, tr * tr - determinant(Q)));
  float l1 = tr - D;
  float l2 = tr + D;
  float2 v1 = float2(l1 - q1.y, q0.y);
  float2 v2 = float2(q1.x, l2 - q0.x);
  float2 n = 1.0/sqrt(float2(l1, l2));
  return transpose(float2x2(normalize(v1) * n.x, normalize(v2) * n.y));
}

float QueryLod(float2x2 uv_J, float filter_size) {
  float s0 = length(transpose(uv_J)[0]);
  float s1 = length(transpose(uv_J)[1]);
  return log2(max(s0, s1) * filter_size) + pow(2.0, filter_size);
}

float2x2 inverse(float2x2 m) {
  float det = (m[0][0] * m[1][1]) - (m[0][1] * m[1][0]);

  return float2x2(
      m[1][1], -m[0][1],
      -m[1][0],  m[0][0]
      ) / det;
}

float normal(float2x2 cov, float2 x) {
  return exp(-.5 * dot(x, mul(inverse(cov), x))) / (sqrt(determinant(cov)) * 2.0 * PI);
}

uint2 shuffle(uint2 v) {
  v = v * 1664525u + 1013904223u;
  v.x += v.y * 1664525u;
  v.y += v.x * 1664525u;

  v = v ^ (v>>16u);

  v.x += v.y * 1664525u;
  v.y += v.x * 1664525u;
  v = v ^ (v>>16u);
  return v;
}

float2 rand(uint2 v) {
  return float2(shuffle(v)) * UINT_TO_UNIT;
}

float2 Rand2D(float2 x, float2 y, float l, uint i) {
  uint2 ux = asuint(x);
  uint2 uy = asuint(y);
  uint  ul = asuint(l);
  // This is broken, but looks cool.
  //return hash22_fast(asfloat((ux>>16|ux<<16) ^ uy ^ ul ^ (i*0x124u)));
  return rand((ux>>16|ux<<16) ^ uy ^ ul ^ (i*0x124u));
}

float Rand1D(float2 x, float2 y, float l, uint i) {
  return Rand2D(x, y, l, i).x;
}

// Bürmann series, see https://en.wikipedia.org/wiki/Error_function
float erf(float x) {
  float e = exp(-x*x);
  return sign(x) * 2.0 * sqrt((1.0 - e) / PI) *
    (sqrt(PI) * 0.5 + 31.0/200.0 * e - 341.0/8000.0 * e * e);
}

float cdf(float x, float mu, float sigma) {
  return 0.5 + 0.5 * erf((x-mu)/(sigma*sqrt(2.0)));
}

float integrate_interval(float x, float size, float mu, float stdev, float lower_limit, float upper_limit) {
  return cdf(min(x+size, upper_limit), mu, stdev) - cdf(max(x-size, lower_limit), mu, stdev);
}

float integrate_box(float2 x, float2 size, float2 mu, float2x2 sigma, float2 lower_limit, float2 upper_limit) {
  return
    integrate_interval(x.x, size.x, mu.x, sqrt(sigma[0][0]), lower_limit.x, upper_limit.x) *
    integrate_interval(x.y, size.y, mu.y, sqrt(sigma[1][1]), lower_limit.y, upper_limit.y);
}

float compensation(float2 x_a, float2x2 sigma_a, float res_a) {
  float containing = integrate_box(0.5, 0.5, x_a, sigma_a, 0.0, 1.0);
  float explicitly_evaluated = integrate_box(round(x_a*res_a)/res_a, 1.0/res_a, x_a, sigma_a, 0, 1);
  return containing - explicitly_evaluated;
}

float D_Kemppinen(float3 h, float alpha, float glint_alpha, float2 uv, float2x2 uv_J, float N, float filter_size) {
  float res = sqrt(N);
  float2 x_s = uv;
  float3 x_a_and_d = ndf_to_disk_ggx(h, alpha);
  float2 x_a = x_a_and_d.xy;
  float d = x_a_and_d.z;

  float lambda = QueryLod(res * uv_J, filter_size);

  float D_filter = 0;

  [loop]
    for (float m = 0; m < 2; m += 1) {
      float l = floor(lambda) + m;

      float w_lambda = 1.0 - abs(lambda - l);
      float res_s = res * pow(2, -l);
      float res_a = pow(2, l);

      float2x2 uv_J2 = filter_size * uv_J;
      float2x2 sigma_s = mul(uv_J2, transpose(uv_J2));

      float2x2 sigma_a = d * pow(glint_alpha, 2) * float2x2(1, 0, 0, 1);

      float2 base_i_a = clamp(round(x_a * res_a), 1, res_a-1);
      [loop]
        for (uint j_a = 0; j_a < 4; ++j_a) {
          float2 i_a = base_i_a + float2(int2(j_a, j_a/2)%2)-.5;

          float2 base_i_s = round(x_s * res_s);
          [loop]
            for (uint j_s = 0; j_s < 4; ++j_s) {
              float2 i_s = base_i_s + float2(int2(j_s, j_s/2)%2)-.5;

              float2 g_s = (i_s + Rand2D(i_s, i_a, l, 1u) - .5) / res_s;
              float2 g_a = (i_a + Rand2D(i_s, i_a, l, 2u) - .5) / res_a;

              float r = Rand1D(i_s, i_a, l, 4u);
              float roulette = smoothstep(max(.0, r-.1), min(1.0, r+.1), w_lambda);

              D_filter += roulette * normal(sigma_a, x_a - g_a) * normal(sigma_s, x_s - g_s) / N;
            }
        }
      D_filter += w_lambda * compensation(x_a, sigma_a, res_a);
    }

  return D_filter * d / PI;
}

#endif  // __GLITTER_INC