glitter.cginc


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269

#ifndef __GLITTER_INC
#define __GLITTER_INC

/*
 * This is an implementation of Kemppinen et. al.'s "Evaluating and Sampling
 * Glinty NDFs in Constant Time".
 * It is ported from: https://www.shadertoy.com/view/tcdGDl
 * Since no license terms are listed in the shader body, it is protected by
 * the default Shadertoy license (per https://www.shadertoy.com/terms),
 * which is the Creative Commons Attribution-NonCommercial-ShareAlike 3.0
 * Unported License: https://creativecommons.org/licenses/by-nc-sa/3.0/deed.en
 *
 * I have made changes to this code. They are:
 *   1. Syntax changes required to translate GLSL to HLSL.
 *   2. Stylistic preferences, like using "1" or "1.0" instead of "1.".
 *   3. The `GetGlitterLighting` function, which populates data required for
 *      indirect glitter. The original paper only discusses analytic lighting.
 *
 * @article{KPT:2025:Glinty,
 *   title = {Evaluating and Sampling Glinty NDFs in Constant Time},
 *   author = {Kemppinen, Pauli and Paulin, LoÏs and Thonat,
 *       Théo and Thiery, Jean-Marc and Lehtinen, Jaakko and Boubekeur,
 *       Tamy},
 *   year = {2025},
 *   journal = {ACM Transactions on Graphics (Proc. SIGGRAPH Asia 2025)},
 *   volume = {44},
 *   number = {6},
 *   articleno = {255},
 * }
 */

#define PI 3.1415926535897932384626433832795028841971
// Remaps [0, UINT_MAX] to [0, 1]
#define UINT_TO_UNIT (1.0 / 4294967296.0)

// Lambert azimuthal equal area projection
float2 lambert(float3 v) {
  return v.xy / sqrt(1 + v.z);
}

// v is a microfacet normal that has been squished according to alpha, a
// roughness parameter.
float3 ndf_to_disk_ggx(float3 v, float alpha) {
  // Map `v` onto a hemisphere.
  float3 hemi = float3(v.xy / alpha, v.z);
  float denom = dot(hemi, hemi);
  // Project onto circle with equal area projection, and remap from [-1, 1]
  // to [0, 1].
  float2 v_disk = lambert(normalize(hemi)) * 0.5 + 0.5;
  float jacobian_determinant = 1.0 / (alpha * alpha * denom * denom);
  return float3(v_disk, jacobian_determinant);
}

// Computes (M^T M)^-1
float2x2 inv_quadratic(float2x2 M) {
  float D = determinant(M);
  float2 c0 = transpose(M)[0] / D;
  float2 c1 = transpose(M)[1] / D;
  float A = dot(c0, c0);
  float B = -dot(c0, c1);
  float C = dot(c1, c1);
  return transpose(float2x2(float2(C, B), float2(B, A)));
}

float2x2 uv_ellipsoid(float2x2 uv_J) {
  float2x2 Q = inv_quadratic(transpose(uv_J));
  float2 q0 = transpose(Q)[0];
  float2 q1 = transpose(Q)[1];
  float tr = 0.5 * (q0.x + q1.y);
  float  D = sqrt(max(0.0, tr * tr - determinant(Q)));
  float l1 = tr - D;
  float l2 = tr + D;
  float2 v1 = float2(l1 - q1.y, q0.y);
  float2 v2 = float2(q1.x, l2 - q0.x);
  float2 n = 1.0/sqrt(float2(l1, l2));
  return transpose(float2x2(normalize(v1) * n.x, normalize(v2) * n.y));
}

float QueryLod(float2x2 uv_J, float filter_size) {
  float s0 = length(transpose(uv_J)[0]);
  float s1 = length(transpose(uv_J)[1]);
  return log2(max(s0, s1) * filter_size) + pow(2.0, filter_size);
}

float2x2 inverse(float2x2 m) {
  float det = (m[0][0] * m[1][1]) - (m[0][1] * m[1][0]);

  return float2x2(
      m[1][1], -m[0][1],
      -m[1][0],  m[0][0]
      ) / det;
}

float normal(float2x2 cov, float2 x) {
  return exp(-.5 * dot(x, mul(inverse(cov), x)))
    / (sqrt(determinant(cov)) * 2.0 * PI);
}

uint2 shuffle(uint2 v) {
  v = v * 1664525u + 1013904223u;
  v.x += v.y * 1664525u;
  v.y += v.x * 1664525u;

  v = v ^ (v>>16u);

  v.x += v.y * 1664525u;
  v.y += v.x * 1664525u;
  v = v ^ (v>>16u);
  return v;
}

float2 rand(uint2 v) {
  return float2(shuffle(v)) * UINT_TO_UNIT;
}

float2 Rand2D(float2 x, float2 y, float l, uint i) {
  uint2 ux = asuint(x);
  uint2 uy = asuint(y);
  uint  ul = asuint(l);
  // This is broken, but looks cool.
  //return hash22_fast(asfloat((ux>>16|ux<<16) ^ uy ^ ul ^ (i*0x124u)));
  return rand((ux>>16|ux<<16) ^ uy ^ ul ^ (i*0x124u));
}

float Rand1D(float2 x, float2 y, float l, uint i) {
  return Rand2D(x, y, l, i).x;
}

// Bürmann series, see https://en.wikipedia.org/wiki/Error_function
float erf(float x) {
  float e = exp(-x*x);
  return sign(x) * 2.0 * sqrt((1.0 - e) / PI) *
    (sqrt(PI) * 0.5 + 31.0/200.0 * e - 341.0/8000.0 * e * e);
}

float cdf(float x, float mu, float sigma) {
  return 0.5 + 0.5 * erf((x-mu)/(sigma*sqrt(2.0)));
}

float integrate_interval(float x, float size, float mu, float stdev,
    float lower_limit, float upper_limit) {
  return cdf(min(x+size, upper_limit), mu, stdev)
    - cdf(max(x-size, lower_limit), mu, stdev);
}

float integrate_box(float2 x, float2 size, float2 mu, float2x2 sigma,
    float2 lower_limit, float2 upper_limit) {
  return
    integrate_interval(x.x, size.x, mu.x,
        sqrt(sigma[0][0]), lower_limit.x, upper_limit.x) *
    integrate_interval(x.y, size.y, mu.y,
        sqrt(sigma[1][1]), lower_limit.y, upper_limit.y);
}

float compensation(float2 x, float2x2 sigma, float res) {
  float containing = integrate_box(0.5, 0.5, x, sigma, 0.0, 1.0);
  float2 sampled_cell_center = (floor(x * res) + 0.5) / res;
  float explicitly_evaluated =
    integrate_box(sampled_cell_center, 1.0 / res, x, sigma, 0, 1);
  return containing - explicitly_evaluated;
}

float3 disk_to_ndf_ggx(float2 v_disk, float alpha) {
  float2 p = v_disk * 2.0f - 1.0f;
  float r2 = saturate(dot(p, p));
  float3 hemi = float3(p * sqrt(max(1e-6f, 2.0f - r2)), 1.0f - r2);
  float alpha2 = alpha * alpha;
  float denom =
    sqrt(max(1e-6f, alpha2 * dot(hemi.xy, hemi.xy) + hemi.z * hemi.z));
  return float3(alpha * hemi.xy, hemi.z) / denom;
}

// Algorithm 1 from Kemppinen et. al.
float D_Kemppinen(float3 h, float alpha, float glint_alpha, float2 uv,
    float2x2 uv_J, float N, float filter_size, out float3 micro_normal) {
  float res = sqrt(N);
  float2 x_s = uv;
  float3 x_a_and_d = ndf_to_disk_ggx(h, alpha);
  float2 x_a = x_a_and_d.xy;
  float d = x_a_and_d.z;

  float lambda = QueryLod(res * uv_J, filter_size);

  float D_filter = 0;
  float best_weight = 0;
  float2 best_g_a = x_a;

  [loop]
  for (float m = 0; m < 2; m += 1) {
    float l = floor(lambda) + m;

    float w_lambda = 1.0 - abs(lambda - l);
    float res_s = res * pow(2, -l);
    float res_a = pow(2, l);

    float2x2 uv_J2 = filter_size * uv_J;
    float2x2 sigma_s = mul(uv_J2, transpose(uv_J2));

    float2x2 sigma_a = d * pow(glint_alpha, 2) * float2x2(1, 0, 0, 1);

    float2 base_i_a = floor(x_a * res_a) + 0.5;
    float2 i_a = clamp(base_i_a, 0.5, res_a - 0.5);

    float2 base_i_s = floor(x_s * res_s) + 0.5;
    float2 i_s = clamp(base_i_s, 0.5, res_s - 0.5);

    float2 g_s = (i_s + Rand2D(i_s, i_a, l, 1u) - .5) / res_s;
    float2 g_a = (i_a + Rand2D(i_s, i_a, l, 2u) - .5) / res_a;

    float r = Rand1D(i_s, i_a, l, 4u);
    float roulette = smoothstep(max(.0, r-.1), min(1.0, r+.1), w_lambda);

    float w = roulette * normal(sigma_a, x_a - g_a)
      * normal(sigma_s, x_s - g_s) / N;
    // This is hacky nonsense intended to improve the 1-sampling case. Original
    // code is commented out below.
    D_filter += w < 1 ? sqrt(w) * 2 : w;
    //D_filter += w;
    if (w > best_weight) {
      best_weight = w;
      best_g_a = g_a;
    }
    D_filter += w_lambda * compensation(x_a, sigma_a, res_a);
    // This is also hacked in.
    D_filter += w_lambda * compensation(x_s, sigma_s, res_s);
  }

  micro_normal = normalize(disk_to_ndf_ggx(best_g_a, alpha));
  return D_filter * d / PI;
}

#if defined(_GLITTER)
struct LightGlitter {
  float direct_D;

  float indirect_D;
  float indirect_NoL;
  float indirect_LoH;
};

// Glitter data getter to be run from lighting code.
LightGlitter GetGlitterLighting(
    float glitter_amount, float glitter_roughness, float2 uv, float3x3 tbn, float roughness,
    float3 normal, float3 V, float3 direct_H, float3 indirect_dir) {
  LightGlitter g;
  const float glitter_filter_size = 0.7f;
  float2x2 uv_J = uv_ellipsoid(transpose(float2x2(ddx(uv), ddy(uv))));
  float N = 8.0e5f * pow(10.0f, glitter_amount * 6.0f - 2.0f);

  // Direct
  float3 direct_H_tangent = mul(direct_H, transpose(tbn));
  float3 direct_micro_normal;  // unused
  g.direct_D = D_Kemppinen(direct_H_tangent, roughness, glitter_roughness,
      uv, uv_J, N, glitter_filter_size, direct_micro_normal);

  // Indirect
  float3 indirect_H = normalize(V + indirect_dir);
  float3 indirect_H_tangent = mul(indirect_H, transpose(tbn));
  float3 indirect_micro_normal;  // unused, but required by D_Kemppinen
  g.indirect_D = D_Kemppinen(indirect_H_tangent, roughness, glitter_roughness,
      uv, uv_J, N, glitter_filter_size, indirect_micro_normal);
  g.indirect_NoL = max(1e-4, dot(normal, indirect_dir));
  g.indirect_LoH = max(1e-4, dot(indirect_dir, indirect_H));

  return g;
}
#endif  // _GLITTER

#endif  // __GLITTER_INC