From d41f983216c973953eb313db690e7f4d919eb61b Mon Sep 17 00:00:00 2001
From: yum
Date: Sat, 11 Apr 2026 16:49:27 -0700
Subject: aperiodic: optimize analytic aa

---
 aperiodic_tiling.cginc | 309 ++++++++++++++++++++++++++++++++++---------------
 1 file changed, 218 insertions(+), 91 deletions(-)

diff --git a/aperiodic_tiling.cginc b/aperiodic_tiling.cginc
index 1907c69..4106530 100644
--- a/aperiodic_tiling.cginc
+++ b/aperiodic_tiling.cginc
@@ -13,6 +13,7 @@
 #if defined(_APERIODIC_TILING)
 
 static const float M5 = sqrt(2.0 / 5.0);
+static const float APERIODIC_FILTER_THRESHOLD = 0.1;  // footprint half-extent (in face barycentric units) at or above which the filtered path is used
 
 static const float4 basis_u5_03 = M5 * float4(
   cos(0 * TAU / 10),
@@ -28,6 +29,79 @@ static const float4 basis_v5_03 = M5 * float4(
   sin(3 * TAU / 10));
 static const float basis_v5_44 = M5 * sin(4 * TAU / 10);
 
+static const float2 aperiodic_tile_offsets[4] = {  // half-cell offsets added to the rounded lattice coordinate to form the four candidate tile centers
+  float2(0.5, 0.5),
+  float2(0.5, -0.5),
+  float2(-0.5, 0.5),
+  float2(-0.5, -0.5)
+};
+
+static const float4 aperiodic_face_a03[10] = {  // components 0-3 of each face's first 5D axis selector a (faces enumerate the axis pairs 01,02,03,04,12,13,14,23,24,34)
+  float4(1, 0, 0, 0),
+  float4(1, 0, 0, 0),
+  float4(1, 0, 0, 0),
+  float4(1, 0, 0, 0),
+  float4(0, 1, 0, 0),
+  float4(0, 1, 0, 0),
+  float4(0, 1, 0, 0),
+  float4(0, 0, 1, 0),
+  float4(0, 0, 1, 0),
+  float4(0, 0, 0, 1)
+};
+
+static const float aperiodic_face_a44[10] = {  // component 4 of a; a is never axis 4, so every entry is zero
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+};
+
+static const float4 aperiodic_face_b03[10] = {  // components 0-3 of each face's second 5D axis selector b
+  float4(0, 1, 0, 0),
+  float4(0, 0, 1, 0),
+  float4(0, 0, 0, 1),
+  float4(0, 0, 0, 0),
+  float4(0, 0, 1, 0),
+  float4(0, 0, 0, 1),
+  float4(0, 0, 0, 0),
+  float4(0, 0, 0, 1),
+  float4(0, 0, 0, 0),
+  float4(0, 0, 0, 0)
+};
+
+static const float aperiodic_face_b44[10] = {  // component 4 of b; one for the faces whose second axis is axis 4
+  0, 0, 0, 1, 0, 0, 1, 0, 1, 1
+};
+
+static const float4 aperiodic_face_c03[10] = {  // components 0-3 of the complement mask 1 - a - b (the three axes the face does not span)
+  float4(0, 0, 1, 1),
+  float4(0, 1, 0, 1),
+  float4(0, 1, 1, 0),
+  float4(0, 1, 1, 1),
+  float4(1, 0, 0, 1),
+  float4(1, 0, 1, 0),
+  float4(1, 0, 1, 1),
+  float4(1, 1, 0, 0),
+  float4(1, 1, 0, 1),
+  float4(1, 1, 1, 0)
+};
+
+static const float aperiodic_face_c44[10] = {  // component 4 of the complement mask 1 - a - b
+  1, 1, 1, 0, 1, 1, 0, 1, 0, 0
+};
+
+// Precomputed per-face barycentric transforms: for each face orientation, the
+// inverse of the 2x2 matrix whose columns are proj5(a) and proj5(b).
+static const float2x2 aperiodic_face_matrices[10] = {
+  float2x2(1.5811388300841898, -2.1762508994828216, -0.0, 2.6899940478558295),
+  float2x2(1.5811388300841895, -0.5137431483730078, -0.0, 1.6625077511098139),
+  float2x2(1.5811388300841895, 0.5137431483730076, -0.0, 1.6625077511098136),
+  float2x2(1.5811388300841895, 2.176250899482821, -0.0, 2.6899940478558286),
+  float2x2(2.558336368008464, -0.8312538755549072, -1.58113883008419, 2.176250899482822),
+  float2x2(1.5811388300841895, 0.5137431483730076, -0.9771975379242739, 1.3449970239279145),
+  float2x2(0.977197537924274, 1.3449970239279145, -0.9771975379242739, 1.3449970239279145),
+  float2x2(2.558336368008464, 0.8312538755549067, -2.5583363680084634, 0.8312538755549069),
+  float2x2(0.9771975379242742, 1.3449970239279148, -1.5811388300841895, 0.5137431483730078),
+  float2x2(1.5811388300841898, 2.176250899482821, -2.5583363680084634, -0.8312538755549066)
+};
+
 float dot5(float4 a03, float a44, float4 b03, float b44) {
   return dot(a03, b03) + a44 * b44;
 }
@@ -38,17 +112,24 @@ float2 proj5(float4 p03, float p44) {
       dot5(p03, p44, basis_v5_03, basis_v5_44));
 }
 
-float2x2 inv2x2(float2x2 m) {
-  float d = m[0][0] * m[1][1] - m[0][1] * m[1][0];
-  return float2x2(m[1][1], -m[0][1], -m[1][0], m[0][0]) / d;
+float3 aperiodic_face_color(int face_id) {  // material color for a face orientation; any id outside 0-8 falls through to color 9
+  if (face_id == 0) return _Aperiodic_Tiling_Color_0.rgb;
+  if (face_id == 1) return _Aperiodic_Tiling_Color_1.rgb;
+  if (face_id == 2) return _Aperiodic_Tiling_Color_2.rgb;
+  if (face_id == 3) return _Aperiodic_Tiling_Color_3.rgb;
+  if (face_id == 4) return _Aperiodic_Tiling_Color_4.rgb;
+  if (face_id == 5) return _Aperiodic_Tiling_Color_5.rgb;
+  if (face_id == 6) return _Aperiodic_Tiling_Color_6.rgb;
+  if (face_id == 7) return _Aperiodic_Tiling_Color_7.rgb;
+  if (face_id == 8) return _Aperiodic_Tiling_Color_8.rgb;
+  return _Aperiodic_Tiling_Color_9.rgb;
 }
 
-float2 tile5_barycentric(float4 p03, float p44, float2x2 m, float2 s,
-    float4 a03, float a44, float4 b03, float b44) {
-  float2 q = mul(s, m);
-  p03 -= (1.0 - a03 - b03) * round(q.x * basis_u5_03 + q.y * basis_v5_03);
-  p44 -= (1.0 - a44 - b44) * round(q.x * basis_u5_44 + q.y * basis_v5_44);
-  return mul(m, proj5(p03, p44)) - s;
+float2 aperiodic_tile_barycentric(float2 uv_m, float2x2 m, float2 s, float2 q,  // callers pass uv_m = mul(m, uv), s = candidate tile center, q = mul(s, m)
+    float4 c03, float c44) {
+  float4 shift03 = c03 * round(q.x * basis_u5_03 + q.y * basis_v5_03);  // rounded 5D shift, masked to the axes the face does not span
+  float shift44 = c44 * round(q.x * basis_u5_44 + q.y * basis_v5_44);
+  return uv_m - mul(m, proj5(shift03, shift44)) - s;  // offset from s in the face's barycentric frame
 }
 
 float interval_box_coverage(float center, float half_extent, float radius) {
@@ -81,106 +162,152 @@ float3 aperiodic_tiling_normal(float2 barycentric) {
 }
 #endif // _APERIODIC_TILING_NORMALS
 
-void accumulate_filtered_candidate(
-    float2 barycentric, float2 footprint_half_extents, float edge_width, float3 face_color,
-    inout float3 color_sum, inout float outer_sum, inout float2 normal_xy_sum) {
-  float outer = square_box_coverage(barycentric, footprint_half_extents, 0.5);
-  float inner_radius = max(0.5 - edge_width, 0.0);
-  float inner = square_box_coverage(barycentric, footprint_half_extents, inner_radius);
-  float edge = outer - inner;
+struct AperiodicPointSample {  // running best candidate for the unfiltered (point-sampled) path
+  float distance_to_edge;  // 0.5 - max(|barycentric.x|, |barycentric.y|); larger means deeper inside the tile
+  float2 barycentric;
+  int face_id;
+};
+
+void aperiodic_accumulate_point_orientation(float2 uv, float4 p03, float p44, int face_id,  // tests the four candidate tiles of one face orientation and keeps the deepest one in best
+    inout AperiodicPointSample best) {
+  float2x2 m = aperiodic_face_matrices[face_id];
+  float4 a03 = aperiodic_face_a03[face_id];
+  float a44 = aperiodic_face_a44[face_id];
+  float4 b03 = aperiodic_face_b03[face_id];
+  float b44 = aperiodic_face_b44[face_id];
+  float4 c03 = aperiodic_face_c03[face_id];
+  float c44 = aperiodic_face_c44[face_id];
+
+  float2 r = round(float2(dot5(p03, p44, a03, a44), dot5(p03, p44, b03, b44)));  // rounded coordinates of p along the face's two axes
+  float2 uv_m = mul(m, uv);
+
+  [unroll]
+  for (int candidate_id = 0; candidate_id < 4; ++candidate_id) {
+    float2 s = r + aperiodic_tile_offsets[candidate_id];
+    float2 q = mul(s, m);
+    float2 barycentric = aperiodic_tile_barycentric(uv_m, m, s, q, c03, c44);
+    float distance_to_edge = 0.5 - max(abs(barycentric.x), abs(barycentric.y));
+    bool better = distance_to_edge > best.distance_to_edge;  // the three fields are updated with selects rather than an if block
+    best.distance_to_edge = better ? distance_to_edge : best.distance_to_edge;
+    best.barycentric = better ? barycentric : best.barycentric;
+    best.face_id = better ? face_id : best.face_id;
+  }
+}
+
+void sample_aperiodic_tiling_point(float2 uv, float4 p03, float p44, out float3 albedo,  // unfiltered path: picks the single deepest candidate tile and antialiases only its edge band
+    out float3 tiling_normal_tangent) {
+  AperiodicPointSample best;
+  best.distance_to_edge = -1e10;  // sentinel so the first candidate always replaces it
+  best.barycentric = 0.0;
+  best.face_id = 0;
 
-  color_sum += face_color * inner + _Aperiodic_Tiling_Edge_Color.rgb * edge;
-  outer_sum += outer;
+  [unroll]
+  for (int face_id = 0; face_id < 10; ++face_id) {
+    aperiodic_accumulate_point_orientation(uv, p03, p44, face_id, best);
+  }
+
+  float edge_width = min(_Aperiodic_Tiling_Edge_Thickness, 0.5);
+  float edge_sd = best.distance_to_edge - edge_width;  // signed distance to the edge band; negative inside the band
+  float edge_sd_aa = max(abs(fwidth(edge_sd)), 1e-4);  // NOTE(review): fwidth runs inside the [branch] taken in sample_aperiodic_tiling; confirm the branch condition cannot diverge within a pixel quad
+  float edge_mask = smoothstep(-edge_sd_aa * 0.5, edge_sd_aa * 0.5, edge_sd);  // 0 selects the edge color, 1 selects the face color
+  albedo = lerp(_Aperiodic_Tiling_Edge_Color.rgb, aperiodic_face_color(best.face_id), edge_mask);
 
 #if defined(_APERIODIC_TILING_NORMALS)
-  float3 tile_normal = aperiodic_tiling_normal(barycentric);
-  normal_xy_sum += tile_normal.xy * outer;
+  float3 tile_normal = aperiodic_tiling_normal(best.barycentric);
+  tiling_normal_tangent = normalize(float3(tile_normal.xy * edge_mask, 1.0));  // the xy tilt is scaled by edge_mask, so the normal is flat where the edge color shows
+#else
+  tiling_normal_tangent = 0.0;
 #endif
 }
 
-void accumulate_filtered_lattice(
-    float4 p03, float p44, float2 uv_ddx, float2 uv_ddy,
-    float4 a03, float a44, float4 b03, float b44, float edge_width, float3 face_color,
-    inout float3 color_sum, inout float outer_sum, inout float2 normal_xy_sum) {
-  float2 pa = proj5(a03, a44);
-  float2 pb = proj5(b03, b44);
-  float2x2 m = inv2x2(float2x2(pa.x, pb.x, pa.y, pb.y));
-  float2 r = round(float2(dot5(p03, p44, a03, a44), dot5(p03, p44, b03, b44)));
-  float2 s = float2(0.5, -0.5);
+bool aperiodic_requires_filtering(float2 uv_ddx, float2 uv_ddy) {  // true when, for any face orientation, the footprint half-extent reaches APERIODIC_FILTER_THRESHOLD
+  [unroll]
+  for (int face_id = 0; face_id < 10; ++face_id) {
+    float2x2 m = aperiodic_face_matrices[face_id];
+    float2 dbdx = mul(m, uv_ddx);
+    float2 dbdy = mul(m, uv_ddy);
+    float2 half_extents = 0.5 * (abs(dbdx) + abs(dbdy));  // half-size of the axis-aligned box bounding the parallelogram spanned by dbdx and dbdy
+    if (max(half_extents.x, half_extents.y) >= APERIODIC_FILTER_THRESHOLD) {
+      return true;
+    }
+  }
+  return false;
+}
+
+void aperiodic_accumulate_filtered_orientation(float2 uv, float4 p03, float p44,  // adds the box-filtered interior coverage of one face orientation's four candidate tiles to the running sums
+                                               float2 uv_ddx, float2 uv_ddy, int face_id,
+                                               float inner_radius, inout float3 face_color_sum,
+                                               inout float face_sum) {
+  float2x2 m = aperiodic_face_matrices[face_id];
+  float4 a03 = aperiodic_face_a03[face_id];
+  float a44 = aperiodic_face_a44[face_id];
+  float4 b03 = aperiodic_face_b03[face_id];
+  float b44 = aperiodic_face_b44[face_id];
+  float4 c03 = aperiodic_face_c03[face_id];
+  float c44 = aperiodic_face_c44[face_id];
+
+  float2 r = round(float2(dot5(p03, p44, a03, a44), dot5(p03, p44, b03, b44)));
+  float2 uv_m = mul(m, uv);
 
   float2 dbdx = mul(m, uv_ddx);
   float2 dbdy = mul(m, uv_ddy);
-  float2 footprint_half_extents = 0.5 * (abs(dbdx) + abs(dbdy));
-
-  accumulate_filtered_candidate(
-      tile5_barycentric(p03, p44, m, r + s.xx, a03, a44, b03, b44),
-      footprint_half_extents, edge_width, face_color, color_sum, outer_sum, normal_xy_sum);
-  accumulate_filtered_candidate(
-      tile5_barycentric(p03, p44, m, r + s.xy, a03, a44, b03, b44),
-      footprint_half_extents, edge_width, face_color, color_sum, outer_sum, normal_xy_sum);
-  accumulate_filtered_candidate(
-      tile5_barycentric(p03, p44, m, r + s.yx, a03, a44, b03, b44),
-      footprint_half_extents, edge_width, face_color, color_sum, outer_sum, normal_xy_sum);
-  accumulate_filtered_candidate(
-      tile5_barycentric(p03, p44, m, r + s.yy, a03, a44, b03, b44),
-      footprint_half_extents, edge_width, face_color, color_sum, outer_sum, normal_xy_sum);
+  float2 half_extents = 0.5 * (abs(dbdx) + abs(dbdy));
+  float2 inner_limit = half_extents + inner_radius;  // at or beyond this offset the footprint box cannot overlap the tile interior on that axis
+  float3 face_color = aperiodic_face_color(face_id);
+
+  [unroll]
+  for (int candidate_id = 0; candidate_id < 4; ++candidate_id) {
+    float2 s = r + aperiodic_tile_offsets[candidate_id];
+    float2 q = mul(s, m);
+    float2 barycentric = aperiodic_tile_barycentric(uv_m, m, s, q, c03, c44);
+
+    if (abs(barycentric.x) >= inner_limit.x || abs(barycentric.y) >= inner_limit.y) {  // skip candidates separated from the footprint along at least one axis
+      continue;
+    }
+
+    float inner = square_box_coverage(barycentric, half_extents, inner_radius);
+    face_color_sum += face_color * inner;
+    face_sum += inner;
+  }
 }
 
-void sample_aperiodic_tiling(float2 uv, inout float3 albedo, out float3 tiling_normal_tangent) {
-  float4 p03 = uv.x * basis_u5_03 + uv.y * basis_v5_03;
-  float p44 = uv.x * basis_u5_44 + uv.y * basis_v5_44;
-  float2 uv_ddx = ddx(uv);
-  float2 uv_ddy = ddy(uv);
+void sample_aperiodic_tiling_filtered(float2 uv, float4 p03, float p44, float2 uv_ddx,  // filtered path: sums face-interior coverage over all ten orientations and gives the remainder to the edge color
+                                      float2 uv_ddy, out float3 albedo,
+                                      out float3 tiling_normal_tangent) {
+  float inner_radius = max(0.5 - min(_Aperiodic_Tiling_Edge_Thickness, 0.5), 0.0);  // half-width of the tile interior once the edge band is removed
+  float3 face_color_sum = 0.0;
+  float face_sum = 0.0;
 
-  float2 u = float2(0, 1);
+  [unroll]
+  for (int face_id = 0; face_id < 10; ++face_id) {
+    aperiodic_accumulate_filtered_orientation(
+        uv, p03, p44, uv_ddx, uv_ddy, face_id, inner_radius, face_color_sum, face_sum);
+  }
 
-  // Analytically filter the existing local tile set in each orientation's
-  // barycentric space using a separable box footprint.
-  float edge_width = min(_Aperiodic_Tiling_Edge_Thickness, 0.5);
-  float3 color_sum = 0.0;
-  float outer_sum = 0.0;
-  float2 normal_xy_sum = 0.0;
-
-  accumulate_filtered_lattice(
-      p03, p44, uv_ddx, uv_ddy, u.yxxx, u.x, u.xyxx, u.x, edge_width,
-      _Aperiodic_Tiling_Color_0.rgb, color_sum, outer_sum, normal_xy_sum);
-  accumulate_filtered_lattice(
-      p03, p44, uv_ddx, uv_ddy, u.yxxx, u.x, u.xxyx, u.x, edge_width,
-      _Aperiodic_Tiling_Color_1.rgb, color_sum, outer_sum, normal_xy_sum);
-  accumulate_filtered_lattice(
-      p03, p44, uv_ddx, uv_ddy, u.yxxx, u.x, u.xxxy, u.x, edge_width,
-      _Aperiodic_Tiling_Color_2.rgb, color_sum, outer_sum, normal_xy_sum);
-  accumulate_filtered_lattice(
-      p03, p44, uv_ddx, uv_ddy, u.yxxx, u.x, u.xxxx, u.y, edge_width,
-      _Aperiodic_Tiling_Color_3.rgb, color_sum, outer_sum, normal_xy_sum);
-  accumulate_filtered_lattice(
-      p03, p44, uv_ddx, uv_ddy, u.xyxx, u.x, u.xxyx, u.x, edge_width,
-      _Aperiodic_Tiling_Color_4.rgb, color_sum, outer_sum, normal_xy_sum);
-  accumulate_filtered_lattice(
-      p03, p44, uv_ddx, uv_ddy, u.xyxx, u.x, u.xxxy, u.x, edge_width,
-      _Aperiodic_Tiling_Color_5.rgb, color_sum, outer_sum, normal_xy_sum);
-  accumulate_filtered_lattice(
-      p03, p44, uv_ddx, uv_ddy, u.xyxx, u.x, u.xxxx, u.y, edge_width,
-      _Aperiodic_Tiling_Color_6.rgb, color_sum, outer_sum, normal_xy_sum);
-  accumulate_filtered_lattice(
-      p03, p44, uv_ddx, uv_ddy, u.xxyx, u.x, u.xxxy, u.x, edge_width,
-      _Aperiodic_Tiling_Color_7.rgb, color_sum, outer_sum, normal_xy_sum);
-  accumulate_filtered_lattice(
-      p03, p44, uv_ddx, uv_ddy, u.xxyx, u.x, u.xxxx, u.y, edge_width,
-      _Aperiodic_Tiling_Color_8.rgb, color_sum, outer_sum, normal_xy_sum);
-  accumulate_filtered_lattice(
-      p03, p44, uv_ddx, uv_ddy, u.xxxy, u.x, u.xxxx, u.y, edge_width,
-      _Aperiodic_Tiling_Color_9.rgb, color_sum, outer_sum, normal_xy_sum);
-
-  float normalization = max(outer_sum, 1e-4);
-  albedo *= color_sum / normalization;
+  float edge_weight = saturate(1.0 - face_sum);  // NOTE(review): the removed code divided by total coverage; face_color_sum is not renormalized here if face_sum exceeds 1 - confirm that cannot happen
+  albedo = face_color_sum + _Aperiodic_Tiling_Edge_Color.rgb * edge_weight;
 
 #if defined(_APERIODIC_TILING_NORMALS)
-  tiling_normal_tangent = normalize(float3(normal_xy_sum / normalization, 1.0));
+  // In the filtered regime the bevel is subpixel detail, so keep the normal flat.
+  tiling_normal_tangent = float3(0.0, 0.0, 1.0);
 #else
-  tiling_normal_tangent = 0;
+  tiling_normal_tangent = 0.0;
 #endif
 }
+
+void sample_aperiodic_tiling(float2 uv, out float3 albedo, out float3 tiling_normal_tangent) {  // NOTE(review): albedo was inout and multiplied in place; it is now out and overwritten - confirm callers expect that
+  float4 p03 = uv.x * basis_u5_03 + uv.y * basis_v5_03;  // lift uv onto the five basis directions (components 0-3 here, component 4 in p44)
+  float p44 = uv.x * basis_u5_44 + uv.y * basis_v5_44;
+  float2 uv_ddx = ddx(uv);  // uv derivatives are taken here, before the branch below
+  float2 uv_ddy = ddy(uv);
+
+  [branch]
+  if (aperiodic_requires_filtering(uv_ddx, uv_ddy)) {  // footprint at or above the threshold: analytic box filter; otherwise point sample
+    sample_aperiodic_tiling_filtered(uv, p03, p44, uv_ddx, uv_ddy, albedo, tiling_normal_tangent);
+  } else {
+    sample_aperiodic_tiling_point(uv, p03, p44, albedo, tiling_normal_tangent);
+  }
+}
 #endif // defined(_APERIODIC_TILING)
 
 void apply_aperiodic_tiling(float2 uv, inout float3 albedo, inout float3 normal_tangent) {
--
cgit v1.2.3