From 42dbd067eb455d4b67d6ae8c57f1cbe4ec7dccfa Mon Sep 17 00:00:00 2001 From: yum Date: Fri, 8 Aug 2025 19:28:27 -0700 Subject: Optimize sh9 implementation Saves ~20 instructions (572 -> 552). --- brdf.cginc | 8 +++++--- filamented.cginc | 24 ------------------------ lighting.cginc | 51 ++++++++++++++++++--------------------------------- 3 files changed, 23 insertions(+), 60 deletions(-) diff --git a/brdf.cginc b/brdf.cginc index 12f2770..5a24307 100644 --- a/brdf.cginc +++ b/brdf.cginc @@ -55,6 +55,7 @@ float G_GGXSmith(float roughness, float NoL, float NoV) { return rcp(denom); } +#if defined(_CLOTH_SHEEN) // Estevez "Production Friendly Microfacet Sheen BRDF" // Equation 2. // The original equation is: @@ -113,6 +114,7 @@ float G_Cloth(float roughness, float LoH) { // Apply terminator softening (equation 4). return pow(lambda, 1.0f + 2.0f * pow(one_minus_LoH, 8)); } +#endif float4 brdf(Pbr pbr, LightData data) { float3 specular = 0; @@ -134,7 +136,7 @@ float4 brdf(Pbr pbr, LightData data) { #endif // Direct - if (true) { + { float remainder = 1.0f; #if defined(_CLEARCOAT) @@ -180,7 +182,7 @@ float4 brdf(Pbr pbr, LightData data) { // Indirect #if defined(FORWARD_BASE_PASS) - if (true) { + { float remainder = 1.0f; float2 dfg_uv = float2(data.common.NoV, pbr.roughness); @@ -212,7 +214,7 @@ float4 brdf(Pbr pbr, LightData data) { // For energy conservation with the diffuse term, we use the view-dependent Fresnel. float3 F = F_Schlick(data.common.NoV, f0_spec, 1.0f); remainder *= (1.0f - F); - + // Diffuse is Lambertian, which is pre-integrated into the SH diffuse probe float3 indirect_diffuse = pbr.albedo.xyz * data.indirect.diffuse * remainder * (1.0 - pbr.metallic); diffuse += indirect_diffuse; diff --git a/filamented.cginc b/filamented.cginc index fb019cb..f6bd67a 100644 --- a/filamented.cginc +++ b/filamented.cginc @@ -213,30 +213,6 @@ #include "UnityCG.cginc" #include "UnityImageBasedLightingMinimal.cginc" -float normalFiltering(float perceptualRoughness, const float3 worldNormal) { - // Kaplanyan 2016, "Stable specular highlights" - // Tokuyoshi 2017, "Error Reduction and Simplification for Shading Anti-Aliasing" - // Tokuyoshi and Kaplanyan 2019, "Improved Geometric Specular Antialiasing" - - // This implementation is meant for deferred rendering in the original paper but - // we use it in forward rendering as well (as discussed in Tokuyoshi and Kaplanyan - // 2019). The main reason is that the forward version requires an expensive transform - // of the half vector by the tangent frame for every light. This is therefore an - // approximation but it works well enough for our needs and provides an improvement - // over our original implementation based on Vlachos 2015, "Advanced VR Rendering". - - float3 du = ddx(worldNormal); - float3 dv = ddy(worldNormal); - - float variance = _Specular_AA_Variance * (dot(du, du) + dot(dv, dv)); - - float roughness = perceptualRoughnessToRoughness(perceptualRoughness); - float kernelRoughness = min(2.0 * variance, _Specular_AA_Threshold); - float squareRoughness = saturate(roughness * roughness + kernelRoughness); - - return roughnessToPerceptualRoughness(sqrt(squareRoughness)); -} - half3 Unity_GlossyEnvironment_local (UNITY_ARGS_TEXCUBE(tex), half4 hdr, Unity_GlossyEnvironmentData glossIn) { half perceptualRoughness = glossIn.roughness /* perceptualRoughness */ ; diff --git a/lighting.cginc b/lighting.cginc index 48d95a1..6668459 100644 --- a/lighting.cginc +++ b/lighting.cginc @@ -114,50 +114,35 @@ float3 yumSH9(float4 n, float3 worldPos, inout LightIndirect light) { // unity_SHB*: first four of the L2 coefficients // unity_SHC: last L2 coefficient - // Parse out coefficients into a simpler but less efficient format. - float3 L00 = float3(unity_SHAr.w, unity_SHAg.w, unity_SHAb.w); - float3 L1_1 = float3(unity_SHAr.x, unity_SHAg.x, unity_SHAb.x); - float3 L10 = float3(unity_SHAr.y, unity_SHAg.y, unity_SHAb.y); - float3 L11 = float3(unity_SHAr.z, unity_SHAg.z, unity_SHAb.z); - float3 L2_2 = float3(unity_SHBr.x, unity_SHBg.x, unity_SHBb.x); - float3 L2_1 = float3(unity_SHBr.y, unity_SHBg.y, unity_SHBb.y); - float3 L20 = float3(unity_SHBr.z, unity_SHBg.z, unity_SHBb.z); - float3 L21 = float3(unity_SHBr.w, unity_SHBg.w, unity_SHBb.w); - float3 L22 = unity_SHC; - - // Equation 13 from "An Efficient Representation for Irradiance Environment - // Maps" by Ramamoorthi and Hanrahan. Note that the order of some - // coefficients is different, and normalization constants have been - // premultiplied by Unity. - float3 L0 = L00; - float3 L1 = L1_1 * n.x + L10 * n.y + L11 * n.z; + // L0 band + float3 L0 = float3(unity_SHAr.w, unity_SHAg.w, unity_SHAb.w); + + // L1 band + float3 L1 = float3( + dot(unity_SHAr.xyz, n.xyz), + dot(unity_SHAg.xyz, n.xyz), + dot(unity_SHAb.xyz, n.xyz) + ); + + // L2 band + float4 v = float4(n.x * n.y, n.y * n.z, n.z * n.z, n.x * n.z); float3 L2 = - L2_2 * n.x * n.y + - L2_1 * n.y * n.z + - L20 * n.z * n.z + - L21 * n.x * n.z + - L22 * (n.x * n.x - n.y * n.y); + float3(dot(unity_SHBr.xyzw, v), dot(unity_SHBg.xyzw, v), dot(unity_SHBb.xyzw, v)) + + unity_SHC.xyz * (n.x * n.x - n.y * n.y); // TODO expose this as a parameter float wrap_term = 0.0f; + // Original coefficients: 1, 2/3, 1/4. // Wrapped coefficients: 1, (2-w)/3, ((1-w)^2)/4. - - // Setting w=0, the l1 band is: - // (2-w)/3 = 2/3 - // 2-w = 2 - // 1-w/2 = 1 float l1_wrap = 1.0f - wrap_term * 0.75f; L1 *= l1_wrap; - // The l2 band is: - // ((1-w)^2)/4 = 1/4 - // (1-w)^2 = 1 - float l2_wrap = (1.0f-wrap_term); - l2_wrap *= l2_wrap; + float l2_wrap_base = 1.0f - wrap_term; + float l2_wrap = l2_wrap_base * l2_wrap_base; L2 *= l2_wrap; - light.L00 = L00; + light.L00 = L0; light.L01r = unity_SHAr.xyz * l1_wrap; light.L01g = unity_SHAg.xyz * l1_wrap; light.L01b = unity_SHAb.xyz * l1_wrap; -- cgit v1.2.3