From 42dbd067eb455d4b67d6ae8c57f1cbe4ec7dccfa Mon Sep 17 00:00:00 2001
From: yum <yum.food.vr@gmail.com>
Date: Fri, 8 Aug 2025 19:28:27 -0700
Subject: Optimize sh9 implementation

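Evaluate the SH9 bands in yumSH9 with dot products against the packed
unity_SHA*/unity_SHB* vectors instead of unpacking each coefficient into its
own float3. Saves ~20 instructions (572 -> 552).

Also in this change:
- brdf.cginc: compile the cloth sheen helpers only when _CLOTH_SHEEN is
  defined, and replace the `if (true)` blocks in brdf() with plain scopes.
- filamented.cginc: remove normalFiltering().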
---
 brdf.cginc       |  8 +++++---
 filamented.cginc | 24 ------------------------
 lighting.cginc   | 51 ++++++++++++++++++---------------------------------
 3 files changed, 23 insertions(+), 60 deletions(-)

diff --git a/brdf.cginc b/brdf.cginc
index 12f2770..5a24307 100644
--- a/brdf.cginc
+++ b/brdf.cginc
@@ -55,6 +55,7 @@ float G_GGXSmith(float roughness, float NoL, float NoV) {
   return rcp(denom);
 }
 
+#if defined(_CLOTH_SHEEN)
 // Estevez "Production Friendly Microfacet Sheen BRDF"
 // Equation 2.
 // The original equation is:
@@ -113,6 +114,7 @@ float G_Cloth(float roughness, float LoH) {
   // Apply terminator softening (equation 4).
   return pow(lambda, 1.0f + 2.0f * pow(one_minus_LoH, 8));
 }
+#endif
 
 float4 brdf(Pbr pbr, LightData data) {
   float3 specular = 0;
@@ -134,7 +136,7 @@ float4 brdf(Pbr pbr, LightData data) {
 #endif
 
   // Direct
-  if (true) {
+  {
     float remainder = 1.0f;
 
 #if defined(_CLEARCOAT)
@@ -180,7 +182,7 @@ float4 brdf(Pbr pbr, LightData data) {
 
   // Indirect
 #if defined(FORWARD_BASE_PASS)
-  if (true) {
+  {
     float remainder = 1.0f;
     float2 dfg_uv = float2(data.common.NoV, pbr.roughness);
 
@@ -212,7 +214,7 @@ float4 brdf(Pbr pbr, LightData data) {
     // For energy conservation with the diffuse term, we use the view-dependent Fresnel.
     float3 F = F_Schlick(data.common.NoV, f0_spec, 1.0f);
     remainder *= (1.0f - F);
-
+    
     // Diffuse is Lambertian, which is pre-integrated into the SH diffuse probe
     float3 indirect_diffuse = pbr.albedo.xyz * data.indirect.diffuse * remainder * (1.0 - pbr.metallic);
     diffuse  += indirect_diffuse;
diff --git a/filamented.cginc b/filamented.cginc
index fb019cb..f6bd67a 100644
--- a/filamented.cginc
+++ b/filamented.cginc
@@ -213,30 +213,6 @@
 #include "UnityCG.cginc"
 #include "UnityImageBasedLightingMinimal.cginc"
 
-float normalFiltering(float perceptualRoughness, const float3 worldNormal) {
-    // Kaplanyan 2016, "Stable specular highlights"
-    // Tokuyoshi 2017, "Error Reduction and Simplification for Shading Anti-Aliasing"
-    // Tokuyoshi and Kaplanyan 2019, "Improved Geometric Specular Antialiasing"
-
-    // This implementation is meant for deferred rendering in the original paper but
-    // we use it in forward rendering as well (as discussed in Tokuyoshi and Kaplanyan
-    // 2019). The main reason is that the forward version requires an expensive transform
-    // of the half vector by the tangent frame for every light. This is therefore an
-    // approximation but it works well enough for our needs and provides an improvement
-    // over our original implementation based on Vlachos 2015, "Advanced VR Rendering".
-
-    float3 du = ddx(worldNormal);
-    float3 dv = ddy(worldNormal);
-
-    float variance = _Specular_AA_Variance * (dot(du, du) + dot(dv, dv));
-
-    float roughness = perceptualRoughnessToRoughness(perceptualRoughness);
-    float kernelRoughness = min(2.0 * variance, _Specular_AA_Threshold);
-    float squareRoughness = saturate(roughness * roughness + kernelRoughness);
-
-    return roughnessToPerceptualRoughness(sqrt(squareRoughness));
-}
-
 half3 Unity_GlossyEnvironment_local (UNITY_ARGS_TEXCUBE(tex), half4 hdr, Unity_GlossyEnvironmentData glossIn)
 {
 	half perceptualRoughness = glossIn.roughness /* perceptualRoughness */ ;
diff --git a/lighting.cginc b/lighting.cginc
index 48d95a1..6668459 100644
--- a/lighting.cginc
+++ b/lighting.cginc
@@ -114,50 +114,35 @@ float3 yumSH9(float4 n, float3 worldPos, inout LightIndirect light) {
   //   unity_SHB*:     first four of the L2 coefficients
   //   unity_SHC:      last L2 coefficient
 
-  // Parse out coefficients into a simpler but less efficient format.
-  float3 L00  = float3(unity_SHAr.w, unity_SHAg.w, unity_SHAb.w);
-  float3 L1_1 = float3(unity_SHAr.x, unity_SHAg.x, unity_SHAb.x);
-  float3 L10  = float3(unity_SHAr.y, unity_SHAg.y, unity_SHAb.y);
-  float3 L11  = float3(unity_SHAr.z, unity_SHAg.z, unity_SHAb.z);
-  float3 L2_2 = float3(unity_SHBr.x, unity_SHBg.x, unity_SHBb.x);
-  float3 L2_1 = float3(unity_SHBr.y, unity_SHBg.y, unity_SHBb.y);
-  float3 L20  = float3(unity_SHBr.z, unity_SHBg.z, unity_SHBb.z);
-  float3 L21  = float3(unity_SHBr.w, unity_SHBg.w, unity_SHBb.w);
-  float3 L22  = unity_SHC;
-
-  // Equation 13 from "An Efficient Representation for Irradiance Environment
-  // Maps" by Ramamoorthi and Hanrahan. Note that the order of some
-  // coefficients is different, and normalization constants have been
-  // premultiplied by Unity.
-  float3 L0 = L00;
-  float3 L1 = L1_1 * n.x + L10 * n.y + L11 * n.z;
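+  // Bands follow Ramamoorthi & Hanrahan eq. 13 (coefficient order differs; Unity premultiplies the constants). L0 band: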
+  float3 L0 = float3(unity_SHAr.w, unity_SHAg.w, unity_SHAb.w);
+
+  // L1 band
+  float3 L1 = float3(
+    dot(unity_SHAr.xyz, n.xyz),
+    dot(unity_SHAg.xyz, n.xyz),
+    dot(unity_SHAb.xyz, n.xyz)
+  );
+
+  // L2 band
+  float4 v = float4(n.x * n.y, n.y * n.z, n.z * n.z, n.x * n.z);
   float3 L2 =
-    L2_2 * n.x * n.y +
-    L2_1 * n.y * n.z +
-    L20  * n.z * n.z +
-    L21 * n.x * n.z +
-    L22 * (n.x * n.x - n.y * n.y);
+    float3(dot(unity_SHBr.xyzw, v), dot(unity_SHBg.xyzw, v), dot(unity_SHBb.xyzw, v)) +
+    unity_SHC.xyz * (n.x * n.x - n.y * n.y);
 
   // TODO expose this as a parameter
   float wrap_term = 0.0f;
+
   // Original coefficients: 1, 2/3, 1/4.
   // Wrapped coefficients: 1, (2-w)/3, ((1-w)^2)/4.
-
-  // Setting w=0, the l1 band is:
-  //   (2-w)/3 = 2/3
-  //   2-w = 2
-  //   1-w/2 = 1
   float l1_wrap = 1.0f - wrap_term * 0.75f;
   L1 *= l1_wrap;
 
-  // The l2 band is:
-  //   ((1-w)^2)/4 = 1/4
-  //   (1-w)^2 = 1
-  float l2_wrap = (1.0f-wrap_term);
-  l2_wrap *= l2_wrap;
+  float l2_wrap_base = 1.0f - wrap_term;
+  float l2_wrap = l2_wrap_base * l2_wrap_base;
   L2 *= l2_wrap;
 
-  light.L00 = L00;
+  light.L00 = L0;
   light.L01r = unity_SHAr.xyz * l1_wrap;
   light.L01g = unity_SHAg.xyz * l1_wrap;
   light.L01b = unity_SHAb.xyz * l1_wrap;
-- 
cgit v1.2.3