summary refs log tree commit diff stats
diff options
context:
space:
mode:
author yum <yum.food.vr@gmail.com> 2025-08-08 19:28:27 -0700
committer yum <yum.food.vr@gmail.com> 2025-08-08 19:28:30 -0700
commit 42dbd067eb455d4b67d6ae8c57f1cbe4ec7dccfa (patch)
tree 7f1ae4fe3b2c921bf38c7f2f6689cbc05180f3f3
parent 7cdf39f51b47553af89c3de5b86d33ded4b5cdd2 (diff)
Optimize sh9 implementation
Saves ~20 instructions (572 -> 552).
-rw-r--r-- brdf.cginc 8
-rw-r--r-- filamented.cginc 24
-rw-r--r-- lighting.cginc 51
3 files changed, 23 insertions, 60 deletions
diff --git a/brdf.cginc b/brdf.cginc
index 12f2770..5a24307 100644
--- a/brdf.cginc
+++ b/brdf.cginc
@@ -55,6 +55,7 @@ float G_GGXSmith(float roughness, float NoL, float NoV) {
return rcp(denom);
}
+#if defined(_CLOTH_SHEEN)
// Estevez "Production Friendly Microfacet Sheen BRDF"
// Equation 2.
// The original equation is:
@@ -113,6 +114,7 @@ float G_Cloth(float roughness, float LoH) {
// Apply terminator softening (equation 4).
return pow(lambda, 1.0f + 2.0f * pow(one_minus_LoH, 8));
}
+#endif
float4 brdf(Pbr pbr, LightData data) {
float3 specular = 0;
@@ -134,7 +136,7 @@ float4 brdf(Pbr pbr, LightData data) {
#endif
// Direct
- if (true) {
+ {
float remainder = 1.0f;
#if defined(_CLEARCOAT)
@@ -180,7 +182,7 @@ float4 brdf(Pbr pbr, LightData data) {
// Indirect
#if defined(FORWARD_BASE_PASS)
- if (true) {
+ {
float remainder = 1.0f;
float2 dfg_uv = float2(data.common.NoV, pbr.roughness);
@@ -212,7 +214,7 @@ float4 brdf(Pbr pbr, LightData data) {
// For energy conservation with the diffuse term, we use the view-dependent Fresnel.
float3 F = F_Schlick(data.common.NoV, f0_spec, 1.0f);
remainder *= (1.0f - F);
-
+
// Diffuse is Lambertian, which is pre-integrated into the SH diffuse probe
float3 indirect_diffuse = pbr.albedo.xyz * data.indirect.diffuse * remainder * (1.0 - pbr.metallic);
diffuse += indirect_diffuse;
diff --git a/filamented.cginc b/filamented.cginc
index fb019cb..f6bd67a 100644
--- a/filamented.cginc
+++ b/filamented.cginc
@@ -213,30 +213,6 @@
#include "UnityCG.cginc"
#include "UnityImageBasedLightingMinimal.cginc"
-float normalFiltering(float perceptualRoughness, const float3 worldNormal) {
- // Kaplanyan 2016, "Stable specular highlights"
- // Tokuyoshi 2017, "Error Reduction and Simplification for Shading Anti-Aliasing"
- // Tokuyoshi and Kaplanyan 2019, "Improved Geometric Specular Antialiasing"
-
- // This implementation is meant for deferred rendering in the original paper but
- // we use it in forward rendering as well (as discussed in Tokuyoshi and Kaplanyan
- // 2019). The main reason is that the forward version requires an expensive transform
- // of the half vector by the tangent frame for every light. This is therefore an
- // approximation but it works well enough for our needs and provides an improvement
- // over our original implementation based on Vlachos 2015, "Advanced VR Rendering".
-
- float3 du = ddx(worldNormal);
- float3 dv = ddy(worldNormal);
-
- float variance = _Specular_AA_Variance * (dot(du, du) + dot(dv, dv));
-
- float roughness = perceptualRoughnessToRoughness(perceptualRoughness);
- float kernelRoughness = min(2.0 * variance, _Specular_AA_Threshold);
- float squareRoughness = saturate(roughness * roughness + kernelRoughness);
-
- return roughnessToPerceptualRoughness(sqrt(squareRoughness));
-}
-
half3 Unity_GlossyEnvironment_local (UNITY_ARGS_TEXCUBE(tex), half4 hdr, Unity_GlossyEnvironmentData glossIn)
{
half perceptualRoughness = glossIn.roughness /* perceptualRoughness */ ;
diff --git a/lighting.cginc b/lighting.cginc
index 48d95a1..6668459 100644
--- a/lighting.cginc
+++ b/lighting.cginc
@@ -114,50 +114,35 @@ float3 yumSH9(float4 n, float3 worldPos, inout LightIndirect light) {
// unity_SHB*: first four of the L2 coefficients
// unity_SHC: last L2 coefficient
- // Parse out coefficients into a simpler but less efficient format.
- float3 L00 = float3(unity_SHAr.w, unity_SHAg.w, unity_SHAb.w);
- float3 L1_1 = float3(unity_SHAr.x, unity_SHAg.x, unity_SHAb.x);
- float3 L10 = float3(unity_SHAr.y, unity_SHAg.y, unity_SHAb.y);
- float3 L11 = float3(unity_SHAr.z, unity_SHAg.z, unity_SHAb.z);
- float3 L2_2 = float3(unity_SHBr.x, unity_SHBg.x, unity_SHBb.x);
- float3 L2_1 = float3(unity_SHBr.y, unity_SHBg.y, unity_SHBb.y);
- float3 L20 = float3(unity_SHBr.z, unity_SHBg.z, unity_SHBb.z);
- float3 L21 = float3(unity_SHBr.w, unity_SHBg.w, unity_SHBb.w);
- float3 L22 = unity_SHC;
-
- // Equation 13 from "An Efficient Representation for Irradiance Environment
- // Maps" by Ramamoorthi and Hanrahan. Note that the order of some
- // coefficients is different, and normalization constants have been
- // premultiplied by Unity.
- float3 L0 = L00;
- float3 L1 = L1_1 * n.x + L10 * n.y + L11 * n.z;
+ // L0 band
+ float3 L0 = float3(unity_SHAr.w, unity_SHAg.w, unity_SHAb.w);
+
+ // L1 band
+ float3 L1 = float3(
+ dot(unity_SHAr.xyz, n.xyz),
+ dot(unity_SHAg.xyz, n.xyz),
+ dot(unity_SHAb.xyz, n.xyz)
+ );
+
+ // L2 band
+ float4 v = float4(n.x * n.y, n.y * n.z, n.z * n.z, n.x * n.z);
float3 L2 =
- L2_2 * n.x * n.y +
- L2_1 * n.y * n.z +
- L20 * n.z * n.z +
- L21 * n.x * n.z +
- L22 * (n.x * n.x - n.y * n.y);
+ float3(dot(unity_SHBr.xyzw, v), dot(unity_SHBg.xyzw, v), dot(unity_SHBb.xyzw, v)) +
+ unity_SHC.xyz * (n.x * n.x - n.y * n.y);
// TODO expose this as a parameter
float wrap_term = 0.0f;
+
// Original coefficients: 1, 2/3, 1/4.
// Wrapped coefficients: 1, (2-w)/3, ((1-w)^2)/4.
-
- // Setting w=0, the l1 band is:
- // (2-w)/3 = 2/3
- // 2-w = 2
- // 1-w/2 = 1
float l1_wrap = 1.0f - wrap_term * 0.75f;
L1 *= l1_wrap;
- // The l2 band is:
- // ((1-w)^2)/4 = 1/4
- // (1-w)^2 = 1
- float l2_wrap = (1.0f-wrap_term);
- l2_wrap *= l2_wrap;
+ float l2_wrap_base = 1.0f - wrap_term;
+ float l2_wrap = l2_wrap_base * l2_wrap_base;
L2 *= l2_wrap;
- light.L00 = L00;
+ light.L00 = L0;
light.L01r = unity_SHAr.xyz * l1_wrap;
light.L01g = unity_SHAg.xyz * l1_wrap;
light.L01b = unity_SHAb.xyz * l1_wrap;