4 files changed, 79 insertions, 40 deletions
diff --git a/Scripts/make_dfg_lut.py b/Scripts/make_dfg_lut.py
index 4e16c99..8105bb1 100755
--- a/Scripts/make_dfg_lut.py
+++ b/Scripts/make_dfg_lut.py
@@ -73,43 +73,56 @@ def G_Cloth_L(x, a, b, c, d, e):
 
 
 @numba.njit(cache=True)
-def Lambda_Cloth(roughness, cos_theta):
+def Lambda_Cloth_Raw(roughness, cos_theta):
     a0, a1 = 25.3245, 21.5473
     b0, b1 = 3.32435, 3.82987
     c0, c1 = 0.16801, 0.19823
     d0, d1 = -1.27393, -1.97760
     e0, e1 = -4.85967, -4.32054
 
-    # Matches shader: interpolator = r^2 blends toward rough (a1) column
-    r_sq = roughness * roughness
+    one_minus_r = 1.0 - roughness
+    interp = one_minus_r * one_minus_r
+    rough_weight = 1.0 - interp
 
     lambda_val = 0.0
     if cos_theta < 0.5:
         L0 = G_Cloth_L(cos_theta, a0, b0, c0, d0, e0)
         L1 = G_Cloth_L(cos_theta, a1, b1, c1, d1, e1)
-        L = lerp(L0, L1, r_sq)
+        L = lerp(L0, L1, rough_weight)
         lambda_val = math.exp(L)
     else:
         L_05_0 = G_Cloth_L(0.5, a0, b0, c0, d0, e0)
         L_05_1 = G_Cloth_L(0.5, a1, b1, c1, d1, e1)
-        L_05 = lerp(L_05_0, L_05_1, r_sq)
+        L_05 = lerp(L_05_0, L_05_1, rough_weight)
 
         one_minus_cos = 1.0 - cos_theta
         L_c_0 = G_Cloth_L(one_minus_cos, a0, b0, c0, d0, e0)
         L_c_1 = G_Cloth_L(one_minus_cos, a1, b1, c1, d1, e1)
-        L_c = lerp(L_c_0, L_c_1, r_sq)
+        L_c = lerp(L_c_0, L_c_1, rough_weight)
 
         lambda_val = math.exp(2.0 * L_05 - L_c)
 
-    # Apply terminator softening (equation 4)
+    return lambda_val
+
+
+@numba.njit(cache=True)
+def Lambda_Cloth_Softened(roughness, cos_theta):
+    lambda_val = Lambda_Cloth_Raw(roughness, cos_theta)
     return pow(lambda_val, 1.0 + 2.0 * pow(1.0 - cos_theta, 8.0))
 
 
 @numba.njit(cache=True)
-def V_Cloth(roughness, NoL, NoV):
+def V_Cloth_Outgoing(roughness, NoL, NoV):
     # Height-correlated Smith: G2 / (4 * NoL * NoV)
-    lambda_l = Lambda_Cloth(roughness, NoL)
-    lambda_v = Lambda_Cloth(roughness, NoV)
+    lambda_l = Lambda_Cloth_Softened(roughness, NoL)
+    lambda_v = Lambda_Cloth_Raw(roughness, NoV)
+    return 1.0 / ((1.0 + lambda_l + lambda_v) * 4.0 * NoL * NoV + 1e-6)
+
+
+@numba.njit(cache=True)
+def V_Cloth_Incoming(roughness, NoL, NoV):
+    lambda_l = Lambda_Cloth_Softened(roughness, NoL)
+    lambda_v = Lambda_Cloth_Raw(roughness, NoV)
     return 1.0 / ((1.0 + lambda_l + lambda_v) * 4.0 * NoL * NoV + 1e-6)
 
 
@@ -119,8 +132,8 @@ def integrate_brdf_jitted(roughness, NoV, num_samples):
     V_y = 0.0
     V_z = NoV
 
-    # R: GGX scale, G: GGX bias, B: cloth DFG
-    std_scale, std_bias, cloth_val = 0.0, 0.0, 0.0
+    # R: GGX scale, G: GGX bias, B: cloth outgoing albedo, A: cloth incoming albedo
+    std_scale, std_bias, cloth_out, cloth_in = 0.0, 0.0, 0.0, 0.0
 
     for i in range(num_samples):
         e1, e2 = random.random(), random.random()
@@ -164,12 +177,14 @@ def integrate_brdf_jitted(roughness, NoV, num_samples):
             # Same GGX importance samples, reweighted for cloth D and V.
             if roughness >= 1e-4:
                 D_c = D_Cloth(roughness, NoH)
-                V_c = V_Cloth(roughness, NoL, NoV_proxy)
                 pdf_ggx = D_GGX(roughness, NoH) * NoH / (4.0 * VoH + 1e-6)
-                cloth_val += (D_c * V_c * NoL) / (pdf_ggx + 1e-6)
+                V_out = V_Cloth_Outgoing(roughness, NoL, NoV_proxy)
+                V_in = V_Cloth_Incoming(roughness, NoV_proxy, NoL)
+                cloth_out += (D_c * V_out * NoL) / (pdf_ggx + 1e-6)
+                cloth_in += (D_c * V_in * NoL) / (pdf_ggx + 1e-6)
 
     inv_n = 1.0 / num_samples
-    return std_scale * inv_n, std_bias * inv_n, cloth_val * inv_n
+    return std_scale * inv_n, std_bias * inv_n, cloth_out * inv_n, cloth_in * inv_n
 
 
 def calculate_pixel(coords, resolution, num_samples):
@@ -178,22 +193,28 @@ def calculate_pixel(coords, resolution, num_samples):
     v = (y + 0.5) / resolution
 
     NoV = saturate(u)
-    roughness = saturate(v)
-    if NoV < 1e-4: return x, y, 0.0, 0.0, 0.0
+    perceptual_roughness = saturate(v)
+    roughness = max(perceptual_roughness * perceptual_roughness, 1e-4)
+    if NoV < 1e-4: return x, y, 0.0, 0.0, 0.0, 0.0
 
-    std_scale, std_bias, cloth = integrate_brdf_jitted(roughness, NoV, num_samples)
+    std_scale, std_bias, cloth_out, cloth_in = integrate_brdf_jitted(roughness, NoV, num_samples)
 
-    # R: GGX scale, G: GGX bias, B: cloth DFG
-    return x, y, std_scale, std_bias, cloth
+    # R: GGX scale, G: GGX bias, B: cloth outgoing albedo, A: cloth incoming albedo
+    return x, y, std_scale, std_bias, cloth_out, cloth_in
 
 
 def generate_exr(resolution, output_filename, num_samples, num_workers):
-    print(f"Generating {resolution}x{resolution} EXR '{output_filename}' (R=GGX scale, G=GGX bias, B=cloth) ({num_samples} samples/pixel) using {num_workers} workers.")
+    print(f"Generating {resolution}x{resolution} EXR '{output_filename}' (R=GGX scale, G=GGX bias, B=cloth out, A=cloth in) ({num_samples} samples/pixel) using {num_workers} workers.")
     header = OpenEXR.Header(resolution, resolution)
     pt = Imath.PixelType(Imath.PixelType.FLOAT)
-    header['channels'] = { 'R': Imath.Channel(pt), 'G': Imath.Channel(pt), 'B': Imath.Channel(pt) }
+    header['channels'] = {
+        'R': Imath.Channel(pt),
+        'G': Imath.Channel(pt),
+        'B': Imath.Channel(pt),
+        'A': Imath.Channel(pt),
+    }
 
-    pixel_data = np.zeros((resolution, resolution, 3), dtype=np.float32)
+    pixel_data = np.zeros((resolution, resolution, 4), dtype=np.float32)
 
     coords_to_process = [(x, y) for y in range(resolution) for x in range(resolution)]
     worker_func = partial(calculate_pixel, resolution=resolution, num_samples=num_samples)
@@ -207,8 +228,8 @@ def generate_exr(resolution, output_filename, num_samples, num_workers):
 
         for future in concurrent.futures.as_completed(futures):
             try:
-                x, y, r, g, b = future.result()
-                pixel_data[y, x] = (r, g, b)
+                x, y, r, g, b, a = future.result()
+                pixel_data[y, x] = (r, g, b, a)
             except Exception as exc:
                 coord = futures[future]
                 print(f'\nPixel at {coord} generated an exception: {exc}')
@@ -225,14 +246,15 @@ def generate_exr(resolution, output_filename, num_samples, num_workers):
         r_data = pixel_data[:, :, 0].ravel().tobytes()
         g_data = pixel_data[:, :, 1].ravel().tobytes()
         b_data = pixel_data[:, :, 2].ravel().tobytes()
-        exr_file.writePixels({'R': r_data, 'G': g_data, 'B': b_data})
+        a_data = pixel_data[:, :, 3].ravel().tobytes()
+        exr_file.writePixels({'R': r_data, 'G': g_data, 'B': b_data, 'A': a_data})
         exr_file.close()
         print(f"Successfully generated {output_filename}")
     except Exception as e:
         raise RuntimeError(f"Failed to write EXR file '{output_filename}': {e}")
 
 def main():
-    parser = argparse.ArgumentParser(description='Generate packed DFG LUT (R=GGX scale, G=cloth, B=GGX bias).')
+    parser = argparse.ArgumentParser(description='Generate packed DFG LUT (R=GGX scale, G=GGX bias, B=cloth out, A=cloth in).')
     parser.add_argument('-r', '--resolution', type=int, default=512,
                         help='Resolution of the square EXR image (default: 512)')
     parser.add_argument('-s', '--samples', type=int, default=8192,
diff --git a/brdf.cginc b/brdf.cginc
index 65a25c9..75eb295 100755
--- a/brdf.cginc
+++ b/brdf.cginc
@@ -73,7 +73,8 @@ float G_GGXSmith(float roughness, float NoL, float NoV) {
 
 float L_Estevez(float r, float x) {
   // Recover constants according to Table 1.
-  float interpolator = 1 - r * r;
+  float one_minus_r = 1 - r;
+  float interpolator = one_minus_r * one_minus_r;
   float one_minus_i = 1 - interpolator;
   float a = interpolator * 25.3245 + one_minus_i * 21.5473;
   float b = interpolator * 3.32435 + one_minus_i * 3.82987;
@@ -84,20 +85,24 @@ float L_Estevez(float r, float x) {
   return a / (1 + b*pow(x, c)) + d*x + e;
 }
 
-float Lambda_Estevez(float cos_theta, float roughness) {
+float Lambda_Estevez_Raw(float cos_theta, float roughness) {
   // Equation 3
-  float lambda = cos_theta < 0.5
+  return cos_theta < 0.5
     ? exp(L_Estevez(roughness, cos_theta))
     : exp(2 * L_Estevez(roughness, 0.5) - L_Estevez(roughness, 1 - cos_theta));
-  // Equation 4
+}
+
+float Lambda_Estevez_Softened(float cos_theta, float roughness) {
+  // Equation 4 (terminator softening) applies only to the light-side term.
+  float lambda = Lambda_Estevez_Raw(cos_theta, roughness);
   return pow(lambda, 1 + 2 * pow(1 - cos_theta, 8));
 }
 
 // Estevez & Kulla "Production Friendly Microfacet Sheen BRDF"
 // Height-correlated Smith: G2 / (4 * NoL * NoV)
 float G_Estevez(float roughness, float NoL, float NoV) {
-  float lambda_l = Lambda_Estevez(NoL, roughness);
-  float lambda_v = Lambda_Estevez(NoV, roughness);
+  float lambda_l = Lambda_Estevez_Softened(NoL, roughness);
+  float lambda_v = Lambda_Estevez_Raw(NoV, roughness);
   return 1.0 / ((1.0 + lambda_l + lambda_v) * 4.0 * NoL * NoV);
 }
 
@@ -124,9 +129,9 @@ float4 brdf(v2f i, Pbr pbr, LightData data, out BrdfData bd) {
   float2 dfg_uv = float2(data.common.NoV, pbr.roughness_perceptual);
   [branch]
   if (textureExists(_DFG_LUT)) {
-    bd.ibl_dfg = _DFG_LUT.SampleLevel(bilinear_clamp_s, dfg_uv, 0).rgb;
+    bd.ibl_dfg = _DFG_LUT.SampleLevel(bilinear_clamp_s, dfg_uv, 0);
   } else {
-    bd.ibl_dfg = float3(1, 1, 1);
+    bd.ibl_dfg = float4(1, 1, 1, 1);
   }
   float3 f0_color = lerp(f0, pbr.albedo.xyz, pbr.metallic);
   float3 energy_comp = 1.0f + f0_color * (1.0f / (bd.ibl_dfg.xxx + bd.ibl_dfg.yyy) - 1.0f);
@@ -136,9 +141,9 @@ float4 brdf(v2f i, Pbr pbr, LightData data, out BrdfData bd) {
   float2 cc_dfg_uv = float2(data.common.NoV_cc, pbr.cc_roughness_perceptual);
   [branch]
   if (textureExists(_DFG_LUT)) {
-    bd.ibl_dfg_cc = _DFG_LUT.SampleLevel(bilinear_clamp_s, cc_dfg_uv, 0).rgb;
+    bd.ibl_dfg_cc = _DFG_LUT.SampleLevel(bilinear_clamp_s, cc_dfg_uv, 0);
   } else {
-    bd.ibl_dfg_cc = float3(1, 1, 1);
+    bd.ibl_dfg_cc = float4(1, 1, 1, 1);
   }
   float3 cc_f0_color = lerp(cc_f0, pbr.albedo.xyz, pbr.metallic);
   float3 cc_energy_comp = 1.0f + cc_f0_color * (1.0f / (bd.ibl_dfg_cc.xxx + bd.ibl_dfg_cc.yyy) - 1.0f);
@@ -167,10 +172,21 @@ float4 brdf(v2f i, Pbr pbr, LightData data, out BrdfData bd) {
     bd.direct_d = D_Estevez(pbr.roughness, data.direct.NoH);
     bd.direct_g = G_Estevez(pbr.roughness, data.direct.NoL, data.common.NoV);
 
+    float4 cloth_dfg_i = bd.ibl_dfg;
+    [branch]
+    if (textureExists(_DFG_LUT)) {
+      float2 cloth_direct_uv = float2(data.direct.NoL, pbr.roughness_perceptual);
+      cloth_dfg_i = _DFG_LUT.SampleLevel(bilinear_clamp_s, cloth_direct_uv, 0);
+    }
+    float3 cloth_alpha_o = cloth_f0 * bd.ibl_dfg.zzz;
+    float3 cloth_alpha_i = cloth_f0 * cloth_dfg_i.www;
+    float cloth_base_scale = luminance(min(1.0f - cloth_alpha_i, 1.0f - cloth_alpha_o));
+
     float3 direct_specular_cloth = (bd.direct_d * bd.direct_g) * bd.direct_f;
     direct_specular_cloth *= data.direct.color * data.direct.NoL;
     direct_specular_cloth *= remainder;
     specular += direct_specular_cloth;
+    remainder *= saturate(cloth_base_scale);
     /*
     float Fd = Fd_Lambertian(data.direct.NoL) / PI;
     float3 direct_diffuse = Fd * pbr.albedo.xyz * data.direct.color;
@@ -232,6 +248,7 @@ float4 brdf(v2f i, Pbr pbr, LightData data, out BrdfData bd) {
     float3 specular_dfg = _Cloth_Sheen.rgb * bd.ibl_dfg.zzz;
     float3 indirect_specular = data.indirect.specular * specular_dfg;
     specular += indirect_specular * remainder * data.common.spec_ao;
+    remainder *= saturate(1.0f - specular_dfg);
 
     float3 indirect_diffuse = pbr.albedo.xyz * data.indirect.diffuse;
     diffuse  += indirect_diffuse * remainder;
diff --git a/burley.cginc b/burley.cginc
index 539b317..0b6faeb 100644
--- a/burley.cginc
+++ b/burley.cginc
@@ -74,7 +74,7 @@ BurleyPatchTransform burley_make_patch_transform(float2 uv, float2 uv_dx, float2
   float2 vertex_uv = burley_tri_to_cart(tri_vertex);
   // Map the unit-radius hex support to the unit square so arbitrary rotation
   // stays within bounds.
-  float2 local_uv = (uv - vertex_uv) * 0.5f;
+  float2 local_uv = (uv - vertex_uv);
   // Apply input scaling.
   local_uv *= input_scale;
   float2 sample_dx = uv_dx * (0.5f * input_scale);
diff --git a/data.cginc b/data.cginc
index 370c3e6..2f6e21c 100644
--- a/data.cginc
+++ b/data.cginc
@@ -112,9 +112,9 @@ struct BrdfData {
   float direct_g_cc;
 #endif
 
-  float3 ibl_dfg;
+  float4 ibl_dfg;
 #if defined(_CLEARCOAT)
-  float3 ibl_dfg_cc;
+  float4 ibl_dfg_cc;
 #endif
 };