summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author yum <yum.food.vr@gmail.com> 2024-10-13 13:53:32 -0700
committer yum <yum.food.vr@gmail.com> 2024-10-13 13:53:32 -0700
commit ddd60efa4318fab1ff21b10f05c2ec954c37427d (patch)
tree f8df96bcc0eaa27d730ac1c0beeefb28f99c83d5
parent 48f9510d85d88dba9feb345f9a5e800c287b27b7 (diff)
Fog performance optimizations
* Minimize transcendental ops
* Minimize use of XU ops (e.g. rcp())
* Remove redundant terms (e.g. ao)
-rw-r--r-- fog.cginc 69
1 files changed, 34 insertions, 35 deletions
diff --git a/fog.cginc b/fog.cginc
index a07c1e3..292d464 100644
--- a/fog.cginc
+++ b/fog.cginc
@@ -31,31 +31,35 @@ float perlin_noise_3d_tex(float3 p)
{
// 1/256 = 0.00390625
float r_lo = _Gimmick_Fog_00_Noise.SampleLevel(linear_repeat_s, p.xyz * 0.00390625, 0);
-
return r_lo;
}
float map(float3 p, float lod) {
float3 t = _Time[1] * 0.5;
t = 0;
- float radius = saturate(_Gimmick_Fog_00_Radius - length(p));
+#define RADIUS_TRANS_WIDTH 100
+#define RADIUS_TRANS_WIDTH_RCP (1.0 / RADIUS_TRANS_WIDTH)
+ // Try to create a smooth transition without doing any length() or other
+ // transcendental ops.
+ float radius2 = clamp(_Gimmick_Fog_00_Radius * _Gimmick_Fog_00_Radius - dot(p, p), 0, RADIUS_TRANS_WIDTH) * RADIUS_TRANS_WIDTH_RCP;
float3 pp = p * _Gimmick_Fog_00_Noise_Scale * FOG_PERLIN_NOISE_SCALE + t;
- float density = FOG_PERLIN_NOISE(pp) * radius * 0.7;
+ float density = FOG_PERLIN_NOISE(pp) * radius2 * 0.7;
- density = pow(density, _Gimmick_Fog_00_Noise_Exponent);
+ density *= density;
- // This term creates large open areas
- if (lod < 1) {
- float tmp = FOG_PERLIN_NOISE(pp * 0.167 + t/4) * radius - 0.5;
+ // This term creates large open areas.
+ // This `if` doesn't actually create any thread divergence. Since all rays
+ // shoot out in lock step, they all leave this mode at the same time.
+ if (lod == 0) {
+ float tmp = FOG_PERLIN_NOISE(pp * 0.167 + t/4) * radius2 - 0.5;
// Aggressively dial down this parameter as density increases. We really
// need to keep paths short when density is high.
- float density_performance_fix = rcp(_Gimmick_Fog_00_Density);
+ float density_performance_fix = 1 / _Gimmick_Fog_00_Density;
density_performance_fix *= density_performance_fix;
tmp *= 0.5 * density_performance_fix;
density += tmp;
}
-
return saturate(density);
}
@@ -72,30 +76,32 @@ float3 get_normal(float3 p, float map_p, float lod) {
}
void getEmitterData(float3 p, float step_size,
- float3 em_loc, float3 em_normal, float em_scale_x,
- float em_scale_y, out float3 em_color, out float em_weight,
- out float3 p_projected)
+ float3 em_loc, float3 em_normal, float2 emitter_scale,
+ out float3 em_color, out float em_weight)
{
// Project onto plane
const float3 p_to_emitter = p - em_loc;
- const float2 emitter_scale = float2(em_scale_x, em_scale_y);
const float t = dot(p_to_emitter, em_normal);
- float emitter_lod = floor(abs(t) / (_Gimmick_Fog_00_Emitter_Lod_Half_Life * step_size));
- p_projected = p - t * em_normal;
+ const float3 p_projected = p - t * em_normal - em_loc;
- p_projected -= em_loc;
bool in_range = (abs(p_projected.x) < emitter_scale.x) * (abs(p_projected.y) < emitter_scale.y) * (t > 0);
- float2 emitter_uv = clamp(p_projected.xy, -emitter_scale, emitter_scale) / emitter_scale;
- emitter_uv /= 2.0;
- emitter_uv += 0.5;
// Go up one LOD every 5 meters
// TODO make this tunable
- em_color = _Gimmick_Fog_00_Emitter_Texture.SampleLevel(linear_repeat_s, emitter_uv, emitter_lod);
- em_color *= _Gimmick_Fog_00_Emitter_Brightness;
- float emitter_dist = in_range ? abs(t) : 1000;
- float emitter_falloff = min(1, rcp(pow(emitter_dist, 1.4)));
- em_weight = in_range * emitter_falloff;
+ if (in_range) {
+ float2 emitter_uv = clamp(p_projected.xy, -emitter_scale, emitter_scale) / emitter_scale;
+ emitter_uv /= 2.0;
+ emitter_uv += 0.5;
+ float emitter_lod = floor(abs(t) / (_Gimmick_Fog_00_Emitter_Lod_Half_Life * step_size));
+ em_color = _Gimmick_Fog_00_Emitter_Texture.SampleLevel(linear_repeat_s, emitter_uv, emitter_lod);
+ em_color *= _Gimmick_Fog_00_Emitter_Brightness;
+ float emitter_dist = in_range ? abs(t) : 1000;
+ float emitter_falloff = min(1, rcp(pow(emitter_dist, 1.4)));
+ em_weight = in_range * emitter_falloff;
+ } else {
+ em_color = 0;
+ em_weight = 0;
+ }
}
Fog00PBR getFog00(v2f i) {
@@ -172,8 +178,7 @@ Fog00PBR getFog00(v2f i) {
float3 em_color;
float em_weight;
- float3 em_p;
- getEmitterData(p, step_size, em_loc, em_normal, em_scale_x, em_scale_y, em_color, em_weight, em_p);
+ getEmitterData(p, step_size, em_loc, em_normal, float2(em_scale_x, em_scale_y), em_color, em_weight);
#if defined(_GIMMICK_FOG_00_EMITTER_1)
const float3 em1_loc = _Gimmick_Fog_00_Emitter1_Location;
const float3 em1_normal = normalize(_Gimmick_Fog_00_Emitter1_Normal);
@@ -181,8 +186,7 @@ Fog00PBR getFog00(v2f i) {
const float em1_scale_y = _Gimmick_Fog_00_Emitter1_Scale_Y;
float3 em1_color;
float em1_weight;
- float3 em1_p;
- getEmitterData(p, step_size, em1_loc, em1_normal, em1_scale_x, em1_scale_y, em1_color, em1_weight, em1_p);
+ getEmitterData(p, step_size, em1_loc, em1_normal, float2(em1_scale_x, em1_scale_y), em1_color, em1_weight);
em_color += em1_color;
em_weight += em1_weight;
#endif
@@ -193,8 +197,7 @@ Fog00PBR getFog00(v2f i) {
const float em2_scale_y = _Gimmick_Fog_00_Emitter2_Scale_Y;
float3 em2_color;
float em2_weight;
- float3 em2_p;
- getEmitterData(p, step_size, em2_loc, em2_normal, em2_scale_x, em2_scale_y, em2_color, em2_weight, em2_p);
+ getEmitterData(p, step_size, em2_loc, em2_normal, float2(em2_scale_x, em2_scale_y), em2_color, em2_weight);
em_color += em2_color;
em_weight += em2_weight;
#endif
@@ -204,10 +207,6 @@ Fog00PBR getFog00(v2f i) {
acc += c * (1.0 - acc.a);
- const float ao_str = 0.3;
- float cur_ao = saturate(length(ro) / _Gimmick_Fog_00_Radius) * ao_str + (1.0 - ao_str);
- ao = cur_ao * (1.0 - acc.a) + acc.a * ao;
-
// Performance hack: stop blending normals after enough accumulation.
#if 0
if (acc.a < _Gimmick_Fog_00_Normal_Cutoff) {
@@ -229,7 +228,7 @@ Fog00PBR getFog00(v2f i) {
Fog00PBR pbr;
pbr.albedo.rgb = 1;
pbr.albedo.a = saturate(acc.a);
- pbr.ao = ao;
+ pbr.ao = 1;
pbr.diffuse = acc.rgb;
#if 1