diff options
| author | yum <yum.food.vr@gmail.com> | 2025-01-17 01:13:49 -0800 |
|---|---|---|
| committer | yum <yum.food.vr@gmail.com> | 2025-01-17 01:13:49 -0800 |
| commit | bee103e89fc83030bfc0251db5a78bb153042e1f (patch) | |
| tree | 5298edf99718d13d64d69efe2a0ff63bed1337b4 /pema_quad_intrinsics.cginc | |
| parent | b28359aefb16151c7c835dadfe27b969ea8fe702 (diff) | |
Use quad intrinsics to compute trochoid normals
Simple algo. Use quad intrinsics to get neighboring pixels' (x & y)
positions in trochoid space. Compute tangent and bitangent from that.
Then normal as cross product. There's some artifacting on diagonal
boundaries.
Diffstat (limited to 'pema_quad_intrinsics.cginc')
| -rw-r--r-- | pema_quad_intrinsics.cginc | 259 |
1 files changed, 259 insertions, 0 deletions
diff --git a/pema_quad_intrinsics.cginc b/pema_quad_intrinsics.cginc new file mode 100644 index 0000000..0a411a0 --- /dev/null +++ b/pema_quad_intrinsics.cginc @@ -0,0 +1,259 @@ +// SPDX-License-Identifier: MIT +// Author: pema99 + +// This file contains functions that simulate Quad and Wave Intrinsics without access to either. +// For more information on those, see: https://github.com/Microsoft/DirectXShaderCompiler/wiki/Wave-Intrinsics + +// To use the functions, you must call SETUP_QUAD_INTRINSICS(pos) at the start of your fragment shader, +// where 'pos' is the pixel position, ie. the fragment input variable with the SV_Position semantic. +// Note that some functions will require SM 5.0, ie. #pragma target 5.0. + +// The file is a bit difficult to read, so here is a quick reference of all the functions it provides: +// +// Basic getters: +// uint QuadGetLaneID() - Get the ID of the current lane (0-3), from top left to bottom right. +// uint2 QuadGetLanePosition() - Get the position of the current lane (0,0 - 1,1), from top left to bottom right. +// +// Shuffles and broadcasts: +// <float_type> QuadReadAcrossX(<float_type> value) - Read the value of the lane opposite this one on the X axis. +// <float_type> QuadReadAcrossY(<float_type> value) - Read the value of the lane opposite this one on the Y axis. +// <float_type> QuadReadAcrossDiagonal(<float_type> value) - Read the value of the lane opposite this one on the diagonal. +// <float_type> QuadReadLaneAt(<float_type> value, uint2 quadLaneID) - Read the value of the lane at the given position. +// <float_type> QuadReadLaneAt(<float_type> value, uint quadLaneID) - Read the value of the lane with the given ID. +// void QuadReadAll(<float_type> value, out <float_type> topLeft, out <float_type> topRight, out <float_type> bottomLeft, out <float_type> bottomRight) - Read the value of all lanes. +// +// Reductions: +// bool QuadAny(bool expr) - Check if any lane evaluate the expression to true. +// bool QuadAll(bool expr) - Check if all lanes evaluate the expression to true. +// <float_type> QuadSum(<float_type> value) - Sum the values on all lanes. +// <float_type> QuadProduct(<float_type> value) - Multiply the values on all lanes. +// <float_type> QuadMin(<float_type> value) - Find the minimum value on all lanes. +// <float_type> QuadMax(<float_type> value) - Find the maximum value on all lanes. +// <integer_type> QuadBitAnd(<integer_type> value) - Bitwise AND the values on all lanes. +// <integer_type> QuadBitOr(<integer_type> value) - Bitwise OR the values on all lanes. +// <integer_type> QuadBitXor(<integer_type> value) - Bitwise XOR the values on all lanes. +// uint4 QuadBallot(bool expr) - Create a bitmask of which lanes evaluate the expression to true. +// uint QuadCountBits(bool expr) - Count the number of lanes that evaluate the expression to true. +// +// Scans: +// <float_type> QuadPrefixSum(<float_type> value) - Sum the values on all lanes up to and exlcuding this one. +// <float_type> QuadPrefixProduct(<float_type> value) - Multiply the values on all lanes up to and exlcuding this one. +// uint QuadPrefixCountBits(bool expr) - Count the number of lanes that evaluate the expression to true up to and excluding this one. + +#ifndef QUAD_INTRINSICS +#define QUAD_INTRINSICS + +// Setup functions +static uint2 GLOBAL_QUAD_INDEX = uint2(0, 0); + +#define SETUP_QUAD_INTRINSICS(SV_Position) \ + GLOBAL_QUAD_INDEX = (uint2)(SV_Position).xy & 1; + +// ID getters +uint QuadGetLaneID() +{ + return ((GLOBAL_QUAD_INDEX.y * 1) << 1) + (GLOBAL_QUAD_INDEX.x & 1); +} + +uint2 QuadGetLanePosition() +{ + return GLOBAL_QUAD_INDEX; +} + +// Helper functions +#define GENERIC_QUAD_FLOAT_HELPERS(T) \ +T QUAD_ADD_HELPER(T a, T b) \ +{ \ + return a + b; \ +} \ + +// NOTE: The reason we don't implement these for all types is because the HLSL compiler selects +// overloads based on the size of the type - thus, we can't have any instances that take parameters +// of the same size, as the overloads will overlap. +GENERIC_QUAD_FLOAT_HELPERS(float); +GENERIC_QUAD_FLOAT_HELPERS(float2); +GENERIC_QUAD_FLOAT_HELPERS(float3); +GENERIC_QUAD_FLOAT_HELPERS(float4); +GENERIC_QUAD_FLOAT_HELPERS(float3x3); +GENERIC_QUAD_FLOAT_HELPERS(float4x4); + +#define GENERIC_QUAD_INTEGER_HELPERS(T) \ +T QUAD_BITAND_HELPER(T a, T b) \ +{ \ + return a & b; \ +} \ + \ +T QUAD_BITOR_HELPER(T a, T b) \ +{ \ + return a | b; \ +} \ + \ +T QUAD_BITXOR_HELPER(T a, T b) \ +{ \ + return a ^ b; \ +} + +GENERIC_QUAD_INTEGER_HELPERS(uint); +GENERIC_QUAD_INTEGER_HELPERS(uint2); +GENERIC_QUAD_INTEGER_HELPERS(uint3); +GENERIC_QUAD_INTEGER_HELPERS(uint4); +GENERIC_QUAD_FLOAT_HELPERS(uint3x3); +GENERIC_QUAD_FLOAT_HELPERS(uint4x4); + +uint QUAD_COUNT_BITS_HELPER(uint a, uint b) +{ + return a + b; +} + +// Generic intrinsics +#define GENERIC_QUAD_REDUCTION(T, Name, OP) \ +T Name(T value) \ +{ \ + T topLeft, topRight, bottomLeft, bottomRight; \ + QuadReadAll(value, topLeft, topRight, bottomLeft, bottomRight); \ + return OP(OP(OP(topLeft, topRight), bottomLeft), bottomRight); \ +} + +#define GENERIC_QUAD_SCAN(T, Name, OP) \ +T Name(T value) \ +{ \ + T topLeft, topRight, bottomLeft, bottomRight; \ + QuadReadAll(value, topLeft, topRight, bottomLeft, bottomRight); \ + T allValues[4] = { topLeft, topRight, bottomLeft, bottomRight }; \ + \ + T prefix = 0; \ + for (int i = 0; i < QuadGetLaneID(); i++) \ + { \ + prefix = OP(prefix, allValues[i]); \ + } \ + return prefix; \ +} + +#define GENERIC_QUAD_FLOAT_INTRINSICS(T) \ +T QuadReadAcrossX(T value) \ +{ \ + T diff = ddx_fine(value); \ + float sign = GLOBAL_QUAD_INDEX.x == 0 ? 1 : -1; \ + return (sign * diff) + value; \ +} \ + \ +T QuadReadAcrossY(T value) \ +{ \ + T diff = ddy_fine(value); \ + float sign = GLOBAL_QUAD_INDEX.y == 0 ? 1 : -1; \ + return (sign * diff) + value; \ +} \ + \ +T QuadReadAcrossDiagonal(T value) \ +{ \ + T oppositeX = QuadReadAcrossX(value); \ + T oppositeDiagonal = QuadReadAcrossY(oppositeX); \ + return oppositeDiagonal; \ +} \ + \ +T QuadReadLaneAt(T value, uint2 quadLaneID) \ +{ \ + uint2 offset = 0; \ + bool2 correct = quadLaneID == GLOBAL_QUAD_INDEX; \ + if (all(correct)) \ + { \ + return value; \ + } \ + else if (correct.x) \ + { \ + return QuadReadAcrossY(value); \ + } \ + else if (correct.y) \ + { \ + return QuadReadAcrossX(value); \ + } \ + else \ + { \ + return QuadReadAcrossDiagonal(value); \ + } \ +} \ + \ +T QuadReadLaneAt(T value, uint quadLaneID) \ +{ \ + uint2 offset = 0; \ + return QuadReadLaneAt(value, uint2(quadLaneID & 1, (quadLaneID & 2) >> 1)); \ +} \ + \ +void QuadReadAll(T value, out T topLeft, out T topRight, out T bottomLeft, out T bottomRight) \ +{ \ + topLeft = QuadReadLaneAt(value, uint2(0, 0)); \ + topRight = QuadReadLaneAt(value, uint2(1, 0)); \ + bottomLeft = QuadReadLaneAt(value, uint2(0, 1)); \ + bottomRight = QuadReadLaneAt(value, uint2(1, 1)); \ +} \ + \ +GENERIC_QUAD_REDUCTION(T, QuadSum, QUAD_ADD_HELPER) \ +GENERIC_QUAD_REDUCTION(T, QuadProduct, mul) \ +GENERIC_QUAD_REDUCTION(T, QuadMin, min) \ +GENERIC_QUAD_REDUCTION(T, QuadMax, max) \ + \ +GENERIC_QUAD_SCAN(T, QuadPrefixSum, QUAD_ADD_HELPER) \ +GENERIC_QUAD_SCAN(T, QuadPrefixProduct, mul) \ + +GENERIC_QUAD_FLOAT_INTRINSICS(float); +GENERIC_QUAD_FLOAT_INTRINSICS(float2); +GENERIC_QUAD_FLOAT_INTRINSICS(float3); +GENERIC_QUAD_FLOAT_INTRINSICS(float4); +GENERIC_QUAD_FLOAT_INTRINSICS(float3x3); +GENERIC_QUAD_FLOAT_INTRINSICS(float4x4); + +// Generic, integer-specific intrincs +#define GENERIC_QUAD_INTEGER_INTRINSICS(T) \ +GENERIC_QUAD_REDUCTION(T, QuadBitAnd, QUAD_BITAND_HELPER) \ +GENERIC_QUAD_REDUCTION(T, QuadBitOr, QUAD_BITOR_HELPER) \ +GENERIC_QUAD_REDUCTION(T, QuadBitXor, QUAD_BITXOR_HELPER) + +GENERIC_QUAD_INTEGER_INTRINSICS(uint); +GENERIC_QUAD_INTEGER_INTRINSICS(uint2); +GENERIC_QUAD_INTEGER_INTRINSICS(uint3); +GENERIC_QUAD_INTEGER_INTRINSICS(uint4); +GENERIC_QUAD_INTEGER_INTRINSICS(uint3x3); +GENERIC_QUAD_INTEGER_INTRINSICS(uint4x4); + +// Monomorphic intrinsics +bool QuadAny(bool expr) +{ + return QuadReadLaneAt(expr, 0) || QuadReadLaneAt(expr, 1) || QuadReadLaneAt(expr, 2) || QuadReadLaneAt(expr, 3); +} + +bool QuadAll(bool expr) +{ + return QuadReadLaneAt(expr, 0) && QuadReadLaneAt(expr, 1) && QuadReadLaneAt(expr, 2) && QuadReadLaneAt(expr, 3); +} + +uint4 QuadBallot(bool expr) +{ + uint4 result; + result.x = QuadReadLaneAt(expr ? 1 : 0, 0); + result.y = QuadReadLaneAt(expr ? 1 : 0, 1); + result.z = QuadReadLaneAt(expr ? 1 : 0, 2); + result.w = QuadReadLaneAt(expr ? 1 : 0, 3); + return result; +} + +uint QuadCountBits(bool expr) +{ + uint4 ballot = QuadBallot(expr); + return ballot.x + ballot.y + ballot.z + ballot.w; +} + +GENERIC_QUAD_SCAN(uint, QuadPrefixCountBitsHelper, QUAD_COUNT_BITS_HELPER); +uint QuadPrefixCountBits(bool expr) +{ + return QuadPrefixCountBitsHelper(expr ? 1 : 0); +} + +// Clean up helper macros +#undef GENERIC_QUAD_INTEGER_HELPERS +#undef GENERIC_QUAD_FLOAT_HELPERS +#undef GENERIC_QUAD_REDUCTION +#undef GENERIC_QUAD_SCAN +#undef GENERIC_QUAD_FLOAT_INTRINSICS +#undef GENERIC_QUAD_INTEGER_INTRINSICS + +#endif |
