diff options
Diffstat (limited to 'tests')
389 files changed, 26467 insertions, 0 deletions
diff --git a/tests/bindings/README.md b/tests/bindings/README.md new file mode 100644 index 000000000..ccc9fecb9 --- /dev/null +++ b/tests/bindings/README.md @@ -0,0 +1,29 @@ +Binding Generation Tests +======================== + +These tests ensure that the compiler can correctly add explicit binding information (e.g., HLSL `register` semantics) to code that does not originally have them. + +Example +------- + +Given code like: + + Texture2D ta; + Texture2D tb; + +We expect to produce output like: + + Texture2D ta : register(t0); + Texture2D tb : register(t1); + +The resulting code guarantees that `tb` will always be assigned to the same location, regardless of how these values are (or are not) used in later shader code. + +Methodology +----------- + +These tests currently rely on the ability to run the same HLSL code through the Spire compiler driver and execute either Spire, or HLSL. We write an example like the above by wrapping explicit `register` semantics in a macro: + + Texture2D ta R(: register(t0)); + Texture2D tb R(: register(t1)); + +In the HLSL case, these annotations will manually place things where we want them, while in the Spire case, we define the macro to have an empty expansion, so that the annotations express our expectation for what the compiler will auto-generate.
\ No newline at end of file diff --git a/tests/bindings/binding0.hlsl b/tests/bindings/binding0.hlsl new file mode 100644 index 000000000..3f965bef0 --- /dev/null +++ b/tests/bindings/binding0.hlsl @@ -0,0 +1,27 @@ +//TEST:COMPARE_HLSL: -target dxbc-assembly -profile ps_4_0 -entry main + +// Let's first confirm that Spire can reproduce what the +// HLSL compiler would already do in the simple case (when +// all shader parameters are actually used). + +#ifdef __SPIRE__ +#define R(X) /**/ +#else +#define R(X) X +#endif + +float4 use(float4 val) { return val; }; +float4 use(Texture2D t, SamplerState s) { return t.Sample(s, 0.0); } + +Texture2D t R(: register(t0)); +SamplerState s R(: register(s0)); + +cbuffer C R(: register(b0)) +{ + float c; +} + +float4 main() : SV_Target +{ + return use(t,s) + use(c); +}
\ No newline at end of file diff --git a/tests/bindings/binding1.hlsl b/tests/bindings/binding1.hlsl new file mode 100644 index 000000000..d9e74e918 --- /dev/null +++ b/tests/bindings/binding1.hlsl @@ -0,0 +1,41 @@ +//TEST:COMPARE_HLSL: -target dxbc-assembly -profile ps_4_0 -entry main + +// We want to make sure that the registers that Spire generates +// are used, even if there are "dead" parameters earlier in the program. +// +// In this case, we declare two each of textures, samplers, and constant +// buffers, and then only use the second one. +// Left to its own devices, the HLSL compiler would usually shift the +// object that was used up to binding slot zero, and eliminate the one +// that wasn't used. +// We expect Spire to generate explicit annotations that stop this from +// happening. + +#ifdef __SPIRE__ +#define R(X) /**/ +#else +#define R(X) X +#endif + +float4 use(float4 val) { return val; }; +float4 use(Texture2D t, SamplerState s) { return t.Sample(s, 0.0); } + +Texture2D t0 R(: register(t0)); +Texture2D t1 R(: register(t1)); +SamplerState s0 R(: register(s0)); +SamplerState s1 R(: register(s1)); + +cbuffer C0 R(: register(b0)) +{ + float c0; +} + +cbuffer C1 R(: register(b1)) +{ + float c1; +} + +float4 main() : SV_Target +{ + return use(t1,s1) + use(c1); +}
\ No newline at end of file diff --git a/tests/bindings/explicit-binding.hlsl b/tests/bindings/explicit-binding.hlsl new file mode 100644 index 000000000..ac23c6556 --- /dev/null +++ b/tests/bindings/explicit-binding.hlsl @@ -0,0 +1,55 @@ +//TEST:COMPARE_HLSL: -target dxbc-assembly -profile ps_4_0 -entry main + +// We need to allow the user to add explicit bindings to their parameters, +// and we can't go and auto-assign anything to use the same locations. + +#ifdef __SPIRE__ +#define R(X) /**/ +#else +#define R(X) X +#endif + +float4 use(float4 val) { return val; }; +float4 use(Texture2D t, SamplerState s) { return t.Sample(s, 0.0); } + +// We'll make three textures, but explicitly assign the third one +// to the slot `t0`. We expect the others to shift further along +// to "make room". +Texture2D ta R(: register(t1)); +Texture2D tb R(: register(t2)); +Texture2D tc : register(t0); + + +// The explicit binding may "split" the range of registers available +// for automatic placement. We use a "first-fit" approach to pack +// things in: +SamplerState sa R(: register(s0)); +SamplerState sb R(: register(s2)); +SamplerState sc : register(s1); + +// It's also okay to use a register that *doesn't* conflict, +// and even to make things non-contiguous. Here we bind +// the third constant buffer to register `b9` +// +cbuffer CA R(: register(b0)) +{ + float ca; +} +// +cbuffer CB R(: register(b1)) +{ + float cb; +} +// +cbuffer CC : register(b9) +{ + float cc; +} + +float4 main() : SV_Target +{ + // Go ahead and use everything in this case: + return use(ta, sa) + use(ca) + + use(tb, sb) + use(cb) + + use(tc, sc) + use(cc); +}
\ No newline at end of file diff --git a/tests/bindings/multi-file-extra.hlsl b/tests/bindings/multi-file-extra.hlsl new file mode 100644 index 000000000..45837c984 --- /dev/null +++ b/tests/bindings/multi-file-extra.hlsl @@ -0,0 +1,60 @@ +//TEST_IGNORE_FILE: + +// Here we are going to test that we can correctly generating bindings when we +// are presented with a program spanning multiple input files (and multiple entry points) + +// This file provides the fragment shader, and is only meant to be tested in combination with `multi-file.hlsl` + +#ifdef __SPIRE__ +#define R(X) /**/ +#else +#define R(X) X +#endif + +float4 use(float val) { return val; }; +float4 use(float2 val) { return float4(val,0.0,0.0); }; +float4 use(float3 val) { return float4(val,0.0); }; +float4 use(float4 val) { return val; }; +float4 use(Texture2D t, SamplerState s) { return t.Sample(s, 0.0); } + +// Start with some parameters that will appear in both shaders +Texture2D sharedT R(: register(t0)); +SamplerState sharedS R(: register(s0)); +cbuffer sharedC R(: register(b0)) +{ + float3 sharedCA R(: packoffset(c0)); + float sharedCB R(: packoffset(c0.w)); + float3 sharedCC R(: packoffset(c1)); + float2 sharedCD R(: packoffset(c2)); +} + +// Then some parameters specific to this shader. +// These will be placed *after* the ones from the main file, +// and even after the parameters further down in this file +// that end up being shared between the two files. 
+ +Texture2D fragmentT R(: register(t4)); +SamplerState fragmentS R(: register(s2)); +cbuffer fragmentC R(: register(b2)) +{ + float3 fragmentCA R(: packoffset(c0)); + float fragmentCB R(: packoffset(c0.w)); + float3 fragmentCC R(: packoffset(c1)); + float2 fragmentCD R(: packoffset(c2)); +} + +// And end with some shared parameters again +Texture2D sharedTV R(: register(t2)); +Texture2D sharedTF R(: register(t3)); + + +float4 main() : SV_Target +{ + // Go ahead and use everything here, just to make sure things got placed correctly + return use(sharedT, sharedS) + + use(sharedCD) + + use(fragmentT, fragmentS) + + use(fragmentCD) + + use(sharedTF, sharedS) + ; +}
\ No newline at end of file diff --git a/tests/bindings/multi-file.hlsl b/tests/bindings/multi-file.hlsl new file mode 100644 index 000000000..db193a869 --- /dev/null +++ b/tests/bindings/multi-file.hlsl @@ -0,0 +1,64 @@ +//TEST:COMPARE_HLSL: -target dxbc-assembly -profile vs_4_0 -entry main Tests/bindings/multi-file-extra.hlsl -profile ps_4_0 -entry main + +// Here we are going to test that we can correctly generating bindings when we +// are presented with a program spanning multiple input files (and multiple entry points) + +// This file provides the vertex shader, while the fragment shader resides in +// the file `multi-file-extra.hlsl` + +#ifdef __SPIRE__ +#define R(X) /**/ +#else +#define R(X) X +#endif + +float4 use(float val) { return val; }; +float4 use(float2 val) { return float4(val,0.0,0.0); }; +float4 use(float3 val) { return float4(val,0.0); }; +float4 use(float4 val) { return val; }; +float4 use(Texture2D t, SamplerState s) +{ + // This is the vertex shader, so we can't do implicit-gradient sampling + return t.SampleGrad(s, 0.0, 0.0, 0.0); +} + +// Start with some parameters that will appear in both shaders +Texture2D sharedT R(: register(t0)); +SamplerState sharedS R(: register(s0)); +cbuffer sharedC R(: register(b0)) +{ + float3 sharedCA R(: packoffset(c0)); + float sharedCB R(: packoffset(c0.w)); + float3 sharedCC R(: packoffset(c1)); + float2 sharedCD R(: packoffset(c2)); +} + +// Then some parameters specific to this shader +// (these will get placed before the ones in the `extra` file, +// based on how they get named on the command-line) + +Texture2D vertexT R(: register(t1)); +SamplerState vertexS R(: register(s1)); +cbuffer vertexC R(: register(b1)) +{ + float3 vertexCA R(: packoffset(c0)); + float vertexCB R(: packoffset(c0.w)); + float3 vertexCC R(: packoffset(c1)); + float2 vertexCD R(: packoffset(c2)); +} + +// And end with some shared parameters again +Texture2D sharedTV R(: register(t2)); +Texture2D sharedTF R(: register(t3)); + + 
+float4 main() : SV_Position +{ + // Go ahead and use everything here, just to make sure things got placed correctly + return use(sharedT, sharedS) + + use(sharedCD) + + use(vertexT, vertexS) + + use(vertexCD) + + use(sharedTV, vertexS) + ; +}
\ No newline at end of file diff --git a/tests/bindings/packoffset.hlsl b/tests/bindings/packoffset.hlsl new file mode 100644 index 000000000..787bbc129 --- /dev/null +++ b/tests/bindings/packoffset.hlsl @@ -0,0 +1,40 @@ +//TEST:COMPARE_HLSL: -target dxbc-assembly -profile ps_4_0 -entry main + +// Let's make sure we generate correct output in cases +// where there are non-trivial `packoffset`s needed + +#ifdef __SPIRE__ +#define R(X) /**/ +#else +#define R(X) X +#endif + +float4 use(float val) { return val; }; +float4 use(float2 val) { return float4(val,0.0,0.0); }; +float4 use(float3 val) { return float4(val,0.0); }; +float4 use(float4 val) { return val; }; +float4 use(Texture2D t, SamplerState s) { return t.Sample(s, 0.0); } + +cbuffer CA R(: register(b0)) +{ + float4 ca R(: packoffset(c0)); + float3 cb R(: packoffset(c1.x)); + float cc R(: packoffset(c1.w)); + float2 cd R(: packoffset(c2.x)); + float2 ce R(: packoffset(c2.z)); + + Texture2D ta R(: register(t0)); + SamplerState sa R(: register(s0)); +} + +float4 main() : SV_Target +{ + // Go ahead and use everything in this case: + return use(ta, sa) + + use(ca) + + use(cb) + + use(cc) + + use(cd) + + use(ce) + ; +}
\ No newline at end of file diff --git a/tests/bindings/resources-in-cbuffer.hlsl b/tests/bindings/resources-in-cbuffer.hlsl new file mode 100644 index 000000000..ec35943cc --- /dev/null +++ b/tests/bindings/resources-in-cbuffer.hlsl @@ -0,0 +1,68 @@ +//TEST:COMPARE_HLSL: -target dxbc-assembly -profile ps_4_0 -entry main + +// Confirm that resources inside constant buffers get correct locations, +// including the case where there are *multiple* constant buffers +// with resources. + +#ifdef __SPIRE__ +#define R(X) /**/ +#else +#define R(X) X +#endif + +float4 use(float val) { return val; }; +float4 use(float2 val) { return float4(val,0.0,0.0); }; +float4 use(float3 val) { return float4(val,0.0); }; +float4 use(float4 val) { return val; }; +float4 use(Texture2D t, SamplerState s) { return t.Sample(s, 0.0); } + +cbuffer CA R(: register(b0)) +{ + float4 caa R(: packoffset(c0)); + float3 cab R(: packoffset(c1.x)); + float cac R(: packoffset(c1.w)); + float2 cad R(: packoffset(c2.x)); + float2 cae R(: packoffset(c2.z)); + + Texture2D ta R(: register(t0)); + SamplerState sa R(: register(s0)); +} + +cbuffer CB R(: register(b1)) +{ + float4 cba R(: packoffset(c0)); + float3 cbb R(: packoffset(c1.x)); + float cbc R(: packoffset(c1.w)); + float2 cbd R(: packoffset(c2.x)); + float2 cbe R(: packoffset(c2.z)); + + Texture2D tbx R(: register(t1)); + Texture2D tby R(: register(t2)); + SamplerState sb R(: register(s1)); +} + +cbuffer CC R(: register(b2)) +{ + float4 cca R(: packoffset(c0)); + float3 ccb R(: packoffset(c1.x)); + float ccc R(: packoffset(c1.w)); + float2 ccd R(: packoffset(c2.x)); + float2 cce R(: packoffset(c2.z)); + + Texture2D tc R(: register(t3)); + SamplerState scx R(: register(s2)); + SamplerState scy R(: register(s3)); +} + +float4 main() : SV_Target +{ + // Go ahead and use everything in this case: + return use(ta, sa) + + use(tbx, sb) + + use(tby, scx) + + use(tc, scy) + + use(cae) + + use(cbe) + + use(cce) + ; +}
\ No newline at end of file diff --git a/tests/bindings/resources-in-structs.hlsl.disabled b/tests/bindings/resources-in-structs.hlsl.disabled new file mode 100644 index 000000000..05269d823 --- /dev/null +++ b/tests/bindings/resources-in-structs.hlsl.disabled @@ -0,0 +1,42 @@ +//SPIRE_TEST_OPTS:-target dxbc-assembly -profile ps_5_0 -entry main + +// Confirm that resources inside `struct` types work reasonably well, + +#ifdef __SPIRE__ +#define R(X) /**/ +#else +#define R(X) X +#endif + +float4 use(float val) { return val; }; +float4 use(float2 val) { return float4(val,0.0,0.0); }; +float4 use(float3 val) { return float4(val,0.0); }; +float4 use(float4 val) { return val; }; +float4 use(Texture2D t, SamplerState s) { return t.Sample(s, 0.0); } +float4 use(Texture2D t, SamplerComparisonState s) { return t.SampleCmp(s, 0.0, 0.0); } + +struct FooData +{ + float4 f; + Texture2D t; + SamplerState s; + SamplerComparisonState c; +}; + +cbuffer CA R(: register(b0)) +{ + FooData foo R(: register(c0) : register(t0) : register(s0)); +}; + +Texture2D t R(: register(t1)); +SamplerState s R(: register(s2)); + +float4 main() : SV_Target +{ + // Go ahead and use everything in this case: + return use(foo.t, foo.s) + + use(foo.t, foo.c) + + use(t, s) + + use(foo.f) + ; +}
\ No newline at end of file diff --git a/tests/bindings/targets-and-uavs-structure.hlsl b/tests/bindings/targets-and-uavs-structure.hlsl new file mode 100644 index 000000000..dcc053253 --- /dev/null +++ b/tests/bindings/targets-and-uavs-structure.hlsl @@ -0,0 +1,36 @@ +//TEST:COMPARE_HLSL: -target dxbc-assembly -profile ps_5_0 -entry main + +// Handle the case where the fragment shader output is +// defined as a structure, and the semantics are on the sub-fields + +#ifdef __SPIRE__ +#define R(X) /**/ +#else +#define R(X) X +#endif + +float4 use(float val) { return val; }; +float4 use(float2 val) { return float4(val,0.0,0.0); }; +float4 use(float3 val) { return float4(val,0.0); }; +float4 use(float4 val) { return val; }; +float4 use(Texture2D t, SamplerState s) { return t.Sample(s, 0.0); } + +struct Foo { float2 v; }; + +// This should be allocated a register *after* the render targets +RWStructuredBuffer<Foo> fooBuffer R(: register(u2)); + +struct Fragment +{ + float4 color : SV_Target0; + float4 extra : SV_Target1; + +}; + +Fragment main() +{ + Fragment output; + output.color = use(fooBuffer[42].v); + output.extra = use(fooBuffer[999].v); + return output; +}
\ No newline at end of file diff --git a/tests/bindings/targets-and-uavs.hlsl b/tests/bindings/targets-and-uavs.hlsl new file mode 100644 index 000000000..e5843bed0 --- /dev/null +++ b/tests/bindings/targets-and-uavs.hlsl @@ -0,0 +1,28 @@ +//TEST:COMPARE_HLSL: -target dxbc-assembly -profile ps_5_0 -entry main + +// Render target outputs (`SV_Target`) and UAVs are treated +// as sharing the same binding slots in HLSL, so we need to +// make sure that any `u` registers we allocate don't +// interfere with render targets. + +#ifdef __SPIRE__ +#define R(X) /**/ +#else +#define R(X) X +#endif + +float4 use(float val) { return val; }; +float4 use(float2 val) { return float4(val,0.0,0.0); }; +float4 use(float3 val) { return float4(val,0.0); }; +float4 use(float4 val) { return val; }; +float4 use(Texture2D t, SamplerState s) { return t.Sample(s, 0.0); } + +struct Foo { float2 v; }; + +// This should be allocated a register *after* the render target +RWStructuredBuffer<Foo> fooBuffer R(: register(u1)); + +float4 main() : SV_Target +{ + return use(fooBuffer[12].v); +}
\ No newline at end of file diff --git a/tests/diagnostics/break-outside-loop.spire b/tests/diagnostics/break-outside-loop.spire new file mode 100644 index 000000000..f86b0bad6 --- /dev/null +++ b/tests/diagnostics/break-outside-loop.spire @@ -0,0 +1,4 @@ +//TEST:SIMPLE: +// `break` where it isn't allowed + +void foo() { break; } diff --git a/tests/diagnostics/break-outside-loop.spire.expected b/tests/diagnostics/break-outside-loop.spire.expected new file mode 100644 index 000000000..2d56efaa9 --- /dev/null +++ b/tests/diagnostics/break-outside-loop.spire.expected @@ -0,0 +1,6 @@ +result code = -1 +standard error = { +Tests/Diagnostics/break-outside-loop.spire(4): error 30003: 'break' must appear inside loop constructs. +} +standard output = { +} diff --git a/tests/diagnostics/call-argument-type.spire b/tests/diagnostics/call-argument-type.spire new file mode 100644 index 000000000..d9663147f --- /dev/null +++ b/tests/diagnostics/call-argument-type.spire @@ -0,0 +1,11 @@ +//TEST:SIMPLE: +// call function with wrong argument type + +struct A {}; +struct B {}; + +void f(A a) {} +void g(B b) +{ + f(b); +} diff --git a/tests/diagnostics/call-argument-type.spire.expected b/tests/diagnostics/call-argument-type.spire.expected new file mode 100644 index 000000000..09a01a359 --- /dev/null +++ b/tests/diagnostics/call-argument-type.spire.expected @@ -0,0 +1,6 @@ +result code = -1 +standard error = { +Tests/Diagnostics/call-argument-type.spire(10): error 30019: expected an expression of type 'A', got 'B' +} +standard output = { +} diff --git a/tests/diagnostics/continue-outside-loop.spire b/tests/diagnostics/continue-outside-loop.spire new file mode 100644 index 000000000..cad9dce6e --- /dev/null +++ b/tests/diagnostics/continue-outside-loop.spire @@ -0,0 +1,4 @@ +//TEST:SIMPLE: +// `continue` where it isn't allowed + +void foo() { continue; } diff --git a/tests/diagnostics/continue-outside-loop.spire.expected b/tests/diagnostics/continue-outside-loop.spire.expected new file 
mode 100644 index 000000000..d12cceb78 --- /dev/null +++ b/tests/diagnostics/continue-outside-loop.spire.expected @@ -0,0 +1,6 @@ +result code = -1 +standard error = { +Tests/Diagnostics/continue-outside-loop.spire(4): error 30004: 'continue' must appear inside loop constructs. +} +standard output = { +} diff --git a/tests/diagnostics/expected-token-eof.spire b/tests/diagnostics/expected-token-eof.spire new file mode 100644 index 000000000..99ab161ec --- /dev/null +++ b/tests/diagnostics/expected-token-eof.spire @@ -0,0 +1,6 @@ +//TEST:SIMPLE: +// expected one token, but got EOF + +int foo() +{ + int a = 3
\ No newline at end of file diff --git a/tests/diagnostics/expected-token-eof.spire.expected b/tests/diagnostics/expected-token-eof.spire.expected new file mode 100644 index 000000000..121ac3374 --- /dev/null +++ b/tests/diagnostics/expected-token-eof.spire.expected @@ -0,0 +1,6 @@ +result code = -1 +standard error = { +Tests/Diagnostics/expected-token-eof.spire(6): error 20001: unexpected end of file, expected ';' +} +standard output = { +} diff --git a/tests/diagnostics/expected-token.spire b/tests/diagnostics/expected-token.spire new file mode 100644 index 000000000..db85e7b92 --- /dev/null +++ b/tests/diagnostics/expected-token.spire @@ -0,0 +1,7 @@ +//TEST:SIMPLE: +// expected one token, but got another + +int foo() +{ + int a = 3 ] +}
\ No newline at end of file diff --git a/tests/diagnostics/expected-token.spire.expected b/tests/diagnostics/expected-token.spire.expected new file mode 100644 index 000000000..508a06726 --- /dev/null +++ b/tests/diagnostics/expected-token.spire.expected @@ -0,0 +1,6 @@ +result code = -1 +standard error = { +Tests/Diagnostics/expected-token.spire(6): error 20001: unexpected ']', expected ';' +} +standard output = { +} diff --git a/tests/diagnostics/function-redefinition.spire.disabled b/tests/diagnostics/function-redefinition.spire.disabled new file mode 100644 index 000000000..d47cb1bbc --- /dev/null +++ b/tests/diagnostics/function-redefinition.spire.disabled @@ -0,0 +1,5 @@ +//TEST:SIMPLE: +// redefining a function + +int foo(int a) { return 0; } +int foo(int b) { return 1; } diff --git a/tests/diagnostics/function-redefinition.spire.expected b/tests/diagnostics/function-redefinition.spire.expected new file mode 100644 index 000000000..94b0d3626 --- /dev/null +++ b/tests/diagnostics/function-redefinition.spire.expected @@ -0,0 +1,6 @@ +result code = -1 +standard error = { +Tests/Diagnostics/function-redefinition.spire(5): error 30001: 'foo(int)': function redefinition. 
+} +standard output = { +} diff --git a/tests/diagnostics/hull-shader-invalid-domain.spire.disabled b/tests/diagnostics/hull-shader-invalid-domain.spire.disabled new file mode 100644 index 000000000..a1341631a --- /dev/null +++ b/tests/diagnostics/hull-shader-invalid-domain.spire.disabled @@ -0,0 +1,53 @@ +//TEST:SIMPLE: +// `HullShader` with an invalid `Domain` attribute + +pipeline P +{ + world CoarseVertex; + world ControlPoint; + world CornerPoint; + world TessPatch; + world FineVertex; + + require @FineVertex vec4 RS_Position; + require @ControlPoint vec2 tessLevelInner; + require @ControlPoint vec4 tessLevelOuter; + + // implicit import operator CoarseVertex->CornerPoint + extern @CornerPoint CoarseVertex[] CoarseVertex_ControlPoint; + [PerCornerIterator] + extern @CornerPoint int HS_CornerID; + + extern @ControlPoint CoarseVertex[] CoarseVertex_ControlPoint; + extern @TessPatch CoarseVertex[] CoarseVertex_ControlPoint; + [InvocationId] + extern @ControlPoint int invocationId; + extern @FineVertex ControlPoint[] ControlPoint_tes; + extern @FineVertex Patch<TessPatch> perPatch_tes; + + extern @FineVertex Patch<CornerPoint[3]> perCorner_tes; + [TessCoord] + extern @FineVertex vec3 tessCoord; + + stage hs : HullShader + { + PatchWorld: TessPatch; + ControlPointWorld: ControlPoint; + CornerPointWorld: CornerPoint; + InputControlPointCount: 3; + ControlPointCount: 1; + Domain: pentagons; + TessLevelOuter: tessLevelOuter; + TessLevelInner: tessLevelInner; + Partitioning: integer; + OutputTopology: triangle_ccw; + } +} + +shader S + targets P +{ + @FineVertex float4 RS_Position = float4(0.0); + @ControlPoint float2 tessLevelInner = float2(2.0); + @ControlPoint float4 tessLevelOuter = float4(2.0); +}
\ No newline at end of file diff --git a/tests/diagnostics/hull-shader-invalid-domain.spire.expected b/tests/diagnostics/hull-shader-invalid-domain.spire.expected new file mode 100644 index 000000000..89b4634aa --- /dev/null +++ b/tests/diagnostics/hull-shader-invalid-domain.spire.expected @@ -0,0 +1,6 @@ +result code = -1 +standard error = { +Tests/Diagnostics/hull-shader-invalid-domain.spire(39): error 50053: 'Domain' should be either 'triangles' or 'quads'. +} +standard output = { +} diff --git a/tests/diagnostics/hull-shader-no-domain.spire.disabled b/tests/diagnostics/hull-shader-no-domain.spire.disabled new file mode 100644 index 000000000..87bf2a81c --- /dev/null +++ b/tests/diagnostics/hull-shader-no-domain.spire.disabled @@ -0,0 +1,53 @@ +//TEST:SIMPLE: +// `HullShader` without `Domain` attribute + +pipeline P +{ + world CoarseVertex; + world ControlPoint; + world CornerPoint; + world TessPatch; + world FineVertex; + + require @FineVertex vec4 RS_Position; + require @ControlPoint vec2 tessLevelInner; + require @ControlPoint vec4 tessLevelOuter; + + // implicit import operator CoarseVertex->CornerPoint + extern @CornerPoint CoarseVertex[] CoarseVertex_ControlPoint; + [PerCornerIterator] + extern @CornerPoint int HS_CornerID; + + extern @ControlPoint CoarseVertex[] CoarseVertex_ControlPoint; + extern @TessPatch CoarseVertex[] CoarseVertex_ControlPoint; + [InvocationId] + extern @ControlPoint int invocationId; + extern @FineVertex ControlPoint[] ControlPoint_tes; + extern @FineVertex Patch<TessPatch> perPatch_tes; + + extern @FineVertex Patch<CornerPoint[3]> perCorner_tes; + [TessCoord] + extern @FineVertex vec3 tessCoord; + + stage hs : HullShader + { + PatchWorld: TessPatch; + ControlPointWorld: ControlPoint; + CornerPointWorld: CornerPoint; + InputControlPointCount: 3; + ControlPointCount: 1; +// Domain: triangles; + TessLevelOuter: tessLevelOuter; + TessLevelInner: tessLevelInner; + Partitioning: integer; + OutputTopology: triangle_ccw; + } +} + +shader S 
+ targets P +{ + @FineVertex float4 RS_Position = float4(0.0); + @ControlPoint float2 tessLevelInner = float2(2.0); + @ControlPoint float4 tessLevelOuter = float4(2.0); +}
\ No newline at end of file diff --git a/tests/diagnostics/hull-shader-no-domain.spire.expected b/tests/diagnostics/hull-shader-no-domain.spire.expected new file mode 100644 index 000000000..08a08ad79 --- /dev/null +++ b/tests/diagnostics/hull-shader-no-domain.spire.expected @@ -0,0 +1,6 @@ +result code = -1 +standard error = { +Tests/Diagnostics/hull-shader-no-domain.spire(32): error 50052: 'HullShader' requires attribute 'Domain'. +} +standard output = { +} diff --git a/tests/diagnostics/illegal-character.spire b/tests/diagnostics/illegal-character.spire new file mode 100644 index 000000000..5915e861f --- /dev/null +++ b/tests/diagnostics/illegal-character.spire @@ -0,0 +1,5 @@ +//TEST:SIMPLE: +// illegal character + +` + diff --git a/tests/diagnostics/illegal-character.spire.expected b/tests/diagnostics/illegal-character.spire.expected new file mode 100644 index 000000000..301b99063 --- /dev/null +++ b/tests/diagnostics/illegal-character.spire.expected @@ -0,0 +1,6 @@ +result code = -1 +standard error = { +Tests/Diagnostics/illegal-character.spire(4): error 10000: illegal character '`' +} +standard output = { +} diff --git a/tests/diagnostics/missing-file.spire.disabled b/tests/diagnostics/missing-file.spire.disabled new file mode 100644 index 000000000..f340dd95e --- /dev/null +++ b/tests/diagnostics/missing-file.spire.disabled @@ -0,0 +1,3 @@ +// trying to import a non-existant file + +using "does-not-exist.spire" diff --git a/tests/diagnostics/missing-file.spire.expected b/tests/diagnostics/missing-file.spire.expected new file mode 100644 index 000000000..d4722c0e2 --- /dev/null +++ b/tests/diagnostics/missing-file.spire.expected @@ -0,0 +1,7 @@ +result code = -1 +standard error = { +Tests/Diagnostics/missing-file.spire(1): error 20001: unexpected end of file, expected ';' +Tests/Diagnostics/missing-file.spire(4): error 2: cannot find file 'does-not-exist.spire'. 
+} +standard output = { +} diff --git a/tests/diagnostics/missing-include-file.spire b/tests/diagnostics/missing-include-file.spire new file mode 100644 index 000000000..9e0f99b9c --- /dev/null +++ b/tests/diagnostics/missing-include-file.spire @@ -0,0 +1,4 @@ +//TEST:SIMPLE: +// trying to include a non-existant file + +#include "does-not-exist.h" diff --git a/tests/diagnostics/missing-include-file.spire.expected b/tests/diagnostics/missing-include-file.spire.expected new file mode 100644 index 000000000..24649c484 --- /dev/null +++ b/tests/diagnostics/missing-include-file.spire.expected @@ -0,0 +1,6 @@ +result code = -1 +standard error = { +Tests/Diagnostics/missing-include-file.spire(4): error 15300: failed to find include file 'does-not-exist.h' +} +standard output = { +} diff --git a/tests/diagnostics/parameter-already-defined.spire b/tests/diagnostics/parameter-already-defined.spire new file mode 100644 index 000000000..860b17e65 --- /dev/null +++ b/tests/diagnostics/parameter-already-defined.spire @@ -0,0 +1,4 @@ +//TEST:SIMPLE: +// re-use parameter name + +int foo( int a, float a ) { return 0; } diff --git a/tests/diagnostics/parameter-already-defined.spire.expected b/tests/diagnostics/parameter-already-defined.spire.expected new file mode 100644 index 000000000..0021afa42 --- /dev/null +++ b/tests/diagnostics/parameter-already-defined.spire.expected @@ -0,0 +1,6 @@ +result code = -1 +standard error = { +Tests/Diagnostics/parameter-already-defined.spire(4): error 30002: parameter 'a' already defined. 
+} +standard output = { +} diff --git a/tests/diagnostics/undefined-identifier.spire b/tests/diagnostics/undefined-identifier.spire new file mode 100644 index 000000000..6c5a59f75 --- /dev/null +++ b/tests/diagnostics/undefined-identifier.spire @@ -0,0 +1,7 @@ +//TEST:SIMPLE: +// use of undefined identifier + +void foo() +{ + int a = b; +} diff --git a/tests/diagnostics/undefined-identifier.spire.expected b/tests/diagnostics/undefined-identifier.spire.expected new file mode 100644 index 000000000..2eed77baf --- /dev/null +++ b/tests/diagnostics/undefined-identifier.spire.expected @@ -0,0 +1,6 @@ +result code = -1 +standard error = { +Tests/Diagnostics/undefined-identifier.spire(6): error 30015: undefined identifier 'b'. +} +standard output = { +} diff --git a/tests/diagnostics/variable-void-type.spire b/tests/diagnostics/variable-void-type.spire new file mode 100644 index 000000000..926e6401d --- /dev/null +++ b/tests/diagnostics/variable-void-type.spire @@ -0,0 +1,7 @@ +//TEST:SIMPLE: +// variable with `void` type + +void foo() +{ + void a; +} diff --git a/tests/diagnostics/variable-void-type.spire.expected b/tests/diagnostics/variable-void-type.spire.expected new file mode 100644 index 000000000..1f0e6765c --- /dev/null +++ b/tests/diagnostics/variable-void-type.spire.expected @@ -0,0 +1,6 @@ +result code = -1 +standard error = { +Tests/Diagnostics/variable-void-type.spire(6): error 30009: invalid type 'void'. 
+} +standard output = { +} diff --git a/tests/diagnostics/while-predicate-type.spire b/tests/diagnostics/while-predicate-type.spire new file mode 100644 index 000000000..14ce45533 --- /dev/null +++ b/tests/diagnostics/while-predicate-type.spire @@ -0,0 +1,10 @@ +//TEST:SIMPLE: +// bad type for `while` predicate + +struct S {}; + +void foo() +{ + S s; + while(s) {break;} +} diff --git a/tests/diagnostics/while-predicate-type.spire.expected b/tests/diagnostics/while-predicate-type.spire.expected new file mode 100644 index 000000000..9f94e679e --- /dev/null +++ b/tests/diagnostics/while-predicate-type.spire.expected @@ -0,0 +1,6 @@ +result code = -1 +standard error = { +Tests/Diagnostics/while-predicate-type.spire(9): error 30010: 'while': expression must evaluate to int. +} +standard output = { +} diff --git a/tests/front-end/lexer-comments.spire b/tests/front-end/lexer-comments.spire new file mode 100644 index 000000000..d8c06ca76 --- /dev/null +++ b/tests/front-end/lexer-comments.spire @@ -0,0 +1,13 @@ +//TEST:SIMPLE: +// confirming that the lexer handles comments correctly + +// line comment + +/* block comment +*/ + +/* block comments don't nest + /* +*/ + +float f(float f) { return f; }
\ No newline at end of file diff --git a/tests/front-end/parser-decls.spire b/tests/front-end/parser-decls.spire new file mode 100644 index 000000000..bc5a2b13b --- /dev/null +++ b/tests/front-end/parser-decls.spire @@ -0,0 +1,60 @@ +//TEST:SIMPLE: +// test that we can parse all the expected kinds of declarations + +// global-scope `using` is another test + +/* Note(tfoley): disabled during syntax transition +// pipeline +pipeline P +{ + +} +*/ + +// empty declaration +; + +// struct type +struct Pair +{ + int head; + float tail; + +// Note(tfoley): semicolon is expected/required +// here for compatibility with HLSL (C-style +// declarators). +// +// TODO: this could be removed if we treat +// HLSL as a "compatibility mode" +}; + +// function at global scope +float tail(Pair p) { return p.tail; } + +/* Note(tfoley): disabled during syntax transition + +// module +module M +{ + // component declarations + + // using declarations + +} + +// a module can "inherit" from a pipeline +module M2 + targets P +{ +} + +// shader +shader S +{ + // component declarations + + // using declarations + +} + +*/
\ No newline at end of file diff --git a/tests/front-end/parser-empty.spire b/tests/front-end/parser-empty.spire new file mode 100644 index 000000000..bfd66f05b --- /dev/null +++ b/tests/front-end/parser-empty.spire @@ -0,0 +1 @@ +//TEST:SIMPLE: diff --git a/tests/front-end/parser-error-unclosed-curly.spire b/tests/front-end/parser-error-unclosed-curly.spire new file mode 100644 index 000000000..6cfcca456 --- /dev/null +++ b/tests/front-end/parser-error-unclosed-curly.spire @@ -0,0 +1,3 @@ +//TEST:SIMPLE: +void foo() { +// Note: no closing curly brace diff --git a/tests/front-end/parser-error-unclosed-curly.spire.expected b/tests/front-end/parser-error-unclosed-curly.spire.expected new file mode 100644 index 000000000..437b6eab0 --- /dev/null +++ b/tests/front-end/parser-error-unclosed-curly.spire.expected @@ -0,0 +1,6 @@ +result code = -1 +standard error = { +Tests/FrontEnd/parser-error-unclosed-curly.spire(4): error 20001: unexpected end of file, expected '}' +} +standard output = { +} diff --git a/tests/front-end/parser-using-file-a.spireh b/tests/front-end/parser-using-file-a.spireh new file mode 100644 index 000000000..62e6acd06 --- /dev/null +++ b/tests/front-end/parser-using-file-a.spireh @@ -0,0 +1,3 @@ +// this file exists to be included by "parser-using-file.spire" + +float a(float x) { return x * x; } diff --git a/tests/front-end/parser-using-file.spire.disabled b/tests/front-end/parser-using-file.spire.disabled new file mode 100644 index 000000000..f93fb576c --- /dev/null +++ b/tests/front-end/parser-using-file.spire.disabled @@ -0,0 +1,6 @@ +//TEST:SIMPLE: +// test that we can include a file via `using` + +using "parser-using-file-a.spireh"; + +float base( float x ) { return a(x); }
\ No newline at end of file diff --git a/tests/front-end/pipeline-simple.spireh b/tests/front-end/pipeline-simple.spireh new file mode 100644 index 000000000..55afd693c --- /dev/null +++ b/tests/front-end/pipeline-simple.spireh @@ -0,0 +1,41 @@ +// pipeline-simple.spireh + + +// TODO(tfoley): strip this down to a minimal pipeline + +pipeline StandardPipeline +{ + [Pinned] + input world MeshVertex; + + world CoarseVertex;// : "glsl(vertex:projCoord)" using projCoord export standardExport; + world Fragment;// : "glsl" export fragmentExport; + + require @CoarseVertex vec4 projCoord; + + [VertexInput] + extern @CoarseVertex MeshVertex vertAttribIn; + import(MeshVertex->CoarseVertex) vertexImport() + { + return project(vertAttribIn); + } + + extern @Fragment CoarseVertex CoarseVertexIn; + import(CoarseVertex->Fragment) standardImport() +// TODO(tfoley): this trait doesn't seem to be implemented on `vec3` +// require trait IsTriviallyPassable(CoarseVertex) + { + return project(CoarseVertexIn); + } + + stage vs : VertexShader + { + World: CoarseVertex; + Position: projCoord; + } + + stage fs : FragmentShader + { + World: Fragment; + } +}
\ No newline at end of file diff --git a/tests/front-end/struct.spire b/tests/front-end/struct.spire new file mode 100644 index 000000000..8347f8d58 --- /dev/null +++ b/tests/front-end/struct.spire @@ -0,0 +1,49 @@ +//TEST:SIMPLE: +// test that `struct` decls work + +// Note(tfoley): disabled during syntax transition +// #include "pipeline-simple.spireh" + +// struct declaration +struct Foo +{ + float3 a; + float3 b; +}; + +// function on a struct +Foo makeFoo(float x, float y) +{ + // local of struct type + Foo foo; + foo.a = float3(x); + foo.b = float3(y); + return foo; +} + +/* Note(tfoley): disabled during syntax transition + +template shader Test() +// targets StandardPipeline +{ + // Uniform of struct type + param Foo foo1; + + @MeshVertex float3 position; + @MeshVertex float3 color; + + param mat4 modelViewProjection; + + public vec4 projCoord = modelViewProjection * vec4(position, 1.0); + + // Component of struct type + // Note(tfoley): use of `public` here required to work around parser limitations + public Foo foo2 = makeFoo(color.x, color.y); + + // + float3 result = foo1.a + foo2.b; + + out @Fragment vec4 colorTarget = vec4(result,1); +} + +*/
\ No newline at end of file diff --git a/tests/front-end/typedef.spire b/tests/front-end/typedef.spire new file mode 100644 index 000000000..7e96bead0 --- /dev/null +++ b/tests/front-end/typedef.spire @@ -0,0 +1,15 @@ +//TEST:SIMPLE: +// test that we can `typedef` a type + +typedef float F32; + +F32 foo() +{ + float x = 123.0; + return x; +} + +float bar() +{ + return foo(); +} diff --git a/tests/glsl/sascha-willems/LICENSE.md b/tests/glsl/sascha-willems/LICENSE.md new file mode 100644 index 000000000..2c4cfd5b7 --- /dev/null +++ b/tests/glsl/sascha-willems/LICENSE.md @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2016 Sascha Willems + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/tests/glsl/sascha-willems/README.md b/tests/glsl/sascha-willems/README.md new file mode 100644 index 000000000..3060f9b3d --- /dev/null +++ b/tests/glsl/sascha-willems/README.md @@ -0,0 +1,8 @@ +# GLSL Shaders from Sascha Willems' Vulkan Tutorials + + +These shaders are taken from the [repository][VulkanTutorials] of Vulkan examples developed by Sascha Willems. +The original code is licensed according to the terms in the [`LICENSE.md`](LICENSE.md) file in this directory. + +[VulkanTutorials]: https://github.com/SaschaWillems/Vulkan/ "Vulkan C++ examples and demos" + diff --git a/tests/glsl/sascha-willems/base/textoverlay.frag b/tests/glsl/sascha-willems/base/textoverlay.frag new file mode 100644 index 000000000..e5dbb08de --- /dev/null +++ b/tests/glsl/sascha-willems/base/textoverlay.frag @@ -0,0 +1,14 @@ +#version 450 core +//TEST:COMPARE_GLSL: + +layout (location = 0) in vec2 inUV; + +layout (binding = 0) uniform sampler2D samplerFont; + +layout (location = 0) out vec4 outFragColor; + +void main(void) +{ + float color = texture(samplerFont, inUV).r; + outFragColor = vec4(vec3(color), 1.0); +} diff --git a/tests/glsl/sascha-willems/base/textoverlay.vert b/tests/glsl/sascha-willems/base/textoverlay.vert new file mode 100644 index 000000000..8a20fd8b2 --- /dev/null +++ b/tests/glsl/sascha-willems/base/textoverlay.vert @@ -0,0 +1,18 @@ +#version 450 core +//TEST:COMPARE_GLSL: + +layout (location = 0) in vec2 inPos; +layout (location = 1) in vec2 inUV; + +layout (location = 0) out vec2 outUV; + +out gl_PerVertex +{ + vec4 gl_Position; +}; + +void main(void) +{ + gl_Position = vec4(inPos, 0.0, 1.0); + outUV = inUV; +} diff --git a/tests/glsl/sascha-willems/bloom/colorpass.frag b/tests/glsl/sascha-willems/bloom/colorpass.frag new file mode 100644 index 000000000..63d518d29 --- /dev/null +++ b/tests/glsl/sascha-willems/bloom/colorpass.frag @@ -0,0 +1,18 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension 
GL_ARB_shading_language_420pack : enable + +layout (binding = 1) uniform sampler2D colorMap; + +layout (location = 0) in vec3 inColor; +layout (location = 1) in vec2 inUV; + +layout (location = 0) out vec4 outFragColor; + +void main() +{ + outFragColor.rgb = inColor; +// outFragColor = texture(colorMap, inUV);// * vec4(inColor, 1.0); +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/bloom/colorpass.vert b/tests/glsl/sascha-willems/bloom/colorpass.vert new file mode 100644 index 000000000..d2f7f4e6f --- /dev/null +++ b/tests/glsl/sascha-willems/bloom/colorpass.vert @@ -0,0 +1,31 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (location = 0) in vec4 inPos; +layout (location = 1) in vec2 inUV; +layout (location = 2) in vec3 inColor; + +layout (binding = 0) uniform UBO +{ + mat4 projection; + mat4 view; + mat4 model; +} ubo; + +layout (location = 0) out vec3 outColor; +layout (location = 1) out vec2 outUV; + +out gl_PerVertex +{ + vec4 gl_Position; +}; + +void main() +{ + outUV = inUV; + outColor = inColor; + gl_Position = ubo.projection * ubo.view * ubo.model * inPos; +} diff --git a/tests/glsl/sascha-willems/bloom/gaussblur.frag b/tests/glsl/sascha-willems/bloom/gaussblur.frag new file mode 100644 index 000000000..4cf4696cf --- /dev/null +++ b/tests/glsl/sascha-willems/bloom/gaussblur.frag @@ -0,0 +1,48 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (binding = 1) uniform sampler2D samplerColor; + +layout (binding = 0) uniform UBO +{ + float blurScale; + float blurStrength; +} ubo; + +layout (constant_id = 0) const int blurdirection = 0; + +layout (location = 0) in vec2 inUV; + +layout (location = 0) out vec4 outFragColor; + +void main() +{ + float weight[5]; + weight[0] = 0.227027; + weight[1] = 0.1945946; + weight[2] = 0.1216216; + weight[3] = 0.054054; + weight[4] = 0.016216; + + vec2 tex_offset = 1.0 / textureSize(samplerColor, 0) * ubo.blurScale; // gets size of single texel + vec3 result = texture(samplerColor, inUV).rgb * weight[0]; // current fragment's contribution + for(int i = 1; i < 5; ++i) + { + if (blurdirection == 1) + { + // H + result += 
texture(samplerColor, inUV + vec2(tex_offset.x * i, 0.0)).rgb * weight[i] * ubo.blurStrength; + result += texture(samplerColor, inUV - vec2(tex_offset.x * i, 0.0)).rgb * weight[i] * ubo.blurStrength; + } + else + { + // V + result += texture(samplerColor, inUV + vec2(0.0, tex_offset.y * i)).rgb * weight[i] * ubo.blurStrength; + result += texture(samplerColor, inUV - vec2(0.0, tex_offset.y * i)).rgb * weight[i] * ubo.blurStrength; + } + } + outFragColor = vec4(result, 1.0); +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/bloom/gaussblur.vert b/tests/glsl/sascha-willems/bloom/gaussblur.vert new file mode 100644 index 000000000..548284554 --- /dev/null +++ b/tests/glsl/sascha-willems/bloom/gaussblur.vert @@ -0,0 +1,18 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (location = 0) out vec2 outUV; + +out gl_PerVertex +{ + vec4 gl_Position; +}; + +void main() +{ + outUV = vec2((gl_VertexIndex << 1) & 2, gl_VertexIndex & 2); + gl_Position = vec4(outUV * 2.0f - 1.0f, 0.0f, 1.0f); +} diff --git a/tests/glsl/sascha-willems/bloom/phongpass.frag b/tests/glsl/sascha-willems/bloom/phongpass.frag new file mode 100644 index 000000000..9a449f6d1 --- /dev/null +++ b/tests/glsl/sascha-willems/bloom/phongpass.frag @@ -0,0 +1,34 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (binding = 1) uniform sampler2D colorMap; + +layout (location = 0) in vec3 inNormal; +layout (location = 1) in vec2 inUV; +layout (location = 2) in vec3 inColor; +layout (location = 3) in vec3 inViewVec; +layout (location = 4) in vec3 inLightVec; + +layout (location = 0) out vec4 outFragColor; + +void main() +{ + vec3 ambient = vec3(0.0f); + + // Adjust light calculations for glow color + if ((inColor.r >= 0.9) || (inColor.g >= 0.9) || (inColor.b >= 0.9)) + { + ambient = inColor * 0.25; + } + + vec3 N = normalize(inNormal); + vec3 L = normalize(inLightVec); + vec3 V = normalize(inViewVec); + vec3 R = reflect(-L, N); + vec3 diffuse = max(dot(N, L), 0.0) * inColor; + vec3 specular = pow(max(dot(R, V), 0.0), 8.0) * vec3(0.75); + outFragColor = vec4(ambient + diffuse + specular, 1.0); +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/bloom/phongpass.vert b/tests/glsl/sascha-willems/bloom/phongpass.vert new file mode 100644 index 000000000..ac0a77ab9 --- /dev/null +++ b/tests/glsl/sascha-willems/bloom/phongpass.vert @@ -0,0 +1,42 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (location = 0) in vec4 inPos; +layout (location = 1) in vec2 inUV; +layout (location = 2) in vec3 inColor; +layout (location = 3) in vec3 inNormal; + +layout (binding = 0) uniform UBO +{ + mat4 projection; + mat4 view; + mat4 model; +} ubo; + +layout (location = 0) out vec3 outNormal; +layout (location = 1) out vec2 outUV; +layout (location = 2) out vec3 outColor; +layout (location = 3) out vec3 outViewVec; +layout (location = 4) out vec3 outLightVec; + +out gl_PerVertex +{ + vec4 gl_Position; +}; + +void main() +{ + outNormal = inNormal; + outColor = inColor; + outUV = inUV; + gl_Position = ubo.projection * ubo.view * ubo.model * inPos; + + vec3 lightPos = vec3(-5.0, -5.0, 0.0); + vec4 pos = ubo.view * ubo.model * inPos; + outNormal = mat3(ubo.view * ubo.model) * inNormal; + outLightVec = lightPos - pos.xyz; + outViewVec = -pos.xyz; +} diff --git a/tests/glsl/sascha-willems/bloom/skybox.frag b/tests/glsl/sascha-willems/bloom/skybox.frag new file mode 100644 index 000000000..611eb37db --- /dev/null +++ b/tests/glsl/sascha-willems/bloom/skybox.frag @@ -0,0 +1,16 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (binding = 1) uniform samplerCube samplerCubeMap; + +layout (location = 0) in vec3 inUVW; + +layout (location = 0) out vec4 outFragColor; + +void main() +{ + outFragColor = texture(samplerCubeMap, inUVW); +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/bloom/skybox.vert b/tests/glsl/sascha-willems/bloom/skybox.vert new file mode 100644 index 000000000..f06b3d38d --- /dev/null +++ b/tests/glsl/sascha-willems/bloom/skybox.vert @@ -0,0 +1,28 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (location = 0) in vec3 inPos; + +layout (binding = 0) uniform UBO +{ + mat4 projection; + mat4 view; + mat4 model; +} ubo; + +layout (location = 0) out vec3 outUVW; + +out gl_PerVertex +{ + vec4 gl_Position; +}; + + +void main() +{ + outUVW = inPos; + gl_Position = ubo.projection * ubo.view * ubo.model * vec4(inPos.xyz, 1.0); +} diff --git a/tests/glsl/sascha-willems/computecullandlod/cull.comp b/tests/glsl/sascha-willems/computecullandlod/cull.comp new file mode 100644 index 000000000..d04beb2ec --- /dev/null +++ b/tests/glsl/sascha-willems/computecullandlod/cull.comp @@ -0,0 +1,127 @@ +//TEST_IGNORE_FILE: Currently failing due to lack of support for math on specialization constants +//TEST:COMPARE_GLSL: +#version 450 + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (constant_id = 0) const int MAX_LOD_LEVEL = 5; + +struct InstanceData +{ + vec3 pos; + float scale; +}; + +// Binding 0: Instance input data for culling +layout (binding = 0, std140) buffer Instances +{ + InstanceData instances[ ]; +}; + +// Same layout as VkDrawIndexedIndirectCommand +struct IndexedIndirectCommand +{ + uint indexCount; + uint instanceCount; + uint firstIndex; + uint vertexOffset; + uint firstInstance; +}; + +// Binding 1: Multi draw output +layout (binding = 1, std430) writeonly buffer IndirectDraws +{ + IndexedIndirectCommand indirectDraws[ ]; +}; + +// Binding 2: Uniform block object with matrices +layout (binding = 2) uniform UBO +{ + mat4 projection; + mat4 modelview; + vec4 cameraPos; + vec4 
frustumPlanes[6]; +} ubo; + +// Binding 3: Indirect draw stats +layout (binding = 3) buffer UBOOut +{ + uint drawCount; + uint lodCount[MAX_LOD_LEVEL + 1]; +} uboOut; + +// Binding 4: level-of-detail information +struct LOD +{ + uint firstIndex; + uint indexCount; + float distance; + float _pad0; +}; +layout (binding = 4) readonly buffer LODs +{ + LOD lods[ ]; +}; + +layout (local_size_x = 16) in; + +bool frustumCheck(vec4 pos, float radius) +{ + // Check sphere against frustum planes + for (int i = 0; i < 6; i++) + { + if (dot(pos, ubo.frustumPlanes[i]) + radius < 0.0) + { + return false; + } + } + return true; +} + +layout (local_size_x = 16) in; + +void main() +{ + uint idx = gl_GlobalInvocationID.x + gl_GlobalInvocationID.y * gl_NumWorkGroups.x * gl_WorkGroupSize.x; + + // Clear stats on first invocation + if (idx == 0) + { + atomicExchange(uboOut.drawCount, 0); + for (uint i = 0; i < MAX_LOD_LEVEL + 1; i++) + { + atomicExchange(uboOut.lodCount[i], 0); + } + } + + vec4 pos = vec4(instances[idx].pos.xyz, 1.0); + + // Check if object is within current viewing frustum + if (frustumCheck(pos, 1.0)) + { + indirectDraws[idx].instanceCount = 1; + + // Increase number of indirect draw counts + atomicAdd(uboOut.drawCount, 1); + + // Select appropriate LOD level based on distance to camera + uint lodLevel = MAX_LOD_LEVEL; + for (uint i = 0; i < MAX_LOD_LEVEL; i++) + { + if (distance(instances[idx].pos.xyz, ubo.cameraPos.xyz) < lods[i].distance) + { + lodLevel = i; + break; + } + } + indirectDraws[idx].firstIndex = lods[lodLevel].firstIndex; + indirectDraws[idx].indexCount = lods[lodLevel].indexCount; + // Update stats + atomicAdd(uboOut.lodCount[lodLevel], 1); + } + else + { + indirectDraws[idx].instanceCount = 0; + } +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/computecullandlod/indirectdraw.frag b/tests/glsl/sascha-willems/computecullandlod/indirectdraw.frag new file mode 100644 index 000000000..c1e2240f3 --- /dev/null +++ b/tests/glsl/sascha-willems/computecullandlod/indirectdraw.frag @@ -0,0 +1,21 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (location = 0) in vec3 inNormal; +layout (location = 1) in vec3 inColor; +layout (location = 2) in vec3 inViewVec; +layout (location = 3) in vec3 inLightVec; + +layout (location = 0) out vec4 outFragColor; + +void main() +{ + vec3 N = normalize(inNormal); + vec3 L = normalize(inLightVec); + vec3 ambient = vec3(0.25); + vec3 diffuse = vec3(max(dot(N, L), 0.0)); + outFragColor = vec4((ambient + diffuse) * inColor, 1.0); +} diff --git a/tests/glsl/sascha-willems/computecullandlod/indirectdraw.vert b/tests/glsl/sascha-willems/computecullandlod/indirectdraw.vert new file mode 100644 index 000000000..3f4a4b8c2 --- /dev/null +++ b/tests/glsl/sascha-willems/computecullandlod/indirectdraw.vert @@ -0,0 +1,46 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +// Vertex attributes +layout (location = 0) in vec4 inPos; +layout (location = 1) in vec3 inNormal; +layout (location = 2) in vec3 inColor; + +// Instanced attributes +layout (location = 4) in vec3 instancePos; +layout (location = 5) in float instanceScale; + +layout (binding = 0) uniform UBO +{ + mat4 projection; + mat4 modelview; +} ubo; + +layout (location = 0) out vec3 outNormal; +layout (location = 1) out vec3 outColor; +layout (location = 2) out vec3 outViewVec; +layout (location = 3) out vec3 outLightVec; + +out gl_PerVertex +{ + vec4 gl_Position; +}; + +void main() +{ + outColor = inColor; + + outNormal = inNormal; + + vec4 pos = vec4((inPos.xyz * 
instanceScale) + instancePos, 1.0); + + gl_Position = ubo.projection * ubo.modelview * pos; + + vec4 wPos = ubo.modelview * vec4(pos.xyz, 1.0); + vec4 lPos = vec4(0.0, 10.0, 50.0, 1.0); + outLightVec = lPos.xyz - pos.xyz; + outViewVec = -pos.xyz; +} diff --git a/tests/glsl/sascha-willems/computenbody/particle.frag b/tests/glsl/sascha-willems/computenbody/particle.frag new file mode 100644 index 000000000..e67d2e00f --- /dev/null +++ b/tests/glsl/sascha-willems/computenbody/particle.frag @@ -0,0 +1,18 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (binding = 0) uniform sampler2D samplerColorMap; +layout (binding = 1) uniform sampler2D samplerGradientRamp; + +layout (location = 0) in float inGradientPos; + +layout (location = 0) out vec4 outFragColor; + +void main () +{ + vec3 color = texture(samplerGradientRamp, vec2(inGradientPos, 0.0)).rgb; + outFragColor.rgb = texture(samplerColorMap, gl_PointCoord).rgb * color; +} diff --git a/tests/glsl/sascha-willems/computenbody/particle.vert b/tests/glsl/sascha-willems/computenbody/particle.vert new file mode 100644 index 000000000..eedd185d9 --- /dev/null +++ b/tests/glsl/sascha-willems/computenbody/particle.vert @@ -0,0 +1,36 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (location = 0) in vec4 inPos; +layout (location = 1) in vec4 inVel; + +layout (location = 0) out float outGradientPos; + +layout (binding = 2) uniform UBO +{ + mat4 projection; + mat4 modelview; + vec2 screendim; +} ubo; + +out gl_PerVertex +{ + vec4 gl_Position; + float gl_PointSize; +}; + +void main () +{ + const float spriteSize = 0.005 * inPos.w; // Point size influenced by mass (stored in inPos.w); + + vec4 eyePos = ubo.modelview * vec4(inPos.x, inPos.y, inPos.z, 1.0); + vec4 projectedCorner = ubo.projection * vec4(0.5 * spriteSize, 0.5 
* spriteSize, eyePos.z, eyePos.w); + gl_PointSize = clamp(ubo.screendim.x * projectedCorner.x / projectedCorner.w, 1.0, 128.0); + + gl_Position = ubo.projection * eyePos; + + outGradientPos = inVel.w; +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/computenbody/particle_calculate.comp b/tests/glsl/sascha-willems/computenbody/particle_calculate.comp new file mode 100644 index 000000000..82cbe35a0 --- /dev/null +++ b/tests/glsl/sascha-willems/computenbody/particle_calculate.comp @@ -0,0 +1,75 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +struct Particle +{ + vec4 pos; + vec4 vel; +}; + +// Binding 0 : Position storage buffer +layout(std140, binding = 0) buffer Pos +{ + Particle particles[ ]; +}; + +layout (local_size_x = 256) in; + +layout (binding = 1) uniform UBO +{ + float deltaT; + float destX; + float destY; + int particleCount; +} ubo; + +layout (constant_id = 0) const int SHARED_DATA_SIZE = 512; +layout (constant_id = 1) const float GRAVITY = 0.002; +layout (constant_id = 2) const float POWER = 0.75; +layout (constant_id = 3) const float SOFTEN = 0.0075; + +// Share data between computer shader invocations to speed up caluclations +shared vec4 sharedData[SHARED_DATA_SIZE]; + +void main() +{ + // Current SSBO index + uint index = gl_GlobalInvocationID.x; + if (index >= ubo.particleCount) + return; + + vec4 position = particles[index].pos; + vec4 velocity = particles[index].vel; + vec4 acceleration = vec4(0.0); + + for (int i = 0; i < ubo.particleCount; i += SHARED_DATA_SIZE) + { + if (i + gl_LocalInvocationID.x < ubo.particleCount) + { + sharedData[gl_LocalInvocationID.x] = particles[i + gl_LocalInvocationID.x].pos; + } + else + { + sharedData[gl_LocalInvocationID.x] = vec4(0.0); + } + + memoryBarrierShared(); + + for (int j = 0; j < gl_WorkGroupSize.x; j++) + { + vec4 other = sharedData[j]; + vec3 len = other.xyz - position.xyz; + acceleration.xyz += GRAVITY * len * other.w / pow(dot(len, len) + SOFTEN, POWER); + } + } + + particles[index].vel.xyz += ubo.deltaT * acceleration.xyz; + + // Gradient texture position + particles[index].vel.w 
+= 0.1 * ubo.deltaT; + if (particles[index].vel.w > 1.0) + particles[index].vel.w -= 1.0; +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/computenbody/particle_integrate.comp b/tests/glsl/sascha-willems/computenbody/particle_integrate.comp new file mode 100644 index 000000000..7085bee01 --- /dev/null +++ b/tests/glsl/sascha-willems/computenbody/particle_integrate.comp @@ -0,0 +1,36 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +struct Particle +{ + vec4 pos; + vec4 vel; +}; + +// Binding 0 : Position storage buffer +layout(std140, binding = 0) buffer Pos +{ + Particle particles[ ]; +}; + +layout (local_size_x = 256) in; + +layout (binding = 1) uniform UBO +{ + float deltaT; + float destX; + float destY; + int particleCount; +} ubo; + +void main() +{ + int index = int(gl_GlobalInvocationID); + vec4 position = particles[index].pos; + vec4 velocity = particles[index].vel; + position += ubo.deltaT * velocity; + particles[index].pos = position; +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/computeparticles/particle.comp b/tests/glsl/sascha-willems/computeparticles/particle.comp new file mode 100644 index 000000000..4faf181a4 --- /dev/null +++ b/tests/glsl/sascha-willems/computeparticles/particle.comp @@ -0,0 +1,80 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +struct Particle +{ + vec2 pos; + vec2 vel; + vec4 gradientPos; +}; + +// Binding 0 : Position storage buffer +layout(std140, binding = 0) buffer Pos +{ + Particle particles[ ]; +}; + +layout (local_size_x = 256) in; + +layout (binding = 1) uniform UBO +{ + float deltaT; + float destX; + float destY; + int particleCount; +} ubo; + +vec2 attraction(vec2 pos, vec2 attractPos) +{ + vec2 delta = attractPos - pos; + const float damp = 0.5; + float dDampedDot = dot(delta, delta) + damp; + float invDist = 1.0f / sqrt(dDampedDot); + float invDistCubed = invDist*invDist*invDist; + return delta * invDistCubed * 0.0035; +} + +vec2 repulsion(vec2 pos, vec2 attractPos) +{ + vec2 delta = attractPos - pos; + float targetDistance = sqrt(dot(delta, delta)); + return delta * (1.0 / (targetDistance * targetDistance * targetDistance)) * -0.000035; +} + +void main() +{ + // Current SSBO index + uint index = gl_GlobalInvocationID.x; + // Don't try to write beyond particle count + if (index >= ubo.particleCount) + return; + + // Read position and velocity + vec2 vVel = particles[index].vel.xy; + vec2 vPos = particles[index].pos.xy; + + vec2 destPos = vec2(ubo.destX, ubo.destY); + + vec2 delta = destPos - vPos; + float targetDistance = sqrt(dot(delta, delta)); + vVel += repulsion(vPos, destPos.xy) * 0.05; + + // Move by velocity + vPos += vVel * ubo.deltaT; + + // collide with boundary + if ((vPos.x < -1.0) || (vPos.x > 1.0) || (vPos.y < -1.0) || (vPos.y > 1.0)) + vVel = (-vVel * 0.1) + attraction(vPos, destPos) * 12; + else + particles[index].pos.xy = 
vPos; + + // Write back + particles[index].vel.xy = vVel; + particles[index].gradientPos.x += 0.02 * ubo.deltaT; + if (particles[index].gradientPos.x > 1.0) + particles[index].gradientPos.x -= 1.0; +} + diff --git a/tests/glsl/sascha-willems/computeparticles/particle.frag b/tests/glsl/sascha-willems/computeparticles/particle.frag new file mode 100644 index 000000000..ac6077261 --- /dev/null +++ b/tests/glsl/sascha-willems/computeparticles/particle.frag @@ -0,0 +1,19 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (binding = 0) uniform sampler2D samplerColorMap; +layout (binding = 1) uniform sampler2D samplerGradientRamp; + +layout (location = 0) in vec4 inColor; +layout (location = 1) in float inGradientPos; + +layout (location = 0) out vec4 outFragColor; + +void main () +{ + vec3 color = texture(samplerGradientRamp, vec2(inGradientPos, 0.0)).rgb; + outFragColor.rgb = texture(samplerColorMap, gl_PointCoord).rgb * color; +} diff --git a/tests/glsl/sascha-willems/computeparticles/particle.vert b/tests/glsl/sascha-willems/computeparticles/particle.vert new file mode 100644 index 000000000..b134c4898 --- /dev/null +++ b/tests/glsl/sascha-willems/computeparticles/particle.vert @@ -0,0 +1,25 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (location = 0) in vec2 inPos; +layout (location = 1) in vec4 inGradientPos; + +layout (location = 0) out vec4 outColor; +layout (location = 1) out float outGradientPos; + +out gl_PerVertex +{ + vec4 gl_Position; + float gl_PointSize; +}; + +void main () +{ + gl_PointSize = 8.0; + outColor = vec4(0.035); + outGradientPos = inGradientPos.x; + gl_Position = vec4(inPos.xy, 1.0, 1.0); +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/computeshader/edgedetect.comp b/tests/glsl/sascha-willems/computeshader/edgedetect.comp new file mode 100644 index 000000000..0c6bb32fa --- /dev/null +++ b/tests/glsl/sascha-willems/computeshader/edgedetect.comp @@ -0,0 +1,48 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (local_size_x = 16, local_size_y = 16) in; +layout (binding = 0, rgba8) uniform readonly image2D inputImage; +layout (binding = 1, rgba8) uniform image2D resultImage; + +float conv(in float[9] kernel, in float[9] data, in float denom, in float offset) +{ + float res = 0.0; + for (int i=0; i<9; ++i) + { + res += kernel[i] * data[i]; + } + return clamp(res/denom + offset, 0.0, 1.0); +} + +struct ImageData +{ + float avg[9]; +} imageData; + +void main() +{ + // Fetch neighbouring texels + int n = -1; + for (int i=-1; i<2; ++i) + { + for(int j=-1; j<2; ++j) + { + n++; + vec3 rgb = imageLoad(inputImage, ivec2(gl_GlobalInvocationID.x + i, gl_GlobalInvocationID.y + j)).rgb; + imageData.avg[n] = (rgb.r + rgb.g + rgb.b) / 3.0; + } + } + + float[9] kernel; + kernel[0] = -1.0/8.0; kernel[1] = -1.0/8.0; kernel[2] = -1.0/8.0; + kernel[3] = -1.0/8.0; kernel[4] = 1.0; kernel[5] = -1.0/8.0; + kernel[6] = -1.0/8.0; kernel[7] = -1.0/8.0; kernel[8] = -1.0/8.0; + + vec4 res = vec4(vec3(conv(kernel, imageData.avg, 0.1, 0.0)), 1.0); + + imageStore(resultImage, ivec2(gl_GlobalInvocationID.xy), res); +} diff --git a/tests/glsl/sascha-willems/computeshader/emboss.comp b/tests/glsl/sascha-willems/computeshader/emboss.comp new file mode 100644 index 000000000..ad0fef510 --- /dev/null +++ b/tests/glsl/sascha-willems/computeshader/emboss.comp @@ -0,0 +1,48 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (local_size_x = 16, local_size_y = 16) in; 
+layout (binding = 0, rgba8) uniform readonly image2D inputImage; +layout (binding = 1, rgba8) uniform image2D resultImage; + +float conv(in float[9] kernel, in float[9] data, in float denom, in float offset) +{ + float res = 0.0; + for (int i=0; i<9; ++i) + { + res += kernel[i] * data[i]; + } + return clamp(res/denom + offset, 0.0, 1.0); +} + +struct ImageData +{ + float avg[9]; +} imageData; + +void main() +{ + // Fetch neighbouring texels + int n = -1; + for (int i=-1; i<2; ++i) + { + for(int j=-1; j<2; ++j) + { + n++; + vec3 rgb = imageLoad(inputImage, ivec2(gl_GlobalInvocationID.x + i, gl_GlobalInvocationID.y + j)).rgb; + imageData.avg[n] = (rgb.r + rgb.g + rgb.b) / 3.0; + } + } + + float[9] kernel; + kernel[0] = -1.0; kernel[1] = 0.0; kernel[2] = 0.0; + kernel[3] = 0.0; kernel[4] = -1.0; kernel[5] = 0.0; + kernel[6] = 0.0; kernel[7] = 0.0; kernel[8] = 2.0; + + vec4 res = vec4(vec3(conv(kernel, imageData.avg, 1.0, 0.50)), 1.0); + + imageStore(resultImage, ivec2(gl_GlobalInvocationID.xy), res); +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/computeshader/sharpen.comp b/tests/glsl/sascha-willems/computeshader/sharpen.comp new file mode 100644 index 000000000..dc71ae447 --- /dev/null +++ b/tests/glsl/sascha-willems/computeshader/sharpen.comp @@ -0,0 +1,57 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (local_size_x = 16, local_size_y = 16) in; +layout (binding = 0, rgba8) uniform readonly image2D inputImage; +layout (binding = 1, rgba8) uniform image2D resultImage; + +float conv(in float[9] kernel, in float[9] data, in float denom, in float offset) +{ + float res = 0.0; + for (int i=0; i<9; ++i) + { + res += kernel[i] * data[i]; + } + return clamp(res/denom + offset, 0.0, 1.0); +} + +struct ImageData +{ + float r[9]; + float g[9]; + float b[9]; +} imageData; + +void main() +{ + + // Fetch neighbouring texels + int n = -1; + for (int i=-1; i<2; ++i) + { + for(int j=-1; j<2; ++j) + { + n++; + vec3 rgb = imageLoad(inputImage, ivec2(gl_GlobalInvocationID.x + i, gl_GlobalInvocationID.y + j)).rgb; + imageData.r[n] = rgb.r; + imageData.g[n] = rgb.g; + imageData.b[n] = rgb.b; + } + } + + float[9] kernel; + kernel[0] = -1.0; kernel[1] = -1.0; kernel[2] = -1.0; + kernel[3] = -1.0; kernel[4] = 9.0; kernel[5] = -1.0; + kernel[6] = -1.0; kernel[7] = -1.0; kernel[8] = -1.0; + + vec4 res = vec4( + conv(kernel, imageData.r, 1.0, 0.0), + conv(kernel, imageData.g, 1.0, 0.0), + conv(kernel, imageData.b, 1.0, 0.0), + 1.0); + + imageStore(resultImage, ivec2(gl_GlobalInvocationID.xy), res); +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/computeshader/texture.frag b/tests/glsl/sascha-willems/computeshader/texture.frag new file mode 100644 index 000000000..6d54f2f33 --- /dev/null +++ b/tests/glsl/sascha-willems/computeshader/texture.frag @@ -0,0 +1,16 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (binding = 1) uniform sampler2D samplerColor; + +layout (location = 0) in vec2 inUV; + +layout (location = 0) out vec4 outFragColor; + +void main() +{ + outFragColor = texture(samplerColor, inUV); +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/computeshader/texture.vert b/tests/glsl/sascha-willems/computeshader/texture.vert new file mode 100644 index 000000000..c1ad3e070 --- /dev/null +++ b/tests/glsl/sascha-willems/computeshader/texture.vert @@ -0,0 +1,27 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (location = 0) in vec3 inPos; +layout (location = 1) in vec2 inUV; + +layout (binding = 0) uniform UBO +{ + mat4 projection; + mat4 model; +} ubo; + +layout (location = 0) out vec2 outUV; + +out gl_PerVertex +{ + vec4 gl_Position; +}; + +void main() +{ + outUV = inUV; + gl_Position = ubo.projection * ubo.model * vec4(inPos.xyz, 1.0); +} diff --git a/tests/glsl/sascha-willems/cubemap/reflect.frag b/tests/glsl/sascha-willems/cubemap/reflect.frag new file mode 100644 index 000000000..2ee1d95e7 --- /dev/null +++ b/tests/glsl/sascha-willems/cubemap/reflect.frag @@ -0,0 +1,36 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (binding = 1) uniform samplerCube samplerColor; + +layout (location = 0) in vec3 inPos; +layout (location = 1) in vec3 inNormal; +layout (location = 2) in float inLodBias; +layout (location = 3) in vec3 inViewVec; +layout (location = 4) in vec3 inLightVec; +layout (location = 5) in mat4 inInvModelView; + +layout (location = 0) out vec4 outFragColor; + +void main() +{ + vec3 cI = normalize (inPos); + vec3 cR = reflect (cI, normalize(inNormal)); + + cR = vec3(inInvModelView * vec4(cR, 0.0)); + cR.x *= -1.0; + + vec4 color = texture(samplerColor, cR, inLodBias); + + vec3 N = normalize(inNormal); + vec3 L = normalize(inLightVec); + vec3 V = normalize(inViewVec); + vec3 R = reflect(-L, N); + vec3 ambient = vec3(0.5) * color.rgb; + vec3 diffuse = max(dot(N, L), 0.0) * vec3(1.0); + vec3 specular = pow(max(dot(R, V), 0.0), 
16.0) * vec3(0.5); + outFragColor = vec4(ambient + diffuse * color.rgb + specular, 1.0); +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/cubemap/reflect.vert b/tests/glsl/sascha-willems/cubemap/reflect.vert new file mode 100644 index 000000000..dcf746738 --- /dev/null +++ b/tests/glsl/sascha-willems/cubemap/reflect.vert @@ -0,0 +1,42 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (location = 0) in vec3 inPos; +layout (location = 1) in vec3 inNormal; + +layout (binding = 0) uniform UBO +{ + mat4 projection; + mat4 model; + float lodBias; +} ubo; + +layout (location = 0) out vec3 outPos; +layout (location = 1) out vec3 outNormal; +layout (location = 2) out float outLodBias; +layout (location = 3) out vec3 outViewVec; +layout (location = 4) out vec3 outLightVec; +layout (location = 5) out mat4 outInvModelView; + +out gl_PerVertex +{ + vec4 gl_Position; +}; + +void main() +{ + gl_Position = ubo.projection * ubo.model * vec4(inPos.xyz, 1.0); + + outPos = vec3(ubo.model * vec4(inPos, 1.0)); + outNormal = mat3(ubo.model) * inNormal; + outLodBias = ubo.lodBias; + + outInvModelView = inverse(ubo.model); + + vec3 lightPos = vec3(0.0f, -5.0f, 5.0f); + outLightVec = lightPos.xyz - outPos.xyz; + outViewVec = -outPos.xyz; +} diff --git a/tests/glsl/sascha-willems/cubemap/skybox.frag b/tests/glsl/sascha-willems/cubemap/skybox.frag new file mode 100644 index 000000000..611eb37db --- /dev/null +++ b/tests/glsl/sascha-willems/cubemap/skybox.frag @@ -0,0 +1,16 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (binding = 1) uniform samplerCube samplerCubeMap; + +layout (location = 0) in vec3 inUVW; + +layout (location = 0) out vec4 outFragColor; + +void main() +{ + outFragColor = texture(samplerCubeMap, inUVW); +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/cubemap/skybox.vert b/tests/glsl/sascha-willems/cubemap/skybox.vert new file mode 100644 index 000000000..7011212e0 --- /dev/null +++ b/tests/glsl/sascha-willems/cubemap/skybox.vert @@ -0,0 +1,27 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (location = 0) in vec3 inPos; + +layout (binding = 0) uniform UBO +{ + mat4 projection; + mat4 model; +} ubo; + +layout (location = 0) out vec3 outUVW; + +out gl_PerVertex +{ + vec4 gl_Position; +}; + +void main() +{ + outUVW = inPos; + outUVW.x *= -1.0; + gl_Position = ubo.projection * ubo.model * vec4(inPos.xyz, 1.0); +} diff --git a/tests/glsl/sascha-willems/debugmarker/colorpass.frag b/tests/glsl/sascha-willems/debugmarker/colorpass.frag new file mode 100644 index 000000000..de5dcb4dc --- /dev/null +++ b/tests/glsl/sascha-willems/debugmarker/colorpass.frag @@ -0,0 +1,14 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (location = 0) in vec3 inColor; + +layout (location = 0) out vec4 outFragColor; + +void main() +{ + outFragColor.rgb = inColor; +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/debugmarker/colorpass.vert b/tests/glsl/sascha-willems/debugmarker/colorpass.vert new file mode 100644 index 000000000..46b93f8dc --- /dev/null +++ b/tests/glsl/sascha-willems/debugmarker/colorpass.vert @@ -0,0 +1,29 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (location = 0) in vec4 inPos; +layout (location = 3) in vec3 inColor; + +layout (binding = 0) uniform UBO +{ + mat4 projection; + mat4 model; +} ubo; + +layout (location = 0) out vec3 outColor; + +out gl_PerVertex +{ + vec4 gl_Position; +}; + +void main() +{ + { + outColor = inColor; + } + gl_Position = ubo.projection * ubo.model * inPos; +} diff --git a/tests/glsl/sascha-willems/debugmarker/postprocess.frag b/tests/glsl/sascha-willems/debugmarker/postprocess.frag new file mode 100644 index 000000000..0546baaa4 --- /dev/null +++ b/tests/glsl/sascha-willems/debugmarker/postprocess.frag @@ -0,0 +1,43 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (binding = 1) uniform sampler2D samplerColor; + +layout (location = 0) in vec2 inUV; + +layout (location = 0) out vec4 outFragColor; + +void main() +{ + // Single pass gauss blur + + const vec2 texOffset = vec2(0.01, 0.01); + + vec2 tc0 = inUV + vec2(-texOffset.s, -texOffset.t); + vec2 tc1 = inUV + vec2( 0.0, -texOffset.t); + vec2 tc2 = inUV + vec2(+texOffset.s, -texOffset.t); + vec2 tc3 = inUV + vec2(-texOffset.s, 0.0); + vec2 tc4 = inUV + vec2( 0.0, 0.0); + vec2 tc5 = inUV + vec2(+texOffset.s, 0.0); + vec2 tc6 = inUV + vec2(-texOffset.s, +texOffset.t); + vec2 tc7 = inUV + vec2( 0.0, +texOffset.t); + vec2 tc8 = inUV + vec2(+texOffset.s, +texOffset.t); + + vec4 col0 = texture(samplerColor, tc0); + vec4 col1 = texture(samplerColor, tc1); + vec4 col2 = texture(samplerColor, tc2); + vec4 
col3 = texture(samplerColor, tc3); + vec4 col4 = texture(samplerColor, tc4); + vec4 col5 = texture(samplerColor, tc5); + vec4 col6 = texture(samplerColor, tc6); + vec4 col7 = texture(samplerColor, tc7); + vec4 col8 = texture(samplerColor, tc8); + + vec4 sum = (1.0 * col0 + 2.0 * col1 + 1.0 * col2 + + 2.0 * col3 + 4.0 * col4 + 2.0 * col5 + + 1.0 * col6 + 2.0 * col7 + 1.0 * col8) / 16.0; + outFragColor = vec4(sum.rgb, 1.0); +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/debugmarker/postprocess.vert b/tests/glsl/sascha-willems/debugmarker/postprocess.vert new file mode 100644 index 000000000..6a368e9b6 --- /dev/null +++ b/tests/glsl/sascha-willems/debugmarker/postprocess.vert @@ -0,0 +1,18 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (location = 0) out vec2 outUV; + +out gl_PerVertex +{ + vec4 gl_Position; +}; + +void main() +{ + outUV = vec2((gl_VertexIndex << 1) & 2, gl_VertexIndex & 2); + gl_Position = vec4(outUV * vec2(2.0f, 2.0f) + vec2(-1.0f, -1.0f), 0.0f, 1.0f); +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/debugmarker/toon.frag b/tests/glsl/sascha-willems/debugmarker/toon.frag new file mode 100644 index 000000000..ed9832f11 --- /dev/null +++ b/tests/glsl/sascha-willems/debugmarker/toon.frag @@ -0,0 +1,40 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (binding = 1) uniform sampler2D samplerColorMap; + +layout (location = 0) in vec3 inNormal; +layout (location = 1) in vec3 inColor; +layout (location = 2) in vec2 inUV; +layout (location = 3) in vec3 inViewVec; +layout (location = 4) in vec3 inLightVec; + +layout (location = 0) out vec4 outFragColor; + +void main() +{ + // Desaturate color + vec3 color = vec3(mix(inColor, vec3(dot(vec3(0.2126,0.7152,0.0722), inColor)), 0.65)); + + // High ambient colors because mesh materials are pretty dark + vec3 ambient = color * vec3(1.0); + vec3 N = normalize(inNormal); + vec3 L = normalize(inLightVec); + vec3 V = normalize(inViewVec); + vec3 R = reflect(-L, N); + vec3 diffuse = max(dot(N, L), 0.0) * color; + vec3 specular = pow(max(dot(R, V), 0.0), 16.0) * vec3(0.75); + outFragColor = vec4(ambient + diffuse * 1.75 + specular, 1.0); + + float intensity = dot(N,L); + float shade = 1.0; + shade = intensity < 0.5 ? 0.75 : shade; + shade = intensity < 0.35 ? 0.6 : shade; + shade = intensity < 0.25 ? 0.5 : shade; + shade = intensity < 0.1 ? 0.25 : shade; + + outFragColor.rgb = inColor * 3.0 * shade; +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/debugmarker/toon.vert b/tests/glsl/sascha-willems/debugmarker/toon.vert new file mode 100644 index 000000000..d0b3be251 --- /dev/null +++ b/tests/glsl/sascha-willems/debugmarker/toon.vert @@ -0,0 +1,42 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (location = 0) in vec3 inPos; +layout (location = 1) in vec3 inNormal; +layout (location = 2) in vec2 inUV; +layout (location = 3) in vec3 inColor; + +layout (binding = 0) uniform UBO +{ + mat4 projection; + mat4 model; + vec4 lightPos; +} ubo; + +layout (location = 0) out vec3 outNormal; +layout (location = 1) out vec3 outColor; +layout (location = 2) out vec2 outUV; +layout (location = 3) out vec3 outViewVec; +layout (location = 4) out vec3 outLightVec; + +out gl_PerVertex +{ + vec4 gl_Position; +}; + +void main() +{ + outNormal = inNormal; + outColor = inColor; + outUV = inUV; + gl_Position = ubo.projection * ubo.model * vec4(inPos.xyz, 1.0); + + vec4 pos = ubo.model * vec4(inPos, 1.0); + outNormal = mat3(ubo.model) * inNormal; + vec3 lPos = mat3(ubo.model) * ubo.lightPos.xyz; + outLightVec = lPos - pos.xyz; + outViewVec = -pos.xyz; +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/deferred/debug.frag b/tests/glsl/sascha-willems/deferred/debug.frag new file mode 100644 index 000000000..6b77990b5 --- /dev/null +++ b/tests/glsl/sascha-willems/deferred/debug.frag @@ -0,0 +1,27 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (binding = 1) uniform sampler2D samplerPosition; +layout (binding = 2) uniform sampler2D samplerNormal; +layout (binding = 3) uniform sampler2D samplerAlbedo; + +layout (location = 0) in vec3 inUV; + +layout (location = 0) out vec4 outFragColor; + +void main() +{ + vec3 components[3]; + components[0] = texture(samplerPosition, inUV.st).rgb; + components[1] = texture(samplerNormal, inUV.st).rgb; + components[2] = texture(samplerAlbedo, inUV.st).rgb; + // Uncomment to display specular component + //components[2] = vec3(texture(samplerAlbedo, inUV.st).a); + + // Select component depending on z coordinate of quad + highp int index = int(inUV.z); + outFragColor.rgb = components[index]; +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/deferred/debug.vert b/tests/glsl/sascha-willems/deferred/debug.vert new file mode 100644 index 000000000..de1b380f6 --- /dev/null +++ b/tests/glsl/sascha-willems/deferred/debug.vert @@ -0,0 +1,28 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (location = 0) in vec3 inPos; +layout (location = 1) in vec2 inUV; +layout (location = 3) in vec3 inNormal; + +layout (binding = 0) uniform UBO +{ + mat4 projection; + mat4 model; +} ubo; + +layout (location = 0) out vec3 outUV; + +out gl_PerVertex +{ + vec4 gl_Position; +}; + +void main() +{ + outUV = vec3(inUV.st, inNormal.z); + gl_Position = ubo.projection * ubo.model * vec4(inPos.xyz, 1.0); +} diff --git a/tests/glsl/sascha-willems/deferred/deferred.frag b/tests/glsl/sascha-willems/deferred/deferred.frag new file mode 100644 index 000000000..aead2f872 --- /dev/null +++ b/tests/glsl/sascha-willems/deferred/deferred.frag @@ -0,0 +1,76 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (binding = 1) uniform sampler2D samplerposition; +layout (binding = 2) uniform sampler2D samplerNormal; +layout (binding = 3) uniform sampler2D samplerAlbedo; + +layout (location = 0) in vec2 inUV; + +layout (location = 0) out vec4 outFragcolor; + +struct Light { + vec4 position; + vec3 color; + float radius; +}; + +layout (binding = 4) uniform UBO +{ + Light lights[6]; + vec4 viewPos; +} ubo; + + +void main() +{ + // Get G-Buffer values + vec3 fragPos = texture(samplerposition, inUV).rgb; + vec3 normal = texture(samplerNormal, inUV).rgb; + vec4 albedo = texture(samplerAlbedo, inUV); + + #define lightCount 6 + #define ambient 0.0 + + // Ambient part + vec3 fragcolor = albedo.rgb * ambient; + + for(int i = 0; i < lightCount; ++i) + { + // Vector to light + vec3 L = 
ubo.lights[i].position.xyz - fragPos; + // Distance from light to fragment position + float dist = length(L); + + // Viewer to fragment + vec3 V = ubo.viewPos.xyz - fragPos; + V = normalize(V); + + //if(dist < ubo.lights[i].radius) + { + // Light to fragment + L = normalize(L); + + // Attenuation + float atten = ubo.lights[i].radius / (pow(dist, 2.0) + 1.0); + + // Diffuse part + vec3 N = normalize(normal); + float NdotL = max(0.0, dot(N, L)); + vec3 diff = ubo.lights[i].color * albedo.rgb * NdotL * atten; + + // Specular part + // Specular map values are stored in alpha of albedo mrt + vec3 R = reflect(-L, N); + float NdotR = max(0.0, dot(R, V)); + vec3 spec = ubo.lights[i].color * albedo.a * pow(NdotR, 16.0) * atten; + + fragcolor += diff + spec; + } + } + + outFragcolor = vec4(fragcolor, 1.0); +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/deferred/deferred.vert b/tests/glsl/sascha-willems/deferred/deferred.vert new file mode 100644 index 000000000..548284554 --- /dev/null +++ b/tests/glsl/sascha-willems/deferred/deferred.vert @@ -0,0 +1,18 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (location = 0) out vec2 outUV; + +out gl_PerVertex +{ + vec4 gl_Position; +}; + +void main() +{ + outUV = vec2((gl_VertexIndex << 1) & 2, gl_VertexIndex & 2); + gl_Position = vec4(outUV * 2.0f - 1.0f, 0.0f, 1.0f); +} diff --git a/tests/glsl/sascha-willems/deferred/mrt.frag b/tests/glsl/sascha-willems/deferred/mrt.frag new file mode 100644 index 000000000..4bd2a10a5 --- /dev/null +++ b/tests/glsl/sascha-willems/deferred/mrt.frag @@ -0,0 +1,34 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (binding = 1) uniform sampler2D samplerColor; +layout (binding = 2) uniform sampler2D samplerNormalMap; + +layout (location = 0) in vec3 inNormal; +layout (location = 1) in vec2 inUV; +layout (location = 2) in vec3 inColor; +layout (location = 3) in vec3 inWorldPos; +layout (location = 4) in vec3 inTangent; + +layout (location = 0) out vec4 outPosition; +layout (location = 1) out vec4 outNormal; +layout (location = 2) out vec4 outAlbedo; + +void main() +{ + outPosition = vec4(inWorldPos, 1.0); + + // Calculate normal in tangent space + vec3 N = normalize(inNormal); + N.y = -N.y; + vec3 T = normalize(inTangent); + vec3 B = cross(N, T); + mat3 TBN = mat3(T, B, N); + vec3 tnorm = TBN * normalize(texture(samplerNormalMap, inUV).xyz * 2.0 - vec3(1.0)); + outNormal = vec4(tnorm, 1.0); + + outAlbedo = texture(samplerColor, inUV); +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/deferred/mrt.vert b/tests/glsl/sascha-willems/deferred/mrt.vert new file mode 100644 index 000000000..26f764176 --- /dev/null +++ b/tests/glsl/sascha-willems/deferred/mrt.vert @@ -0,0 +1,53 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (location = 0) in vec4 inPos; +layout (location = 1) in vec2 inUV; +layout (location = 2) in vec3 inColor; +layout (location = 3) in vec3 inNormal; +layout (location = 4) in vec3 inTangent; + +layout (binding = 0) uniform UBO +{ + mat4 projection; + mat4 model; + mat4 view; + vec4 instancePos[3]; +} ubo; + +layout (location = 0) out vec3 outNormal; +layout (location = 1) out vec2 outUV; +layout (location = 2) out vec3 outColor; +layout (location = 3) out vec3 outWorldPos; +layout (location = 4) out vec3 outTangent; + +out gl_PerVertex +{ + vec4 gl_Position; +}; + +void main() +{ + vec4 tmpPos = inPos + ubo.instancePos[gl_InstanceIndex]; + + gl_Position = ubo.projection * ubo.view * ubo.model * tmpPos; + + outUV = inUV; + outUV.t = 1.0 - outUV.t; + + // Vertex position in world space + outWorldPos = vec3(ubo.model * tmpPos); + // GL to Vulkan coord space + outWorldPos.y = -outWorldPos.y; + + // Normal in world space + mat3 mNormal = transpose(inverse(mat3(ubo.model))); + outNormal = mNormal * normalize(inNormal); + outTangent = mNormal * normalize(inTangent); + + // Currently just vertex color + outColor = inColor; +} diff --git a/tests/glsl/sascha-willems/deferredmultisampling/debug.frag b/tests/glsl/sascha-willems/deferredmultisampling/debug.frag new file mode 100644 index 000000000..f404711dd --- /dev/null +++ b/tests/glsl/sascha-willems/deferredmultisampling/debug.frag @@ -0,0 +1,56 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (binding = 1) uniform 
sampler2DMS samplerPosition; +layout (binding = 2) uniform sampler2DMS samplerNormal; +layout (binding = 3) uniform sampler2DMS samplerAlbedo; + +layout (location = 0) in vec3 inUV; + +layout (location = 0) out vec4 outFragColor; + +#define NUM_SAMPLES 8 + +vec4 resolve(sampler2DMS tex, ivec2 uv) +{ + vec4 result = vec4(0.0); + int count = 0; + for (int i = 0; i < NUM_SAMPLES; i++) + { + vec4 val = texelFetch(tex, uv, i); + result += val; + count++; + } + return result / float(NUM_SAMPLES); +} + +void main() +{ + ivec2 attDim = textureSize(samplerPosition); + ivec2 UV = ivec2(inUV.st * attDim * 2.0); + + highp int index = 0; + if (inUV.s > 0.5) + { + index = 1; + UV.s -= attDim.x; + } + if (inUV.t > 0.5) + { + index = 2; + UV.t -= attDim.y; + } + + vec3 components[3]; + components[0] = resolve(samplerPosition, UV).rgb; + components[1] = resolve(samplerNormal, UV).rgb; + components[2] = resolve(samplerAlbedo, UV).rgb; + // Uncomment to display specular component + //components[2] = vec3(texture(samplerAlbedo, inUV.st).a); + + // Select component depending on UV + outFragColor.rgb = components[index]; +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/deferredmultisampling/debug.vert b/tests/glsl/sascha-willems/deferredmultisampling/debug.vert new file mode 100644 index 000000000..c62c3364a --- /dev/null +++ b/tests/glsl/sascha-willems/deferredmultisampling/debug.vert @@ -0,0 +1,24 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (binding = 0) uniform UBO +{ + mat4 projection; + mat4 model; +} ubo; + +layout (location = 0) out vec3 outUV; + +out gl_PerVertex +{ + vec4 gl_Position; +}; + +void main() +{ + outUV = vec3((gl_VertexIndex << 1) & 2, gl_VertexIndex & 2, 0.0); + gl_Position = vec4(outUV.st * 2.0f - 1.0f, 0.0f, 1.0f); +} diff --git a/tests/glsl/sascha-willems/deferredmultisampling/deferred.frag b/tests/glsl/sascha-willems/deferredmultisampling/deferred.frag new file mode 100644 index 000000000..59f855281 --- /dev/null +++ b/tests/glsl/sascha-willems/deferredmultisampling/deferred.frag @@ -0,0 +1,104 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (binding = 1) uniform sampler2DMS samplerPosition; +layout (binding = 2) uniform sampler2DMS samplerNormal; +layout (binding = 3) uniform sampler2DMS samplerAlbedo; + +layout (location = 0) in vec2 inUV; + +layout (location = 0) out vec4 outFragcolor; + +struct Light { + vec4 position; + vec3 color; + float radius; +}; + +layout (binding = 4) uniform UBO +{ + Light lights[6]; + vec4 viewPos; + ivec2 windowSize; +} ubo; + +layout (constant_id = 0) const int NUM_SAMPLES = 8; + +#define NUM_LIGHTS 6 + +// Manual resolve for MSAA samples +vec4 resolve(sampler2DMS tex, ivec2 uv) +{ + vec4 result = vec4(0.0); + for (int i = 0; i < NUM_SAMPLES; i++) + { + vec4 val = texelFetch(tex, uv, i); + result += val; + } + // Average resolved samples + return result / float(NUM_SAMPLES); +} + +vec3 
calculateLighting(vec3 pos, vec3 normal, vec4 albedo) +{ + vec3 result = vec3(0.0); + + for(int i = 0; i < NUM_LIGHTS; ++i) + { + // Vector to light + vec3 L = ubo.lights[i].position.xyz - pos; + // Distance from light to fragment position + float dist = length(L); + + // Viewer to fragment + vec3 V = ubo.viewPos.xyz - pos; + V = normalize(V); + + // Light to fragment + L = normalize(L); + + // Attenuation + float atten = ubo.lights[i].radius / (pow(dist, 2.0) + 1.0); + + // Diffuse part + vec3 N = normalize(normal); + float NdotL = max(0.0, dot(N, L)); + vec3 diff = ubo.lights[i].color * albedo.rgb * NdotL * atten; + + // Specular part + vec3 R = reflect(-L, N); + float NdotR = max(0.0, dot(R, V)); + vec3 spec = ubo.lights[i].color * albedo.a * pow(NdotR, 8.0) * atten; + + result += diff + spec; + } + return result; +} + +void main() +{ + ivec2 attDim = textureSize(samplerPosition); + ivec2 UV = ivec2(inUV * attDim); + + #define ambient 0.15 + + // Ambient part + vec4 alb = resolve(samplerAlbedo, UV); + vec3 fragColor = vec3(0.0); + + // Calualte lighting for every MSAA sample + for (int i = 0; i < NUM_SAMPLES; i++) + { + vec3 pos = texelFetch(samplerPosition, UV, i).rgb; + vec3 normal = texelFetch(samplerNormal, UV, i).rgb; + vec4 albedo = texelFetch(samplerAlbedo, UV, i); + fragColor += calculateLighting(pos, normal, albedo); + } + + fragColor = (alb.rgb * ambient) + fragColor / float(NUM_SAMPLES); + + outFragcolor = vec4(fragColor, 1.0); +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/deferredmultisampling/deferred.vert b/tests/glsl/sascha-willems/deferredmultisampling/deferred.vert new file mode 100644 index 000000000..af8eef6dd --- /dev/null +++ b/tests/glsl/sascha-willems/deferredmultisampling/deferred.vert @@ -0,0 +1,24 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (binding = 0) uniform UBO +{ + mat4 projection; + mat4 model; +} ubo; + +layout (location = 0) out vec2 outUV; + +out gl_PerVertex +{ + vec4 gl_Position; +}; + +void main() +{ + outUV = vec2((gl_VertexIndex << 1) & 2, gl_VertexIndex & 2); + gl_Position = vec4(outUV * 2.0f - 1.0f, 0.0f, 1.0f); +} diff --git a/tests/glsl/sascha-willems/deferredmultisampling/mrt.frag b/tests/glsl/sascha-willems/deferredmultisampling/mrt.frag new file mode 100644 index 000000000..4bd2a10a5 --- /dev/null +++ b/tests/glsl/sascha-willems/deferredmultisampling/mrt.frag @@ -0,0 +1,34 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (binding = 1) uniform sampler2D samplerColor; +layout (binding = 2) uniform sampler2D samplerNormalMap; + +layout (location = 0) in vec3 inNormal; +layout (location = 1) in vec2 inUV; +layout (location = 2) in vec3 inColor; +layout (location = 3) in vec3 inWorldPos; +layout (location = 4) in vec3 inTangent; + +layout (location = 0) out vec4 outPosition; +layout (location = 1) out vec4 outNormal; +layout (location = 2) out vec4 outAlbedo; + +void main() +{ + outPosition = vec4(inWorldPos, 1.0); + + // Calculate normal in tangent space + vec3 N = normalize(inNormal); + N.y = -N.y; + vec3 T = normalize(inTangent); + vec3 B = cross(N, T); + mat3 TBN = mat3(T, B, N); + vec3 tnorm = TBN * normalize(texture(samplerNormalMap, inUV).xyz * 2.0 - vec3(1.0)); + outNormal = vec4(tnorm, 1.0); + + outAlbedo = 
texture(samplerColor, inUV); +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/deferredmultisampling/mrt.vert b/tests/glsl/sascha-willems/deferredmultisampling/mrt.vert new file mode 100644 index 000000000..5cc127297 --- /dev/null +++ b/tests/glsl/sascha-willems/deferredmultisampling/mrt.vert @@ -0,0 +1,53 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (location = 0) in vec4 inPos; +layout (location = 1) in vec2 inUV; +layout (location = 2) in vec3 inColor; +layout (location = 3) in vec3 inNormal; +layout (location = 4) in vec3 inTangent; + +layout (binding = 0) uniform UBO +{ + mat4 projection; + mat4 model; + mat4 view; + vec4 instancePos[3]; +} ubo; + +layout (location = 0) out vec3 outNormal; +layout (location = 1) out vec2 outUV; +layout (location = 2) out vec3 outColor; +layout (location = 3) out vec3 outWorldPos; +layout (location = 4) out vec3 outTangent; + +out gl_PerVertex +{ + vec4 gl_Position; +}; + +void main() +{ + vec4 tmpPos = vec4(inPos.xyz, 1.0) + ubo.instancePos[gl_InstanceIndex]; + + gl_Position = ubo.projection * ubo.view * ubo.model * tmpPos; + + outUV = inUV; + outUV.t = 1.0 - outUV.t; + + // Vertex position in world space + outWorldPos = vec3(ubo.model * tmpPos); + // GL to Vulkan coord space + outWorldPos.y = -outWorldPos.y; + + // Normal in world space + mat3 mNormal = transpose(inverse(mat3(ubo.model))); + outNormal = mNormal * normalize(inNormal); + outTangent = mNormal * normalize(inTangent); + + // Currently just vertex color + outColor = inColor; +} diff --git a/tests/glsl/sascha-willems/deferredshadows/debug.frag b/tests/glsl/sascha-willems/deferredshadows/debug.frag new file mode 100644 index 000000000..f9136b24d --- /dev/null +++ b/tests/glsl/sascha-willems/deferredshadows/debug.frag @@ -0,0 +1,30 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : 
enable + +layout (binding = 1) uniform sampler2D samplerPosition; +layout (binding = 2) uniform sampler2D samplerNormal; +layout (binding = 3) uniform sampler2D samplerAlbedo; +layout (binding = 5) uniform sampler2DArray samplerDepth; + +layout (location = 0) in vec3 inUV; + +layout (location = 0) out vec4 outFragColor; + +float LinearizeDepth(float depth) +{ + float n = 0.1; // camera z near + float f = 64.0; // camera z far + float z = depth; + return (2.0 * n) / (f + n - z * (f - n)); +} + +void main() +{ + // Display depth from light's point-of-view + // inUV.w = number of light source + float depth = texture(samplerDepth, vec3(inUV)).r; + outFragColor = vec4(vec3(1.0 - LinearizeDepth(depth)), 0.0); +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/deferredshadows/debug.vert b/tests/glsl/sascha-willems/deferredshadows/debug.vert new file mode 100644 index 000000000..8742202c2 --- /dev/null +++ b/tests/glsl/sascha-willems/deferredshadows/debug.vert @@ -0,0 +1,30 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (location = 0) in vec3 inPos; +layout (location = 1) in vec2 inUV; + +layout (binding = 0) uniform UBO +{ + mat4 projection; + mat4 modelview; +} ubo; + +layout (location = 0) out vec3 outUV; + +out gl_PerVertex +{ + vec4 gl_Position; +}; + +void main() +{ + outUV = vec3(inUV.st, gl_InstanceIndex); + vec4 tmpPos = vec4(inPos, 1.0); + tmpPos.y += gl_InstanceIndex; + tmpPos.xy *= vec2(1.0/4.0, 1.0/3.0); + gl_Position = ubo.projection * ubo.modelview * tmpPos; +} diff --git a/tests/glsl/sascha-willems/deferredshadows/deferred.frag b/tests/glsl/sascha-willems/deferredshadows/deferred.frag new file mode 100644 index 000000000..ba3998bd6 --- /dev/null +++ b/tests/glsl/sascha-willems/deferredshadows/deferred.frag @@ -0,0 +1,147 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (binding = 1) uniform sampler2D samplerposition; +layout (binding = 2) uniform sampler2D samplerNormal; +layout (binding = 3) uniform sampler2D samplerAlbedo; +// Depth from the light's point of view +//layout (binding = 5) uniform sampler2DShadow samplerShadowMap; +layout (binding = 5) uniform sampler2DArray samplerShadowMap; + +layout (location = 0) in vec2 inUV; + +layout (location = 0) out vec4 outFragColor; + +#define LIGHT_COUNT 3 +#define SHADOW_FACTOR 0.25 +#define AMBIENT_LIGHT 0.1 +#define USE_PCF + +struct Light +{ + vec4 position; + vec4 target; + vec4 color; + mat4 viewMatrix; +}; + +layout (binding = 4) uniform UBO +{ + vec4 viewPos; + Light 
lights[LIGHT_COUNT]; + int useShadows; +} ubo; + +float textureProj(vec4 P, float layer, vec2 offset) +{ + float shadow = 1.0; + vec4 shadowCoord = P / P.w; + shadowCoord.st = shadowCoord.st * 0.5 + 0.5; + + if (shadowCoord.z > -1.0 && shadowCoord.z < 1.0) + { + float dist = texture(samplerShadowMap, vec3(shadowCoord.st + offset, layer)).r; + if (shadowCoord.w > 0.0 && dist < shadowCoord.z) + { + shadow = SHADOW_FACTOR; + } + } + return shadow; +} + +float filterPCF(vec4 sc, float layer) +{ + ivec2 texDim = textureSize(samplerShadowMap, 0).xy; + float scale = 1.5; + float dx = scale * 1.0 / float(texDim.x); + float dy = scale * 1.0 / float(texDim.y); + + float shadowFactor = 0.0; + int count = 0; + int range = 1; + + for (int x = -range; x <= range; x++) + { + for (int y = -range; y <= range; y++) + { + shadowFactor += textureProj(sc, layer, vec2(dx*x, dy*y)); + count++; + } + + } + return shadowFactor / count; +} + +void main() +{ + // Get G-Buffer values + vec3 fragPos = texture(samplerposition, inUV).rgb; + vec3 normal = texture(samplerNormal, inUV).rgb; + vec4 albedo = texture(samplerAlbedo, inUV); + + // Ambient part + vec3 fragcolor = albedo.rgb * AMBIENT_LIGHT; + + vec3 N = normalize(normal); + + float shadow = 0.0; + + for(int i = 0; i < LIGHT_COUNT; ++i) + { + // Vector to light + vec3 L = ubo.lights[i].position.xyz - fragPos; + // Distance from light to fragment position + float dist = length(L); + L = normalize(L); + + // Viewer to fragment + vec3 V = ubo.viewPos.xyz - fragPos; + V = normalize(V); + + float lightCosInnerAngle = cos(radians(15.0)); + float lightCosOuterAngle = cos(radians(25.0)); + float lightRange = 100.0; + + // Direction vector from source to target + vec3 dir = normalize(ubo.lights[i].position.xyz - ubo.lights[i].target.xyz); + + // Dual cone spot light with smooth transition between inner and outer angle + float cosDir = dot(L, dir); + float spotEffect = smoothstep(lightCosOuterAngle, lightCosInnerAngle, cosDir); + float 
heightAttenuation = smoothstep(lightRange, 0.0f, dist); + + // Diffuse lighting + float NdotL = max(0.0, dot(N, L)); + vec3 diff = vec3(NdotL); + + // Specular lighting + vec3 R = reflect(-L, N); + float NdotR = max(0.0, dot(R, V)); + vec3 spec = vec3(pow(NdotR, 16.0) * albedo.a * 2.5); + + fragcolor += vec3((diff + spec) * spotEffect * heightAttenuation) * ubo.lights[i].color.rgb * albedo.rgb; + } + + // Shadow calculations in a separate pass + if (ubo.useShadows > 0) + { + for(int i = 0; i < LIGHT_COUNT; ++i) + { + vec4 shadowClip = ubo.lights[i].viewMatrix * vec4(fragPos, 1.0); + + float shadowFactor; + #ifdef USE_PCF + shadowFactor= filterPCF(shadowClip, i); + #else + shadowFactor = textureProj(shadowClip, i, vec2(0.0)); + #endif + + fragcolor *= shadowFactor; + } + } + + outFragColor.rgb = fragcolor; +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/deferredshadows/deferred.vert b/tests/glsl/sascha-willems/deferredshadows/deferred.vert new file mode 100644 index 000000000..cd17f9726 --- /dev/null +++ b/tests/glsl/sascha-willems/deferredshadows/deferred.vert @@ -0,0 +1,27 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (location = 0) in vec3 inPos; +layout (location = 1) in vec2 inUV; + +layout (binding = 0) uniform UBO +{ + mat4 projection; + mat4 modelview; +} ubo; + +layout (location = 0) out vec2 outUV; + +out gl_PerVertex +{ + vec4 gl_Position; +}; + +void main() +{ + outUV = inUV; + gl_Position = ubo.projection * ubo.modelview * vec4(inPos.xyz, 1.0); +} diff --git a/tests/glsl/sascha-willems/deferredshadows/mrt.frag b/tests/glsl/sascha-willems/deferredshadows/mrt.frag new file mode 100644 index 000000000..7a31f54e9 --- /dev/null +++ b/tests/glsl/sascha-willems/deferredshadows/mrt.frag @@ -0,0 +1,33 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (binding = 1) uniform sampler2D samplerColor; +layout (binding = 2) uniform sampler2D samplerNormalMap; + +layout (location = 0) in vec3 inNormal; +layout (location = 1) in vec2 inUV; +layout (location = 2) in vec3 inColor; +layout (location = 3) in vec3 inWorldPos; +layout (location = 4) in vec3 inTangent; + +layout (location = 0) out vec4 outPosition; +layout (location = 1) out vec4 outNormal; +layout (location = 2) out vec4 outAlbedo; + +void main() +{ + outPosition = vec4(inWorldPos, 1.0); + + // Calculate normal in tangent space + vec3 N = normalize(inNormal); + vec3 T = normalize(inTangent); + vec3 B = cross(N, T); + mat3 TBN = mat3(T, B, N); + vec3 tnorm = TBN * normalize(texture(samplerNormalMap, inUV).xyz * 2.0 - vec3(1.0)); + outNormal = vec4(tnorm, 1.0); + + outAlbedo = 
texture(samplerColor, inUV); +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/deferredshadows/mrt.vert b/tests/glsl/sascha-willems/deferredshadows/mrt.vert new file mode 100644 index 000000000..815c2833f --- /dev/null +++ b/tests/glsl/sascha-willems/deferredshadows/mrt.vert @@ -0,0 +1,51 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (location = 0) in vec4 inPos; +layout (location = 1) in vec2 inUV; +layout (location = 2) in vec3 inColor; +layout (location = 3) in vec3 inNormal; +layout (location = 4) in vec3 inTangent; + +layout (binding = 0) uniform UBO +{ + mat4 projection; + mat4 model; + mat4 view; + vec4 instancePos[3]; +} ubo; + +layout (location = 0) out vec3 outNormal; +layout (location = 1) out vec2 outUV; +layout (location = 2) out vec3 outColor; +layout (location = 3) out vec3 outWorldPos; +layout (location = 4) out vec3 outTangent; + +out gl_PerVertex +{ + vec4 gl_Position; +}; + +void main() +{ + vec4 tmpPos = inPos + ubo.instancePos[gl_InstanceIndex]; + + gl_Position = ubo.projection * ubo.view * ubo.model * tmpPos; + + outUV = inUV; + outUV.t = 1.0 - outUV.t; + + // Vertex position in world space + outWorldPos = vec3(ubo.model * tmpPos); + + // Normal in world space + mat3 mNormal = transpose(inverse(mat3(ubo.model))); + outNormal = mNormal * normalize(inNormal); + outTangent = mNormal * normalize(inTangent); + + // Currently just vertex color + outColor = inColor; +} diff --git a/tests/glsl/sascha-willems/deferredshadows/shadow.frag b/tests/glsl/sascha-willems/deferredshadows/shadow.frag new file mode 100644 index 000000000..23217ad42 --- /dev/null +++ b/tests/glsl/sascha-willems/deferredshadows/shadow.frag @@ -0,0 +1,12 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout(location = 0) out float fragmentdepth; + +void main() +{ + fragmentdepth = 
gl_FragCoord.z; +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/deferredshadows/shadow.geom b/tests/glsl/sascha-willems/deferredshadows/shadow.geom new file mode 100644 index 000000000..7f71c108a --- /dev/null +++ b/tests/glsl/sascha-willems/deferredshadows/shadow.geom @@ -0,0 +1,36 @@ +#version 420 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +#define LIGHT_COUNT 3 + +layout (triangles, invocations = LIGHT_COUNT) in; +layout (triangle_strip, max_vertices = 3) out; + +layout (binding = 0) uniform UBO +{ + mat4 mvp[LIGHT_COUNT]; + vec4 instancePos[3]; +} ubo; + +layout (location = 0) in int inInstanceIndex[]; + +out gl_PerVertex +{ + vec4 gl_Position; +}; + +void main() +{ + vec4 instancedPos = ubo.instancePos[inInstanceIndex[0]]; + for (int i = 0; i < gl_in.length(); i++) + { + gl_Layer = gl_InvocationID; + vec4 tmpPos = gl_in[i].gl_Position + instancedPos; + gl_Position = ubo.mvp[gl_InvocationID] * tmpPos; + EmitVertex(); + } + EndPrimitive(); +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/deferredshadows/shadow.vert b/tests/glsl/sascha-willems/deferredshadows/shadow.vert new file mode 100644 index 000000000..b16c66414 --- /dev/null +++ b/tests/glsl/sascha-willems/deferredshadows/shadow.vert @@ -0,0 +1,20 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (location = 0) in vec4 inPos; + +layout (location = 0) out int outInstanceIndex; + +out gl_PerVertex +{ + vec4 gl_Position; +}; + +void main() +{ + outInstanceIndex = gl_InstanceIndex; + gl_Position = inPos; +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/displacement/base.frag b/tests/glsl/sascha-willems/displacement/base.frag new file mode 100644 index 000000000..3ab30270c --- /dev/null +++ b/tests/glsl/sascha-willems/displacement/base.frag @@ -0,0 +1,30 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (binding = 2) uniform sampler2D colorMap; + +layout (location = 0) in vec3 inNormal; +layout (location = 1) in vec2 inUV; +layout (location = 2) in vec3 inEyePos; +layout (location = 3) in vec3 inLightVec; + +layout (location = 0) out vec4 outFragColor; + +void main() +{ + vec3 N = normalize(inNormal); + vec3 L = normalize(vec3(1.0)); + + outFragColor.rgb = texture(colorMap, inUV).rgb; + + vec3 Eye = normalize(-inEyePos); + vec3 Reflected = normalize(reflect(-inLightVec, inNormal)); + + vec4 IAmbient = vec4(0.0, 0.0, 0.0, 1.0); + vec4 IDiffuse = vec4(1.0) * max(dot(inNormal, inLightVec), 0.0); + + outFragColor = vec4((IAmbient + IDiffuse) * vec4(texture(colorMap, inUV).rgb, 1.0)); +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/displacement/base.vert b/tests/glsl/sascha-willems/displacement/base.vert new file mode 100644 index 000000000..c684afabb --- /dev/null +++ b/tests/glsl/sascha-willems/displacement/base.vert @@ -0,0 +1,19 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (location = 0) in vec3 inPos; +layout (location = 1) in vec3 inNormal; +layout (location = 2) in vec2 inUV; + +layout (location = 0) out vec3 outNormal; +layout (location = 1) out vec2 outUV; + +void main(void) +{ + gl_Position = vec4(inPos.xyz, 1.0); + outUV = inUV * 3.0; + outNormal = inNormal; +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/displacement/displacement.tesc b/tests/glsl/sascha-willems/displacement/displacement.tesc new file mode 100644 index 000000000..73bf237c0 --- /dev/null +++ b/tests/glsl/sascha-willems/displacement/displacement.tesc @@ -0,0 +1,33 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (binding = 0) uniform UBO +{ + float tessLevel; +} ubo; + +layout (vertices = 3) out; + +layout (location = 0) in vec3 inNormal[]; +layout (location = 1) in vec2 inUV[]; + +layout (location = 0) out vec3 outNormal[3]; +layout (location = 1) out vec2 outUV[3]; + +void main() +{ + if (gl_InvocationID == 0) + { + gl_TessLevelInner[0] = ubo.tessLevel; + gl_TessLevelOuter[0] = ubo.tessLevel; + gl_TessLevelOuter[1] = ubo.tessLevel; + gl_TessLevelOuter[2] = ubo.tessLevel; + } + + gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position; + outNormal[gl_InvocationID] = inNormal[gl_InvocationID]; + outUV[gl_InvocationID] = inUV[gl_InvocationID]; +} diff --git a/tests/glsl/sascha-willems/displacement/displacement.tese b/tests/glsl/sascha-willems/displacement/displacement.tese new file mode 100644 index 000000000..bd061ec2a --- /dev/null +++ b/tests/glsl/sascha-willems/displacement/displacement.tese @@ -0,0 +1,40 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (binding = 1) uniform UBO +{ + mat4 projection; + mat4 model; + vec4 lightPos; + float tessAlpha; + float tessStrength; +} ubo; + +layout (binding = 2) uniform sampler2D displacementMap; + +layout(triangles, equal_spacing, ccw) in; + +layout (location = 0) in vec3 inNormal[]; +layout (location = 1) in vec2 inUV[]; + +layout (location = 0) out vec3 outNormal; +layout (location = 1) out vec2 outUV; +layout (location = 2) out vec3 outEyesPos; +layout 
(location = 3) out vec3 outLightVec; + +void main() +{ + gl_Position = (gl_TessCoord.x * gl_in[0].gl_Position) + (gl_TessCoord.y * gl_in[1].gl_Position) + (gl_TessCoord.z * gl_in[2].gl_Position); + outUV = gl_TessCoord.x * inUV[0] + gl_TessCoord.y * inUV[1] + gl_TessCoord.z * inUV[2]; + outNormal = gl_TessCoord.x * inNormal[0] + gl_TessCoord.y * inNormal[1] + gl_TessCoord.z * inNormal[2]; + + gl_Position.xyz += normalize(outNormal) * (max(textureLod(displacementMap, outUV.st, 0.0).a, 0.0) * ubo.tessStrength); + + outEyesPos = (gl_Position).xyz; + outLightVec = normalize(ubo.lightPos.xyz - outEyesPos); + + gl_Position = ubo.projection * ubo.model * gl_Position; +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/distancefieldfonts/bitmap.frag b/tests/glsl/sascha-willems/distancefieldfonts/bitmap.frag new file mode 100644 index 000000000..5f6303c3c --- /dev/null +++ b/tests/glsl/sascha-willems/distancefieldfonts/bitmap.frag @@ -0,0 +1,16 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (binding = 1) uniform sampler2D samplerColor; + +layout (location = 0) in vec2 inUV; + +layout (location = 0) out vec4 outFragColor; + +void main() +{ + outFragColor = vec4(texture(samplerColor, inUV).a); +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/distancefieldfonts/bitmap.vert b/tests/glsl/sascha-willems/distancefieldfonts/bitmap.vert new file mode 100644 index 000000000..4ed4e6246 --- /dev/null +++ b/tests/glsl/sascha-willems/distancefieldfonts/bitmap.vert @@ -0,0 +1,22 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (location = 0) in vec3 inPos; +layout (location = 1) in vec2 inUV; + +layout (binding = 0) uniform UBO +{ + mat4 projection; + mat4 model; +} ubo; + +layout (location = 0) out vec2 outUV; + +void main() +{ + outUV = inUV; + gl_Position = ubo.projection * ubo.model * vec4(inPos.xyz, 1.0); +} diff --git a/tests/glsl/sascha-willems/distancefieldfonts/sdf.frag b/tests/glsl/sascha-willems/distancefieldfonts/sdf.frag new file mode 100644 index 000000000..a0a4babb4 --- /dev/null +++ b/tests/glsl/sascha-willems/distancefieldfonts/sdf.frag @@ -0,0 +1,36 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (binding = 1) uniform sampler2D samplerColor; + +layout (binding = 2) uniform UBO +{ + vec4 outlineColor; + float outlineWidth; + float outline; +} ubo; + +layout (location = 0) in vec2 inUV; + +layout (location = 0) out vec4 outFragColor; + +void main() +{ + float distance = texture(samplerColor, inUV).a; + float smoothWidth = fwidth(distance); + float alpha = smoothstep(0.5 - smoothWidth, 0.5 + smoothWidth, distance); + vec3 rgb = vec3(alpha); + + if (ubo.outline > 0.0) + { + float w = 1.0 - ubo.outlineWidth; + alpha = smoothstep(w - smoothWidth, w + smoothWidth, distance); + rgb += mix(vec3(alpha), ubo.outlineColor.rgb, alpha); + } + + outFragColor = vec4(rgb, alpha); + +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/distancefieldfonts/sdf.vert b/tests/glsl/sascha-willems/distancefieldfonts/sdf.vert new file mode 100644 index 000000000..4ed4e6246 --- /dev/null +++ b/tests/glsl/sascha-willems/distancefieldfonts/sdf.vert @@ -0,0 +1,22 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (location = 0) in vec3 inPos; +layout (location = 1) in vec2 inUV; + +layout (binding = 0) uniform UBO +{ + mat4 projection; + mat4 model; +} ubo; + +layout (location = 0) out vec2 outUV; + +void main() +{ + outUV = inUV; + gl_Position = ubo.projection * ubo.model * vec4(inPos.xyz, 1.0); +} diff --git a/tests/glsl/sascha-willems/dynamicuniformbuffer/base.frag b/tests/glsl/sascha-willems/dynamicuniformbuffer/base.frag new file mode 100644 index 000000000..496c70b14 --- /dev/null +++ b/tests/glsl/sascha-willems/dynamicuniformbuffer/base.frag @@ -0,0 +1,14 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (location = 0) in vec3 inColor; + +layout (location = 0) out vec4 outFragColor; + +void main() +{ + outFragColor = vec4(inColor, 1.0); +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/dynamicuniformbuffer/base.vert b/tests/glsl/sascha-willems/dynamicuniformbuffer/base.vert new file mode 100644 index 000000000..5440e88d2 --- /dev/null +++ b/tests/glsl/sascha-willems/dynamicuniformbuffer/base.vert @@ -0,0 +1,34 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (location = 0) in vec3 inPos; +layout (location = 1) in vec3 inColor; + +layout (binding = 0) uniform UboView +{ + mat4 projection; + mat4 view; +} uboView; + +layout (binding = 1) uniform UboInstance +{ + mat4 model; +} uboInstance; + +layout (location = 0) out vec3 outColor; + +out gl_PerVertex +{ + vec4 gl_Position; +}; + +void main() +{ + outColor = inColor; + mat4 modelView = uboView.view * uboInstance.model; + vec3 worldPos = vec3(modelView * vec4(inPos, 1.0)); + gl_Position = uboView.projection * modelView * vec4(inPos.xyz, 1.0); +} diff --git a/tests/glsl/sascha-willems/gears/gears.frag b/tests/glsl/sascha-willems/gears/gears.frag new file mode 100644 index 000000000..db3dcd5e2 --- /dev/null +++ b/tests/glsl/sascha-willems/gears/gears.frag @@ -0,0 +1,25 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (location = 0) in vec3 inNormal; +layout (location = 1) in vec3 inColor; +layout (location = 2) in vec3 inEyePos; +layout (location = 3) in vec3 inLightVec; + +layout (location = 0) out vec4 outFragColor; + +void main() +{ + vec3 Eye = normalize(-inEyePos); + vec3 Reflected = normalize(reflect(-inLightVec, inNormal)); + + vec4 IAmbient = vec4(0.2, 0.2, 0.2, 1.0); + vec4 IDiffuse = vec4(0.5, 0.5, 0.5, 0.5) * max(dot(inNormal, inLightVec), 0.0); + float specular = 0.25; + vec4 ISpecular = vec4(0.5, 0.5, 0.5, 1.0) * pow(max(dot(Reflected, Eye), 0.0), 0.8) * specular; + + outFragColor = vec4((IAmbient + IDiffuse) * 
vec4(inColor, 1.0) + ISpecular); +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/gears/gears.vert b/tests/glsl/sascha-willems/gears/gears.vert new file mode 100644 index 000000000..3799e0000 --- /dev/null +++ b/tests/glsl/sascha-willems/gears/gears.vert @@ -0,0 +1,35 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (location = 0) in vec4 inPos; +layout (location = 1) in vec3 inNormal; +layout (location = 2) in vec3 inColor; + +layout (binding = 0) uniform UBO +{ + mat4 projection; + mat4 model; + mat4 normal; + mat4 view; + vec3 lightpos; +} ubo; + +layout (location = 0) out vec3 outNormal; +layout (location = 1) out vec3 outColor; +layout (location = 2) out vec3 outEyePos; +layout (location = 3) out vec3 outLightVec; + +void main() +{ + outNormal = normalize(mat3(ubo.normal) * inNormal); + outColor = inColor; + mat4 modelView = ubo.view * ubo.model; + vec4 pos = modelView * inPos; + outEyePos = vec3(modelView * pos); + vec4 lightPos = vec4(ubo.lightpos, 1.0) * modelView; + outLightVec = normalize(lightPos.xyz - outEyePos); + gl_Position = ubo.projection * pos; +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/geometryshader/base.frag b/tests/glsl/sascha-willems/geometryshader/base.frag new file mode 100644 index 000000000..768117a92 --- /dev/null +++ b/tests/glsl/sascha-willems/geometryshader/base.frag @@ -0,0 +1,14 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (location = 0) in vec3 inColor; + +layout (location = 0) out vec4 outFragColor; + +void main(void) +{ + outFragColor = vec4(inColor, 1.0); +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/geometryshader/base.vert b/tests/glsl/sascha-willems/geometryshader/base.vert new file mode 100644 index 000000000..a3ba3830f --- /dev/null +++ b/tests/glsl/sascha-willems/geometryshader/base.vert @@ -0,0 +1,21 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (location = 0) in vec3 inPos; +layout (location = 1) in vec3 inNormal; + +layout (location = 0) out vec3 outNormal; + +out gl_PerVertex +{ + vec4 gl_Position; +}; + +void main(void) +{ + outNormal = inNormal; + gl_Position = vec4(inPos.xyz, 1.0); +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/geometryshader/mesh.frag b/tests/glsl/sascha-willems/geometryshader/mesh.frag new file mode 100644 index 000000000..d31ff7124 --- /dev/null +++ b/tests/glsl/sascha-willems/geometryshader/mesh.frag @@ -0,0 +1,24 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (location = 0) in vec3 inNormal; +layout (location = 1) in vec3 inColor; +layout (location = 2) in vec3 inViewVec; +layout (location = 3) in vec3 inLightVec; + +layout (location = 0) out vec4 outFragColor; + +void main() +{ + vec3 N = normalize(inNormal); + vec3 L = normalize(inLightVec); + vec3 V = normalize(inViewVec); + vec3 R = reflect(-L, N); + vec3 ambient = vec3(0.1); + vec3 diffuse = max(dot(N, L), 0.0) * vec3(1.0); + vec3 specular = pow(max(dot(R, V), 0.0), 16.0) * vec3(0.75); + outFragColor = vec4((ambient + diffuse) * inColor.rgb + specular, 1.0); +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/geometryshader/mesh.vert b/tests/glsl/sascha-willems/geometryshader/mesh.vert new file mode 100644 index 000000000..4caae7b2e --- /dev/null +++ b/tests/glsl/sascha-willems/geometryshader/mesh.vert @@ -0,0 +1,39 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (location = 0) in vec4 inPos; +layout (location = 1) in vec3 inNormal; +layout (location = 2) in vec3 inColor; + +layout (set = 0, binding = 0) uniform UBO +{ + mat4 projection; + mat4 model; +} ubo; + +layout (location = 0) out vec3 outNormal; +layout (location = 1) out vec3 outColor; +layout (location = 2) out vec3 outViewVec; +layout (location = 3) out vec3 outLightVec; + +out gl_PerVertex +{ + vec4 gl_Position; +}; + +void main() +{ + outNormal = inNormal; + outColor = inColor; + gl_Position = ubo.projection * ubo.model * inPos; + + vec4 pos = ubo.model * vec4(inPos.xyz, 1.0); + outNormal = mat3(ubo.model) * inNormal; + + vec3 lightPos = vec3(1.0f, -1.0f, 1.0f); + outLightVec = lightPos.xyz - pos.xyz; + outViewVec = -pos.xyz; +} diff --git a/tests/glsl/sascha-willems/geometryshader/normaldebug.geom b/tests/glsl/sascha-willems/geometryshader/normaldebug.geom new file mode 100644 index 000000000..d3c487731 --- /dev/null +++ b/tests/glsl/sascha-willems/geometryshader/normaldebug.geom @@ -0,0 +1,38 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (triangles) in; +layout (line_strip, max_vertices = 6) out; + +layout (binding = 1) uniform UBO +{ + mat4 projection; + mat4 model; +} ubo; + +layout (location = 0) in vec3 inNormal[]; + +layout (location = 0) out vec3 outColor; + +void main(void) +{ + float normalLength = 0.02; + for(int i=0; i<gl_in.length(); i++) + { + vec3 pos = gl_in[i].gl_Position.xyz; + vec3 normal = inNormal[i].xyz; + 
+ gl_Position = ubo.projection * (ubo.model * vec4(pos, 1.0)); + outColor = vec3(1.0, 0.0, 0.0); + EmitVertex(); + + gl_Position = ubo.projection * (ubo.model * vec4(pos + normal * normalLength, 1.0)); + outColor = vec3(0.0, 0.0, 1.0); + EmitVertex(); + + EndPrimitive(); + } +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/hdr/bloom.frag b/tests/glsl/sascha-willems/hdr/bloom.frag new file mode 100644 index 000000000..bc26005e6 --- /dev/null +++ b/tests/glsl/sascha-willems/hdr/bloom.frag @@ -0,0 +1,67 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (binding = 0) uniform sampler2D samplerColor0; +layout (binding = 1) uniform sampler2D samplerColor1; + +layout (location = 0) in vec2 inUV; + +layout (location = 0) out vec4 outColor; + +layout (constant_id = 0) const int dir = 0; + +void main(void) +{ + // From the OpenGL Super bible + const float weights[] = float[](0.0024499299678342, + 0.0043538453346397, + 0.0073599963704157, + 0.0118349786570722, + 0.0181026699707781, + 0.0263392293891488, + 0.0364543006660986, + 0.0479932050577658, + 0.0601029809166942, + 0.0715974486241365, + 0.0811305381519717, + 0.0874493212267511, + 0.0896631113333857, + 0.0874493212267511, + 0.0811305381519717, + 0.0715974486241365, + 0.0601029809166942, + 0.0479932050577658, + 0.0364543006660986, + 0.0263392293891488, + 0.0181026699707781, + 0.0118349786570722, + 0.0073599963704157, + 0.0043538453346397, + 0.0024499299678342); + + + const float blurScale = 0.003; + const float blurStrength = 1.0; + + float ar = 1.0; + // Aspect ratio for vertical blur pass + if (dir == 1) + { + vec2 ts = textureSize(samplerColor1, 0); + ar = ts.y / ts.x; + } + + vec2 P = inUV.yx - vec2(0, (weights.length() >> 1) * ar * blurScale); + + vec4 color = vec4(0.0); + for (int i = 0; i < weights.length(); i++) + { + vec2 dv = vec2(0.0, i * blurScale) * ar; + color += texture(samplerColor1, P + dv) * weights[i] * blurStrength; + } + + outColor = color; +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/hdr/bloom.vert b/tests/glsl/sascha-willems/hdr/bloom.vert new file mode 100644 index 000000000..548284554 --- /dev/null +++ b/tests/glsl/sascha-willems/hdr/bloom.vert @@ -0,0 +1,18 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (location = 0) out vec2 outUV; + +out gl_PerVertex +{ + vec4 gl_Position; +}; + +void main() +{ + outUV = vec2((gl_VertexIndex << 1) & 2, gl_VertexIndex & 2); + gl_Position = vec4(outUV * 2.0f - 1.0f, 0.0f, 1.0f); +} diff --git a/tests/glsl/sascha-willems/hdr/composition.frag b/tests/glsl/sascha-willems/hdr/composition.frag new file mode 100644 index 000000000..13f2f5838 --- /dev/null +++ b/tests/glsl/sascha-willems/hdr/composition.frag @@ -0,0 +1,17 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (binding = 0) uniform sampler2D samplerColor0; +layout (binding = 1) uniform sampler2D samplerColor1; + +layout (location = 0) in vec2 inUV; + +layout (location = 0) out vec4 outColor; + +void main() +{ + outColor = texture(samplerColor0, inUV); +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/hdr/composition.vert b/tests/glsl/sascha-willems/hdr/composition.vert new file mode 100644 index 000000000..548284554 --- /dev/null +++ b/tests/glsl/sascha-willems/hdr/composition.vert @@ -0,0 +1,18 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (location = 0) out vec2 outUV; + +out gl_PerVertex +{ + vec4 gl_Position; +}; + +void main() +{ + outUV = vec2((gl_VertexIndex << 1) & 2, gl_VertexIndex & 2); + gl_Position = vec4(outUV * 2.0f - 1.0f, 0.0f, 1.0f); +} diff --git a/tests/glsl/sascha-willems/hdr/gbuffer.frag b/tests/glsl/sascha-willems/hdr/gbuffer.frag new file mode 100644 index 000000000..476534d76 --- /dev/null +++ b/tests/glsl/sascha-willems/hdr/gbuffer.frag @@ -0,0 +1,95 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (binding = 1) uniform samplerCube samplerEnvMap; + +layout (location = 0) in vec3 inUVW; +layout (location = 1) in vec3 inPos; +layout (location = 2) in vec3 inNormal; +layout (location = 3) in vec3 inViewVec; +layout (location = 4) in vec3 inLightVec; +layout (location = 5) in mat4 inInvModelView; + +layout (location = 0) out vec4 outColor0; +layout (location = 1) out vec4 outColor1; + +layout (constant_id = 0) const int type = 0; + +#define PI 3.1415926 +#define TwoPI (2.0 * PI) + +layout (binding = 2) uniform UBO { + float exposure; +} ubo; + +void main() +{ + vec4 color; + vec3 wcNormal; + + switch (type) { + case 0: // Skybox + { + vec3 normal = normalize(inUVW); + color = texture(samplerEnvMap, normal); + } + break; + + case 1: // Reflect + { + vec3 wViewVec = mat3(inInvModelView) * normalize(inViewVec); + vec3 normal = normalize(inNormal); + vec3 wNormal = mat3(inInvModelView) * normal; + + float NdotL = max(dot(normal, inLightVec), 0.0); + + vec3 eyeDir = 
normalize(inViewVec); + vec3 halfVec = normalize(inLightVec + eyeDir); + float NdotH = max(dot(normal, halfVec), 0.0); + float NdotV = max(dot(normal, eyeDir), 0.0); + float VdotH = max(dot(eyeDir, halfVec), 0.0); + + // Geometric attenuation + float NH2 = 2.0 * NdotH; + float g1 = (NH2 * NdotV) / VdotH; + float g2 = (NH2 * NdotL) / VdotH; + float geoAtt = min(1.0, min(g1, g2)); + + const float F0 = 0.6; + const float k = 0.2; + + // Fresnel (schlick approximation) + float fresnel = pow(1.0 - VdotH, 5.0); + fresnel *= (1.0 - F0); + fresnel += F0; + + float spec = (fresnel * geoAtt) / (NdotV * NdotL * 3.14); + + color = texture(samplerEnvMap, reflect(-wViewVec, wNormal)); + + color = vec4(color.rgb * NdotL * (k + spec * (1.0 - k)), 1.0); + } + break; + + case 2: // Refract + { + vec3 wViewVec = mat3(inInvModelView) * normalize(inViewVec); + vec3 wNormal = mat3(inInvModelView) * inNormal; + color = texture(samplerEnvMap, refract(-wViewVec, wNormal, 1.0/1.6)); + } + break; + } + + + // Color with manual exposure into attachment 0 + outColor0.rgb = vec3(1.0) - exp(-color.rgb * ubo.exposure); + + // Bright parts for bloom into attachment 1 + float l = dot(outColor0.rgb, vec3(0.2126, 0.7152, 0.0722)); + float threshold = 0.75; + outColor1.rgb = (l > threshold) ? outColor0.rgb : vec3(0.0); + outColor1.a = 1.0; +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/hdr/gbuffer.vert b/tests/glsl/sascha-willems/hdr/gbuffer.vert new file mode 100644 index 000000000..862203774 --- /dev/null +++ b/tests/glsl/sascha-willems/hdr/gbuffer.vert @@ -0,0 +1,51 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (location = 0) in vec3 inPos; +layout (location = 1) in vec3 inNormal; + +layout (constant_id = 0) const int type = 0; + +layout (binding = 0) uniform UBO { + mat4 projection; + mat4 modelview; +} ubo; + +layout (location = 0) out vec3 outUVW; +layout (location = 1) out vec3 outPos; +layout (location = 2) out vec3 outNormal; +layout (location = 3) out vec3 outViewVec; +layout (location = 4) out vec3 outLightVec; +layout (location = 5) out mat4 outInvModelView; + +out gl_PerVertex +{ + vec4 gl_Position; +}; + +void main() +{ + outUVW = inPos; + + switch(type) { + case 0: // Skybox + outPos = vec3(mat3(ubo.modelview) * inPos); + gl_Position = vec4(ubo.projection * vec4(outPos, 1.0)); + break; + case 1: // Object + outPos = vec3(ubo.modelview * vec4(inPos, 1.0)); + gl_Position = ubo.projection * ubo.modelview * vec4(inPos.xyz, 1.0); + break; + } + outPos = vec3(ubo.modelview * vec4(inPos, 1.0)); + outNormal = mat3(ubo.modelview) * inNormal; + + outInvModelView = inverse(ubo.modelview); + + vec3 lightPos = vec3(0.0f, -5.0f, 5.0f); + outLightVec = lightPos.xyz - outPos.xyz; + outViewVec = -outPos.xyz; +} diff --git a/tests/glsl/sascha-willems/imgui/scene.frag b/tests/glsl/sascha-willems/imgui/scene.frag new file mode 100644 index 000000000..77eded98b --- /dev/null +++ b/tests/glsl/sascha-willems/imgui/scene.frag @@ -0,0 +1,23 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (location = 0) in vec3 inNormal; +layout (location = 1) in vec3 inColor; +layout (location 
= 2) in vec3 inViewVec; +layout (location = 3) in vec3 inLightVec; + +layout (location = 0) out vec4 outFragColor; + +void main() +{ + vec3 N = normalize(inNormal); + vec3 L = normalize(inLightVec); + vec3 V = normalize(inViewVec); + vec3 R = reflect(-L, N); + float diffuse = max(dot(N, L), 0.0); + vec3 specular = pow(max(dot(R, V), 0.0), 16.0) * vec3(0.75); + outFragColor = vec4(diffuse * inColor + specular, 1.0); +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/imgui/scene.vert b/tests/glsl/sascha-willems/imgui/scene.vert new file mode 100644 index 000000000..921b1dbe6 --- /dev/null +++ b/tests/glsl/sascha-willems/imgui/scene.vert @@ -0,0 +1,39 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (location = 0) in vec3 inPos; +layout (location = 1) in vec3 inNormal; +layout (location = 2) in vec3 inColor; + +layout (binding = 0) uniform UBO +{ + mat4 projection; + mat4 model; + vec4 lightPos; +} ubo; + +layout (location = 0) out vec3 outNormal; +layout (location = 1) out vec3 outColor; +layout (location = 2) out vec3 outViewVec; +layout (location = 3) out vec3 outLightVec; + +out gl_PerVertex +{ + vec4 gl_Position; +}; + +void main() +{ + outNormal = inNormal; + outColor = inColor; + gl_Position = ubo.projection * ubo.model * vec4(inPos.xyz, 1.0); + + vec4 pos = ubo.model * vec4(inPos, 1.0); + outNormal = mat3(ubo.model) * inNormal; + vec3 lPos = mat3(ubo.model) * ubo.lightPos.xyz; + outLightVec = lPos - pos.xyz; + outViewVec = -pos.xyz; +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/imgui/ui.frag b/tests/glsl/sascha-willems/imgui/ui.frag new file mode 100644 index 000000000..51e89bd7b --- /dev/null +++ b/tests/glsl/sascha-willems/imgui/ui.frag @@ -0,0 +1,14 @@ +#version 450 +//TEST:COMPARE_GLSL: + +layout (binding = 0) uniform sampler2D fontSampler; + +layout (location = 0) in vec2 inUV; +layout (location = 1) in vec4 inColor; + +layout (location = 0) out vec4 outColor; + +void main() +{ + outColor = inColor * texture(fontSampler, inUV); +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/imgui/ui.vert b/tests/glsl/sascha-willems/imgui/ui.vert new file mode 100644 index 000000000..cf95e08d3 --- /dev/null +++ b/tests/glsl/sascha-willems/imgui/ui.vert @@ -0,0 +1,26 @@ +#version 450 +//TEST:COMPARE_GLSL: + +layout (location = 0) in vec2 inPos; +layout (location = 1) in vec2 inUV; +layout (location = 2) in vec4 inColor; + +layout (push_constant) uniform PushConstants { + vec2 scale; + vec2 translate; +} pushConstants; + +layout (location = 0) out vec2 outUV; +layout (location = 1) out vec4 outColor; + +out gl_PerVertex +{ + vec4 gl_Position; +}; + +void main() +{ + outUV = inUV; + outColor = inColor; + gl_Position = vec4(inPos * pushConstants.scale + pushConstants.translate, 0.0, 1.0); +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/indirectdraw/ground.frag b/tests/glsl/sascha-willems/indirectdraw/ground.frag new file mode 100644 index 000000000..47589e181 --- /dev/null +++ b/tests/glsl/sascha-willems/indirectdraw/ground.frag @@ -0,0 +1,29 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (binding = 2) uniform sampler2D samplerColor; + +layout (location = 0) in vec3 inNormal; +layout (location = 1) in vec3 inColor; +layout (location = 2) in vec2 inUV; +layout (location = 3) in vec3 inViewVec; +layout (location = 4) in vec3 inLightVec; + +layout (location = 0) out vec4 outFragColor; + +void main() +{ + // Last array layer is terrain tex + vec4 color = texture(samplerColor, inUV); + vec3 N = normalize(inNormal); + vec3 L = normalize(inLightVec); + vec3 V = normalize(inViewVec); + vec3 R = reflect(-L, N); + vec3 ambient = vec3(0.65); + vec3 diffuse = max(dot(N, L), 0.0) * inColor; + vec3 specular = pow(max(dot(R, V), 0.0), 16.0) * vec3(0.1); + outFragColor = vec4((ambient + diffuse) * color.rgb + specular, 1.0); +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/indirectdraw/ground.vert b/tests/glsl/sascha-willems/indirectdraw/ground.vert new file mode 100644 index 000000000..fa6bbe5c6 --- /dev/null +++ b/tests/glsl/sascha-willems/indirectdraw/ground.vert @@ -0,0 +1,44 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +// Vertex attributes +layout (location = 0) in vec4 inPos; +layout (location = 1) in vec3 inNormal; +layout (location = 2) in vec2 inUV; +layout (location = 3) in vec3 inColor; + +layout (binding = 0) uniform UBO +{ + mat4 projection; + mat4 modelview; +} ubo; + +layout (location = 0) out vec3 outNormal; +layout (location = 1) out vec3 outColor; +layout (location = 2) out vec2 outUV; +layout (location = 3) out vec3 outViewVec; +layout (location = 4) out vec3 outLightVec; + +out gl_PerVertex +{ + vec4 gl_Position; +}; + +void main() +{ + outColor = inColor; + outUV = inUV * 32.0; + outNormal = inNormal; + + vec4 pos = vec4(inPos.xyz, 1.0); + + gl_Position = ubo.projection * ubo.modelview * pos; + + vec4 wPos = ubo.modelview * vec4(pos.xyz, 1.0); + vec4 lPos = vec4(0.0, -5.0, 0.0, 1.0); + outLightVec = lPos.xyz - pos.xyz; + outViewVec = -pos.xyz; +} diff --git a/tests/glsl/sascha-willems/indirectdraw/indirectdraw.frag b/tests/glsl/sascha-willems/indirectdraw/indirectdraw.frag new file mode 100644 index 000000000..3c7913201 --- /dev/null +++ b/tests/glsl/sascha-willems/indirectdraw/indirectdraw.frag @@ -0,0 +1,31 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (binding = 1) uniform sampler2DArray samplerArray; + +layout (location = 0) in vec3 inNormal; +layout (location = 1) in vec3 inColor; +layout (location = 2) in vec3 inUV; +layout (location = 3) in vec3 inViewVec; +layout (location = 4) in vec3 inLightVec; + +layout (location = 0) out 
vec4 outFragColor; + +void main() +{ + vec4 color = texture(samplerArray, inUV); + + if (color.a < 0.5) + { + discard; + } + + vec3 N = normalize(inNormal); + vec3 L = normalize(inLightVec); + vec3 ambient = vec3(0.65); + vec3 diffuse = max(dot(N, L), 0.0) * inColor; + outFragColor = vec4((ambient + diffuse) * color.rgb, 1.0); +} diff --git a/tests/glsl/sascha-willems/indirectdraw/indirectdraw.vert b/tests/glsl/sascha-willems/indirectdraw/indirectdraw.vert new file mode 100644 index 000000000..a4d7e9132 --- /dev/null +++ b/tests/glsl/sascha-willems/indirectdraw/indirectdraw.vert @@ -0,0 +1,83 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +// Vertex attributes +layout (location = 0) in vec4 inPos; +layout (location = 1) in vec3 inNormal; +layout (location = 2) in vec2 inUV; +layout (location = 3) in vec3 inColor; + +// Instanced attributes +layout (location = 4) in vec3 instancePos; +layout (location = 5) in vec3 instanceRot; +layout (location = 6) in float instanceScale; +layout (location = 7) in int instanceTexIndex; + +layout (binding = 0) uniform UBO +{ + mat4 projection; + mat4 modelview; +} ubo; + +layout (location = 0) out vec3 outNormal; +layout (location = 1) out vec3 outColor; +layout (location = 2) out vec3 outUV; +layout (location = 3) out vec3 outViewVec; +layout (location = 4) out vec3 outLightVec; + +out gl_PerVertex +{ + vec4 gl_Position; +}; + +void main() +{ + outColor = inColor; + outUV = vec3(inUV, instanceTexIndex); + outUV.t = 1.0 - outUV.t; + + mat4 mx, my, mz; + + // rotate around x + float s = sin(instanceRot.x); + float c = cos(instanceRot.x); + + mx[0] = vec4(c, s, 0.0, 0.0); + mx[1] = vec4(-s, c, 0.0, 0.0); + mx[2] = vec4(0.0, 0.0, 1.0, 0.0); + mx[3] = vec4(0.0, 0.0, 0.0, 1.0); + + // rotate around y + s = sin(instanceRot.y); + c = cos(instanceRot.y); + + my[0] = vec4(c, 0.0, s, 0.0); + my[1] = vec4(0.0, 1.0, 0.0, 0.0); + my[2] = vec4(-s, 
0.0, c, 0.0); + my[3] = vec4(0.0, 0.0, 0.0, 1.0); + + // rot around z + s = sin(instanceRot.z); + c = cos(instanceRot.z); + + mz[0] = vec4(1.0, 0.0, 0.0, 0.0); + mz[1] = vec4(0.0, c, s, 0.0); + mz[2] = vec4(0.0, -s, c, 0.0); + mz[3] = vec4(0.0, 0.0, 0.0, 1.0); + + mat4 rotMat = mz * my * mx; + + outNormal = inNormal * mat3(rotMat); + + vec4 pos = vec4((inPos.xyz * instanceScale) + instancePos, 1.0) * rotMat; + + gl_Position = ubo.projection * ubo.modelview * pos; + + vec4 wPos = ubo.modelview * vec4(pos.xyz, 1.0); + vec4 lPos = vec4(0.0, -5.0, 0.0, 1.0); + outLightVec = lPos.xyz - pos.xyz; + outViewVec = -pos.xyz; +} diff --git a/tests/glsl/sascha-willems/indirectdraw/skysphere.frag b/tests/glsl/sascha-willems/indirectdraw/skysphere.frag new file mode 100644 index 000000000..5c6f4cc7b --- /dev/null +++ b/tests/glsl/sascha-willems/indirectdraw/skysphere.frag @@ -0,0 +1,18 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (binding = 2) uniform sampler2D samplerColor; + +layout (location = 0) in vec2 inUV; + +layout (location = 0) out vec4 outFragColor; + +void main() +{ + const vec4 gradientStart = vec4(0.93, 0.9, 0.81, 1.0); + const vec4 gradientEnd = vec4(0.35, 0.5, 1.0, 1.0); + outFragColor = mix(gradientStart, gradientEnd, min(0.5 - inUV.t, 0.5)/0.15 + 0.5); +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/indirectdraw/skysphere.vert b/tests/glsl/sascha-willems/indirectdraw/skysphere.vert new file mode 100644 index 000000000..03e563b39 --- /dev/null +++ b/tests/glsl/sascha-willems/indirectdraw/skysphere.vert @@ -0,0 +1,29 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +// Vertex attributes +layout (location = 0) in vec4 inPos; +layout (location = 2) in vec2 inUV; + +layout (binding = 0) uniform UBO +{ + mat4 projection; + mat4 modelview; +} ubo; + +layout (location = 0) out vec2 outUV; + +out gl_PerVertex +{ + vec4 gl_Position; +}; + +void main() +{ + outUV = vec2(inUV.s, 1.0-inUV.t); + // Skysphere always at center, only use rotation part of modelview matrix + gl_Position = ubo.projection * mat4(mat3(ubo.modelview)) * vec4(inPos.xyz, 1.0); +} diff --git a/tests/glsl/sascha-willems/instancing/instancing.frag b/tests/glsl/sascha-willems/instancing/instancing.frag new file mode 100644 index 000000000..cdec8971b --- /dev/null +++ b/tests/glsl/sascha-willems/instancing/instancing.frag @@ -0,0 +1,27 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (binding = 1) uniform sampler2DArray samplerArray; + +layout (location = 0) in vec3 inNormal; +layout (location = 1) in vec3 inColor; +layout (location = 2) in vec3 inUV; +layout (location = 3) in vec3 inViewVec; +layout (location = 4) in vec3 inLightVec; + +layout (location = 0) out vec4 outFragColor; + +void main() +{ + vec4 color = texture(samplerArray, inUV) * vec4(inColor, 1.0); + vec3 N = normalize(inNormal); + vec3 L = normalize(inLightVec); + vec3 V = normalize(inViewVec); + vec3 R = reflect(-L, N); + vec3 diffuse = max(dot(N, L), 0.1) * inColor; + vec3 specular = (dot(N,L) > 0.0) ? 
pow(max(dot(R, V), 0.0), 16.0) * vec3(0.75) * color.r : vec3(0.0); + outFragColor = vec4(diffuse * color.rgb + specular, 1.0); +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/instancing/instancing.vert b/tests/glsl/sascha-willems/instancing/instancing.vert new file mode 100644 index 000000000..9f7516453 --- /dev/null +++ b/tests/glsl/sascha-willems/instancing/instancing.vert @@ -0,0 +1,85 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +// Vertex attributes +layout (location = 0) in vec3 inPos; +layout (location = 1) in vec3 inNormal; +layout (location = 2) in vec2 inUV; +layout (location = 3) in vec3 inColor; + +// Instanced attributes +layout (location = 4) in vec3 instancePos; +layout (location = 5) in vec3 instanceRot; +layout (location = 6) in float instanceScale; +layout (location = 7) in int instanceTexIndex; + +layout (binding = 0) uniform UBO +{ + mat4 projection; + mat4 modelview; + vec4 lightPos; + float locSpeed; + float globSpeed; +} ubo; + +layout (location = 0) out vec3 outNormal; +layout (location = 1) out vec3 outColor; +layout (location = 2) out vec3 outUV; +layout (location = 3) out vec3 outViewVec; +layout (location = 4) out vec3 outLightVec; + +void main() +{ + outColor = inColor; + outUV = vec3(inUV, instanceTexIndex); + + mat3 mx, my, mz; + + // rotate around x + float s = sin(instanceRot.x + ubo.locSpeed); + float c = cos(instanceRot.x + ubo.locSpeed); + + mx[0] = vec3(c, s, 0.0); + mx[1] = vec3(-s, c, 0.0); + mx[2] = vec3(0.0, 0.0, 1.0); + + // rotate around y + s = sin(instanceRot.y + ubo.locSpeed); + c = cos(instanceRot.y + ubo.locSpeed); + + my[0] = vec3(c, 0.0, s); + my[1] = vec3(0.0, 1.0, 0.0); + my[2] = vec3(-s, 0.0, c); + + // rot around z + s = sin(instanceRot.z + ubo.locSpeed); + c = cos(instanceRot.z + ubo.locSpeed); + + mz[0] = vec3(1.0, 0.0, 0.0); + mz[1] = vec3(0.0, c, s); + mz[2] = vec3(0.0, -s, c); + + mat3 rotMat = mz * my * mx; + + mat4 gRotMat; + s = sin(instanceRot.y + ubo.globSpeed); + c = cos(instanceRot.y + ubo.globSpeed); + 
gRotMat[0] = vec4(c, 0.0, s, 0.0); + gRotMat[1] = vec4(0.0, 1.0, 0.0, 0.0); + gRotMat[2] = vec4(-s, 0.0, c, 0.0); + gRotMat[3] = vec4(0.0, 0.0, 0.0, 1.0); + + vec4 locPos = vec4(inPos.xyz * rotMat, 1.0); + vec4 pos = vec4((locPos.xyz * instanceScale) + instancePos, 1.0); + + gl_Position = ubo.projection * ubo.modelview * gRotMat * pos; + outNormal = mat3(ubo.modelview * gRotMat) * inverse(rotMat) * inNormal; + + pos = ubo.modelview * vec4(inPos.xyz + instancePos, 1.0); + vec3 lPos = mat3(ubo.modelview) * ubo.lightPos.xyz; + outLightVec = lPos - pos.xyz; + outViewVec = -pos.xyz; +} diff --git a/tests/glsl/sascha-willems/instancing/planet.frag b/tests/glsl/sascha-willems/instancing/planet.frag new file mode 100644 index 000000000..eb6d19242 --- /dev/null +++ b/tests/glsl/sascha-willems/instancing/planet.frag @@ -0,0 +1,27 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (binding = 1) uniform sampler2D samplerColorMap; + +layout (location = 0) in vec3 inNormal; +layout (location = 1) in vec3 inColor; +layout (location = 2) in vec2 inUV; +layout (location = 3) in vec3 inViewVec; +layout (location = 4) in vec3 inLightVec; + +layout (location = 0) out vec4 outFragColor; + +void main() +{ + vec4 color = texture(samplerColorMap, inUV) * vec4(inColor, 1.0) * 1.5; + vec3 N = normalize(inNormal); + vec3 L = normalize(inLightVec); + vec3 V = normalize(inViewVec); + vec3 R = reflect(-L, N); + vec3 diffuse = max(dot(N, L), 0.0) * inColor; + vec3 specular = pow(max(dot(R, V), 0.0), 4.0) * vec3(0.5) * color.r; + outFragColor = vec4(diffuse * color.rgb + specular, 1.0); +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/instancing/planet.vert b/tests/glsl/sascha-willems/instancing/planet.vert new file mode 100644 index 000000000..ad0177933 --- /dev/null +++ b/tests/glsl/sascha-willems/instancing/planet.vert @@ -0,0 +1,36 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (location = 0) in vec3 inPos; +layout (location = 1) in vec3 inNormal; +layout (location = 2) in vec2 inUV; +layout (location = 3) in vec3 inColor; + +layout (binding = 0) uniform UBO +{ + mat4 projection; + mat4 modelview; + vec4 lightPos; +} ubo; + +layout (location = 0) out vec3 outNormal; +layout (location = 1) out vec3 outColor; +layout (location = 2) out vec2 outUV; +layout (location = 3) out vec3 outViewVec; +layout (location = 4) out vec3 outLightVec; + +void main() +{ + outColor = inColor; + outUV = inUV * vec2(10.0, 6.0); + gl_Position = ubo.projection * ubo.modelview * vec4(inPos.xyz, 1.0); + + vec4 pos = ubo.modelview * vec4(inPos, 1.0); + outNormal = mat3(ubo.modelview) * inNormal; + vec3 lPos = mat3(ubo.modelview) * ubo.lightPos.xyz; + outLightVec = lPos - pos.xyz; + outViewVec = -pos.xyz; +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/instancing/starfield.frag b/tests/glsl/sascha-willems/instancing/starfield.frag new file mode 100644 index 000000000..bb4c2823a --- /dev/null +++ b/tests/glsl/sascha-willems/instancing/starfield.frag @@ -0,0 +1,35 @@ +#version 450 +//TEST:COMPARE_GLSL: + +layout (location = 0) in vec3 inUVW; + +layout (location = 0) out vec4 outFragColor; + +#define HASHSCALE3 vec3(443.897, 441.423, 437.195) +#define STARFREQUENCY 0.01 + +// Hash function by Dave Hoskins (https://www.shadertoy.com/view/4djSRW) +float hash33(vec3 p3) +{ + p3 = fract(p3 * HASHSCALE3); + p3 += dot(p3, p3.yxz+vec3(19.19)); + return fract((p3.x + p3.y)*p3.z + (p3.x+p3.z)*p3.y + (p3.y+p3.z)*p3.x); +} + +vec3 starField(vec3 pos) +{ + vec3 color = vec3(0.0); + float threshhold = (1.0 - STARFREQUENCY); + float rnd = hash33(pos); + if (rnd >= threshhold) + { + float starCol = pow((rnd - threshhold) / (1.0 - threshhold), 16.0); + color += vec3(starCol); + } + return color; +} + +void main() +{ + outFragColor = vec4(starField(inUVW), 1.0); +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/instancing/starfield.vert b/tests/glsl/sascha-willems/instancing/starfield.vert new file mode 100644 index 000000000..82721aefd --- /dev/null +++ b/tests/glsl/sascha-willems/instancing/starfield.vert @@ -0,0 +1,10 @@ +#version 450 +//TEST:COMPARE_GLSL: + +layout (location = 0) out vec3 outUVW; + +void main() +{ + outUVW = vec3((gl_VertexIndex << 1) & 2, gl_VertexIndex & 2, gl_VertexIndex & 2); + gl_Position = vec4(outUVW.st * 2.0f - 1.0f, 0.0f, 1.0f); +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/mesh/mesh.frag b/tests/glsl/sascha-willems/mesh/mesh.frag new file mode 100644 index 000000000..01fd2046f --- /dev/null +++ b/tests/glsl/sascha-willems/mesh/mesh.frag @@ -0,0 +1,28 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (binding = 1) uniform sampler2D samplerColorMap; + +layout (location = 0) in vec3 inNormal; +layout (location = 1) in vec3 inColor; +layout (location = 2) in vec2 inUV; +layout (location = 3) in vec3 inViewVec; +layout (location = 4) in vec3 inLightVec; + +layout (location = 0) out vec4 outFragColor; + +void main() +{ + vec4 color = texture(samplerColorMap, inUV) * vec4(inColor, 1.0); + + vec3 N = normalize(inNormal); + vec3 L = normalize(inLightVec); + vec3 V = normalize(inViewVec); + vec3 R = reflect(-L, N); + vec3 diffuse = max(dot(N, L), 0.0) * inColor; + vec3 specular = pow(max(dot(R, V), 0.0), 16.0) * vec3(0.75); + outFragColor = vec4(diffuse * color.rgb + specular, 1.0); +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/mesh/mesh.vert b/tests/glsl/sascha-willems/mesh/mesh.vert new file mode 100644 index 000000000..d0b3be251 --- /dev/null +++ b/tests/glsl/sascha-willems/mesh/mesh.vert @@ -0,0 +1,42 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (location = 0) in vec3 inPos; +layout (location = 1) in vec3 inNormal; +layout (location = 2) in vec2 inUV; +layout (location = 3) in vec3 inColor; + +layout (binding = 0) uniform UBO +{ + mat4 projection; + mat4 model; + vec4 lightPos; +} ubo; + +layout (location = 0) out vec3 outNormal; +layout (location = 1) out vec3 outColor; +layout (location = 2) out vec2 outUV; +layout (location = 3) out vec3 outViewVec; +layout (location = 4) out vec3 outLightVec; + +out gl_PerVertex +{ + vec4 gl_Position; +}; + +void main() +{ + outNormal = inNormal; + outColor = inColor; + outUV = inUV; + gl_Position = ubo.projection * ubo.model * vec4(inPos.xyz, 1.0); + + vec4 pos = ubo.model * vec4(inPos, 1.0); + outNormal = mat3(ubo.model) * inNormal; + vec3 lPos = mat3(ubo.model) * ubo.lightPos.xyz; + outLightVec = lPos - pos.xyz; + outViewVec = -pos.xyz; +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/multithreading/phong.frag b/tests/glsl/sascha-willems/multithreading/phong.frag new file mode 100644 index 000000000..663fd81b9 --- /dev/null +++ b/tests/glsl/sascha-willems/multithreading/phong.frag @@ -0,0 +1,24 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (location = 0) in vec3 inNormal; +layout (location = 1) in vec3 inColor; +layout (location = 3) in vec3 inViewVec; +layout (location = 4) in vec3 inLightVec; + +layout (location = 0) out vec4 outFragColor; + + +void main() +{ + vec3 N = normalize(inNormal); + vec3 L = normalize(inLightVec); + vec3 V = normalize(inViewVec); + vec3 R = reflect(-L, N); + vec3 diffuse = max(dot(N, L), 0.0) * inColor; + vec3 specular = pow(max(dot(R, V), 0.0), 8.0) * vec3(0.75); + outFragColor = vec4(diffuse + specular, 1.0); +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/multithreading/phong.vert b/tests/glsl/sascha-willems/multithreading/phong.vert new file mode 100644 index 000000000..67f803dfd --- /dev/null +++ b/tests/glsl/sascha-willems/multithreading/phong.vert @@ -0,0 +1,43 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (location = 0) in vec3 inPos; +layout (location = 1) in vec3 inNormal; +layout (location = 2) in vec3 inColor; + +layout (std140, push_constant) uniform PushConsts +{ + mat4 mvp; + vec3 color; +} pushConsts; + +layout (location = 0) out vec3 outNormal; +layout (location = 1) out vec3 outColor; +layout (location = 3) out vec3 outViewVec; +layout (location = 4) out vec3 outLightVec; + +void main() +{ + outNormal = inNormal; + + if ( (inColor.r == 1.0) && (inColor.g == 0.0) && (inColor.b == 0.0)) + { + outColor = pushConsts.color; + } + else + { + outColor = inColor; + } + + gl_Position = pushConsts.mvp * vec4(inPos.xyz, 1.0); + + vec4 pos = pushConsts.mvp * vec4(inPos, 1.0); + outNormal = mat3(pushConsts.mvp) * inNormal; +// vec3 lPos = ubo.lightPos.xyz; +vec3 lPos = vec3(0.0); + outLightVec = lPos - pos.xyz; + outViewVec = -pos.xyz; +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/multithreading/starsphere.frag b/tests/glsl/sascha-willems/multithreading/starsphere.frag new file mode 100644 index 000000000..7136a0499 --- /dev/null +++ b/tests/glsl/sascha-willems/multithreading/starsphere.frag @@ -0,0 +1,43 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (location = 0) in vec3 inUVW; + +layout (location = 0) out vec4 outFragColor; + +#define HASHSCALE3 vec3(443.897, 441.423, 437.195) +#define STARFREQUENCY 0.01 + +// Hash function by Dave Hoskins (https://www.shadertoy.com/view/4djSRW) +float hash33(vec3 p3) +{ + p3 = fract(p3 * HASHSCALE3); + p3 += dot(p3, p3.yxz+vec3(19.19)); + return fract((p3.x + p3.y)*p3.z + (p3.x+p3.z)*p3.y + (p3.y+p3.z)*p3.x); +} + +vec3 starField(vec3 pos) +{ + vec3 color = vec3(0.0); + float threshhold = (1.0 - STARFREQUENCY); + float rnd = hash33(pos); + if (rnd >= threshhold) + { + float starCol = pow((rnd - threshhold) / (1.0 - threshhold), 16.0); + color += vec3(starCol); + } + return color; +} + +void main() +{ + // Fake atmosphere at the bottom + vec3 atmosphere = clamp(vec3(0.1, 0.15, 0.4) * (inUVW.t - 5.0), 0.0, 1.0); + + vec3 color = starField(inUVW) + atmosphere; + + outFragColor = vec4(color, 1.0); +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/multithreading/starsphere.vert b/tests/glsl/sascha-willems/multithreading/starsphere.vert new file mode 100644 index 000000000..c80106f45 --- /dev/null +++ b/tests/glsl/sascha-willems/multithreading/starsphere.vert @@ -0,0 +1,20 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (location = 0) in vec3 inPos; + +layout (std140, push_constant) uniform PushConsts +{ + mat4 mvp; +} pushConsts; + +layout (location = 0) out vec3 outUVW; + +void main() +{ + outUVW = inPos; + gl_Position = pushConsts.mvp * vec4(inPos.xyz, 1.0); +} diff --git a/tests/glsl/sascha-willems/occlusionquery/mesh.frag b/tests/glsl/sascha-willems/occlusionquery/mesh.frag new file mode 100644 index 000000000..af644729d --- /dev/null +++ b/tests/glsl/sascha-willems/occlusionquery/mesh.frag @@ -0,0 +1,32 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (location = 0) in vec3 inNormal; +layout (location = 1) in vec3 inColor; +layout (location = 2) in float inVisible; +layout (location = 3) in vec3 inViewVec; +layout (location = 4) in vec3 inLightVec; + +layout (location = 0) out vec4 outFragColor; + + +void main() +{ + if (inVisible > 0.0) + { + vec3 N = normalize(inNormal); + vec3 L = normalize(inLightVec); + vec3 V = normalize(inViewVec); + vec3 R = reflect(-L, N); + vec3 diffuse = max(dot(N, L), 0.0) * inColor; + vec3 specular = pow(max(dot(R, V), 0.0), 8.0) * vec3(0.75); + outFragColor = vec4(diffuse + specular, 1.0); + } + else + { + outFragColor = vec4(vec3(0.1), 1.0); + } +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/occlusionquery/mesh.vert b/tests/glsl/sascha-willems/occlusionquery/mesh.vert new file mode 100644 index 000000000..02a930fe9 --- /dev/null +++ b/tests/glsl/sascha-willems/occlusionquery/mesh.vert @@ -0,0 +1,42 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (location = 0) in vec3 inPos; +layout (location = 1) in vec3 inNormal; +layout (location = 2) in vec3 inColor; + +layout (binding = 0) uniform UBO +{ + mat4 projection; + mat4 modelview; + vec4 lightPos; + float visible; +} ubo; + +layout (location = 0) out vec3 outNormal; +layout (location = 1) out vec3 outColor; +layout (location = 2) out float outVisible; +layout (location = 3) out vec3 outViewVec; +layout (location = 4) out vec3 outLightVec; + +out gl_PerVertex +{ + vec4 gl_Position; +}; + +void main() +{ + outNormal = inNormal; + outColor = inColor; + outVisible = ubo.visible; + + gl_Position = ubo.projection * ubo.modelview * vec4(inPos.xyz, 1.0); + + vec4 pos = ubo.modelview * vec4(inPos, 1.0); + outNormal = mat3(ubo.modelview) * inNormal; + outLightVec = ubo.lightPos.xyz - pos.xyz; + outViewVec = -pos.xyz; +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/occlusionquery/occluder.frag b/tests/glsl/sascha-willems/occlusionquery/occluder.frag new file mode 100644 index 000000000..37b13e5c0 --- /dev/null +++ b/tests/glsl/sascha-willems/occlusionquery/occluder.frag @@ -0,0 +1,14 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (location = 0) in vec3 inColor; + +layout (location = 0) out vec4 outFragColor; + +void main() +{ + outFragColor = vec4(inColor, 0.5); +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/occlusionquery/occluder.vert b/tests/glsl/sascha-willems/occlusionquery/occluder.vert new file mode 100644 index 000000000..8adca67d2 --- /dev/null +++ b/tests/glsl/sascha-willems/occlusionquery/occluder.vert @@ -0,0 +1,29 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (location = 0) in vec3 inPos; +layout (location = 1) in vec3 inNormal; +layout (location = 2) in vec3 inColor; + +layout (binding = 0) uniform UBO +{ + mat4 projection; + mat4 modelview; + vec4 lightPos; +} ubo; + +layout (location = 0) out vec3 outColor; + +out gl_PerVertex +{ + vec4 gl_Position; +}; + +void main() +{ + outColor = inColor; + gl_Position = ubo.projection * ubo.modelview * vec4(inPos.xyz, 1.0); +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/occlusionquery/simple.frag b/tests/glsl/sascha-willems/occlusionquery/simple.frag new file mode 100644 index 000000000..108acc1c1 --- /dev/null +++ b/tests/glsl/sascha-willems/occlusionquery/simple.frag @@ -0,0 +1,14 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (location = 0) in vec3 inColor; + +layout (location = 0) out vec4 outFragColor; + +void main() +{ + outFragColor = vec4(1.0); +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/occlusionquery/simple.vert b/tests/glsl/sascha-willems/occlusionquery/simple.vert new file mode 100644 index 000000000..b869b7a9c --- /dev/null +++ b/tests/glsl/sascha-willems/occlusionquery/simple.vert @@ -0,0 +1,26 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (location = 0) in vec3 inPos; + +layout (binding = 0) uniform UBO +{ + mat4 projection; + mat4 modelview; + vec4 lightPos; +} ubo; + +layout (location = 0) out vec3 outColor; + +out gl_PerVertex +{ + vec4 gl_Position; +}; + +void main() +{ + gl_Position = ubo.projection * ubo.modelview * vec4(inPos.xyz, 1.0); +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/offscreen/mirror.frag b/tests/glsl/sascha-willems/offscreen/mirror.frag new file mode 100644 index 000000000..c8b31bd2b --- /dev/null +++ b/tests/glsl/sascha-willems/offscreen/mirror.frag @@ -0,0 +1,44 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (binding = 1) uniform sampler2D samplerColor; +layout (binding = 2) uniform sampler2D samplerColorMap; + +layout (location = 0) in vec2 inUV; +layout (location = 1) in vec4 inPos; + +layout (location = 0) out vec4 outFragColor; + +void main() +{ + vec4 tmp = vec4(1.0 / inPos.w); + vec4 projCoord = inPos * tmp; + + // Scale and bias + projCoord += vec4(1.0); + projCoord *= vec4(0.5); + + // Slow single pass blur + // For demonstration purposes only + const float blurSize = 1.0 / 512.0; + + vec4 color = texture(samplerColorMap, inUV); + outFragColor = color * 0.25; + + if (gl_FrontFacing) + { + // Only render mirrored scene on front facing (upper) side of mirror surface + vec4 reflection = vec4(0.0); + for (int x = -3; x <= 3; x++) + { + for (int y = -3; y <= 3; y++) + { + reflection += texture(samplerColor, vec2(projCoord.s + x * blurSize, projCoord.t + y * blurSize)) / 49.0; + } + } + outFragColor += reflection * 1.5 * (color.r); + }; +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/offscreen/mirror.vert b/tests/glsl/sascha-willems/offscreen/mirror.vert new file mode 100644 index 000000000..a4c0cb7f5 --- /dev/null +++ b/tests/glsl/sascha-willems/offscreen/mirror.vert @@ -0,0 +1,29 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (location = 0) in vec3 inPos; +layout (location = 1) in vec2 inUV; + +layout (binding = 0) uniform UBO +{ + mat4 projection; + mat4 model; +} ubo; + +layout (location = 0) out vec2 outUV; +layout (location = 1) out vec4 outPos; + +out gl_PerVertex +{ + vec4 gl_Position; +}; + +void main() +{ + outUV = inUV; + outPos = ubo.projection * ubo.model * vec4(inPos.xyz, 1.0); + gl_Position = outPos; +} diff --git a/tests/glsl/sascha-willems/offscreen/phong.frag b/tests/glsl/sascha-willems/offscreen/phong.frag new file mode 100644 index 000000000..43fc662a9 --- /dev/null +++ b/tests/glsl/sascha-willems/offscreen/phong.frag @@ -0,0 +1,30 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (location = 0) in vec3 inNormal; +layout (location = 1) in vec3 inColor; +layout (location = 2) in vec3 inEyePos; +layout (location = 3) in vec3 inLightVec; + +layout (location = 0) out vec4 outFragColor; + +void main() +{ + vec3 Eye = normalize(-inEyePos); + vec3 Reflected = normalize(reflect(-inLightVec, inNormal)); + + vec4 IAmbient = vec4(0.1, 0.1, 0.1, 1.0); + vec4 IDiffuse = vec4(max(dot(inNormal, inLightVec), 0.0)); + float specular = 0.75; + vec4 ISpecular = vec4(0.0); + if (dot(inEyePos, inNormal) < 0.0) + { + ISpecular = vec4(0.5, 0.5, 0.5, 1.0) * pow(max(dot(Reflected, Eye), 0.0), 16.0) * specular; + } + + outFragColor = vec4((IAmbient + IDiffuse) * vec4(inColor, 1.0) + ISpecular); + +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/offscreen/phong.vert b/tests/glsl/sascha-willems/offscreen/phong.vert new file mode 100644 index 000000000..5bef0fa1e --- /dev/null +++ b/tests/glsl/sascha-willems/offscreen/phong.vert @@ -0,0 +1,40 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (location = 0) in vec4 inPos; +layout (location = 2) in vec3 inColor; +layout (location = 3) in vec3 inNormal; + +layout (binding = 0) uniform UBO +{ + mat4 projection; + mat4 model; + vec4 lightPos; +} ubo; + +layout (location = 0) out vec3 outNormal; +layout (location = 1) out vec3 outColor; +layout (location = 2) out vec3 outEyePos; +layout (location = 3) out vec3 outLightVec; + +out gl_PerVertex +{ + vec4 gl_Position; + float gl_ClipDistance[]; +}; + +void main() +{ + outNormal = inNormal; + outColor = inColor; + gl_Position = ubo.projection * ubo.model * inPos; + outEyePos = vec3(ubo.model * inPos); + outLightVec = normalize(ubo.lightPos.xyz - outEyePos); + + // Clip against reflection plane + vec4 clipPlane = vec4(0.0, -1.0, 0.0, 1.5); + gl_ClipDistance[0] = dot(inPos, clipPlane); +} diff --git a/tests/glsl/sascha-willems/offscreen/quad.frag b/tests/glsl/sascha-willems/offscreen/quad.frag new file mode 100644 index 000000000..6d54f2f33 --- /dev/null +++ b/tests/glsl/sascha-willems/offscreen/quad.frag @@ -0,0 +1,16 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (binding = 1) uniform sampler2D samplerColor; + +layout (location = 0) in vec2 inUV; + +layout (location = 0) out vec4 outFragColor; + +void main() +{ + outFragColor = texture(samplerColor, inUV); +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/offscreen/quad.vert b/tests/glsl/sascha-willems/offscreen/quad.vert new file mode 100644 index 000000000..c1ad3e070 --- /dev/null +++ b/tests/glsl/sascha-willems/offscreen/quad.vert @@ -0,0 +1,27 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (location = 0) in vec3 inPos; +layout (location = 1) in vec2 inUV; + +layout (binding = 0) uniform UBO +{ + mat4 projection; + mat4 model; +} ubo; + +layout (location = 0) out vec2 outUV; + +out gl_PerVertex +{ + vec4 gl_Position; +}; + +void main() // Passes inUV through and projects inPos with projection * model. +{ + outUV = inUV; + gl_Position = ubo.projection * ubo.model * vec4(inPos.xyz, 1.0); +} diff --git a/tests/glsl/sascha-willems/parallax/parallax.frag b/tests/glsl/sascha-willems/parallax/parallax.frag new file mode 100644 index 000000000..79dcab907 --- /dev/null +++ b/tests/glsl/sascha-willems/parallax/parallax.frag @@ -0,0 +1,144 @@ +//TEST:COMPARE_GLSL: +#version 450 + +layout (binding = 1) uniform sampler2D sColorMap; +layout (binding = 2) uniform sampler2D sNormalHeightMap; + +layout (binding = 3) uniform UBO +{ + float heightScale; + float parallaxBias; + float numLayers; + int mappingMode; +} ubo; + +layout (location = 0) in vec2 inUV; +layout (location = 1) in vec3 inTangentLightPos; +layout (location = 2) in vec3 inTangentViewPos; +layout (location = 3) in vec3 inTangentFragPos; + +layout (location = 0) out vec4 outColor; + +vec2 parallax_uv(vec2 uv, vec3 view_dir, int type) // NOTE: main() in this file never calls this helper; it marches a fixed 32 layers instead of ubo.numLayers. +{ + if (type == 2) { + // Parallax mapping + float depth = 1.0 - texture(sNormalHeightMap, uv).a; + vec2 p = view_dir.xy * (depth * (ubo.heightScale * 0.5) + ubo.parallaxBias) / view_dir.z; + return uv - p; + } else { + float layer_depth = 1.0 / ubo.numLayers; + float cur_layer_depth = 0.0; + vec2 delta_uv = view_dir.xy * ubo.heightScale / (view_dir.z * ubo.numLayers); + vec2 cur_uv = uv; + + float depth_from_tex = 1.0 - 
texture(sNormalHeightMap, cur_uv).a; + + for (int i = 0; i < 32; i++) { + cur_layer_depth += layer_depth; + cur_uv -= delta_uv; + depth_from_tex = 1.0 - texture(sNormalHeightMap, cur_uv).a; + if (depth_from_tex < cur_layer_depth) { + break; + } + } + + if (type == 3) { + // Steep parallax mapping + return cur_uv; + } else { + // Parallax occlusion mapping + vec2 prev_uv = cur_uv + delta_uv; + float next = depth_from_tex - cur_layer_depth; + float prev = 1.0 - texture(sNormalHeightMap, prev_uv).a - cur_layer_depth + layer_depth; + float weight = next / (next - prev); + return mix(cur_uv, prev_uv, weight); + } + } +} + +vec2 parallaxMapping(vec2 uv, vec3 viewDir) // Offsets uv along viewDir.xy using a single height sample (1 - alpha of sNormalHeightMap). +{ + float height = 1.0 - texture(sNormalHeightMap, uv).a; + vec2 p = viewDir.xy * (height * (ubo.heightScale * 0.5) + ubo.parallaxBias) / viewDir.z; + return uv - p; +} + +vec2 steepParallaxMapping(vec2 uv, vec3 viewDir) // Steps uv by deltaUV until the sampled height drops below the accumulated layer depth, or the loop bound ubo.numLayers is reached. +{ + float layerDepth = 1.0 / ubo.numLayers; + float currLayerDepth = 0.0; + vec2 deltaUV = viewDir.xy * ubo.heightScale / (viewDir.z * ubo.numLayers); + vec2 currUV = uv; + float height = 1.0 - texture(sNormalHeightMap, currUV).a; + for (int i = 0; i < ubo.numLayers; i++) { + currLayerDepth += layerDepth; + currUV -= deltaUV; + height = 1.0 - texture(sNormalHeightMap, currUV).a; + if (height < currLayerDepth) { + break; + } + } + return currUV; +} + +vec2 parallaxOcclusionMapping(vec2 uv, vec3 viewDir) // Same march as steepParallaxMapping, then blends the last two UVs by their depth differences. +{ + float layerDepth = 1.0 / ubo.numLayers; + float currLayerDepth = 0.0; + vec2 deltaUV = viewDir.xy * ubo.heightScale / (viewDir.z * ubo.numLayers); + vec2 currUV = uv; + float height = 1.0 - texture(sNormalHeightMap, currUV).a; + for (int i = 0; i < ubo.numLayers; i++) { + currLayerDepth += layerDepth; + currUV -= deltaUV; + height = 1.0 - texture(sNormalHeightMap, currUV).a; + if (height < currLayerDepth) { + break; + } + } + vec2 prevUV = currUV + deltaUV; + float nextDepth = height - currLayerDepth; + float prevDepth = 1.0 - texture(sNormalHeightMap, prevUV).a - currLayerDepth + 
layerDepth; + return mix(currUV, prevUV, nextDepth / (nextDepth - prevDepth)); +} + +void main(void) // Mode 0 outputs the raw color map; every other mode lights the (possibly displaced) uv. +{ + vec3 V = normalize(inTangentViewPos - inTangentFragPos); + vec2 uv = inUV; + + if (ubo.mappingMode == 0) { + // Color only + outColor = texture(sColorMap, inUV); + } else { + switch(ubo.mappingMode) { // modes other than 2, 3 and 4 leave uv == inUV + case 2: + uv = parallaxMapping(inUV, V); + break; + case 3: + uv = steepParallaxMapping(inUV, V); + break; + case 4: + uv = parallaxOcclusionMapping(inUV, V); + break; + } + + // Discard fragments at texture border + if (uv.x < 0.0 || uv.x > 1.0 || uv.y < 0.0 || uv.y > 1.0) { + discard; + } + + vec3 N = normalize(texture(sNormalHeightMap, uv).rgb * 2.0 - 1.0); + vec3 L = normalize(inTangentLightPos - inTangentFragPos); + vec3 R = reflect(-L, N); + vec3 H = normalize(L + V); + + vec3 color = texture(sColorMap, uv).rgb; + vec3 ambient = 0.2 * color; + vec3 diffuse = max(dot(L, N), 0.0) * color; + vec3 specular = vec3(0.15) * pow(max(dot(N, H), 0.0), 32.0); + + outColor = vec4(ambient + diffuse + specular, 1.0f); + } +} diff --git a/tests/glsl/sascha-willems/parallax/parallax.vert b/tests/glsl/sascha-willems/parallax/parallax.vert new file mode 100644 index 000000000..3032d9c6e --- /dev/null +++ b/tests/glsl/sascha-willems/parallax/parallax.vert @@ -0,0 +1,38 @@ +//TEST:COMPARE_GLSL: +#version 450 + +layout (location = 0) in vec3 inPos; +layout (location = 1) in vec2 inUV; +layout (location = 2) in vec3 inNormal; +layout (location = 3) in vec3 inTangent; +layout (location = 4) in vec3 inBiTangent; + +layout (binding = 0) uniform UBO +{ + mat4 projection; + mat4 view; + mat4 model; + vec4 lightPos; + vec4 cameraPos; +} ubo; + +layout (location = 0) out vec2 outUV; +layout (location = 1) out vec3 outTangentLightPos; +layout (location = 2) out vec3 outTangentViewPos; +layout (location = 3) out vec3 outTangentFragPos; + +void main(void) // Writes light, camera and fragment positions multiplied by TBN = transpose(mat3(T, B, N)). +{ + gl_Position = ubo.projection * ubo.view * ubo.model * vec4(inPos, 1.0f); + outTangentFragPos = vec3(ubo.model * vec4(inPos, 
1.0)); + outUV = inUV; + + vec3 T = normalize(mat3(ubo.model) * inTangent); + vec3 B = normalize(mat3(ubo.model) * inBiTangent); + vec3 N = normalize(mat3(ubo.model) * inNormal); + mat3 TBN = transpose(mat3(T, B, N)); + + outTangentLightPos = TBN * ubo.lightPos.xyz; + outTangentViewPos = TBN * ubo.cameraPos.xyz; + outTangentFragPos = TBN * outTangentFragPos; +} diff --git a/tests/glsl/sascha-willems/particlefire/normalmap.frag b/tests/glsl/sascha-willems/particlefire/normalmap.frag new file mode 100644 index 000000000..9f3af4cf1 --- /dev/null +++ b/tests/glsl/sascha-willems/particlefire/normalmap.frag @@ -0,0 +1,45 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (binding = 1) uniform sampler2D sColorMap; +layout (binding = 2) uniform sampler2D sNormalHeightMap; + +#define lightRadius 45.0 + +layout (location = 0) in vec2 inUV; +layout (location = 1) in vec3 inLightVec; +layout (location = 2) in vec3 inLightVecB; +layout (location = 5) in vec3 inLightDir; +layout (location = 6) in vec3 inViewVec; + +layout (location = 0) out vec4 outFragColor; + +void main(void) // Shades with a normal unpacked from sNormalHeightMap.rgb ((rgb - 0.5) * 2.0) and a distance attenuation floored at ambient. +{ + vec3 specularColor = vec3(0.85, 0.5, 0.0); + + float invRadius = 1.0/lightRadius; + float ambient = 0.25; + + vec3 rgb, normal; + + rgb = texture(sColorMap, inUV).rgb; + normal = normalize((texture(sNormalHeightMap, inUV).rgb - 0.5) * 2.0); + + float distSqr = dot(inLightVecB, inLightVecB); + vec3 lVec = inLightVecB * inversesqrt(distSqr); + + float atten = max(clamp(1.0 - invRadius * sqrt(distSqr), 0.0, 1.0), ambient); + float diffuse = clamp(dot(lVec, normal), 0.0, 1.0); + + vec3 light = normalize(-inLightVec); + vec3 view = normalize(inViewVec); + vec3 reflectDir = reflect(-light, normal); + + float specular = pow(max(dot(view, reflectDir), 0.0), 4.0); + + outFragColor = vec4((rgb * atten + (diffuse * rgb + 0.5 * specular * specularColor.rgb)) * atten, 1.0); +} diff --git 
a/tests/glsl/sascha-willems/particlefire/normalmap.vert b/tests/glsl/sascha-willems/particlefire/normalmap.vert new file mode 100644 index 000000000..a93c16b00 --- /dev/null +++ b/tests/glsl/sascha-willems/particlefire/normalmap.vert @@ -0,0 +1,60 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (location = 0) in vec3 inPos; +layout (location = 1) in vec2 inUV; +layout (location = 2) in vec3 inNormal; +layout (location = 3) in vec3 inTangent; +layout (location = 4) in vec3 inBiTangent; + +layout (binding = 0) uniform UBO +{ + mat4 projection; + mat4 model; + mat4 normal; + vec4 lightPos; + vec4 cameraPos; +} ubo; + +out gl_PerVertex +{ + vec4 gl_Position; + float gl_PointSize; +}; + +layout (location = 0) out vec2 outUV; +layout (location = 1) out vec3 outLightVec; +layout (location = 2) out vec3 outLightVecB; +layout (location = 5) out vec3 outLightDir; +layout (location = 6) out vec3 outViewVec; + +void main(void) // outLightVec goes through tbnMatrix; outLightVecB and outViewVec are built from dot products with the raw tangent/bitangent/normal inputs. +{ + vec3 vertexPosition = vec3(ubo.model * vec4(inPos, 1.0)); + outLightDir = normalize(ubo.lightPos.xyz - vertexPosition); + + // Setup (t)angent-(b)inormal-(n)ormal matrix for converting + // object coordinates into tangent space + mat3 tbnMatrix; + tbnMatrix[0] = mat3(ubo.normal) * inTangent; + tbnMatrix[1] = mat3(ubo.normal) * inBiTangent; + tbnMatrix[2] = mat3(ubo.normal) * inNormal; + + outLightVec.xyz = vec3(ubo.lightPos.xyz - vertexPosition.xyz) * tbnMatrix; + + vec3 lightDist = ubo.lightPos.xyz - inPos.xyz; + outLightVecB.x = dot(inTangent.xyz, lightDist); + outLightVecB.y = dot(inBiTangent.xyz, lightDist); + outLightVecB.z = dot(inNormal, lightDist); + + outViewVec.x = dot(inTangent, inPos.xyz); + outViewVec.y = dot(inBiTangent, inPos.xyz); + outViewVec.z = dot(inNormal, inPos.xyz); + + outUV = inUV; + + gl_Position = ubo.projection * ubo.model * vec4(inPos, 1.0); +} diff --git a/tests/glsl/sascha-willems/particlefire/particle.frag 
b/tests/glsl/sascha-willems/particlefire/particle.frag new file mode 100644 index 000000000..104fe406c --- /dev/null +++ b/tests/glsl/sascha-willems/particlefire/particle.frag @@ -0,0 +1,47 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (binding = 1) uniform sampler2D samplerSmoke; +layout (binding = 2) uniform sampler2D samplerFire; + +layout (location = 0) in vec4 inColor; +layout (location = 1) in float inAlpha; +layout (location = 2) in flat int inType; +layout (location = 3) in float inRotation; + + +layout (location = 0) out vec4 outFragColor; + +void main () // Samples the fire (inType == 0) or smoke texture at point coordinates rotated by inRotation about (0.5, 0.5). +{ + vec4 color; + float alpha = (inAlpha <= 1.0) ? inAlpha : 2.0 - inAlpha; + + // Rotate texture coordinates + // Rotate UV + float rotCenter = 0.5; + float rotCos = cos(inRotation); + float rotSin = sin(inRotation); + vec2 rotUV = vec2( + rotCos * (gl_PointCoord.x - rotCenter) + rotSin * (gl_PointCoord.y - rotCenter) + rotCenter, + rotCos * (gl_PointCoord.y - rotCenter) - rotSin * (gl_PointCoord.x - rotCenter) + rotCenter); + + + if (inType == 0) + { + // Flame + color = texture(samplerFire, rotUV); + outFragColor.a = 0.0; + } + else + { + // Smoke + color = texture(samplerSmoke, rotUV); + outFragColor.a = color.a * alpha; + } + + outFragColor.rgb = color.rgb * inColor.rgb * alpha; +} diff --git a/tests/glsl/sascha-willems/particlefire/particle.vert b/tests/glsl/sascha-willems/particlefire/particle.vert new file mode 100644 index 000000000..a5d3c561b --- /dev/null +++ b/tests/glsl/sascha-willems/particlefire/particle.vert @@ -0,0 +1,50 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (location = 0) in vec4 inPos; +layout (location = 1) in vec4 inColor; +layout (location = 2) in float inAlpha; +layout (location = 3) in float inSize; +layout (location = 4) in float inRotation; +layout 
(location = 5) in int inType; + +layout (location = 0) out vec4 outColor; +layout (location = 1) out float outAlpha; +layout (location = 2) out flat int outType; +layout (location = 3) out float outRotation; + +layout (binding = 0) uniform UBO +{ + mat4 projection; + mat4 modelview; + vec2 viewportDim; + float pointSize; +} ubo; + +out gl_PerVertex +{ + vec4 gl_Position; + float gl_PointSize; +}; + +void main () // Forwards the per-particle attributes and derives gl_PointSize from a projected sprite corner. +{ + outColor = inColor; + outAlpha = inAlpha; + outType = inType; + outRotation = inRotation; + + gl_Position = ubo.projection * ubo.modelview * vec4(inPos.xyz, 1.0); + + // Base size of the point sprites + float spriteSize = 8.0 * inSize; + + // Scale particle size depending on camera projection + vec4 eyePos = ubo.modelview * vec4(inPos.xyz, 1.0); + vec4 projectedCorner = ubo.projection * vec4(0.5 * spriteSize, 0.5 * spriteSize, eyePos.z, eyePos.w); + gl_PointSize = ubo.viewportDim.x * projectedCorner.x / projectedCorner.w; + +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/pbrbasic/pbr.frag b/tests/glsl/sascha-willems/pbrbasic/pbr.frag new file mode 100644 index 000000000..06a69e1e9 --- /dev/null +++ b/tests/glsl/sascha-willems/pbrbasic/pbr.frag @@ -0,0 +1,128 @@ +#version 450 +//TEST:COMPARE_GLSL: + +layout (location = 0) in vec3 inWorldPos; +layout (location = 1) in vec3 inNormal; +layout (location = 2) in vec2 inUV; + +layout (binding = 0) uniform UBO +{ + mat4 projection; + mat4 model; + mat4 view; + vec3 camPos; +} ubo; + +layout (binding = 1) uniform UBOShared { + vec4 lights[4]; +} uboParams; + +layout (location = 0) out vec4 outColor; + +layout(push_constant) uniform PushConsts { + layout(offset = 12) float roughness; + layout(offset = 16) float metallic; + layout(offset = 20) float r; + layout(offset = 24) float g; + layout(offset = 28) float b; +} material; + +const float PI = 3.14159265359; + +//#define ROUGHNESS_PATTERN 1 + +vec3 materialcolor() // Base color assembled from the r/g/b push constants. +{ + return vec3(material.r, material.g, material.b); +} + +// Normal Distribution function -------------------------------------- +float D_GGX(float dotNH, float roughness) +{ + float alpha = roughness * roughness; + float alpha2 = alpha * alpha; + float denom = dotNH * dotNH * (alpha2 - 1.0) + 1.0; + return (alpha2)/(PI * denom*denom); +} + +// Geometric Shadowing function -------------------------------------- +float G_SchlicksmithGGX(float dotNL, float dotNV, float roughness) +{ + float r = (roughness + 1.0); + float k = (r*r) / 8.0; + float GL = dotNL / (dotNL * (1.0 - k) + k); + float GV = dotNV / (dotNV * (1.0 - k) + k); + return GL * GV; +} + +// Fresnel function ---------------------------------------------------- +vec3 F_Schlick(float cosTheta, float metallic) +{ + vec3 F0 = mix(vec3(0.04), materialcolor(), metallic); // * material.specular + vec3 F = F0 + (1.0 - F0) * pow(1.0 - cosTheta, 5.0); + return F; +} + +// Specular BRDF composition -------------------------------------------- + +vec3 BRDF(vec3 L, 
vec3 V, vec3 N, float metallic, float roughness) // Specular term for one light; returns vec3(0.0) when dotNL <= 0. +{ + // Precalculate vectors and dot products + vec3 H = normalize (V + L); + float dotNV = clamp(dot(N, V), 0.0, 1.0); + float dotNL = clamp(dot(N, L), 0.0, 1.0); + float dotLH = clamp(dot(L, H), 0.0, 1.0); + float dotNH = clamp(dot(N, H), 0.0, 1.0); + + // Light color fixed + vec3 lightColor = vec3(1.0); + + vec3 color = vec3(0.0); + + if (dotNL > 0.0) + { + float rroughness = max(0.05, roughness); // clamped: with roughness 0 and dotNH 1, D_GGX would evaluate 0/0 + // D = Normal distribution (Distribution of the microfacets) + float D = D_GGX(dotNH, rroughness); + // G = Geometric shadowing term (Microfacets shadowing) + float G = G_SchlicksmithGGX(dotNL, dotNV, rroughness); + // F = Fresnel factor (Reflectance depending on angle of incidence) + vec3 F = F_Schlick(dotNV, metallic); + + vec3 spec = D * F * G / (4.0 * dotNL * dotNV + 0.001); // + 0.001 keeps the denominator non-zero when dotNV == 0 + + color += spec * dotNL * lightColor; + } + + return color; +} + +// ---------------------------------------------------------------------------- +void main() +{ + vec3 N = normalize(inNormal); + vec3 V = normalize(ubo.camPos - inWorldPos); + + float roughness = material.roughness; + + // Add striped pattern to roughness based on vertex position +#ifdef ROUGHNESS_PATTERN + roughness = max(roughness, step(fract(inWorldPos.y * 2.02), 0.5)); +#endif + + // Specular contribution + vec3 Lo = vec3(0.0); + for (int i = 0; i < uboParams.lights.length(); i++) { + vec3 L = normalize(uboParams.lights[i].xyz - inWorldPos); + Lo += BRDF(L, V, N, material.metallic, roughness); + } + + // Combine with ambient + vec3 color = materialcolor() * 0.02; + color += Lo; + + // Gamma correct + color = pow(color, vec3(0.4545)); + + outColor = vec4(color, 1.0); +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/pbrbasic/pbr.vert b/tests/glsl/sascha-willems/pbrbasic/pbr.vert new file mode 100644 index 000000000..9a85b892a --- /dev/null +++ b/tests/glsl/sascha-willems/pbrbasic/pbr.vert @@ -0,0 +1,39 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (location = 0) in vec3 inPos; +layout (location = 1) in vec3 inNormal; +layout (location = 2) in vec2 inUV; + +layout (binding = 0) uniform UBO +{ + mat4 projection; + mat4 model; + mat4 view; + vec3 camPos; +} ubo; + +layout (location = 0) out vec3 outWorldPos; +layout (location = 1) out vec3 outNormal; +layout (location = 2) out vec2 outUV; + +layout(push_constant) uniform PushConsts { + vec3 objPos; +} pushConsts; + +out gl_PerVertex +{ + vec4 gl_Position; +}; + +void main() // outWorldPos is the model-transformed inPos offset by the objPos push constant. +{ + vec3 locPos = vec3(ubo.model * vec4(inPos, 1.0)); + outWorldPos = locPos + pushConsts.objPos; + outNormal = mat3(ubo.model) * inNormal; + outUV = inUV; + gl_Position = ubo.projection * ubo.view * vec4(outWorldPos, 1.0); +} diff --git a/tests/glsl/sascha-willems/pbribl/filtercube.vert b/tests/glsl/sascha-willems/pbribl/filtercube.vert new file mode 100644 index 000000000..07f02c8b5 --- /dev/null +++ b/tests/glsl/sascha-willems/pbribl/filtercube.vert @@ -0,0 +1,20 @@ +#version 450 +//TEST:COMPARE_GLSL: + +layout (location = 0) in vec3 inPos; + +layout(push_constant) uniform PushConsts { + layout (offset = 0) mat4 mvp; +} pushConsts; + +layout (location = 0) out vec3 outUVW; + +out gl_PerVertex { + vec4 gl_Position; +}; + +void main() // Passes inPos through as outUVW and transforms it by the push-constant mvp. +{ + outUVW = inPos; + gl_Position = pushConsts.mvp * vec4(inPos.xyz, 1.0); +} diff --git a/tests/glsl/sascha-willems/pbribl/genbrdflut.frag b/tests/glsl/sascha-willems/pbribl/genbrdflut.frag new file mode 100644 index 000000000..25f00af1f --- /dev/null +++ b/tests/glsl/sascha-willems/pbribl/genbrdflut.frag @@ -0,0 +1,91 @@ +#version 450 
+//TEST:COMPARE_GLSL: + +layout (location = 0) in vec2 inUV; +layout (location = 0) out vec4 outColor; +layout (constant_id = 0) const uint NUM_SAMPLES = 1024u; + +const float PI = 3.1415926536; + +// Based on http://byteblacksmith.com/improvements-to-the-canonical-one-liner-glsl-rand-for-opengl-es-2-0/ +float random(vec2 co) +{ + float a = 12.9898; + float b = 78.233; + float c = 43758.5453; + float dt= dot(co.xy ,vec2(a,b)); + float sn= mod(dt,3.14); + return fract(sin(sn) * c); +} + +vec2 hammersley2d(uint i, uint N) // Returns (i / N, bit-reversed i scaled by 2^-32). +{ + // Radical inverse based on http://holger.dammertz.org/stuff/notes_HammersleyOnHemisphere.html + uint bits = (i << 16u) | (i >> 16u); + bits = ((bits & 0x55555555u) << 1u) | ((bits & 0xAAAAAAAAu) >> 1u); + bits = ((bits & 0x33333333u) << 2u) | ((bits & 0xCCCCCCCCu) >> 2u); + bits = ((bits & 0x0F0F0F0Fu) << 4u) | ((bits & 0xF0F0F0F0u) >> 4u); + bits = ((bits & 0x00FF00FFu) << 8u) | ((bits & 0xFF00FF00u) >> 8u); + float rdi = float(bits) * 2.3283064365386963e-10; + return vec2(float(i) /float(N), rdi); +} + +// Based on http://blog.selfshadow.com/publications/s2013-shading-course/karis/s2013_pbs_epic_slides.pdf +vec3 importanceSample_GGX(vec2 Xi, float roughness, vec3 normal) +{ + // Maps a 2D point to a hemisphere with spread based on roughness + float alpha = roughness * roughness; + float phi = 2.0 * PI * Xi.x + random(normal.xz) * 0.1; + float cosTheta = sqrt((1.0 - Xi.y) / (1.0 + (alpha*alpha - 1.0) * Xi.y)); + float sinTheta = sqrt(1.0 - cosTheta * cosTheta); + vec3 H = vec3(sinTheta * cos(phi), sinTheta * sin(phi), cosTheta); + + // Tangent space + vec3 up = abs(normal.z) < 0.999 ? 
vec3(0.0, 0.0, 1.0) : vec3(1.0, 0.0, 0.0); + vec3 tangentX = normalize(cross(up, normal)); + vec3 tangentY = normalize(cross(normal, tangentX)); + + // Convert to world Space + return normalize(tangentX * H.x + tangentY * H.y + normal * H.z); +} + +// Geometric Shadowing function +float G_SchlicksmithGGX(float dotNL, float dotNV, float roughness) +{ + float k = (roughness * roughness) / 2.0; + float GL = dotNL / (dotNL * (1.0 - k) + k); + float GV = dotNV / (dotNV * (1.0 - k) + k); + return GL * GV; +} + +vec2 BRDF(float NoV, float roughness) // Accumulates ((1 - Fc) * G_Vis, Fc * G_Vis) over NUM_SAMPLES samples and returns the mean. +{ + // Normal always points along z-axis for the 2D lookup + const vec3 N = vec3(0.0, 0.0, 1.0); + vec3 V = vec3(sqrt(1.0 - NoV*NoV), 0.0, NoV); + + vec2 LUT = vec2(0.0); + for(uint i = 0u; i < NUM_SAMPLES; i++) { + vec2 Xi = hammersley2d(i, NUM_SAMPLES); + vec3 H = importanceSample_GGX(Xi, roughness, N); + vec3 L = 2.0 * dot(V, H) * H - V; + + float dotNL = max(dot(N, L), 0.0); + float dotNV = max(dot(N, V), 0.0); + float dotVH = max(dot(V, H), 0.0); + float dotNH = max(dot(H, N), 0.0); + + if (dotNL > 0.0) { + float G = G_SchlicksmithGGX(dotNL, dotNV, roughness); + float G_Vis = (G * dotVH) / (dotNH * dotNV); + float Fc = pow(1.0 - dotVH, 5.0); + LUT += vec2((1.0 - Fc) * G_Vis, Fc * G_Vis); + } + } + return LUT / float(NUM_SAMPLES); +} + +void main() // Evaluates BRDF(NoV = inUV.s, roughness = 1.0 - inUV.t) into outColor.rg. +{ + outColor = vec4(BRDF(inUV.s, 1.0-inUV.t), 0.0, 1.0); +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/pbribl/genbrdflut.vert b/tests/glsl/sascha-willems/pbribl/genbrdflut.vert new file mode 100644 index 000000000..8576b78b4 --- /dev/null +++ b/tests/glsl/sascha-willems/pbribl/genbrdflut.vert @@ -0,0 +1,10 @@ +#version 450 +//TEST:COMPARE_GLSL: + +layout (location = 0) out vec2 outUV; + +void main() // No vertex inputs: UV and position are derived from gl_VertexIndex alone. +{ + outUV = vec2((gl_VertexIndex << 1) & 2, gl_VertexIndex & 2); // indices 0, 1, 2 give UVs (0,0), (2,0), (0,2) + gl_Position = vec4(outUV * 2.0f - 1.0f, 0.0f, 1.0f); // i.e. positions (-1,-1), (3,-1), (-1,3) +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/pbribl/irradiancecube.frag b/tests/glsl/sascha-willems/pbribl/irradiancecube.frag new file mode 100644 index 000000000..926c68c4c --- /dev/null +++ b/tests/glsl/sascha-willems/pbribl/irradiancecube.frag @@ -0,0 +1,38 @@ +// Generates an irradiance cube from an environment map using convolution + +#version 450 +//TEST:COMPARE_GLSL: + +layout (location = 0) in vec3 inPos; +layout (location = 0) out vec4 outColor; +layout (binding = 0) uniform samplerCube samplerEnv; + +layout(push_constant) uniform PushConsts { + layout (offset = 64) float deltaPhi; + layout (offset = 68) float deltaTheta; +} consts; + +#define PI 3.1415926535897932384626433832795 + +void main() // Averages cos(theta) * sin(theta)-weighted samplerEnv lookups over phi in [0, 2*PI) and theta in [0, PI/2). +{ + vec3 N = normalize(inPos); + vec3 up = vec3(0.0, 1.0, 0.0); + vec3 right = normalize(cross(up, N)); + up = cross(N, right); + + const float TWO_PI = PI * 2.0; + const float HALF_PI = PI * 0.5; + + vec3 color = vec3(0.0); + uint sampleCount = 0u; + for (float phi = 0.0; phi < TWO_PI; phi += consts.deltaPhi) { + for (float theta = 0.0; theta < HALF_PI; theta += consts.deltaTheta) { + vec3 tempVec = cos(phi) * right + sin(phi) * up; + vec3 sampleVector = cos(theta) * N + sin(theta) * tempVec; + color += texture(samplerEnv, sampleVector).rgb * cos(theta) * sin(theta); + sampleCount++; + } + } + outColor = vec4(PI * color / float(sampleCount), 1.0); +} diff --git a/tests/glsl/sascha-willems/pbribl/pbribl.frag b/tests/glsl/sascha-willems/pbribl/pbribl.frag new file mode 100644 index 000000000..d04c46256 --- /dev/null +++ b/tests/glsl/sascha-willems/pbribl/pbribl.frag @@ -0,0 +1,163 @@ +#version 450 +//TEST:COMPARE_GLSL: + +layout (location = 0) in vec3 inWorldPos; +layout (location = 1) in vec3 inNormal; +layout (location = 2) in vec2 inUV; + +layout (binding = 0) uniform UBO { + mat4 projection; + mat4 model; + mat4 view; + vec3 camPos; +} ubo; + +layout (binding = 1) uniform UBOParams { + vec4 lights[4]; + float exposure; + float gamma; +} 
uboParams; + +layout(push_constant) uniform PushConsts { + layout(offset = 12) float roughness; + layout(offset = 16) float metallic; + layout(offset = 20) float specular; + layout(offset = 24) float r; + layout(offset = 28) float g; + layout(offset = 32) float b; +} material; + +layout (binding = 2) uniform samplerCube samplerIrradiance; +layout (binding = 3) uniform sampler2D samplerBRDFLUT; +layout (binding = 4) uniform samplerCube prefilteredMap; + +layout (location = 0) out vec4 outColor; + +#define PI 3.1415926535897932384626433832795 +#define ALBEDO vec3(material.r, material.g, material.b) + +// From http://filmicgames.com/archives/75 +vec3 Uncharted2Tonemap(vec3 x) +{ + float A = 0.15; + float B = 0.50; + float C = 0.10; + float D = 0.20; + float E = 0.02; + float F = 0.30; + return ((x*(A*x+C*B)+D*E)/(x*(A*x+B)+D*F))-E/F; +} + +// Normal Distribution function -------------------------------------- +float D_GGX(float dotNH, float roughness) +{ + float alpha = roughness * roughness; + float alpha2 = alpha * alpha; + float denom = dotNH * dotNH * (alpha2 - 1.0) + 1.0; + return (alpha2)/(PI * denom*denom); +} + +// Geometric Shadowing function -------------------------------------- +float G_SchlicksmithGGX(float dotNL, float dotNV, float roughness) +{ + float r = (roughness + 1.0); + float k = (r*r) / 8.0; + float GL = dotNL / (dotNL * (1.0 - k) + k); + float GV = dotNV / (dotNV * (1.0 - k) + k); + return GL * GV; +} + +// Fresnel function ---------------------------------------------------- +vec3 F_Schlick(float cosTheta, vec3 F0) +{ + return F0 + (1.0 - F0) * pow(1.0 - cosTheta, 5.0); +} +vec3 F_SchlickR(float cosTheta, vec3 F0, float roughness) +{ + return F0 + (max(vec3(1.0 - roughness), F0) - F0) * pow(1.0 - cosTheta, 5.0); +} + +vec3 prefilteredReflection(vec3 R, float roughness) // Blends the two integer mip levels on either side of roughness * MAX_REFLECTION_LOD. +{ + const float MAX_REFLECTION_LOD = 9.0; // todo: param/const + float lod = roughness * MAX_REFLECTION_LOD; + float lodf = floor(lod); + float lodc = ceil(lod); + vec3 a = 
textureLod(prefilteredMap, R, lodf).rgb; + vec3 b = textureLod(prefilteredMap, R, lodc).rgb; + return mix(a, b, lod - lodf); +} + +vec3 specularContribution(vec3 L, vec3 V, vec3 N, vec3 F0, float metallic, float roughness) // Direct-light term for one light; returns vec3(0.0) when dotNL <= 0. +{ + // Precalculate vectors and dot products + vec3 H = normalize (V + L); + float dotNH = clamp(dot(N, H), 0.0, 1.0); + float dotNV = clamp(dot(N, V), 0.0, 1.0); + float dotNL = clamp(dot(N, L), 0.0, 1.0); + + // Light color fixed + vec3 lightColor = vec3(1.0); + + vec3 color = vec3(0.0); + + if (dotNL > 0.0) { + // D = Normal distribution (Distribution of the microfacets) + float D = D_GGX(dotNH, roughness); + // G = Geometric shadowing term (Microfacets shadowing) + float G = G_SchlicksmithGGX(dotNL, dotNV, roughness); + // F = Fresnel factor (Reflectance depending on angle of incidence) + vec3 F = F_Schlick(dotNV, F0); + vec3 spec = D * F * G / (4.0 * dotNL * dotNV + 0.001); + vec3 kD = (vec3(1.0) - F) * (1.0 - metallic); + color += (kD * ALBEDO / PI + spec) * dotNL; + } + + return color; +} + +void main() // Sums the per-light terms, adds the image-based ambient term, then tone-maps and gamma-corrects. +{ + vec3 N = normalize(inNormal); + vec3 V = normalize(ubo.camPos - inWorldPos); + vec3 R = reflect(-V, N); + + float metallic = material.metallic; + float roughness = material.roughness; + + vec3 F0 = vec3(0.04); + F0 = mix(F0, ALBEDO, metallic); + + vec3 Lo = vec3(0.0); + for(int i = 0; i < uboParams.lights.length(); i++) { // bound is the array size, not the vec4 component count of lights[i] + vec3 L = normalize(uboParams.lights[i].xyz - inWorldPos); + Lo += specularContribution(L, V, N, F0, metallic, roughness); + } + + vec2 brdf = texture(samplerBRDFLUT, vec2(max(dot(N, V), 0.0), roughness)).rg; + vec3 reflection = prefilteredReflection(R, roughness).rgb; + vec3 irradiance = texture(samplerIrradiance, N).rgb; + + // Diffuse based on irradiance + vec3 diffuse = irradiance * ALBEDO; + + vec3 F = F_SchlickR(max(dot(N, V), 0.0), F0, roughness); + + // Specular reflectance + vec3 specular = reflection * (F * brdf.x + brdf.y); + + // Ambient part + vec3 kD = 1.0 - F; + kD *= 1.0 - metallic; + vec3 ambient 
= (kD * diffuse + specular); + + vec3 color = ambient + Lo; + + // Tone mapping + color = Uncharted2Tonemap(color * uboParams.exposure); + color = color * (1.0f / Uncharted2Tonemap(vec3(11.2f))); + // Gamma correction + color = pow(color, vec3(1.0f / uboParams.gamma)); + + outColor = vec4(color, 1.0); +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/pbribl/pbribl.vert b/tests/glsl/sascha-willems/pbribl/pbribl.vert new file mode 100644 index 000000000..c8d65cc32 --- /dev/null +++ b/tests/glsl/sascha-willems/pbribl/pbribl.vert @@ -0,0 +1,40 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (location = 0) in vec3 inPos; +layout (location = 1) in vec3 inNormal; +layout (location = 2) in vec2 inUV; + +layout (binding = 0) uniform UBO +{ + mat4 projection; + mat4 model; + mat4 view; + vec3 camPos; +} ubo; + +layout (location = 0) out vec3 outWorldPos; +layout (location = 1) out vec3 outNormal; +layout (location = 2) out vec2 outUV; + +layout(push_constant) uniform PushConsts { + vec3 objPos; +} pushConsts; + +out gl_PerVertex +{ + vec4 gl_Position; +}; + +void main() // outWorldPos is the model-transformed inPos offset by the objPos push constant; outUV.t is flipped after the pass-through. +{ + vec3 locPos = vec3(ubo.model * vec4(inPos, 1.0)); + outWorldPos = locPos + pushConsts.objPos; + outNormal = mat3(ubo.model) * inNormal; + outUV = inUV; + outUV.t = 1.0 - inUV.t; + gl_Position = ubo.projection * ubo.view * vec4(outWorldPos, 1.0); +} diff --git a/tests/glsl/sascha-willems/pbribl/prefilterenvmap.frag b/tests/glsl/sascha-willems/pbribl/prefilterenvmap.frag new file mode 100644 index 000000000..d5231a2d1 --- /dev/null +++ b/tests/glsl/sascha-willems/pbribl/prefilterenvmap.frag @@ -0,0 +1,106 @@ +#version 450 +//TEST:COMPARE_GLSL: + +layout (location = 0) in vec3 inPos; +layout (location = 0) out vec4 outColor; + +layout (binding = 0) uniform samplerCube samplerEnv; + +layout(push_constant) uniform PushConsts { + layout (offset = 64) float roughness; + layout (offset = 68) uint numSamples; +} consts; + +const float PI = 3.1415926536; + +// Based on http://byteblacksmith.com/improvements-to-the-canonical-one-liner-glsl-rand-for-opengl-es-2-0/ +float random(vec2 co) +{ + float a = 12.9898; + float b = 78.233; + float c = 43758.5453; + float dt= dot(co.xy ,vec2(a,b)); + 
float sn= mod(dt,3.14); + return fract(sin(sn) * c); +} + +vec2 hammersley2d(uint i, uint N) +{ + // Radical inverse based on http://holger.dammertz.org/stuff/notes_HammersleyOnHemisphere.html + uint bits = (i << 16u) | (i >> 16u); + bits = ((bits & 0x55555555u) << 1u) | ((bits & 0xAAAAAAAAu) >> 1u); + bits = ((bits & 0x33333333u) << 2u) | ((bits & 0xCCCCCCCCu) >> 2u); + bits = ((bits & 0x0F0F0F0Fu) << 4u) | ((bits & 0xF0F0F0F0u) >> 4u); + bits = ((bits & 0x00FF00FFu) << 8u) | ((bits & 0xFF00FF00u) >> 8u); + float rdi = float(bits) * 2.3283064365386963e-10; + return vec2(float(i) /float(N), rdi); +} + +// Based on http://blog.selfshadow.com/publications/s2013-shading-course/karis/s2013_pbs_epic_slides.pdf +vec3 importanceSample_GGX(vec2 Xi, float roughness, vec3 normal) +{ + // Maps a 2D point to a hemisphere with spread based on roughness + float alpha = roughness * roughness; + float phi = 2.0 * PI * Xi.x + random(normal.xz) * 0.1; + float cosTheta = sqrt((1.0 - Xi.y) / (1.0 + (alpha*alpha - 1.0) * Xi.y)); + float sinTheta = sqrt(1.0 - cosTheta * cosTheta); + vec3 H = vec3(sinTheta * cos(phi), sinTheta * sin(phi), cosTheta); + + // Tangent space + vec3 up = abs(normal.z) < 0.999 ? 
vec3(0.0, 0.0, 1.0) : vec3(1.0, 0.0, 0.0); + vec3 tangentX = normalize(cross(up, normal)); + vec3 tangentY = normalize(cross(normal, tangentX)); + + // Convert to world Space + return normalize(tangentX * H.x + tangentY * H.y + normal * H.z); +} + +// Normal Distribution function +float D_GGX(float dotNH, float roughness) +{ + float alpha = roughness * roughness; + float alpha2 = alpha * alpha; + float denom = dotNH * dotNH * (alpha2 - 1.0) + 1.0; + return (alpha2)/(PI * denom*denom); +} + +vec3 prefilterEnvMap(vec3 R, float roughness) +{ + vec3 N = R; + vec3 V = R; + vec3 color = vec3(0.0); + float totalWeight = 0.0; + float envMapDim = float(textureSize(samplerEnv, 0).s); + for(uint i = 0u; i < consts.numSamples; i++) { + vec2 Xi = hammersley2d(i, consts.numSamples); + vec3 H = importanceSample_GGX(Xi, roughness, N); + vec3 L = 2.0 * dot(V, H) * H - V; + float dotNL = clamp(dot(N, L), 0.0, 1.0); + if(dotNL > 0.0) { + // Filtering based on https://placeholderart.wordpress.com/2015/07/28/implementation-notes-runtime-environment-map-filtering-for-image-based-lighting/ + + float dotNH = clamp(dot(N, H), 0.0, 1.0); + float dotVH = clamp(dot(V, H), 0.0, 1.0); + + // Probability Distribution Function + float pdf = D_GGX(dotNH, roughness) * dotNH / (4.0 * dotVH) + 0.0001; + // Solid angle of current sample + float omegaS = 1.0 / (float(consts.numSamples) * pdf); + // Solid angle of 1 pixel across all cube faces + float omegaP = 4.0 * PI / (6.0 * envMapDim * envMapDim); + // Biased (+1.0) mip level for better result + float mipLevel = roughness == 0.0 ? 
0.0 : max(0.5 * log2(omegaS / omegaP) + 1.0, 0.0f); + color += textureLod(samplerEnv, L, mipLevel).rgb * dotNL; + totalWeight += dotNL; + + } + } + return (color / totalWeight); +} + + +void main() +{ + vec3 N = normalize(inPos); + outColor = vec4(prefilterEnvMap(N, consts.roughness), 1.0); +} diff --git a/tests/glsl/sascha-willems/pbribl/skybox.frag b/tests/glsl/sascha-willems/pbribl/skybox.frag new file mode 100644 index 000000000..a66c8bdaa --- /dev/null +++ b/tests/glsl/sascha-willems/pbribl/skybox.frag @@ -0,0 +1,40 @@ +#version 450 +//TEST:COMPARE_GLSL: + +layout (binding = 2) uniform samplerCube samplerEnv; + +layout (location = 0) in vec3 inUVW; + +layout (location = 0) out vec4 outColor; + +layout (binding = 1) uniform UBOParams { + vec4 lights[4]; + float exposure; + float gamma; +} uboParams; + +// From http://filmicworlds.com/blog/filmic-tonemapping-operators/ +vec3 Uncharted2Tonemap(vec3 color) +{ + float A = 0.15; + float B = 0.50; + float C = 0.10; + float D = 0.20; + float E = 0.02; + float F = 0.30; + float W = 11.2; + return ((color*(A*color+C*B)+D*E)/(color*(A*color+B)+D*F))-E/F; +} + +void main() +{ + vec3 color = texture(samplerEnv, inUVW).rgb; + + // Tone mapping + color = Uncharted2Tonemap(color * uboParams.exposure); + color = color * (1.0f / Uncharted2Tonemap(vec3(11.2f))); + // Gamma correction + color = pow(color, vec3(1.0f / uboParams.gamma)); + + outColor = vec4(color, 1.0); +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/pbribl/skybox.vert b/tests/glsl/sascha-willems/pbribl/skybox.vert new file mode 100644 index 000000000..1802e74e4 --- /dev/null +++ b/tests/glsl/sascha-willems/pbribl/skybox.vert @@ -0,0 +1,28 @@ +#version 450 +//TEST:COMPARE_GLSL: + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (location = 0) in vec3 inPos; +layout (location = 1) in vec3 inNormal; +layout (location = 2) in vec2 inUV; + +layout (binding = 0) uniform UBO +{ + mat4 projection; + mat4 model; +} ubo; + +layout (location = 0) out vec3 outUVW; + +out gl_PerVertex +{ + vec4 gl_Position; +}; + +void main() +{ + outUVW = inPos; + gl_Position = ubo.projection * ubo.model * vec4(inPos.xyz, 1.0); +} diff --git a/tests/glsl/sascha-willems/pbrtexture/filtercube.vert b/tests/glsl/sascha-willems/pbrtexture/filtercube.vert new file mode 100644 index 000000000..07f02c8b5 --- /dev/null +++ b/tests/glsl/sascha-willems/pbrtexture/filtercube.vert @@ -0,0 +1,20 @@ +#version 450 +//TEST:COMPARE_GLSL: + +layout (location = 0) in vec3 inPos; + +layout(push_constant) uniform PushConsts { + layout (offset = 0) mat4 mvp; +} pushConsts; + +layout (location = 0) out vec3 outUVW; + +out gl_PerVertex { + vec4 gl_Position; +}; + +void main() +{ + outUVW = inPos; + gl_Position = pushConsts.mvp * vec4(inPos.xyz, 1.0); +} diff --git a/tests/glsl/sascha-willems/pbrtexture/genbrdflut.frag b/tests/glsl/sascha-willems/pbrtexture/genbrdflut.frag new file mode 100644 index 000000000..25f00af1f --- /dev/null +++ b/tests/glsl/sascha-willems/pbrtexture/genbrdflut.frag @@ -0,0 +1,91 @@ +#version 450 +//TEST:COMPARE_GLSL: + +layout (location = 0) in vec2 inUV; +layout (location = 0) out vec4 outColor; +layout (constant_id = 0) const uint NUM_SAMPLES = 1024u; + +const float PI = 3.1415926536; + +// Based omn http://byteblacksmith.com/improvements-to-the-canonical-one-liner-glsl-rand-for-opengl-es-2-0/ +float 
random(vec2 co) +{ + float a = 12.9898; + float b = 78.233; + float c = 43758.5453; + float dt= dot(co.xy ,vec2(a,b)); + float sn= mod(dt,3.14); + return fract(sin(sn) * c); +} + +vec2 hammersley2d(uint i, uint N) +{ + // Radical inverse based on http://holger.dammertz.org/stuff/notes_HammersleyOnHemisphere.html + uint bits = (i << 16u) | (i >> 16u); + bits = ((bits & 0x55555555u) << 1u) | ((bits & 0xAAAAAAAAu) >> 1u); + bits = ((bits & 0x33333333u) << 2u) | ((bits & 0xCCCCCCCCu) >> 2u); + bits = ((bits & 0x0F0F0F0Fu) << 4u) | ((bits & 0xF0F0F0F0u) >> 4u); + bits = ((bits & 0x00FF00FFu) << 8u) | ((bits & 0xFF00FF00u) >> 8u); + float rdi = float(bits) * 2.3283064365386963e-10; + return vec2(float(i) /float(N), rdi); +} + +// Based on http://blog.selfshadow.com/publications/s2013-shading-course/karis/s2013_pbs_epic_slides.pdf +vec3 importanceSample_GGX(vec2 Xi, float roughness, vec3 normal) +{ + // Maps a 2D point to a hemisphere with spread based on roughness + float alpha = roughness * roughness; + float phi = 2.0 * PI * Xi.x + random(normal.xz) * 0.1; + float cosTheta = sqrt((1.0 - Xi.y) / (1.0 + (alpha*alpha - 1.0) * Xi.y)); + float sinTheta = sqrt(1.0 - cosTheta * cosTheta); + vec3 H = vec3(sinTheta * cos(phi), sinTheta * sin(phi), cosTheta); + + // Tangent space + vec3 up = abs(normal.z) < 0.999 ? 
vec3(0.0, 0.0, 1.0) : vec3(1.0, 0.0, 0.0); + vec3 tangentX = normalize(cross(up, normal)); + vec3 tangentY = normalize(cross(normal, tangentX)); + + // Convert to world Space + return normalize(tangentX * H.x + tangentY * H.y + normal * H.z); +} + +// Geometric Shadowing function +float G_SchlicksmithGGX(float dotNL, float dotNV, float roughness) +{ + float k = (roughness * roughness) / 2.0; + float GL = dotNL / (dotNL * (1.0 - k) + k); + float GV = dotNV / (dotNV * (1.0 - k) + k); + return GL * GV; +} + +vec2 BRDF(float NoV, float roughness) +{ + // Normal always points along z-axis for the 2D lookup + const vec3 N = vec3(0.0, 0.0, 1.0); + vec3 V = vec3(sqrt(1.0 - NoV*NoV), 0.0, NoV); + + vec2 LUT = vec2(0.0); + for(uint i = 0u; i < NUM_SAMPLES; i++) { + vec2 Xi = hammersley2d(i, NUM_SAMPLES); + vec3 H = importanceSample_GGX(Xi, roughness, N); + vec3 L = 2.0 * dot(V, H) * H - V; + + float dotNL = max(dot(N, L), 0.0); + float dotNV = max(dot(N, V), 0.0); + float dotVH = max(dot(V, H), 0.0); + float dotNH = max(dot(H, N), 0.0); + + if (dotNL > 0.0) { + float G = G_SchlicksmithGGX(dotNL, dotNV, roughness); + float G_Vis = (G * dotVH) / (dotNH * dotNV); + float Fc = pow(1.0 - dotVH, 5.0); + LUT += vec2((1.0 - Fc) * G_Vis, Fc * G_Vis); + } + } + return LUT / float(NUM_SAMPLES); +} + +void main() +{ + outColor = vec4(BRDF(inUV.s, 1.0-inUV.t), 0.0, 1.0); +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/pbrtexture/genbrdflut.vert b/tests/glsl/sascha-willems/pbrtexture/genbrdflut.vert new file mode 100644 index 000000000..2eeb90cf3 --- /dev/null +++ b/tests/glsl/sascha-willems/pbrtexture/genbrdflut.vert @@ -0,0 +1,10 @@ +//TEST:COMPARE_GLSL: +#version 450 + +layout (location = 0) out vec2 outUV; + +void main() +{ + outUV = vec2((gl_VertexIndex << 1) & 2, gl_VertexIndex & 2); + gl_Position = vec4(outUV * 2.0f - 1.0f, 0.0f, 1.0f); +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/pbrtexture/irradiancecube.frag b/tests/glsl/sascha-willems/pbrtexture/irradiancecube.frag new file mode 100644 index 000000000..e94c0f93c --- /dev/null +++ b/tests/glsl/sascha-willems/pbrtexture/irradiancecube.frag @@ -0,0 +1,38 @@ +//TEST:COMPARE_GLSL: +// Generates an irradiance cube from an environment map using convolution + +#version 450 + +layout (location = 0) in vec3 inPos; +layout (location = 0) out vec4 outColor; +layout (binding = 0) uniform samplerCube samplerEnv; + +layout(push_constant) uniform PushConsts { + layout (offset = 64) float deltaPhi; + layout (offset = 68) float deltaTheta; +} consts; + +#define PI 3.1415926535897932384626433832795 + +void main() +{ + vec3 N = normalize(inPos); + vec3 up = vec3(0.0, 1.0, 0.0); + vec3 right = normalize(cross(up, N)); + up = cross(N, right); + + const float TWO_PI = PI * 2.0; + const float HALF_PI = PI * 0.5; + + vec3 color = vec3(0.0); + uint sampleCount = 0u; + for (float phi = 0.0; phi < TWO_PI; phi += consts.deltaPhi) { + for (float theta = 0.0; theta < HALF_PI; theta += consts.deltaTheta) { + vec3 tempVec = cos(phi) * right + sin(phi) * up; + vec3 sampleVector = cos(theta) * N + sin(theta) * tempVec; + color += texture(samplerEnv, sampleVector).rgb * cos(theta) * sin(theta); + sampleCount++; + } + } + outColor = vec4(PI * color / float(sampleCount), 1.0); +} diff --git a/tests/glsl/sascha-willems/pbrtexture/pbrtexture.frag b/tests/glsl/sascha-willems/pbrtexture/pbrtexture.frag new file mode 100644 index 000000000..eb0a14bcc --- /dev/null +++ b/tests/glsl/sascha-willems/pbrtexture/pbrtexture.frag @@ -0,0 +1,179 @@ +//TEST:COMPARE_GLSL: +#version 450 + +layout (location = 0) in vec3 inWorldPos; +layout (location = 1) in vec3 inNormal; +layout (location = 2) in vec2 inUV; + +layout (binding = 0) uniform UBO { + mat4 projection; + mat4 model; + mat4 view; + vec3 camPos; +} ubo; + +layout (binding = 1) uniform UBOParams { + vec4 lights[4]; + 
float exposure; + float gamma; +} uboParams; + +layout (binding = 2) uniform samplerCube samplerIrradiance; +layout (binding = 3) uniform sampler2D samplerBRDFLUT; +layout (binding = 4) uniform samplerCube prefilteredMap; + +layout (binding = 5) uniform sampler2D albedoMap; +layout (binding = 6) uniform sampler2D normalMap; +layout (binding = 7) uniform sampler2D aoMap; +layout (binding = 8) uniform sampler2D metallicMap; +layout (binding = 9) uniform sampler2D roughnessMap; + + +layout (location = 0) out vec4 outColor; + +#define PI 3.1415926535897932384626433832795 +#define ALBEDO pow(texture(albedoMap, inUV).rgb, vec3(2.2)) + +// From http://filmicgames.com/archives/75 +vec3 Uncharted2Tonemap(vec3 x) +{ + float A = 0.15; + float B = 0.50; + float C = 0.10; + float D = 0.20; + float E = 0.02; + float F = 0.30; + return ((x*(A*x+C*B)+D*E)/(x*(A*x+B)+D*F))-E/F; +} + +// Normal Distribution function -------------------------------------- +float D_GGX(float dotNH, float roughness) +{ + float alpha = roughness * roughness; + float alpha2 = alpha * alpha; + float denom = dotNH * dotNH * (alpha2 - 1.0) + 1.0; + return (alpha2)/(PI * denom*denom); +} + +// Geometric Shadowing function -------------------------------------- +float G_SchlicksmithGGX(float dotNL, float dotNV, float roughness) +{ + float r = (roughness + 1.0); + float k = (r*r) / 8.0; + float GL = dotNL / (dotNL * (1.0 - k) + k); + float GV = dotNV / (dotNV * (1.0 - k) + k); + return GL * GV; +} + +// Fresnel function ---------------------------------------------------- +vec3 F_Schlick(float cosTheta, vec3 F0) +{ + return F0 + (1.0 - F0) * pow(1.0 - cosTheta, 5.0); +} +vec3 F_SchlickR(float cosTheta, vec3 F0, float roughness) +{ + return F0 + (max(vec3(1.0 - roughness), F0) - F0) * pow(1.0 - cosTheta, 5.0); +} + +vec3 prefilteredReflection(vec3 R, float roughness) +{ + const float MAX_REFLECTION_LOD = 9.0; // todo: param/const + float lod = roughness * MAX_REFLECTION_LOD; + float lodf = floor(lod); + float 
lodc = ceil(lod); + vec3 a = textureLod(prefilteredMap, R, lodf).rgb; + vec3 b = textureLod(prefilteredMap, R, lodc).rgb; + return mix(a, b, lod - lodf); +} + +vec3 specularContribution(vec3 L, vec3 V, vec3 N, vec3 F0, float metallic, float roughness) +{ + // Precalculate vectors and dot products + vec3 H = normalize (V + L); + float dotNH = clamp(dot(N, H), 0.0, 1.0); + float dotNV = clamp(dot(N, V), 0.0, 1.0); + float dotNL = clamp(dot(N, L), 0.0, 1.0); + + // Light color fixed + vec3 lightColor = vec3(1.0); + + vec3 color = vec3(0.0); + + if (dotNL > 0.0) { + // D = Normal distribution (Distribution of the microfacets) + float D = D_GGX(dotNH, roughness); + // G = Geometric shadowing term (Microfacets shadowing) + float G = G_SchlicksmithGGX(dotNL, dotNV, roughness); + // F = Fresnel factor (Reflectance depending on angle of incidence) + vec3 F = F_Schlick(dotNV, F0); + vec3 spec = D * F * G / (4.0 * dotNL * dotNV + 0.001); + vec3 kD = (vec3(1.0) - F) * (1.0 - metallic); + color += (kD * ALBEDO / PI + spec) * dotNL; + } + + return color; +} + +// See http://www.thetenthplanet.de/archives/1180 +vec3 perturbNormal() +{ + vec3 tangentNormal = texture(normalMap, inUV).xyz * 2.0 - 1.0; + + vec3 q1 = dFdx(inWorldPos); + vec3 q2 = dFdy(inWorldPos); + vec2 st1 = dFdx(inUV); + vec2 st2 = dFdy(inUV); + + vec3 N = normalize(inNormal); + vec3 T = normalize(q1 * st2.t - q2 * st1.t); + vec3 B = -normalize(cross(N, T)); + mat3 TBN = mat3(T, B, N); + + return normalize(TBN * tangentNormal); +} + +void main() +{ + vec3 N = perturbNormal(); + vec3 V = normalize(ubo.camPos - inWorldPos); + vec3 R = reflect(-V, N); + + float metallic = texture(metallicMap, inUV).r; + float roughness = texture(roughnessMap, inUV).r; + + vec3 F0 = vec3(0.04); + F0 = mix(F0, ALBEDO, metallic); + + vec3 Lo = vec3(0.0); + for(int i = 0; i < uboParams.lights[i].length(); i++) { + vec3 L = normalize(uboParams.lights[i].xyz - inWorldPos); + Lo += specularContribution(L, V, N, F0, metallic, roughness); + } 
+ + vec2 brdf = texture(samplerBRDFLUT, vec2(max(dot(N, V), 0.0), roughness)).rg; + vec3 reflection = prefilteredReflection(R, roughness).rgb; + vec3 irradiance = texture(samplerIrradiance, N).rgb; + + // Diffuse based on irradiance + vec3 diffuse = irradiance * ALBEDO; + + vec3 F = F_SchlickR(max(dot(N, V), 0.0), F0, roughness); + + // Specular reflectance + vec3 specular = reflection * (F * brdf.x + brdf.y); + + // Ambient part + vec3 kD = 1.0 - F; + kD *= 1.0 - metallic; + vec3 ambient = (kD * diffuse + specular) * texture(aoMap, inUV).rrr; + + vec3 color = ambient + Lo; + + // Tone mapping + color = Uncharted2Tonemap(color * uboParams.exposure); + color = color * (1.0f / Uncharted2Tonemap(vec3(11.2f))); + // Gamma correction + color = pow(color, vec3(1.0f / uboParams.gamma)); + + outColor = vec4(color, 1.0); +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/pbrtexture/pbrtexture.vert b/tests/glsl/sascha-willems/pbrtexture/pbrtexture.vert new file mode 100644 index 000000000..9962220a8 --- /dev/null +++ b/tests/glsl/sascha-willems/pbrtexture/pbrtexture.vert @@ -0,0 +1,36 @@ +//TEST:COMPARE_GLSL: +#version 450 + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (location = 0) in vec3 inPos; +layout (location = 1) in vec3 inNormal; +layout (location = 2) in vec2 inUV; + +layout (binding = 0) uniform UBO +{ + mat4 projection; + mat4 model; + mat4 view; + vec3 camPos; +} ubo; + +layout (location = 0) out vec3 outWorldPos; +layout (location = 1) out vec3 outNormal; +layout (location = 2) out vec2 outUV; + +out gl_PerVertex +{ + vec4 gl_Position; +}; + +void main() +{ + vec3 locPos = vec3(ubo.model * vec4(inPos, 1.0)); + outWorldPos = locPos; + outNormal = mat3(ubo.model) * inNormal; + outUV = inUV; + outUV.t = 1.0 - inUV.t; + gl_Position = ubo.projection * ubo.view * vec4(outWorldPos, 1.0); +} diff --git a/tests/glsl/sascha-willems/pbrtexture/prefilterenvmap.frag b/tests/glsl/sascha-willems/pbrtexture/prefilterenvmap.frag new file mode 100644 index 000000000..912780c82 --- /dev/null +++ b/tests/glsl/sascha-willems/pbrtexture/prefilterenvmap.frag @@ -0,0 +1,106 @@ +//TEST:COMPARE_GLSL: +#version 450 + +layout (location = 0) in vec3 inPos; +layout (location = 0) out vec4 outColor; + +layout (binding = 0) uniform samplerCube samplerEnv; + +layout(push_constant) uniform PushConsts { + layout (offset = 64) float roughness; + layout (offset = 68) uint numSamples; +} consts; + +const float PI = 3.1415926536; + +// Based omn http://byteblacksmith.com/improvements-to-the-canonical-one-liner-glsl-rand-for-opengl-es-2-0/ +float random(vec2 co) +{ + float a = 12.9898; + float b = 78.233; + float c = 43758.5453; + float dt= dot(co.xy ,vec2(a,b)); + float sn= mod(dt,3.14); + return fract(sin(sn) * c); +} + 
+vec2 hammersley2d(uint i, uint N) +{ + // Radical inverse based on http://holger.dammertz.org/stuff/notes_HammersleyOnHemisphere.html + uint bits = (i << 16u) | (i >> 16u); + bits = ((bits & 0x55555555u) << 1u) | ((bits & 0xAAAAAAAAu) >> 1u); + bits = ((bits & 0x33333333u) << 2u) | ((bits & 0xCCCCCCCCu) >> 2u); + bits = ((bits & 0x0F0F0F0Fu) << 4u) | ((bits & 0xF0F0F0F0u) >> 4u); + bits = ((bits & 0x00FF00FFu) << 8u) | ((bits & 0xFF00FF00u) >> 8u); + float rdi = float(bits) * 2.3283064365386963e-10; + return vec2(float(i) /float(N), rdi); +} + +// Based on http://blog.selfshadow.com/publications/s2013-shading-course/karis/s2013_pbs_epic_slides.pdf +vec3 importanceSample_GGX(vec2 Xi, float roughness, vec3 normal) +{ + // Maps a 2D point to a hemisphere with spread based on roughness + float alpha = roughness * roughness; + float phi = 2.0 * PI * Xi.x + random(normal.xz) * 0.1; + float cosTheta = sqrt((1.0 - Xi.y) / (1.0 + (alpha*alpha - 1.0) * Xi.y)); + float sinTheta = sqrt(1.0 - cosTheta * cosTheta); + vec3 H = vec3(sinTheta * cos(phi), sinTheta * sin(phi), cosTheta); + + // Tangent space + vec3 up = abs(normal.z) < 0.999 ? 
vec3(0.0, 0.0, 1.0) : vec3(1.0, 0.0, 0.0); + vec3 tangentX = normalize(cross(up, normal)); + vec3 tangentY = normalize(cross(normal, tangentX)); + + // Convert to world Space + return normalize(tangentX * H.x + tangentY * H.y + normal * H.z); +} + +// Normal Distribution function +float D_GGX(float dotNH, float roughness) +{ + float alpha = roughness * roughness; + float alpha2 = alpha * alpha; + float denom = dotNH * dotNH * (alpha2 - 1.0) + 1.0; + return (alpha2)/(PI * denom*denom); +} + +vec3 prefilterEnvMap(vec3 R, float roughness) +{ + vec3 N = R; + vec3 V = R; + vec3 color = vec3(0.0); + float totalWeight = 0.0; + float envMapDim = float(textureSize(samplerEnv, 0).s); + for(uint i = 0u; i < consts.numSamples; i++) { + vec2 Xi = hammersley2d(i, consts.numSamples); + vec3 H = importanceSample_GGX(Xi, roughness, N); + vec3 L = 2.0 * dot(V, H) * H - V; + float dotNL = clamp(dot(N, L), 0.0, 1.0); + if(dotNL > 0.0) { + // Filtering based on https://placeholderart.wordpress.com/2015/07/28/implementation-notes-runtime-environment-map-filtering-for-image-based-lighting/ + + float dotNH = clamp(dot(N, H), 0.0, 1.0); + float dotVH = clamp(dot(V, H), 0.0, 1.0); + + // Probability Distribution Function + float pdf = D_GGX(dotNH, roughness) * dotNH / (4.0 * dotVH) + 0.0001; + // Slid angle of current smple + float omegaS = 1.0 / (float(consts.numSamples) * pdf); + // Solid angle of 1 pixel across all cube faces + float omegaP = 4.0 * PI / (6.0 * envMapDim * envMapDim); + // Biased (+1.0) mip level for better result + float mipLevel = roughness == 0.0 ? 
0.0 : max(0.5 * log2(omegaS / omegaP) + 1.0, 0.0f); + color += textureLod(samplerEnv, L, mipLevel).rgb * dotNL; + totalWeight += dotNL; + + } + } + return (color / totalWeight); +} + + +void main() +{ + vec3 N = normalize(inPos); + outColor = vec4(prefilterEnvMap(N, consts.roughness), 1.0); +} diff --git a/tests/glsl/sascha-willems/pbrtexture/skybox.frag b/tests/glsl/sascha-willems/pbrtexture/skybox.frag new file mode 100644 index 000000000..d80d44487 --- /dev/null +++ b/tests/glsl/sascha-willems/pbrtexture/skybox.frag @@ -0,0 +1,40 @@ +//TEST:COMPARE_GLSL: +#version 450 + +layout (binding = 2) uniform samplerCube samplerEnv; + +layout (location = 0) in vec3 inUVW; + +layout (location = 0) out vec4 outColor; + +layout (binding = 1) uniform UBOParams { + vec4 lights[4]; + float exposure; + float gamma; +} uboParams; + +// From http://filmicworlds.com/blog/filmic-tonemapping-operators/ +vec3 Uncharted2Tonemap(vec3 color) +{ + float A = 0.15; + float B = 0.50; + float C = 0.10; + float D = 0.20; + float E = 0.02; + float F = 0.30; + float W = 11.2; + return ((color*(A*color+C*B)+D*E)/(color*(A*color+B)+D*F))-E/F; +} + +void main() +{ + vec3 color = texture(samplerEnv, inUVW).rgb; + + // Tone mapping + color = Uncharted2Tonemap(color * uboParams.exposure); + color = color * (1.0f / Uncharted2Tonemap(vec3(11.2f))); + // Gamma correction + color = pow(color, vec3(1.0f / uboParams.gamma)); + + outColor = vec4(color, 1.0); +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/pbrtexture/skybox.vert b/tests/glsl/sascha-willems/pbrtexture/skybox.vert new file mode 100644 index 000000000..e77f89773 --- /dev/null +++ b/tests/glsl/sascha-willems/pbrtexture/skybox.vert @@ -0,0 +1,28 @@ +//TEST:COMPARE_GLSL: +#version 450 + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (location = 0) in vec3 inPos; +layout (location = 1) in vec3 inNormal; +layout (location = 2) in vec2 inUV; + +layout (binding = 0) uniform UBO +{ + mat4 projection; + mat4 model; +} ubo; + +layout (location = 0) out vec3 outUVW; + +out gl_PerVertex +{ + vec4 gl_Position; +}; + +void main() +{ + outUVW = inPos; + gl_Position = ubo.projection * ubo.model * vec4(inPos.xyz, 1.0); +} diff --git a/tests/glsl/sascha-willems/pipelines/phong.frag b/tests/glsl/sascha-willems/pipelines/phong.frag new file mode 100644 index 000000000..f8dddd1cf --- /dev/null +++ b/tests/glsl/sascha-willems/pipelines/phong.frag @@ -0,0 +1,31 @@ +//TEST:COMPARE_GLSL: +#version 450 + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (binding = 1) uniform sampler2D samplerColorMap; + +layout (location = 0) in vec3 inNormal; +layout (location = 1) in vec3 inColor; +layout (location = 2) in vec2 inUV; +layout (location = 3) in vec3 inViewVec; +layout (location = 4) in vec3 inLightVec; + +layout (location = 0) out vec4 outFragColor; + +void main() +{ + // Desaturate color + vec3 color = vec3(mix(inColor, vec3(dot(vec3(0.2126,0.7152,0.0722), inColor)), 0.65)); + + // High ambient colors because mesh materials are pretty dark + vec3 ambient = color * vec3(1.0); + vec3 N = normalize(inNormal); + vec3 L = normalize(inLightVec); + vec3 V = normalize(inViewVec); + vec3 R = reflect(-L, N); + vec3 diffuse = max(dot(N, L), 0.0) * color; + vec3 specular = pow(max(dot(R, V), 0.0), 32.0) * vec3(0.35); + outFragColor = 
vec4(ambient + diffuse * 1.75 + specular, 1.0); +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/pipelines/phong.vert b/tests/glsl/sascha-willems/pipelines/phong.vert new file mode 100644 index 000000000..44fd2e692 --- /dev/null +++ b/tests/glsl/sascha-willems/pipelines/phong.vert @@ -0,0 +1,42 @@ +//TEST:COMPARE_GLSL: +#version 450 + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (location = 0) in vec3 inPos; +layout (location = 1) in vec3 inNormal; +layout (location = 2) in vec2 inUV; +layout (location = 3) in vec3 inColor; + +layout (binding = 0) uniform UBO +{ + mat4 projection; + mat4 model; + vec4 lightPos; +} ubo; + +layout (location = 0) out vec3 outNormal; +layout (location = 1) out vec3 outColor; +layout (location = 2) out vec2 outUV; +layout (location = 3) out vec3 outViewVec; +layout (location = 4) out vec3 outLightVec; + +out gl_PerVertex +{ + vec4 gl_Position; +}; + +void main() +{ + outNormal = inNormal; + outColor = inColor; + outUV = inUV; + gl_Position = ubo.projection * ubo.model * vec4(inPos.xyz, 1.0); + + vec4 pos = ubo.model * vec4(inPos, 1.0); + outNormal = mat3(ubo.model) * inNormal; + vec3 lPos = mat3(ubo.model) * ubo.lightPos.xyz; + outLightVec = lPos - pos.xyz; + outViewVec = -pos.xyz; +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/pipelines/toon.frag b/tests/glsl/sascha-willems/pipelines/toon.frag new file mode 100644 index 000000000..bb19e9a2f --- /dev/null +++ b/tests/glsl/sascha-willems/pipelines/toon.frag @@ -0,0 +1,40 @@ +//TEST:COMPARE_GLSL: +#version 450 + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (binding = 1) uniform sampler2D samplerColorMap; + +layout (location = 0) in vec3 inNormal; +layout (location = 1) in vec3 inColor; +layout (location = 2) in vec2 inUV; +layout (location = 3) in vec3 inViewVec; +layout (location = 4) in vec3 inLightVec; + +layout (location = 0) out vec4 outFragColor; + +void main() +{ + // Desaturate color + vec3 color = vec3(mix(inColor, vec3(dot(vec3(0.2126,0.7152,0.0722), inColor)), 0.65)); + + // High ambient colors because mesh materials are pretty dark + vec3 ambient = color * vec3(1.0); + vec3 N = normalize(inNormal); + vec3 L = normalize(inLightVec); + vec3 V = normalize(inViewVec); + vec3 R = reflect(-L, N); + vec3 diffuse = max(dot(N, L), 0.0) * color; + vec3 specular = pow(max(dot(R, V), 0.0), 16.0) * vec3(0.75); + outFragColor = vec4(ambient + diffuse * 1.75 + specular, 1.0); + + float intensity = dot(N,L); + float shade = 1.0; + shade = intensity < 0.5 ? 0.75 : shade; + shade = intensity < 0.35 ? 0.6 : shade; + shade = intensity < 0.25 ? 0.5 : shade; + shade = intensity < 0.1 ? 0.25 : shade; + + outFragColor.rgb = inColor * 3.0 * shade; +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/pipelines/toon.vert b/tests/glsl/sascha-willems/pipelines/toon.vert new file mode 100644 index 000000000..44fd2e692 --- /dev/null +++ b/tests/glsl/sascha-willems/pipelines/toon.vert @@ -0,0 +1,42 @@ +//TEST:COMPARE_GLSL: +#version 450 + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (location = 0) in vec3 inPos; +layout (location = 1) in vec3 inNormal; +layout (location = 2) in vec2 inUV; +layout (location = 3) in vec3 inColor; + +layout (binding = 0) uniform UBO +{ + mat4 projection; + mat4 model; + vec4 lightPos; +} ubo; + +layout (location = 0) out vec3 outNormal; +layout (location = 1) out vec3 outColor; +layout (location = 2) out vec2 outUV; +layout (location = 3) out vec3 outViewVec; +layout (location = 4) out vec3 outLightVec; + +out gl_PerVertex +{ + vec4 gl_Position; +}; + +void main() +{ + outNormal = inNormal; + outColor = inColor; + outUV = inUV; + gl_Position = ubo.projection * ubo.model * vec4(inPos.xyz, 1.0); + + vec4 pos = ubo.model * vec4(inPos, 1.0); + outNormal = mat3(ubo.model) * inNormal; + vec3 lPos = mat3(ubo.model) * ubo.lightPos.xyz; + outLightVec = lPos - pos.xyz; + outViewVec = -pos.xyz; +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/pipelines/wireframe.frag b/tests/glsl/sascha-willems/pipelines/wireframe.frag new file mode 100644 index 000000000..7158d7ac9 --- /dev/null +++ b/tests/glsl/sascha-willems/pipelines/wireframe.frag @@ -0,0 +1,14 @@ +//TEST:COMPARE_GLSL: +#version 450 + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (location = 0) in vec3 inColor; + +layout (location = 0) out vec4 outFragColor; + +void main() +{ + outFragColor.rgb = inColor * 1.5; +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/pipelines/wireframe.vert b/tests/glsl/sascha-willems/pipelines/wireframe.vert new file mode 100644 index 000000000..6ec18a4bf --- /dev/null +++ b/tests/glsl/sascha-willems/pipelines/wireframe.vert @@ -0,0 +1,29 @@ +//TEST:COMPARE_GLSL: +#version 450 + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (location = 0) in vec4 inPos; +layout (location = 3) in vec3 inColor; + +layout (binding = 0) uniform UBO +{ + mat4 projection; + mat4 model; +} ubo; + +layout (location = 0) out vec3 outColor; + +out gl_PerVertex +{ + vec4 gl_Position; +}; + +void main() +{ + { + outColor = inColor; + } + gl_Position = ubo.projection * ubo.model * inPos; +} diff --git a/tests/glsl/sascha-willems/pushconstants/lights.frag b/tests/glsl/sascha-willems/pushconstants/lights.frag new file mode 100644 index 000000000..9624b51be --- /dev/null +++ b/tests/glsl/sascha-willems/pushconstants/lights.frag @@ -0,0 +1,41 @@ +//TEST:COMPARE_GLSL: +#version 450 + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +#define lightCount 6 + +layout (location = 0) in vec3 inNormal; +layout (location = 2) in vec3 inColor; + +layout (location = 3) in vec4 inLightVec[lightCount]; + +layout (location = 0) out vec4 outFragColor; + +#define MAX_LIGHT_DIST 9.0 * 9.0 + +void main() +{ + vec3 lightColor[lightCount]; + lightColor[0] = vec3(1.0, 0.0, 0.0); + lightColor[1] = vec3(0.0, 1.0, 0.0); + lightColor[2] = vec3(0.0, 0.0, 1.0); + lightColor[3] = vec3(1.0, 0.0, 1.0); + lightColor[4] = vec3(0.0, 1.0, 1.0); + lightColor[5] = vec3(1.0, 1.0, 0.0); + + vec3 diffuse = vec3(0.0); + // Just some very basic attenuation + for (int i = 0; i < lightCount; ++i) + { + float lRadius = MAX_LIGHT_DIST * inLightVec[i].w; + + float dist = min(dot(inLightVec[i], inLightVec[i]), lRadius) / lRadius; + float distFactor = 1.0 - dist; + + diffuse += 
lightColor[i] * distFactor; + } + + outFragColor.rgb = diffuse; +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/pushconstants/lights.vert b/tests/glsl/sascha-willems/pushconstants/lights.vert new file mode 100644 index 000000000..693e69d5d --- /dev/null +++ b/tests/glsl/sascha-willems/pushconstants/lights.vert @@ -0,0 +1,48 @@ +//TEST:COMPARE_GLSL: +#version 450 + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (location = 0) in vec3 inPos; +layout (location = 1) in vec3 inNormal; +layout (location = 3) in vec3 inColor; + +#define lightCount 6 + +layout (binding = 0) uniform UBO +{ + mat4 projection; + mat4 model; + vec4 lightColor[lightCount]; +} ubo; + +layout(push_constant) uniform PushConsts { + vec4 lightPos[lightCount]; +} pushConsts; + +layout (location = 0) out vec3 outNormal; +layout (location = 2) out vec3 outColor; + +layout (location = 3) out vec4 outLightVec[lightCount]; + +out gl_PerVertex +{ + vec4 gl_Position; +}; + +void main() +{ + outNormal = inNormal; + outColor = inColor; + + gl_Position = ubo.projection * ubo.model * vec4(inPos.xyz, 1.0); + + for (int i = 0; i < lightCount; ++i) + { + vec4 worldPos = ubo.model * vec4(inPos.xyz, 1.0); + outLightVec[i].xyz = pushConsts.lightPos[i].xyz - inPos.xyz; + // Store light radius in w + outLightVec[i].w = pushConsts.lightPos[i].w; + } +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/radialblur/colorpass.frag b/tests/glsl/sascha-willems/radialblur/colorpass.frag new file mode 100644 index 000000000..5d56315fd --- /dev/null +++ b/tests/glsl/sascha-willems/radialblur/colorpass.frag @@ -0,0 +1,25 @@ +//TEST:COMPARE_GLSL: +#version 450 + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (binding = 1) uniform sampler2D samplerGradientRamp; + +layout (location = 0) in vec3 inColor; +layout (location = 1) in vec2 inUV; + +layout (location = 0) out vec4 outFragColor; + +void main() +{ + // Use max. color channel value to detect bright glow emitters + if ((inColor.r >= 0.9) || (inColor.g >= 0.9) || (inColor.b >= 0.9)) + { + outFragColor.rgb = texture(samplerGradientRamp, inUV).rgb; + } + else + { + outFragColor.rgb = inColor; + } +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/radialblur/colorpass.vert b/tests/glsl/sascha-willems/radialblur/colorpass.vert new file mode 100644 index 000000000..68d4f2e56 --- /dev/null +++ b/tests/glsl/sascha-willems/radialblur/colorpass.vert @@ -0,0 +1,30 @@ +//TEST:COMPARE_GLSL: +#version 450 + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (location = 0) in vec3 inPos; +layout (location = 2) in vec3 inColor; + +layout (binding = 0) uniform UBO +{ + mat4 projection; + mat4 model; + float gradientPos; +} ubo; + +layout (location = 0) out vec3 outColor; +layout (location = 1) out vec2 outUV; + +out gl_PerVertex +{ + vec4 gl_Position; +}; + +void main() +{ + outColor = inColor; + outUV = vec2(ubo.gradientPos, 0.0f); + gl_Position = ubo.projection * ubo.model * vec4(inPos, 1.0); +} diff --git a/tests/glsl/sascha-willems/radialblur/phongpass.frag b/tests/glsl/sascha-willems/radialblur/phongpass.frag new file mode 100644 index 000000000..83e96fc58 --- /dev/null +++ b/tests/glsl/sascha-willems/radialblur/phongpass.frag @@ -0,0 +1,37 @@ +//TEST:COMPARE_GLSL: +#version 450 + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (binding = 1) uniform sampler2D samplerGradientRamp; + +layout (location = 0) in vec3 inNormal; +layout (location = 1) in vec3 inColor; +layout (location = 2) in vec3 inEyePos; +layout (location = 3) in vec3 inLightVec; +layout (location = 4) in vec2 inUV; + +layout (location = 0) out vec4 outFragColor; + +void main() +{ + // No light calculations for glow color + // Use max. 
color channel value + // to detect bright glow emitters + if ((inColor.r >= 0.9) || (inColor.g >= 0.9) || (inColor.b >= 0.9)) + { + outFragColor.rgb = texture(samplerGradientRamp, inUV).rgb; + } + else + { + vec3 Eye = normalize(-inEyePos); + vec3 Reflected = normalize(reflect(-inLightVec, inNormal)); + + vec4 IAmbient = vec4(0.2, 0.2, 0.2, 1.0); + vec4 IDiffuse = vec4(0.5, 0.5, 0.5, 0.5) * max(dot(inNormal, inLightVec), 0.0); + float specular = 0.25; + vec4 ISpecular = vec4(0.5, 0.5, 0.5, 1.0) * pow(max(dot(Reflected, Eye), 0.0), 4.0) * specular; + outFragColor = vec4((IAmbient + IDiffuse) * vec4(inColor, 1.0) + ISpecular); + } +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/radialblur/phongpass.vert b/tests/glsl/sascha-willems/radialblur/phongpass.vert new file mode 100644 index 000000000..a375c39cf --- /dev/null +++ b/tests/glsl/sascha-willems/radialblur/phongpass.vert @@ -0,0 +1,38 @@ +//TEST:COMPARE_GLSL: +#version 450 + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (location = 0) in vec4 pos; +layout (location = 2) in vec3 inColor; +layout (location = 3) in vec3 inNormal; + +layout (binding = 0) uniform UBO +{ + mat4 projection; + mat4 model; + float gradientPos; +} ubo; + +layout (location = 0) out vec3 outNormal; +layout (location = 1) out vec3 outColor; +layout (location = 2) out vec3 outEyePos; +layout (location = 3) out vec3 outLightVec; +layout (location = 4) out vec2 outUV; + +out gl_PerVertex +{ + vec4 gl_Position; +}; + +void main() +{ + outNormal = inNormal; + outColor = inColor; + outUV = vec2(ubo.gradientPos, 0.0); + gl_Position = ubo.projection * ubo.model * pos; + outEyePos = vec3(ubo.model * pos); + vec4 lightPos = vec4(0.0, 0.0, -5.0, 1.0);// * ubo.model; + outLightVec = normalize(lightPos.xyz - pos.xyz); +} diff --git a/tests/glsl/sascha-willems/radialblur/radialblur.frag b/tests/glsl/sascha-willems/radialblur/radialblur.frag new file mode 100644 index 000000000..bc1e884e9 --- /dev/null +++ b/tests/glsl/sascha-willems/radialblur/radialblur.frag @@ -0,0 +1,39 @@ +//TEST:COMPARE_GLSL: +#version 450 + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (binding = 1) uniform sampler2D samplerColor; + +layout (binding = 0) uniform UBO +{ + float radialBlurScale; + float radialBlurStrength; + vec2 radialOrigin; +} ubo; + +layout (location = 0) in vec2 inUV; + +layout (location = 0) out vec4 outFragColor; + +void main() +{ + ivec2 texDim = textureSize(samplerColor, 0); + vec2 radialSize = vec2(1.0 / texDim.s, 1.0 / texDim.t); 
+ + vec2 UV = inUV; + + vec4 color = vec4(0.0, 0.0, 0.0, 0.0); + UV += radialSize * 0.5 - ubo.radialOrigin; + + #define samples 32 + + for (int i = 0; i < samples; i++) + { + float scale = 1.0 - ubo.radialBlurScale * (float(i) / float(samples-1)); + color += texture(samplerColor, UV * scale + ubo.radialOrigin); + } + + outFragColor = (color / samples) * ubo.radialBlurStrength; +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/radialblur/radialblur.vert b/tests/glsl/sascha-willems/radialblur/radialblur.vert new file mode 100644 index 000000000..117eb02ec --- /dev/null +++ b/tests/glsl/sascha-willems/radialblur/radialblur.vert @@ -0,0 +1,18 @@ +//TEST:COMPARE_GLSL: +#version 450 + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (location = 0) out vec2 outUV; + +out gl_PerVertex +{ + vec4 gl_Position; +}; + +void main() +{ + outUV = vec2((gl_VertexIndex << 1) & 2, gl_VertexIndex & 2); + gl_Position = vec4(outUV * 2.0f - 1.0f, 0.0f, 1.0f); +} diff --git a/tests/glsl/sascha-willems/raytracing/raytracing.comp b/tests/glsl/sascha-willems/raytracing/raytracing.comp new file mode 100644 index 000000000..8ef6c19e8 --- /dev/null +++ b/tests/glsl/sascha-willems/raytracing/raytracing.comp @@ -0,0 +1,258 @@ +//TEST:COMPARE_GLSL: +// Shader is looseley based on the ray tracing coding session by Inigo Quilez (www.iquilezles.org) + +#version 450 + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (local_size_x = 16, local_size_y = 16) in; +layout (binding = 0, rgba8) uniform writeonly image2D resultImage; + +#define EPSILON 0.0001 +#define MAXLEN 1000.0 +#define SHADOW 0.5 +#define RAYBOUNCES 2 +#define REFLECTIONS true +#define REFLECTIONSTRENGTH 0.4 +#define REFLECTIONFALLOFF 0.5 + +struct Camera +{ + vec3 pos; + vec3 lookat; + float fov; +}; + +layout (binding = 1) uniform UBO +{ + vec3 lightPos; + float aspectRatio; + vec4 fogColor; + Camera camera; + mat4 rotMat; +} ubo; + +struct Sphere +{ + vec3 pos; + float radius; + vec3 diffuse; + float specular; + int id; +}; + +struct Plane +{ + vec3 normal; + float distance; + vec3 diffuse; + float specular; + int id; +}; + +layout (std140, binding = 2) buffer Spheres +{ + Sphere spheres[ ]; +}; + +layout (std140, binding = 3) buffer Planes +{ + Plane planes[ ]; 
+}; + +void reflectRay(inout vec3 rayD, in vec3 mormal) +{ + rayD = rayD + 2.0 * -dot(mormal, rayD) * mormal; +} + +// Lighting ========================================================= + +float lightDiffuse(vec3 normal, vec3 lightDir) +{ + return clamp(dot(normal, lightDir), 0.1, 1.0); +} + +float lightSpecular(vec3 normal, vec3 lightDir, float specularFactor) +{ + vec3 viewVec = normalize(ubo.camera.pos); + vec3 halfVec = normalize(lightDir + viewVec); + return pow(clamp(dot(normal, halfVec), 0.0, 1.0), specularFactor); +} + +// Sphere =========================================================== + +float sphereIntersect(in vec3 rayO, in vec3 rayD, in Sphere sphere) +{ + vec3 oc = rayO - sphere.pos; + float b = 2.0 * dot(oc, rayD); + float c = dot(oc, oc) - sphere.radius*sphere.radius; + float h = b*b - 4.0*c; + if (h < 0.0) + { + return -1.0; + } + float t = (-b - sqrt(h)) / 2.0; + + return t; +} + +vec3 sphereNormal(in vec3 pos, in Sphere sphere) +{ + return (pos - sphere.pos) / sphere.radius; +} + +// Plane =========================================================== + +float planeIntersect(vec3 rayO, vec3 rayD, Plane plane) +{ + float d = dot(rayD, plane.normal); + + if (d == 0.0) + return 0.0; + + float t = -(plane.distance + dot(rayO, plane.normal)) / d; + + if (t < 0.0) + return 0.0; + + return t; +} + + +int intersect(in vec3 rayO, in vec3 rayD, inout float resT) +{ + int id = -1; + + for (int i = 0; i < spheres.length(); i++) + { + float tSphere = sphereIntersect(rayO, rayD, spheres[i]); + if ((tSphere > EPSILON) && (tSphere < resT)) + { + id = spheres[i].id; + resT = tSphere; + } + } + + for (int i = 0; i < planes.length(); i++) + { + float tplane = planeIntersect(rayO, rayD, planes[i]); + if ((tplane > EPSILON) && (tplane < resT)) + { + id = planes[i].id; + resT = tplane; + } + } + + return id; +} + +float calcShadow(in vec3 rayO, in vec3 rayD, in int objectId, inout float t) +{ + for (int i = 0; i < spheres.length(); i++) + { + if (spheres[i].id == 
objectId) + continue; + float tSphere = sphereIntersect(rayO, rayD, spheres[i]); + if ((tSphere > EPSILON) && (tSphere < t)) + { + t = tSphere; + return SHADOW; + } + } + return 1.0; +} + +vec3 fog(in float t, in vec3 color) +{ + return mix(color, ubo.fogColor.rgb, clamp(sqrt(t*t)/20.0, 0.0, 1.0)); +} + +vec3 renderScene(inout vec3 rayO, inout vec3 rayD, inout int id) +{ + vec3 color = vec3(0.0); + float t = MAXLEN; + + // Get intersected object ID + int objectID = intersect(rayO, rayD, t); + + if (objectID == -1) + { + return color; + } + + vec3 pos = rayO + t * rayD; + vec3 lightVec = normalize(ubo.lightPos - pos); + vec3 normal; + + // Planes + + // Spheres + + for (int i = 0; i < planes.length(); i++) + { + if (objectID == planes[i].id) + { + normal = planes[i].normal; + float diffuse = lightDiffuse(normal, lightVec); + float specular = lightSpecular(normal, lightVec, planes[i].specular); + color = diffuse * planes[i].diffuse + specular; + } + } + + for (int i = 0; i < spheres.length(); i++) + { + if (objectID == spheres[i].id) + { + normal = sphereNormal(pos, spheres[i]); + float diffuse = lightDiffuse(normal, lightVec); + float specular = lightSpecular(normal, lightVec, spheres[i].specular); + color = diffuse * spheres[i].diffuse + specular; + } + } + + if (id == -1) + return color; + + id = objectID; + + // Shadows + t = length(ubo.lightPos - pos); + color *= calcShadow(pos, lightVec, id, t); + + // Fog + color = fog(t, color); + + // Reflect ray for next render pass + reflectRay(rayD, normal); + rayO = pos; + + return color; +} + +void main() +{ + ivec2 dim = imageSize(resultImage); + vec2 uv = vec2(gl_GlobalInvocationID.xy) / dim; + + vec3 rayO = ubo.camera.pos; + vec3 rayD = normalize(vec3((-1.0 + 2.0 * uv) * vec2(ubo.aspectRatio, 1.0), -1.0)); + + // Basic color path + int id = 0; + vec3 finalColor = renderScene(rayO, rayD, id); + + // Reflection + if (REFLECTIONS) + { + float reflectionStrength = REFLECTIONSTRENGTH; + for (int i = 0; i < RAYBOUNCES; 
i++) + { + vec3 reflectionColor = renderScene(rayO, rayD, id); + finalColor = (1.0 - reflectionStrength) * finalColor + reflectionStrength * mix(reflectionColor, finalColor, 1.0 - reflectionStrength); + reflectionStrength *= REFLECTIONFALLOFF; + } + } + + imageStore(resultImage, ivec2(gl_GlobalInvocationID.xy), vec4(finalColor, 0.0)); +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/raytracing/texture.frag b/tests/glsl/sascha-willems/raytracing/texture.frag new file mode 100644 index 000000000..06635bf4b --- /dev/null +++ b/tests/glsl/sascha-willems/raytracing/texture.frag @@ -0,0 +1,16 @@ +//TEST:COMPARE_GLSL: +#version 450 + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (binding = 0) uniform sampler2D samplerColor; + +layout (location = 0) in vec2 inUV; + +layout (location = 0) out vec4 outFragColor; + +void main() +{ + outFragColor = texture(samplerColor, vec2(inUV.s, 1.0 - inUV.t)); +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/raytracing/texture.vert b/tests/glsl/sascha-willems/raytracing/texture.vert new file mode 100644 index 000000000..cbf5c0a0d --- /dev/null +++ b/tests/glsl/sascha-willems/raytracing/texture.vert @@ -0,0 +1,18 @@ +//TEST:COMPARE_GLSL: +#version 450 + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (location = 0) out vec2 outUV; + +out gl_PerVertex +{ + vec4 gl_Position; +}; + +void main() +{ + outUV = vec2((gl_VertexIndex << 1) & 2, gl_VertexIndex & 2); + gl_Position = vec4(outUV * 2.0f + -1.0f, 0.0f, 1.0f); +} diff --git a/tests/glsl/sascha-willems/scenerendering/scene.frag b/tests/glsl/sascha-willems/scenerendering/scene.frag new file mode 100644 index 000000000..032ce45a9 --- /dev/null +++ b/tests/glsl/sascha-willems/scenerendering/scene.frag @@ -0,0 +1,35 @@ +//TEST:COMPARE_GLSL: +#version 450 + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (set = 1, binding = 0) uniform sampler2D samplerColorMap; + +layout (location = 0) in vec3 inNormal; +layout (location = 1) in vec3 inColor; +layout (location = 2) in vec2 inUV; +layout (location = 3) in vec3 inViewVec; +layout (location = 4) in vec3 inLightVec; + +layout(push_constant) uniform Material +{ + vec4 ambient; + vec4 diffuse; + vec4 specular; + float opacity; +} material; + +layout (location = 0) out vec4 outFragColor; + +void main() +{ + vec4 color = texture(samplerColorMap, inUV) * vec4(inColor, 1.0); + vec3 N = normalize(inNormal); + vec3 L = normalize(inLightVec); + vec3 V = normalize(inViewVec); + vec3 R = reflect(-L, N); + vec3 diffuse = max(dot(N, L), 0.0) * material.diffuse.rgb; + vec3 specular = pow(max(dot(R, V), 0.0), 16.0) * material.specular.rgb; + outFragColor = vec4((material.ambient.rgb + diffuse) * color.rgb + specular, 1.0-material.opacity); +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/scenerendering/scene.vert b/tests/glsl/sascha-willems/scenerendering/scene.vert new file mode 100644 index 000000000..50852f78c --- /dev/null +++ b/tests/glsl/sascha-willems/scenerendering/scene.vert @@ -0,0 +1,46 @@ +//TEST:COMPARE_GLSL: +#version 450 + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (location = 0) in vec3 inPos; +layout (location = 1) in vec3 inNormal; +layout (location = 2) in vec2 inUV; +layout (location = 3) in vec3 inColor; + +layout (set = 0, binding = 0) uniform UBO +{ + mat4 projection; + mat4 view; + mat4 model; + vec4 lightPos; +} ubo; + +layout (location = 0) out vec3 outNormal; +layout (location = 1) out vec3 outColor; +layout (location = 2) out vec2 outUV; +layout (location = 3) out vec3 outViewVec; +layout (location = 4) out vec3 outLightVec; + +out gl_PerVertex +{ + vec4 gl_Position; +}; + +void main() +{ + outNormal = inNormal; + outColor = inColor; + outUV = inUV; + + mat4 modelView = ubo.view * ubo.model; + + gl_Position = ubo.projection * modelView * vec4(inPos.xyz, 1.0); + + vec4 pos = modelView * vec4(inPos, 0.0); + outNormal = mat3(ubo.model) * inNormal; + vec3 lPos = mat3(ubo.model) * ubo.lightPos.xyz; + outLightVec = lPos - (ubo.model * vec4(inPos, 1.0)).xyz; + outViewVec = -(ubo.model * vec4(inPos, 1.0)).xyz; +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/screenshot/mesh.frag b/tests/glsl/sascha-willems/screenshot/mesh.frag new file mode 100644 index 000000000..3504b70cf --- /dev/null +++ b/tests/glsl/sascha-willems/screenshot/mesh.frag @@ -0,0 +1,24 @@ +//TEST:COMPARE_GLSL: +#version 450 + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (location = 0) in vec3 inNormal; +layout (location = 1) in vec3 inColor; +layout (location = 2) in vec3 inViewVec; +layout (location = 3) in vec3 inLightVec; + +layout (location = 0) out vec4 outFragColor; + +void main() +{ + vec3 N = normalize(inNormal); + vec3 L = normalize(inLightVec); + vec3 V = normalize(inViewVec); + vec3 R = reflect(-L, N); + vec3 ambient = vec3(0.1); + vec3 diffuse = max(dot(N, L), 0.0) * vec3(1.0); + vec3 specular = pow(max(dot(R, V), 0.0), 16.0) * vec3(0.75); + outFragColor = vec4((ambient + diffuse) * inColor.rgb + specular, 1.0); +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/screenshot/mesh.vert b/tests/glsl/sascha-willems/screenshot/mesh.vert new file mode 100644 index 000000000..a0c75ab18 --- /dev/null +++ b/tests/glsl/sascha-willems/screenshot/mesh.vert @@ -0,0 +1,40 @@ +//TEST:COMPARE_GLSL: +#version 450 + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (location = 0) in vec4 inPos; +layout (location = 1) in vec3 inNormal; +layout (location = 2) in vec3 inColor; + +layout (set = 0, binding = 0) uniform UBO +{ + mat4 projection; + mat4 view; + mat4 model; +} ubo; + +layout (location = 0) out vec3 outNormal; +layout (location = 1) out vec3 outColor; +layout (location = 2) out vec3 outViewVec; +layout (location = 3) out vec3 outLightVec; + +out gl_PerVertex +{ + vec4 gl_Position; +}; + +void main() +{ + outNormal = inNormal; + outColor = inColor; + gl_Position = ubo.projection * ubo.view * ubo.model * inPos; + + vec4 pos = ubo.view * ubo.model * vec4(inPos.xyz, 1.0); + outNormal = mat3(ubo.model) * inNormal; + + vec3 lightPos = vec3(1.0f, -1.0f, 1.0f); + outLightVec = lightPos.xyz - pos.xyz; + outViewVec = -pos.xyz; +} diff --git a/tests/glsl/sascha-willems/shadowmapomni/cubemapdisplay.frag b/tests/glsl/sascha-willems/shadowmapomni/cubemapdisplay.frag new file mode 100644 index 000000000..740cd09e1 --- /dev/null +++ b/tests/glsl/sascha-willems/shadowmapomni/cubemapdisplay.frag @@ -0,0 +1,17 @@ +//TEST:COMPARE_GLSL: +#version 450 + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (binding = 1) uniform samplerCube shadowCubeMap; + +layout (location = 0) in vec3 inUVW; + +layout (location = 0) out vec4 outFragColor; + +void main() +{ + float dist = length(texture(shadowCubeMap, inUVW).rgb) * 0.005; + outFragColor = vec4(vec3(dist), 1.0); +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/shadowmapomni/cubemapdisplay.vert b/tests/glsl/sascha-willems/shadowmapomni/cubemapdisplay.vert new file mode 100644 index 000000000..b5f889174 --- /dev/null +++ b/tests/glsl/sascha-willems/shadowmapomni/cubemapdisplay.vert @@ -0,0 +1,28 @@ +//TEST:COMPARE_GLSL: +#version 450 + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (location = 0) in vec3 inPos; + +layout (binding = 0) uniform UBO +{ + mat4 projection; + mat4 view; + mat4 model; +} ubo; + +layout (location = 0) out vec3 outUVW; + +out gl_PerVertex +{ + vec4 gl_Position; +}; + +void main() +{ + outUVW = inPos; + gl_Position = ubo.projection * ubo.model * vec4(inPos.xyz, 1.0); +} + diff --git a/tests/glsl/sascha-willems/shadowmapomni/offscreen.frag b/tests/glsl/sascha-willems/shadowmapomni/offscreen.frag new file mode 100644 index 000000000..6426c4118 --- /dev/null +++ b/tests/glsl/sascha-willems/shadowmapomni/offscreen.frag @@ -0,0 +1,17 @@ +//TEST:COMPARE_GLSL: +#version 450 + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (location = 0) out float outFragColor; + +layout (location = 0) in vec4 inPos; +layout (location = 1) in vec3 inLightPos; + +void main() +{ + // Store distance to light as 32 bit float value + vec3 lightVec = inPos.xyz - inLightPos; + outFragColor = length(lightVec); +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/shadowmapomni/offscreen.vert b/tests/glsl/sascha-willems/shadowmapomni/offscreen.vert new file mode 100644 index 000000000..7513070dc --- /dev/null +++ b/tests/glsl/sascha-willems/shadowmapomni/offscreen.vert @@ -0,0 +1,36 @@ +//TEST:COMPARE_GLSL: +#version 450 + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (location = 0) in vec3 inPos; + +layout (location = 0) out vec4 outPos; +layout (location = 1) out vec3 outLightPos; + +layout (binding = 0) uniform UBO +{ + mat4 projection; + mat4 view; + mat4 model; + vec4 lightPos; +} ubo; + +layout(push_constant) uniform PushConsts +{ + mat4 view; +} pushConsts; + +out gl_PerVertex +{ + vec4 gl_Position; +}; + +void main() +{ + gl_Position = ubo.projection * pushConsts.view * ubo.model * vec4(inPos, 1.0); + + outPos = vec4(inPos, 1.0); + outLightPos = ubo.lightPos.xyz; +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/shadowmapomni/scene.frag b/tests/glsl/sascha-willems/shadowmapomni/scene.frag new file mode 100644 index 000000000..1bdc44bdb --- /dev/null +++ b/tests/glsl/sascha-willems/shadowmapomni/scene.frag @@ -0,0 +1,44 @@ +//TEST:COMPARE_GLSL: +#version 450 + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (binding = 1) uniform samplerCube shadowCubeMap; + +layout (location = 0) in vec3 inNormal; +layout (location = 1) in vec3 inColor; +layout (location = 2) in vec3 inEyePos; +layout (location = 3) in vec3 inLightVec; +layout (location = 4) in vec3 inWorldPos; +layout (location = 5) in vec3 inLightPos; + +layout (location = 0) out vec4 outFragColor; + +#define EPSILON 0.15 +#define SHADOW_OPACITY 0.5 + +void main() +{ + // Lighting + vec3 N = normalize(inNormal); + vec3 L = normalize(vec3(1.0)); + + vec3 Eye = normalize(-inEyePos); + vec3 Reflected = normalize(reflect(-inLightVec, inNormal)); + + vec4 IAmbient = vec4(vec3(0.05), 1.0); + vec4 IDiffuse = vec4(1.0) * max(dot(inNormal, inLightVec), 0.0); + + outFragColor = vec4(IAmbient + IDiffuse * vec4(inColor, 1.0)); + + // Shadow + vec3 lightVec = inWorldPos - inLightPos; + float sampledDist = texture(shadowCubeMap, lightVec).r; + float dist = length(lightVec); + + // Check if fragment is in shadow + float shadow = (dist <= sampledDist + EPSILON) ? 1.0 : SHADOW_OPACITY; + + outFragColor.rgb *= shadow; +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/shadowmapomni/scene.vert b/tests/glsl/sascha-willems/shadowmapomni/scene.vert new file mode 100644 index 000000000..aed2c97ae --- /dev/null +++ b/tests/glsl/sascha-willems/shadowmapomni/scene.vert @@ -0,0 +1,43 @@ +//TEST:COMPARE_GLSL: +#version 450 + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (location = 0) in vec3 inPos; +layout (location = 2) in vec3 inColor; +layout (location = 3) in vec3 inNormal; + +layout (binding = 0) uniform UBO +{ + mat4 projection; + mat4 view; + mat4 model; + vec4 lightPos; +} ubo; + +layout (location = 0) out vec3 outNormal; +layout (location = 1) out vec3 outColor; +layout (location = 2) out vec3 outEyePos; +layout (location = 3) out vec3 outLightVec; +layout (location = 4) out vec3 outWorldPos; +layout (location = 5) out vec3 outLightPos; + +out gl_PerVertex +{ + vec4 gl_Position; +}; + +void main() +{ + outColor = inColor; + outNormal = inNormal; + + gl_Position = ubo.projection * ubo.view * ubo.model * vec4(inPos.xyz, 1.0); + outEyePos = vec3(ubo.model * vec4(inPos, 1.0f)); + outLightVec = normalize(ubo.lightPos.xyz - inPos.xyz); + outWorldPos = inPos; + + outLightPos = ubo.lightPos.xyz; +} + diff --git a/tests/glsl/sascha-willems/shadowmapping/offscreen.frag b/tests/glsl/sascha-willems/shadowmapping/offscreen.frag new file mode 100644 index 000000000..81c8f55a3 --- /dev/null +++ b/tests/glsl/sascha-willems/shadowmapping/offscreen.frag @@ -0,0 +1,14 @@ +//TEST:COMPARE_GLSL: +#version 450 + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout(location = 0) out vec4 color; +//layout(location = 0) out float fragmentdepth; + +void main() +{ +// fragmentdepth = gl_FragCoord.z; + color = vec4(1.0, 0.0, 0.0, 1.0); +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/shadowmapping/offscreen.vert b/tests/glsl/sascha-willems/shadowmapping/offscreen.vert new file mode 100644 index 000000000..e5d18fc88 --- /dev/null +++ b/tests/glsl/sascha-willems/shadowmapping/offscreen.vert @@ -0,0 +1,23 @@ +//TEST:COMPARE_GLSL: +#version 450 + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (location = 0) in vec3 inPos; + +layout (binding = 0) uniform UBO +{ + mat4 depthMVP; +} ubo; + +out gl_PerVertex +{ + vec4 gl_Position; +}; + + +void main() +{ + gl_Position = ubo.depthMVP * vec4(inPos, 1.0); +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/shadowmapping/quad.frag b/tests/glsl/sascha-willems/shadowmapping/quad.frag new file mode 100644 index 000000000..3074bd5ee --- /dev/null +++ b/tests/glsl/sascha-willems/shadowmapping/quad.frag @@ -0,0 +1,25 @@ +//TEST:COMPARE_GLSL: +#version 450 + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (binding = 1) uniform sampler2D samplerColor; + +layout (location = 0) in vec2 inUV; + +layout (location = 0) out vec4 outFragColor; + +float LinearizeDepth(float depth) +{ + float n = 1.0; // camera z near + float f = 128.0; // camera z far + float z = depth; + return (2.0 * n) / (f + n - z * (f - n)); +} + +void main() +{ + float depth = texture(samplerColor, inUV).r; + outFragColor = vec4(vec3(1.0-LinearizeDepth(depth)), 1.0); +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/shadowmapping/quad.vert b/tests/glsl/sascha-willems/shadowmapping/quad.vert new file mode 100644 index 000000000..7c1286a4c --- /dev/null +++ b/tests/glsl/sascha-willems/shadowmapping/quad.vert @@ -0,0 +1,28 @@ +//TEST:COMPARE_GLSL: +#version 450 + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (location = 0) in vec3 inPos; +layout (location = 1) in vec2 inUV; + +layout (binding = 0) uniform UBO +{ + mat4 projection; + mat4 model; +} ubo; + +layout (location = 0) out vec2 outUV; + +out gl_PerVertex +{ + vec4 gl_Position; +}; + + +void main() +{ + outUV = inUV; + gl_Position = ubo.projection * ubo.model * vec4(inPos.xyz, 1.0); +} diff --git a/tests/glsl/sascha-willems/shadowmapping/scene.frag b/tests/glsl/sascha-willems/shadowmapping/scene.frag new file mode 100644 index 000000000..58e2986ce --- /dev/null +++ b/tests/glsl/sascha-willems/shadowmapping/scene.frag @@ -0,0 +1,72 @@ +//TEST:COMPARE_GLSL: +#version 450 + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (binding = 1) uniform sampler2D shadowMap; + +layout (location = 0) in vec3 inNormal; +layout (location = 1) in vec3 inColor; +layout (location = 2) in vec3 inViewVec; +layout (location = 3) in vec3 inLightVec; +layout (location = 4) in vec4 inShadowCoord; + +layout (constant_id = 0) const int enablePCF = 0; + +layout (location = 0) out vec4 outFragColor; + +#define ambient 0.1 + +float textureProj(vec4 P, vec2 off) +{ + float shadow = 1.0; + vec4 shadowCoord = P / P.w; + if ( shadowCoord.z > -1.0 && shadowCoord.z < 1.0 ) + { + float dist = texture( shadowMap, shadowCoord.st + off ).r; + if ( shadowCoord.w > 0.0 && dist < shadowCoord.z ) + { + shadow = ambient; + } + } + return shadow; +} + +float filterPCF(vec4 sc) +{ + ivec2 texDim = textureSize(shadowMap, 0); + float scale = 1.5; + float dx = scale * 1.0 / 
float(texDim.x); + float dy = scale * 1.0 / float(texDim.y); + + float shadowFactor = 0.0; + int count = 0; + int range = 1; + + for (int x = -range; x <= range; x++) + { + for (int y = -range; y <= range; y++) + { + shadowFactor += textureProj(sc, vec2(dx*x, dy*y)); + count++; + } + + } + return shadowFactor / count; +} + +void main() +{ + float shadow = (enablePCF == 1) ? filterPCF(inShadowCoord / inShadowCoord.w) : textureProj(inShadowCoord / inShadowCoord.w, vec2(0.0)); + + vec3 N = normalize(inNormal); + vec3 L = normalize(inLightVec); + vec3 V = normalize(inViewVec); + vec3 R = normalize(-reflect(L, N)); + vec3 diffuse = max(dot(N, L), ambient) * inColor; +// vec3 specular = pow(max(dot(R, V), 0.0), 50.0) * vec3(0.75); + + outFragColor = vec4(diffuse * shadow, 1.0); + +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/shadowmapping/scene.vert b/tests/glsl/sascha-willems/shadowmapping/scene.vert new file mode 100644 index 000000000..6fda339c3 --- /dev/null +++ b/tests/glsl/sascha-willems/shadowmapping/scene.vert @@ -0,0 +1,52 @@ +//TEST:COMPARE_GLSL: +#version 450 + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (location = 0) in vec3 inPos; +layout (location = 1) in vec2 inUV; +layout (location = 2) in vec3 inColor; +layout (location = 3) in vec3 inNormal; + +layout (binding = 0) uniform UBO +{ + mat4 projection; + mat4 view; + mat4 model; + mat4 lightSpace; + vec3 lightPos; +} ubo; + +layout (location = 0) out vec3 outNormal; +layout (location = 1) out vec3 outColor; +layout (location = 2) out vec3 outViewVec; +layout (location = 3) out vec3 outLightVec; +layout (location = 4) out vec4 outShadowCoord; + +out gl_PerVertex +{ + vec4 gl_Position; +}; + +const mat4 biasMat = mat4( + 0.5, 0.0, 0.0, 0.0, + 0.0, 0.5, 0.0, 0.0, + 0.0, 0.0, 1.0, 0.0, + 0.5, 0.5, 0.0, 1.0 ); + +void main() +{ + outColor = inColor; + outNormal = inNormal; + + gl_Position = ubo.projection * ubo.view * ubo.model * vec4(inPos.xyz, 1.0); + + vec4 pos = ubo.model * vec4(inPos, 1.0); + outNormal = mat3(ubo.model) * inNormal; + outLightVec = normalize(ubo.lightPos - inPos); + outViewVec = -pos.xyz; + + outShadowCoord = ( biasMat * ubo.lightSpace * ubo.model ) * vec4(inPos, 1.0); +} + diff --git a/tests/glsl/sascha-willems/skeletalanimation/mesh.frag b/tests/glsl/sascha-willems/skeletalanimation/mesh.frag new file mode 100644 index 000000000..c25560d09 --- /dev/null +++ b/tests/glsl/sascha-willems/skeletalanimation/mesh.frag @@ -0,0 +1,28 @@ +//TEST:COMPARE_GLSL: +#version 450 + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (binding = 1) uniform sampler2D samplerColorMap; + +layout (location = 0) in vec3 
inNormal; +layout (location = 1) in vec3 inColor; +layout (location = 2) in vec2 inUV; +layout (location = 3) in vec3 inViewVec; +layout (location = 4) in vec3 inLightVec; + +layout (location = 0) out vec4 outFragColor; + +void main() +{ + vec4 color = texture(samplerColorMap, inUV) * vec4(inColor, 1.0); + + vec3 N = normalize(inNormal); + vec3 L = normalize(inLightVec); + vec3 V = normalize(inViewVec); + vec3 R = reflect(-L, N); + vec3 diffuse = max(dot(N, L), 0.0) * vec3(1.0);// * inColor; + vec3 specular = pow(max(dot(R, V), 0.0), 32.0) * vec3(0.5); + outFragColor = vec4(diffuse * color.rgb + specular, 1.0); +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/skeletalanimation/mesh.vert b/tests/glsl/sascha-willems/skeletalanimation/mesh.vert new file mode 100644 index 000000000..ca11e0b42 --- /dev/null +++ b/tests/glsl/sascha-willems/skeletalanimation/mesh.vert @@ -0,0 +1,53 @@ +//TEST:COMPARE_GLSL: +#version 450 + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (location = 0) in vec3 inPos; +layout (location = 1) in vec3 inNormal; +layout (location = 2) in vec2 inUV; +layout (location = 3) in vec3 inColor; +layout (location = 4) in vec4 inBoneWeights; +layout (location = 5) in ivec4 inBoneIDs; + +#define MAX_BONES 64 + +layout (binding = 0) uniform UBO +{ + mat4 projection; + mat4 view; + mat4 model; + mat4 bones[MAX_BONES]; + vec4 lightPos; + vec4 viewPos; +} ubo; + +layout (location = 0) out vec3 outNormal; +layout (location = 1) out vec3 outColor; +layout (location = 2) out vec2 outUV; +layout (location = 3) out vec3 outViewVec; +layout (location = 4) out vec3 outLightVec; + +out gl_PerVertex +{ + vec4 gl_Position; +}; + +void main() +{ + mat4 boneTransform = ubo.bones[inBoneIDs[0]] * inBoneWeights[0]; + boneTransform += ubo.bones[inBoneIDs[1]] * inBoneWeights[1]; + boneTransform += ubo.bones[inBoneIDs[2]] * inBoneWeights[2]; + boneTransform += ubo.bones[inBoneIDs[3]] * inBoneWeights[3]; + + outColor = inColor; + outUV = inUV; + + gl_Position = ubo.projection * ubo.view * ubo.model * boneTransform * vec4(inPos.xyz, 1.0); + + vec4 pos = ubo.model * vec4(inPos, 1.0); + outNormal = mat3(inverse(transpose(ubo.model * boneTransform))) * inNormal; + outLightVec = ubo.lightPos.xyz - pos.xyz; + outViewVec = ubo.viewPos.xyz - pos.xyz; +}
\ No newline at end of file
diff --git a/tests/glsl/sascha-willems/skeletalanimation/texture.frag b/tests/glsl/sascha-willems/skeletalanimation/texture.frag
new file mode 100644
index 000000000..1adb226bb
--- /dev/null
+++ b/tests/glsl/sascha-willems/skeletalanimation/texture.frag
@@ -0,0 +1,35 @@
+//TEST:COMPARE_GLSL:
+#version 450
+
+#extension GL_ARB_separate_shader_objects : enable
+#extension GL_ARB_shading_language_420pack : enable
+
+layout (binding = 1) uniform sampler2D samplerColorMap;
+
+layout (location = 0) in vec2 inUV;
+layout (location = 1) in vec3 inNormal;
+layout (location = 2) in vec3 inViewVec;
+layout (location = 3) in vec3 inLightVec;
+
+layout (location = 0) out vec4 outFragColor;
+
+// Samples the colour map and scales it by two falloff terms that both
+// depend on the length of inLightVec.
+void main()
+{
+    vec4 color = texture(samplerColorMap, inUV);
+
+    // Length of inLightVec, computed once and shared by both falloff terms
+    float dist = sqrt(dot(inLightVec, inLightVec));
+
+    // clamp() already bounds the value to [0, 1], so no outer max() is needed
+    const float attInvRadius = 1.0/5000.0;
+    float atten = clamp(1.0 - attInvRadius * dist, 0.0, 1.0);
+
+    // Fake drop shadow
+    const float shadowInvRadius = 1.0/2500.0;
+    float dropshadow = clamp(1.0 - shadowInvRadius * dist, 0.0, 1.0);
+
+    outFragColor = color * (1.0 - dropshadow);
+    outFragColor.rgb *= atten;
+}
\ No newline at end of file
diff --git a/tests/glsl/sascha-willems/skeletalanimation/texture.vert b/tests/glsl/sascha-willems/skeletalanimation/texture.vert
new file mode 100644
index 000000000..67b3f2334
--- /dev/null
+++ b/tests/glsl/sascha-willems/skeletalanimation/texture.vert
@@ -0,0 +1,41 @@
+//TEST:COMPARE_GLSL:
+#version 450
+
+#extension GL_ARB_separate_shader_objects : enable
+#extension GL_ARB_shading_language_420pack : enable
+
+layout (location = 0) in vec3 inPos;
+layout (location = 1) in vec3 inNormal;
+layout (location = 2) in vec2 inUV;
+
+layout (binding = 0) uniform UBO
+{
+    mat4 projection;
+    mat4 model;
+    mat4 view;
+    vec4 lightPos;
+    vec4 viewPos;
+    vec2 uvOffset;
+} ubo;
+
+layout (location = 0) out vec2 outUV;
+layout (location = 1) out vec3 outNormal;
+layout (location = 2) out vec3 outViewVec;
+layout (location = 3) out vec3 outLightVec;
+
+out gl_PerVertex
+{
+    vec4 gl_Position;
+};
+
+// Offsets the UVs by ubo.uvOffset, projects the vertex and emits the lighting vectors.
+void main()
+{
+    outUV = inUV + ubo.uvOffset;
+    vec4 pos = vec4(inPos, 1.0);
+    gl_Position = ubo.projection * ubo.view * ubo.model * pos;
+
+    outNormal = mat3(ubo.model) * inNormal;
+    outLightVec = ubo.lightPos.xyz - pos.xyz;
+    outViewVec = ubo.viewPos.xyz - pos.xyz;
+}
diff --git a/tests/glsl/sascha-willems/specializationconstants/uber.frag b/tests/glsl/sascha-willems/specializationconstants/uber.frag
new file mode 100644
index 000000000..9661dff03
--- /dev/null
+++ b/tests/glsl/sascha-willems/specializationconstants/uber.frag
@@ -0,0 +1,76 @@
+//TEST:COMPARE_GLSL:
+#version 450
+
+#extension GL_ARB_separate_shader_objects : enable
+#extension GL_ARB_shading_language_420pack : enable
+
+layout (binding = 1) uniform sampler2D samplerColormap;
+layout (binding = 2) uniform sampler2D samplerDiscard;
+
+layout (location = 0) in vec3 inNormal;
+layout (location = 1) in vec3 inColor;
+layout (location = 2) in vec2 inUV;
+layout (location = 3) in vec3 inViewVec;
+layout (location = 4) in vec3 inLightVec;
+
+layout (location = 0) out vec4 outFragColor;
+
+// We use this constant to control the flow of the shader depending on the
+// lighting model selected at pipeline creation time
+layout (constant_id = 0) const int LIGHTING_MODEL = 0;
+// Parameter for the toon shading part of the shader
+layout (constant_id = 1) const float PARAM_TOON_DESATURATION = 0.0f;
+
+void main()
+{
+    switch (LIGHTING_MODEL) {
+        case 0: // Phong
+        {
+            vec3 ambient = inColor * vec3(0.25);
+            vec3 N = normalize(inNormal);
+            vec3 L = normalize(inLightVec);
+            vec3 V = normalize(inViewVec);
+            vec3 R = reflect(-L, N);
+            vec3 diffuse = max(dot(N, L), 0.0) * inColor;
+            vec3 specular = pow(max(dot(R, V), 0.0), 32.0) * vec3(0.75);
+            outFragColor = vec4(ambient + diffuse * 1.75 + specular, 1.0);
+            break;
+        }
+        case 1: // Toon
+        {
+
+            vec3 N = normalize(inNormal);
+            vec3 L = normalize(inLightVec);
+            float intensity = dot(N,L);
+            vec3 color;
+            if (intensity > 0.98)
+                color = inColor * 1.5;
+            else if (intensity > 0.9)
+                color = inColor * 1.0;
+            else if (intensity > 0.5)
+                color = inColor * 0.6;
+            else if (intensity > 0.25)
+                color = inColor * 0.4;
+            else
+                color = inColor * 0.2;
+            // Desaturate a bit
+            color = vec3(mix(color, vec3(dot(vec3(0.2126,0.7152,0.0722), color)), PARAM_TOON_DESATURATION));
+            // Write alpha explicitly (1.0, as in the Phong and Textured cases) so the output is fully defined
+            outFragColor = vec4(color, 1.0);
+            break;
+        }
+        case 2: // Textured
+        {
+            vec4 color = texture(samplerColormap, inUV).rrra;
+            vec3 ambient = color.rgb * vec3(0.25) * inColor;
+            vec3 N = normalize(inNormal);
+            vec3 L = normalize(inLightVec);
+            vec3 V = normalize(inViewVec);
+            vec3 R = reflect(-L, N);
+            vec3 diffuse = max(dot(N, L), 0.0) * color.rgb;
+            float specular = pow(max(dot(R, V), 0.0), 32.0) * color.a;
+            outFragColor = vec4(ambient + diffuse + vec3(specular), 1.0);
+            break;
+        }
+    }
+}
\ No newline at end of file
diff --git a/tests/glsl/sascha-willems/specializationconstants/uber.vert b/tests/glsl/sascha-willems/specializationconstants/uber.vert
new file mode 100644
index 000000000..44fd2e692
--- /dev/null
+++ b/tests/glsl/sascha-willems/specializationconstants/uber.vert
@@ -0,0 +1,43 @@
+//TEST:COMPARE_GLSL:
+#version 450
+
+#extension GL_ARB_separate_shader_objects : enable
+#extension GL_ARB_shading_language_420pack : enable
+
+layout (location = 0) in vec3 inPos;
+layout (location = 1) in vec3 inNormal;
+layout (location = 2) in vec2 inUV;
+layout (location = 3) in vec3 inColor;
+
+layout (binding = 0) uniform UBO
+{
+    mat4 projection;
+    mat4 model;
+    vec4 lightPos;
+} ubo;
+
+layout (location = 0) out vec3 outNormal;
+layout (location = 1) out vec3 outColor;
+layout (location = 2) out vec2 outUV;
+layout (location = 3) out vec3 outViewVec;
+layout (location = 4) out vec3 outLightVec;
+
+out gl_PerVertex
+{
+    vec4 gl_Position;
+};
+
+// Projects the vertex and emits colour, UV, normal and the light/view vectors.
+void main()
+{
+    outColor = inColor;
+    outUV = inUV;
+    gl_Position = ubo.projection * ubo.model * vec4(inPos.xyz, 1.0);
+
+    vec4 pos = ubo.model * vec4(inPos, 1.0);
+    // Single store: the normal is transformed by the upper-left 3x3 of the model matrix
+    outNormal = mat3(ubo.model) * inNormal;
+    vec3 lPos = mat3(ubo.model) * ubo.lightPos.xyz;
+    outLightVec = lPos - pos.xyz;
+    outViewVec = -pos.xyz;
+}
\ No newline at end of file
diff --git a/tests/glsl/sascha-willems/sphericalenvmapping/sem.frag b/tests/glsl/sascha-willems/sphericalenvmapping/sem.frag
new file mode 100644
index 000000000..6735ee03c
--- /dev/null
+++ b/tests/glsl/sascha-willems/sphericalenvmapping/sem.frag
@@ -0,0 +1,24 @@
+//TEST:COMPARE_GLSL:
+#version 450
+
+#extension GL_ARB_separate_shader_objects : enable
+#extension GL_ARB_shading_language_420pack : enable
+
+layout (binding = 1) uniform sampler2DArray matCap;
+
+layout (location = 0) in vec3 inColor;
+layout (location = 1) in vec3 inEyePos;
+layout (location = 2) in vec3 inNormal;
+layout (location = 3) in flat int inTexIndex;
+
+layout (location = 0) out vec4 outFragColor;
+
+// Samples layer inTexIndex of matCap at coordinates derived from the reflection of inEyePos about inNormal.
+void main()
+{
+    vec3 r = reflect( inEyePos, inNormal );
+    vec3 r2 = vec3( r.x, r.y, r.z + 1.0 );
+    float m = 2.0 * length( r2 );
+    vec2 vN = r.xy / m + .5;
+    outFragColor = vec4( texture( matCap, vec3(vN, inTexIndex)).rgb * (clamp(inColor.r * 2.0, 0.0, 1.0)), 1.0 );
+}
diff --git a/tests/glsl/sascha-willems/sphericalenvmapping/sem.vert b/tests/glsl/sascha-willems/sphericalenvmapping/sem.vert
new file mode 100644
index 000000000..bb94e3f0e
--- /dev/null
+++ b/tests/glsl/sascha-willems/sphericalenvmapping/sem.vert
@@ -0,0 +1,34 @@
+//TEST:COMPARE_GLSL:
+#version 450
+
+#extension GL_ARB_separate_shader_objects : enable
+#extension GL_ARB_shading_language_420pack : enable
+
+layout (location = 0) in vec4 inPos;
+layout (location = 1) in vec3 inNormal;
+layout (location = 3) in vec3 inColor;
+
+layout (binding = 0) uniform UBO
+{
+    mat4 projection;
+    mat4 model;
+    mat4 normal;
+    mat4 view;
+    int texIndex;
+} ubo;
+
+layout (location = 0) out vec3 outColor;
+layout (location = 1) out vec3 outEyePos;
+layout (location = 2) out vec3 outNormal;
+layout (location = 3) out flat int outTexIndex;
+
+// Projects the vertex and forwards colour, normalized model-view position, normal and texture index.
+void main()
+{
+    outColor = inColor;
+    outTexIndex = ubo.texIndex;
+    mat4 modelView = ubo.view * ubo.model;
+    outEyePos = normalize( vec3( modelView * inPos ) );
+    outNormal = normalize( mat3(ubo.normal) * inNormal );
+    gl_Position = ubo.projection * modelView * inPos;
+}
diff --git a/tests/glsl/sascha-willems/ssao/blur.frag b/tests/glsl/sascha-willems/ssao/blur.frag
new file mode 100644
index 000000000..23b808d9b
--- /dev/null
+++ b/tests/glsl/sascha-willems/ssao/blur.frag
@@ -0,0 +1,31 @@
+//TEST:COMPARE_GLSL:
+#version 450
+
+#extension GL_ARB_separate_shader_objects : enable
+#extension GL_ARB_shading_language_420pack : enable
+
+layout (binding = 0) uniform sampler2D samplerSSAO;
+
+layout (location = 0) in vec2 inUV;
+
+layout (location = 0) out float outFragColor;
+
+// Averages the red channel of samplerSSAO over the texel offsets
+// [-blurRange, blurRange) in x and y around inUV.
+void main()
+{
+    const int blurRange = 2;
+    vec2 texelSize = 1.0 / vec2(textureSize(samplerSSAO, 0));
+    float sum = 0.0;
+    int tapCount = 0;
+    for (int dx = -blurRange; dx < blurRange; dx++)
+    {
+        for (int dy = -blurRange; dy < blurRange; dy++)
+        {
+            vec2 offset = vec2(float(dx), float(dy)) * texelSize;
+            sum += texture(samplerSSAO, inUV + offset).r;
+            tapCount++;
+        }
+    }
+    outFragColor = sum / (float(tapCount));
+}
\ No newline at end of file
diff --git a/tests/glsl/sascha-willems/ssao/composition.frag b/tests/glsl/sascha-willems/ssao/composition.frag
new file mode 100644
index 000000000..edf49b5d3
--- /dev/null
+++ b/tests/glsl/sascha-willems/ssao/composition.frag
@@ -0,0 +1,58 @@
+//TEST:COMPARE_GLSL:
+#version 450
+
+#extension GL_ARB_separate_shader_objects : enable
+#extension GL_ARB_shading_language_420pack : enable
+
+layout (binding = 0) uniform sampler2D samplerposition;
+layout (binding = 1) uniform sampler2D samplerNormal;
+layout (binding = 2) uniform sampler2D samplerAlbedo;
+layout (binding = 3) uniform sampler2D samplerSSAO;
+layout (binding = 4) uniform sampler2D samplerSSAOBlur;
+layout (binding = 5) uniform UBO
+{
+    mat4 _dummy;
+    uint ssao;
+    uint ssaoOnly;
+    uint ssaoBlur;
+} uboParams;
+
+layout (location = 0) in vec2 inUV;
+
+layout (location = 0) out vec4 outFragColor;
+
+// Outputs the SSAO term alone, the lit albedo alone, or their product, as selected by the uboParams flags.
+void main()
+{
+    vec3 fragPos = texture(samplerposition, inUV).rgb;
+    vec3 normal = normalize(texture(samplerNormal, inUV).rgb * 2.0 - 1.0);
+    vec4 albedo = texture(samplerAlbedo, inUV);
+
+    float ssao = (uboParams.ssaoBlur == 1) ? texture(samplerSSAOBlur, inUV).r : texture(samplerSSAO, inUV).r;
+
+    vec3 lightPos = vec3(0.0);
+    vec3 L = normalize(lightPos - fragPos);
+    float NdotL = max(0.5, dot(normal, L));
+
+    // No branch below writes alpha, so give the whole output a defined value first
+    outFragColor = vec4(0.0, 0.0, 0.0, 1.0);
+
+    if (uboParams.ssaoOnly == 1)
+    {
+        outFragColor.rgb = vec3(ssao);
+    }
+    else
+    {
+        vec3 baseColor = albedo.rgb * NdotL;
+
+        // ssaoOnly != 1 is already known here, so the occlusion term always modulates the base colour
+        if (uboParams.ssao == 1)
+        {
+            outFragColor.rgb = vec3(ssao) * baseColor;
+        }
+        else
+        {
+            outFragColor.rgb = baseColor;
+        }
+    }
+}
\ No newline at end of file
diff --git a/tests/glsl/sascha-willems/ssao/fullscreen.vert b/tests/glsl/sascha-willems/ssao/fullscreen.vert
new file mode 100644
index 000000000..117eb02ec
--- /dev/null
+++ b/tests/glsl/sascha-willems/ssao/fullscreen.vert
@@ -0,0 +1,21 @@
+//TEST:COMPARE_GLSL:
+#version 450
+
+#extension GL_ARB_separate_shader_objects : enable
+#extension GL_ARB_shading_language_420pack : enable
+
+layout (location = 0) out vec2 outUV;
+
+out gl_PerVertex
+{
+    vec4 gl_Position;
+};
+
+void main()
+{
+    // Bit 0 of the vertex index drives u and bit 1 drives v; each is either 0 or 2
+    int u = (gl_VertexIndex << 1) & 2;
+    int v = gl_VertexIndex & 2;
+    outUV = vec2(u, v);
+    gl_Position = vec4(outUV * 2.0f - 1.0f, 0.0f, 1.0f);
+}
diff --git a/tests/glsl/sascha-willems/ssao/gbuffer.frag b/tests/glsl/sascha-willems/ssao/gbuffer.frag
new file mode 100644
index 000000000..9e81b9231
--- /dev/null
+++ b/tests/glsl/sascha-willems/ssao/gbuffer.frag
@@ -0,0 +1,33 @@
+//TEST:COMPARE_GLSL:
+#version 450
+
+#extension GL_ARB_separate_shader_objects : enable
+#extension GL_ARB_shading_language_420pack : enable
+
+layout (location = 0) in vec3 inNormal;
+layout (location = 1) in vec2 inUV;
+layout (location = 2) in vec3 inColor;
+layout (location = 3) in vec3 inPos;
+
+layout (location = 0) out vec4 outPosition;
+layout (location = 1) out vec4 outNormal;
+layout (location = 2) out vec4 outAlbedo;
+
+const float NEAR_PLANE = 0.1f; //todo: specialization const
+const float FAR_PLANE = 64.0f; //todo: specialization const
+
+// Maps depth (0 yields NEAR_PLANE, 1 yields FAR_PLANE) to a linear distance.
+float linearDepth(float depth)
+{
+    float zRemapped = depth * 2.0f - 1.0f;
+    float numerator = 2.0f * NEAR_PLANE * FAR_PLANE;
+    float denominator = FAR_PLANE + NEAR_PLANE - zRemapped * (FAR_PLANE - NEAR_PLANE);
+    return numerator / denominator;
+}
+
+void main()
+{
+    outPosition = vec4(inPos, linearDepth(gl_FragCoord.z));
+    outNormal = vec4(normalize(inNormal) * 0.5 + 0.5, 1.0);
+    outAlbedo = vec4(inColor * 2.0, 1.0);
+}
\ No newline at end of file
diff --git a/tests/glsl/sascha-willems/ssao/gbuffer.vert b/tests/glsl/sascha-willems/ssao/gbuffer.vert
new file mode 100644
index 000000000..3160cf949
--- /dev/null
+++ b/tests/glsl/sascha-willems/ssao/gbuffer.vert
@@ -0,0 +1,45 @@
+//TEST:COMPARE_GLSL:
+#version 450
+
+#extension GL_ARB_separate_shader_objects : enable
+#extension GL_ARB_shading_language_420pack : enable
+
+layout (location = 0) in vec4 inPos;
+layout (location = 1) in vec2 inUV;
+layout (location = 2) in vec3 inColor;
+layout (location = 3) in vec3 inNormal;
+
+layout (binding = 0) uniform UBO
+{
+    mat4 projection;
+    mat4 model;
+    mat4 view;
+} ubo;
+
+layout (location = 0) out vec3 outNormal;
+layout (location = 1) out vec2 outUV;
+layout (location = 2) out vec3 outColor;
+layout (location = 3) out vec3 outPos;
+
+out gl_PerVertex
+{
+    vec4 gl_Position;
+};
+
+void main()
+{
+    gl_Position = ubo.projection * ubo.view * ubo.model * inPos;
+
+    // Pass-through attributes
+    outUV = inUV;
+    outColor = inColor;
+
+    // Model-view transform shared by the two view-space outputs below
+    mat4 modelView = ubo.view * ubo.model;
+
+    // Vertex position in view space
+    outPos = vec3(modelView * inPos);
+
+    // Normal in view space
+    outNormal = transpose(inverse(mat3(modelView))) * inNormal;
+}
diff --git a/tests/glsl/sascha-willems/ssao/ssao.frag b/tests/glsl/sascha-willems/ssao/ssao.frag
new file mode 100644
index 000000000..cdcbfd3ec
--- /dev/null
+++ b/tests/glsl/sascha-willems/ssao/ssao.frag
@@ -0,0 +1,73 @@
+//TEST:COMPARE_GLSL:
+#version 450
+
+#extension GL_ARB_separate_shader_objects : enable
+#extension GL_ARB_shading_language_420pack : enable
+
+layout (binding = 0) uniform sampler2D samplerPositionDepth;
+layout (binding = 1) uniform sampler2D samplerNormal;
+layout (binding = 2) uniform sampler2D ssaoNoise;
+
+layout (constant_id = 0) const int SSAO_KERNEL_SIZE = 64;
+layout (constant_id = 1) const float SSAO_RADIUS = 0.5;
+
+layout (binding = 3) uniform UBOSSAOKernel
+{
+    vec4 samples[SSAO_KERNEL_SIZE];
+} uboSSAOKernel;
+
+layout (binding = 4) uniform UBO
+{
+    mat4 projection;
+} ubo;
+
+layout (location = 0) in vec2 inUV;
+
+layout (location = 0) out float outFragColor;
+
+void main()
+{
+    // Get G-Buffer values
+    vec3 fragPos = texture(samplerPositionDepth, inUV).rgb;
+    vec3 normal = normalize(texture(samplerNormal, inUV).rgb * 2.0 - 1.0);
+
+    // Get a random vector using a noise lookup
+    ivec2 texDim = textureSize(samplerPositionDepth, 0);
+    ivec2 noiseDim = textureSize(ssaoNoise, 0);
+    const vec2 noiseUV = vec2(float(texDim.x)/float(noiseDim.x), float(texDim.y)/float(noiseDim.y)) * inUV;
+    vec3 randomVec = texture(ssaoNoise, noiseUV).xyz * 2.0 - 1.0;
+
+    // Create TBN matrix
+    vec3 tangent = normalize(randomVec - normal * dot(randomVec, normal));
+    vec3 bitangent = cross(tangent, normal);
+    mat3 TBN = mat3(tangent, bitangent, normal);
+
+    // Calculate occlusion value
+    float occlusion = 0.0f;
+    for(int i = 0; i < SSAO_KERNEL_SIZE; i++)
+    {
+        vec3 samplePos = TBN * uboSSAOKernel.samples[i].xyz;
+        samplePos = fragPos + samplePos * SSAO_RADIUS;
+
+        // project
+        vec4 offset = vec4(samplePos, 1.0f);
+        offset = ubo.projection * offset;
+        offset.xyz /= offset.w;
+        offset.xyz = offset.xyz * 0.5f + 0.5f;
+
+        float sampleDepth = -texture(samplerPositionDepth, offset.xy).w;
+
+#define RANGE_CHECK 1
+#ifdef RANGE_CHECK
+        // Range check
+        float rangeCheck = smoothstep(0.0f, 1.0f, SSAO_RADIUS / abs(fragPos.z - sampleDepth));
+        occlusion += (sampleDepth >= samplePos.z ? 1.0f : 0.0f) * rangeCheck;
+#else
+        occlusion += (sampleDepth >= samplePos.z ? 1.0f : 0.0f);
+#endif
+    }
+    occlusion = 1.0 - (occlusion / float(SSAO_KERNEL_SIZE));
+
+    outFragColor = occlusion;
+}
+
diff --git a/tests/glsl/sascha-willems/subpasses/composition.frag b/tests/glsl/sascha-willems/subpasses/composition.frag
new file mode 100644
index 000000000..0a538471a
--- /dev/null
+++ b/tests/glsl/sascha-willems/subpasses/composition.frag
@@ -0,0 +1,82 @@
+//TEST:COMPARE_GLSL:
+#version 450
+
+#extension GL_ARB_separate_shader_objects : enable
+#extension GL_ARB_shading_language_420pack : enable
+
+layout (input_attachment_index = 0, binding = 0) uniform subpassInput samplerposition;
+layout (input_attachment_index = 1, binding = 1) uniform subpassInput samplerNormal;
+layout (input_attachment_index = 2, binding = 2) uniform subpassInput samplerAlbedo;
+
+layout (location = 0) in vec2 inUV;
+
+layout (location = 0) out vec4 outFragcolor;
+layout (location = 1) out vec4 outPosition;
+layout (location = 2) out vec4 outNormal;
+layout (location = 3) out vec4 outAlbedo;
+
+layout (constant_id = 0) const int NUM_LIGHTS = 64;
+
+struct Light {
+    vec4 position;
+    vec3 color;
+    float radius;
+};
+
+layout (binding = 3) uniform UBO
+{
+    vec4 viewPos;
+    Light lights[NUM_LIGHTS];
+} ubo;
+
+
+void main()
+{
+    // Read G-Buffer values from previous sub pass
+    vec3 fragPos = subpassLoad(samplerposition).rgb;
+    vec3 normal = subpassLoad(samplerNormal).rgb;
+    vec4 albedo = subpassLoad(samplerAlbedo);
+
+    const float ambient = 0.15;
+
+    // Ambient part
+    vec3 fragcolor = albedo.rgb * ambient;
+
+    for(int i = 0; i < NUM_LIGHTS; ++i)
+    {
+        // Vector to light
+        vec3 L = ubo.lights[i].position.xyz - fragPos;
+        // Distance from light to fragment position
+        float dist = length(L);
+
+        // Viewer to fragment
+        vec3 V = ubo.viewPos.xyz - fragPos;
+        V = normalize(V);
+
+        // Light to fragment
+        L = normalize(L);
+
+        // Attenuation
+        float atten = ubo.lights[i].radius / (dist * dist + 1.0);
+
+        // Diffuse part
+        vec3 N = normalize(normal);
+        float NdotL = max(0.0, dot(N, L));
+        vec3 diff = ubo.lights[i].color * albedo.rgb * NdotL * atten;
+
+        // Specular part
+        // Specular map values are stored in alpha of albedo mrt
+        vec3 R = reflect(-L, N);
+        float NdotR = max(0.0, dot(R, V));
+        //vec3 spec = ubo.lights[i].color * albedo.a * pow(NdotR, 32.0) * atten;
+
+        fragcolor += diff;// + spec;
+    }
+
+    outFragcolor = vec4(fragcolor, 1.0);
+
+    // Write G-Buffer attachments to avoid undefined behaviour (validation error)
+    outPosition = vec4(0.0);
+    outNormal = vec4(0.0);
+    outAlbedo = vec4(0.0);
+}
\ No newline at end of file
diff --git a/tests/glsl/sascha-willems/subpasses/composition.vert b/tests/glsl/sascha-willems/subpasses/composition.vert
new file mode 100644
index 000000000..f463f9182
--- /dev/null
+++ b/tests/glsl/sascha-willems/subpasses/composition.vert
@@ -0,0 +1,21 @@
+//TEST:COMPARE_GLSL:
+#version 450
+
+#extension GL_ARB_separate_shader_objects : enable
+#extension GL_ARB_shading_language_420pack : enable
+
+layout (location = 0) out vec2 outUV;
+
+out gl_PerVertex
+{
+    vec4 gl_Position;
+};
+
+void main()
+{
+    // u comes from bit 0 of the vertex index, v from bit 1; both are 0 or 2
+    int u = (gl_VertexIndex << 1) & 2;
+    int v = gl_VertexIndex & 2;
+    outUV = vec2(u, v);
+    gl_Position = vec4(outUV * 2.0f - 1.0f, 0.0f, 1.0f);
+}
\ No newline at end of file
diff --git a/tests/glsl/sascha-willems/subpasses/gbuffer.frag b/tests/glsl/sascha-willems/subpasses/gbuffer.frag
new file mode 100644
index 000000000..389d1aa80
--- /dev/null
+++ b/tests/glsl/sascha-willems/subpasses/gbuffer.frag
@@ -0,0 +1,46 @@
+//TEST:COMPARE_GLSL:
+#version 450
+
+#extension GL_ARB_separate_shader_objects : enable
+#extension GL_ARB_shading_language_420pack : enable
+
+layout (binding = 1) uniform sampler2D samplerColor;
+layout (binding = 2) uniform sampler2D samplerNormalMap;
+
+layout (location = 0) in vec3 inNormal;
+layout (location = 1) in vec3 inColor;
+layout (location = 2) in vec3 inWorldPos;
+
+layout (location = 0) out vec4 outColor;
+layout (location = 1) out vec4 outPosition;
+layout (location = 2) out vec4 outNormal;
+layout (location = 3) out vec4 outAlbedo;
+
+layout (constant_id = 0) const float NEAR_PLANE = 0.1f;
+layout (constant_id = 1) const float FAR_PLANE = 256.0f;
+
+// Maps depth (0 yields NEAR_PLANE, 1 yields FAR_PLANE) to a linear distance.
+float linearDepth(float depth)
+{
+    float zRemapped = depth * 2.0f - 1.0f;
+    float numerator = 2.0f * NEAR_PLANE * FAR_PLANE;
+    float denominator = FAR_PLANE + NEAR_PLANE - zRemapped * (FAR_PLANE - NEAR_PLANE);
+    return numerator / denominator;
+}
+
+void main()
+{
+    // Position in xyz, linearized depth in the alpha component
+    outPosition = vec4(inWorldPos, linearDepth(gl_FragCoord.z));
+
+    // Normalized input normal with its y component negated
+    vec3 flippedN = normalize(inNormal);
+    flippedN.y = -flippedN.y;
+    outNormal = vec4(flippedN, 1.0);
+
+    // NOTE(review): only rgb is written here; outAlbedo.a is never assigned in this shader
+    outAlbedo.rgb = inColor;
+
+    // Write color attachments to avoid undefined behaviour (validation error)
+    outColor = vec4(0.0);
+}
\ No newline at end of file
diff --git a/tests/glsl/sascha-willems/subpasses/gbuffer.vert b/tests/glsl/sascha-willems/subpasses/gbuffer.vert
new file mode 100644
index 000000000..c57d04e6d
--- /dev/null
+++ b/tests/glsl/sascha-willems/subpasses/gbuffer.vert
@@ -0,0 +1,44 @@
+//TEST:COMPARE_GLSL:
+#version 450
+
+#extension GL_ARB_separate_shader_objects : enable
+#extension GL_ARB_shading_language_420pack : enable
+
+layout (location = 0) in vec4 inPos;
+layout (location = 1) in vec3 inColor;
+layout (location = 2) in vec3 inNormal;
+
+layout (binding = 0) uniform UBO
+{
+    mat4 projection;
+    mat4 model;
+    mat4 view;
+} ubo;
+
+layout (location = 0) out vec3 outNormal;
+layout (location = 1) out vec3 outColor;
+layout (location = 2) out vec3 outWorldPos;
+// NOTE(review): outTangent is declared but never written by main() below
+layout (location = 3) out vec3 outTangent;
+
+out gl_PerVertex
+{
+    vec4 gl_Position;
+};
+
+void main()
+{
+    gl_Position = ubo.projection * ubo.view * ubo.model * inPos;
+
+    // Vertex position in world space
+    outWorldPos = vec3(ubo.model * inPos);
+    // GL to Vulkan coord space
+    outWorldPos.y = -outWorldPos.y;
+
+    // Normal in world space
+    mat3 mNormal = transpose(inverse(mat3(ubo.model)));
+    outNormal = mNormal * normalize(inNormal);
+
+    // Currently just vertex color
+    outColor = inColor;
+}
diff --git a/tests/glsl/sascha-willems/subpasses/transparent.frag b/tests/glsl/sascha-willems/subpasses/transparent.frag
new file mode 100644
index 000000000..f293f2b8c
--- /dev/null
+++ b/tests/glsl/sascha-willems/subpasses/transparent.frag
@@ -0,0 +1,34 @@
+//TEST:COMPARE_GLSL:
+#version 450
+
+#extension GL_ARB_separate_shader_objects : enable
+#extension GL_ARB_shading_language_420pack : enable
+
+layout (input_attachment_index = 0, binding = 1) uniform subpassInput samplerPositionDepth;
+layout (binding = 2) uniform sampler2D samplerTexture;
+
+layout (location = 0) in vec3 inColor;
+layout (location = 1) in vec2 inUV;
+
+layout (location = 0) out vec4 outColor;
+
+layout (constant_id = 0) const float NEAR_PLANE = 0.1f;
+layout (constant_id = 1) const float FAR_PLANE = 256.0f;
+
+float linearDepth(float depth)
+{
+    float z = depth * 2.0f - 1.0f;
+    return (2.0f * NEAR_PLANE * FAR_PLANE) / (FAR_PLANE + NEAR_PLANE - z * (FAR_PLANE - NEAR_PLANE));
+}
+
+void main ()
+{
+    // Sample depth from deferred depth buffer and discard if obscured
+    float depth = subpassLoad(samplerPositionDepth).a;
+    if ((depth != 0.0) && (linearDepth(gl_FragCoord.z) > depth))
+    {
+        discard;
+    }
+
+    outColor = texture(samplerTexture, inUV);
+}
diff --git a/tests/glsl/sascha-willems/subpasses/transparent.vert b/tests/glsl/sascha-willems/subpasses/transparent.vert
new file mode 100644
index 000000000..8d667b820
--- /dev/null
+++ b/tests/glsl/sascha-willems/subpasses/transparent.vert
@@ -0,0 +1,28 @@
+//TEST:COMPARE_GLSL:
+#version 450
+
+#extension GL_ARB_separate_shader_objects : enable
+#extension GL_ARB_shading_language_420pack : enable
+
+layout (location = 0) in vec4 inPos;
+layout (location = 1) in vec3 inColor;
+layout (location = 2) in vec3 inNormal;
+layout (location = 3) in vec2 inUV;
+
+layout (binding = 0) uniform UBO
+{
+    mat4 projection;
+    mat4 model;
+    mat4 view;
+} ubo;
+
+layout (location = 0) out vec3 outColor;
+layout (location = 1) out vec2 outUV;
+
+void main ()
+{
+    outColor = inColor;
+    outUV = inUV;
+
+    gl_Position = ubo.projection * ubo.view * ubo.model * vec4(inPos.xyz, 1.0);
+}
\ No newline at end of file
diff --git a/tests/glsl/sascha-willems/terraintessellation/skysphere.frag b/tests/glsl/sascha-willems/terraintessellation/skysphere.frag
new file mode 100644
index 000000000..7d87b21cd
--- /dev/null
+++ b/tests/glsl/sascha-willems/terraintessellation/skysphere.frag
@@ -0,0 +1,14 @@
+//TEST:COMPARE_GLSL:
+#version 450 core
+
+layout (location = 0) in vec2 inUV;
+
+layout (set = 0, binding = 1) uniform sampler2D samplerColorMap;
+
+layout (location = 0) out vec4 outFragColor;
+
+void main(void)
+{
+    // Colour map rgb with alpha forced to 1.0
+    outFragColor = vec4(texture(samplerColorMap, inUV).rgb, 1.0);
+}
diff --git a/tests/glsl/sascha-willems/terraintessellation/skysphere.vert b/tests/glsl/sascha-willems/terraintessellation/skysphere.vert
new file mode 100644
index 000000000..74dd7ad3b
--- /dev/null
+++ b/tests/glsl/sascha-willems/terraintessellation/skysphere.vert
@@ -0,0 +1,24 @@
+//TEST:COMPARE_GLSL:
+#version 450 core
+
+layout (location = 0) in vec3 inPos;
+layout (location = 2) in vec2 inUV;
+
+layout (location = 0) out vec2 outUV;
+
+layout (set = 0, binding = 0) uniform UBO
+{
+    mat4 mvp;
+} ubo;
+
+out gl_PerVertex
+{
+    vec4 gl_Position;
+};
+
+void main(void)
+{
+    gl_Position = ubo.mvp * vec4(inPos, 1.0);
+    // Forward the UVs with the t coordinate flipped
+    outUV = vec2(inUV.s, 1.0 - inUV.t);
+}
diff --git a/tests/glsl/sascha-willems/terraintessellation/terrain.frag b/tests/glsl/sascha-willems/terraintessellation/terrain.frag
new file mode 100644
index 000000000..fcd8612ae
--- /dev/null
+++ b/tests/glsl/sascha-willems/terraintessellation/terrain.frag
@@ -0,0 +1,66 @@
+//TEST:COMPARE_GLSL:
+#version 450
+
+#extension GL_ARB_separate_shader_objects : enable
+#extension GL_ARB_shading_language_420pack : enable
+
+layout (set = 0, binding = 1) uniform sampler2D samplerHeight;
+layout (set = 0, binding = 2) uniform sampler2DArray samplerLayers;
+
+layout (location = 0) in vec3 inNormal;
+layout (location = 1) in vec2 inUV;
+layout (location = 2) in vec3 inViewVec;
+layout (location = 3) in vec3 inLightVec;
+layout (location = 4) in vec3 inEyePos;
+layout (location = 5) in vec3 inWorldPos;
+
+layout (location = 0) out vec4 outFragColor;
+
+vec3 sampleTerrainLayer()
+{
+    // Height ranges (x = start, y = end) used to weight each texture layer
+    const vec2 layers[6] = vec2[6](
+        vec2(-10.0, 10.0),
+        vec2(5.0, 45.0),
+        vec2(45.0, 80.0),
+        vec2(75.0, 100.0),
+        vec2(95.0, 140.0),
+        vec2(140.0, 190.0));
+
+    vec3 blended = vec3(0.0);
+
+    // Get height from displacement map
+    float height = textureLod(samplerHeight, inUV, 0.0).r * 255.0;
+
+    for (int layer = 0; layer < 6; layer++)
+    {
+        float range = layers[layer].y - layers[layer].x;
+        float weight = (range - abs(height - layers[layer].y)) / range;
+        weight = max(0.0, weight);
+        blended += weight * texture(samplerLayers, vec3(inUV * 16.0, layer)).rgb;
+    }
+
+    return blended;
+}
+
+float fog(float density)
+{
+    // LOG2 holds -1/ln(2), so exp2(d * d * LOG2) evaluates exp(-d * d)
+    const float LOG2 = -1.442695;
+    float dist = gl_FragCoord.z / gl_FragCoord.w * 0.1;
+    float d = density * dist;
+    return 1.0 - clamp(exp2(d * d * LOG2), 0.0, 1.0);
+}
+
+void main()
+{
+    vec3 N = normalize(inNormal);
+    vec3 L = normalize(inLightVec);
+    vec3 ambient = vec3(0.5);
+    vec3 diffuse = max(dot(N, L), 0.0) * vec3(1.0);
+
+    vec4 color = vec4((ambient + diffuse) * sampleTerrainLayer(), 1.0);
+
+    const vec4 fogColor = vec4(0.47, 0.5, 0.67, 0.0);
+    outFragColor = mix(color, fogColor, fog(0.25));
+}
diff --git a/tests/glsl/sascha-willems/terraintessellation/terrain.tesc b/tests/glsl/sascha-willems/terraintessellation/terrain.tesc
new file mode 100644
index 000000000..133e44ea7
--- /dev/null
+++ b/tests/glsl/sascha-willems/terraintessellation/terrain.tesc
@@ -0,0 +1,118 @@
+//TEST:COMPARE_GLSL:
+#version 450
+
+#extension GL_ARB_separate_shader_objects : enable
+#extension GL_ARB_shading_language_420pack : enable
+
+layout(set = 0, binding = 0) uniform UBO
+{
+    mat4 projection;
+    mat4 modelview;
+    vec4 lightPos;
+    vec4 frustumPlanes[6];
+    float displacementFactor;
+    float tessellationFactor;
+    vec2 viewportDim;
+    float tessellatedEdgeSize;
+} ubo;
+
+layout(set = 0, binding = 1) uniform sampler2D samplerHeight;
+
+layout (vertices = 4) out;
+
+layout (location = 0) in vec3 inNormal[];
+layout (location = 1) in vec2 inUV[];
+
+layout (location = 0) out vec3 outNormal[4];
+layout (location = 1) out vec2 outUV[4];
+
+// Calculate the tessellation factor based on screen space
+// dimensions of the edge
+float screenSpaceTessFactor(vec4 p0, vec4 p1)
+{
+    // Calculate edge mid point
+    vec4 midPoint = 0.5 * (p0 + p1);
+    // Sphere radius is half the distance between the control points
+    float radius = distance(p0, p1) / 2.0;
+    vec4 radiusOffset = vec4(radius, vec3(0.0));
+
+    // View space
+    vec4 v0 = ubo.modelview * midPoint;
+
+    // Project into clip space
+    vec4 clip0 = (ubo.projection * (v0 - radiusOffset));
+    vec4 clip1 = (ubo.projection * (v0 + radiusOffset));
+
+    // Get normalized device coordinates
+    clip0 /= clip0.w;
+    clip1 /= clip1.w;
+
+    // Convert to viewport coordinates
+    clip0.xy *= ubo.viewportDim;
+    clip1.xy *= ubo.viewportDim;
+
+    // Return the tessellation factor based on the screen size
+    // given by the distance of the two edge control points in screen space
+    // and a reference (min.) tessellation size for the edge set by the application
+    return clamp(distance(clip0, clip1) / ubo.tessellatedEdgeSize * ubo.tessellationFactor, 1.0, 64.0);
+}
+
+// Checks the current patch's visibility against the frustum using a sphere check
+// The sphere radius is the fixed constant declared below
+bool frustumCheck()
+{
+    // Fixed radius (increase if patch size is increased in example)
+    const float radius = 8.0f;
+    vec4 pos = gl_in[gl_InvocationID].gl_Position;
+    pos.y -= textureLod(samplerHeight, inUV[0], 0.0).r * ubo.displacementFactor;
+
+    // Check sphere against frustum planes
+    for (int i = 0; i < 6; i++) {
+        if (dot(pos, ubo.frustumPlanes[i]) + radius < 0.0)
+        {
+            return false;
+        }
+    }
+    return true;
+}
+
+void main()
+{
+    // Only invocation 0 writes the tessellation levels
+    if (gl_InvocationID == 0)
+    {
+        if (!frustumCheck())
+        {
+            gl_TessLevelInner[0] = 0.0;
+            gl_TessLevelInner[1] = 0.0;
+            gl_TessLevelOuter[0] = 0.0;
+            gl_TessLevelOuter[1] = 0.0;
+            gl_TessLevelOuter[2] = 0.0;
+            gl_TessLevelOuter[3] = 0.0;
+        }
+        else if (ubo.tessellationFactor > 0.0)
+        {
+            gl_TessLevelOuter[0] = screenSpaceTessFactor(gl_in[3].gl_Position, gl_in[0].gl_Position);
+            gl_TessLevelOuter[1] = screenSpaceTessFactor(gl_in[0].gl_Position, gl_in[1].gl_Position);
+            gl_TessLevelOuter[2] = screenSpaceTessFactor(gl_in[1].gl_Position, gl_in[2].gl_Position);
+            gl_TessLevelOuter[3] = screenSpaceTessFactor(gl_in[2].gl_Position, gl_in[3].gl_Position);
+            gl_TessLevelInner[0] = mix(gl_TessLevelOuter[0], gl_TessLevelOuter[3], 0.5);
+            gl_TessLevelInner[1] = mix(gl_TessLevelOuter[2], gl_TessLevelOuter[1], 0.5);
+        }
+        else
+        {
+            // Tessellation factor can be set to zero by example
+            // to demonstrate a simple passthrough
+            gl_TessLevelInner[0] = 1.0;
+            gl_TessLevelInner[1] = 1.0;
+            gl_TessLevelOuter[0] = 1.0;
+            gl_TessLevelOuter[1] = 1.0;
+            gl_TessLevelOuter[2] = 1.0;
+            gl_TessLevelOuter[3] = 1.0;
+        }
+    }
+
+    gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;
+    outNormal[gl_InvocationID] = inNormal[gl_InvocationID];
+    outUV[gl_InvocationID] = inUV[gl_InvocationID];
+}
diff --git a/tests/glsl/sascha-willems/terraintessellation/terrain.tese b/tests/glsl/sascha-willems/terraintessellation/terrain.tese
new file mode 100644
index 000000000..1a74ac377
--- /dev/null
+++ b/tests/glsl/sascha-willems/terraintessellation/terrain.tese
@@ -0,0 +1,58 @@
+//TEST:COMPARE_GLSL:
+#version 450
+
+#extension GL_ARB_separate_shader_objects : enable
+#extension GL_ARB_shading_language_420pack : enable
+
+layout (set = 0, binding = 0) uniform UBO
+{
+    mat4 projection;
+    mat4 modelview;
+    vec4 lightPos;
+    vec4 frustumPlanes[6];
+    float displacementFactor;
+    float tessellationFactor;
+    vec2 viewportDim;
+    float tessellatedEdgeSize;
+} ubo;
+
+layout (set = 0, binding = 1) uniform sampler2D displacementMap;
+
+layout(quads, equal_spacing, cw) in;
+
+layout (location = 0) in vec3 inNormal[];
+layout (location = 1) in vec2 inUV[];
+
+layout (location = 0) out vec3 outNormal;
+layout (location = 1) out vec2 outUV;
+layout (location = 2) out vec3 outViewVec;
+layout (location = 3) out vec3 outLightVec;
+layout (location = 4) out vec3 outEyePos;
+layout (location = 5) out vec3 outWorldPos;
+
+void main()
+{
+    // Bilinear interpolation of the UVs: along x on edges 0-1 and 3-2, then along y
+    vec2 uvEdge01 = mix(inUV[0], inUV[1], gl_TessCoord.x);
+    vec2 uvEdge32 = mix(inUV[3], inUV[2], gl_TessCoord.x);
+    outUV = mix(uvEdge01, uvEdge32, gl_TessCoord.y);
+
+    vec3 nEdge01 = mix(inNormal[0], inNormal[1], gl_TessCoord.x);
+    vec3 nEdge32 = mix(inNormal[3], inNormal[2], gl_TessCoord.x);
+    outNormal = mix(nEdge01, nEdge32, gl_TessCoord.y);
+
+    // Same interpolation for the control point positions
+    vec4 posEdge01 = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);
+    vec4 posEdge32 = mix(gl_in[3].gl_Position, gl_in[2].gl_Position, gl_TessCoord.x);
+    vec4 pos = mix(posEdge01, posEdge32, gl_TessCoord.y);
+    // Displace along y by the sampled height
+    pos.y -= textureLod(displacementMap, outUV, 0.0).r * ubo.displacementFactor;
+    // Perspective projection
+    gl_Position = ubo.projection * ubo.modelview * pos;
+
+    // Lighting vectors derived from the displaced position
+    outViewVec = -pos.xyz;
+    outLightVec = normalize(ubo.lightPos.xyz + outViewVec);
+    outWorldPos = pos.xyz;
+    outEyePos = vec3(ubo.modelview * pos);
+}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/terraintessellation/terrain.vert b/tests/glsl/sascha-willems/terraintessellation/terrain.vert new file mode 100644 index 000000000..4a7c6ec66 --- /dev/null +++ b/tests/glsl/sascha-willems/terraintessellation/terrain.vert @@ -0,0 +1,24 @@ +//TEST:COMPARE_GLSL: +#version 450 + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (location = 0) in vec3 inPos; +layout (location = 1) in vec3 inNormal; +layout (location = 2) in vec2 inUV; + +layout (location = 0) out vec3 outNormal; +layout (location = 1) out vec2 outUV; + +out gl_PerVertex +{ + vec4 gl_Position; +}; + +void main(void) /* Pass-through vertex stage: emits inPos with w = 1.0 and forwards UV and normal unchanged; no matrix transform is applied here. */ +{ + gl_Position = vec4(inPos.xyz, 1.0); + outUV = inUV; + outNormal = inNormal; +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/tessellation/base.frag b/tests/glsl/sascha-willems/tessellation/base.frag new file mode 100644 index 000000000..3bdd8b954 --- /dev/null +++ b/tests/glsl/sascha-willems/tessellation/base.frag @@ -0,0 +1,22 @@ +//TEST:COMPARE_GLSL: +#version 450 + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (binding = 2) uniform sampler2D samplerColorMap; + +layout (location = 0) in vec3 inNormal; +layout (location = 1) in vec2 inUV; + +layout (location = 0) out vec4 outFragColor; + +void main() /* Fragment stage: scales the sampled colour by dot(N, L) against a hard-coded light direction. */ +{ + vec3 N = normalize(inNormal); + vec3 L = normalize(vec3(0.0, -4.0, 4.0)); + + vec4 color = texture(samplerColorMap, inUV); + + outFragColor.rgb = vec3(clamp(max(dot(N,L), 0.0), 0.2, 1.0)) * color.rgb * 1.5; /* the light term is clamped to [0.2, 1.0]; only .rgb is written, outFragColor.a is never assigned in this shader */ +} diff --git a/tests/glsl/sascha-willems/tessellation/base.vert b/tests/glsl/sascha-willems/tessellation/base.vert new file mode 100644 index 000000000..0bce97d99 --- /dev/null +++ b/tests/glsl/sascha-willems/tessellation/base.vert @@ -0,0 +1,24 @@ +//TEST:COMPARE_GLSL: +#version 450 + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (location = 0) in vec3 inPos; +layout (location = 1) in vec3 inNormal; +layout (location = 2) in vec2 inUV; + +layout (location = 0) out vec3 outNormal; +layout (location = 1) out vec2 outUV; + +out gl_PerVertex +{ + vec4 gl_Position; +}; + +void main(void) /* Pass-through vertex stage: emits inPos with w = 1.0 and forwards normal and UV unchanged. */ +{ + gl_Position = vec4(inPos.xyz, 1.0); + outNormal = inNormal; + outUV = inUV; +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/tessellation/passthrough.tesc b/tests/glsl/sascha-willems/tessellation/passthrough.tesc new file mode 100644 index 000000000..348bb43d7 --- /dev/null +++ b/tests/glsl/sascha-willems/tessellation/passthrough.tesc @@ -0,0 +1,28 @@ +//TEST:COMPARE_GLSL: +#version 450 + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (vertices = 3) out; + +layout (location = 0) in vec3 inNormal[]; +layout (location = 1) in vec2 inUV[]; + +layout (location = 0) out vec3 outNormal[3]; +layout (location = 1) out vec2 outUV[3]; + +void main(void) /* Tessellation control stage with 3 output vertices: each invocation copies its own position, normal and UV through unchanged. */ +{ + if (gl_InvocationID == 0) /* only invocation 0 writes the tessellation levels, all fixed at 1.0 */ + { + gl_TessLevelInner[0] = 1.0; + gl_TessLevelOuter[0] = 1.0; + gl_TessLevelOuter[1] = 1.0; + gl_TessLevelOuter[2] = 1.0; + } + + gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position; + outNormal[gl_InvocationID] = inNormal[gl_InvocationID]; + outUV[gl_InvocationID] = inUV[gl_InvocationID]; +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/tessellation/passthrough.tese b/tests/glsl/sascha-willems/tessellation/passthrough.tese new file mode 100644 index 000000000..42dc5c562 --- /dev/null +++ b/tests/glsl/sascha-willems/tessellation/passthrough.tese @@ -0,0 +1,31 @@ +//TEST:COMPARE_GLSL: +#version 450 + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (triangles) in; + +layout (binding = 1) uniform UBO +{ + mat4 projection; + mat4 model; + float tessAlpha; +} ubo; + +layout (location = 0) in vec3 inNormal[]; +layout (location = 1) in vec2 inUV[]; + +layout (location = 0) out vec3 outNormal; +layout (location = 1) out vec2 outUV; + +void main(void) /* Tessellation evaluation stage for triangle patches: weights the three patch vertices by gl_TessCoord.xyz; ubo.tessAlpha is declared but not read in this shader. */ +{ + gl_Position = (gl_TessCoord.x * gl_in[0].gl_Position) + + (gl_TessCoord.y * gl_in[1].gl_Position) + + (gl_TessCoord.z * gl_in[2].gl_Position); + gl_Position = ubo.projection * ubo.model * gl_Position; /* gl_Position is read back and overwritten with the projected value */ + + outNormal = gl_TessCoord.x*inNormal[0] + gl_TessCoord.y*inNormal[1] + gl_TessCoord.z*inNormal[2]; + outUV = gl_TessCoord.x*inUV[0] + gl_TessCoord.y*inUV[1] + gl_TessCoord.z*inUV[2]; +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/tessellation/pntriangles.tesc b/tests/glsl/sascha-willems/tessellation/pntriangles.tesc new file mode 100644 index 000000000..90755153c --- /dev/null +++ b/tests/glsl/sascha-willems/tessellation/pntriangles.tesc @@ -0,0 +1,87 @@ +//TEST:COMPARE_GLSL: +#version 450 + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +// PN patch data +struct PnPatch +{ + float b210; + float b120; + float b021; + float b012; + float b102; + float b201; + float b111; + float n110; + float n011; + float n101; +}; + +// tessellation levels +layout (binding = 0) uniform UBO +{ + float tessLevel; +} ubo; + +layout(vertices=3) out; + +layout(location = 0) in vec3 inNormal[]; +layout(location = 1) in vec2 inUV[]; + +layout(location = 0) out vec3 outNormal[3]; +layout(location = 3) out vec2 outUV[3]; +layout(location = 6) out PnPatch outPatch[3]; + +float wij(int i, int j) /* Returns the dot product of the edge from vertex i to vertex j with the normal at vertex i. */ +{ + return dot(gl_in[j].gl_Position.xyz - gl_in[i].gl_Position.xyz, inNormal[i]); +} + +float vij(int i, int j) /* Returns 2 * dot(edge, Ni + Nj) / dot(edge, edge) for the edge from vertex i to vertex j. */ +{ + vec3 Pj_minus_Pi = gl_in[j].gl_Position.xyz + - gl_in[i].gl_Position.xyz; + vec3 Ni_plus_Nj = inNormal[i]+inNormal[j]; + return 2.0*dot(Pj_minus_Pi, Ni_plus_Nj)/dot(Pj_minus_Pi, Pj_minus_Pi); +} + +void main() /* Tessellation control stage with 3 output vertices: passes position, normal and UV through and fills this invocation's PnPatch entry with scalar control values. */ +{ + // get data + gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position; + outNormal[gl_InvocationID] = inNormal[gl_InvocationID]; + outUV[gl_InvocationID] = inUV[gl_InvocationID]; + + // set base + float P0 = gl_in[0].gl_Position[gl_InvocationID]; /* gl_InvocationID is used as a component index here, so each invocation works on one coordinate of the three vertices */ + float P1 = gl_in[1].gl_Position[gl_InvocationID]; + float P2 = gl_in[2].gl_Position[gl_InvocationID]; + float N0 = inNormal[0][gl_InvocationID]; + float N1 = inNormal[1][gl_InvocationID]; + float N2 = inNormal[2][gl_InvocationID]; + + // compute control points + outPatch[gl_InvocationID].b210 = (2.0*P0 + P1 - wij(0,1)*N0)/3.0; + outPatch[gl_InvocationID].b120 = (2.0*P1 + P0 - wij(1,0)*N1)/3.0; + 
outPatch[gl_InvocationID].b021 = (2.0*P1 + P2 - wij(1,2)*N1)/3.0; + outPatch[gl_InvocationID].b012 = (2.0*P2 + P1 - wij(2,1)*N2)/3.0; + outPatch[gl_InvocationID].b102 = (2.0*P2 + P0 - wij(2,0)*N2)/3.0; + outPatch[gl_InvocationID].b201 = (2.0*P0 + P2 - wij(0,2)*N0)/3.0; + float E = ( outPatch[gl_InvocationID].b210 /* E is the mean of the six edge control values written above (read back from the output patch) */ + + outPatch[gl_InvocationID].b120 + + outPatch[gl_InvocationID].b021 + + outPatch[gl_InvocationID].b012 + + outPatch[gl_InvocationID].b102 + + outPatch[gl_InvocationID].b201 ) / 6.0; + float V = (P0 + P1 + P2)/3.0; /* V is the mean of the three vertex values */ + outPatch[gl_InvocationID].b111 = E + (E - V)*0.5; + outPatch[gl_InvocationID].n110 = N0+N1-vij(0,1)*(P1-P0); + outPatch[gl_InvocationID].n011 = N1+N2-vij(1,2)*(P2-P1); + outPatch[gl_InvocationID].n101 = N2+N0-vij(2,0)*(P0-P2); + + // set tess levels + gl_TessLevelOuter[gl_InvocationID] = ubo.tessLevel; + gl_TessLevelInner[0] = ubo.tessLevel; /* written by every invocation with the same uniform value */ +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/tessellation/pntriangles.tese b/tests/glsl/sascha-willems/tessellation/pntriangles.tese new file mode 100644 index 000000000..0187ad781 --- /dev/null +++ b/tests/glsl/sascha-willems/tessellation/pntriangles.tese @@ -0,0 +1,92 @@ +//TEST:COMPARE_GLSL: +#version 450 + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +// PN patch data +struct PnPatch +{ + float b210; + float b120; + float b021; + float b012; + float b102; + float b201; + float b111; + float n110; + float n011; + float n101; +}; + +layout (binding = 1) uniform UBO +{ + mat4 projection; + mat4 model; + float tessAlpha; +} ubo; + +layout(triangles, fractional_odd_spacing, ccw) in; + +layout(location = 0) in vec3 iNormal[]; +layout(location = 3) in vec2 iTexCoord[]; +layout(location = 6) in PnPatch iPnPatch[]; + +layout(location = 0) out vec3 oNormal; +layout(location = 1) out vec2 oTexCoord; + +#define uvw gl_TessCoord + +void main() /* Tessellation evaluation stage for triangle patches: computes a barycentric result and a PN-patch result for position and normal, then blends the two by ubo.tessAlpha. */ +{ + vec3 uvwSquared = uvw * uvw; + vec3 uvwCubed = uvwSquared * uvw; + + // extract control points + vec3 b210 = vec3(iPnPatch[0].b210, iPnPatch[1].b210, iPnPatch[2].b210); /* each vec3 is assembled from one scalar member taken from each of the three patch entries */ + vec3 b120 = vec3(iPnPatch[0].b120, iPnPatch[1].b120, iPnPatch[2].b120); + vec3 b021 = vec3(iPnPatch[0].b021, iPnPatch[1].b021, iPnPatch[2].b021); + vec3 b012 = vec3(iPnPatch[0].b012, iPnPatch[1].b012, iPnPatch[2].b012); + vec3 b102 = vec3(iPnPatch[0].b102, iPnPatch[1].b102, iPnPatch[2].b102); + vec3 b201 = vec3(iPnPatch[0].b201, iPnPatch[1].b201, iPnPatch[2].b201); + vec3 b111 = vec3(iPnPatch[0].b111, iPnPatch[1].b111, iPnPatch[2].b111); + + // extract control normals + vec3 n110 = normalize(vec3(iPnPatch[0].n110, iPnPatch[1].n110, iPnPatch[2].n110)); + vec3 n011 = normalize(vec3(iPnPatch[0].n011, iPnPatch[1].n011, iPnPatch[2].n011)); + vec3 n101 = normalize(vec3(iPnPatch[0].n101, iPnPatch[1].n101, iPnPatch[2].n101)); + + // compute texcoords + oTexCoord = 
gl_TessCoord[2]*iTexCoord[0] + gl_TessCoord[0]*iTexCoord[1] + gl_TessCoord[1]*iTexCoord[2]; /* weights [2], [0], [1] are paired with patch vertices 0, 1, 2 throughout this shader */ + + // normal + // Barycentric normal + vec3 barNormal = gl_TessCoord[2]*iNormal[0] + gl_TessCoord[0]*iNormal[1] + gl_TessCoord[1]*iNormal[2]; + vec3 pnNormal = iNormal[0]*uvwSquared[2] + iNormal[1]*uvwSquared[0] + iNormal[2]*uvwSquared[1] + + n110*uvw[2]*uvw[0] + n011*uvw[0]*uvw[1]+ n101*uvw[2]*uvw[1]; + oNormal = ubo.tessAlpha*pnNormal + (1.0-ubo.tessAlpha) * barNormal; /* the blended normal is not re-normalized in this stage */ + + // compute interpolated pos + vec3 barPos = gl_TessCoord[2]*gl_in[0].gl_Position.xyz + + gl_TessCoord[0]*gl_in[1].gl_Position.xyz + + gl_TessCoord[1]*gl_in[2].gl_Position.xyz; + + // save some computations + uvwSquared *= 3.0; /* from here on uvwSquared holds 3 * uvw^2; pnNormal above used the unscaled value */ + + // compute PN position + vec3 pnPos = gl_in[0].gl_Position.xyz*uvwCubed[2] + + gl_in[1].gl_Position.xyz*uvwCubed[0] + + gl_in[2].gl_Position.xyz*uvwCubed[1] + + b210*uvwSquared[2]*uvw[0] + + b120*uvwSquared[0]*uvw[2] + + b201*uvwSquared[2]*uvw[1] + + b021*uvwSquared[0]*uvw[1] + + b102*uvwSquared[1]*uvw[2] + + b012*uvwSquared[1]*uvw[0] + + b111*6.0*uvw[0]*uvw[1]*uvw[2]; + + // final position and normal + vec3 finalPos = (1.0-ubo.tessAlpha)*barPos + ubo.tessAlpha*pnPos; + gl_Position = ubo.projection * ubo.model * vec4(finalPos,1.0); +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/textoverlay/background.frag b/tests/glsl/sascha-willems/textoverlay/background.frag new file mode 100644 index 000000000..00ac6416b --- /dev/null +++ b/tests/glsl/sascha-willems/textoverlay/background.frag @@ -0,0 +1,16 @@ +//TEST:COMPARE_GLSL: +#version 450 + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (binding = 1) uniform sampler2D samplerColorMap; + +layout (location = 0) in vec2 inUV; + +layout (location = 0) out vec4 outFragColor; + +void main() /* Fragment stage: outputs the colour map sampled at inUV with no further modification. */ +{ + outFragColor = texture(samplerColorMap, inUV); +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/textoverlay/background.vert b/tests/glsl/sascha-willems/textoverlay/background.vert new file mode 100644 index 000000000..476834f38 --- /dev/null +++ b/tests/glsl/sascha-willems/textoverlay/background.vert @@ -0,0 +1,18 @@ +//TEST:COMPARE_GLSL: +#version 450 + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (location = 0) out vec2 outUV; + +out gl_PerVertex +{ + vec4 gl_Position; +}; + +void main() /* Vertex stage with no vertex inputs: UV and position are generated purely from gl_VertexIndex. */ +{ + outUV = vec2((gl_VertexIndex << 1) & 2, gl_VertexIndex & 2); /* each UV component is either 0 or 2, selected by the low two bits of the index */ + gl_Position = vec4(outUV * vec2(2.0f, 2.0f) + vec2(-1.0f, -1.0f), 0.0f, 1.0f); /* position.xy = UV * 2 - 1, with z = 0 and w = 1 */ +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/textoverlay/mesh.frag b/tests/glsl/sascha-willems/textoverlay/mesh.frag new file mode 100644 index 000000000..696914ded --- /dev/null +++ b/tests/glsl/sascha-willems/textoverlay/mesh.frag @@ -0,0 +1,27 @@ +//TEST:COMPARE_GLSL: +#version 450 + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (binding = 1) uniform sampler2D samplerColorMap; + +layout (location = 0) in vec3 inNormal; +layout (location = 1) in vec2 inUV; +layout (location = 2) in vec3 inViewVec; +layout (location = 3) in vec3 inLightVec; + +layout (location = 0) out vec4 outFragColor; + +void main() /* Fragment stage: textured diffuse term plus a specular term built from the reflected light vector; output alpha is fixed at 1.0. */ +{ + vec4 color = texture(samplerColorMap, inUV); + + vec3 N = normalize(inNormal); + vec3 L = normalize(inLightVec); + vec3 V = normalize(inViewVec); + vec3 R = reflect(-L, N); + vec3 diffuse = max(dot(N, L), 0.0) * color.rgb; + vec3 specular = pow(max(dot(R, V), 0.0), 1.0) * vec3(color.a); /* the exponent is 1.0; the specular term is scaled by the texture's alpha channel */ + outFragColor = vec4(diffuse + specular, 1.0); +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/textoverlay/mesh.vert b/tests/glsl/sascha-willems/textoverlay/mesh.vert new file mode 100644 index 000000000..dde3bf92d --- /dev/null +++ b/tests/glsl/sascha-willems/textoverlay/mesh.vert @@ -0,0 +1,39 @@ +//TEST:COMPARE_GLSL: +#version 450 + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (location = 0) in vec3 inPos; +layout (location = 1) in vec3 inNormal; +layout (location = 2) in vec2 inUV; + +layout (binding = 0) uniform UBO +{ + mat4 projection; + mat4 model; + vec4 lightPos; +} ubo; + +layout (location = 0) out vec3 outNormal; +layout (location = 1) out vec2 outUV; +layout (location = 2) out vec3 outViewVec; +layout (location = 3) out vec3 outLightVec; + +out gl_PerVertex +{ + vec4 gl_Position; +}; + +void main() /* Vertex stage: projects the vertex and emits normal, UV, light vector and view vector for the fragment stage. */ +{ + outNormal = inNormal; /* overwritten further down by the inverse-transpose version */ + outUV = inUV; + gl_Position = ubo.projection * ubo.model * vec4(inPos.xyz, 1.0); + + vec4 pos = ubo.model * vec4(inPos, 1.0); + outNormal = mat3(transpose(inverse(ubo.model))) * normalize(inNormal); /* final value of outNormal */ + vec3 lPos = mat3(ubo.model) * ubo.lightPos.xyz; /* only the upper-left 3x3 of model is applied to the light position */ + outLightVec = lPos - pos.xyz; + outViewVec = -pos.xyz; +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/textoverlay/text.frag b/tests/glsl/sascha-willems/textoverlay/text.frag new file mode 100644 index 000000000..42414689f --- /dev/null +++ b/tests/glsl/sascha-willems/textoverlay/text.frag @@ -0,0 +1,14 @@ +//TEST:COMPARE_GLSL: +#version 450 core + +layout (location = 0) in vec2 inUV; + +layout (binding = 0) uniform sampler2D samplerFont; + +layout (location = 0) out vec4 outFragColor; + +void main(void) /* Fragment stage: replicates the font texture's red channel into rgb and writes alpha 1.0. */ +{ + float color = texture(samplerFont, inUV).r; + outFragColor = vec4(vec3(color), 1.0); +} diff --git a/tests/glsl/sascha-willems/textoverlay/text.vert b/tests/glsl/sascha-willems/textoverlay/text.vert new file mode 100644 index 000000000..a9877dd0e --- /dev/null +++ b/tests/glsl/sascha-willems/textoverlay/text.vert @@ -0,0 +1,18 @@ +//TEST:COMPARE_GLSL: +#version 450 core + +layout (location = 0) in vec2 inPos; +layout (location = 1) in vec2 inUV; + +layout (location = 0) out vec2 outUV; + +out gl_PerVertex +{ + vec4 gl_Position; +}; + +void main(void) /* Vertex stage: uses the 2D input position directly (z = 0, w = 1) and forwards the UV. */ +{ + gl_Position = vec4(inPos, 0.0, 1.0); + outUV = inUV; +} diff --git a/tests/glsl/sascha-willems/texture/texture.frag b/tests/glsl/sascha-willems/texture/texture.frag new file mode 100644 index 000000000..508f7f69c --- /dev/null +++ b/tests/glsl/sascha-willems/texture/texture.frag @@ -0,0 +1,29 @@ +//TEST:COMPARE_GLSL: +#version 450 + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (binding = 1) uniform sampler2D samplerColor; + +layout (location = 0) in vec2 inUV; +layout (location = 1) in float inLodBias; +layout (location = 2) in vec3 inNormal; +layout (location = 3) in vec3 inViewVec; +layout (location = 4) in vec3 inLightVec; + +layout (location = 0) out vec4 outFragColor; + +void main() /* Fragment stage: samples the texture with a per-fragment bias and applies diffuse plus specular lighting. */ +{ + vec4 color = texture(samplerColor, inUV, inLodBias); /* inLodBias is passed as texture()'s optional bias argument */ + + vec3 N = normalize(inNormal); + vec3 L = normalize(inLightVec); + vec3 V = normalize(inViewVec); + vec3 R = reflect(-L, N); + 
vec3 diffuse = max(dot(N, L), 0.0) * vec3(1.0); + float specular = pow(max(dot(R, V), 0.0), 16.0) * color.a; /* scalar specular term, scaled by the texture's alpha channel */ + + outFragColor = vec4(diffuse * color.rgb + specular, 1.0); /* the scalar specular is added to every rgb component */ +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/texture/texture.vert b/tests/glsl/sascha-willems/texture/texture.vert new file mode 100644 index 000000000..9cd64b831 --- /dev/null +++ b/tests/glsl/sascha-willems/texture/texture.vert @@ -0,0 +1,45 @@ +//TEST:COMPARE_GLSL: +#version 450 + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (location = 0) in vec3 inPos; +layout (location = 1) in vec2 inUV; +layout (location = 2) in vec3 inNormal; + +layout (binding = 0) uniform UBO +{ + mat4 projection; + mat4 model; + vec4 viewPos; + float lodBias; +} ubo; + +layout (location = 0) out vec2 outUV; +layout (location = 1) out float outLodBias; +layout (location = 2) out vec3 outNormal; +layout (location = 3) out vec3 outViewVec; +layout (location = 4) out vec3 outLightVec; + +out gl_PerVertex +{ + vec4 gl_Position; +}; + +void main() /* Vertex stage: forwards UV and the uniform LOD bias, projects the vertex and emits normal, light and view vectors. */ +{ + outUV = inUV; + outLodBias = ubo.lodBias; + + vec3 worldPos = vec3(ubo.model * vec4(inPos, 1.0)); /* not referenced again in this shader; pos below holds the same product as a vec4 */ + + gl_Position = ubo.projection * ubo.model * vec4(inPos.xyz, 1.0); + + vec4 pos = ubo.model * vec4(inPos, 1.0); + outNormal = mat3(inverse(transpose(ubo.model))) * inNormal; + vec3 lightPos = vec3(0.0); + vec3 lPos = mat3(ubo.model) * lightPos.xyz; /* lightPos is the zero vector, so lPos is zero as well */ + outLightVec = lPos - pos.xyz; + outViewVec = ubo.viewPos.xyz - pos.xyz; +} diff --git a/tests/glsl/sascha-willems/texture3d/texture3d.frag b/tests/glsl/sascha-willems/texture3d/texture3d.frag new file mode 100644 index 000000000..d9464a233 --- /dev/null +++ b/tests/glsl/sascha-willems/texture3d/texture3d.frag @@ -0,0 +1,29 @@ +//TEST:COMPARE_GLSL: +#version 450 + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (binding = 1) uniform sampler3D samplerColor; + +layout (location = 0) in vec3 inUV; +layout (location = 1) in float inLodBias; +layout (location = 2) in vec3 inNormal; +layout (location = 3) in vec3 inViewVec; +layout (location = 4) in 
vec3 inLightVec; + +layout (location = 0) out vec4 outFragColor; + +void main() /* Fragment stage: samples a 3D texture and lights it using only the red channel; inLodBias is declared but not read here. */ +{ + vec4 color = texture(samplerColor, inUV); + + vec3 N = normalize(inNormal); + vec3 L = normalize(inLightVec); + vec3 V = normalize(inViewVec); + vec3 R = reflect(-L, N); + vec3 diffuse = max(dot(N, L), 0.0) * vec3(1.0); + float specular = pow(max(dot(R, V), 0.0), 16.0) * color.r; + + outFragColor = vec4(diffuse * color.r + specular, 1.0); /* both terms are modulated by color.r only */ +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/texture3d/texture3d.vert b/tests/glsl/sascha-willems/texture3d/texture3d.vert new file mode 100644 index 000000000..1aca0195b --- /dev/null +++ b/tests/glsl/sascha-willems/texture3d/texture3d.vert @@ -0,0 +1,44 @@ +//TEST:COMPARE_GLSL: +#version 450 + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (location = 0) in vec3 inPos; +layout (location = 1) in vec2 inUV; +layout (location = 2) in vec3 inNormal; + +layout (binding = 0) uniform UBO +{ + mat4 projection; + mat4 model; + vec4 viewPos; + float depth; +} ubo; + +layout (location = 0) out vec3 outUV; +layout (location = 1) out float outLodBias; +layout (location = 2) out vec3 outNormal; +layout (location = 3) out vec3 outViewVec; +layout (location = 4) out vec3 outLightVec; + +out gl_PerVertex +{ + vec4 gl_Position; +}; + +void main() /* Vertex stage: builds a 3D texture coordinate from the 2D UV plus ubo.depth; outLodBias is declared above but never written in this shader. */ +{ + outUV = vec3(inUV, ubo.depth); + + vec3 worldPos = vec3(ubo.model * vec4(inPos, 1.0)); /* not referenced again in this shader */ + + gl_Position = ubo.projection * ubo.model * vec4(inPos.xyz, 1.0); + + vec4 pos = ubo.model * vec4(inPos, 1.0); + outNormal = mat3(inverse(transpose(ubo.model))) * inNormal; + vec3 lightPos = vec3(0.0); + vec3 lPos = mat3(ubo.model) * lightPos.xyz; /* lightPos is the zero vector, so lPos is zero as well */ + outLightVec = lPos - pos.xyz; + outViewVec = ubo.viewPos.xyz - pos.xyz; +} diff --git a/tests/glsl/sascha-willems/texturearray/instancing.frag b/tests/glsl/sascha-willems/texturearray/instancing.frag new file mode 100644 index 000000000..b0e293e34 --- /dev/null +++ b/tests/glsl/sascha-willems/texturearray/instancing.frag @@ -0,0 +1,16 @@ +//TEST:COMPARE_GLSL: +#version 450 + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (binding = 1) uniform sampler2DArray samplerArray; + +layout (location = 0) in vec3 inUV; + +layout (location = 0) out vec4 outFragColor; + +void main() /* Fragment stage: samples the 2D array texture with the three-component inUV and outputs the result unchanged. */ +{ + outFragColor = texture(samplerArray, inUV); +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/texturearray/instancing.vert b/tests/glsl/sascha-willems/texturearray/instancing.vert new file mode 100644 index 000000000..ad738a59b --- /dev/null +++ b/tests/glsl/sascha-willems/texturearray/instancing.vert @@ -0,0 +1,30 @@ +//TEST:COMPARE_GLSL: +#version 450 + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (location = 0) in vec4 inPos; +layout (location = 1) in vec2 inUV; + +struct Instance +{ + mat4 model; + vec4 arrayIndex; +}; + +layout (binding = 0) uniform UBO +{ + mat4 projection; + mat4 view; + Instance instance[8]; +} ubo; + +layout (location = 0) out vec3 outUV; + +void main() /* Vertex stage: looks up per-instance data in ubo.instance[] by gl_InstanceIndex (the array holds 8 entries). */ +{ + outUV = vec3(inUV, ubo.instance[gl_InstanceIndex].arrayIndex.x); /* third UV component comes from the instance's arrayIndex.x */ + mat4 modelView = ubo.view * ubo.instance[gl_InstanceIndex].model; + gl_Position = ubo.projection * modelView * inPos; +} diff --git a/tests/glsl/sascha-willems/texturemipmapgen/texture.frag b/tests/glsl/sascha-willems/texturemipmapgen/texture.frag new file mode 100644 index 000000000..037ed4e30 --- /dev/null +++ b/tests/glsl/sascha-willems/texturemipmapgen/texture.frag @@ -0,0 +1,30 @@ +//TEST:COMPARE_GLSL: +#version 450 + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (set = 0, binding = 1) uniform texture2D textureColor; +layout (set = 0, binding = 2) uniform sampler samplers[3]; + +layout (location = 0) in vec2 inUV; +layout (location = 1) in float inLodBias; +layout (location = 2) flat in int inSamplerIndex; +layout (location = 3) in vec3 inNormal; +layout (location = 4) in vec3 inViewVec; +layout (location = 5) in vec3 inLightVec; + +layout (location = 0) out vec4 outFragColor; + +void main() /* Fragment stage: combines a separate texture2D with one of three sampler objects chosen by inSamplerIndex, then applies diffuse plus specular lighting. */ +{ + vec4 color = texture(sampler2D(textureColor, samplers[inSamplerIndex]), inUV, inLodBias); + + vec3 N = normalize(inNormal); + vec3 L = normalize(inLightVec); + vec3 V = normalize(inViewVec); + vec3 R = reflect(L, 
N); /* L is passed to reflect() without negation here */ + vec3 diffuse = max(dot(N, L), 0.65) * vec3(1.0); /* the diffuse factor has a floor of 0.65 */ + float specular = pow(max(dot(R, V), 0.0), 16.0) * color.a; + outFragColor = vec4(diffuse * color.rgb + specular, 1.0); +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/texturemipmapgen/texture.vert b/tests/glsl/sascha-willems/texturemipmapgen/texture.vert new file mode 100644 index 000000000..ed98fc24e --- /dev/null +++ b/tests/glsl/sascha-willems/texturemipmapgen/texture.vert @@ -0,0 +1,47 @@ +//TEST:COMPARE_GLSL: +#version 450 + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (location = 0) in vec3 inPos; +layout (location = 1) in vec2 inUV; +layout (location = 2) in vec3 inNormal; + +layout (binding = 0) uniform UBO +{ + mat4 projection; + mat4 view; + mat4 model; + vec4 viewPos; + float lodBias; + int samplerIndex; +} ubo; + +layout (location = 0) out vec2 outUV; +layout (location = 1) out float outLodBias; +layout (location = 2) flat out int outSamplerIndex; +layout (location = 3) out vec3 outNormal; +layout (location = 4) out vec3 outViewVec; +layout (location = 5) out vec3 outLightVec; + +out gl_PerVertex +{ + vec4 gl_Position; +}; + +void main() /* Vertex stage: forwards the uniform LOD bias and sampler index, projects the vertex and emits normal, light and view vectors. */ +{ + outUV = inUV * vec2(2.0, 1.0); /* U is scaled by 2, V is left as is */ + outLodBias = ubo.lodBias; + outSamplerIndex = ubo.samplerIndex; + + vec3 worldPos = vec3(ubo.model * vec4(inPos, 1.0)); + + gl_Position = ubo.projection * ubo.view * ubo.model * vec4(inPos.xyz, 1.0); + + outNormal = mat3(inverse(transpose(ubo.model))) * inNormal; + vec3 lightPos = vec3(-30.0, 0.0, 0.0); + outLightVec = worldPos - lightPos; /* vector from the hard-coded light position to the vertex */ + outViewVec = ubo.viewPos.xyz - worldPos; +} diff --git a/tests/glsl/sascha-willems/texturesparseresidency/sparseresidency.frag b/tests/glsl/sascha-willems/texturesparseresidency/sparseresidency.frag new file mode 100644 index 000000000..c82b621e4 --- /dev/null +++ b/tests/glsl/sascha-willems/texturesparseresidency/sparseresidency.frag @@ -0,0 +1,48 @@ +//TEST:COMPARE_GLSL: +#version 450 + +#extension GL_ARB_sparse_texture2 : enable +#extension GL_ARB_sparse_texture_clamp : enable + +layout (binding = 1) uniform sampler2D samplerColor; + +layout (location = 0) in vec2 inUV; 
+layout (location = 1) in float inLodBias; +layout (location = 2) in vec3 inNormal; +layout (location = 3) in vec3 inViewVec; +layout (location = 4) in vec3 inLightVec; + +layout (location = 0) out vec4 outFragColor; + +void main() /* Fragment stage: fetches from a sparse texture, retrying with an increasing minimum LOD until the residency code reports resident, then applies a diffuse term. */ +{ + vec4 color = vec4(0.0); + + // Get residency code for current texel + int residencyCode = sparseTextureARB(samplerColor, inUV, color, inLodBias); + + // Fetch sparse until we get a valid texel + float minLod = 1.0; + while (!sparseTexelsResidentARB(residencyCode)) /* NOTE(review): this loop has no iteration cap; it ends only when a fetch reports resident */ + { + residencyCode = sparseTextureClampARB(samplerColor, inUV, minLod, color); + minLod += 1.0f; + } + + // Check if texel is resident + bool texelResident = sparseTexelsResidentARB(residencyCode); + + if (!texelResident) /* NOTE(review): the loop above exits only once the code reports resident, so this branch looks unreachable; confirm */ + { + color = vec4(1.0, 0.0, 0.0, 0.0); + } + + vec3 N = normalize(inNormal); + + N = normalize((inNormal - 0.5) * 2.0); /* replaces the value assigned on the previous statement */ + + vec3 L = normalize(inLightVec); + vec3 R = reflect(-L, N); /* R is not used by the output below */ + vec3 diffuse = max(dot(N, L), 0.25) * color.rgb; + outFragColor = vec4(diffuse, 1.0); +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/texturesparseresidency/sparseresidency.vert b/tests/glsl/sascha-willems/texturesparseresidency/sparseresidency.vert new file mode 100644 index 000000000..f29db5b5b --- /dev/null +++ b/tests/glsl/sascha-willems/texturesparseresidency/sparseresidency.vert @@ -0,0 +1,43 @@ +//TEST:COMPARE_GLSL: +#version 450 + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (location = 0) in vec3 inPos; +layout (location = 1) in vec3 inNormal; +layout (location = 2) in vec2 inUV; + +layout (binding = 0) uniform UBO +{ + mat4 projection; + mat4 model; + vec4 viewPos; + float lodBias; +} ubo; + +layout (location = 0) out vec2 outUV; +layout (location = 1) out float outLodBias; +layout (location = 2) out vec3 outNormal; +layout (location = 3) out vec3 outViewVec; +layout (location = 4) out vec3 outLightVec; + +out gl_PerVertex +{ + vec4 gl_Position; +}; + +void main() /* Vertex stage: forwards UV, LOD bias and the untransformed normal, projects the vertex and emits light and view vectors. */ +{ + outUV = inUV; + outLodBias = ubo.lodBias; + outNormal = inNormal; + + vec3 worldPos = vec3(ubo.model * vec4(inPos, 1.0)); + + gl_Position = ubo.projection * ubo.model * vec4(inPos.xyz, 1.0); + + vec3 lightPos = vec3(0.0, 50.0f, 0.0f); + outLightVec = lightPos - inPos.xyz; /* uses the untransformed inPos, whereas outViewVec below uses worldPos */ + outViewVec = ubo.viewPos.xyz - worldPos.xyz; +} diff --git a/tests/glsl/sascha-willems/triangle/triangle.frag b/tests/glsl/sascha-willems/triangle/triangle.frag new file mode 100644 index 000000000..70ee9fc53 --- /dev/null +++ b/tests/glsl/sascha-willems/triangle/triangle.frag @@ -0,0 +1,14 @@ +//TEST:COMPARE_GLSL: +#version 450 + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (location = 0) in vec3 inColor; + +layout (location = 0) out vec4 outFragColor; + +void main() /* Fragment stage: outputs the interpolated colour with alpha 1.0. */ +{ + outFragColor = vec4(inColor, 1.0); +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/triangle/triangle.vert b/tests/glsl/sascha-willems/triangle/triangle.vert new file mode 100644 index 000000000..b14410d80 --- /dev/null +++ b/tests/glsl/sascha-willems/triangle/triangle.vert @@ -0,0 +1,29 @@ +//TEST:COMPARE_GLSL: +#version 450 + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (location = 0) in vec3 inPos; +layout (location = 1) in vec3 inColor; + +layout (binding = 0) uniform UBO +{ + mat4 projectionMatrix; + mat4 modelMatrix; + mat4 viewMatrix; +} ubo; + +layout (location = 0) out vec3 outColor; + +out gl_PerVertex +{ + vec4 gl_Position; +}; + + +void main() /* Vertex stage: forwards the vertex colour and transforms the position by projection * view * model. */ +{ + outColor = inColor; + gl_Position = ubo.projectionMatrix * ubo.viewMatrix * ubo.modelMatrix * vec4(inPos.xyz, 1.0); +} diff --git a/tests/glsl/sascha-willems/viewportarray/multiview.geom b/tests/glsl/sascha-willems/viewportarray/multiview.geom new file mode 100644 index 000000000..39303fb33 --- /dev/null +++ b/tests/glsl/sascha-willems/viewportarray/multiview.geom @@ -0,0 +1,46 @@ +//TEST:COMPARE_GLSL: +#version 450 + +#extension GL_ARB_viewport_array : enable + +layout (triangles, invocations = 2) in; +layout (triangle_strip, max_vertices = 3) out; + +layout (binding = 0) uniform UBO +{ + mat4 projection[2]; + mat4 modelview[2]; + vec4 lightPos; +} ubo; + +layout (location = 0) in vec3 inNormal[]; +layout (location = 1) in vec3 inColor[]; + +layout (location = 0) out vec3 outNormal; +layout (location = 1) out vec3 outColor; +layout (location = 2) out vec3 outViewVec; +layout (location = 3) out vec3 outLightVec; + +void main(void) /* Geometry stage declared with 2 invocations: each invocation re-emits the input triangle using the projection and modelview matrices indexed by gl_InvocationID. */ +{ + for(int i = 0; i < gl_in.length(); i++) + { + outNormal = mat3(ubo.modelview[gl_InvocationID]) * inNormal[i]; + outColor = inColor[i]; + + vec4 pos = gl_in[i].gl_Position; + vec4 worldPos = (ubo.modelview[gl_InvocationID] * pos); /* despite the name, this is the position after the modelview transform */ + + vec3 lPos = vec3(ubo.modelview[gl_InvocationID] * ubo.lightPos); + outLightVec = lPos - 
worldPos.xyz; + outViewVec = -worldPos.xyz; + + gl_Position = ubo.projection[gl_InvocationID] * worldPos; + + // Set the viewport index that the vertex will be emitted to + gl_ViewportIndex = gl_InvocationID; + + EmitVertex(); + } + EndPrimitive(); +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/viewportarray/scene.frag b/tests/glsl/sascha-willems/viewportarray/scene.frag new file mode 100644 index 000000000..1ecd7fdb3 --- /dev/null +++ b/tests/glsl/sascha-willems/viewportarray/scene.frag @@ -0,0 +1,21 @@ +//TEST:COMPARE_GLSL: +#version 450 + +layout (location = 0) in vec3 inNormal; +layout (location = 1) in vec3 inColor; +layout (location = 2) in vec3 inViewVec; +layout (location = 3) in vec3 inLightVec; + +layout (location = 0) out vec4 outColor; + +void main() /* Fragment stage: constant ambient (0.1) plus diffuse, both tinted by the vertex colour, plus an untinted specular term scaled by 0.75. */ +{ + vec3 N = normalize(inNormal); + vec3 L = normalize(inLightVec); + vec3 V = normalize(inViewVec); + vec3 R = reflect(-L, N); + vec3 ambient = vec3(0.1); + vec3 diffuse = max(dot(N, L), 0.0) * vec3(1.0); + vec3 specular = pow(max(dot(R, V), 0.0), 16.0) * vec3(0.75); + outColor = vec4((ambient + diffuse) * inColor.rgb + specular, 1.0); +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/viewportarray/scene.vert b/tests/glsl/sascha-willems/viewportarray/scene.vert new file mode 100644 index 000000000..28e1c3d8b --- /dev/null +++ b/tests/glsl/sascha-willems/viewportarray/scene.vert @@ -0,0 +1,21 @@ +//TEST:COMPARE_GLSL: +#version 450 + +layout (location = 0) in vec3 inPos; +layout (location = 1) in vec3 inNormal; +layout (location = 2) in vec3 inColor; + +layout (location = 0) out vec3 outNormal; +layout (location = 1) out vec3 outColor; + +out gl_PerVertex +{ + vec4 gl_Position; +}; + +void main() /* Pass-through vertex stage: forwards colour and normal and emits inPos with w = 1.0; no matrix transform is applied here. */ +{ + outColor = inColor; + outNormal = inNormal; + gl_Position = vec4(inPos.xyz, 1.0); +} diff --git a/tests/glsl/sascha-willems/vulkanscene/logo.frag b/tests/glsl/sascha-willems/vulkanscene/logo.frag new file mode 100644 index 000000000..aa42773b4 --- /dev/null +++ b/tests/glsl/sascha-willems/vulkanscene/logo.frag @@ -0,0 +1,23 @@ +//TEST:COMPARE_GLSL: +#version 450 + +layout (location = 0) in vec2 inUV; +layout (location = 1) in vec3 inNormal; +layout (location = 2) in vec3 inColor; +layout (location = 3) in vec3 inEyePos; +layout (location = 4) in vec3 inLightVec; + +layout (location = 0) out vec4 outFragColor; + +void main() /* Fragment stage: vertex-colour diffuse plus a specular term; inNormal and inLightVec are used as received, without normalize() in this stage. */ +{ + vec3 Eye = normalize(-inEyePos); + vec3 Reflected = normalize(reflect(-inLightVec, inNormal)); + + vec4 diff = vec4(inColor, 1.0) * max(dot(inNormal, inLightVec), 0.0); + float shininess = 0.0; + vec4 spec = vec4(1.0, 1.0, 1.0, 1.0) * pow(max(dot(Reflected, Eye), 0.0), 2.5) * shininess; /* shininess is 0.0 and is used as a multiplier here, not as the pow() exponent */ + + outFragColor = diff + spec; + outFragColor.a = 1.0; +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/vulkanscene/logo.vert b/tests/glsl/sascha-willems/vulkanscene/logo.vert new file mode 100644 index 000000000..2887245a7 --- /dev/null +++ b/tests/glsl/sascha-willems/vulkanscene/logo.vert @@ -0,0 +1,38 @@ +//TEST:COMPARE_GLSL: +#version 450 + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (location = 0) in vec4 inPos; +layout (location = 1) in vec3 inNormal; +layout (location = 2) in vec2 inTexCoord; +layout (location = 3) in vec3 inColor; + +layout (binding = 0) uniform UBO +{ + mat4 projection; + mat4 model; + mat4 normal; + mat4 view; + vec3 lightpos; +} ubo; + +layout (location = 0) out vec2 outUV; +layout (location = 1) out vec3 outNormal; +layout (location = 2) out vec3 outColor; +layout (location = 3) out vec3 outEyePos; +layout (location = 4) out vec3 outLightVec; + +void main() +{ + mat4 modelView = ubo.view * ubo.model; + vec4 pos = modelView * inPos; + outUV = inTexCoord.st; + outNormal = normalize(mat3(ubo.normal) * inNormal); + outColor = inColor; + gl_Position = ubo.projection * pos; + outEyePos = vec3(modelView * pos); + vec4 lightPos = vec4(1.0, 2.0, 0.0, 1.0) * modelView; + outLightVec = normalize(lightPos.xyz - outEyePos); +} diff --git a/tests/glsl/sascha-willems/vulkanscene/mesh.frag b/tests/glsl/sascha-willems/vulkanscene/mesh.frag new file mode 100644 index 000000000..5afa294ed --- /dev/null +++ b/tests/glsl/sascha-willems/vulkanscene/mesh.frag @@ -0,0 +1,45 @@ +//TEST:COMPARE_GLSL: +#version 450 + +layout (binding = 1) uniform sampler2D tex; + +layout (location = 0) in vec2 inUV; +layout (location = 1) in vec3 inNormal; +layout (location = 2) in vec3 inColor; +layout (location = 3) in vec3 inEyePos; +layout (location = 4) in vec3 inLightVec; + +layout (location = 0) out vec4 outFragColor; + +float specpart(vec3 L, vec3 N, vec3 H) +{ + if (dot(N, L) > 0.0) + { + return pow(clamp(dot(H, N), 0.0, 1.0), 64.0); + } 
+ return 0.0; +} + +void main() +{ + vec3 Eye = normalize(-inEyePos); + vec3 Reflected = normalize(reflect(-inLightVec, inNormal)); + + vec3 halfVec = normalize(inLightVec + inEyePos); + float diff = clamp(dot(inLightVec, inNormal), 0.0, 1.0); + float spec = specpart(inLightVec, inNormal, halfVec); + float intensity = 0.1 + diff + spec; + + vec4 IAmbient = vec4(0.2, 0.2, 0.2, 1.0); + vec4 IDiffuse = vec4(0.5, 0.5, 0.5, 0.5) * max(dot(inNormal, inLightVec), 0.0); + float shininess = 0.75; + vec4 ISpecular = vec4(0.5, 0.5, 0.5, 1.0) * pow(max(dot(Reflected, Eye), 0.0), 2.0) * shininess; + + outFragColor = vec4((IAmbient + IDiffuse) * vec4(inColor, 1.0) + ISpecular); + + // Some manual saturation + if (intensity > 0.95) + outFragColor *= 2.25; + if (intensity < 0.15) + outFragColor = vec4(0.1); +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/vulkanscene/mesh.vert b/tests/glsl/sascha-willems/vulkanscene/mesh.vert new file mode 100644 index 000000000..ca7e5b06d --- /dev/null +++ b/tests/glsl/sascha-willems/vulkanscene/mesh.vert @@ -0,0 +1,38 @@ +//TEST:COMPARE_GLSL: +#version 450 + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (location = 0) in vec4 inPos; +layout (location = 1) in vec3 inNormal; +layout (location = 2) in vec2 inTexCoord; +layout (location = 3) in vec3 inColor; + +layout (binding = 0) uniform UBO +{ + mat4 projection; + mat4 model; + mat4 normal; + mat4 view; + vec3 lightpos; +} ubo; + +layout (location = 0) out vec2 outUV; +layout (location = 1) out vec3 outNormal; +layout (location = 2) out vec3 outColor; +layout (location = 3) out vec3 outEyePos; +layout (location = 4) out vec3 outLightVec; + +void main() +{ + outUV = inTexCoord.st; + outNormal = normalize(mat3(ubo.normal) * inNormal); + outColor = inColor; + mat4 modelView = ubo.view * ubo.model; + vec4 pos = modelView * inPos; + gl_Position = ubo.projection * pos; + outEyePos = vec3(modelView * pos); + vec4 lightPos = vec4(ubo.lightpos, 1.0) * modelView; + outLightVec = normalize(lightPos.xyz - outEyePos); +} diff --git a/tests/glsl/sascha-willems/vulkanscene/skybox.frag b/tests/glsl/sascha-willems/vulkanscene/skybox.frag new file mode 100644 index 000000000..8cf1d0b8b --- /dev/null +++ b/tests/glsl/sascha-willems/vulkanscene/skybox.frag @@ -0,0 +1,16 @@ +//TEST:COMPARE_GLSL: +#version 450 + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (binding = 1) uniform samplerCube samplerCubeMap; + +layout (location = 0) in vec3 inUVW; + +layout (location = 0) out vec4 outFragColor; + +void main() +{ + outFragColor = texture(samplerCubeMap, inUVW); +}
\ No newline at end of file diff --git a/tests/glsl/sascha-willems/vulkanscene/skybox.vert b/tests/glsl/sascha-willems/vulkanscene/skybox.vert new file mode 100644 index 000000000..81c2941dc --- /dev/null +++ b/tests/glsl/sascha-willems/vulkanscene/skybox.vert @@ -0,0 +1,21 @@ +//TEST:COMPARE_GLSL: +#version 450 + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +layout (location = 0) in vec3 inPos; + +layout (binding = 0) uniform UBO +{ + mat4 projection; + mat4 model; +} ubo; + +layout (location = 0) out vec3 outUVW; + +void main() +{ + outUVW = inPos; + gl_Position = ubo.projection * ubo.model * vec4(inPos.xyz, 1.0); +} diff --git a/tests/hlsl/dxsdk/AdaptiveTessellationCS40/Render.hlsl b/tests/hlsl/dxsdk/AdaptiveTessellationCS40/Render.hlsl new file mode 100644 index 000000000..b98b870da --- /dev/null +++ b/tests/hlsl/dxsdk/AdaptiveTessellationCS40/Render.hlsl @@ -0,0 +1,58 @@ +//TEST:COMPARE_HLSL: -profile vs_4_0 -entry RenderBaseVS -profile ps_4_0 -entry RenderPS -target dxbc-assembly +//-------------------------------------------------------------------------------------- +// File: Render.hlsl +// +// The shaders for rendering tessellated mesh and base mesh +// +// Copyright (c) Microsoft Corporation. All rights reserved. +//-------------------------------------------------------------------------------------- +cbuffer cbPerObject : register( b0 ) +{ + row_major matrix g_mWorldViewProjection : packoffset( c0 ); +} + +// The tessellated vertex structure +struct TessedVertex +{ + uint BaseTriID; // Which triangle of the base mesh this tessellated vertex belongs to? 
+ float2 bc; // Barycentric coordinates with regard to the base triangle +}; +Buffer<float4> g_base_vb_buffer : register(t0); // Base mesh vertex buffer +StructuredBuffer<TessedVertex> g_TessedVertices : register(t1); // Tessellated mesh vertex buffer + +float4 bary_centric(float4 v1, float4 v2, float4 v3, float2 bc) +{ + return (1 - bc.x - bc.y) * v1 + bc.x * v2 + bc.y * v3; +} + +float4 RenderVS( uint vertid : SV_VertexID ) : SV_POSITION +{ + TessedVertex input = g_TessedVertices[vertid]; + + // Get the positions of the three vertices of the base triangle + float4 v[3]; + [unroll] + for (int i = 0; i < 3; ++ i) + { + uint vert_id = input.BaseTriID * 3 + i; + v[i] = g_base_vb_buffer[vert_id]; + } + + // Calculate the position of this tessellated vertex from barycentric coordinates and then project it + return mul(bary_centric(v[0], v[1], v[2], input.bc), g_mWorldViewProjection); +} + +struct BaseVertex +{ + float4 pos : POSITION; +}; + +float4 RenderBaseVS( BaseVertex input ) : SV_POSITION +{ + return mul( input.pos, g_mWorldViewProjection ); +} + +float4 RenderPS() : SV_TARGET +{ + return float4( 1.0f, 1.0f, 0.0f, 1.0f ); +}
\ No newline at end of file diff --git a/tests/hlsl/dxsdk/AdaptiveTessellationCS40/ScanCS.hlsl b/tests/hlsl/dxsdk/AdaptiveTessellationCS40/ScanCS.hlsl new file mode 100644 index 000000000..46cdc1ed9 --- /dev/null +++ b/tests/hlsl/dxsdk/AdaptiveTessellationCS40/ScanCS.hlsl @@ -0,0 +1,109 @@ +//TEST:COMPARE_HLSL: -target dxbc-assembly -profile cs_4_0 -entry CSScanInBucket -entry CSScanBucketResult -entry CSScanAddBucketResult +//-------------------------------------------------------------------------------------- +// File: ScanCS.hlsl +// +// A simple inclusive prefix sum(scan) implemented in CS4.0, +// using a typical up sweep and down sweep scheme +// +// Copyright (c) Microsoft Corporation. All rights reserved. +//-------------------------------------------------------------------------------------- +StructuredBuffer<uint2> Input : register( t0 ); // Change uint2 here if scan other types, and +RWStructuredBuffer<uint2> Result : register( u0 ); // also here + +#define groupthreads 128 +groupshared uint4 bucket[groupthreads]; // Change uint4 to the "type x2" if scan other types, e.g. 
+ // if scan uint2, then put uint4 here, + // if scan float, then put float2 here + +void CSScan( uint3 DTid, uint GI, uint2 x ) // Change the type of x here if scan other types +{ + // since CS40 can only support one shared memory for one shader, we use .xy and .zw as ping-ponging buffers + // if scan a single element type like int, search and replace all .xy to .x and .zw to .y below + bucket[GI].xy = x; + bucket[GI].zw = 0; + + // Up sweep + [unroll] + for ( uint stride = 2; stride <= groupthreads; stride <<= 1 ) + { + GroupMemoryBarrierWithGroupSync(); + + if ( (GI & (stride - 1)) == (stride - 1) ) + { + bucket[GI].xy += bucket[GI - stride/2].xy; + } + } + + if ( GI == (groupthreads - 1) ) + { + bucket[GI].xy = 0; + } + + // Down sweep + bool n = true; + [unroll] + for ( stride = groupthreads / 2; stride >= 1; stride >>= 1 ) + { + GroupMemoryBarrierWithGroupSync(); + + uint a = stride - 1; + uint b = stride | a; + + if ( n ) // ping-pong between passes + { + if ( ( GI & b) == b ) + { + bucket[GI].zw = bucket[GI-stride].xy + bucket[GI].xy; + } else + if ( (GI & a) == a ) + { + bucket[GI].zw = bucket[GI+stride].xy; + } else + { + bucket[GI].zw = bucket[GI].xy; + } + } else + { + if ( ( GI & b) == b ) + { + bucket[GI].xy = bucket[GI-stride].zw + bucket[GI].zw; + } else + if ( (GI & a) == a ) + { + bucket[GI].xy = bucket[GI+stride].zw; + } else + { + bucket[GI].xy = bucket[GI].zw; + } + } + + n = !n; + } + + Result[DTid.x] = bucket[GI].zw + x; +} + +// scan in each bucket +[numthreads( groupthreads, 1, 1 )] +void CSScanInBucket( uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI: SV_GroupIndex ) +{ + uint2 x = Input[DTid.x]; // Change the type of x here if scan other types + CSScan( DTid, GI, x ); +} + +// record and scan the sum of each bucket +[numthreads( groupthreads, 1, 1 )] +void CSScanBucketResult( uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI: SV_GroupIndex ) +{ + uint2 x = Input[DTid.x*groupthreads - 1]; // 
Change the type of x here if scan other types + CSScan( DTid, GI, x ); +} + +StructuredBuffer<uint2> Input1 : register( t1 ); + +// add the bucket scanned result to each bucket to get the final result +[numthreads( groupthreads, 1, 1 )] +void CSScanAddBucketResult( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI: SV_GroupIndex ) +{ + Result[DTid.x] = Input[DTid.x] + Input1[Gid.x]; +} diff --git a/tests/hlsl/dxsdk/AdaptiveTessellationCS40/TessellatorCS40_EdgeFactorCS.hlsl b/tests/hlsl/dxsdk/AdaptiveTessellationCS40/TessellatorCS40_EdgeFactorCS.hlsl new file mode 100644 index 000000000..91ebca777 --- /dev/null +++ b/tests/hlsl/dxsdk/AdaptiveTessellationCS40/TessellatorCS40_EdgeFactorCS.hlsl @@ -0,0 +1,217 @@ +//TEST_IGNORE_FILE: Currently failing due to Spire compiler issues. +//TEST:COMPARE_HLSL: -target dxbc-assembly -profile cs_4_0 -entry CSEdgeFactor +//-------------------------------------------------------------------------------------- +// File: TessellatorCS40_EdgeFactorCS.hlsl +// +// The CS to compute edge tessellation factor acoording to current world, view, projection matrix +// +// Copyright (c) Microsoft Corporation. All rights reserved. 
+//-------------------------------------------------------------------------------------- + +// http://jgt.akpeters.com/papers/akeninemoller01/tribox.html +bool planeBoxOverlap(float3 normal, float d, float3 maxbox) +{ + float3 vmin = maxbox, vmax = maxbox; + [unroll] + for (int q = 0;q <= 2; ++ q) + { + if (normal[q] > 0.0f) + { + vmin[q] *= -1; + } + else + { + vmax[q] *= -1; + } + } + if (dot(normal, vmin) + d > 0.0f) + { + return false; + } + if (dot(normal, vmax) + d >= 0.0f) + { + return true; + } + + return false; +} + +/*======================== X-tests ========================*/ +bool AXISTEST_X01(float3 v0, float3 v2, float3 boxhalfsize, float2 ab, float2 fab) +{ + float p0 = ab.x * v0.y - ab.y * v0.z; + float p2 = ab.x * v2.y - ab.y * v2.z; + float min_v = min(p0, p2); + float max_v = max(p0, p2); + float rad = dot(fab, boxhalfsize.yz); + return (min_v < rad) && (max_v > -rad); +} + +bool AXISTEST_X2(float3 v0, float3 v1, float3 boxhalfsize, float2 ab, float2 fab) +{ + float p0 = ab.x * v0.y - ab.y * v0.z; + float p1 = ab.x * v1.y - ab.y * v1.z; + float min_v = min(p0, p1); + float max_v = max(p0, p1); + float rad = dot(fab, boxhalfsize.yz); + return (min_v < rad) && (max_v > -rad); +} + +/*======================== Y-tests ========================*/ +bool AXISTEST_Y02(float3 v0, float3 v2, float3 boxhalfsize, float2 ab, float2 fab) +{ + float p0 = -ab.x * v0.x + ab.y * v0.z; + float p2 = -ab.x * v2.x + ab.y * v2.z; + float min_v = min(p0, p2); + float max_v = max(p0, p2); + float rad = dot(fab, boxhalfsize.xz); + return (min_v < rad) && (max_v > -rad); +} + +bool AXISTEST_Y1(float3 v0, float3 v1, float3 boxhalfsize, float2 ab, float2 fab) +{ + float p0 = -ab.x * v0.x + ab.y * v0.z; + float p1 = -ab.x * v1.x + ab.y * v1.z; + float min_v = min(p0, p1); + float max_v = max(p0, p1); + float rad = dot(fab, boxhalfsize.xz); + return (min_v < rad) && (max_v > -rad); +} + +/*======================== Z-tests ========================*/ +bool AXISTEST_Z12(float3 
v1, float3 v2, float3 boxhalfsize, float2 ab, float2 fab) +{ + float p1 = ab.x * v1.x - ab.y * v1.y; + float p2 = ab.x * v2.x - ab.y * v2.y; + float min_v = min(p1, p2); + float max_v = max(p1, p2); + float rad = dot(fab, boxhalfsize.xy); + return (min_v < rad) && (max_v > -rad); +} + +bool AXISTEST_Z0(float3 v0, float3 v1, float3 boxhalfsize, float2 ab, float2 fab) +{ + float p0 = ab.x * v0.x - ab.y * v0.y; + float p1 = ab.x * v1.x - ab.y * v1.y; + float min_v = min(p0, p1); + float max_v = max(p0, p1); + float rad = dot(fab, boxhalfsize.xy); + return (min_v < rad) && (max_v > -rad); +} + +bool triBoxOverlap(float3 boxcenter,float3 boxhalfsize,float3 triverts0, float3 triverts1, float3 triverts2) +{ + /* use separating axis theorem to test overlap between triangle and box */ + /* need to test for overlap in these directions: */ + /* 1) the {x,y,z}-directions (actually, since we use the AABB of the triangle */ + /* we do not even need to test these) */ + /* 2) normal of the triangle */ + /* 3) crossproduct(edge from tri, {x,y,z}-directin) */ + /* this gives 3x3=9 more tests */ + + /* This is the fastest branch on Sun */ + /* move everything so that the boxcenter is in (0,0,0) */ + float3 v0 = triverts0 - boxcenter; + float3 v1 = triverts1 - boxcenter; + float3 v2 = triverts2 - boxcenter; + + /* compute triangle edges */ + float3 e0 = v1 - v0; /* tri edge 0 */ + float3 e1 = v2 - v1; /* tri edge 1 */ + float3 e2 = v0 - v2; /* tri edge 2 */ + + /* Bullet 3: */ + /* test the 9 tests first (this was faster) */ + float3 fe = abs(e0); + if (!AXISTEST_X01(v0, v2, boxhalfsize, e0.zy, fe.zy) + || !AXISTEST_Y02(v0, v2, boxhalfsize, e0.zx, fe.zx) + || !AXISTEST_Z12(v1, v2, boxhalfsize, e0.yx, fe.yx)) + { + return false; + } + + fe = abs(e1); + if (!AXISTEST_X01(v0, v2, boxhalfsize, e1.zy, fe.zy) + || !AXISTEST_Y02(v0, v2, boxhalfsize, e1.zx, fe.zx) + || !AXISTEST_Z0(v0, v1, boxhalfsize, e1.yx, fe.yx)) + { + return false; + } + + fe = abs(e2); + if (!AXISTEST_X2(v0, v1, 
boxhalfsize, e2.zy, fe.zy) + || !AXISTEST_Y1(v0, v1, boxhalfsize, e2.zx, fe.zx) + || !AXISTEST_Z12(v1, v2, boxhalfsize, e2.yx, fe.yx)) + { + return false; + } + + /* Bullet 1: */ + /* first test overlap in the {x,y,z}-directions */ + /* find min, max of the triangle each direction, and test for overlap in */ + /* that direction -- this is equivalent to testing a minimal AABB around */ + /* the triangle against the AABB */ + + float3 min_v = min(min(v0, v1), v2); + float3 max_v = max(max(v0, v1), v2); + if ((min_v.x > boxhalfsize.x || max_v.x < -boxhalfsize.x) + || (min_v.y > boxhalfsize.y || max_v.y < -boxhalfsize.y) + || (min_v.z > boxhalfsize.z || max_v.z < -boxhalfsize.z)) + { + return false; + } + + /* Bullet 2: */ + /* test if the box intersects the plane of the triangle */ + /* compute plane equation of triangle: normal*x+d=0 */ + float3 normal = cross(e0, e1); + float d = -dot(normal, v0); /* plane eq: normal.x+d=0 */ + if (!planeBoxOverlap(normal, d, boxhalfsize)) + { + return false; + } + + return true; /* box and triangle overlaps */ +} + + +Buffer<float4> InputVertices : register(t0); +RWStructuredBuffer<float4> EdgeFactorBufOut : register(u0); + +cbuffer cb +{ + row_major matrix g_matWVP; + float2 g_tess_edge_length_scale; + int num_triangles; + float dummy; +} + +[numthreads(128, 1, 1)] +void CSEdgeFactor( uint3 DTid : SV_DispatchThreadID ) +{ + if (DTid.x < num_triangles) + { + float4 p0 = mul(InputVertices[DTid.x*3+0], g_matWVP); + float4 p1 = mul(InputVertices[DTid.x*3+1], g_matWVP); + float4 p2 = mul(InputVertices[DTid.x*3+2], g_matWVP); + p0 = p0 / p0.w; + p1 = p1 / p1.w; + p2 = p2 / p2.w; + + float4 factor; + // Only triangles which are completely inside or intersect with the view frustum are taken into account + if ( triBoxOverlap( float3(0, 0, 0.5), float3(1.02, 1.02, 0.52), p0.xyz, p1.xyz, p2.xyz ) ) + { + factor.x = length((p0.xy - p2.xy) * g_tess_edge_length_scale); + factor.y = length((p1.xy - p0.xy) * g_tess_edge_length_scale); + factor.z 
= length((p2.xy - p1.xy) * g_tess_edge_length_scale); + factor.w = min(min(factor.x, factor.y), factor.z); + factor = clamp(factor, 0, 9); + } else + { + factor = 0; + } + + EdgeFactorBufOut[DTid.x] = factor; + } +} diff --git a/tests/hlsl/dxsdk/AdaptiveTessellationCS40/TessellatorCS40_NumVerticesIndicesCS.hlsl b/tests/hlsl/dxsdk/AdaptiveTessellationCS40/TessellatorCS40_NumVerticesIndicesCS.hlsl new file mode 100644 index 000000000..4f2fb547b --- /dev/null +++ b/tests/hlsl/dxsdk/AdaptiveTessellationCS40/TessellatorCS40_NumVerticesIndicesCS.hlsl @@ -0,0 +1,56 @@ +//TEST_IGNORE_FILE: Currently failing due to Spire compiler issues. +//TEST:COMPARE_HLSL: -target dxbc-assembly -profile cs_4_0 -entry CSNumVerticesIndices +//-------------------------------------------------------------------------------------- +// File: TessellatorCS40_NumVerticesIndicesCS.hlsl +// +// The CS to compute number of vertices and triangles to be generated from edge tessellation factor +// +// Copyright (c) Microsoft Corporation. All rights reserved. 
+//-------------------------------------------------------------------------------------- + +#include "TessellatorCS40_common.hlsl" + +StructuredBuffer<float4> InputEdgeFactor : register(t0); +RWStructuredBuffer<uint2> NumVerticesIndicesOut : register(u0); + +cbuffer cbCS : register(b1) +{ + uint4 g_param; +} + +[numthreads(128, 1, 1)] +void CSNumVerticesIndices( uint3 DTid : SV_DispatchThreadID ) +{ + if (DTid.x < g_param.x) + { + float4 edge_factor = InputEdgeFactor[DTid.x]; + + PROCESSED_TESS_FACTORS_TRI processedTessFactors; + int num_points = TriProcessTessFactors(edge_factor, processedTessFactors, g_partitioning); + + int num_index; + if (0 == num_points) + { + num_index = 0; + } + else if (3 == num_points) + { + num_index = 4; + } + else + { + int numRings = ((processedTessFactors.numPointsForOutsideInside.w + 1) / 2); // +1 is so even tess includes the center point, which we want to now + + int4 outsideInsideHalfTessFactor = int4(ceil(processedTessFactors.outsideInsideHalfTessFactor)); + uint3 n = NumStitchTransition(outsideInsideHalfTessFactor, processedTessFactors.outsideInsideTessFactorParity); + num_index = n.x + n.y + n.z; + num_index += TotalNumStitchRegular(true, DIAGONALS_MIRRORED, processedTessFactors.numPointsForOutsideInside.w, numRings - 1) * 3; + if( processedTessFactors.outsideInsideTessFactorParity.w == TESSELLATOR_PARITY_ODD ) + { + num_index += 4; + } + } + + NumVerticesIndicesOut[DTid.x] = uint2(num_points, num_index); + } +} diff --git a/tests/hlsl/dxsdk/AdaptiveTessellationCS40/TessellatorCS40_ScatterIDCS.hlsl b/tests/hlsl/dxsdk/AdaptiveTessellationCS40/TessellatorCS40_ScatterIDCS.hlsl new file mode 100644 index 000000000..17f003794 --- /dev/null +++ b/tests/hlsl/dxsdk/AdaptiveTessellationCS40/TessellatorCS40_ScatterIDCS.hlsl @@ -0,0 +1,45 @@ +//TEST:COMPARE_HLSL: -target dxbc-assembly -profile cs_4_0 -entry CSScatterVertexTriIDIndexID -entry CSScatterIndexTriIDIndexID 
+//-------------------------------------------------------------------------------------- +// File: TessellatorCS40_ScatterIDCS.hlsl +// +// The CS to scatter vertex ID and triangle ID +// +// Copyright (c) Microsoft Corporation. All rights reserved. +//-------------------------------------------------------------------------------------- +StructuredBuffer<uint2> InputScanned : register(t0); +RWStructuredBuffer<uint2> TriIDIndexIDOut : register(u0); + +cbuffer cbCS : register(b1) +{ + uint4 g_param; +} + +[numthreads(128, 1, 1)] +void CSScatterVertexTriIDIndexID( uint3 DTid : SV_DispatchThreadID ) +{ + if (DTid.x < g_param.x) + { + uint start = InputScanned[DTid.x-1].x; + uint end = InputScanned[DTid.x].x; + + for ( uint i = start; i < end; ++i ) + { + TriIDIndexIDOut[i] = uint2(DTid.x, i - start); + } + } +} + +[numthreads(128, 1, 1)] +void CSScatterIndexTriIDIndexID( uint3 DTid : SV_DispatchThreadID ) +{ + if (DTid.x < g_param.x) + { + uint start = InputScanned[DTid.x-1].y; + uint end = InputScanned[DTid.x].y; + + for ( uint i = start; i < end; ++i ) + { + TriIDIndexIDOut[i] = uint2(DTid.x, i - start); + } + } +} diff --git a/tests/hlsl/dxsdk/AdaptiveTessellationCS40/TessellatorCS40_TessellateIndicesCS.hlsl b/tests/hlsl/dxsdk/AdaptiveTessellationCS40/TessellatorCS40_TessellateIndicesCS.hlsl new file mode 100644 index 000000000..756f99e58 --- /dev/null +++ b/tests/hlsl/dxsdk/AdaptiveTessellationCS40/TessellatorCS40_TessellateIndicesCS.hlsl @@ -0,0 +1,628 @@ +//TEST_IGNORE_FILE: Currently failing due to Spire compiler issues. +//TEST:COMPARE_HLSL: -target dxbc-assembly -profile cs_4_0 -entry CSTessellationIndices +//-------------------------------------------------------------------------------------- +// File: TessellatorCS40_TessellateIndicesCS.hlsl +// +// The CS to tessellate indices +// +// Copyright (c) Microsoft Corporation. All rights reserved. 
+//-------------------------------------------------------------------------------------- + +#include "TessellatorCS40_common.hlsl" + +StructuredBuffer<uint2> InputTriIDIndexID : register(t0); +StructuredBuffer<float4> InputEdgeFactor : register(t1); +StructuredBuffer<uint2> InputScanned : register(t2); + +RWByteAddressBuffer TessedIndicesOut : register(u0); + +cbuffer cbCS : register(b1) +{ + uint4 g_param; +} + + +int TransformIndex1(int index, int vertices_base) +{ + return vertices_base + index; +} + +int TransformIndex2(int index, int vertices_base, INDEX_PATCH_CONTEXT IndexPatchContext) +{ + if( index >= IndexPatchContext.outsidePointIndexPatchBase ) // assumed remapped outide indices are > remapped inside vertices + { + if( index == IndexPatchContext.outsidePointIndexBadValue ) + { + index = IndexPatchContext.outsidePointIndexReplacementValue; + } + else + { + index += IndexPatchContext.outsidePointIndexDeltaToRealValue; + } + } + else + { + if( index == IndexPatchContext.insidePointIndexBadValue ) + { + index = IndexPatchContext.insidePointIndexReplacementValue; + } + else + { + index += IndexPatchContext.insidePointIndexDeltaToRealValue; + } + } + + return vertices_base + index; +} + + +int AStitchRegular(bool bTrapezoid, int diagonals, + uint numInsideEdgePoints, + int2 outsideInsideEdgePointBaseOffset, + int i) +{ + if (bTrapezoid) + { + ++ outsideInsideEdgePointBaseOffset.x; + } + + int pt; + + if ((i < 4) && bTrapezoid) + { + if (i < 2) + { + pt = outsideInsideEdgePointBaseOffset.x - 1 + i; + } + else if (i == 2) + { + pt = outsideInsideEdgePointBaseOffset.y; + } + else + { + pt = -1; + } + } + + int index = i; + if (bTrapezoid) + { + index -= 4; + } + + if (index >= 0) + { + uint uindex = (uint)index; + + switch( diagonals ) + { + case DIAGONALS_INSIDE_TO_OUTSIDE: + if (uindex < 5 * numInsideEdgePoints - 5) + { + uint p = uindex / 5; + uint r = uindex - p * 5; + if (r < 2) + { + pt = outsideInsideEdgePointBaseOffset.x + p + r; + } + else if (r < 4) + 
{ + pt = outsideInsideEdgePointBaseOffset.y + p + r; + } + else + { + pt = -1; + } + } + else + { + int r = i - (4 + 5 * numInsideEdgePoints - 5); + if (r < 2) + { + pt = outsideInsideEdgePointBaseOffset.x + numInsideEdgePoints - 1 + r; + } + else if (r == 2) + { + pt = outsideInsideEdgePointBaseOffset.y + numInsideEdgePoints - 1; + } + else + { + pt = -1; + } + } + break; + + case DIAGONALS_INSIDE_TO_OUTSIDE_EXCEPT_MIDDLE: // Assumes ODD tessellation + if (uindex < (numInsideEdgePoints / 2 - 1) * 5) + { + // First half + uint p = uindex / 5; + uint r = uindex - p * 5; + if (r < 2) + { + pt = outsideInsideEdgePointBaseOffset.x + p + r; + } + else if (r < 4) + { + pt = outsideInsideEdgePointBaseOffset.y + p; + } + else + { + pt = -1; + } + } + else if (uindex < (numInsideEdgePoints / 2 - 1) * 5 + 8) + { + // Middle + uint r = uindex - (numInsideEdgePoints / 2 - 1) * 5; + if (0 == r) + { + pt = outsideInsideEdgePointBaseOffset.x + numInsideEdgePoints / 2 - 1; + } + else if (r < 3) + { + pt = outsideInsideEdgePointBaseOffset.y + numInsideEdgePoints / 2 - 1 + (2 - r); + } + else if (r == 3) + { + pt = -1; + } + else if (r < 6) + { + pt = outsideInsideEdgePointBaseOffset.x + numInsideEdgePoints / 2 - 1 + (r - 4); + } + else if (r == 6) + { + pt = outsideInsideEdgePointBaseOffset.y + numInsideEdgePoints / 2 - 1 + 1; + } + else if (r == 7) + { + pt = -1; + } + } + //else if (uindex < (numInsideEdgePoints/2-1) * 5 + 8 + (numInsideEdgePoints - numInsideEdgePoints/2 - 1) * 5) + else if (uindex < numInsideEdgePoints * 5 - 2) + { + // Second half + uint p = (uindex - (numInsideEdgePoints / 2 - 1) * 5 + 8) / 5 + numInsideEdgePoints / 2 + 1; + uint r = uindex - (numInsideEdgePoints / 2 - 1) * 5 + 8 - (p - (numInsideEdgePoints / 2 + 1)) * 5; + if (r < 2) + { + pt = outsideInsideEdgePointBaseOffset.x + p - 1 + r; + } + else if (r < 4) + { + pt = outsideInsideEdgePointBaseOffset.y + p - 1 + r; + } + else + { + pt = -1; + } + } + else + { + //int r = i - (4 + 
(numInsideEdgePoints/2-1) * 5 + 8 + (numInsideEdgePoints - numInsideEdgePoints/2 - 1) * 5); + int r = i - (numInsideEdgePoints * 5 + 2); + if (r < 2) + { + pt = outsideInsideEdgePointBaseOffset.x + numInsideEdgePoints - 1 + r; + } + else if (r == 2) + { + pt = outsideInsideEdgePointBaseOffset.y + numInsideEdgePoints - 1; + } + else + { + pt = -1; + } + } + break; + + case DIAGONALS_MIRRORED: + if (uindex < (numInsideEdgePoints / 2 + 1) * 2) + { + uint p = uindex / 2; + uint r = uindex - p * 2; + if (0 == r) + { + pt = outsideInsideEdgePointBaseOffset.y + p; + } + else + { + pt = outsideInsideEdgePointBaseOffset.x + p; + } + } + else if (uindex == (numInsideEdgePoints / 2 + 1) * 2) + { + pt = -1; + } + else if (uindex == (numInsideEdgePoints / 2 + 1) * 2 + 1) + { + pt = outsideInsideEdgePointBaseOffset.x + numInsideEdgePoints / 2; + } + //else if (uindex < (numInsideEdgePoints / 2 + 1) * 2 + 2 + (numInsideEdgePoints - numInsideEdgePoints / 2) * 2) + else if (uindex < numInsideEdgePoints * 2 + 4) + { + uint p = (uindex - ((numInsideEdgePoints / 2 + 1) * 2 + 2)) / 2 + numInsideEdgePoints / 2; + uint r = uindex - ((numInsideEdgePoints / 2 + 1) * 2 + 2) - (p - numInsideEdgePoints / 2) * 2; + if (0 == r) + { + pt = outsideInsideEdgePointBaseOffset.x + p; + } + else + { + pt = outsideInsideEdgePointBaseOffset.y + p; + } + } + //else if (uindex == (numInsideEdgePoints / 2 + 1) * 2 + 2 + (numInsideEdgePoints - numInsideEdgePoints / 2) * 2) + else if (uindex == numInsideEdgePoints * 2 + 4) + { + pt = -1; + } + else + { + //int r = i - (4 + (numInsideEdgePoints / 2 + 1) * 2 + 2 + (numInsideEdgePoints - numInsideEdgePoints / 2) * 2 + 1); + uint r = i - (numInsideEdgePoints * 2 + 9); + if (r < 2) + { + pt = outsideInsideEdgePointBaseOffset.x + numInsideEdgePoints - 1 + r; + } + else if (r == 2) + { + pt = outsideInsideEdgePointBaseOffset.y + numInsideEdgePoints - 1; + } + else + { + pt = -1; + } + } + break; + } + } + + return pt; +} + +int AStitchTransition(int2 
outsideInsideEdgePointBaseOffset, int2 outsideInsideNumHalfTessFactorPoints, + int2 outsideInsideEdgeTessFactorParity, + uint i) +{ + outsideInsideNumHalfTessFactorPoints -= (TESSELLATOR_PARITY_ODD == outsideInsideEdgeTessFactorParity); + + uint2 out_in_first_half = uint2(outsidePointIndex[outsideInsideNumHalfTessFactorPoints.x][MAX_FACTOR / 2 + 1].y, insidePointIndex[outsideInsideNumHalfTessFactorPoints.y][MAX_FACTOR / 2 + 1].y) * 4; + + uint3 out_in_middle = 0; + if ((outsideInsideEdgeTessFactorParity.y != outsideInsideEdgeTessFactorParity.x) || (outsideInsideEdgeTessFactorParity.y == TESSELLATOR_PARITY_ODD)) + { + if (outsideInsideEdgeTessFactorParity.y == outsideInsideEdgeTessFactorParity.x) + { + // Quad in the middle + out_in_middle.z = 5; + out_in_middle.xy = 1; + } + else if (TESSELLATOR_PARITY_EVEN == outsideInsideEdgeTessFactorParity.y) + { + // Triangle pointing inside + out_in_middle.z = 4; + out_in_middle.x = 1; + } + else + { + // Triangle pointing outside + out_in_middle.z = 4; + out_in_middle.y = 1; + } + } + + + int pt = -1; + + if (i < out_in_first_half.y) + { + // Advance inside + + uint p = i / 4; + uint r = i - p * 4; + p = insidePointIndex[outsideInsideNumHalfTessFactorPoints.y][p].z; + if ((0 == r) || (2 == r)) + { + pt = outsideInsideEdgePointBaseOffset.y + insidePointIndex[outsideInsideNumHalfTessFactorPoints.y][p].y + r / 2; + } + else if (1 == r) + { + pt = outsideInsideEdgePointBaseOffset.x + outsidePointIndex[outsideInsideNumHalfTessFactorPoints.x][p].y; + } + } + else + { + i -= out_in_first_half.y; + + if (i < out_in_first_half.x) + { + // Advance outside + + uint p = i / 4; + uint r = i - p * 4; + p = outsidePointIndex[outsideInsideNumHalfTessFactorPoints.x][p].z; + if (r < 2) + { + pt = outsideInsideEdgePointBaseOffset.x + outsidePointIndex[outsideInsideNumHalfTessFactorPoints.x][p].y + r; + } + else if (r == 2) + { + pt = outsideInsideEdgePointBaseOffset.y + insidePointIndex[outsideInsideNumHalfTessFactorPoints.y][p].y; + if 
(insidePointIndex[outsideInsideNumHalfTessFactorPoints.y][p].x) + { + ++ pt; + } + } + } + else + { + i -= out_in_first_half.x; + + if (i < out_in_middle.z) + { + uint r = i; + if (outsideInsideEdgeTessFactorParity.y == outsideInsideEdgeTessFactorParity.x) + { + // Quad in the middle + if ((0 == r) || (2 == r)) + { + pt = outsideInsideEdgePointBaseOffset.y + out_in_first_half.y / 4 + (2 == r);//r / 2; + } + else if ((1 == r) || (3 == r)) + { + pt = outsideInsideEdgePointBaseOffset.x + out_in_first_half.x / 4 + (3 == r);//(r - 1) / 2; + } + } + else if (TESSELLATOR_PARITY_EVEN == outsideInsideEdgeTessFactorParity.y) + { + // Triangle pointing inside + if (r == 0) + { + pt = outsideInsideEdgePointBaseOffset.y + out_in_first_half.y / 4; + } + else if (r < 3) + { + pt = outsideInsideEdgePointBaseOffset.x + out_in_first_half.x / 4 + r - 1; + } + } + else + { + // Triangle pointing outside + if ((0 == r) || (2 == r)) + { + pt = outsideInsideEdgePointBaseOffset.y + out_in_first_half.y / 4 + (2 == r);//r / 2; + } + else if (1 == r) + { + pt = outsideInsideEdgePointBaseOffset.x + out_in_first_half.x / 4; + } + } + } + else + { + i -= out_in_middle.z; + + if (i < out_in_first_half.x) + { + // Advance outside + + uint p = i / 4; + uint r = i - p * 4; + p = outsidePointIndex[outsideInsideNumHalfTessFactorPoints.x][p].z; + if (r < 2) + { + pt = outsideInsideEdgePointBaseOffset.x + out_in_first_half.x / 4 + out_in_middle.x + (outsidePointIndex[outsideInsideNumHalfTessFactorPoints.x][MAX_FACTOR / 2 + 1].y - outsidePointIndex[outsideInsideNumHalfTessFactorPoints.x][p + 1].y) + r; + } + else if (r == 2) + { + pt = outsideInsideEdgePointBaseOffset.y + out_in_first_half.y / 4 + out_in_middle.y + (insidePointIndex[outsideInsideNumHalfTessFactorPoints.y][MAX_FACTOR / 2 + 1].y - insidePointIndex[outsideInsideNumHalfTessFactorPoints.y][p + 1].y); + } + } + else + { + // Advance inside + + i -= out_in_first_half.x; + + uint p = i / 4; + uint r = i - p * 4; + p = 
insidePointIndex[outsideInsideNumHalfTessFactorPoints.y][p].w; + if ((0 == r) || (2 == r)) + { + pt = outsideInsideEdgePointBaseOffset.y + out_in_first_half.y / 4 + out_in_middle.y + + (insidePointIndex[outsideInsideNumHalfTessFactorPoints.y][MAX_FACTOR / 2 + 1].y - insidePointIndex[outsideInsideNumHalfTessFactorPoints.y][p + 1].y) + (2 == r);//r / 2; + } + else if (1 == r) + { + pt = outsideInsideEdgePointBaseOffset.x + out_in_first_half.x / 4 + out_in_middle.x + + (outsidePointIndex[outsideInsideNumHalfTessFactorPoints.x][MAX_FACTOR / 2 + 1].y - outsidePointIndex[outsideInsideNumHalfTessFactorPoints.x][p + 1].y); + if (outsidePointIndex[outsideInsideNumHalfTessFactorPoints.x][p].x) + { + ++ pt; + } + } + } + } + } + } + + return pt; +} + +[numthreads(128, 1, 1)] +void CSTessellationIndices( uint3 DTid : SV_DispatchThreadID, uint3 Gid : SV_GroupID, uint GI : SV_GroupIndex ) +{ + uint id = DTid.x; + //uint id = Gid.x * 128 + GI; // Workaround for some CS4x preview drivers + + if ( id < g_param.x ) + { + uint tri_id = InputTriIDIndexID[id].x; + uint index_id = InputTriIDIndexID[id].y; + uint base_vertex = InputScanned[tri_id-1].x; + + float4 outside_inside_factor = InputEdgeFactor[tri_id]; + + PROCESSED_TESS_FACTORS_TRI processedTessFactors; + int num_points = TriProcessTessFactors(outside_inside_factor, processedTessFactors, g_partitioning); + + uint tessed_indices; + if (3 == num_points) + { + if (index_id < 3) + { + tessed_indices = TransformIndex1(index_id, base_vertex); + } + else + { + tessed_indices = -1; + } + } + else + { + // Generate primitives for all the concentric rings, one side at a time for each ring + static const int startRing = 1; + int numRings = ((processedTessFactors.numPointsForOutsideInside.w + 1) / 2); // +1 is so even tess includes the center point, which we want to now + + int4 outsideInsideHalfTessFactor = int4(ceil(processedTessFactors.outsideInsideHalfTessFactor)); + uint3 num = NumStitchTransition(outsideInsideHalfTessFactor, 
processedTessFactors.outsideInsideTessFactorParity); + num.y += num.x; + num.z += num.y; + uint num_index = num.z; + num_index += TotalNumStitchRegular(true, DIAGONALS_MIRRORED, processedTessFactors.numPointsForOutsideInside.w, numRings - 1) * 3; + if( processedTessFactors.outsideInsideTessFactorParity.w == TESSELLATOR_PARITY_ODD ) + { + num_index += 4; + } + + int pt; + + if (index_id < num.x) + { + int numPointsForInsideEdge = processedTessFactors.numPointsForOutsideInside.w - 2 * startRing; + + pt = AStitchTransition(int2(0, processedTessFactors.insideEdgePointBaseOffset), + outsideInsideHalfTessFactor.xw, + processedTessFactors.outsideInsideTessFactorParity.xw, + index_id); + if (pt != -1) + { + pt = TransformIndex1(pt, base_vertex); + } + } + else if (index_id < num.y) + { + int numPointsForInsideEdge = processedTessFactors.numPointsForOutsideInside.w - 2 * startRing; + + pt = AStitchTransition( + int2(processedTessFactors.numPointsForOutsideInside.x - 1, processedTessFactors.insideEdgePointBaseOffset + numPointsForInsideEdge - 1), + outsideInsideHalfTessFactor.yw, + processedTessFactors.outsideInsideTessFactorParity.yw, + index_id - num.x); + if (pt != -1) + { + pt = TransformIndex1(pt, base_vertex); + } + } + else if (index_id < num.z) + { + int numPointsForInsideEdge = processedTessFactors.numPointsForOutsideInside.w - 2 * startRing; + + INDEX_PATCH_CONTEXT IndexPatchContext; + IndexPatchContext.insidePointIndexDeltaToRealValue = processedTessFactors.insideEdgePointBaseOffset + 2 * (numPointsForInsideEdge - 1); + IndexPatchContext.insidePointIndexBadValue = numPointsForInsideEdge - 1; + IndexPatchContext.insidePointIndexReplacementValue = processedTessFactors.insideEdgePointBaseOffset; + IndexPatchContext.outsidePointIndexPatchBase = IndexPatchContext.insidePointIndexBadValue+1; // past inside patched index range + IndexPatchContext.outsidePointIndexDeltaToRealValue = processedTessFactors.numPointsForOutsideInside.x + 
processedTessFactors.numPointsForOutsideInside.y - 2 + - IndexPatchContext.outsidePointIndexPatchBase; + IndexPatchContext.outsidePointIndexBadValue = IndexPatchContext.outsidePointIndexPatchBase + + processedTessFactors.numPointsForOutsideInside.z - 1; + IndexPatchContext.outsidePointIndexReplacementValue = 0; + + pt = AStitchTransition(int2(numPointsForInsideEdge, 0), + outsideInsideHalfTessFactor.zw, + processedTessFactors.outsideInsideTessFactorParity.zw, + index_id - num.y); + if (pt != -1) + { + pt = TransformIndex2(pt, base_vertex, IndexPatchContext); + } + } + else + { + if ((processedTessFactors.outsideInsideTessFactorParity.w == TESSELLATOR_PARITY_ODD) && (index_id >= num_index - 4)) + { + int outsideEdgePointBaseOffset = processedTessFactors.insideEdgePointBaseOffset + + ((processedTessFactors.numPointsForOutsideInside.w + 1) - (numRings + startRing)) * (numRings - startRing - 1) * 3; + + if (index_id - (num_index - 4) != 3) + { + pt = TransformIndex1(outsideEdgePointBaseOffset + index_id - (num_index - 4), base_vertex); + } + else + { + pt = -1; + } + } + else + { + int ring = GetRingFromIndexStitchRegular(true, DIAGONALS_MIRRORED, processedTessFactors.numPointsForOutsideInside.w, index_id - num.z); + + int tn = TotalNumStitchRegular(true, DIAGONALS_MIRRORED, processedTessFactors.numPointsForOutsideInside.w, ring - 1) * 3; + int n = NumStitchRegular(true, DIAGONALS_MIRRORED, processedTessFactors.numPointsForOutsideInside.w - 2 * ring); + + int edge = (index_id - num.z - tn) / n; + int index = (index_id - num.z - tn) - edge * n; + + int2 outsideInsideEdgePointBaseOffset = processedTessFactors.insideEdgePointBaseOffset + + int2(0, 3 * (processedTessFactors.numPointsForOutsideInside.w - 3)) + + ((processedTessFactors.numPointsForOutsideInside.w - (ring + startRing)) + int2(1, -1)) * (ring - startRing - 1) * 3; + + int numPointsForInsideEdge = processedTessFactors.numPointsForOutsideInside.w - 2 * ring; + int numLastPointsForInsideEdge = 
numPointsForInsideEdge + 2; + + if (edge < 2) + { + pt = AStitchRegular(true, DIAGONALS_MIRRORED, + numPointsForInsideEdge, + outsideInsideEdgePointBaseOffset + (int2(numLastPointsForInsideEdge, numPointsForInsideEdge) - 1) * edge, + index); + if (pt != -1) + { + pt = TransformIndex1(pt, base_vertex); + } + } + else + { + INDEX_PATCH_CONTEXT IndexPatchContext; + IndexPatchContext.insidePointIndexDeltaToRealValue = outsideInsideEdgePointBaseOffset.y + (numPointsForInsideEdge - 1) * 2; + IndexPatchContext.insidePointIndexBadValue = numPointsForInsideEdge - 1; + IndexPatchContext.insidePointIndexReplacementValue = outsideInsideEdgePointBaseOffset.y; + IndexPatchContext.outsidePointIndexPatchBase = IndexPatchContext.insidePointIndexBadValue+1; // past inside patched index range + IndexPatchContext.outsidePointIndexDeltaToRealValue = outsideInsideEdgePointBaseOffset.x + (numLastPointsForInsideEdge - 1) * 2 + - IndexPatchContext.outsidePointIndexPatchBase; + IndexPatchContext.outsidePointIndexBadValue = IndexPatchContext.outsidePointIndexPatchBase + + numLastPointsForInsideEdge - 1; + IndexPatchContext.outsidePointIndexReplacementValue = outsideInsideEdgePointBaseOffset.x; + + pt = AStitchRegular(true, DIAGONALS_MIRRORED, + numPointsForInsideEdge, + int2(numPointsForInsideEdge, 0), + index); + if (pt != -1) + { + pt = TransformIndex2(pt, base_vertex, IndexPatchContext); + } + } + } + } + + tessed_indices = pt; + } + + TessedIndicesOut.Store(id*4, tessed_indices); + } +} diff --git a/tests/hlsl/dxsdk/AdaptiveTessellationCS40/TessellatorCS40_TessellateVerticesCS.hlsl b/tests/hlsl/dxsdk/AdaptiveTessellationCS40/TessellatorCS40_TessellateVerticesCS.hlsl new file mode 100644 index 000000000..55bf1be87 --- /dev/null +++ b/tests/hlsl/dxsdk/AdaptiveTessellationCS40/TessellatorCS40_TessellateVerticesCS.hlsl @@ -0,0 +1,206 @@ +//TEST_IGNORE_FILE: Currently failing due to Spire compiler issues. 
+//TEST:COMPARE_HLSL: -target dxbc-assembly -profile cs_4_0 -entry CSTessellationVertices
+//--------------------------------------------------------------------------------------
+// File: TessellatorCS40_TessellateVerticesCS.hlsl
+//
+// The CS to tessellate vertices
+//
+// Copyright (c) Microsoft Corporation. All rights reserved.
+//--------------------------------------------------------------------------------------
+
+#include "TessellatorCS40_common.hlsl"
+
+// Per output vertex: .x is the source triangle id, .y is the vertex index inside that triangle.
+StructuredBuffer<uint2> InputTriIDIndexID : register(t0);
+// Per source triangle: tessellation factors (xyz are clamped as edge factors, w as the inside factor).
+StructuredBuffer<float4> InputEdgeFactor : register(t1);
+
+// One generated vertex: the triangle it belongs to and the 2D parametric location computed below.
+struct TessedVertex
+{
+    uint BaseTriID;
+    float2 bc;
+};
+RWStructuredBuffer<TessedVertex> TessedVerticesOut : register(u0);
+
+cbuffer cbCS : register(b1)
+{
+    // Only g_param.x is read in this file: the number of vertices to generate.
+    uint4 g_param;
+}
+
+// Computes the 1D location of point `pt` for the tess factor selected by `ctx_index`
+// (an index into the 4-component fields of `processedTessFactors`).
+//   pt       - point index; indices at or past the half-point count are mirrored
+//              and their result is flipped to 1 - location.
+//   location - receives the result; exactly 0.5 for the middle point.
+//   parity   - TESSELLATOR_PARITY_* of that tess factor; odd parity shifts the
+//              mirrored index by one.
+void PlacePointIn1D(PROCESSED_TESS_FACTORS_TRI processedTessFactors, int ctx_index, int pt, out float location, int parity)
+{
+    int numHalfTessFactorPoints = int(ceil(processedTessFactors.outsideInsideHalfTessFactor[ctx_index]));
+
+    bool bFlip;
+    if( pt >= numHalfTessFactorPoints )
+    {
+        pt = (numHalfTessFactorPoints << 1) - pt;
+        if( TESSELLATOR_PARITY_ODD == parity )
+        {
+            pt -= 1;
+        }
+        bFlip = true;
+    }
+    else
+    {
+        bFlip = false;
+    }
+
+    if( pt == numHalfTessFactorPoints )
+    {
+        location = 0.5f;
+    }
+    else
+    {
+        unsigned int indexOnCeilHalfTessFactor = pt;
+        unsigned int indexOnFloorHalfTessFactor = indexOnCeilHalfTessFactor;
+        if( pt > processedTessFactors.outsideInsideSplitPointOnFloorHalfTessFactor[ctx_index] )
+        {
+            indexOnFloorHalfTessFactor -= 1;
+        }
+        float locationOnFloorHalfTessFactor = indexOnFloorHalfTessFactor * processedTessFactors.outsideInsideInvNumSegmentsOnFloorTessFactor[ctx_index];
+        float locationOnCeilHalfTessFactor = indexOnCeilHalfTessFactor * processedTessFactors.outsideInsideInvNumSegmentsOnCeilTessFactor[ctx_index];
+
+        // Blend between the floor and ceil placements by the fractional part of the half tess factor.
+        location = lerp(locationOnFloorHalfTessFactor, locationOnCeilHalfTessFactor, frac(processedTessFactors.outsideInsideHalfTessFactor[ctx_index]));
+
+        if( bFlip )
+        {
+            location = 1.0f - location;
+        }
+    }
+}
+
+// Entry point: one thread per generated vertex (threads with id >= g_param.x do nothing).
+// Reads (tri_id, vert_id) from InputTriIDIndexID[id], processes the factors of that
+// triangle with TriProcessTessFactors and writes { tri_id, uv } to TessedVerticesOut[id].
+[numthreads(128, 1, 1)]
+void CSTessellationVertices( uint3 DTid : SV_DispatchThreadID, uint3 Gid : SV_GroupID, uint GI : SV_GroupIndex )
+{
+    uint id = DTid.x;
+    //uint id = Gid.x * 128 + GI; // Workaround for some CS4x preview drivers
+
+    if ( id < g_param.x )
+    {
+        uint tri_id = InputTriIDIndexID[id].x;
+        uint vert_id = InputTriIDIndexID[id].y;
+
+        float4 outside_inside_factor = InputEdgeFactor[tri_id];
+
+        PROCESSED_TESS_FACTORS_TRI processedTessFactors;
+        int num_points = TriProcessTessFactors(outside_inside_factor, processedTessFactors, g_partitioning);
+
+        // Start from a defined value: the interior branch below leaves uv untouched
+        // when the index is past the last ring and the inside parity is odd, yet uv
+        // is stored unconditionally at the end.
+        float2 uv = float2(0, 0);
+        if (3 == num_points)
+        {
+            // Untessellated triangle: emit its three corners.
+            if (0 == vert_id)
+            {
+                uv = float2(0, 1);
+            }
+            else if (1 == vert_id)
+            {
+                uv = float2(0, 0);
+            }
+            else
+            {
+                uv = float2(1, 0);
+            }
+        }
+        else
+        {
+            if (vert_id < processedTessFactors.insideEdgePointBaseOffset)
+            {
+                // Generate exterior ring edge points, clockwise starting from point V (VW, the U==0 edge)
+
+                // Find which outside edge vert_id falls on, rebasing vert_id to that edge.
+                int edge;
+                if (vert_id < processedTessFactors.numPointsForOutsideInside.x - 1)
+                {
+                    edge = 0;
+                }
+                else
+                {
+                    vert_id -= processedTessFactors.numPointsForOutsideInside.x - 1;
+                    if (vert_id < processedTessFactors.numPointsForOutsideInside.y - 1)
+                    {
+                        edge = 1;
+                    }
+                    else
+                    {
+                        vert_id -= processedTessFactors.numPointsForOutsideInside.y - 1;
+                        edge = 2;
+                    }
+                }
+
+                int p = vert_id;
+                int endPoint = processedTessFactors.numPointsForOutsideInside[edge] - 1;
+                float param;
+                int q = (edge & 0x1) ? p : endPoint - p; // whether to reverse point order given we are defining V or U (W implicit):
+                // edge0, VW, has V decreasing, so reverse 1D points below
+                // edge1, WU, has U increasing, so don't reverse 1D points below
+                // edge2, UV, has U decreasing, so reverse 1D points below
+                PlacePointIn1D(processedTessFactors, edge,q,param, processedTessFactors.outsideInsideTessFactorParity[edge]);
+                if (0 == edge)
+                {
+                    uv = float2(0, param);
+                }
+                else if (1 == edge)
+                {
+                    uv = float2(param, 0);
+                }
+                else
+                {
+                    uv = float2(param, 1 - param);
+                }
+            }
+            else
+            {
+                // Generate interior ring points, clockwise spiralling in
+
+                uint index = vert_id - processedTessFactors.insideEdgePointBaseOffset;
+                // Recover the ring number from the running point index, then rebase index to that ring.
+                uint ring = 1 + (((3 * processedTessFactors.numPointsForOutsideInside.w - 6) - sqrt(sqr(3 * processedTessFactors.numPointsForOutsideInside.w - 6) - 4 * 3 * index)) + 0.001f) / 6;
+                index -= 3 * (processedTessFactors.numPointsForOutsideInside.w - ring - 1) * (ring - 1);
+
+                uint startPoint = ring;
+                uint endPoint = processedTessFactors.numPointsForOutsideInside.w - 1 - startPoint;
+                if (index < 3 * (endPoint - startPoint))
+                {
+                    uint edge = index / (endPoint - startPoint);
+                    uint p = index - edge * (endPoint - startPoint) + startPoint;
+
+                    int perpendicularAxisPoint = startPoint;
+                    float perpParam;
+                    PlacePointIn1D(processedTessFactors, 3, perpendicularAxisPoint, perpParam, processedTessFactors.outsideInsideTessFactorParity.w);
+                    perpParam = perpParam * 2 / 3;
+
+                    float param;
+                    int q = (edge & 0x1) ? p : endPoint - (p - startPoint); // whether to reverse point given we are defining V or U (W implicit):
+                    // edge0, VW, has V decreasing, so reverse 1D points below
+                    // edge1, WU, has U increasing, so don't reverse 1D points below
+                    // edge2, UV, has U decreasing, so reverse 1D points below
+                    PlacePointIn1D(processedTessFactors, 3, q,param, processedTessFactors.outsideInsideTessFactorParity.w);
+                    // edge0 VW, has perpendicular parameter U constant
+                    // edge1 WU, has perpendicular parameter V constant
+                    // edge2 UV, has perpendicular parameter W constant
+                    const unsigned int deriv = 2; // reciprocal is the rate of change of edge-parallel parameters as they are pushed into the triangle
+                    if (0 == edge)
+                    {
+                        uv = float2(perpParam, param - perpParam / deriv);
+                    }
+                    else if (1 == edge)
+                    {
+                        uv = float2(param - perpParam / deriv, perpParam);
+                    }
+                    else
+                    {
+                        uv = float2(param - perpParam / deriv, 1 - (param - perpParam / deriv + perpParam));
+                    }
+                }
+                else
+                {
+                    if( processedTessFactors.outsideInsideTessFactorParity.w != TESSELLATOR_PARITY_ODD )
+                    {
+                        // Last point is the point at the center.
+                        uv = 1 / 3.0f;
+                    }
+                }
+            }
+        }
+
+        TessedVerticesOut[id].BaseTriID = tri_id;
+        TessedVerticesOut[id].bc = uv;
+    }
+}
diff --git a/tests/hlsl/dxsdk/AdaptiveTessellationCS40/TessellatorCS40_common.hlsl b/tests/hlsl/dxsdk/AdaptiveTessellationCS40/TessellatorCS40_common.hlsl
new file mode 100644
index 000000000..309044cdb
--- /dev/null
+++ b/tests/hlsl/dxsdk/AdaptiveTessellationCS40/TessellatorCS40_common.hlsl
@@ -0,0 +1,411 @@
+//TEST_IGNORE_FILE:
+//--------------------------------------------------------------------------------------
+// File: TessellatorCS40_common.hlsl
+//
+// The common utils included by other shaders in the sample
+//
+// Copyright (c) Microsoft Corporation. All rights reserved.
+//--------------------------------------------------------------------------------------
+
+#include "TessellatorCS40_defines.h"
+
+// Lookup tables indexed as [half tess factor point count][point]; the extra column
+// [MAX_FACTOR / 2 + 1] is read below as a per-row total (its .y component).
+// NOTE(review): the table contents are filled in outside this file - confirm against the host code.
+cbuffer cbNeverChanges : register(b0)
+{
+    uint4 insidePointIndex[MAX_FACTOR / 2 + 1][MAX_FACTOR / 2 + 2];
+    uint4 outsidePointIndex[MAX_FACTOR / 2 + 1][MAX_FACTOR / 2 + 2];
+}
+
+#define D3D11_TESSELLATOR_MAX_EVEN_TESSELLATION_FACTOR ( 64 )
+#define D3D11_TESSELLATOR_MAX_ODD_TESSELLATION_FACTOR ( 63 )
+#define D3D11_TESSELLATOR_MIN_EVEN_TESSELLATION_FACTOR ( 2 )
+#define D3D11_TESSELLATOR_MIN_ODD_TESSELLATION_FACTOR ( 1 )
+
+#define D3D11_TESSELLATOR_PARTITIONING_INTEGER ( 0 )
+#define D3D11_TESSELLATOR_PARTITIONING_POW2 ( 1 )
+#define D3D11_TESSELLATOR_PARTITIONING_FRACTIONAL_ODD ( 2 )
+#define D3D11_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN ( 3 )
+
+#define TESSELLATOR_PARITY_EVEN ( 0 )
+#define TESSELLATOR_PARITY_ODD ( 1 )
+
+#define EPSILON 1e-6f
+#define MIN_ODD_TESSFACTOR_PLUS_HALF_EPSILON (D3D11_TESSELLATOR_MIN_ODD_TESSELLATION_FACTOR + EPSILON/2)
+
+#define DIAGONALS_INSIDE_TO_OUTSIDE ( 0 )
+#define DIAGONALS_INSIDE_TO_OUTSIDE_EXCEPT_MIDDLE ( 1 )
+#define DIAGONALS_MIRRORED ( 2 )
+
+
+// This is moved to macro defines at shader compile time, so that the partitioning mode can be changed during runtime
+//#define g_partitioning (D3D11_TESSELLATOR_PARTITIONING_POW2)
+
+
+// Result of TriProcessTessFactors. In every 4-component field, xyz belong to the
+// three outside edges and w to the inside tess factor.
+struct PROCESSED_TESS_FACTORS_TRI
+{
+    float4 outsideInsideTessFactor;
+    int4 outsideInsideTessFactorParity;
+
+    float4 outsideInsideInvNumSegmentsOnFloorTessFactor;
+    float4 outsideInsideInvNumSegmentsOnCeilTessFactor;
+    float4 outsideInsideHalfTessFactor;
+    int4 outsideInsideSplitPointOnFloorHalfTessFactor;
+
+    // Stuff below is specific to the traversal order
+    uint4 numPointsForOutsideInside;
+    uint insideEdgePointBaseOffset;
+};
+
+// Index remapping parameters filled in by the index-generation shader.
+// NOTE(review): the consumer of these fields is not defined in this file - confirm their exact meaning there.
+struct INDEX_PATCH_CONTEXT
+{
+    int insidePointIndexDeltaToRealValue;
+    int insidePointIndexBadValue;
+    int insidePointIndexReplacementValue;
+    int outsidePointIndexPatchBase;
+    int outsidePointIndexDeltaToRealValue;
+    int outsidePointIndexBadValue;
+    int outsidePointIndexReplacementValue;
+};
+
+// Per component: true when the value, converted to uint, has its lowest bit clear.
+bool4 isEven(float4 input)
+{
+    return (((uint4)input) & 1) ? false : true;
+}
+
+// Returns `val` with its most significant set bit cleared (0 when val is 0).
+// The search starts at the top bit of the byte that holds the MSB, chosen from
+// the magnitude of val, and probes at most 8 bits downwards.
+uint RemoveMSB(uint val)
+{
+    // The probe mask is unsigned so that `>>=` is a logical shift and the mask stays
+    // a single bit. A signed mask starting at 0x80000000 is negative and sign-extends
+    // when shifted, turning into multi-bit masks (0xC0000000, 0xE0000000, ...).
+    uint check;
+    if( val <= 0x0000ffff )
+    {
+        check = ( val <= 0x000000ff ) ? 0x00000080u : 0x00008000u;
+    }
+    else
+    {
+        check = ( val <= 0x00ffffff ) ? 0x00800000u : 0x80000000u;
+    }
+    for (int i = 0; i < 8; i++, check >>= 1)
+    {
+        if( val & check )
+        {
+            return (val & ~check);
+        }
+    }
+    return 0;
+}
+
+// Per component number of points for a tess factor:
+// odd parity -> ceil(0.5 + f / 2) * 2, even parity -> ceil(f / 2) * 2 + 1.
+uint4 NumPointsForTessFactor(float4 tessFactor, int4 parity)
+{
+    return TESSELLATOR_PARITY_ODD == parity ? uint4(ceil(0.5f + tessFactor / 2)) * 2 : uint4(ceil(tessFactor / 2)) * 2 + 1;
+}
+
+// Derives the per-tess-factor values used when placing points:
+//   halfTessFactor                  - tessFactor / 2, plus 0.5 for odd parity or when the half factor is exactly 0.5
+//   splitPointOnFloorHalfTessFactor - per component split index (set past the last point when floor == ceil)
+//   invNumSegmentsOnFloor/Ceil...   - reciprocal segment counts for the floor and ceil of the half factor
+void ComputeTessFactorContext(float4 tessFactor, int4 parity,
+    out float4 invNumSegmentsOnFloorTessFactor,
+    out float4 invNumSegmentsOnCeilTessFactor,
+    out float4 halfTessFactor,
+    out int4 splitPointOnFloorHalfTessFactor)
+{
+    halfTessFactor = tessFactor / 2;
+
+    halfTessFactor += 0.5 * ((TESSELLATOR_PARITY_ODD == parity) | (0.5f == halfTessFactor));
+
+    float4 floorHalfTessFactor = floor(halfTessFactor);
+    float4 ceilHalfTessFactor = ceil(halfTessFactor);
+    int4 numHalfTessFactorPoints = int4(ceilHalfTessFactor);
+
+    for (int index = 0; index < 4; ++ index)
+    {
+        if( ceilHalfTessFactor[index] == floorHalfTessFactor[index] )
+        {
+            splitPointOnFloorHalfTessFactor[index] = /*pick value to cause this to be ignored*/ numHalfTessFactorPoints[index]+1;
+        }
+        else if( TESSELLATOR_PARITY_ODD == parity[index] )
+        {
+            if( floorHalfTessFactor[index] == 1 )
+            {
+                splitPointOnFloorHalfTessFactor[index] = 0;
+            }
+            else
+            {
+                splitPointOnFloorHalfTessFactor[index] = (RemoveMSB(int(floorHalfTessFactor[index]) - 1) << 1) + 1;
+            }
+        }
+        else
+        {
+            splitPointOnFloorHalfTessFactor[index] = (RemoveMSB(int(floorHalfTessFactor[index])) << 1) + 1;
+        }
+    }
+
+    int4 numFloorSegments = int4(floorHalfTessFactor * 2);
+    int4 numCeilSegments = int4(ceilHalfTessFactor * 2);
+    // Odd parity has one segment fewer than twice the half factor.
+    int4 s = (TESSELLATOR_PARITY_ODD == parity);
+    numFloorSegments -= s;
+    numCeilSegments -= s;
+    invNumSegmentsOnFloorTessFactor = 1.0f / numFloorSegments;
+    invNumSegmentsOnCeilTessFactor = 1.0f / numCeilSegments;
+}
+
+// Clamps and rounds `tessFactor` in place for the given partitioning mode and fills
+// `processedTessFactors` (zero-initialised first).
+// Returns 0 when any edge factor is not greater than 0 (culled patch), 3 when all
+// four factors are exactly 1 (integer/pow2 partitioning or odd parity), otherwise
+// the total number of points (outside edge points plus interior points).
+int TriProcessTessFactors( inout float4 tessFactor,
+    out PROCESSED_TESS_FACTORS_TRI processedTessFactors,
+    int partitioning )
+{
+    processedTessFactors = (PROCESSED_TESS_FACTORS_TRI)0;
+
+    int parity = TESSELLATOR_PARITY_EVEN;
+    switch( partitioning )
+    {
+    case D3D11_TESSELLATOR_PARTITIONING_INTEGER:
+    default:
+        break;
+    case D3D11_TESSELLATOR_PARTITIONING_FRACTIONAL_ODD:
+        parity = TESSELLATOR_PARITY_ODD;
+        break;
+    case D3D11_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN:
+        parity = TESSELLATOR_PARITY_EVEN;
+        break;
+    }
+
+    // Is the patch culled?
+    if( !(tessFactor.x > 0) || // NaN will pass
+        !(tessFactor.y > 0) ||
+        !(tessFactor.z > 0) )
+    {
+        return 0;
+    }
+
+    // Clamp edge TessFactors
+    float lowerBound, upperBound;
+    switch(partitioning)
+    {
+    case D3D11_TESSELLATOR_PARTITIONING_INTEGER:
+    case D3D11_TESSELLATOR_PARTITIONING_POW2: // don't care about pow2 distinction for validation, just treat as integer
+    default:
+        lowerBound = D3D11_TESSELLATOR_MIN_ODD_TESSELLATION_FACTOR;
+        upperBound = D3D11_TESSELLATOR_MAX_EVEN_TESSELLATION_FACTOR;
+        break;
+
+    case D3D11_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN:
+        lowerBound = D3D11_TESSELLATOR_MIN_EVEN_TESSELLATION_FACTOR;
+        upperBound = D3D11_TESSELLATOR_MAX_EVEN_TESSELLATION_FACTOR;
+        break;
+
+    case D3D11_TESSELLATOR_PARTITIONING_FRACTIONAL_ODD:
+        lowerBound = D3D11_TESSELLATOR_MIN_ODD_TESSELLATION_FACTOR;
+        upperBound = D3D11_TESSELLATOR_MAX_ODD_TESSELLATION_FACTOR;
+        break;
+    }
+
+    tessFactor.xyz = min( upperBound, max( lowerBound, tessFactor.xyz ) );
+
+    // Clamp inside TessFactors
+    if(D3D11_TESSELLATOR_PARTITIONING_FRACTIONAL_ODD == partitioning)
+    {
+        if( (tessFactor.x > MIN_ODD_TESSFACTOR_PLUS_HALF_EPSILON) ||
+            (tessFactor.y > MIN_ODD_TESSFACTOR_PLUS_HALF_EPSILON) ||
+            (tessFactor.z > MIN_ODD_TESSFACTOR_PLUS_HALF_EPSILON))
+            // Don't need the same check for insideTessFactor for tri patches,
+            // since there is only one insideTessFactor, as opposed to quad
+            // patches which have 2 insideTessFactors.
+        {
+            // Force picture frame
+            lowerBound = D3D11_TESSELLATOR_MIN_ODD_TESSELLATION_FACTOR + EPSILON;
+        }
+    }
+
+    tessFactor.w = min( upperBound, max( lowerBound, tessFactor.w ) );
+    // Note the above clamps map NaN to lowerBound
+
+    if (partitioning == D3D11_TESSELLATOR_PARTITIONING_INTEGER)
+    {
+        tessFactor = ceil(tessFactor);
+    }
+    else if (partitioning == D3D11_TESSELLATOR_PARTITIONING_POW2)
+    {
+        static const int exponentMask = 0x7f800000;
+        static const int mantissaMask = 0x007fffff;
+        static const int exponentLSB = 0x00800000;
+
+        // Any factor with mantissa bits set is replaced by a float built from its
+        // exponent field incremented by one; factors with a zero mantissa are kept.
+        int4 bits = asint(tessFactor);
+        tessFactor = bits & mantissaMask ? asfloat((bits & exponentMask) + exponentLSB) : tessFactor;
+    }
+
+    // Process tessFactors
+    if ((partitioning == D3D11_TESSELLATOR_PARTITIONING_INTEGER)|| (partitioning == D3D11_TESSELLATOR_PARTITIONING_POW2))
+    {
+        bool4 e = isEven(tessFactor);
+        processedTessFactors.outsideInsideTessFactorParity.xyz = e.xyz ? TESSELLATOR_PARITY_EVEN : TESSELLATOR_PARITY_ODD;
+        processedTessFactors.outsideInsideTessFactorParity.w = (e.w || (1 == tessFactor.w)) ? TESSELLATOR_PARITY_EVEN : TESSELLATOR_PARITY_ODD;
+    }
+    else
+    {
+        processedTessFactors.outsideInsideTessFactorParity = parity;
+    }
+
+    processedTessFactors.outsideInsideTessFactor = tessFactor;
+
+    if (((partitioning == D3D11_TESSELLATOR_PARTITIONING_INTEGER)|| (partitioning == D3D11_TESSELLATOR_PARTITIONING_POW2)) || (parity == TESSELLATOR_PARITY_ODD))
+    {
+        // Special case if all TessFactors are 1
+        if( (1 == processedTessFactors.outsideInsideTessFactor.x) &&
+            (1 == processedTessFactors.outsideInsideTessFactor.y) &&
+            (1 == processedTessFactors.outsideInsideTessFactor.z) &&
+            (1 == processedTessFactors.outsideInsideTessFactor.w) )
+        {
+            return 3;
+        }
+    }
+
+    // Compute per-TessFactor metadata
+    ComputeTessFactorContext(processedTessFactors.outsideInsideTessFactor, processedTessFactors.outsideInsideTessFactorParity,
+        processedTessFactors.outsideInsideInvNumSegmentsOnFloorTessFactor,
+        processedTessFactors.outsideInsideInvNumSegmentsOnCeilTessFactor,
+        processedTessFactors.outsideInsideHalfTessFactor,
+        processedTessFactors.outsideInsideSplitPointOnFloorHalfTessFactor);
+
+    // Compute some initial data.
+
+    // outside edge offsets and storage
+    processedTessFactors.numPointsForOutsideInside = NumPointsForTessFactor(processedTessFactors.outsideInsideTessFactor, processedTessFactors.outsideInsideTessFactorParity);
+    // The three outside edges share their corner points, hence the - 3.
+    int NumPoints = processedTessFactors.numPointsForOutsideInside.x + processedTessFactors.numPointsForOutsideInside.y + processedTessFactors.numPointsForOutsideInside.z - 3;
+
+    // inside edge offsets
+    {
+        uint pointCountMin = (processedTessFactors.outsideInsideTessFactorParity.w == TESSELLATOR_PARITY_ODD) ? 4 : 3;
+        // max() allows degenerate transition regions when inside TessFactor == 1
+        processedTessFactors.numPointsForOutsideInside.w = max(pointCountMin, processedTessFactors.numPointsForOutsideInside.w);
+    }
+
+    processedTessFactors.insideEdgePointBaseOffset = NumPoints;
+
+    // inside storage, including interior edges above
+    {
+        int numInteriorRings = (processedTessFactors.numPointsForOutsideInside.w >> 1) - 1;
+        int numInteriorPoints;
+        if( processedTessFactors.outsideInsideTessFactorParity.w == TESSELLATOR_PARITY_ODD )
+        {
+            numInteriorPoints = 3*(numInteriorRings*(numInteriorRings+1) - numInteriorRings);
+        }
+        else
+        {
+            numInteriorPoints = 3*(numInteriorRings*(numInteriorRings+1)) + 1;
+        }
+        NumPoints += numInteriorPoints;
+    }
+
+    return NumPoints;
+}
+
+// Number of indices produced when stitching one regular edge that has
+// `numInsideEdgePoints` inside points, for the given diagonal layout
+// (plus 8 when `bTrapezoid` is set).
+int NumStitchRegular(bool bTrapezoid, int diagonals, int numInsideEdgePoints)
+{
+    int num_index = 0;
+
+    if( bTrapezoid )
+    {
+        num_index += 8;
+    }
+    switch( diagonals )
+    {
+    case DIAGONALS_INSIDE_TO_OUTSIDE:
+        // Diagonals pointing from inside edge forward towards outside edge
+        num_index += 5 * numInsideEdgePoints - 5;
+        break;
+
+    case DIAGONALS_INSIDE_TO_OUTSIDE_EXCEPT_MIDDLE: // Assumes ODD tessellation
+        // Diagonals pointing from outside edge forward towards inside edge
+        num_index += 5 * numInsideEdgePoints - 2;
+        break;
+
+    case DIAGONALS_MIRRORED:
+        num_index += 2 * numInsideEdgePoints + 5;
+        break;
+    }
+
+    return num_index;
+}
+
+// Closed-form running total of NumStitchRegular-style counts up to `ring`.
+// Every term carries a factor of (ring - 1), so the result is 0 for ring == 1.
+uint TotalNumStitchRegular(bool bTrapezoid, int diagonals,
+    int numPointsForInsideTessFactor, int ring)
+{
+    uint num_index = 0;
+
+    if( bTrapezoid )
+    {
+        num_index += 8 * (ring - 1);
+    }
+    switch( diagonals )
+    {
+    case DIAGONALS_INSIDE_TO_OUTSIDE:
+        // Diagonals pointing from inside edge forward towards outside edge
+        num_index += (5 * numPointsForInsideTessFactor - 35 - 5 * ring) * (ring - 1);
+        break;
+
+    case DIAGONALS_INSIDE_TO_OUTSIDE_EXCEPT_MIDDLE: // Assumes ODD tessellation
+        // Diagonals pointing from outside edge forward towards inside edge
+        num_index += (5 * numPointsForInsideTessFactor - 12 - 5 * ring) * (ring - 1);
+        break;
+
+    case DIAGONALS_MIRRORED:
+        num_index += (2 * numPointsForInsideTessFactor + 1 - 2 * ring) * (ring - 1);
+        break;
+    }
+
+    return num_index;
+}
+
+// Integer square.
+int sqr(int x)
+{
+    return x * x;
+}
+
+// Maps a running stitch index back to a 1-based ring number using a square-root
+// formula per diagonal layout; the + 0.001f guards the float-to-uint conversion.
+// Returns -1 for an unknown `diagonals` value.
+// NOTE(review): presumably the inverse of TotalNumStitchRegular(...) * 3 - confirm against the caller.
+int GetRingFromIndexStitchRegular(bool bTrapezoid, int diagonals, int numPointsForInsideTessFactor, int index)
+{
+    int t = 0;
+    if (bTrapezoid)
+    {
+        t = 8;
+    }
+
+    switch( diagonals )
+    {
+    case DIAGONALS_INSIDE_TO_OUTSIDE:
+        t = (5 * numPointsForInsideTessFactor - (35 - t)) * 3;
+        return 1 + uint((t + 15) - sqrt(sqr(t + 15) - 4 * 15 * (t + index)) + 0.001f) / 30;
+
+    case DIAGONALS_INSIDE_TO_OUTSIDE_EXCEPT_MIDDLE:
+        t = (5 * numPointsForInsideTessFactor - (12 - t)) * 3;
+        return 1 + uint((t + 15) - sqrt(sqr(t + 15) - 4 * 15 * (t + index)) + 0.001f) / 30;
+
+    case DIAGONALS_MIRRORED:
+        t = ((t + 1) + 2 * numPointsForInsideTessFactor) * 3;
+        return 1 + uint((t + 6) - sqrt(sqr(t + 6) - 4 * 6 * (t + index)) + 0.001f) / 12;
+
+    default:
+        return -1;
+    }
+}
+
+// Per outside edge (x, y, z) number of indices in its transition stitch:
+// 8 per table entry of the inside and outside rows, plus 5 or 4 for the middle
+// piece, which is present when the parities differ or the inside parity is odd.
+uint3 NumStitchTransition(int4 outsideInsideNumHalfTessFactorPoints,
+    int4 outsideInsideEdgeTessFactorParity)
+{
+    // Odd parity uses the table row one below the half-point count.
+    outsideInsideNumHalfTessFactorPoints -= (TESSELLATOR_PARITY_ODD == outsideInsideEdgeTessFactorParity);
+
+    uint3 num_index = insidePointIndex[outsideInsideNumHalfTessFactorPoints.w][MAX_FACTOR / 2 + 1].y * 8;
+
+    [unroll]
+    for (int edge = 0; edge < 3; ++ edge)
+    {
+        num_index[edge] += outsidePointIndex[outsideInsideNumHalfTessFactorPoints[edge]][MAX_FACTOR / 2 + 1].y * 8;
+
+        if( (outsideInsideEdgeTessFactorParity.w != outsideInsideEdgeTessFactorParity[edge]) || (outsideInsideEdgeTessFactorParity.w == TESSELLATOR_PARITY_ODD))
+        {
+            if( outsideInsideEdgeTessFactorParity.w == outsideInsideEdgeTessFactorParity[edge] )
+            {
+                num_index[edge] += 5;
+            }
+            else
+            {
+                num_index[edge] += 4;
+            }
+        }
+    }
+
+    return num_index;
+}
diff --git a/tests/hlsl/dxsdk/AdaptiveTessellationCS40/TessellatorCS40_defines.h
b/tests/hlsl/dxsdk/AdaptiveTessellationCS40/TessellatorCS40_defines.h
new file mode 100644
index 000000000..6b4382393
--- /dev/null
+++ b/tests/hlsl/dxsdk/AdaptiveTessellationCS40/TessellatorCS40_defines.h
@@ -0,0 +1,9 @@
+//--------------------------------------------------------------------------------------
+// File: TessellatorCS40_defines.h
+//
+// This file defines common constants which are included by both CPU code and shader code
+//
+// Copyright (c) Microsoft Corporation. All rights reserved.
+//--------------------------------------------------------------------------------------
+
+// Sizes the point-index lookup tables in TessellatorCS40_common.hlsl, which are
+// declared as [MAX_FACTOR / 2 + 1][MAX_FACTOR / 2 + 2].
+#define MAX_FACTOR 16
diff --git a/tests/hlsl/dxsdk/BC6HBC7EncoderCS/Shaders/BC6HEncode.hlsl b/tests/hlsl/dxsdk/BC6HBC7EncoderCS/Shaders/BC6HEncode.hlsl
new file mode 100644
index 000000000..1e40c80ef
--- /dev/null
+++ b/tests/hlsl/dxsdk/BC6HBC7EncoderCS/Shaders/BC6HEncode.hlsl
@@ -0,0 +1,2567 @@
+//TEST_IGNORE_FILE:
+//--------------------------------------------------------------------------------------
+// File: BC6HEncode.hlsl
+//
+// The Compute Shader for BC6H Encoder
+//
+// Copyright (c) Microsoft Corporation. All rights reserved.
+//-------------------------------------------------------------------------------------- + +//#define REF_DEVICE + +#define UINTLENGTH 32 +#define NCHANNELS 3 +#define SIGNED_F16 96 +#define UNSIGNED_F16 95 +#define MAX_FLOAT asfloat(0x7F7FFFFF) +#define MIN_FLOAT asfloat(0xFF7FFFFF) +#define MAX_INT asint(0x7FFFFFFF) +#define MIN_INT asint(0x80000000) + +cbuffer cbCS : register( b0 ) +{ + uint g_tex_width; + uint g_num_block_x; + uint g_format; //either SIGNED_F16 for DXGI_FORMAT_BC6H_SF16 or UNSIGNED_F16 for DXGI_FORMAT_BC6H_UF16 + uint g_mode_id; + uint g_start_block_id; + uint g_num_total_blocks; +}; + +static const uint candidateModeMemory[14] = { 0x00, 0x01, 0x02, 0x06, 0x0A, 0x0E, 0x12, 0x16, 0x1A, 0x1E, 0x03, 0x07, 0x0B, 0x0F }; +static const uint candidateModeFlag[14] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14 }; +static const bool candidateModeTransformed[14] = { true, true, true, true, true, true, true, true, true, false, false, true, true, true }; +static const uint4 candidateModePrec[14] = { uint4(10,5,5,5), uint4(7,6,6,6), + uint4(11,5,4,4), uint4(11,4,5,4), uint4(11,4,4,5), uint4(9,5,5,5), + uint4(8,6,5,5), uint4(8,5,6,5), uint4(8,5,5,6), uint4(6,6,6,6), + uint4(10,10,10,10), uint4(11,9,9,9), uint4(12,8,8,8), uint4(16,4,4,4) }; + +/*static const uint4x4 candidateSection[32] = +{ + {0,0,1,1, 0,0,1,1, 0,0,1,1, 0,0,1,1}, {0,0,0,1, 0,0,0,1, 0,0,0,1, 0,0,0,1}, {0,1,1,1, 0,1,1,1, 0,1,1,1, 0,1,1,1}, {0,0,0,1, 0,0,1,1, 0,0,1,1, 0,1,1,1}, + {0,0,0,0, 0,0,0,1, 0,0,0,1, 0,0,1,1}, {0,0,1,1, 0,1,1,1, 0,1,1,1, 1,1,1,1}, {0,0,0,1, 0,0,1,1, 0,1,1,1, 1,1,1,1}, {0,0,0,0, 0,0,0,1, 0,0,1,1, 0,1,1,1}, + {0,0,0,0, 0,0,0,0, 0,0,0,1, 0,0,1,1}, {0,0,1,1, 0,1,1,1, 1,1,1,1, 1,1,1,1}, {0,0,0,0, 0,0,0,1, 0,1,1,1, 1,1,1,1}, {0,0,0,0, 0,0,0,0, 0,0,0,1, 0,1,1,1}, + {0,0,0,1, 0,1,1,1, 1,1,1,1, 1,1,1,1}, {0,0,0,0, 0,0,0,0, 1,1,1,1, 1,1,1,1}, {0,0,0,0, 1,1,1,1, 1,1,1,1, 1,1,1,1}, {0,0,0,0, 0,0,0,0, 0,0,0,0, 1,1,1,1}, + {0,0,0,0, 1,0,0,0, 1,1,1,0, 1,1,1,1}, {0,1,1,1, 0,0,0,1, 
0,0,0,0, 0,0,0,0}, {0,0,0,0, 0,0,0,0, 1,0,0,0, 1,1,1,0}, {0,1,1,1, 0,0,1,1, 0,0,0,1, 0,0,0,0}, + {0,0,1,1, 0,0,0,1, 0,0,0,0, 0,0,0,0}, {0,0,0,0, 1,0,0,0, 1,1,0,0, 1,1,1,0}, {0,0,0,0, 0,0,0,0, 1,0,0,0, 1,1,0,0}, {0,1,1,1, 0,0,1,1, 0,0,1,1, 0,0,0,1}, + {0,0,1,1, 0,0,0,1, 0,0,0,1, 0,0,0,0}, {0,0,0,0, 1,0,0,0, 1,0,0,0, 1,1,0,0}, {0,1,1,0, 0,1,1,0, 0,1,1,0, 0,1,1,0}, {0,0,1,1, 0,1,1,0, 0,1,1,0, 1,1,0,0}, + {0,0,0,1, 0,1,1,1, 1,1,1,0, 1,0,0,0}, {0,0,0,0, 1,1,1,1, 1,1,1,1, 0,0,0,0}, {0,1,1,1, 0,0,0,1, 1,0,0,0, 1,1,1,0}, {0,0,1,1, 1,0,0,1, 1,0,0,1, 1,1,0,0} +};*/ + +static const uint candidateSectionBit[32] = +{ + 0xCCCC, 0x8888, 0xEEEE, 0xECC8, + 0xC880, 0xFEEC, 0xFEC8, 0xEC80, + 0xC800, 0xFFEC, 0xFE80, 0xE800, + 0xFFE8, 0xFF00, 0xFFF0, 0xF000, + 0xF710, 0x008E, 0x7100, 0x08CE, + 0x008C, 0x7310, 0x3100, 0x8CCE, + 0x088C, 0x3110, 0x6666, 0x366C, + 0x17E8, 0x0FF0, 0x718E, 0x399C +}; + +static const uint candidateFixUpIndex1D[32] = +{ + 15,15,15,15, + 15,15,15,15, + 15,15,15,15, + 15,15,15,15, + 15, 2, 8, 2, + 2, 8, 8,15, + 2, 8, 2, 2, + 8, 8, 2, 2 +}; + +//0, 9, 18, 27, 37, 46, 55, 64 +static const uint aStep1[64] = {0,0,0,0,0,1,1,1, + 1,1,1,1,1,1,2,2, + 2,2,2,2,2,2,2,3, + 3,3,3,3,3,3,3,3, + 3,4,4,4,4,4,4,4, + 4,4,5,5,5,5,5,5, + 5,5,5,6,6,6,6,6, + 6,6,6,6,7,7,7,7}; + +//0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64 +static const uint aStep2[64] = { 0, 0, 0, 1, 1, 1, 1, 2, + 2, 2, 2, 2, 3, 3, 3, 3, + 4, 4, 4, 4, 5, 5, 5, 5, + 6, 6, 6, 6, 6, 7, 7, 7, + 7, 8, 8, 8, 8, 9, 9, 9, + 9,10,10,10,10,10,11,11, + 11,11,12,12,12,12,13,13, + 13,13,14,14,14,14,15,15}; + +static const float3 RGB2LUM = float3(0.2126f, 0.7152f, 0.0722f); + +#define THREAD_GROUP_SIZE 64 +#define BLOCK_SIZE_Y 4 +#define BLOCK_SIZE_X 4 +#define BLOCK_SIZE (BLOCK_SIZE_Y * BLOCK_SIZE_X) + + +//Forward declaration +uint3 float2half( float3 pixel_f ); +int3 start_quantize( uint3 pixel_h ); +void quantize( inout int2x3 endPoint, uint prec ); +void finish_quantize_0( inout int bBadQuantize, inout int2x3 
endPoint, uint4 prec, bool transformed );
void finish_quantize_1( inout int bBadQuantize, inout int2x3 endPoint, uint4 prec, bool transformed );
void finish_quantize( out bool bBadQuantize, inout int2x3 endPoint, uint4 prec, bool transformed );

void start_unquantize( inout int2x3 endPoint[2], uint4 prec, bool transformed );
void start_unquantize( inout int2x3 endPoint, uint4 prec, bool transformed );
void unquantize( inout int2x3 color, uint prec );
uint3 finish_unquantize( int3 color );
void generate_palette_unquantized8( out uint3 palette, int3 low, int3 high, int i );
void generate_palette_unquantized16( out uint3 palette, int3 low, int3 high, int i );
float3 half2float(uint3 color_h );

void block_package( inout uint4 block, int2x3 endPoint[2], uint mode_type, uint partition_index );
void block_package( inout uint4 block, int2x3 endPoint, uint mode_type );

// Exchanges two int3 values in place.
void swap(inout int3 lhs, inout int3 rhs)
{
    int3 tmp = lhs;
    lhs = rhs;
    rhs = tmp;
}

// Source texture; the kernels read the .rgb of one texel per thread.
Texture2D<float4> g_Input : register( t0 );
// Result of the previous pass, one uint4 per block: x = error (float bits),
// y = best mode, z = best partition.
StructuredBuffer<uint4> g_InBuff : register( t1 );

// Output of the current pass, one uint4 per block.
RWStructuredBuffer<uint4> g_OutBuff : register( u0 );

// Per-thread scratch record kept in group-shared memory.
struct SharedData
{
    float3 pixel;               // texel as loaded from g_Input
    int3 pixel_ph;              // start_quantize() of the half-float texel
    float3 pixel_hr;            // texel after a float -> half -> float round trip
                                // (EncodeBlockCS reuses .xy as scratch for index bits)
    float pixel_lum;            // luminance (dot with RGB2LUM)
    float error;                // accumulated squared error of a candidate
    uint best_mode;             // mode flag of the best candidate so far
    uint best_partition;        // partition index of the best candidate so far
    int3 endPoint_low;          // endpoint with the lowest luminance seen so far
    int3 endPoint_high;         // endpoint with the highest luminance seen so far
    float endPoint_lum_low;     // luminance of endPoint_low
    float endPoint_lum_high;    // luminance of endPoint_high
};

groupshared SharedData shared_temp[THREAD_GROUP_SIZE];

// Evaluates the four single-region candidates (table entries 10..13) for each
// 4x4 block and writes the smallest error and its mode flag to g_OutBuff.
// Each block is handled by 16 consecutive threads, so a group covers 4 blocks.
//
// Barriers are only compiled when REF_DEVICE is defined.
// NOTE(review): without REF_DEVICE nothing orders the shared-memory phases
// below; presumably the 16 threads of a block are expected to run in
// lockstep -- confirm for the target hardware.
[numthreads( THREAD_GROUP_SIZE, 1, 1 )]
void TryModeG10CS( uint GI : SV_GroupIndex, uint3 groupID : SV_GroupID )
{
    const uint MAX_USED_THREAD = 16;
    uint BLOCK_IN_GROUP = THREAD_GROUP_SIZE / MAX_USED_THREAD;
    uint blockInGroup = GI / MAX_USED_THREAD;
    uint blockID = g_start_block_id + groupID.x * BLOCK_IN_GROUP + blockInGroup;
    uint threadBase = blockInGroup * MAX_USED_THREAD;   // first shared_temp slot of this block
    uint threadInBlock = GI - threadBase;

#ifndef REF_DEVICE
    if (blockID >= g_num_total_blocks)
    {
        return;
    }
#endif

    uint block_y = blockID / g_num_block_x;
    uint block_x = blockID - block_y * g_num_block_x;
    uint base_x = block_x * BLOCK_SIZE_X;
    uint base_y = block_y * BLOCK_SIZE_Y;

    // Phase 1: every thread loads one texel and seeds the min/max-luminance
    // reduction with its own value.
    if (threadInBlock < 16)
    {
        shared_temp[GI].pixel = g_Input.Load( uint3( base_x + threadInBlock % 4, base_y + threadInBlock / 4, 0 ) ).rgb;
        uint3 pixel_h = float2half( shared_temp[GI].pixel );
        shared_temp[GI].pixel_hr = half2float(pixel_h);
        shared_temp[GI].pixel_lum = dot(shared_temp[GI].pixel_hr, RGB2LUM);
        shared_temp[GI].pixel_ph = start_quantize( pixel_h );

        shared_temp[GI].endPoint_low = shared_temp[GI].pixel_ph;
        shared_temp[GI].endPoint_high = shared_temp[GI].pixel_ph;
        shared_temp[GI].endPoint_lum_low = shared_temp[GI].pixel_lum;
        shared_temp[GI].endPoint_lum_high = shared_temp[GI].pixel_lum;
    }
#ifdef REF_DEVICE
    GroupMemoryBarrierWithGroupSync();
#endif

    // Phase 2: tree reduction (strides 8, 4, 2, 1) leaving the darkest and
    // brightest pixel of the block in slot threadBase + 0.
    if (threadInBlock < 8)
    {
        if (shared_temp[GI].endPoint_lum_low > shared_temp[GI + 8].endPoint_lum_low)
        {
            shared_temp[GI].endPoint_low = shared_temp[GI + 8].endPoint_low;
            shared_temp[GI].endPoint_lum_low = shared_temp[GI + 8].endPoint_lum_low;
        }
        if (shared_temp[GI].endPoint_lum_high < shared_temp[GI + 8].endPoint_lum_high)
        {
            shared_temp[GI].endPoint_high = shared_temp[GI + 8].endPoint_high;
            shared_temp[GI].endPoint_lum_high = shared_temp[GI + 8].endPoint_lum_high;
        }
    }
#ifdef REF_DEVICE
    GroupMemoryBarrierWithGroupSync();
#endif
    if (threadInBlock < 4)
    {
        if (shared_temp[GI].endPoint_lum_low > shared_temp[GI + 4].endPoint_lum_low)
        {
            shared_temp[GI].endPoint_low = shared_temp[GI + 4].endPoint_low;
            shared_temp[GI].endPoint_lum_low = shared_temp[GI + 4].endPoint_lum_low;
        }
        if (shared_temp[GI].endPoint_lum_high < shared_temp[GI + 4].endPoint_lum_high)
        {
            shared_temp[GI].endPoint_high = shared_temp[GI + 4].endPoint_high;
            shared_temp[GI].endPoint_lum_high = shared_temp[GI + 4].endPoint_lum_high;
        }
    }
#ifdef REF_DEVICE
    GroupMemoryBarrierWithGroupSync();
#endif
    if (threadInBlock < 2)
    {
        if (shared_temp[GI].endPoint_lum_low > shared_temp[GI + 2].endPoint_lum_low)
        {
            shared_temp[GI].endPoint_low = shared_temp[GI + 2].endPoint_low;
            shared_temp[GI].endPoint_lum_low = shared_temp[GI + 2].endPoint_lum_low;
        }
        if (shared_temp[GI].endPoint_lum_high < shared_temp[GI + 2].endPoint_lum_high)
        {
            shared_temp[GI].endPoint_high = shared_temp[GI + 2].endPoint_high;
            shared_temp[GI].endPoint_lum_high = shared_temp[GI + 2].endPoint_lum_high;
        }
    }
#ifdef REF_DEVICE
    GroupMemoryBarrierWithGroupSync();
#endif
    if (threadInBlock < 1)
    {
        if (shared_temp[GI].endPoint_lum_low > shared_temp[GI + 1].endPoint_lum_low)
        {
            shared_temp[GI].endPoint_low = shared_temp[GI + 1].endPoint_low;
            shared_temp[GI].endPoint_lum_low = shared_temp[GI + 1].endPoint_lum_low;
        }
        if (shared_temp[GI].endPoint_lum_high < shared_temp[GI + 1].endPoint_lum_high)
        {
            shared_temp[GI].endPoint_high = shared_temp[GI + 1].endPoint_high;
            shared_temp[GI].endPoint_lum_high = shared_temp[GI + 1].endPoint_lum_high;
        }
    }
#ifdef REF_DEVICE
    GroupMemoryBarrierWithGroupSync();
#endif

    // Phase 3: iterate over mode_type 11:14 (original note: "ergod mode_type 11:14").
    // Thread 0 first orients the axis: if pixel 0 projects past the midpoint
    // (scaled index > 32), the two endpoints are swapped.
    if ( threadInBlock == 0 )
    {
        int2x3 endPoint;
        // find_axis
        endPoint[0] = shared_temp[threadBase + 0].endPoint_low;
        endPoint[1] = shared_temp[threadBase + 0].endPoint_high;

        //compute_index
        float3 span = endPoint[1] - endPoint[0];// fixed a bug in v0.2
        float span_norm_sqr = dot( span, span );// fixed a bug in v0.2
        float dotProduct = dot( span, shared_temp[threadBase + 0].pixel_ph - endPoint[0] );// fixed a bug in v0.2
        if ( span_norm_sqr > 0 && dotProduct >= 0 && uint( dotProduct * 63.49999 / span_norm_sqr ) > 32 )
        {
            swap(endPoint[0], endPoint[1]);

            shared_temp[GI].endPoint_low = endPoint[0];
            shared_temp[GI].endPoint_high = endPoint[1];
        }
    }
#ifdef REF_DEVICE
    GroupMemoryBarrierWithGroupSync();
#endif

    // Phase 4: threads 0..3 each quantize the endpoints with one candidate's
    // precision, reconstruct the palette and accumulate the squared error
    // against the round-tripped pixels.
    if (threadInBlock < 4)
    {
        int2x3 endPoint;
        endPoint[0] = shared_temp[threadBase + 0].endPoint_low;
        endPoint[1] = shared_temp[threadBase + 0].endPoint_high;

        float3 span = endPoint[1] - endPoint[0];
        float span_norm_sqr = dot( span, span );

        uint4 prec = candidateModePrec[threadInBlock + 10];
        int2x3 endPoint_q = endPoint;
        quantize( endPoint_q, prec.x );

        bool transformed = candidateModeTransformed[threadInBlock + 10];
        if (transformed)
        {
            // Transformed candidates store the second endpoint as a delta.
            endPoint_q[1] -= endPoint_q[0];
        }

        bool bBadQuantize;
        finish_quantize( bBadQuantize, endPoint_q, prec, transformed );

        start_unquantize( endPoint_q, prec, transformed );

        unquantize( endPoint_q, prec.x );

        float error = 0;
        [loop]for ( uint j = 0; j < 16; j ++ )
        {
            float dotProduct = dot( span, shared_temp[threadBase + j].pixel_ph - endPoint[0] );// fixed a bug in v0.2
            uint index = ( span_norm_sqr <= 0 || dotProduct <= 0 ) ? 0
                : ( ( dotProduct < span_norm_sqr ) ? aStep2[ uint( dotProduct * 63.49999 / span_norm_sqr ) ] : aStep2[63] );

            uint3 pixel_rh;
            generate_palette_unquantized16( pixel_rh, endPoint_q[0], endPoint_q[1], index );
            float3 pixel_r = half2float( pixel_rh );
            pixel_r -= shared_temp[threadBase + j].pixel_hr;
            error += dot(pixel_r, pixel_r);
        }
        // A delta that did not fit its bit budget disqualifies the candidate.
        if ( bBadQuantize )
            error = 1e20f;

        shared_temp[GI].error = error;
        shared_temp[GI].best_mode = candidateModeFlag[threadInBlock + 10];
    }
#ifdef REF_DEVICE
    GroupMemoryBarrierWithGroupSync();
#endif

    // Phase 5: reduce the four errors to the minimum and publish it.
    if (threadInBlock < 2)
    {
        if ( shared_temp[GI].error > shared_temp[GI + 2].error )
        {
            shared_temp[GI].error = shared_temp[GI + 2].error;
            shared_temp[GI].best_mode = shared_temp[GI + 2].best_mode;
        }
    }
#ifdef REF_DEVICE
    GroupMemoryBarrierWithGroupSync();
#endif
    if (threadInBlock < 1)
    {
        if ( shared_temp[GI].error > shared_temp[GI + 1].error )
        {
            shared_temp[GI].error = shared_temp[GI + 1].error;
            shared_temp[GI].best_mode = shared_temp[GI + 1].best_mode;
        }

        g_OutBuff[blockID] = uint4(asuint(shared_temp[GI].error), shared_temp[GI].best_mode, 0, 0);
    }
}

[numthreads( THREAD_GROUP_SIZE, 1, 1 )]
void
TryModeLE10CS( uint GI : SV_GroupIndex, uint3 groupID : SV_GroupID )
{
    // Evaluates the two-region candidate selected by g_mode_id against all 32
    // partitions of candidateSectionBit, one partition per thread, and keeps
    // the result only if it beats the error already stored in g_InBuff.
    // Each block uses 32 consecutive threads, so a group covers 2 blocks.
    // Barriers are only compiled when REF_DEVICE is defined.
    const uint MAX_USED_THREAD = 32;
    uint BLOCK_IN_GROUP = THREAD_GROUP_SIZE / MAX_USED_THREAD;
    uint blockInGroup = GI / MAX_USED_THREAD;
    uint blockID = g_start_block_id + groupID.x * BLOCK_IN_GROUP + blockInGroup;
    uint threadBase = blockInGroup * MAX_USED_THREAD;   // first shared_temp slot of this block
    uint threadInBlock = GI - threadBase;

#ifndef REF_DEVICE
    if (blockID >= g_num_total_blocks)
    {
        return;
    }

    // The previous result is already (nearly) exact: forward it unchanged.
    if (asfloat(g_InBuff[blockID].x) < 1e-6f)
    {
        g_OutBuff[blockID] = g_InBuff[blockID];
        return;
    }
#endif

    uint block_y = blockID / g_num_block_x;
    uint block_x = blockID - block_y * g_num_block_x;
    uint base_x = block_x * BLOCK_SIZE_X;
    uint base_y = block_y * BLOCK_SIZE_Y;

    // The first 16 threads of the block load one texel each.
    if (threadInBlock < 16)
    {
        shared_temp[GI].pixel = g_Input.Load( uint3( base_x + threadInBlock % 4, base_y + threadInBlock / 4, 0 ) ).rgb;
        uint3 pixel_h = float2half( shared_temp[GI].pixel );
        shared_temp[GI].pixel_hr = half2float(pixel_h);
        shared_temp[GI].pixel_lum = dot(shared_temp[GI].pixel_hr, RGB2LUM);
        shared_temp[GI].pixel_ph = start_quantize( pixel_h );
    }
#ifdef REF_DEVICE
    GroupMemoryBarrierWithGroupSync();
#endif

    // Iterate over mode_type 1:10 (original note: "ergod mode_type 1:10").
    if (threadInBlock < 32)
    {
        // find_axis: per region, pick the pixels with the lowest and highest
        // luminance as the two endpoints.
        int2x3 endPoint[2];
        endPoint[0][0] = MAX_INT;
        endPoint[0][1] = MIN_INT;
        endPoint[1][0] = MAX_INT;
        endPoint[1][1] = MIN_INT;

        float2 endPoint_lum[2];
        endPoint_lum[0][0] = MAX_FLOAT;
        endPoint_lum[0][1] = MIN_FLOAT;
        endPoint_lum[1][0] = MAX_FLOAT;
        endPoint_lum[1][1] = MIN_FLOAT;

        uint bit = candidateSectionBit[threadInBlock];
        for ( uint i = 0; i < 16; i ++ )
        {
            int3 pixel_ph = shared_temp[threadBase + i].pixel_ph;
            float pixel_lum = shared_temp[threadBase + i].pixel_lum;
            if ( (bit >> i) & 1 ) //It gets error when using "candidateSection" as "endPoint_ph" index
            {
                if (endPoint_lum[1][0] > pixel_lum)
                {
                    endPoint[1][0] = pixel_ph;
                    endPoint_lum[1][0] = pixel_lum;
                }
                if (endPoint_lum[1][1] < pixel_lum)
                {
                    endPoint[1][1] = pixel_ph;
                    endPoint_lum[1][1] = pixel_lum;
                }
            }
            else
            {
                if (endPoint_lum[0][0] > pixel_lum)
                {
                    endPoint[0][0] = pixel_ph;
                    endPoint_lum[0][0] = pixel_lum;
                }
                if (endPoint_lum[0][1] < pixel_lum)
                {
                    endPoint[0][1] = pixel_ph;
                    endPoint_lum[0][1] = pixel_lum;
                }
            }
        }

        //compute_index
        // Orient each region's axis: if its reference pixel (pixel 0 for the
        // first region, the fix-up pixel for the second) projects past the
        // midpoint, swap that region's endpoints.
        float3 span[2];// fixed a bug in v0.2
        float span_norm_sqr[2];// fixed a bug in v0.2
        [unroll]
        for (uint p = 0; p < 2; ++ p)
        {
            span[p] = endPoint[p][1] - endPoint[p][0];
            span_norm_sqr[p] = dot( span[p], span[p] );

            float dotProduct = dot( span[p], shared_temp[threadBase + (0 == p ? 0 : candidateFixUpIndex1D[threadInBlock])].pixel_ph - endPoint[p][0] );// fixed a bug in v0.2
            if ( span_norm_sqr[p] > 0 && dotProduct >= 0 && uint( dotProduct * 63.49999 / span_norm_sqr[p] ) > 32 )
            {
                span[p] = -span[p];
                swap(endPoint[p][0], endPoint[p][1]);
            }
        }

        uint4 prec = candidateModePrec[g_mode_id];
        int2x3 endPoint_q[2] = endPoint;
        quantize( endPoint_q[0], prec.x );
        quantize( endPoint_q[1], prec.x );

        bool transformed = candidateModeTransformed[g_mode_id];
        if (transformed)
        {
            // Store the other three endpoints as deltas from endPoint_q[0][0].
            endPoint_q[0][1] -= endPoint_q[0][0];
            endPoint_q[1][0] -= endPoint_q[0][0];
            endPoint_q[1][1] -= endPoint_q[0][0];
        }

        int bBadQuantize = 0;
        finish_quantize_0( bBadQuantize, endPoint_q[0], prec, transformed );
        finish_quantize_1( bBadQuantize, endPoint_q[1], prec, transformed );

        start_unquantize( endPoint_q, prec, transformed );

        unquantize( endPoint_q[0], prec.x );
        unquantize( endPoint_q[1], prec.x );

        // Accumulate the squared error of the reconstructed block.
        float error = 0;
        for ( uint j = 0; j < 16; j ++ )
        {
            uint3 pixel_rh;
            if ((bit >> j) & 1)
            {
                float dotProduct = dot( span[1], shared_temp[threadBase + j].pixel_ph - endPoint[1][0] );// fixed a bug in v0.2
                uint index = ( span_norm_sqr[1] <= 0 || dotProduct <= 0 ) ? 0
                    : ( ( dotProduct < span_norm_sqr[1] ) ? aStep1[ uint( dotProduct * 63.49999 / span_norm_sqr[1] ) ] : aStep1[63] );
                generate_palette_unquantized8( pixel_rh, endPoint_q[1][0], endPoint_q[1][1], index );
            }
            else
            {
                float dotProduct = dot( span[0], shared_temp[threadBase + j].pixel_ph - endPoint[0][0] );// fixed a bug in v0.2
                uint index = ( span_norm_sqr[0] <= 0 || dotProduct <= 0 ) ? 0
                    : ( ( dotProduct < span_norm_sqr[0] ) ? aStep1[ uint( dotProduct * 63.49999 / span_norm_sqr[0] ) ] : aStep1[63] );
                generate_palette_unquantized8( pixel_rh, endPoint_q[0][0], endPoint_q[0][1], index );
            }

            float3 pixel_r = half2float( pixel_rh );
            pixel_r -= shared_temp[threadBase + j].pixel_hr;
            error += dot(pixel_r, pixel_r);
        }
        // A delta that did not fit its bit budget disqualifies the candidate.
        if ( bBadQuantize )
            error = 1e20f;

        shared_temp[GI].error = error;
        shared_temp[GI].best_mode = candidateModeFlag[g_mode_id];
        shared_temp[GI].best_partition = threadInBlock;
    }
#ifdef REF_DEVICE
    GroupMemoryBarrierWithGroupSync();
#endif

    // Tree reduction (strides 16, 8, 4, 2, 1) to the partition with the
    // smallest error.
    if (threadInBlock < 16)
    {
        if ( shared_temp[GI].error > shared_temp[GI + 16].error )
        {
            shared_temp[GI].error = shared_temp[GI + 16].error;
            shared_temp[GI].best_mode = shared_temp[GI + 16].best_mode;
            shared_temp[GI].best_partition = shared_temp[GI + 16].best_partition;
        }
    }
#ifdef REF_DEVICE
    GroupMemoryBarrierWithGroupSync();
#endif
    if (threadInBlock < 8)
    {
        if ( shared_temp[GI].error > shared_temp[GI + 8].error )
        {
            shared_temp[GI].error = shared_temp[GI + 8].error;
            shared_temp[GI].best_mode = shared_temp[GI + 8].best_mode;
            shared_temp[GI].best_partition = shared_temp[GI + 8].best_partition;
        }
    }
#ifdef REF_DEVICE
    GroupMemoryBarrierWithGroupSync();
#endif
    if (threadInBlock < 4)
    {
        if ( shared_temp[GI].error > shared_temp[GI + 4].error )
        {
            shared_temp[GI].error = shared_temp[GI + 4].error;
            shared_temp[GI].best_mode = shared_temp[GI + 4].best_mode;
            shared_temp[GI].best_partition = shared_temp[GI + 4].best_partition;
        }
    }
#ifdef REF_DEVICE
    GroupMemoryBarrierWithGroupSync();
#endif
    if (threadInBlock < 2)
    {
        if ( shared_temp[GI].error > shared_temp[GI + 2].error )
        {
            shared_temp[GI].error = shared_temp[GI + 2].error;
            shared_temp[GI].best_mode = shared_temp[GI + 2].best_mode;
            shared_temp[GI].best_partition = shared_temp[GI + 2].best_partition;
        }
    }
#ifdef REF_DEVICE
    GroupMemoryBarrierWithGroupSync();
#endif
    if (threadInBlock < 1)
    {
        if ( shared_temp[GI].error > shared_temp[GI + 1].error )
        {
            shared_temp[GI].error = shared_temp[GI + 1].error;
            shared_temp[GI].best_mode = shared_temp[GI + 1].best_mode;
            shared_temp[GI].best_partition = shared_temp[GI + 1].best_partition;
        }

        // Publish the new result only if it improves on the previous pass.
        if (asfloat(g_InBuff[blockID].x) > shared_temp[GI].error)
        {
            g_OutBuff[blockID] = uint4(asuint(shared_temp[GI].error), shared_temp[GI].best_mode, shared_temp[GI].best_partition, 0);
        }
        else
        {
            g_OutBuff[blockID] = g_InBuff[blockID];
        }
    }
}

// Produces the final 128-bit block for each 4x4 tile using the mode
// (g_InBuff[].y) and partition (g_InBuff[].z) chosen by the TryMode passes.
// Each block uses 32 consecutive threads: threads 0..15 work on the first
// region and threads 16..31 on the second.
// Barriers are only compiled when REF_DEVICE is defined.
[numthreads( THREAD_GROUP_SIZE, 1, 1 )]
void EncodeBlockCS(uint GI : SV_GroupIndex, uint3 groupID : SV_GroupID)
{
    const uint MAX_USED_THREAD = 32;
    uint BLOCK_IN_GROUP = THREAD_GROUP_SIZE / MAX_USED_THREAD;
    uint blockInGroup = GI / MAX_USED_THREAD;
    uint blockID = g_start_block_id + groupID.x * BLOCK_IN_GROUP + blockInGroup;
    uint threadBase = blockInGroup * MAX_USED_THREAD;   // first shared_temp slot of this block
    uint threadInBlock = GI - threadBase;

#ifndef REF_DEVICE
    if (blockID >= g_num_total_blocks)
    {
        return;
    }
#endif

    uint block_y = blockID / g_num_block_x;
    uint block_x = blockID - block_y * g_num_block_x;
    uint base_x = block_x * BLOCK_SIZE_X;
    uint base_y = block_y * BLOCK_SIZE_Y;

    // Note: unlike the TryMode kernels, luminance here is taken from the raw
    // texel rather than from the half-float round trip.
    if (threadInBlock < 16)
    {
        shared_temp[GI].pixel = g_Input.Load( uint3( base_x + threadInBlock % 4, base_y + threadInBlock / 4, 0 ) ).rgb;
        shared_temp[GI].pixel_lum = dot(shared_temp[GI].pixel, RGB2LUM);
        uint3 pixel_h = float2half( shared_temp[GI].pixel );
        shared_temp[GI].pixel_ph = start_quantize( pixel_h );
    }
#ifdef REF_DEVICE
    GroupMemoryBarrierWithGroupSync();
#endif

    uint best_mode =
g_InBuff[blockID].y;
    uint best_partition = g_InBuff[blockID].z;

    uint4 block = 0;

    // Seed the per-region min/max-luminance reduction. A thread whose pixel is
    // not in its region keeps the MAX/MIN sentinels so it never wins.
    if (threadInBlock < 32)
    {
        int2x3 endPoint;
        endPoint[0] = MAX_INT;
        endPoint[1] = MIN_INT;

        float2 endPoint_lum;
        endPoint_lum[0] = MAX_FLOAT;
        endPoint_lum[1] = MIN_FLOAT;

        // NOTE(review): endPoint_lum_index is written but never read in this kernel.
        int2 endPoint_lum_index;
        endPoint_lum_index[0] = -1;
        endPoint_lum_index[1] = -1;

        int3 pixel_ph = shared_temp[threadBase + (threadInBlock & 0xF)].pixel_ph;
        float pixel_lum = shared_temp[threadBase + (threadInBlock & 0xF)].pixel_lum;
        if (threadInBlock < 16)
        {
            // First region: every pixel for single-region modes (> 10),
            // otherwise the pixels whose partition bit is 0.
            if (best_mode > 10)
            {
                endPoint[0] = endPoint[1] = pixel_ph;
                endPoint_lum[0] = endPoint_lum[1] = pixel_lum;
            }
            else
            {
                uint bits = candidateSectionBit[best_partition];
                if (0 == ((bits >> threadInBlock) & 1))
                {
                    endPoint[0] = endPoint[1] = pixel_ph;
                    endPoint_lum[0] = endPoint_lum[1] = pixel_lum;
                }
            }
        }
        else
        {
            // Second region: only for two-region modes, pixels whose bit is 1.
            if (best_mode <= 10)
            {
                uint bits = candidateSectionBit[best_partition];
                if (1 == ((bits >> (threadInBlock & 0xF)) & 1))
                {
                    endPoint[0] = endPoint[1] = pixel_ph;
                    endPoint_lum[0] = endPoint_lum[1] = pixel_lum;
                }
            }
        }

        shared_temp[GI].endPoint_low = endPoint[0];
        shared_temp[GI].endPoint_high = endPoint[1];

        shared_temp[GI].endPoint_lum_low = endPoint_lum[0];
        shared_temp[GI].endPoint_lum_high = endPoint_lum[1];
    }
#ifdef REF_DEVICE
    GroupMemoryBarrierWithGroupSync();
#endif
    // Tree reduction (strides 8, 4, 2, 1) run independently inside each
    // 16-thread half; results land in slots threadBase + 0 and threadBase + 16.
    if ((threadInBlock & 0xF) < 8)
    {
        if (shared_temp[GI].endPoint_lum_low > shared_temp[GI + 8].endPoint_lum_low)
        {
            shared_temp[GI].endPoint_low = shared_temp[GI + 8].endPoint_low;
            shared_temp[GI].endPoint_lum_low = shared_temp[GI + 8].endPoint_lum_low;
        }
        if (shared_temp[GI].endPoint_lum_high < shared_temp[GI + 8].endPoint_lum_high)
        {
            shared_temp[GI].endPoint_high = shared_temp[GI + 8].endPoint_high;
            shared_temp[GI].endPoint_lum_high = shared_temp[GI + 8].endPoint_lum_high;
        }
    }
#ifdef REF_DEVICE
    GroupMemoryBarrierWithGroupSync();
#endif
    if ((threadInBlock & 0xF) < 4)
    {
        if (shared_temp[GI].endPoint_lum_low > shared_temp[GI + 4].endPoint_lum_low)
        {
            shared_temp[GI].endPoint_low = shared_temp[GI + 4].endPoint_low;
            shared_temp[GI].endPoint_lum_low = shared_temp[GI + 4].endPoint_lum_low;
        }
        if (shared_temp[GI].endPoint_lum_high < shared_temp[GI + 4].endPoint_lum_high)
        {
            shared_temp[GI].endPoint_high = shared_temp[GI + 4].endPoint_high;
            shared_temp[GI].endPoint_lum_high = shared_temp[GI + 4].endPoint_lum_high;
        }
    }
#ifdef REF_DEVICE
    GroupMemoryBarrierWithGroupSync();
#endif
    if ((threadInBlock & 0xF) < 2)
    {
        if (shared_temp[GI].endPoint_lum_low > shared_temp[GI + 2].endPoint_lum_low)
        {
            shared_temp[GI].endPoint_low = shared_temp[GI + 2].endPoint_low;
            shared_temp[GI].endPoint_lum_low = shared_temp[GI + 2].endPoint_lum_low;
        }
        if (shared_temp[GI].endPoint_lum_high < shared_temp[GI + 2].endPoint_lum_high)
        {
            shared_temp[GI].endPoint_high = shared_temp[GI + 2].endPoint_high;
            shared_temp[GI].endPoint_lum_high = shared_temp[GI + 2].endPoint_lum_high;
        }
    }
#ifdef REF_DEVICE
    GroupMemoryBarrierWithGroupSync();
#endif
    if ((threadInBlock & 0xF) < 1)
    {
        if (shared_temp[GI].endPoint_lum_low > shared_temp[GI + 1].endPoint_lum_low)
        {
            shared_temp[GI].endPoint_low = shared_temp[GI + 1].endPoint_low;
            shared_temp[GI].endPoint_lum_low = shared_temp[GI + 1].endPoint_lum_low;
        }
        if (shared_temp[GI].endPoint_lum_high < shared_temp[GI + 1].endPoint_lum_high)
        {
            shared_temp[GI].endPoint_high = shared_temp[GI + 1].endPoint_high;
            shared_temp[GI].endPoint_lum_high = shared_temp[GI + 1].endPoint_lum_high;
        }
    }
#ifdef REF_DEVICE
    GroupMemoryBarrierWithGroupSync();
#endif

    // Threads 0 and 1 orient the axis of region 0 and region 1 respectively
    // and move the result into slots threadBase + 0 / threadBase + 1.
    if (threadInBlock < 2)
    {
        // find_axis
        int2x3 endPoint;
        endPoint[0] = shared_temp[threadBase + threadInBlock * 16].endPoint_low;
        endPoint[1] = shared_temp[threadBase + threadInBlock * 16].endPoint_high;

        // Reference pixel: 0 for the first region, the partition's fix-up
        // pixel for the second.
        uint fixup = 0;
        if ((1 == threadInBlock) && (best_mode <= 10))
        {
            fixup = candidateFixUpIndex1D[best_partition];
        }

        float3 span = endPoint[1] - endPoint[0];
        float span_norm_sqr = dot( span, span );
        float dotProduct = dot( span, shared_temp[threadBase + fixup].pixel_ph - endPoint[0] );
        if ( span_norm_sqr > 0 && dotProduct >= 0 && uint( dotProduct * 63.49999 / span_norm_sqr ) > 32 )
        {
            swap(endPoint[0], endPoint[1]);
        }

        shared_temp[GI].endPoint_low = endPoint[0];
        shared_temp[GI].endPoint_high = endPoint[1];
    }
#ifdef REF_DEVICE
    GroupMemoryBarrierWithGroupSync();
#endif

    // Each of the 16 pixel threads computes its palette index and ORs it into
    // its private copy of block.zw at that pixel's bit position.
    if (threadInBlock < 16)
    {
        uint bits;
        if (best_mode > 10)
        {
            bits = 0;
        }
        else
        {
            bits = candidateSectionBit[best_partition];
        }

        float3 span;
        float dotProduct;
        if ((bits >> threadInBlock) & 1)
        {
            span = shared_temp[threadBase + 1].endPoint_high - shared_temp[threadBase + 1].endPoint_low;
            dotProduct = dot( span, shared_temp[threadBase + threadInBlock].pixel_ph - shared_temp[threadBase + 1].endPoint_low );
        }
        else
        {
            span = shared_temp[threadBase + 0].endPoint_high - shared_temp[threadBase + 0].endPoint_low;
            dotProduct = dot( span, shared_temp[threadBase + threadInBlock].pixel_ph - shared_temp[threadBase + 0].endPoint_low );
        }
        float span_norm_sqr = dot( span, span );

        if (best_mode > 10)
        {
            // Single region: 4-bit indices; pixel 0 is placed at bit 1 of
            // block.z, the others at 4 bits per pixel.
            uint index = ( span_norm_sqr <= 0 || dotProduct <= 0 ) ? 0
                : ( ( dotProduct < span_norm_sqr ) ? aStep2[ uint( dotProduct * 63.49999 / span_norm_sqr ) ] : aStep2[63] );
            if (threadInBlock == 0)
            {
                block.z |= index << 1;
            }
            else if (threadInBlock < 8)
            {
                block.z |= index << (threadInBlock * 4);
            }
            else
            {
                block.w |= index << ((threadInBlock - 8) * 4);
            }
        }
        else
        {
            // Two regions: 3-bit indices starting at bit 18 of block.z; the
            // bit positions of later pixels shift depending on where the
            // fix-up pixel (2, 8 or 15) sits.
            uint index = ( span_norm_sqr <= 0 || dotProduct <= 0 ) ? 0
                : ( ( dotProduct < span_norm_sqr ) ? aStep1[ uint( dotProduct * 63.49999 / span_norm_sqr ) ] : aStep1[63] );

            uint fixup = candidateFixUpIndex1D[best_partition];
            int2 offset = int2((fixup != 2), (fixup == 15));

            if (threadInBlock == 0)
            {
                block.z |= index << 18;
            }
            else if (threadInBlock < 3)
            {
                block.z |= index << (20 + (threadInBlock - 1) * 3);
            }
            else if (threadInBlock < 5)
            {
                block.z |= index << (25 + (threadInBlock - 3) * 3 + offset.x);
            }
            else if (threadInBlock == 5)
            {
                // Pixel 5 can straddle the block.z / block.w boundary.
                block.w |= index >> !offset.x;
                if (!offset.x)
                {
                    block.z |= index << 31;
                }
            }
            else if (threadInBlock < 9)
            {
                block.w |= index << (2 + (threadInBlock - 6) * 3 + offset.x);
            }
            else
            {
                block.w |= index << (11 + (threadInBlock - 9) * 3 + offset.y);
            }
        }

        // pixel_hr.xy is reused as raw-bit scratch storage for block.zw.
        shared_temp[GI].pixel_hr.xy = asfloat(block.zw);
    }
#ifdef REF_DEVICE
    GroupMemoryBarrierWithGroupSync();
#endif
    // OR-reduce the 16 partial index words (strides 8, 4, 2, 1) into thread 0.
    if (threadInBlock < 8)
    {
        shared_temp[GI].pixel_hr.xy = asfloat(asuint(shared_temp[GI].pixel_hr.xy) | asuint(shared_temp[GI + 8].pixel_hr.xy));
    }
#ifdef REF_DEVICE
    GroupMemoryBarrierWithGroupSync();
#endif
    if (threadInBlock < 4)
    {
        shared_temp[GI].pixel_hr.xy = asfloat(asuint(shared_temp[GI].pixel_hr.xy) | asuint(shared_temp[GI + 4].pixel_hr.xy));
    }
#ifdef REF_DEVICE
    GroupMemoryBarrierWithGroupSync();
#endif
    if (threadInBlock < 2)
    {
        shared_temp[GI].pixel_hr.xy = asfloat(asuint(shared_temp[GI].pixel_hr.xy) | asuint(shared_temp[GI + 2].pixel_hr.xy));
    }
#ifdef REF_DEVICE
    GroupMemoryBarrierWithGroupSync();
#endif
    if (threadInBlock < 1)
    {
        shared_temp[GI].pixel_hr.xy = asfloat(asuint(shared_temp[GI].pixel_hr.xy) | asuint(shared_temp[GI + 1].pixel_hr.xy));

        block.zw = asuint(shared_temp[GI].pixel_hr.xy);
    }
#ifdef REF_DEVICE
    GroupMemoryBarrierWithGroupSync();
#endif

    // best_mode is used 1-based here (table entry best_mode - 1).
    bool transformed = candidateModeTransformed[best_mode - 1];
    uint4 prec = candidateModePrec[best_mode - 1];
    // Thread 2 quantizes region 0's endpoints into slot threadBase + 2.
    if (threadInBlock == 2)
    {
        int2x3 endPoint_q;
        endPoint_q[0] = shared_temp[threadBase + 0].endPoint_low;
        endPoint_q[1] = shared_temp[threadBase + 0].endPoint_high;

        quantize( endPoint_q, prec.x );
        if (transformed)
        {
            endPoint_q[1] -= endPoint_q[0];
        }

        shared_temp[GI].endPoint_low = endPoint_q[0];
        shared_temp[GI].endPoint_high = endPoint_q[1];
    }
#ifdef REF_DEVICE
    GroupMemoryBarrierWithGroupSync();
#endif
    // Thread 3 quantizes region 1's endpoints into slot threadBase + 3,
    // expressed as deltas from region 0's first endpoint when transformed.
    if (threadInBlock == 3)
    {
        int3 ep0 = shared_temp[threadBase + 2].endPoint_low;
        int2x3 endPoint_q;
        endPoint_q[0] = shared_temp[threadBase + 1].endPoint_low;
        endPoint_q[1] = shared_temp[threadBase + 1].endPoint_high;

        if (best_mode <= 10)
        {
            quantize( endPoint_q, prec.x );
            if (transformed)
            {
                endPoint_q[0] -= ep0;
                endPoint_q[1] -= ep0;
            }

            shared_temp[GI].endPoint_low = endPoint_q[0];
            shared_temp[GI].endPoint_high = endPoint_q[1];
        }
    }
#ifdef REF_DEVICE
    GroupMemoryBarrierWithGroupSync();
#endif

    // Threads 0 and 1 clamp/mask the quantized endpoints of region 0 / 1 and
    // move them back into slots threadBase + 0 / threadBase + 1.
    if (threadInBlock < 2)
    {
        int2x3 endPoint_q;
        endPoint_q[0] = shared_temp[threadBase + threadInBlock + 2].endPoint_low;
        endPoint_q[1] = shared_temp[threadBase + threadInBlock + 2].endPoint_high;

        // The flag is required by the helpers but its value is not used here.
        int bBadQuantize = 0;
        if (threadInBlock == 0)
        {
            if (best_mode > 10)
            {
                finish_quantize( bBadQuantize, endPoint_q, prec, transformed );
            }
            else
            {
                finish_quantize_0( bBadQuantize, endPoint_q, prec, transformed );
            }
        }
        else // if (threadInBlock == 1)
        {
            if (best_mode <= 10)
            {
                finish_quantize_1( bBadQuantize, endPoint_q, prec, transformed );
            }
        }

        shared_temp[GI].endPoint_low = endPoint_q[0];
        shared_temp[GI].endPoint_high = endPoint_q[1];
    }
#ifdef REF_DEVICE
    GroupMemoryBarrierWithGroupSync();
#endif

    // Thread 0 packs the endpoints (and, for two-region modes, the partition)
    // together with the index bits already in block.zw and writes the block.
    if ( threadInBlock == 0 )
    {
        int2x3 endPoint_q[2];
        endPoint_q[0][0] = shared_temp[threadBase + 0].endPoint_low;
        endPoint_q[0][1] = shared_temp[threadBase + 0].endPoint_high;
        endPoint_q[1][0] = shared_temp[threadBase + 1].endPoint_low;
        endPoint_q[1][1] = shared_temp[threadBase + 1].endPoint_high;

        if ( best_mode > 10 )
        {
            block_package( block,
endPoint_q[0], best_mode );
        }
        else
        {
            block_package( block, endPoint_q, best_mode, best_partition );
        }

        g_OutBuff[blockID] = block;
    }
}

// Converts one 32-bit float to a 16-bit half bit pattern (round to nearest
// even), returned in the low 16 bits of a uint.
uint float2half1( float f )
{
    uint Result;

    uint IValue = asuint(f);
    uint Sign = (IValue & 0x80000000U) >> 16U;
    IValue = IValue & 0x7FFFFFFFU;

    if (IValue > 0x47FFEFFFU)
    {
        // The number is too large to be represented as a half: every magnitude
        // bit is set.
        // NOTE(review): 0x7FFF is not the half infinity pattern (0x7C00) and
        // not the largest finite half (0x7BFF) that start_quantize()
        // special-cases -- confirm this saturation value is intended.
        Result = 0x7FFFU;
    }
    else
    {
        if (IValue < 0x38800000U)
        {
            // The number is too small to be represented as a normalized half.
            // Convert it to a denormalized value.
            uint Shift = 113U - (IValue >> 23U);
            IValue = (0x800000U | (IValue & 0x7FFFFFU)) >> Shift;
        }
        else
        {
            // Rebias the exponent to represent the value as a normalized half.
            IValue += 0xC8000000U;
        }

        // Round to nearest even while dropping the 13 extra mantissa bits.
        Result = ((IValue + 0x0FFFU + ((IValue >> 13U) & 1U)) >> 13U)&0x7FFFU;
    }
    return (Result|Sign);
}

// Component-wise float2half1().
uint3 float2half( float3 endPoint_f )
{
    //uint3 sign = asuint(endPoint_f) & 0x80000000;
    //uint3 expo = asuint(endPoint_f) & 0x7F800000;
    //uint3 base = asuint(endPoint_f) & 0x007FFFFF;
    //return ( expo < 0x33800000 ) ? 0
    //                             //0x33800000 indicating 2^-24, which is minimal denormalized number that half can present
    //    : ( ( expo < 0x38800000 ) ? ( sign >> 16 ) | ( ( base + 0x00800000 ) >> ( 23 - ( ( expo - 0x33800000 ) >> 23 ) ) )//fixed a bug in v0.2
    //                             //0x38800000 indicating 2^-14, which is minimal normalized number that half can present, so need to use denormalized half presentation
    //    : ( ( expo == 0x7F800000 || expo > 0x47000000 ) ? ( ( sign >> 16 ) | 0x7bff )
    //                             // treat NaN as INF, treat INF (including NaN) as the maximum/minimum number that half can present
    //                             // 0x47000000 indicating 2^15, which is maximum exponent that half can present, so cut to 0x7bff which is the maximum half number
    //    : ( ( sign >> 16 ) | ( ( ( expo - 0x38000000 ) | base ) >> 13 ) ) ) );


    return uint3( float2half1( endPoint_f.x ), float2half1( endPoint_f.y ), float2half1( endPoint_f.z ) );
}

// Rescales half-float bit patterns into the integer range the quantizer
// works in.
//   UNSIGNED_F16: (h << 6) / 31.
//   otherwise:    the sign bit (0x8000) is split off and the magnitude becomes
//                 (m << 5) / 31; the largest finite half magnitude (0x7bff)
//                 saturates to +0x7fff / -0x7fff (0xffff8001).
int3 start_quantize( uint3 pixel_h )
{
    if ( g_format == UNSIGNED_F16 )
    {
        return asint( ( pixel_h << 6 ) / 31 );
    }
    else
    {
        // The negative branch must test 0xfbff (sign bit | 0x7bff): it only
        // runs for pixel_h >= 0x8000, so the previous comparison against
        // 0x7bff could never match and -max never saturated.
        return ( pixel_h < 0x8000 ) ? ( ( pixel_h == 0x7bff ) ? 0x7fff : asint( ( pixel_h << 5 ) / 31 ) )// fixed a bug in v0.2
            : ( ( pixel_h == 0xfbff ) ? 0xffff8001 : -asint( ( ( 0x00007fff & pixel_h ) << 5 ) / 31 ) );// fixed a bug in v0.2
    }
}

// Reduces both endpoints, in place, from the start_quantize() range to `prec`
// bits. Zero and full-precision inputs (prec >= 15 unsigned, >= 16 signed)
// pass through unchanged; the maximum input maps to the maximum output code;
// in the signed case the magnitude is scaled and the sign restored.
void quantize( inout int2x3 endPoint, uint prec )
{
    int iprec = asint( prec );
    if ( g_format == UNSIGNED_F16 )
    {
        endPoint = ( ( iprec >= 15 ) | ( endPoint == 0 ) ) ? endPoint
            : ( ( endPoint == asint(0xFFFF) ) ? ( ( 1 << iprec ) - 1 )
            : ( ( ( endPoint << iprec ) + asint(0x0000) ) >> 16 ) );
    }
    else
    {
        endPoint = ( ( iprec >= 16 ) | ( endPoint == 0 ) ) ? endPoint
            : ( ( endPoint >= 0 ) ? ( ( endPoint == asint(0x7FFF) ) ? ( ( 1 << ( iprec - 1 ) ) - 1 ) : ( ( ( endPoint << ( iprec - 1 ) ) + asint(0x0000) ) >> 15 ) )
            : ( ( -endPoint == asint(0x7FFF) ) ? -( ( 1 << ( iprec - 1 ) ) - 1 ) : -( ( ( -endPoint << ( iprec - 1 ) ) + asint(0x0000) ) >> 15 ) ) );
    }
}
void finish_quantize_0( inout int bBadQuantize, inout int2x3 endPoint, uint4 prec, bool transformed )
{
    if ( transformed )
    {
        bool3 bBadComponent = ( endPoint[1] >= 0 ) ? ( endPoint[1] >= ( 1 << ( prec.yzw - 1 ) ) )
            : ( -endPoint[1] > ( 1 << ( prec.yzw - 1 ) ) );
        bBadQuantize |= any(bBadComponent);

        endPoint[0] = endPoint[0] & ( ( 1 << prec.x ) - 1 );
        endPoint[1] = ( endPoint[1] >= 0 ) ? ( ( endPoint[1] >= ( 1 << ( prec.yzw - 1 ) ) ) ?
( ( 1 << ( prec.yzw - 1 ) ) - 1 ) : endPoint[1] )
            : ( ( -endPoint[1] > ( 1 << ( prec.yzw - 1 ) ) ) ? ( 1 << ( prec.yzw - 1 ) ) : ( endPoint[1] & ( ( 1 << prec.yzw ) - 1 ) ) );
    }
    else
    {
        endPoint &= ( ( 1 << prec.x ) - 1 );
    }
}

// Final quantization step for the second endpoint pair of a two-region mode.
//   transformed:  both rows are deltas. Each is checked against the signed
//                 range of prec.yzw bits, an overflow is OR-ed into
//                 bBadQuantize, and the value is clamped (positive) or
//                 clamped/masked to prec.yzw bits (negative).
//   otherwise:    both rows are simply masked to prec.x bits.
void finish_quantize_1( inout int bBadQuantize, inout int2x3 endPoint, uint4 prec, bool transformed )
{
    if ( !transformed )
    {
        endPoint &= ( ( 1 << prec.x ) - 1 );
        return;
    }

    // Range check first, on the unclamped values of both rows.
    bool3 bBadLow = ( endPoint[0] >= 0 ) ? ( endPoint[0] >= ( 1 << ( prec.yzw - 1 ) ) )
        : ( -endPoint[0] > ( 1 << ( prec.yzw - 1 ) ) );
    bool3 bBadHigh = ( endPoint[1] >= 0 ) ? ( endPoint[1] >= ( 1 << ( prec.yzw - 1 ) ) )
        : ( -endPoint[1] > ( 1 << ( prec.yzw - 1 ) ) );
    bBadQuantize |= ( any( bBadLow ) || any( bBadHigh ) );

    // Then clamp each row into the representable delta range.
    endPoint[0] = ( endPoint[0] >= 0 )
        ? ( ( endPoint[0] >= ( 1 << ( prec.yzw - 1 ) ) ) ? ( ( 1 << ( prec.yzw - 1 ) ) - 1 ) : endPoint[0] )
        : ( ( -endPoint[0] > ( 1 << ( prec.yzw - 1 ) ) ) ? ( 1 << ( prec.yzw - 1 ) ) : ( endPoint[0] & ( ( 1 << prec.yzw ) - 1 ) ) );
    endPoint[1] = ( endPoint[1] >= 0 )
        ? ( ( endPoint[1] >= ( 1 << ( prec.yzw - 1 ) ) ) ? ( ( 1 << ( prec.yzw - 1 ) ) - 1 ) : endPoint[1] )
        : ( ( -endPoint[1] > ( 1 << ( prec.yzw - 1 ) ) ) ? ( 1 << ( prec.yzw - 1 ) ) : ( endPoint[1] & ( ( 1 << prec.yzw ) - 1 ) ) );
}
void finish_quantize( out bool bBadQuantize, inout int2x3 endPoint, uint4 prec, bool transformed )
{
    if ( transformed )
    {
        bool3 bBadComponent;
        bBadComponent = ( endPoint[1] >= 0 ) ? ( endPoint[1] >= ( 1 << ( prec.yzw - 1 ) ) )
            : ( -endPoint[1] > ( 1 << ( prec.yzw - 1 ) ) );
        bBadQuantize = any( bBadComponent );

        endPoint[0] = endPoint[0] & ( ( 1 << prec.x ) - 1 );
        endPoint[1] = ( endPoint[1] >= 0 ) ? ( ( endPoint[1] >= ( 1 << ( prec.yzw - 1 ) ) ) ? ( ( 1 << ( prec.yzw - 1 ) ) - 1 ) : endPoint[1] )
            : ( ( -endPoint[1] > ( 1 << ( prec.yzw - 1 ) ) ) ?
( 1 << ( prec.yzw - 1 ) ) : ( endPoint[1] & ( ( 1 << prec.yzw ) - 1 ) ) );
    }
    else
    {
        endPoint &= ( ( 1 << prec.x ) - 1 );

        bBadQuantize = 0;
    }
}

// Sign-extends, per component, a value stored in `prec` bits: when the top
// stored bit is set, the lower bits are kept and 2^(prec-1) is subtracted.
void SIGN_EXTEND( uint3 prec, inout int3 color )
{
    uint3 signBit = 1 << (prec - 1);
    color = (color & signBit) ? (color & (signBit - 1)) - signBit : color;
}

// Sign-extends a single endpoint pair: row 0 (prec.x bits) only for the
// signed format, row 1 (prec.yzw bits) for the signed format or when the row
// holds a delta.
void sign_extend( bool transformed, uint4 prec, inout int2x3 endPoint )
{
    bool isSigned = ( g_format == SIGNED_F16 );

    if ( isSigned )
    {
        SIGN_EXTEND( prec.x, endPoint[0] );
    }
    if ( isSigned || transformed )
    {
        SIGN_EXTEND( prec.yzw, endPoint[1] );
    }
}

// Two-pair overload: the first endpoint uses prec.x bits, the other three use
// prec.yzw bits.
void sign_extend( bool transformed, uint4 prec, inout int2x3 endPoint[2] )
{
    bool isSigned = ( g_format == SIGNED_F16 );

    if ( isSigned )
    {
        SIGN_EXTEND( prec.x, endPoint[0][0] );
    }
    if ( isSigned || transformed )
    {
        SIGN_EXTEND( prec.yzw, endPoint[0][1] );
        SIGN_EXTEND( prec.yzw, endPoint[1][0] );
        SIGN_EXTEND( prec.yzw, endPoint[1][1] );
    }
}

// Undoes the storage form of two endpoint pairs: sign-extend, then turn the
// three deltas back into absolute values by adding the first endpoint.
void start_unquantize( inout int2x3 endPoint[2], uint4 prec, bool transformed )
{
    sign_extend( transformed, prec, endPoint );

    if ( !transformed )
    {
        return;
    }

    int3 base = endPoint[0][0];
    endPoint[0][1] += base;
    endPoint[1][0] += base;
    endPoint[1][1] += base;
}

// Single-pair overload of the above.
void start_unquantize( inout int2x3 endPoint, uint4 prec, bool transformed )
{
    sign_extend( transformed, prec, endPoint );

    if ( transformed )
    {
        endPoint[1] += endPoint[0];
    }
}

// Expands `prec`-bit endpoint codes back to the full working range, in place.
// Zero stays zero and the maximum code maps to the maximum value (0xFFFF
// unsigned, 0x7FFF signed); full-precision inputs are left untouched.
void unquantize( inout int2x3 color, uint prec )
{
    int iprec = asint( prec );

    if ( g_format == UNSIGNED_F16 )
    {
        if ( prec >= 15 )
        {
            return;
        }
        color = (color != 0) ? (color == ((1 << iprec) - 1) ? 0xFFFF : (((color << 16) + 0x8000) >> iprec)) : color;
        return;
    }

    if ( prec >= 16 )
    {
        return;
    }

    // Signed format: scale the magnitude, then restore the sign.
    uint2x3 isNegative = color >= 0 ? 0 : 1;
    color = abs(color);
    color = (color != 0) ? (color >= ((1 << (iprec - 1)) - 1) ? 0x7FFF : (((color << 15) + 0x4000) >> (iprec - 1))) : color;
    color = isNegative > 0 ? -color : color;
}
uint3 finish_unquantize( int3 color )
{
    if ( g_format == UNSIGNED_F16 )
        color = ( color * 31 ) >> 6;
    else
    {
        color = ( color < 0 ) ?
-( ( -color * 31 ) >> 5 ) : ( color * 31 ) >> 5; + color = ( color < 0 ) ? ( ( -color ) | 0x8000 ) : color; + } + return asuint(color); +} /* generate_palette_unquantized8: computes entry i of an 8-entry palette. low and high are blended with weight aWeight3[i] out of 64 (the 32 added before the shift by 6 is a rounding bias) and the result is passed through finish_unquantize. i indexes aWeight3 directly, so it has to be in 0..7; no bounds check is done here. */ +void generate_palette_unquantized8( out uint3 palette, int3 low, int3 high, int i ) +{ + static const int aWeight3[] = {0, 9, 18, 27, 37, 46, 55, 64}; + + int3 tmp = ( low * ( 64 - aWeight3[i] ) + high * aWeight3[i] + 32 ) >> 6; + palette = finish_unquantize( tmp ); +} /* generate_palette_unquantized16: same blend for a 16-entry palette, using the weight table aWeight4; i has to be in 0..15 and is not bounds-checked here. */ +void generate_palette_unquantized16( out uint3 palette, int3 low, int3 high, int i ) +{ + static const int aWeight4[] = {0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64}; + + int3 tmp = ( low * ( 64 - aWeight4[i] ) + high * aWeight4[i] + 32 ) >> 6; + palette = finish_unquantize( tmp ); +} + /* half2float1: converts one 16-bit half-float bit pattern (the low 16 bits of Value) to a 32-bit float. The mantissa is the low 10 bits, the exponent field is bits 10-14 and the sign is bit 15. A denormal input is renormalised by shifting the mantissa left until bit 10 is set while decrementing Exponent; the unsigned wrap-around in Exponent is cancelled by the 112 added when the result is assembled. For zero, Exponent is set to (uint)(-112) so that adding 112 wraps to 0. The rebias of 112 equals 127 - 15. NOTE(review): an all-ones exponent field (0x1F) takes the normalised path and is not special-cased as Inf/NaN - confirm this is acceptable for callers. */ +float half2float1( uint Value ) +{ + uint Mantissa = (uint)(Value & 0x03FF); + + uint Exponent; + if ((Value & 0x7C00) != 0) // The value is normalized + { + Exponent = (uint)((Value >> 10) & 0x1F); + } + else if (Mantissa != 0) // The value is denormalized + { + // Normalize the value in the resulting float + Exponent = 1; + + do + { + Exponent--; + Mantissa <<= 1; + } while ((Mantissa & 0x0400) == 0); + + Mantissa &= 0x03FF; + } + else // The value is zero + { + Exponent = (uint)(-112); + } + + uint Result = ((Value & 0x8000) << 16) | // Sign + ((Exponent + 112) << 23) | // Exponent + (Mantissa << 13); // Mantissa + + return asfloat(Result); +} + /* half2float: converts each of the three 16-bit half patterns in color_h by calling half2float1 per component. The commented-out lines that follow are a disabled vectorised formulation of the same conversion. */ +float3 half2float(uint3 color_h ) +{ + //uint3 sign = color_h & 0x8000; + //uint3 expo = color_h & 0x7C00; + //uint3 base = color_h & 0x03FF; + //return ( expo == 0 ) ? 
asfloat( ( sign << 16 ) | asuint( float3(base) / 16777216 ) ) //16777216 = 2^24 + // : asfloat( ( sign << 16 ) | ( ( ( expo + 0x1C000 ) | base ) << 13 ) ); //0x1C000 = 0x1FC00 - 0x3C00 + + return float3( half2float1( color_h.x ), half2float1( color_h.y ), half2float1( color_h.z ) ); +} + +void block_package( inout uint4 block, int2x3 endPoint[2], uint mode_type, uint partition_index ) // for mode 1 - 10 +{ + block.xy = 0; + block.z &= 0xFFFC0000; + + //block.z |= (partition_index & 0x1f) << 13; + + if ( mode_type == candidateModeFlag[0]) + { + /*block.x = candidateModeMemory[0]; + block.x |= ( ( endPoint[0][0].r << 5 ) & 0x00007FE0 ) | ( ( endPoint[0][0].g << 15 ) & 0x01FF8000 ) | ( ( endPoint[0][0].b << 25 ) & 0xFE000000 ); + block.x |= ( endPoint[1][0].g >> 2 ) & 0x00000004; + block.x |= ( endPoint[1][0].b >> 1 ) & 0x00000008; + block.x |= endPoint[1][1].b & 0x00000010; + block.y |= ( ( endPoint[0][0].b >> 7 ) & 0x00000007 ); + block.y |= ( ( endPoint[0][1].r << 3 ) & 0x000000F8 ) | ( ( endPoint[0][1].g << 13 ) & 0x0003E000 ) | ( ( endPoint[0][1].b << 23 ) & 0x0F800000 ); + block.yz |= ( endPoint[1][0].gr << uint2(9, 1) ) & uint2(0x00001E00, 0x0000003E); + block.y |= ( endPoint[1][0].b << 29 ) & 0xE0000000; + block.y |= ( ( endPoint[1][1].g << 4 ) & 0x00000100 ); + block.yz |= ( endPoint[1][1].gr << uint2(19, 7) ) & uint2(0x00780000, 0x00000F80); + block.yz |= ( ( endPoint[1][1].b << uint2(27, 9) ) & uint2(0x10000000, 0x00001000) ) | ( ( endPoint[1][1].b << uint2(18, 4) ) & uint2(0x00040000, 0x00000040) ); + block.z |= ( endPoint[1][0].b >> 3 ) & 0x00000001;*/ + + block.x |= ((candidateModeMemory[0] >> 0) & 1) << 0; + block.x |= ((candidateModeMemory[0] >> 1) & 1) << 1; + block.x |= ((endPoint[1][0].g >> 4) & 1) << 2; + block.x |= ((endPoint[1][0].b >> 4) & 1) << 3; + block.x |= ((endPoint[1][1].b >> 4) & 1) << 4; + block.x |= ((endPoint[0][0].r >> 0) & 1) << 5; + block.x |= ((endPoint[0][0].r >> 1) & 1) << 6; + block.x |= ((endPoint[0][0].r >> 2) & 1) << 7; + 
block.x |= ((endPoint[0][0].r >> 3) & 1) << 8; + block.x |= ((endPoint[0][0].r >> 4) & 1) << 9; + block.x |= ((endPoint[0][0].r >> 5) & 1) << 10; + block.x |= ((endPoint[0][0].r >> 6) & 1) << 11; + block.x |= ((endPoint[0][0].r >> 7) & 1) << 12; + block.x |= ((endPoint[0][0].r >> 8) & 1) << 13; + block.x |= ((endPoint[0][0].r >> 9) & 1) << 14; + block.x |= ((endPoint[0][0].g >> 0) & 1) << 15; + block.x |= ((endPoint[0][0].g >> 1) & 1) << 16; + block.x |= ((endPoint[0][0].g >> 2) & 1) << 17; + block.x |= ((endPoint[0][0].g >> 3) & 1) << 18; + block.x |= ((endPoint[0][0].g >> 4) & 1) << 19; + block.x |= ((endPoint[0][0].g >> 5) & 1) << 20; + block.x |= ((endPoint[0][0].g >> 6) & 1) << 21; + block.x |= ((endPoint[0][0].g >> 7) & 1) << 22; + block.x |= ((endPoint[0][0].g >> 8) & 1) << 23; + block.x |= ((endPoint[0][0].g >> 9) & 1) << 24; + block.x |= ((endPoint[0][0].b >> 0) & 1) << 25; + block.x |= ((endPoint[0][0].b >> 1) & 1) << 26; + block.x |= ((endPoint[0][0].b >> 2) & 1) << 27; + block.x |= ((endPoint[0][0].b >> 3) & 1) << 28; + block.x |= ((endPoint[0][0].b >> 4) & 1) << 29; + block.x |= ((endPoint[0][0].b >> 5) & 1) << 30; + block.x |= ((endPoint[0][0].b >> 6) & 1) << 31; + block.y |= ((endPoint[0][0].b >> 7) & 1) << 0; + block.y |= ((endPoint[0][0].b >> 8) & 1) << 1; + block.y |= ((endPoint[0][0].b >> 9) & 1) << 2; + block.y |= ((endPoint[0][1].r >> 0) & 1) << 3; + block.y |= ((endPoint[0][1].r >> 1) & 1) << 4; + block.y |= ((endPoint[0][1].r >> 2) & 1) << 5; + block.y |= ((endPoint[0][1].r >> 3) & 1) << 6; + block.y |= ((endPoint[0][1].r >> 4) & 1) << 7; + block.y |= ((endPoint[1][1].g >> 4) & 1) << 8; + block.y |= ((endPoint[1][0].g >> 0) & 1) << 9; + block.y |= ((endPoint[1][0].g >> 1) & 1) << 10; + block.y |= ((endPoint[1][0].g >> 2) & 1) << 11; + block.y |= ((endPoint[1][0].g >> 3) & 1) << 12; + block.y |= ((endPoint[0][1].g >> 0) & 1) << 13; + block.y |= ((endPoint[0][1].g >> 1) & 1) << 14; + block.y |= ((endPoint[0][1].g >> 2) & 1) << 15; + block.y |= 
((endPoint[0][1].g >> 3) & 1) << 16; + block.y |= ((endPoint[0][1].g >> 4) & 1) << 17; + block.y |= ((endPoint[1][1].b >> 0) & 1) << 18; + block.y |= ((endPoint[1][1].g >> 0) & 1) << 19; + block.y |= ((endPoint[1][1].g >> 1) & 1) << 20; + block.y |= ((endPoint[1][1].g >> 2) & 1) << 21; + block.y |= ((endPoint[1][1].g >> 3) & 1) << 22; + block.y |= ((endPoint[0][1].b >> 0) & 1) << 23; + block.y |= ((endPoint[0][1].b >> 1) & 1) << 24; + block.y |= ((endPoint[0][1].b >> 2) & 1) << 25; + block.y |= ((endPoint[0][1].b >> 3) & 1) << 26; + block.y |= ((endPoint[0][1].b >> 4) & 1) << 27; + block.y |= ((endPoint[1][1].b >> 1) & 1) << 28; + block.y |= ((endPoint[1][0].b >> 0) & 1) << 29; + block.y |= ((endPoint[1][0].b >> 1) & 1) << 30; + block.y |= ((endPoint[1][0].b >> 2) & 1) << 31; + block.z |= ((endPoint[1][0].b >> 3) & 1) << 0; + block.z |= ((endPoint[1][0].r >> 0) & 1) << 1; + block.z |= ((endPoint[1][0].r >> 1) & 1) << 2; + block.z |= ((endPoint[1][0].r >> 2) & 1) << 3; + block.z |= ((endPoint[1][0].r >> 3) & 1) << 4; + block.z |= ((endPoint[1][0].r >> 4) & 1) << 5; + block.z |= ((endPoint[1][1].b >> 2) & 1) << 6; + block.z |= ((endPoint[1][1].r >> 0) & 1) << 7; + block.z |= ((endPoint[1][1].r >> 1) & 1) << 8; + block.z |= ((endPoint[1][1].r >> 2) & 1) << 9; + block.z |= ((endPoint[1][1].r >> 3) & 1) << 10; + block.z |= ((endPoint[1][1].r >> 4) & 1) << 11; + block.z |= ((endPoint[1][1].b >> 3) & 1) << 12; + block.z |= ((partition_index >> 0) & 1) << 13; + block.z |= ((partition_index >> 1) & 1) << 14; + block.z |= ((partition_index >> 2) & 1) << 15; + block.z |= ((partition_index >> 3) & 1) << 16; + block.z |= ((partition_index >> 4) & 1) << 17; + } + else if ( mode_type == candidateModeFlag[1]) + { + /*block.x = candidateModeMemory[1]; + block.x |= ( ( endPoint[0][0].r << 5 ) & 0x00000FE0 ) | ( ( endPoint[0][0].g << 15 ) & 0x003F8000 ) | ( ( endPoint[0][0].b << 25 ) & 0xFE000000 ); + block.y |= ( ( endPoint[0][1].r << 3 ) & 0x000001F8 ) | ( ( endPoint[0][1].g << 13 
) & 0x0007E000 ) | ( ( endPoint[0][1].b << 23 ) & 0x1F800000 ); + block.x |= ( ( endPoint[1][0].g >> 3 ) & 0x00000004 ) | ( ( endPoint[1][0].g << 20 ) & 0x01000000 ); + block.x |= ( endPoint[1][1].g >> 1 ) & 0x00000018; + block.x |= ( ( endPoint[1][1].b << 21 ) & 0x00800000 ) | ( ( endPoint[1][1].b << 12 ) & 0x00003000 ); + block.x |= ( ( endPoint[1][0].b << 17 ) & 0x00400000 ) | ( ( endPoint[1][0].b << 10 ) & 0x00004000 ); + block.yz |= ( endPoint[1][0].gr << uint2(9, 1) ) & uint2(0x00001E00, 0x0000007E); + block.y |= ( endPoint[1][0].b << 29 ) & 0xE0000000; + block.yz |= ( endPoint[1][1].gr << uint2(19, 7) ) & uint2(0x00780000, 0x00001F80); + block.y |= ( ( endPoint[1][1].b >> 4 ) & 0x00000002 ) | ( ( endPoint[1][1].b >> 2 ) & 0x00000004 ) | ( ( endPoint[1][1].b >> 3 ) & 0x00000001 ); + block.z |= ( endPoint[1][0].b >> 3 ) & 0x00000001;*/ + + block.x |= ((candidateModeMemory[1] >> 0) & 1) << 0; + block.x |= ((candidateModeMemory[1] >> 1) & 1) << 1; + block.x |= ((endPoint[1][0].g >> 5) & 1) << 2; + block.x |= ((endPoint[1][1].g >> 4) & 1) << 3; + block.x |= ((endPoint[1][1].g >> 5) & 1) << 4; + block.x |= ((endPoint[0][0].r >> 0) & 1) << 5; + block.x |= ((endPoint[0][0].r >> 1) & 1) << 6; + block.x |= ((endPoint[0][0].r >> 2) & 1) << 7; + block.x |= ((endPoint[0][0].r >> 3) & 1) << 8; + block.x |= ((endPoint[0][0].r >> 4) & 1) << 9; + block.x |= ((endPoint[0][0].r >> 5) & 1) << 10; + block.x |= ((endPoint[0][0].r >> 6) & 1) << 11; + block.x |= ((endPoint[1][1].b >> 0) & 1) << 12; + block.x |= ((endPoint[1][1].b >> 1) & 1) << 13; + block.x |= ((endPoint[1][0].b >> 4) & 1) << 14; + block.x |= ((endPoint[0][0].g >> 0) & 1) << 15; + block.x |= ((endPoint[0][0].g >> 1) & 1) << 16; + block.x |= ((endPoint[0][0].g >> 2) & 1) << 17; + block.x |= ((endPoint[0][0].g >> 3) & 1) << 18; + block.x |= ((endPoint[0][0].g >> 4) & 1) << 19; + block.x |= ((endPoint[0][0].g >> 5) & 1) << 20; + block.x |= ((endPoint[0][0].g >> 6) & 1) << 21; + block.x |= ((endPoint[1][0].b >> 5) & 1) 
<< 22; + block.x |= ((endPoint[1][1].b >> 2) & 1) << 23; + block.x |= ((endPoint[1][0].g >> 4) & 1) << 24; + block.x |= ((endPoint[0][0].b >> 0) & 1) << 25; + block.x |= ((endPoint[0][0].b >> 1) & 1) << 26; + block.x |= ((endPoint[0][0].b >> 2) & 1) << 27; + block.x |= ((endPoint[0][0].b >> 3) & 1) << 28; + block.x |= ((endPoint[0][0].b >> 4) & 1) << 29; + block.x |= ((endPoint[0][0].b >> 5) & 1) << 30; + block.x |= ((endPoint[0][0].b >> 6) & 1) << 31; + block.y |= ((endPoint[1][1].b >> 3) & 1) << 0; + block.y |= ((endPoint[1][1].b >> 5) & 1) << 1; + block.y |= ((endPoint[1][1].b >> 4) & 1) << 2; + block.y |= ((endPoint[0][1].r >> 0) & 1) << 3; + block.y |= ((endPoint[0][1].r >> 1) & 1) << 4; + block.y |= ((endPoint[0][1].r >> 2) & 1) << 5; + block.y |= ((endPoint[0][1].r >> 3) & 1) << 6; + block.y |= ((endPoint[0][1].r >> 4) & 1) << 7; + block.y |= ((endPoint[0][1].r >> 5) & 1) << 8; + block.y |= ((endPoint[1][0].g >> 0) & 1) << 9; + block.y |= ((endPoint[1][0].g >> 1) & 1) << 10; + block.y |= ((endPoint[1][0].g >> 2) & 1) << 11; + block.y |= ((endPoint[1][0].g >> 3) & 1) << 12; + block.y |= ((endPoint[0][1].g >> 0) & 1) << 13; + block.y |= ((endPoint[0][1].g >> 1) & 1) << 14; + block.y |= ((endPoint[0][1].g >> 2) & 1) << 15; + block.y |= ((endPoint[0][1].g >> 3) & 1) << 16; + block.y |= ((endPoint[0][1].g >> 4) & 1) << 17; + block.y |= ((endPoint[0][1].g >> 5) & 1) << 18; + block.y |= ((endPoint[1][1].g >> 0) & 1) << 19; + block.y |= ((endPoint[1][1].g >> 1) & 1) << 20; + block.y |= ((endPoint[1][1].g >> 2) & 1) << 21; + block.y |= ((endPoint[1][1].g >> 3) & 1) << 22; + block.y |= ((endPoint[0][1].b >> 0) & 1) << 23; + block.y |= ((endPoint[0][1].b >> 1) & 1) << 24; + block.y |= ((endPoint[0][1].b >> 2) & 1) << 25; + block.y |= ((endPoint[0][1].b >> 3) & 1) << 26; + block.y |= ((endPoint[0][1].b >> 4) & 1) << 27; + block.y |= ((endPoint[0][1].b >> 5) & 1) << 28; + block.y |= ((endPoint[1][0].b >> 0) & 1) << 29; + block.y |= ((endPoint[1][0].b >> 1) & 1) << 30; + 
block.y |= ((endPoint[1][0].b >> 2) & 1) << 31; + block.z |= ((endPoint[1][0].b >> 3) & 1) << 0; + block.z |= ((endPoint[1][0].r >> 0) & 1) << 1; + block.z |= ((endPoint[1][0].r >> 1) & 1) << 2; + block.z |= ((endPoint[1][0].r >> 2) & 1) << 3; + block.z |= ((endPoint[1][0].r >> 3) & 1) << 4; + block.z |= ((endPoint[1][0].r >> 4) & 1) << 5; + block.z |= ((endPoint[1][0].r >> 5) & 1) << 6; + block.z |= ((endPoint[1][1].r >> 0) & 1) << 7; + block.z |= ((endPoint[1][1].r >> 1) & 1) << 8; + block.z |= ((endPoint[1][1].r >> 2) & 1) << 9; + block.z |= ((endPoint[1][1].r >> 3) & 1) << 10; + block.z |= ((endPoint[1][1].r >> 4) & 1) << 11; + block.z |= ((endPoint[1][1].r >> 5) & 1) << 12; + block.z |= ((partition_index >> 0) & 1) << 13; + block.z |= ((partition_index >> 1) & 1) << 14; + block.z |= ((partition_index >> 2) & 1) << 15; + block.z |= ((partition_index >> 3) & 1) << 16; + block.z |= ((partition_index >> 4) & 1) << 17; + } + else if ( mode_type == candidateModeFlag[2]) + { + /*block.x = candidateModeMemory[2]; + block.x |= ( ( endPoint[0][0].r << 5 ) & 0x00007FE0 ) | ( ( endPoint[0][0].g << 15 ) & 0x01FF8000 ) | ( ( endPoint[0][0].b << 25 ) & 0xFE000000 ); + block.y |= ( endPoint[0][0].r >> 2 ) & 0x00000100; + block.y |= ( endPoint[0][0].g << 7 ) & 0x00020000; + block.y |= ( ( endPoint[0][0].b << 17 ) & 0x08000000 ) | ( ( endPoint[0][0].b >> 7 ) & 0x00000007 ); + block.y |= ( ( endPoint[0][1].r << 3 ) & 0x000000F8 ) | ( ( endPoint[0][1].g << 13 ) & 0x0001E000 ) | ( ( endPoint[0][1].b << 23 ) & 0x07800000 ); + block.yz |= ( endPoint[1][0].gr << uint2(9, 1) ) & uint2(0x00001E00, 0x0000003E); + block.y |= ( endPoint[1][0].b << 29 ) & 0xE0000000; + block.yz |= ( endPoint[1][1].gr << uint2(19, 7) ) & uint2(0x00780000, 0x00000F80); + block.yz |= ( ( endPoint[1][1].b << uint2(27, 9) ) & uint2(0x10000000, 0x00001000) ) | ( ( endPoint[1][1].b << uint2(18, 4) ) & uint2(0x00040000, 0x00000040) ); + block.z |= ( endPoint[1][0].b >> 3 ) & 0x00000001;*/ + + block.x |= 
((candidateModeMemory[2] >> 0) & 1) << 0; + block.x |= ((candidateModeMemory[2] >> 1) & 1) << 1; + block.x |= ((candidateModeMemory[2] >> 2) & 1) << 2; + block.x |= ((candidateModeMemory[2] >> 3) & 1) << 3; + block.x |= ((candidateModeMemory[2] >> 4) & 1) << 4; + block.x |= ((endPoint[0][0].r >> 0) & 1) << 5; + block.x |= ((endPoint[0][0].r >> 1) & 1) << 6; + block.x |= ((endPoint[0][0].r >> 2) & 1) << 7; + block.x |= ((endPoint[0][0].r >> 3) & 1) << 8; + block.x |= ((endPoint[0][0].r >> 4) & 1) << 9; + block.x |= ((endPoint[0][0].r >> 5) & 1) << 10; + block.x |= ((endPoint[0][0].r >> 6) & 1) << 11; + block.x |= ((endPoint[0][0].r >> 7) & 1) << 12; + block.x |= ((endPoint[0][0].r >> 8) & 1) << 13; + block.x |= ((endPoint[0][0].r >> 9) & 1) << 14; + block.x |= ((endPoint[0][0].g >> 0) & 1) << 15; + block.x |= ((endPoint[0][0].g >> 1) & 1) << 16; + block.x |= ((endPoint[0][0].g >> 2) & 1) << 17; + block.x |= ((endPoint[0][0].g >> 3) & 1) << 18; + block.x |= ((endPoint[0][0].g >> 4) & 1) << 19; + block.x |= ((endPoint[0][0].g >> 5) & 1) << 20; + block.x |= ((endPoint[0][0].g >> 6) & 1) << 21; + block.x |= ((endPoint[0][0].g >> 7) & 1) << 22; + block.x |= ((endPoint[0][0].g >> 8) & 1) << 23; + block.x |= ((endPoint[0][0].g >> 9) & 1) << 24; + block.x |= ((endPoint[0][0].b >> 0) & 1) << 25; + block.x |= ((endPoint[0][0].b >> 1) & 1) << 26; + block.x |= ((endPoint[0][0].b >> 2) & 1) << 27; + block.x |= ((endPoint[0][0].b >> 3) & 1) << 28; + block.x |= ((endPoint[0][0].b >> 4) & 1) << 29; + block.x |= ((endPoint[0][0].b >> 5) & 1) << 30; + block.x |= ((endPoint[0][0].b >> 6) & 1) << 31; + block.y |= ((endPoint[0][0].b >> 7) & 1) << 0; + block.y |= ((endPoint[0][0].b >> 8) & 1) << 1; + block.y |= ((endPoint[0][0].b >> 9) & 1) << 2; + block.y |= ((endPoint[0][1].r >> 0) & 1) << 3; + block.y |= ((endPoint[0][1].r >> 1) & 1) << 4; + block.y |= ((endPoint[0][1].r >> 2) & 1) << 5; + block.y |= ((endPoint[0][1].r >> 3) & 1) << 6; + block.y |= ((endPoint[0][1].r >> 4) & 1) << 7; 
+ block.y |= ((endPoint[0][0].r >> 10) & 1) << 8; + block.y |= ((endPoint[1][0].g >> 0) & 1) << 9; + block.y |= ((endPoint[1][0].g >> 1) & 1) << 10; + block.y |= ((endPoint[1][0].g >> 2) & 1) << 11; + block.y |= ((endPoint[1][0].g >> 3) & 1) << 12; + block.y |= ((endPoint[0][1].g >> 0) & 1) << 13; + block.y |= ((endPoint[0][1].g >> 1) & 1) << 14; + block.y |= ((endPoint[0][1].g >> 2) & 1) << 15; + block.y |= ((endPoint[0][1].g >> 3) & 1) << 16; + block.y |= ((endPoint[0][0].g >> 10) & 1) << 17; + block.y |= ((endPoint[1][1].b >> 0) & 1) << 18; + block.y |= ((endPoint[1][1].g >> 0) & 1) << 19; + block.y |= ((endPoint[1][1].g >> 1) & 1) << 20; + block.y |= ((endPoint[1][1].g >> 2) & 1) << 21; + block.y |= ((endPoint[1][1].g >> 3) & 1) << 22; + block.y |= ((endPoint[0][1].b >> 0) & 1) << 23; + block.y |= ((endPoint[0][1].b >> 1) & 1) << 24; + block.y |= ((endPoint[0][1].b >> 2) & 1) << 25; + block.y |= ((endPoint[0][1].b >> 3) & 1) << 26; + block.y |= ((endPoint[0][0].b >> 10) & 1) << 27; + block.y |= ((endPoint[1][1].b >> 1) & 1) << 28; + block.y |= ((endPoint[1][0].b >> 0) & 1) << 29; + block.y |= ((endPoint[1][0].b >> 1) & 1) << 30; + block.y |= ((endPoint[1][0].b >> 2) & 1) << 31; + block.z |= ((endPoint[1][0].b >> 3) & 1) << 0; + block.z |= ((endPoint[1][0].r >> 0) & 1) << 1; + block.z |= ((endPoint[1][0].r >> 1) & 1) << 2; + block.z |= ((endPoint[1][0].r >> 2) & 1) << 3; + block.z |= ((endPoint[1][0].r >> 3) & 1) << 4; + block.z |= ((endPoint[1][0].r >> 4) & 1) << 5; + block.z |= ((endPoint[1][1].b >> 2) & 1) << 6; + block.z |= ((endPoint[1][1].r >> 0) & 1) << 7; + block.z |= ((endPoint[1][1].r >> 1) & 1) << 8; + block.z |= ((endPoint[1][1].r >> 2) & 1) << 9; + block.z |= ((endPoint[1][1].r >> 3) & 1) << 10; + block.z |= ((endPoint[1][1].r >> 4) & 1) << 11; + block.z |= ((endPoint[1][1].b >> 3) & 1) << 12; + block.z |= ((partition_index >> 0) & 1) << 13; + block.z |= ((partition_index >> 1) & 1) << 14; + block.z |= ((partition_index >> 2) & 1) << 15; + block.z 
|= ((partition_index >> 3) & 1) << 16; + block.z |= ((partition_index >> 4) & 1) << 17; + } + else if ( mode_type == candidateModeFlag[3]) + { + /*block.x = candidateModeMemory[3]; + block.x |= ( ( endPoint[0][0].r << 5 ) & 0x00007FE0 ) | ( ( endPoint[0][0].g << 15 ) & 0x01FF8000 ) | ( ( endPoint[0][0].b << 25 ) & 0xFE000000 ); + block.y |= ( endPoint[0][0].r >> 3 ) & 0x00000080; + block.y |= ( endPoint[0][0].g << 8 ) & 0x00040000; + block.y |= ( ( endPoint[0][0].b << 17 ) & 0x08000000 ) | ( ( endPoint[0][0].b >> 7 ) & 0x00000007 ); + block.y |= ( ( endPoint[0][1].r << 3 ) & 0x00000078 ) | ( ( endPoint[0][1].g << 13 ) & 0x0003E000 ) | ( ( endPoint[0][1].b << 23 ) & 0x07800000 ); + block.yz |= ( endPoint[1][0].gr << uint2(9, 1) ) & uint2(0x00001E00, 0x0000001E); + block.y |= ( endPoint[1][0].b << 29 ) & 0xE0000000; + block.y |= ( ( endPoint[1][1].g << 4 ) & 0x00000100 ); + block.yz |= ( endPoint[1][1].gr << uint2(19, 7) ) & uint2(0x00780000, 0x00000780); + block.yz |= ( endPoint[1][1].b << uint2(27, 9) ) & uint2(0x10000000, 0x00001000); + block.z |= ( ( endPoint[1][0].g << 7 ) & 0x00000800 ); + block.z |= ( endPoint[1][0].b >> 3 ) & 0x00000001; + block.z |= ( endPoint[1][1].b << 4 ) & 0x00000040; + block.z |= ( endPoint[1][1].b << 5 ) & 0x00000020;*/ + + block.x |= ((candidateModeMemory[3] >> 0) & 1) << 0; + block.x |= ((candidateModeMemory[3] >> 1) & 1) << 1; + block.x |= ((candidateModeMemory[3] >> 2) & 1) << 2; + block.x |= ((candidateModeMemory[3] >> 3) & 1) << 3; + block.x |= ((candidateModeMemory[3] >> 4) & 1) << 4; + block.x |= ((endPoint[0][0].r >> 0) & 1) << 5; + block.x |= ((endPoint[0][0].r >> 1) & 1) << 6; + block.x |= ((endPoint[0][0].r >> 2) & 1) << 7; + block.x |= ((endPoint[0][0].r >> 3) & 1) << 8; + block.x |= ((endPoint[0][0].r >> 4) & 1) << 9; + block.x |= ((endPoint[0][0].r >> 5) & 1) << 10; + block.x |= ((endPoint[0][0].r >> 6) & 1) << 11; + block.x |= ((endPoint[0][0].r >> 7) & 1) << 12; + block.x |= ((endPoint[0][0].r >> 8) & 1) << 13; + 
block.x |= ((endPoint[0][0].r >> 9) & 1) << 14; + block.x |= ((endPoint[0][0].g >> 0) & 1) << 15; + block.x |= ((endPoint[0][0].g >> 1) & 1) << 16; + block.x |= ((endPoint[0][0].g >> 2) & 1) << 17; + block.x |= ((endPoint[0][0].g >> 3) & 1) << 18; + block.x |= ((endPoint[0][0].g >> 4) & 1) << 19; + block.x |= ((endPoint[0][0].g >> 5) & 1) << 20; + block.x |= ((endPoint[0][0].g >> 6) & 1) << 21; + block.x |= ((endPoint[0][0].g >> 7) & 1) << 22; + block.x |= ((endPoint[0][0].g >> 8) & 1) << 23; + block.x |= ((endPoint[0][0].g >> 9) & 1) << 24; + block.x |= ((endPoint[0][0].b >> 0) & 1) << 25; + block.x |= ((endPoint[0][0].b >> 1) & 1) << 26; + block.x |= ((endPoint[0][0].b >> 2) & 1) << 27; + block.x |= ((endPoint[0][0].b >> 3) & 1) << 28; + block.x |= ((endPoint[0][0].b >> 4) & 1) << 29; + block.x |= ((endPoint[0][0].b >> 5) & 1) << 30; + block.x |= ((endPoint[0][0].b >> 6) & 1) << 31; + block.y |= ((endPoint[0][0].b >> 7) & 1) << 0; + block.y |= ((endPoint[0][0].b >> 8) & 1) << 1; + block.y |= ((endPoint[0][0].b >> 9) & 1) << 2; + block.y |= ((endPoint[0][1].r >> 0) & 1) << 3; + block.y |= ((endPoint[0][1].r >> 1) & 1) << 4; + block.y |= ((endPoint[0][1].r >> 2) & 1) << 5; + block.y |= ((endPoint[0][1].r >> 3) & 1) << 6; + block.y |= ((endPoint[0][0].r >> 10) & 1) << 7; + block.y |= ((endPoint[1][1].g >> 4) & 1) << 8; + block.y |= ((endPoint[1][0].g >> 0) & 1) << 9; + block.y |= ((endPoint[1][0].g >> 1) & 1) << 10; + block.y |= ((endPoint[1][0].g >> 2) & 1) << 11; + block.y |= ((endPoint[1][0].g >> 3) & 1) << 12; + block.y |= ((endPoint[0][1].g >> 0) & 1) << 13; + block.y |= ((endPoint[0][1].g >> 1) & 1) << 14; + block.y |= ((endPoint[0][1].g >> 2) & 1) << 15; + block.y |= ((endPoint[0][1].g >> 3) & 1) << 16; + block.y |= ((endPoint[0][1].g >> 4) & 1) << 17; + block.y |= ((endPoint[0][0].g >> 10) & 1) << 18; + block.y |= ((endPoint[1][1].g >> 0) & 1) << 19; + block.y |= ((endPoint[1][1].g >> 1) & 1) << 20; + block.y |= ((endPoint[1][1].g >> 2) & 1) << 21; + block.y 
|= ((endPoint[1][1].g >> 3) & 1) << 22; + block.y |= ((endPoint[0][1].b >> 0) & 1) << 23; + block.y |= ((endPoint[0][1].b >> 1) & 1) << 24; + block.y |= ((endPoint[0][1].b >> 2) & 1) << 25; + block.y |= ((endPoint[0][1].b >> 3) & 1) << 26; + block.y |= ((endPoint[0][0].b >> 10) & 1) << 27; + block.y |= ((endPoint[1][1].b >> 1) & 1) << 28; + block.y |= ((endPoint[1][0].b >> 0) & 1) << 29; + block.y |= ((endPoint[1][0].b >> 1) & 1) << 30; + block.y |= ((endPoint[1][0].b >> 2) & 1) << 31; + block.z |= ((endPoint[1][0].b >> 3) & 1) << 0; + block.z |= ((endPoint[1][0].r >> 0) & 1) << 1; + block.z |= ((endPoint[1][0].r >> 1) & 1) << 2; + block.z |= ((endPoint[1][0].r >> 2) & 1) << 3; + block.z |= ((endPoint[1][0].r >> 3) & 1) << 4; + block.z |= ((endPoint[1][1].b >> 0) & 1) << 5; + block.z |= ((endPoint[1][1].b >> 2) & 1) << 6; + block.z |= ((endPoint[1][1].r >> 0) & 1) << 7; + block.z |= ((endPoint[1][1].r >> 1) & 1) << 8; + block.z |= ((endPoint[1][1].r >> 2) & 1) << 9; + block.z |= ((endPoint[1][1].r >> 3) & 1) << 10; + block.z |= ((endPoint[1][0].g >> 4) & 1) << 11; + block.z |= ((endPoint[1][1].b >> 3) & 1) << 12; + block.z |= ((partition_index >> 0) & 1) << 13; + block.z |= ((partition_index >> 1) & 1) << 14; + block.z |= ((partition_index >> 2) & 1) << 15; + block.z |= ((partition_index >> 3) & 1) << 16; + block.z |= ((partition_index >> 4) & 1) << 17; + } + else if ( mode_type == candidateModeFlag[4]) + { + /*block.x = candidateModeMemory[4]; + block.x |= ( ( endPoint[0][0].r << 5 ) & 0x00007FE0 ) | ( ( endPoint[0][0].g << 15 ) & 0x01FF8000 ) | ( ( endPoint[0][0].b << 25 ) & 0xFE000000 ); + block.y |= ( endPoint[0][0].r >> 3 ) & 0x00000080; + block.y |= ( endPoint[0][0].g << 7 ) & 0x00020000; + block.y |= ( ( endPoint[0][0].b << 18 ) & 0x10000000 ) | ( ( endPoint[0][0].b >> 7 ) & 0x00000007 ); + block.y |= ( ( endPoint[0][1].r << 3 ) & 0x00000078 ) | ( ( endPoint[0][1].g << 13 ) & 0x0001E000 ) | ( ( endPoint[0][1].b << 23 ) & 0x0F800000 ); + block.y |= ( ( 
endPoint[1][0].g << 9 ) & 0x00001E00 ) | ( ( endPoint[1][0].b << 4 ) & 0x00000100 ); + block.y |= ( endPoint[1][0].b << 29 ) & 0xE0000000; + block.yz |= ( endPoint[1][1].gr << uint2(19, 7) ) & uint2(0x00780000, 0x00000780); + block.yz |= ( endPoint[1][1].b << uint2(18, 4) ) & uint2(0x00040000, 0x00000060); + block.z |= ( endPoint[1][0].r << 1 ) & 0x0000001E; + block.z |= ( endPoint[1][0].b >> 3 ) & 0x00000001; + block.z |= ( ( endPoint[1][1].b << 7 ) & 0x00000800 ) | ( ( endPoint[1][1].b << 9 ) & 0x00001000 );*/ + + block.x |= ((candidateModeMemory[4] >> 0) & 1) << 0; + block.x |= ((candidateModeMemory[4] >> 1) & 1) << 1; + block.x |= ((candidateModeMemory[4] >> 2) & 1) << 2; + block.x |= ((candidateModeMemory[4] >> 3) & 1) << 3; + block.x |= ((candidateModeMemory[4] >> 4) & 1) << 4; + block.x |= ((endPoint[0][0].r >> 0) & 1) << 5; + block.x |= ((endPoint[0][0].r >> 1) & 1) << 6; + block.x |= ((endPoint[0][0].r >> 2) & 1) << 7; + block.x |= ((endPoint[0][0].r >> 3) & 1) << 8; + block.x |= ((endPoint[0][0].r >> 4) & 1) << 9; + block.x |= ((endPoint[0][0].r >> 5) & 1) << 10; + block.x |= ((endPoint[0][0].r >> 6) & 1) << 11; + block.x |= ((endPoint[0][0].r >> 7) & 1) << 12; + block.x |= ((endPoint[0][0].r >> 8) & 1) << 13; + block.x |= ((endPoint[0][0].r >> 9) & 1) << 14; + block.x |= ((endPoint[0][0].g >> 0) & 1) << 15; + block.x |= ((endPoint[0][0].g >> 1) & 1) << 16; + block.x |= ((endPoint[0][0].g >> 2) & 1) << 17; + block.x |= ((endPoint[0][0].g >> 3) & 1) << 18; + block.x |= ((endPoint[0][0].g >> 4) & 1) << 19; + block.x |= ((endPoint[0][0].g >> 5) & 1) << 20; + block.x |= ((endPoint[0][0].g >> 6) & 1) << 21; + block.x |= ((endPoint[0][0].g >> 7) & 1) << 22; + block.x |= ((endPoint[0][0].g >> 8) & 1) << 23; + block.x |= ((endPoint[0][0].g >> 9) & 1) << 24; + block.x |= ((endPoint[0][0].b >> 0) & 1) << 25; + block.x |= ((endPoint[0][0].b >> 1) & 1) << 26; + block.x |= ((endPoint[0][0].b >> 2) & 1) << 27; + block.x |= ((endPoint[0][0].b >> 3) & 1) << 28; + block.x 
|= ((endPoint[0][0].b >> 4) & 1) << 29; + block.x |= ((endPoint[0][0].b >> 5) & 1) << 30; + block.x |= ((endPoint[0][0].b >> 6) & 1) << 31; + block.y |= ((endPoint[0][0].b >> 7) & 1) << 0; + block.y |= ((endPoint[0][0].b >> 8) & 1) << 1; + block.y |= ((endPoint[0][0].b >> 9) & 1) << 2; + block.y |= ((endPoint[0][1].r >> 0) & 1) << 3; + block.y |= ((endPoint[0][1].r >> 1) & 1) << 4; + block.y |= ((endPoint[0][1].r >> 2) & 1) << 5; + block.y |= ((endPoint[0][1].r >> 3) & 1) << 6; + block.y |= ((endPoint[0][0].r >> 10) & 1) << 7; + block.y |= ((endPoint[1][0].b >> 4) & 1) << 8; + block.y |= ((endPoint[1][0].g >> 0) & 1) << 9; + block.y |= ((endPoint[1][0].g >> 1) & 1) << 10; + block.y |= ((endPoint[1][0].g >> 2) & 1) << 11; + block.y |= ((endPoint[1][0].g >> 3) & 1) << 12; + block.y |= ((endPoint[0][1].g >> 0) & 1) << 13; + block.y |= ((endPoint[0][1].g >> 1) & 1) << 14; + block.y |= ((endPoint[0][1].g >> 2) & 1) << 15; + block.y |= ((endPoint[0][1].g >> 3) & 1) << 16; + block.y |= ((endPoint[0][0].g >> 10) & 1) << 17; + block.y |= ((endPoint[1][1].b >> 0) & 1) << 18; + block.y |= ((endPoint[1][1].g >> 0) & 1) << 19; + block.y |= ((endPoint[1][1].g >> 1) & 1) << 20; + block.y |= ((endPoint[1][1].g >> 2) & 1) << 21; + block.y |= ((endPoint[1][1].g >> 3) & 1) << 22; + block.y |= ((endPoint[0][1].b >> 0) & 1) << 23; + block.y |= ((endPoint[0][1].b >> 1) & 1) << 24; + block.y |= ((endPoint[0][1].b >> 2) & 1) << 25; + block.y |= ((endPoint[0][1].b >> 3) & 1) << 26; + block.y |= ((endPoint[0][1].b >> 4) & 1) << 27; + block.y |= ((endPoint[0][0].b >> 10) & 1) << 28; + block.y |= ((endPoint[1][0].b >> 0) & 1) << 29; + block.y |= ((endPoint[1][0].b >> 1) & 1) << 30; + block.y |= ((endPoint[1][0].b >> 2) & 1) << 31; + block.z |= ((endPoint[1][0].b >> 3) & 1) << 0; + block.z |= ((endPoint[1][0].r >> 0) & 1) << 1; + block.z |= ((endPoint[1][0].r >> 1) & 1) << 2; + block.z |= ((endPoint[1][0].r >> 2) & 1) << 3; + block.z |= ((endPoint[1][0].r >> 3) & 1) << 4; + block.z |= 
((endPoint[1][1].b >> 1) & 1) << 5; + block.z |= ((endPoint[1][1].b >> 2) & 1) << 6; + block.z |= ((endPoint[1][1].r >> 0) & 1) << 7; + block.z |= ((endPoint[1][1].r >> 1) & 1) << 8; + block.z |= ((endPoint[1][1].r >> 2) & 1) << 9; + block.z |= ((endPoint[1][1].r >> 3) & 1) << 10; + block.z |= ((endPoint[1][1].b >> 4) & 1) << 11; + block.z |= ((endPoint[1][1].b >> 3) & 1) << 12; + block.z |= ((partition_index >> 0) & 1) << 13; + block.z |= ((partition_index >> 1) & 1) << 14; + block.z |= ((partition_index >> 2) & 1) << 15; + block.z |= ((partition_index >> 3) & 1) << 16; + block.z |= ((partition_index >> 4) & 1) << 17; + } + else if ( mode_type == candidateModeFlag[5]) + { + /*block.x = candidateModeMemory[5]; + block.x |= ( ( endPoint[0][0].r << 5 ) & 0x00003FE0 ) | ( ( endPoint[0][0].g << 15 ) & 0x00FF8000 ) | ( ( endPoint[0][0].b << 25 ) & 0xFE000000); + block.y |= ( endPoint[0][0].b >> 7 ) & 0x00000003; + block.y |= ( ( endPoint[0][1].r << 3 ) & 0x000000F8 ) | ( ( endPoint[0][1].g << 13 ) & 0x0003E000 ) | ( ( endPoint[0][1].b << 23 ) & 0x0F800000 ); + block.x |= ( ( endPoint[1][0].g << 20 ) & 0x01000000 ) | ( ( endPoint[1][0].b << 10 ) & 0x00004000 ); + block.yz |= ( endPoint[1][0].gr << uint2(9, 1) ) & uint2(0x00001E00, 0x0000003E); + block.y |= ( endPoint[1][0].b << 29 ) & 0xE0000000; + block.y |= ( ( endPoint[1][1].g << 4 ) & 0x00000100 ) | ( ( endPoint[1][1].b >> 2 ) & 0x00000004 ); + block.y |= ( ( endPoint[1][1].b << 27 ) & 0x10000000 ); + block.yz |= ( endPoint[1][1].gr << uint2(19, 7) ) & uint2(0x00780000, 0x00000F80); + block.yz |= ( endPoint[1][1].b << uint2(18, 4) ) & uint2(0x00040000, 0x00000040); + block.z |= ( endPoint[1][0].b >> 3 ) & 0x00000001; + block.z |= ( ( endPoint[1][1].b << 9 ) & 0x00001000 );*/ + + block.x |= ((candidateModeMemory[5] >> 0) & 1) << 0; + block.x |= ((candidateModeMemory[5] >> 1) & 1) << 1; + block.x |= ((candidateModeMemory[5] >> 2) & 1) << 2; + block.x |= ((candidateModeMemory[5] >> 3) & 1) << 3; + block.x |= 
((candidateModeMemory[5] >> 4) & 1) << 4; + block.x |= ((endPoint[0][0].r >> 0) & 1) << 5; + block.x |= ((endPoint[0][0].r >> 1) & 1) << 6; + block.x |= ((endPoint[0][0].r >> 2) & 1) << 7; + block.x |= ((endPoint[0][0].r >> 3) & 1) << 8; + block.x |= ((endPoint[0][0].r >> 4) & 1) << 9; + block.x |= ((endPoint[0][0].r >> 5) & 1) << 10; + block.x |= ((endPoint[0][0].r >> 6) & 1) << 11; + block.x |= ((endPoint[0][0].r >> 7) & 1) << 12; + block.x |= ((endPoint[0][0].r >> 8) & 1) << 13; + block.x |= ((endPoint[1][0].b >> 4) & 1) << 14; + block.x |= ((endPoint[0][0].g >> 0) & 1) << 15; + block.x |= ((endPoint[0][0].g >> 1) & 1) << 16; + block.x |= ((endPoint[0][0].g >> 2) & 1) << 17; + block.x |= ((endPoint[0][0].g >> 3) & 1) << 18; + block.x |= ((endPoint[0][0].g >> 4) & 1) << 19; + block.x |= ((endPoint[0][0].g >> 5) & 1) << 20; + block.x |= ((endPoint[0][0].g >> 6) & 1) << 21; + block.x |= ((endPoint[0][0].g >> 7) & 1) << 22; + block.x |= ((endPoint[0][0].g >> 8) & 1) << 23; + block.x |= ((endPoint[1][0].g >> 4) & 1) << 24; + block.x |= ((endPoint[0][0].b >> 0) & 1) << 25; + block.x |= ((endPoint[0][0].b >> 1) & 1) << 26; + block.x |= ((endPoint[0][0].b >> 2) & 1) << 27; + block.x |= ((endPoint[0][0].b >> 3) & 1) << 28; + block.x |= ((endPoint[0][0].b >> 4) & 1) << 29; + block.x |= ((endPoint[0][0].b >> 5) & 1) << 30; + block.x |= ((endPoint[0][0].b >> 6) & 1) << 31; + block.y |= ((endPoint[0][0].b >> 7) & 1) << 0; + block.y |= ((endPoint[0][0].b >> 8) & 1) << 1; + block.y |= ((endPoint[1][1].b >> 4) & 1) << 2; + block.y |= ((endPoint[0][1].r >> 0) & 1) << 3; + block.y |= ((endPoint[0][1].r >> 1) & 1) << 4; + block.y |= ((endPoint[0][1].r >> 2) & 1) << 5; + block.y |= ((endPoint[0][1].r >> 3) & 1) << 6; + block.y |= ((endPoint[0][1].r >> 4) & 1) << 7; + block.y |= ((endPoint[1][1].g >> 4) & 1) << 8; + block.y |= ((endPoint[1][0].g >> 0) & 1) << 9; + block.y |= ((endPoint[1][0].g >> 1) & 1) << 10; + block.y |= ((endPoint[1][0].g >> 2) & 1) << 11; + block.y |= 
((endPoint[1][0].g >> 3) & 1) << 12; + block.y |= ((endPoint[0][1].g >> 0) & 1) << 13; + block.y |= ((endPoint[0][1].g >> 1) & 1) << 14; + block.y |= ((endPoint[0][1].g >> 2) & 1) << 15; + block.y |= ((endPoint[0][1].g >> 3) & 1) << 16; + block.y |= ((endPoint[0][1].g >> 4) & 1) << 17; + block.y |= ((endPoint[1][1].b >> 0) & 1) << 18; + block.y |= ((endPoint[1][1].g >> 0) & 1) << 19; + block.y |= ((endPoint[1][1].g >> 1) & 1) << 20; + block.y |= ((endPoint[1][1].g >> 2) & 1) << 21; + block.y |= ((endPoint[1][1].g >> 3) & 1) << 22; + block.y |= ((endPoint[0][1].b >> 0) & 1) << 23; + block.y |= ((endPoint[0][1].b >> 1) & 1) << 24; + block.y |= ((endPoint[0][1].b >> 2) & 1) << 25; + block.y |= ((endPoint[0][1].b >> 3) & 1) << 26; + block.y |= ((endPoint[0][1].b >> 4) & 1) << 27; + block.y |= ((endPoint[1][1].b >> 1) & 1) << 28; + block.y |= ((endPoint[1][0].b >> 0) & 1) << 29; + block.y |= ((endPoint[1][0].b >> 1) & 1) << 30; + block.y |= ((endPoint[1][0].b >> 2) & 1) << 31; + block.z |= ((endPoint[1][0].b >> 3) & 1) << 0; + block.z |= ((endPoint[1][0].r >> 0) & 1) << 1; + block.z |= ((endPoint[1][0].r >> 1) & 1) << 2; + block.z |= ((endPoint[1][0].r >> 2) & 1) << 3; + block.z |= ((endPoint[1][0].r >> 3) & 1) << 4; + block.z |= ((endPoint[1][0].r >> 4) & 1) << 5; + block.z |= ((endPoint[1][1].b >> 2) & 1) << 6; + block.z |= ((endPoint[1][1].r >> 0) & 1) << 7; + block.z |= ((endPoint[1][1].r >> 1) & 1) << 8; + block.z |= ((endPoint[1][1].r >> 2) & 1) << 9; + block.z |= ((endPoint[1][1].r >> 3) & 1) << 10; + block.z |= ((endPoint[1][1].r >> 4) & 1) << 11; + block.z |= ((endPoint[1][1].b >> 3) & 1) << 12; + block.z |= ((partition_index >> 0) & 1) << 13; + block.z |= ((partition_index >> 1) & 1) << 14; + block.z |= ((partition_index >> 2) & 1) << 15; + block.z |= ((partition_index >> 3) & 1) << 16; + block.z |= ((partition_index >> 4) & 1) << 17; + } + else if ( mode_type == candidateModeFlag[6]) + { + /*block.x = candidateModeMemory[6]; + block.x |= ( ( endPoint[0][0].r 
<< 5 ) & 0x00001FE0 ) | ( ( endPoint[0][0].g << 15 ) & 0x007F8000 ) | ( ( endPoint[0][0].b << 25 ) & 0xFE000000 ); + block.y |= ( endPoint[0][0].b >> 7 ) & 0x00000001; + block.y |= ( ( endPoint[0][1].r << 3 ) & 0x000001F8 ) | ( ( endPoint[0][1].g << 13 ) & 0x0003E000 ) | ( ( endPoint[0][1].b << 23 ) & 0x0F800000 ); + block.x |= ( ( endPoint[1][0].g << 20 ) & 0x01000000 ) | ( ( endPoint[1][0].b << 10 ) & 0x00004000); + block.x |= ( ( endPoint[1][1].g << 9 ) & 0x00002000 ) | ( ( endPoint[1][1].b << 21 ) & 0x00800000); + block.yz |= ( endPoint[1][0].gr << uint2(9, 1) ) & uint2(0x00001E00, 0x0000007E); + block.y |= ( endPoint[1][0].b << 29 ) & 0xE0000000; + block.yz |= ( endPoint[1][1].gr << uint2(19, 7) ) & uint2(0x00780000, 0x00001F80); + block.y |= ( ( endPoint[1][1].b >> 2 ) & 0x00000006 ); + block.y |= ( ( endPoint[1][1].b << 27 ) & 0x10000000 ) | ( ( endPoint[1][1].b << 18 ) & 0x00040000 ); + block.z |= ( endPoint[1][0].b >> 3 ) & 0x00000001;*/ + + block.x |= ((candidateModeMemory[6] >> 0) & 1) << 0; + block.x |= ((candidateModeMemory[6] >> 1) & 1) << 1; + block.x |= ((candidateModeMemory[6] >> 2) & 1) << 2; + block.x |= ((candidateModeMemory[6] >> 3) & 1) << 3; + block.x |= ((candidateModeMemory[6] >> 4) & 1) << 4; + block.x |= ((endPoint[0][0].r >> 0) & 1) << 5; + block.x |= ((endPoint[0][0].r >> 1) & 1) << 6; + block.x |= ((endPoint[0][0].r >> 2) & 1) << 7; + block.x |= ((endPoint[0][0].r >> 3) & 1) << 8; + block.x |= ((endPoint[0][0].r >> 4) & 1) << 9; + block.x |= ((endPoint[0][0].r >> 5) & 1) << 10; + block.x |= ((endPoint[0][0].r >> 6) & 1) << 11; + block.x |= ((endPoint[0][0].r >> 7) & 1) << 12; + block.x |= ((endPoint[1][1].g >> 4) & 1) << 13; + block.x |= ((endPoint[1][0].b >> 4) & 1) << 14; + block.x |= ((endPoint[0][0].g >> 0) & 1) << 15; + block.x |= ((endPoint[0][0].g >> 1) & 1) << 16; + block.x |= ((endPoint[0][0].g >> 2) & 1) << 17; + block.x |= ((endPoint[0][0].g >> 3) & 1) << 18; + block.x |= ((endPoint[0][0].g >> 4) & 1) << 19; + block.x |= 
((endPoint[0][0].g >> 5) & 1) << 20; + block.x |= ((endPoint[0][0].g >> 6) & 1) << 21; + block.x |= ((endPoint[0][0].g >> 7) & 1) << 22; + block.x |= ((endPoint[1][1].b >> 2) & 1) << 23; + block.x |= ((endPoint[1][0].g >> 4) & 1) << 24; + block.x |= ((endPoint[0][0].b >> 0) & 1) << 25; + block.x |= ((endPoint[0][0].b >> 1) & 1) << 26; + block.x |= ((endPoint[0][0].b >> 2) & 1) << 27; + block.x |= ((endPoint[0][0].b >> 3) & 1) << 28; + block.x |= ((endPoint[0][0].b >> 4) & 1) << 29; + block.x |= ((endPoint[0][0].b >> 5) & 1) << 30; + block.x |= ((endPoint[0][0].b >> 6) & 1) << 31; + block.y |= ((endPoint[0][0].b >> 7) & 1) << 0; + block.y |= ((endPoint[1][1].b >> 3) & 1) << 1; + block.y |= ((endPoint[1][1].b >> 4) & 1) << 2; + block.y |= ((endPoint[0][1].r >> 0) & 1) << 3; + block.y |= ((endPoint[0][1].r >> 1) & 1) << 4; + block.y |= ((endPoint[0][1].r >> 2) & 1) << 5; + block.y |= ((endPoint[0][1].r >> 3) & 1) << 6; + block.y |= ((endPoint[0][1].r >> 4) & 1) << 7; + block.y |= ((endPoint[0][1].r >> 5) & 1) << 8; + block.y |= ((endPoint[1][0].g >> 0) & 1) << 9; + block.y |= ((endPoint[1][0].g >> 1) & 1) << 10; + block.y |= ((endPoint[1][0].g >> 2) & 1) << 11; + block.y |= ((endPoint[1][0].g >> 3) & 1) << 12; + block.y |= ((endPoint[0][1].g >> 0) & 1) << 13; + block.y |= ((endPoint[0][1].g >> 1) & 1) << 14; + block.y |= ((endPoint[0][1].g >> 2) & 1) << 15; + block.y |= ((endPoint[0][1].g >> 3) & 1) << 16; + block.y |= ((endPoint[0][1].g >> 4) & 1) << 17; + block.y |= ((endPoint[1][1].b >> 0) & 1) << 18; + block.y |= ((endPoint[1][1].g >> 0) & 1) << 19; + block.y |= ((endPoint[1][1].g >> 1) & 1) << 20; + block.y |= ((endPoint[1][1].g >> 2) & 1) << 21; + block.y |= ((endPoint[1][1].g >> 3) & 1) << 22; + block.y |= ((endPoint[0][1].b >> 0) & 1) << 23; + block.y |= ((endPoint[0][1].b >> 1) & 1) << 24; + block.y |= ((endPoint[0][1].b >> 2) & 1) << 25; + block.y |= ((endPoint[0][1].b >> 3) & 1) << 26; + block.y |= ((endPoint[0][1].b >> 4) & 1) << 27; + block.y |= 
((endPoint[1][1].b >> 1) & 1) << 28; + block.y |= ((endPoint[1][0].b >> 0) & 1) << 29; + block.y |= ((endPoint[1][0].b >> 1) & 1) << 30; + block.y |= ((endPoint[1][0].b >> 2) & 1) << 31; + block.z |= ((endPoint[1][0].b >> 3) & 1) << 0; + block.z |= ((endPoint[1][0].r >> 0) & 1) << 1; + block.z |= ((endPoint[1][0].r >> 1) & 1) << 2; + block.z |= ((endPoint[1][0].r >> 2) & 1) << 3; + block.z |= ((endPoint[1][0].r >> 3) & 1) << 4; + block.z |= ((endPoint[1][0].r >> 4) & 1) << 5; + block.z |= ((endPoint[1][0].r >> 5) & 1) << 6; + block.z |= ((endPoint[1][1].r >> 0) & 1) << 7; + block.z |= ((endPoint[1][1].r >> 1) & 1) << 8; + block.z |= ((endPoint[1][1].r >> 2) & 1) << 9; + block.z |= ((endPoint[1][1].r >> 3) & 1) << 10; + block.z |= ((endPoint[1][1].r >> 4) & 1) << 11; + block.z |= ((endPoint[1][1].r >> 5) & 1) << 12; + block.z |= ((partition_index >> 0) & 1) << 13; + block.z |= ((partition_index >> 1) & 1) << 14; + block.z |= ((partition_index >> 2) & 1) << 15; + block.z |= ((partition_index >> 3) & 1) << 16; + block.z |= ((partition_index >> 4) & 1) << 17; + } + else if ( mode_type == candidateModeFlag[7]) + { + /*block.x = candidateModeMemory[7]; + block.x |= ( ( endPoint[0][0].r << 5 ) & 0x00001FE0 ) | ( ( endPoint[0][0].g << 15 ) & 0x007F8000 ) | ( ( endPoint[0][0].b << 25 ) & 0xFE000000 ); + block.y |= ( endPoint[0][0].b >> 7 ) & 0x00000001; + block.y |= ( ( endPoint[0][1].r << 3 ) & 0x000000F8 ) | ( ( endPoint[0][1].g << 13 ) & 0x0007E000 ) | ( ( endPoint[0][1].b << 23 ) & 0x0F800000 ); + block.x |= ( ( endPoint[1][0].g << 20 ) & 0x01000000 ) | ( ( endPoint[1][0].b << 10 ) & 0x00004000 ); + block.x |= ( ( endPoint[1][0].g << 18 ) & 0x00800000 ); + block.x |= ( ( endPoint[1][1].b << 13 ) & 0x00002000 ); + block.yz |= ( endPoint[1][0].gr << uint2(9, 1) ) & uint2(0x00001E00, 0x0000003E); + block.yz |= ( endPoint[1][1].gr << uint2(19, 7) ) & uint2(0x00780000, 0x00000F80); + block.y |= ( endPoint[1][0].b << 29 ) & 0xE0000000; + block.y |= ( ( endPoint[1][1].g >> 4 ) 
& 0x00000002 ) | ( ( endPoint[1][1].g << 4 ) & 0x00000100 ) | ( ( endPoint[1][1].b >> 2 ) & 0x00000004 ); + block.y |= ( endPoint[1][1].b << 27 ) & 0x10000000; + block.z |= ( endPoint[1][0].b >> 3 ) & 0x00000001; + block.z |= ( ( endPoint[1][1].b << 9 ) & 0x00001000 ) | ( ( endPoint[1][1].b << 4 ) & 0x00000040 );*/ + + block.x |= ((candidateModeMemory[7] >> 0) & 1) << 0; + block.x |= ((candidateModeMemory[7] >> 1) & 1) << 1; + block.x |= ((candidateModeMemory[7] >> 2) & 1) << 2; + block.x |= ((candidateModeMemory[7] >> 3) & 1) << 3; + block.x |= ((candidateModeMemory[7] >> 4) & 1) << 4; + block.x |= ((endPoint[0][0].r >> 0) & 1) << 5; + block.x |= ((endPoint[0][0].r >> 1) & 1) << 6; + block.x |= ((endPoint[0][0].r >> 2) & 1) << 7; + block.x |= ((endPoint[0][0].r >> 3) & 1) << 8; + block.x |= ((endPoint[0][0].r >> 4) & 1) << 9; + block.x |= ((endPoint[0][0].r >> 5) & 1) << 10; + block.x |= ((endPoint[0][0].r >> 6) & 1) << 11; + block.x |= ((endPoint[0][0].r >> 7) & 1) << 12; + block.x |= ((endPoint[1][1].b >> 0) & 1) << 13; + block.x |= ((endPoint[1][0].b >> 4) & 1) << 14; + block.x |= ((endPoint[0][0].g >> 0) & 1) << 15; + block.x |= ((endPoint[0][0].g >> 1) & 1) << 16; + block.x |= ((endPoint[0][0].g >> 2) & 1) << 17; + block.x |= ((endPoint[0][0].g >> 3) & 1) << 18; + block.x |= ((endPoint[0][0].g >> 4) & 1) << 19; + block.x |= ((endPoint[0][0].g >> 5) & 1) << 20; + block.x |= ((endPoint[0][0].g >> 6) & 1) << 21; + block.x |= ((endPoint[0][0].g >> 7) & 1) << 22; + block.x |= ((endPoint[1][0].g >> 5) & 1) << 23; + block.x |= ((endPoint[1][0].g >> 4) & 1) << 24; + block.x |= ((endPoint[0][0].b >> 0) & 1) << 25; + block.x |= ((endPoint[0][0].b >> 1) & 1) << 26; + block.x |= ((endPoint[0][0].b >> 2) & 1) << 27; + block.x |= ((endPoint[0][0].b >> 3) & 1) << 28; + block.x |= ((endPoint[0][0].b >> 4) & 1) << 29; + block.x |= ((endPoint[0][0].b >> 5) & 1) << 30; + block.x |= ((endPoint[0][0].b >> 6) & 1) << 31; + block.y |= ((endPoint[0][0].b >> 7) & 1) << 0; + block.y 
|= ((endPoint[1][1].g >> 5) & 1) << 1; + block.y |= ((endPoint[1][1].b >> 4) & 1) << 2; + block.y |= ((endPoint[0][1].r >> 0) & 1) << 3; + block.y |= ((endPoint[0][1].r >> 1) & 1) << 4; + block.y |= ((endPoint[0][1].r >> 2) & 1) << 5; + block.y |= ((endPoint[0][1].r >> 3) & 1) << 6; + block.y |= ((endPoint[0][1].r >> 4) & 1) << 7; + block.y |= ((endPoint[1][1].g >> 4) & 1) << 8; + block.y |= ((endPoint[1][0].g >> 0) & 1) << 9; + block.y |= ((endPoint[1][0].g >> 1) & 1) << 10; + block.y |= ((endPoint[1][0].g >> 2) & 1) << 11; + block.y |= ((endPoint[1][0].g >> 3) & 1) << 12; + block.y |= ((endPoint[0][1].g >> 0) & 1) << 13; + block.y |= ((endPoint[0][1].g >> 1) & 1) << 14; + block.y |= ((endPoint[0][1].g >> 2) & 1) << 15; + block.y |= ((endPoint[0][1].g >> 3) & 1) << 16; + block.y |= ((endPoint[0][1].g >> 4) & 1) << 17; + block.y |= ((endPoint[0][1].g >> 5) & 1) << 18; + block.y |= ((endPoint[1][1].g >> 0) & 1) << 19; + block.y |= ((endPoint[1][1].g >> 1) & 1) << 20; + block.y |= ((endPoint[1][1].g >> 2) & 1) << 21; + block.y |= ((endPoint[1][1].g >> 3) & 1) << 22; + block.y |= ((endPoint[0][1].b >> 0) & 1) << 23; + block.y |= ((endPoint[0][1].b >> 1) & 1) << 24; + block.y |= ((endPoint[0][1].b >> 2) & 1) << 25; + block.y |= ((endPoint[0][1].b >> 3) & 1) << 26; + block.y |= ((endPoint[0][1].b >> 4) & 1) << 27; + block.y |= ((endPoint[1][1].b >> 1) & 1) << 28; + block.y |= ((endPoint[1][0].b >> 0) & 1) << 29; + block.y |= ((endPoint[1][0].b >> 1) & 1) << 30; + block.y |= ((endPoint[1][0].b >> 2) & 1) << 31; + block.z |= ((endPoint[1][0].b >> 3) & 1) << 0; + block.z |= ((endPoint[1][0].r >> 0) & 1) << 1; + block.z |= ((endPoint[1][0].r >> 1) & 1) << 2; + block.z |= ((endPoint[1][0].r >> 2) & 1) << 3; + block.z |= ((endPoint[1][0].r >> 3) & 1) << 4; + block.z |= ((endPoint[1][0].r >> 4) & 1) << 5; + block.z |= ((endPoint[1][1].b >> 2) & 1) << 6; + block.z |= ((endPoint[1][1].r >> 0) & 1) << 7; + block.z |= ((endPoint[1][1].r >> 1) & 1) << 8; + block.z |= 
((endPoint[1][1].r >> 2) & 1) << 9; + block.z |= ((endPoint[1][1].r >> 3) & 1) << 10; + block.z |= ((endPoint[1][1].r >> 4) & 1) << 11; + block.z |= ((endPoint[1][1].b >> 3) & 1) << 12; + block.z |= ((partition_index >> 0) & 1) << 13; + block.z |= ((partition_index >> 1) & 1) << 14; + block.z |= ((partition_index >> 2) & 1) << 15; + block.z |= ((partition_index >> 3) & 1) << 16; + block.z |= ((partition_index >> 4) & 1) << 17; + } + else if ( mode_type == candidateModeFlag[8]) + { + /*block.x = candidateModeMemory[8]; + block.x |= ( ( endPoint[0][0].r << 5 ) & 0x00001FE0 ) | ( ( endPoint[0][0].g << 15 ) & 0x007F8000 ) | ( ( endPoint[0][0].b << 25 ) & 0xFE000000 ); + block.y |= ( endPoint[0][0].b >> 7 ) & 0x00000001; + block.y |= ( ( endPoint[0][1].r << 3 ) & 0x000000F8 ) | ( ( endPoint[0][1].g << 13 ) & 0x0003E000 ) | ( ( endPoint[0][1].b << 23 ) & 0x1F800000 ); + block.x |= ( ( endPoint[1][0].g << 20 ) & 0x01000000 ) | ( ( endPoint[1][0].b << 10 ) & 0x00004000 ); + block.x |= ( ( endPoint[1][0].b << 18 ) & 0x00800000 ); + block.x |= ( endPoint[1][1].b << 12 ) & 0x00002000; + block.y |= ( endPoint[1][0].b << 29 ) & 0xE0000000; + block.y |= ( ( endPoint[1][1].g << 4 ) & 0x00000100 ) | ( ( endPoint[1][1].b >> 4 ) & 0x00000002 ) | ( ( endPoint[1][1].b >> 2 ) & 0x00000004 ); + block.yz |= ( endPoint[1][0].gr << uint2(9, 1) ) & uint2(0x00001E00, 0x0000003E); + block.yz |= ( endPoint[1][1].gr << uint2(19, 7) ) & uint2(0x00780000, 0x00000F80); + block.y |= ( endPoint[1][1].b << 18 ) & 0x00040000; + block.z |= ( endPoint[1][0].b >> 3 ) & 0x00000001; + block.z |= ( ( endPoint[1][1].b << 9 ) & 0x00001000 ) | ( ( endPoint[1][1].b << 4 ) & 0x00000040 );*/ + + block.x |= ((candidateModeMemory[8] >> 0) & 1) << 0; + block.x |= ((candidateModeMemory[8] >> 1) & 1) << 1; + block.x |= ((candidateModeMemory[8] >> 2) & 1) << 2; + block.x |= ((candidateModeMemory[8] >> 3) & 1) << 3; + block.x |= ((candidateModeMemory[8] >> 4) & 1) << 4; + block.x |= ((endPoint[0][0].r >> 0) & 1) << 5; + 
block.x |= ((endPoint[0][0].r >> 1) & 1) << 6; + block.x |= ((endPoint[0][0].r >> 2) & 1) << 7; + block.x |= ((endPoint[0][0].r >> 3) & 1) << 8; + block.x |= ((endPoint[0][0].r >> 4) & 1) << 9; + block.x |= ((endPoint[0][0].r >> 5) & 1) << 10; + block.x |= ((endPoint[0][0].r >> 6) & 1) << 11; + block.x |= ((endPoint[0][0].r >> 7) & 1) << 12; + block.x |= ((endPoint[1][1].b >> 1) & 1) << 13; + block.x |= ((endPoint[1][0].b >> 4) & 1) << 14; + block.x |= ((endPoint[0][0].g >> 0) & 1) << 15; + block.x |= ((endPoint[0][0].g >> 1) & 1) << 16; + block.x |= ((endPoint[0][0].g >> 2) & 1) << 17; + block.x |= ((endPoint[0][0].g >> 3) & 1) << 18; + block.x |= ((endPoint[0][0].g >> 4) & 1) << 19; + block.x |= ((endPoint[0][0].g >> 5) & 1) << 20; + block.x |= ((endPoint[0][0].g >> 6) & 1) << 21; + block.x |= ((endPoint[0][0].g >> 7) & 1) << 22; + block.x |= ((endPoint[1][0].b >> 5) & 1) << 23; + block.x |= ((endPoint[1][0].g >> 4) & 1) << 24; + block.x |= ((endPoint[0][0].b >> 0) & 1) << 25; + block.x |= ((endPoint[0][0].b >> 1) & 1) << 26; + block.x |= ((endPoint[0][0].b >> 2) & 1) << 27; + block.x |= ((endPoint[0][0].b >> 3) & 1) << 28; + block.x |= ((endPoint[0][0].b >> 4) & 1) << 29; + block.x |= ((endPoint[0][0].b >> 5) & 1) << 30; + block.x |= ((endPoint[0][0].b >> 6) & 1) << 31; + block.y |= ((endPoint[0][0].b >> 7) & 1) << 0; + block.y |= ((endPoint[1][1].b >> 5) & 1) << 1; + block.y |= ((endPoint[1][1].b >> 4) & 1) << 2; + block.y |= ((endPoint[0][1].r >> 0) & 1) << 3; + block.y |= ((endPoint[0][1].r >> 1) & 1) << 4; + block.y |= ((endPoint[0][1].r >> 2) & 1) << 5; + block.y |= ((endPoint[0][1].r >> 3) & 1) << 6; + block.y |= ((endPoint[0][1].r >> 4) & 1) << 7; + block.y |= ((endPoint[1][1].g >> 4) & 1) << 8; + block.y |= ((endPoint[1][0].g >> 0) & 1) << 9; + block.y |= ((endPoint[1][0].g >> 1) & 1) << 10; + block.y |= ((endPoint[1][0].g >> 2) & 1) << 11; + block.y |= ((endPoint[1][0].g >> 3) & 1) << 12; + block.y |= ((endPoint[0][1].g >> 0) & 1) << 13; + block.y |= 
((endPoint[0][1].g >> 1) & 1) << 14; + block.y |= ((endPoint[0][1].g >> 2) & 1) << 15; + block.y |= ((endPoint[0][1].g >> 3) & 1) << 16; + block.y |= ((endPoint[0][1].g >> 4) & 1) << 17; + block.y |= ((endPoint[1][1].b >> 0) & 1) << 18; + block.y |= ((endPoint[1][1].g >> 0) & 1) << 19; + block.y |= ((endPoint[1][1].g >> 1) & 1) << 20; + block.y |= ((endPoint[1][1].g >> 2) & 1) << 21; + block.y |= ((endPoint[1][1].g >> 3) & 1) << 22; + block.y |= ((endPoint[0][1].b >> 0) & 1) << 23; + block.y |= ((endPoint[0][1].b >> 1) & 1) << 24; + block.y |= ((endPoint[0][1].b >> 2) & 1) << 25; + block.y |= ((endPoint[0][1].b >> 3) & 1) << 26; + block.y |= ((endPoint[0][1].b >> 4) & 1) << 27; + block.y |= ((endPoint[0][1].b >> 5) & 1) << 28; + block.y |= ((endPoint[1][0].b >> 0) & 1) << 29; + block.y |= ((endPoint[1][0].b >> 1) & 1) << 30; + block.y |= ((endPoint[1][0].b >> 2) & 1) << 31; + block.z |= ((endPoint[1][0].b >> 3) & 1) << 0; + block.z |= ((endPoint[1][0].r >> 0) & 1) << 1; + block.z |= ((endPoint[1][0].r >> 1) & 1) << 2; + block.z |= ((endPoint[1][0].r >> 2) & 1) << 3; + block.z |= ((endPoint[1][0].r >> 3) & 1) << 4; + block.z |= ((endPoint[1][0].r >> 4) & 1) << 5; + block.z |= ((endPoint[1][1].b >> 2) & 1) << 6; + block.z |= ((endPoint[1][1].r >> 0) & 1) << 7; + block.z |= ((endPoint[1][1].r >> 1) & 1) << 8; + block.z |= ((endPoint[1][1].r >> 2) & 1) << 9; + block.z |= ((endPoint[1][1].r >> 3) & 1) << 10; + block.z |= ((endPoint[1][1].r >> 4) & 1) << 11; + block.z |= ((endPoint[1][1].b >> 3) & 1) << 12; + block.z |= ((partition_index >> 0) & 1) << 13; + block.z |= ((partition_index >> 1) & 1) << 14; + block.z |= ((partition_index >> 2) & 1) << 15; + block.z |= ((partition_index >> 3) & 1) << 16; + block.z |= ((partition_index >> 4) & 1) << 17; + } + else if ( mode_type == candidateModeFlag[9]) + { + /*block.x = candidateModeMemory[9]; + block.x |= ( ( endPoint[0][0].r << 5 ) & 0x000007E0 ) | ( ( endPoint[0][0].g << 15 ) & 0x001F8000 ) | ( ( endPoint[0][0].b << 25 ) & 
0x7E000000 ); + block.y |= ( ( endPoint[0][1].r << 3 ) & 0x000001F8 ) | ( ( endPoint[0][1].g << 13 ) & 0x0007E000 ) | ( ( endPoint[0][1].b << 23 ) & 0x1F800000 ); + block.x |= ( ( endPoint[1][0].g << 16 ) & 0x00200000 ) | ( ( endPoint[1][0].g << 20 ) & 0x01000000 ); + block.x |= ( ( endPoint[1][0].b << 17 ) & 0x00400000 ) | ( ( endPoint[1][0].b << 10 ) & 0x00004000 ); + block.x |= ( ( endPoint[1][1].b << 21 ) & 0x00800000 ) | ( ( endPoint[1][1].b << 12 ) & 0x00003000 ); + block.x |= ( ( endPoint[1][1].g << 26 ) & 0x80000000 ) | ( ( endPoint[1][1].g << 7 ) & 0x00000800 ); + block.yz |= ( endPoint[1][0].gr << uint2(9, 1) ) & uint2(0x00001E00, 0x0000007E); + block.yz |= ( endPoint[1][1].gr << uint2(19, 7) ) & uint2(0x00780000, 0x00001F80); + block.y |= ( endPoint[1][0].b << 29 ) & 0xE0000000; + block.y |= ( ( endPoint[1][1].b >> 4 ) & 0x00000002 ) | ( ( endPoint[1][1].b >> 2 ) & 0x00000004 ) | ( ( endPoint[1][1].b >> 3 ) & 0x00000001 ); + block.z |= ( endPoint[1][0].b >> 3 ) & 0x00000001;*/ + + block.x |= ((candidateModeMemory[9] >> 0) & 1) << 0; + block.x |= ((candidateModeMemory[9] >> 1) & 1) << 1; + block.x |= ((candidateModeMemory[9] >> 2) & 1) << 2; + block.x |= ((candidateModeMemory[9] >> 3) & 1) << 3; + block.x |= ((candidateModeMemory[9] >> 4) & 1) << 4; + block.x |= ((endPoint[0][0].r >> 0) & 1) << 5; + block.x |= ((endPoint[0][0].r >> 1) & 1) << 6; + block.x |= ((endPoint[0][0].r >> 2) & 1) << 7; + block.x |= ((endPoint[0][0].r >> 3) & 1) << 8; + block.x |= ((endPoint[0][0].r >> 4) & 1) << 9; + block.x |= ((endPoint[0][0].r >> 5) & 1) << 10; + block.x |= ((endPoint[1][1].g >> 4) & 1) << 11; + block.x |= ((endPoint[1][1].b >> 0) & 1) << 12; + block.x |= ((endPoint[1][1].b >> 1) & 1) << 13; + block.x |= ((endPoint[1][0].b >> 4) & 1) << 14; + block.x |= ((endPoint[0][0].g >> 0) & 1) << 15; + block.x |= ((endPoint[0][0].g >> 1) & 1) << 16; + block.x |= ((endPoint[0][0].g >> 2) & 1) << 17; + block.x |= ((endPoint[0][0].g >> 3) & 1) << 18; + block.x |= 
((endPoint[0][0].g >> 4) & 1) << 19; + block.x |= ((endPoint[0][0].g >> 5) & 1) << 20; + block.x |= ((endPoint[1][0].g >> 5) & 1) << 21; + block.x |= ((endPoint[1][0].b >> 5) & 1) << 22; + block.x |= ((endPoint[1][1].b >> 2) & 1) << 23; + block.x |= ((endPoint[1][0].g >> 4) & 1) << 24; + block.x |= ((endPoint[0][0].b >> 0) & 1) << 25; + block.x |= ((endPoint[0][0].b >> 1) & 1) << 26; + block.x |= ((endPoint[0][0].b >> 2) & 1) << 27; + block.x |= ((endPoint[0][0].b >> 3) & 1) << 28; + block.x |= ((endPoint[0][0].b >> 4) & 1) << 29; + block.x |= ((endPoint[0][0].b >> 5) & 1) << 30; + block.x |= ((endPoint[1][1].g >> 5) & 1) << 31; + block.y |= ((endPoint[1][1].b >> 3) & 1) << 0; + block.y |= ((endPoint[1][1].b >> 5) & 1) << 1; + block.y |= ((endPoint[1][1].b >> 4) & 1) << 2; + block.y |= ((endPoint[0][1].r >> 0) & 1) << 3; + block.y |= ((endPoint[0][1].r >> 1) & 1) << 4; + block.y |= ((endPoint[0][1].r >> 2) & 1) << 5; + block.y |= ((endPoint[0][1].r >> 3) & 1) << 6; + block.y |= ((endPoint[0][1].r >> 4) & 1) << 7; + block.y |= ((endPoint[0][1].r >> 5) & 1) << 8; + block.y |= ((endPoint[1][0].g >> 0) & 1) << 9; + block.y |= ((endPoint[1][0].g >> 1) & 1) << 10; + block.y |= ((endPoint[1][0].g >> 2) & 1) << 11; + block.y |= ((endPoint[1][0].g >> 3) & 1) << 12; + block.y |= ((endPoint[0][1].g >> 0) & 1) << 13; + block.y |= ((endPoint[0][1].g >> 1) & 1) << 14; + block.y |= ((endPoint[0][1].g >> 2) & 1) << 15; + block.y |= ((endPoint[0][1].g >> 3) & 1) << 16; + block.y |= ((endPoint[0][1].g >> 4) & 1) << 17; + block.y |= ((endPoint[0][1].g >> 5) & 1) << 18; + block.y |= ((endPoint[1][1].g >> 0) & 1) << 19; + block.y |= ((endPoint[1][1].g >> 1) & 1) << 20; + block.y |= ((endPoint[1][1].g >> 2) & 1) << 21; + block.y |= ((endPoint[1][1].g >> 3) & 1) << 22; + block.y |= ((endPoint[0][1].b >> 0) & 1) << 23; + block.y |= ((endPoint[0][1].b >> 1) & 1) << 24; + block.y |= ((endPoint[0][1].b >> 2) & 1) << 25; + block.y |= ((endPoint[0][1].b >> 3) & 1) << 26; + block.y |= 
((endPoint[0][1].b >> 4) & 1) << 27; + block.y |= ((endPoint[0][1].b >> 5) & 1) << 28; + block.y |= ((endPoint[1][0].b >> 0) & 1) << 29; + block.y |= ((endPoint[1][0].b >> 1) & 1) << 30; + block.y |= ((endPoint[1][0].b >> 2) & 1) << 31; + block.z |= ((endPoint[1][0].b >> 3) & 1) << 0; + block.z |= ((endPoint[1][0].r >> 0) & 1) << 1; + block.z |= ((endPoint[1][0].r >> 1) & 1) << 2; + block.z |= ((endPoint[1][0].r >> 2) & 1) << 3; + block.z |= ((endPoint[1][0].r >> 3) & 1) << 4; + block.z |= ((endPoint[1][0].r >> 4) & 1) << 5; + block.z |= ((endPoint[1][0].r >> 5) & 1) << 6; + block.z |= ((endPoint[1][1].r >> 0) & 1) << 7; + block.z |= ((endPoint[1][1].r >> 1) & 1) << 8; + block.z |= ((endPoint[1][1].r >> 2) & 1) << 9; + block.z |= ((endPoint[1][1].r >> 3) & 1) << 10; + block.z |= ((endPoint[1][1].r >> 4) & 1) << 11; + block.z |= ((endPoint[1][1].r >> 5) & 1) << 12; + block.z |= ((partition_index >> 0) & 1) << 13; + block.z |= ((partition_index >> 1) & 1) << 14; + block.z |= ((partition_index >> 2) & 1) << 15; + block.z |= ((partition_index >> 3) & 1) << 16; + block.z |= ((partition_index >> 4) & 1) << 17; + } +} /* block_package, overload taking a single endpoint pair (int2x3 endPoint); it branches on candidateModeFlag[10] through candidateModeFlag[13]. It zeroes block.x, block.y and bit 0 of block.z, then ORs in one bit per statement: bits 0-4 of candidateModeMemory[k] go to block.x bits 0-4; bits 0-9 of endPoint[0].r, .g and .b go to packed bits 5-14, 15-24 and 25-34 (blue straddles block.x bits 25-31 and block.y bits 0-2); a branch-specific mix of endPoint[1] low bits and endPoint[0] bits 10 and up fills packed bits 35-64 (block.y bits 3-31 plus block.z bit 0). block.w and block.z bits 1-31 are never written here. If mode_type matches none of the four flags, only the clearing takes effect. candidateModeFlag and candidateModeMemory are declared outside this excerpt. */ +void block_package( inout uint4 block, int2x3 endPoint, uint mode_type ) // for mode 11 - 14 +{ + /*block.x = ( ( endPoint[0].r << 5 ) & 0x00007FE0 ) | ( ( endPoint[0].g << 15 ) & 0x01FF8000 ) | ( ( endPoint[0].b << 25 ) & 0xFE000000 ); + block.y |= ( endPoint[0].b >> 7 ) & 0x00000007;*/ /* Clear the 65 bits this function fills (block.x, block.y, block.z bit 0); every field below is OR-ed in, so stale bits would otherwise survive. */ + + block.xy = 0; + block.z &= 0xFFFFFFFE; + + + if ( mode_type == candidateModeFlag[10]) + { /* Layout for candidateModeFlag[10]: endPoint[1].r, .g, .b bits 0-9 go to block.y bits 3-12, 13-22, and 23-31 plus block.z bit 0. No endPoint[0] bits above bit 9 are stored. */ + /* block.x |= candidateModeMemory[10]; + block.y |= ( ( endPoint[1].r << 3 ) & 0x00001FF8 ) | ( ( endPoint[1].g << 13 ) & 0x007FE000 ) | ( ( endPoint[1].b << 23 ) & 0xFF800000 ); + block.z |= ( endPoint[1].b >> 9 ) & 0x00000001;*/ + + block.x |= ((candidateModeMemory[10] >> 0) & 1) << 0; + block.x |= ((candidateModeMemory[10] >> 1) & 1) << 1; + block.x |= ((candidateModeMemory[10] >> 2) & 1) << 2; + block.x |= ((candidateModeMemory[10] >> 3) & 1) << 3; + block.x |=
((candidateModeMemory[10] >> 4) & 1) << 4; + block.x |= ((endPoint[0].r >> 0) & 1) << 5; + block.x |= ((endPoint[0].r >> 1) & 1) << 6; + block.x |= ((endPoint[0].r >> 2) & 1) << 7; + block.x |= ((endPoint[0].r >> 3) & 1) << 8; + block.x |= ((endPoint[0].r >> 4) & 1) << 9; + block.x |= ((endPoint[0].r >> 5) & 1) << 10; + block.x |= ((endPoint[0].r >> 6) & 1) << 11; + block.x |= ((endPoint[0].r >> 7) & 1) << 12; + block.x |= ((endPoint[0].r >> 8) & 1) << 13; + block.x |= ((endPoint[0].r >> 9) & 1) << 14; + block.x |= ((endPoint[0].g >> 0) & 1) << 15; + block.x |= ((endPoint[0].g >> 1) & 1) << 16; + block.x |= ((endPoint[0].g >> 2) & 1) << 17; + block.x |= ((endPoint[0].g >> 3) & 1) << 18; + block.x |= ((endPoint[0].g >> 4) & 1) << 19; + block.x |= ((endPoint[0].g >> 5) & 1) << 20; + block.x |= ((endPoint[0].g >> 6) & 1) << 21; + block.x |= ((endPoint[0].g >> 7) & 1) << 22; + block.x |= ((endPoint[0].g >> 8) & 1) << 23; + block.x |= ((endPoint[0].g >> 9) & 1) << 24; + block.x |= ((endPoint[0].b >> 0) & 1) << 25; + block.x |= ((endPoint[0].b >> 1) & 1) << 26; + block.x |= ((endPoint[0].b >> 2) & 1) << 27; + block.x |= ((endPoint[0].b >> 3) & 1) << 28; + block.x |= ((endPoint[0].b >> 4) & 1) << 29; + block.x |= ((endPoint[0].b >> 5) & 1) << 30; + block.x |= ((endPoint[0].b >> 6) & 1) << 31; + block.y |= ((endPoint[0].b >> 7) & 1) << 0; + block.y |= ((endPoint[0].b >> 8) & 1) << 1; + block.y |= ((endPoint[0].b >> 9) & 1) << 2; + block.y |= ((endPoint[1].r >> 0) & 1) << 3; + block.y |= ((endPoint[1].r >> 1) & 1) << 4; + block.y |= ((endPoint[1].r >> 2) & 1) << 5; + block.y |= ((endPoint[1].r >> 3) & 1) << 6; + block.y |= ((endPoint[1].r >> 4) & 1) << 7; + block.y |= ((endPoint[1].r >> 5) & 1) << 8; + block.y |= ((endPoint[1].r >> 6) & 1) << 9; + block.y |= ((endPoint[1].r >> 7) & 1) << 10; + block.y |= ((endPoint[1].r >> 8) & 1) << 11; + block.y |= ((endPoint[1].r >> 9) & 1) << 12; + block.y |= ((endPoint[1].g >> 0) & 1) << 13; + block.y |= ((endPoint[1].g >> 1) & 1) <<
14; + block.y |= ((endPoint[1].g >> 2) & 1) << 15; + block.y |= ((endPoint[1].g >> 3) & 1) << 16; + block.y |= ((endPoint[1].g >> 4) & 1) << 17; + block.y |= ((endPoint[1].g >> 5) & 1) << 18; + block.y |= ((endPoint[1].g >> 6) & 1) << 19; + block.y |= ((endPoint[1].g >> 7) & 1) << 20; + block.y |= ((endPoint[1].g >> 8) & 1) << 21; + block.y |= ((endPoint[1].g >> 9) & 1) << 22; + block.y |= ((endPoint[1].b >> 0) & 1) << 23; + block.y |= ((endPoint[1].b >> 1) & 1) << 24; + block.y |= ((endPoint[1].b >> 2) & 1) << 25; + block.y |= ((endPoint[1].b >> 3) & 1) << 26; + block.y |= ((endPoint[1].b >> 4) & 1) << 27; + block.y |= ((endPoint[1].b >> 5) & 1) << 28; + block.y |= ((endPoint[1].b >> 6) & 1) << 29; + block.y |= ((endPoint[1].b >> 7) & 1) << 30; + block.y |= ((endPoint[1].b >> 8) & 1) << 31; + block.z |= ((endPoint[1].b >> 9) & 1) << 0; + } + else if (mode_type == candidateModeFlag[11]) + { /* Layout for candidateModeFlag[11]: endPoint[1].r, .g, .b bits 0-8 go to block.y bits 3-11, 13-21 and 23-31; the slot that follows each channel (block.y bit 12, block.y bit 22, block.z bit 0) holds bit 10 of endPoint[0].r, .g, .b respectively. */ + /*block.x |= candidateModeMemory[11]; + block.y |= ( ( endPoint[0].r << 2 ) & 0x00001000 ) | ( ( endPoint[0].g << 12 ) & 0x00400000 ); + block.y |= ( ( endPoint[1].r << 3 ) & 0x00000FF8 ) | ( ( endPoint[1].g << 13 ) & 0x003FE000 ) | ( ( endPoint[1].b << 23 ) & 0xFF800000 ); + block.z |= ( endPoint[0].b >> 10 ) & 0x00000001;*/ + + block.x |= ((candidateModeMemory[11] >> 0) & 1) << 0; + block.x |= ((candidateModeMemory[11] >> 1) & 1) << 1; + block.x |= ((candidateModeMemory[11] >> 2) & 1) << 2; + block.x |= ((candidateModeMemory[11] >> 3) & 1) << 3; + block.x |= ((candidateModeMemory[11] >> 4) & 1) << 4; + block.x |= ((endPoint[0].r >> 0) & 1) << 5; + block.x |= ((endPoint[0].r >> 1) & 1) << 6; + block.x |= ((endPoint[0].r >> 2) & 1) << 7; + block.x |= ((endPoint[0].r >> 3) & 1) << 8; + block.x |= ((endPoint[0].r >> 4) & 1) << 9; + block.x |= ((endPoint[0].r >> 5) & 1) << 10; + block.x |= ((endPoint[0].r >> 6) & 1) << 11; + block.x |= ((endPoint[0].r >> 7) & 1) << 12; + block.x |= ((endPoint[0].r >> 8) & 1) << 13; + block.x |= ((endPoint[0].r >> 9) & 1) << 14; + block.x |=
((endPoint[0].g >> 0) & 1) << 15; + block.x |= ((endPoint[0].g >> 1) & 1) << 16; + block.x |= ((endPoint[0].g >> 2) & 1) << 17; + block.x |= ((endPoint[0].g >> 3) & 1) << 18; + block.x |= ((endPoint[0].g >> 4) & 1) << 19; + block.x |= ((endPoint[0].g >> 5) & 1) << 20; + block.x |= ((endPoint[0].g >> 6) & 1) << 21; + block.x |= ((endPoint[0].g >> 7) & 1) << 22; + block.x |= ((endPoint[0].g >> 8) & 1) << 23; + block.x |= ((endPoint[0].g >> 9) & 1) << 24; + block.x |= ((endPoint[0].b >> 0) & 1) << 25; + block.x |= ((endPoint[0].b >> 1) & 1) << 26; + block.x |= ((endPoint[0].b >> 2) & 1) << 27; + block.x |= ((endPoint[0].b >> 3) & 1) << 28; + block.x |= ((endPoint[0].b >> 4) & 1) << 29; + block.x |= ((endPoint[0].b >> 5) & 1) << 30; + block.x |= ((endPoint[0].b >> 6) & 1) << 31; + block.y |= ((endPoint[0].b >> 7) & 1) << 0; + block.y |= ((endPoint[0].b >> 8) & 1) << 1; + block.y |= ((endPoint[0].b >> 9) & 1) << 2; + block.y |= ((endPoint[1].r >> 0) & 1) << 3; + block.y |= ((endPoint[1].r >> 1) & 1) << 4; + block.y |= ((endPoint[1].r >> 2) & 1) << 5; + block.y |= ((endPoint[1].r >> 3) & 1) << 6; + block.y |= ((endPoint[1].r >> 4) & 1) << 7; + block.y |= ((endPoint[1].r >> 5) & 1) << 8; + block.y |= ((endPoint[1].r >> 6) & 1) << 9; + block.y |= ((endPoint[1].r >> 7) & 1) << 10; + block.y |= ((endPoint[1].r >> 8) & 1) << 11; + block.y |= ((endPoint[0].r >> 10) & 1) << 12; + block.y |= ((endPoint[1].g >> 0) & 1) << 13; + block.y |= ((endPoint[1].g >> 1) & 1) << 14; + block.y |= ((endPoint[1].g >> 2) & 1) << 15; + block.y |= ((endPoint[1].g >> 3) & 1) << 16; + block.y |= ((endPoint[1].g >> 4) & 1) << 17; + block.y |= ((endPoint[1].g >> 5) & 1) << 18; + block.y |= ((endPoint[1].g >> 6) & 1) << 19; + block.y |= ((endPoint[1].g >> 7) & 1) << 20; + block.y |= ((endPoint[1].g >> 8) & 1) << 21; + block.y |= ((endPoint[0].g >> 10) & 1) << 22; + block.y |= ((endPoint[1].b >> 0) & 1) << 23; + block.y |= ((endPoint[1].b >> 1) & 1) << 24; + block.y |= ((endPoint[1].b >> 2) & 1) << 25;
+ block.y |= ((endPoint[1].b >> 3) & 1) << 26; + block.y |= ((endPoint[1].b >> 4) & 1) << 27; + block.y |= ((endPoint[1].b >> 5) & 1) << 28; + block.y |= ((endPoint[1].b >> 6) & 1) << 29; + block.y |= ((endPoint[1].b >> 7) & 1) << 30; + block.y |= ((endPoint[1].b >> 8) & 1) << 31; + block.z |= ((endPoint[0].b >> 10) & 1) << 0; + } + else if (mode_type == candidateModeFlag[12])// violate the spec in [0].low + { /* Layout for candidateModeFlag[12]: endPoint[1].r, .g, .b bits 0-7 go to block.y bits 3-10, 13-20 and 23-30; each channel is followed by endPoint[0] bit 11 and then bit 10 of the same channel (red at block.y bits 11 and 12, green at block.y bits 21 and 22, blue at block.y bit 31 and block.z bit 0). */ + /*block.x |= candidateModeMemory[12]; + block.y |= ( ( endPoint[0].r << 2 ) & 0x00001000 ) | ( ( endPoint[0].g << 12 ) & 0x00400000 ); + block.y |= ( ( endPoint[0].r << 0 ) & 0x00000800 ) | ( ( endPoint[0].g << 10 ) & 0x00200000 ); + block.y |= ( endPoint[0].b << 20 ) & 0x80000000; + block.y |= ( ( endPoint[1].r << 3 ) & 0x000007F8 ) | ( ( endPoint[1].g << 13 ) & 0x001FE000 ) | ( ( endPoint[1].b << 23 ) & 0x7F800000 ); + block.z |= ( endPoint[0].b >> 10 ) & 0x00000001;*/ + + block.x |= ((candidateModeMemory[12] >> 0) & 1) << 0; + block.x |= ((candidateModeMemory[12] >> 1) & 1) << 1; + block.x |= ((candidateModeMemory[12] >> 2) & 1) << 2; + block.x |= ((candidateModeMemory[12] >> 3) & 1) << 3; + block.x |= ((candidateModeMemory[12] >> 4) & 1) << 4; + block.x |= ((endPoint[0].r >> 0) & 1) << 5; + block.x |= ((endPoint[0].r >> 1) & 1) << 6; + block.x |= ((endPoint[0].r >> 2) & 1) << 7; + block.x |= ((endPoint[0].r >> 3) & 1) << 8; + block.x |= ((endPoint[0].r >> 4) & 1) << 9; + block.x |= ((endPoint[0].r >> 5) & 1) << 10; + block.x |= ((endPoint[0].r >> 6) & 1) << 11; + block.x |= ((endPoint[0].r >> 7) & 1) << 12; + block.x |= ((endPoint[0].r >> 8) & 1) << 13; + block.x |= ((endPoint[0].r >> 9) & 1) << 14; + block.x |= ((endPoint[0].g >> 0) & 1) << 15; + block.x |= ((endPoint[0].g >> 1) & 1) << 16; + block.x |= ((endPoint[0].g >> 2) & 1) << 17; + block.x |= ((endPoint[0].g >> 3) & 1) << 18; + block.x |= ((endPoint[0].g >> 4) & 1) << 19; + block.x |= ((endPoint[0].g >> 5) & 1) << 20; + block.x |= ((endPoint[0].g >> 6) & 1) << 21; + block.x |= ((endPoint[0].g >>
7) & 1) << 22; + block.x |= ((endPoint[0].g >> 8) & 1) << 23; + block.x |= ((endPoint[0].g >> 9) & 1) << 24; + block.x |= ((endPoint[0].b >> 0) & 1) << 25; + block.x |= ((endPoint[0].b >> 1) & 1) << 26; + block.x |= ((endPoint[0].b >> 2) & 1) << 27; + block.x |= ((endPoint[0].b >> 3) & 1) << 28; + block.x |= ((endPoint[0].b >> 4) & 1) << 29; + block.x |= ((endPoint[0].b >> 5) & 1) << 30; + block.x |= ((endPoint[0].b >> 6) & 1) << 31; + block.y |= ((endPoint[0].b >> 7) & 1) << 0; + block.y |= ((endPoint[0].b >> 8) & 1) << 1; + block.y |= ((endPoint[0].b >> 9) & 1) << 2; + block.y |= ((endPoint[1].r >> 0) & 1) << 3; + block.y |= ((endPoint[1].r >> 1) & 1) << 4; + block.y |= ((endPoint[1].r >> 2) & 1) << 5; + block.y |= ((endPoint[1].r >> 3) & 1) << 6; + block.y |= ((endPoint[1].r >> 4) & 1) << 7; + block.y |= ((endPoint[1].r >> 5) & 1) << 8; + block.y |= ((endPoint[1].r >> 6) & 1) << 9; + block.y |= ((endPoint[1].r >> 7) & 1) << 10; + block.y |= ((endPoint[0].r >> 11) & 1) << 11; + block.y |= ((endPoint[0].r >> 10) & 1) << 12; + block.y |= ((endPoint[1].g >> 0) & 1) << 13; + block.y |= ((endPoint[1].g >> 1) & 1) << 14; + block.y |= ((endPoint[1].g >> 2) & 1) << 15; + block.y |= ((endPoint[1].g >> 3) & 1) << 16; + block.y |= ((endPoint[1].g >> 4) & 1) << 17; + block.y |= ((endPoint[1].g >> 5) & 1) << 18; + block.y |= ((endPoint[1].g >> 6) & 1) << 19; + block.y |= ((endPoint[1].g >> 7) & 1) << 20; + block.y |= ((endPoint[0].g >> 11) & 1) << 21; + block.y |= ((endPoint[0].g >> 10) & 1) << 22; + block.y |= ((endPoint[1].b >> 0) & 1) << 23; + block.y |= ((endPoint[1].b >> 1) & 1) << 24; + block.y |= ((endPoint[1].b >> 2) & 1) << 25; + block.y |= ((endPoint[1].b >> 3) & 1) << 26; + block.y |= ((endPoint[1].b >> 4) & 1) << 27; + block.y |= ((endPoint[1].b >> 5) & 1) << 28; + block.y |= ((endPoint[1].b >> 6) & 1) << 29; + block.y |= ((endPoint[1].b >> 7) & 1) << 30; + block.y |= ((endPoint[0].b >> 11) & 1) << 31; + block.z |= ((endPoint[0].b >> 10) & 1) << 0; + } + else if
(mode_type == candidateModeFlag[13]) + { /* Layout for candidateModeFlag[13]: endPoint[1].r, .g, .b bits 0-3 go to block.y bits 3-6, 13-16 and 23-26; each channel is followed by endPoint[0] bits 15 down to 10 of the same channel, highest source bit first (red at block.y bits 7-12, green at block.y bits 17-22, blue at block.y bits 27-31 plus block.z bit 0). */ + /*block.x |= candidateModeMemory[13]; + block.y |= ( ( endPoint[0].r >> 8 ) & 0x00000080 ); + block.y |= ( ( endPoint[0].r >> 6 ) & 0x00000100 ); + block.y |= ( ( endPoint[0].r >> 4 ) & 0x00000200 ); + block.y |= ( ( endPoint[0].r >> 2 ) & 0x00000400 ); + block.y |= ( ( endPoint[0].r >> 0 ) & 0x00000800 ); + block.y |= ( ( endPoint[0].r << 2 ) & 0x00001000 ); + block.y |= ( ( endPoint[0].g << 2 ) & 0x00020000 ); + block.y |= ( ( endPoint[0].g << 4 ) & 0x00040000 ); + block.y |= ( ( endPoint[0].g << 6 ) & 0x00080000 ); + block.y |= ( ( endPoint[0].g << 8 ) & 0x00100000 ); + block.y |= ( ( endPoint[0].g << 10 ) & 0x00200000 ); + block.y |= ( ( endPoint[0].g << 12 ) & 0x00400000 ); + block.y |= ( ( endPoint[0].b << 12 ) & 0x08000000 ); + block.y |= ( ( endPoint[0].b << 14 ) & 0x10000000 ); + block.y |= ( ( endPoint[0].b << 16 ) & 0x20000000 ); + block.y |= ( ( endPoint[0].b << 18 ) & 0x40000000 ); + block.y |= ( ( endPoint[0].b << 20 ) & 0x80000000 ); + block.y |= ( ( endPoint[1].r << 3 ) & 0x00000078 ) | ( ( endPoint[1].g << 13 ) & 0x0001E000 ) | ( ( endPoint[1].b << 23 ) & 0x07800000 ); + block.z |= ( endPoint[0].b >> 10 ) & 0x00000001;*/ + + block.x |= ((candidateModeMemory[13] >> 0) & 1) << 0; + block.x |= ((candidateModeMemory[13] >> 1) & 1) << 1; + block.x |= ((candidateModeMemory[13] >> 2) & 1) << 2; + block.x |= ((candidateModeMemory[13] >> 3) & 1) << 3; + block.x |= ((candidateModeMemory[13] >> 4) & 1) << 4; + block.x |= ((endPoint[0].r >> 0) & 1) << 5; + block.x |= ((endPoint[0].r >> 1) & 1) << 6; + block.x |= ((endPoint[0].r >> 2) & 1) << 7; + block.x |= ((endPoint[0].r >> 3) & 1) << 8; + block.x |= ((endPoint[0].r >> 4) & 1) << 9; + block.x |= ((endPoint[0].r >> 5) & 1) << 10; + block.x |= ((endPoint[0].r >> 6) & 1) << 11; + block.x |= ((endPoint[0].r >> 7) & 1) << 12; + block.x |= ((endPoint[0].r >> 8) & 1) << 13; + block.x |= ((endPoint[0].r >> 9) & 1) << 14; + block.x |= ((endPoint[0].g >> 0) & 1) << 15; +
block.x |= ((endPoint[0].g >> 1) & 1) << 16; + block.x |= ((endPoint[0].g >> 2) & 1) << 17; + block.x |= ((endPoint[0].g >> 3) & 1) << 18; + block.x |= ((endPoint[0].g >> 4) & 1) << 19; + block.x |= ((endPoint[0].g >> 5) & 1) << 20; + block.x |= ((endPoint[0].g >> 6) & 1) << 21; + block.x |= ((endPoint[0].g >> 7) & 1) << 22; + block.x |= ((endPoint[0].g >> 8) & 1) << 23; + block.x |= ((endPoint[0].g >> 9) & 1) << 24; + block.x |= ((endPoint[0].b >> 0) & 1) << 25; + block.x |= ((endPoint[0].b >> 1) & 1) << 26; + block.x |= ((endPoint[0].b >> 2) & 1) << 27; + block.x |= ((endPoint[0].b >> 3) & 1) << 28; + block.x |= ((endPoint[0].b >> 4) & 1) << 29; + block.x |= ((endPoint[0].b >> 5) & 1) << 30; + block.x |= ((endPoint[0].b >> 6) & 1) << 31; + block.y |= ((endPoint[0].b >> 7) & 1) << 0; + block.y |= ((endPoint[0].b >> 8) & 1) << 1; + block.y |= ((endPoint[0].b >> 9) & 1) << 2; + block.y |= ((endPoint[1].r >> 0) & 1) << 3; + block.y |= ((endPoint[1].r >> 1) & 1) << 4; + block.y |= ((endPoint[1].r >> 2) & 1) << 5; + block.y |= ((endPoint[1].r >> 3) & 1) << 6; + block.y |= ((endPoint[0].r >> 15) & 1) << 7; + block.y |= ((endPoint[0].r >> 14) & 1) << 8; + block.y |= ((endPoint[0].r >> 13) & 1) << 9; + block.y |= ((endPoint[0].r >> 12) & 1) << 10; + block.y |= ((endPoint[0].r >> 11) & 1) << 11; + block.y |= ((endPoint[0].r >> 10) & 1) << 12; + block.y |= ((endPoint[1].g >> 0) & 1) << 13; + block.y |= ((endPoint[1].g >> 1) & 1) << 14; + block.y |= ((endPoint[1].g >> 2) & 1) << 15; + block.y |= ((endPoint[1].g >> 3) & 1) << 16; + block.y |= ((endPoint[0].g >> 15) & 1) << 17; + block.y |= ((endPoint[0].g >> 14) & 1) << 18; + block.y |= ((endPoint[0].g >> 13) & 1) << 19; + block.y |= ((endPoint[0].g >> 12) & 1) << 20; + block.y |= ((endPoint[0].g >> 11) & 1) << 21; + block.y |= ((endPoint[0].g >> 10) & 1) << 22; + block.y |= ((endPoint[1].b >> 0) & 1) << 23; + block.y |= ((endPoint[1].b >> 1) & 1) << 24; + block.y |= ((endPoint[1].b >> 2) & 1) << 25; + block.y |=
((endPoint[1].b >> 3) & 1) << 26; + block.y |= ((endPoint[0].b >> 15) & 1) << 27; + block.y |= ((endPoint[0].b >> 14) & 1) << 28; + block.y |= ((endPoint[0].b >> 13) & 1) << 29; + block.y |= ((endPoint[0].b >> 12) & 1) << 30; + block.y |= ((endPoint[0].b >> 11) & 1) << 31; + block.z |= ((endPoint[0].b >> 10) & 1) << 0; + } +} diff --git a/tests/hlsl/dxsdk/BC6HBC7EncoderCS/Shaders/BC7Encode.hlsl b/tests/hlsl/dxsdk/BC6HBC7EncoderCS/Shaders/BC7Encode.hlsl new file mode 100644 index 000000000..6a57c3862 --- /dev/null +++ b/tests/hlsl/dxsdk/BC6HBC7EncoderCS/Shaders/BC7Encode.hlsl @@ -0,0 +1,1908 @@ +//TEST_IGNORE_FILE: +//-------------------------------------------------------------------------------------- +// File: BC7Encode.hlsl +// +// The Compute Shader for BC7 Encoder +// +// Copyright (c) Microsoft Corporation. All rights reserved. +//-------------------------------------------------------------------------------------- + +//#define REF_DEVICE + +#define CHAR_LENGTH 8 +#define NCHANNELS 4 +#define BC7_UNORM 98 +#define MAX_UINT 0xFFFFFFFF +#define MIN_UINT 0 + +static const uint candidateSectionBit[64] = //Associated to partition 0-63 +{ + 0xCCCC, 0x8888, 0xEEEE, 0xECC8, + 0xC880, 0xFEEC, 0xFEC8, 0xEC80, + 0xC800, 0xFFEC, 0xFE80, 0xE800, + 0xFFE8, 0xFF00, 0xFFF0, 0xF000, + 0xF710, 0x008E, 0x7100, 0x08CE, + 0x008C, 0x7310, 0x3100, 0x8CCE, + 0x088C, 0x3110, 0x6666, 0x366C, + 0x17E8, 0x0FF0, 0x718E, 0x399C, + 0xaaaa, 0xf0f0, 0x5a5a, 0x33cc, + 0x3c3c, 0x55aa, 0x9696, 0xa55a, + 0x73ce, 0x13c8, 0x324c, 0x3bdc, + 0x6996, 0xc33c, 0x9966, 0x660, + 0x272, 0x4e4, 0x4e40, 0x2720, + 0xc936, 0x936c, 0x39c6, 0x639c, + 0x9336, 0x9cc6, 0x817e, 0xe718, + 0xccf0, 0xfcc, 0x7744, 0xee22, +}; +static const uint candidateSectionBit2[64] = //Associated to partition 64-127 +{ + 0xaa685050, 0x6a5a5040, 0x5a5a4200, 0x5450a0a8, + 0xa5a50000, 0xa0a05050, 0x5555a0a0, 0x5a5a5050, + 0xaa550000, 0xaa555500, 0xaaaa5500, 0x90909090, + 0x94949494, 0xa4a4a4a4, 0xa9a59450, 0x2a0a4250, + 0xa5945040,
0x0a425054, 0xa5a5a500, 0x55a0a0a0, + 0xa8a85454, 0x6a6a4040, 0xa4a45000, 0x1a1a0500, + 0x0050a4a4, 0xaaa59090, 0x14696914, 0x69691400, + 0xa08585a0, 0xaa821414, 0x50a4a450, 0x6a5a0200, + 0xa9a58000, 0x5090a0a8, 0xa8a09050, 0x24242424, + 0x00aa5500, 0x24924924, 0x24499224, 0x50a50a50, + 0x500aa550, 0xaaaa4444, 0x66660000, 0xa5a0a5a0, + 0x50a050a0, 0x69286928, 0x44aaaa44, 0x66666600, + 0xaa444444, 0x54a854a8, 0x95809580, 0x96969600, + 0xa85454a8, 0x80959580, 0xaa141414, 0x96960000, + 0xaaaa1414, 0xa05050a0, 0xa0a5a5a0, 0x96000000, + 0x40804080, 0xa9a8a9a8, 0xaaaaaa44, 0x2a4a5254, +}; +static const uint2 candidateFixUpIndex1D[128] = +{ + {15, 0},{15, 0},{15, 0},{15, 0}, + {15, 0},{15, 0},{15, 0},{15, 0}, + {15, 0},{15, 0},{15, 0},{15, 0}, + {15, 0},{15, 0},{15, 0},{15, 0}, + {15, 0},{ 2, 0},{ 8, 0},{ 2, 0}, + { 2, 0},{ 8, 0},{ 8, 0},{15, 0}, + { 2, 0},{ 8, 0},{ 2, 0},{ 2, 0}, + { 8, 0},{ 8, 0},{ 2, 0},{ 2, 0}, + + {15, 0},{15, 0},{ 6, 0},{ 8, 0}, + { 2, 0},{ 8, 0},{15, 0},{15, 0}, + { 2, 0},{ 8, 0},{ 2, 0},{ 2, 0}, + { 2, 0},{15, 0},{15, 0},{ 6, 0}, + { 6, 0},{ 2, 0},{ 6, 0},{ 8, 0}, + {15, 0},{15, 0},{ 2, 0},{ 2, 0}, + {15, 0},{15, 0},{15, 0},{15, 0}, + {15, 0},{ 2, 0},{ 2, 0},{15, 0}, + //candidateFixUpIndex1D[i][1], i < 64 should not be used + + { 3,15},{ 3, 8},{15, 8},{15, 3}, + { 8,15},{ 3,15},{15, 3},{15, 8}, + { 8,15},{ 8,15},{ 6,15},{ 6,15}, + { 6,15},{ 5,15},{ 3,15},{ 3, 8}, + { 3,15},{ 3, 8},{ 8,15},{15, 3}, + { 3,15},{ 3, 8},{ 6,15},{10, 8}, + { 5, 3},{ 8,15},{ 8, 6},{ 6,10}, + { 8,15},{ 5,15},{15,10},{15, 8}, + + { 8,15},{15, 3},{ 3,15},{ 5,10}, + { 6,10},{10, 8},{ 8, 9},{15,10}, + {15, 6},{ 3,15},{15, 8},{ 5,15}, + {15, 3},{15, 6},{15, 6},{15, 8}, //The Spec doesn't mark the first fixed up index in this row, so I apply 15 for them, and seems correct + { 3,15},{15, 3},{ 5,15},{ 5,15}, + { 5,15},{ 8,15},{ 5,15},{10,15}, + { 5,15},{10,15},{ 8,15},{13,15}, + {15, 3},{12,15},{ 3,15},{ 3, 8}, +}; +static const uint2 candidateFixUpIndex1DOrdered[128] = //Same 
with candidateFixUpIndex1D but order the result when i >= 64 +{ + {15, 0},{15, 0},{15, 0},{15, 0}, + {15, 0},{15, 0},{15, 0},{15, 0}, + {15, 0},{15, 0},{15, 0},{15, 0}, + {15, 0},{15, 0},{15, 0},{15, 0}, + {15, 0},{ 2, 0},{ 8, 0},{ 2, 0}, + { 2, 0},{ 8, 0},{ 8, 0},{15, 0}, + { 2, 0},{ 8, 0},{ 2, 0},{ 2, 0}, + { 8, 0},{ 8, 0},{ 2, 0},{ 2, 0}, + + {15, 0},{15, 0},{ 6, 0},{ 8, 0}, + { 2, 0},{ 8, 0},{15, 0},{15, 0}, + { 2, 0},{ 8, 0},{ 2, 0},{ 2, 0}, + { 2, 0},{15, 0},{15, 0},{ 6, 0}, + { 6, 0},{ 2, 0},{ 6, 0},{ 8, 0}, + {15, 0},{15, 0},{ 2, 0},{ 2, 0}, + {15, 0},{15, 0},{15, 0},{15, 0}, + {15, 0},{ 2, 0},{ 2, 0},{15, 0}, + //candidateFixUpIndex1DOrdered[i][1], i < 64 should not be used + + { 3,15},{ 3, 8},{ 8,15},{ 3,15}, + { 8,15},{ 3,15},{ 3,15},{ 8,15}, + { 8,15},{ 8,15},{ 6,15},{ 6,15}, + { 6,15},{ 5,15},{ 3,15},{ 3, 8}, + { 3,15},{ 3, 8},{ 8,15},{ 3,15}, + { 3,15},{ 3, 8},{ 6,15},{ 8,10}, + { 3, 5},{ 8,15},{ 6, 8},{ 6,10}, + { 8,15},{ 5,15},{10,15},{ 8,15}, + + { 8,15},{ 3,15},{ 3,15},{ 5,10}, + { 6,10},{ 8,10},{ 8, 9},{10,15}, + { 6,15},{ 3,15},{ 8,15},{ 5,15}, + { 3,15},{ 6,15},{ 6,15},{ 8,15}, //The Spec doesn't mark the first fixed up index in this row, so I apply 15 for them, and seems correct + { 3,15},{ 3,15},{ 5,15},{ 5,15}, + { 5,15},{ 8,15},{ 5,15},{10,15}, + { 5,15},{10,15},{ 8,15},{13,15}, + { 3,15},{12,15},{ 3,15},{ 3, 8}, +}; +//static const uint4x4 candidateRotation[4] = +//{ +// {1,0,0,0},{0,1,0,0},{0,0,1,0},{0,0,0,1}, +// {0,0,0,1},{0,1,0,0},{0,0,1,0},{1,0,0,0}, +// {1,0,0,0},{0,0,0,1},{0,0,1,0},{0,1,0,0}, +// {1,0,0,0},{0,1,0,0},{0,0,0,1},{0,0,1,0} +//}; +//static const uint2 candidateIndexPrec[8] = {{3,0},{3,0},{2,0},{2,0}, +// {2,3}, //color index and alpha index can exchange +// {2,2},{4,4},{2,2}}; + +static const uint aWeight[3][16] = { {0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64}, + {0, 9, 18, 27, 37, 46, 55, 64, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 21, 43, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} }; + + //4 bit index: 0, 4, 9, 
13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64 +static const uint aStep[3][64] = { { 0, 0, 0, 1, 1, 1, 1, 2, + 2, 2, 2, 2, 3, 3, 3, 3, + 4, 4, 4, 4, 5, 5, 5, 5, + 6, 6, 6, 6, 6, 7, 7, 7, + 7, 8, 8, 8, 8, 9, 9, 9, + 9,10,10,10,10,10,11,11, + 11,11,12,12,12,12,13,13, + 13,13,14,14,14,14,15,15 }, + //3 bit index: 0, 9, 18, 27, 37, 46, 55, 64 + { 0,0,0,0,0,1,1,1, + 1,1,1,1,1,1,2,2, + 2,2,2,2,2,2,2,3, + 3,3,3,3,3,3,3,3, + 3,4,4,4,4,4,4,4, + 4,4,5,5,5,5,5,5, + 5,5,5,6,6,6,6,6, + 6,6,6,6,7,7,7,7 }, + //2 bit index: 0, 21, 43, 64 + { 0,0,0,0,0,0,0,0, + 0,0,0,1,1,1,1,1, + 1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1, + 1,2,2,2,2,2,2,2, + 2,2,2,2,2,2,2,2, + 2,2,2,2,2,2,3,3, + 3,3,3,3,3,3,3,3 } }; + +cbuffer cbCS : register( b0 ) +{ + uint g_tex_width; + uint g_num_block_x; + uint g_format; + uint g_mode_id; + uint g_start_block_id; + uint g_num_total_blocks; + float g_alpha_weight; +}; + +//Forward declaration +uint2x4 compress_endpoints0( inout uint2x4 endPoint, uint2 P ); //Mode = 0 +uint2x4 compress_endpoints1( inout uint2x4 endPoint, uint2 P ); //Mode = 1 +uint2x4 compress_endpoints2( inout uint2x4 endPoint ); //Mode = 2 +uint2x4 compress_endpoints3( inout uint2x4 endPoint, uint2 P ); //Mode = 3 +uint2x4 compress_endpoints7( inout uint2x4 endPoint, uint2 P ); //Mode = 7 +uint2x4 compress_endpoints6( inout uint2x4 endPoint, uint2 P ); //Mode = 6 +uint2x4 compress_endpoints4( inout uint2x4 endPoint ); //Mode = 4 +uint2x4 compress_endpoints5( inout uint2x4 endPoint ); //Mode = 5 + +void block_package0( out uint4 block, uint partition, uint threadBase ); //Mode0 +void block_package1( out uint4 block, uint partition, uint threadBase ); //Mode1 +void block_package2( out uint4 block, uint partition, uint threadBase ); //Mode2 +void block_package3( out uint4 block, uint partition, uint threadBase ); //Mode3 +void block_package4( out uint4 block, uint rotation, uint index_selector, uint threadBase ); //Mode4 +void block_package5( out uint4 block, uint rotation, uint threadBase ); 
//Mode5 +void block_package6( out uint4 block, uint threadBase ); //Mode6 +void block_package7( out uint4 block, uint partition, uint threadBase ); //Mode7 + + +void swap(inout uint4 lhs, inout uint4 rhs) // exchanges lhs and rhs; overloads follow for uint3 and uint +{ + uint4 tmp = lhs; + lhs = rhs; + rhs = tmp; +} +void swap(inout uint3 lhs, inout uint3 rhs) +{ + uint3 tmp = lhs; + lhs = rhs; + rhs = tmp; +} +void swap(inout uint lhs, inout uint rhs) +{ + uint tmp = lhs; + lhs = rhs; + rhs = tmp; +} + +uint ComputeError(in uint4 a, in uint4 b) // alpha-weighted dot product of a and b, returned as uint; the callers visible in this file pass the same per-channel difference twice, which yields a weighted sum of squared differences +{ + return dot(a.rgb, b.rgb) + g_alpha_weight * a.a*b.a; +} + +void Ensure_A_Is_Larger( inout uint4 a, inout uint4 b ) // per component, leaves the larger value in a and the smaller in b, so that a - b cannot wrap around in unsigned arithmetic +{ + if ( a.x < b.x ) + swap( a.x, b.x ); + if ( a.y < b.y ) + swap( a.y, b.y ); + if ( a.z < b.z ) + swap( a.z, b.z ); + if ( a.w < b.w ) + swap( a.w, b.w ); +} + + +Texture2D g_Input : register( t0 ); +StructuredBuffer<uint4> g_InBuff : register( t1 ); + +RWStructuredBuffer<uint4> g_OutBuff : register( u0 ); + +#define THREAD_GROUP_SIZE 64 +#define BLOCK_SIZE_Y 4 +#define BLOCK_SIZE_X 4 +#define BLOCK_SIZE (BLOCK_SIZE_Y * BLOCK_SIZE_X) + +struct BufferShared +{ + uint4 pixel; // input pixel of the block, stored by the kernels as integers in 0..255 + uint error; // error of the candidate currently held in this slot + uint mode; + uint partition; + uint index_selector; + uint rotation; // component rotation for mode 4 5; the kernels reuse it to carry p bits for the other modes + uint4 endPoint_low; + uint4 endPoint_high; + uint4 endPoint_low_quantized; + uint4 endPoint_high_quantized; +}; +groupshared BufferShared shared_temp[THREAD_GROUP_SIZE]; // one scratch slot per thread of the group, indexed by SV_GroupIndex + +[numthreads( THREAD_GROUP_SIZE, 1, 1 )] +void TryMode456CS( uint GI : SV_GroupIndex, uint3 groupID : SV_GroupID ) // mode 4 5 6 all have 1 subset per block, and fix-up index is always index 0 +{ + // we process 4 BC blocks per thread group + const uint MAX_USED_THREAD = 16; // pixels in a BC (block compressed) block + uint BLOCK_IN_GROUP = THREAD_GROUP_SIZE / MAX_USED_THREAD; // the number of BC blocks a thread group processes = 64 / 16 = 4 + uint blockInGroup = GI / MAX_USED_THREAD; // what BC block this thread is on within this thread group + uint blockID = g_start_block_id + groupID.x * BLOCK_IN_GROUP + blockInGroup; // what global BC 
block this thread is on + uint threadBase = blockInGroup * MAX_USED_THREAD; // the first id of the pixel in this BC block in this thread group + uint threadInBlock = GI - threadBase; // id of the pixel in this BC block + +#ifndef REF_DEVICE + if (blockID >= g_num_total_blocks) // no work for block ids at or beyond g_num_total_blocks + { + return; + } +#endif + + uint block_y = blockID / g_num_block_x; // row-major position of this block in a grid that is g_num_block_x blocks wide + uint block_x = blockID - block_y * g_num_block_x; + uint base_x = block_x * BLOCK_SIZE_X; // texel coordinates of the block's first pixel + uint base_y = block_y * BLOCK_SIZE_Y; + + if (threadInBlock < 16) // always true here: threadInBlock is GI minus the 16-aligned threadBase + { + shared_temp[GI].pixel = clamp(uint4(g_Input.Load( uint3( base_x + threadInBlock % 4, base_y + threadInBlock / 4, 0 ) ) * 255), 0, 255); // this thread's texel, scaled by 255, converted to uint and clamped to 0..255 + + shared_temp[GI].endPoint_low = shared_temp[GI].pixel; // seed the min / max reduction below with the thread's own pixel + shared_temp[GI].endPoint_high = shared_temp[GI].pixel; + } +#ifdef REF_DEVICE // NOTE(review): the barriers between the steps below are compiled only under REF_DEVICE; otherwise the code presumably relies on the 16 threads of a block running in lockstep - confirm + GroupMemoryBarrierWithGroupSync(); +#endif + + if (threadInBlock < 8) // reduction step: fold slots 8..15 into 0..7 (component-wise min into endPoint_low, max into endPoint_high) + { + shared_temp[GI].endPoint_low = min(shared_temp[GI].endPoint_low, shared_temp[GI + 8].endPoint_low); + shared_temp[GI].endPoint_high = max(shared_temp[GI].endPoint_high, shared_temp[GI + 8].endPoint_high); + } +#ifdef REF_DEVICE + GroupMemoryBarrierWithGroupSync(); +#endif + if (threadInBlock < 4) // fold slots 4..7 into 0..3 + { + shared_temp[GI].endPoint_low = min(shared_temp[GI].endPoint_low, shared_temp[GI + 4].endPoint_low); + shared_temp[GI].endPoint_high = max(shared_temp[GI].endPoint_high, shared_temp[GI + 4].endPoint_high); + } +#ifdef REF_DEVICE + GroupMemoryBarrierWithGroupSync(); +#endif + if (threadInBlock < 2) // fold slots 2..3 into 0..1 + { + shared_temp[GI].endPoint_low = min(shared_temp[GI].endPoint_low, shared_temp[GI + 2].endPoint_low); + shared_temp[GI].endPoint_high = max(shared_temp[GI].endPoint_high, shared_temp[GI + 2].endPoint_high); + } +#ifdef REF_DEVICE + GroupMemoryBarrierWithGroupSync(); +#endif + if (threadInBlock < 1) // fold slot 1 into slot 0; slot threadBase then holds the min and max over all 16 pixels + { + shared_temp[GI].endPoint_low = min(shared_temp[GI].endPoint_low, shared_temp[GI + 1].endPoint_low); + shared_temp[GI].endPoint_high = max(shared_temp[GI].endPoint_high, shared_temp[GI + 1].endPoint_high); + } +#ifdef 
REF_DEVICE + GroupMemoryBarrierWithGroupSync(); +#endif + + uint2x4 endPoint; + endPoint[0] = shared_temp[threadBase].endPoint_low; // component-wise minimum of the block's pixels, read from the block's first slot + endPoint[1] = shared_temp[threadBase].endPoint_high; // component-wise maximum of the block's pixels + + uint error = 0xFFFFFFFF; + uint mode = 0; + uint index_selector = 0; + uint rotation = 0; + + uint2 indexPrec; + if (threadInBlock < 8) // all threads of threadInBlock < 8 will be working on trying out mode 4, since only mode 4 has index selector bit + { + if (0 == (threadInBlock & 1)) // thread 0, 2, 4, 6 + { + //indexPrec holds row numbers of aStep / aWeight: 2 is the 2bit index table, 1 is the 3bit index table; .x is used for color, .y for alpha + index_selector = 0; + indexPrec = uint2( 2, 1 ); + } + else // thread 1, 3, 5, 7 + { + //same rows with the roles exchanged: 3bit table for color, 2bit table for alpha + index_selector = 1; + indexPrec = uint2( 1, 2 ); + } + } + else + { + //threads 8..15: 2bit index table for both color and alpha (the mode 6 path further down indexes aStep[0] / aWeight[0] directly instead) + indexPrec = uint2( 2, 2 ); + } + + uint4 pixel_r; + uint color_index; + uint alpha_index; + int4 span; + int2 span_norm_sqr; + int2 dotProduct; + if (threadInBlock < 12) // Try mode 4 5 in threads 0..11 + { + // mode 4 5 have component rotation + if ((threadInBlock < 2) || (8 == threadInBlock)) // rotation = 0 in threads 0, 1 (mode 4) and thread 8 (mode 5) + { + rotation = 0; + } + else if ((threadInBlock < 4) || (9 == threadInBlock)) // rotation = 1 in threads 2, 3 (mode 4) and thread 9 (mode 5) + { + endPoint[0].ra = endPoint[0].ar; // exchange the r and a channels + endPoint[1].ra = endPoint[1].ar; + + rotation = 1; + } + else if ((threadInBlock < 6) || (10 == threadInBlock)) // rotation = 2 in threads 4, 5 (mode 4) and thread 10 (mode 5) + { + endPoint[0].ga = endPoint[0].ag; // exchange the g and a channels + endPoint[1].ga = endPoint[1].ag; + + rotation = 2; + } + else if ((threadInBlock < 8) || (11 == threadInBlock)) // rotation = 3 in threads 6, 7 (mode 4) and thread 11 (mode 5) + { + endPoint[0].ba = endPoint[0].ab; // exchange the b and a channels + endPoint[1].ba = endPoint[1].ab; + + rotation = 3; + } + + if (threadInBlock < 8) // try mode 4 in threads 0..7 + { + // mode 4 thread distribution + // Thread 0 1 2 3 4 5 6 7 + // Rotation 0 0 1 1 2 2 3 3 + // Index selector 0 1 0 1 0 1 0 1 + + mode = 4; + compress_endpoints4( endPoint ); + } + else // 
try mode 5 in threads 8..11 + { + // mode 5 thread distribution + // Thread 8 9 10 11 + // Rotation 0 1 2 3 + + mode = 5; + compress_endpoints5( endPoint ); + } + + uint4 pixel = shared_temp[threadBase + 0].pixel; // pixel 0 of the block, rotated below the same way as the end points + if (1 == rotation) + { + pixel.ra = pixel.ar; + } + else if (2 == rotation) + { + pixel.ga = pixel.ag; + } + else if (3 == rotation) + { + pixel.ba = pixel.ab; + } + + span = endPoint[1] - endPoint[0]; + span_norm_sqr = uint2( dot( span.rgb, span.rgb ), span.a * span.a ); // squared length of the color span (.x) and of the alpha span (.y) + + // in mode 4 5 6, end point 0 must be closer to pixel 0 than end point 1, because the fix-up index is always index 0 + // TODO: this shouldn't be necessary here in error calculation + /* + dotProduct = int2( dot( span.rgb, pixel.rgb - endPoint[0].rgb ), span.a * ( pixel.a - endPoint[0].a ) ); + if ( span_norm_sqr.x > 0 && dotProduct.x > 0 && uint( dotProduct.x * 63.49999 ) > uint( 32 * span_norm_sqr.x ) ) + { + span.rgb = -span.rgb; + swap(endPoint[0].rgb, endPoint[1].rgb); + } + if ( span_norm_sqr.y > 0 && dotProduct.y > 0 && uint( dotProduct.y * 63.49999 ) > uint( 32 * span_norm_sqr.y ) ) + { + span.a = -span.a; + swap(endPoint[0].a, endPoint[1].a); + } + */ + + // should be equivalent to the disabled test above: compare the squared distance from pixel 0 to end point 0 (.x) and to end point 1 (.y), and swap the end points when end point 1 is nearer; color and alpha are decided separately + dotProduct = int2( dot( pixel.rgb - endPoint[0].rgb, pixel.rgb - endPoint[0].rgb ), dot( pixel.rgb - endPoint[1].rgb, pixel.rgb - endPoint[1].rgb ) ); + if ( dotProduct.x > dotProduct.y ) + { + span.rgb = -span.rgb; + swap(endPoint[0].rgb, endPoint[1].rgb); + } + dotProduct = int2( dot( pixel.a - endPoint[0].a, pixel.a - endPoint[0].a ), dot( pixel.a - endPoint[1].a, pixel.a - endPoint[1].a ) ); + if ( dotProduct.x > dotProduct.y ) + { + span.a = -span.a; + swap(endPoint[0].a, endPoint[1].a); + } + + error = 0; // accumulated over all 16 pixels of the block by the loop below + for ( uint i = 0; i < 16; i ++ ) + { + pixel = shared_temp[threadBase + i].pixel; + if (1 == rotation) + { + pixel.ra = pixel.ar; + } + else if (2 == rotation) + { + pixel.ga = pixel.ag; + } + else if (3 == rotation) + { + pixel.ba = pixel.ab; + } + + dotProduct.x = dot( 
span.rgb, pixel.rgb - endPoint[0].rgb ); // projection of (pixel - end point 0) onto the color span + color_index = ( span_norm_sqr.x <= 0 /*endPoint[0] == endPoint[1]*/ || dotProduct.x <= 0 /*pixel == endPoint[0]*/ ) ? 0 + : ( ( dotProduct.x < span_norm_sqr.x ) ? aStep[indexPrec.x][ uint( dotProduct.x * 63.49999 / span_norm_sqr.x ) ] : aStep[indexPrec.x][63] ); // position along the span scaled to 0..63 and mapped to an index through aStep; positions at or past end point 1 take the last entry + dotProduct.y = dot( span.a, pixel.a - endPoint[0].a ); + alpha_index = ( span_norm_sqr.y <= 0 || dotProduct.y <= 0 ) ? 0 + : ( ( dotProduct.y < span_norm_sqr.y ) ? aStep[indexPrec.y][ uint( dotProduct.y * 63.49999 / span_norm_sqr.y ) ] : aStep[indexPrec.y][63] ); // same lookup for the alpha span, using the table row selected by indexPrec.y + + // the same color_index and alpha_index should be used for reconstruction, so this should be left commented out + /*if (index_selector) + { + swap(color_index, alpha_index); + }*/ + + pixel_r.rgb = ( ( 64 - aWeight[indexPrec.x][color_index] ) * endPoint[0].rgb + + aWeight[indexPrec.x][color_index] * endPoint[1].rgb + + 32 ) >> 6; // reconstructed color: blend of the end points with a weight out of 64 taken from aWeight, rounded to nearest + pixel_r.a = ( ( 64 - aWeight[indexPrec.y][alpha_index] ) * endPoint[0].a + + aWeight[indexPrec.y][alpha_index] * endPoint[1].a + + 32 ) >> 6; // reconstructed alpha, same blend + + Ensure_A_Is_Larger( pixel_r, pixel ); // order each component so that the subtraction below gives the absolute difference + pixel_r -= pixel; + if (1 == rotation) // apply the same channel exchange to the difference again, which puts its channels back in the original order before ComputeError applies g_alpha_weight to .a + { + pixel_r.ra = pixel_r.ar; + } + else if (2 == rotation) + { + pixel_r.ga = pixel_r.ag; + } + else if (3 == rotation) + { + pixel_r.ba = pixel_r.ab; + } + error += ComputeError(pixel_r, pixel_r); // the same vector twice: adds the alpha-weighted sum of squared differences for this pixel + } + } + else if (threadInBlock < 16) // Try mode 6 in threads 12..15, since in mode 4 5 6, only mode 6 has p bit + { + uint p = threadInBlock - 12; // 0..3; bit 0 and bit 1 are passed to compress_endpoints6 as the two p bits + + compress_endpoints6( endPoint, uint2(p >> 0, p >> 1) & 1 ); + + uint4 pixel = shared_temp[threadBase + 0].pixel; + + span = endPoint[1] - endPoint[0]; + span_norm_sqr = dot( span, span ); + dotProduct = dot( span, pixel - endPoint[0] ); + if ( span_norm_sqr.x > 0 && dotProduct.x >= 0 && uint( dotProduct.x * 63.49999 ) > uint( 32 * span_norm_sqr.x ) ) // pixel 0 projects past the middle of the span, i.e. it lies nearer end point 1: exchange the end points + { + span = -span; + swap(endPoint[0], endPoint[1]); + } + + error = 0; + for ( uint i = 0; i < 16; i ++ ) + { + pixel = shared_temp[threadBase + i].pixel; 
+ + dotProduct.x = dot( span, pixel - endPoint[0] ); + color_index = ( span_norm_sqr.x <= 0 || dotProduct.x <= 0 ) ? 0 + : ( ( dotProduct.x < span_norm_sqr.x ) ? aStep[0][ uint( dotProduct.x * 63.49999 / span_norm_sqr.x ) ] : aStep[0][63] ); + + pixel_r = ( ( 64 - aWeight[0][color_index] ) * endPoint[0] + + aWeight[0][color_index] * endPoint[1] + 32 ) >> 6; + + Ensure_A_Is_Larger( pixel_r, pixel ); + pixel_r -= pixel; + error += ComputeError(pixel_r, pixel_r); + } + + mode = 6; + rotation = p; // Borrow rotation for p + } + + shared_temp[GI].error = error; + shared_temp[GI].mode = mode; + shared_temp[GI].index_selector = index_selector; + shared_temp[GI].rotation = rotation; + +#ifdef REF_DEVICE + GroupMemoryBarrierWithGroupSync(); +#endif + + if (threadInBlock < 8) + { + if ( shared_temp[GI].error > shared_temp[GI + 8].error ) + { + shared_temp[GI].error = shared_temp[GI + 8].error; + shared_temp[GI].mode = shared_temp[GI + 8].mode; + shared_temp[GI].index_selector = shared_temp[GI + 8].index_selector; + shared_temp[GI].rotation = shared_temp[GI + 8].rotation; + } + } +#ifdef REF_DEVICE + GroupMemoryBarrierWithGroupSync(); +#endif + if (threadInBlock < 4) + { + if ( shared_temp[GI].error > shared_temp[GI + 4].error ) + { + shared_temp[GI].error = shared_temp[GI + 4].error; + shared_temp[GI].mode = shared_temp[GI + 4].mode; + shared_temp[GI].index_selector = shared_temp[GI + 4].index_selector; + shared_temp[GI].rotation = shared_temp[GI + 4].rotation; + } + } +#ifdef REF_DEVICE + GroupMemoryBarrierWithGroupSync(); +#endif + if (threadInBlock < 2) + { + if ( shared_temp[GI].error > shared_temp[GI + 2].error ) + { + shared_temp[GI].error = shared_temp[GI + 2].error; + shared_temp[GI].mode = shared_temp[GI + 2].mode; + shared_temp[GI].index_selector = shared_temp[GI + 2].index_selector; + shared_temp[GI].rotation = shared_temp[GI + 2].rotation; + } + } +#ifdef REF_DEVICE + GroupMemoryBarrierWithGroupSync(); +#endif + if (threadInBlock < 1) + { + if ( 
shared_temp[GI].error > shared_temp[GI + 1].error ) + { + shared_temp[GI].error = shared_temp[GI + 1].error; + shared_temp[GI].mode = shared_temp[GI + 1].mode; + shared_temp[GI].index_selector = shared_temp[GI + 1].index_selector; + shared_temp[GI].rotation = shared_temp[GI + 1].rotation; + } + + g_OutBuff[blockID] = uint4(shared_temp[GI].error, (shared_temp[GI].index_selector << 31) | shared_temp[GI].mode, + 0, shared_temp[GI].rotation); // rotation is indeed rotation for mode 4 5. for mode 6, rotation is p bit + } +} + +[numthreads( THREAD_GROUP_SIZE, 1, 1 )] +void TryMode137CS( uint GI : SV_GroupIndex, uint3 groupID : SV_GroupID ) // mode 1 3 7 all have 2 subsets per block +{ + const uint MAX_USED_THREAD = 64; + uint BLOCK_IN_GROUP = THREAD_GROUP_SIZE / MAX_USED_THREAD; + uint blockInGroup = GI / MAX_USED_THREAD; + uint blockID = g_start_block_id + groupID.x * BLOCK_IN_GROUP + blockInGroup; + uint threadBase = blockInGroup * MAX_USED_THREAD; + uint threadInBlock = GI - threadBase; + + uint block_y = blockID / g_num_block_x; + uint block_x = blockID - block_y * g_num_block_x; + uint base_x = block_x * BLOCK_SIZE_X; + uint base_y = block_y * BLOCK_SIZE_Y; + + if (threadInBlock < 16) + { + shared_temp[GI].pixel = clamp(uint4(g_Input.Load( uint3( base_x + threadInBlock % 4, base_y + threadInBlock / 4, 0 ) ) * 255), 0, 255); + } + GroupMemoryBarrierWithGroupSync(); + + shared_temp[GI].error = 0xFFFFFFFF; + + uint4 pixel_r; + uint2x4 endPoint[2]; // endPoint[0..1 for subset id][0..1 for low and high in the subset] + uint2x4 endPointBackup[2]; + uint color_index; + if (threadInBlock < 64) + { + uint partition = threadInBlock; + + endPoint[0][0] = MAX_UINT; + endPoint[0][1] = MIN_UINT; + endPoint[1][0] = MAX_UINT; + endPoint[1][1] = MIN_UINT; + uint bits = candidateSectionBit[partition]; + for ( uint i = 0; i < 16; i ++ ) + { + uint4 pixel = shared_temp[threadBase + i].pixel; + if ( (( bits >> i ) & 0x01) == 1 ) + { + endPoint[1][0] = min( endPoint[1][0], pixel ); + 
endPoint[1][1] = max( endPoint[1][1], pixel ); + } + else + { + endPoint[0][0] = min( endPoint[0][0], pixel ); + endPoint[0][1] = max( endPoint[0][1], pixel ); + } + } + + endPointBackup[0] = endPoint[0]; + endPointBackup[1] = endPoint[1]; + + uint max_p; + if (1 == g_mode_id) + { + // in mode 1, there is only one p bit per subset + max_p = 4; + } + else + { + // in mode 3 7, there are two p bits per subset, one for each end point + max_p = 16; + } + + uint rotation = 0; + uint error = MAX_UINT; + for ( uint p = 0; p < max_p; p ++ ) + { + endPoint[0] = endPointBackup[0]; + endPoint[1] = endPointBackup[1]; + + for ( i = 0; i < 2; i ++ ) // loop through 2 subsets + { + if (g_mode_id == 1) + { + compress_endpoints1( endPoint[i], (p >> i) & 1 ); + } + else if (g_mode_id == 3) + { + compress_endpoints3( endPoint[i], uint2(p >> (i * 2 + 0), p >> (i * 2 + 1)) & 1 ); + } + else if (g_mode_id == 7) + { + compress_endpoints7( endPoint[i], uint2(p >> (i * 2 + 0), p >> (i * 2 + 1)) & 1 ); + } + } + + int4 span[2]; + span[0] = endPoint[0][1] - endPoint[0][0]; + span[1] = endPoint[1][1] - endPoint[1][0]; + + if (g_mode_id != 7) + { + span[0].w = span[1].w = 0; + } + + int span_norm_sqr[2]; + span_norm_sqr[0] = dot( span[0], span[0] ); + span_norm_sqr[1] = dot( span[1], span[1] ); + + // TODO: again, this shouldn't be necessary here in error calculation + int dotProduct = dot( span[0], shared_temp[threadBase + 0].pixel - endPoint[0][0] ); + if ( span_norm_sqr[0] > 0 && dotProduct > 0 && uint( dotProduct * 63.49999 ) > uint( 32 * span_norm_sqr[0] ) ) + { + span[0] = -span[0]; + swap(endPoint[0][0], endPoint[0][1]); + } + dotProduct = dot( span[1], shared_temp[threadBase + candidateFixUpIndex1D[partition].x].pixel - endPoint[1][0] ); + if ( span_norm_sqr[1] > 0 && dotProduct > 0 && uint( dotProduct * 63.49999 ) > uint( 32 * span_norm_sqr[1] ) ) + { + span[1] = -span[1]; + swap(endPoint[1][0], endPoint[1][1]); + } + + uint step_selector; + if (g_mode_id != 1) + { + step_selector = 
2; // mode 3 7 have 2 bit index + } + else + { + step_selector = 1; // mode 1 has 3 bit index + } + + uint p_error = 0; + for ( i = 0; i < 16; i ++ ) + { + if (((bits >> i) & 0x01) == 1) + { + dotProduct = dot( span[1], shared_temp[threadBase + i].pixel - endPoint[1][0] ); + color_index = (span_norm_sqr[1] <= 0 || dotProduct <= 0) ? 0 + : ((dotProduct < span_norm_sqr[1]) ? aStep[step_selector][uint(dotProduct * 63.49999 / span_norm_sqr[1])] : aStep[step_selector][63]); + } + else + { + dotProduct = dot( span[0], shared_temp[threadBase + i].pixel - endPoint[0][0] ); + color_index = (span_norm_sqr[0] <= 0 || dotProduct <= 0) ? 0 + : ((dotProduct < span_norm_sqr[0]) ? aStep[step_selector][uint(dotProduct * 63.49999 / span_norm_sqr[0])] : aStep[step_selector][63]); + } + + uint subset_index = (bits >> i) & 0x01; + + pixel_r = ((64 - aWeight[step_selector][color_index]) * endPoint[subset_index][0] + + aWeight[step_selector][color_index] * endPoint[subset_index][1] + 32) >> 6; + if (g_mode_id != 7) + { + pixel_r.a = 255; + } + + uint4 pixel = shared_temp[threadBase + i].pixel; + Ensure_A_Is_Larger( pixel_r, pixel ); + pixel_r -= pixel; + p_error += ComputeError(pixel_r, pixel_r); + } + + if (p_error < error) + { + error = p_error; + rotation = p; + } + } + + shared_temp[GI].error = error; + shared_temp[GI].mode = g_mode_id; + shared_temp[GI].partition = partition; + shared_temp[GI].rotation = rotation; // mode 1 3 7 don't have rotation, we use rotation for p bits + } + GroupMemoryBarrierWithGroupSync(); + + if (threadInBlock < 32) + { + if ( shared_temp[GI].error > shared_temp[GI + 32].error ) + { + shared_temp[GI].error = shared_temp[GI + 32].error; + shared_temp[GI].mode = shared_temp[GI + 32].mode; + shared_temp[GI].partition = shared_temp[GI + 32].partition; + shared_temp[GI].rotation = shared_temp[GI + 32].rotation; + } + } +#ifdef REF_DEVICE + GroupMemoryBarrierWithGroupSync(); +#endif +if (threadInBlock < 16) + { + if ( shared_temp[GI].error > shared_temp[GI + 
16].error ) + { + shared_temp[GI].error = shared_temp[GI + 16].error; + shared_temp[GI].mode = shared_temp[GI + 16].mode; + shared_temp[GI].partition = shared_temp[GI + 16].partition; + shared_temp[GI].rotation = shared_temp[GI + 16].rotation; + } + } +#ifdef REF_DEVICE + GroupMemoryBarrierWithGroupSync(); +#endif + if (threadInBlock < 8) + { + if ( shared_temp[GI].error > shared_temp[GI + 8].error ) + { + shared_temp[GI].error = shared_temp[GI + 8].error; + shared_temp[GI].mode = shared_temp[GI + 8].mode; + shared_temp[GI].partition = shared_temp[GI + 8].partition; + shared_temp[GI].rotation = shared_temp[GI + 8].rotation; + } + } +#ifdef REF_DEVICE + GroupMemoryBarrierWithGroupSync(); +#endif + if (threadInBlock < 4) + { + if ( shared_temp[GI].error > shared_temp[GI + 4].error ) + { + shared_temp[GI].error = shared_temp[GI + 4].error; + shared_temp[GI].mode = shared_temp[GI + 4].mode; + shared_temp[GI].partition = shared_temp[GI + 4].partition; + shared_temp[GI].rotation = shared_temp[GI + 4].rotation; + } + } +#ifdef REF_DEVICE + GroupMemoryBarrierWithGroupSync(); +#endif + if (threadInBlock < 2) + { + if ( shared_temp[GI].error > shared_temp[GI + 2].error ) + { + shared_temp[GI].error = shared_temp[GI + 2].error; + shared_temp[GI].mode = shared_temp[GI + 2].mode; + shared_temp[GI].partition = shared_temp[GI + 2].partition; + shared_temp[GI].rotation = shared_temp[GI + 2].rotation; + } + } +#ifdef REF_DEVICE + GroupMemoryBarrierWithGroupSync(); +#endif + if (threadInBlock < 1) + { + if ( shared_temp[GI].error > shared_temp[GI + 1].error ) + { + shared_temp[GI].error = shared_temp[GI + 1].error; + shared_temp[GI].mode = shared_temp[GI + 1].mode; + shared_temp[GI].partition = shared_temp[GI + 1].partition; + shared_temp[GI].rotation = shared_temp[GI + 1].rotation; + } + + if (g_InBuff[blockID].x > shared_temp[GI].error) + { + g_OutBuff[blockID] = uint4(shared_temp[GI].error, shared_temp[GI].mode, shared_temp[GI].partition, shared_temp[GI].rotation); // mode 1 3 7 
don't have rotation, we use rotation for p bits + } + else + { + g_OutBuff[blockID] = g_InBuff[blockID]; + } + } +} + +[numthreads( THREAD_GROUP_SIZE, 1, 1 )] +void TryMode02CS( uint GI : SV_GroupIndex, uint3 groupID : SV_GroupID ) // mode 0 2 have 3 subsets per block +{ + const uint MAX_USED_THREAD = 64; + uint BLOCK_IN_GROUP = THREAD_GROUP_SIZE / MAX_USED_THREAD; + uint blockInGroup = GI / MAX_USED_THREAD; + uint blockID = g_start_block_id + groupID.x * BLOCK_IN_GROUP + blockInGroup; + uint threadBase = blockInGroup * MAX_USED_THREAD; + uint threadInBlock = GI - threadBase; + + uint block_y = blockID / g_num_block_x; + uint block_x = blockID - block_y * g_num_block_x; + uint base_x = block_x * BLOCK_SIZE_X; + uint base_y = block_y * BLOCK_SIZE_Y; + + if (threadInBlock < 16) + { + shared_temp[GI].pixel = clamp(uint4(g_Input.Load( uint3( base_x + threadInBlock % 4, base_y + threadInBlock / 4, 0 ) ) * 255), 0, 255); + } + GroupMemoryBarrierWithGroupSync(); + + shared_temp[GI].error = 0xFFFFFFFF; + + uint num_partitions; + if (0 == g_mode_id) + { + num_partitions = 16; + } + else + { + num_partitions = 64; + } + + uint4 pixel_r; + uint2x4 endPoint[3]; // endPoint[0..1 for subset id][0..1 for low and high in the subset] + uint2x4 endPointBackup[3]; + uint color_index[16]; + if (threadInBlock < num_partitions) + { + uint partition = threadInBlock + 64; + + endPoint[0][0] = MAX_UINT; + endPoint[0][1] = MIN_UINT; + endPoint[1][0] = MAX_UINT; + endPoint[1][1] = MIN_UINT; + endPoint[2][0] = MAX_UINT; + endPoint[2][1] = MIN_UINT; + uint bits2 = candidateSectionBit2[partition - 64]; + for ( uint i = 0; i < 16; i ++ ) + { + uint4 pixel = shared_temp[threadBase + i].pixel; + uint subset_index = ( bits2 >> ( i * 2 ) ) & 0x03; + if ( subset_index == 2 ) + { + endPoint[2][0] = min( endPoint[2][0], pixel ); + endPoint[2][1] = max( endPoint[2][1], pixel ); + } + else if ( subset_index == 1 ) + { + endPoint[1][0] = min( endPoint[1][0], pixel ); + endPoint[1][1] = max( 
endPoint[1][1], pixel ); + } + else + { + endPoint[0][0] = min( endPoint[0][0], pixel ); + endPoint[0][1] = max( endPoint[0][1], pixel ); + } + } + + endPointBackup[0] = endPoint[0]; + endPointBackup[1] = endPoint[1]; + endPointBackup[2] = endPoint[2]; + + uint max_p; + if (0 == g_mode_id) + { + max_p = 64; // changed from 32 to 64 + } + else + { + max_p = 1; + } + + uint rotation = 0; + uint error = MAX_UINT; + for ( uint p = 0; p < max_p; p ++ ) + { + endPoint[0] = endPointBackup[0]; + endPoint[1] = endPointBackup[1]; + endPoint[2] = endPointBackup[2]; + + for ( i = 0; i < 3; i ++ ) + { + if (0 == g_mode_id) + { + compress_endpoints0( endPoint[i], uint2(p >> (i * 2 + 0), p >> (i * 2 + 1)) & 1 ); + } + else + { + compress_endpoints2( endPoint[i] ); + } + } + + uint step_selector = 1 + (2 == g_mode_id); + + int4 span[3]; + span[0] = endPoint[0][1] - endPoint[0][0]; + span[1] = endPoint[1][1] - endPoint[1][0]; + span[2] = endPoint[2][1] - endPoint[2][0]; + span[0].w = span[1].w = span[2].w = 0; + int span_norm_sqr[3]; + span_norm_sqr[0] = dot( span[0], span[0] ); + span_norm_sqr[1] = dot( span[1], span[1] ); + span_norm_sqr[2] = dot( span[2], span[2] ); + + // TODO: again, this shouldn't be necessary here in error calculation + uint ci[3] = { 0, candidateFixUpIndex1D[partition].x, candidateFixUpIndex1D[partition].y }; + for (i = 0; i < 3; i ++) + { + int dotProduct = dot( span[i], shared_temp[threadBase + ci[i]].pixel - endPoint[i][0] ); + if ( span_norm_sqr[i] > 0 && dotProduct > 0 && uint( dotProduct * 63.49999 ) > uint( 32 * span_norm_sqr[i] ) ) + { + span[i] = -span[i]; + swap(endPoint[i][0], endPoint[i][1]); + } + } + + uint p_error = 0; + for ( i = 0; i < 16; i ++ ) + { + uint subset_index = ( bits2 >> ( i * 2 ) ) & 0x03; + if ( subset_index == 2 ) + { + int dotProduct = dot( span[2], shared_temp[threadBase + i].pixel - endPoint[2][0] ); + color_index[i] = ( span_norm_sqr[2] <= 0 || dotProduct <= 0 ) ? 0 + : ( ( dotProduct < span_norm_sqr[2] ) ? 
aStep[step_selector][ uint( dotProduct * 63.49999 / span_norm_sqr[2] ) ] : aStep[step_selector][63] ); + } + else if ( subset_index == 1 ) + { + int dotProduct = dot( span[1], shared_temp[threadBase + i].pixel - endPoint[1][0] ); + color_index[i] = ( span_norm_sqr[1] <= 0 || dotProduct <= 0 ) ? 0 + : ( ( dotProduct < span_norm_sqr[1] ) ? aStep[step_selector][ uint( dotProduct * 63.49999 / span_norm_sqr[1] ) ] : aStep[step_selector][63] ); + } + else + { + int dotProduct = dot( span[0], shared_temp[threadBase + i].pixel - endPoint[0][0] ); + color_index[i] = ( span_norm_sqr[0] <= 0 || dotProduct <= 0 ) ? 0 + : ( ( dotProduct < span_norm_sqr[0] ) ? aStep[step_selector][ uint( dotProduct * 63.49999 / span_norm_sqr[0] ) ] : aStep[step_selector][63] ); + } + + pixel_r = ( ( 64 - aWeight[step_selector][color_index[i]] ) * endPoint[subset_index][0] + + aWeight[step_selector][color_index[i]] * endPoint[subset_index][1] + 32 ) >> 6; + pixel_r.a = 255; + + uint4 pixel = shared_temp[threadBase + i].pixel; + Ensure_A_Is_Larger( pixel_r, pixel ); + pixel_r -= pixel; + p_error += ComputeError(pixel_r, pixel_r); + } + + if (p_error < error) + { + error = p_error; + rotation = p; // Borrow rotation for p + } + } + + shared_temp[GI].error = error; + shared_temp[GI].partition = partition; + shared_temp[GI].rotation = rotation; + } + GroupMemoryBarrierWithGroupSync(); + + if (threadInBlock < 32) + { + if ( shared_temp[GI].error > shared_temp[GI + 32].error ) + { + shared_temp[GI].error = shared_temp[GI + 32].error; + shared_temp[GI].partition = shared_temp[GI + 32].partition; + shared_temp[GI].rotation = shared_temp[GI + 32].rotation; + } + } +#ifdef REF_DEVICE + GroupMemoryBarrierWithGroupSync(); +#endif + if (threadInBlock < 16) + { + if ( shared_temp[GI].error > shared_temp[GI + 16].error ) + { + shared_temp[GI].error = shared_temp[GI + 16].error; + shared_temp[GI].partition = shared_temp[GI + 16].partition; + shared_temp[GI].rotation = shared_temp[GI + 16].rotation; + } + } 
+#ifdef REF_DEVICE + GroupMemoryBarrierWithGroupSync(); +#endif + if (threadInBlock < 8) + { + if ( shared_temp[GI].error > shared_temp[GI + 8].error ) + { + shared_temp[GI].error = shared_temp[GI + 8].error; + shared_temp[GI].partition = shared_temp[GI + 8].partition; + shared_temp[GI].rotation = shared_temp[GI + 8].rotation; + } + } +#ifdef REF_DEVICE + GroupMemoryBarrierWithGroupSync(); +#endif + if (threadInBlock < 4) + { + if ( shared_temp[GI].error > shared_temp[GI + 4].error ) + { + shared_temp[GI].error = shared_temp[GI + 4].error; + shared_temp[GI].partition = shared_temp[GI + 4].partition; + shared_temp[GI].rotation = shared_temp[GI + 4].rotation; + } + } +#ifdef REF_DEVICE + GroupMemoryBarrierWithGroupSync(); +#endif + if (threadInBlock < 2) + { + if ( shared_temp[GI].error > shared_temp[GI + 2].error ) + { + shared_temp[GI].error = shared_temp[GI + 2].error; + shared_temp[GI].partition = shared_temp[GI + 2].partition; + shared_temp[GI].rotation = shared_temp[GI + 2].rotation; + } + } +#ifdef REF_DEVICE + GroupMemoryBarrierWithGroupSync(); +#endif + if (threadInBlock < 1) + { + if ( shared_temp[GI].error > shared_temp[GI + 1].error ) + { + shared_temp[GI].error = shared_temp[GI + 1].error; + shared_temp[GI].partition = shared_temp[GI + 1].partition; + shared_temp[GI].rotation = shared_temp[GI + 1].rotation; + } + + if (g_InBuff[blockID].x > shared_temp[GI].error) + { + g_OutBuff[blockID] = uint4(shared_temp[GI].error, g_mode_id, shared_temp[GI].partition, shared_temp[GI].rotation); // rotation is actually p bit for mode 0. 
for mode 2, rotation is always 0 + } + else + { + g_OutBuff[blockID] = g_InBuff[blockID]; + } + } +} + +[numthreads( THREAD_GROUP_SIZE, 1, 1 )] +void EncodeBlockCS(uint GI : SV_GroupIndex, uint3 groupID : SV_GroupID) +{ + const uint MAX_USED_THREAD = 16; + uint BLOCK_IN_GROUP = THREAD_GROUP_SIZE / MAX_USED_THREAD; + uint blockInGroup = GI / MAX_USED_THREAD; + uint blockID = g_start_block_id + groupID.x * BLOCK_IN_GROUP + blockInGroup; + uint threadBase = blockInGroup * MAX_USED_THREAD; + uint threadInBlock = GI - threadBase; + +#ifndef REF_DEVICE + if (blockID >= g_num_total_blocks) + { + return; + } +#endif + + uint block_y = blockID / g_num_block_x; + uint block_x = blockID - block_y * g_num_block_x; + uint base_x = block_x * BLOCK_SIZE_X; + uint base_y = block_y * BLOCK_SIZE_Y; + + uint mode = g_InBuff[blockID].y & 0x7FFFFFFF; + uint partition = g_InBuff[blockID].z; + uint index_selector = (g_InBuff[blockID].y >> 31) & 1; + uint rotation = g_InBuff[blockID].w; + + if (threadInBlock < 16) + { + uint4 pixel = clamp(uint4(g_Input.Load( uint3( base_x + threadInBlock % 4, base_y + threadInBlock / 4, 0 ) ) * 255), 0, 255); + + if ((4 == mode) || (5 == mode)) + { + if (1 == rotation) + { + pixel.ra = pixel.ar; + } + else if (2 == rotation) + { + pixel.ga = pixel.ag; + } + else if (3 == rotation) + { + pixel.ba = pixel.ab; + } + } + + shared_temp[GI].pixel = pixel; + } +#ifdef REF_DEVICE + GroupMemoryBarrierWithGroupSync(); +#endif + + uint bits = candidateSectionBit[partition]; + uint bits2 = candidateSectionBit2[partition - 64]; + + uint2x4 ep; + uint2x4 ep_quantized; + [unroll] + for (int ii = 2; ii >= 0; -- ii) + { + if (threadInBlock < 16) + { + uint2x4 ep; + ep[0] = MAX_UINT; + ep[1] = MIN_UINT; + + uint4 pixel = shared_temp[GI].pixel; + + uint subset_index = ( bits >> threadInBlock ) & 0x01; + uint subset_index2 = ( bits2 >> ( threadInBlock * 2 ) ) & 0x03; + if (0 == ii) + { + if ((0 == mode) || (2 == mode)) + { + if (0 == subset_index2) + { + ep[0] = ep[1] = 
pixel; + } + } + else if ((1 == mode) || (3 == mode) || (7 == mode)) + { + if (0 == subset_index) + { + ep[0] = ep[1] = pixel; + } + } + else if ((4 == mode) || (5 == mode) || (6 == mode)) + { + ep[0] = ep[1] = pixel; + } + } + else if (1 == ii) + { + if ((0 == mode) || (2 == mode)) + { + if (1 == subset_index2) + { + ep[0] = ep[1] = pixel; + } + } + else if ((1 == mode) || (3 == mode) || (7 == mode)) + { + if (1 == subset_index) + { + ep[0] = ep[1] = pixel; + } + } + } + else + { + if ((0 == mode) || (2 == mode)) + { + if (2 == subset_index2) + { + ep[0] = ep[1] = pixel; + } + } + } + + shared_temp[GI].endPoint_low = ep[0]; + shared_temp[GI].endPoint_high = ep[1]; + } +#ifdef REF_DEVICE + GroupMemoryBarrierWithGroupSync(); +#endif + + if (threadInBlock < 8) + { + shared_temp[GI].endPoint_low = min(shared_temp[GI].endPoint_low, shared_temp[GI + 8].endPoint_low); + shared_temp[GI].endPoint_high = max(shared_temp[GI].endPoint_high, shared_temp[GI + 8].endPoint_high); + } +#ifdef REF_DEVICE + GroupMemoryBarrierWithGroupSync(); +#endif + if (threadInBlock < 4) + { + shared_temp[GI].endPoint_low = min(shared_temp[GI].endPoint_low, shared_temp[GI + 4].endPoint_low); + shared_temp[GI].endPoint_high = max(shared_temp[GI].endPoint_high, shared_temp[GI + 4].endPoint_high); + } +#ifdef REF_DEVICE + GroupMemoryBarrierWithGroupSync(); +#endif + if (threadInBlock < 2) + { + shared_temp[GI].endPoint_low = min(shared_temp[GI].endPoint_low, shared_temp[GI + 2].endPoint_low); + shared_temp[GI].endPoint_high = max(shared_temp[GI].endPoint_high, shared_temp[GI + 2].endPoint_high); + } +#ifdef REF_DEVICE + GroupMemoryBarrierWithGroupSync(); +#endif + if (threadInBlock < 1) + { + shared_temp[GI].endPoint_low = min(shared_temp[GI].endPoint_low, shared_temp[GI + 1].endPoint_low); + shared_temp[GI].endPoint_high = max(shared_temp[GI].endPoint_high, shared_temp[GI + 1].endPoint_high); + } +#ifdef REF_DEVICE + GroupMemoryBarrierWithGroupSync(); +#endif + + if (ii == (int)threadInBlock) + { + 
ep[0] = shared_temp[threadBase].endPoint_low; + ep[1] = shared_temp[threadBase].endPoint_high; + } + } + + if (threadInBlock < 3) + { + uint2 P; + if (1 == mode) + { + P = (rotation >> threadInBlock) & 1; + } + else + { + P = uint2(rotation >> (threadInBlock * 2 + 0), rotation >> (threadInBlock * 2 + 1)) & 1; + } + + if (0 == mode) + { + ep_quantized = compress_endpoints0( ep, P ); + } + else if (1 == mode) + { + ep_quantized = compress_endpoints1( ep, P ); + } + else if (2 == mode) + { + ep_quantized = compress_endpoints2( ep ); + } + else if (3 == mode) + { + ep_quantized = compress_endpoints3( ep, P ); + } + else if (4 == mode) + { + ep_quantized = compress_endpoints4( ep ); + } + else if (5 == mode) + { + ep_quantized = compress_endpoints5( ep ); + } + else if (6 == mode) + { + ep_quantized = compress_endpoints6( ep, P ); + } + else //if (7 == mode) + { + ep_quantized = compress_endpoints7( ep, P ); + } + + int4 span = ep[1] - ep[0]; + if (mode < 4) + { + span.w = 0; + } + + if ((4 == mode) || (5 == mode)) + { + if (0 == threadInBlock) + { + int2 span_norm_sqr = uint2( dot( span.rgb, span.rgb ), span.a * span.a ); + int2 dotProduct = int2( dot( span.rgb, shared_temp[threadBase + 0].pixel.rgb - ep[0].rgb ), span.a * ( shared_temp[threadBase + 0].pixel.a - ep[0].a ) ); + if ( span_norm_sqr.x > 0 && dotProduct.x > 0 && uint( dotProduct.x * 63.49999 ) > uint( 32 * span_norm_sqr.x ) ) + { + swap(ep[0].rgb, ep[1].rgb); + swap(ep_quantized[0].rgb, ep_quantized[1].rgb); + } + if ( span_norm_sqr.y > 0 && dotProduct.y > 0 && uint( dotProduct.y * 63.49999 ) > uint( 32 * span_norm_sqr.y ) ) + { + swap(ep[0].a, ep[1].a); + swap(ep_quantized[0].a, ep_quantized[1].a); + } + } + } + else //if ((0 == mode) || (2 == mode) || (1 == mode) || (3 == mode) || (7 == mode) || (6 == mode)) + { + int p; + if (0 == threadInBlock) + { + p = 0; + } + else if (1 == threadInBlock) + { + p = candidateFixUpIndex1D[partition].x; + } + else //if (2 == threadInBlock) + { + p = 
candidateFixUpIndex1D[partition].y; + } + + int span_norm_sqr = dot( span, span ); + int dotProduct = dot( span, shared_temp[threadBase + p].pixel - ep[0] ); + if ( span_norm_sqr > 0 && dotProduct > 0 && uint( dotProduct * 63.49999 ) > uint( 32 * span_norm_sqr ) ) + { + swap(ep[0], ep[1]); + swap(ep_quantized[0], ep_quantized[1]); + } + } + + shared_temp[GI].endPoint_low = ep[0]; + shared_temp[GI].endPoint_high = ep[1]; + shared_temp[GI].endPoint_low_quantized = ep_quantized[0]; + shared_temp[GI].endPoint_high_quantized = ep_quantized[1]; + } +#ifdef REF_DEVICE + GroupMemoryBarrierWithGroupSync(); +#endif + + if (threadInBlock < 16) + { + uint color_index = 0; + uint alpha_index = 0; + + uint2x4 ep; + + uint2 indexPrec; + if ((0 == mode) || (1 == mode)) + { + indexPrec = 1; + } + else if (6 == mode) + { + indexPrec = 0; + } + else if (4 == mode) + { + if (0 == index_selector) + { + indexPrec = uint2(2, 1); + } + else + { + indexPrec = uint2(1, 2); + } + } + else + { + indexPrec = 2; + } + + int subset_index; + if ((0 == mode) || (2 == mode)) + { + subset_index = (bits2 >> (threadInBlock * 2)) & 0x03; + } + else if ((1 == mode) || (3 == mode) || (7 == mode)) + { + subset_index = (bits >> threadInBlock) & 0x01; + } + else + { + subset_index = 0; + } + + ep[0] = shared_temp[threadBase + subset_index].endPoint_low; + ep[1] = shared_temp[threadBase + subset_index].endPoint_high; + + int4 span = ep[1] - ep[0]; + if (mode < 4) + { + span.w = 0; + } + + if ((4 == mode) || (5 == mode)) + { + int2 span_norm_sqr; + span_norm_sqr.x = dot( span.rgb, span.rgb ); + span_norm_sqr.y = span.a * span.a; + + int dotProduct = dot( span.rgb, shared_temp[threadBase + threadInBlock].pixel.rgb - ep[0].rgb ); + color_index = ( span_norm_sqr.x <= 0 || dotProduct <= 0 ) ? 0 + : ( ( dotProduct < span_norm_sqr.x ) ? 
aStep[indexPrec.x][ uint( dotProduct * 63.49999 / span_norm_sqr.x ) ] : aStep[indexPrec.x][63] ); + dotProduct = dot( span.a, shared_temp[threadBase + threadInBlock].pixel.a - ep[0].a ); + alpha_index = ( span_norm_sqr.y <= 0 || dotProduct <= 0 ) ? 0 + : ( ( dotProduct < span_norm_sqr.y ) ? aStep[indexPrec.y][ uint( dotProduct * 63.49999 / span_norm_sqr.y ) ] : aStep[indexPrec.y][63] ); + + if (index_selector) + { + swap(color_index, alpha_index); + } + } + else + { + int span_norm_sqr = dot( span, span ); + + int dotProduct = dot( span, shared_temp[threadBase + threadInBlock].pixel - ep[0] ); + color_index = ( span_norm_sqr <= 0 || dotProduct <= 0 ) ? 0 + : ( ( dotProduct < span_norm_sqr ) ? aStep[indexPrec.x][ uint( dotProduct * 63.49999 / span_norm_sqr ) ] : aStep[indexPrec.x][63] ); + } + + shared_temp[GI].error = color_index; + shared_temp[GI].mode = alpha_index; + } +#ifdef REF_DEVICE + GroupMemoryBarrierWithGroupSync(); +#endif + + if (0 == threadInBlock) + { + uint4 block; + if (0 == mode) + { + block_package0( block, partition, threadBase ); + } + else if (1 == mode) + { + block_package1( block, partition, threadBase ); + } + else if (2 == mode) + { + block_package2( block, partition, threadBase ); + } + else if (3 == mode) + { + block_package3( block, partition, threadBase ); + } + else if (4 == mode) + { + block_package4( block, rotation, index_selector, threadBase ); + } + else if (5 == mode) + { + block_package5( block, rotation, threadBase ); + } + else if (6 == mode) + { + block_package6( block, threadBase ); + } + else //if (7 == mode) + { + block_package7( block, partition, threadBase ); + } + + g_OutBuff[blockID] = block; + } +} + +//uint4 truncate_and_round( uint4 color, uint bits) +//{ +// uint precisionMask = ((1 << bits) - 1) << (8 - bits); +// uint precisionHalf = (1 << (7-bits)); +// +// uint4 truncated = color & precisionMask; +// uint4 rounded = min(255, color + precisionHalf) & precisionMask; +// +// uint4 truncated_bak = truncated = 
truncated | (truncated >> bits); +// uint4 rounded_bak = rounded = rounded | (rounded >> bits); +// +// uint4 color_bak = color; +// +// Ensure_A_Is_Larger( rounded, color ); +// Ensure_A_Is_Larger( truncated, color_bak ); +// +// if (dot(rounded - color, rounded - color) < +// dot(truncated - color_bak, truncated - color_bak)) +// { +// return rounded_bak; +// } +// else +// { +// return truncated_bak; +// } +//} + +uint4 quantize( uint4 color, uint uPrec ) +{ + uint4 rnd = min(255, color + (1 << (7 - uPrec))); + return rnd >> (8 - uPrec); +} + +uint4 unquantize( uint4 color, uint uPrec ) +{ + color = color << (8 - uPrec); + return color | (color >> uPrec); +} + +uint2x4 compress_endpoints0( inout uint2x4 endPoint, uint2 P ) +{ + uint2x4 quantized; + for ( uint j = 0; j < 2; j ++ ) + { + quantized[j].rgb = quantize(endPoint[j].rgbb, 5).rgb & 0xFFFFFFFE; + quantized[j].rgb |= P[j]; + quantized[j].a = 0xFF; + + endPoint[j].rgb = unquantize(quantized[j].rgbb, 5).rgb; + endPoint[j].a = 0xFF; + + quantized[j] <<= 3; + } + return quantized; +} +uint2x4 compress_endpoints1( inout uint2x4 endPoint, uint2 P ) +{ + uint2x4 quantized; + for ( uint j = 0; j < 2; j ++ ) + { + quantized[j].rgb = quantize(endPoint[j].rgbb, 7).rgb & 0xFFFFFFFE; + quantized[j].rgb |= P[j]; + quantized[j].a = 0xFF; + + endPoint[j].rgb = unquantize(quantized[j].rgbb, 7).rgb; + endPoint[j].a = 0xFF; + + quantized[j] <<= 1; + } + return quantized; +} +uint2x4 compress_endpoints2( inout uint2x4 endPoint ) +{ + uint2x4 quantized; + for ( uint j = 0; j < 2; j ++ ) + { + quantized[j].rgb = quantize(endPoint[j].rgbb, 5).rgb; + quantized[j].a = 0xFF; + + endPoint[j].rgb = unquantize(quantized[j].rgbb, 5).rgb; + endPoint[j].a = 0xFF; + + quantized[j] <<= 3; + } + return quantized; +} +uint2x4 compress_endpoints3( inout uint2x4 endPoint, uint2 P ) +{ + uint2x4 quantized; + for ( uint j = 0; j < 2; j ++ ) + { + quantized[j].rgb = endPoint[j].rgb & 0xFFFFFFFE; + quantized[j].rgb |= P[j]; + quantized[j].a = 
0xFF; + + endPoint[j].rgb = quantized[j].rgb; + endPoint[j].a = 0xFF; + } + return quantized; +} +uint2x4 compress_endpoints4( inout uint2x4 endPoint ) +{ + uint2x4 quantized; + for ( uint j = 0; j < 2; j ++ ) + { + quantized[j].rgb = quantize(endPoint[j].rgbb, 5).rgb; + quantized[j].a = quantize(endPoint[j].a, 6).r; + + endPoint[j].rgb = unquantize(quantized[j].rgbb, 5).rgb; + endPoint[j].a = unquantize(quantized[j].a, 6).r; + + quantized[j].rgb <<= 3; + quantized[j].a <<= 2; + } + return quantized; +} +uint2x4 compress_endpoints5( inout uint2x4 endPoint ) +{ + uint2x4 quantized; + for ( uint j = 0; j < 2; j ++ ) + { + quantized[j].rgb = quantize(endPoint[j].rgbb, 7).rgb; + quantized[j].a = endPoint[j].a; + + endPoint[j].rgb = unquantize(quantized[j].rgbb, 7).rgb; + // endPoint[j].a Alpha is full precision + + quantized[j].rgb <<= 1; + } + return quantized; +} +uint2x4 compress_endpoints6( inout uint2x4 endPoint, uint2 P ) +{ + uint2x4 quantized; + for ( uint j = 0; j < 2; j ++ ) + { + quantized[j] = endPoint[j] & 0xFFFFFFFE; + quantized[j] |= P[j]; + + endPoint[j] = quantized[j]; + } + return quantized; +} +uint2x4 compress_endpoints7( inout uint2x4 endPoint, uint2 P ) +{ + uint2x4 quantized; + for ( uint j = 0; j < 2; j ++ ) + { + quantized[j] = quantize(endPoint[j], 6) & 0xFFFFFFFE; + quantized[j] |= P[j]; + + endPoint[j] = unquantize(quantized[j], 6); + } + return quantized << 2; +} + +#define get_end_point_l(subset) shared_temp[threadBase + subset].endPoint_low_quantized +#define get_end_point_h(subset) shared_temp[threadBase + subset].endPoint_high_quantized +#define get_color_index(index) shared_temp[threadBase + index].error +#define get_alpha_index(index) shared_temp[threadBase + index].mode + +void block_package0( out uint4 block, uint partition, uint threadBase ) +{ + block.x = 0x01 | ( (partition - 64) << 1 ) + | ( ( get_end_point_l(0).r & 0xF0 ) << 1 ) | ( ( get_end_point_h(0).r & 0xF0 ) << 5 ) + | ( ( get_end_point_l(1).r & 0xF0 ) << 9 ) | ( ( 
get_end_point_h(1).r & 0xF0 ) << 13 ) + | ( ( get_end_point_l(2).r & 0xF0 ) << 17 ) | ( ( get_end_point_h(2).r & 0xF0 ) << 21 ) + | ( ( get_end_point_l(0).g & 0xF0 ) << 25 ); + block.y = ( ( get_end_point_l(0).g & 0xF0 ) >> 7 ) | ( ( get_end_point_h(0).g & 0xF0 ) >> 3 ) + | ( ( get_end_point_l(1).g & 0xF0 ) << 1 ) | ( ( get_end_point_h(1).g & 0xF0 ) << 5 ) + | ( ( get_end_point_l(2).g & 0xF0 ) << 9 ) | ( ( get_end_point_h(2).g & 0xF0 ) << 13 ) + | ( ( get_end_point_l(0).b & 0xF0 ) << 17 ) | ( ( get_end_point_h(0).b & 0xF0 ) << 21 ) + | ( ( get_end_point_l(1).b & 0xF0 ) << 25 ); + block.z = ( ( get_end_point_l(1).b & 0xF0 ) >> 7 ) | ( ( get_end_point_h(1).b & 0xF0 ) >> 3 ) + | ( ( get_end_point_l(2).b & 0xF0 ) << 1 ) | ( ( get_end_point_h(2).b & 0xF0 ) << 5 ) + | ( ( get_end_point_l(0).r & 0x08 ) << 10 ) | ( ( get_end_point_h(0).r & 0x08 ) << 11 ) + | ( ( get_end_point_l(1).r & 0x08 ) << 12 ) | ( ( get_end_point_h(1).r & 0x08 ) << 13 ) + | ( ( get_end_point_l(2).r & 0x08 ) << 14 ) | ( ( get_end_point_h(2).r & 0x08 ) << 15 ) + | ( get_color_index(0) << 19 ); + block.w = 0; + uint i = 1; + for ( ; i <= min( candidateFixUpIndex1DOrdered[partition][0], 4 ); i ++ ) + { + block.z |= get_color_index(i) << ( i * 3 + 18 ); + } + if ( candidateFixUpIndex1DOrdered[partition][0] < 4 ) //i = 4 + { + block.z |= get_color_index(4) << 29; + i += 1; + } + else //i = 5 + { + block.w |= ( get_color_index(4) & 0x04 ) >> 2; + for ( ; i <= candidateFixUpIndex1DOrdered[partition][0]; i ++ ) + block.w |= get_color_index(i) << ( i * 3 - 14 ); + } + for ( ; i <= candidateFixUpIndex1DOrdered[partition][1]; i ++ ) + { + block.w |= get_color_index(i) << ( i * 3 - 15 ); + } + for ( ; i < 16; i ++ ) + { + block.w |= get_color_index(i) << ( i * 3 - 16 ); + } +} +void block_package1( out uint4 block, uint partition, uint threadBase ) +{ + block.x = 0x02 | ( partition << 2 ) + | ( ( get_end_point_l(0).r & 0xFC ) << 6 ) | ( ( get_end_point_h(0).r & 0xFC ) << 12 ) + | ( ( get_end_point_l(1).r & 0xFC ) 
<< 18 ) | ( ( get_end_point_h(1).r & 0xFC ) << 24 ); + block.y = ( ( get_end_point_l(0).g & 0xFC ) >> 2 ) | ( ( get_end_point_h(0).g & 0xFC ) << 4 ) + | ( ( get_end_point_l(1).g & 0xFC ) << 10 ) | ( ( get_end_point_h(1).g & 0xFC ) << 16 ) + | ( ( get_end_point_l(0).b & 0xFC ) << 22 ) | ( ( get_end_point_h(0).b & 0xFC ) << 28 ); + block.z = ( ( get_end_point_h(0).b & 0xFC ) >> 4 ) | ( ( get_end_point_l(1).b & 0xFC ) << 2 ) + | ( ( get_end_point_h(1).b & 0xFC ) << 8 ) + | ( ( get_end_point_l(0).r & 0x02 ) << 15 ) | ( ( get_end_point_l(1).r & 0x02 ) << 16 ) + | ( get_color_index(0) << 18 ); + if ( candidateFixUpIndex1DOrdered[partition][0] == 15 ) + { + block.w = (get_color_index(15) << 30) | (get_color_index(14) << 27) | (get_color_index(13) << 24) | (get_color_index(12) << 21) | (get_color_index(11) << 18) | (get_color_index(10) << 15) + | (get_color_index(9) << 12) | (get_color_index(8) << 9) | (get_color_index(7) << 6) | (get_color_index(6) << 3) | get_color_index(5); + block.z |= (get_color_index(4) << 29) | (get_color_index(3) << 26) | (get_color_index(2) << 23) | (get_color_index(1) << 20) | (get_color_index(0) << 18); + } + else if ( candidateFixUpIndex1DOrdered[partition][0] == 2 ) + { + block.w = (get_color_index(15) << 29) | (get_color_index(14) << 26) | (get_color_index(13) << 23) | (get_color_index(12) << 20) | (get_color_index(11) << 17) | (get_color_index(10) << 14) + | (get_color_index(9) << 11) | (get_color_index(8) << 8) | (get_color_index(7) << 5) | (get_color_index(6) << 2) | (get_color_index(5) >> 1); + block.z |= (get_color_index(5) << 31) | (get_color_index(4) << 28) | (get_color_index(3) << 25) | (get_color_index(2) << 23) | (get_color_index(1) << 20) | (get_color_index(0) << 18); + } + else if ( candidateFixUpIndex1DOrdered[partition][0] == 8 ) + { + block.w = (get_color_index(15) << 29) | (get_color_index(14) << 26) | (get_color_index(13) << 23) | (get_color_index(12) << 20) | (get_color_index(11) << 17) | (get_color_index(10) << 14) + | 
(get_color_index(9) << 11) | (get_color_index(8) << 9) | (get_color_index(7) << 6) | (get_color_index(6) << 3) | get_color_index(5); + block.z |= (get_color_index(4) << 29) | (get_color_index(3) << 26) | (get_color_index(2) << 23) | (get_color_index(1) << 20) | (get_color_index(0) << 18); + } + else //candidateFixUpIndex1DOrdered[partition] == 6 + { + block.w = (get_color_index(15) << 29) | (get_color_index(14) << 26) | (get_color_index(13) << 23) | (get_color_index(12) << 20) | (get_color_index(11) << 17) | (get_color_index(10) << 14) + | (get_color_index(9) << 11) | (get_color_index(8) << 8) | (get_color_index(7) << 6) | (get_color_index(6) << 4) | get_color_index(5); + block.z |= (get_color_index(4) << 29) | (get_color_index(3) << 26) | (get_color_index(2) << 23) | (get_color_index(1) << 20) | (get_color_index(0) << 18); + } +} +void block_package2( out uint4 block, uint partition, uint threadBase ) +{ + block.x = 0x04 | ( (partition - 64) << 3 ) + | ( ( get_end_point_l(0).r & 0xF8 ) << 6 ) | ( ( get_end_point_h(0).r & 0xF8 ) << 11 ) + | ( ( get_end_point_l(1).r & 0xF8 ) << 16 ) | ( ( get_end_point_h(1).r & 0xF8 ) << 21 ) + | ( ( get_end_point_l(2).r & 0xF8 ) << 26 ); + block.y = ( ( get_end_point_l(2).r & 0xF8 ) >> 6 ) | ( ( get_end_point_h(2).r & 0xF8 ) >> 1 ) + | ( ( get_end_point_l(0).g & 0xF8 ) << 4 ) | ( ( get_end_point_h(0).g & 0xF8 ) << 9 ) + | ( ( get_end_point_l(1).g & 0xF8 ) << 14 ) | ( ( get_end_point_h(1).g & 0xF8 ) << 19 ) + | ( ( get_end_point_l(2).g & 0xF8 ) << 24 ); + block.z = ( ( get_end_point_h(2).g & 0xF8 ) >> 3 ) | ( ( get_end_point_l(0).b & 0xF8 ) << 2 ) + | ( ( get_end_point_h(0).b & 0xF8 ) << 7 ) | ( ( get_end_point_l(1).b & 0xF8 ) << 12 ) + | ( ( get_end_point_h(1).b & 0xF8 ) << 17 ) | ( ( get_end_point_l(2).b & 0xF8 ) << 22 ) + | ( ( get_end_point_h(2).b & 0xF8 ) << 27 ); + block.w = ( ( get_end_point_h(2).b & 0xF8 ) >> 5 ) + | ( get_color_index(0) << 3 ); + uint i = 1; + for ( ; i <= candidateFixUpIndex1DOrdered[partition][0]; i ++ ) 
+ { + block.w |= get_color_index(i) << ( i * 2 + 2 ); + } + for ( ; i <= candidateFixUpIndex1DOrdered[partition][1]; i ++ ) + { + block.w |= get_color_index(i) << ( i * 2 + 1 ); + } + for ( ; i < 16; i ++ ) + { + block.w |= get_color_index(i) << ( i * 2 ); + } +} +void block_package3( out uint4 block, uint partition, uint threadBase ) +{ + block.x = 0x08 | ( partition << 4 ) + | ( ( get_end_point_l(0).r & 0xFE ) << 9 ) | ( ( get_end_point_h(0).r & 0xFE ) << 16 ) + | ( ( get_end_point_l(1).r & 0xFE ) << 23 ) | ( ( get_end_point_h(1).r & 0xFE ) << 30 ); + block.y = ( ( get_end_point_h(1).r & 0xFE ) >> 2 ) | ( ( get_end_point_l(0).g & 0xFE ) << 5 ) + | ( ( get_end_point_h(0).g & 0xFE ) << 12 ) | ( ( get_end_point_l(1).g & 0xFE ) << 19 ) + | ( ( get_end_point_h(1).g & 0xFE ) << 26 ); + block.z = ( ( get_end_point_h(1).g & 0xFE ) >> 6 ) | ( ( get_end_point_l(0).b & 0xFE ) << 1 ) + | ( ( get_end_point_h(0).b & 0xFE ) << 8 ) | ( ( get_end_point_l(1).b & 0xFE ) << 15 ) + | ( ( get_end_point_h(1).b & 0xFE ) << 22 ) + | ( ( get_end_point_l(0).r & 0x01 ) << 30 ) | ( ( get_end_point_h(0).r & 0x01 ) << 31 ); + block.w = ( ( get_end_point_l(1).r & 0x01 ) << 0 ) | ( ( get_end_point_h(1).r & 0x01 ) << 1 ) + | ( get_color_index(0) << 2 ); + uint i = 1; + for ( ; i <= candidateFixUpIndex1DOrdered[partition][0]; i ++ ) + { + block.w |= get_color_index(i) << ( i * 2 + 1 ); + } + for ( ; i < 16; i ++ ) + { + block.w |= get_color_index(i) << ( i * 2 ); + } +} +void block_package4( out uint4 block, uint rotation, uint index_selector, uint threadBase ) +{ + block.x = 0x10 | ( (rotation & 3) << 5 ) | ( (index_selector & 1) << 7 ) + | ( ( get_end_point_l(0).r & 0xF8 ) << 5 ) | ( ( get_end_point_h(0).r & 0xF8 ) << 10 ) + | ( ( get_end_point_l(0).g & 0xF8 ) << 15 ) | ( ( get_end_point_h(0).g & 0xF8 ) << 20 ) + | ( ( get_end_point_l(0).b & 0xF8 ) << 25 ); + + block.y = ( ( get_end_point_l(0).b & 0xF8 ) >> 7 ) | ( ( get_end_point_h(0).b & 0xF8 ) >> 2 ) + | ( ( get_end_point_l(0).a & 0xFC ) << 4 
) | ( ( get_end_point_h(0).a & 0xFC ) << 10 ) + | ( (get_color_index(0) & 1) << 18 ) | ( get_color_index(1) << 19 ) | ( get_color_index(2) << 21 ) | ( get_color_index(3) << 23 ) + | ( get_color_index(4) << 25 ) | ( get_color_index(5) << 27 ) | ( get_color_index(6) << 29 ) | ( get_color_index(7) << 31 ); + + block.z = ( get_color_index(7) >> 1 ) | ( get_color_index(8) << 1 ) | ( get_color_index(9) << 3 ) | ( get_color_index(10)<< 5 ) + | ( get_color_index(11)<< 7 ) | ( get_color_index(12)<< 9 ) | ( get_color_index(13)<< 11 ) | ( get_color_index(14)<< 13 ) + | ( get_color_index(15)<< 15 ) | ( (get_alpha_index(0) & 3) << 17 ) | ( get_alpha_index(1) << 19 ) | ( get_alpha_index(2) << 22 ) + | ( get_alpha_index(3) << 25 ) | ( get_alpha_index(4) << 28 ) | ( get_alpha_index(5) << 31 ); + + block.w = ( get_alpha_index(5) >> 1 ) | ( get_alpha_index(6) << 2 ) | ( get_alpha_index(7) << 5 ) | ( get_alpha_index(8) << 8 ) + | ( get_alpha_index(9) << 11 ) | ( get_alpha_index(10)<< 14 ) | ( get_alpha_index(11)<< 17 ) | ( get_alpha_index(12)<< 20 ) + | ( get_alpha_index(13)<< 23 ) | ( get_alpha_index(14)<< 26 ) | ( get_alpha_index(15)<< 29 ); +} +void block_package5( out uint4 block, uint rotation, uint threadBase ) +{ + block.x = 0x20 | ( rotation << 6 ) + | ( ( get_end_point_l(0).r & 0xFE ) << 7 ) | ( ( get_end_point_h(0).r & 0xFE ) << 14 ) + | ( ( get_end_point_l(0).g & 0xFE ) << 21 ) | ( ( get_end_point_h(0).g & 0xFE ) << 28 ); + block.y = ( ( get_end_point_h(0).g & 0xFE ) >> 4 ) | ( ( get_end_point_l(0).b & 0xFE ) << 3 ) + | ( ( get_end_point_h(0).b & 0xFE ) << 10 ) | ( get_end_point_l(0).a << 18 ) | ( get_end_point_h(0).a << 26 ); + block.z = ( get_end_point_h(0).a >> 6 ) + | ( get_color_index(0) << 2 ) | ( get_color_index(1) << 3 ) | ( get_color_index(2) << 5 ) | ( get_color_index(3) << 7 ) + | ( get_color_index(4) << 9 ) | ( get_color_index(5) << 11 ) | ( get_color_index(6) << 13 ) | ( get_color_index(7) << 15 ) + | ( get_color_index(8) << 17 ) | ( get_color_index(9) << 19 ) 
| ( get_color_index(10)<< 21 ) | ( get_color_index(11)<< 23 ) + | ( get_color_index(12)<< 25 ) | ( get_color_index(13)<< 27 ) | ( get_color_index(14)<< 29 ) | ( get_color_index(15)<< 31 ); + block.w = ( get_color_index(15)>> 1 ) | ( get_alpha_index(0) << 1 ) | ( get_alpha_index(1) << 2 ) | ( get_alpha_index(2) << 4 ) + | ( get_alpha_index(3) << 6 ) | ( get_alpha_index(4) << 8 ) | ( get_alpha_index(5) << 10 ) | ( get_alpha_index(6) << 12 ) + | ( get_alpha_index(7) << 14 ) | ( get_alpha_index(8) << 16 ) | ( get_alpha_index(9) << 18 ) | ( get_alpha_index(10)<< 20 ) + | ( get_alpha_index(11)<< 22 ) | ( get_alpha_index(12)<< 24 ) | ( get_alpha_index(13)<< 26 ) | ( get_alpha_index(14)<< 28 ) + | ( get_alpha_index(15)<< 30 ); +} +void block_package6( out uint4 block, uint threadBase ) +{ + block.x = 0x40 + | ( ( get_end_point_l(0).r & 0xFE ) << 6 ) | ( ( get_end_point_h(0).r & 0xFE ) << 13 ) + | ( ( get_end_point_l(0).g & 0xFE ) << 20 ) | ( ( get_end_point_h(0).g & 0xFE ) << 27 ); + block.y = ( ( get_end_point_h(0).g & 0xFE ) >> 5 ) | ( ( get_end_point_l(0).b & 0xFE ) << 2 ) + | ( ( get_end_point_h(0).b & 0xFE ) << 9 ) | ( ( get_end_point_l(0).a & 0xFE ) << 16 ) + | ( ( get_end_point_h(0).a & 0xFE ) << 23 ) + | ( get_end_point_l(0).r & 0x01 ) << 31; + block.z = ( get_end_point_h(0).r & 0x01 ) + | ( get_color_index(0) << 1 ) | ( get_color_index(1) << 4 ) | ( get_color_index(2) << 8 ) | ( get_color_index(3) << 12 ) + | ( get_color_index(4) << 16 ) | ( get_color_index(5) << 20 ) | ( get_color_index(6) << 24 ) | ( get_color_index(7) << 28 ); + block.w = ( get_color_index(8) << 0 ) | ( get_color_index(9) << 4 ) | ( get_color_index(10)<< 8 ) | ( get_color_index(11)<< 12 ) + | ( get_color_index(12)<< 16 ) | ( get_color_index(13)<< 20 ) | ( get_color_index(14)<< 24 ) | ( get_color_index(15)<< 28 ); +} +void block_package7( out uint4 block, uint partition, uint threadBase ) +{ + block.x = 0x80 | ( partition << 8 ) + | ( ( get_end_point_l(0).r & 0xF8 ) << 11 ) | ( ( 
get_end_point_h(0).r & 0xF8 ) << 16 ) + | ( ( get_end_point_l(1).r & 0xF8 ) << 21 ) | ( ( get_end_point_h(1).r & 0xF8 ) << 26 ); + block.y = ( ( get_end_point_h(1).r & 0xF8 ) >> 6 ) | ( ( get_end_point_l(0).g & 0xF8 ) >> 1 ) + | ( ( get_end_point_h(0).g & 0xF8 ) << 4 ) | ( ( get_end_point_l(1).g & 0xF8 ) << 9 ) + | ( ( get_end_point_h(1).g & 0xF8 ) << 14 ) | ( ( get_end_point_l(0).b & 0xF8 ) << 19 ) + | ( ( get_end_point_h(0).b & 0xF8 ) << 24 ); + block.z = ( ( get_end_point_l(1).b & 0xF8 ) >> 3 ) | ( ( get_end_point_h(1).b & 0xF8 ) << 2 ) + | ( ( get_end_point_l(0).a & 0xF8 ) << 7 ) | ( ( get_end_point_h(0).a & 0xF8 ) << 12 ) + | ( ( get_end_point_l(1).a & 0xF8 ) << 17 ) | ( ( get_end_point_h(1).a & 0xF8 ) << 22 ) + | ( ( get_end_point_l(0).r & 0x04 ) << 28 ) | ( ( get_end_point_h(0).r & 0x04 ) << 29 ); + block.w = ( ( get_end_point_l(1).r & 0x04 ) >> 2 ) | ( ( get_end_point_h(1).r & 0x04 ) >> 1 ) + | ( get_color_index(0) << 2 ); + uint i = 1; + for ( ; i <= candidateFixUpIndex1DOrdered[partition][0]; i ++ ) + { + block.w |= get_color_index(i) << ( i * 2 + 1 ); + } + for ( ; i < 16; i ++ ) + { + block.w |= get_color_index(i) << ( i * 2 ); + } +}
\ No newline at end of file diff --git a/tests/hlsl/dxsdk/BasicCompute11/BasicCompute11.hlsl b/tests/hlsl/dxsdk/BasicCompute11/BasicCompute11.hlsl new file mode 100644 index 000000000..798eea2ff --- /dev/null +++ b/tests/hlsl/dxsdk/BasicCompute11/BasicCompute11.hlsl @@ -0,0 +1,72 @@ +//TEST_IGNORE_FILE: Currently failing due to Spire compiler issues. +//TEST:COMPARE_HLSL: -target dxbc-assembly -profile cs_4_0 -entry CSMain +//-------------------------------------------------------------------------------------- +// File: BasicCompute11.hlsl +// +// This file contains the Compute Shader to perform array A + array B +// +// Copyright (c) Microsoft Corporation. All rights reserved. +//-------------------------------------------------------------------------------------- + +#ifdef USE_STRUCTURED_BUFFERS + +struct BufType +{ + int i; + float f; +#ifdef TEST_DOUBLE + double d; +#endif +}; + +StructuredBuffer<BufType> Buffer0 : register(t0); +StructuredBuffer<BufType> Buffer1 : register(t1); +RWStructuredBuffer<BufType> BufferOut : register(u0); + +[numthreads(1, 1, 1)] +void CSMain( uint3 DTid : SV_DispatchThreadID ) +{ + BufferOut[DTid.x].i = Buffer0[DTid.x].i + Buffer1[DTid.x].i; + BufferOut[DTid.x].f = Buffer0[DTid.x].f + Buffer1[DTid.x].f; +#ifdef TEST_DOUBLE + BufferOut[DTid.x].d = Buffer0[DTid.x].d + Buffer1[DTid.x].d; +#endif +} + +#else // The following code is for raw buffers + +ByteAddressBuffer Buffer0 : register(t0); +ByteAddressBuffer Buffer1 : register(t1); +RWByteAddressBuffer BufferOut : register(u0); + +[numthreads(1, 1, 1)] +void CSMain( uint3 DTid : SV_DispatchThreadID ) +{ +#ifdef TEST_DOUBLE + int i0 = asint( Buffer0.Load( DTid.x*16 ) ); + float f0 = asfloat( Buffer0.Load( DTid.x*16+4 ) ); + double d0 = asdouble( Buffer0.Load( DTid.x*16+8 ), Buffer0.Load( DTid.x*16+12 ) ); + int i1 = asint( Buffer1.Load( DTid.x*16 ) ); + float f1 = asfloat( Buffer1.Load( DTid.x*16+4 ) ); + double d1 = asdouble( Buffer1.Load( DTid.x*16+8 ), Buffer1.Load( DTid.x*16+12 
) ); + + BufferOut.Store( DTid.x*16, asuint(i0 + i1) ); + BufferOut.Store( DTid.x*16+4, asuint(f0 + f1) ); + + uint dl, dh; + asuint( d0 + d1, dl, dh ); + + BufferOut.Store( DTid.x*16+8, dl ); + BufferOut.Store( DTid.x*16+12, dh ); +#else + int i0 = asint( Buffer0.Load( DTid.x*8 ) ); + float f0 = asfloat( Buffer0.Load( DTid.x*8+4 ) ); + int i1 = asint( Buffer1.Load( DTid.x*8 ) ); + float f1 = asfloat( Buffer1.Load( DTid.x*8+4 ) ); + + BufferOut.Store( DTid.x*8, asuint(i0 + i1) ); + BufferOut.Store( DTid.x*8+4, asuint(f0 + f1) ); +#endif // TEST_DOUBLE +} + +#endif // USE_STRUCTURED_BUFFERS diff --git a/tests/hlsl/dxsdk/BasicHLSL11/BasicHLSL.fx b/tests/hlsl/dxsdk/BasicHLSL11/BasicHLSL.fx new file mode 100644 index 000000000..bd28f862b --- /dev/null +++ b/tests/hlsl/dxsdk/BasicHLSL11/BasicHLSL.fx @@ -0,0 +1,158 @@ +//TEST_IGNORE_FILE: +//-------------------------------------------------------------------------------------- +// File: BasicHLSL.fx +// +// The effect file for the BasicHLSL sample. +// +// Copyright (c) Microsoft Corporation. All rights reserved. 
+//-------------------------------------------------------------------------------------- + + +//-------------------------------------------------------------------------------------- +// Global variables +//-------------------------------------------------------------------------------------- +float4 g_MaterialAmbientColor; // Material's ambient color +float4 g_MaterialDiffuseColor; // Material's diffuse color +int g_nNumLights; + +float3 g_LightDir; // Light's direction in world space +float4 g_LightDiffuse; // Light's diffuse color +float4 g_LightAmbient; // Light's ambient color + +texture g_MeshTexture; // Color texture for mesh + +float g_fTime; // App's time in seconds +float4x4 g_mWorld; // World matrix for object +float4x4 g_mWorldViewProjection; // World * View * Projection matrix + + + +//-------------------------------------------------------------------------------------- +// Texture samplers +//-------------------------------------------------------------------------------------- +sampler MeshTextureSampler = +sampler_state +{ + Texture = <g_MeshTexture>; + MipFilter = LINEAR; + MinFilter = LINEAR; + MagFilter = LINEAR; +}; + + +//-------------------------------------------------------------------------------------- +// Vertex shader output structure +//-------------------------------------------------------------------------------------- +struct VS_OUTPUT +{ + float4 Position : POSITION; // vertex position + float4 Diffuse : COLOR0; // vertex diffuse color (note that COLOR0 is clamped from 0..1) + float2 TextureUV : TEXCOORD0; // vertex texture coords +}; + + +//-------------------------------------------------------------------------------------- +// This shader computes standard transform and lighting +//-------------------------------------------------------------------------------------- +VS_OUTPUT RenderSceneVS( float4 vPos : POSITION, + float3 vNormal : NORMAL, + float2 vTexCoord0 : TEXCOORD0, + uniform int nNumLights, + uniform bool bTexture, 
+ uniform bool bAnimate ) +{ + + VS_OUTPUT Output; + float3 vNormalWorldSpace; + + // Transform the position from object space to homogeneous projection space + Output.Position = mul(vPos, g_mWorldViewProjection); + + // Transform the normal from object space to world space + vNormalWorldSpace = normalize(mul(vNormal, (float3x3)g_mWorld)); // normal (world space) + + // Compute simple directional lighting equation + float3 vTotalLightDiffuse = float3(0,0,0); + for(int i=0; i<nNumLights; i++ ) + vTotalLightDiffuse += g_LightDiffuse * max(0,dot(vNormalWorldSpace, g_LightDir)); + + Output.Diffuse.rgb = g_MaterialDiffuseColor * vTotalLightDiffuse + + g_MaterialAmbientColor * g_LightAmbient; + Output.Diffuse.a = 1.0f; + + // Just copy the texture coordinate through + if( bTexture ) + Output.TextureUV = vTexCoord0; + else + Output.TextureUV = 0; + + return Output; +} + + +//-------------------------------------------------------------------------------------- +// Pixel shader output structure +//-------------------------------------------------------------------------------------- +struct PS_OUTPUT +{ + float4 RGBColor : COLOR0; // Pixel color +}; + + +//-------------------------------------------------------------------------------------- +// This shader outputs the pixel's color by modulating the texture's +// color with diffuse material color +//-------------------------------------------------------------------------------------- +PS_OUTPUT RenderScenePS( VS_OUTPUT In, + uniform bool bTexture ) +{ + PS_OUTPUT Output; + + // Lookup mesh texture and modulate it with diffuse + if( bTexture ) + Output.RGBColor = tex2D(MeshTextureSampler, In.TextureUV) * In.Diffuse; + else + Output.RGBColor = In.Diffuse; + + return Output; +} + + +//-------------------------------------------------------------------------------------- +// Renders scene to render target +//-------------------------------------------------------------------------------------- +technique 
RenderSceneWithTexture1Light +{ + pass P0 + { + VertexShader = compile vs_2_0 RenderSceneVS( 1, true, true ); + PixelShader = compile ps_2_0 RenderScenePS( true ); // trivial pixel shader (could use FF instead if desired) + } +} + +technique RenderSceneWithTexture2Light +{ + pass P0 + { + VertexShader = compile vs_2_0 RenderSceneVS( 2, true, true ); + PixelShader = compile ps_2_0 RenderScenePS( true ); // trivial pixel shader (could use FF instead if desired) + } +} + +technique RenderSceneWithTexture3Light +{ + pass P0 + { + VertexShader = compile vs_2_0 RenderSceneVS( 3, true, true ); + PixelShader = compile ps_2_0 RenderScenePS( true ); // trivial pixel shader (could use FF instead if desired) + } +} + +technique RenderSceneNoTexture +{ + pass P0 + { + VertexShader = compile vs_2_0 RenderSceneVS( 1, false, false ); + PixelShader = compile ps_2_0 RenderScenePS( false ); // trivial pixel shader (could use FF instead if desired) + } +} diff --git a/tests/hlsl/dxsdk/BasicHLSL11/BasicHLSL11_PS.hlsl b/tests/hlsl/dxsdk/BasicHLSL11/BasicHLSL11_PS.hlsl new file mode 100644 index 000000000..78fff9eeb --- /dev/null +++ b/tests/hlsl/dxsdk/BasicHLSL11/BasicHLSL11_PS.hlsl @@ -0,0 +1,51 @@ +//TEST:COMPARE_HLSL: -target dxbc-assembly -profile ps_4_0 -entry PSMain +//-------------------------------------------------------------------------------------- +// File: BasicHLSL11_PS.hlsl +// +// The pixel shader file for the BasicHLSL11 sample. +// +// Copyright (c) Microsoft Corporation. All rights reserved. 
+//-------------------------------------------------------------------------------------- + +//-------------------------------------------------------------------------------------- +// Globals +//-------------------------------------------------------------------------------------- +cbuffer cbPerObject : register( b0 ) +{ + float4 g_vObjectColor : packoffset( c0 ); +}; + +cbuffer cbPerFrame : register( b1 ) +{ + float3 g_vLightDir : packoffset( c0 ); + float g_fAmbient : packoffset( c0.w ); +}; + +//-------------------------------------------------------------------------------------- +// Textures and Samplers +//-------------------------------------------------------------------------------------- +Texture2D g_txDiffuse : register( t0 ); +SamplerState g_samLinear : register( s0 ); + +//-------------------------------------------------------------------------------------- +// Input / Output structures +//-------------------------------------------------------------------------------------- +struct PS_INPUT +{ + float3 vNormal : NORMAL; + float2 vTexcoord : TEXCOORD0; +}; + +//-------------------------------------------------------------------------------------- +// Pixel Shader +//-------------------------------------------------------------------------------------- +float4 PSMain( PS_INPUT Input ) : SV_TARGET +{ + float4 vDiffuse = g_txDiffuse.Sample( g_samLinear, Input.vTexcoord ); + + float fLighting = saturate( dot( g_vLightDir, Input.vNormal ) ); + fLighting = max( fLighting, g_fAmbient ); + + return vDiffuse * fLighting; +} + diff --git a/tests/hlsl/dxsdk/BasicHLSL11/BasicHLSL11_VS.hlsl b/tests/hlsl/dxsdk/BasicHLSL11/BasicHLSL11_VS.hlsl new file mode 100644 index 000000000..cb2c1b950 --- /dev/null +++ b/tests/hlsl/dxsdk/BasicHLSL11/BasicHLSL11_VS.hlsl @@ -0,0 +1,49 @@ +//TEST:COMPARE_HLSL: -target dxbc-assembly -profile vs_4_0 -entry VSMain +//-------------------------------------------------------------------------------------- +// File: BasicHLSL11_VS.hlsl 
+// +// The vertex shader file for the BasicHLSL11 sample. +// +// Copyright (c) Microsoft Corporation. All rights reserved. +//-------------------------------------------------------------------------------------- + +//-------------------------------------------------------------------------------------- +// Globals +//-------------------------------------------------------------------------------------- +cbuffer cbPerObject : register( b0 ) +{ + matrix g_mWorldViewProjection : packoffset( c0 ); + matrix g_mWorld : packoffset( c4 ); +}; + +//-------------------------------------------------------------------------------------- +// Input / Output structures +//-------------------------------------------------------------------------------------- +struct VS_INPUT +{ + float4 vPosition : POSITION; + float3 vNormal : NORMAL; + float2 vTexcoord : TEXCOORD0; +}; + +struct VS_OUTPUT +{ + float3 vNormal : NORMAL; + float2 vTexcoord : TEXCOORD0; + float4 vPosition : SV_POSITION; +}; + +//-------------------------------------------------------------------------------------- +// Vertex Shader +//-------------------------------------------------------------------------------------- +VS_OUTPUT VSMain( VS_INPUT Input ) +{ + VS_OUTPUT Output; + + Output.vPosition = mul( Input.vPosition, g_mWorldViewProjection ); + Output.vNormal = mul( Input.vNormal, (float3x3)g_mWorld ); + Output.vTexcoord = Input.vTexcoord; + + return Output; +} + diff --git a/tests/hlsl/dxsdk/BasicHLSLFX11/BasicHLSLFX11.fx b/tests/hlsl/dxsdk/BasicHLSLFX11/BasicHLSLFX11.fx new file mode 100644 index 000000000..1ecc1930a --- /dev/null +++ b/tests/hlsl/dxsdk/BasicHLSLFX11/BasicHLSLFX11.fx @@ -0,0 +1,181 @@ +//TEST_IGNORE_FILE: +//-------------------------------------------------------------------------------------- +// File: BasicHLSL11.fx +// +// The effect file for the BasicHLSL sample. +// +// Copyright (c) Microsoft Corporation. All rights reserved. 
+//-------------------------------------------------------------------------------------- + + +//-------------------------------------------------------------------------------------- +// Global variables +//-------------------------------------------------------------------------------------- +float4 g_MaterialAmbientColor; // Material's ambient color +float4 g_MaterialDiffuseColor; // Material's diffuse color +int g_nNumLights; + +float3 g_LightDir[3]; // Light's direction in world space +float4 g_LightDiffuse[3]; // Light's diffuse color +float4 g_LightAmbient; // Light's ambient color + +Texture2D g_MeshTexture; // Color texture for mesh + +float g_fTime; // App's time in seconds +float4x4 g_mWorld; // World matrix for object +float4x4 g_mWorldViewProjection; // World * View * Projection matrix + +//-------------------------------------------------------------------------------------- +// DepthStates +//-------------------------------------------------------------------------------------- +DepthStencilState EnableDepth +{ + DepthEnable = TRUE; + DepthWriteMask = ALL; + DepthFunc = LESS_EQUAL; +}; + +//-------------------------------------------------------------------------------------- +// Texture samplers +//-------------------------------------------------------------------------------------- +SamplerState MeshTextureSampler +{ + Filter = MIN_MAG_MIP_LINEAR; + AddressU = Wrap; + AddressV = Wrap; +}; + + +//-------------------------------------------------------------------------------------- +// Vertex shader output structure +//-------------------------------------------------------------------------------------- +struct VS_OUTPUT +{ + float4 Position : SV_POSITION; // vertex position + float4 Diffuse : COLOR0; // vertex diffuse color (note that COLOR0 is clamped from 0..1) + float2 TextureUV : TEXCOORD0; // vertex texture coords +}; + + +//-------------------------------------------------------------------------------------- +// This shader computes 
standard transform and lighting +//-------------------------------------------------------------------------------------- +VS_OUTPUT RenderSceneVS( float4 vPos : POSITION, + float3 vNormal : NORMAL, + float2 vTexCoord0 : TEXCOORD, + uniform int nNumLights, + uniform bool bTexture, + uniform bool bAnimate ) +{ + VS_OUTPUT Output; + float3 vNormalWorldSpace; + + float4 vAnimatedPos = vPos; + + // Animation the vertex based on time and the vertex's object space position + if( bAnimate ) + vAnimatedPos += float4(vNormal, 0) * (sin(g_fTime+5.5)+0.5)*5; + + // Transform the position from object space to homogeneous projection space + Output.Position = mul(vAnimatedPos, g_mWorldViewProjection); + + // Transform the normal from object space to world space + vNormalWorldSpace = normalize(mul(vNormal, (float3x3)g_mWorld)); // normal (world space) + + // Compute simple directional lighting equation + float3 vTotalLightDiffuse = float3(0,0,0); + for(int i=0; i<nNumLights; i++ ) + vTotalLightDiffuse += g_LightDiffuse[i] * max(0,dot(vNormalWorldSpace, g_LightDir[i])); + + Output.Diffuse.rgb = g_MaterialDiffuseColor * vTotalLightDiffuse + + g_MaterialAmbientColor * g_LightAmbient; + Output.Diffuse.a = 1.0f; + + // Just copy the texture coordinate through + if( bTexture ) + Output.TextureUV = vTexCoord0; + else + Output.TextureUV = 0; + + return Output; +} + + +//-------------------------------------------------------------------------------------- +// Pixel shader output structure +//-------------------------------------------------------------------------------------- +struct PS_OUTPUT +{ + float4 RGBColor : SV_Target; // Pixel color +}; + + +//-------------------------------------------------------------------------------------- +// This shader outputs the pixel's color by modulating the texture's +// color with diffuse material color +//-------------------------------------------------------------------------------------- +PS_OUTPUT RenderScenePS( VS_OUTPUT In, + uniform bool 
bTexture ) +{ + PS_OUTPUT Output; + + // Lookup mesh texture and modulate it with diffuse + if( bTexture ) + Output.RGBColor = g_MeshTexture.Sample(MeshTextureSampler, In.TextureUV) * In.Diffuse; + else + Output.RGBColor = In.Diffuse; + + return Output; +} + + +//-------------------------------------------------------------------------------------- +// Renders scene to render target using D3D11 Techniques +//-------------------------------------------------------------------------------------- +technique11 RenderSceneWithTexture1Light +{ + pass P0 + { + SetVertexShader( CompileShader( vs_4_0_level_9_1, RenderSceneVS( 1, true, true ) ) ); + SetGeometryShader( NULL ); + SetPixelShader( CompileShader( ps_4_0_level_9_1, RenderScenePS( true ) ) ); + + SetDepthStencilState( EnableDepth, 0 ); + } +} + +technique11 RenderSceneWithTexture2Light +{ + pass P0 + { + SetVertexShader( CompileShader( vs_4_0_level_9_1, RenderSceneVS( 2, true, true ) ) ); + SetGeometryShader( NULL ); + SetPixelShader( CompileShader( ps_4_0_level_9_1, RenderScenePS( true ) ) ); + + SetDepthStencilState( EnableDepth, 0 ); + } +} + +technique11 RenderSceneWithTexture3Light +{ + pass P0 + { + SetVertexShader( CompileShader( vs_4_0_level_9_1, RenderSceneVS( 3, true, true ) ) ); + SetGeometryShader( NULL ); + SetPixelShader( CompileShader( ps_4_0_level_9_1, RenderScenePS( true ) ) ); + + SetDepthStencilState( EnableDepth, 0 ); + } +} + +technique11 RenderSceneNoTexture +{ + pass P0 + { + SetVertexShader( CompileShader( vs_4_0_level_9_1, RenderSceneVS( 1, true, true ) ) ); + SetGeometryShader( NULL ); + SetPixelShader( CompileShader( ps_4_0_level_9_1, RenderScenePS( false ) ) ); + + SetDepthStencilState( EnableDepth, 0 ); + } +}
\ No newline at end of file diff --git a/tests/hlsl/dxsdk/CascadedShadowMaps11/RenderCascadeScene.hlsl b/tests/hlsl/dxsdk/CascadedShadowMaps11/RenderCascadeScene.hlsl new file mode 100644 index 000000000..6a6dca0c4 --- /dev/null +++ b/tests/hlsl/dxsdk/CascadedShadowMaps11/RenderCascadeScene.hlsl @@ -0,0 +1,506 @@ +//TEST_IGNORE_FILE: Currently failing due to Spire compiler issues. +//TEST:COMPARE_HLSL: -target dxbc-assembly -profile vs_4_0 -entry VSMain -profile ps_4_0 -entry PSMain +//-------------------------------------------------------------------------------------- +// File: RenderCascadeScene.hlsl +// +// This is the main shader file. This shader is compiled with several different flags +// to provide different customizations based on user controls. +// +// +// Copyright (c) Microsoft Corporation. All rights reserved. +//-------------------------------------------------------------------------------------- + +//-------------------------------------------------------------------------------------- +// Globals +//-------------------------------------------------------------------------------------- + +// This flag uses the derivative information to map the texels in a shadow map to the +// view space plane of the primitive being rendred. This depth is then used as the +// comparison depth and reduces self shadowing aliases. This technique is expensive +// and is only valid when objects are planer ( such as a ground plane ). +#ifndef USE_DERIVATIVES_FOR_DEPTH_OFFSET_FLAG +#define USE_DERIVATIVES_FOR_DEPTH_OFFSET_FLAG 0 +#endif + +// This flag enables the shadow to blend between cascades. This is most useful when the +// the shadow maps are small and artifact can be seen between the various cascade layers. +#ifndef BLEND_BETWEEN_CASCADE_LAYERS_FLAG +#define BLEND_BETWEEN_CASCADE_LAYERS_FLAG 0 +#endif + +// There are two methods for selecting the proper cascade a fragment lies in. 
Interval selection +// compares the depth of the fragment against the frustum's depth partition. +// Map based selection compares the texture coordinates against the acutal cascade maps. +// Map based selection gives better coverage. +// Interval based selection is easier to extend and understand. +#ifndef SELECT_CASCADE_BY_INTERVAL_FLAG +#define SELECT_CASCADE_BY_INTERVAL_FLAG 0 +#endif + +// The number of cascades +#ifndef CASCADE_COUNT_FLAG +#define CASCADE_COUNT_FLAG 3 +#endif + + +// Most titles will find that 3-4 cascades with +// BLEND_BETWEEN_CASCADE_LAYERS_FLAG, is good for lower end PCs. +// High end PCs will be able to handle more cascades, and larger blur bands. +// In some cases such as when large PCF kernels are used, derivative based depth offsets could be used +// with larger PCF blur kernels on high end PCs for the ground plane. + +cbuffer cbAllShadowData : register( b0 ) +{ + matrix m_mWorldViewProjection; + matrix m_mWorld; + matrix m_mWorldView; + matrix m_mShadow; + float4 m_vCascadeOffset[8]; + float4 m_vCascadeScale[8]; + int m_nCascadeLevels; // Number of Cascades + int m_iVisualizeCascades; // 1 is to visualize the cascades in different colors. 0 is to just draw the scene + int m_iPCFBlurForLoopStart; // For loop begin value. For a 5x5 Kernal this would be -2. + int m_iPCFBlurForLoopEnd; // For loop end value. For a 5x5 kernel this would be 3. + + // For Map based selection scheme, this keeps the pixels inside of the the valid range. + // When there is no boarder, these values are 0 and 1 respectivley. + float m_fMinBorderPadding; + float m_fMaxBorderPadding; + float m_fShadowBiasFromGUI; // A shadow map offset to deal with self shadow artifacts. + //These artifacts are aggravated by PCF. + float m_fShadowPartitionSize; + float m_fCascadeBlendArea; // Amount to overlap when blending between cascades. 
+ float m_fTexelSize; + float m_fNativeTexelSizeInX; + float m_fPaddingForCB3; // Padding variables exist because CBs must be a multiple of 16 bytes. + float4 m_fCascadeFrustumsEyeSpaceDepthsFloat[2]; // The values along Z that seperate the cascades. + float4 m_fCascadeFrustumsEyeSpaceDepthsFloat4[8]; // the values along Z that separte the cascades. + // Wastefully stored in float4 so they are array indexable. + float3 m_vLightDir; + float m_fPaddingCB4; + +}; + + + +//-------------------------------------------------------------------------------------- +// Textures and Samplers +//-------------------------------------------------------------------------------------- +Texture2D g_txDiffuse : register( t0 ); +Texture2D g_txShadow : register( t5 ); + + +SamplerState g_samLinear : register( s0 ); +SamplerComparisonState g_samShadow : register( s5 ); + +//-------------------------------------------------------------------------------------- +// Input / Output structures +//-------------------------------------------------------------------------------------- +struct VS_INPUT +{ + float4 vPosition : POSITION; + float3 vNormal : NORMAL; + float2 vTexcoord : TEXCOORD0; +}; + +struct VS_OUTPUT +{ + float3 vNormal : NORMAL; + float2 vTexcoord : TEXCOORD0; + float4 vTexShadow : TEXCOORD1; + float4 vPosition : SV_POSITION; + float4 vInterpPos : TEXCOORD2; + float vDepth : TEXCOORD3; +}; + +//-------------------------------------------------------------------------------------- +// Vertex Shader +//-------------------------------------------------------------------------------------- +VS_OUTPUT VSMain( VS_INPUT Input ) +{ + VS_OUTPUT Output; + + Output.vPosition = mul( Input.vPosition, m_mWorldViewProjection ); + Output.vNormal = mul( Input.vNormal, (float3x3)m_mWorld ); + Output.vTexcoord = Input.vTexcoord; + Output.vInterpPos = Input.vPosition; + Output.vDepth = mul( Input.vPosition, m_mWorldView ).z ; + + // Transform the shadow texture coordinates for all the cascades. 
+ Output.vTexShadow = mul( Input.vPosition, m_mShadow ); + return Output; + +} + + + +static const float4 vCascadeColorsMultiplier[8] = +{ + float4 ( 1.5f, 0.0f, 0.0f, 1.0f ), + float4 ( 0.0f, 1.5f, 0.0f, 1.0f ), + float4 ( 0.0f, 0.0f, 5.5f, 1.0f ), + float4 ( 1.5f, 0.0f, 5.5f, 1.0f ), + float4 ( 1.5f, 1.5f, 0.0f, 1.0f ), + float4 ( 1.0f, 1.0f, 1.0f, 1.0f ), + float4 ( 0.0f, 1.0f, 5.5f, 1.0f ), + float4 ( 0.5f, 3.5f, 0.75f, 1.0f ) +}; + + +void ComputeCoordinatesTransform( in int iCascadeIndex, + in float4 InterpolatedPosition , + in out float4 vShadowTexCoord , + in out float4 vShadowTexCoordViewSpace ) +{ + // Now that we know the correct map, we can transform the world space position of the current fragment + if( SELECT_CASCADE_BY_INTERVAL_FLAG ) + { + vShadowTexCoord = vShadowTexCoordViewSpace * m_vCascadeScale[iCascadeIndex]; + vShadowTexCoord += m_vCascadeOffset[iCascadeIndex]; + } + + vShadowTexCoord.x *= m_fShadowPartitionSize; // precomputed (float)iCascadeIndex / (float)CASCADE_CNT + vShadowTexCoord.x += (m_fShadowPartitionSize * (float)iCascadeIndex ); + + +} + + +//-------------------------------------------------------------------------------------- +// This function calculates the screen space depth for shadow space texels +//-------------------------------------------------------------------------------------- +void CalculateRightAndUpTexelDepthDeltas ( in float3 vShadowTexDDX, + in float3 vShadowTexDDY, + out float fUpTextDepthWeight, + out float fRightTextDepthWeight + ) { + + // We use the derivatives in X and Y to create a transformation matrix. Because these derivives give us the + // transformation from screen space to shadow space, we need the inverse matrix to take us from shadow space + // to screen space. This new matrix will allow us to map shadow map texels to screen space. This will allow + // us to find the screen space depth of a corresponding depth pixel. 
+ // This is not a perfect solution as it assumes the underlying geometry of the scene is a plane. A more + // accureate way of finding the actual depth would be to do a deferred rendering approach and actually + //sample the depth. + + // Using an offset, or using variance shadow maps is a better approach to reducing these artifacts in most cases. + + float2x2 matScreentoShadow = float2x2( vShadowTexDDX.xy, vShadowTexDDY.xy ); + float fDeterminant = determinant ( matScreentoShadow ); + + float fInvDeterminant = 1.0f / fDeterminant; + + float2x2 matShadowToScreen = float2x2 ( + matScreentoShadow._22 * fInvDeterminant, matScreentoShadow._12 * -fInvDeterminant, + matScreentoShadow._21 * -fInvDeterminant, matScreentoShadow._11 * fInvDeterminant ); + + float2 vRightShadowTexelLocation = float2( m_fTexelSize, 0.0f ); + float2 vUpShadowTexelLocation = float2( 0.0f, m_fTexelSize ); + + // Transform the right pixel by the shadow space to screen space matrix. + float2 vRightTexelDepthRatio = mul( vRightShadowTexelLocation, matShadowToScreen ); + float2 vUpTexelDepthRatio = mul( vUpShadowTexelLocation, matShadowToScreen ); + + // We can now caculate how much depth changes when you move up or right in the shadow map. + // We use the ratio of change in x and y times the dervivite in X and Y of the screen space + // depth to calculate this change. + fUpTextDepthWeight = + vUpTexelDepthRatio.x * vShadowTexDDX.z + + vUpTexelDepthRatio.y * vShadowTexDDY.z; + fRightTextDepthWeight = + vRightTexelDepthRatio.x * vShadowTexDDX.z + + vRightTexelDepthRatio.y * vShadowTexDDY.z; + +} + + +//-------------------------------------------------------------------------------------- +// Use PCF to sample the depth map and return a percent lit value. 
+//-------------------------------------------------------------------------------------- +void CalculatePCFPercentLit ( in float4 vShadowTexCoord, + in float fRightTexelDepthDelta, + in float fUpTexelDepthDelta, + in float fBlurRowSize, + out float fPercentLit + ) +{ + fPercentLit = 0.0f; + // This loop could be unrolled, and texture immediate offsets could be used if the kernel size were fixed. + // This would be performance improvment. + for( int x = m_iPCFBlurForLoopStart; x < m_iPCFBlurForLoopEnd; ++x ) + { + for( int y = m_iPCFBlurForLoopStart; y < m_iPCFBlurForLoopEnd; ++y ) + { + float depthcompare = vShadowTexCoord.z; + // A very simple solution to the depth bias problems of PCF is to use an offset. + // Unfortunately, too much offset can lead to Peter-panning (shadows near the base of object disappear ) + // Too little offset can lead to shadow acne ( objects that should not be in shadow are partially self shadowed ). + depthcompare -= m_fShadowBiasFromGUI; + if ( USE_DERIVATIVES_FOR_DEPTH_OFFSET_FLAG ) + { + // Add in derivative computed depth scale based on the x and y pixel. + depthcompare += fRightTexelDepthDelta * ( (float) x ) + fUpTexelDepthDelta * ( (float) y ); + } + // Compare the transformed pixel depth to the depth read from the map. + fPercentLit += g_txShadow.SampleCmpLevelZero( g_samShadow, + float2( + vShadowTexCoord.x + ( ( (float) x ) * m_fNativeTexelSizeInX ) , + vShadowTexCoord.y + ( ( (float) y ) * m_fTexelSize ) + ), + depthcompare ); + } + } + fPercentLit /= (float)fBlurRowSize; +} + +//-------------------------------------------------------------------------------------- +// Calculate amount to blend between two cascades and the band where blending will occure. 
+//-------------------------------------------------------------------------------------- +void CalculateBlendAmountForInterval ( in int iCurrentCascadeIndex, + in out float fPixelDepth, + in out float fCurrentPixelsBlendBandLocation, + out float fBlendBetweenCascadesAmount + ) +{ + + // We need to calculate the band of the current shadow map where it will fade into the next cascade. + // We can then early out of the expensive PCF for loop. + // + float fBlendInterval = m_fCascadeFrustumsEyeSpaceDepthsFloat4[ iCurrentCascadeIndex ].x; + //if( iNextCascadeIndex > 1 ) + int fBlendIntervalbelowIndex = min(0, iCurrentCascadeIndex-1); + fPixelDepth -= m_fCascadeFrustumsEyeSpaceDepthsFloat4[ fBlendIntervalbelowIndex ].x; + fBlendInterval -= m_fCascadeFrustumsEyeSpaceDepthsFloat4[ fBlendIntervalbelowIndex ].x; + + // The current pixel's blend band location will be used to determine when we need to blend and by how much. + fCurrentPixelsBlendBandLocation = fPixelDepth / fBlendInterval; + fCurrentPixelsBlendBandLocation = 1.0f - fCurrentPixelsBlendBandLocation; + // The fBlendBetweenCascadesAmount is our location in the blend band. + fBlendBetweenCascadesAmount = fCurrentPixelsBlendBandLocation / m_fCascadeBlendArea; +} + + + +//-------------------------------------------------------------------------------------- +// Calculate amount to blend between two cascades and the band where blending will occure. +//-------------------------------------------------------------------------------------- +void CalculateBlendAmountForMap ( in float4 vShadowMapTextureCoord, + in out float fCurrentPixelsBlendBandLocation, + out float fBlendBetweenCascadesAmount ) +{ + // Calcaulte the blend band for the map based selection. 
+ float2 distanceToOne = float2 ( 1.0f - vShadowMapTextureCoord.x, 1.0f - vShadowMapTextureCoord.y ); + fCurrentPixelsBlendBandLocation = min( vShadowMapTextureCoord.x, vShadowMapTextureCoord.y ); + float fCurrentPixelsBlendBandLocation2 = min( distanceToOne.x, distanceToOne.y ); + fCurrentPixelsBlendBandLocation = + min( fCurrentPixelsBlendBandLocation, fCurrentPixelsBlendBandLocation2 ); + fBlendBetweenCascadesAmount = fCurrentPixelsBlendBandLocation / m_fCascadeBlendArea; +} + +//-------------------------------------------------------------------------------------- +// Calculate the shadow based on several options and rende the scene. +//-------------------------------------------------------------------------------------- +float4 PSMain( VS_OUTPUT Input ) : SV_TARGET +{ + float4 vDiffuse = g_txDiffuse.Sample( g_samLinear, Input.vTexcoord ); + + float4 vShadowMapTextureCoord = 0.0f; + float4 vShadowMapTextureCoord_blend = 0.0f; + + float4 vVisualizeCascadeColor = float4(0.0f,0.0f,0.0f,1.0f); + + float fPercentLit = 0.0f; + float fPercentLit_blend = 0.0f; + + + float fUpTextDepthWeight=0; + float fRightTextDepthWeight=0; + float fUpTextDepthWeight_blend=0; + float fRightTextDepthWeight_blend=0; + + int iBlurRowSize = m_iPCFBlurForLoopEnd - m_iPCFBlurForLoopStart; + iBlurRowSize *= iBlurRowSize; + float fBlurRowSize = (float)iBlurRowSize; + + int iCascadeFound = 0; + int iNextCascadeIndex = 1; + + float fCurrentPixelDepth; + + // The interval based selection technique compares the pixel's depth against the frustum's cascade divisions. + fCurrentPixelDepth = Input.vDepth; + + // This for loop is not necessary when the frustum is uniformaly divided and interval based selection is used. + // In this case fCurrentPixelDepth could be used as an array lookup into the correct frustum. 
+ int iCurrentCascadeIndex; + + float4 vShadowMapTextureCoordViewSpace = Input.vTexShadow; + if( SELECT_CASCADE_BY_INTERVAL_FLAG ) + { + iCurrentCascadeIndex = 0; + if ( CASCADE_COUNT_FLAG > 1 ) + { + float4 vCurrentPixelDepth = Input.vDepth; + float4 fComparison = ( vCurrentPixelDepth > m_fCascadeFrustumsEyeSpaceDepthsFloat[0]); + float4 fComparison2 = ( vCurrentPixelDepth > m_fCascadeFrustumsEyeSpaceDepthsFloat[1]); + float fIndex = dot( + float4( CASCADE_COUNT_FLAG > 0, + CASCADE_COUNT_FLAG > 1, + CASCADE_COUNT_FLAG > 2, + CASCADE_COUNT_FLAG > 3) + , fComparison ) + + dot( + float4( + CASCADE_COUNT_FLAG > 4, + CASCADE_COUNT_FLAG > 5, + CASCADE_COUNT_FLAG > 6, + CASCADE_COUNT_FLAG > 7) + , fComparison2 ) ; + + fIndex = min( fIndex, CASCADE_COUNT_FLAG - 1 ); + iCurrentCascadeIndex = (int)fIndex; + } + } + + if ( !SELECT_CASCADE_BY_INTERVAL_FLAG ) + { + iCurrentCascadeIndex = 0; + if ( CASCADE_COUNT_FLAG == 1 ) + { + vShadowMapTextureCoord = vShadowMapTextureCoordViewSpace * m_vCascadeScale[0]; + vShadowMapTextureCoord += m_vCascadeOffset[0]; + } + if ( CASCADE_COUNT_FLAG > 1 ) { + for( int iCascadeIndex = 0; iCascadeIndex < CASCADE_COUNT_FLAG && iCascadeFound == 0; ++iCascadeIndex ) + { + vShadowMapTextureCoord = vShadowMapTextureCoordViewSpace * m_vCascadeScale[iCascadeIndex]; + vShadowMapTextureCoord += m_vCascadeOffset[iCascadeIndex]; + + if ( min( vShadowMapTextureCoord.x, vShadowMapTextureCoord.y ) > m_fMinBorderPadding + && max( vShadowMapTextureCoord.x, vShadowMapTextureCoord.y ) < m_fMaxBorderPadding ) + { + iCurrentCascadeIndex = iCascadeIndex; + iCascadeFound = 1; + } + } + } + } + + float4 color = 0; + + if( BLEND_BETWEEN_CASCADE_LAYERS_FLAG ) + { + // Repeat text coord calculations for the next cascade. + // The next cascade index is used for blurring between maps. 
+ iNextCascadeIndex = min ( CASCADE_COUNT_FLAG - 1, iCurrentCascadeIndex + 1 ); + } + + float fBlendBetweenCascadesAmount = 1.0f; + float fCurrentPixelsBlendBandLocation = 1.0f; + + if( SELECT_CASCADE_BY_INTERVAL_FLAG ) + { + if( BLEND_BETWEEN_CASCADE_LAYERS_FLAG && CASCADE_COUNT_FLAG > 1 ) + { + CalculateBlendAmountForInterval ( iCurrentCascadeIndex, fCurrentPixelDepth, + fCurrentPixelsBlendBandLocation, fBlendBetweenCascadesAmount ); + } + } + else + { + + if( BLEND_BETWEEN_CASCADE_LAYERS_FLAG ) + { + CalculateBlendAmountForMap ( vShadowMapTextureCoord, + fCurrentPixelsBlendBandLocation, fBlendBetweenCascadesAmount ); + } + } + + float3 vShadowMapTextureCoordDDX; + float3 vShadowMapTextureCoordDDY; + // The derivatives are used to find the slope of the current plane. + // The derivative calculation has to be inside of the loop in order to prevent divergent flow control artifacts. + if( USE_DERIVATIVES_FOR_DEPTH_OFFSET_FLAG ) + { + vShadowMapTextureCoordDDX = ddx( vShadowMapTextureCoordViewSpace ); + vShadowMapTextureCoordDDY = ddy( vShadowMapTextureCoordViewSpace ); + + vShadowMapTextureCoordDDX *= m_vCascadeScale[iCurrentCascadeIndex]; + vShadowMapTextureCoordDDY *= m_vCascadeScale[iCurrentCascadeIndex]; + } + + ComputeCoordinatesTransform( iCurrentCascadeIndex, + Input.vInterpPos, + vShadowMapTextureCoord, + vShadowMapTextureCoordViewSpace ); + + + vVisualizeCascadeColor = vCascadeColorsMultiplier[iCurrentCascadeIndex]; + + if( USE_DERIVATIVES_FOR_DEPTH_OFFSET_FLAG ) + { + CalculateRightAndUpTexelDepthDeltas ( vShadowMapTextureCoordDDX, vShadowMapTextureCoordDDY, + fUpTextDepthWeight, fRightTextDepthWeight ); + } + + CalculatePCFPercentLit ( vShadowMapTextureCoord, fRightTextDepthWeight, + fUpTextDepthWeight, fBlurRowSize, fPercentLit ); + + if( BLEND_BETWEEN_CASCADE_LAYERS_FLAG && CASCADE_COUNT_FLAG > 1 ) + { + if( fCurrentPixelsBlendBandLocation < m_fCascadeBlendArea) + { // the current pixel is within the blend band. 
+ + // Repeat text coord calculations for the next cascade. + // The next cascade index is used for blurring between maps. + if( !SELECT_CASCADE_BY_INTERVAL_FLAG ) + { + vShadowMapTextureCoord_blend = vShadowMapTextureCoordViewSpace * m_vCascadeScale[iNextCascadeIndex]; + vShadowMapTextureCoord_blend += m_vCascadeOffset[iNextCascadeIndex]; + } + + ComputeCoordinatesTransform( iNextCascadeIndex, Input.vInterpPos, + vShadowMapTextureCoord_blend, + vShadowMapTextureCoordViewSpace ); + + // We repeat the calcuation for the next cascade layer, when blending between maps. + if( fCurrentPixelsBlendBandLocation < m_fCascadeBlendArea) + { // the current pixel is within the blend band. + if( USE_DERIVATIVES_FOR_DEPTH_OFFSET_FLAG ) + { + + CalculateRightAndUpTexelDepthDeltas ( vShadowMapTextureCoordDDX, + vShadowMapTextureCoordDDY, + fUpTextDepthWeight_blend, + fRightTextDepthWeight_blend ); + } + CalculatePCFPercentLit ( vShadowMapTextureCoord_blend, fRightTextDepthWeight_blend, + fUpTextDepthWeight_blend, fBlurRowSize, fPercentLit_blend ); + fPercentLit = lerp( fPercentLit_blend, fPercentLit, fBlendBetweenCascadesAmount ); + // Blend the two calculated shadows by the blend amount. + } + } + } + + + if( !m_iVisualizeCascades ) vVisualizeCascadeColor = float4(1.0f,1.0f,1.0f,1.0f); + + float3 vLightDir1 = float3( -1.0f, 1.0f, -1.0f ); + float3 vLightDir2 = float3( 1.0f, 1.0f, -1.0f ); + float3 vLightDir3 = float3( 0.0f, -1.0f, 0.0f ); + float3 vLightDir4 = float3( 1.0f, 1.0f, 1.0f ); + // Some ambient-like lighting. 
+ float fLighting = + saturate( dot( vLightDir1 , Input.vNormal ) )*0.05f + + saturate( dot( vLightDir2 , Input.vNormal ) )*0.05f + + saturate( dot( vLightDir3 , Input.vNormal ) )*0.05f + + saturate( dot( vLightDir4 , Input.vNormal ) )*0.05f ; + + float4 vShadowLighting = fLighting * 0.5f; + fLighting += saturate( dot( m_vLightDir , Input.vNormal ) ); + fLighting = lerp( vShadowLighting, fLighting, fPercentLit ); + + return fLighting * vVisualizeCascadeColor * vDiffuse; + +} + diff --git a/tests/hlsl/dxsdk/CascadedShadowMaps11/RenderCascadeShadow.hlsl b/tests/hlsl/dxsdk/CascadedShadowMaps11/RenderCascadeShadow.hlsl new file mode 100644 index 000000000..3b4d32a0d --- /dev/null +++ b/tests/hlsl/dxsdk/CascadedShadowMaps11/RenderCascadeShadow.hlsl @@ -0,0 +1,53 @@ +//TEST:COMPARE_HLSL: -target dxbc-assembly -profile vs_4_0 -entry VSMain -entry VSMainPancake +//-------------------------------------------------------------------------------------- +// File: RenderCascadeShadow.hlsl +// +// The shader file for the RenderCascadeScene sample. +// +// Copyright (c) Microsoft Corporation. All rights reserved. 
+//-------------------------------------------------------------------------------------- + + +//-------------------------------------------------------------------------------------- +// Globals +//-------------------------------------------------------------------------------------- +cbuffer cbPerObject : register( b0 ) +{ + matrix g_mWorldViewProjection : packoffset( c0 ); +}; + +//-------------------------------------------------------------------------------------- +// Input / Output structures +//-------------------------------------------------------------------------------------- +struct VS_INPUT +{ + float4 vPosition : POSITION; +}; + +struct VS_OUTPUT +{ + float4 vPosition : SV_POSITION; +}; + +//-------------------------------------------------------------------------------------- +// Vertex Shader +//-------------------------------------------------------------------------------------- +VS_OUTPUT VSMain( VS_INPUT Input ) +{ + VS_OUTPUT Output; + + // There is nothing special here, just transform and write out the depth. + Output.vPosition = mul( Input.vPosition, g_mWorldViewProjection ); + + return Output; +} + + +VS_OUTPUT VSMainPancake( VS_INPUT Input ) +{ + VS_OUTPUT Output; + // after transform move clipped geometry to near plane + Output.vPosition = mul( Input.vPosition, g_mWorldViewProjection ); + //Output.vPosition.z = max( Output.vPosition.z, 0.0f ); + return Output; +}
\ No newline at end of file diff --git a/tests/hlsl/dxsdk/ComputeShaderSort11/ComputeShaderSort11.hlsl b/tests/hlsl/dxsdk/ComputeShaderSort11/ComputeShaderSort11.hlsl new file mode 100644 index 000000000..db7bd5136 --- /dev/null +++ b/tests/hlsl/dxsdk/ComputeShaderSort11/ComputeShaderSort11.hlsl @@ -0,0 +1,75 @@ +//TEST_IGNORE_FILE: Currently failing due to Spire compiler issues. +//TEST:COMPARE_HLSL: -target dxbc-assembly -profile cs_4_0 -entry BitonicSort -entry MatrixTranspose +//-------------------------------------------------------------------------------------- +// File: ComputeShaderSort11.hlsl +// +// This file contains the compute shaders to perform GPU sorting using DirectX 11. +// +// Copyright (c) Microsoft Corporation. All rights reserved. +//-------------------------------------------------------------------------------------- + +#define BITONIC_BLOCK_SIZE 512 + +#define TRANSPOSE_BLOCK_SIZE 16 + +//-------------------------------------------------------------------------------------- +// Constant Buffers +//-------------------------------------------------------------------------------------- +cbuffer CB : register( b0 ) +{ + unsigned int g_iLevel; + unsigned int g_iLevelMask; + unsigned int g_iWidth; + unsigned int g_iHeight; +}; + +//-------------------------------------------------------------------------------------- +// Structured Buffers +//-------------------------------------------------------------------------------------- +StructuredBuffer<unsigned int> Input : register( t0 ); +RWStructuredBuffer<unsigned int> Data : register( u0 ); + +//-------------------------------------------------------------------------------------- +// Bitonic Sort Compute Shader +//-------------------------------------------------------------------------------------- +groupshared unsigned int shared_data[BITONIC_BLOCK_SIZE]; + +[numthreads(BITONIC_BLOCK_SIZE, 1, 1)] +void BitonicSort( uint3 Gid : SV_GroupID, + uint3 DTid : SV_DispatchThreadID, + uint3 GTid : 
SV_GroupThreadID, + uint GI : SV_GroupIndex ) +{ + // Load shared data + shared_data[GI] = Data[DTid.x]; + GroupMemoryBarrierWithGroupSync(); + + // Sort the shared data + for (unsigned int j = g_iLevel >> 1 ; j > 0 ; j >>= 1) + { + unsigned int result = ((shared_data[GI & ~j] <= shared_data[GI | j]) == (bool)(g_iLevelMask & DTid.x))? shared_data[GI ^ j] : shared_data[GI]; + GroupMemoryBarrierWithGroupSync(); + shared_data[GI] = result; + GroupMemoryBarrierWithGroupSync(); + } + + // Store shared data + Data[DTid.x] = shared_data[GI]; +} + +//-------------------------------------------------------------------------------------- +// Matrix Transpose Compute Shader +//-------------------------------------------------------------------------------------- +groupshared unsigned int transpose_shared_data[TRANSPOSE_BLOCK_SIZE * TRANSPOSE_BLOCK_SIZE]; + +[numthreads(TRANSPOSE_BLOCK_SIZE, TRANSPOSE_BLOCK_SIZE, 1)] +void MatrixTranspose( uint3 Gid : SV_GroupID, + uint3 DTid : SV_DispatchThreadID, + uint3 GTid : SV_GroupThreadID, + uint GI : SV_GroupIndex ) +{ + transpose_shared_data[GI] = Input[DTid.y * g_iWidth + DTid.x]; + GroupMemoryBarrierWithGroupSync(); + uint2 XY = DTid.yx - GTid.yx + GTid.xy; + Data[XY.y * g_iHeight + XY.x] = transpose_shared_data[GTid.x * TRANSPOSE_BLOCK_SIZE + GTid.y]; +} diff --git a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial02/Tutorial02.fx b/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial02/Tutorial02.fx new file mode 100644 index 000000000..941e001b3 --- /dev/null +++ b/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial02/Tutorial02.fx @@ -0,0 +1,23 @@ +//TEST:COMPARE_HLSL: -target dxbc-assembly -profile vs_4_0 -entry VS -profile ps_4_0 -entry PS +//-------------------------------------------------------------------------------------- +// File: Tutorial02.fx +// +// Copyright (c) Microsoft Corporation. All rights reserved. 
+//-------------------------------------------------------------------------------------- + +//-------------------------------------------------------------------------------------- +// Vertex Shader +//-------------------------------------------------------------------------------------- +float4 VS( float4 Pos : POSITION ) : SV_POSITION +{ + return Pos; +} + + +//-------------------------------------------------------------------------------------- +// Pixel Shader +//-------------------------------------------------------------------------------------- +float4 PS( float4 Pos : SV_POSITION ) : SV_Target +{ + return float4( 1.0f, 1.0f, 0.0f, 1.0f ); // Yellow, with Alpha = 1 +} diff --git a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial02/Tutorial02_PS.hlsl b/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial02/Tutorial02_PS.hlsl new file mode 100644 index 000000000..5a59aadc6 --- /dev/null +++ b/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial02/Tutorial02_PS.hlsl @@ -0,0 +1,3 @@ +//TEST_IGNORE_FILE: Currently failing due to Spire compiler issues. +//TEST:COMPARE_HLSL: -target dxbc-assembly -profile ps_4_0 -entry PS +#include "Tutorial02.fx" diff --git a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial02/Tutorial02_VS.hlsl b/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial02/Tutorial02_VS.hlsl new file mode 100644 index 000000000..d58459b78 --- /dev/null +++ b/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial02/Tutorial02_VS.hlsl @@ -0,0 +1,3 @@ +//TEST_IGNORE_FILE: Currently failing due to Spire compiler issues. 
+//TEST:COMPARE_HLSL: -target dxbc-assembly -profile vs_4_0 -entry VS +#include "Tutorial02.fx" diff --git a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial03/Tutorial03.fx b/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial03/Tutorial03.fx new file mode 100644 index 000000000..941e001b3 --- /dev/null +++ b/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial03/Tutorial03.fx @@ -0,0 +1,23 @@ +//TEST:COMPARE_HLSL: -target dxbc-assembly -profile vs_4_0 -entry VS -profile ps_4_0 -entry PS +//-------------------------------------------------------------------------------------- +// File: Tutorial02.fx +// +// Copyright (c) Microsoft Corporation. All rights reserved. +//-------------------------------------------------------------------------------------- + +//-------------------------------------------------------------------------------------- +// Vertex Shader +//-------------------------------------------------------------------------------------- +float4 VS( float4 Pos : POSITION ) : SV_POSITION +{ + return Pos; +} + + +//-------------------------------------------------------------------------------------- +// Pixel Shader +//-------------------------------------------------------------------------------------- +float4 PS( float4 Pos : SV_POSITION ) : SV_Target +{ + return float4( 1.0f, 1.0f, 0.0f, 1.0f ); // Yellow, with Alpha = 1 +} diff --git a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial03/Tutorial03_PS.hlsl b/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial03/Tutorial03_PS.hlsl new file mode 100644 index 000000000..29b6e8b2c --- /dev/null +++ b/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial03/Tutorial03_PS.hlsl @@ -0,0 +1,3 @@ +//TEST_IGNORE_FILE: Currently failing due to Spire compiler issues. 
+//TEST:COMPARE_HLSL: -target dxbc-assembly -profile ps_4_0 -entry PS +#include "Tutorial03.fx" diff --git a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial03/Tutorial03_VS.hlsl b/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial03/Tutorial03_VS.hlsl new file mode 100644 index 000000000..db47ead28 --- /dev/null +++ b/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial03/Tutorial03_VS.hlsl @@ -0,0 +1,3 @@ +//TEST_IGNORE_FILE: Currently failing due to Spire compiler issues. +//TEST:COMPARE_HLSL: -target dxbc-assembly -profile vs_4_0 -entry VS +#include "Tutorial03.fx" diff --git a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial04/Tutorial04.fx b/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial04/Tutorial04.fx new file mode 100644 index 000000000..deb7b585f --- /dev/null +++ b/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial04/Tutorial04.fx @@ -0,0 +1,46 @@ +//TEST_IGNORE_FILE: Currently failing due to Spire compiler issues. +//TEST:COMPARE_HLSL: -target dxbc-assembly -profile vs_4_0 -entry VS -profile ps_4_0 -entry PS +//-------------------------------------------------------------------------------------- +// File: Tutorial04.fx +// +// Copyright (c) Microsoft Corporation. All rights reserved. 
+//-------------------------------------------------------------------------------------- + +//-------------------------------------------------------------------------------------- +// Constant Buffer Variables +//-------------------------------------------------------------------------------------- +cbuffer ConstantBuffer : register( b0 ) +{ + matrix World; + matrix View; + matrix Projection; +} + +//-------------------------------------------------------------------------------------- +struct VS_OUTPUT +{ + float4 Pos : SV_POSITION; + float4 Color : COLOR0; +}; + +//-------------------------------------------------------------------------------------- +// Vertex Shader +//-------------------------------------------------------------------------------------- +VS_OUTPUT VS( float4 Pos : POSITION, float4 Color : COLOR ) +{ + VS_OUTPUT output = (VS_OUTPUT)0; + output.Pos = mul( Pos, World ); + output.Pos = mul( output.Pos, View ); + output.Pos = mul( output.Pos, Projection ); + output.Color = Color; + return output; +} + + +//-------------------------------------------------------------------------------------- +// Pixel Shader +//-------------------------------------------------------------------------------------- +float4 PS( VS_OUTPUT input ) : SV_Target +{ + return input.Color; +} diff --git a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial04/Tutorial04_PS.hlsl b/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial04/Tutorial04_PS.hlsl new file mode 100644 index 000000000..dc627637c --- /dev/null +++ b/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial04/Tutorial04_PS.hlsl @@ -0,0 +1,3 @@ +//TEST_IGNORE_FILE: Currently failing due to Spire compiler issues. 
+//TEST:COMPARE_HLSL: -target dxbc-assembly -profile ps_4_0 -entry PS +#include "Tutorial04.fx" diff --git a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial04/Tutorial04_VS.hlsl b/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial04/Tutorial04_VS.hlsl new file mode 100644 index 000000000..96d0a642c --- /dev/null +++ b/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial04/Tutorial04_VS.hlsl @@ -0,0 +1,3 @@ +//TEST_IGNORE_FILE: Currently failing due to Spire compiler issues. +//TEST:COMPARE_HLSL: -target dxbc-assembly -profile vs_4_0 -entry VS +#include "Tutorial04.fx" diff --git a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial05/Tutorial05.fx b/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial05/Tutorial05.fx new file mode 100644 index 000000000..b15c99e49 --- /dev/null +++ b/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial05/Tutorial05.fx @@ -0,0 +1,54 @@ +//TEST_IGNORE_FILE: Currently failing due to Spire compiler issues. +//TEST:COMPARE_HLSL: -target dxbc-assembly -profile vs_4_0 -entry VS -profile ps_4_0 -entry PS +//-------------------------------------------------------------------------------------- +// File: Tutorial05.fx +// +// Copyright (c) Microsoft Corporation. All rights reserved. 
+//-------------------------------------------------------------------------------------- + +//-------------------------------------------------------------------------------------- +// Constant Buffer Variables +//-------------------------------------------------------------------------------------- +cbuffer ConstantBuffer : register( b0 ) +{ + matrix World; + matrix View; + matrix Projection; +} + +//-------------------------------------------------------------------------------------- +struct VS_INPUT +{ + float4 Pos : POSITION; + float4 Color : COLOR; +}; + +struct PS_INPUT +{ + float4 Pos : SV_POSITION; + float4 Color : COLOR; +}; + + +//-------------------------------------------------------------------------------------- +// Vertex Shader +//-------------------------------------------------------------------------------------- +PS_INPUT VS( VS_INPUT input ) +{ + PS_INPUT output = (PS_INPUT)0; + output.Pos = mul( input.Pos, World ); + output.Pos = mul( output.Pos, View ); + output.Pos = mul( output.Pos, Projection ); + output.Color = input.Color; + + return output; +} + + +//-------------------------------------------------------------------------------------- +// Pixel Shader +//-------------------------------------------------------------------------------------- +float4 PS( PS_INPUT input) : SV_Target +{ + return input.Color; +} diff --git a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial05/Tutorial05_PS.hlsl b/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial05/Tutorial05_PS.hlsl new file mode 100644 index 000000000..acc900ff5 --- /dev/null +++ b/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial05/Tutorial05_PS.hlsl @@ -0,0 +1,3 @@ +//TEST_IGNORE_FILE: Currently failing due to Spire compiler issues. 
+//TEST:COMPARE_HLSL: -target dxbc-assembly -profile ps_4_0 -entry PS +#include "Tutorial05.fx" diff --git a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial05/Tutorial05_VS.hlsl b/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial05/Tutorial05_VS.hlsl new file mode 100644 index 000000000..726f05979 --- /dev/null +++ b/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial05/Tutorial05_VS.hlsl @@ -0,0 +1,3 @@ +//TEST_IGNORE_FILE: Currently failing due to Spire compiler issues. +//TEST:COMPARE_HLSL: -target dxbc-assembly -profile vs_4_0 -entry VS +#include "Tutorial05.fx" diff --git a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial06/Tutorial06.fx b/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial06/Tutorial06.fx new file mode 100644 index 000000000..7d839009d --- /dev/null +++ b/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial06/Tutorial06.fx @@ -0,0 +1,76 @@ +//TEST_IGNORE_FILE: Currently failing due to Spire compiler issues. +//TEST:COMPARE_HLSL: -target dxbc-assembly -profile vs_4_0 -entry VS -profile ps_4_0 -entry PS -entry PSSolid +//-------------------------------------------------------------------------------------- +// File: Tutorial06.fx +// +// Copyright (c) Microsoft Corporation. All rights reserved. 
+//-------------------------------------------------------------------------------------- + + +//-------------------------------------------------------------------------------------- +// Constant Buffer Variables +//-------------------------------------------------------------------------------------- +cbuffer ConstantBuffer : register( b0 ) +{ + matrix World; + matrix View; + matrix Projection; + float4 vLightDir[2]; + float4 vLightColor[2]; + float4 vOutputColor; +} + + +//-------------------------------------------------------------------------------------- +struct VS_INPUT +{ + float4 Pos : POSITION; + float3 Norm : NORMAL; +}; + +struct PS_INPUT +{ + float4 Pos : SV_POSITION; + float3 Norm : TEXCOORD0; +}; + + +//-------------------------------------------------------------------------------------- +// Vertex Shader +//-------------------------------------------------------------------------------------- +PS_INPUT VS( VS_INPUT input ) +{ + PS_INPUT output = (PS_INPUT)0; + output.Pos = mul( input.Pos, World ); + output.Pos = mul( output.Pos, View ); + output.Pos = mul( output.Pos, Projection ); + output.Norm = mul( float4( input.Norm, 1 ), World ).xyz; + + return output; +} + + +//-------------------------------------------------------------------------------------- +// Pixel Shader +//-------------------------------------------------------------------------------------- +float4 PS( PS_INPUT input) : SV_Target +{ + float4 finalColor = 0; + + //do NdotL lighting for 2 lights + for(int i=0; i<2; i++) + { + finalColor += saturate( dot( (float3)vLightDir[i],input.Norm) * vLightColor[i] ); + } + finalColor.a = 1; + return finalColor; +} + + +//-------------------------------------------------------------------------------------- +// PSSolid - render a solid color +//-------------------------------------------------------------------------------------- +float4 PSSolid( PS_INPUT input) : SV_Target +{ + return vOutputColor; +} diff --git 
a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial06/Tutorial06_PS.hlsl b/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial06/Tutorial06_PS.hlsl new file mode 100644 index 000000000..31ed082e7 --- /dev/null +++ b/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial06/Tutorial06_PS.hlsl @@ -0,0 +1,3 @@ +//TEST_IGNORE_FILE: Currently failing due to Spire compiler issues. +//TEST:COMPARE_HLSL: -target dxbc-assembly -profile ps_4_0 -entry PS +#include "Tutorial06.fx" diff --git a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial06/Tutorial06_VS.hlsl b/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial06/Tutorial06_VS.hlsl new file mode 100644 index 000000000..a5512efb6 --- /dev/null +++ b/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial06/Tutorial06_VS.hlsl @@ -0,0 +1,3 @@ +//TEST_IGNORE_FILE: Currently failing due to Spire compiler issues. +//TEST:COMPARE_HLSL: -target dxbc-assembly -profile vs_4_0 -entry VS +#include "Tutorial06.fx" diff --git a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial07/Tutorial07.fx b/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial07/Tutorial07.fx new file mode 100644 index 000000000..0baad7a0c --- /dev/null +++ b/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial07/Tutorial07.fx @@ -0,0 +1,67 @@ +//TEST_IGNORE_FILE: Currently failing due to Spire compiler issues. +//TEST:COMPARE_HLSL: -target dxbc-assembly -profile vs_4_0 -entry VS -profile ps_4_0 -entry PS +//-------------------------------------------------------------------------------------- +// File: Tutorial07.fx +// +// Copyright (c) Microsoft Corporation. All rights reserved. 
+//-------------------------------------------------------------------------------------- + +//-------------------------------------------------------------------------------------- +// Constant Buffer Variables +//-------------------------------------------------------------------------------------- +Texture2D txDiffuse : register( t0 ); +SamplerState samLinear : register( s0 ); + +cbuffer cbNeverChanges : register( b0 ) +{ + matrix View; +}; + +cbuffer cbChangeOnResize : register( b1 ) +{ + matrix Projection; +}; + +cbuffer cbChangesEveryFrame : register( b2 ) +{ + matrix World; + float4 vMeshColor; +}; + + +//-------------------------------------------------------------------------------------- +struct VS_INPUT +{ + float4 Pos : POSITION; + float2 Tex : TEXCOORD0; +}; + +struct PS_INPUT +{ + float4 Pos : SV_POSITION; + float2 Tex : TEXCOORD0; +}; + + +//-------------------------------------------------------------------------------------- +// Vertex Shader +//-------------------------------------------------------------------------------------- +PS_INPUT VS( VS_INPUT input ) +{ + PS_INPUT output = (PS_INPUT)0; + output.Pos = mul( input.Pos, World ); + output.Pos = mul( output.Pos, View ); + output.Pos = mul( output.Pos, Projection ); + output.Tex = input.Tex; + + return output; +} + + +//-------------------------------------------------------------------------------------- +// Pixel Shader +//-------------------------------------------------------------------------------------- +float4 PS( PS_INPUT input) : SV_Target +{ + return txDiffuse.Sample( samLinear, input.Tex ) * vMeshColor; +} diff --git a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial07/Tutorial07_PS.hlsl b/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial07/Tutorial07_PS.hlsl new file mode 100644 index 000000000..c3c101943 --- /dev/null +++ b/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial07/Tutorial07_PS.hlsl @@ -0,0 +1,3 @@ +//TEST_IGNORE_FILE: Currently failing due to Spire compiler issues. 
+//TEST:COMPARE_HLSL: -target dxbc-assembly -profile ps_4_0 -entry PS +#include "Tutorial07.fx" diff --git a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial07/Tutorial07_VS.hlsl b/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial07/Tutorial07_VS.hlsl new file mode 100644 index 000000000..4c287c790 --- /dev/null +++ b/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial07/Tutorial07_VS.hlsl @@ -0,0 +1,3 @@ +//TEST_IGNORE_FILE: Currently failing due to Spire compiler issues. +//TEST:COMPARE_HLSL: -target dxbc-assembly -profile vs_4_0 -entry VS +#include "Tutorial07.fx" diff --git a/tests/hlsl/dxsdk/Direct3D11TutorialsDXUT/Tutorial08/Tutorial08.fx b/tests/hlsl/dxsdk/Direct3D11TutorialsDXUT/Tutorial08/Tutorial08.fx new file mode 100644 index 000000000..6ff313b97 --- /dev/null +++ b/tests/hlsl/dxsdk/Direct3D11TutorialsDXUT/Tutorial08/Tutorial08.fx @@ -0,0 +1,56 @@ +//TEST_IGNORE_FILE: Currently failing due to Spire compiler issues. +//TEST:COMPARE_HLSL: -target dxbc-assembly -profile vs_4_0 -entry VS -profile ps_4_0 -entry PS +//-------------------------------------------------------------------------------------- +// File: Tutorial08.fx +// +// Copyright (c) Microsoft Corporation. All rights reserved. 
+//-------------------------------------------------------------------------------------- + +//-------------------------------------------------------------------------------------- +// Constant Buffer Variables +//-------------------------------------------------------------------------------------- +Texture2D txDiffuse : register( t0 ); +SamplerState samLinear : register( s0 ); + +cbuffer cbChangesEveryFrame : register( b0 ) +{ + matrix WorldViewProj; + matrix World; + float4 vMeshColor; +}; + + +//-------------------------------------------------------------------------------------- +struct VS_INPUT +{ + float4 Pos : POSITION; + float2 Tex : TEXCOORD; +}; + +struct PS_INPUT +{ + float4 Pos : SV_POSITION; + float2 Tex : TEXCOORD0; +}; + + +//-------------------------------------------------------------------------------------- +// Vertex Shader +//-------------------------------------------------------------------------------------- +PS_INPUT VS( VS_INPUT input ) +{ + PS_INPUT output = (PS_INPUT)0; + output.Pos = mul( input.Pos, WorldViewProj ); + output.Tex = input.Tex; + + return output; +} + + +//-------------------------------------------------------------------------------------- +// Pixel Shader +//-------------------------------------------------------------------------------------- +float4 PS( PS_INPUT input) : SV_Target +{ + return txDiffuse.Sample( samLinear, input.Tex ) * vMeshColor; +} diff --git a/tests/hlsl/dxsdk/Direct3D11TutorialsDXUT/Tutorial09/Tutorial09.fx b/tests/hlsl/dxsdk/Direct3D11TutorialsDXUT/Tutorial09/Tutorial09.fx new file mode 100644 index 000000000..04a395588 --- /dev/null +++ b/tests/hlsl/dxsdk/Direct3D11TutorialsDXUT/Tutorial09/Tutorial09.fx @@ -0,0 +1,69 @@ +//TEST_IGNORE_FILE: Currently failing due to Spire compiler issues. 
+//TEST:COMPARE_HLSL: -target dxbc-assembly -profile vs_4_0 -entry VS -profile ps_4_0 -entry PS +//-------------------------------------------------------------------------------------- +// File: Tutorial09.fx +// +// Copyright (c) Microsoft Corporation. All rights reserved. +//-------------------------------------------------------------------------------------- + +//-------------------------------------------------------------------------------------- +// Constant Buffer Variables +//-------------------------------------------------------------------------------------- +Texture2D txDiffuse : register( t0 ); +SamplerState samLinear : register( s0 ); + +cbuffer cbNeverChanges : register( b0 ) +{ + float3 vLightDir; +}; + +cbuffer cbChangesEveryFrame : register( b1 ) +{ + matrix WorldViewProj; + matrix World; +}; + +struct VS_INPUT +{ + float3 Pos : POSITION; //position + float3 Norm : NORMAL; //normal + float2 Tex : TEXCOORD0; //texture coordinate +}; + +struct PS_INPUT +{ + float4 Pos : SV_POSITION; + float4 Diffuse : COLOR0; + float2 Tex : TEXCOORD1; +}; + + +//-------------------------------------------------------------------------------------- +// Vertex Shader +//-------------------------------------------------------------------------------------- +PS_INPUT VS( VS_INPUT input ) +{ + PS_INPUT output = (PS_INPUT)0; + output.Pos = mul( float4(input.Pos,1), WorldViewProj ); + float3 vNormalWorldSpace = normalize( mul( input.Norm, (float3x3)World ) ); + + float fLighting = saturate( dot( vNormalWorldSpace, vLightDir ) ); + output.Diffuse.rgb = fLighting; + output.Diffuse.a = 1.0f; + + output.Tex = input.Tex; + + return output; +} + + +//-------------------------------------------------------------------------------------- +// Pixel Shader +//-------------------------------------------------------------------------------------- +float4 PS( PS_INPUT input) : SV_Target +{ + //calculate lighting assuming light color is <1,1,1,1> + float4 outputColor = 
txDiffuse.Sample( samLinear, input.Tex ) * input.Diffuse; + outputColor.a = 1; + return outputColor; +} diff --git a/tests/hlsl/dxsdk/Direct3D11TutorialsDXUT/Tutorial10/Tutorial10.fx b/tests/hlsl/dxsdk/Direct3D11TutorialsDXUT/Tutorial10/Tutorial10.fx new file mode 100644 index 000000000..e9bded408 --- /dev/null +++ b/tests/hlsl/dxsdk/Direct3D11TutorialsDXUT/Tutorial10/Tutorial10.fx @@ -0,0 +1,73 @@ +//TEST_IGNORE_FILE: Currently failing due to Spire compiler issues. +//TEST:COMPARE_HLSL: -target dxbc-assembly -profile vs_4_0 -entry VS -profile ps_4_0 -entry PS +//-------------------------------------------------------------------------------------- +// File: Tutorial10.fx +// +// Copyright (c) Microsoft Corporation. All rights reserved. +//-------------------------------------------------------------------------------------- + +//-------------------------------------------------------------------------------------- +// Constant Buffer Variables +//-------------------------------------------------------------------------------------- +Texture2D txDiffuse : register( t0 ); +SamplerState samLinear : register( s0 ); + +cbuffer cbNeverChanges : register( b0 ) +{ + float3 vLightDir; +}; + +cbuffer cbChangesEveryFrame : register( b1 ) +{ + matrix WorldViewProj; + matrix World; + float Puffiness; +}; + +struct VS_INPUT +{ + float3 Pos : POSITION; //position + float3 Norm : NORMAL; //normal + float2 Tex : TEXCOORD0; //texture coordinate +}; + +struct PS_INPUT +{ + float4 Pos : SV_POSITION; + float4 Diffuse : COLOR0; + float2 Tex : TEXCOORD1; +}; + + +//-------------------------------------------------------------------------------------- +// Vertex Shader +//-------------------------------------------------------------------------------------- +PS_INPUT VS( VS_INPUT input ) +{ + PS_INPUT output = (PS_INPUT)0; + + input.Pos += input.Norm * Puffiness; + + output.Pos = mul( float4(input.Pos,1), WorldViewProj ); + float3 vNormalWorldSpace = normalize( mul( input.Norm, 
(float3x3)World ) ); + + float fLighting = saturate( dot( vNormalWorldSpace, vLightDir ) ); + output.Diffuse.rgb = fLighting; + output.Diffuse.a = 1.0f; + + output.Tex = input.Tex; + + return output; +} + + +//-------------------------------------------------------------------------------------- +// Pixel Shader +//-------------------------------------------------------------------------------------- +float4 PS( PS_INPUT input) : SV_Target +{ + //calculate lighting assuming light color is <1,1,1,1> + float4 outputColor = txDiffuse.Sample( samLinear, input.Tex ) * input.Diffuse; + outputColor.a = 1; + return outputColor; +} diff --git a/tests/hlsl/dxsdk/Direct3D11TutorialsFX11/Tutorial11/Tutorial11.fx b/tests/hlsl/dxsdk/Direct3D11TutorialsFX11/Tutorial11/Tutorial11.fx new file mode 100644 index 000000000..a647a9079 --- /dev/null +++ b/tests/hlsl/dxsdk/Direct3D11TutorialsFX11/Tutorial11/Tutorial11.fx @@ -0,0 +1,117 @@ +//TEST_IGNORE_FILE: +//-------------------------------------------------------------------------------------- +// File: Tutorial11.fx +// +// Copyright (c) Microsoft Corporation. All rights reserved. 
+//-------------------------------------------------------------------------------------- + + +//-------------------------------------------------------------------------------------- +// Constant Buffer Variables +//-------------------------------------------------------------------------------------- +Texture2D g_txDiffuse; +SamplerState samLinear +{ + Filter = MIN_MAG_MIP_LINEAR; + AddressU = Wrap; + AddressV = Wrap; +}; + +cbuffer cbConstant +{ + float3 vLightDir = float3(-0.577,0.577,-0.577); +}; + +cbuffer cbChangesEveryFrame +{ + matrix World; + matrix View; + matrix Projection; + float Time; +}; + +cbuffer cbUserChanges +{ + float Waviness; +}; + +struct VS_INPUT +{ + float3 Pos : POSITION; + float3 Norm : NORMAL; + float2 Tex : TEXCOORD0; +}; + +struct PS_INPUT +{ + float4 Pos : SV_POSITION; + float3 Norm : TEXCOORD0; + float2 Tex : TEXCOORD1; +}; + +//-------------------------------------------------------------------------------------- +// DepthStates +//-------------------------------------------------------------------------------------- +DepthStencilState EnableDepth +{ + DepthEnable = TRUE; + DepthWriteMask = ALL; + DepthFunc = LESS_EQUAL; +}; + +BlendState NoBlending +{ + AlphaToCoverageEnable = FALSE; + BlendEnable[0] = FALSE; +}; + + +//-------------------------------------------------------------------------------------- +// Vertex Shader +//-------------------------------------------------------------------------------------- +PS_INPUT VS( VS_INPUT input ) +{ + PS_INPUT output = (PS_INPUT)0; + + output.Pos = mul( float4(input.Pos,1), World ); + + output.Pos.x += sin( output.Pos.y*0.1f + Time )*Waviness; + + output.Pos = mul( output.Pos, View ); + output.Pos = mul( output.Pos, Projection ); + output.Norm = mul( input.Norm, World ); + output.Tex = input.Tex; + + return output; +} + + +//-------------------------------------------------------------------------------------- +// Pixel Shader 
+//-------------------------------------------------------------------------------------- +float4 PS( PS_INPUT input) : SV_Target +{ + // Calculate lighting assuming light color is <1,1,1,1> + float fLighting = saturate( dot( input.Norm, vLightDir ) ); + float4 outputColor = g_txDiffuse.Sample( samLinear, input.Tex ) * fLighting; + outputColor.a = 1; + return outputColor; +} + + +//-------------------------------------------------------------------------------------- +// Technique +//-------------------------------------------------------------------------------------- +technique11 Render +{ + pass P0 + { + SetVertexShader( CompileShader( vs_4_0, VS() ) ); + SetGeometryShader( NULL ); + SetPixelShader( CompileShader( ps_4_0, PS() ) ); + + SetDepthStencilState( EnableDepth, 0 ); + SetBlendState( NoBlending, float4( 0.0f, 0.0f, 0.0f, 0.0f ), 0xFFFFFFFF ); + } +} + diff --git a/tests/hlsl/dxsdk/Direct3D11TutorialsFX11/Tutorial12/Tutorial12.fx b/tests/hlsl/dxsdk/Direct3D11TutorialsFX11/Tutorial12/Tutorial12.fx new file mode 100644 index 000000000..aae7f9a87 --- /dev/null +++ b/tests/hlsl/dxsdk/Direct3D11TutorialsFX11/Tutorial12/Tutorial12.fx @@ -0,0 +1,129 @@ +//TEST_IGNORE_FILE: +// +// Constant Buffer Variables +// + +Texture2D g_txDiffuse; +SamplerState samLinear +{ + Filter = MIN_MAG_MIP_LINEAR; + AddressU = Wrap; + AddressV = Wrap; +}; + +TextureCube g_txEnvMap; +SamplerState samLinearClamp +{ + Filter = MIN_MAG_MIP_LINEAR; + AddressU = Clamp; + AddressV = Clamp; +}; + +cbuffer cbConstant +{ + float3 vLightDir = float3(-0.577,0.577,-0.577); +}; + +cbuffer cbChangesEveryFrame +{ + matrix World; + matrix View; + matrix Projection; + float Time; +}; + +cbuffer cbUserChanges +{ + float Waviness; +}; + +struct VS_INPUT +{ + float3 Pos : POSITION; //position + float3 Norm : NORMAL; //normal + float2 Tex : TEXCOORD0; //texture coordinate +}; + +struct PS_INPUT +{ + float4 Pos : SV_POSITION; + float3 Norm : TEXCOORD0; + float2 Tex : TEXCOORD1; + float3 ViewR : TEXCOORD2; 
+}; + +//-------------------------------------------------------------------------------------- +// DepthStates +//-------------------------------------------------------------------------------------- +DepthStencilState EnableDepth +{ + DepthEnable = TRUE; + DepthWriteMask = ALL; + DepthFunc = LESS_EQUAL; +}; + +BlendState NoBlending +{ + AlphaToCoverageEnable = FALSE; + BlendEnable[0] = FALSE; +}; + +// +// Vertex Shader +// +PS_INPUT VS( VS_INPUT input ) +{ + PS_INPUT output = (PS_INPUT)0; + + output.Pos = mul( float4(input.Pos,1), World ); + + output.Pos.x += sin( output.Pos.y*0.1f + Time )*Waviness; + + output.Pos = mul( output.Pos, View ); + output.Pos = mul( output.Pos, Projection ); + output.Norm = mul( input.Norm, (float3x3)World ); + output.Tex = input.Tex; + + // Calculate the reflection vector + float3 viewNorm = mul( output.Norm, (float3x3)View ); + output.ViewR = reflect( viewNorm, float3(0,0,-1.0) ); + + return output; +} + + +// +// Pixel Shader +// +float4 PS( PS_INPUT input) : SV_Target +{ + // Calculate lighting assuming light color is <1,1,1,1> + float fLighting = saturate( dot( input.Norm, vLightDir ) ); + + // Load the environment map texture + float4 cReflect = g_txEnvMap.Sample( samLinearClamp, input.ViewR ); + + // Load the diffuse texture and multiply by the lighting amount + float4 cDiffuse = g_txDiffuse.Sample( samLinear, input.Tex ) * fLighting; + + // Add diffuse to reflection and go + float4 cTotal = cDiffuse + cReflect; + cTotal.a = 1; + return cTotal; +} + +// +// Technique +// +technique11 Render +{ + pass P0 + { + SetVertexShader( CompileShader( vs_4_0, VS() ) ); + SetGeometryShader( NULL ); + SetPixelShader( CompileShader( ps_4_0, PS() ) ); + + SetDepthStencilState( EnableDepth, 0 ); + SetBlendState( NoBlending, float4( 0.0f, 0.0f, 0.0f, 0.0f ), 0xFFFFFFFF ); + } +} diff --git a/tests/hlsl/dxsdk/Direct3D11TutorialsFX11/Tutorial13/Tutorial13.fx b/tests/hlsl/dxsdk/Direct3D11TutorialsFX11/Tutorial13/Tutorial13.fx new file mode 
100644 index 000000000..a6f09ecc7 --- /dev/null +++ b/tests/hlsl/dxsdk/Direct3D11TutorialsFX11/Tutorial13/Tutorial13.fx @@ -0,0 +1,191 @@ +//TEST_IGNORE_FILE: +//-------------------------------------------------------------------------------------- +// File: Tutorial13.fx +// +// Copyright (c) Microsoft Corporation. All rights reserved. +//-------------------------------------------------------------------------------------- + + +//-------------------------------------------------------------------------------------- +// Constant Buffer Variables +//-------------------------------------------------------------------------------------- +Texture2D g_txDiffuse; +SamplerState samLinear +{ + Filter = MIN_MAG_MIP_LINEAR; + AddressU = Wrap; + AddressV = Wrap; +}; + +TextureCube g_txEnvMap; +SamplerState samLinearClamp +{ + Filter = MIN_MAG_MIP_LINEAR; + AddressU = Clamp; + AddressV = Clamp; +}; + +cbuffer cbConstant +{ + float3 vLightDir = float3(-0.577,0.577,-0.577); +}; + +cbuffer cbChangesEveryFrame +{ + matrix World; + matrix View; + matrix Projection; + float Time; +}; + +cbuffer cbUserChanges +{ + float Explode; +}; + +struct VS_INPUT +{ + float3 Pos : POSITION; + float3 Norm : NORMAL; + float2 Tex : TEXCOORD0; +}; + +struct GSPS_INPUT +{ + float4 Pos : SV_POSITION; + float3 Norm : TEXCOORD0; + float2 Tex : TEXCOORD1; +}; + +//-------------------------------------------------------------------------------------- +// DepthStates +//-------------------------------------------------------------------------------------- +DepthStencilState EnableDepth +{ + DepthEnable = TRUE; + DepthWriteMask = ALL; + DepthFunc = LESS_EQUAL; +}; + +BlendState NoBlending +{ + AlphaToCoverageEnable = FALSE; + BlendEnable[0] = FALSE; +}; + + +//-------------------------------------------------------------------------------------- +// Vertex Shader +//-------------------------------------------------------------------------------------- +GSPS_INPUT VS( VS_INPUT input ) +{ + GSPS_INPUT output 
= (GSPS_INPUT)0; + + output.Pos = mul( float4(input.Pos,1), World ); + output.Norm = mul( input.Norm, (float3x3)World ); + output.Tex = input.Tex; + + return output; +} + + +//-------------------------------------------------------------------------------------- +// Geometry Shader +//-------------------------------------------------------------------------------------- +[maxvertexcount(12)] +void GS( triangle GSPS_INPUT input[3], inout TriangleStream<GSPS_INPUT> TriStream ) +{ + GSPS_INPUT output; + + // + // Calculate the face normal + // + float3 faceEdgeA = input[1].Pos - input[0].Pos; + float3 faceEdgeB = input[2].Pos - input[0].Pos; + float3 faceNormal = normalize( cross(faceEdgeA, faceEdgeB) ); + float3 ExplodeAmt = faceNormal*Explode; + + // + // Calculate the face center + // + float3 centerPos = (input[0].Pos.xyz + input[1].Pos.xyz + input[2].Pos.xyz)/3.0; + float2 centerTex = (input[0].Tex + input[1].Tex + input[2].Tex)/3.0; + centerPos += faceNormal*Explode; + + // + // Output the pyramid + // + for( int i=0; i<3; i++ ) + { + output.Pos = input[i].Pos + float4(ExplodeAmt,0); + output.Pos = mul( output.Pos, View ); + output.Pos = mul( output.Pos, Projection ); + output.Norm = input[i].Norm; + output.Tex = input[i].Tex; + TriStream.Append( output ); + + int iNext = (i+1)%3; + output.Pos = input[iNext].Pos + float4(ExplodeAmt,0); + output.Pos = mul( output.Pos, View ); + output.Pos = mul( output.Pos, Projection ); + output.Norm = input[iNext].Norm; + output.Tex = input[iNext].Tex; + TriStream.Append( output ); + + output.Pos = float4(centerPos,1) + float4(ExplodeAmt,0); + output.Pos = mul( output.Pos, View ); + output.Pos = mul( output.Pos, Projection ); + output.Norm = faceNormal; + output.Tex = centerTex; + TriStream.Append( output ); + + TriStream.RestartStrip(); + } + + for( int i=2; i>=0; i-- ) + { + output.Pos = input[i].Pos + float4(ExplodeAmt,0); + output.Pos = mul( output.Pos, View ); + output.Pos = mul( output.Pos, Projection ); + output.Norm = 
-input[i].Norm; + output.Tex = input[i].Tex; + TriStream.Append( output ); + } + TriStream.RestartStrip(); +} + + +//-------------------------------------------------------------------------------------- +// Pixel Shader +//-------------------------------------------------------------------------------------- +float4 PS( GSPS_INPUT input) : SV_Target +{ + // Calculate lighting assuming light color is <1,1,1,1> + float fLighting = saturate( dot( input.Norm, vLightDir ) ); + + // Load the diffuse texture and multiply by the lighting amount + float4 cDiffuse = g_txDiffuse.Sample( samLinear, input.Tex ) * fLighting; + cDiffuse.a = 1; + + // return diffuse + return cDiffuse; +} + + +//-------------------------------------------------------------------------------------- +// Technique +//-------------------------------------------------------------------------------------- +technique11 Render +{ + pass P0 + { + SetVertexShader( CompileShader( vs_4_0, VS() ) ); + SetGeometryShader( CompileShader( gs_4_0, GS() ) ); + SetPixelShader( CompileShader( ps_4_0, PS() ) ); + + SetBlendState( NoBlending, float4( 0.0f, 0.0f, 0.0f, 0.0f ), 0xFFFFFFFF ); + SetDepthStencilState( EnableDepth, 0 ); + } +} + + diff --git a/tests/hlsl/dxsdk/Direct3D11TutorialsFX11/Tutorial14/Tutorial14.fx b/tests/hlsl/dxsdk/Direct3D11TutorialsFX11/Tutorial14/Tutorial14.fx new file mode 100644 index 000000000..b1e45b842 --- /dev/null +++ b/tests/hlsl/dxsdk/Direct3D11TutorialsFX11/Tutorial14/Tutorial14.fx @@ -0,0 +1,294 @@ +//TEST_IGNORE_FILE: +//-------------------------------------------------------------------------------------- +// File: Tutorial14.fx +// +// Copyright (c) Microsoft Corporation. All rights reserved. 
+//-------------------------------------------------------------------------------------- + + +//-------------------------------------------------------------------------------------- +// Constant Buffer Variables +//-------------------------------------------------------------------------------------- +Texture2D g_txDiffuse; +SamplerState samLinear +{ + Filter = MIN_MAG_MIP_LINEAR; + AddressU = Wrap; + AddressV = Wrap; +}; + +cbuffer cbConstant +{ + float3 vLightDir = float3(-0.577,0.577,-0.577); +}; + +cbuffer cbChangesEveryFrame +{ + matrix World; + matrix View; + matrix Projection; +}; + +struct VS_INPUT +{ + float3 Pos : POSITION; //position + float3 Norm : NORMAL; //normal + float2 Tex : TEXCOORD0; //texture coordinate +}; + +struct PS_INPUT +{ + float4 Pos : SV_POSITION; + float3 Norm : TEXCOORD0; + float2 Tex : TEXCOORD1; +}; + +struct QUADVS_INPUT +{ + float4 Pos : POSITION; + float2 Tex : TEXCOORD0; +}; + +struct QUADVS_OUTPUT +{ + float4 Pos : SV_POSITION; // Transformed position + float2 Tex : TEXCOORD0; +}; + +//-------------------------------------------------------------------------------------- +// Blending States +//-------------------------------------------------------------------------------------- +BlendState NoBlending +{ + BlendEnable[0] = FALSE; +}; + +BlendState SrcAlphaBlendingAdd +{ + BlendEnable[0] = TRUE; + SrcBlend = SRC_ALPHA; + DestBlend = ONE; + BlendOp = ADD; + SrcBlendAlpha = ZERO; + DestBlendAlpha = ZERO; + BlendOpAlpha = ADD; + RenderTargetWriteMask[0] = 0x0F; +}; + +BlendState SrcAlphaBlendingSub +{ + BlendEnable[0] = TRUE; + SrcBlend = SRC_ALPHA; + DestBlend = ONE; + BlendOp = SUBTRACT; + SrcBlendAlpha = ZERO; + DestBlendAlpha = ZERO; + BlendOpAlpha = ADD; + RenderTargetWriteMask[0] = 0x0F; +}; + +BlendState SrcColorBlendingAdd +{ + BlendEnable[0] = TRUE; + SrcBlend = SRC_COLOR; + DestBlend = ONE; + BlendOp = ADD; + SrcBlendAlpha = ZERO; + DestBlendAlpha = ZERO; + BlendOpAlpha = ADD; + RenderTargetWriteMask[0] = 0x0F; +}; + 
+BlendState SrcColorBlendingSub +{ + BlendEnable[0] = TRUE; + SrcBlend = SRC_COLOR; + DestBlend = ONE; + BlendOp = SUBTRACT; + SrcBlendAlpha = ZERO; + DestBlendAlpha = ZERO; + BlendOpAlpha = ADD; + RenderTargetWriteMask[0] = 0x0F; +}; + +//-------------------------------------------------------------------------------------- +// Depth/Stencil States +//-------------------------------------------------------------------------------------- +DepthStencilState RenderWithStencilState +{ + DepthEnable = false; + DepthWriteMask = ZERO; + DepthFunc = Less; + + // Setup stencil states + StencilEnable = true; + StencilReadMask = 0xFF; + StencilWriteMask = 0x00; + + FrontFaceStencilFunc = Not_Equal; + FrontFaceStencilPass = Keep; + FrontFaceStencilFail = Zero; + + BackFaceStencilFunc = Not_Equal; + BackFaceStencilPass = Keep; + BackFaceStencilFail = Zero; +}; + + + +//-------------------------------------------------------------------------------------- +// Scene Vertex Shader +//-------------------------------------------------------------------------------------- +PS_INPUT VS( VS_INPUT input ) +{ + PS_INPUT output = (PS_INPUT)0; + + output.Pos = mul( float4(input.Pos,1), World ); + output.Pos = mul( output.Pos, View ); + output.Pos = mul( output.Pos, Projection ); + output.Norm = mul( input.Norm, World ); + output.Tex = input.Tex; + + return output; +} + +//----------------------------------------------------------------------------- +// Quad Vertex Shaders +//----------------------------------------------------------------------------- +QUADVS_OUTPUT QuadVS( QUADVS_INPUT Input ) +{ + QUADVS_OUTPUT Output; + Output.Pos = mul( Input.Pos, World ); + Output.Pos = mul( Output.Pos, View ); + Output.Pos = mul( Output.Pos, Projection ); + Output.Tex = Input.Tex; + return Output; +} + +QUADVS_OUTPUT ScreenQuadVS( QUADVS_INPUT Input ) +{ + QUADVS_OUTPUT Output; + Output.Pos = Input.Pos; + Output.Tex = Input.Tex; + return Output; +} + 
+//-------------------------------------------------------------------------------------- +// Pixel Shader +//-------------------------------------------------------------------------------------- +float4 PS( PS_INPUT input) : SV_Target +{ + // Calculate lighting assuming light color is <1,1,1,1> + float fLighting = saturate( dot( input.Norm, vLightDir ) ); + float4 outputColor = g_txDiffuse.Sample( samLinear, input.Tex ) * fLighting; + outputColor.a = 1; + return outputColor; +} + +//-------------------------------------------------------------------------------------- +// Quad Pixel Shader +//-------------------------------------------------------------------------------------- +float4 QuadPS( QUADVS_OUTPUT input) : SV_Target +{ + return g_txDiffuse.Sample( samLinear, input.Tex ); +} + + +//-------------------------------------------------------------------------------------- +// Scene Techniques +//-------------------------------------------------------------------------------------- +technique11 RenderScene +{ + pass P0 + { + SetVertexShader( CompileShader( vs_4_0, VS() ) ); + SetGeometryShader( NULL ); + SetPixelShader( CompileShader( ps_4_0, PS() ) ); + SetBlendState( NoBlending, float4( 0.0f, 0.0f, 0.0f, 0.0f ), 0xFFFFFFFF ); + } +} + +//-------------------------------------------------------------------------------------- +// RenderWithStencil - set the depth stencil state inside of the technique +//-------------------------------------------------------------------------------------- +technique11 RenderWithStencil +{ + pass P0 + { + SetVertexShader( CompileShader( vs_4_0, ScreenQuadVS() ) ); + SetGeometryShader( NULL ); + SetPixelShader( CompileShader( ps_4_0, QuadPS() ) ); + + SetBlendState( NoBlending, float4( 0.0f, 0.0f, 0.0f, 0.0f ), 0xFFFFFFFF ); + SetDepthStencilState( RenderWithStencilState, 0 ); + } +} + +//-------------------------------------------------------------------------------------- +// Quad Techniques: Alpha blending state is set inside 
the technique +//-------------------------------------------------------------------------------------- +technique11 RenderQuadSolid +{ + pass P0 + { + SetVertexShader( CompileShader( vs_4_0, QuadVS() ) ); + SetGeometryShader( NULL ); + SetPixelShader( CompileShader( ps_4_0, QuadPS() ) ); + + SetBlendState( NoBlending, float4( 0.0f, 0.0f, 0.0f, 0.0f ), 0xFFFFFFFF ); + } +} + +//-------------------------------------------------------------------------------------- +technique11 RenderQuadSrcAlphaAdd +{ + pass P0 + { + SetVertexShader( CompileShader( vs_4_0, QuadVS() ) ); + SetGeometryShader( NULL ); + SetPixelShader( CompileShader( ps_4_0, QuadPS() ) ); + + SetBlendState( SrcAlphaBlendingAdd, float4( 0.0f, 0.0f, 0.0f, 0.0f ), 0xFFFFFFFF ); + } +} + +//-------------------------------------------------------------------------------------- +technique11 RenderQuadSrcAlphaSub +{ + pass P0 + { + SetVertexShader( CompileShader( vs_4_0, QuadVS() ) ); + SetGeometryShader( NULL ); + SetPixelShader( CompileShader( ps_4_0, QuadPS() ) ); + + SetBlendState( SrcAlphaBlendingSub, float4( 0.0f, 0.0f, 0.0f, 0.0f ), 0xFFFFFFFF ); + } +} + +//-------------------------------------------------------------------------------------- +technique11 RenderQuadSrcColorAdd +{ + pass P0 + { + SetVertexShader( CompileShader( vs_4_0, QuadVS() ) ); + SetGeometryShader( NULL ); + SetPixelShader( CompileShader( ps_4_0, QuadPS() ) ); + + SetBlendState( SrcColorBlendingAdd, float4( 0.0f, 0.0f, 0.0f, 0.0f ), 0xFFFFFFFF ); + } +} + +//-------------------------------------------------------------------------------------- +technique11 RenderQuadSrcColorSub +{ + pass P0 + { + SetVertexShader( CompileShader( vs_4_0, QuadVS() ) ); + SetGeometryShader( NULL ); + SetPixelShader( CompileShader( ps_4_0, QuadPS() ) ); + + SetBlendState( SrcColorBlendingSub, float4( 0.0f, 0.0f, 0.0f, 0.0f ), 0xFFFFFFFF ); + } +} + + diff --git a/tests/hlsl/dxsdk/DynamicShaderLinkage11/DynamicShaderLinkage11_LightPSH.h 
b/tests/hlsl/dxsdk/DynamicShaderLinkage11/DynamicShaderLinkage11_LightPSH.h new file mode 100644 index 000000000..b44251829 --- /dev/null +++ b/tests/hlsl/dxsdk/DynamicShaderLinkage11/DynamicShaderLinkage11_LightPSH.h @@ -0,0 +1,84 @@ +//-------------------------------------------------------------------------------------- +// File: DynamicShaderLinkage11_LightPSH.h +// +// The pixel shader light header file for the DynamicShaderLinkage11 sample. +// +// Copyright (c) Microsoft Corporation. All rights reserved. +//-------------------------------------------------------------------------------------- + +//-------------------------------------------------------------------------------------- +// Interfaces +//-------------------------------------------------------------------------------------- +interface iBaseLight +{ + float3 IlluminateAmbient(float3 vNormal); + + float3 IlluminateDiffuse(float3 vNormal); + + float3 IlluminateSpecular(float3 vNormal, int specularPower ); + +}; + +//-------------------------------------------------------------------------------------- +// Classes +//-------------------------------------------------------------------------------------- +class cAmbientLight : iBaseLight +{ + float3 m_vLightColor; + bool m_bEnable; + + float3 IlluminateAmbient(float3 vNormal); + + float3 IlluminateDiffuse(float3 vNormal) + { + return (float3)0; + } + + float3 IlluminateSpecular(float3 vNormal, int specularPower ) + { + return (float3)0; + } +}; + +class cHemiAmbientLight : cAmbientLight +{ + // inherited float4 m_vLightColor is the SkyColor + float4 m_vGroundColor; + float4 m_vDirUp; + + float3 IlluminateAmbient(float3 vNormal); + +}; + +class cDirectionalLight : cAmbientLight +{ + // inherited float4 m_vLightColor is the LightColor + float4 m_vLightDir; + + float3 IlluminateDiffuse( float3 vNormal ); + + float3 IlluminateSpecular( float3 vNormal, int specularPower ); + +}; + +class cOmniLight : cAmbientLight +{ + float3 m_vLightPosition; + float 
radius; + + float3 IlluminateDiffuse( float3 vNormal ); + +}; + +class cSpotLight : cAmbientLight +{ + float3 m_vLightPosition; + float3 m_vLightDir; +}; + +class cEnvironmentLight : cAmbientLight +{ + float3 IlluminateSpecular( float3 vNormal, int specularPower ); +}; + + diff --git a/tests/hlsl/dxsdk/DynamicShaderLinkage11/DynamicShaderLinkage11_MaterialPSH.h b/tests/hlsl/dxsdk/DynamicShaderLinkage11/DynamicShaderLinkage11_MaterialPSH.h new file mode 100644 index 000000000..7f6bc3d22 --- /dev/null +++ b/tests/hlsl/dxsdk/DynamicShaderLinkage11/DynamicShaderLinkage11_MaterialPSH.h @@ -0,0 +1,103 @@ +//-------------------------------------------------------------------------------------- +// File: DynamicShaderLinkage11_MATERIALPSH.h +// +// The pixel shader material header file for the DynamicShaderLinkage11 sample. +// +// Copyright (c) Microsoft Corporation. All rights reserved. +//-------------------------------------------------------------------------------------- + +//-------------------------------------------------------------------------------------- +// Interfaces +//-------------------------------------------------------------------------------------- +interface iBaseMaterial +{ + float3 GetAmbientColor(float2 vTexcoord); + + float3 GetDiffuseColor(float2 vTexcoord); + + int GetSpecularPower(); + +}; + +//-------------------------------------------------------------------------------------- +// Classes +//-------------------------------------------------------------------------------------- +class cBaseMaterial : iBaseMaterial +{ + float3 m_vColor; + int m_iSpecPower; + + float3 GetAmbientColor(float2 vTexcoord) + { + return m_vColor; + } + + float3 GetDiffuseColor(float2 vTexcoord) + { + return (float3)m_vColor; + } + + int GetSpecularPower() + { + return m_iSpecPower; + } + +}; + +class cPlasticMaterial : cBaseMaterial +{ + +}; + +class cPlasticTexturedMaterial : cPlasticMaterial +{ + float3 GetAmbientColor(float2 vTexcoord); + + float3 
GetDiffuseColor(float2 vTexcoord); + +}; + +class cPlasticLightingOnlyMaterial : cBaseMaterial +{ + float3 GetAmbientColor(float2 vTexcoord) + { + return (float3)1.0f; + } + + float3 GetDiffuseColor(float2 vTexcoord) + { + return (float3)1.0f; + } + +}; + +class cRoughMaterial : cBaseMaterial +{ + int GetSpecularPower() + { + return m_iSpecPower; + } +}; + +class cRoughTexturedMaterial : cRoughMaterial +{ + float3 GetAmbientColor(float2 vTexcoord); + + float3 GetDiffuseColor(float2 vTexcoord); + +}; + + +class cRoughLightingOnlyMaterial : cRoughMaterial +{ + float3 GetAmbientColor(float2 vTexcoord) + { + return (float3)1.0f; + } + + float3 GetDiffuseColor(float2 vTexcoord) + { + return (float3)1.0f; + } + +}; diff --git a/tests/hlsl/dxsdk/DynamicShaderLinkage11/DynamicShaderLinkage11_PS.hlsl b/tests/hlsl/dxsdk/DynamicShaderLinkage11/DynamicShaderLinkage11_PS.hlsl new file mode 100644 index 000000000..c3ee93057 --- /dev/null +++ b/tests/hlsl/dxsdk/DynamicShaderLinkage11/DynamicShaderLinkage11_PS.hlsl @@ -0,0 +1,84 @@ +//TEST_IGNORE_FILE: Currently failing due to Spire compiler issues. +//TEST:COMPARE_HLSL: -target dxbc-assembly -profile ps_4_0 -entry PSMain +//-------------------------------------------------------------------------------------- +// File: DynamicShaderLinkage11.psh +// +// The pixel shader header file for the DynamicShaderLinkage11 sample. +// +// Copyright (c) Microsoft Corporation. All rights reserved. 
+//-------------------------------------------------------------------------------------- + +//-------------------------------------------------------------------------------------- +// Header Includes +//-------------------------------------------------------------------------------------- +#include "DynamicShaderLinkage11_PSBuffers.h" + +// Defines for default static permutated setting +#if defined( STATIC_PERMUTE ) + #define HEMI_AMBIENT //CONST_AMBIENT //HEMI_AMBIENT + #define TEXTURE_ENABLE + #define SPECULAR_ENABLE +#endif + +//-------------------------------------------------------------------------------------- +// Input / Output structures +//-------------------------------------------------------------------------------------- +struct PS_INPUT +{ + float4 vPosition : SV_POSITION; + float3 vNormal : NORMAL; + float2 vTexcoord : TEXCOORD0; + float4 vMatrix : TEXCOORD1; +}; + +//-------------------------------------------------------------------------------------- +// Abstract Interface Instances for dyamic linkage / permutation +//-------------------------------------------------------------------------------------- +#if !defined( STATIC_PERMUTE ) + iBaseLight g_abstractAmbientLighting; + iBaseLight g_abstractDirectLighting; + iBaseLight g_abstractEnvironmentLighting; + iBaseMaterial g_abstractMaterial; +#else +//-------------------------------------------------------------------------------------- +// Concrete Instances for STATIC_PERMUTE - static permutation +//-------------------------------------------------------------------------------------- + #if defined( HEMI_AMBIENT ) + #define g_abstractAmbientLighting g_hemiAmbientLight + #else + // CONST_AMBIENT + #define g_abstractAmbientLighting g_ambientLight + #endif + #define g_abstractDirectLighting g_directionalLight + #define g_abstractEnvironmentLighting g_environmentLight + #if defined( TEXTURE_ENABLE ) + #define g_abstractMaterial g_plasticTexturedMaterial + #else + #define g_abstractMaterial 
g_plasticMaterial + #endif +#endif + +//-------------------------------------------------------------------------------------- +// Pixel Shader +//-------------------------------------------------------------------------------------- +float4 PSMain( PS_INPUT Input ) : SV_TARGET +{ + // Compute the Ambient term + float3 Ambient = (float3)0.0f; + Ambient = g_abstractMaterial.GetAmbientColor( Input.vTexcoord ) * g_abstractAmbientLighting.IlluminateAmbient( Input.vNormal ); + + // Accumulate the Diffuse contribution + float3 Diffuse = (float3)0.0f; + + Diffuse += g_abstractMaterial.GetDiffuseColor( Input.vTexcoord ) * g_abstractDirectLighting.IlluminateDiffuse( Input.vNormal ); + + // Compute the Specular contribution + float3 Specular = (float3)0.0f; + Specular += g_abstractDirectLighting.IlluminateSpecular( Input.vNormal, g_abstractMaterial.GetSpecularPower() ); + Specular += g_abstractEnvironmentLighting.IlluminateSpecular( Input.vNormal, g_abstractMaterial.GetSpecularPower() ); + + // Accumulate the lighting with saturation + float3 Lighting = saturate( Ambient + Diffuse + Specular ); + + return float4(Lighting,1.0f); +} diff --git a/tests/hlsl/dxsdk/DynamicShaderLinkage11/DynamicShaderLinkage11_PSBuffers.h b/tests/hlsl/dxsdk/DynamicShaderLinkage11/DynamicShaderLinkage11_PSBuffers.h new file mode 100644 index 000000000..e2263b832 --- /dev/null +++ b/tests/hlsl/dxsdk/DynamicShaderLinkage11/DynamicShaderLinkage11_PSBuffers.h @@ -0,0 +1,129 @@ +//-------------------------------------------------------------------------------------- +// File: DynamicShaderLinkage11_LightPSH.hlsl +// +// The pixel shader light source module file for the DynamicShaderLinkage11 sample. +// +// Copyright (c) Microsoft Corporation. All rights reserved. 
+//-------------------------------------------------------------------------------------- + +#include "DynamicShaderLinkage11_LightPSH.h" +#include "DynamicShaderLinkage11_MaterialPSH.h" + +//-------------------------------------------------------------------------------------- +// Constant Buffers +//-------------------------------------------------------------------------------------- +cbuffer cbPerFrame : register( b0 ) +{ + cAmbientLight g_ambientLight; + cHemiAmbientLight g_hemiAmbientLight; + cDirectionalLight g_directionalLight; + cEnvironmentLight g_environmentLight; + float4 g_vEyeDir; +}; + +cbuffer cbPerPrimitive : register( b1 ) +{ + cPlasticMaterial g_plasticMaterial; + cPlasticTexturedMaterial g_plasticTexturedMaterial; + cPlasticLightingOnlyMaterial g_plasticLightingOnlyMaterial; + cRoughMaterial g_roughMaterial; + cRoughTexturedMaterial g_roughTexturedMaterial; + cRoughLightingOnlyMaterial g_roughLightingOnlyMaterial; +}; + +//-------------------------------------------------------------------------------------- +// Textures and Samplers +//-------------------------------------------------------------------------------------- +Texture2D g_txDiffuse : register( t0 ); +Texture2D g_txNormalMap : register( t1 ); +TextureCube g_txEnvironmentMap : register( t2 ); + +SamplerState g_samLinear : register( s0 ); + +//-------------------------------------------------------------------------------------- +// Lighting Class Methods +//-------------------------------------------------------------------------------------- +// Ambient Lighting Class Methods +float3 cAmbientLight::IlluminateAmbient(float3 vNormal) +{ + return float4( m_vLightColor * m_bEnable, 1.0f); +} + +float3 cHemiAmbientLight::IlluminateAmbient(float3 vNormal) +{ + float thetha = (dot( vNormal, m_vDirUp ) + 1.0f) / 2.0f; + + return lerp( m_vGroundColor, m_vLightColor, thetha) * m_bEnable; +} + +// Directional Light class +float3 cDirectionalLight::IlluminateDiffuse( float3 vNormal ) +{ + float 
lambert = saturate(dot( vNormal, m_vLightDir )); + return ((float3)lambert * m_vLightColor * m_bEnable); +} + +float3 cDirectionalLight::IlluminateSpecular( float3 vNormal, int specularPower ) +{ + float3 H = -normalize(g_vEyeDir) + m_vLightDir; + float3 halfAngle = normalize( H ); + float specular = pow( max(0,dot( halfAngle, normalize(vNormal) )), specularPower ); + + return ((float3)specular * m_vLightColor * m_bEnable); +} + +// Omni Light Class +float3 cOmniLight::IlluminateDiffuse( float3 vNormal ) +{ + return (float3)0.0f; // TO DO! +} + +// Environment Lighting +float3 cEnvironmentLight::IlluminateSpecular( float3 vNormal, int specularPower ) +{ + // compute reflection vector taking into account a cheap fresnel falloff; + float3 N = normalize(vNormal); + float3 E = normalize(g_vEyeDir); + float3 R = reflect( E, N ); + float fresnel = 1 - dot( -E, N ); + fresnel = (fresnel * fresnel * fresnel ); + + float3 specular = g_txEnvironmentMap.Sample( g_samLinear, R ) * fresnel; + + return (specular * (float3)m_bEnable); +// return ((float3)fresnel); + +} + +//-------------------------------------------------------------------------------------- +// Material Class Methods +//-------------------------------------------------------------------------------------- +// Plastic Material Methods +float3 cPlasticTexturedMaterial::GetAmbientColor(float2 vTexcoord) +{ + float4 vDiffuse = (float4)1.0f; + vDiffuse = g_txDiffuse.Sample( g_samLinear, vTexcoord ); + return m_vColor * vDiffuse; +} + +float3 cPlasticTexturedMaterial::GetDiffuseColor(float2 vTexcoord) +{ + float4 vDiffuse = (float4)1.0f; + vDiffuse = g_txDiffuse.Sample( g_samLinear, vTexcoord ); + return m_vColor * vDiffuse; +} + +// Rough Material Methods +float3 cRoughTexturedMaterial::GetAmbientColor(float2 vTexcoord) +{ + float4 vDiffuse = (float4)1.0f; + vDiffuse = g_txDiffuse.Sample( g_samLinear, vTexcoord ); + return m_vColor * vDiffuse; +} + +float3 cRoughTexturedMaterial::GetDiffuseColor(float2 vTexcoord) +{ 
+ float4 vDiffuse = (float4)1.0f; + vDiffuse = g_txDiffuse.Sample( g_samLinear, vTexcoord ); + return m_vColor * vDiffuse; +} diff --git a/tests/hlsl/dxsdk/DynamicShaderLinkage11/DynamicShaderLinkage11_VS.hlsl b/tests/hlsl/dxsdk/DynamicShaderLinkage11/DynamicShaderLinkage11_VS.hlsl new file mode 100644 index 000000000..800dbf3b3 --- /dev/null +++ b/tests/hlsl/dxsdk/DynamicShaderLinkage11/DynamicShaderLinkage11_VS.hlsl @@ -0,0 +1,66 @@ +//TEST:COMPARE_HLSL: -target dxbc-assembly -profile vs_4_0 -entry VSMain +//-------------------------------------------------------------------------------------- +// File: DynamicShaderLinkage11_VS.hlsl +// +// The vertex shader file for the DynamicShaderLinkage11 sample. +// +// Copyright (c) Microsoft Corporation. All rights reserved. +//-------------------------------------------------------------------------------------- + +//-------------------------------------------------------------------------------------- +// Globals +//-------------------------------------------------------------------------------------- +cbuffer cbPerObject : register( b0 ) +{ + float4x4 g_mWorldViewProjection : packoffset( c0 ); + float4x4 g_mWorld : packoffset( c4 ); +}; + +//-------------------------------------------------------------------------------------- +// Input / Output structures +//-------------------------------------------------------------------------------------- +struct VS_INPUT +{ + float4 vPosition : POSITION; + float3 vNormal : NORMAL; + float2 vTexcoord : TEXCOORD0; +}; + +struct VS_OUTPUT +{ + float4 vPosition : SV_POSITION; + float3 vNormal : NORMAL; + float2 vTexcoord0 : TEXCOORD0; + float4 vMatrix : TEXCOORD1; // DEBUG +}; + +//-------------------------------------------------------------------------------------- +// Vertex Shader +//-------------------------------------------------------------------------------------- +// We aliased signed vectors as an unsigned format. +// Need to recover signed values. 
The values 1.0 and 2.0 +// are slightly inaccurate here. +float3 R10G10B10A2_UNORM_TO_R32G32B32_FLOAT( in float3 vVec ) +{ + vVec *= 2.0f; + return vVec >= 1.0f ? ( vVec - 2.0f ) : vVec; +} + +VS_OUTPUT VSMain( VS_INPUT Input ) +{ + + VS_OUTPUT Output; + float3 tmpNormal; + + Output.vPosition = mul( Input.vPosition, g_mWorldViewProjection ); + + // Expand compressed vectors + tmpNormal = R10G10B10A2_UNORM_TO_R32G32B32_FLOAT( Input.vNormal ); + Output.vNormal = mul( tmpNormal, (float3x3)g_mWorld ); + + Output.vTexcoord0 = Input.vTexcoord; + + Output.vMatrix = (float4)g_mWorld[0]; // DEBUG + return Output; +} + diff --git a/tests/hlsl/dxsdk/DynamicShaderLinkageFX11/DynamicShaderLinkageFX11.fx b/tests/hlsl/dxsdk/DynamicShaderLinkageFX11/DynamicShaderLinkageFX11.fx new file mode 100644 index 000000000..c72b98843 --- /dev/null +++ b/tests/hlsl/dxsdk/DynamicShaderLinkageFX11/DynamicShaderLinkageFX11.fx @@ -0,0 +1,192 @@ +//TEST_IGNORE_FILE: +//-------------------------------------------------------------------------------------- +// File: DynamicShaderLinkageFX11.fx +// +// The effect file for the DynamicShaderLinkageFX11 sample. +// +// Copyright (c) Microsoft Corporation. All rights reserved. +//-------------------------------------------------------------------------------------- + +#include "DynamicShaderLinkageFX11_VS.hlsl" +#include "DynamicShaderLinkageFX11_PS.hlsl" + +// +// Settings for static permutations. +// All of the pre-5.0 targets need static specialization +// since they don't support late binding. The below +// just selects a single specialization but you could +// create any number of them, each one representing +// a new shader with the interfaces compiled out +// due to the compile-time class references. 
+// + +#define StaticMaterial g_plasticTexturedMaterial +#define StaticAmbientLight g_ambientLight +#define StaticDirectLight g_directionalLight +#define StaticEnvironmentLight g_environmentLight + +technique11 FeatureLevel10 +{ + pass + { + SetRasterizerState(g_rasterizerState[g_fillMode]); + SetVertexShader(CompileShader(vs_4_0, + VSMain())); + SetPixelShader(CompileShader(ps_4_0, + PSMainUniform(StaticAmbientLight, + StaticDirectLight, + StaticEnvironmentLight, + StaticMaterial))); + } +} + +technique11 FeatureLevel10_1 +{ + pass + { + SetRasterizerState(g_rasterizerState[g_fillMode]); + SetVertexShader(CompileShader(vs_4_1, + VSMain())); + SetPixelShader(CompileShader(ps_4_1, + PSMainUniform(StaticAmbientLight, + StaticDirectLight, + StaticEnvironmentLight, + StaticMaterial))); + } +} + +// +// Variables for dynamic shader linkage. +// There are two variations here for dynamic usage. +// In the first we use the uniform entry point +// and pass in global interface variables. This +// creates a shader which refers to the global +// interface variables when running and we can bind +// concrete instances in our C++ code by using +// ID3DX11EffectInterfaceVariable::SetClassInstance. +// This approach works well when you have several +// independent variations and want to bind them +// individually in your C++ code, such as the +// different lighting and material parameters in +// this sample. 
+// + +iBaseLight g_abstractAmbientLighting; +iBaseLight g_abstractDirectLighting; +iBaseLight g_abstractEnvironmentLighting; +iBaseMaterial g_abstractMaterial; + +technique11 FeatureLevel11 +{ + pass + { + SetRasterizerState(g_rasterizerState[g_fillMode]); + SetVertexShader(CompileShader(vs_5_0, + VSMain())); + SetPixelShader(CompileShader(ps_5_0, + PSMainUniform(g_abstractAmbientLighting, + g_abstractDirectLighting, + g_abstractEnvironmentLighting, + g_abstractMaterial))); + } +} + +// +// In this second variation we use the non-uniform +// entry point so that we don't have to specify +// any interfaces when compiling the shader. We +// then reuse the compiled shader with different +// BindInterfaces calls so that all bindings are +// handled automatically by the effect runtime. +// Below we have multiple techniques where +// we've given a concrete binding for the material. +// Lighting parameters are left as interfaces for +// binding via effect variables, but could also +// be specified concretely if the number of variations +// is manageable. +// This approach works well for a small number of variations +// that are known in advance, as you can just list them +// in your effect and you don't need to do the +// binding work explicitly in your C++ code. 
+// + +VertexShader g_NonUniVS = CompileShader(vs_5_0, VSMain()); +PixelShader g_NonUniPS = CompileShader(ps_5_0, PSMainNonUniform()); + +technique11 FeatureLevel11_g_plasticMaterial +{ + pass + { + SetVertexShader(g_NonUniVS); + SetPixelShader(BindInterfaces(g_NonUniPS, + g_abstractAmbientLighting, + g_abstractDirectLighting, + g_abstractEnvironmentLighting, + g_plasticMaterial)); + } +} + +technique11 FeatureLevel11_g_plasticTexturedMaterial +{ + pass + { + SetVertexShader(g_NonUniVS); + SetPixelShader(BindInterfaces(g_NonUniPS, + g_abstractAmbientLighting, + g_abstractDirectLighting, + g_abstractEnvironmentLighting, + g_plasticTexturedMaterial)); + } +} + +technique11 FeatureLevel11_g_plasticLightingOnlyMaterial +{ + pass + { + SetVertexShader(g_NonUniVS); + SetPixelShader(BindInterfaces(g_NonUniPS, + g_abstractAmbientLighting, + g_abstractDirectLighting, + g_abstractEnvironmentLighting, + g_plasticLightingOnlyMaterial)); + } +} + +technique11 FeatureLevel11_g_roughMaterial +{ + pass + { + SetVertexShader(g_NonUniVS); + SetPixelShader(BindInterfaces(g_NonUniPS, + g_abstractAmbientLighting, + g_abstractDirectLighting, + g_abstractEnvironmentLighting, + g_roughMaterial)); + } +} + +technique11 FeatureLevel11_g_roughTexturedMaterial +{ + pass + { + SetVertexShader(g_NonUniVS); + SetPixelShader(BindInterfaces(g_NonUniPS, + g_abstractAmbientLighting, + g_abstractDirectLighting, + g_abstractEnvironmentLighting, + g_roughTexturedMaterial)); + } +} + +technique11 FeatureLevel11_g_roughLightingOnlyMaterial +{ + pass + { + SetVertexShader(g_NonUniVS); + SetPixelShader(BindInterfaces(g_NonUniPS, + g_abstractAmbientLighting, + g_abstractDirectLighting, + g_abstractEnvironmentLighting, + g_roughLightingOnlyMaterial)); + } +} diff --git a/tests/hlsl/dxsdk/DynamicShaderLinkageFX11/DynamicShaderLinkageFX11_LightPSH.h b/tests/hlsl/dxsdk/DynamicShaderLinkageFX11/DynamicShaderLinkageFX11_LightPSH.h new file mode 100644 index 000000000..6f9a0f4d8 --- /dev/null +++ 
b/tests/hlsl/dxsdk/DynamicShaderLinkageFX11/DynamicShaderLinkageFX11_LightPSH.h @@ -0,0 +1,82 @@ +//-------------------------------------------------------------------------------------- +// File: DynamicShaderLinkageFX11_LightPSH.h +// +// The pixel shader light header file for the DynamicShaderLinkageFX11 sample. +// +// Copyright (c) Microsoft Corporation. All rights reserved. +//-------------------------------------------------------------------------------------- + +//-------------------------------------------------------------------------------------- +// Interfaces +//-------------------------------------------------------------------------------------- +interface iBaseLight +{ + float3 IlluminateAmbient(float3 vNormal); + + float3 IlluminateDiffuse(float3 vNormal); + + float3 IlluminateSpecular(float3 vNormal, int specularPower ); + +}; + +//-------------------------------------------------------------------------------------- +// Classes +//-------------------------------------------------------------------------------------- +class cAmbientLight : iBaseLight +{ + float3 m_vLightColor; + bool m_bEnable; + + float3 IlluminateAmbient(float3 vNormal); + + float3 IlluminateDiffuse(float3 vNormal) + { + return (float3)0; + } + + float3 IlluminateSpecular(float3 vNormal, int specularPower ) + { + return (float3)0; + } +}; + +class cHemiAmbientLight : cAmbientLight +{ + // inherited float4 m_vLightColor is the SkyColor + float4 m_vGroundColor; + float4 m_vDirUp; + + float3 IlluminateAmbient(float3 vNormal); + +}; + +class cDirectionalLight : cAmbientLight +{ + // inherited float4 m_vLightColor is the LightColor + float4 m_vLightDir; + + float3 IlluminateDiffuse( float3 vNormal ); + + float3 IlluminateSpecular( float3 vNormal, int specularPower ); + +}; + +class cOmniLight : cAmbientLight +{ + float3 m_vLightPosition; + float radius; + + float3 IlluminateDiffuse( float3 vNormal ); + +}; + +class cSpotLight : cAmbientLight +{ + float3 m_vLightPosition; + float3 
m_vLightDir; +}; + +class cEnvironmentLight : cAmbientLight +{ + float3 IlluminateSpecular( float3 vNormal, int specularPower ); +}; diff --git a/tests/hlsl/dxsdk/DynamicShaderLinkageFX11/DynamicShaderLinkageFX11_MaterialPSH.h b/tests/hlsl/dxsdk/DynamicShaderLinkageFX11/DynamicShaderLinkageFX11_MaterialPSH.h new file mode 100644 index 000000000..cd54a283d --- /dev/null +++ b/tests/hlsl/dxsdk/DynamicShaderLinkageFX11/DynamicShaderLinkageFX11_MaterialPSH.h @@ -0,0 +1,103 @@ +//-------------------------------------------------------------------------------------- +// File: DynamicShaderLinkageFX11_MaterialPSH.h +// +// The pixel shader material header file for the DynamicShaderLinkageFX11 sample. +// +// Copyright (c) Microsoft Corporation. All rights reserved. +//-------------------------------------------------------------------------------------- + +//-------------------------------------------------------------------------------------- +// Interfaces +//-------------------------------------------------------------------------------------- +interface iBaseMaterial +{ + float3 GetAmbientColor(float2 vTexcoord); + + float3 GetDiffuseColor(float2 vTexcoord); + + int GetSpecularPower(); + +}; + +//-------------------------------------------------------------------------------------- +// Classes +//-------------------------------------------------------------------------------------- +class cBaseMaterial : iBaseMaterial +{ + float3 m_vColor; + int m_iSpecPower; + + float3 GetAmbientColor(float2 vTexcoord) + { + return m_vColor; + } + + float3 GetDiffuseColor(float2 vTexcoord) + { + return (float3)m_vColor; + } + + int GetSpecularPower() + { + return m_iSpecPower; + } + +}; + +class cPlasticMaterial : cBaseMaterial +{ + +}; + +class cPlasticTexturedMaterial : cPlasticMaterial +{ + float3 GetAmbientColor(float2 vTexcoord); + + float3 GetDiffuseColor(float2 vTexcoord); + +}; + +class cPlasticLightingOnlyMaterial : cBaseMaterial +{ + float3 GetAmbientColor(float2 vTexcoord) 
+ { + return (float3)1.0f; + } + + float3 GetDiffuseColor(float2 vTexcoord) + { + return (float3)1.0f; + } + +}; + +class cRoughMaterial : cBaseMaterial +{ + int GetSpecularPower() + { + return m_iSpecPower; + } +}; + +class cRoughTexturedMaterial : cRoughMaterial +{ + float3 GetAmbientColor(float2 vTexcoord); + + float3 GetDiffuseColor(float2 vTexcoord); + +}; + + +class cRoughLightingOnlyMaterial : cRoughMaterial +{ + float3 GetAmbientColor(float2 vTexcoord) + { + return (float3)1.0f; + } + + float3 GetDiffuseColor(float2 vTexcoord) + { + return (float3)1.0f; + } + +}; diff --git a/tests/hlsl/dxsdk/DynamicShaderLinkageFX11/DynamicShaderLinkageFX11_PSBuffers.h b/tests/hlsl/dxsdk/DynamicShaderLinkageFX11/DynamicShaderLinkageFX11_PSBuffers.h new file mode 100644 index 000000000..3b4c528be --- /dev/null +++ b/tests/hlsl/dxsdk/DynamicShaderLinkageFX11/DynamicShaderLinkageFX11_PSBuffers.h @@ -0,0 +1,152 @@ +//-------------------------------------------------------------------------------------- +// File: DynamicShaderLinkageFX11_LightPSH.hlsl +// +// The pixel shader light source module file for the DynamicShaderLinkageFX11 sample. +// +// Copyright (c) Microsoft Corporation. All rights reserved. 
+//-------------------------------------------------------------------------------------- + +#include "DynamicShaderLinkageFX11_LightPSH.h" +#include "DynamicShaderLinkageFX11_MaterialPSH.h" + +//-------------------------------------------------------------------------------------- +// Constant Buffers +//-------------------------------------------------------------------------------------- +cbuffer cbPerFrame : register( b0 ) +{ + cAmbientLight g_ambientLight; + cHemiAmbientLight g_hemiAmbientLight; + cDirectionalLight g_directionalLight; + cEnvironmentLight g_environmentLight; + float4 g_vEyeDir; +}; + +cbuffer cbPerPrimitive : register( b1 ) +{ + cPlasticMaterial g_plasticMaterial; + cPlasticTexturedMaterial g_plasticTexturedMaterial; + cPlasticLightingOnlyMaterial g_plasticLightingOnlyMaterial; + cRoughMaterial g_roughMaterial; + cRoughTexturedMaterial g_roughTexturedMaterial; + cRoughLightingOnlyMaterial g_roughLightingOnlyMaterial; +}; + +//-------------------------------------------------------------------------------------- +// Textures and Samplers +//-------------------------------------------------------------------------------------- +Texture2D g_txDiffuse : register( t0 ); +Texture2D g_txNormalMap : register( t1 ); +TextureCube g_txEnvironmentMap : register( t2 ); + +SamplerState g_samLinear : register( s0 ) +{ + Filter = MIN_MAG_MIP_LINEAR; + AddressU = WRAP; + AddressV = WRAP; + AddressW = WRAP; +}; + +//-------------------------------------------------------------------------------------- +// Rasterization State +//-------------------------------------------------------------------------------------- +uint g_fillMode = 0; + +RasterizerState g_rasterizerState[2] +{ +{ + FillMode = SOLID; + MultisampleEnable = true; +}, +{ + FillMode = WIREFRAME; + MultisampleEnable = true; +} +}; + +//-------------------------------------------------------------------------------------- +// Lighting Class Methods 
+//-------------------------------------------------------------------------------------- +// Ambient Lighting Class Methods +float3 cAmbientLight::IlluminateAmbient(float3 vNormal) +{ + return m_vLightColor * m_bEnable; +} + +float3 cHemiAmbientLight::IlluminateAmbient(float3 vNormal) +{ + float thetha = (dot( vNormal, m_vDirUp.xyz ) + 1.0f) / 2.0f; + + return lerp( m_vGroundColor.xyz, m_vLightColor, thetha) * m_bEnable; +} + +// Directional Light class +float3 cDirectionalLight::IlluminateDiffuse( float3 vNormal ) +{ + float lambert = saturate(dot( vNormal, m_vLightDir.xyz )); + return ((float3)lambert * m_vLightColor * m_bEnable); +} + +float3 cDirectionalLight::IlluminateSpecular( float3 vNormal, int specularPower ) +{ + float3 H = -normalize(g_vEyeDir.xyz) + m_vLightDir.xyz; + float3 halfAngle = normalize( H ); + float specular = pow( max(0,dot( halfAngle, normalize(vNormal) )), specularPower ); + + return ((float3)specular * m_vLightColor * m_bEnable); +} + +// Omni Light Class +float3 cOmniLight::IlluminateDiffuse( float3 vNormal ) +{ + return (float3)0.0f; // TO DO! 
+} + +// Environment Lighting +float3 cEnvironmentLight::IlluminateSpecular( float3 vNormal, int specularPower ) +{ + // compute reflection vector taking into account a cheap fresnel falloff; + float3 N = normalize(vNormal); + float3 E = normalize(g_vEyeDir.xyz); + float3 R = reflect( E, N ); + float fresnel = 1 - dot( -E, N ); + fresnel = (fresnel * fresnel * fresnel ); + + float3 specular = g_txEnvironmentMap.Sample( g_samLinear, R ).xyz * fresnel; + + return (specular * (float3)m_bEnable); +// return ((float3)fresnel); + +} + +//-------------------------------------------------------------------------------------- +// Material Class Methods +//-------------------------------------------------------------------------------------- +// Plastic Material Methods +float3 cPlasticTexturedMaterial::GetAmbientColor(float2 vTexcoord) +{ + float4 vDiffuse = (float4)1.0f; + vDiffuse = g_txDiffuse.Sample( g_samLinear, vTexcoord ); + return m_vColor * vDiffuse.xyz; +} + +float3 cPlasticTexturedMaterial::GetDiffuseColor(float2 vTexcoord) +{ + float4 vDiffuse = (float4)1.0f; + vDiffuse = g_txDiffuse.Sample( g_samLinear, vTexcoord ); + return m_vColor * vDiffuse.xyz; +} + +// Rough Material Methods +float3 cRoughTexturedMaterial::GetAmbientColor(float2 vTexcoord) +{ + float4 vDiffuse = (float4)1.0f; + vDiffuse = g_txDiffuse.Sample( g_samLinear, vTexcoord ); + return m_vColor * vDiffuse.xyz; +} + +float3 cRoughTexturedMaterial::GetDiffuseColor(float2 vTexcoord) +{ + float4 vDiffuse = (float4)1.0f; + vDiffuse = g_txDiffuse.Sample( g_samLinear, vTexcoord ); + return m_vColor * vDiffuse.xyz; +} diff --git a/tests/hlsl/dxsdk/DynamicShaderLinkageFX11/DynamicShaderLinkageFX11_ps.hlsl b/tests/hlsl/dxsdk/DynamicShaderLinkageFX11/DynamicShaderLinkageFX11_ps.hlsl new file mode 100644 index 000000000..55d206259 --- /dev/null +++ b/tests/hlsl/dxsdk/DynamicShaderLinkageFX11/DynamicShaderLinkageFX11_ps.hlsl @@ -0,0 +1,113 @@ +//TEST_IGNORE_FILE: 
+//-------------------------------------------------------------------------------------- +// File: DynamicShaderLinkageFX11.psh +// +// The pixel shader header file for the DynamicShaderLinkageFX11 sample. +// +// Copyright (c) Microsoft Corporation. All rights reserved. +//-------------------------------------------------------------------------------------- + +//-------------------------------------------------------------------------------------- +// Header Includes +//-------------------------------------------------------------------------------------- +#include "DynamicShaderLinkageFX11_PSBuffers.h" + +//-------------------------------------------------------------------------------------- +// Input / Output structures +//-------------------------------------------------------------------------------------- +struct PS_INPUT +{ + float4 vPosition : SV_POSITION; + float3 vNormal : NORMAL; + float2 vTexcoord : TEXCOORD0; + float4 vMatrix : TEXCOORD1; +}; + +//-------------------------------------------------------------------------------------- +// Pixel Shader +//-------------------------------------------------------------------------------------- + +// This pixel shader uses several interfaces during its +// work. We show three different ways of providing interface +// bindings for the PS and those have two different +// entry points so we've separated the base PS code +// into a worker routine that's called by the entry +// points. Normally only one technique would be used +// and this layering of entry point and worker would +// not be necessary. 
+float4 PSMainWorker( iBaseLight ambientLighting, + iBaseLight directLighting, + iBaseLight environmentLighting, + iBaseMaterial material, + PS_INPUT Input ) +{ + // Compute the Ambient term + float3 Ambient = (float3)0.0f; + Ambient = material.GetAmbientColor( Input.vTexcoord ) * ambientLighting.IlluminateAmbient( Input.vNormal ); + + // Accumulate the Diffuse contribution + float3 Diffuse = (float3)0.0f; + + Diffuse += material.GetDiffuseColor( Input.vTexcoord ) * directLighting.IlluminateDiffuse( Input.vNormal ); + + // Compute the Specular contribution + float3 Specular = (float3)0.0f; + Specular += directLighting.IlluminateSpecular( Input.vNormal, material.GetSpecularPower() ); + Specular += environmentLighting.IlluminateSpecular( Input.vNormal, material.GetSpecularPower() ); + + // Accumulate the lighting with saturation + float3 Lighting = saturate( Ambient + Diffuse + Specular); + + return float4(Lighting,1.0f); +} + +// One way to provide bindings for shaders in Effects 11 is +// to use uniform interface parameters. As with non-interface +// uniform parameters you must specify a value for these +// parameters in your CompileShader invocations in the effect. +// You can provide concrete class instances if you want +// to statically specialize your shaders, such as for targets +// that don't support abstract interfaces; or you can provide +// other interfaces that you bind using effect variables. +// Both are shown in this sample's technique passes. +float4 PSMainUniform( uniform iBaseLight ambientLighting, + uniform iBaseLight directLighting, + uniform iBaseLight environmentLighting, + uniform iBaseMaterial material, + PS_INPUT Input ) : SV_Target +{ + return PSMainWorker(ambientLighting, + directLighting, + environmentLighting, + material, + Input); +} + +// Another way to use Effects 11 with interfaces is +// to have non-uniform parameters, which then are +// bound with a BindInterfaces in a technique pass. 
+// BindInterfaces gives concrete instances to use +// with a shader but does not do static specialization, +// it just saves information for the effect runtime +// to use when setting up the shader to run. +// This lets you share a single shader, compiled with +// interface usage, while still getting the convenience +// of declaring concrete bindings in the effect and +// not needing explicit binding in code via effect +// variable updates. If you have many different +// variations it may be simpler to use bindings +// through effect variables, as then you don't +// need to list every possible binding set in your +// techniques. +float4 PSMainNonUniform( iBaseLight ambientLighting, + iBaseLight directLighting, + iBaseLight environmentLighting, + iBaseMaterial material, + PS_INPUT Input ) : SV_Target +{ + return PSMainWorker(ambientLighting, + directLighting, + environmentLighting, + material, + Input); +} diff --git a/tests/hlsl/dxsdk/DynamicShaderLinkageFX11/DynamicShaderLinkageFX11_vs.hlsl b/tests/hlsl/dxsdk/DynamicShaderLinkageFX11/DynamicShaderLinkageFX11_vs.hlsl new file mode 100644 index 000000000..4791e5786 --- /dev/null +++ b/tests/hlsl/dxsdk/DynamicShaderLinkageFX11/DynamicShaderLinkageFX11_vs.hlsl @@ -0,0 +1,65 @@ +//TEST_IGNORE_FILE: +//-------------------------------------------------------------------------------------- +// File: DynamicShaderLinkageFX11_VS.hlsl +// +// The vertex shader file for the DynamicShaderLinkageFX11 sample. +// +// Copyright (c) Microsoft Corporation. All rights reserved. 
+//-------------------------------------------------------------------------------------- + +//-------------------------------------------------------------------------------------- +// Globals +//-------------------------------------------------------------------------------------- +cbuffer cbPerObject : register( b0 ) +{ + float4x4 g_mWorldViewProjection : packoffset( c0 ); + float4x4 g_mWorld : packoffset( c4 ); +}; + +//-------------------------------------------------------------------------------------- +// Input / Output structures +//-------------------------------------------------------------------------------------- +struct VS_INPUT +{ + float4 vPosition : POSITION; + float3 vNormal : NORMAL; + float2 vTexcoord : TEXCOORD0; +}; + +struct VS_OUTPUT +{ + float4 vPosition : SV_POSITION; + float3 vNormal : NORMAL; + float2 vTexcoord0 : TEXCOORD0; + float4 vMatrix : TEXCOORD1; // DEBUG +}; + +//-------------------------------------------------------------------------------------- +// Vertex Shader +//-------------------------------------------------------------------------------------- +// We aliased signed vectors as an unsigned format. +// Need to recover signed values. The values 1.0 and 2.0 +// are slightly inaccurate here. +float3 R10G10B10A2_UNORM_TO_R32G32B32_FLOAT( in float3 vVec ) +{ + vVec *= 2.0f; + return vVec >= 1.0f ? 
( vVec - 2.0f ) : vVec; +} + +VS_OUTPUT VSMain( VS_INPUT Input ) +{ + + VS_OUTPUT Output; + float3 tmpNormal; + + Output.vPosition = mul( Input.vPosition, g_mWorldViewProjection ); + + // Expand compressed vectors + tmpNormal = R10G10B10A2_UNORM_TO_R32G32B32_FLOAT( Input.vNormal ); + Output.vNormal = mul( tmpNormal, (float3x3)g_mWorld ); + + Output.vTexcoord0 = Input.vTexcoord; + + Output.vMatrix = (float4)g_mWorld[0]; // DEBUG + return Output; +} diff --git a/tests/hlsl/dxsdk/FixedFuncEMUFX11/FixedFuncEMU.fx b/tests/hlsl/dxsdk/FixedFuncEMUFX11/FixedFuncEMU.fx new file mode 100644 index 000000000..699df8655 --- /dev/null +++ b/tests/hlsl/dxsdk/FixedFuncEMUFX11/FixedFuncEMU.fx @@ -0,0 +1,468 @@ +//TEST_IGNORE_FILE: +// FixedFuncEMU.fx +// Copyright (c) 2005 Microsoft Corporation. All rights reserved. +// + +struct VSSceneIn +{ + float3 pos : POSITION; //position of the particle + float3 norm : NORMAL; //velocity of the particle + float2 tex : TEXTURE0; //tex coords +}; + +struct VSSceneOut +{ + float4 pos : SV_Position; //position + float2 tex : TEXTURE0; //texture coordinate + float3 wPos : TEXTURE1; //world space pos + float3 wNorm : TEXTURE2; //world space normal + float4 colorD : COLOR0; //color for gouraud and flat shading + float4 colorS : COLOR1; //color for specular + float fogDist : FOGDISTANCE; //distance used for fog calculations + float3 planeDist : SV_ClipDistance0; //clip distance for 3 planes +}; + +struct PSSceneIn +{ + float4 pos : SV_Position; //position + float2 tex : TEXTURE0; //texture coordinate + float3 wPos : TEXTURE1; //world space pos + float3 wNorm : TEXTURE2; //world space normal + float4 colorD : COLOR0; //color for gouraud and flat shading + float4 colorS : COLOR1; //color for specular + float fogDist : FOGDISTANCE; //distance used for fog calculations +}; + +struct Light +{ + float4 Position; + float4 Diffuse; + float4 Specular; + float4 Ambient; + float4 Atten; +}; + +#define FOGMODE_NONE 0 +#define FOGMODE_LINEAR 1 +#define 
FOGMODE_EXP 2 +#define FOGMODE_EXP2 3 +#define E 2.71828 + +cbuffer cbLights +{ + float4 g_clipplanes[3]; + Light g_lights[8]; +}; + +cbuffer cbPerFrame +{ + float4x4 g_mWorld; + float4x4 g_mView; + float4x4 g_mProj; + float4x4 g_mInvProj; + float4x4 g_mLightViewProj; +}; + +cbuffer cbPerTechnique +{ + bool g_bEnableLighting = true; + bool g_bEnableClipping = true; + bool g_bPointScaleEnable = false; + float g_pointScaleA; + float g_pointScaleB; + float g_pointScaleC; + float g_pointSize; + + //fog params + int g_fogMode = FOGMODE_NONE; + float g_fogStart; + float g_fogEnd; + float g_fogDensity; + float4 g_fogColor; +}; + +cbuffer cbPerViewChange +{ + //viewport params + float g_viewportHeight; + float g_viewportWidth; + float g_nearPlane; +}; + +cbuffer cbImmutable +{ + float3 g_positions[4] = + { + float3( -0.5, 0.5, 0 ), + float3( 0.5, 0.5, 0 ), + float3( -0.5, -0.5, 0 ), + float3( 0.5, -0.5, 0 ), + }; +}; + +Texture2D g_txDiffuse; +Texture2D g_txProjected; +SamplerState g_samLinear +{ + Filter = MIN_MAG_MIP_LINEAR; + AddressU = Clamp; + AddressV = Clamp; +}; + +DepthStencilState DisableDepth +{ + DepthEnable = FALSE; + DepthWriteMask = ZERO; +}; + +DepthStencilState EnableDepth +{ + DepthEnable = TRUE; + DepthWriteMask = ALL; +}; + +struct ColorsOutput +{ + float4 Diffuse; + float4 Specular; +}; + +ColorsOutput CalcLighting( float3 worldNormal, float3 worldPos, float3 cameraPos ) +{ + ColorsOutput output = (ColorsOutput)0.0; + + for(int i=0; i<8; i++) + { + float3 toLight = g_lights[i].Position.xyz - worldPos; + float lightDist = length( toLight ); + float fAtten = 1.0/dot( g_lights[i].Atten, float4(1,lightDist,lightDist*lightDist,0) ); + float3 lightDir = normalize( toLight ); + float3 halfAngle = normalize( normalize(-cameraPos) + lightDir ); + + output.Diffuse += max(0,dot( lightDir, worldNormal ) * g_lights[i].Diffuse * fAtten) + g_lights[i].Ambient; + output.Specular += max(0,pow( dot( halfAngle, worldNormal ), 64 ) * g_lights[i].Specular * fAtten ); + } + 
+ return output; +} + +// +// VS for emulating fixed function pipeline +// +VSSceneOut VSScenemain(VSSceneIn input) +{ + VSSceneOut output = (VSSceneOut)0.0; + + //output our final position in clipspace + float4 worldPos = mul( float4( input.pos, 1 ), g_mWorld ); + float4 cameraPos = mul( worldPos, g_mView ); //Save cameraPos for fog calculations + output.pos = mul( cameraPos, g_mProj ); + + //save world pos for later + output.wPos = worldPos; + + //save the fog distance for later + output.fogDist = cameraPos.z; + + //find our clipping planes (fixed function clipping is done in world space) + if( g_bEnableClipping ) + { + worldPos.w = 1; + + //calc the distance from the 3 clipping planes + output.planeDist.x = dot( worldPos, g_clipplanes[0] ); + output.planeDist.y = dot( worldPos, g_clipplanes[1] ); + output.planeDist.z = dot( worldPos, g_clipplanes[2] ); + } + else + { + output.planeDist.x = 1; + output.planeDist.y = 1; + output.planeDist.z = 1; + } + + //do gouraud lighting + if( g_bEnableLighting ) + { + float3 worldNormal = normalize( mul( input.norm, (float3x3)g_mWorld ) ); + output.wNorm = worldNormal; + ColorsOutput cOut = CalcLighting( worldNormal, worldPos, cameraPos ); + output.colorD = cOut.Diffuse; + output.colorS = cOut.Specular; + } + else + { + output.colorD = float4(1,1,1,1); + } + + //propagate texture coordinate + output.tex = input.tex; + + return output; +} + +// +// VS for rendering in screen space +// +PSSceneIn VSScreenSpacemain(VSSceneIn input) +{ + PSSceneIn output = (PSSceneIn)0.0; + + //output our final position + output.pos.x = (input.pos.x / (g_viewportWidth/2.0)) -1; + output.pos.y = -(input.pos.y / (g_viewportHeight/2.0)) +1; + output.pos.z = input.pos.z; + output.pos.w = 1; + + //propagate texture coordinate + output.tex = input.tex; + output.colorD = float4(1,1,1,1); + + return output; +} + +// +// GS for flat shaded rendering +// + +[maxvertexcount(3)] +void GSFlatmain( triangle VSSceneOut input[3], inout TriangleStream<VSSceneOut> 
FlatTriStream ) +{ + VSSceneOut output; + + // + // Calculate the face normal + // + float3 faceEdgeA = input[1].wPos - input[0].wPos; + float3 faceEdgeB = input[2].wPos - input[0].wPos; + + // + // Cross product + // + float3 faceNormal = cross(faceEdgeA, faceEdgeB); + + // + //calculate the face center + // + float3 faceCenter = (input[0].wPos + input[1].wPos + input[2].wPos)/3.0; + + //find world pos and camera pos + float4 worldPos = float4( faceCenter, 1 ); + float4 cameraPos = mul( worldPos, g_mView ); + + //do shading + float3 worldNormal = normalize( faceNormal ); + ColorsOutput cOut = CalcLighting( worldNormal, worldPos, cameraPos ); + + for(int i=0; i<3; i++) + { + output = input[i]; + output.colorD = cOut.Diffuse; + output.colorS = cOut.Specular; + + FlatTriStream.Append( output ); + } + FlatTriStream.RestartStrip(); +} + +// +// GS for point rendering +// +[maxvertexcount(12)] +void GSPointmain( triangle VSSceneOut input[3], inout TriangleStream<VSSceneOut> PointTriStream ) +{ + VSSceneOut output; + + // + // Calculate the point size + // + //float fSizeX = (g_pointSize/g_viewportWidth)/4.0; + float fSizeY = (g_pointSize/g_viewportHeight)/4.0; + float fSizeX = fSizeY; + + for(int i=0; i<3; i++) + { + output = input[i]; + + //find world pos and camera pos + float4 worldPos = float4(input[i].wPos,1); + float4 cameraPos = mul( worldPos, g_mView ); + + //find our size + if( g_bPointScaleEnable ) + { + float dEye = length( cameraPos.xyz ); + fSizeX = fSizeY = g_viewportHeight * g_pointSize * + sqrt( 1.0f/( g_pointScaleA + g_pointScaleB*dEye + g_pointScaleC*(dEye*dEye) ) ); + } + + //do shading + if(g_bEnableLighting) + { + float3 worldNormal = input[i].wNorm; + ColorsOutput cOut = CalcLighting( worldNormal, worldPos, cameraPos ); + + output.colorD = cOut.Diffuse; + output.colorS = cOut.Specular; + } + else + { + output.colorD = float4(1,1,1,1); + } + + output.tex = input[i].tex; + + // + // Emit two new triangles + // + for(int i=0; i<4; i++) + { + float4 
outPos = mul( worldPos, g_mView ); + output.pos = mul( outPos, g_mProj ); + float zoverNear = (outPos.z)/g_nearPlane; + float4 posSize = float4( g_positions[i].x*fSizeX*zoverNear, + g_positions[i].y*fSizeY*zoverNear, + 0, + 0 ); + output.pos += posSize; + + PointTriStream.Append(output); + } + PointTriStream.RestartStrip(); + } +} + +// +// Calculates fog factor based upon distance +// +float CalcFogFactor( float d ) +{ + float fogCoeff = 1.0; + + if( FOGMODE_LINEAR == g_fogMode ) + { + fogCoeff = (g_fogEnd - d)/(g_fogEnd - g_fogStart); + } + else if( FOGMODE_EXP == g_fogMode ) + { + fogCoeff = 1.0 / pow( E, d*g_fogDensity ); + } + else if( FOGMODE_EXP2 == g_fogMode ) + { + fogCoeff = 1.0 / pow( E, d*d*g_fogDensity*g_fogDensity ); + } + + return clamp( fogCoeff, 0, 1 ); +} + +// +// PS for rendering with clip planes +// +float4 PSScenemain(PSSceneIn input) : SV_Target +{ + //calculate the fog factor + float fog = CalcFogFactor( input.fogDist ); + + //calculate the color based off of the normal, textures, etc + float4 normalColor = g_txDiffuse.Sample( g_samLinear, input.tex ) * input.colorD + input.colorS; + + //calculate the color from the projected texture + float4 cookieCoord = mul( float4(input.wPos,1), g_mLightViewProj ); + //since we don't have texldp, we must perform the w divide ourselves befor the texture lookup + cookieCoord.xy = 0.5 * cookieCoord.xy / cookieCoord.w + float2( 0.5, 0.5 ); + float4 cookieColor = float4(0,0,0,0); + if( cookieCoord.z > 0 ) + cookieColor = g_txProjected.Sample( g_samLinear, cookieCoord.xy ); + + //for standard light-modulating effects just multiply normalcolor and coookiecolor + normalColor += cookieColor; + + return fog * normalColor + (1.0 - fog)*g_fogColor; +} + +// +// PS for rendering with alpha test +// +float4 PSAlphaTestmain(PSSceneIn input) : SV_Target +{ + float4 color = g_txDiffuse.Sample( g_samLinear, input.tex ) * input.colorD; + if( color.a < 0.5 ) + discard; + return color; +} + +// +// RenderSceneGouraud - 
renders gouraud-shaded primitives +// +technique10 RenderSceneGouraud +{ + pass p0 + { + SetVertexShader( CompileShader( vs_4_0, VSScenemain() ) ); + SetGeometryShader( NULL ); + SetPixelShader( CompileShader( ps_4_0, PSScenemain() ) ); + + SetDepthStencilState( EnableDepth, 0 ); + } +} + +// +// RenderSceneFlat - renders flat-shaded primitives +// +technique10 RenderSceneFlat +{ + pass p0 + { + SetVertexShader( CompileShader( vs_4_0, VSScenemain() ) ); + SetGeometryShader( CompileShader( gs_4_0, GSFlatmain() ) ); + SetPixelShader( CompileShader( ps_4_0, PSScenemain() ) ); + + SetDepthStencilState( EnableDepth, 0 ); + } +} + +// +// RenderScenePoint - replaces d3dfill_point +// +technique10 RenderScenePoint +{ + pass p0 + { + SetVertexShader( CompileShader( vs_4_0, VSScenemain() ) ); + SetGeometryShader( CompileShader( gs_4_0, GSPointmain() ) ); + SetPixelShader( CompileShader( ps_4_0, PSScenemain() ) ); + + SetDepthStencilState( EnableDepth, 0 ); + } +} + +// +// RenderScreneSpace - shows how to render something in screenspace +// +technique10 RenderScreenSpaceAlphaTest +{ + pass p0 + { + SetVertexShader( CompileShader( vs_4_0, VSScreenSpacemain() ) ); + SetGeometryShader( NULL ); + SetPixelShader( CompileShader( ps_4_0, PSAlphaTestmain() ) ); + + SetDepthStencilState( DisableDepth, 0 ); + } +} + +// +// RenderScreneSpace - shows how to render something in screenspace +// +technique10 RenderTextureOnly +{ + pass p0 + { + SetVertexShader( CompileShader( vs_4_0, VSScenemain() ) ); + SetGeometryShader( NULL ); + SetPixelShader( CompileShader( ps_4_0, PSScenemain() ) ); + + SetDepthStencilState( EnableDepth, 0 ); + } +} + diff --git a/tests/hlsl/dxsdk/FluidCS11/ComputeShaderSort11.hlsl b/tests/hlsl/dxsdk/FluidCS11/ComputeShaderSort11.hlsl new file mode 100644 index 000000000..db7bd5136 --- /dev/null +++ b/tests/hlsl/dxsdk/FluidCS11/ComputeShaderSort11.hlsl @@ -0,0 +1,75 @@ +//TEST_IGNORE_FILE: Currently failing due to Spire compiler issues. 
+//TEST:COMPARE_HLSL: -target dxbc-assembly -profile cs_4_0 -entry BitonicSort -entry MatrixTranspose +//-------------------------------------------------------------------------------------- +// File: ComputeShaderSort11.hlsl +// +// This file contains the compute shaders to perform GPU sorting using DirectX 11. +// +// Copyright (c) Microsoft Corporation. All rights reserved. +//-------------------------------------------------------------------------------------- + +#define BITONIC_BLOCK_SIZE 512 + +#define TRANSPOSE_BLOCK_SIZE 16 + +//-------------------------------------------------------------------------------------- +// Constant Buffers +//-------------------------------------------------------------------------------------- +cbuffer CB : register( b0 ) +{ + unsigned int g_iLevel; + unsigned int g_iLevelMask; + unsigned int g_iWidth; + unsigned int g_iHeight; +}; + +//-------------------------------------------------------------------------------------- +// Structured Buffers +//-------------------------------------------------------------------------------------- +StructuredBuffer<unsigned int> Input : register( t0 ); +RWStructuredBuffer<unsigned int> Data : register( u0 ); + +//-------------------------------------------------------------------------------------- +// Bitonic Sort Compute Shader +//-------------------------------------------------------------------------------------- +groupshared unsigned int shared_data[BITONIC_BLOCK_SIZE]; + +[numthreads(BITONIC_BLOCK_SIZE, 1, 1)] +void BitonicSort( uint3 Gid : SV_GroupID, + uint3 DTid : SV_DispatchThreadID, + uint3 GTid : SV_GroupThreadID, + uint GI : SV_GroupIndex ) +{ + // Load shared data + shared_data[GI] = Data[DTid.x]; + GroupMemoryBarrierWithGroupSync(); + + // Sort the shared data + for (unsigned int j = g_iLevel >> 1 ; j > 0 ; j >>= 1) + { + unsigned int result = ((shared_data[GI & ~j] <= shared_data[GI | j]) == (bool)(g_iLevelMask & DTid.x))? 
shared_data[GI ^ j] : shared_data[GI]; + GroupMemoryBarrierWithGroupSync(); + shared_data[GI] = result; + GroupMemoryBarrierWithGroupSync(); + } + + // Store shared data + Data[DTid.x] = shared_data[GI]; +} + +//-------------------------------------------------------------------------------------- +// Matrix Transpose Compute Shader +//-------------------------------------------------------------------------------------- +groupshared unsigned int transpose_shared_data[TRANSPOSE_BLOCK_SIZE * TRANSPOSE_BLOCK_SIZE]; + +[numthreads(TRANSPOSE_BLOCK_SIZE, TRANSPOSE_BLOCK_SIZE, 1)] +void MatrixTranspose( uint3 Gid : SV_GroupID, + uint3 DTid : SV_DispatchThreadID, + uint3 GTid : SV_GroupThreadID, + uint GI : SV_GroupIndex ) +{ + transpose_shared_data[GI] = Input[DTid.y * g_iWidth + DTid.x]; + GroupMemoryBarrierWithGroupSync(); + uint2 XY = DTid.yx - GTid.yx + GTid.xy; + Data[XY.y * g_iHeight + XY.x] = transpose_shared_data[GTid.x * TRANSPOSE_BLOCK_SIZE + GTid.y]; +} diff --git a/tests/hlsl/dxsdk/FluidCS11/FluidCS11.hlsl b/tests/hlsl/dxsdk/FluidCS11/FluidCS11.hlsl new file mode 100644 index 000000000..26e6cdf60 --- /dev/null +++ b/tests/hlsl/dxsdk/FluidCS11/FluidCS11.hlsl @@ -0,0 +1,529 @@ +//TEST_IGNORE_FILE: Currently failing due to Spire compiler issues. +//TEST:COMPARE_HLSL: -target dxbc-assembly -profile cs_4_0 -entry BuildGridCS -entry ClearGridIndicesCS -entry BuildGridIndicesCS -entry RearrangeParticlesCS -entry DensityCS_Simple -entry DensityCS_Shared -entry DensityCS_Grid -entry ForceCS_Simple -entry ForceCS_Shared -entry ForceCS_Grid -entry IntegrateCS +//-------------------------------------------------------------------------------------- +// File: FluidCS11.hlsl +// +// Copyright (c) Microsoft Corporation. All rights reserved. 
+//-------------------------------------------------------------------------------------- + +//-------------------------------------------------------------------------------------- +// Smoothed Particle Hydrodynamics Algorithm Based Upon: +// Particle-Based Fluid Simulation for Interactive Applications +// Matthias Müller +//-------------------------------------------------------------------------------------- + +//-------------------------------------------------------------------------------------- +// Optimized Grid Algorithm Based Upon: +// Broad-Phase Collision Detection with CUDA +// Scott Le Grand +//-------------------------------------------------------------------------------------- + +struct Particle +{ + float2 position; + float2 velocity; +}; + +struct ParticleForces +{ + float2 acceleration; +}; + +struct ParticleDensity +{ + float density; +}; + +cbuffer cbSimulationConstants : register( b0 ) +{ + uint g_iNumParticles; + float g_fTimeStep; + float g_fSmoothlen; + float g_fPressureStiffness; + float g_fRestDensity; + float g_fDensityCoef; + float g_fGradPressureCoef; + float g_fLapViscosityCoef; + float g_fWallStiffness; + + float4 g_vGravity; + float4 g_vGridDim; + float3 g_vPlanes[4]; +}; + +//-------------------------------------------------------------------------------------- +// Fluid Simulation +//-------------------------------------------------------------------------------------- + +#define SIMULATION_BLOCK_SIZE 256 + +//-------------------------------------------------------------------------------------- +// Structured Buffers +//-------------------------------------------------------------------------------------- +RWStructuredBuffer<Particle> ParticlesRW : register( u0 ); +StructuredBuffer<Particle> ParticlesRO : register( t0 ); + +RWStructuredBuffer<ParticleDensity> ParticlesDensityRW : register( u0 ); +StructuredBuffer<ParticleDensity> ParticlesDensityRO : register( t1 ); + +RWStructuredBuffer<ParticleForces> ParticlesForcesRW : 
register( u0 ); +StructuredBuffer<ParticleForces> ParticlesForcesRO : register( t2 ); + +RWStructuredBuffer<unsigned int> GridRW : register( u0 ); +StructuredBuffer<unsigned int> GridRO : register( t3 ); + +RWStructuredBuffer<uint2> GridIndicesRW : register( u0 ); +StructuredBuffer<uint2> GridIndicesRO : register( t4 ); + + +//-------------------------------------------------------------------------------------- +// Grid Construction +//-------------------------------------------------------------------------------------- + +// For simplicity, this sample uses a 16-bit hash based on the grid cell and +// a 16-bit particle ID to keep track of the particles while sorting +// This imposes a limitation of 64K particles and 256x256 grid work +// You could extended the implementation to support large scenarios by using a uint2 + +float2 GridCalculateCell(float2 position) +{ + return clamp(position * g_vGridDim.xy + g_vGridDim.zw, float2(0, 0), float2(255, 255)); +} + +unsigned int GridConstuctKey(uint2 xy) +{ + // Bit pack [-----UNUSED-----][----Y---][----X---] + // 16-bit 8-bit 8-bit + return dot(xy.yx, uint2(256, 1)); +} + +unsigned int GridConstuctKeyValuePair(uint2 xy, uint value) +{ + // Bit pack [----Y---][----X---][-----VALUE------] + // 8-bit 8-bit 16-bit + return dot(uint3(xy.yx, value), uint3(256*256*256, 256*256, 1)); +} + +unsigned int GridGetKey(unsigned int keyvaluepair) +{ + return (keyvaluepair >> 16); +} + +unsigned int GridGetValue(unsigned int keyvaluepair) +{ + return (keyvaluepair & 0xFFFF); +} + + +//-------------------------------------------------------------------------------------- +// Build Grid +//-------------------------------------------------------------------------------------- + +[numthreads(SIMULATION_BLOCK_SIZE, 1, 1)] +void BuildGridCS( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex ) +{ + const unsigned int P_ID = DTid.x; // Particle ID to operate on + + float2 
position = ParticlesRO[P_ID].position; + float2 grid_xy = GridCalculateCell( position ); + + GridRW[P_ID] = GridConstuctKeyValuePair((uint2)grid_xy, P_ID); +} + + +//-------------------------------------------------------------------------------------- +// Build Grid Indices +//-------------------------------------------------------------------------------------- + +[numthreads(SIMULATION_BLOCK_SIZE, 1, 1)] +void ClearGridIndicesCS( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex ) +{ + GridIndicesRW[DTid.x] = uint2(0, 0); +} + +[numthreads(SIMULATION_BLOCK_SIZE, 1, 1)] +void BuildGridIndicesCS( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex ) +{ + const unsigned int G_ID = DTid.x; // Grid ID to operate on + unsigned int G_ID_PREV = (G_ID == 0)? g_iNumParticles : G_ID; G_ID_PREV--; + unsigned int G_ID_NEXT = G_ID + 1; if (G_ID_NEXT == g_iNumParticles) { G_ID_NEXT = 0; } + + unsigned int cell = GridGetKey( GridRO[G_ID] ); + unsigned int cell_prev = GridGetKey( GridRO[G_ID_PREV] ); + unsigned int cell_next = GridGetKey( GridRO[G_ID_NEXT] ); + if (cell != cell_prev) + { + // I'm the start of a cell + GridIndicesRW[cell].x = G_ID; + } + if (cell != cell_next) + { + // I'm the end of a cell + GridIndicesRW[cell].y = G_ID + 1; + } +} + + +//-------------------------------------------------------------------------------------- +// Rearrange Particles +//-------------------------------------------------------------------------------------- + +[numthreads(SIMULATION_BLOCK_SIZE, 1, 1)] +void RearrangeParticlesCS( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex ) +{ + const unsigned int ID = DTid.x; // Particle ID to operate on + const unsigned int G_ID = GridGetValue( GridRO[ ID ] ); + ParticlesRW[ID] = ParticlesRO[ G_ID ]; +} + + 
+//-------------------------------------------------------------------------------------- +// Density Calculation +//-------------------------------------------------------------------------------------- + +float CalculateDensity(float r_sq) +{ + const float h_sq = g_fSmoothlen * g_fSmoothlen; + // Implements this equation: + // W_poly6(r, h) = 315 / (64 * pi * h^9) * (h^2 - r^2)^3 + // g_fDensityCoef = fParticleMass * 315.0f / (64.0f * PI * fSmoothlen^9) + return g_fDensityCoef * (h_sq - r_sq) * (h_sq - r_sq) * (h_sq - r_sq); +} + + +//-------------------------------------------------------------------------------------- +// Simple N^2 Algorithm +//-------------------------------------------------------------------------------------- + +[numthreads(SIMULATION_BLOCK_SIZE, 1, 1)] +void DensityCS_Simple( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex ) +{ + const unsigned int P_ID = DTid.x; + const float h_sq = g_fSmoothlen * g_fSmoothlen; + float2 P_position = ParticlesRO[P_ID].position; + + float density = 0; + + // Calculate the density based on all neighbors + for (uint N_ID = 0 ; N_ID < g_iNumParticles ; N_ID++) + { + float2 N_position = ParticlesRO[N_ID].position; + + float2 diff = N_position - P_position; + float r_sq = dot(diff, diff); + if (r_sq < h_sq) + { + density += CalculateDensity(r_sq); + } + } + + ParticlesDensityRW[P_ID].density = density; +} + + +//-------------------------------------------------------------------------------------- +// Shared Memory Optimized N^2 Algorithm +//-------------------------------------------------------------------------------------- + +groupshared float2 density_shared_pos[SIMULATION_BLOCK_SIZE]; + +[numthreads(SIMULATION_BLOCK_SIZE, 1, 1)] +void DensityCS_Shared( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex ) +{ + const unsigned int P_ID = DTid.x; + const float h_sq = g_fSmoothlen * 
g_fSmoothlen; + float2 P_position = ParticlesRO[P_ID].position; + + float density = 0; + + // Calculate the density based on all neighbors + [loop] + for (uint N_block_ID = 0 ; N_block_ID < g_iNumParticles ; N_block_ID += SIMULATION_BLOCK_SIZE) + { + // Cache a tile of particles unto shared memory to increase IO efficiency + density_shared_pos[GI] = ParticlesRO[N_block_ID + GI].position; + + GroupMemoryBarrierWithGroupSync(); + + for (uint N_tile_ID = 0; N_tile_ID < SIMULATION_BLOCK_SIZE; N_tile_ID++) + { + float2 N_position = density_shared_pos[N_tile_ID]; + + float2 diff = N_position - P_position; + float r_sq = dot(diff, diff); + if (r_sq < h_sq) + { + density += CalculateDensity(r_sq); + } + } + + GroupMemoryBarrierWithGroupSync(); + } + + ParticlesDensityRW[P_ID].density = density; +} + + +//-------------------------------------------------------------------------------------- +// Optimized Grid + Sort Algorithm +//-------------------------------------------------------------------------------------- + +[numthreads(SIMULATION_BLOCK_SIZE, 1, 1)] +void DensityCS_Grid( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex ) +{ + const unsigned int P_ID = DTid.x; + const float h_sq = g_fSmoothlen * g_fSmoothlen; + float2 P_position = ParticlesRO[P_ID].position; + + float density = 0; + + // Calculate the density based on neighbors from the 8 adjacent cells + current cell + int2 G_XY = (int2)GridCalculateCell( P_position ); + for (int Y = max(G_XY.y - 1, 0) ; Y <= min(G_XY.y + 1, 255) ; Y++) + { + for (int X = max(G_XY.x - 1, 0) ; X <= min(G_XY.x + 1, 255) ; X++) + { + unsigned int G_CELL = GridConstuctKey(uint2(X, Y)); + uint2 G_START_END = GridIndicesRO[G_CELL]; + for (unsigned int N_ID = G_START_END.x ; N_ID < G_START_END.y ; N_ID++) + { + float2 N_position = ParticlesRO[N_ID].position; + + float2 diff = N_position - P_position; + float r_sq = dot(diff, diff); + if (r_sq < h_sq) + { + density += 
CalculateDensity(r_sq); + } + } + } + } + + ParticlesDensityRW[P_ID].density = density; +} + + +//-------------------------------------------------------------------------------------- +// Force Calculation +//-------------------------------------------------------------------------------------- + +float CalculatePressure(float density) +{ + // Implements this equation: + // Pressure = B * ((rho / rho_0)^y - 1) + return g_fPressureStiffness * max(pow(density / g_fRestDensity, 3) - 1, 0); +} + +float2 CalculateGradPressure(float r, float P_pressure, float N_pressure, float N_density, float2 diff) +{ + const float h = g_fSmoothlen; + float avg_pressure = 0.5f * (N_pressure + P_pressure); + // Implements this equation: + // W_spkiey(r, h) = 15 / (pi * h^6) * (h - r)^3 + // GRAD( W_spikey(r, h) ) = -45 / (pi * h^6) * (h - r)^2 + // g_fGradPressureCoef = fParticleMass * -45.0f / (PI * fSmoothlen^6) + return g_fGradPressureCoef * avg_pressure / N_density * (h - r) * (h - r) / r * (diff); +} + +float2 CalculateLapVelocity(float r, float2 P_velocity, float2 N_velocity, float N_density) +{ + const float h = g_fSmoothlen; + float2 vel_diff = (N_velocity - P_velocity); + // Implements this equation: + // W_viscosity(r, h) = 15 / (2 * pi * h^3) * (-r^3 / (2 * h^3) + r^2 / h^2 + h / (2 * r) - 1) + // LAPLACIAN( W_viscosity(r, h) ) = 45 / (pi * h^6) * (h - r) + // g_fLapViscosityCoef = fParticleMass * fViscosity * 45.0f / (PI * fSmoothlen^6) + return g_fLapViscosityCoef / N_density * (h - r) * vel_diff; +} + + +//-------------------------------------------------------------------------------------- +// Simple N^2 Algorithm +//-------------------------------------------------------------------------------------- + +[numthreads(SIMULATION_BLOCK_SIZE, 1, 1)] +void ForceCS_Simple( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex ) +{ + const unsigned int P_ID = DTid.x; // Particle ID to operate on + + float2 P_position 
= ParticlesRO[P_ID].position; + float2 P_velocity = ParticlesRO[P_ID].velocity; + float P_density = ParticlesDensityRO[P_ID].density; + float P_pressure = CalculatePressure(P_density); + + const float h_sq = g_fSmoothlen * g_fSmoothlen; + + float2 acceleration = float2(0, 0); + + // Calculate the acceleration based on all neighbors + for (uint N_ID = 0 ; N_ID < g_iNumParticles ; N_ID++) + { + float2 N_position = ParticlesRO[N_ID].position; + + float2 diff = N_position - P_position; + float r_sq = dot(diff, diff); + if (r_sq < h_sq && P_ID != N_ID) + { + float2 N_velocity = ParticlesRO[N_ID].velocity; + float N_density = ParticlesDensityRO[N_ID].density; + float N_pressure = CalculatePressure(N_density); + float r = sqrt(r_sq); + + // Pressure Term + acceleration += CalculateGradPressure(r, P_pressure, N_pressure, N_density, diff); + + // Viscosity Term + acceleration += CalculateLapVelocity(r, P_velocity, N_velocity, N_density); + } + } + + ParticlesForcesRW[P_ID].acceleration = acceleration / P_density; +} + + +//-------------------------------------------------------------------------------------- +// Shared Memory Optimized N^2 Algorithm +//-------------------------------------------------------------------------------------- + +groupshared struct { float2 position; float2 velocity; float density; } force_shared_pos[SIMULATION_BLOCK_SIZE]; + +[numthreads(SIMULATION_BLOCK_SIZE, 1, 1)] +void ForceCS_Shared( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex ) +{ + const unsigned int P_ID = DTid.x; // Particle ID to operate on + + float2 P_position = ParticlesRO[P_ID].position; + float2 P_velocity = ParticlesRO[P_ID].velocity; + float P_density = ParticlesDensityRO[P_ID].density; + float P_pressure = CalculatePressure(P_density); + + const float h_sq = g_fSmoothlen * g_fSmoothlen; + + float2 acceleration = float2(0, 0); + + // Calculate the acceleration based on all neighbors + [loop] + for (uint 
N_block_ID = 0 ; N_block_ID < g_iNumParticles ; N_block_ID += SIMULATION_BLOCK_SIZE) + { + // Cache a tile of particles unto shared memory to increase IO efficiency + force_shared_pos[GI].position = ParticlesRO[N_block_ID + GI].position; + force_shared_pos[GI].velocity = ParticlesRO[N_block_ID + GI].velocity; + force_shared_pos[GI].density = ParticlesDensityRO[N_block_ID + GI].density; + + GroupMemoryBarrierWithGroupSync(); + + [loop] + for (uint N_tile_ID = 0; N_tile_ID < SIMULATION_BLOCK_SIZE; N_tile_ID++ ) + { + uint N_ID = N_block_ID + N_tile_ID; + float2 N_position = force_shared_pos[N_tile_ID].position; + + float2 diff = N_position - P_position; + float r_sq = dot(diff, diff); + if (r_sq < h_sq && P_ID != N_ID) + { + float2 N_velocity = force_shared_pos[N_tile_ID].velocity; + float N_density = force_shared_pos[N_tile_ID].density; + float N_pressure = CalculatePressure(N_density); + float r = sqrt(r_sq); + + // Pressure Term + acceleration += CalculateGradPressure(r, P_pressure, N_pressure, N_density, diff); + + // Viscosity Term + acceleration += CalculateLapVelocity(r, P_velocity, N_velocity, N_density); + } + } + + GroupMemoryBarrierWithGroupSync(); + } + + ParticlesForcesRW[P_ID].acceleration = acceleration / P_density; +} + + +//-------------------------------------------------------------------------------------- +// Optimized Grid + Sort Algorithm +//-------------------------------------------------------------------------------------- + +[numthreads(SIMULATION_BLOCK_SIZE, 1, 1)] +void ForceCS_Grid( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex ) +{ + const unsigned int P_ID = DTid.x; // Particle ID to operate on + + float2 P_position = ParticlesRO[P_ID].position; + float2 P_velocity = ParticlesRO[P_ID].velocity; + float P_density = ParticlesDensityRO[P_ID].density; + float P_pressure = CalculatePressure(P_density); + + const float h_sq = g_fSmoothlen * g_fSmoothlen; + + float2 
acceleration = float2(0, 0); + + // Calculate the acceleration based on neighbors from the 8 adjacent cells + current cell + int2 G_XY = (int2)GridCalculateCell( P_position ); + for (int Y = max(G_XY.y - 1, 0) ; Y <= min(G_XY.y + 1, 255) ; Y++) + { + for (int X = max(G_XY.x - 1, 0) ; X <= min(G_XY.x + 1, 255) ; X++) + { + unsigned int G_CELL = GridConstuctKey(uint2(X, Y)); + uint2 G_START_END = GridIndicesRO[G_CELL]; + for (unsigned int N_ID = G_START_END.x ; N_ID < G_START_END.y ; N_ID++) + { + float2 N_position = ParticlesRO[N_ID].position; + + float2 diff = N_position - P_position; + float r_sq = dot(diff, diff); + if (r_sq < h_sq && P_ID != N_ID) + { + float2 N_velocity = ParticlesRO[N_ID].velocity; + float N_density = ParticlesDensityRO[N_ID].density; + float N_pressure = CalculatePressure(N_density); + float r = sqrt(r_sq); + + // Pressure Term + acceleration += CalculateGradPressure(r, P_pressure, N_pressure, N_density, diff); + + // Viscosity Term + acceleration += CalculateLapVelocity(r, P_velocity, N_velocity, N_density); + } + } + } + } + + ParticlesForcesRW[P_ID].acceleration = acceleration / P_density; +} + + +//-------------------------------------------------------------------------------------- +// Integration +//-------------------------------------------------------------------------------------- + +[numthreads(SIMULATION_BLOCK_SIZE, 1, 1)] +void IntegrateCS( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex ) +{ + const unsigned int P_ID = DTid.x; // Particle ID to operate on + + float2 position = ParticlesRO[P_ID].position; + float2 velocity = ParticlesRO[P_ID].velocity; + float2 acceleration = ParticlesForcesRO[P_ID].acceleration; + + // Apply the forces from the map walls + [unroll] + for (unsigned int i = 0 ; i < 4 ; i++) + { + float dist = dot(float3(position, 1), g_vPlanes[i]); + acceleration += min(dist, 0) * -g_fWallStiffness * g_vPlanes[i].xy; + } + + // Apply gravity + 
acceleration += g_vGravity.xy; + + // Integrate + velocity += g_fTimeStep * acceleration; + position += g_fTimeStep * velocity; + + // Update + ParticlesRW[P_ID].position = position; + ParticlesRW[P_ID].velocity = velocity; +} diff --git a/tests/hlsl/dxsdk/FluidCS11/FluidRender.hlsl b/tests/hlsl/dxsdk/FluidCS11/FluidRender.hlsl new file mode 100644 index 000000000..d7e24b7bc --- /dev/null +++ b/tests/hlsl/dxsdk/FluidCS11/FluidRender.hlsl @@ -0,0 +1,112 @@ +//TEST_IGNORE_FILE: Currently failing due to Spire compiler issues. +//TEST:COMPARE_HLSL: -target dxbc-assembly -profile vs_4_0 -entry ParticleVS -profile gs_4_0 -entry ParticleGS -profile ps_4_0 -entry ParticlePS +//-------------------------------------------------------------------------------------- +// File: FluidRender.hlsl +// +// Copyright (c) Microsoft Corporation. All rights reserved. +//-------------------------------------------------------------------------------------- + +//-------------------------------------------------------------------------------------- +// Particle Rendering +//-------------------------------------------------------------------------------------- + +struct Particle { + float2 position; + float2 velocity; +}; + +struct ParticleDensity { + float density; +}; + +StructuredBuffer<Particle> ParticlesRO : register( t0 ); +StructuredBuffer<ParticleDensity> ParticleDensityRO : register( t1 ); + +cbuffer cbRenderConstants : register( b0 ) +{ + matrix g_mViewProjection; + float g_fParticleSize; +}; + +struct VSParticleOut +{ + float2 position : POSITION; + float4 color : COLOR; +}; + +struct GSParticleOut +{ + float4 position : SV_Position; + float4 color : COLOR; + float2 texcoord : TEXCOORD; +}; + + +//-------------------------------------------------------------------------------------- +// Visualization Helper +//-------------------------------------------------------------------------------------- + +static const float4 Rainbow[5] = { + float4(1, 0, 0, 1), // red + float4(1, 1, 0, 
1), // orange + float4(0, 1, 0, 1), // green + float4(0, 1, 1, 1), // teal + float4(0, 0, 1, 1), // blue +}; + +float4 VisualizeNumber(float n) +{ + return lerp( Rainbow[ floor(n * 4.0f) ], Rainbow[ ceil(n * 4.0f) ], frac(n * 4.0f) ); +} + +float4 VisualizeNumber(float n, float lower, float upper) +{ + return VisualizeNumber( saturate( (n - lower) / (upper - lower) ) ); +} + + +//-------------------------------------------------------------------------------------- +// Vertex Shader +//-------------------------------------------------------------------------------------- + +VSParticleOut ParticleVS(uint ID : SV_VertexID) +{ + VSParticleOut Out = (VSParticleOut)0; + Out.position = ParticlesRO[ID].position; + Out.color = VisualizeNumber(ParticleDensityRO[ID].density, 1000.0f, 2000.0f); + return Out; +} + + +//-------------------------------------------------------------------------------------- +// Particle Geometry Shader +//-------------------------------------------------------------------------------------- + +static const float2 g_positions[4] = { float2(-1, 1), float2(1, 1), float2(-1, -1), float2(1, -1) }; +static const float2 g_texcoords[4] = { float2(0, 1), float2(1, 1), float2(0, 0), float2(1, 0) }; + +[maxvertexcount(4)] +void ParticleGS(point VSParticleOut In[1], inout TriangleStream<GSParticleOut> SpriteStream) +{ + [unroll] + for (int i = 0; i < 4; i++) + { + GSParticleOut Out = (GSParticleOut)0; + float4 position = float4(In[0].position, 0, 1) + g_fParticleSize * float4(g_positions[i], 0, 0); + Out.position = mul(position, g_mViewProjection); + Out.color = In[0].color; + Out.texcoord = g_texcoords[i]; + SpriteStream.Append(Out); + } + SpriteStream.RestartStrip(); +} + + +//-------------------------------------------------------------------------------------- +// Pixel Shader +//-------------------------------------------------------------------------------------- + +float4 ParticlePS(GSParticleOut In) : SV_Target +{ + return In.color; +} diff --git 
a/tests/hlsl/dxsdk/HDRToneMappingCS11/BrightPassAndHorizFilterCS.hlsl b/tests/hlsl/dxsdk/HDRToneMappingCS11/BrightPassAndHorizFilterCS.hlsl new file mode 100644 index 000000000..87bad46ed --- /dev/null +++ b/tests/hlsl/dxsdk/HDRToneMappingCS11/BrightPassAndHorizFilterCS.hlsl @@ -0,0 +1,64 @@ +//TEST_IGNORE_FILE: Currently failing due to Spire compiler issues. +//TEST:COMPARE_HLSL: -target dxbc-assembly -profile cs_4_0 -entry CSMain +//-------------------------------------------------------------------------------------- +// File: BrightPassAndHorizFilterCS.hlsl +// +// The CS for bright pass and horizontal blur, used in CS path of +// HDRToneMappingCS11 sample +// +// Copyright (c) Microsoft Corporation. All rights reserved. +//-------------------------------------------------------------------------------------- +static const float MIDDLE_GRAY = 0.72f; +static const float LUM_WHITE = 1.5f; +static const float BRIGHT_THRESHOLD = 0.5f; + +Texture2D Input : register( t0 ); +StructuredBuffer<float> lum : register( t1 ); +RWStructuredBuffer<float4> Result : register( u0 ); + +cbuffer cb0 +{ + float4 g_avSampleWeights[15]; + uint g_outputwidth; + float g_inverse; + int2 g_inputsize; +} + +#define kernelhalf 7 +#define groupthreads 128 +groupshared float4 temp[groupthreads]; + +[numthreads( groupthreads, 1, 1 )] +void CSMain( uint3 Gid : SV_GroupID, uint GI : SV_GroupIndex ) +{ + int2 coord = int2( GI - kernelhalf + (groupthreads - kernelhalf * 2) * Gid.x, Gid.y ); + coord = coord.xy * 8 + int2(4, 3); + coord = clamp( coord, int2(0, 0), int2(g_inputsize.x-1, g_inputsize.y-1) ); + float4 vColor = Input.Load( int3(coord, 0) ); + + float fLum = lum[0]*g_inverse; + + // Bright pass and tone mapping + vColor = max( 0.0f, vColor - BRIGHT_THRESHOLD ); + vColor *= MIDDLE_GRAY / (fLum + 0.001f); + vColor *= (1.0f + vColor/LUM_WHITE); + vColor /= (1.0f + vColor); + + temp[GI] = vColor; + + GroupMemoryBarrierWithGroupSync(); + + // Horizontal blur + if ( GI >= kernelhalf && + GI < 
(groupthreads - kernelhalf) && + ( (Gid.x * (groupthreads - 2 * kernelhalf) + GI - kernelhalf) < g_outputwidth) ) + { + float4 vOut = 0; + + [unroll] + for ( int i = -kernelhalf; i <= kernelhalf; ++i ) + vOut += temp[GI + i] * g_avSampleWeights[i + kernelhalf]; + + Result[GI - kernelhalf + (groupthreads - kernelhalf * 2) * Gid.x + Gid.y * g_outputwidth] = float4(vOut.rgb, 1.0f); + } +} diff --git a/tests/hlsl/dxsdk/HDRToneMappingCS11/DumpToTexture.hlsl b/tests/hlsl/dxsdk/HDRToneMappingCS11/DumpToTexture.hlsl new file mode 100644 index 000000000..d2d9611ce --- /dev/null +++ b/tests/hlsl/dxsdk/HDRToneMappingCS11/DumpToTexture.hlsl @@ -0,0 +1,29 @@ +//TEST_IGNORE_FILE: Currently failing due to Spire compiler issues. +//TEST:COMPARE_HLSL: -target dxbc-assembly -profile ps_4_0 -entry PSDump +//-------------------------------------------------------------------------------------- +// File: DumpToTexture.hlsl +// +// The PS for converting CS output buffer to a texture, used in CS path of +// HDRToneMappingCS11 sample +// +// Copyright (c) Microsoft Corporation. All rights reserved. +//-------------------------------------------------------------------------------------- +StructuredBuffer<float4> buffer : register( t0 ); + +struct QuadVS_Output +{ + float4 Pos : SV_POSITION; + float2 Tex : TEXCOORD0; +}; + +cbuffer cbPS : register( b0 ) +{ + uint4 g_param; +}; + +float4 PSDump( QuadVS_Output Input ) : SV_TARGET +{ + // To calculate the buffer offset, it is natural to use the screen space coordinates, + // Input.Pos is the screen space coordinates of the pixel being written + return buffer[ (Input.Pos.x - 0.5) + (Input.Pos.y - 0.5) * g_param.x ]; +} diff --git a/tests/hlsl/dxsdk/HDRToneMappingCS11/FilterCS.hlsl b/tests/hlsl/dxsdk/HDRToneMappingCS11/FilterCS.hlsl new file mode 100644 index 000000000..09c91669a --- /dev/null +++ b/tests/hlsl/dxsdk/HDRToneMappingCS11/FilterCS.hlsl @@ -0,0 +1,73 @@ +//TEST_IGNORE_FILE: Currently failing due to Spire compiler issues. 
+//TEST:COMPARE_HLSL: -target dxbc-assembly -profile cs_4_0 -entry CSVerticalFilter -entry CSHorizFilter +//-------------------------------------------------------------------------------------- +// File: FilterCS.hlsl +// +// The CSs for doing vertical and horizontal blur, used in CS path of +// HDRToneMappingCS11 sample +// +// Copyright (c) Microsoft Corporation. All rights reserved. +//-------------------------------------------------------------------------------------- +StructuredBuffer<float4> InputBuf : register( t0 ); +Texture2D InputTex : register( t1 ); +RWStructuredBuffer<float4> Result : register( u0 ); + +cbuffer cb0 +{ + float4 g_avSampleWeights[15]; + int2 g_outputsize; + int2 g_inputsize; +} + +#define kernelhalf 7 // blur radius: taps run from -kernelhalf to +kernelhalf, indexing all 15 entries of g_avSampleWeights +#define groupthreads 128 // threads per group; also the element count of the shared tile `temp` +groupshared float4 temp[groupthreads]; + +[numthreads( groupthreads, 1, 1 )] +void CSVerticalFilter( uint3 Gid : SV_GroupID, uint GI : SV_GroupIndex ) +{ + int offsety = GI - kernelhalf + (groupthreads - kernelhalf * 2) * Gid.y; // consecutive groups overlap by 2*kernelhalf rows + offsety = clamp( offsety, 0, g_inputsize.y-1 ); // keep the row inside the input, so out-of-range threads read the edge row + int offset = Gid.x + offsety * g_inputsize.x; // InputBuf is indexed as x + y * width, with the column taken from Gid.x + temp[GI] = InputBuf[offset]; + + GroupMemoryBarrierWithGroupSync(); + + // Vertical blur: only threads with kernelhalf neighbours on both sides inside the tile, and whose output row is below g_outputsize.y, write a result + if ( GI >= kernelhalf && + GI < (groupthreads - kernelhalf) && + ( (GI - kernelhalf + (groupthreads - kernelhalf * 2) * Gid.y) < g_outputsize.y) ) + { + float4 vOut = 0; + + [unroll] + for ( int i = -kernelhalf; i <= kernelhalf; ++i ) + vOut += temp[GI + i] * g_avSampleWeights[i + kernelhalf]; + + Result[Gid.x + (GI - kernelhalf + (groupthreads - kernelhalf * 2) * Gid.y) * g_outputsize.x] = float4(vOut.rgb, 1.0f); // alpha is forced to 1 + } +} + +[numthreads( groupthreads, 1, 1 )] +void CSHorizFilter( uint3 Gid : SV_GroupID, uint GI : SV_GroupIndex ) +{ + int2 coord = int2( GI - kernelhalf + (groupthreads - kernelhalf * 2) * Gid.x, Gid.y ); // consecutive groups overlap by 2*kernelhalf columns + coord = clamp( coord, int2(0, 0), int2(g_inputsize.x-1, g_inputsize.y-1) ); + temp[GI] = InputTex.Load( int3(coord, 0) ); + + GroupMemoryBarrierWithGroupSync(); + + // Horizontal 
blur: only threads with kernelhalf neighbours on both sides inside the tile, and whose output column is below g_outputsize.x, write a result + if ( GI >= kernelhalf && + GI < (groupthreads - kernelhalf) && + ( (Gid.x * (groupthreads - 2 * kernelhalf) + GI - kernelhalf) < g_outputsize.x) ) + { + float4 vOut = 0; + + [unroll] + for ( int i = -kernelhalf; i <= kernelhalf; ++i ) + vOut += temp[GI + i] * g_avSampleWeights[i + kernelhalf]; + + Result[GI - kernelhalf + (groupthreads - kernelhalf * 2) * Gid.x + Gid.y * g_outputsize.x] = float4(vOut.rgb, 1.0f); // alpha is forced to 1 + } +} diff --git a/tests/hlsl/dxsdk/HDRToneMappingCS11/FinalPass.hlsl b/tests/hlsl/dxsdk/HDRToneMappingCS11/FinalPass.hlsl new file mode 100644 index 000000000..a4673c237 --- /dev/null +++ b/tests/hlsl/dxsdk/HDRToneMappingCS11/FinalPass.hlsl @@ -0,0 +1,79 @@ +//TEST_IGNORE_FILE: Currently failing due to Spire compiler issues. +//TEST:COMPARE_HLSL: -target dxbc-assembly -profile vs_4_0 -entry QuadVS -profile ps_4_0 -entry PSFinalPass -entry PSFinalPassForCPUReduction +//-------------------------------------------------------------------------------------- +// File: FinalPass.hlsl +// +// The PSs for doing tone-mapping based on the input luminance, used in CS path of +// HDRToneMappingCS11 sample +// +// Copyright (c) Microsoft Corporation. All rights reserved. 
+//-------------------------------------------------------------------------------------- +struct QuadVS_Input +{ + float4 Pos : POSITION; + float2 Tex : TEXCOORD0; +}; + +struct QuadVS_Output +{ + float4 Pos : SV_POSITION; + float2 Tex : TEXCOORD0; +}; + +QuadVS_Output QuadVS( QuadVS_Input Input ) // pass-through vertex shader: copies position and texcoord unchanged +{ + QuadVS_Output Output; + Output.Pos = Input.Pos; + Output.Tex = Input.Tex; + return Output; +} + +Texture2D<float4> tex : register( t0 ); +StructuredBuffer<float> lum : register( t1 ); +Texture2D<float4> bloom : register( t2 ); + +SamplerState PointSampler : register (s0); +SamplerState LinearSampler : register (s1); + + +static const float MIDDLE_GRAY = 0.72f; +static const float LUM_WHITE = 1.5f; + +cbuffer cbPS : register( b0 ) +{ + float4 g_param; +}; + +float4 PSFinalPass( QuadVS_Output Input ) : SV_TARGET +{ + float4 vColor = tex.Sample( PointSampler, Input.Tex ); + float fLum = lum[0]*g_param.x; // luminance comes from the `lum` buffer, scaled by g_param.x + float3 vBloom = bloom.Sample( LinearSampler, Input.Tex ); // float4 sample assigned to a float3: the alpha channel is dropped + + // Tone mapping (the 0.001f keeps the divide finite when fLum is 0) + vColor.rgb *= MIDDLE_GRAY / (fLum + 0.001f); + vColor.rgb *= (1.0f + vColor/LUM_WHITE); + vColor.rgb /= (1.0f + vColor); + + vColor.rgb += 0.6f * vBloom; + vColor.a = 1.0f; + + return vColor; +} + +float4 PSFinalPassForCPUReduction( QuadVS_Output Input ) : SV_TARGET +{ + float4 vColor = tex.Sample( PointSampler, Input.Tex ); + float fLum = g_param.x; // unlike PSFinalPass, luminance is taken directly from the constant buffer and `lum` is not read + float3 vBloom = bloom.Sample( LinearSampler, Input.Tex ); // float4 sample assigned to a float3: the alpha channel is dropped + + // Tone mapping (the 0.001f keeps the divide finite when fLum is 0) + vColor.rgb *= MIDDLE_GRAY / (fLum + 0.001f); + vColor.rgb *= (1.0f + vColor/LUM_WHITE); + vColor.rgb /= (1.0f + vColor); + + vColor.rgb += 0.6f * vBloom; + vColor.a = 1.0f; + + return vColor; +} diff --git a/tests/hlsl/dxsdk/HDRToneMappingCS11/PSApproach.hlsl b/tests/hlsl/dxsdk/HDRToneMappingCS11/PSApproach.hlsl new file mode 100644 index 000000000..2b18cf0a1 --- /dev/null +++ b/tests/hlsl/dxsdk/HDRToneMappingCS11/PSApproach.hlsl @@ -0,0 +1,129 @@ +//TEST_IGNORE_FILE: Currently failing due to Spire compiler issues. 
+//TEST:COMPARE_HLSL: -target dxbc-assembly -profile ps_4_0 -entry DownScale2x2_Lum -entry DownScale3x3 -entry FinalPass -entry DownScale3x3_BrightPass -entry Bloom +//-------------------------------------------------------------------------------------- +// File: PSApproach.hlsl +// +// The PSs for doing post-processing, used in PS path of +// HDRToneMappingCS11 sample +// +// Copyright (c) Microsoft Corporation. All rights reserved. +//-------------------------------------------------------------------------------------- +static const float4 LUM_VECTOR = float4(.299, .587, .114, 0); +static const float MIDDLE_GRAY = 0.72f; +static const float LUM_WHITE = 1.5f; +static const float BRIGHT_THRESHOLD = 0.5f; + +SamplerState PointSampler : register (s0); +SamplerState LinearSampler : register (s1); + +struct QuadVS_Output +{ + float4 Pos : SV_POSITION; + float2 Tex : TEXCOORD0; +}; + +Texture2D s0 : register(t0); // note: s0, s1 and s2 are textures bound to t-registers, not samplers +Texture2D s1 : register(t1); +Texture2D s2 : register(t2); + +float4 DownScale2x2_Lum ( QuadVS_Output Input ) : SV_TARGET +{ + float4 vColor = 0.0f; + float fAvg = 0.0f; + + for( int y = -1; y < 1; y++ ) + { + for( int x = -1; x < 1; x++ ) + { + // Sample the texel at integer offset (x,y); offsets are -1 and 0 on each axis + vColor = s0.Sample( PointSampler, Input.Tex, int2(x,y) ); + + fAvg += dot( vColor, LUM_VECTOR ); // accumulate luminance; the alpha weight in LUM_VECTOR is 0 + } + } + + fAvg /= 4; // four taps (2x2) were accumulated + + return float4(fAvg, fAvg, fAvg, 1.0f); +} + +float4 DownScale3x3( QuadVS_Output Input ) : SV_TARGET +{ + float fAvg = 0.0f; + float4 vColor; + + for( int y = -1; y <= 1; y++ ) + { + for( int x = -1; x <= 1; x++ ) + { + // Sample the texel at integer offset (x,y); offsets are -1, 0 and +1 on each axis + vColor = s0.Sample( PointSampler, Input.Tex, int2(x,y) ); + + fAvg += vColor.r; // only the red channel is accumulated + } + } + + // Divide the sum to complete the average + fAvg /= 9; + + return float4(fAvg, fAvg, fAvg, 1.0f); +} + +float4 FinalPass( QuadVS_Output Input ) : SV_TARGET +{ + //float4 vColor = 0; + float4 vColor = s0.Sample( PointSampler, Input.Tex ); + float4 vLum = s1.Sample( PointSampler, float2(0,0) ); + float3 vBloom = 
s2.Sample( LinearSampler, Input.Tex ); + + // Tone mapping (the 0.001f keeps the divide finite when vLum.r is 0) + vColor.rgb *= MIDDLE_GRAY / (vLum.r + 0.001f); + vColor.rgb *= (1.0f + vColor/LUM_WHITE); + vColor.rgb /= (1.0f + vColor); + + vColor.rgb += 0.6f * vBloom; + vColor.a = 1.0f; + + return vColor; +} + +float4 DownScale3x3_BrightPass( QuadVS_Output Input ) : SV_TARGET +{ + float3 vColor = 0.0f; + float4 vLum = s1.Sample( PointSampler, float2(0, 0) ); + float fLum = vLum.r; + + vColor = s0.Sample( PointSampler, Input.Tex ).rgb; + + // Bright pass and tone mapping (the 0.001f keeps the divide finite when fLum is 0) + vColor = max( 0.0f, vColor - BRIGHT_THRESHOLD ); + vColor *= MIDDLE_GRAY / (fLum + 0.001f); + vColor *= (1.0f + vColor/LUM_WHITE); + vColor /= (1.0f + vColor); + + return float4(vColor, 1.0f); +} + +cbuffer cb0 +{ + float2 g_avSampleOffsets[15]; + float4 g_avSampleWeights[15]; +} + +float4 Bloom( QuadVS_Output Input ) : SV_TARGET +{ + float4 vSample = 0.0f; + float4 vColor = 0.0f; + float2 vSamplePosition; + + for( int iSample = 0; iSample < 15; iSample++ ) + { + // Sample from adjacent points + vSamplePosition = Input.Tex + g_avSampleOffsets[iSample]; + vColor = s0.Sample( PointSampler, vSamplePosition); + + vSample += g_avSampleWeights[iSample]*vColor; // weighted sum over the 15 taps + } + + return vSample; +} diff --git a/tests/hlsl/dxsdk/HDRToneMappingCS11/ReduceTo1DCS.hlsl b/tests/hlsl/dxsdk/HDRToneMappingCS11/ReduceTo1DCS.hlsl new file mode 100644 index 000000000..027838743 --- /dev/null +++ b/tests/hlsl/dxsdk/HDRToneMappingCS11/ReduceTo1DCS.hlsl @@ -0,0 +1,72 @@ +//TEST_IGNORE_FILE: Currently failing due to Spire compiler issues. +//TEST:COMPARE_HLSL: -target dxbc-assembly -profile cs_4_0 -entry CSMain +//----------------------------------------------------------------------------- +// File: ReduceTo1DCS.hlsl +// +// Desc: Reduce an input Texture2D to a buffer +// +// Copyright (c) Microsoft Corporation. All rights reserved. 
+//----------------------------------------------------------------------------- +Texture2D Input : register( t0 ); +RWStructuredBuffer<float> Result : register( u0 ); + +cbuffer cbCS : register( b0 ) +{ + uint4 g_param; // (g_param.x, g_param.y) is the x and y dimensions of the Dispatch call + // (g_param.z, g_param.w) is the size of the above Input Texture2D +}; + +//#define CS_FULL_PIXEL_REDUCITON // Defining this or not must be the same as in HDRToneMappingCS11.cpp (the macro's spelling must also stay in sync with the #ifdef below) + +#define blocksize 8 +#define blocksizeY 8 +#define groupthreads (blocksize*blocksizeY) +groupshared float accum[groupthreads]; + +static const float4 LUM_VECTOR = float4(.299, .587, .114, 0); + +[numthreads(blocksize,blocksizeY,1)] +void CSMain( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex ) +{ + float4 s = +#ifdef CS_FULL_PIXEL_REDUCITON + Input.Load( uint3(DTid.xy , 0) )+ + Input.Load( uint3(DTid.xy + uint2(blocksize*g_param.x, 0), 0) ) + + Input.Load( uint3(DTid.xy + uint2(0, blocksizeY*g_param.y), 0) ) + + Input.Load( uint3(DTid.xy + uint2(blocksize*g_param.x, blocksizeY*g_param.y), 0) ); +#else + Input.Load( uint3((float)DTid.x/81.0f*g_param.z, (float)DTid.y/81.0f*g_param.w, 0) ); +#endif + + accum[GI] = dot( s, LUM_VECTOR ); // per-thread luminance; the alpha weight in LUM_VECTOR is 0 + + // Parallel reduction algorithm follows: the active range halves each step (32, 16, ..., 1) with a group barrier before each step, leaving the group's sum in accum[0] + GroupMemoryBarrierWithGroupSync(); + if ( GI < 32 ) + accum[GI] += accum[32+GI]; + + GroupMemoryBarrierWithGroupSync(); + if ( GI < 16 ) + accum[GI] += accum[16+GI]; + + GroupMemoryBarrierWithGroupSync(); + if ( GI < 8 ) + accum[GI] += accum[8+GI]; + + GroupMemoryBarrierWithGroupSync(); + if ( GI < 4 ) + accum[GI] += accum[4+GI]; + + GroupMemoryBarrierWithGroupSync(); + if ( GI < 2 ) + accum[GI] += accum[2+GI]; + + GroupMemoryBarrierWithGroupSync(); + if ( GI < 1 ) + accum[GI] += accum[1+GI]; + + if ( GI == 0 ) + { + Result[Gid.y*g_param.x+Gid.x] = accum[0]; // one result per group, laid out row-major over the dispatch grid + } +} diff --git a/tests/hlsl/dxsdk/HDRToneMappingCS11/ReduceToSingleCS.hlsl 
b/tests/hlsl/dxsdk/HDRToneMappingCS11/ReduceToSingleCS.hlsl new file mode 100644 index 000000000..cf506283e --- /dev/null +++ b/tests/hlsl/dxsdk/HDRToneMappingCS11/ReduceToSingleCS.hlsl @@ -0,0 +1,63 @@ +//TEST:COMPARE_HLSL: -target dxbc-assembly -profile cs_4_0 -entry CSMain +//----------------------------------------------------------------------------- +// File: ReduceToSingleCS.hlsl +// +// Desc: Reduce an input buffer by a factor of groupthreads +// +// Copyright (c) Microsoft Corporation. All rights reserved. +//----------------------------------------------------------------------------- + +StructuredBuffer<float> Input : register( t0 ); +RWStructuredBuffer<float> Result : register( u0 ); + +cbuffer cbCS : register( b0 ) +{ + uint4 g_param; // g_param.x is the actual elements contained in Input + // g_param.y is the x dimension of the Dispatch call +}; + +#define groupthreads 128 +groupshared float accum[groupthreads]; + +[numthreads(groupthreads,1,1)] +void CSMain( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex ) +{ + if ( DTid.x < g_param.x ) // threads past the end of Input contribute 0 to the sum + accum[GI] = Input[DTid.x]; + else + accum[GI] = 0; + + // Parallel reduction algorithm follows: the active range halves each step (64, 32, ..., 1) with a group barrier before each step, leaving the group's sum in accum[0] + GroupMemoryBarrierWithGroupSync(); + if ( GI < 64 ) + accum[GI] += accum[64+GI]; + + GroupMemoryBarrierWithGroupSync(); + if ( GI < 32 ) + accum[GI] += accum[32+GI]; + + GroupMemoryBarrierWithGroupSync(); + if ( GI < 16 ) + accum[GI] += accum[16+GI]; + + GroupMemoryBarrierWithGroupSync(); + if ( GI < 8 ) + accum[GI] += accum[8+GI]; + + GroupMemoryBarrierWithGroupSync(); + if ( GI < 4 ) + accum[GI] += accum[4+GI]; + + GroupMemoryBarrierWithGroupSync(); + if ( GI < 2 ) + accum[GI] += accum[2+GI]; + + GroupMemoryBarrierWithGroupSync(); + if ( GI < 1 ) + accum[GI] += accum[1+GI]; + + if ( GI == 0 ) + { + Result[Gid.x] = accum[0]; // one result per group + } +} diff --git a/tests/hlsl/dxsdk/HDRToneMappingCS11/skybox11.hlsl b/tests/hlsl/dxsdk/HDRToneMappingCS11/skybox11.hlsl new file 
mode 100644 index 000000000..2728665e2 --- /dev/null +++ b/tests/hlsl/dxsdk/HDRToneMappingCS11/skybox11.hlsl @@ -0,0 +1,44 @@ +//TEST_IGNORE_FILE: Currently failing due to Spire compiler issues. +//TEST:COMPARE_HLSL: -target dxbc-assembly -profile vs_4_0 -entry SkyboxVS -profile ps_4_0 -entry SkyboxPS +//----------------------------------------------------------------------------- +// File: SkyBox11.hlsl +// +// Desc: Skybox vertex and pixel shaders; the pixel shader samples a cube map along a per-vertex direction +// +// Copyright (c) Microsoft Corporation. All rights reserved. +//----------------------------------------------------------------------------- + +cbuffer cbPerObject : register( b0 ) +{ + row_major matrix g_mWorldViewProjection : packoffset( c0 ); +} + +TextureCube g_EnvironmentTexture : register( t0 ); +SamplerState g_sam : register( s0 ); + +struct SkyboxVS_Input +{ + float4 Pos : POSITION; +}; + +struct SkyboxVS_Output +{ + float4 Pos : SV_POSITION; + float3 Tex : TEXCOORD0; +}; + +SkyboxVS_Output SkyboxVS( SkyboxVS_Input Input ) +{ + SkyboxVS_Output Output; + + Output.Pos = Input.Pos; // the position is passed through untransformed + Output.Tex = normalize( mul(Input.Pos, g_mWorldViewProjection) ); // the transformed position becomes the cube-map lookup direction (float4 result stored into a float3) + + return Output; +} + +float4 SkyboxPS( SkyboxVS_Output Input ) : SV_TARGET +{ + float4 color = g_EnvironmentTexture.Sample( g_sam, Input.Tex ); + return color; +} diff --git a/tests/hlsl/dxsdk/InstancingFX11/Instancing.fx b/tests/hlsl/dxsdk/InstancingFX11/Instancing.fx new file mode 100644 index 000000000..3c8d45078 --- /dev/null +++ b/tests/hlsl/dxsdk/InstancingFX11/Instancing.fx @@ -0,0 +1,591 @@ +//TEST_IGNORE_FILE: +//-------------------------------------------------------------------------------------- +// File: Instancing.fx +// +// Copyright (c) Microsoft Corporation. All rights reserved. 
+//-------------------------------------------------------------------------------------- + +//-------------------------------------------------------------------------------------- +// Input and output structures +//-------------------------------------------------------------------------------------- +struct VSInstIn +{ + float3 pos : POSITION; + float3 norm : NORMAL; + float2 tex : TEXTURE0; + row_major float4x4 mTransform : mTransform; +}; + +struct VSSceneIn +{ + float3 pos : POSITION; + float3 norm : NORMAL; + float2 tex : TEXTURE0; +}; + +struct VSGrassIn +{ + float3 pos : POSITION; + float3 norm : NORMAL; + float2 tex : TEXTURE0; + row_major float4x4 mTransform : mTransform; + uint VertexID : SV_VertexID; +}; + +struct VSGrassOut +{ + float3 pos : POSITION; + float3 norm : NORMAL; + float2 tex : TEXTURE0; + uint VertexID : VERTID; +}; + +struct VSQuadIn +{ + float3 pos : POSITION; + float2 tex : TEXTURE0; + row_major float4x4 mTransform : mTransform; + float fOcc : fOcc; + uint InstanceId : SV_InstanceID; +}; + +struct PSSceneIn +{ + float4 pos : SV_Position; + float2 tex : TEXTURE0; + float4 color : COLOR0; +}; + +struct PSQuadIn +{ + float4 pos : SV_Position; + float3 tex : TEXTURE0; + float4 color : COLOR0; +}; + +//-------------------------------------------------------------------------------------- +// Constant buffers +//-------------------------------------------------------------------------------------- +cbuffer crarely +{ + float4x4 g_mTreeMatrices[50]; + uint g_iNumTrees; +}; + +cbuffer ceveryframe +{ + float4x4 g_mWorldViewProj; + float4x4 g_mWorldView; +}; + +cbuffer cmultipleperframe +{ + float g_GrassWidth; + float g_GrassHeight; + uint g_iGrassCoverage; +}; + +cbuffer cusercontrolled +{ + float g_GrassMessiness; +}; + +struct light_struct +{ + float4 direction; + float4 color; +}; + +cbuffer cimmutable +{ + light_struct g_lights[4] = { + { float4(0.620275, 0.683659, 0.384537, 1), float4(0.75, 0.599, 0.405, 1) }, //sun + { float4(0.063288, 
-0.987444, 0.144735, 1), float4(0.192, 0.273, 0.275, 1) }, //bottom + { float4(0.23007, 0.785579, -0.574422, 1), float4(0.300, 0.292, 0.223, 1) }, //highlight + { float4(-0.620275, -0.683659, -0.384537, 1), float4(0.0, 0.0, 0.1, 1) } //blue rim-light (direction is the exact negation of the sun's) + }; + + float4 g_ambient = float4(0.4945,0.465,0.5,1); + + float g_occDimHeight = 2400.0; //scalar that tells us how much to darken the tree near the top +}; + +cbuffer cgrassblade +{ + float3 g_positions[6] = + { + float3( -1, 0, 0 ), + float3( -1, 2, 0 ), + float3( 1, 0, 0 ), + float3( 1, 2, 0 ), + + float3( -1, 0, 0 ), // the last two entries repeat the first two + float3( -1, 2, 0 ), + }; + float2 g_texcoords[6] = + { + float2(0,1), + float2(0,0), + float2(1,1), + float2(1,0), + + float2(0,1), // the last two entries repeat the first two + float2(0,0), + }; +}; + +//-------------------------------------------------------------------------------------- +// Textures and Samplers +//-------------------------------------------------------------------------------------- +Texture2D g_txDiffuse; +Texture2DArray g_tx2dArray; +SamplerState g_samLinear +{ + Filter = ANISOTROPIC; + AddressU = Wrap; + AddressV = Wrap; +}; + +Texture1D g_txRandom; +SamplerState g_samPoint +{ + Filter = MIN_MAG_MIP_POINT; + AddressU = Wrap; + AddressV = Wrap; +}; + +//-------------------------------------------------------------------------------------- +// State structures +//-------------------------------------------------------------------------------------- +BlendState QuadAlphaBlendState +{ + AlphaToCoverageEnable = TRUE; + RenderTargetWriteMask[0] = 0x0F; +}; + +RasterizerState EnableMSAA +{ + CullMode = BACK; + MultisampleEnable = TRUE; +}; + +DepthStencilState DisableDepthTestWrite +{ + DepthEnable = FALSE; + DepthWriteMask = ZERO; +}; + +DepthStencilState EnableDepthTestWrite +{ + DepthEnable = TRUE; + DepthWriteMask = ALL; +}; + +BlendState NoBlending +{ + AlphaToCoverageEnable = FALSE; + BlendEnable[0] = FALSE; +}; + +//-------------------------------------------------------------------------------------- +// Sky vertex 
shader +//-------------------------------------------------------------------------------------- +PSSceneIn VSSkymain(VSSceneIn input) +{ + PSSceneIn output; + + // + // Transform the vert by the combined g_mWorldViewProj matrix + // + float4 v4Position = mul(float4(input.pos, 1), g_mWorldViewProj); + output.pos = v4Position; + + // + // Transfer the rest + // + output.tex = input.tex; + + output.color = float4(1,1,1,1); // constant white vertex color: no lighting is computed for the sky + + return output; +} + +//-------------------------------------------------------------------------------------- +// CalcLighting helper function. Calculates lighting from 4 light sources, adds ambient +// and attenuates for depth. Used by all techniques for lighting. +//-------------------------------------------------------------------------------------- +float4 CalcLighting( float3 norm, float depth ) +{ + float4 color = float4(0,0,0,0); + + // add the contributions of 4 directional lights + [unroll] for( int i=0; i<4; i++ ) + { + color += saturate( dot(g_lights[i].direction,norm) )*g_lights[i].color; // saturate clamps the dot product to [0,1], so back-facing lights add nothing + } + + // give some attenuation due to depth + float attenuate = depth / 10000.0; + float4 attenColor = float4(0.15, 0.2, 0.3, 0); + + // add it all up plus ambient + return (1-attenuate*0.23)*(color + g_ambient) + attenColor*attenuate; +} + +//-------------------------------------------------------------------------------------- +// Instancing vertex shader. Positions the vertices based upon the matrix stored +// in the second vertex stream. 
+//-------------------------------------------------------------------------------------- +PSSceneIn VSInstmain(VSInstIn input) +{ + PSSceneIn output; + + // + // Transform by our instance matrix + // + float4 InstancePosition = mul(float4(input.pos, 1), input.mTransform); + float4 ViewPos = mul(InstancePosition, g_mWorldView ); + + // + // Transform the instanced vert by the combined g_mWorldViewProj matrix + // + float4 v4Position = mul(InstancePosition, g_mWorldViewProj); + output.pos = v4Position; + + // + // Transfer the rest + // + output.tex = input.tex; + + // + // rotate the norm by the instance matrix, then light it using the view-space depth + // + float3 norm = mul(input.norm,(float3x3)input.mTransform); + output.color = CalcLighting( norm, ViewPos.z ); + + // + // Dim the color by how far up the tree we are. + // This is a nice way to fake occlusion of the branches by the leaves. + // + output.color *= 1.0f - saturate(input.pos.y/g_occDimHeight); + + + return output; +} + +//-------------------------------------------------------------------------------------- +// Quad (leaf) vertex shader. Instances the quad over multiple leaf positions and +// multiple trees. This demonstrates how to do double instancing. 
+//-------------------------------------------------------------------------------------- +PSQuadIn VSQuadmain(VSQuadIn input) +{ + PSQuadIn output; + + // base our leaf texture upon which instance id we are + uint iLeaf = input.InstanceId/g_iNumTrees; + uint iLeafTex = iLeaf%3; // the leaf index selects one of 3 array slices + output.tex = float3(input.tex, float(iLeafTex) ); + + // + // Transform the position by the Instance matrix + // + int iTree = input.InstanceId - (input.InstanceId/g_iNumTrees)*g_iNumTrees; // equivalent to InstanceId % g_iNumTrees + float4 vInstancePos = mul( float4(input.pos, 1), input.mTransform ); + float4 InstancePosition = mul(vInstancePos, g_mTreeMatrices[iTree] ); + float4 ViewPos = mul(InstancePosition, g_mWorldView ); + + // + // Transform the Instance position by the combined g_mWorldViewProj matrix + // + output.pos = mul(InstancePosition, g_mWorldViewProj); + + // pack distance from the eye into the color alpha channel + output.color = float4(input.fOcc,input.fOcc,input.fOcc,ViewPos.z); + + return output; +} + +//-------------------------------------------------------------------------------------- +// Grass vertex shader. Basically a passthrough except for instancing the island base +// mesh. +//-------------------------------------------------------------------------------------- +VSGrassOut VSGrassmain(VSGrassIn input) +{ + // simple transform into the instance space + VSGrassOut output; + output.pos = mul(float4(input.pos, 1), input.mTransform); // float4 result stored into the float3 output position + output.norm = mul(input.norm, (float3x3)input.mTransform); + output.tex = input.tex; + output.VertexID = input.VertexID; + + return output; +} + +//-------------------------------------------------------------------------------------- +// Quad (leaf) GS. Calculates the normal and lighting for the leaf. 
+//-------------------------------------------------------------------------------------- +[maxvertexcount(3)] +void GSQuadmain(triangle PSQuadIn input[3], inout TriangleStream<PSQuadIn> QuadStream) +{ + PSQuadIn output; + + // + // Calculate two edge vectors of the triangle (named faceNormalA/B; their cross product below is the face normal) + // + float4 faceNormalA = input[1].pos.xyzw - input[0].pos.xyzw; + float4 faceNormalB = input[2].pos.xyzw - input[0].pos.xyzw; + + // + // Cross product + // + float3 faceNormal = cross(faceNormalA, faceNormalB); + + // + // Normalize face normal + // + faceNormal = normalize(faceNormal); + + // + // Light the face normal; input[0].color.a carries the eye distance packed by VSQuadmain + // + float4 color1 = CalcLighting( faceNormal, input[0].color.a ); + color1 *= input[0].color; + + // + // Make sure we always have an alpha of 1 + // + color1.a = 1.0; + + // + // Emit out the new tri (all three verts share the same flat color) + // + for(int i=0; i<3; i++) + { + output.pos = input[i].pos; + output.color = color1; + output.tex = input[i].tex; + QuadStream.Append(output); + } + QuadStream.RestartStrip(); +} + +//-------------------------------------------------------------------------------------- +// RandomDir helper. Samples a random dir out of our 1d random texture. In this case +// we use a texture because the offset could be anywhere. If we were sampling linearly +// then we would probably just use a buffer and load from that. 
+//-------------------------------------------------------------------------------------- +float3 RandomDir(float fOffset) +{ + float tCoord = (fOffset) / 300.0; + return g_txRandom.SampleLevel( g_samPoint, tCoord, 0 ); +} + +//-------------------------------------------------------------------------------------- +// Helper to determine if a point is within a triangle +//-------------------------------------------------------------------------------------- +bool IsInTriangle( float3 P, float3 A, float3 B, float3 C ) +{ + float3 crossA = cross( B-A, P-A ); + float3 crossB = cross( C-B, P-B ); + float3 crossC = cross( A-C, P-C ); + + if( dot( crossA, crossB ) > 0 && + dot( crossB, crossC ) > 0 ) + { + return true; + } + else + { + return false; + } +} + +//-------------------------------------------------------------------------------------- +// Gets a random orientation matrix based upon the RandomDir function +//-------------------------------------------------------------------------------------- +float4x4 GetRandomOrientation( float3 Pos, float3 Norm, float fRandOffset ) +{ + float3 Tangent = RandomDir(fRandOffset); + + float3 Bitangent = normalize( cross( Tangent, Norm ) ); + Tangent = normalize( cross( Bitangent, Norm ) ); + + float4x4 matWorld = { float4( Tangent, 0 ), + float4( Norm, 0 ), + float4( Bitangent, 0 ), + float4( Pos, 1 ) }; + return matWorld; +} + +//-------------------------------------------------------------------------------------- +// Generates an actual grass blade +//-------------------------------------------------------------------------------------- +void OutputGrassBlade( VSGrassOut midPoint, inout TriangleStream<PSQuadIn> GrassStream, int iGrassTex ) +{ + PSQuadIn output; + + float4x4 mWorld = GetRandomOrientation( midPoint.pos, midPoint.norm, (float)midPoint.VertexID ); + float4 ViewPos = mul( midPoint.pos, g_mWorldView ); + + float3 grassNorm = midPoint.norm; + float4 color1 = CalcLighting( grassNorm, ViewPos.z ); + + for(int v=0; 
v<6; v++) + { + float3 pos = g_positions[v]; + pos.x *= g_GrassWidth; + pos.y *= g_GrassHeight; + + output.pos = mul( float4(pos,1), mWorld ); + output.pos = mul( output.pos, g_mWorldViewProj ); + output.tex = float3( g_texcoords[v], iGrassTex ); + output.color = color1; + + GrassStream.Append( output ); + } + + GrassStream.RestartStrip(); +} + +//-------------------------------------------------------------------------------------- +// Midpoint of the three vertices A,B,C +//-------------------------------------------------------------------------------------- +VSGrassOut CalcMidPoint( VSGrassOut A, VSGrassOut B, VSGrassOut C ) +{ + VSGrassOut MidPoint; + + MidPoint.pos = (A.pos + B.pos + C.pos)/3.0f; + MidPoint.norm = (A.norm + B.norm + C.norm)/3.0f; + MidPoint.tex = (A.tex + B.tex + C.tex)/3.0f; + MidPoint.VertexID = A.VertexID + B.VertexID + C.VertexID; + + return MidPoint; +} + +//-------------------------------------------------------------------------------------- +// The actual grass geometry shader. This generates grass blades based upon an input +// mesh (the tops of the islands) and a coverage texture. Each of the textures channels +// determines how much of each of the 4 types of grass to place at a particular spot. 
+//-------------------------------------------------------------------------------------- +[maxvertexcount(90)] +void GSGrassmain(triangle VSGrassOut input[3], inout TriangleStream<PSQuadIn> GrassStream ) +{ + VSGrassOut MidPoint = CalcMidPoint( input[0], input[1], input[2] ); + + float4 CoverageMask = g_tx2dArray.SampleLevel( g_samPoint, float3(MidPoint.tex,4), 0 ); // the coverage mask is read from array slice 4 at mip 0 + float cm[4]; + cm[0] = CoverageMask.r; + cm[1] = CoverageMask.g; + cm[2] = CoverageMask.b; + cm[3] = CoverageMask.a; + + for(int g=0; g<4; g++) + { + float MaxBlades = float(g_iGrassCoverage)*cm[g]; // blade count for grass type g scales with its mask channel + for(float i=0; i<MaxBlades; i++) + { + float randOffset = g*5 + (i+1); + float3 Tan = RandomDir( MidPoint.pos.x + randOffset ); + float3 Len = normalize( RandomDir( MidPoint.pos.z + randOffset ) ); + float3 Shift = Len.x*g_GrassMessiness*normalize( cross( Tan, MidPoint.norm ) ); + VSGrassOut grassPoint = MidPoint; + grassPoint.VertexID += randOffset; + grassPoint.pos += Shift; + + //uncomment this to make the grass strictly conform to the mesh + //if( IsInTriangle( grassPoint.pos, input[0].pos, input[1].pos, input[2].pos ) ) + { + OutputGrassBlade( grassPoint, GrassStream, g ); + } + } + } +} + +//-------------------------------------------------------------------------------------- +// PS for non-leaf or grass items. 
+//-------------------------------------------------------------------------------------- +float4 PSScenemain(PSSceneIn input) : SV_Target +{ + float4 color = g_txDiffuse.Sample( g_samLinear, input.tex ) * input.color; + return color; +} + +//-------------------------------------------------------------------------------------- +// PS for leaves and grass +//-------------------------------------------------------------------------------------- +float4 PSQuadmain(PSQuadIn input) : SV_Target +{ + float4 color = g_tx2dArray.Sample( g_samLinear, input.tex ); + color.xyz *= input.color.xyz; // modulate RGB only; the texture's alpha is returned unchanged + return color; +} + +//-------------------------------------------------------------------------------------- +// Render instanced meshes with vertex lighting +//-------------------------------------------------------------------------------------- +technique10 RenderInstancedVertLighting +{ + pass p0 + { + SetVertexShader( CompileShader( vs_4_0, VSInstmain() ) ); + SetGeometryShader( NULL ); + SetPixelShader( CompileShader( ps_4_0, PSScenemain() ) ); + + SetBlendState( NoBlending, float4( 0.0f, 0.0f, 0.0f, 0.0f ), 0xFFFFFFFF ); + SetDepthStencilState( EnableDepthTestWrite, 0 ); + SetRasterizerState( EnableMSAA ); + } +} + +//-------------------------------------------------------------------------------------- +// Skybox (the only technique here that uses DisableDepthTestWrite) +//-------------------------------------------------------------------------------------- +technique10 RenderSkybox +{ + pass p0 + { + SetVertexShader( CompileShader( vs_4_0, VSSkymain() ) ); + SetGeometryShader( NULL ); + SetPixelShader( CompileShader( ps_4_0, PSScenemain() ) ); + + SetBlendState( NoBlending, float4( 0.0f, 0.0f, 0.0f, 0.0f ), 0xFFFFFFFF ); + SetDepthStencilState( DisableDepthTestWrite, 0 ); + SetRasterizerState( EnableMSAA ); + } +} + +//-------------------------------------------------------------------------------------- +// Render leaves (uses QuadAlphaBlendState, which enables alpha-to-coverage) +//-------------------------------------------------------------------------------------- +technique10 
RenderQuad +{ + pass p0 + { + + SetVertexShader( CompileShader( vs_4_0, VSQuadmain() ) ); + SetGeometryShader( CompileShader( gs_4_0, GSQuadmain() ) ); + SetPixelShader( CompileShader( ps_4_0, PSQuadmain() ) ); + + SetBlendState( QuadAlphaBlendState, float4( 0.0f, 0.0f, 0.0f, 0.0f ), 0xFFFFFFFF ); + SetDepthStencilState( EnableDepthTestWrite, 0 ); + SetRasterizerState( EnableMSAA ); + } +} + +//-------------------------------------------------------------------------------------- +// Render grass (same pixel shader and blend state as the leaves; blades are generated in GSGrassmain) +//-------------------------------------------------------------------------------------- +technique10 RenderGrass +{ + pass p0 + { + + SetVertexShader( CompileShader( vs_4_0, VSGrassmain() ) ); + SetGeometryShader( CompileShader( gs_4_0, GSGrassmain() ) ); + SetPixelShader( CompileShader( ps_4_0, PSQuadmain() ) ); + + SetBlendState( QuadAlphaBlendState, float4( 0.0f, 0.0f, 0.0f, 0.0f ), 0xFFFFFFFF ); + SetDepthStencilState( EnableDepthTestWrite, 0 ); + SetRasterizerState( EnableMSAA ); + } +} diff --git a/tests/hlsl/dxsdk/MultithreadedRendering11/MultithreadedRendering11_PS.hlsl b/tests/hlsl/dxsdk/MultithreadedRendering11/MultithreadedRendering11_PS.hlsl new file mode 100644 index 000000000..dbeb87f33 --- /dev/null +++ b/tests/hlsl/dxsdk/MultithreadedRendering11/MultithreadedRendering11_PS.hlsl @@ -0,0 +1,202 @@ +//TEST_IGNORE_FILE: Currently failing due to Spire compiler issues. +//TEST:COMPARE_HLSL: -target dxbc-assembly -profile ps_4_0 -entry PSMain +//-------------------------------------------------------------------------------------- +// File: MultithreadedRendering11_PS.hlsl +// +// The pixel shader file for the MultithreadedRendering11 sample. +// +// Copyright (c) Microsoft Corporation. All rights reserved. 
+//-------------------------------------------------------------------------------------- + +// Various debug options +//#define NO_DIFFUSE_MAP +//#define NO_NORMAL_MAP +//#define NO_AMBIENT +//#define NO_DYNAMIC_LIGHTING +//#define NO_SHADOW_MAP + +#define SHADOW_DEPTH_BIAS 0.0005f + +//-------------------------------------------------------------------------------------- +// Globals +//-------------------------------------------------------------------------------------- +static const int g_iNumLights = 4; +static const int g_iNumShadows = 1; // by convention, the first n lights cast shadows + +cbuffer cbPerObject : register( b0 ) +{ + float4 g_vObjectColor : packoffset( c0 ); +}; + +cbuffer cbPerLight : register( b1 ) +{ + struct LightDataStruct + { + matrix m_mLightViewProj; + float4 m_vLightPos; + float4 m_vLightDir; + float4 m_vLightColor; + float4 m_vFalloffs; // x = dist end, y = dist range, z = cos angle end, w = cos range + } g_LightData[g_iNumLights] : packoffset( c0 ); +}; + +cbuffer cbPerScene : register( b2 ) +{ + float4 g_vMirrorPlane : packoffset( c0 ); + float4 g_vAmbientColor : packoffset( c1 ); + float4 g_vTintColor : packoffset( c2 ); +}; + +//-------------------------------------------------------------------------------------- +// Textures and Samplers +//-------------------------------------------------------------------------------------- +Texture2D g_txDiffuse : register( t0 ); +Texture2D g_txNormal : register( t1 ); +Texture2D g_txShadow[g_iNumShadows] : register( t2 ); + +SamplerState g_samPointClamp : register( s0 ); +SamplerState g_samLinearWrap : register( s1 ); + +//-------------------------------------------------------------------------------------- +// Input / Output structures +//-------------------------------------------------------------------------------------- +struct PS_INPUT +{ + float3 vNormal : NORMAL; + float3 vTangent : TANGENT; + float2 vTexcoord : TEXCOORD0; + float4 vPosWorld : TEXCOORD1; +}; + 
+//-------------------------------------------------------------------------------------- +// Sample normal map, convert to signed, apply tangent-to-world space transform +//-------------------------------------------------------------------------------------- +float3 CalcPerPixelNormal( float2 vTexcoord, float3 vVertNormal, float3 vVertTangent ) +{ + // Compute tangent frame + vVertNormal = normalize( vVertNormal ); + vVertTangent = normalize( vVertTangent ); + float3 vVertBinormal = normalize( cross( vVertTangent, vVertNormal ) ); + float3x3 mTangentSpaceToWorldSpace = float3x3( vVertTangent, vVertBinormal, vVertNormal ); + + // Compute per-pixel normal + float3 vBumpNormal = g_txNormal.Sample( g_samLinearWrap, vTexcoord ); + vBumpNormal = 2.0f * vBumpNormal - 1.0f; + + return mul( vBumpNormal, mTangentSpaceToWorldSpace ); +} + +//-------------------------------------------------------------------------------------- +// Test how much pixel is in shadow, using 2x2 percentage-closer filtering +//-------------------------------------------------------------------------------------- +float4 CalcUnshadowedAmountPCF2x2( int iShadow, float4 vPosWorld ) +{ + matrix mLightViewProj = g_LightData[iShadow].m_mLightViewProj; + Texture2D txShadow = g_txShadow[iShadow]; + + // Compute pixel position in light space + float4 vLightSpacePos = mul( vPosWorld, mLightViewProj ); + vLightSpacePos.xyz /= vLightSpacePos.w; + + // Translate from surface coords to texture coords + // Could fold these into the matrix + float2 vShadowTexCoord = 0.5f * vLightSpacePos + 0.5f; + vShadowTexCoord.y = 1.0f - vShadowTexCoord.y; + + // Depth bias to avoid pixel self-shadowing + float vLightSpaceDepth = vLightSpacePos.z - SHADOW_DEPTH_BIAS; + + // Find sub-pixel weights + float2 vShadowMapDims = float2( 2048.0f, 2048.0f ); // need to keep in sync with .cpp file + float4 vSubPixelCoords; + vSubPixelCoords.xy = frac( vShadowMapDims * vShadowTexCoord ); + vSubPixelCoords.zw = 1.0f - vSubPixelCoords; + 
float4 vBilinearWeights = vSubPixelCoords.zxzx * vSubPixelCoords.wwyy; + + // 2x2 percentage closer filtering + float2 vTexelUnits = 1.0f / vShadowMapDims; + float4 vShadowDepths; + vShadowDepths.x = txShadow.Sample( g_samPointClamp, vShadowTexCoord ); + vShadowDepths.y = txShadow.Sample( g_samPointClamp, vShadowTexCoord + float2( vTexelUnits.x, 0.0f ) ); + vShadowDepths.z = txShadow.Sample( g_samPointClamp, vShadowTexCoord + float2( 0.0f, vTexelUnits.y ) ); + vShadowDepths.w = txShadow.Sample( g_samPointClamp, vShadowTexCoord + vTexelUnits ); + + // What weighted fraction of the 4 samples are nearer to the light than this pixel? + float4 vShadowTests = ( vShadowDepths >= vLightSpaceDepth ) ? 1.0f : 0.0f; + return dot( vBilinearWeights, vShadowTests ); +} + +//-------------------------------------------------------------------------------------- +// Diffuse lighting calculation, with angle and distance falloff +//-------------------------------------------------------------------------------------- +float4 CalcLightingColor( int iLight, float3 vPosWorld, float3 vPerPixelNormal ) +{ + float3 vLightPos = g_LightData[iLight].m_vLightPos.xyz; + float3 vLightDir = g_LightData[iLight].m_vLightDir.xyz; + float4 vLightColor = g_LightData[iLight].m_vLightColor; + float4 vFalloffs = g_LightData[iLight].m_vFalloffs; + + float3 vLightToPixelUnNormalized = vPosWorld - vLightPos; + + // Dist falloff = 0 at vFalloffs.x, 1 at vFalloffs.x - vFalloffs.y + float fDist = length( vLightToPixelUnNormalized ); + float fDistFalloff = saturate( ( vFalloffs.x - fDist ) / vFalloffs.y ); + + // Normalize from here on + float3 vLightToPixelNormalized = vLightToPixelUnNormalized / fDist; + + // Angle falloff = 0 at vFalloffs.z, 1 at vFalloffs.z - vFalloffs.w + float fCosAngle = dot( vLightToPixelNormalized, vLightDir ); + float fAngleFalloff = saturate( ( fCosAngle - vFalloffs.z ) / vFalloffs.w ); + + // Diffuse contribution + float fNDotL = saturate( -dot( vLightToPixelNormalized, 
vPerPixelNormal ) ); + + return vLightColor * fNDotL * fDistFalloff * fAngleFalloff; +} + +//-------------------------------------------------------------------------------------- +// Pixel Shader +//-------------------------------------------------------------------------------------- +float4 PSMain( PS_INPUT Input ) : SV_TARGET +{ + // Manual clip test, so that objects which are behind the mirror + // don't show up in the mirror. + clip( dot( g_vMirrorPlane.xyz, Input.vPosWorld.xyz ) + g_vMirrorPlane.w ); + +#ifdef NO_DIFFUSE_MAP + float4 vDiffuse = 0.5f; +#else // #ifdef NO_DIFFUSE_MAP + float4 vDiffuse = g_txDiffuse.Sample( g_samLinearWrap, Input.vTexcoord ); +#endif // #ifdef NO_DIFFUSE_MAP #else + + // Compute per-pixel normal +#ifdef NO_NORMAL_MAP + float3 vPerPixelNormal = Input.vNormal; +#else // #ifdef NO_NORMAL_MAP + float3 vPerPixelNormal = CalcPerPixelNormal( Input.vTexcoord, Input.vNormal, Input.vTangent ); +#endif // #ifdef NO_NORMAL_MAP #else + + // Compute lighting contribution +#ifdef NO_AMBIENT + float4 vTotalLightingColor = 0.0f; +#else // #ifdef NO_AMBIENT + float4 vTotalLightingColor = g_vAmbientColor; +#endif // #ifdef NO_AMBIENT #else + +#ifndef NO_DYNAMIC_LIGHTING + for ( int iLight = 0; iLight < g_iNumLights; ++iLight ) + { + float4 vLightingColor = CalcLightingColor( iLight, Input.vPosWorld, vPerPixelNormal ); +#ifndef NO_SHADOW_MAP + if ( iLight < g_iNumShadows && any( vLightingColor.xyz ) > 0.0f ) // Don't bother checking shadow map if the pixel is unlit + { + vLightingColor *= CalcUnshadowedAmountPCF2x2( iLight, Input.vPosWorld ); + } +#endif // #ifndef NO_SHADOW_MAP + vTotalLightingColor += vLightingColor; + } +#endif // #ifndef NO_DYNAMIC_LIGHTING + + return vDiffuse * g_vTintColor * g_vObjectColor * vTotalLightingColor; +} diff --git a/tests/hlsl/dxsdk/MultithreadedRendering11/MultithreadedRendering11_VS.hlsl b/tests/hlsl/dxsdk/MultithreadedRendering11/MultithreadedRendering11_VS.hlsl new file mode 100644 index 000000000..0d8d32ffa 
--- /dev/null +++ b/tests/hlsl/dxsdk/MultithreadedRendering11/MultithreadedRendering11_VS.hlsl @@ -0,0 +1,75 @@ +//TEST:COMPARE_HLSL: -target dxbc-assembly -profile vs_4_0 -entry VSMain +//-------------------------------------------------------------------------------------- +// File: MultithreadedRendering11_VS.hlsl +// +// The vertex shader file for the MultithreadedRendering11 sample. +// +// Copyright (c) Microsoft Corporation. All rights reserved. +//-------------------------------------------------------------------------------------- + +// Various debug options +//#define UNCOMPRESSED_VERTEX_DATA // The sdkmesh file contained uncompressed vertex data + +//-------------------------------------------------------------------------------------- +// Globals +//-------------------------------------------------------------------------------------- +cbuffer cbPerObject : register( b0 ) +{ + matrix g_mWorld : packoffset( c0 ); +}; +cbuffer cbPerScene : register( b1 ) +{ + matrix g_mViewProj : packoffset( c0 ); +}; + +//-------------------------------------------------------------------------------------- +// Input / Output structures +//-------------------------------------------------------------------------------------- +struct VS_INPUT +{ + float4 vPosition : POSITION; + float3 vNormal : NORMAL; + float2 vTexcoord : TEXCOORD0; + float3 vTangent : TANGENT; +}; + +struct VS_OUTPUT +{ + float3 vNormal : NORMAL; + float3 vTangent : TANGENT; + float2 vTexcoord : TEXCOORD0; + float4 vPosWorld : TEXCOORD1; + float4 vPosition : SV_POSITION; +}; + +// We aliased signed vectors as a unsigned format. +// Need to recover signed values. The values 1.0 and 2.0 +// are slightly inaccurate here. +float3 R10G10B10A2_UNORM_TO_R32G32B32_FLOAT( in float3 vVec ) +{ + vVec *= 2.0f; + return vVec >= 1.0f ? 
( vVec - 2.0f ) : vVec; +} + +//-------------------------------------------------------------------------------------- +// Vertex Shader +//-------------------------------------------------------------------------------------- +VS_OUTPUT VSMain( VS_INPUT Input ) +{ + VS_OUTPUT Output; + +#ifndef UNCOMPRESSED_VERTEX_DATA + // Expand compressed vectors + Input.vNormal = R10G10B10A2_UNORM_TO_R32G32B32_FLOAT( Input.vNormal ); + Input.vTangent = R10G10B10A2_UNORM_TO_R32G32B32_FLOAT( Input.vTangent ); +#endif // #ifndef UNCOMPRESSED_VERTEX_DATA + + Output.vPosWorld = mul( Input.vPosition, g_mWorld ); + Output.vPosition = mul( Output.vPosWorld, g_mViewProj ); + Output.vNormal = mul( Input.vNormal, (float3x3)g_mWorld ); + Output.vTangent = mul( Input.vTangent, (float3x3)g_mWorld ); + Output.vTexcoord = Input.vTexcoord; + + return Output; +} + diff --git a/tests/hlsl/dxsdk/NBodyGravityCS11/NBodyGravityCS11.hlsl b/tests/hlsl/dxsdk/NBodyGravityCS11/NBodyGravityCS11.hlsl new file mode 100644 index 000000000..0a694450c --- /dev/null +++ b/tests/hlsl/dxsdk/NBodyGravityCS11/NBodyGravityCS11.hlsl @@ -0,0 +1,103 @@ +//TEST_IGNORE_FILE: Currently failing due to Spire compiler issues. +//TEST:COMPARE_HLSL: -target dxbc-assembly -profile cs_4_0 -entry CSMain +//-------------------------------------------------------------------------------------- +// File: NBodyGravityCS11.hlsl +// +// Demonstrates how to use Compute Shader to do n-body gravity computation +// +// Copyright (c) Microsoft Corporation. All rights reserved. 
+//-------------------------------------------------------------------------------------- + +static float softeningSquared = 0.0012500000*0.0012500000; +static float g_fG = 6.67300e-11f * 10000.0f; +static float g_fParticleMass = g_fG*10000.0f * 10000.0f; + +#define blocksize 128 +groupshared float4 sharedPos[blocksize]; + +// Body to body interaction, acceleration of the particle at position bi is updated +void bodyBodyInteraction(inout float3 ai, float4 bj, float4 bi, float mass, int particles ) +{ + float3 r = bj.xyz - bi.xyz; + + float distSqr = dot(r, r); + distSqr += softeningSquared; + + float invDist = 1.0f / sqrt(distSqr); + float invDistCube = invDist * invDist * invDist; + + float s = mass * invDistCube * particles; + + ai += r * s; +} + +cbuffer cbCS : register( b0 ) +{ + uint4 g_param; // pcbCS->param[0] = MAX_PARTICLES; + // pcbCS->param[1] = dimx; + float4 g_paramf; // pcbCS->paramf[0] = 0.1f; + // pcbCS->paramf[1] = 1; +}; + +struct PosVelo +{ + float4 pos; + float4 velo; +}; + +StructuredBuffer<PosVelo> oldPosVelo; +RWStructuredBuffer<PosVelo> newPosVelo; + +[numthreads(blocksize, 1, 1)] +void CSMain( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex ) +{ + // Each thread of the CS updates one of the particles + + float4 pos = oldPosVelo[DTid.x].pos; + float4 vel = oldPosVelo[DTid.x].velo; + float3 accel = 0; + float mass = g_fParticleMass; + + // Update current particle using all other particles + [loop] + for (uint tile = 0; tile < g_param.y; tile++) + { + // Cache a tile of particles unto shared memory to increase IO efficiency + sharedPos[GI] = oldPosVelo[tile * blocksize + GI].pos; + + GroupMemoryBarrierWithGroupSync(); + + [unroll] + for (uint counter = 0; counter < blocksize; counter+=8 ) + { + bodyBodyInteraction(accel, sharedPos[counter], pos, mass, 1); + bodyBodyInteraction(accel, sharedPos[counter+1], pos, mass, 1); + bodyBodyInteraction(accel, sharedPos[counter+2], pos, 
mass, 1); + bodyBodyInteraction(accel, sharedPos[counter+3], pos, mass, 1); + bodyBodyInteraction(accel, sharedPos[counter+4], pos, mass, 1); + bodyBodyInteraction(accel, sharedPos[counter+5], pos, mass, 1); + bodyBodyInteraction(accel, sharedPos[counter+6], pos, mass, 1); + bodyBodyInteraction(accel, sharedPos[counter+7], pos, mass, 1); + } + + GroupMemoryBarrierWithGroupSync(); + } + + // g_param.x is the number of our particles, however this number might not be an exact multiple of the tile size. + // In such cases, out of bound reads occur in the process above, which means there will be + // tooManyParticles "phantom" particles generating false gravity at position (0, 0, 0), so we have to substract them here. + // NOTE, out of bound reads always return 0 in CS + const uint tooManyParticles = g_param.y * blocksize - g_param.x; + bodyBodyInteraction(accel, float4(0, 0, 0, 0), pos, mass, -tooManyParticles); + + // Update the velocity and position of current particle using the acceleration computed above + vel.xyz += accel.xyz * g_paramf.x; //deltaTime; + vel.xyz *= g_paramf.y; //damping; + pos.xyz += vel.xyz * g_paramf.x; //deltaTime; + + if ( DTid.x < g_param.x ) + { + newPosVelo[DTid.x].pos = pos; + newPosVelo[DTid.x].velo = float4(vel.xyz, length(accel)); + } +} diff --git a/tests/hlsl/dxsdk/NBodyGravityCS11/ParticleDraw.hlsl b/tests/hlsl/dxsdk/NBodyGravityCS11/ParticleDraw.hlsl new file mode 100644 index 000000000..ea56e20e9 --- /dev/null +++ b/tests/hlsl/dxsdk/NBodyGravityCS11/ParticleDraw.hlsl @@ -0,0 +1,128 @@ +//TEST_IGNORE_FILE: Currently failing due to Spire compiler issues. +//TEST:COMPARE_HLSL: -target dxbc-assembly -profile vs_4_0 -entry VSParticleDraw -profile gs_4_0 -entry GSParticleDraw -profile ps_4_0 -entry PSParticleDraw +//-------------------------------------------------------------------------------------- +// File: ParticleDraw.hlsl +// +// Shaders for rendering the particle as point sprite +// +// Copyright (c) Microsoft Corporation. 
All rights reserved. +//-------------------------------------------------------------------------------------- + +struct VSParticleIn +{ + float4 color : COLOR; + uint id : SV_VERTEXID; +}; + +struct VSParticleDrawOut +{ + float3 pos : POSITION; + float4 color : COLOR; +}; + +struct GSParticleDrawOut +{ + float2 tex : TEXCOORD0; + float4 color : COLOR; + float4 pos : SV_POSITION; +}; + +struct PSParticleDrawIn +{ + float2 tex : TEXCOORD0; + float4 color : COLOR; +}; + +struct PosVelo +{ + float4 pos; + float4 velo; +}; + +Texture2D g_txDiffuse; +StructuredBuffer<PosVelo> g_bufPosVelo; + + +SamplerState g_samLinear +{ + Filter = MIN_MAG_MIP_LINEAR; + AddressU = Clamp; + AddressV = Clamp; +}; + +cbuffer cb0 +{ + row_major float4x4 g_mWorldViewProj; + row_major float4x4 g_mInvView; +}; + +cbuffer cb1 +{ + static float g_fParticleRad = 10.0f; +}; + +cbuffer cbImmutable +{ + static float3 g_positions[4] = + { + float3( -1, 1, 0 ), + float3( 1, 1, 0 ), + float3( -1, -1, 0 ), + float3( 1, -1, 0 ), + }; + + static float2 g_texcoords[4] = + { + float2(0,0), + float2(1,0), + float2(0,1), + float2(1,1), + }; +}; + +// +// Vertex shader for drawing the point-sprite particles +// +VSParticleDrawOut VSParticleDraw(VSParticleIn input) +{ + VSParticleDrawOut output; + + output.pos = g_bufPosVelo[input.id].pos; + + float mag = g_bufPosVelo[input.id].velo.w/9; + output.color = lerp( float4(1,0.1,0.1,1), input.color, mag ); + + return output; +} + +// +// GS for rendering point sprite particles. Takes a point and turns it into 2 tris. 
+// +[maxvertexcount(4)] +void GSParticleDraw(point VSParticleDrawOut input[1], inout TriangleStream<GSParticleDrawOut> SpriteStream) +{ + GSParticleDrawOut output; + + // + // Emit two new triangles + // + for(int i=0; i<4; i++) + { + float3 position = g_positions[i] * g_fParticleRad; + position = mul( position, (float3x3)g_mInvView ) + input[0].pos; + output.pos = mul( float4(position,1.0), g_mWorldViewProj ); + + output.color = input[0].color; + output.tex = g_texcoords[i]; + SpriteStream.Append(output); + } + SpriteStream.RestartStrip(); +} + +// +// PS for drawing particles +// +float4 PSParticleDraw(PSParticleDrawIn input) : SV_Target +{ + return g_txDiffuse.Sample( g_samLinear, input.tex ) * input.color; +}
\ No newline at end of file diff --git a/tests/hlsl/dxsdk/OIT11/OIT_CS.hlsl b/tests/hlsl/dxsdk/OIT11/OIT_CS.hlsl new file mode 100644 index 000000000..dfc98b217 --- /dev/null +++ b/tests/hlsl/dxsdk/OIT11/OIT_CS.hlsl @@ -0,0 +1,277 @@ +//TEST_IGNORE_FILE: Currently failing due to Spire compiler issues. +//TEST:COMPARE_HLSL: -target dxbc-assembly -profile cs_4_0 -entry CreatePrefixSum_Pass0_CS -entry CreatePrefixSum_Pass1_CS -entry SortAndRenderCS +//----------------------------------------------------------------------------- +// File: OIT_CS.hlsl +// +// Desc: Compute shaders used in the Order Independent Transparency sample. +// +// Copyright (c) Microsoft Corporation. All rights reserved. +//----------------------------------------------------------------------------- +// TODO: use structured buffers +RWBuffer<float> deepBufferDepth : register( u0 ); +RWBuffer<uint> deepBufferColorUINT : register( u1 ); +RWTexture2D<float4> frameBuffer : register( u2 ); +RWBuffer<uint> prefixSum : register( u3 ); + +Texture2D<uint> fragmentCount : register ( t0 ); + +cbuffer CB : register( b0 ) +{ + uint g_nFrameWidth : packoffset( c0.x ); + uint g_nFrameHeight : packoffset( c0.y ); + uint g_nPassSize : packoffset( c0.z ); + uint g_nReserved : packoffset( c0.w ); +} + +#define blocksize 1 +#define groupthreads (blocksize*blocksize) +groupshared float accum[groupthreads]; + +// First pass of the prefix sum creation algorithm. Converts a 2D buffer to a 1D buffer, +// and sums every other value with the previous value. 
+[numthreads(1,1,1)]
+void CreatePrefixSum_Pass0_CS( uint3 nGid : SV_GroupID, uint3 nDTid : SV_DispatchThreadID, uint3 nGTid : SV_GroupThreadID )
+{
+    // Row-major linear index of this group within a g_nFrameWidth-wide grid.
+    int nThreadNum = nGid.y*g_nFrameWidth + nGid.x;
+    // Only even-indexed groups do work: each one writes its own bin and the odd bin that follows it.
+    if( nThreadNum%2 == 0 )
+    {
+        prefixSum[nThreadNum] = fragmentCount[nGid.xy];
+
+        // Add the Fragment count to the next bin
+        if( (nThreadNum+1) < g_nFrameWidth * g_nFrameHeight )
+        {
+            // Convert the next linear index back to 2D texel coordinates.
+            int2 nextUV;
+            nextUV.x = (nThreadNum+1) % g_nFrameWidth;
+            nextUV.y = (nThreadNum+1) / g_nFrameWidth;
+            prefixSum[ nThreadNum+1 ] = prefixSum[ nThreadNum ] + fragmentCount[ nextUV ];
+        }
+    }
+}
+
+// Second and following passes. Each pass distributes the sum of the first half of the group
+// to the second half of the group. There are n/groupsize groups in each pass.
+// Each pass increases the group size until it is the size of the buffer.
+// The resulting buffer holds the prefix sum of all preceding values in each
+// position
+[numthreads(1,1,1)]
+void CreatePrefixSum_Pass1_CS( uint3 nGid : SV_GroupID, uint3 nDTid : SV_DispatchThreadID, uint3 nGTid : SV_GroupThreadID )
+{
+    int nThreadNum = nGid.x;
+
+    // Last element of the first half of this group's g_nPassSize-wide window.
+    int nValue = prefixSum[nThreadNum*g_nPassSize + g_nPassSize/2 - 1];
+    // Add it to every element of the second half, stopping at the end of the frame-sized buffer.
+    for(int i = nThreadNum*g_nPassSize + g_nPassSize/2; i < nThreadNum*g_nPassSize + g_nPassSize && i < g_nFrameWidth*g_nFrameHeight; i++)
+    {
+        prefixSum[i] = prefixSum[i] + nValue;
+    }
+}
+
+#if 1
+
+// Sort the fragments using a bitonic sort, then accumulate the fragments into the final result.
+groupshared int nIndex[32];   // sort permutation: nIndex[k] is the fragment slot at sorted position k
+#define NUM_THREADS 8
+// Per-pixel resolve: loads this pixel's fragment depths, orders them with a
+// bitonic sorting network applied to an index array, then blends the colours
+// from the back of the sorted order to the front into frameBuffer.
+// NOTE(review): fDepth and nIndex hold 32 entries, but N (read from
+// fragmentCount) is not clamped here -- confirm the per-pixel count is bounded.
+[numthreads(1,1,1)]
+void SortAndRenderCS( uint3 nGid : SV_GroupID, uint3 nDTid : SV_DispatchThreadID, uint3 nGTid : SV_GroupThreadID )
+{
+    uint nThreadNum = nGid.y * g_nFrameWidth + nGid.x;
+
+    // Offset of this pixel's first fragment in the deep buffers. Pixel 0 has no
+    // preceding prefix-sum entry, so guard it explicitly instead of letting the
+    // unsigned index nThreadNum-1 wrap around (same special case as the
+    // nPrefixSumPos == 0 branch in FillDeepBufferPS).
+    uint nBase = ( nThreadNum == 0 ) ? 0 : prefixSum[ nThreadNum-1 ];
+
+// uint r0, r1, r2;
+// float rd0, rd1, rd2, rd3, rd4, rd5, rd6, rd7;
+
+    uint N = fragmentCount[nDTid.xy];
+
+    // Round the fragment count up to a power of two, as the sorting network requires.
+    uint N2 = 1 << (int)(ceil(log2(N)));
+
+    float fDepth[32];
+    for(int i = 0; i < N; i++)
+    {
+        nIndex[i] = i;
+        fDepth[i] = deepBufferDepth[ nBase + i ];
+    }
+    // Pad up to N2 with depth 1.1f (presumably beyond any real depth value) so
+    // the padding entries sort behind the real fragments.
+    for(int i = N; i < N2; i++)
+    {
+        nIndex[i] = i;
+        fDepth[i] = 1.1f;
+    }
+
+    uint idx = blocksize*nGTid.y + nGTid.x;
+
+    // Bitonic sort
+    for( int k = 2; k <= N2; k = 2*k )
+    {
+        for( int j = k>>1; j > 0 ; j = j>>1 )
+        {
+            for( int i = 0; i < N2; i++ )
+            {
+// GroupMemoryBarrierWithGroupSync();
+                //i = idx;
+
+                float di = fDepth[ nIndex[ i ] ];
+                int ixj = i^j;
+                if ( ( ixj ) > i )
+                {
+                    float dixj = fDepth[ nIndex[ ixj ] ];
+                    if ( ( i&k ) == 0 && di > dixj )
+                    {
+                        int temp = nIndex[ i ];
+                        nIndex[ i ] = nIndex[ ixj ];
+                        nIndex[ ixj ] = temp;
+                    }
+                    if ( ( i&k ) != 0 && di < dixj )
+                    {
+                        int temp = nIndex[ i ];
+                        nIndex[ i ] = nIndex[ ixj ];
+                        nIndex[ ixj ] = temp;
+                    }
+                }
+            }
+        }
+    }
+
+    // Output the final result to the frame buffer
+    if( idx == 0 )
+    {
+
+        /*
+        // Debug
+        uint color[8];
+        for(int i = 0; i < 8; i++)
+        {
+            color[i] = deepBufferColorUINT[prefixSum[nThreadNum-1] + i];
+        }
+
+        for(int i = 0; i < 8; i++)
+        {
+            deepBufferDepth[nThreadNum*8+i] = fDepth[i];//fDepth[nIndex[i]];
+            deepBufferColorUINT[nThreadNum*8+i] = color[nIndex[i]];
+        }
+        */
+
+        // Accumulate fragments into final result
+        float4 result = 0.0f;
+        for( int x = N-1; x >= 0; x-- )
+        {
+            uint bufferValue = deepBufferColorUINT[ nBase + nIndex[ x ] ];
+            // Unpack four 8-bit channels (R in the low byte) to [0,1] floats.
+            float4 color;
+            color.r = ( ( bufferValue >> 0 & 0xFF )) / 255.0f;
+            color.g = ( bufferValue >> 8 & 0xFF ) / 255.0f;
+            color.b = ( bufferValue >> 16 & 0xFF ) / 255.0f;
+            color.a = ( bufferValue >> 24 & 0xFF ) / 255.0f;
+            result = lerp( result, color, color.a );
+        }
+        result.a = 1.0f;
+        frameBuffer[ nGid.xy ] = result;
+    }
+}
+
+#else
+// Disabled alternative: fixed three-fragment resolve that reads slots
+// nThreadNum*8 .. nThreadNum*8+2, orders them with explicit comparisons,
+// writes the ordered values back and blends them.
+[numthreads(1,1,1)]
+void SortAndRenderCS( uint3 nGid : SV_GroupID, uint3 nDTid : SV_DispatchThreadID, uint3 nGTid : SV_GroupThreadID )
+{
+    uint nThreadNum = nDTid.y * g_nFrameWidth + nDTid.x;
+    float d0 = deepBufferDepth[nThreadNum*8];
+    float d1 = deepBufferDepth[nThreadNum*8+1];
+    float d2 = deepBufferDepth[nThreadNum*8+2];
+
+    uint s0 = deepBufferColorUINT[nThreadNum*8 + 0];
+    uint s1 = deepBufferColorUINT[nThreadNum*8 + 1];
+    uint s2 = deepBufferColorUINT[nThreadNum*8 + 2];
+
+    uint r0, r1, r2;
+    float rd0, rd1, rd2;
+    if( d0 < d1 && d0 < d2 )
+    {
+        r0 = s0;
+        rd0 = d0;
+        if( d1 < d2 )
+        {
+            r1 = s1;
+            r2 = s2;
+
+            rd1 = d1;
+            rd2 = d2;
+        }
+        else
+        {
+            r1 = s2;
+            r2 = s1;
+
+            rd1 = d2;
+            rd2 = d1;
+        }
+    }
+    else if( d1 < d2 )
+    {
+        r0 = s1;
+        rd0 = d1;
+        if( d0 < d2 )
+        {
+            r1 = s0;
+            r2 = s2;
+
+            rd1 = d0;
+            rd2 = d2;
+        }
+        else
+        {
+            r1 = s2;
+            r2 = s0;
+
+            rd1 = d2;
+            rd2 = d0;
+        }
+    }
+    else
+    {
+        r0 = s2;
+        rd0 = d2;
+        if( d1 < d0 )
+        {
+            r1 = s1;
+            r2 = s0;
+
+            rd1 = d1;
+            rd2 = d0;
+        }
+        else
+        {
+            r1 = s0;
+            r2 = s1;
+
+            rd1 = d0;
+            rd2 = d1;
+        }
+    }
+
+    deepBufferDepth[nThreadNum*8] = rd0;
+    deepBufferDepth[nThreadNum*8+1] = rd1;
+    deepBufferDepth[nThreadNum*8+2] = rd2;
+
+    deepBufferColorUINT[nThreadNum*8] = r0;
+    deepBufferColorUINT[nThreadNum*8+1] = r1;
+    deepBufferColorUINT[nThreadNum*8+2] = r2;
+
+    // convert the color to floats
+    float4 color[3];
+    color[0].r = (r0 >> 0 & 0xFF) / 255.0f;
+    color[0].g = (r0 >> 8 & 0xFF) / 255.0f;
+    color[0].b = (r0 >> 16 & 0xFF) / 255.0f;
+    color[0].a = (r0 >> 24 & 0xFF) / 255.0f;
+
+    color[1].r = (r1 >> 0 & 0xFF) / 255.0f;
+    color[1].g = (r1 >> 8 & 0xFF) / 255.0f;
+    color[1].b = (r1 >> 16 & 0xFF) / 255.0f;
+    color[1].a = (r1 >> 24 & 0xFF) / 255.0f;
+
+    color[2].r = (r2 >> 0 & 0xFF) / 255.0f;
+    color[2].g = (r2 >> 8 & 0xFF) / 255.0f;
+    color[2].b = (r2 >> 16 & 0xFF) / 255.0f;
+    color[2].a = (r2 >> 24 & 0xFF) / 255.0f;
+
+    float4 result = lerp(lerp(lerp(0, color[2], color[2].a), color[1], color[1].a), color[0], color[0].a);
+    result.a = 1.0f;
+
+    frameBuffer[nDTid.xy] = result;
+}
+
+#endif
\ No newline at end of file diff --git a/tests/hlsl/dxsdk/OIT11/OIT_PS.hlsl b/tests/hlsl/dxsdk/OIT11/OIT_PS.hlsl new file mode 100644 index 000000000..1fdb31622 --- /dev/null +++ b/tests/hlsl/dxsdk/OIT11/OIT_PS.hlsl @@ -0,0 +1,56 @@ +//TEST_IGNORE_FILE: Currently failing due to Spire compiler issues. +//TEST:COMPARE_HLSL: -target dxbc-assembly -profile ps_4_0 -entry FragmentCountPS -entry FillDeepBufferPS +//----------------------------------------------------------------------------- +// File: OITPS.hlsl +// +// Desc: Pixel shaders used in the Order Independent Transparency sample. +// +// Copyright (c) Microsoft Corporation. All rights reserved. +//----------------------------------------------------------------------------- +//TODO: Use structured buffers +RWTexture2D<uint> fragmentCount : register( u1 ); +RWBuffer<float> deepBufferDepth : register( u2 ); +RWBuffer<uint4> deepBufferColor : register( u3 ); +RWBuffer<uint> prefixSum : register( u4 ); + +cbuffer CB : register( b0 ) +{ + uint g_nFrameWidth : packoffset( c0.x ); + uint g_nFrameHeight : packoffset( c0.y ); + uint g_nReserved0 : packoffset( c0.z ); + uint g_nReserved1 : packoffset( c0.w ); +} + +struct SceneVS_Output +{ + float4 pos : SV_POSITION; + float4 color : COLOR0; +}; + +void FragmentCountPS( SceneVS_Output input) +{ + // Increments need to be done atomically + InterlockedAdd(fragmentCount[input.pos.xy], 1); +} + +void FillDeepBufferPS( SceneVS_Output input ) +{ + uint x = input.pos.x; + uint y = input.pos.y; + + // Atomically allocate space in the deep buffer + uint fc; + InterlockedAdd(fragmentCount[input.pos.xy], 1, fc); + + uint nPrefixSumPos = y*g_nFrameWidth + x; + uint nDeepBufferPos; + if( nPrefixSumPos == 0 ) + nDeepBufferPos = fc; + else + nDeepBufferPos = prefixSum[nPrefixSumPos-1] + fc; + + // Store fragment data into the allocated space + deepBufferDepth[nDeepBufferPos] = input.pos.z; + deepBufferColor[nDeepBufferPos] = clamp(input.color, 0, 1)*255; +} + diff --git 
a/tests/hlsl/dxsdk/OIT11/SceneVS.hlsl b/tests/hlsl/dxsdk/OIT11/SceneVS.hlsl new file mode 100644 index 000000000..2f985d1d1 --- /dev/null +++ b/tests/hlsl/dxsdk/OIT11/SceneVS.hlsl @@ -0,0 +1,36 @@ +//TEST:COMPARE_HLSL: -target dxbc-assembly -profile vs_4_0 -entry SceneVS +//----------------------------------------------------------------------------- +// File: SceneVS.hlsl +// +// Desc: Vertex shader for the scene. +// +// Copyright (c) Microsoft Corporation. All rights reserved. +//----------------------------------------------------------------------------- + + +cbuffer cbPerObject : register( b0 ) +{ + row_major matrix g_mWorldViewProjection : packoffset( c0 ); +} + +struct SceneVS_Input +{ + float4 pos : POSITION; + float4 color : COLOR; +}; + +struct SceneVS_Output +{ + float4 pos : SV_POSITION; + float4 color : COLOR0; +}; + +SceneVS_Output SceneVS( SceneVS_Input input ) +{ + SceneVS_Output output; + + output.color = input.color; + output.pos = mul(input.pos, g_mWorldViewProjection ); + + return output; +} diff --git a/tests/hlsl/dxsdk/README.md b/tests/hlsl/dxsdk/README.md new file mode 100644 index 000000000..dd0c0fb6b --- /dev/null +++ b/tests/hlsl/dxsdk/README.md @@ -0,0 +1,5 @@ +DirectX SDK Sample Shaders +========================== + +This directory contains shaders that have shipped as part of the DirectX SDK. +The license terms for these shaders are specified at the top of the source files.
\ No newline at end of file diff --git a/tests/hlsl/dxsdk/SimpleBezier11/SimpleBezier11.hlsl b/tests/hlsl/dxsdk/SimpleBezier11/SimpleBezier11.hlsl new file mode 100644 index 000000000..7b7a1489c --- /dev/null +++ b/tests/hlsl/dxsdk/SimpleBezier11/SimpleBezier11.hlsl @@ -0,0 +1,230 @@ +//TEST_IGNORE_FILE: Currently failing due to Spire compiler issues. +//TEST:COMPARE_HLSL: -target dxbc-assembly -profile vs_4_0 -entry BezierVS -profile hs_5_0 -entry BezierHS -profile ds_5_0 -entry BezierDS -profile ps_4_0 -entry BezierPS -entry SolidColorPS +//-------------------------------------------------------------------------------------- +// File: SimpleBezier11.hlsl +// +// This sample shows a simple implementation of the DirectX 11 Hardware Tessellator +// for rendering a Bezier Patch. +// +// Copyright (c) Microsoft Corporation. All rights reserved. +//-------------------------------------------------------------------------------------- + +// This allows us to compile the shader with a #define to choose +// the different partition modes for the hull shader. +// See the hull shader: [partitioning(BEZIER_HS_PARTITION)] +// This sample demonstrates "integer", "fractional_even", and "fractional_odd" +#ifndef BEZIER_HS_PARTITION +#define BEZIER_HS_PARTITION "integer" +#endif // BEZIER_HS_PARTITION + +// The input patch size. In this sample, it is 16 control points. +// This value should match the call to IASetPrimitiveTopology() +#define INPUT_PATCH_SIZE 16 + +// The output patch size. In this sample, it is also 16 control points. 
+#define OUTPUT_PATCH_SIZE 16 + +//-------------------------------------------------------------------------------------- +// Constant Buffers +//-------------------------------------------------------------------------------------- +cbuffer cbPerFrame : register( b0 ) +{ + matrix g_mViewProjection; + float3 g_vCameraPosWorld; + float g_fTessellationFactor; +}; + +//-------------------------------------------------------------------------------------- +// Vertex shader section +//-------------------------------------------------------------------------------------- +struct VS_CONTROL_POINT_INPUT +{ + float3 vPosition : POSITION; +}; + +struct VS_CONTROL_POINT_OUTPUT +{ + float3 vPosition : POSITION; +}; + +// This simple vertex shader passes the control points straight through to the +// hull shader. In a more complex scene, you might transform the control points +// or perform skinning at this step. + +// The input to the vertex shader comes from the vertex buffer. + +// The output from the vertex shader will go into the hull shader. + +VS_CONTROL_POINT_OUTPUT BezierVS( VS_CONTROL_POINT_INPUT Input ) +{ + VS_CONTROL_POINT_OUTPUT Output; + + Output.vPosition = Input.vPosition; + + return Output; +} + +//-------------------------------------------------------------------------------------- +// Constant data function for the BezierHS. This is executed once per patch. +//-------------------------------------------------------------------------------------- +struct HS_CONSTANT_DATA_OUTPUT +{ + float Edges[4] : SV_TessFactor; + float Inside[2] : SV_InsideTessFactor; +}; + +struct HS_OUTPUT +{ + float3 vPosition : BEZIERPOS; +}; + +// This constant hull shader is executed once per patch. For the simple Mobius strip +// model, it will be executed 4 times. In this sample, we set the tessellation factor +// via SV_TessFactor and SV_InsideTessFactor for each patch. In a more complex scene, +// you might calculate a variable tessellation factor based on the camera's distance. 
+ +HS_CONSTANT_DATA_OUTPUT BezierConstantHS( InputPatch<VS_CONTROL_POINT_OUTPUT, INPUT_PATCH_SIZE> ip, + uint PatchID : SV_PrimitiveID ) +{ + HS_CONSTANT_DATA_OUTPUT Output; + + float TessAmount = g_fTessellationFactor; + + Output.Edges[0] = Output.Edges[1] = Output.Edges[2] = Output.Edges[3] = TessAmount; + Output.Inside[0] = Output.Inside[1] = TessAmount; + + return Output; +} + +// The hull shader is called once per output control point, which is specified with +// outputcontrolpoints. For this sample, we take the control points from the vertex +// shader and pass them directly off to the domain shader. In a more complex scene, +// you might perform a basis conversion from the input control points into a Bezier +// patch, such as the SubD11 Sample. + +// The input to the hull shader comes from the vertex shader + +// The output from the hull shader will go to the domain shader. +// The tessellation factor, topology, and partition mode will go to the fixed function +// tessellator stage to calculate the UVW and domain points. 
+ +[domain("quad")] +[partitioning(BEZIER_HS_PARTITION)] +[outputtopology("triangle_cw")] +[outputcontrolpoints(OUTPUT_PATCH_SIZE)] +[patchconstantfunc("BezierConstantHS")] +HS_OUTPUT BezierHS( InputPatch<VS_CONTROL_POINT_OUTPUT, INPUT_PATCH_SIZE> p, + uint i : SV_OutputControlPointID, + uint PatchID : SV_PrimitiveID ) +{ + HS_OUTPUT Output; + Output.vPosition = p[i].vPosition; + return Output; +} + +//-------------------------------------------------------------------------------------- +// Bezier evaluation domain shader section +//-------------------------------------------------------------------------------------- +struct DS_OUTPUT +{ + float4 vPosition : SV_POSITION; + float3 vWorldPos : WORLDPOS; + float3 vNormal : NORMAL; +}; + +//-------------------------------------------------------------------------------------- +float4 BernsteinBasis(float t) +{ + float invT = 1.0f - t; + + return float4( invT * invT * invT, + 3.0f * t * invT * invT, + 3.0f * t * t * invT, + t * t * t ); +} + +//-------------------------------------------------------------------------------------- +float4 dBernsteinBasis(float t) +{ + float invT = 1.0f - t; + + return float4( -3 * invT * invT, + 3 * invT * invT - 6 * t * invT, + 6 * t * invT - 3 * t * t, + 3 * t * t ); +} + +//-------------------------------------------------------------------------------------- +float3 EvaluateBezier( const OutputPatch<HS_OUTPUT, OUTPUT_PATCH_SIZE> bezpatch, + float4 BasisU, + float4 BasisV ) +{ + float3 Value = float3(0,0,0); + Value = BasisV.x * ( bezpatch[0].vPosition * BasisU.x + bezpatch[1].vPosition * BasisU.y + bezpatch[2].vPosition * BasisU.z + bezpatch[3].vPosition * BasisU.w ); + Value += BasisV.y * ( bezpatch[4].vPosition * BasisU.x + bezpatch[5].vPosition * BasisU.y + bezpatch[6].vPosition * BasisU.z + bezpatch[7].vPosition * BasisU.w ); + Value += BasisV.z * ( bezpatch[8].vPosition * BasisU.x + bezpatch[9].vPosition * BasisU.y + bezpatch[10].vPosition * BasisU.z + bezpatch[11].vPosition * 
BasisU.w ); + Value += BasisV.w * ( bezpatch[12].vPosition * BasisU.x + bezpatch[13].vPosition * BasisU.y + bezpatch[14].vPosition * BasisU.z + bezpatch[15].vPosition * BasisU.w ); + + return Value; +} + +// The domain shader is run once per vertex and calculates the final vertex's position +// and attributes. It receives the UVW from the fixed function tessellator and the +// control point outputs from the hull shader. Since we are using the DirectX 11 +// Tessellation pipeline, it is the domain shader's responsibility to calculate the +// final SV_POSITION for each vertex. In this sample, we evaluate the vertex's +// position using a Bernstein polynomial and the normal is calculated as the cross +// product of the U and V derivatives. + +// The input SV_DomainLocation to the domain shader comes from fixed function +// tessellator. And the OutputPatch comes from the hull shader. From these, you +// must calculate the final vertex position, color, texcoords, and other attributes. + +// The output from the domain shader will be a vertex that will go to the video card's +// rasterization pipeline and get drawn to the screen. 
+ +[domain("quad")] +DS_OUTPUT BezierDS( HS_CONSTANT_DATA_OUTPUT input, + float2 UV : SV_DomainLocation, + const OutputPatch<HS_OUTPUT, OUTPUT_PATCH_SIZE> bezpatch ) +{ + float4 BasisU = BernsteinBasis( UV.x ); + float4 BasisV = BernsteinBasis( UV.y ); + float4 dBasisU = dBernsteinBasis( UV.x ); + float4 dBasisV = dBernsteinBasis( UV.y ); + + float3 WorldPos = EvaluateBezier( bezpatch, BasisU, BasisV ); + float3 Tangent = EvaluateBezier( bezpatch, dBasisU, BasisV ); + float3 BiTangent = EvaluateBezier( bezpatch, BasisU, dBasisV ); + float3 Norm = normalize( cross( Tangent, BiTangent ) ); + + DS_OUTPUT Output; + Output.vPosition = mul( float4(WorldPos,1), g_mViewProjection ); + Output.vWorldPos = WorldPos; + Output.vNormal = Norm; + + return Output; +} + +//-------------------------------------------------------------------------------------- +// Smooth shading pixel shader section +//-------------------------------------------------------------------------------------- + +// The pixel shader works the same as it would in a normal graphics pipeline. +// In this sample, it performs very simple N dot L lighting. 
+ +float4 BezierPS( DS_OUTPUT Input ) : SV_TARGET +{ + float3 N = normalize(Input.vNormal); + float3 L = normalize(Input.vWorldPos - g_vCameraPosWorld); + return abs(dot(N, L)) * float4(1, 0, 0, 1); +} + +//-------------------------------------------------------------------------------------- +// Solid color shading pixel shader (used for wireframe overlay) +//-------------------------------------------------------------------------------------- +float4 SolidColorPS( DS_OUTPUT Input ) : SV_TARGET +{ + // Return a solid green color + return float4( 0, 1, 0, 1 ); +} diff --git a/tests/hlsl/dxsdk/SimpleSample11/SimpleSample.fx b/tests/hlsl/dxsdk/SimpleSample11/SimpleSample.fx new file mode 100644 index 000000000..00883ce70 --- /dev/null +++ b/tests/hlsl/dxsdk/SimpleSample11/SimpleSample.fx @@ -0,0 +1,112 @@ +//TEST_IGNORE_FILE: +//-------------------------------------------------------------------------------------- +// File: SimpleSample.fx +// +// The effect file for the SimpleSample sample. +// +// Copyright (c) Microsoft Corporation. All rights reserved. 
+//-------------------------------------------------------------------------------------- + + +//-------------------------------------------------------------------------------------- +// Global variables +//-------------------------------------------------------------------------------------- +float4 g_MaterialAmbientColor; // Material's ambient color +float4 g_MaterialDiffuseColor; // Material's diffuse color +float3 g_LightDir; // Light's direction in world space +float4 g_LightDiffuse; // Light's diffuse color +texture g_MeshTexture; // Color texture for mesh + +float g_fTime; // App's time in seconds +float4x4 g_mWorld; // World matrix for object +float4x4 g_mWorldViewProjection; // World * View * Projection matrix + + + +//-------------------------------------------------------------------------------------- +// Texture samplers +//-------------------------------------------------------------------------------------- +sampler MeshTextureSampler = +sampler_state +{ + Texture = <g_MeshTexture>; + MipFilter = LINEAR; + MinFilter = LINEAR; + MagFilter = LINEAR; +}; + + +//-------------------------------------------------------------------------------------- +// Vertex shader output structure +//-------------------------------------------------------------------------------------- +struct VS_OUTPUT +{ + float4 Position : POSITION; // vertex position + float4 Diffuse : COLOR0; // vertex diffuse color (note that COLOR0 is clamped from 0..1) + float2 TextureUV : TEXCOORD0; // vertex texture coords +}; + + +//-------------------------------------------------------------------------------------- +// This shader computes standard transform and lighting +//-------------------------------------------------------------------------------------- +VS_OUTPUT RenderSceneVS( float4 vPos : POSITION, + float3 vNormal : NORMAL, + float2 vTexCoord0 : TEXCOORD0 ) +{ + VS_OUTPUT Output; + float3 vNormalWorldSpace; + + // Transform the position from object space to homogeneous 
projection space + Output.Position = mul(vPos, g_mWorldViewProjection); + + // Transform the normal from object space to world space + vNormalWorldSpace = normalize(mul(vNormal, (float3x3)g_mWorld)); // normal (world space) + + // Calc diffuse color + Output.Diffuse.rgb = g_MaterialDiffuseColor * g_LightDiffuse * max(0,dot(vNormalWorldSpace, g_LightDir)) + + g_MaterialAmbientColor; + Output.Diffuse.a = 1.0f; + + // Just copy the texture coordinate through + Output.TextureUV = vTexCoord0; + + return Output; +} + + +//-------------------------------------------------------------------------------------- +// Pixel shader output structure +//-------------------------------------------------------------------------------------- +struct PS_OUTPUT +{ + float4 RGBColor : COLOR0; // Pixel color +}; + + +//-------------------------------------------------------------------------------------- +// This shader outputs the pixel's color by modulating the texture's +// color with diffuse material color +//-------------------------------------------------------------------------------------- +PS_OUTPUT RenderScenePS( VS_OUTPUT In ) +{ + PS_OUTPUT Output; + + // Lookup mesh texture and modulate it with diffuse + Output.RGBColor = tex2D(MeshTextureSampler, In.TextureUV) * In.Diffuse; + + return Output; +} + + +//-------------------------------------------------------------------------------------- +// Renders scene +//-------------------------------------------------------------------------------------- +technique RenderScene +{ + pass P0 + { + VertexShader = compile vs_2_0 RenderSceneVS(); + PixelShader = compile ps_2_0 RenderScenePS(); + } +} diff --git a/tests/hlsl/dxsdk/SimpleSample11/SimpleSample.hlsl b/tests/hlsl/dxsdk/SimpleSample11/SimpleSample.hlsl new file mode 100644 index 000000000..12f368f86 --- /dev/null +++ b/tests/hlsl/dxsdk/SimpleSample11/SimpleSample.hlsl @@ -0,0 +1,86 @@ +//TEST_IGNORE_FILE: Currently failing due to Spire compiler issues. 
+//TEST:COMPARE_HLSL: -target dxbc-assembly -profile vs_4_0 -entry RenderSceneVS -profile ps_4_0 -entry RenderScenePS +//-------------------------------------------------------------------------------------- +// File: SimpleSample.hlsl +// +// The HLSL file for the SimpleSample sample for the Direct3D 11 device +// +// Copyright (c) Microsoft Corporation. All rights reserved. +//-------------------------------------------------------------------------------------- + + +//-------------------------------------------------------------------------------------- +// Constant Buffers +//-------------------------------------------------------------------------------------- +cbuffer cbPerObject : register( b0 ) +{ + matrix g_mWorldViewProjection : packoffset( c0 ); + matrix g_mWorld : packoffset( c4 ); + float4 g_MaterialAmbientColor : packoffset( c8 ); + float4 g_MaterialDiffuseColor : packoffset( c9 ); +} + +cbuffer cbPerFrame : register( b1 ) +{ + float3 g_vLightDir : packoffset( c0 ); + float g_fTime : packoffset( c0.w ); + float4 g_LightDiffuse : packoffset( c1 ); +}; + +//----------------------------------------------------------------------------------------- +// Textures and Samplers +//----------------------------------------------------------------------------------------- +Texture2D g_txDiffuse : register( t0 ); +SamplerState g_samLinear : register( s0 ); + +//-------------------------------------------------------------------------------------- +// shader input/output structure +//-------------------------------------------------------------------------------------- +struct VS_INPUT +{ + float4 Position : POSITION; // vertex position + float3 Normal : NORMAL; // this normal comes in per-vertex + float2 TextureUV : TEXCOORD0;// vertex texture coords +}; + +struct VS_OUTPUT +{ + float4 Position : SV_POSITION; // vertex position + float4 Diffuse : COLOR0; // vertex diffuse color (note that COLOR0 is clamped from 0..1) + float2 TextureUV : TEXCOORD0; // vertex 
texture coords +}; + +//-------------------------------------------------------------------------------------- +// This shader computes standard transform and lighting +//-------------------------------------------------------------------------------------- +VS_OUTPUT RenderSceneVS( VS_INPUT input ) +{ + VS_OUTPUT Output; + float3 vNormalWorldSpace; + + // Transform the position from object space to homogeneous projection space + Output.Position = mul( input.Position, g_mWorldViewProjection ); + + // Transform the normal from object space to world space + vNormalWorldSpace = normalize(mul(input.Normal, (float3x3)g_mWorld)); // normal (world space) + + // Calc diffuse color + Output.Diffuse.rgb = g_MaterialDiffuseColor * g_LightDiffuse * max(0,dot(vNormalWorldSpace, g_vLightDir)) + + g_MaterialAmbientColor; + Output.Diffuse.a = 1.0f; + + // Just copy the texture coordinate through + Output.TextureUV = input.TextureUV; + + return Output; +} + +//-------------------------------------------------------------------------------------- +// This shader outputs the pixel's color by modulating the texture's +// color with diffuse material color +//-------------------------------------------------------------------------------------- +float4 RenderScenePS( VS_OUTPUT In ) : SV_TARGET +{ + // Lookup mesh texture and modulate it with diffuse + return g_txDiffuse.Sample( g_samLinear, In.TextureUV ) * In.Diffuse; +} diff --git a/tests/hlsl/dxsdk/SubD11/SubD11.hlsl b/tests/hlsl/dxsdk/SubD11/SubD11.hlsl new file mode 100644 index 000000000..c4ebf9620 --- /dev/null +++ b/tests/hlsl/dxsdk/SubD11/SubD11.hlsl @@ -0,0 +1,1238 @@ +//TEST_IGNORE_FILE: Currently failing due to Spire compiler issues. 
+//TEST:COMPARE_HLSL: -target dxbc-assembly -profile vs_4_0 -entry PatchSkinningVS -entry MeshSkinningVS -profile hs_5_0 -entry SubDToBezierHS -entry SubDToBezierHS4444 -profile ds_5_0 -entry BezierEvalDS -profile ps_4_0 -entry SmoothPS -entry SolidColorPS +//-------------------------------------------------------------------------------------- +// File: SubD11.hlsl +// +// This file contains functions to convert from a Catmull-Clark subdivision +// representation to a bicubic patch representation. +// +// Copyright (c) Microsoft Corporation. All rights reserved. +//-------------------------------------------------------------------------------------- + +//Work-around for an optimization rule problem in the June 2010 HLSL Compiler (9.29.952.3111) +//see http://support.microsoft.com/kb/2448404 +#if D3DX_VERSION == 0xa2b +#pragma ruledisable 0x0802405f +#endif + +//-------------------------------------------------------------------------------------- +// A sample extraordinary SubD quad is represented by the following diagram: +// +// 15 Valences: +// / \ Vertex 0: 5 +// / 14 Vertex 1: 4 +// 17---------16 / \ Vertex 2: 5 +// | \ | / \ Vertex 3: 3 +// | \ | / 13 +// | \ |/ / Prefixes: +// | 3------2------12 Vertex 0: 9 +// | | | | Vertex 1: 12 +// | | | | Vertex 2: 16 +// 4----0------1------11 Vertex 3: 18 +// / /| | | +// / / | | | +// 5 / 8------9------10 +// \ / / +// 6 / +// \ / +// 7 +// +// Where the quad bounded by vertices 0,1,2,3 represents the actual subd surface of interest +// The 1-ring neighborhood of the quad is represented by vertices 4 through 17. The counter- +// clockwise winding of this 1-ring neighborhood is important, especially when it comes to compute +// the corner vertices of the bicubic patch that we will use to approximate the subd quad (0,1,2,3). 
+// +// The resulting bicubic patch fits within the subd quad (0,1,2,3) and has the following control +// point layout: +// +// 12--13--14--15 +// 8---9--10--11 +// 4---5---6---7 +// 0---1---2---3 +// +// The inner 4 control points of the bicubic patch are a combination of only the vertices (0,1,2,3) +// of the subd quad. However, the corner control points for the bicubic patch (0,3,15,12) are actually +// a much more complex weighting of the subd patch and the 1-ring neighborhood. In the example above +// the bicubic control point 0 is actually a weighted combination of subd points 0,1,2,3 and 1-ring +// neighborhood points 17, 4, 5, 6, 7, 8, and 9. We can see that the 1-ring neighborhood is simply +// walked from the prefix value of the previous corner (corner 3 in this case) to the +// prefix value for the current corner. We add one more vertex on either side of the prefix values +// and we have all the data necessary to calculate the value for the corner points. +// +// The edge control points of the bicubic patch (1,2,13,14,4,8,7,11) are also combinations of their +// neighbors, but fortunately each one is only a combination of 6 values and no walk is required. 
+//-------------------------------------------------------------------------------------- + +#define MOD4(x) ((x)&3) +#ifndef MAX_POINTS +#define MAX_POINTS 32 +#endif +#define MAX_BONE_MATRICES 80 + +//-------------------------------------------------------------------------------------- +// Textures +//-------------------------------------------------------------------------------------- +Texture2D g_txHeight : register( t0 ); // Height and Bump texture +Texture2D g_txDiffuse : register( t1 ); // Diffuse texture +Texture2D g_txSpecular : register( t2 ); // Specular texture + +//-------------------------------------------------------------------------------------- +// Samplers +//-------------------------------------------------------------------------------------- +SamplerState g_samLinear : register( s0 ); +SamplerState g_samPoint : register( s0 ); + +//-------------------------------------------------------------------------------------- +// Constant Buffers +//-------------------------------------------------------------------------------------- +cbuffer cbTangentStencilConstants : register( b0 ) +{ + float g_TanM[1024]; // Tangent patch stencils precomputed by the application + float g_fCi[16]; // Valence coefficients precomputed by the application +}; + +cbuffer cbPerMesh : register( b1 ) +{ + matrix g_mConstBoneWorld[MAX_BONE_MATRICES]; +}; + +cbuffer cbPerFrame : register( b2 ) +{ + matrix g_mViewProjection; + float3 g_vCameraPosWorld; + float g_fTessellationFactor; + float g_fDisplacementHeight; + float3 g_vSolidColor; +}; + +cbuffer cbPerSubset : register( b3 ) +{ + int g_iPatchStartIndex; +} + +//-------------------------------------------------------------------------------------- +Buffer<uint4> g_ValencePrefixBuffer : register( t0 ); + +//-------------------------------------------------------------------------------------- +struct VS_CONTROL_POINT_OUTPUT +{ + float3 vPosition : WORLDPOS; + float2 vUV : TEXCOORD0; + float3 vTangent : TANGENT; +}; + 
+struct BEZIER_CONTROL_POINT +{ + float3 vPosition : BEZIERPOS; +}; + +struct PS_INPUT +{ + float3 vWorldPos : POSITION; + float3 vNormal : NORMAL; + float2 vUV : TEXCOORD; + float3 vTangent : TANGENT; + float3 vBiTangent : BITANGENT; +}; + +//-------------------------------------------------------------------------------------- +// SubD to Bezier helper functions +//-------------------------------------------------------------------------------------- +// Helps with getting tangent stencils from the g_TanM constant array +#define TANM(a,v) ( g_TanM[ Val[v]*64 + (a) ] ) + +//-------------------------------------------------------------------------------------- +float3 ComputeInteriorVertex( uint index, + uint Val[4], + const in InputPatch<VS_CONTROL_POINT_OUTPUT, MAX_POINTS> ip ) +{ + switch( index ) + { + case 0: + return (ip[0].vPosition*Val[0] + ip[1].vPosition*2 + ip[2].vPosition + ip[3].vPosition*2) / (5+Val[0]); + case 1: + return (ip[0].vPosition*2 + ip[1].vPosition*Val[1] + ip[2].vPosition*2 + ip[3].vPosition) / (5+Val[1]); + case 2: + return (ip[0].vPosition + ip[1].vPosition*2 + ip[2].vPosition*Val[2] + ip[3].vPosition*2) / (5+Val[2]); + case 3: + return (ip[0].vPosition*2 + ip[1].vPosition + ip[2].vPosition*2 + ip[3].vPosition*Val[3]) / (5+Val[3]); + } + + return float3(0,0,0); +} + +//-------------------------------------------------------------------------------------- +// Computes the corner vertices of the output UV patch. The corner vertices are +// a weighted combination of all points that are "connected" to that corner by an edge. +// The interior 4 points of the original subd quad are easy to get. The points in the +// 1-ring neighborhood around the interior quad are not. +// +// Because the valence of that corner could be any number between 3 and 16, we need to +// walk around the subd patch vertices connected to that point. This is where the +// Pref (prefix) values come into play. 
Each corner has a prefix value that is the index +// of the last value around the 1-ring neighborhood that should be used in calculating +// the coefficient of that corner. The walk goes from the prefix value of the previous +// corner to the prefix value of the current corner. +//-------------------------------------------------------------------------------------- +void ComputeCornerVertex( uint index, + out float3 CornerB, // Corner for the Bezier patch + out float3 CornerU, // Corner for the tangent patch + out float3 CornerV, // Corner for the bitangent patch + const in InputPatch<VS_CONTROL_POINT_OUTPUT, MAX_POINTS> ip, + const in uint Val[4], + const in uint Pref[4] ) +{ + const float fOWt = 1; + const float fEWt = 4; + + // Figure out where to start the walk by using the previous corner's prefix value + uint PrefIm1 = 0; + uint uStart = 4; + if( index ) + { + PrefIm1 = Pref[index-1]; + uStart = PrefIm1; + } + + // Setup the walk indices + uint uTIndexStart = 2 - (index&1); + uint uTIndex = uTIndexStart; + + // Calculate the N*N weight for the final value + CornerB = (Val[index]*Val[index])*ip[index].vPosition; // n^2 part + + // Zero out the corners + CornerU = float4(0,0,0,0); + CornerV = float4(0,0,0,0); + + const uint uV = Val[index] + ( ( index & 1 ) ? 
1 : -1 ); + + // Start the walk with the uStart prefix (the prefix of the corner before us) + CornerB += ip[uStart].vPosition * fEWt; + CornerU += ip[uStart].vPosition * TANM( uTIndex * 2, index ); + CornerV += ip[uStart].vPosition * TANM( ( ( uTIndex + uV ) % Val[index] ) * 2, index); + + // Gather all vertices between the previous corner's prefix and our own prefix + // We'll do two at a time, since they always come in twos + while(uStart < Pref[index]-1) + { + ++uStart; + CornerB += ip[uStart].vPosition * fOWt; + CornerU += ip[uStart].vPosition * TANM( uTIndex * 2 + 1, index ); + CornerV += ip[uStart].vPosition * TANM( ( ( uTIndex + uV ) % Val[index] ) * 2 + 1, index ); + + ++uTIndex; + ++uStart; + CornerB += ip[uStart].vPosition * fEWt; + CornerU += ip[uStart].vPosition * TANM( ( uTIndex % Val[index] ) * 2, index ); + CornerV += ip[uStart].vPosition * TANM( ( ( uTIndex+uV)%Val[index]) * 2, index ); + } + ++uStart; + + // Add in the last guy and make sure to wrap to the beginning if we're the last corner + if (index == 3) + uStart = 4; + CornerB += ip[uStart].vPosition * fOWt; + CornerU += ip[uStart].vPosition * TANM( ( uTIndex % Val[index] ) * 2 + 1, index ); + CornerV += ip[uStart].vPosition * TANM( ( ( uTIndex + uV ) % Val[index] ) * 2 + 1, index ); + + // Add in the guy before the prefix as well + if (index) + uStart = PrefIm1-1; + else + uStart = Pref[3]-1; + uTIndex = uTIndexStart-1; + + CornerB += ip[uStart].vPosition * fOWt; + CornerU += ip[uStart].vPosition * TANM( ( uTIndex % Val[index] ) * 2 + 1, index ); + CornerV += ip[uStart].vPosition * TANM( ( ( uTIndex + uV ) % Val[index] ) * 2 + 1, index ); + + // We're done with the walk now. Now we need to add the contributions of the original subd quad. 
+ CornerB += ip[MOD4(index+1)].vPosition * fEWt; + CornerB += ip[MOD4(index+2)].vPosition * fOWt; + CornerB += ip[MOD4(index+3)].vPosition * fEWt; + + uTIndex = 0 + (index&1)*(Val[index]-1); + uStart = MOD4(index+1); + CornerU += ip[uStart].vPosition * TANM( ( uTIndex % Val[index] ) * 2, index ); + CornerV += ip[uStart].vPosition * TANM( ( ( uTIndex + uV ) % Val[index] ) * 2, index ); + + uStart = MOD4(index+2); + CornerU += ip[uStart].vPosition * TANM( ( uTIndex % Val[index] ) * 2 + 1, index ); + CornerV += ip[uStart].vPosition * TANM( ( ( uTIndex + uV ) % Val[index] ) * 2 + 1, index ); + + uStart = MOD4(index+3); + uTIndex = (uTIndex+1)%Val[index]; + + CornerU += ip[uStart].vPosition * TANM( ( uTIndex % Val[index] ) * 2, index ); + CornerV += ip[uStart].vPosition * TANM( ( ( uTIndex + uV ) % Val[index] ) * 2, index ); + + // Normalize the corner weights + CornerB *= 1.0f / ( Val[index] * Val[index] + 5 * Val[index] ); // normalize + + // fixup signs from directional derivatives... + if( !((index - 1) & 2) ) // 1 and 2 + CornerU *= -1; + + if( index >= 2 ) // 2 and 3 + CornerV *= -1; +} + +void ComputeCornerVertex4444( uint index, + out float3 CornerB, // Corner for the Bezier patch + out float3 CornerU, // Corner for the tangent patch + out float3 CornerV, // Corner for the bitangent patch + const in InputPatch<VS_CONTROL_POINT_OUTPUT, MAX_POINTS> ip, + const in uint Val[4], + const in uint Pref[4] ) +{ + const float fOWt = 1; + const float fEWt = 4; + + // Figure out where to start the walk by using the previous corner's prefix value + uint PrefIm1 = 0; + uint uStart = 4; + if( index ) + { + PrefIm1 = Pref[index-1]; + uStart = PrefIm1; + } + + // Setup the walk indices + uint uTIndexStart = 2 - (index&1); + uint uTIndex = uTIndexStart; + + // Calculate the N*N weight for the final value + CornerB = (Val[index]*Val[index])*ip[index].vPosition; // n^2 part + + // Zero out the corners + CornerU = float4(0,0,0,0); + CornerV = float4(0,0,0,0); + + const uint uV = 
Val[index] + ( ( index & 1 ) ? 1 : -1 ); + + // Start the walk with the uStart prefix (the prefix of the corner before us) + CornerB += ip[uStart].vPosition * fEWt; + CornerU += ip[uStart].vPosition * TANM( uTIndex * 2, index ); + CornerV += ip[uStart].vPosition * TANM( ( ( uTIndex + uV ) % Val[index] ) * 2, index); + + // Gather all vertices between the previous corner's prefix and our own prefix + // We'll do two at a time, since they always come in twos + while(uStart < Pref[index]-1) + { + ++uStart; + CornerB += ip[uStart].vPosition * fOWt; + CornerU += ip[uStart].vPosition * TANM( uTIndex * 2 + 1, index ); + CornerV += ip[uStart].vPosition * TANM( ( ( uTIndex + uV ) % Val[index] ) * 2 + 1, index ); + + ++uTIndex; + ++uStart; + CornerB += ip[uStart].vPosition * fEWt; + CornerU += ip[uStart].vPosition * TANM( ( uTIndex % Val[index] ) * 2, index ); + CornerV += ip[uStart].vPosition * TANM( ( ( uTIndex+uV)%Val[index]) * 2, index ); + } + ++uStart; + + // Add in the last guy and make sure to wrap to the beginning if we're the last corner + if (index == 3) + uStart = 4; + CornerB += ip[uStart].vPosition * fOWt; + CornerU += ip[uStart].vPosition * TANM( ( uTIndex % Val[index] ) * 2 + 1, index ); + CornerV += ip[uStart].vPosition * TANM( ( ( uTIndex + uV ) % Val[index] ) * 2 + 1, index ); + + // Add in the guy before the prefix as well + if (index) + uStart = PrefIm1-1; + else + uStart = Pref[3]-1; + uTIndex = uTIndexStart-1; + + CornerB += ip[uStart].vPosition * fOWt; + CornerU += ip[uStart].vPosition * TANM( ( uTIndex % Val[index] ) * 2 + 1, index ); + CornerV += ip[uStart].vPosition * TANM( ( ( uTIndex + uV ) % Val[index] ) * 2 + 1, index ); + + // We're done with the walk now. Now we need to add the contributions of the original subd quad. 
+ CornerB += ip[MOD4(index+1)].vPosition * fEWt; + CornerB += ip[MOD4(index+2)].vPosition * fOWt; + CornerB += ip[MOD4(index+3)].vPosition * fEWt; + + uTIndex = 0 + (index&1)*(Val[index]-1); + uStart = MOD4(index+1); + CornerU += ip[uStart].vPosition * TANM( ( uTIndex % Val[index] ) * 2, index ); + CornerV += ip[uStart].vPosition * TANM( ( ( uTIndex + uV ) % Val[index] ) * 2, index ); + + uStart = MOD4(index+2); + CornerU += ip[uStart].vPosition * TANM( ( uTIndex % Val[index] ) * 2 + 1, index ); + CornerV += ip[uStart].vPosition * TANM( ( ( uTIndex + uV ) % Val[index] ) * 2 + 1, index ); + + uStart = MOD4(index+3); + uTIndex = (uTIndex+1)%Val[index]; + + CornerU += ip[uStart].vPosition * TANM( ( uTIndex % Val[index] ) * 2, index ); + CornerV += ip[uStart].vPosition * TANM( ( ( uTIndex + uV ) % Val[index] ) * 2, index ); + + // Normalize the corner weights + CornerB *= 1.0f / ( Val[index] * Val[index] + 5 * Val[index] ); // normalize + + // fixup signs from directional derivatives... + if( !((index - 1) & 2) ) // 1 and 2 + CornerU *= -1; + + if( index >= 2 ) // 2 and 3 + CornerV *= -1; +} + +//-------------------------------------------------------------------------------------- +// Computes the edge vertices of the output bicubic patch. The edge vertices +// (1,2,4,7,8,11,13,14) are a weighted (by valence) combination of 6 interior and 1-ring +// neighborhood points. However, we don't have to do the walk on this one since we +// don't need all of the neighbor points attached to this vertex. 
+//--------------------------------------------------------------------------------------
+// ComputeEdgeVertex: returns one edge control point as a weighted average of input
+// control-point positions.
+//   index - which edge point to compute; 0-3 use the "horizontal" stencils and 4-7 the
+//           "vertical" ones. Any other value returns float3(0,0,0).
+//   ip    - input patch; only vPosition is read.
+//   Val   - per-corner valences. In every case corner k is weighted by 2*Val[k] and the
+//           remaining weights add up to 10, so the divisor 2*Val[k]+10 normalises the sum.
+//   Pref  - per-corner prefix indices, used to address the neighbours ip[Pref[k]] and
+//           ip[Pref[k]-1] that lie outside the four quad corners ip[0..3].
+// The valN locals are named after the output control-point IDs that SubDToBezierHS maps
+// onto each index (1, 2, 13, 14, 4, 8, 7, 11).
+// NOTE(review): cases 4 and 5 read ip[4] directly rather than through Pref - presumably
+// the first neighbour after the quad corners always lives at index 4; confirm.
+float3 ComputeEdgeVertex( in uint index /* 0-7 */,
+                          const in InputPatch<VS_CONTROL_POINT_OUTPUT, MAX_POINTS> ip,
+                          const in uint Val[4],
+                          const in uint Pref[4] )
+{
+    // Divisors: 2 * valence of the dominant corner + 10.
+    float val1 = 2 * Val[0] + 10;
+    float val2 = 2 * Val[1] + 10;
+    float val13 = 2 * Val[3] + 10;
+    float val14 = 2 * Val[2] + 10;
+    // The vertical edge points share their divisor with the horizontal point at the same corner.
+    float val4 = val1;
+    float val8 = val13;
+    float val7 = val2;
+    float val11 = val14;
+
+    float3 vRetVal = float3(0,0,0);
+    switch( index )
+    {
+    // Horizontal
+    case 0:
+        vRetVal = (Val[0]*2*ip[0].vPosition + 4*ip[1].vPosition + ip[2].vPosition + ip[3].vPosition*2 +
+                   2*ip[Pref[0]-1].vPosition + ip[Pref[0]].vPosition) / val1;
+        break;
+    case 1:
+        vRetVal = (4*ip[0].vPosition + Val[1]*2*ip[1].vPosition + ip[2].vPosition*2 + ip[3].vPosition +
+                   ip[Pref[0]-1].vPosition + 2*ip[Pref[0]].vPosition) / val2;
+        break;
+    case 2:
+        vRetVal = (2*ip[0].vPosition + ip[1].vPosition + 4*ip[2].vPosition + ip[3].vPosition*2*Val[3] +
+                   2*ip[Pref[2]].vPosition + ip[Pref[2]-1].vPosition) / val13;
+        break;
+    case 3:
+        vRetVal = (ip[0].vPosition + 2*ip[1].vPosition + Val[2]*2*ip[2].vPosition + ip[3].vPosition*4 +
+                   ip[Pref[2]].vPosition + 2*ip[Pref[2]-1].vPosition) / val14;
+        break;
+    // Vertical
+    case 4:
+        vRetVal = (Val[0]*2*ip[0].vPosition + 2*ip[1].vPosition + ip[2].vPosition + ip[3].vPosition*4 +
+                   2*ip[4].vPosition + ip[Pref[3]-1].vPosition) / val4;
+        break;
+    case 5:
+        vRetVal = (4*ip[0].vPosition + ip[1].vPosition + 2*ip[2].vPosition + ip[3].vPosition*2*Val[3] +
+                   ip[4].vPosition + 2*ip[Pref[3]-1].vPosition) / val8;
+        break;
+    case 6:
+        vRetVal = (2*ip[0].vPosition + Val[1]*2*ip[1].vPosition + 4*ip[2].vPosition + ip[3].vPosition +
+                   2*ip[Pref[1]-1].vPosition + ip[Pref[1]].vPosition) / val7;
+        break;
+    case 7:
+        vRetVal = (ip[0].vPosition + 4*ip[1].vPosition + Val[2]*2*ip[2].vPosition + 2*ip[3].vPosition +
+                   ip[Pref[1]-1].vPosition + 2*ip[Pref[1]].vPosition) / val11;
+        break;
+    }
+
+    return vRetVal;
+}
+
+//--------------------------------------------------------------------------------------
+// Helper function
+//
+// BezierRaise: degree-elevates a quadratic Bezier curve (3 control points in pQ) to the
+// equivalent cubic (4 control points in pC). The end points are copied and the interior
+// points are pC[1] = (pQ[0] + 2*pQ[1]) / 3 and pC[2] = (2*pQ[1] + pQ[2]) / 3.
+// pQ is declared inout but is only read here.
+//--------------------------------------------------------------------------------------
+void BezierRaise(inout float3 pQ[3], out float3 pC[4])
+{
+    pC[0] = pQ[0];
+    pC[3] = pQ[2];
+
+    for( int i=1; i<3; i++ )
+    {
+        pC[i] = ( 1.0f / 3.0f ) * ( pQ[i - 1] * i + ( 3.0f - i ) * pQ[i] );
+    }
+}
+
+//--------------------------------------------------------------------------------------
+// Computes the tangent patch from the input bezier patch
+//
+// Fills the 12 non-corner entries of the 4x4 tangent patch vOut; the four corner entries
+// (0, 3*cX, 3*cY, 3*cX+3*cY) are not written and must already hold the corner tangents.
+//   bezpatch     - position control points; entry (a,b) is addressed as a*cX + b*cY.
+//   vOut         - tangent patch being completed, addressed the same way.
+//   fCWts        - four per-corner weights, read as [3],[0] for the start of the two
+//                  interior rows and [1],[2] for their end.
+//   vCorner      - corner tangents of the other parametric direction; combined with the
+//                  weights to build the end points of the interior rows.
+//   vCornerLocal - corner tangents of this direction; used directly as the end points of
+//                  the two boundary rows (b = 0 and b = 3).
+//   cX, cY       - index strides; CreatTangentPatches passes (1,4) for TanU and (4,1) for TanV.
+//--------------------------------------------------------------------------------------
+void ComputeTanPatch( const OutputPatch<BEZIER_CONTROL_POINT, 16> bezpatch,
+                      inout float3 vOut[16],
+                      in float fCWts[4],
+                      in float3 vCorner[4],
+                      in float3 vCornerLocal[4],
+                      in const uint cX,
+                      in const uint cY)
+{
+    float3 vQuad[3];
+    float3 vQuadB[3];
+    float3 vCubic[4];
+
+    // boundary edges are really simple...
+    // Row b = 0: quadratic from the two corner tangents and the scaled middle difference.
+    vQuad[0] = vCornerLocal[0];
+    vQuad[2] = vCornerLocal[1];
+    vQuad[1] = 3.0f*(bezpatch[2*cX+0*cY].vPosition-bezpatch[1*cX+0*cY].vPosition);
+
+    BezierRaise(vQuad,vCubic);
+    vOut[1*cX + 0*cY] = vCubic[1];
+    vOut[2*cX + 0*cY] = vCubic[2];
+
+    // Row b = 3.
+    vQuad[0] = vCornerLocal[2];
+    vQuad[2] = vCornerLocal[3];
+    vQuad[1] = 3.0f*(bezpatch[2*cX+3*cY].vPosition-bezpatch[1*cX+3*cY].vPosition);
+
+    BezierRaise(vQuad,vCubic);
+    vOut[1*cX + 3*cY] = vCubic[1];
+    vOut[2*cX + 3*cY] = vCubic[2];
+
+    // two internal edges - this is where work happens...
+    // vQuad builds row b = 2 and vQuadB builds row b = 1.
+    float3 vA,vB,vC,vD,vE;
+    float fC0,fC1;
+    vQuad[1] = 3.0f*(bezpatch[2*cX+2*cY].vPosition-bezpatch[1*cX+2*cY].vPosition);
+    // also do "second" scan line
+    vQuadB[1] = 3.0f*(bezpatch[2*cX+1*cY].vPosition-bezpatch[1*cX+1*cY].vPosition);
+
+    vD = 3.0f*(bezpatch[1*cX + 2*cY].vPosition - bezpatch[0*cX + 2*cY].vPosition);
+    vE = 3.0f*(bezpatch[1*cX + 1*cY].vPosition - bezpatch[0*cX + 1*cY].vPosition); // used later...
+
+    fC0 = fCWts[3];
+    fC1 = fCWts[0];
+
+    // sign flip
+    vA = -vCorner[3];
+    vB = 3.0f*(bezpatch[0*cX + 1*cY].vPosition - bezpatch[0*cX + 2*cY].vPosition);
+    vC = -vCorner[0];
+
+    vQuad[0] = 1.0f/3.0f*(2.0f*fC0*vB - fC1*vA) + vD;
+    vQuadB[0] = 1.0f/3.0f*(fC0*vC - 2.0f*fC1*vB) + vE;
+
+    // do end of strip - same as before, but stuff is switched around...
+    vC = vCorner[2];
+    vB = 3.0f*(bezpatch[3*cX + 2*cY].vPosition - bezpatch[3*cX + 1*cY].vPosition);
+    vA = vCorner[1];
+
+    vD = 3.0f*(bezpatch[2*cX + 1*cY].vPosition - bezpatch[3*cX + 1*cY].vPosition);
+    vE = 3.0f*(bezpatch[2*cX + 2*cY].vPosition - bezpatch[3*cX + 2*cY].vPosition);
+
+    fC0 = fCWts[1];
+    fC1 = fCWts[2];
+
+    vQuadB[2] = 1.0f/3.0f*(2.0f*fC0*vB - fC1*vA) + vD;
+    vQuad[2] = 1.0f/3.0f*(fC0*vC - 2.0f*fC1*vB) + vE;
+
+    // The end-of-strip differences above run from column 3 back to column 2, so negate.
+    vQuadB[2] *= -1.0f;
+    vQuad[2] *= -1.0f;
+
+    BezierRaise(vQuad,vCubic);
+
+    vOut[0*cX + 2*cY] = vCubic[0];
+    vOut[1*cX + 2*cY] = vCubic[1];
+    vOut[2*cX + 2*cY] = vCubic[2];
+    vOut[3*cX + 2*cY] = vCubic[3];
+
+    BezierRaise(vQuadB,vCubic);
+
+    vOut[0*cX + 1*cY] = vCubic[0];
+    vOut[1*cX + 1*cY] = vCubic[1];
+    vOut[2*cX + 1*cY] = vCubic[2];
+    vOut[3*cX + 1*cY] = vCubic[3];
+}
+
+//--------------------------------------------------------------------------------------
+// Skinning vertex shader Section
+//--------------------------------------------------------------------------------------
+// Per-control-point vertex input: four bone indices (vBones) with matching weights (vWeights).
+struct VS_CONTROL_POINT_INPUT
+{
+    float3 vPosition : POSITION;
+    float2 vUV : TEXCOORD0;
+    float3 vTangent : TANGENT;
+    uint4 vBones : BONES;
+    float4 vWeights : WEIGHTS;
+};
+
+// Skins a patch control point: position and tangent are each transformed by the four
+// g_mConstBoneWorld matrices selected by vBones and blended with vWeights. The UV is passed
+// through. No view/projection transform is applied here, and the blended tangent is not
+// renormalised.
+VS_CONTROL_POINT_OUTPUT PatchSkinningVS( VS_CONTROL_POINT_INPUT Input )
+{
+    VS_CONTROL_POINT_OUTPUT Output;
+
+    float4 vInputPos = float4( Input.vPosition, 1 );
+    float4 vWorldPos = float4( 0, 0, 0, 0 );
+
+    vWorldPos += mul( vInputPos, g_mConstBoneWorld[ Input.vBones.x ] ) * Input.vWeights.x;
+    vWorldPos += mul( vInputPos, g_mConstBoneWorld[ Input.vBones.y ] ) * Input.vWeights.y;
+    vWorldPos += mul( vInputPos, g_mConstBoneWorld[ Input.vBones.z ] ) * Input.vWeights.z;
+    vWorldPos += mul( vInputPos, g_mConstBoneWorld[ Input.vBones.w ] ) * Input.vWeights.w;
+
+    // Tangents are directions, so only the upper-left 3x3 of each bone matrix is applied.
+    float3 vWorldTan = float3( 0, 0, 0 );
+    vWorldTan += mul( Input.vTangent, (float3x3)g_mConstBoneWorld[ Input.vBones.x ] ) * Input.vWeights.x;
+    vWorldTan += mul( Input.vTangent, (float3x3)g_mConstBoneWorld[ Input.vBones.y ] ) * Input.vWeights.y;
+    vWorldTan += mul( Input.vTangent, (float3x3)g_mConstBoneWorld[ Input.vBones.z ] ) * Input.vWeights.z;
+    vWorldTan += mul( Input.vTangent, (float3x3)g_mConstBoneWorld[ Input.vBones.w ] ) * Input.vWeights.w;
+
+    Output.vPosition = vWorldPos;
+    Output.vUV = Input.vUV;
+    Output.vTangent = vWorldTan;
+
+    return Output;
+}
+
+// Vertex input for the non-tessellated mesh path; same as the control-point input plus a normal.
+struct VS_MESH_POINT_INPUT
+{
+    float3 vPosition : POSITION;
+    float2 vUV : TEXCOORD0;
+    float3 vNormal : NORMAL;
+    float3 vTangent : TANGENT;
+    uint4 vBones : BONES;
+    float4 vWeights : WEIGHTS;
+};
+
+// Output of MeshSkinningVS: skinned world-space attributes plus the projected position.
+struct VS_MESH_POINT_OUTPUT
+{
+    float3 vWorldPos : POSITION;
+    float3 vNormal : NORMAL;
+    float2 vUV : TEXCOORD;
+    float3 vTangent : TANGENT;
+    float3 vBiTangent : BITANGENT;
+
+    float4 vPosition : SV_POSITION;
+};
+
+// Skins a mesh vertex with the same four-bone blend as PatchSkinningVS, additionally
+// blending the normal, deriving the bitangent as cross( normal, tangent ) and projecting
+// the skinned position with g_mViewProjection. The blended vectors are not renormalised.
+VS_MESH_POINT_OUTPUT MeshSkinningVS( VS_MESH_POINT_INPUT Input )
+{
+    VS_MESH_POINT_OUTPUT Output;
+
+    float4 vInputPos = float4( Input.vPosition, 1 );
+    float4 vWorldPos = float4( 0, 0, 0, 0 );
+
+    vWorldPos += mul( vInputPos, g_mConstBoneWorld[ Input.vBones.x ] ) * Input.vWeights.x;
+    vWorldPos += mul( vInputPos, g_mConstBoneWorld[ Input.vBones.y ] ) * Input.vWeights.y;
+    vWorldPos += mul( vInputPos, g_mConstBoneWorld[ Input.vBones.z ] ) * Input.vWeights.z;
+    vWorldPos += mul( vInputPos, g_mConstBoneWorld[ Input.vBones.w ] ) * Input.vWeights.w;
+
+    float3 vWorldTan = float3( 0, 0, 0 );
+    vWorldTan += mul( Input.vTangent, (float3x3)g_mConstBoneWorld[ Input.vBones.x ] ) * Input.vWeights.x;
+    vWorldTan += mul( Input.vTangent, (float3x3)g_mConstBoneWorld[ Input.vBones.y ] ) * Input.vWeights.y;
+    vWorldTan += mul( Input.vTangent, (float3x3)g_mConstBoneWorld[ Input.vBones.z ] ) * Input.vWeights.z;
+    vWorldTan += mul( Input.vTangent, (float3x3)g_mConstBoneWorld[ Input.vBones.w ] ) * Input.vWeights.w;
+
+    float3 vWorldNormal = float3( 0, 0, 0 );
+    vWorldNormal += mul( Input.vNormal, (float3x3)g_mConstBoneWorld[ Input.vBones.x ] ) * Input.vWeights.x;
+    vWorldNormal += mul( Input.vNormal, (float3x3)g_mConstBoneWorld[ Input.vBones.y ] ) * Input.vWeights.y;
+    vWorldNormal += mul( Input.vNormal, (float3x3)g_mConstBoneWorld[ Input.vBones.z ] ) * Input.vWeights.z;
+    vWorldNormal += mul( Input.vNormal, (float3x3)g_mConstBoneWorld[ Input.vBones.w ] ) * Input.vWeights.w;
+
+    Output.vWorldPos = vWorldPos.xyz;
+    // The blended w is discarded and replaced by 1 before projecting.
+    Output.vPosition = mul( float4( vWorldPos.xyz, 1 ), g_mViewProjection );
+    Output.vUV = Input.vUV;
+    Output.vTangent = vWorldTan;
+    Output.vNormal = vWorldNormal;
+    Output.vBiTangent = cross( vWorldNormal, vWorldTan );
+
+    return Output;
+}
+
+//--------------------------------------------------------------------------------------
+// SubD to Bezier hull shader Section
+//--------------------------------------------------------------------------------------
+// Per-patch constants produced by the patch-constant functions below and consumed by the
+// domain shader: tessellation factors, the four corner tangents and UVs, the corner
+// tangent vectors of the U and V tangent patches, and one weight per corner.
+struct HS_CONSTANT_DATA_OUTPUT
+{
+    float Edges[4] : SV_TessFactor;
+    float Inside[2] : SV_InsideTessFactor;
+
+    float3 vTangent[4] : TANGENT;
+    float2 vUV[4] : TEXCOORD;
+    float3 vTanUCorner[4] : TANUCORNER;
+    float3 vTanVCorner[4] : TANVCORNER;
+    float4 vCWts : TANWEIGHTS;
+};
+
+//--------------------------------------------------------------------------------------
+// Load per-patch valence and prefix data
+//
+// Each patch owns two consecutive uint4 entries of g_ValencePrefixBuffer: the four corner
+// valences followed by the four prefixes. PatchID is offset by g_iPatchStartIndex first.
+//--------------------------------------------------------------------------------------
+void LoadValenceAndPrefixData( in uint PatchID, out uint Val[4], out uint Prefixes[4] )
+{
+    PatchID += g_iPatchStartIndex;
+    uint4 ValPack = g_ValencePrefixBuffer.Load( PatchID * 2 );
+    uint4 PrefPack = g_ValencePrefixBuffer.Load( PatchID * 2 + 1 );
+
+    Val[0] = ValPack.x;
+    Val[1] = ValPack.y;
+    Val[2] = ValPack.z;
+    Val[3] = ValPack.w;
+
+    Prefixes[0] = PrefPack.x;
+    Prefixes[1] = PrefPack.y;
+    Prefixes[2] = PrefPack.z;
+    Prefixes[3] = PrefPack.w;
+}
+
+
+//--------------------------------------------------------------------------------------
+// Constant data function for the SubDToBezierHS. This is executed once per patch.
+//
+// Sets every edge and inside tessellation factor to g_fTessellationFactor, copies the
+// tangent and UV of the four quad corners, computes the corner U/V tangent vectors with
+// ComputeCornerVertex (the corner position it also returns is discarded here) and looks
+// up one weight per corner from g_fCi.
+//--------------------------------------------------------------------------------------
+HS_CONSTANT_DATA_OUTPUT SubDToBezierConstantsHS( InputPatch<VS_CONTROL_POINT_OUTPUT, MAX_POINTS> ip,
+                                                 uint PatchID : SV_PrimitiveID )
+{
+    HS_CONSTANT_DATA_OUTPUT Output;
+
+    float TessAmount = g_fTessellationFactor;
+
+    Output.Edges[0] = Output.Edges[1] = Output.Edges[2] = Output.Edges[3] = TessAmount;
+    Output.Inside[0] = Output.Inside[1] = TessAmount;
+
+    Output.vTangent[0] = ip[0].vTangent;
+    Output.vTangent[1] = ip[1].vTangent;
+    Output.vTangent[2] = ip[2].vTangent;
+    Output.vTangent[3] = ip[3].vTangent;
+
+    Output.vUV[0] = ip[0].vUV;
+    Output.vUV[1] = ip[1].vUV;
+    Output.vUV[2] = ip[2].vUV;
+    Output.vUV[3] = ip[3].vUV;
+
+    // Compute part of our tangent patch here
+    uint Val[4];
+    uint Prefixes[4];
+    LoadValenceAndPrefixData( PatchID, Val, Prefixes );
+
+    [unroll]
+    for( int i=0; i<4; i++ )
+    {
+        float3 CornerB, CornerU, CornerV;
+        ComputeCornerVertex( i, CornerB, CornerU, CornerV, ip, Val, Prefixes );
+        Output.vTanUCorner[i] = CornerU;
+        Output.vTanVCorner[i] = CornerV;
+    }
+
+    // g_fCi is indexed by valence - 3. Val is unsigned, so a valence below 3 would wrap
+    // around instead of going negative.
+    Output.vCWts.x = g_fCi[ Val[0]-3 ];
+    Output.vCWts.y = g_fCi[ Val[1]-3 ];
+    Output.vCWts.z = g_fCi[ Val[2]-3 ];
+    Output.vCWts.w = g_fCi[ Val[3]-3 ];
+
+    return Output;
+}
+
+// Patch-constant function for SubDToBezierHS4444. Identical to SubDToBezierConstantsHS
+// except that the valences (4,4,4,4) and prefixes (7,10,13,16) are compile-time constants
+// instead of being loaded from the buffer, and corners go through ComputeCornerVertex4444.
+// PatchID is not used.
+HS_CONSTANT_DATA_OUTPUT SubDToBezierConstantsHS4444( InputPatch<VS_CONTROL_POINT_OUTPUT, MAX_POINTS> ip,
+                                                     uint PatchID : SV_PrimitiveID )
+{
+    HS_CONSTANT_DATA_OUTPUT Output;
+
+    float TessAmount = g_fTessellationFactor;
+
+    Output.Edges[0] = Output.Edges[1] = Output.Edges[2] = Output.Edges[3] = TessAmount;
+    Output.Inside[0] = Output.Inside[1] = TessAmount;
+
+    Output.vTangent[0] = ip[0].vTangent;
+    Output.vTangent[1] = ip[1].vTangent;
+    Output.vTangent[2] = ip[2].vTangent;
+    Output.vTangent[3] = ip[3].vTangent;
+
+    Output.vUV[0] = ip[0].vUV;
+    Output.vUV[1] = ip[1].vUV;
+    Output.vUV[2] = ip[2].vUV;
+    Output.vUV[3] = ip[3].vUV;
+
+    // Compute part of our tangent patch here
+    static const uint Val[4] = (uint[4])uint4(4,4,4,4);
+    static const uint Prefixes[4] = (uint[4])uint4(7,10,13,16);
+
+    [unroll]
+    for( int i=0; i<4; i++ )
+    {
+        float3 CornerB, CornerU, CornerV;
+        ComputeCornerVertex4444( i, CornerB, CornerU, CornerV, ip, Val, Prefixes );
+        Output.vTanUCorner[i] = CornerU;
+        Output.vTanVCorner[i] = CornerV;
+    }
+
+    // With every valence fixed at 4 all four weights resolve to g_fCi[1].
+    Output.vCWts.x = g_fCi[ Val[0]-3 ];
+    Output.vCWts.y = g_fCi[ Val[1]-3 ];
+    Output.vCWts.z = g_fCi[ Val[2]-3 ];
+    Output.vCWts.w = g_fCi[ Val[3]-3 ];
+
+    return Output;
+}
+
+
+//--------------------------------------------------------------------------------------
+// HS for SubDToBezier. This outputcontrolpoints(16) specifies that we will produce
+// 16 control points. Therefore this function will be invoked 16x, one for each output
+// control point.
+//
+// !! PERFORMANCE NOTE: This hull shader is written for maximum readability, and its
+// performance is not expected to be optimal on D3D11 hardware. The switch statement
+// below that determines the codepath for each patch control point generates sub-optimal
+// code for parallel execution on the GPU. A future implementation of this hull shader
+// will combine the 16 codepaths and 3 variants (corner, edge, interior) into one shared
+// codepath; this change is expected to increase performance at the expense of readability.
+//--------------------------------------------------------------------------------------
+// Output control points form a row-major 4x4 grid (i = 0..15). The switch below maps each
+// output ID to a helper and a helper-local index:
+//   interior: 5, 6, 10, 9              -> ComputeInteriorVertex 0..3
+//   corners:  0, 3, 15, 12             -> ComputeCornerVertex   0..3
+//   edges:    1, 2, 13, 14, 4, 8, 7, 11 -> ComputeEdgeVertex     0..7
+// CornerU / CornerV are produced by the corner helper but are not used by this shader.
+[domain("quad")]
+[partitioning("integer")]
+[outputtopology("triangle_cw")]
+[outputcontrolpoints(16)]
+[patchconstantfunc("SubDToBezierConstantsHS")]
+BEZIER_CONTROL_POINT SubDToBezierHS( InputPatch<VS_CONTROL_POINT_OUTPUT, MAX_POINTS> p,
+                                     uint i : SV_OutputControlPointID,
+                                     uint PatchID : SV_PrimitiveID )
+{
+    // Valences and prefixes are loaded from a buffer
+    uint Val[4];
+    uint Prefixes[4];
+    LoadValenceAndPrefixData( PatchID, Val, Prefixes );
+
+    float3 CornerB = float3(0,0,0);
+    float3 CornerU = float3(0,0,0);
+    float3 CornerV = float3(0,0,0);
+
+    BEZIER_CONTROL_POINT Output;
+    Output.vPosition = float3(0,0,0);
+
+    // !! PERFORMANCE NOTE: As mentioned above, this switch statement generates
+    // inefficient code for the sake of readability.
+    switch( i )
+    {
+    // Interior vertices
+    case 5:
+        Output.vPosition = ComputeInteriorVertex( 0, Val, p );
+        break;
+    case 6:
+        Output.vPosition = ComputeInteriorVertex( 1, Val, p );
+        break;
+    case 10:
+        Output.vPosition = ComputeInteriorVertex( 2, Val, p );
+        break;
+    case 9:
+        Output.vPosition = ComputeInteriorVertex( 3, Val, p );
+        break;
+
+    // Corner vertices
+    case 0:
+        ComputeCornerVertex( 0, CornerB, CornerU, CornerV, p, Val, Prefixes );
+        Output.vPosition = CornerB;
+        break;
+    case 3:
+        ComputeCornerVertex( 1, CornerB, CornerU, CornerV, p, Val, Prefixes );
+        Output.vPosition = CornerB;
+        break;
+    case 15:
+        ComputeCornerVertex( 2, CornerB, CornerU, CornerV, p, Val, Prefixes );
+        Output.vPosition = CornerB;
+        break;
+    case 12:
+        ComputeCornerVertex( 3, CornerB, CornerU, CornerV, p, Val, Prefixes );
+        Output.vPosition = CornerB;
+        break;
+
+    // Edge vertices
+    case 1:
+        Output.vPosition = ComputeEdgeVertex( 0, p, Val, Prefixes );
+        break;
+    case 2:
+        Output.vPosition = ComputeEdgeVertex( 1, p, Val, Prefixes );
+        break;
+    case 13:
+        Output.vPosition = ComputeEdgeVertex( 2, p, Val, Prefixes );
+        break;
+    case 14:
+        Output.vPosition = ComputeEdgeVertex( 3, p, Val, Prefixes );
+        break;
+    case 4:
+        Output.vPosition = ComputeEdgeVertex( 4, p, Val, Prefixes );
+        break;
+    case 8:
+        Output.vPosition = ComputeEdgeVertex( 5, p, Val, Prefixes );
+        break;
+    case 7:
+        Output.vPosition = ComputeEdgeVertex( 6, p, Val, Prefixes );
+        break;
+    case 11:
+        Output.vPosition = ComputeEdgeVertex( 7, p, Val, Prefixes );
+        break;
+    }
+
+    return Output;
+}
+
+//--------------------------------------------------------------------------------------
+// Specialised version for Regular (4,4,4,4) patches, this is much simpler and has less
+// branching compared to the general one above
+//
+// The output-ID mapping is the same as in SubDToBezierHS; the differences are that the
+// valences and prefixes are constants, corners use ComputeCornerVertex4444, and PatchID
+// is not read.
+//--------------------------------------------------------------------------------------
+[domain("quad")]
+[partitioning("integer")]
+[outputtopology("triangle_cw")]
+[outputcontrolpoints(16)]
+[patchconstantfunc("SubDToBezierConstantsHS4444")]
+BEZIER_CONTROL_POINT SubDToBezierHS4444( InputPatch<VS_CONTROL_POINT_OUTPUT, MAX_POINTS> p,
+                                         uint i : SV_OutputControlPointID,
+                                         uint PatchID : SV_PrimitiveID )
+{
+    // Valences and prefixes are Constant for this case (4,4,4,4)
+    static const uint Val[4] = (uint[4])uint4(4,4,4,4);
+    static const uint Prefixes[4] = (uint[4])uint4(7,10,13,16);
+
+    float3 CornerB = float3(0,0,0);
+    float3 CornerU = float3(0,0,0);
+    float3 CornerV = float3(0,0,0);
+
+    BEZIER_CONTROL_POINT Output;
+    Output.vPosition = float3(0,0,0);
+
+    // !! PERFORMANCE NOTE: As mentioned above, this switch statement generates
+    // inefficient code for the sake of readability.
+    switch( i )
+    {
+    // Interior vertices
+    case 5:
+        Output.vPosition = ComputeInteriorVertex( 0, Val, p );
+        break;
+    case 6:
+        Output.vPosition = ComputeInteriorVertex( 1, Val, p );
+        break;
+    case 10:
+        Output.vPosition = ComputeInteriorVertex( 2, Val, p );
+        break;
+    case 9:
+        Output.vPosition = ComputeInteriorVertex( 3, Val, p );
+        break;
+
+    // Corner vertices
+    case 0:
+        ComputeCornerVertex4444( 0, CornerB, CornerU, CornerV, p, Val, Prefixes );
+        Output.vPosition = CornerB;
+        break;
+    case 3:
+        ComputeCornerVertex4444( 1, CornerB, CornerU, CornerV, p, Val, Prefixes );
+        Output.vPosition = CornerB;
+        break;
+    case 15:
+        ComputeCornerVertex4444( 2, CornerB, CornerU, CornerV, p, Val, Prefixes );
+        Output.vPosition = CornerB;
+        break;
+    case 12:
+        ComputeCornerVertex4444( 3, CornerB, CornerU, CornerV, p, Val, Prefixes );
+        Output.vPosition = CornerB;
+        break;
+
+    // Edge vertices
+    case 1:
+        Output.vPosition = ComputeEdgeVertex( 0, p, Val, Prefixes );
+        break;
+    case 2:
+        Output.vPosition = ComputeEdgeVertex( 1, p, Val, Prefixes );
+        break;
+    case 13:
+        Output.vPosition = ComputeEdgeVertex( 2, p, Val, Prefixes );
+        break;
+    case 14:
+        Output.vPosition = ComputeEdgeVertex( 3, p, Val, Prefixes );
+        break;
+    case 4:
+        Output.vPosition = ComputeEdgeVertex( 4, p, Val, Prefixes );
+        break;
+    case 8:
+        Output.vPosition = ComputeEdgeVertex( 5, p, Val, Prefixes );
+        break;
+    case 7:
+        Output.vPosition = ComputeEdgeVertex( 6, p, Val, Prefixes );
+        break;
+    case 11:
+        Output.vPosition = ComputeEdgeVertex( 7, p, Val, Prefixes );
+        break;
+    }
+
+    return Output;
+}
+
+
+//--------------------------------------------------------------------------------------
+// Bezier evaluation domain shader section
+//--------------------------------------------------------------------------------------
+// Domain shader output: world-space position and shading frame, UV, and the projected position.
+struct DS_OUTPUT
+{
+    float3 vWorldPos : POSITION;
+    float3 vNormal : NORMAL;
+    float2 vUV : TEXCOORD;
+    float3 vTangent : TANGENT;
+    float3 vBiTangent : BITANGENT;
+
+    float4 vPosition : SV_POSITION;
+};
+
+//--------------------------------------------------------------------------------------
+// Returns the four cubic Bernstein basis values at t:
+// ( (1-t)^3, 3t(1-t)^2, 3t^2(1-t), t^3 ).
+float4 BernsteinBasis(float t)
+{
+    float invT = 1.0f - t;
+
+    return float4( invT * invT * invT,
+                   3.0f * t * invT * invT,
+                   3.0f * t * t * invT,
+                   t * t * t );
+}
+
+//--------------------------------------------------------------------------------------
+// Returns the derivatives with respect to t of the four cubic Bernstein basis functions.
+float4 dBernsteinBasis(float t)
+{
+    float invT = 1.0f - t;
+
+    return float4( -3 * invT * invT,
+                   3 * invT * invT - 6 * t * invT,
+                   6 * t * invT - 3 * t * t,
+                   3 * t * t );
+}
+
+//--------------------------------------------------------------------------------------
+// Evaluates the bicubic patch position: control points are read row-major, BasisU weights
+// the four entries within a row and BasisV weights the four rows.
+float3 EvaluateBezier( const OutputPatch<BEZIER_CONTROL_POINT, 16> bezpatch,
+                       float4 BasisU,
+                       float4 BasisV )
+{
+    float3 Value = float3(0,0,0);
+    Value = BasisV.x * ( bezpatch[0].vPosition * BasisU.x + bezpatch[1].vPosition * BasisU.y + bezpatch[2].vPosition * BasisU.z + bezpatch[3].vPosition * BasisU.w );
+    Value += BasisV.y * ( bezpatch[4].vPosition * BasisU.x + bezpatch[5].vPosition * BasisU.y + bezpatch[6].vPosition * BasisU.z + bezpatch[7].vPosition * BasisU.w );
+    Value += BasisV.z * ( bezpatch[8].vPosition * BasisU.x + bezpatch[9].vPosition * BasisU.y + bezpatch[10].vPosition * BasisU.z + bezpatch[11].vPosition * BasisU.w );
+    Value += BasisV.w * ( bezpatch[12].vPosition * BasisU.x + bezpatch[13].vPosition * BasisU.y + bezpatch[14].vPosition * BasisU.z + bezpatch[15].vPosition * BasisU.w );
+
+    return Value;
+}
+
+//--------------------------------------------------------------------------------------
+// Same weighted sum as EvaluateBezier, but over a plain array of 16 float3 values (used
+// for the tangent patches) instead of an OutputPatch of control points.
+float3 EvaluateBezierTan( const float3 bezpatch[16],
+                          float4 BasisU,
+                          float4 BasisV )
+{
+    float3 Value = float3(0,0,0);
+    Value = BasisV.x * ( bezpatch[0] * BasisU.x + bezpatch[1] * BasisU.y + bezpatch[2] * BasisU.z + bezpatch[3] * BasisU.w );
+    Value += BasisV.y * ( bezpatch[4] * BasisU.x + bezpatch[5] * BasisU.y + bezpatch[6] * BasisU.z + bezpatch[7] * BasisU.w );
+    Value += BasisV.z * ( bezpatch[8] * BasisU.x + bezpatch[9] * BasisU.y + bezpatch[10] * BasisU.z + bezpatch[11] * BasisU.w );
+    Value += BasisV.w * ( bezpatch[12] * BasisU.x + bezpatch[13] * BasisU.y + bezpatch[14] * BasisU.z + bezpatch[15] * BasisU.w );
+
+    return Value;
+}
+
+//--------------------------------------------------------------------------------------
+// Computes two full tangent patches from the tangent corner data created in the
+// HS constant data function.
+//
+// The four corner entries (0, 3, 15, 12) of TanU and TanV are seeded from
+// input.vTanUCorner / input.vTanVCorner; ComputeTanPatch then fills the remaining 12
+// entries of each patch. The first call builds TanU with index strides (1,4); the second
+// builds TanV with strides (4,1), with the corner arrays re-ordered and fCWts[1] / fCWts[3]
+// swapped to match the transposed traversal.
+//--------------------------------------------------------------------------------------
+void CreatTangentPatches( in HS_CONSTANT_DATA_OUTPUT input,
+                          const OutputPatch<BEZIER_CONTROL_POINT, 16> bezpatch,
+                          out float3 TanU[16],
+                          out float3 TanV[16] )
+{
+    TanV[0] = input.vTanVCorner[0];
+    TanV[3] = input.vTanVCorner[1];
+    TanV[15] = input.vTanVCorner[2];
+    TanV[12] = input.vTanVCorner[3];
+
+    TanU[0] = input.vTanUCorner[0];
+    TanU[3] = input.vTanUCorner[1];
+    TanU[15] = input.vTanUCorner[2];
+    TanU[12] = input.vTanUCorner[3];
+
+    float fCWts[4];
+    fCWts[0] = input.vCWts.x;
+    fCWts[1] = input.vCWts.y;
+    fCWts[2] = input.vCWts.z;
+    fCWts[3] = input.vCWts.w;
+
+    float3 vCorner[4];
+    float3 vCornerLocal[4];
+
+    // U patch: cross-direction corners come from TanV, same-direction corners from TanU.
+    vCorner[0] = TanV[0];
+    vCorner[1] = TanV[3];
+    vCorner[2] = TanV[15];
+    vCorner[3] = TanV[12];
+    vCornerLocal[0] = TanU[0];
+    vCornerLocal[1] = TanU[3];
+    vCornerLocal[2] = TanU[12];
+    vCornerLocal[3] = TanU[15];
+
+    ComputeTanPatch( bezpatch, TanU, fCWts, vCorner, vCornerLocal, 1, 4 );
+
+    // V patch: swap the weights of corners 1 and 3 (fCWts[0] and fCWts[2] are unchanged).
+    fCWts[3] = input.vCWts.y;
+    fCWts[1] = input.vCWts.w;
+
+    vCorner[0] = TanU[0];
+    vCorner[3] = TanU[3];
+    vCorner[2] = TanU[15];
+    vCorner[1] = TanU[12];
+    vCornerLocal[0] = TanV[0];
+    vCornerLocal[1] = TanV[12];
+    vCornerLocal[2] = TanV[3];
+    vCornerLocal[3] = TanV[15];
+
+    ComputeTanPatch( bezpatch, TanV, fCWts, vCorner, vCornerLocal, 4, 1 );
+}
+
+//--------------------------------------------------------------------------------------
+// For each input UV (from the Tessellator), evaluate the Bezier patch at this position.
+//--------------------------------------------------------------------------------------
+// Outputs the world-space position (optionally displaced along the normal by the alpha
+// channel of g_txHeight), the patch normal from the two tangent patches, the bilinearly
+// interpolated texture-space tangent and UV, and the projected position.
+[domain("quad")]
+DS_OUTPUT BezierEvalDS( HS_CONSTANT_DATA_OUTPUT input,
+                        float2 UV : SV_DomainLocation,
+                        const OutputPatch<BEZIER_CONTROL_POINT, 16> bezpatch )
+{
+    float4 BasisU = BernsteinBasis( UV.x );
+    float4 BasisV = BernsteinBasis( UV.y );
+
+    float3 WorldPos = EvaluateBezier( bezpatch, BasisU, BasisV );
+
+    // The parametric tangents come from separate tangent patches, not from the derivative
+    // of the position patch (see the disabled section below for that variant).
+    float3 TanU[16];
+    float3 TanV[16];
+    CreatTangentPatches( input, bezpatch, TanU, TanV );
+    float3 Tangent = EvaluateBezierTan( TanU, BasisU, BasisV );
+    float3 BiTangent = EvaluateBezierTan( TanV, BasisU, BasisV );
+
+    // To see what the patch looks like without using the tangent patches to fix the normals, uncomment this section
+    /*
+    float4 dBasisU = dBernsteinBasis( UV.x );
+    float4 dBasisV = dBernsteinBasis( UV.y );
+    Tangent = EvaluateBezier( bezpatch, dBasisU, BasisV );
+    BiTangent = EvaluateBezier( bezpatch, BasisU, dBasisV );
+    */
+
+    float3 Norm = normalize( cross( Tangent, BiTangent ) );
+
+    DS_OUTPUT Output;
+    Output.vNormal = Norm;
+
+    // Evaluate the tangent vectors through bilinear interpolation.
+    // These tangents are the texture-space tangents. They should not be confused with the parametric
+    // tangents that we use to get the normals for the bicubic patch.
+    float3 TextureTanU0 = input.vTangent[0];
+    float3 TextureTanU1 = input.vTangent[1];
+    float3 TextureTanU2 = input.vTangent[2];
+    float3 TextureTanU3 = input.vTangent[3];
+
+    // Corners 0-1 form the bottom edge and 3-2 the top edge.
+    float3 UVbottom = lerp( TextureTanU0, TextureTanU1, UV.x );
+    float3 UVtop = lerp( TextureTanU3, TextureTanU2, UV.x );
+    float3 Tan = lerp( UVbottom, UVtop, UV.y );
+
+    Output.vTangent = Tan;
+
+    // This is an optimization. We assume that the UV mapping of the mesh will result in a "relatively" orthogonal
+    // tangent basis. If we assume this, then we can avoid fetching and bilerping the BiTangent along with the tangent.
+    Output.vBiTangent = cross( Norm, Tan );
+
+    // bilerp the texture coordinates
+    float2 tex0 = input.vUV[0];
+    float2 tex1 = input.vUV[1];
+    float2 tex2 = input.vUV[2];
+    float2 tex3 = input.vUV[3];
+
+    float2 bottom = lerp( tex0, tex1, UV.x );
+    float2 top = lerp( tex3, tex2, UV.x );
+    float2 TexUV = lerp( bottom, top, UV.y );
+    Output.vUV = TexUV;
+
+    if( g_fDisplacementHeight > 0 )
+    {
+        // On this sample displacement can go into or out of the mesh. This is why we bias the height amount.
+        // The alpha sample is remapped from [0,1] to [-1,1] before scaling.
+        float height = g_fDisplacementHeight * ( g_txHeight.SampleLevel( g_samPoint, TexUV, 0 ).a * 2 - 1 );
+        float3 WorldPosMiddle = Norm * height;
+        WorldPos += WorldPosMiddle;
+    }
+
+    Output.vPosition = mul( float4(WorldPos,1), g_mViewProjection );
+    Output.vWorldPos = WorldPos;
+
+    return Output;
+}
+
+//--------------------------------------------------------------------------------------
+// Smooth shading pixel shader section
+//--------------------------------------------------------------------------------------
+
+// Normalizes vInput, but returns it unchanged when its squared length is not positive
+// (avoids the division by zero a plain normalize() would perform on a zero vector).
+float3 safe_normalize( float3 vInput )
+{
+    float len2 = dot( vInput, vInput );
+    if( len2 > 0 )
+    {
+        return vInput * rsqrt( len2 );
+    }
+    return vInput;
+}
+
+static const float g_fSpecularExponent = 32.0f;
+static const float g_fSpecularIntensity = 0.6f;
+static const float g_fNormalMapIntensity = 1.5f;
+
+// Returns ( diffuse, specular ) for one directional light.
+//   diffuse  = saturate( dot( N, -L ) ) squared
+//   specular = saturate( dot( H, N ) ) ^ g_fSpecularExponent, with H the normalized
+//              half vector between the direction to the camera and -L.
+// vWorldNormal is used as passed in; this function does not normalize it.
+float2 ComputeDirectionalLight( float3 vWorldPos, float3 vWorldNormal, float3 vDirLightDir )
+{
+    // Result.x is diffuse illumination, Result.y is specular illumination
+    float2 Result = float2( 0, 0 );
+    Result.x = pow( saturate( dot( vWorldNormal, -vDirLightDir ) ), 2 );
+
+    float3 vPointToCamera = normalize( g_vCameraPosWorld - vWorldPos );
+    float3 vHalfAngle = normalize( vPointToCamera - vDirLightDir );
+    Result.y = pow( saturate( dot( vHalfAngle, vWorldNormal ) ), g_fSpecularExponent );
+
+    return Result;
+}
+
+// Raises each channel to the power 2.2.
+float3 ColorGamma( float3 Input )
+{
+    return pow( Input, 2.2f );
+}
+
+// Lit pixel shader: perturbs the interpolated normal with a tangent-space sample taken
+// from the rgb channels of g_txHeight, then accumulates diffuse and specular from four
+// hard-coded directional lights. Alpha is always 1.
+// NOTE(review): the perturbed normal is a sum of three scaled unit vectors and is passed
+// to ComputeDirectionalLight without being renormalized - confirm this is intended.
+float4 SmoothPS( PS_INPUT Input ) : SV_TARGET
+{
+    float4 vNormalMapSampleRaw = g_txHeight.Sample( g_samLinear, Input.vUV );
+    // Remap [0,1] to [-1,1], exaggerate the xy slope, then renormalize the sample.
+    float3 vNormalMapSampleBiased = ( vNormalMapSampleRaw.xyz * 2 ) - 1;
+    vNormalMapSampleBiased.xy *= g_fNormalMapIntensity;
+    float3 vNormalMapSample = normalize( vNormalMapSampleBiased );
+
+    float3 vNormal = safe_normalize( Input.vNormal ) * vNormalMapSample.z;
+    vNormal += safe_normalize( Input.vTangent ) * vNormalMapSample.x;
+    vNormal += safe_normalize( Input.vBiTangent ) * vNormalMapSample.y;
+
+    //float3 vColor = float3( 1, 1, 1 );
+    float3 vColor = g_txDiffuse.Sample( g_samLinear, Input.vUV ).rgb;
+    float vSpecular = g_txSpecular.Sample( g_samLinear, Input.vUV ).r * g_fSpecularIntensity;
+
+    const float3 DirLightDirections[4] =
+    {
+        // key light
+        normalize( float3( -63.345150, -58.043934, 27.785097 ) ),
+        // fill light
+        normalize( float3( 23.652107, -17.391443, 54.972504 ) ),
+        // back light 1
+        normalize( float3( 20.470509, -22.939510, -33.929531 ) ),
+        // back light 2
+        normalize( float3( -31.003685, 24.242104, -41.352859 ) ),
+    };
+
+    const float3 DirLightColors[4] =
+    {
+        // key light
+        ColorGamma( float3( 1.0f, 0.964f, 0.706f ) * 1.0f ),
+        // fill light
+        ColorGamma( float3( 0.446f, 0.641f, 1.0f ) * 1.0f ),
+        // back light 1
+        ColorGamma( float3( 1.0f, 0.862f, 0.419f ) * 1.0f ),
+        // back light 2
+        ColorGamma( float3( 0.405f, 0.630f, 1.0f ) * 1.0f ),
+    };
+
+    float3 fLightColor = 0;
+    for( int i = 0; i < 4; ++i )
+    {
+        float2 LightDiffuseSpecular = ComputeDirectionalLight( Input.vWorldPos, vNormal, DirLightDirections[i] );
+        fLightColor += DirLightColors[i] * vColor * LightDiffuseSpecular.x;
+        fLightColor += DirLightColors[i] * LightDiffuseSpecular.y * vSpecular;
+    }
+
+    return float4( fLightColor, 1 );
+}
+
+//--------------------------------------------------------------------------------------
+// Solid color shading pixel shader (used for wireframe overlay)
+//--------------------------------------------------------------------------------------
+float4 SolidColorPS( PS_INPUT Input ) : SV_TARGET
+{
+    return float4( g_vSolidColor, 1 );
+}
diff --git a/tests/hlsl/dxsdk/VarianceShadows11/2DQuadShaders.hlsl b/tests/hlsl/dxsdk/VarianceShadows11/2DQuadShaders.hlsl
new file mode 100644
index 000000000..c4401f010
--- /dev/null
+++ b/tests/hlsl/dxsdk/VarianceShadows11/2DQuadShaders.hlsl
@@ -0,0 +1,211 @@
+//TEST:COMPARE_HLSL: -target dxbc-assembly -profile vs_4_0 -entry VSMain -profile ps_4_0 -entry PSBlurX -entry PSBlurY
+//--------------------------------------------------------------------------------------
+// File: 2DQuadShaders.hlsl
+//
+// Screen-quad vertex shader and separable blur pixel shaders for the VarianceShadows11 sample.
+//
+// Copyright (c) Microsoft Corporation. All rights reserved.
+//--------------------------------------------------------------------------------------
+
+#ifndef SEPERABLE_BLUR_KERNEL_SIZE
+#define SEPERABLE_BLUR_KERNEL_SIZE 3
+#endif
+
+static const int BLUR_KERNEL_BEGIN = SEPERABLE_BLUR_KERNEL_SIZE / -2; // first tap offset (inclusive)
+static const int BLUR_KERNEL_END = SEPERABLE_BLUR_KERNEL_SIZE / 2 + 1; // last tap offset + 1 (exclusive)
+static const float FLOAT_BLUR_KERNEL_SIZE = (float)SEPERABLE_BLUR_KERNEL_SIZE;
+
+cbuffer cbblurVS : register( b2)
+{
+    int2 g_iWidthHeight : packoffset( c0 );
+    int g_iKernelStart : packoffset( c0.z ); // only referenced by the disabled blur variants below
+    int g_iKernelEnd : packoffset( c0.w ); // only referenced by the disabled blur variants below
+};
+
+//--------------------------------------------------------------------------------------
+// Textures and samplers
+//--------------------------------------------------------------------------------------
+
+Texture2DArray g_txShadow : register( t5 );
+SamplerState g_samShadow : register( s5 );
+
+//--------------------------------------------------------------------------------------
+// Input/Output structures
+//--------------------------------------------------------------------------------------
+
+struct PSIn
+{
+    float4 Pos : SV_Position; //Position
+    float2 Tex : TEXCOORD; //Texture coordinate
+    float2 ITex : TEXCOORD2; //Tex scaled by g_iWidthHeight
+};
+
+struct VSIn
+{
+    uint Pos : SV_VertexID ;
+};
+
+
+PSIn VSMain(VSIn inn) // Builds one quad corner purely from SV_VertexID; no vertex data is read.
+{
+    PSIn output;
+
+    output.Pos.y = -1.0f + (inn.Pos%2) * 2.0f ; // -1 for even IDs, +1 for odd IDs
+    output.Pos.x = -1.0f + (inn.Pos/2) * 2.0f; // -1 for IDs 0-1, +1 for IDs 2-3
+    output.Pos.z = .5;
+    output.Pos.w = 1;
+    output.Tex.x = inn.Pos/2;
+    output.Tex.y = 1.0f - inn.Pos%2; // V is flipped relative to clip-space Y
+    output.ITex.x = (float)(g_iWidthHeight.x * output.Tex.x);
+    output.ITex.y = (float)(g_iWidthHeight.y * output.Tex.y);
+    return output;
+}
+
+//float PSDepth
+
+//------------------------------------------------------------------------------
+// Logarithmic filtering (log_conv is only called from the disabled code paths below)
+//------------------------------------------------------------------------------
+
+float log_conv ( float x0, float X, float y0, float Y ) // X + log( x0 + y0 * exp( Y - X ) )
+{
+    return (X + log(x0 + (y0 * exp(Y - X))));
+}
+
+
+//--------------------------------------------------------------------------------------
+// Pixel shader that box-blurs slice 0 of the shadow map along X (horizontal blur pass)
+//--------------------------------------------------------------------------------------
+float2 PSBlurX(PSIn input) : SV_Target
+{
+/*
+    float2 centerDistance;
+    if ( input.Tex.x < .5 ) centerDistance.x = (1.0 - input.Tex.x);
+    else centerDistance.x = input.Tex.x;
+    if ( input.Tex.y < .5 ) centerDistance.y = (1.0 - input.Tex.y);
+    else centerDistance.y = input.Tex.y;
+    if (centerDistance.x < centerDistance.y) centerDistance.x = centerDistance.y;
+    centerDistance.x -= .2;
+    centerDistance.x *= (1.0f / .8);
+
+    float store_samples[8];
+    int ind = 0;
+    for (int x = g_iKernelStart; x < g_iKernelEnd; ++x) {
+        store_samples[ind] = g_txShadow.Load( int3(input.ITex.x+(float)x * centerDistance.x , input.ITex.y, 0) ).r;
+        ind++;
+    }
+    const float c = (1.f/5.f);
+
+    float accum;
+    accum = log_conv( c, store_samples[0], c, store_samples[1] );
+
+    ind = 0;
+    for (x = g_iKernelStart - 2; x < g_iKernelEnd; ++x) {
+        ind++;
+        accum += log_conv( 1.0f, accum, c, store_samples[ind] );
+    }
+    float2 rt;
+    rt.x = accum;
+    return rt;
+    */
+    /*
+    float2 dep = 0;
+    float2 centerDistance;
+    if ( input.Tex.x < .5 ) centerDistance.x = (1.0 - input.Tex.x);
+    else centerDistance.x = input.Tex.x;
+    if ( input.Tex.y < .5 ) centerDistance.y = (1.0 - input.Tex.y);
+    else centerDistance.y = input.Tex.y;
+    if (centerDistance.x < centerDistance.y) centerDistance.x = centerDistance.y;
+    centerDistance.x -= .2;
+    centerDistance.x *= ( 1.0f / 0.8f );
+
+    for (int x = g_iKernelStart; x < g_iKernelEnd; ++x) {
+        dep += g_txShadow.Load( int3(input.ITex.x+(float)x * centerDistance.x , input.ITex.y, 0) ).rg;
+    }
+    dep /= (g_iKernelEnd - g_iKernelStart);
+    return dep;
+    */
+
+    float2 dep=0; // running sum of the .rg taps
+    [unroll]for ( int x = BLUR_KERNEL_BEGIN; x < BLUR_KERNEL_END; ++x ) {
+        dep += g_txShadow.Sample( g_samShadow, float3( input.Tex.x, input.Tex.y, 0 ), int2( x,0 ) ).rg;
+    }
+    dep /= (float)( BLUR_KERNEL_END - BLUR_KERNEL_BEGIN ); // divide by the taps actually taken; equals the kernel size for odd sizes, but an even size takes one extra tap
+    return dep;
+
+// return g_txShadow.Sample(g_samShadow, float3(input.Tex.x, input.Tex.y, 0) ).rg;
+
+}
+
+//--------------------------------------------------------------------------------------
+// Pixel shader that box-blurs slice 0 of the shadow map along Y (vertical blur pass)
+//--------------------------------------------------------------------------------------
+float2 PSBlurY(PSIn input) : SV_Target
+{
+/*
+    float2 centerDistance;
+    if ( input.Tex.x < .5 ) centerDistance.x = (1.0 - input.Tex.x);
+    else centerDistance.x = input.Tex.x;
+    if ( input.Tex.y < .5 ) centerDistance.y = (1.0 - input.Tex.y);
+    else centerDistance.y = input.Tex.y;
+    if (centerDistance.x < centerDistance.y) centerDistance.x = centerDistance.y;
+    centerDistance.x -= .2;
+    centerDistance.x *= (1.0f / .8);
+
+    float store_samples[8];
+    int ind = 0;
+    for (int y = g_iKernelStart; y < g_iKernelEnd; ++y) {
+        store_samples[ind] = g_txShadow.Load( int3(input.ITex.x, input.ITex.y+(float)y * centerDistance.x, 0) ).r;
+    }
+    const float c = (1.f/5.f);
+
+    float accum;
+    accum = log_conv( c, store_samples[0], c, store_samples[1] );
+
+    ind = 0;
+    for (y = g_iKernelStart; y < g_iKernelEnd; ++y) {
+        ind++;
+        accum += log_conv( 1.0f, accum, c, store_samples[ind] );
+    }
+    float2 rt;
+    rt.x = accum;
+    return rt;
+    */
+
+
+    /*
+    float2 dep = 0;
+
+    float2 centerDistance;
+    if ( input.Tex.x < .5 ) centerDistance.x = (1.0 - input.Tex.x);
+    else centerDistance.x = input.Tex.x;
+    if ( input.Tex.y < .5 ) centerDistance.y = (1.0 - input.Tex.y);
+    else centerDistance.y = input.Tex.y;
+    if (centerDistance.x < centerDistance.y) centerDistance.x = centerDistance.y;
+    centerDistance.x -= 0;
+    centerDistance.x *= (1.0f / 1.0f);
+
+    if (centerDistance.x < centerDistance.y) centerDistance.x = centerDistance.y;
+    for (int y = g_iKernelStart; y < g_iKernelEnd; ++y) {
+        dep += g_txShadow.Load( int3(input.ITex.x, input.ITex.y+(float)y * centerDistance.x, 0) ).rg;
+    }
+
+
+    dep /= (g_iKernelEnd - g_iKernelStart);
+    return dep;
+
+    */
+
+
+    float2 dep=0; // running sum of the .rg taps
+    [unroll]for ( int y = BLUR_KERNEL_BEGIN; y < BLUR_KERNEL_END; ++y ) {
+        dep += g_txShadow.Sample( g_samShadow, float3( input.Tex.x, input.Tex.y, 0 ), int2( 0,y ) ).rg;
+    }
+    dep /= (float)( BLUR_KERNEL_END - BLUR_KERNEL_BEGIN ); // divide by the taps actually taken; equals the kernel size for odd sizes, but an even size takes one extra tap
+    return dep;
+
+    //return g_txShadow.Sample(g_samShadow, float3(input.Tex.x, input.Tex.y, 0) ).rg;
+}
+
+
+
diff --git a/tests/hlsl/dxsdk/VarianceShadows11/RenderVarianceScene.hlsl b/tests/hlsl/dxsdk/VarianceShadows11/RenderVarianceScene.hlsl
new file mode 100644
index 000000000..0b2e43b5c
--- /dev/null
+++ b/tests/hlsl/dxsdk/VarianceShadows11/RenderVarianceScene.hlsl
@@ -0,0 +1,412 @@
+//TEST_IGNORE_FILE: Currently failing due to Spire compiler issues.
+//TEST:COMPARE_HLSL: -target dxbc-assembly -profile vs_4_0 -entry VSMain -profile ps_4_0 -entry PSBlurX -entry PSBlurY
+//--------------------------------------------------------------------------------------
+// File: RenderCascadeScene.hlsl
+//
+// This is the main shader file. This shader is compiled with several different flags
+// to provide different customizations based on user controls.
+//
+//
+// Copyright (c) Microsoft Corporation. All rights reserved.
+//-------------------------------------------------------------------------------------- + +//-------------------------------------------------------------------------------------- +// Globals +//-------------------------------------------------------------------------------------- + +// This flag enables the shadow to blend between cascades. This is most useful when the +// the shadow maps are small and artifact can be seen between the various cascade layers. +#ifndef BLEND_BETWEEN_CASCADE_LAYERS_FLAG +#define BLEND_BETWEEN_CASCADE_LAYERS_FLAG 0 +#endif + +// There are two methods for selecting the proper cascade a fragment lies in. Interval selection +// compares the depth of the fragment against the frustum's depth partition. +// Map based selection compares the texture coordinates against the acutal cascade maps. +// Map based selection gives better coverage. +// Interval based selection is easier to extend and understand. +#ifndef SELECT_CASCADE_BY_INTERVAL_FLAG +#define SELECT_CASCADE_BY_INTERVAL_FLAG 0 +#endif + +// The number of cascades +#ifndef CASCADE_COUNT_FLAG +#define CASCADE_COUNT_FLAG 3 +#endif + + +// Most titles will find that 3-4 cascades with +// BLEND_BETWEEN_CASCADE_LAYERS_FLAG, is good for lower end PCs. + +cbuffer cbAllShadowData : register( b0 ) +{ + matrix m_mWorldViewProjection; + matrix m_mWorld; + matrix m_mWorldView; + matrix m_mShadow; + float4 m_vCascadeOffset[8]; + float4 m_vCascadeScale[8]; + int m_nCascadeLevels; // Number of Cascades + int m_iVisualizeCascades; // 1 is to visualize the cascades in different colors. 0 is to just draw the scene + + // For Map based selection scheme, this keeps the pixels inside of the the valid range. + // When there is no boarder, these values are 0 and 1 respectivley. + float m_fMinBorderPadding; + float m_fMaxBorderPadding; + + float m_fCascadeBlendArea; // Amount to overlap when blending between cascades. 
+ float m_fTexelSize; // Padding variables exist because CBs must be a multiple of 16 bytes. + float m_fNativeTexelSizeInX; + float4 m_fCascadeFrustumsEyeSpaceDepthsData[2]; // The values along Z that seperate the cascades. + // This code creates an array based pointer that points towards the vectorized input data. + // This is the only way to index arbitrary arrays of data. + // If the array is used at run time, the compiler will generate code that uses logic to index the correct component. + + static float m_fCascadeFrustumsEyeSpaceDepths[8] = (float[8])m_fCascadeFrustumsEyeSpaceDepthsData; + + float3 m_vLightDir; + float m_fPaddingCB4; + +}; + + + +//-------------------------------------------------------------------------------------- +// Textures and Samplers +//-------------------------------------------------------------------------------------- +Texture2D g_txDiffuse : register( t0 ); +Texture2DArray g_txShadow : register( t5 ); + +SamplerState g_samLinear : register( s0 ); +SamplerState g_samShadow : register( s5 ); + +//-------------------------------------------------------------------------------------- +// Input / Output structures +//-------------------------------------------------------------------------------------- +struct VS_INPUT +{ + float4 vPosition : POSITION; + float3 vNormal : NORMAL; + float2 vTexcoord : TEXCOORD0; +}; + +struct VS_OUTPUT +{ + float3 vNormal : NORMAL; + float2 vTexcoord : COLOR0; + float4 vTexShadow : TEXCOORD1; + float4 vPosition : SV_POSITION; + float4 vInterpPos : TEXCOORD2; + float vDepth : TEXCOORD3; +}; + +//-------------------------------------------------------------------------------------- +// Vertex Shader +//-------------------------------------------------------------------------------------- +VS_OUTPUT VSMain( VS_INPUT Input ) +{ + VS_OUTPUT Output; + + Output.vPosition = mul( Input.vPosition, m_mWorldViewProjection ); + Output.vNormal = mul( Input.vNormal, (float3x3)m_mWorld ); + Output.vTexcoord = 
Input.vTexcoord; + Output.vInterpPos = Input.vPosition; + Output.vDepth = mul( Input.vPosition, m_mWorldView ).z ; + + // Transform the shadow texture coordinates for all the cascades. + Output.vTexShadow = mul( Input.vPosition, m_mShadow ); + + return Output; +} + + + +static const float4 vCascadeColorsMultiplier[8] = +{ + float4 ( 1.5f, 0.0f, 0.0f, 1.0f ), + float4 ( 0.0f, 1.5f, 0.0f, 1.0f ), + float4 ( 0.0f, 0.0f, 5.5f, 1.0f ), + float4 ( 1.5f, 0.0f, 5.5f, 1.0f ), + float4 ( 1.5f, 1.5f, 0.0f, 1.0f ), + float4 ( 1.0f, 1.0f, 1.0f, 1.0f ), + float4 ( 0.0f, 1.0f, 5.5f, 1.0f ), + float4 ( 0.5f, 3.5f, 0.75f, 1.0f ) +}; + + +void ComputeCoordinatesTransform( in int iCascadeIndex, + in float4 InterpolatedPosition, + in out float4 vShadowTexCoord, + in out float4 vShadowTexCoordViewSpace ) +{ + // Now that we know the correct map, we can transform the world space position of the current fragment + if( SELECT_CASCADE_BY_INTERVAL_FLAG ) + { + vShadowTexCoord = vShadowTexCoordViewSpace * m_vCascadeScale[iCascadeIndex]; + vShadowTexCoord += m_vCascadeOffset[iCascadeIndex]; + } + vShadowTexCoord.w = vShadowTexCoord.z; // We put the z value in w so that we can index the texture array with Z. + vShadowTexCoord.z = iCascadeIndex; + +} + +//-------------------------------------------------------------------------------------- +// Use PCF to sample the depth map and return a percent lit value. +//-------------------------------------------------------------------------------------- +void CalculateVarianceShadow ( in float4 vShadowTexCoord, in float4 vShadowMapTextureCoordViewSpace, int iCascade, out float fPercentLit ) +{ + fPercentLit = 0.0f; + // This loop could be unrolled, and texture immediate offsets could be used if the kernel size were fixed. + // This would be a performance improvment. 
+ + float2 mapDepth = 0; + + + // In order to pull the derivative out of divergent flow control we calculate the + // derivative off of the view space coordinates and then scale the derivative. + + float3 vShadowTexCoordDDX = + ddx(vShadowMapTextureCoordViewSpace ); + vShadowTexCoordDDX *= m_vCascadeScale[iCascade].xyz; + float3 vShadowTexCoordDDY = + ddy(vShadowMapTextureCoordViewSpace ); + vShadowTexCoordDDY *= m_vCascadeScale[iCascade].xyz; + + mapDepth += g_txShadow.SampleGrad( g_samShadow, vShadowTexCoord.xyz, + vShadowTexCoordDDX, + vShadowTexCoordDDY); + // The sample instruction uses gradients for some filters. + + float fAvgZ = mapDepth.x; // Filtered z + float fAvgZ2 = mapDepth.y; // Filtered z-squared + + if ( vShadowTexCoord.w <= fAvgZ ) // We put the z value in w so that we can index the texture array with Z. + { + fPercentLit = 1; + } + else + { + float variance = ( fAvgZ2 ) - ( fAvgZ * fAvgZ ); + variance = min( 1.0f, max( 0.0f, variance + 0.00001f ) ); + + float mean = fAvgZ; + float d = vShadowTexCoord.w - mean; // We put the z value in w so that we can index the texture array with Z. + float p_max = variance / ( variance + d*d ); + + // To combat light-bleeding, experiment with raising p_max to some power + // (Try values from 0.1 to 100.0, if you like.) + fPercentLit = pow( p_max, 4 ); + + } + +} + +//-------------------------------------------------------------------------------------- +// Calculate amount to blend between two cascades and the band where blending will occur. +//-------------------------------------------------------------------------------------- +void CalculateBlendAmountForInterval ( in int iNextCascadeIndex, + in out float fPixelDepth, + in out float fCurrentPixelsBlendBandLocation, + out float fBlendBetweenCascadesAmount + ) +{ + + // We need to calculate the band of the current shadow map where it will fade into the next cascade. + // We can then early out of the expensive PCF for loop.
+ // + float fBlendInterval = m_fCascadeFrustumsEyeSpaceDepths[ iNextCascadeIndex - 1 ]; + if( iNextCascadeIndex > 1 ) + { + fPixelDepth -= m_fCascadeFrustumsEyeSpaceDepths[ iNextCascadeIndex-2 ]; + fBlendInterval -= m_fCascadeFrustumsEyeSpaceDepths[ iNextCascadeIndex-2 ]; + } + // The current pixel's blend band location will be used to determine when we need to blend and by how much. + fCurrentPixelsBlendBandLocation = fPixelDepth / fBlendInterval; + fCurrentPixelsBlendBandLocation = 1.0f - fCurrentPixelsBlendBandLocation; + // The fBlendBetweenCascadesAmount is our location in the blend band. + fBlendBetweenCascadesAmount = fCurrentPixelsBlendBandLocation / m_fCascadeBlendArea; +} + + +//-------------------------------------------------------------------------------------- +// Calculate amount to blend between two cascades and the band where blending will occur. +//-------------------------------------------------------------------------------------- +void CalculateBlendAmountForMap ( in float4 vShadowMapTextureCoord, + in out float fCurrentPixelsBlendBandLocation, + out float fBlendBetweenCascadesAmount ) +{ + // Calculate the blend band for the map based selection. + float2 distanceToOne = float2 ( 1.0f - vShadowMapTextureCoord.x, 1.0f - vShadowMapTextureCoord.y ); + fCurrentPixelsBlendBandLocation = min( vShadowMapTextureCoord.x, vShadowMapTextureCoord.y ); + float fCurrentPixelsBlendBandLocation2 = min( distanceToOne.x, distanceToOne.y ); + fCurrentPixelsBlendBandLocation = + min( fCurrentPixelsBlendBandLocation, fCurrentPixelsBlendBandLocation2 ); + fBlendBetweenCascadesAmount = fCurrentPixelsBlendBandLocation / m_fCascadeBlendArea; +} + +//-------------------------------------------------------------------------------------- +// Calculate the shadow based on several options and render the scene.
+//-------------------------------------------------------------------------------------- + +float4 PSMain( VS_OUTPUT Input ) : SV_TARGET +{ + float4 vDiffuse = g_txDiffuse.Sample( g_samLinear, Input.vTexcoord ); + + + float4 vShadowMapTextureCoordViewSpace = 0.0f; + float4 vShadowMapTextureCoord = 0.0f; + float4 vShadowMapTextureCoord_blend = 0.0f; + + float4 vVisualizeCascadeColor = float4(0.0f,0.0f,0.0f,1.0f); + + float fPercentLit = 0.0f; + float fPercentLit_blend = 0.0f; + + int iCascadeFound = 0; + int iCurrentCascadeIndex=1; + int iNextCascadeIndex = 0; + + float fCurrentPixelDepth; + + // The interval based selection technique compares the pixel's depth against the frustum's cascade divisions. + fCurrentPixelDepth = Input.vDepth; + + // This for loop is not necessary when the frustum is uniformaly divided and interval based selection is used. + // In this case fCurrentPixelDepth could be used as an array lookup into the correct frustum. + vShadowMapTextureCoordViewSpace = Input.vTexShadow; + + + if( SELECT_CASCADE_BY_INTERVAL_FLAG ) + { + iCurrentCascadeIndex = 0; + if (CASCADE_COUNT_FLAG > 1 ) + { + float4 vCurrentPixelDepth = Input.vDepth; + float4 fComparison = ( vCurrentPixelDepth > m_fCascadeFrustumsEyeSpaceDepthsData[0]); + float4 fComparison2 = ( vCurrentPixelDepth > m_fCascadeFrustumsEyeSpaceDepthsData[1]); + float fIndex = dot( + float4( CASCADE_COUNT_FLAG > 0, + CASCADE_COUNT_FLAG > 1, + CASCADE_COUNT_FLAG > 2, + CASCADE_COUNT_FLAG > 3) + , fComparison ) + + dot( + float4( + CASCADE_COUNT_FLAG > 4, + CASCADE_COUNT_FLAG > 5, + CASCADE_COUNT_FLAG > 6, + CASCADE_COUNT_FLAG > 7) + , fComparison2 ) ; + + fIndex = min( fIndex, CASCADE_COUNT_FLAG - 1 ); + iCurrentCascadeIndex = (int)fIndex; + } + } + + if ( !SELECT_CASCADE_BY_INTERVAL_FLAG ) + { + iCurrentCascadeIndex = 0; + if ( CASCADE_COUNT_FLAG == 1 ) + { + vShadowMapTextureCoord = vShadowMapTextureCoordViewSpace * m_vCascadeScale[0]; + vShadowMapTextureCoord += m_vCascadeOffset[0]; + } + if ( 
CASCADE_COUNT_FLAG > 1 ) { + for( int iCascadeIndex = 0; iCascadeIndex < CASCADE_COUNT_FLAG && iCascadeFound == 0; ++iCascadeIndex ) + { + vShadowMapTextureCoord = vShadowMapTextureCoordViewSpace * m_vCascadeScale[iCascadeIndex]; + vShadowMapTextureCoord += m_vCascadeOffset[iCascadeIndex]; + + if ( min( vShadowMapTextureCoord.x, vShadowMapTextureCoord.y ) > m_fMinBorderPadding + && max( vShadowMapTextureCoord.x, vShadowMapTextureCoord.y ) < m_fMaxBorderPadding ) + { + iCurrentCascadeIndex = iCascadeIndex; + iCascadeFound = 1; + } + } + } + } + // Found the correct map. + vVisualizeCascadeColor = vCascadeColorsMultiplier[iCurrentCascadeIndex]; + + ComputeCoordinatesTransform( iCurrentCascadeIndex, Input.vInterpPos, vShadowMapTextureCoord, vShadowMapTextureCoordViewSpace ); + + if( BLEND_BETWEEN_CASCADE_LAYERS_FLAG && CASCADE_COUNT_FLAG > 1 ) + { + // Repeat text coord calculations for the next cascade. + // The next cascade index is used for blurring between maps. + iNextCascadeIndex = min ( CASCADE_COUNT_FLAG - 1, iCurrentCascadeIndex + 1 ); + if( !SELECT_CASCADE_BY_INTERVAL_FLAG ) + { + vShadowMapTextureCoord_blend = vShadowMapTextureCoordViewSpace * m_vCascadeScale[iNextCascadeIndex]; + vShadowMapTextureCoord_blend += m_vCascadeOffset[iNextCascadeIndex]; + } + ComputeCoordinatesTransform( iNextCascadeIndex, Input.vInterpPos, vShadowMapTextureCoord_blend, vShadowMapTextureCoordViewSpace ); + } + float fBlendBetweenCascadesAmount = 1.0f; + float fCurrentPixelsBlendBandLocation = 1.0f; + + if( SELECT_CASCADE_BY_INTERVAL_FLAG ) + { + if( CASCADE_COUNT_FLAG > 1 && BLEND_BETWEEN_CASCADE_LAYERS_FLAG ) + { + CalculateBlendAmountForInterval ( iNextCascadeIndex, fCurrentPixelDepth, + fCurrentPixelsBlendBandLocation, fBlendBetweenCascadesAmount ); + + } + } + else + { + if( CASCADE_COUNT_FLAG > 1 && BLEND_BETWEEN_CASCADE_LAYERS_FLAG ) + { + CalculateBlendAmountForMap ( vShadowMapTextureCoord, + fCurrentPixelsBlendBandLocation, fBlendBetweenCascadesAmount ); + } + } + + // 
Because the Z coordinate specifies the texture array, + // the derivative will be 0 when there is no divergence + //float fDivergence = abs( ddy( vShadowMapTextureCoord.z ) ) + abs( ddx( vShadowMapTextureCoord.z ) ); + CalculateVarianceShadow ( vShadowMapTextureCoord, vShadowMapTextureCoordViewSpace, + iCurrentCascadeIndex, fPercentLit); + + // We repeat the calculation for the next cascade layer, when blending between maps. + if( BLEND_BETWEEN_CASCADE_LAYERS_FLAG && CASCADE_COUNT_FLAG > 1 ) + { + if( fCurrentPixelsBlendBandLocation < m_fCascadeBlendArea ) + { // the current pixel is within the blend band. + + // Because the Z coordinate specifies the texture array, + // the derivative will be 0 when there is no divergence + float fDivergence = abs( ddy( vShadowMapTextureCoord_blend.z ) ) + + abs( ddx( vShadowMapTextureCoord_blend.z) ); + CalculateVarianceShadow ( vShadowMapTextureCoord_blend, vShadowMapTextureCoordViewSpace, + iNextCascadeIndex, fPercentLit_blend ); + + // Blend the two calculated shadows by the blend amount. + fPercentLit = lerp( fPercentLit_blend, fPercentLit, fBlendBetweenCascadesAmount ); + + } + } + + if( !m_iVisualizeCascades ) vVisualizeCascadeColor = float4( 1.0f, 1.0f, 1.0f, 1.0f ); + + float3 vLightDir1 = float3( -1.0f, 1.0f, -1.0f ); + float3 vLightDir2 = float3( 1.0f, 1.0f, -1.0f ); + float3 vLightDir3 = float3( 0.0f, -1.0f, 0.0f ); + float3 vLightDir4 = float3( 1.0f, 1.0f, 1.0f ); + // Some ambient-like lighting.
+ float fLighting = + saturate( dot( vLightDir1 , Input.vNormal ) )*0.05f + + saturate( dot( vLightDir2 , Input.vNormal ) )*0.05f + + saturate( dot( vLightDir3 , Input.vNormal ) )*0.05f + + saturate( dot( vLightDir4 , Input.vNormal ) )*0.05f ; + + float4 vShadowLighting = fLighting * 0.5f; + fLighting += saturate( dot( m_vLightDir , Input.vNormal ) ); + fLighting = lerp( vShadowLighting, fLighting, fPercentLit ); + + return fLighting * vVisualizeCascadeColor * vDiffuse; + +} + diff --git a/tests/hlsl/dxsdk/VarianceShadows11/RenderVarianceShadow.hlsl b/tests/hlsl/dxsdk/VarianceShadows11/RenderVarianceShadow.hlsl new file mode 100644 index 000000000..9837bf299 --- /dev/null +++ b/tests/hlsl/dxsdk/VarianceShadows11/RenderVarianceShadow.hlsl @@ -0,0 +1,45 @@ +//TEST:COMPARE_HLSL: -target dxbc-assembly -profile vs_4_0 -entry VSMain -profile ps_4_0 -entry PSMain + + +//-------------------------------------------------------------------------------------- +// Globals +//-------------------------------------------------------------------------------------- +cbuffer cbPerObject : register( b0 ) +{ + matrix g_mWorldViewProjection : packoffset( c0 ); +}; + +//-------------------------------------------------------------------------------------- +// Input / Output structures +//-------------------------------------------------------------------------------------- +struct VS_INPUT +{ + float4 vPosition : POSITION; +}; + +struct VS_OUTPUT +{ + float4 vPosition : SV_POSITION; +}; + +//-------------------------------------------------------------------------------------- +// Vertex Shader +//-------------------------------------------------------------------------------------- +VS_OUTPUT VSMain( VS_INPUT Input ) +{ + VS_OUTPUT Output; + + + Output.vPosition = mul( Input.vPosition, g_mWorldViewProjection ); + + return Output; +} + + +float2 PSMain (VS_OUTPUT Input) : SV_TARGET +{ + float2 rt; + rt.x = Input.vPosition.z; + rt.y = rt.x * rt.x; + return rt; +}
\ No newline at end of file diff --git a/tests/hlsl/simple/compute-numthreads.hlsl b/tests/hlsl/simple/compute-numthreads.hlsl new file mode 100644 index 000000000..3843c401f --- /dev/null +++ b/tests/hlsl/simple/compute-numthreads.hlsl @@ -0,0 +1,11 @@ +//TEST:COMPARE_HLSL: -no-checking -target dxbc-assembly -profile cs_5_0 -entry main + +// Confirm that we properly pass along the `numthreads` attribute on an entry point. + +RWStructuredBuffer<float> b; + +[numthreads(32,1,1)] +void main(uint3 tid : SV_DispatchThreadID) +{ + b[tid.x] = b[tid.x + 1] + 1.0f; +}
\ No newline at end of file diff --git a/tests/preprocessor/define-function-like.spire b/tests/preprocessor/define-function-like.spire new file mode 100644 index 000000000..f1dd9caa4 --- /dev/null +++ b/tests/preprocessor/define-function-like.spire @@ -0,0 +1,19 @@ +//TEST:SIMPLE: +// support for function-like macros + +#define FOO(x) 1.0 + x + +float foo(float y) { return FOO(y) * 2.0; } + +// simple token pasting + +#define PASTE(a,b) a##b + +PASTE(flo,at) bar() { return 0.0; } + +// a space before the parens? then it is not a function-like macro + +#define M (x) - (x) + +// Error: undefined identifier `x` +float bar(float a) { return M(a); } diff --git a/tests/preprocessor/define-function-like.spire.expected b/tests/preprocessor/define-function-like.spire.expected new file mode 100644 index 000000000..e7b2a582a --- /dev/null +++ b/tests/preprocessor/define-function-like.spire.expected @@ -0,0 +1,7 @@ +result code = -1 +standard error = { +Tests/Preprocessor/define-function-like.spire(16): error 30015: undefined identifier 'x'. +Tests/Preprocessor/define-function-like.spire(16): error 30015: undefined identifier 'x'. +} +standard output = { +} diff --git a/tests/preprocessor/define-simple.spire b/tests/preprocessor/define-simple.spire new file mode 100644 index 000000000..26436b258 --- /dev/null +++ b/tests/preprocessor/define-simple.spire @@ -0,0 +1,14 @@ +//TEST:SIMPLE: +// #define support + +#define FOO 1.0f + +float foo() { return FOO + 2.0; } + +#define BAR 99 + +#if BAR > 10 +int bar() { return 0; } +#else +BadThing shouldntCompile; +#endif diff --git a/tests/preprocessor/if.spire b/tests/preprocessor/if.spire new file mode 100644 index 000000000..fe5948c33 --- /dev/null +++ b/tests/preprocessor/if.spire @@ -0,0 +1,15 @@ +//TEST:SIMPLE: +// #if support + + +#if (1 - 1*2) < 0 +int foo() { return 0; } +#else +BadThing thatWontCompile; +#endif + +#if (1 >> 1) && ~999 +AnotherError onThisLine; +#else +int bar() { return foo(); } +#endif
\ No newline at end of file diff --git a/tests/preprocessor/ifdef.spire b/tests/preprocessor/ifdef.spire new file mode 100644 index 000000000..a3ca82838 --- /dev/null +++ b/tests/preprocessor/ifdef.spire @@ -0,0 +1,16 @@ +//TEST:SIMPLE: +// #ifdef support + +#define A + +#ifdef A +int foo() { return 0; } +#else +BadThing thatWontCompile; +#endif + +#ifdef BadThing +AnotherError onThisLine; +#else +int bar() { return foo(); } +#endif
\ No newline at end of file diff --git a/tests/preprocessor/include-a.spireh b/tests/preprocessor/include-a.spireh new file mode 100644 index 000000000..8fecc6a98 --- /dev/null +++ b/tests/preprocessor/include-a.spireh @@ -0,0 +1,3 @@ +// #include support + +int bar() { return foo(); }
\ No newline at end of file diff --git a/tests/preprocessor/include.spire b/tests/preprocessor/include.spire new file mode 100644 index 000000000..8feca21cc --- /dev/null +++ b/tests/preprocessor/include.spire @@ -0,0 +1,8 @@ +//TEST:SIMPLE: +// #include support + +int foo() { return 0; } + +#include "include-a.spireh" + +int baz() { return bar(); }
\ No newline at end of file diff --git a/tests/reflection/arrays.hlsl b/tests/reflection/arrays.hlsl new file mode 100644 index 000000000..8880aaebd --- /dev/null +++ b/tests/reflection/arrays.hlsl @@ -0,0 +1,27 @@ +//TEST:SIMPLE:-profile ps_4_0 -target reflection-json + +// Confirm that we can generate reflection info for arrays +// +// Note: just working with fixed-size arrays for now. +// Unbounded arrays may require more work. + +cbuffer MyConstantBuffer +{ + float x; + + float a[10]; + + float y; +} + +Texture2D tx; +Texture2D ta[16]; +Texture2D ty; +SamplerState sx; +SamplerState sa[4]; +SamplerState sy; + +float4 main() : SV_Target +{ + return 0.0; +}
\ No newline at end of file diff --git a/tests/reflection/arrays.hlsl.expected b/tests/reflection/arrays.hlsl.expected new file mode 100644 index 000000000..052bd3927 --- /dev/null +++ b/tests/reflection/arrays.hlsl.expected @@ -0,0 +1,103 @@ +result code = 0 +standard error = { +} +standard output = { +{ + "parameters": [ + { + "name": "MyConstantBuffer", + "binding": {"kind": "constantBuffer", "index": 0}, + "type": { + "kind": "constantBuffer", + "elementType": { + "kind": "struct", + "fields": [ + { + "name": "x", + "type": { + "kind": "scalar", + "scalarType": "float32" + }, + "binding": {"kind": "uniform", "offset": 0, "size": 4} + }, + { + "name": "a", + "type": { + "kind": "array", + "elementCount": 10, + "elementType": { + "kind": "scalar", + "scalarType": "float32" + }, + "uniformStride": 16 + }, + "binding": {"kind": "uniform", "offset": 16, "size": 160} + }, + { + "name": "y", + "type": { + "kind": "scalar", + "scalarType": "float32" + }, + "binding": {"kind": "uniform", "offset": 176, "size": 4} + } + ] + } + } + }, + { + "name": "tx", + "binding": {"kind": "shaderResource", "index": 0}, + "type": { + "kind": "resource", + "baseShape": "texture2D" + } + }, + { + "name": "ta", + "binding": {"kind": "shaderResource", "index": 1, "count": 16}, + "type": { + "kind": "array", + "elementCount": 16, + "elementType": { + "kind": "resource", + "baseShape": "texture2D" + } + } + }, + { + "name": "ty", + "binding": {"kind": "shaderResource", "index": 17}, + "type": { + "kind": "resource", + "baseShape": "texture2D" + } + }, + { + "name": "sx", + "binding": {"kind": "samplerState", "index": 0}, + "type": { + "kind": "samplerState" + } + }, + { + "name": "sa", + "binding": {"kind": "samplerState", "index": 1, "count": 4}, + "type": { + "kind": "array", + "elementCount": 4, + "elementType": { + "kind": "samplerState" + } + } + }, + { + "name": "sy", + "binding": {"kind": "samplerState", "index": 5}, + "type": { + "kind": "samplerState" + } + } + ] +} +} diff --git 
a/tests/reflection/global-uniforms.hlsl b/tests/reflection/global-uniforms.hlsl new file mode 100644 index 000000000..7845af4b6 --- /dev/null +++ b/tests/reflection/global-uniforms.hlsl @@ -0,0 +1,21 @@ +//TEST:SIMPLE:-profile ps_4_0 -target reflection-json + +// Confirm that we handle uniforms at global scope + + +float4 u; + +Texture2D t; +SamplerState s; + +cbuffer CB +{ + float4 v; +} + +float4 w; + +float4 main() : SV_Target +{ + return u + v + w + t.Sample(s, u.xy); +}
\ No newline at end of file diff --git a/tests/reflection/global-uniforms.hlsl.expected b/tests/reflection/global-uniforms.hlsl.expected new file mode 100644 index 000000000..b40b2d69c --- /dev/null +++ b/tests/reflection/global-uniforms.hlsl.expected @@ -0,0 +1,72 @@ +result code = 0 +standard error = { +} +standard output = { +{ + "parameters": [ + { + "name": "u", + "binding": {"kind": "uniform", "offset": 0, "size": 16}, + "type": { + "kind": "vector", + "elementCount": 4, + "elementType": { + "kind": "scalar", + "scalarType": "float32" + } + } + }, + { + "name": "t", + "binding": {"kind": "shaderResource", "index": 0}, + "type": { + "kind": "resource", + "baseShape": "texture2D" + } + }, + { + "name": "s", + "binding": {"kind": "samplerState", "index": 0}, + "type": { + "kind": "samplerState" + } + }, + { + "name": "CB", + "binding": {"kind": "constantBuffer", "index": 1}, + "type": { + "kind": "constantBuffer", + "elementType": { + "kind": "struct", + "fields": [ + { + "name": "v", + "type": { + "kind": "vector", + "elementCount": 4, + "elementType": { + "kind": "scalar", + "scalarType": "float32" + } + }, + "binding": {"kind": "uniform", "offset": 0, "size": 16} + } + ] + } + } + }, + { + "name": "w", + "binding": {"kind": "uniform", "offset": 16, "size": 16}, + "type": { + "kind": "vector", + "elementCount": 4, + "elementType": { + "kind": "scalar", + "scalarType": "float32" + } + } + } + ] +} +} diff --git a/tests/reflection/multi-file-extra.hlsl b/tests/reflection/multi-file-extra.hlsl new file mode 100644 index 000000000..569ec2ce9 --- /dev/null +++ b/tests/reflection/multi-file-extra.hlsl @@ -0,0 +1,63 @@ +//TEST_IGNORE_FILE: + +// Here we are going to test that we can correctly generating bindings when we +// are presented with a program spanning multiple input files (and multiple entry points) + +// This file provides the fragment shader, and is only meant to be tested in combination with `multi-file.hlsl` + +// Let's make sure we generate correct 
output in cases +// where there are non-trivial `packoffset`s needed + +#ifdef __SPIRE__ +#define R(X) /**/ +#else +#define R(X) X +#endif + +float4 use(float val) { return val; }; +float4 use(float2 val) { return float4(val,0.0,0.0); }; +float4 use(float3 val) { return float4(val,0.0); }; +float4 use(float4 val) { return val; }; +float4 use(Texture2D t, SamplerState s) { return t.Sample(s, 0.0); } + +// Start with some parameters that will appear in both shaders +Texture2D sharedT; +SamplerState sharedS; +cbuffer sharedC +{ + float3 sharedCA; + float sharedCB; + float3 sharedCC; + float2 sharedCD; +} + +// Then some parameters specific to this shader. +// These will be placed *after* the ones from the main file, +// and even after the parameters further down in this file +// that end up being shared between the two files. + +Texture2D fragmentT; +SamplerState fragmentS; +cbuffer fragmentC +{ + float3 fragmentCA; + float fragmentCB; + float3 fragmentCC; + float2 fragmentCD; +} + +// And end with some shared parameters again +Texture2D sharedTV; +Texture2D sharedTF; + + +float4 main() : SV_Target +{ + // Go ahead and use everything here, just to make sure things got placed correctly + return use(sharedT, sharedS) + + use(sharedCD) + + use(fragmentT, fragmentS) + + use(fragmentCD) + + use(sharedTF, sharedS) + ; +}
\ No newline at end of file diff --git a/tests/reflection/multi-file.hlsl b/tests/reflection/multi-file.hlsl new file mode 100644 index 000000000..b263a6b71 --- /dev/null +++ b/tests/reflection/multi-file.hlsl @@ -0,0 +1,56 @@ +//TEST:SIMPLE:-profile ps_4_0 -target reflection-json Tests/bindings/multi-file-extra.hlsl + +// Here we are testing the case where multiple translation units are provided +// at once, so that we want combined reflection information for the resulting +// program. The other part of this program is in `multi-file-extra.hlsl`. + +float4 use(float val) { return val; }; +float4 use(float2 val) { return float4(val,0.0,0.0); }; +float4 use(float3 val) { return float4(val,0.0); }; +float4 use(float4 val) { return val; }; +float4 use(Texture2D t, SamplerState s) +{ + // This is the vertex shader, so we can't do implicit-gradient sampling + return t.SampleGrad(s, 0.0, 0.0, 0.0); +} + +// Start with some parameters that will appear in both shaders +Texture2D sharedT; +SamplerState sharedS; +cbuffer sharedC +{ + float3 sharedCA; + float sharedCB; + float3 sharedCC; + float2 sharedCD; +} + +// Then some parameters specific to this shader +// (these will get placed before the ones in the `extra` file, +// based on how they get named on the command-line) + +Texture2D vertexT; +SamplerState vertexS; +cbuffer vertexC +{ + float3 vertexCA; + float vertexCB; + float3 vertexCC; + float2 vertexCD; +} + +// And end with some shared parameters again +Texture2D sharedTV; +Texture2D sharedTF; + + +float4 main() : SV_Position +{ + // Go ahead and use everything here, just to make sure things got placed correctly + return use(sharedT, sharedS) + + use(sharedCD) + + use(vertexT, vertexS) + + use(vertexCD) + + use(sharedTV, vertexS) + ; +}
\ No newline at end of file diff --git a/tests/reflection/multi-file.hlsl.expected b/tests/reflection/multi-file.hlsl.expected new file mode 100644 index 000000000..38d028ffe --- /dev/null +++ b/tests/reflection/multi-file.hlsl.expected @@ -0,0 +1,238 @@ +result code = 0 +standard error = { +} +standard output = { +{ + "parameters": [ + { + "name": "sharedT", + "binding": {"kind": "shaderResource", "index": 0}, + "type": { + "kind": "resource", + "baseShape": "texture2D" + } + }, + { + "name": "sharedS", + "binding": {"kind": "samplerState", "index": 0}, + "type": { + "kind": "samplerState" + } + }, + { + "name": "sharedC", + "binding": {"kind": "constantBuffer", "index": 0}, + "type": { + "kind": "constantBuffer", + "elementType": { + "kind": "struct", + "fields": [ + { + "name": "sharedCA", + "type": { + "kind": "vector", + "elementCount": 3, + "elementType": { + "kind": "scalar", + "scalarType": "float32" + } + }, + "binding": {"kind": "uniform", "offset": 0, "size": 12} + }, + { + "name": "sharedCB", + "type": { + "kind": "scalar", + "scalarType": "float32" + }, + "binding": {"kind": "uniform", "offset": 12, "size": 4} + }, + { + "name": "sharedCC", + "type": { + "kind": "vector", + "elementCount": 3, + "elementType": { + "kind": "scalar", + "scalarType": "float32" + } + }, + "binding": {"kind": "uniform", "offset": 16, "size": 12} + }, + { + "name": "sharedCD", + "type": { + "kind": "vector", + "elementCount": 2, + "elementType": { + "kind": "scalar", + "scalarType": "float32" + } + }, + "binding": {"kind": "uniform", "offset": 32, "size": 8} + } + ] + } + } + }, + { + "name": "vertexT", + "binding": {"kind": "shaderResource", "index": 1}, + "type": { + "kind": "resource", + "baseShape": "texture2D" + } + }, + { + "name": "vertexS", + "binding": {"kind": "samplerState", "index": 1}, + "type": { + "kind": "samplerState" + } + }, + { + "name": "vertexC", + "binding": {"kind": "constantBuffer", "index": 1}, + "type": { + "kind": "constantBuffer", + "elementType": 
{ + "kind": "struct", + "fields": [ + { + "name": "vertexCA", + "type": { + "kind": "vector", + "elementCount": 3, + "elementType": { + "kind": "scalar", + "scalarType": "float32" + } + }, + "binding": {"kind": "uniform", "offset": 0, "size": 12} + }, + { + "name": "vertexCB", + "type": { + "kind": "scalar", + "scalarType": "float32" + }, + "binding": {"kind": "uniform", "offset": 12, "size": 4} + }, + { + "name": "vertexCC", + "type": { + "kind": "vector", + "elementCount": 3, + "elementType": { + "kind": "scalar", + "scalarType": "float32" + } + }, + "binding": {"kind": "uniform", "offset": 16, "size": 12} + }, + { + "name": "vertexCD", + "type": { + "kind": "vector", + "elementCount": 2, + "elementType": { + "kind": "scalar", + "scalarType": "float32" + } + }, + "binding": {"kind": "uniform", "offset": 32, "size": 8} + } + ] + } + } + }, + { + "name": "sharedTV", + "binding": {"kind": "shaderResource", "index": 2}, + "type": { + "kind": "resource", + "baseShape": "texture2D" + } + }, + { + "name": "sharedTF", + "binding": {"kind": "shaderResource", "index": 3}, + "type": { + "kind": "resource", + "baseShape": "texture2D" + } + }, + { + "name": "fragmentT", + "binding": {"kind": "shaderResource", "index": 4}, + "type": { + "kind": "resource", + "baseShape": "texture2D" + } + }, + { + "name": "fragmentS", + "binding": {"kind": "samplerState", "index": 2}, + "type": { + "kind": "samplerState" + } + }, + { + "name": "fragmentC", + "binding": {"kind": "constantBuffer", "index": 2}, + "type": { + "kind": "constantBuffer", + "elementType": { + "kind": "struct", + "fields": [ + { + "name": "fragmentCA", + "type": { + "kind": "vector", + "elementCount": 3, + "elementType": { + "kind": "scalar", + "scalarType": "float32" + } + }, + "binding": {"kind": "uniform", "offset": 0, "size": 12} + }, + { + "name": "fragmentCB", + "type": { + "kind": "scalar", + "scalarType": "float32" + }, + "binding": {"kind": "uniform", "offset": 12, "size": 4} + }, + { + "name": "fragmentCC", + 
"type": { + "kind": "vector", + "elementCount": 3, + "elementType": { + "kind": "scalar", + "scalarType": "float32" + } + }, + "binding": {"kind": "uniform", "offset": 16, "size": 12} + }, + { + "name": "fragmentCD", + "type": { + "kind": "vector", + "elementCount": 2, + "elementType": { + "kind": "scalar", + "scalarType": "float32" + } + }, + "binding": {"kind": "uniform", "offset": 32, "size": 8} + } + ] + } + } + } + ] +} +} diff --git a/tests/reflection/reflection0.hlsl b/tests/reflection/reflection0.hlsl new file mode 100644 index 000000000..1f138894f --- /dev/null +++ b/tests/reflection/reflection0.hlsl @@ -0,0 +1,19 @@ +//TEST:SIMPLE:-profile ps_4_0 -target reflection-json + +// Confirm that basic reflection info can be output + +float4 use(float4 val) { return val; }; +float4 use(Texture2D t, SamplerState s) { return t.Sample(s, 0.0); } + +Texture2D t; +SamplerState s; + +cbuffer C +{ + float c; +} + +float4 main() : SV_Target +{ + return use(t,s) + use(c); +}
\ No newline at end of file diff --git a/tests/reflection/reflection0.hlsl.expected b/tests/reflection/reflection0.hlsl.expected new file mode 100644 index 000000000..3b74988b2 --- /dev/null +++ b/tests/reflection/reflection0.hlsl.expected @@ -0,0 +1,44 @@ +result code = 0 +standard error = { +} +standard output = { +{ + "parameters": [ + { + "name": "t", + "binding": {"kind": "shaderResource", "index": 0}, + "type": { + "kind": "resource", + "baseShape": "texture2D" + } + }, + { + "name": "s", + "binding": {"kind": "samplerState", "index": 0}, + "type": { + "kind": "samplerState" + } + }, + { + "name": "C", + "binding": {"kind": "constantBuffer", "index": 0}, + "type": { + "kind": "constantBuffer", + "elementType": { + "kind": "struct", + "fields": [ + { + "name": "c", + "type": { + "kind": "scalar", + "scalarType": "float32" + }, + "binding": {"kind": "uniform", "offset": 0, "size": 4} + } + ] + } + } + } + ] +} +} diff --git a/tests/reflection/resource-in-cbuffer.hlsl b/tests/reflection/resource-in-cbuffer.hlsl new file mode 100644 index 000000000..956387587 --- /dev/null +++ b/tests/reflection/resource-in-cbuffer.hlsl @@ -0,0 +1,21 @@ +//TEST:SIMPLE:-profile ps_4_0 -target reflection-json + +// Confirm that we can generate reflection +// information for resources nested inside +// a cbuffer: + +cbuffer MyConstantBuffer +{ + float3 v; + + Texture2D myTexture; + + float c; + + SamplerState mySampler; +} + +float4 main() : SV_Target +{ + return 0.0; +}
\ No newline at end of file diff --git a/tests/reflection/resource-in-cbuffer.hlsl.expected b/tests/reflection/resource-in-cbuffer.hlsl.expected new file mode 100644 index 000000000..faae1c8b7 --- /dev/null +++ b/tests/reflection/resource-in-cbuffer.hlsl.expected @@ -0,0 +1,60 @@ +result code = 0 +standard error = { +} +standard output = { +{ + "parameters": [ + { + "name": "MyConstantBuffer", + "bindings": [ + {"kind": "constantBuffer", "index": 0}, + {"kind": "shaderResource", "index": 0}, + {"kind": "samplerState", "index": 0} + ], + "type": { + "kind": "constantBuffer", + "elementType": { + "kind": "struct", + "fields": [ + { + "name": "v", + "type": { + "kind": "vector", + "elementCount": 3, + "elementType": { + "kind": "scalar", + "scalarType": "float32" + } + }, + "binding": {"kind": "uniform", "offset": 0, "size": 12} + }, + { + "name": "myTexture", + "type": { + "kind": "resource", + "baseShape": "texture2D" + }, + "binding": {"kind": "shaderResource", "index": 0} + }, + { + "name": "c", + "type": { + "kind": "scalar", + "scalarType": "float32" + }, + "binding": {"kind": "uniform", "offset": 12, "size": 4} + }, + { + "name": "mySampler", + "type": { + "kind": "samplerState" + }, + "binding": {"kind": "samplerState", "index": 0} + } + ] + } + } + } + ] +} +} diff --git a/tests/render/cross-compile0.hlsl b/tests/render/cross-compile0.hlsl new file mode 100644 index 000000000..9a9dd1cdc --- /dev/null +++ b/tests/render/cross-compile0.hlsl @@ -0,0 +1,76 @@ +//TEST:COMPARE_HLSL_CROSS_COMPILE_RENDER: + +// Now we are going to test that we can cross-compile a Spire/HLSL +// input file over to GLSL and render with it. 
+ +cbuffer Uniforms +{ + float4x4 modelViewProjection; +} + +struct AssembledVertex +{ + float3 position; + float3 color; +}; + +struct CoarseVertex +{ + float3 color; +}; + +struct Fragment +{ + float4 color; +}; + + +// Vertex Shader + +struct VertexStageInput +{ + AssembledVertex assembledVertex : A; +}; + +struct VertexStageOutput +{ + CoarseVertex coarseVertex : CoarseVertex; + float4 sv_position : SV_Position; +}; + +VertexStageOutput vertexMain(VertexStageInput input) +{ + VertexStageOutput output; + + float3 position = input.assembledVertex.position; + float3 color = input.assembledVertex.color; + + output.coarseVertex.color = color; + output.sv_position = mul(modelViewProjection, float4(position, 1.0)); + + return output; +} + +// Fragment Shader + +struct FragmentStageInput +{ + CoarseVertex coarseVertex : CoarseVertex; +}; + +struct FragmentStageOutput +{ + Fragment fragment : SV_Target; +}; + +FragmentStageOutput fragmentMain(FragmentStageInput input) +{ + FragmentStageOutput output; + + float3 color = input.coarseVertex.color; + + output.fragment.color = float4(color, 1.0); + + return output; +} + diff --git a/tests/render/render0.hlsl b/tests/render/render0.hlsl new file mode 100644 index 000000000..3ecd582f3 --- /dev/null +++ b/tests/render/render0.hlsl @@ -0,0 +1,74 @@ +//TEST:COMPARE_HLSL_RENDER: +// Starting with a basic test for the ability to render stuff... 
+ +cbuffer Uniforms +{ + float4x4 modelViewProjection; +} + +struct AssembledVertex +{ + float3 position; + float3 color; +}; + +struct CoarseVertex +{ + float3 color; +}; + +struct Fragment +{ + float4 color; +}; + + +// Vertex Shader + +struct VertexStageInput +{ + AssembledVertex assembledVertex : A; +}; + +struct VertexStageOutput +{ + CoarseVertex coarseVertex : CoarseVertex; + float4 sv_position : SV_Position; +}; + +VertexStageOutput vertexMain(VertexStageInput input) +{ + VertexStageOutput output; + + float3 position = input.assembledVertex.position; + float3 color = input.assembledVertex.color; + + output.coarseVertex.color = color; + output.sv_position = mul(modelViewProjection, float4(position, 1.0)); + + return output; +} + +// Fragment Shader + +struct FragmentStageInput +{ + CoarseVertex coarseVertex : CoarseVertex; +}; + +struct FragmentStageOutput +{ + Fragment fragment : SV_Target; +}; + +FragmentStageOutput fragmentMain(FragmentStageInput input) +{ + FragmentStageOutput output; + + float3 color = input.coarseVertex.color; + + output.fragment.color = float4(color, 1.0); + + return output; +} + diff --git a/tests/rewriter/error0.hlsl b/tests/rewriter/error0.hlsl new file mode 100644 index 000000000..dc3e84fda --- /dev/null +++ b/tests/rewriter/error0.hlsl @@ -0,0 +1,19 @@ +//TEST:COMPARE_HLSL: -no-checking -target dxbc-assembly -profile ps_4_0 -entry main + +// We need to confirm that when there is an error in +// the input code, we allow the downstream compiler +// to detect and report the error, not us... + +// This file presents a simple case, where we forgot a semicolon. + +float4 main() : SV_Target +{ + float a = 1.0; + + // no semicolon at the end of this line! + float b = 2.0 + + float c = a + b; + + return float4(c); +}
\ No newline at end of file |
