diff options
Diffstat (limited to 'tests')
113 files changed, 624 insertions, 15734 deletions
diff --git a/tests/bindings/binding0.hlsl b/tests/bindings/binding0.hlsl index 85f17e940..5516b0135 100644 --- a/tests/bindings/binding0.hlsl +++ b/tests/bindings/binding0.hlsl @@ -6,8 +6,15 @@ #ifdef __SLANG__ #define R(X) /**/ +#define BEGIN_CBUFFER(NAME) cbuffer NAME +#define END_CBUFFER(NAME, REG) /**/ +#define CBUFFER_REF(NAME, FIELD) FIELD + #else #define R(X) X +#define BEGIN_CBUFFER(NAME) struct SLANG_ParameterGroup_##NAME +#define END_CBUFFER(NAME, REG) ; cbuffer NAME : REG { SLANG_ParameterGroup_##NAME NAME; } +#define CBUFFER_REF(NAME, FIELD) NAME.FIELD #define C C_0 #define t t_0 @@ -22,12 +29,13 @@ float4 use(Texture2D t, SamplerState s) { return t.Sample(s, 0.0); } Texture2D t R(: register(t0)); SamplerState s R(: register(s0)); -cbuffer C R(: register(b0)) +BEGIN_CBUFFER(C) { float c; } +END_CBUFFER(C, register(b0)) float4 main() : SV_TARGET { - return use(t,s) + use(c); + return use(t,s) + use(CBUFFER_REF(C,c)); }
\ No newline at end of file diff --git a/tests/bindings/binding1.hlsl b/tests/bindings/binding1.hlsl index 8709c31c6..47ab22bb9 100644 --- a/tests/bindings/binding1.hlsl +++ b/tests/bindings/binding1.hlsl @@ -13,8 +13,14 @@ #ifdef __SLANG__ #define R(X) /**/ +#define BEGIN_CBUFFER(NAME) cbuffer NAME +#define END_CBUFFER(NAME, REG) /**/ +#define CBUFFER_REF(NAME, FIELD) FIELD #else #define R(X) X +#define BEGIN_CBUFFER(NAME) struct SLANG_ParameterGroup_##NAME +#define END_CBUFFER(NAME, REG) ; cbuffer NAME : REG { SLANG_ParameterGroup_##NAME NAME; } +#define CBUFFER_REF(NAME, FIELD) NAME.FIELD #define tB tB_0 #define sB sB_0 @@ -32,17 +38,19 @@ Texture2D tB R(: register(t1)); SamplerState sA R(: register(s0)); SamplerState sB R(: register(s1)); -cbuffer C0 R(: register(b0)) +BEGIN_CBUFFER(C0) { float c0; } +END_CBUFFER(C0, register(b0)) -cbuffer C1 R(: register(b1)) +BEGIN_CBUFFER(C1) { float c1; } +END_CBUFFER(C1, register(b1)) float4 main() : SV_TARGET { - return use(tB,sB) + use(c1); + return use(tB,sB) + use(CBUFFER_REF(C1,c1)); }
\ No newline at end of file diff --git a/tests/bindings/explicit-binding.hlsl b/tests/bindings/explicit-binding.hlsl index 9c38cdee0..420eafec1 100644 --- a/tests/bindings/explicit-binding.hlsl +++ b/tests/bindings/explicit-binding.hlsl @@ -5,8 +5,16 @@ #ifdef __SLANG__ #define R(X) /**/ +#define BEGIN_CBUFFER(NAME) cbuffer NAME +#define BEGIN_CBUFFER_R(NAME, REG) cbuffer NAME : REG +#define END_CBUFFER(NAME, REG) /**/ +#define CBUFFER_REF(NAME, FIELD) FIELD #else #define R(X) X +#define BEGIN_CBUFFER(NAME) struct SLANG_ParameterGroup_##NAME +#define BEGIN_CBUFFER_R(NAME, REG) BEGIN_CBUFFER(NAME) +#define END_CBUFFER(NAME, REG) ; cbuffer NAME : REG { SLANG_ParameterGroup_##NAME NAME; } +#define CBUFFER_REF(NAME, FIELD) NAME.FIELD #define CA CA_0 #define ca ca_0 @@ -49,25 +57,29 @@ SamplerState sc : register(s1); // and even to make things non-contiguous. Here we bind // the third constnat buffer to register `b9` // -cbuffer CA R(: register(b0)) +BEGIN_CBUFFER(CA) { float ca; } +END_CBUFFER(CA, register(b0)) + // -cbuffer CB R(: register(b1)) +BEGIN_CBUFFER(CB) { float cb; } +END_CBUFFER(CB, register(b1)) // -cbuffer CC : register(b9) +BEGIN_CBUFFER_R(CC, register(b9)) { float cc; } +END_CBUFFER(CC, register(b9)) float4 main() : SV_TARGET { // Go ahead and use everything in this case: - return use(ta, sa) + use(ca) - + use(tb, sb) + use(cb) - + use(tc, sc) + use(cc); + return use(ta, sa) + use(CBUFFER_REF(CA,ca)) + + use(tb, sb) + use(CBUFFER_REF(CB,cb)) + + use(tc, sc) + use(CBUFFER_REF(CC,cc)); }
\ No newline at end of file diff --git a/tests/bindings/glsl-parameter-blocks.slang.glsl b/tests/bindings/glsl-parameter-blocks.slang.glsl index 3ade8bb6b..a27fbb3db 100644 --- a/tests/bindings/glsl-parameter-blocks.slang.glsl +++ b/tests/bindings/glsl-parameter-blocks.slang.glsl @@ -25,8 +25,8 @@ struct Test layout(binding = 0) uniform ParameterBlock_gTest { - Test gTest; -}; + Test _data; +} gTest; layout(binding = 1) uniform texture2D gTest_t; @@ -42,7 +42,7 @@ in vec2 uv; void main() { - vec4 temp_a = gTest.a; + vec4 temp_a = gTest._data.a; vec4 temp_sample = texture(sampler2D(gTest_t, gTest_s), uv); diff --git a/tests/bindings/multi-file-extra.hlsl b/tests/bindings/multi-file-extra.hlsl index 92227d54a..fe8766dcd 100644 --- a/tests/bindings/multi-file-extra.hlsl +++ b/tests/bindings/multi-file-extra.hlsl @@ -7,8 +7,14 @@ #ifdef __SLANG__ #define R(X) /**/ +#define BEGIN_CBUFFER(NAME) cbuffer NAME +#define END_CBUFFER(NAME, REG) /**/ +#define CBUFFER_REF(NAME, FIELD) FIELD #else #define R(X) X +#define BEGIN_CBUFFER(NAME) struct SLANG_ParameterGroup_##NAME +#define END_CBUFFER(NAME, REG) ; cbuffer NAME : REG { SLANG_ParameterGroup_##NAME NAME; } +#define CBUFFER_REF(NAME, FIELD) NAME.FIELD #define sharedC sharedC_0 #define sharedCA sharedCA_0 @@ -50,13 +56,15 @@ float4 use(Texture2D t, SamplerState s) { return t.Sample(s, 0.0); } // Start with some parameters that will appear in both shaders Texture2D sharedT R(: register(t0)); SamplerState sharedS R(: register(s0)); -cbuffer sharedC R(: register(b0)) + +BEGIN_CBUFFER(sharedC) { - float3 sharedCA R(: packoffset(c0)); - float sharedCB R(: packoffset(c0.w)); - float3 sharedCC R(: packoffset(c1)); - float2 sharedCD R(: packoffset(c2)); + float3 sharedCA; + float sharedCB; + float3 sharedCC; + float2 sharedCD; } +END_CBUFFER(sharedC, register(b0)) // Then some parameters specific to this shader. // These will be placed *after* the ones from the main file, @@ -65,13 +73,15 @@ cbuffer sharedC R(: register(b0)) Texture2D fragmentT R(: register(t4)); SamplerState fragmentS R(: register(s2)); -cbuffer fragmentC R(: register(b2)) + +BEGIN_CBUFFER(fragmentC) { - float3 fragmentCA R(: packoffset(c0)); - float fragmentCB R(: packoffset(c0.w)); - float3 fragmentCC R(: packoffset(c1)); - float2 fragmentCD R(: packoffset(c2)); + float3 fragmentCA; + float fragmentCB; + float3 fragmentCC; + float2 fragmentCD; } +END_CBUFFER(fragmentC, register(b2)) // And end with some shared parameters again Texture2D sharedTV R(: register(t2)); @@ -82,9 +92,9 @@ float4 main() : SV_TARGET { // Go ahead and use everything here, just to make sure things got placed correctly return use(sharedT, sharedS) - + use(sharedCD) + + use(CBUFFER_REF(sharedC,sharedCD)) + use(fragmentT, fragmentS) - + use(fragmentCD) + + use(CBUFFER_REF(fragmentC, fragmentCD)) + use(sharedTF, sharedS) ; }
\ No newline at end of file diff --git a/tests/bindings/multi-file.hlsl b/tests/bindings/multi-file.hlsl index 6269c703e..8c719bbcf 100644 --- a/tests/bindings/multi-file.hlsl +++ b/tests/bindings/multi-file.hlsl @@ -8,8 +8,14 @@ #ifdef __SLANG__ #define R(X) /**/ +#define BEGIN_CBUFFER(NAME) cbuffer NAME +#define END_CBUFFER(NAME, REG) /**/ +#define CBUFFER_REF(NAME, FIELD) FIELD #else #define R(X) X +#define BEGIN_CBUFFER(NAME) struct SLANG_ParameterGroup_##NAME +#define END_CBUFFER(NAME, REG) ; cbuffer NAME : REG { SLANG_ParameterGroup_##NAME NAME; } +#define CBUFFER_REF(NAME, FIELD) NAME.FIELD #define sharedC sharedC_0 #define sharedCA sharedCA_0 @@ -55,13 +61,15 @@ float4 use(Texture2D t, SamplerState s) // Start with some parameters that will appear in both shaders Texture2D sharedT R(: register(t0)); SamplerState sharedS R(: register(s0)); -cbuffer sharedC R(: register(b0)) + +BEGIN_CBUFFER(sharedC) { - float3 sharedCA R(: packoffset(c0)); - float sharedCB R(: packoffset(c0.w)); - float3 sharedCC R(: packoffset(c1)); - float2 sharedCD R(: packoffset(c2)); + float3 sharedCA; + float sharedCB; + float3 sharedCC; + float2 sharedCD; } +END_CBUFFER(sharedC, register(b0)) // Then some parameters specific to this shader // (these will get placed before the ones in the `extra` file, @@ -69,13 +77,15 @@ cbuffer sharedC R(: register(b0)) Texture2D vertexT R(: register(t1)); SamplerState vertexS R(: register(s1)); -cbuffer vertexC R(: register(b1)) + +BEGIN_CBUFFER(vertexC) { - float3 vertexCA R(: packoffset(c0)); - float vertexCB R(: packoffset(c0.w)); - float3 vertexCC R(: packoffset(c1)); - float2 vertexCD R(: packoffset(c2)); + float3 vertexCA; + float vertexCB; + float3 vertexCC; + float2 vertexCD; } +END_CBUFFER(vertexC, register(b1)) // And end with some shared parameters again Texture2D sharedTV R(: register(t2)); @@ -86,9 +96,9 @@ float4 main() : SV_POSITION { // Go ahead and use everything here, just to make sure things got placed correctly return use(sharedT, sharedS) - + use(sharedCD) + + use(CBUFFER_REF(sharedC, sharedCD)) + use(vertexT, vertexS) - + use(vertexCD) + + use(CBUFFER_REF(vertexC, vertexCD)) + use(sharedTV, vertexS) ; }
\ No newline at end of file diff --git a/tests/bindings/packoffset.hlsl b/tests/bindings/packoffset.hlsl deleted file mode 100644 index 81913d672..000000000 --- a/tests/bindings/packoffset.hlsl +++ /dev/null @@ -1,51 +0,0 @@ -//TEST:COMPARE_HLSL:-no-mangle -profile ps_4_0 -entry main - -// Let's make sure we generate correct output in cases -// where there are non-trivial `packoffset`s needed - -#ifdef __SLANG__ -#define R(X) /**/ -#else -#define R(X) X - -#define CA CA_0 -#define ca ca_0 -#define cb cb_0 -#define cc cc_0 -#define cd cd_0 -#define ce ce_0 - -#define ta CA_ta_0 -#define sa CA_sa_0 - -#endif - -float4 use(float val) { return val; }; -float4 use(float2 val) { return float4(val,0.0,0.0); }; -float4 use(float3 val) { return float4(val,0.0); }; -float4 use(float4 val) { return val; }; -float4 use(Texture2D t, SamplerState s) { return t.Sample(s, 0.0); } - -cbuffer CA R(: register(b0)) -{ - float4 ca R(: packoffset(c0)); - float3 cb R(: packoffset(c1.x)); - float cc R(: packoffset(c1.w)); - float2 cd R(: packoffset(c2.x)); - float2 ce R(: packoffset(c2.z)); - - Texture2D ta R(: register(t0)); - SamplerState sa R(: register(s0)); -} - -float4 main() : SV_TARGET -{ - // Go ahead and use everything in this case: - return use(ta, sa) - + use(ca) - + use(cb) - + use(cc) - + use(cd) - + use(ce) - ; -}
\ No newline at end of file diff --git a/tests/bindings/resources-in-cbuffer.hlsl b/tests/bindings/resources-in-cbuffer.hlsl index 4d3d381d9..71eaf40aa 100644 --- a/tests/bindings/resources-in-cbuffer.hlsl +++ b/tests/bindings/resources-in-cbuffer.hlsl @@ -2,12 +2,20 @@ // Confirm that resources inside constant buffers get correct locations, // including the case where there are *multiple* constant buffers -// with reosurces. +// with resources. #ifdef __SLANG__ #define R(X) /**/ +#define BEGIN_CBUFFER(NAME) cbuffer NAME { +#define MID_CBUFFER(NAME) /**/ +#define END_CBUFFER(NAME, REG) /**/ } +#define CBUFFER_REF(NAME, FIELD) FIELD #else #define R(X) X +#define BEGIN_CBUFFER(NAME) struct SLANG_ParameterGroup_##NAME { +#define MID_CBUFFER(NAME) }; +#define END_CBUFFER(NAME, REG) cbuffer NAME : REG { SLANG_ParameterGroup_##NAME NAME; } +#define CBUFFER_REF(NAME, FIELD) NAME.FIELD #define CA CA_0 #define caa caa_0 @@ -46,43 +54,52 @@ float4 use(float3 val) { return float4(val,0.0); }; float4 use(float4 val) { return val; }; float4 use(Texture2D t, SamplerState s) { return t.Sample(s, 0.0); } -cbuffer CA R(: register(b0)) -{ - float4 caa R(: packoffset(c0)); - float3 cab R(: packoffset(c1.x)); - float cac R(: packoffset(c1.w)); - float2 cad R(: packoffset(c2.x)); - float2 cae R(: packoffset(c2.z)); +BEGIN_CBUFFER(CA) + + float4 caa; + float3 cab; + float cac; + float2 cad; + float2 cae; + +MID_CBUFFER(CA) Texture2D ta R(: register(t0)); SamplerState sa R(: register(s0)); -} -cbuffer CB R(: register(b1)) -{ - float4 cba R(: packoffset(c0)); - float3 cbb R(: packoffset(c1.x)); - float cbc R(: packoffset(c1.w)); - float2 cbd R(: packoffset(c2.x)); - float2 cbe R(: packoffset(c2.z)); +END_CBUFFER(CA, register(b0)) + +BEGIN_CBUFFER(CB) + + float4 cba; + float3 cbb; + float cbc; + float2 cbd; + float2 cbe; + +MID_CBUFFER(CB) Texture2D tbx R(: register(t1)); Texture2D tby R(: register(t2)); SamplerState sb R(: register(s1)); -} -cbuffer CC R(: register(b2)) -{ - float4 cca R(: packoffset(c0)); - float3 ccb R(: packoffset(c1.x)); - float ccc R(: packoffset(c1.w)); - float2 ccd R(: packoffset(c2.x)); - float2 cce R(: packoffset(c2.z)); +END_CBUFFER(CB, register(b1)) + +BEGIN_CBUFFER(CC) + + float4 cca; + float3 ccb; + float ccc; + float2 ccd; + float2 cce; + +MID_CBUFFER(CC) Texture2D tc R(: register(t3)); SamplerState scx R(: register(s2)); SamplerState scy R(: register(s3)); -} + +END_CBUFFER(CC, register(b2)) float4 main() : SV_TARGET { @@ -91,8 +108,8 @@ float4 main() : SV_TARGET + use(tbx, sb) + use(tby, scx) + use(tc, scy) - + use(cae) - + use(cbe) - + use(cce) + + use(CBUFFER_REF(CA, cae)) + + use(CBUFFER_REF(CB, cbe)) + + use(CBUFFER_REF(CC, cce)) ; }
\ No newline at end of file diff --git a/tests/bugs/gh-103.slang b/tests/bugs/gh-103.slang index 65e71837b..4bad20b20 100644 --- a/tests/bugs/gh-103.slang +++ b/tests/bugs/gh-103.slang @@ -2,7 +2,15 @@ // Ensure that matrix-times-scalar works -#ifndef __SLANG__ +#ifdef __SLANG__ +#define BEGIN_CBUFFER(NAME) cbuffer NAME +#define END_CBUFFER(NAME, REG) /**/ +#define CBUFFER_REF(NAME, FIELD) FIELD +#else +#define BEGIN_CBUFFER(NAME) struct SLANG_ParameterGroup_##NAME +#define END_CBUFFER(NAME, REG) ; cbuffer NAME : REG { SLANG_ParameterGroup_##NAME NAME; } +#define CBUFFER_REF(NAME, FIELD) NAME.FIELD + #define C C_0 #define a a_0 #define b b_0 @@ -10,16 +18,19 @@ float4x4 doIt(float4x4 a, float b) { - return a * b; + return a * b; } -cbuffer C +BEGIN_CBUFFER(C) { - float4x4 a; - float b; -}; + float4x4 a; + float b; +} +END_CBUFFER(C,register(b0)) float4 main() : SV_TARGET { - return doIt(a, b)[0]; + return doIt( + CBUFFER_REF(C,a), + CBUFFER_REF(C,b))[0]; } diff --git a/tests/bugs/gh-172.slang b/tests/bugs/gh-172.slang index dd5f4d47a..6d9e69503 100644 --- a/tests/bugs/gh-172.slang +++ b/tests/bugs/gh-172.slang @@ -8,22 +8,27 @@ cbuffer C { - Texture2D t0, t1; - SamplerState s; - float2 uv; + Texture2D t0, t1; + SamplerState s; + float2 uv; }; float4 main() : SV_Target { - return t0.Sample(s, uv) + return t0.Sample(s, uv) + t1.Sample(s, uv); } #else +struct SLANG_ParameterGroup_C_0 +{ + float2 uv_0; +}; + cbuffer C_0 : register(b0) { - float2 uv_0; + SLANG_ParameterGroup_C_0 C_0; }; Texture2D C_t0_0 : register(t0); @@ -32,8 +37,8 @@ SamplerState C_s_0 : register(s0); float4 main() : SV_TARGET { - return C_t0_0.Sample(C_s_0, uv_0) - + C_t1_0.Sample(C_s_0, uv_0); + return C_t0_0.Sample(C_s_0, C_0.uv_0) + + C_t1_0.Sample(C_s_0, C_0.uv_0); } #endif diff --git a/tests/bugs/gh-333.slang b/tests/bugs/gh-333.slang index 2a23f7751..a1e3ea20d 100644 --- a/tests/bugs/gh-333.slang +++ b/tests/bugs/gh-333.slang @@ -2,7 +2,15 @@ // Ensure declaration order in output is correct -#ifndef __SLANG__ +#ifdef __SLANG__ +#define BEGIN_CBUFFER(NAME) cbuffer NAME +#define END_CBUFFER(NAME, REG) /**/ +#define CBUFFER_REF(NAME, FIELD) FIELD +#else +#define BEGIN_CBUFFER(NAME) struct SLANG_ParameterGroup_##NAME +#define END_CBUFFER(NAME, REG) ; cbuffer NAME : REG { SLANG_ParameterGroup_##NAME NAME; } +#define CBUFFER_REF(NAME, FIELD) NAME.FIELD + #define A A_0 #define x x_0 #define B B_0 @@ -23,13 +31,14 @@ struct B Texture2D t; }; -cbuffer C +BEGIN_CBUFFER(C) { A a; B b; -}; +} +END_CBUFFER(C,register(b0)) float4 main() : SV_TARGET { - return a.x; + return CBUFFER_REF(C,a).x; } diff --git a/tests/bugs/split-nested-types.hlsl b/tests/bugs/split-nested-types.hlsl index 577f64a75..2bfea49c2 100644 --- a/tests/bugs/split-nested-types.hlsl +++ b/tests/bugs/split-nested-types.hlsl @@ -1,9 +1,19 @@ //TEST:COMPARE_HLSL:-no-mangle -profile ps_5_0 #ifdef __SLANG__ + +#define BEGIN_CBUFFER(NAME) cbuffer NAME +#define END_CBUFFER(NAME, REG) /**/ +#define CBUFFER_REF(NAME, FIELD) FIELD + import split_nested_types; + #else +#define BEGIN_CBUFFER(NAME) struct SLANG_ParameterGroup_##NAME +#define END_CBUFFER(NAME, REG) ; cbuffer NAME : REG { SLANG_ParameterGroup_##NAME NAME; } +#define CBUFFER_REF(NAME, FIELD) NAME.FIELD + #define A A_0 #define x x_0 @@ -31,12 +41,13 @@ struct M #endif -cbuffer C +BEGIN_CBUFFER(C) { M m; } +END_CBUFFER(C,register(b0)) float4 main() : SV_TARGET { - return m.b.y; + return CBUFFER_REF(C,m).b.y; } diff --git a/tests/bugs/vec-init-list.hlsl b/tests/bugs/vec-init-list.hlsl index 2f82a96b0..fe8cba09f 100644 --- a/tests/bugs/vec-init-list.hlsl +++ b/tests/bugs/vec-init-list.hlsl @@ -2,7 +2,14 @@ // Check handling of initializer list for vector -#ifndef __SLANG__ +#ifdef __SLANG__ +#define BEGIN_CBUFFER(NAME) cbuffer NAME +#define END_CBUFFER(NAME, REG) /**/ +#define CBUFFER_REF(NAME, FIELD) FIELD +#else +#define BEGIN_CBUFFER(NAME) struct SLANG_ParameterGroup_##NAME +#define END_CBUFFER(NAME, REG) ; cbuffer NAME : REG { SLANG_ParameterGroup_##NAME NAME; } +#define CBUFFER_REF(NAME, FIELD) NAME.FIELD #define C C_0 #define a a_0 @@ -10,10 +17,11 @@ #endif -cbuffer C : register(b0) +BEGIN_CBUFFER(C) { - float4 a; -}; + float4 a; +} +END_CBUFFER(C, register(b0)) float w0(float x) { return x; } float w1(float x) { return x; } @@ -22,6 +30,10 @@ float w3(float x) { return x; } float4 main() : SV_Position { - float4 wx = { w0(a.x), w1(a.x), w2(a.x), w3(a.x), }; + float4 wx = { + w0(CBUFFER_REF(C,a).x), + w1(CBUFFER_REF(C,a).x), + w2(CBUFFER_REF(C,a).x), + w3(CBUFFER_REF(C,a).x), }; return wx; } diff --git a/tests/bugs/vk-structured-buffer-binding.hlsl b/tests/bugs/vk-structured-buffer-binding.hlsl index cdfeb19ab..3ef1bcc8c 100644 --- a/tests/bugs/vk-structured-buffer-binding.hlsl +++ b/tests/bugs/vk-structured-buffer-binding.hlsl @@ -1,5 +1,4 @@ -//TEST:SIMPLE: -profile ps_4_0 -entry main -target glsl -///////TEST:REFLECTION:-profile ps_4_0 -target spirv +//TEST:CROSS_COMPILE:-profile ps_4_0 -entry main -target spirv-assembly [[vk::binding(3, 4)]] RWStructuredBuffer<uint> gDoneGroups : register(u3); diff --git a/tests/bugs/vk-structured-buffer-binding.hlsl.expected b/tests/bugs/vk-structured-buffer-binding.hlsl.expected deleted file mode 100644 index fc1cb93ea..000000000 --- a/tests/bugs/vk-structured-buffer-binding.hlsl.expected +++ /dev/null @@ -1,31 +0,0 @@ -result code = 0 -standard error = { -} -standard output = { -#version 450 -layout(row_major) uniform; -layout(row_major) buffer; - -#line 5 0 -layout(std430, binding = 3, set = 4) buffer _S1 { - uint gDoneGroups_0[]; -}; - -#line 7 -layout(location = 0) -out vec4 _S2; - - -#line 7 -layout(location = 0) -in vec3 _S3; - - -#line 7 -void main() -{ - _S2 = vec4(gDoneGroups_0[uint(int(_S3.z))]); - return; -} - -} diff --git a/tests/bugs/vk-structured-buffer-binding.hlsl.glsl b/tests/bugs/vk-structured-buffer-binding.hlsl.glsl new file mode 100644 index 000000000..7298ea594 --- /dev/null +++ b/tests/bugs/vk-structured-buffer-binding.hlsl.glsl @@ -0,0 +1,26 @@ +// vk-structured-buffer-binding.hlsl.glsl +//TEST_IGNORE_FILE: + +#version 450 + +#define gDoneGroups gDoneGroups_0 +#define uv _S3 +#define SV_Target _S2 + +layout(std430, binding = 3, set = 4) +buffer _S1 +{ + uint _data[]; +} gDoneGroups; + +layout(location = 0) +out vec4 SV_Target; + +layout(location = 0) +in vec3 uv; + +void main() +{ + SV_Target = vec4(gDoneGroups._data[uint(int(uv.z))]); + return; +} diff --git a/tests/compute/matrix-layout.hlsl b/tests/compute/matrix-layout.hlsl index ad456d8be..034ac512c 100644 --- a/tests/compute/matrix-layout.hlsl +++ b/tests/compute/matrix-layout.hlsl @@ -32,7 +32,14 @@ cbuffer C0 //TEST_INPUT:cbuffer(data=[1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24]):dxbinding(1),glbinding(1) cbuffer C1 { - column_major + +// Note: support for the explicit `row_major` and `column_major` modifiers is being +// disabled for now, since our current Vulkan output strategy cannot possibly match the +// semantics of these modifiers in D3D. Once we do a more complete implementation of +// matrix layout (see GitHub issue #695) we can add a directed test for all the +// corners cases of explicit matrix layout. +// +// column_major int3x4 cc; int dd; }; @@ -47,7 +54,7 @@ int test(int val) int a = s.a[val / 4][val % 3]; int b = s.b; - int c = cc[val / 4][val % 4]; + int c = cc[val / 4][val % 3]; int d = dd; return ((a*N + b) * N + c) * N + d; diff --git a/tests/compute/matrix-layout.hlsl.expected.txt b/tests/compute/matrix-layout.hlsl.expected.txt index cb8e2cae7..3b67fe0cb 100644 --- a/tests/compute/matrix-layout.hlsl.expected.txt +++ b/tests/compute/matrix-layout.hlsl.expected.txt @@ -1,12 +1,12 @@ -10D0111 -20D0511 -30D0911 -10D0D11 -60D0211 -70D0611 -50D0A11 -60D0E11 -B0D0311 -90D0711 -A0D0B11 -B0D0F11 +10D010D +20D020D +30D030D +10D010D +60D060D +70D070D +50D050D +60D060D +B0D0B0D +90D090D +A0D0A0D +B0D0B0D diff --git a/tests/cross-compile/array-of-buffers.slang b/tests/cross-compile/array-of-buffers.slang new file mode 100644 index 000000000..de87e6d9d --- /dev/null +++ b/tests/cross-compile/array-of-buffers.slang @@ -0,0 +1,32 @@ +// array-of-buffers.slang + +//TEST:CROSS_COMPILE:-target spirv-assembly -entry main -stage fragment +//TEST:CROSS_COMPILE:-target dxil-assembly -entry main -stage fragment -profile sm_6_0 + +// This test ensures that we cross-compile arrays of structured/constant +// buffers into appropriate GLSL, where these are not first-class types. +// +// Note that this test does *not* currently test the case of passing +// a structured or constant buffer into a subroutine, which requires +// further work. + +struct S { float4 f; }; + +cbuffer C +{ + uint index; +} + +ConstantBuffer<S> cb [3]; +StructuredBuffer<S> sb1[4]; +RWStructuredBuffer<float4> sb2[5]; +ByteAddressBuffer bb [6]; + +float4 main() : SV_Target +{ + return cb [index] .f + + sb1[index][index].f + + sb2[index][index] + + float4(bb[index].Load(index*4)); +} + diff --git a/tests/cross-compile/array-of-buffers.slang.glsl b/tests/cross-compile/array-of-buffers.slang.glsl new file mode 100644 index 000000000..d3aa5082f --- /dev/null +++ b/tests/cross-compile/array-of-buffers.slang.glsl @@ -0,0 +1,57 @@ +//TEST_IGNORE_FILE: +#version 450 + +struct SLANG_ParameterGroup_C_0 +{ + uint index_0; +}; + +layout(binding = 0) +layout(std140) uniform _S1 +{ + SLANG_ParameterGroup_C_0 _data; +} C_0; + +struct S_0 +{ + vec4 f_0; +}; + +layout(binding = 1) +layout(std140) uniform _S2 +{ + S_0 _data; +} cb_0[3]; + + +layout(std430, binding = 2) +buffer _S3 { + S_0 _data[]; +} sb1_0[4]; + +layout(std430, binding = 3) +buffer _S4 { + vec4 _data[]; +} sb2_0[5]; + +layout(std430, binding = 4) +buffer _S5 +{ + uint _data[]; +} bb_0[6]; + +layout(location = 0) +out vec4 _S6; + +void main() +{ + vec4 _S7 = cb_0[C_0._data.index_0]._data.f_0; + + S_0 _S8 = sb1_0[C_0._data.index_0]._data[C_0._data.index_0]; + + vec4 _S9 = _S7 + _S8.f_0; + vec4 _S10 = _S9 + sb2_0[C_0._data.index_0]._data[C_0._data.index_0]; + uint _S11 = bb_0[C_0._data.index_0]._data[int(C_0._data.index_0 * uint(4))]; + _S6 = _S10 + vec4(_S11); + return; +} diff --git a/tests/cross-compile/array-of-buffers.slang.hlsl b/tests/cross-compile/array-of-buffers.slang.hlsl new file mode 100644 index 000000000..129993edc --- /dev/null +++ b/tests/cross-compile/array-of-buffers.slang.hlsl @@ -0,0 +1,35 @@ +//TEST_IGNORE_FILE: + +struct SLANG_ParameterGroup_C_0 +{ + uint index_0; +}; + +cbuffer C_0 : register(b0) +{ + SLANG_ParameterGroup_C_0 C_0; +} + +struct S_0 +{ + float4 f_0; +}; + +ConstantBuffer<S_0> cb_0 [3] : register(b1); +StructuredBuffer<S_0> sb1_0[4] : register(t0); +RWStructuredBuffer<float4> sb2_0[5] : register(u1); +ByteAddressBuffer bb_0[6] : register(t4); + +float4 main() : SV_TARGET +{ + float4 _S1 = cb_0[C_0.index_0].f_0; + + S_0 _S2 = sb1_0[C_0.index_0][C_0.index_0]; + + float4 _S3 = _S1 + _S2.f_0; + float4 _S4 = _S3 + sb2_0[C_0.index_0][C_0.index_0]; + uint _S5 = bb_0[C_0.index_0].Load( + (int) (C_0.index_0 * (uint) 4)); + + return _S4 + (float4) _S5; +} diff --git a/tests/hlsl/dxsdk/AdaptiveTessellationCS40/Render.hlsl b/tests/hlsl/dxsdk/AdaptiveTessellationCS40/Render.hlsl deleted file mode 100644 index c6b4ac197..000000000 --- a/tests/hlsl/dxsdk/AdaptiveTessellationCS40/Render.hlsl +++ /dev/null @@ -1,65 +0,0 @@ -//TEST(smoke):COMPARE_HLSL:-no-mangle -profile sm_4_0 -entry RenderBaseVS -stage vertex -entry RenderPS -stage fragment - -#ifndef __SLANG__ -#define cbPerObject cbPerObject_0 -#define g_mWorldViewProjection g_mWorldViewProjection_0 -#endif - - -//-------------------------------------------------------------------------------------- -// File: Render.hlsl -// -// The shaders for rendering tessellated mesh and base mesh -// -// Copyright (c) Microsoft Corporation. All rights reserved. -//-------------------------------------------------------------------------------------- -cbuffer cbPerObject : register( b0 ) -{ - row_major matrix g_mWorldViewProjection ;//SLANG: : packoffset( c0 ); -} - -// The tessellated vertex structure -struct TessedVertex -{ - uint BaseTriID; // Which triangle of the base mesh this tessellated vertex belongs to? - float2 bc; // Barycentric coordinates with regard to the base triangle -}; -Buffer<float4> g_base_vb_buffer : register(t0); // Base mesh vertex buffer -StructuredBuffer<TessedVertex> g_TessedVertices : register(t1); // Tessellated mesh vertex buffer - -float4 bary_centric(float4 v1, float4 v2, float4 v3, float2 bc) -{ - return (1 - bc.x - bc.y) * v1 + bc.x * v2 + bc.y * v3; -} - -float4 RenderVS( uint vertid : SV_VertexID ) : SV_POSITION -{ - TessedVertex input = g_TessedVertices[vertid]; - - // Get the positions of the three vertices of the base triangle - float4 v[3]; - [unroll] - for (int i = 0; i < 3; ++ i) - { - uint vert_id = input.BaseTriID * 3 + i; - v[i] = g_base_vb_buffer[vert_id]; - } - - // Calculate the position of this tessellated vertex from barycentric coordinates and then project it - return mul(bary_centric(v[0], v[1], v[2], input.bc), g_mWorldViewProjection); -} - -struct BaseVertex -{ - float4 pos : POSITION; -}; - -float4 RenderBaseVS( BaseVertex input ) : SV_POSITION -{ - return mul( input.pos, g_mWorldViewProjection ); -} - -float4 RenderPS() : SV_TARGET -{ - return float4( 1.0f, 1.0f, 0.0f, 1.0f ); -}
\ No newline at end of file diff --git a/tests/hlsl/dxsdk/AdaptiveTessellationCS40/ScanCS.hlsl b/tests/hlsl/dxsdk/AdaptiveTessellationCS40/ScanCS.hlsl deleted file mode 100644 index a4472179f..000000000 --- a/tests/hlsl/dxsdk/AdaptiveTessellationCS40/ScanCS.hlsl +++ /dev/null @@ -1,109 +0,0 @@ -//TEST_DISABLED:COMPARE_HLSL:-no-mangle -profile cs_4_0 -entry CSScanInBucket -entry CSScanBucketResult -entry CSScanAddBucketResult -//-------------------------------------------------------------------------------------- -// File: ScanCS.hlsl -// -// A simple inclusive prefix sum(scan) implemented in CS4.0, -// using a typical up sweep and down sweep scheme -// -// Copyright (c) Microsoft Corporation. All rights reserved. -//-------------------------------------------------------------------------------------- -StructuredBuffer<uint2> Input : register( t0 ); // Change uint2 here if scan other types, and -RWStructuredBuffer<uint2> Result : register( u0 ); // also here - -#define groupthreads 128 -groupshared uint4 bucket[groupthreads]; // Change uint4 to the "type x2" if scan other types, e.g. - // if scan uint2, then put uint4 here, - // if scan float, then put float2 here - -void CSScan( uint3 DTid, uint GI, uint2 x ) // Change the type of x here if scan other types -{ - // since CS40 can only support one shared memory for one shader, we use .xy and .zw as ping-ponging buffers - // if scan a single element type like int, search and replace all .xy to .x and .zw to .y below - bucket[GI].xy = x; - bucket[GI].zw = 0; - - // Up sweep - [unroll] - for ( uint stride = 2; stride <= groupthreads; stride <<= 1 ) - { - GroupMemoryBarrierWithGroupSync(); - - if ( (GI & (stride - 1)) == (stride - 1) ) - { - bucket[GI].xy += bucket[GI - stride/2].xy; - } - } - - if ( GI == (groupthreads - 1) ) - { - bucket[GI].xy = 0; - } - - // Down sweep - bool n = true; - [unroll] - for ( stride = groupthreads / 2; stride >= 1; stride >>= 1 ) - { - GroupMemoryBarrierWithGroupSync(); - - uint a = stride - 1; - uint b = stride | a; - - if ( n ) // ping-pong between passes - { - if ( ( GI & b) == b ) - { - bucket[GI].zw = bucket[GI-stride].xy + bucket[GI].xy; - } else - if ( (GI & a) == a ) - { - bucket[GI].zw = bucket[GI+stride].xy; - } else - { - bucket[GI].zw = bucket[GI].xy; - } - } else - { - if ( ( GI & b) == b ) - { - bucket[GI].xy = bucket[GI-stride].zw + bucket[GI].zw; - } else - if ( (GI & a) == a ) - { - bucket[GI].xy = bucket[GI+stride].zw; - } else - { - bucket[GI].xy = bucket[GI].zw; - } - } - - n = !n; - } - - Result[DTid.x] = bucket[GI].zw + x; -} - -// scan in each bucket -[numthreads( groupthreads, 1, 1 )] -void CSScanInBucket( uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI: SV_GroupIndex ) -{ - uint2 x = Input[DTid.x]; // Change the type of x here if scan other types - CSScan( DTid, GI, x ); -} - -// record and scan the sum of each bucket -[numthreads( groupthreads, 1, 1 )] -void CSScanBucketResult( uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI: SV_GroupIndex ) -{ - uint2 x = Input[DTid.x*groupthreads - 1]; // Change the type of x here if scan other types - CSScan( DTid, GI, x ); -} - -StructuredBuffer<uint2> Input1 : register( t1 ); - -// add the bucket scanned result to each bucket to get the final result -[numthreads( groupthreads, 1, 1 )] -void CSScanAddBucketResult( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI: SV_GroupIndex ) -{ - Result[DTid.x] = Input[DTid.x] + Input1[Gid.x]; -} diff --git a/tests/hlsl/dxsdk/AdaptiveTessellationCS40/TessellatorCS40_EdgeFactorCS.hlsl b/tests/hlsl/dxsdk/AdaptiveTessellationCS40/TessellatorCS40_EdgeFactorCS.hlsl deleted file mode 100644 index 1bd204efc..000000000 --- a/tests/hlsl/dxsdk/AdaptiveTessellationCS40/TessellatorCS40_EdgeFactorCS.hlsl +++ /dev/null @@ -1,217 +0,0 @@ -//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues. -//TEST:COMPARE_HLSL: -profile cs_4_0 -entry CSEdgeFactor -//-------------------------------------------------------------------------------------- -// File: TessellatorCS40_EdgeFactorCS.hlsl -// -// The CS to compute edge tessellation factor acoording to current world, view, projection matrix -// -// Copyright (c) Microsoft Corporation. All rights reserved. -//-------------------------------------------------------------------------------------- - -// http://jgt.akpeters.com/papers/akeninemoller01/tribox.html -bool planeBoxOverlap(float3 normal, float d, float3 maxbox) -{ - float3 vmin = maxbox, vmax = maxbox; - [unroll] - for (int q = 0;q <= 2; ++ q) - { - if (normal[q] > 0.0f) - { - vmin[q] *= -1; - } - else - { - vmax[q] *= -1; - } - } - if (dot(normal, vmin) + d > 0.0f) - { - return false; - } - if (dot(normal, vmax) + d >= 0.0f) - { - return true; - } - - return false; -} - -/*======================== X-tests ========================*/ -bool AXISTEST_X01(float3 v0, float3 v2, float3 boxhalfsize, float2 ab, float2 fab) -{ - float p0 = ab.x * v0.y - ab.y * v0.z; - float p2 = ab.x * v2.y - ab.y * v2.z; - float min_v = min(p0, p2); - float max_v = max(p0, p2); - float rad = dot(fab, boxhalfsize.yz); - return (min_v < rad) && (max_v > -rad); -} - -bool AXISTEST_X2(float3 v0, float3 v1, float3 boxhalfsize, float2 ab, float2 fab) -{ - float p0 = ab.x * v0.y - ab.y * v0.z; - float p1 = ab.x * v1.y - ab.y * v1.z; - float min_v = min(p0, p1); - float max_v = max(p0, p1); - float rad = dot(fab, boxhalfsize.yz); - return (min_v < rad) && (max_v > -rad); -} - -/*======================== Y-tests ========================*/ -bool AXISTEST_Y02(float3 v0, float3 v2, float3 boxhalfsize, float2 ab, float2 fab) -{ - float p0 = -ab.x * v0.x + ab.y * v0.z; - float p2 = -ab.x * v2.x + ab.y * v2.z; - float min_v = min(p0, p2); - float max_v = max(p0, p2); - float rad = dot(fab, boxhalfsize.xz); - return (min_v < rad) && (max_v > -rad); -} - -bool AXISTEST_Y1(float3 v0, float3 v1, float3 boxhalfsize, float2 ab, float2 fab) -{ - float p0 = -ab.x * v0.x + ab.y * v0.z; - float p1 = -ab.x * v1.x + ab.y * v1.z; - float min_v = min(p0, p1); - float max_v = max(p0, p1); - float rad = dot(fab, boxhalfsize.xz); - return (min_v < rad) && (max_v > -rad); -} - -/*======================== Z-tests ========================*/ -bool AXISTEST_Z12(float3 v1, float3 v2, float3 boxhalfsize, float2 ab, float2 fab) -{ - float p1 = ab.x * v1.x - ab.y * v1.y; - float p2 = ab.x * v2.x - ab.y * v2.y; - float min_v = min(p1, p2); - float max_v = max(p1, p2); - float rad = dot(fab, boxhalfsize.xy); - return (min_v < rad) && (max_v > -rad); -} - -bool AXISTEST_Z0(float3 v0, float3 v1, float3 boxhalfsize, float2 ab, float2 fab) -{ - float p0 = ab.x * v0.x - ab.y * v0.y; - float p1 = ab.x * v1.x - ab.y * v1.y; - float min_v = min(p0, p1); - float max_v = max(p0, p1); - float rad = dot(fab, boxhalfsize.xy); - return (min_v < rad) && (max_v > -rad); -} - -bool triBoxOverlap(float3 boxcenter,float3 boxhalfsize,float3 triverts0, float3 triverts1, float3 triverts2) -{ - /* use separating axis theorem to test overlap between triangle and box */ - /* need to test for overlap in these directions: */ - /* 1) the {x,y,z}-directions (actually, since we use the AABB of the triangle */ - /* we do not even need to test these) */ - /* 2) normal of the triangle */ - /* 3) crossproduct(edge from tri, {x,y,z}-directin) */ - /* this gives 3x3=9 more tests */ - - /* This is the fastest branch on Sun */ - /* move everything so that the boxcenter is in (0,0,0) */ - float3 v0 = triverts0 - boxcenter; - float3 v1 = triverts1 - boxcenter; - float3 v2 = triverts2 - boxcenter; - - /* compute triangle edges */ - float3 e0 = v1 - v0; /* tri edge 0 */ - float3 e1 = v2 - v1; /* tri edge 1 */ - float3 e2 = v0 - v2; /* tri edge 2 */ - - /* Bullet 3: */ - /* test the 9 tests first (this was faster) */ - float3 fe = abs(e0); - if (!AXISTEST_X01(v0, v2, boxhalfsize, e0.zy, fe.zy) - || !AXISTEST_Y02(v0, v2, boxhalfsize, e0.zx, fe.zx) - || !AXISTEST_Z12(v1, v2, boxhalfsize, e0.yx, fe.yx)) - { - return false; - } - - fe = abs(e1); - if (!AXISTEST_X01(v0, v2, boxhalfsize, e1.zy, fe.zy) - || !AXISTEST_Y02(v0, v2, boxhalfsize, e1.zx, fe.zx) - || !AXISTEST_Z0(v0, v1, boxhalfsize, e1.yx, fe.yx)) - { - return false; - } - - fe = abs(e2); - if (!AXISTEST_X2(v0, v1, boxhalfsize, e2.zy, fe.zy) - || !AXISTEST_Y1(v0, v1, boxhalfsize, e2.zx, fe.zx) - || !AXISTEST_Z12(v1, v2, boxhalfsize, e2.yx, fe.yx)) - { - return false; - } - - /* Bullet 1: */ - /* first test overlap in the {x,y,z}-directions */ - /* find min, max of the triangle each direction, and test for overlap in */ - /* that direction -- this is equivalent to testing a minimal AABB around */ - /* the triangle against the AABB */ - - float3 min_v = min(min(v0, v1), v2); - float3 max_v = max(max(v0, v1), v2); - if ((min_v.x > boxhalfsize.x || max_v.x < -boxhalfsize.x) - || (min_v.y > boxhalfsize.y || max_v.y < -boxhalfsize.y) - || (min_v.z > boxhalfsize.z || max_v.z < -boxhalfsize.z)) - { - return false; - } - - /* Bullet 2: */ - /* test if the box intersects the plane of the triangle */ - /* compute plane equation of triangle: normal*x+d=0 */ - float3 normal = cross(e0, e1); - float d = -dot(normal, v0); /* plane eq: normal.x+d=0 */ - if (!planeBoxOverlap(normal, d, boxhalfsize)) - { - return false; - } - - return true; /* box and triangle overlaps */ -} - - -Buffer<float4> InputVertices : register(t0); -RWStructuredBuffer<float4> EdgeFactorBufOut : register(u0); - -cbuffer cb -{ - row_major matrix g_matWVP; - float2 g_tess_edge_length_scale; - int num_triangles; - float dummy; -} - -[numthreads(128, 1, 1)] -void CSEdgeFactor( uint3 DTid : SV_DispatchThreadID ) -{ - if (DTid.x < num_triangles) - { - float4 p0 = mul(InputVertices[DTid.x*3+0], g_matWVP); - float4 p1 = mul(InputVertices[DTid.x*3+1], g_matWVP); - float4 p2 = mul(InputVertices[DTid.x*3+2], g_matWVP); - p0 = p0 / p0.w; - p1 = p1 / p1.w; - p2 = p2 / p2.w; - - float4 factor; - // Only triangles which are completely inside or intersect with the view frustum are taken into account - if ( triBoxOverlap( float3(0, 0, 0.5), float3(1.02, 1.02, 0.52), p0.xyz, p1.xyz, p2.xyz ) ) - { - factor.x = length((p0.xy - p2.xy) * g_tess_edge_length_scale); - factor.y = length((p1.xy - p0.xy) * g_tess_edge_length_scale); - factor.z = length((p2.xy - p1.xy) * g_tess_edge_length_scale); - factor.w = min(min(factor.x, factor.y), factor.z); - factor = clamp(factor, 0, 9); - } else - { - factor = 0; - } - - EdgeFactorBufOut[DTid.x] = factor; - } -} diff --git a/tests/hlsl/dxsdk/AdaptiveTessellationCS40/TessellatorCS40_NumVerticesIndicesCS.hlsl b/tests/hlsl/dxsdk/AdaptiveTessellationCS40/TessellatorCS40_NumVerticesIndicesCS.hlsl deleted file mode 100644 index 672996589..000000000 --- a/tests/hlsl/dxsdk/AdaptiveTessellationCS40/TessellatorCS40_NumVerticesIndicesCS.hlsl +++ /dev/null @@ -1,56 +0,0 @@ -//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues. -//TEST:COMPARE_HLSL: -profile cs_4_0 -entry CSNumVerticesIndices -//-------------------------------------------------------------------------------------- -// File: TessellatorCS40_NumVerticesIndicesCS.hlsl -// -// The CS to compute number of vertices and triangles to be generated from edge tessellation factor -// -// Copyright (c) Microsoft Corporation. All rights reserved. -//-------------------------------------------------------------------------------------- - -#include "TessellatorCS40_common.hlsl" - -StructuredBuffer<float4> InputEdgeFactor : register(t0); -RWStructuredBuffer<uint2> NumVerticesIndicesOut : register(u0); - -cbuffer cbCS : register(b1) -{ - uint4 g_param; -} - -[numthreads(128, 1, 1)] -void CSNumVerticesIndices( uint3 DTid : SV_DispatchThreadID ) -{ - if (DTid.x < g_param.x) - { - float4 edge_factor = InputEdgeFactor[DTid.x]; - - PROCESSED_TESS_FACTORS_TRI processedTessFactors; - int num_points = TriProcessTessFactors(edge_factor, processedTessFactors, g_partitioning); - - int num_index; - if (0 == num_points) - { - num_index = 0; - } - else if (3 == num_points) - { - num_index = 4; - } - else - { - int numRings = ((processedTessFactors.numPointsForOutsideInside.w + 1) / 2); // +1 is so even tess includes the center point, which we want to now - - int4 outsideInsideHalfTessFactor = int4(ceil(processedTessFactors.outsideInsideHalfTessFactor)); - uint3 n = NumStitchTransition(outsideInsideHalfTessFactor, processedTessFactors.outsideInsideTessFactorParity); - num_index = n.x + n.y + n.z; - num_index += TotalNumStitchRegular(true, DIAGONALS_MIRRORED, processedTessFactors.numPointsForOutsideInside.w, numRings - 1) * 3; - if( processedTessFactors.outsideInsideTessFactorParity.w == TESSELLATOR_PARITY_ODD ) - { - num_index += 4; - } - } - - NumVerticesIndicesOut[DTid.x] = uint2(num_points, num_index); - } -} diff --git a/tests/hlsl/dxsdk/AdaptiveTessellationCS40/TessellatorCS40_ScatterIDCS.hlsl b/tests/hlsl/dxsdk/AdaptiveTessellationCS40/TessellatorCS40_ScatterIDCS.hlsl deleted file mode 100644 index f6f9081da..000000000 --- a/tests/hlsl/dxsdk/AdaptiveTessellationCS40/TessellatorCS40_ScatterIDCS.hlsl +++ /dev/null @@ -1,45 +0,0 @@ -//TEST_DISABLED:COMPARE_HLSL:-no-mangle -profile cs_4_0 -entry CSScatterVertexTriIDIndexID -entry CSScatterIndexTriIDIndexID -//-------------------------------------------------------------------------------------- -// File: TessellatorCS40_ScatterIDCS.hlsl -// -// The CS to scatter vertex ID and triangle ID -// -// Copyright (c) Microsoft Corporation. All rights reserved. -//-------------------------------------------------------------------------------------- -StructuredBuffer<uint2> InputScanned : register(t0); -RWStructuredBuffer<uint2> TriIDIndexIDOut : register(u0); - -cbuffer cbCS : register(b1) -{ - uint4 g_param; -} - -[numthreads(128, 1, 1)] -void CSScatterVertexTriIDIndexID( uint3 DTid : SV_DispatchThreadID ) -{ - if (DTid.x < g_param.x) - { - uint start = InputScanned[DTid.x-1].x; - uint end = InputScanned[DTid.x].x; - - for ( uint i = start; i < end; ++i ) - { - TriIDIndexIDOut[i] = uint2(DTid.x, i - start); - } - } -} - -[numthreads(128, 1, 1)] -void CSScatterIndexTriIDIndexID( uint3 DTid : SV_DispatchThreadID ) -{ - if (DTid.x < g_param.x) - { - uint start = InputScanned[DTid.x-1].y; - uint end = InputScanned[DTid.x].y; - - for ( uint i = start; i < end; ++i ) - { - TriIDIndexIDOut[i] = uint2(DTid.x, i - start); - } - } -} diff --git a/tests/hlsl/dxsdk/AdaptiveTessellationCS40/TessellatorCS40_TessellateIndicesCS.hlsl b/tests/hlsl/dxsdk/AdaptiveTessellationCS40/TessellatorCS40_TessellateIndicesCS.hlsl deleted file mode 100644 index 8c0a5b63b..000000000 --- a/tests/hlsl/dxsdk/AdaptiveTessellationCS40/TessellatorCS40_TessellateIndicesCS.hlsl +++ /dev/null @@ -1,628 +0,0 @@ -//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues. -//TEST:COMPARE_HLSL: -profile cs_4_0 -entry CSTessellationIndices -//-------------------------------------------------------------------------------------- -// File: TessellatorCS40_TessellateIndicesCS.hlsl -// -// The CS to tessellate indices -// -// Copyright (c) Microsoft Corporation. All rights reserved. -//-------------------------------------------------------------------------------------- - -#include "TessellatorCS40_common.hlsl" - -StructuredBuffer<uint2> InputTriIDIndexID : register(t0); -StructuredBuffer<float4> InputEdgeFactor : register(t1); -StructuredBuffer<uint2> InputScanned : register(t2); - -RWByteAddressBuffer TessedIndicesOut : register(u0); - -cbuffer cbCS : register(b1) -{ - uint4 g_param; -} - - -int TransformIndex1(int index, int vertices_base) -{ - return vertices_base + index; -} - -int TransformIndex2(int index, int vertices_base, INDEX_PATCH_CONTEXT IndexPatchContext) -{ - if( index >= IndexPatchContext.outsidePointIndexPatchBase ) // assumed remapped outide indices are > remapped inside vertices - { - if( index == IndexPatchContext.outsidePointIndexBadValue ) - { - index = IndexPatchContext.outsidePointIndexReplacementValue; - } - else - { - index += IndexPatchContext.outsidePointIndexDeltaToRealValue; - } - } - else - { - if( index == IndexPatchContext.insidePointIndexBadValue ) - { - index = IndexPatchContext.insidePointIndexReplacementValue; - } - else - { - index += IndexPatchContext.insidePointIndexDeltaToRealValue; - } - } - - return vertices_base + index; -} - - -int AStitchRegular(bool bTrapezoid, int diagonals, - uint numInsideEdgePoints, - int2 outsideInsideEdgePointBaseOffset, - int i) -{ - if (bTrapezoid) - { - ++ outsideInsideEdgePointBaseOffset.x; - } - - int pt; - - if ((i < 4) && bTrapezoid) - { - if (i < 2) - { - pt = outsideInsideEdgePointBaseOffset.x - 1 + i; - } - else if (i == 2) - { - pt = outsideInsideEdgePointBaseOffset.y; - } - else - { - pt = -1; - } - } - - int index = i; - if (bTrapezoid) - { - index -= 4; - } - - if (index >= 0) - { - uint uindex = (uint)index; - - switch( diagonals ) - { - case DIAGONALS_INSIDE_TO_OUTSIDE: - if (uindex < 5 * numInsideEdgePoints - 5) - { - uint p = uindex / 5; - uint r = uindex - p * 5; - if (r < 2) - { - pt = outsideInsideEdgePointBaseOffset.x + p + r; - } - else if (r < 4) - { - pt = outsideInsideEdgePointBaseOffset.y + p + r; - } - else - { - pt = -1; - } - } - else - { - int r = i - (4 + 5 * numInsideEdgePoints - 5); - if (r < 2) - { - pt = outsideInsideEdgePointBaseOffset.x + numInsideEdgePoints - 1 + r; - } - else if (r == 2) - { - pt = outsideInsideEdgePointBaseOffset.y + numInsideEdgePoints - 1; - } - else - { - pt = -1; - } - } - break; - - case DIAGONALS_INSIDE_TO_OUTSIDE_EXCEPT_MIDDLE: // Assumes ODD tessellation - if (uindex < (numInsideEdgePoints / 2 - 1) * 5) - { - // First half - uint p = uindex / 5; - uint r = uindex - p * 5; - if (r < 2) - { - pt = outsideInsideEdgePointBaseOffset.x + p + r; - } - else if (r < 4) - { - pt = outsideInsideEdgePointBaseOffset.y + p; - } - else - { - pt = -1; - } - } - else if (uindex < (numInsideEdgePoints / 2 - 1) * 5 + 8) - { - // Middle - uint r = uindex - (numInsideEdgePoints / 2 - 1) * 5; - if (0 == r) - { - pt = outsideInsideEdgePointBaseOffset.x + numInsideEdgePoints / 2 - 1; - } - else if (r < 3) - { - pt = outsideInsideEdgePointBaseOffset.y + numInsideEdgePoints / 2 - 1 + (2 - r); - } - else if (r == 3) - { - pt = -1; - } - else if (r < 6) - { - pt = outsideInsideEdgePointBaseOffset.x + numInsideEdgePoints / 2 - 1 + (r - 4); - } - else if (r == 6) - { - pt = outsideInsideEdgePointBaseOffset.y + numInsideEdgePoints / 2 - 1 + 1; - } - else if (r == 7) - { - pt = -1; - } - } - //else if (uindex < (numInsideEdgePoints/2-1) * 5 + 8 + (numInsideEdgePoints - numInsideEdgePoints/2 - 1) * 5) - else if (uindex < numInsideEdgePoints * 5 - 2) - { - // Second half - uint p = (uindex - (numInsideEdgePoints / 2 - 1) * 5 + 8) / 5 + numInsideEdgePoints / 2 + 1; - uint r = uindex - (numInsideEdgePoints / 2 - 1) * 5 + 8 - (p - (numInsideEdgePoints / 2 + 1)) * 5; - if (r < 2) - { - pt = outsideInsideEdgePointBaseOffset.x + p - 1 + r; - } - else if (r < 4) - { - pt = outsideInsideEdgePointBaseOffset.y + p - 1 + r; - } - else - { - pt = -1; - } - } - else - { - //int r = i - (4 + (numInsideEdgePoints/2-1) * 5 + 8 + (numInsideEdgePoints - numInsideEdgePoints/2 - 1) * 5); - int r = i - (numInsideEdgePoints * 5 + 2); - if (r < 2) - { - pt = outsideInsideEdgePointBaseOffset.x + numInsideEdgePoints - 1 + r; - } - else if (r == 2) - { - pt = outsideInsideEdgePointBaseOffset.y + numInsideEdgePoints - 1; - } - else - { - pt = -1; - } - } - break; - - case DIAGONALS_MIRRORED: - if (uindex < (numInsideEdgePoints / 2 + 1) * 2) - { - uint p = uindex / 2; - uint r = uindex - p * 2; - if (0 == r) - { - pt = outsideInsideEdgePointBaseOffset.y + p; - } - else - { - pt = outsideInsideEdgePointBaseOffset.x + p; - } - } - else if (uindex == (numInsideEdgePoints / 2 + 1) * 2) - { - pt = -1; - } - else if (uindex == (numInsideEdgePoints / 2 + 1) * 2 + 1) - { - pt = outsideInsideEdgePointBaseOffset.x + numInsideEdgePoints / 2; - } - //else if (uindex < (numInsideEdgePoints / 2 + 1) * 2 + 2 + (numInsideEdgePoints - numInsideEdgePoints / 2) * 2) - else if (uindex < numInsideEdgePoints * 2 + 4) - { - uint p = (uindex - ((numInsideEdgePoints / 2 + 1) * 2 + 2)) / 2 + numInsideEdgePoints / 2; - uint r = uindex - ((numInsideEdgePoints / 2 + 1) * 2 + 2) - (p - numInsideEdgePoints / 2) * 2; - if (0 == r) - { - pt = outsideInsideEdgePointBaseOffset.x + p; - } - else - { - pt = outsideInsideEdgePointBaseOffset.y + p; - } - } - //else if (uindex == (numInsideEdgePoints / 2 + 1) * 2 + 2 + (numInsideEdgePoints - numInsideEdgePoints / 2) * 2) - else if (uindex == numInsideEdgePoints * 2 + 4) - { - pt = -1; - } - else - { - //int r = i - (4 + (numInsideEdgePoints / 2 + 1) * 2 + 2 + (numInsideEdgePoints - numInsideEdgePoints / 2) * 2 + 1); - uint r = i - (numInsideEdgePoints * 2 + 9); - if (r < 2) - { - pt = outsideInsideEdgePointBaseOffset.x + numInsideEdgePoints - 1 + r; - } - else if (r == 2) - { - pt = outsideInsideEdgePointBaseOffset.y + numInsideEdgePoints - 1; - } - else - { - pt = -1; - } - } - break; - } - } - - return pt; -} - -int AStitchTransition(int2 outsideInsideEdgePointBaseOffset, int2 outsideInsideNumHalfTessFactorPoints, - int2 outsideInsideEdgeTessFactorParity, - uint i) -{ - outsideInsideNumHalfTessFactorPoints -= (TESSELLATOR_PARITY_ODD == outsideInsideEdgeTessFactorParity); - - uint2 out_in_first_half = uint2(outsidePointIndex[outsideInsideNumHalfTessFactorPoints.x][MAX_FACTOR / 2 + 1].y, insidePointIndex[outsideInsideNumHalfTessFactorPoints.y][MAX_FACTOR / 2 + 1].y) * 4; - - uint3 out_in_middle = 0; - if ((outsideInsideEdgeTessFactorParity.y != outsideInsideEdgeTessFactorParity.x) || (outsideInsideEdgeTessFactorParity.y == TESSELLATOR_PARITY_ODD)) - { - if (outsideInsideEdgeTessFactorParity.y == outsideInsideEdgeTessFactorParity.x) - { - // Quad in the middle - out_in_middle.z = 5; - out_in_middle.xy = 1; - } - else if (TESSELLATOR_PARITY_EVEN == outsideInsideEdgeTessFactorParity.y) - { - // Triangle pointing inside - out_in_middle.z = 4; - out_in_middle.x = 1; - } - else - { - // Triangle pointing outside - out_in_middle.z = 4; - out_in_middle.y = 1; - } - } - - - int pt = -1; - - if (i < out_in_first_half.y) - { - // Advance inside - - uint p = i / 4; - uint r = i - p * 4; - p = insidePointIndex[outsideInsideNumHalfTessFactorPoints.y][p].z; - if ((0 == r) || (2 == r)) - { - pt = outsideInsideEdgePointBaseOffset.y + insidePointIndex[outsideInsideNumHalfTessFactorPoints.y][p].y + r / 2; - } - else if (1 == r) - { - pt = outsideInsideEdgePointBaseOffset.x + outsidePointIndex[outsideInsideNumHalfTessFactorPoints.x][p].y; - } - } - else - { - i -= out_in_first_half.y; - - if (i < out_in_first_half.x) - { - // Advance outside - - uint p = i / 4; - uint r = i - p * 4; - p = outsidePointIndex[outsideInsideNumHalfTessFactorPoints.x][p].z; - if (r < 2) - { - pt = outsideInsideEdgePointBaseOffset.x + outsidePointIndex[outsideInsideNumHalfTessFactorPoints.x][p].y + r; - } - else if (r == 2) - { - pt = outsideInsideEdgePointBaseOffset.y + insidePointIndex[outsideInsideNumHalfTessFactorPoints.y][p].y; - if (insidePointIndex[outsideInsideNumHalfTessFactorPoints.y][p].x) - { - ++ pt; - } - } - } - else - { - i -= out_in_first_half.x; - - if (i < out_in_middle.z) - { - uint r = i; - if (outsideInsideEdgeTessFactorParity.y == outsideInsideEdgeTessFactorParity.x) - { - // Quad in the middle - if ((0 == r) || (2 == r)) - { - pt = outsideInsideEdgePointBaseOffset.y + out_in_first_half.y / 4 + (2 == r);//r / 2; - } - else if ((1 == r) || (3 == r)) - { - pt = outsideInsideEdgePointBaseOffset.x + out_in_first_half.x / 4 + (3 == r);//(r - 1) / 2; - } - } - else if (TESSELLATOR_PARITY_EVEN == outsideInsideEdgeTessFactorParity.y) - { - // Triangle pointing inside - if (r == 0) - { - pt = outsideInsideEdgePointBaseOffset.y + out_in_first_half.y / 4; - } - else if (r < 3) - { - pt = outsideInsideEdgePointBaseOffset.x + out_in_first_half.x / 4 + r - 1; - } - } - else - { - // Triangle pointing outside - if ((0 == r) || (2 == r)) - { - pt = outsideInsideEdgePointBaseOffset.y + out_in_first_half.y / 4 + (2 == r);//r / 2; - } - else if (1 == r) - { - pt = outsideInsideEdgePointBaseOffset.x + out_in_first_half.x / 4; - } - } - } - else - { - i -= out_in_middle.z; - - if (i < out_in_first_half.x) - { - // Advance outside - - uint p = i / 4; - uint r = i - p * 4; - p = outsidePointIndex[outsideInsideNumHalfTessFactorPoints.x][p].z; - if (r < 2) - { - pt = outsideInsideEdgePointBaseOffset.x + out_in_first_half.x / 4 + out_in_middle.x + (outsidePointIndex[outsideInsideNumHalfTessFactorPoints.x][MAX_FACTOR / 2 + 1].y - outsidePointIndex[outsideInsideNumHalfTessFactorPoints.x][p + 1].y) + r; - } - else if (r == 2) - { - pt = outsideInsideEdgePointBaseOffset.y + out_in_first_half.y / 4 + out_in_middle.y + (insidePointIndex[outsideInsideNumHalfTessFactorPoints.y][MAX_FACTOR / 2 + 1].y - insidePointIndex[outsideInsideNumHalfTessFactorPoints.y][p + 1].y); - } - } - else - { - // Advance inside - - i -= out_in_first_half.x; - - uint p = i / 4; - uint r = i - p * 4; - p = insidePointIndex[outsideInsideNumHalfTessFactorPoints.y][p].w; - if ((0 == r) || (2 == r)) - { - pt = outsideInsideEdgePointBaseOffset.y + out_in_first_half.y / 4 + out_in_middle.y - + (insidePointIndex[outsideInsideNumHalfTessFactorPoints.y][MAX_FACTOR / 2 + 1].y - insidePointIndex[outsideInsideNumHalfTessFactorPoints.y][p + 1].y) + (2 == r);//r / 2; - } - else if (1 == r) - { - pt = outsideInsideEdgePointBaseOffset.x + out_in_first_half.x / 4 + out_in_middle.x - + (outsidePointIndex[outsideInsideNumHalfTessFactorPoints.x][MAX_FACTOR / 2 + 1].y - outsidePointIndex[outsideInsideNumHalfTessFactorPoints.x][p + 1].y); - if (outsidePointIndex[outsideInsideNumHalfTessFactorPoints.x][p].x) - { - ++ pt; - } - } - } - } - } - } - - return pt; -} - -[numthreads(128, 1, 1)] -void CSTessellationIndices( uint3 DTid : SV_DispatchThreadID, uint3 Gid : SV_GroupID, uint GI : SV_GroupIndex ) -{ - uint id = DTid.x; - //uint id = Gid.x * 128 + GI; // Workaround for some CS4x preview drivers - - if ( id < g_param.x ) - { - uint tri_id = InputTriIDIndexID[id].x; - uint index_id = InputTriIDIndexID[id].y; - uint base_vertex = InputScanned[tri_id-1].x; - - float4 outside_inside_factor = InputEdgeFactor[tri_id]; - - PROCESSED_TESS_FACTORS_TRI processedTessFactors; - int num_points = TriProcessTessFactors(outside_inside_factor, processedTessFactors, g_partitioning); - - uint tessed_indices; - if (3 == num_points) - { - if (index_id < 3) - { - tessed_indices = TransformIndex1(index_id, base_vertex); - } - else - { - tessed_indices = -1; - } - } - else - { - // Generate primitives for all the concentric rings, one side at a time for each ring - static const int startRing = 1; - int numRings = ((processedTessFactors.numPointsForOutsideInside.w + 1) / 2); // +1 is so even tess includes the center point, which we want to now - - int4 outsideInsideHalfTessFactor = int4(ceil(processedTessFactors.outsideInsideHalfTessFactor)); - uint3 num = NumStitchTransition(outsideInsideHalfTessFactor, processedTessFactors.outsideInsideTessFactorParity); - num.y += num.x; - num.z += num.y; - uint num_index = num.z; - num_index += TotalNumStitchRegular(true, DIAGONALS_MIRRORED, processedTessFactors.numPointsForOutsideInside.w, numRings - 1) * 3; - if( processedTessFactors.outsideInsideTessFactorParity.w == TESSELLATOR_PARITY_ODD ) - { - num_index += 4; - } - - int pt; - - if (index_id < num.x) - { - int numPointsForInsideEdge = processedTessFactors.numPointsForOutsideInside.w - 2 * startRing; - - pt = AStitchTransition(int2(0, processedTessFactors.insideEdgePointBaseOffset), - outsideInsideHalfTessFactor.xw, - processedTessFactors.outsideInsideTessFactorParity.xw, - index_id); - if (pt != -1) - { - pt = TransformIndex1(pt, base_vertex); - } - } - else if (index_id < num.y) - { - int numPointsForInsideEdge = processedTessFactors.numPointsForOutsideInside.w - 2 * startRing; - - pt = AStitchTransition( - int2(processedTessFactors.numPointsForOutsideInside.x - 1, processedTessFactors.insideEdgePointBaseOffset + numPointsForInsideEdge - 1), - outsideInsideHalfTessFactor.yw, - processedTessFactors.outsideInsideTessFactorParity.yw, - index_id - num.x); - if (pt != -1) - { - pt = TransformIndex1(pt, base_vertex); - } - } - else if (index_id < num.z) - { - int numPointsForInsideEdge = processedTessFactors.numPointsForOutsideInside.w - 2 * startRing; - - INDEX_PATCH_CONTEXT IndexPatchContext; - IndexPatchContext.insidePointIndexDeltaToRealValue = processedTessFactors.insideEdgePointBaseOffset + 2 * (numPointsForInsideEdge - 1); - IndexPatchContext.insidePointIndexBadValue = numPointsForInsideEdge - 1; - IndexPatchContext.insidePointIndexReplacementValue = processedTessFactors.insideEdgePointBaseOffset; - IndexPatchContext.outsidePointIndexPatchBase = IndexPatchContext.insidePointIndexBadValue+1; // past inside patched index range - IndexPatchContext.outsidePointIndexDeltaToRealValue = processedTessFactors.numPointsForOutsideInside.x + processedTessFactors.numPointsForOutsideInside.y - 2 - - IndexPatchContext.outsidePointIndexPatchBase; - IndexPatchContext.outsidePointIndexBadValue = IndexPatchContext.outsidePointIndexPatchBase - + processedTessFactors.numPointsForOutsideInside.z - 1; - IndexPatchContext.outsidePointIndexReplacementValue = 0; - - pt = AStitchTransition(int2(numPointsForInsideEdge, 0), - outsideInsideHalfTessFactor.zw, - processedTessFactors.outsideInsideTessFactorParity.zw, - index_id - num.y); - if (pt != -1) - { - pt = TransformIndex2(pt, base_vertex, IndexPatchContext); - } - } - else - { - if ((processedTessFactors.outsideInsideTessFactorParity.w == TESSELLATOR_PARITY_ODD) && (index_id >= num_index - 4)) - { - int outsideEdgePointBaseOffset = processedTessFactors.insideEdgePointBaseOffset - + ((processedTessFactors.numPointsForOutsideInside.w + 1) - (numRings + startRing)) * (numRings - startRing - 1) * 3; - - if (index_id - (num_index - 4) != 3) - { - pt = TransformIndex1(outsideEdgePointBaseOffset + index_id - (num_index - 4), base_vertex); - } - else - { - pt = -1; - } - } - else - { - int ring = GetRingFromIndexStitchRegular(true, DIAGONALS_MIRRORED, processedTessFactors.numPointsForOutsideInside.w, index_id - num.z); - - int tn = TotalNumStitchRegular(true, DIAGONALS_MIRRORED, processedTessFactors.numPointsForOutsideInside.w, ring - 1) * 3; - int n = NumStitchRegular(true, DIAGONALS_MIRRORED, processedTessFactors.numPointsForOutsideInside.w - 2 * ring); - - int edge = (index_id - num.z - tn) / n; - int index = (index_id - num.z - tn) - edge * n; - - int2 outsideInsideEdgePointBaseOffset = processedTessFactors.insideEdgePointBaseOffset - + int2(0, 3 * (processedTessFactors.numPointsForOutsideInside.w - 3)) - + ((processedTessFactors.numPointsForOutsideInside.w - (ring + startRing)) + int2(1, -1)) * (ring - startRing - 1) * 3; - - int numPointsForInsideEdge = processedTessFactors.numPointsForOutsideInside.w - 2 * ring; - int numLastPointsForInsideEdge = numPointsForInsideEdge + 2; - - if (edge < 2) - { - pt = AStitchRegular(true, DIAGONALS_MIRRORED, - numPointsForInsideEdge, - outsideInsideEdgePointBaseOffset + (int2(numLastPointsForInsideEdge, numPointsForInsideEdge) - 1) * edge, - index); - if (pt != -1) - { - pt = TransformIndex1(pt, base_vertex); - } - } - else - { - INDEX_PATCH_CONTEXT IndexPatchContext; - IndexPatchContext.insidePointIndexDeltaToRealValue = outsideInsideEdgePointBaseOffset.y + (numPointsForInsideEdge - 1) * 2; - IndexPatchContext.insidePointIndexBadValue = numPointsForInsideEdge - 1; - IndexPatchContext.insidePointIndexReplacementValue = outsideInsideEdgePointBaseOffset.y; - IndexPatchContext.outsidePointIndexPatchBase = IndexPatchContext.insidePointIndexBadValue+1; // past inside patched index range - IndexPatchContext.outsidePointIndexDeltaToRealValue = outsideInsideEdgePointBaseOffset.x + (numLastPointsForInsideEdge - 1) * 2 - - IndexPatchContext.outsidePointIndexPatchBase; - IndexPatchContext.outsidePointIndexBadValue = IndexPatchContext.outsidePointIndexPatchBase - + numLastPointsForInsideEdge - 1; - IndexPatchContext.outsidePointIndexReplacementValue = outsideInsideEdgePointBaseOffset.x; - - pt = AStitchRegular(true, DIAGONALS_MIRRORED, - numPointsForInsideEdge, - int2(numPointsForInsideEdge, 0), - index); - if (pt != -1) - { - pt = TransformIndex2(pt, base_vertex, IndexPatchContext); - } - } - } - } - - tessed_indices = pt; - } - - TessedIndicesOut.Store(id*4, tessed_indices); - } -} diff --git a/tests/hlsl/dxsdk/AdaptiveTessellationCS40/TessellatorCS40_TessellateVerticesCS.hlsl b/tests/hlsl/dxsdk/AdaptiveTessellationCS40/TessellatorCS40_TessellateVerticesCS.hlsl deleted file mode 100644 index e1f6b9ec3..000000000 --- a/tests/hlsl/dxsdk/AdaptiveTessellationCS40/TessellatorCS40_TessellateVerticesCS.hlsl +++ /dev/null @@ -1,206 +0,0 @@ -//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues. -//TEST:COMPARE_HLSL: -profile cs_4_0 -entry CSTessellationVertices -//-------------------------------------------------------------------------------------- -// File: TessellatorCS40_TessellateVerticesCS.hlsl -// -// The CS to tessellate vertices -// -// Copyright (c) Microsoft Corporation. All rights reserved. -//-------------------------------------------------------------------------------------- - -#include "TessellatorCS40_common.hlsl" - -StructuredBuffer<uint2> InputTriIDIndexID : register(t0); -StructuredBuffer<float4> InputEdgeFactor : register(t1); - -struct TessedVertex -{ - uint BaseTriID; - float2 bc; -}; -RWStructuredBuffer<TessedVertex> TessedVerticesOut : register(u0); - -cbuffer cbCS : register(b1) -{ - uint4 g_param; -} - -void PlacePointIn1D(PROCESSED_TESS_FACTORS_TRI processedTessFactors, int ctx_index, int pt, out float location, int parity) -{ - int numHalfTessFactorPoints = int(ceil(processedTessFactors.outsideInsideHalfTessFactor[ctx_index])); - - bool bFlip; - if( pt >= numHalfTessFactorPoints ) - { - pt = (numHalfTessFactorPoints << 1) - pt; - if( TESSELLATOR_PARITY_ODD == parity ) - { - pt -= 1; - } - bFlip = true; - } - else - { - bFlip = false; - } - - if( pt == numHalfTessFactorPoints ) - { - location = 0.5f; - } - else - { - unsigned int indexOnCeilHalfTessFactor = pt; - unsigned int indexOnFloorHalfTessFactor = indexOnCeilHalfTessFactor; - if( pt > processedTessFactors.outsideInsideSplitPointOnFloorHalfTessFactor[ctx_index] ) - { - indexOnFloorHalfTessFactor -= 1; - } - float locationOnFloorHalfTessFactor = indexOnFloorHalfTessFactor * processedTessFactors.outsideInsideInvNumSegmentsOnFloorTessFactor[ctx_index]; - float locationOnCeilHalfTessFactor = indexOnCeilHalfTessFactor * processedTessFactors.outsideInsideInvNumSegmentsOnCeilTessFactor[ctx_index]; - - location = lerp(locationOnFloorHalfTessFactor, locationOnCeilHalfTessFactor, frac(processedTessFactors.outsideInsideHalfTessFactor[ctx_index])); - - if( bFlip ) - { - location = 1.0f - location; - } - } -} - -[numthreads(128, 1, 1)] -void CSTessellationVertices( uint3 DTid : SV_DispatchThreadID, uint3 Gid : SV_GroupID, uint GI : SV_GroupIndex ) -{ - uint id = DTid.x; - //uint id = Gid.x * 128 + GI; // Workaround for some CS4x preview drivers - - if ( id < g_param.x ) - { - uint tri_id = InputTriIDIndexID[id].x; - uint vert_id = InputTriIDIndexID[id].y; - - float4 outside_inside_factor = InputEdgeFactor[tri_id]; - - PROCESSED_TESS_FACTORS_TRI processedTessFactors; - int num_points = TriProcessTessFactors(outside_inside_factor, processedTessFactors, g_partitioning); - - float2 uv; - if (3 == num_points) - { - if (0 == vert_id) - { - uv = float2(0, 1); - } - else if (1 == vert_id) - { - uv = float2(0, 0); - } - else - { - uv = float2(1, 0); - } - } - else - { - if (vert_id < processedTessFactors.insideEdgePointBaseOffset) - { - // Generate exterior ring edge points, clockwise starting from point V (VW, the U==0 edge) - - int edge; - if (vert_id < processedTessFactors.numPointsForOutsideInside.x - 1) - { - edge = 0; - } - else - { - vert_id -= processedTessFactors.numPointsForOutsideInside.x - 1; - if (vert_id < processedTessFactors.numPointsForOutsideInside.y - 1) - { - edge = 1; - } - else - { - vert_id -= processedTessFactors.numPointsForOutsideInside.y - 1; - edge = 2; - } - } - - int p = vert_id; - int endPoint = processedTessFactors.numPointsForOutsideInside[edge] - 1; - float param; - int q = (edge & 0x1) ? p : endPoint - p; // whether to reverse point order given we are defining V or U (W implicit): - // edge0, VW, has V decreasing, so reverse 1D points below - // edge1, WU, has U increasing, so don't reverse 1D points below - // edge2, UV, has U decreasing, so reverse 1D points below - PlacePointIn1D(processedTessFactors, edge,q,param, processedTessFactors.outsideInsideTessFactorParity[edge]); - if (0 == edge) - { - uv = float2(0, param); - } - else if (1 == edge) - { - uv = float2(param, 0); - } - else - { - uv = float2(param, 1 - param); - } - } - else - { - // Generate interior ring points, clockwise spiralling in - - uint index = vert_id - processedTessFactors.insideEdgePointBaseOffset; - uint ring = 1 + (((3 * processedTessFactors.numPointsForOutsideInside.w - 6) - sqrt(sqr(3 * processedTessFactors.numPointsForOutsideInside.w - 6) - 4 * 3 * index)) + 0.001f) / 6; - index -= 3 * (processedTessFactors.numPointsForOutsideInside.w - ring - 1) * (ring - 1); - - uint startPoint = ring; - uint endPoint = processedTessFactors.numPointsForOutsideInside.w - 1 - startPoint; - if (index < 3 * (endPoint - startPoint)) - { - uint edge = index / (endPoint - startPoint); - uint p = index - edge * (endPoint - startPoint) + startPoint; - - int perpendicularAxisPoint = startPoint; - float perpParam; - PlacePointIn1D(processedTessFactors, 3, perpendicularAxisPoint, perpParam, processedTessFactors.outsideInsideTessFactorParity.w); - perpParam = perpParam * 2 / 3; - - float param; - int q = (edge & 0x1) ? p : endPoint - (p - startPoint); // whether to reverse point given we are defining V or U (W implicit): - // edge0, VW, has V decreasing, so reverse 1D points below - // edge1, WU, has U increasing, so don't reverse 1D points below - // edge2, UV, has U decreasing, so reverse 1D points below - PlacePointIn1D(processedTessFactors, 3, q,param, processedTessFactors.outsideInsideTessFactorParity.w); - // edge0 VW, has perpendicular parameter U constant - // edge1 WU, has perpendicular parameter V constant - // edge2 UV, has perpendicular parameter W constant - const unsigned int deriv = 2; // reciprocal is the rate of change of edge-parallel parameters as they are pushed into the triangle - if (0 == edge) - { - uv = float2(perpParam, param - perpParam / deriv); - } - else if (1 == edge) - { - uv = float2(param - perpParam / deriv, perpParam); - } - else - { - uv = float2(param - perpParam / deriv, 1 - (param - perpParam / deriv + perpParam)); - } - } - else - { - if( processedTessFactors.outsideInsideTessFactorParity.w != TESSELLATOR_PARITY_ODD ) - { - // Last point is the point at the center. - uv = 1 / 3.0f; - } - } - } - } - - TessedVerticesOut[id].BaseTriID = tri_id; - TessedVerticesOut[id].bc = uv; - } -} diff --git a/tests/hlsl/dxsdk/AdaptiveTessellationCS40/TessellatorCS40_common.hlsl b/tests/hlsl/dxsdk/AdaptiveTessellationCS40/TessellatorCS40_common.hlsl deleted file mode 100644 index 309044cdb..000000000 --- a/tests/hlsl/dxsdk/AdaptiveTessellationCS40/TessellatorCS40_common.hlsl +++ /dev/null @@ -1,411 +0,0 @@ -//TEST_IGNORE_FILE: -//-------------------------------------------------------------------------------------- -// File: TessellatorCS40_common.hlsl -// -// The common utils included by other shaders in the sample -// -// Copyright (c) Microsoft Corporation. All rights reserved. -//-------------------------------------------------------------------------------------- - -#include "TessellatorCS40_defines.h" - -cbuffer cbNeverChanges : register(b0) -{ - uint4 insidePointIndex[MAX_FACTOR / 2 + 1][MAX_FACTOR / 2 + 2]; - uint4 outsidePointIndex[MAX_FACTOR / 2 + 1][MAX_FACTOR / 2 + 2]; -} - -#define D3D11_TESSELLATOR_MAX_EVEN_TESSELLATION_FACTOR ( 64 ) -#define D3D11_TESSELLATOR_MAX_ODD_TESSELLATION_FACTOR ( 63 ) -#define D3D11_TESSELLATOR_MIN_EVEN_TESSELLATION_FACTOR ( 2 ) -#define D3D11_TESSELLATOR_MIN_ODD_TESSELLATION_FACTOR ( 1 ) - -#define D3D11_TESSELLATOR_PARTITIONING_INTEGER ( 0 ) -#define D3D11_TESSELLATOR_PARTITIONING_POW2 ( 1 ) -#define D3D11_TESSELLATOR_PARTITIONING_FRACTIONAL_ODD ( 2 ) -#define D3D11_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN ( 3 ) - -#define TESSELLATOR_PARITY_EVEN ( 0 ) -#define TESSELLATOR_PARITY_ODD ( 1 ) - -#define EPSILON 1e-6f -#define MIN_ODD_TESSFACTOR_PLUS_HALF_EPSILON (D3D11_TESSELLATOR_MIN_ODD_TESSELLATION_FACTOR + EPSILON/2) - -#define DIAGONALS_INSIDE_TO_OUTSIDE ( 0 ) -#define DIAGONALS_INSIDE_TO_OUTSIDE_EXCEPT_MIDDLE ( 1 ) -#define DIAGONALS_MIRRORED ( 2 ) - - -// This is moved to macro defines at shader compile time, so that the partitioning mode can be changed during runtime -//#define g_partitioning (D3D11_TESSELLATOR_PARTITIONING_POW2) - - -struct PROCESSED_TESS_FACTORS_TRI -{ - float4 outsideInsideTessFactor; - int4 outsideInsideTessFactorParity; - - float4 outsideInsideInvNumSegmentsOnFloorTessFactor; - float4 outsideInsideInvNumSegmentsOnCeilTessFactor; - float4 outsideInsideHalfTessFactor; - int4 outsideInsideSplitPointOnFloorHalfTessFactor; - - // Stuff below is specific to the traversal order - uint4 numPointsForOutsideInside; - uint insideEdgePointBaseOffset; -}; - -struct INDEX_PATCH_CONTEXT -{ - int insidePointIndexDeltaToRealValue; - int insidePointIndexBadValue; - int insidePointIndexReplacementValue; - int outsidePointIndexPatchBase; - int outsidePointIndexDeltaToRealValue; - int outsidePointIndexBadValue; - int outsidePointIndexReplacementValue; -}; - -bool4 isEven(float4 input) -{ - return (((uint4)input) & 1) ? false : true; -} - -uint RemoveMSB(uint val) -{ - int check; - if( val <= 0x0000ffff ) - { - check = ( val <= 0x000000ff ) ? 0x00000080 : 0x00008000; - } - else - { - check = ( val <= 0x00ffffff ) ? 0x00800000 : 0x80000000; - } - for (int i = 0; i < 8; i++, check >>= 1) - { - if( val & check ) - { - return (val & ~check); - } - } - return 0; -} - -uint4 NumPointsForTessFactor(float4 tessFactor, int4 parity) -{ - return TESSELLATOR_PARITY_ODD == parity ? uint4(ceil(0.5f + tessFactor / 2)) * 2 : uint4(ceil(tessFactor / 2)) * 2 + 1; -} - -void ComputeTessFactorContext(float4 tessFactor, int4 parity, - out float4 invNumSegmentsOnFloorTessFactor, - out float4 invNumSegmentsOnCeilTessFactor, - out float4 halfTessFactor, - out int4 splitPointOnFloorHalfTessFactor) -{ - halfTessFactor = tessFactor / 2; - - halfTessFactor += 0.5 * ((TESSELLATOR_PARITY_ODD == parity) | (0.5f == halfTessFactor)); - - float4 floorHalfTessFactor = floor(halfTessFactor); - float4 ceilHalfTessFactor = ceil(halfTessFactor); - int4 numHalfTessFactorPoints = int4(ceilHalfTessFactor); - - for (int index = 0; index < 4; ++ index) - { - if( ceilHalfTessFactor[index] == floorHalfTessFactor[index] ) - { - splitPointOnFloorHalfTessFactor[index] = /*pick value to cause this to be ignored*/ numHalfTessFactorPoints[index]+1; - } - else if( TESSELLATOR_PARITY_ODD == parity[index] ) - { - if( floorHalfTessFactor[index] == 1 ) - { - splitPointOnFloorHalfTessFactor[index] = 0; - } - else - { - splitPointOnFloorHalfTessFactor[index] = (RemoveMSB(int(floorHalfTessFactor[index]) - 1) << 1) + 1; - } - } - else - { - splitPointOnFloorHalfTessFactor[index] = (RemoveMSB(int(floorHalfTessFactor[index])) << 1) + 1; - } - } - - int4 numFloorSegments = int4(floorHalfTessFactor * 2); - int4 numCeilSegments = int4(ceilHalfTessFactor * 2); - int4 s = (TESSELLATOR_PARITY_ODD == parity); - numFloorSegments -= s; - numCeilSegments -= s; - invNumSegmentsOnFloorTessFactor = 1.0f / numFloorSegments; - invNumSegmentsOnCeilTessFactor = 1.0f / numCeilSegments; -} - -int TriProcessTessFactors( inout float4 tessFactor, - out PROCESSED_TESS_FACTORS_TRI processedTessFactors, - int partitioning ) -{ - processedTessFactors = (PROCESSED_TESS_FACTORS_TRI)0; - - int parity = TESSELLATOR_PARITY_EVEN; - switch( partitioning ) - { - case D3D11_TESSELLATOR_PARTITIONING_INTEGER: - default: - break; - case D3D11_TESSELLATOR_PARTITIONING_FRACTIONAL_ODD: - parity = TESSELLATOR_PARITY_ODD; - break; - case D3D11_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN: - parity = TESSELLATOR_PARITY_EVEN; - break; - } - - // Is the patch culled? - if( !(tessFactor.x > 0) || // NaN will pass - !(tessFactor.y > 0) || - !(tessFactor.z > 0) ) - { - return 0; - } - - // Clamp edge TessFactors - float lowerBound, upperBound; - switch(partitioning) - { - case D3D11_TESSELLATOR_PARTITIONING_INTEGER: - case D3D11_TESSELLATOR_PARTITIONING_POW2: // don't care about pow2 distinction for validation, just treat as integer - default: - lowerBound = D3D11_TESSELLATOR_MIN_ODD_TESSELLATION_FACTOR; - upperBound = D3D11_TESSELLATOR_MAX_EVEN_TESSELLATION_FACTOR; - break; - - case D3D11_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN: - lowerBound = D3D11_TESSELLATOR_MIN_EVEN_TESSELLATION_FACTOR; - upperBound = D3D11_TESSELLATOR_MAX_EVEN_TESSELLATION_FACTOR; - break; - - case D3D11_TESSELLATOR_PARTITIONING_FRACTIONAL_ODD: - lowerBound = D3D11_TESSELLATOR_MIN_ODD_TESSELLATION_FACTOR; - upperBound = D3D11_TESSELLATOR_MAX_ODD_TESSELLATION_FACTOR; - break; - } - - tessFactor.xyz = min( upperBound, max( lowerBound, tessFactor.xyz ) ); - - // Clamp inside TessFactors - if(D3D11_TESSELLATOR_PARTITIONING_FRACTIONAL_ODD == partitioning) - { - if( (tessFactor.x > MIN_ODD_TESSFACTOR_PLUS_HALF_EPSILON) || - (tessFactor.y > MIN_ODD_TESSFACTOR_PLUS_HALF_EPSILON) || - (tessFactor.z > MIN_ODD_TESSFACTOR_PLUS_HALF_EPSILON)) - // Don't need the same check for insideTessFactor for tri patches, - // since there is only one insideTessFactor, as opposed to quad - // patches which have 2 insideTessFactors. - { - // Force picture frame - lowerBound = D3D11_TESSELLATOR_MIN_ODD_TESSELLATION_FACTOR + EPSILON; - } - } - - tessFactor.w = min( upperBound, max( lowerBound, tessFactor.w ) ); - // Note the above clamps map NaN to lowerBound - - if (partitioning == D3D11_TESSELLATOR_PARTITIONING_INTEGER) - { - tessFactor = ceil(tessFactor); - } - else if (partitioning == D3D11_TESSELLATOR_PARTITIONING_POW2) - { - static const int exponentMask = 0x7f800000; - static const int mantissaMask = 0x007fffff; - static const int exponentLSB = 0x00800000; - - int4 bits = asint(tessFactor); - tessFactor = bits & mantissaMask ? asfloat((bits & exponentMask) + exponentLSB) : tessFactor; - } - - // Process tessFactors - if ((partitioning == D3D11_TESSELLATOR_PARTITIONING_INTEGER)|| (partitioning == D3D11_TESSELLATOR_PARTITIONING_POW2)) - { - bool4 e = isEven(tessFactor); - processedTessFactors.outsideInsideTessFactorParity.xyz = e.xyz ? TESSELLATOR_PARITY_EVEN : TESSELLATOR_PARITY_ODD; - processedTessFactors.outsideInsideTessFactorParity.w = (e.w || (1 == tessFactor.w)) ? TESSELLATOR_PARITY_EVEN : TESSELLATOR_PARITY_ODD; - } - else - { - processedTessFactors.outsideInsideTessFactorParity = parity; - } - - processedTessFactors.outsideInsideTessFactor = tessFactor; - - if (((partitioning == D3D11_TESSELLATOR_PARTITIONING_INTEGER)|| (partitioning == D3D11_TESSELLATOR_PARTITIONING_POW2)) || (parity == TESSELLATOR_PARITY_ODD)) - { - // Special case if all TessFactors are 1 - if( (1 == processedTessFactors.outsideInsideTessFactor.x) && - (1 == processedTessFactors.outsideInsideTessFactor.y) && - (1 == processedTessFactors.outsideInsideTessFactor.z) && - (1 == processedTessFactors.outsideInsideTessFactor.w) ) - { - return 3; - } - } - - // Compute per-TessFactor metadata - ComputeTessFactorContext(processedTessFactors.outsideInsideTessFactor, processedTessFactors.outsideInsideTessFactorParity, - processedTessFactors.outsideInsideInvNumSegmentsOnFloorTessFactor, - processedTessFactors.outsideInsideInvNumSegmentsOnCeilTessFactor, - processedTessFactors.outsideInsideHalfTessFactor, - processedTessFactors.outsideInsideSplitPointOnFloorHalfTessFactor); - - // Compute some initial data. - - // outside edge offsets and storage - processedTessFactors.numPointsForOutsideInside = NumPointsForTessFactor(processedTessFactors.outsideInsideTessFactor, processedTessFactors.outsideInsideTessFactorParity); - int NumPoints = processedTessFactors.numPointsForOutsideInside.x + processedTessFactors.numPointsForOutsideInside.y + processedTessFactors.numPointsForOutsideInside.z - 3; - - // inside edge offsets - { - uint pointCountMin = (processedTessFactors.outsideInsideTessFactorParity.w == TESSELLATOR_PARITY_ODD) ? 4 : 3; - // max() allows degenerate transition regions when inside TessFactor == 1 - processedTessFactors.numPointsForOutsideInside.w = max(pointCountMin, processedTessFactors.numPointsForOutsideInside.w); - } - - processedTessFactors.insideEdgePointBaseOffset = NumPoints; - - // inside storage, including interior edges above - { - int numInteriorRings = (processedTessFactors.numPointsForOutsideInside.w >> 1) - 1; - int numInteriorPoints; - if( processedTessFactors.outsideInsideTessFactorParity.w == TESSELLATOR_PARITY_ODD ) - { - numInteriorPoints = 3*(numInteriorRings*(numInteriorRings+1) - numInteriorRings); - } - else - { - numInteriorPoints = 3*(numInteriorRings*(numInteriorRings+1)) + 1; - } - NumPoints += numInteriorPoints; - } - - return NumPoints; -} - -int NumStitchRegular(bool bTrapezoid, int diagonals, int numInsideEdgePoints) -{ - int num_index = 0; - - if( bTrapezoid ) - { - num_index += 8; - } - switch( diagonals ) - { - case DIAGONALS_INSIDE_TO_OUTSIDE: - // Diagonals pointing from inside edge forward towards outside edge - num_index += 5 * numInsideEdgePoints - 5; - break; - - case DIAGONALS_INSIDE_TO_OUTSIDE_EXCEPT_MIDDLE: // Assumes ODD tessellation - // Diagonals pointing from outside edge forward towards inside edge - num_index += 5 * numInsideEdgePoints - 2; - break; - - case DIAGONALS_MIRRORED: - num_index += 2 * numInsideEdgePoints + 5; - break; - } - - return num_index; -} - -uint TotalNumStitchRegular(bool bTrapezoid, int diagonals, - int numPointsForInsideTessFactor, int ring) -{ - uint num_index = 0; - - if( bTrapezoid ) - { - num_index += 8 * (ring - 1); - } - switch( diagonals ) - { - case DIAGONALS_INSIDE_TO_OUTSIDE: - // Diagonals pointing from inside edge forward towards outside edge - num_index += (5 * numPointsForInsideTessFactor - 35 - 5 * ring) * (ring - 1); - break; - - case DIAGONALS_INSIDE_TO_OUTSIDE_EXCEPT_MIDDLE: // Assumes ODD tessellation - // Diagonals pointing from outside edge forward towards inside edge - num_index += (5 * numPointsForInsideTessFactor - 12 - 5 * ring) * (ring - 1); - break; - - case DIAGONALS_MIRRORED: - num_index += (2 * numPointsForInsideTessFactor + 1 - 2 * ring) * (ring - 1); - break; - } - - return num_index; -} - -int sqr(int x) -{ - return x * x; -} - -int GetRingFromIndexStitchRegular(bool bTrapezoid, int diagonals, int numPointsForInsideTessFactor, int index) -{ - int t = 0; - if (bTrapezoid) - { - t = 8; - } - - switch( diagonals ) - { - case DIAGONALS_INSIDE_TO_OUTSIDE: - t = (5 * numPointsForInsideTessFactor - (35 - t)) * 3; - return 1 + uint((t + 15) - sqrt(sqr(t + 15) - 4 * 15 * (t + index)) + 0.001f) / 30; - - case DIAGONALS_INSIDE_TO_OUTSIDE_EXCEPT_MIDDLE: - t = (5 * numPointsForInsideTessFactor - (12 - t)) * 3; - return 1 + uint((t + 15) - sqrt(sqr(t + 15) - 4 * 15 * (t + index)) + 0.001f) / 30; - - case DIAGONALS_MIRRORED: - t = ((t + 1) + 2 * numPointsForInsideTessFactor) * 3; - return 1 + uint((t + 6) - sqrt(sqr(t + 6) - 4 * 6 * (t + index)) + 0.001f) / 12; - - default: - return -1; - } -} - -uint3 NumStitchTransition(int4 outsideInsideNumHalfTessFactorPoints, - int4 outsideInsideEdgeTessFactorParity) -{ - outsideInsideNumHalfTessFactorPoints -= (TESSELLATOR_PARITY_ODD == outsideInsideEdgeTessFactorParity); - - uint3 num_index = insidePointIndex[outsideInsideNumHalfTessFactorPoints.w][MAX_FACTOR / 2 + 1].y * 8; - - [unroll] - for (int edge = 0; edge < 3; ++ edge) - { - num_index[edge] += outsidePointIndex[outsideInsideNumHalfTessFactorPoints[edge]][MAX_FACTOR / 2 + 1].y * 8; - - if( (outsideInsideEdgeTessFactorParity.w != outsideInsideEdgeTessFactorParity[edge]) || (outsideInsideEdgeTessFactorParity.w == TESSELLATOR_PARITY_ODD)) - { - if( outsideInsideEdgeTessFactorParity.w == outsideInsideEdgeTessFactorParity[edge] ) - { - num_index[edge] += 5; - } - else - { - num_index[edge] += 4; - } - } - } - - return num_index; -} diff --git a/tests/hlsl/dxsdk/AdaptiveTessellationCS40/TessellatorCS40_defines.h b/tests/hlsl/dxsdk/AdaptiveTessellationCS40/TessellatorCS40_defines.h deleted file mode 100644 index 6b4382393..000000000 --- a/tests/hlsl/dxsdk/AdaptiveTessellationCS40/TessellatorCS40_defines.h +++ /dev/null @@ -1,9 +0,0 @@ -//-------------------------------------------------------------------------------------- -// File: TessellatorCS40_defines.h -// -// This file defines common constants which are included by both CPU code and shader code -// -// Copyright (c) Microsoft Corporation. All rights reserved. -//-------------------------------------------------------------------------------------- - -#define MAX_FACTOR 16 diff --git a/tests/hlsl/dxsdk/BC6HBC7EncoderCS/Shaders/BC6HEncode.hlsl b/tests/hlsl/dxsdk/BC6HBC7EncoderCS/Shaders/BC6HEncode.hlsl deleted file mode 100644 index 1e40c80ef..000000000 --- a/tests/hlsl/dxsdk/BC6HBC7EncoderCS/Shaders/BC6HEncode.hlsl +++ /dev/null @@ -1,2567 +0,0 @@ -//TEST_IGNORE_FILE: -//-------------------------------------------------------------------------------------- -// File: BC6HEncode.hlsl -// -// The Compute Shader for BC6H Encoder -// -// Copyright (c) Microsoft Corporation. All rights reserved. -//-------------------------------------------------------------------------------------- - -//#define REF_DEVICE - -#define UINTLENGTH 32 -#define NCHANNELS 3 -#define SIGNED_F16 96 -#define UNSIGNED_F16 95 -#define MAX_FLOAT asfloat(0x7F7FFFFF) -#define MIN_FLOAT asfloat(0xFF7FFFFF) -#define MAX_INT asint(0x7FFFFFFF) -#define MIN_INT asint(0x80000000) - -cbuffer cbCS : register( b0 ) -{ - uint g_tex_width; - uint g_num_block_x; - uint g_format; //either SIGNED_F16 for DXGI_FORMAT_BC6H_SF16 or UNSIGNED_F16 for DXGI_FORMAT_BC6H_UF16 - uint g_mode_id; - uint g_start_block_id; - uint g_num_total_blocks; -}; - -static const uint candidateModeMemory[14] = { 0x00, 0x01, 0x02, 0x06, 0x0A, 0x0E, 0x12, 0x16, 0x1A, 0x1E, 0x03, 0x07, 0x0B, 0x0F }; -static const uint candidateModeFlag[14] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14 }; -static const bool candidateModeTransformed[14] = { true, true, true, true, true, true, true, true, true, false, false, true, true, true }; -static const uint4 candidateModePrec[14] = { uint4(10,5,5,5), uint4(7,6,6,6), - uint4(11,5,4,4), uint4(11,4,5,4), uint4(11,4,4,5), uint4(9,5,5,5), - uint4(8,6,5,5), uint4(8,5,6,5), uint4(8,5,5,6), uint4(6,6,6,6), - uint4(10,10,10,10), uint4(11,9,9,9), uint4(12,8,8,8), uint4(16,4,4,4) }; - -/*static const uint4x4 candidateSection[32] = -{ - {0,0,1,1, 0,0,1,1, 0,0,1,1, 0,0,1,1}, {0,0,0,1, 0,0,0,1, 0,0,0,1, 0,0,0,1}, {0,1,1,1, 0,1,1,1, 0,1,1,1, 0,1,1,1}, {0,0,0,1, 0,0,1,1, 0,0,1,1, 0,1,1,1}, - {0,0,0,0, 0,0,0,1, 0,0,0,1, 0,0,1,1}, {0,0,1,1, 0,1,1,1, 0,1,1,1, 1,1,1,1}, {0,0,0,1, 0,0,1,1, 0,1,1,1, 1,1,1,1}, {0,0,0,0, 0,0,0,1, 0,0,1,1, 0,1,1,1}, - {0,0,0,0, 0,0,0,0, 0,0,0,1, 0,0,1,1}, {0,0,1,1, 0,1,1,1, 1,1,1,1, 1,1,1,1}, {0,0,0,0, 0,0,0,1, 0,1,1,1, 1,1,1,1}, {0,0,0,0, 0,0,0,0, 0,0,0,1, 0,1,1,1}, - {0,0,0,1, 0,1,1,1, 1,1,1,1, 1,1,1,1}, {0,0,0,0, 0,0,0,0, 1,1,1,1, 1,1,1,1}, {0,0,0,0, 1,1,1,1, 1,1,1,1, 1,1,1,1}, {0,0,0,0, 0,0,0,0, 0,0,0,0, 1,1,1,1}, - {0,0,0,0, 1,0,0,0, 1,1,1,0, 1,1,1,1}, {0,1,1,1, 0,0,0,1, 0,0,0,0, 0,0,0,0}, {0,0,0,0, 0,0,0,0, 1,0,0,0, 1,1,1,0}, {0,1,1,1, 0,0,1,1, 0,0,0,1, 0,0,0,0}, - {0,0,1,1, 0,0,0,1, 0,0,0,0, 0,0,0,0}, {0,0,0,0, 1,0,0,0, 1,1,0,0, 1,1,1,0}, {0,0,0,0, 0,0,0,0, 1,0,0,0, 1,1,0,0}, {0,1,1,1, 0,0,1,1, 0,0,1,1, 0,0,0,1}, - {0,0,1,1, 0,0,0,1, 0,0,0,1, 0,0,0,0}, {0,0,0,0, 1,0,0,0, 1,0,0,0, 1,1,0,0}, {0,1,1,0, 0,1,1,0, 0,1,1,0, 0,1,1,0}, {0,0,1,1, 0,1,1,0, 0,1,1,0, 1,1,0,0}, - {0,0,0,1, 0,1,1,1, 1,1,1,0, 1,0,0,0}, {0,0,0,0, 1,1,1,1, 1,1,1,1, 0,0,0,0}, {0,1,1,1, 0,0,0,1, 1,0,0,0, 1,1,1,0}, {0,0,1,1, 1,0,0,1, 1,0,0,1, 1,1,0,0} -};*/ - -static const uint candidateSectionBit[32] = -{ - 0xCCCC, 0x8888, 0xEEEE, 0xECC8, - 0xC880, 0xFEEC, 0xFEC8, 0xEC80, - 0xC800, 0xFFEC, 0xFE80, 0xE800, - 0xFFE8, 0xFF00, 0xFFF0, 0xF000, - 0xF710, 0x008E, 0x7100, 0x08CE, - 0x008C, 0x7310, 0x3100, 0x8CCE, - 0x088C, 0x3110, 0x6666, 0x366C, - 0x17E8, 0x0FF0, 0x718E, 0x399C -}; - -static const uint candidateFixUpIndex1D[32] = -{ - 15,15,15,15, - 15,15,15,15, - 15,15,15,15, - 15,15,15,15, - 15, 2, 8, 2, - 2, 8, 8,15, - 2, 8, 2, 2, - 8, 8, 2, 2 -}; - -//0, 9, 18, 27, 37, 46, 55, 64 -static const uint aStep1[64] = {0,0,0,0,0,1,1,1, - 1,1,1,1,1,1,2,2, - 2,2,2,2,2,2,2,3, - 3,3,3,3,3,3,3,3, - 3,4,4,4,4,4,4,4, - 4,4,5,5,5,5,5,5, - 5,5,5,6,6,6,6,6, - 6,6,6,6,7,7,7,7}; - -//0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64 -static const uint aStep2[64] = { 0, 0, 0, 1, 1, 1, 1, 2, - 2, 2, 2, 2, 3, 3, 3, 3, - 4, 4, 4, 4, 5, 5, 5, 5, - 6, 6, 6, 6, 6, 7, 7, 7, - 7, 8, 8, 8, 8, 9, 9, 9, - 9,10,10,10,10,10,11,11, - 11,11,12,12,12,12,13,13, - 13,13,14,14,14,14,15,15}; - -static const float3 RGB2LUM = float3(0.2126f, 0.7152f, 0.0722f); - -#define THREAD_GROUP_SIZE 64 -#define BLOCK_SIZE_Y 4 -#define BLOCK_SIZE_X 4 -#define BLOCK_SIZE (BLOCK_SIZE_Y * BLOCK_SIZE_X) - - -//Forward declaration -uint3 float2half( float3 pixel_f ); -int3 start_quantize( uint3 pixel_h ); -void quantize( inout int2x3 endPoint, uint prec ); -void finish_quantize_0( inout int bBadQuantize, inout int2x3 endPoint, uint4 prec, bool transformed ); -void finish_quantize_1( inout int bBadQuantize, inout int2x3 endPoint, uint4 prec, bool transformed ); -void finish_quantize( out bool bBadQuantize, inout int2x3 endPoint, uint4 prec, bool transformed ); - -void start_unquantize( inout int2x3 endPoint[2], uint4 prec, bool transformed ); -void start_unquantize( inout int2x3 endPoint, uint4 prec, bool transformed ); -void unquantize( inout int2x3 color, uint prec ); -uint3 finish_unquantize( int3 color ); -void generate_palette_unquantized8( out uint3 palette, int3 low, int3 high, int i ); -void generate_palette_unquantized16( out uint3 palette, int3 low, int3 high, int i ); -float3 half2float(uint3 color_h ); - -void block_package( inout uint4 block, int2x3 endPoint[2], uint mode_type, uint partition_index ); -void block_package( inout uint4 block, int2x3 endPoint, uint mode_type ); - -void swap(inout int3 lhs, inout int3 rhs) -{ - int3 tmp = lhs; - lhs = rhs; - rhs = tmp; -} - -Texture2D<float4> g_Input : register( t0 ); -StructuredBuffer<uint4> g_InBuff : register( t1 ); - -RWStructuredBuffer<uint4> g_OutBuff : register( u0 ); - -struct SharedData -{ - float3 pixel; - int3 pixel_ph; - float3 pixel_hr; - float pixel_lum; - float error; - uint best_mode; - uint best_partition; - int3 endPoint_low; - int3 endPoint_high; - float endPoint_lum_low; - float endPoint_lum_high; -}; - -groupshared SharedData shared_temp[THREAD_GROUP_SIZE]; - -[numthreads( THREAD_GROUP_SIZE, 1, 1 )] -void TryModeG10CS( uint GI : SV_GroupIndex, uint3 groupID : SV_GroupID ) -{ - const uint MAX_USED_THREAD = 16; - uint BLOCK_IN_GROUP = THREAD_GROUP_SIZE / MAX_USED_THREAD; - uint blockInGroup = GI / MAX_USED_THREAD; - uint blockID = g_start_block_id + groupID.x * BLOCK_IN_GROUP + blockInGroup; - uint threadBase = blockInGroup * MAX_USED_THREAD; - uint threadInBlock = GI - threadBase; - -#ifndef REF_DEVICE - if (blockID >= g_num_total_blocks) - { - return; - } -#endif - - uint block_y = blockID / g_num_block_x; - uint block_x = blockID - block_y * g_num_block_x; - uint base_x = block_x * BLOCK_SIZE_X; - uint base_y = block_y * BLOCK_SIZE_Y; - - if (threadInBlock < 16) - { - shared_temp[GI].pixel = g_Input.Load( uint3( base_x + threadInBlock % 4, base_y + threadInBlock / 4, 0 ) ).rgb; - uint3 pixel_h = float2half( shared_temp[GI].pixel ); - shared_temp[GI].pixel_hr = half2float(pixel_h); - shared_temp[GI].pixel_lum = dot(shared_temp[GI].pixel_hr, RGB2LUM); - shared_temp[GI].pixel_ph = start_quantize( pixel_h ); - - shared_temp[GI].endPoint_low = shared_temp[GI].pixel_ph; - shared_temp[GI].endPoint_high = shared_temp[GI].pixel_ph; - shared_temp[GI].endPoint_lum_low = shared_temp[GI].pixel_lum; - shared_temp[GI].endPoint_lum_high = shared_temp[GI].pixel_lum; - } -#ifdef REF_DEVICE - GroupMemoryBarrierWithGroupSync(); -#endif - - if (threadInBlock < 8) - { - if (shared_temp[GI].endPoint_lum_low > shared_temp[GI + 8].endPoint_lum_low) - { - shared_temp[GI].endPoint_low = shared_temp[GI + 8].endPoint_low; - shared_temp[GI].endPoint_lum_low = shared_temp[GI + 8].endPoint_lum_low; - } - if (shared_temp[GI].endPoint_lum_high < shared_temp[GI + 8].endPoint_lum_high) - { - shared_temp[GI].endPoint_high = shared_temp[GI + 8].endPoint_high; - shared_temp[GI].endPoint_lum_high = shared_temp[GI + 8].endPoint_lum_high; - } - } -#ifdef REF_DEVICE - GroupMemoryBarrierWithGroupSync(); -#endif - if (threadInBlock < 4) - { - if (shared_temp[GI].endPoint_lum_low > shared_temp[GI + 4].endPoint_lum_low) - { - shared_temp[GI].endPoint_low = shared_temp[GI + 4].endPoint_low; - shared_temp[GI].endPoint_lum_low = shared_temp[GI + 4].endPoint_lum_low; - } - if (shared_temp[GI].endPoint_lum_high < shared_temp[GI + 4].endPoint_lum_high) - { - shared_temp[GI].endPoint_high = shared_temp[GI + 4].endPoint_high; - shared_temp[GI].endPoint_lum_high = shared_temp[GI + 4].endPoint_lum_high; - } - } -#ifdef REF_DEVICE - GroupMemoryBarrierWithGroupSync(); -#endif - if (threadInBlock < 2) - { - if (shared_temp[GI].endPoint_lum_low > shared_temp[GI + 2].endPoint_lum_low) - { - shared_temp[GI].endPoint_low = shared_temp[GI + 2].endPoint_low; - shared_temp[GI].endPoint_lum_low = shared_temp[GI + 2].endPoint_lum_low; - } - if (shared_temp[GI].endPoint_lum_high < shared_temp[GI + 2].endPoint_lum_high) - { - shared_temp[GI].endPoint_high = shared_temp[GI + 2].endPoint_high; - shared_temp[GI].endPoint_lum_high = shared_temp[GI + 2].endPoint_lum_high; - } - } -#ifdef REF_DEVICE - GroupMemoryBarrierWithGroupSync(); -#endif - if (threadInBlock < 1) - { - if (shared_temp[GI].endPoint_lum_low > shared_temp[GI + 1].endPoint_lum_low) - { - shared_temp[GI].endPoint_low = shared_temp[GI + 1].endPoint_low; - shared_temp[GI].endPoint_lum_low = shared_temp[GI + 1].endPoint_lum_low; - } - if (shared_temp[GI].endPoint_lum_high < shared_temp[GI + 1].endPoint_lum_high) - { - shared_temp[GI].endPoint_high = shared_temp[GI + 1].endPoint_high; - shared_temp[GI].endPoint_lum_high = shared_temp[GI + 1].endPoint_lum_high; - } - } -#ifdef REF_DEVICE - GroupMemoryBarrierWithGroupSync(); -#endif - - //ergod mode_type 11:14 - if ( threadInBlock == 0 ) - { - int2x3 endPoint; - // find_axis - endPoint[0] = shared_temp[threadBase + 0].endPoint_low; - endPoint[1] = shared_temp[threadBase + 0].endPoint_high; - - //compute_index - float3 span = endPoint[1] - endPoint[0];// fixed a bug in v0.2 - float span_norm_sqr = dot( span, span );// fixed a bug in v0.2 - float dotProduct = dot( span, shared_temp[threadBase + 0].pixel_ph - endPoint[0] );// fixed a bug in v0.2 - if ( span_norm_sqr > 0 && dotProduct >= 0 && uint( dotProduct * 63.49999 / span_norm_sqr ) > 32 ) - { - swap(endPoint[0], endPoint[1]); - - shared_temp[GI].endPoint_low = endPoint[0]; - shared_temp[GI].endPoint_high = endPoint[1]; - } - } -#ifdef REF_DEVICE - GroupMemoryBarrierWithGroupSync(); -#endif - - if (threadInBlock < 4) - { - int2x3 endPoint; - endPoint[0] = shared_temp[threadBase + 0].endPoint_low; - endPoint[1] = shared_temp[threadBase + 0].endPoint_high; - - float3 span = endPoint[1] - endPoint[0]; - float span_norm_sqr = dot( span, span ); - - uint4 prec = candidateModePrec[threadInBlock + 10]; - int2x3 endPoint_q = endPoint; - quantize( endPoint_q, prec.x ); - - bool transformed = candidateModeTransformed[threadInBlock + 10]; - if (transformed) - { - endPoint_q[1] -= endPoint_q[0]; - } - - bool bBadQuantize; - finish_quantize( bBadQuantize, endPoint_q, prec, transformed ); - - start_unquantize( endPoint_q, prec, transformed ); - - unquantize( endPoint_q, prec.x ); - - float error = 0; - [loop]for ( uint j = 0; j < 16; j ++ ) - { - float dotProduct = dot( span, shared_temp[threadBase + j].pixel_ph - endPoint[0] );// fixed a bug in v0.2 - uint index = ( span_norm_sqr <= 0 || dotProduct <= 0 ) ? 0 - : ( ( dotProduct < span_norm_sqr ) ? aStep2[ uint( dotProduct * 63.49999 / span_norm_sqr ) ] : aStep2[63] ); - - uint3 pixel_rh; - generate_palette_unquantized16( pixel_rh, endPoint_q[0], endPoint_q[1], index ); - float3 pixel_r = half2float( pixel_rh ); - pixel_r -= shared_temp[threadBase + j].pixel_hr; - error += dot(pixel_r, pixel_r); - } - if ( bBadQuantize ) - error = 1e20f; - - shared_temp[GI].error = error; - shared_temp[GI].best_mode = candidateModeFlag[threadInBlock + 10]; - } -#ifdef REF_DEVICE - GroupMemoryBarrierWithGroupSync(); -#endif - - if (threadInBlock < 2) - { - if ( shared_temp[GI].error > shared_temp[GI + 2].error ) - { - shared_temp[GI].error = shared_temp[GI + 2].error; - shared_temp[GI].best_mode = shared_temp[GI + 2].best_mode; - } - } -#ifdef REF_DEVICE - GroupMemoryBarrierWithGroupSync(); -#endif - if (threadInBlock < 1) - { - if ( shared_temp[GI].error > shared_temp[GI + 1].error ) - { - shared_temp[GI].error = shared_temp[GI + 1].error; - shared_temp[GI].best_mode = shared_temp[GI + 1].best_mode; - } - - g_OutBuff[blockID] = uint4(asuint(shared_temp[GI].error), shared_temp[GI].best_mode, 0, 0); - } -} - -[numthreads( THREAD_GROUP_SIZE, 1, 1 )] -void TryModeLE10CS( uint GI : SV_GroupIndex, uint3 groupID : SV_GroupID ) -{ - const uint MAX_USED_THREAD = 32; - uint BLOCK_IN_GROUP = THREAD_GROUP_SIZE / MAX_USED_THREAD; - uint blockInGroup = GI / MAX_USED_THREAD; - uint blockID = g_start_block_id + groupID.x * BLOCK_IN_GROUP + blockInGroup; - uint threadBase = blockInGroup * MAX_USED_THREAD; - uint threadInBlock = GI - threadBase; - -#ifndef REF_DEVICE - if (blockID >= g_num_total_blocks) - { - return; - } - - if (asfloat(g_InBuff[blockID].x) < 1e-6f) - { - g_OutBuff[blockID] = g_InBuff[blockID]; - return; - } -#endif - - uint block_y = blockID / g_num_block_x; - uint block_x = blockID - block_y * g_num_block_x; - uint base_x = block_x * BLOCK_SIZE_X; - uint base_y = block_y * BLOCK_SIZE_Y; - - if (threadInBlock < 16) - { - shared_temp[GI].pixel = g_Input.Load( uint3( base_x + threadInBlock % 4, base_y + threadInBlock / 4, 0 ) ).rgb; - uint3 pixel_h = float2half( shared_temp[GI].pixel ); - shared_temp[GI].pixel_hr = half2float(pixel_h); - shared_temp[GI].pixel_lum = dot(shared_temp[GI].pixel_hr, RGB2LUM); - shared_temp[GI].pixel_ph = start_quantize( pixel_h ); - } -#ifdef REF_DEVICE - GroupMemoryBarrierWithGroupSync(); -#endif - - //ergod mode_type 1:10 - if (threadInBlock < 32) - { - // find_axis - int2x3 endPoint[2]; - endPoint[0][0] = MAX_INT; - endPoint[0][1] = MIN_INT; - endPoint[1][0] = MAX_INT; - endPoint[1][1] = MIN_INT; - - float2 endPoint_lum[2]; - endPoint_lum[0][0] = MAX_FLOAT; - endPoint_lum[0][1] = MIN_FLOAT; - endPoint_lum[1][0] = MAX_FLOAT; - endPoint_lum[1][1] = MIN_FLOAT; - - uint bit = candidateSectionBit[threadInBlock]; - for ( uint i = 0; i < 16; i ++ ) - { - int3 pixel_ph = shared_temp[threadBase + i].pixel_ph; - float pixel_lum = shared_temp[threadBase + i].pixel_lum; - if ( (bit >> i) & 1 ) //It gets error when using "candidateSection" as "endPoint_ph" index - { - if (endPoint_lum[1][0] > pixel_lum) - { - endPoint[1][0] = pixel_ph; - endPoint_lum[1][0] = pixel_lum; - } - if (endPoint_lum[1][1] < pixel_lum) - { - endPoint[1][1] = pixel_ph; - endPoint_lum[1][1] = pixel_lum; - } - } - else - { - if (endPoint_lum[0][0] > pixel_lum) - { - endPoint[0][0] = pixel_ph; - endPoint_lum[0][0] = pixel_lum; - } - if (endPoint_lum[0][1] < pixel_lum) - { - endPoint[0][1] = pixel_ph; - endPoint_lum[0][1] = pixel_lum; - } - } - } - - //compute_index - float3 span[2];// fixed a bug in v0.2 - float span_norm_sqr[2];// fixed a bug in v0.2 - [unroll] - for (uint p = 0; p < 2; ++ p) - { - span[p] = endPoint[p][1] - endPoint[p][0]; - span_norm_sqr[p] = dot( span[p], span[p] ); - - float dotProduct = dot( span[p], shared_temp[threadBase + (0 == p ? 0 : candidateFixUpIndex1D[threadInBlock])].pixel_ph - endPoint[p][0] );// fixed a bug in v0.2 - if ( span_norm_sqr[p] > 0 && dotProduct >= 0 && uint( dotProduct * 63.49999 / span_norm_sqr[p] ) > 32 ) - { - span[p] = -span[p]; - swap(endPoint[p][0], endPoint[p][1]); - } - } - - uint4 prec = candidateModePrec[g_mode_id]; - int2x3 endPoint_q[2] = endPoint; - quantize( endPoint_q[0], prec.x ); - quantize( endPoint_q[1], prec.x ); - - bool transformed = candidateModeTransformed[g_mode_id]; - if (transformed) - { - endPoint_q[0][1] -= endPoint_q[0][0]; - endPoint_q[1][0] -= endPoint_q[0][0]; - endPoint_q[1][1] -= endPoint_q[0][0]; - } - - int bBadQuantize = 0; - finish_quantize_0( bBadQuantize, endPoint_q[0], prec, transformed ); - finish_quantize_1( bBadQuantize, endPoint_q[1], prec, transformed ); - - start_unquantize( endPoint_q, prec, transformed ); - - unquantize( endPoint_q[0], prec.x ); - unquantize( endPoint_q[1], prec.x ); - - float error = 0; - for ( uint j = 0; j < 16; j ++ ) - { - uint3 pixel_rh; - if ((bit >> j) & 1) - { - float dotProduct = dot( span[1], shared_temp[threadBase + j].pixel_ph - endPoint[1][0] );// fixed a bug in v0.2 - uint index = ( span_norm_sqr[1] <= 0 || dotProduct <= 0 ) ? 0 - : ( ( dotProduct < span_norm_sqr[1] ) ? aStep1[ uint( dotProduct * 63.49999 / span_norm_sqr[1] ) ] : aStep1[63] ); - generate_palette_unquantized8( pixel_rh, endPoint_q[1][0], endPoint_q[1][1], index ); - } - else - { - float dotProduct = dot( span[0], shared_temp[threadBase + j].pixel_ph - endPoint[0][0] );// fixed a bug in v0.2 - uint index = ( span_norm_sqr[0] <= 0 || dotProduct <= 0 ) ? 0 - : ( ( dotProduct < span_norm_sqr[0] ) ? aStep1[ uint( dotProduct * 63.49999 / span_norm_sqr[0] ) ] : aStep1[63] ); - generate_palette_unquantized8( pixel_rh, endPoint_q[0][0], endPoint_q[0][1], index ); - } - - float3 pixel_r = half2float( pixel_rh ); - pixel_r -= shared_temp[threadBase + j].pixel_hr; - error += dot(pixel_r, pixel_r); - } - if ( bBadQuantize ) - error = 1e20f; - - shared_temp[GI].error = error; - shared_temp[GI].best_mode = candidateModeFlag[g_mode_id]; - shared_temp[GI].best_partition = threadInBlock; - } -#ifdef REF_DEVICE - GroupMemoryBarrierWithGroupSync(); -#endif - - if (threadInBlock < 16) - { - if ( shared_temp[GI].error > shared_temp[GI + 16].error ) - { - shared_temp[GI].error = shared_temp[GI + 16].error; - shared_temp[GI].best_mode = shared_temp[GI + 16].best_mode; - shared_temp[GI].best_partition = shared_temp[GI + 16].best_partition; - } - } -#ifdef REF_DEVICE - GroupMemoryBarrierWithGroupSync(); -#endif - if (threadInBlock < 8) - { - if ( shared_temp[GI].error > shared_temp[GI + 8].error ) - { - shared_temp[GI].error = shared_temp[GI + 8].error; - shared_temp[GI].best_mode = shared_temp[GI + 8].best_mode; - shared_temp[GI].best_partition = shared_temp[GI + 8].best_partition; - } - } -#ifdef REF_DEVICE - GroupMemoryBarrierWithGroupSync(); -#endif - if (threadInBlock < 4) - { - if ( shared_temp[GI].error > shared_temp[GI + 4].error ) - { - shared_temp[GI].error = shared_temp[GI + 4].error; - shared_temp[GI].best_mode = shared_temp[GI + 4].best_mode; - shared_temp[GI].best_partition = shared_temp[GI + 4].best_partition; - } - } -#ifdef REF_DEVICE - GroupMemoryBarrierWithGroupSync(); -#endif - if (threadInBlock < 2) - { - if ( shared_temp[GI].error > shared_temp[GI + 2].error ) - { - shared_temp[GI].error = shared_temp[GI + 2].error; - shared_temp[GI].best_mode = shared_temp[GI + 2].best_mode; - shared_temp[GI].best_partition = shared_temp[GI + 2].best_partition; - } - } -#ifdef REF_DEVICE - GroupMemoryBarrierWithGroupSync(); -#endif - if (threadInBlock < 1) - { - if ( shared_temp[GI].error > shared_temp[GI + 1].error ) - { - shared_temp[GI].error = shared_temp[GI + 1].error; - shared_temp[GI].best_mode = shared_temp[GI + 1].best_mode; - shared_temp[GI].best_partition = shared_temp[GI + 1].best_partition; - } - - if (asfloat(g_InBuff[blockID].x) > shared_temp[GI].error) - { - g_OutBuff[blockID] = uint4(asuint(shared_temp[GI].error), shared_temp[GI].best_mode, shared_temp[GI].best_partition, 0); - } - else - { - g_OutBuff[blockID] = g_InBuff[blockID]; - } - } -} - -[numthreads( THREAD_GROUP_SIZE, 1, 1 )] -void EncodeBlockCS(uint GI : SV_GroupIndex, uint3 groupID : SV_GroupID) -{ - const uint MAX_USED_THREAD = 32; - uint BLOCK_IN_GROUP = THREAD_GROUP_SIZE / MAX_USED_THREAD; - uint blockInGroup = GI / MAX_USED_THREAD; - uint blockID = g_start_block_id + groupID.x * BLOCK_IN_GROUP + blockInGroup; - uint threadBase = blockInGroup * MAX_USED_THREAD; - uint threadInBlock = GI - threadBase; - -#ifndef REF_DEVICE - if (blockID >= g_num_total_blocks) - { - return; - } -#endif - - uint block_y = blockID / g_num_block_x; - uint block_x = blockID - block_y * g_num_block_x; - uint base_x = block_x * BLOCK_SIZE_X; - uint base_y = block_y * BLOCK_SIZE_Y; - - if (threadInBlock < 16) - { - shared_temp[GI].pixel = g_Input.Load( uint3( base_x + threadInBlock % 4, base_y + threadInBlock / 4, 0 ) ).rgb; - shared_temp[GI].pixel_lum = dot(shared_temp[GI].pixel, RGB2LUM); - uint3 pixel_h = float2half( shared_temp[GI].pixel ); - shared_temp[GI].pixel_ph = start_quantize( pixel_h ); - } -#ifdef REF_DEVICE - GroupMemoryBarrierWithGroupSync(); -#endif - - uint best_mode = g_InBuff[blockID].y; - uint best_partition = g_InBuff[blockID].z; - - uint4 block = 0; - - if (threadInBlock < 32) - { - int2x3 endPoint; - endPoint[0] = MAX_INT; - endPoint[1] = MIN_INT; - - float2 endPoint_lum; - endPoint_lum[0] = MAX_FLOAT; - endPoint_lum[1] = MIN_FLOAT; - - int2 endPoint_lum_index; - endPoint_lum_index[0] = -1; - endPoint_lum_index[1] = -1; - - int3 pixel_ph = shared_temp[threadBase + (threadInBlock & 0xF)].pixel_ph; - float pixel_lum = shared_temp[threadBase + (threadInBlock & 0xF)].pixel_lum; - if (threadInBlock < 16) - { - if (best_mode > 10) - { - endPoint[0] = endPoint[1] = pixel_ph; - endPoint_lum[0] = endPoint_lum[1] = pixel_lum; - } - else - { - uint bits = candidateSectionBit[best_partition]; - if (0 == ((bits >> threadInBlock) & 1)) - { - endPoint[0] = endPoint[1] = pixel_ph; - endPoint_lum[0] = endPoint_lum[1] = pixel_lum; - } - } - } - else - { - if (best_mode <= 10) - { - uint bits = candidateSectionBit[best_partition]; - if (1 == ((bits >> (threadInBlock & 0xF)) & 1)) - { - endPoint[0] = endPoint[1] = pixel_ph; - endPoint_lum[0] = endPoint_lum[1] = pixel_lum; - } - } - } - - shared_temp[GI].endPoint_low = endPoint[0]; - shared_temp[GI].endPoint_high = endPoint[1]; - - shared_temp[GI].endPoint_lum_low = endPoint_lum[0]; - shared_temp[GI].endPoint_lum_high = endPoint_lum[1]; - } -#ifdef REF_DEVICE - GroupMemoryBarrierWithGroupSync(); -#endif - if ((threadInBlock & 0xF) < 8) - { - if (shared_temp[GI].endPoint_lum_low > shared_temp[GI + 8].endPoint_lum_low) - { - shared_temp[GI].endPoint_low = shared_temp[GI + 8].endPoint_low; - shared_temp[GI].endPoint_lum_low = shared_temp[GI + 8].endPoint_lum_low; - } - if (shared_temp[GI].endPoint_lum_high < shared_temp[GI + 8].endPoint_lum_high) - { - shared_temp[GI].endPoint_high = shared_temp[GI + 8].endPoint_high; - shared_temp[GI].endPoint_lum_high = shared_temp[GI + 8].endPoint_lum_high; - } - } -#ifdef REF_DEVICE - GroupMemoryBarrierWithGroupSync(); -#endif - if ((threadInBlock & 0xF) < 4) - { - if (shared_temp[GI].endPoint_lum_low > shared_temp[GI + 4].endPoint_lum_low) - { - shared_temp[GI].endPoint_low = shared_temp[GI + 4].endPoint_low; - shared_temp[GI].endPoint_lum_low = shared_temp[GI + 4].endPoint_lum_low; - } - if (shared_temp[GI].endPoint_lum_high < shared_temp[GI + 4].endPoint_lum_high) - { - shared_temp[GI].endPoint_high = shared_temp[GI + 4].endPoint_high; - shared_temp[GI].endPoint_lum_high = shared_temp[GI + 4].endPoint_lum_high; - } - } -#ifdef REF_DEVICE - GroupMemoryBarrierWithGroupSync(); -#endif - if ((threadInBlock & 0xF) < 2) - { - if (shared_temp[GI].endPoint_lum_low > shared_temp[GI + 2].endPoint_lum_low) - { - shared_temp[GI].endPoint_low = shared_temp[GI + 2].endPoint_low; - shared_temp[GI].endPoint_lum_low = shared_temp[GI + 2].endPoint_lum_low; - } - if (shared_temp[GI].endPoint_lum_high < shared_temp[GI + 2].endPoint_lum_high) - { - shared_temp[GI].endPoint_high = shared_temp[GI + 2].endPoint_high; - shared_temp[GI].endPoint_lum_high = shared_temp[GI + 2].endPoint_lum_high; - } - } -#ifdef REF_DEVICE - GroupMemoryBarrierWithGroupSync(); -#endif - if ((threadInBlock & 0xF) < 1) - { - if (shared_temp[GI].endPoint_lum_low > shared_temp[GI + 1].endPoint_lum_low) - { - shared_temp[GI].endPoint_low = shared_temp[GI + 1].endPoint_low; - shared_temp[GI].endPoint_lum_low = shared_temp[GI + 1].endPoint_lum_low; - } - if (shared_temp[GI].endPoint_lum_high < shared_temp[GI + 1].endPoint_lum_high) - { - shared_temp[GI].endPoint_high = shared_temp[GI + 1].endPoint_high; - shared_temp[GI].endPoint_lum_high = shared_temp[GI + 1].endPoint_lum_high; - } - } -#ifdef REF_DEVICE - GroupMemoryBarrierWithGroupSync(); -#endif - - if (threadInBlock < 2) - { - // find_axis - int2x3 endPoint; - endPoint[0] = shared_temp[threadBase + threadInBlock * 16].endPoint_low; - endPoint[1] = shared_temp[threadBase + threadInBlock * 16].endPoint_high; - - uint fixup = 0; - if ((1 == threadInBlock) && (best_mode <= 10)) - { - fixup = candidateFixUpIndex1D[best_partition]; - } - - float3 span = endPoint[1] - endPoint[0]; - float span_norm_sqr = dot( span, span ); - float dotProduct = dot( span, shared_temp[threadBase + fixup].pixel_ph - endPoint[0] ); - if ( span_norm_sqr > 0 && dotProduct >= 0 && uint( dotProduct * 63.49999 / span_norm_sqr ) > 32 ) - { - swap(endPoint[0], endPoint[1]); - } - - shared_temp[GI].endPoint_low = endPoint[0]; - shared_temp[GI].endPoint_high = endPoint[1]; - } -#ifdef REF_DEVICE - GroupMemoryBarrierWithGroupSync(); -#endif - - if (threadInBlock < 16) - { - uint bits; - if (best_mode > 10) - { - bits = 0; - } - else - { - bits = candidateSectionBit[best_partition]; - } - - float3 span; - float dotProduct; - if ((bits >> threadInBlock) & 1) - { - span = shared_temp[threadBase + 1].endPoint_high - shared_temp[threadBase + 1].endPoint_low; - dotProduct = dot( span, shared_temp[threadBase + threadInBlock].pixel_ph - shared_temp[threadBase + 1].endPoint_low ); - } - else - { - span = shared_temp[threadBase + 0].endPoint_high - shared_temp[threadBase + 0].endPoint_low; - dotProduct = dot( span, shared_temp[threadBase + threadInBlock].pixel_ph - shared_temp[threadBase + 0].endPoint_low ); - } - float span_norm_sqr = dot( span, span ); - - if (best_mode > 10) - { - uint index = ( span_norm_sqr <= 0 || dotProduct <= 0 ) ? 0 - : ( ( dotProduct < span_norm_sqr ) ? aStep2[ uint( dotProduct * 63.49999 / span_norm_sqr ) ] : aStep2[63] ); - if (threadInBlock == 0) - { - block.z |= index << 1; - } - else if (threadInBlock < 8) - { - block.z |= index << (threadInBlock * 4); - } - else - { - block.w |= index << ((threadInBlock - 8) * 4); - } - } - else - { - uint index = ( span_norm_sqr <= 0 || dotProduct <= 0 ) ? 0 - : ( ( dotProduct < span_norm_sqr ) ? aStep1[ uint( dotProduct * 63.49999 / span_norm_sqr ) ] : aStep1[63] ); - - uint fixup = candidateFixUpIndex1D[best_partition]; - int2 offset = int2((fixup != 2), (fixup == 15)); - - if (threadInBlock == 0) - { - block.z |= index << 18; - } - else if (threadInBlock < 3) - { - block.z |= index << (20 + (threadInBlock - 1) * 3); - } - else if (threadInBlock < 5) - { - block.z |= index << (25 + (threadInBlock - 3) * 3 + offset.x); - } - else if (threadInBlock == 5) - { - block.w |= index >> !offset.x; - if (!offset.x) - { - block.z |= index << 31; - } - } - else if (threadInBlock < 9) - { - block.w |= index << (2 + (threadInBlock - 6) * 3 + offset.x); - } - else - { - block.w |= index << (11 + (threadInBlock - 9) * 3 + offset.y); - } - } - - shared_temp[GI].pixel_hr.xy = asfloat(block.zw); - } -#ifdef REF_DEVICE - GroupMemoryBarrierWithGroupSync(); -#endif - if (threadInBlock < 8) - { - shared_temp[GI].pixel_hr.xy = asfloat(asuint(shared_temp[GI].pixel_hr.xy) | asuint(shared_temp[GI + 8].pixel_hr.xy)); - } -#ifdef REF_DEVICE - GroupMemoryBarrierWithGroupSync(); -#endif - if (threadInBlock < 4) - { - shared_temp[GI].pixel_hr.xy = asfloat(asuint(shared_temp[GI].pixel_hr.xy) | asuint(shared_temp[GI + 4].pixel_hr.xy)); - } -#ifdef REF_DEVICE - GroupMemoryBarrierWithGroupSync(); -#endif - if (threadInBlock < 2) - { - shared_temp[GI].pixel_hr.xy = asfloat(asuint(shared_temp[GI].pixel_hr.xy) | asuint(shared_temp[GI + 2].pixel_hr.xy)); - } -#ifdef REF_DEVICE - GroupMemoryBarrierWithGroupSync(); -#endif - if (threadInBlock < 1) - { - shared_temp[GI].pixel_hr.xy = asfloat(asuint(shared_temp[GI].pixel_hr.xy) | asuint(shared_temp[GI + 1].pixel_hr.xy)); - - block.zw = asuint(shared_temp[GI].pixel_hr.xy); - } -#ifdef REF_DEVICE - GroupMemoryBarrierWithGroupSync(); -#endif - - bool transformed = candidateModeTransformed[best_mode - 1]; - uint4 prec = candidateModePrec[best_mode - 1]; - if (threadInBlock == 2) - { - int2x3 endPoint_q; - endPoint_q[0] = shared_temp[threadBase + 0].endPoint_low; - endPoint_q[1] = shared_temp[threadBase + 0].endPoint_high; - - quantize( endPoint_q, prec.x ); - if (transformed) - { - endPoint_q[1] -= endPoint_q[0]; - } - - shared_temp[GI].endPoint_low = endPoint_q[0]; - shared_temp[GI].endPoint_high = endPoint_q[1]; - } -#ifdef REF_DEVICE - GroupMemoryBarrierWithGroupSync(); -#endif - if (threadInBlock == 3) - { - int3 ep0 = shared_temp[threadBase + 2].endPoint_low; - int2x3 endPoint_q; - endPoint_q[0] = shared_temp[threadBase + 1].endPoint_low; - endPoint_q[1] = shared_temp[threadBase + 1].endPoint_high; - - if (best_mode <= 10) - { - quantize( endPoint_q, prec.x ); - if (transformed) - { - endPoint_q[0] -= ep0; - endPoint_q[1] -= ep0; - } - - shared_temp[GI].endPoint_low = endPoint_q[0]; - shared_temp[GI].endPoint_high = endPoint_q[1]; - } - } -#ifdef REF_DEVICE - GroupMemoryBarrierWithGroupSync(); -#endif - - if (threadInBlock < 2) - { - int2x3 endPoint_q; - endPoint_q[0] = shared_temp[threadBase + threadInBlock + 2].endPoint_low; - endPoint_q[1] = shared_temp[threadBase + threadInBlock + 2].endPoint_high; - - int bBadQuantize = 0; - if (threadInBlock == 0) - { - if (best_mode > 10) - { - finish_quantize( bBadQuantize, endPoint_q, prec, transformed ); - } - else - { - finish_quantize_0( bBadQuantize, endPoint_q, prec, transformed ); - } - } - else // if (threadInBlock == 1) - { - if (best_mode <= 10) - { - finish_quantize_1( bBadQuantize, endPoint_q, prec, transformed ); - } - } - - shared_temp[GI].endPoint_low = endPoint_q[0]; - shared_temp[GI].endPoint_high = endPoint_q[1]; - } -#ifdef REF_DEVICE - GroupMemoryBarrierWithGroupSync(); -#endif - - if ( threadInBlock == 0 ) - { - int2x3 endPoint_q[2]; - endPoint_q[0][0] = shared_temp[threadBase + 0].endPoint_low; - endPoint_q[0][1] = shared_temp[threadBase + 0].endPoint_high; - endPoint_q[1][0] = shared_temp[threadBase + 1].endPoint_low; - endPoint_q[1][1] = shared_temp[threadBase + 1].endPoint_high; - - if ( best_mode > 10 ) - { - block_package( block, endPoint_q[0], best_mode ); - } - else - { - block_package( block, endPoint_q, best_mode, best_partition ); - } - - g_OutBuff[blockID] = block; - } -} - -uint float2half1( float f ) -{ - uint Result; - - uint IValue = asuint(f); - uint Sign = (IValue & 0x80000000U) >> 16U; - IValue = IValue & 0x7FFFFFFFU; - - if (IValue > 0x47FFEFFFU) - { - // The number is too large to be represented as a half. Saturate to infinity. - Result = 0x7FFFU; - } - else - { - if (IValue < 0x38800000U) - { - // The number is too small to be represented as a normalized half. - // Convert it to a denormalized value. - uint Shift = 113U - (IValue >> 23U); - IValue = (0x800000U | (IValue & 0x7FFFFFU)) >> Shift; - } - else - { - // Rebias the exponent to represent the value as a normalized half. - IValue += 0xC8000000U; - } - - Result = ((IValue + 0x0FFFU + ((IValue >> 13U) & 1U)) >> 13U)&0x7FFFU; - } - return (Result|Sign); -} - -uint3 float2half( float3 endPoint_f ) -{ - //uint3 sign = asuint(endPoint_f) & 0x80000000; - //uint3 expo = asuint(endPoint_f) & 0x7F800000; - //uint3 base = asuint(endPoint_f) & 0x007FFFFF; - //return ( expo < 0x33800000 ) ? 0 - // //0x33800000 indicating 2^-24, which is minimal denormalized number that half can present - // : ( ( expo < 0x38800000 ) ? ( sign >> 16 ) | ( ( base + 0x00800000 ) >> ( 23 - ( ( expo - 0x33800000 ) >> 23 ) ) )//fixed a bug in v0.2 - // //0x38800000 indicating 2^-14, which is minimal normalized number that half can present, so need to use denormalized half presentation - // : ( ( expo == 0x7F800000 || expo > 0x47000000 ) ? ( ( sign >> 16 ) | 0x7bff ) - // // treat NaN as INF, treat INF (including NaN) as the maximum/minimum number that half can present - // // 0x47000000 indicating 2^15, which is maximum exponent that half can present, so cut to 0x7bff which is the maximum half number - // : ( ( sign >> 16 ) | ( ( ( expo - 0x38000000 ) | base ) >> 13 ) ) ) ); - - - return uint3( float2half1( endPoint_f.x ), float2half1( endPoint_f.y ), float2half1( endPoint_f.z ) ); -} -int3 start_quantize( uint3 pixel_h ) -{ - if ( g_format == UNSIGNED_F16 ) - { - return asint( ( pixel_h << 6 ) / 31 ); - } - else - { - return ( pixel_h < 0x8000 ) ? ( ( pixel_h == 0x7bff ) ? 0x7fff : asint( ( pixel_h << 5 ) / 31 ) )// fixed a bug in v0.2 - : ( ( pixel_h == 0x7bff ) ? 0xffff8001 : -asint( ( ( 0x00007fff & pixel_h ) << 5 ) / 31 ) );// fixed a bug in v0.2 - } -} -void quantize( inout int2x3 endPoint, uint prec ) -{ - int iprec = asint( prec ); - if ( g_format == UNSIGNED_F16 ) - { - endPoint = ( ( iprec >= 15 ) | ( endPoint == 0 ) ) ? endPoint - : ( ( endPoint == asint(0xFFFF) ) ? ( ( 1 << iprec ) - 1 ) - : ( ( ( endPoint << iprec ) + asint(0x0000) ) >> 16 ) ); - } - else - { - endPoint = ( ( iprec >= 16 ) | ( endPoint == 0 ) ) ? endPoint - : ( ( endPoint >= 0 ) ? ( ( endPoint == asint(0x7FFF) ) ? ( ( 1 << ( iprec - 1 ) ) - 1 ) : ( ( ( endPoint << ( iprec - 1 ) ) + asint(0x0000) ) >> 15 ) ) - : ( ( -endPoint == asint(0x7FFF) ) ? -( ( 1 << ( iprec - 1 ) ) - 1 ) : -( ( ( -endPoint << ( iprec - 1 ) ) + asint(0x0000) ) >> 15 ) ) ); - } -} -void finish_quantize_0( inout int bBadQuantize, inout int2x3 endPoint, uint4 prec, bool transformed ) -{ - if ( transformed ) - { - bool3 bBadComponent = ( endPoint[1] >= 0 ) ? ( endPoint[1] >= ( 1 << ( prec.yzw - 1 ) ) ) - : ( -endPoint[1] > ( 1 << ( prec.yzw - 1 ) ) ); - bBadQuantize |= any(bBadComponent); - - endPoint[0] = endPoint[0] & ( ( 1 << prec.x ) - 1 ); - endPoint[1] = ( endPoint[1] >= 0 ) ? ( ( endPoint[1] >= ( 1 << ( prec.yzw - 1 ) ) ) ? ( ( 1 << ( prec.yzw - 1 ) ) - 1 ) : endPoint[1] ) - : ( ( -endPoint[1] > ( 1 << ( prec.yzw - 1 ) ) ) ? ( 1 << ( prec.yzw - 1 ) ) : ( endPoint[1] & ( ( 1 << prec.yzw ) - 1 ) ) ); - } - else - { - endPoint &= ( ( 1 << prec.x ) - 1 ); - } -} -void finish_quantize_1( inout int bBadQuantize, inout int2x3 endPoint, uint4 prec, bool transformed ) -{ - if ( transformed ) - { - bool2x3 bBadComponent; - bBadComponent[0] = ( endPoint[0] >= 0 ) ? ( endPoint[0] >= ( 1 << ( prec.yzw - 1 ) ) ) - : ( -endPoint[0] > ( 1 << ( prec.yzw - 1 ) ) ); - bBadComponent[1] = ( endPoint[1] >= 0 ) ? ( endPoint[1] >= ( 1 << ( prec.yzw - 1 ) ) ) - : ( -endPoint[1] > ( 1 << ( prec.yzw - 1 ) ) ); - bBadQuantize |= any(bBadComponent); - - endPoint[0] = ( endPoint[0] >= 0 ) ? ( ( endPoint[0] >= ( 1 << ( prec.yzw - 1 ) ) ) ? ( ( 1 << ( prec.yzw - 1 ) ) - 1 ) : endPoint[0] ) - : ( ( -endPoint[0] > ( 1 << ( prec.yzw - 1 ) ) ) ? ( 1 << ( prec.yzw - 1 ) ) : ( endPoint[0] & ( ( 1 << prec.yzw ) - 1 ) ) ); - endPoint[1] = ( endPoint[1] >= 0 ) ? ( ( endPoint[1] >= ( 1 << ( prec.yzw - 1 ) ) ) ? ( ( 1 << ( prec.yzw - 1 ) ) - 1 ) : endPoint[1] ) - : ( ( -endPoint[1] > ( 1 << ( prec.yzw - 1 ) ) ) ? ( 1 << ( prec.yzw - 1 ) ) : ( endPoint[1] & ( ( 1 << prec.yzw ) - 1 ) ) ); - } - else - { - endPoint &= ( ( 1 << prec.x ) - 1 ); - } -} -void finish_quantize( out bool bBadQuantize, inout int2x3 endPoint, uint4 prec, bool transformed ) -{ - if ( transformed ) - { - bool3 bBadComponent; - bBadComponent = ( endPoint[1] >= 0 ) ? ( endPoint[1] >= ( 1 << ( prec.yzw - 1 ) ) ) - : ( -endPoint[1] > ( 1 << ( prec.yzw - 1 ) ) ); - bBadQuantize = any( bBadComponent ); - - endPoint[0] = endPoint[0] & ( ( 1 << prec.x ) - 1 ); - endPoint[1] = ( endPoint[1] >= 0 ) ? ( ( endPoint[1] >= ( 1 << ( prec.yzw - 1 ) ) ) ? ( ( 1 << ( prec.yzw - 1 ) ) - 1 ) : endPoint[1] ) - : ( ( -endPoint[1] > ( 1 << ( prec.yzw - 1 ) ) ) ? ( 1 << ( prec.yzw - 1 ) ) : ( endPoint[1] & ( ( 1 << prec.yzw ) - 1 ) ) ); - } - else - { - endPoint &= ( ( 1 << prec.x ) - 1 ); - - bBadQuantize = 0; - } -} - -void SIGN_EXTEND( uint3 prec, inout int3 color ) -{ - uint3 p = 1 << (prec - 1); - color = (color & p) ? (color & (p - 1)) - p : color; -} - -void sign_extend( bool transformed, uint4 prec, inout int2x3 endPoint ) -{ - if ( g_format == SIGNED_F16 ) - SIGN_EXTEND( prec.x, endPoint[0] ); - if ( g_format == SIGNED_F16 || transformed ) - SIGN_EXTEND( prec.yzw, endPoint[1] ); -} - -void sign_extend( bool transformed, uint4 prec, inout int2x3 endPoint[2] ) -{ - if ( g_format == SIGNED_F16 ) - SIGN_EXTEND( prec.x, endPoint[0][0] ); - if ( g_format == SIGNED_F16 || transformed ) - { - SIGN_EXTEND( prec.yzw, endPoint[0][1] ); - SIGN_EXTEND( prec.yzw, endPoint[1][0] ); - SIGN_EXTEND( prec.yzw, endPoint[1][1] ); - } -} -void start_unquantize( inout int2x3 endPoint[2], uint4 prec, bool transformed ) -{ - sign_extend( transformed, prec, endPoint ); - if ( transformed ) - { - endPoint[0][1] += endPoint[0][0]; - endPoint[1][0] += endPoint[0][0]; - endPoint[1][1] += endPoint[0][0]; - } -} -void start_unquantize( inout int2x3 endPoint, uint4 prec, bool transformed ) -{ - sign_extend( transformed, prec, endPoint ); - if ( transformed ) - endPoint[1] += endPoint[0]; -} -void unquantize( inout int2x3 color, uint prec ) -{ - int iprec = asint( prec ); - if (g_format == UNSIGNED_F16 ) - { - if (prec < 15) - { - color = (color != 0) ? (color == ((1 << iprec) - 1) ? 0xFFFF : (((color << 16) + 0x8000) >> iprec)) : color; - } - } - else - { - if (prec < 16) - { - uint2x3 s = color >= 0 ? 0 : 1; - color = abs(color); - color = (color != 0) ? (color >= ((1 << (iprec - 1)) - 1) ? 0x7FFF : (((color << 15) + 0x4000) >> (iprec - 1))) : color; - color = s > 0 ? -color : color; - } - } -} -uint3 finish_unquantize( int3 color ) -{ - if ( g_format == UNSIGNED_F16 ) - color = ( color * 31 ) >> 6; - else - { - color = ( color < 0 ) ? -( ( -color * 31 ) >> 5 ) : ( color * 31 ) >> 5; - color = ( color < 0 ) ? ( ( -color ) | 0x8000 ) : color; - } - return asuint(color); -} -void generate_palette_unquantized8( out uint3 palette, int3 low, int3 high, int i ) -{ - static const int aWeight3[] = {0, 9, 18, 27, 37, 46, 55, 64}; - - int3 tmp = ( low * ( 64 - aWeight3[i] ) + high * aWeight3[i] + 32 ) >> 6; - palette = finish_unquantize( tmp ); -} -void generate_palette_unquantized16( out uint3 palette, int3 low, int3 high, int i ) -{ - static const int aWeight4[] = {0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64}; - - int3 tmp = ( low * ( 64 - aWeight4[i] ) + high * aWeight4[i] + 32 ) >> 6; - palette = finish_unquantize( tmp ); -} - -float half2float1( uint Value ) -{ - uint Mantissa = (uint)(Value & 0x03FF); - - uint Exponent; - if ((Value & 0x7C00) != 0) // The value is normalized - { - Exponent = (uint)((Value >> 10) & 0x1F); - } - else if (Mantissa != 0) // The value is denormalized - { - // Normalize the value in the resulting float - Exponent = 1; - - do - { - Exponent--; - Mantissa <<= 1; - } while ((Mantissa & 0x0400) == 0); - - Mantissa &= 0x03FF; - } - else // The value is zero - { - Exponent = (uint)(-112); - } - - uint Result = ((Value & 0x8000) << 16) | // Sign - ((Exponent + 112) << 23) | // Exponent - (Mantissa << 13); // Mantissa - - return asfloat(Result); -} - -float3 half2float(uint3 color_h ) -{ - //uint3 sign = color_h & 0x8000; - //uint3 expo = color_h & 0x7C00; - //uint3 base = color_h & 0x03FF; - //return ( expo == 0 ) ? asfloat( ( sign << 16 ) | asuint( float3(base) / 16777216 ) ) //16777216 = 2^24 - // : asfloat( ( sign << 16 ) | ( ( ( expo + 0x1C000 ) | base ) << 13 ) ); //0x1C000 = 0x1FC00 - 0x3C00 - - return float3( half2float1( color_h.x ), half2float1( color_h.y ), half2float1( color_h.z ) ); -} - -void block_package( inout uint4 block, int2x3 endPoint[2], uint mode_type, uint partition_index ) // for mode 1 - 10 -{ - block.xy = 0; - block.z &= 0xFFFC0000; - - //block.z |= (partition_index & 0x1f) << 13; - - if ( mode_type == candidateModeFlag[0]) - { - /*block.x = candidateModeMemory[0]; - block.x |= ( ( endPoint[0][0].r << 5 ) & 0x00007FE0 ) | ( ( endPoint[0][0].g << 15 ) & 0x01FF8000 ) | ( ( endPoint[0][0].b << 25 ) & 0xFE000000 ); - block.x |= ( endPoint[1][0].g >> 2 ) & 0x00000004; - block.x |= ( endPoint[1][0].b >> 1 ) & 0x00000008; - block.x |= endPoint[1][1].b & 0x00000010; - block.y |= ( ( endPoint[0][0].b >> 7 ) & 0x00000007 ); - block.y |= ( ( endPoint[0][1].r << 3 ) & 0x000000F8 ) | ( ( endPoint[0][1].g << 13 ) & 0x0003E000 ) | ( ( endPoint[0][1].b << 23 ) & 0x0F800000 ); - block.yz |= ( endPoint[1][0].gr << uint2(9, 1) ) & uint2(0x00001E00, 0x0000003E); - block.y |= ( endPoint[1][0].b << 29 ) & 0xE0000000; - block.y |= ( ( endPoint[1][1].g << 4 ) & 0x00000100 ); - block.yz |= ( endPoint[1][1].gr << uint2(19, 7) ) & uint2(0x00780000, 0x00000F80); - block.yz |= ( ( endPoint[1][1].b << uint2(27, 9) ) & uint2(0x10000000, 0x00001000) ) | ( ( endPoint[1][1].b << uint2(18, 4) ) & uint2(0x00040000, 0x00000040) ); - block.z |= ( endPoint[1][0].b >> 3 ) & 0x00000001;*/ - - block.x |= ((candidateModeMemory[0] >> 0) & 1) << 0; - block.x |= ((candidateModeMemory[0] >> 1) & 1) << 1; - block.x |= ((endPoint[1][0].g >> 4) & 1) << 2; - block.x |= ((endPoint[1][0].b >> 4) & 1) << 3; - block.x |= ((endPoint[1][1].b >> 4) & 1) << 4; - block.x |= ((endPoint[0][0].r >> 0) & 1) << 5; - block.x |= ((endPoint[0][0].r >> 1) & 1) << 6; - block.x |= ((endPoint[0][0].r >> 2) & 1) << 7; - block.x |= ((endPoint[0][0].r >> 3) & 1) << 8; - block.x |= ((endPoint[0][0].r >> 4) & 1) << 9; - block.x |= ((endPoint[0][0].r >> 5) & 1) << 10; - block.x |= ((endPoint[0][0].r >> 6) & 1) << 11; - block.x |= ((endPoint[0][0].r >> 7) & 1) << 12; - block.x |= ((endPoint[0][0].r >> 8) & 1) << 13; - block.x |= ((endPoint[0][0].r >> 9) & 1) << 14; - block.x |= ((endPoint[0][0].g >> 0) & 1) << 15; - block.x |= ((endPoint[0][0].g >> 1) & 1) << 16; - block.x |= ((endPoint[0][0].g >> 2) & 1) << 17; - block.x |= ((endPoint[0][0].g >> 3) & 1) << 18; - block.x |= ((endPoint[0][0].g >> 4) & 1) << 19; - block.x |= ((endPoint[0][0].g >> 5) & 1) << 20; - block.x |= ((endPoint[0][0].g >> 6) & 1) << 21; - block.x |= ((endPoint[0][0].g >> 7) & 1) << 22; - block.x |= ((endPoint[0][0].g >> 8) & 1) << 23; - block.x |= ((endPoint[0][0].g >> 9) & 1) << 24; - block.x |= ((endPoint[0][0].b >> 0) & 1) << 25; - block.x |= ((endPoint[0][0].b >> 1) & 1) << 26; - block.x |= ((endPoint[0][0].b >> 2) & 1) << 27; - block.x |= ((endPoint[0][0].b >> 3) & 1) << 28; - block.x |= ((endPoint[0][0].b >> 4) & 1) << 29; - block.x |= ((endPoint[0][0].b >> 5) & 1) << 30; - block.x |= ((endPoint[0][0].b >> 6) & 1) << 31; - block.y |= ((endPoint[0][0].b >> 7) & 1) << 0; - block.y |= ((endPoint[0][0].b >> 8) & 1) << 1; - block.y |= ((endPoint[0][0].b >> 9) & 1) << 2; - block.y |= ((endPoint[0][1].r >> 0) & 1) << 3; - block.y |= ((endPoint[0][1].r >> 1) & 1) << 4; - block.y |= ((endPoint[0][1].r >> 2) & 1) << 5; - block.y |= ((endPoint[0][1].r >> 3) & 1) << 6; - block.y |= ((endPoint[0][1].r >> 4) & 1) << 7; - block.y |= ((endPoint[1][1].g >> 4) & 1) << 8; - block.y |= ((endPoint[1][0].g >> 0) & 1) << 9; - block.y |= ((endPoint[1][0].g >> 1) & 1) << 10; - block.y |= ((endPoint[1][0].g >> 2) & 1) << 11; - block.y |= ((endPoint[1][0].g >> 3) & 1) << 12; - block.y |= ((endPoint[0][1].g >> 0) & 1) << 13; - block.y |= ((endPoint[0][1].g >> 1) & 1) << 14; - block.y |= ((endPoint[0][1].g >> 2) & 1) << 15; - block.y |= ((endPoint[0][1].g >> 3) & 1) << 16; - block.y |= ((endPoint[0][1].g >> 4) & 1) << 17; - block.y |= ((endPoint[1][1].b >> 0) & 1) << 18; - block.y |= ((endPoint[1][1].g >> 0) & 1) << 19; - block.y |= ((endPoint[1][1].g >> 1) & 1) << 20; - block.y |= ((endPoint[1][1].g >> 2) & 1) << 21; - block.y |= ((endPoint[1][1].g >> 3) & 1) << 22; - block.y |= ((endPoint[0][1].b >> 0) & 1) << 23; - block.y |= ((endPoint[0][1].b >> 1) & 1) << 24; - block.y |= ((endPoint[0][1].b >> 2) & 1) << 25; - block.y |= ((endPoint[0][1].b >> 3) & 1) << 26; - block.y |= ((endPoint[0][1].b >> 4) & 1) << 27; - block.y |= ((endPoint[1][1].b >> 1) & 1) << 28; - block.y |= ((endPoint[1][0].b >> 0) & 1) << 29; - block.y |= ((endPoint[1][0].b >> 1) & 1) << 30; - block.y |= ((endPoint[1][0].b >> 2) & 1) << 31; - block.z |= ((endPoint[1][0].b >> 3) & 1) << 0; - block.z |= ((endPoint[1][0].r >> 0) & 1) << 1; - block.z |= ((endPoint[1][0].r >> 1) & 1) << 2; - block.z |= ((endPoint[1][0].r >> 2) & 1) << 3; - block.z |= ((endPoint[1][0].r >> 3) & 1) << 4; - block.z |= ((endPoint[1][0].r >> 4) & 1) << 5; - block.z |= ((endPoint[1][1].b >> 2) & 1) << 6; - block.z |= ((endPoint[1][1].r >> 0) & 1) << 7; - block.z |= ((endPoint[1][1].r >> 1) & 1) << 8; - block.z |= ((endPoint[1][1].r >> 2) & 1) << 9; - block.z |= ((endPoint[1][1].r >> 3) & 1) << 10; - block.z |= ((endPoint[1][1].r >> 4) & 1) << 11; - block.z |= ((endPoint[1][1].b >> 3) & 1) << 12; - block.z |= ((partition_index >> 0) & 1) << 13; - block.z |= ((partition_index >> 1) & 1) << 14; - block.z |= ((partition_index >> 2) & 1) << 15; - block.z |= ((partition_index >> 3) & 1) << 16; - block.z |= ((partition_index >> 4) & 1) << 17; - } - else if ( mode_type == candidateModeFlag[1]) - { - /*block.x = candidateModeMemory[1]; - block.x |= ( ( endPoint[0][0].r << 5 ) & 0x00000FE0 ) | ( ( endPoint[0][0].g << 15 ) & 0x003F8000 ) | ( ( endPoint[0][0].b << 25 ) & 0xFE000000 ); - block.y |= ( ( endPoint[0][1].r << 3 ) & 0x000001F8 ) | ( ( endPoint[0][1].g << 13 ) & 0x0007E000 ) | ( ( endPoint[0][1].b << 23 ) & 0x1F800000 ); - block.x |= ( ( endPoint[1][0].g >> 3 ) & 0x00000004 ) | ( ( endPoint[1][0].g << 20 ) & 0x01000000 ); - block.x |= ( endPoint[1][1].g >> 1 ) & 0x00000018; - block.x |= ( ( endPoint[1][1].b << 21 ) & 0x00800000 ) | ( ( endPoint[1][1].b << 12 ) & 0x00003000 ); - block.x |= ( ( endPoint[1][0].b << 17 ) & 0x00400000 ) | ( ( endPoint[1][0].b << 10 ) & 0x00004000 ); - block.yz |= ( endPoint[1][0].gr << uint2(9, 1) ) & uint2(0x00001E00, 0x0000007E); - block.y |= ( endPoint[1][0].b << 29 ) & 0xE0000000; - block.yz |= ( endPoint[1][1].gr << uint2(19, 7) ) & uint2(0x00780000, 0x00001F80); - block.y |= ( ( endPoint[1][1].b >> 4 ) & 0x00000002 ) | ( ( endPoint[1][1].b >> 2 ) & 0x00000004 ) | ( ( endPoint[1][1].b >> 3 ) & 0x00000001 ); - block.z |= ( endPoint[1][0].b >> 3 ) & 0x00000001;*/ - - block.x |= ((candidateModeMemory[1] >> 0) & 1) << 0; - block.x |= ((candidateModeMemory[1] >> 1) & 1) << 1; - block.x |= ((endPoint[1][0].g >> 5) & 1) << 2; - block.x |= ((endPoint[1][1].g >> 4) & 1) << 3; - block.x |= ((endPoint[1][1].g >> 5) & 1) << 4; - block.x |= ((endPoint[0][0].r >> 0) & 1) << 5; - block.x |= ((endPoint[0][0].r >> 1) & 1) << 6; - block.x |= ((endPoint[0][0].r >> 2) & 1) << 7; - block.x |= ((endPoint[0][0].r >> 3) & 1) << 8; - block.x |= ((endPoint[0][0].r >> 4) & 1) << 9; - block.x |= ((endPoint[0][0].r >> 5) & 1) << 10; - block.x |= ((endPoint[0][0].r >> 6) & 1) << 11; - block.x |= ((endPoint[1][1].b >> 0) & 1) << 12; - block.x |= ((endPoint[1][1].b >> 1) & 1) << 13; - block.x |= ((endPoint[1][0].b >> 4) & 1) << 14; - block.x |= ((endPoint[0][0].g >> 0) & 1) << 15; - block.x |= ((endPoint[0][0].g >> 1) & 1) << 16; - block.x |= ((endPoint[0][0].g >> 2) & 1) << 17; - block.x |= ((endPoint[0][0].g >> 3) & 1) << 18; - block.x |= ((endPoint[0][0].g >> 4) & 1) << 19; - block.x |= ((endPoint[0][0].g >> 5) & 1) << 20; - block.x |= ((endPoint[0][0].g >> 6) & 1) << 21; - block.x |= ((endPoint[1][0].b >> 5) & 1) << 22; - block.x |= ((endPoint[1][1].b >> 2) & 1) << 23; - block.x |= ((endPoint[1][0].g >> 4) & 1) << 24; - block.x |= ((endPoint[0][0].b >> 0) & 1) << 25; - block.x |= ((endPoint[0][0].b >> 1) & 1) << 26; - block.x |= ((endPoint[0][0].b >> 2) & 1) << 27; - block.x |= ((endPoint[0][0].b >> 3) & 1) << 28; - block.x |= ((endPoint[0][0].b >> 4) & 1) << 29; - block.x |= ((endPoint[0][0].b >> 5) & 1) << 30; - block.x |= ((endPoint[0][0].b >> 6) & 1) << 31; - block.y |= ((endPoint[1][1].b >> 3) & 1) << 0; - block.y |= ((endPoint[1][1].b >> 5) & 1) << 1; - block.y |= ((endPoint[1][1].b >> 4) & 1) << 2; - block.y |= ((endPoint[0][1].r >> 0) & 1) << 3; - block.y |= ((endPoint[0][1].r >> 1) & 1) << 4; - block.y |= ((endPoint[0][1].r >> 2) & 1) << 5; - block.y |= ((endPoint[0][1].r >> 3) & 1) << 6; - block.y |= ((endPoint[0][1].r >> 4) & 1) << 7; - block.y |= ((endPoint[0][1].r >> 5) & 1) << 8; - block.y |= ((endPoint[1][0].g >> 0) & 1) << 9; - block.y |= ((endPoint[1][0].g >> 1) & 1) << 10; - block.y |= ((endPoint[1][0].g >> 2) & 1) << 11; - block.y |= ((endPoint[1][0].g >> 3) & 1) << 12; - block.y |= ((endPoint[0][1].g >> 0) & 1) << 13; - block.y |= ((endPoint[0][1].g >> 1) & 1) << 14; - block.y |= ((endPoint[0][1].g >> 2) & 1) << 15; - block.y |= ((endPoint[0][1].g >> 3) & 1) << 16; - block.y |= ((endPoint[0][1].g >> 4) & 1) << 17; - block.y |= ((endPoint[0][1].g >> 5) & 1) << 18; - block.y |= ((endPoint[1][1].g >> 0) & 1) << 19; - block.y |= ((endPoint[1][1].g >> 1) & 1) << 20; - block.y |= ((endPoint[1][1].g >> 2) & 1) << 21; - block.y |= ((endPoint[1][1].g >> 3) & 1) << 22; - block.y |= ((endPoint[0][1].b >> 0) & 1) << 23; - block.y |= ((endPoint[0][1].b >> 1) & 1) << 24; - block.y |= ((endPoint[0][1].b >> 2) & 1) << 25; - block.y |= ((endPoint[0][1].b >> 3) & 1) << 26; - block.y |= ((endPoint[0][1].b >> 4) & 1) << 27; - block.y |= ((endPoint[0][1].b >> 5) & 1) << 28; - block.y |= ((endPoint[1][0].b >> 0) & 1) << 29; - block.y |= ((endPoint[1][0].b >> 1) & 1) << 30; - block.y |= ((endPoint[1][0].b >> 2) & 1) << 31; - block.z |= ((endPoint[1][0].b >> 3) & 1) << 0; - block.z |= ((endPoint[1][0].r >> 0) & 1) << 1; - block.z |= ((endPoint[1][0].r >> 1) & 1) << 2; - block.z |= ((endPoint[1][0].r >> 2) & 1) << 3; - block.z |= ((endPoint[1][0].r >> 3) & 1) << 4; - block.z |= ((endPoint[1][0].r >> 4) & 1) << 5; - block.z |= ((endPoint[1][0].r >> 5) & 1) << 6; - block.z |= ((endPoint[1][1].r >> 0) & 1) << 7; - block.z |= ((endPoint[1][1].r >> 1) & 1) << 8; - block.z |= ((endPoint[1][1].r >> 2) & 1) << 9; - block.z |= ((endPoint[1][1].r >> 3) & 1) << 10; - block.z |= ((endPoint[1][1].r >> 4) & 1) << 11; - block.z |= ((endPoint[1][1].r >> 5) & 1) << 12; - block.z |= ((partition_index >> 0) & 1) << 13; - block.z |= ((partition_index >> 1) & 1) << 14; - block.z |= ((partition_index >> 2) & 1) << 15; - block.z |= ((partition_index >> 3) & 1) << 16; - block.z |= ((partition_index >> 4) & 1) << 17; - } - else if ( mode_type == candidateModeFlag[2]) - { - /*block.x = candidateModeMemory[2]; - block.x |= ( ( endPoint[0][0].r << 5 ) & 0x00007FE0 ) | ( ( endPoint[0][0].g << 15 ) & 0x01FF8000 ) | ( ( endPoint[0][0].b << 25 ) & 0xFE000000 ); - block.y |= ( endPoint[0][0].r >> 2 ) & 0x00000100; - block.y |= ( endPoint[0][0].g << 7 ) & 0x00020000; - block.y |= ( ( endPoint[0][0].b << 17 ) & 0x08000000 ) | ( ( endPoint[0][0].b >> 7 ) & 0x00000007 ); - block.y |= ( ( endPoint[0][1].r << 3 ) & 0x000000F8 ) | ( ( endPoint[0][1].g << 13 ) & 0x0001E000 ) | ( ( endPoint[0][1].b << 23 ) & 0x07800000 ); - block.yz |= ( endPoint[1][0].gr << uint2(9, 1) ) & uint2(0x00001E00, 0x0000003E); - block.y |= ( endPoint[1][0].b << 29 ) & 0xE0000000; - block.yz |= ( endPoint[1][1].gr << uint2(19, 7) ) & uint2(0x00780000, 0x00000F80); - block.yz |= ( ( endPoint[1][1].b << uint2(27, 9) ) & uint2(0x10000000, 0x00001000) ) | ( ( endPoint[1][1].b << uint2(18, 4) ) & uint2(0x00040000, 0x00000040) ); - block.z |= ( endPoint[1][0].b >> 3 ) & 0x00000001;*/ - - block.x |= ((candidateModeMemory[2] >> 0) & 1) << 0; - block.x |= ((candidateModeMemory[2] >> 1) & 1) << 1; - block.x |= ((candidateModeMemory[2] >> 2) & 1) << 2; - block.x |= ((candidateModeMemory[2] >> 3) & 1) << 3; - block.x |= ((candidateModeMemory[2] >> 4) & 1) << 4; - block.x |= ((endPoint[0][0].r >> 0) & 1) << 5; - block.x |= ((endPoint[0][0].r >> 1) & 1) << 6; - block.x |= ((endPoint[0][0].r >> 2) & 1) << 7; - block.x |= ((endPoint[0][0].r >> 3) & 1) << 8; - block.x |= ((endPoint[0][0].r >> 4) & 1) << 9; - block.x |= ((endPoint[0][0].r >> 5) & 1) << 10; - block.x |= ((endPoint[0][0].r >> 6) & 1) << 11; - block.x |= ((endPoint[0][0].r >> 7) & 1) << 12; - block.x |= ((endPoint[0][0].r >> 8) & 1) << 13; - block.x |= ((endPoint[0][0].r >> 9) & 1) << 14; - block.x |= ((endPoint[0][0].g >> 0) & 1) << 15; - block.x |= ((endPoint[0][0].g >> 1) & 1) << 16; - block.x |= ((endPoint[0][0].g >> 2) & 1) << 17; - block.x |= ((endPoint[0][0].g >> 3) & 1) << 18; - block.x |= ((endPoint[0][0].g >> 4) & 1) << 19; - block.x |= ((endPoint[0][0].g >> 5) & 1) << 20; - block.x |= ((endPoint[0][0].g >> 6) & 1) << 21; - block.x |= ((endPoint[0][0].g >> 7) & 1) << 22; - block.x |= ((endPoint[0][0].g >> 8) & 1) << 23; - block.x |= ((endPoint[0][0].g >> 9) & 1) << 24; - block.x |= ((endPoint[0][0].b >> 0) & 1) << 25; - block.x |= ((endPoint[0][0].b >> 1) & 1) << 26; - block.x |= ((endPoint[0][0].b >> 2) & 1) << 27; - block.x |= ((endPoint[0][0].b >> 3) & 1) << 28; - block.x |= ((endPoint[0][0].b >> 4) & 1) << 29; - block.x |= ((endPoint[0][0].b >> 5) & 1) << 30; - block.x |= ((endPoint[0][0].b >> 6) & 1) << 31; - block.y |= ((endPoint[0][0].b >> 7) & 1) << 0; - block.y |= ((endPoint[0][0].b >> 8) & 1) << 1; - block.y |= ((endPoint[0][0].b >> 9) & 1) << 2; - block.y |= ((endPoint[0][1].r >> 0) & 1) << 3; - block.y |= ((endPoint[0][1].r >> 1) & 1) << 4; - block.y |= ((endPoint[0][1].r >> 2) & 1) << 5; - block.y |= ((endPoint[0][1].r >> 3) & 1) << 6; - block.y |= ((endPoint[0][1].r >> 4) & 1) << 7; - block.y |= ((endPoint[0][0].r >> 10) & 1) << 8; - block.y |= ((endPoint[1][0].g >> 0) & 1) << 9; - block.y |= ((endPoint[1][0].g >> 1) & 1) << 10; - block.y |= ((endPoint[1][0].g >> 2) & 1) << 11; - block.y |= ((endPoint[1][0].g >> 3) & 1) << 12; - block.y |= ((endPoint[0][1].g >> 0) & 1) << 13; - block.y |= ((endPoint[0][1].g >> 1) & 1) << 14; - block.y |= ((endPoint[0][1].g >> 2) & 1) << 15; - block.y |= ((endPoint[0][1].g >> 3) & 1) << 16; - block.y |= ((endPoint[0][0].g >> 10) & 1) << 17; - block.y |= ((endPoint[1][1].b >> 0) & 1) << 18; - block.y |= ((endPoint[1][1].g >> 0) & 1) << 19; - block.y |= ((endPoint[1][1].g >> 1) & 1) << 20; - block.y |= ((endPoint[1][1].g >> 2) & 1) << 21; - block.y |= ((endPoint[1][1].g >> 3) & 1) << 22; - block.y |= ((endPoint[0][1].b >> 0) & 1) << 23; - block.y |= ((endPoint[0][1].b >> 1) & 1) << 24; - block.y |= ((endPoint[0][1].b >> 2) & 1) << 25; - block.y |= ((endPoint[0][1].b >> 3) & 1) << 26; - block.y |= ((endPoint[0][0].b >> 10) & 1) << 27; - block.y |= ((endPoint[1][1].b >> 1) & 1) << 28; - block.y |= ((endPoint[1][0].b >> 0) & 1) << 29; - block.y |= ((endPoint[1][0].b >> 1) & 1) << 30; - block.y |= ((endPoint[1][0].b >> 2) & 1) << 31; - block.z |= ((endPoint[1][0].b >> 3) & 1) << 0; - block.z |= ((endPoint[1][0].r >> 0) & 1) << 1; - block.z |= ((endPoint[1][0].r >> 1) & 1) << 2; - block.z |= ((endPoint[1][0].r >> 2) & 1) << 3; - block.z |= ((endPoint[1][0].r >> 3) & 1) << 4; - block.z |= ((endPoint[1][0].r >> 4) & 1) << 5; - block.z |= ((endPoint[1][1].b >> 2) & 1) << 6; - block.z |= ((endPoint[1][1].r >> 0) & 1) << 7; - block.z |= ((endPoint[1][1].r >> 1) & 1) << 8; - block.z |= ((endPoint[1][1].r >> 2) & 1) << 9; - block.z |= ((endPoint[1][1].r >> 3) & 1) << 10; - block.z |= ((endPoint[1][1].r >> 4) & 1) << 11; - block.z |= ((endPoint[1][1].b >> 3) & 1) << 12; - block.z |= ((partition_index >> 0) & 1) << 13; - block.z |= ((partition_index >> 1) & 1) << 14; - block.z |= ((partition_index >> 2) & 1) << 15; - block.z |= ((partition_index >> 3) & 1) << 16; - block.z |= ((partition_index >> 4) & 1) << 17; - } - else if ( mode_type == candidateModeFlag[3]) - { - /*block.x = candidateModeMemory[3]; - block.x |= ( ( endPoint[0][0].r << 5 ) & 0x00007FE0 ) | ( ( endPoint[0][0].g << 15 ) & 0x01FF8000 ) | ( ( endPoint[0][0].b << 25 ) & 0xFE000000 ); - block.y |= ( endPoint[0][0].r >> 3 ) & 0x00000080; - block.y |= ( endPoint[0][0].g << 8 ) & 0x00040000; - block.y |= ( ( endPoint[0][0].b << 17 ) & 0x08000000 ) | ( ( endPoint[0][0].b >> 7 ) & 0x00000007 ); - block.y |= ( ( endPoint[0][1].r << 3 ) & 0x00000078 ) | ( ( endPoint[0][1].g << 13 ) & 0x0003E000 ) | ( ( endPoint[0][1].b << 23 ) & 0x07800000 ); - block.yz |= ( endPoint[1][0].gr << uint2(9, 1) ) & uint2(0x00001E00, 0x0000001E); - block.y |= ( endPoint[1][0].b << 29 ) & 0xE0000000; - block.y |= ( ( endPoint[1][1].g << 4 ) & 0x00000100 ); - block.yz |= ( endPoint[1][1].gr << uint2(19, 7) ) & uint2(0x00780000, 0x00000780); - block.yz |= ( endPoint[1][1].b << uint2(27, 9) ) & uint2(0x10000000, 0x00001000); - block.z |= ( ( endPoint[1][0].g << 7 ) & 0x00000800 ); - block.z |= ( endPoint[1][0].b >> 3 ) & 0x00000001; - block.z |= ( endPoint[1][1].b << 4 ) & 0x00000040; - block.z |= ( endPoint[1][1].b << 5 ) & 0x00000020;*/ - - block.x |= ((candidateModeMemory[3] >> 0) & 1) << 0; - block.x |= ((candidateModeMemory[3] >> 1) & 1) << 1; - block.x |= ((candidateModeMemory[3] >> 2) & 1) << 2; - block.x |= ((candidateModeMemory[3] >> 3) & 1) << 3; - block.x |= ((candidateModeMemory[3] >> 4) & 1) << 4; - block.x |= ((endPoint[0][0].r >> 0) & 1) << 5; - block.x |= ((endPoint[0][0].r >> 1) & 1) << 6; - block.x |= ((endPoint[0][0].r >> 2) & 1) << 7; - block.x |= ((endPoint[0][0].r >> 3) & 1) << 8; - block.x |= ((endPoint[0][0].r >> 4) & 1) << 9; - block.x |= ((endPoint[0][0].r >> 5) & 1) << 10; - block.x |= ((endPoint[0][0].r >> 6) & 1) << 11; - block.x |= ((endPoint[0][0].r >> 7) & 1) << 12; - block.x |= ((endPoint[0][0].r >> 8) & 1) << 13; - block.x |= ((endPoint[0][0].r >> 9) & 1) << 14; - block.x |= ((endPoint[0][0].g >> 0) & 1) << 15; - block.x |= ((endPoint[0][0].g >> 1) & 1) << 16; - block.x |= ((endPoint[0][0].g >> 2) & 1) << 17; - block.x |= ((endPoint[0][0].g >> 3) & 1) << 18; - block.x |= ((endPoint[0][0].g >> 4) & 1) << 19; - block.x |= ((endPoint[0][0].g >> 5) & 1) << 20; - block.x |= ((endPoint[0][0].g >> 6) & 1) << 21; - block.x |= ((endPoint[0][0].g >> 7) & 1) << 22; - block.x |= ((endPoint[0][0].g >> 8) & 1) << 23; - block.x |= ((endPoint[0][0].g >> 9) & 1) << 24; - block.x |= ((endPoint[0][0].b >> 0) & 1) << 25; - block.x |= ((endPoint[0][0].b >> 1) & 1) << 26; - block.x |= ((endPoint[0][0].b >> 2) & 1) << 27; - block.x |= ((endPoint[0][0].b >> 3) & 1) << 28; - block.x |= ((endPoint[0][0].b >> 4) & 1) << 29; - block.x |= ((endPoint[0][0].b >> 5) & 1) << 30; - block.x |= ((endPoint[0][0].b >> 6) & 1) << 31; - block.y |= ((endPoint[0][0].b >> 7) & 1) << 0; - block.y |= ((endPoint[0][0].b >> 8) & 1) << 1; - block.y |= ((endPoint[0][0].b >> 9) & 1) << 2; - block.y |= ((endPoint[0][1].r >> 0) & 1) << 3; - block.y |= ((endPoint[0][1].r >> 1) & 1) << 4; - block.y |= ((endPoint[0][1].r >> 2) & 1) << 5; - block.y |= ((endPoint[0][1].r >> 3) & 1) << 6; - block.y |= ((endPoint[0][0].r >> 10) & 1) << 7; - block.y |= ((endPoint[1][1].g >> 4) & 1) << 8; - block.y |= ((endPoint[1][0].g >> 0) & 1) << 9; - block.y |= ((endPoint[1][0].g >> 1) & 1) << 10; - block.y |= ((endPoint[1][0].g >> 2) & 1) << 11; - block.y |= ((endPoint[1][0].g >> 3) & 1) << 12; - block.y |= ((endPoint[0][1].g >> 0) & 1) << 13; - block.y |= ((endPoint[0][1].g >> 1) & 1) << 14; - block.y |= ((endPoint[0][1].g >> 2) & 1) << 15; - block.y |= ((endPoint[0][1].g >> 3) & 1) << 16; - block.y |= ((endPoint[0][1].g >> 4) & 1) << 17; - block.y |= ((endPoint[0][0].g >> 10) & 1) << 18; - block.y |= ((endPoint[1][1].g >> 0) & 1) << 19; - block.y |= ((endPoint[1][1].g >> 1) & 1) << 20; - block.y |= ((endPoint[1][1].g >> 2) & 1) << 21; - block.y |= ((endPoint[1][1].g >> 3) & 1) << 22; - block.y |= ((endPoint[0][1].b >> 0) & 1) << 23; - block.y |= ((endPoint[0][1].b >> 1) & 1) << 24; - block.y |= ((endPoint[0][1].b >> 2) & 1) << 25; - block.y |= ((endPoint[0][1].b >> 3) & 1) << 26; - block.y |= ((endPoint[0][0].b >> 10) & 1) << 27; - block.y |= ((endPoint[1][1].b >> 1) & 1) << 28; - block.y |= ((endPoint[1][0].b >> 0) & 1) << 29; - block.y |= ((endPoint[1][0].b >> 1) & 1) << 30; - block.y |= ((endPoint[1][0].b >> 2) & 1) << 31; - block.z |= ((endPoint[1][0].b >> 3) & 1) << 0; - block.z |= ((endPoint[1][0].r >> 0) & 1) << 1; - block.z |= ((endPoint[1][0].r >> 1) & 1) << 2; - block.z |= ((endPoint[1][0].r >> 2) & 1) << 3; - block.z |= ((endPoint[1][0].r >> 3) & 1) << 4; - block.z |= ((endPoint[1][1].b >> 0) & 1) << 5; - block.z |= ((endPoint[1][1].b >> 2) & 1) << 6; - block.z |= ((endPoint[1][1].r >> 0) & 1) << 7; - block.z |= ((endPoint[1][1].r >> 1) & 1) << 8; - block.z |= ((endPoint[1][1].r >> 2) & 1) << 9; - block.z |= ((endPoint[1][1].r >> 3) & 1) << 10; - block.z |= ((endPoint[1][0].g >> 4) & 1) << 11; - block.z |= ((endPoint[1][1].b >> 3) & 1) << 12; - block.z |= ((partition_index >> 0) & 1) << 13; - block.z |= ((partition_index >> 1) & 1) << 14; - block.z |= ((partition_index >> 2) & 1) << 15; - block.z |= ((partition_index >> 3) & 1) << 16; - block.z |= ((partition_index >> 4) & 1) << 17; - } - else if ( mode_type == candidateModeFlag[4]) - { - /*block.x = candidateModeMemory[4]; - block.x |= ( ( endPoint[0][0].r << 5 ) & 0x00007FE0 ) | ( ( endPoint[0][0].g << 15 ) & 0x01FF8000 ) | ( ( endPoint[0][0].b << 25 ) & 0xFE000000 ); - block.y |= ( endPoint[0][0].r >> 3 ) & 0x00000080; - block.y |= ( endPoint[0][0].g << 7 ) & 0x00020000; - block.y |= ( ( endPoint[0][0].b << 18 ) & 0x10000000 ) | ( ( endPoint[0][0].b >> 7 ) & 0x00000007 ); - block.y |= ( ( endPoint[0][1].r << 3 ) & 0x00000078 ) | ( ( endPoint[0][1].g << 13 ) & 0x0001E000 ) | ( ( endPoint[0][1].b << 23 ) & 0x0F800000 ); - block.y |= ( ( endPoint[1][0].g << 9 ) & 0x00001E00 ) | ( ( endPoint[1][0].b << 4 ) & 0x00000100 ); - block.y |= ( endPoint[1][0].b << 29 ) & 0xE0000000; - block.yz |= ( endPoint[1][1].gr << uint2(19, 7) ) & uint2(0x00780000, 0x00000780); - block.yz |= ( endPoint[1][1].b << uint2(18, 4) ) & uint2(0x00040000, 0x00000060); - block.z |= ( endPoint[1][0].r << 1 ) & 0x0000001E; - block.z |= ( endPoint[1][0].b >> 3 ) & 0x00000001; - block.z |= ( ( endPoint[1][1].b << 7 ) & 0x00000800 ) | ( ( endPoint[1][1].b << 9 ) & 0x00001000 );*/ - - block.x |= ((candidateModeMemory[4] >> 0) & 1) << 0; - block.x |= ((candidateModeMemory[4] >> 1) & 1) << 1; - block.x |= ((candidateModeMemory[4] >> 2) & 1) << 2; - block.x |= ((candidateModeMemory[4] >> 3) & 1) << 3; - block.x |= ((candidateModeMemory[4] >> 4) & 1) << 4; - block.x |= ((endPoint[0][0].r >> 0) & 1) << 5; - block.x |= ((endPoint[0][0].r >> 1) & 1) << 6; - block.x |= ((endPoint[0][0].r >> 2) & 1) << 7; - block.x |= ((endPoint[0][0].r >> 3) & 1) << 8; - block.x |= ((endPoint[0][0].r >> 4) & 1) << 9; - block.x |= ((endPoint[0][0].r >> 5) & 1) << 10; - block.x |= ((endPoint[0][0].r >> 6) & 1) << 11; - block.x |= ((endPoint[0][0].r >> 7) & 1) << 12; - block.x |= ((endPoint[0][0].r >> 8) & 1) << 13; - block.x |= ((endPoint[0][0].r >> 9) & 1) << 14; - block.x |= ((endPoint[0][0].g >> 0) & 1) << 15; - block.x |= ((endPoint[0][0].g >> 1) & 1) << 16; - block.x |= ((endPoint[0][0].g >> 2) & 1) << 17; - block.x |= ((endPoint[0][0].g >> 3) & 1) << 18; - block.x |= ((endPoint[0][0].g >> 4) & 1) << 19; - block.x |= ((endPoint[0][0].g >> 5) & 1) << 20; - block.x |= ((endPoint[0][0].g >> 6) & 1) << 21; - block.x |= ((endPoint[0][0].g >> 7) & 1) << 22; - block.x |= ((endPoint[0][0].g >> 8) & 1) << 23; - block.x |= ((endPoint[0][0].g >> 9) & 1) << 24; - block.x |= ((endPoint[0][0].b >> 0) & 1) << 25; - block.x |= ((endPoint[0][0].b >> 1) & 1) << 26; - block.x |= ((endPoint[0][0].b >> 2) & 1) << 27; - block.x |= ((endPoint[0][0].b >> 3) & 1) << 28; - block.x |= ((endPoint[0][0].b >> 4) & 1) << 29; - block.x |= ((endPoint[0][0].b >> 5) & 1) << 30; - block.x |= ((endPoint[0][0].b >> 6) & 1) << 31; - block.y |= ((endPoint[0][0].b >> 7) & 1) << 0; - block.y |= ((endPoint[0][0].b >> 8) & 1) << 1; - block.y |= ((endPoint[0][0].b >> 9) & 1) << 2; - block.y |= ((endPoint[0][1].r >> 0) & 1) << 3; - block.y |= ((endPoint[0][1].r >> 1) & 1) << 4; - block.y |= ((endPoint[0][1].r >> 2) & 1) << 5; - block.y |= ((endPoint[0][1].r >> 3) & 1) << 6; - block.y |= ((endPoint[0][0].r >> 10) & 1) << 7; - block.y |= ((endPoint[1][0].b >> 4) & 1) << 8; - block.y |= ((endPoint[1][0].g >> 0) & 1) << 9; - block.y |= ((endPoint[1][0].g >> 1) & 1) << 10; - block.y |= ((endPoint[1][0].g >> 2) & 1) << 11; - block.y |= ((endPoint[1][0].g >> 3) & 1) << 12; - block.y |= ((endPoint[0][1].g >> 0) & 1) << 13; - block.y |= ((endPoint[0][1].g >> 1) & 1) << 14; - block.y |= ((endPoint[0][1].g >> 2) & 1) << 15; - block.y |= ((endPoint[0][1].g >> 3) & 1) << 16; - block.y |= ((endPoint[0][0].g >> 10) & 1) << 17; - block.y |= ((endPoint[1][1].b >> 0) & 1) << 18; - block.y |= ((endPoint[1][1].g >> 0) & 1) << 19; - block.y |= ((endPoint[1][1].g >> 1) & 1) << 20; - block.y |= ((endPoint[1][1].g >> 2) & 1) << 21; - block.y |= ((endPoint[1][1].g >> 3) & 1) << 22; - block.y |= ((endPoint[0][1].b >> 0) & 1) << 23; - block.y |= ((endPoint[0][1].b >> 1) & 1) << 24; - block.y |= ((endPoint[0][1].b >> 2) & 1) << 25; - block.y |= ((endPoint[0][1].b >> 3) & 1) << 26; - block.y |= ((endPoint[0][1].b >> 4) & 1) << 27; - block.y |= ((endPoint[0][0].b >> 10) & 1) << 28; - block.y |= ((endPoint[1][0].b >> 0) & 1) << 29; - block.y |= ((endPoint[1][0].b >> 1) & 1) << 30; - block.y |= ((endPoint[1][0].b >> 2) & 1) << 31; - block.z |= ((endPoint[1][0].b >> 3) & 1) << 0; - block.z |= ((endPoint[1][0].r >> 0) & 1) << 1; - block.z |= ((endPoint[1][0].r >> 1) & 1) << 2; - block.z |= ((endPoint[1][0].r >> 2) & 1) << 3; - block.z |= ((endPoint[1][0].r >> 3) & 1) << 4; - block.z |= ((endPoint[1][1].b >> 1) & 1) << 5; - block.z |= ((endPoint[1][1].b >> 2) & 1) << 6; - block.z |= ((endPoint[1][1].r >> 0) & 1) << 7; - block.z |= ((endPoint[1][1].r >> 1) & 1) << 8; - block.z |= ((endPoint[1][1].r >> 2) & 1) << 9; - block.z |= ((endPoint[1][1].r >> 3) & 1) << 10; - block.z |= ((endPoint[1][1].b >> 4) & 1) << 11; - block.z |= ((endPoint[1][1].b >> 3) & 1) << 12; - block.z |= ((partition_index >> 0) & 1) << 13; - block.z |= ((partition_index >> 1) & 1) << 14; - block.z |= ((partition_index >> 2) & 1) << 15; - block.z |= ((partition_index >> 3) & 1) << 16; - block.z |= ((partition_index >> 4) & 1) << 17; - } - else if ( mode_type == candidateModeFlag[5]) - { - /*block.x = candidateModeMemory[5]; - block.x |= ( ( endPoint[0][0].r << 5 ) & 0x00003FE0 ) | ( ( endPoint[0][0].g << 15 ) & 0x00FF8000 ) | ( ( endPoint[0][0].b << 25 ) & 0xFE000000); - block.y |= ( endPoint[0][0].b >> 7 ) & 0x00000003; - block.y |= ( ( endPoint[0][1].r << 3 ) & 0x000000F8 ) | ( ( endPoint[0][1].g << 13 ) & 0x0003E000 ) | ( ( endPoint[0][1].b << 23 ) & 0x0F800000 ); - block.x |= ( ( endPoint[1][0].g << 20 ) & 0x01000000 ) | ( ( endPoint[1][0].b << 10 ) & 0x00004000 ); - block.yz |= ( endPoint[1][0].gr << uint2(9, 1) ) & uint2(0x00001E00, 0x0000003E); - block.y |= ( endPoint[1][0].b << 29 ) & 0xE0000000; - block.y |= ( ( endPoint[1][1].g << 4 ) & 0x00000100 ) | ( ( endPoint[1][1].b >> 2 ) & 0x00000004 ); - block.y |= ( ( endPoint[1][1].b << 27 ) & 0x10000000 ); - block.yz |= ( endPoint[1][1].gr << uint2(19, 7) ) & uint2(0x00780000, 0x00000F80); - block.yz |= ( endPoint[1][1].b << uint2(18, 4) ) & uint2(0x00040000, 0x00000040); - block.z |= ( endPoint[1][0].b >> 3 ) & 0x00000001; - block.z |= ( ( endPoint[1][1].b << 9 ) & 0x00001000 );*/ - - block.x |= ((candidateModeMemory[5] >> 0) & 1) << 0; - block.x |= ((candidateModeMemory[5] >> 1) & 1) << 1; - block.x |= ((candidateModeMemory[5] >> 2) & 1) << 2; - block.x |= ((candidateModeMemory[5] >> 3) & 1) << 3; - block.x |= ((candidateModeMemory[5] >> 4) & 1) << 4; - block.x |= ((endPoint[0][0].r >> 0) & 1) << 5; - block.x |= ((endPoint[0][0].r >> 1) & 1) << 6; - block.x |= ((endPoint[0][0].r >> 2) & 1) << 7; - block.x |= ((endPoint[0][0].r >> 3) & 1) << 8; - block.x |= ((endPoint[0][0].r >> 4) & 1) << 9; - block.x |= ((endPoint[0][0].r >> 5) & 1) << 10; - block.x |= ((endPoint[0][0].r >> 6) & 1) << 11; - block.x |= ((endPoint[0][0].r >> 7) & 1) << 12; - block.x |= ((endPoint[0][0].r >> 8) & 1) << 13; - block.x |= ((endPoint[1][0].b >> 4) & 1) << 14; - block.x |= ((endPoint[0][0].g >> 0) & 1) << 15; - block.x |= ((endPoint[0][0].g >> 1) & 1) << 16; - block.x |= ((endPoint[0][0].g >> 2) & 1) << 17; - block.x |= ((endPoint[0][0].g >> 3) & 1) << 18; - block.x |= ((endPoint[0][0].g >> 4) & 1) << 19; - block.x |= ((endPoint[0][0].g >> 5) & 1) << 20; - block.x |= ((endPoint[0][0].g >> 6) & 1) << 21; - block.x |= ((endPoint[0][0].g >> 7) & 1) << 22; - block.x |= ((endPoint[0][0].g >> 8) & 1) << 23; - block.x |= ((endPoint[1][0].g >> 4) & 1) << 24; - block.x |= ((endPoint[0][0].b >> 0) & 1) << 25; - block.x |= ((endPoint[0][0].b >> 1) & 1) << 26; - block.x |= ((endPoint[0][0].b >> 2) & 1) << 27; - block.x |= ((endPoint[0][0].b >> 3) & 1) << 28; - block.x |= ((endPoint[0][0].b >> 4) & 1) << 29; - block.x |= ((endPoint[0][0].b >> 5) & 1) << 30; - block.x |= ((endPoint[0][0].b >> 6) & 1) << 31; - block.y |= ((endPoint[0][0].b >> 7) & 1) << 0; - block.y |= ((endPoint[0][0].b >> 8) & 1) << 1; - block.y |= ((endPoint[1][1].b >> 4) & 1) << 2; - block.y |= ((endPoint[0][1].r >> 0) & 1) << 3; - block.y |= ((endPoint[0][1].r >> 1) & 1) << 4; - block.y |= ((endPoint[0][1].r >> 2) & 1) << 5; - block.y |= ((endPoint[0][1].r >> 3) & 1) << 6; - block.y |= ((endPoint[0][1].r >> 4) & 1) << 7; - block.y |= ((endPoint[1][1].g >> 4) & 1) << 8; - block.y |= ((endPoint[1][0].g >> 0) & 1) << 9; - block.y |= ((endPoint[1][0].g >> 1) & 1) << 10; - block.y |= ((endPoint[1][0].g >> 2) & 1) << 11; - block.y |= ((endPoint[1][0].g >> 3) & 1) << 12; - block.y |= ((endPoint[0][1].g >> 0) & 1) << 13; - block.y |= ((endPoint[0][1].g >> 1) & 1) << 14; - block.y |= ((endPoint[0][1].g >> 2) & 1) << 15; - block.y |= ((endPoint[0][1].g >> 3) & 1) << 16; - block.y |= ((endPoint[0][1].g >> 4) & 1) << 17; - block.y |= ((endPoint[1][1].b >> 0) & 1) << 18; - block.y |= ((endPoint[1][1].g >> 0) & 1) << 19; - block.y |= ((endPoint[1][1].g >> 1) & 1) << 20; - block.y |= ((endPoint[1][1].g >> 2) & 1) << 21; - block.y |= ((endPoint[1][1].g >> 3) & 1) << 22; - block.y |= ((endPoint[0][1].b >> 0) & 1) << 23; - block.y |= ((endPoint[0][1].b >> 1) & 1) << 24; - block.y |= ((endPoint[0][1].b >> 2) & 1) << 25; - block.y |= ((endPoint[0][1].b >> 3) & 1) << 26; - block.y |= ((endPoint[0][1].b >> 4) & 1) << 27; - block.y |= ((endPoint[1][1].b >> 1) & 1) << 28; - block.y |= ((endPoint[1][0].b >> 0) & 1) << 29; - block.y |= ((endPoint[1][0].b >> 1) & 1) << 30; - block.y |= ((endPoint[1][0].b >> 2) & 1) << 31; - block.z |= ((endPoint[1][0].b >> 3) & 1) << 0; - block.z |= ((endPoint[1][0].r >> 0) & 1) << 1; - block.z |= ((endPoint[1][0].r >> 1) & 1) << 2; - block.z |= ((endPoint[1][0].r >> 2) & 1) << 3; - block.z |= ((endPoint[1][0].r >> 3) & 1) << 4; - block.z |= ((endPoint[1][0].r >> 4) & 1) << 5; - block.z |= ((endPoint[1][1].b >> 2) & 1) << 6; - block.z |= ((endPoint[1][1].r >> 0) & 1) << 7; - block.z |= ((endPoint[1][1].r >> 1) & 1) << 8; - block.z |= ((endPoint[1][1].r >> 2) & 1) << 9; - block.z |= ((endPoint[1][1].r >> 3) & 1) << 10; - block.z |= ((endPoint[1][1].r >> 4) & 1) << 11; - block.z |= ((endPoint[1][1].b >> 3) & 1) << 12; - block.z |= ((partition_index >> 0) & 1) << 13; - block.z |= ((partition_index >> 1) & 1) << 14; - block.z |= ((partition_index >> 2) & 1) << 15; - block.z |= ((partition_index >> 3) & 1) << 16; - block.z |= ((partition_index >> 4) & 1) << 17; - } - else if ( mode_type == candidateModeFlag[6]) - { - /*block.x = candidateModeMemory[6]; - block.x |= ( ( endPoint[0][0].r << 5 ) & 0x00001FE0 ) | ( ( endPoint[0][0].g << 15 ) & 0x007F8000 ) | ( ( endPoint[0][0].b << 25 ) & 0xFE000000 ); - block.y |= ( endPoint[0][0].b >> 7 ) & 0x00000001; - block.y |= ( ( endPoint[0][1].r << 3 ) & 0x000001F8 ) | ( ( endPoint[0][1].g << 13 ) & 0x0003E000 ) | ( ( endPoint[0][1].b << 23 ) & 0x0F800000 ); - block.x |= ( ( endPoint[1][0].g << 20 ) & 0x01000000 ) | ( ( endPoint[1][0].b << 10 ) & 0x00004000); - block.x |= ( ( endPoint[1][1].g << 9 ) & 0x00002000 ) | ( ( endPoint[1][1].b << 21 ) & 0x00800000); - block.yz |= ( endPoint[1][0].gr << uint2(9, 1) ) & uint2(0x00001E00, 0x0000007E); - block.y |= ( endPoint[1][0].b << 29 ) & 0xE0000000; - block.yz |= ( endPoint[1][1].gr << uint2(19, 7) ) & uint2(0x00780000, 0x00001F80); - block.y |= ( ( endPoint[1][1].b >> 2 ) & 0x00000006 ); - block.y |= ( ( endPoint[1][1].b << 27 ) & 0x10000000 ) | ( ( endPoint[1][1].b << 18 ) & 0x00040000 ); - block.z |= ( endPoint[1][0].b >> 3 ) & 0x00000001;*/ - - block.x |= ((candidateModeMemory[6] >> 0) & 1) << 0; - block.x |= ((candidateModeMemory[6] >> 1) & 1) << 1; - block.x |= ((candidateModeMemory[6] >> 2) & 1) << 2; - block.x |= ((candidateModeMemory[6] >> 3) & 1) << 3; - block.x |= ((candidateModeMemory[6] >> 4) & 1) << 4; - block.x |= ((endPoint[0][0].r >> 0) & 1) << 5; - block.x |= ((endPoint[0][0].r >> 1) & 1) << 6; - block.x |= ((endPoint[0][0].r >> 2) & 1) << 7; - block.x |= ((endPoint[0][0].r >> 3) & 1) << 8; - block.x |= ((endPoint[0][0].r >> 4) & 1) << 9; - block.x |= ((endPoint[0][0].r >> 5) & 1) << 10; - block.x |= ((endPoint[0][0].r >> 6) & 1) << 11; - block.x |= ((endPoint[0][0].r >> 7) & 1) << 12; - block.x |= ((endPoint[1][1].g >> 4) & 1) << 13; - block.x |= ((endPoint[1][0].b >> 4) & 1) << 14; - block.x |= ((endPoint[0][0].g >> 0) & 1) << 15; - block.x |= ((endPoint[0][0].g >> 1) & 1) << 16; - block.x |= ((endPoint[0][0].g >> 2) & 1) << 17; - block.x |= ((endPoint[0][0].g >> 3) & 1) << 18; - block.x |= ((endPoint[0][0].g >> 4) & 1) << 19; - block.x |= ((endPoint[0][0].g >> 5) & 1) << 20; - block.x |= ((endPoint[0][0].g >> 6) & 1) << 21; - block.x |= ((endPoint[0][0].g >> 7) & 1) << 22; - block.x |= ((endPoint[1][1].b >> 2) & 1) << 23; - block.x |= ((endPoint[1][0].g >> 4) & 1) << 24; - block.x |= ((endPoint[0][0].b >> 0) & 1) << 25; - block.x |= ((endPoint[0][0].b >> 1) & 1) << 26; - block.x |= ((endPoint[0][0].b >> 2) & 1) << 27; - block.x |= ((endPoint[0][0].b >> 3) & 1) << 28; - block.x |= ((endPoint[0][0].b >> 4) & 1) << 29; - block.x |= ((endPoint[0][0].b >> 5) & 1) << 30; - block.x |= ((endPoint[0][0].b >> 6) & 1) << 31; - block.y |= ((endPoint[0][0].b >> 7) & 1) << 0; - block.y |= ((endPoint[1][1].b >> 3) & 1) << 1; - block.y |= ((endPoint[1][1].b >> 4) & 1) << 2; - block.y |= ((endPoint[0][1].r >> 0) & 1) << 3; - block.y |= ((endPoint[0][1].r >> 1) & 1) << 4; - block.y |= ((endPoint[0][1].r >> 2) & 1) << 5; - block.y |= ((endPoint[0][1].r >> 3) & 1) << 6; - block.y |= ((endPoint[0][1].r >> 4) & 1) << 7; - block.y |= ((endPoint[0][1].r >> 5) & 1) << 8; - block.y |= ((endPoint[1][0].g >> 0) & 1) << 9; - block.y |= ((endPoint[1][0].g >> 1) & 1) << 10; - block.y |= ((endPoint[1][0].g >> 2) & 1) << 11; - block.y |= ((endPoint[1][0].g >> 3) & 1) << 12; - block.y |= ((endPoint[0][1].g >> 0) & 1) << 13; - block.y |= ((endPoint[0][1].g >> 1) & 1) << 14; - block.y |= ((endPoint[0][1].g >> 2) & 1) << 15; - block.y |= ((endPoint[0][1].g >> 3) & 1) << 16; - block.y |= ((endPoint[0][1].g >> 4) & 1) << 17; - block.y |= ((endPoint[1][1].b >> 0) & 1) << 18; - block.y |= ((endPoint[1][1].g >> 0) & 1) << 19; - block.y |= ((endPoint[1][1].g >> 1) & 1) << 20; - block.y |= ((endPoint[1][1].g >> 2) & 1) << 21; - block.y |= ((endPoint[1][1].g >> 3) & 1) << 22; - block.y |= ((endPoint[0][1].b >> 0) & 1) << 23; - block.y |= ((endPoint[0][1].b >> 1) & 1) << 24; - block.y |= ((endPoint[0][1].b >> 2) & 1) << 25; - block.y |= ((endPoint[0][1].b >> 3) & 1) << 26; - block.y |= ((endPoint[0][1].b >> 4) & 1) << 27; - block.y |= ((endPoint[1][1].b >> 1) & 1) << 28; - block.y |= ((endPoint[1][0].b >> 0) & 1) << 29; - block.y |= ((endPoint[1][0].b >> 1) & 1) << 30; - block.y |= ((endPoint[1][0].b >> 2) & 1) << 31; - block.z |= ((endPoint[1][0].b >> 3) & 1) << 0; - block.z |= ((endPoint[1][0].r >> 0) & 1) << 1; - block.z |= ((endPoint[1][0].r >> 1) & 1) << 2; - block.z |= ((endPoint[1][0].r >> 2) & 1) << 3; - block.z |= ((endPoint[1][0].r >> 3) & 1) << 4; - block.z |= ((endPoint[1][0].r >> 4) & 1) << 5; - block.z |= ((endPoint[1][0].r >> 5) & 1) << 6; - block.z |= ((endPoint[1][1].r >> 0) & 1) << 7; - block.z |= ((endPoint[1][1].r >> 1) & 1) << 8; - block.z |= ((endPoint[1][1].r >> 2) & 1) << 9; - block.z |= ((endPoint[1][1].r >> 3) & 1) << 10; - block.z |= ((endPoint[1][1].r >> 4) & 1) << 11; - block.z |= ((endPoint[1][1].r >> 5) & 1) << 12; - block.z |= ((partition_index >> 0) & 1) << 13; - block.z |= ((partition_index >> 1) & 1) << 14; - block.z |= ((partition_index >> 2) & 1) << 15; - block.z |= ((partition_index >> 3) & 1) << 16; - block.z |= ((partition_index >> 4) & 1) << 17; - } - else if ( mode_type == candidateModeFlag[7]) - { - /*block.x = candidateModeMemory[7]; - block.x |= ( ( endPoint[0][0].r << 5 ) & 0x00001FE0 ) | ( ( endPoint[0][0].g << 15 ) & 0x007F8000 ) | ( ( endPoint[0][0].b << 25 ) & 0xFE000000 ); - block.y |= ( endPoint[0][0].b >> 7 ) & 0x00000001; - block.y |= ( ( endPoint[0][1].r << 3 ) & 0x000000F8 ) | ( ( endPoint[0][1].g << 13 ) & 0x0007E000 ) | ( ( endPoint[0][1].b << 23 ) & 0x0F800000 ); - block.x |= ( ( endPoint[1][0].g << 20 ) & 0x01000000 ) | ( ( endPoint[1][0].b << 10 ) & 0x00004000 ); - block.x |= ( ( endPoint[1][0].g << 18 ) & 0x00800000 ); - block.x |= ( ( endPoint[1][1].b << 13 ) & 0x00002000 ); - block.yz |= ( endPoint[1][0].gr << uint2(9, 1) ) & uint2(0x00001E00, 0x0000003E); - block.yz |= ( endPoint[1][1].gr << uint2(19, 7) ) & uint2(0x00780000, 0x00000F80); - block.y |= ( endPoint[1][0].b << 29 ) & 0xE0000000; - block.y |= ( ( endPoint[1][1].g >> 4 ) & 0x00000002 ) | ( ( endPoint[1][1].g << 4 ) & 0x00000100 ) | ( ( endPoint[1][1].b >> 2 ) & 0x00000004 ); - block.y |= ( endPoint[1][1].b << 27 ) & 0x10000000; - block.z |= ( endPoint[1][0].b >> 3 ) & 0x00000001; - block.z |= ( ( endPoint[1][1].b << 9 ) & 0x00001000 ) | ( ( endPoint[1][1].b << 4 ) & 0x00000040 );*/ - - block.x |= ((candidateModeMemory[7] >> 0) & 1) << 0; - block.x |= ((candidateModeMemory[7] >> 1) & 1) << 1; - block.x |= ((candidateModeMemory[7] >> 2) & 1) << 2; - block.x |= ((candidateModeMemory[7] >> 3) & 1) << 3; - block.x |= ((candidateModeMemory[7] >> 4) & 1) << 4; - block.x |= ((endPoint[0][0].r >> 0) & 1) << 5; - block.x |= ((endPoint[0][0].r >> 1) & 1) << 6; - block.x |= ((endPoint[0][0].r >> 2) & 1) << 7; - block.x |= ((endPoint[0][0].r >> 3) & 1) << 8; - block.x |= ((endPoint[0][0].r >> 4) & 1) << 9; - block.x |= ((endPoint[0][0].r >> 5) & 1) << 10; - block.x |= ((endPoint[0][0].r >> 6) & 1) << 11; - block.x |= ((endPoint[0][0].r >> 7) & 1) << 12; - block.x |= ((endPoint[1][1].b >> 0) & 1) << 13; - block.x |= ((endPoint[1][0].b >> 4) & 1) << 14; - block.x |= ((endPoint[0][0].g >> 0) & 1) << 15; - block.x |= ((endPoint[0][0].g >> 1) & 1) << 16; - block.x |= ((endPoint[0][0].g >> 2) & 1) << 17; - block.x |= ((endPoint[0][0].g >> 3) & 1) << 18; - block.x |= ((endPoint[0][0].g >> 4) & 1) << 19; - block.x |= ((endPoint[0][0].g >> 5) & 1) << 20; - block.x |= ((endPoint[0][0].g >> 6) & 1) << 21; - block.x |= ((endPoint[0][0].g >> 7) & 1) << 22; - block.x |= ((endPoint[1][0].g >> 5) & 1) << 23; - block.x |= ((endPoint[1][0].g >> 4) & 1) << 24; - block.x |= ((endPoint[0][0].b >> 0) & 1) << 25; - block.x |= ((endPoint[0][0].b >> 1) & 1) << 26; - block.x |= ((endPoint[0][0].b >> 2) & 1) << 27; - block.x |= ((endPoint[0][0].b >> 3) & 1) << 28; - block.x |= ((endPoint[0][0].b >> 4) & 1) << 29; - block.x |= ((endPoint[0][0].b >> 5) & 1) << 30; - block.x |= ((endPoint[0][0].b >> 6) & 1) << 31; - block.y |= ((endPoint[0][0].b >> 7) & 1) << 0; - block.y |= ((endPoint[1][1].g >> 5) & 1) << 1; - block.y |= ((endPoint[1][1].b >> 4) & 1) << 2; - block.y |= ((endPoint[0][1].r >> 0) & 1) << 3; - block.y |= ((endPoint[0][1].r >> 1) & 1) << 4; - block.y |= ((endPoint[0][1].r >> 2) & 1) << 5; - block.y |= ((endPoint[0][1].r >> 3) & 1) << 6; - block.y |= ((endPoint[0][1].r >> 4) & 1) << 7; - block.y |= ((endPoint[1][1].g >> 4) & 1) << 8; - block.y |= ((endPoint[1][0].g >> 0) & 1) << 9; - block.y |= ((endPoint[1][0].g >> 1) & 1) << 10; - block.y |= ((endPoint[1][0].g >> 2) & 1) << 11; - block.y |= ((endPoint[1][0].g >> 3) & 1) << 12; - block.y |= ((endPoint[0][1].g >> 0) & 1) << 13; - block.y |= ((endPoint[0][1].g >> 1) & 1) << 14; - block.y |= ((endPoint[0][1].g >> 2) & 1) << 15; - block.y |= ((endPoint[0][1].g >> 3) & 1) << 16; - block.y |= ((endPoint[0][1].g >> 4) & 1) << 17; - block.y |= ((endPoint[0][1].g >> 5) & 1) << 18; - block.y |= ((endPoint[1][1].g >> 0) & 1) << 19; - block.y |= ((endPoint[1][1].g >> 1) & 1) << 20; - block.y |= ((endPoint[1][1].g >> 2) & 1) << 21; - block.y |= ((endPoint[1][1].g >> 3) & 1) << 22; - block.y |= ((endPoint[0][1].b >> 0) & 1) << 23; - block.y |= ((endPoint[0][1].b >> 1) & 1) << 24; - block.y |= ((endPoint[0][1].b >> 2) & 1) << 25; - block.y |= ((endPoint[0][1].b >> 3) & 1) << 26; - block.y |= ((endPoint[0][1].b >> 4) & 1) << 27; - block.y |= ((endPoint[1][1].b >> 1) & 1) << 28; - block.y |= ((endPoint[1][0].b >> 0) & 1) << 29; - block.y |= ((endPoint[1][0].b >> 1) & 1) << 30; - block.y |= ((endPoint[1][0].b >> 2) & 1) << 31; - block.z |= ((endPoint[1][0].b >> 3) & 1) << 0; - block.z |= ((endPoint[1][0].r >> 0) & 1) << 1; - block.z |= ((endPoint[1][0].r >> 1) & 1) << 2; - block.z |= ((endPoint[1][0].r >> 2) & 1) << 3; - block.z |= ((endPoint[1][0].r >> 3) & 1) << 4; - block.z |= ((endPoint[1][0].r >> 4) & 1) << 5; - block.z |= ((endPoint[1][1].b >> 2) & 1) << 6; - block.z |= ((endPoint[1][1].r >> 0) & 1) << 7; - block.z |= ((endPoint[1][1].r >> 1) & 1) << 8; - block.z |= ((endPoint[1][1].r >> 2) & 1) << 9; - block.z |= ((endPoint[1][1].r >> 3) & 1) << 10; - block.z |= ((endPoint[1][1].r >> 4) & 1) << 11; - block.z |= ((endPoint[1][1].b >> 3) & 1) << 12; - block.z |= ((partition_index >> 0) & 1) << 13; - block.z |= ((partition_index >> 1) & 1) << 14; - block.z |= ((partition_index >> 2) & 1) << 15; - block.z |= ((partition_index >> 3) & 1) << 16; - block.z |= ((partition_index >> 4) & 1) << 17; - } - else if ( mode_type == candidateModeFlag[8]) - { - /*block.x = candidateModeMemory[8]; - block.x |= ( ( endPoint[0][0].r << 5 ) & 0x00001FE0 ) | ( ( endPoint[0][0].g << 15 ) & 0x007F8000 ) | ( ( endPoint[0][0].b << 25 ) & 0xFE000000 ); - block.y |= ( endPoint[0][0].b >> 7 ) & 0x00000001; - block.y |= ( ( endPoint[0][1].r << 3 ) & 0x000000F8 ) | ( ( endPoint[0][1].g << 13 ) & 0x0003E000 ) | ( ( endPoint[0][1].b << 23 ) & 0x1F800000 ); - block.x |= ( ( endPoint[1][0].g << 20 ) & 0x01000000 ) | ( ( endPoint[1][0].b << 10 ) & 0x00004000 ); - block.x |= ( ( endPoint[1][0].b << 18 ) & 0x00800000 ); - block.x |= ( endPoint[1][1].b << 12 ) & 0x00002000; - block.y |= ( endPoint[1][0].b << 29 ) & 0xE0000000; - block.y |= ( ( endPoint[1][1].g << 4 ) & 0x00000100 ) | ( ( endPoint[1][1].b >> 4 ) & 0x00000002 ) | ( ( endPoint[1][1].b >> 2 ) & 0x00000004 ); - block.yz |= ( endPoint[1][0].gr << uint2(9, 1) ) & uint2(0x00001E00, 0x0000003E); - block.yz |= ( endPoint[1][1].gr << uint2(19, 7) ) & uint2(0x00780000, 0x00000F80); - block.y |= ( endPoint[1][1].b << 18 ) & 0x00040000; - block.z |= ( endPoint[1][0].b >> 3 ) & 0x00000001; - block.z |= ( ( endPoint[1][1].b << 9 ) & 0x00001000 ) | ( ( endPoint[1][1].b << 4 ) & 0x00000040 );*/ - - block.x |= ((candidateModeMemory[8] >> 0) & 1) << 0; - block.x |= ((candidateModeMemory[8] >> 1) & 1) << 1; - block.x |= ((candidateModeMemory[8] >> 2) & 1) << 2; - block.x |= ((candidateModeMemory[8] >> 3) & 1) << 3; - block.x |= ((candidateModeMemory[8] >> 4) & 1) << 4; - block.x |= ((endPoint[0][0].r >> 0) & 1) << 5; - block.x |= ((endPoint[0][0].r >> 1) & 1) << 6; - block.x |= ((endPoint[0][0].r >> 2) & 1) << 7; - block.x |= ((endPoint[0][0].r >> 3) & 1) << 8; - block.x |= ((endPoint[0][0].r >> 4) & 1) << 9; - block.x |= ((endPoint[0][0].r >> 5) & 1) << 10; - block.x |= ((endPoint[0][0].r >> 6) & 1) << 11; - block.x |= ((endPoint[0][0].r >> 7) & 1) << 12; - block.x |= ((endPoint[1][1].b >> 1) & 1) << 13; - block.x |= ((endPoint[1][0].b >> 4) & 1) << 14; - block.x |= ((endPoint[0][0].g >> 0) & 1) << 15; - block.x |= ((endPoint[0][0].g >> 1) & 1) << 16; - block.x |= ((endPoint[0][0].g >> 2) & 1) << 17; - block.x |= ((endPoint[0][0].g >> 3) & 1) << 18; - block.x |= ((endPoint[0][0].g >> 4) & 1) << 19; - block.x |= ((endPoint[0][0].g >> 5) & 1) << 20; - block.x |= ((endPoint[0][0].g >> 6) & 1) << 21; - block.x |= ((endPoint[0][0].g >> 7) & 1) << 22; - block.x |= ((endPoint[1][0].b >> 5) & 1) << 23; - block.x |= ((endPoint[1][0].g >> 4) & 1) << 24; - block.x |= ((endPoint[0][0].b >> 0) & 1) << 25; - block.x |= ((endPoint[0][0].b >> 1) & 1) << 26; - block.x |= ((endPoint[0][0].b >> 2) & 1) << 27; - block.x |= ((endPoint[0][0].b >> 3) & 1) << 28; - block.x |= ((endPoint[0][0].b >> 4) & 1) << 29; - block.x |= ((endPoint[0][0].b >> 5) & 1) << 30; - block.x |= ((endPoint[0][0].b >> 6) & 1) << 31; - block.y |= ((endPoint[0][0].b >> 7) & 1) << 0; - block.y |= ((endPoint[1][1].b >> 5) & 1) << 1; - block.y |= ((endPoint[1][1].b >> 4) & 1) << 2; - block.y |= ((endPoint[0][1].r >> 0) & 1) << 3; - block.y |= ((endPoint[0][1].r >> 1) & 1) << 4; - block.y |= ((endPoint[0][1].r >> 2) & 1) << 5; - block.y |= ((endPoint[0][1].r >> 3) & 1) << 6; - block.y |= ((endPoint[0][1].r >> 4) & 1) << 7; - block.y |= ((endPoint[1][1].g >> 4) & 1) << 8; - block.y |= ((endPoint[1][0].g >> 0) & 1) << 9; - block.y |= ((endPoint[1][0].g >> 1) & 1) << 10; - block.y |= ((endPoint[1][0].g >> 2) & 1) << 11; - block.y |= ((endPoint[1][0].g >> 3) & 1) << 12; - block.y |= ((endPoint[0][1].g >> 0) & 1) << 13; - block.y |= ((endPoint[0][1].g >> 1) & 1) << 14; - block.y |= ((endPoint[0][1].g >> 2) & 1) << 15; - block.y |= ((endPoint[0][1].g >> 3) & 1) << 16; - block.y |= ((endPoint[0][1].g >> 4) & 1) << 17; - block.y |= ((endPoint[1][1].b >> 0) & 1) << 18; - block.y |= ((endPoint[1][1].g >> 0) & 1) << 19; - block.y |= ((endPoint[1][1].g >> 1) & 1) << 20; - block.y |= ((endPoint[1][1].g >> 2) & 1) << 21; - block.y |= ((endPoint[1][1].g >> 3) & 1) << 22; - block.y |= ((endPoint[0][1].b >> 0) & 1) << 23; - block.y |= ((endPoint[0][1].b >> 1) & 1) << 24; - block.y |= ((endPoint[0][1].b >> 2) & 1) << 25; - block.y |= ((endPoint[0][1].b >> 3) & 1) << 26; - block.y |= ((endPoint[0][1].b >> 4) & 1) << 27; - block.y |= ((endPoint[0][1].b >> 5) & 1) << 28; - block.y |= ((endPoint[1][0].b >> 0) & 1) << 29; - block.y |= ((endPoint[1][0].b >> 1) & 1) << 30; - block.y |= ((endPoint[1][0].b >> 2) & 1) << 31; - block.z |= ((endPoint[1][0].b >> 3) & 1) << 0; - block.z |= ((endPoint[1][0].r >> 0) & 1) << 1; - block.z |= ((endPoint[1][0].r >> 1) & 1) << 2; - block.z |= ((endPoint[1][0].r >> 2) & 1) << 3; - block.z |= ((endPoint[1][0].r >> 3) & 1) << 4; - block.z |= ((endPoint[1][0].r >> 4) & 1) << 5; - block.z |= ((endPoint[1][1].b >> 2) & 1) << 6; - block.z |= ((endPoint[1][1].r >> 0) & 1) << 7; - block.z |= ((endPoint[1][1].r >> 1) & 1) << 8; - block.z |= ((endPoint[1][1].r >> 2) & 1) << 9; - block.z |= ((endPoint[1][1].r >> 3) & 1) << 10; - block.z |= ((endPoint[1][1].r >> 4) & 1) << 11; - block.z |= ((endPoint[1][1].b >> 3) & 1) << 12; - block.z |= ((partition_index >> 0) & 1) << 13; - block.z |= ((partition_index >> 1) & 1) << 14; - block.z |= ((partition_index >> 2) & 1) << 15; - block.z |= ((partition_index >> 3) & 1) << 16; - block.z |= ((partition_index >> 4) & 1) << 17; - } - else if ( mode_type == candidateModeFlag[9]) - { - /*block.x = candidateModeMemory[9]; - block.x |= ( ( endPoint[0][0].r << 5 ) & 0x000007E0 ) | ( ( endPoint[0][0].g << 15 ) & 0x001F8000 ) | ( ( endPoint[0][0].b << 25 ) & 0x7E000000 ); - block.y |= ( ( endPoint[0][1].r << 3 ) & 0x000001F8 ) | ( ( endPoint[0][1].g << 13 ) & 0x0007E000 ) | ( ( endPoint[0][1].b << 23 ) & 0x1F800000 ); - block.x |= ( ( endPoint[1][0].g << 16 ) & 0x00200000 ) | ( ( endPoint[1][0].g << 20 ) & 0x01000000 ); - block.x |= ( ( endPoint[1][0].b << 17 ) & 0x00400000 ) | ( ( endPoint[1][0].b << 10 ) & 0x00004000 ); - block.x |= ( ( endPoint[1][1].b << 21 ) & 0x00800000 ) | ( ( endPoint[1][1].b << 12 ) & 0x00003000 ); - block.x |= ( ( endPoint[1][1].g << 26 ) & 0x80000000 ) | ( ( endPoint[1][1].g << 7 ) & 0x00000800 ); - block.yz |= ( endPoint[1][0].gr << uint2(9, 1) ) & uint2(0x00001E00, 0x0000007E); - block.yz |= ( endPoint[1][1].gr << uint2(19, 7) ) & uint2(0x00780000, 0x00001F80); - block.y |= ( endPoint[1][0].b << 29 ) & 0xE0000000; - block.y |= ( ( endPoint[1][1].b >> 4 ) & 0x00000002 ) | ( ( endPoint[1][1].b >> 2 ) & 0x00000004 ) | ( ( endPoint[1][1].b >> 3 ) & 0x00000001 ); - block.z |= ( endPoint[1][0].b >> 3 ) & 0x00000001;*/ - - block.x |= ((candidateModeMemory[9] >> 0) & 1) << 0; - block.x |= ((candidateModeMemory[9] >> 1) & 1) << 1; - block.x |= ((candidateModeMemory[9] >> 2) & 1) << 2; - block.x |= ((candidateModeMemory[9] >> 3) & 1) << 3; - block.x |= ((candidateModeMemory[9] >> 4) & 1) << 4; - block.x |= ((endPoint[0][0].r >> 0) & 1) << 5; - block.x |= ((endPoint[0][0].r >> 1) & 1) << 6; - block.x |= ((endPoint[0][0].r >> 2) & 1) << 7; - block.x |= ((endPoint[0][0].r >> 3) & 1) << 8; - block.x |= ((endPoint[0][0].r >> 4) & 1) << 9; - block.x |= ((endPoint[0][0].r >> 5) & 1) << 10; - block.x |= ((endPoint[1][1].g >> 4) & 1) << 11; - block.x |= ((endPoint[1][1].b >> 0) & 1) << 12; - block.x |= ((endPoint[1][1].b >> 1) & 1) << 13; - block.x |= ((endPoint[1][0].b >> 4) & 1) << 14; - block.x |= ((endPoint[0][0].g >> 0) & 1) << 15; - block.x |= ((endPoint[0][0].g >> 1) & 1) << 16; - block.x |= ((endPoint[0][0].g >> 2) & 1) << 17; - block.x |= ((endPoint[0][0].g >> 3) & 1) << 18; - block.x |= ((endPoint[0][0].g >> 4) & 1) << 19; - block.x |= ((endPoint[0][0].g >> 5) & 1) << 20; - block.x |= ((endPoint[1][0].g >> 5) & 1) << 21; - block.x |= ((endPoint[1][0].b >> 5) & 1) << 22; - block.x |= ((endPoint[1][1].b >> 2) & 1) << 23; - block.x |= ((endPoint[1][0].g >> 4) & 1) << 24; - block.x |= ((endPoint[0][0].b >> 0) & 1) << 25; - block.x |= ((endPoint[0][0].b >> 1) & 1) << 26; - block.x |= ((endPoint[0][0].b >> 2) & 1) << 27; - block.x |= ((endPoint[0][0].b >> 3) & 1) << 28; - block.x |= ((endPoint[0][0].b >> 4) & 1) << 29; - block.x |= ((endPoint[0][0].b >> 5) & 1) << 30; - block.x |= ((endPoint[1][1].g >> 5) & 1) << 31; - block.y |= ((endPoint[1][1].b >> 3) & 1) << 0; - block.y |= ((endPoint[1][1].b >> 5) & 1) << 1; - block.y |= ((endPoint[1][1].b >> 4) & 1) << 2; - block.y |= ((endPoint[0][1].r >> 0) & 1) << 3; - block.y |= ((endPoint[0][1].r >> 1) & 1) << 4; - block.y |= ((endPoint[0][1].r >> 2) & 1) << 5; - block.y |= ((endPoint[0][1].r >> 3) & 1) << 6; - block.y |= ((endPoint[0][1].r >> 4) & 1) << 7; - block.y |= ((endPoint[0][1].r >> 5) & 1) << 8; - block.y |= ((endPoint[1][0].g >> 0) & 1) << 9; - block.y |= ((endPoint[1][0].g >> 1) & 1) << 10; - block.y |= ((endPoint[1][0].g >> 2) & 1) << 11; - block.y |= ((endPoint[1][0].g >> 3) & 1) << 12; - block.y |= ((endPoint[0][1].g >> 0) & 1) << 13; - block.y |= ((endPoint[0][1].g >> 1) & 1) << 14; - block.y |= ((endPoint[0][1].g >> 2) & 1) << 15; - block.y |= ((endPoint[0][1].g >> 3) & 1) << 16; - block.y |= ((endPoint[0][1].g >> 4) & 1) << 17; - block.y |= ((endPoint[0][1].g >> 5) & 1) << 18; - block.y |= ((endPoint[1][1].g >> 0) & 1) << 19; - block.y |= ((endPoint[1][1].g >> 1) & 1) << 20; - block.y |= ((endPoint[1][1].g >> 2) & 1) << 21; - block.y |= ((endPoint[1][1].g >> 3) & 1) << 22; - block.y |= ((endPoint[0][1].b >> 0) & 1) << 23; - block.y |= ((endPoint[0][1].b >> 1) & 1) << 24; - block.y |= ((endPoint[0][1].b >> 2) & 1) << 25; - block.y |= ((endPoint[0][1].b >> 3) & 1) << 26; - block.y |= ((endPoint[0][1].b >> 4) & 1) << 27; - block.y |= ((endPoint[0][1].b >> 5) & 1) << 28; - block.y |= ((endPoint[1][0].b >> 0) & 1) << 29; - block.y |= ((endPoint[1][0].b >> 1) & 1) << 30; - block.y |= ((endPoint[1][0].b >> 2) & 1) << 31; - block.z |= ((endPoint[1][0].b >> 3) & 1) << 0; - block.z |= ((endPoint[1][0].r >> 0) & 1) << 1; - block.z |= ((endPoint[1][0].r >> 1) & 1) << 2; - block.z |= ((endPoint[1][0].r >> 2) & 1) << 3; - block.z |= ((endPoint[1][0].r >> 3) & 1) << 4; - block.z |= ((endPoint[1][0].r >> 4) & 1) << 5; - block.z |= ((endPoint[1][0].r >> 5) & 1) << 6; - block.z |= ((endPoint[1][1].r >> 0) & 1) << 7; - block.z |= ((endPoint[1][1].r >> 1) & 1) << 8; - block.z |= ((endPoint[1][1].r >> 2) & 1) << 9; - block.z |= ((endPoint[1][1].r >> 3) & 1) << 10; - block.z |= ((endPoint[1][1].r >> 4) & 1) << 11; - block.z |= ((endPoint[1][1].r >> 5) & 1) << 12; - block.z |= ((partition_index >> 0) & 1) << 13; - block.z |= ((partition_index >> 1) & 1) << 14; - block.z |= ((partition_index >> 2) & 1) << 15; - block.z |= ((partition_index >> 3) & 1) << 16; - block.z |= ((partition_index >> 4) & 1) << 17; - } -} -void block_package( inout uint4 block, int2x3 endPoint, uint mode_type ) // for mode 11 - 14 -{ - /*block.x = ( ( endPoint[0].r << 5 ) & 0x00007FE0 ) | ( ( endPoint[0].g << 15 ) & 0x01FF8000 ) | ( ( endPoint[0].b << 25 ) & 0xFE000000 ); - block.y |= ( endPoint[0].b >> 7 ) & 0x00000007;*/ - - block.xy = 0; - block.z &= 0xFFFFFFFE; - - - if ( mode_type == candidateModeFlag[10]) - { - /* block.x |= candidateModeMemory[10]; - block.y |= ( ( endPoint[1].r << 3 ) & 0x00001FF8 ) | ( ( endPoint[1].g << 13 ) & 0x007FE000 ) | ( ( endPoint[1].b << 23 ) & 0xFF800000 ); - block.z |= ( endPoint[1].b >> 9 ) & 0x00000001;*/ - - block.x |= ((candidateModeMemory[10] >> 0) & 1) << 0; - block.x |= ((candidateModeMemory[10] >> 1) & 1) << 1; - block.x |= ((candidateModeMemory[10] >> 2) & 1) << 2; - block.x |= ((candidateModeMemory[10] >> 3) & 1) << 3; - block.x |= ((candidateModeMemory[10] >> 4) & 1) << 4; - block.x |= ((endPoint[0].r >> 0) & 1) << 5; - block.x |= ((endPoint[0].r >> 1) & 1) << 6; - block.x |= ((endPoint[0].r >> 2) & 1) << 7; - block.x |= ((endPoint[0].r >> 3) & 1) << 8; - block.x |= ((endPoint[0].r >> 4) & 1) << 9; - block.x |= ((endPoint[0].r >> 5) & 1) << 10; - block.x |= ((endPoint[0].r >> 6) & 1) << 11; - block.x |= ((endPoint[0].r >> 7) & 1) << 12; - block.x |= ((endPoint[0].r >> 8) & 1) << 13; - block.x |= ((endPoint[0].r >> 9) & 1) << 14; - block.x |= ((endPoint[0].g >> 0) & 1) << 15; - block.x |= ((endPoint[0].g >> 1) & 1) << 16; - block.x |= ((endPoint[0].g >> 2) & 1) << 17; - block.x |= ((endPoint[0].g >> 3) & 1) << 18; - block.x |= ((endPoint[0].g >> 4) & 1) << 19; - block.x |= ((endPoint[0].g >> 5) & 1) << 20; - block.x |= ((endPoint[0].g >> 6) & 1) << 21; - block.x |= ((endPoint[0].g >> 7) & 1) << 22; - block.x |= ((endPoint[0].g >> 8) & 1) << 23; - block.x |= ((endPoint[0].g >> 9) & 1) << 24; - block.x |= ((endPoint[0].b >> 0) & 1) << 25; - block.x |= ((endPoint[0].b >> 1) & 1) << 26; - block.x |= ((endPoint[0].b >> 2) & 1) << 27; - block.x |= ((endPoint[0].b >> 3) & 1) << 28; - block.x |= ((endPoint[0].b >> 4) & 1) << 29; - block.x |= ((endPoint[0].b >> 5) & 1) << 30; - block.x |= ((endPoint[0].b >> 6) & 1) << 31; - block.y |= ((endPoint[0].b >> 7) & 1) << 0; - block.y |= ((endPoint[0].b >> 8) & 1) << 1; - block.y |= ((endPoint[0].b >> 9) & 1) << 2; - block.y |= ((endPoint[1].r >> 0) & 1) << 3; - block.y |= ((endPoint[1].r >> 1) & 1) << 4; - block.y |= ((endPoint[1].r >> 2) & 1) << 5; - block.y |= ((endPoint[1].r >> 3) & 1) << 6; - block.y |= ((endPoint[1].r >> 4) & 1) << 7; - block.y |= ((endPoint[1].r >> 5) & 1) << 8; - block.y |= ((endPoint[1].r >> 6) & 1) << 9; - block.y |= ((endPoint[1].r >> 7) & 1) << 10; - block.y |= ((endPoint[1].r >> 8) & 1) << 11; - block.y |= ((endPoint[1].r >> 9) & 1) << 12; - block.y |= ((endPoint[1].g >> 0) & 1) << 13; - block.y |= ((endPoint[1].g >> 1) & 1) << 14; - block.y |= ((endPoint[1].g >> 2) & 1) << 15; - block.y |= ((endPoint[1].g >> 3) & 1) << 16; - block.y |= ((endPoint[1].g >> 4) & 1) << 17; - block.y |= ((endPoint[1].g >> 5) & 1) << 18; - block.y |= ((endPoint[1].g >> 6) & 1) << 19; - block.y |= ((endPoint[1].g >> 7) & 1) << 20; - block.y |= ((endPoint[1].g >> 8) & 1) << 21; - block.y |= ((endPoint[1].g >> 9) & 1) << 22; - block.y |= ((endPoint[1].b >> 0) & 1) << 23; - block.y |= ((endPoint[1].b >> 1) & 1) << 24; - block.y |= ((endPoint[1].b >> 2) & 1) << 25; - block.y |= ((endPoint[1].b >> 3) & 1) << 26; - block.y |= ((endPoint[1].b >> 4) & 1) << 27; - block.y |= ((endPoint[1].b >> 5) & 1) << 28; - block.y |= ((endPoint[1].b >> 6) & 1) << 29; - block.y |= ((endPoint[1].b >> 7) & 1) << 30; - block.y |= ((endPoint[1].b >> 8) & 1) << 31; - block.z |= ((endPoint[1].b >> 9) & 1) << 0; - } - else if (mode_type == candidateModeFlag[11]) - { - /*block.x |= candidateModeMemory[11]; - block.y |= ( ( endPoint[0].r << 2 ) & 0x00001000 ) | ( ( endPoint[0].g << 12 ) & 0x00400000 ); - block.y |= ( ( endPoint[1].r << 3 ) & 0x00000FF8 ) | ( ( endPoint[1].g << 13 ) & 0x003FE000 ) | ( ( endPoint[1].b << 23 ) & 0xFF800000 ); - block.z |= ( endPoint[0].b >> 10 ) & 0x00000001;*/ - - block.x |= ((candidateModeMemory[11] >> 0) & 1) << 0; - block.x |= ((candidateModeMemory[11] >> 1) & 1) << 1; - block.x |= ((candidateModeMemory[11] >> 2) & 1) << 2; - block.x |= ((candidateModeMemory[11] >> 3) & 1) << 3; - block.x |= ((candidateModeMemory[11] >> 4) & 1) << 4; - block.x |= ((endPoint[0].r >> 0) & 1) << 5; - block.x |= ((endPoint[0].r >> 1) & 1) << 6; - block.x |= ((endPoint[0].r >> 2) & 1) << 7; - block.x |= ((endPoint[0].r >> 3) & 1) << 8; - block.x |= ((endPoint[0].r >> 4) & 1) << 9; - block.x |= ((endPoint[0].r >> 5) & 1) << 10; - block.x |= ((endPoint[0].r >> 6) & 1) << 11; - block.x |= ((endPoint[0].r >> 7) & 1) << 12; - block.x |= ((endPoint[0].r >> 8) & 1) << 13; - block.x |= ((endPoint[0].r >> 9) & 1) << 14; - block.x |= ((endPoint[0].g >> 0) & 1) << 15; - block.x |= ((endPoint[0].g >> 1) & 1) << 16; - block.x |= ((endPoint[0].g >> 2) & 1) << 17; - block.x |= ((endPoint[0].g >> 3) & 1) << 18; - block.x |= ((endPoint[0].g >> 4) & 1) << 19; - block.x |= ((endPoint[0].g >> 5) & 1) << 20; - block.x |= ((endPoint[0].g >> 6) & 1) << 21; - block.x |= ((endPoint[0].g >> 7) & 1) << 22; - block.x |= ((endPoint[0].g >> 8) & 1) << 23; - block.x |= ((endPoint[0].g >> 9) & 1) << 24; - block.x |= ((endPoint[0].b >> 0) & 1) << 25; - block.x |= ((endPoint[0].b >> 1) & 1) << 26; - block.x |= ((endPoint[0].b >> 2) & 1) << 27; - block.x |= ((endPoint[0].b >> 3) & 1) << 28; - block.x |= ((endPoint[0].b >> 4) & 1) << 29; - block.x |= ((endPoint[0].b >> 5) & 1) << 30; - block.x |= ((endPoint[0].b >> 6) & 1) << 31; - block.y |= ((endPoint[0].b >> 7) & 1) << 0; - block.y |= ((endPoint[0].b >> 8) & 1) << 1; - block.y |= ((endPoint[0].b >> 9) & 1) << 2; - block.y |= ((endPoint[1].r >> 0) & 1) << 3; - block.y |= ((endPoint[1].r >> 1) & 1) << 4; - block.y |= ((endPoint[1].r >> 2) & 1) << 5; - block.y |= ((endPoint[1].r >> 3) & 1) << 6; - block.y |= ((endPoint[1].r >> 4) & 1) << 7; - block.y |= ((endPoint[1].r >> 5) & 1) << 8; - block.y |= ((endPoint[1].r >> 6) & 1) << 9; - block.y |= ((endPoint[1].r >> 7) & 1) << 10; - block.y |= ((endPoint[1].r >> 8) & 1) << 11; - block.y |= ((endPoint[0].r >> 10) & 1) << 12; - block.y |= ((endPoint[1].g >> 0) & 1) << 13; - block.y |= ((endPoint[1].g >> 1) & 1) << 14; - block.y |= ((endPoint[1].g >> 2) & 1) << 15; - block.y |= ((endPoint[1].g >> 3) & 1) << 16; - block.y |= ((endPoint[1].g >> 4) & 1) << 17; - block.y |= ((endPoint[1].g >> 5) & 1) << 18; - block.y |= ((endPoint[1].g >> 6) & 1) << 19; - block.y |= ((endPoint[1].g >> 7) & 1) << 20; - block.y |= ((endPoint[1].g >> 8) & 1) << 21; - block.y |= ((endPoint[0].g >> 10) & 1) << 22; - block.y |= ((endPoint[1].b >> 0) & 1) << 23; - block.y |= ((endPoint[1].b >> 1) & 1) << 24; - block.y |= ((endPoint[1].b >> 2) & 1) << 25; - block.y |= ((endPoint[1].b >> 3) & 1) << 26; - block.y |= ((endPoint[1].b >> 4) & 1) << 27; - block.y |= ((endPoint[1].b >> 5) & 1) << 28; - block.y |= ((endPoint[1].b >> 6) & 1) << 29; - block.y |= ((endPoint[1].b >> 7) & 1) << 30; - block.y |= ((endPoint[1].b >> 8) & 1) << 31; - block.z |= ((endPoint[0].b >> 10) & 1) << 0; - } - else if (mode_type == candidateModeFlag[12])// violate the spec in [0].low - { - /*block.x |= candidateModeMemory[12]; - block.y |= ( ( endPoint[0].r << 2 ) & 0x00001000 ) | ( ( endPoint[0].g << 12 ) & 0x00400000 ); - block.y |= ( ( endPoint[0].r << 0 ) & 0x00000800 ) | ( ( endPoint[0].g << 10 ) & 0x00200000 ); - block.y |= ( endPoint[0].b << 20 ) & 0x80000000; - block.y |= ( ( endPoint[1].r << 3 ) & 0x000007F8 ) | ( ( endPoint[1].g << 13 ) & 0x001FE000 ) | ( ( endPoint[1].b << 23 ) & 0x7F800000 ); - block.z |= ( endPoint[0].b >> 10 ) & 0x00000001;*/ - - block.x |= ((candidateModeMemory[12] >> 0) & 1) << 0; - block.x |= ((candidateModeMemory[12] >> 1) & 1) << 1; - block.x |= ((candidateModeMemory[12] >> 2) & 1) << 2; - block.x |= ((candidateModeMemory[12] >> 3) & 1) << 3; - block.x |= ((candidateModeMemory[12] >> 4) & 1) << 4; - block.x |= ((endPoint[0].r >> 0) & 1) << 5; - block.x |= ((endPoint[0].r >> 1) & 1) << 6; - block.x |= ((endPoint[0].r >> 2) & 1) << 7; - block.x |= ((endPoint[0].r >> 3) & 1) << 8; - block.x |= ((endPoint[0].r >> 4) & 1) << 9; - block.x |= ((endPoint[0].r >> 5) & 1) << 10; - block.x |= ((endPoint[0].r >> 6) & 1) << 11; - block.x |= ((endPoint[0].r >> 7) & 1) << 12; - block.x |= ((endPoint[0].r >> 8) & 1) << 13; - block.x |= ((endPoint[0].r >> 9) & 1) << 14; - block.x |= ((endPoint[0].g >> 0) & 1) << 15; - block.x |= ((endPoint[0].g >> 1) & 1) << 16; - block.x |= ((endPoint[0].g >> 2) & 1) << 17; - block.x |= ((endPoint[0].g >> 3) & 1) << 18; - block.x |= ((endPoint[0].g >> 4) & 1) << 19; - block.x |= ((endPoint[0].g >> 5) & 1) << 20; - block.x |= ((endPoint[0].g >> 6) & 1) << 21; - block.x |= ((endPoint[0].g >> 7) & 1) << 22; - block.x |= ((endPoint[0].g >> 8) & 1) << 23; - block.x |= ((endPoint[0].g >> 9) & 1) << 24; - block.x |= ((endPoint[0].b >> 0) & 1) << 25; - block.x |= ((endPoint[0].b >> 1) & 1) << 26; - block.x |= ((endPoint[0].b >> 2) & 1) << 27; - block.x |= ((endPoint[0].b >> 3) & 1) << 28; - block.x |= ((endPoint[0].b >> 4) & 1) << 29; - block.x |= ((endPoint[0].b >> 5) & 1) << 30; - block.x |= ((endPoint[0].b >> 6) & 1) << 31; - block.y |= ((endPoint[0].b >> 7) & 1) << 0; - block.y |= ((endPoint[0].b >> 8) & 1) << 1; - block.y |= ((endPoint[0].b >> 9) & 1) << 2; - block.y |= ((endPoint[1].r >> 0) & 1) << 3; - block.y |= ((endPoint[1].r >> 1) & 1) << 4; - block.y |= ((endPoint[1].r >> 2) & 1) << 5; - block.y |= ((endPoint[1].r >> 3) & 1) << 6; - block.y |= ((endPoint[1].r >> 4) & 1) << 7; - block.y |= ((endPoint[1].r >> 5) & 1) << 8; - block.y |= ((endPoint[1].r >> 6) & 1) << 9; - block.y |= ((endPoint[1].r >> 7) & 1) << 10; - block.y |= ((endPoint[0].r >> 11) & 1) << 11; - block.y |= ((endPoint[0].r >> 10) & 1) << 12; - block.y |= ((endPoint[1].g >> 0) & 1) << 13; - block.y |= ((endPoint[1].g >> 1) & 1) << 14; - block.y |= ((endPoint[1].g >> 2) & 1) << 15; - block.y |= ((endPoint[1].g >> 3) & 1) << 16; - block.y |= ((endPoint[1].g >> 4) & 1) << 17; - block.y |= ((endPoint[1].g >> 5) & 1) << 18; - block.y |= ((endPoint[1].g >> 6) & 1) << 19; - block.y |= ((endPoint[1].g >> 7) & 1) << 20; - block.y |= ((endPoint[0].g >> 11) & 1) << 21; - block.y |= ((endPoint[0].g >> 10) & 1) << 22; - block.y |= ((endPoint[1].b >> 0) & 1) << 23; - block.y |= ((endPoint[1].b >> 1) & 1) << 24; - block.y |= ((endPoint[1].b >> 2) & 1) << 25; - block.y |= ((endPoint[1].b >> 3) & 1) << 26; - block.y |= ((endPoint[1].b >> 4) & 1) << 27; - block.y |= ((endPoint[1].b >> 5) & 1) << 28; - block.y |= ((endPoint[1].b >> 6) & 1) << 29; - block.y |= ((endPoint[1].b >> 7) & 1) << 30; - block.y |= ((endPoint[0].b >> 11) & 1) << 31; - block.z |= ((endPoint[0].b >> 10) & 1) << 0; - } - else if (mode_type == candidateModeFlag[13]) - { - /*block.x |= candidateModeMemory[13]; - block.y |= ( ( endPoint[0].r >> 8 ) & 0x00000080 ); - block.y |= ( ( endPoint[0].r >> 6 ) & 0x00000100 ); - block.y |= ( ( endPoint[0].r >> 4 ) & 0x00000200 ); - block.y |= ( ( endPoint[0].r >> 2 ) & 0x00000400 ); - block.y |= ( ( endPoint[0].r >> 0 ) & 0x00000800 ); - block.y |= ( ( endPoint[0].r << 2 ) & 0x00001000 ); - block.y |= ( ( endPoint[0].g << 2 ) & 0x00020000 ); - block.y |= ( ( endPoint[0].g << 4 ) & 0x00040000 ); - block.y |= ( ( endPoint[0].g << 6 ) & 0x00080000 ); - block.y |= ( ( endPoint[0].g << 8 ) & 0x00100000 ); - block.y |= ( ( endPoint[0].g << 10 ) & 0x00200000 ); - block.y |= ( ( endPoint[0].g << 12 ) & 0x00400000 ); - block.y |= ( ( endPoint[0].b << 12 ) & 0x08000000 ); - block.y |= ( ( endPoint[0].b << 14 ) & 0x10000000 ); - block.y |= ( ( endPoint[0].b << 16 ) & 0x20000000 ); - block.y |= ( ( endPoint[0].b << 18 ) & 0x40000000 ); - block.y |= ( ( endPoint[0].b << 20 ) & 0x80000000 ); - block.y |= ( ( endPoint[1].r << 3 ) & 0x00000078 ) | ( ( endPoint[1].g << 13 ) & 0x0001E000 ) | ( ( endPoint[1].b << 23 ) & 0x07800000 ); - block.z |= ( endPoint[0].b >> 10 ) & 0x00000001;*/ - - block.x |= ((candidateModeMemory[13] >> 0) & 1) << 0; - block.x |= ((candidateModeMemory[13] >> 1) & 1) << 1; - block.x |= ((candidateModeMemory[13] >> 2) & 1) << 2; - block.x |= ((candidateModeMemory[13] >> 3) & 1) << 3; - block.x |= ((candidateModeMemory[13] >> 4) & 1) << 4; - block.x |= ((endPoint[0].r >> 0) & 1) << 5; - block.x |= ((endPoint[0].r >> 1) & 1) << 6; - block.x |= ((endPoint[0].r >> 2) & 1) << 7; - block.x |= ((endPoint[0].r >> 3) & 1) << 8; - block.x |= ((endPoint[0].r >> 4) & 1) << 9; - block.x |= ((endPoint[0].r >> 5) & 1) << 10; - block.x |= ((endPoint[0].r >> 6) & 1) << 11; - block.x |= ((endPoint[0].r >> 7) & 1) << 12; - block.x |= ((endPoint[0].r >> 8) & 1) << 13; - block.x |= ((endPoint[0].r >> 9) & 1) << 14; - block.x |= ((endPoint[0].g >> 0) & 1) << 15; - block.x |= ((endPoint[0].g >> 1) & 1) << 16; - block.x |= ((endPoint[0].g >> 2) & 1) << 17; - block.x |= ((endPoint[0].g >> 3) & 1) << 18; - block.x |= ((endPoint[0].g >> 4) & 1) << 19; - block.x |= ((endPoint[0].g >> 5) & 1) << 20; - block.x |= ((endPoint[0].g >> 6) & 1) << 21; - block.x |= ((endPoint[0].g >> 7) & 1) << 22; - block.x |= ((endPoint[0].g >> 8) & 1) << 23; - block.x |= ((endPoint[0].g >> 9) & 1) << 24; - block.x |= ((endPoint[0].b >> 0) & 1) << 25; - block.x |= ((endPoint[0].b >> 1) & 1) << 26; - block.x |= ((endPoint[0].b >> 2) & 1) << 27; - block.x |= ((endPoint[0].b >> 3) & 1) << 28; - block.x |= ((endPoint[0].b >> 4) & 1) << 29; - block.x |= ((endPoint[0].b >> 5) & 1) << 30; - block.x |= ((endPoint[0].b >> 6) & 1) << 31; - block.y |= ((endPoint[0].b >> 7) & 1) << 0; - block.y |= ((endPoint[0].b >> 8) & 1) << 1; - block.y |= ((endPoint[0].b >> 9) & 1) << 2; - block.y |= ((endPoint[1].r >> 0) & 1) << 3; - block.y |= ((endPoint[1].r >> 1) & 1) << 4; - block.y |= ((endPoint[1].r >> 2) & 1) << 5; - block.y |= ((endPoint[1].r >> 3) & 1) << 6; - block.y |= ((endPoint[0].r >> 15) & 1) << 7; - block.y |= ((endPoint[0].r >> 14) & 1) << 8; - block.y |= ((endPoint[0].r >> 13) & 1) << 9; - block.y |= ((endPoint[0].r >> 12) & 1) << 10; - block.y |= ((endPoint[0].r >> 11) & 1) << 11; - block.y |= ((endPoint[0].r >> 10) & 1) << 12; - block.y |= ((endPoint[1].g >> 0) & 1) << 13; - block.y |= ((endPoint[1].g >> 1) & 1) << 14; - block.y |= ((endPoint[1].g >> 2) & 1) << 15; - block.y |= ((endPoint[1].g >> 3) & 1) << 16; - block.y |= ((endPoint[0].g >> 15) & 1) << 17; - block.y |= ((endPoint[0].g >> 14) & 1) << 18; - block.y |= ((endPoint[0].g >> 13) & 1) << 19; - block.y |= ((endPoint[0].g >> 12) & 1) << 20; - block.y |= ((endPoint[0].g >> 11) & 1) << 21; - block.y |= ((endPoint[0].g >> 10) & 1) << 22; - block.y |= ((endPoint[1].b >> 0) & 1) << 23; - block.y |= ((endPoint[1].b >> 1) & 1) << 24; - block.y |= ((endPoint[1].b >> 2) & 1) << 25; - block.y |= ((endPoint[1].b >> 3) & 1) << 26; - block.y |= ((endPoint[0].b >> 15) & 1) << 27; - block.y |= ((endPoint[0].b >> 14) & 1) << 28; - block.y |= ((endPoint[0].b >> 13) & 1) << 29; - block.y |= ((endPoint[0].b >> 12) & 1) << 30; - block.y |= ((endPoint[0].b >> 11) & 1) << 31; - block.z |= ((endPoint[0].b >> 10) & 1) << 0; - } -} diff --git a/tests/hlsl/dxsdk/BC6HBC7EncoderCS/Shaders/BC7Encode.hlsl b/tests/hlsl/dxsdk/BC6HBC7EncoderCS/Shaders/BC7Encode.hlsl deleted file mode 100644 index 6a57c3862..000000000 --- a/tests/hlsl/dxsdk/BC6HBC7EncoderCS/Shaders/BC7Encode.hlsl +++ /dev/null @@ -1,1908 +0,0 @@ -//TEST_IGNORE_FILE: -//-------------------------------------------------------------------------------------- -// File: BC7Encode.hlsl -// -// The Compute Shader for BC7 Encoder -// -// Copyright (c) Microsoft Corporation. All rights reserved. -//-------------------------------------------------------------------------------------- - -//#define REF_DEVICE - -#define CHAR_LENGTH 8 -#define NCHANNELS 4 -#define BC7_UNORM 98 -#define MAX_UINT 0xFFFFFFFF -#define MIN_UINT 0 - -static const uint candidateSectionBit[64] = //Associated to partition 0-63 -{ - 0xCCCC, 0x8888, 0xEEEE, 0xECC8, - 0xC880, 0xFEEC, 0xFEC8, 0xEC80, - 0xC800, 0xFFEC, 0xFE80, 0xE800, - 0xFFE8, 0xFF00, 0xFFF0, 0xF000, - 0xF710, 0x008E, 0x7100, 0x08CE, - 0x008C, 0x7310, 0x3100, 0x8CCE, - 0x088C, 0x3110, 0x6666, 0x366C, - 0x17E8, 0x0FF0, 0x718E, 0x399C, - 0xaaaa, 0xf0f0, 0x5a5a, 0x33cc, - 0x3c3c, 0x55aa, 0x9696, 0xa55a, - 0x73ce, 0x13c8, 0x324c, 0x3bdc, - 0x6996, 0xc33c, 0x9966, 0x660, - 0x272, 0x4e4, 0x4e40, 0x2720, - 0xc936, 0x936c, 0x39c6, 0x639c, - 0x9336, 0x9cc6, 0x817e, 0xe718, - 0xccf0, 0xfcc, 0x7744, 0xee22, -}; -static const uint candidateSectionBit2[64] = //Associated to partition 64-127 -{ - 0xaa685050, 0x6a5a5040, 0x5a5a4200, 0x5450a0a8, - 0xa5a50000, 0xa0a05050, 0x5555a0a0, 0x5a5a5050, - 0xaa550000, 0xaa555500, 0xaaaa5500, 0x90909090, - 0x94949494, 0xa4a4a4a4, 0xa9a59450, 0x2a0a4250, - 0xa5945040, 0x0a425054, 0xa5a5a500, 0x55a0a0a0, - 0xa8a85454, 0x6a6a4040, 0xa4a45000, 0x1a1a0500, - 0x0050a4a4, 0xaaa59090, 0x14696914, 0x69691400, - 0xa08585a0, 0xaa821414, 0x50a4a450, 0x6a5a0200, - 0xa9a58000, 0x5090a0a8, 0xa8a09050, 0x24242424, - 0x00aa5500, 0x24924924, 0x24499224, 0x50a50a50, - 0x500aa550, 0xaaaa4444, 0x66660000, 0xa5a0a5a0, - 0x50a050a0, 0x69286928, 0x44aaaa44, 0x66666600, - 0xaa444444, 0x54a854a8, 0x95809580, 0x96969600, - 0xa85454a8, 0x80959580, 0xaa141414, 0x96960000, - 0xaaaa1414, 0xa05050a0, 0xa0a5a5a0, 0x96000000, - 0x40804080, 0xa9a8a9a8, 0xaaaaaa44, 0x2a4a5254, -}; -static const uint2 candidateFixUpIndex1D[128] = -{ - {15, 0},{15, 0},{15, 0},{15, 0}, - {15, 0},{15, 0},{15, 0},{15, 0}, - {15, 0},{15, 0},{15, 0},{15, 0}, - {15, 0},{15, 0},{15, 0},{15, 0}, - {15, 0},{ 2, 0},{ 8, 0},{ 2, 0}, - { 2, 0},{ 8, 0},{ 8, 0},{15, 0}, - { 2, 0},{ 8, 0},{ 2, 0},{ 2, 0}, - { 8, 0},{ 8, 0},{ 2, 0},{ 2, 0}, - - {15, 0},{15, 0},{ 6, 0},{ 8, 0}, - { 2, 0},{ 8, 0},{15, 0},{15, 0}, - { 2, 0},{ 8, 0},{ 2, 0},{ 2, 0}, - { 2, 0},{15, 0},{15, 0},{ 6, 0}, - { 6, 0},{ 2, 0},{ 6, 0},{ 8, 0}, - {15, 0},{15, 0},{ 2, 0},{ 2, 0}, - {15, 0},{15, 0},{15, 0},{15, 0}, - {15, 0},{ 2, 0},{ 2, 0},{15, 0}, - //candidateFixUpIndex1D[i][1], i < 64 should not be used - - { 3,15},{ 3, 8},{15, 8},{15, 3}, - { 8,15},{ 3,15},{15, 3},{15, 8}, - { 8,15},{ 8,15},{ 6,15},{ 6,15}, - { 6,15},{ 5,15},{ 3,15},{ 3, 8}, - { 3,15},{ 3, 8},{ 8,15},{15, 3}, - { 3,15},{ 3, 8},{ 6,15},{10, 8}, - { 5, 3},{ 8,15},{ 8, 6},{ 6,10}, - { 8,15},{ 5,15},{15,10},{15, 8}, - - { 8,15},{15, 3},{ 3,15},{ 5,10}, - { 6,10},{10, 8},{ 8, 9},{15,10}, - {15, 6},{ 3,15},{15, 8},{ 5,15}, - {15, 3},{15, 6},{15, 6},{15, 8}, //The Spec doesn't mark the first fixed up index in this row, so I apply 15 for them, and seems correct - { 3,15},{15, 3},{ 5,15},{ 5,15}, - { 5,15},{ 8,15},{ 5,15},{10,15}, - { 5,15},{10,15},{ 8,15},{13,15}, - {15, 3},{12,15},{ 3,15},{ 3, 8}, -}; -static const uint2 candidateFixUpIndex1DOrdered[128] = //Same with candidateFixUpIndex1D but order the result when i >= 64 -{ - {15, 0},{15, 0},{15, 0},{15, 0}, - {15, 0},{15, 0},{15, 0},{15, 0}, - {15, 0},{15, 0},{15, 0},{15, 0}, - {15, 0},{15, 0},{15, 0},{15, 0}, - {15, 0},{ 2, 0},{ 8, 0},{ 2, 0}, - { 2, 0},{ 8, 0},{ 8, 0},{15, 0}, - { 2, 0},{ 8, 0},{ 2, 0},{ 2, 0}, - { 8, 0},{ 8, 0},{ 2, 0},{ 2, 0}, - - {15, 0},{15, 0},{ 6, 0},{ 8, 0}, - { 2, 0},{ 8, 0},{15, 0},{15, 0}, - { 2, 0},{ 8, 0},{ 2, 0},{ 2, 0}, - { 2, 0},{15, 0},{15, 0},{ 6, 0}, - { 6, 0},{ 2, 0},{ 6, 0},{ 8, 0}, - {15, 0},{15, 0},{ 2, 0},{ 2, 0}, - {15, 0},{15, 0},{15, 0},{15, 0}, - {15, 0},{ 2, 0},{ 2, 0},{15, 0}, - //candidateFixUpIndex1DOrdered[i][1], i < 64 should not be used - - { 3,15},{ 3, 8},{ 8,15},{ 3,15}, - { 8,15},{ 3,15},{ 3,15},{ 8,15}, - { 8,15},{ 8,15},{ 6,15},{ 6,15}, - { 6,15},{ 5,15},{ 3,15},{ 3, 8}, - { 3,15},{ 3, 8},{ 8,15},{ 3,15}, - { 3,15},{ 3, 8},{ 6,15},{ 8,10}, - { 3, 5},{ 8,15},{ 6, 8},{ 6,10}, - { 8,15},{ 5,15},{10,15},{ 8,15}, - - { 8,15},{ 3,15},{ 3,15},{ 5,10}, - { 6,10},{ 8,10},{ 8, 9},{10,15}, - { 6,15},{ 3,15},{ 8,15},{ 5,15}, - { 3,15},{ 6,15},{ 6,15},{ 8,15}, //The Spec doesn't mark the first fixed up index in this row, so I apply 15 for them, and seems correct - { 3,15},{ 3,15},{ 5,15},{ 5,15}, - { 5,15},{ 8,15},{ 5,15},{10,15}, - { 5,15},{10,15},{ 8,15},{13,15}, - { 3,15},{12,15},{ 3,15},{ 3, 8}, -}; -//static const uint4x4 candidateRotation[4] = -//{ -// {1,0,0,0},{0,1,0,0},{0,0,1,0},{0,0,0,1}, -// {0,0,0,1},{0,1,0,0},{0,0,1,0},{1,0,0,0}, -// {1,0,0,0},{0,0,0,1},{0,0,1,0},{0,1,0,0}, -// {1,0,0,0},{0,1,0,0},{0,0,0,1},{0,0,1,0} -//}; -//static const uint2 candidateIndexPrec[8] = {{3,0},{3,0},{2,0},{2,0}, -// {2,3}, //color index and alpha index can exchange -// {2,2},{4,4},{2,2}}; - -static const uint aWeight[3][16] = { {0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64}, - {0, 9, 18, 27, 37, 46, 55, 64, 0, 0, 0, 0, 0, 0, 0, 0}, - {0, 21, 43, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} }; - - //4 bit index: 0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64 -static const uint aStep[3][64] = { { 0, 0, 0, 1, 1, 1, 1, 2, - 2, 2, 2, 2, 3, 3, 3, 3, - 4, 4, 4, 4, 5, 5, 5, 5, - 6, 6, 6, 6, 6, 7, 7, 7, - 7, 8, 8, 8, 8, 9, 9, 9, - 9,10,10,10,10,10,11,11, - 11,11,12,12,12,12,13,13, - 13,13,14,14,14,14,15,15 }, - //3 bit index: 0, 9, 18, 27, 37, 46, 55, 64 - { 0,0,0,0,0,1,1,1, - 1,1,1,1,1,1,2,2, - 2,2,2,2,2,2,2,3, - 3,3,3,3,3,3,3,3, - 3,4,4,4,4,4,4,4, - 4,4,5,5,5,5,5,5, - 5,5,5,6,6,6,6,6, - 6,6,6,6,7,7,7,7 }, - //2 bit index: 0, 21, 43, 64 - { 0,0,0,0,0,0,0,0, - 0,0,0,1,1,1,1,1, - 1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1, - 1,2,2,2,2,2,2,2, - 2,2,2,2,2,2,2,2, - 2,2,2,2,2,2,3,3, - 3,3,3,3,3,3,3,3 } }; - -cbuffer cbCS : register( b0 ) -{ - uint g_tex_width; - uint g_num_block_x; - uint g_format; - uint g_mode_id; - uint g_start_block_id; - uint g_num_total_blocks; - float g_alpha_weight; -}; - -//Forward declaration -uint2x4 compress_endpoints0( inout uint2x4 endPoint, uint2 P ); //Mode = 0 -uint2x4 compress_endpoints1( inout uint2x4 endPoint, uint2 P ); //Mode = 1 -uint2x4 compress_endpoints2( inout uint2x4 endPoint ); //Mode = 2 -uint2x4 compress_endpoints3( inout uint2x4 endPoint, uint2 P ); //Mode = 3 -uint2x4 compress_endpoints7( inout uint2x4 endPoint, uint2 P ); //Mode = 7 -uint2x4 compress_endpoints6( inout uint2x4 endPoint, uint2 P ); //Mode = 6 -uint2x4 compress_endpoints4( inout uint2x4 endPoint ); //Mode = 4 -uint2x4 compress_endpoints5( inout uint2x4 endPoint ); //Mode = 5 - -void block_package0( out uint4 block, uint partition, uint threadBase ); //Mode0 -void block_package1( out uint4 block, uint partition, uint threadBase ); //Mode1 -void block_package2( out uint4 block, uint partition, uint threadBase ); //Mode2 -void block_package3( out uint4 block, uint partition, uint threadBase ); //Mode3 -void block_package4( out uint4 block, uint rotation, uint index_selector, uint threadBase ); //Mode4 -void block_package5( out uint4 block, uint rotation, uint threadBase ); //Mode5 -void block_package6( out uint4 block, uint threadBase ); //Mode6 -void block_package7( out uint4 block, uint partition, uint threadBase ); //Mode7 - - -void swap(inout uint4 lhs, inout uint4 rhs) -{ - uint4 tmp = lhs; - lhs = rhs; - rhs = tmp; -} -void swap(inout uint3 lhs, inout uint3 rhs) -{ - uint3 tmp = lhs; - lhs = rhs; - rhs = tmp; -} -void swap(inout uint lhs, inout uint rhs) -{ - uint tmp = lhs; - lhs = rhs; - rhs = tmp; -} - -uint ComputeError(in uint4 a, in uint4 b) -{ - return dot(a.rgb, b.rgb) + g_alpha_weight * a.a*b.a; -} - -void Ensure_A_Is_Larger( inout uint4 a, inout uint4 b ) -{ - if ( a.x < b.x ) - swap( a.x, b.x ); - if ( a.y < b.y ) - swap( a.y, b.y ); - if ( a.z < b.z ) - swap( a.z, b.z ); - if ( a.w < b.w ) - swap( a.w, b.w ); -} - - -Texture2D g_Input : register( t0 ); -StructuredBuffer<uint4> g_InBuff : register( t1 ); - -RWStructuredBuffer<uint4> g_OutBuff : register( u0 ); - -#define THREAD_GROUP_SIZE 64 -#define BLOCK_SIZE_Y 4 -#define BLOCK_SIZE_X 4 -#define BLOCK_SIZE (BLOCK_SIZE_Y * BLOCK_SIZE_X) - -struct BufferShared -{ - uint4 pixel; - uint error; - uint mode; - uint partition; - uint index_selector; - uint rotation; - uint4 endPoint_low; - uint4 endPoint_high; - uint4 endPoint_low_quantized; - uint4 endPoint_high_quantized; -}; -groupshared BufferShared shared_temp[THREAD_GROUP_SIZE]; - -[numthreads( THREAD_GROUP_SIZE, 1, 1 )] -void TryMode456CS( uint GI : SV_GroupIndex, uint3 groupID : SV_GroupID ) // mode 4 5 6 all have 1 subset per block, and fix-up index is always index 0 -{ - // we process 4 BC blocks per thread group - const uint MAX_USED_THREAD = 16; // pixels in a BC (block compressed) block - uint BLOCK_IN_GROUP = THREAD_GROUP_SIZE / MAX_USED_THREAD; // the number of BC blocks a thread group processes = 64 / 16 = 4 - uint blockInGroup = GI / MAX_USED_THREAD; // what BC block this thread is on within this thread group - uint blockID = g_start_block_id + groupID.x * BLOCK_IN_GROUP + blockInGroup; // what global BC block this thread is on - uint threadBase = blockInGroup * MAX_USED_THREAD; // the first id of the pixel in this BC block in this thread group - uint threadInBlock = GI - threadBase; // id of the pixel in this BC block - -#ifndef REF_DEVICE - if (blockID >= g_num_total_blocks) - { - return; - } -#endif - - uint block_y = blockID / g_num_block_x; - uint block_x = blockID - block_y * g_num_block_x; - uint base_x = block_x * BLOCK_SIZE_X; - uint base_y = block_y * BLOCK_SIZE_Y; - - if (threadInBlock < 16) - { - shared_temp[GI].pixel = clamp(uint4(g_Input.Load( uint3( base_x + threadInBlock % 4, base_y + threadInBlock / 4, 0 ) ) * 255), 0, 255); - - shared_temp[GI].endPoint_low = shared_temp[GI].pixel; - shared_temp[GI].endPoint_high = shared_temp[GI].pixel; - } -#ifdef REF_DEVICE - GroupMemoryBarrierWithGroupSync(); -#endif - - if (threadInBlock < 8) - { - shared_temp[GI].endPoint_low = min(shared_temp[GI].endPoint_low, shared_temp[GI + 8].endPoint_low); - shared_temp[GI].endPoint_high = max(shared_temp[GI].endPoint_high, shared_temp[GI + 8].endPoint_high); - } -#ifdef REF_DEVICE - GroupMemoryBarrierWithGroupSync(); -#endif - if (threadInBlock < 4) - { - shared_temp[GI].endPoint_low = min(shared_temp[GI].endPoint_low, shared_temp[GI + 4].endPoint_low); - shared_temp[GI].endPoint_high = max(shared_temp[GI].endPoint_high, shared_temp[GI + 4].endPoint_high); - } -#ifdef REF_DEVICE - GroupMemoryBarrierWithGroupSync(); -#endif - if (threadInBlock < 2) - { - shared_temp[GI].endPoint_low = min(shared_temp[GI].endPoint_low, shared_temp[GI + 2].endPoint_low); - shared_temp[GI].endPoint_high = max(shared_temp[GI].endPoint_high, shared_temp[GI + 2].endPoint_high); - } -#ifdef REF_DEVICE - GroupMemoryBarrierWithGroupSync(); -#endif - if (threadInBlock < 1) - { - shared_temp[GI].endPoint_low = min(shared_temp[GI].endPoint_low, shared_temp[GI + 1].endPoint_low); - shared_temp[GI].endPoint_high = max(shared_temp[GI].endPoint_high, shared_temp[GI + 1].endPoint_high); - } -#ifdef REF_DEVICE - GroupMemoryBarrierWithGroupSync(); -#endif - - uint2x4 endPoint; - endPoint[0] = shared_temp[threadBase].endPoint_low; - endPoint[1] = shared_temp[threadBase].endPoint_high; - - uint error = 0xFFFFFFFF; - uint mode = 0; - uint index_selector = 0; - uint rotation = 0; - - uint2 indexPrec; - if (threadInBlock < 8) // all threads of threadInBlock < 8 will be working on trying out mode 4, since only mode 4 has index selector bit - { - if (0 == (threadInBlock & 1)) // thread 0, 2, 4, 6 - { - //2 represents 2bit index precision; 1 represents 3bit index precision - index_selector = 0; - indexPrec = uint2( 2, 1 ); - } - else // thread 1, 3, 5, 7 - { - //2 represents 2bit index precision; 1 represents 3bit index precision - index_selector = 1; - indexPrec = uint2( 1, 2 ); - } - } - else - { - //2 represents 2bit index precision - indexPrec = uint2( 2, 2 ); - } - - uint4 pixel_r; - uint color_index; - uint alpha_index; - int4 span; - int2 span_norm_sqr; - int2 dotProduct; - if (threadInBlock < 12) // Try mode 4 5 in threads 0..11 - { - // mode 4 5 have component rotation - if ((threadInBlock < 2) || (8 == threadInBlock)) // rotation = 0 in thread 0, 1 - { - rotation = 0; - } - else if ((threadInBlock < 4) || (9 == threadInBlock)) // rotation = 1 in thread 2, 3 - { - endPoint[0].ra = endPoint[0].ar; - endPoint[1].ra = endPoint[1].ar; - - rotation = 1; - } - else if ((threadInBlock < 6) || (10 == threadInBlock)) // rotation = 2 in thread 4, 5 - { - endPoint[0].ga = endPoint[0].ag; - endPoint[1].ga = endPoint[1].ag; - - rotation = 2; - } - else if ((threadInBlock < 8) || (11 == threadInBlock)) // rotation = 3 in thread 6, 7 - { - endPoint[0].ba = endPoint[0].ab; - endPoint[1].ba = endPoint[1].ab; - - rotation = 3; - } - - if (threadInBlock < 8) // try mode 4 in threads 0..7 - { - // mode 4 thread distribution - // Thread 0 1 2 3 4 5 6 7 - // Rotation 0 0 1 1 2 2 3 3 - // Index selector 0 1 0 1 0 1 0 1 - - mode = 4; - compress_endpoints4( endPoint ); - } - else // try mode 5 in threads 8..11 - { - // mode 5 thread distribution - // Thread 8 9 10 11 - // Rotation 0 1 2 3 - - mode = 5; - compress_endpoints5( endPoint ); - } - - uint4 pixel = shared_temp[threadBase + 0].pixel; - if (1 == rotation) - { - pixel.ra = pixel.ar; - } - else if (2 == rotation) - { - pixel.ga = pixel.ag; - } - else if (3 == rotation) - { - pixel.ba = pixel.ab; - } - - span = endPoint[1] - endPoint[0]; - span_norm_sqr = uint2( dot( span.rgb, span.rgb ), span.a * span.a ); - - // in mode 4 5 6, end point 0 must be closer to pixel 0 than end point 1, because of the fix-up index is always index 0 - // TODO: this shouldn't be necessary here in error calculation - /* - dotProduct = int2( dot( span.rgb, pixel.rgb - endPoint[0].rgb ), span.a * ( pixel.a - endPoint[0].a ) ); - if ( span_norm_sqr.x > 0 && dotProduct.x > 0 && uint( dotProduct.x * 63.49999 ) > uint( 32 * span_norm_sqr.x ) ) - { - span.rgb = -span.rgb; - swap(endPoint[0].rgb, endPoint[1].rgb); - } - if ( span_norm_sqr.y > 0 && dotProduct.y > 0 && uint( dotProduct.y * 63.49999 ) > uint( 32 * span_norm_sqr.y ) ) - { - span.a = -span.a; - swap(endPoint[0].a, endPoint[1].a); - } - */ - - // should be the same as above - dotProduct = int2( dot( pixel.rgb - endPoint[0].rgb, pixel.rgb - endPoint[0].rgb ), dot( pixel.rgb - endPoint[1].rgb, pixel.rgb - endPoint[1].rgb ) ); - if ( dotProduct.x > dotProduct.y ) - { - span.rgb = -span.rgb; - swap(endPoint[0].rgb, endPoint[1].rgb); - } - dotProduct = int2( dot( pixel.a - endPoint[0].a, pixel.a - endPoint[0].a ), dot( pixel.a - endPoint[1].a, pixel.a - endPoint[1].a ) ); - if ( dotProduct.x > dotProduct.y ) - { - span.a = -span.a; - swap(endPoint[0].a, endPoint[1].a); - } - - error = 0; - for ( uint i = 0; i < 16; i ++ ) - { - pixel = shared_temp[threadBase + i].pixel; - if (1 == rotation) - { - pixel.ra = pixel.ar; - } - else if (2 == rotation) - { - pixel.ga = pixel.ag; - } - else if (3 == rotation) - { - pixel.ba = pixel.ab; - } - - dotProduct.x = dot( span.rgb, pixel.rgb - endPoint[0].rgb ); - color_index = ( span_norm_sqr.x <= 0 /*endPoint[0] == endPoint[1]*/ || dotProduct.x <= 0 /*pixel == endPoint[0]*/ ) ? 0 - : ( ( dotProduct.x < span_norm_sqr.x ) ? aStep[indexPrec.x][ uint( dotProduct.x * 63.49999 / span_norm_sqr.x ) ] : aStep[indexPrec.x][63] ); - dotProduct.y = dot( span.a, pixel.a - endPoint[0].a ); - alpha_index = ( span_norm_sqr.y <= 0 || dotProduct.y <= 0 ) ? 0 - : ( ( dotProduct.y < span_norm_sqr.y ) ? aStep[indexPrec.y][ uint( dotProduct.y * 63.49999 / span_norm_sqr.y ) ] : aStep[indexPrec.y][63] ); - - // the same color_index and alpha_index should be used for reconstruction, so this should be left commented out - /*if (index_selector) - { - swap(color_index, alpha_index); - }*/ - - pixel_r.rgb = ( ( 64 - aWeight[indexPrec.x][color_index] ) * endPoint[0].rgb + - aWeight[indexPrec.x][color_index] * endPoint[1].rgb + - 32 ) >> 6; - pixel_r.a = ( ( 64 - aWeight[indexPrec.y][alpha_index] ) * endPoint[0].a + - aWeight[indexPrec.y][alpha_index] * endPoint[1].a + - 32 ) >> 6; - - Ensure_A_Is_Larger( pixel_r, pixel ); - pixel_r -= pixel; - if (1 == rotation) - { - pixel_r.ra = pixel_r.ar; - } - else if (2 == rotation) - { - pixel_r.ga = pixel_r.ag; - } - else if (3 == rotation) - { - pixel_r.ba = pixel_r.ab; - } - error += ComputeError(pixel_r, pixel_r); - } - } - else if (threadInBlock < 16) // Try mode 6 in threads 12..15, since in mode 4 5 6, only mode 6 has p bit - { - uint p = threadInBlock - 12; - - compress_endpoints6( endPoint, uint2(p >> 0, p >> 1) & 1 ); - - uint4 pixel = shared_temp[threadBase + 0].pixel; - - span = endPoint[1] - endPoint[0]; - span_norm_sqr = dot( span, span ); - dotProduct = dot( span, pixel - endPoint[0] ); - if ( span_norm_sqr.x > 0 && dotProduct.x >= 0 && uint( dotProduct.x * 63.49999 ) > uint( 32 * span_norm_sqr.x ) ) - { - span = -span; - swap(endPoint[0], endPoint[1]); - } - - error = 0; - for ( uint i = 0; i < 16; i ++ ) - { - pixel = shared_temp[threadBase + i].pixel; - - dotProduct.x = dot( span, pixel - endPoint[0] ); - color_index = ( span_norm_sqr.x <= 0 || dotProduct.x <= 0 ) ? 0 - : ( ( dotProduct.x < span_norm_sqr.x ) ? aStep[0][ uint( dotProduct.x * 63.49999 / span_norm_sqr.x ) ] : aStep[0][63] ); - - pixel_r = ( ( 64 - aWeight[0][color_index] ) * endPoint[0] - + aWeight[0][color_index] * endPoint[1] + 32 ) >> 6; - - Ensure_A_Is_Larger( pixel_r, pixel ); - pixel_r -= pixel; - error += ComputeError(pixel_r, pixel_r); - } - - mode = 6; - rotation = p; // Borrow rotation for p - } - - shared_temp[GI].error = error; - shared_temp[GI].mode = mode; - shared_temp[GI].index_selector = index_selector; - shared_temp[GI].rotation = rotation; - -#ifdef REF_DEVICE - GroupMemoryBarrierWithGroupSync(); -#endif - - if (threadInBlock < 8) - { - if ( shared_temp[GI].error > shared_temp[GI + 8].error ) - { - shared_temp[GI].error = shared_temp[GI + 8].error; - shared_temp[GI].mode = shared_temp[GI + 8].mode; - shared_temp[GI].index_selector = shared_temp[GI + 8].index_selector; - shared_temp[GI].rotation = shared_temp[GI + 8].rotation; - } - } -#ifdef REF_DEVICE - GroupMemoryBarrierWithGroupSync(); -#endif - if (threadInBlock < 4) - { - if ( shared_temp[GI].error > shared_temp[GI + 4].error ) - { - shared_temp[GI].error = shared_temp[GI + 4].error; - shared_temp[GI].mode = shared_temp[GI + 4].mode; - shared_temp[GI].index_selector = shared_temp[GI + 4].index_selector; - shared_temp[GI].rotation = shared_temp[GI + 4].rotation; - } - } -#ifdef REF_DEVICE - GroupMemoryBarrierWithGroupSync(); -#endif - if (threadInBlock < 2) - { - if ( shared_temp[GI].error > shared_temp[GI + 2].error ) - { - shared_temp[GI].error = shared_temp[GI + 2].error; - shared_temp[GI].mode = shared_temp[GI + 2].mode; - shared_temp[GI].index_selector = shared_temp[GI + 2].index_selector; - shared_temp[GI].rotation = shared_temp[GI + 2].rotation; - } - } -#ifdef REF_DEVICE - GroupMemoryBarrierWithGroupSync(); -#endif - if (threadInBlock < 1) - { - if ( shared_temp[GI].error > shared_temp[GI + 1].error ) - { - shared_temp[GI].error = shared_temp[GI + 1].error; - shared_temp[GI].mode = shared_temp[GI + 1].mode; - shared_temp[GI].index_selector = shared_temp[GI + 1].index_selector; - shared_temp[GI].rotation = shared_temp[GI + 1].rotation; - } - - g_OutBuff[blockID] = uint4(shared_temp[GI].error, (shared_temp[GI].index_selector << 31) | shared_temp[GI].mode, - 0, shared_temp[GI].rotation); // rotation is indeed rotation for mode 4 5. for mode 6, rotation is p bit - } -} - -[numthreads( THREAD_GROUP_SIZE, 1, 1 )] -void TryMode137CS( uint GI : SV_GroupIndex, uint3 groupID : SV_GroupID ) // mode 1 3 7 all have 2 subsets per block -{ - const uint MAX_USED_THREAD = 64; - uint BLOCK_IN_GROUP = THREAD_GROUP_SIZE / MAX_USED_THREAD; - uint blockInGroup = GI / MAX_USED_THREAD; - uint blockID = g_start_block_id + groupID.x * BLOCK_IN_GROUP + blockInGroup; - uint threadBase = blockInGroup * MAX_USED_THREAD; - uint threadInBlock = GI - threadBase; - - uint block_y = blockID / g_num_block_x; - uint block_x = blockID - block_y * g_num_block_x; - uint base_x = block_x * BLOCK_SIZE_X; - uint base_y = block_y * BLOCK_SIZE_Y; - - if (threadInBlock < 16) - { - shared_temp[GI].pixel = clamp(uint4(g_Input.Load( uint3( base_x + threadInBlock % 4, base_y + threadInBlock / 4, 0 ) ) * 255), 0, 255); - } - GroupMemoryBarrierWithGroupSync(); - - shared_temp[GI].error = 0xFFFFFFFF; - - uint4 pixel_r; - uint2x4 endPoint[2]; // endPoint[0..1 for subset id][0..1 for low and high in the subset] - uint2x4 endPointBackup[2]; - uint color_index; - if (threadInBlock < 64) - { - uint partition = threadInBlock; - - endPoint[0][0] = MAX_UINT; - endPoint[0][1] = MIN_UINT; - endPoint[1][0] = MAX_UINT; - endPoint[1][1] = MIN_UINT; - uint bits = candidateSectionBit[partition]; - for ( uint i = 0; i < 16; i ++ ) - { - uint4 pixel = shared_temp[threadBase + i].pixel; - if ( (( bits >> i ) & 0x01) == 1 ) - { - endPoint[1][0] = min( endPoint[1][0], pixel ); - endPoint[1][1] = max( endPoint[1][1], pixel ); - } - else - { - endPoint[0][0] = min( endPoint[0][0], pixel ); - endPoint[0][1] = max( endPoint[0][1], pixel ); - } - } - - endPointBackup[0] = endPoint[0]; - endPointBackup[1] = endPoint[1]; - - uint max_p; - if (1 == g_mode_id) - { - // in mode 1, there is only one p bit per subset - max_p = 4; - } - else - { - // in mode 3 7, there are two p bits per subset, one for each end point - max_p = 16; - } - - uint rotation = 0; - uint error = MAX_UINT; - for ( uint p = 0; p < max_p; p ++ ) - { - endPoint[0] = endPointBackup[0]; - endPoint[1] = endPointBackup[1]; - - for ( i = 0; i < 2; i ++ ) // loop through 2 subsets - { - if (g_mode_id == 1) - { - compress_endpoints1( endPoint[i], (p >> i) & 1 ); - } - else if (g_mode_id == 3) - { - compress_endpoints3( endPoint[i], uint2(p >> (i * 2 + 0), p >> (i * 2 + 1)) & 1 ); - } - else if (g_mode_id == 7) - { - compress_endpoints7( endPoint[i], uint2(p >> (i * 2 + 0), p >> (i * 2 + 1)) & 1 ); - } - } - - int4 span[2]; - span[0] = endPoint[0][1] - endPoint[0][0]; - span[1] = endPoint[1][1] - endPoint[1][0]; - - if (g_mode_id != 7) - { - span[0].w = span[1].w = 0; - } - - int span_norm_sqr[2]; - span_norm_sqr[0] = dot( span[0], span[0] ); - span_norm_sqr[1] = dot( span[1], span[1] ); - - // TODO: again, this shouldn't be necessary here in error calculation - int dotProduct = dot( span[0], shared_temp[threadBase + 0].pixel - endPoint[0][0] ); - if ( span_norm_sqr[0] > 0 && dotProduct > 0 && uint( dotProduct * 63.49999 ) > uint( 32 * span_norm_sqr[0] ) ) - { - span[0] = -span[0]; - swap(endPoint[0][0], endPoint[0][1]); - } - dotProduct = dot( span[1], shared_temp[threadBase + candidateFixUpIndex1D[partition].x].pixel - endPoint[1][0] ); - if ( span_norm_sqr[1] > 0 && dotProduct > 0 && uint( dotProduct * 63.49999 ) > uint( 32 * span_norm_sqr[1] ) ) - { - span[1] = -span[1]; - swap(endPoint[1][0], endPoint[1][1]); - } - - uint step_selector; - if (g_mode_id != 1) - { - step_selector = 2; // mode 3 7 have 2 bit index - } - else - { - step_selector = 1; // mode 1 has 3 bit index - } - - uint p_error = 0; - for ( i = 0; i < 16; i ++ ) - { - if (((bits >> i) & 0x01) == 1) - { - dotProduct = dot( span[1], shared_temp[threadBase + i].pixel - endPoint[1][0] ); - color_index = (span_norm_sqr[1] <= 0 || dotProduct <= 0) ? 0 - : ((dotProduct < span_norm_sqr[1]) ? aStep[step_selector][uint(dotProduct * 63.49999 / span_norm_sqr[1])] : aStep[step_selector][63]); - } - else - { - dotProduct = dot( span[0], shared_temp[threadBase + i].pixel - endPoint[0][0] ); - color_index = (span_norm_sqr[0] <= 0 || dotProduct <= 0) ? 0 - : ((dotProduct < span_norm_sqr[0]) ? aStep[step_selector][uint(dotProduct * 63.49999 / span_norm_sqr[0])] : aStep[step_selector][63]); - } - - uint subset_index = (bits >> i) & 0x01; - - pixel_r = ((64 - aWeight[step_selector][color_index]) * endPoint[subset_index][0] - + aWeight[step_selector][color_index] * endPoint[subset_index][1] + 32) >> 6; - if (g_mode_id != 7) - { - pixel_r.a = 255; - } - - uint4 pixel = shared_temp[threadBase + i].pixel; - Ensure_A_Is_Larger( pixel_r, pixel ); - pixel_r -= pixel; - p_error += ComputeError(pixel_r, pixel_r); - } - - if (p_error < error) - { - error = p_error; - rotation = p; - } - } - - shared_temp[GI].error = error; - shared_temp[GI].mode = g_mode_id; - shared_temp[GI].partition = partition; - shared_temp[GI].rotation = rotation; // mode 1 3 7 don't have rotation, we use rotation for p bits - } - GroupMemoryBarrierWithGroupSync(); - - if (threadInBlock < 32) - { - if ( shared_temp[GI].error > shared_temp[GI + 32].error ) - { - shared_temp[GI].error = shared_temp[GI + 32].error; - shared_temp[GI].mode = shared_temp[GI + 32].mode; - shared_temp[GI].partition = shared_temp[GI + 32].partition; - shared_temp[GI].rotation = shared_temp[GI + 32].rotation; - } - } -#ifdef REF_DEVICE - GroupMemoryBarrierWithGroupSync(); -#endif -if (threadInBlock < 16) - { - if ( shared_temp[GI].error > shared_temp[GI + 16].error ) - { - shared_temp[GI].error = shared_temp[GI + 16].error; - shared_temp[GI].mode = shared_temp[GI + 16].mode; - shared_temp[GI].partition = shared_temp[GI + 16].partition; - shared_temp[GI].rotation = shared_temp[GI + 16].rotation; - } - } -#ifdef REF_DEVICE - GroupMemoryBarrierWithGroupSync(); -#endif - if (threadInBlock < 8) - { - if ( shared_temp[GI].error > shared_temp[GI + 8].error ) - { - shared_temp[GI].error = shared_temp[GI + 8].error; - shared_temp[GI].mode = shared_temp[GI + 8].mode; - shared_temp[GI].partition = shared_temp[GI + 8].partition; - shared_temp[GI].rotation = shared_temp[GI + 8].rotation; - } - } -#ifdef REF_DEVICE - GroupMemoryBarrierWithGroupSync(); -#endif - if (threadInBlock < 4) - { - if ( shared_temp[GI].error > shared_temp[GI + 4].error ) - { - shared_temp[GI].error = shared_temp[GI + 4].error; - shared_temp[GI].mode = shared_temp[GI + 4].mode; - shared_temp[GI].partition = shared_temp[GI + 4].partition; - shared_temp[GI].rotation = shared_temp[GI + 4].rotation; - } - } -#ifdef REF_DEVICE - GroupMemoryBarrierWithGroupSync(); -#endif - if (threadInBlock < 2) - { - if ( shared_temp[GI].error > shared_temp[GI + 2].error ) - { - shared_temp[GI].error = shared_temp[GI + 2].error; - shared_temp[GI].mode = shared_temp[GI + 2].mode; - shared_temp[GI].partition = shared_temp[GI + 2].partition; - shared_temp[GI].rotation = shared_temp[GI + 2].rotation; - } - } -#ifdef REF_DEVICE - GroupMemoryBarrierWithGroupSync(); -#endif - if (threadInBlock < 1) - { - if ( shared_temp[GI].error > shared_temp[GI + 1].error ) - { - shared_temp[GI].error = shared_temp[GI + 1].error; - shared_temp[GI].mode = shared_temp[GI + 1].mode; - shared_temp[GI].partition = shared_temp[GI + 1].partition; - shared_temp[GI].rotation = shared_temp[GI + 1].rotation; - } - - if (g_InBuff[blockID].x > shared_temp[GI].error) - { - g_OutBuff[blockID] = uint4(shared_temp[GI].error, shared_temp[GI].mode, shared_temp[GI].partition, shared_temp[GI].rotation); // mode 1 3 7 don't have rotation, we use rotation for p bits - } - else - { - g_OutBuff[blockID] = g_InBuff[blockID]; - } - } -} - -[numthreads( THREAD_GROUP_SIZE, 1, 1 )] -void TryMode02CS( uint GI : SV_GroupIndex, uint3 groupID : SV_GroupID ) // mode 0 2 have 3 subsets per block -{ - const uint MAX_USED_THREAD = 64; - uint BLOCK_IN_GROUP = THREAD_GROUP_SIZE / MAX_USED_THREAD; - uint blockInGroup = GI / MAX_USED_THREAD; - uint blockID = g_start_block_id + groupID.x * BLOCK_IN_GROUP + blockInGroup; - uint threadBase = blockInGroup * MAX_USED_THREAD; - uint threadInBlock = GI - threadBase; - - uint block_y = blockID / g_num_block_x; - uint block_x = blockID - block_y * g_num_block_x; - uint base_x = block_x * BLOCK_SIZE_X; - uint base_y = block_y * BLOCK_SIZE_Y; - - if (threadInBlock < 16) - { - shared_temp[GI].pixel = clamp(uint4(g_Input.Load( uint3( base_x + threadInBlock % 4, base_y + threadInBlock / 4, 0 ) ) * 255), 0, 255); - } - GroupMemoryBarrierWithGroupSync(); - - shared_temp[GI].error = 0xFFFFFFFF; - - uint num_partitions; - if (0 == g_mode_id) - { - num_partitions = 16; - } - else - { - num_partitions = 64; - } - - uint4 pixel_r; - uint2x4 endPoint[3]; // endPoint[0..1 for subset id][0..1 for low and high in the subset] - uint2x4 endPointBackup[3]; - uint color_index[16]; - if (threadInBlock < num_partitions) - { - uint partition = threadInBlock + 64; - - endPoint[0][0] = MAX_UINT; - endPoint[0][1] = MIN_UINT; - endPoint[1][0] = MAX_UINT; - endPoint[1][1] = MIN_UINT; - endPoint[2][0] = MAX_UINT; - endPoint[2][1] = MIN_UINT; - uint bits2 = candidateSectionBit2[partition - 64]; - for ( uint i = 0; i < 16; i ++ ) - { - uint4 pixel = shared_temp[threadBase + i].pixel; - uint subset_index = ( bits2 >> ( i * 2 ) ) & 0x03; - if ( subset_index == 2 ) - { - endPoint[2][0] = min( endPoint[2][0], pixel ); - endPoint[2][1] = max( endPoint[2][1], pixel ); - } - else if ( subset_index == 1 ) - { - endPoint[1][0] = min( endPoint[1][0], pixel ); - endPoint[1][1] = max( endPoint[1][1], pixel ); - } - else - { - endPoint[0][0] = min( endPoint[0][0], pixel ); - endPoint[0][1] = max( endPoint[0][1], pixel ); - } - } - - endPointBackup[0] = endPoint[0]; - endPointBackup[1] = endPoint[1]; - endPointBackup[2] = endPoint[2]; - - uint max_p; - if (0 == g_mode_id) - { - max_p = 64; // changed from 32 to 64 - } - else - { - max_p = 1; - } - - uint rotation = 0; - uint error = MAX_UINT; - for ( uint p = 0; p < max_p; p ++ ) - { - endPoint[0] = endPointBackup[0]; - endPoint[1] = endPointBackup[1]; - endPoint[2] = endPointBackup[2]; - - for ( i = 0; i < 3; i ++ ) - { - if (0 == g_mode_id) - { - compress_endpoints0( endPoint[i], uint2(p >> (i * 2 + 0), p >> (i * 2 + 1)) & 1 ); - } - else - { - compress_endpoints2( endPoint[i] ); - } - } - - uint step_selector = 1 + (2 == g_mode_id); - - int4 span[3]; - span[0] = endPoint[0][1] - endPoint[0][0]; - span[1] = endPoint[1][1] - endPoint[1][0]; - span[2] = endPoint[2][1] - endPoint[2][0]; - span[0].w = span[1].w = span[2].w = 0; - int span_norm_sqr[3]; - span_norm_sqr[0] = dot( span[0], span[0] ); - span_norm_sqr[1] = dot( span[1], span[1] ); - span_norm_sqr[2] = dot( span[2], span[2] ); - - // TODO: again, this shouldn't be necessary here in error calculation - uint ci[3] = { 0, candidateFixUpIndex1D[partition].x, candidateFixUpIndex1D[partition].y }; - for (i = 0; i < 3; i ++) - { - int dotProduct = dot( span[i], shared_temp[threadBase + ci[i]].pixel - endPoint[i][0] ); - if ( span_norm_sqr[i] > 0 && dotProduct > 0 && uint( dotProduct * 63.49999 ) > uint( 32 * span_norm_sqr[i] ) ) - { - span[i] = -span[i]; - swap(endPoint[i][0], endPoint[i][1]); - } - } - - uint p_error = 0; - for ( i = 0; i < 16; i ++ ) - { - uint subset_index = ( bits2 >> ( i * 2 ) ) & 0x03; - if ( subset_index == 2 ) - { - int dotProduct = dot( span[2], shared_temp[threadBase + i].pixel - endPoint[2][0] ); - color_index[i] = ( span_norm_sqr[2] <= 0 || dotProduct <= 0 ) ? 0 - : ( ( dotProduct < span_norm_sqr[2] ) ? aStep[step_selector][ uint( dotProduct * 63.49999 / span_norm_sqr[2] ) ] : aStep[step_selector][63] ); - } - else if ( subset_index == 1 ) - { - int dotProduct = dot( span[1], shared_temp[threadBase + i].pixel - endPoint[1][0] ); - color_index[i] = ( span_norm_sqr[1] <= 0 || dotProduct <= 0 ) ? 0 - : ( ( dotProduct < span_norm_sqr[1] ) ? aStep[step_selector][ uint( dotProduct * 63.49999 / span_norm_sqr[1] ) ] : aStep[step_selector][63] ); - } - else - { - int dotProduct = dot( span[0], shared_temp[threadBase + i].pixel - endPoint[0][0] ); - color_index[i] = ( span_norm_sqr[0] <= 0 || dotProduct <= 0 ) ? 0 - : ( ( dotProduct < span_norm_sqr[0] ) ? aStep[step_selector][ uint( dotProduct * 63.49999 / span_norm_sqr[0] ) ] : aStep[step_selector][63] ); - } - - pixel_r = ( ( 64 - aWeight[step_selector][color_index[i]] ) * endPoint[subset_index][0] - + aWeight[step_selector][color_index[i]] * endPoint[subset_index][1] + 32 ) >> 6; - pixel_r.a = 255; - - uint4 pixel = shared_temp[threadBase + i].pixel; - Ensure_A_Is_Larger( pixel_r, pixel ); - pixel_r -= pixel; - p_error += ComputeError(pixel_r, pixel_r); - } - - if (p_error < error) - { - error = p_error; - rotation = p; // Borrow rotation for p - } - } - - shared_temp[GI].error = error; - shared_temp[GI].partition = partition; - shared_temp[GI].rotation = rotation; - } - GroupMemoryBarrierWithGroupSync(); - - if (threadInBlock < 32) - { - if ( shared_temp[GI].error > shared_temp[GI + 32].error ) - { - shared_temp[GI].error = shared_temp[GI + 32].error; - shared_temp[GI].partition = shared_temp[GI + 32].partition; - shared_temp[GI].rotation = shared_temp[GI + 32].rotation; - } - } -#ifdef REF_DEVICE - GroupMemoryBarrierWithGroupSync(); -#endif - if (threadInBlock < 16) - { - if ( shared_temp[GI].error > shared_temp[GI + 16].error ) - { - shared_temp[GI].error = shared_temp[GI + 16].error; - shared_temp[GI].partition = shared_temp[GI + 16].partition; - shared_temp[GI].rotation = shared_temp[GI + 16].rotation; - } - } -#ifdef REF_DEVICE - GroupMemoryBarrierWithGroupSync(); -#endif - if (threadInBlock < 8) - { - if ( shared_temp[GI].error > shared_temp[GI + 8].error ) - { - shared_temp[GI].error = shared_temp[GI + 8].error; - shared_temp[GI].partition = shared_temp[GI + 8].partition; - shared_temp[GI].rotation = shared_temp[GI + 8].rotation; - } - } -#ifdef REF_DEVICE - GroupMemoryBarrierWithGroupSync(); -#endif - if (threadInBlock < 4) - { - if ( shared_temp[GI].error > shared_temp[GI + 4].error ) - { - shared_temp[GI].error = shared_temp[GI + 4].error; - shared_temp[GI].partition = shared_temp[GI + 4].partition; - shared_temp[GI].rotation = shared_temp[GI + 4].rotation; - } - } -#ifdef REF_DEVICE - GroupMemoryBarrierWithGroupSync(); -#endif - if (threadInBlock < 2) - { - if ( shared_temp[GI].error > shared_temp[GI + 2].error ) - { - shared_temp[GI].error = shared_temp[GI + 2].error; - shared_temp[GI].partition = shared_temp[GI + 2].partition; - shared_temp[GI].rotation = shared_temp[GI + 2].rotation; - } - } -#ifdef REF_DEVICE - GroupMemoryBarrierWithGroupSync(); -#endif - if (threadInBlock < 1) - { - if ( shared_temp[GI].error > shared_temp[GI + 1].error ) - { - shared_temp[GI].error = shared_temp[GI + 1].error; - shared_temp[GI].partition = shared_temp[GI + 1].partition; - shared_temp[GI].rotation = shared_temp[GI + 1].rotation; - } - - if (g_InBuff[blockID].x > shared_temp[GI].error) - { - g_OutBuff[blockID] = uint4(shared_temp[GI].error, g_mode_id, shared_temp[GI].partition, shared_temp[GI].rotation); // rotation is actually p bit for mode 0. for mode 2, rotation is always 0 - } - else - { - g_OutBuff[blockID] = g_InBuff[blockID]; - } - } -} - -[numthreads( THREAD_GROUP_SIZE, 1, 1 )] -void EncodeBlockCS(uint GI : SV_GroupIndex, uint3 groupID : SV_GroupID) -{ - const uint MAX_USED_THREAD = 16; - uint BLOCK_IN_GROUP = THREAD_GROUP_SIZE / MAX_USED_THREAD; - uint blockInGroup = GI / MAX_USED_THREAD; - uint blockID = g_start_block_id + groupID.x * BLOCK_IN_GROUP + blockInGroup; - uint threadBase = blockInGroup * MAX_USED_THREAD; - uint threadInBlock = GI - threadBase; - -#ifndef REF_DEVICE - if (blockID >= g_num_total_blocks) - { - return; - } -#endif - - uint block_y = blockID / g_num_block_x; - uint block_x = blockID - block_y * g_num_block_x; - uint base_x = block_x * BLOCK_SIZE_X; - uint base_y = block_y * BLOCK_SIZE_Y; - - uint mode = g_InBuff[blockID].y & 0x7FFFFFFF; - uint partition = g_InBuff[blockID].z; - uint index_selector = (g_InBuff[blockID].y >> 31) & 1; - uint rotation = g_InBuff[blockID].w; - - if (threadInBlock < 16) - { - uint4 pixel = clamp(uint4(g_Input.Load( uint3( base_x + threadInBlock % 4, base_y + threadInBlock / 4, 0 ) ) * 255), 0, 255); - - if ((4 == mode) || (5 == mode)) - { - if (1 == rotation) - { - pixel.ra = pixel.ar; - } - else if (2 == rotation) - { - pixel.ga = pixel.ag; - } - else if (3 == rotation) - { - pixel.ba = pixel.ab; - } - } - - shared_temp[GI].pixel = pixel; - } -#ifdef REF_DEVICE - GroupMemoryBarrierWithGroupSync(); -#endif - - uint bits = candidateSectionBit[partition]; - uint bits2 = candidateSectionBit2[partition - 64]; - - uint2x4 ep; - uint2x4 ep_quantized; - [unroll] - for (int ii = 2; ii >= 0; -- ii) - { - if (threadInBlock < 16) - { - uint2x4 ep; - ep[0] = MAX_UINT; - ep[1] = MIN_UINT; - - uint4 pixel = shared_temp[GI].pixel; - - uint subset_index = ( bits >> threadInBlock ) & 0x01; - uint subset_index2 = ( bits2 >> ( threadInBlock * 2 ) ) & 0x03; - if (0 == ii) - { - if ((0 == mode) || (2 == mode)) - { - if (0 == subset_index2) - { - ep[0] = ep[1] = pixel; - } - } - else if ((1 == mode) || (3 == mode) || (7 == mode)) - { - if (0 == subset_index) - { - ep[0] = ep[1] = pixel; - } - } - else if ((4 == mode) || (5 == mode) || (6 == mode)) - { - ep[0] = ep[1] = pixel; - } - } - else if (1 == ii) - { - if ((0 == mode) || (2 == mode)) - { - if (1 == subset_index2) - { - ep[0] = ep[1] = pixel; - } - } - else if ((1 == mode) || (3 == mode) || (7 == mode)) - { - if (1 == subset_index) - { - ep[0] = ep[1] = pixel; - } - } - } - else - { - if ((0 == mode) || (2 == mode)) - { - if (2 == subset_index2) - { - ep[0] = ep[1] = pixel; - } - } - } - - shared_temp[GI].endPoint_low = ep[0]; - shared_temp[GI].endPoint_high = ep[1]; - } -#ifdef REF_DEVICE - GroupMemoryBarrierWithGroupSync(); -#endif - - if (threadInBlock < 8) - { - shared_temp[GI].endPoint_low = min(shared_temp[GI].endPoint_low, shared_temp[GI + 8].endPoint_low); - shared_temp[GI].endPoint_high = max(shared_temp[GI].endPoint_high, shared_temp[GI + 8].endPoint_high); - } -#ifdef REF_DEVICE - GroupMemoryBarrierWithGroupSync(); -#endif - if (threadInBlock < 4) - { - shared_temp[GI].endPoint_low = min(shared_temp[GI].endPoint_low, shared_temp[GI + 4].endPoint_low); - shared_temp[GI].endPoint_high = max(shared_temp[GI].endPoint_high, shared_temp[GI + 4].endPoint_high); - } -#ifdef REF_DEVICE - GroupMemoryBarrierWithGroupSync(); -#endif - if (threadInBlock < 2) - { - shared_temp[GI].endPoint_low = min(shared_temp[GI].endPoint_low, shared_temp[GI + 2].endPoint_low); - shared_temp[GI].endPoint_high = max(shared_temp[GI].endPoint_high, shared_temp[GI + 2].endPoint_high); - } -#ifdef REF_DEVICE - GroupMemoryBarrierWithGroupSync(); -#endif - if (threadInBlock < 1) - { - shared_temp[GI].endPoint_low = min(shared_temp[GI].endPoint_low, shared_temp[GI + 1].endPoint_low); - shared_temp[GI].endPoint_high = max(shared_temp[GI].endPoint_high, shared_temp[GI + 1].endPoint_high); - } -#ifdef REF_DEVICE - GroupMemoryBarrierWithGroupSync(); -#endif - - if (ii == (int)threadInBlock) - { - ep[0] = shared_temp[threadBase].endPoint_low; - ep[1] = shared_temp[threadBase].endPoint_high; - } - } - - if (threadInBlock < 3) - { - uint2 P; - if (1 == mode) - { - P = (rotation >> threadInBlock) & 1; - } - else - { - P = uint2(rotation >> (threadInBlock * 2 + 0), rotation >> (threadInBlock * 2 + 1)) & 1; - } - - if (0 == mode) - { - ep_quantized = compress_endpoints0( ep, P ); - } - else if (1 == mode) - { - ep_quantized = compress_endpoints1( ep, P ); - } - else if (2 == mode) - { - ep_quantized = compress_endpoints2( ep ); - } - else if (3 == mode) - { - ep_quantized = compress_endpoints3( ep, P ); - } - else if (4 == mode) - { - ep_quantized = compress_endpoints4( ep ); - } - else if (5 == mode) - { - ep_quantized = compress_endpoints5( ep ); - } - else if (6 == mode) - { - ep_quantized = compress_endpoints6( ep, P ); - } - else //if (7 == mode) - { - ep_quantized = compress_endpoints7( ep, P ); - } - - int4 span = ep[1] - ep[0]; - if (mode < 4) - { - span.w = 0; - } - - if ((4 == mode) || (5 == mode)) - { - if (0 == threadInBlock) - { - int2 span_norm_sqr = uint2( dot( span.rgb, span.rgb ), span.a * span.a ); - int2 dotProduct = int2( dot( span.rgb, shared_temp[threadBase + 0].pixel.rgb - ep[0].rgb ), span.a * ( shared_temp[threadBase + 0].pixel.a - ep[0].a ) ); - if ( span_norm_sqr.x > 0 && dotProduct.x > 0 && uint( dotProduct.x * 63.49999 ) > uint( 32 * span_norm_sqr.x ) ) - { - swap(ep[0].rgb, ep[1].rgb); - swap(ep_quantized[0].rgb, ep_quantized[1].rgb); - } - if ( span_norm_sqr.y > 0 && dotProduct.y > 0 && uint( dotProduct.y * 63.49999 ) > uint( 32 * span_norm_sqr.y ) ) - { - swap(ep[0].a, ep[1].a); - swap(ep_quantized[0].a, ep_quantized[1].a); - } - } - } - else //if ((0 == mode) || (2 == mode) || (1 == mode) || (3 == mode) || (7 == mode) || (6 == mode)) - { - int p; - if (0 == threadInBlock) - { - p = 0; - } - else if (1 == threadInBlock) - { - p = candidateFixUpIndex1D[partition].x; - } - else //if (2 == threadInBlock) - { - p = candidateFixUpIndex1D[partition].y; - } - - int span_norm_sqr = dot( span, span ); - int dotProduct = dot( span, shared_temp[threadBase + p].pixel - ep[0] ); - if ( span_norm_sqr > 0 && dotProduct > 0 && uint( dotProduct * 63.49999 ) > uint( 32 * span_norm_sqr ) ) - { - swap(ep[0], ep[1]); - swap(ep_quantized[0], ep_quantized[1]); - } - } - - shared_temp[GI].endPoint_low = ep[0]; - shared_temp[GI].endPoint_high = ep[1]; - shared_temp[GI].endPoint_low_quantized = ep_quantized[0]; - shared_temp[GI].endPoint_high_quantized = ep_quantized[1]; - } -#ifdef REF_DEVICE - GroupMemoryBarrierWithGroupSync(); -#endif - - if (threadInBlock < 16) - { - uint color_index = 0; - uint alpha_index = 0; - - uint2x4 ep; - - uint2 indexPrec; - if ((0 == mode) || (1 == mode)) - { - indexPrec = 1; - } - else if (6 == mode) - { - indexPrec = 0; - } - else if (4 == mode) - { - if (0 == index_selector) - { - indexPrec = uint2(2, 1); - } - else - { - indexPrec = uint2(1, 2); - } - } - else - { - indexPrec = 2; - } - - int subset_index; - if ((0 == mode) || (2 == mode)) - { - subset_index = (bits2 >> (threadInBlock * 2)) & 0x03; - } - else if ((1 == mode) || (3 == mode) || (7 == mode)) - { - subset_index = (bits >> threadInBlock) & 0x01; - } - else - { - subset_index = 0; - } - - ep[0] = shared_temp[threadBase + subset_index].endPoint_low; - ep[1] = shared_temp[threadBase + subset_index].endPoint_high; - - int4 span = ep[1] - ep[0]; - if (mode < 4) - { - span.w = 0; - } - - if ((4 == mode) || (5 == mode)) - { - int2 span_norm_sqr; - span_norm_sqr.x = dot( span.rgb, span.rgb ); - span_norm_sqr.y = span.a * span.a; - - int dotProduct = dot( span.rgb, shared_temp[threadBase + threadInBlock].pixel.rgb - ep[0].rgb ); - color_index = ( span_norm_sqr.x <= 0 || dotProduct <= 0 ) ? 0 - : ( ( dotProduct < span_norm_sqr.x ) ? aStep[indexPrec.x][ uint( dotProduct * 63.49999 / span_norm_sqr.x ) ] : aStep[indexPrec.x][63] ); - dotProduct = dot( span.a, shared_temp[threadBase + threadInBlock].pixel.a - ep[0].a ); - alpha_index = ( span_norm_sqr.y <= 0 || dotProduct <= 0 ) ? 0 - : ( ( dotProduct < span_norm_sqr.y ) ? aStep[indexPrec.y][ uint( dotProduct * 63.49999 / span_norm_sqr.y ) ] : aStep[indexPrec.y][63] ); - - if (index_selector) - { - swap(color_index, alpha_index); - } - } - else - { - int span_norm_sqr = dot( span, span ); - - int dotProduct = dot( span, shared_temp[threadBase + threadInBlock].pixel - ep[0] ); - color_index = ( span_norm_sqr <= 0 || dotProduct <= 0 ) ? 0 - : ( ( dotProduct < span_norm_sqr ) ? aStep[indexPrec.x][ uint( dotProduct * 63.49999 / span_norm_sqr ) ] : aStep[indexPrec.x][63] ); - } - - shared_temp[GI].error = color_index; - shared_temp[GI].mode = alpha_index; - } -#ifdef REF_DEVICE - GroupMemoryBarrierWithGroupSync(); -#endif - - if (0 == threadInBlock) - { - uint4 block; - if (0 == mode) - { - block_package0( block, partition, threadBase ); - } - else if (1 == mode) - { - block_package1( block, partition, threadBase ); - } - else if (2 == mode) - { - block_package2( block, partition, threadBase ); - } - else if (3 == mode) - { - block_package3( block, partition, threadBase ); - } - else if (4 == mode) - { - block_package4( block, rotation, index_selector, threadBase ); - } - else if (5 == mode) - { - block_package5( block, rotation, threadBase ); - } - else if (6 == mode) - { - block_package6( block, threadBase ); - } - else //if (7 == mode) - { - block_package7( block, partition, threadBase ); - } - - g_OutBuff[blockID] = block; - } -} - -//uint4 truncate_and_round( uint4 color, uint bits) -//{ -// uint precisionMask = ((1 << bits) - 1) << (8 - bits); -// uint precisionHalf = (1 << (7-bits)); -// -// uint4 truncated = color & precisionMask; -// uint4 rounded = min(255, color + precisionHalf) & precisionMask; -// -// uint4 truncated_bak = truncated = truncated | (truncated >> bits); -// uint4 rounded_bak = rounded = rounded | (rounded >> bits); -// -// uint4 color_bak = color; -// -// Ensure_A_Is_Larger( rounded, color ); -// Ensure_A_Is_Larger( truncated, color_bak ); -// -// if (dot(rounded - color, rounded - color) < -// dot(truncated - color_bak, truncated - color_bak)) -// { -// return rounded_bak; -// } -// else -// { -// return truncated_bak; -// } -//} - -uint4 quantize( uint4 color, uint uPrec ) -{ - uint4 rnd = min(255, color + (1 << (7 - uPrec))); - return rnd >> (8 - uPrec); -} - -uint4 unquantize( uint4 color, uint uPrec ) -{ - color = color << (8 - uPrec); - return color | (color >> uPrec); -} - -uint2x4 compress_endpoints0( inout uint2x4 endPoint, uint2 P ) -{ - uint2x4 quantized; - for ( uint j = 0; j < 2; j ++ ) - { - quantized[j].rgb = quantize(endPoint[j].rgbb, 5).rgb & 0xFFFFFFFE; - quantized[j].rgb |= P[j]; - quantized[j].a = 0xFF; - - endPoint[j].rgb = unquantize(quantized[j].rgbb, 5).rgb; - endPoint[j].a = 0xFF; - - quantized[j] <<= 3; - } - return quantized; -} -uint2x4 compress_endpoints1( inout uint2x4 endPoint, uint2 P ) -{ - uint2x4 quantized; - for ( uint j = 0; j < 2; j ++ ) - { - quantized[j].rgb = quantize(endPoint[j].rgbb, 7).rgb & 0xFFFFFFFE; - quantized[j].rgb |= P[j]; - quantized[j].a = 0xFF; - - endPoint[j].rgb = unquantize(quantized[j].rgbb, 7).rgb; - endPoint[j].a = 0xFF; - - quantized[j] <<= 1; - } - return quantized; -} -uint2x4 compress_endpoints2( inout uint2x4 endPoint ) -{ - uint2x4 quantized; - for ( uint j = 0; j < 2; j ++ ) - { - quantized[j].rgb = quantize(endPoint[j].rgbb, 5).rgb; - quantized[j].a = 0xFF; - - endPoint[j].rgb = unquantize(quantized[j].rgbb, 5).rgb; - endPoint[j].a = 0xFF; - - quantized[j] <<= 3; - } - return quantized; -} -uint2x4 compress_endpoints3( inout uint2x4 endPoint, uint2 P ) -{ - uint2x4 quantized; - for ( uint j = 0; j < 2; j ++ ) - { - quantized[j].rgb = endPoint[j].rgb & 0xFFFFFFFE; - quantized[j].rgb |= P[j]; - quantized[j].a = 0xFF; - - endPoint[j].rgb = quantized[j].rgb; - endPoint[j].a = 0xFF; - } - return quantized; -} -uint2x4 compress_endpoints4( inout uint2x4 endPoint ) -{ - uint2x4 quantized; - for ( uint j = 0; j < 2; j ++ ) - { - quantized[j].rgb = quantize(endPoint[j].rgbb, 5).rgb; - quantized[j].a = quantize(endPoint[j].a, 6).r; - - endPoint[j].rgb = unquantize(quantized[j].rgbb, 5).rgb; - endPoint[j].a = unquantize(quantized[j].a, 6).r; - - quantized[j].rgb <<= 3; - quantized[j].a <<= 2; - } - return quantized; -} -uint2x4 compress_endpoints5( inout uint2x4 endPoint ) -{ - uint2x4 quantized; - for ( uint j = 0; j < 2; j ++ ) - { - quantized[j].rgb = quantize(endPoint[j].rgbb, 7).rgb; - quantized[j].a = endPoint[j].a; - - endPoint[j].rgb = unquantize(quantized[j].rgbb, 7).rgb; - // endPoint[j].a Alpha is full precision - - quantized[j].rgb <<= 1; - } - return quantized; -} -uint2x4 compress_endpoints6( inout uint2x4 endPoint, uint2 P ) -{ - uint2x4 quantized; - for ( uint j = 0; j < 2; j ++ ) - { - quantized[j] = endPoint[j] & 0xFFFFFFFE; - quantized[j] |= P[j]; - - endPoint[j] = quantized[j]; - } - return quantized; -} -uint2x4 compress_endpoints7( inout uint2x4 endPoint, uint2 P ) -{ - uint2x4 quantized; - for ( uint j = 0; j < 2; j ++ ) - { - quantized[j] = quantize(endPoint[j], 6) & 0xFFFFFFFE; - quantized[j] |= P[j]; - - endPoint[j] = unquantize(quantized[j], 6); - } - return quantized << 2; -} - -#define get_end_point_l(subset) shared_temp[threadBase + subset].endPoint_low_quantized -#define get_end_point_h(subset) shared_temp[threadBase + subset].endPoint_high_quantized -#define get_color_index(index) shared_temp[threadBase + index].error -#define get_alpha_index(index) shared_temp[threadBase + index].mode - -void block_package0( out uint4 block, uint partition, uint threadBase ) -{ - block.x = 0x01 | ( (partition - 64) << 1 ) - | ( ( get_end_point_l(0).r & 0xF0 ) << 1 ) | ( ( get_end_point_h(0).r & 0xF0 ) << 5 ) - | ( ( get_end_point_l(1).r & 0xF0 ) << 9 ) | ( ( get_end_point_h(1).r & 0xF0 ) << 13 ) - | ( ( get_end_point_l(2).r & 0xF0 ) << 17 ) | ( ( get_end_point_h(2).r & 0xF0 ) << 21 ) - | ( ( get_end_point_l(0).g & 0xF0 ) << 25 ); - block.y = ( ( get_end_point_l(0).g & 0xF0 ) >> 7 ) | ( ( get_end_point_h(0).g & 0xF0 ) >> 3 ) - | ( ( get_end_point_l(1).g & 0xF0 ) << 1 ) | ( ( get_end_point_h(1).g & 0xF0 ) << 5 ) - | ( ( get_end_point_l(2).g & 0xF0 ) << 9 ) | ( ( get_end_point_h(2).g & 0xF0 ) << 13 ) - | ( ( get_end_point_l(0).b & 0xF0 ) << 17 ) | ( ( get_end_point_h(0).b & 0xF0 ) << 21 ) - | ( ( get_end_point_l(1).b & 0xF0 ) << 25 ); - block.z = ( ( get_end_point_l(1).b & 0xF0 ) >> 7 ) | ( ( get_end_point_h(1).b & 0xF0 ) >> 3 ) - | ( ( get_end_point_l(2).b & 0xF0 ) << 1 ) | ( ( get_end_point_h(2).b & 0xF0 ) << 5 ) - | ( ( get_end_point_l(0).r & 0x08 ) << 10 ) | ( ( get_end_point_h(0).r & 0x08 ) << 11 ) - | ( ( get_end_point_l(1).r & 0x08 ) << 12 ) | ( ( get_end_point_h(1).r & 0x08 ) << 13 ) - | ( ( get_end_point_l(2).r & 0x08 ) << 14 ) | ( ( get_end_point_h(2).r & 0x08 ) << 15 ) - | ( get_color_index(0) << 19 ); - block.w = 0; - uint i = 1; - for ( ; i <= min( candidateFixUpIndex1DOrdered[partition][0], 4 ); i ++ ) - { - block.z |= get_color_index(i) << ( i * 3 + 18 ); - } - if ( candidateFixUpIndex1DOrdered[partition][0] < 4 ) //i = 4 - { - block.z |= get_color_index(4) << 29; - i += 1; - } - else //i = 5 - { - block.w |= ( get_color_index(4) & 0x04 ) >> 2; - for ( ; i <= candidateFixUpIndex1DOrdered[partition][0]; i ++ ) - block.w |= get_color_index(i) << ( i * 3 - 14 ); - } - for ( ; i <= candidateFixUpIndex1DOrdered[partition][1]; i ++ ) - { - block.w |= get_color_index(i) << ( i * 3 - 15 ); - } - for ( ; i < 16; i ++ ) - { - block.w |= get_color_index(i) << ( i * 3 - 16 ); - } -} -void block_package1( out uint4 block, uint partition, uint threadBase ) -{ - block.x = 0x02 | ( partition << 2 ) - | ( ( get_end_point_l(0).r & 0xFC ) << 6 ) | ( ( get_end_point_h(0).r & 0xFC ) << 12 ) - | ( ( get_end_point_l(1).r & 0xFC ) << 18 ) | ( ( get_end_point_h(1).r & 0xFC ) << 24 ); - block.y = ( ( get_end_point_l(0).g & 0xFC ) >> 2 ) | ( ( get_end_point_h(0).g & 0xFC ) << 4 ) - | ( ( get_end_point_l(1).g & 0xFC ) << 10 ) | ( ( get_end_point_h(1).g & 0xFC ) << 16 ) - | ( ( get_end_point_l(0).b & 0xFC ) << 22 ) | ( ( get_end_point_h(0).b & 0xFC ) << 28 ); - block.z = ( ( get_end_point_h(0).b & 0xFC ) >> 4 ) | ( ( get_end_point_l(1).b & 0xFC ) << 2 ) - | ( ( get_end_point_h(1).b & 0xFC ) << 8 ) - | ( ( get_end_point_l(0).r & 0x02 ) << 15 ) | ( ( get_end_point_l(1).r & 0x02 ) << 16 ) - | ( get_color_index(0) << 18 ); - if ( candidateFixUpIndex1DOrdered[partition][0] == 15 ) - { - block.w = (get_color_index(15) << 30) | (get_color_index(14) << 27) | (get_color_index(13) << 24) | (get_color_index(12) << 21) | (get_color_index(11) << 18) | (get_color_index(10) << 15) - | (get_color_index(9) << 12) | (get_color_index(8) << 9) | (get_color_index(7) << 6) | (get_color_index(6) << 3) | get_color_index(5); - block.z |= (get_color_index(4) << 29) | (get_color_index(3) << 26) | (get_color_index(2) << 23) | (get_color_index(1) << 20) | (get_color_index(0) << 18); - } - else if ( candidateFixUpIndex1DOrdered[partition][0] == 2 ) - { - block.w = (get_color_index(15) << 29) | (get_color_index(14) << 26) | (get_color_index(13) << 23) | (get_color_index(12) << 20) | (get_color_index(11) << 17) | (get_color_index(10) << 14) - | (get_color_index(9) << 11) | (get_color_index(8) << 8) | (get_color_index(7) << 5) | (get_color_index(6) << 2) | (get_color_index(5) >> 1); - block.z |= (get_color_index(5) << 31) | (get_color_index(4) << 28) | (get_color_index(3) << 25) | (get_color_index(2) << 23) | (get_color_index(1) << 20) | (get_color_index(0) << 18); - } - else if ( candidateFixUpIndex1DOrdered[partition][0] == 8 ) - { - block.w = (get_color_index(15) << 29) | (get_color_index(14) << 26) | (get_color_index(13) << 23) | (get_color_index(12) << 20) | (get_color_index(11) << 17) | (get_color_index(10) << 14) - | (get_color_index(9) << 11) | (get_color_index(8) << 9) | (get_color_index(7) << 6) | (get_color_index(6) << 3) | get_color_index(5); - block.z |= (get_color_index(4) << 29) | (get_color_index(3) << 26) | (get_color_index(2) << 23) | (get_color_index(1) << 20) | (get_color_index(0) << 18); - } - else //candidateFixUpIndex1DOrdered[partition] == 6 - { - block.w = (get_color_index(15) << 29) | (get_color_index(14) << 26) | (get_color_index(13) << 23) | (get_color_index(12) << 20) | (get_color_index(11) << 17) | (get_color_index(10) << 14) - | (get_color_index(9) << 11) | (get_color_index(8) << 8) | (get_color_index(7) << 6) | (get_color_index(6) << 4) | get_color_index(5); - block.z |= (get_color_index(4) << 29) | (get_color_index(3) << 26) | (get_color_index(2) << 23) | (get_color_index(1) << 20) | (get_color_index(0) << 18); - } -} -void block_package2( out uint4 block, uint partition, uint threadBase ) -{ - block.x = 0x04 | ( (partition - 64) << 3 ) - | ( ( get_end_point_l(0).r & 0xF8 ) << 6 ) | ( ( get_end_point_h(0).r & 0xF8 ) << 11 ) - | ( ( get_end_point_l(1).r & 0xF8 ) << 16 ) | ( ( get_end_point_h(1).r & 0xF8 ) << 21 ) - | ( ( get_end_point_l(2).r & 0xF8 ) << 26 ); - block.y = ( ( get_end_point_l(2).r & 0xF8 ) >> 6 ) | ( ( get_end_point_h(2).r & 0xF8 ) >> 1 ) - | ( ( get_end_point_l(0).g & 0xF8 ) << 4 ) | ( ( get_end_point_h(0).g & 0xF8 ) << 9 ) - | ( ( get_end_point_l(1).g & 0xF8 ) << 14 ) | ( ( get_end_point_h(1).g & 0xF8 ) << 19 ) - | ( ( get_end_point_l(2).g & 0xF8 ) << 24 ); - block.z = ( ( get_end_point_h(2).g & 0xF8 ) >> 3 ) | ( ( get_end_point_l(0).b & 0xF8 ) << 2 ) - | ( ( get_end_point_h(0).b & 0xF8 ) << 7 ) | ( ( get_end_point_l(1).b & 0xF8 ) << 12 ) - | ( ( get_end_point_h(1).b & 0xF8 ) << 17 ) | ( ( get_end_point_l(2).b & 0xF8 ) << 22 ) - | ( ( get_end_point_h(2).b & 0xF8 ) << 27 ); - block.w = ( ( get_end_point_h(2).b & 0xF8 ) >> 5 ) - | ( get_color_index(0) << 3 ); - uint i = 1; - for ( ; i <= candidateFixUpIndex1DOrdered[partition][0]; i ++ ) - { - block.w |= get_color_index(i) << ( i * 2 + 2 ); - } - for ( ; i <= candidateFixUpIndex1DOrdered[partition][1]; i ++ ) - { - block.w |= get_color_index(i) << ( i * 2 + 1 ); - } - for ( ; i < 16; i ++ ) - { - block.w |= get_color_index(i) << ( i * 2 ); - } -} -void block_package3( out uint4 block, uint partition, uint threadBase ) -{ - block.x = 0x08 | ( partition << 4 ) - | ( ( get_end_point_l(0).r & 0xFE ) << 9 ) | ( ( get_end_point_h(0).r & 0xFE ) << 16 ) - | ( ( get_end_point_l(1).r & 0xFE ) << 23 ) | ( ( get_end_point_h(1).r & 0xFE ) << 30 ); - block.y = ( ( get_end_point_h(1).r & 0xFE ) >> 2 ) | ( ( get_end_point_l(0).g & 0xFE ) << 5 ) - | ( ( get_end_point_h(0).g & 0xFE ) << 12 ) | ( ( get_end_point_l(1).g & 0xFE ) << 19 ) - | ( ( get_end_point_h(1).g & 0xFE ) << 26 ); - block.z = ( ( get_end_point_h(1).g & 0xFE ) >> 6 ) | ( ( get_end_point_l(0).b & 0xFE ) << 1 ) - | ( ( get_end_point_h(0).b & 0xFE ) << 8 ) | ( ( get_end_point_l(1).b & 0xFE ) << 15 ) - | ( ( get_end_point_h(1).b & 0xFE ) << 22 ) - | ( ( get_end_point_l(0).r & 0x01 ) << 30 ) | ( ( get_end_point_h(0).r & 0x01 ) << 31 ); - block.w = ( ( get_end_point_l(1).r & 0x01 ) << 0 ) | ( ( get_end_point_h(1).r & 0x01 ) << 1 ) - | ( get_color_index(0) << 2 ); - uint i = 1; - for ( ; i <= candidateFixUpIndex1DOrdered[partition][0]; i ++ ) - { - block.w |= get_color_index(i) << ( i * 2 + 1 ); - } - for ( ; i < 16; i ++ ) - { - block.w |= get_color_index(i) << ( i * 2 ); - } -} -void block_package4( out uint4 block, uint rotation, uint index_selector, uint threadBase ) -{ - block.x = 0x10 | ( (rotation & 3) << 5 ) | ( (index_selector & 1) << 7 ) - | ( ( get_end_point_l(0).r & 0xF8 ) << 5 ) | ( ( get_end_point_h(0).r & 0xF8 ) << 10 ) - | ( ( get_end_point_l(0).g & 0xF8 ) << 15 ) | ( ( get_end_point_h(0).g & 0xF8 ) << 20 ) - | ( ( get_end_point_l(0).b & 0xF8 ) << 25 ); - - block.y = ( ( get_end_point_l(0).b & 0xF8 ) >> 7 ) | ( ( get_end_point_h(0).b & 0xF8 ) >> 2 ) - | ( ( get_end_point_l(0).a & 0xFC ) << 4 ) | ( ( get_end_point_h(0).a & 0xFC ) << 10 ) - | ( (get_color_index(0) & 1) << 18 ) | ( get_color_index(1) << 19 ) | ( get_color_index(2) << 21 ) | ( get_color_index(3) << 23 ) - | ( get_color_index(4) << 25 ) | ( get_color_index(5) << 27 ) | ( get_color_index(6) << 29 ) | ( get_color_index(7) << 31 ); - - block.z = ( get_color_index(7) >> 1 ) | ( get_color_index(8) << 1 ) | ( get_color_index(9) << 3 ) | ( get_color_index(10)<< 5 ) - | ( get_color_index(11)<< 7 ) | ( get_color_index(12)<< 9 ) | ( get_color_index(13)<< 11 ) | ( get_color_index(14)<< 13 ) - | ( get_color_index(15)<< 15 ) | ( (get_alpha_index(0) & 3) << 17 ) | ( get_alpha_index(1) << 19 ) | ( get_alpha_index(2) << 22 ) - | ( get_alpha_index(3) << 25 ) | ( get_alpha_index(4) << 28 ) | ( get_alpha_index(5) << 31 ); - - block.w = ( get_alpha_index(5) >> 1 ) | ( get_alpha_index(6) << 2 ) | ( get_alpha_index(7) << 5 ) | ( get_alpha_index(8) << 8 ) - | ( get_alpha_index(9) << 11 ) | ( get_alpha_index(10)<< 14 ) | ( get_alpha_index(11)<< 17 ) | ( get_alpha_index(12)<< 20 ) - | ( get_alpha_index(13)<< 23 ) | ( get_alpha_index(14)<< 26 ) | ( get_alpha_index(15)<< 29 ); -} -void block_package5( out uint4 block, uint rotation, uint threadBase ) -{ - block.x = 0x20 | ( rotation << 6 ) - | ( ( get_end_point_l(0).r & 0xFE ) << 7 ) | ( ( get_end_point_h(0).r & 0xFE ) << 14 ) - | ( ( get_end_point_l(0).g & 0xFE ) << 21 ) | ( ( get_end_point_h(0).g & 0xFE ) << 28 ); - block.y = ( ( get_end_point_h(0).g & 0xFE ) >> 4 ) | ( ( get_end_point_l(0).b & 0xFE ) << 3 ) - | ( ( get_end_point_h(0).b & 0xFE ) << 10 ) | ( get_end_point_l(0).a << 18 ) | ( get_end_point_h(0).a << 26 ); - block.z = ( get_end_point_h(0).a >> 6 ) - | ( get_color_index(0) << 2 ) | ( get_color_index(1) << 3 ) | ( get_color_index(2) << 5 ) | ( get_color_index(3) << 7 ) - | ( get_color_index(4) << 9 ) | ( get_color_index(5) << 11 ) | ( get_color_index(6) << 13 ) | ( get_color_index(7) << 15 ) - | ( get_color_index(8) << 17 ) | ( get_color_index(9) << 19 ) | ( get_color_index(10)<< 21 ) | ( get_color_index(11)<< 23 ) - | ( get_color_index(12)<< 25 ) | ( get_color_index(13)<< 27 ) | ( get_color_index(14)<< 29 ) | ( get_color_index(15)<< 31 ); - block.w = ( get_color_index(15)>> 1 ) | ( get_alpha_index(0) << 1 ) | ( get_alpha_index(1) << 2 ) | ( get_alpha_index(2) << 4 ) - | ( get_alpha_index(3) << 6 ) | ( get_alpha_index(4) << 8 ) | ( get_alpha_index(5) << 10 ) | ( get_alpha_index(6) << 12 ) - | ( get_alpha_index(7) << 14 ) | ( get_alpha_index(8) << 16 ) | ( get_alpha_index(9) << 18 ) | ( get_alpha_index(10)<< 20 ) - | ( get_alpha_index(11)<< 22 ) | ( get_alpha_index(12)<< 24 ) | ( get_alpha_index(13)<< 26 ) | ( get_alpha_index(14)<< 28 ) - | ( get_alpha_index(15)<< 30 ); -} -void block_package6( out uint4 block, uint threadBase ) -{ - block.x = 0x40 - | ( ( get_end_point_l(0).r & 0xFE ) << 6 ) | ( ( get_end_point_h(0).r & 0xFE ) << 13 ) - | ( ( get_end_point_l(0).g & 0xFE ) << 20 ) | ( ( get_end_point_h(0).g & 0xFE ) << 27 ); - block.y = ( ( get_end_point_h(0).g & 0xFE ) >> 5 ) | ( ( get_end_point_l(0).b & 0xFE ) << 2 ) - | ( ( get_end_point_h(0).b & 0xFE ) << 9 ) | ( ( get_end_point_l(0).a & 0xFE ) << 16 ) - | ( ( get_end_point_h(0).a & 0xFE ) << 23 ) - | ( get_end_point_l(0).r & 0x01 ) << 31; - block.z = ( get_end_point_h(0).r & 0x01 ) - | ( get_color_index(0) << 1 ) | ( get_color_index(1) << 4 ) | ( get_color_index(2) << 8 ) | ( get_color_index(3) << 12 ) - | ( get_color_index(4) << 16 ) | ( get_color_index(5) << 20 ) | ( get_color_index(6) << 24 ) | ( get_color_index(7) << 28 ); - block.w = ( get_color_index(8) << 0 ) | ( get_color_index(9) << 4 ) | ( get_color_index(10)<< 8 ) | ( get_color_index(11)<< 12 ) - | ( get_color_index(12)<< 16 ) | ( get_color_index(13)<< 20 ) | ( get_color_index(14)<< 24 ) | ( get_color_index(15)<< 28 ); -} -void block_package7( out uint4 block, uint partition, uint threadBase ) -{ - block.x = 0x80 | ( partition << 8 ) - | ( ( get_end_point_l(0).r & 0xF8 ) << 11 ) | ( ( get_end_point_h(0).r & 0xF8 ) << 16 ) - | ( ( get_end_point_l(1).r & 0xF8 ) << 21 ) | ( ( get_end_point_h(1).r & 0xF8 ) << 26 ); - block.y = ( ( get_end_point_h(1).r & 0xF8 ) >> 6 ) | ( ( get_end_point_l(0).g & 0xF8 ) >> 1 ) - | ( ( get_end_point_h(0).g & 0xF8 ) << 4 ) | ( ( get_end_point_l(1).g & 0xF8 ) << 9 ) - | ( ( get_end_point_h(1).g & 0xF8 ) << 14 ) | ( ( get_end_point_l(0).b & 0xF8 ) << 19 ) - | ( ( get_end_point_h(0).b & 0xF8 ) << 24 ); - block.z = ( ( get_end_point_l(1).b & 0xF8 ) >> 3 ) | ( ( get_end_point_h(1).b & 0xF8 ) << 2 ) - | ( ( get_end_point_l(0).a & 0xF8 ) << 7 ) | ( ( get_end_point_h(0).a & 0xF8 ) << 12 ) - | ( ( get_end_point_l(1).a & 0xF8 ) << 17 ) | ( ( get_end_point_h(1).a & 0xF8 ) << 22 ) - | ( ( get_end_point_l(0).r & 0x04 ) << 28 ) | ( ( get_end_point_h(0).r & 0x04 ) << 29 ); - block.w = ( ( get_end_point_l(1).r & 0x04 ) >> 2 ) | ( ( get_end_point_h(1).r & 0x04 ) >> 1 ) - | ( get_color_index(0) << 2 ); - uint i = 1; - for ( ; i <= candidateFixUpIndex1DOrdered[partition][0]; i ++ ) - { - block.w |= get_color_index(i) << ( i * 2 + 1 ); - } - for ( ; i < 16; i ++ ) - { - block.w |= get_color_index(i) << ( i * 2 ); - } -}
\ No newline at end of file diff --git a/tests/hlsl/dxsdk/BasicCompute11/BasicCompute11.hlsl b/tests/hlsl/dxsdk/BasicCompute11/BasicCompute11.hlsl deleted file mode 100644 index 664e92e5d..000000000 --- a/tests/hlsl/dxsdk/BasicCompute11/BasicCompute11.hlsl +++ /dev/null @@ -1,72 +0,0 @@ -//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues. -//TEST:COMPARE_HLSL: -profile cs_4_0 -entry CSMain -//-------------------------------------------------------------------------------------- -// File: BasicCompute11.hlsl -// -// This file contains the Compute Shader to perform array A + array B -// -// Copyright (c) Microsoft Corporation. All rights reserved. -//-------------------------------------------------------------------------------------- - -#ifdef USE_STRUCTURED_BUFFERS - -struct BufType -{ - int i; - float f; -#ifdef TEST_DOUBLE - double d; -#endif -}; - -StructuredBuffer<BufType> Buffer0 : register(t0); -StructuredBuffer<BufType> Buffer1 : register(t1); -RWStructuredBuffer<BufType> BufferOut : register(u0); - -[numthreads(1, 1, 1)] -void CSMain( uint3 DTid : SV_DispatchThreadID ) -{ - BufferOut[DTid.x].i = Buffer0[DTid.x].i + Buffer1[DTid.x].i; - BufferOut[DTid.x].f = Buffer0[DTid.x].f + Buffer1[DTid.x].f; -#ifdef TEST_DOUBLE - BufferOut[DTid.x].d = Buffer0[DTid.x].d + Buffer1[DTid.x].d; -#endif -} - -#else // The following code is for raw buffers - -ByteAddressBuffer Buffer0 : register(t0); -ByteAddressBuffer Buffer1 : register(t1); -RWByteAddressBuffer BufferOut : register(u0); - -[numthreads(1, 1, 1)] -void CSMain( uint3 DTid : SV_DispatchThreadID ) -{ -#ifdef TEST_DOUBLE - int i0 = asint( Buffer0.Load( DTid.x*16 ) ); - float f0 = asfloat( Buffer0.Load( DTid.x*16+4 ) ); - double d0 = asdouble( Buffer0.Load( DTid.x*16+8 ), Buffer0.Load( DTid.x*16+12 ) ); - int i1 = asint( Buffer1.Load( DTid.x*16 ) ); - float f1 = asfloat( Buffer1.Load( DTid.x*16+4 ) ); - double d1 = asdouble( Buffer1.Load( DTid.x*16+8 ), Buffer1.Load( DTid.x*16+12 ) ); - - BufferOut.Store( DTid.x*16, asuint(i0 + i1) ); - BufferOut.Store( DTid.x*16+4, asuint(f0 + f1) ); - - uint dl, dh; - asuint( d0 + d1, dl, dh ); - - BufferOut.Store( DTid.x*16+8, dl ); - BufferOut.Store( DTid.x*16+12, dh ); -#else - int i0 = asint( Buffer0.Load( DTid.x*8 ) ); - float f0 = asfloat( Buffer0.Load( DTid.x*8+4 ) ); - int i1 = asint( Buffer1.Load( DTid.x*8 ) ); - float f1 = asfloat( Buffer1.Load( DTid.x*8+4 ) ); - - BufferOut.Store( DTid.x*8, asuint(i0 + i1) ); - BufferOut.Store( DTid.x*8+4, asuint(f0 + f1) ); -#endif // TEST_DOUBLE -} - -#endif // USE_STRUCTURED_BUFFERS diff --git a/tests/hlsl/dxsdk/BasicHLSL11/BasicHLSL.fx b/tests/hlsl/dxsdk/BasicHLSL11/BasicHLSL.fx deleted file mode 100644 index bd28f862b..000000000 --- a/tests/hlsl/dxsdk/BasicHLSL11/BasicHLSL.fx +++ /dev/null @@ -1,158 +0,0 @@ -//TEST_IGNORE_FILE: -//-------------------------------------------------------------------------------------- -// File: BasicHLSL.fx -// -// The effect file for the BasicHLSL sample. -// -// Copyright (c) Microsoft Corporation. All rights reserved. -//-------------------------------------------------------------------------------------- - - -//-------------------------------------------------------------------------------------- -// Global variables -//-------------------------------------------------------------------------------------- -float4 g_MaterialAmbientColor; // Material's ambient color -float4 g_MaterialDiffuseColor; // Material's diffuse color -int g_nNumLights; - -float3 g_LightDir; // Light's direction in world space -float4 g_LightDiffuse; // Light's diffuse color -float4 g_LightAmbient; // Light's ambient color - -texture g_MeshTexture; // Color texture for mesh - -float g_fTime; // App's time in seconds -float4x4 g_mWorld; // World matrix for object -float4x4 g_mWorldViewProjection; // World * View * Projection matrix - - - -//-------------------------------------------------------------------------------------- -// Texture samplers -//-------------------------------------------------------------------------------------- -sampler MeshTextureSampler = -sampler_state -{ - Texture = <g_MeshTexture>; - MipFilter = LINEAR; - MinFilter = LINEAR; - MagFilter = LINEAR; -}; - - -//-------------------------------------------------------------------------------------- -// Vertex shader output structure -//-------------------------------------------------------------------------------------- -struct VS_OUTPUT -{ - float4 Position : POSITION; // vertex position - float4 Diffuse : COLOR0; // vertex diffuse color (note that COLOR0 is clamped from 0..1) - float2 TextureUV : TEXCOORD0; // vertex texture coords -}; - - -//-------------------------------------------------------------------------------------- -// This shader computes standard transform and lighting -//-------------------------------------------------------------------------------------- -VS_OUTPUT RenderSceneVS( float4 vPos : POSITION, - float3 vNormal : NORMAL, - float2 vTexCoord0 : TEXCOORD0, - uniform int nNumLights, - uniform bool bTexture, - uniform bool bAnimate ) -{ - - VS_OUTPUT Output; - float3 vNormalWorldSpace; - - // Transform the position from object space to homogeneous projection space - Output.Position = mul(vPos, g_mWorldViewProjection); - - // Transform the normal from object space to world space - vNormalWorldSpace = normalize(mul(vNormal, (float3x3)g_mWorld)); // normal (world space) - - // Compute simple directional lighting equation - float3 vTotalLightDiffuse = float3(0,0,0); - for(int i=0; i<nNumLights; i++ ) - vTotalLightDiffuse += g_LightDiffuse * max(0,dot(vNormalWorldSpace, g_LightDir)); - - Output.Diffuse.rgb = g_MaterialDiffuseColor * vTotalLightDiffuse + - g_MaterialAmbientColor * g_LightAmbient; - Output.Diffuse.a = 1.0f; - - // Just copy the texture coordinate through - if( bTexture ) - Output.TextureUV = vTexCoord0; - else - Output.TextureUV = 0; - - return Output; -} - - -//-------------------------------------------------------------------------------------- -// Pixel shader output structure -//-------------------------------------------------------------------------------------- -struct PS_OUTPUT -{ - float4 RGBColor : COLOR0; // Pixel color -}; - - -//-------------------------------------------------------------------------------------- -// This shader outputs the pixel's color by modulating the texture's -// color with diffuse material color -//-------------------------------------------------------------------------------------- -PS_OUTPUT RenderScenePS( VS_OUTPUT In, - uniform bool bTexture ) -{ - PS_OUTPUT Output; - - // Lookup mesh texture and modulate it with diffuse - if( bTexture ) - Output.RGBColor = tex2D(MeshTextureSampler, In.TextureUV) * In.Diffuse; - else - Output.RGBColor = In.Diffuse; - - return Output; -} - - -//-------------------------------------------------------------------------------------- -// Renders scene to render target -//-------------------------------------------------------------------------------------- -technique RenderSceneWithTexture1Light -{ - pass P0 - { - VertexShader = compile vs_2_0 RenderSceneVS( 1, true, true ); - PixelShader = compile ps_2_0 RenderScenePS( true ); // trivial pixel shader (could use FF instead if desired) - } -} - -technique RenderSceneWithTexture2Light -{ - pass P0 - { - VertexShader = compile vs_2_0 RenderSceneVS( 2, true, true ); - PixelShader = compile ps_2_0 RenderScenePS( true ); // trivial pixel shader (could use FF instead if desired) - } -} - -technique RenderSceneWithTexture3Light -{ - pass P0 - { - VertexShader = compile vs_2_0 RenderSceneVS( 3, true, true ); - PixelShader = compile ps_2_0 RenderScenePS( true ); // trivial pixel shader (could use FF instead if desired) - } -} - -technique RenderSceneNoTexture -{ - pass P0 - { - VertexShader = compile vs_2_0 RenderSceneVS( 1, false, false ); - PixelShader = compile ps_2_0 RenderScenePS( false ); // trivial pixel shader (could use FF instead if desired) - } -} diff --git a/tests/hlsl/dxsdk/BasicHLSL11/BasicHLSL11_PS.hlsl b/tests/hlsl/dxsdk/BasicHLSL11/BasicHLSL11_PS.hlsl deleted file mode 100644 index 33ea61b07..000000000 --- a/tests/hlsl/dxsdk/BasicHLSL11/BasicHLSL11_PS.hlsl +++ /dev/null @@ -1,60 +0,0 @@ -//TEST:COMPARE_HLSL:-no-mangle -profile ps_4_0 -entry PSMain - -#ifndef __SLANG__ -#define cbPerFrame cbPerFrame_0 -#define g_vLightDir g_vLightDir_0 -#define g_fAmbient g_fAmbient_0 -#define g_samLinear g_samLinear_0 -#define g_txDiffuse g_txDiffuse_0 -#endif - -//-------------------------------------------------------------------------------------- -// File: BasicHLSL11_PS.hlsl -// -// The pixel shader file for the BasicHLSL11 sample. -// -// Copyright (c) Microsoft Corporation. All rights reserved. -//-------------------------------------------------------------------------------------- - -//-------------------------------------------------------------------------------------- -// Globals -//-------------------------------------------------------------------------------------- -cbuffer cbPerObject : register( b0 ) -{ - float4 g_vObjectColor ;//SLANG: : packoffset( c0 ); -}; - -cbuffer cbPerFrame : register( b1 ) -{ - float3 g_vLightDir ;//SLANG: : packoffset( c0 ); - float g_fAmbient ;//SLANG: : packoffset( c0.w ); -}; - -//-------------------------------------------------------------------------------------- -// Textures and Samplers -//-------------------------------------------------------------------------------------- -Texture2D g_txDiffuse : register( t0 ); -SamplerState g_samLinear : register( s0 ); - -//-------------------------------------------------------------------------------------- -// Input / Output structures -//-------------------------------------------------------------------------------------- -struct PS_INPUT -{ - float3 vNormal : NORMAL; - float2 vTexcoord : TEXCOORD0; -}; - -//-------------------------------------------------------------------------------------- -// Pixel Shader -//-------------------------------------------------------------------------------------- -float4 PSMain( PS_INPUT Input ) : SV_TARGET -{ - float4 vDiffuse = g_txDiffuse.Sample( g_samLinear, Input.vTexcoord ); - - float fLighting = saturate( dot( g_vLightDir, Input.vNormal ) ); - fLighting = max( fLighting, g_fAmbient ); - - return vDiffuse * fLighting; -} - diff --git a/tests/hlsl/dxsdk/BasicHLSL11/BasicHLSL11_VS.hlsl b/tests/hlsl/dxsdk/BasicHLSL11/BasicHLSL11_VS.hlsl deleted file mode 100644 index a0fb3c9ce..000000000 --- a/tests/hlsl/dxsdk/BasicHLSL11/BasicHLSL11_VS.hlsl +++ /dev/null @@ -1,56 +0,0 @@ -//TEST:COMPARE_HLSL: -profile vs_4_0 -entry VSMain - -#ifndef __SLANG__ -#define cbPerObject cbPerObject_0 -#define g_mWorldViewProjection g_mWorldViewProjection_0 -#define g_mWorld g_mWorld_0 -#endif - -//-------------------------------------------------------------------------------------- -// File: BasicHLSL11_VS.hlsl -// -// The vertex shader file for the BasicHLSL11 sample. -// -// Copyright (c) Microsoft Corporation. All rights reserved. -//-------------------------------------------------------------------------------------- - -//-------------------------------------------------------------------------------------- -// Globals -//-------------------------------------------------------------------------------------- -cbuffer cbPerObject : register( b0 ) -{ - matrix g_mWorldViewProjection ;//SLANG: : packoffset( c0 ); - matrix g_mWorld ;//SLANG: : packoffset( c4 ); -}; - -//-------------------------------------------------------------------------------------- -// Input / Output structures -//-------------------------------------------------------------------------------------- -struct VS_INPUT -{ - float4 vPosition : POSITION; - float3 vNormal : NORMAL; - float2 vTexcoord : TEXCOORD0; -}; - -struct VS_OUTPUT -{ - float3 vNormal : NORMAL; - float2 vTexcoord : TEXCOORD0; - float4 vPosition : SV_POSITION; -}; - -//-------------------------------------------------------------------------------------- -// Vertex Shader -//-------------------------------------------------------------------------------------- -VS_OUTPUT VSMain( VS_INPUT Input ) -{ - VS_OUTPUT Output; - - Output.vPosition = mul( Input.vPosition, g_mWorldViewProjection ); - Output.vNormal = mul( Input.vNormal, (float3x3)g_mWorld ); - Output.vTexcoord = Input.vTexcoord; - - return Output; -} - diff --git a/tests/hlsl/dxsdk/BasicHLSLFX11/BasicHLSLFX11.fx b/tests/hlsl/dxsdk/BasicHLSLFX11/BasicHLSLFX11.fx deleted file mode 100644 index 1ecc1930a..000000000 --- a/tests/hlsl/dxsdk/BasicHLSLFX11/BasicHLSLFX11.fx +++ /dev/null @@ -1,181 +0,0 @@ -//TEST_IGNORE_FILE: -//-------------------------------------------------------------------------------------- -// File: BasicHLSL11.fx -// -// The effect file for the BasicHLSL sample. -// -// Copyright (c) Microsoft Corporation. All rights reserved. -//-------------------------------------------------------------------------------------- - - -//-------------------------------------------------------------------------------------- -// Global variables -//-------------------------------------------------------------------------------------- -float4 g_MaterialAmbientColor; // Material's ambient color -float4 g_MaterialDiffuseColor; // Material's diffuse color -int g_nNumLights; - -float3 g_LightDir[3]; // Light's direction in world space -float4 g_LightDiffuse[3]; // Light's diffuse color -float4 g_LightAmbient; // Light's ambient color - -Texture2D g_MeshTexture; // Color texture for mesh - -float g_fTime; // App's time in seconds -float4x4 g_mWorld; // World matrix for object -float4x4 g_mWorldViewProjection; // World * View * Projection matrix - -//-------------------------------------------------------------------------------------- -// DepthStates -//-------------------------------------------------------------------------------------- -DepthStencilState EnableDepth -{ - DepthEnable = TRUE; - DepthWriteMask = ALL; - DepthFunc = LESS_EQUAL; -}; - -//-------------------------------------------------------------------------------------- -// Texture samplers -//-------------------------------------------------------------------------------------- -SamplerState MeshTextureSampler -{ - Filter = MIN_MAG_MIP_LINEAR; - AddressU = Wrap; - AddressV = Wrap; -}; - - -//-------------------------------------------------------------------------------------- -// Vertex shader output structure -//-------------------------------------------------------------------------------------- -struct VS_OUTPUT -{ - float4 Position : SV_POSITION; // vertex position - float4 Diffuse : COLOR0; // vertex diffuse color (note that COLOR0 is clamped from 0..1) - float2 TextureUV : TEXCOORD0; // vertex texture coords -}; - - -//-------------------------------------------------------------------------------------- -// This shader computes standard transform and lighting -//-------------------------------------------------------------------------------------- -VS_OUTPUT RenderSceneVS( float4 vPos : POSITION, - float3 vNormal : NORMAL, - float2 vTexCoord0 : TEXCOORD, - uniform int nNumLights, - uniform bool bTexture, - uniform bool bAnimate ) -{ - VS_OUTPUT Output; - float3 vNormalWorldSpace; - - float4 vAnimatedPos = vPos; - - // Animation the vertex based on time and the vertex's object space position - if( bAnimate ) - vAnimatedPos += float4(vNormal, 0) * (sin(g_fTime+5.5)+0.5)*5; - - // Transform the position from object space to homogeneous projection space - Output.Position = mul(vAnimatedPos, g_mWorldViewProjection); - - // Transform the normal from object space to world space - vNormalWorldSpace = normalize(mul(vNormal, (float3x3)g_mWorld)); // normal (world space) - - // Compute simple directional lighting equation - float3 vTotalLightDiffuse = float3(0,0,0); - for(int i=0; i<nNumLights; i++ ) - vTotalLightDiffuse += g_LightDiffuse[i] * max(0,dot(vNormalWorldSpace, g_LightDir[i])); - - Output.Diffuse.rgb = g_MaterialDiffuseColor * vTotalLightDiffuse + - g_MaterialAmbientColor * g_LightAmbient; - Output.Diffuse.a = 1.0f; - - // Just copy the texture coordinate through - if( bTexture ) - Output.TextureUV = vTexCoord0; - else - Output.TextureUV = 0; - - return Output; -} - - -//-------------------------------------------------------------------------------------- -// Pixel shader output structure -//-------------------------------------------------------------------------------------- -struct PS_OUTPUT -{ - float4 RGBColor : SV_Target; // Pixel color -}; - - -//-------------------------------------------------------------------------------------- -// This shader outputs the pixel's color by modulating the texture's -// color with diffuse material color -//-------------------------------------------------------------------------------------- -PS_OUTPUT RenderScenePS( VS_OUTPUT In, - uniform bool bTexture ) -{ - PS_OUTPUT Output; - - // Lookup mesh texture and modulate it with diffuse - if( bTexture ) - Output.RGBColor = g_MeshTexture.Sample(MeshTextureSampler, In.TextureUV) * In.Diffuse; - else - Output.RGBColor = In.Diffuse; - - return Output; -} - - -//-------------------------------------------------------------------------------------- -// Renders scene to render target using D3D11 Techniques -//-------------------------------------------------------------------------------------- -technique11 RenderSceneWithTexture1Light -{ - pass P0 - { - SetVertexShader( CompileShader( vs_4_0_level_9_1, RenderSceneVS( 1, true, true ) ) ); - SetGeometryShader( NULL ); - SetPixelShader( CompileShader( ps_4_0_level_9_1, RenderScenePS( true ) ) ); - - SetDepthStencilState( EnableDepth, 0 ); - } -} - -technique11 RenderSceneWithTexture2Light -{ - pass P0 - { - SetVertexShader( CompileShader( vs_4_0_level_9_1, RenderSceneVS( 2, true, true ) ) ); - SetGeometryShader( NULL ); - SetPixelShader( CompileShader( ps_4_0_level_9_1, RenderScenePS( true ) ) ); - - SetDepthStencilState( EnableDepth, 0 ); - } -} - -technique11 RenderSceneWithTexture3Light -{ - pass P0 - { - SetVertexShader( CompileShader( vs_4_0_level_9_1, RenderSceneVS( 3, true, true ) ) ); - SetGeometryShader( NULL ); - SetPixelShader( CompileShader( ps_4_0_level_9_1, RenderScenePS( true ) ) ); - - SetDepthStencilState( EnableDepth, 0 ); - } -} - -technique11 RenderSceneNoTexture -{ - pass P0 - { - SetVertexShader( CompileShader( vs_4_0_level_9_1, RenderSceneVS( 1, true, true ) ) ); - SetGeometryShader( NULL ); - SetPixelShader( CompileShader( ps_4_0_level_9_1, RenderScenePS( false ) ) ); - - SetDepthStencilState( EnableDepth, 0 ); - } -}
\ No newline at end of file diff --git a/tests/hlsl/dxsdk/CascadedShadowMaps11/RenderCascadeScene.hlsl b/tests/hlsl/dxsdk/CascadedShadowMaps11/RenderCascadeScene.hlsl deleted file mode 100644 index 3224d783c..000000000 --- a/tests/hlsl/dxsdk/CascadedShadowMaps11/RenderCascadeScene.hlsl +++ /dev/null @@ -1,506 +0,0 @@ -//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues. -//TEST:COMPARE_HLSL: -profile vs_4_0 -entry VSMain -profile ps_4_0 -entry PSMain -//-------------------------------------------------------------------------------------- -// File: RenderCascadeScene.hlsl -// -// This is the main shader file. This shader is compiled with several different flags -// to provide different customizations based on user controls. -// -// -// Copyright (c) Microsoft Corporation. All rights reserved. -//-------------------------------------------------------------------------------------- - -//-------------------------------------------------------------------------------------- -// Globals -//-------------------------------------------------------------------------------------- - -// This flag uses the derivative information to map the texels in a shadow map to the -// view space plane of the primitive being rendred. This depth is then used as the -// comparison depth and reduces self shadowing aliases. This technique is expensive -// and is only valid when objects are planer ( such as a ground plane ). -#ifndef USE_DERIVATIVES_FOR_DEPTH_OFFSET_FLAG -#define USE_DERIVATIVES_FOR_DEPTH_OFFSET_FLAG 0 -#endif - -// This flag enables the shadow to blend between cascades. This is most useful when the -// the shadow maps are small and artifact can be seen between the various cascade layers. -#ifndef BLEND_BETWEEN_CASCADE_LAYERS_FLAG -#define BLEND_BETWEEN_CASCADE_LAYERS_FLAG 0 -#endif - -// There are two methods for selecting the proper cascade a fragment lies in. Interval selection -// compares the depth of the fragment against the frustum's depth partition. -// Map based selection compares the texture coordinates against the acutal cascade maps. -// Map based selection gives better coverage. -// Interval based selection is easier to extend and understand. -#ifndef SELECT_CASCADE_BY_INTERVAL_FLAG -#define SELECT_CASCADE_BY_INTERVAL_FLAG 0 -#endif - -// The number of cascades -#ifndef CASCADE_COUNT_FLAG -#define CASCADE_COUNT_FLAG 3 -#endif - - -// Most titles will find that 3-4 cascades with -// BLEND_BETWEEN_CASCADE_LAYERS_FLAG, is good for lower end PCs. -// High end PCs will be able to handle more cascades, and larger blur bands. -// In some cases such as when large PCF kernels are used, derivative based depth offsets could be used -// with larger PCF blur kernels on high end PCs for the ground plane. - -cbuffer cbAllShadowData : register( b0 ) -{ - matrix m_mWorldViewProjection; - matrix m_mWorld; - matrix m_mWorldView; - matrix m_mShadow; - float4 m_vCascadeOffset[8]; - float4 m_vCascadeScale[8]; - int m_nCascadeLevels; // Number of Cascades - int m_iVisualizeCascades; // 1 is to visualize the cascades in different colors. 0 is to just draw the scene - int m_iPCFBlurForLoopStart; // For loop begin value. For a 5x5 Kernal this would be -2. - int m_iPCFBlurForLoopEnd; // For loop end value. For a 5x5 kernel this would be 3. - - // For Map based selection scheme, this keeps the pixels inside of the the valid range. - // When there is no boarder, these values are 0 and 1 respectivley. - float m_fMinBorderPadding; - float m_fMaxBorderPadding; - float m_fShadowBiasFromGUI; // A shadow map offset to deal with self shadow artifacts. - //These artifacts are aggravated by PCF. - float m_fShadowPartitionSize; - float m_fCascadeBlendArea; // Amount to overlap when blending between cascades. - float m_fTexelSize; - float m_fNativeTexelSizeInX; - float m_fPaddingForCB3; // Padding variables exist because CBs must be a multiple of 16 bytes. - float4 m_fCascadeFrustumsEyeSpaceDepthsFloat[2]; // The values along Z that seperate the cascades. - float4 m_fCascadeFrustumsEyeSpaceDepthsFloat4[8]; // the values along Z that separte the cascades. - // Wastefully stored in float4 so they are array indexable. - float3 m_vLightDir; - float m_fPaddingCB4; - -}; - - - -//-------------------------------------------------------------------------------------- -// Textures and Samplers -//-------------------------------------------------------------------------------------- -Texture2D g_txDiffuse : register( t0 ); -Texture2D g_txShadow : register( t5 ); - - -SamplerState g_samLinear : register( s0 ); -SamplerComparisonState g_samShadow : register( s5 ); - -//-------------------------------------------------------------------------------------- -// Input / Output structures -//-------------------------------------------------------------------------------------- -struct VS_INPUT -{ - float4 vPosition : POSITION; - float3 vNormal : NORMAL; - float2 vTexcoord : TEXCOORD0; -}; - -struct VS_OUTPUT -{ - float3 vNormal : NORMAL; - float2 vTexcoord : TEXCOORD0; - float4 vTexShadow : TEXCOORD1; - float4 vPosition : SV_POSITION; - float4 vInterpPos : TEXCOORD2; - float vDepth : TEXCOORD3; -}; - -//-------------------------------------------------------------------------------------- -// Vertex Shader -//-------------------------------------------------------------------------------------- -VS_OUTPUT VSMain( VS_INPUT Input ) -{ - VS_OUTPUT Output; - - Output.vPosition = mul( Input.vPosition, m_mWorldViewProjection ); - Output.vNormal = mul( Input.vNormal, (float3x3)m_mWorld ); - Output.vTexcoord = Input.vTexcoord; - Output.vInterpPos = Input.vPosition; - Output.vDepth = mul( Input.vPosition, m_mWorldView ).z ; - - // Transform the shadow texture coordinates for all the cascades. - Output.vTexShadow = mul( Input.vPosition, m_mShadow ); - return Output; - -} - - - -static const float4 vCascadeColorsMultiplier[8] = -{ - float4 ( 1.5f, 0.0f, 0.0f, 1.0f ), - float4 ( 0.0f, 1.5f, 0.0f, 1.0f ), - float4 ( 0.0f, 0.0f, 5.5f, 1.0f ), - float4 ( 1.5f, 0.0f, 5.5f, 1.0f ), - float4 ( 1.5f, 1.5f, 0.0f, 1.0f ), - float4 ( 1.0f, 1.0f, 1.0f, 1.0f ), - float4 ( 0.0f, 1.0f, 5.5f, 1.0f ), - float4 ( 0.5f, 3.5f, 0.75f, 1.0f ) -}; - - -void ComputeCoordinatesTransform( in int iCascadeIndex, - in float4 InterpolatedPosition , - in out float4 vShadowTexCoord , - in out float4 vShadowTexCoordViewSpace ) -{ - // Now that we know the correct map, we can transform the world space position of the current fragment - if( SELECT_CASCADE_BY_INTERVAL_FLAG ) - { - vShadowTexCoord = vShadowTexCoordViewSpace * m_vCascadeScale[iCascadeIndex]; - vShadowTexCoord += m_vCascadeOffset[iCascadeIndex]; - } - - vShadowTexCoord.x *= m_fShadowPartitionSize; // precomputed (float)iCascadeIndex / (float)CASCADE_CNT - vShadowTexCoord.x += (m_fShadowPartitionSize * (float)iCascadeIndex ); - - -} - - -//-------------------------------------------------------------------------------------- -// This function calculates the screen space depth for shadow space texels -//-------------------------------------------------------------------------------------- -void CalculateRightAndUpTexelDepthDeltas ( in float3 vShadowTexDDX, - in float3 vShadowTexDDY, - out float fUpTextDepthWeight, - out float fRightTextDepthWeight - ) { - - // We use the derivatives in X and Y to create a transformation matrix. Because these derivives give us the - // transformation from screen space to shadow space, we need the inverse matrix to take us from shadow space - // to screen space. This new matrix will allow us to map shadow map texels to screen space. This will allow - // us to find the screen space depth of a corresponding depth pixel. - // This is not a perfect solution as it assumes the underlying geometry of the scene is a plane. A more - // accureate way of finding the actual depth would be to do a deferred rendering approach and actually - //sample the depth. - - // Using an offset, or using variance shadow maps is a better approach to reducing these artifacts in most cases. - - float2x2 matScreentoShadow = float2x2( vShadowTexDDX.xy, vShadowTexDDY.xy ); - float fDeterminant = determinant ( matScreentoShadow ); - - float fInvDeterminant = 1.0f / fDeterminant; - - float2x2 matShadowToScreen = float2x2 ( - matScreentoShadow._22 * fInvDeterminant, matScreentoShadow._12 * -fInvDeterminant, - matScreentoShadow._21 * -fInvDeterminant, matScreentoShadow._11 * fInvDeterminant ); - - float2 vRightShadowTexelLocation = float2( m_fTexelSize, 0.0f ); - float2 vUpShadowTexelLocation = float2( 0.0f, m_fTexelSize ); - - // Transform the right pixel by the shadow space to screen space matrix. - float2 vRightTexelDepthRatio = mul( vRightShadowTexelLocation, matShadowToScreen ); - float2 vUpTexelDepthRatio = mul( vUpShadowTexelLocation, matShadowToScreen ); - - // We can now caculate how much depth changes when you move up or right in the shadow map. - // We use the ratio of change in x and y times the dervivite in X and Y of the screen space - // depth to calculate this change. - fUpTextDepthWeight = - vUpTexelDepthRatio.x * vShadowTexDDX.z - + vUpTexelDepthRatio.y * vShadowTexDDY.z; - fRightTextDepthWeight = - vRightTexelDepthRatio.x * vShadowTexDDX.z - + vRightTexelDepthRatio.y * vShadowTexDDY.z; - -} - - -//-------------------------------------------------------------------------------------- -// Use PCF to sample the depth map and return a percent lit value. -//-------------------------------------------------------------------------------------- -void CalculatePCFPercentLit ( in float4 vShadowTexCoord, - in float fRightTexelDepthDelta, - in float fUpTexelDepthDelta, - in float fBlurRowSize, - out float fPercentLit - ) -{ - fPercentLit = 0.0f; - // This loop could be unrolled, and texture immediate offsets could be used if the kernel size were fixed. - // This would be performance improvment. - for( int x = m_iPCFBlurForLoopStart; x < m_iPCFBlurForLoopEnd; ++x ) - { - for( int y = m_iPCFBlurForLoopStart; y < m_iPCFBlurForLoopEnd; ++y ) - { - float depthcompare = vShadowTexCoord.z; - // A very simple solution to the depth bias problems of PCF is to use an offset. - // Unfortunately, too much offset can lead to Peter-panning (shadows near the base of object disappear ) - // Too little offset can lead to shadow acne ( objects that should not be in shadow are partially self shadowed ). - depthcompare -= m_fShadowBiasFromGUI; - if ( USE_DERIVATIVES_FOR_DEPTH_OFFSET_FLAG ) - { - // Add in derivative computed depth scale based on the x and y pixel. - depthcompare += fRightTexelDepthDelta * ( (float) x ) + fUpTexelDepthDelta * ( (float) y ); - } - // Compare the transformed pixel depth to the depth read from the map. - fPercentLit += g_txShadow.SampleCmpLevelZero( g_samShadow, - float2( - vShadowTexCoord.x + ( ( (float) x ) * m_fNativeTexelSizeInX ) , - vShadowTexCoord.y + ( ( (float) y ) * m_fTexelSize ) - ), - depthcompare ); - } - } - fPercentLit /= (float)fBlurRowSize; -} - -//-------------------------------------------------------------------------------------- -// Calculate amount to blend between two cascades and the band where blending will occure. -//-------------------------------------------------------------------------------------- -void CalculateBlendAmountForInterval ( in int iCurrentCascadeIndex, - in out float fPixelDepth, - in out float fCurrentPixelsBlendBandLocation, - out float fBlendBetweenCascadesAmount - ) -{ - - // We need to calculate the band of the current shadow map where it will fade into the next cascade. - // We can then early out of the expensive PCF for loop. - // - float fBlendInterval = m_fCascadeFrustumsEyeSpaceDepthsFloat4[ iCurrentCascadeIndex ].x; - //if( iNextCascadeIndex > 1 ) - int fBlendIntervalbelowIndex = min(0, iCurrentCascadeIndex-1); - fPixelDepth -= m_fCascadeFrustumsEyeSpaceDepthsFloat4[ fBlendIntervalbelowIndex ].x; - fBlendInterval -= m_fCascadeFrustumsEyeSpaceDepthsFloat4[ fBlendIntervalbelowIndex ].x; - - // The current pixel's blend band location will be used to determine when we need to blend and by how much. - fCurrentPixelsBlendBandLocation = fPixelDepth / fBlendInterval; - fCurrentPixelsBlendBandLocation = 1.0f - fCurrentPixelsBlendBandLocation; - // The fBlendBetweenCascadesAmount is our location in the blend band. - fBlendBetweenCascadesAmount = fCurrentPixelsBlendBandLocation / m_fCascadeBlendArea; -} - - - -//-------------------------------------------------------------------------------------- -// Calculate amount to blend between two cascades and the band where blending will occure. -//-------------------------------------------------------------------------------------- -void CalculateBlendAmountForMap ( in float4 vShadowMapTextureCoord, - in out float fCurrentPixelsBlendBandLocation, - out float fBlendBetweenCascadesAmount ) -{ - // Calcaulte the blend band for the map based selection. - float2 distanceToOne = float2 ( 1.0f - vShadowMapTextureCoord.x, 1.0f - vShadowMapTextureCoord.y ); - fCurrentPixelsBlendBandLocation = min( vShadowMapTextureCoord.x, vShadowMapTextureCoord.y ); - float fCurrentPixelsBlendBandLocation2 = min( distanceToOne.x, distanceToOne.y ); - fCurrentPixelsBlendBandLocation = - min( fCurrentPixelsBlendBandLocation, fCurrentPixelsBlendBandLocation2 ); - fBlendBetweenCascadesAmount = fCurrentPixelsBlendBandLocation / m_fCascadeBlendArea; -} - -//-------------------------------------------------------------------------------------- -// Calculate the shadow based on several options and rende the scene. -//-------------------------------------------------------------------------------------- -float4 PSMain( VS_OUTPUT Input ) : SV_TARGET -{ - float4 vDiffuse = g_txDiffuse.Sample( g_samLinear, Input.vTexcoord ); - - float4 vShadowMapTextureCoord = 0.0f; - float4 vShadowMapTextureCoord_blend = 0.0f; - - float4 vVisualizeCascadeColor = float4(0.0f,0.0f,0.0f,1.0f); - - float fPercentLit = 0.0f; - float fPercentLit_blend = 0.0f; - - - float fUpTextDepthWeight=0; - float fRightTextDepthWeight=0; - float fUpTextDepthWeight_blend=0; - float fRightTextDepthWeight_blend=0; - - int iBlurRowSize = m_iPCFBlurForLoopEnd - m_iPCFBlurForLoopStart; - iBlurRowSize *= iBlurRowSize; - float fBlurRowSize = (float)iBlurRowSize; - - int iCascadeFound = 0; - int iNextCascadeIndex = 1; - - float fCurrentPixelDepth; - - // The interval based selection technique compares the pixel's depth against the frustum's cascade divisions. - fCurrentPixelDepth = Input.vDepth; - - // This for loop is not necessary when the frustum is uniformaly divided and interval based selection is used. - // In this case fCurrentPixelDepth could be used as an array lookup into the correct frustum. - int iCurrentCascadeIndex; - - float4 vShadowMapTextureCoordViewSpace = Input.vTexShadow; - if( SELECT_CASCADE_BY_INTERVAL_FLAG ) - { - iCurrentCascadeIndex = 0; - if ( CASCADE_COUNT_FLAG > 1 ) - { - float4 vCurrentPixelDepth = Input.vDepth; - float4 fComparison = ( vCurrentPixelDepth > m_fCascadeFrustumsEyeSpaceDepthsFloat[0]); - float4 fComparison2 = ( vCurrentPixelDepth > m_fCascadeFrustumsEyeSpaceDepthsFloat[1]); - float fIndex = dot( - float4( CASCADE_COUNT_FLAG > 0, - CASCADE_COUNT_FLAG > 1, - CASCADE_COUNT_FLAG > 2, - CASCADE_COUNT_FLAG > 3) - , fComparison ) - + dot( - float4( - CASCADE_COUNT_FLAG > 4, - CASCADE_COUNT_FLAG > 5, - CASCADE_COUNT_FLAG > 6, - CASCADE_COUNT_FLAG > 7) - , fComparison2 ) ; - - fIndex = min( fIndex, CASCADE_COUNT_FLAG - 1 ); - iCurrentCascadeIndex = (int)fIndex; - } - } - - if ( !SELECT_CASCADE_BY_INTERVAL_FLAG ) - { - iCurrentCascadeIndex = 0; - if ( CASCADE_COUNT_FLAG == 1 ) - { - vShadowMapTextureCoord = vShadowMapTextureCoordViewSpace * m_vCascadeScale[0]; - vShadowMapTextureCoord += m_vCascadeOffset[0]; - } - if ( CASCADE_COUNT_FLAG > 1 ) { - for( int iCascadeIndex = 0; iCascadeIndex < CASCADE_COUNT_FLAG && iCascadeFound == 0; ++iCascadeIndex ) - { - vShadowMapTextureCoord = vShadowMapTextureCoordViewSpace * m_vCascadeScale[iCascadeIndex]; - vShadowMapTextureCoord += m_vCascadeOffset[iCascadeIndex]; - - if ( min( vShadowMapTextureCoord.x, vShadowMapTextureCoord.y ) > m_fMinBorderPadding - && max( vShadowMapTextureCoord.x, vShadowMapTextureCoord.y ) < m_fMaxBorderPadding ) - { - iCurrentCascadeIndex = iCascadeIndex; - iCascadeFound = 1; - } - } - } - } - - float4 color = 0; - - if( BLEND_BETWEEN_CASCADE_LAYERS_FLAG ) - { - // Repeat text coord calculations for the next cascade. - // The next cascade index is used for blurring between maps. - iNextCascadeIndex = min ( CASCADE_COUNT_FLAG - 1, iCurrentCascadeIndex + 1 ); - } - - float fBlendBetweenCascadesAmount = 1.0f; - float fCurrentPixelsBlendBandLocation = 1.0f; - - if( SELECT_CASCADE_BY_INTERVAL_FLAG ) - { - if( BLEND_BETWEEN_CASCADE_LAYERS_FLAG && CASCADE_COUNT_FLAG > 1 ) - { - CalculateBlendAmountForInterval ( iCurrentCascadeIndex, fCurrentPixelDepth, - fCurrentPixelsBlendBandLocation, fBlendBetweenCascadesAmount ); - } - } - else - { - - if( BLEND_BETWEEN_CASCADE_LAYERS_FLAG ) - { - CalculateBlendAmountForMap ( vShadowMapTextureCoord, - fCurrentPixelsBlendBandLocation, fBlendBetweenCascadesAmount ); - } - } - - float3 vShadowMapTextureCoordDDX; - float3 vShadowMapTextureCoordDDY; - // The derivatives are used to find the slope of the current plane. - // The derivative calculation has to be inside of the loop in order to prevent divergent flow control artifacts. - if( USE_DERIVATIVES_FOR_DEPTH_OFFSET_FLAG ) - { - vShadowMapTextureCoordDDX = ddx( vShadowMapTextureCoordViewSpace ); - vShadowMapTextureCoordDDY = ddy( vShadowMapTextureCoordViewSpace ); - - vShadowMapTextureCoordDDX *= m_vCascadeScale[iCurrentCascadeIndex]; - vShadowMapTextureCoordDDY *= m_vCascadeScale[iCurrentCascadeIndex]; - } - - ComputeCoordinatesTransform( iCurrentCascadeIndex, - Input.vInterpPos, - vShadowMapTextureCoord, - vShadowMapTextureCoordViewSpace ); - - - vVisualizeCascadeColor = vCascadeColorsMultiplier[iCurrentCascadeIndex]; - - if( USE_DERIVATIVES_FOR_DEPTH_OFFSET_FLAG ) - { - CalculateRightAndUpTexelDepthDeltas ( vShadowMapTextureCoordDDX, vShadowMapTextureCoordDDY, - fUpTextDepthWeight, fRightTextDepthWeight ); - } - - CalculatePCFPercentLit ( vShadowMapTextureCoord, fRightTextDepthWeight, - fUpTextDepthWeight, fBlurRowSize, fPercentLit ); - - if( BLEND_BETWEEN_CASCADE_LAYERS_FLAG && CASCADE_COUNT_FLAG > 1 ) - { - if( fCurrentPixelsBlendBandLocation < m_fCascadeBlendArea) - { // the current pixel is within the blend band. - - // Repeat text coord calculations for the next cascade. - // The next cascade index is used for blurring between maps. - if( !SELECT_CASCADE_BY_INTERVAL_FLAG ) - { - vShadowMapTextureCoord_blend = vShadowMapTextureCoordViewSpace * m_vCascadeScale[iNextCascadeIndex]; - vShadowMapTextureCoord_blend += m_vCascadeOffset[iNextCascadeIndex]; - } - - ComputeCoordinatesTransform( iNextCascadeIndex, Input.vInterpPos, - vShadowMapTextureCoord_blend, - vShadowMapTextureCoordViewSpace ); - - // We repeat the calcuation for the next cascade layer, when blending between maps. - if( fCurrentPixelsBlendBandLocation < m_fCascadeBlendArea) - { // the current pixel is within the blend band. - if( USE_DERIVATIVES_FOR_DEPTH_OFFSET_FLAG ) - { - - CalculateRightAndUpTexelDepthDeltas ( vShadowMapTextureCoordDDX, - vShadowMapTextureCoordDDY, - fUpTextDepthWeight_blend, - fRightTextDepthWeight_blend ); - } - CalculatePCFPercentLit ( vShadowMapTextureCoord_blend, fRightTextDepthWeight_blend, - fUpTextDepthWeight_blend, fBlurRowSize, fPercentLit_blend ); - fPercentLit = lerp( fPercentLit_blend, fPercentLit, fBlendBetweenCascadesAmount ); - // Blend the two calculated shadows by the blend amount. - } - } - } - - - if( !m_iVisualizeCascades ) vVisualizeCascadeColor = float4(1.0f,1.0f,1.0f,1.0f); - - float3 vLightDir1 = float3( -1.0f, 1.0f, -1.0f ); - float3 vLightDir2 = float3( 1.0f, 1.0f, -1.0f ); - float3 vLightDir3 = float3( 0.0f, -1.0f, 0.0f ); - float3 vLightDir4 = float3( 1.0f, 1.0f, 1.0f ); - // Some ambient-like lighting. - float fLighting = - saturate( dot( vLightDir1 , Input.vNormal ) )*0.05f + - saturate( dot( vLightDir2 , Input.vNormal ) )*0.05f + - saturate( dot( vLightDir3 , Input.vNormal ) )*0.05f + - saturate( dot( vLightDir4 , Input.vNormal ) )*0.05f ; - - float4 vShadowLighting = fLighting * 0.5f; - fLighting += saturate( dot( m_vLightDir , Input.vNormal ) ); - fLighting = lerp( vShadowLighting, fLighting, fPercentLit ); - - return fLighting * vVisualizeCascadeColor * vDiffuse; - -} - diff --git a/tests/hlsl/dxsdk/CascadedShadowMaps11/RenderCascadeShadow.hlsl b/tests/hlsl/dxsdk/CascadedShadowMaps11/RenderCascadeShadow.hlsl deleted file mode 100644 index af9679ada..000000000 --- a/tests/hlsl/dxsdk/CascadedShadowMaps11/RenderCascadeShadow.hlsl +++ /dev/null @@ -1,59 +0,0 @@ -//TEST:COMPARE_HLSL: -profile sm_4_0 -entry VSMain -stage vertex -entry VSMainPancake -stage vertex - -#ifndef __SLANG__ -#define cbPerObject cbPerObject_0 -#define g_mWorldViewProjection g_mWorldViewProjection_0 -#endif - -//-------------------------------------------------------------------------------------- -// File: RenderCascadeShadow.hlsl -// -// The shader file for the RenderCascadeScene sample. -// -// Copyright (c) Microsoft Corporation. All rights reserved. -//-------------------------------------------------------------------------------------- - - -//-------------------------------------------------------------------------------------- -// Globals -//-------------------------------------------------------------------------------------- -cbuffer cbPerObject : register( b0 ) -{ - matrix g_mWorldViewProjection ;//SLANG: : packoffset( c0 ); -}; - -//-------------------------------------------------------------------------------------- -// Input / Output structures -//-------------------------------------------------------------------------------------- -struct VS_INPUT -{ - float4 vPosition : POSITION; -}; - -struct VS_OUTPUT -{ - float4 vPosition : SV_POSITION; -}; - -//-------------------------------------------------------------------------------------- -// Vertex Shader -//-------------------------------------------------------------------------------------- -VS_OUTPUT VSMain( VS_INPUT Input ) -{ - VS_OUTPUT Output; - - // There is nothing special here, just transform and write out the depth. - Output.vPosition = mul( Input.vPosition, g_mWorldViewProjection ); - - return Output; -} - - -VS_OUTPUT VSMainPancake( VS_INPUT Input ) -{ - VS_OUTPUT Output; - // after transform move clipped geometry to near plane - Output.vPosition = mul( Input.vPosition, g_mWorldViewProjection ); - //Output.vPosition.z = max( Output.vPosition.z, 0.0f ); - return Output; -}
\ No newline at end of file diff --git a/tests/hlsl/dxsdk/ComputeShaderSort11/ComputeShaderSort11.hlsl b/tests/hlsl/dxsdk/ComputeShaderSort11/ComputeShaderSort11.hlsl deleted file mode 100644 index 6e14bc10e..000000000 --- a/tests/hlsl/dxsdk/ComputeShaderSort11/ComputeShaderSort11.hlsl +++ /dev/null @@ -1,75 +0,0 @@ -//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues. -//TEST:COMPARE_HLSL: -profile cs_4_0 -entry BitonicSort -entry MatrixTranspose -//-------------------------------------------------------------------------------------- -// File: ComputeShaderSort11.hlsl -// -// This file contains the compute shaders to perform GPU sorting using DirectX 11. -// -// Copyright (c) Microsoft Corporation. All rights reserved. -//-------------------------------------------------------------------------------------- - -#define BITONIC_BLOCK_SIZE 512 - -#define TRANSPOSE_BLOCK_SIZE 16 - -//-------------------------------------------------------------------------------------- -// Constant Buffers -//-------------------------------------------------------------------------------------- -cbuffer CB : register( b0 ) -{ - unsigned int g_iLevel; - unsigned int g_iLevelMask; - unsigned int g_iWidth; - unsigned int g_iHeight; -}; - -//-------------------------------------------------------------------------------------- -// Structured Buffers -//-------------------------------------------------------------------------------------- -StructuredBuffer<unsigned int> Input : register( t0 ); -RWStructuredBuffer<unsigned int> Data : register( u0 ); - -//-------------------------------------------------------------------------------------- -// Bitonic Sort Compute Shader -//-------------------------------------------------------------------------------------- -groupshared unsigned int shared_data[BITONIC_BLOCK_SIZE]; - -[numthreads(BITONIC_BLOCK_SIZE, 1, 1)] -void BitonicSort( uint3 Gid : SV_GroupID, - uint3 DTid : SV_DispatchThreadID, - uint3 GTid : SV_GroupThreadID, - uint GI : SV_GroupIndex ) -{ - // Load shared data - shared_data[GI] = Data[DTid.x]; - GroupMemoryBarrierWithGroupSync(); - - // Sort the shared data - for (unsigned int j = g_iLevel >> 1 ; j > 0 ; j >>= 1) - { - unsigned int result = ((shared_data[GI & ~j] <= shared_data[GI | j]) == (bool)(g_iLevelMask & DTid.x))? shared_data[GI ^ j] : shared_data[GI]; - GroupMemoryBarrierWithGroupSync(); - shared_data[GI] = result; - GroupMemoryBarrierWithGroupSync(); - } - - // Store shared data - Data[DTid.x] = shared_data[GI]; -} - -//-------------------------------------------------------------------------------------- -// Matrix Transpose Compute Shader -//-------------------------------------------------------------------------------------- -groupshared unsigned int transpose_shared_data[TRANSPOSE_BLOCK_SIZE * TRANSPOSE_BLOCK_SIZE]; - -[numthreads(TRANSPOSE_BLOCK_SIZE, TRANSPOSE_BLOCK_SIZE, 1)] -void MatrixTranspose( uint3 Gid : SV_GroupID, - uint3 DTid : SV_DispatchThreadID, - uint3 GTid : SV_GroupThreadID, - uint GI : SV_GroupIndex ) -{ - transpose_shared_data[GI] = Input[DTid.y * g_iWidth + DTid.x]; - GroupMemoryBarrierWithGroupSync(); - uint2 XY = DTid.yx - GTid.yx + GTid.xy; - Data[XY.y * g_iHeight + XY.x] = transpose_shared_data[GTid.x * TRANSPOSE_BLOCK_SIZE + GTid.y]; -} diff --git a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial02/Tutorial02.fx b/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial02/Tutorial02.fx deleted file mode 100644 index e1fead571..000000000 --- a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial02/Tutorial02.fx +++ /dev/null @@ -1,28 +0,0 @@ -//TEST:COMPARE_HLSL: -profile sm_4_0 -entry VS -stage vertex -entry PS -stage fragment - -#ifndef __SLANG__ -#define SV_Target SV_TARGET -#endif - -//-------------------------------------------------------------------------------------- -// File: Tutorial02.fx -// -// Copyright (c) Microsoft Corporation. All rights reserved. -//-------------------------------------------------------------------------------------- - -//-------------------------------------------------------------------------------------- -// Vertex Shader -//-------------------------------------------------------------------------------------- -float4 VS( float4 Pos : POSITION ) : SV_POSITION -{ - return Pos; -} - - -//-------------------------------------------------------------------------------------- -// Pixel Shader -//-------------------------------------------------------------------------------------- -float4 PS( float4 Pos : SV_POSITION ) : SV_Target -{ - return float4( 1.0f, 1.0f, 0.0f, 1.0f ); // Yellow, with Alpha = 1 -} diff --git a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial02/Tutorial02_PS.hlsl b/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial02/Tutorial02_PS.hlsl deleted file mode 100644 index 82300c10c..000000000 --- a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial02/Tutorial02_PS.hlsl +++ /dev/null @@ -1,3 +0,0 @@ -//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues. -//TEST:COMPARE_HLSL: -profile ps_4_0 -entry PS -#include "Tutorial02.fx" diff --git a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial02/Tutorial02_VS.hlsl b/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial02/Tutorial02_VS.hlsl deleted file mode 100644 index cdf4f9649..000000000 --- a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial02/Tutorial02_VS.hlsl +++ /dev/null @@ -1,3 +0,0 @@ -//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues. -//TEST:COMPARE_HLSL: -profile vs_4_0 -entry VS -#include "Tutorial02.fx" diff --git a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial03/Tutorial03.fx b/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial03/Tutorial03.fx deleted file mode 100644 index e1fead571..000000000 --- a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial03/Tutorial03.fx +++ /dev/null @@ -1,28 +0,0 @@ -//TEST:COMPARE_HLSL: -profile sm_4_0 -entry VS -stage vertex -entry PS -stage fragment - -#ifndef __SLANG__ -#define SV_Target SV_TARGET -#endif - -//-------------------------------------------------------------------------------------- -// File: Tutorial02.fx -// -// Copyright (c) Microsoft Corporation. All rights reserved. -//-------------------------------------------------------------------------------------- - -//-------------------------------------------------------------------------------------- -// Vertex Shader -//-------------------------------------------------------------------------------------- -float4 VS( float4 Pos : POSITION ) : SV_POSITION -{ - return Pos; -} - - -//-------------------------------------------------------------------------------------- -// Pixel Shader -//-------------------------------------------------------------------------------------- -float4 PS( float4 Pos : SV_POSITION ) : SV_Target -{ - return float4( 1.0f, 1.0f, 0.0f, 1.0f ); // Yellow, with Alpha = 1 -} diff --git a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial03/Tutorial03_PS.hlsl b/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial03/Tutorial03_PS.hlsl deleted file mode 100644 index 684788198..000000000 --- a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial03/Tutorial03_PS.hlsl +++ /dev/null @@ -1,3 +0,0 @@ -//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues. -//TEST:COMPARE_HLSL: -profile ps_4_0 -entry PS -#include "Tutorial03.fx" diff --git a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial03/Tutorial03_VS.hlsl b/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial03/Tutorial03_VS.hlsl deleted file mode 100644 index 40d9770fc..000000000 --- a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial03/Tutorial03_VS.hlsl +++ /dev/null @@ -1,3 +0,0 @@ -//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues. -//TEST:COMPARE_HLSL: -profile vs_4_0 -entry VS -#include "Tutorial03.fx" diff --git a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial04/Tutorial04.fx b/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial04/Tutorial04.fx deleted file mode 100644 index d311edc5a..000000000 --- a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial04/Tutorial04.fx +++ /dev/null @@ -1,46 +0,0 @@ -//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues. -//TEST:COMPARE_HLSL: -profile vs_4_0 -entry VS -profile ps_4_0 -entry PS -//-------------------------------------------------------------------------------------- -// File: Tutorial04.fx -// -// Copyright (c) Microsoft Corporation. All rights reserved. -//-------------------------------------------------------------------------------------- - -//-------------------------------------------------------------------------------------- -// Constant Buffer Variables -//-------------------------------------------------------------------------------------- -cbuffer ConstantBuffer : register( b0 ) -{ - matrix World; - matrix View; - matrix Projection; -} - -//-------------------------------------------------------------------------------------- -struct VS_OUTPUT -{ - float4 Pos : SV_POSITION; - float4 Color : COLOR0; -}; - -//-------------------------------------------------------------------------------------- -// Vertex Shader -//-------------------------------------------------------------------------------------- -VS_OUTPUT VS( float4 Pos : POSITION, float4 Color : COLOR ) -{ - VS_OUTPUT output = (VS_OUTPUT)0; - output.Pos = mul( Pos, World ); - output.Pos = mul( output.Pos, View ); - output.Pos = mul( output.Pos, Projection ); - output.Color = Color; - return output; -} - - -//-------------------------------------------------------------------------------------- -// Pixel Shader -//-------------------------------------------------------------------------------------- -float4 PS( VS_OUTPUT input ) : SV_Target -{ - return input.Color; -} diff --git a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial04/Tutorial04_PS.hlsl b/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial04/Tutorial04_PS.hlsl deleted file mode 100644 index 65c36988f..000000000 --- a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial04/Tutorial04_PS.hlsl +++ /dev/null @@ -1,3 +0,0 @@ -//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues. -//TEST:COMPARE_HLSL: -profile ps_4_0 -entry PS -#include "Tutorial04.fx" diff --git a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial04/Tutorial04_VS.hlsl b/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial04/Tutorial04_VS.hlsl deleted file mode 100644 index 4505c1a98..000000000 --- a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial04/Tutorial04_VS.hlsl +++ /dev/null @@ -1,3 +0,0 @@ -//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues. -//TEST:COMPARE_HLSL: -profile vs_4_0 -entry VS -#include "Tutorial04.fx" diff --git a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial05/Tutorial05.fx b/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial05/Tutorial05.fx deleted file mode 100644 index 5ef5487da..000000000 --- a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial05/Tutorial05.fx +++ /dev/null @@ -1,54 +0,0 @@ -//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues. -//TEST:COMPARE_HLSL: -profile vs_4_0 -entry VS -profile ps_4_0 -entry PS -//-------------------------------------------------------------------------------------- -// File: Tutorial05.fx -// -// Copyright (c) Microsoft Corporation. All rights reserved. -//-------------------------------------------------------------------------------------- - -//-------------------------------------------------------------------------------------- -// Constant Buffer Variables -//-------------------------------------------------------------------------------------- -cbuffer ConstantBuffer : register( b0 ) -{ - matrix World; - matrix View; - matrix Projection; -} - -//-------------------------------------------------------------------------------------- -struct VS_INPUT -{ - float4 Pos : POSITION; - float4 Color : COLOR; -}; - -struct PS_INPUT -{ - float4 Pos : SV_POSITION; - float4 Color : COLOR; -}; - - -//-------------------------------------------------------------------------------------- -// Vertex Shader -//-------------------------------------------------------------------------------------- -PS_INPUT VS( VS_INPUT input ) -{ - PS_INPUT output = (PS_INPUT)0; - output.Pos = mul( input.Pos, World ); - output.Pos = mul( output.Pos, View ); - output.Pos = mul( output.Pos, Projection ); - output.Color = input.Color; - - return output; -} - - -//-------------------------------------------------------------------------------------- -// Pixel Shader -//-------------------------------------------------------------------------------------- -float4 PS( PS_INPUT input) : SV_Target -{ - return input.Color; -} diff --git a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial05/Tutorial05_PS.hlsl b/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial05/Tutorial05_PS.hlsl deleted file mode 100644 index 4226d4b47..000000000 --- a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial05/Tutorial05_PS.hlsl +++ /dev/null @@ -1,3 +0,0 @@ -//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues. -//TEST:COMPARE_HLSL: -profile ps_4_0 -entry PS -#include "Tutorial05.fx" diff --git a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial05/Tutorial05_VS.hlsl b/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial05/Tutorial05_VS.hlsl deleted file mode 100644 index 1c2f5519f..000000000 --- a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial05/Tutorial05_VS.hlsl +++ /dev/null @@ -1,3 +0,0 @@ -//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues. -//TEST:COMPARE_HLSL: -profile vs_4_0 -entry VS -#include "Tutorial05.fx" diff --git a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial06/Tutorial06.fx b/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial06/Tutorial06.fx deleted file mode 100644 index 219e96b9f..000000000 --- a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial06/Tutorial06.fx +++ /dev/null @@ -1,76 +0,0 @@ -//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues. -//TEST:COMPARE_HLSL: -profile vs_4_0 -entry VS -profile ps_4_0 -entry PS -entry PSSolid -//-------------------------------------------------------------------------------------- -// File: Tutorial06.fx -// -// Copyright (c) Microsoft Corporation. All rights reserved. -//-------------------------------------------------------------------------------------- - - -//-------------------------------------------------------------------------------------- -// Constant Buffer Variables -//-------------------------------------------------------------------------------------- -cbuffer ConstantBuffer : register( b0 ) -{ - matrix World; - matrix View; - matrix Projection; - float4 vLightDir[2]; - float4 vLightColor[2]; - float4 vOutputColor; -} - - -//-------------------------------------------------------------------------------------- -struct VS_INPUT -{ - float4 Pos : POSITION; - float3 Norm : NORMAL; -}; - -struct PS_INPUT -{ - float4 Pos : SV_POSITION; - float3 Norm : TEXCOORD0; -}; - - -//-------------------------------------------------------------------------------------- -// Vertex Shader -//-------------------------------------------------------------------------------------- -PS_INPUT VS( VS_INPUT input ) -{ - PS_INPUT output = (PS_INPUT)0; - output.Pos = mul( input.Pos, World ); - output.Pos = mul( output.Pos, View ); - output.Pos = mul( output.Pos, Projection ); - output.Norm = mul( float4( input.Norm, 1 ), World ).xyz; - - return output; -} - - -//-------------------------------------------------------------------------------------- -// Pixel Shader -//-------------------------------------------------------------------------------------- -float4 PS( PS_INPUT input) : SV_Target -{ - float4 finalColor = 0; - - //do NdotL lighting for 2 lights - for(int i=0; i<2; i++) - { - finalColor += saturate( dot( (float3)vLightDir[i],input.Norm) * vLightColor[i] ); - } - finalColor.a = 1; - return finalColor; -} - - -//-------------------------------------------------------------------------------------- -// PSSolid - render a solid color -//-------------------------------------------------------------------------------------- -float4 PSSolid( PS_INPUT input) : SV_Target -{ - return vOutputColor; -} diff --git a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial06/Tutorial06_PS.hlsl b/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial06/Tutorial06_PS.hlsl deleted file mode 100644 index 7bd5ece78..000000000 --- a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial06/Tutorial06_PS.hlsl +++ /dev/null @@ -1,3 +0,0 @@ -//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues. -//TEST:COMPARE_HLSL: -profile ps_4_0 -entry PS -#include "Tutorial06.fx" diff --git a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial06/Tutorial06_VS.hlsl b/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial06/Tutorial06_VS.hlsl deleted file mode 100644 index 50fcdbf56..000000000 --- a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial06/Tutorial06_VS.hlsl +++ /dev/null @@ -1,3 +0,0 @@ -//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues. -//TEST:COMPARE_HLSL: -profile vs_4_0 -entry VS -#include "Tutorial06.fx" diff --git a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial07/Tutorial07.fx b/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial07/Tutorial07.fx deleted file mode 100644 index f99aeba1b..000000000 --- a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial07/Tutorial07.fx +++ /dev/null @@ -1,67 +0,0 @@ -//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues. -//TEST:COMPARE_HLSL: -profile vs_4_0 -entry VS -profile ps_4_0 -entry PS -//-------------------------------------------------------------------------------------- -// File: Tutorial07.fx -// -// Copyright (c) Microsoft Corporation. All rights reserved. -//-------------------------------------------------------------------------------------- - -//-------------------------------------------------------------------------------------- -// Constant Buffer Variables -//-------------------------------------------------------------------------------------- -Texture2D txDiffuse : register( t0 ); -SamplerState samLinear : register( s0 ); - -cbuffer cbNeverChanges : register( b0 ) -{ - matrix View; -}; - -cbuffer cbChangeOnResize : register( b1 ) -{ - matrix Projection; -}; - -cbuffer cbChangesEveryFrame : register( b2 ) -{ - matrix World; - float4 vMeshColor; -}; - - -//-------------------------------------------------------------------------------------- -struct VS_INPUT -{ - float4 Pos : POSITION; - float2 Tex : TEXCOORD0; -}; - -struct PS_INPUT -{ - float4 Pos : SV_POSITION; - float2 Tex : TEXCOORD0; -}; - - -//-------------------------------------------------------------------------------------- -// Vertex Shader -//-------------------------------------------------------------------------------------- -PS_INPUT VS( VS_INPUT input ) -{ - PS_INPUT output = (PS_INPUT)0; - output.Pos = mul( input.Pos, World ); - output.Pos = mul( output.Pos, View ); - output.Pos = mul( output.Pos, Projection ); - output.Tex = input.Tex; - - return output; -} - - -//-------------------------------------------------------------------------------------- -// Pixel Shader -//-------------------------------------------------------------------------------------- -float4 PS( PS_INPUT input) : SV_Target -{ - return txDiffuse.Sample( samLinear, input.Tex ) * vMeshColor; -} diff --git a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial07/Tutorial07_PS.hlsl b/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial07/Tutorial07_PS.hlsl deleted file mode 100644 index f81862efd..000000000 --- a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial07/Tutorial07_PS.hlsl +++ /dev/null @@ -1,3 +0,0 @@ -//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues. -//TEST:COMPARE_HLSL: -profile ps_4_0 -entry PS -#include "Tutorial07.fx" diff --git a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial07/Tutorial07_VS.hlsl b/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial07/Tutorial07_VS.hlsl deleted file mode 100644 index 3ce6baf34..000000000 --- a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial07/Tutorial07_VS.hlsl +++ /dev/null @@ -1,3 +0,0 @@ -//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues. -//TEST:COMPARE_HLSL: -profile vs_4_0 -entry VS -#include "Tutorial07.fx" diff --git a/tests/hlsl/dxsdk/Direct3D11TutorialsDXUT/Tutorial08/Tutorial08.fx b/tests/hlsl/dxsdk/Direct3D11TutorialsDXUT/Tutorial08/Tutorial08.fx deleted file mode 100644 index f3c6a5774..000000000 --- a/tests/hlsl/dxsdk/Direct3D11TutorialsDXUT/Tutorial08/Tutorial08.fx +++ /dev/null @@ -1,56 +0,0 @@ -//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues. -//TEST:COMPARE_HLSL: -profile vs_4_0 -entry VS -profile ps_4_0 -entry PS -//-------------------------------------------------------------------------------------- -// File: Tutorial08.fx -// -// Copyright (c) Microsoft Corporation. All rights reserved. -//-------------------------------------------------------------------------------------- - -//-------------------------------------------------------------------------------------- -// Constant Buffer Variables -//-------------------------------------------------------------------------------------- -Texture2D txDiffuse : register( t0 ); -SamplerState samLinear : register( s0 ); - -cbuffer cbChangesEveryFrame : register( b0 ) -{ - matrix WorldViewProj; - matrix World; - float4 vMeshColor; -}; - - -//-------------------------------------------------------------------------------------- -struct VS_INPUT -{ - float4 Pos : POSITION; - float2 Tex : TEXCOORD; -}; - -struct PS_INPUT -{ - float4 Pos : SV_POSITION; - float2 Tex : TEXCOORD0; -}; - - -//-------------------------------------------------------------------------------------- -// Vertex Shader -//-------------------------------------------------------------------------------------- -PS_INPUT VS( VS_INPUT input ) -{ - PS_INPUT output = (PS_INPUT)0; - output.Pos = mul( input.Pos, WorldViewProj ); - output.Tex = input.Tex; - - return output; -} - - -//-------------------------------------------------------------------------------------- -// Pixel Shader -//-------------------------------------------------------------------------------------- -float4 PS( PS_INPUT input) : SV_Target -{ - return txDiffuse.Sample( samLinear, input.Tex ) * vMeshColor; -} diff --git a/tests/hlsl/dxsdk/Direct3D11TutorialsDXUT/Tutorial09/Tutorial09.fx b/tests/hlsl/dxsdk/Direct3D11TutorialsDXUT/Tutorial09/Tutorial09.fx deleted file mode 100644 index 2be29fb40..000000000 --- a/tests/hlsl/dxsdk/Direct3D11TutorialsDXUT/Tutorial09/Tutorial09.fx +++ /dev/null @@ -1,69 +0,0 @@ -//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues. -//TEST:COMPARE_HLSL: -profile vs_4_0 -entry VS -profile ps_4_0 -entry PS -//-------------------------------------------------------------------------------------- -// File: Tutorial09.fx -// -// Copyright (c) Microsoft Corporation. All rights reserved. -//-------------------------------------------------------------------------------------- - -//-------------------------------------------------------------------------------------- -// Constant Buffer Variables -//-------------------------------------------------------------------------------------- -Texture2D txDiffuse : register( t0 ); -SamplerState samLinear : register( s0 ); - -cbuffer cbNeverChanges : register( b0 ) -{ - float3 vLightDir; -}; - -cbuffer cbChangesEveryFrame : register( b1 ) -{ - matrix WorldViewProj; - matrix World; -}; - -struct VS_INPUT -{ - float3 Pos : POSITION; //position - float3 Norm : NORMAL; //normal - float2 Tex : TEXCOORD0; //texture coordinate -}; - -struct PS_INPUT -{ - float4 Pos : SV_POSITION; - float4 Diffuse : COLOR0; - float2 Tex : TEXCOORD1; -}; - - -//-------------------------------------------------------------------------------------- -// Vertex Shader -//-------------------------------------------------------------------------------------- -PS_INPUT VS( VS_INPUT input ) -{ - PS_INPUT output = (PS_INPUT)0; - output.Pos = mul( float4(input.Pos,1), WorldViewProj ); - float3 vNormalWorldSpace = normalize( mul( input.Norm, (float3x3)World ) ); - - float fLighting = saturate( dot( vNormalWorldSpace, vLightDir ) ); - output.Diffuse.rgb = fLighting; - output.Diffuse.a = 1.0f; - - output.Tex = input.Tex; - - return output; -} - - -//-------------------------------------------------------------------------------------- -// Pixel Shader -//-------------------------------------------------------------------------------------- -float4 PS( PS_INPUT input) : SV_Target -{ - //calculate lighting assuming light color is <1,1,1,1> - float4 outputColor = txDiffuse.Sample( samLinear, input.Tex ) * input.Diffuse; - outputColor.a = 1; - return outputColor; -} diff --git a/tests/hlsl/dxsdk/Direct3D11TutorialsDXUT/Tutorial10/Tutorial10.fx b/tests/hlsl/dxsdk/Direct3D11TutorialsDXUT/Tutorial10/Tutorial10.fx deleted file mode 100644 index 68f53c0b6..000000000 --- a/tests/hlsl/dxsdk/Direct3D11TutorialsDXUT/Tutorial10/Tutorial10.fx +++ /dev/null @@ -1,73 +0,0 @@ -//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues. -//TEST:COMPARE_HLSL: -profile vs_4_0 -entry VS -profile ps_4_0 -entry PS -//-------------------------------------------------------------------------------------- -// File: Tutorial10.fx -// -// Copyright (c) Microsoft Corporation. All rights reserved. -//-------------------------------------------------------------------------------------- - -//-------------------------------------------------------------------------------------- -// Constant Buffer Variables -//-------------------------------------------------------------------------------------- -Texture2D txDiffuse : register( t0 ); -SamplerState samLinear : register( s0 ); - -cbuffer cbNeverChanges : register( b0 ) -{ - float3 vLightDir; -}; - -cbuffer cbChangesEveryFrame : register( b1 ) -{ - matrix WorldViewProj; - matrix World; - float Puffiness; -}; - -struct VS_INPUT -{ - float3 Pos : POSITION; //position - float3 Norm : NORMAL; //normal - float2 Tex : TEXCOORD0; //texture coordinate -}; - -struct PS_INPUT -{ - float4 Pos : SV_POSITION; - float4 Diffuse : COLOR0; - float2 Tex : TEXCOORD1; -}; - - -//-------------------------------------------------------------------------------------- -// Vertex Shader -//-------------------------------------------------------------------------------------- -PS_INPUT VS( VS_INPUT input ) -{ - PS_INPUT output = (PS_INPUT)0; - - input.Pos += input.Norm * Puffiness; - - output.Pos = mul( float4(input.Pos,1), WorldViewProj ); - float3 vNormalWorldSpace = normalize( mul( input.Norm, (float3x3)World ) ); - - float fLighting = saturate( dot( vNormalWorldSpace, vLightDir ) ); - output.Diffuse.rgb = fLighting; - output.Diffuse.a = 1.0f; - - output.Tex = input.Tex; - - return output; -} - - -//-------------------------------------------------------------------------------------- -// Pixel Shader -//-------------------------------------------------------------------------------------- -float4 PS( PS_INPUT input) : SV_Target -{ - //calculate lighting assuming light color is <1,1,1,1> - float4 outputColor = txDiffuse.Sample( samLinear, input.Tex ) * input.Diffuse; - outputColor.a = 1; - return outputColor; -} diff --git a/tests/hlsl/dxsdk/Direct3D11TutorialsFX11/Tutorial11/Tutorial11.fx b/tests/hlsl/dxsdk/Direct3D11TutorialsFX11/Tutorial11/Tutorial11.fx deleted file mode 100644 index a647a9079..000000000 --- a/tests/hlsl/dxsdk/Direct3D11TutorialsFX11/Tutorial11/Tutorial11.fx +++ /dev/null @@ -1,117 +0,0 @@ -//TEST_IGNORE_FILE: -//-------------------------------------------------------------------------------------- -// File: Tutorial11.fx -// -// Copyright (c) Microsoft Corporation. All rights reserved. -//-------------------------------------------------------------------------------------- - - -//-------------------------------------------------------------------------------------- -// Constant Buffer Variables -//-------------------------------------------------------------------------------------- -Texture2D g_txDiffuse; -SamplerState samLinear -{ - Filter = MIN_MAG_MIP_LINEAR; - AddressU = Wrap; - AddressV = Wrap; -}; - -cbuffer cbConstant -{ - float3 vLightDir = float3(-0.577,0.577,-0.577); -}; - -cbuffer cbChangesEveryFrame -{ - matrix World; - matrix View; - matrix Projection; - float Time; -}; - -cbuffer cbUserChanges -{ - float Waviness; -}; - -struct VS_INPUT -{ - float3 Pos : POSITION; - float3 Norm : NORMAL; - float2 Tex : TEXCOORD0; -}; - -struct PS_INPUT -{ - float4 Pos : SV_POSITION; - float3 Norm : TEXCOORD0; - float2 Tex : TEXCOORD1; -}; - -//-------------------------------------------------------------------------------------- -// DepthStates -//-------------------------------------------------------------------------------------- -DepthStencilState EnableDepth -{ - DepthEnable = TRUE; - DepthWriteMask = ALL; - DepthFunc = LESS_EQUAL; -}; - -BlendState NoBlending -{ - AlphaToCoverageEnable = FALSE; - BlendEnable[0] = FALSE; -}; - - -//-------------------------------------------------------------------------------------- -// Vertex Shader -//-------------------------------------------------------------------------------------- -PS_INPUT VS( VS_INPUT input ) -{ - PS_INPUT output = (PS_INPUT)0; - - output.Pos = mul( float4(input.Pos,1), World ); - - output.Pos.x += sin( output.Pos.y*0.1f + Time )*Waviness; - - output.Pos = mul( output.Pos, View ); - output.Pos = mul( output.Pos, Projection ); - output.Norm = mul( input.Norm, World ); - output.Tex = input.Tex; - - return output; -} - - -//-------------------------------------------------------------------------------------- -// Pixel Shader -//-------------------------------------------------------------------------------------- -float4 PS( PS_INPUT input) : SV_Target -{ - // Calculate lighting assuming light color is <1,1,1,1> - float fLighting = saturate( dot( input.Norm, vLightDir ) ); - float4 outputColor = g_txDiffuse.Sample( samLinear, input.Tex ) * fLighting; - outputColor.a = 1; - return outputColor; -} - - -//-------------------------------------------------------------------------------------- -// Technique -//-------------------------------------------------------------------------------------- -technique11 Render -{ - pass P0 - { - SetVertexShader( CompileShader( vs_4_0, VS() ) ); - SetGeometryShader( NULL ); - SetPixelShader( CompileShader( ps_4_0, PS() ) ); - - SetDepthStencilState( EnableDepth, 0 ); - SetBlendState( NoBlending, float4( 0.0f, 0.0f, 0.0f, 0.0f ), 0xFFFFFFFF ); - } -} - diff --git a/tests/hlsl/dxsdk/Direct3D11TutorialsFX11/Tutorial12/Tutorial12.fx b/tests/hlsl/dxsdk/Direct3D11TutorialsFX11/Tutorial12/Tutorial12.fx deleted file mode 100644 index aae7f9a87..000000000 --- a/tests/hlsl/dxsdk/Direct3D11TutorialsFX11/Tutorial12/Tutorial12.fx +++ /dev/null @@ -1,129 +0,0 @@ -//TEST_IGNORE_FILE: -// -// Constant Buffer Variables -// - -Texture2D g_txDiffuse; -SamplerState samLinear -{ - Filter = MIN_MAG_MIP_LINEAR; - AddressU = Wrap; - AddressV = Wrap; -}; - -TextureCube g_txEnvMap; -SamplerState samLinearClamp -{ - Filter = MIN_MAG_MIP_LINEAR; - AddressU = Clamp; - AddressV = Clamp; -}; - -cbuffer cbConstant -{ - float3 vLightDir = float3(-0.577,0.577,-0.577); -}; - -cbuffer cbChangesEveryFrame -{ - matrix World; - matrix View; - matrix Projection; - float Time; -}; - -cbuffer cbUserChanges -{ - float Waviness; -}; - -struct VS_INPUT -{ - float3 Pos : POSITION; //position - float3 Norm : NORMAL; //normal - float2 Tex : TEXCOORD0; //texture coordinate -}; - -struct PS_INPUT -{ - float4 Pos : SV_POSITION; - float3 Norm : TEXCOORD0; - float2 Tex : TEXCOORD1; - float3 ViewR : TEXCOORD2; -}; - -//-------------------------------------------------------------------------------------- -// DepthStates -//-------------------------------------------------------------------------------------- -DepthStencilState EnableDepth -{ - DepthEnable = TRUE; - DepthWriteMask = ALL; - DepthFunc = LESS_EQUAL; -}; - -BlendState NoBlending -{ - AlphaToCoverageEnable = FALSE; - BlendEnable[0] = FALSE; -}; - -// -// Vertex Shader -// -PS_INPUT VS( VS_INPUT input ) -{ - PS_INPUT output = (PS_INPUT)0; - - output.Pos = mul( float4(input.Pos,1), World ); - - output.Pos.x += sin( output.Pos.y*0.1f + Time )*Waviness; - - output.Pos = mul( output.Pos, View ); - output.Pos = mul( output.Pos, Projection ); - output.Norm = mul( input.Norm, (float3x3)World ); - output.Tex = input.Tex; - - // Calculate the reflection vector - float3 viewNorm = mul( output.Norm, (float3x3)View ); - output.ViewR = reflect( viewNorm, float3(0,0,-1.0) ); - - return output; -} - - -// -// Pixel Shader -// -float4 PS( PS_INPUT input) : SV_Target -{ - // Calculate lighting assuming light color is <1,1,1,1> - float fLighting = saturate( dot( input.Norm, vLightDir ) ); - - // Load the environment map texture - float4 cReflect = g_txEnvMap.Sample( samLinearClamp, input.ViewR ); - - // Load the diffuse texture and multiply by the lighting amount - float4 cDiffuse = g_txDiffuse.Sample( samLinear, input.Tex ) * fLighting; - - // Add diffuse to reflection and go - float4 cTotal = cDiffuse + cReflect; - cTotal.a = 1; - return cTotal; -} - -// -// Technique -// -technique11 Render -{ - pass P0 - { - SetVertexShader( CompileShader( vs_4_0, VS() ) ); - SetGeometryShader( NULL ); - SetPixelShader( CompileShader( ps_4_0, PS() ) ); - - SetDepthStencilState( EnableDepth, 0 ); - SetBlendState( NoBlending, float4( 0.0f, 0.0f, 0.0f, 0.0f ), 0xFFFFFFFF ); - } -} diff --git a/tests/hlsl/dxsdk/Direct3D11TutorialsFX11/Tutorial13/Tutorial13.fx b/tests/hlsl/dxsdk/Direct3D11TutorialsFX11/Tutorial13/Tutorial13.fx deleted file mode 100644 index a6f09ecc7..000000000 --- a/tests/hlsl/dxsdk/Direct3D11TutorialsFX11/Tutorial13/Tutorial13.fx +++ /dev/null @@ -1,191 +0,0 @@ -//TEST_IGNORE_FILE: -//-------------------------------------------------------------------------------------- -// File: Tutorial13.fx -// -// Copyright (c) Microsoft Corporation. All rights reserved. -//-------------------------------------------------------------------------------------- - - -//-------------------------------------------------------------------------------------- -// Constant Buffer Variables -//-------------------------------------------------------------------------------------- -Texture2D g_txDiffuse; -SamplerState samLinear -{ - Filter = MIN_MAG_MIP_LINEAR; - AddressU = Wrap; - AddressV = Wrap; -}; - -TextureCube g_txEnvMap; -SamplerState samLinearClamp -{ - Filter = MIN_MAG_MIP_LINEAR; - AddressU = Clamp; - AddressV = Clamp; -}; - -cbuffer cbConstant -{ - float3 vLightDir = float3(-0.577,0.577,-0.577); -}; - -cbuffer cbChangesEveryFrame -{ - matrix World; - matrix View; - matrix Projection; - float Time; -}; - -cbuffer cbUserChanges -{ - float Explode; -}; - -struct VS_INPUT -{ - float3 Pos : POSITION; - float3 Norm : NORMAL; - float2 Tex : TEXCOORD0; -}; - -struct GSPS_INPUT -{ - float4 Pos : SV_POSITION; - float3 Norm : TEXCOORD0; - float2 Tex : TEXCOORD1; -}; - -//-------------------------------------------------------------------------------------- -// DepthStates -//-------------------------------------------------------------------------------------- -DepthStencilState EnableDepth -{ - DepthEnable = TRUE; - DepthWriteMask = ALL; - DepthFunc = LESS_EQUAL; -}; - -BlendState NoBlending -{ - AlphaToCoverageEnable = FALSE; - BlendEnable[0] = FALSE; -}; - - -//-------------------------------------------------------------------------------------- -// Vertex Shader -//-------------------------------------------------------------------------------------- -GSPS_INPUT VS( VS_INPUT input ) -{ - GSPS_INPUT output = (GSPS_INPUT)0; - - output.Pos = mul( float4(input.Pos,1), World ); - output.Norm = mul( input.Norm, (float3x3)World ); - output.Tex = input.Tex; - - return output; -} - - -//-------------------------------------------------------------------------------------- -// Geometry Shader -//-------------------------------------------------------------------------------------- -[maxvertexcount(12)] -void GS( triangle GSPS_INPUT input[3], inout TriangleStream<GSPS_INPUT> TriStream ) -{ - GSPS_INPUT output; - - // - // Calculate the face normal - // - float3 faceEdgeA = input[1].Pos - input[0].Pos; - float3 faceEdgeB = input[2].Pos - input[0].Pos; - float3 faceNormal = normalize( cross(faceEdgeA, faceEdgeB) ); - float3 ExplodeAmt = faceNormal*Explode; - - // - // Calculate the face center - // - float3 centerPos = (input[0].Pos.xyz + input[1].Pos.xyz + input[2].Pos.xyz)/3.0; - float2 centerTex = (input[0].Tex + input[1].Tex + input[2].Tex)/3.0; - centerPos += faceNormal*Explode; - - // - // Output the pyramid - // - for( int i=0; i<3; i++ ) - { - output.Pos = input[i].Pos + float4(ExplodeAmt,0); - output.Pos = mul( output.Pos, View ); - output.Pos = mul( output.Pos, Projection ); - output.Norm = input[i].Norm; - output.Tex = input[i].Tex; - TriStream.Append( output ); - - int iNext = (i+1)%3; - output.Pos = input[iNext].Pos + float4(ExplodeAmt,0); - output.Pos = mul( output.Pos, View ); - output.Pos = mul( output.Pos, Projection ); - output.Norm = input[iNext].Norm; - output.Tex = input[iNext].Tex; - TriStream.Append( output ); - - output.Pos = float4(centerPos,1) + float4(ExplodeAmt,0); - output.Pos = mul( output.Pos, View ); - output.Pos = mul( output.Pos, Projection ); - output.Norm = faceNormal; - output.Tex = centerTex; - TriStream.Append( output ); - - TriStream.RestartStrip(); - } - - for( int i=2; i>=0; i-- ) - { - output.Pos = input[i].Pos + float4(ExplodeAmt,0); - output.Pos = mul( output.Pos, View ); - output.Pos = mul( output.Pos, Projection ); - output.Norm = -input[i].Norm; - output.Tex = input[i].Tex; - TriStream.Append( output ); - } - TriStream.RestartStrip(); -} - - -//-------------------------------------------------------------------------------------- -// Pixel Shader -//-------------------------------------------------------------------------------------- -float4 PS( GSPS_INPUT input) : SV_Target -{ - // Calculate lighting assuming light color is <1,1,1,1> - float fLighting = saturate( dot( input.Norm, vLightDir ) ); - - // Load the diffuse texture and multiply by the lighting amount - float4 cDiffuse = g_txDiffuse.Sample( samLinear, input.Tex ) * fLighting; - cDiffuse.a = 1; - - // return diffuse - return cDiffuse; -} - - -//-------------------------------------------------------------------------------------- -// Technique -//-------------------------------------------------------------------------------------- -technique11 Render -{ - pass P0 - { - SetVertexShader( CompileShader( vs_4_0, VS() ) ); - SetGeometryShader( CompileShader( gs_4_0, GS() ) ); - SetPixelShader( CompileShader( ps_4_0, PS() ) ); - - SetBlendState( NoBlending, float4( 0.0f, 0.0f, 0.0f, 0.0f ), 0xFFFFFFFF ); - SetDepthStencilState( EnableDepth, 0 ); - } -} - - diff --git a/tests/hlsl/dxsdk/Direct3D11TutorialsFX11/Tutorial14/Tutorial14.fx b/tests/hlsl/dxsdk/Direct3D11TutorialsFX11/Tutorial14/Tutorial14.fx deleted file mode 100644 index b1e45b842..000000000 --- a/tests/hlsl/dxsdk/Direct3D11TutorialsFX11/Tutorial14/Tutorial14.fx +++ /dev/null @@ -1,294 +0,0 @@ -//TEST_IGNORE_FILE: -//-------------------------------------------------------------------------------------- -// File: Tutorial14.fx -// -// Copyright (c) Microsoft Corporation. All rights reserved. -//-------------------------------------------------------------------------------------- - - -//-------------------------------------------------------------------------------------- -// Constant Buffer Variables -//-------------------------------------------------------------------------------------- -Texture2D g_txDiffuse; -SamplerState samLinear -{ - Filter = MIN_MAG_MIP_LINEAR; - AddressU = Wrap; - AddressV = Wrap; -}; - -cbuffer cbConstant -{ - float3 vLightDir = float3(-0.577,0.577,-0.577); -}; - -cbuffer cbChangesEveryFrame -{ - matrix World; - matrix View; - matrix Projection; -}; - -struct VS_INPUT -{ - float3 Pos : POSITION; //position - float3 Norm : NORMAL; //normal - float2 Tex : TEXCOORD0; //texture coordinate -}; - -struct PS_INPUT -{ - float4 Pos : SV_POSITION; - float3 Norm : TEXCOORD0; - float2 Tex : TEXCOORD1; -}; - -struct QUADVS_INPUT -{ - float4 Pos : POSITION; - float2 Tex : TEXCOORD0; -}; - -struct QUADVS_OUTPUT -{ - float4 Pos : SV_POSITION; // Transformed position - float2 Tex : TEXCOORD0; -}; - -//-------------------------------------------------------------------------------------- -// Blending States -//-------------------------------------------------------------------------------------- -BlendState NoBlending -{ - BlendEnable[0] = FALSE; -}; - -BlendState SrcAlphaBlendingAdd -{ - BlendEnable[0] = TRUE; - SrcBlend = SRC_ALPHA; - DestBlend = ONE; - BlendOp = ADD; - SrcBlendAlpha = ZERO; - DestBlendAlpha = ZERO; - BlendOpAlpha = ADD; - RenderTargetWriteMask[0] = 0x0F; -}; - -BlendState SrcAlphaBlendingSub -{ - BlendEnable[0] = TRUE; - SrcBlend = SRC_ALPHA; - DestBlend = ONE; - BlendOp = SUBTRACT; - SrcBlendAlpha = ZERO; - DestBlendAlpha = ZERO; - BlendOpAlpha = ADD; - RenderTargetWriteMask[0] = 0x0F; -}; - -BlendState SrcColorBlendingAdd -{ - BlendEnable[0] = TRUE; - SrcBlend = SRC_COLOR; - DestBlend = ONE; - BlendOp = ADD; - SrcBlendAlpha = ZERO; - DestBlendAlpha = ZERO; - BlendOpAlpha = ADD; - RenderTargetWriteMask[0] = 0x0F; -}; - -BlendState SrcColorBlendingSub -{ - BlendEnable[0] = TRUE; - SrcBlend = SRC_COLOR; - DestBlend = ONE; - BlendOp = SUBTRACT; - SrcBlendAlpha = ZERO; - DestBlendAlpha = ZERO; - BlendOpAlpha = ADD; - RenderTargetWriteMask[0] = 0x0F; -}; - -//-------------------------------------------------------------------------------------- -// Depth/Stencil States -//-------------------------------------------------------------------------------------- -DepthStencilState RenderWithStencilState -{ - DepthEnable = false; - DepthWriteMask = ZERO; - DepthFunc = Less; - - // Setup stencil states - StencilEnable = true; - StencilReadMask = 0xFF; - StencilWriteMask = 0x00; - - FrontFaceStencilFunc = Not_Equal; - FrontFaceStencilPass = Keep; - FrontFaceStencilFail = Zero; - - BackFaceStencilFunc = Not_Equal; - BackFaceStencilPass = Keep; - BackFaceStencilFail = Zero; -}; - - - -//-------------------------------------------------------------------------------------- -// Scene Vertex Shader -//-------------------------------------------------------------------------------------- -PS_INPUT VS( VS_INPUT input ) -{ - PS_INPUT output = (PS_INPUT)0; - - output.Pos = mul( float4(input.Pos,1), World ); - output.Pos = mul( output.Pos, View ); - output.Pos = mul( output.Pos, Projection ); - output.Norm = mul( input.Norm, World ); - output.Tex = input.Tex; - - return output; -} - -//----------------------------------------------------------------------------- -// Quad Vertex Shaders -//----------------------------------------------------------------------------- -QUADVS_OUTPUT QuadVS( QUADVS_INPUT Input ) -{ - QUADVS_OUTPUT Output; - Output.Pos = mul( Input.Pos, World ); - Output.Pos = mul( Output.Pos, View ); - Output.Pos = mul( Output.Pos, Projection ); - Output.Tex = Input.Tex; - return Output; -} - -QUADVS_OUTPUT ScreenQuadVS( QUADVS_INPUT Input ) -{ - QUADVS_OUTPUT Output; - Output.Pos = Input.Pos; - Output.Tex = Input.Tex; - return Output; -} - -//-------------------------------------------------------------------------------------- -// Pixel Shader -//-------------------------------------------------------------------------------------- -float4 PS( PS_INPUT input) : SV_Target -{ - // Calculate lighting assuming light color is <1,1,1,1> - float fLighting = saturate( dot( input.Norm, vLightDir ) ); - float4 outputColor = g_txDiffuse.Sample( samLinear, input.Tex ) * fLighting; - outputColor.a = 1; - return outputColor; -} - -//-------------------------------------------------------------------------------------- -// Quad Pixel Shader -//-------------------------------------------------------------------------------------- -float4 QuadPS( QUADVS_OUTPUT input) : SV_Target -{ - return g_txDiffuse.Sample( samLinear, input.Tex ); -} - - -//-------------------------------------------------------------------------------------- -// Scene Techniques -//-------------------------------------------------------------------------------------- -technique11 RenderScene -{ - pass P0 - { - SetVertexShader( CompileShader( vs_4_0, VS() ) ); - SetGeometryShader( NULL ); - SetPixelShader( CompileShader( ps_4_0, PS() ) ); - SetBlendState( NoBlending, float4( 0.0f, 0.0f, 0.0f, 0.0f ), 0xFFFFFFFF ); - } -} - -//-------------------------------------------------------------------------------------- -// RenderWithStencil - set the depth stencil state inside of the technique -//-------------------------------------------------------------------------------------- -technique11 RenderWithStencil -{ - pass P0 - { - SetVertexShader( CompileShader( vs_4_0, ScreenQuadVS() ) ); - SetGeometryShader( NULL ); - SetPixelShader( CompileShader( ps_4_0, QuadPS() ) ); - - SetBlendState( NoBlending, float4( 0.0f, 0.0f, 0.0f, 0.0f ), 0xFFFFFFFF ); - SetDepthStencilState( RenderWithStencilState, 0 ); - } -} - -//-------------------------------------------------------------------------------------- -// Quad Techniques: Alpha blending state is set inside the technique -//-------------------------------------------------------------------------------------- -technique11 RenderQuadSolid -{ - pass P0 - { - SetVertexShader( CompileShader( vs_4_0, QuadVS() ) ); - SetGeometryShader( NULL ); - SetPixelShader( CompileShader( ps_4_0, QuadPS() ) ); - - SetBlendState( NoBlending, float4( 0.0f, 0.0f, 0.0f, 0.0f ), 0xFFFFFFFF ); - } -} - -//-------------------------------------------------------------------------------------- -technique11 RenderQuadSrcAlphaAdd -{ - pass P0 - { - SetVertexShader( CompileShader( vs_4_0, QuadVS() ) ); - SetGeometryShader( NULL ); - SetPixelShader( CompileShader( ps_4_0, QuadPS() ) ); - - SetBlendState( SrcAlphaBlendingAdd, float4( 0.0f, 0.0f, 0.0f, 0.0f ), 0xFFFFFFFF ); - } -} - -//-------------------------------------------------------------------------------------- -technique11 RenderQuadSrcAlphaSub -{ - pass P0 - { - SetVertexShader( CompileShader( vs_4_0, QuadVS() ) ); - SetGeometryShader( NULL ); - SetPixelShader( CompileShader( ps_4_0, QuadPS() ) ); - - SetBlendState( SrcAlphaBlendingSub, float4( 0.0f, 0.0f, 0.0f, 0.0f ), 0xFFFFFFFF ); - } -} - -//-------------------------------------------------------------------------------------- -technique11 RenderQuadSrcColorAdd -{ - pass P0 - { - SetVertexShader( CompileShader( vs_4_0, QuadVS() ) ); - SetGeometryShader( NULL ); - SetPixelShader( CompileShader( ps_4_0, QuadPS() ) ); - - SetBlendState( SrcColorBlendingAdd, float4( 0.0f, 0.0f, 0.0f, 0.0f ), 0xFFFFFFFF ); - } -} - -//-------------------------------------------------------------------------------------- -technique11 RenderQuadSrcColorSub -{ - pass P0 - { - SetVertexShader( CompileShader( vs_4_0, QuadVS() ) ); - SetGeometryShader( NULL ); - SetPixelShader( CompileShader( ps_4_0, QuadPS() ) ); - - SetBlendState( SrcColorBlendingSub, float4( 0.0f, 0.0f, 0.0f, 0.0f ), 0xFFFFFFFF ); - } -} - - diff --git a/tests/hlsl/dxsdk/DynamicShaderLinkage11/DynamicShaderLinkage11_LightPSH.h b/tests/hlsl/dxsdk/DynamicShaderLinkage11/DynamicShaderLinkage11_LightPSH.h deleted file mode 100644 index b44251829..000000000 --- a/tests/hlsl/dxsdk/DynamicShaderLinkage11/DynamicShaderLinkage11_LightPSH.h +++ /dev/null @@ -1,84 +0,0 @@ -//-------------------------------------------------------------------------------------- -// File: DynamicShaderLinkage11_LightPSH.h -// -// The pixel shader light header file for the DynamicShaderLinkage11 sample. -// -// Copyright (c) Microsoft Corporation. All rights reserved. -//-------------------------------------------------------------------------------------- - -//-------------------------------------------------------------------------------------- -// Interfaces -//-------------------------------------------------------------------------------------- -interface iBaseLight -{ - float3 IlluminateAmbient(float3 vNormal); - - float3 IlluminateDiffuse(float3 vNormal); - - float3 IlluminateSpecular(float3 vNormal, int specularPower ); - -}; - -//-------------------------------------------------------------------------------------- -// Classes -//-------------------------------------------------------------------------------------- -class cAmbientLight : iBaseLight -{ - float3 m_vLightColor; - bool m_bEnable; - - float3 IlluminateAmbient(float3 vNormal); - - float3 IlluminateDiffuse(float3 vNormal) - { - return (float3)0; - } - - float3 IlluminateSpecular(float3 vNormal, int specularPower ) - { - return (float3)0; - } -}; - -class cHemiAmbientLight : cAmbientLight -{ - // inherited float4 m_vLightColor is the SkyColor - float4 m_vGroundColor; - float4 m_vDirUp; - - float3 IlluminateAmbient(float3 vNormal); - -}; - -class cDirectionalLight : cAmbientLight -{ - // inherited float4 m_vLightColor is the LightColor - float4 m_vLightDir; - - float3 IlluminateDiffuse( float3 vNormal ); - - float3 IlluminateSpecular( float3 vNormal, int specularPower ); - -}; - -class cOmniLight : cAmbientLight -{ - float3 m_vLightPosition; - float radius; - - float3 IlluminateDiffuse( float3 vNormal ); - -}; - -class cSpotLight : cAmbientLight -{ - float3 m_vLightPosition; - float3 m_vLightDir; -}; - -class cEnvironmentLight : cAmbientLight -{ - float3 IlluminateSpecular( float3 vNormal, int specularPower ); -}; - - diff --git a/tests/hlsl/dxsdk/DynamicShaderLinkage11/DynamicShaderLinkage11_MaterialPSH.h b/tests/hlsl/dxsdk/DynamicShaderLinkage11/DynamicShaderLinkage11_MaterialPSH.h deleted file mode 100644 index 7f6bc3d22..000000000 --- a/tests/hlsl/dxsdk/DynamicShaderLinkage11/DynamicShaderLinkage11_MaterialPSH.h +++ /dev/null @@ -1,103 +0,0 @@ -//-------------------------------------------------------------------------------------- -// File: DynamicShaderLinkage11_MATERIALPSH.h -// -// The pixel shader material header file for the DynamicShaderLinkage11 sample. -// -// Copyright (c) Microsoft Corporation. All rights reserved. -//-------------------------------------------------------------------------------------- - -//-------------------------------------------------------------------------------------- -// Interfaces -//-------------------------------------------------------------------------------------- -interface iBaseMaterial -{ - float3 GetAmbientColor(float2 vTexcoord); - - float3 GetDiffuseColor(float2 vTexcoord); - - int GetSpecularPower(); - -}; - -//-------------------------------------------------------------------------------------- -// Classes -//-------------------------------------------------------------------------------------- -class cBaseMaterial : iBaseMaterial -{ - float3 m_vColor; - int m_iSpecPower; - - float3 GetAmbientColor(float2 vTexcoord) - { - return m_vColor; - } - - float3 GetDiffuseColor(float2 vTexcoord) - { - return (float3)m_vColor; - } - - int GetSpecularPower() - { - return m_iSpecPower; - } - -}; - -class cPlasticMaterial : cBaseMaterial -{ - -}; - -class cPlasticTexturedMaterial : cPlasticMaterial -{ - float3 GetAmbientColor(float2 vTexcoord); - - float3 GetDiffuseColor(float2 vTexcoord); - -}; - -class cPlasticLightingOnlyMaterial : cBaseMaterial -{ - float3 GetAmbientColor(float2 vTexcoord) - { - return (float3)1.0f; - } - - float3 GetDiffuseColor(float2 vTexcoord) - { - return (float3)1.0f; - } - -}; - -class cRoughMaterial : cBaseMaterial -{ - int GetSpecularPower() - { - return m_iSpecPower; - } -}; - -class cRoughTexturedMaterial : cRoughMaterial -{ - float3 GetAmbientColor(float2 vTexcoord); - - float3 GetDiffuseColor(float2 vTexcoord); - -}; - - -class cRoughLightingOnlyMaterial : cRoughMaterial -{ - float3 GetAmbientColor(float2 vTexcoord) - { - return (float3)1.0f; - } - - float3 GetDiffuseColor(float2 vTexcoord) - { - return (float3)1.0f; - } - -}; diff --git a/tests/hlsl/dxsdk/DynamicShaderLinkage11/DynamicShaderLinkage11_PS.hlsl b/tests/hlsl/dxsdk/DynamicShaderLinkage11/DynamicShaderLinkage11_PS.hlsl deleted file mode 100644 index 6850ad9cb..000000000 --- a/tests/hlsl/dxsdk/DynamicShaderLinkage11/DynamicShaderLinkage11_PS.hlsl +++ /dev/null @@ -1,84 +0,0 @@ -//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues. -//TEST:COMPARE_HLSL: -profile ps_4_0 -entry PSMain -//-------------------------------------------------------------------------------------- -// File: DynamicShaderLinkage11.psh -// -// The pixel shader header file for the DynamicShaderLinkage11 sample. -// -// Copyright (c) Microsoft Corporation. All rights reserved. -//-------------------------------------------------------------------------------------- - -//-------------------------------------------------------------------------------------- -// Header Includes -//-------------------------------------------------------------------------------------- -#include "DynamicShaderLinkage11_PSBuffers.h" - -// Defines for default static permutated setting -#if defined( STATIC_PERMUTE ) - #define HEMI_AMBIENT //CONST_AMBIENT //HEMI_AMBIENT - #define TEXTURE_ENABLE - #define SPECULAR_ENABLE -#endif - -//-------------------------------------------------------------------------------------- -// Input / Output structures -//-------------------------------------------------------------------------------------- -struct PS_INPUT -{ - float4 vPosition : SV_POSITION; - float3 vNormal : NORMAL; - float2 vTexcoord : TEXCOORD0; - float4 vMatrix : TEXCOORD1; -}; - -//-------------------------------------------------------------------------------------- -// Abstract Interface Instances for dyamic linkage / permutation -//-------------------------------------------------------------------------------------- -#if !defined( STATIC_PERMUTE ) - iBaseLight g_abstractAmbientLighting; - iBaseLight g_abstractDirectLighting; - iBaseLight g_abstractEnvironmentLighting; - iBaseMaterial g_abstractMaterial; -#else -//-------------------------------------------------------------------------------------- -// Concrete Instances for STATIC_PERMUTE - static permutation -//-------------------------------------------------------------------------------------- - #if defined( HEMI_AMBIENT ) - #define g_abstractAmbientLighting g_hemiAmbientLight - #else - // CONST_AMBIENT - #define g_abstractAmbientLighting g_ambientLight - #endif - #define g_abstractDirectLighting g_directionalLight - #define g_abstractEnvironmentLighting g_environmentLight - #if defined( TEXTURE_ENABLE ) - #define g_abstractMaterial g_plasticTexturedMaterial - #else - #define g_abstractMaterial g_plasticMaterial - #endif -#endif - -//-------------------------------------------------------------------------------------- -// Pixel Shader -//-------------------------------------------------------------------------------------- -float4 PSMain( PS_INPUT Input ) : SV_TARGET -{ - // Compute the Ambient term - float3 Ambient = (float3)0.0f; - Ambient = g_abstractMaterial.GetAmbientColor( Input.vTexcoord ) * g_abstractAmbientLighting.IlluminateAmbient( Input.vNormal ); - - // Accumulate the Diffuse contribution - float3 Diffuse = (float3)0.0f; - - Diffuse += g_abstractMaterial.GetDiffuseColor( Input.vTexcoord ) * g_abstractDirectLighting.IlluminateDiffuse( Input.vNormal ); - - // Compute the Specular contribution - float3 Specular = (float3)0.0f; - Specular += g_abstractDirectLighting.IlluminateSpecular( Input.vNormal, g_abstractMaterial.GetSpecularPower() ); - Specular += g_abstractEnvironmentLighting.IlluminateSpecular( Input.vNormal, g_abstractMaterial.GetSpecularPower() ); - - // Accumulate the lighting with saturation - float3 Lighting = saturate( Ambient + Diffuse + Specular ); - - return float4(Lighting,1.0f); -} diff --git a/tests/hlsl/dxsdk/DynamicShaderLinkage11/DynamicShaderLinkage11_PSBuffers.h b/tests/hlsl/dxsdk/DynamicShaderLinkage11/DynamicShaderLinkage11_PSBuffers.h deleted file mode 100644 index e2263b832..000000000 --- a/tests/hlsl/dxsdk/DynamicShaderLinkage11/DynamicShaderLinkage11_PSBuffers.h +++ /dev/null @@ -1,129 +0,0 @@ -//-------------------------------------------------------------------------------------- -// File: DynamicShaderLinkage11_LightPSH.hlsl -// -// The pixel shader light source module file for the DynamicShaderLinkage11 sample. -// -// Copyright (c) Microsoft Corporation. All rights reserved. -//-------------------------------------------------------------------------------------- - -#include "DynamicShaderLinkage11_LightPSH.h" -#include "DynamicShaderLinkage11_MaterialPSH.h" - -//-------------------------------------------------------------------------------------- -// Constant Buffers -//-------------------------------------------------------------------------------------- -cbuffer cbPerFrame : register( b0 ) -{ - cAmbientLight g_ambientLight; - cHemiAmbientLight g_hemiAmbientLight; - cDirectionalLight g_directionalLight; - cEnvironmentLight g_environmentLight; - float4 g_vEyeDir; -}; - -cbuffer cbPerPrimitive : register( b1 ) -{ - cPlasticMaterial g_plasticMaterial; - cPlasticTexturedMaterial g_plasticTexturedMaterial; - cPlasticLightingOnlyMaterial g_plasticLightingOnlyMaterial; - cRoughMaterial g_roughMaterial; - cRoughTexturedMaterial g_roughTexturedMaterial; - cRoughLightingOnlyMaterial g_roughLightingOnlyMaterial; -}; - -//-------------------------------------------------------------------------------------- -// Textures and Samplers -//-------------------------------------------------------------------------------------- -Texture2D g_txDiffuse : register( t0 ); -Texture2D g_txNormalMap : register( t1 ); -TextureCube g_txEnvironmentMap : register( t2 ); - -SamplerState g_samLinear : register( s0 ); - -//-------------------------------------------------------------------------------------- -// Lighting Class Methods -//-------------------------------------------------------------------------------------- -// Ambient Lighting Class Methods -float3 cAmbientLight::IlluminateAmbient(float3 vNormal) -{ - return float4( m_vLightColor * m_bEnable, 1.0f); -} - -float3 cHemiAmbientLight::IlluminateAmbient(float3 vNormal) -{ - float thetha = (dot( vNormal, m_vDirUp ) + 1.0f) / 2.0f; - - return lerp( m_vGroundColor, m_vLightColor, thetha) * m_bEnable; -} - -// Directional Light class -float3 cDirectionalLight::IlluminateDiffuse( float3 vNormal ) -{ - float lambert = saturate(dot( vNormal, m_vLightDir )); - return ((float3)lambert * m_vLightColor * m_bEnable); -} - -float3 cDirectionalLight::IlluminateSpecular( float3 vNormal, int specularPower ) -{ - float3 H = -normalize(g_vEyeDir) + m_vLightDir; - float3 halfAngle = normalize( H ); - float specular = pow( max(0,dot( halfAngle, normalize(vNormal) )), specularPower ); - - return ((float3)specular * m_vLightColor * m_bEnable); -} - -// Omni Light Class -float3 cOmniLight::IlluminateDiffuse( float3 vNormal ) -{ - return (float3)0.0f; // TO DO! -} - -// Environment Lighting -float3 cEnvironmentLight::IlluminateSpecular( float3 vNormal, int specularPower ) -{ - // compute reflection vector taking into account a cheap fresnel falloff; - float3 N = normalize(vNormal); - float3 E = normalize(g_vEyeDir); - float3 R = reflect( E, N ); - float fresnel = 1 - dot( -E, N ); - fresnel = (fresnel * fresnel * fresnel ); - - float3 specular = g_txEnvironmentMap.Sample( g_samLinear, R ) * fresnel; - - return (specular * (float3)m_bEnable); -// return ((float3)fresnel); - -} - -//-------------------------------------------------------------------------------------- -// Material Class Methods -//-------------------------------------------------------------------------------------- -// Plastic Material Methods -float3 cPlasticTexturedMaterial::GetAmbientColor(float2 vTexcoord) -{ - float4 vDiffuse = (float4)1.0f; - vDiffuse = g_txDiffuse.Sample( g_samLinear, vTexcoord ); - return m_vColor * vDiffuse; -} - -float3 cPlasticTexturedMaterial::GetDiffuseColor(float2 vTexcoord) -{ - float4 vDiffuse = (float4)1.0f; - vDiffuse = g_txDiffuse.Sample( g_samLinear, vTexcoord ); - return m_vColor * vDiffuse; -} - -// Rough Material Methods -float3 cRoughTexturedMaterial::GetAmbientColor(float2 vTexcoord) -{ - float4 vDiffuse = (float4)1.0f; - vDiffuse = g_txDiffuse.Sample( g_samLinear, vTexcoord ); - return m_vColor * vDiffuse; -} - -float3 cRoughTexturedMaterial::GetDiffuseColor(float2 vTexcoord) -{ - float4 vDiffuse = (float4)1.0f; - vDiffuse = g_txDiffuse.Sample( g_samLinear, vTexcoord ); - return m_vColor * vDiffuse; -} diff --git a/tests/hlsl/dxsdk/DynamicShaderLinkage11/DynamicShaderLinkage11_VS.hlsl b/tests/hlsl/dxsdk/DynamicShaderLinkage11/DynamicShaderLinkage11_VS.hlsl deleted file mode 100644 index d47f20c23..000000000 --- a/tests/hlsl/dxsdk/DynamicShaderLinkage11/DynamicShaderLinkage11_VS.hlsl +++ /dev/null @@ -1,73 +0,0 @@ -//TEST:COMPARE_HLSL: -profile vs_4_0 -entry VSMain - -#ifndef __SLANG__ -#define cbPerObject cbPerObject_0 -#define g_mWorldViewProjection g_mWorldViewProjection_0 -#define g_mWorld g_mWorld_0 -#endif - -//-------------------------------------------------------------------------------------- -// File: DynamicShaderLinkage11_VS.hlsl -// -// The vertex shader file for the DynamicShaderLinkage11 sample. -// -// Copyright (c) Microsoft Corporation. All rights reserved. -//-------------------------------------------------------------------------------------- - -//-------------------------------------------------------------------------------------- -// Globals -//-------------------------------------------------------------------------------------- -cbuffer cbPerObject : register( b0 ) -{ - float4x4 g_mWorldViewProjection ;//SLANG: : packoffset( c0 ); - float4x4 g_mWorld ;//SLANG: : packoffset( c4 ); -}; - -//-------------------------------------------------------------------------------------- -// Input / Output structures -//-------------------------------------------------------------------------------------- -struct VS_INPUT -{ - float4 vPosition : POSITION; - float3 vNormal : NORMAL; - float2 vTexcoord : TEXCOORD0; -}; - -struct VS_OUTPUT -{ - float4 vPosition : SV_POSITION; - float3 vNormal : NORMAL; - float2 vTexcoord0 : TEXCOORD0; - float4 vMatrix : TEXCOORD1; // DEBUG -}; - -//-------------------------------------------------------------------------------------- -// Vertex Shader -//-------------------------------------------------------------------------------------- -// We aliased signed vectors as a unsigned format. -// Need to recover signed values. The values 1.0 and 2.0 -// are slightly inaccurate here. -float3 R10G10B10A2_UNORM_TO_R32G32B32_FLOAT( in float3 vVec ) -{ - vVec *= 2.0f; - return vVec >= 1.0f ? ( vVec - 2.0f ) : vVec; -} - -VS_OUTPUT VSMain( VS_INPUT Input ) -{ - - VS_OUTPUT Output; - float3 tmpNormal; - - Output.vPosition = mul( Input.vPosition, g_mWorldViewProjection ); - - // Expand compressed vectors - tmpNormal = R10G10B10A2_UNORM_TO_R32G32B32_FLOAT( Input.vNormal ); - Output.vNormal = mul( tmpNormal, (float3x3)g_mWorld ); - - Output.vTexcoord0 = Input.vTexcoord; - - Output.vMatrix = (float4)g_mWorld[0]; // DEBUG - return Output; -} - diff --git a/tests/hlsl/dxsdk/DynamicShaderLinkageFX11/DynamicShaderLinkageFX11.fx b/tests/hlsl/dxsdk/DynamicShaderLinkageFX11/DynamicShaderLinkageFX11.fx deleted file mode 100644 index c72b98843..000000000 --- a/tests/hlsl/dxsdk/DynamicShaderLinkageFX11/DynamicShaderLinkageFX11.fx +++ /dev/null @@ -1,192 +0,0 @@ -//TEST_IGNORE_FILE: -//-------------------------------------------------------------------------------------- -// File: DynamicShaderLinkageFX11.fx -// -// The effect file for the DynamicShaderLinkageFX11 sample. -// -// Copyright (c) Microsoft Corporation. All rights reserved. -//-------------------------------------------------------------------------------------- - -#include "DynamicShaderLinkageFX11_VS.hlsl" -#include "DynamicShaderLinkageFX11_PS.hlsl" - -// -// Settings for static permutations. -// All of the pre-5.0 targets need static specialization -// since they don't support late binding. The below -// just selects a single specialization but you could -// create any number of them, each one representing -// a new shader with the interfaces compiled out -// due to the compile-time class references. -// - -#define StaticMaterial g_plasticTexturedMaterial -#define StaticAmbientLight g_ambientLight -#define StaticDirectLight g_directionalLight -#define StaticEnvironmentLight g_environmentLight - -technique11 FeatureLevel10 -{ - pass - { - SetRasterizerState(g_rasterizerState[g_fillMode]); - SetVertexShader(CompileShader(vs_4_0, - VSMain())); - SetPixelShader(CompileShader(ps_4_0, - PSMainUniform(StaticAmbientLight, - StaticDirectLight, - StaticEnvironmentLight, - StaticMaterial))); - } -} - -technique11 FeatureLevel10_1 -{ - pass - { - SetRasterizerState(g_rasterizerState[g_fillMode]); - SetVertexShader(CompileShader(vs_4_1, - VSMain())); - SetPixelShader(CompileShader(ps_4_1, - PSMainUniform(StaticAmbientLight, - StaticDirectLight, - StaticEnvironmentLight, - StaticMaterial))); - } -} - -// -// Variables for dynamic shader linkage. -// There are two variations here for dynamic usage. -// In the first we use the uniform entry point -// and pass in global interface variables. This -// creates a shader which refers to the global -// interface variables when running and we can bind -// concrete instances in our C++ code by using -// ID3DX11EffectInterfaceVariable::SetClassInstance. -// This approach works well when you have several -// independent variations and want to bind them -// individually in your C++ code, such as the -// different lighting and material parameters in -// this sample. -// - -iBaseLight g_abstractAmbientLighting; -iBaseLight g_abstractDirectLighting; -iBaseLight g_abstractEnvironmentLighting; -iBaseMaterial g_abstractMaterial; - -technique11 FeatureLevel11 -{ - pass - { - SetRasterizerState(g_rasterizerState[g_fillMode]); - SetVertexShader(CompileShader(vs_5_0, - VSMain())); - SetPixelShader(CompileShader(ps_5_0, - PSMainUniform(g_abstractAmbientLighting, - g_abstractDirectLighting, - g_abstractEnvironmentLighting, - g_abstractMaterial))); - } -} - -// -// In this second variation we use the non-uniform -// entry point so that we don't have to specify -// any interfaces when compiling the shader. We -// then reuse the compiled shader with different -// BindInterfaces calls so that all bindings are -// handled automatically by the effect runtime. -// Below we have multiple techniques where -// we've given a concrete binding for the material. -// Lighting parameters are left as interfaces for -// binding via effect variables, but could also -// be specified concretely if the number of variations -// is manageable. -// This approach works well for a small number of variations -// that are known in advance, as you can just list them -// in your effect and you don't need to do the -// binding work explicitly in your C++ code. -// - -VertexShader g_NonUniVS = CompileShader(vs_5_0, VSMain()); -PixelShader g_NonUniPS = CompileShader(ps_5_0, PSMainNonUniform()); - -technique11 FeatureLevel11_g_plasticMaterial -{ - pass - { - SetVertexShader(g_NonUniVS); - SetPixelShader(BindInterfaces(g_NonUniPS, - g_abstractAmbientLighting, - g_abstractDirectLighting, - g_abstractEnvironmentLighting, - g_plasticMaterial)); - } -} - -technique11 FeatureLevel11_g_plasticTexturedMaterial -{ - pass - { - SetVertexShader(g_NonUniVS); - SetPixelShader(BindInterfaces(g_NonUniPS, - g_abstractAmbientLighting, - g_abstractDirectLighting, - g_abstractEnvironmentLighting, - g_plasticTexturedMaterial)); - } -} - -technique11 FeatureLevel11_g_plasticLightingOnlyMaterial -{ - pass - { - SetVertexShader(g_NonUniVS); - SetPixelShader(BindInterfaces(g_NonUniPS, - g_abstractAmbientLighting, - g_abstractDirectLighting, - g_abstractEnvironmentLighting, - g_plasticLightingOnlyMaterial)); - } -} - -technique11 FeatureLevel11_g_roughMaterial -{ - pass - { - SetVertexShader(g_NonUniVS); - SetPixelShader(BindInterfaces(g_NonUniPS, - g_abstractAmbientLighting, - g_abstractDirectLighting, - g_abstractEnvironmentLighting, - g_roughMaterial)); - } -} - -technique11 FeatureLevel11_g_roughTexturedMaterial -{ - pass - { - SetVertexShader(g_NonUniVS); - SetPixelShader(BindInterfaces(g_NonUniPS, - g_abstractAmbientLighting, - g_abstractDirectLighting, - g_abstractEnvironmentLighting, - g_roughTexturedMaterial)); - } -} - -technique11 FeatureLevel11_g_roughLightingOnlyMaterial -{ - pass - { - SetVertexShader(g_NonUniVS); - SetPixelShader(BindInterfaces(g_NonUniPS, - g_abstractAmbientLighting, - g_abstractDirectLighting, - g_abstractEnvironmentLighting, - g_roughLightingOnlyMaterial)); - } -} diff --git a/tests/hlsl/dxsdk/DynamicShaderLinkageFX11/DynamicShaderLinkageFX11_LightPSH.h b/tests/hlsl/dxsdk/DynamicShaderLinkageFX11/DynamicShaderLinkageFX11_LightPSH.h deleted file mode 100644 index 6f9a0f4d8..000000000 --- a/tests/hlsl/dxsdk/DynamicShaderLinkageFX11/DynamicShaderLinkageFX11_LightPSH.h +++ /dev/null @@ -1,82 +0,0 @@ -//-------------------------------------------------------------------------------------- -// File: DynamicShaderLinkageFX11_LightPSH.h -// -// The pixel shader light header file for the DynamicShaderLinkageFX11 sample. -// -// Copyright (c) Microsoft Corporation. All rights reserved. -//-------------------------------------------------------------------------------------- - -//-------------------------------------------------------------------------------------- -// Interfaces -//-------------------------------------------------------------------------------------- -interface iBaseLight -{ - float3 IlluminateAmbient(float3 vNormal); - - float3 IlluminateDiffuse(float3 vNormal); - - float3 IlluminateSpecular(float3 vNormal, int specularPower ); - -}; - -//-------------------------------------------------------------------------------------- -// Classes -//-------------------------------------------------------------------------------------- -class cAmbientLight : iBaseLight -{ - float3 m_vLightColor; - bool m_bEnable; - - float3 IlluminateAmbient(float3 vNormal); - - float3 IlluminateDiffuse(float3 vNormal) - { - return (float3)0; - } - - float3 IlluminateSpecular(float3 vNormal, int specularPower ) - { - return (float3)0; - } -}; - -class cHemiAmbientLight : cAmbientLight -{ - // inherited float4 m_vLightColor is the SkyColor - float4 m_vGroundColor; - float4 m_vDirUp; - - float3 IlluminateAmbient(float3 vNormal); - -}; - -class cDirectionalLight : cAmbientLight -{ - // inherited float4 m_vLightColor is the LightColor - float4 m_vLightDir; - - float3 IlluminateDiffuse( float3 vNormal ); - - float3 IlluminateSpecular( float3 vNormal, int specularPower ); - -}; - -class cOmniLight : cAmbientLight -{ - float3 m_vLightPosition; - float radius; - - float3 IlluminateDiffuse( float3 vNormal ); - -}; - -class cSpotLight : cAmbientLight -{ - float3 m_vLightPosition; - float3 m_vLightDir; -}; - -class cEnvironmentLight : cAmbientLight -{ - float3 IlluminateSpecular( float3 vNormal, int specularPower ); -}; diff --git a/tests/hlsl/dxsdk/DynamicShaderLinkageFX11/DynamicShaderLinkageFX11_MaterialPSH.h b/tests/hlsl/dxsdk/DynamicShaderLinkageFX11/DynamicShaderLinkageFX11_MaterialPSH.h deleted file mode 100644 index cd54a283d..000000000 --- a/tests/hlsl/dxsdk/DynamicShaderLinkageFX11/DynamicShaderLinkageFX11_MaterialPSH.h +++ /dev/null @@ -1,103 +0,0 @@ -//-------------------------------------------------------------------------------------- -// File: DynamicShaderLinkageFX11_MaterialPSH.h -// -// The pixel shader material header file for the DynamicShaderLinkageFX11 sample. -// -// Copyright (c) Microsoft Corporation. All rights reserved. -//-------------------------------------------------------------------------------------- - -//-------------------------------------------------------------------------------------- -// Interfaces -//-------------------------------------------------------------------------------------- -interface iBaseMaterial -{ - float3 GetAmbientColor(float2 vTexcoord); - - float3 GetDiffuseColor(float2 vTexcoord); - - int GetSpecularPower(); - -}; - -//-------------------------------------------------------------------------------------- -// Classes -//-------------------------------------------------------------------------------------- -class cBaseMaterial : iBaseMaterial -{ - float3 m_vColor; - int m_iSpecPower; - - float3 GetAmbientColor(float2 vTexcoord) - { - return m_vColor; - } - - float3 GetDiffuseColor(float2 vTexcoord) - { - return (float3)m_vColor; - } - - int GetSpecularPower() - { - return m_iSpecPower; - } - -}; - -class cPlasticMaterial : cBaseMaterial -{ - -}; - -class cPlasticTexturedMaterial : cPlasticMaterial -{ - float3 GetAmbientColor(float2 vTexcoord); - - float3 GetDiffuseColor(float2 vTexcoord); - -}; - -class cPlasticLightingOnlyMaterial : cBaseMaterial -{ - float3 GetAmbientColor(float2 vTexcoord) - { - return (float3)1.0f; - } - - float3 GetDiffuseColor(float2 vTexcoord) - { - return (float3)1.0f; - } - -}; - -class cRoughMaterial : cBaseMaterial -{ - int GetSpecularPower() - { - return m_iSpecPower; - } -}; - -class cRoughTexturedMaterial : cRoughMaterial -{ - float3 GetAmbientColor(float2 vTexcoord); - - float3 GetDiffuseColor(float2 vTexcoord); - -}; - - -class cRoughLightingOnlyMaterial : cRoughMaterial -{ - float3 GetAmbientColor(float2 vTexcoord) - { - return (float3)1.0f; - } - - float3 GetDiffuseColor(float2 vTexcoord) - { - return (float3)1.0f; - } - -}; diff --git a/tests/hlsl/dxsdk/DynamicShaderLinkageFX11/DynamicShaderLinkageFX11_PSBuffers.h b/tests/hlsl/dxsdk/DynamicShaderLinkageFX11/DynamicShaderLinkageFX11_PSBuffers.h deleted file mode 100644 index 3b4c528be..000000000 --- a/tests/hlsl/dxsdk/DynamicShaderLinkageFX11/DynamicShaderLinkageFX11_PSBuffers.h +++ /dev/null @@ -1,152 +0,0 @@ -//-------------------------------------------------------------------------------------- -// File: DynamicShaderLinkageFX11_LightPSH.hlsl -// -// The pixel shader light source module file for the DynamicShaderLinkageFX11 sample. -// -// Copyright (c) Microsoft Corporation. All rights reserved. -//-------------------------------------------------------------------------------------- - -#include "DynamicShaderLinkageFX11_LightPSH.h" -#include "DynamicShaderLinkageFX11_MaterialPSH.h" - -//-------------------------------------------------------------------------------------- -// Constant Buffers -//-------------------------------------------------------------------------------------- -cbuffer cbPerFrame : register( b0 ) -{ - cAmbientLight g_ambientLight; - cHemiAmbientLight g_hemiAmbientLight; - cDirectionalLight g_directionalLight; - cEnvironmentLight g_environmentLight; - float4 g_vEyeDir; -}; - -cbuffer cbPerPrimitive : register( b1 ) -{ - cPlasticMaterial g_plasticMaterial; - cPlasticTexturedMaterial g_plasticTexturedMaterial; - cPlasticLightingOnlyMaterial g_plasticLightingOnlyMaterial; - cRoughMaterial g_roughMaterial; - cRoughTexturedMaterial g_roughTexturedMaterial; - cRoughLightingOnlyMaterial g_roughLightingOnlyMaterial; -}; - -//-------------------------------------------------------------------------------------- -// Textures and Samplers -//-------------------------------------------------------------------------------------- -Texture2D g_txDiffuse : register( t0 ); -Texture2D g_txNormalMap : register( t1 ); -TextureCube g_txEnvironmentMap : register( t2 ); - -SamplerState g_samLinear : register( s0 ) -{ - Filter = MIN_MAG_MIP_LINEAR; - AddressU = WRAP; - AddressV = WRAP; - AddressW = WRAP; -}; - -//-------------------------------------------------------------------------------------- -// Rasterization State -//-------------------------------------------------------------------------------------- -uint g_fillMode = 0; - -RasterizerState g_rasterizerState[2] -{ -{ - FillMode = SOLID; - MultisampleEnable = true; -}, -{ - FillMode = WIREFRAME; - MultisampleEnable = true; -} -}; - -//-------------------------------------------------------------------------------------- -// Lighting Class Methods -//-------------------------------------------------------------------------------------- -// Ambient Lighting Class Methods -float3 cAmbientLight::IlluminateAmbient(float3 vNormal) -{ - return m_vLightColor * m_bEnable; -} - -float3 cHemiAmbientLight::IlluminateAmbient(float3 vNormal) -{ - float thetha = (dot( vNormal, m_vDirUp.xyz ) + 1.0f) / 2.0f; - - return lerp( m_vGroundColor.xyz, m_vLightColor, thetha) * m_bEnable; -} - -// Directional Light class -float3 cDirectionalLight::IlluminateDiffuse( float3 vNormal ) -{ - float lambert = saturate(dot( vNormal, m_vLightDir.xyz )); - return ((float3)lambert * m_vLightColor * m_bEnable); -} - -float3 cDirectionalLight::IlluminateSpecular( float3 vNormal, int specularPower ) -{ - float3 H = -normalize(g_vEyeDir.xyz) + m_vLightDir.xyz; - float3 halfAngle = normalize( H ); - float specular = pow( max(0,dot( halfAngle, normalize(vNormal) )), specularPower ); - - return ((float3)specular * m_vLightColor * m_bEnable); -} - -// Omni Light Class -float3 cOmniLight::IlluminateDiffuse( float3 vNormal ) -{ - return (float3)0.0f; // TO DO! -} - -// Environment Lighting -float3 cEnvironmentLight::IlluminateSpecular( float3 vNormal, int specularPower ) -{ - // compute reflection vector taking into account a cheap fresnel falloff; - float3 N = normalize(vNormal); - float3 E = normalize(g_vEyeDir.xyz); - float3 R = reflect( E, N ); - float fresnel = 1 - dot( -E, N ); - fresnel = (fresnel * fresnel * fresnel ); - - float3 specular = g_txEnvironmentMap.Sample( g_samLinear, R ).xyz * fresnel; - - return (specular * (float3)m_bEnable); -// return ((float3)fresnel); - -} - -//-------------------------------------------------------------------------------------- -// Material Class Methods -//-------------------------------------------------------------------------------------- -// Plastic Material Methods -float3 cPlasticTexturedMaterial::GetAmbientColor(float2 vTexcoord) -{ - float4 vDiffuse = (float4)1.0f; - vDiffuse = g_txDiffuse.Sample( g_samLinear, vTexcoord ); - return m_vColor * vDiffuse.xyz; -} - -float3 cPlasticTexturedMaterial::GetDiffuseColor(float2 vTexcoord) -{ - float4 vDiffuse = (float4)1.0f; - vDiffuse = g_txDiffuse.Sample( g_samLinear, vTexcoord ); - return m_vColor * vDiffuse.xyz; -} - -// Rough Material Methods -float3 cRoughTexturedMaterial::GetAmbientColor(float2 vTexcoord) -{ - float4 vDiffuse = (float4)1.0f; - vDiffuse = g_txDiffuse.Sample( g_samLinear, vTexcoord ); - return m_vColor * vDiffuse.xyz; -} - -float3 cRoughTexturedMaterial::GetDiffuseColor(float2 vTexcoord) -{ - float4 vDiffuse = (float4)1.0f; - vDiffuse = g_txDiffuse.Sample( g_samLinear, vTexcoord ); - return m_vColor * vDiffuse.xyz; -} diff --git a/tests/hlsl/dxsdk/DynamicShaderLinkageFX11/DynamicShaderLinkageFX11_ps.hlsl b/tests/hlsl/dxsdk/DynamicShaderLinkageFX11/DynamicShaderLinkageFX11_ps.hlsl deleted file mode 100644 index 55d206259..000000000 --- a/tests/hlsl/dxsdk/DynamicShaderLinkageFX11/DynamicShaderLinkageFX11_ps.hlsl +++ /dev/null @@ -1,113 +0,0 @@ -//TEST_IGNORE_FILE: -//-------------------------------------------------------------------------------------- -// File: DynamicShaderLinkageFX11.psh -// -// The pixel shader header file for the DynamicShaderLinkageFX11 sample. -// -// Copyright (c) Microsoft Corporation. All rights reserved. -//-------------------------------------------------------------------------------------- - -//-------------------------------------------------------------------------------------- -// Header Includes -//-------------------------------------------------------------------------------------- -#include "DynamicShaderLinkageFX11_PSBuffers.h" - -//-------------------------------------------------------------------------------------- -// Input / Output structures -//-------------------------------------------------------------------------------------- -struct PS_INPUT -{ - float4 vPosition : SV_POSITION; - float3 vNormal : NORMAL; - float2 vTexcoord : TEXCOORD0; - float4 vMatrix : TEXCOORD1; -}; - -//-------------------------------------------------------------------------------------- -// Pixel Shader -//-------------------------------------------------------------------------------------- - -// This pixel shader uses several interfaces during its -// work. We show three different ways of providing interface -// bindings for the PS and those have two different -// entry points so we've separated the base PS code -// into a worker routine that's called by the entry -// points. Normally only one technique would be used -// and this layering of entry point and worker would -// not be necessary. -float4 PSMainWorker( iBaseLight ambientLighting, - iBaseLight directLighting, - iBaseLight environmentLighting, - iBaseMaterial material, - PS_INPUT Input ) -{ - // Compute the Ambient term - float3 Ambient = (float3)0.0f; - Ambient = material.GetAmbientColor( Input.vTexcoord ) * ambientLighting.IlluminateAmbient( Input.vNormal ); - - // Accumulate the Diffuse contribution - float3 Diffuse = (float3)0.0f; - - Diffuse += material.GetDiffuseColor( Input.vTexcoord ) * directLighting.IlluminateDiffuse( Input.vNormal ); - - // Compute the Specular contribution - float3 Specular = (float3)0.0f; - Specular += directLighting.IlluminateSpecular( Input.vNormal, material.GetSpecularPower() ); - Specular += environmentLighting.IlluminateSpecular( Input.vNormal, material.GetSpecularPower() ); - - // Accumulate the lighting with saturation - float3 Lighting = saturate( Ambient + Diffuse + Specular); - - return float4(Lighting,1.0f); -} - -// One way to provide bindings for shaders in Effects 11 is -// to use uniform interface parameters. As with non-interface -// uniform parameters you must specify a value for these -// parameters in your CompileShader invocations in the effect. -// You can provide concrete class instances if you want -// to statically specialize your shaders, such as for targets -// that don't support abstract interfaces; or you can provide -// other interfaces that you bind using effect variables. -// Both are shown in this sample's technique passes. -float4 PSMainUniform( uniform iBaseLight ambientLighting, - uniform iBaseLight directLighting, - uniform iBaseLight environmentLighting, - uniform iBaseMaterial material, - PS_INPUT Input ) : SV_Target -{ - return PSMainWorker(ambientLighting, - directLighting, - environmentLighting, - material, - Input); -} - -// Another way to use Effects 11 with interfaces is -// to have non-uniform parameters, which then are -// bound with a BindInterfaces in a technique pass. -// BindInterfaces gives concrete instances to use -// with a shader but does not do static specialization, -// it just saves information for the effect runtime -// to use when setting up the shader to run. -// This lets you share a single shader, compiled with -// interface usage, while still getting the convenience -// of declaring concrete bindings in the effect and -// not needed explicit binding in code via effect -// variable updates. If you have many different -// variations it may be simpler to use bindings -// through effect variables, as then you don't -// need to list every possible binding set in your -// techniques. -float4 PSMainNonUniform( iBaseLight ambientLighting, - iBaseLight directLighting, - iBaseLight environmentLighting, - iBaseMaterial material, - PS_INPUT Input ) : SV_Target -{ - return PSMainWorker(ambientLighting, - directLighting, - environmentLighting, - material, - Input); -} diff --git a/tests/hlsl/dxsdk/DynamicShaderLinkageFX11/DynamicShaderLinkageFX11_vs.hlsl b/tests/hlsl/dxsdk/DynamicShaderLinkageFX11/DynamicShaderLinkageFX11_vs.hlsl deleted file mode 100644 index 4791e5786..000000000 --- a/tests/hlsl/dxsdk/DynamicShaderLinkageFX11/DynamicShaderLinkageFX11_vs.hlsl +++ /dev/null @@ -1,65 +0,0 @@ -//TEST_IGNORE_FILE: -//-------------------------------------------------------------------------------------- -// File: DynamicShaderLinkageFX11_VS.hlsl -// -// The vertex shader file for the DynamicShaderLinkageFX11 sample. -// -// Copyright (c) Microsoft Corporation. All rights reserved. -//-------------------------------------------------------------------------------------- - -//-------------------------------------------------------------------------------------- -// Globals -//-------------------------------------------------------------------------------------- -cbuffer cbPerObject : register( b0 ) -{ - float4x4 g_mWorldViewProjection : packoffset( c0 ); - float4x4 g_mWorld : packoffset( c4 ); -}; - -//-------------------------------------------------------------------------------------- -// Input / Output structures -//-------------------------------------------------------------------------------------- -struct VS_INPUT -{ - float4 vPosition : POSITION; - float3 vNormal : NORMAL; - float2 vTexcoord : TEXCOORD0; -}; - -struct VS_OUTPUT -{ - float4 vPosition : SV_POSITION; - float3 vNormal : NORMAL; - float2 vTexcoord0 : TEXCOORD0; - float4 vMatrix : TEXCOORD1; // DEBUG -}; - -//-------------------------------------------------------------------------------------- -// Vertex Shader -//-------------------------------------------------------------------------------------- -// We aliased signed vectors as a unsigned format. -// Need to recover signed values. The values 1.0 and 2.0 -// are slightly inaccurate here. -float3 R10G10B10A2_UNORM_TO_R32G32B32_FLOAT( in float3 vVec ) -{ - vVec *= 2.0f; - return vVec >= 1.0f ? ( vVec - 2.0f ) : vVec; -} - -VS_OUTPUT VSMain( VS_INPUT Input ) -{ - - VS_OUTPUT Output; - float3 tmpNormal; - - Output.vPosition = mul( Input.vPosition, g_mWorldViewProjection ); - - // Expand compressed vectors - tmpNormal = R10G10B10A2_UNORM_TO_R32G32B32_FLOAT( Input.vNormal ); - Output.vNormal = mul( tmpNormal, (float3x3)g_mWorld ); - - Output.vTexcoord0 = Input.vTexcoord; - - Output.vMatrix = (float4)g_mWorld[0]; // DEBUG - return Output; -} diff --git a/tests/hlsl/dxsdk/FixedFuncEMUFX11/FixedFuncEMU.fx b/tests/hlsl/dxsdk/FixedFuncEMUFX11/FixedFuncEMU.fx deleted file mode 100644 index 699df8655..000000000 --- a/tests/hlsl/dxsdk/FixedFuncEMUFX11/FixedFuncEMU.fx +++ /dev/null @@ -1,468 +0,0 @@ -//TEST_IGNORE_FILE: -// FixedFuncEMU.fx -// Copyright (c) 2005 Microsoft Corporation. All rights reserved. -// - -struct VSSceneIn -{ - float3 pos : POSITION; //position of the particle - float3 norm : NORMAL; //velocity of the particle - float2 tex : TEXTURE0; //tex coords -}; - -struct VSSceneOut -{ - float4 pos : SV_Position; //position - float2 tex : TEXTURE0; //texture coordinate - float3 wPos : TEXTURE1; //world space pos - float3 wNorm : TEXTURE2; //world space normal - float4 colorD : COLOR0; //color for gouraud and flat shading - float4 colorS : COLOR1; //color for specular - float fogDist : FOGDISTANCE; //distance used for fog calculations - float3 planeDist : SV_ClipDistance0; //clip distance for 3 planes -}; - -struct PSSceneIn -{ - float4 pos : SV_Position; //position - float2 tex : TEXTURE0; //texture coordinate - float3 wPos : TEXTURE1; //world space pos - float3 wNorm : TEXTURE2; //world space normal - float4 colorD : COLOR0; //color for gouraud and flat shading - float4 colorS : COLOR1; //color for specular - float fogDist : FOGDISTANCE; //distance used for fog calculations -}; - -struct Light -{ - float4 Position; - float4 Diffuse; - float4 Specular; - float4 Ambient; - float4 Atten; -}; - -#define FOGMODE_NONE 0 -#define FOGMODE_LINEAR 1 -#define FOGMODE_EXP 2 -#define FOGMODE_EXP2 3 -#define E 2.71828 - -cbuffer cbLights -{ - float4 g_clipplanes[3]; - Light g_lights[8]; -}; - -cbuffer cbPerFrame -{ - float4x4 g_mWorld; - float4x4 g_mView; - float4x4 g_mProj; - float4x4 g_mInvProj; - float4x4 g_mLightViewProj; -}; - -cbuffer cbPerTechnique -{ - bool g_bEnableLighting = true; - bool g_bEnableClipping = true; - bool g_bPointScaleEnable = false; - float g_pointScaleA; - float g_pointScaleB; - float g_pointScaleC; - float g_pointSize; - - //fog params - int g_fogMode = FOGMODE_NONE; - float g_fogStart; - float g_fogEnd; - float g_fogDensity; - float4 g_fogColor; -}; - -cbuffer cbPerViewChange -{ - //viewport params - float g_viewportHeight; - float g_viewportWidth; - float g_nearPlane; -}; - -cbuffer cbImmutable -{ - float3 g_positions[4] = - { - float3( -0.5, 0.5, 0 ), - float3( 0.5, 0.5, 0 ), - float3( -0.5, -0.5, 0 ), - float3( 0.5, -0.5, 0 ), - }; -}; - -Texture2D g_txDiffuse; -Texture2D g_txProjected; -SamplerState g_samLinear -{ - Filter = MIN_MAG_MIP_LINEAR; - AddressU = Clamp; - AddressV = Clamp; -}; - -DepthStencilState DisableDepth -{ - DepthEnable = FALSE; - DepthWriteMask = ZERO; -}; - -DepthStencilState EnableDepth -{ - DepthEnable = TRUE; - DepthWriteMask = ALL; -}; - -struct ColorsOutput -{ - float4 Diffuse; - float4 Specular; -}; - -ColorsOutput CalcLighting( float3 worldNormal, float3 worldPos, float3 cameraPos ) -{ - ColorsOutput output = (ColorsOutput)0.0; - - for(int i=0; i<8; i++) - { - float3 toLight = g_lights[i].Position.xyz - worldPos; - float lightDist = length( toLight ); - float fAtten = 1.0/dot( g_lights[i].Atten, float4(1,lightDist,lightDist*lightDist,0) ); - float3 lightDir = normalize( toLight ); - float3 halfAngle = normalize( normalize(-cameraPos) + lightDir ); - - output.Diffuse += max(0,dot( lightDir, worldNormal ) * g_lights[i].Diffuse * fAtten) + g_lights[i].Ambient; - output.Specular += max(0,pow( dot( halfAngle, worldNormal ), 64 ) * g_lights[i].Specular * fAtten ); - } - - return output; -} - -// -// VS for emulating fixed function pipeline -// -VSSceneOut VSScenemain(VSSceneIn input) -{ - VSSceneOut output = (VSSceneOut)0.0; - - //output our final position in clipspace - float4 worldPos = mul( float4( input.pos, 1 ), g_mWorld ); - float4 cameraPos = mul( worldPos, g_mView ); //Save cameraPos for fog calculations - output.pos = mul( cameraPos, g_mProj ); - - //save world pos for later - output.wPos = worldPos; - - //save the fog distance for later - output.fogDist = cameraPos.z; - - //find our clipping planes (fixed function clipping is done in world space) - if( g_bEnableClipping ) - { - worldPos.w = 1; - - //calc the distance from the 3 clipping planes - output.planeDist.x = dot( worldPos, g_clipplanes[0] ); - output.planeDist.y = dot( worldPos, g_clipplanes[1] ); - output.planeDist.z = dot( worldPos, g_clipplanes[2] ); - } - else - { - output.planeDist.x = 1; - output.planeDist.y = 1; - output.planeDist.z = 1; - } - - //do gouraud lighting - if( g_bEnableLighting ) - { - float3 worldNormal = normalize( mul( input.norm, (float3x3)g_mWorld ) ); - output.wNorm = worldNormal; - ColorsOutput cOut = CalcLighting( worldNormal, worldPos, cameraPos ); - output.colorD = cOut.Diffuse; - output.colorS = cOut.Specular; - } - else - { - output.colorD = float4(1,1,1,1); - } - - //propogate texture coordinate - output.tex = input.tex; - - return output; -} - -// -// VS for rendering in screen space -// -PSSceneIn VSScreenSpacemain(VSSceneIn input) -{ - PSSceneIn output = (PSSceneIn)0.0; - - //output our final position - output.pos.x = (input.pos.x / (g_viewportWidth/2.0)) -1; - output.pos.y = -(input.pos.y / (g_viewportHeight/2.0)) +1; - output.pos.z = input.pos.z; - output.pos.w = 1; - - //propogate texture coordinate - output.tex = input.tex; - output.colorD = float4(1,1,1,1); - - return output; -} - -// -// GS for flat shaded rendering -// - -[maxvertexcount(3)] -void GSFlatmain( triangle VSSceneOut input[3], inout TriangleStream<VSSceneOut> FlatTriStream ) -{ - VSSceneOut output; - - // - // Calculate the face normal - // - float3 faceEdgeA = input[1].wPos - input[0].wPos; - float3 faceEdgeB = input[2].wPos - input[0].wPos; - - // - // Cross product - // - float3 faceNormal = cross(faceEdgeA, faceEdgeB); - - // - //calculate the face center - // - float3 faceCenter = (input[0].wPos + input[1].wPos + input[2].wPos)/3.0; - - //find world pos and camera pos - float4 worldPos = float4( faceCenter, 1 ); - float4 cameraPos = mul( worldPos, g_mView ); - - //do shading - float3 worldNormal = normalize( faceNormal ); - ColorsOutput cOut = CalcLighting( worldNormal, worldPos, cameraPos ); - - for(int i=0; i<3; i++) - { - output = input[i]; - output.colorD = cOut.Diffuse; - output.colorS = cOut.Specular; - - FlatTriStream.Append( output ); - } - FlatTriStream.RestartStrip(); -} - -// -// GS for point rendering -// -[maxvertexcount(12)] -void GSPointmain( triangle VSSceneOut input[3], inout TriangleStream<VSSceneOut> PointTriStream ) -{ - VSSceneOut output; - - // - // Calculate the point size - // - //float fSizeX = (g_pointSize/g_viewportWidth)/4.0; - float fSizeY = (g_pointSize/g_viewportHeight)/4.0; - float fSizeX = fSizeY; - - for(int i=0; i<3; i++) - { - output = input[i]; - - //find world pos and camera pos - float4 worldPos = float4(input[i].wPos,1); - float4 cameraPos = mul( worldPos, g_mView ); - - //find our size - if( g_bPointScaleEnable ) - { - float dEye = length( cameraPos.xyz ); - fSizeX = fSizeY = g_viewportHeight * g_pointSize * - sqrt( 1.0f/( g_pointScaleA + g_pointScaleB*dEye + g_pointScaleC*(dEye*dEye) ) ); - } - - //do shading - if(g_bEnableLighting) - { - float3 worldNormal = input[i].wNorm; - ColorsOutput cOut = CalcLighting( worldNormal, worldPos, cameraPos ); - - output.colorD = cOut.Diffuse; - output.colorS = cOut.Specular; - } - else - { - output.colorD = float4(1,1,1,1); - } - - output.tex = input[i].tex; - - // - // Emit two new triangles - // - for(int i=0; i<4; i++) - { - float4 outPos = mul( worldPos, g_mView ); - output.pos = mul( outPos, g_mProj ); - float zoverNear = (outPos.z)/g_nearPlane; - float4 posSize = float4( g_positions[i].x*fSizeX*zoverNear, - g_positions[i].y*fSizeY*zoverNear, - 0, - 0 ); - output.pos += posSize; - - PointTriStream.Append(output); - } - PointTriStream.RestartStrip(); - } -} - -// -// Calculates fog factor based upon distance -// -float CalcFogFactor( float d ) -{ - float fogCoeff = 1.0; - - if( FOGMODE_LINEAR == g_fogMode ) - { - fogCoeff = (g_fogEnd - d)/(g_fogEnd - g_fogStart); - } - else if( FOGMODE_EXP == g_fogMode ) - { - fogCoeff = 1.0 / pow( E, d*g_fogDensity ); - } - else if( FOGMODE_EXP2 == g_fogMode ) - { - fogCoeff = 1.0 / pow( E, d*d*g_fogDensity*g_fogDensity ); - } - - return clamp( fogCoeff, 0, 1 ); -} - -// -// PS for rendering with clip planes -// -float4 PSScenemain(PSSceneIn input) : SV_Target -{ - //calculate the fog factor - float fog = CalcFogFactor( input.fogDist ); - - //calculate the color based off of the normal, textures, etc - float4 normalColor = g_txDiffuse.Sample( g_samLinear, input.tex ) * input.colorD + input.colorS; - - //calculate the color from the projected texture - float4 cookieCoord = mul( float4(input.wPos,1), g_mLightViewProj ); - //since we don't have texldp, we must perform the w divide ourselves befor the texture lookup - cookieCoord.xy = 0.5 * cookieCoord.xy / cookieCoord.w + float2( 0.5, 0.5 ); - float4 cookieColor = float4(0,0,0,0); - if( cookieCoord.z > 0 ) - cookieColor = g_txProjected.Sample( g_samLinear, cookieCoord.xy ); - - //for standard light-modulating effects just multiply normalcolor and coookiecolor - normalColor += cookieColor; - - return fog * normalColor + (1.0 - fog)*g_fogColor; -} - -// -// PS for rendering with alpha test -// -float4 PSAlphaTestmain(PSSceneIn input) : SV_Target -{ - float4 color = g_txDiffuse.Sample( g_samLinear, input.tex ) * input.colorD; - if( color.a < 0.5 ) - discard; - return color; -} - -// -// RenderSceneGouraud - renders gouraud-shaded primitives -// -technique10 RenderSceneGouraud -{ - pass p0 - { - SetVertexShader( CompileShader( vs_4_0, VSScenemain() ) ); - SetGeometryShader( NULL ); - SetPixelShader( CompileShader( ps_4_0, PSScenemain() ) ); - - SetDepthStencilState( EnableDepth, 0 ); - } -} - -// -// RenderSceneFlat - renders flat-shaded primitives -// -technique10 RenderSceneFlat -{ - pass p0 - { - SetVertexShader( CompileShader( vs_4_0, VSScenemain() ) ); - SetGeometryShader( CompileShader( gs_4_0, GSFlatmain() ) ); - SetPixelShader( CompileShader( ps_4_0, PSScenemain() ) ); - - SetDepthStencilState( EnableDepth, 0 ); - } -} - -// -// RenderScenePoint - replaces d3dfill_point -// -technique10 RenderScenePoint -{ - pass p0 - { - SetVertexShader( CompileShader( vs_4_0, VSScenemain() ) ); - SetGeometryShader( CompileShader( gs_4_0, GSPointmain() ) ); - SetPixelShader( CompileShader( ps_4_0, PSScenemain() ) ); - - SetDepthStencilState( EnableDepth, 0 ); - } -} - -// -// RenderScreneSpace - shows how to render something in screenspace -// -technique10 RenderScreenSpaceAlphaTest -{ - pass p0 - { - SetVertexShader( CompileShader( vs_4_0, VSScreenSpacemain() ) ); - SetGeometryShader( NULL ); - SetPixelShader( CompileShader( ps_4_0, PSAlphaTestmain() ) ); - - SetDepthStencilState( DisableDepth, 0 ); - } -} - -// -// RenderScreneSpace - shows how to render something in screenspace -// -technique10 RenderTextureOnly -{ - pass p0 - { - SetVertexShader( CompileShader( vs_4_0, VSScenemain() ) ); - SetGeometryShader( NULL ); - SetPixelShader( CompileShader( ps_4_0, PSScenemain() ) ); - - SetDepthStencilState( EnableDepth, 0 ); - } -} - diff --git a/tests/hlsl/dxsdk/FluidCS11/ComputeShaderSort11.hlsl b/tests/hlsl/dxsdk/FluidCS11/ComputeShaderSort11.hlsl deleted file mode 100644 index 6e14bc10e..000000000 --- a/tests/hlsl/dxsdk/FluidCS11/ComputeShaderSort11.hlsl +++ /dev/null @@ -1,75 +0,0 @@ -//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues. -//TEST:COMPARE_HLSL: -profile cs_4_0 -entry BitonicSort -entry MatrixTranspose -//-------------------------------------------------------------------------------------- -// File: ComputeShaderSort11.hlsl -// -// This file contains the compute shaders to perform GPU sorting using DirectX 11. -// -// Copyright (c) Microsoft Corporation. All rights reserved. -//-------------------------------------------------------------------------------------- - -#define BITONIC_BLOCK_SIZE 512 - -#define TRANSPOSE_BLOCK_SIZE 16 - -//-------------------------------------------------------------------------------------- -// Constant Buffers -//-------------------------------------------------------------------------------------- -cbuffer CB : register( b0 ) -{ - unsigned int g_iLevel; - unsigned int g_iLevelMask; - unsigned int g_iWidth; - unsigned int g_iHeight; -}; - -//-------------------------------------------------------------------------------------- -// Structured Buffers -//-------------------------------------------------------------------------------------- -StructuredBuffer<unsigned int> Input : register( t0 ); -RWStructuredBuffer<unsigned int> Data : register( u0 ); - -//-------------------------------------------------------------------------------------- -// Bitonic Sort Compute Shader -//-------------------------------------------------------------------------------------- -groupshared unsigned int shared_data[BITONIC_BLOCK_SIZE]; - -[numthreads(BITONIC_BLOCK_SIZE, 1, 1)] -void BitonicSort( uint3 Gid : SV_GroupID, - uint3 DTid : SV_DispatchThreadID, - uint3 GTid : SV_GroupThreadID, - uint GI : SV_GroupIndex ) -{ - // Load shared data - shared_data[GI] = Data[DTid.x]; - GroupMemoryBarrierWithGroupSync(); - - // Sort the shared data - for (unsigned int j = g_iLevel >> 1 ; j > 0 ; j >>= 1) - { - unsigned int result = ((shared_data[GI & ~j] <= shared_data[GI | j]) == (bool)(g_iLevelMask & DTid.x))? shared_data[GI ^ j] : shared_data[GI]; - GroupMemoryBarrierWithGroupSync(); - shared_data[GI] = result; - GroupMemoryBarrierWithGroupSync(); - } - - // Store shared data - Data[DTid.x] = shared_data[GI]; -} - -//-------------------------------------------------------------------------------------- -// Matrix Transpose Compute Shader -//-------------------------------------------------------------------------------------- -groupshared unsigned int transpose_shared_data[TRANSPOSE_BLOCK_SIZE * TRANSPOSE_BLOCK_SIZE]; - -[numthreads(TRANSPOSE_BLOCK_SIZE, TRANSPOSE_BLOCK_SIZE, 1)] -void MatrixTranspose( uint3 Gid : SV_GroupID, - uint3 DTid : SV_DispatchThreadID, - uint3 GTid : SV_GroupThreadID, - uint GI : SV_GroupIndex ) -{ - transpose_shared_data[GI] = Input[DTid.y * g_iWidth + DTid.x]; - GroupMemoryBarrierWithGroupSync(); - uint2 XY = DTid.yx - GTid.yx + GTid.xy; - Data[XY.y * g_iHeight + XY.x] = transpose_shared_data[GTid.x * TRANSPOSE_BLOCK_SIZE + GTid.y]; -} diff --git a/tests/hlsl/dxsdk/FluidCS11/FluidCS11.hlsl b/tests/hlsl/dxsdk/FluidCS11/FluidCS11.hlsl deleted file mode 100644 index 8966ea3c1..000000000 --- a/tests/hlsl/dxsdk/FluidCS11/FluidCS11.hlsl +++ /dev/null @@ -1,529 +0,0 @@ -//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues. -//TEST:COMPARE_HLSL: -profile cs_4_0 -entry BuildGridCS -entry ClearGridIndicesCS -entry BuildGridIndicesCS -entry RearrangeParticlesCS -entry DensityCS_Simple -entry DensityCS_Shared -entry DensityCS_Grid -entry ForceCS_Simple -entry ForceCS_Shared -entry ForceCS_Grid -entry IntegrateCS -//-------------------------------------------------------------------------------------- -// File: FluidCS11.hlsl -// -// Copyright (c) Microsoft Corporation. All rights reserved. -//-------------------------------------------------------------------------------------- - -//-------------------------------------------------------------------------------------- -// Smoothed Particle Hydrodynamics Algorithm Based Upon: -// Particle-Based Fluid Simulation for Interactive Applications -// Matthias Müller -//-------------------------------------------------------------------------------------- - -//-------------------------------------------------------------------------------------- -// Optimized Grid Algorithm Based Upon: -// Broad-Phase Collision Detection with CUDA -// Scott Le Grand -//-------------------------------------------------------------------------------------- - -struct Particle -{ - float2 position; - float2 velocity; -}; - -struct ParticleForces -{ - float2 acceleration; -}; - -struct ParticleDensity -{ - float density; -}; - -cbuffer cbSimulationConstants : register( b0 ) -{ - uint g_iNumParticles; - float g_fTimeStep; - float g_fSmoothlen; - float g_fPressureStiffness; - float g_fRestDensity; - float g_fDensityCoef; - float g_fGradPressureCoef; - float g_fLapViscosityCoef; - float g_fWallStiffness; - - float4 g_vGravity; - float4 g_vGridDim; - float3 g_vPlanes[4]; -}; - -//-------------------------------------------------------------------------------------- -// Fluid Simulation -//-------------------------------------------------------------------------------------- - -#define SIMULATION_BLOCK_SIZE 256 - -//-------------------------------------------------------------------------------------- -// Structured Buffers -//-------------------------------------------------------------------------------------- -RWStructuredBuffer<Particle> ParticlesRW : register( u0 ); -StructuredBuffer<Particle> ParticlesRO : register( t0 ); - -RWStructuredBuffer<ParticleDensity> ParticlesDensityRW : register( u0 ); -StructuredBuffer<ParticleDensity> ParticlesDensityRO : register( t1 ); - -RWStructuredBuffer<ParticleForces> ParticlesForcesRW : register( u0 ); -StructuredBuffer<ParticleForces> ParticlesForcesRO : register( t2 ); - -RWStructuredBuffer<unsigned int> GridRW : register( u0 ); -StructuredBuffer<unsigned int> GridRO : register( t3 ); - -RWStructuredBuffer<uint2> GridIndicesRW : register( u0 ); -StructuredBuffer<uint2> GridIndicesRO : register( t4 ); - - -//-------------------------------------------------------------------------------------- -// Grid Construction -//-------------------------------------------------------------------------------------- - -// For simplicity, this sample uses a 16-bit hash based on the grid cell and -// a 16-bit particle ID to keep track of the particles while sorting -// This imposes a limitation of 64K particles and 256x256 grid work -// You could extended the implementation to support large scenarios by using a uint2 - -float2 GridCalculateCell(float2 position) -{ - return clamp(position * g_vGridDim.xy + g_vGridDim.zw, float2(0, 0), float2(255, 255)); -} - -unsigned int GridConstuctKey(uint2 xy) -{ - // Bit pack [-----UNUSED-----][----Y---][----X---] - // 16-bit 8-bit 8-bit - return dot(xy.yx, uint2(256, 1)); -} - -unsigned int GridConstuctKeyValuePair(uint2 xy, uint value) -{ - // Bit pack [----Y---][----X---][-----VALUE------] - // 8-bit 8-bit 16-bit - return dot(uint3(xy.yx, value), uint3(256*256*256, 256*256, 1)); -} - -unsigned int GridGetKey(unsigned int keyvaluepair) -{ - return (keyvaluepair >> 16); -} - -unsigned int GridGetValue(unsigned int keyvaluepair) -{ - return (keyvaluepair & 0xFFFF); -} - - -//-------------------------------------------------------------------------------------- -// Build Grid -//-------------------------------------------------------------------------------------- - -[numthreads(SIMULATION_BLOCK_SIZE, 1, 1)] -void BuildGridCS( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex ) -{ - const unsigned int P_ID = DTid.x; // Particle ID to operate on - - float2 position = ParticlesRO[P_ID].position; - float2 grid_xy = GridCalculateCell( position ); - - GridRW[P_ID] = GridConstuctKeyValuePair((uint2)grid_xy, P_ID); -} - - -//-------------------------------------------------------------------------------------- -// Build Grid Indices -//-------------------------------------------------------------------------------------- - -[numthreads(SIMULATION_BLOCK_SIZE, 1, 1)] -void ClearGridIndicesCS( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex ) -{ - GridIndicesRW[DTid.x] = uint2(0, 0); -} - -[numthreads(SIMULATION_BLOCK_SIZE, 1, 1)] -void BuildGridIndicesCS( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex ) -{ - const unsigned int G_ID = DTid.x; // Grid ID to operate on - unsigned int G_ID_PREV = (G_ID == 0)? g_iNumParticles : G_ID; G_ID_PREV--; - unsigned int G_ID_NEXT = G_ID + 1; if (G_ID_NEXT == g_iNumParticles) { G_ID_NEXT = 0; } - - unsigned int cell = GridGetKey( GridRO[G_ID] ); - unsigned int cell_prev = GridGetKey( GridRO[G_ID_PREV] ); - unsigned int cell_next = GridGetKey( GridRO[G_ID_NEXT] ); - if (cell != cell_prev) - { - // I'm the start of a cell - GridIndicesRW[cell].x = G_ID; - } - if (cell != cell_next) - { - // I'm the end of a cell - GridIndicesRW[cell].y = G_ID + 1; - } -} - - -//-------------------------------------------------------------------------------------- -// Rearrange Particles -//-------------------------------------------------------------------------------------- - -[numthreads(SIMULATION_BLOCK_SIZE, 1, 1)] -void RearrangeParticlesCS( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex ) -{ - const unsigned int ID = DTid.x; // Particle ID to operate on - const unsigned int G_ID = GridGetValue( GridRO[ ID ] ); - ParticlesRW[ID] = ParticlesRO[ G_ID ]; -} - - -//-------------------------------------------------------------------------------------- -// Density Calculation -//-------------------------------------------------------------------------------------- - -float CalculateDensity(float r_sq) -{ - const float h_sq = g_fSmoothlen * g_fSmoothlen; - // Implements this equation: - // W_poly6(r, h) = 315 / (64 * pi * h^9) * (h^2 - r^2)^3 - // g_fDensityCoef = fParticleMass * 315.0f / (64.0f * PI * fSmoothlen^9) - return g_fDensityCoef * (h_sq - r_sq) * (h_sq - r_sq) * (h_sq - r_sq); -} - - -//-------------------------------------------------------------------------------------- -// Simple N^2 Algorithm -//-------------------------------------------------------------------------------------- - -[numthreads(SIMULATION_BLOCK_SIZE, 1, 1)] -void DensityCS_Simple( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex ) -{ - const unsigned int P_ID = DTid.x; - const float h_sq = g_fSmoothlen * g_fSmoothlen; - float2 P_position = ParticlesRO[P_ID].position; - - float density = 0; - - // Calculate the density based on all neighbors - for (uint N_ID = 0 ; N_ID < g_iNumParticles ; N_ID++) - { - float2 N_position = ParticlesRO[N_ID].position; - - float2 diff = N_position - P_position; - float r_sq = dot(diff, diff); - if (r_sq < h_sq) - { - density += CalculateDensity(r_sq); - } - } - - ParticlesDensityRW[P_ID].density = density; -} - - -//-------------------------------------------------------------------------------------- -// Shared Memory Optimized N^2 Algorithm -//-------------------------------------------------------------------------------------- - -groupshared float2 density_shared_pos[SIMULATION_BLOCK_SIZE]; - -[numthreads(SIMULATION_BLOCK_SIZE, 1, 1)] -void DensityCS_Shared( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex ) -{ - const unsigned int P_ID = DTid.x; - const float h_sq = g_fSmoothlen * g_fSmoothlen; - float2 P_position = ParticlesRO[P_ID].position; - - float density = 0; - - // Calculate the density based on all neighbors - [loop] - for (uint N_block_ID = 0 ; N_block_ID < g_iNumParticles ; N_block_ID += SIMULATION_BLOCK_SIZE) - { - // Cache a tile of particles unto shared memory to increase IO efficiency - density_shared_pos[GI] = ParticlesRO[N_block_ID + GI].position; - - GroupMemoryBarrierWithGroupSync(); - - for (uint N_tile_ID = 0; N_tile_ID < SIMULATION_BLOCK_SIZE; N_tile_ID++) - { - float2 N_position = density_shared_pos[N_tile_ID]; - - float2 diff = N_position - P_position; - float r_sq = dot(diff, diff); - if (r_sq < h_sq) - { - density += CalculateDensity(r_sq); - } - } - - GroupMemoryBarrierWithGroupSync(); - } - - ParticlesDensityRW[P_ID].density = density; -} - - -//-------------------------------------------------------------------------------------- -// Optimized Grid + Sort Algorithm -//-------------------------------------------------------------------------------------- - -[numthreads(SIMULATION_BLOCK_SIZE, 1, 1)] -void DensityCS_Grid( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex ) -{ - const unsigned int P_ID = DTid.x; - const float h_sq = g_fSmoothlen * g_fSmoothlen; - float2 P_position = ParticlesRO[P_ID].position; - - float density = 0; - - // Calculate the density based on neighbors from the 8 adjacent cells + current cell - int2 G_XY = (int2)GridCalculateCell( P_position ); - for (int Y = max(G_XY.y - 1, 0) ; Y <= min(G_XY.y + 1, 255) ; Y++) - { - for (int X = max(G_XY.x - 1, 0) ; X <= min(G_XY.x + 1, 255) ; X++) - { - unsigned int G_CELL = GridConstuctKey(uint2(X, Y)); - uint2 G_START_END = GridIndicesRO[G_CELL]; - for (unsigned int N_ID = G_START_END.x ; N_ID < G_START_END.y ; N_ID++) - { - float2 N_position = ParticlesRO[N_ID].position; - - float2 diff = N_position - P_position; - float r_sq = dot(diff, diff); - if (r_sq < h_sq) - { - density += CalculateDensity(r_sq); - } - } - } - } - - ParticlesDensityRW[P_ID].density = density; -} - - -//-------------------------------------------------------------------------------------- -// Force Calculation -//-------------------------------------------------------------------------------------- - -float CalculatePressure(float density) -{ - // Implements this equation: - // Pressure = B * ((rho / rho_0)^y - 1) - return g_fPressureStiffness * max(pow(density / g_fRestDensity, 3) - 1, 0); -} - -float2 CalculateGradPressure(float r, float P_pressure, float N_pressure, float N_density, float2 diff) -{ - const float h = g_fSmoothlen; - float avg_pressure = 0.5f * (N_pressure + P_pressure); - // Implements this equation: - // W_spkiey(r, h) = 15 / (pi * h^6) * (h - r)^3 - // GRAD( W_spikey(r, h) ) = -45 / (pi * h^6) * (h - r)^2 - // g_fGradPressureCoef = fParticleMass * -45.0f / (PI * fSmoothlen^6) - return g_fGradPressureCoef * avg_pressure / N_density * (h - r) * (h - r) / r * (diff); -} - -float2 CalculateLapVelocity(float r, float2 P_velocity, float2 N_velocity, float N_density) -{ - const float h = g_fSmoothlen; - float2 vel_diff = (N_velocity - P_velocity); - // Implements this equation: - // W_viscosity(r, h) = 15 / (2 * pi * h^3) * (-r^3 / (2 * h^3) + r^2 / h^2 + h / (2 * r) - 1) - // LAPLACIAN( W_viscosity(r, h) ) = 45 / (pi * h^6) * (h - r) - // g_fLapViscosityCoef = fParticleMass * fViscosity * 45.0f / (PI * fSmoothlen^6) - return g_fLapViscosityCoef / N_density * (h - r) * vel_diff; -} - - -//-------------------------------------------------------------------------------------- -// Simple N^2 Algorithm -//-------------------------------------------------------------------------------------- - -[numthreads(SIMULATION_BLOCK_SIZE, 1, 1)] -void ForceCS_Simple( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex ) -{ - const unsigned int P_ID = DTid.x; // Particle ID to operate on - - float2 P_position = ParticlesRO[P_ID].position; - float2 P_velocity = ParticlesRO[P_ID].velocity; - float P_density = ParticlesDensityRO[P_ID].density; - float P_pressure = CalculatePressure(P_density); - - const float h_sq = g_fSmoothlen * g_fSmoothlen; - - float2 acceleration = float2(0, 0); - - // Calculate the acceleration based on all neighbors - for (uint N_ID = 0 ; N_ID < g_iNumParticles ; N_ID++) - { - float2 N_position = ParticlesRO[N_ID].position; - - float2 diff = N_position - P_position; - float r_sq = dot(diff, diff); - if (r_sq < h_sq && P_ID != N_ID) - { - float2 N_velocity = ParticlesRO[N_ID].velocity; - float N_density = ParticlesDensityRO[N_ID].density; - float N_pressure = CalculatePressure(N_density); - float r = sqrt(r_sq); - - // Pressure Term - acceleration += CalculateGradPressure(r, P_pressure, N_pressure, N_density, diff); - - // Viscosity Term - acceleration += CalculateLapVelocity(r, P_velocity, N_velocity, N_density); - } - } - - ParticlesForcesRW[P_ID].acceleration = acceleration / P_density; -} - - -//-------------------------------------------------------------------------------------- -// Shared Memory Optimized N^2 Algorithm -//-------------------------------------------------------------------------------------- - -groupshared struct { float2 position; float2 velocity; float density; } force_shared_pos[SIMULATION_BLOCK_SIZE]; - -[numthreads(SIMULATION_BLOCK_SIZE, 1, 1)] -void ForceCS_Shared( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex ) -{ - const unsigned int P_ID = DTid.x; // Particle ID to operate on - - float2 P_position = ParticlesRO[P_ID].position; - float2 P_velocity = ParticlesRO[P_ID].velocity; - float P_density = ParticlesDensityRO[P_ID].density; - float P_pressure = CalculatePressure(P_density); - - const float h_sq = g_fSmoothlen * g_fSmoothlen; - - float2 acceleration = float2(0, 0); - - // Calculate the acceleration based on all neighbors - [loop] - for (uint N_block_ID = 0 ; N_block_ID < g_iNumParticles ; N_block_ID += SIMULATION_BLOCK_SIZE) - { - // Cache a tile of particles unto shared memory to increase IO efficiency - force_shared_pos[GI].position = ParticlesRO[N_block_ID + GI].position; - force_shared_pos[GI].velocity = ParticlesRO[N_block_ID + GI].velocity; - force_shared_pos[GI].density = ParticlesDensityRO[N_block_ID + GI].density; - - GroupMemoryBarrierWithGroupSync(); - - [loop] - for (uint N_tile_ID = 0; N_tile_ID < SIMULATION_BLOCK_SIZE; N_tile_ID++ ) - { - uint N_ID = N_block_ID + N_tile_ID; - float2 N_position = force_shared_pos[N_tile_ID].position; - - float2 diff = N_position - P_position; - float r_sq = dot(diff, diff); - if (r_sq < h_sq && P_ID != N_ID) - { - float2 N_velocity = force_shared_pos[N_tile_ID].velocity; - float N_density = force_shared_pos[N_tile_ID].density; - float N_pressure = CalculatePressure(N_density); - float r = sqrt(r_sq); - - // Pressure Term - acceleration += CalculateGradPressure(r, P_pressure, N_pressure, N_density, diff); - - // Viscosity Term - acceleration += CalculateLapVelocity(r, P_velocity, N_velocity, N_density); - } - } - - GroupMemoryBarrierWithGroupSync(); - } - - ParticlesForcesRW[P_ID].acceleration = acceleration / P_density; -} - - -//-------------------------------------------------------------------------------------- -// Optimized Grid + Sort Algorithm -//-------------------------------------------------------------------------------------- - -[numthreads(SIMULATION_BLOCK_SIZE, 1, 1)] -void ForceCS_Grid( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex ) -{ - const unsigned int P_ID = DTid.x; // Particle ID to operate on - - float2 P_position = ParticlesRO[P_ID].position; - float2 P_velocity = ParticlesRO[P_ID].velocity; - float P_density = ParticlesDensityRO[P_ID].density; - float P_pressure = CalculatePressure(P_density); - - const float h_sq = g_fSmoothlen * g_fSmoothlen; - - float2 acceleration = float2(0, 0); - - // Calculate the acceleration based on neighbors from the 8 adjacent cells + current cell - int2 G_XY = (int2)GridCalculateCell( P_position ); - for (int Y = max(G_XY.y - 1, 0) ; Y <= min(G_XY.y + 1, 255) ; Y++) - { - for (int X = max(G_XY.x - 1, 0) ; X <= min(G_XY.x + 1, 255) ; X++) - { - unsigned int G_CELL = GridConstuctKey(uint2(X, Y)); - uint2 G_START_END = GridIndicesRO[G_CELL]; - for (unsigned int N_ID = G_START_END.x ; N_ID < G_START_END.y ; N_ID++) - { - float2 N_position = ParticlesRO[N_ID].position; - - float2 diff = N_position - P_position; - float r_sq = dot(diff, diff); - if (r_sq < h_sq && P_ID != N_ID) - { - float2 N_velocity = ParticlesRO[N_ID].velocity; - float N_density = ParticlesDensityRO[N_ID].density; - float N_pressure = CalculatePressure(N_density); - float r = sqrt(r_sq); - - // Pressure Term - acceleration += CalculateGradPressure(r, P_pressure, N_pressure, N_density, diff); - - // Viscosity Term - acceleration += CalculateLapVelocity(r, P_velocity, N_velocity, N_density); - } - } - } - } - - ParticlesForcesRW[P_ID].acceleration = acceleration / P_density; -} - - -//-------------------------------------------------------------------------------------- -// Integration -//-------------------------------------------------------------------------------------- - -[numthreads(SIMULATION_BLOCK_SIZE, 1, 1)] -void IntegrateCS( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex ) -{ - const unsigned int P_ID = DTid.x; // Particle ID to operate on - - float2 position = ParticlesRO[P_ID].position; - float2 velocity = ParticlesRO[P_ID].velocity; - float2 acceleration = ParticlesForcesRO[P_ID].acceleration; - - // Apply the forces from the map walls - [unroll] - for (unsigned int i = 0 ; i < 4 ; i++) - { - float dist = dot(float3(position, 1), g_vPlanes[i]); - acceleration += min(dist, 0) * -g_fWallStiffness * g_vPlanes[i].xy; - } - - // Apply gravity - acceleration += g_vGravity.xy; - - // Integrate - velocity += g_fTimeStep * acceleration; - position += g_fTimeStep * velocity; - - // Update - ParticlesRW[P_ID].position = position; - ParticlesRW[P_ID].velocity = velocity; -} diff --git a/tests/hlsl/dxsdk/FluidCS11/FluidRender.hlsl b/tests/hlsl/dxsdk/FluidCS11/FluidRender.hlsl deleted file mode 100644 index cfd14c2b2..000000000 --- a/tests/hlsl/dxsdk/FluidCS11/FluidRender.hlsl +++ /dev/null @@ -1,124 +0,0 @@ -//TEST:COMPARE_HLSL:-no-mangle -profile sm_4_0 -entry ParticleVS -stage vertex -entry ParticleGS -stage geometry -entry ParticlePS -stage pixel - -#ifndef __SLANG__ -#define ParticlesRO ParticlesRO_0 -#define ParticleDensityRO ParticleDensityRO_0 -#define cbRenderConstants cbRenderConstants_0 -#define g_mViewProjection g_mViewProjection_0 -#define g_fParticleSize g_fParticleSize_0 -#define density density_0 -#define position position_0 -#define velocity velocity_0 - -#endif - -//-------------------------------------------------------------------------------------- -// File: FluidRender.hlsl -// -// Copyright (c) Microsoft Corporation. All rights reserved. -//-------------------------------------------------------------------------------------- - -//-------------------------------------------------------------------------------------- -// Particle Rendering -//-------------------------------------------------------------------------------------- - -struct Particle { - float2 position; - float2 velocity; -}; - -struct ParticleDensity { - float density; -}; - -StructuredBuffer<Particle> ParticlesRO : register( t0 ); -StructuredBuffer<ParticleDensity> ParticleDensityRO : register( t1 ); - -cbuffer cbRenderConstants : register( b0 ) -{ - matrix g_mViewProjection; - float g_fParticleSize; -}; - -struct VSParticleOut -{ - float2 position : POSITION; - float4 color : COLOR; -}; - -struct GSParticleOut -{ - float4 position : SV_Position; - float4 color : COLOR; - float2 texcoord : TEXCOORD; -}; - - -//-------------------------------------------------------------------------------------- -// Visualization Helper -//-------------------------------------------------------------------------------------- - -static const float4 Rainbow[5] = { - float4(1, 0, 0, 1), // red - float4(1, 1, 0, 1), // orange - float4(0, 1, 0, 1), // green - float4(0, 1, 1, 1), // teal - float4(0, 0, 1, 1), // blue -}; - -float4 VisualizeNumber(float n) -{ - return lerp( Rainbow[ int(floor(n * 4.0f)) ], Rainbow[ int(ceil(n * 4.0f)) ], frac(n * 4.0f) ); -} - -float4 VisualizeNumber(float n, float lower, float upper) -{ - return VisualizeNumber( saturate( (n - lower) / (upper - lower) ) ); -} - - -//-------------------------------------------------------------------------------------- -// Vertex Shader -//-------------------------------------------------------------------------------------- - -VSParticleOut ParticleVS(uint ID : SV_VERTEXID) -{ - VSParticleOut Out; // = { { 0, 0 } , { 0, 0, 0, 0 } }; // (VSParticleOut)0; - Out.position = ParticlesRO[ID].position; - Out.color = VisualizeNumber(ParticleDensityRO[ID].density, 1000.0f, 2000.0f); - return Out; -} - - -//-------------------------------------------------------------------------------------- -// Particle Geometry Shader -//-------------------------------------------------------------------------------------- - -static const float2 g_positions[4] = { float2(-1, 1), float2(1, 1), float2(-1, -1), float2(1, -1) }; -static const float2 g_texcoords[4] = { float2(0, 1), float2(1, 1), float2(0, 0), float2(1, 0) }; - -[maxvertexcount(4)] -void ParticleGS(point VSParticleOut In[1], inout TriangleStream<GSParticleOut> SpriteStream) -{ - [unroll] - for (int i = 0; i < 4; i++) - { - GSParticleOut Out; // = (GSParticleOut)0; - float4 position = float4(In[0].position, 0, 1) + g_fParticleSize * float4(g_positions[i], 0, 0); - Out.position = mul(position, g_mViewProjection); - Out.color = In[0].color; - Out.texcoord = g_texcoords[i]; - SpriteStream.Append(Out); - } - SpriteStream.RestartStrip(); -} - - -//-------------------------------------------------------------------------------------- -// Pixel Shader -//-------------------------------------------------------------------------------------- - -float4 ParticlePS(GSParticleOut In) : SV_TARGET -{ - return In.color; -} diff --git a/tests/hlsl/dxsdk/HDRToneMappingCS11/BrightPassAndHorizFilterCS.hlsl b/tests/hlsl/dxsdk/HDRToneMappingCS11/BrightPassAndHorizFilterCS.hlsl deleted file mode 100644 index 3addeca08..000000000 --- a/tests/hlsl/dxsdk/HDRToneMappingCS11/BrightPassAndHorizFilterCS.hlsl +++ /dev/null @@ -1,64 +0,0 @@ -//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues. -//TEST:COMPARE_HLSL: -profile cs_4_0 -entry CSMain -//-------------------------------------------------------------------------------------- -// File: BrightPassAndHorizFilterCS.hlsl -// -// The CS for bright pass and horizontal blur, used in CS path of -// HDRToneMappingCS11 sample -// -// Copyright (c) Microsoft Corporation. All rights reserved. -//-------------------------------------------------------------------------------------- -static const float MIDDLE_GRAY = 0.72f; -static const float LUM_WHITE = 1.5f; -static const float BRIGHT_THRESHOLD = 0.5f; - -Texture2D Input : register( t0 ); -StructuredBuffer<float> lum : register( t1 ); -RWStructuredBuffer<float4> Result : register( u0 ); - -cbuffer cb0 -{ - float4 g_avSampleWeights[15]; - uint g_outputwidth; - float g_inverse; - int2 g_inputsize; -} - -#define kernelhalf 7 -#define groupthreads 128 -groupshared float4 temp[groupthreads]; - -[numthreads( groupthreads, 1, 1 )] -void CSMain( uint3 Gid : SV_GroupID, uint GI : SV_GroupIndex ) -{ - int2 coord = int2( GI - kernelhalf + (groupthreads - kernelhalf * 2) * Gid.x, Gid.y ); - coord = coord.xy * 8 + int2(4, 3); - coord = clamp( coord, int2(0, 0), int2(g_inputsize.x-1, g_inputsize.y-1) ); - float4 vColor = Input.Load( int3(coord, 0) ); - - float fLum = lum[0]*g_inverse; - - // Bright pass and tone mapping - vColor = max( 0.0f, vColor - BRIGHT_THRESHOLD ); - vColor *= MIDDLE_GRAY / (fLum + 0.001f); - vColor *= (1.0f + vColor/LUM_WHITE); - vColor /= (1.0f + vColor); - - temp[GI] = vColor; - - GroupMemoryBarrierWithGroupSync(); - - // Horizontal blur - if ( GI >= kernelhalf && - GI < (groupthreads - kernelhalf) && - ( (Gid.x * (groupthreads - 2 * kernelhalf) + GI - kernelhalf) < g_outputwidth) ) - { - float4 vOut = 0; - - [unroll] - for ( int i = -kernelhalf; i <= kernelhalf; ++i ) - vOut += temp[GI + i] * g_avSampleWeights[i + kernelhalf]; - - Result[GI - kernelhalf + (groupthreads - kernelhalf * 2) * Gid.x + Gid.y * g_outputwidth] = float4(vOut.rgb, 1.0f); - } -} diff --git a/tests/hlsl/dxsdk/HDRToneMappingCS11/DumpToTexture.hlsl b/tests/hlsl/dxsdk/HDRToneMappingCS11/DumpToTexture.hlsl deleted file mode 100644 index f2d119eb5..000000000 --- a/tests/hlsl/dxsdk/HDRToneMappingCS11/DumpToTexture.hlsl +++ /dev/null @@ -1,29 +0,0 @@ -//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues. -//TEST:COMPARE_HLSL: -profile ps_4_0 -entry PSDump -//-------------------------------------------------------------------------------------- -// File: DumpToTexture.hlsl -// -// The PS for converting CS output buffer to a texture, used in CS path of -// HDRToneMappingCS11 sample -// -// Copyright (c) Microsoft Corporation. All rights reserved. -//-------------------------------------------------------------------------------------- -StructuredBuffer<float4> buffer : register( t0 ); - -struct QuadVS_Output -{ - float4 Pos : SV_POSITION; - float2 Tex : TEXCOORD0; -}; - -cbuffer cbPS : register( b0 ) -{ - uint4 g_param; -}; - -float4 PSDump( QuadVS_Output Input ) : SV_TARGET -{ - // To calculate the buffer offset, it is natural to use the screen space coordinates, - // Input.Pos is the screen space coordinates of the pixel being written - return buffer[ (Input.Pos.x - 0.5) + (Input.Pos.y - 0.5) * g_param.x ]; -} diff --git a/tests/hlsl/dxsdk/HDRToneMappingCS11/FilterCS.hlsl b/tests/hlsl/dxsdk/HDRToneMappingCS11/FilterCS.hlsl deleted file mode 100644 index e21b97e30..000000000 --- a/tests/hlsl/dxsdk/HDRToneMappingCS11/FilterCS.hlsl +++ /dev/null @@ -1,73 +0,0 @@ -//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues. -//TEST:COMPARE_HLSL: -profile cs_4_0 -entry CSVerticalFilter -entry CSHorizFilter -//-------------------------------------------------------------------------------------- -// File: FilterCS.hlsl -// -// The CSs for doing vertical and horizontal blur, used in CS path of -// HDRToneMappingCS11 sample -// -// Copyright (c) Microsoft Corporation. All rights reserved. -//-------------------------------------------------------------------------------------- -StructuredBuffer<float4> InputBuf : register( t0 ); -Texture2D InputTex : register( t1 ); -RWStructuredBuffer<float4> Result : register( u0 ); - -cbuffer cb0 -{ - float4 g_avSampleWeights[15]; - int2 g_outputsize; - int2 g_inputsize; -} - -#define kernelhalf 7 -#define groupthreads 128 -groupshared float4 temp[groupthreads]; - -[numthreads( groupthreads, 1, 1 )] -void CSVerticalFilter( uint3 Gid : SV_GroupID, uint GI : SV_GroupIndex ) -{ - int offsety = GI - kernelhalf + (groupthreads - kernelhalf * 2) * Gid.y; - offsety = clamp( offsety, 0, g_inputsize.y-1 ); - int offset = Gid.x + offsety * g_inputsize.x; - temp[GI] = InputBuf[offset]; - - GroupMemoryBarrierWithGroupSync(); - - // Vertical blur - if ( GI >= kernelhalf && - GI < (groupthreads - kernelhalf) && - ( (GI - kernelhalf + (groupthreads - kernelhalf * 2) * Gid.y) < g_outputsize.y) ) - { - float4 vOut = 0; - - [unroll] - for ( int i = -kernelhalf; i <= kernelhalf; ++i ) - vOut += temp[GI + i] * g_avSampleWeights[i + kernelhalf]; - - Result[Gid.x + (GI - kernelhalf + (groupthreads - kernelhalf * 2) * Gid.y) * g_outputsize.x] = float4(vOut.rgb, 1.0f); - } -} - -[numthreads( groupthreads, 1, 1 )] -void CSHorizFilter( uint3 Gid : SV_GroupID, uint GI : SV_GroupIndex ) -{ - int2 coord = int2( GI - kernelhalf + (groupthreads - kernelhalf * 2) * Gid.x, Gid.y ); - coord = clamp( coord, int2(0, 0), int2(g_inputsize.x-1, g_inputsize.y-1) ); - temp[GI] = InputTex.Load( int3(coord, 0) ); - - GroupMemoryBarrierWithGroupSync(); - - // Horizontal blur - if ( GI >= kernelhalf && - GI < (groupthreads - kernelhalf) && - ( (Gid.x * (groupthreads - 2 * kernelhalf) + GI - kernelhalf) < g_outputsize.x) ) - { - float4 vOut = 0; - - [unroll] - for ( int i = -kernelhalf; i <= kernelhalf; ++i ) - vOut += temp[GI + i] * g_avSampleWeights[i + kernelhalf]; - - Result[GI - kernelhalf + (groupthreads - kernelhalf * 2) * Gid.x + Gid.y * g_outputsize.x] = float4(vOut.rgb, 1.0f); - } -} diff --git a/tests/hlsl/dxsdk/HDRToneMappingCS11/FinalPass.hlsl b/tests/hlsl/dxsdk/HDRToneMappingCS11/FinalPass.hlsl deleted file mode 100644 index f5a49d2eb..000000000 --- a/tests/hlsl/dxsdk/HDRToneMappingCS11/FinalPass.hlsl +++ /dev/null @@ -1,79 +0,0 @@ -//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues. -//TEST:COMPARE_HLSL: -profile vs_4_0 -entry QuadVS -profile ps_4_0 -entry PSFinalPass -entry PSFinalPassForCPUReduction -//-------------------------------------------------------------------------------------- -// File: FinalPass.hlsl -// -// The PSs for doing tone-mapping based on the input luminance, used in CS path of -// HDRToneMappingCS11 sample -// -// Copyright (c) Microsoft Corporation. All rights reserved. -//-------------------------------------------------------------------------------------- -struct QuadVS_Input -{ - float4 Pos : POSITION; - float2 Tex : TEXCOORD0; -}; - -struct QuadVS_Output -{ - float4 Pos : SV_POSITION; - float2 Tex : TEXCOORD0; -}; - -QuadVS_Output QuadVS( QuadVS_Input Input ) -{ - QuadVS_Output Output; - Output.Pos = Input.Pos; - Output.Tex = Input.Tex; - return Output; -} - -Texture2D<float4> tex : register( t0 ); -StructuredBuffer<float> lum : register( t1 ); -Texture2D<float4> bloom : register( t2 ); - -SamplerState PointSampler : register (s0); -SamplerState LinearSampler : register (s1); - - -static const float MIDDLE_GRAY = 0.72f; -static const float LUM_WHITE = 1.5f; - -cbuffer cbPS : register( b0 ) -{ - float4 g_param; -}; - -float4 PSFinalPass( QuadVS_Output Input ) : SV_TARGET -{ - float4 vColor = tex.Sample( PointSampler, Input.Tex ); - float fLum = lum[0]*g_param.x; - float3 vBloom = bloom.Sample( LinearSampler, Input.Tex ); - - // Tone mapping - vColor.rgb *= MIDDLE_GRAY / (fLum + 0.001f); - vColor.rgb *= (1.0f + vColor/LUM_WHITE); - vColor.rgb /= (1.0f + vColor); - - vColor.rgb += 0.6f * vBloom; - vColor.a = 1.0f; - - return vColor; -} - -float4 PSFinalPassForCPUReduction( QuadVS_Output Input ) : SV_TARGET -{ - float4 vColor = tex.Sample( PointSampler, Input.Tex ); - float fLum = g_param.x; - float3 vBloom = bloom.Sample( LinearSampler, Input.Tex ); - - // Tone mapping - vColor.rgb *= MIDDLE_GRAY / (fLum + 0.001f); - vColor.rgb *= (1.0f + vColor/LUM_WHITE); - vColor.rgb /= (1.0f + vColor); - - vColor.rgb += 0.6f * vBloom; - vColor.a = 1.0f; - - return vColor; -} diff --git a/tests/hlsl/dxsdk/HDRToneMappingCS11/PSApproach.hlsl b/tests/hlsl/dxsdk/HDRToneMappingCS11/PSApproach.hlsl deleted file mode 100644 index 3f16b2449..000000000 --- a/tests/hlsl/dxsdk/HDRToneMappingCS11/PSApproach.hlsl +++ /dev/null @@ -1,129 +0,0 @@ -//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues. -//TEST:COMPARE_HLSL: -profile ps_4_0 -entry DownScale2x2_Lum -entry DownScale3x3 -entry FinalPass -entry DownScale3x3_BrightPass -entry Bloom -//-------------------------------------------------------------------------------------- -// File: PSApproach.hlsl -// -// The PSs for doing post-processing, used in PS path of -// HDRToneMappingCS11 sample -// -// Copyright (c) Microsoft Corporation. All rights reserved. -//-------------------------------------------------------------------------------------- -static const float4 LUM_VECTOR = float4(.299, .587, .114, 0); -static const float MIDDLE_GRAY = 0.72f; -static const float LUM_WHITE = 1.5f; -static const float BRIGHT_THRESHOLD = 0.5f; - -SamplerState PointSampler : register (s0); -SamplerState LinearSampler : register (s1); - -struct QuadVS_Output -{ - float4 Pos : SV_POSITION; - float2 Tex : TEXCOORD0; -}; - -Texture2D s0 : register(t0); -Texture2D s1 : register(t1); -Texture2D s2 : register(t2); - -float4 DownScale2x2_Lum ( QuadVS_Output Input ) : SV_TARGET -{ - float4 vColor = 0.0f; - float fAvg = 0.0f; - - for( int y = -1; y < 1; y++ ) - { - for( int x = -1; x < 1; x++ ) - { - // Compute the sum of color values - vColor = s0.Sample( PointSampler, Input.Tex, int2(x,y) ); - - fAvg += dot( vColor, LUM_VECTOR ); - } - } - - fAvg /= 4; - - return float4(fAvg, fAvg, fAvg, 1.0f); -} - -float4 DownScale3x3( QuadVS_Output Input ) : SV_TARGET -{ - float fAvg = 0.0f; - float4 vColor; - - for( int y = -1; y <= 1; y++ ) - { - for( int x = -1; x <= 1; x++ ) - { - // Compute the sum of color values - vColor = s0.Sample( PointSampler, Input.Tex, int2(x,y) ); - - fAvg += vColor.r; - } - } - - // Divide the sum to complete the average - fAvg /= 9; - - return float4(fAvg, fAvg, fAvg, 1.0f); -} - -float4 FinalPass( QuadVS_Output Input ) : SV_TARGET -{ - //float4 vColor = 0; - float4 vColor = s0.Sample( PointSampler, Input.Tex ); - float4 vLum = s1.Sample( PointSampler, float2(0,0) ); - float3 vBloom = s2.Sample( LinearSampler, Input.Tex ); - - // Tone mapping - vColor.rgb *= MIDDLE_GRAY / (vLum.r + 0.001f); - vColor.rgb *= (1.0f + vColor/LUM_WHITE); - vColor.rgb /= (1.0f + vColor); - - vColor.rgb += 0.6f * vBloom; - vColor.a = 1.0f; - - return vColor; -} - -float4 DownScale3x3_BrightPass( QuadVS_Output Input ) : SV_TARGET -{ - float3 vColor = 0.0f; - float4 vLum = s1.Sample( PointSampler, float2(0, 0) ); - float fLum = vLum.r; - - vColor = s0.Sample( PointSampler, Input.Tex ).rgb; - - // Bright pass and tone mapping - vColor = max( 0.0f, vColor - BRIGHT_THRESHOLD ); - vColor *= MIDDLE_GRAY / (fLum + 0.001f); - vColor *= (1.0f + vColor/LUM_WHITE); - vColor /= (1.0f + vColor); - - return float4(vColor, 1.0f); -} - -cbuffer cb0 -{ - float2 g_avSampleOffsets[15]; - float4 g_avSampleWeights[15]; -} - -float4 Bloom( QuadVS_Output Input ) : SV_TARGET -{ - float4 vSample = 0.0f; - float4 vColor = 0.0f; - float2 vSamplePosition; - - for( int iSample = 0; iSample < 15; iSample++ ) - { - // Sample from adjacent points - vSamplePosition = Input.Tex + g_avSampleOffsets[iSample]; - vColor = s0.Sample( PointSampler, vSamplePosition); - - vSample += g_avSampleWeights[iSample]*vColor; - } - - return vSample; -} diff --git a/tests/hlsl/dxsdk/HDRToneMappingCS11/ReduceTo1DCS.hlsl b/tests/hlsl/dxsdk/HDRToneMappingCS11/ReduceTo1DCS.hlsl deleted file mode 100644 index 1316250d5..000000000 --- a/tests/hlsl/dxsdk/HDRToneMappingCS11/ReduceTo1DCS.hlsl +++ /dev/null @@ -1,72 +0,0 @@ -//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues. -//TEST:COMPARE_HLSL: -profile cs_4_0 -entry CSMain -//----------------------------------------------------------------------------- -// File: ReduceTo1DCS.hlsl -// -// Desc: Reduce an input Texture2D to a buffer -// -// Copyright (c) Microsoft Corporation. All rights reserved. -//----------------------------------------------------------------------------- -Texture2D Input : register( t0 ); -RWStructuredBuffer<float> Result : register( u0 ); - -cbuffer cbCS : register( b0 ) -{ - uint4 g_param; // (g_param.x, g_param.y) is the x and y dimensions of the Dispatch call - // (g_param.z, g_param.w) is the size of the above Input Texture2D -}; - -//#define CS_FULL_PIXEL_REDUCITON // Defining this or not must be the same as in HDRToneMappingCS11.cpp - -#define blocksize 8 -#define blocksizeY 8 -#define groupthreads (blocksize*blocksizeY) -groupshared float accum[groupthreads]; - -static const float4 LUM_VECTOR = float4(.299, .587, .114, 0); - -[numthreads(blocksize,blocksizeY,1)] -void CSMain( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex ) -{ - float4 s = -#ifdef CS_FULL_PIXEL_REDUCITON - Input.Load( uint3(DTid.xy , 0) )+ - Input.Load( uint3(DTid.xy + uint2(blocksize*g_param.x, 0), 0) ) + - Input.Load( uint3(DTid.xy + uint2(0, blocksizeY*g_param.y), 0) ) + - Input.Load( uint3(DTid.xy + uint2(blocksize*g_param.x, blocksizeY*g_param.y), 0) ); -#else - Input.Load( uint3((float)DTid.x/81.0f*g_param.z, (float)DTid.y/81.0f*g_param.w, 0) ); -#endif - - accum[GI] = dot( s, LUM_VECTOR ); - - // Parallel reduction algorithm follows - GroupMemoryBarrierWithGroupSync(); - if ( GI < 32 ) - accum[GI] += accum[32+GI]; - - GroupMemoryBarrierWithGroupSync(); - if ( GI < 16 ) - accum[GI] += accum[16+GI]; - - GroupMemoryBarrierWithGroupSync(); - if ( GI < 8 ) - accum[GI] += accum[8+GI]; - - GroupMemoryBarrierWithGroupSync(); - if ( GI < 4 ) - accum[GI] += accum[4+GI]; - - GroupMemoryBarrierWithGroupSync(); - if ( GI < 2 ) - accum[GI] += accum[2+GI]; - - GroupMemoryBarrierWithGroupSync(); - if ( GI < 1 ) - accum[GI] += accum[1+GI]; - - if ( GI == 0 ) - { - Result[Gid.y*g_param.x+Gid.x] = accum[0]; - } -} diff --git a/tests/hlsl/dxsdk/HDRToneMappingCS11/ReduceToSingleCS.hlsl b/tests/hlsl/dxsdk/HDRToneMappingCS11/ReduceToSingleCS.hlsl deleted file mode 100644 index 73857a6bb..000000000 --- a/tests/hlsl/dxsdk/HDRToneMappingCS11/ReduceToSingleCS.hlsl +++ /dev/null @@ -1,63 +0,0 @@ -//TEST_DISABLED:COMPARE_HLSL:-no-mangle -profile cs_4_0 -entry CSMain -//----------------------------------------------------------------------------- -// File: ReduceToSingleCS.hlsl -// -// Desc: Reduce an input buffer by a factor of groupthreads -// -// Copyright (c) Microsoft Corporation. All rights reserved. -//----------------------------------------------------------------------------- - -StructuredBuffer<float> Input : register( t0 ); -RWStructuredBuffer<float> Result : register( u0 ); - -cbuffer cbCS : register( b0 ) -{ - uint4 g_param; // g_param.x is the actual elements contained in Input - // g_param.y is the x dimension of the Dispatch call -}; - -#define groupthreads 128 -groupshared float accum[groupthreads]; - -[numthreads(groupthreads,1,1)] -void CSMain( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex ) -{ - if ( DTid.x < g_param.x ) - accum[GI] = Input[DTid.x]; - else - accum[GI] = 0; - - // Parallel reduction algorithm follows - GroupMemoryBarrierWithGroupSync(); - if ( GI < 64 ) - accum[GI] += accum[64+GI]; - - GroupMemoryBarrierWithGroupSync(); - if ( GI < 32 ) - accum[GI] += accum[32+GI]; - - GroupMemoryBarrierWithGroupSync(); - if ( GI < 16 ) - accum[GI] += accum[16+GI]; - - GroupMemoryBarrierWithGroupSync(); - if ( GI < 8 ) - accum[GI] += accum[8+GI]; - - GroupMemoryBarrierWithGroupSync(); - if ( GI < 4 ) - accum[GI] += accum[4+GI]; - - GroupMemoryBarrierWithGroupSync(); - if ( GI < 2 ) - accum[GI] += accum[2+GI]; - - GroupMemoryBarrierWithGroupSync(); - if ( GI < 1 ) - accum[GI] += accum[1+GI]; - - if ( GI == 0 ) - { - Result[Gid.x] = accum[0]; - } -} diff --git a/tests/hlsl/dxsdk/HDRToneMappingCS11/skybox11.hlsl b/tests/hlsl/dxsdk/HDRToneMappingCS11/skybox11.hlsl deleted file mode 100644 index a0e44ba95..000000000 --- a/tests/hlsl/dxsdk/HDRToneMappingCS11/skybox11.hlsl +++ /dev/null @@ -1,44 +0,0 @@ -//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues. -//TEST:COMPARE_HLSL: -profile vs_4_0 -entry SkyboxVS -profile ps_4_0 -entry SkyboxPS -//----------------------------------------------------------------------------- -// File: SkyBox11.hlsl -// -// Desc: -// -// Copyright (c) Microsoft Corporation. All rights reserved. -//----------------------------------------------------------------------------- - -cbuffer cbPerObject : register( b0 ) -{ - row_major matrix g_mWorldViewProjection : packoffset( c0 ); -} - -TextureCube g_EnvironmentTexture : register( t0 ); -SamplerState g_sam : register( s0 ); - -struct SkyboxVS_Input -{ - float4 Pos : POSITION; -}; - -struct SkyboxVS_Output -{ - float4 Pos : SV_POSITION; - float3 Tex : TEXCOORD0; -}; - -SkyboxVS_Output SkyboxVS( SkyboxVS_Input Input ) -{ - SkyboxVS_Output Output; - - Output.Pos = Input.Pos; - Output.Tex = normalize( mul(Input.Pos, g_mWorldViewProjection) ); - - return Output; -} - -float4 SkyboxPS( SkyboxVS_Output Input ) : SV_TARGET -{ - float4 color = g_EnvironmentTexture.Sample( g_sam, Input.Tex ); - return color; -} diff --git a/tests/hlsl/dxsdk/InstancingFX11/Instancing.fx b/tests/hlsl/dxsdk/InstancingFX11/Instancing.fx deleted file mode 100644 index 3c8d45078..000000000 --- a/tests/hlsl/dxsdk/InstancingFX11/Instancing.fx +++ /dev/null @@ -1,591 +0,0 @@ -//TEST_IGNORE_FILE: -//-------------------------------------------------------------------------------------- -// File: Instancing.fx -// -// Copyright (c) Microsoft Corporation. All rights reserved. -//-------------------------------------------------------------------------------------- - -//-------------------------------------------------------------------------------------- -// Input and output structures -//-------------------------------------------------------------------------------------- -struct VSInstIn -{ - float3 pos : POSITION; - float3 norm : NORMAL; - float2 tex : TEXTURE0; - row_major float4x4 mTransform : mTransform; -}; - -struct VSSceneIn -{ - float3 pos : POSITION; - float3 norm : NORMAL; - float2 tex : TEXTURE0; -}; - -struct VSGrassIn -{ - float3 pos : POSITION; - float3 norm : NORMAL; - float2 tex : TEXTURE0; - row_major float4x4 mTransform : mTransform; - uint VertexID : SV_VertexID; -}; - -struct VSGrassOut -{ - float3 pos : POSITION; - float3 norm : NORMAL; - float2 tex : TEXTURE0; - uint VertexID : VERTID; -}; - -struct VSQuadIn -{ - float3 pos : POSITION; - float2 tex : TEXTURE0; - row_major float4x4 mTransform : mTransform; - float fOcc : fOcc; - uint InstanceId : SV_InstanceID; -}; - -struct PSSceneIn -{ - float4 pos : SV_Position; - float2 tex : TEXTURE0; - float4 color : COLOR0; -}; - -struct PSQuadIn -{ - float4 pos : SV_Position; - float3 tex : TEXTURE0; - float4 color : COLOR0; -}; - -//-------------------------------------------------------------------------------------- -// Constant buffers -//-------------------------------------------------------------------------------------- -cbuffer crarely -{ - float4x4 g_mTreeMatrices[50]; - uint g_iNumTrees; -}; - -cbuffer ceveryframe -{ - float4x4 g_mWorldViewProj; - float4x4 g_mWorldView; -}; - -cbuffer cmultipleperframe -{ - float g_GrassWidth; - float g_GrassHeight; - uint g_iGrassCoverage; -}; - -cbuffer cusercontrolled -{ - float g_GrassMessiness; -}; - -struct light_struct -{ - float4 direction; - float4 color; -}; - -cbuffer cimmutable -{ - light_struct g_lights[4] = { - { float4(0.620275, 0.683659, 0.384537, 1), float4(0.75, 0.599, 0.405, 1) }, //sun - { float4(0.063288, -0.987444, 0.144735, 1), float4(0.192, 0.273, 0.275, 1) }, //bottom - { float4(0.23007, 0.785579, -0.574422, 1), float4(0.300, 0.292, 0.223, 1) }, //highlight - { float4(-0.620275, -0.683659, -0.384537, 1), float4(0.0, 0.0, 0.1, 1) } //blue rim-light - }; - - float4 g_ambient = float4(0.4945,0.465,0.5,1); - - float g_occDimHeight = 2400.0; //scalar that tells us how much to darken the tree near the top -}; - -cbuffer cgrassblade -{ - float3 g_positions[6] = - { - float3( -1, 0, 0 ), - float3( -1, 2, 0 ), - float3( 1, 0, 0 ), - float3( 1, 2, 0 ), - - float3( -1, 0, 0 ), - float3( -1, 2, 0 ), - }; - float2 g_texcoords[6] = - { - float2(0,1), - float2(0,0), - float2(1,1), - float2(1,0), - - float2(0,1), - float2(0,0), - }; -}; - -//-------------------------------------------------------------------------------------- -// Textures and Samplers -//-------------------------------------------------------------------------------------- -Texture2D g_txDiffuse; -Texture2DArray g_tx2dArray; -SamplerState g_samLinear -{ - Filter = ANISOTROPIC; - AddressU = Wrap; - AddressV = Wrap; -}; - -Texture1D g_txRandom; -SamplerState g_samPoint -{ - Filter = MIN_MAG_MIP_POINT; - AddressU = Wrap; - AddressV = Wrap; -}; - -//-------------------------------------------------------------------------------------- -// State structures -//-------------------------------------------------------------------------------------- -BlendState QuadAlphaBlendState -{ - AlphaToCoverageEnable = TRUE; - RenderTargetWriteMask[0] = 0x0F; -}; - -RasterizerState EnableMSAA -{ - CullMode = BACK; - MultisampleEnable = TRUE; -}; - -DepthStencilState DisableDepthTestWrite -{ - DepthEnable = FALSE; - DepthWriteMask = ZERO; -}; - -DepthStencilState EnableDepthTestWrite -{ - DepthEnable = TRUE; - DepthWriteMask = ALL; -}; - -BlendState NoBlending -{ - AlphaToCoverageEnable = FALSE; - BlendEnable[0] = FALSE; -}; - -//-------------------------------------------------------------------------------------- -// Sky vertex shader -//-------------------------------------------------------------------------------------- -PSSceneIn VSSkymain(VSSceneIn input) -{ - PSSceneIn output; - - // - // Transform the vert to view-space - // - float4 v4Position = mul(float4(input.pos, 1), g_mWorldViewProj); - output.pos = v4Position; - - // - // Transfer the rest - // - output.tex = input.tex; - - output.color = float4(1,1,1,1); - - return output; -} - -//-------------------------------------------------------------------------------------- -// CalcLighting helper function. Calculates lighting from 4 light sources, adds ambient -// and attenuates for depth. Used by all techniques for lighting. -//-------------------------------------------------------------------------------------- -float4 CalcLighting( float3 norm, float depth ) -{ - float4 color = float4(0,0,0,0); - - // add the contributions of 4 directional lights - [unroll] for( int i=0; i<4; i++ ) - { - color += saturate( dot(g_lights[i].direction,norm) )*g_lights[i].color; - } - - // give some attenuation due to depth - float attenuate = depth / 10000.0; - float4 attenColor = float4(0.15, 0.2, 0.3, 0); - - // add it all up plus ambient - return (1-attenuate*0.23)*(color + g_ambient) + attenColor*attenuate; -} - -//-------------------------------------------------------------------------------------- -// Instancing vertex shader. Positions the vertices based upon the matrix stored -// in the second vertex stream. -//-------------------------------------------------------------------------------------- -PSSceneIn VSInstmain(VSInstIn input) -{ - PSSceneIn output; - - // - // Transform by our Sceneance matrix - // - float4 InstancePosition = mul(float4(input.pos, 1), input.mTransform); - float4 ViewPos = mul(InstancePosition, g_mWorldView ); - - // - // Transform the vert to view-space - // - float4 v4Position = mul(InstancePosition, g_mWorldViewProj); - output.pos = v4Position; - - // - // Transfer the rest - // - output.tex = input.tex; - - // - // dot the norm with the light dir - // - float3 norm = mul(input.norm,(float3x3)input.mTransform); - output.color = CalcLighting( norm, ViewPos.z ); - - // - // Dim the color by how far up the tree we are. - // This is a nice way to fake occlusion of the branches by the leaves. - // - output.color *= 1.0f - saturate(input.pos.y/g_occDimHeight); - - - return output; -} - -//-------------------------------------------------------------------------------------- -// Quad (leaf) vertex shader. Instances the quad over multiple leaf positions and -// multiple trees. This demonstrates how to do double instancing. -//-------------------------------------------------------------------------------------- -PSQuadIn VSQuadmain(VSQuadIn input) -{ - PSQuadIn output; - - // base our leaf texture upon which instance id we are - uint iLeaf = input.InstanceId/g_iNumTrees; - uint iLeafTex = iLeaf%3; - output.tex = float3(input.tex, float(iLeafTex) ); - - // - // Transform the position by the Instance matrix - // - int iTree = input.InstanceId - (input.InstanceId/g_iNumTrees)*g_iNumTrees; - float4 vInstancePos = mul( float4(input.pos, 1), input.mTransform ); - float4 InstancePosition = mul(vInstancePos, g_mTreeMatrices[iTree] ); - float4 ViewPos = mul(InstancePosition, g_mWorldView ); - - // - // Transform the Instance position to view-space - // - output.pos = mul(InstancePosition, g_mWorldViewProj); - - // pack distance from the eye into the color alpha channel - output.color = float4(input.fOcc,input.fOcc,input.fOcc,ViewPos.z); - - return output; -} - -//-------------------------------------------------------------------------------------- -// Grass vertex shader. Basically a passthrough except for instancing the island base -// mesh. -//-------------------------------------------------------------------------------------- -VSGrassOut VSGrassmain(VSGrassIn input) -{ - // simple transform into the instance space - VSGrassOut output; - output.pos = mul(float4(input.pos, 1), input.mTransform); - output.norm = mul(input.norm, (float3x3)input.mTransform); - output.tex = input.tex; - output.VertexID = input.VertexID; - - return output; -} - -//-------------------------------------------------------------------------------------- -// Quad (leaf) GS. Calculates the normal and lighting for the leaf. -//-------------------------------------------------------------------------------------- -[maxvertexcount(3)] -void GSQuadmain(triangle PSQuadIn input[3], inout TriangleStream<PSQuadIn> QuadStream) -{ - PSQuadIn output; - - // - // Calculate the face normal - // - float4 faceNormalA = input[1].pos.xyzw - input[0].pos.xyzw; - float4 faceNormalB = input[2].pos.xyzw - input[0].pos.xyzw; - - // - // Cross product - // - float3 faceNormal = cross(faceNormalA, faceNormalB); - - // - // Normalize face normal - // - faceNormal = normalize(faceNormal); - - // - // Dot face normal with some arbitrary light vectors - // - float4 color1 = CalcLighting( faceNormal, input[0].color.a ); - color1 *= input[0].color; - - // - // Make sure we always have an alpha of 1 - // - color1.a = 1.0; - - // - // Emit out the new tri - // - for(int i=0; i<3; i++) - { - output.pos = input[i].pos; - output.color = color1; - output.tex = input[i].tex; - QuadStream.Append(output); - } - QuadStream.RestartStrip(); -} - -//-------------------------------------------------------------------------------------- -// RandomDir helper. Samples a random dir out of our 1d random texture. In this case -// we use a texture because the offset could be anywhere. If we were sampling linearly -// then we would probably just use a buffer and load from that. -//-------------------------------------------------------------------------------------- -float3 RandomDir(float fOffset) -{ - float tCoord = (fOffset) / 300.0; - return g_txRandom.SampleLevel( g_samPoint, tCoord, 0 ); -} - -//-------------------------------------------------------------------------------------- -// Helper to determing if a point is within a triangle -//-------------------------------------------------------------------------------------- -bool IsInTriangle( float3 P, float3 A, float3 B, float3 C ) -{ - float3 crossA = cross( B-A, P-A ); - float3 crossB = cross( C-B, P-B ); - float3 crossC = cross( A-C, P-C ); - - if( dot( crossA, crossB ) > 0 && - dot( crossB, crossC ) > 0 ) - { - return true; - } - else - { - return false; - } -} - -//-------------------------------------------------------------------------------------- -// Gets a random orientation matrix based upon the RandomDir funciton -//-------------------------------------------------------------------------------------- -float4x4 GetRandomOrientation( float3 Pos, float3 Norm, float fRandOffset ) -{ - float3 Tangent = RandomDir(fRandOffset); - - float3 Bitangent = normalize( cross( Tangent, Norm ) ); - Tangent = normalize( cross( Bitangent, Norm ) ); - - float4x4 matWorld = { float4( Tangent, 0 ), - float4( Norm, 0 ), - float4( Bitangent, 0 ), - float4( Pos, 1 ) }; - return matWorld; -} - -//-------------------------------------------------------------------------------------- -// Generates an actual grass blade -//-------------------------------------------------------------------------------------- -void OutputGrassBlade( VSGrassOut midPoint, inout TriangleStream<PSQuadIn> GrassStream, int iGrassTex ) -{ - PSQuadIn output; - - float4x4 mWorld = GetRandomOrientation( midPoint.pos, midPoint.norm, (float)midPoint.VertexID ); - float4 ViewPos = mul( midPoint.pos, g_mWorldView ); - - float3 grassNorm = midPoint.norm; - float4 color1 = CalcLighting( grassNorm, ViewPos.z ); - - for(int v=0; v<6; v++) - { - float3 pos = g_positions[v]; - pos.x *= g_GrassWidth; - pos.y *= g_GrassHeight; - - output.pos = mul( float4(pos,1), mWorld ); - output.pos = mul( output.pos, g_mWorldViewProj ); - output.tex = float3( g_texcoords[v], iGrassTex ); - output.color = color1; - - GrassStream.Append( output ); - } - - GrassStream.RestartStrip(); -} - -//-------------------------------------------------------------------------------------- -// Midpoint of the three vertices A,B,C -//-------------------------------------------------------------------------------------- -VSGrassOut CalcMidPoint( VSGrassOut A, VSGrassOut B, VSGrassOut C ) -{ - VSGrassOut MidPoint; - - MidPoint.pos = (A.pos + B.pos + C.pos)/3.0f; - MidPoint.norm = (A.norm + B.norm + C.norm)/3.0f; - MidPoint.tex = (A.tex + B.tex + C.tex)/3.0f; - MidPoint.VertexID = A.VertexID + B.VertexID + C.VertexID; - - return MidPoint; -} - -//-------------------------------------------------------------------------------------- -// The actual grass geometry shader. This generates grass blades based upon an input -// mesh (the tops of the islands) and a coverage texture. Each of the textures channels -// determines how much of each of the 4 types of grass to place at a particular spot. -//-------------------------------------------------------------------------------------- -[maxvertexcount(90)] -void GSGrassmain(triangle VSGrassOut input[3], inout TriangleStream<PSQuadIn> GrassStream ) -{ - VSGrassOut MidPoint = CalcMidPoint( input[0], input[1], input[2] ); - - float4 CoverageMask = g_tx2dArray.SampleLevel( g_samPoint, float3(MidPoint.tex,4), 0 ); - float cm[4]; - cm[0] = CoverageMask.r; - cm[1] = CoverageMask.g; - cm[2] = CoverageMask.b; - cm[3] = CoverageMask.a; - - for(int g=0; g<4; g++) - { - float MaxBlades = float(g_iGrassCoverage)*cm[g]; - for(float i=0; i<MaxBlades; i++) - { - float randOffset = g*5 + (i+1); - float3 Tan = RandomDir( MidPoint.pos.x + randOffset ); - float3 Len = normalize( RandomDir( MidPoint.pos.z + randOffset ) ); - float3 Shift = Len.x*g_GrassMessiness*normalize( cross( Tan, MidPoint.norm ) ); - VSGrassOut grassPoint = MidPoint; - grassPoint.VertexID += randOffset; - grassPoint.pos += Shift; - - //uncomment this to make the grass strictly conform to the mesh - //if( IsInTriangle( grassPoint.pos, input[0].pos, input[1].pos, input[2].pos ) ) - { - OutputGrassBlade( grassPoint, GrassStream, g ); - } - } - } -} - -//-------------------------------------------------------------------------------------- -// PS for non-leaf or grass items. -//-------------------------------------------------------------------------------------- -float4 PSScenemain(PSSceneIn input) : SV_Target -{ - float4 color = g_txDiffuse.Sample( g_samLinear, input.tex ) * input.color; - return color; -} - -//-------------------------------------------------------------------------------------- -// PS for leaves and grass -//-------------------------------------------------------------------------------------- -float4 PSQuadmain(PSQuadIn input) : SV_Target -{ - float4 color = g_tx2dArray.Sample( g_samLinear, input.tex ); - color.xyz *= input.color.xyz; - return color; -} - -//-------------------------------------------------------------------------------------- -// Render instanced meshes with vertex lighting -//-------------------------------------------------------------------------------------- -technique10 RenderInstancedVertLighting -{ - pass p0 - { - SetVertexShader( CompileShader( vs_4_0, VSInstmain() ) ); - SetGeometryShader( NULL ); - SetPixelShader( CompileShader( ps_4_0, PSScenemain() ) ); - - SetBlendState( NoBlending, float4( 0.0f, 0.0f, 0.0f, 0.0f ), 0xFFFFFFFF ); - SetDepthStencilState( EnableDepthTestWrite, 0 ); - SetRasterizerState( EnableMSAA ); - } -} - -//-------------------------------------------------------------------------------------- -// Skybox -//-------------------------------------------------------------------------------------- -technique10 RenderSkybox -{ - pass p0 - { - SetVertexShader( CompileShader( vs_4_0, VSSkymain() ) ); - SetGeometryShader( NULL ); - SetPixelShader( CompileShader( ps_4_0, PSScenemain() ) ); - - SetBlendState( NoBlending, float4( 0.0f, 0.0f, 0.0f, 0.0f ), 0xFFFFFFFF ); - SetDepthStencilState( DisableDepthTestWrite, 0 ); - SetRasterizerState( EnableMSAA ); - } -} - -//-------------------------------------------------------------------------------------- -// Render leaves -//-------------------------------------------------------------------------------------- -technique10 RenderQuad -{ - pass p0 - { - - SetVertexShader( CompileShader( vs_4_0, VSQuadmain() ) ); - SetGeometryShader( CompileShader( gs_4_0, GSQuadmain() ) ); - SetPixelShader( CompileShader( ps_4_0, PSQuadmain() ) ); - - SetBlendState( QuadAlphaBlendState, float4( 0.0f, 0.0f, 0.0f, 0.0f ), 0xFFFFFFFF ); - SetDepthStencilState( EnableDepthTestWrite, 0 ); - SetRasterizerState( EnableMSAA ); - } -} - -//-------------------------------------------------------------------------------------- -// Render grass -//-------------------------------------------------------------------------------------- -technique10 RenderGrass -{ - pass p0 - { - - SetVertexShader( CompileShader( vs_4_0, VSGrassmain() ) ); - SetGeometryShader( CompileShader( gs_4_0, GSGrassmain() ) ); - SetPixelShader( CompileShader( ps_4_0, PSQuadmain() ) ); - - SetBlendState( QuadAlphaBlendState, float4( 0.0f, 0.0f, 0.0f, 0.0f ), 0xFFFFFFFF ); - SetDepthStencilState( EnableDepthTestWrite, 0 ); - SetRasterizerState( EnableMSAA ); - } -} diff --git a/tests/hlsl/dxsdk/MultithreadedRendering11/MultithreadedRendering11_PS.hlsl b/tests/hlsl/dxsdk/MultithreadedRendering11/MultithreadedRendering11_PS.hlsl deleted file mode 100644 index 5463f5b92..000000000 --- a/tests/hlsl/dxsdk/MultithreadedRendering11/MultithreadedRendering11_PS.hlsl +++ /dev/null @@ -1,202 +0,0 @@ -//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues. -//TEST:COMPARE_HLSL: -profile ps_4_0 -entry PSMain -//-------------------------------------------------------------------------------------- -// File: MultithreadedRendering11_PS.hlsl -// -// The pixel shader file for the MultithreadedRendering11 sample. -// -// Copyright (c) Microsoft Corporation. All rights reserved. -//-------------------------------------------------------------------------------------- - -// Various debug options -//#define NO_DIFFUSE_MAP -//#define NO_NORMAL_MAP -//#define NO_AMBIENT -//#define NO_DYNAMIC_LIGHTING -//#define NO_SHADOW_MAP - -#define SHADOW_DEPTH_BIAS 0.0005f - -//-------------------------------------------------------------------------------------- -// Globals -//-------------------------------------------------------------------------------------- -static const int g_iNumLights = 4; -static const int g_iNumShadows = 1; // by convention, the first n lights cast shadows - -cbuffer cbPerObject : register( b0 ) -{ - float4 g_vObjectColor : packoffset( c0 ); -}; - -cbuffer cbPerLight : register( b1 ) -{ - struct LightDataStruct - { - matrix m_mLightViewProj; - float4 m_vLightPos; - float4 m_vLightDir; - float4 m_vLightColor; - float4 m_vFalloffs; // x = dist end, y = dist range, z = cos angle end, w = cos range - } g_LightData[g_iNumLights] : packoffset( c0 ); -}; - -cbuffer cbPerScene : register( b2 ) -{ - float4 g_vMirrorPlane : packoffset( c0 ); - float4 g_vAmbientColor : packoffset( c1 ); - float4 g_vTintColor : packoffset( c2 ); -}; - -//-------------------------------------------------------------------------------------- -// Textures and Samplers -//-------------------------------------------------------------------------------------- -Texture2D g_txDiffuse : register( t0 ); -Texture2D g_txNormal : register( t1 ); -Texture2D g_txShadow[g_iNumShadows] : register( t2 ); - -SamplerState g_samPointClamp : register( s0 ); -SamplerState g_samLinearWrap : register( s1 ); - -//-------------------------------------------------------------------------------------- -// Input / Output structures -//-------------------------------------------------------------------------------------- -struct PS_INPUT -{ - float3 vNormal : NORMAL; - float3 vTangent : TANGENT; - float2 vTexcoord : TEXCOORD0; - float4 vPosWorld : TEXCOORD1; -}; - -//-------------------------------------------------------------------------------------- -// Sample normal map, convert to signed, apply tangent-to-world space transform -//-------------------------------------------------------------------------------------- -float3 CalcPerPixelNormal( float2 vTexcoord, float3 vVertNormal, float3 vVertTangent ) -{ - // Compute tangent frame - vVertNormal = normalize( vVertNormal ); - vVertTangent = normalize( vVertTangent ); - float3 vVertBinormal = normalize( cross( vVertTangent, vVertNormal ) ); - float3x3 mTangentSpaceToWorldSpace = float3x3( vVertTangent, vVertBinormal, vVertNormal ); - - // Compute per-pixel normal - float3 vBumpNormal = g_txNormal.Sample( g_samLinearWrap, vTexcoord ); - vBumpNormal = 2.0f * vBumpNormal - 1.0f; - - return mul( vBumpNormal, mTangentSpaceToWorldSpace ); -} - -//-------------------------------------------------------------------------------------- -// Test how much pixel is in shadow, using 2x2 percentage-closer filtering -//-------------------------------------------------------------------------------------- -float4 CalcUnshadowedAmountPCF2x2( int iShadow, float4 vPosWorld ) -{ - matrix mLightViewProj = g_LightData[iShadow].m_mLightViewProj; - Texture2D txShadow = g_txShadow[iShadow]; - - // Compute pixel position in light space - float4 vLightSpacePos = mul( vPosWorld, mLightViewProj ); - vLightSpacePos.xyz /= vLightSpacePos.w; - - // Translate from surface coords to texture coords - // Could fold these into the matrix - float2 vShadowTexCoord = 0.5f * vLightSpacePos + 0.5f; - vShadowTexCoord.y = 1.0f - vShadowTexCoord.y; - - // Depth bias to avoid pixel self-shadowing - float vLightSpaceDepth = vLightSpacePos.z - SHADOW_DEPTH_BIAS; - - // Find sub-pixel weights - float2 vShadowMapDims = float2( 2048.0f, 2048.0f ); // need to keep in sync with .cpp file - float4 vSubPixelCoords; - vSubPixelCoords.xy = frac( vShadowMapDims * vShadowTexCoord ); - vSubPixelCoords.zw = 1.0f - vSubPixelCoords; - float4 vBilinearWeights = vSubPixelCoords.zxzx * vSubPixelCoords.wwyy; - - // 2x2 percentage closer filtering - float2 vTexelUnits = 1.0f / vShadowMapDims; - float4 vShadowDepths; - vShadowDepths.x = txShadow.Sample( g_samPointClamp, vShadowTexCoord ); - vShadowDepths.y = txShadow.Sample( g_samPointClamp, vShadowTexCoord + float2( vTexelUnits.x, 0.0f ) ); - vShadowDepths.z = txShadow.Sample( g_samPointClamp, vShadowTexCoord + float2( 0.0f, vTexelUnits.y ) ); - vShadowDepths.w = txShadow.Sample( g_samPointClamp, vShadowTexCoord + vTexelUnits ); - - // What weighted fraction of the 4 samples are nearer to the light than this pixel? - float4 vShadowTests = ( vShadowDepths >= vLightSpaceDepth ) ? 1.0f : 0.0f; - return dot( vBilinearWeights, vShadowTests ); -} - -//-------------------------------------------------------------------------------------- -// Diffuse lighting calculation, with angle and distance falloff -//-------------------------------------------------------------------------------------- -float4 CalcLightingColor( int iLight, float3 vPosWorld, float3 vPerPixelNormal ) -{ - float3 vLightPos = g_LightData[iLight].m_vLightPos.xyz; - float3 vLightDir = g_LightData[iLight].m_vLightDir.xyz; - float4 vLightColor = g_LightData[iLight].m_vLightColor; - float4 vFalloffs = g_LightData[iLight].m_vFalloffs; - - float3 vLightToPixelUnNormalized = vPosWorld - vLightPos; - - // Dist falloff = 0 at vFalloffs.x, 1 at vFalloffs.x - vFalloffs.y - float fDist = length( vLightToPixelUnNormalized ); - float fDistFalloff = saturate( ( vFalloffs.x - fDist ) / vFalloffs.y ); - - // Normalize from here on - float3 vLightToPixelNormalized = vLightToPixelUnNormalized / fDist; - - // Angle falloff = 0 at vFalloffs.z, 1 at vFalloffs.z - vFalloffs.w - float fCosAngle = dot( vLightToPixelNormalized, vLightDir ); - float fAngleFalloff = saturate( ( fCosAngle - vFalloffs.z ) / vFalloffs.w ); - - // Diffuse contribution - float fNDotL = saturate( -dot( vLightToPixelNormalized, vPerPixelNormal ) ); - - return vLightColor * fNDotL * fDistFalloff * fAngleFalloff; -} - -//-------------------------------------------------------------------------------------- -// Pixel Shader -//-------------------------------------------------------------------------------------- -float4 PSMain( PS_INPUT Input ) : SV_TARGET -{ - // Manual clip test, so that objects which are behind the mirror - // don't show up in the mirror. - clip( dot( g_vMirrorPlane.xyz, Input.vPosWorld.xyz ) + g_vMirrorPlane.w ); - -#ifdef NO_DIFFUSE_MAP - float4 vDiffuse = 0.5f; -#else // #ifdef NO_DIFFUSE_MAP - float4 vDiffuse = g_txDiffuse.Sample( g_samLinearWrap, Input.vTexcoord ); -#endif // #ifdef NO_DIFFUSE_MAP #else - - // Compute per-pixel normal -#ifdef NO_NORMAL_MAP - float3 vPerPixelNormal = Input.vNormal; -#else // #ifdef NO_NORMAL_MAP - float3 vPerPixelNormal = CalcPerPixelNormal( Input.vTexcoord, Input.vNormal, Input.vTangent ); -#endif // #ifdef NO_NORMAL_MAP #else - - // Compute lighting contribution -#ifdef NO_AMBIENT - float4 vTotalLightingColor = 0.0f; -#else // #ifdef NO_AMBIENT - float4 vTotalLightingColor = g_vAmbientColor; -#endif // #ifdef NO_AMBIENT #else - -#ifndef NO_DYNAMIC_LIGHTING - for ( int iLight = 0; iLight < g_iNumLights; ++iLight ) - { - float4 vLightingColor = CalcLightingColor( iLight, Input.vPosWorld, vPerPixelNormal ); -#ifndef NO_SHADOW_MAP - if ( iLight < g_iNumShadows && any( vLightingColor.xyz ) > 0.0f ) // Don't bother checking shadow map if the pixel is unlit - { - vLightingColor *= CalcUnshadowedAmountPCF2x2( iLight, Input.vPosWorld ); - } -#endif // #ifndef NO_SHADOW_MAP - vTotalLightingColor += vLightingColor; - } -#endif // #ifndef NO_DYNAMIC_LIGHTING - - return vDiffuse * g_vTintColor * g_vObjectColor * vTotalLightingColor; -} diff --git a/tests/hlsl/dxsdk/MultithreadedRendering11/MultithreadedRendering11_VS.hlsl b/tests/hlsl/dxsdk/MultithreadedRendering11/MultithreadedRendering11_VS.hlsl deleted file mode 100644 index 12fe14ae9..000000000 --- a/tests/hlsl/dxsdk/MultithreadedRendering11/MultithreadedRendering11_VS.hlsl +++ /dev/null @@ -1,83 +0,0 @@ -//TEST:COMPARE_HLSL: -profile vs_4_0 -entry VSMain - -#ifndef __SLANG__ -#define cbPerObject cbPerObject_0 -#define g_mWorld g_mWorld_0 -#define cbPerScene cbPerScene_0 -#define g_mViewProj g_mViewProj_0 -#endif - -//-------------------------------------------------------------------------------------- -// File: MultithreadedRendering11_VS.hlsl -// -// The vertex shader file for the MultithreadedRendering11 sample. -// -// Copyright (c) Microsoft Corporation. All rights reserved. -//-------------------------------------------------------------------------------------- - -// Various debug options -//#define UNCOMPRESSED_VERTEX_DATA // The sdkmesh file contained uncompressed vertex data - -//-------------------------------------------------------------------------------------- -// Globals -//-------------------------------------------------------------------------------------- -cbuffer cbPerObject : register( b0 ) -{ - matrix g_mWorld ;//SLANG: : packoffset( c0 ); -}; -cbuffer cbPerScene : register( b1 ) -{ - matrix g_mViewProj ;//SLANG: : packoffset( c0 ); -}; - -//-------------------------------------------------------------------------------------- -// Input / Output structures -//-------------------------------------------------------------------------------------- -struct VS_INPUT -{ - float4 vPosition : POSITION; - float3 vNormal : NORMAL; - float2 vTexcoord : TEXCOORD0; - float3 vTangent : TANGENT; -}; - -struct VS_OUTPUT -{ - float3 vNormal : NORMAL; - float3 vTangent : TANGENT; - float2 vTexcoord : TEXCOORD0; - float4 vPosWorld : TEXCOORD1; - float4 vPosition : SV_POSITION; -}; - -// We aliased signed vectors as a unsigned format. -// Need to recover signed values. The values 1.0 and 2.0 -// are slightly inaccurate here. -float3 R10G10B10A2_UNORM_TO_R32G32B32_FLOAT( in float3 vVec ) -{ - vVec *= 2.0f; - return vVec >= 1.0f ? ( vVec - 2.0f ) : vVec; -} - -//-------------------------------------------------------------------------------------- -// Vertex Shader -//-------------------------------------------------------------------------------------- -VS_OUTPUT VSMain( VS_INPUT Input ) -{ - VS_OUTPUT Output; - -#ifndef UNCOMPRESSED_VERTEX_DATA - // Expand compressed vectors - Input.vNormal = R10G10B10A2_UNORM_TO_R32G32B32_FLOAT( Input.vNormal ); - Input.vTangent = R10G10B10A2_UNORM_TO_R32G32B32_FLOAT( Input.vTangent ); -#endif // #ifndef UNCOMPRESSED_VERTEX_DATA - - Output.vPosWorld = mul( Input.vPosition, g_mWorld ); - Output.vPosition = mul( Output.vPosWorld, g_mViewProj ); - Output.vNormal = mul( Input.vNormal, (float3x3)g_mWorld ); - Output.vTangent = mul( Input.vTangent, (float3x3)g_mWorld ); - Output.vTexcoord = Input.vTexcoord; - - return Output; -} - diff --git a/tests/hlsl/dxsdk/NBodyGravityCS11/NBodyGravityCS11.hlsl b/tests/hlsl/dxsdk/NBodyGravityCS11/NBodyGravityCS11.hlsl deleted file mode 100644 index bac2839db..000000000 --- a/tests/hlsl/dxsdk/NBodyGravityCS11/NBodyGravityCS11.hlsl +++ /dev/null @@ -1,103 +0,0 @@ -//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues. -//TEST:COMPARE_HLSL: -profile cs_4_0 -entry CSMain -//-------------------------------------------------------------------------------------- -// File: NBodyGravityCS11.hlsl -// -// Demonstrates how to use Compute Shader to do n-body gravity computation -// -// Copyright (c) Microsoft Corporation. All rights reserved. -//-------------------------------------------------------------------------------------- - -static float softeningSquared = 0.0012500000*0.0012500000; -static float g_fG = 6.67300e-11f * 10000.0f; -static float g_fParticleMass = g_fG*10000.0f * 10000.0f; - -#define blocksize 128 -groupshared float4 sharedPos[blocksize]; - -// Body to body interaction, acceleration of the particle at position bi is updated -void bodyBodyInteraction(inout float3 ai, float4 bj, float4 bi, float mass, int particles ) -{ - float3 r = bj.xyz - bi.xyz; - - float distSqr = dot(r, r); - distSqr += softeningSquared; - - float invDist = 1.0f / sqrt(distSqr); - float invDistCube = invDist * invDist * invDist; - - float s = mass * invDistCube * particles; - - ai += r * s; -} - -cbuffer cbCS : register( b0 ) -{ - uint4 g_param; // pcbCS->param[0] = MAX_PARTICLES; - // pcbCS->param[1] = dimx; - float4 g_paramf; // pcbCS->paramf[0] = 0.1f; - // pcbCS->paramf[1] = 1; -}; - -struct PosVelo -{ - float4 pos; - float4 velo; -}; - -StructuredBuffer<PosVelo> oldPosVelo; -RWStructuredBuffer<PosVelo> newPosVelo; - -[numthreads(blocksize, 1, 1)] -void CSMain( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex ) -{ - // Each thread of the CS updates one of the particles - - float4 pos = oldPosVelo[DTid.x].pos; - float4 vel = oldPosVelo[DTid.x].velo; - float3 accel = 0; - float mass = g_fParticleMass; - - // Update current particle using all other particles - [loop] - for (uint tile = 0; tile < g_param.y; tile++) - { - // Cache a tile of particles unto shared memory to increase IO efficiency - sharedPos[GI] = oldPosVelo[tile * blocksize + GI].pos; - - GroupMemoryBarrierWithGroupSync(); - - [unroll] - for (uint counter = 0; counter < blocksize; counter+=8 ) - { - bodyBodyInteraction(accel, sharedPos[counter], pos, mass, 1); - bodyBodyInteraction(accel, sharedPos[counter+1], pos, mass, 1); - bodyBodyInteraction(accel, sharedPos[counter+2], pos, mass, 1); - bodyBodyInteraction(accel, sharedPos[counter+3], pos, mass, 1); - bodyBodyInteraction(accel, sharedPos[counter+4], pos, mass, 1); - bodyBodyInteraction(accel, sharedPos[counter+5], pos, mass, 1); - bodyBodyInteraction(accel, sharedPos[counter+6], pos, mass, 1); - bodyBodyInteraction(accel, sharedPos[counter+7], pos, mass, 1); - } - - GroupMemoryBarrierWithGroupSync(); - } - - // g_param.x is the number of our particles, however this number might not be an exact multiple of the tile size. - // In such cases, out of bound reads occur in the process above, which means there will be - // tooManyParticles "phantom" particles generating false gravity at position (0, 0, 0), so we have to substract them here. - // NOTE, out of bound reads always return 0 in CS - const uint tooManyParticles = g_param.y * blocksize - g_param.x; - bodyBodyInteraction(accel, float4(0, 0, 0, 0), pos, mass, -tooManyParticles); - - // Update the velocity and position of current particle using the acceleration computed above - vel.xyz += accel.xyz * g_paramf.x; //deltaTime; - vel.xyz *= g_paramf.y; //damping; - pos.xyz += vel.xyz * g_paramf.x; //deltaTime; - - if ( DTid.x < g_param.x ) - { - newPosVelo[DTid.x].pos = pos; - newPosVelo[DTid.x].velo = float4(vel.xyz, length(accel)); - } -} diff --git a/tests/hlsl/dxsdk/NBodyGravityCS11/ParticleDraw.hlsl b/tests/hlsl/dxsdk/NBodyGravityCS11/ParticleDraw.hlsl deleted file mode 100644 index 7f6292662..000000000 --- a/tests/hlsl/dxsdk/NBodyGravityCS11/ParticleDraw.hlsl +++ /dev/null @@ -1,128 +0,0 @@ -//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues. -//TEST:COMPARE_HLSL: -profile vs_4_0 -entry VSParticleDraw -profile gs_4_0 -entry GSParticleDraw -profile ps_4_0 -entry PSParticleDraw -//-------------------------------------------------------------------------------------- -// File: ParticleDraw.hlsl -// -// Shaders for rendering the particle as point sprite -// -// Copyright (c) Microsoft Corporation. All rights reserved. -//-------------------------------------------------------------------------------------- - -struct VSParticleIn -{ - float4 color : COLOR; - uint id : SV_VERTEXID; -}; - -struct VSParticleDrawOut -{ - float3 pos : POSITION; - float4 color : COLOR; -}; - -struct GSParticleDrawOut -{ - float2 tex : TEXCOORD0; - float4 color : COLOR; - float4 pos : SV_POSITION; -}; - -struct PSParticleDrawIn -{ - float2 tex : TEXCOORD0; - float4 color : COLOR; -}; - -struct PosVelo -{ - float4 pos; - float4 velo; -}; - -Texture2D g_txDiffuse; -StructuredBuffer<PosVelo> g_bufPosVelo; - - -SamplerState g_samLinear -{ - Filter = MIN_MAG_MIP_LINEAR; - AddressU = Clamp; - AddressV = Clamp; -}; - -cbuffer cb0 -{ - row_major float4x4 g_mWorldViewProj; - row_major float4x4 g_mInvView; -}; - -cbuffer cb1 -{ - static float g_fParticleRad = 10.0f; -}; - -cbuffer cbImmutable -{ - static float3 g_positions[4] = - { - float3( -1, 1, 0 ), - float3( 1, 1, 0 ), - float3( -1, -1, 0 ), - float3( 1, -1, 0 ), - }; - - static float2 g_texcoords[4] = - { - float2(0,0), - float2(1,0), - float2(0,1), - float2(1,1), - }; -}; - -// -// Vertex shader for drawing the point-sprite particles -// -VSParticleDrawOut VSParticleDraw(VSParticleIn input) -{ - VSParticleDrawOut output; - - output.pos = g_bufPosVelo[input.id].pos; - - float mag = g_bufPosVelo[input.id].velo.w/9; - output.color = lerp( float4(1,0.1,0.1,1), input.color, mag ); - - return output; -} - -// -// GS for rendering point sprite particles. Takes a point and turns it into 2 tris. -// -[maxvertexcount(4)] -void GSParticleDraw(point VSParticleDrawOut input[1], inout TriangleStream<GSParticleDrawOut> SpriteStream) -{ - GSParticleDrawOut output; - - // - // Emit two new triangles - // - for(int i=0; i<4; i++) - { - float3 position = g_positions[i] * g_fParticleRad; - position = mul( position, (float3x3)g_mInvView ) + input[0].pos; - output.pos = mul( float4(position,1.0), g_mWorldViewProj ); - - output.color = input[0].color; - output.tex = g_texcoords[i]; - SpriteStream.Append(output); - } - SpriteStream.RestartStrip(); -} - -// -// PS for drawing particles -// -float4 PSParticleDraw(PSParticleDrawIn input) : SV_Target -{ - return g_txDiffuse.Sample( g_samLinear, input.tex ) * input.color; -}
\ No newline at end of file diff --git a/tests/hlsl/dxsdk/OIT11/OIT_CS.hlsl b/tests/hlsl/dxsdk/OIT11/OIT_CS.hlsl deleted file mode 100644 index 80a1e165e..000000000 --- a/tests/hlsl/dxsdk/OIT11/OIT_CS.hlsl +++ /dev/null @@ -1,277 +0,0 @@ -//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues. -//TEST:COMPARE_HLSL: -profile cs_4_0 -entry VSParticleDraw -profile gs_4_0 -entry GSParticleDraw -profile ps_4_0 -entry PSParticleDraw -//----------------------------------------------------------------------------- -// File: OIT_CS.hlsl -// -// Desc: Compute shaders for used in the Order Independent Transparency sample. -// -// Copyright (c) Microsoft Corporation. All rights reserved. -//----------------------------------------------------------------------------- -// TODO: use structured buffers -RWBuffer<float> deepBufferDepth : register( u0 ); -RWBuffer<uint> deepBufferColorUINT : register( u1 ); -RWTexture2D<float4> frameBuffer : register( u2 ); -RWBuffer<uint> prefixSum : register( u3 ); - -Texture2D<uint> fragmentCount : register ( t0 ); - -cbuffer CB : register( b0 ) -{ - uint g_nFrameWidth : packoffset( c0.x ); - uint g_nFrameHeight : packoffset( c0.y ); - uint g_nPassSize : packoffset( c0.z ); - uint g_nReserved : packoffset( c0.w ); -} - -#define blocksize 1 -#define groupthreads (blocksize*blocksize) -groupshared float accum[groupthreads]; - -// First pass of the prefix sum creation algorithm. Converts a 2D buffer to a 1D buffer, -// and sums every other value with the previous value. -[numthreads(1,1,1)] -void CreatePrefixSum_Pass0_CS( uint3 nGid : SV_GroupID, uint3 nDTid : SV_DispatchThreadID, uint3 nGTid : SV_GroupThreadID ) -{ - int nThreadNum = nGid.y*g_nFrameWidth + nGid.x; - if( nThreadNum%2 == 0 ) - { - prefixSum[nThreadNum] = fragmentCount[nGid.xy]; - - // Add the Fragment count to the next bin - if( (nThreadNum+1) < g_nFrameWidth * g_nFrameHeight ) - { - int2 nextUV; - nextUV.x = (nThreadNum+1) % g_nFrameWidth; - nextUV.y = (nThreadNum+1) / g_nFrameWidth; - prefixSum[ nThreadNum+1 ] = prefixSum[ nThreadNum ] + fragmentCount[ nextUV ]; - } - } -} - -// Second and following passes. Each pass distributes the sum of the first half of the group -// to the second half of the group. There are n/groupsize groups in each pass. -// Each pass increases the group size until it is the size of the buffer. -// The resulting buffer holds the prefix sum of all preceding values in each -// position -[numthreads(1,1,1)] -void CreatePrefixSum_Pass1_CS( uint3 nGid : SV_GroupID, uint3 nDTid : SV_DispatchThreadID, uint3 nGTid : SV_GroupThreadID ) -{ - int nThreadNum = nGid.x; - - int nValue = prefixSum[nThreadNum*g_nPassSize + g_nPassSize/2 - 1]; - for(int i = nThreadNum*g_nPassSize + g_nPassSize/2; i < nThreadNum*g_nPassSize + g_nPassSize && i < g_nFrameWidth*g_nFrameHeight; i++) - { - prefixSum[i] = prefixSum[i] + nValue; - } -} - -#if 1 - -// Sort the fragments using a bitonic sort, then accumulate the fragments into the final result. -groupshared int nIndex[32]; -#define NUM_THREADS 8 -[numthreads(1,1,1)] -void SortAndRenderCS( uint3 nGid : SV_GroupID, uint3 nDTid : SV_DispatchThreadID, uint3 nGTid : SV_GroupThreadID ) -{ - uint nThreadNum = nGid.y * g_nFrameWidth + nGid.x; - -// uint r0, r1, r2; -// float rd0, rd1, rd2, rd3, rd4, rd5, rd6, rd7; - - uint N = fragmentCount[nDTid.xy]; - - uint N2 = 1 << (int)(ceil(log2(N))); - - float fDepth[32]; - for(int i = 0; i < N; i++) - { - nIndex[i] = i; - fDepth[i] = deepBufferDepth[ prefixSum[nThreadNum-1] + i ]; - } - for(int i = N; i < N2; i++) - { - nIndex[i] = i; - fDepth[i] = 1.1f; - } - - uint idx = blocksize*nGTid.y + nGTid.x; - - // Bitonic sort - for( int k = 2; k <= N2; k = 2*k ) - { - for( int j = k>>1; j > 0 ; j = j>>1 ) - { - for( int i = 0; i < N2; i++ ) - { -// GroupMemoryBarrierWithGroupSync(); - //i = idx; - - float di = fDepth[ nIndex[ i ] ]; - int ixj = i^j; - if ( ( ixj ) > i ) - { - float dixj = fDepth[ nIndex[ ixj ] ]; - if ( ( i&k ) == 0 && di > dixj ) - { - int temp = nIndex[ i ]; - nIndex[ i ] = nIndex[ ixj ]; - nIndex[ ixj ] = temp; - } - if ( ( i&k ) != 0 && di < dixj ) - { - int temp = nIndex[ i ]; - nIndex[ i ] = nIndex[ ixj ]; - nIndex[ ixj ] = temp; - } - } - } - } - } - - // Output the final result to the frame buffer - if( idx == 0 ) - { - - /* - // Debug - uint color[8]; - for(int i = 0; i < 8; i++) - { - color[i] = deepBufferColorUINT[prefixSum[nThreadNum-1] + i]; - } - - for(int i = 0; i < 8; i++) - { - deepBufferDepth[nThreadNum*8+i] = fDepth[i];//fDepth[nIndex[i]]; - deepBufferColorUINT[nThreadNum*8+i] = color[nIndex[i]]; - } - */ - - // Accumulate fragments into final result - float4 result = 0.0f; - for( int x = N-1; x >= 0; x-- ) - { - uint bufferValue = deepBufferColorUINT[ prefixSum[nThreadNum-1] + nIndex[ x ] ]; - float4 color; - color.r = ( ( bufferValue >> 0 & 0xFF )) / 255.0f; - color.g = ( bufferValue >> 8 & 0xFF ) / 255.0f; - color.b = ( bufferValue >> 16 & 0xFF ) / 255.0f; - color.a = ( bufferValue >> 24 & 0xFF ) / 255.0f; - result = lerp( result, color, color.a ); - } - result.a = 1.0f; - frameBuffer[ nGid.xy ] = result; - } -} - -#else -[numthreads(1,1,1)] -void SortAndRenderCS( uint3 nGid : SV_GroupID, uint3 nDTid : SV_DispatchThreadID, uint3 nGTid : SV_GroupThreadID ) -{ - uint nThreadNum = nDTid.y * g_nFrameWidth + nDTid.x; - float d0 = deepBufferDepth[nThreadNum*8]; - float d1 = deepBufferDepth[nThreadNum*8+1]; - float d2 = deepBufferDepth[nThreadNum*8+2]; - - uint s0 = deepBufferColorUINT[nThreadNum*8 + 0]; - uint s1 = deepBufferColorUINT[nThreadNum*8 + 1]; - uint s2 = deepBufferColorUINT[nThreadNum*8 + 2]; - - uint r0, r1, r2; - float rd0, rd1, rd2; - if( d0 < d1 && d0 < d2 ) - { - r0 = s0; - rd0 = d0; - if( d1 < d2 ) - { - r1 = s1; - r2 = s2; - - rd1 = d1; - rd2 = d2; - } - else - { - r1 = s2; - r2 = s1; - - rd1 = d2; - rd2 = d1; - } - } - else if( d1 < d2 ) - { - r0 = s1; - rd0 = d1; - if( d0 < d2 ) - { - r1 = s0; - r2 = s2; - - rd1 = d0; - rd2 = d2; - } - else - { - r1 = s2; - r2 = s0; - - rd1 = d2; - rd2 = d0; - } - } - else - { - r0 = s2; - rd0 = d2; - if( d1 < d0 ) - { - r1 = s1; - r2 = s0; - - rd1 = d1; - rd2 = d0; - } - else - { - r1 = s0; - r2 = s1; - - rd1 = d0; - rd2 = d1; - } - } - - deepBufferDepth[nThreadNum*8] = rd0; - deepBufferDepth[nThreadNum*8+1] = rd1; - deepBufferDepth[nThreadNum*8+2] = rd2; - - deepBufferColorUINT[nThreadNum*8] = r0; - deepBufferColorUINT[nThreadNum*8+1] = r1; - deepBufferColorUINT[nThreadNum*8+2] = r2; - - // convert the color to floats - float4 color[3]; - color[0].r = (r0 >> 0 & 0xFF) / 255.0f; - color[0].g = (r0 >> 8 & 0xFF) / 255.0f; - color[0].b = (r0 >> 16 & 0xFF) / 255.0f; - color[0].a = (r0 >> 24 & 0xFF) / 255.0f; - - color[1].r = (r1 >> 0 & 0xFF) / 255.0f; - color[1].g = (r1 >> 8 & 0xFF) / 255.0f; - color[1].b = (r1 >> 16 & 0xFF) / 255.0f; - color[1].a = (r1 >> 24 & 0xFF) / 255.0f; - - color[2].r = (r2 >> 0 & 0xFF) / 255.0f; - color[2].g = (r2 >> 8 & 0xFF) / 255.0f; - color[2].b = (r2 >> 16 & 0xFF) / 255.0f; - color[2].a = (r2 >> 24 & 0xFF) / 255.0f; - - float4 result = lerp(lerp(lerp(0, color[2], color[2].a), color[1], color[1].a), color[0], color[0].a); - result.a = 1.0f; - - frameBuffer[nDTid.xy] = result; -} - -#endif
\ No newline at end of file diff --git a/tests/hlsl/dxsdk/OIT11/OIT_PS.hlsl b/tests/hlsl/dxsdk/OIT11/OIT_PS.hlsl deleted file mode 100644 index 5fae02d62..000000000 --- a/tests/hlsl/dxsdk/OIT11/OIT_PS.hlsl +++ /dev/null @@ -1,56 +0,0 @@ -//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues. -//TEST:COMPARE_HLSL: -profile ps_4_0 -entry FragmentCountPS -entry FillDeepBufferPS -//----------------------------------------------------------------------------- -// File: OITPS.hlsl -// -// Desc: Pixel shaders used in the Order Independent Transparency sample. -// -// Copyright (c) Microsoft Corporation. All rights reserved. -//----------------------------------------------------------------------------- -//TODO: Use structured buffers -RWTexture2D<uint> fragmentCount : register( u1 ); -RWBuffer<float> deepBufferDepth : register( u2 ); -RWBuffer<uint4> deepBufferColor : register( u3 ); -RWBuffer<uint> prefixSum : register( u4 ); - -cbuffer CB : register( b0 ) -{ - uint g_nFrameWidth : packoffset( c0.x ); - uint g_nFrameHeight : packoffset( c0.y ); - uint g_nReserved0 : packoffset( c0.z ); - uint g_nReserved1 : packoffset( c0.w ); -} - -struct SceneVS_Output -{ - float4 pos : SV_POSITION; - float4 color : COLOR0; -}; - -void FragmentCountPS( SceneVS_Output input) -{ - // Increments need to be done atomically - InterlockedAdd(fragmentCount[input.pos.xy], 1); -} - -void FillDeepBufferPS( SceneVS_Output input ) -{ - uint x = input.pos.x; - uint y = input.pos.y; - - // Atomically allocate space in the deep buffer - uint fc; - InterlockedAdd(fragmentCount[input.pos.xy], 1, fc); - - uint nPrefixSumPos = y*g_nFrameWidth + x; - uint nDeepBufferPos; - if( nPrefixSumPos == 0 ) - nDeepBufferPos = fc; - else - nDeepBufferPos = prefixSum[nPrefixSumPos-1] + fc; - - // Store fragment data into the allocated space - deepBufferDepth[nDeepBufferPos] = input.pos.z; - deepBufferColor[nDeepBufferPos] = clamp(input.color, 0, 1)*255; -} - diff --git a/tests/hlsl/dxsdk/OIT11/SceneVS.hlsl b/tests/hlsl/dxsdk/OIT11/SceneVS.hlsl deleted file mode 100644 index 2254091f6..000000000 --- a/tests/hlsl/dxsdk/OIT11/SceneVS.hlsl +++ /dev/null @@ -1,42 +0,0 @@ -//TEST:COMPARE_HLSL: -profile vs_4_0 -entry SceneVS - -#ifndef __SLANG__ -#define cbPerObject cbPerObject_0 -#define g_mWorldViewProjection g_mWorldViewProjection_0 -#endif - -//----------------------------------------------------------------------------- -// File: SceneVS.hlsl -// -// Desc: Vertex shader for the scene. -// -// Copyright (c) Microsoft Corporation. All rights reserved. -//----------------------------------------------------------------------------- - - -cbuffer cbPerObject : register( b0 ) -{ - row_major matrix g_mWorldViewProjection ;//SLANG: : packoffset( c0 ); -} - -struct SceneVS_Input -{ - float4 pos : POSITION; - float4 color : COLOR; -}; - -struct SceneVS_Output -{ - float4 pos : SV_POSITION; - float4 color : COLOR0; -}; - -SceneVS_Output SceneVS( SceneVS_Input input ) -{ - SceneVS_Output output; - - output.color = input.color; - output.pos = mul(input.pos, g_mWorldViewProjection ); - - return output; -} diff --git a/tests/hlsl/dxsdk/README.md b/tests/hlsl/dxsdk/README.md deleted file mode 100644 index dd0c0fb6b..000000000 --- a/tests/hlsl/dxsdk/README.md +++ /dev/null @@ -1,5 +0,0 @@ -DirectX SDK Sample Shaders -========================== - -This directory contains shaders that have shipped as part of the DirectX SDK. -The licsense terms for these shaders are specificed at the top of the source files.
\ No newline at end of file diff --git a/tests/hlsl/dxsdk/SimpleBezier11/SimpleBezier11.hlsl b/tests/hlsl/dxsdk/SimpleBezier11/SimpleBezier11.hlsl deleted file mode 100644 index d01cd7aa4..000000000 --- a/tests/hlsl/dxsdk/SimpleBezier11/SimpleBezier11.hlsl +++ /dev/null @@ -1,233 +0,0 @@ -//TEST_DISABLED:COMPARE_HLSL: -profile vs_4_0 -entry BezierVS -profile hs_5_0 -entry BezierHS -profile ds_5_0 -entry BezierDS -profile ps_4_0 -entry BezierPS -entry SolidColorPS - -// Note(Slang): Disabling this test for now because compiling it via IR ends up creating a local variable of the `OutputPatch<...>` type, which we need to get rid of via SSA optimization. - - -//-------------------------------------------------------------------------------------- -// File: SimpleBezier11.hlsl -// -// This sample shows an simple implementation of the DirectX 11 Hardware Tessellator -// for rendering a Bezier Patch. -// -// Copyright (c) Microsoft Corporation. All rights reserved. -//-------------------------------------------------------------------------------------- - -// This allows us to compile the shader with a #define to choose -// the different partition modes for the hull shader. -// See the hull shader: [partitioning(BEZIER_HS_PARTITION)] -// This sample demonstrates "integer", "fractional_even", and "fractional_odd" -#ifndef BEZIER_HS_PARTITION -#define BEZIER_HS_PARTITION "integer" -#endif // BEZIER_HS_PARTITION - -// The input patch size. In this sample, it is 16 control points. -// This value should match the call to IASetPrimitiveTopology() -#define INPUT_PATCH_SIZE 16 - -// The output patch size. In this sample, it is also 16 control points. -#define OUTPUT_PATCH_SIZE 16 - -//-------------------------------------------------------------------------------------- -// Constant Buffers -//-------------------------------------------------------------------------------------- -cbuffer cbPerFrame : register( b0 ) -{ - matrix g_mViewProjection; - float3 g_vCameraPosWorld; - float g_fTessellationFactor; -}; - -//-------------------------------------------------------------------------------------- -// Vertex shader section -//-------------------------------------------------------------------------------------- -struct VS_CONTROL_POINT_INPUT -{ - float3 vPosition : POSITION; -}; - -struct VS_CONTROL_POINT_OUTPUT -{ - float3 vPosition : POSITION; -}; - -// This simple vertex shader passes the control points straight through to the -// hull shader. In a more complex scene, you might transform the control points -// or perform skinning at this step. - -// The input to the vertex shader comes from the vertex buffer. - -// The output from the vertex shader will go into the hull shader. - -VS_CONTROL_POINT_OUTPUT BezierVS( VS_CONTROL_POINT_INPUT Input ) -{ - VS_CONTROL_POINT_OUTPUT Output; - - Output.vPosition = Input.vPosition; - - return Output; -} - -//-------------------------------------------------------------------------------------- -// Constant data function for the BezierHS. This is executed once per patch. -//-------------------------------------------------------------------------------------- -struct HS_CONSTANT_DATA_OUTPUT -{ - float Edges[4] : SV_TessFactor; - float Inside[2] : SV_InsideTessFactor; -}; - -struct HS_OUTPUT -{ - float3 vPosition : BEZIERPOS; -}; - -// This constant hull shader is executed once per patch. For the simple Mobius strip -// model, it will be executed 4 times. In this sample, we set the tessellation factor -// via SV_TessFactor and SV_InsideTessFactor for each patch. In a more complex scene, -// you might calculate a variable tessellation factor based on the camera's distance. - -HS_CONSTANT_DATA_OUTPUT BezierConstantHS( InputPatch<VS_CONTROL_POINT_OUTPUT, INPUT_PATCH_SIZE> ip, - uint PatchID : SV_PrimitiveID ) -{ - HS_CONSTANT_DATA_OUTPUT Output; - - float TessAmount = g_fTessellationFactor; - - Output.Edges[0] = Output.Edges[1] = Output.Edges[2] = Output.Edges[3] = TessAmount; - Output.Inside[0] = Output.Inside[1] = TessAmount; - - return Output; -} - -// The hull shader is called once per output control point, which is specified with -// outputcontrolpoints. For this sample, we take the control points from the vertex -// shader and pass them directly off to the domain shader. In a more complex scene, -// you might perform a basis conversion from the input control points into a Bezier -// patch, such as the SubD11 Sample. - -// The input to the hull shader comes from the vertex shader - -// The output from the hull shader will go to the domain shader. -// The tessellation factor, topology, and partition mode will go to the fixed function -// tessellator stage to calculate the UVW and domain points. - -[domain("quad")] -[partitioning(BEZIER_HS_PARTITION)] -[outputtopology("triangle_cw")] -[outputcontrolpoints(OUTPUT_PATCH_SIZE)] -[patchconstantfunc("BezierConstantHS")] -HS_OUTPUT BezierHS( InputPatch<VS_CONTROL_POINT_OUTPUT, INPUT_PATCH_SIZE> p, - uint i : SV_OutputControlPointID, - uint PatchID : SV_PrimitiveID ) -{ - HS_OUTPUT Output; - Output.vPosition = p[i].vPosition; - return Output; -} - -//-------------------------------------------------------------------------------------- -// Bezier evaluation domain shader section -//-------------------------------------------------------------------------------------- -struct DS_OUTPUT -{ - float4 vPosition : SV_POSITION; - float3 vWorldPos : WORLDPOS; - float3 vNormal : NORMAL; -}; - -//-------------------------------------------------------------------------------------- -float4 BernsteinBasis(float t) -{ - float invT = 1.0f - t; - - return float4( invT * invT * invT, - 3.0f * t * invT * invT, - 3.0f * t * t * invT, - t * t * t ); -} - -//-------------------------------------------------------------------------------------- -float4 dBernsteinBasis(float t) -{ - float invT = 1.0f - t; - - return float4( -3 * invT * invT, - 3 * invT * invT - 6 * t * invT, - 6 * t * invT - 3 * t * t, - 3 * t * t ); -} - -//-------------------------------------------------------------------------------------- -float3 EvaluateBezier( const OutputPatch<HS_OUTPUT, OUTPUT_PATCH_SIZE> bezpatch, - float4 BasisU, - float4 BasisV ) -{ - float3 Value = float3(0,0,0); - Value = BasisV.x * ( bezpatch[0].vPosition * BasisU.x + bezpatch[1].vPosition * BasisU.y + bezpatch[2].vPosition * BasisU.z + bezpatch[3].vPosition * BasisU.w ); - Value += BasisV.y * ( bezpatch[4].vPosition * BasisU.x + bezpatch[5].vPosition * BasisU.y + bezpatch[6].vPosition * BasisU.z + bezpatch[7].vPosition * BasisU.w ); - Value += BasisV.z * ( bezpatch[8].vPosition * BasisU.x + bezpatch[9].vPosition * BasisU.y + bezpatch[10].vPosition * BasisU.z + bezpatch[11].vPosition * BasisU.w ); - Value += BasisV.w * ( bezpatch[12].vPosition * BasisU.x + bezpatch[13].vPosition * BasisU.y + bezpatch[14].vPosition * BasisU.z + bezpatch[15].vPosition * BasisU.w ); - - return Value; -} - -// The domain shader is run once per vertex and calculates the final vertex's position -// and attributes. It receives the UVW from the fixed function tessellator and the -// control point outputs from the hull shader. Since we are using the DirectX 11 -// Tessellation pipeline, it is the domain shader's responsibility to calculate the -// final SV_POSITION for each vertex. In this sample, we evaluate the vertex's -// position using a Bernstein polynomial and the normal is calculated as the cross -// product of the U and V derivatives. - -// The input SV_DomainLocation to the domain shader comes from fixed function -// tessellator. And the OutputPatch comes from the hull shader. From these, you -// must calculate the final vertex position, color, texcoords, and other attributes. - -// The output from the domain shader will be a vertex that will go to the video card's -// rasterization pipeline and get drawn to the screen. - -[domain("quad")] -DS_OUTPUT BezierDS( HS_CONSTANT_DATA_OUTPUT input, - float2 UV : SV_DomainLocation, - const OutputPatch<HS_OUTPUT, OUTPUT_PATCH_SIZE> bezpatch ) -{ - float4 BasisU = BernsteinBasis( UV.x ); - float4 BasisV = BernsteinBasis( UV.y ); - float4 dBasisU = dBernsteinBasis( UV.x ); - float4 dBasisV = dBernsteinBasis( UV.y ); - - float3 WorldPos = EvaluateBezier( bezpatch, BasisU, BasisV ); - float3 Tangent = EvaluateBezier( bezpatch, dBasisU, BasisV ); - float3 BiTangent = EvaluateBezier( bezpatch, BasisU, dBasisV ); - float3 Norm = normalize( cross( Tangent, BiTangent ) ); - - DS_OUTPUT Output; - Output.vPosition = mul( float4(WorldPos,1), g_mViewProjection ); - Output.vWorldPos = WorldPos; - Output.vNormal = Norm; - - return Output; -} - -//-------------------------------------------------------------------------------------- -// Smooth shading pixel shader section -//-------------------------------------------------------------------------------------- - -// The pixel shader works the same as it would in a normal graphics pipeline. -// In this sample, it performs very simple N dot L lighting. - -float4 BezierPS( DS_OUTPUT Input ) : SV_TARGET -{ - float3 N = normalize(Input.vNormal); - float3 L = normalize(Input.vWorldPos - g_vCameraPosWorld); - return abs(dot(N, L)) * float4(1, 0, 0, 1); -} - -//-------------------------------------------------------------------------------------- -// Solid color shading pixel shader (used for wireframe overlay) -//-------------------------------------------------------------------------------------- -float4 SolidColorPS( DS_OUTPUT Input ) : SV_TARGET -{ - // Return a solid green color - return float4( 0, 1, 0, 1 ); -} diff --git a/tests/hlsl/dxsdk/SimpleSample11/SimpleSample.fx b/tests/hlsl/dxsdk/SimpleSample11/SimpleSample.fx deleted file mode 100644 index 00883ce70..000000000 --- a/tests/hlsl/dxsdk/SimpleSample11/SimpleSample.fx +++ /dev/null @@ -1,112 +0,0 @@ -//TEST_IGNORE_FILE: -//-------------------------------------------------------------------------------------- -// File: SimpleSample.fx -// -// The effect file for the SimpleSample sample. -// -// Copyright (c) Microsoft Corporation. All rights reserved. -//-------------------------------------------------------------------------------------- - - -//-------------------------------------------------------------------------------------- -// Global variables -//-------------------------------------------------------------------------------------- -float4 g_MaterialAmbientColor; // Material's ambient color -float4 g_MaterialDiffuseColor; // Material's diffuse color -float3 g_LightDir; // Light's direction in world space -float4 g_LightDiffuse; // Light's diffuse color -texture g_MeshTexture; // Color texture for mesh - -float g_fTime; // App's time in seconds -float4x4 g_mWorld; // World matrix for object -float4x4 g_mWorldViewProjection; // World * View * Projection matrix - - - -//-------------------------------------------------------------------------------------- -// Texture samplers -//-------------------------------------------------------------------------------------- -sampler MeshTextureSampler = -sampler_state -{ - Texture = <g_MeshTexture>; - MipFilter = LINEAR; - MinFilter = LINEAR; - MagFilter = LINEAR; -}; - - -//-------------------------------------------------------------------------------------- -// Vertex shader output structure -//-------------------------------------------------------------------------------------- -struct VS_OUTPUT -{ - float4 Position : POSITION; // vertex position - float4 Diffuse : COLOR0; // vertex diffuse color (note that COLOR0 is clamped from 0..1) - float2 TextureUV : TEXCOORD0; // vertex texture coords -}; - - -//-------------------------------------------------------------------------------------- -// This shader computes standard transform and lighting -//-------------------------------------------------------------------------------------- -VS_OUTPUT RenderSceneVS( float4 vPos : POSITION, - float3 vNormal : NORMAL, - float2 vTexCoord0 : TEXCOORD0 ) -{ - VS_OUTPUT Output; - float3 vNormalWorldSpace; - - // Transform the position from object space to homogeneous projection space - Output.Position = mul(vPos, g_mWorldViewProjection); - - // Transform the normal from object space to world space - vNormalWorldSpace = normalize(mul(vNormal, (float3x3)g_mWorld)); // normal (world space) - - // Calc diffuse color - Output.Diffuse.rgb = g_MaterialDiffuseColor * g_LightDiffuse * max(0,dot(vNormalWorldSpace, g_LightDir)) + - g_MaterialAmbientColor; - Output.Diffuse.a = 1.0f; - - // Just copy the texture coordinate through - Output.TextureUV = vTexCoord0; - - return Output; -} - - -//-------------------------------------------------------------------------------------- -// Pixel shader output structure -//-------------------------------------------------------------------------------------- -struct PS_OUTPUT -{ - float4 RGBColor : COLOR0; // Pixel color -}; - - -//-------------------------------------------------------------------------------------- -// This shader outputs the pixel's color by modulating the texture's -// color with diffuse material color -//-------------------------------------------------------------------------------------- -PS_OUTPUT RenderScenePS( VS_OUTPUT In ) -{ - PS_OUTPUT Output; - - // Lookup mesh texture and modulate it with diffuse - Output.RGBColor = tex2D(MeshTextureSampler, In.TextureUV) * In.Diffuse; - - return Output; -} - - -//-------------------------------------------------------------------------------------- -// Renders scene -//-------------------------------------------------------------------------------------- -technique RenderScene -{ - pass P0 - { - VertexShader = compile vs_2_0 RenderSceneVS(); - PixelShader = compile ps_2_0 RenderScenePS(); - } -} diff --git a/tests/hlsl/dxsdk/SimpleSample11/SimpleSample.hlsl b/tests/hlsl/dxsdk/SimpleSample11/SimpleSample.hlsl deleted file mode 100644 index 7c688940b..000000000 --- a/tests/hlsl/dxsdk/SimpleSample11/SimpleSample.hlsl +++ /dev/null @@ -1,86 +0,0 @@ -//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues. -//TEST:COMPARE_HLSL: -profile vs_4_0 -entry RenderSceneVS -profile ps_4_0 -entry RenderScenePS -//-------------------------------------------------------------------------------------- -// File: SimpleSample.hlsl -// -// The HLSL file for the SimpleSample sample for the Direct3D 11 device -// -// Copyright (c) Microsoft Corporation. All rights reserved. -//-------------------------------------------------------------------------------------- - - -//-------------------------------------------------------------------------------------- -// Constant Buffers -//-------------------------------------------------------------------------------------- -cbuffer cbPerObject : register( b0 ) -{ - matrix g_mWorldViewProjection : packoffset( c0 ); - matrix g_mWorld : packoffset( c4 ); - float4 g_MaterialAmbientColor : packoffset( c8 ); - float4 g_MaterialDiffuseColor : packoffset( c9 ); -} - -cbuffer cbPerFrame : register( b1 ) -{ - float3 g_vLightDir : packoffset( c0 ); - float g_fTime : packoffset( c0.w ); - float4 g_LightDiffuse : packoffset( c1 ); -}; - -//----------------------------------------------------------------------------------------- -// Textures and Samplers -//----------------------------------------------------------------------------------------- -Texture2D g_txDiffuse : register( t0 ); -SamplerState g_samLinear : register( s0 ); - -//-------------------------------------------------------------------------------------- -// shader input/output structure -//-------------------------------------------------------------------------------------- -struct VS_INPUT -{ - float4 Position : POSITION; // vertex position - float3 Normal : NORMAL; // this normal comes in per-vertex - float2 TextureUV : TEXCOORD0;// vertex texture coords -}; - -struct VS_OUTPUT -{ - float4 Position : SV_POSITION; // vertex position - float4 Diffuse : COLOR0; // vertex diffuse color (note that COLOR0 is clamped from 0..1) - float2 TextureUV : TEXCOORD0; // vertex texture coords -}; - -//-------------------------------------------------------------------------------------- -// This shader computes standard transform and lighting -//-------------------------------------------------------------------------------------- -VS_OUTPUT RenderSceneVS( VS_INPUT input ) -{ - VS_OUTPUT Output; - float3 vNormalWorldSpace; - - // Transform the position from object space to homogeneous projection space - Output.Position = mul( input.Position, g_mWorldViewProjection ); - - // Transform the normal from object space to world space - vNormalWorldSpace = normalize(mul(input.Normal, (float3x3)g_mWorld)); // normal (world space) - - // Calc diffuse color - Output.Diffuse.rgb = g_MaterialDiffuseColor * g_LightDiffuse * max(0,dot(vNormalWorldSpace, g_vLightDir)) + - g_MaterialAmbientColor; - Output.Diffuse.a = 1.0f; - - // Just copy the texture coordinate through - Output.TextureUV = input.TextureUV; - - return Output; -} - -//-------------------------------------------------------------------------------------- -// This shader outputs the pixel's color by modulating the texture's -// color with diffuse material color -//-------------------------------------------------------------------------------------- -float4 RenderScenePS( VS_OUTPUT In ) : SV_TARGET -{ - // Lookup mesh texture and modulate it with diffuse - return g_txDiffuse.Sample( g_samLinear, In.TextureUV ) * In.Diffuse; -} diff --git a/tests/hlsl/dxsdk/SubD11/SubD11.hlsl b/tests/hlsl/dxsdk/SubD11/SubD11.hlsl deleted file mode 100644 index 839e004e6..000000000 --- a/tests/hlsl/dxsdk/SubD11/SubD11.hlsl +++ /dev/null @@ -1,1238 +0,0 @@ -//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues. -//TEST:COMPARE_HLSL: -profile vs_4_0 -entry PatchSkinningVS -entry MeshSkinningVS -profile hs_5_0 -entry SubDToBezierHS -entry SubDToBezierHS4444 -profile ds_5_0 -entry BezierEvalDS -profile ps_4_0 -entry SmoothPS -entry SolidColorPS -//-------------------------------------------------------------------------------------- -// File: SubD11.hlsl -// -// This file contains functions to convert from a Catmull-Clark subdivision -// representation to a bicubic patch representation. -// -// Copyright (c) Microsoft Corporation. All rights reserved. -//-------------------------------------------------------------------------------------- - -//Work-around for an optimization rule problem in the June 2010 HLSL Compiler (9.29.952.3111) -//see http://support.microsoft.com/kb/2448404 -#if D3DX_VERSION == 0xa2b -#pragma ruledisable 0x0802405f -#endif - -//-------------------------------------------------------------------------------------- -// A sample extraordinary SubD quad is represented by the following diagram: -// -// 15 Valences: -// / \ Vertex 0: 5 -// / 14 Vertex 1: 4 -// 17---------16 / \ Vertex 2: 5 -// | \ | / \ Vertex 3: 3 -// | \ | / 13 -// | \ |/ / Prefixes: -// | 3------2------12 Vertex 0: 9 -// | | | | Vertex 1: 12 -// | | | | Vertex 2: 16 -// 4----0------1------11 Vertex 3: 18 -// / /| | | -// / / | | | -// 5 / 8------9------10 -// \ / / -// 6 / -// \ / -// 7 -// -// Where the quad bounded by vertices 0,1,2,3 represents the actual subd surface of interest -// The 1-ring neighborhood of the quad is represented by vertices 4 through 17. The counter- -// clockwise winding of this 1-ring neighborhood is important, especially when it comes to compute -// the corner vertices of the bicubic patch that we will use to approximate the subd quad (0,1,2,3). -// -// The resulting bicubic patch fits within the subd quad (0,1,2,3) and has the following control -// point layout: -// -// 12--13--14--15 -// 8---9--10--11 -// 4---5---6---7 -// 0---1---2---3 -// -// The inner 4 control points of the bicubic patch are a combination of only the vertices (0,1,2,3) -// of the subd quad. However, the corner control points for the bicubic patch (0,3,15,12) are actually -// a much more complex weighting of the subd patch and the 1-ring neighborhood. In the example above -// the bicubic control point 0 is actually a weighted combination of subd points 0,1,2,3 and 1-ring -// neighborhood points 17, 4, 5, 6, 7, 8, and 9. We can see that the 1-ring neighbor hood is simply -// walked from the prefix value from the previous corner (corner 3 in this case) to the prefix -// prefix value for the current corner. We add one more vertex on either side of the prefix values -// and we have all the data necessary to calculate the value for the corner points. -// -// The edge control points of the bicubic patch (1,2,13,14,4,8,7,11) are also combinations of their -// neighbors, but fortunately each one is only a combination of 6 values and no walk is required. -//-------------------------------------------------------------------------------------- - -#define MOD4(x) ((x)&3) -#ifndef MAX_POINTS -#define MAX_POINTS 32 -#endif -#define MAX_BONE_MATRICES 80 - -//-------------------------------------------------------------------------------------- -// Textures -//-------------------------------------------------------------------------------------- -Texture2D g_txHeight : register( t0 ); // Height and Bump texture -Texture2D g_txDiffuse : register( t1 ); // Diffuse texture -Texture2D g_txSpecular : register( t2 ); // Specular texture - -//-------------------------------------------------------------------------------------- -// Samplers -//-------------------------------------------------------------------------------------- -SamplerState g_samLinear : register( s0 ); -SamplerState g_samPoint : register( s0 ); - -//-------------------------------------------------------------------------------------- -// Constant Buffers -//-------------------------------------------------------------------------------------- -cbuffer cbTangentStencilConstants : register( b0 ) -{ - float g_TanM[1024]; // Tangent patch stencils precomputed by the application - float g_fCi[16]; // Valence coefficients precomputed by the application -}; - -cbuffer cbPerMesh : register( b1 ) -{ - matrix g_mConstBoneWorld[MAX_BONE_MATRICES]; -}; - -cbuffer cbPerFrame : register( b2 ) -{ - matrix g_mViewProjection; - float3 g_vCameraPosWorld; - float g_fTessellationFactor; - float g_fDisplacementHeight; - float3 g_vSolidColor; -}; - -cbuffer cbPerSubset : register( b3 ) -{ - int g_iPatchStartIndex; -} - -//-------------------------------------------------------------------------------------- -Buffer<uint4> g_ValencePrefixBuffer : register( t0 ); - -//-------------------------------------------------------------------------------------- -struct VS_CONTROL_POINT_OUTPUT -{ - float3 vPosition : WORLDPOS; - float2 vUV : TEXCOORD0; - float3 vTangent : TANGENT; -}; - -struct BEZIER_CONTROL_POINT -{ - float3 vPosition : BEZIERPOS; -}; - -struct PS_INPUT -{ - float3 vWorldPos : POSITION; - float3 vNormal : NORMAL; - float2 vUV : TEXCOORD; - float3 vTangent : TANGENT; - float3 vBiTangent : BITANGENT; -}; - -//-------------------------------------------------------------------------------------- -// SubD to Bezier helper functions -//-------------------------------------------------------------------------------------- -// Helps with getting tangent stencils from the g_TanM constant array -#define TANM(a,v) ( g_TanM[ Val[v]*64 + (a) ] ) - -//-------------------------------------------------------------------------------------- -float3 ComputeInteriorVertex( uint index, - uint Val[4], - const in InputPatch<VS_CONTROL_POINT_OUTPUT, MAX_POINTS> ip ) -{ - switch( index ) - { - case 0: - return (ip[0].vPosition*Val[0] + ip[1].vPosition*2 + ip[2].vPosition + ip[3].vPosition*2) / (5+Val[0]); - case 1: - return (ip[0].vPosition*2 + ip[1].vPosition*Val[1] + ip[2].vPosition*2 + ip[3].vPosition) / (5+Val[1]); - case 2: - return (ip[0].vPosition + ip[1].vPosition*2 + ip[2].vPosition*Val[2] + ip[3].vPosition*2) / (5+Val[2]); - case 3: - return (ip[0].vPosition*2 + ip[1].vPosition + ip[2].vPosition*2 + ip[3].vPosition*Val[3]) / (5+Val[3]); - } - - return float3(0,0,0); -} - -//-------------------------------------------------------------------------------------- -// Computes the corner vertices of the output UV patch. The corner vertices are -// a weighted combination of all points that are "connected" to that corner by an edge. -// The interior 4 points of the original subd quad are easy to get. The points in the -// 1-ring neighborhood around the interior quad are not. -// -// Because the valence of that corner could be any number between 3 and 16, we need to -// walk around the subd patch vertices connected to that point. This is there the -// Pref (prefix) values come into play. Each corner has a prefix value that is the index -// of the last value around the 1-ring neighborhood that should be used in calculating -// the coefficient of that corner. The walk goes from the prefix value of the previous -// corner to the prefix value of the current corner. -//-------------------------------------------------------------------------------------- -void ComputeCornerVertex( uint index, - out float3 CornerB, // Corner for the Bezier patch - out float3 CornerU, // Corner for the tangent patch - out float3 CornerV, // Corner for the bitangent patch - const in InputPatch<VS_CONTROL_POINT_OUTPUT, MAX_POINTS> ip, - const in uint Val[4], - const in uint Pref[4] ) -{ - const float fOWt = 1; - const float fEWt = 4; - - // Figure out where to start the walk by using the previous corner's prefix value - uint PrefIm1 = 0; - uint uStart = 4; - if( index ) - { - PrefIm1 = Pref[index-1]; - uStart = PrefIm1; - } - - // Setup the walk indices - uint uTIndexStart = 2 - (index&1); - uint uTIndex = uTIndexStart; - - // Calculate the N*N weight for the final value - CornerB = (Val[index]*Val[index])*ip[index].vPosition; // n^2 part - - // Zero out the corners - CornerU = float4(0,0,0,0); - CornerV = float4(0,0,0,0); - - const uint uV = Val[index] + ( ( index & 1 ) ? 1 : -1 ); - - // Start the walk with the uStart prefix (the prefix of the corner before us) - CornerB += ip[uStart].vPosition * fEWt; - CornerU += ip[uStart].vPosition * TANM( uTIndex * 2, index ); - CornerV += ip[uStart].vPosition * TANM( ( ( uTIndex + uV ) % Val[index] ) * 2, index); - - // Gather all vertices between the previous corner's prefix and our own prefix - // We'll do two at a time, since they always come in twos - while(uStart < Pref[index]-1) - { - ++uStart; - CornerB += ip[uStart].vPosition * fOWt; - CornerU += ip[uStart].vPosition * TANM( uTIndex * 2 + 1, index ); - CornerV += ip[uStart].vPosition * TANM( ( ( uTIndex + uV ) % Val[index] ) * 2 + 1, index ); - - ++uTIndex; - ++uStart; - CornerB += ip[uStart].vPosition * fEWt; - CornerU += ip[uStart].vPosition * TANM( ( uTIndex % Val[index] ) * 2, index ); - CornerV += ip[uStart].vPosition * TANM( ( ( uTIndex+uV)%Val[index]) * 2, index ); - } - ++uStart; - - // Add in the last guy and make sure to wrap to the beginning if we're the last corner - if (index == 3) - uStart = 4; - CornerB += ip[uStart].vPosition * fOWt; - CornerU += ip[uStart].vPosition * TANM( ( uTIndex % Val[index] ) * 2 + 1, index ); - CornerV += ip[uStart].vPosition * TANM( ( ( uTIndex + uV ) % Val[index] ) * 2 + 1, index ); - - // Add in the guy before the prefix as well - if (index) - uStart = PrefIm1-1; - else - uStart = Pref[3]-1; - uTIndex = uTIndexStart-1; - - CornerB += ip[uStart].vPosition * fOWt; - CornerU += ip[uStart].vPosition * TANM( ( uTIndex % Val[index] ) * 2 + 1, index ); - CornerV += ip[uStart].vPosition * TANM( ( ( uTIndex + uV ) % Val[index] ) * 2 + 1, index ); - - // We're done with the walk now. Now we need to add the contributions of the original subd quad. - CornerB += ip[MOD4(index+1)].vPosition * fEWt; - CornerB += ip[MOD4(index+2)].vPosition * fOWt; - CornerB += ip[MOD4(index+3)].vPosition * fEWt; - - uTIndex = 0 + (index&1)*(Val[index]-1); - uStart = MOD4(index+1); - CornerU += ip[uStart].vPosition * TANM( ( uTIndex % Val[index] ) * 2, index ); - CornerV += ip[uStart].vPosition * TANM( ( ( uTIndex + uV ) % Val[index] ) * 2, index ); - - uStart = MOD4(index+2); - CornerU += ip[uStart].vPosition * TANM( ( uTIndex % Val[index] ) * 2 + 1, index ); - CornerV += ip[uStart].vPosition * TANM( ( ( uTIndex + uV ) % Val[index] ) * 2 + 1, index ); - - uStart = MOD4(index+3); - uTIndex = (uTIndex+1)%Val[index]; - - CornerU += ip[uStart].vPosition * TANM( ( uTIndex % Val[index] ) * 2, index ); - CornerV += ip[uStart].vPosition * TANM( ( ( uTIndex + uV ) % Val[index] ) * 2, index ); - - // Normalize the corner weights - CornerB *= 1.0f / ( Val[index] * Val[index] + 5 * Val[index] ); // normalize - - // fixup signs from directional derivatives... - if( !((index - 1) & 2) ) // 1 and 2 - CornerU *= -1; - - if( index >= 2 ) // 2 and 3 - CornerV *= -1; -} - -void ComputeCornerVertex4444( uint index, - out float3 CornerB, // Corner for the Bezier patch - out float3 CornerU, // Corner for the tangent patch - out float3 CornerV, // Corner for the bitangent patch - const in InputPatch<VS_CONTROL_POINT_OUTPUT, MAX_POINTS> ip, - const in uint Val[4], - const in uint Pref[4] ) -{ - const float fOWt = 1; - const float fEWt = 4; - - // Figure out where to start the walk by using the previous corner's prefix value - uint PrefIm1 = 0; - uint uStart = 4; - if( index ) - { - PrefIm1 = Pref[index-1]; - uStart = PrefIm1; - } - - // Setup the walk indices - uint uTIndexStart = 2 - (index&1); - uint uTIndex = uTIndexStart; - - // Calculate the N*N weight for the final value - CornerB = (Val[index]*Val[index])*ip[index].vPosition; // n^2 part - - // Zero out the corners - CornerU = float4(0,0,0,0); - CornerV = float4(0,0,0,0); - - const uint uV = Val[index] + ( ( index & 1 ) ? 1 : -1 ); - - // Start the walk with the uStart prefix (the prefix of the corner before us) - CornerB += ip[uStart].vPosition * fEWt; - CornerU += ip[uStart].vPosition * TANM( uTIndex * 2, index ); - CornerV += ip[uStart].vPosition * TANM( ( ( uTIndex + uV ) % Val[index] ) * 2, index); - - // Gather all vertices between the previous corner's prefix and our own prefix - // We'll do two at a time, since they always come in twos - while(uStart < Pref[index]-1) - { - ++uStart; - CornerB += ip[uStart].vPosition * fOWt; - CornerU += ip[uStart].vPosition * TANM( uTIndex * 2 + 1, index ); - CornerV += ip[uStart].vPosition * TANM( ( ( uTIndex + uV ) % Val[index] ) * 2 + 1, index ); - - ++uTIndex; - ++uStart; - CornerB += ip[uStart].vPosition * fEWt; - CornerU += ip[uStart].vPosition * TANM( ( uTIndex % Val[index] ) * 2, index ); - CornerV += ip[uStart].vPosition * TANM( ( ( uTIndex+uV)%Val[index]) * 2, index ); - } - ++uStart; - - // Add in the last guy and make sure to wrap to the beginning if we're the last corner - if (index == 3) - uStart = 4; - CornerB += ip[uStart].vPosition * fOWt; - CornerU += ip[uStart].vPosition * TANM( ( uTIndex % Val[index] ) * 2 + 1, index ); - CornerV += ip[uStart].vPosition * TANM( ( ( uTIndex + uV ) % Val[index] ) * 2 + 1, index ); - - // Add in the guy before the prefix as well - if (index) - uStart = PrefIm1-1; - else - uStart = Pref[3]-1; - uTIndex = uTIndexStart-1; - - CornerB += ip[uStart].vPosition * fOWt; - CornerU += ip[uStart].vPosition * TANM( ( uTIndex % Val[index] ) * 2 + 1, index ); - CornerV += ip[uStart].vPosition * TANM( ( ( uTIndex + uV ) % Val[index] ) * 2 + 1, index ); - - // We're done with the walk now. Now we need to add the contributions of the original subd quad. - CornerB += ip[MOD4(index+1)].vPosition * fEWt; - CornerB += ip[MOD4(index+2)].vPosition * fOWt; - CornerB += ip[MOD4(index+3)].vPosition * fEWt; - - uTIndex = 0 + (index&1)*(Val[index]-1); - uStart = MOD4(index+1); - CornerU += ip[uStart].vPosition * TANM( ( uTIndex % Val[index] ) * 2, index ); - CornerV += ip[uStart].vPosition * TANM( ( ( uTIndex + uV ) % Val[index] ) * 2, index ); - - uStart = MOD4(index+2); - CornerU += ip[uStart].vPosition * TANM( ( uTIndex % Val[index] ) * 2 + 1, index ); - CornerV += ip[uStart].vPosition * TANM( ( ( uTIndex + uV ) % Val[index] ) * 2 + 1, index ); - - uStart = MOD4(index+3); - uTIndex = (uTIndex+1)%Val[index]; - - CornerU += ip[uStart].vPosition * TANM( ( uTIndex % Val[index] ) * 2, index ); - CornerV += ip[uStart].vPosition * TANM( ( ( uTIndex + uV ) % Val[index] ) * 2, index ); - - // Normalize the corner weights - CornerB *= 1.0f / ( Val[index] * Val[index] + 5 * Val[index] ); // normalize - - // fixup signs from directional derivatives... - if( !((index - 1) & 2) ) // 1 and 2 - CornerU *= -1; - - if( index >= 2 ) // 2 and 3 - CornerV *= -1; -} - -//-------------------------------------------------------------------------------------- -// Computes the edge vertices of the output bicubic patch. The edge vertices -// (1,2,4,7,8,11,13,14) are a weighted (by valence) combination of 6 interior and 1-ring -// neighborhood points. However, we don't have to do the walk on this one since we -// don't need all of the neighbor points attached to this vertex. -//-------------------------------------------------------------------------------------- -float3 ComputeEdgeVertex( in uint index /* 0-7 */, - const in InputPatch<VS_CONTROL_POINT_OUTPUT, MAX_POINTS> ip, - const in uint Val[4], - const in uint Pref[4] ) -{ - float val1 = 2 * Val[0] + 10; - float val2 = 2 * Val[1] + 10; - float val13 = 2 * Val[3] + 10; - float val14 = 2 * Val[2] + 10; - float val4 = val1; - float val8 = val13; - float val7 = val2; - float val11 = val14; - - float3 vRetVal = float3(0,0,0); - switch( index ) - { - // Horizontal - case 0: - vRetVal = (Val[0]*2*ip[0].vPosition + 4*ip[1].vPosition + ip[2].vPosition + ip[3].vPosition*2 + - 2*ip[Pref[0]-1].vPosition + ip[Pref[0]].vPosition) / val1; - break; - case 1: - vRetVal = (4*ip[0].vPosition + Val[1]*2*ip[1].vPosition + ip[2].vPosition*2 + ip[3].vPosition + - ip[Pref[0]-1].vPosition + 2*ip[Pref[0]].vPosition) / val2; - break; - case 2: - vRetVal = (2*ip[0].vPosition + ip[1].vPosition + 4*ip[2].vPosition + ip[3].vPosition*2*Val[3] + - 2*ip[Pref[2]].vPosition + ip[Pref[2]-1].vPosition) / val13; - break; - case 3: - vRetVal = (ip[0].vPosition + 2*ip[1].vPosition + Val[2]*2*ip[2].vPosition + ip[3].vPosition*4 + - ip[Pref[2]].vPosition + 2*ip[Pref[2]-1].vPosition) / val14; - break; - // Vertical - case 4: - vRetVal = (Val[0]*2*ip[0].vPosition + 2*ip[1].vPosition + ip[2].vPosition + ip[3].vPosition*4 + - 2*ip[4].vPosition + ip[Pref[3]-1].vPosition) / val4; - break; - case 5: - vRetVal = (4*ip[0].vPosition + ip[1].vPosition + 2*ip[2].vPosition + ip[3].vPosition*2*Val[3] + - ip[4].vPosition + 2*ip[Pref[3]-1].vPosition) / val8; - break; - case 6: - vRetVal = (2*ip[0].vPosition + Val[1]*2*ip[1].vPosition + 4*ip[2].vPosition + ip[3].vPosition + - 2*ip[Pref[1]-1].vPosition + ip[Pref[1]].vPosition) / val7; - break; - case 7: - vRetVal = (ip[0].vPosition + 4*ip[1].vPosition + Val[2]*2*ip[2].vPosition + 2*ip[3].vPosition + - ip[Pref[1]-1].vPosition + 2*ip[Pref[1]].vPosition) / val11; - break; - } - - return vRetVal; -} - -//-------------------------------------------------------------------------------------- -// Helper function -//-------------------------------------------------------------------------------------- -void BezierRaise(inout float3 pQ[3], out float3 pC[4]) -{ - pC[0] = pQ[0]; - pC[3] = pQ[2]; - - for( int i=1; i<3; i++ ) - { - pC[i] = ( 1.0f / 3.0f ) * ( pQ[i - 1] * i + ( 3.0f - i ) * pQ[i] ); - } -} - -//-------------------------------------------------------------------------------------- -// Computes the tangent patch from the input bezier patch -//-------------------------------------------------------------------------------------- -void ComputeTanPatch( const OutputPatch<BEZIER_CONTROL_POINT, 16> bezpatch, - inout float3 vOut[16], - in float fCWts[4], - in float3 vCorner[4], - in float3 vCornerLocal[4], - in const uint cX, - in const uint cY) -{ - float3 vQuad[3]; - float3 vQuadB[3]; - float3 vCubic[4]; - - // boundary edges are really simple... - vQuad[0] = vCornerLocal[0]; - vQuad[2] = vCornerLocal[1]; - vQuad[1] = 3.0f*(bezpatch[2*cX+0*cY].vPosition-bezpatch[1*cX+0*cY].vPosition); - - BezierRaise(vQuad,vCubic); - vOut[1*cX + 0*cY] = vCubic[1]; - vOut[2*cX + 0*cY] = vCubic[2]; - - vQuad[0] = vCornerLocal[2]; - vQuad[2] = vCornerLocal[3]; - vQuad[1] = 3.0f*(bezpatch[2*cX+3*cY].vPosition-bezpatch[1*cX+3*cY].vPosition); - - BezierRaise(vQuad,vCubic); - vOut[1*cX + 3*cY] = vCubic[1]; - vOut[2*cX + 3*cY] = vCubic[2]; - - // two internal edges - this is where work happens... - float3 vA,vB,vC,vD,vE; - float fC0,fC1; - vQuad[1] = 3.0f*(bezpatch[2*cX+2*cY].vPosition-bezpatch[1*cX+2*cY].vPosition); - // also do "second" scan line - vQuadB[1] = 3.0f*(bezpatch[2*cX+1*cY].vPosition-bezpatch[1*cX+1*cY].vPosition); - - vD = 3.0f*(bezpatch[1*cX + 2*cY].vPosition - bezpatch[0*cX + 2*cY].vPosition); - vE = 3.0f*(bezpatch[1*cX + 1*cY].vPosition - bezpatch[0*cX + 1*cY].vPosition); // used later... - - fC0 = fCWts[3]; - fC1 = fCWts[0]; - - // sign flip - vA = -vCorner[3]; - vB = 3.0f*(bezpatch[0*cX + 1*cY].vPosition - bezpatch[0*cX + 2*cY].vPosition); - vC = -vCorner[0]; - - vQuad[0] = 1.0f/3.0f*(2.0f*fC0*vB - fC1*vA) + vD; - vQuadB[0] = 1.0f/3.0f*(fC0*vC - 2.0f*fC1*vB) + vE; - - // do end of strip - same as before, but stuff is switched around... - vC = vCorner[2]; - vB = 3.0f*(bezpatch[3*cX + 2*cY].vPosition - bezpatch[3*cX + 1*cY].vPosition); - vA = vCorner[1]; - - vD = 3.0f*(bezpatch[2*cX + 1*cY].vPosition - bezpatch[3*cX + 1*cY].vPosition); - vE = 3.0f*(bezpatch[2*cX + 2*cY].vPosition - bezpatch[3*cX + 2*cY].vPosition); - - fC0 = fCWts[1]; - fC1 = fCWts[2]; - - vQuadB[2] = 1.0f/3.0f*(2.0f*fC0*vB - fC1*vA) + vD; - vQuad[2] = 1.0f/3.0f*(fC0*vC - 2.0f*fC1*vB) + vE; - - vQuadB[2] *= -1.0f; - vQuad[2] *= -1.0f; - - BezierRaise(vQuad,vCubic); - - vOut[0*cX + 2*cY] = vCubic[0]; - vOut[1*cX + 2*cY] = vCubic[1]; - vOut[2*cX + 2*cY] = vCubic[2]; - vOut[3*cX + 2*cY] = vCubic[3]; - - BezierRaise(vQuadB,vCubic); - - vOut[0*cX + 1*cY] = vCubic[0]; - vOut[1*cX + 1*cY] = vCubic[1]; - vOut[2*cX + 1*cY] = vCubic[2]; - vOut[3*cX + 1*cY] = vCubic[3]; -} - -//-------------------------------------------------------------------------------------- -// Skinning vertex shader Section -//-------------------------------------------------------------------------------------- -struct VS_CONTROL_POINT_INPUT -{ - float3 vPosition : POSITION; - float2 vUV : TEXCOORD0; - float3 vTangent : TANGENT; - uint4 vBones : BONES; - float4 vWeights : WEIGHTS; -}; - -VS_CONTROL_POINT_OUTPUT PatchSkinningVS( VS_CONTROL_POINT_INPUT Input ) -{ - VS_CONTROL_POINT_OUTPUT Output; - - float4 vInputPos = float4( Input.vPosition, 1 ); - float4 vWorldPos = float4( 0, 0, 0, 0 ); - - vWorldPos += mul( vInputPos, g_mConstBoneWorld[ Input.vBones.x ] ) * Input.vWeights.x; - vWorldPos += mul( vInputPos, g_mConstBoneWorld[ Input.vBones.y ] ) * Input.vWeights.y; - vWorldPos += mul( vInputPos, g_mConstBoneWorld[ Input.vBones.z ] ) * Input.vWeights.z; - vWorldPos += mul( vInputPos, g_mConstBoneWorld[ Input.vBones.w ] ) * Input.vWeights.w; - - float3 vWorldTan = float3( 0, 0, 0 ); - vWorldTan += mul( Input.vTangent, (float3x3)g_mConstBoneWorld[ Input.vBones.x ] ) * Input.vWeights.x; - vWorldTan += mul( Input.vTangent, (float3x3)g_mConstBoneWorld[ Input.vBones.y ] ) * Input.vWeights.y; - vWorldTan += mul( Input.vTangent, (float3x3)g_mConstBoneWorld[ Input.vBones.z ] ) * Input.vWeights.z; - vWorldTan += mul( Input.vTangent, (float3x3)g_mConstBoneWorld[ Input.vBones.w ] ) * Input.vWeights.w; - - Output.vPosition = vWorldPos; - Output.vUV = Input.vUV; - Output.vTangent = vWorldTan; - - return Output; -} - -struct VS_MESH_POINT_INPUT -{ - float3 vPosition : POSITION; - float2 vUV : TEXCOORD0; - float3 vNormal : NORMAL; - float3 vTangent : TANGENT; - uint4 vBones : BONES; - float4 vWeights : WEIGHTS; -}; - -struct VS_MESH_POINT_OUTPUT -{ - float3 vWorldPos : POSITION; - float3 vNormal : NORMAL; - float2 vUV : TEXCOORD; - float3 vTangent : TANGENT; - float3 vBiTangent : BITANGENT; - - float4 vPosition : SV_POSITION; -}; - -VS_MESH_POINT_OUTPUT MeshSkinningVS( VS_MESH_POINT_INPUT Input ) -{ - VS_MESH_POINT_OUTPUT Output; - - float4 vInputPos = float4( Input.vPosition, 1 ); - float4 vWorldPos = float4( 0, 0, 0, 0 ); - - vWorldPos += mul( vInputPos, g_mConstBoneWorld[ Input.vBones.x ] ) * Input.vWeights.x; - vWorldPos += mul( vInputPos, g_mConstBoneWorld[ Input.vBones.y ] ) * Input.vWeights.y; - vWorldPos += mul( vInputPos, g_mConstBoneWorld[ Input.vBones.z ] ) * Input.vWeights.z; - vWorldPos += mul( vInputPos, g_mConstBoneWorld[ Input.vBones.w ] ) * Input.vWeights.w; - - float3 vWorldTan = float3( 0, 0, 0 ); - vWorldTan += mul( Input.vTangent, (float3x3)g_mConstBoneWorld[ Input.vBones.x ] ) * Input.vWeights.x; - vWorldTan += mul( Input.vTangent, (float3x3)g_mConstBoneWorld[ Input.vBones.y ] ) * Input.vWeights.y; - vWorldTan += mul( Input.vTangent, (float3x3)g_mConstBoneWorld[ Input.vBones.z ] ) * Input.vWeights.z; - vWorldTan += mul( Input.vTangent, (float3x3)g_mConstBoneWorld[ Input.vBones.w ] ) * Input.vWeights.w; - - float3 vWorldNormal = float3( 0, 0, 0 ); - vWorldNormal += mul( Input.vNormal, (float3x3)g_mConstBoneWorld[ Input.vBones.x ] ) * Input.vWeights.x; - vWorldNormal += mul( Input.vNormal, (float3x3)g_mConstBoneWorld[ Input.vBones.y ] ) * Input.vWeights.y; - vWorldNormal += mul( Input.vNormal, (float3x3)g_mConstBoneWorld[ Input.vBones.z ] ) * Input.vWeights.z; - vWorldNormal += mul( Input.vNormal, (float3x3)g_mConstBoneWorld[ Input.vBones.w ] ) * Input.vWeights.w; - - Output.vWorldPos = vWorldPos.xyz; - Output.vPosition = mul( float4( vWorldPos.xyz, 1 ), g_mViewProjection ); - Output.vUV = Input.vUV; - Output.vTangent = vWorldTan; - Output.vNormal = vWorldNormal; - Output.vBiTangent = cross( vWorldNormal, vWorldTan ); - - return Output; -} - -//-------------------------------------------------------------------------------------- -// SubD to Bezier hull shader Section -//-------------------------------------------------------------------------------------- -struct HS_CONSTANT_DATA_OUTPUT -{ - float Edges[4] : SV_TessFactor; - float Inside[2] : SV_InsideTessFactor; - - float3 vTangent[4] : TANGENT; - float2 vUV[4] : TEXCOORD; - float3 vTanUCorner[4] : TANUCORNER; - float3 vTanVCorner[4] : TANVCORNER; - float4 vCWts : TANWEIGHTS; -}; - -//-------------------------------------------------------------------------------------- -// Load per-patch valence and prefix data -//-------------------------------------------------------------------------------------- -void LoadValenceAndPrefixData( in uint PatchID, out uint Val[4], out uint Prefixes[4] ) -{ - PatchID += g_iPatchStartIndex; - uint4 ValPack = g_ValencePrefixBuffer.Load( PatchID * 2 ); - uint4 PrefPack = g_ValencePrefixBuffer.Load( PatchID * 2 + 1 ); - - Val[0] = ValPack.x; - Val[1] = ValPack.y; - Val[2] = ValPack.z; - Val[3] = ValPack.w; - - Prefixes[0] = PrefPack.x; - Prefixes[1] = PrefPack.y; - Prefixes[2] = PrefPack.z; - Prefixes[3] = PrefPack.w; -} - - -//-------------------------------------------------------------------------------------- -// Constant data function for the SubDToBezierHS. This is executed once per patch. -//-------------------------------------------------------------------------------------- -HS_CONSTANT_DATA_OUTPUT SubDToBezierConstantsHS( InputPatch<VS_CONTROL_POINT_OUTPUT, MAX_POINTS> ip, - uint PatchID : SV_PrimitiveID ) -{ - HS_CONSTANT_DATA_OUTPUT Output; - - float TessAmount = g_fTessellationFactor; - - Output.Edges[0] = Output.Edges[1] = Output.Edges[2] = Output.Edges[3] = TessAmount; - Output.Inside[0] = Output.Inside[1] = TessAmount; - - Output.vTangent[0] = ip[0].vTangent; - Output.vTangent[1] = ip[1].vTangent; - Output.vTangent[2] = ip[2].vTangent; - Output.vTangent[3] = ip[3].vTangent; - - Output.vUV[0] = ip[0].vUV; - Output.vUV[1] = ip[1].vUV; - Output.vUV[2] = ip[2].vUV; - Output.vUV[3] = ip[3].vUV; - - // Compute part of our tangent patch here - uint Val[4]; - uint Prefixes[4]; - LoadValenceAndPrefixData( PatchID, Val, Prefixes ); - - [unroll] - for( int i=0; i<4; i++ ) - { - float3 CornerB, CornerU, CornerV; - ComputeCornerVertex( i, CornerB, CornerU, CornerV, ip, Val, Prefixes ); - Output.vTanUCorner[i] = CornerU; - Output.vTanVCorner[i] = CornerV; - } - - float fCWts[4]; - Output.vCWts.x = g_fCi[ Val[0]-3 ]; - Output.vCWts.y = g_fCi[ Val[1]-3 ]; - Output.vCWts.z = g_fCi[ Val[2]-3 ]; - Output.vCWts.w = g_fCi[ Val[3]-3 ]; - - return Output; -} - -HS_CONSTANT_DATA_OUTPUT SubDToBezierConstantsHS4444( InputPatch<VS_CONTROL_POINT_OUTPUT, MAX_POINTS> ip, - uint PatchID : SV_PrimitiveID ) -{ - HS_CONSTANT_DATA_OUTPUT Output; - - float TessAmount = g_fTessellationFactor; - - Output.Edges[0] = Output.Edges[1] = Output.Edges[2] = Output.Edges[3] = TessAmount; - Output.Inside[0] = Output.Inside[1] = TessAmount; - - Output.vTangent[0] = ip[0].vTangent; - Output.vTangent[1] = ip[1].vTangent; - Output.vTangent[2] = ip[2].vTangent; - Output.vTangent[3] = ip[3].vTangent; - - Output.vUV[0] = ip[0].vUV; - Output.vUV[1] = ip[1].vUV; - Output.vUV[2] = ip[2].vUV; - Output.vUV[3] = ip[3].vUV; - - // Compute part of our tangent patch here - static const uint Val[4] = (uint[4])uint4(4,4,4,4); - static const uint Prefixes[4] = (uint[4])uint4(7,10,13,16); - - [unroll] - for( int i=0; i<4; i++ ) - { - float3 CornerB, CornerU, CornerV; - ComputeCornerVertex4444( i, CornerB, CornerU, CornerV, ip, Val, Prefixes ); - Output.vTanUCorner[i] = CornerU; - Output.vTanVCorner[i] = CornerV; - } - - float fCWts[4]; - Output.vCWts.x = g_fCi[ Val[0]-3 ]; - Output.vCWts.y = g_fCi[ Val[1]-3 ]; - Output.vCWts.z = g_fCi[ Val[2]-3 ]; - Output.vCWts.w = g_fCi[ Val[3]-3 ]; - - return Output; -} - - -//-------------------------------------------------------------------------------------- -// HS for SubDToBezier. This outputcontrolpoints(16) specifies that we will produce -// 16 control points. Therefore this function will be invoked 16x, one for each output -// control point. -// -// !! PERFORMANCE NOTE: This hull shader is written for maximum readability, and its -// performance is not expected to be optimal on D3D11 hardware. The switch statement -// below that determines the codepath for each patch control point generates sub-optimal -// code for parallel execution on the GPU. A future implementation of this hull shader -// will combine the 16 codepaths and 3 variants (corner, edge, interior) into one shared -// codepath; this change is expected to increase performance at the expense of readability. -//-------------------------------------------------------------------------------------- -[domain("quad")] -[partitioning("integer")] -[outputtopology("triangle_cw")] -[outputcontrolpoints(16)] -[patchconstantfunc("SubDToBezierConstantsHS")] -BEZIER_CONTROL_POINT SubDToBezierHS( InputPatch<VS_CONTROL_POINT_OUTPUT, MAX_POINTS> p, - uint i : SV_OutputControlPointID, - uint PatchID : SV_PrimitiveID ) -{ - // Valences and prefixes are loaded from a buffer - uint Val[4]; - uint Prefixes[4]; - LoadValenceAndPrefixData( PatchID, Val, Prefixes ); - - float3 CornerB = float3(0,0,0); - float3 CornerU = float3(0,0,0); - float3 CornerV = float3(0,0,0); - - BEZIER_CONTROL_POINT Output; - Output.vPosition = float3(0,0,0); - - // !! PERFORMANCE NOTE: As mentioned above, this switch statement generates - // inefficient code for the sake of readability. - switch( i ) - { - // Interior vertices - case 5: - Output.vPosition = ComputeInteriorVertex( 0, Val, p ); - break; - case 6: - Output.vPosition = ComputeInteriorVertex( 1, Val, p ); - break; - case 10: - Output.vPosition = ComputeInteriorVertex( 2, Val, p ); - break; - case 9: - Output.vPosition = ComputeInteriorVertex( 3, Val, p ); - break; - - // Corner vertices - case 0: - ComputeCornerVertex( 0, CornerB, CornerU, CornerV, p, Val, Prefixes ); - Output.vPosition = CornerB; - break; - case 3: - ComputeCornerVertex( 1, CornerB, CornerU, CornerV, p, Val, Prefixes ); - Output.vPosition = CornerB; - break; - case 15: - ComputeCornerVertex( 2, CornerB, CornerU, CornerV, p, Val, Prefixes ); - Output.vPosition = CornerB; - break; - case 12: - ComputeCornerVertex( 3, CornerB, CornerU, CornerV, p, Val, Prefixes ); - Output.vPosition = CornerB; - break; - - // Edge vertices - case 1: - Output.vPosition = ComputeEdgeVertex( 0, p, Val, Prefixes ); - break; - case 2: - Output.vPosition = ComputeEdgeVertex( 1, p, Val, Prefixes ); - break; - case 13: - Output.vPosition = ComputeEdgeVertex( 2, p, Val, Prefixes ); - break; - case 14: - Output.vPosition = ComputeEdgeVertex( 3, p, Val, Prefixes ); - break; - case 4: - Output.vPosition = ComputeEdgeVertex( 4, p, Val, Prefixes ); - break; - case 8: - Output.vPosition = ComputeEdgeVertex( 5, p, Val, Prefixes ); - break; - case 7: - Output.vPosition = ComputeEdgeVertex( 6, p, Val, Prefixes ); - break; - case 11: - Output.vPosition = ComputeEdgeVertex( 7, p, Val, Prefixes ); - break; - } - - return Output; -} - -//-------------------------------------------------------------------------------------- -// Specialised version for Regular (4,4,4,4) patches, this is much simpler and has less -// branching compared to the general one above -//-------------------------------------------------------------------------------------- -[domain("quad")] -[partitioning("integer")] -[outputtopology("triangle_cw")] -[outputcontrolpoints(16)] -[patchconstantfunc("SubDToBezierConstantsHS4444")] -BEZIER_CONTROL_POINT SubDToBezierHS4444( InputPatch<VS_CONTROL_POINT_OUTPUT, MAX_POINTS> p, - uint i : SV_OutputControlPointID, - uint PatchID : SV_PrimitiveID ) -{ - // Valences and prefixes are Constant for this case (4,4,4,4) - static const uint Val[4] = (uint[4])uint4(4,4,4,4); - static const uint Prefixes[4] = (uint[4])uint4(7,10,13,16); - - float3 CornerB = float3(0,0,0); - float3 CornerU = float3(0,0,0); - float3 CornerV = float3(0,0,0); - - BEZIER_CONTROL_POINT Output; - Output.vPosition = float3(0,0,0); - - // !! PERFORMANCE NOTE: As mentioned above, this switch statement generates - // inefficient code for the sake of readability. - switch( i ) - { - // Interior vertices - case 5: - Output.vPosition = ComputeInteriorVertex( 0, Val, p ); - break; - case 6: - Output.vPosition = ComputeInteriorVertex( 1, Val, p ); - break; - case 10: - Output.vPosition = ComputeInteriorVertex( 2, Val, p ); - break; - case 9: - Output.vPosition = ComputeInteriorVertex( 3, Val, p ); - break; - - // Corner vertices - case 0: - ComputeCornerVertex4444( 0, CornerB, CornerU, CornerV, p, Val, Prefixes ); - Output.vPosition = CornerB; - break; - case 3: - ComputeCornerVertex4444( 1, CornerB, CornerU, CornerV, p, Val, Prefixes ); - Output.vPosition = CornerB; - break; - case 15: - ComputeCornerVertex4444( 2, CornerB, CornerU, CornerV, p, Val, Prefixes ); - Output.vPosition = CornerB; - break; - case 12: - ComputeCornerVertex4444( 3, CornerB, CornerU, CornerV, p, Val, Prefixes ); - Output.vPosition = CornerB; - break; - - // Edge vertices - case 1: - Output.vPosition = ComputeEdgeVertex( 0, p, Val, Prefixes ); - break; - case 2: - Output.vPosition = ComputeEdgeVertex( 1, p, Val, Prefixes ); - break; - case 13: - Output.vPosition = ComputeEdgeVertex( 2, p, Val, Prefixes ); - break; - case 14: - Output.vPosition = ComputeEdgeVertex( 3, p, Val, Prefixes ); - break; - case 4: - Output.vPosition = ComputeEdgeVertex( 4, p, Val, Prefixes ); - break; - case 8: - Output.vPosition = ComputeEdgeVertex( 5, p, Val, Prefixes ); - break; - case 7: - Output.vPosition = ComputeEdgeVertex( 6, p, Val, Prefixes ); - break; - case 11: - Output.vPosition = ComputeEdgeVertex( 7, p, Val, Prefixes ); - break; - } - - return Output; -} - - -//-------------------------------------------------------------------------------------- -// Bezier evaluation domain shader section -//-------------------------------------------------------------------------------------- -struct DS_OUTPUT -{ - float3 vWorldPos : POSITION; - float3 vNormal : NORMAL; - float2 vUV : TEXCOORD; - float3 vTangent : TANGENT; - float3 vBiTangent : BITANGENT; - - float4 vPosition : SV_POSITION; -}; - -//-------------------------------------------------------------------------------------- -float4 BernsteinBasis(float t) -{ - float invT = 1.0f - t; - - return float4( invT * invT * invT, - 3.0f * t * invT * invT, - 3.0f * t * t * invT, - t * t * t ); -} - -//-------------------------------------------------------------------------------------- -float4 dBernsteinBasis(float t) -{ - float invT = 1.0f - t; - - return float4( -3 * invT * invT, - 3 * invT * invT - 6 * t * invT, - 6 * t * invT - 3 * t * t, - 3 * t * t ); -} - -//-------------------------------------------------------------------------------------- -float3 EvaluateBezier( const OutputPatch<BEZIER_CONTROL_POINT, 16> bezpatch, - float4 BasisU, - float4 BasisV ) -{ - float3 Value = float3(0,0,0); - Value = BasisV.x * ( bezpatch[0].vPosition * BasisU.x + bezpatch[1].vPosition * BasisU.y + bezpatch[2].vPosition * BasisU.z + bezpatch[3].vPosition * BasisU.w ); - Value += BasisV.y * ( bezpatch[4].vPosition * BasisU.x + bezpatch[5].vPosition * BasisU.y + bezpatch[6].vPosition * BasisU.z + bezpatch[7].vPosition * BasisU.w ); - Value += BasisV.z * ( bezpatch[8].vPosition * BasisU.x + bezpatch[9].vPosition * BasisU.y + bezpatch[10].vPosition * BasisU.z + bezpatch[11].vPosition * BasisU.w ); - Value += BasisV.w * ( bezpatch[12].vPosition * BasisU.x + bezpatch[13].vPosition * BasisU.y + bezpatch[14].vPosition * BasisU.z + bezpatch[15].vPosition * BasisU.w ); - - return Value; -} - -//-------------------------------------------------------------------------------------- -float3 EvaluateBezierTan( const float3 bezpatch[16], - float4 BasisU, - float4 BasisV ) -{ - float3 Value = float3(0,0,0); - Value = BasisV.x * ( bezpatch[0] * BasisU.x + bezpatch[1] * BasisU.y + bezpatch[2] * BasisU.z + bezpatch[3] * BasisU.w ); - Value += BasisV.y * ( bezpatch[4] * BasisU.x + bezpatch[5] * BasisU.y + bezpatch[6] * BasisU.z + bezpatch[7] * BasisU.w ); - Value += BasisV.z * ( bezpatch[8] * BasisU.x + bezpatch[9] * BasisU.y + bezpatch[10] * BasisU.z + bezpatch[11] * BasisU.w ); - Value += BasisV.w * ( bezpatch[12] * BasisU.x + bezpatch[13] * BasisU.y + bezpatch[14] * BasisU.z + bezpatch[15] * BasisU.w ); - - return Value; -} - -//-------------------------------------------------------------------------------------- -// Compute a two full tangent patches from the Tangent corner data created in the -// HS constant data function. -//-------------------------------------------------------------------------------------- -void CreatTangentPatches( in HS_CONSTANT_DATA_OUTPUT input, - const OutputPatch<BEZIER_CONTROL_POINT, 16> bezpatch, - out float3 TanU[16], - out float3 TanV[16] ) -{ - TanV[0] = input.vTanVCorner[0]; - TanV[3] = input.vTanVCorner[1]; - TanV[15] = input.vTanVCorner[2]; - TanV[12] = input.vTanVCorner[3]; - - TanU[0] = input.vTanUCorner[0]; - TanU[3] = input.vTanUCorner[1]; - TanU[15] = input.vTanUCorner[2]; - TanU[12] = input.vTanUCorner[3]; - - float fCWts[4]; - fCWts[0] = input.vCWts.x; - fCWts[1] = input.vCWts.y; - fCWts[2] = input.vCWts.z; - fCWts[3] = input.vCWts.w; - - float3 vCorner[4]; - float3 vCornerLocal[4]; - - vCorner[0] = TanV[0]; - vCorner[1] = TanV[3]; - vCorner[2] = TanV[15]; - vCorner[3] = TanV[12]; - vCornerLocal[0] = TanU[0]; - vCornerLocal[1] = TanU[3]; - vCornerLocal[2] = TanU[12]; - vCornerLocal[3] = TanU[15]; - - ComputeTanPatch( bezpatch, TanU, fCWts, vCorner, vCornerLocal, 1, 4 ); - - fCWts[3] = input.vCWts.y; - fCWts[1] = input.vCWts.w; - - vCorner[0] = TanU[0]; - vCorner[3] = TanU[3]; - vCorner[2] = TanU[15]; - vCorner[1] = TanU[12]; - vCornerLocal[0] = TanV[0]; - vCornerLocal[1] = TanV[12]; - vCornerLocal[2] = TanV[3]; - vCornerLocal[3] = TanV[15]; - - ComputeTanPatch( bezpatch, TanV, fCWts, vCorner, vCornerLocal, 4, 1 ); -} - -//-------------------------------------------------------------------------------------- -// For each input UV (from the Tessellator), evaluate the Bezier patch at this position. -//-------------------------------------------------------------------------------------- -[domain("quad")] -DS_OUTPUT BezierEvalDS( HS_CONSTANT_DATA_OUTPUT input, - float2 UV : SV_DomainLocation, - const OutputPatch<BEZIER_CONTROL_POINT, 16> bezpatch ) -{ - float4 BasisU = BernsteinBasis( UV.x ); - float4 BasisV = BernsteinBasis( UV.y ); - - float3 WorldPos = EvaluateBezier( bezpatch, BasisU, BasisV ); - - float3 TanU[16]; - float3 TanV[16]; - CreatTangentPatches( input, bezpatch, TanU, TanV ); - float3 Tangent = EvaluateBezierTan( TanU, BasisU, BasisV ); - float3 BiTangent = EvaluateBezierTan( TanV, BasisU, BasisV ); - - // To see what the patch looks like without using the tangent patches to fix the normals, uncomment this section - /* - float4 dBasisU = dBernsteinBasis( UV.x ); - float4 dBasisV = dBernsteinBasis( UV.y ); - Tangent = EvaluateBezier( bezpatch, dBasisU, BasisV ); - BiTangent = EvaluateBezier( bezpatch, BasisU, dBasisV ); - */ - - float3 Norm = normalize( cross( Tangent, BiTangent ) ); - - DS_OUTPUT Output; - Output.vNormal = Norm; - - // Evalulate the tangent vectors through bilinear interpolation. - // These tangents are the texture-space tangents. They should not be confused with the parametric - // tangents that we use to get the normals for the bicubic patch. - float3 TextureTanU0 = input.vTangent[0]; - float3 TextureTanU1 = input.vTangent[1]; - float3 TextureTanU2 = input.vTangent[2]; - float3 TextureTanU3 = input.vTangent[3]; - - float3 UVbottom = lerp( TextureTanU0, TextureTanU1, UV.x ); - float3 UVtop = lerp( TextureTanU3, TextureTanU2, UV.x ); - float3 Tan = lerp( UVbottom, UVtop, UV.y ); - - Output.vTangent = Tan; - - // This is an optimization. We assume that the UV mapping of the mesh will result in a "relatively" orthogonal - // tangent basis. If we assume this, then we can avoid fetching and bilerping the BiTangent along with the tangent. - Output.vBiTangent = cross( Norm, Tan ); - - // bilerp the texture coordinates - float2 tex0 = input.vUV[0]; - float2 tex1 = input.vUV[1]; - float2 tex2 = input.vUV[2]; - float2 tex3 = input.vUV[3]; - - float2 bottom = lerp( tex0, tex1, UV.x ); - float2 top = lerp( tex3, tex2, UV.x ); - float2 TexUV = lerp( bottom, top, UV.y ); - Output.vUV = TexUV; - - if( g_fDisplacementHeight > 0 ) - { - // On this sample displacement can go into or out of the mesh. This is why we bias the heigh amount. - float height = g_fDisplacementHeight * ( g_txHeight.SampleLevel( g_samPoint, TexUV, 0 ).a * 2 - 1 ); - float3 WorldPosMiddle = Norm * height; - WorldPos += WorldPosMiddle; - } - - Output.vPosition = mul( float4(WorldPos,1), g_mViewProjection ); - Output.vWorldPos = WorldPos; - - return Output; -} - -//-------------------------------------------------------------------------------------- -// Smooth shading pixel shader section -//-------------------------------------------------------------------------------------- - -float3 safe_normalize( float3 vInput ) -{ - float len2 = dot( vInput, vInput ); - if( len2 > 0 ) - { - return vInput * rsqrt( len2 ); - } - return vInput; -} - -static const float g_fSpecularExponent = 32.0f; -static const float g_fSpecularIntensity = 0.6f; -static const float g_fNormalMapIntensity = 1.5f; - -float2 ComputeDirectionalLight( float3 vWorldPos, float3 vWorldNormal, float3 vDirLightDir ) -{ - // Result.x is diffuse illumination, Result.y is specular illumination - float2 Result = float2( 0, 0 ); - Result.x = pow( saturate( dot( vWorldNormal, -vDirLightDir ) ), 2 ); - - float3 vPointToCamera = normalize( g_vCameraPosWorld - vWorldPos ); - float3 vHalfAngle = normalize( vPointToCamera - vDirLightDir ); - Result.y = pow( saturate( dot( vHalfAngle, vWorldNormal ) ), g_fSpecularExponent ); - - return Result; -} - -float3 ColorGamma( float3 Input ) -{ - return pow( Input, 2.2f ); -} - -float4 SmoothPS( PS_INPUT Input ) : SV_TARGET -{ - float4 vNormalMapSampleRaw = g_txHeight.Sample( g_samLinear, Input.vUV ); - float3 vNormalMapSampleBiased = ( vNormalMapSampleRaw.xyz * 2 ) - 1; - vNormalMapSampleBiased.xy *= g_fNormalMapIntensity; - float3 vNormalMapSample = normalize( vNormalMapSampleBiased ); - - float3 vNormal = safe_normalize( Input.vNormal ) * vNormalMapSample.z; - vNormal += safe_normalize( Input.vTangent ) * vNormalMapSample.x; - vNormal += safe_normalize( Input.vBiTangent ) * vNormalMapSample.y; - - //float3 vColor = float3( 1, 1, 1 ); - float3 vColor = g_txDiffuse.Sample( g_samLinear, Input.vUV ).rgb; - float vSpecular = g_txSpecular.Sample( g_samLinear, Input.vUV ).r * g_fSpecularIntensity; - - const float3 DirLightDirections[4] = - { - // key light - normalize( float3( -63.345150, -58.043934, 27.785097 ) ), - // fill light - normalize( float3( 23.652107, -17.391443, 54.972504 ) ), - // back light 1 - normalize( float3( 20.470509, -22.939510, -33.929531 ) ), - // back light 2 - normalize( float3( -31.003685, 24.242104, -41.352859 ) ), - }; - - const float3 DirLightColors[4] = - { - // key light - ColorGamma( float3( 1.0f, 0.964f, 0.706f ) * 1.0f ), - // fill light - ColorGamma( float3( 0.446f, 0.641f, 1.0f ) * 1.0f ), - // back light 1 - ColorGamma( float3( 1.0f, 0.862f, 0.419f ) * 1.0f ), - // back light 2 - ColorGamma( float3( 0.405f, 0.630f, 1.0f ) * 1.0f ), - }; - - float3 fLightColor = 0; - for( int i = 0; i < 4; ++i ) - { - float2 LightDiffuseSpecular = ComputeDirectionalLight( Input.vWorldPos, vNormal, DirLightDirections[i] ); - fLightColor += DirLightColors[i] * vColor * LightDiffuseSpecular.x; - fLightColor += DirLightColors[i] * LightDiffuseSpecular.y * vSpecular; - } - - return float4( fLightColor, 1 ); -} - -//-------------------------------------------------------------------------------------- -// Solid color shading pixel shader (used for wireframe overlay) -//-------------------------------------------------------------------------------------- -float4 SolidColorPS( PS_INPUT Input ) : SV_TARGET -{ - return float4( g_vSolidColor, 1 ); -} diff --git a/tests/hlsl/dxsdk/VarianceShadows11/2DQuadShaders.hlsl b/tests/hlsl/dxsdk/VarianceShadows11/2DQuadShaders.hlsl deleted file mode 100644 index 7d9763a79..000000000 --- a/tests/hlsl/dxsdk/VarianceShadows11/2DQuadShaders.hlsl +++ /dev/null @@ -1,216 +0,0 @@ -//TEST_DISABLED:COMPARE_HLSL:-no-mangle -profile vs_4_0 -entry VSMain -profile ps_4_0 -entry PSBlurX -entry PSBlurY - -//SLANG: This test has been disabled because its semantic correctness -//around use of compile-time-constant expressions relies on processing -//the `[unroll]` attribute, and we don't yet support that. - -//-------------------------------------------------------------------------------------- -// File: Skinning10.fx -// -// The effect file for the Skinning10 sample. -// -// Copyright (c) Microsoft Corporation. All rights reserved. -//-------------------------------------------------------------------------------------- - -#ifndef SEPERABLE_BLUR_KERNEL_SIZE -#define SEPERABLE_BLUR_KERNEL_SIZE 3 -#endif - -static const int BLUR_KERNEL_BEGIN = SEPERABLE_BLUR_KERNEL_SIZE / -2; -static const int BLUR_KERNEL_END = SEPERABLE_BLUR_KERNEL_SIZE / 2 + 1; -static const float FLOAT_BLUR_KERNEL_SIZE = (float)SEPERABLE_BLUR_KERNEL_SIZE; - -cbuffer cbblurVS : register( b2) -{ - int2 g_iWidthHeight : packoffset( c0 ); - int g_iKernelStart : packoffset( c0.z ); - int g_iKernelEnd : packoffset( c0.w ); -}; - -//-------------------------------------------------------------------------------------- -// defines -//-------------------------------------------------------------------------------------- - -Texture2DArray g_txShadow : register( t5 ); -SamplerState g_samShadow : register( s5 ); - -//-------------------------------------------------------------------------------------- -// Input/Output structures -//-------------------------------------------------------------------------------------- - -struct PSIn -{ - float4 Pos : SV_Position; //Position - float2 Tex : TEXCOORD; //Texture coordinate - float2 ITex : TEXCOORD2; -}; - -struct VSIn -{ - uint Pos : SV_VertexID ; -}; - - -PSIn VSMain(VSIn inn) -{ - PSIn output; - - output.Pos.y = -1.0f + (inn.Pos%2) * 2.0f ; - output.Pos.x = -1.0f + (inn.Pos/2) * 2.0f; - output.Pos.z = .5; - output.Pos.w = 1; - output.Tex.x = inn.Pos/2; - output.Tex.y = 1.0f - inn.Pos%2; - output.ITex.x = (float)(g_iWidthHeight.x * output.Tex.x); - output.ITex.y = (float)(g_iWidthHeight.y * output.Tex.y); - return output; -} - -//float PSDepth - -//------------------------------------------------------------------------------ -// Logarithmic filtering -//------------------------------------------------------------------------------ - -float log_conv ( float x0, float X, float y0, float Y ) -{ - return (X + log(x0 + (y0 * exp(Y - X)))); -} - - -//-------------------------------------------------------------------------------------- -// Pixel shader that performs bump mapping on the final vertex -//-------------------------------------------------------------------------------------- -float2 PSBlurX(PSIn input) : SV_Target -{ -/* - float2 centerDistance; - if ( input.Tex.x < .5 ) centerDistance.x = (1.0 - input.Tex.x); - else centerDistance.x = input.Tex.x; - if ( input.Tex.y < .5 ) centerDistance.y = (1.0 - input.Tex.y); - else centerDistance.y = input.Tex.y; - if (centerDistance.x < centerDistance.y) centerDistance.x = centerDistance.y; - centerDistance.x -= .2; - centerDistance.x *= (1.0f / .8); - - float store_samples[8]; - int ind = 0; - for (int x = g_iKernelStart; x < g_iKernelEnd; ++x) { - store_samples[ind] = g_txShadow.Load( int3(input.ITex.x+(float)x * centerDistance.x , input.ITex.y, 0) ).r; - ind++; - } - const float c = (1.f/5.f); - - float accum; - accum = log_conv( c, store_samples[0], c, store_samples[1] ); - - ind = 0; - for (x = g_iKernelStart - 2; x < g_iKernelEnd; ++x) { - ind++; - accum += log_conv( 1.0f, accum, c, store_samples[ind] ); - } - float2 rt; - rt.x = accum; - return rt; - */ - /* - float2 dep = 0; - float2 centerDistance; - if ( input.Tex.x < .5 ) centerDistance.x = (1.0 - input.Tex.x); - else centerDistance.x = input.Tex.x; - if ( input.Tex.y < .5 ) centerDistance.y = (1.0 - input.Tex.y); - else centerDistance.y = input.Tex.y; - if (centerDistance.x < centerDistance.y) centerDistance.x = centerDistance.y; - centerDistance.x -= .2; - centerDistance.x *= ( 1.0f / 0.8f ); - - for (int x = g_iKernelStart; x < g_iKernelEnd; ++x) { - dep += g_txShadow.Load( int3(input.ITex.x+(float)x * centerDistance.x , input.ITex.y, 0) ).rg; - } - dep /= (g_iKernelEnd - g_iKernelStart); - return dep; - */ - - float2 dep=0; - [unroll]for ( int x = BLUR_KERNEL_BEGIN; x < BLUR_KERNEL_END; ++x ) { - dep += g_txShadow.Sample( g_samShadow, float3( input.Tex.x, input.Tex.y, 0 ), int2( x,0 ) ).rg; - } - dep /= FLOAT_BLUR_KERNEL_SIZE; - return dep; - -// return g_txShadow.Sample(g_samShadow, float3(input.Tex.x, input.Tex.y, 0) ).rg; - -} - -//-------------------------------------------------------------------------------------- -// Pixel shader that performs bump mapping on the final vertex -//-------------------------------------------------------------------------------------- -float2 PSBlurY(PSIn input) : SV_Target -{ -/* - float2 centerDistance; - if ( input.Tex.x < .5 ) centerDistance.x = (1.0 - input.Tex.x); - else centerDistance.x = input.Tex.x; - if ( input.Tex.y < .5 ) centerDistance.y = (1.0 - input.Tex.y); - else centerDistance.y = input.Tex.y; - if (centerDistance.x < centerDistance.y) centerDistance.x = centerDistance.y; - centerDistance.x -= .2; - centerDistance.x *= (1.0f / .8); - - float store_samples[8]; - int ind = 0; - for (int y = g_iKernelStart; y < g_iKernelEnd; ++y) { - store_samples[ind] = g_txShadow.Load( int3(input.ITex.x, input.ITex.y+(float)y * centerDistance.x, 0) ).r; - } - const float c = (1.f/5.f); - - float accum; - accum = log_conv( c, store_samples[0], c, store_samples[1] ); - - ind = 0; - for (y = g_iKernelStart; y < g_iKernelEnd; ++y) { - ind++; - accum += log_conv( 1.0f, accum, c, store_samples[ind] ); - } - float2 rt; - rt.x = accum; - return rt; - */ - - - /* - float2 dep = 0; - - float2 centerDistance; - if ( input.Tex.x < .5 ) centerDistance.x = (1.0 - input.Tex.x); - else centerDistance.x = input.Tex.x; - if ( input.Tex.y < .5 ) centerDistance.y = (1.0 - input.Tex.y); - else centerDistance.y = input.Tex.y; - if (centerDistance.x < centerDistance.y) centerDistance.x = centerDistance.y; - centerDistance.x -= 0; - centerDistance.x *= (1.0f / 1.0f); - - if (centerDistance.x < centerDistance.y) centerDistance.x = centerDistance.y; - for (int y = g_iKernelStart; y < g_iKernelEnd; ++y) { - dep += g_txShadow.Load( int3(input.ITex.x, input.ITex.y+(float)y * centerDistance.x, 0) ).rg; - } - - - dep /= (g_iKernelEnd - g_iKernelStart); - return dep; - - */ - - - float2 dep=0; - [unroll]for ( int y = BLUR_KERNEL_BEGIN; y < BLUR_KERNEL_END; ++y ) { - dep += g_txShadow.Sample( g_samShadow, float3( input.Tex.x, input.Tex.y, 0 ), int2( 0,y ) ).rg; - } - dep /= FLOAT_BLUR_KERNEL_SIZE; - return dep; - - //return g_txShadow.Sample(g_samShadow, float3(input.Tex.x, input.Tex.y, 0) ).rg; -} - - - diff --git a/tests/hlsl/dxsdk/VarianceShadows11/RenderVarianceScene.hlsl b/tests/hlsl/dxsdk/VarianceShadows11/RenderVarianceScene.hlsl deleted file mode 100644 index 29c9851d8..000000000 --- a/tests/hlsl/dxsdk/VarianceShadows11/RenderVarianceScene.hlsl +++ /dev/null @@ -1,412 +0,0 @@ -//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues. -//TEST:COMPARE_HLSL: -profile vs_4_0 -entry VSMain -profile ps_4_0 -entry PSBlurX -entry PSBlurY -//-------------------------------------------------------------------------------------- -// File: RenderCascadeScene.hlsl -// -// This is the main shader file. This shader is compiled with several different flags -// to provide different customizations based on user controls. -// -// -// Copyright (c) Microsoft Corporation. All rights reserved. -//-------------------------------------------------------------------------------------- - -//-------------------------------------------------------------------------------------- -// Globals -//-------------------------------------------------------------------------------------- - -// This flag enables the shadow to blend between cascades. This is most useful when the -// the shadow maps are small and artifact can be seen between the various cascade layers. -#ifndef BLEND_BETWEEN_CASCADE_LAYERS_FLAG -#define BLEND_BETWEEN_CASCADE_LAYERS_FLAG 0 -#endif - -// There are two methods for selecting the proper cascade a fragment lies in. Interval selection -// compares the depth of the fragment against the frustum's depth partition. -// Map based selection compares the texture coordinates against the acutal cascade maps. -// Map based selection gives better coverage. -// Interval based selection is easier to extend and understand. -#ifndef SELECT_CASCADE_BY_INTERVAL_FLAG -#define SELECT_CASCADE_BY_INTERVAL_FLAG 0 -#endif - -// The number of cascades -#ifndef CASCADE_COUNT_FLAG -#define CASCADE_COUNT_FLAG 3 -#endif - - -// Most titles will find that 3-4 cascades with -// BLEND_BETWEEN_CASCADE_LAYERS_FLAG, is good for lower end PCs. - -cbuffer cbAllShadowData : register( b0 ) -{ - matrix m_mWorldViewProjection; - matrix m_mWorld; - matrix m_mWorldView; - matrix m_mShadow; - float4 m_vCascadeOffset[8]; - float4 m_vCascadeScale[8]; - int m_nCascadeLevels; // Number of Cascades - int m_iVisualizeCascades; // 1 is to visualize the cascades in different colors. 0 is to just draw the scene - - // For Map based selection scheme, this keeps the pixels inside of the the valid range. - // When there is no boarder, these values are 0 and 1 respectivley. - float m_fMinBorderPadding; - float m_fMaxBorderPadding; - - float m_fCascadeBlendArea; // Amount to overlap when blending between cascades. - float m_fTexelSize; // Padding variables exist because CBs must be a multiple of 16 bytes. - float m_fNativeTexelSizeInX; - float4 m_fCascadeFrustumsEyeSpaceDepthsData[2]; // The values along Z that seperate the cascades. - // This code creates an array based pointer that points towards the vectorized input data. - // This is the only way to index arbitrary arrays of data. - // If the array is used at run time, the compiler will generate code that uses logic to index the correct component. - - static float m_fCascadeFrustumsEyeSpaceDepths[8] = (float[8])m_fCascadeFrustumsEyeSpaceDepthsData; - - float3 m_vLightDir; - float m_fPaddingCB4; - -}; - - - -//-------------------------------------------------------------------------------------- -// Textures and Samplers -//-------------------------------------------------------------------------------------- -Texture2D g_txDiffuse : register( t0 ); -Texture2DArray g_txShadow : register( t5 ); - -SamplerState g_samLinear : register( s0 ); -SamplerState g_samShadow : register( s5 ); - -//-------------------------------------------------------------------------------------- -// Input / Output structures -//-------------------------------------------------------------------------------------- -struct VS_INPUT -{ - float4 vPosition : POSITION; - float3 vNormal : NORMAL; - float2 vTexcoord : TEXCOORD0; -}; - -struct VS_OUTPUT -{ - float3 vNormal : NORMAL; - float2 vTexcoord : COLOR0; - float4 vTexShadow : TEXCOORD1; - float4 vPosition : SV_POSITION; - float4 vInterpPos : TEXCOORD2; - float vDepth : TEXCOORD3; -}; - -//-------------------------------------------------------------------------------------- -// Vertex Shader -//-------------------------------------------------------------------------------------- -VS_OUTPUT VSMain( VS_INPUT Input ) -{ - VS_OUTPUT Output; - - Output.vPosition = mul( Input.vPosition, m_mWorldViewProjection ); - Output.vNormal = mul( Input.vNormal, (float3x3)m_mWorld ); - Output.vTexcoord = Input.vTexcoord; - Output.vInterpPos = Input.vPosition; - Output.vDepth = mul( Input.vPosition, m_mWorldView ).z ; - - // Transform the shadow texture coordinates for all the cascades. - Output.vTexShadow = mul( Input.vPosition, m_mShadow ); - - return Output; -} - - - -static const float4 vCascadeColorsMultiplier[8] = -{ - float4 ( 1.5f, 0.0f, 0.0f, 1.0f ), - float4 ( 0.0f, 1.5f, 0.0f, 1.0f ), - float4 ( 0.0f, 0.0f, 5.5f, 1.0f ), - float4 ( 1.5f, 0.0f, 5.5f, 1.0f ), - float4 ( 1.5f, 1.5f, 0.0f, 1.0f ), - float4 ( 1.0f, 1.0f, 1.0f, 1.0f ), - float4 ( 0.0f, 1.0f, 5.5f, 1.0f ), - float4 ( 0.5f, 3.5f, 0.75f, 1.0f ) -}; - - -void ComputeCoordinatesTransform( in int iCascadeIndex, - in float4 InterpolatedPosition, - in out float4 vShadowTexCoord, - in out float4 vShadowTexCoordViewSpace ) -{ - // Now that we know the correct map, we can transform the world space position of the current fragment - if( SELECT_CASCADE_BY_INTERVAL_FLAG ) - { - vShadowTexCoord = vShadowTexCoordViewSpace * m_vCascadeScale[iCascadeIndex]; - vShadowTexCoord += m_vCascadeOffset[iCascadeIndex]; - } - vShadowTexCoord.w = vShadowTexCoord.z; // We put the z value in w so that we can index the texture array with Z. - vShadowTexCoord.z = iCascadeIndex; - -} - -//-------------------------------------------------------------------------------------- -// Use PCF to sample the depth map and return a percent lit value. -//-------------------------------------------------------------------------------------- -void CalculateVarianceShadow ( in float4 vShadowTexCoord, in float4 vShadowMapTextureCoordViewSpace, int iCascade, out float fPercentLit ) -{ - fPercentLit = 0.0f; - // This loop could be unrolled, and texture immediate offsets could be used if the kernel size were fixed. - // This would be a performance improvment. - - float2 mapDepth = 0; - - - // In orderto pull the derivative out of divergent flow control we calculate the - // derivative off of the view space coordinates an then scale the deriviative. - - float3 vShadowTexCoordDDX = - ddx(vShadowMapTextureCoordViewSpace ); - vShadowTexCoordDDX *= m_vCascadeScale[iCascade].xyz; - float3 vShadowTexCoordDDY = - ddy(vShadowMapTextureCoordViewSpace ); - vShadowTexCoordDDY *= m_vCascadeScale[iCascade].xyz; - - mapDepth += g_txShadow.SampleGrad( g_samShadow, vShadowTexCoord.xyz, - vShadowTexCoordDDX, - vShadowTexCoordDDY); - // The sample instruction uses gradients for some filters. - - float fAvgZ = mapDepth.x; // Filtered z - float fAvgZ2 = mapDepth.y; // Filtered z-squared - - if ( vShadowTexCoord.w <= fAvgZ ) // We put the z value in w so that we can index the texture array with Z. - { - fPercentLit = 1; - } - else - { - float variance = ( fAvgZ2 ) - ( fAvgZ * fAvgZ ); - variance = min( 1.0f, max( 0.0f, variance + 0.00001f ) ); - - float mean = fAvgZ; - float d = vShadowTexCoord.w - mean; // We put the z value in w so that we can index the texture array with Z. - float p_max = variance / ( variance + d*d ); - - // To combat light-bleeding, experiment with raising p_max to some power - // (Try values from 0.1 to 100.0, if you like.) - fPercentLit = pow( p_max, 4 ); - - } - -} - -//-------------------------------------------------------------------------------------- -// Calculate amount to blend between two cascades and the band where blending will occure. -//-------------------------------------------------------------------------------------- -void CalculateBlendAmountForInterval ( in int iNextCascadeIndex, - in out float fPixelDepth, - in out float fCurrentPixelsBlendBandLocation, - out float fBlendBetweenCascadesAmount - ) -{ - - // We need to calculate the band of the current shadow map where it will fade into the next cascade. - // We can then early out of the expensive PCF for loop. - // - float fBlendInterval = m_fCascadeFrustumsEyeSpaceDepths[ iNextCascadeIndex - 1 ]; - if( iNextCascadeIndex > 1 ) - { - fPixelDepth -= m_fCascadeFrustumsEyeSpaceDepths[ iNextCascadeIndex-2 ]; - fBlendInterval -= m_fCascadeFrustumsEyeSpaceDepths[ iNextCascadeIndex-2 ]; - } - // The current pixel's blend band location will be used to determine when we need to blend and by how much. - fCurrentPixelsBlendBandLocation = fPixelDepth / fBlendInterval; - fCurrentPixelsBlendBandLocation = 1.0f - fCurrentPixelsBlendBandLocation; - // The fBlendBetweenCascadesAmount is our location in the blend band. - fBlendBetweenCascadesAmount = fCurrentPixelsBlendBandLocation / m_fCascadeBlendArea; -} - - -//-------------------------------------------------------------------------------------- -// Calculate amount to blend between two cascades and the band where blending will occure. -//-------------------------------------------------------------------------------------- -void CalculateBlendAmountForMap ( in float4 vShadowMapTextureCoord, - in out float fCurrentPixelsBlendBandLocation, - out float fBlendBetweenCascadesAmount ) -{ - // Calcaulte the blend band for the map based selection. - float2 distanceToOne = float2 ( 1.0f - vShadowMapTextureCoord.x, 1.0f - vShadowMapTextureCoord.y ); - fCurrentPixelsBlendBandLocation = min( vShadowMapTextureCoord.x, vShadowMapTextureCoord.y ); - float fCurrentPixelsBlendBandLocation2 = min( distanceToOne.x, distanceToOne.y ); - fCurrentPixelsBlendBandLocation = - min( fCurrentPixelsBlendBandLocation, fCurrentPixelsBlendBandLocation2 ); - fBlendBetweenCascadesAmount = fCurrentPixelsBlendBandLocation / m_fCascadeBlendArea; -} - -//-------------------------------------------------------------------------------------- -// Calculate the shadow based on several options and rende the scene. -//-------------------------------------------------------------------------------------- - -float4 PSMain( VS_OUTPUT Input ) : SV_TARGET -{ - float4 vDiffuse = g_txDiffuse.Sample( g_samLinear, Input.vTexcoord ); - - - float4 vShadowMapTextureCoordViewSpace = 0.0f; - float4 vShadowMapTextureCoord = 0.0f; - float4 vShadowMapTextureCoord_blend = 0.0f; - - float4 vVisualizeCascadeColor = float4(0.0f,0.0f,0.0f,1.0f); - - float fPercentLit = 0.0f; - float fPercentLit_blend = 0.0f; - - int iCascadeFound = 0; - int iCurrentCascadeIndex=1; - int iNextCascadeIndex = 0; - - float fCurrentPixelDepth; - - // The interval based selection technique compares the pixel's depth against the frustum's cascade divisions. - fCurrentPixelDepth = Input.vDepth; - - // This for loop is not necessary when the frustum is uniformaly divided and interval based selection is used. - // In this case fCurrentPixelDepth could be used as an array lookup into the correct frustum. - vShadowMapTextureCoordViewSpace = Input.vTexShadow; - - - if( SELECT_CASCADE_BY_INTERVAL_FLAG ) - { - iCurrentCascadeIndex = 0; - if (CASCADE_COUNT_FLAG > 1 ) - { - float4 vCurrentPixelDepth = Input.vDepth; - float4 fComparison = ( vCurrentPixelDepth > m_fCascadeFrustumsEyeSpaceDepthsData[0]); - float4 fComparison2 = ( vCurrentPixelDepth > m_fCascadeFrustumsEyeSpaceDepthsData[1]); - float fIndex = dot( - float4( CASCADE_COUNT_FLAG > 0, - CASCADE_COUNT_FLAG > 1, - CASCADE_COUNT_FLAG > 2, - CASCADE_COUNT_FLAG > 3) - , fComparison ) - + dot( - float4( - CASCADE_COUNT_FLAG > 4, - CASCADE_COUNT_FLAG > 5, - CASCADE_COUNT_FLAG > 6, - CASCADE_COUNT_FLAG > 7) - , fComparison2 ) ; - - fIndex = min( fIndex, CASCADE_COUNT_FLAG - 1 ); - iCurrentCascadeIndex = (int)fIndex; - } - } - - if ( !SELECT_CASCADE_BY_INTERVAL_FLAG ) - { - iCurrentCascadeIndex = 0; - if ( CASCADE_COUNT_FLAG == 1 ) - { - vShadowMapTextureCoord = vShadowMapTextureCoordViewSpace * m_vCascadeScale[0]; - vShadowMapTextureCoord += m_vCascadeOffset[0]; - } - if ( CASCADE_COUNT_FLAG > 1 ) { - for( int iCascadeIndex = 0; iCascadeIndex < CASCADE_COUNT_FLAG && iCascadeFound == 0; ++iCascadeIndex ) - { - vShadowMapTextureCoord = vShadowMapTextureCoordViewSpace * m_vCascadeScale[iCascadeIndex]; - vShadowMapTextureCoord += m_vCascadeOffset[iCascadeIndex]; - - if ( min( vShadowMapTextureCoord.x, vShadowMapTextureCoord.y ) > m_fMinBorderPadding - && max( vShadowMapTextureCoord.x, vShadowMapTextureCoord.y ) < m_fMaxBorderPadding ) - { - iCurrentCascadeIndex = iCascadeIndex; - iCascadeFound = 1; - } - } - } - } - // Found the correct map. - vVisualizeCascadeColor = vCascadeColorsMultiplier[iCurrentCascadeIndex]; - - ComputeCoordinatesTransform( iCurrentCascadeIndex, Input.vInterpPos, vShadowMapTextureCoord, vShadowMapTextureCoordViewSpace ); - - if( BLEND_BETWEEN_CASCADE_LAYERS_FLAG && CASCADE_COUNT_FLAG > 1 ) - { - // Repeat text coord calculations for the next cascade. - // The next cascade index is used for blurring between maps. - iNextCascadeIndex = min ( CASCADE_COUNT_FLAG - 1, iCurrentCascadeIndex + 1 ); - if( !SELECT_CASCADE_BY_INTERVAL_FLAG ) - { - vShadowMapTextureCoord_blend = vShadowMapTextureCoordViewSpace * m_vCascadeScale[iNextCascadeIndex]; - vShadowMapTextureCoord_blend += m_vCascadeOffset[iNextCascadeIndex]; - } - ComputeCoordinatesTransform( iNextCascadeIndex, Input.vInterpPos, vShadowMapTextureCoord_blend, vShadowMapTextureCoordViewSpace ); - } - float fBlendBetweenCascadesAmount = 1.0f; - float fCurrentPixelsBlendBandLocation = 1.0f; - - if( SELECT_CASCADE_BY_INTERVAL_FLAG ) - { - if( CASCADE_COUNT_FLAG > 1 && BLEND_BETWEEN_CASCADE_LAYERS_FLAG ) - { - CalculateBlendAmountForInterval ( iNextCascadeIndex, fCurrentPixelDepth, - fCurrentPixelsBlendBandLocation, fBlendBetweenCascadesAmount ); - - } - } - else - { - if( CASCADE_COUNT_FLAG > 1 && BLEND_BETWEEN_CASCADE_LAYERS_FLAG ) - { - CalculateBlendAmountForMap ( vShadowMapTextureCoord, - fCurrentPixelsBlendBandLocation, fBlendBetweenCascadesAmount ); - } - } - - // Because the Z coordinate specifies the texture array, - // the derivative will be 0 when there is no divergence - //float fDivergence = abs( ddy( vShadowMapTextureCoord.z ) ) + abs( ddx( vShadowMapTextureCoord.z ) ); - CalculateVarianceShadow ( vShadowMapTextureCoord, vShadowMapTextureCoordViewSpace, - iCurrentCascadeIndex, fPercentLit); - - // We repeat the calcuation for the next cascade layer, when blending between maps. - if( BLEND_BETWEEN_CASCADE_LAYERS_FLAG && CASCADE_COUNT_FLAG > 1 ) - { - if( fCurrentPixelsBlendBandLocation < m_fCascadeBlendArea ) - { // the current pixel is within the blend band. - - // Because the Z coordinate species the texture array, - // the derivative will be 0 when there is no divergence - float fDivergence = abs( ddy( vShadowMapTextureCoord_blend.z ) ) + - abs( ddx( vShadowMapTextureCoord_blend.z) ); - CalculateVarianceShadow ( vShadowMapTextureCoord_blend, vShadowMapTextureCoordViewSpace, - iNextCascadeIndex, fPercentLit_blend ); - - // Blend the two calculated shadows by the blend amount. - fPercentLit = lerp( fPercentLit_blend, fPercentLit, fBlendBetweenCascadesAmount ); - - } - } - - if( !m_iVisualizeCascades ) vVisualizeCascadeColor = float4( 1.0f, 1.0f, 1.0f, 1.0f ); - - float3 vLightDir1 = float3( -1.0f, 1.0f, -1.0f ); - float3 vLightDir2 = float3( 1.0f, 1.0f, -1.0f ); - float3 vLightDir3 = float3( 0.0f, -1.0f, 0.0f ); - float3 vLightDir4 = float3( 1.0f, 1.0f, 1.0f ); - // Some ambient-like lighting. - float fLighting = - saturate( dot( vLightDir1 , Input.vNormal ) )*0.05f + - saturate( dot( vLightDir2 , Input.vNormal ) )*0.05f + - saturate( dot( vLightDir3 , Input.vNormal ) )*0.05f + - saturate( dot( vLightDir4 , Input.vNormal ) )*0.05f ; - - float4 vShadowLighting = fLighting * 0.5f; - fLighting += saturate( dot( m_vLightDir , Input.vNormal ) ); - fLighting = lerp( vShadowLighting, fLighting, fPercentLit ); - - return fLighting * vVisualizeCascadeColor * vDiffuse; - -} - diff --git a/tests/hlsl/dxsdk/VarianceShadows11/RenderVarianceShadow.hlsl b/tests/hlsl/dxsdk/VarianceShadows11/RenderVarianceShadow.hlsl deleted file mode 100644 index 9aec9a55d..000000000 --- a/tests/hlsl/dxsdk/VarianceShadows11/RenderVarianceShadow.hlsl +++ /dev/null @@ -1,49 +0,0 @@ -//TEST:COMPARE_HLSL: -profile sm_4_0 -entry VSMain -stage vertex -entry PSMain -stage pixel - -#ifndef __SLANG__ -#define cbPerObject cbPerObject_0 -#define g_mWorldViewProjection g_mWorldViewProjection_0 -#endif - -//-------------------------------------------------------------------------------------- -// Globals -//-------------------------------------------------------------------------------------- -cbuffer cbPerObject : register( b0 ) -{ - matrix g_mWorldViewProjection ;//SLANG: : packoffset( c0 ); -}; - -//-------------------------------------------------------------------------------------- -// Input / Output structures -//-------------------------------------------------------------------------------------- -struct VS_INPUT -{ - float4 vPosition : POSITION; -}; - -struct VS_OUTPUT -{ - float4 vPosition : SV_POSITION; -}; - -//-------------------------------------------------------------------------------------- -// Vertex Shader -//-------------------------------------------------------------------------------------- -VS_OUTPUT VSMain( VS_INPUT Input ) -{ - VS_OUTPUT Output; - - - Output.vPosition = mul( Input.vPosition, g_mWorldViewProjection ); - - return Output; -} - - -float2 PSMain (VS_OUTPUT Input) : SV_TARGET -{ - float2 rt; - rt.x = Input.vPosition.z; - rt.y = rt.x * rt.x; - return rt; -}
\ No newline at end of file diff --git a/tests/hlsl/simple/rw-texture.hlsl b/tests/hlsl/simple/rw-texture.hlsl index 26916b474..de8e82777 100644 --- a/tests/hlsl/simple/rw-texture.hlsl +++ b/tests/hlsl/simple/rw-texture.hlsl @@ -5,7 +5,16 @@ // Ensure that we implement the `Load` operations on // `RWTexture*` types with the correct signature. -#ifndef __SLANG__ +#ifdef __SLANG__ +#define R(X) /**/ +#define BEGIN_CBUFFER(NAME) cbuffer NAME +#define END_CBUFFER(NAME, REG) /**/ +#define CBUFFER_REF(NAME, FIELD) FIELD +#else +#define R(X) : register(X) +#define BEGIN_CBUFFER(NAME) struct SLANG_ParameterGroup_##NAME +#define END_CBUFFER(NAME, REG) ; cbuffer NAME : REG { SLANG_ParameterGroup_##NAME NAME; } +#define CBUFFER_REF(NAME, FIELD) NAME.FIELD #define C C_0 #define SV_Target SV_TARGET #define u2 u2_0 @@ -16,19 +25,20 @@ #endif -cbuffer C : register(b0) +BEGIN_CBUFFER(C) { uint2 u2; uint3 u3; -}; +} +END_CBUFFER(C, register(b0)) -RWTexture2D<float4> t2 : register(u1); -RWTexture2DArray<float4> t2a : register(u2); -RWTexture3D<float4> t3 : register(u3); +RWTexture2D<float4> t2 R(u1); +RWTexture2DArray<float4> t2a R(u2); +RWTexture3D<float4> t3 R(u3); float4 main() : SV_Target { - return t2.Load(u2) - + t2a.Load(u3) - + t3.Load(u3); + return t2.Load(CBUFFER_REF(C,u2)) + + t2a.Load(CBUFFER_REF(C,u3)) + + t3.Load(CBUFFER_REF(C,u3)); } diff --git a/tests/parser/cast-precedence.hlsl b/tests/parser/cast-precedence.hlsl index 3383d9912..29793e4a2 100644 --- a/tests/parser/cast-precedence.hlsl +++ b/tests/parser/cast-precedence.hlsl @@ -3,20 +3,32 @@ // Confirm that type-cast expressions parse with // the appropriate precedence. -#ifndef __SLANG__ +#ifdef __SLANG__ +#define R(X) /**/ +#define BEGIN_CBUFFER(NAME) cbuffer NAME +#define END_CBUFFER(NAME, REG) /**/ +#define CBUFFER_REF(NAME, FIELD) FIELD +#else +#define R(X) X +#define BEGIN_CBUFFER(NAME) struct SLANG_ParameterGroup_##NAME +#define END_CBUFFER(NAME, REG) ; cbuffer NAME : register(REG) { SLANG_ParameterGroup_##NAME NAME; } +#define CBUFFER_REF(NAME, FIELD) NAME.FIELD + #define C C_0 #define a a_0 #define b b_0 #define SV_Position SV_POSITION #endif -cbuffer C : register(b0) +BEGIN_CBUFFER(C) { float a; float b; -}; +} +END_CBUFFER(C,b0) + float4 main() : SV_Position { - return (uint) a / b; + return (uint) CBUFFER_REF(C,a) / CBUFFER_REF(C,b); } diff --git a/tests/reflection/parameter-block-explicit-space.slang b/tests/reflection/parameter-block-explicit-space.slang index 5679a1c35..b4d3eff9c 100644 --- a/tests/reflection/parameter-block-explicit-space.slang +++ b/tests/reflection/parameter-block-explicit-space.slang @@ -71,7 +71,7 @@ struct A { float4 au; }; -cbuffer _S1 : register(b0, space2) +cbuffer a : register(b0, space2) { A a; } Texture2D at1 : register(t0, space2); Texture2D at2 : register(t1, space2); @@ -81,7 +81,7 @@ struct B { float4 bu; }; -cbuffer _S3 : register(b0, space3) +cbuffer b : register(b0, space3) { B b; } Texture2D bt : register(t0, space3); SamplerState bs : register(s0, space3); diff --git a/tests/rewriter/type-splitting.hlsl b/tests/rewriter/type-splitting.hlsl index 0826cbf21..850e1b5ad 100644 --- a/tests/rewriter/type-splitting.hlsl +++ b/tests/rewriter/type-splitting.hlsl @@ -42,9 +42,14 @@ struct Foo_0 float2 u_0; }; +struct SLANG_ParameterGroup_C_0 +{ + Foo_0 foo_0; +}; + cbuffer C_0 { - Foo_0 foo_0; + SLANG_ParameterGroup_C_0 C_0; } Texture2D C_foo_t_0; @@ -52,7 +57,7 @@ SamplerState C_foo_s_0; float4 main() : SV_TARGET { - return C_foo_t_0.Sample(C_foo_s_0, foo_0.u_0); + return C_foo_t_0.Sample(C_foo_s_0, C_0.foo_0.u_0); } #endif diff --git a/tests/vkray/anyhit.slang.glsl b/tests/vkray/anyhit.slang.glsl index 43fd29e01..07789cdbd 100644 --- a/tests/vkray/anyhit.slang.glsl +++ b/tests/vkray/anyhit.slang.glsl @@ -10,8 +10,8 @@ struct Params_0 layout(binding = 0) layout(std140) uniform _S1 { - Params_0 gParams_0; -}; + Params_0 _data; +} gParams_0; layout(binding = 1) uniform texture2D gParams_alphaMap_0; @@ -35,7 +35,7 @@ void main() { SphereHitAttributes_0 _S4 = _S2; - if(bool(gParams_0.mode_0)) + if(bool(gParams_0._data.mode_0)) { float val_0 = textureLod( sampler2D(gParams_alphaMap_0, gParams_sampler_0), diff --git a/tests/vkray/callable-caller.slang.glsl b/tests/vkray/callable-caller.slang.glsl index 2704e6720..b0d174381 100644 --- a/tests/vkray/callable-caller.slang.glsl +++ b/tests/vkray/callable-caller.slang.glsl @@ -4,16 +4,26 @@ layout(row_major) uniform; layout(row_major) buffer; #extension GL_NV_ray_tracing : require +#define tmp_ubo _S1 +#define tmp_launchid _S2 +#define tmp_luanchidf _S3 +#define tmp_launchsize _S4 +#define tmp_launchpos _S5 +#define tmp_shaderidx _S6 +#define tmp_payload _S7 +#define tmp_launchid2 _S8 + struct SLANG_ParameterGroup_C_0 { uint shaderIndex_0; }; layout(binding = 0) -layout(std140) uniform C_0 +layout(std140) +uniform tmp_ubo { - uint shaderIndex_0; -}; + SLANG_ParameterGroup_C_0 _data; +} C_0; struct MaterialPayload_0 { @@ -43,25 +53,25 @@ void main() MaterialPayload_0 payload_1; payload_1.albedo_0 = vec4(0); - uvec3 _S1 = gl_LaunchIDNV; - vec2 _S2 = vec2(_S1.xy); + uvec3 tmp_launchid = gl_LaunchIDNV; + vec2 tmp_luanchidf = vec2(tmp_launchid.xy); - uvec3 _S3 = gl_LaunchSizeNV; - vec2 _S4 = _S2 / vec2(_S3.xy); + uvec3 tmp_launchsize = gl_LaunchSizeNV; + vec2 tmp_launchpos = tmp_luanchidf / vec2(tmp_launchsize.xy); - payload_1.uv_0 = _S4; + payload_1.uv_0 = tmp_launchpos; - uint _S5 = shaderIndex_0; + uint tmp_shaderidx = C_0._data.shaderIndex_0; - MaterialPayload_0 _S6; - _S6 = payload_1; - CallShader_0(_S5, _S6); - payload_1 = _S6; + MaterialPayload_0 tmp_payload; + tmp_payload = payload_1; + CallShader_0(tmp_shaderidx, tmp_payload); + payload_1 = tmp_payload; - uvec3 _S7 = gl_LaunchIDNV; + uvec3 tmp_launchid2 = gl_LaunchIDNV; imageStore( gImage_0, - ivec2(_S7.xy), + ivec2(tmp_launchid2.xy), payload_1.albedo_0); return; } diff --git a/tests/vkray/closesthit.slang.glsl b/tests/vkray/closesthit.slang.glsl index a056b7809..79fd3afbe 100644 --- a/tests/vkray/closesthit.slang.glsl +++ b/tests/vkray/closesthit.slang.glsl @@ -2,50 +2,70 @@ #version 460 #extension GL_NV_ray_tracing : require -layout(shaderRecordNV) -buffer ShaderRecord_0 +#define tmp_shaderrecord _S1 +#define tmp_colors _S2 +#define tmp_hitattrs _S3 +#define tmp_payload _S4 +#define tmp_localattrs _S5 +#define tmp_customidx _S6 +#define tmp_instanceid _S7 +#define tmp_add_0 _S8 +#define tmp_primid _S9 +#define tmp_add_1 _S10 +#define tmp_hitkind _S11 +#define tmp_hitt _S12 +#define tmp_tmin _S13 + +struct SLANG_ParameterGroup_ShaderRecord_0 { - uint shaderRecordID_0; + uint shaderRecordID_0; }; -layout(std430, binding = 0) buffer _S1 +layout(shaderRecordNV) +buffer tmp_shaderrecord { - vec4 colors_0[]; -}; + SLANG_ParameterGroup_ShaderRecord_0 _data; +} ShaderRecord_0; + +layout(std430, binding = 0) buffer tmp_colors +{ + vec4 _data[]; +} colors_0; struct BuiltInTriangleIntersectionAttributes_0 { vec2 barycentrics_0; }; -hitAttributeNV BuiltInTriangleIntersectionAttributes_0 _S2; + +hitAttributeNV BuiltInTriangleIntersectionAttributes_0 tmp_hitattrs; struct ReflectionRay_0 { vec4 color_0; }; -rayPayloadInNV ReflectionRay_0 _S3; +rayPayloadInNV ReflectionRay_0 tmp_payload; void main() { - BuiltInTriangleIntersectionAttributes_0 _S4 = _S2; + BuiltInTriangleIntersectionAttributes_0 tmp_localattrs = tmp_hitattrs; - uint _S5 = gl_InstanceCustomIndexNV; - uint _S6 = gl_InstanceID; + uint tmp_customidx = gl_InstanceCustomIndexNV; + uint tmp_instanceid = gl_InstanceID; - uint _S7 = _S5 + _S6; - uint _S8 = gl_PrimitiveID; + uint tmp_add_0 = tmp_customidx + tmp_instanceid; + uint tmp_primid = gl_PrimitiveID; - uint _S9 = _S7 + _S8; - uint _S10 = gl_HitKindNV; + uint tmp_add_1 = tmp_add_0 + tmp_primid; + uint tmp_hitkind = gl_HitKindNV; - vec4 color_1 = colors_0[_S9 + _S10 + shaderRecordID_0]; + vec4 color_1 = colors_0._data[tmp_add_1 + tmp_hitkind + ShaderRecord_0._data.shaderRecordID_0]; - float _S11 = gl_HitTNV; - float _S12 = gl_RayTminNV; + float tmp_hitt = gl_HitTNV; + float tmp_tmin = gl_RayTminNV; - _S3.color_0 = color_1 * (_S11 - _S12); + tmp_payload.color_0 = color_1 * (tmp_hitt - tmp_tmin); return; } diff --git a/tests/vkray/intersection.slang.glsl b/tests/vkray/intersection.slang.glsl index cfa53efa7..09d7e63a5 100644 --- a/tests/vkray/intersection.slang.glsl +++ b/tests/vkray/intersection.slang.glsl @@ -3,19 +3,37 @@ #extension GL_NV_ray_tracing : require +#define tmp_ubo _S1 +#define tmp_reportHit _S2 +#define tmp_origin _S3 +#define tmp_direction _S4 +#define tmp_tmin _S5 +#define tmp_tmax _S6 +#define tmp_ray _S7 +#define tmp_sphere _S8 +#define tmp_thit _S9 +#define tmp_hitattrs _S10 +#define tmp_dithit _S11 +#define tmp_reportresult _S12 + struct Sphere_0 { vec3 position_0; float radius_0; }; -layout(binding = 0) -layout(std140) -uniform U_0 +struct SLANG_ParameterGroup_U_0 { Sphere_0 gSphere_0; }; +layout(binding = 0) +layout(std140) +uniform tmp_ubo +{ + SLANG_ParameterGroup_U_0 _data; +} U_0; + struct RayDesc_0 { vec3 Origin_0; @@ -45,40 +63,40 @@ hitAttributeNV SphereHitAttributes_0 a_0; bool ReportHit_0(float tHit_1, uint hitKind_0, SphereHitAttributes_0 attributes_0) { a_0 = attributes_0; - bool _S1 = reportIntersectionNV(tHit_1, hitKind_0); - return _S1; + bool tmp_reportHit = reportIntersectionNV(tHit_1, hitKind_0); + return tmp_reportHit; } void main() { RayDesc_0 ray_1; - vec3 _S2 = gl_ObjectRayOriginNV; - ray_1.Origin_0 = _S2; - vec3 _S3 = gl_ObjectRayDirectionNV; + vec3 tmp_origin = gl_ObjectRayOriginNV; + ray_1.Origin_0 = tmp_origin; - ray_1.Direction_0 = _S3; - float _S4 = gl_RayTminNV; + vec3 tmp_direction = gl_ObjectRayDirectionNV; + ray_1.Direction_0 = tmp_direction; - ray_1.TMin_0 = _S4; - float _S5 = gl_RayTmaxNV; + float tmp_tmin = gl_RayTminNV; + ray_1.TMin_0 = tmp_tmin; - ray_1.TMax_0 = _S5; + float tmp_tmax = gl_RayTmaxNV; + ray_1.TMax_0 = tmp_tmax; - RayDesc_0 _S6 = ray_1; + RayDesc_0 tmp_ray = ray_1; - Sphere_0 _S7 = gSphere_0; + Sphere_0 tmp_sphere = U_0._data.gSphere_0; - float _S8; - SphereHitAttributes_0 _S9; - bool _S10 = rayIntersectsSphere_0(_S6, _S7, _S8, _S9); + float tmp_thit; + SphereHitAttributes_0 tmp_hitattrs; + bool tmp_dithit = rayIntersectsSphere_0(tmp_ray, tmp_sphere, tmp_thit, tmp_hitattrs); - float tHit_2 = _S8; - SphereHitAttributes_0 attrs_1 = _S9; + float tHit_2 = tmp_thit; + SphereHitAttributes_0 attrs_1 = tmp_hitattrs; - if(_S10) + if(tmp_dithit) { - bool _S11 = ReportHit_0(tHit_2, (uint((0))), attrs_1); + bool tmp_reportresult = ReportHit_0(tHit_2, (uint((0))), attrs_1); } return; diff --git a/tests/vkray/raygen.slang.glsl b/tests/vkray/raygen.slang.glsl index 512215a73..f65053ecf 100644 --- a/tests/vkray/raygen.slang.glsl +++ b/tests/vkray/raygen.slang.glsl @@ -1,10 +1,46 @@ //TEST_IGNORE_FILE: #version 460 +layout(row_major) uniform; + #extension GL_NV_ray_tracing : require #define TRACING_EPSILON 1e-6 +#define tmp_ubo _S1 +#define tmp_saturate _S2 +#define tmp_launchID_x _S3 +#define tmp_add_x _S4 +#define tmp_launchSize_x _S5 +#define tmp_div_x _S6 +#define tmp_launchID_y _S7 +#define tmp_add_y _S8 +#define tmp_launchSize_y _S9 +#define tmp_div_y _S10 +#define tmp_tex_pos _S11 +#define tmp_tex_nrm _S12 +#define tmp_light_invDist _S13 +#define tmp_trace_A _S14 +#define tmp_trace_B _S15 +#define tmp_trace_C _S16 +#define tmp_trace_D _S17 +#define tmp_trace_E _S18 +#define tmp_trace_ray _S19 +#define tmp_trace_payload _S20 +#define tmp_cmp _S21 +#define tmp_color _S22 +#define tmp_dot _S23 +#define tmp_sat _S24 +#define tmp_trace2_A _S25 +#define tmp_trace2_B _S26 +#define tmp_trace2_C _S27 +#define tmp_trace2_D _S28 +#define tmp_trace2_E _S29 +#define tmp_trace2_ray _S30 +#define tmp_trace2_payload _S31 +#define tmp_storeIdx _S32 + + layout(binding = 0) uniform texture2D samplerPosition_0; layout(binding = 2) uniform sampler sampler_0; layout(binding = 1) uniform texture2D samplerNormal_0; @@ -17,15 +53,20 @@ struct Light_0 #define NUM_LIGHTS 17 -layout(binding = 3) -layout(std140) uniform ubo_0 +struct Uniforms_0 { Light_0 light_0; vec4 viewPos_0; - layout(row_major) mat4x4 view_0; - layout(row_major) mat4x4 model_0; + mat4x4 view_0; + mat4x4 model_0; }; +layout(binding = 3) +layout(std140) uniform tmp_ubo +{ + Uniforms_0 _data; +} ubo_0; + layout(binding = 5) uniform accelerationStructureNV as_0; struct ShadowRay_0 @@ -79,8 +120,8 @@ void TraceRay_0( float saturate_0(float x_0) { - float _S1 = clamp(x_0, float(0), float(1)); - return _S1; + float tmp_saturate = clamp(x_0, float(0), float(1)); + return tmp_saturate; } void TraceRay_1( @@ -114,27 +155,28 @@ void main() { float atten_0; - uvec3 _S2 = gl_LaunchIDNV; - float _S3 = float(_S2.x) + 0.5; - uvec3 _S4 = gl_LaunchSizeNV; - float _S5 = _S3 / float(_S4.x); - uvec3 _S6 = gl_LaunchIDNV; - float _S7 = float(_S6.y) + 0.5; - uvec3 _S8 = gl_LaunchSizeNV; - float _S9 = _S7 / float(_S8.y); - vec2 inUV_0 = vec2(_S5, _S9); + uvec3 tmp_launchID_x = gl_LaunchIDNV; + float tmp_add_x = float(tmp_launchID_x.x) + 0.5; + uvec3 tmp_launchSize_x = gl_LaunchSizeNV; + float tmp_div_x = tmp_add_x / float(tmp_launchSize_x.x); + + uvec3 tmp_launchID_y = gl_LaunchIDNV; + float tmp_add_y = float(tmp_launchID_y.y) + 0.5; + uvec3 tmp_launchSize_y = gl_LaunchSizeNV; + float tmp_div_y = tmp_add_y / float(tmp_launchSize_y.y); + vec2 inUV_0 = vec2(tmp_div_x, tmp_div_y); - vec4 _S10 = texture(sampler2D(samplerPosition_0, sampler_0), inUV_0); - vec3 P_0 = _S10.xyz; + vec4 tmp_tex_pos = texture(sampler2D(samplerPosition_0, sampler_0), inUV_0); + vec3 P_0 = tmp_tex_pos.xyz; - vec4 _S11 = texture(sampler2D(samplerNormal_0, sampler_0), inUV_0); - vec3 N_0 = _S11.xyz * 2.0 - 1.0; + vec4 tmp_tex_nrm = texture(sampler2D(samplerNormal_0, sampler_0), inUV_0); + vec3 N_0 = tmp_tex_nrm.xyz * 2.0 - 1.0; - vec3 lightDelta_0 = light_0.position_0.xyz - P_0; + vec3 lightDelta_0 = ubo_0._data.light_0.position_0.xyz - P_0; float lightDist_0 = length(lightDelta_0); vec3 L_0 = normalize(lightDelta_0); - float _S12 = 1.0 / (lightDist_0 * lightDist_0); + float tmp_light_invDist = 1.0 / (lightDist_0 * lightDist_0); RayDesc_0 ray_0; ray_0.Origin_0 = P_0; @@ -144,47 +186,47 @@ void main() ShadowRay_0 shadowRay_0; shadowRay_0.hitDistance_0 = float(0); - const uint _S13 = uint(1); - const uint _S14 = uint(0xFF); - const uint _S15 = uint(0); - const uint _S16 = uint(0); - const uint _S17 = uint(2); - - RayDesc_0 _S18 = ray_0; - ShadowRay_0 _S19; - _S19 = shadowRay_0; - TraceRay_0(as_0, _S13, _S14, _S15, _S16, _S17, _S18, _S19); - shadowRay_0 = _S19; - - bool _S20 = shadowRay_0.hitDistance_0 < lightDist_0; + const uint tmp_trace_A = uint(1); + const uint tmp_trace_B = uint(0xFF); + const uint tmp_trace_C = uint(0); + const uint tmp_trace_D = uint(0); + const uint tmp_trace_E = uint(2); + + RayDesc_0 tmp_trace_ray = ray_0; + ShadowRay_0 tmp_trace_payload; + tmp_trace_payload = shadowRay_0; + TraceRay_0(as_0, tmp_trace_A, tmp_trace_B, tmp_trace_C, tmp_trace_D, tmp_trace_E, tmp_trace_ray, tmp_trace_payload); + shadowRay_0 = tmp_trace_payload; + + bool tmp_cmp = shadowRay_0.hitDistance_0 < lightDist_0; ReflectionRay_0 reflectionRay_0; - if(_S20) + if(tmp_cmp) { atten_0 = (0.00000000000000000000); } else { - atten_0 = _S12; + atten_0 = tmp_light_invDist; } - vec3 _S21 = light_0.color_0.xyz; - float _S22 = dot(N_0, L_0); - float _S23 = saturate_0(_S22); - vec3 color_2 = (_S21 * _S23) * atten_0; - - const uint _S24 = uint(1); - const uint _S25 = uint(255); - const uint _S26 = uint(0); - const uint _S27 = uint(0); - const uint _S28 = uint(2); - RayDesc_0 _S29 = ray_0; - ReflectionRay_0 _S30; - _S30 = reflectionRay_0; - TraceRay_1(as_0, _S24, _S25, _S26, _S27, _S28, _S29, _S30); - - vec3 color_3 = color_2 + _S30.color_1; - - uvec3 _S31 = gl_LaunchIDNV; - imageStore(outputImage_0, ivec2(uvec2(ivec2(_S31.xy))), vec4(color_3, 1.0)); + vec3 tmp_color = ubo_0._data.light_0.color_0.xyz; + float tmp_dot = dot(N_0, L_0); + float tmp_sat = saturate_0(tmp_dot); + vec3 color_2 = (tmp_color * tmp_sat) * atten_0; + + const uint tmp_trace2_A = uint(1); + const uint tmp_trace2_B = uint(255); + const uint tmp_trace2_C = uint(0); + const uint tmp_trace2_D = uint(0); + const uint tmp_trace2_E = uint(2); + RayDesc_0 tmp_trace2_ray = ray_0; + ReflectionRay_0 tmp_trace2_payload; + tmp_trace2_payload = reflectionRay_0; + TraceRay_1(as_0, tmp_trace2_A, tmp_trace2_B, tmp_trace2_C, tmp_trace2_D, tmp_trace2_E, tmp_trace2_ray, tmp_trace2_payload); + + vec3 color_3 = color_2 + tmp_trace2_payload.color_1; + + uvec3 tmp_storeIdx = gl_LaunchIDNV; + imageStore(outputImage_0, ivec2(uvec2(ivec2(tmp_storeIdx.xy))), vec4(color_3, 1.0)); return; } |
