diff options
| author | James Helferty (NVIDIA) <jhelferty@nvidia.com> | 2025-06-10 11:02:38 -0400 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-06-10 10:02:38 -0500 |
| commit | e37202002276b679c5241b2678af612552b06d2c (patch) | |
| tree | dd072338524507cad6db45b28b976d0fbb6dd57a /tests | |
| parent | 954ad3d5466219eab216add0cb1ac920da548425 (diff) | |
Fix IR layout of 3-element vectors in cbuffers for -fvk-use-dx-layout (#7282)
* Better handling for the 16-byte boundary of D3D cbuffers
Fixes #6921
D3D cbuffers have slightly different packing rules that allow packing
vectors into a 16-byte slot at element alignments, except when
a field would cross a 16-byte boundary. In that case, we need to
realign the field to the next 16-byte boundary.
In particular, this impacts vec3s, which are not a power of two in
size and thus require slightly different alignment logic than
std430 and std140. (Example: a float followed by a float3 should fit
together, in that order, in a single 16-byte slot.)
Adds test cases.
Adds documentation page for GLSL target
Diffstat (limited to 'tests')
| -rw-r--r-- | tests/expected-failure.txt | 2 | ||||
| -rw-r--r-- | tests/hlsl/cbuffer-float3-offsets-aligned.slang | 115 | ||||
| -rw-r--r-- | tests/hlsl/cbuffer-float3-offsets-unaligned.slang | 138 |
3 files changed, 255 insertions, 0 deletions
diff --git a/tests/expected-failure.txt b/tests/expected-failure.txt index 7283c8d97..22142e31d 100644 --- a/tests/expected-failure.txt +++ b/tests/expected-failure.txt @@ -5,3 +5,5 @@ tests/language-feature/saturated-cooperation/fuse.slang (vk) tests/bugs/byte-address-buffer-interlocked-add-f32.slang (vk) tests/ir/loop-unroll-0.slang.1 (vk) tests/hlsl-intrinsic/texture/float-atomics.slang (vk) +tests/hlsl/cbuffer-float3-offsets-aligned.slang.2 (vk) +tests/hlsl/cbuffer-float3-offsets-unaligned.slang.2 (vk) diff --git a/tests/hlsl/cbuffer-float3-offsets-aligned.slang b/tests/hlsl/cbuffer-float3-offsets-aligned.slang new file mode 100644 index 000000000..7c548546a --- /dev/null +++ b/tests/hlsl/cbuffer-float3-offsets-aligned.slang @@ -0,0 +1,115 @@ +//TEST:SIMPLE(filecheck=SPIRV): -target spirv -profile cs_6_2 -entry computeMain -line-directive-mode none -fvk-use-dx-layout +//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=BUFFER):-slang -compute -dx12 -use-dxil -profile cs_6_2 -Xslang... -Xdxc -fvk-use-dx-layout -Xdxc -enable-16bit-types -X. -output-using-type +//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=BUFFER):-slang -compute -vk -profile cs_6_2 -Xslang... -fvk-use-dx-layout -X. -output-using-type +//TEST:REFLECTION(filecheck=REFLECT):-stage compute -entry computeMain -target spirv -profile cs_6_2 -no-codegen -line-directive-mode none -fvk-use-dx-layout + +//TEST_INPUT:ubuffer(stride=4, count=17):out,name=outputBuffer +RWStructuredBuffer<float> outputBuffer; + +//TEST_INPUT:set Constants.v0={1.0,2.0,3.0} +//TEST_INPUT:set Constants.v1=4.0 +//TEST_INPUT:set Constants.v2=5.0 +//TEST_INPUT:set Constants.v3[0]={6.0,7.0,8.0} +//TEST_INPUT:set Constants.v3[1]={9.0,10.0,11.0} +//TEST_INPUT:set Constants.v4=12.0 +//TEST_INPUT:set Constants.v5=13.0 +//TEST_INPUT:set Constants.v6[0]={14.0,15.0,16.0} +//TEST_INPUT:set Constants.v7=17.0 + +// Checks cbuffer packing rule cases involving 3-element vectors. 
+// HLSL aligns at the element size, while GLSL std140 and std430 +// align to the size of a 4-element vector of the element type. +// Checks cases that are compatible with std140 and std430. + +cbuffer Constants +{ + // float v1 should be packed in with float3 v0 + float3 v0; + float v1; + + // float3[2] v3 should be aligned to next 16 byte boundary, + // and not packed in with float v2. + float v2; + float3 v3[2]; + + // float v4 should be packed in with last float3 from v3. + float v4; + + // float3[1] v6 should be aligned to next 16 byte boundary + // and not packed in with float v5. + float v5; + float3 v6[1]; + + // float v7 should be packed in with float3 from v6. + float v7; +}; + +// For spirv, check that the offsets are correct. +// SPIRV: OpMemberDecorate {{.*}} 0 Offset 0 +// SPIRV: OpMemberDecorate {{.*}} 1 Offset 12 +// SPIRV: OpMemberDecorate {{.*}} 2 Offset 16 +// SPIRV: OpMemberDecorate {{.*}} 3 Offset 32 +// SPIRV: OpMemberDecorate {{.*}} 4 Offset 60 +// SPIRV: OpMemberDecorate {{.*}} 5 Offset 64 +// SPIRV: OpMemberDecorate {{.*}} 6 Offset 80 +// SPIRV: OpMemberDecorate {{.*}} 7 Offset 92 + +// REFLECT: "name": "v0", +// REFLECT: "offset": 0 +// REFLECT: "name": "v1", +// REFLECT: "offset": 12 +// REFLECT: "name": "v2", +// REFLECT: "offset": 16 +// REFLECT: "name": "v3", +// REFLECT: "offset": 32 +// REFLECT: "name": "v4", +// REFLECT: "offset": 60 +// REFLECT: "name": "v5", +// REFLECT: "offset": 64 +// REFLECT: "name": "v6", +// REFLECT: "offset": 80 +// REFLECT: "name": "v7", +// REFLECT: "offset": 92 + +[numthreads(1, 1, 1)] +void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) +{ + int i = 0; + outputBuffer[i++] = v0.x; + outputBuffer[i++] = v0.y; + outputBuffer[i++] = v0.z; + outputBuffer[i++] = v1; + outputBuffer[i++] = v2; + outputBuffer[i++] = v3[0].x; + outputBuffer[i++] = v3[0].y; + outputBuffer[i++] = v3[0].z; + outputBuffer[i++] = v3[1].x; + outputBuffer[i++] = v3[1].y; + outputBuffer[i++] = v3[1].z; + outputBuffer[i++] = 
v4; + outputBuffer[i++] = v5; + outputBuffer[i++] = v6[0].x; + outputBuffer[i++] = v6[0].y; + outputBuffer[i++] = v6[0].z; + outputBuffer[i++] = v7; + + // BUFFER: 1 + // BUFFER-NEXT: 2 + // BUFFER-NEXT: 3 + // BUFFER-NEXT: 4 + // BUFFER-NEXT: 5 + // BUFFER-NEXT: 6 + // BUFFER-NEXT: 7 + // BUFFER-NEXT: 8 + // BUFFER-NEXT: 9 + // BUFFER-NEXT: 10 + // BUFFER-NEXT: 11 + // BUFFER-NEXT: 12 + // BUFFER-NEXT: 13 + // BUFFER-NEXT: 14 + // BUFFER-NEXT: 15 + // BUFFER-NEXT: 16 + // BUFFER-NEXT: 17 + +} + diff --git a/tests/hlsl/cbuffer-float3-offsets-unaligned.slang b/tests/hlsl/cbuffer-float3-offsets-unaligned.slang new file mode 100644 index 000000000..c3824d1a2 --- /dev/null +++ b/tests/hlsl/cbuffer-float3-offsets-unaligned.slang @@ -0,0 +1,138 @@ +//TEST:SIMPLE(filecheck=SPIRV): -target spirv -profile cs_6_2 -entry computeMain -line-directive-mode none -fvk-use-dx-layout +//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=BUFFER):-slang -compute -dx12 -use-dxil -Xslang... -Xdxc -fvk-use-dx-layout -Xdxc -enable-16bit-types -X. -output-using-type +//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=BUFFER):-slang -compute -vk -Xslang... -fvk-use-dx-layout -X. 
-output-using-type +//TEST:REFLECTION(filecheck=REFLECT):-stage compute -entry computeMain -target spirv -profile cs_6_2 -no-codegen -line-directive-mode none -fvk-use-dx-layout + +// dxc: -T cs_6_2 -E computeMain -spirv -fvk-use-dx-layout -enable-16bit-types + +//TEST_INPUT:ubuffer(stride=4, count=24):out,name=outputBuffer +RWStructuredBuffer<float> outputBuffer; + +//TEST_INPUT:set Constants.v0=1.0 +//TEST_INPUT:set Constants.v1={2.0,3.0,4.0} +//TEST_INPUT:set Constants.v2=0x4500 +//TEST_INPUT:set Constants.v3={0x4600,0x4700,0x4800} +//TEST_INPUT:set Constants.v4=0x4880 +//TEST_INPUT:set Constants.v5={0x4900,0x4980,0x4a00} +//TEST_INPUT:set Constants.v6=0x4a80 +//TEST_INPUT:set Constants.v7={0x4b00,0x4b80,0x4c00,0x4c40} +//TEST_INPUT:set Constants.v8={0x4c80,0x4cc0,0x4d00} +//TEST_INPUT:set Constants.v9=0x4d40 +//TEST_INPUT:set Constants.v10={22.0,23.0,24.0} + + +// Checks cbuffer packing rule cases involving 3-element vectors. +// HLSL aligns at the element size, while GLSL std140 and std430 +// align to the size of a 4-element vector of the element type. +// Checks cases that are not compatible with std140 and std430. + +cbuffer Constants +{ + // float3 v1 should be packed in with float v0. + float v0; + float3 v1; + + // v2,v3,v4,v5 should all be packed together + float16_t v2; + vector<float16_t, 3> v3; + float16_t v4; + vector<float16_t, 3> v5; + + float16_t v6; + vector<float16_t, 4> v7; + vector<float16_t, 3> v8; + + // There should be a 2-byte gap between v9 and v10. 
+ float16_t v9; + float3 v10; +}; + +// SPIRV: OpMemberDecorate {{.*}} 0 Offset 0 +// SPIRV: OpMemberDecorate {{.*}} 1 Offset 4 +// SPIRV: OpMemberDecorate {{.*}} 2 Offset 16 +// SPIRV: OpMemberDecorate {{.*}} 3 Offset 18 +// SPIRV: OpMemberDecorate {{.*}} 4 Offset 24 +// SPIRV: OpMemberDecorate {{.*}} 5 Offset 26 +// SPIRV: OpMemberDecorate {{.*}} 6 Offset 32 +// SPIRV: OpMemberDecorate {{.*}} 7 Offset 34 +// SPIRV: OpMemberDecorate {{.*}} 8 Offset 42 +// SPIRV: OpMemberDecorate {{.*}} 9 Offset 48 +// SPIRV: OpMemberDecorate {{.*}} 10 Offset 52 + +// REFLECT: "name": "v0", +// REFLECT: "offset": 0 +// REFLECT: "name": "v1", +// REFLECT: "offset": 4 +// REFLECT: "name": "v2", +// REFLECT: "offset": 16 +// REFLECT: "name": "v3", +// REFLECT: "offset": 18 +// REFLECT: "name": "v4", +// REFLECT: "offset": 24 +// REFLECT: "name": "v5", +// REFLECT: "offset": 26 +// REFLECT: "name": "v6", +// REFLECT: "offset": 32 +// REFLECT: "name": "v7", +// REFLECT: "offset": 34 +// REFLECT: "name": "v8", +// REFLECT: "offset": 42 +// REFLECT: "name": "v9", +// REFLECT: "offset": 48 +// REFLECT: "name": "v10", +// REFLECT: "offset": 52 + +[numthreads(1, 1, 1)] +void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) +{ + int i = 0; + outputBuffer[i++] = v0; + outputBuffer[i++] = v1.x; + outputBuffer[i++] = v1.y; + outputBuffer[i++] = v1.z; + outputBuffer[i++] = v2; + outputBuffer[i++] = v3.x; + outputBuffer[i++] = v3.y; + outputBuffer[i++] = v3.z; + outputBuffer[i++] = v4; + outputBuffer[i++] = v5.x; + outputBuffer[i++] = v5.y; + outputBuffer[i++] = v5.z; + outputBuffer[i++] = v6; + outputBuffer[i++] = v7.x; + outputBuffer[i++] = v7.y; + outputBuffer[i++] = v7.z; + outputBuffer[i++] = v7.w; + outputBuffer[i++] = v8.x; + outputBuffer[i++] = v8.y; + outputBuffer[i++] = v8.z; + outputBuffer[i++] = v9; + outputBuffer[i++] = v10.x; + outputBuffer[i++] = v10.y; + outputBuffer[i++] = v10.z; + + // BUFFER: 1 + // BUFFER-NEXT: 2 + // BUFFER-NEXT: 3 + // BUFFER-NEXT: 4 + // 
BUFFER-NEXT: 5 + // BUFFER-NEXT: 6 + // BUFFER-NEXT: 7 + // BUFFER-NEXT: 8 + // BUFFER-NEXT: 9 + // BUFFER-NEXT: 10 + // BUFFER-NEXT: 11 + // BUFFER-NEXT: 12 + // BUFFER-NEXT: 13 + // BUFFER-NEXT: 14 + // BUFFER-NEXT: 15 + // BUFFER-NEXT: 16 + // BUFFER-NEXT: 17 + // BUFFER-NEXT: 18 + // BUFFER-NEXT: 19 + // BUFFER-NEXT: 20 + // BUFFER-NEXT: 21 + // BUFFER-NEXT: 22 + // BUFFER-NEXT: 23 + // BUFFER-NEXT: 24 +} |
