summaryrefslogtreecommitdiffstats
path: root/tests
diff options
context:
space:
mode:
authorJames Helferty (NVIDIA) <jhelferty@nvidia.com>2025-06-10 11:02:38 -0400
committerGitHub <noreply@github.com>2025-06-10 10:02:38 -0500
commite37202002276b679c5241b2678af612552b06d2c (patch)
treedd072338524507cad6db45b28b976d0fbb6dd57a /tests
parent954ad3d5466219eab216add0cb1ac920da548425 (diff)
Fix IR layout of 3-element vectors in cbuffers for -fvk-use-dx-layout (#7282)
* Better handling for 16-byte boundary of d3d cbuffer Fixes #6921 D3D cbuffers have slightly different packing rules that allow packing vectors into a 16-byte slot at element alignments, except when a field would cross a 16-byte boundary. In that case, we need to realign the field to the next 16-byte boundary. In particular, this impacts vec3s, which are not a power of two in size and thus require slightly different alignment logic, compared to std430 and std140. (Example: a float and float3 should fit together in that order in a single slot.) Adds test cases. Adds documentation page for GLSL target
Diffstat (limited to 'tests')
-rw-r--r--tests/expected-failure.txt2
-rw-r--r--tests/hlsl/cbuffer-float3-offsets-aligned.slang115
-rw-r--r--tests/hlsl/cbuffer-float3-offsets-unaligned.slang138
3 files changed, 255 insertions, 0 deletions
diff --git a/tests/expected-failure.txt b/tests/expected-failure.txt
index 7283c8d97..22142e31d 100644
--- a/tests/expected-failure.txt
+++ b/tests/expected-failure.txt
@@ -5,3 +5,5 @@ tests/language-feature/saturated-cooperation/fuse.slang (vk)
tests/bugs/byte-address-buffer-interlocked-add-f32.slang (vk)
tests/ir/loop-unroll-0.slang.1 (vk)
tests/hlsl-intrinsic/texture/float-atomics.slang (vk)
+tests/hlsl/cbuffer-float3-offsets-aligned.slang.2 (vk)
+tests/hlsl/cbuffer-float3-offsets-unaligned.slang.2 (vk)
diff --git a/tests/hlsl/cbuffer-float3-offsets-aligned.slang b/tests/hlsl/cbuffer-float3-offsets-aligned.slang
new file mode 100644
index 000000000..7c548546a
--- /dev/null
+++ b/tests/hlsl/cbuffer-float3-offsets-aligned.slang
@@ -0,0 +1,115 @@
+//TEST:SIMPLE(filecheck=SPIRV): -target spirv -profile cs_6_2 -entry computeMain -line-directive-mode none -fvk-use-dx-layout
+//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=BUFFER):-slang -compute -dx12 -use-dxil -profile cs_6_2 -Xslang... -Xdxc -fvk-use-dx-layout -Xdxc -enable-16bit-types -X. -output-using-type
+//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=BUFFER):-slang -compute -vk -profile cs_6_2 -Xslang... -fvk-use-dx-layout -X. -output-using-type
+//TEST:REFLECTION(filecheck=REFLECT):-stage compute -entry computeMain -target spirv -profile cs_6_2 -no-codegen -line-directive-mode none -fvk-use-dx-layout
+
+//TEST_INPUT:ubuffer(stride=4, count=17):out,name=outputBuffer
+RWStructuredBuffer<float> outputBuffer;
+
+//TEST_INPUT:set Constants.v0={1.0,2.0,3.0}
+//TEST_INPUT:set Constants.v1=4.0
+//TEST_INPUT:set Constants.v2=5.0
+//TEST_INPUT:set Constants.v3[0]={6.0,7.0,8.0}
+//TEST_INPUT:set Constants.v3[1]={9.0,10.0,11.0}
+//TEST_INPUT:set Constants.v4=12.0
+//TEST_INPUT:set Constants.v5=13.0
+//TEST_INPUT:set Constants.v6[0]={14.0,15.0,16.0}
+//TEST_INPUT:set Constants.v7=17.0
+
+// Checks cbuffer packing rule cases involving 3-element vectors.
+// HLSL aligns at the element size, while GLSL std140 and std430
+// align to the size of a 4-element vector of the element type.
+// Checks cases that are compatible with std140 and std430.
+
+cbuffer Constants
+{
+ // float v1 should be packed in with float3 v0 (same 16-byte slot).
+ float3 v0; // expected offset 0
+ float v1; // expected offset 12
+
+ // float3[2] v3 should be aligned to next 16 byte boundary,
+ // and not packed in with float v2.
+ float v2; // expected offset 16
+ float3 v3[2]; // expected offset 32
+
+ // float v4 should be packed in with last float3 from v3.
+ float v4; // expected offset 60
+
+ // float3[1] v6 should be aligned to next 16 byte boundary
+ // and not packed in with float v5.
+ float v5; // expected offset 64
+ float3 v6[1]; // expected offset 80
+
+ // float v7 should be packed in with float3 from v6.
+ float v7; // expected offset 92
+};
+
+// For spirv, check that the offsets are correct.
+// SPIRV: OpMemberDecorate {{.*}} 0 Offset 0
+// SPIRV: OpMemberDecorate {{.*}} 1 Offset 12
+// SPIRV: OpMemberDecorate {{.*}} 2 Offset 16
+// SPIRV: OpMemberDecorate {{.*}} 3 Offset 32
+// SPIRV: OpMemberDecorate {{.*}} 4 Offset 60
+// SPIRV: OpMemberDecorate {{.*}} 5 Offset 64
+// SPIRV: OpMemberDecorate {{.*}} 6 Offset 80
+// SPIRV: OpMemberDecorate {{.*}} 7 Offset 92
+
+// REFLECT: "name": "v0",
+// REFLECT: "offset": 0
+// REFLECT: "name": "v1",
+// REFLECT: "offset": 12
+// REFLECT: "name": "v2",
+// REFLECT: "offset": 16
+// REFLECT: "name": "v3",
+// REFLECT: "offset": 32
+// REFLECT: "name": "v4",
+// REFLECT: "offset": 60
+// REFLECT: "name": "v5",
+// REFLECT: "offset": 64
+// REFLECT: "name": "v6",
+// REFLECT: "offset": 80
+// REFLECT: "name": "v7",
+// REFLECT: "offset": 92
+
+[numthreads(1, 1, 1)]
+void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) // Copies every Constants field, in declaration order, into outputBuffer.
+{
+ int i = 0; // Next outputBuffer slot to write; post-incremented by each store below (17 stores in total).
+ outputBuffer[i++] = v0.x;
+ outputBuffer[i++] = v0.y;
+ outputBuffer[i++] = v0.z;
+ outputBuffer[i++] = v1;
+ outputBuffer[i++] = v2;
+ outputBuffer[i++] = v3[0].x;
+ outputBuffer[i++] = v3[0].y;
+ outputBuffer[i++] = v3[0].z;
+ outputBuffer[i++] = v3[1].x;
+ outputBuffer[i++] = v3[1].y;
+ outputBuffer[i++] = v3[1].z;
+ outputBuffer[i++] = v4;
+ outputBuffer[i++] = v5;
+ outputBuffer[i++] = v6[0].x;
+ outputBuffer[i++] = v6[0].y;
+ outputBuffer[i++] = v6[0].z;
+ outputBuffer[i++] = v7;
+
+ // BUFFER: 1
+ // BUFFER-NEXT: 2
+ // BUFFER-NEXT: 3
+ // BUFFER-NEXT: 4
+ // BUFFER-NEXT: 5
+ // BUFFER-NEXT: 6
+ // BUFFER-NEXT: 7
+ // BUFFER-NEXT: 8
+ // BUFFER-NEXT: 9
+ // BUFFER-NEXT: 10
+ // BUFFER-NEXT: 11
+ // BUFFER-NEXT: 12
+ // BUFFER-NEXT: 13
+ // BUFFER-NEXT: 14
+ // BUFFER-NEXT: 15
+ // BUFFER-NEXT: 16
+ // BUFFER-NEXT: 17
+
+}
+
diff --git a/tests/hlsl/cbuffer-float3-offsets-unaligned.slang b/tests/hlsl/cbuffer-float3-offsets-unaligned.slang
new file mode 100644
index 000000000..c3824d1a2
--- /dev/null
+++ b/tests/hlsl/cbuffer-float3-offsets-unaligned.slang
@@ -0,0 +1,138 @@
+//TEST:SIMPLE(filecheck=SPIRV): -target spirv -profile cs_6_2 -entry computeMain -line-directive-mode none -fvk-use-dx-layout
+//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=BUFFER):-slang -compute -dx12 -use-dxil -Xslang... -Xdxc -fvk-use-dx-layout -Xdxc -enable-16bit-types -X. -output-using-type
+//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=BUFFER):-slang -compute -vk -Xslang... -fvk-use-dx-layout -X. -output-using-type
+//TEST:REFLECTION(filecheck=REFLECT):-stage compute -entry computeMain -target spirv -profile cs_6_2 -no-codegen -line-directive-mode none -fvk-use-dx-layout
+
+// dxc: -T cs_6_2 -E computeMain -spirv -fvk-use-dx-layout -enable-16bit-types
+
+//TEST_INPUT:ubuffer(stride=4, count=24):out,name=outputBuffer
+RWStructuredBuffer<float> outputBuffer;
+
+//TEST_INPUT:set Constants.v0=1.0
+//TEST_INPUT:set Constants.v1={2.0,3.0,4.0}
+//TEST_INPUT:set Constants.v2=0x4500
+//TEST_INPUT:set Constants.v3={0x4600,0x4700,0x4800}
+//TEST_INPUT:set Constants.v4=0x4880
+//TEST_INPUT:set Constants.v5={0x4900,0x4980,0x4a00}
+//TEST_INPUT:set Constants.v6=0x4a80
+//TEST_INPUT:set Constants.v7={0x4b00,0x4b80,0x4c00,0x4c40}
+//TEST_INPUT:set Constants.v8={0x4c80,0x4cc0,0x4d00}
+//TEST_INPUT:set Constants.v9=0x4d40
+//TEST_INPUT:set Constants.v10={22.0,23.0,24.0}
+
+
+// Checks cbuffer packing rule cases involving 3-element vectors.
+// HLSL aligns at the element size, while GLSL std140 and std430
+// align to the size of a 4-element vector of the element type.
+// Checks cases that are not compatible with std140 and std430.
+
+cbuffer Constants
+{
+ // float3 v1 should be packed in with float v0.
+ float v0; // expected offset 0
+ float3 v1; // expected offset 4
+
+ // v2,v3,v4,v5 should all be packed together
+ float16_t v2; // expected offset 16
+ vector<float16_t, 3> v3; // expected offset 18
+ float16_t v4; // expected offset 24
+ vector<float16_t, 3> v5; // expected offset 26
+
+ float16_t v6; // expected offset 32; v6, v7 and v8 are expected to pack back-to-back
+ vector<float16_t, 4> v7; // expected offset 34
+ vector<float16_t, 3> v8; // expected offset 42
+
+ // There should be a 2-byte gap between v9 and v10.
+ float16_t v9; // expected offset 48
+ float3 v10; // expected offset 52
+};
+
+// SPIRV: OpMemberDecorate {{.*}} 0 Offset 0
+// SPIRV: OpMemberDecorate {{.*}} 1 Offset 4
+// SPIRV: OpMemberDecorate {{.*}} 2 Offset 16
+// SPIRV: OpMemberDecorate {{.*}} 3 Offset 18
+// SPIRV: OpMemberDecorate {{.*}} 4 Offset 24
+// SPIRV: OpMemberDecorate {{.*}} 5 Offset 26
+// SPIRV: OpMemberDecorate {{.*}} 6 Offset 32
+// SPIRV: OpMemberDecorate {{.*}} 7 Offset 34
+// SPIRV: OpMemberDecorate {{.*}} 8 Offset 42
+// SPIRV: OpMemberDecorate {{.*}} 9 Offset 48
+// SPIRV: OpMemberDecorate {{.*}} 10 Offset 52
+
+// REFLECT: "name": "v0",
+// REFLECT: "offset": 0
+// REFLECT: "name": "v1",
+// REFLECT: "offset": 4
+// REFLECT: "name": "v2",
+// REFLECT: "offset": 16
+// REFLECT: "name": "v3",
+// REFLECT: "offset": 18
+// REFLECT: "name": "v4",
+// REFLECT: "offset": 24
+// REFLECT: "name": "v5",
+// REFLECT: "offset": 26
+// REFLECT: "name": "v6",
+// REFLECT: "offset": 32
+// REFLECT: "name": "v7",
+// REFLECT: "offset": 34
+// REFLECT: "name": "v8",
+// REFLECT: "offset": 42
+// REFLECT: "name": "v9",
+// REFLECT: "offset": 48
+// REFLECT: "name": "v10",
+// REFLECT: "offset": 52
+
+[numthreads(1, 1, 1)]
+void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) // Copies every Constants field, in declaration order, into outputBuffer.
+{
+ int i = 0; // Next outputBuffer slot to write; post-incremented by each store below (24 stores in total).
+ outputBuffer[i++] = v0;
+ outputBuffer[i++] = v1.x;
+ outputBuffer[i++] = v1.y;
+ outputBuffer[i++] = v1.z;
+ outputBuffer[i++] = v2;
+ outputBuffer[i++] = v3.x;
+ outputBuffer[i++] = v3.y;
+ outputBuffer[i++] = v3.z;
+ outputBuffer[i++] = v4;
+ outputBuffer[i++] = v5.x;
+ outputBuffer[i++] = v5.y;
+ outputBuffer[i++] = v5.z;
+ outputBuffer[i++] = v6;
+ outputBuffer[i++] = v7.x;
+ outputBuffer[i++] = v7.y;
+ outputBuffer[i++] = v7.z;
+ outputBuffer[i++] = v7.w;
+ outputBuffer[i++] = v8.x;
+ outputBuffer[i++] = v8.y;
+ outputBuffer[i++] = v8.z;
+ outputBuffer[i++] = v9;
+ outputBuffer[i++] = v10.x;
+ outputBuffer[i++] = v10.y;
+ outputBuffer[i++] = v10.z;
+
+ // BUFFER: 1
+ // BUFFER-NEXT: 2
+ // BUFFER-NEXT: 3
+ // BUFFER-NEXT: 4
+ // BUFFER-NEXT: 5
+ // BUFFER-NEXT: 6
+ // BUFFER-NEXT: 7
+ // BUFFER-NEXT: 8
+ // BUFFER-NEXT: 9
+ // BUFFER-NEXT: 10
+ // BUFFER-NEXT: 11
+ // BUFFER-NEXT: 12
+ // BUFFER-NEXT: 13
+ // BUFFER-NEXT: 14
+ // BUFFER-NEXT: 15
+ // BUFFER-NEXT: 16
+ // BUFFER-NEXT: 17
+ // BUFFER-NEXT: 18
+ // BUFFER-NEXT: 19
+ // BUFFER-NEXT: 20
+ // BUFFER-NEXT: 21
+ // BUFFER-NEXT: 22
+ // BUFFER-NEXT: 23
+ // BUFFER-NEXT: 24
+}