From bdda8a90cdd44ca822b21233ac988f92d1f20826 Mon Sep 17 00:00:00 2001 From: "Harsh Aggarwal (NVIDIA)" Date: Fri, 1 Aug 2025 14:48:53 +0530 Subject: Fix 7441: CUDA boolean vector layout to use 1-byte elements (#7862) * Fix 7441: CUDA boolean vector layout to use 1-byte elements Boolean vectors (bool1, bool2, bool3, bool4) were incorrectly implemented as integer-based types using 4 bytes per element instead of actual 1-byte boolean elements on CUDA targets. Changes: - Update CUDA prelude to define boolean vectors as structs with bool fields instead of typedef aliases to integer vectors - Implement CUDALayoutRulesImpl::GetVectorLayout to use 1-byte alignment for boolean vectors, matching actual CUDA memory layout behavior - Update make_bool functions to populate struct fields correctly This ensures boolean vectors have the same memory layout as bool[4] arrays: - bool1: 1 byte (was 4 bytes) - bool2: 2 bytes (was 8 bytes) - bool3: 3 bytes (was 12 bytes) - bool4: 4 bytes (was 16 bytes) Fixes memory layout mismatch between Slang reflection API and actual CUDA compilation, achieving 75% memory savings for boolean vector usage. 
* Fix CI issues - Add and update associated functions and operators * Make boolX same as uchar * Use align construct on struct for boolX * Improve Test case for robust alignment checks * Formatting * Disable selected slangpy tests * add metal check which is slightly different than cuda * Test-1 * Test-2 * Test-3 * Test-4 * ReflectionChange * cleanup and update * _slang_select with plain bool is needed for reverse-loop-checkpoint-test --- tests/bugs/gh-7441.slang | 74 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) create mode 100644 tests/bugs/gh-7441.slang (limited to 'tests/bugs') diff --git a/tests/bugs/gh-7441.slang b/tests/bugs/gh-7441.slang new file mode 100644 index 000000000..94ca7965b --- /dev/null +++ b/tests/bugs/gh-7441.slang @@ -0,0 +1,74 @@ +//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=BUFFER):-cuda -compute -output-using-type +//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=BUFFER-MTL):-metal -compute -output-using-type +//TEST:REFLECTION(filecheck=REFLECT):-stage compute -entry computeMain -target cuda -no-codegen +//TEST:REFLECTION(filecheck=REFLECT-MTL):-stage compute -entry computeMain -target metal -no-codegen + + +// Test struct for bool layout analysis +struct TestType +{ + uint value; + bool f_bool; + bool1 f_bool1; + bool pad1; + bool2 f_bool2; + bool pad2; + bool3 f_bool3; + bool4 f_bool4; + uint END; +}; + +//TEST_INPUT:ubuffer(data=[0 0 0 0 0 0], stride=4):out,name=buffer +RWStructuredBuffer<TestType> buffer; + +[shader("compute")] +[numthreads(1, 1, 1)] +void computeMain(uint3 tid: SV_DispatchThreadID) +{ + uint i = tid.x; + + // Initialize test data + buffer[i].value = 7; + buffer[i].f_bool = true; + buffer[i].f_bool1 = bool1(true); + buffer[i].pad1 = false; + buffer[i].f_bool2 = bool2(true, true); + buffer[i].pad2 = false; + buffer[i].f_bool3 = bool3(true, false, true); + buffer[i].f_bool4 = bool4(true, false, true, false); + buffer[i].END = 0x12345678; +} + +// BUFFER: 7 +// BUFFER-NEXT: 101 +// 
BUFFER-NEXT: 1000101 +// BUFFER-NEXT: 100 +// BUFFER-NEXT: 10001 +// BUFFER-NEXT: 12345678 + +// Expected output for Metal (different struct layout) +// BUFFER-MTL: 7 +// BUFFER-MTL-NEXT: 101 +// BUFFER-MTL-NEXT: 101 +// BUFFER-MTL-NEXT: 10001 +// BUFFER-MTL-NEXT: 10001 +// BUFFER-MTL-NEXT: 12345678 + +// REFLECT: "name": "f_bool1", +// REFLECT: "binding": {"kind": "uniform", "offset": 5, "size": 1, "elementStride": 1} +// REFLECT: "name": "f_bool2", +// REFLECT: "binding": {"kind": "uniform", "offset": 8, "size": 2, "elementStride": 1} +// REFLECT: "name": "f_bool3", +// REFLECT: "binding": {"kind": "uniform", "offset": 11, "size": 3, "elementStride": 1} +// REFLECT: "name": "f_bool4", +// REFLECT: "binding": {"kind": "uniform", "offset": 16, "size": 4, "elementStride": 1} + +// Metal-specific reflection (different bool3 layout) +// REFLECT-MTL: "name": "f_bool1", +// REFLECT-MTL: "binding": {"kind": "uniform", "offset": 5, "size": 1, "elementStride": 1} +// REFLECT-MTL: "name": "f_bool2", +// REFLECT-MTL: "binding": {"kind": "uniform", "offset": 8, "size": 2, "elementStride": 1} +// REFLECT-MTL: "name": "f_bool3", +// REFLECT-MTL: "binding": {"kind": "uniform", "offset": 12, "size": 4, "elementStride": 1} +// REFLECT-MTL: "name": "f_bool4", +// REFLECT-MTL: "binding": {"kind": "uniform", "offset": 16, "size": 4, "elementStride": 1} -- cgit v1.2.3