diff options
Diffstat (limited to 'tests/optimization/buffer-load-defer-user-pointer.slang')
| -rw-r--r-- | tests/optimization/buffer-load-defer-user-pointer.slang | 63 |
1 files changed, 63 insertions, 0 deletions
diff --git a/tests/optimization/buffer-load-defer-user-pointer.slang b/tests/optimization/buffer-load-defer-user-pointer.slang new file mode 100644 index 000000000..58e6386f9 --- /dev/null +++ b/tests/optimization/buffer-load-defer-user-pointer.slang @@ -0,0 +1,63 @@ +//TEST:SIMPLE(filecheck=SPV): -target spirv -O0 +//TEST:SIMPLE(filecheck=CUDA): -target cuda -entry compute_main -stage compute +//TEST:SIMPLE(filecheck=PTX): -target ptx -entry compute_main -stage compute + +// Check that we can specialize buffer loads through user pointers, and +// do not load big struct elements into registers unnecessarily. + +struct Bottom +{ + float bigArray[1024]; + float bottomGetValue(int index) { return bigArray[index]; } +} + +struct Middle +{ + Bottom bottom; + float middleGetValue(int index) { return bottom.bottomGetValue(index); } +} + +struct Top +{ + StructuredBuffer<Middle*>.Handle middle; + + // Calling `middleGetValue` on `middle[0]` should not cause the entire `Middle` + // struct to be loaded into registers. Instead, we should be able to specialize + // `middleGetValue` to take a `Middle*` and recursively specialize `bottomGetValue` + // to only load the `Bottom.bigArray[index]` element. + float topGetValue(int index) { return middle[0].middleGetValue(index); } +} + +struct Root +{ + Top top; +} + +ConstantBuffer<Root> cb; + +RWStructuredBuffer<float> outputBuffer; + +// Check that the generated CUDA code never loads a `Middle`, `Bottom`, or `Top` struct into a local var. +// CUDA-NOT: Middle{{[_A-Za-z0-9]*}} {{[a-zA-Z0-9_]+}} = +// CUDA-NOT: Bottom{{[_A-Za-z0-9]*}} {{[a-zA-Z0-9_]+}} = +// CUDA-NOT: Top{{[_A-Za-z0-9]*}} {{[a-zA-Z0-9_]+}} = + +// Check that the generated CUDA code can be compiled by nvrtc correctly into PTX. 
+// PTX: compute_main + +// Check that the generated (unoptimized) SPIR-V contains a specialized Bottom_bottomGetValue function +// that takes in a Bottom* and uses an access chain to load the required array element directly, without +// needing to load the entire Bottom struct. +// SPV: %Bottom_bottomGetValue = OpFunction %float None +// SPV: OpFunctionParameter %_ptr_PhysicalStorageBuffer_Middle_natural +// SPV: %[[INDEX:[A-Za-z0-9_]+]] = OpFunctionParameter %int +// SPV: %[[PTR:[A-Za-z0-9_]+]] = OpAccessChain %_ptr_PhysicalStorageBuffer_float %{{.*}} %[[INDEX]] +// SPV: %[[VALUE:[A-Za-z0-9_]+]] = OpLoad %float %[[PTR]] +// SPV: OpReturnValue %[[VALUE]] + +[shader("compute")] +[numthreads(1, 1, 1)] +void compute_main(uint3 tid: SV_DispatchThreadID) +{ + outputBuffer[0] = cb.top.topGetValue(0); +} |
