From 487ae034e2b03ddd67945132c8fecbd937952705 Mon Sep 17 00:00:00 2001 From: Sriram Murali <85252063+sriramm-nv@users.noreply.github.com> Date: Mon, 13 May 2024 23:57:57 -0700 Subject: Add LoadAligned and StoreAligned methods to ByteAddressBuffers (#4066) Fixes #4062 This change enables wide loads/stores for byte-address-buffer backed resources, when the data is accessed at an offset that is aligned. **Goals** - Improve performance by issuing wider instructions instead of a sequence of scalar instructions, for loads and stores of byte-address buffers. - Reduce code size and improve readability of the generated shaders. - Help naive users as well as ninja programmers, generate optimal code. **Non Goals** - Help with Structured buffers, or other resources. - Target compilation time improvements. **Key changes** Adds 2 new overloads for Load and Store operations on ByteAddress Buffers. 1. Load / Store with an extra alignment parameter ``` resource.Load(offset, alignment); resource.Store(offset, value, alignment); ``` 2. LoadAligned / StoreAligned with no extra parameter, with the same signature as original Load / Store. ``` resource.LoadAligned(offset); resource.StoreAligned(offset, value); ``` - This overload will implicitly identify the alignment value, from the base type T of the elementary unit of the resource. **Supported resources** 1. Vectors This can be up to 4 elements, i.e. float -- float4. 2. Arrays This does not have a limit on number of elements, but on a conservative estimate, we can limit it to a few hundred. 3. Structures This is used to group a resource of a single type. ``` struct { float4 x; } ``` **Code updates** - Modified byte-address-ir legalize to handle struct, array and vector kinds of load or store access - Added custom hlsl stdlib functions to implement all the overloads for Load, Store etc. - Added C-like emitter, SPIR-V emitter for handling ByteAddressBuffers. - Added a new core stdlib function intrinsic to wrap around alignOf(). 
- Added a new peephole optimization entry to identify the equivalent IntLiteral value from the alignOf() inst. - Added tests to check explicit, and implicit aligned Load and Store operations. --- tests/compute/byte-address-buffer-aligned.slang | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'tests/compute/byte-address-buffer-aligned.slang') diff --git a/tests/compute/byte-address-buffer-aligned.slang b/tests/compute/byte-address-buffer-aligned.slang index 5024987aa..f959ec66d 100644 --- a/tests/compute/byte-address-buffer-aligned.slang +++ b/tests/compute/byte-address-buffer-aligned.slang @@ -109,8 +109,8 @@ void computeMain(uint3 threadId : SV_DispatchThreadID) // CHECK3-DAG: OpStore %[[V33]] %[[V28]] // CHECK3-DAG: %[[V34:[a-zA-Z0-9_]+]] = OpAccessChain %[[SBf]] %[[BUF00]] // CHECK3-DAG: OpStore %[[V34]] %[[V30]] - buffer0.Store(32, buffer0.Load(32)); - buffer0.Store(32, buffer0.Load(8)); - buffer0.Store(8, buffer0.Load(32)); - buffer0.Store(8, buffer0.Load(8)); + buffer0.StoreAligned(32, buffer0.LoadAligned(32)); + buffer0.StoreAligned(32, buffer0.LoadAligned(8)); + buffer0.StoreAligned(8, buffer0.LoadAligned(32)); + buffer0.StoreAligned(8, buffer0.LoadAligned(8)); } -- cgit v1.2.3