From 4b3f554a58e4224806c31d66874fbe60f1f09332 Mon Sep 17 00:00:00 2001 From: sriramm-nv <85252063+sriramm-nv@users.noreply.github.com> Date: Tue, 16 Apr 2024 23:59:41 -0700 Subject: Force Inline all the InterlockedAdd functions in stdlib (#3965) This change forcibly inlines the InterlockedAdd functions when using a byte-address buffer. The IR generated when using nonUniformResourceInst on RWByteAddressBuffer: buffer[NonUniformResourceIndex(uint(0))].InterlockedAdd(0, 1); follows the sequence of a call into an index lookup that is wrapped by a nonUniformResourceIndex: %ld = nonUniformResourceIndex(0) Call RWStructBufferInterlockedAdd(%ld, 0, 1) This prevents NonUniformResource decoration of the buffer because it is wrapped by the function call to InterlockedAdd, which further expands to: %gep = getElement(%buffer, 0) SpirvAsmInst(..., rwStructuredBufferGEP(%gep, 0), ...) By force-inlining the atomic functions, the buffer / resource is made visible to the nonUniformResourceIndex inst, allowing the decoration. 
Identified while debugging tests/spirv/coherent-2.slang --- .../atomic-float-byte-address-buffer-cross.slang | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'tests') diff --git a/tests/slang-extension/atomic-float-byte-address-buffer-cross.slang b/tests/slang-extension/atomic-float-byte-address-buffer-cross.slang index ffa6d5b94..523c58984 100644 --- a/tests/slang-extension/atomic-float-byte-address-buffer-cross.slang +++ b/tests/slang-extension/atomic-float-byte-address-buffer-cross.slang @@ -1,6 +1,6 @@ // atomic-float-byte-address-buffer-cross.slang -//TEST:CROSS_COMPILE: -profile cs_6_5 -entry computeMain -target spirv-assembly +//TEST:SIMPLE(filecheck=CHECK): -profile cs_6_5 -entry computeMain -target spirv-assembly // We can't do this test, because it relies on nvAPI //DISABLE_TEST:CROSS_COMPILE: -profile cs_6_5 -entry computeMain -target dxil @@ -13,6 +13,16 @@ RWStructuredBuffer anotherBuffer; [numthreads(16, 1, 1)] void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) { + // CHECK-DAG: OpDecorate %[[V1:[a-zA-Z0-9_]+]] Binding 1 + // CHECK-DAG: OpDecorate %[[V2:[a-zA-Z0-9_]+]] Binding 0 + // CHECK-DAG: %[[P1:[a-zA-Z0-9_]+]] = OpTypePointer Uniform %float + // CHECK-DAG: %[[P2:[a-zA-Z0-9_]+]] = OpTypePointer Input %uint + // CHECK: OpAccessChain %[[P2]] + // CHECK: OpAccessChain %[[P1]] %[[V1]] + // CHECK: OpAccessChain %[[P1]] %[[V2]] + // CHECK: OpAtomicFAddEXT + // CHECK: OpAccessChain %[[P1]] %[[V2]] + // CHECK: OpAtomicFAddEXT uint tid = dispatchThreadID.x; int idx = int((tid & 3) ^ (tid >> 2)); -- cgit v1.2.3