From 7c162eba5329eae7755e55298a455a144fcb0dce Mon Sep 17 00:00:00 2001 From: sriramm-nv <85252063+sriramm-nv@users.noreply.github.com> Date: Fri, 19 Apr 2024 09:12:56 -0700 Subject: Enable NonUniformResourceIndex support for glsl, hlsl and spirv (#3899) Fixes #387676 * ForceInline SampleLevel to allow decorations to apply * explicitly add all the SPIRVAsmOperand Insts in non-differentiable list, which might get inadvertently processed when these functions are inlined into the main shader * Support NonUniformResourceIndex for SPIR-V target Fixes #3876 * add a new IR instruction for NonUniformResourceIndex * slang ir emitter for nonuniform resource index * update the hlsl meta slang * Add test cases for NonUniformResourceIndex access for buffers and textures, with/without cast, nested access etc. * add default c-like emitter for nonuniformresourceinfo * added hlsl emitter * added glsl emitter * requisites for spirv enabling - new decorator for nonuniformresourceindex - emitter for nonuniformresourceindex signature change * add hasResourceType checker * add rwStructBuffType in resourcetype checker * add a case for nonuniformres in emitDecorations * DO NOT COMMIT: This change adds special handling for RWStructBuf within the isResourceType function, if it is a pointer to this resource, return true to make it work with nonuniformres test * spirv emitter for decorations - update the emitLocalInst to perform decorations at the end * added main spirv emitter code * slang emit spirv bugfix * hacky way of supporting Call Inst * move code to cleanup nonuniform inst into helper function * remove stale code from test * add spirv decoration for nonuniform * update test to remove global variables * update coherent-2 test * update comment for special handling * update the spirv legalize to handle nested nonuniforms improved logic that handles call ops, rwstructbuf, nested nonuniforms etc.
* update nonuniform-array-of-tex test * missed removing nonuniform inst causing duplicate decorations * add glsl and hlsl variants of nonuniform tests * repurpose the hasResource function into something specific for nonuniform inst decoration helper * clean up comments and code around spirv-legalization to emit nonuniform inst by recursively looking into the inst * use the helper canDecorateNonUniformInst to convert `nonUniformResourceInfo` inst to decoration * converted compute/unbounded-array-of-array cross compile test into a simple check test * update contains Resource helper function to be more generic * clean up the case for opcall handling with nonuniform resource inst * update ptr to struct buffer check to be more explicit and rename the function to check for ptr to resource type * update comments and fix the test for coherent * fix typos * update logic on spirv legalize to delete dead instructions - for some reason this doesn't automatically happen * add comments to declarations * add NonuniformResourceIndex to the non-differential inst list --- .../compute/nonuniformres-array-of-textures.slang | 41 ++++++++++++++++++++++ tests/compute/nonuniformres-atomic.slang | 27 ++++++++++++++ .../nonuniformres-nested-rwstructuredbuf.slang | 35 ++++++++++++++++++ .../compute/unbounded-array-of-array-syntax.slang | 19 +++++++++- 4 files changed, 121 insertions(+), 1 deletion(-) create mode 100644 tests/compute/nonuniformres-array-of-textures.slang create mode 100644 tests/compute/nonuniformres-atomic.slang create mode 100644 tests/compute/nonuniformres-nested-rwstructuredbuf.slang (limited to 'tests/compute') diff --git a/tests/compute/nonuniformres-array-of-textures.slang b/tests/compute/nonuniformres-array-of-textures.slang new file mode 100644 index 000000000..32eccdbdc --- /dev/null +++ b/tests/compute/nonuniformres-array-of-textures.slang @@ -0,0 +1,41 @@ +//TEST:SIMPLE(filecheck=CHECK0):-target glsl -entry main -stage compute 
+//TEST:SIMPLE(filecheck=CHECK1):-target hlsl -entry main -stage compute +//TEST:SIMPLE(filecheck=CHECK2):-target spirv -entry main -stage compute +//TEST:SIMPLE(filecheck=CHECK3):-target spirv -entry main -stage compute -emit-spirv-directly + +Texture2D textures[2]; +RWTexture2D outputTexture; +[numthreads(1, 1, 1)] +void main(uint2 pixelIndex : SV_DispatchThreadID) +{ + // CHECK0: texelFetch({{.*}}[nonuniformEXT({{.*}})]) + + // CHECK1: [NonUniformResourceIndex(_{{.*}})].Load(_{{.*}}) + + // CHECK2: OpDecorate %[[VAR1:[a-zA-Z0-9_]+]] NonUniform + // CHECK2: OpDecorate %[[VAR2:[a-zA-Z0-9_]+]] NonUniform + // CHECK2: OpDecorate %[[VAR3:[a-zA-Z0-9_]+]] NonUniform + // CHECK2: OpDecorate %[[VAR4:[a-zA-Z0-9_]+]] NonUniform + // CHECK2: OpDecorate %[[VAR5:[a-zA-Z0-9_]+]] NonUniform + // CHECK2: OpDecorate %[[VAR6:[a-zA-Z0-9_]+]] NonUniform + + // CHECK2: %[[VAR1]] = OpCopyObject %{{.*}} + // CHECK2: %[[VAR2]] = OpAccessChain %_ptr_UniformConstant_{{.*}} %{{.*}} %[[VAR1]] + // CHECK2: %[[VAR3]] = OpLoad %{{.*}} %[[VAR2]] + // CHECK2: %[[VAR4]] = OpCopyObject %{{.*}} + // CHECK2: %[[VAR5]] = OpAccessChain %_ptr_UniformConstant_{{.*}} %{{.*}} %[[VAR4]] + // CHECK2: %[[VAR6]] = OpLoad %{{.*}} %[[VAR5]] + + // CHECK3: OpDecorate %[[VAR1:[a-zA-Z0-9_]+]] NonUniform + // CHECK3: OpDecorate %[[VAR2:[a-zA-Z0-9_]+]] NonUniform + // CHECK3: %[[VAR1]] = OpLoad %{{.*}} + // CHECK3: %{{.*}} = OpImageFetch %v4float %[[VAR1]] %{{.*}} + // CHECK3: %[[VAR2]] = OpLoad %{{.*}} + // CHECK3: %{{.*}} = OpImageFetch %v4float %[[VAR2]] %{{.*}} + float2 tmp0 = textures[NonUniformResourceIndex(pixelIndex.x)].Load(int3(0, 0, 0)); + outputTexture[0] = tmp0; + + float2 tmp1 = textures[NonUniformResourceIndex(uint(float(pixelIndex.x)))].Load(int3(0, 0, 0)); + outputTexture[1] = tmp1; +} + diff --git a/tests/compute/nonuniformres-atomic.slang b/tests/compute/nonuniformres-atomic.slang new file mode 100644 index 000000000..113517006 --- /dev/null +++ b/tests/compute/nonuniformres-atomic.slang @@ -0,0 
+1,27 @@ +//TEST:SIMPLE(filecheck=CHECK0):-target glsl -entry main -stage compute +//TEST:SIMPLE(filecheck=CHECK1):-target hlsl -entry main -stage compute +//TEST:SIMPLE(filecheck=CHECK2):-target spirv -entry main -stage compute +//TEST:SIMPLE(filecheck=CHECK3):-target spirv -entry main -stage compute -emit-spirv-directly + +RWTexture2D texArray[2]; + +[numthreads(1,1,1)] +void main( uint2 dispatchThreadID : SV_DispatchThreadID, uint2 groupThreadID : SV_GroupThreadID ) +{ + + // CHECK0: imageAtomicAdd((texArray_{{.*}}[nonuniformEXT({{.*}})] + + // CHECK1: InterlockedAdd(texArray_{{.*}}[NonUniformResourceIndex({{.*}})] + + // CHECK2-DAG: OpDecorate %[[VAR1:[a-zA-Z0-9_]+]] NonUniform + // CHECK2-DAG: OpDecorate %[[VAR2:[a-zA-Z0-9_]+]] NonUniform + // CHECK2-DAG: OpDecorate %[[VAR3:[a-zA-Z0-9_]+]] NonUniform + // CHECK2: %[[VAR1]] = OpCopyObject %{{.*}} + // CHECK2: %[[VAR2]] = OpAccessChain %_ptr_UniformConstant_{{.*}} %{{.*}} %[[VAR1]] + // CHECK2: %[[VAR3]] = OpImageTexelPointer %_ptr_Image_uint %[[VAR2]] %{{.*}} + + // CHECK3-DAG: OpDecorate %[[VAR1:[a-zA-Z0-9_]+]] NonUniform + // CHECK3: %[[VAR1]] = OpAccessChain %_ptr_UniformConstant_{{.*}} %texArray %{{.*}} + // CHECK3: %{{.*}} = OpImageTexelPointer %_ptr_Image_uint %[[VAR1]] %{{.*}} + InterlockedAdd(texArray[NonUniformResourceIndex(dispatchThreadID.x)][uint2(0)], 2); +} diff --git a/tests/compute/nonuniformres-nested-rwstructuredbuf.slang b/tests/compute/nonuniformres-nested-rwstructuredbuf.slang new file mode 100644 index 000000000..3c99ea205 --- /dev/null +++ b/tests/compute/nonuniformres-nested-rwstructuredbuf.slang @@ -0,0 +1,35 @@ +//TEST:SIMPLE(filecheck=CHECK0):-target glsl -entry main -stage compute +//TEST:SIMPLE(filecheck=CHECK1):-target hlsl -entry main -stage compute +//TEST:SIMPLE(filecheck=CHECK2):-target spirv -entry main -stage compute +//TEST:SIMPLE(filecheck=CHECK3):-target spirv -entry main -stage compute -emit-spirv-directly + +RWStructuredBuffer buffer[]; + +[numthreads(8, 1, 1)] +void 
main(uint3 dispatchThreadID: SV_DispatchThreadID) +{ + // CHECK0: buffer_{{.*}}[nonuniformEXT(nonuniformEXT(nonuniformEXT({{.*}})))] + + // CHECK1: buffer_{{.*}}[NonUniformResourceIndex(NonUniformResourceIndex(NonUniformResourceIndex(_{{.*}})))] + + // CHECK2-DAG: OpDecorate %[[VAR1:[a-zA-Z0-9_]+]] NonUniform + // CHECK2-DAG: OpDecorate %[[VAR2:[a-zA-Z0-9_]+]] NonUniform + // CHECK2-DAG: OpDecorate %[[VAR3:[a-zA-Z0-9_]+]] NonUniform + // CHECK2: %[[VAR1]] = OpCopyObject %{{.*}} + // CHECK2: %[[VAR2]] = OpAccessChain %_ptr_Uniform_int %buffer_{{.*}} %[[VAR1]] + // CHECK2: OpStore %[[VAR2]] %{{.*}} + // CHECK2: %[[VAR3]] = OpCopyObject %{{.*}} + // CHECK2: %{{.*}} = OpBitcast %int %[[VAR3]] + + // CHECK3-DAG: OpDecorate %[[VAR1:[a-zA-Z0-9_]+]] NonUniform + // CHECK3-DAG: OpDecorate %[[VAR2:[a-zA-Z0-9_]+]] NonUniform + // CHECK3: %[[VAR1]] = OpAccessChain %_ptr_StorageBuffer_RWStructuredBuffer %buffer %{{.*}} + // CHECK3: %{{.*}} = OpAccessChain %_ptr_StorageBuffer_int %[[VAR1]] + // CHECK3: %[[VAR2]] = OpAccessChain %_ptr_StorageBuffer_RWStructuredBuffer %buffer %{{.*}} + // CHECK3: %{{.*}} = OpAccessChain %_ptr_StorageBuffer_int %[[VAR2]] + RWStructuredBuffer buffer1 = buffer[NonUniformResourceIndex(NonUniformResourceIndex(NonUniformResourceIndex(dispatchThreadID.x)))]; + buffer1[0] = 1; + + RWStructuredBuffer buffer2 = buffer[int(NonUniformResourceIndex(uint(float(dispatchThreadID.x))))]; + buffer2[0] = 1; +} \ No newline at end of file diff --git a/tests/compute/unbounded-array-of-array-syntax.slang b/tests/compute/unbounded-array-of-array-syntax.slang index bba8605f2..35316d114 100644 --- a/tests/compute/unbounded-array-of-array-syntax.slang +++ b/tests/compute/unbounded-array-of-array-syntax.slang @@ -1,7 +1,7 @@ //IGNORE_TEST:CPU_REFLECTION: -profile cs_5_0 -entry computeMain -target cpp //DISABLED_TEST(compute):COMPARE_COMPUTE_EX:-cpu -compute //TEST:CROSS_COMPILE:-target dxbc-assembly -entry computeMain -profile cs_5_1 -//TEST:CROSS_COMPILE:-target 
spirv-assembly -entry computeMain -profile cs_5_1 +//TEST:SIMPLE(filecheck=CHECK):-target spirv-assembly -entry computeMain -profile cs_5_1 //DISABLED_TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute //TEST_INPUT:ubuffer(data=[0 0 0 0 0 0 0 0], stride=4):out,name outputBuffer @@ -16,6 +16,23 @@ RWStructuredBuffer g_aoa[]; [numthreads(8, 1, 1)] void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) { + // CHECK: OpCapability {{(ShaderNonUniform|StorageBufferArrayNonUniformIndexing)}} + // CHECK: OpCapability {{(ShaderNonUniform|StorageBufferArrayNonUniformIndexing)}} + // CHECK-DAG: OpDecorate %[[N1:[a-zA-Z0-9_]+]] NonUniform + // CHECK-DAG: OpDecorate %[[N2:[a-zA-Z0-9_]+]] NonUniform + // CHECK-DAG: OpDecorate %[[N3:[a-zA-Z0-9_]+]] NonUniform + // CHECK-DAG: OpDecorate %[[N4:[a-zA-Z0-9_]+]] NonUniform + // CHECK-DAG: OpDecorate %[[B0:[a-zA-Z0-9_]+]] Binding 0 + // CHECK-DAG: OpDecorate %[[B1:[a-zA-Z0-9_]+]] Binding 1 + // CHECK-DAG: %[[P1:[a-zA-Z0-9_]+]] = OpTypePointer Uniform %int + // CHECK-DAG: %[[P2:[a-zA-Z0-9_]+]] = OpTypePointer Uniform %StructuredBuffer_int_t_0 + // CHECK: %[[N1]] = OpCopyObject %int + // CHECK: %{{.*}} = OpAccessChain %[[P2]] %[[B1]] %[[N1]] + // CHECK: %[[N2]] = OpCopyObject %int + // CHECK: %[[N3]] = OpAccessChain %[[P1]] %[[B1]] %[[N2]] + // CHECK: %[[N4]] = OpLoad %int %[[N3]] + // CHECK: %{{.*}} = OpAccessChain %[[P1]] %[[B0]] %int_{{.*}} + // CHECK: OpStore %{{.*}} %[[N4]] int index = int(dispatchThreadID.x); int baseIndex = index >> 2; -- cgit v1.2.3