From fb29bd32cc3404455ff92916a91c517823f486dd Mon Sep 17 00:00:00 2001 From: jsmall-nvidia Date: Wed, 2 Nov 2022 09:47:35 -0400 Subject: Shader Execution Reordering (via NVAPI) (#2484) * #include an absolute path didn't work - because paths were taken to always be relative. * Preliminary SER NVAPI support. * Set the DXC compiler version. Fix typo in premake5.lua * Improve DXC version detection. Enable HLSL2021 on late enough version of DXC. * Fix typo. * Fix launch. * Test via DXIL output. * Update dxc-error output. --- tests/cross-compile/dxc-error.hlsl.expected | 6 +- .../hit-object-make-hit.slang | 85 +++++ .../hit-object-make-hit.slang.expected | 347 +++++++++++++++++++++ .../hit-object-make-miss.slang | 28 ++ .../hit-object-make-miss.slang.expected | 139 +++++++++ .../hit-object-reorder-thread.slang | 86 +++++ .../hit-object-reorder-thread.slang.expected | 321 +++++++++++++++++++ .../hit-object-trace-ray.slang | 74 +++++ .../hit-object-trace-ray.slang.expected | 225 +++++++++++++ 9 files changed, 1308 insertions(+), 3 deletions(-) create mode 100644 tests/hlsl-intrinsic/shader-execution-reordering/hit-object-make-hit.slang create mode 100644 tests/hlsl-intrinsic/shader-execution-reordering/hit-object-make-hit.slang.expected create mode 100644 tests/hlsl-intrinsic/shader-execution-reordering/hit-object-make-miss.slang create mode 100644 tests/hlsl-intrinsic/shader-execution-reordering/hit-object-make-miss.slang.expected create mode 100644 tests/hlsl-intrinsic/shader-execution-reordering/hit-object-reorder-thread.slang create mode 100644 tests/hlsl-intrinsic/shader-execution-reordering/hit-object-reorder-thread.slang.expected create mode 100644 tests/hlsl-intrinsic/shader-execution-reordering/hit-object-trace-ray.slang create mode 100644 tests/hlsl-intrinsic/shader-execution-reordering/hit-object-trace-ray.slang.expected (limited to 'tests') diff --git a/tests/cross-compile/dxc-error.hlsl.expected b/tests/cross-compile/dxc-error.hlsl.expected index 5fdc2362b..c47ca80a6 100644 --- a/tests/cross-compile/dxc-error.hlsl.expected +++ b/tests/cross-compile/dxc-error.hlsl.expected @@ -1,8 +1,8 @@ result code = -1 standard error = { -dxc: tests/cross-compile/dxc-error.hlsl(8): error : use of undeclared identifier 'gOutputBuffer' -dxc: note : gOutputBuffer[tid] = dispatchThreadID.x * 0.5f; -dxc: note : ^ +dxc 1.7: tests/cross-compile/dxc-error.hlsl(8): error : use of undeclared identifier 'gOutputBuffer' +dxc 1.7: note : gOutputBuffer[tid] = dispatchThreadID.x * 0.5f; +dxc 1.7: note : ^ } standard output = { } diff --git a/tests/hlsl-intrinsic/shader-execution-reordering/hit-object-make-hit.slang b/tests/hlsl-intrinsic/shader-execution-reordering/hit-object-make-hit.slang new file mode 100644 index 000000000..a754ff408 --- /dev/null +++ b/tests/hlsl-intrinsic/shader-execution-reordering/hit-object-make-hit.slang @@ -0,0 +1,85 @@ +// hit-object-make-hit.slang + +//TEST:SIMPLE: -target dxil -entry rayGenerationMain -stage raygeneration -profile sm_6_5 -DNV_SHADER_EXTN_SLOT=u0 + +//DISABLE_TEST(compute):COMPARE_COMPUTE:-d3d12 -output-using-type -use-dxil -profile sm_6_5 -render-feature ray-query +//DISABLE_TEST(compute):COMPARE_COMPUTE:-vk -output-using-type -render-feature ray-query + +//TEST_INPUT: set scene = AccelerationStructure +uniform RaytracingAccelerationStructure scene; + +//TEST_INPUT:set outputBuffer = out ubuffer(data=[0, 0, 0, 0], stride=4) +RWStructuredBuffer outputBuffer; + +struct SomeValues +{ + int a; + float b; +}; + +uint calcValue(HitObject hit) +{ + uint r = 0; + + if (!hit.IsMiss()) + { + uint instanceIndex = hit.GetInstanceIndex(); + uint instanceID = hit.GetInstanceID(); + uint geometryIndex = hit.GetGeometryIndex(); + uint primitiveIndex = hit.GetPrimitiveIndex(); + + SomeValues objSomeValues = hit.GetAttributes(); + + r += instanceIndex; + r += instanceID; + r += geometryIndex; + r += primitiveIndex; + r += objSomeValues.a; + } + + return r; +} + +void rayGenerationMain() +{ + int2 launchID = int2(DispatchRaysIndex().xy); + int2 launchSize = int2(DispatchRaysDimensions().xy); + + int idx = launchID.x; + + SomeValues someValues = { idx, idx * 2.0f }; + + RayDesc ray; + ray.Origin = float3(idx, 0, 0); + ray.TMin = 0.01f; + ray.Direction = float3(0, 1, 0); + ray.TMax = 1e4f; + + uint hitKind = 0; + + uint r = 0; + { + HitObject hit = HitObject::MakeHit(0, scene, idx, idx * 2, idx * 3, hitKind, ray, someValues); + + r = calcValue(hit); + } + + { + int rayContributionToHitGroupIndex = 0; + int multiplierForGeometryContributionToHitGroupIndex = 4; + + HitObject hit = HitObject::MakeHit(scene, + idx, + idx * 2, + idx * 3, + hitKind, + rayContributionToHitGroupIndex, + multiplierForGeometryContributionToHitGroupIndex, + ray, + someValues); + + r += calcValue(hit); + } + + outputBuffer[idx] = r; +} diff --git a/tests/hlsl-intrinsic/shader-execution-reordering/hit-object-make-hit.slang.expected b/tests/hlsl-intrinsic/shader-execution-reordering/hit-object-make-hit.slang.expected new file mode 100644 index 000000000..f481d22f3 --- /dev/null +++ b/tests/hlsl-intrinsic/shader-execution-reordering/hit-object-make-hit.slang.expected @@ -0,0 +1,347 @@ +result code = 0 +standard error = { +} +standard output = { +; +; Note: shader requires additional functionality: +; UAVs at every shader stage +; +; shader hash: 98c6f18c569b635938f62584ccf64bbf +; +; Buffer Definitions: +; +; Resource bind info for g_NvidiaExt +; { +; +; struct struct.NvShaderExtnStruct +; { +; +; uint opcode; ; Offset: 0 +; uint rid; ; Offset: 4 +; uint sid; ; Offset: 8 +; uint4 dst1u; ; Offset: 12 +; uint4 src3u; ; Offset: 28 +; uint4 src4u; ; Offset: 44 +; uint4 src5u; ; Offset: 60 +; uint4 src0u; ; Offset: 76 +; uint4 src1u; ; Offset: 92 +; uint4 src2u; ; Offset: 108 +; uint4 dst0u; ; Offset: 124 +; uint markUavRef; ; Offset: 140 +; uint numOutputsForIncCounter; ; Offset: 144 +; float padding1[27]; ; Offset: 148 +; +; } $Element; ; Offset: 0 Size: 256 +; +; } +; +; Resource bind info for outputBuffer_0 +; { +; +; uint $Element; ; Offset: 0 Size: 4 +; +; } +; +; +; Resource Bindings: +; +; Name Type Format Dim ID HLSL Bind Count +; ------------------------------ ---------- ------- ----------- ------- -------------- ------ +; scene_0 texture i32 ras T0 t0 1 +; g_NvidiaExt UAV struct r/w+cnt U0 u0 1 +; outputBuffer_0 UAV struct r/w U1 u1 1 +; +target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%"class.RWStructuredBuffer" = type { %struct.NvShaderExtnStruct } +%struct.NvShaderExtnStruct = type { i32, i32, i32, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32, [27 x float] } +%struct.RaytracingAccelerationStructure = type { i32 } +%"class.RWStructuredBuffer" = type { i32 } +%struct.SomeValues_0 = type { i32, float } +%struct.AttrWrapper.0 = type { %struct.SomeValues_0 } +%struct.DummyPayload.1 = type { i32 } +%struct.AttrWrapper = type { %struct.SomeValues_0 } +%struct.DummyPayload = type { i32 } +%dx.types.Handle = type { i8* } + +@"\01?g_NvidiaExt@@3V?$RWStructuredBuffer@UNvShaderExtnStruct@@@@A" = external constant %"class.RWStructuredBuffer", align 4 +@"\01?scene_0@@3URaytracingAccelerationStructure@@A" = external constant %struct.RaytracingAccelerationStructure, align 4 +@"\01?outputBuffer_0@@3V?$RWStructuredBuffer@I@@A" = external constant %"class.RWStructuredBuffer", align 4 + +; Function Attrs: nounwind +define void @"\01?rayGenerationMain@@YAXXZ"() #0 { + %1 = load %struct.RaytracingAccelerationStructure, %struct.RaytracingAccelerationStructure* @"\01?scene_0@@3URaytracingAccelerationStructure@@A", align 4, !noalias !18 + %2 = load %"class.RWStructuredBuffer", %"class.RWStructuredBuffer"* @"\01?outputBuffer_0@@3V?$RWStructuredBuffer@I@@A", align 4 + %3 = load %"class.RWStructuredBuffer", %"class.RWStructuredBuffer"* @"\01?g_NvidiaExt@@3V?$RWStructuredBuffer@UNvShaderExtnStruct@@@@A", align 4, !noalias !21 + %4 = alloca %struct.SomeValues_0, align 8 + %5 = alloca %struct.AttrWrapper.0, align 4 + %6 = alloca %struct.DummyPayload.1, align 4 + %7 = alloca %struct.SomeValues_0, align 8 + %8 = alloca %struct.AttrWrapper, align 4 + %9 = alloca %struct.DummyPayload, align 4 + %10 = call i32 @dx.op.dispatchRaysIndex.i32(i32 145, i8 0) ; DispatchRaysIndex(col) + %11 = sitofp i32 %10 to float + %12 = fmul fast float %11, 2.000000e+00 + %13 = sitofp i32 %10 to float + %14 = mul nsw i32 %10, 3 + %15 = shl nsw i32 %10, 1 + %16 = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer"(i32 160, %"class.RWStructuredBuffer" %3) ; CreateHandleForLib(Resource) + %17 = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle %16, i8 1) ; BufferUpdateCounter(uav,inc) + %18 = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer"(i32 160, %"class.RWStructuredBuffer" %3) ; CreateHandleForLib(Resource) + call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %18, i32 %17, i32 0, i32 69, i32 undef, i32 undef, i32 undef, i8 1, i32 4) ; RawBufferStore(uav,index,elementOffset,value0,value1,value2,value3,mask,alignment) + %19 = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer"(i32 160, %"class.RWStructuredBuffer" %3) ; CreateHandleForLib(Resource) + call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %19, i32 %17, i32 144, i32 2, i32 undef, i32 undef, i32 undef, i8 1, i32 4) ; RawBufferStore(uav,index,elementOffset,value0,value1,value2,value3,mask,alignment) + %20 = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer"(i32 160, %"class.RWStructuredBuffer" %3) ; CreateHandleForLib(Resource) + call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %20, i32 %17, i32 76, i32 %10, i32 undef, i32 undef, i32 undef, i8 1, i32 4) ; RawBufferStore(uav,index,elementOffset,value0,value1,value2,value3,mask,alignment) + %21 = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer"(i32 160, %"class.RWStructuredBuffer" %3) ; CreateHandleForLib(Resource) + call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %21, i32 %17, i32 80, i32 %15, i32 undef, i32 undef, i32 undef, i8 1, i32 4) ; RawBufferStore(uav,index,elementOffset,value0,value1,value2,value3,mask,alignment) + %22 = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer"(i32 160, %"class.RWStructuredBuffer" %3) ; CreateHandleForLib(Resource) + call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %22, i32 %17, i32 84, i32 %14, i32 undef, i32 undef, i32 undef, i8 1, i32 4) ; RawBufferStore(uav,index,elementOffset,value0,value1,value2,value3,mask,alignment) + %23 = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer"(i32 160, %"class.RWStructuredBuffer" %3) ; CreateHandleForLib(Resource) + call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %23, i32 %17, i32 88, i32 0, i32 undef, i32 undef, i32 undef, i8 1, i32 4) ; RawBufferStore(uav,index,elementOffset,value0,value1,value2,value3,mask,alignment) + %24 = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer"(i32 160, %"class.RWStructuredBuffer" %3) ; CreateHandleForLib(Resource) + call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %24, i32 %17, i32 92, i32 0, i32 undef, i32 undef, i32 undef, i8 1, i32 4) ; RawBufferStore(uav,index,elementOffset,value0,value1,value2,value3,mask,alignment) + %25 = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer"(i32 160, %"class.RWStructuredBuffer" %3) ; CreateHandleForLib(Resource) + %26 = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle %25, i8 1) ; BufferUpdateCounter(uav,inc) + %27 = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer"(i32 160, %"class.RWStructuredBuffer" %3) ; CreateHandleForLib(Resource) + %28 = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle %27, i8 1) ; BufferUpdateCounter(uav,inc) + %29 = getelementptr inbounds %struct.AttrWrapper, %struct.AttrWrapper* %8, i32 0, i32 0, i32 0 + store i32 %10, i32* %29, align 4 + %30 = getelementptr inbounds %struct.AttrWrapper, %struct.AttrWrapper* %8, i32 0, i32 0, i32 1 + store float %12, float* %30, align 4 + call void @dx.op.callShader.struct.AttrWrapper(i32 159, i32 %28, %struct.AttrWrapper* nonnull %8) ; CallShader(ShaderIndex,Parameter) + %31 = call %dx.types.Handle @dx.op.createHandleForLib.struct.RaytracingAccelerationStructure(i32 160, %struct.RaytracingAccelerationStructure %1) ; CreateHandleForLib(Resource) + call void @dx.op.traceRay.struct.DummyPayload(i32 157, %dx.types.Handle %31, i32 0, i32 0, i32 0, i32 0, i32 %28, float %13, float 0.000000e+00, float 0.000000e+00, float 0x3F847AE140000000, float 0.000000e+00, float 1.000000e+00, float 0.000000e+00, float 1.000000e+04, %struct.DummyPayload* nonnull %9) ; TraceRay(AccelerationStructure,RayFlags,InstanceInclusionMask,RayContributionToHitGroupIndex,MultiplierForGeometryContributionToShaderIndex,MissShaderIndex,Origin_X,Origin_Y,Origin_Z,TMin,Direction_X,Direction_Y,Direction_Z,TMax,payload) + %32 = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer"(i32 160, %"class.RWStructuredBuffer" %3) ; CreateHandleForLib(Resource) + %33 = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle %32, i8 1) ; BufferUpdateCounter(uav,inc) + %34 = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer"(i32 160, %"class.RWStructuredBuffer" %3) ; CreateHandleForLib(Resource) + call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %34, i32 %33, i32 0, i32 73, i32 undef, i32 undef, i32 undef, i8 1, i32 4) ; RawBufferStore(uav,index,elementOffset,value0,value1,value2,value3,mask,alignment) + %35 = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer"(i32 160, %"class.RWStructuredBuffer" %3) ; CreateHandleForLib(Resource) + call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %35, i32 %33, i32 76, i32 %26, i32 undef, i32 undef, i32 undef, i8 1, i32 4) ; RawBufferStore(uav,index,elementOffset,value0,value1,value2,value3,mask,alignment) + %36 = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer"(i32 160, %"class.RWStructuredBuffer" %3) ; CreateHandleForLib(Resource) + %37 = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle %36, i8 1) ; BufferUpdateCounter(uav,inc) + %38 = icmp eq i32 %37, 0 + br i1 %38, label %39, label %76 + +;