From fb29bd32cc3404455ff92916a91c517823f486dd Mon Sep 17 00:00:00 2001 From: jsmall-nvidia Date: Wed, 2 Nov 2022 09:47:35 -0400 Subject: Shader Execution Reordering (via NVAPI) (#2484) * #include an absolute path didn't work - because paths were taken to always be relative. * Preliminary SER NVAPI support. * Set the DXC compiler version. Fix typo in premake5.lua * Improve DXC version detection. Enable HLSL2021 on late enough version of DXC. * Fix typo. * Fix launch. * Test via DXIL output. * Update dxc-error output. --- premake5.lua | 2 +- source/compiler-core/slang-dxc-compiler.cpp | 148 ++++++--- source/core/slang-char-util.h | 2 + source/core/slang-command-line.cpp | 8 + source/core/slang-command-line.h | 2 + source/slang/hlsl.meta.slang | 203 ++++++++++++ source/slang/slang-artifact-output-util.cpp | 3 +- source/slang/slang-emit-c-like.cpp | 27 +- source/slang/slang-lower-to-ir.cpp | 12 +- tests/cross-compile/dxc-error.hlsl.expected | 6 +- .../hit-object-make-hit.slang | 85 +++++ .../hit-object-make-hit.slang.expected | 347 +++++++++++++++++++++ .../hit-object-make-miss.slang | 28 ++ .../hit-object-make-miss.slang.expected | 139 +++++++++ .../hit-object-reorder-thread.slang | 86 +++++ .../hit-object-reorder-thread.slang.expected | 321 +++++++++++++++++++ .../hit-object-trace-ray.slang | 74 +++++ .../hit-object-trace-ray.slang.expected | 225 +++++++++++++ tools/render-test/render-test-main.cpp | 12 +- tools/slang-test/slang-test-main.cpp | 58 +++- tools/slang-test/test-context.cpp | 5 +- tools/slang-test/test-context.h | 1 + 22 files changed, 1720 insertions(+), 74 deletions(-) create mode 100644 tests/hlsl-intrinsic/shader-execution-reordering/hit-object-make-hit.slang create mode 100644 tests/hlsl-intrinsic/shader-execution-reordering/hit-object-make-hit.slang.expected create mode 100644 tests/hlsl-intrinsic/shader-execution-reordering/hit-object-make-miss.slang create mode 100644 
tests/hlsl-intrinsic/shader-execution-reordering/hit-object-make-miss.slang.expected create mode 100644 tests/hlsl-intrinsic/shader-execution-reordering/hit-object-reorder-thread.slang create mode 100644 tests/hlsl-intrinsic/shader-execution-reordering/hit-object-reorder-thread.slang.expected create mode 100644 tests/hlsl-intrinsic/shader-execution-reordering/hit-object-trace-ray.slang create mode 100644 tests/hlsl-intrinsic/shader-execution-reordering/hit-object-trace-ray.slang.expected diff --git a/premake5.lua b/premake5.lua index 5c0222315..dd16a170c 100644 --- a/premake5.lua +++ b/premake5.lua @@ -102,7 +102,7 @@ newoption { newoption { trigger = "enable-nvapi", - description = "(Optional) If true will enable NVAPI, if NVAPI is found via CUDA_PATH", + description = "(Optional) If true will enable NVAPI, if NVAPI is found via external/nvapi", value = "bool", default = "false", allowed = { { "true", "True"}, { "false", "False" } } diff --git a/source/compiler-core/slang-dxc-compiler.cpp b/source/compiler-core/slang-dxc-compiler.cpp index 06dfc74b0..2856a77ab 100644 --- a/source/compiler-core/slang-dxc-compiler.cpp +++ b/source/compiler-core/slang-dxc-compiler.cpp @@ -181,9 +181,26 @@ protected: DxcCreateInstanceProc m_createInstance = nullptr; + /// The commit hash associated with the DXC dll used + /// If 0 length, no hash was found + String m_commitHash; + /// The commit count. 0 if not set + uint32_t m_commitCount = 0; + ComPtr m_sharedLibrary; }; +static String _moveTaskMemAllocatedToString(char* chars) +{ + if (chars) + { + const String str(chars); + ::CoTaskMemFree(chars); + return str; + } + return String(); +} + SlangResult DXCDownstreamCompiler::init(ISlangSharedLibrary* library) { m_sharedLibrary = library; @@ -194,7 +211,69 @@ SlangResult DXCDownstreamCompiler::init(ISlangSharedLibrary* library) return SLANG_FAIL; } - m_desc = Desc(SLANG_PASS_THROUGH_DXC); + // Must be able to create the compiler. 
We only do this here, because we want to get the compiler + // version. + ComPtr dxcCompiler; + SLANG_RETURN_ON_FAIL(m_createInstance(CLSID_DxcCompiler, __uuidof(dxcCompiler), (LPVOID*)dxcCompiler.writeRef())); + + uint32_t major = 0; + uint32_t minor = 0; + uint32_t patch = 0; + + // Get the version info + { + ComPtr versionInfo; + if (SLANG_SUCCEEDED(dxcCompiler->QueryInterface(versionInfo.writeRef()))) + { + versionInfo->GetVersion(&major, &minor); + } + } + + // Get the commit hash + { + + ComPtr versionInfo; + if (SLANG_SUCCEEDED(dxcCompiler->QueryInterface(versionInfo.writeRef()))) + { + char* commitHash = nullptr; + versionInfo->GetCommitInfo(&m_commitCount, &commitHash); + m_commitHash = _moveTaskMemAllocatedToString(commitHash); + } + } + + // Try and get the custom build string, as we can potentially get the patch version from that. + if (patch == 0) + { + ComPtr versionInfo; + + if (SLANG_SUCCEEDED(dxcCompiler->QueryInterface(versionInfo.writeRef()))) + { + char* customVersionCString = nullptr; + versionInfo->GetCustomVersionString(&customVersionCString); + + const String customVersionString = _moveTaskMemAllocatedToString(customVersionCString); + + SemanticVersion semanticVersion(int(major), int(minor), 0); + StringBuilder buf; + semanticVersion.append(buf); + + if (customVersionString.startsWith(buf) && + customVersionString.getLength() > buf.getLength() + 2 && + customVersionString[buf.getLength()] == '.') + { + // Get the patch slice + UnownedStringSlice patchSlice = StringUtil::getAtInSplit(customVersionString.getUnownedSlice(), '.', 2); + + Int patchValue; + if (SLANG_SUCCEEDED(StringUtil::parseInt(patchSlice, patchValue)) && patchValue > 0) + { + patch = uint32_t(patchValue); + } + } + } + } + + m_desc = Desc(SLANG_PASS_THROUGH_DXC, SemanticVersion(int(major), int(minor), int(patch))); return SLANG_OK; } @@ -438,6 +517,22 @@ SlangResult DXCDownstreamCompiler::compile(const CompileOptions& options, IArtif
searchDirectories.searchDirectories.add(asString(includePath)); } + // TODO(JS): Enable in a better way perhaps? + { + // Strictly speaking the HLSL2021 was available in 1.6.2112, in preview + // We enable on 1.7.2207 as that is the first official version, but + // since we may not be able to get the patch version, we'll just assume any version + // 1.7 or later can support the feature. + + const SemanticVersion firstHlsl2021Version(1, 7); + + if (m_desc.version >= firstHlsl2021Version) + { + args.add(L"-HV"); + args.add(L"2021"); + } + } + String sourcePath = ArtifactUtil::findPath(sourceArtifact); OSString wideSourcePath = sourcePath.toWString(); @@ -577,50 +672,21 @@ SlangResult DXCDownstreamCompiler::convert(IArtifact* from, const ArtifactDesc& SlangResult DXCDownstreamCompiler::getVersionString(slang::IBlob** outVersionString) { - ComPtr dxcCompiler; - SLANG_RETURN_ON_FAIL(m_createInstance(CLSID_DxcCompiler, __uuidof(dxcCompiler), (LPVOID*)dxcCompiler.writeRef())); + StringBuilder versionString; + // Append the version + m_desc.version.append(versionString); - ComPtr version; - ComPtr versionInfo; - if (SLANG_SUCCEEDED(dxcCompiler->QueryInterface(versionInfo.writeRef()))) + if (m_commitHash.getLength()) + { + versionString << "#" << m_commitHash; + } + else { - // Because the major/minor version alone does not necessarily capture different releases - // of the DX compiler, we also need to query for the commit hash. If we are unable to - // obtain the commit hash, then we return the shared library timestamp instead.
- ComPtr versionInfo2; - if (SLANG_SUCCEEDED(dxcCompiler->QueryInterface(versionInfo2.writeRef()))) - { - uint32_t major; - uint32_t minor; - versionInfo->GetVersion(&major, &minor); - - StringBuilder versionString; - versionString.append(major); - versionString.append("."); - versionString.append(minor); - - char* commitHash = nullptr; - uint32_t unused; - versionInfo2->GetCommitInfo(&unused, &commitHash); - if (commitHash) - { - // Successfully queried the commit hash, append to the version and return. - versionString.append(commitHash); - CoTaskMemFree(commitHash); - - version = StringBlob::create(versionString.getBuffer()); - *outVersionString = version.detach(); - return SLANG_OK; - } - } + // If we don't have the commitHash, we use the library timestamp, to uniquely identify. + versionString << " " << SharedLibraryUtils::getSharedLibraryTimestamp(m_createInstance); } - // If either of the QueryInterface calls fails, we return the shared library timestamp - // as the version instead. 
- auto timestamp = SharedLibraryUtils::getSharedLibraryTimestamp(m_createInstance); - auto timestampString = String(timestamp); - version = StringBlob::create(timestampString.getBuffer()); - *outVersionString = version.detach(); + *outVersionString = StringBlob::moveCreate(versionString).detach(); return SLANG_OK; } diff --git a/source/core/slang-char-util.h b/source/core/slang-char-util.h index f831f6d55..40abee602 100644 --- a/source/core/slang-char-util.h +++ b/source/core/slang-char-util.h @@ -30,6 +30,8 @@ struct CharUtil /// True if it's alpha SLANG_FORCE_INLINE static bool isAlpha(char c) { return (getFlags(c) & (Flag::Upper | Flag::Lower)) != 0; } + /// True if it's alpha or a digit + SLANG_FORCE_INLINE static bool isAlphaOrDigit(char c) { return (getFlags(c) & (Flag::Upper | Flag::Lower | Flag::Digit)) != 0; } /// True if the character is a valid hex character SLANG_FORCE_INLINE static bool isHexDigit(char c) { return (getFlags(c) & Flag::HexDigit) != 0; } diff --git a/source/core/slang-command-line.cpp b/source/core/slang-command-line.cpp index f8b5ff10f..59e6a6265 100644 --- a/source/core/slang-command-line.cpp +++ b/source/core/slang-command-line.cpp @@ -116,6 +116,14 @@ void CommandLine::appendArgs(StringBuilder& out) const } } +void CommandLine::addArgIfNotFound(const String& in) +{ + if (m_args.indexOf(in) < 0) + { + addArg(in); + } +} + String CommandLine::toString() const { StringBuilder buf; diff --git a/source/core/slang-command-line.h b/source/core/slang-command-line.h index f2007865f..163daf710 100644 --- a/source/core/slang-command-line.h +++ b/source/core/slang-command-line.h @@ -62,6 +62,8 @@ struct CommandLine void addArg(const String& in) { m_args.add(in); } void addArgs(const String* args, Int argsCount) { for (Int i = 0; i < argsCount; ++i) addArg(args[i]); } + void addArgIfNotFound(const String& in); + /// Find the index of an arg which is exact match for slice SLANG_INLINE Index findArgIndex(const UnownedStringSlice& slice) const { 
return m_args.indexOf(slice); } diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang index fe3d511d4..238739fcd 100644 --- a/source/slang/hlsl.meta.slang +++ b/source/slang/hlsl.meta.slang @@ -5420,3 +5420,206 @@ struct VkSubpassInputMS { T SubpassLoad(int sampleIndex); } + +/// +/// Shader Execution Reordering (SER) +/// +/// NOTE! This API is currently experimental and may change in the future as SER is made available +/// in different APIs and downstream compilers. +/// +/// Based on the NVAPI on D3D12 only currently. +/// +/// White paper on SER on NVAPI https://developer.nvidia.com/sites/default/files/akamai/gameworks/ser-whitepaper.pdf +/// +/// The NVAPI headers (R520) required for this functionality to work can be found here... +/// +/// https://developer.nvidia.com/rtx/path-tracing/nvapi/get-started +/// + + /// Immutable data type representing a ray hit or a miss. Can be used to invoke hit or miss shading, + /// or as a key in ReorderThread. Created by one of several methods described below. HitObject + /// and its related functions are available in raytracing shader types only. +__target_intrinsic(hlsl, NvHitObject) +[__requiresNVAPI] +struct HitObject +{ + /// Executes ray traversal (including anyhit and intersection shaders) like TraceRay, but returns the + /// resulting hit information as a HitObject and does not trigger closesthit or miss shaders. + __target_intrinsic(hlsl, "NvTraceRayHitObject") + [__requiresNVAPI] + static HitObject TraceRay( + RaytracingAccelerationStructure AccelerationStructure, + uint RayFlags, + uint InstanceInclusionMask, + uint RayContributionToHitGroupIndex, + uint MultiplierForGeometryContributionToHitGroupIndex, + uint MissShaderIndex, + RayDesc Ray, + inout payload_t Payload); + + /// Creates a HitObject representing a hit based on values explicitly passed as arguments, without + /// tracing a ray. 
The primitive specified by AccelerationStructure, InstanceIndex, GeometryIndex, + /// and PrimitiveIndex must exist. The shader table index is computed using the formula used with + /// TraceRay. The computed index must reference a valid hit group record in the shader table. The + /// Attributes parameter must either be an attribute struct, such as + /// BuiltInTriangleIntersectionAttributes, or another HitObject to copy the attributes from. + __target_intrinsic(hlsl, "NvMakeHit") + [__requiresNVAPI] + static HitObject MakeHit( + RaytracingAccelerationStructure AccelerationStructure, + uint InstanceIndex, + uint GeometryIndex, + uint PrimitiveIndex, + uint HitKind, + uint RayContributionToHitGroupIndex, + uint MultiplierForGeometryContributionToHitGroupIndex, + RayDesc Ray, + attr_t attributes); + + /// Creates a HitObject representing a hit based on values explicitly passed as arguments, without + /// tracing a ray. The primitive specified by AccelerationStructure, InstanceIndex, GeometryIndex, + /// and PrimitiveIndex must exist. The shader table index is explicitly provided as an argument + /// instead of being computed from the indexing formula used in TraceRay. The provided index must + /// reference a valid hit group record in the shader table. The Attributes parameter must either be an + /// attribute struct, such as BuiltInTriangleIntersectionAttributes, or another HitObject to copy the + /// attributes from. + __target_intrinsic(hlsl, "NvMakeHitWithRecordIndex") + [__requiresNVAPI] + static HitObject MakeHit( + uint HitGroupRecordIndex, + RaytracingAccelerationStructure AccelerationStructure, + uint InstanceIndex, + uint GeometryIndex, + uint PrimitiveIndex, + uint HitKind, + RayDesc Ray, + attr_t attributes); + + /// Creates a HitObject representing a miss based on values explicitly passed as arguments, without + /// tracing a ray. The provided shader table index must reference a valid miss record in the shader + /// table. 
+ __target_intrinsic(hlsl, "NvMakeMiss") + [__requiresNVAPI] + static HitObject MakeMiss( + uint MissShaderIndex, + RayDesc Ray); + + /// Creates a HitObject representing “NOP” (no operation) which is neither a hit nor a miss. Invoking a + /// NOP hit object using HitObject::Invoke has no effect. Reordering by hit objects using + /// ReorderThread will group NOP hit objects together. This can be useful in some reordering + /// scenarios where future control flow for some threads is known to process neither a hit nor a + /// miss. + __target_intrinsic(hlsl, "NvMakeNop") + [__requiresNVAPI] + static HitObject MakeNop(); + + /// Invokes closesthit or miss shading for the specified hit object. In case of a NOP HitObject, no + /// shader is invoked. + __target_intrinsic(hlsl, "NvInvokeHitObject") + [__requiresNVAPI] + static void Invoke( + RaytracingAccelerationStructure AccelerationStructure, + HitObject HitOrMiss, + inout payload_t Payload); + + /// Returns true if the HitObject encodes a miss, otherwise returns false. + __target_intrinsic(hlsl) + [__requiresNVAPI] + bool IsMiss(); + + /// Returns true if the HitObject encodes a hit, otherwise returns false. + __target_intrinsic(hlsl) + [__requiresNVAPI] + bool IsHit(); + + /// Returns true if the HitObject encodes a nop, otherwise returns false. + __target_intrinsic(hlsl) + [__requiresNVAPI] + bool IsNop(); + + /// Queries ray properties from HitObject. Valid if the hit object represents a hit or a miss. + __target_intrinsic(hlsl) + [__requiresNVAPI] + RayDesc GetRayDesc(); + + /// Queries shader table index from HitObject. Valid if the hit object represents a hit or a miss. + __target_intrinsic(hlsl) + [__requiresNVAPI] + uint GetShaderTableIndex(); + + /// Returns the instance index of a hit. Valid if the hit object represents a hit. + __target_intrinsic(hlsl) + [__requiresNVAPI] + uint GetInstanceIndex(); + + /// Returns the instance ID of a hit. Valid if the hit object represents a hit. 
+ __target_intrinsic(hlsl) + [__requiresNVAPI] + uint GetInstanceID(); + + /// Returns the geometry index of a hit. Valid if the hit object represents a hit. + __target_intrinsic(hlsl) + [__requiresNVAPI] + uint GetGeometryIndex(); + + /// Returns the primitive index of a hit. Valid if the hit object represents a hit. + __target_intrinsic(hlsl) + [__requiresNVAPI] + uint GetPrimitiveIndex(); + + /// Returns the hit kind. Valid if the hit object represents a hit. + __target_intrinsic(hlsl) + [__requiresNVAPI] + uint GetHitKind(); + + /// Returns the attributes of a hit. Valid if the hit object represents a hit or a miss. + __target_intrinsic(hlsl, "$0.GetAttributes<$G0>()") + [__requiresNVAPI] + attr_t GetAttributes(); + + /// Loads a root constant from the local root table referenced by the hit object. Valid if the hit object + /// represents a hit or a miss. RootConstantOffsetInBytes must be a multiple of 4. + __target_intrinsic(hlsl) + [__requiresNVAPI] + uint LoadLocalRootTableConstant(uint RootConstantOffsetInBytes); +}; + + + /// Reorders threads based on a coherence hint value. NumCoherenceHintBits indicates how many of + /// the least significant bits of CoherenceHint should be considered during reordering (max: 16). + /// Applications should set this to the lowest value required to represent all possible values in + /// CoherenceHint. For best performance, all threads should provide the same value for + /// NumCoherenceHintBits. + /// Where possible, reordering will also attempt to retain locality in the thread’s launch indices + /// (DispatchRaysIndex in DXR). +__target_intrinsic(hlsl, "NvReorderThread") +[__requiresNVAPI] +void ReorderThread( uint CoherenceHint, uint NumCoherenceHintBitsFromLSB ); + + /// Reorders threads based on a hit object, optionally extended by a coherence hint value. Coherence + /// hints behave as described in the generic variant of ReorderThread. 
The maximum number of + /// coherence hint bits in this variant of ReorderThread is 8. If no coherence hint is desired, set + /// NumCoherenceHintBitsFromLSB to zero. + /// Reordering will consider information in the HitObject and coherence hint with the following + /// priority: + /// + /// 1. Shader ID stored in the HitObject + /// 2. Coherence hint, with the most significant hint bit having highest priority + /// 3. Spatial information stored in the HitObject + /// + /// That is, ReorderThread will first attempt to group threads whose HitObject references the + /// same shader ID. (Miss shaders and NOP HitObjects are grouped separately). Within each of these + /// groups, it will attempt to order threads by the value of their coherence hints. And within ranges + /// of equal coherence hints, it will attempt to maximize locality in 3D space of the ray hit (if any). +__target_intrinsic(hlsl, "NvReorderThread") +[__requiresNVAPI] +void ReorderThread( HitObject HitOrMiss, uint CoherenceHint, uint NumCoherenceHintBitsFromLSB ); + + /// Is equivalent to + /// ``` + /// void ReorderThread( HitObject HitOrMiss, uint CoherenceHint, uint NumCoherenceHintBitsFromLSB ); + /// ``` + /// With CoherenceHint and NumCoherenceHintBitsFromLSB as 0, meaning they are ignored.
+[__requiresNVAPI] +__target_intrinsic(hlsl, "NvReorderThread") +void ReorderThread( HitObject HitOrMiss ); diff --git a/source/slang/slang-artifact-output-util.cpp b/source/slang/slang-artifact-output-util.cpp index ac4138020..e9cfe6615 100644 --- a/source/slang/slang-artifact-output-util.cpp +++ b/source/slang/slang-artifact-output-util.cpp @@ -99,7 +99,8 @@ SlangResult ArtifactOutputUtil::maybeDisassemble(Session* session, IArtifact* ar // If is text, we can just output if (ArtifactDescUtil::isText(desc)) { - return writer->write((const char*)blob->getBufferPointer(), blob->getBufferSize()); + auto text = StringUtil::getSlice(blob); + return writer->write(text.begin(), text.getLength()); } else { diff --git a/source/slang/slang-emit-c-like.cpp b/source/slang/slang-emit-c-like.cpp index dcd25419e..09a18a31c 100644 --- a/source/slang/slang-emit-c-like.cpp +++ b/source/slang/slang-emit-c-like.cpp @@ -1472,20 +1472,31 @@ IRTargetIntrinsicDecoration* CLikeSourceEmitter::findBestTargetIntrinsicDecorati /* static */bool CLikeSourceEmitter::isOrdinaryName(UnownedStringSlice const& name) { char const* cursor = name.begin(); - char const* end = name.end(); + char const*const end = name.end(); // Consume an optional `.` at the start, which indicates // the ordinary name is for a member function. 
- if(cursor != end && *cursor == '.') + if(cursor < end && *cursor == '.') cursor++; - while(cursor != end) + // Must have at least one char, and first char can't be a digit + if (cursor >= end || CharUtil::isDigit(cursor[0])) + return false; + + for(; cursor < end; ++cursor) { - int c = *cursor++; - if( (c >= 'a') && (c <= 'z') ) continue; - if( (c >= 'A') && (c <= 'Z') ) continue; - if( (c >= '0') && (c <= '9') ) continue; - if( c == '_' ) continue; + const auto c = *cursor; + if (CharUtil::isAlphaOrDigit(c) || c == '_') + { + continue; + } + + // We allow :: for scope + if (c == ':' && cursor + 1 < end && cursor[1] == ':') + { + ++cursor; + continue; + } return false; } diff --git a/source/slang/slang-lower-to-ir.cpp b/source/slang/slang-lower-to-ir.cpp index 8f00253f5..e2b14f1e3 100644 --- a/source/slang/slang-lower-to-ir.cpp +++ b/source/slang/slang-lower-to-ir.cpp @@ -7641,15 +7641,15 @@ struct DeclLoweringVisitor : DeclVisitor // Constructors aren't really member functions, insofar // as they aren't called with a `this` parameter. - // - // TODO: We may also want to exclude `static` functions - // here for the same reason, but this routine is only - // used for the stdlib, where we don't currently have - // any `static` member functions to worry about. - // if(as(decl)) return false; + // Exclude `static` functions for same reason. 
+ if (decl->findModifier()) + { + return false; + } + auto dd = decl->parentDecl; for(;;) { diff --git a/tests/cross-compile/dxc-error.hlsl.expected b/tests/cross-compile/dxc-error.hlsl.expected index 5fdc2362b..c47ca80a6 100644 --- a/tests/cross-compile/dxc-error.hlsl.expected +++ b/tests/cross-compile/dxc-error.hlsl.expected @@ -1,8 +1,8 @@ result code = -1 standard error = { -dxc: tests/cross-compile/dxc-error.hlsl(8): error : use of undeclared identifier 'gOutputBuffer' -dxc: note : gOutputBuffer[tid] = dispatchThreadID.x * 0.5f; -dxc: note : ^ +dxc 1.7: tests/cross-compile/dxc-error.hlsl(8): error : use of undeclared identifier 'gOutputBuffer' +dxc 1.7: note : gOutputBuffer[tid] = dispatchThreadID.x * 0.5f; +dxc 1.7: note : ^ } standard output = { } diff --git a/tests/hlsl-intrinsic/shader-execution-reordering/hit-object-make-hit.slang b/tests/hlsl-intrinsic/shader-execution-reordering/hit-object-make-hit.slang new file mode 100644 index 000000000..a754ff408 --- /dev/null +++ b/tests/hlsl-intrinsic/shader-execution-reordering/hit-object-make-hit.slang @@ -0,0 +1,85 @@ +// hit-object-make-hit.slang + +//TEST:SIMPLE: -target dxil -entry rayGenerationMain -stage raygeneration -profile sm_6_5 -DNV_SHADER_EXTN_SLOT=u0 + +//DISABLE_TEST(compute):COMPARE_COMPUTE:-d3d12 -output-using-type -use-dxil -profile sm_6_5 -render-feature ray-query +//DISABLE_TEST(compute):COMPARE_COMPUTE:-vk -output-using-type -render-feature ray-query + +//TEST_INPUT: set scene = AccelerationStructure +uniform RaytracingAccelerationStructure scene; + +//TEST_INPUT:set outputBuffer = out ubuffer(data=[0, 0, 0, 0], stride=4) +RWStructuredBuffer outputBuffer; + +struct SomeValues +{ + int a; + float b; +}; + +uint calcValue(HitObject hit) +{ + uint r = 0; + + if (!hit.IsMiss()) + { + uint instanceIndex = hit.GetInstanceIndex(); + uint instanceID = hit.GetInstanceID(); + uint geometryIndex = hit.GetGeometryIndex(); + uint primitiveIndex = hit.GetPrimitiveIndex(); + + SomeValues objSomeValues = 
hit.GetAttributes(); + + r += instanceIndex; + r += instanceID; + r += geometryIndex; + r += primitiveIndex; + r += objSomeValues.a; + } + + return r; +} + +void rayGenerationMain() +{ + int2 launchID = int2(DispatchRaysIndex().xy); + int2 launchSize = int2(DispatchRaysDimensions().xy); + + int idx = launchID.x; + + SomeValues someValues = { idx, idx * 2.0f }; + + RayDesc ray; + ray.Origin = float3(idx, 0, 0); + ray.TMin = 0.01f; + ray.Direction = float3(0, 1, 0); + ray.TMax = 1e4f; + + uint hitKind = 0; + + uint r = 0; + { + HitObject hit = HitObject::MakeHit(0, scene, idx, idx * 2, idx * 3, hitKind, ray, someValues); + + r = calcValue(hit); + } + + { + int rayContributionToHitGroupIndex = 0; + int multiplierForGeometryContributionToHitGroupIndex = 4; + + HitObject hit = HitObject::MakeHit(scene, + idx, + idx * 2, + idx * 3, + hitKind, + rayContributionToHitGroupIndex, + multiplierForGeometryContributionToHitGroupIndex, + ray, + someValues); + + r += calcValue(hit); + } + + outputBuffer[idx] = r; +} diff --git a/tests/hlsl-intrinsic/shader-execution-reordering/hit-object-make-hit.slang.expected b/tests/hlsl-intrinsic/shader-execution-reordering/hit-object-make-hit.slang.expected new file mode 100644 index 000000000..f481d22f3 --- /dev/null +++ b/tests/hlsl-intrinsic/shader-execution-reordering/hit-object-make-hit.slang.expected @@ -0,0 +1,347 @@ +result code = 0 +standard error = { +} +standard output = { +; +; Note: shader requires additional functionality: +; UAVs at every shader stage +; +; shader hash: 98c6f18c569b635938f62584ccf64bbf +; +; Buffer Definitions: +; +; Resource bind info for g_NvidiaExt +; { +; +; struct struct.NvShaderExtnStruct +; { +; +; uint opcode; ; Offset: 0 +; uint rid; ; Offset: 4 +; uint sid; ; Offset: 8 +; uint4 dst1u; ; Offset: 12 +; uint4 src3u; ; Offset: 28 +; uint4 src4u; ; Offset: 44 +; uint4 src5u; ; Offset: 60 +; uint4 src0u; ; Offset: 76 +; uint4 src1u; ; Offset: 92 +; uint4 src2u; ; Offset: 108 +; uint4 dst0u; ; Offset: 124 +; 
uint markUavRef; ; Offset: 140 +; uint numOutputsForIncCounter; ; Offset: 144 +; float padding1[27]; ; Offset: 148 +; +; } $Element; ; Offset: 0 Size: 256 +; +; } +; +; Resource bind info for outputBuffer_0 +; { +; +; uint $Element; ; Offset: 0 Size: 4 +; +; } +; +; +; Resource Bindings: +; +; Name Type Format Dim ID HLSL Bind Count +; ------------------------------ ---------- ------- ----------- ------- -------------- ------ +; scene_0 texture i32 ras T0 t0 1 +; g_NvidiaExt UAV struct r/w+cnt U0 u0 1 +; outputBuffer_0 UAV struct r/w U1 u1 1 +; +target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%"class.RWStructuredBuffer" = type { %struct.NvShaderExtnStruct } +%struct.NvShaderExtnStruct = type { i32, i32, i32, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32, [27 x float] } +%struct.RaytracingAccelerationStructure = type { i32 } +%"class.RWStructuredBuffer" = type { i32 } +%struct.SomeValues_0 = type { i32, float } +%struct.AttrWrapper.0 = type { %struct.SomeValues_0 } +%struct.DummyPayload.1 = type { i32 } +%struct.AttrWrapper = type { %struct.SomeValues_0 } +%struct.DummyPayload = type { i32 } +%dx.types.Handle = type { i8* } + +@"\01?g_NvidiaExt@@3V?$RWStructuredBuffer@UNvShaderExtnStruct@@@@A" = external constant %"class.RWStructuredBuffer", align 4 +@"\01?scene_0@@3URaytracingAccelerationStructure@@A" = external constant %struct.RaytracingAccelerationStructure, align 4 +@"\01?outputBuffer_0@@3V?$RWStructuredBuffer@I@@A" = external constant %"class.RWStructuredBuffer", align 4 + +; Function Attrs: nounwind +define void @"\01?rayGenerationMain@@YAXXZ"() #0 { + %1 = load %struct.RaytracingAccelerationStructure, %struct.RaytracingAccelerationStructure* @"\01?scene_0@@3URaytracingAccelerationStructure@@A", align 4, !noalias !18 + %2 = load %"class.RWStructuredBuffer", %"class.RWStructuredBuffer"* 
@"\01?outputBuffer_0@@3V?$RWStructuredBuffer@I@@A", align 4 + %3 = load %"class.RWStructuredBuffer", %"class.RWStructuredBuffer"* @"\01?g_NvidiaExt@@3V?$RWStructuredBuffer@UNvShaderExtnStruct@@@@A", align 4, !noalias !21 + %4 = alloca %struct.SomeValues_0, align 8 + %5 = alloca %struct.AttrWrapper.0, align 4 + %6 = alloca %struct.DummyPayload.1, align 4 + %7 = alloca %struct.SomeValues_0, align 8 + %8 = alloca %struct.AttrWrapper, align 4 + %9 = alloca %struct.DummyPayload, align 4 + %10 = call i32 @dx.op.dispatchRaysIndex.i32(i32 145, i8 0) ; DispatchRaysIndex(col) + %11 = sitofp i32 %10 to float + %12 = fmul fast float %11, 2.000000e+00 + %13 = sitofp i32 %10 to float + %14 = mul nsw i32 %10, 3 + %15 = shl nsw i32 %10, 1 + %16 = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer"(i32 160, %"class.RWStructuredBuffer" %3) ; CreateHandleForLib(Resource) + %17 = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle %16, i8 1) ; BufferUpdateCounter(uav,inc) + %18 = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer"(i32 160, %"class.RWStructuredBuffer" %3) ; CreateHandleForLib(Resource) + call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %18, i32 %17, i32 0, i32 69, i32 undef, i32 undef, i32 undef, i8 1, i32 4) ; RawBufferStore(uav,index,elementOffset,value0,value1,value2,value3,mask,alignment) + %19 = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer"(i32 160, %"class.RWStructuredBuffer" %3) ; CreateHandleForLib(Resource) + call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %19, i32 %17, i32 144, i32 2, i32 undef, i32 undef, i32 undef, i8 1, i32 4) ; RawBufferStore(uav,index,elementOffset,value0,value1,value2,value3,mask,alignment) + %20 = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer"(i32 160, %"class.RWStructuredBuffer" %3) ; CreateHandleForLib(Resource) + call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %20, i32 %17, i32 76, 
i32 %10, i32 undef, i32 undef, i32 undef, i8 1, i32 4) ; RawBufferStore(uav,index,elementOffset,value0,value1,value2,value3,mask,alignment) + %21 = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer"(i32 160, %"class.RWStructuredBuffer" %3) ; CreateHandleForLib(Resource) + call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %21, i32 %17, i32 80, i32 %15, i32 undef, i32 undef, i32 undef, i8 1, i32 4) ; RawBufferStore(uav,index,elementOffset,value0,value1,value2,value3,mask,alignment) + %22 = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer"(i32 160, %"class.RWStructuredBuffer" %3) ; CreateHandleForLib(Resource) + call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %22, i32 %17, i32 84, i32 %14, i32 undef, i32 undef, i32 undef, i8 1, i32 4) ; RawBufferStore(uav,index,elementOffset,value0,value1,value2,value3,mask,alignment) + %23 = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer"(i32 160, %"class.RWStructuredBuffer" %3) ; CreateHandleForLib(Resource) + call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %23, i32 %17, i32 88, i32 0, i32 undef, i32 undef, i32 undef, i8 1, i32 4) ; RawBufferStore(uav,index,elementOffset,value0,value1,value2,value3,mask,alignment) + %24 = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer"(i32 160, %"class.RWStructuredBuffer" %3) ; CreateHandleForLib(Resource) + call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %24, i32 %17, i32 92, i32 0, i32 undef, i32 undef, i32 undef, i8 1, i32 4) ; RawBufferStore(uav,index,elementOffset,value0,value1,value2,value3,mask,alignment) + %25 = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer"(i32 160, %"class.RWStructuredBuffer" %3) ; CreateHandleForLib(Resource) + %26 = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle %25, i8 1) ; BufferUpdateCounter(uav,inc) + %27 = call %dx.types.Handle 
@"dx.op.createHandleForLib.class.RWStructuredBuffer"(i32 160, %"class.RWStructuredBuffer" %3) ; CreateHandleForLib(Resource) + %28 = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle %27, i8 1) ; BufferUpdateCounter(uav,inc) + %29 = getelementptr inbounds %struct.AttrWrapper, %struct.AttrWrapper* %8, i32 0, i32 0, i32 0 + store i32 %10, i32* %29, align 4 + %30 = getelementptr inbounds %struct.AttrWrapper, %struct.AttrWrapper* %8, i32 0, i32 0, i32 1 + store float %12, float* %30, align 4 + call void @dx.op.callShader.struct.AttrWrapper(i32 159, i32 %28, %struct.AttrWrapper* nonnull %8) ; CallShader(ShaderIndex,Parameter) + %31 = call %dx.types.Handle @dx.op.createHandleForLib.struct.RaytracingAccelerationStructure(i32 160, %struct.RaytracingAccelerationStructure %1) ; CreateHandleForLib(Resource) + call void @dx.op.traceRay.struct.DummyPayload(i32 157, %dx.types.Handle %31, i32 0, i32 0, i32 0, i32 0, i32 %28, float %13, float 0.000000e+00, float 0.000000e+00, float 0x3F847AE140000000, float 0.000000e+00, float 1.000000e+00, float 0.000000e+00, float 1.000000e+04, %struct.DummyPayload* nonnull %9) ; TraceRay(AccelerationStructure,RayFlags,InstanceInclusionMask,RayContributionToHitGroupIndex,MultiplierForGeometryContributionToShaderIndex,MissShaderIndex,Origin_X,Origin_Y,Origin_Z,TMin,Direction_X,Direction_Y,Direction_Z,TMax,payload) + %32 = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer"(i32 160, %"class.RWStructuredBuffer" %3) ; CreateHandleForLib(Resource) + %33 = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle %32, i8 1) ; BufferUpdateCounter(uav,inc) + %34 = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer"(i32 160, %"class.RWStructuredBuffer" %3) ; CreateHandleForLib(Resource) + call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %34, i32 %33, i32 0, i32 73, i32 undef, i32 undef, i32 undef, i8 1, i32 4) ; 
RawBufferStore(uav,index,elementOffset,value0,value1,value2,value3,mask,alignment) + %35 = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer"(i32 160, %"class.RWStructuredBuffer" %3) ; CreateHandleForLib(Resource) + call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %35, i32 %33, i32 76, i32 %26, i32 undef, i32 undef, i32 undef, i8 1, i32 4) ; RawBufferStore(uav,index,elementOffset,value0,value1,value2,value3,mask,alignment) + %36 = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer"(i32 160, %"class.RWStructuredBuffer" %3) ; CreateHandleForLib(Resource) + %37 = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle %36, i8 1) ; BufferUpdateCounter(uav,inc) + %38 = icmp eq i32 %37, 0 + br i1 %38, label %39, label %76 + +;