diff options
| author | Harsh Aggarwal (NVIDIA) <haaggarwal@nvidia.com> | 2025-05-26 21:00:38 +0530 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-05-26 15:30:38 +0000 |
| commit | 83538e0b4b97425ecdae6f72f9c8fd44cb255aac (patch) | |
| tree | 8f27c47fb7c1614fa916c2da6ab9996655e29da1 /source | |
| parent | 8ecb2c70437292ef6fa34f7122df44067de6a4de (diff) | |
Implement shader execution reordering support for OptiX (#7211)
* Implement shader execution reordering support for OptiX
Added OptiX backend support for Shader Execution Reordering (SER) features as outlined in issue #6647. This implementation:
1. Added CUDA target support for HitObject API
2. Implemented core SER functionality (TraceRay, MakeHit/Miss, Invoke)
3. Added OptiX-specific hit object handling functions
4. Added test case for OptiX SER functionality
* format code
---------
Co-authored-by: slangbot <186143334+slangbot@users.noreply.github.com>
Diffstat (limited to 'source')
| -rw-r--r-- | source/slang/hlsl.meta.slang | 94 | ||||
| -rw-r--r-- | source/slang/slang-emit-cuda.cpp | 5 |
2 files changed, 75 insertions, 24 deletions
diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang index cb050dd51..fd7c7cfc7 100644 --- a/source/slang/hlsl.meta.slang +++ b/source/slang/hlsl.meta.slang @@ -17250,7 +17250,7 @@ void TraceRay( Ray, __forceVarIntoRayPayloadStructTemporarily(Payload)); return; - case cuda: __intrinsic_asm "traceOptiXRay"; + case cuda: __intrinsic_asm "optixTrace"; case glsl: { [__vulkanRayPayload] @@ -19576,7 +19576,7 @@ struct HitObject /// Executes ray traversal (including anyhit and intersection shaders) like TraceRay, but returns the /// resulting hit information as a HitObject and does not trigger closesthit or miss shaders. [ForceInline] - [require(glsl_hlsl_spirv, ser_raygen_closesthit_miss)] + [require(cuda_glsl_hlsl_spirv, ser_raygen_closesthit_miss)] static HitObject TraceRay<payload_t>( RaytracingAccelerationStructure AccelerationStructure, uint RayFlags, @@ -19629,6 +19629,7 @@ struct HitObject // Write the payload out Payload = p; } + case cuda: __intrinsic_asm "optixTraverse"; case spirv: { [__vulkanRayPayload] @@ -19669,7 +19670,7 @@ struct HitObject /// Executes motion ray traversal (including anyhit and intersection shaders) like TraceRay, but returns the /// resulting hit information as a HitObject and does not trigger closesthit or miss shaders. [ForceInline] - [require(glsl_hlsl_spirv, ser_motion_raygen_closesthit_miss)] + [require(cuda_glsl_hlsl_spirv, ser_motion_raygen_closesthit_miss)] static HitObject TraceMotionRay<payload_t>( RaytracingAccelerationStructure AccelerationStructure, uint RayFlags, @@ -19720,6 +19721,7 @@ struct HitObject // Write the payload out Payload = p; } + case cuda: __intrinsic_asm "optixTraverse"; case spirv: { [__vulkanRayPayload] @@ -19768,7 +19770,7 @@ struct HitObject /// Attributes parameter must either be an attribute struct, such as /// BuiltInTriangleIntersectionAttributes, or another HitObject to copy the attributes from. [ForceInline] - [require(glsl_hlsl_spirv, ser_raygen_closesthit_miss)] + [require(cuda_glsl_hlsl_spirv, ser_raygen_closesthit_miss)] static HitObject MakeHit<attr_t>( RaytracingAccelerationStructure AccelerationStructure, uint InstanceIndex, @@ -19816,6 +19818,7 @@ struct HitObject Ray.TMax, __hitObjectAttributesLocation(__hitObjectAttributes<attr_t>())); } + case cuda: __intrinsic_asm "optixMakeHitObject"; case spirv: { // Save the attributes @@ -19853,7 +19856,7 @@ struct HitObject /// See MakeHit but handles Motion /// Currently only supported on VK [ForceInline] - [require(glsl_hlsl_spirv, ser_motion_raygen_closesthit_miss)] + [require(cuda_glsl_hlsl_spirv, ser_motion_raygen_closesthit_miss)] static HitObject MakeMotionHit<attr_t>( RaytracingAccelerationStructure AccelerationStructure, uint InstanceIndex, @@ -19890,6 +19893,7 @@ struct HitObject CurrentTime, __hitObjectAttributesLocation(__hitObjectAttributes<attr_t>())); } + case cuda: __intrinsic_asm "optixMakeHitObject"; case spirv: { // Save the attributes @@ -19935,7 +19939,7 @@ struct HitObject /// attribute struct, such as BuiltInTriangleIntersectionAttributes, or another HitObject to copy the /// attributes from. [ForceInline] - [require(glsl_hlsl_spirv, ser_raygen_closesthit_miss)] + [require(cuda_glsl_hlsl_spirv, ser_raygen_closesthit_miss)] static HitObject MakeHit<attr_t>( uint HitGroupRecordIndex, RaytracingAccelerationStructure AccelerationStructure, @@ -19980,6 +19984,7 @@ struct HitObject Ray.TMax, __hitObjectAttributesLocation(__hitObjectAttributes<attr_t>())); } + case cuda: __intrinsic_asm "optixMakeHitObject"; case spirv: { // Save the attributes @@ -20013,7 +20018,7 @@ struct HitObject /// See MakeHit but handles Motion /// Currently only supported on VK [ForceInline] - [require(glsl_spirv, ser_motion_raygen_closesthit_miss)] + [require(cuda_glsl_spirv, ser_motion_raygen_closesthit_miss)] static HitObject MakeMotionHit<attr_t>( uint HitGroupRecordIndex, RaytracingAccelerationStructure AccelerationStructure, @@ -20047,6 +20052,7 @@ struct HitObject CurrentTime, __hitObjectAttributesLocation(__hitObjectAttributes<attr_t>())); } + case cuda: __intrinsic_asm "optixMakeHitObject"; case spirv: { // Save the attributes @@ -20084,7 +20090,7 @@ struct HitObject /// table. [__requiresNVAPI] [ForceInline] - [require(glsl_hlsl_spirv, ser_raygen_closesthit_miss)] + [require(cuda_glsl_hlsl_spirv, ser_raygen_closesthit_miss)] static HitObject MakeMiss( uint MissShaderIndex, RayDesc Ray) @@ -20094,6 +20100,7 @@ struct HitObject case hlsl: __intrinsic_asm "($2=NvMakeMiss($0,$1))"; case glsl: __glslMakeMiss(__return_val, MissShaderIndex, Ray.Origin, Ray.TMin, Ray.Direction, Ray.TMax); + case cuda: __intrinsic_asm "optixMakeMissHitObject"; case spirv: { let origin = Ray.Origin; @@ -20119,7 +20126,7 @@ struct HitObject /// See MakeMiss but handles Motion /// Currently only supported on VK [ForceInline] - [require(glsl_hlsl_spirv, ser_motion_raygen_closesthit_miss)] + [require(cuda_glsl_hlsl_spirv, ser_motion_raygen_closesthit_miss)] static HitObject MakeMotionMiss( uint MissShaderIndex, RayDesc Ray, @@ -20130,6 +20137,7 @@ struct HitObject case hlsl: __intrinsic_asm "($3=NvMakeMotionMiss($0,$1,$2))"; case glsl: __glslMakeMotionMiss(__return_val, MissShaderIndex, Ray.Origin, Ray.TMin, Ray.Direction, Ray.TMax, CurrentTime); + case cuda: __intrinsic_asm "optixMakeMissHitObject"; case spirv: { let origin = Ray.Origin; @@ -20162,7 +20170,7 @@ struct HitObject /// miss. [__requiresNVAPI] [ForceInline] - [require(glsl_hlsl_spirv, ser_raygen_closesthit_miss)] + [require(cuda_glsl_hlsl_spirv, ser_raygen_closesthit_miss)] static HitObject MakeNop() { __target_switch @@ -20171,6 +20179,7 @@ struct HitObject __intrinsic_asm "($0 = NvMakeNop())"; case glsl: __glslMakeNop(__return_val); + case cuda: __intrinsic_asm "optixMakeNopHitObject"; case spirv: spirv_asm { @@ -20199,7 +20208,7 @@ struct HitObject /// shader is invoked. [__requiresNVAPI] [ForceInline] - [require(glsl_hlsl_spirv, ser_raygen_closesthit_miss)] + [require(cuda_glsl_hlsl_spirv, ser_raygen_closesthit_miss)] static void Invoke<payload_t>( RaytracingAccelerationStructure AccelerationStructure, HitObject HitOrMiss, @@ -20225,6 +20234,7 @@ struct HitObject // Write payload result Payload = p; } + case cuda: __intrinsic_asm "optixInvoke"; case spirv: { [__vulkanRayPayload] @@ -20251,13 +20261,14 @@ struct HitObject /// Returns true if the HitObject encodes a miss, otherwise returns false. [__requiresNVAPI] [ForceInline] - [require(glsl_hlsl_spirv, ser_raygen_closesthit_miss)] + [require(cuda_glsl_hlsl_spirv, ser_raygen_closesthit_miss)] bool IsMiss() { __target_switch { case hlsl: __intrinsic_asm ".IsMiss"; case glsl: __intrinsic_asm "hitObjectIsMissNV($0)"; + case cuda: __intrinsic_asm "optixHitObjectIsMiss"; case spirv: return spirv_asm { @@ -20271,13 +20282,14 @@ struct HitObject /// Returns true if the HitObject encodes a hit, otherwise returns false. [__requiresNVAPI] [ForceInline] - [require(glsl_hlsl_spirv, ser_raygen_closesthit_miss)] + [require(cuda_glsl_hlsl_spirv, ser_raygen_closesthit_miss)] bool IsHit() { __target_switch { case hlsl: __intrinsic_asm ".IsHit"; case glsl: __intrinsic_asm "hitObjectIsHitNV($0)"; + case cuda: __intrinsic_asm "optixHitObjectIsHit"; case spirv: return spirv_asm { @@ -20291,13 +20303,14 @@ struct HitObject /// Returns true if the HitObject encodes a nop, otherwise returns false. [__requiresNVAPI] [ForceInline] - [require(glsl_hlsl_spirv, ser_raygen_closesthit_miss)] + [require(cuda_glsl_hlsl_spirv, ser_raygen_closesthit_miss)] bool IsNop() { __target_switch { case hlsl: __intrinsic_asm ".IsNop"; case glsl: __intrinsic_asm "hitObjectIsEmptyNV($0)"; + case cuda: __intrinsic_asm "optixHitObjectIsNop"; case spirv: return spirv_asm { @@ -20311,7 +20324,7 @@ struct HitObject /// Queries ray properties from HitObject. Valid if the hit object represents a hit or a miss. [__requiresNVAPI] [ForceInline] - [require(glsl_hlsl_spirv, ser_raygen_closesthit_miss)] + [require(cuda_glsl_hlsl_spirv, ser_raygen_closesthit_miss)] RayDesc GetRayDesc() { __target_switch @@ -20323,6 +20336,7 @@ struct HitObject RayDesc ray = { __glslGetRayWorldOrigin(), __glslGetTMin(), __glslGetRayWorldDirection(), __glslGetTMax() }; return ray; } + case cuda: __intrinsic_asm "optixHitObjectGetRayDesc"; case spirv: return spirv_asm { @@ -20341,13 +20355,14 @@ struct HitObject [__requiresNVAPI] __glsl_extension(GL_EXT_ray_tracing) [ForceInline] - [require(glsl_hlsl_spirv, ser_raygen_closesthit_miss)] + [require(cuda_glsl_hlsl_spirv, ser_raygen_closesthit_miss)] uint GetShaderTableIndex() { __target_switch { case hlsl: __intrinsic_asm ".GetShaderTableIndex"; case glsl: __intrinsic_asm "hitObjectGetShaderBindingTableRecordIndexNV($0)"; + case cuda: __intrinsic_asm "optixHitObjectGetSbtRecordIndex"; case spirv: return spirv_asm { @@ -20358,17 +20373,41 @@ struct HitObject } } + [__requiresNVAPI] + __glsl_extension(GL_EXT_ray_tracing) + [ForceInline] + [require(cuda_hlsl, ser_raygen_closesthit_miss)] + uint SetShaderTableIndex(uint RecordIndex) + { + __target_switch + { + case hlsl: __intrinsic_asm ".SetShaderTableIndex"; + case cuda: __intrinsic_asm "optixHitObjectSetSbtRecordIndex"; + } + } + + // TODO - Add other targets [__requiresNVAPI] __glsl_extension(GL_EXT_ray_tracing) + [ForceInline] + [require(cuda, ser_raygen_closesthit_miss)] + uint LoadLocalRootArgumentsConstant(uint RootConstantOffsetInBytes) + { + __target_switch + { + case cuda: __intrinsic_asm "optixHitObjectGetSbtDataPointer"; + } + } /// Returns the instance index of a hit. Valid if the hit object represents a hit. [__requiresNVAPI] __glsl_extension(GL_EXT_ray_tracing) [ForceInline] - [require(glsl_hlsl_spirv, ser_raygen_closesthit_miss)] + [require(cuda_glsl_hlsl_spirv, ser_raygen_closesthit_miss)] uint GetInstanceIndex() { __target_switch { case hlsl: __intrinsic_asm ".GetInstanceIndex"; case glsl: __intrinsic_asm "hitObjectGetInstanceIdNV($0)"; + case cuda: __intrinsic_asm "optixHitObjectGetInstanceIndex"; case spirv: return spirv_asm { @@ -20383,13 +20422,14 @@ struct HitObject [__requiresNVAPI] __glsl_extension(GL_EXT_ray_tracing) [ForceInline] - [require(glsl_hlsl_spirv, ser_raygen_closesthit_miss)] + [require(cuda_glsl_hlsl_spirv, ser_raygen_closesthit_miss)] uint GetInstanceID() { __target_switch { case hlsl: __intrinsic_asm ".GetInstanceID"; case glsl: __intrinsic_asm "hitObjectGetInstanceCustomIndexNV($0)"; + case cuda: __intrinsic_asm "optixHitObjectGetInstanceId"; case spirv: return spirv_asm { @@ -20404,13 +20444,14 @@ struct HitObject [__requiresNVAPI] __glsl_extension(GL_EXT_ray_tracing) [ForceInline] - [require(glsl_hlsl_spirv, ser_raygen_closesthit_miss)] + [require(cuda_glsl_hlsl_spirv, ser_raygen_closesthit_miss)] uint GetGeometryIndex() { __target_switch { case hlsl: __intrinsic_asm ".GetGeometryIndex"; case glsl: __intrinsic_asm "hitObjectGetGeometryIndexNV($0)"; + case cuda: __intrinsic_asm "optixHitObjectGetSbtGASIndex"; case spirv: return spirv_asm { @@ -20425,13 +20466,14 @@ struct HitObject [__requiresNVAPI] __glsl_extension(GL_EXT_ray_tracing) [ForceInline] - [require(glsl_hlsl_spirv, ser_raygen_closesthit_miss)] + [require(cuda_glsl_hlsl_spirv, ser_raygen_closesthit_miss)] uint GetPrimitiveIndex() { __target_switch { case hlsl: __intrinsic_asm ".GetPrimitiveIndex"; case glsl: __intrinsic_asm "hitObjectGetPrimitiveIndexNV($0)"; + case cuda: __intrinsic_asm "optixHitObjectGetPrimitiveIndex"; case spirv: return spirv_asm { @@ -20596,7 +20638,7 @@ struct HitObject /// Returns the attributes of a hit. Valid if the hit object represents a hit or a miss. [ForceInline] - [require(glsl_hlsl_spirv, ser_raygen_closesthit_miss)] + [require(cuda_glsl_hlsl_spirv, ser_raygen_closesthit_miss)] attr_t GetAttributes<attr_t>() { __target_switch @@ -20618,6 +20660,7 @@ struct HitObject // Return the attributes return __hitObjectAttributes<attr_t>(); } + case cuda: __intrinsic_asm "optixHitObjectGetAttribute<$TR>($0)"; case spirv: { __Addr<attr_t> attr = __allocHitObjectAttributes<attr_t>(); @@ -21008,13 +21051,14 @@ struct HitObject __glsl_extension(GL_EXT_ray_tracing) __glsl_extension(GL_NV_shader_invocation_reorder) [ForceInline] -[require(glsl_hlsl_spirv, ser_raygen)] +[require(cuda_glsl_hlsl_spirv, ser_raygen)] void ReorderThread( uint CoherenceHint, uint NumCoherenceHintBitsFromLSB ) { __target_switch { case hlsl: __intrinsic_asm "NvReorderThread"; case glsl: __intrinsic_asm "reorderThreadNV"; + case cuda: __intrinsic_asm "optixReorder"; case spirv: spirv_asm { @@ -21045,13 +21089,14 @@ void ReorderThread( uint CoherenceHint, uint NumCoherenceHintBitsFromLSB ) __glsl_extension(GL_EXT_ray_tracing) __glsl_extension(GL_NV_shader_invocation_reorder) [ForceInline] -[require(glsl_hlsl_spirv, ser_raygen)] +[require(cuda_glsl_hlsl_spirv, ser_raygen)] void ReorderThread( HitObject HitOrMiss, uint CoherenceHint, uint NumCoherenceHintBitsFromLSB ) { __target_switch { case hlsl: __intrinsic_asm "NvReorderThread"; case glsl: __intrinsic_asm "reorderThreadNV"; + case cuda: __intrinsic_asm "optixReorder($1, $2)"; case spirv: spirv_asm { @@ -21072,13 +21117,14 @@ void ReorderThread( HitObject HitOrMiss, uint CoherenceHint, uint NumCoherenceHi __glsl_extension(GL_EXT_ray_tracing) __glsl_extension(GL_NV_shader_invocation_reorder) [ForceInline] -[require(glsl_hlsl_spirv, ser_raygen)] +[require(cuda_glsl_hlsl_spirv, ser_raygen)] void ReorderThread( HitObject HitOrMiss ) { __target_switch { case hlsl: __intrinsic_asm "NvReorderThread"; case glsl: __intrinsic_asm "reorderThreadNV"; + case cuda: __intrinsic_asm "optixReorder()"; case spirv: spirv_asm { diff --git a/source/slang/slang-emit-cuda.cpp b/source/slang/slang-emit-cuda.cpp index 74133fcf0..e5169ba38 100644 --- a/source/slang/slang-emit-cuda.cpp +++ b/source/slang/slang-emit-cuda.cpp @@ -214,6 +214,11 @@ SlangResult CUDASourceEmitter::calcTypeName(IRType* type, CodeGenTarget target, out << "TensorView"; return SLANG_OK; } + case kIROp_HitObjectType: + { + out << "OptixTraversableHandle"; + return SLANG_OK; + } default: { if (isNominalOp(type->getOp())) |
