summaryrefslogtreecommitdiffstats
path: root/source
diff options
context:
space:
mode:
author: Harsh Aggarwal (NVIDIA) <haaggarwal@nvidia.com> 2025-05-26 21:00:38 +0530
committer: GitHub <noreply@github.com> 2025-05-26 15:30:38 +0000
commit: 83538e0b4b97425ecdae6f72f9c8fd44cb255aac (patch)
tree: 8f27c47fb7c1614fa916c2da6ab9996655e29da1 /source
parent: 8ecb2c70437292ef6fa34f7122df44067de6a4de (diff)
Implement shader execution reordering support for OptiX (#7211)
* Implement shader execution reordering support for OptiX

Added OptiX backend support for Shader Execution Reordering (SER) features as outlined in issue #6647.

This implementation:
1. Added CUDA target support for HitObject API
2. Implemented core SER functionality (TraceRay, MakeHit/Miss, Invoke)
3. Added OptiX-specific hit object handling functions
4. Added test case for OptiX SER functionality

* format code

---------

Co-authored-by: slangbot <186143334+slangbot@users.noreply.github.com>
Diffstat (limited to 'source')
-rw-r--r-- source/slang/hlsl.meta.slang 94
-rw-r--r-- source/slang/slang-emit-cuda.cpp 5
2 files changed, 75 insertions, 24 deletions
diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang
index cb050dd51..fd7c7cfc7 100644
--- a/source/slang/hlsl.meta.slang
+++ b/source/slang/hlsl.meta.slang
@@ -17250,7 +17250,7 @@ void TraceRay(
Ray,
__forceVarIntoRayPayloadStructTemporarily(Payload));
return;
- case cuda: __intrinsic_asm "traceOptiXRay";
+ case cuda: __intrinsic_asm "optixTrace";
case glsl:
{
[__vulkanRayPayload]
@@ -19576,7 +19576,7 @@ struct HitObject
/// Executes ray traversal (including anyhit and intersection shaders) like TraceRay, but returns the
/// resulting hit information as a HitObject and does not trigger closesthit or miss shaders.
[ForceInline]
- [require(glsl_hlsl_spirv, ser_raygen_closesthit_miss)]
+ [require(cuda_glsl_hlsl_spirv, ser_raygen_closesthit_miss)]
static HitObject TraceRay<payload_t>(
RaytracingAccelerationStructure AccelerationStructure,
uint RayFlags,
@@ -19629,6 +19629,7 @@ struct HitObject
// Write the payload out
Payload = p;
}
+ case cuda: __intrinsic_asm "optixTraverse";
case spirv:
{
[__vulkanRayPayload]
@@ -19669,7 +19670,7 @@ struct HitObject
/// Executes motion ray traversal (including anyhit and intersection shaders) like TraceRay, but returns the
/// resulting hit information as a HitObject and does not trigger closesthit or miss shaders.
[ForceInline]
- [require(glsl_hlsl_spirv, ser_motion_raygen_closesthit_miss)]
+ [require(cuda_glsl_hlsl_spirv, ser_motion_raygen_closesthit_miss)]
static HitObject TraceMotionRay<payload_t>(
RaytracingAccelerationStructure AccelerationStructure,
uint RayFlags,
@@ -19720,6 +19721,7 @@ struct HitObject
// Write the payload out
Payload = p;
}
+ case cuda: __intrinsic_asm "optixTraverse";
case spirv:
{
[__vulkanRayPayload]
@@ -19768,7 +19770,7 @@ struct HitObject
/// Attributes parameter must either be an attribute struct, such as
/// BuiltInTriangleIntersectionAttributes, or another HitObject to copy the attributes from.
[ForceInline]
- [require(glsl_hlsl_spirv, ser_raygen_closesthit_miss)]
+ [require(cuda_glsl_hlsl_spirv, ser_raygen_closesthit_miss)]
static HitObject MakeHit<attr_t>(
RaytracingAccelerationStructure AccelerationStructure,
uint InstanceIndex,
@@ -19816,6 +19818,7 @@ struct HitObject
Ray.TMax,
__hitObjectAttributesLocation(__hitObjectAttributes<attr_t>()));
}
+ case cuda: __intrinsic_asm "optixMakeHitObject";
case spirv:
{
// Save the attributes
@@ -19853,7 +19856,7 @@ struct HitObject
/// See MakeHit but handles Motion
/// Currently only supported on VK
[ForceInline]
- [require(glsl_hlsl_spirv, ser_motion_raygen_closesthit_miss)]
+ [require(cuda_glsl_hlsl_spirv, ser_motion_raygen_closesthit_miss)]
static HitObject MakeMotionHit<attr_t>(
RaytracingAccelerationStructure AccelerationStructure,
uint InstanceIndex,
@@ -19890,6 +19893,7 @@ struct HitObject
CurrentTime,
__hitObjectAttributesLocation(__hitObjectAttributes<attr_t>()));
}
+ case cuda: __intrinsic_asm "optixMakeHitObject";
case spirv:
{
// Save the attributes
@@ -19935,7 +19939,7 @@ struct HitObject
/// attribute struct, such as BuiltInTriangleIntersectionAttributes, or another HitObject to copy the
/// attributes from.
[ForceInline]
- [require(glsl_hlsl_spirv, ser_raygen_closesthit_miss)]
+ [require(cuda_glsl_hlsl_spirv, ser_raygen_closesthit_miss)]
static HitObject MakeHit<attr_t>(
uint HitGroupRecordIndex,
RaytracingAccelerationStructure AccelerationStructure,
@@ -19980,6 +19984,7 @@ struct HitObject
Ray.TMax,
__hitObjectAttributesLocation(__hitObjectAttributes<attr_t>()));
}
+ case cuda: __intrinsic_asm "optixMakeHitObject";
case spirv:
{
// Save the attributes
@@ -20013,7 +20018,7 @@ struct HitObject
/// See MakeHit but handles Motion
/// Currently only supported on VK
[ForceInline]
- [require(glsl_spirv, ser_motion_raygen_closesthit_miss)]
+ [require(cuda_glsl_spirv, ser_motion_raygen_closesthit_miss)]
static HitObject MakeMotionHit<attr_t>(
uint HitGroupRecordIndex,
RaytracingAccelerationStructure AccelerationStructure,
@@ -20047,6 +20052,7 @@ struct HitObject
CurrentTime,
__hitObjectAttributesLocation(__hitObjectAttributes<attr_t>()));
}
+ case cuda: __intrinsic_asm "optixMakeHitObject";
case spirv:
{
// Save the attributes
@@ -20084,7 +20090,7 @@ struct HitObject
/// table.
[__requiresNVAPI]
[ForceInline]
- [require(glsl_hlsl_spirv, ser_raygen_closesthit_miss)]
+ [require(cuda_glsl_hlsl_spirv, ser_raygen_closesthit_miss)]
static HitObject MakeMiss(
uint MissShaderIndex,
RayDesc Ray)
@@ -20094,6 +20100,7 @@ struct HitObject
case hlsl: __intrinsic_asm "($2=NvMakeMiss($0,$1))";
case glsl:
__glslMakeMiss(__return_val, MissShaderIndex, Ray.Origin, Ray.TMin, Ray.Direction, Ray.TMax);
+ case cuda: __intrinsic_asm "optixMakeMissHitObject";
case spirv:
{
let origin = Ray.Origin;
@@ -20119,7 +20126,7 @@ struct HitObject
/// See MakeMiss but handles Motion
/// Currently only supported on VK
[ForceInline]
- [require(glsl_hlsl_spirv, ser_motion_raygen_closesthit_miss)]
+ [require(cuda_glsl_hlsl_spirv, ser_motion_raygen_closesthit_miss)]
static HitObject MakeMotionMiss(
uint MissShaderIndex,
RayDesc Ray,
@@ -20130,6 +20137,7 @@ struct HitObject
case hlsl: __intrinsic_asm "($3=NvMakeMotionMiss($0,$1,$2))";
case glsl:
__glslMakeMotionMiss(__return_val, MissShaderIndex, Ray.Origin, Ray.TMin, Ray.Direction, Ray.TMax, CurrentTime);
+ case cuda: __intrinsic_asm "optixMakeMissHitObject";
case spirv:
{
let origin = Ray.Origin;
@@ -20162,7 +20170,7 @@ struct HitObject
/// miss.
[__requiresNVAPI]
[ForceInline]
- [require(glsl_hlsl_spirv, ser_raygen_closesthit_miss)]
+ [require(cuda_glsl_hlsl_spirv, ser_raygen_closesthit_miss)]
static HitObject MakeNop()
{
__target_switch
@@ -20171,6 +20179,7 @@ struct HitObject
__intrinsic_asm "($0 = NvMakeNop())";
case glsl:
__glslMakeNop(__return_val);
+ case cuda: __intrinsic_asm "optixMakeNopHitObject";
case spirv:
spirv_asm
{
@@ -20199,7 +20208,7 @@ struct HitObject
/// shader is invoked.
[__requiresNVAPI]
[ForceInline]
- [require(glsl_hlsl_spirv, ser_raygen_closesthit_miss)]
+ [require(cuda_glsl_hlsl_spirv, ser_raygen_closesthit_miss)]
static void Invoke<payload_t>(
RaytracingAccelerationStructure AccelerationStructure,
HitObject HitOrMiss,
@@ -20225,6 +20234,7 @@ struct HitObject
// Write payload result
Payload = p;
}
+ case cuda: __intrinsic_asm "optixInvoke";
case spirv:
{
[__vulkanRayPayload]
@@ -20251,13 +20261,14 @@ struct HitObject
/// Returns true if the HitObject encodes a miss, otherwise returns false.
[__requiresNVAPI]
[ForceInline]
- [require(glsl_hlsl_spirv, ser_raygen_closesthit_miss)]
+ [require(cuda_glsl_hlsl_spirv, ser_raygen_closesthit_miss)]
bool IsMiss()
{
__target_switch
{
case hlsl: __intrinsic_asm ".IsMiss";
case glsl: __intrinsic_asm "hitObjectIsMissNV($0)";
+ case cuda: __intrinsic_asm "optixHitObjectIsMiss";
case spirv:
return spirv_asm
{
@@ -20271,13 +20282,14 @@ struct HitObject
/// Returns true if the HitObject encodes a hit, otherwise returns false.
[__requiresNVAPI]
[ForceInline]
- [require(glsl_hlsl_spirv, ser_raygen_closesthit_miss)]
+ [require(cuda_glsl_hlsl_spirv, ser_raygen_closesthit_miss)]
bool IsHit()
{
__target_switch
{
case hlsl: __intrinsic_asm ".IsHit";
case glsl: __intrinsic_asm "hitObjectIsHitNV($0)";
+ case cuda: __intrinsic_asm "optixHitObjectIsHit";
case spirv:
return spirv_asm
{
@@ -20291,13 +20303,14 @@ struct HitObject
/// Returns true if the HitObject encodes a nop, otherwise returns false.
[__requiresNVAPI]
[ForceInline]
- [require(glsl_hlsl_spirv, ser_raygen_closesthit_miss)]
+ [require(cuda_glsl_hlsl_spirv, ser_raygen_closesthit_miss)]
bool IsNop()
{
__target_switch
{
case hlsl: __intrinsic_asm ".IsNop";
case glsl: __intrinsic_asm "hitObjectIsEmptyNV($0)";
+ case cuda: __intrinsic_asm "optixHitObjectIsNop";
case spirv:
return spirv_asm
{
@@ -20311,7 +20324,7 @@ struct HitObject
/// Queries ray properties from HitObject. Valid if the hit object represents a hit or a miss.
[__requiresNVAPI]
[ForceInline]
- [require(glsl_hlsl_spirv, ser_raygen_closesthit_miss)]
+ [require(cuda_glsl_hlsl_spirv, ser_raygen_closesthit_miss)]
RayDesc GetRayDesc()
{
__target_switch
@@ -20323,6 +20336,7 @@ struct HitObject
RayDesc ray = { __glslGetRayWorldOrigin(), __glslGetTMin(), __glslGetRayWorldDirection(), __glslGetTMax() };
return ray;
}
+ case cuda: __intrinsic_asm "optixHitObjectGetRayDesc";
case spirv:
return spirv_asm
{
@@ -20341,13 +20355,14 @@ struct HitObject
[__requiresNVAPI]
__glsl_extension(GL_EXT_ray_tracing)
[ForceInline]
- [require(glsl_hlsl_spirv, ser_raygen_closesthit_miss)]
+ [require(cuda_glsl_hlsl_spirv, ser_raygen_closesthit_miss)]
uint GetShaderTableIndex()
{
__target_switch
{
case hlsl: __intrinsic_asm ".GetShaderTableIndex";
case glsl: __intrinsic_asm "hitObjectGetShaderBindingTableRecordIndexNV($0)";
+ case cuda: __intrinsic_asm "optixHitObjectGetSbtRecordIndex";
case spirv:
return spirv_asm
{
@@ -20358,17 +20373,41 @@ struct HitObject
}
}
+ /// HitObject method taking a shader table record index (`RecordIndex`).
+ /// Only two targets have a case in the switch below: `hlsl` maps to the `.SetShaderTableIndex`
+ /// member intrinsic and `cuda` maps to `optixHitObjectSetSbtRecordIndex`.
+ /// NOTE(review): `__glsl_extension(GL_EXT_ray_tracing)` is attached although there is no `glsl`
+ /// case, and the declared return type is `uint` - confirm both against the target intrinsics.
+ [__requiresNVAPI]
+ __glsl_extension(GL_EXT_ray_tracing)
+ [ForceInline]
+ [require(cuda_hlsl, ser_raygen_closesthit_miss)]
+ uint SetShaderTableIndex(uint RecordIndex)
+ {
+ __target_switch
+ {
+ case hlsl: __intrinsic_asm ".SetShaderTableIndex";
+ case cuda: __intrinsic_asm "optixHitObjectSetSbtRecordIndex";
+ }
+ }
+
+ // TODO - Add other targets. NOTE(review): `[__requiresNVAPI] __glsl_extension(GL_EXT_ray_tracing)` are part of this comment and therefore NOT applied to the declaration; presumably they are to be restored once hlsl/glsl cases exist - confirm.
+ /// HitObject method taking a byte offset (`RootConstantOffsetInBytes`) and returning `uint`.
+ /// Only the `cuda` target is handled; it maps to `optixHitObjectGetSbtDataPointer`.
+ /// NOTE(review): the mapped intrinsic's name refers to an SBT data pointer, while this method
+ /// is declared to load a `uint` constant at an offset - verify the mapping and argument use.
+ [ForceInline]
+ [require(cuda, ser_raygen_closesthit_miss)]
+ uint LoadLocalRootArgumentsConstant(uint RootConstantOffsetInBytes)
+ {
+ __target_switch
+ {
+ case cuda: __intrinsic_asm "optixHitObjectGetSbtDataPointer";
+ }
+ }
/// Returns the instance index of a hit. Valid if the hit object represents a hit.
[__requiresNVAPI]
__glsl_extension(GL_EXT_ray_tracing)
[ForceInline]
- [require(glsl_hlsl_spirv, ser_raygen_closesthit_miss)]
+ [require(cuda_glsl_hlsl_spirv, ser_raygen_closesthit_miss)]
uint GetInstanceIndex()
{
__target_switch
{
case hlsl: __intrinsic_asm ".GetInstanceIndex";
case glsl: __intrinsic_asm "hitObjectGetInstanceIdNV($0)";
+ case cuda: __intrinsic_asm "optixHitObjectGetInstanceIndex";
case spirv:
return spirv_asm
{
@@ -20383,13 +20422,14 @@ struct HitObject
[__requiresNVAPI]
__glsl_extension(GL_EXT_ray_tracing)
[ForceInline]
- [require(glsl_hlsl_spirv, ser_raygen_closesthit_miss)]
+ [require(cuda_glsl_hlsl_spirv, ser_raygen_closesthit_miss)]
uint GetInstanceID()
{
__target_switch
{
case hlsl: __intrinsic_asm ".GetInstanceID";
case glsl: __intrinsic_asm "hitObjectGetInstanceCustomIndexNV($0)";
+ case cuda: __intrinsic_asm "optixHitObjectGetInstanceId";
case spirv:
return spirv_asm
{
@@ -20404,13 +20444,14 @@ struct HitObject
[__requiresNVAPI]
__glsl_extension(GL_EXT_ray_tracing)
[ForceInline]
- [require(glsl_hlsl_spirv, ser_raygen_closesthit_miss)]
+ [require(cuda_glsl_hlsl_spirv, ser_raygen_closesthit_miss)]
uint GetGeometryIndex()
{
__target_switch
{
case hlsl: __intrinsic_asm ".GetGeometryIndex";
case glsl: __intrinsic_asm "hitObjectGetGeometryIndexNV($0)";
+ case cuda: __intrinsic_asm "optixHitObjectGetSbtGASIndex";
case spirv:
return spirv_asm
{
@@ -20425,13 +20466,14 @@ struct HitObject
[__requiresNVAPI]
__glsl_extension(GL_EXT_ray_tracing)
[ForceInline]
- [require(glsl_hlsl_spirv, ser_raygen_closesthit_miss)]
+ [require(cuda_glsl_hlsl_spirv, ser_raygen_closesthit_miss)]
uint GetPrimitiveIndex()
{
__target_switch
{
case hlsl: __intrinsic_asm ".GetPrimitiveIndex";
case glsl: __intrinsic_asm "hitObjectGetPrimitiveIndexNV($0)";
+ case cuda: __intrinsic_asm "optixHitObjectGetPrimitiveIndex";
case spirv:
return spirv_asm
{
@@ -20596,7 +20638,7 @@ struct HitObject
/// Returns the attributes of a hit. Valid if the hit object represents a hit or a miss.
[ForceInline]
- [require(glsl_hlsl_spirv, ser_raygen_closesthit_miss)]
+ [require(cuda_glsl_hlsl_spirv, ser_raygen_closesthit_miss)]
attr_t GetAttributes<attr_t>()
{
__target_switch
@@ -20618,6 +20660,7 @@ struct HitObject
// Return the attributes
return __hitObjectAttributes<attr_t>();
}
+ case cuda: __intrinsic_asm "optixHitObjectGetAttribute<$TR>($0)";
case spirv:
{
__Addr<attr_t> attr = __allocHitObjectAttributes<attr_t>();
@@ -21008,13 +21051,14 @@ struct HitObject
__glsl_extension(GL_EXT_ray_tracing)
__glsl_extension(GL_NV_shader_invocation_reorder)
[ForceInline]
-[require(glsl_hlsl_spirv, ser_raygen)]
+[require(cuda_glsl_hlsl_spirv, ser_raygen)]
void ReorderThread( uint CoherenceHint, uint NumCoherenceHintBitsFromLSB )
{
__target_switch
{
case hlsl: __intrinsic_asm "NvReorderThread";
case glsl: __intrinsic_asm "reorderThreadNV";
+ case cuda: __intrinsic_asm "optixReorder";
case spirv:
spirv_asm
{
@@ -21045,13 +21089,14 @@ void ReorderThread( uint CoherenceHint, uint NumCoherenceHintBitsFromLSB )
__glsl_extension(GL_EXT_ray_tracing)
__glsl_extension(GL_NV_shader_invocation_reorder)
[ForceInline]
-[require(glsl_hlsl_spirv, ser_raygen)]
+[require(cuda_glsl_hlsl_spirv, ser_raygen)]
void ReorderThread( HitObject HitOrMiss, uint CoherenceHint, uint NumCoherenceHintBitsFromLSB )
{
__target_switch
{
case hlsl: __intrinsic_asm "NvReorderThread";
case glsl: __intrinsic_asm "reorderThreadNV";
+ case cuda: __intrinsic_asm "optixReorder($1, $2)";
case spirv:
spirv_asm
{
@@ -21072,13 +21117,14 @@ void ReorderThread( HitObject HitOrMiss, uint CoherenceHint, uint NumCoherenceHi
__glsl_extension(GL_EXT_ray_tracing)
__glsl_extension(GL_NV_shader_invocation_reorder)
[ForceInline]
-[require(glsl_hlsl_spirv, ser_raygen)]
+[require(cuda_glsl_hlsl_spirv, ser_raygen)]
void ReorderThread( HitObject HitOrMiss )
{
__target_switch
{
case hlsl: __intrinsic_asm "NvReorderThread";
case glsl: __intrinsic_asm "reorderThreadNV";
+ case cuda: __intrinsic_asm "optixReorder()";
case spirv:
spirv_asm
{
diff --git a/source/slang/slang-emit-cuda.cpp b/source/slang/slang-emit-cuda.cpp
index 74133fcf0..e5169ba38 100644
--- a/source/slang/slang-emit-cuda.cpp
+++ b/source/slang/slang-emit-cuda.cpp
@@ -214,6 +214,11 @@ SlangResult CUDASourceEmitter::calcTypeName(IRType* type, CodeGenTarget target,
out << "TensorView";
return SLANG_OK;
}
+ case kIROp_HitObjectType:
+ {
+ out << "OptixTraversableHandle";
+ return SLANG_OK;
+ }
default:
{
if (isNominalOp(type->getOp()))