summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHarsh Aggarwal (NVIDIA) <haaggarwal@nvidia.com>2025-08-08 03:13:25 +0530
committerGitHub <noreply@github.com>2025-08-07 21:43:25 +0000
commite595743b5aa4f6bd88800cfbcfd6eead3cc3d01b (patch)
tree4e019aaf7218b1c0113ad35d935c82aa0c6d5964
parent4721b6ef2dd4e1b39c85acc492f9c6af8898a34b (diff)
Fix intrinsic LoadLocalRootTableConstant for optix (#7949)
Due to an older version of spec referred there was an inconsitency v1.29 2/20/2025 - [HitObject LoadLocalRootArgumentsConstant] Latest spec https://microsoft.github.io/DirectX-Specs/d3d/Raytracing.html#hitobject-loadlocalroottableconstant Refer: OptiX backend support for Shader Execution Reordering (SER) features as outlined in issue #6647. -
-rw-r--r--prelude/slang-cuda-prelude.h35
-rw-r--r--source/core/slang-platform.cpp21
-rw-r--r--source/slang/hlsl.meta.slang22
-rw-r--r--tests/cuda/optix-cluster.slang2
-rw-r--r--tests/cuda/optix-coopvec.slang2
-rw-r--r--tests/cuda/optix-hit-attributes.slang2
-rw-r--r--tests/cuda/optix-ignore-hit.slang3
-rw-r--r--tests/cuda/optix-ser.slang75
8 files changed, 108 insertions, 54 deletions
diff --git a/prelude/slang-cuda-prelude.h b/prelude/slang-cuda-prelude.h
index a66fa15cb..c366ae856 100644
--- a/prelude/slang-cuda-prelude.h
+++ b/prelude/slang-cuda-prelude.h
@@ -3644,22 +3644,23 @@ __forceinline__ __device__ void* optixTraverse(
r1);
}
-static __forceinline__ __device__ bool optixHitObjectIsHit(OptixTraversableHandle* hitObj)
+static __forceinline__ __device__ bool slangOptixHitObjectIsHit(OptixTraversableHandle* hitObj)
{
return optixHitObjectIsHit();
}
-static __forceinline__ __device__ bool optixHitObjectIsMiss(OptixTraversableHandle* hitObj)
+static __forceinline__ __device__ bool slangOptixHitObjectIsMiss(OptixTraversableHandle* hitObj)
{
return optixHitObjectIsMiss();
}
-static __forceinline__ __device__ bool optixHitObjectIsNop(OptixTraversableHandle* hitObj)
+static __forceinline__ __device__ bool slangOptixHitObjectIsNop(OptixTraversableHandle* hitObj)
{
return optixHitObjectIsNop();
}
-static __forceinline__ __device__ uint optixHitObjectGetClusterId(OptixTraversableHandle* hitObj)
+static __forceinline__ __device__ uint
+slangOptixHitObjectGetClusterId(OptixTraversableHandle* hitObj)
{
return optixHitObjectGetClusterId();
}
@@ -3809,7 +3810,7 @@ static __forceinline__ __device__ void optixMakeHitObject(
0 /*numTransforms */);
}
-static __forceinline__ __device__ void optixMakeNopHitObject(OptixTraversableHandle* Obj)
+static __forceinline__ __device__ void slangOptixMakeNopHitObject(OptixTraversableHandle* Obj)
{
optixMakeNopHitObject();
}
@@ -3834,22 +3835,25 @@ static __forceinline__ __device__ RayDesc optixHitObjectGetRayDesc(OptixTraversa
return ray;
}
-static __forceinline__ __device__ uint optixHitObjectGetInstanceIndex(OptixTraversableHandle* Obj)
+static __forceinline__ __device__ uint
+slangOptixHitObjectGetInstanceIndex(OptixTraversableHandle* Obj)
{
return optixHitObjectGetInstanceIndex();
}
-static __forceinline__ __device__ uint optixHitObjectGetInstanceId(OptixTraversableHandle* Obj)
+static __forceinline__ __device__ uint slangOptixHitObjectGetInstanceId(OptixTraversableHandle* Obj)
{
return optixHitObjectGetInstanceId();
}
-static __forceinline__ __device__ uint optixHitObjectGetSbtGASIndex(OptixTraversableHandle* Obj)
+static __forceinline__ __device__ uint
+slangOptixHitObjectGetSbtGASIndex(OptixTraversableHandle* Obj)
{
return optixHitObjectGetSbtGASIndex();
}
-static __forceinline__ __device__ uint optixHitObjectGetPrimitiveIndex(OptixTraversableHandle* Obj)
+static __forceinline__ __device__ uint
+slangOptixHitObjectGetPrimitiveIndex(OptixTraversableHandle* Obj)
{
return optixHitObjectGetPrimitiveIndex();
}
@@ -3888,22 +3892,17 @@ static __forceinline__ __device__ T optixHitObjectGetAttribute(OptixTraversableH
return result;
}
-static __forceinline__ __device__ uint optixHitObjectGetSbtRecordIndex(OptixTraversableHandle* Obj)
+static __forceinline__ __device__ uint
+slangOptixHitObjectGetSbtRecordIndex(OptixTraversableHandle* Obj)
{
return optixHitObjectGetSbtRecordIndex();
}
static __forceinline__ __device__ uint
-optixHitObjectSetSbtRecordIndex(OptixTraversableHandle* Obj, uint sbtRecordIndex)
+slangOptixHitObjectSetSbtRecordIndex(OptixTraversableHandle* Obj, uint sbtRecordIndex)
{
optixHitObjectSetSbtRecordIndex(sbtRecordIndex); // returns void
- return 0;
-}
-static __forceinline__ __device__ uint
-optixHitObjectGetSbtDataPointer(OptixTraversableHandle* Obj, uint sbtRecordIndex)
-{
- optixHitObjectGetSbtDataPointer(); // returns void
- return 0;
+ return sbtRecordIndex;
}
#endif
static const int kSlangTorchTensorMaxDim = 5;
diff --git a/source/core/slang-platform.cpp b/source/core/slang-platform.cpp
index aab1f3044..f7e82fdf0 100644
--- a/source/core/slang-platform.cpp
+++ b/source/core/slang-platform.cpp
@@ -174,12 +174,27 @@ SLANG_COMPILE_TIME_ASSERT(E_OUTOFMEMORY == SLANG_E_OUT_OF_MEMORY);
}
#else // _WIN32
-
/* static */ SlangResult PlatformUtil::getInstancePath([[maybe_unused]] StringBuilder& out)
{
- // On non Windows it's typically hard to get the instance path, so we'll say not implemented.
- // The meaning is also somewhat more ambiguous - is it the exe or the shared library path?
+#if defined(__linux__) || defined(__CYGWIN__)
+ char path[PATH_MAX];
+ ssize_t len = readlink("/proc/self/exe", path, sizeof(path) - 1);
+ if (len == -1)
+ {
+ return SLANG_FAIL;
+ }
+
+ path[len] = '\0';
+ String pathString(path);
+
+ // We don't want the instance name, just the path to it
+ out.clear();
+ out.append(Path::getParentDirectory(pathString));
+
+ return out.getLength() > 0 ? SLANG_OK : SLANG_FAIL;
+#else
return SLANG_E_NOT_IMPLEMENTED;
+#endif
}
/* static */ SlangResult PlatformUtil::appendResult(
diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang
index c3d2efaac..2d9543716 100644
--- a/source/slang/hlsl.meta.slang
+++ b/source/slang/hlsl.meta.slang
@@ -20472,7 +20472,7 @@ struct HitObject
__intrinsic_asm "($0 = NvMakeNop())";
case glsl:
__glslMakeNop(__return_val);
- case cuda: __intrinsic_asm "optixMakeNopHitObject";
+ case cuda: __intrinsic_asm "slangOptixMakeNopHitObject";
case spirv:
spirv_asm
{
@@ -20561,7 +20561,7 @@ struct HitObject
{
case hlsl: __intrinsic_asm ".IsMiss";
case glsl: __intrinsic_asm "hitObjectIsMissNV($0)";
- case cuda: __intrinsic_asm "optixHitObjectIsMiss";
+ case cuda: __intrinsic_asm "slangOptixHitObjectIsMiss";
case spirv:
return spirv_asm
{
@@ -20582,7 +20582,7 @@ struct HitObject
{
case hlsl: __intrinsic_asm ".IsHit";
case glsl: __intrinsic_asm "hitObjectIsHitNV($0)";
- case cuda: __intrinsic_asm "optixHitObjectIsHit";
+ case cuda: __intrinsic_asm "slangOptixHitObjectIsHit";
case spirv:
return spirv_asm
{
@@ -20603,7 +20603,7 @@ struct HitObject
{
case hlsl: __intrinsic_asm ".IsNop";
case glsl: __intrinsic_asm "hitObjectIsEmptyNV($0)";
- case cuda: __intrinsic_asm "optixHitObjectIsNop";
+ case cuda: __intrinsic_asm "slangOptixHitObjectIsNop";
case spirv:
return spirv_asm
{
@@ -20655,7 +20655,7 @@ struct HitObject
{
case hlsl: __intrinsic_asm ".GetShaderTableIndex";
case glsl: __intrinsic_asm "hitObjectGetShaderBindingTableRecordIndexNV($0)";
- case cuda: __intrinsic_asm "optixHitObjectGetSbtRecordIndex";
+ case cuda: __intrinsic_asm "slangOptixHitObjectGetSbtRecordIndex";
case spirv:
return spirv_asm
{
@@ -20675,7 +20675,7 @@ struct HitObject
__target_switch
{
case hlsl: __intrinsic_asm ".SetShaderTableIndex";
- case cuda: __intrinsic_asm "optixHitObjectSetSbtRecordIndex";
+ case cuda: __intrinsic_asm "slangOptixHitObjectSetSbtRecordIndex";
}
}
/// Returns the instance index of a hit. Valid if the hit object represents a hit.
@@ -20689,7 +20689,7 @@ struct HitObject
{
case hlsl: __intrinsic_asm ".GetInstanceIndex";
case glsl: __intrinsic_asm "hitObjectGetInstanceIdNV($0)";
- case cuda: __intrinsic_asm "optixHitObjectGetInstanceIndex";
+ case cuda: __intrinsic_asm "slangOptixHitObjectGetInstanceIndex";
case spirv:
return spirv_asm
{
@@ -20711,7 +20711,7 @@ struct HitObject
{
case hlsl: __intrinsic_asm ".GetInstanceID";
case glsl: __intrinsic_asm "hitObjectGetInstanceCustomIndexNV($0)";
- case cuda: __intrinsic_asm "optixHitObjectGetInstanceId";
+ case cuda: __intrinsic_asm "slangOptixHitObjectGetInstanceId";
case spirv:
return spirv_asm
{
@@ -20733,7 +20733,7 @@ struct HitObject
{
case hlsl: __intrinsic_asm ".GetGeometryIndex";
case glsl: __intrinsic_asm "hitObjectGetGeometryIndexNV($0)";
- case cuda: __intrinsic_asm "optixHitObjectGetSbtGASIndex";
+ case cuda: __intrinsic_asm "slangOptixHitObjectGetSbtGASIndex";
case spirv:
return spirv_asm
{
@@ -20755,7 +20755,7 @@ struct HitObject
{
case hlsl: __intrinsic_asm ".GetPrimitiveIndex";
case glsl: __intrinsic_asm "hitObjectGetPrimitiveIndexNV($0)";
- case cuda: __intrinsic_asm "optixHitObjectGetPrimitiveIndex";
+ case cuda: __intrinsic_asm "slangOptixHitObjectGetPrimitiveIndex";
case spirv:
return spirv_asm
{
@@ -20799,7 +20799,7 @@ struct HitObject
{
case hlsl: __intrinsic_asm ".GetClusterID";
case glsl: __intrinsic_asm "hitObjectGetClusterIdNV($0)";
- case cuda: __intrinsic_asm "optixHitObjectGetClusterId";
+ case cuda: __intrinsic_asm "slangOptixHitObjectGetClusterId";
case spirv:
return spirv_asm
{
diff --git a/tests/cuda/optix-cluster.slang b/tests/cuda/optix-cluster.slang
index 151b9c7dd..86e139701 100644
--- a/tests/cuda/optix-cluster.slang
+++ b/tests/cuda/optix-cluster.slang
@@ -1,4 +1,5 @@
//TEST:SIMPLE(filecheck=CHECK): -target cuda
+//TEST:SIMPLE(filecheck=CHECK-PTX): -target ptx -Xnvrtc -I"./external/optix-dev/include/" -entry closestHitShaderA
//CHECK: __global__ void __closesthit__closestHitShaderA
//CHECK: optixGetClusterId
struct RayPayload
@@ -9,6 +10,7 @@ struct RayPayload
[shader("closesthit")]
void closestHitShaderA(inout RayPayload payload, in BuiltInTriangleIntersectionAttributes attr)
{
+ //CHECK-PTX:_optix_get_cluster_id
int clusterId = GetClusterID();
float4 color = float4(0, 0, 0, 1);
if (clusterId >= 0)
diff --git a/tests/cuda/optix-coopvec.slang b/tests/cuda/optix-coopvec.slang
index 58e83ebb9..194a97ba5 100644
--- a/tests/cuda/optix-coopvec.slang
+++ b/tests/cuda/optix-coopvec.slang
@@ -1,5 +1,7 @@
//TEST:SIMPLE(filecheck=CHECK): -target cuda -capability optix_coopvec
+//TEST:SIMPLE(filecheck=CHECK-PTX): -target ptx -Xnvrtc -I"./external/optix-dev/include/"
+// CHECK-PTX: add.f32
// CHECK: optixCoopVecLoad
// CHECK: OptixCoopVec
// CHECK: optixCoopVecTanh
diff --git a/tests/cuda/optix-hit-attributes.slang b/tests/cuda/optix-hit-attributes.slang
index 347401893..a2e73c13a 100644
--- a/tests/cuda/optix-hit-attributes.slang
+++ b/tests/cuda/optix-hit-attributes.slang
@@ -1,5 +1,6 @@
//TEST:SIMPLE(filecheck=CHECK): -target cuda
//CHECK: __global__ void __closesthit__closestHitShaderA
+//TEST:SIMPLE(filecheck=CHECK-PTX): -target ptx -Xnvrtc -I"./external/optix-dev/include/"
struct RayPayload
{
float4 color;
@@ -8,6 +9,7 @@ struct RayPayload
[shader("closesthit")]
void closestHitShaderA(inout RayPayload payload, in BuiltInTriangleIntersectionAttributes attr)
{
+ //CHECK-PTX: _optix_read_primitive_idx
uint primitiveIndex = PrimitiveIndex();
float4 color = float4(0, 0, 0, 1);
color[primitiveIndex] = 1;
diff --git a/tests/cuda/optix-ignore-hit.slang b/tests/cuda/optix-ignore-hit.slang
index 54cc301bb..891ead960 100644
--- a/tests/cuda/optix-ignore-hit.slang
+++ b/tests/cuda/optix-ignore-hit.slang
@@ -1,8 +1,10 @@
// optix-ignore-hit.slang
//TEST:SIMPLE(filecheck=CHECK): -target cuda -entry anyHitShader
+//TEST:SIMPLE(filecheck=CHECK-PTX): -target ptx -Xnvrtc -I"./external/optix-dev/include/"
//CHECK: HitBuffer_insert_0(((HitBuffer_0 *)getOptiXRayPayloadPtr()), hit_0.t_0);
//CHECK: optixIgnoreIntersection
+//CHECK-PTX: _optix_get_ray_tmax
struct HitBuffer
{
@@ -28,6 +30,7 @@ void anyHitShader(inout HitBuffer rayHitBuffer)
// Modify the inout parameter
rayHitBuffer.insert(hit.t);
+ // CHECK-PTX: _optix_ignore_intersection
// Early exit - should not lose the modification to inout rayHitBuffer
if (hit.t < rayHitBuffer.last)
IgnoreHit();
diff --git a/tests/cuda/optix-ser.slang b/tests/cuda/optix-ser.slang
index 54f300706..d28db60ef 100644
--- a/tests/cuda/optix-ser.slang
+++ b/tests/cuda/optix-ser.slang
@@ -2,6 +2,7 @@
//TEST:SIMPLE(filecheck=CHECK): -target cuda -entry rayGenerationMain -stage raygeneration
+//TEST:SIMPLE(filecheck=CHECK-PTX): -target ptx -Xnvrtc -I"./external/optix-dev/include/" -entry rayGenerationMain -stage raygeneration
//TEST_INPUT: set scene = AccelerationStructure
uniform RaytracingAccelerationStructure scene;
@@ -18,20 +19,33 @@ struct SomeValues
uint calcValue(HitObject hit)
{
uint r = 0;
-
+
+ // CHECK: slangOptixHitObjectIsHit
if (hit.IsHit())
{
+ // CHECK: slangOptixHitObjectGetInstanceIndex
uint instanceIndex = hit.GetInstanceIndex();
+ // CHECK: slangOptixHitObjectGetInstanceId
uint instanceID = hit.GetInstanceID();
+ // CHECK: slangOptixHitObjectGetSbtGASIndex
uint geometryIndex = hit.GetGeometryIndex();
+
+ // CHECK: slangOptixHitObjectGetPrimitiveIndex
uint primitiveIndex = hit.GetPrimitiveIndex();
+ // CHECK: slangOptixHitObjectGetClusterId
int clusterID = hit.GetClusterID();
+ // CHECK: slangOptixHitObjectGetSbtRecordIndex
uint shaderTableIndex = hit.GetShaderTableIndex();
// spriv and glsl lack these methods
+ // CHECK: slangOptixHitObjectSetSbtRecordIndex({{.*}}0U)
uint setShaderTableIndex = hit.SetShaderTableIndex(0);
+
+ // CHECK: optixHitObjectGetSbtDataPointer()+(0U)
uint ialbedo = hit.LoadLocalRootTableConstant(0);
+
+ // CHECK: optixHitObjectGetAttribute
SomeValues objSomeValues = hit.GetAttributes<SomeValues>();
-
+
r += instanceIndex;
r += instanceID;
r += geometryIndex;
@@ -42,48 +56,57 @@ uint calcValue(HitObject hit)
r += setShaderTableIndex;
r += ialbedo;
}
-
+
return r;
}
-void rayGenerationMain()
+void rayGenerationMain()
{
+ // CHECK: optixGetLaunchIndex
int2 launchID = int2(DispatchRaysIndex().xy);
+ // CHECK: optixGetLaunchDimensions
int2 launchSize = int2(DispatchRaysDimensions().xy);
int idx = launchID.x;
-
+
SomeValues someValues = { idx, idx * 2.0f };
- RayDesc ray;
+ RayDesc ray;
ray.Origin = float3(idx, 0, 0);
ray.TMin = 0.01f;
ray.Direction = float3(0, 1, 0);
ray.TMax = 1e4f;
-
- RAY_FLAG rayFlags = RAY_FLAG_ACCEPT_FIRST_HIT_AND_END_SEARCH | RAY_FLAG_CULL_BACK_FACING_TRIANGLES;
+
+ RAY_FLAG rayFlags =
+ RAY_FLAG_ACCEPT_FIRST_HIT_AND_END_SEARCH | RAY_FLAG_CULL_BACK_FACING_TRIANGLES;
uint instanceInclusionMask = 0xff;
uint rayContributionToHitGroupIndex = 0;
uint multiplierForGeometryContributionToHitGroupIndex = 4;
uint missShaderIndex = 0;
- // SPIRV: OpHitObjectTraceRayNV
// CHECK: optixTraverse
- HitObject hit = HitObject::TraceRay(scene,
- rayFlags,
- instanceInclusionMask,
- rayContributionToHitGroupIndex,
- multiplierForGeometryContributionToHitGroupIndex,
- missShaderIndex,
- ray,
+ // CHECK-PTX:_optix_hitobject_traverse
+ HitObject hit = HitObject::TraceRay(
+ scene,
+ rayFlags,
+ instanceInclusionMask,
+ rayContributionToHitGroupIndex,
+ multiplierForGeometryContributionToHitGroupIndex,
+ missShaderIndex,
+ ray,
someValues);
-
- ReorderThread( hit );
+
+ // CHECK-DAG: optixReorder();
+ // CHECK-DAG: optixReorder((uint(idx_0 & int(3))), (2U));
+ // CHECK-DAG: optixReorder(uint(idx_0 & int(1)), 1U);
+ ReorderThread(hit);
ReorderThread(hit, uint(idx & 3), 2);
ReorderThread(uint(idx & 1), 1);
outputBuffer[idx] = calcValue(hit);
HitObject miss[2];
+ // CHECK: optixMakeMissHitObject(0U, ray_0, &miss_0[int(0)]);
miss[0] = HitObject::MakeMiss(0u, ray);
+ // CHECK: optixMakeMissHitObject(0U, ray_0, 1.0f, &miss_0[int(1)]);
miss[1] = HitObject::MakeMotionMiss(0u, ray, 1.f);
uint hitGroupRecordIndex = 0;
@@ -91,16 +114,21 @@ void rayGenerationMain()
uint geometryIndex = 0;
uint primitiveIndex = 0;
uint hitKind = 0;
- BuiltInTriangleIntersectionAttributes attr = {0.01f, 0.2f};
+ BuiltInTriangleIntersectionAttributes attr = { 0.01f, 0.2f };
- HitObject hitObj = HitObject::MakeHit(hitGroupRecordIndex, scene,
+ // CHECK: optixMakeHitObject
+ HitObject hitObj = HitObject::MakeHit(
+ hitGroupRecordIndex,
+ scene,
instanceIndex,
geometryIndex,
primitiveIndex,
hitKind,
ray,
attr);
+ // CHECK: slangOptixMakeNopHitObject
HitObject nopObj = HitObject::MakeNop();
+ // CHECK: slangOptixHitObjectIsNop
outputBuffer[idx] = uint(nopObj.IsNop());
outputBuffer[idx] += calcValue(hit);
@@ -112,8 +140,10 @@ void rayGenerationMain()
// Change the payload
SomeValues otherValues = { idx * -2, idx * 8.0f };
- HitObject::Invoke( scene, hit, otherValues );
+ // CHECK: optixInvoke
+ HitObject::Invoke(scene, hit, otherValues);
HitObject motionHitObj[2];
+ // CHECK: optixMakeHitObject
motionHitObj[0] = HitObject::MakeMotionHit(
scene,
instanceIndex,
@@ -125,6 +155,7 @@ void rayGenerationMain()
ray,
0.f,
attr);
+ // CHECK: optixMakeHitObject
motionHitObj[1] = HitObject::MakeMotionHit(
hitGroupRecordIndex,
scene,
@@ -138,9 +169,9 @@ void rayGenerationMain()
outputBuffer[idx] += calcValue(motionHitObj[0]);
outputBuffer[idx] += calcValue(motionHitObj[1]);
+ // CHECK: optixHitObjectGetRayDesc
RayDesc rayD = hit.GetRayDesc();
outputBuffer[idx] += uint(rayD.TMin > 0);
outputBuffer[idx] += uint(rayD.TMax < ray.TMin);
-
}