From 01510f2c922af8629c7a730ef92a31fa83bd9f49 Mon Sep 17 00:00:00 2001 From: Yong He Date: Wed, 15 Oct 2025 20:59:47 -0700 Subject: Immutable access qualifier for pointers and use `__ldg` on cuda. (#8710) This PR implements `Access.Immutable` to allow pointers to immutable data. The new type `ImmutablePtr` is defined as an alias of `Ptr`. By forming an immutable pointer, the programmer is conveying to the compiler that the data at the pointer address will never change during the execution of the current program. Therefore, loads from immutable pointers can be deduplicated by the compiler, and will translate to `__ldg` when generating code for CUDA. The SPIRV backend is not changed in this PR, since the current SPIRV spec makes it very difficult to specify loads from immutable addresses without generating tons of wrappers and boilerplate type declarations. We would like to see the spec evolve a bit around its support for `NonWritable` physical storage pointers or immutable loads before we attempt to express such immutability in SPIRV. For now we simply emit ordinary pointers and loads when generating SPIRV. 
--------- Co-authored-by: slangbot <186143334+slangbot@users.noreply.github.com> --- source/slang/slang-emit.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'source/slang/slang-emit.cpp') diff --git a/source/slang/slang-emit.cpp b/source/slang/slang-emit.cpp index 1bd1f8b5c..d5e89b1fe 100644 --- a/source/slang/slang-emit.cpp +++ b/source/slang/slang-emit.cpp @@ -32,6 +32,7 @@ #include "slang-ir-collect-global-uniforms.h" #include "slang-ir-com-interface.h" #include "slang-ir-composite-reg-to-mem.h" +#include "slang-ir-cuda-immutable-load.h" #include "slang-ir-dce.h" #include "slang-ir-defer-buffer-load.h" #include "slang-ir-defunctionalization.h" @@ -1886,6 +1887,13 @@ Result linkAndOptimizeIR( specializeAddressSpaceForWGSL(irModule); } + // If we are generating code for CUDA, we should translate all immutable buffer loads to + // using `__ldg` intrinsic for improved performance. + if (isCUDATarget(targetRequest)) + { + lowerImmutableBufferLoadForCUDA(targetProgram, irModule); + } + performForceInlining(irModule); bool emitSpirvDirectly = targetProgram->shouldEmitSPIRVDirectly(); -- cgit v1.2.3