summaryrefslogtreecommitdiffstats
path: root/source/slang/slang-emit.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'source/slang/slang-emit.cpp')
-rw-r--r-- source/slang/slang-emit.cpp | 8
1 files changed, 8 insertions, 0 deletions
diff --git a/source/slang/slang-emit.cpp b/source/slang/slang-emit.cpp
index 1bd1f8b5c..d5e89b1fe 100644
--- a/source/slang/slang-emit.cpp
+++ b/source/slang/slang-emit.cpp
@@ -32,6 +32,7 @@
#include "slang-ir-collect-global-uniforms.h"
#include "slang-ir-com-interface.h"
#include "slang-ir-composite-reg-to-mem.h"
+#include "slang-ir-cuda-immutable-load.h"
#include "slang-ir-dce.h"
#include "slang-ir-defer-buffer-load.h"
#include "slang-ir-defunctionalization.h"
@@ -1886,6 +1887,13 @@ Result linkAndOptimizeIR(
specializeAddressSpaceForWGSL(irModule);
}
+ // If we are generating code for CUDA, translate all immutable buffer loads to use the
+ // `__ldg` intrinsic for improved performance.
+ if (isCUDATarget(targetRequest))
+ {
+ lowerImmutableBufferLoadForCUDA(targetProgram, irModule);
+ }
+
performForceInlining(irModule);
bool emitSpirvDirectly = targetProgram->shouldEmitSPIRVDirectly();