summaryrefslogtreecommitdiffstats
path: root/source
diff options
context:
space:
mode:
Diffstat (limited to 'source')
-rw-r--r--source/slang/slang-emit.cpp8
-rw-r--r--source/slang/slang-ir-inst-defs.h3
-rw-r--r--source/slang/slang-ir-insts.h1
-rw-r--r--source/slang/slang-ir-legalize-varying-params.cpp2
-rw-r--r--source/slang/slang-ir-undo-param-copy.cpp141
-rw-r--r--source/slang/slang-ir-undo-param-copy.h15
-rw-r--r--source/slang/slang-ir.cpp1
7 files changed, 170 insertions, 1 deletions
diff --git a/source/slang/slang-emit.cpp b/source/slang/slang-emit.cpp
index 02b5a44ed..260bee0ff 100644
--- a/source/slang/slang-emit.cpp
+++ b/source/slang/slang-emit.cpp
@@ -106,6 +106,7 @@
#include "slang-ir-strip-legalization-insts.h"
#include "slang-ir-synthesize-active-mask.h"
#include "slang-ir-translate-global-varying-var.h"
+#include "slang-ir-undo-param-copy.h"
#include "slang-ir-uniformity.h"
#include "slang-ir-user-type-hint.h"
#include "slang-ir-validate.h"
@@ -1594,6 +1595,13 @@ Result linkAndOptimizeIR(
case CodeGenTarget::Metal:
case CodeGenTarget::CPPSource:
case CodeGenTarget::CUDASource:
+ // Motivated by CUDA/OptiX-like targets, but note that this runs for every target in this case
+ // group (Metal and CPPSource too): replace inout parameter copies with direct pointers.
+ undoParameterCopy(irModule);
+#if 0
+ dumpIRIfEnabled(codeGenContext, irModule, "PARAMETER COPIES REPLACED WITH DIRECT POINTERS");
+#endif
+ validateIRModuleIfEnabled(codeGenContext, irModule);
moveGlobalVarInitializationToEntryPoints(irModule, targetProgram);
introduceExplicitGlobalContext(irModule, target);
if (target == CodeGenTarget::CPPSource)
diff --git a/source/slang/slang-ir-inst-defs.h b/source/slang/slang-ir-inst-defs.h
index e44954521..419c8d59d 100644
--- a/source/slang/slang-ir-inst-defs.h
+++ b/source/slang/slang-ir-inst-defs.h
@@ -752,7 +752,7 @@ INST(GpuForeach, gpuForeach, 3, 0)
// Wrapper for OptiX intrinsics used to load and store ray payload data using
// a pointer represented by two payload registers.
-INST(GetOptiXRayPayloadPtr, getOptiXRayPayloadPtr, 0, 0)
+INST(GetOptiXRayPayloadPtr, getOptiXRayPayloadPtr, 0, HOISTABLE)
// Wrapper for OptiX intrinsics used to load a single hit attribute
// Takes two arguments: the type (either float or int), and the hit
@@ -1000,6 +1000,7 @@ INST_RANGE(BindingQuery, GetRegisterIndex, GetRegisterSpace)
INST(MaximallyReconvergesDecoration, MaximallyReconverges, 0, 0)
INST(QuadDerivativesDecoration, QuadDerivatives, 0, 0)
INST(RequireFullQuadsDecoration, RequireFullQuads, 0, 0)
+ INST(TempCallArgVarDecoration, TempCallArgVar, 0, 0)
// Marks a type to be non copyable, causing SSA pass to skip turning variables of the the type into SSA values.
INST(NonCopyableTypeDecoration, nonCopyable, 0, 0)
diff --git a/source/slang/slang-ir-insts.h b/source/slang/slang-ir-insts.h
index 3280dc35c..c188b1eff 100644
--- a/source/slang/slang-ir-insts.h
+++ b/source/slang/slang-ir-insts.h
@@ -485,6 +485,7 @@ IR_SIMPLE_DECORATION(DownstreamModuleImportDecoration)
IR_SIMPLE_DECORATION(MaximallyReconvergesDecoration)
IR_SIMPLE_DECORATION(QuadDerivativesDecoration)
IR_SIMPLE_DECORATION(RequireFullQuadsDecoration)
+IR_SIMPLE_DECORATION(TempCallArgVarDecoration)
struct IRAvailableInDownstreamIRDecoration : IRDecoration
{
diff --git a/source/slang/slang-ir-legalize-varying-params.cpp b/source/slang/slang-ir-legalize-varying-params.cpp
index b31a6d92f..180d8eaa7 100644
--- a/source/slang/slang-ir-legalize-varying-params.cpp
+++ b/source/slang/slang-ir-legalize-varying-params.cpp
@@ -571,6 +571,8 @@ protected:
builder.setInsertBefore(m_firstOrdinaryInst);
auto localVar = builder.emitVar(valueType);
+ // Add TempCallArgVar decoration to mark this variable as a temporary for parameter passing
+ builder.addSimpleDecoration<IRTempCallArgVarDecoration>(localVar);
auto localVal = LegalizedVaryingVal::makeAddress(localVar);
if (const auto inOutType = as<IRInOutType>(paramPtrType))
diff --git a/source/slang/slang-ir-undo-param-copy.cpp b/source/slang/slang-ir-undo-param-copy.cpp
new file mode 100644
index 000000000..d8aac7201
--- /dev/null
+++ b/source/slang/slang-ir-undo-param-copy.cpp
@@ -0,0 +1,141 @@
+#include "slang-ir-undo-param-copy.h"
+
+#include "slang-ir-dce.h"
+#include "slang-ir-insts.h"
+#include "slang-ir.h"
+
+namespace Slang
+{
+// This pass transforms variables decorated with TempCallArgVarDecoration
+// by replacing them with direct references to the original parameters.
+//
+// The pattern recognized is
+//     var  = IRVar            (decorated with TempCallArgVarDecoration)
+//     store(var, load(originalPtr))
+// Every use of `var` is redirected to `originalPtr`, after which the variable, its
+// initializing store, and any store that has degenerated into
+// store(originalPtr, load(originalPtr)) are removed.
+//
+// This is important for CUDA/OptiX targets where functions like 'IgnoreHit'
+// can prevent copy-back operations from executing.
+struct UndoParameterCopyVisitor
+{
+    IRModule* module;
+
+    // Set as soon as any rewrite or removal has been decided; the caller uses it to
+    // decide whether a clean-up pass is worthwhile.
+    bool changed = false;
+
+    // Instructions to remove once the current function has been fully scanned.
+    // Removal is deferred so that the instruction lists are not edited while iterating.
+    List<IRInst*> instsToRemove;
+
+    UndoParameterCopyVisitor(IRModule* module)
+        : module(module)
+    {
+    }
+
+    // Process every IRFunc that is a direct child of the module instruction.
+    void processModule()
+    {
+        for (auto inst = module->getModuleInst()->getFirstChild(); inst; inst = inst->getNextInst())
+        {
+            if (auto func = as<IRFunc>(inst))
+            {
+                processFunc(func);
+            }
+        }
+    }
+
+    // Find the store that initializes `varInst`.
+    //
+    // Scans forward from the instruction after `varInst` and returns the first store whose
+    // destination is `varInst`. The scan gives up (returns nullptr) when it runs off the end
+    // of the instruction list or reaches another variable declaration or a call.
+    static IRStore* findInitializingStore(IRVar* varInst)
+    {
+        for (auto scanInst = varInst->getNextInst(); scanInst; scanInst = scanInst->getNextInst())
+        {
+            if (auto storeInst = as<IRStore>(scanInst))
+            {
+                if (storeInst->getPtr() == varInst)
+                    return storeInst;
+            }
+            // Stop scanning if another var declaration or a call is encountered
+            if (as<IRVar>(scanInst) || as<IRCall>(scanInst))
+                return nullptr;
+        }
+        return nullptr;
+    }
+
+    // Process a single function: rewrite decorated temporaries, then delete what became dead.
+    void processFunc(IRFunc* func)
+    {
+        instsToRemove.clear();
+
+        // Original pointers that replaced a temporary; stores to these may now be
+        // redundant copy-backs.
+        HashSet<IRInst*> originalPtrsForCopyBackCandidates;
+
+        // Guards against scheduling the same instruction twice. Once the uses of a
+        // temporary are rewritten, its initializing store has the same shape as a
+        // redundant copy-back, so the loop below meets it a second time. Without this
+        // set it would be deallocated and then touched again in the removal pass.
+        HashSet<IRInst*> scheduledForRemoval;
+        auto scheduleRemoval = [&](IRInst* inst)
+        {
+            if (scheduledForRemoval.contains(inst))
+                return;
+            scheduledForRemoval.add(inst);
+            instsToRemove.add(inst);
+        };
+
+        // Single pass to identify temps, replace uses, and identify redundant copy-back stores.
+        for (auto block = func->getFirstBlock(); block; block = block->getNextBlock())
+        {
+            for (auto inst = block->getFirstInst(); inst; inst = inst->getNextInst())
+            {
+                if (auto varInst = as<IRVar>(inst))
+                {
+                    if (!varInst->findDecoration<IRTempCallArgVarDecoration>())
+                        continue;
+
+                    auto initializingStore = findInitializingStore(varInst);
+                    if (!initializingStore)
+                        continue;
+
+                    // The value stored must be a load from the original pointer.
+                    auto loadInst = as<IRLoad>(initializingStore->getVal());
+                    if (!loadInst)
+                        continue;
+
+                    IRInst* originalParamPtr = loadInst->getPtr();
+
+                    // A temporary initialized from itself has no original to fall back
+                    // on; removing it would leave its remaining uses dangling.
+                    if (originalParamPtr == varInst)
+                        continue;
+
+                    // Found the pattern: var, store(var, load(originalParam))
+                    changed = true;
+
+                    // Redirect every use of the temporary to the original pointer.
+                    varInst->replaceUsesWith(originalParamPtr);
+
+                    scheduleRemoval(initializingStore);
+                    scheduleRemoval(varInst);
+
+                    // Record originalParamPtr for the copy-back check below.
+                    originalPtrsForCopyBackCandidates.add(originalParamPtr);
+                }
+                else if (auto storeInst = as<IRStore>(inst))
+                {
+                    // Check for redundant copy-back: store(originalParam, load(originalParam))
+                    IRInst* destPtr = storeInst->getPtr();
+                    if (!originalPtrsForCopyBackCandidates.contains(destPtr))
+                        continue;
+
+                    auto loadVal = as<IRLoad>(storeInst->getVal());
+                    if (!loadVal || loadVal->getPtr() != destPtr)
+                        continue;
+
+                    // This store writes back the value it just read; drop it.
+                    scheduleRemoval(storeInst);
+                    changed = true;
+                }
+            }
+        }
+
+        // Removal pass. Each instruction appears at most once in the list, so it has not
+        // been deallocated yet when it is inspected here.
+        for (auto& inst : instsToRemove)
+        {
+            if (inst->getParent())
+            {
+                inst->removeAndDeallocate();
+            }
+        }
+    }
+};
+
+// Entry point of the pass: runs UndoParameterCopyVisitor over `module` and, only when the
+// visitor reports a change, follows up with dead code elimination on the same module.
+void undoParameterCopy(IRModule* module)
+{
+    UndoParameterCopyVisitor undoVisitor(module);
+    undoVisitor.processModule();
+
+    // Nothing was rewritten, so there is nothing new for DCE to clean up.
+    if (!undoVisitor.changed)
+        return;
+
+    eliminateDeadCode(module);
+}
+} // namespace Slang
diff --git a/source/slang/slang-ir-undo-param-copy.h b/source/slang/slang-ir-undo-param-copy.h
new file mode 100644
index 000000000..8796c5da5
--- /dev/null
+++ b/source/slang/slang-ir-undo-param-copy.h
@@ -0,0 +1,15 @@
+#ifndef SLANG_IR_UNDO_PARAM_COPY_H
+#define SLANG_IR_UNDO_PARAM_COPY_H
+
+#include "slang-ir-insts.h"
+#include "slang-ir.h"
+
+namespace Slang
+{
+// Replaces the temporary variables created for parameter passing (those marked with
+// TempCallArgVarDecoration) with direct access through the original pointer. This matters for
+// CUDA/OptiX targets, where functions like 'IgnoreHit' prevent the inout copy-back from running.
+void undoParameterCopy(IRModule* module);
+} // namespace Slang
+
+#endif \ No newline at end of file
diff --git a/source/slang/slang-ir.cpp b/source/slang/slang-ir.cpp
index c44196bc5..b09d9e6e2 100644
--- a/source/slang/slang-ir.cpp
+++ b/source/slang/slang-ir.cpp
@@ -8605,6 +8605,7 @@ bool IRInst::mightHaveSideEffects(SideEffectAnalysisOptions options)
case kIROp_GetElement:
case kIROp_GetElementPtr:
case kIROp_GetOffsetPtr:
+ case kIROp_GetOptiXRayPayloadPtr:
case kIROp_UpdateElement:
case kIROp_MeshOutputRef:
case kIROp_MakeVectorFromScalar: