diff options
Diffstat (limited to 'source')
| -rw-r--r-- | source/slang/slang-emit.cpp | 8 | ||||
| -rw-r--r-- | source/slang/slang-ir-inst-defs.h | 3 | ||||
| -rw-r--r-- | source/slang/slang-ir-insts.h | 1 | ||||
| -rw-r--r-- | source/slang/slang-ir-legalize-varying-params.cpp | 2 | ||||
| -rw-r--r-- | source/slang/slang-ir-undo-param-copy.cpp | 141 | ||||
| -rw-r--r-- | source/slang/slang-ir-undo-param-copy.h | 15 | ||||
| -rw-r--r-- | source/slang/slang-ir.cpp | 1 |
7 files changed, 170 insertions, 1 deletions
diff --git a/source/slang/slang-emit.cpp b/source/slang/slang-emit.cpp index 02b5a44ed..260bee0ff 100644 --- a/source/slang/slang-emit.cpp +++ b/source/slang/slang-emit.cpp @@ -106,6 +106,7 @@ #include "slang-ir-strip-legalization-insts.h" #include "slang-ir-synthesize-active-mask.h" #include "slang-ir-translate-global-varying-var.h" +#include "slang-ir-undo-param-copy.h" #include "slang-ir-uniformity.h" #include "slang-ir-user-type-hint.h" #include "slang-ir-validate.h" @@ -1594,6 +1595,13 @@ Result linkAndOptimizeIR( case CodeGenTarget::Metal: case CodeGenTarget::CPPSource: case CodeGenTarget::CUDASource: + // For CUDA/OptiX like targets, add our pass to replace inout parameter copies with direct + // pointers + undoParameterCopy(irModule); +#if 0 + dumpIRIfEnabled(codeGenContext, irModule, "PARAMETER COPIES REPLACED WITH DIRECT POINTERS"); +#endif + validateIRModuleIfEnabled(codeGenContext, irModule); moveGlobalVarInitializationToEntryPoints(irModule, targetProgram); introduceExplicitGlobalContext(irModule, target); if (target == CodeGenTarget::CPPSource) diff --git a/source/slang/slang-ir-inst-defs.h b/source/slang/slang-ir-inst-defs.h index e44954521..419c8d59d 100644 --- a/source/slang/slang-ir-inst-defs.h +++ b/source/slang/slang-ir-inst-defs.h @@ -752,7 +752,7 @@ INST(GpuForeach, gpuForeach, 3, 0) // Wrapper for OptiX intrinsics used to load and store ray payload data using // a pointer represented by two payload registers. -INST(GetOptiXRayPayloadPtr, getOptiXRayPayloadPtr, 0, 0) +INST(GetOptiXRayPayloadPtr, getOptiXRayPayloadPtr, 0, HOISTABLE) // Wrapper for OptiX intrinsics used to load a single hit attribute // Takes two arguments: the type (either float or int), and the hit @@ -1000,6 +1000,7 @@ INST_RANGE(BindingQuery, GetRegisterIndex, GetRegisterSpace) INST(MaximallyReconvergesDecoration, MaximallyReconverges, 0, 0) INST(QuadDerivativesDecoration, QuadDerivatives, 0, 0) INST(RequireFullQuadsDecoration, RequireFullQuads, 0, 0) + INST(TempCallArgVarDecoration, TempCallArgVar, 0, 0) // Marks a type to be non copyable, causing SSA pass to skip turning variables of the the type into SSA values. INST(NonCopyableTypeDecoration, nonCopyable, 0, 0) diff --git a/source/slang/slang-ir-insts.h b/source/slang/slang-ir-insts.h index 3280dc35c..c188b1eff 100644 --- a/source/slang/slang-ir-insts.h +++ b/source/slang/slang-ir-insts.h @@ -485,6 +485,7 @@ IR_SIMPLE_DECORATION(DownstreamModuleImportDecoration) IR_SIMPLE_DECORATION(MaximallyReconvergesDecoration) IR_SIMPLE_DECORATION(QuadDerivativesDecoration) IR_SIMPLE_DECORATION(RequireFullQuadsDecoration) +IR_SIMPLE_DECORATION(TempCallArgVarDecoration) struct IRAvailableInDownstreamIRDecoration : IRDecoration { diff --git a/source/slang/slang-ir-legalize-varying-params.cpp b/source/slang/slang-ir-legalize-varying-params.cpp index b31a6d92f..180d8eaa7 100644 --- a/source/slang/slang-ir-legalize-varying-params.cpp +++ b/source/slang/slang-ir-legalize-varying-params.cpp @@ -571,6 +571,8 @@ protected: builder.setInsertBefore(m_firstOrdinaryInst); auto localVar = builder.emitVar(valueType); + // Add TempCallArgVar decoration to mark this variable as a temporary for parameter passing + builder.addSimpleDecoration<IRTempCallArgVarDecoration>(localVar); auto localVal = LegalizedVaryingVal::makeAddress(localVar); if (const auto inOutType = as<IRInOutType>(paramPtrType)) diff --git a/source/slang/slang-ir-undo-param-copy.cpp b/source/slang/slang-ir-undo-param-copy.cpp new file mode 100644 index 000000000..d8aac7201 --- /dev/null +++ b/source/slang/slang-ir-undo-param-copy.cpp @@ -0,0 +1,141 @@ +#include "slang-ir-undo-param-copy.h" + +#include "slang-ir-dce.h" +#include "slang-ir-insts.h" +#include "slang-ir.h" + +namespace Slang +{ +// This pass transforms variables decorated with TempCallArgVarDecoration +// by replacing them with direct references to the original parameters. +// This is important for CUDA/OptiX targets where functions like 'IgnoreHit' +// can prevent copy-back operations from executing. +struct UndoParameterCopyVisitor +{ + IRBuilder builder; + IRModule* module; + bool changed = false; + + // Track instructions to remove + List<IRInst*> instsToRemove; + + UndoParameterCopyVisitor(IRModule* module) + : module(module) + { + builder.setInsertInto(module); + } + + // Process the entire module + void processModule() + { + // Process all functions in the module + for (auto inst = module->getModuleInst()->getFirstChild(); inst; inst = inst->getNextInst()) + { + if (auto func = as<IRFunc>(inst)) + { + processFunc(func); + } + } + } + + // Process a single function + void processFunc(IRFunc* func) + { + instsToRemove.clear(); + HashSet<IRInst*> originalPtrsForCopyBackCandidates; // Tracks original params that might + // have a redundant copy-back + + // Single pass to identify temps, replace uses, and identify redundant copy-back stores. + for (auto block = func->getFirstBlock(); block; block = block->getNextBlock()) + { + for (auto inst = block->getFirstInst(); inst; inst = inst->getNextInst()) + { + if (auto varInst = as<IRVar>(inst)) + { + if (varInst->findDecoration<IRTempCallArgVarDecoration>()) + { + IRStore* initializingStore = nullptr; + IRInst* originalParamPtr = nullptr; + + // Scan for the store that initializes this varInst + // This store should be in the same block, after varInst. + // The value stored should be an IRLoad from the original parameter pointer. + for (auto scanInst = varInst->getNextInst(); scanInst; + scanInst = scanInst->getNextInst()) + { + if (auto storeInst = as<IRStore>(scanInst)) + { + if (storeInst->getPtr() == varInst) + { + initializingStore = storeInst; + if (auto loadInst = as<IRLoad>(storeInst->getVal())) + { + originalParamPtr = loadInst->getPtr(); + + // Found the pattern: var, store(var, load(originalParam)) + this->changed = true; + + // Replace uses of varInst with originalParamPtr immediately + varInst->replaceUsesWith(originalParamPtr); + + // Mark for removal + instsToRemove.add(initializingStore); + instsToRemove.add(varInst); + + // Record originalParamPtr for copy-back optimization check + originalPtrsForCopyBackCandidates.add(originalParamPtr); + } + break; // Found the initializing store for varInst + } + } + // Stop scanning if another var declaration or a call is encountered + if (as<IRVar>(scanInst) || as<IRCall>(scanInst)) + { + break; + } + } + } + } + else if (auto storeInst = as<IRStore>(inst)) + { + // Check for redundant copy-back: store(originalParam, load(originalParam)) + IRInst* destPtr = storeInst->getPtr(); + if (originalPtrsForCopyBackCandidates.contains(destPtr)) + { + if (auto loadVal = as<IRLoad>(storeInst->getVal())) + { + if (loadVal->getPtr() == destPtr) + { + // This is a redundant copy-back store + instsToRemove.add(storeInst); + this->changed = true; + } + } + } + } + } + } + + // Removal pass + for (auto& inst : instsToRemove) + { + if (inst->getParent()) + { + inst->removeAndDeallocate(); + } + } + } +}; + +void undoParameterCopy(IRModule* module) +{ + UndoParameterCopyVisitor visitor(module); + visitor.processModule(); + + // Run DCE to clean up any dead instructions + if (visitor.changed) + { + eliminateDeadCode(module); + } +} +} // namespace Slang diff --git a/source/slang/slang-ir-undo-param-copy.h b/source/slang/slang-ir-undo-param-copy.h new file mode 100644 index 000000000..8796c5da5 --- /dev/null +++ b/source/slang/slang-ir-undo-param-copy.h @@ -0,0 +1,15 @@ +#ifndef SLANG_IR_UNDO_PARAM_COPY_H +#define SLANG_IR_UNDO_PARAM_COPY_H + +#include "slang-ir-insts.h" +#include "slang-ir.h" + +namespace Slang +{ +// Replace temporary variables created for parameter passing with direct pointer access +// This is particularly important for CUDA/OptiX targets where functions like 'IgnoreHit' +// prevent the copy-back step from executing for inout parameters +void undoParameterCopy(IRModule* module); +} // namespace Slang + +#endif
\ No newline at end of file diff --git a/source/slang/slang-ir.cpp b/source/slang/slang-ir.cpp index c44196bc5..b09d9e6e2 100644 --- a/source/slang/slang-ir.cpp +++ b/source/slang/slang-ir.cpp @@ -8605,6 +8605,7 @@ bool IRInst::mightHaveSideEffects(SideEffectAnalysisOptions options) case kIROp_GetElement: case kIROp_GetElementPtr: case kIROp_GetOffsetPtr: + case kIROp_GetOptiXRayPayloadPtr: case kIROp_UpdateElement: case kIROp_MeshOutputRef: case kIROp_MakeVectorFromScalar: |
