diff options
| author | Harsh Aggarwal (NVIDIA) <haaggarwal@nvidia.com> | 2025-05-17 07:36:19 +0530 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-05-17 02:06:19 +0000 |
| commit | 0be6970c01d212aac7ea7db9b6c1556c530d6889 (patch) | |
| tree | 7e039079e2c96430ad6ecda254e011b5743c73f2 /source | |
| parent | 8f20632a0ba45c3bfada293842e55129949a2ae9 (diff) | |
Fix: Preserve inout param modifications with OptiX IgnoreHit() (#6956)
* Add noreturn attribute to IgnoreHit
* Revert "Add noreturn attribute to IgnoreHit"
This reverts commit 3cf2354dada71b9a8713b08f3a2e261de4aabfa4.
* Fix: Preserve inout param modifications with OptiX IgnoreHit()
Issue #6326 identified that in OptiX, when using IgnoreHit() (which
maps to the "noreturn" optixIgnoreIntersection() intrinsic), any
modifications made to 'inout' parameters within the shader would be
lost. This was due to IgnoreHit() preventing the execution of the
copy-back operation from the temporary variable (used to implement
'inout' semantics) to the original parameter.
This commit introduces a new IR pass, 'undoParameterCopy', specifically
for CUDA/OptiX targets to address this. The pass operates as follows:
1. Identifies temporary IR variables created for 'inout' parameters,
which are now decorated with 'TempCallArgVarDecoration'.
2. Maps these temporary variables back to their original parameter
storage (e.g., the OptiX payload pointer).
3. Replaces all uses of the temporary variable directly with the
original parameter pointer.
4. Removes the temporary variable declaration and its initializing store
(which copied from the original parameter to the temporary).
By transforming the IR to operate directly on the original parameter
storage before any potential call to IgnoreHit(), this fix ensures
that all modifications are preserved, correctly resolving issue #6326.
The pass is integrated into the compilation flow for relevant targets.
* Refactor(IR): Optimize GetOptiXRayPayloadPtr for better DCE/CSE
To allow for more effective dead code elimination (DCE) and
common subexpression elimination (CSE) of `getOptiXRayPayloadPtr`
instructions, this commit:
- Marks `kIROp_GetOptiXRayPayloadPtr` as side-effect-free within
`IRInst::mightHaveSideEffects` (in `slang-ir.cpp`).
- Flags `GetOptiXRayPayloadPtr` as `HOISTABLE` in its definition
within `slang-ir-inst-defs.h`.
This addresses scenarios where multiple, potentially redundant,
calls to `getOptiXRayPayloadPtr` might appear in the IR,
allowing optimizers to produce cleaner and potentially more
efficient code for OptiX targets. This change supports efforts
to refine IR handling for ray-tracing shader stages.
* Remove debugging code
* Refactor UndoParameterCopyVisitor for improved performance
- Optimized IR traversal by combining multiple passes into a single scan
- Removed unnecessary dictionary, immediately replace uses when a temp var is found
- Reduced duplicate code paths by checking for both temp vars and redundant stores in one loop
- Better handling of the 'changed' flag to ensure DCE only runs when needed
- Results in fewer instruction traversals and improved efficiency for large functions
* Add Test
Diffstat (limited to 'source')
| -rw-r--r-- | source/slang/slang-emit.cpp | 8 | ||||
| -rw-r--r-- | source/slang/slang-ir-inst-defs.h | 3 | ||||
| -rw-r--r-- | source/slang/slang-ir-insts.h | 1 | ||||
| -rw-r--r-- | source/slang/slang-ir-legalize-varying-params.cpp | 2 | ||||
| -rw-r--r-- | source/slang/slang-ir-undo-param-copy.cpp | 141 | ||||
| -rw-r--r-- | source/slang/slang-ir-undo-param-copy.h | 15 | ||||
| -rw-r--r-- | source/slang/slang-ir.cpp | 1 |
7 files changed, 170 insertions, 1 deletions
diff --git a/source/slang/slang-emit.cpp b/source/slang/slang-emit.cpp index 02b5a44ed..260bee0ff 100644 --- a/source/slang/slang-emit.cpp +++ b/source/slang/slang-emit.cpp @@ -106,6 +106,7 @@ #include "slang-ir-strip-legalization-insts.h" #include "slang-ir-synthesize-active-mask.h" #include "slang-ir-translate-global-varying-var.h" +#include "slang-ir-undo-param-copy.h" #include "slang-ir-uniformity.h" #include "slang-ir-user-type-hint.h" #include "slang-ir-validate.h" @@ -1594,6 +1595,13 @@ Result linkAndOptimizeIR( case CodeGenTarget::Metal: case CodeGenTarget::CPPSource: case CodeGenTarget::CUDASource: + // For CUDA/OptiX like targets, add our pass to replace inout parameter copies with direct + // pointers + undoParameterCopy(irModule); +#if 0 + dumpIRIfEnabled(codeGenContext, irModule, "PARAMETER COPIES REPLACED WITH DIRECT POINTERS"); +#endif + validateIRModuleIfEnabled(codeGenContext, irModule); moveGlobalVarInitializationToEntryPoints(irModule, targetProgram); introduceExplicitGlobalContext(irModule, target); if (target == CodeGenTarget::CPPSource) diff --git a/source/slang/slang-ir-inst-defs.h b/source/slang/slang-ir-inst-defs.h index e44954521..419c8d59d 100644 --- a/source/slang/slang-ir-inst-defs.h +++ b/source/slang/slang-ir-inst-defs.h @@ -752,7 +752,7 @@ INST(GpuForeach, gpuForeach, 3, 0) // Wrapper for OptiX intrinsics used to load and store ray payload data using // a pointer represented by two payload registers. -INST(GetOptiXRayPayloadPtr, getOptiXRayPayloadPtr, 0, 0) +INST(GetOptiXRayPayloadPtr, getOptiXRayPayloadPtr, 0, HOISTABLE) // Wrapper for OptiX intrinsics used to load a single hit attribute // Takes two arguments: the type (either float or int), and the hit @@ -1000,6 +1000,7 @@ INST_RANGE(BindingQuery, GetRegisterIndex, GetRegisterSpace) INST(MaximallyReconvergesDecoration, MaximallyReconverges, 0, 0) INST(QuadDerivativesDecoration, QuadDerivatives, 0, 0) INST(RequireFullQuadsDecoration, RequireFullQuads, 0, 0) + INST(TempCallArgVarDecoration, TempCallArgVar, 0, 0) // Marks a type to be non copyable, causing SSA pass to skip turning variables of the the type into SSA values. INST(NonCopyableTypeDecoration, nonCopyable, 0, 0) diff --git a/source/slang/slang-ir-insts.h b/source/slang/slang-ir-insts.h index 3280dc35c..c188b1eff 100644 --- a/source/slang/slang-ir-insts.h +++ b/source/slang/slang-ir-insts.h @@ -485,6 +485,7 @@ IR_SIMPLE_DECORATION(DownstreamModuleImportDecoration) IR_SIMPLE_DECORATION(MaximallyReconvergesDecoration) IR_SIMPLE_DECORATION(QuadDerivativesDecoration) IR_SIMPLE_DECORATION(RequireFullQuadsDecoration) +IR_SIMPLE_DECORATION(TempCallArgVarDecoration) struct IRAvailableInDownstreamIRDecoration : IRDecoration { diff --git a/source/slang/slang-ir-legalize-varying-params.cpp b/source/slang/slang-ir-legalize-varying-params.cpp index b31a6d92f..180d8eaa7 100644 --- a/source/slang/slang-ir-legalize-varying-params.cpp +++ b/source/slang/slang-ir-legalize-varying-params.cpp @@ -571,6 +571,8 @@ protected: builder.setInsertBefore(m_firstOrdinaryInst); auto localVar = builder.emitVar(valueType); + // Add TempCallArgVar decoration to mark this variable as a temporary for parameter passing + builder.addSimpleDecoration<IRTempCallArgVarDecoration>(localVar); auto localVal = LegalizedVaryingVal::makeAddress(localVar); if (const auto inOutType = as<IRInOutType>(paramPtrType)) diff --git a/source/slang/slang-ir-undo-param-copy.cpp b/source/slang/slang-ir-undo-param-copy.cpp new file mode 100644 index 000000000..d8aac7201 --- /dev/null +++ b/source/slang/slang-ir-undo-param-copy.cpp @@ -0,0 +1,141 @@ +#include "slang-ir-undo-param-copy.h" + +#include "slang-ir-dce.h" +#include "slang-ir-insts.h" +#include "slang-ir.h" + +namespace Slang +{ +// This pass transforms variables decorated with TempCallArgVarDecoration +// by replacing them with direct references to the original parameters. +// This is important for CUDA/OptiX targets where functions like 'IgnoreHit' +// can prevent copy-back operations from executing. +struct UndoParameterCopyVisitor +{ + IRBuilder builder; + IRModule* module; + bool changed = false; + + // Track instructions to remove + List<IRInst*> instsToRemove; + + UndoParameterCopyVisitor(IRModule* module) + : module(module) + { + builder.setInsertInto(module); + } + + // Process the entire module + void processModule() + { + // Process all functions in the module + for (auto inst = module->getModuleInst()->getFirstChild(); inst; inst = inst->getNextInst()) + { + if (auto func = as<IRFunc>(inst)) + { + processFunc(func); + } + } + } + + // Process a single function + void processFunc(IRFunc* func) + { + instsToRemove.clear(); + HashSet<IRInst*> originalPtrsForCopyBackCandidates; // Tracks original params that might + // have a redundant copy-back + + // Single pass to identify temps, replace uses, and identify redundant copy-back stores. + for (auto block = func->getFirstBlock(); block; block = block->getNextBlock()) + { + for (auto inst = block->getFirstInst(); inst; inst = inst->getNextInst()) + { + if (auto varInst = as<IRVar>(inst)) + { + if (varInst->findDecoration<IRTempCallArgVarDecoration>()) + { + IRStore* initializingStore = nullptr; + IRInst* originalParamPtr = nullptr; + + // Scan for the store that initializes this varInst + // This store should be in the same block, after varInst. + // The value stored should be an IRLoad from the original parameter pointer. + for (auto scanInst = varInst->getNextInst(); scanInst; + scanInst = scanInst->getNextInst()) + { + if (auto storeInst = as<IRStore>(scanInst)) + { + if (storeInst->getPtr() == varInst) + { + initializingStore = storeInst; + if (auto loadInst = as<IRLoad>(storeInst->getVal())) + { + originalParamPtr = loadInst->getPtr(); + + // Found the pattern: var, store(var, load(originalParam)) + this->changed = true; + + // Replace uses of varInst with originalParamPtr immediately + varInst->replaceUsesWith(originalParamPtr); + + // Mark for removal + instsToRemove.add(initializingStore); + instsToRemove.add(varInst); + + // Record originalParamPtr for copy-back optimization check + originalPtrsForCopyBackCandidates.add(originalParamPtr); + } + break; // Found the initializing store for varInst + } + } + // Stop scanning if another var declaration or a call is encountered + if (as<IRVar>(scanInst) || as<IRCall>(scanInst)) + { + break; + } + } + } + } + else if (auto storeInst = as<IRStore>(inst)) + { + // Check for redundant copy-back: store(originalParam, load(originalParam)) + IRInst* destPtr = storeInst->getPtr(); + if (originalPtrsForCopyBackCandidates.contains(destPtr)) + { + if (auto loadVal = as<IRLoad>(storeInst->getVal())) + { + if (loadVal->getPtr() == destPtr) + { + // This is a redundant copy-back store + instsToRemove.add(storeInst); + this->changed = true; + } + } + } + } + } + } + + // Removal pass + for (auto& inst : instsToRemove) + { + if (inst->getParent()) + { + inst->removeAndDeallocate(); + } + } + } +}; + +void undoParameterCopy(IRModule* module) +{ + UndoParameterCopyVisitor visitor(module); + visitor.processModule(); + + // Run DCE to clean up any dead instructions + if (visitor.changed) + { + eliminateDeadCode(module); + } +} +} // namespace Slang diff --git a/source/slang/slang-ir-undo-param-copy.h b/source/slang/slang-ir-undo-param-copy.h new file mode 100644 index 000000000..8796c5da5 --- /dev/null +++ b/source/slang/slang-ir-undo-param-copy.h @@ -0,0 +1,15 @@ +#ifndef SLANG_IR_UNDO_PARAM_COPY_H +#define SLANG_IR_UNDO_PARAM_COPY_H + +#include "slang-ir-insts.h" +#include "slang-ir.h" + +namespace Slang +{ +// Replace temporary variables created for parameter passing with direct pointer access +// This is particularly important for CUDA/OptiX targets where functions like 'IgnoreHit' +// prevent the copy-back step from executing for inout parameters +void undoParameterCopy(IRModule* module); +} // namespace Slang + +#endif
\ No newline at end of file diff --git a/source/slang/slang-ir.cpp b/source/slang/slang-ir.cpp index c44196bc5..b09d9e6e2 100644 --- a/source/slang/slang-ir.cpp +++ b/source/slang/slang-ir.cpp @@ -8605,6 +8605,7 @@ bool IRInst::mightHaveSideEffects(SideEffectAnalysisOptions options) case kIROp_GetElement: case kIROp_GetElementPtr: case kIROp_GetOffsetPtr: + case kIROp_GetOptiXRayPayloadPtr: case kIROp_UpdateElement: case kIROp_MeshOutputRef: case kIROp_MakeVectorFromScalar: |
