diff options
| author | Yong He <yonghe@outlook.com> | 2023-07-19 13:50:49 -0700 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2023-07-19 13:50:49 -0700 |
| commit | 1cfb1c85b52e00cde2d21874a88cda2c22d18b62 (patch) | |
| tree | a38b24534d865ffe33a3d0fc030f5449ba729e28 /source/slang | |
| parent | 1fe5e83f3dcc8ef0efa2dd083ebdfab5d0f101a9 (diff) | |
Optimize specialization, and remove unnecessary calls to `simplifyIR`. (#2999)
* Remove unnecessary calls to `simplifyIR`.
* fix.
* Delete obsolete hoistConst pass.
* Fix.
* Small improvements.
* Fix.
* Fix enum lowering.
* fix
* tweaks.
* tweaks.
---------
Co-authored-by: Yong He <yhe@nvidia.com>
Diffstat (limited to 'source/slang')
| -rw-r--r-- | source/slang/slang-ast-base.h | 2 | ||||
| -rw-r--r-- | source/slang/slang-ast-support-types.h | 6 | ||||
| -rw-r--r-- | source/slang/slang-check-conformance.cpp | 10 | ||||
| -rw-r--r-- | source/slang/slang-check-stmt.cpp | 4 | ||||
| -rw-r--r-- | source/slang/slang-emit.cpp | 22 | ||||
| -rw-r--r-- | source/slang/slang-ir-hoist-constants.cpp | 119 | ||||
| -rw-r--r-- | source/slang/slang-ir-hoist-constants.h | 13 | ||||
| -rw-r--r-- | source/slang/slang-ir-lower-generics.cpp | 4 | ||||
| -rw-r--r-- | source/slang/slang-ir-peephole.cpp | 30 | ||||
| -rw-r--r-- | source/slang/slang-ir-peephole.h | 1 | ||||
| -rw-r--r-- | source/slang/slang-ir-sccp.cpp | 36 | ||||
| -rw-r--r-- | source/slang/slang-ir-specialize.cpp | 289 | ||||
| -rw-r--r-- | source/slang/slang-ir-ssa-simplification.cpp | 4 | ||||
| -rw-r--r-- | source/slang/slang-ir.cpp | 1 | ||||
| -rw-r--r-- | source/slang/slang-lower-to-ir.cpp | 12 |
15 files changed, 169 insertions, 384 deletions
diff --git a/source/slang/slang-ast-base.h b/source/slang/slang-ast-base.h index 02bcb726f..b90014316 100644 --- a/source/slang/slang-ast-base.h +++ b/source/slang/slang-ast-base.h @@ -232,7 +232,7 @@ class Type: public Val /// Type derived types store the AST builder they were constructed on. The builder calls this function /// after constructing. - SLANG_FORCE_INLINE void init(ASTNodeType inAstNodeType, ASTBuilder* inAstBuilder) { m_astBuilder = inAstBuilder; astNodeType = inAstNodeType; } + SLANG_FORCE_INLINE void init(ASTNodeType inAstNodeType, ASTBuilder* inAstBuilder) { Val::init(inAstNodeType, inAstBuilder); m_astBuilder = inAstBuilder; } /// Get the ASTBuilder that was used to construct this Type SLANG_FORCE_INLINE ASTBuilder* getASTBuilder() const { return m_astBuilder; } diff --git a/source/slang/slang-ast-support-types.h b/source/slang/slang-ast-support-types.h index 0a5eeb65d..4765d11ec 100644 --- a/source/slang/slang-ast-support-types.h +++ b/source/slang/slang-ast-support-types.h @@ -831,6 +831,12 @@ namespace Slang return equals(other); } + template<typename U> + bool operator != (DeclRef<U> other) const + { + return !equals(other); + } + explicit operator bool() const { return declRefBase; diff --git a/source/slang/slang-check-conformance.cpp b/source/slang/slang-check-conformance.cpp index 2696f798d..1d19e01bf 100644 --- a/source/slang/slang-check-conformance.cpp +++ b/source/slang/slang-check-conformance.cpp @@ -104,6 +104,16 @@ namespace Slang // For now we are continuing to conflate all the subtype-ish relationships but not // tangling convertibility into it. + // First, make sure both sub type and super type decl are ready for lookup. 
+ if (auto subDeclRefType = as<DeclRefType>(subType)) + { + ensureDecl(subDeclRefType->declRef.getDecl(), DeclCheckState::ReadyForLookup); + } + if (auto superDeclRefType = as<DeclRefType>(subType)) + { + ensureDecl(superDeclRefType->declRef.getDecl(), DeclCheckState::ReadyForLookup); + } + // In the common case, we can use the pre-computed inheritance information for `subType` // to enumerate all the types it transitively inherits from. // diff --git a/source/slang/slang-check-stmt.cpp b/source/slang/slang-check-stmt.cpp index 6453f68ab..ba7e977e3 100644 --- a/source/slang/slang-check-stmt.cpp +++ b/source/slang/slang-check-stmt.cpp @@ -492,7 +492,7 @@ namespace Slang auto varExpr = as<VarExpr>(opSideEffectExpr->arguments[0]); if (!varExpr) return; - if (varExpr->declRef != initialVar) + if (varExpr->declRef.getDecl() != initialVar.getDecl()) { // If the user writes something like `for (int i = 0; i < 5; j++)`, // it is most likely a bug, so we issue a warning. @@ -518,7 +518,7 @@ namespace Slang stepSize = m_astBuilder->getIntVal(m_astBuilder->getIntType(), 1); } - if (predicateVar != initialVar) + if (predicateVar.getDecl() != initialVar.getDecl()) { if (predicateVar) getSink()->diagnose(stmt->predicateExpression, Diagnostics::forLoopPredicateCheckingDifferentVar, initialVar, predicateVar); diff --git a/source/slang/slang-emit.cpp b/source/slang/slang-emit.cpp index f07dc97f8..6a43ff8c2 100644 --- a/source/slang/slang-emit.cpp +++ b/source/slang/slang-emit.cpp @@ -329,7 +329,6 @@ Result linkAndOptimizeIR( } lowerOptionalType(irModule, sink); - simplifyIR(irModule, sink); switch (target) { @@ -356,6 +355,8 @@ Result linkAndOptimizeIR( #endif validateIRModuleIfEnabled(codeGenContext, irModule); + simplifyIR(irModule, sink); + // It's important that this takes place before defunctionalization as we // want to be able to easily discover the cooperate and fallback funcitons // being passed to saturated_cooperation @@ -475,10 +476,6 @@ Result linkAndOptimizeIR( if 
(sink->getErrorCount() != 0) return SLANG_FAIL; - - // TODO(DG): There are multiple DCE steps here, which need to be changed - // so that they don't just throw out any non-entry point code - // Debugging code for IR transformations... #if 0 dumpIRIfEnabled(codeGenContext, irModule, "SPECIALIZED"); #endif @@ -493,9 +490,6 @@ Result linkAndOptimizeIR( // simplifyIR(irModule, sink); -#if 0 - dumpIRIfEnabled(codeGenContext, irModule, "AFTER DCE"); -#endif validateIRModuleIfEnabled(codeGenContext, irModule); // We don't need the legalize pass for C/C++ based types @@ -529,7 +523,6 @@ Result linkAndOptimizeIR( legalizeExistentialTypeLayout( irModule, sink); - eliminateDeadCode(irModule); #if 0 dumpIRIfEnabled(codeGenContext, irModule, "EXISTENTIALS LEGALIZED"); @@ -550,7 +543,6 @@ Result linkAndOptimizeIR( legalizeResourceTypes( irModule, sink); - eliminateDeadCode(irModule); // Debugging output of legalization #if 0 @@ -565,7 +557,6 @@ Result linkAndOptimizeIR( legalizeEmptyTypes( irModule, sink); - eliminateDeadCode(irModule); } // Once specialization and type legalization have been performed, @@ -599,7 +590,7 @@ Result linkAndOptimizeIR( { specializeArrayParameters(codeGenContext, irModule); } - simplifyIR(irModule, sink); + eliminateDeadCode(irModule); // Rewrite functions that return arrays to return them via `out` parameter, // since our target languages doesn't allow returning arrays. @@ -842,9 +833,9 @@ Result linkAndOptimizeIR( // functions, so there might still be invalid code in // our IR module. // - // We run IR simplification passes again to clean things up. + // We run DCE pass again to clean things up. // - simplifyIR(irModule, sink); + eliminateDeadCode(irModule); if (isKhronosTarget(targetRequest)) { @@ -885,10 +876,11 @@ Result linkAndOptimizeIR( // Lower all bit_cast operations on complex types into leaf-level // bit_cast on basic types. 
lowerBitCast(targetRequest, irModule); - simplifyIR(irModule, sink); eliminateMultiLevelBreak(irModule); + simplifyIR(irModule, sink); + // As a late step, we need to take the SSA-form IR and move things *out* // of SSA form, by eliminating all "phi nodes" (block parameters) and // introducing explicit temporaries instead. Doing this at the IR level diff --git a/source/slang/slang-ir-hoist-constants.cpp b/source/slang/slang-ir-hoist-constants.cpp deleted file mode 100644 index 7a250bdb7..000000000 --- a/source/slang/slang-ir-hoist-constants.cpp +++ /dev/null @@ -1,119 +0,0 @@ -// slang-ir-hoist-constants.cpp -#include "slang-ir-hoist-constants.h" -#include "slang-ir-inst-pass-base.h" - -namespace Slang -{ - -struct HoistConstantPass : InstPassBase -{ - HoistConstantPass(IRModule* module) : InstPassBase(module) - {} - - bool changed = false; - - void processModule() - { - processAllInsts([this](IRInst* inst) - { - - if (inst->getParent() == module->getModuleInst() || !inst->getParent()) - return; - auto parent = inst->getParent(); - auto p = parent; - while (p) - { - if (as<IRGlobalValueWithCode>(p)) - return; - p = p->parent; - } - while (parent && parent->parent != module->getModuleInst()) - parent = parent->parent; - if (!parent) - return; - switch (inst->getOp()) - { - default: - return; - case kIROp_Add: - case kIROp_Sub: - case kIROp_Mul: - case kIROp_Div: - case kIROp_Module: - case kIROp_Neg: - case kIROp_And: - case kIROp_Or: - case kIROp_Not: - case kIROp_BitAnd: - case kIROp_BitNot: - case kIROp_BitOr: - case kIROp_BitXor: - case kIROp_Select: - case kIROp_Greater: - case kIROp_Less: - case kIROp_Leq: - case kIROp_Geq: - case kIROp_Eql: - case kIROp_Neq: - case kIROp_BitCast: - case kIROp_Lsh: - case kIROp_Rsh: - case kIROp_MatrixReshape: - case kIROp_VectorReshape: - case kIROp_MakeVector: - case kIROp_MakeMatrix: - case kIROp_MakeMatrixFromScalar: - case kIROp_MakeVectorFromScalar: - case kIROp_MakeOptionalNone: - case kIROp_MakeOptionalValue: - case 
kIROp_MakeDifferentialPair: - case kIROp_MakeExistential: - case kIROp_ExtractExistentialType: - case kIROp_ExtractExistentialValue: - case kIROp_ExtractExistentialWitnessTable: - case kIROp_WrapExistential: - case kIROp_WitnessTableType: - case kIROp_AttributedType: - case kIROp_MatrixType: - case kIROp_OptionalHasValue: - case kIROp_GetOptionalValue: - case kIROp_IntCast: - case kIROp_FloatCast: - case kIROp_CastIntToFloat: - case kIROp_CastFloatToInt: - case kIROp_CastPtrToBool: - case kIROp_CastPtrToInt: - case kIROp_CastIntToPtr: - case kIROp_CastToVoid: - case kIROp_Reinterpret: - case kIROp_swizzle: - case kIROp_IntLit: - case kIROp_BoolLit: - case kIROp_ArrayType: - case kIROp_Specialize: - case kIROp_VectorType: - break; - } - if (inst->typeUse.get() && inst->typeUse.get()->parent != module->getModuleInst()) - return; - for (UInt i = 0; i < inst->getOperandCount(); i++) - { - if (inst->getOperand(i)->parent != module->getModuleInst()) - return; - } - // all operands are in global scope, we can move this inst to global scope as well. - inst->insertBefore(parent); - changed = true; - }); - } -}; - -bool hoistConstants( - IRModule* module) -{ - HoistConstantPass context(module); - context.processModule(); - return context.changed; -} - -} diff --git a/source/slang/slang-ir-hoist-constants.h b/source/slang/slang-ir-hoist-constants.h deleted file mode 100644 index 28d4a0c6b..000000000 --- a/source/slang/slang-ir-hoist-constants.h +++ /dev/null @@ -1,13 +0,0 @@ -// slang-ir-hoist-constants.h -#pragma once - -namespace Slang -{ -struct IRModule; - - /// A (specialized) generic type may contain insts that computes compile-time constants defined within - /// the type. We should hoist them to global scope so they can be SCCP'd when possible. 
-bool hoistConstants( - IRModule* module); - -} diff --git a/source/slang/slang-ir-lower-generics.cpp b/source/slang/slang-ir-lower-generics.cpp index 19c075bf1..a7c0f51e6 100644 --- a/source/slang/slang-ir-lower-generics.cpp +++ b/source/slang/slang-ir-lower-generics.cpp @@ -234,9 +234,5 @@ namespace Slang generateAnyValueMarshallingFunctions(&sharedContext); if (sink->getErrorCount() != 0) return; - - // We might have generated new temporary variables during lowering. - // An SSA pass can clean up unnecessary load/stores. - simplifyIR(module); } } // namespace Slang diff --git a/source/slang/slang-ir-peephole.cpp b/source/slang/slang-ir-peephole.cpp index 376795855..ac026b563 100644 --- a/source/slang/slang-ir-peephole.cpp +++ b/source/slang/slang-ir-peephole.cpp @@ -235,9 +235,6 @@ struct PeepholeContext : InstPassBase return false; } - RefPtr<IRDominatorTree> domTree; - IRGlobalValueWithCode* domTreeFunc = nullptr; - void processInst(IRInst* inst) { if (as<IRGlobalValueWithCode>(inst)) @@ -746,13 +743,8 @@ struct PeepholeContext : InstPassBase auto parentFunc = getParentFunc(inst); if (!parentFunc) break; - if (domTreeFunc != parentFunc) - { - domTree = computeDominatorTree(parentFunc); - domTreeFunc = parentFunc; - } - if (!domTree) - break; + + auto domTree = parentFunc->getModule()->findOrCreateDominatorTree(parentFunc); if (domTree->dominates(argValue, inst)) { @@ -816,6 +808,8 @@ struct PeepholeContext : InstPassBase bool processFunc(IRInst* func) { + func->getModule()->invalidateAllAnalysis(); + bool result = false; for (;;) { @@ -847,6 +841,22 @@ bool peepholeOptimize(IRInst* func) return context.processFunc(func); } +bool peepholeOptimizeGlobalScope(IRModule* module) +{ + PeepholeContext context = PeepholeContext(module); + bool result = false; + for (;;) + { + context.changed = false; + for (auto globalInst : module->getGlobalInsts()) + context.processInst(globalInst); + result |= context.changed; + if (!context.changed) + break; + } + return result; 
+} + bool tryReplaceInstUsesWithSimplifiedValue(IRModule* module, IRInst* inst) { if (inst != tryConstantFoldInst(module, inst)) diff --git a/source/slang/slang-ir-peephole.h b/source/slang/slang-ir-peephole.h index c0b83a715..b2ea1bc04 100644 --- a/source/slang/slang-ir-peephole.h +++ b/source/slang/slang-ir-peephole.h @@ -10,5 +10,6 @@ namespace Slang /// Apply peephole optimizations. bool peepholeOptimize(IRModule* module); bool peepholeOptimize(IRInst* func); + bool peepholeOptimizeGlobalScope(IRModule* module); bool tryReplaceInstUsesWithSimplifiedValue(IRModule* module, IRInst* inst); } diff --git a/source/slang/slang-ir-sccp.cpp b/source/slang/slang-ir-sccp.cpp index d5e4c6e99..ce635dca8 100644 --- a/source/slang/slang-ir-sccp.cpp +++ b/source/slang/slang-ir-sccp.cpp @@ -1248,12 +1248,30 @@ struct SCCPContext } } - // Run the constant folding on global scope only. + // Run the constant folding on global scope and specialized types only. bool applyOnGlobalScope(IRModule* module) { - builderStorage = IRBuilder(shared->module); + bool changed = applyOnScope(module->getModuleInst()); for (auto child : module->getModuleInst()->getChildren()) { + switch (child->getOp()) + { + case kIROp_StructType: + case kIROp_ClassType: + case kIROp_InterfaceType: + case kIROp_WitnessTable: + changed |= applyOnScope(child); + break; + } + } + return changed; + } + + bool applyOnScope(IRInst* scopeInst) + { + builderStorage = IRBuilder(scopeInst); + for (auto child : scopeInst->getChildren()) + { // Only consider evaluable opcodes. if (!isEvaluableOpCode(child->getOp())) continue; @@ -1265,7 +1283,7 @@ struct SCCPContext auto inst = ssaWorkList[0]; ssaWorkList.fastRemoveAt(0); // Only consider evaluable opcodes and insts at global scope. 
- if (!isEvaluableOpCode(inst->getOp()) || inst->getParent() != module->getModuleInst()) + if (!isEvaluableOpCode(inst->getOp()) || inst->getParent() != scopeInst) continue; updateValueForInst(inst); } @@ -1273,7 +1291,7 @@ struct SCCPContext bool changed = false; // Replace the insts with their values. List<IRInst*> instsToRemove; - for (auto child : module->getModuleInst()->getChildren()) + for (auto child : scopeInst->getChildren()) { if (!isEvaluableOpCode(child->getOp())) continue; @@ -1670,6 +1688,16 @@ static bool applySparseConditionalConstantPropagationRec( for( auto childInst : inst->getDecorationsAndChildren() ) { + switch (childInst->getOp()) + { + case kIROp_Func: + case kIROp_Block: + case kIROp_Generic: + break; + default: + // Skip other op codes. + continue; + } changed |= applySparseConditionalConstantPropagationRec(globalContext, childInst); } return changed; diff --git a/source/slang/slang-ir-specialize.cpp b/source/slang/slang-ir-specialize.cpp index a806a13e6..f1de6e408 100644 --- a/source/slang/slang-ir-specialize.cpp +++ b/source/slang/slang-ir-specialize.cpp @@ -50,20 +50,10 @@ struct SpecializationContext bool changed = false; - // We know that we can only perform generic specialization when all - // of the arguments to a generic are also fully specialized. - // The "is fully specialized" condition is something we - // need to solve for over the program, because the fully- - // specialized-ness of an instruction depends on the - // fully-specialized-ness of its operands. - // - // We will build an explicit hash set to encode those - // instructions that are fully specialized. 
- // - HashSet<IRInst*>& fullySpecializedInsts; SpecializationContext(IRModule* inModule) - : fullySpecializedInsts(*module->getContainerPool().getHashSet<IRInst>()) + : workList(*inModule->getContainerPool().getList<IRInst>()) + , workListSet(*inModule->getContainerPool().getHashSet<IRInst>()) , cleanInsts(*module->getContainerPool().getHashSet<IRInst>()) , module(inModule) { @@ -83,42 +73,54 @@ struct SpecializationContext // if (!inst) return true; - // An interface requirement entry should always be considered - // to be fully specialized, even if it hasn't been visited. - // - // Note: This logic is here to stop a circularity, where we - // can't mark an interface as used until its requirements are - // used, etc. - // - if (inst->getOp() == kIROp_InterfaceRequirementEntry) - return true; - - // A generic parameter is never specialized. switch (inst->getOp()) { case kIROp_GlobalGenericParam: + case kIROp_LookupWitness: return false; - case kIROp_Param: - if (inst->getParent() && inst->getParent()->getOp() == kIROp_Block && - inst->getParent()->getParent() && - inst->getParent()->getParent()->getOp() == kIROp_Generic) + case kIROp_Specialize: + // The `specialize` instruction is a bit sepcial, + // because it is possible to have a `specialize` + // of a built-in type so that it never gets + // substituted for another type. (e.g., the specific + // case where this code path first showed up + // as necessary was `RayQuery<>`) + // + { + auto specialize = cast<IRSpecialize>(inst); + auto base = specialize->getBase(); + if (auto generic = as<IRGeneric>(base)) + { + // If the thing being specialized can be resolved, + // *and* it is a target intrinsic, ... + // + if (auto result = findGenericReturnVal(generic)) + { + if (result->findDecoration<IRTargetIntrinsicDecoration>()) + { + // ... then we should consider the instruction as + // "fully specialized" in the same cases as for + // any ordinary instruciton. 
+ // + + if (areAllOperandsFullySpecialized(inst)) + { + return true; + } + } + } + } return false; + } } - // A global value is always specialized. + // The default case is that a global value is always specialized. if (inst->getParent() == module->getModuleInst()) { - switch (inst->getOp()) - { - case kIROp_LookupWitness: - case kIROp_Specialize: - return false; - default: - return true; - } + return true; } - return fullySpecializedInsts.contains(inst); + return false; } // When an instruction isn't fully specialized, but its operands *are* @@ -146,34 +148,17 @@ struct SpecializationContext // to be considered for specialization or simplification, // whether generic, existential, etc. // - OrderedHashSet<IRInst*> workList; + List<IRInst*>& workList; + HashSet<IRInst*>& workListSet; HashSet<IRInst*>& cleanInsts; void addToWorkList( IRInst* inst) { -#if 0 - // Note(Yong): we should no longer ignore generic functions - // because they maybe called via dynamic dispatch. - // We still want to specialize calls inside a generic function - // if we can derive its type at compile time. The following - // skipping logic is disabled and we should consider remove it. - // - // - // We will ignore any code that is nested under a generic, - // because it doesn't make sense to perform specialization - // on such code. - // - for (auto ii = inst->getParent(); ii; ii = ii->getParent()) + if (workListSet.add(inst)) { - if (as<IRGeneric>(ii)) - return; - } -#endif + workList.add(inst); - if (workList.add(inst)) - { - cleanInsts.remove(inst); addUsersToWorkList(inst); } @@ -195,24 +180,6 @@ struct SpecializationContext } } - // One of the main transformations we will apply is to - // consider an instruction as being fully specialized. 
- // - void markInstAsFullySpecialized( - IRInst* inst) - { - if (fullySpecializedInsts.contains(inst)) - return; - fullySpecializedInsts.add(inst); - - // If we know that an instruction is fully specialized, - // then we should start to consider its uses and children - // as candidates for being fully specialized too... - // - addUsersToWorkList(inst); - } - - // Of course, somewhere along the way we expect // to run into uses of `specialize(...)` instructions // to bind a generic to arguments that we want to @@ -499,99 +466,6 @@ struct SpecializationContext return true; } - // Generic specialization depends on identifying when - // instructions are fully specialized. - // - void maybeMarkAsFullySpecialized( - IRInst* inst) - { - // TODO: The logic here is completely bogus and - // we need to revisit the notion of fully-specialized-ness - // to only involve things that are semantically *values* - // rather than computations/expressions. - // - // The rules should be something like: - // - // * Literals are values - // * Composite type constructors where all the operands are value are values - // * References to nominal types are values - // * Built-in types where all the operands are values are values - // - // The system for defining value-ness probably needs - // to combine with the system for deduplicating instructions, - // since values are an important class of instruction we want - // to deduplicate. - - switch (inst->getOp()) - { - default: - // The default case is that an instruction can - // be considered as fully specialized as soon - // as all of its operands are. - // - // Anything defined in global scope can be viewed as fully specialized. - if (inst->getParent() == module->getModuleInst() || - areAllOperandsFullySpecialized(inst)) - { - markInstAsFullySpecialized(inst); - } - break; - - // Certain instructions cannot ever be considered - // fully specialized because they should never - // be substituted into a generic as its arguments. 
- case kIROp_LookupWitness: - case kIROp_ExtractExistentialType: - case kIROp_BindExistentialsType: - break; - - // An interface type is always fully specialized. - case kIROp_InterfaceType: - markInstAsFullySpecialized(inst); - break; - - case kIROp_Specialize: - // The `specialize` instruction is a bit sepcial, - // because it is possible to have a `specialize` - // of a built-in type so that it never gets - // substituted for another type. (e.g., the specific - // case where this code path first showed up - // as necessary was `RayQuery<>`) - // - { - auto specialize = cast<IRSpecialize>(inst); - auto base = specialize->getBase(); - if (auto generic = as<IRGeneric>(base)) - { - // If the thing being specialized can be resolved, - // *and* it is a target intrinsic, ... - // - if (auto result = findGenericReturnVal(generic)) - { - if (result->findDecoration<IRTargetIntrinsicDecoration>()) - { - // ... then we should consider the instruction as - // "fully specialized" in the same cases as for - // any ordinary instruciton. - // - - if (areAllOperandsFullySpecialized(inst)) - { - markInstAsFullySpecialized(inst); - } - return; - } - } - } - - // Otherwise, a `specialize` instruction falls into - // the case of instructions that should never be - // considered to be fully specialized. - } - break; - } - } - // The core of this pass is to look at one instruction // at a time, and try to perform whatever specialization // is appropriate based on its opcode. 
@@ -739,6 +613,10 @@ struct SpecializationContext template<typename TDict> void _readSpecializationDictionaryImpl(TDict& dict, IRInst* dictInst) { + int childrenCount = 0; + for (auto child = dictInst->getFirstChild(); child; child = child->next) + childrenCount++; + dict.reserve(1 << Math::Log2Ceil(childrenCount * 2)); for (auto child : dictInst->getChildren()) { auto item = as<IRSpecializationDictionaryItem>(child); @@ -881,10 +759,11 @@ struct SpecializationContext for (;;) { bool iterChanged = false; - addToWorkList(module->getModuleInst()); - - while (workList.getCount() != 0) + for (;;) { + bool hasSpecialization = false; + addToWorkList(module->getModuleInst()); + // We will then iterate until our work list goes dry. // while (workList.getCount() != 0) @@ -892,23 +771,17 @@ struct SpecializationContext IRInst* inst = workList.getLast(); workList.removeLast(); - - cleanInsts.add(inst); + workListSet.remove(inst); // For each instruction we process, we want to perform // a few steps. // - // First we will do any checking required to tag an - // instruction as being fully specialized. - // - maybeMarkAsFullySpecialized(inst); - - // Next we will look for all the general-purpose + // First we will look for all the general-purpose // specialization opportunities (generic specialization, // existential specialization, simplifications, etc.) 
// if (inst->hasUses() || inst->mightHaveSideEffects()) - iterChanged |= maybeSpecializeInst(inst); + hasSpecialization |= maybeSpecializeInst(inst); // Finally, we need to make our logic recurse through // the whole IR module, so we want to add the children @@ -932,8 +805,10 @@ struct SpecializationContext addToWorkList(child); } } - - addDirtyInstsToWorkListRec(module->getModuleInst()); + if (hasSpecialization) + iterChanged = true; + else + break; } if (iterChanged) @@ -941,22 +816,22 @@ struct SpecializationContext this->changed = true; eliminateDeadCode(module->getModuleInst(), IRDeadCodeEliminationOptions()); } - else - { - // If we run out of specialization opportunities, consider - // lower lookupWitnessMethod insts into dynamic dispatch calls. - iterChanged = lowerWitnessLookup(module, sink); - if (!iterChanged || sink->getErrorCount()) - break; - } + + // Once the work list has gone dry, we should have the invariant + // that there are no `specialize` instructions inside of non-generic + // functions that in turn reference a generic type/function unless the generic is for a + // builtin type/function, or some of the type arguments are unknown at compile time, in + // which case we will rely on a follow up pass the translate it into a dynamic dispatch + // function. + // + // Now we consider lower lookupWitnessMethod insts into dynamic dispatch calls, + // which may open up more specialization opportunities. + // + iterChanged = lowerWitnessLookup(module, sink); + if (!iterChanged || sink->getErrorCount()) + break; } - // Once the work list has gone dry, we should have the invariant - // that there are no `specialize` instructions inside of non-generic - // functions that in turn reference a generic type/function unless the generic is for a - // builtin type/function, or some of the type arguments are unknown at compile time, in - // which case we will rely on a follow up pass the translate it into a dynamic dispatch - // function. 
// For functions that still have `specialize` uses left, we need to preserve the // its specializations in resulting IR so they can be reconstructed when this @@ -964,16 +839,13 @@ struct SpecializationContext writeSpecializationDictionaries(); } - void addDirtyInstsToWorkListRec(IRInst* inst) + void addInstsToWorkListRec(IRInst* inst) { - if (!cleanInsts.contains(inst)) - { - addToWorkList(inst); - } + addToWorkList(inst); for (auto child = inst->getLastChild(); child; child = child->getPrevInst()) { - addDirtyInstsToWorkListRec(child); + addInstsToWorkListRec(child); } } @@ -1323,11 +1195,14 @@ struct SpecializationContext } // Test if a type is compile time constant. - static bool isCompileTimeConstantType(IRInst* inst) + bool isCompileTimeConstantType(IRInst* inst) { // TODO: We probably need/want a more robust test here. // For now we are just look into the dependency graph of the inst and // see if there are any opcodes that are causing problems. + if (!isInstFullySpecialized(inst)) + return false; + List<IRInst*> localWorkList; HashSet<IRInst*> processedInsts; localWorkList.add(inst); @@ -1558,12 +1433,6 @@ struct SpecializationContext IRFunc* newFunc = builder->createFunc(); newFunc->setFullType(newFuncType); - // By construction, our new function type will be - // "fully specialized" by the rules used for doing - // generic specialization elsewhere in this pass. - // - fullySpecializedInsts.add(newFuncType); - // The above steps have accomplished the "first phase" // of cloning the function (since `IRFunc`s have no // operands). 
diff --git a/source/slang/slang-ir-ssa-simplification.cpp b/source/slang/slang-ir-ssa-simplification.cpp index 43c9f8d2e..dbef20732 100644 --- a/source/slang/slang-ir-ssa-simplification.cpp +++ b/source/slang/slang-ir-ssa-simplification.cpp @@ -6,7 +6,6 @@ #include "slang-ir-dce.h" #include "slang-ir-simplify-cfg.h" #include "slang-ir-peephole.h" -#include "slang-ir-hoist-constants.h" #include "slang-ir-deduplicate-generic-children.h" #include "slang-ir-remove-unused-generic-param.h" #include "slang-ir-redundancy-removal.h" @@ -32,12 +31,11 @@ namespace Slang changed = false; - changed |= hoistConstants(module); changed |= deduplicateGenericChildren(module); changed |= propagateFuncProperties(module); changed |= removeUnusedGenericParam(module); changed |= applySparseConditionalConstantPropagationForGlobalScope(module, sink); - changed |= peepholeOptimize(module); + changed |= peepholeOptimizeGlobalScope(module); for (auto inst : module->getGlobalInsts()) { diff --git a/source/slang/slang-ir.cpp b/source/slang/slang-ir.cpp index b6c19b18d..2fc1a9466 100644 --- a/source/slang/slang-ir.cpp +++ b/source/slang/slang-ir.cpp @@ -2482,7 +2482,6 @@ namespace Slang { IRInstKey key = { inst }; - // Ideally we would add if not found, else return if was found instead of testing & then adding. 
IRInst** found = m_dedupContext->getGlobalValueNumberingMap().tryGetValueOrAdd(key, inst); SLANG_ASSERT(endCursor == memoryArena.getCursor()); // If it's found, just return, and throw away the instruction diff --git a/source/slang/slang-lower-to-ir.cpp b/source/slang/slang-lower-to-ir.cpp index 0dcea9a14..63dc39a20 100644 --- a/source/slang/slang-lower-to-ir.cpp +++ b/source/slang/slang-lower-to-ir.cpp @@ -3674,7 +3674,11 @@ struct ExprLoweringVisitorBase : ExprVisitor<Derived, LoweredValInfo> else if (auto declRefType = as<DeclRefType>(type)) { DeclRef<Decl> declRef = declRefType->declRef; - if (auto aggTypeDeclRef = declRef.as<AggTypeDecl>()) + if (auto enumType = declRef.as<EnumDecl>()) + { + return LoweredValInfo::simple(getBuilder()->getIntValue(irType, 0)); + } + else if (auto aggTypeDeclRef = declRef.as<AggTypeDecl>()) { List<IRInst*> args; @@ -9854,7 +9858,11 @@ RefPtr<IRModule> generateIRForTranslationUnit( constructSSA(module); simplifyCFG(module); applySparseConditionalConstantPropagation(module, compileRequest->getSink()); - + for (auto inst : module->getGlobalInsts()) + { + if (auto func = as<IRGlobalValueWithCode>(inst)) + eliminateDeadCode(func); + } // Next, inline calls to any functions that have been // marked for mandatory "early" inlining. // |
