summaryrefslogtreecommitdiffstats
path: root/source/slang
diff options
context:
space:
mode:
authorYong He <yonghe@outlook.com>2023-07-19 13:50:49 -0700
committerGitHub <noreply@github.com>2023-07-19 13:50:49 -0700
commit1cfb1c85b52e00cde2d21874a88cda2c22d18b62 (patch)
treea38b24534d865ffe33a3d0fc030f5449ba729e28 /source/slang
parent1fe5e83f3dcc8ef0efa2dd083ebdfab5d0f101a9 (diff)
Optimize specialization, and remove unnecessary calls to `simplifyIR`. (#2999)
* Remove unnecessary calls to `simplifyIR`. * fix. * Delete obsolete hoistConst pass. * Fix. * Small improvements. * Fix. * Fix enum lowering. * fix * tweaks. * tweaks. --------- Co-authored-by: Yong He <yhe@nvidia.com>
Diffstat (limited to 'source/slang')
-rw-r--r--source/slang/slang-ast-base.h2
-rw-r--r--source/slang/slang-ast-support-types.h6
-rw-r--r--source/slang/slang-check-conformance.cpp10
-rw-r--r--source/slang/slang-check-stmt.cpp4
-rw-r--r--source/slang/slang-emit.cpp22
-rw-r--r--source/slang/slang-ir-hoist-constants.cpp119
-rw-r--r--source/slang/slang-ir-hoist-constants.h13
-rw-r--r--source/slang/slang-ir-lower-generics.cpp4
-rw-r--r--source/slang/slang-ir-peephole.cpp30
-rw-r--r--source/slang/slang-ir-peephole.h1
-rw-r--r--source/slang/slang-ir-sccp.cpp36
-rw-r--r--source/slang/slang-ir-specialize.cpp289
-rw-r--r--source/slang/slang-ir-ssa-simplification.cpp4
-rw-r--r--source/slang/slang-ir.cpp1
-rw-r--r--source/slang/slang-lower-to-ir.cpp12
15 files changed, 169 insertions, 384 deletions
diff --git a/source/slang/slang-ast-base.h b/source/slang/slang-ast-base.h
index 02bcb726f..b90014316 100644
--- a/source/slang/slang-ast-base.h
+++ b/source/slang/slang-ast-base.h
@@ -232,7 +232,7 @@ class Type: public Val
/// Type derived types store the AST builder they were constructed on. The builder calls this function
/// after constructing.
- SLANG_FORCE_INLINE void init(ASTNodeType inAstNodeType, ASTBuilder* inAstBuilder) { m_astBuilder = inAstBuilder; astNodeType = inAstNodeType; }
+ SLANG_FORCE_INLINE void init(ASTNodeType inAstNodeType, ASTBuilder* inAstBuilder) { Val::init(inAstNodeType, inAstBuilder); m_astBuilder = inAstBuilder; }
/// Get the ASTBuilder that was used to construct this Type
SLANG_FORCE_INLINE ASTBuilder* getASTBuilder() const { return m_astBuilder; }
diff --git a/source/slang/slang-ast-support-types.h b/source/slang/slang-ast-support-types.h
index 0a5eeb65d..4765d11ec 100644
--- a/source/slang/slang-ast-support-types.h
+++ b/source/slang/slang-ast-support-types.h
@@ -831,6 +831,12 @@ namespace Slang
return equals(other);
}
+ template<typename U>
+ bool operator != (DeclRef<U> other) const
+ {
+ return !equals(other);
+ }
+
explicit operator bool() const
{
return declRefBase;
diff --git a/source/slang/slang-check-conformance.cpp b/source/slang/slang-check-conformance.cpp
index 2696f798d..1d19e01bf 100644
--- a/source/slang/slang-check-conformance.cpp
+++ b/source/slang/slang-check-conformance.cpp
@@ -104,6 +104,16 @@ namespace Slang
// For now we are continuing to conflate all the subtype-ish relationships but not
// tangling convertibility into it.
+ // First, make sure the decls of both the sub type and the super type are ready for lookup.
+ if (auto subDeclRefType = as<DeclRefType>(subType))
+ {
+ ensureDecl(subDeclRefType->declRef.getDecl(), DeclCheckState::ReadyForLookup);
+ }
+ if (auto superDeclRefType = as<DeclRefType>(superType))
+ {
+ ensureDecl(superDeclRefType->declRef.getDecl(), DeclCheckState::ReadyForLookup);
+ }
+
// In the common case, we can use the pre-computed inheritance information for `subType`
// to enumerate all the types it transitively inherits from.
//
diff --git a/source/slang/slang-check-stmt.cpp b/source/slang/slang-check-stmt.cpp
index 6453f68ab..ba7e977e3 100644
--- a/source/slang/slang-check-stmt.cpp
+++ b/source/slang/slang-check-stmt.cpp
@@ -492,7 +492,7 @@ namespace Slang
auto varExpr = as<VarExpr>(opSideEffectExpr->arguments[0]);
if (!varExpr)
return;
- if (varExpr->declRef != initialVar)
+ if (varExpr->declRef.getDecl() != initialVar.getDecl())
{
// If the user writes something like `for (int i = 0; i < 5; j++)`,
// it is most likely a bug, so we issue a warning.
@@ -518,7 +518,7 @@ namespace Slang
stepSize = m_astBuilder->getIntVal(m_astBuilder->getIntType(), 1);
}
- if (predicateVar != initialVar)
+ if (predicateVar.getDecl() != initialVar.getDecl())
{
if (predicateVar)
getSink()->diagnose(stmt->predicateExpression, Diagnostics::forLoopPredicateCheckingDifferentVar, initialVar, predicateVar);
diff --git a/source/slang/slang-emit.cpp b/source/slang/slang-emit.cpp
index f07dc97f8..6a43ff8c2 100644
--- a/source/slang/slang-emit.cpp
+++ b/source/slang/slang-emit.cpp
@@ -329,7 +329,6 @@ Result linkAndOptimizeIR(
}
lowerOptionalType(irModule, sink);
- simplifyIR(irModule, sink);
switch (target)
{
@@ -356,6 +355,8 @@ Result linkAndOptimizeIR(
#endif
validateIRModuleIfEnabled(codeGenContext, irModule);
+ simplifyIR(irModule, sink);
+
// It's important that this takes place before defunctionalization as we
// want to be able to easily discover the cooperate and fallback funcitons
// being passed to saturated_cooperation
@@ -475,10 +476,6 @@ Result linkAndOptimizeIR(
if (sink->getErrorCount() != 0)
return SLANG_FAIL;
-
- // TODO(DG): There are multiple DCE steps here, which need to be changed
- // so that they don't just throw out any non-entry point code
- // Debugging code for IR transformations...
#if 0
dumpIRIfEnabled(codeGenContext, irModule, "SPECIALIZED");
#endif
@@ -493,9 +490,6 @@ Result linkAndOptimizeIR(
//
simplifyIR(irModule, sink);
-#if 0
- dumpIRIfEnabled(codeGenContext, irModule, "AFTER DCE");
-#endif
validateIRModuleIfEnabled(codeGenContext, irModule);
// We don't need the legalize pass for C/C++ based types
@@ -529,7 +523,6 @@ Result linkAndOptimizeIR(
legalizeExistentialTypeLayout(
irModule,
sink);
- eliminateDeadCode(irModule);
#if 0
dumpIRIfEnabled(codeGenContext, irModule, "EXISTENTIALS LEGALIZED");
@@ -550,7 +543,6 @@ Result linkAndOptimizeIR(
legalizeResourceTypes(
irModule,
sink);
- eliminateDeadCode(irModule);
// Debugging output of legalization
#if 0
@@ -565,7 +557,6 @@ Result linkAndOptimizeIR(
legalizeEmptyTypes(
irModule,
sink);
- eliminateDeadCode(irModule);
}
// Once specialization and type legalization have been performed,
@@ -599,7 +590,7 @@ Result linkAndOptimizeIR(
{
specializeArrayParameters(codeGenContext, irModule);
}
- simplifyIR(irModule, sink);
+ eliminateDeadCode(irModule);
// Rewrite functions that return arrays to return them via `out` parameter,
// since our target languages doesn't allow returning arrays.
@@ -842,9 +833,9 @@ Result linkAndOptimizeIR(
// functions, so there might still be invalid code in
// our IR module.
//
- // We run IR simplification passes again to clean things up.
+ // We run DCE pass again to clean things up.
//
- simplifyIR(irModule, sink);
+ eliminateDeadCode(irModule);
if (isKhronosTarget(targetRequest))
{
@@ -885,10 +876,11 @@ Result linkAndOptimizeIR(
// Lower all bit_cast operations on complex types into leaf-level
// bit_cast on basic types.
lowerBitCast(targetRequest, irModule);
- simplifyIR(irModule, sink);
eliminateMultiLevelBreak(irModule);
+ simplifyIR(irModule, sink);
+
// As a late step, we need to take the SSA-form IR and move things *out*
// of SSA form, by eliminating all "phi nodes" (block parameters) and
// introducing explicit temporaries instead. Doing this at the IR level
diff --git a/source/slang/slang-ir-hoist-constants.cpp b/source/slang/slang-ir-hoist-constants.cpp
deleted file mode 100644
index 7a250bdb7..000000000
--- a/source/slang/slang-ir-hoist-constants.cpp
+++ /dev/null
@@ -1,119 +0,0 @@
-// slang-ir-hoist-constants.cpp
-#include "slang-ir-hoist-constants.h"
-#include "slang-ir-inst-pass-base.h"
-
-namespace Slang
-{
-
-struct HoistConstantPass : InstPassBase
-{
- HoistConstantPass(IRModule* module) : InstPassBase(module)
- {}
-
- bool changed = false;
-
- void processModule()
- {
- processAllInsts([this](IRInst* inst)
- {
-
- if (inst->getParent() == module->getModuleInst() || !inst->getParent())
- return;
- auto parent = inst->getParent();
- auto p = parent;
- while (p)
- {
- if (as<IRGlobalValueWithCode>(p))
- return;
- p = p->parent;
- }
- while (parent && parent->parent != module->getModuleInst())
- parent = parent->parent;
- if (!parent)
- return;
- switch (inst->getOp())
- {
- default:
- return;
- case kIROp_Add:
- case kIROp_Sub:
- case kIROp_Mul:
- case kIROp_Div:
- case kIROp_Module:
- case kIROp_Neg:
- case kIROp_And:
- case kIROp_Or:
- case kIROp_Not:
- case kIROp_BitAnd:
- case kIROp_BitNot:
- case kIROp_BitOr:
- case kIROp_BitXor:
- case kIROp_Select:
- case kIROp_Greater:
- case kIROp_Less:
- case kIROp_Leq:
- case kIROp_Geq:
- case kIROp_Eql:
- case kIROp_Neq:
- case kIROp_BitCast:
- case kIROp_Lsh:
- case kIROp_Rsh:
- case kIROp_MatrixReshape:
- case kIROp_VectorReshape:
- case kIROp_MakeVector:
- case kIROp_MakeMatrix:
- case kIROp_MakeMatrixFromScalar:
- case kIROp_MakeVectorFromScalar:
- case kIROp_MakeOptionalNone:
- case kIROp_MakeOptionalValue:
- case kIROp_MakeDifferentialPair:
- case kIROp_MakeExistential:
- case kIROp_ExtractExistentialType:
- case kIROp_ExtractExistentialValue:
- case kIROp_ExtractExistentialWitnessTable:
- case kIROp_WrapExistential:
- case kIROp_WitnessTableType:
- case kIROp_AttributedType:
- case kIROp_MatrixType:
- case kIROp_OptionalHasValue:
- case kIROp_GetOptionalValue:
- case kIROp_IntCast:
- case kIROp_FloatCast:
- case kIROp_CastIntToFloat:
- case kIROp_CastFloatToInt:
- case kIROp_CastPtrToBool:
- case kIROp_CastPtrToInt:
- case kIROp_CastIntToPtr:
- case kIROp_CastToVoid:
- case kIROp_Reinterpret:
- case kIROp_swizzle:
- case kIROp_IntLit:
- case kIROp_BoolLit:
- case kIROp_ArrayType:
- case kIROp_Specialize:
- case kIROp_VectorType:
- break;
- }
- if (inst->typeUse.get() && inst->typeUse.get()->parent != module->getModuleInst())
- return;
- for (UInt i = 0; i < inst->getOperandCount(); i++)
- {
- if (inst->getOperand(i)->parent != module->getModuleInst())
- return;
- }
- // all operands are in global scope, we can move this inst to global scope as well.
- inst->insertBefore(parent);
- changed = true;
- });
- }
-};
-
-bool hoistConstants(
- IRModule* module)
-{
- HoistConstantPass context(module);
- context.processModule();
- return context.changed;
-}
-
-}
diff --git a/source/slang/slang-ir-hoist-constants.h b/source/slang/slang-ir-hoist-constants.h
deleted file mode 100644
index 28d4a0c6b..000000000
--- a/source/slang/slang-ir-hoist-constants.h
+++ /dev/null
@@ -1,13 +0,0 @@
-// slang-ir-hoist-constants.h
-#pragma once
-
-namespace Slang
-{
-struct IRModule;
-
- /// A (specialized) generic type may contain insts that computes compile-time constants defined within
- /// the type. We should hoist them to global scope so they can be SCCP'd when possible.
-bool hoistConstants(
- IRModule* module);
-
-}
diff --git a/source/slang/slang-ir-lower-generics.cpp b/source/slang/slang-ir-lower-generics.cpp
index 19c075bf1..a7c0f51e6 100644
--- a/source/slang/slang-ir-lower-generics.cpp
+++ b/source/slang/slang-ir-lower-generics.cpp
@@ -234,9 +234,5 @@ namespace Slang
generateAnyValueMarshallingFunctions(&sharedContext);
if (sink->getErrorCount() != 0)
return;
-
- // We might have generated new temporary variables during lowering.
- // An SSA pass can clean up unnecessary load/stores.
- simplifyIR(module);
}
} // namespace Slang
diff --git a/source/slang/slang-ir-peephole.cpp b/source/slang/slang-ir-peephole.cpp
index 376795855..ac026b563 100644
--- a/source/slang/slang-ir-peephole.cpp
+++ b/source/slang/slang-ir-peephole.cpp
@@ -235,9 +235,6 @@ struct PeepholeContext : InstPassBase
return false;
}
- RefPtr<IRDominatorTree> domTree;
- IRGlobalValueWithCode* domTreeFunc = nullptr;
-
void processInst(IRInst* inst)
{
if (as<IRGlobalValueWithCode>(inst))
@@ -746,13 +743,8 @@ struct PeepholeContext : InstPassBase
auto parentFunc = getParentFunc(inst);
if (!parentFunc)
break;
- if (domTreeFunc != parentFunc)
- {
- domTree = computeDominatorTree(parentFunc);
- domTreeFunc = parentFunc;
- }
- if (!domTree)
- break;
+
+ auto domTree = parentFunc->getModule()->findOrCreateDominatorTree(parentFunc);
if (domTree->dominates(argValue, inst))
{
@@ -816,6 +808,8 @@ struct PeepholeContext : InstPassBase
bool processFunc(IRInst* func)
{
+ func->getModule()->invalidateAllAnalysis();
+
bool result = false;
for (;;)
{
@@ -847,6 +841,22 @@ bool peepholeOptimize(IRInst* func)
return context.processFunc(func);
}
+bool peepholeOptimizeGlobalScope(IRModule* module)
+{
+ PeepholeContext context = PeepholeContext(module);
+ bool result = false;
+ for (;;)
+ {
+ context.changed = false;
+ for (auto globalInst : module->getGlobalInsts())
+ context.processInst(globalInst);
+ result |= context.changed;
+ if (!context.changed)
+ break;
+ }
+ return result;
+}
+
bool tryReplaceInstUsesWithSimplifiedValue(IRModule* module, IRInst* inst)
{
if (inst != tryConstantFoldInst(module, inst))
diff --git a/source/slang/slang-ir-peephole.h b/source/slang/slang-ir-peephole.h
index c0b83a715..b2ea1bc04 100644
--- a/source/slang/slang-ir-peephole.h
+++ b/source/slang/slang-ir-peephole.h
@@ -10,5 +10,6 @@ namespace Slang
/// Apply peephole optimizations.
bool peepholeOptimize(IRModule* module);
bool peepholeOptimize(IRInst* func);
+ bool peepholeOptimizeGlobalScope(IRModule* module);
bool tryReplaceInstUsesWithSimplifiedValue(IRModule* module, IRInst* inst);
}
diff --git a/source/slang/slang-ir-sccp.cpp b/source/slang/slang-ir-sccp.cpp
index d5e4c6e99..ce635dca8 100644
--- a/source/slang/slang-ir-sccp.cpp
+++ b/source/slang/slang-ir-sccp.cpp
@@ -1248,12 +1248,30 @@ struct SCCPContext
}
}
- // Run the constant folding on global scope only.
+ // Run the constant folding on global scope and specialized types only.
bool applyOnGlobalScope(IRModule* module)
{
- builderStorage = IRBuilder(shared->module);
+ bool changed = applyOnScope(module->getModuleInst());
for (auto child : module->getModuleInst()->getChildren())
{
+ switch (child->getOp())
+ {
+ case kIROp_StructType:
+ case kIROp_ClassType:
+ case kIROp_InterfaceType:
+ case kIROp_WitnessTable:
+ changed |= applyOnScope(child);
+ break;
+ }
+ }
+ return changed;
+ }
+
+ bool applyOnScope(IRInst* scopeInst)
+ {
+ builderStorage = IRBuilder(scopeInst);
+ for (auto child : scopeInst->getChildren())
+ {
// Only consider evaluable opcodes.
if (!isEvaluableOpCode(child->getOp()))
continue;
@@ -1265,7 +1283,7 @@ struct SCCPContext
auto inst = ssaWorkList[0];
ssaWorkList.fastRemoveAt(0);
// Only consider evaluable opcodes and insts at global scope.
- if (!isEvaluableOpCode(inst->getOp()) || inst->getParent() != module->getModuleInst())
+ if (!isEvaluableOpCode(inst->getOp()) || inst->getParent() != scopeInst)
continue;
updateValueForInst(inst);
}
@@ -1273,7 +1291,7 @@ struct SCCPContext
bool changed = false;
// Replace the insts with their values.
List<IRInst*> instsToRemove;
- for (auto child : module->getModuleInst()->getChildren())
+ for (auto child : scopeInst->getChildren())
{
if (!isEvaluableOpCode(child->getOp()))
continue;
@@ -1670,6 +1688,16 @@ static bool applySparseConditionalConstantPropagationRec(
for( auto childInst : inst->getDecorationsAndChildren() )
{
+ switch (childInst->getOp())
+ {
+ case kIROp_Func:
+ case kIROp_Block:
+ case kIROp_Generic:
+ break;
+ default:
+ // Skip other op codes.
+ continue;
+ }
changed |= applySparseConditionalConstantPropagationRec(globalContext, childInst);
}
return changed;
diff --git a/source/slang/slang-ir-specialize.cpp b/source/slang/slang-ir-specialize.cpp
index a806a13e6..f1de6e408 100644
--- a/source/slang/slang-ir-specialize.cpp
+++ b/source/slang/slang-ir-specialize.cpp
@@ -50,20 +50,10 @@ struct SpecializationContext
bool changed = false;
- // We know that we can only perform generic specialization when all
- // of the arguments to a generic are also fully specialized.
- // The "is fully specialized" condition is something we
- // need to solve for over the program, because the fully-
- // specialized-ness of an instruction depends on the
- // fully-specialized-ness of its operands.
- //
- // We will build an explicit hash set to encode those
- // instructions that are fully specialized.
- //
- HashSet<IRInst*>& fullySpecializedInsts;
SpecializationContext(IRModule* inModule)
- : fullySpecializedInsts(*module->getContainerPool().getHashSet<IRInst>())
+ : workList(*inModule->getContainerPool().getList<IRInst>())
+ , workListSet(*inModule->getContainerPool().getHashSet<IRInst>())
, cleanInsts(*module->getContainerPool().getHashSet<IRInst>())
, module(inModule)
{
@@ -83,42 +73,54 @@ struct SpecializationContext
//
if (!inst) return true;
- // An interface requirement entry should always be considered
- // to be fully specialized, even if it hasn't been visited.
- //
- // Note: This logic is here to stop a circularity, where we
- // can't mark an interface as used until its requirements are
- // used, etc.
- //
- if (inst->getOp() == kIROp_InterfaceRequirementEntry)
- return true;
-
- // A generic parameter is never specialized.
switch (inst->getOp())
{
case kIROp_GlobalGenericParam:
+ case kIROp_LookupWitness:
return false;
- case kIROp_Param:
- if (inst->getParent() && inst->getParent()->getOp() == kIROp_Block &&
- inst->getParent()->getParent() &&
- inst->getParent()->getParent()->getOp() == kIROp_Generic)
+ case kIROp_Specialize:
+ // The `specialize` instruction is a bit sepcial,
+ // because it is possible to have a `specialize`
+ // of a built-in type so that it never gets
+ // substituted for another type. (e.g., the specific
+ // case where this code path first showed up
+ // as necessary was `RayQuery<>`)
+ //
+ {
+ auto specialize = cast<IRSpecialize>(inst);
+ auto base = specialize->getBase();
+ if (auto generic = as<IRGeneric>(base))
+ {
+ // If the thing being specialized can be resolved,
+ // *and* it is a target intrinsic, ...
+ //
+ if (auto result = findGenericReturnVal(generic))
+ {
+ if (result->findDecoration<IRTargetIntrinsicDecoration>())
+ {
+ // ... then we should consider the instruction as
+ // "fully specialized" in the same cases as for
+ // any ordinary instruciton.
+ //
+
+ if (areAllOperandsFullySpecialized(inst))
+ {
+ return true;
+ }
+ }
+ }
+ }
return false;
+ }
}
- // A global value is always specialized.
+ // The default case is that a global value is always specialized.
if (inst->getParent() == module->getModuleInst())
{
- switch (inst->getOp())
- {
- case kIROp_LookupWitness:
- case kIROp_Specialize:
- return false;
- default:
- return true;
- }
+ return true;
}
- return fullySpecializedInsts.contains(inst);
+ return false;
}
// When an instruction isn't fully specialized, but its operands *are*
@@ -146,34 +148,17 @@ struct SpecializationContext
// to be considered for specialization or simplification,
// whether generic, existential, etc.
//
- OrderedHashSet<IRInst*> workList;
+ List<IRInst*>& workList;
+ HashSet<IRInst*>& workListSet;
HashSet<IRInst*>& cleanInsts;
void addToWorkList(
IRInst* inst)
{
-#if 0
- // Note(Yong): we should no longer ignore generic functions
- // because they maybe called via dynamic dispatch.
- // We still want to specialize calls inside a generic function
- // if we can derive its type at compile time. The following
- // skipping logic is disabled and we should consider remove it.
- //
- //
- // We will ignore any code that is nested under a generic,
- // because it doesn't make sense to perform specialization
- // on such code.
- //
- for (auto ii = inst->getParent(); ii; ii = ii->getParent())
+ if (workListSet.add(inst))
{
- if (as<IRGeneric>(ii))
- return;
- }
-#endif
+ workList.add(inst);
- if (workList.add(inst))
- {
- cleanInsts.remove(inst);
addUsersToWorkList(inst);
}
@@ -195,24 +180,6 @@ struct SpecializationContext
}
}
- // One of the main transformations we will apply is to
- // consider an instruction as being fully specialized.
- //
- void markInstAsFullySpecialized(
- IRInst* inst)
- {
- if (fullySpecializedInsts.contains(inst))
- return;
- fullySpecializedInsts.add(inst);
-
- // If we know that an instruction is fully specialized,
- // then we should start to consider its uses and children
- // as candidates for being fully specialized too...
- //
- addUsersToWorkList(inst);
- }
-
-
// Of course, somewhere along the way we expect
// to run into uses of `specialize(...)` instructions
// to bind a generic to arguments that we want to
@@ -499,99 +466,6 @@ struct SpecializationContext
return true;
}
- // Generic specialization depends on identifying when
- // instructions are fully specialized.
- //
- void maybeMarkAsFullySpecialized(
- IRInst* inst)
- {
- // TODO: The logic here is completely bogus and
- // we need to revisit the notion of fully-specialized-ness
- // to only involve things that are semantically *values*
- // rather than computations/expressions.
- //
- // The rules should be something like:
- //
- // * Literals are values
- // * Composite type constructors where all the operands are value are values
- // * References to nominal types are values
- // * Built-in types where all the operands are values are values
- //
- // The system for defining value-ness probably needs
- // to combine with the system for deduplicating instructions,
- // since values are an important class of instruction we want
- // to deduplicate.
-
- switch (inst->getOp())
- {
- default:
- // The default case is that an instruction can
- // be considered as fully specialized as soon
- // as all of its operands are.
- //
- // Anything defined in global scope can be viewed as fully specialized.
- if (inst->getParent() == module->getModuleInst() ||
- areAllOperandsFullySpecialized(inst))
- {
- markInstAsFullySpecialized(inst);
- }
- break;
-
- // Certain instructions cannot ever be considered
- // fully specialized because they should never
- // be substituted into a generic as its arguments.
- case kIROp_LookupWitness:
- case kIROp_ExtractExistentialType:
- case kIROp_BindExistentialsType:
- break;
-
- // An interface type is always fully specialized.
- case kIROp_InterfaceType:
- markInstAsFullySpecialized(inst);
- break;
-
- case kIROp_Specialize:
- // The `specialize` instruction is a bit sepcial,
- // because it is possible to have a `specialize`
- // of a built-in type so that it never gets
- // substituted for another type. (e.g., the specific
- // case where this code path first showed up
- // as necessary was `RayQuery<>`)
- //
- {
- auto specialize = cast<IRSpecialize>(inst);
- auto base = specialize->getBase();
- if (auto generic = as<IRGeneric>(base))
- {
- // If the thing being specialized can be resolved,
- // *and* it is a target intrinsic, ...
- //
- if (auto result = findGenericReturnVal(generic))
- {
- if (result->findDecoration<IRTargetIntrinsicDecoration>())
- {
- // ... then we should consider the instruction as
- // "fully specialized" in the same cases as for
- // any ordinary instruciton.
- //
-
- if (areAllOperandsFullySpecialized(inst))
- {
- markInstAsFullySpecialized(inst);
- }
- return;
- }
- }
- }
-
- // Otherwise, a `specialize` instruction falls into
- // the case of instructions that should never be
- // considered to be fully specialized.
- }
- break;
- }
- }
-
// The core of this pass is to look at one instruction
// at a time, and try to perform whatever specialization
// is appropriate based on its opcode.
@@ -739,6 +613,10 @@ struct SpecializationContext
template<typename TDict>
void _readSpecializationDictionaryImpl(TDict& dict, IRInst* dictInst)
{
+ int childrenCount = 0;
+ for (auto child = dictInst->getFirstChild(); child; child = child->next)
+ childrenCount++;
+ dict.reserve(1 << Math::Log2Ceil(childrenCount * 2));
for (auto child : dictInst->getChildren())
{
auto item = as<IRSpecializationDictionaryItem>(child);
@@ -881,10 +759,11 @@ struct SpecializationContext
for (;;)
{
bool iterChanged = false;
- addToWorkList(module->getModuleInst());
-
- while (workList.getCount() != 0)
+ for (;;)
{
+ bool hasSpecialization = false;
+ addToWorkList(module->getModuleInst());
+
// We will then iterate until our work list goes dry.
//
while (workList.getCount() != 0)
@@ -892,23 +771,17 @@ struct SpecializationContext
IRInst* inst = workList.getLast();
workList.removeLast();
-
- cleanInsts.add(inst);
+ workListSet.remove(inst);
// For each instruction we process, we want to perform
// a few steps.
//
- // First we will do any checking required to tag an
- // instruction as being fully specialized.
- //
- maybeMarkAsFullySpecialized(inst);
-
- // Next we will look for all the general-purpose
+ // First we will look for all the general-purpose
// specialization opportunities (generic specialization,
// existential specialization, simplifications, etc.)
//
if (inst->hasUses() || inst->mightHaveSideEffects())
- iterChanged |= maybeSpecializeInst(inst);
+ hasSpecialization |= maybeSpecializeInst(inst);
// Finally, we need to make our logic recurse through
// the whole IR module, so we want to add the children
@@ -932,8 +805,10 @@ struct SpecializationContext
addToWorkList(child);
}
}
-
- addDirtyInstsToWorkListRec(module->getModuleInst());
+ if (hasSpecialization)
+ iterChanged = true;
+ else
+ break;
}
if (iterChanged)
@@ -941,22 +816,22 @@ struct SpecializationContext
this->changed = true;
eliminateDeadCode(module->getModuleInst(), IRDeadCodeEliminationOptions());
}
- else
- {
- // If we run out of specialization opportunities, consider
- // lower lookupWitnessMethod insts into dynamic dispatch calls.
- iterChanged = lowerWitnessLookup(module, sink);
- if (!iterChanged || sink->getErrorCount())
- break;
- }
+
+ // Once the work list has gone dry, we should have the invariant
+ // that there are no `specialize` instructions inside of non-generic
+ // functions that in turn reference a generic type/function unless the generic is for a
+ // builtin type/function, or some of the type arguments are unknown at compile time, in
+ // which case we will rely on a follow up pass the translate it into a dynamic dispatch
+ // function.
+ //
+ // Now we consider lower lookupWitnessMethod insts into dynamic dispatch calls,
+ // which may open up more specialization opportunities.
+ //
+ iterChanged = lowerWitnessLookup(module, sink);
+ if (!iterChanged || sink->getErrorCount())
+ break;
}
- // Once the work list has gone dry, we should have the invariant
- // that there are no `specialize` instructions inside of non-generic
- // functions that in turn reference a generic type/function unless the generic is for a
- // builtin type/function, or some of the type arguments are unknown at compile time, in
- // which case we will rely on a follow up pass the translate it into a dynamic dispatch
- // function.
// For functions that still have `specialize` uses left, we need to preserve the
// its specializations in resulting IR so they can be reconstructed when this
@@ -964,16 +839,13 @@ struct SpecializationContext
writeSpecializationDictionaries();
}
- void addDirtyInstsToWorkListRec(IRInst* inst)
+ void addInstsToWorkListRec(IRInst* inst)
{
- if (!cleanInsts.contains(inst))
- {
- addToWorkList(inst);
- }
+ addToWorkList(inst);
for (auto child = inst->getLastChild(); child; child = child->getPrevInst())
{
- addDirtyInstsToWorkListRec(child);
+ addInstsToWorkListRec(child);
}
}
@@ -1323,11 +1195,14 @@ struct SpecializationContext
}
// Test if a type is compile time constant.
- static bool isCompileTimeConstantType(IRInst* inst)
+ bool isCompileTimeConstantType(IRInst* inst)
{
// TODO: We probably need/want a more robust test here.
// For now we are just look into the dependency graph of the inst and
// see if there are any opcodes that are causing problems.
+ if (!isInstFullySpecialized(inst))
+ return false;
+
List<IRInst*> localWorkList;
HashSet<IRInst*> processedInsts;
localWorkList.add(inst);
@@ -1558,12 +1433,6 @@ struct SpecializationContext
IRFunc* newFunc = builder->createFunc();
newFunc->setFullType(newFuncType);
- // By construction, our new function type will be
- // "fully specialized" by the rules used for doing
- // generic specialization elsewhere in this pass.
- //
- fullySpecializedInsts.add(newFuncType);
-
// The above steps have accomplished the "first phase"
// of cloning the function (since `IRFunc`s have no
// operands).
diff --git a/source/slang/slang-ir-ssa-simplification.cpp b/source/slang/slang-ir-ssa-simplification.cpp
index 43c9f8d2e..dbef20732 100644
--- a/source/slang/slang-ir-ssa-simplification.cpp
+++ b/source/slang/slang-ir-ssa-simplification.cpp
@@ -6,7 +6,6 @@
#include "slang-ir-dce.h"
#include "slang-ir-simplify-cfg.h"
#include "slang-ir-peephole.h"
-#include "slang-ir-hoist-constants.h"
#include "slang-ir-deduplicate-generic-children.h"
#include "slang-ir-remove-unused-generic-param.h"
#include "slang-ir-redundancy-removal.h"
@@ -32,12 +31,11 @@ namespace Slang
changed = false;
- changed |= hoistConstants(module);
changed |= deduplicateGenericChildren(module);
changed |= propagateFuncProperties(module);
changed |= removeUnusedGenericParam(module);
changed |= applySparseConditionalConstantPropagationForGlobalScope(module, sink);
- changed |= peepholeOptimize(module);
+ changed |= peepholeOptimizeGlobalScope(module);
for (auto inst : module->getGlobalInsts())
{
diff --git a/source/slang/slang-ir.cpp b/source/slang/slang-ir.cpp
index b6c19b18d..2fc1a9466 100644
--- a/source/slang/slang-ir.cpp
+++ b/source/slang/slang-ir.cpp
@@ -2482,7 +2482,6 @@ namespace Slang
{
IRInstKey key = { inst };
- // Ideally we would add if not found, else return if was found instead of testing & then adding.
IRInst** found = m_dedupContext->getGlobalValueNumberingMap().tryGetValueOrAdd(key, inst);
SLANG_ASSERT(endCursor == memoryArena.getCursor());
// If it's found, just return, and throw away the instruction
diff --git a/source/slang/slang-lower-to-ir.cpp b/source/slang/slang-lower-to-ir.cpp
index 0dcea9a14..63dc39a20 100644
--- a/source/slang/slang-lower-to-ir.cpp
+++ b/source/slang/slang-lower-to-ir.cpp
@@ -3674,7 +3674,11 @@ struct ExprLoweringVisitorBase : ExprVisitor<Derived, LoweredValInfo>
else if (auto declRefType = as<DeclRefType>(type))
{
DeclRef<Decl> declRef = declRefType->declRef;
- if (auto aggTypeDeclRef = declRef.as<AggTypeDecl>())
+ if (auto enumType = declRef.as<EnumDecl>())
+ {
+ return LoweredValInfo::simple(getBuilder()->getIntValue(irType, 0));
+ }
+ else if (auto aggTypeDeclRef = declRef.as<AggTypeDecl>())
{
List<IRInst*> args;
@@ -9854,7 +9858,11 @@ RefPtr<IRModule> generateIRForTranslationUnit(
constructSSA(module);
simplifyCFG(module);
applySparseConditionalConstantPropagation(module, compileRequest->getSink());
-
+ for (auto inst : module->getGlobalInsts())
+ {
+ if (auto func = as<IRGlobalValueWithCode>(inst))
+ eliminateDeadCode(func);
+ }
// Next, inline calls to any functions that have been
// marked for mandatory "early" inlining.
//