summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Yong He <yonghe@outlook.com> 2023-07-12 16:00:05 -0700
committer GitHub <noreply@github.com> 2023-07-12 16:00:05 -0700
commit 261b2f1f2bc13ccf7db5ec68c825ffc7b0781f7f (patch)
tree 4953e376e705a8110cb8164dda5b239c04f2768b
parent bbd9c2e6d7b57f5acc3238083ab2f7c7b140df5e (diff)
Use scratchData on `IRInst` to replace HashSets. (#2978)
* Use scratchData on `IRInst` to replace HashSets.
* Update test results.
* Initialize scratchData.
* Update autodiff documentation.
* Use enum instead of bool.

---------

Co-authored-by: Yong He <yhe@nvidia.com>
-rw-r--r-- source/core/slang-common.h 6
-rw-r--r-- source/slang/slang-ast-builder.cpp 6
-rw-r--r-- source/slang/slang-ast-builder.h 11
-rw-r--r-- source/slang/slang-compiler.cpp 2
-rw-r--r-- source/slang/slang-emit.cpp 2
-rw-r--r-- source/slang/slang-ir-dce.cpp 42
-rw-r--r-- source/slang/slang-ir-legalize-types.cpp 41
-rw-r--r-- source/slang/slang-ir-redundancy-removal.cpp 4
-rw-r--r-- source/slang/slang-ir-specialize.cpp 48
-rw-r--r-- source/slang/slang-ir-util.cpp 43
-rw-r--r-- source/slang/slang-ir-util.h 7
-rw-r--r-- source/slang/slang-ir.cpp 24
-rw-r--r-- source/slang/slang-ir.h 50
-rw-r--r-- source/slang/slang-lower-to-ir.cpp 3
-rw-r--r-- source/slang/slang.cpp 2
-rw-r--r-- tests/compute/half-texture.slang.glsl 36
-rw-r--r-- tests/experimental/liveness/liveness.slang.expected 39
-rw-r--r-- tests/hlsl-intrinsic/shader-execution-reordering/hit-object-assign.slang.1.expected 2
-rw-r--r-- tests/hlsl-intrinsic/shader-execution-reordering/hit-object-make-hit.slang.1.expected 32
-rw-r--r-- tests/hlsl-intrinsic/shader-execution-reordering/hit-object-make-miss.slang.1.expected 2
20 files changed, 257 insertions, 145 deletions
diff --git a/source/core/slang-common.h b/source/core/slang-common.h
index 5cf0432f2..62c73df08 100644
--- a/source/core/slang-common.h
+++ b/source/core/slang-common.h
@@ -76,6 +76,12 @@ namespace Slang
template <typename T>
SLANG_FORCE_INLINE T* clone(IClonable* clonable) { return (T*)clonable->clone(T::getTypeGuid()); }
+ template <typename T> // T: flag/enum type no wider than 32 bits (enforced by the static_assert below).
+ inline bool isBitSet(T value, T bitToTest) // Returns true when every bit set in `bitToTest` is also set in `value`.
+ {
+ static_assert(sizeof(T) <= sizeof(uint32_t), "Only support up to 32 bit enums");
+ return (T)((uint32_t)value & (uint32_t)bitToTest) == bitToTest; // Mask in 32 bits, convert back to T, compare; a zero `bitToTest` therefore always yields true.
+ }
}
// TODO: Shouldn't these be SLANG_ prefixed?
diff --git a/source/slang/slang-ast-builder.cpp b/source/slang/slang-ast-builder.cpp
index 33bd23f43..e6e1b5e75 100644
--- a/source/slang/slang-ast-builder.cpp
+++ b/source/slang/slang-ast-builder.cpp
@@ -445,6 +445,7 @@ TypeType* ASTBuilder::getTypeType(Type* type)
bool ASTBuilder::NodeDesc::operator==(NodeDesc const& that) const
{
+ if (hashCode != that.hashCode) return false;
if(type != that.type) return false;
if(operands.getCount() != that.operands.getCount()) return false;
for(Index i = 0; i < operands.getCount(); ++i)
@@ -461,7 +462,8 @@ bool ASTBuilder::NodeDesc::operator==(NodeDesc const& that) const
}
return true;
}
-HashCode ASTBuilder::NodeDesc::getHashCode() const
+
+void ASTBuilder::NodeDesc::init()
{
Hasher hasher;
hasher.hashValue(Int(type));
@@ -474,7 +476,7 @@ HashCode ASTBuilder::NodeDesc::getHashCode() const
//
hasher.hashValue(operands[i].values.nodeOperand);
}
- return hasher.getResult();
+ hashCode = hasher.getResult();
}
DeclRef<Decl> _getSpecializedDeclRef(ASTBuilder* builder, Decl* decl, Substitutions* subst)
diff --git a/source/slang/slang-ast-builder.h b/source/slang/slang-ast-builder.h
index 618636417..a2543ab1e 100644
--- a/source/slang/slang-ast-builder.h
+++ b/source/slang/slang-ast-builder.h
@@ -143,7 +143,10 @@ public:
ShortList<NodeOperand, 4> operands;
bool operator==(NodeDesc const& that) const;
- HashCode getHashCode() const;
+ HashCode getHashCode() const { return hashCode; }
+ void init();
+ private:
+ HashCode hashCode = 0;
};
template<typename NodeCreateFunc>
@@ -217,6 +220,7 @@ public:
NodeDesc desc;
desc.type = T::kType;
addOrAppendToNodeList(desc.operands, args...);
+ desc.init();
return (T*)_getOrCreateImpl(desc, [&]()
{
return create<T>(args...);
@@ -230,6 +234,7 @@ public:
NodeDesc desc;
desc.type = T::kType;
+ desc.init();
return (T*)_getOrCreateImpl(desc, [this]() { return create<T>(); });
}
@@ -240,6 +245,7 @@ public:
NodeDesc desc;
desc.type = T::kType;
addOrAppendToNodeList(desc.operands, args...);
+ desc.init();
return (T*)_getOrCreateImpl(desc, [&]()
{
return create<T>();
@@ -253,6 +259,7 @@ public:
NodeDesc desc;
desc.type = T::kType;
desc.operands.addRange(operands);
+ desc.init();
return (T*)_getOrCreateImpl(desc, [&]()
{
return create<T>();
@@ -305,6 +312,7 @@ public:
{
desc.operands.add(outer);
}
+ desc.init();
auto result = (GenericSubstitution*)_getOrCreateImpl(desc, [this]() {return create<GenericSubstitution>(); });
if (result->args.getCount() != args.getCount())
{
@@ -326,6 +334,7 @@ public:
{
desc.operands.add(outer);
}
+ desc.init();
auto result = (ThisTypeSubstitution*)_getOrCreateImpl(desc, [this]() {return create<ThisTypeSubstitution>(); });
result->interfaceDecl = interfaceDecl;
result->witness = subtypeWitness;
diff --git a/source/slang/slang-compiler.cpp b/source/slang/slang-compiler.cpp
index a5f46da3e..c1d7798e3 100644
--- a/source/slang/slang-compiler.cpp
+++ b/source/slang/slang-compiler.cpp
@@ -622,7 +622,7 @@ namespace Slang
# pragma warning(disable:4702)
#endif
SlangResult CodeGenContext::emitEntryPointsSource(ComPtr<IArtifact>& outArtifact)
- {
+ {
outArtifact.setNull();
SLANG_RETURN_ON_FAIL(requireTranslationUnitSourceFiles());
diff --git a/source/slang/slang-emit.cpp b/source/slang/slang-emit.cpp
index d71cc5507..f07dc97f8 100644
--- a/source/slang/slang-emit.cpp
+++ b/source/slang/slang-emit.cpp
@@ -981,6 +981,8 @@ Result linkAndOptimizeIR(
SlangResult CodeGenContext::emitEntryPointsSourceFromIR(ComPtr<IArtifact>& outArtifact)
{
+ SLANG_PROFILE;
+
outArtifact.setNull();
auto session = getSession();
diff --git a/source/slang/slang-ir-dce.cpp b/source/slang/slang-ir-dce.cpp
index c6636aaee..c00e4ca7b 100644
--- a/source/slang/slang-ir-dce.cpp
+++ b/source/slang/slang-ir-dce.cpp
@@ -29,30 +29,15 @@ struct DeadCodeEliminationContext
// there could be new DCE opportunities.
bool phiRemoved = false;
- // Our overall process is going to be to determine
- // which instructions in the module are "live"
- // and then eliminate anything that wasn't found to
- // be live.
- //
- // We will track the liveness state by keeping
- // a set of all instructions we have so far determined
- // to be live.
- //
- HashSet<IRInst*> liveInsts;
-
// Querying whether an instruction has been
// determined to be live is easy.
+ // To speed up the test, we use the
+ // `scratchData` field of each inst as the marker.
//
- bool isInstLive(IRInst* inst)
+ bool isInstAlive(IRInst* inst)
{
- // The only wrinkle is that we want to safeguard
- // against a null instruction (there are some
- // corner cases where we still construct IR
- // instructions with a null type).
- //
- if(!inst) return false;
-
- return liveInsts.contains(inst);
+ if (!inst) return false;
+ return inst->scratchData != 0;
}
// We are going to do an iterative analysis
@@ -81,10 +66,11 @@ struct DeadCodeEliminationContext
//
if(!inst) return;
- if(liveInsts.contains(inst))
- return;
- liveInsts.add(inst);
- workList.add(inst);
+ if (!inst->scratchData)
+ {
+ inst->scratchData = 1;
+ workList.add(inst);
+ }
}
IRInst* getUndefInst()
@@ -109,7 +95,9 @@ struct DeadCodeEliminationContext
for (;;)
{
- liveInsts.clear();
+ // Clear the `alive` bits by initializing all scratchData to 0.
+ initializeScratchData(root);
+
workList.clear();
// First of all, we know that the root instruction
@@ -242,7 +230,7 @@ struct DeadCodeEliminationContext
//
// The easy case is if `inst` is dead (that is, not live).
//
- if( !isInstLive(inst) )
+ if( !isInstAlive(inst) )
{
// We can simply remove and deallocate `inst` because it is
// dead, and not worry about any of its descendents,
@@ -324,7 +312,7 @@ bool shouldInstBeLiveIfParentIsLive(IRInst* inst, IRDeadCodeEliminationOptions o
// First, if `inst` is an instruction that might have some effects
// when it is executed, then we should keep it around.
//
- if (inst->mightHaveSideEffects())
+ if (inst->mightHaveSideEffects(SideEffectAnalysisOptions::UseDominanceTree))
{
return true;
}
diff --git a/source/slang/slang-ir-legalize-types.cpp b/source/slang/slang-ir-legalize-types.cpp
index a88d43db3..bf5893db7 100644
--- a/source/slang/slang-ir-legalize-types.cpp
+++ b/source/slang/slang-ir-legalize-types.cpp
@@ -18,6 +18,7 @@
#include "slang-ir-insts.h"
#include "slang-legalize-types.h"
#include "slang-mangle.h"
+#include "slang-ir-util.h"
namespace Slang
{
@@ -3466,6 +3467,9 @@ static LegalVal legalizeGlobalParam(
}
}
+static constexpr int kHasBeenAddedOrProcessedScratchBitIndex = 0;
+static constexpr int kHasBeenAddedScratchBitIndex = 1;
+
struct IRTypeLegalizationPass
{
IRTypeLegalizationContext* context;
@@ -3480,13 +3484,29 @@ struct IRTypeLegalizationPass
// instructions have ever been added to the work list.
List<IRInst*> workList;
- HashSet<IRInst*> hasBeenAddedOrProcessedSet;
- HashSet<IRInst*> addedToWorkListSet;
+
+    bool hasBeenAddedOrProcessed(IRInst* inst) // True if the "added or processed" bit is set; a null inst is reported as already handled.
+    {
+        if (!inst) return true;
+        return (inst->scratchData & (1ULL << kHasBeenAddedOrProcessedScratchBitIndex)) != 0; // 64-bit mask, matching the UInt64 scratchData field.
+    }
+    void setHasBeenAddedOrProcessed(IRInst* inst) // Sets the "added or processed" bit; inst is dereferenced without a null check.
+    {
+        inst->scratchData |= (1ULL << kHasBeenAddedOrProcessedScratchBitIndex);
+    }
+    bool addedToWorkList(IRInst* inst) // True if the "on the work list" bit is set; inst is dereferenced without a null check.
+    {
+        return (inst->scratchData & (1ULL << kHasBeenAddedScratchBitIndex)) != 0;
+    }
+    void setAddedToWorkList(IRInst* inst) // Sets the "on the work list" bit, the bit processModule clears again via resetScratchDataBit().
+    {
+        inst->scratchData |= (1ULL << kHasBeenAddedScratchBitIndex);
+    }
bool hasBeenAddedToWorkListOrProcessed(IRInst* inst)
{
- if (hasBeenAddedToWorkList(inst)) return true;
- return hasBeenAddedOrProcessedSet.contains(inst);
+ if (!inst) return true;
+ return (inst->scratchData != 0);
}
// We will add a simple query to check whether an instruciton
@@ -3526,7 +3546,7 @@ struct IRTypeLegalizationPass
//
if(inst->getOp() == kIROp_InterfaceRequirementEntry) return true;
- return addedToWorkListSet.contains(inst);
+ return addedToWorkList(inst);
}
// Next we define a convenience routine for adding something to the work list.
@@ -3535,15 +3555,17 @@ struct IRTypeLegalizationPass
{
// We want to avoid adding anything we've already added or processed.
//
- if(addedToWorkListSet.contains(inst))
+ if(addedToWorkList(inst))
return;
workList.add(inst);
- addedToWorkListSet.add(inst);
- hasBeenAddedOrProcessedSet.add(inst);
+ setAddedToWorkList(inst);
+ setHasBeenAddedOrProcessed(inst);
}
void processModule(IRModule* module)
{
+ initializeScratchData(module->getModuleInst());
+
// In order to process an entire module, we start by adding the
// root module insturction to our work list, and then we will
// proceed to process instructions until the work list goes dry.
@@ -3568,7 +3590,8 @@ struct IRTypeLegalizationPass
//
List<IRInst*> workListCopy;
Swap(workListCopy, workList);
- addedToWorkListSet.clear();
+
+ resetScratchDataBit(module->getModuleInst(), kHasBeenAddedScratchBitIndex);
// Now we simply process each instruction on the copy of
// the work list, knowing that `processInst` may add additional
diff --git a/source/slang/slang-ir-redundancy-removal.cpp b/source/slang/slang-ir-redundancy-removal.cpp
index 37e8ba5bb..227ef1d4d 100644
--- a/source/slang/slang-ir-redundancy-removal.cpp
+++ b/source/slang/slang-ir-redundancy-removal.cpp
@@ -23,6 +23,7 @@ struct RedundancyRemovalContext
case kIROp_And:
case kIROp_Or:
case kIROp_Not:
+ case kIROp_Neg:
case kIROp_FieldExtract:
case kIROp_FieldAddress:
case kIROp_GetElement:
@@ -43,6 +44,7 @@ struct RedundancyRemovalContext
case kIROp_MakeMatrixFromScalar:
case kIROp_MakeVectorFromScalar:
case kIROp_swizzle:
+ case kIROp_swizzleSet:
case kIROp_MatrixReshape:
case kIROp_MakeString:
case kIROp_MakeResultError:
@@ -59,6 +61,8 @@ struct RedundancyRemovalContext
case kIROp_BitOr:
case kIROp_BitXor:
case kIROp_BitCast:
+ case kIROp_IntCast:
+ case kIROp_FloatCast:
case kIROp_Reinterpret:
case kIROp_Greater:
case kIROp_Less:
diff --git a/source/slang/slang-ir-specialize.cpp b/source/slang/slang-ir-specialize.cpp
index 48b083b7d..3fc3b153d 100644
--- a/source/slang/slang-ir-specialize.cpp
+++ b/source/slang/slang-ir-specialize.cpp
@@ -7,6 +7,7 @@
#include "slang-ir-ssa-simplification.h"
#include "slang-ir-lower-witness-lookup.h"
#include "slang-ir-dce.h"
+#include "slang-ir-util.h"
#include "../core/slang-performance-profiler.h"
namespace Slang
@@ -57,14 +58,29 @@ struct SpecializationContext
// specialized-ness of an instruction depends on the
// fully-specialized-ness of its operands.
//
- // We will build an explicit hash set to encode those
- // instructions that are fully specialized.
- //
- HashSet<IRInst*> fullySpecializedInsts;
-
- // An instruction is then fully specialized if and only
- // if it is in our set.
+ // We will use an inst's scratchData to represent whether or not
+ // the inst is considered as fully specialized.
//
+ void setFullySpecializedBit(IRInst* inst) // Marks `inst` as fully specialized (bit 0 of scratchData); stands in for the removed `fullySpecializedInsts` set.
+ {
+ inst->scratchData |= 1;
+ }
+ bool getFullySpecializedBit(IRInst* inst) // Reads bit 0: has `inst` been marked fully specialized?
+ {
+ return (inst->scratchData & 1) != 0;
+ }
+ void setCleanBit(IRInst* inst) // Marks `inst` as clean (bit 1, value 2); stands in for the removed `cleanInsts` set.
+ {
+ inst->scratchData |= 2;
+ }
+ void resetCleanBit(IRInst* inst) // Clears the clean bit and leaves every other scratch bit untouched.
+ {
+ inst->scratchData &= (~2);
+ }
+ bool getCleanBit(IRInst* inst) // Reads bit 1: is `inst` currently marked clean?
+ {
+ return (inst->scratchData & 2) != 0;
+ }
bool isInstFullySpecialized(
IRInst* inst)
{
@@ -111,7 +127,7 @@ struct SpecializationContext
}
}
- return fullySpecializedInsts.contains(inst);
+ return getFullySpecializedBit(inst);
}
// When an instruction isn't fully specialized, but its operands *are*
@@ -140,7 +156,6 @@ struct SpecializationContext
// whether generic, existential, etc.
//
OrderedHashSet<IRInst*> workList;
- HashSet<IRInst*> cleanInsts;
void addToWorkList(
IRInst* inst)
@@ -166,7 +181,7 @@ struct SpecializationContext
if (workList.add(inst))
{
- cleanInsts.remove(inst);
+ resetCleanBit(inst);
addUsersToWorkList(inst);
}
@@ -194,9 +209,9 @@ struct SpecializationContext
void markInstAsFullySpecialized(
IRInst* inst)
{
- if(fullySpecializedInsts.contains(inst))
+ if(getFullySpecializedBit(inst))
return;
- fullySpecializedInsts.add(inst);
+ setFullySpecializedBit(inst);
// If we know that an instruction is fully specialized,
// then we should start to consider its uses and children
@@ -874,6 +889,9 @@ struct SpecializationContext
for (;;)
{
bool iterChanged = false;
+
+ initializeScratchData(module->getModuleInst());
+
addToWorkList(module->getModuleInst());
while (workList.getCount() != 0)
@@ -886,7 +904,7 @@ struct SpecializationContext
workList.removeLast();
- cleanInsts.add(inst);
+ setCleanBit(inst);
// For each instruction we process, we want to perform
// a few steps.
@@ -959,7 +977,7 @@ struct SpecializationContext
void addDirtyInstsToWorkListRec(IRInst* inst)
{
- if( !cleanInsts.contains(inst) )
+ if( !getCleanBit(inst) )
{
addToWorkList(inst);
}
@@ -1555,7 +1573,7 @@ struct SpecializationContext
// "fully specialized" by the rules used for doing
// generic specialization elsewhere in this pass.
//
- fullySpecializedInsts.add(newFuncType);
+ setFullySpecializedBit(newFuncType);
// The above steps have accomplished the "first phase"
// of cloning the function (since `IRFunc`s have no
diff --git a/source/slang/slang-ir-util.cpp b/source/slang/slang-ir-util.cpp
index 6b94711e0..07aaa127f 100644
--- a/source/slang/slang-ir-util.cpp
+++ b/source/slang/slang-ir-util.cpp
@@ -645,7 +645,7 @@ void setInsertAfterOrdinaryInst(IRBuilder* builder, IRInst* inst)
}
}
-bool areCallArgumentsSideEffectFree(IRCall* call)
+bool areCallArgumentsSideEffectFree(IRCall* call, SideEffectAnalysisOptions options)
{
// If the function has no side effect and is not writing to any outputs,
// we can safely treat the call as a normal inst.
@@ -668,10 +668,13 @@ bool areCallArgumentsSideEffectFree(IRCall* call)
auto module = parentFunc->getModule();
if (!module)
return false;
- auto dom = module->findDominatorTree(parentFunc);
if (arg->getOp() == kIROp_Var && getParentFunc(arg) == parentFunc)
{
+ IRDominatorTree* dom = nullptr;
+ if (isBitSet(options, SideEffectAnalysisOptions::UseDominanceTree))
+ dom = module->findOrCreateDominatorTree(parentFunc);
+
// If the pointer argument is a local variable (thus can't alias with other addresses)
// and it is never read from in the function, we can safely treat the call as having
// no side-effect.
@@ -751,17 +754,17 @@ bool areCallArgumentsSideEffectFree(IRCall* call)
return true;
}
-bool isPureFunctionalCall(IRCall* call)
+bool isPureFunctionalCall(IRCall* call, SideEffectAnalysisOptions options)
{
auto callee = getResolvedInstForDecorations(call->getCallee());
if (callee->findDecoration<IRReadNoneDecoration>())
{
- return areCallArgumentsSideEffectFree(call);
+ return areCallArgumentsSideEffectFree(call, options);
}
return false;
}
-bool isSideEffectFreeFunctionalCall(IRCall* call)
+bool isSideEffectFreeFunctionalCall(IRCall* call, SideEffectAnalysisOptions options)
{
// If the call has been marked as no-side-effect, we
// will treat it so, by-passing all other checks.
@@ -770,7 +773,7 @@ bool isSideEffectFreeFunctionalCall(IRCall* call)
if (!doesCalleeHaveSideEffect(call->getCallee()))
{
- return areCallArgumentsSideEffectFree(call);
+ return areCallArgumentsSideEffectFree(call, options);
}
return false;
}
@@ -964,6 +967,34 @@ bool isOne(IRInst* inst)
}
}
+void initializeScratchData(IRInst* inst) // Zeroes `scratchData` on `inst` and on everything reachable below it; `inst` is dereferenced without a null check.
+{
+ List<IRInst*> workList; // Used as an explicit stack, so the walk is iterative rather than recursive.
+ workList.add(inst);
+ while (workList.getCount() != 0)
+ {
+ auto item = workList.getLast();
+ workList.removeLast();
+ item->scratchData = 0;
+ for (auto child = item->getLastDecorationOrChild(); child; child = child->getPrevInst()) // Walk the decoration/child list from last to first.
+ workList.add(child);
+ }
+}
+
+void resetScratchDataBit(IRInst* inst, int bitIndex) // Clears bit `bitIndex` of `scratchData` on `inst` and on everything reachable below it; other bits are kept.
+{
+ List<IRInst*> workList; // Used as an explicit stack, so the walk is iterative rather than recursive.
+ workList.add(inst);
+ while (workList.getCount() != 0)
+ {
+ auto item = workList.getLast();
+ workList.removeLast();
+ item->scratchData &= ~(1ULL << bitIndex); // 64-bit mask; NOTE(review): bitIndex is not range-checked, presumably callers pass 0..63 - confirm.
+ for (auto child = item->getLastDecorationOrChild(); child; child = child->getPrevInst()) // Walk the decoration/child list from last to first.
+ workList.add(child);
+ }
+}
+
struct GenericChildrenMigrationContextImpl
{
IRCloneEnv cloneEnv;
diff --git a/source/slang/slang-ir-util.h b/source/slang/slang-ir-util.h
index c12f0b62b..98c3996a2 100644
--- a/source/slang/slang-ir-util.h
+++ b/source/slang/slang-ir-util.h
@@ -173,12 +173,12 @@ String dumpIRToString(IRInst* root);
// Returns whether a call insts can be treated as a pure functional inst, and thus can be
// DCE'd and deduplicated.
// (no writes to memory, no reads from unknown memory, no side effects).
-bool isPureFunctionalCall(IRCall* callInst);
+bool isPureFunctionalCall(IRCall* callInst, SideEffectAnalysisOptions options = SideEffectAnalysisOptions::None);
// Returns whether a call insts can be treated as a pure functional inst, and thus can be
// DCE'd (but not necessarily deduplicated).
// (no side effects).
-bool isSideEffectFreeFunctionalCall(IRCall* call);
+bool isSideEffectFreeFunctionalCall(IRCall* call, SideEffectAnalysisOptions options = SideEffectAnalysisOptions::None);
bool doesCalleeHaveSideEffect(IRInst* callee);
@@ -221,6 +221,9 @@ bool isGlobalOrUnknownMutableAddress(IRGlobalValueWithCode* parentFunc, IRInst*
bool isZero(IRInst* inst);
bool isOne(IRInst* inst);
+
+void initializeScratchData(IRInst* inst);
+void resetScratchDataBit(IRInst* inst, int bitIndex);
}
#endif
diff --git a/source/slang/slang-ir.cpp b/source/slang/slang-ir.cpp
index a44667a79..35803cedb 100644
--- a/source/slang/slang-ir.cpp
+++ b/source/slang/slang-ir.cpp
@@ -1982,25 +1982,7 @@ namespace Slang
}
//
- bool operator==(IRInstKey const& left, IRInstKey const& right)
- {
- if(left.inst->getOp() != right.inst->getOp()) return false;
- if(left.inst->getFullType() != right.inst->getFullType()) return false;
- if(left.inst->operandCount != right.inst->operandCount) return false;
-
- auto argCount = left.inst->operandCount;
- auto leftArgs = left.inst->getOperands();
- auto rightArgs = right.inst->getOperands();
- for( UInt aa = 0; aa < argCount; ++aa )
- {
- if(leftArgs[aa].get() != rightArgs[aa].get())
- return false;
- }
-
- return true;
- }
-
- HashCode IRInstKey::getHashCode()
+ HashCode IRInstKey::_getHashCode()
{
auto code = Slang::getHashCode(inst->getOp());
code = combineHash(code, Slang::getHashCode(inst->getFullType()));
@@ -7186,7 +7168,7 @@ namespace Slang
}
}
- bool IRInst::mightHaveSideEffects()
+ bool IRInst::mightHaveSideEffects(SideEffectAnalysisOptions options)
{
// TODO: We should drive this based on flags specified
// in `ir-inst-defs.h` isntead of hard-coding things here,
@@ -7230,7 +7212,7 @@ namespace Slang
// common subexpression elimination, etc.
//
auto call = cast<IRCall>(this);
- return !isSideEffectFreeFunctionalCall(call);
+ return !isSideEffectFreeFunctionalCall(call, options);
}
break;
diff --git a/source/slang/slang-ir.h b/source/slang/slang-ir.h
index 44dc585ab..e7c7f4fb2 100644
--- a/source/slang/slang-ir.h
+++ b/source/slang/slang-ir.h
@@ -548,6 +548,12 @@ private:
IRInst* m_inst = nullptr;
};
+enum class SideEffectAnalysisOptions // Options accepted by mightHaveSideEffects() and the call side-effect helpers; tested with isBitSet().
+{
+ None, // Implicit value 0: no optional analysis requested (the default argument everywhere in this change).
+ UseDominanceTree, // Implicit value 1: areCallArgumentsSideEffectFree() calls findOrCreateDominatorTree() only when this is set.
+};
+
// Every value in the IR is an instruction (even things
// like literal values).
//
@@ -733,6 +739,11 @@ struct IRInst
getOperands()[index].init(this, value);
}
+ // Reserved memory space for use by individual IR passes.
+ // This field is not supposed to be valid outside an IR pass,
+ // and each IR pass should always treat it as uninitialized
+ // upon entry.
+ UInt64 scratchData = 0;
//
@@ -785,7 +796,7 @@ struct IRInst
/// It is possible that this instruction has side effects?
///
/// This is a conservative test, and will return `true` if an exact answer can't be determined.
- bool mightHaveSideEffects();
+ bool mightHaveSideEffects(SideEffectAnalysisOptions options = SideEffectAnalysisOptions::None);
// RTTI support
static bool isaImpl(IROp) { return true; }
@@ -1971,12 +1982,41 @@ struct IRModule;
// Description of an instruction to be used for global value numbering
struct IRInstKey
{
- IRInst* inst;
+private:
+ IRInst* inst = nullptr;
+ HashCode hashCode = 0; // Hash cached by the IRInst* constructor; stays 0 for a default-constructed key.
+ HashCode _getHashCode(); // Computes the hash from the instruction (defined in slang-ir.cpp).
- HashCode getHashCode();
-};
+public:
+ IRInstKey() = default;
+ IRInstKey(const IRInstKey& key) = default;
+ IRInstKey(IRInst* i) // Wraps `i` and caches its hash once; NOTE(review): the cache is not refreshed if the inst changes afterwards - confirm keys are rebuilt in that case.
+ : inst(i)
+ {
+ hashCode = _getHashCode();
+ }
+ HashCode getHashCode() const { return hashCode; }
+ IRInst* getInst() const { return inst; }
-bool operator==(IRInstKey const& left, IRInstKey const& right);
+ bool operator==(IRInstKey const& right) const // Equal when the cached hash, opcode, full type and every operand match.
+ {
+ if (hashCode != right.getHashCode()) return false; // Cheap early-out before touching the instructions.
+ if (getInst()->getOp() != right.getInst()->getOp()) return false; // NOTE(review): dereferences inst unconditionally - confirm default-constructed keys are never compared.
+ if (getInst()->getFullType() != right.getInst()->getFullType()) return false;
+ if (getInst()->operandCount != right.getInst()->operandCount) return false;
+
+ auto argCount = getInst()->operandCount;
+ auto leftArgs = getInst()->getOperands();
+ auto rightArgs = right.getInst()->getOperands();
+ for (UInt aa = 0; aa < argCount; ++aa)
+ {
+ if (leftArgs[aa].get() != rightArgs[aa].get()) // Operands are compared by the value each use refers to.
+ return false;
+ }
+
+ return true;
+ }
+};
struct IRConstantKey
{
diff --git a/source/slang/slang-lower-to-ir.cpp b/source/slang/slang-lower-to-ir.cpp
index a1c7a2b8e..7439c67f4 100644
--- a/source/slang/slang-lower-to-ir.cpp
+++ b/source/slang/slang-lower-to-ir.cpp
@@ -6,6 +6,7 @@
#include "../core/slang-random-generator.h"
#include "../core/slang-hash.h"
#include "../core/slang-char-util.h"
+#include "../core/slang-performance-profiler.h"
#include "slang-check.h"
#include "slang-ir.h"
@@ -9740,6 +9741,8 @@ RefPtr<IRModule> generateIRForTranslationUnit(
ASTBuilder* astBuilder,
TranslationUnitRequest* translationUnit)
{
+ SLANG_PROFILE;
+
auto session = translationUnit->getSession();
auto compileRequest = translationUnit->compileRequest;
diff --git a/source/slang/slang.cpp b/source/slang/slang.cpp
index ffeabb0bd..246662909 100644
--- a/source/slang/slang.cpp
+++ b/source/slang/slang.cpp
@@ -2351,6 +2351,8 @@ RefPtr<ComponentType> createSpecializedGlobalAndEntryPointsComponentType(
void FrontEndCompileRequest::checkAllTranslationUnits()
{
+ SLANG_PROFILE;
+
LoadedModuleDictionary loadedModules;
if (additionalLoadedModules)
loadedModules = *additionalLoadedModules;
diff --git a/tests/compute/half-texture.slang.glsl b/tests/compute/half-texture.slang.glsl
index 0eccccaaf..27f63620d 100644
--- a/tests/compute/half-texture.slang.glsl
+++ b/tests/compute/half-texture.slang.glsl
@@ -25,20 +25,26 @@ layout(local_size_x = 4, local_size_y = 4, local_size_z = 1) in;
void main()
{
ivec2 pos_0 = ivec2(gl_GlobalInvocationID.xy);
- const float _S2 = 1.00000000000000000000 / 3.00000000000000000000;
- int _S3 = pos_0.y;
- int _S4 = pos_0.x;
- ivec2 pos2_0 = ivec2(3 - _S3, 3 - _S4);
-
- float16_t h_0 = (float16_t(imageLoad((halfTexture_0), ivec2((uvec2(pos2_0)))).x));
- f16vec2 h2_0 = (f16vec2(imageLoad((halfTexture2_0), ivec2((uvec2(pos2_0)))).xy));
- f16vec4 h4_0 = (f16vec4(imageLoad((halfTexture4_0), ivec2((uvec2(pos2_0))))));
- imageStore((halfTexture_0), ivec2((uvec2(pos_0))), f16vec4(h2_0.x + h2_0.y, float16_t(0), float16_t(0), float16_t(0)));
- imageStore((halfTexture2_0), ivec2((uvec2(pos_0))), f16vec4(h4_0.xy, float16_t(0), float16_t(0)));
- imageStore((halfTexture4_0), ivec2((uvec2(pos_0))), f16vec4(h2_0, h_0, h_0));
-
- int index_0 = _S4 + _S3 * 4;
- ((outputBuffer_0)._data[(uint(index_0))]) = index_0;
+ int _S2 = pos_0.y;
+
+ int _S3 = pos_0.x;
+
+ uvec2 _S4 = uvec2(ivec2(3 - _S2, 3 - _S3));
+
+ float16_t h_0 = (float16_t(imageLoad((halfTexture_0), ivec2((_S4))).x));
+ f16vec2 h2_0 = (f16vec2(imageLoad((halfTexture2_0), ivec2((_S4))).xy));
+ f16vec4 h4_0 = (f16vec4(imageLoad((halfTexture4_0), ivec2((_S4)))));
+
+
+
+ uvec2 _S5 = uvec2(pos_0);
+
+ imageStore((halfTexture_0), ivec2((_S5)), f16vec4(h2_0.x + h2_0.y, float16_t(0), float16_t(0), float16_t(0)));
+ imageStore((halfTexture2_0), ivec2((_S5)), f16vec4(h4_0.xy, float16_t(0), float16_t(0)));
+ imageStore((halfTexture4_0), ivec2((_S5)), f16vec4(h2_0, h_0, h_0));
+
+ int index_0 = _S3 + _S2 * 4;
+ ((outputBuffer_0)._data[(uint(index_0))]) = index_0;
return;
-} \ No newline at end of file
+}
diff --git a/tests/experimental/liveness/liveness.slang.expected b/tests/experimental/liveness/liveness.slang.expected
index fe49e85d1..0f5e5deab 100644
--- a/tests/experimental/liveness/liveness.slang.expected
+++ b/tests/experimental/liveness/liveness.slang.expected
@@ -95,9 +95,10 @@ void main()
SomeStruct_0 _S10 = makeSomeStruct_0();
const int _S11[100] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
int _S12 = index_0 & 7;
+ uint _S13 = uint(index_0);
int v_1 = someSlowFunc_0(index_0);
- bool _S13 = (v_1 & 256) != 0;
- int _S14 = v_1 & 3;
+ bool _S14 = (v_1 & 256) != 0;
+ uint _S15 = uint(v_1 & 3);
int i_2;
livenessStart_1(i_2, 0);
i_2 = 0;
@@ -120,10 +121,10 @@ void main()
livenessStart_2(t_0, 0);
t_0 = _S10;
SomeStruct_0 u_0;
- if(_S13)
+ if(_S14)
{
- s_3.x_0 = ((anotherBuffer_0)._data[(uint(_S14))]);
- t_0.x_0 = ((anotherBuffer_0)._data[(uint(_S14))]);
+ s_3.x_0 = ((anotherBuffer_0)._data[(_S15)]);
+ t_0.x_0 = ((anotherBuffer_0)._data[(_S15)]);
livenessStart_2(u_0, 0);
u_0.a_1 = 0;
u_0.x_0 = 0;
@@ -136,33 +137,33 @@ void main()
x_1.a_1 = 0;
x_1.x_0 = 0;
x_1.c_0 = _S11;
- x_1.x_0 = ((anotherBuffer_0)._data[(uint(_S14))]) + 1;
- SomeStruct_0 _S15 = x_1;
+ x_1.x_0 = ((anotherBuffer_0)._data[(_S15)]) + 1;
+ SomeStruct_0 _S16 = x_1;
livenessEnd_2(x_1, 0);
livenessStart_2(u_0, 0);
- u_0 = _S15;
+ u_0 = _S16;
}
s_3.c_0[_S12] = s_3.c_0[_S12] + 1;
- int _S16 = s_3.x_0 + t_0.x_0;
- SomeStruct_0 _S17 = u_0;
+ int _S17 = s_3.x_0 + t_0.x_0;
+ SomeStruct_0 _S18 = u_0;
livenessEnd_2(u_0, 0);
- int _S18 = _S16 + _S17.x_0 + doThing_0(t_0);
- int _S19 = somethingElse_0(t_0);
+ int _S19 = _S17 + _S18.x_0 + doThing_0(t_0);
+ int _S20 = somethingElse_0(t_0);
livenessEnd_2(t_0, 0);
- int _S20 = _S18 + _S19;
- int _S21 = s_3.c_0[2];
+ int _S21 = _S19 + _S20;
+ int _S22 = s_3.c_0[2];
livenessEnd_2(s_3, 0);
- int _S22 = _S20 + _S21;
- int _S23 = res_0;
+ int _S23 = _S21 + _S22;
+ int _S24 = res_0;
livenessEnd_0(res_0, 0);
- int res_1 = _S23 + _S22;
+ int res_1 = _S24 + _S23;
i_2 = i_2 + 1;
livenessStart_1(res_0, 0);
res_0 = res_1;
}
- int _S24 = res_0;
+ int _S25 = res_0;
livenessEnd_0(res_0, 0);
- ((outputBuffer_0)._data[(uint(index_0))]) = _S24;
+ ((outputBuffer_0)._data[(_S13)]) = _S25;
return;
}
diff --git a/tests/hlsl-intrinsic/shader-execution-reordering/hit-object-assign.slang.1.expected b/tests/hlsl-intrinsic/shader-execution-reordering/hit-object-assign.slang.1.expected
index c7be594d1..97b04be47 100644
--- a/tests/hlsl-intrinsic/shader-execution-reordering/hit-object-assign.slang.1.expected
+++ b/tests/hlsl-intrinsic/shader-execution-reordering/hit-object-assign.slang.1.expected
@@ -37,7 +37,7 @@ void main()
hitObjectRecordMissNV(hitObj_1, _S5, ray_0.Origin_0, ray_0.TMin_0, ray_0.Direction_0, ray_0.TMax_0);
bool _S6 = (hitObjectIsMissNV((hitObj_1)));
uint _S7 = uint(int(_S6));
- ((outputBuffer_0)._data[(uint(idx_0))]) = _S7;
+ ((outputBuffer_0)._data[(_S4)]) = _S7;
return;
}
diff --git a/tests/hlsl-intrinsic/shader-execution-reordering/hit-object-make-hit.slang.1.expected b/tests/hlsl-intrinsic/shader-execution-reordering/hit-object-make-hit.slang.1.expected
index 938a19480..c9589b805 100644
--- a/tests/hlsl-intrinsic/shader-execution-reordering/hit-object-make-hit.slang.1.expected
+++ b/tests/hlsl-intrinsic/shader-execution-reordering/hit-object-make-hit.slang.1.expected
@@ -91,29 +91,21 @@ void main()
ray_2.Direction_0 = vec3(0.0, 1.0, 0.0);
ray_2.TMax_0 = 10000.0;
uint _S10 = uint(idx_0);
- int _S11 = idx_0 * 2;
- uint _S12 = uint(_S11);
- int _S13 = idx_0 * 3;
- uint _S14 = uint(_S13);
- RayDesc_0 _S15 = ray_2;
+ uint _S11 = uint(idx_0 * 2);
+ uint _S12 = uint(idx_0 * 3);
+ RayDesc_0 _S13 = ray_2;
hitObjectNV hitObj_0;
- int _S16 = int(_S10);
- int _S17 = int(_S12);
- int _S18 = int(_S14);
- hitObjectRecordHitWithIndexNV(hitObj_0, scene_0, _S16, _S17, _S18, 0U, 0U, _S15.Origin_0, _S15.TMin_0, _S15.Direction_0, _S15.TMax_0, (0));
+ int _S14 = int(_S10);
+ int _S15 = int(_S11);
+ int _S16 = int(_S12);
+ hitObjectRecordHitWithIndexNV(hitObj_0, scene_0, _S14, _S15, _S16, 0U, 0U, _S13.Origin_0, _S13.TMin_0, _S13.Direction_0, _S13.TMax_0, (0));
uint r_3 = calcValue_0(hitObj_0);
- uint _S19 = uint(idx_0);
- uint _S20 = uint(_S11);
- uint _S21 = uint(_S13);
- RayDesc_0 _S22 = ray_2;
+ RayDesc_0 _S17 = ray_2;
hitObjectNV hitObj_1;
- int _S23 = int(_S19);
- int _S24 = int(_S21);
- int _S25 = int(_S20);
- hitObjectRecordHitNV(hitObj_1, scene_0, _S23, _S24, _S25, 0U, 0U, 4U, _S22.Origin_0, _S22.TMin_0, _S22.Direction_0, _S22.TMax_0, (0));
- uint _S26 = calcValue_0(hitObj_1);
- uint r_4 = r_3 + _S26;
- ((outputBuffer_0)._data[(uint(idx_0))]) = r_4;
+ hitObjectRecordHitNV(hitObj_1, scene_0, _S14, _S16, _S15, 0U, 0U, 4U, _S17.Origin_0, _S17.TMin_0, _S17.Direction_0, _S17.TMax_0, (0));
+ uint _S18 = calcValue_0(hitObj_1);
+ uint r_4 = r_3 + _S18;
+ ((outputBuffer_0)._data[(_S10)]) = r_4;
return;
}
diff --git a/tests/hlsl-intrinsic/shader-execution-reordering/hit-object-make-miss.slang.1.expected b/tests/hlsl-intrinsic/shader-execution-reordering/hit-object-make-miss.slang.1.expected
index 3afe48bc9..c79c238e6 100644
--- a/tests/hlsl-intrinsic/shader-execution-reordering/hit-object-make-miss.slang.1.expected
+++ b/tests/hlsl-intrinsic/shader-execution-reordering/hit-object-make-miss.slang.1.expected
@@ -32,7 +32,7 @@ void main()
hitObjectRecordMissNV(hitObj_0, _S2, ray_0.Origin_0, ray_0.TMin_0, ray_0.Direction_0, ray_0.TMax_0);
bool _S3 = (hitObjectIsMissNV((hitObj_0)));
uint _S4 = uint(int(_S3));
- ((outputBuffer_0)._data[(uint(idx_0))]) = _S4;
+ ((outputBuffer_0)._data[(_S2)]) = _S4;
return;
}