diff options
| author | Yong He <yonghe@outlook.com> | 2023-07-12 16:00:05 -0700 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2023-07-12 16:00:05 -0700 |
| commit | 261b2f1f2bc13ccf7db5ec68c825ffc7b0781f7f (patch) | |
| tree | 4953e376e705a8110cb8164dda5b239c04f2768b | |
| parent | bbd9c2e6d7b57f5acc3238083ab2f7c7b140df5e (diff) | |
Use scratchData on `IRInst` to replace HashSets. (#2978)
* Use scratchData on `IRInst` to replace HashSets.
* Update test results.
* Initialize scratchData.
* Update autodiff documentation.
* Use enum instead of bool.
---------
Co-authored-by: Yong He <yhe@nvidia.com>
20 files changed, 257 insertions, 145 deletions
diff --git a/source/core/slang-common.h b/source/core/slang-common.h index 5cf0432f2..62c73df08 100644 --- a/source/core/slang-common.h +++ b/source/core/slang-common.h @@ -76,6 +76,12 @@ namespace Slang template <typename T> SLANG_FORCE_INLINE T* clone(IClonable* clonable) { return (T*)clonable->clone(T::getTypeGuid()); } + template <typename T> + inline bool isBitSet(T value, T bitToTest) + { + static_assert(sizeof(T) <= sizeof(uint32_t), "Only support up to 32 bit enums"); + return (T)((uint32_t)value & (uint32_t)bitToTest) == bitToTest; + } } // TODO: Shouldn't these be SLANG_ prefixed? diff --git a/source/slang/slang-ast-builder.cpp b/source/slang/slang-ast-builder.cpp index 33bd23f43..e6e1b5e75 100644 --- a/source/slang/slang-ast-builder.cpp +++ b/source/slang/slang-ast-builder.cpp @@ -445,6 +445,7 @@ TypeType* ASTBuilder::getTypeType(Type* type) bool ASTBuilder::NodeDesc::operator==(NodeDesc const& that) const { + if (hashCode != that.hashCode) return false; if(type != that.type) return false; if(operands.getCount() != that.operands.getCount()) return false; for(Index i = 0; i < operands.getCount(); ++i) @@ -461,7 +462,8 @@ bool ASTBuilder::NodeDesc::operator==(NodeDesc const& that) const } return true; } -HashCode ASTBuilder::NodeDesc::getHashCode() const + +void ASTBuilder::NodeDesc::init() { Hasher hasher; hasher.hashValue(Int(type)); @@ -474,7 +476,7 @@ HashCode ASTBuilder::NodeDesc::getHashCode() const // hasher.hashValue(operands[i].values.nodeOperand); } - return hasher.getResult(); + hashCode = hasher.getResult(); } DeclRef<Decl> _getSpecializedDeclRef(ASTBuilder* builder, Decl* decl, Substitutions* subst) diff --git a/source/slang/slang-ast-builder.h b/source/slang/slang-ast-builder.h index 618636417..a2543ab1e 100644 --- a/source/slang/slang-ast-builder.h +++ b/source/slang/slang-ast-builder.h @@ -143,7 +143,10 @@ public: ShortList<NodeOperand, 4> operands; bool operator==(NodeDesc const& that) const; - HashCode getHashCode() const; + HashCode getHashCode() const { return hashCode; } + void init(); + private: + HashCode hashCode = 0; }; template<typename NodeCreateFunc> @@ -217,6 +220,7 @@ public: NodeDesc desc; desc.type = T::kType; addOrAppendToNodeList(desc.operands, args...); + desc.init(); return (T*)_getOrCreateImpl(desc, [&]() { return create<T>(args...); @@ -230,6 +234,7 @@ public: NodeDesc desc; desc.type = T::kType; + desc.init(); return (T*)_getOrCreateImpl(desc, [this]() { return create<T>(); }); } @@ -240,6 +245,7 @@ public: NodeDesc desc; desc.type = T::kType; addOrAppendToNodeList(desc.operands, args...); + desc.init(); return (T*)_getOrCreateImpl(desc, [&]() { return create<T>(); @@ -253,6 +259,7 @@ public: NodeDesc desc; desc.type = T::kType; desc.operands.addRange(operands); + desc.init(); return (T*)_getOrCreateImpl(desc, [&]() { return create<T>(); @@ -305,6 +312,7 @@ public: { desc.operands.add(outer); } + desc.init(); auto result = (GenericSubstitution*)_getOrCreateImpl(desc, [this]() {return create<GenericSubstitution>(); }); if (result->args.getCount() != args.getCount()) { @@ -326,6 +334,7 @@ public: { desc.operands.add(outer); } + desc.init(); auto result = (ThisTypeSubstitution*)_getOrCreateImpl(desc, [this]() {return create<ThisTypeSubstitution>(); }); result->interfaceDecl = interfaceDecl; result->witness = subtypeWitness; diff --git a/source/slang/slang-compiler.cpp b/source/slang/slang-compiler.cpp index a5f46da3e..c1d7798e3 100644 --- a/source/slang/slang-compiler.cpp +++ b/source/slang/slang-compiler.cpp @@ -622,7 +622,7 @@ namespace Slang # pragma warning(disable:4702) #endif SlangResult CodeGenContext::emitEntryPointsSource(ComPtr<IArtifact>& outArtifact) - { + { outArtifact.setNull(); SLANG_RETURN_ON_FAIL(requireTranslationUnitSourceFiles()); diff --git a/source/slang/slang-emit.cpp b/source/slang/slang-emit.cpp index d71cc5507..f07dc97f8 100644 --- a/source/slang/slang-emit.cpp +++ b/source/slang/slang-emit.cpp @@ -981,6 +981,8 @@ Result linkAndOptimizeIR( SlangResult CodeGenContext::emitEntryPointsSourceFromIR(ComPtr<IArtifact>& outArtifact) { + SLANG_PROFILE; + outArtifact.setNull(); auto session = getSession(); diff --git a/source/slang/slang-ir-dce.cpp b/source/slang/slang-ir-dce.cpp index c6636aaee..c00e4ca7b 100644 --- a/source/slang/slang-ir-dce.cpp +++ b/source/slang/slang-ir-dce.cpp @@ -29,30 +29,15 @@ struct DeadCodeEliminationContext // there could be new DCE opportunities. bool phiRemoved = false; - // Our overall process is going to be to determine - // which instructions in the module are "live" - // and then eliminate anything that wasn't found to - // be live. - // - // We will track the liveness state by keeping - // a set of all instructions we have so far determined - // to be live. - // - HashSet<IRInst*> liveInsts; - // Querying whether an instruction has been // determined to be live is easy. + // To speedup the test, we use the + // `scratchData` field of each inst as the marker. // - bool isInstLive(IRInst* inst) + bool isInstAlive(IRInst* inst) { - // The only wrinkle is that we want to safeguard - // against a null instruction (there are some - // corner cases where we still construct IR - // instructions with a null type). - // - if(!inst) return false; - - return liveInsts.contains(inst); + if (!inst) return false; + return inst->scratchData != 0; } // We are going to do an iterative analysis @@ -81,10 +66,11 @@ struct DeadCodeEliminationContext // if(!inst) return; - if(liveInsts.contains(inst)) - return; - liveInsts.add(inst); - workList.add(inst); + if (!inst->scratchData) + { + inst->scratchData = 1; + workList.add(inst); + } } IRInst* getUndefInst() @@ -109,7 +95,9 @@ struct DeadCodeEliminationContext for (;;) { - liveInsts.clear(); + // Clear the `alive` bits by initializing all scratchData to 0. + initializeScratchData(root); + workList.clear(); // First of all, we know that the root instruction @@ -242,7 +230,7 @@ struct DeadCodeEliminationContext // // The easy case is if `inst` is dead (that is, not live). // - if( !isInstLive(inst) ) + if( !isInstAlive(inst) ) { // We can simply remove and deallocate `inst` because it is // dead, and not worry about any of its descendents, @@ -324,7 +312,7 @@ bool shouldInstBeLiveIfParentIsLive(IRInst* inst, IRDeadCodeEliminationOptions o // First, if `inst` is an instruction that might have some effects // when it is executed, then we should keep it around. // - if (inst->mightHaveSideEffects()) + if (inst->mightHaveSideEffects(SideEffectAnalysisOptions::UseDominanceTree)) { return true; } diff --git a/source/slang/slang-ir-legalize-types.cpp b/source/slang/slang-ir-legalize-types.cpp index a88d43db3..bf5893db7 100644 --- a/source/slang/slang-ir-legalize-types.cpp +++ b/source/slang/slang-ir-legalize-types.cpp @@ -18,6 +18,7 @@ #include "slang-ir-insts.h" #include "slang-legalize-types.h" #include "slang-mangle.h" +#include "slang-ir-util.h" namespace Slang { @@ -3466,6 +3467,9 @@ static LegalVal legalizeGlobalParam( } } +static constexpr int kHasBeenAddedOrProcessedScratchBitIndex = 0; +static constexpr int kHasBeenAddedScratchBitIndex = 1; + struct IRTypeLegalizationPass { IRTypeLegalizationContext* context; @@ -3480,13 +3484,29 @@ struct IRTypeLegalizationPass // instructions have ever been added to the work list. List<IRInst*> workList; - HashSet<IRInst*> hasBeenAddedOrProcessedSet; - HashSet<IRInst*> addedToWorkListSet; + + bool hasBeenAddedOrProcessed(IRInst* inst) + { + if (!inst) return true; + return (inst->scratchData & (1 << kHasBeenAddedOrProcessedScratchBitIndex)) != 0; + } + void setHasBeenAddedOrProcessed(IRInst* inst) + { + inst->scratchData |= (1 << kHasBeenAddedOrProcessedScratchBitIndex); + } + bool addedToWorkList(IRInst* inst) + { + return (inst->scratchData & (1 << kHasBeenAddedScratchBitIndex)) != 0; + } + void setAddedToWorkList(IRInst* inst) + { + inst->scratchData |= (1 << kHasBeenAddedScratchBitIndex); + } bool hasBeenAddedToWorkListOrProcessed(IRInst* inst) { - if (hasBeenAddedToWorkList(inst)) return true; - return hasBeenAddedOrProcessedSet.contains(inst); + if (!inst) return true; + return (inst->scratchData != 0); } // We will add a simple query to check whether an instruciton @@ -3526,7 +3546,7 @@ struct IRTypeLegalizationPass // if(inst->getOp() == kIROp_InterfaceRequirementEntry) return true; - return addedToWorkListSet.contains(inst); + return addedToWorkList(inst); } // Next we define a convenience routine for adding something to the work list. @@ -3535,15 +3555,17 @@ struct IRTypeLegalizationPass { // We want to avoid adding anything we've already added or processed. // - if(addedToWorkListSet.contains(inst)) + if(addedToWorkList(inst)) return; workList.add(inst); - addedToWorkListSet.add(inst); - hasBeenAddedOrProcessedSet.add(inst); + setAddedToWorkList(inst); + setHasBeenAddedOrProcessed(inst); } void processModule(IRModule* module) { + initializeScratchData(module->getModuleInst()); + // In order to process an entire module, we start by adding the // root module insturction to our work list, and then we will // proceed to process instructions until the work list goes dry. @@ -3568,7 +3590,8 @@ struct IRTypeLegalizationPass // List<IRInst*> workListCopy; Swap(workListCopy, workList); - addedToWorkListSet.clear(); + + resetScratchDataBit(module->getModuleInst(), kHasBeenAddedScratchBitIndex); // Now we simply process each instruction on the copy of // the work list, knowing that `processInst` may add additional diff --git a/source/slang/slang-ir-redundancy-removal.cpp b/source/slang/slang-ir-redundancy-removal.cpp index 37e8ba5bb..227ef1d4d 100644 --- a/source/slang/slang-ir-redundancy-removal.cpp +++ b/source/slang/slang-ir-redundancy-removal.cpp @@ -23,6 +23,7 @@ struct RedundancyRemovalContext case kIROp_And: case kIROp_Or: case kIROp_Not: + case kIROp_Neg: case kIROp_FieldExtract: case kIROp_FieldAddress: case kIROp_GetElement: @@ -43,6 +44,7 @@ struct RedundancyRemovalContext case kIROp_MakeMatrixFromScalar: case kIROp_MakeVectorFromScalar: case kIROp_swizzle: + case kIROp_swizzleSet: case kIROp_MatrixReshape: case kIROp_MakeString: case kIROp_MakeResultError: @@ -59,6 +61,8 @@ struct RedundancyRemovalContext case kIROp_BitOr: case kIROp_BitXor: case kIROp_BitCast: + case kIROp_IntCast: + case kIROp_FloatCast: case kIROp_Reinterpret: case kIROp_Greater: case kIROp_Less: diff --git a/source/slang/slang-ir-specialize.cpp b/source/slang/slang-ir-specialize.cpp index 48b083b7d..3fc3b153d 100644 --- a/source/slang/slang-ir-specialize.cpp +++ b/source/slang/slang-ir-specialize.cpp @@ -7,6 +7,7 @@ #include "slang-ir-ssa-simplification.h" #include "slang-ir-lower-witness-lookup.h" #include "slang-ir-dce.h" +#include "slang-ir-util.h" #include "../core/slang-performance-profiler.h" namespace Slang @@ -57,14 +58,29 @@ struct SpecializationContext // specialized-ness of an instruction depends on the // fully-specialized-ness of its operands. // - // We will build an explicit hash set to encode those - // instructions that are fully specialized. - // - HashSet<IRInst*> fullySpecializedInsts; - - // An instruction is then fully specialized if and only - // if it is in our set. + // We will use an inst's scratchData to represent whether or not + // the inst is considered as fully specialized. // + void setFullySpecializedBit(IRInst* inst) + { + inst->scratchData |= 1; + } + bool getFullySpecializedBit(IRInst* inst) + { + return (inst->scratchData & 1) != 0; + } + void setCleanBit(IRInst* inst) + { + inst->scratchData |= 2; + } + void resetCleanBit(IRInst* inst) + { + inst->scratchData &= (~2); + } + bool getCleanBit(IRInst* inst) + { + return (inst->scratchData & 2) != 0; + } bool isInstFullySpecialized( IRInst* inst) { @@ -111,7 +127,7 @@ struct SpecializationContext } } - return fullySpecializedInsts.contains(inst); + return getFullySpecializedBit(inst); } // When an instruction isn't fully specialized, but its operands *are* @@ -140,7 +156,6 @@ struct SpecializationContext // whether generic, existential, etc. // OrderedHashSet<IRInst*> workList; - HashSet<IRInst*> cleanInsts; void addToWorkList( IRInst* inst) @@ -166,7 +181,7 @@ struct SpecializationContext if (workList.add(inst)) { - cleanInsts.remove(inst); + resetCleanBit(inst); addUsersToWorkList(inst); } @@ -194,9 +209,9 @@ struct SpecializationContext void markInstAsFullySpecialized( IRInst* inst) { - if(fullySpecializedInsts.contains(inst)) + if(getFullySpecializedBit(inst)) return; - fullySpecializedInsts.add(inst); + setFullySpecializedBit(inst); // If we know that an instruction is fully specialized, // then we should start to consider its uses and children @@ -874,6 +889,9 @@ struct SpecializationContext for (;;) { bool iterChanged = false; + + initializeScratchData(module->getModuleInst()); + addToWorkList(module->getModuleInst()); while (workList.getCount() != 0) @@ -886,7 +904,7 @@ struct SpecializationContext workList.removeLast(); - cleanInsts.add(inst); + setCleanBit(inst); // For each instruction we process, we want to perform // a few steps. @@ -959,7 +977,7 @@ struct SpecializationContext void addDirtyInstsToWorkListRec(IRInst* inst) { - if( !cleanInsts.contains(inst) ) + if( !getCleanBit(inst) ) { addToWorkList(inst); } @@ -1555,7 +1573,7 @@ struct SpecializationContext // "fully specialized" by the rules used for doing // generic specialization elsewhere in this pass. // - fullySpecializedInsts.add(newFuncType); + setFullySpecializedBit(newFuncType); // The above steps have accomplished the "first phase" // of cloning the function (since `IRFunc`s have no diff --git a/source/slang/slang-ir-util.cpp b/source/slang/slang-ir-util.cpp index 6b94711e0..07aaa127f 100644 --- a/source/slang/slang-ir-util.cpp +++ b/source/slang/slang-ir-util.cpp @@ -645,7 +645,7 @@ void setInsertAfterOrdinaryInst(IRBuilder* builder, IRInst* inst) } } -bool areCallArgumentsSideEffectFree(IRCall* call) +bool areCallArgumentsSideEffectFree(IRCall* call, SideEffectAnalysisOptions options) { // If the function has no side effect and is not writing to any outputs, // we can safely treat the call as a normal inst. @@ -668,10 +668,13 @@ bool areCallArgumentsSideEffectFree(IRCall* call) auto module = parentFunc->getModule(); if (!module) return false; - auto dom = module->findDominatorTree(parentFunc); if (arg->getOp() == kIROp_Var && getParentFunc(arg) == parentFunc) { + IRDominatorTree* dom = nullptr; + if (isBitSet(options, SideEffectAnalysisOptions::UseDominanceTree)) + dom = module->findOrCreateDominatorTree(parentFunc); + // If the pointer argument is a local variable (thus can't alias with other addresses) // and it is never read from in the function, we can safely treat the call as having // no side-effect. @@ -751,17 +754,17 @@ bool areCallArgumentsSideEffectFree(IRCall* call) return true; } -bool isPureFunctionalCall(IRCall* call) +bool isPureFunctionalCall(IRCall* call, SideEffectAnalysisOptions options) { auto callee = getResolvedInstForDecorations(call->getCallee()); if (callee->findDecoration<IRReadNoneDecoration>()) { - return areCallArgumentsSideEffectFree(call); + return areCallArgumentsSideEffectFree(call, options); } return false; } -bool isSideEffectFreeFunctionalCall(IRCall* call) +bool isSideEffectFreeFunctionalCall(IRCall* call, SideEffectAnalysisOptions options) { // If the call has been marked as no-side-effect, we // will treat it so, by-passing all other checks. @@ -770,7 +773,7 @@ bool isSideEffectFreeFunctionalCall(IRCall* call) if (!doesCalleeHaveSideEffect(call->getCallee())) { - return areCallArgumentsSideEffectFree(call); + return areCallArgumentsSideEffectFree(call, options); } return false; } @@ -964,6 +967,34 @@ bool isOne(IRInst* inst) } } +void initializeScratchData(IRInst* inst) +{ + List<IRInst*> workList; + workList.add(inst); + while (workList.getCount() != 0) + { + auto item = workList.getLast(); + workList.removeLast(); + item->scratchData = 0; + for (auto child = item->getLastDecorationOrChild(); child; child = child->getPrevInst()) + workList.add(child); + } +} + +void resetScratchDataBit(IRInst* inst, int bitIndex) +{ + List<IRInst*> workList; + workList.add(inst); + while (workList.getCount() != 0) + { + auto item = workList.getLast(); + workList.removeLast(); + item->scratchData &= ~(1ULL << bitIndex); + for (auto child = item->getLastDecorationOrChild(); child; child = child->getPrevInst()) + workList.add(child); + } +} + struct GenericChildrenMigrationContextImpl { IRCloneEnv cloneEnv; diff --git a/source/slang/slang-ir-util.h b/source/slang/slang-ir-util.h index c12f0b62b..98c3996a2 100644 --- a/source/slang/slang-ir-util.h +++ b/source/slang/slang-ir-util.h @@ -173,12 +173,12 @@ String dumpIRToString(IRInst* root); // Returns whether a call insts can be treated as a pure functional inst, and thus can be // DCE'd and deduplicated. // (no writes to memory, no reads from unknown memory, no side effects). -bool isPureFunctionalCall(IRCall* callInst); +bool isPureFunctionalCall(IRCall* callInst, SideEffectAnalysisOptions options = SideEffectAnalysisOptions::None); // Returns whether a call insts can be treated as a pure functional inst, and thus can be // DCE'd (but not necessarily deduplicated). // (no side effects). -bool isSideEffectFreeFunctionalCall(IRCall* call); +bool isSideEffectFreeFunctionalCall(IRCall* call, SideEffectAnalysisOptions options = SideEffectAnalysisOptions::None); bool doesCalleeHaveSideEffect(IRInst* callee); @@ -221,6 +221,9 @@ bool isGlobalOrUnknownMutableAddress(IRGlobalValueWithCode* parentFunc, IRInst* bool isZero(IRInst* inst); bool isOne(IRInst* inst); + +void initializeScratchData(IRInst* inst); +void resetScratchDataBit(IRInst* inst, int bitIndex); } #endif diff --git a/source/slang/slang-ir.cpp b/source/slang/slang-ir.cpp index a44667a79..35803cedb 100644 --- a/source/slang/slang-ir.cpp +++ b/source/slang/slang-ir.cpp @@ -1982,25 +1982,7 @@ namespace Slang } // - bool operator==(IRInstKey const& left, IRInstKey const& right) - { - if(left.inst->getOp() != right.inst->getOp()) return false; - if(left.inst->getFullType() != right.inst->getFullType()) return false; - if(left.inst->operandCount != right.inst->operandCount) return false; - - auto argCount = left.inst->operandCount; - auto leftArgs = left.inst->getOperands(); - auto rightArgs = right.inst->getOperands(); - for( UInt aa = 0; aa < argCount; ++aa ) - { - if(leftArgs[aa].get() != rightArgs[aa].get()) - return false; - } - - return true; - } - - HashCode IRInstKey::getHashCode() + HashCode IRInstKey::_getHashCode() { auto code = Slang::getHashCode(inst->getOp()); code = combineHash(code, Slang::getHashCode(inst->getFullType())); @@ -7186,7 +7168,7 @@ namespace Slang } } - bool IRInst::mightHaveSideEffects() + bool IRInst::mightHaveSideEffects(SideEffectAnalysisOptions options) { // TODO: We should drive this based on flags specified // in `ir-inst-defs.h` isntead of hard-coding things here, @@ -7230,7 +7212,7 @@ namespace Slang // common subexpression elimination, etc. // auto call = cast<IRCall>(this); - return !isSideEffectFreeFunctionalCall(call); + return !isSideEffectFreeFunctionalCall(call, options); } break; diff --git a/source/slang/slang-ir.h b/source/slang/slang-ir.h index 44dc585ab..e7c7f4fb2 100644 --- a/source/slang/slang-ir.h +++ b/source/slang/slang-ir.h @@ -548,6 +548,12 @@ private: IRInst* m_inst = nullptr; }; +enum class SideEffectAnalysisOptions +{ + None, + UseDominanceTree, +}; + // Every value in the IR is an instruction (even things // like literal values). // @@ -733,6 +739,11 @@ struct IRInst getOperands()[index].init(this, value); } + // Reserved memory space for use by individual IR passes. + // This field is not supposed to be valid outside an IR pass, + // and each IR pass should always treat it as uninitialized + // upon entry. + UInt64 scratchData = 0; // @@ -785,7 +796,7 @@ struct IRInst /// It is possible that this instruction has side effects? /// /// This is a conservative test, and will return `true` if an exact answer can't be determined. - bool mightHaveSideEffects(); + bool mightHaveSideEffects(SideEffectAnalysisOptions options = SideEffectAnalysisOptions::None); // RTTI support static bool isaImpl(IROp) { return true; } @@ -1971,12 +1982,41 @@ struct IRModule; // Description of an instruction to be used for global value numbering struct IRInstKey { - IRInst* inst; +private: + IRInst* inst = nullptr; + HashCode hashCode = 0; + HashCode _getHashCode(); - HashCode getHashCode(); -}; +public: + IRInstKey() = default; + IRInstKey(const IRInstKey& key) = default; + IRInstKey(IRInst* i) + : inst(i) + { + hashCode = _getHashCode(); + } + HashCode getHashCode() const { return hashCode; } + IRInst* getInst() const { return inst; } -bool operator==(IRInstKey const& left, IRInstKey const& right); + bool operator==(IRInstKey const& right) const + { + if (hashCode != right.getHashCode()) return false; + if (getInst()->getOp() != right.getInst()->getOp()) return false; + if (getInst()->getFullType() != right.getInst()->getFullType()) return false; + if (getInst()->operandCount != right.getInst()->operandCount) return false; + + auto argCount = getInst()->operandCount; + auto leftArgs = getInst()->getOperands(); + auto rightArgs = right.getInst()->getOperands(); + for (UInt aa = 0; aa < argCount; ++aa) + { + if (leftArgs[aa].get() != rightArgs[aa].get()) + return false; + } + + return true; + } +}; struct IRConstantKey { diff --git a/source/slang/slang-lower-to-ir.cpp b/source/slang/slang-lower-to-ir.cpp index a1c7a2b8e..7439c67f4 100644 --- a/source/slang/slang-lower-to-ir.cpp +++ b/source/slang/slang-lower-to-ir.cpp @@ -6,6 +6,7 @@ #include "../core/slang-random-generator.h" #include "../core/slang-hash.h" #include "../core/slang-char-util.h" +#include "../core/slang-performance-profiler.h" #include "slang-check.h" #include "slang-ir.h" @@ -9740,6 +9741,8 @@ RefPtr<IRModule> generateIRForTranslationUnit( ASTBuilder* astBuilder, TranslationUnitRequest* translationUnit) { + SLANG_PROFILE; + auto session = translationUnit->getSession(); auto compileRequest = translationUnit->compileRequest; diff --git a/source/slang/slang.cpp b/source/slang/slang.cpp index ffeabb0bd..246662909 100644 --- a/source/slang/slang.cpp +++ b/source/slang/slang.cpp @@ -2351,6 +2351,8 @@ RefPtr<ComponentType> createSpecializedGlobalAndEntryPointsComponentType( void FrontEndCompileRequest::checkAllTranslationUnits() { + SLANG_PROFILE; + LoadedModuleDictionary loadedModules; if (additionalLoadedModules) loadedModules = *additionalLoadedModules; diff --git a/tests/compute/half-texture.slang.glsl b/tests/compute/half-texture.slang.glsl index 0eccccaaf..27f63620d 100644 --- a/tests/compute/half-texture.slang.glsl +++ b/tests/compute/half-texture.slang.glsl @@ -25,20 +25,26 @@ layout(local_size_x = 4, local_size_y = 4, local_size_z = 1) in; void main() { ivec2 pos_0 = ivec2(gl_GlobalInvocationID.xy); - const float _S2 = 1.00000000000000000000 / 3.00000000000000000000; - int _S3 = pos_0.y; - int _S4 = pos_0.x; - ivec2 pos2_0 = ivec2(3 - _S3, 3 - _S4); - - float16_t h_0 = (float16_t(imageLoad((halfTexture_0), ivec2((uvec2(pos2_0)))).x)); - f16vec2 h2_0 = (f16vec2(imageLoad((halfTexture2_0), ivec2((uvec2(pos2_0)))).xy)); - f16vec4 h4_0 = (f16vec4(imageLoad((halfTexture4_0), ivec2((uvec2(pos2_0)))))); - imageStore((halfTexture_0), ivec2((uvec2(pos_0))), f16vec4(h2_0.x + h2_0.y, float16_t(0), float16_t(0), float16_t(0))); - imageStore((halfTexture2_0), ivec2((uvec2(pos_0))), f16vec4(h4_0.xy, float16_t(0), float16_t(0))); - imageStore((halfTexture4_0), ivec2((uvec2(pos_0))), f16vec4(h2_0, h_0, h_0)); - - int index_0 = _S4 + _S3 * 4; - ((outputBuffer_0)._data[(uint(index_0))]) = index_0; + int _S2 = pos_0.y; + + int _S3 = pos_0.x; + + uvec2 _S4 = uvec2(ivec2(3 - _S2, 3 - _S3)); + + float16_t h_0 = (float16_t(imageLoad((halfTexture_0), ivec2((_S4))).x)); + f16vec2 h2_0 = (f16vec2(imageLoad((halfTexture2_0), ivec2((_S4))).xy)); + f16vec4 h4_0 = (f16vec4(imageLoad((halfTexture4_0), ivec2((_S4))))); + + + + uvec2 _S5 = uvec2(pos_0); + + imageStore((halfTexture_0), ivec2((_S5)), f16vec4(h2_0.x + h2_0.y, float16_t(0), float16_t(0), float16_t(0))); + imageStore((halfTexture2_0), ivec2((_S5)), f16vec4(h4_0.xy, float16_t(0), float16_t(0))); + imageStore((halfTexture4_0), ivec2((_S5)), f16vec4(h2_0, h_0, h_0)); + + int index_0 = _S3 + _S2 * 4; + ((outputBuffer_0)._data[(uint(index_0))]) = index_0; return; -}
\ No newline at end of file +} diff --git a/tests/experimental/liveness/liveness.slang.expected b/tests/experimental/liveness/liveness.slang.expected index fe49e85d1..0f5e5deab 100644 --- a/tests/experimental/liveness/liveness.slang.expected +++ b/tests/experimental/liveness/liveness.slang.expected @@ -95,9 +95,10 @@ void main() SomeStruct_0 _S10 = makeSomeStruct_0(); const int _S11[100] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; int _S12 = index_0 & 7; + uint _S13 = uint(index_0); int v_1 = someSlowFunc_0(index_0); - bool _S13 = (v_1 & 256) != 0; - int _S14 = v_1 & 3; + bool _S14 = (v_1 & 256) != 0; + uint _S15 = uint(v_1 & 3); int i_2; livenessStart_1(i_2, 0); i_2 = 0; @@ -120,10 +121,10 @@ void main() livenessStart_2(t_0, 0); t_0 = _S10; SomeStruct_0 u_0; - if(_S13) + if(_S14) { - s_3.x_0 = ((anotherBuffer_0)._data[(uint(_S14))]); - t_0.x_0 = ((anotherBuffer_0)._data[(uint(_S14))]); + s_3.x_0 = ((anotherBuffer_0)._data[(_S15)]); + t_0.x_0 = ((anotherBuffer_0)._data[(_S15)]); livenessStart_2(u_0, 0); u_0.a_1 = 0; u_0.x_0 = 0; @@ -136,33 +137,33 @@ void main() x_1.a_1 = 0; x_1.x_0 = 0; x_1.c_0 = _S11; - x_1.x_0 = ((anotherBuffer_0)._data[(uint(_S14))]) + 1; - SomeStruct_0 _S15 = x_1; + x_1.x_0 = ((anotherBuffer_0)._data[(_S15)]) + 1; + SomeStruct_0 _S16 = x_1; livenessEnd_2(x_1, 0); livenessStart_2(u_0, 0); - u_0 = _S15; + u_0 = _S16; } s_3.c_0[_S12] = s_3.c_0[_S12] + 1; - int _S16 = s_3.x_0 + t_0.x_0; - SomeStruct_0 _S17 = u_0; + int _S17 = s_3.x_0 + t_0.x_0; + SomeStruct_0 _S18 = u_0; livenessEnd_2(u_0, 0); - int _S18 = _S16 + _S17.x_0 + doThing_0(t_0); - int _S19 = somethingElse_0(t_0); + int _S19 = _S17 + _S18.x_0 + doThing_0(t_0); + int _S20 = somethingElse_0(t_0); livenessEnd_2(t_0, 0); - int _S20 = _S18 + _S19; - int _S21 = s_3.c_0[2]; + int _S21 = _S19 + _S20; + int _S22 = s_3.c_0[2]; livenessEnd_2(s_3, 0); - int _S22 = _S20 + _S21; - int _S23 = res_0; + int _S23 = _S21 + _S22; + int _S24 = res_0; livenessEnd_0(res_0, 0); - int res_1 = _S23 + _S22; + int res_1 = _S24 + _S23; i_2 = i_2 + 1; livenessStart_1(res_0, 0); res_0 = res_1; } - int _S24 = res_0; + int _S25 = res_0; livenessEnd_0(res_0, 0); - ((outputBuffer_0)._data[(uint(index_0))]) = _S24; + ((outputBuffer_0)._data[(_S13)]) = _S25; return; } diff --git a/tests/hlsl-intrinsic/shader-execution-reordering/hit-object-assign.slang.1.expected b/tests/hlsl-intrinsic/shader-execution-reordering/hit-object-assign.slang.1.expected index c7be594d1..97b04be47 100644 --- a/tests/hlsl-intrinsic/shader-execution-reordering/hit-object-assign.slang.1.expected +++ b/tests/hlsl-intrinsic/shader-execution-reordering/hit-object-assign.slang.1.expected @@ -37,7 +37,7 @@ void main() hitObjectRecordMissNV(hitObj_1, _S5, ray_0.Origin_0, ray_0.TMin_0, ray_0.Direction_0, ray_0.TMax_0); bool _S6 = (hitObjectIsMissNV((hitObj_1))); uint _S7 = uint(int(_S6)); - ((outputBuffer_0)._data[(uint(idx_0))]) = _S7; + ((outputBuffer_0)._data[(_S4)]) = _S7; return; } diff --git a/tests/hlsl-intrinsic/shader-execution-reordering/hit-object-make-hit.slang.1.expected b/tests/hlsl-intrinsic/shader-execution-reordering/hit-object-make-hit.slang.1.expected index 938a19480..c9589b805 100644 --- a/tests/hlsl-intrinsic/shader-execution-reordering/hit-object-make-hit.slang.1.expected +++ b/tests/hlsl-intrinsic/shader-execution-reordering/hit-object-make-hit.slang.1.expected @@ -91,29 +91,21 @@ void main() ray_2.Direction_0 = vec3(0.0, 1.0, 0.0); ray_2.TMax_0 = 10000.0; uint _S10 = uint(idx_0); - int _S11 = idx_0 * 2; - uint _S12 = uint(_S11); - int _S13 = idx_0 * 3; - uint _S14 = uint(_S13); - RayDesc_0 _S15 = ray_2; + uint _S11 = uint(idx_0 * 2); + uint _S12 = uint(idx_0 * 3); + RayDesc_0 _S13 = ray_2; hitObjectNV hitObj_0; - int _S16 = int(_S10); - int _S17 = int(_S12); - int _S18 = int(_S14); - hitObjectRecordHitWithIndexNV(hitObj_0, scene_0, _S16, _S17, _S18, 0U, 0U, _S15.Origin_0, _S15.TMin_0, _S15.Direction_0, _S15.TMax_0, (0)); + int _S14 = int(_S10); + int _S15 = int(_S11); + int _S16 = int(_S12); + hitObjectRecordHitWithIndexNV(hitObj_0, scene_0, _S14, _S15, _S16, 0U, 0U, _S13.Origin_0, _S13.TMin_0, _S13.Direction_0, _S13.TMax_0, (0)); uint r_3 = calcValue_0(hitObj_0); - uint _S19 = uint(idx_0); - uint _S20 = uint(_S11); - uint _S21 = uint(_S13); - RayDesc_0 _S22 = ray_2; + RayDesc_0 _S17 = ray_2; hitObjectNV hitObj_1; - int _S23 = int(_S19); - int _S24 = int(_S21); - int _S25 = int(_S20); - hitObjectRecordHitNV(hitObj_1, scene_0, _S23, _S24, _S25, 0U, 0U, 4U, _S22.Origin_0, _S22.TMin_0, _S22.Direction_0, _S22.TMax_0, (0)); - uint _S26 = calcValue_0(hitObj_1); - uint r_4 = r_3 + _S26; - ((outputBuffer_0)._data[(uint(idx_0))]) = r_4; + hitObjectRecordHitNV(hitObj_1, scene_0, _S14, _S16, _S15, 0U, 0U, 4U, _S17.Origin_0, _S17.TMin_0, _S17.Direction_0, _S17.TMax_0, (0)); + uint _S18 = calcValue_0(hitObj_1); + uint r_4 = r_3 + _S18; + ((outputBuffer_0)._data[(_S10)]) = r_4; return; } diff --git a/tests/hlsl-intrinsic/shader-execution-reordering/hit-object-make-miss.slang.1.expected b/tests/hlsl-intrinsic/shader-execution-reordering/hit-object-make-miss.slang.1.expected index 3afe48bc9..c79c238e6 100644 --- a/tests/hlsl-intrinsic/shader-execution-reordering/hit-object-make-miss.slang.1.expected +++ b/tests/hlsl-intrinsic/shader-execution-reordering/hit-object-make-miss.slang.1.expected @@ -32,7 +32,7 @@ void main() hitObjectRecordMissNV(hitObj_0, _S2, ray_0.Origin_0, ray_0.TMin_0, ray_0.Direction_0, ray_0.TMax_0); bool _S3 = (hitObjectIsMissNV((hitObj_0))); uint _S4 = uint(int(_S3)); - ((outputBuffer_0)._data[(uint(idx_0))]) = _S4; + ((outputBuffer_0)._data[(_S2)]) = _S4; return; } |
