From b0c7eb885dac6b609a46a961feb71d2f983a0d76 Mon Sep 17 00:00:00 2001 From: jsmall-nvidia Date: Mon, 13 Jun 2022 18:51:24 -0400 Subject: More liveness improvements (#2272) * #include an absolute path didn't work - because paths were taken to always be relative. * Use TerminatedUnownedStringSlice for literals in output C++. * Remove Escape/Unescape functions used in slang-token-reader.cpp Add target type of 'host-cpp' etc to map to the target types. * Fix some corner cases around string encoding. * Added unit test for string escaping. Fixed some assorted escaping bugs. * Updated test output. * Added decode test. * Stop using hex output, to get around 'greedy' aspect. Use octal instead. * Added HostHostCallable Small changes to use ArtifactDesc/Info instead of large switches. * Fix C++ emit to handle arbitrary function export. * Add options handling for callable without an output being specified. * Can compile with COM interface. Added example using com interface. * Use the IR Ptr type instead of hack in C++ emit for interfaces. * Fix issue with outputting the COM call when ptr is used. * Fix crash issue on compilation failure. * Add support for __global. * Added `ActualGlobalRate` Added special handling around globals and COM interfaces. Tested out in cpu-com-example. * Fix typo in NodeBase. * Support for accessing globals by name working. * Bounds checking for C++ Improved bounds checks for CUDA. * Check that actual global initialization is working. * Fix typo. * Refactor the com replacement such that it doesn't need a cache or do anything special with GlobalVar. * Fix typo in CUDA prelude. * Remove context. Only create replacement if needed. * Split out COM host-callable into a unit-test. * host-callable com testing on C++and llvm. * Comment around the COM ptr replacement. * WIP Zero bound test. * Disable com test on vs 32 bit. Fix C++ prelude * Disable 32 bit targets testing com host-callable. * For now disable zero index test. * Enable bounds checking for CPU/CUDA. * Small fixes. Disable CUDA zero index bound fix. * Add test result for bound check. * Work around for index wrapping issue. * Added Fixed array test. * Only enable prelude asserts via SLANG_PRELUDE_ENABLE_ASSERT (unless defined by the user) * Small fix around instCount. * Improve liveness loop handing and tests. * Improve liveness comment. * More conservative loop handling. * Make liveness deterministic to make testing work. * Added 'span tidy' Added some more tests. * Simplify span simplification, because could collapse inappropriate spans. * Updated liveness with simple loop tracking. * Update test results. * Small tidy up. * Update comments in liveness tests. * Improve liveness comments. * Loop handling without needing LoopInfo tracking. * Improve liveness comments. * Small fix around removing uninteresting spans. Improve naming. * Store current loop information in Loop structure on the stack. * Add processing to statically determine which loop a block belongs to. * Small improvement around leaving a loop. * Fix release build warning. * Small improvement to const correctness around Loop. * Add stores to liveness run information, to allow for more sophisticated loop analysis. --- source/slang/slang-ir-liveness.cpp | 307 ++++++++++++++++++++++++------------- 1 file changed, 202 insertions(+), 105 deletions(-) (limited to 'source/slang') diff --git a/source/slang/slang-ir-liveness.cpp b/source/slang/slang-ir-liveness.cpp index 6f2af9fa0..b708307c2 100644 --- a/source/slang/slang-ir-liveness.cpp +++ b/source/slang/slang-ir-liveness.cpp @@ -100,6 +100,7 @@ public: m_startIndex(list->getCount()), m_list(list) { + SLANG_ASSERT(list); } ~RAIIStackArray() { @@ -107,7 +108,7 @@ public: } const Index m_startIndex; - List*const m_list; + List* m_list; }; struct LivenessContext @@ -140,12 +141,10 @@ struct LivenessContext /// Block info (indexed via BlockIndex), that is valid across analysing liveness of a root struct BlockInfo { - /// Reset any information for a start void resetForStart() { result = BlockResult::NotVisited; - loopParentBlockIndex = BlockIndex::Invalid; } /// Reset any information needed for a new root @@ -160,7 +159,6 @@ struct LivenessContext } // These are reset for *each* liveness start - BlockIndex loopParentBlockIndex; ///< Block index to block which is the parent for this block. Only valid for blocks that are loop starts. BlockResult result; ///< The result for this block // These remain constant for all live starts to a root. @@ -180,6 +178,7 @@ struct LivenessContext successorsCount = 0; breakBlockIndex = BlockIndex::Invalid; targetBlockIndex = BlockIndex::Invalid; + owningLoopBlockIndex = BlockIndex::Invalid; } bool isLoopStart() const { return breakBlockIndex != BlockIndex::Invalid; } @@ -189,10 +188,20 @@ struct LivenessContext BlockIndex breakBlockIndex; ///< If this block terminates in a loop holds the break block BlockIndex targetBlockIndex; ///< If this block terminates in a loop holds the target block + BlockIndex owningLoopBlockIndex;///< The loop this block 'belongs' to (or Invalid if doesn't belong to a loop) + Index successorsStart; ///< Indexes into block successors Count successorsCount; ///< How many successors }; + struct Loop + { + const Loop* parentLoop; ///< The parent loop, which will be entered when this loop is left via a break + BlockIndex targetBlockIndex; ///< The target block for this loop + BlockIndex breakBlockIndex; ///< The break block for this loop + BlockIndex loopBlockIndex; ///< Block id that terminates with loop we are currently in + }; + /// Process the module void process(); @@ -213,11 +222,11 @@ struct LivenessContext /// Process a successor to a block /// Can only be called after a call to _findAliasesAndAccesses for the root. - BlockResult _processSuccessor(BlockIndex blockIndex, BlockIndex loopBlockIndex); + BlockResult _processSuccessor(BlockIndex blockIndex, const Loop* loop); /// Process a block /// Can only be called after a call to _findAliasesAndAccesses for the root. - BlockResult _processBlock(BlockIndex blockIndex, const ConstArrayView& run, BlockIndex loopBlockIndex); + BlockResult _processBlock(BlockIndex blockIndex, const ConstArrayView& run, const Loop* loop); /// Process all the locations in the function /// NOTE: All locations must be to the same function, and ordered by root. @@ -256,8 +265,8 @@ struct LivenessContext // Returns the index in the run of a start for the current root, else -1 Index _indexOfRootStart(const ConstArrayView& run); - /// Returns the last index within the run which is an access, else -1 - Index _findLastAccess(const ConstArrayView& run); + /// Returns the last index within the run which is a load-like access, else -1 + Index _findLastLoadLike(const ConstArrayView& run); /// Adds an LiveRangeEnd for the root after `inst` if there isn't one there already void _maybeAddEndAfterInst(IRInst* inst); @@ -308,6 +317,10 @@ struct LivenessContext return makeConstArrayView(m_blockSuccessors.getBuffer() + info.successorsStart, info.successorsCount); } + /// Determine which loops blocks 'belong' to. The owning block is the block that *contains* the + /// loop instruction as it's terminator. + void _calcLoopOwnership(); + RefPtr m_dominatorTree; ///< The dominator tree for the current function IRLiveRangeStart* m_rootLiveStart = nullptr; ///< The current live start for the root @@ -394,7 +407,7 @@ LivenessContext::BlockResult LivenessContext::_addBlockResult(BlockIndex blockIn return result; } -LivenessContext::BlockResult LivenessContext::_processSuccessor(BlockIndex blockIndex, BlockIndex loopBlockIndex) +LivenessContext::BlockResult LivenessContext::_processSuccessor(BlockIndex blockIndex, const Loop* loop) { auto blockInfo = _getBlockInfo(blockIndex); @@ -431,52 +444,45 @@ LivenessContext::BlockResult LivenessContext::_processSuccessor(BlockIndex block SLANG_ASSERT(startIndex >= 0); // We want to run all the way up to the start - return _processBlock(blockIndex, run.head(startIndex), loopBlockIndex); + return _processBlock(blockIndex, run.head(startIndex), loop); } - // If we are looping - if (loopBlockIndex != BlockIndex::Invalid) + // If we are looping and branching to the start of the current loop + if (loop && loop->targetBlockIndex == blockIndex) { - // And what we are branching too is the start of the current loop - const auto& loopInfo = _getFixedBlockInfo(loopBlockIndex); - if (loopInfo.targetBlockIndex == blockIndex) + // This block has been visisted, that means it has been traversed to get here + // meaning the root *must* be live on the looping. + + // TODO(JS): + // The solution used here is somewhat conservative, it assumes if a branch back to the start of the loop can be reached that + // + // * There might be some path where the loop might exit + // * There might be some path where the root(variable or alias) may be loaded/or stored + // + // If these assumptions are wrong it will lead to + // + // * Potentially a liveness end that is never hit(outside of the loop) + // * Potentially liveness for a root that spans across the loop even if that is not actually necessary + // + // This could be improved on but would probably need something like 'loop analysis' that specially determined + // those scenarios, such that the assumptions aren't needed. It would need to be 'separate analysis', because + // the liveness traversal is a kind of incremental depth first traversal. But for loop analysis it would require + // at loop start the result on all paths through the loop. + + const auto breakBlockIndex = loop->breakBlockIndex; + + // Process what comes after the loop (in the scope of the parent loop if any) + result = _processSuccessor(breakBlockIndex, loop->parentLoop); + if (result != BlockResult::Found) { - // This block has been visisted, that means it has been traversed to get here - // meaning the root *must* be live on the looping. - - // TODO(JS): - // The solution used here is somewhat conservative, it assumes if a branch back to the start of the loop can be reached that - // - // * There might be some path where the loop might exit - // * There might be some path where the root(variable or alias) may be loaded/or stored - // - // If these assumptions are wrong it will lead to - // - // * Potentially a liveness end that is never hit(outside of the loop) - // * Potentially liveness for a root that spans across the loop even if that is not actually necessary - // - // This could be improved on but would probably need something like 'loop analysis' that specially determined - // those scenarios, such that the assumptions aren't needed. It would need to be 'separate analysis', because - // the liveness traversal is a kind of incremental depth first traversal. But for loop analysis it would require - // at loop start the result on all paths through the loop. - - const auto breakBlockIndex = loopInfo.breakBlockIndex; + // If an end is not found from the break, + // we just insert an end at the start of the break block - const auto parentLoop = _getBlockInfo(loopBlockIndex)->loopParentBlockIndex; - - // Process what comes after the loop (in the scope of the parent loop if any) - result = _processSuccessor(breakBlockIndex, parentLoop); - if (result != BlockResult::Found) - { - // If an end is not found from the break, - // we just insert an end at the start of the break block - - _maybeAddEndAtBlockStart(breakBlockIndex); - result = _addBlockResult(breakBlockIndex, BlockResult::Found); - } - - return result; + _maybeAddEndAtBlockStart(breakBlockIndex); + result = _addBlockResult(breakBlockIndex, BlockResult::Found); } + + return result; } // Otherwise just return result @@ -503,20 +509,15 @@ LivenessContext::BlockResult LivenessContext::_processSuccessor(BlockIndex block _addBlockResult(blockIndex, BlockResult::Visited); // Special case leaving the loop. - // If we are in a loop - if (loopBlockIndex != BlockIndex::Invalid) + // If we are in a loop, and the block we are going to is the break block then we are no longer in this loop + if (loop && loop->breakBlockIndex == blockIndex) { - auto& loopInfo = m_fixedBlockInfos[Index(loopBlockIndex)]; - // And the block we are going to is the break block then we are no longer in this loop - if (loopInfo.breakBlockIndex == blockIndex) - { - // So the loop we are currently in must be the parent - loopBlockIndex = _getBlockInfo(loopBlockIndex)->loopParentBlockIndex; - } + // We are in the parent loop + loop = loop->parentLoop; } // Else process the block to try and find the last used instruction - return _processBlock(blockIndex, _getRun(blockInfo), loopBlockIndex); + return _processBlock(blockIndex, _getRun(blockInfo), loop); } Index LivenessContext::_indexOfRootStart(const ConstArrayView& run) @@ -535,15 +536,16 @@ Index LivenessContext::_indexOfRootStart(const ConstArrayView& run) return -1; } -Index LivenessContext::_findLastAccess(const ConstArrayView& run) +Index LivenessContext::_findLastLoadLike(const ConstArrayView& run) { for (Index i = run.getCount() - 1; i >= 0; --i) { auto inst = run[i]; - // If it's not a live start, it must be an access - if (as(inst) == nullptr) + + const auto op = inst->getOp(); + if (op != kIROp_LiveRangeStart && op != kIROp_Store) { - // Check it really is an access inst.. + // Must be 'load like then' SLANG_ASSERT(_isAnyRunInst(inst)); return i; } @@ -608,13 +610,13 @@ LivenessContext::BlockResult LivenessContext::_completeBlock(BlockIndex blockInd // We can't have a root start in the run! SLANG_ASSERT(_indexOfRootStart(run) < 0); - // Look for the last access - const auto lastAccessIndex = _findLastAccess(run); + // Look for the last load like access + const auto lastLoadLikeIndex = _findLastLoadLike(run); // If we found one, that is the end of the range - if (lastAccessIndex >= 0) + if (lastLoadLikeIndex >= 0) { - _maybeAddEndAfterRunIndex(blockIndex, run, lastAccessIndex); + _maybeAddEndAfterRunIndex(blockIndex, run, lastLoadLikeIndex); // Add the result return _addBlockResult(blockIndex, BlockResult::Found); @@ -634,7 +636,7 @@ static IRLoop* _getLoopTerminator(IRBlock* block) return nullptr; } -LivenessContext::BlockResult LivenessContext::_processBlock(BlockIndex blockIndex, const ConstArrayView& run, BlockIndex loopBlockIndex) +LivenessContext::BlockResult LivenessContext::_processBlock(BlockIndex blockIndex, const ConstArrayView& run, const Loop* loop) { // Note that the run must be some part of the run for the block indicated by blockIndex. One of // @@ -659,6 +661,19 @@ LivenessContext::BlockResult LivenessContext::_processBlock(BlockIndex blockInde } } + // Find all the successors for this block + auto successors = _getSuccessors(blockIndex); + + const Index successorCount = successors.getCount(); + + // NOTE! Care is needed around successorResults, because _processorSuccessor may cause the underlying list + // to be reallocated. + // If we always access through successorResults (ie RAIIStackArray type), things will be fine though. + + // Set up space to store successor results + RAIIStackArray successorResults(&m_successorResults); + successorResults.setCount(successorCount); + // If we hit a loop add the information and make this the current loop info { const auto& fixedInfo = _getFixedBlockInfo(blockIndex); @@ -666,45 +681,32 @@ LivenessContext::BlockResult LivenessContext::_processBlock(BlockIndex blockInde { SLANG_ASSERT(_getLoopTerminator(fixedInfo.block)); - // Get the varaying block info - auto blockInfo = _getBlockInfo(blockIndex); - - // Its either not set or set to the same value - SLANG_ASSERT(blockInfo->loopParentBlockIndex == BlockIndex::Invalid || blockInfo->loopParentBlockIndex == loopBlockIndex); - - // Set the parent loop block index - blockInfo->loopParentBlockIndex = loopBlockIndex; + Loop nextLoop; + nextLoop.parentLoop = loop; + nextLoop.breakBlockIndex = fixedInfo.breakBlockIndex; + nextLoop.targetBlockIndex = fixedInfo.targetBlockIndex; + nextLoop.loopBlockIndex = blockIndex; - // This block is the current loop block - loopBlockIndex = blockIndex; + for (Index i = 0; i < successorCount; ++i) + { + const auto result = _processSuccessor(successors[i], &nextLoop); + successorResults[i] = result; + } + } + else + { + for (Index i = 0; i < successorCount; ++i) + { + const auto result = _processSuccessor(successors[i], loop); + successorResults[i] = result; + } } } // Zero initialize all the counts Index foundCounts[Index(BlockResult::CountOf)] = { 0 }; - - // Find all the successors for this block - auto successors = _getSuccessors(blockIndex); - - const Index successorCount = successors.getCount(); - - // Set up space to store successor results - RAIIStackArray successorResults(&m_successorResults); - successorResults.setCount(successorCount); - - for (Index i = 0; i < successorCount; ++i) + for (const auto successorResult : successorResults.getConstView()) { - const auto successorBlockIndex = successors[i]; - - // NOTE! Care is needed around successorResults, because _processorSuccessor may cause the underlying list - // to be reallocated. - // If we always access through successorResults (ie RAIIStackArray type), things will be fine though. - - // Process the successor - const auto successorResult = _processSuccessor(successorBlockIndex, loopBlockIndex); - - successorResults[i] = successorResult; - // Change counts depending on the result foundCounts[Index(successorResult)]++; } @@ -867,15 +869,17 @@ void LivenessContext::_findAliasesAndAccesses(IRInst* root) } case kIROp_Load: { - // We only care about loads in terms of identifying liveness + // We normally only care about loads in terms of identifying liveness within a block + // the last load being the last necessay live point. base = static_cast(cur)->getPtr(); accessType = AccessType::Access; break; } case kIROp_Store: { - // In terms of liveness, stores can be ignored (although in the future we will probably - // want to support to make loops analysis work more accurately) + // We need stores for loop analysis + base = static_cast(cur)->getPtr(); + accessType = AccessType::Access; break; } case kIROp_getElement: @@ -961,7 +965,7 @@ void LivenessContext::_findAndEmitRangeEnd(IRLiveRangeStart* liveRangeStart) _addBlockResult(rootStartBlockIndex, BlockResult::Visited); // Recursively find results - auto foundResult = _processBlock(rootStartBlockIndex, run, BlockIndex::Invalid); + auto foundResult = _processBlock(rootStartBlockIndex, run, nullptr); if (foundResult == BlockResult::NotFound) { @@ -991,7 +995,8 @@ bool LivenessContext::_isNormalRunInst(IRInst* inst) // The ops in the list above are the only ops *currently* that indicate an access. // Has to be consistent with `_findAliasesAndAccesses` if (op == kIROp_Call || - op == kIROp_Load) + op == kIROp_Load || + op == kIROp_Store) { // Just because it's the right type *doesn't* mean it's an access, it has to also // be in the access set @@ -1179,6 +1184,94 @@ void LivenessContext::_processRoot(IRLiveRangeStart* const* rangeStarts, Count r m_rootBlock = nullptr; } +void LivenessContext::_calcLoopOwnership() +{ + // Should all be set to invalid initially + for (const auto& fixedInfo : m_fixedBlockInfos) + { + // To stop an error when assert isn't defined... + SLANG_UNUSED(fixedInfo); + SLANG_ASSERT(fixedInfo.owningLoopBlockIndex == BlockIndex::Invalid); + } + + const Count blocksCount = m_fixedBlockInfos.getCount(); + + List work; + + for (Index i = 0; i < blocksCount; ++i) + { + const BlockIndex outerBlockIndex = BlockIndex(i); + const auto& loopInfo = _getFixedBlockInfo(outerBlockIndex); + if (loopInfo.isLoopStart()) + { + const BlockIndex loopBlockIndex = outerBlockIndex; + + work.clear(); + + BlockIndex blockIndex = loopInfo.targetBlockIndex; + + while (true) + { + // If it's already set we are done + auto& curOwner = m_fixedBlockInfos[Index(blockIndex)].owningLoopBlockIndex; + if (curOwner != BlockIndex::Invalid) + { + SLANG_ASSERT(curOwner == loopBlockIndex); + continue; + } + + // Set that it belongs to this loop + curOwner = loopBlockIndex; + + BlockIndex successorStorage[1]; + ConstArrayView successors; + + const auto& info = _getFixedBlockInfo(blockIndex); + if (info.isLoopStart()) + { + // The 'successor' is what comes after the loop + const BlockIndex breakIndex = info.breakBlockIndex; + successorStorage[0] = breakIndex; + successors = makeConstArrayView(successorStorage, 1); + } + else + { + successors = _getSuccessors(blockIndex); + } + + // Add any successors that aren't visited or terminators + for (const auto successorBlockIndex : successors) + { + // If it loops or repeats, we don't need to add + if (successorBlockIndex == loopInfo.breakBlockIndex || + successorBlockIndex == loopInfo.targetBlockIndex) + { + continue; + } + // Check if already owned (must be by this loop) + const auto successorOwner = _getFixedBlockInfo(successorBlockIndex).owningLoopBlockIndex; + if (successorOwner != BlockIndex::Invalid) + { + SLANG_ASSERT(successorOwner == loopBlockIndex); + continue; + } + + work.add(successorBlockIndex); + } + + // If nothing left we are done + if (work.getCount() == 0) + { + break; + } + + blockIndex = work.getLast(); + work.removeLast(); + } + } + } +} + void LivenessContext::_processFunction(IRFunc* func) { SLANG_ASSERT(m_rangeStarts.getCount() > 0); @@ -1249,6 +1342,10 @@ void LivenessContext::_processFunction(IRFunc* func) *dst++ = m_blockIndexMap[successor]; } } + + // Once we have the successors set up we can determine which loops each block belongs to. + // This can be useful for doing loop analysis + _calcLoopOwnership(); } // Find the run of locations that all access the same root -- cgit v1.2.3