diff options
| author | jsmall-nvidia <jsmall@nvidia.com> | 2022-05-09 13:45:36 -0400 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2022-05-09 13:45:36 -0400 |
| commit | 7a9bc08f3548fefeb54b907a5de301b90435f04a (patch) | |
| tree | a41d5ac4017ef8d8fa703ca8ecb85d51e82831f5 | |
| parent | 117f5e554839efc13066517461eafaf8f2fd96c6 (diff) | |
Liveness pass fixes and improvements (#2225)
* #include an absolute path didn't work - because paths were taken to always be relative.
* Fix for loops within dominator tree.
Fix for functions that have no body.
* Use a count array.
Update some comments.
* Special case handling of the root block, for searching for last access.
* Enable liveness test with glsl output.
Co-authored-by: Theresa Foley <10618364+tangent-vector@users.noreply.github.com>
| -rw-r--r-- | source/slang/slang-ir-liveness.cpp | 86 | ||||
| -rw-r--r-- | tests/experimental/liveness/liveness.slang | 2 | ||||
| -rw-r--r-- | tests/experimental/liveness/liveness.slang.expected | 111 |
3 files changed, 120 insertions, 79 deletions
diff --git a/source/slang/slang-ir-liveness.cpp b/source/slang/slang-ir-liveness.cpp index 7667a33f0..414f18d77 100644 --- a/source/slang/slang-ir-liveness.cpp +++ b/source/slang/slang-ir-liveness.cpp @@ -109,10 +109,16 @@ struct LivenessContext enum class FoundResult { Found, ///< All paths were either not dominated, found - NotFound, ///< It is dominated but no access was found + NotFound, ///< It is dominated but no access was found. + Visited, ///< The block has been visited (as part of a traversal), but does not yet have a result. Used to detect loops. NotDominated, ///< If it's not dominated it can't have a liveness end + + CountOf, }; + /// True if a result can be premoted `from` to `to` + static bool canPromote(FoundResult from, FoundResult to) { return Index(to) <= Index(from) && from != FoundResult::NotDominated; } + enum class AccessType { None, ///< There is no access @@ -162,7 +168,7 @@ struct LivenessContext RefPtr<IRDominatorTree> m_dominatorTree; ///< The dominator tree for the current function - IRInst* m_root; ///< The root item we are searching for accesses to, to determine scope/liveness + RootInfo m_rootInfo; ///< The root item we are searching for accesses to, to determine scope/liveness IRBlock* m_rootBlock; ///< The block the root is in HashSet<IRInst*> m_accessSet; ///< Holds a set of all the functions that in some way access the root. @@ -190,14 +196,20 @@ void LivenessContext::_addLiveRangeEndAtBlockStart(IRBlock* block, IRInst* root) IRInst* LivenessContext::_findLastAccessInBlock(IRBlock* block) { - // Search the instructions in this block in reverse order, to find first access - for (IRInst* cur = block->getLastChild(); cur; cur = cur->getPrevInst()) + // If we are in the root block, we don't want to search *before* the liveStart + IRInst* lastInst = (block == m_rootBlock) ? m_rootInfo.liveStart : nullptr; + + // Search backwards for first access + for (IRInst* cur = block->getLastChild(); cur != lastInst; cur = cur->getPrevInst()) { + SLANG_ASSERT(cur); + if (m_accessSet.Contains(cur)) { return cur; } } + return nullptr; } @@ -207,12 +219,9 @@ LivenessContext::FoundResult LivenessContext::_addResult(IRBlock* block, FoundRe if (currentResultPtr) { const auto currentResult = *currentResultPtr; - // If it were NotDominated, it cannot be promoted to Found/NotFound. - SLANG_ASSERT(currentResult != FoundResult::NotDominated); - - // We can only promote from NotFound -> Found - - SLANG_ASSERT(Index(result) <= Index(currentResult)); + + // Check we can promote + SLANG_ASSERT(canPromote(currentResult, result)); // Set the new result *currentResultPtr = result; @@ -239,6 +248,9 @@ LivenessContext::FoundResult LivenessContext::processSuccessor(IRBlock* block) return _addResult(block, FoundResult::NotDominated); } + // Mark that it is visited + m_blockResult.Add(block, FoundResult::Visited); + // Else process the block to try and find the last used instruction return processBlock(block); } @@ -253,8 +265,8 @@ LivenessContext::FoundResult LivenessContext::processBlock(IRBlock* block) List<FoundResult> successorResults; successorResults.setCount(count); - Index foundCount = 0; - Index notDominatedCount = 0; + // Zero initialize all the counts + Index foundCounts[Index(FoundResult::CountOf)] = { 0 }; { auto cur = successors.begin(); @@ -269,18 +281,22 @@ LivenessContext::FoundResult LivenessContext::processBlock(IRBlock* block) successorResults[i] = successorResult; // Change counts depending on the result - foundCount += Index(successorResult == FoundResult::Found); - notDominatedCount += Index(successorResult == FoundResult::NotDominated); + foundCounts[Index(successorResult)]++; } } + const Index foundCount = foundCounts[Index(FoundResult::Found)]; + const Index notFoundCount = foundCounts[Index(FoundResult::NotFound)]; + + const Index otherCount = count - (foundCount + notFoundCount); + // If one or more of the successors (or successors of successors), // was found to have the last access, we need to mark the end of scope // at the start of any other paths (which are dominated). if (foundCount > 0) { // If all successors have result, or are not dominated - if (foundCount + notDominatedCount == count) + if (foundCount + otherCount == count) { return _addResult(block, FoundResult::Found); } @@ -293,19 +309,18 @@ LivenessContext::FoundResult LivenessContext::processBlock(IRBlock* block) const auto successorResult = successorResults[i]; if (successorResult == FoundResult::NotFound) { - _addLiveRangeEndAtBlockStart(successor, m_root); + _addLiveRangeEndAtBlockStart(successor, m_rootInfo.root); _addResult(successor, FoundResult::Found); } } - // This block, can be marked as found as all successors are either not dominated, found - // or have have + // This block, can now be marked as found return _addResult(block, FoundResult::Found); } - // Search the instructions in this block in reverse order, to find first access + // Search the instructions in this block in reverse order, to find last access IRInst* lastAccess = _findLastAccessInBlock(block); - + // Wasn't an access so we are done if (lastAccess == nullptr) { @@ -320,7 +335,7 @@ LivenessContext::FoundResult LivenessContext::processBlock(IRBlock* block) m_builder.setInsertLoc(IRInsertLoc::after(lastAccess)); // Add the live end inst - m_builder.emitLiveRangeEnd(m_root); + m_builder.emitLiveRangeEnd(m_rootInfo.root); return _addResult(block, FoundResult::Found); } @@ -334,15 +349,15 @@ void LivenessContext::processRoot(const RootInfo& rootInfo) auto root = rootInfo.root; // Store root information, so don't have to pass around methods + m_rootInfo = rootInfo; m_rootBlock = as<IRBlock>(root->parent); - m_root = root; - + // Add the root to the list of aliases, to start lookup m_aliases.add(root); // The challenge here is to try and determine when a root is no longer accessed, and so is no longer live // - // Note that a root can be accessed directly, but also through `aliases`. For example if the root is a structure + // Note that a root can be accessed directly, but also through `aliases`. For example if the root is a structure, // a pointer to a field in the root would be an alias. // // In terms of liveness, the only accesses that are important are loads. This is because if the last operation on @@ -461,13 +476,22 @@ void LivenessContext::processRoot(const RootInfo& rootInfo) // Now we want to find the last access in the graph of successors // // This works by recursively starting from the block where the variable is defined, walking depth first the graph of - // successors. We cache the results in m_blockResult + // successors. We cache the results in m_blockResults // - // There is an extra caveat around the dominator tree. If we just traversed the successors, if there is a loop - // we'd end up in an infinite loop. We can avoid this because we know that the root is only available in blocks dominated - // by the root. + // There is an extra caveat around the dominator tree. In principal a variable in block A is accessible by any block that is + // dominated by A. It's actually more restricted than this - because IR has other rules that provide more tight scoping. + // The extra information can be seen in a loop instruction also indicating the break and continue blocks. + // + // If we just traversed the successors, if there is a loop we'd end up in an infinite loop. We can partly avoid this because + // we know that the root is only available in blocks dominated by the root. There is also the scenario where there is a loop + // in blocks within the dominator tree. That is handled by marking 'Visited' when a final result isn't known, but we want to + // detect a loop. In most respect Visited behaves in the same manner as NotDominated. { + // Mark the root as visited to stop an infinite loop + _addResult(m_rootBlock, FoundResult::Visited); + + // Recursively find results auto foundResult = processBlock(m_rootBlock); if (foundResult == FoundResult::NotFound) @@ -482,6 +506,12 @@ void LivenessContext::processRoot(const RootInfo& rootInfo) void LivenessContext::_processFunction(IRFunc* funcInst) { + // If it has no body, then we are done + if (funcInst->getFirstBlock() == nullptr) + { + return; + } + List<RootInfo> rootInfos; // Iterate through blocks in the function, looking for variables to live track diff --git a/tests/experimental/liveness/liveness.slang b/tests/experimental/liveness/liveness.slang index d7b7d6ab5..70a8c0a24 100644 --- a/tests/experimental/liveness/liveness.slang +++ b/tests/experimental/liveness/liveness.slang @@ -1,4 +1,4 @@ -//DISABLE_TEST:SIMPLE:-target hlsl -entry computeMain -profile cs_6_3 -track-liveness +//TEST:SIMPLE:-target glsl -entry computeMain -profile cs_6_3 -track-liveness //TEST_INPUT:ubuffer(data=[0 0 0 0], stride=4):out,name outputBuffer RWStructuredBuffer<int> outputBuffer; diff --git a/tests/experimental/liveness/liveness.slang.expected b/tests/experimental/liveness/liveness.slang.expected index 21e162256..7cb39861c 100644 --- a/tests/experimental/liveness/liveness.slang.expected +++ b/tests/experimental/liveness/liveness.slang.expected @@ -2,27 +2,26 @@ result code = 0 standard error = { } standard output = { -#ifdef SLANG_HLSL_ENABLE_NVAPI -#include "nvHLSLExtns.h" -#endif +#version 450 +#extension GL_EXT_spirv_intrinsics : require +layout(row_major) uniform; +layout(row_major) buffer; -#pragma pack_matrix(column_major) - -#line 24 "tests/experimental/liveness/liveness.slang" +#line 24 0 int someSlowFunc_0(int a_0) { int i_0; uint v_0; #line 26 - uint _S1 = (uint) a_0; - i_0 = int(0); + uint _S1 = uint(a_0); + i_0 = 0; v_0 = _S1; for(;;) { #line 27 - if(i_0 < a_0 * int(20)) + if(i_0 < a_0 * 20) { } else @@ -31,17 +30,17 @@ int someSlowFunc_0(int a_0) } #line 29 - uint _S2 = (uint) ((int) ((bool) (v_0 >> int(1)) || (bool) (v_0 << int(31))) * i_0); + uint _S2 = uint(int(bool(v_0 >> 1) || bool(v_0 << 31)) * i_0); #line 27 - int i_1 = i_0 + int(1); + int i_1 = i_0 + 1; #line 27 i_0 = i_1; v_0 = _S2; } - return (int) v_0; + return int(v_0); } @@ -50,58 +49,70 @@ struct SomeStruct_0 { int a_1; int x_0; - int c_0[int(100)]; + int c_0[100]; }; #line 17 SomeStruct_0 makeSomeStruct_0() { - int _S3[int(100)] = { int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0) }; + const int _S3[100] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; #line 19 - SomeStruct_0 s_0 = { int(0), int(0), _S3 }; + SomeStruct_0 s_0 = { 0, 0, _S3 }; return s_0; } #line 15 -RWStructuredBuffer<int > anotherBuffer_0 : register(u1); - +layout(std430, binding = 1) buffer _S4 { + int _data[]; +} anotherBuffer_0; #line 40 int doThing_0(SomeStruct_0 s_1) { - return s_1.x_0 * int(2) + int(1); + return s_1.x_0 * 2 + 1; } #line 34 int somethingElse_0(inout SomeStruct_0 s_2) { - s_2.x_0 = s_2.x_0 + int(1); + s_2.x_0 = s_2.x_0 + 1; return s_2.x_0; } #line 4 -RWStructuredBuffer<int > outputBuffer_0 : register(u0); +layout(std430, binding = 0) buffer _S5 { + int _data[]; +} outputBuffer_0; + +#line 4 +spirv_instruction(id = 256) +void livenessStart_0(spirv_by_reference SomeStruct_0 _0, int _1); + + +#line 62 +spirv_instruction(id = 257) +void livenessEnd_0(spirv_by_reference SomeStruct_0 _0, int _1); #line 46 -[shader("compute")][numthreads(4, 1, 1)] -void computeMain(uint3 dispatchThreadID_0 : SV_DISPATCHTHREADID) +layout(local_size_x = 4, local_size_y = 1, local_size_z = 1) in; +void main() { int i_2; int res_0; SomeStruct_0 u_0; #line 48 - int index_0 = (int) dispatchThreadID_0.x; + int index_0 = int(gl_GlobalInvocationID.x); - i_2 = int(0); + i_2 = 0; res_0 = index_0; for(;;) { @@ -121,26 +132,26 @@ void computeMain(uint3 dispatchThreadID_0 : SV_DISPATCHTHREADID) SomeStruct_0 s_3; #line 56 - SLANG_LIVE_START(s_3) + livenessStart_0(s_3, 0); SomeStruct_0 t_0; #line 57 - SLANG_LIVE_START(t_0) + livenessStart_0(t_0, 0); #line 57 - SomeStruct_0 _S4 = makeSomeStruct_0(); + SomeStruct_0 _S6 = makeSomeStruct_0(); #line 57 - t_0 = _S4; - int _S5[int(100)] = { int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0), int(0) }; + t_0 = _S6; + const int _S7[100] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; #line 58 - SomeStruct_0 u_1 = { int(0), int(0), _S5 }; + SomeStruct_0 u_1 = { 0, 0, _S7 }; - if((bool) (v_1 & int(256))) + if(bool(v_1 & 256)) { - s_3.x_0 = anotherBuffer_0[(uint) (v_1 & int(3))]; - t_0.x_0 = anotherBuffer_0[(uint) (v_1 & int(3))]; + s_3.x_0 = ((anotherBuffer_0)._data[(uint(v_1 & 3))]); + t_0.x_0 = ((anotherBuffer_0)._data[(uint(v_1 & 3))]); #line 60 u_0 = u_1; @@ -152,51 +163,51 @@ void computeMain(uint3 dispatchThreadID_0 : SV_DISPATCHTHREADID) SomeStruct_0 x_1; #line 68 - SLANG_LIVE_START(x_1) + livenessStart_0(x_1, 0); #line 68 x_1 = u_1; - x_1.x_0 = anotherBuffer_0[(uint) (v_1 & int(3))] + int(1); - SomeStruct_0 _S6 = x_1; + x_1.x_0 = ((anotherBuffer_0)._data[(uint(v_1 & 3))]) + 1; + SomeStruct_0 _S8 = x_1; #line 70 - SLANG_LIVE_END(x_1) + livenessEnd_0(x_1, 0); #line 60 - u_0 = _S6; + u_0 = _S8; } #line 74 - s_3.c_0[index_0 & int(7)] = s_3.c_0[index_0 & int(7)] + int(1); + s_3.c_0[index_0 & 7] = s_3.c_0[index_0 & 7] + 1; - int _S7 = s_3.x_0 + t_0.x_0 + u_0.x_0; + int _S9 = s_3.x_0 + t_0.x_0 + u_0.x_0; #line 76 - int _S8 = doThing_0(t_0); + int _S10 = doThing_0(t_0); #line 76 - int _S9 = _S7 + _S8; + int _S11 = _S9 + _S10; #line 76 - int _S10 = somethingElse_0(t_0); + int _S12 = somethingElse_0(t_0); #line 76 - SLANG_LIVE_END(t_0) + livenessEnd_0(t_0, 0); #line 76 - int _S11 = _S9 + _S10; + int _S13 = _S11 + _S12; #line 76 - int _S12 = s_3.c_0[int(2)]; + int _S14 = s_3.c_0[2]; #line 76 - SLANG_LIVE_END(s_3) + livenessEnd_0(s_3, 0); #line 76 - int res_1 = res_0 + (_S11 + _S12); + int res_1 = res_0 + (_S13 + _S14); #line 52 - int i_3 = i_2 + int(1); + int i_3 = i_2 + 1; #line 52 i_2 = i_3; @@ -204,7 +215,7 @@ void computeMain(uint3 dispatchThreadID_0 : SV_DISPATCHTHREADID) } #line 79 - outputBuffer_0[(uint) index_0] = res_0; + ((outputBuffer_0)._data[(uint(index_0))]) = res_0; return; } |
