From fb6432b58e52caef333ddcfd33fc468d044f8a61 Mon Sep 17 00:00:00 2001 From: jsmall-nvidia Date: Tue, 12 Feb 2019 11:16:28 -0500 Subject: Feature/alloc for ir inst creation review (#839) * Make MemoryArena rewindable. * Add test for rewinding MemoryArena * Use memory rewinding in IRInst lookup instead of malloc/free. * Small tidy. * Don't bother recreating instruction if after lookup it's found it's new. * Fix 32 bit signed compare issue. * Improve documentation around MemoryArena. * * Improve perf of test for hash expansion on Dictionary * First attempt at TryGetOrAdd * Improve comments around findOrEmitHoistableInst * Removed template from Dictionary. Use TryGetValueOrAdd to findOrEmitHoistableInst * Use TryGetValueOrAdd in findOrEmitHoistableInst * Use Type thing = {} to initialize type over Type thing{} style. --- source/core/dictionary.h | 66 +++++++++++++++++++++++--------------- source/core/slang-memory-arena.cpp | 33 ++++++++++--------- source/core/slang-memory-arena.h | 29 +++++++++-------- source/slang/ir.cpp | 53 ++++++++++-------------------- 4 files changed, 92 insertions(+), 89 deletions(-) diff --git a/source/core/dictionary.h b/source/core/dictionary.h index d7daa1c6c..6d97bc2b3 100644 --- a/source/core/dictionary.h +++ b/source/core/dictionary.h @@ -127,15 +127,13 @@ namespace Slang } }; - template - inline int GetHashPos(T & key) const + inline int GetHashPos(TKey& key) const { return ((unsigned int)(GetHashCode(key) * 2654435761)) % bucketSizeMinusOne; } - template - FindPositionResult FindPosition(const T & key) const + FindPositionResult FindPosition(const TKey& key) const { - int hashPos = GetHashPos((T&)key); + int hashPos = GetHashPos(const_cast(key)); int insertPos = -1; int numProbes = 0; while (numProbes <= bucketSizeMinusOne) @@ -163,7 +161,7 @@ namespace Slang return FindPositionResult(-1, insertPos); throw InvalidOperationException("Hash map is full. This indicates an error in Key::Equal or Key::GetHashCode."); } - TValue & _Insert(KeyValuePair && kvPair, int pos) + TValue & _Insert(KeyValuePair&& kvPair, int pos) { hashMap[pos] = _Move(kvPair); SetEmpty(pos, false); @@ -172,7 +170,7 @@ namespace Slang } void Rehash() { - if (bucketSizeMinusOne == -1 || _count / (float)bucketSizeMinusOne >= MaxLoadFactor) + if (bucketSizeMinusOne == -1 || _count >= int(MaxLoadFactor * bucketSizeMinusOne)) { int newSize = (bucketSizeMinusOne + 1) * 2; if (newSize == 0) @@ -194,7 +192,7 @@ namespace Slang } } - bool AddIfNotExists(KeyValuePair && kvPair) + bool AddIfNotExists(KeyValuePair&& kvPair) { Rehash(); auto pos = FindPosition(kvPair.Key); @@ -209,12 +207,12 @@ namespace Slang else throw InvalidOperationException("Inconsistent find result returned. This is a bug in Dictionary implementation."); } - void Add(KeyValuePair && kvPair) + void Add(KeyValuePair&& kvPair) { if (!AddIfNotExists(_Move(kvPair))) throw KeyExistsException("The key already exists in Dictionary."); } - TValue & Set(KeyValuePair && kvPair) + TValue& Set(KeyValuePair&& kvPair) { Rehash(); auto pos = FindPosition(kvPair.Key); @@ -331,16 +329,34 @@ namespace Slang marks.Clear(); } - template - bool ContainsKey(const T & key) const + TValue* TryGetValueOrAdd(const TKey& key, const TValue& value) + { + Rehash(); + auto pos = FindPosition(key); + if (pos.ObjectPosition != -1) + { + return &hashMap[pos.ObjectPosition].Value; + } + else if (pos.InsertionPosition != -1) + { + // Make pair + KeyValuePair kvPair(_Move(key), _Move(value)); + _count++; + _Insert(_Move(kvPair), pos.InsertionPosition); + return nullptr; + } + else + throw InvalidOperationException("Inconsistent find result returned. This is a bug in Dictionary implementation."); + } + + bool ContainsKey(const TKey& key) const { if (bucketSizeMinusOne == -1) return false; auto pos = FindPosition(key); return pos.ObjectPosition != -1; } - template - bool TryGetValue(const T & key, TValue & value) const + bool TryGetValue(const TKey& key, TValue& value) const { if (bucketSizeMinusOne == -1) return false; @@ -352,8 +368,7 @@ namespace Slang } return false; } - template - TValue * TryGetValue(const T & key) const + TValue* TryGetValue(const TKey& key) const { if (bucketSizeMinusOne == -1) return nullptr; @@ -364,18 +379,19 @@ namespace Slang } return nullptr; } + class ItemProxy { private: const Dictionary * dict; TKey key; public: - ItemProxy(const TKey & _key, const Dictionary * _dict) + ItemProxy(const TKey& _key, const Dictionary* _dict) { this->dict = _dict; this->key = _key; } - ItemProxy(TKey && _key, const Dictionary * _dict) + ItemProxy(TKey&& _key, const Dictionary* _dict) { this->dict = _dict; this->key = _Move(_key); @@ -431,24 +447,24 @@ namespace Slang { bucketSizeMinusOne = -1; _count = 0; - hashMap = 0; + hashMap = nullptr; } template Dictionary(Arg arg, Args... args) { Init(arg, args...); } - Dictionary(const Dictionary & other) - : bucketSizeMinusOne(-1), _count(0), hashMap(0) + Dictionary(const Dictionary& other) + : bucketSizeMinusOne(-1), _count(0), hashMap(nullptr) { *this = other; } - Dictionary(Dictionary && other) - : bucketSizeMinusOne(-1), _count(0), hashMap(0) + Dictionary(Dictionary&& other) + : bucketSizeMinusOne(-1), _count(0), hashMap(nullptr) { *this = (_Move(other)); } - Dictionary & operator = (const Dictionary & other) + Dictionary& operator = (const Dictionary& other) { if (this == &other) return *this; @@ -461,7 +477,7 @@ namespace Slang hashMap[i] = other.hashMap[i]; return *this; } - Dictionary & operator = (Dictionary && other) + Dictionary & operator = (Dictionary&& other) { if (this == &other) return *this; diff --git a/source/core/slang-memory-arena.cpp b/source/core/slang-memory-arena.cpp index 407c6eedd..99e6261ff 100644 --- a/source/core/slang-memory-arena.cpp +++ b/source/core/slang-memory-arena.cpp @@ -39,7 +39,7 @@ void MemoryArena::_initialize(size_t blockPayloadSize, size_t alignment) // Alignment must be a power of 2 assert(((alignment - 1) & alignment) == 0); - // Must be at least sizeof(void*) in size, as that is the minimum the backing allocator will be + // Ensure it's alignment is at least kMinAlignment alignment = (alignment < kMinAlignment) ? kMinAlignment : alignment; m_blockPayloadSize = blockPayloadSize; @@ -120,7 +120,8 @@ void MemoryArena::_deallocateBlocks(Block* start) bool MemoryArena::_isNormalBlock(Block* block) { - size_t blockSize = size_t(block->m_end - block->m_start); + // The size of the block in total is from m_alloc to the m_end (ie the size that is passed into _newBlock) + const size_t blockSize = size_t(block->m_end - block->m_alloc); return (blockSize == m_blockAllocSize) && ((size_t(block->m_start) & (m_blockAlignment - 1)) == 0); } @@ -134,7 +135,7 @@ void MemoryArena::_deallocateBlock(Block* block) } else { - // Must be oversized so free it + // Must be odd sized so free it ::free(block->m_alloc); // Free it in the block list m_blockFreeList.deallocate(block); @@ -144,7 +145,6 @@ void MemoryArena::_deallocateBlock(Block* block) void MemoryArena::deallocateAll() { // we need to rewind through m_usedBlocks -> seeing it the are normal sized or not - Block* block = m_usedBlocks; while (block) { @@ -278,23 +278,20 @@ void* MemoryArena::_allocateAlignedFromNewBlock(size_t size, size_t alignment) // The size of the block must be at least large enough to take into account alignment size_t allocSize = (alignment <= kMinAlignment) ? size : (size + alignment); - const size_t currentRemainSize = size_t(m_end - m_current); - Block* block; - // There are two scenario + // There are two scenarios // a) Allocate a new normal block and make current - // b) Allocate a new 'oversized' block and make current + // b) Allocate a new 'odd-sized' block and make current // - // That by always allocating a new block if oversized, we lose more efficiency in terms of storage (the previous block + // That by always allocating a new block if odd-sized, we lose more efficiency in terms of storage (the previous block // may not have been used much). BUT doing so makes it easy to rewind - as the blocks are always in order of allocation. + // // An improvement might be to have some abstraction that sits on top that can do this tracking (or have the blocks // themselves record if they alias over a previously used block - but we don't bother with this here. - - // If there is > 1/3 of block remaining, or the block required is too big to fit use an 'oversized' block - if ((currentRemainSize * 3 > m_blockPayloadSize) || (allocSize > m_blockAllocSize)) + if (allocSize > m_blockAllocSize) { - // This is an oversized block so just allocate the whole thing + // This is an odd-sized block so just allocate the whole thing block = _newBlock(allocSize, alignment); } else @@ -303,16 +300,22 @@ void* MemoryArena::_allocateAlignedFromNewBlock(size_t size, size_t alignment) assert(allocSize <= m_blockAllocSize); block = _newNormalBlock(); } + // If not allocated we are done if (!block) { return nullptr; } - // It's a new regular block... + + // Make the current block _addCurrentBlock(block); + // Allocated memory is just the start of this block + uint8_t* memory = m_current; + // It must already be aligned + assert((size_t(m_current) & alignMask) == 0); + // Do the aligned allocation (which must fit) by aligning the pointer - uint8_t* memory = (uint8_t*)((size_t(m_current) + alignMask) & ~alignMask); // It must fit if the previous code is correct... assert(memory + size <= m_end); // Move the current pointer diff --git a/source/core/slang-memory-arena.h b/source/core/slang-memory-arena.h index 14eca9e02..52400f9ed 100644 --- a/source/core/slang-memory-arena.h +++ b/source/core/slang-memory-arena.h @@ -45,16 +45,16 @@ class MemoryArena public: typedef MemoryArena ThisType; - /** The minimum alignment of the backing memory allocator. + /** The minimum alignment of the backing memory allocator. NOTE! That this should not be greater than the alignment of the underlying allocator, and should never be less than sizeof(void*). */ static const size_t kMinAlignment = sizeof(void*); /** Determines if an allocation is consistent with an allocation from this arena. The test cannot say definitively if this was such an allocation, because the exact details - of each allocation is not kept. + of each allocation are not kept. @param alloc The start of the allocation - @param sizeInBytes The size of the allocation + @param sizeInBytes The size of the allocation in bytes @return true if allocation could have been from this Arena */ bool isValid(const void* alloc, size_t sizeInBytes) const; @@ -64,9 +64,7 @@ public: /** Allocate some memory of at least size bytes without having any specific alignment. - Can be used for slightly faster *aligned* allocations if caveats in class description are met. The - Unaligned, means the method will not enforce alignment - but a client call to allocateUnaligned can control - subsequent allocations alignments via it's size. + Can be used for slightly faster *aligned* allocations if caveats in class description are met. Alignment is kMinAlignment or better. @param size The size of the allocation requested (in bytes and must be > 0). @return The allocation. Can be nullptr if backing allocator was not able to request required memory */ @@ -84,11 +82,14 @@ public: void* allocateAligned(size_t sizeInBytes, size_t alignment); /** Allocate some aligned memory of at least size bytes - @param size Size of allocation wanted (must be > 0). + @param sizeInBytes Size of allocation wanted (must be > 0). @return The allocation (or nullptr if unable to allocate). */ void* allocateUnaligned(size_t sizeInBytes); /** Allocates a null terminated string. + + NOTE, it is not possible to rewind to a zero length string allocation (because such a strings memory is not held on the arena) + @param str A null-terminated string @return A copy of the string held on the arena */ const char* allocateString(const char* str); @@ -99,11 +100,11 @@ public: @return A copy of the string held on the arena. */ const char* allocateString(const char* chars, size_t numChars); - /// Allocate an element of the specified type. Note: Constructor for type is not executed. + /// Allocate space for the specified type, with appropriate alignment. Note: Constructor for type is *NOT* executed. template T* allocate(); - /// Allocate an array of a specified type. NOTE Constructor of T is NOT executed. + /// Allocate an array of a specified type. NOTE Constructor of T is *NOT* executed. template T* allocateArray(size_t numElems); @@ -116,7 +117,7 @@ public: T* allocateAndZeroArray(size_t numElems); /** Deallocates all allocated memory. That backing memory will generally not be released so - subsequent allocation will be fast, and from the same memory. Note though that 'oversize' blocks + subsequent allocation will be fast, and from the same memory. Note though that 'odd' blocks will be deallocated. */ void deallocateAll(); @@ -133,7 +134,8 @@ public: /// Total memory allocated in bytes size_t calcTotalMemoryAllocated() const; - /// Get thue current allocation cursor (memory address where subsequent allocations will be placed if space within the current block) + /// Get the current allocation cursor (memory address where subsequent allocations will be placed if space within the current block) + /// The address of an allocated block can be used as a cursor to rewind to, such that it and all subsequent allocations will be deallocated void* getCursor() const { return m_current; } /// Rewind (and effectively deallocate) all allocations *after* the cursor void rewindToCursor(const void* cursor); @@ -259,10 +261,10 @@ SLANG_FORCE_INLINE void* MemoryArena::allocate(size_t sizeInBytes) // -------------------------------------------------------------------------- SLANG_FORCE_INLINE void* MemoryArena::allocateAndZero(size_t sizeInBytes) { - // Implement without calling ::allocate, because in most common case we don't need to test for null. assert(sizeInBytes > 0); // Align with the minimum alignment const size_t alignMask = kMinAlignment - 1; + // Implement without calling ::allocate, because in most common case we don't need to test for null. uint8_t* mem = (uint8_t*)((size_t(m_current) + alignMask) & ~alignMask); uint8_t* end = mem + sizeInBytes; if ( end <= m_end) @@ -331,7 +333,8 @@ inline const char* MemoryArena::allocateString(const char* chars, size_t numChar template SLANG_FORCE_INLINE T* MemoryArena::allocate() { - return reinterpret_cast(allocateAligned(sizeof(T), SLANG_ALIGN_OF(T))); + void* mem = (SLANG_ALIGN_OF(T) <= kMinAlignment) ? allocate(sizeof(T)) : allocateAligned(sizeof(T), SLANG_ALIGN_OF(T)); + return reinterpret_cast(mem); } // -------------------------------------------------------------------------- diff --git a/source/slang/ir.cpp b/source/slang/ir.cpp index bed506520..ce587aec5 100644 --- a/source/slang/ir.cpp +++ b/source/slang/ir.cpp @@ -1458,17 +1458,12 @@ namespace Slang operandCount += listOperandCounts[ii]; } - // A more aggressive Impl could create the instruction once, and only free the memory - // allocated to construct if it was found that the instruction already exists. - // Here we just avoid using malloc and use the memoryArena as a cheap way to allocate some - // temporary memory. auto& memoryArena = builder->getModule()->memoryArena; void* cursor = memoryArena.getCursor(); - // We are going to create a dummy instruction on the stack, - // which will be used as a key for lookup, so see if we + // We are going to create a 'dummy' instruction on the memoryArena + // which can be used as a key for lookup, so see if we // already have an equivalent instruction available to use. - size_t keySize = sizeof(IRInst) + operandCount * sizeof(IRUse); IRInst* inst = (IRInst*) memoryArena.allocateAndZero(keySize); @@ -1481,7 +1476,7 @@ namespace Slang inst->typeUse.usedValue = type; inst->operandCount = (uint32_t) operandCount; - // Don't link up as we may free (if we already have this) + // Don't link up as we may free (if we already have this key) { IRUse* operand = inst->getOperands(); for (UInt ii = 0; ii < operandListCount; ++ii) @@ -1495,35 +1490,24 @@ namespace Slang } } - IRInstKey key; - key.inst = inst; - - // Ideally we would add if not found, else return if was found instead of testing & then adding. - IRInst* foundInst = nullptr; - bool found = builder->sharedBuilder->globalValueNumberingMap.TryGetValue(key, foundInst); - - SLANG_ASSERT(endCursor == memoryArena.getCursor()); - - if (found) + // Find or add the key/inst { - memoryArena.rewindToCursor(cursor); - return foundInst; - } + IRInstKey key = { inst }; - // Make the lookup instruction into proper instruction. Equivalent to - // IRInst* inst = createInstImpl( - // builder, - // op, - // type, - // 0, - // nullptr, - // operandListCount, - // listOperandCounts, - // listOperands); + // Ideally we would add if not found, else return if was found instead of testing & then adding. + IRInst** found = builder->sharedBuilder->globalValueNumberingMap.TryGetValueOrAdd(key, inst); + SLANG_ASSERT(endCursor == memoryArena.getCursor()); + // If it's found, just return, and throw away the instruction + if (found) + { + memoryArena.rewindToCursor(cursor); + return *found; + } + } + // Make the lookup 'inst' instruction into 'proper' instruction. Equivalent to + // IRInst* inst = createInstImpl(builder, op, type, 0, nullptr, operandListCount, listOperandCounts, listOperands); { - - // Okay now need to link up if (type) { inst->typeUse.usedValue = nullptr; @@ -1545,9 +1529,6 @@ namespace Slang addHoistableInst(builder, inst); - key.inst = inst; - builder->sharedBuilder->globalValueNumberingMap.Add(key, inst); - return inst; } -- cgit v1.2.3