diff options
| author | Konstantin <const@const.me> | 2023-01-16 14:52:43 +0100 |
|---|---|---|
| committer | Konstantin <const@const.me> | 2023-01-16 14:52:43 +0100 |
| commit | 8c4603c73675958efc960fbd4bb599a2909d106a (patch) | |
| tree | 714dc6fc9a1672d5fd7f89676b97e10959662abc /Whisper/CPU/BufferAllocator.cpp | |
| parent | 990a8d0dbaefc996244097397259e92758b15cce (diff) | |
Source codes
Diffstat (limited to 'Whisper/CPU/BufferAllocator.cpp')
| -rw-r--r-- | Whisper/CPU/BufferAllocator.cpp | 145 |
1 files changed, 145 insertions, 0 deletions
diff --git a/Whisper/CPU/BufferAllocator.cpp b/Whisper/CPU/BufferAllocator.cpp new file mode 100644 index 0000000..156382c --- /dev/null +++ b/Whisper/CPU/BufferAllocator.cpp @@ -0,0 +1,145 @@ +#include <stdafx.h> +#include "BufferAllocator.h" +#include <immintrin.h> +#include <ammintrin.h> +using namespace CpuCompute; + +HRESULT BufferAllocator::create( size_t cb ) +{ + CHECK( buffer.allocate( cb ) ); + head = 0; + size = cb; + dbgMarkUninitializedMemory( buffer.pointer(), cb ); + return S_OK; +} + +namespace +{ + // Round up the integer by 32 bytes + __forceinline size_t roundUpAlloc( size_t cb ) + { + const size_t mask = 31; + cb += mask; + // We require AVX1+FMA3 support, might as well use BMI1 + return _andn_u64( mask, cb ); + } +} + +void* BufferAllocator::allocate( size_t cb, size_t align ) noexcept +{ + assert( align <= 32 ); + cb = roundUpAlloc( cb ); + + uint8_t* pointer = buffer.pointer(); + if( head + cb > size || nullptr == pointer ) + { + logError( u8"BufferAllocator.allocate, not enough capacity" ); + return nullptr; + } + + void* const res = pointer + head; + head += cb; + assert( head <= size ); + dbgMarkUninitializedMemory( res, cb ); + return res; +} + +namespace +{ + // 2 MB of memory, we hope the OS kernel will then be smart enough to give us large pages. + constexpr size_t virtualAllocGranularityExp2 = 21; + + constexpr size_t virtualAllocGranularityMask = ( ( (size_t)1 ) << virtualAllocGranularityExp2 ) - 1; + + // Round up the integer by 2 megabytes + __forceinline size_t roundUpVirtualAlloc( size_t cb ) + { + const size_t mask = virtualAllocGranularityMask; + cb += mask; + return _andn_u64( mask, cb ); + } +} + +HRESULT VirtualAllocator::create( size_t cb ) +{ + if( nullptr != pointer ) + return HRESULT_FROM_WIN32( ERROR_ALREADY_INITIALIZED ); + cb = roundUpVirtualAlloc( cb ); + pointer = (uint8_t*)VirtualAlloc( NULL, cb, MEM_RESERVE, PAGE_READWRITE ); + if( nullptr != pointer ) + { + head = 0; + sizeAllocated = 0; + sizeVirtual = cb; + return S_OK; + } + + const HRESULT hr = getLastHr(); + logErrorHr( hr, u8"VirtualAlloc failed" ); + return hr; +} + +void* VirtualAllocator::allocate( size_t cb, size_t align ) noexcept +{ + assert( align <= 32 ); + cb = roundUpAlloc( cb ); + + const size_t newHead = head + cb; + if( newHead <= sizeAllocated ) + { + void* const res = pointer + head; + head = newHead; + dbgMarkUninitializedMemory( res, cb ); + return res; + } + + if( newHead <= sizeVirtual ) + { + uint8_t* const ptrCommit = pointer + sizeAllocated; + const size_t cbCommit = roundUpVirtualAlloc( newHead ) - sizeAllocated; + void* const res = VirtualAlloc( ptrCommit, cbCommit, MEM_COMMIT, PAGE_READWRITE ); + if( nullptr != res ) + { + sizeAllocated += cbCommit; + assert( sizeAllocated <= sizeVirtual ); + void* const res = pointer + head; + head = newHead; + dbgMarkUninitializedMemory( res, cb ); + return res; + } + + const HRESULT hr = getLastHr(); + logErrorHr( hr, u8"VirtualAllocator.allocate, VirtualAlloc failed" ); + return nullptr; + } + + logError( u8"VirtualAllocator.allocate, not enough arena capacity" ); + return nullptr; +} + +VirtualAllocator::~VirtualAllocator() +{ + if( nullptr == pointer ) + return; + + if( VirtualFree( pointer, 0, MEM_RELEASE ) ) + { + pointer = nullptr; + return; + } + + const HRESULT hr = getLastHr(); + logErrorHr( hr, u8"VirtualFree failed" ); +} + +#ifndef NDEBUG +// Reusing Microsoft's magic numbers: https://asawicki.info/news_1292_magic_numbers_in_visual_c +void CpuCompute::dbgMarkUninitializedMemory( void* pv, size_t cb ) +{ + __stosb( (uint8_t*)pv, 0xCD, cb ); +} +void CpuCompute::dbgMarkFreedMemory( void* pv, size_t cb ) +{ + __stosd( (DWORD*)pv, 0xFEEEFEEEu, cb / 4 ); +} +#endif
\ No newline at end of file |
