diff options
| author | Konstantin <const@const.me> | 2023-01-16 14:52:43 +0100 |
|---|---|---|
| committer | Konstantin <const@const.me> | 2023-01-16 14:52:43 +0100 |
| commit | 8c4603c73675958efc960fbd4bb599a2909d106a (patch) | |
| tree | 714dc6fc9a1672d5fd7f89676b97e10959662abc /Whisper/Utils/ProfileCollection.cpp | |
| parent | 990a8d0dbaefc996244097397259e92758b15cce (diff) | |
Source codes
Diffstat (limited to 'Whisper/Utils/ProfileCollection.cpp')
| -rw-r--r-- | Whisper/Utils/ProfileCollection.cpp | 331 |
1 files changed, 331 insertions, 0 deletions
diff --git a/Whisper/Utils/ProfileCollection.cpp b/Whisper/Utils/ProfileCollection.cpp new file mode 100644 index 0000000..2fc5919 --- /dev/null +++ b/Whisper/Utils/ProfileCollection.cpp @@ -0,0 +1,331 @@ +#include "stdafx.h" +#include "ProfileCollection.h" +#include "GpuProfiler.h" +#include "../Whisper/WhisperModel.h" +#include "../D3D/shaderNames.h" +using namespace Whisper; + +ProfileCollection::Measure& ProfileCollection::measure( DirectCompute::eProfilerBlock which ) +{ + uint32_t key = (uint16_t)which; + key |= 0x20000; + return measures[ key ]; +} + +ProfileCollection::Measure& ProfileCollection::measure( DirectCompute::eComputeShader which ) +{ + uint32_t key = (uint16_t)which; + key |= 0x30000; + return measures[ key ]; +} + +ProfileCollection::Measure& ProfileCollection::measure( eCpuBlock which ) +{ + uint32_t key = (uint8_t)which; + key |= 0x10000; + CComCritSecLock<CComAutoCriticalSection> lock{ critSec }; + return measures[ key ]; +} + +#if PROFILER_COLLECT_TAGS +ProfileCollection::Measure& ProfileCollection::measure( DirectCompute::eComputeShader which, uint16_t tag ) +{ + uint32_t key = (uint8_t)which; + key = key << 16; + key |= tag; + CComCritSecLock<CComAutoCriticalSection> lock{ critSec }; + return taggedShaders[ key ]; +} +#endif + +namespace +{ + using pfnPrintEnum = const char* ( * )( uint16_t val ); + + static const char* printCpuBlock( uint16_t id ) + { + const eCpuBlock which = (eCpuBlock)id; + switch( which ) + { +#define V(x) case eCpuBlock::x: return #x + V( LoadModel ); + V( Run ); + V( Spectrogram ); + V( Sample ); + V( VAD ); + V( Decode ); + V( DecodeStep ); + V( DecodeLayer ); +#undef V + } + assert( false ); + return nullptr; + } + + static const char* printGpuBlock( uint16_t id ) + { + using DirectCompute::eProfilerBlock; + const eProfilerBlock which = (eProfilerBlock)id; + + switch( which ) + { +#define V(x) case eProfilerBlock::x: return #x + V( LoadModel ); + V( Run ); + V( Encode ); + V( EncodeLayer ); + V( Decode ); + V( DecodeStep ); + V( DecodeLayer ); +#undef V + } + assert( false ); + return nullptr; + } + + static const char* printShader( uint16_t id ) + { + return DirectCompute::computeShaderName( (DirectCompute::eComputeShader)id ); + } + + static pfnPrintEnum printSectionStart( uint16_t type ) + { + switch( type ) + { + case 1: + logInfo( u8" CPU Tasks" ); + return &printCpuBlock; + case 2: + logInfo( u8" GPU Tasks" ); + return &printGpuBlock; + case 3: + logInfo( u8" Compute Shaders" ); + return &printShader; + default: + return nullptr; + } + } + + struct PrintedTime + { + double value; + const char* unit; + + PrintedTime( uint64_t ticks ) + { + const double dbl = (double)(int64_t)ticks; + if( ticks >= 10'000'000 ) + { + value = dbl / 1.0E+7; + unit = "seconds"; + } + else if( ticks >= 10'000 ) + { + value = dbl / 1.0E+4; + unit = "milliseconds"; + } + else + { + value = dbl / 1.0E+1; + unit = "microseconds"; + } + } + PrintedTime( double dbl ) + { + if( dbl >= 10'000'000 ) + { + value = dbl / 1.0E+7; + unit = "seconds"; + } + else if( dbl >= 10'000 ) + { + value = dbl / 1.0E+4; + unit = "milliseconds"; + } + else + { + value = dbl / 1.0E+1; + unit = "microseconds"; + } + } + }; +} + +void ProfileCollection::Measure::print( const char* name ) const +{ + PrintedTime total{ totalTicks }; + if( 1 == count ) + logInfo( u8"%s\t%g %s", name, total.value, total.unit ); + else + { + PrintedTime avg = (double)totalTicks / (double)(int64_t)count; + logInfo( u8"%s\t%g %s, %zu calls, %g %s average", name, total.value, total.unit, count, avg.value, avg.unit ); + } +} + +#if PROFILER_COLLECT_TAGS +struct TaggedShaderCmp +{ + bool operator()( uint16_t cs, uint32_t key ) const + { + return cs < key >> 16; + } + bool operator()( uint32_t key, uint16_t cs ) const + { + return key >> 16 < cs; + } +}; + +void ProfileCollection::TaggedTemp::print() const +{ + PrintedTime total{ ticks }; + if( 1 == count ) + logInfo( u8" %s\t%g %s", name, total.value, total.unit ); + else + { + PrintedTime avg = (double)ticks / (double)(int64_t)count; + logInfo( u8" %s\t%g %s, %zu calls, %g %s average", name, total.value, total.unit, count, avg.value, avg.unit ); + } +} +#endif + +void ProfileCollection::print() +{ + keysTemp.clear(); + for( POSITION pos = measures.GetStartPosition(); nullptr != pos; ) + { + auto* p = measures.GetNext( pos ); + if( p->m_value.count == 0 ) + continue; + keysTemp.push_back( p->m_key ); + } + + std::sort( keysTemp.begin(), keysTemp.end() ); + auto it = std::lower_bound( keysTemp.begin(), keysTemp.end(), 0x30000u ); + if( it != keysTemp.end() ) + { + auto lambda = [ this ]( uint32_t a, uint32_t b ) + { + const uint64_t ta = measures.Lookup( a )->m_value.totalTicks; + const uint64_t tb = measures.Lookup( b )->m_value.totalTicks; + return ta > tb; + }; + std::stable_sort( it, keysTemp.end(), lambda ); + } + +#if PROFILER_COLLECT_TAGS + taggedKeysTemp.clear(); + for( POSITION pos = taggedShaders.GetStartPosition(); nullptr != pos; ) + { + auto* p = taggedShaders.GetNext( pos ); + if( p->m_value.count == 0 ) + continue; + taggedKeysTemp.push_back( p->m_key ); + } + std::sort( taggedKeysTemp.begin(), taggedKeysTemp.end() ); +#endif + + uint16_t prevKeyType = 0; + pfnPrintEnum pfn = nullptr; + for( uint32_t k : keysTemp ) + { + const uint16_t type = (uint16_t)( k >> 16 ); + if( type != prevKeyType ) + { + prevKeyType = type; + pfn = printSectionStart( type ); + } + if( pfn == nullptr ) + continue; + const auto* p = measures.Lookup( k ); + assert( nullptr != p ); + p->m_value.print( pfn( (uint16_t)k ) ); + +#if PROFILER_COLLECT_TAGS + if( type == 3 ) + { + // Compute shader + auto range = std::equal_range( taggedKeysTemp.begin(), taggedKeysTemp.end(), (uint16_t)k, TaggedShaderCmp{} ); + if( range.first != range.second ) + { + // We have at least 1 tag for that compute shader + taggedTimes.clear(); + uint64_t totalTicks = 0; + size_t totalCount = 0; + for( auto it = range.first; it != range.second; it++ ) + { + const uint32_t key = *it; + const uint16_t tagId = (uint16_t)key; + assert( 0 != tagId ); + const auto* p = taggedShaders.Lookup( key ); + assert( nullptr != p ); + + auto& rdi = taggedTimes.emplace_back(); + rdi.ticks = p->m_value.totalTicks; + totalTicks += p->m_value.totalTicks; + + rdi.count = p->m_value.count; + totalCount += p->m_value.count; + + rdi.name = tagNames[ tagId ]; + } + + assert( totalCount <= p->m_value.count ); + if( totalCount < p->m_value.count ) + { + auto& rdi = taggedTimes.emplace_back(); + rdi.ticks = p->m_value.totalTicks - totalTicks; + rdi.count = p->m_value.count - totalCount; + rdi.name = tagNames[ 0 ]; + } + std::stable_sort( taggedTimes.begin(), taggedTimes.end() ); + for( const auto& e : taggedTimes ) + e.print(); + } + } +#endif + } +} + +void ProfileCollection::reset() +{ + for( POSITION pos = measures.GetStartPosition(); nullptr != pos; ) + measures.GetNextValue( pos ).reset(); +} + +ProfileCollection::ProfileCollection( const WhisperModel& model ) +{ + const __m128i vals = model.getLoadTimes(); + + uint64_t s = (uint64_t)_mm_cvtsi128_si64( vals ); + measure( eCpuBlock::LoadModel ).add( s ); + + s = (uint64_t)_mm_extract_epi64( vals, 1 ); + measure( DirectCompute::eProfilerBlock::LoadModel ).add( s ); +#if PROFILER_COLLECT_TAGS + // Tag ID 0 means no tag at all. makeTagId() method returns 0 for nullptr name, and starts numbering with 1 for non-empoty tag names + // Push the tag name corresponding to ID = 0, this way we can index directly with tag IDs. + tagNames.push_back( "<untagged>" ); +#endif +} + +uint16_t ProfileCollection::makeTagId( const char* tag ) +{ +#if PROFILER_COLLECT_TAGS + if( nullptr == tag ) + return 0; + auto p = tagIDs.Lookup( tag ); + if( nullptr != p ) + return p->m_value; + const size_t newTag = tagIDs.GetCount() + 1; + if( newTag <= 0xFFFF ) + { + tagIDs.SetAt( tag, (uint16_t)newTag ); + tagNames.push_back( tag ); + return (uint16_t)newTag; + } + throw DISP_E_OVERFLOW; +#else + return 0; +#endif +}
\ No newline at end of file |
