diff options
| -rw-r--r-- | Whisper/Utils/ProfileCollection.cpp | 1 | ||||
| -rw-r--r-- | Whisper/Utils/ProfileCollection.h | 1 | ||||
| -rw-r--r-- | Whisper/Whisper/ContextImpl.capture.cpp | 1 | ||||
| -rw-r--r-- | Whisper/Whisper/ContextImpl.cpp | 32 | ||||
| -rw-r--r-- | Whisper/Whisper/ContextImpl.misc.cpp | 4 |
5 files changed, 17 insertions, 22 deletions
diff --git a/Whisper/Utils/ProfileCollection.cpp b/Whisper/Utils/ProfileCollection.cpp index eaf737c..a463655 100644 --- a/Whisper/Utils/ProfileCollection.cpp +++ b/Whisper/Utils/ProfileCollection.cpp @@ -49,6 +49,7 @@ namespace { #define V(x) case eCpuBlock::x: return #x V( LoadModel ); + V( RunComplete ); V( Run ); V( Callbacks ); V( Spectrogram ); diff --git a/Whisper/Utils/ProfileCollection.h b/Whisper/Utils/ProfileCollection.h index a03319e..3d25609 100644 --- a/Whisper/Utils/ProfileCollection.h +++ b/Whisper/Utils/ProfileCollection.h @@ -15,6 +15,7 @@ namespace Whisper enum struct eCpuBlock : uint8_t { LoadModel, + RunComplete, Run, Callbacks, Spectrogram, diff --git a/Whisper/Whisper/ContextImpl.capture.cpp b/Whisper/Whisper/ContextImpl.capture.cpp index 86dc0d2..ba4b10e 100644 --- a/Whisper/Whisper/ContextImpl.capture.cpp +++ b/Whisper/Whisper/ContextImpl.capture.cpp @@ -404,6 +404,7 @@ HRESULT COMLIGHTCALL ContextImpl::runCapture( const sFullParams& params, const s } } + auto profCompleteCpu = profiler.cpuBlock( eCpuBlock::RunComplete ); Capture capture{ callbacks, reader, params, this, profiler }; CHECK( capture.startup( reader ) ); diff --git a/Whisper/Whisper/ContextImpl.cpp b/Whisper/Whisper/ContextImpl.cpp index a8e16f5..175af74 100644 --- a/Whisper/Whisper/ContextImpl.cpp +++ b/Whisper/Whisper/ContextImpl.cpp @@ -15,6 +15,7 @@ ContextImpl::ContextImpl( const WhisperModel& modelData, iModel* modelPointer ) HRESULT ContextImpl::encode( iSpectrogram& mel, int seek ) { + auto prof = profiler.cpuBlock( eCpuBlock::Encode ); // whisper_encode using namespace DirectCompute; @@ -254,6 +255,7 @@ HRESULT COMLIGHTCALL ContextImpl::runFullImpl( const sFullParams& params, const // main loop int seek = seek_start; + // Start measuring "Run" profiler value, both CPU and GPU times auto prof = context.completeProfiler(); while( true ) { @@ -262,23 +264,16 @@ HRESULT COMLIGHTCALL ContextImpl::runFullImpl( const sFullParams& params, const const int pos = seek - seek_start; const int total = seek_end - seek_start; const double percentage = (double)pos / (double)total; + auto cb = profiler.cpuBlock( eCpuBlock::Callbacks ); CHECK( progress.pfn( percentage, this, progress.pv ) ); } - /* - const int progress_cur = ( 100 * ( seek - seek_start ) ) / ( seek_end - seek_start ); - while( progress_cur >= progress_prev + progress_step ) - { - progress_prev += progress_step; - if( params.flag( eFullParamsFlags::PrintProgress ) ) - logInfo( u8"%s: progress = %3d%%", __func__, progress_prev ); - } - */ if( seek + 100 >= seek_end ) break; if( nullptr != params.encoder_begin_callback ) { + auto cb = profiler.cpuBlock( eCpuBlock::Callbacks ); HRESULT hr = params.encoder_begin_callback( this, params.encoder_begin_callback_user_data ); if( FAILED( hr ) ) return hr; @@ -323,6 +318,7 @@ HRESULT COMLIGHTCALL ContextImpl::runFullImpl( const sFullParams& params, const bool has_ts = false; // have we already sampled a non-beg timestamp token for the current segment? { + // Measure "Decode" profiler value, both CPU and GPU times auto prof = context.decodeProfiler(); for( int i = 0, n_max = model.parameters.n_text_ctx / 2 - 4; i < n_max; i++ ) { @@ -402,7 +398,6 @@ HRESULT COMLIGHTCALL ContextImpl::runFullImpl( const sFullParams& params, const } } } - if( failed ) { logError( u8"%s: failed to generate timestamp token - skipping one second", __func__ ); @@ -443,12 +438,9 @@ HRESULT COMLIGHTCALL ContextImpl::runFullImpl( const sFullParams& params, const if( params.flag( eFullParamsFlags::PrintRealtime ) ) { if( params.flag( eFullParamsFlags::PrintTimestamps ) ) - printf( "[%s --> %s] %s\n", to_timestamp( tt0 ).c_str(), to_timestamp( tt1 ).c_str(), text.c_str() ); + logDebug( u8"[%s --> %s] %s", to_timestamp( tt0 ).c_str(), to_timestamp( tt1 ).c_str(), text.c_str() ); else - { - printf( "%s", text.c_str() ); - fflush( stdout ); - } + logDebug( u8"%s", text.c_str() ); } result_all.push_back( { tt0, tt1, text, {} } ); @@ -465,6 +457,7 @@ HRESULT COMLIGHTCALL ContextImpl::runFullImpl( const sFullParams& params, const } if( nullptr != params.new_segment_callback ) { + auto cb = profiler.cpuBlock( eCpuBlock::Callbacks ); HRESULT hr = params.new_segment_callback( this, n_new, params.new_segment_callback_user_data ); if( FAILED( hr ) ) return hr; @@ -490,12 +483,9 @@ HRESULT COMLIGHTCALL ContextImpl::runFullImpl( const sFullParams& params, const if( params.flag( eFullParamsFlags::PrintRealtime ) ) { if( params.flag( eFullParamsFlags::PrintTimestamps ) ) - printf( "[%s --> %s] %s\n", to_timestamp( tt0 ).c_str(), to_timestamp( tt1 ).c_str(), text.c_str() ); + logDebug( u8"[%s --> %s] %s", to_timestamp( tt0 ).c_str(), to_timestamp( tt1 ).c_str(), text.c_str() ); else - { - printf( "%s", text.c_str() ); - fflush( stdout ); - } + logDebug( u8"%s", text.c_str() ); } result_all.push_back( { tt0, tt1, text, {} } ); @@ -511,6 +501,7 @@ HRESULT COMLIGHTCALL ContextImpl::runFullImpl( const sFullParams& params, const } if( nullptr != params.new_segment_callback ) { + auto cb = profiler.cpuBlock( eCpuBlock::Callbacks ); HRESULT hr = params.new_segment_callback( this, n_new, params.new_segment_callback_user_data ); if( FAILED( hr ) ) return hr; @@ -522,6 +513,7 @@ HRESULT COMLIGHTCALL ContextImpl::runFullImpl( const sFullParams& params, const if( nullptr != progress.pfn ) { + auto cb = profiler.cpuBlock( eCpuBlock::Callbacks ); CHECK( progress.pfn( 1.0, this, progress.pv ) ); } return S_OK; diff --git a/Whisper/Whisper/ContextImpl.misc.cpp b/Whisper/Whisper/ContextImpl.misc.cpp index 98d2164..9ce5000 100644 --- a/Whisper/Whisper/ContextImpl.misc.cpp +++ b/Whisper/Whisper/ContextImpl.misc.cpp @@ -348,7 +348,7 @@ HRESULT COMLIGHTCALL ContextImpl::runFull( const sFullParams& params, const iAud #endif CHECK( buffer->getTime( mediaTimeOffset ) ); - auto profCompleteCpu = profiler.cpuBlock( eCpuBlock::Run ); + auto profCompleteCpu = profiler.cpuBlock( eCpuBlock::RunComplete ); { auto p = profiler.cpuBlock( eCpuBlock::Spectrogram ); CHECK( spectrogram.pcmToMel( buffer, model.filters, params.cpuThreads ) ); @@ -382,7 +382,7 @@ HRESULT COMLIGHTCALL ContextImpl::runStreamed( const sFullParams& params, const } mediaTimeOffset = 0; - auto profCompleteCpu = profiler.cpuBlock( eCpuBlock::Run ); + auto profCompleteCpu = profiler.cpuBlock( eCpuBlock::RunComplete ); CComPtr<IMFSourceReader> mfReader; CHECK( reader->getReader( &mfReader ) ); |
