summary refs log tree commit diff stats
path: root/Whisper/Utils/ProfileCollection.h
blob: 3d256095303db8ce3daf819b2602582dd7b08dce (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
#pragma once
#include <atlcoll.h>
#include "CpuProfiler.h"

// Forward declarations of the DirectCompute enums accepted by Whisper::ProfileCollection::measure().
// Only the underlying integer type is fixed here; the enumerators themselves are not listed in this header.
namespace DirectCompute
{
	// Opaque 16-bit enum; used as the argument of ProfileCollection::measure( eComputeShader )
	enum struct eComputeShader : uint16_t;
	// Opaque 16-bit enum; used as the argument of ProfileCollection::measure( eProfilerBlock )
	enum struct eProfilerBlock : uint16_t;
}

namespace Whisper
{
	struct WhisperModel;

	// Identifies a CPU-side section measured by ProfileCollection.
	// Accepted by ProfileCollection::measure( eCpuBlock ) and ProfileCollection::cpuBlock().
	// The enumerators have no explicit values, so they are numbered consecutively from 0 in declaration order;
	// inserting or reordering entries changes those numbers.
	// NOTE(review): what each enumerator covers is inferred from its name only — confirm against the call sites.
	enum struct eCpuBlock : uint8_t
	{
		LoadModel,
		RunComplete,
		Run,
		Callbacks,
		Spectrogram,
		Sample,
		VAD,
		Encode,
		Decode,
		DecodeStep,
		DecodeLayer,
	};

	// Collection of profiling measurements.
	// Individual entries are looked up with the measure() overloads, keyed by a DirectCompute profiler block,
	// a DirectCompute compute shader, or a CPU block.
	class ProfileCollection
	{
	public:
		ProfileCollection( const WhisperModel& model );

		// Running totals for a single measured item
		struct Measure
		{
			// Number of samples accumulated with add()
			size_t count = 0;
			// Sum of the accumulated samples, expressed in 100-nanosecond ticks
			uint64_t totalTicks = 0;

			// Accumulate one more sample; val is the duration to add to the total
			void add( uint64_t val )
			{
				totalTicks += val;
				++count;
			}

			// Output this entry under the supplied name; implemented outside of this header
			void print( const char* name ) const;

			// Discard everything accumulated so far
			void reset()
			{
				totalTicks = 0;
				count = 0;
			}
		};

		// Access the entry for the specified key.
		// NOTE(review): presumably the entry is created on first use — confirm in the implementation.
		Measure& measure( DirectCompute::eProfilerBlock which );
		Measure& measure( DirectCompute::eComputeShader which );
		Measure& measure( eCpuBlock which );
#if PROFILER_COLLECT_TAGS
		// Same as above, for a compute shader combined with a tag ID
		Measure& measure( DirectCompute::eComputeShader which, uint16_t tag );
#endif
		// Output the collected data; implemented outside of this header
		void print();

		// Implemented outside of this header.
		// NOTE(review): presumably clears the collected measures — confirm in the implementation.
		void reset();

		// Scoped CPU timer.
		// The constructor captures tscNow(); the destructor converts the time elapsed since then
		// with ticksFromTsc() and adds it to the target Measure.
		class CpuRaii
		{
			// Where to report the result; nullptr after this object was moved from
			Measure* dest;
			// Value of tscNow() captured when the measurement started
			const int64_t tsc;

		public:
			// Start measuring; the result goes to `m` once this object is destroyed
			CpuRaii( Measure& m ) :
				dest( &m ),
				tsc( tscNow() )
			{ }

			CpuRaii( const CpuRaii& ) = delete;

			// Take over a running measurement.
			// The source object is disarmed, so only the new object reports on destruction.
			CpuRaii( CpuRaii&& that ) noexcept :
				dest( that.dest ),
				tsc( that.tsc )
			{
				that.dest = nullptr;
			}

			~CpuRaii()
			{
				// Nothing to report for an object which was moved from
				if( nullptr == dest )
					return;

				const int64_t tscDelta = tscNow() - tsc;
				dest->add( ticksFromTsc( tscDelta ) );
			}
		};

		// Start timing the specified CPU block.
		// The returned object adds the elapsed time to the corresponding Measure when it goes out of scope.
		decltype( auto ) cpuBlock( eCpuBlock which )
		{
			Measure& target = measure( which );
			return CpuRaii{ target };
		}

		// Implemented outside of this header.
		// NOTE(review): presumably returns the numeric ID for the tag string, usable with the tagged measure() overload — confirm.
		uint16_t makeTagId( const char* tag );

	private:
		// Measures keyed by a 32-bit integer; how the key is composed is not visible in this header
		CAtlMap<uint32_t, Measure> measures;
		// NOTE(review): presumably guards the maps of this class; the locking code is not visible in this header
		CComAutoCriticalSection critSec;
#if PROFILER_COLLECT_TAGS
		// Tag string pointer => tag ID; the key is the pointer itself
		CAtlMap<const char*, uint16_t> tagIDs;
		std::vector<const char*> tagNames;
		// Measures for tagged compute shaders, keyed by a 32-bit integer
		CAtlMap<uint32_t, Measure> taggedShaders;
		std::vector<uint32_t> taggedKeysTemp;
		// One row of tagged timing data
		struct TaggedTemp
		{
			uint64_t ticks;
			size_t count;
			const char* name;

			// The comparison is deliberately reversed: an ascending sort with this operator
			// places the entries with the largest tick count first
			bool operator<( const TaggedTemp& that ) const
			{
				return that.ticks < ticks;
			}

			void print() const;
		};
		std::vector<TaggedTemp> taggedTimes;
#endif
		// NOTE(review): by the name, a scratch buffer of map keys, presumably reused by print() — confirm
		std::vector<uint32_t> keysTemp;
	};
}