summaryrefslogtreecommitdiffstats
path: root/Whisper/Utils/parallelFor.h
blob: 15cd603b291d16abc5de4bfa20fcf2db01619249 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
#pragma once

namespace Whisper
{
	// Signature of a callback to offload to the thread pool.
	//   ith - integer index handed to the callback; presumably the zero-based index of this invocation
	//         within the parallel loop (implementation not visible in this header - confirm there).
	//   ctx - opaque pointer for the callback; presumably the same value the caller passed to parallelFor().
	// Returns an HRESULT. The function type is noexcept, so a callback must not let exceptions escape.
	using pfnParallelForCallback = HRESULT( * )( int ith, void* ctx ) noexcept;

	// A simple parallel for implementation; Windows includes a decent thread pool since Vista (2006)
	//   pfn          - the callback to run on the thread pool
	//   threadsCount - presumably how many callback invocations to issue, each with a distinct `ith`
	//                  (NOTE(review): exact semantics live in the .cpp - confirm there)
	//   ctx          - opaque pointer, presumably forwarded unchanged as the callback's `ctx` argument
	// Returns an HRESULT; how per-callback failures are aggregated is not visible from this declaration.
	HRESULT parallelFor( pfnParallelForCallback pfn, int threadsCount, void* ctx );

	// Use this version when you want to use the thread pool repeatedly, for the same work.
	// This class caches the native work handle, saving a couple of WinAPI calls.
	//
	// Usage: derive from this class, implement threadPoolCallback(), call create() once,
	// then call parallelFor() as many times as needed.
	//
	// The class owns a native handle, so it is neither copy-constructible nor copy-assignable;
	// because copy operations are user-declared, no move operations are generated either.
	//
	// NOTE(review): the destructor is public and non-virtual while the class has a virtual method.
	// Never delete a derived object through a ThreadPoolWork pointer.
	class alignas( 64 ) ThreadPoolWork
	{
		// Native thread pool work handle; nullptr until one is created (presumably by create()).
		PTP_WORK work = nullptr;

		// We want these volatile fields in another cache line from the rest of the data of this class.
		// threadIndex field is concurrently modified by different CPU cores, and these cache coherency protocols are slow.
		// OTOH, work and callback fields of this class only change when created / destroyed, that cache line is shared by CPU cores without any performance penalty.
		alignas( 64 ) volatile long threadIndex = 0;
		// Result code shared with the callbacks; starts as E_UNEXPECTED.
		volatile HRESULT status = E_UNEXPECTED;

		// Static trampoline with the parameter list of a Win32 thread pool work callback.
		// Presumably `pv` carries the `this` pointer and the call is forwarded to threadPoolCallback().
		static void __stdcall callbackStatic( PTP_CALLBACK_INSTANCE Instance, PVOID pv, PTP_WORK Work );

	protected:
		// Implemented by derived classes: the body of the parallel loop.
		// `ith` is the index passed to this invocation; must not throw.
		virtual HRESULT threadPoolCallback( int ith ) noexcept = 0;

	public:
		ThreadPoolWork() = default;

		// Copying would duplicate the raw work handle and the in-flight counters,
		// so both copy construction and copy assignment are disabled.
		ThreadPoolWork( const ThreadPoolWork& ) = delete;
		ThreadPoolWork& operator=( const ThreadPoolWork& ) = delete;

		~ThreadPoolWork();

		// Prepares the object for use; returns an HRESULT describing the outcome.
		// Presumably creates the cached native work handle - see the implementation file.
		HRESULT create();

		// Runs threadPoolCallback() on the thread pool; `threadsCount` is presumably the number of invocations.
		// Returns an HRESULT; never throws.
		HRESULT parallelFor( int threadsCount ) noexcept;
	};
}