Whisper/ML/Tensor.h


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78

#pragma once
#include "TensorShape.h"
#include "TensorGpuViews.h"
#include "../D3D/enums.h"

namespace DirectCompute
{
	// A minimal tensor object sufficient to compute things on GPU, with compute shaders
	// This class only takes 48 bytes in system memory, and is very cheap to make copies 'coz GPU objects are reference counted.
	class Tensor : public TensorShape, public TensorGpuViews
	{
		CComPtr<ID3D11Buffer> getBuffer() const;

		struct TensorType
		{
			eDataType type;
			eBufferUse usage;
			bool hasInitialData;
		};
#ifdef _DEBUG
		// In debug builds, we include a few pieces of data to this class.
		TensorType dbgType;
#endif
	protected:
		HRESULT create( eDataType type, std::initializer_list<uint32_t> sizeElements, eBufferUse usage, CComPtr<ID3D11Buffer>& buffer, const void* rsi, ID3D11Buffer** ppStagingBuffer );

		static uint32_t dxgiSizeof( DXGI_FORMAT format );

		void downloadImpl( const D3D11_SHADER_RESOURCE_VIEW_DESC& viewDesc, uint32_t countElements, size_t cbElement, void* rdi ) const;

	public:
		Tensor() = default;

		// These copy operators don't copy any data, they merely increment ref.counter of the GPU resources
		Tensor( const Tensor& );
		Tensor( Tensor&& that ) noexcept;
		Tensor& operator=( const Tensor& that );
		Tensor& operator=( Tensor&& that ) noexcept;

		// Move the provided buffer views into this newly created tensor, and assign the shape
		// This destroys old values in the smart pointers
		Tensor( const TensorShape& shape, CComPtr<ID3D11ShaderResourceView>& srv, CComPtr<ID3D11UnorderedAccessView>& uav ) noexcept;

		Tensor( const TensorShape& shape, const TensorGpuViews& views );

		// Create a tensor from the GGML's one
		HRESULT create( const ggml_tensor& ggml, eBufferUse usage, bool uploadData );

		// Create a new dense tensor of the specified size in elements, without initial data
		HRESULT create( eDataType type, std::initializer_list<uint32_t> sizeElements );
		HRESULT create( eDataType type, const std::array<uint32_t, 4>& sizeElements );
		HRESULT createImmutable( eDataType type, const std::array<int, 4>& size, const void* rsi );

		eDataType getType() const;

		// This method should probably only be used to test things
		// TensorEx is better for production usage, because it creates staging buffer in advance.
		void download( std::vector<float>& vec ) const;
		void download( std::vector<uint16_t>& vec ) const;

		// ggml_reshape_3d
		Tensor reshape3d( uint32_t ne0, uint32_t ne1, uint32_t ne2 ) const;

		inline void dbgSetType( eDataType dt, bool hasData = false, eBufferUse use = eBufferUse::ReadWrite )
		{
#ifdef _DEBUG
			dbgType.type = dt;
			dbgType.hasInitialData = hasData;
			dbgType.usage = use;
#endif
		}

		__m128i getMemoryUse() const
		{
			return resourceMemoryUsage( srv );
		}
	};
}