summaryrefslogtreecommitdiffstats
path: root/Whisper/D3D/device.cpp
blob: c1356326380073e5c956faee327501c8681073f9 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
#include "stdafx.h"
#include "device.h"
#include <immintrin.h>
#include <ammintrin.h>
#pragma comment(lib, "D3D11.lib")
#include "RenderDoc/renderDoc.h"
#include "../API/eGpuModelFlags.h"

namespace DirectCompute
{
	// Direct3D 11 device shared by this module; filled in by createDevice()
	CComPtr<ID3D11Device> g_device;
	// Device context returned by D3D11CreateDevice together with g_device
	CComPtr<ID3D11DeviceContext> g_context;
	// Feature level written by D3D11CreateDevice; starts out as zero
	D3D_FEATURE_LEVEL g_featureLevel = static_cast<D3D_FEATURE_LEVEL>( 0 );

	// Non-owning pointer to the global device; nullptr when no device is held
	ID3D11Device* device()
	{
		return g_device.p;
	}
	// Non-owning pointer to the global device context; nullptr when no context is held
	ID3D11DeviceContext* context()
	{
		return g_context.p;
	}
	// Current value of g_featureLevel, as written by D3D11CreateDevice
	D3D_FEATURE_LEVEL featureLevel()
	{
		return g_featureLevel;
	}

	// Drop the references held by this module: the context first, then the device
	void terminate()
	{
		g_context.Release();
		g_device.Release();
	}

	// Create the global D3D11 device and context on the default hardware adapter.
	// Returns S_FALSE when g_device is already set, S_OK when a device was created,
	// otherwise the failure code of the last D3D11CreateDevice attempt.
	static HRESULT createDevice()
	{
		if( g_device )
			return S_FALSE;

		// Acceptable feature levels, the most capable one first
		const std::array<D3D_FEATURE_LEVEL, 4> levels = { D3D_FEATURE_LEVEL_12_1 , D3D_FEATURE_LEVEL_12_0 , D3D_FEATURE_LEVEL_11_1 , D3D_FEATURE_LEVEL_11_0 };
		constexpr UINT levelsCount = (UINT)levels.size();

		UINT flags = D3D11_CREATE_DEVICE_DISABLE_GPU_TIMEOUT | D3D11_CREATE_DEVICE_SINGLETHREADED;
		bool renderDoc = initializeRenderDoc();
#ifdef _DEBUG
		if( !renderDoc )
		{
			// Last time I checked, RenderDoc crashed with debug version of D3D11 runtime
			// That's why the debug layer is only requested when RenderDoc was not initialized
			flags |= D3D11_CREATE_DEVICE_DEBUG;
		}
#endif

		// Both attempts only differ in the creation flags
		const auto tryCreate = [ & ]( UINT createFlags ) -> HRESULT
		{
			return D3D11CreateDevice( nullptr, D3D_DRIVER_TYPE_HARDWARE, nullptr, createFlags, levels.data(), levelsCount, D3D11_SDK_VERSION, &g_device, &g_featureLevel, &g_context );
		};

		HRESULT hr = tryCreate( flags );
		if( SUCCEEDED( hr ) )
			return S_OK;

		// D3D11_CREATE_DEVICE_DISABLE_GPU_TIMEOUT: This value is not supported until Direct3D 11.1
		// https://learn.microsoft.com/en-us/windows/win32/api/d3d11/ne-d3d11-d3d11_create_device_flag
		// Retry without that flag. A plain bit mask is used instead of the _andn_u32 intrinsic,
		// because that intrinsic compiles to a BMI1 instruction which is unavailable on older CPUs.
		flags &= ~static_cast<UINT>( D3D11_CREATE_DEVICE_DISABLE_GPU_TIMEOUT );

		hr = tryCreate( flags );
		return SUCCEEDED( hr ) ? S_OK : hr;
	}

	// Mutable storage for the GPU information, written by queryDeviceInfo()
	sGpuInfo s_gpuInfo{};
	// Read-only alias of s_gpuInfo
	const sGpuInfo& gpuInfo = s_gpuInfo;

	using Whisper::eGpuModelFlags;

	// Combine two model flags into a plain 32-bit mask
	inline constexpr uint32_t operator|( eGpuModelFlags a, eGpuModelFlags b )
	{
		return static_cast<uint32_t>( a ) | static_cast<uint32_t>( b );
	}
	// True when the mask has at least one bit in common with the specified flag
	inline bool operator&( uint32_t flags, eGpuModelFlags bit )
	{
		const uint32_t masked = flags & static_cast<uint32_t>( bit );
		return masked != 0;
	}
	// Resolve a three-state switch: the `enabled` bit wins, then the `disabled` bit,
	// and when neither bit is set the result is the `def` default value.
	inline bool merge3( uint32_t flags, eGpuModelFlags enabled, eGpuModelFlags disabled, bool def )
	{
		const bool forcedOn = flags & enabled;
		const bool forcedOff = flags & disabled;
		return forcedOn || ( !forcedOff && def );
	}

	// Fill s_gpuInfo with the description of the adapter behind g_device,
	// and compute the effective flags from the user-supplied model flags.
	// Returns OLE_E_BLANK when there's no device, a failure code when one of the DXGI calls fails, S_OK otherwise.
	static HRESULT queryDeviceInfo( uint32_t flags )
	{
		if( nullptr == g_device )
			return OLE_E_BLANK;
		CComPtr<IDXGIDevice> dd;
		CHECK( g_device.QueryInterface( &dd ) );

		CComPtr<IDXGIAdapter> adapter;
		CHECK( dd->GetAdapter( &adapter ) );

		// GetDesc returns HRESULT; don't consume the structure unless the call succeeded
		DXGI_ADAPTER_DESC desc = {};
		CHECK( adapter->GetDesc( &desc ) );

		// The description is a fixed-size array; never read past its end if the terminator is missing
		constexpr size_t descCapacity = sizeof( desc.Description ) / sizeof( desc.Description[ 0 ] );
		const size_t descLen = wcsnlen_s( desc.Description, descCapacity );
		const wchar_t* rsi = &desc.Description[ 0 ];
		s_gpuInfo.description.assign( rsi, rsi + descLen );
		s_gpuInfo.vendor = (eGpuVendor)desc.VendorId;
		s_gpuInfo.device = (uint16_t)desc.DeviceId;
		s_gpuInfo.revision = (uint16_t)desc.Revision;
		s_gpuInfo.subsystem = desc.SubSysId;
		s_gpuInfo.vramDedicated = desc.DedicatedVideoMemory;
		s_gpuInfo.ramDedicated = desc.DedicatedSystemMemory;
		s_gpuInfo.ramShared = desc.SharedSystemMemory;

		// Set up the effective flags.
		// Explicit user flags take priority; without them both features default to enabled on AMD GPUs only.
		uint8_t ef = 0;
		const bool amd = ( s_gpuInfo.vendor == eGpuVendor::AMD );
		if( merge3( flags, eGpuModelFlags::Wave64, eGpuModelFlags::Wave32, amd ) )
			ef |= (uint8_t)eGpuEffectiveFlags::Wave64;
		if( merge3( flags, eGpuModelFlags::UseReshapedMatMul, eGpuModelFlags::NoReshapedMatMul, amd ) )
			ef |= (uint8_t)eGpuEffectiveFlags::ReshapedMatMul;
		s_gpuInfo.flags = (eGpuEffectiveFlags)ef;

		if( willLogMessage( Whisper::eLogLevel::Debug ) )
		{
			// D3D_FEATURE_LEVEL values keep the major version in bits 12-15, and the minor one in bits 8-11
			const int fl = g_featureLevel;
			const int flMajor = ( fl >> 12 ) & 0xF;
			const int flMinor = ( fl >> 8 ) & 0xF;

			logDebug16( L"Using GPU \"%s\", feature level %i.%i, effective flags %S | %S",
				s_gpuInfo.description.c_str(), flMajor, flMinor,
				s_gpuInfo.wave64() ? "Wave64" : "Wave32",
				s_gpuInfo.useReshapedMatMul() ? "UseReshapedMatMul" : "NoReshapedMatMul" );
		}
		return S_OK;
	}

	// Reject flag combinations which request two opposite settings at once.
	// Returns E_INVALIDARG after logging an error for the first conflicting pair found, S_OK otherwise.
	static HRESULT validateFlags( uint32_t flags )
	{
		struct sExclusivePair
		{
			uint32_t mask;
			const char* first;
			const char* second;
		};
		// The order of the entries defines which conflict is reported first
		const sExclusivePair pairs[] =
		{
			{ eGpuModelFlags::Wave32 | eGpuModelFlags::Wave64, "Wave32", "Wave64" },
			{ eGpuModelFlags::NoReshapedMatMul | eGpuModelFlags::UseReshapedMatMul, "NoReshapedMatMul", "UseReshapedMatMul" },
		};

		for( const sExclusivePair& p : pairs )
		{
			// A conflict requires both bits of the pair to be set
			if( ( flags & p.mask ) != p.mask )
				continue;
			logError( u8"eGpuModelFlags.%s and eGpuModelFlags.%s are mutually exclusive", p.first, p.second );
			return E_INVALIDARG;
		}
		return S_OK;
	}

	// Validate the flags, create the global D3D11 device, and collect the GPU information.
	// Returns E_INVALIDARG for conflicting flags, and S_FALSE when the device already existed,
	// in which case the GPU information is not queried again.
	// Any failure to create the device or to query the adapter is returned to the caller.
	HRESULT initialize( uint32_t flags )
	{
		CHECK( validateFlags( flags ) );
		HRESULT hr = createDevice();
		if( hr != S_OK )
			return hr;

		hr = queryDeviceInfo( flags );
		if( FAILED( hr ) )
		{
			// Without the GPU info the effective flags are unknown.
			// Release the new device so a failed initialize doesn't leave a half-initialized state behind.
			terminate();
			return hr;
		}
		return S_OK;
	}

	// Report the byte width of a buffer as a vector.
	// Staging buffers go through setLow_size, all other usages through setHigh_size;
	// presumably the two halves count system memory and video memory - confirm against these helpers.
	// A null buffer yields a zero vector.
	__m128i __declspec( noinline ) bufferMemoryUsage( ID3D11Buffer* buffer )
	{
		if( nullptr == buffer )
			return _mm_setzero_si128();

		D3D11_BUFFER_DESC bufferDesc;
		buffer->GetDesc( &bufferDesc );

		if( D3D11_USAGE_STAGING == bufferDesc.Usage )
			return setLow_size( bufferDesc.ByteWidth );
		return setHigh_size( bufferDesc.ByteWidth );
	}

	// Report the memory usage of the buffer behind a shader resource view.
	// A null view, or a view of a resource which isn't a buffer, yields a zero vector.
	__m128i __declspec( noinline ) resourceMemoryUsage( ID3D11ShaderResourceView* srv )
	{
		if( nullptr == srv )
			return _mm_setzero_si128();

		CComPtr<ID3D11Resource> resource;
		srv->GetResource( &resource );

		CComPtr<ID3D11Buffer> buffer;
		const HRESULT hr = resource.QueryInterface( &buffer );
		if( FAILED( hr ) )
		{
			assert( false );	// We don't use textures in this project
			return _mm_setzero_si128();
		}
		return bufferMemoryUsage( buffer );
	}
}