diff options
| author | Konstantin <const@const.me> | 2023-01-18 20:35:30 +0100 |
|---|---|---|
| committer | Konstantin <const@const.me> | 2023-01-18 20:35:30 +0100 |
| commit | 11c399b70c7ad5664b6060b39632e6b9fa815350 (patch) | |
| tree | 763afed51699017749d3f0398f16928aad7544a4 /Whisper/D3D/device.cpp | |
| parent | ad097a744759c6a78e1b33ea9d2b4b2af01c529d (diff) | |
Optional startup flags to override performance-related defaults for the compute shaders
Diffstat (limited to 'Whisper/D3D/device.cpp')
| -rw-r--r-- | Whisper/D3D/device.cpp | 54 |
1 file changed, 51 insertions, 3 deletions
```diff
diff --git a/Whisper/D3D/device.cpp b/Whisper/D3D/device.cpp
index 4eb5a60..5b0a6e8 100644
--- a/Whisper/D3D/device.cpp
+++ b/Whisper/D3D/device.cpp
@@ -4,6 +4,7 @@
 #include <ammintrin.h>
 #pragma comment(lib, "D3D11.lib")
 #include "RenderDoc/renderDoc.h"
+#include "../API/eGpuModelFlags.h"
 
 namespace DirectCompute
 {
@@ -54,7 +55,25 @@ namespace DirectCompute
 	sGpuInfo s_gpuInfo = {};
 	const sGpuInfo& gpuInfo = s_gpuInfo;
 
-	static HRESULT queryDeviceInfo()
+	using Whisper::eGpuModelFlags;
+	inline constexpr uint32_t operator|( eGpuModelFlags a, eGpuModelFlags b )
+	{
+		return (uint32_t)a | (uint32_t)b;
+	}
+	inline bool operator&( uint32_t flags, eGpuModelFlags bit )
+	{
+		return 0 != ( flags & (uint32_t)bit );
+	}
+	inline bool merge3( uint32_t flags, eGpuModelFlags enabled, eGpuModelFlags disabled, bool def )
+	{
+		if( flags & enabled )
+			return true;
+		if( flags & disabled )
+			return false;
+		return def;
+	}
+
+	static HRESULT queryDeviceInfo( uint32_t flags )
 	{
 		if( nullptr == g_device )
 			return OLE_E_BLANK;
@@ -77,15 +96,44 @@ namespace DirectCompute
 		s_gpuInfo.vramDedicated = desc.DedicatedVideoMemory;
 		s_gpuInfo.ramDedicated = desc.DedicatedSystemMemory;
 		s_gpuInfo.ramShared = desc.SharedSystemMemory;
+
+		// Set up these flags
+		uint8_t ef = 0;
+		const bool amd = ( s_gpuInfo.vendor == eGpuVendor::AMD );
+		if( merge3( flags, eGpuModelFlags::Wave64, eGpuModelFlags::Wave32, amd ) )
+			ef |= (uint8_t)eGpuEffectiveFlags::Wave64;
+		if( merge3( flags, eGpuModelFlags::UseReshapedMatMul, eGpuModelFlags::NoReshapedMatMul, amd ) )
+			ef |= (uint8_t)eGpuEffectiveFlags::ReshapedMatMul;
+		s_gpuInfo.flags = (eGpuEffectiveFlags)ef;
+
+		return S_OK;
+	}
+
+	static HRESULT validateFlags( uint32_t flags )
+	{
+		constexpr uint32_t waveBoth = eGpuModelFlags::Wave32 | eGpuModelFlags::Wave64;
+		if( ( flags & waveBoth ) == waveBoth )
+		{
+			logError( u8"eGpuModelFlags.%s and eGpuModelFlags.%s are mutually exclusive", "Wave32", "Wave64" );
+			return E_INVALIDARG;
+		}
+
+		constexpr uint32_t reshapedBoth = eGpuModelFlags::NoReshapedMatMul | eGpuModelFlags::UseReshapedMatMul;
+		if( ( flags & reshapedBoth ) == reshapedBoth )
+		{
+			logError( u8"eGpuModelFlags.%s and eGpuModelFlags.%s are mutually exclusive", "NoReshapedMatMul", "UseReshapedMatMul" );
+			return E_INVALIDARG;
+		}
 		return S_OK;
 	}
 
-	HRESULT initialize()
+	HRESULT initialize( uint32_t flags )
 	{
+		CHECK( validateFlags( flags ) );
 		HRESULT hr = createDevice();
 		if( hr != S_OK )
 			return hr;
-		queryDeviceInfo();
+		queryDeviceInfo( flags );
 		return S_OK;
 	}
 
```
