diff options
| author | Konstantin <const@const.me> | 2023-01-18 20:35:30 +0100 |
|---|---|---|
| committer | Konstantin <const@const.me> | 2023-01-18 20:35:30 +0100 |
| commit | 11c399b70c7ad5664b6060b39632e6b9fa815350 (patch) | |
| tree | 763afed51699017749d3f0398f16928aad7544a4 /WhisperNet | |
| parent | ad097a744759c6a78e1b33ea9d2b4b2af01c529d (diff) | |
Optional startup flags to override performance-related defaults for the compute shaders
Diffstat (limited to 'WhisperNet')
| -rw-r--r-- | WhisperNet/API/eGpuModelFlags.cs | 28 | ||||
| -rw-r--r-- | WhisperNet/API/eModelImplementation.cs | 2 | ||||
| -rw-r--r-- | WhisperNet/Library.cs | 2 |
3 files changed, 30 insertions, 2 deletions
```diff
diff --git a/WhisperNet/API/eGpuModelFlags.cs b/WhisperNet/API/eGpuModelFlags.cs
new file mode 100644
index 0000000..106235f
--- /dev/null
+++ b/WhisperNet/API/eGpuModelFlags.cs
@@ -0,0 +1,28 @@
+namespace Whisper
+{
+	/// <summary>These flags affect compute shaders performance (which ones are faster depends on GPU model),<br/>
+	/// and VRAM memory usage (UseReshapedMatMul needs slightly more VRAM).</summary>
+	[Flags]
+	public enum eGpuModelFlags: uint
+	{
+		/// <summary>Equivalent to <c>Wave32 | NoReshapedMatMul</c> on Intel and nVidia GPUs,<br/>
+		/// and <c>Wave64 | UseReshapedMatMul</c> on AMD GPUs</summary>
+		None = 0,
+
+		/// <summary>Use Wave32 version of compute shaders even on AMD GPUs</summary>
+		/// <remarks>Incompatible with <see cref="Wave64" /></remarks>
+		Wave32 = 1,
+
+		/// <summary>Use Wave64 version of compute shaders even on nVidia and Intel GPUs</summary>
+		/// <remarks>Incompatible with <see cref="Wave32" /></remarks>
+		Wave64 = 2,
+
+		/// <summary>Do not use reshaped matrix multiplication shaders on AMD GPUs</summary>
+		/// <remarks>Incompatible with <see cref="UseReshapedMatMul" /></remarks>
+		NoReshapedMatMul = 4,
+
+		/// <summary>Use reshaped matrix multiplication shaders even on nVidia and Intel GPUs</summary>
+		/// <remarks>Incompatible with <see cref="NoReshapedMatMul" /></remarks>
+		UseReshapedMatMul = 8,
+	}
+}
\ No newline at end of file
diff --git a/WhisperNet/API/eModelImplementation.cs b/WhisperNet/API/eModelImplementation.cs
index 1b0a079..a0e61fb 100644
--- a/WhisperNet/API/eModelImplementation.cs
+++ b/WhisperNet/API/eModelImplementation.cs
@@ -1,6 +1,6 @@
 namespace Whisper
 {
-	/// <summary>Implementation value for the <see cref="Library.loadModel(string, eModelImplementation)" /> factory function</summary>
+	/// <summary>Implementation value for the <see cref="Library.loadModel(string, eGpuModelFlags, eModelImplementation)" /> factory function</summary>
 	public enum eModelImplementation: uint
 	{
 		/// <summary>GPGPU implementation based on Direct3D 11.0 compute shaders</summary>
diff --git a/WhisperNet/Library.cs b/WhisperNet/Library.cs
index 72ecb6e..5bdb0a3 100644
--- a/WhisperNet/Library.cs
+++ b/WhisperNet/Library.cs
@@ -35,7 +35,7 @@ namespace Whisper
 		/// <remarks>Models are large, depending on user’s disk speed this might take a while, and this function blocks the calling thread.<br/>
 		/// Consider <see cref="loadModelAsync" /> instead.</remarks>
 		/// <seealso href="https://huggingface.co/datasets/ggerganov/whisper.cpp" />
-		public static iModel loadModel( string path, eModelImplementation impl = eModelImplementation.GPU )
+		public static iModel loadModel( string path, eGpuModelFlags flags = eGpuModelFlags.None, eModelImplementation impl = eModelImplementation.GPU )
 		{
 			iModel model;
 			sLoadModelCallbacks callbacks = default;
```
