summary refs log tree commit diff stats
path: root/WhisperNet
diff options
context:
space:
mode:
author Konstantin <const@const.me> 2023-01-18 20:35:30 +0100
committer Konstantin <const@const.me> 2023-01-18 20:35:30 +0100
commit 11c399b70c7ad5664b6060b39632e6b9fa815350 (patch)
tree 763afed51699017749d3f0398f16928aad7544a4 /WhisperNet
parent ad097a744759c6a78e1b33ea9d2b4b2af01c529d (diff)
Optional startup flags to override performance-related defaults for the compute shaders
Diffstat (limited to 'WhisperNet')
-rw-r--r-- WhisperNet/API/eGpuModelFlags.cs 28
-rw-r--r-- WhisperNet/API/eModelImplementation.cs 2
-rw-r--r-- WhisperNet/Library.cs 2
3 files changed, 30 insertions, 2 deletions
diff --git a/WhisperNet/API/eGpuModelFlags.cs b/WhisperNet/API/eGpuModelFlags.cs
new file mode 100644
index 0000000..106235f
--- /dev/null
+++ b/WhisperNet/API/eGpuModelFlags.cs
@@ -0,0 +1,28 @@
+namespace Whisper
+{
+ /// <summary>Flags which affect the performance of the compute shaders (which variant is faster depends on the GPU model),<br/>
+ /// and the VRAM usage (<see cref="UseReshapedMatMul" /> needs slightly more VRAM).</summary>
+ [Flags]
+ public enum eGpuModelFlags: uint
+ {
+ /// <summary>No flags set. Equivalent to <c>Wave32 | NoReshapedMatMul</c> on Intel and nVidia GPUs,<br/>
+ /// and to <c>Wave64 | UseReshapedMatMul</c> on AMD GPUs</summary>
+ None = 0,
+
+ /// <summary>Use the Wave32 version of the compute shaders, even on AMD GPUs</summary>
+ /// <remarks>Incompatible with <see cref="Wave64" />, do not set both flags</remarks>
+ Wave32 = 1,
+
+ /// <summary>Use the Wave64 version of the compute shaders, even on nVidia and Intel GPUs</summary>
+ /// <remarks>Incompatible with <see cref="Wave32" />, do not set both flags</remarks>
+ Wave64 = 2,
+
+ /// <summary>Do not use the reshaped matrix multiplication shaders, even on AMD GPUs</summary>
+ /// <remarks>Incompatible with <see cref="UseReshapedMatMul" />, do not set both flags</remarks>
+ NoReshapedMatMul = 4,
+
+ /// <summary>Use the reshaped matrix multiplication shaders, even on nVidia and Intel GPUs</summary>
+ /// <remarks>Incompatible with <see cref="NoReshapedMatMul" />, do not set both flags</remarks>
+ UseReshapedMatMul = 8,
+ }
+} \ No newline at end of file
diff --git a/WhisperNet/API/eModelImplementation.cs b/WhisperNet/API/eModelImplementation.cs
index 1b0a079..a0e61fb 100644
--- a/WhisperNet/API/eModelImplementation.cs
+++ b/WhisperNet/API/eModelImplementation.cs
@@ -1,6 +1,6 @@
namespace Whisper
{
- /// <summary>Implementation value for the <see cref="Library.loadModel(string, eModelImplementation)" /> factory function</summary>
+ /// <summary>Implementation value for the <see cref="Library.loadModel(string, eGpuModelFlags, eModelImplementation)" /> factory function</summary>
public enum eModelImplementation: uint
{
/// <summary>GPGPU implementation based on Direct3D 11.0 compute shaders</summary>
diff --git a/WhisperNet/Library.cs b/WhisperNet/Library.cs
index 72ecb6e..5bdb0a3 100644
--- a/WhisperNet/Library.cs
+++ b/WhisperNet/Library.cs
@@ -35,7 +35,7 @@ namespace Whisper
/// <remarks>Models are large, depending on user’s disk speed this might take a while, and this function blocks the calling thread.<br/>
/// Consider <see cref="loadModelAsync" /> instead.</remarks>
/// <seealso href="https://huggingface.co/datasets/ggerganov/whisper.cpp" />
- public static iModel loadModel( string path, eModelImplementation impl = eModelImplementation.GPU )
+ public static iModel loadModel( string path, eGpuModelFlags flags = eGpuModelFlags.None, eModelImplementation impl = eModelImplementation.GPU )
{
iModel model;
sLoadModelCallbacks callbacks = default;