Optional startup flags to override performance-related defaults for the compute shaders

author: Konstantin <const@const.me> 2023-01-18 20:35:30 +0100
committer: Konstantin <const@const.me> 2023-01-18 20:35:30 +0100
commit: 11c399b70c7ad5664b6060b39632e6b9fa815350 (patch)
tree: 763afed51699017749d3f0398f16928aad7544a4 /WhisperNet
parent: ad097a744759c6a78e1b33ea9d2b4b2af01c529d (diff)
3 files changed, 30 insertions, 2 deletions
diff --git a/WhisperNet/API/eGpuModelFlags.cs b/WhisperNet/API/eGpuModelFlags.cs
new file mode 100644
index 0000000..106235f
--- /dev/null
+++ b/WhisperNet/API/eGpuModelFlags.cs
@@ -0,0 +1,28 @@
+namespace Whisper
+{
+	/// <summary>Flags that affect compute shader performance (which variant is faster depends on the GPU model),<br/>
+	/// and VRAM usage (<see cref="UseReshapedMatMul" /> needs slightly more VRAM).</summary>
+	[Flags]
+	public enum eGpuModelFlags: uint
+	{
+		/// <summary>Use the per-vendor defaults: equivalent to <c>Wave32 | NoReshapedMatMul</c> on Intel and nVidia GPUs,<br/>
+		/// and to <c>Wave64 | UseReshapedMatMul</c> on AMD GPUs</summary>
+		None = 0,
+
+		/// <summary>Use the Wave32 version of compute shaders, even on AMD GPUs</summary>
+		/// <remarks>Incompatible with <see cref="Wave64" /></remarks>
+		Wave32 = 1,
+
+		/// <summary>Use the Wave64 version of compute shaders, even on nVidia and Intel GPUs</summary>
+		/// <remarks>Incompatible with <see cref="Wave32" /></remarks>
+		Wave64 = 2,
+
+		/// <summary>Do not use reshaped matrix multiplication shaders, even on AMD GPUs</summary>
+		/// <remarks>Incompatible with <see cref="UseReshapedMatMul" /></remarks>
+		NoReshapedMatMul = 4,
+
+		/// <summary>Use reshaped matrix multiplication shaders, even on nVidia and Intel GPUs</summary>
+		/// <remarks>Incompatible with <see cref="NoReshapedMatMul" /></remarks>
+		UseReshapedMatMul = 8,
+	}
+}
+\ No newline at end of file
diff --git a/WhisperNet/API/eModelImplementation.cs b/WhisperNet/API/eModelImplementation.cs
index 1b0a079..a0e61fb 100644
--- a/WhisperNet/API/eModelImplementation.cs
+++ b/WhisperNet/API/eModelImplementation.cs
@@ -1,6 +1,6 @@
 namespace Whisper
 {
-	/// <summary>Implementation value for the <see cref="Library.loadModel(string, eModelImplementation)" /> factory function</summary>
+	/// <summary>Implementation value for the <see cref="Library.loadModel(string, eGpuModelFlags, eModelImplementation)" /> factory function</summary>
 	public enum eModelImplementation: uint
 	{
 		/// <summary>GPGPU implementation based on Direct3D 11.0 compute shaders</summary>
diff --git a/WhisperNet/Library.cs b/WhisperNet/Library.cs
index 72ecb6e..5bdb0a3 100644
--- a/WhisperNet/Library.cs
+++ b/WhisperNet/Library.cs
@@ -35,7 +35,7 @@ namespace Whisper
 		/// <remarks>Models are large, depending on user’s disk speed this might take a while, and this function blocks the calling thread.<br/>
 		/// Consider <see cref="loadModelAsync" /> instead.</remarks>
 		/// <seealso href="https://huggingface.co/datasets/ggerganov/whisper.cpp" />
-		public static iModel loadModel( string path, eModelImplementation impl = eModelImplementation.GPU )
+		public static iModel loadModel( string path, eGpuModelFlags flags = eGpuModelFlags.None, eModelImplementation impl = eModelImplementation.GPU )
 		{
 			iModel model;
 			sLoadModelCallbacks callbacks = default;
author	Konstantin <const@const.me>	2023-01-18 20:35:30 +0100
committer	Konstantin <const@const.me>	2023-01-18 20:35:30 +0100
commit	11c399b70c7ad5664b6060b39632e6b9fa815350 (patch)
tree	763afed51699017749d3f0398f16928aad7544a4 /WhisperNet
parent	ad097a744759c6a78e1b33ea9d2b4b2af01c529d (diff)