From 27dfc3428a7016e2d05dd67b6d8b88c0b982baa9 Mon Sep 17 00:00:00 2001 From: Konstantin Date: Mon, 23 Jan 2023 14:38:12 +0100 Subject: Performance improvement, `softMax` shader --- ComputeShaders/softMaxLong.hlsl | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 ComputeShaders/softMaxLong.hlsl (limited to 'ComputeShaders/softMaxLong.hlsl') diff --git a/ComputeShaders/softMaxLong.hlsl b/ComputeShaders/softMaxLong.hlsl new file mode 100644 index 0000000..1f2c2be --- /dev/null +++ b/ComputeShaders/softMaxLong.hlsl @@ -0,0 +1,6 @@ +// This version is for the "dec.probs" shader tag. +// The input tensor has size [ 51865, 3 ]: a very long tensor with just 3 rows. +// Although the shader only runs on 3 GPU cores, a large thread count helps substantially; this shader is about 50% faster. +#define THREADS 1024 + +#include "softMax.hlsl" \ No newline at end of file -- cgit v1.2.3