summaryrefslogtreecommitdiffstats
path: root/ComputeShaders/softMaxLong.hlsl
diff options
context:
space:
mode:
Diffstat (limited to 'ComputeShaders/softMaxLong.hlsl')
-rw-r--r--ComputeShaders/softMaxLong.hlsl6
1 files changed, 6 insertions, 0 deletions
diff --git a/ComputeShaders/softMaxLong.hlsl b/ComputeShaders/softMaxLong.hlsl
new file mode 100644
index 0000000..1f2c2be
--- /dev/null
+++ b/ComputeShaders/softMaxLong.hlsl
@@ -0,0 +1,6 @@
+// This version is for the "dec.probs" shader tag
+// The input tensor has a size [ 51865, 3 ], a very long tensor with just 3 rows.
+// Although the shader runs on only 3 GPU cores, a large thread count helps substantially: this version is about 50% faster.
+#define THREADS 1024 // Defined before the include below; presumably softMax.hlsl uses THREADS as its thread count (TODO confirm)
+
+#include "softMax.hlsl" \ No newline at end of file