summaryrefslogtreecommitdiffstats
path: root/ComputeShaders
diff options
context:
space:
mode:
authorKonstantin <const@const.me>2023-01-24 14:59:42 +0100
committerKonstantin <const@const.me>2023-01-24 14:59:42 +0100
commitb13a68d3a458e86cb1791d47c332985b026d2eef (patch)
tree1ec2d9d759c000c874919501d0e3950c5fd73489 /ComputeShaders
parenta6c544b2a577ae0597e78a17ad36057a522bc1e4 (diff)
Comments
Diffstat (limited to 'ComputeShaders')
-rw-r--r--ComputeShaders/mulMatTiled.hlsl5
1 files changed, 1 insertions, 4 deletions
diff --git a/ComputeShaders/mulMatTiled.hlsl b/ComputeShaders/mulMatTiled.hlsl
index 4835cb9..bfc51e8 100644
--- a/ComputeShaders/mulMatTiled.hlsl
+++ b/ComputeShaders/mulMatTiled.hlsl
@@ -4,13 +4,10 @@
#ifndef TILE_SIZE
static const uint TILE_SIZE = 32;
#endif
-
#ifndef THREADS_Y
-// Performance measures on Ryzen 7 5700G iGPU, the time is just for this shader:
-// 1 (32 threads per group) - 17.1 seconds, 2 - 9.02424 seconds, 4 - 6.95762 seconds, 6 - 6.79011 seconds, 8 - 6.67279 seconds, 10 - 6.9456 seconds, 16 - 7.20502 seconds
-// On nVidia, 8 is also the fastest option.
static const uint THREADS_Y = 8;
#endif
+// The values above must satisfy the following constraint: TILE_SIZE = THREADS_Y * N * 4, where N is an integer
#ifndef STREAM_SECOND_MATRIX
// Fun fact: enabling this on 1080Ti ruins the performance by a factor of 3.5