From 15dbcacdbc5db68c1ea86bb330d07ec70de75af6 Mon Sep 17 00:00:00 2001
From: Konstantin
Date: Mon, 23 Jan 2023 20:28:59 +0100
Subject: Minor, micro-optimization

---
 Whisper/ML/Context.ops.cpp | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

(limited to 'Whisper/ML/Context.ops.cpp')

diff --git a/Whisper/ML/Context.ops.cpp b/Whisper/ML/Context.ops.cpp
index a94497e..f6309f0 100644
--- a/Whisper/ML/Context.ops.cpp
+++ b/Whisper/ML/Context.ops.cpp
@@ -84,9 +84,20 @@ Tensor __declspec( noinline ) MlContext::view2d( const Tensor& a, uint32_t ne0,
 
 Tensor MlContext::transpose( const Tensor& a )
 {
-	Tensor result = a;
-	std::swap( result.ne[ 0 ], result.ne[ 1 ] );
-	std::swap( result.nb[ 0 ], result.nb[ 1 ] );
+	Tensor result;
+
+	// A magic number for _mm_shuffle_epi32 SSE2 instruction to swap two lower int32 lanes in a vector
+	constexpr int swapXy = _MM_SHUFFLE( 3, 2, 0, 1 );
+
+	__m128i v = a.sizeVec();
+	v = _mm_shuffle_epi32( v, swapXy );
+	store( result.ne, v );
+
+	v = a.stridesVec();
+	v = _mm_shuffle_epi32( v, swapXy );
+	store( result.nb, v );
+
+	result.setGpuViews( a, a );
 	return result;
 }
 
-- 
cgit v1.2.3