summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKonstantin <const@const.me>2023-01-23 16:44:52 +0100
committerKonstantin <const@const.me>2023-01-23 16:44:52 +0100
commit01325d7168669f8d05446314f8b53c62e7de3af9 (patch)
tree49ff276c29b5e3b513ca61cf426603fd8ee3cc0b
parent27dfc3428a7016e2d05dd67b6d8b88c0b982baa9 (diff)
Minor, micro-optimization
-rw-r--r--Whisper/Hybrid/HybridContext.cpp4
-rw-r--r--Whisper/Utils/miscUtils.cpp24
-rw-r--r--Whisper/Utils/miscUtils.h5
-rw-r--r--Whisper/Whisper/WhisperContext.cpp6
4 files changed, 33 insertions, 6 deletions
diff --git a/Whisper/Hybrid/HybridContext.cpp b/Whisper/Hybrid/HybridContext.cpp
index 64ed2e6..5811bbd 100644
--- a/Whisper/Hybrid/HybridContext.cpp
+++ b/Whisper/Hybrid/HybridContext.cpp
@@ -176,7 +176,7 @@ HRESULT HybridContext::decode( const int* tokens, const int n_tokens, const int
{
Tensor Qcur = ml.mulMat( layer.attnQuery.w, cur );
if( 0 == il ) Tracing::tensor( "dec-Qcur-0", Qcur );
- const float scaling = (float)pow( float( (int)n_state ) / (int)n_head, -0.25 );
+ const float scaling = computeScaling( (int)n_state, (int)n_head );
ml.addRepeatScale( Qcur, layer.attnQuery.b, scaling );
if( 0 == il ) Tracing::tensor( "dec-Qcur-1", Qcur );
@@ -241,7 +241,7 @@ HRESULT HybridContext::decode( const int* tokens, const int n_tokens, const int
// cross-attention
{
Tensor Qcur = ml.mulMat( layer.crossAttnQuery.w, cur );
- ml.addRepeatScale( Qcur, layer.crossAttnQuery.b, (float)pow( float( (int)n_state ) / (int)n_head, -0.25 ) );
+ ml.addRepeatScale( Qcur, layer.crossAttnQuery.b, computeScaling( (int)n_state, (int)n_head ) );
// Kcross is already scaled
const uint32_t len = M * n_state;
diff --git a/Whisper/Utils/miscUtils.cpp b/Whisper/Utils/miscUtils.cpp
index c3f7dd1..6644220 100644
--- a/Whisper/Utils/miscUtils.cpp
+++ b/Whisper/Utils/miscUtils.cpp
@@ -1,5 +1,6 @@
#include "stdafx.h"
#include "miscUtils.h"
+#include <cmath>
void setCurrentThreadName( const char* threadName )
{
@@ -30,4 +31,27 @@ void setCurrentThreadName( const char* threadName )
__except( EXCEPTION_EXECUTE_HANDLER )
{
}
+}
+
+float computeScaling( int mul, int div ) // Returns ( mul / div )^-0.25 as FP32; the math is done in FP64 with SSE intrinsics
+{
+#ifdef _DEBUG
+ const float ref = (float)std::pow( (double)mul / (double)div, -0.25 ); // Reference value via std::pow. NOTE(review): nothing below reads `ref` - presumably meant for a debugger watch or a comparison; confirm
+#endif
+ // Pack both integers into one int32 vector: lane 0 = mul, lane 1 = div
+ __m128i iv = _mm_cvtsi32_si128( mul );
+ iv = _mm_insert_epi32( iv, div, 1 ); // _mm_insert_epi32 is an SSE4.1 intrinsic
+ // Convert the two lower int32 lanes to FP64: v = [ mul, div ]
+ __m128d v = _mm_cvtepi32_pd( iv );
+ // Lower lane becomes mul / div; the unpackhi moves the upper lane ( div ) into the lower lane to serve as the divisor
+ v = _mm_div_sd( v, _mm_unpackhi_pd( v, v ) );
+ // First square root: ( mul / div )^0.5
+ v = _mm_sqrt_sd( v, v );
+ // Second square root gives the 4-th root: ( mul / div )^0.25
+ v = _mm_sqrt_sd( v, v );
+ // Reciprocal turns the 4-th root into ( mul / div )^-0.25
+ v = _mm_div_sd( _mm_set_sd( 1.0 ), v );
+ // Downcast the lower lane to FP32, and return it as a scalar
+ __m128 f32 = _mm_cvtsd_ss( _mm_setzero_ps(), v );
+ return _mm_cvtss_f32( f32 );
} \ No newline at end of file
diff --git a/Whisper/Utils/miscUtils.h b/Whisper/Utils/miscUtils.h
index d665cbc..ff52e21 100644
--- a/Whisper/Utils/miscUtils.h
+++ b/Whisper/Utils/miscUtils.h
@@ -78,4 +78,7 @@ template<class E>
inline size_t vectorMemoryUse( const std::vector<E>& vec )
{
return sizeof( E ) * vec.capacity();
-} \ No newline at end of file
+}
+
+// Returns pow( (double)mul / (double)div, -0.25 ) as a float; the division is floating-point, not integer
+float computeScaling( int mul, int div ); \ No newline at end of file
diff --git a/Whisper/Whisper/WhisperContext.cpp b/Whisper/Whisper/WhisperContext.cpp
index e694930..823d207 100644
--- a/Whisper/Whisper/WhisperContext.cpp
+++ b/Whisper/Whisper/WhisperContext.cpp
@@ -358,7 +358,7 @@ Tensor WhisperContext::encode( Whisper::iSpectrogram& spectrogram, const sEncode
const size_t layersCount = encParams.n_text_layer;
const uint32_t stride = encParams.n_state * encParams.n_ctx;
- const float finalScaling = (float)pow( float( encParams.n_state ) / encParams.n_head, -0.25 );
+ const float finalScaling = computeScaling( (int)encParams.n_state, (int)encParams.n_head );
for( size_t i = 0; i < layersCount; i++ )
{
const LayerDecoder& layer = gpuModel.dec.layers[ i ];
@@ -422,7 +422,7 @@ Tensor WhisperContext::decodeLayer( const Tensor& inpL, size_t il, const sLayerD
profiler.setNextTag( "dec.layer.1" );
Tensor Qcur = mulMat( layer.attnQuery.w, cur );
if( 0 == il ) Tracing::tensor( "dec-Qcur-0", Qcur );
- const float scaling = (float)pow( float( (int)ldp.n_state ) / (int)ldp.n_head, -0.25 );
+ const float scaling = computeScaling( (int)ldp.n_state, (int)ldp.n_head );
addRepeatScale( Qcur, layer.attnQuery.b, scaling );
if( 0 == il ) Tracing::tensor( "dec-Qcur-1", Qcur );
@@ -494,7 +494,7 @@ Tensor WhisperContext::decodeLayer( const Tensor& inpL, size_t il, const sLayerD
{
profiler.setNextTag( "dec.layer.7" );
Tensor Qcur = mulMat( layer.crossAttnQuery.w, cur );
- addRepeatScale( Qcur, layer.crossAttnQuery.b, (float)pow( float( (int)ldp.n_state ) / (int)ldp.n_head, -0.25 ) );
+ addRepeatScale( Qcur, layer.crossAttnQuery.b, computeScaling( (int)ldp.n_state, (int)ldp.n_head ) );
// Kcross is already scaled
const uint32_t len = ldp.M * ldp.n_state;