2 files changed, 4 insertions, 0 deletions
diff --git a/Whisper/ML/Context.ops.cpp b/Whisper/ML/Context.ops.cpp
index 7dfca9f..a94497e 100644
--- a/Whisper/ML/Context.ops.cpp
+++ b/Whisper/ML/Context.ops.cpp
@@ -192,6 +192,7 @@ Tensor MlContext::flashAttention( const Tensor& q, const Tensor& k, const Tensor
 			profiler.setNextTag( "flashAttn.1" );
 			Tensor tmp = mulMat( k, q );
 
+			profiler.setNextTag( "flashAttention" );
 			const float tempScale = (float)( 1.0 / sqrt( (double)(int)q.ne[ 0 ] ) );
 			softMax( tmp, tempScale );
 
diff --git a/Whisper/Whisper/WhisperContext.cpp b/Whisper/Whisper/WhisperContext.cpp
index d558aa6..e694930 100644
--- a/Whisper/Whisper/WhisperContext.cpp
+++ b/Whisper/Whisper/WhisperContext.cpp
@@ -457,6 +457,7 @@ Tensor WhisperContext::decodeLayer( const Tensor& inpL, size_t il, const sLayerD
 		if( 0 == il ) Tracing::tensor( "dec-KQ-0", KQ );
 		diagMaskInf( KQ, ldp.n_past );
 		if( 0 == il ) Tracing::tensor( "dec-KQ-1", KQ );
+		profiler.setNextTag( "decLayer.1" );
 		softMax( KQ );
 		if( 0 == il ) Tracing::tensor( "dec-KQ-2", KQ );
 
@@ -506,6 +507,7 @@ Tensor WhisperContext::decodeLayer( const Tensor& inpL, size_t il, const sLayerD
 		Tensor K = permute( Kcross, 0, 2, 1, 3 );
 		profiler.setNextTag( "dec.layer.8" );
 		Tensor KQ = mulMat( K, Q );
+		profiler.setNextTag( "decLayer.2" );
 		softMax( KQ );
 		Tensor V_trans = permute( Vcross, 1, 2, 0, 3 );
 		profiler.setNextTag( "dec.layer.9" );
@@ -628,6 +630,7 @@ void WhisperContext::decode( const int* tokens, const int n_tokens, const sDecod
 	cur = mulMat( gpuModel.dec.tokenEmbedding, cur );
 
 	// logits -> probs
+	profiler.setNextTag( "dec.probs" );
 	softMax( cur );
 
 	decoderOutput.copyFromVram( cur );