summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Konstantin <const@const.me> 2023-01-29 16:10:16 +0100
committer Konstantin <const@const.me> 2023-01-29 16:10:16 +0100
commit 850bf49466c15214fc5c232cad268662df6ad68a (patch)
tree 41a20172414ccc32a6a39abea28859eeca70e79c
parent 34cf65fad2c8964d4ee48e8ec4becef86b5b23dc (diff)
C# console example, diarize feature
-rw-r--r-- Examples/TranscribeCS/Transcribe.cs 37
-rw-r--r-- Examples/TranscribeCS/TranscribeCS.cs 4
-rw-r--r-- WhisperNet/Context.cs 4
3 files changed, 17 insertions, 28 deletions
diff --git a/Examples/TranscribeCS/Transcribe.cs b/Examples/TranscribeCS/Transcribe.cs
index 6a1e500..9bbe387 100644
--- a/Examples/TranscribeCS/Transcribe.cs
+++ b/Examples/TranscribeCS/Transcribe.cs
@@ -69,45 +69,30 @@ namespace TranscribeCS
}
string speaker = "";
-#if false
- if( args.diarize && pcmf32s.size() == 2 )
+ if( args.diarize )
{
- const size_t n_samples = pcmf32s[ 0 ].size();
- const int64_t is0 = SourceAudio::sampleFromTimestamp( seg.time.begin, n_samples );
- const int64_t is1 = SourceAudio::sampleFromTimestamp( seg.time.end, n_samples );
-
- double energy0 = 0.0f;
- double energy1 = 0.0f;
-
- for( int64_t j = is0; j < is1; j++ )
+ speaker = sender.detectSpeaker( seg.time ) switch
{
- energy0 += fabs( pcmf32s[ 0 ][ j ] );
- energy1 += fabs( pcmf32s[ 1 ][ j ] );
- }
-
- if( energy0 > 1.1 * energy1 )
- speaker = "(speaker 0)";
- else if( energy1 > 1.1 * energy0 )
- speaker = "(speaker 1)";
- else
- speaker = "(speaker ?)";
-
- //printf("is0 = %lld, is1 = %lld, energy0 = %f, energy1 = %f, %s\n", is0, is1, energy0, energy1, speaker.c_str());
+ eSpeakerChannel.Unsure => "(speaker ?)",
+ eSpeakerChannel.Left => "(speaker 0)",
+ eSpeakerChannel.Right => "(speaker 1)",
+ _ => ""
+ };
}
-#endif
+
if( args.print_colors && AnsiCodes.enabled )
{
- Console.Write( "[{0} --> {1}] ", printTime( seg.time.begin ), printTime( seg.time.end ) );
+ Console.Write( "[{0} --> {1}] {2} ", printTime( seg.time.begin ), printTime( seg.time.end ), speaker );
foreach( sToken tok in res.getTokens( seg ) )
{
if( !args.print_special && tok.hasFlag( eTokenFlags.Special ) )
continue;
- Console.Write( "{0}{1}{2}{3}", speaker, k_colors[ colorIndex( tok ) ], tok.text, "\x1B[0m" );
+ Console.Write( "{0}{1}{2}", k_colors[ colorIndex( tok ) ], tok.text, "\x1B[0m" );
}
Console.WriteLine();
}
else
- Console.WriteLine( "[{0} --> {1}] {2}{3}", printTime( seg.time.begin ), printTime( seg.time.end ), speaker, seg.text );
+ Console.WriteLine( "[{0} --> {1}] {2} {3}", printTime( seg.time.begin ), printTime( seg.time.end ), speaker, seg.text );
}
}
}
diff --git a/Examples/TranscribeCS/TranscribeCS.cs b/Examples/TranscribeCS/TranscribeCS.cs
index 9b828e3..d94ed21 100644
--- a/Examples/TranscribeCS/TranscribeCS.cs
+++ b/Examples/TranscribeCS/TranscribeCS.cs
@@ -32,12 +32,12 @@ namespace TranscribeCS
{
if( streamAudio )
{
- using iAudioReader reader = mf.openAudioFile( audioFile );
+ using iAudioReader reader = mf.openAudioFile( audioFile, cla.diarize );
context.runFull( reader, transcribe, null, cla.prompt );
}
else
{
- using iAudioBuffer buffer = mf.loadAudioFile( audioFile );
+ using iAudioBuffer buffer = mf.loadAudioFile( audioFile, cla.diarize );
context.runFull( buffer, transcribe, cla.prompt );
}
// When asked to, produce these text files
diff --git a/WhisperNet/Context.cs b/WhisperNet/Context.cs
index 6c6a737..4bea863 100644
--- a/WhisperNet/Context.cs
+++ b/WhisperNet/Context.cs
@@ -197,5 +197,9 @@ namespace Whisper
fullParams.prompt_n_tokens = 0;
}
}
+
+ /// <summary>Try to detect which stereo channel the speaker is on during the given time interval, by comparing channels of the stereo PCM data; forwards to context.detectSpeaker.</summary>
+ public eSpeakerChannel detectSpeaker( sTimeInterval interval ) =>
+ context.detectSpeaker( ref interval );
}
} \ No newline at end of file