summaryrefslogtreecommitdiffstats
path: root/Examples/TranscribeCS/TranscribeCS.cs
blob: 65239f117cd66a4258c173bdfe7eeec2f4cf11f0 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
using Whisper;

namespace TranscribeCS
{
	/// <summary>Console entry point of the sample: loads a model, runs it over every input audio file,
	/// and optionally writes the recognized segments next to each input as .txt, .srt and .vtt files.</summary>
	static class Program
	{
		/// <summary>When true, audio is fed to the model through an <c>iAudioReader</c> from <c>openAudioFile</c>;
		/// when false, through an <c>iAudioBuffer</c> from <c>loadAudioFile</c>.</summary>
		static readonly bool streamAudio = true;

		/// <summary>Program entry point.</summary>
		/// <param name="args">Raw command-line arguments, handed to <c>CommandLineArgs</c>.</param>
		/// <returns>0 on success; 1 when constructing <c>CommandLineArgs</c> throws <c>OperationCanceledException</c>;
		/// otherwise the HResult of the exception that stopped the run.</returns>
		static int Main( string[] args )
		{
			try
			{
				CommandLineArgs cla;
				try
				{
					cla = new CommandLineArgs( args );
				}
				catch( OperationCanceledException )
				{
					// The argument parser cancelled the run; exit without printing anything else here
					return 1;
				}

				Library.setLogSink( eLogLevel.Debug, eLoggerFlags.UseStandardError | eLoggerFlags.SkipFormatMessage );

				using iModel model = Library.loadModel( cla.model );
				using Context context = model.createContext();
				cla.apply( ref context.parameters );
				// Multiple input files are treated as independent clips, hence the NoContext flag
				context.parameters.setFlag( eFullParamsFlags.NoContext, true );
				using iMediaFoundation mf = Library.initMediaFoundation();
				Transcribe transcribe = new Transcribe( cla );

				foreach( string audioFile in cla.fileNames )
				{
					runModel( context, mf, transcribe, cla, audioFile );

					// Optional text outputs, each one enabled by its own command-line switch
					if( cla.output_txt )
					{
						writeTextFile( context, audioFile );
					}
					if( cla.output_srt )
					{
						writeSubRip( context, audioFile, cla );
					}
					if( cla.output_vtt )
					{
						writeWebVTT( context, audioFile );
					}
				}

				context.timingsPrint();
				return 0;
			}
			catch( Exception ex )
			{
				Console.WriteLine( ex.Message );
				return ex.HResult;
			}
		}

		/// <summary>Runs the model over a single audio file, using either the reader or the buffer path
		/// depending on <see cref="streamAudio"/>. The audio source is disposed before this method returns.</summary>
		static void runModel( Context context, iMediaFoundation mf, Transcribe transcribe, CommandLineArgs cla, string audioFile )
		{
			if( !streamAudio )
			{
				using iAudioBuffer buffer = mf.loadAudioFile( audioFile, cla.diarize );
				context.runFull( buffer, transcribe, cla.prompt );
				return;
			}

			using iAudioReader reader = mf.openAudioFile( audioFile, cla.diarize );
			context.runFull( reader, transcribe, null, cla.prompt );
		}

		/// <summary>Creates a text file whose path is the audio path with the extension replaced.</summary>
		/// <param name="audioPath">Path of the source audio file.</param>
		/// <param name="extension">New extension, including the leading dot.</param>
		static StreamWriter createOutput( string audioPath, string extension )
		{
			string outputPath = Path.ChangeExtension( audioPath, extension );
			return File.CreateText( outputPath );
		}

		/// <summary>Writes the text of every result segment to a .txt file, one segment per line.</summary>
		/// <param name="context">Context holding the results of the latest run.</param>
		/// <param name="audioPath">Path of the source audio file; the output path is derived from it.</param>
		static void writeTextFile( Context context, string audioPath )
		{
			using StreamWriter writer = createOutput( audioPath, ".txt" );
			foreach( sSegment segment in context.results().segments )
			{
				writer.WriteLine( segment.text );
			}
		}

		/// <summary>Writes the result segments to a .srt file: a cue number, a time range line, the text, and a blank line.</summary>
		/// <param name="context">Context holding the results of the latest run.</param>
		/// <param name="audioPath">Path of the source audio file; the output path is derived from it.</param>
		/// <param name="cliArgs">Command-line arguments; <c>offset_n</c> is added to every cue number.</param>
		static void writeSubRip( Context context, string audioPath, CommandLineArgs cliArgs )
		{
			using StreamWriter writer = createOutput( audioPath, ".srt" );

			// Zero-based position of the current segment; cue numbers start at 1 plus the requested offset
			int i = 0;
			foreach( sSegment segment in context.results( eResultFlags.Timestamps ).segments )
			{
				writer.WriteLine( i + 1 + cliArgs.offset_n );
				string from = Transcribe.printTimeWithComma( segment.time.begin );
				string to = Transcribe.printTimeWithComma( segment.time.end );
				writer.WriteLine( $"{from} --> {to}" );
				writer.WriteLine( segment.text );
				writer.WriteLine();
				i++;
			}
		}

		/// <summary>Writes the result segments to a .vtt file: the "WEBVTT" header, then for every segment
		/// a time range line, the text, and a blank line.</summary>
		/// <param name="context">Context holding the results of the latest run.</param>
		/// <param name="audioPath">Path of the source audio file; the output path is derived from it.</param>
		static void writeWebVTT( Context context, string audioPath )
		{
			using StreamWriter writer = createOutput( audioPath, ".vtt" );
			writer.WriteLine( "WEBVTT" );
			writer.WriteLine();

			var cues = context.results( eResultFlags.Timestamps ).segments;
			for( int i = 0; i < cues.Length; i++ )
			{
				sSegment cue = cues[ i ];
				string from = Transcribe.printTime( cue.time.begin );
				string to = Transcribe.printTime( cue.time.end );
				writer.WriteLine( $"{from} --> {to}" );
				writer.WriteLine( cue.text );
				writer.WriteLine();
			}
		}
	}
}