using System.Globalization;
using System.Text.RegularExpressions;

namespace CompressShaders
{
    /// <summary>Reads the tab-separated <c>languageCodez.tsv</c> list and generates two source files from it:
    /// a C++ include file with a table of <c>Lang{ key, code, name }</c> entries, and a C# file with the <c>eLanguage</c> enum.</summary>
    static class LanguageCodes
    {
        /// <summary>One entry parsed from the TSV file</summary>
        /// <param name="keySource">First column, the short textual key</param>
        /// <param name="keyValue"><paramref name="keySource"/> packed into a 32-bit integer by <see cref="makeKey"/></param>
        /// <param name="code">Second column, parsed as an integer</param>
        /// <param name="name">Third column, the language name</param>
        readonly record struct Row( string keySource, uint keyValue, int code, string name );

        /// <summary>Culture used for the title-casing of the language names</summary>
        static readonly CultureInfo ci = new CultureInfo( "en-US", false );

        /// <summary>Matches runs of whitespace; used to turn a title-cased name into an enum identifier</summary>
        static readonly Regex whitespace = new Regex( @"\s+", RegexOptions.CultureInvariant );

        /// <summary>Pack up to 4 ASCII characters into a 32-bit integer.</summary>
        /// <remarks>The first character goes into the lowest byte, the unused high bytes stay zero.</remarks>
        /// <exception cref="ArgumentException">The string is longer than 4 characters, or contains a non-ASCII character</exception>
        static uint makeKey( string str )
        {
            if( str.Length > 4 )
                throw new ArgumentException( $"The key \"{str}\" is longer than 4 characters", nameof( str ) );

            uint key = 0;
            for( int i = 0; i < str.Length; i++ )
            {
                char c = str[ i ];
                if( c >= 0x80 )
                    throw new ArgumentException( $"The key \"{str}\" contains a non-ASCII character", nameof( str ) );
                key |= (uint)c << ( i * 8 );
            }
            return key;
        }

        /// <summary>Lazily parse the TSV file; blank lines are skipped.</summary>
        /// <remarks>Each non-blank line needs at least 3 tab-separated fields: key, integer code, name.</remarks>
        /// <exception cref="InvalidDataException">A line has fewer than 3 fields</exception>
        static IEnumerable<Row> load( string path )
        {
            using var stm = File.OpenText( path );
            int lineNumber = 0;
            while( true )
            {
                string? line = stm.ReadLine();
                if( line is null )
                    break;
                lineNumber++;
                if( string.IsNullOrWhiteSpace( line ) )
                    continue;

                string[] fields = line.Split( '\t' );
                if( fields.Length < 3 )
                    throw new InvalidDataException( $"{path}({lineNumber}): expected 3 tab-separated fields, found {fields.Length}" );

                // The file is machine-readable data, parse the number independently of the current culture
                int code = int.Parse( fields[ 1 ], NumberStyles.Integer, CultureInfo.InvariantCulture );
                yield return new Row( fields[ 0 ], makeKey( fields[ 0 ] ), code, fields[ 2 ] );
            }
        }

        /// <summary>Escape backslashes and double quotes, so the text can be placed inside a C++ string literal</summary>
        static string escapeCpp( string text ) =>
            text.Replace( "\\", "\\\\" ).Replace( "\"", "\\\"" );

        /// <summary>Write the C++ include file, one <c>Lang{ key, code, "name" },</c> line per row</summary>
        static void writeCpp( string inl, Row[] data )
        {
            // TODO [very low]: sort them by the key here, then in C++ use binary search instead of the hash map
            using var stm = File.CreateText( inl );
            stm.WriteLine( "// This file is generated by a tool, from the `languageCodez.tsv` file in this repository" );
            foreach( Row row in data )
                stm.WriteLine( FormattableString.Invariant( $"Lang{{ 0x{row.keyValue:X}, {row.code}, \"{escapeCpp( row.name )}\" }}," ) );
        }

        /// <summary>Convert the name to title case, e.g. "haitian creole" becomes "Haitian Creole"</summary>
        static string titleCase( this string name ) =>
            ci.TextInfo.ToTitleCase( name.ToLower( ci ) );

        /// <summary>Write the C# file with the <c>Whisper.eLanguage</c> enum.</summary>
        /// <remarks>The enum member names are the title-cased language names with all whitespace removed; the values are the packed keys.</remarks>
        static void writeCs( string cs, Row[] data )
        {
            using var stm = File.CreateText( cs );

            // The header is written line by line instead of a multi-line verbatim literal,
            // so the line endings of the output do not depend on how this source file was checked out.
            stm.WriteLine( "// This file is generated by a tool, from the `languageCodez.tsv` file in this repository" );
            stm.WriteLine( "namespace Whisper" );
            stm.WriteLine( "{" );
            stm.WriteLine( "\t/// <summary>Supported languages</summary>" );
            stm.WriteLine( "\t/// <remarks>The values of this enum are zero-padded ASCII strings." );
            stm.WriteLine( "\t/// It seems OpenAI tried to implement ISO 639-1, but they used the version of the standard from 1988.</remarks>" );
            stm.WriteLine( "\tpublic enum eLanguage: uint" );
            stm.WriteLine( "\t{" );

            foreach( Row row in data )
            {
                string title = row.name.titleCase();
                string identifier = whitespace.Replace( title, string.Empty );
                stm.WriteLine( FormattableString.Invariant( $"\t\t/// <summary>{title}</summary>" ) );
                stm.WriteLine( FormattableString.Invariant( $"\t\t{identifier} = 0x{row.keyValue:X}, // \"{row.keySource}\"" ) );
            }

            stm.WriteLine( "\t}" );
            stm.Write( "}" );
        }

        /// <summary>Load the TSV, sort the rows by name, and write both generated files</summary>
        static void produce( string tsv, string inl, string cs )
        {
            // Ordinal comparison keeps the order of the generated files identical on every machine, regardless of the OS culture
            Row[] data = load( tsv ).OrderBy( r => r.name, StringComparer.Ordinal ).ToArray();
            writeCpp( inl, data );
            writeCs( cs, data );
        }

        /// <summary>Generate <c>Whisper/Whisper/languageCodez.inl</c> and <c>WhisperNet/API/eLanguage.cs</c>
        /// from <c>Whisper/Whisper/languageCodez.tsv</c>, all relative to the solution root.</summary>
        /// <param name="solutionRoot">Root directory of the solution</param>
        public static void produce( string solutionRoot )
        {
            // Separate path segments, so the correct directory separator is used on every OS
            string tsv = Path.Combine( solutionRoot, "Whisper", "Whisper", "languageCodez.tsv" );
            string inl = Path.Combine( solutionRoot, "Whisper", "Whisper", "languageCodez.inl" );
            string cs = Path.Combine( solutionRoot, "WhisperNet", "API", "eLanguage.cs" );
            produce( tsv, inl, cs );
        }
    }
}