# This file provides mappings between the language identifiers used by
# different third-party libraries:
#
#   * Whisper            -> lowercase English language names ("english")
#   * NLLB               -> FLORES-200 codes, "<iso639-3>_<script>" ("eng_Latn")
#   * sentence_splitter  -> two-letter ISO 639-1 codes ("en")
#
# Every value of `whisper_to_nllb` is a key of `nllb_to_ss`, so the two
# lookups can be chained: nllb_to_ss[whisper_to_nllb[name]].

# Whisper to NLLB.
whisper_to_nllb = {
    "catalan": "cat_Latn",
    "czech": "ces_Latn",
    "danish": "dan_Latn",
    "dutch": "nld_Latn",
    "english": "eng_Latn",
    "finnish": "fin_Latn",
    "french": "fra_Latn",
    "german": "deu_Latn",
    "greek": "ell_Grek",
    "hungarian": "hun_Latn",
    "icelandic": "isl_Latn",
    "italian": "ita_Latn",
    "latvian": "lvs_Latn",
    "lithuanian": "lit_Latn",
    "norwegian": "nob_Latn",  # mapped to Norwegian Bokmal
    "polish": "pol_Latn",
    "portuguese": "por_Latn",
    "portugese": "por_Latn",  # legacy misspelling, kept for backward compatibility
    "romanian": "ron_Latn",
    "russian": "rus_Cyrl",
    "slovak": "slk_Latn",
    "slovenian": "slv_Latn",  # the language name Whisper reports
    "slovene": "slv_Latn",  # alternative name, kept for backward compatibility
    "spanish": "spa_Latn",
    "swedish": "swe_Latn",
    "turkish": "tur_Latn",
}

# NLLB to sentence_splitter (SS).
nllb_to_ss = {
    "cat_Latn": "ca",  # catalan
    "ces_Latn": "cs",  # czech
    "dan_Latn": "da",  # danish
    "nld_Latn": "nl",  # dutch
    "eng_Latn": "en",  # english
    "fin_Latn": "fi",  # finnish
    "fra_Latn": "fr",  # french
    "deu_Latn": "de",  # german
    "ell_Grek": "el",  # greek
    "hun_Latn": "hu",  # hungarian
    "isl_Latn": "is",  # icelandic
    "ita_Latn": "it",  # italian
    "lvs_Latn": "lv",  # latvian
    "lit_Latn": "lt",  # lithuanian
    "nob_Latn": "no",  # norwegian (bokmal)
    "pol_Latn": "pl",  # polish
    "por_Latn": "pt",  # portuguese
    "ron_Latn": "ro",  # romanian
    "rus_Cyrl": "ru",  # russian
    "slk_Latn": "sk",  # slovak
    "slv_Latn": "sl",  # slovenian
    "spa_Latn": "es",  # spanish
    "swe_Latn": "sv",  # swedish
    "tur_Latn": "tr",  # turkish
}