Finish translation for Western European language speakers (v0.12.0)

NLLB needs its input to be split up into sentences. I use the sentence_splitter Python package to do this. It supports ~20 Western European languages, but notably, no Asian languages.
* Sort spoken language list. English is still at the top.
* Remove 'Translation source' dropdown. Infer this from the spoken language.
* Add lang_compat.py to map language codes between the various libraries (whisper, nllb, sentence_splitter).
* Fix bug where old text would appear in textbox when you first bring it up.
author: yum <yum.food.vr@gmail.com> 2023-05-30 19:01:56 -0700
committer: yum <yum.food.vr@gmail.com> 2023-05-30 19:13:25 -0700
commit: 0bda49279ec80187d49a922ff2a47141ffb2fd8f (patch)
tree: bd2521c2b2cdca422eb94e1dbef7c85e8dcefe4b /Scripts/lang_compat.py
parent: 84f09e1fdf15644d1ea5f955889581932e4f6a8e (diff)
1 files changed, 58 insertions, 0 deletions
diff --git a/Scripts/lang_compat.py b/Scripts/lang_compat.py
new file mode 100644
index 0000000..af35921
--- /dev/null
+++ b/Scripts/lang_compat.py
@@ -0,0 +1,58 @@
+# This file provides mappings between language codes used by different
+# third-party libraries.
+
+# Whisper language name -> NLLB code. "portugese"/"slovene" are legacy spellings kept beside Whisper's own "portuguese"/"slovenian".
+whisper_to_nllb = {
+        "catalan": "cat_Latn",  # catalan
+        "czech": "ces_Latn",  # czech
+        "danish": "dan_Latn",  # danish
+        "dutch": "nld_Latn",  # dutch
+        "english": "eng_Latn",  # english
+        "finnish": "fin_Latn",  # finnish
+        "french": "fra_Latn",  # french
+        "german": "deu_Latn",  # german
+        "greek": "ell_Grek",  # greek
+        "hungarian": "hun_Latn",  # hungarian
+        "icelandic": "isl_Latn",  # icelandic
+        "italian": "ita_Latn",  # italian
+        "latvian": "lvs_Latn",  # latvian
+        "lithuanian": "lit_Latn",  # lithuanian
+        "norwegian": "nob_Latn",  # norwegian (bokmal)
+        "polish": "pol_Latn",  # polish
+        "portuguese": "por_Latn", "portugese": "por_Latn",  # portuguese (+ legacy misspelled key)
+        "romanian": "ron_Latn",  # romanian
+        "russian": "rus_Cyrl",  # russian
+        "slovak": "slk_Latn",  # slovak
+        "slovenian": "slv_Latn", "slovene": "slv_Latn",  # slovenian (+ legacy alias key)
+        "spanish": "spa_Latn",  # spanish
+        "swedish": "swe_Latn",  # swedish
+        "turkish": "tur_Latn",  # turkish
+        }
+
+# NLLB code -> sentence_splitter (SS) language code. Every value of whisper_to_nllb must appear here as a key.
+nllb_to_ss = {
+        "cat_Latn": "ca",  # catalan
+        "ces_Latn": "cs",  # czech
+        "dan_Latn": "da",  # danish
+        "nld_Latn": "nl",  # dutch
+        "eng_Latn": "en",  # english
+        "fin_Latn": "fi",  # finnish
+        "fra_Latn": "fr",  # french
+        "deu_Latn": "de",  # german
+        "ell_Grek": "el",  # greek
+        "hun_Latn": "hu",  # hungarian
+        "isl_Latn": "is",  # icelandic
+        "ita_Latn": "it",  # italian
+        "lvs_Latn": "lv",  # latvian
+        "lit_Latn": "lt",  # lithuanian
+        "nob_Latn": "no",  # norwegian (bokmal)
+        "pol_Latn": "pl",  # polish
+        "por_Latn": "pt",  # portuguese
+        "ron_Latn": "ro",  # romanian
+        "rus_Cyrl": "ru",  # russian
+        "slk_Latn": "sk",  # slovak
+        "slv_Latn": "sl",  # slovenian
+        "spa_Latn": "es",  # spanish
+        "swe_Latn": "sv",  # swedish
+        "tur_Latn": "tr",  # turkish
+        }
author	yum <yum.food.vr@gmail.com>	2023-05-30 19:01:56 -0700
committer	yum <yum.food.vr@gmail.com>	2023-05-30 19:13:25 -0700
commit	0bda49279ec80187d49a922ff2a47141ffb2fd8f (patch)
tree	bd2521c2b2cdca422eb94e1dbef7c85e8dcefe4b /Scripts/lang_compat.py
parent	84f09e1fdf15644d1ea5f955889581932e4f6a8e (diff)