summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authoryum <yum.food.vr@gmail.com>2023-05-30 19:01:56 -0700
committeryum <yum.food.vr@gmail.com>2023-05-30 19:13:25 -0700
commit0bda49279ec80187d49a922ff2a47141ffb2fd8f (patch)
treebd2521c2b2cdca422eb94e1dbef7c85e8dcefe4b
parent84f09e1fdf15644d1ea5f955889581932e4f6a8e (diff)
Finish translation for Western European language speakersv0.12.0
NLLB needs its input to be split up into sentences. I use the sentence_splitter Python package to do this. It supports ~20 Western European languages, but notably, no Asian languages. * Sort spoken language list. English is still at the top. * Remove 'Translation source' dropdown. Infer this from the spoken language. * Add lang_compat.py to map language codes between the various libraries (whisper, nllb, sentence_splitter). * Fix bug where old text would appear in textbox when you first bring it up.
-rw-r--r--GUI/GUI/GUI/Config.cpp3
-rw-r--r--GUI/GUI/GUI/Config.h1
-rw-r--r--GUI/GUI/GUI/Frame.cpp235
-rw-r--r--GUI/GUI/GUI/Frame.h1
-rw-r--r--GUI/GUI/GUI/Logging.h3
-rw-r--r--GUI/GUI/GUI/PythonWrapper.cpp1
-rw-r--r--Scripts/lang_compat.py58
-rw-r--r--Scripts/requirements.txt1
-rw-r--r--Scripts/transcribe.py78
9 files changed, 211 insertions, 170 deletions
diff --git a/GUI/GUI/GUI/Config.cpp b/GUI/GUI/GUI/Config.cpp
index bf0c1c6..2bf17cf 100644
--- a/GUI/GUI/GUI/Config.cpp
+++ b/GUI/GUI/GUI/Config.cpp
@@ -64,7 +64,6 @@ AppConfig::AppConfig(wxTextCtrl* out)
microphone("index"),
language("english"),
- language_source("Do not translate"),
language_target("Do not translate"),
model("base.en"),
model_translation("nllb-200-distilled-600M"),
@@ -115,7 +114,6 @@ bool AppConfig::Serialize(const std::filesystem::path& path) {
cm.Set("microphone", microphone);
cm.Set("language", language);
- cm.Set("language_source", language_source);
cm.Set("language_target", language_target);
cm.Set("model", model);
cm.Set("model_translation", model_translation);
@@ -179,7 +177,6 @@ bool AppConfig::Deserialize(const std::filesystem::path& path) {
AppConfig c(out_);
cm.Get("microphone", c.microphone);
cm.Get("language", c.language);
- cm.Get("language_source", c.language_source);
cm.Get("language_target", c.language_target);
cm.Get("model", c.model);
cm.Get("model_translation", c.model_translation);
diff --git a/GUI/GUI/GUI/Config.h b/GUI/GUI/GUI/Config.h
index bc942d4..bf714bc 100644
--- a/GUI/GUI/GUI/Config.h
+++ b/GUI/GUI/GUI/Config.h
@@ -50,7 +50,6 @@ public:
// Transcription-specific settings.
std::string microphone;
std::string language;
- std::string language_source;
std::string language_target;
std::string model;
std::string model_translation;
diff --git a/GUI/GUI/GUI/Frame.cpp b/GUI/GUI/GUI/Frame.cpp
index 8cebc2d..a2b128a 100644
--- a/GUI/GUI/GUI/Frame.cpp
+++ b/GUI/GUI/GUI/Frame.cpp
@@ -30,7 +30,6 @@ namespace {
ID_PY_APP_MIC,
ID_PY_APP_MIC_PANEL,
ID_PY_APP_LANG,
- ID_PY_APP_TRANSLATE_SOURCE,
ID_PY_APP_TRANSLATE_TARGET,
ID_PY_APP_LANG_PANEL,
ID_PY_APP_MODEL,
@@ -130,107 +129,107 @@ namespace {
constexpr int kMicDefault = 0; // index
// lifted from whisper/tokenizer.py
- const wxString kLangChoices[] = {
- "english",
- "chinese",
- "german",
- "spanish",
- "russian",
- "korean",
- "french",
- "japanese",
- "portuguese",
- "turkish",
- "polish",
- "catalan",
- "dutch",
- "arabic",
- "swedish",
- "italian",
- "indonesian",
- "hindi",
- "finnish",
- "vietnamese",
- "hebrew",
- "ukrainian",
- "greek",
- "malay",
- "czech",
- "romanian",
- "danish",
- "hungarian",
- "tamil",
- "norwegian",
- "thai",
- "urdu",
- "croatian",
- "bulgarian",
- "lithuanian",
- "latin",
- "maori",
- "malayalam",
- "welsh",
- "slovak",
- "telugu",
- "persian",
- "latvian",
- "bengali",
- "serbian",
- "azerbaijani",
- "slovenian",
- "kannada",
- "estonian",
- "macedonian",
- "breton",
- "basque",
- "icelandic",
- "armenian",
- "nepali",
- "mongolian",
- "bosnian",
- "kazakh",
- "albanian",
- "swahili",
- "galician",
- "marathi",
- "punjabi",
- "sinhala",
- "khmer",
- "shona",
- "yoruba",
- "somali",
- "afrikaans",
- "occitan",
- "georgian",
- "belarusian",
- "tajik",
- "sindhi",
- "gujarati",
- "amharic",
- "yiddish",
- "lao",
- "uzbek",
- "faroese",
- "haitian creole",
- "pashto",
- "turkmen",
- "nynorsk",
- "maltese",
- "sanskrit",
- "luxembourgish",
- "myanmar",
- "tibetan",
- "tagalog",
- "malagasy",
- "assamese",
- "tatar",
- "hawaiian",
- "lingala",
- "hausa",
- "bashkir",
- "javanese",
- "sundanese"
- };
+ const wxString kLangChoices[] = {  // "english" stays first (kLangDefault); the rest are sorted alphabetically.
+ "english",
+ "afrikaans",
+ "albanian",
+ "amharic",
+ "arabic",
+ "armenian",
+ "assamese",
+ "azerbaijani",
+ "bashkir",
+ "basque",
+ "belarusian",
+ "bengali",
+ "bosnian",
+ "breton",
+ "bulgarian",
+ "catalan",
+ "chinese",
+ "croatian",
+ "czech",
+ "danish",
+ "dutch",
+ "estonian",
+ "faroese",
+ "finnish",
+ "french",
+ "galician",
+ "georgian",
+ "german",
+ "greek",
+ "gujarati",
+ "haitian creole",
+ "hausa",
+ "hawaiian",
+ "hebrew",
+ "hindi",
+ "hungarian",
+ "icelandic",
+ "indonesian",
+ "italian",
+ "japanese",
+ "javanese",
+ "kannada",
+ "kazakh",
+ "khmer",
+ "korean",
+ "lao",
+ "latin",
+ "latvian",
+ "lingala",
+ "lithuanian",
+ "luxembourgish",
+ "macedonian",
+ "malagasy",
+ "malay",
+ "malayalam",
+ "maltese",
+ "maori",
+ "marathi",
+ "mongolian",
+ "myanmar",
+ "nepali",
+ "norwegian",
+ "nynorsk",
+ "occitan",
+ "pashto",
+ "persian",
+ "polish",
+ "portuguese",
+ "punjabi",
+ "romanian",
+ "russian",
+ "sanskrit",
+ "serbian",
+ "shona",
+ "sindhi",
+ "sinhala",
+ "slovak",
+ "slovenian",
+ "somali",
+ "spanish",
+ "sundanese",  // comma required: without it this literal fuses with "swahili" below
+ "swahili",
+ "swedish",
+ "tagalog",
+ "tajik",
+ "tamil",
+ "tatar",
+ "telugu",
+ "thai",
+ "tibetan",
+ "turkish",
+ "turkmen",
+ "ukrainian",
+ "urdu",
+ "uzbek",
+ "vietnamese",
+ "welsh",
+ "yiddish",
+ "yoruba",
+ };
const size_t kNumLangChoices = sizeof(kLangChoices) / sizeof(kLangChoices[0]);
constexpr int kLangDefault = 0; // english
@@ -633,17 +632,9 @@ Frame::Frame()
ID_PY_APP_LANG, wxDefaultPosition, wxDefaultSize,
kNumLangChoices, kLangChoices);
py_app_lang->SetToolTip("Select which language you will "
- "speak in. If using something other than English, "
- "make sure you're not using a *.en model.");
+ "speak in.");
py_app_lang_ = py_app_lang;
- auto* py_app_translate_source = new wxChoice(py_app_config_panel_pairs,
- ID_PY_APP_TRANSLATE_SOURCE, wxDefaultPosition, wxDefaultSize,
- kNumLangTargetChoices, kLangTargetChoices);
- py_app_translate_source->SetToolTip("Select which language to "
- "translate from, in other words, the language you are transcribing into.");
- py_app_translate_source_ = py_app_translate_source;
-
auto* py_app_translate_target = new wxChoice(py_app_config_panel_pairs,
ID_PY_APP_TRANSLATE_TARGET, wxDefaultPosition, wxDefaultSize,
kNumLangTargetChoices, kLangTargetChoices);
@@ -674,7 +665,8 @@ Frame::Frame()
py_app_model_translation->SetToolTip("Select which "
"version of the translation model to use. 600M params "
"uses 4.1 GB of memory, while 1.3B uses ~7GB of "
- "memory.");
+ "memory. If 'Translate to' is set to 'Do not "
+ "translate', this does nothing.");
py_app_model_translation_ = py_app_model_translation;
auto* py_app_chars_per_sync = new wxChoice(
@@ -767,8 +759,8 @@ Frame::Frame()
/*flags=*/wxEXPAND);
sizer->Add(new wxStaticText(py_app_config_panel_pairs,
- wxID_ANY, /*label=*/"Translate from:"));
- sizer->Add(py_app_translate_source, /*proportion=*/0,
+ wxID_ANY, /*label=*/"Transcription model:"));
+ sizer->Add(py_app_model, /*proportion=*/0,
/*flags=*/wxEXPAND);
sizer->Add(new wxStaticText(py_app_config_panel_pairs,
@@ -777,11 +769,6 @@ Frame::Frame()
/*flags=*/wxEXPAND);
sizer->Add(new wxStaticText(py_app_config_panel_pairs,
- wxID_ANY, /*label=*/"Transcription model:"));
- sizer->Add(py_app_model, /*proportion=*/0,
- /*flags=*/wxEXPAND);
-
- sizer->Add(new wxStaticText(py_app_config_panel_pairs,
wxID_ANY, /*label=*/"Translation model:"));
sizer->Add(py_app_model_translation, /*proportion=*/0,
/*flags=*/wxEXPAND);
@@ -1344,11 +1331,6 @@ void Frame::ApplyConfigToInputFields()
kNumLangChoices, app_c_->language, kLangDefault);
py_app_lang->SetSelection(lang_idx);
- auto* py_app_translate_source = static_cast<wxChoice*>(FindWindowById(ID_PY_APP_TRANSLATE_SOURCE));
- int translate_source_idx = GetDropdownChoiceIndex(kLangTargetChoices,
- kNumLangTargetChoices, app_c_->language_source, kLangTargetDefault);
- py_app_translate_source->SetSelection(translate_source_idx);
-
auto* py_app_translate_target = static_cast<wxChoice*>(FindWindowById(ID_PY_APP_TRANSLATE_TARGET));
int translate_target_idx = GetDropdownChoiceIndex(kLangTargetChoices,
kNumLangTargetChoices, app_c_->language_target, kLangTargetDefault);
@@ -1968,10 +1950,6 @@ void Frame::OnAppStart(wxCommandEvent& event) {
if (which_lang == wxNOT_FOUND) {
which_lang = kLangDefault;
}
- int which_translate_source = py_app_translate_source_->GetSelection();
- if (which_translate_source == wxNOT_FOUND) {
- which_translate_source = kLangDefault;
- }
int which_translate_target = py_app_translate_target_->GetSelection();
if (which_translate_target == wxNOT_FOUND) {
which_translate_target = kLangDefault;
@@ -2061,7 +2039,6 @@ void Frame::OnAppStart(wxCommandEvent& event) {
app_c_->microphone = kMicChoices[which_mic].ToStdString();
app_c_->language = kLangChoices[which_lang].ToStdString();
- app_c_->language_source = kLangTargetChoices[which_translate_source].ToStdString();
app_c_->language_target = kLangTargetChoices[which_translate_target].ToStdString();
app_c_->model = kModelChoices[which_model].ToStdString();
app_c_->model_translation = kModelTranslationChoices[which_model_translation].ToStdString();
diff --git a/GUI/GUI/GUI/Frame.h b/GUI/GUI/GUI/Frame.h
index 24a0594..4176e83 100644
--- a/GUI/GUI/GUI/Frame.h
+++ b/GUI/GUI/GUI/Frame.h
@@ -62,7 +62,6 @@ private:
wxChoice* py_app_mic_;
wxChoice* py_app_lang_;
- wxChoice* py_app_translate_source_;
wxChoice* py_app_translate_target_;
wxChoice* py_app_model_;
wxChoice* py_app_model_translation_;
diff --git a/GUI/GUI/GUI/Logging.h b/GUI/GUI/GUI/Logging.h
index c85a376..193617a 100644
--- a/GUI/GUI/GUI/Logging.h
+++ b/GUI/GUI/GUI/Logging.h
@@ -40,8 +40,9 @@ namespace Logging {
void Log(wxTextCtrl* frame, std::string_view format, Args&&... args) {
const std::string raw = std::vformat(format, std::make_format_args(args...));
const std::string masked = HidePII(std::move(raw));
+ const std::string decoded = wxString::FromUTF8(masked).ToStdString();
- kThreadLogger.Append(frame, std::move(masked));
+ kThreadLogger.Append(frame, std::move(decoded));
}
void DrainAsyncOutput(wxProcess* proc, wxTextCtrl* frame);
diff --git a/GUI/GUI/GUI/PythonWrapper.cpp b/GUI/GUI/GUI/PythonWrapper.cpp
index b95d6b3..2172ac4 100644
--- a/GUI/GUI/GUI/PythonWrapper.cpp
+++ b/GUI/GUI/GUI/PythonWrapper.cpp
@@ -463,7 +463,6 @@ std::future<bool> PythonWrapper::StartApp(
"Resources/Scripts/transcribe.py",
"--mic", config.microphone,
"--language", config.language,
- "--language_source", Quote(config.language_source),
"--language_target", Quote(config.language_target),
"--model", config.model,
"--model_translation", config.model_translation,
diff --git a/Scripts/lang_compat.py b/Scripts/lang_compat.py
new file mode 100644
index 0000000..af35921
--- /dev/null
+++ b/Scripts/lang_compat.py
@@ -0,0 +1,58 @@
+# This file provides mappings between language codes used by different
+# third-party libraries.
+
+# Whisper language names (as passed via --language) to NLLB language codes.
+whisper_to_nllb = {
+ "catalan": "cat_Latn", # catalan
+ "czech": "ces_Latn", # czech
+ "danish": "dan_Latn", # danish
+ "dutch": "nld_Latn", # dutch
+ "english": "eng_Latn", # english
+ "finnish": "fin_Latn", # finnish
+ "french": "fra_Latn", # french
+ "german": "deu_Latn", # german
+ "greek": "ell_Grek", # greek
+ "hungarian": "hun_Latn", # hungarian
+ "icelandic": "isl_Latn", # icelandic
+ "italian": "ita_Latn", # italian
+ "latvian": "lvs_Latn", # latvian
+ "lithuanian": "lit_Latn", # lithuanian
+ "norwegian": "nob_Latn", # norwegian (bokmal)
+ "polish": "pol_Latn", # polish
+ "portuguese": "por_Latn", # portuguese (key must match the GUI/Whisper spelling)
+ "romanian": "ron_Latn", # romanian
+ "russian": "rus_Cyrl", # russian
+ "slovak": "slk_Latn", # slovak
+ "slovenian": "slv_Latn", # slovenian (GUI/Whisper name; "slovene" is never passed)
+ "spanish": "spa_Latn", # spanish
+ "swedish": "swe_Latn", # swedish
+ "turkish": "tur_Latn", # turkish
+ }
+
+# NLLB to sentence_splitter (SS). Keys must cover every value of whisper_to_nllb.
+nllb_to_ss = {
+ "cat_Latn": "ca", # catalan
+ "ces_Latn": "cs", # czech
+ "dan_Latn": "da", # danish
+ "nld_Latn": "nl", # dutch
+ "eng_Latn": "en", # english
+ "fin_Latn": "fi", # finnish
+ "fra_Latn": "fr", # french
+ "deu_Latn": "de", # german
+ "ell_Grek": "el", # greek
+ "hun_Latn": "hu", # hungarian
+ "isl_Latn": "is", # icelandic
+ "ita_Latn": "it", # italian
+ "lvs_Latn": "lv", # latvian
+ "lit_Latn": "lt", # lithuanian
+ "nob_Latn": "no", # norwegian (bokmal)
+ "pol_Latn": "pl", # polish
+ "por_Latn": "pt", # portuguese
+ "ron_Latn": "ro", # romanian
+ "rus_Cyrl": "ru", # russian
+ "slk_Latn": "sk", # slovak
+ "slv_Latn": "sl", # slovenian
+ "spa_Latn": "es", # spanish
+ "swe_Latn": "sv", # swedish
+ "tur_Latn": "tr", # turkish
+ }
diff --git a/Scripts/requirements.txt b/Scripts/requirements.txt
index 5500a91..647e942 100644
--- a/Scripts/requirements.txt
+++ b/Scripts/requirements.txt
@@ -11,4 +11,5 @@ pyaudio
python-osc
playsound==1.2.2
pyyaml
+sentence_splitter
transformers>=4.21.0
diff --git a/Scripts/transcribe.py b/Scripts/transcribe.py
index e113be1..fe06631 100644
--- a/Scripts/transcribe.py
+++ b/Scripts/transcribe.py
@@ -5,6 +5,7 @@ from emotes_v2 import EmotesState
from faster_whisper import WhisperModel
from functools import partial
from playsound import playsound
+from sentence_splitter import split_text_into_sentences
import argparse
import copy
@@ -12,6 +13,7 @@ import ctranslate2
import generate_utils
import keybind_event_machine
import keyboard
+import lang_compat
import langcodes
import numpy as np
import os
@@ -71,7 +73,7 @@ class AudioState:
# The language the user is speaking in. Default is English but user may set
# this to whatever they want.
- self.language = "en"
+ self.language = "english"
self.audio_paused = False
@@ -257,6 +259,8 @@ def transcribeAudio(audio_state,
if audio_state.drop_transcription:
audio_state.drop_transcription = False
+ audio_state.text = ""
+ audio_state.filtered_text = ""
print("drop transcription ({} seconds)".format(time.time() - last_transcribe_time))
last_transcribe_time = time.time()
continue
@@ -265,16 +269,30 @@ def transcribeAudio(audio_state,
audio_state.text = string_matcher.matchStrings(audio_state.text,
text, window_size = 25)
+ now = time.time()
+ print("Transcription ({} seconds): {}".format(
+ now - last_transcribe_time,
+ audio_state.text))
+ last_transcribe_time = now
+
# Translate if requested.
- if audio_state.language_source and audio_state.language_target:
- source = audio_state.tokenizer.convert_ids_to_tokens(audio_state.tokenizer.encode(copy.copy(audio_state.text)))
- target_prefix = [audio_state.language_target]
- results = audio_state.translator.translate_batch([source], target_prefix=[target_prefix])
- target = results[0].hypotheses[0][1:]
- translated = audio_state.tokenizer.decode(audio_state.tokenizer.convert_tokens_to_ids(target))
- print(f"Translated text: {translated}")
- else:
- translated = copy.copy(audio_state.text)
+ translated = audio_state.text
+ if audio_state.language_target:
+ whisper_lang = audio_state.whisper_language
+ nllb_lang = lang_compat.whisper_to_nllb[whisper_lang]
+ ss_lang = lang_compat.nllb_to_ss[nllb_lang]
+ sentences = split_text_into_sentences(translated, language=ss_lang)
+
+ translated_sentences = []
+ for sentence in sentences:
+ source = audio_state.tokenizer.convert_ids_to_tokens(audio_state.tokenizer.encode(sentence))
+ target_prefix = [audio_state.language_target]
+ results = audio_state.translator.translate_batch([source], target_prefix=[target_prefix])
+ target = results[0].hypotheses[0][1:]
+ translated_sentence = audio_state.tokenizer.decode(audio_state.tokenizer.convert_tokens_to_ids(target))
+ translated_sentences.append(translated_sentence)
+ translated = " ".join(translated_sentences)
+ print(f"Translation: {translated}")
# Apply filters to transcription
filtered_text = translated
@@ -296,12 +314,6 @@ def transcribeAudio(audio_state,
filtered_text = filtered_text.lower()
audio_state.filtered_text = filtered_text
- now = time.time()
- print("Transcription ({} seconds): {}".format(
- now - last_transcribe_time,
- audio_state.text))
- last_transcribe_time = now
-
if old_text != audio_state.text:
# We think the user said something, so reset the amount of
# time we sleep between transcriptions to the minimum.
@@ -358,10 +370,10 @@ def readKeyboardInput(audio_state, enable_local_beep: bool,
osc_ctrl.toggleBoard(audio_state.osc_state.client, False)
#playsound(os.path.abspath("../Sounds/Noise_Off_Quiet.wav"))
- resetAudioLocked(audio_state)
- resetDisplayLocked(audio_state)
audio_state.drop_transcription = True
audio_state.audio_paused = True
+ resetAudioLocked(audio_state)
+ resetDisplayLocked(audio_state)
continue
# Short hold
@@ -383,12 +395,12 @@ def readKeyboardInput(audio_state, enable_local_beep: bool,
osc_ctrl.indicateSpeech(audio_state.osc_state.client, True)
osc_ctrl.toggleBoard(audio_state.osc_state.client, True)
osc_ctrl.lockWorld(audio_state.osc_state.client, False)
- resetAudioLocked(audio_state)
- resetDisplayLocked(audio_state)
-
audio_state.drop_transcription = True
audio_state.audio_paused = False
+ resetAudioLocked(audio_state)
+ resetDisplayLocked(audio_state)
+
if enable_local_beep == 1:
playsound(os.path.abspath("Resources/Sounds/Noise_On_Quiet.wav"),
block=False)
@@ -506,7 +518,6 @@ def readControllerInput(audio_state, enable_local_beep: bool,
# whisper/__init__.py. Examples: tiny, base, small, medium.
def transcribeLoop(mic: str,
language: str,
- language_source: str,
language_target: str,
model: str,
model_translation: str,
@@ -523,6 +534,7 @@ def transcribeLoop(mic: str,
gpu_idx: int,
keyboard_hotkey: str):
audio_state = getMicStream(mic)
+ audio_state.whisper_language = language
audio_state.language = langcodes.find(language).language
audio_state.MAX_LENGTH_S = window_duration_s
@@ -532,14 +544,8 @@ def transcribeLoop(mic: str,
audio_state.language_target = lang_code
else:
audio_state.language_target = None
- lang_bits = language_source.split(" | ")
- if len(lang_bits) == 2:
- lang_code = lang_bits[1]
- audio_state.language_source = lang_code
- else:
- audio_state.language_source = None
- if audio_state.language_source and audio_state.language_target:
+ if audio_state.language_target:
print("Translation requested")
print("Installing torch and sentencepiece in virtual environment. "
@@ -579,9 +585,15 @@ def transcribeLoop(mic: str,
print(f"Using model at {output_dir}")
audio_state.translator = ctranslate2.Translator(output_dir)
+
+ whisper_lang = audio_state.whisper_language
+ nllb_lang = lang_compat.whisper_to_nllb[whisper_lang]
+
audio_state.tokenizer = transformers.AutoTokenizer.from_pretrained(
"facebook/" + model_translation,
- src_lang=audio_state.language_source)
+ src_lang=nllb_lang)
+
+ print(f"Translation ready to go")
print("Safe to start talking")
@@ -661,7 +673,6 @@ if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--mic", type=str, help="Which mic to use. Options: index, focusrite. Default: index")
parser.add_argument("--language", type=str, help="Which language to use. Ex: english, japanese, chinese, french, german.")
- parser.add_argument("--language_source", type=str, help="Which language to translate from. See kLangTargetChoices in Frame.cpp for valid choices")
parser.add_argument("--language_target", type=str, help="Which language to translate into. See kLangTargetChoices in Frame.cpp for valid choices")
parser.add_argument("--model", type=str, help="Which transcription model to use. " \
"Options: tiny, tiny.en, base, base.en, small, small.en, " \
@@ -692,8 +703,8 @@ if __name__ == "__main__":
if not args.language:
args.language = "english"
- if not args.language_source or not args.language_target:
- print("--language_source and --language_target required", file=sys.stderr)
+ if not args.language_target:
+ print("--language_target required", file=sys.stderr)
if not args.model:
args.model = "base"
@@ -769,7 +780,6 @@ if __name__ == "__main__":
transcribeLoop(args.mic,
args.language,
- args.language_source,
args.language_target,
args.model,
args.model_translation,