summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authoryum <yum.food.vr@gmail.com>2023-01-22 15:05:54 -0800
committeryum <yum.food.vr@gmail.com>2023-01-22 15:35:00 -0800
commit1c056bf385d2c48f6e4f77da513060c04415252c (patch)
tree81091edb39e9e2b18652141dd3723751284c82ce
parent06160c37acb26cfac9bab568bd3759c2386fb175 (diff)
Enable using built-in chatbox (v0.3)
VRChat exposes a built-in chatbox which can be seen by anyone who has it enabled. This was not the case when I started this project: the chatbox would only be visible to friends. Since this is clearly useful (it enables the STT on public models), let's enable sending data to it. Caveats: the built-in chatbox has anti-spam tech which limits us to updating about once every 2 seconds. The custom chatbox has no such limitation and is thus typically much faster.
-rw-r--r--GUI/GUI/GUI/Config.cpp8
-rw-r--r--GUI/GUI/GUI/Config.h1
-rw-r--r--GUI/GUI/GUI/Frame.cpp13
-rw-r--r--GUI/GUI/GUI/Frame.h1
-rw-r--r--GUI/GUI/GUI/PythonWrapper.cpp1
-rw-r--r--Scripts/osc_ctrl.py18
-rw-r--r--Scripts/transcribe.py50
7 files changed, 71 insertions, 21 deletions
diff --git a/GUI/GUI/GUI/Config.cpp b/GUI/GUI/GUI/Config.cpp
index 9ce498a..4e6eb48 100644
--- a/GUI/GUI/GUI/Config.cpp
+++ b/GUI/GUI/GUI/Config.cpp
@@ -77,7 +77,8 @@ TranscriptionAppConfig::TranscriptionAppConfig()
cols("48"),
window_duration("15"),
enable_local_beep(true),
- use_cpu(false)
+ use_cpu(false),
+ use_builtin(false)
{}
bool TranscriptionAppConfig::Serialize(const std::filesystem::path& path) {
@@ -94,8 +95,8 @@ bool TranscriptionAppConfig::Serialize(const std::filesystem::path& path) {
root["window_duration"] << ryml::to_substr(window_duration);
root["enable_local_beep"] << enable_local_beep;
root["use_cpu"] << use_cpu;
-
- return Config::Serialize(path, &t);
+ root["use_builtin"] << use_builtin;
+ return Config::Serialize(path, &t);
}
bool TranscriptionAppConfig::Deserialize(const std::filesystem::path& path) {
@@ -123,6 +124,7 @@ bool TranscriptionAppConfig::Deserialize(const std::filesystem::path& path) {
root.get_if("window_duration", &c.window_duration);
root.get_if("enable_local_beep", &c.enable_local_beep);
root.get_if("use_cpu", &c.use_cpu);
+ root.get_if("use_builtin", &c.use_builtin);
*this = std::move(c);
return true;
diff --git a/GUI/GUI/GUI/Config.h b/GUI/GUI/GUI/Config.h
index 985380b..fe7b862 100644
--- a/GUI/GUI/GUI/Config.h
+++ b/GUI/GUI/GUI/Config.h
@@ -46,6 +46,7 @@ public:
std::string window_duration;
bool enable_local_beep;
bool use_cpu;
+ bool use_builtin;
};
// Represents the configurable fields for the Unity app.
diff --git a/GUI/GUI/GUI/Frame.cpp b/GUI/GUI/GUI/Frame.cpp
index f26cbec..f9c7998 100644
--- a/GUI/GUI/GUI/Frame.cpp
+++ b/GUI/GUI/GUI/Frame.cpp
@@ -35,6 +35,7 @@ namespace {
ID_PY_APP_MODEL_PANEL,
ID_PY_APP_ENABLE_LOCAL_BEEP,
ID_PY_APP_USE_CPU,
+ ID_PY_APP_USE_BUILTIN,
ID_PY_APP_ROWS,
ID_PY_APP_COLS,
ID_PY_APP_WINDOW_DURATION,
@@ -433,6 +434,15 @@ Frame::Frame()
);
py_app_use_cpu_ = py_app_use_cpu;
+ auto* py_app_use_builtin = new wxCheckBox(py_config_panel,
+ ID_PY_APP_USE_BUILTIN, "Use built-in chatbox");
+ py_app_use_builtin->SetValue(py_c.use_builtin);
+ py_app_use_builtin->SetToolTip(
+ "If checked, text will be sent to the built-in text box "
+ "instead of one attached to the current avatar."
+ );
+ py_app_use_builtin_ = py_app_use_builtin;  // Read back in OnAppStart.
+
auto* py_app_start_button = new wxButton(py_config_panel, ID_PY_APP_START_BUTTON, "Begin transcribing");
auto* py_app_stop_button = new wxButton(py_config_panel, ID_PY_APP_STOP_BUTTON, "Stop transcribing");
@@ -443,6 +453,7 @@ Frame::Frame()
sizer->Add(py_app_config_panel_pairs, /*proportion=*/0, /*flags=*/wxEXPAND);
sizer->Add(py_app_enable_local_beep, /*proportion=*/0, /*flags=*/wxEXPAND);
sizer->Add(py_app_use_cpu, /*proportion=*/0, /*flags=*/wxEXPAND);
+ sizer->Add(py_app_use_builtin, /*proportion=*/0, /*flags=*/wxEXPAND);
sizer->Add(py_app_start_button, /*proportion=*/0, /*flags=*/wxEXPAND);
sizer->Add(py_app_stop_button, /*proportion=*/0, /*flags=*/wxEXPAND);
}
@@ -935,6 +946,7 @@ void Frame::OnAppStart(wxCommandEvent& event) {
}
const bool enable_local_beep = py_app_enable_local_beep_->GetValue();
const bool use_cpu = py_app_use_cpu_->GetValue();
+ const bool use_builtin = py_app_use_builtin_->GetValue();
std::string rows_str = py_app_rows_->GetValue().ToStdString();
std::string cols_str = py_app_cols_->GetValue().ToStdString();
std::string window_duration_str = py_app_window_duration_->GetValue().ToStdString();
@@ -978,6 +990,7 @@ void Frame::OnAppStart(wxCommandEvent& event) {
py_c.window_duration = std::to_string(window_duration);
py_c.enable_local_beep = enable_local_beep;
py_c.use_cpu = use_cpu;
+ py_c.use_builtin = use_builtin;
py_c.Serialize(TranscriptionAppConfig::kConfigPath);
wxProcess* p = PythonWrapper::StartApp(std::move(cb), py_c);
diff --git a/GUI/GUI/GUI/Frame.h b/GUI/GUI/GUI/Frame.h
index cd62127..621715b 100644
--- a/GUI/GUI/GUI/Frame.h
+++ b/GUI/GUI/GUI/Frame.h
@@ -51,6 +51,7 @@ private:
wxCheckBox* py_app_enable_local_beep_;
wxCheckBox* py_app_use_cpu_;
+ wxCheckBox* py_app_use_builtin_;
wxProcess* py_app_;
wxTimer py_app_drain_;
diff --git a/GUI/GUI/GUI/PythonWrapper.cpp b/GUI/GUI/GUI/PythonWrapper.cpp
index 5581739..f26072a 100644
--- a/GUI/GUI/GUI/PythonWrapper.cpp
+++ b/GUI/GUI/GUI/PythonWrapper.cpp
@@ -159,6 +159,7 @@ wxProcess* PythonWrapper::StartApp(
"--cols", config.cols,
"--window_duration_s", config.window_duration,
"--cpu", config.use_cpu ? "1" : "0",
+ "--use_builtin", config.use_builtin ? "1" : "0",
},
std::move(exit_callback));
}
diff --git a/Scripts/osc_ctrl.py b/Scripts/osc_ctrl.py
index 7c7d0ae..e57a843 100644
--- a/Scripts/osc_ctrl.py
+++ b/Scripts/osc_ctrl.py
@@ -30,6 +30,7 @@ class OscState:
self.client = getClient(ip, port)
self.pager = MultiLinePager(chars_per_sync, rows, cols)
self.encoding= generateEncoding()
+ self.builtin_msg = "" # The last message sent to the built-in chatbox
def reset(self):
self.pager.reset()
@@ -137,6 +138,23 @@ def pageMessage(osc_state: OscState, msg: str) -> bool:
addr="/avatar/parameters/" + generate_utils.getSpeechNoiseToggleParam()
osc_state.client.send_message(addr, False)
+# Like `pageMessage`, but sends to the built-in chatbox ("/chatbox/input").
+# That chatbox truncates data at about 150 chars, so for now only the last
+# 140 chars of `msg` are sent. Returns False, i.e. "not paging".
+def pageMessageBuiltin(osc_state: OscState, msg: str) -> bool:
+ msg_begin = max(len(msg) - 140, 0)
+ msg_suffix = msg[msg_begin:len(msg)]
+
+ if osc_state.builtin_msg != msg:
+ addr="/chatbox/typing"
+ osc_state.client.send_message(addr, False)
+
+ addr="/chatbox/input"
+ osc_state.client.send_message(addr, (msg_suffix, True))
+ osc_state.builtin_msg = msg
+
+ return False # Not paging
+
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("-i", default="127.0.0.1", help="OSC server IP")
diff --git a/Scripts/transcribe.py b/Scripts/transcribe.py
index 7f07efe..1237334 100644
--- a/Scripts/transcribe.py
+++ b/Scripts/transcribe.py
@@ -303,17 +303,21 @@ def transcribeAudio(audio_state, model, use_cpu: bool):
audio_state.transcribe_no_change_count = 0
audio_state.transcribe_sleep_duration = audio_state.transcribe_sleep_duration_min_s
-def sendAudio(audio_state):
+def sendAudio(audio_state, use_builtin: bool):
while audio_state.run_app == True:
text = audio_state.committed_text + " " + audio_state.text
- ret = osc_ctrl.pageMessage(audio_state.osc_state, text)
- is_paging = (ret == False)
- osc_ctrl.indicatePaging(audio_state.osc_state.client, is_paging)
+ if use_builtin:
+ ret = osc_ctrl.pageMessageBuiltin(audio_state.osc_state, text)
+ time.sleep(1.5)
+ else:
+ ret = osc_ctrl.pageMessage(audio_state.osc_state, text)
+ is_paging = (ret == False)
+ osc_ctrl.indicatePaging(audio_state.osc_state.client, is_paging)
- # Pace this out
- time.sleep(0.01)
+ # Pace this out
+ time.sleep(0.01)
-def readControllerInput(audio_state, enable_local_beep):
+def readControllerInput(audio_state, enable_local_beep, use_builtin):
session = None
first = True
while session == None and audio_state.run_app == True:
@@ -343,8 +347,9 @@ def readControllerInput(audio_state, enable_local_beep):
if now - last_rising > 0.5:
# Long hold
state = PAUSE_STATE
- osc_ctrl.indicateSpeech(audio_state.osc_state.client, False)
- osc_ctrl.toggleBoard(audio_state.osc_state.client, False)
+ if not use_builtin:
+ osc_ctrl.indicateSpeech(audio_state.osc_state.client, False)
+ osc_ctrl.toggleBoard(audio_state.osc_state.client, False)
#playsound(os.path.abspath("../Sounds/Noise_Off.wav"))
resetAudioLocked(audio_state)
@@ -355,8 +360,9 @@ def readControllerInput(audio_state, enable_local_beep):
# Short hold
if state == RECORD_STATE:
state = PAUSE_STATE
- osc_ctrl.indicateSpeech(audio_state.osc_state.client, False)
- osc_ctrl.lockWorld(audio_state.osc_state.client, True)
+ if not use_builtin:
+ osc_ctrl.indicateSpeech(audio_state.osc_state.client, False)
+ osc_ctrl.lockWorld(audio_state.osc_state.client, True)
audio_state.transcribe_sleep_duration = audio_state.transcribe_sleep_duration_min_s
audio_state.audio_paused = True
@@ -365,9 +371,10 @@ def readControllerInput(audio_state, enable_local_beep):
playsound(os.path.abspath("../Sounds/Noise_Off.wav"))
elif state == PAUSE_STATE:
state = RECORD_STATE
- osc_ctrl.indicateSpeech(audio_state.osc_state.client, True)
- osc_ctrl.toggleBoard(audio_state.osc_state.client, True)
- osc_ctrl.lockWorld(audio_state.osc_state.client, False)
+ if not use_builtin:
+ osc_ctrl.indicateSpeech(audio_state.osc_state.client, True)
+ osc_ctrl.toggleBoard(audio_state.osc_state.client, True)
+ osc_ctrl.lockWorld(audio_state.osc_state.client, False)
resetAudioLocked(audio_state)
resetDisplayLocked(audio_state)
@@ -379,7 +386,8 @@ def readControllerInput(audio_state, enable_local_beep):
# model should correspond to one of the Whisper models defined in
# whisper/__init__.py. Examples: tiny, base, small, medium.
-def transcribeLoop(mic: str, language: str, model: str, enable_local_beep: bool, use_cpu: bool):
+def transcribeLoop(mic: str, language: str, model: str,
+ enable_local_beep: bool, use_cpu: bool, use_builtin: bool):
audio_state = getMicStream(mic)
audio_state.language = whisper.tokenizer.TO_LANGUAGE_CODE[language]
@@ -396,11 +404,11 @@ def transcribeLoop(mic: str, language: str, model: str, enable_local_beep: bool,
transcribe_audio_thd.daemon = True
transcribe_audio_thd.start()
- send_audio_thd = threading.Thread(target = sendAudio, args = [audio_state])
+ send_audio_thd = threading.Thread(target = sendAudio, args = [audio_state, use_builtin])
send_audio_thd.daemon = True
send_audio_thd.start()
- controller_input_thd = threading.Thread(target = readControllerInput, args = [audio_state, enable_local_beep])
+ controller_input_thd = threading.Thread(target = readControllerInput, args = [audio_state, enable_local_beep, use_builtin])
controller_input_thd.daemon = True
controller_input_thd.start()
@@ -443,6 +451,7 @@ if __name__ == "__main__":
parser.add_argument("--cols", type=int, help="The number of columns on the board")
parser.add_argument("--window_duration_s", type=int, help="The length in seconds of the audio recording handed to the transcription algorithm")
parser.add_argument("--cpu", type=int, help="If set to 1, use CPU instead of GPU")
+ parser.add_argument("--use_builtin", type=int, help="If set to 1, use the text box built into the game.")
args = parser.parse_args()
if not args.mic:
@@ -470,11 +479,16 @@ if __name__ == "__main__":
else:
args.cpu = False
+ if args.use_builtin == 1:
+ args.use_builtin = True
+ else:
+ args.use_builtin = False
+
generate_utils.config.BYTES_PER_CHAR = int(args.bytes_per_char)
generate_utils.config.CHARS_PER_SYNC = int(args.chars_per_sync)
generate_utils.config.BOARD_ROWS = int(args.rows)
generate_utils.config.BOARD_COLS = int(args.cols)
transcribeLoop(args.mic, args.language, args.model, args.enable_local_beep,
- args.cpu)
+ args.cpu, args.use_builtin)