summaryrefslogtreecommitdiffstats
path: root/obfuscate.py
diff options
context:
space:
mode:
author: yum <yum.food.vr@gmail.com> 2022-11-13 15:39:15 -0800
committer: yum <yum.food.vr@gmail.com> 2022-11-17 16:52:36 -0800
commit: 27a67666c320bed3b4a18e415eb9702b03f8f0b5 (patch)
tree: 5122444e130927312eec3754796cde11f3872e56 /obfuscate.py
parent: d2e06445c42b22d2b75f5da1980b7a8d833a9c5b (diff)
Begin work on obfuscation
The basic idea is that we can raise the barrier to entry for potential data miners by encrypting traffic with a pre-shared key. Any data miner would probably have access to both the compiled shader and network data, which is obviously sufficient to decrypt that data. But they would have to spend a little time figuring it out, which should defeat most casual miners.
Diffstat (limited to 'obfuscate.py')
-rw-r--r--obfuscate.py92
1 files changed, 92 insertions, 0 deletions
diff --git a/obfuscate.py b/obfuscate.py
new file mode 100644
index 0000000..8d01e10
--- /dev/null
+++ b/obfuscate.py
@@ -0,0 +1,92 @@
+#!/usr/bin/env python3
+
+# This module is used to implement obfuscation of TaSTT network
+# speech data. At a high level, TaSTT is simply streaming N bits of
+# arbitrary data to a shader via VRChat's parameter sync mechanism.
+#
+# It would be trivial to mine this data for speech information, since
+# we're sending unicode (or ASCII) characters to peers.
+#
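+# To raise the cost for the casual data collector, we can obfuscate
+# this data by XORing it with a pre-shared key, chaining each block
+# into the next in the style of cipher-block chaining (CBC). Because
+# the key is reused for every block, this is not a true one-time pad.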
+#
+# Making things interesting, encrypted data will arrive at the Unity
+# animator, which processes them in 8 bit chunks. They are written
+# into contiguous blocks of the animator. Thus the shader can decrypt
+# the board by decrypting each block. This is thus stronger than
+# applying a one-time pad to each byte of the plaintext, since the
+# statistical distribution of individual letters is destroyed.
+# Obviously due to the lack of an initialization vector, the
+# distribution of phrases (blocks) is preserved.
+
+import math
+import os
+
def genKey(n_bits: int = 128) -> bytes:
    """Generate a random pre-shared key.

    Args:
        n_bits: Requested key size in bits. Sizes that are not a multiple
            of 8 are rounded down to a whole number of bytes.

    Returns:
        `n_bits // 8` random bytes obtained from `os.urandom`.

    Raises:
        ValueError: If `n_bits` is too small to produce at least one byte.
            An empty key cannot be used as a cypher block.
    """
    # Floor division avoids the float round-trip of `int(n_bits / 8)`;
    # the int() wrapper keeps float arguments such as 128.0 working.
    n_bytes = int(n_bits // 8)
    if n_bytes < 1:
        raise ValueError(
            f"n_bits must be at least 8 to produce a non-empty key, got {n_bits}")
    return os.urandom(n_bytes)
+
def saveKey(filename: str, key: bytes) -> None:
    """Write `key` to `filename` as raw bytes.

    Args:
        filename: Path of the file to write. An existing file is replaced.
        key: The key material. Must be bytes-like (`bytes` or `bytearray`)
            because the file is opened in binary mode; passing a `str`
            raises `TypeError`.
    """
    with open(filename, "wb") as f:
        f.write(key)
+
def loadKey(filename: str) -> bytes:
    """Read a key previously stored as raw bytes.

    Args:
        filename: Path of the key file to read.

    Returns:
        The entire file contents as an immutable `bytes` object.
    """
    with open(filename, "rb") as f:
        return f.read()
+
+# Apply a symmetric cypher to `data` using cypher-block chaining.
def obfuscate(data: bytearray, key: bytearray) -> bytearray:
    """Obfuscate `data` by XORing it with `key` using block chaining.

    The data is split into blocks of `len(key)` bytes. Each plaintext byte
    is XORed with the matching key byte and with the matching byte of the
    previous cyphertext block. The final block may be shorter than the key.

    Args:
        data: Plaintext bytes. Any bytes-like sequence is accepted
            (`bytes` or `bytearray`); it is not modified.
        key: Pre-shared key; its length is the block size.

    Returns:
        A new `bytearray` with the same length as `data`.

    Raises:
        ValueError: If `key` is empty.
    """
    block_size = len(key)
    if block_size == 0:
        raise ValueError("key must not be empty")

    # This is a misnomer for an IV. A true IV would be randomized, but we
    # can't do that since the shader doesn't have access to it. The
    # all-zero block only seeds the "chaining" aspect of CBC.
    prev_cypher = bytes(block_size)
    result = bytearray()
    for block_begin in range(0, len(data), block_size):
        block_plain = data[block_begin:block_begin + block_size]
        # zip() stops at the shortest input, so a short final block only
        # consumes the leading bytes of the key and the previous block.
        block_cypher = bytearray(
            p ^ c ^ k for p, c, k in zip(block_plain, prev_cypher, key))
        result += block_cypher
        prev_cypher = block_cypher
    return result
+
def deobfuscate(data: bytearray, key: bytearray) -> bytearray:
    """Recover the plaintext from data produced by the chained XOR cypher.

    The data is split into blocks of `len(key)` bytes. Each cyphertext byte
    is XORed with the matching key byte and with the matching byte of the
    previous cyphertext block. The final block may be shorter than the key.

    Args:
        data: Cyphertext bytes. Any bytes-like sequence is accepted
            (`bytes` or `bytearray`); it is not modified.
        key: Pre-shared key; its length is the block size.

    Returns:
        A new `bytearray` with the same length as `data`.

    Raises:
        ValueError: If `key` is empty.
    """
    block_size = len(key)
    if block_size == 0:
        raise ValueError("key must not be empty")

    # This is a misnomer for an IV. A true IV would be randomized, but we
    # can't do that since the shader doesn't have access to it. The
    # all-zero block only seeds the "chaining" aspect of CBC.
    prev_cypher = bytes(block_size)
    result = bytearray()
    for block_begin in range(0, len(data), block_size):
        block_cypher = data[block_begin:block_begin + block_size]
        # zip() stops at the shortest input, so a short final block only
        # consumes the leading bytes of the key and the previous block.
        result.extend(
            c ^ k ^ prev for c, k, prev in zip(block_cypher, key, prev_cypher))
        # Chaining uses the previous *cyphertext* block, not the plaintext.
        prev_cypher = block_cypher
    return result
+
def test():
    """Self-test: round-trip a key through disk and a message through the cypher.

    Raises:
        AssertionError: If the key does not survive save/load, or the
            obfuscate/deobfuscate round trip does not reproduce the input.
    """
    import tempfile

    key = genKey()
    # Work inside a private temporary directory so a real "test.key" in the
    # working directory is never overwritten or deleted, and so the file is
    # cleaned up even if saving or loading raises.
    with tempfile.TemporaryDirectory() as tmp_dir:
        key_path = os.path.join(tmp_dir, "test.key")
        saveKey(key_path, key)
        new_key = loadKey(key_path)
    assert key == new_key

    plaintext_original = "Lorem ipsum dolor sit amet, consectetur adipiscing elit."
    plaintext_bytes = bytearray(plaintext_original, "utf-8")
    cyphertext = obfuscate(plaintext_bytes, key)
    # The cypher is length-preserving: no padding is added.
    assert len(plaintext_bytes) == len(cyphertext)
    plaintext_recovered = deobfuscate(cyphertext, key).decode("utf-8")
    assert plaintext_original == plaintext_recovered
    # The output must actually differ from the input.
    assert plaintext_bytes != cyphertext


if __name__ == "__main__":
    test()
+