diff options
| author | yum <yum.food.vr@gmail.com> | 2022-11-13 15:39:15 -0800 |
|---|---|---|
| committer | yum <yum.food.vr@gmail.com> | 2022-11-17 16:52:36 -0800 |
| commit | 27a67666c320bed3b4a18e415eb9702b03f8f0b5 (patch) | |
| tree | 5122444e130927312eec3754796cde11f3872e56 /obfuscate.py | |
| parent | d2e06445c42b22d2b75f5da1980b7a8d833a9c5b (diff) | |
Begin work on obfuscation
The basic idea is that we can raise the barrier to entry for potential
data miners by encrypting traffic with a pre-shared key. Any data miner
would probably have access to both the compiled shader and network data,
which is obviously sufficient to decrypt that data. But they would have
to spend a little time figuring it out, which should defeat most casual
miners.
Diffstat (limited to 'obfuscate.py')
| -rw-r--r-- | obfuscate.py | 92 |
1 files changed, 92 insertions, 0 deletions
#!/usr/bin/env python3

# This module implements obfuscation of TaSTT network speech data. At a
# high level, TaSTT is simply streaming N bits of arbitrary data to a
# shader via VRChat's parameter sync mechanism.
#
# It would be trivial to mine this data for speech information, since
# we're sending unicode (or ASCII) characters to peers.
#
# To raise the cost for the casual data collector, we obfuscate this data
# by XORing it with a pre-shared key in cipher-block chaining mode. (The
# original notes call this a "one-time pad"; strictly speaking it is not,
# because the same key is reused for every block.)
#
# Making things interesting, encrypted data will arrive at the Unity
# animator, which processes it in 8 bit chunks. The chunks are written
# into contiguous blocks of the animator, so the shader can decrypt the
# board by decrypting each block. This is stronger than applying a pad to
# each byte of the plaintext independently, since the statistical
# distribution of individual letters is destroyed. Due to the lack of a
# real initialization vector, the distribution of phrases (blocks) is
# preserved.
#
# NOTE: this is obfuscation, not encryption. Anyone holding the key (or
# enough known plaintext) can recover the data.

import math
import os
import tempfile


def genKey(n_bits: int = 128) -> bytes:
    """Return a random key of ``n_bits // 8`` bytes read from ``os.urandom``.

    Bit counts that are not a multiple of 8 are rounded down to whole bytes.
    """
    return os.urandom(int(n_bits // 8))


def saveKey(filename: str, key: bytes) -> None:
    """Write the raw bytes of `key` to `filename`, replacing any existing file."""
    with open(filename, "wb") as f:
        f.write(key)


def loadKey(filename: str) -> bytes:
    """Return the entire contents of `filename` as raw key bytes."""
    with open(filename, "rb") as f:
        return f.read()


def _xorChain(data, key, decrypt: bool) -> bytearray:
    """Run the XOR cipher-block chain over `data` in either direction.

    `data` is processed in blocks of ``len(key)`` bytes. Every output byte
    is ``input ^ key ^ previous_cyphertext``, where the "previous
    cyphertext" of the first block is all zeroes. The final block may be
    shorter than the key; only as many key bytes as needed are used.

    Raises:
        ValueError: if `key` is empty.
    """
    block_size = len(key)
    if block_size == 0:
        raise ValueError("key must contain at least one byte")

    # This stands in for an IV, but that is a misnomer. A true IV would be
    # randomized, but we can't do that since the shader doesn't have access
    # to it. It only exists to implement the "chaining" aspect of CBC.
    prev_cypher = bytes(block_size)
    result = bytearray()
    n_blocks = math.ceil(len(data) / block_size)
    for block_index in range(n_blocks):
        block_begin = block_index * block_size
        block_in = data[block_begin:block_begin + block_size]
        # zip() stops at the shortest input, which handles a short last block.
        block_out = bytes(
            b ^ c ^ k for b, c, k in zip(block_in, prev_cypher, key)
        )
        result += block_out
        # The chain is always driven by the cyphertext block: that is the
        # output when obfuscating and the input when deobfuscating.
        prev_cypher = block_in if decrypt else block_out
    return result


# Apply a symmetric cypher to `data` using cypher-block chaining.
def obfuscate(data: bytearray, key: bytearray) -> bytearray:
    """Obfuscate `data` with `key`; the result has the same length as `data`.

    Accepts any bytes-like `data` and `key` (``bytes`` or ``bytearray``).

    Raises:
        ValueError: if `key` is empty.
    """
    return _xorChain(data, key, decrypt=False)


def deobfuscate(data: bytearray, key: bytearray) -> bytearray:
    """Invert `obfuscate`: recover the plaintext from `data` using `key`.

    Accepts any bytes-like `data` and `key` (``bytes`` or ``bytearray``).

    Raises:
        ValueError: if `key` is empty.
    """
    return _xorChain(data, key, decrypt=True)


def test():
    """Self-test: key save/load round trip and obfuscate/deobfuscate round trip."""
    key = genKey()
    # Use a scratch directory so the test never clobbers or leaks a file in
    # the current working directory, even when a step fails.
    with tempfile.TemporaryDirectory() as tmp_dir:
        key_path = os.path.join(tmp_dir, "test.key")
        saveKey(key_path, key)
        new_key = loadKey(key_path)
    assert(key == new_key)

    plaintext_original = "Lorem ipsum dolor sit amet, consectetur adipiscing elit."
    plaintext_bytes = bytearray(plaintext_original, "utf-8")
    cyphertext = obfuscate(plaintext_bytes, key)
    assert(len(plaintext_bytes) == len(cyphertext))
    plaintext_recovered = deobfuscate(cyphertext, key).decode("utf-8")
    assert(plaintext_original == plaintext_recovered)
    assert(plaintext_bytes != cyphertext)


if __name__ == "__main__":
    test()
