diff options
Diffstat (limited to 'Scripts/obfuscate.py')
| -rw-r--r-- | Scripts/obfuscate.py | 92 |
1 files changed, 92 insertions, 0 deletions
#!/usr/bin/env python3

# This module is used to implement obfuscation of TaSTT network
# speech data. At a high level, TaSTT is simply streaming N bits of
# arbitrary data to a shader via VRChat's parameter sync mechanism.
#
# It would be trivial to mine this data for speech information, since
# we're sending unicode (or ASCII) characters to peers.
#
# To raise the cost for the casual data collector, we can obfuscate
# this data using a one-time pad in cipher-block chaining mode.
#
# Making things interesting, encrypted data will arrive at the Unity
# animator, which processes them in 8 bit chunks. They are written
# into contiguous blocks of the animator. Thus the shader can decrypt
# the board by decrypting each block. This is thus stronger than
# applying a one-time pad to each byte of the plaintext, since the
# statistical distribution of individual letters is destroyed.
# Obviously due to the lack of an initialization vector, the
# distribution of phrases (blocks) is preserved.

import math
import os
import tempfile


def genKey(n_bits: int = 128) -> bytes:
    """Return a fresh random key of `n_bits // 8` bytes.

    Args:
        n_bits: Requested key size in bits. Sizes that are not a multiple
            of 8 are rounded down to whole bytes.

    Raises:
        ValueError: If `n_bits` is too small to yield at least one byte.
            An empty key cannot be used by obfuscate()/deobfuscate().
    """
    n_bytes = int(n_bits) // 8
    if n_bytes < 1:
        raise ValueError("n_bits must be at least 8")
    return os.urandom(n_bytes)


def saveKey(filename: str, key: bytes) -> None:
    """Write the raw bytes of `key` to `filename`, replacing any old file."""
    with open(filename, "wb") as f:
        f.write(key)


def loadKey(filename: str) -> bytes:
    """Return the raw key bytes stored in `filename`."""
    with open(filename, "rb") as f:
        return f.read()


def _requireKey(key) -> int:
    """Return the block length implied by `key`, rejecting an empty key."""
    block_len = len(key)
    if block_len == 0:
        raise ValueError("key must not be empty")
    return block_len


def obfuscate(data, key) -> bytearray:
    """Apply a symmetric cypher to `data` using cypher-block chaining.

    `data` is split into blocks of `len(key)` bytes (the final block may
    be shorter). Each plaintext block is XORed with the previous cypher
    block and with the key. The output has the same length as `data`.

    This only raises the cost of casual data collection: the key is
    reused for every block and there is no random IV, so identical
    messages always produce identical output.

    Args:
        data: Plaintext as any bytes-like object (bytes, bytearray, ...).
        key: Key as any bytes-like object; its length is the block size.

    Returns:
        The obfuscated data as a new bytearray.

    Raises:
        ValueError: If `key` is empty.
    """
    block_len = _requireKey(key)
    # This is a misnomer. A true IV would be randomized, but we can't
    # do that since the shader doesn't have access to it. We just use
    # this to implement the "chaining" aspect of CBC.
    iv = bytes(block_len)
    result = bytearray()
    for block_begin in range(0, len(data), block_len):
        block_plain = data[block_begin:block_begin + block_len]
        # zip() stops at the shortest input, which handles a short
        # trailing block without any padding.
        block_cypher = bytes(
            p ^ c ^ k for p, c, k in zip(block_plain, iv, key)
        )
        result += block_cypher
        iv = block_cypher
    return result


def deobfuscate(data, key) -> bytearray:
    """Invert obfuscate(): recover the plaintext from `data` using `key`.

    Each cypher block is XORed with the key and with the previous cypher
    block (all zeroes for the first block). The output has the same
    length as `data`.

    Args:
        data: Obfuscated data as any bytes-like object.
        key: The same key that was passed to obfuscate().

    Returns:
        The recovered plaintext as a new bytearray.

    Raises:
        ValueError: If `key` is empty.
    """
    block_len = _requireKey(key)
    # This is a misnomer. A true IV would be randomized, but we can't
    # do that since the shader doesn't have access to it. We just use
    # this to implement the "chaining" aspect of CBC.
    iv = bytes(block_len)
    result = bytearray()
    for block_begin in range(0, len(data), block_len):
        block_cypher = data[block_begin:block_begin + block_len]
        result += bytes(
            c ^ k ^ v for c, k, v in zip(block_cypher, key, iv)
        )
        # Chaining uses the *cypher* block, exactly as in obfuscate().
        iv = block_cypher
    return result


def test():
    """Self-test: key save/load round trip and obfuscation round trip."""
    key = genKey()
    # Use a throwaway directory so we never clobber a real "test.key" and
    # so the key file is cleaned up even if an assertion fails.
    with tempfile.TemporaryDirectory() as tmp_dir:
        key_path = os.path.join(tmp_dir, "test.key")
        saveKey(key_path, key)
        new_key = loadKey(key_path)
    assert key == new_key

    plaintext_original = "Lorem ipsum dolor sit amet, consectetur adipiscing elit."
    plaintext_bytes = bytearray(plaintext_original, "utf-8")
    cyphertext = obfuscate(plaintext_bytes, key)
    assert len(plaintext_bytes) == len(cyphertext)
    plaintext_recovered = deobfuscate(cyphertext, key).decode("utf-8")
    assert plaintext_original == plaintext_recovered
    assert plaintext_bytes != cyphertext

    # Immutable bytes input must behave exactly like bytearray input.
    assert obfuscate(bytes(plaintext_bytes), key) == cyphertext
    assert deobfuscate(bytes(cyphertext), key) == plaintext_bytes


if __name__ == "__main__":
    test()
