diff options
Diffstat (limited to 'Scripts/obfuscate.py')
| -rw-r--r-- | Scripts/obfuscate.py | 92 |
1 files changed, 0 insertions, 92 deletions
diff --git a/Scripts/obfuscate.py b/Scripts/obfuscate.py deleted file mode 100644 index 8d01e10..0000000 --- a/Scripts/obfuscate.py +++ /dev/null @@ -1,92 +0,0 @@ -#!/usr/bin/env python3 - -# This module is used to implement obfuscation of TaSTT network -# speech data. At a high level, TaSTT is simply streaming N bits of -# arbitrary data to a shader via VRChat's parameter sync mechanism. -# -# It would be trivial to mine this data for speech information, since -# we're sending unicode (or ASCII) characters to peers. -# -# To raise the cost for the casual data collector, we can obfuscate -# this data using a one-time pad in cipher-block chaining mode. -# -# Making things interesting, encrypted data will arrive at the Unity -# animator, which processes them in 8 bit chunks. They are written -# into contiguous blocks of the animator. Thus the shader can decrypt -# the board by decrypting each block. This is thus stronger than -# applying a one-time pad to each byte of the plaintext, since the -# statistical distribution of individual letters is destroyed. -# Obviously due to the lack of an initialization vector, the -# distribution of phrases (blocks) is preserved. - -import math -import os - -def genKey(n_bits = 128) -> bytearray: - return os.urandom(int(n_bits / 8)) - -def saveKey(filename: str, key: str): - with open(filename, "wb") as f: - f.write(key) - -def loadKey(filename: str) -> bytearray: - with open(filename, "rb") as f: - return f.read() - -# Apply a symmetric cypher to `data` using cypher-block chaining. -def obfuscate(data: bytearray, key: bytearray) -> str: - n_blocks = int(math.ceil(len(data) / len(key))) - # This is a misnomer. A true IV would be randomized, but we can't - # do that since the shader doesn't have access to it. We just use - # this to implement the "chaining" aspect of CBC. - iv = bytearray(b'\x00') * len(key) - result = bytearray() - for i in range(0, n_blocks): - block_begin = i * len(key) - block_end = (i + 1) * len(key) - block_plain = data[block_begin:block_end] - block_cypher = block_plain.copy() - for i in range(0, len(block_cypher)): - block_cypher[i] ^= iv[i] - block_cypher[i] ^= key[i] - result += block_cypher - iv = block_cypher - return result - -def deobfuscate(data: bytearray, key: bytearray) -> str: - n_blocks = int(math.ceil(len(data) / len(key))) - # This is a misnomer. A true IV would be randomized, but we can't - # do that since the shader doesn't have access to it. We just use - # this to implement the "chaining" aspect of CBC. - iv = bytearray(b'\x00') * len(key) - result = bytearray() - for i in range(0, n_blocks): - block_begin = i * len(key) - block_end = (i + 1) * len(key) - block_cypher = data[block_begin:block_end] - block_plain = block_cypher.copy() - for i in range(0, len(block_plain)): - block_plain[i] ^= key[i] - block_plain[i] ^= iv[i] - result += block_plain - iv = block_cypher - return result - -def test(): - key = genKey() - saveKey("test.key", key) - new_key = loadKey("test.key") - os.remove("test.key") - assert(key == new_key) - - plaintext_original = "Lorem ipsum dolor sit amet, consectetur adipiscing elit." - plaintext_bytes = bytearray(plaintext_original, "utf-8") - cyphertext = obfuscate(plaintext_bytes, key) - assert(len(plaintext_bytes) == len(cyphertext)) - plaintext_recovered = deobfuscate(cyphertext, key).decode("utf-8") - assert(plaintext_original == plaintext_recovered) - assert(plaintext_bytes != cyphertext) - -if __name__ == "__main__": - test() - |
