summaryrefslogtreecommitdiffstats
path: root/Scripts/obfuscate.py
diff options
context:
space:
mode:
Diffstat (limited to 'Scripts/obfuscate.py')
-rw-r--r--Scripts/obfuscate.py92
1 files changed, 0 insertions, 92 deletions
diff --git a/Scripts/obfuscate.py b/Scripts/obfuscate.py
deleted file mode 100644
index 8d01e10..0000000
--- a/Scripts/obfuscate.py
+++ /dev/null
@@ -1,92 +0,0 @@
-#!/usr/bin/env python3
-
-# This module is used to implement obfuscation of TaSTT network
-# speech data. At a high level, TaSTT is simply streaming N bits of
-# arbitrary data to a shader via VRChat's parameter sync mechanism.
-#
-# It would be trivial to mine this data for speech information, since
-# we're sending unicode (or ASCII) characters to peers.
-#
-# To raise the cost for the casual data collector, we can obfuscate
-# this data by XORing it with a secret key in cipher-block chaining
-# mode. (The key is reused for every block, so this is not a true
-# one-time pad.)
-#
-# Making things interesting, encrypted data will arrive at the Unity
-# animator, which processes them in 8 bit chunks. They are written
-# into contiguous blocks of the animator. Thus the shader can decrypt
-# the board by decrypting each block. This is thus stronger than
-# applying a one-time pad to each byte of the plaintext, since the
-# statistical distribution of individual letters is destroyed.
-# Obviously due to the lack of an initialization vector, the
-# distribution of phrases (blocks) is preserved.
-
-import math
-import os
-
def genKey(n_bits: int = 128) -> bytes:
    """Generate a random key from the operating system's entropy source.

    Args:
        n_bits: Requested key size in bits. It is rounded down to a whole
            number of bytes and must be at least 8.

    Returns:
        ``n_bits // 8`` random bytes, as returned by ``os.urandom``.

    Raises:
        ValueError: If ``n_bits`` describes less than one byte.
    """
    n_bytes = int(n_bits // 8)
    # An empty key is useless: the block size used for chaining is the key
    # length, so a zero-length key cannot split the data into blocks.
    if n_bytes < 1:
        raise ValueError("n_bits must be at least 8, got %r" % (n_bits,))
    return os.urandom(n_bytes)
-
def saveKey(filename: str, key: bytes) -> None:
    """Write the raw key bytes to ``filename``.

    The file is opened in binary write mode, so an existing file at that
    path is overwritten and ``key`` must be a bytes-like object (not a
    ``str``).

    Args:
        filename: Path of the file to write.
        key: Key material to store.
    """
    with open(filename, "wb") as key_file:
        key_file.write(key)
-
def loadKey(filename: str) -> bytes:
    """Read and return the entire contents of ``filename`` as raw bytes.

    Args:
        filename: Path of the key file to read.

    Returns:
        The file's contents as an immutable ``bytes`` object.
    """
    with open(filename, "rb") as key_file:
        return key_file.read()
-
# Apply a symmetric cipher to `data` using cipher-block chaining.
def obfuscate(data: bytearray, key: bytearray) -> bytearray:
    """XOR-obfuscate ``data`` with ``key``, chaining consecutive blocks.

    The data is split into blocks of ``len(key)`` bytes. Each plaintext byte
    is XORed with the corresponding byte of the previous ciphertext block and
    the corresponding key byte. The final block may be shorter than the key.

    Args:
        data: Plaintext; ``bytes`` and ``bytearray`` are both accepted.
        key: Key bytes; its length is the block size.

    Returns:
        A new ``bytearray`` of the same length as ``data``.

    Raises:
        ValueError: If ``key`` is empty.
    """
    block_size = len(key)
    if block_size == 0:
        raise ValueError("key must not be empty")

    # Not a true IV: a real one would be randomized, but the shader has no
    # access to it. The all-zero block only seeds the chaining for block 0.
    previous = bytes(block_size)
    result = bytearray()
    for start in range(0, len(data), block_size):
        plain_block = data[start:start + block_size]
        # zip() stops at the shortest input, which handles a short last block.
        cipher_block = bytearray(
            p ^ c ^ k for p, c, k in zip(plain_block, previous, key)
        )
        result += cipher_block
        previous = cipher_block
    return result
-
def deobfuscate(data: bytearray, key: bytearray) -> bytearray:
    """Reverse the chained XOR obfuscation of ``data`` using ``key``.

    The data is split into blocks of ``len(key)`` bytes. Each ciphertext byte
    is XORed with the corresponding key byte and the corresponding byte of
    the previous ciphertext block (an all-zero block for the first one). The
    final block may be shorter than the key.

    Args:
        data: Ciphertext; ``bytes`` and ``bytearray`` are both accepted.
        key: Key bytes; its length is the block size.

    Returns:
        A new ``bytearray`` of the same length as ``data``.

    Raises:
        ValueError: If ``key`` is empty.
    """
    block_size = len(key)
    if block_size == 0:
        raise ValueError("key must not be empty")

    # Not a true IV: a real one would be randomized, but the shader has no
    # access to it. The all-zero block only seeds the chaining for block 0.
    previous = bytes(block_size)
    result = bytearray()
    for start in range(0, len(data), block_size):
        cipher_block = data[start:start + block_size]
        # zip() stops at the shortest input, which handles a short last block.
        result += bytes(
            c ^ k ^ p for c, k, p in zip(cipher_block, key, previous)
        )
        # Chaining uses the ciphertext block, not the recovered plaintext.
        previous = cipher_block
    return result
-
def test():
    """Self-test: key save/load round trip and obfuscation round trip.

    Raises:
        AssertionError: If any of the checks fails.
    """
    # Local import: only this self-test needs it.
    import tempfile

    key = genKey()
    # Use a private temporary directory so an existing "test.key" in the
    # working directory is never overwritten, and the file is removed even
    # if loading fails.
    with tempfile.TemporaryDirectory() as tmp_dir:
        key_path = os.path.join(tmp_dir, "test.key")
        saveKey(key_path, key)
        new_key = loadKey(key_path)
    assert key == new_key

    plaintext_original = "Lorem ipsum dolor sit amet, consectetur adipiscing elit."
    plaintext_bytes = bytearray(plaintext_original, "utf-8")
    cyphertext = obfuscate(plaintext_bytes, key)
    # Obfuscation must preserve length and actually change the data.
    assert len(plaintext_bytes) == len(cyphertext)
    assert plaintext_bytes != cyphertext
    plaintext_recovered = deobfuscate(cyphertext, key).decode("utf-8")
    assert plaintext_original == plaintext_recovered


if __name__ == "__main__":
    test()
-