summaryrefslogtreecommitdiffstats
path: root/Scripts/obfuscate.py
blob: 8d01e1039b744f3c80791aa0c547ce35a07d2d38 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
#!/usr/bin/env python3

# This module is used to implement obfuscation of TaSTT network
# speech data. At a high level, TaSTT is simply streaming N bits of
# arbitrary data to a shader via VRChat's parameter sync mechanism.
#
# It would be trivial to mine this data for speech information, since
# we're sending unicode (or ASCII) characters to peers.
#
# To raise the cost for the casual data collector, we can obfuscate
# this data by XORing it with a fixed secret key (a reused pad, not a
# true one-time pad) in cipher-block chaining mode.
#
# Making things interesting, encrypted data will arrive at the Unity
# animator, which processes them in 8-bit chunks. They are written
# into contiguous blocks of the animator. Thus the shader can decrypt
# the board by decrypting each block. This is thus stronger than
# applying a one-time pad to each byte of the plaintext, since the
# statistical distribution of individual letters is destroyed.
# Obviously due to the lack of an initialization vector, the
# distribution of phrases (blocks) is preserved.

import math
import os

def genKey(n_bits: int = 128) -> bytes:
    """Generate a random obfuscation key.

    Args:
        n_bits: Desired key size in bits. A size that is not a multiple
            of 8 is rounded down to a whole number of bytes.

    Returns:
        An immutable ``bytes`` object holding ``n_bits // 8`` random
        bytes obtained from ``os.urandom``.

    Raises:
        ValueError: If ``n_bits`` is negative.
    """
    # Floor division keeps the arithmetic in integers instead of going
    # through a float and truncating it afterwards.
    return os.urandom(int(n_bits // 8))

def saveKey(filename: str, key: bytes) -> None:
    """Write a key to disk as raw bytes.

    Args:
        filename: Path of the file to create; an existing file is
            overwritten.
        key: The key material. It must be a bytes-like object (``bytes``
            or ``bytearray``) because the file is opened in binary mode.

    Raises:
        OSError: If the file cannot be opened or written.
    """
    with open(filename, "wb") as key_file:
        key_file.write(key)

def loadKey(filename: str) -> bytes:
    """Read a key from disk.

    Args:
        filename: Path of the key file to read.

    Returns:
        The complete file contents as an immutable ``bytes`` object.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    with open(filename, "rb") as key_file:
        return key_file.read()

def obfuscate(data: bytearray, key: bytearray) -> bytearray:
    """Apply a symmetric XOR cypher to `data` using cypher-block chaining.

    The data is processed in blocks of ``len(key)`` bytes. Every byte of
    a block is XORed with the matching byte of the previous cyphertext
    block and with the matching byte of the key. The final block may be
    shorter than the key, so the output has the same length as the input.

    Args:
        data: Plaintext bytes. Any bytes-like sequence (``bytes`` or
            ``bytearray``) is accepted; it is not modified.
        key: Key bytes. Must not be empty.

    Returns:
        A new ``bytearray`` with the cyphertext, ``len(data)`` bytes long.

    Raises:
        ValueError: If `key` is empty.
    """
    block_size = len(key)
    if block_size == 0:
        raise ValueError("key must not be empty")

    # Calling this an "IV" would be a misnomer. A true IV would be
    # randomized, but we can't do that since the shader doesn't have
    # access to it. The all-zero first value only exists to implement
    # the "chaining" aspect of CBC.
    previous_cypher = bytes(block_size)
    result = bytearray()
    for block_begin in range(0, len(data), block_size):
        block_plain = data[block_begin:block_begin + block_size]
        # zip() stops at the shortest input, which handles a short
        # trailing block without any index arithmetic.
        block_cypher = bytearray(
            plain ^ chained ^ key_byte
            for plain, chained, key_byte
            in zip(block_plain, previous_cypher, key)
        )
        result += block_cypher
        previous_cypher = block_cypher
    return result

def deobfuscate(data: bytearray, key: bytearray) -> bytearray:
    """Reverse the chained XOR cypher applied to `data`.

    The data is processed in blocks of ``len(key)`` bytes. Every byte of
    a cyphertext block is XORed with the matching byte of the key and
    with the matching byte of the previous cyphertext block. The final
    block may be shorter than the key, so the output has the same length
    as the input.

    Args:
        data: Cyphertext bytes. Any bytes-like sequence (``bytes`` or
            ``bytearray``) is accepted; it is not modified.
        key: Key bytes. Must not be empty.

    Returns:
        A new ``bytearray`` with the plaintext, ``len(data)`` bytes long.

    Raises:
        ValueError: If `key` is empty.
    """
    block_size = len(key)
    if block_size == 0:
        raise ValueError("key must not be empty")

    # Calling this an "IV" would be a misnomer. A true IV would be
    # randomized, but we can't do that since the shader doesn't have
    # access to it. The all-zero first value only exists to implement
    # the "chaining" aspect of CBC.
    previous_cypher = bytes(block_size)
    result = bytearray()
    for block_begin in range(0, len(data), block_size):
        block_cypher = data[block_begin:block_begin + block_size]
        # zip() stops at the shortest input, which handles a short
        # trailing block without any index arithmetic.
        result.extend(
            cypher ^ key_byte ^ chained
            for cypher, key_byte, chained
            in zip(block_cypher, key, previous_cypher)
        )
        # Chaining uses the *cyphertext* of this block, not the
        # recovered plaintext.
        previous_cypher = block_cypher
    return result

def test():
    """Self-test for key persistence and the obfuscation round trip.

    Checks that a generated key survives a save/load cycle, that
    obfuscation preserves length, that deobfuscation recovers the
    original text, and that the cyphertext differs from the plaintext.

    Raises:
        AssertionError: If any of the checks fails.
    """
    # Imported locally because only this self-test needs it.
    import tempfile

    key = genKey()
    # Use a throwaway directory so the test can neither clobber a real
    # "test.key" in the working directory nor leave one behind when
    # saving or loading fails.
    with tempfile.TemporaryDirectory() as scratch_dir:
        key_path = os.path.join(scratch_dir, "test.key")
        saveKey(key_path, key)
        new_key = loadKey(key_path)
    assert key == new_key

    plaintext_original = "Lorem ipsum dolor sit amet, consectetur adipiscing elit."
    plaintext_bytes = bytearray(plaintext_original, "utf-8")
    cyphertext = obfuscate(plaintext_bytes, key)
    assert len(plaintext_bytes) == len(cyphertext)
    plaintext_recovered = deobfuscate(cyphertext, key).decode("utf-8")
    assert plaintext_original == plaintext_recovered
    assert plaintext_bytes != cyphertext

# Run the module's self-test when the file is executed directly as a
# script; importing the module does not trigger it.
if __name__ == "__main__":
    test()