#include "slang-byte-encode-util.h" namespace Slang { // Descriptions of algorithms here... // https://github.com/stoklund/varint #if SLANG_LITTLE_ENDIAN && SLANG_UNALIGNED_ACCESS // Testing on i7, unaligned access is around 40% faster # define SLANG_BYTE_ENCODE_USE_UNALIGNED_ACCESS 1 #endif #ifndef SLANG_BYTE_ENCODE_USE_UNALIGNED_ACCESS # define SLANG_BYTE_ENCODE_USE_UNALIGNED_ACCESS 0 #endif #define SLANG_REPEAT_2(n) n, n #define SLANG_REPEAT_4(n) SLANG_REPEAT_2(n), SLANG_REPEAT_2(n) #define SLANG_REPEAT_8(n) SLANG_REPEAT_4(n), SLANG_REPEAT_4(n) #define SLANG_REPEAT_16(n) SLANG_REPEAT_8(n), SLANG_REPEAT_8(n) #define SLANG_REPEAT_32(n) SLANG_REPEAT_16(n), SLANG_REPEAT_16(n) #define SLANG_REPEAT_64(n) SLANG_REPEAT_32(n), SLANG_REPEAT_32(n) #define SLANG_REPEAT_128(n) SLANG_REPEAT_64(n), SLANG_REPEAT_64(n) /* static */const int8_t ByteEncodeUtil::s_msb8[256] = { - 1, 0, SLANG_REPEAT_2(1), SLANG_REPEAT_4(2), SLANG_REPEAT_8(3), SLANG_REPEAT_16(4), SLANG_REPEAT_32(5), SLANG_REPEAT_64(6), SLANG_REPEAT_128(7), }; /* static */size_t ByteEncodeUtil::calcEncodeLiteSizeUInt32(const uint32_t* in, size_t num) { size_t totalNumEncodeBytes = 0; for (size_t i = 0; i < num; i++) { const uint32_t v = in[i]; if (v < kLiteCut1) { totalNumEncodeBytes += 1; } else if (v <= kLiteCut1 + 255 * (kLiteCut2 - 1 - kLiteCut1)) { totalNumEncodeBytes += 2; } else { totalNumEncodeBytes += calcNonZeroMsByte32(v) + 2; } } return totalNumEncodeBytes; } /* static */size_t ByteEncodeUtil::encodeLiteUInt32(const uint32_t* in, size_t num, uint8_t* encodeOut) { uint8_t* encodeStart = encodeOut; for (size_t i = 0; i < num; ++i) { uint32_t v = in[i]; if(v < kLiteCut1) { *encodeOut++ = uint8_t(v); } else if (v <= kLiteCut1 + 255 * (kLiteCut2 - 1 - kLiteCut1)) { v -= kLiteCut1; encodeOut[0] = uint8_t(kLiteCut1 + (v >> 8)); encodeOut[1] = uint8_t(v); encodeOut += 2; } else { uint8_t* encodeOutStart = encodeOut++; while (v) { *encodeOut++ = uint8_t(v); v >>= 8; } // Finally write the size to the start const int numBytes = int(encodeOut - encodeOutStart); encodeOutStart[0] = uint8_t(kLiteCut2 + (numBytes - 2)); } } return size_t(encodeOut - encodeStart); } /* static */void ByteEncodeUtil::encodeLiteUInt32(const uint32_t* in, size_t num, List& encodeArrayOut) { // Make sure there is at least enough space for all bytes encodeArrayOut.setCount(num); uint8_t* encodeOut = encodeArrayOut.begin(); uint8_t* encodeOutEnd = encodeArrayOut.end(); for (size_t i = 0; i < num; ++i) { // Check if we need some more space if (encodeOut + kMaxLiteEncodeUInt32 > encodeOutEnd) { const size_t offset = size_t(encodeOut - encodeArrayOut.begin()); const UInt oldCapacity = encodeArrayOut.getCapacity(); // Make some more space encodeArrayOut.reserve(oldCapacity + (oldCapacity >> 1) + kMaxLiteEncodeUInt32); // Make the size the capacity const UInt capacity = encodeArrayOut.getCapacity(); encodeArrayOut.setCount(capacity); encodeOut = encodeArrayOut.begin() + offset; encodeOutEnd = encodeArrayOut.end(); } uint32_t v = in[i]; if (v < kLiteCut1) { *encodeOut++ = uint8_t(v); } else if (v <= kLiteCut1 + 255 * (kLiteCut2 - 1 - kLiteCut1)) { v -= kLiteCut1; encodeOut[0] = uint8_t(kLiteCut1 + (v >> 8)); encodeOut[1] = uint8_t(v); encodeOut += 2; } else { uint8_t* encodeOutStart = encodeOut++; while (v) { *encodeOut++ = uint8_t(v); v >>= 8; } // Finally write the size to the start const int numBytes = int(encodeOut - encodeOutStart); encodeOutStart[0] = uint8_t(kLiteCut2 + (numBytes - 2)); } } encodeArrayOut.setCount(UInt(encodeOut - encodeArrayOut.begin())); encodeArrayOut.compress(); } /* static */int ByteEncodeUtil::encodeLiteUInt32(uint32_t in, uint8_t out[kMaxLiteEncodeUInt32]) { // 0-184 1 byte value = B0 // 185 - 248 2 bytes value = 185 + 256 * (B0 - 185) + B1 // 249 - 255 3 - 9 bytes value = (B0 - 249 + 2) little - endian bytes following B0. if (in < kLiteCut1) { out[0] = uint8_t(in); return 1; } else if (in <= kLiteCut1 + 255 * (kLiteCut2 - 1 - kLiteCut1)) { in -= kLiteCut1; out[0] = uint8_t(kLiteCut1 + (in >> 8)); out[1] = uint8_t(in); return 2; } else { int numBytes = 1; while (in) { out[numBytes++] = uint8_t(in); in >>= 8; } // Finally write the size out[0] = uint8_t(kLiteCut2 + (numBytes - 2)); return numBytes; } } static const uint32_t s_unalignedUInt32Mask[5] = { 0x00000000, 0x000000ff, 0x0000ffff, 0x00ffffff, 0xffffffff, }; // Decode the >= kLiteCut2. // in is pointing past the first byte. // Only valid numBytesRemaining is 2, 3, or 4 SLANG_FORCE_INLINE static uint32_t _decodeLiteCut2UInt32(const uint8_t* in, int numBytesRemaining) { uint32_t value = 0; #if SLANG_BYTE_ENCODE_USE_UNALIGNED_ACCESS switch (numBytesRemaining) { case 2: value = *(const uint16_t*)in; break; case 3: value = (uint32_t(in[2]) << 16) | (uint32_t(in[1]) << 8) | uint32_t(in[0]); break; case 4: value = *(const uint32_t*)in; break; default: break; } #else // This works on all cpus although slower value = in[0]; switch (numBytesRemaining) { case 4: value |= uint32_t(in[3]) << 24; /* fall thru */ case 3: value |= uint32_t(in[2]) << 16; /* fall thru */ case 2: value |= uint32_t(in[1]) << 8; /* fall thru */ case 1: break; } #endif return value; } /* static */int ByteEncodeUtil::decodeLiteUInt32(const uint8_t* in, uint32_t* out) { uint8_t b0 = *in++; if (b0 < kLiteCut1) { *out = uint32_t(b0); return 1; } else if (b0 < kLiteCut2) { uint8_t b1 = *in++; *out = kLiteCut1 + b1 + (uint32_t(b0 - kLiteCut1) << 8); return 2; } else { const int numBytesRemaining = b0 - kLiteCut2 + 2 - 1; *out = _decodeLiteCut2UInt32(in, numBytesRemaining); return numBytesRemaining + 1; } } /* static */size_t ByteEncodeUtil::decodeLiteUInt32(const uint8_t* encodeIn, size_t numValues, uint32_t* valuesOut) { const uint8_t* encodeStart = encodeIn; for (size_t i = 0; i < numValues; ++i) { uint8_t b0 = *encodeIn++; if (b0 < kLiteCut1) { valuesOut[i] = uint32_t(b0); } else if (b0 < kLiteCut2) { uint8_t b1 = *encodeIn++; valuesOut[i] = kLiteCut1 + b1 + (uint32_t(b0 - kLiteCut1) << 8); } else { const int numBytesRemaining = b0 - kLiteCut2 + 2 - 1; // For unaligned access, do not use unaligned access for the last two values, // (3rd last is safe because this value will have at least 2 bytes, followed by at worst two 1-byte values) // otherwise we can access outside the bounds of the encoded array // This prevents memory validation tools from causing an exception here if (SLANG_BYTE_ENCODE_USE_UNALIGNED_ACCESS && i < numValues - 2) { const uint32_t mask = s_unalignedUInt32Mask[numBytesRemaining]; //const uint32_t mask = ~(uint32_t(0xffffff00) << ((numBytesRemaining - 1) * 8)); valuesOut[i] = (*(const uint32_t*)encodeIn) & mask; } else { valuesOut[i] = _decodeLiteCut2UInt32(encodeIn, numBytesRemaining); } encodeIn += numBytesRemaining; } } return size_t(encodeIn - encodeStart); } } // namespace Slang