From 7ea9ff03f4fc766f21d5896aea220d17f236dd70 Mon Sep 17 00:00:00 2001 From: jsmall-nvidia Date: Tue, 9 Oct 2018 11:51:41 -0400 Subject: Feature/var byte encoding (#665) * * Remove the need for IRHighLevelDecoration in Emit * Use the IRLayoutDecoration for GeometryShaderPrimitiveTypeModifier * Initial look at variable byte encoding, and simple unit test. * Fixing problems with comparison due to naming differences with slang/fxc. * * More tests and perf improvements for byte encoding. * Mechanism to detect processor and processor features in main slang header. * Split out cpu based defines into slang-cpu-defines.h so do not pollute slang.h * Support for variable byte encoding on serialization. * Removed unused flag. * Fix warning. * Fix calcMsByte32 for 0 values without using intrinsic. * Fix a mistake in calculating maximum instruction size. --- source/core/core.vcxproj | 4 +- source/core/core.vcxproj.filters | 8 +- source/core/list.h | 2 +- source/core/slang-byte-encode-util.cpp | 283 +++++++++++++ source/core/slang-byte-encode-util.h | 196 +++++++++ source/core/slang-cpu-defines.h | 89 ++++ source/slang/ir-serialize.cpp | 633 +++++++++++++++++++++++++--- source/slang/ir-serialize.h | 85 +++- tools/slang-test/slang-test.vcxproj | 3 +- tools/slang-test/slang-test.vcxproj.filters | 5 +- tools/slang-test/unit-test-byte-encode.cpp | 142 +++++++ 11 files changed, 1393 insertions(+), 57 deletions(-) create mode 100644 source/core/slang-byte-encode-util.cpp create mode 100644 source/core/slang-byte-encode-util.h create mode 100644 source/core/slang-cpu-defines.h create mode 100644 tools/slang-test/unit-test-byte-encode.cpp diff --git a/source/core/core.vcxproj b/source/core/core.vcxproj index 8ec4b7031..ec6195940 100644 --- a/source/core/core.vcxproj +++ b/source/core/core.vcxproj @@ -182,6 +182,7 @@ + @@ -197,6 +198,7 @@ + @@ -213,4 +215,4 @@ - \ No newline at end of file + \ No newline at end of file diff --git a/source/core/core.vcxproj.filters 
b/source/core/core.vcxproj.filters index f30cad939..f9eeef43a 100644 --- a/source/core/core.vcxproj.filters +++ b/source/core/core.vcxproj.filters @@ -1,4 +1,4 @@ - + @@ -81,6 +81,9 @@ Header Files + + Header Files + @@ -113,6 +116,9 @@ Source Files + + Source Files + diff --git a/source/core/list.h b/source/core/list.h index cddcbb6c0..11d798dcf 100644 --- a/source/core/list.h +++ b/source/core/list.h @@ -472,7 +472,7 @@ namespace Slang if (bufferSize > _count && _count > 0) { T * newBuffer = Allocate(_count); - for (int i = 0; i < _count; i++) + for (UInt i = 0; i < _count; i++) newBuffer[i] = static_cast(buffer[i]); FreeBuffer(); buffer = newBuffer; diff --git a/source/core/slang-byte-encode-util.cpp b/source/core/slang-byte-encode-util.cpp new file mode 100644 index 000000000..6eabdc16f --- /dev/null +++ b/source/core/slang-byte-encode-util.cpp @@ -0,0 +1,283 @@ +#include "slang-byte-encode-util.h" + + + +namespace Slang { + +// Descriptions of algorithms here... +// https://github.com/stoklund/varint + +#if SLANG_LITTLE_ENDIAN && SLANG_UNALIGNED_ACCESS +// Testing on i7, unaligned access is around 40% faster +# define SLANG_BYTE_ENCODE_USE_UNALIGNED_ACCESS 1 +#endif + +#ifndef SLANG_BYTE_ENCODE_USE_UNALIGNED_ACCESS +# define SLANG_BYTE_ENCODE_USE_UNALIGNED_ACCESS 0 +#endif + +#define SLANG_REPEAT_2(n) n, n +#define SLANG_REPEAT_4(n) SLANG_REPEAT_2(n), SLANG_REPEAT_2(n) +#define SLANG_REPEAT_8(n) SLANG_REPEAT_4(n), SLANG_REPEAT_4(n) +#define SLANG_REPEAT_16(n) SLANG_REPEAT_8(n), SLANG_REPEAT_8(n) +#define SLANG_REPEAT_32(n) SLANG_REPEAT_16(n), SLANG_REPEAT_16(n) +#define SLANG_REPEAT_64(n) SLANG_REPEAT_32(n), SLANG_REPEAT_32(n) +#define SLANG_REPEAT_128(n) SLANG_REPEAT_64(n), SLANG_REPEAT_64(n) + +/* static */const int8_t ByteEncodeUtil::s_msb8[256] = +{ + - 1, + 0, + SLANG_REPEAT_2(1), + SLANG_REPEAT_4(2), + SLANG_REPEAT_8(3), + SLANG_REPEAT_16(4), + SLANG_REPEAT_32(5), + SLANG_REPEAT_64(6), + SLANG_REPEAT_128(7), +}; + +/* static */size_t 
ByteEncodeUtil::calcEncodeLiteSizeUInt32(const uint32_t* in, size_t num) +{ + size_t totalNumEncodeBytes = 0; + + for (size_t i = 0; i < num; i++) + { + const uint32_t v = in[i]; + + if (v < kLiteCut1) + { + totalNumEncodeBytes += 1; + } + else if (v <= kLiteCut1 + 255 * (kLiteCut2 - 1 - kLiteCut1)) + { + totalNumEncodeBytes += 2; + } + else + { + totalNumEncodeBytes += calcNonZeroMsByte32(v) + 2; + } + } + return totalNumEncodeBytes; +} + +/* static */size_t ByteEncodeUtil::encodeLiteUInt32(const uint32_t* in, size_t num, uint8_t* encodeOut) +{ + uint8_t* encodeStart = encodeOut; + + for (size_t i = 0; i < num; ++i) + { + uint32_t v = in[i]; + + if(v < kLiteCut1) + { + *encodeOut++ = uint8_t(v); + } + else if (v <= kLiteCut1 + 255 * (kLiteCut2 - 1 - kLiteCut1)) + { + v -= kLiteCut1; + + encodeOut[0] = uint8_t(kLiteCut1 + (v >> 8)); + encodeOut[1] = uint8_t(v); + encodeOut += 2; + } + else + { + uint8_t* encodeOutStart = encodeOut++; + while (v) + { + *encodeOut++ = uint8_t(v); + v >>= 8; + } + // Finally write the size to the start + const int numBytes = int(encodeOut - encodeOutStart); + encodeOutStart[0] = uint8_t(kLiteCut2 + (numBytes - 2)); + } + } + return size_t(encodeOut - encodeStart); +} + +/* static */void ByteEncodeUtil::encodeLiteUInt32(const uint32_t* in, size_t num, List& encodeArrayOut) +{ + // Make sure there is at least enough space for all bytes + encodeArrayOut.SetSize(num); + + uint8_t* encodeOut = encodeArrayOut.begin(); + uint8_t* encodeOutEnd = encodeArrayOut.end(); + + for (size_t i = 0; i < num; ++i) + { + // Check if we need some more space + if (encodeOut + kMaxLiteEncodeUInt32 > encodeOutEnd) + { + const size_t offset = size_t(encodeOut - encodeArrayOut.begin()); + + const UInt oldCapacity = encodeArrayOut.Capacity(); + + // Make some more space + encodeArrayOut.Reserve(oldCapacity + (oldCapacity >> 1) + kMaxLiteEncodeUInt32); + // Make the size the capacity + const UInt capacity = encodeArrayOut.Capacity(); + 
encodeArrayOut.SetSize(capacity); + + encodeOut = encodeArrayOut.begin() + offset; + encodeOutEnd = encodeArrayOut.end(); + } + + uint32_t v = in[i]; + + if (v < kLiteCut1) + { + *encodeOut++ = uint8_t(v); + } + else if (v <= kLiteCut1 + 255 * (kLiteCut2 - 1 - kLiteCut1)) + { + v -= kLiteCut1; + + encodeOut[0] = uint8_t(kLiteCut1 + (v >> 8)); + encodeOut[1] = uint8_t(v); + encodeOut += 2; + } + else + { + uint8_t* encodeOutStart = encodeOut++; + while (v) + { + *encodeOut++ = uint8_t(v); + v >>= 8; + } + // Finally write the size to the start + const int numBytes = int(encodeOut - encodeOutStart); + encodeOutStart[0] = uint8_t(kLiteCut2 + (numBytes - 2)); + } + } + + encodeArrayOut.SetSize(UInt(encodeOut - encodeArrayOut.begin())); + encodeArrayOut.Compress(); +} + +/* static */int ByteEncodeUtil::encodeLiteUInt32(uint32_t in, uint8_t out[kMaxLiteEncodeUInt32]) +{ + // 0-184 1 byte value = B0 + // 185 - 248 2 bytes value = 185 + 256 * (B0 - 185) + B1 + // 249 - 252 2 - 5 bytes value = (B0 - 249 + 1) little - endian bytes following B0. The encoder below only emits 250 - 252, as values past the 2 byte range need at least 2 value bytes. 
+ + if (in < kLiteCut1) + { + out[0] = uint8_t(in); + return 1; + } + else if (in <= kLiteCut1 + 255 * (kLiteCut2 - 1 - kLiteCut1)) + { + in -= kLiteCut1; + + out[0] = uint8_t(kLiteCut1 + (in >> 8)); + out[1] = uint8_t(in); + return 2; + } + else + { + int numBytes = 1; + while (in) + { + out[numBytes++] = uint8_t(in); + in >>= 8; + } + // Finally write the size + out[0] = uint8_t(kLiteCut2 + (numBytes - 2)); + return numBytes; + } +} + +static const uint32_t s_unalignedUInt32Mask[5] = +{ + 0x00000000, + 0x000000ff, + 0x0000ffff, + 0x00ffffff, + 0xffffffff, +}; + +/* static */int ByteEncodeUtil::decodeLiteUInt32(const uint8_t* in, uint32_t* out) +{ + uint8_t b0 = *in++; + if (b0 < kLiteCut1) + { + *out = uint32_t(b0); + return 1; + } + else if (b0 < kLiteCut2) + { + uint8_t b1 = *in++; + *out = kLiteCut1 + b1 + (uint32_t(b0 - kLiteCut1) << 8); + return 2; + } + else + { + int numBytesRemaining = b0 - kLiteCut2 + 2 - 1; + +#if SLANG_BYTE_ENCODE_USE_UNALIGNED_ACCESS + //const uint32_t mask = s_unalignedUInt32Mask[numBytesRemaining]; + const uint32_t mask = ~(uint32_t(0xffffff00) << ((numBytesRemaining - 1) * 8)); + const uint32_t value = (*(const uint32_t*)in) & mask; +#else + // This works on all cpus although slower + uint32_t value = in[0]; + + switch (numBytesRemaining) + { + case 4: value |= uint32_t(in[3]) << 24; /* fall thru */ + case 3: value |= uint32_t(in[2]) << 16; /* fall thru */ + case 2: value |= uint32_t(in[1]) << 8; /* fall thru */ + case 1: break; + } +#endif + *out = value; + return numBytesRemaining + 1; + } +} + +/* static */size_t ByteEncodeUtil::decodeLiteUInt32(const uint8_t* encodeIn, size_t numValues, uint32_t* valuesOut) +{ + const uint8_t* encodeStart = encodeIn; + + for (size_t i = 0; i < numValues; ++i) + { + uint8_t b0 = *encodeIn++; + if (b0 < kLiteCut1) + { + valuesOut[i] = uint32_t(b0); + } + else if (b0 < kLiteCut2) + { + uint8_t b1 = *encodeIn++; + valuesOut[i] = kLiteCut1 + b1 + (uint32_t(b0 - kLiteCut1) << 8); + } + else + { + int 
numBytesRemaining = b0 - kLiteCut2 + 2 - 1; + +#if SLANG_BYTE_ENCODE_USE_UNALIGNED_ACCESS + const uint32_t mask = s_unalignedUInt32Mask[numBytesRemaining]; + //const uint32_t mask = ~(uint32_t(0xffffff00) << ((numBytesRemaining - 1) * 8)); + const uint32_t value = (*(const uint32_t*)encodeIn) & mask; +#else + // This works on all cpus although slower + uint32_t value = encodeIn[0]; + switch (numBytesRemaining) + { + case 4: value |= uint32_t(encodeIn[3]) << 24; /* fall thru */ + case 3: value |= uint32_t(encodeIn[2]) << 16; /* fall thru */ + case 2: value |= uint32_t(encodeIn[1]) << 8; /* fall thru */ + case 1: break; + } +#endif + valuesOut[i] = value; + encodeIn += numBytesRemaining; + } + } + + return size_t(encodeIn - encodeStart); +} + +} // namespace Slang diff --git a/source/core/slang-byte-encode-util.h b/source/core/slang-byte-encode-util.h new file mode 100644 index 000000000..77ddc2f65 --- /dev/null +++ b/source/core/slang-byte-encode-util.h @@ -0,0 +1,196 @@ +#ifndef SLANG_BYTE_ENCODE_UTIL_H +#define SLANG_BYTE_ENCODE_UTIL_H + +#include "list.h" + +#include "slang-cpu-defines.h" + +namespace Slang { + +struct ByteEncodeUtil +{ + enum + { + kMaxLiteEncodeUInt32 = 5, /// One byte for prefix, the remaining 4 bytes hold the value + // Cut values for 'Lite' encoding style + kLiteCut1 = 185, + kLiteCut2 = 249, + }; + + /** Find the most significant bit for 8 bits + @param v The value to find most significant bit on + @return The most significant bit, or -1 if no bits are set + */ + SLANG_FORCE_INLINE static int calcMsb8(uint32_t v); + + /** Find the most significant bit for 32 bits + @param v The value to find most significant bit on + @return The most significant bit, or -1 if no bits are set + */ + SLANG_FORCE_INLINE static int calcMsb32(uint32_t v); + + /** Calculates the 'most significant' byte ie the highest bytes that is non zero. + Note return value is *undefined* if in is 0. + @param in Value - cannot be 0. 
+ @return The byte index of the highest byte that is non zero. + */ + SLANG_FORCE_INLINE static int calcNonZeroMsByte32(uint32_t in); + + /** Calculates the 'most significant' byte i.e. the highest byte that is non zero. + @param in Value - can be 0. + @return The byte index of the highest byte that is non zero, or -1 if in is 0. + */ + SLANG_FORCE_INLINE static int calcMsByte32(uint32_t in); + + /// Calculate the size of encoding bytes + static size_t calcEncodeLiteSizeUInt32(const uint32_t* in, size_t num); + + /// Calculate the size of a single value + static size_t calcEncodeLiteSizeUInt32(uint32_t in); + + /** Encodes a single uint32_t using the lite encoding + @param in The value to encode + @param out Receives the encoded bytes + @return the number of bytes written to out */ + static int encodeLiteUInt32(uint32_t in, uint8_t out[kMaxLiteEncodeUInt32]); + + /** Decode a lite encoding. + @param in The lite encoded bytes + @param out Value constructed + @return number of bytes of in consumed */ + static int decodeLiteUInt32(const uint8_t* in, uint32_t* out); + + /** Encode an array of uint32_t + @param in The values to encode + @param num The amount of values to encode + @param encodeOut The buffer to hold the encoded value. MUST be large enough to hold the encoding + @return The size of the encoding in bytes + */ + static size_t encodeLiteUInt32(const uint32_t* in, size_t num, uint8_t* encodeOut); + + /** Encode an array of uint32_t + @param in The values to encode + @param num The amount of values to encode + @param encodeOut The list that receives the encoded bytes. It is resized to fit the encoding. + */ + static void encodeLiteUInt32(const uint32_t* in, size_t num, List& encodeOut); + + /** Decode an array of uint32_t + @param encodeIn The encoded values + @param numValues The amount of values to be decoded (NOTE! This is the number of valuesOut, not encodeIn) + @param valuesOut The buffer to hold the decoded values. 
MUST be large enough to hold numValues decoded values + @return The amount of encoded bytes consumed from encodeIn + */ + static size_t decodeLiteUInt32(const uint8_t* encodeIn, size_t numValues, uint32_t* valuesOut); + + /// Table that maps 8 bits to its most significant bit. If 0 returns -1. + static const int8_t s_msb8[256]; +}; + +#if SLANG_VC +// Works on ARM and x86/64 on visual studio compiler + +// --------------------------------------------------------------------------- +SLANG_FORCE_INLINE int ByteEncodeUtil::calcNonZeroMsByte32(uint32_t in) +{ + SLANG_ASSERT(in != 0); + // Can use intrinsic + // https://msdn.microsoft.com/en-us/library/fbxyd7zd.aspx + unsigned long index; + _BitScanReverse(&index, in); + return index >> 3; +} + +// --------------------------------------------------------------------------- +SLANG_FORCE_INLINE int ByteEncodeUtil::calcMsByte32(uint32_t in) +{ + if (in == 0) + { + return -1; + } + // Can use intrinsic + // https://msdn.microsoft.com/en-us/library/fbxyd7zd.aspx + unsigned long index; + _BitScanReverse(&index, in); + return index >> 3; +} + +// --------------------------------------------------------------------------- +SLANG_FORCE_INLINE /* static */int ByteEncodeUtil::calcMsb8(uint32_t v) +{ + SLANG_ASSERT((v & 0xffffff00) == 0); + if (v == 0) + { + return -1; + } + unsigned long index; + _BitScanReverse(&index, v); + return index; +} + +// --------------------------------------------------------------------------- +SLANG_FORCE_INLINE /* static */int ByteEncodeUtil::calcMsb32(uint32_t v) +{ + if (v == 0) + { + return -1; + } + unsigned long index; + _BitScanReverse(&index, v); + return index; +} + +#else + +// --------------------------------------------------------------------------- +SLANG_FORCE_INLINE /* static */int ByteEncodeUtil::calcNonZeroMsByte32(uint32_t in) +{ + return (in & 0xffff0000) ? + ((in & 0xff000000) ? 3 : 2) : + ((in & 0x0000ff00) ? 
1 : 0); +} + +// --------------------------------------------------------------------------- +SLANG_FORCE_INLINE /* static */int ByteEncodeUtil::calcMsByte32(uint32_t in) +{ + return (in & 0xffff0000) ? + ((in & 0xff000000) ? 3 : 2) : + ((in & 0x0000ff00) ? 1 : + ((in == 0) ? -1 : 0)); +} + +// --------------------------------------------------------------------------- +SLANG_FORCE_INLINE /* static */int ByteEncodeUtil::calcMsb8(uint32_t v) +{ + SLANG_ASSERT((v & 0xffffff00) == 0); + return s_msb8[v]; +} + +// --------------------------------------------------------------------------- +SLANG_FORCE_INLINE /* static */int ByteEncodeUtil::calcMsb32(uint32_t v) +{ + return (v & 0xffff0000) ? + ((v & 0xff000000) ? s_msb8[v >> 24] + 24 : s_msb8[v >> 16] + 16) : + ((v & 0x0000ff00) ? s_msb8[v >> 8] + 8 : s_msb8[v]); +} + +#endif + +// --------------------------------------------------------------------------- +inline /* static */size_t ByteEncodeUtil::calcEncodeLiteSizeUInt32(uint32_t v) +{ + if (v < kLiteCut1) + { + return 1; + } + else if (v <= kLiteCut1 + 255 * (kLiteCut2 - 1 - kLiteCut1)) + { + return 2; + } + else + { + return calcNonZeroMsByte32(v) + 2; + } +} + +} // namespace Slang + +#endif // SLANG_BYTE_ENCODE_UTIL_H diff --git a/source/core/slang-cpu-defines.h b/source/core/slang-cpu-defines.h new file mode 100644 index 000000000..dc76008b9 --- /dev/null +++ b/source/core/slang-cpu-defines.h @@ -0,0 +1,89 @@ +#ifndef SLANG_CPU_DEFINES_H +#define SLANG_CPU_DEFINES_H + +/* Macros for detecting processor */ +#if defined(_M_ARM) || defined(__ARM_EABI__) +// This is special case for nVidia tegra +# define SLANG_PROCESSOR_ARM 1 +#elif defined(__i386__) || defined(_M_IX86) +# define SLANG_PROCESSOR_X86 1 +#elif defined(_M_AMD64) || defined(_M_X64) || defined(__amd64) || defined(__x86_64) +# define SLANG_PROCESSOR_X86_64 1 +#elif defined(_PPC_) || defined(__ppc__) || defined(__POWERPC__) || defined(_M_PPC) +# if defined(__powerpc64__) || defined(__ppc64__) || 
defined(__PPC64__) || defined(__64BIT__) || defined(_LP64) || defined(__LP64__) +# define SLANG_PROCESSOR_POWER_PC_64 1 +# else +# define SLANG_PROCESSOR_POWER_PC 1 +# endif +#elif defined(__arm__) +# define SLANG_PROCESSOR_ARM 1 +#elif defined(__aarch64__) +# define SLANG_PROCESSOR_ARM_64 1 +#endif + +#ifndef SLANG_PROCESSOR_ARM +# define SLANG_PROCESSOR_ARM 0 +#endif + +#ifndef SLANG_PROCESSOR_ARM_64 +# define SLANG_PROCESSOR_ARM_64 0 +#endif + +#ifndef SLANG_PROCESSOR_X86 +# define SLANG_PROCESSOR_X86 0 +#endif + +#ifndef SLANG_PROCESSOR_X86_64 +# define SLANG_PROCESSOR_X86_64 0 +#endif + +#ifndef SLANG_PROCESSOR_POWER_PC +# define SLANG_PROCESSOR_POWER_PC 0 +#endif + +#ifndef SLANG_PROCESSOR_POWER_PC_64 +# define SLANG_PROCESSOR_POWER_PC_64 0 +#endif + +// Processor families + +#define SLANG_PROCESSOR_FAMILY_X86 (SLANG_PROCESSOR_X86_64 | SLANG_PROCESSOR_X86) +#define SLANG_PROCESSOR_FAMILY_ARM (SLANG_PROCESSOR_ARM | SLANG_PROCESSOR_ARM_64) +#define SLANG_PROCESSOR_FAMILY_POWER_PC (SLANG_PROCESSOR_POWER_PC_64 | SLANG_PROCESSOR_POWER_PC) + +#define SLANG_PTR_IS_64 (SLANG_PROCESSOR_ARM_64 | SLANG_PROCESSOR_X86_64 | SLANG_PROCESSOR_POWER_PC_64) +#define SLANG_PTR_IS_32 (SLANG_PTR_IS_64 ^ 1) + +// Processor features +#if SLANG_PROCESSOR_FAMILY_X86 +# define SLANG_LITTLE_ENDIAN 1 +# define SLANG_UNALIGNED_ACCESS 1 +#elif SLANG_PROCESSOR_FAMILY_ARM +# if defined(__ARMEB__) +# define SLANG_BIG_ENDIAN 1 +# else +# define SLANG_LITTLE_ENDIAN 1 +# endif +#elif SLANG_PROCESSOR_FAMILY_POWER_PC +# define SLANG_BIG_ENDIAN 1 +#endif + +#ifndef SLANG_LITTLE_ENDIAN +# define SLANG_LITTLE_ENDIAN 0 +#endif + +#ifndef SLANG_BIG_ENDIAN +# define SLANG_BIG_ENDIAN 0 +#endif + +#ifndef SLANG_UNALIGNED_ACCESS +# define SLANG_UNALIGNED_ACCESS 0 +#endif + +// One endianess must be set +#if ((SLANG_BIG_ENDIAN | SLANG_LITTLE_ENDIAN) == 0) +# error "Couldn't determine endianess" +#endif + + +#endif // SLANG_CPU_DEFINES_H \ No newline at end of file diff --git a/source/slang/ir-serialize.cpp 
b/source/slang/ir-serialize.cpp index 6def3d9e1..c02ef7ee5 100644 --- a/source/slang/ir-serialize.cpp +++ b/source/slang/ir-serialize.cpp @@ -2,9 +2,12 @@ #include "ir-serialize.h" #include "../core/text-io.h" +#include "../core/slang-byte-encode-util.h" #include "ir-insts.h" +#include "../core/slang-math.h" + namespace Slang { // Needed for linkage with some compilers @@ -124,6 +127,48 @@ void IRSerialData::clear() m_decorationBaseIndex = 0; } +template +static bool _isEqual(const List& aIn, const List& bIn) +{ + if (aIn.Count() != bIn.Count()) + { + return false; + } + + size_t size = size_t(aIn.Count()); + + const T* a = aIn.begin(); + const T* b = bIn.begin(); + + if (a == b) + { + return true; + } + + for (size_t i = 0; i < size; ++i) + { + if (a[i] != b[i]) + { + return false; + } + } + + return true; +} + + +bool IRSerialData::operator==(const ThisType& rhs) const +{ + return (this == &rhs) || + (m_decorationBaseIndex == rhs.m_decorationBaseIndex && + _isEqual(m_insts, rhs.m_insts) && + _isEqual(m_childRuns, rhs.m_childRuns) && + _isEqual(m_decorationRuns, rhs.m_decorationRuns) && + _isEqual(m_externalOperands, rhs.m_externalOperands) && + _isEqual(m_rawSourceLocs, rhs.m_rawSourceLocs) && + _isEqual(m_strings, rhs.m_strings)); +} + // !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! IRSerialWriter !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
void IRSerialWriter::_addInstruction(IRInst* inst) @@ -585,14 +630,31 @@ IRSerialData::StringIndex IRSerialWriter::getStringIndex(const UnownedStringSlic } template -static size_t _calcChunkSize(const List& array) +static size_t _calcChunkSize(IRSerialBinary::CompressionType compressionType, const List& array) { typedef IRSerialBinary Bin; if (array.Count()) { - const size_t size = sizeof(Bin::ArrayHeader) + sizeof(T) * array.Count(); - return (size + 3) & ~size_t(3); + switch (compressionType) + { + case Bin::CompressionType::None: + { + const size_t size = sizeof(Bin::ArrayHeader) + sizeof(T) * array.Count(); + return (size + 3) & ~size_t(3); + } + case Bin::CompressionType::VariableByteLite: + { + const size_t payloadSize = ByteEncodeUtil::calcEncodeLiteSizeUInt32((const uint32_t*)array.begin(), (array.Count() * sizeof(T)) / sizeof(uint32_t)); + const size_t size = sizeof(Bin::CompressedArrayHeader) + payloadSize; + return (size + 3) & ~size_t(3); + } + default: + { + SLANG_ASSERT(!"Unhandled compression type"); + return 0; + } + } } else { @@ -600,27 +662,59 @@ static size_t _calcChunkSize(const List& array) } } -template -Result _writeArrayChunk(uint32_t chunkId, const List& array, Stream* stream) +static Result _writeArrayChunk(IRSerialBinary::CompressionType compressionType, uint32_t chunkId, const void* data, size_t numEntries, size_t typeSize, Stream* stream) { typedef IRSerialBinary Bin; - if (array.Count() == 0) + if (numEntries == 0) { return SLANG_OK; } - size_t payloadSize = sizeof(Bin::ArrayHeader) - sizeof(Bin::Chunk) + sizeof(T) * array.Count(); + size_t payloadSize; - Bin::ArrayHeader header; - header.m_chunk.m_type = chunkId; - header.m_chunk.m_size = uint32_t(payloadSize); - header.m_numEntries = uint32_t(array.Count()); + switch (compressionType) + { + case Bin::CompressionType::None: + { + payloadSize = sizeof(Bin::ArrayHeader) - sizeof(Bin::Chunk) + typeSize * numEntries; + + Bin::ArrayHeader header; + header.m_chunk.m_type = chunkId; + 
header.m_chunk.m_size = uint32_t(payloadSize); + header.m_numEntries = uint32_t(numEntries); + + stream->Write(&header, sizeof(header)); + + stream->Write(data, typeSize * numEntries); + break; + } + case Bin::CompressionType::VariableByteLite: + { + List compressedPayload; + + size_t numCompressedEntries = (numEntries * typeSize) / sizeof(uint32_t); + + ByteEncodeUtil::encodeLiteUInt32((const uint32_t*)data, numCompressedEntries, compressedPayload); + + payloadSize = sizeof(Bin::CompressedArrayHeader) - sizeof(Bin::Chunk) + compressedPayload.Count(); - stream->Write(&header, sizeof(header)); + Bin::CompressedArrayHeader header; + header.m_chunk.m_type = SLANG_MAKE_COMPRESSED_FOUR_CC(chunkId); + header.m_chunk.m_size = uint32_t(payloadSize); + header.m_numEntries = uint32_t(numEntries); + header.m_numCompressedEntries = uint32_t(numCompressedEntries); - stream->Write(array.begin(), sizeof(T) * array.Count()); + stream->Write(&header, sizeof(header)); + stream->Write(compressedPayload.begin(), compressedPayload.Count()); + break; + } + default: + { + return SLANG_FAIL; + } + } // All chunks have sizes rounded to dword size if (payloadSize & 3) { @@ -633,25 +727,237 @@ Result _writeArrayChunk(uint32_t chunkId, const List& array, Stream* stream) return SLANG_OK; } -/* static */Result IRSerialWriter::writeStream(const IRSerialData& data, Stream* stream) +template +Result _writeArrayChunk(IRSerialBinary::CompressionType compressionType, uint32_t chunkId, const List& array, Stream* stream) +{ + return _writeArrayChunk(compressionType, chunkId, array.begin(), size_t(array.Count()), sizeof(T), stream); +} + +Result _encodeInsts(IRSerialBinary::CompressionType compressionType, const List& instsIn, List& encodeArrayOut) { typedef IRSerialBinary Bin; + typedef IRSerialData::Inst::PayloadType PayloadType; + + if (compressionType != Bin::CompressionType::VariableByteLite) + { + return SLANG_FAIL; + } + encodeArrayOut.Clear(); + + const size_t numInsts = size_t(instsIn.Count()); 
+ const IRSerialData::Inst* insts = instsIn.begin(); + + uint8_t* encodeOut = encodeArrayOut.begin(); + uint8_t* encodeEnd = encodeArrayOut.end(); + // Calculate the maximum instruction size with worst case possible encoding + // 2 bytes hold the payload size, and the result type + // Note that if there were some free bits, we could encode some of this stuff into bits, but if we remove payloadType, then there are no free bits + const size_t maxInstSize = 2 + ByteEncodeUtil::kMaxLiteEncodeUInt32 + Math::Max(sizeof(insts->m_payload.m_float64), size_t(2 * ByteEncodeUtil::kMaxLiteEncodeUInt32)); + + for (size_t i = 0; i < numInsts; ++i) + { + const auto& inst = insts[i]; + + // Make sure there is space for the largest possible instruction + if (encodeOut + maxInstSize >= encodeEnd) + { + const size_t offset = size_t(encodeOut - encodeArrayOut.begin()); + + const UInt oldCapacity = encodeArrayOut.Capacity(); + + encodeArrayOut.Reserve(oldCapacity + (oldCapacity >> 1) + maxInstSize); + const UInt capacity = encodeArrayOut.Capacity(); + encodeArrayOut.SetSize(capacity); + + encodeOut = encodeArrayOut.begin() + offset; + encodeEnd = encodeArrayOut.end(); + } + + *encodeOut++ = uint8_t(inst.m_op); + *encodeOut++ = uint8_t(inst.m_payloadType); + + encodeOut += ByteEncodeUtil::encodeLiteUInt32((uint32_t)inst.m_resultTypeIndex, encodeOut); + + switch (inst.m_payloadType) + { + case PayloadType::Empty: + { + break; + } + case PayloadType::Operand_1: + case PayloadType::String_1: + case PayloadType::UInt32: + { + // 1 UInt32 + encodeOut += ByteEncodeUtil::encodeLiteUInt32((uint32_t)inst.m_payload.m_operands[0], encodeOut); + break; + } + case PayloadType::Operand_2: + case PayloadType::OperandAndUInt32: + case PayloadType::OperandExternal: + case PayloadType::String_2: + { + // 2 UInt32 + encodeOut += ByteEncodeUtil::encodeLiteUInt32((uint32_t)inst.m_payload.m_operands[0], encodeOut); + encodeOut += ByteEncodeUtil::encodeLiteUInt32((uint32_t)inst.m_payload.m_operands[1], 
encodeOut); + break; + } + case PayloadType::Float64: + { + memcpy(encodeOut, &inst.m_payload.m_float64, sizeof(inst.m_payload.m_float64)); + encodeOut += sizeof(inst.m_payload.m_float64); + break; + } + case PayloadType::Int64: + { + memcpy(encodeOut, &inst.m_payload.m_int64, sizeof(inst.m_payload.m_int64)); + encodeOut += sizeof(inst.m_payload.m_int64); + break; + } + } + } + + // Fix the size + encodeArrayOut.SetSize(UInt(encodeOut - encodeArrayOut.begin())); + return SLANG_OK; +} + +Result _writeInstArrayChunk(IRSerialBinary::CompressionType compressionType, uint32_t chunkId, const List& array, Stream* stream) +{ + typedef IRSerialBinary Bin; + if (array.Count() == 0) + { + return SLANG_OK; + } + + switch (compressionType) + { + case Bin::CompressionType::None: + { + return _writeArrayChunk(compressionType, chunkId, array, stream); + } + case Bin::CompressionType::VariableByteLite: + { + List compressedPayload; + SLANG_RETURN_ON_FAIL(_encodeInsts(compressionType, array, compressedPayload)); + + size_t payloadSize = sizeof(Bin::CompressedArrayHeader) - sizeof(Bin::Chunk) + compressedPayload.Count(); + + Bin::CompressedArrayHeader header; + header.m_chunk.m_type = SLANG_MAKE_COMPRESSED_FOUR_CC(chunkId); + header.m_chunk.m_size = uint32_t(payloadSize); + header.m_numEntries = uint32_t(array.Count()); + header.m_numCompressedEntries = 0; + + stream->Write(&header, sizeof(header)); + stream->Write(compressedPayload.begin(), compressedPayload.Count()); + + // All chunks have sizes rounded to dword size + if (payloadSize & 3) + { + const uint8_t pad[4] = { 0, 0, 0, 0 }; + // Pad outs + int padSize = 4 - (payloadSize & 3); + stream->Write(pad, padSize); + } + return SLANG_OK; + } + default: break; + } + return SLANG_FAIL; +} + +static size_t _calcInstChunkSize(IRSerialBinary::CompressionType compressionType, const List& instsIn) +{ + typedef IRSerialBinary Bin; + typedef IRSerialData::Inst::PayloadType PayloadType; + + switch (compressionType) + { + case 
Bin::CompressionType::None: + { + return _calcChunkSize(compressionType, instsIn); + } + case Bin::CompressionType::VariableByteLite: + { + size_t size = sizeof(Bin::CompressedArrayHeader); + + size_t numInsts = size_t(instsIn.Count()); + size += numInsts * 2; // op and payload + + + IRSerialData::Inst* insts = instsIn.begin(); + + for (size_t i = 0; i < numInsts; ++i) + { + const auto& inst = insts[i]; + + size += ByteEncodeUtil::calcEncodeLiteSizeUInt32((uint32_t)inst.m_resultTypeIndex); + + switch (inst.m_payloadType) + { + case PayloadType::Empty: + { + break; + } + case PayloadType::Operand_1: + case PayloadType::String_1: + case PayloadType::UInt32: + { + // 1 UInt32 + size += ByteEncodeUtil::calcEncodeLiteSizeUInt32((uint32_t)inst.m_payload.m_operands[0]); + break; + } + case PayloadType::Operand_2: + case PayloadType::OperandAndUInt32: + case PayloadType::OperandExternal: + case PayloadType::String_2: + { + // 2 UInt32 + size += ByteEncodeUtil::calcEncodeLiteSizeUInt32((uint32_t)inst.m_payload.m_operands[0]); + size += ByteEncodeUtil::calcEncodeLiteSizeUInt32((uint32_t)inst.m_payload.m_operands[1]); + break; + } + case PayloadType::Float64: + { + size += sizeof(inst.m_payload.m_float64); + break; + } + case PayloadType::Int64: + { + size += sizeof(inst.m_payload.m_int64); + break; + } + } + } + + return (size + 3) & ~size_t(3); + } + default: break; + } + + SLANG_ASSERT(!"Unhandled compression type"); + return 0; +} + +/* static */Result IRSerialWriter::writeStream(const IRSerialData& data, Bin::CompressionType compressionType, Stream* stream) +{ size_t totalSize = 0; totalSize += sizeof(Bin::SlangHeader) + - _calcChunkSize(data.m_insts) + - _calcChunkSize(data.m_childRuns) + - _calcChunkSize(data.m_decorationRuns) + - _calcChunkSize(data.m_externalOperands) + - _calcChunkSize(data.m_strings) + - _calcChunkSize(data.m_rawSourceLocs); + _calcInstChunkSize(compressionType, data.m_insts) + + _calcChunkSize(compressionType, data.m_childRuns) + + 
_calcChunkSize(compressionType, data.m_decorationRuns) + + _calcChunkSize(compressionType, data.m_externalOperands) + + _calcChunkSize(Bin::CompressionType::None, data.m_strings) + + _calcChunkSize(Bin::CompressionType::None, data.m_rawSourceLocs); { Bin::Chunk riffHeader; riffHeader.m_type = Bin::kRiffFourCc; riffHeader.m_size = uint32_t(totalSize); - + stream->Write(&riffHeader, sizeof(riffHeader)); } { @@ -659,19 +965,20 @@ Result _writeArrayChunk(uint32_t chunkId, const List& array, Stream* stream) slangHeader.m_chunk.m_type = Bin::kSlangFourCc; slangHeader.m_chunk.m_size = uint32_t(sizeof(slangHeader) - sizeof(Bin::Chunk)); slangHeader.m_decorationBase = uint32_t(data.m_decorationBaseIndex); + /* Record the compression requested by the caller, which is the one applied to the compressible chunks below */ slangHeader.m_compressionType = uint32_t(compressionType); stream->Write(&slangHeader, sizeof(slangHeader)); } - _writeArrayChunk(Bin::kInstFourCc, data.m_insts, stream); - _writeArrayChunk(Bin::kChildRunFourCc, data.m_childRuns, stream); - _writeArrayChunk(Bin::kDecoratorRunFourCc, data.m_decorationRuns, stream); - _writeArrayChunk(Bin::kExternalOperandsFourCc, data.m_externalOperands, stream); - _writeArrayChunk(Bin::kStringFourCc, data.m_strings, stream); + SLANG_RETURN_ON_FAIL(_writeInstArrayChunk(compressionType, Bin::kInstFourCc, data.m_insts, stream)); + SLANG_RETURN_ON_FAIL(_writeArrayChunk(compressionType, Bin::kChildRunFourCc, data.m_childRuns, stream)); + SLANG_RETURN_ON_FAIL(_writeArrayChunk(compressionType, Bin::kDecoratorRunFourCc, data.m_decorationRuns, stream)); + SLANG_RETURN_ON_FAIL(_writeArrayChunk(compressionType, Bin::kExternalOperandsFourCc, data.m_externalOperands, stream)); + SLANG_RETURN_ON_FAIL(_writeArrayChunk(Bin::CompressionType::None, Bin::kStringFourCc, data.m_strings, stream)); { uint32_t fourCc = sizeof(IRSerialData::RawSourceLoc) == 4 ? 
Bin::kUInt32SourceLocFourCc : Bin::kUInt64SourceLocFourCc; - _writeArrayChunk(fourCc, data.m_rawSourceLocs, stream); + SLANG_RETURN_ON_FAIL(_writeArrayChunk(Bin::CompressionType::None, fourCc, data.m_rawSourceLocs, stream)); } return SLANG_OK; @@ -679,33 +986,250 @@ Result _writeArrayChunk(uint32_t chunkId, const List& array, Stream* stream) // !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! IRSerialReader !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +class ListResizer +{ + public: + virtual void* setSize(size_t newSize) = 0; + SLANG_FORCE_INLINE size_t getTypeSize() const { return m_typeSize; } + ListResizer(size_t typeSize):m_typeSize(typeSize) {} + + protected: + size_t m_typeSize; +}; + +template +class ListResizerForType: public ListResizer +{ + public: + typedef ListResizer Parent; + + SLANG_FORCE_INLINE ListResizerForType(List& list): + Parent(sizeof(T)), + m_list(list) + {} + + virtual void* setSize(size_t newSize) SLANG_OVERRIDE + { + m_list.SetSize(UInt(newSize)); + return (void*)m_list.begin(); + } + + protected: + List& m_list; +}; + +static Result _readArrayChunk(IRSerialBinary::CompressionType compressionType, const IRSerialBinary::Chunk& chunk, Stream* stream, size_t* numReadInOut, ListResizer& listOut) +{ + typedef IRSerialBinary Bin; + + const size_t typeSize = listOut.getTypeSize(); + + switch (compressionType) + { + case Bin::CompressionType::VariableByteLite: + { + // We have a compressed header + Bin::CompressedArrayHeader header; + header.m_chunk = chunk; + + stream->Read(&header.m_chunk + 1, sizeof(header) - sizeof(Bin::Chunk)); + *numReadInOut += sizeof(header) - sizeof(Bin::Chunk); + + void* data = listOut.setSize(header.m_numEntries); + + // Need to read all the compressed data... 
+ size_t payloadSize = header.m_chunk.m_size - (sizeof(header) - sizeof(Bin::Chunk)); + + List compressedPayload; + compressedPayload.SetSize(payloadSize); + + stream->Read(compressedPayload.begin(), payloadSize); + *numReadInOut += payloadSize; + + SLANG_ASSERT(header.m_numCompressedEntries == uint32_t((header.m_numEntries * typeSize) / sizeof(uint32_t))); + + // Decode.. + ByteEncodeUtil::decodeLiteUInt32(compressedPayload.begin(), header.m_numCompressedEntries, (uint32_t*)data); + break; + } + case Bin::CompressionType::None: + { + // Read uncompressed + Bin::ArrayHeader header; + header.m_chunk = chunk; + + stream->Read(&header.m_chunk + 1, sizeof(header) - sizeof(Bin::Chunk)); + *numReadInOut += sizeof(header) - sizeof(Bin::Chunk); + + const size_t payloadSize = header.m_numEntries * typeSize; + + void* data = listOut.setSize(header.m_numEntries); + + stream->Read(data, payloadSize); + *numReadInOut += payloadSize; + break; + } + } + + // All chunks have sizes rounded to dword size + if (*numReadInOut & 3) + { + const uint8_t pad[4] = { 0, 0, 0, 0 }; + // Pad outs + int padSize = 4 - int(*numReadInOut & 3); + stream->Seek(SeekOrigin::Current, padSize); + + *numReadInOut += padSize; + } + + return SLANG_OK; +} + template -Result _readArrayChunk(const IRSerialBinary::Chunk& chunk, Stream* stream, List& arrayOut) +Result _readArrayChunk(const IRSerialBinary::SlangHeader& header, const IRSerialBinary::Chunk& chunk, Stream* stream, size_t* numReadInOut, List& arrayOut) { typedef IRSerialBinary Bin; - Bin::ArrayHeader header; - header.m_chunk = chunk; + Bin::CompressionType compressionType = Bin::CompressionType::None; - stream->Read(&header.m_chunk + 1, sizeof(header) - sizeof(Bin::Chunk)); + if (chunk.m_type == SLANG_MAKE_COMPRESSED_FOUR_CC(chunk.m_type)) + { + // If it has compression, use the compression type set in the header + compressionType = Bin::CompressionType(header.m_compressionType); + } + ListResizerForType resizer(arrayOut); + return 
_readArrayChunk(compressionType, chunk, stream, numReadInOut, resizer); +} - size_t payloadSize = sizeof(Bin::ArrayHeader) - sizeof(Bin::Chunk) + sizeof(T) * header.m_numEntries; - if (payloadSize != header.m_chunk.m_size) +template +Result _readArrayUncompressedChunk(const IRSerialBinary::SlangHeader& header, const IRSerialBinary::Chunk& chunk, Stream* stream, size_t* numReadInOut, List& arrayOut) +{ + typedef IRSerialBinary Bin; + SLANG_UNUSED(header); + ListResizerForType resizer(arrayOut); + return _readArrayChunk(Bin::CompressionType::None, chunk, stream, numReadInOut, resizer); +} + +static Result _decodeInsts(IRSerialBinary::CompressionType compressionType, const List& encodeIn, List& instsOut) +{ + typedef IRSerialBinary Bin; + typedef IRSerialData::Inst::PayloadType PayloadType; + + if (compressionType != Bin::CompressionType::VariableByteLite) { return SLANG_FAIL; } - arrayOut.SetSize(header.m_numEntries); + const size_t numInsts = size_t(instsOut.Count()); + IRSerialData::Inst* insts = instsOut.begin(); + + const uint8_t* encodeCur = encodeIn.begin(); + + for (size_t i = 0; i < numInsts; ++i) + { + auto& inst = insts[i]; + + inst.m_op = *encodeCur++; + const PayloadType payloadType = PayloadType(*encodeCur++); + inst.m_payloadType = payloadType; + + // Read the result value + encodeCur += ByteEncodeUtil::decodeLiteUInt32(encodeCur, (uint32_t*)&inst.m_resultTypeIndex); + + switch (inst.m_payloadType) + { + case PayloadType::Empty: + { + break; + } + case PayloadType::Operand_1: + case PayloadType::String_1: + case PayloadType::UInt32: + { + // 1 UInt32 + encodeCur += ByteEncodeUtil::decodeLiteUInt32(encodeCur, (uint32_t*)&inst.m_payload.m_operands[0]); + break; + } + case PayloadType::Operand_2: + case PayloadType::OperandAndUInt32: + case PayloadType::OperandExternal: + case PayloadType::String_2: + { + // 2 UInt32 + encodeCur += ByteEncodeUtil::decodeLiteUInt32(encodeCur, 2, (uint32_t*)&inst.m_payload.m_operands[0]); + break; + } + case 
PayloadType::Float64: + { + memcpy(&inst.m_payload.m_float64, encodeCur, sizeof(inst.m_payload.m_float64)); + encodeCur += sizeof(inst.m_payload.m_float64); + break; + } + case PayloadType::Int64: + { + memcpy(&inst.m_payload.m_int64, encodeCur, sizeof(inst.m_payload.m_int64)); + encodeCur += sizeof(inst.m_payload.m_int64); + break; + } + } + } + + return SLANG_OK; +} - stream->Read(arrayOut.begin(), sizeof(T) * header.m_numEntries); +Result _readInstArrayChunk(const IRSerialBinary::SlangHeader& slangHeader, const IRSerialBinary::Chunk& chunk, Stream* stream, size_t* numReadInOut, List& arrayOut) +{ + typedef IRSerialBinary Bin; + + Bin::CompressionType compressionType = Bin::CompressionType::None; + if (chunk.m_type == SLANG_MAKE_COMPRESSED_FOUR_CC(chunk.m_type)) + { + compressionType = Bin::CompressionType(slangHeader.m_compressionType); + } + + switch (compressionType) + { + case Bin::CompressionType::None: + { + ListResizerForType resizer(arrayOut); + return _readArrayChunk(compressionType, chunk, stream, numReadInOut, resizer); + } + case Bin::CompressionType::VariableByteLite: + { + // We have a compressed header + Bin::CompressedArrayHeader header; + header.m_chunk = chunk; + + stream->Read(&header.m_chunk + 1, sizeof(header) - sizeof(Bin::Chunk)); + *numReadInOut += sizeof(header) - sizeof(Bin::Chunk); + + // Need to read all the compressed data... 
+ size_t payloadSize = header.m_chunk.m_size - (sizeof(header) - sizeof(Bin::Chunk)); + + List compressedPayload; + compressedPayload.SetSize(payloadSize); + + stream->Read(compressedPayload.begin(), payloadSize); + *numReadInOut += payloadSize; + + arrayOut.SetSize(header.m_numEntries); + + SLANG_RETURN_ON_FAIL(_decodeInsts(compressionType, compressedPayload, arrayOut)); + break; + } + default: + { + return SLANG_FAIL; + } + } // All chunks have sizes rounded to dword size - if (payloadSize & 3) + if (*numReadInOut & 3) { - const uint8_t pad[4] = { 0, 0, 0, 0 }; // Pad outs - int padSize = 4 - (payloadSize & 3); + int padSize = 4 - int(*numReadInOut & 3); stream->Seek(SeekOrigin::Current, padSize); + *numReadInOut += padSize; } return SLANG_OK; @@ -749,57 +1273,66 @@ int64_t _calcChunkTotalSize(const IRSerialBinary::Chunk& chunk) remainingBytes = header.m_size; } + // Header + // Chunk will not be kSlangFourCC if not read yet + Bin::SlangHeader slangHeader; + memset(&slangHeader, 0, sizeof(slangHeader)); + while (remainingBytes > 0) { Bin::Chunk chunk; stream->Read(&chunk, sizeof(chunk)); + size_t bytesRead = sizeof(chunk); + switch (chunk.m_type) { case Bin::kSlangFourCc: { // Slang header - Bin::SlangHeader header; - header.m_chunk = chunk; + slangHeader.m_chunk = chunk; // NOTE! Really we should only read what we know the size to be... 
// and skip if it's larger - stream->Read(&header.m_chunk + 1, sizeof(header) - sizeof(chunk)); - - dataOut->m_decorationBaseIndex = header.m_decorationBase; + stream->Read(&slangHeader.m_chunk + 1, sizeof(slangHeader) - sizeof(chunk)); + dataOut->m_decorationBaseIndex = slangHeader.m_decorationBase; remainingBytes -= _calcChunkTotalSize(chunk); break; } + case SLANG_MAKE_COMPRESSED_FOUR_CC(Bin::kInstFourCc): case Bin::kInstFourCc: { - SLANG_RETURN_ON_FAIL(_readArrayChunk(chunk, stream, dataOut->m_insts)); + SLANG_RETURN_ON_FAIL(_readInstArrayChunk(slangHeader, chunk, stream, &bytesRead, dataOut->m_insts)); remainingBytes -= _calcChunkTotalSize(chunk); break; } + case SLANG_MAKE_COMPRESSED_FOUR_CC(Bin::kDecoratorRunFourCc): case Bin::kDecoratorRunFourCc: { - SLANG_RETURN_ON_FAIL(_readArrayChunk(chunk, stream, dataOut->m_decorationRuns)); + SLANG_RETURN_ON_FAIL(_readArrayChunk(slangHeader, chunk, stream, &bytesRead, dataOut->m_decorationRuns)); remainingBytes -= _calcChunkTotalSize(chunk); break; } + case SLANG_MAKE_COMPRESSED_FOUR_CC(Bin::kChildRunFourCc): case Bin::kChildRunFourCc: { - SLANG_RETURN_ON_FAIL(_readArrayChunk(chunk, stream, dataOut->m_childRuns)); + SLANG_RETURN_ON_FAIL(_readArrayChunk(slangHeader, chunk, stream, &bytesRead, dataOut->m_childRuns)); remainingBytes -= _calcChunkTotalSize(chunk); break; } + case SLANG_MAKE_COMPRESSED_FOUR_CC(Bin::kExternalOperandsFourCc): case Bin::kExternalOperandsFourCc: { - SLANG_RETURN_ON_FAIL(_readArrayChunk(chunk, stream, dataOut->m_externalOperands)); + SLANG_RETURN_ON_FAIL(_readArrayChunk(slangHeader, chunk, stream, &bytesRead, dataOut->m_externalOperands)); remainingBytes -= _calcChunkTotalSize(chunk); break; } case Bin::kStringFourCc: { - SLANG_RETURN_ON_FAIL(_readArrayChunk(chunk, stream, dataOut->m_strings)); + SLANG_RETURN_ON_FAIL(_readArrayUncompressedChunk(slangHeader, chunk, stream, &bytesRead, dataOut->m_strings)); remainingBytes -= _calcChunkTotalSize(chunk); break; } @@ -809,7 +1342,7 @@ int64_t 
_calcChunkTotalSize(const IRSerialBinary::Chunk& chunk) if ((sizeof(IRSerialData::RawSourceLoc) == 4 && chunk.m_type == Bin::kUInt32SourceLocFourCc) || (sizeof(IRSerialData::RawSourceLoc) == 8 && chunk.m_type == Bin::kUInt64SourceLocFourCc)) { - SLANG_RETURN_ON_FAIL(_readArrayChunk(chunk, stream, dataOut->m_rawSourceLocs)); + SLANG_RETURN_ON_FAIL(_readArrayUncompressedChunk(slangHeader, chunk, stream, &bytesRead, dataOut->m_rawSourceLocs)); remainingBytes -= _calcChunkTotalSize(chunk); } else @@ -1270,7 +1803,7 @@ Result serializeModule(IRModule* module, SourceManager* sourceManager, Stream* s if (stream) { - SLANG_RETURN_ON_FAIL(IRSerialWriter::writeStream(serialData, stream)); + SLANG_RETURN_ON_FAIL(IRSerialWriter::writeStream(serialData, IRSerialBinary::CompressionType::VariableByteLite, stream)); } return SLANG_OK; diff --git a/source/slang/ir-serialize.h b/source/slang/ir-serialize.h index 356e14055..28f90303e 100644 --- a/source/slang/ir-serialize.h +++ b/source/slang/ir-serialize.h @@ -17,6 +17,8 @@ class Name; struct IRSerialData { + typedef IRSerialData ThisType; + enum class InstIndex : uint32_t; enum class StringIndex : uint32_t; enum class ArrayIndex : uint32_t; @@ -32,6 +34,15 @@ struct IRSerialData /// A run of instructions struct InstRun { + typedef InstRun ThisType; + SLANG_FORCE_INLINE bool operator==(const ThisType& rhs) const + { + return m_parentIndex == rhs.m_parentIndex && + m_startInstIndex == rhs.m_startInstIndex && + m_numChildren == rhs.m_numChildren; + } + SLANG_FORCE_INLINE bool operator!=(const ThisType& rhs) const { return !(*this == rhs); } + InstIndex m_parentIndex; ///< The parent instruction InstIndex m_startInstIndex; ///< The index to the first instruction SizeType m_numChildren; ///< The number of children @@ -49,6 +60,7 @@ struct IRSerialData // Decoration information is stored in m_decorationRuns struct Inst { + typedef Inst ThisType; enum { kMaxOperands = 2, ///< Maximum number of operands that can be held in an instruction 
(otherwise held 'externally') @@ -76,6 +88,10 @@ struct IRSerialData /// Get the number of operands SLANG_FORCE_INLINE int getNumOperands() const; + bool operator==(const ThisType& rhs) const; + + SLANG_FORCE_INLINE bool operator!=(const ThisType& rhs) const { return !(*this == rhs); } + uint8_t m_op; ///< For now one of IROp PayloadType m_payloadType; ///< The type of payload uint16_t m_pad0; ///< Not currently used @@ -115,6 +131,10 @@ struct IRSerialData /// Get the operands of an instruction SLANG_FORCE_INLINE int getOperands(const Inst& inst, const InstIndex** operandsOut) const; + /// == + bool operator==(const ThisType& rhs) const; + SLANG_FORCE_INLINE bool operator!=(const ThisType& rhs) const { return !(*this == rhs); } + /// Calculate the amount of memory used by this IRSerialData size_t calcSizeInBytes() const; @@ -145,6 +165,43 @@ SLANG_FORCE_INLINE int IRSerialData::Inst::getNumOperands() const return (m_payloadType == PayloadType::OperandExternal) ? m_payload.m_externalOperand.m_size : s_payloadInfos[int(m_payloadType)].m_numOperands; } +// -------------------------------------------------------------------------- +SLANG_FORCE_INLINE bool IRSerialData::Inst::operator==(const ThisType& rhs) const +{ + if (m_op == rhs.m_op && + m_payloadType == rhs.m_payloadType && + m_resultTypeIndex == rhs.m_resultTypeIndex) + { + switch (m_payloadType) + { + case PayloadType::Empty: + { + return true; + } + case PayloadType::Operand_1: + case PayloadType::String_1: + case PayloadType::UInt32: + { + return m_payload.m_operands[0] == rhs.m_payload.m_operands[0]; + } + case PayloadType::OperandAndUInt32: + case PayloadType::OperandExternal: + case PayloadType::Operand_2: + case PayloadType::String_2: + { + return m_payload.m_operands[0] == rhs.m_payload.m_operands[0] && + m_payload.m_operands[1] == rhs.m_payload.m_operands[1]; + } + case PayloadType::Float64: + case PayloadType::Int64: + { + return m_payload.m_int64 == rhs.m_payload.m_int64; + } + default: break; + } + 
} + return false; +} // -------------------------------------------------------------------------- SLANG_FORCE_INLINE int IRSerialData::getOperands(const Inst& inst, const InstIndex** operandsOut) const { @@ -163,6 +220,8 @@ SLANG_FORCE_INLINE int IRSerialData::getOperands(const Inst& inst, const InstInd #define SLANG_FOUR_CC(c0, c1, c2, c3) ((uint32_t(c0) << 0) | (uint32_t(c1) << 8) | (uint32_t(c2) << 16) | (uint32_t(c3) << 24)) +#define SLANG_MAKE_COMPRESSED_FOUR_CC(fourCc) (((fourCc) & 0xffff00ff) | (uint32_t('c') << 8)) + struct IRSerialBinary { // http://fileformats.archiveteam.org/wiki/RIFF @@ -173,6 +232,14 @@ struct IRSerialBinary uint32_t m_type; uint32_t m_size; }; + + enum class CompressionType + { + None, + VariableByteLite, + }; + + static const uint32_t kRiffFourCc = SLANG_FOUR_CC('R', 'I', 'F', 'F'); static const uint32_t kSlangFourCc = SLANG_FOUR_CC('S', 'L', 'N', 'G'); ///< Holds all the slang specific chunks @@ -180,6 +247,12 @@ struct IRSerialBinary static const uint32_t kDecoratorRunFourCc = SLANG_FOUR_CC('S', 'L', 'd', 'r'); static const uint32_t kChildRunFourCc = SLANG_FOUR_CC('S', 'L', 'c', 'r'); static const uint32_t kExternalOperandsFourCc = SLANG_FOUR_CC('S', 'L', 'e', 'o'); + + static const uint32_t kCompressedInstFourCc = SLANG_MAKE_COMPRESSED_FOUR_CC(kInstFourCc); + static const uint32_t kCompressedDecoratorRunFourCc = SLANG_MAKE_COMPRESSED_FOUR_CC(kDecoratorRunFourCc); + static const uint32_t kCompressedChildRunFourCc = SLANG_MAKE_COMPRESSED_FOUR_CC(kChildRunFourCc); + static const uint32_t kCompressedExternalOperandsFourCc = SLANG_MAKE_COMPRESSED_FOUR_CC(kExternalOperandsFourCc); + static const uint32_t kStringFourCc = SLANG_FOUR_CC('S', 'L', 's', 't'); /// 4 bytes per entry static const uint32_t kUInt32SourceLocFourCc = SLANG_FOUR_CC('S', 'r', 's', '4'); @@ -190,32 +263,40 @@ struct IRSerialBinary { Chunk m_chunk; uint32_t m_decorationBase; + uint32_t m_compressionType; ///< Holds the compression type used (if used at all) }; struct 
ArrayHeader { Chunk m_chunk; uint32_t m_numEntries; }; + struct CompressedArrayHeader + { + Chunk m_chunk; + uint32_t m_numEntries; ///< The number of entries + uint32_t m_numCompressedEntries; ///< The amount of compressed entries + }; }; struct IRSerialWriter { typedef IRSerialData Ser; + typedef IRSerialBinary Bin; struct OptionFlag { typedef uint32_t Type; enum Enum: Type { - RawSourceLocation = 1, + RawSourceLocation = 0x01, }; }; typedef OptionFlag::Type OptionFlags; Result write(IRModule* module, SourceManager* sourceManager, OptionFlags options, IRSerialData* serialData); - static Result writeStream(const IRSerialData& data, Stream* stream); + static Result writeStream(const IRSerialData& data, Bin::CompressionType compressionType, Stream* stream); /// Get a slice from an index UnownedStringSlice getStringSlice(Ser::StringIndex index) const; diff --git a/tools/slang-test/slang-test.vcxproj b/tools/slang-test/slang-test.vcxproj index 4987e1ce3..54ea6f342 100644 --- a/tools/slang-test/slang-test.vcxproj +++ b/tools/slang-test/slang-test.vcxproj @@ -171,6 +171,7 @@ + @@ -182,4 +183,4 @@ - \ No newline at end of file + \ No newline at end of file diff --git a/tools/slang-test/slang-test.vcxproj.filters b/tools/slang-test/slang-test.vcxproj.filters index 4c000ba58..03bb10348 100644 --- a/tools/slang-test/slang-test.vcxproj.filters +++ b/tools/slang-test/slang-test.vcxproj.filters @@ -1,4 +1,4 @@ - + @@ -38,5 +38,8 @@ Source Files + + Source Files + \ No newline at end of file diff --git a/tools/slang-test/unit-test-byte-encode.cpp b/tools/slang-test/unit-test-byte-encode.cpp new file mode 100644 index 000000000..c0944b27d --- /dev/null +++ b/tools/slang-test/unit-test-byte-encode.cpp @@ -0,0 +1,142 @@ +// unit-test-byte-encode.cpp + +#include "../../source/core/slang-byte-encode-util.h" + +#include +#include +#include + +#include "test-context.h" + +#include "../../source/core/slang-random-generator.h" +#include "../../source/core/list.h" + +using namespace 
Slang; + +static void checkUInt32(uint32_t value) +{ + uint8_t buffer[ByteEncodeUtil::kMaxLiteEncodeUInt32 + 1]; + + int writeLen = ByteEncodeUtil::encodeLiteUInt32(value, buffer); + buffer[writeLen] = 0xcd; + + uint32_t decode; + int readLen = ByteEncodeUtil::decodeLiteUInt32(buffer, &decode); + + SLANG_CHECK(readLen == writeLen && decode == value); +} + +static void byteEncodeUnitTest() +{ + DefaultRandomGenerator randGen(0x5346536a); + + { + SLANG_CHECK(ByteEncodeUtil::calcMsb8(0) == -1); + SLANG_CHECK(ByteEncodeUtil::calcMsb8(1) == 0); + SLANG_CHECK(ByteEncodeUtil::calcMsb8(0x81) == 7); + } + + { + SLANG_CHECK(ByteEncodeUtil::calcMsb32(0) == -1); + SLANG_CHECK(ByteEncodeUtil::calcMsb32(0x81) == 7); + SLANG_CHECK(ByteEncodeUtil::calcMsb32(0x00000001) == 0); + SLANG_CHECK(ByteEncodeUtil::calcMsb32(0x00000081) == 7); + SLANG_CHECK(ByteEncodeUtil::calcMsb32(0x00000181) == 8); + SLANG_CHECK(ByteEncodeUtil::calcMsb32(0x00008181) == 15); + SLANG_CHECK(ByteEncodeUtil::calcMsb32(0x00018181) == 16); + SLANG_CHECK(ByteEncodeUtil::calcMsb32(0x00818181) == 23); + SLANG_CHECK(ByteEncodeUtil::calcMsb32(0x01818181) == 24); + SLANG_CHECK(ByteEncodeUtil::calcMsb32(0x81818181) == 31); + SLANG_CHECK(ByteEncodeUtil::calcMsb32(0xffffffff) == 31); + } + + { + SLANG_CHECK(ByteEncodeUtil::calcMsByte32(0x00000000) == -1); + SLANG_CHECK(ByteEncodeUtil::calcMsByte32(0x00000001) == 0); + SLANG_CHECK(ByteEncodeUtil::calcMsByte32(0x00000081) == 0); + SLANG_CHECK(ByteEncodeUtil::calcMsByte32(0x00000181) == 1); + SLANG_CHECK(ByteEncodeUtil::calcMsByte32(0x00008181) == 1); + SLANG_CHECK(ByteEncodeUtil::calcMsByte32(0x00018181) == 2); + SLANG_CHECK(ByteEncodeUtil::calcMsByte32(0x00818181) == 2); + SLANG_CHECK(ByteEncodeUtil::calcMsByte32(0x01818181) == 3); + SLANG_CHECK(ByteEncodeUtil::calcMsByte32(0x81818181) == 3); + SLANG_CHECK(ByteEncodeUtil::calcMsByte32(0xffffffff) == 3); + } + + { + SLANG_CHECK(ByteEncodeUtil::calcNonZeroMsByte32(0x00000001) == 0); + 
SLANG_CHECK(ByteEncodeUtil::calcNonZeroMsByte32(0x00000081) == 0); + SLANG_CHECK(ByteEncodeUtil::calcNonZeroMsByte32(0x00000181) == 1); + SLANG_CHECK(ByteEncodeUtil::calcNonZeroMsByte32(0x00008181) == 1); + SLANG_CHECK(ByteEncodeUtil::calcNonZeroMsByte32(0x00018181) == 2); + SLANG_CHECK(ByteEncodeUtil::calcNonZeroMsByte32(0x00818181) == 2); + SLANG_CHECK(ByteEncodeUtil::calcNonZeroMsByte32(0x01818181) == 3); + SLANG_CHECK(ByteEncodeUtil::calcNonZeroMsByte32(0x81818181) == 3); + SLANG_CHECK(ByteEncodeUtil::calcNonZeroMsByte32(0xffffffff) == 3); + } + + { + const int blockSize = 1024; + + List encodedBuffer; + encodedBuffer.SetSize(ByteEncodeUtil::kMaxLiteEncodeUInt32 * blockSize); + + List initialBuffer; + initialBuffer.SetSize(blockSize); + List decodeBuffer; + decodeBuffer.SetSize(blockSize); + // Put in cache? + memset(decodeBuffer.begin(), 0, blockSize * sizeof(uint32_t)); + + for (int i = 0; i < blockSize; i++) + { + const int v = ByteEncodeUtil::calcMsb8(uint32_t((randGen.nextInt32() & 0xf) | 1)); + + // Make the commonality of different numbers that bytes are most common, then shorts etc.. 
+ uint32_t mask; + switch (v) + { + case 0: mask = 0xffffffff; break; + case 1: mask = 0x00ffffff; break; + case 2: mask = 0x0000ffff; break; + case 3: mask = 0x000000ff; break; + } + + initialBuffer[i] = randGen.nextInt32() & mask; + } + + size_t numEncodeBytes = ByteEncodeUtil::encodeLiteUInt32(initialBuffer.begin(), blockSize, encodedBuffer.begin()); + + SLANG_CHECK(ByteEncodeUtil::calcEncodeLiteSizeUInt32(initialBuffer.begin(), blockSize) == numEncodeBytes); + + size_t numEncodeBytes2 = ByteEncodeUtil::decodeLiteUInt32(encodedBuffer.begin(), blockSize, decodeBuffer.begin()); + + SLANG_CHECK(numEncodeBytes2 == numEncodeBytes); + + SLANG_CHECK(memcmp(decodeBuffer.begin(), initialBuffer.begin(), sizeof(uint32_t) * blockSize) == 0); + } + + { + checkUInt32(uint32_t(0)); + checkUInt32(uint32_t(0x7fffff)); + checkUInt32(uint32_t(0x7fff)); + checkUInt32(uint32_t(0x7f)); + checkUInt32(uint32_t(0x7fffffff)); + checkUInt32(uint32_t(0xffffffff)); + +#if 1 + for (int64_t i = 0; i < SLANG_INT64(0x100000000); i += 371) + { + checkUInt32(uint32_t(i)); + } +#else + for (int64_t i = 0; i < SLANG_INT64(0x100000000); i += 1) + { + checkUInt32(uint32_t(i)); + } +#endif + } + +} + +SLANG_UNIT_TEST("ByteEncode", byteEncodeUnitTest); \ No newline at end of file -- cgit v1.2.3