From d4f99c8bac8b28f18c864a717d8833db6a1c872d Mon Sep 17 00:00:00 2001 From: jsmall-nvidia Date: Wed, 22 Mar 2023 12:04:33 -0400 Subject: Source map obfuscation (#2717) * #include an absolute path didn't work - because paths were taken to always be relative. * WIP source map. * Split out handling of RttiTypeFuncs to a map type. * Make RttiTypeFuncsMap hold default impls. * Slightly more sophisticated RttiTypeFuncsMap * Source map decoding. * Fix tabs. * Fix asserts due to negative values. * Use less obscure mechanisms in SourceMap. * Source map decoding. Simplifying SourceMap usage. * First attempt at ouputting a source map as part of emit. * Added support for -source-map option. SourceMap is added to the artifact. * Small improvements around column calculation in SourceWriter. * Source Loc obuscation WIP. * Fix some issues around SourceMap obfuscation. * Split out obfuscation into its own file. * Keep obfuscated SourceMap even through serialization bottleneck. --- source/core/slang-char-encode.cpp | 34 ++++++++++++++++++++++++++++++++++ source/core/slang-char-encode.h | 10 ++++++++++ source/core/slang-char-util.h | 3 +++ 3 files changed, 47 insertions(+) (limited to 'source/core') diff --git a/source/core/slang-char-encode.cpp b/source/core/slang-char-encode.cpp index d061e34ba..687040fa2 100644 --- a/source/core/slang-char-encode.cpp +++ b/source/core/slang-char-encode.cpp @@ -178,4 +178,38 @@ CharEncoding* CharEncoding::UTF16 = &_utf16Encoding; CharEncoding* CharEncoding::UTF16Reversed = &_utf16EncodingReversed; CharEncoding* CharEncoding::UTF32 = &_utf32Encoding; +/* !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! UTF8Util !!!!!!!!!!!!!!!!!!!!!!!!! */ + +/* static */Index UTF8Util::calcCodePointCount(const UnownedStringSlice& in) +{ + Index count = 0; + + // Analyse with bytes... 
+ const int8_t* cur = (const int8_t*)in.begin(); + const int8_t*const end = (const int8_t*)in.end(); + + while (cur < end) + { + const auto c = *cur++; + + count++; + + // Bytes are read as signed int8_t, so c < 0 means the top bit is set... which means we have multiple bytes + if (c < 0) + { + // https://en.wikipedia.org/wiki/UTF-8 + // All continuation bytes contain exactly six bits from the code point. So the next six bits of the code point + // are stored in the low order six bits of the next byte, and 10 is stored in the high order two bits to + // mark it as a continuation byte (so 10000010). + + while (cur < end && (*cur & 0xc0) == 0x80) + { + cur++; + } + } + } + + return count; +} + } // namespace Slang diff --git a/source/core/slang-char-encode.h b/source/core/slang-char-encode.h index a778cc3c9..2bb4cba29 100644 --- a/source/core/slang-char-encode.h +++ b/source/core/slang-char-encode.h @@ -195,6 +195,16 @@ protected: static CharEncoding*const g_encoding[Index(CharEncodeType::CountOf)]; }; +struct UTF8Util +{ + /// Given a slice calculate the number of code points (unicode chars) + /// + /// NOTE! This doesn't check the *validity* of code points/encoding. + /// Invalid UTF-8 input, or a slice that starts or ends partway through a character, will produce + /// undefined results without error. + static Index calcCodePointCount(const UnownedStringSlice& in); +}; + } #endif diff --git a/source/core/slang-char-util.h b/source/core/slang-char-util.h index 40abee602..1ed8f7f73 100644 --- a/source/core/slang-char-util.h +++ b/source/core/slang-char-util.h @@ -47,6 +47,9 @@ struct CharUtil /// Given a character return the upper case equivalent SLANG_FORCE_INLINE static char toUpper(char c) { return (c >= 'a' && c <= 'z') ? (c -'a' + 'A') : c; } + /// Given a value between 0-15 inclusive returns the hex digit. Uses lower case hex. + SLANG_FORCE_INLINE static char getHexChar(Index i) { SLANG_ASSERT((i & ~Index(0xf)) == 0); return char(i >= 10 ? 
(i - 10 + 'a') : (i + '0')); } + /// Returns the value if c interpretted as a hex digit /// If c is not a valid hex returns -1 inline static int getHexDigitValue(char c); -- cgit v1.2.3