summaryrefslogtreecommitdiffstats
path: root/source/core/slang-char-encode.cpp
diff options
context:
space:
mode:
authorjsmall-nvidia <jsmall@nvidia.com>2023-03-22 12:04:33 -0400
committerGitHub <noreply@github.com>2023-03-22 12:04:33 -0400
commitd4f99c8bac8b28f18c864a717d8833db6a1c872d (patch)
treeebea06c019130d8248d5e4f6bccf5e4b2649e3cb /source/core/slang-char-encode.cpp
parentd8a40abba5223fbcb56c52b04ccb88c02bbaf79f (diff)
Source map obfuscation (#2717)
* #include an absolute path didn't work - because paths were taken to always be relative. * WIP source map. * Split out handling of RttiTypeFuncs to a map type. * Make RttiTypeFuncsMap hold default impls. * Slightly more sophisticated RttiTypeFuncsMap * Source map decoding. * Fix tabs. * Fix asserts due to negative values. * Use less obscure mechanisms in SourceMap. * Source map decoding. Simplifying SourceMap usage. * First attempt at outputting a source map as part of emit. * Added support for -source-map option. SourceMap is added to the artifact. * Small improvements around column calculation in SourceWriter. * Source Loc obfuscation WIP. * Fix some issues around SourceMap obfuscation. * Split out obfuscation into its own file. * Keep obfuscated SourceMap even through serialization bottleneck.
Diffstat (limited to 'source/core/slang-char-encode.cpp')
-rw-r--r--source/core/slang-char-encode.cpp34
1 files changed, 34 insertions, 0 deletions
diff --git a/source/core/slang-char-encode.cpp b/source/core/slang-char-encode.cpp
index d061e34ba..687040fa2 100644
--- a/source/core/slang-char-encode.cpp
+++ b/source/core/slang-char-encode.cpp
@@ -178,4 +178,38 @@ CharEncoding* CharEncoding::UTF16 = &_utf16Encoding;
CharEncoding* CharEncoding::UTF16Reversed = &_utf16EncodingReversed;
CharEncoding* CharEncoding::UTF32 = &_utf32Encoding;
+/* !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! UTF8Util !!!!!!!!!!!!!!!!!!!!!!!!! */
+
+/// Counts the code points in a UTF-8 encoded slice.
+///
+/// Every byte that begins a sequence adds one to the result. When that byte
+/// has its top bit set, the run of continuation bytes (bit pattern 10xxxxxx)
+/// that immediately follows it is consumed without being counted.
+///
+/// The input is not validated: a continuation byte that does not follow a
+/// lead byte is itself counted as one code point, and a lead byte swallows
+/// however many continuation bytes happen to follow it.
+///
+/// @param in The slice to scan.
+/// @return The number of code points found; 0 for an empty slice.
+/* static */Index UTF8Util::calcCodePointCount(const UnownedStringSlice& in)
+{
+    // https://en.wikipedia.org/wiki/UTF-8
+    // A continuation byte carries six bits of the code point in its low
+    // order bits and has `10` in its two high order bits (eg 10000010).
+    const unsigned int kTopBit = 0x80;
+    const unsigned int kContinuationMask = 0xc0;
+    const unsigned int kContinuationTag = 0x80;
+
+    // Work on unsigned bytes so the bit tests read directly.
+    const unsigned char* const bytesEnd = reinterpret_cast<const unsigned char*>(in.end());
+
+    Index codePointCount = 0;
+
+    for (const unsigned char* pos = reinterpret_cast<const unsigned char*>(in.begin()); pos < bytesEnd;)
+    {
+        const unsigned int lead = *pos;
+        ++pos;
+        ++codePointCount;
+
+        // A clear top bit means a single byte code point - nothing more to skip
+        if ((lead & kTopBit) == 0)
+        {
+            continue;
+        }
+
+        // Top bit set, so multiple bytes may be in play - step over the continuation bytes
+        while (pos < bytesEnd && (*pos & kContinuationMask) == kContinuationTag)
+        {
+            ++pos;
+        }
+    }
+
+    return codePointCount;
+}
+
} // namespace Slang