From 10e1bae34733f1cdb5abc001666b1aafa1c1f406 Mon Sep 17 00:00:00 2001 From: jsmall-nvidia Date: Thu, 22 Oct 2020 08:46:12 -0400 Subject: Single pass C++ extraction (#1583) * #include an absolute path didn't work - because paths were taken to always be relative. * Added CharUtil. Added TypeSet to extractor. First pass at being able to specify all headers for multiple output headers. * Fix includes for new C++ extractor convention. Update premake5 to use new extractor mechanisms. * Small improvements around StringUtil. * Split out NameConventionUtil. * Use a 'convert' to convert between convention types. * Fix output of build message for C++ extractor. Improve NameConventionUtil interface. * Improve comments. * Fix warning on gcc. * Fix clang warning. * Fix some typos in NameConventionUtil. * Small fix to premake5.lua * Fix generated includes. * Remove m_reflectType as no longer applicable with TypeSet. * Fix .gitignore for slang-generated-* files. Added getConvention to determine convention from slice. Add versions of split and convert that infer the from convention * Fix typo in splitting camel. * LineWhitespace -> HorizontalWhitespace * Improve CharUtil comments. 
---
 source/core/core.vcxproj                   |   4 +
 source/core/core.vcxproj.filters           |  12 ++
 source/core/slang-char-util.cpp            |  50 +++++++
 source/core/slang-char-util.h              |  54 +++++++
 source/core/slang-name-convention-util.cpp | 218 +++++++++++++++++++++++++++++
 source/core/slang-name-convention-util.h   |  54 ++++++++
 source/core/slang-string-util.h            |   1 +
 source/core/slang-string.cpp               |  20 ++-
 source/core/slang-string.h                 |   1 +
 9 files changed, 407 insertions(+), 7 deletions(-)
 create mode 100644 source/core/slang-char-util.cpp
 create mode 100644 source/core/slang-char-util.h
 create mode 100644 source/core/slang-name-convention-util.cpp
 create mode 100644 source/core/slang-name-convention-util.h

(limited to 'source/core')

diff --git a/source/core/core.vcxproj b/source/core/core.vcxproj
index 38f06b407..063d1aa0b 100644
--- a/source/core/core.vcxproj
+++ b/source/core/core.vcxproj
@@ -176,6 +176,7 @@
+
@@ -189,6 +190,7 @@
+
@@ -219,12 +221,14 @@
+
+
diff --git a/source/core/core.vcxproj.filters b/source/core/core.vcxproj.filters
index 7514fa1c9..afae1d124 100644
--- a/source/core/core.vcxproj.filters
+++ b/source/core/core.vcxproj.filters
@@ -27,6 +27,9 @@
 Header Files
+
+ Header Files
+
 Header Files
@@ -66,6 +69,9 @@
 Header Files
+
+ Header Files
+
 Header Files
@@ -152,6 +158,9 @@
 Source Files
+
+ Source Files
+
 Source Files
@@ -170,6 +179,9 @@
 Source Files
+
+ Source Files
+
 Source Files
diff --git a/source/core/slang-char-util.cpp b/source/core/slang-char-util.cpp
new file mode 100644
index 000000000..53dd98541
--- /dev/null
+++ b/source/core/slang-char-util.cpp
@@ -0,0 +1,50 @@
+#include "slang-char-util.h"
+
+namespace Slang {
+
+/// Builds the table mapping each of the 256 possible char values to its CharUtil::Flag bits.
+/// Only ASCII letters, digits, space and tab are given flags; every other entry stays 0.
+static const CharUtil::CharFlagMap _calcCharFlagsMap()
+{
+    typedef CharUtil::Flag Flag;
+
+    CharUtil::CharFlagMap map;
+    memset(&map, 0, sizeof(map));
+
+    {
+        for (Index i = 'a'; i <= 'z'; ++i)
+        {
+            map.flags[i] |= Flag::Lower;
+        }
+    }
+    {
+        for (Index i = 'A'; i <= 'Z'; ++i)
+        {
+            map.flags[i] |= Flag::Upper;
+        }
+    }
+    {
+        for (Index i = '0'; i <= '9'; ++i)
+        {
+            map.flags[i] |= Flag::Digit | Flag::HexDigit;
+        }
+    }
+    {
+        for (Index i = 'a'; i <= 'f'; ++i)
+        {
+            map.flags[i] |= Flag::HexDigit;
+            map.flags[size_t(CharUtil::toUpper(char(i)))] |= Flag::HexDigit;
+        }
+    }
+
+    {
+        map.flags[size_t(' ')] |= Flag::HorizontalWhitespace;
+        map.flags[size_t('\t')] |= Flag::HorizontalWhitespace;
+    }
+
+    return map;
+}
+
+/* static */const CharUtil::CharFlagMap CharUtil::g_charFlagMap = _calcCharFlagsMap();
+
+} // namespace Slang
diff --git a/source/core/slang-char-util.h b/source/core/slang-char-util.h
new file mode 100644
index 000000000..810cde0b1
--- /dev/null
+++ b/source/core/slang-char-util.h
@@ -0,0 +1,54 @@
+#ifndef SLANG_CORE_CHAR_UTIL_H
+#define SLANG_CORE_CHAR_UTIL_H
+
+#include "slang-string.h"
+
+namespace Slang {
+
+/// Character classification and case conversion helpers. Only ASCII characters are classified/converted.
+struct CharUtil
+{
+    typedef uint8_t Flags;
+    struct Flag
+    {
+        enum Enum : Flags
+        {
+            Upper = 0x01, ///< A-Z
+            Lower = 0x02, ///< a-z
+            Digit = 0x04, ///< 0-9
+            HorizontalWhitespace = 0x08, ///< Whitespace that can appear horizontally (ie excluding CR/LF)
+            HexDigit = 0x10, ///< 0-9, a-f, A-F
+        };
+    };
+
+    SLANG_FORCE_INLINE static bool isDigit(char c) { return c >= '0' && c <= '9'; }
+    SLANG_FORCE_INLINE static bool isLower(char c) { return c >= 'a' && c <= 'z'; }
+    SLANG_FORCE_INLINE static bool isUpper(char c) { return c >= 'A' && c <= 'Z'; }
+    SLANG_FORCE_INLINE static bool isHorizontalWhitespace(char c) { return c == ' ' || c == '\t'; }
+
+    /// True if it's alpha
+    SLANG_FORCE_INLINE static bool isAlpha(char c) { return (getFlags(c) & (Flag::Upper | Flag::Lower)) != 0; }
+
+    SLANG_FORCE_INLINE static bool isHexDigit(char c) { return (getFlags(c) & Flag::HexDigit) != 0; }
+
+    /// For a given character get the associated flags
+    /// The char is converted via uint8_t first, so a negative (signed) char still indexes inside the 0x100 entry table
+    SLANG_FORCE_INLINE static Flags getFlags(char c) { return g_charFlagMap.flags[size_t(uint8_t(c))]; }
+
+    /// Given a character return the lower case equivalent
+    SLANG_FORCE_INLINE static char toLower(char c) { return (c >= 'A' && c <= 'Z') ? (c -'A' + 'a') : c; }
+    /// Given a character return the upper case equivalent
+    SLANG_FORCE_INLINE static char toUpper(char c) { return (c >= 'a' && c <= 'z') ? (c -'a' + 'A') : c; }
+
+    /// Flags for every possible char value, indexed by the char converted to uint8_t
+    struct CharFlagMap
+    {
+        Flags flags[0x100];
+    };
+
+    static const CharFlagMap g_charFlagMap;
+};
+
+} // namespace Slang
+
+#endif // SLANG_CORE_CHAR_UTIL_H
diff --git a/source/core/slang-name-convention-util.cpp b/source/core/slang-name-convention-util.cpp
new file mode 100644
index 000000000..a5acc6370
--- /dev/null
+++ b/source/core/slang-name-convention-util.cpp
@@ -0,0 +1,218 @@
+
+#include "slang-name-convention-util.h"
+
+#include "slang-char-util.h"
+#include "slang-string-util.h"
+
+namespace Slang
+{
+
+/* static */NameConvention NameConventionUtil::getConvention(const UnownedStringSlice& slice)
+{
+    for (const char c : slice)
+    {
+        switch (c)
+        {
+            case '-': return NameConvention::Kabab;
+            case '_': return NameConvention::Snake;
+            default: break;
+        }
+    }
+    return NameConvention::Camel;
+}
+
+/* static */void NameConventionUtil::split(NameConvention convention, const UnownedStringSlice& slice, List<UnownedStringSlice>& out)
+{
+    switch (convention)
+    {
+        case NameConvention::Kabab:
+        {
+            StringUtil::split(slice, '-', out);
+            break;
+        }
+        case NameConvention::Snake:
+        {
+            StringUtil::split(slice, '_', out);
+            break;
+        }
+        case NameConvention::Camel:
+        {
+            typedef CharUtil::Flags CharFlags;
+            typedef CharUtil::Flag CharFlag;
+
+            CharFlags prevFlags = 0;
+            const char*const end = slice.end();
+
+            const char* start = slice.begin();
+            for (const char* cur = start; cur < end; ++cur)
+            {
+                const char c = *cur;
+                const CharUtil::Flags flags = CharUtil::getFlags(c);
+
+                if (flags & CharFlag::Upper)
+                {
+                    if (prevFlags & CharFlag::Lower)
+                    {
+                        // If we go from lower to upper, we have a transition
+                        out.add(UnownedStringSlice(start, cur));
+                        start = cur;
+                    }
+                    else if ((prevFlags & CharFlag::Upper) && cur + 1 < end)
+                    {
+                        // This works with capital or uncapitalized acronyms, but if we have two capitalized acronyms following each other - it can't split.
+                        //
+                        // For example
+                        // "IAABBSystem" -> "IAABB", "System"
+                        //
+                        // If it only accepted lower case acronyms the logic could be changed such that the following could be produced
+                        // "IAabbSystem" -> "I", "Aabb", "System"
+                        //
+                        // Since Slang source largely goes with upper case acronyms, we work with the heuristic here..
+
+                        if (CharUtil::isLower(cur[1]))
+                        {
+                            out.add(UnownedStringSlice(start, cur));
+                            start = cur;
+                        }
+                    }
+                }
+
+                prevFlags = flags;
+            }
+
+            // Add any end section
+            if (start < end)
+            {
+                out.add(UnownedStringSlice(start, end));
+            }
+            break;
+        }
+    }
+}
+
+/* static */void NameConventionUtil::split(const UnownedStringSlice& slice, List<UnownedStringSlice>& out)
+{
+    split(getConvention(slice), slice, out);
+}
+
+/* static */void NameConventionUtil::join(const UnownedStringSlice* slices, Index slicesCount, CharCase charCase, char joinChar, StringBuilder& out)
+{
+    if (slicesCount <= 0)
+    {
+        return;
+    }
+
+    // Total is the length of all the slices, plus one join character between each adjacent pair
+    Index totalSize = slicesCount - 1;
+    for (Index i = 0; i < slicesCount; ++i)
+    {
+        totalSize += slices[i].getLength();
+    }
+
+    char*const dstStart = out.prepareForAppend(totalSize);
+    char* dst = dstStart;
+
+    for (Index i = 0; i < slicesCount; ++i)
+    {
+        const UnownedStringSlice& slice = slices[i];
+        const Index count = slice.getLength();
+        const char*const src = slice.begin();
+
+        if (i > 0)
+        {
+            *dst++ = joinChar;
+        }
+
+        switch (charCase)
+        {
+            case CharCase::Upper:
+            {
+                for (Index j = 0; j < count; ++j)
+                {
+                    dst[j] = CharUtil::toUpper(src[j]);
+                }
+                break;
+            }
+            case CharCase::Lower:
+            {
+                for (Index j = 0; j < count; ++j)
+                {
+                    dst[j] = CharUtil::toLower(src[j]);
+                }
+                break;
+            }
+        }
+
+        dst += count;
+    }
+
+    SLANG_ASSERT(dstStart + totalSize == dst);
+    out.appendInPlace(dstStart, totalSize);
+}
+
+/* static */void NameConventionUtil::join(const UnownedStringSlice* slices, Index slicesCount, CharCase charCase, NameConvention convention, StringBuilder& out)
+{
+    switch (convention)
+    {
+        case NameConvention::Kabab: return join(slices, slicesCount, charCase, '-', out);
+        case NameConvention::Snake: return join(slices, slicesCount, charCase, '_', out);
+        case NameConvention::Camel:
+        {
+            Index totalSize = 0;
+
+            for (Index i = 0; i < slicesCount; ++i)
+            {
+                totalSize += slices[i].getLength();
+            }
+
+            char*const dstStart = out.prepareForAppend(totalSize);
+            char* dst = dstStart;
+
+            for (Index i = 0; i < slicesCount; ++i)
+            {
+                const UnownedStringSlice& slice = slices[i];
+                Index count = slice.getLength();
+                const char* src = slice.begin();
+
+                Int j = 0;
+
+                if (count > 0 && !(i == 0 && charCase == CharCase::Lower))
+                {
+                    // Capitalize first letter of each word, unless on first word and 'lower'
+                    dst[j] = CharUtil::toUpper(src[j]);
+                    j++;
+                }
+
+                for (; j < count; ++j)
+                {
+                    dst[j] = CharUtil::toLower(src[j]);
+                }
+
+                dst += count;
+            }
+
+            // Commit the characters written into the prepared space, as the joinChar overload of join does
+            SLANG_ASSERT(dstStart + totalSize == dst);
+            out.appendInPlace(dstStart, totalSize);
+            break;
+        }
+    }
+}
+
+/* static */void NameConventionUtil::convert(NameConvention fromConvention, const UnownedStringSlice& slice, CharCase charCase, NameConvention toConvention, StringBuilder& out)
+{
+    // Split into slices
+    List<UnownedStringSlice> slices;
+    split(fromConvention, slice, slices);
+
+    // Join the slices in the toConvention
+    join(slices.getBuffer(), slices.getCount(), charCase, toConvention, out);
+}
+
+/* static */void NameConventionUtil::convert(const UnownedStringSlice& slice, CharCase charCase, NameConvention toConvention, StringBuilder& out)
+{
+    convert(getConvention(slice), slice, charCase, toConvention, out);
+}
+
+}
+
diff --git a/source/core/slang-name-convention-util.h b/source/core/slang-name-convention-util.h
new file mode 100644
index 000000000..d4a984ca0
--- /dev/null
+++ b/source/core/slang-name-convention-util.h
@@ -0,0 +1,54 @@
+#ifndef SLANG_CORE_NAME_CONVENTION_UTIL_H
+#define SLANG_CORE_NAME_CONVENTION_UTIL_H
+
+#include "slang-string.h"
+#include "slang-list.h"
+
+namespace Slang
+{
+
+enum class NameConvention
+{
+    Kabab, ///< Words are separated with -. WORDS-ARE-SEPARATED
+    Snake, ///< Words are separated with _. WORDS_ARE_SEPARATED
+    Camel, ///< Words start with a capital. (Upper will make first words character capitalized, aka PascalCase)
+};
+
+enum class CharCase
+{
+    Upper,
+    Lower,
+};
+
+/* This utility is to enable easy conversion and interpretation of names that use standard conventions, typically in programming
+languages. The conventions are largely how to represent multiple words together.
+
+Split is used to split up a name into its constituent 'words' based on a convention.
+Join is used to combine words based on a convention/character case
+
+Convert uses split and join to allow easy conversion between conventions.
+*/
+struct NameConventionUtil
+{
+        /// Given a slice tries to determine the convention used.
+        /// If no separators are found, will assume Camel
+    static NameConvention getConvention(const UnownedStringSlice& slice);
+
+        /// Given a slice and a naming convention, split into its constituent parts. If convention isn't specified, will infer from slice using getConvention.
+    static void split(NameConvention convention, const UnownedStringSlice& slice, List<UnownedStringSlice>& out);
+    static void split(const UnownedStringSlice& slice, List<UnownedStringSlice>& out);
+
+        /// Given slices, join together with the specified convention into out
+    static void join(const UnownedStringSlice* slices, Index slicesCount, CharCase charCase, NameConvention convention, StringBuilder& out);
+
+        /// Join with a join char, and potentially changing case of input slices
+    static void join(const UnownedStringSlice* slices, Index slicesCount, CharCase charCase, char joinChar, StringBuilder& out);
+
+        /// Convert from one convention to another. If fromConvention isn't specified, will infer from slice using getConvention.
+    static void convert(NameConvention fromConvention, const UnownedStringSlice& slice, CharCase charCase, NameConvention toConvention, StringBuilder& out);
+    static void convert(const UnownedStringSlice& slice, CharCase charCase, NameConvention toConvention, StringBuilder& out);
+};
+
+}
+
+#endif // SLANG_CORE_NAME_CONVENTION_UTIL_H
diff --git a/source/core/slang-string-util.h b/source/core/slang-string-util.h
index 9f1508cb1..dee4c7d66 100644
--- a/source/core/slang-string-util.h
+++ b/source/core/slang-string-util.h
@@ -93,6 +93,7 @@ struct StringUtil
 
         /// Convert in to int. Returns SLANG_FAIL on error
     static SlangResult parseInt(const UnownedStringSlice& in, Int& outValue);
+
 };
 
 /* A helper class that allows parsing of lines from text with iteration. Uses StringUtil::extractLine for the actual underlying implementation. */
diff --git a/source/core/slang-string.cpp b/source/core/slang-string.cpp
index bcf5853d5..3ce4c7ec9 100644
--- a/source/core/slang-string.cpp
+++ b/source/core/slang-string.cpp
@@ -1,6 +1,8 @@
 #include "slang-string.h"
 #include "slang-text-io.h"
 
+#include "slang-char-util.h"
+
 namespace Slang
 {
     // TODO: this belongs in a different file:
@@ -12,11 +14,6 @@ namespace Slang
         throw InternalError(message);
     }
 
-    SLANG_FORCE_INLINE static bool _isWhiteSpace(char c)
-    {
-        return c == ' ' || c == '\t';
-    }
-
     // OSString
 
     OSString::OSString()
@@ -112,11 +109,20 @@ namespace Slang
         const char* start = m_begin;
         const char* end = m_end;
 
-        while (start < end && _isWhiteSpace(*start)) start++;
-        while (end > start && _isWhiteSpace(end[-1])) end--;
+        while (start < end && CharUtil::isHorizontalWhitespace(*start)) start++;
+        while (end > start && CharUtil::isHorizontalWhitespace(end[-1])) end--;
         return UnownedStringSlice(start, end);
     }
 
+    UnownedStringSlice UnownedStringSlice::trim(char c) const
+    {
+        const char* start = m_begin;
+        const char* end = m_end;
+
+        while (start < end && *start == c) start++;
+        while (end > start && end[-1] == c) end--;
+        return UnownedStringSlice(start, end);
+    }
     // StringSlice
 
     StringSlice::StringSlice()
diff --git a/source/core/slang-string.h b/source/core/slang-string.h
index 25bf99023..75c282a58 100644
--- a/source/core/slang-string.h
+++ b/source/core/slang-string.h
@@ -174,6 +174,7 @@ namespace Slang
 
 
         UnownedStringSlice trim() const;
+        UnownedStringSlice trim(char c) const;
 
         HashCode getHashCode() const
         {
--
cgit v1.2.3