Single pass C++ extraction (#1583)

* #include an absolute path didn't work - because paths were taken to always be relative. * Added CharUtil. Added TypeSet to extractor. First pass at being able to specify all headers for multiple output headers. * Fix includes for new C++ extractor convention. Update premake5 to use new extractor mechanisms. * Small improvements around StringUtil. * Split out NameConventionUtil. * Use a 'convert' to convert between convention types. * Fix output of build message for C++ extractor. Improve NameConventionUtil interface. * Improve comments. * Fix warning on gcc. * Fix clang warning. * Fix some typos in NameConventionUtil. * Small fix to premake5.lua * Fix generated includes. * Remove m_reflectType as no longer applicable with TypeSet. * Fix .gitignore for slang-generated-* files. Added getConvention to determine convention from slice. Add versions of split and convert that infer the 'from' convention * Fix typo in splitting camel. * LineWhitespace -> HorizontalWhitespace * Improve CharUtil comments.
author: jsmall-nvidia <jsmall@nvidia.com> 2020-10-22 08:46:12 -0400
committer: GitHub <noreply@github.com> 2020-10-22 08:46:12 -0400
commit: 10e1bae34733f1cdb5abc001666b1aafa1c1f406 (patch)
tree: ad9571c071b7b7c2384cdd42426851d257fc5f7b /source/core
parent: c0943661e5441bfb996430c4f67fb4dddea9dfcf (diff)
9 files changed, 397 insertions, 7 deletions
diff --git a/source/core/core.vcxproj b/source/core/core.vcxproj
index 38f06b407..063d1aa0b 100644
--- a/source/core/core.vcxproj
+++ b/source/core/core.vcxproj
@@ -176,6 +176,7 @@
     <ClInclude Include="slang-basic.h" />
     <ClInclude Include="slang-blob.h" />
     <ClInclude Include="slang-byte-encode-util.h" />
+    <ClInclude Include="slang-char-util.h" />
     <ClInclude Include="slang-common.h" />
     <ClInclude Include="slang-dictionary.h" />
     <ClInclude Include="slang-downstream-compiler.h" />
@@ -189,6 +190,7 @@
     <ClInclude Include="slang-list.h" />
     <ClInclude Include="slang-math.h" />
     <ClInclude Include="slang-memory-arena.h" />
+    <ClInclude Include="slang-name-convention-util.h" />
     <ClInclude Include="slang-nvrtc-compiler.h" />
     <ClInclude Include="slang-offset-container.h" />
     <ClInclude Include="slang-platform.h" />
@@ -219,12 +221,14 @@
   <ItemGroup>
     <ClCompile Include="slang-blob.cpp" />
     <ClCompile Include="slang-byte-encode-util.cpp" />
+    <ClCompile Include="slang-char-util.cpp" />
     <ClCompile Include="slang-downstream-compiler.cpp" />
     <ClCompile Include="slang-free-list.cpp" />
     <ClCompile Include="slang-gcc-compiler-util.cpp" />
     <ClCompile Include="slang-hex-dump-util.cpp" />
     <ClCompile Include="slang-io.cpp" />
     <ClCompile Include="slang-memory-arena.cpp" />
+    <ClCompile Include="slang-name-convention-util.cpp" />
     <ClCompile Include="slang-nvrtc-compiler.cpp" />
     <ClCompile Include="slang-offset-container.cpp" />
     <ClCompile Include="slang-platform.cpp" />
diff --git a/source/core/core.vcxproj.filters b/source/core/core.vcxproj.filters
index 7514fa1c9..afae1d124 100644
--- a/source/core/core.vcxproj.filters
+++ b/source/core/core.vcxproj.filters
@@ -27,6 +27,9 @@
     <ClInclude Include="slang-byte-encode-util.h">
       <Filter>Header Files</Filter>
     </ClInclude>
+    <ClInclude Include="slang-char-util.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
     <ClInclude Include="slang-common.h">
       <Filter>Header Files</Filter>
     </ClInclude>
@@ -66,6 +69,9 @@
     <ClInclude Include="slang-memory-arena.h">
       <Filter>Header Files</Filter>
     </ClInclude>
+    <ClInclude Include="slang-name-convention-util.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
     <ClInclude Include="slang-nvrtc-compiler.h">
       <Filter>Header Files</Filter>
     </ClInclude>
@@ -152,6 +158,9 @@
     <ClCompile Include="slang-byte-encode-util.cpp">
       <Filter>Source Files</Filter>
     </ClCompile>
+    <ClCompile Include="slang-char-util.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
     <ClCompile Include="slang-downstream-compiler.cpp">
       <Filter>Source Files</Filter>
     </ClCompile>
@@ -170,6 +179,9 @@
     <ClCompile Include="slang-memory-arena.cpp">
       <Filter>Source Files</Filter>
     </ClCompile>
+    <ClCompile Include="slang-name-convention-util.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
     <ClCompile Include="slang-nvrtc-compiler.cpp">
       <Filter>Source Files</Filter>
     </ClCompile>
diff --git a/source/core/slang-char-util.cpp b/source/core/slang-char-util.cpp
new file mode 100644
index 000000000..53dd98541
--- /dev/null
+++ b/source/core/slang-char-util.cpp
@@ -0,0 +1,48 @@
+#include "slang-char-util.h"
+
+namespace Slang {
+
+// Builds the table behind CharUtil::getFlags: one Flags entry per possible 8-bit character value.
+// Letters, digits, hex digits, space and tab get flags below; every other entry stays zero.
+static const CharUtil::CharFlagMap _calcCharFlagsMap()
+{
+    typedef CharUtil::Flag Flag;
+
+    CharUtil::CharFlagMap table;
+    memset(&table, 0, sizeof(table));
+
+    // Lower case letters
+    for (Index lower = 'a'; lower <= 'z'; ++lower)
+    {
+        table.flags[lower] |= Flag::Lower;
+    }
+
+    // Upper case letters
+    for (Index upper = 'A'; upper <= 'Z'; ++upper)
+    {
+        table.flags[upper] |= Flag::Upper;
+    }
+
+    // Decimal digits are hex digits too
+    for (Index digit = '0'; digit <= '9'; ++digit)
+    {
+        table.flags[digit] |= Flag::Digit | Flag::HexDigit;
+    }
+
+    // The letters a-f are hex digits in either case
+    for (Index lower = 'a'; lower <= 'f'; ++lower)
+    {
+        const char upper = CharUtil::toUpper(char(lower));
+        table.flags[lower] |= Flag::HexDigit;
+        table.flags[size_t(upper)] |= Flag::HexDigit;
+    }
+
+    // Space and tab are the only horizontal whitespace
+    table.flags[size_t(' ')] |= Flag::HorizontalWhitespace;
+    table.flags[size_t('\t')] |= Flag::HorizontalWhitespace;
+    return table;
+}
+
+/* static */const CharUtil::CharFlagMap CharUtil::g_charFlagMap = _calcCharFlagsMap();
+
+} // namespace Slang
diff --git a/source/core/slang-char-util.h b/source/core/slang-char-util.h
new file mode 100644
index 000000000..810cde0b1
--- /dev/null
+++ b/source/core/slang-char-util.h
@@ -0,0 +1,51 @@
+#ifndef SLANG_CORE_CHAR_UTIL_H
+#define SLANG_CORE_CHAR_UTIL_H
+
+#include "slang-string.h"
+
+namespace Slang {
+
+/// Classification and case conversion helpers for single `char` values. Only ASCII letters, digits, space and tab are recognized.
+struct CharUtil
+{
+    typedef uint8_t Flags;
+    struct Flag
+    {
+        enum Enum : Flags
+        {
+            Upper                       = 0x01,         ///< A-Z
+            Lower                       = 0x02,         ///< a-z
+            Digit                       = 0x04,         ///< 0-9
+            HorizontalWhitespace        = 0x08,         ///< Whitespace that can appear horizontally (ie excluding CR/LF)
+            HexDigit                    = 0x10,         ///< 0-9, a-f, A-F
+        };
+    };
+
+    SLANG_FORCE_INLINE static bool isDigit(char c) { return c >= '0' && c <= '9'; }
+    SLANG_FORCE_INLINE static bool isLower(char c) { return c >= 'a' && c <= 'z'; }
+    SLANG_FORCE_INLINE static bool isUpper(char c) { return c >= 'A' && c <= 'Z'; }
+    SLANG_FORCE_INLINE static bool isHorizontalWhitespace(char c) { return c == ' ' || c == '\t'; }
+        /// True if it's alpha (A-Z or a-z)
+    SLANG_FORCE_INLINE static bool isAlpha(char c) { return (getFlags(c) & (Flag::Upper | Flag::Lower)) != 0; }
+        /// True if it's a hexadecimal digit (0-9, a-f, A-F)
+    SLANG_FORCE_INLINE static bool isHexDigit(char c) { return (getFlags(c) & Flag::HexDigit) != 0; }
+
+        /// For a given character get the associated flags.
+        /// The index goes via uint8_t: where char is signed, a negative value cast straight to size_t becomes a huge index far outside the 0x100 entry table.
+    SLANG_FORCE_INLINE static Flags getFlags(char c) { return g_charFlagMap.flags[size_t(uint8_t(c))]; }
+        /// Given a character return the lower case equivalent (characters other than A-Z are returned unchanged)
+    SLANG_FORCE_INLINE static char toLower(char c) { return (c >= 'A' && c <= 'Z') ? (c -'A' + 'a') : c; }
+        /// Given a character return the upper case equivalent (characters other than a-z are returned unchanged)
+    SLANG_FORCE_INLINE static char toUpper(char c) { return (c >= 'a' && c <= 'z') ? (c -'a' + 'A') : c; }
+
+        /// Lookup table with one Flags entry for each of the 0x100 possible 8-bit character values
+    struct CharFlagMap
+    {
+        Flags flags[0x100];
+    };
+    static const CharFlagMap g_charFlagMap;                 ///< Table read by getFlags
+};
+    
+} // namespace Slang
+
+#endif // SLANG_CORE_CHAR_UTIL_H
diff --git a/source/core/slang-name-convention-util.cpp b/source/core/slang-name-convention-util.cpp
new file mode 100644
index 000000000..a5acc6370
--- /dev/null
+++ b/source/core/slang-name-convention-util.cpp
@@ -0,0 +1,213 @@
+
+#include "slang-name-convention-util.h"
+
+#include "slang-char-util.h"
+#include "slang-string-util.h"
+
+namespace Slang
+{
+
+/* static */NameConvention NameConventionUtil::getConvention(const UnownedStringSlice& slice)
+{
+    // The first separator met decides: '-' selects Kabab, '_' selects Snake
+    const char*const end = slice.end();
+    for (const char* cur = slice.begin(); cur < end; ++cur)
+    {
+        if (*cur == '-') return NameConvention::Kabab;
+        if (*cur == '_') return NameConvention::Snake;
+    }
+
+    // No separator anywhere in the slice, so treat it as camel case
+    return NameConvention::Camel;
+}
+
+// Splits slice into its words according to convention, adding the words to out.
+// Kabab and Snake hand the work to StringUtil::split with their separator; Camel is split here on case transitions.
+/* static */void NameConventionUtil::split(NameConvention convention, const UnownedStringSlice& slice, List<UnownedStringSlice>& out)
+{
+    switch (convention)
+    {
+        case NameConvention::Kabab:
+        {
+            StringUtil::split(slice, '-', out);
+            break;
+        }
+        case NameConvention::Snake:
+        {
+            StringUtil::split(slice, '_', out);
+            break;
+        }
+        case NameConvention::Camel:
+        {
+            typedef CharUtil::Flags CharFlags;
+            typedef CharUtil::Flag CharFlag;
+
+            // Flags of the previous character; 0 before the first character so it can't trigger a split
+            CharFlags prevFlags = 0;
+            const char*const end = slice.end();
+
+            // Start of the word currently being scanned
+            const char* start = slice.begin();
+            for (const char* cur = start; cur < end; ++cur)
+            {
+                const char c = *cur;
+                const CharUtil::Flags flags = CharUtil::getFlags(c);
+
+                if (flags & CharFlag::Upper)
+                {
+                    if (prevFlags & CharFlag::Lower)
+                    {
+                        // Lower followed by upper starts a new word ("fooBar" -> "foo", "Bar")
+                        out.add(UnownedStringSlice(start, cur));
+                        start = cur;
+                    }
+                    else if ((prevFlags & CharFlag::Upper) && cur + 1 < end)
+                    {
+                        // Inside a run of capitals. An upper case acronym is kept together, and the run
+                        // ends at the capital that is followed by a lower case letter:
+                        // "IAABBSystem" -> "IAABB", "System"
+                        // Two upper case acronyms that follow each other therefore can't be separated.
+                        // Accepting only lower case acronyms would allow "IAabbSystem" -> "I", "Aabb", "System",
+                        // but Slang source largely uses upper case acronyms, so that heuristic is used here.
+                        if (CharUtil::isLower(cur[1]))
+                        {
+                            out.add(UnownedStringSlice(start, cur));
+                            start = cur;
+                        }
+                    }
+                }
+
+                prevFlags = flags;
+            }
+
+            // Whatever remains after the last split point is the final word
+            if (start < end)
+            {
+                out.add(UnownedStringSlice(start, end));
+            }
+            break;
+        }
+    }
+}
+
+/* static */void NameConventionUtil::split(const UnownedStringSlice& slice, List<UnownedStringSlice>& out)
+{
+    split(getConvention(slice), slice, out);    // The convention is inferred from slice by getConvention
+}
+
+// Appends the slices to out with joinChar between neighbours, changing every copied character to charCase.
+/* static */void NameConventionUtil::join(const UnownedStringSlice* slices, Index slicesCount, CharCase charCase, char joinChar, StringBuilder& out)
+{
+    if (slicesCount <= 0)
+    {
+        // No words, so nothing to append
+        return;
+    }
+
+    // One join character between each pair of neighbours, plus the characters of every slice
+    Index charCount = slicesCount - 1;
+    for (const UnownedStringSlice* cur = slices; cur != slices + slicesCount; ++cur)
+    {
+        charCount += cur->getLength();
+    }
+
+    // Characters are written through the pointer from prepareForAppend and passed to appendInPlace at the end
+    char*const dstBegin = out.prepareForAppend(charCount);
+    char* dstCur = dstBegin;
+
+    for (Index sliceIndex = 0; sliceIndex < slicesCount; ++sliceIndex)
+    {
+        if (sliceIndex != 0)
+        {
+            *dstCur++ = joinChar;
+        }
+
+        const char* srcCur = slices[sliceIndex].begin();
+        const char*const srcEnd = srcCur + slices[sliceIndex].getLength();
+        if (charCase == CharCase::Upper)
+        {
+            while (srcCur < srcEnd)
+            {
+                *dstCur++ = CharUtil::toUpper(*srcCur++);
+            }
+        }
+        else if (charCase == CharCase::Lower)
+        {
+            while (srcCur < srcEnd)
+            {
+                *dstCur++ = CharUtil::toLower(*srcCur++);
+            }
+        }
+        else
+        {
+            // Not a known case: the characters are left unwritten but are still stepped over
+            dstCur += srcEnd - srcCur;
+        }
+    }
+
+    SLANG_ASSERT(dstBegin + charCount == dstCur);
+    out.appendInPlace(dstBegin, charCount);
+}
+
+// Joins slices into out in the given convention. Kabab and Snake defer to the joinChar overload with '-' and '_'.
+// Camel writes each word as a capital followed by lower case, except that with CharCase::Lower the first word starts in lower case.
+/* static */void NameConventionUtil::join(const UnownedStringSlice* slices, Index slicesCount, CharCase charCase, NameConvention convention, StringBuilder& out)
+{
+    switch (convention)
+    {
+        case NameConvention::Kabab:        return join(slices, slicesCount, charCase, '-', out);
+        case NameConvention::Snake:        return join(slices, slicesCount, charCase, '_', out);
+        case NameConvention::Camel:
+        {
+            Index totalSize = 0;
+            for (Index i = 0; i < slicesCount; ++i)
+            {
+                totalSize += slices[i].getLength();
+            }
+            char*const dstStart = out.prepareForAppend(totalSize);
+            char* dst = dstStart;
+
+            for (Index i = 0; i < slicesCount; ++i)
+            {
+                const UnownedStringSlice& slice = slices[i];
+                const Index count = slice.getLength();
+                const char*const src = slice.begin();
+                Int j = 0;
+                if (count > 0 && !(i == 0 && charCase == CharCase::Lower))
+                {
+                    // Capitalize first letter of each word, unless on first word and 'lower'
+                    dst[j] = CharUtil::toUpper(src[j]);
+                    j++;
+                }
+                for (; j < count; ++j)
+                {
+                    dst[j] = CharUtil::toLower(src[j]);
+                }
+                dst += count;
+            }
+
+            // Pass the written range to appendInPlace, exactly as the joinChar overload does after filling its buffer
+            SLANG_ASSERT(dstStart + totalSize == dst);
+            out.appendInPlace(dstStart, totalSize);
+            break;
+        }
+    }
+}
+
+/* static */void NameConventionUtil::convert(NameConvention fromConvention, const UnownedStringSlice& slice, CharCase charCase, NameConvention toConvention, StringBuilder& out)
+{
+    // First break the name into words using the convention it is written in...
+    List<UnownedStringSlice> words;
+    split(fromConvention, slice, words);
+    // ...then put the words back together in the requested convention and case
+    const Index wordCount = words.getCount();
+    join(words.getBuffer(), wordCount, charCase, toConvention, out);
+}
+
+/* static */void NameConventionUtil::convert(const UnownedStringSlice& slice, CharCase charCase, NameConvention toConvention, StringBuilder& out)
+{
+    convert(getConvention(slice), slice, charCase, toConvention, out);    // Source convention is inferred from slice by getConvention
+}
+
+}
+
diff --git a/source/core/slang-name-convention-util.h b/source/core/slang-name-convention-util.h
new file mode 100644
index 000000000..d4a984ca0
--- /dev/null
+++ b/source/core/slang-name-convention-util.h
@@ -0,0 +1,54 @@
+#ifndef SLANG_CORE_NAME_CONVENTION_UTIL_H
+#define SLANG_CORE_NAME_CONVENTION_UTIL_H
+
+#include "slang-string.h"
+#include "slang-list.h"
+
+namespace Slang
+{
+
+enum class NameConvention
+{
+    Kabab,     ///< Words are separated with -. WORDS-ARE-SEPARATED (commonly spelled "kebab" case)
+    Snake,     ///< Words are separated with _. WORDS_ARE_SEPARATED
+    Camel,     ///< Words start with a capital. (Upper will make first words character capitalized, aka PascalCase)
+};
+
+enum class CharCase
+{
+    Upper,     ///< Join output in upper case (for Camel: the first word also starts with a capital)
+    Lower,     ///< Join output in lower case (for Camel: the first word starts in lower case)
+};
+
+/* This utility is to enable easy conversion and interpretation of names that use standard conventions, typically in programming
+languages. The conventions are largely how to represent multiple words together.
+
+Split is used to split up a name into its constituent 'words' based on a convention.
+Join is used to combine words based on a convention/character case
+
+Convert uses split and join to allow easy conversion between conventions. 
+*/
+struct NameConventionUtil
+{
+        /// Given a slice tries to determine the convention used.
+        /// The first '-' or '_' found selects Kabab or Snake. If no separators are found, will assume Camel
+    static NameConvention getConvention(const UnownedStringSlice& slice);
+
+        /// Given a slice and a naming convention, split into its constituent parts. If convention isn't specified, will infer from slice using getConvention.
+    static void split(NameConvention convention, const UnownedStringSlice& slice, List<UnownedStringSlice>& out);
+    static void split(const UnownedStringSlice& slice, List<UnownedStringSlice>& out);
+
+        /// Given slices, join together with the specified convention into out
+    static void join(const UnownedStringSlice* slices, Index slicesCount, CharCase charCase, NameConvention convention, StringBuilder& out);
+
+        /// Join with a join char between the slices, converting the characters of the input slices to charCase
+    static void join(const UnownedStringSlice* slices, Index slicesCount, CharCase charCase, char joinChar, StringBuilder& out);
+
+        /// Convert from one convention to another. If fromConvention isn't specified, will infer from slice using getConvention.
+    static void convert(NameConvention fromConvention, const UnownedStringSlice& slice, CharCase charCase, NameConvention toConvention, StringBuilder& out);
+    static void convert(const UnownedStringSlice& slice, CharCase charCase, NameConvention toConvention, StringBuilder& out);
+};
+
+}
+
+#endif // SLANG_CORE_NAME_CONVENTION_UTIL_H
diff --git a/source/core/slang-string-util.h b/source/core/slang-string-util.h
index 9f1508cb1..dee4c7d66 100644
--- a/source/core/slang-string-util.h
+++ b/source/core/slang-string-util.h
@@ -93,6 +93,7 @@ struct StringUtil
 
         /// Convert in to int. Returns SLANG_FAIL on error
     static SlangResult parseInt(const UnownedStringSlice& in, Int& outValue);
+
 };
 
 /* A helper class that allows parsing of lines from text with iteration. Uses StringUtil::extractLine for the actual underlying implementation. */
diff --git a/source/core/slang-string.cpp b/source/core/slang-string.cpp
index bcf5853d5..3ce4c7ec9 100644
--- a/source/core/slang-string.cpp
+++ b/source/core/slang-string.cpp
@@ -1,6 +1,8 @@
 #include "slang-string.h"
 #include "slang-text-io.h"
 
+#include "slang-char-util.h"
+
 namespace Slang
 {
     // TODO: this belongs in a different file:
@@ -12,11 +14,6 @@ namespace Slang
         throw InternalError(message);
     }
 
-    SLANG_FORCE_INLINE static bool _isWhiteSpace(char c)
-    {
-        return c == ' ' || c == '\t';
-    }
-
     // OSString
 
     OSString::OSString()
@@ -112,11 +109,20 @@ namespace Slang
         const char* start = m_begin;
         const char* end = m_end;
 
-        while (start < end && _isWhiteSpace(*start)) start++;
-        while (end > start && _isWhiteSpace(end[-1])) end--;
+        while (start < end && CharUtil::isHorizontalWhitespace(*start)) start++;
+        while (end > start && CharUtil::isHorizontalWhitespace(end[-1])) end--;
         return UnownedStringSlice(start, end);
     }
 
+    UnownedStringSlice UnownedStringSlice::trim(char c) const
+    {
+        const char* head = m_begin;
+        const char* tail = m_end;
+        // Step past leading occurrences of c, then trailing ones; a slice made only of c collapses to empty
+        for (; head < tail && *head == c; ++head) {}
+        for (; tail > head && tail[-1] == c; --tail) {}
+        return UnownedStringSlice(head, tail);
+    }
 
     // StringSlice
 
diff --git a/source/core/slang-string.h b/source/core/slang-string.h
index 25bf99023..75c282a58 100644
--- a/source/core/slang-string.h
+++ b/source/core/slang-string.h
@@ -174,6 +174,7 @@ namespace Slang
 
 
         UnownedStringSlice trim() const;
+        UnownedStringSlice trim(char c) const;
 
         HashCode getHashCode() const
         {
author	jsmall-nvidia <jsmall@nvidia.com>	2020-10-22 08:46:12 -0400
committer	GitHub <noreply@github.com>	2020-10-22 08:46:12 -0400
commit	10e1bae34733f1cdb5abc001666b1aafa1c1f406 (patch)
tree	ad9571c071b7b7c2384cdd42426851d257fc5f7b /source/core
parent	c0943661e5441bfb996430c4f67fb4dddea9dfcf (diff)