Fix problem with C++ extractor erroneous concatenation of type tokens (#1382)

* Try to fix problem with C++ extractor concatenating tokens producing an erroneous result. * Improve naming/comments around C++ extractor fix. * Another small improvement around space concatenation when outputting token list. * Handle some more special cases for consecutive tokens for C++ extractor concatenation of tokens.
author: jsmall-nvidia <jsmall@nvidia.com> 2020-06-11 14:06:27 -0400
committer: GitHub <noreply@github.com> 2020-06-11 14:06:27 -0400
commit: 1c77c4454facf4cb31d0b647ef6e5ce766d61498 (patch)
tree: f23e49cfb2ed7e9edb410ce1abdba0e18321b0c0 /tools/slang-cpp-extractor/slang-cpp-extractor-main.cpp
parent: 98459bac44711237b8d3629a51a8ba0c01163756 (diff)
1 files changed, 47 insertions, 0 deletions
diff --git a/tools/slang-cpp-extractor/slang-cpp-extractor-main.cpp b/tools/slang-cpp-extractor/slang-cpp-extractor-main.cpp
index 3fa191394..5c8b521fe 100644
--- a/tools/slang-cpp-extractor/slang-cpp-extractor-main.cpp
+++ b/tools/slang-cpp-extractor/slang-cpp-extractor-main.cpp
@@ -1288,21 +1288,68 @@ void CPPExtractor::_consumeTypeModifiers()
     while (advanceIfStyle(IdentifierStyle::TypeModifier));
 }
 
+// Returns true if two tokens of this type can be placed immediately after one another
+// without producing a different token; false for the types listed below. Can be conservative:
+// returning false when not strictly required will just mean more spacing in the output
+static bool _canRepeatTokenType(TokenType type)
+{
+    switch (type)
+    {
+        case TokenType::OpAdd:
+        case TokenType::OpSub:
+        case TokenType::OpAnd:
+        case TokenType::OpOr:
+        case TokenType::OpGreater:
+        case TokenType::OpLess:
+        case TokenType::Identifier:
+        case TokenType::OpAssign:
+        case TokenType::Colon:
+        {
+            return false;
+        }
+        default: break;
+    }
+    return true;
+}
+
+// Returns true if there needs to be a space between the previous token type and the current token
+// type for correct output: before OpAssign, or between two equal types that cannot be repeated.
+// Assumes the token stream is appropriate. May need more sophistication, but this at least avoids Blah const *  -> Blahconst*
+static bool _tokenConcatNeedsSpace(TokenType prev, TokenType cur)
+{
+    if ((cur == TokenType::OpAssign) ||
+        (prev == cur && !_canRepeatTokenType(cur)))
+    {
+        return true;
+    }
+    return false;
+}
+
 UnownedStringSlice CPPExtractor::_concatTokens(TokenReader::ParsingCursor start)
 {
     auto endCursor = m_reader.getCursor();
     m_reader.setCursor(start);
 
+    TokenType prevTokenType = TokenType::Unknown;
+
     StringBuilder buf;
     while (!m_reader.isAtCursor(endCursor))
     {
         const Token token = m_reader.advanceToken();
+        // Emit a separating space first if joining this token directly onto the previous one needs it
+        if (_tokenConcatNeedsSpace(prevTokenType, token.type))
+        {
+            buf << " ";
+        }
         buf << token.getContent();
+            
+        prevTokenType = token.type;
     }
 
     return m_typePool->getSlice(m_typePool->add(buf));
 }
 
+
 SlangResult CPPExtractor::_maybeParseType(UnownedStringSlice& outType, Index& ioTemplateDepth)
 {
     auto startCursor = m_reader.getCursor();
author	jsmall-nvidia <jsmall@nvidia.com>	2020-06-11 14:06:27 -0400
committer	GitHub <noreply@github.com>	2020-06-11 14:06:27 -0400
commit	1c77c4454facf4cb31d0b647ef6e5ce766d61498 (patch)
tree	f23e49cfb2ed7e9edb410ce1abdba0e18321b0c0 /tools/slang-cpp-extractor/slang-cpp-extractor-main.cpp
parent	98459bac44711237b8d3629a51a8ba0c01163756 (diff)