From 1c77c4454facf4cb31d0b647ef6e5ce766d61498 Mon Sep 17 00:00:00 2001
From: jsmall-nvidia <jsmall@nvidia.com>
Date: Thu, 11 Jun 2020 14:06:27 -0400
Subject: Fix problem with C++ extractor erroneous concatenation of type tokens
 (#1382)

* Try to fix problem with C++ extractor concatenating tokens producing an erroneous result.

* Improve naming/comments around C++ extractor fix.

* Another small improvement around space concatenation when outputting the token list.

* Handle some more special cases for consecutive tokens for C++ extractor concatenation of tokens.
---
 .../slang-cpp-extractor-main.cpp                   | 47 ++++++++++++++++++++++
 1 file changed, 47 insertions(+)

(limited to 'tools/slang-cpp-extractor/slang-cpp-extractor-main.cpp')

diff --git a/tools/slang-cpp-extractor/slang-cpp-extractor-main.cpp b/tools/slang-cpp-extractor/slang-cpp-extractor-main.cpp
index 3fa191394..5c8b521fe 100644
--- a/tools/slang-cpp-extractor/slang-cpp-extractor-main.cpp
+++ b/tools/slang-cpp-extractor/slang-cpp-extractor-main.cpp
@@ -1288,21 +1288,68 @@ void CPPExtractor::_consumeTypeModifiers()
     while (advanceIfStyle(IdentifierStyle::TypeModifier));
 }
 
+// Returns false if two tokens of this type placed immediately after one another could
+// produce a different token (e.g. two identifiers fusing into one). Can be conservative:
+// returning false when not strictly required will just mean more spacing in the output.
+static bool _canRepeatTokenType(TokenType type)
+{
+    switch (type)
+    {
+        case TokenType::OpAdd:
+        case TokenType::OpSub:
+        case TokenType::OpAnd:
+        case TokenType::OpOr:
+        case TokenType::OpGreater:
+        case TokenType::OpLess:
+        case TokenType::Identifier:
+        case TokenType::OpAssign:
+        case TokenType::Colon:
+        {
+            return false;
+        }
+        default: break;
+    }
+    return true;
+}
+
+// Returns true if there needs to be a space between the previous token type and the current token
+// type for correct output. It is assumed that the token stream is appropriate.
+// The implementation might need more sophistication, but this at least avoids Blah const * -> Blahconst*
+static bool _tokenConcatNeedsSpace(TokenType prev, TokenType cur)
+{
+    if ((cur == TokenType::OpAssign) ||
+        (prev == cur && !_canRepeatTokenType(cur)))
+    {
+        return true;
+    }
+    return false;
+}
+
 UnownedStringSlice CPPExtractor::_concatTokens(TokenReader::ParsingCursor start)
 {
     auto endCursor = m_reader.getCursor();
     m_reader.setCursor(start);
 
+    TokenType prevTokenType = TokenType::Unknown;
+
     StringBuilder buf;
     while (!m_reader.isAtCursor(endCursor))
     {
         const Token token = m_reader.advanceToken();
+        // Check if we need a space between tokens
+        if (_tokenConcatNeedsSpace(prevTokenType, token.type))
+        {
+            buf << " ";
+        }
         buf << token.getContent();
+            
+        prevTokenType = token.type;
     }
 
     return m_typePool->getSlice(m_typePool->add(buf));
 }
 
+
 SlangResult CPPExtractor::_maybeParseType(UnownedStringSlice& outType, Index& ioTemplateDepth)
 {
     auto startCursor = m_reader.getCursor();
-- 
cgit v1.2.3