Support raw string literals. (#2405)

* Support raw string literals.
* Use raw string literal in tests.

Co-authored-by: Yong He <yhe@nvidia.com>
author: Yong He <yonghe@outlook.com> 2022-09-20 14:51:11 -0700
committer: GitHub <noreply@github.com> 2022-09-20 14:51:11 -0700
commit: e60a6fd40cbc0f0d8548f0160bb92437e3d79509 (patch)
tree: 7957ea8939a8335509a06ee9c2b9c4baa45bda64 /source
parent: 5ac7ba2c6d3405f1a59f4350c753ec990af8f6dc (diff)
2 files changed, 85 insertions, 1 deletions
diff --git a/source/compiler-core/slang-lexer-diagnostic-defs.h b/source/compiler-core/slang-lexer-diagnostic-defs.h
index 666ab057f..ceeb62455 100644
--- a/source/compiler-core/slang-lexer-diagnostic-defs.h
+++ b/source/compiler-core/slang-lexer-diagnostic-defs.h
@@ -29,5 +29,7 @@ DIAGNOSTIC(10003, Error, invalidDigitForBase, "invalid digit for base-$1 literal
 
 DIAGNOSTIC(10004, Error, endOfFileInLiteral, "end of file in literal")
 DIAGNOSTIC(10005, Error, newlineInLiteral, "newline in literal")
+DIAGNOSTIC(10010, Error, quoteCannotBeDelimiter, "'\"' encountered before '(' in raw string literal. '\"' cannot be a part of a delimiter.")
+
 
 #undef DIAGNOSTIC
diff --git a/source/compiler-core/slang-lexer.cpp b/source/compiler-core/slang-lexer.cpp
index 4a6d7d392..a9d20471a 100644
--- a/source/compiler-core/slang-lexer.cpp
+++ b/source/compiler-core/slang-lexer.cpp
@@ -784,11 +784,80 @@ namespace Slang
         }
     }
 
+    static void _lexRawStringLiteralBody(Lexer* lexer) // Scans the remainder of a raw string literal; the caller has already consumed the R and the opening quote.
+    {
+        const char* start = lexer->m_cursor; // beginning of the delimiter (points straight at '(' when the delimiter is empty)
+        const char* endOfDelimiter = nullptr; // position of the first '(' once seen; null while still inside the delimiter
+        for (;;)
+        {
+            int c = _peek(lexer);
+            if (c == '(' && endOfDelimiter == nullptr) // only the first '(' ends the delimiter; later ones are ordinary text
+                endOfDelimiter = lexer->m_cursor;
+            if (c == '\"')
+            {
+                if (!endOfDelimiter) // a quote before the first '(' is reported, and scanning then continues below
+                {
+                    if (auto sink = lexer->getDiagnosticSink())
+                    {
+                        sink->diagnose(_getSourceLoc(lexer), LexerDiagnostics::quoteCannotBeDelimiter);
+                    }
+                }
+                else
+                {
+                    auto testStart = lexer->m_cursor - (endOfDelimiter - start); // where a closing delimiter of the same length would have to begin
+                    if (testStart > endOfDelimiter) // candidate must start after the opening '(' so that testStart - 1 stays inside the literal
+                    {
+                        auto testDelimiter = UnownedStringSlice(testStart, lexer->m_cursor);
+                        auto delimiter = UnownedStringSlice(start, endOfDelimiter);
+                        if (*(testStart - 1) == ')' && testDelimiter == delimiter) // terminator is ')' followed by the same delimiter and then this quote
+                        {
+                            _advance(lexer); // consume the closing quote
+                            return;
+                        }
+                    }
+                }
+            }
+
+            switch (c)
+            {
+            case kEOF: // input ended before a matching terminator was found: diagnose and stop
+                if (auto sink = lexer->getDiagnosticSink())
+                {
+                    sink->diagnose(_getSourceLoc(lexer), LexerDiagnostics::endOfFileInLiteral);
+                }
+                return;
+            default: // every other character is accepted as part of the literal
+                _advance(lexer);
+                continue;
+            }
+        }
+    }
+
+    UnownedStringSlice getRawStringLiteralTokenValue(Token const& token) // Returns the text between the opening '(' and the closing ')' of a raw string literal token, as a slice into the token's content.
+    {
+        auto content = token.getContent();
+        if (content.getLength() <= 5) // the smallest raw literal (R, quote, '(', ')', quote) is 5 characters and holds no text
+            return UnownedStringSlice();
+        auto start = content.begin() + 2; // skip the leading R and the opening quote
+        auto delimEnd = start;
+        while (delimEnd < content.end() && *delimEnd != '(') // advance to the '(' that ends the delimiter
+            delimEnd++;
+        auto delimLength = delimEnd - start;
+        auto contentEnd = content.end() - delimLength - 2; // step back over the closing quote, the delimiter and the ')'
+        auto contentBegin = start + delimLength + 1; // first character after the opening '('
+        if (contentEnd <= contentBegin) // empty text, or no '(' was found: yield an empty slice
+            return UnownedStringSlice();
+        return UnownedStringSlice(contentBegin, contentEnd);
+    }
+
     String getStringLiteralTokenValue(Token const& token)
     {
         SLANG_ASSERT(token.type == TokenType::StringLiteral
             || token.type == TokenType::CharLiteral);
 
+        if (token.getContent().startsWith("R"))
+            return getRawStringLiteralTokenValue(token);
+
         const UnownedStringSlice content = token.getContent();
 
         char const* cursor = content.begin();
@@ -1016,12 +1085,24 @@ namespace Slang
         case 'A': case 'B': case 'C': case 'D': case 'E':
         case 'F': case 'G': case 'H': case 'I': case 'J':
         case 'K': case 'L': case 'M': case 'N': case 'O':
-        case 'P': case 'Q': case 'R': case 'S': case 'T':
+        case 'P': case 'Q': case 'S': case 'T':
         case 'U': case 'V': case 'W': case 'X': case 'Y':
         case 'Z':
         case '_':
             _lexIdentifier(lexer);
             return TokenType::Identifier;
+        case 'R':
+            _advance(lexer);
+            switch (_peek(lexer))
+            {
+            default:
+                _lexIdentifier(lexer);
+                return TokenType::Identifier;
+            case '\"':
+                _advance(lexer);
+                _lexRawStringLiteralBody(lexer);
+                return TokenType::StringLiteral;
+            }
 
         case '\"':
             _advance(lexer);
@@ -1033,6 +1114,7 @@ namespace Slang
             _lexStringLiteralBody(lexer, '\'');
             return TokenType::CharLiteral;
 
+
         case '+':
             _advance(lexer);
             switch(_peek(lexer))
author	Yong He <yonghe@outlook.com>	2022-09-20 14:51:11 -0700
committer	GitHub <noreply@github.com>	2022-09-20 14:51:11 -0700
commit	e60a6fd40cbc0f0d8548f0160bb92437e3d79509 (patch)
tree	7957ea8939a8335509a06ee9c2b9c4baa45bda64 /source
parent	5ac7ba2c6d3405f1a59f4350c753ec990af8f6dc (diff)