summary | refs | log | tree | commit | diff | stats
path: root/source/compiler-core
diff options
context:
space:
mode:
author: Julius Ikkala <julius.ikkala@gmail.com> 2025-05-06 20:56:18 +0300
committer: GitHub <noreply@github.com> 2025-05-06 10:56:18 -0700
commit: cf8e75fae7702855b3d81ed11b2fb480c31a7fde (patch)
tree: ec5ec1a7ccb9728627fea0f9636fe45846e7f5b8 /source/compiler-core
parent: 10376faffee1ab51b2d23c311212b5724c3c6ac6 (diff)
Parse char literals as integers (#6989)
* Parse char literals as integers
* Fix formatting
* Parse escaped chars correctly
---------
Co-authored-by: Yong He <yonghe@outlook.com>
Diffstat (limited to 'source/compiler-core')
-rw-r--r-- source/compiler-core/slang-lexer.cpp | 38
-rw-r--r-- source/compiler-core/slang-lexer.h | 2
2 files changed, 37 insertions, 3 deletions
diff --git a/source/compiler-core/slang-lexer.cpp b/source/compiler-core/slang-lexer.cpp
index d19ea388d..358a0ceff 100644
--- a/source/compiler-core/slang-lexer.cpp
+++ b/source/compiler-core/slang-lexer.cpp
@@ -6,6 +6,7 @@
//
#include "core/slang-char-encode.h"
+#include "core/slang-string-escape-util.h"
#include "slang-core-diagnostics.h"
#include "slang-name.h"
#include "slang-source-loc.h"
@@ -829,17 +830,48 @@ FloatingPointLiteralValue getFloatingPointLiteralValue(
return value;
}
-static void _lexStringLiteralBody(Lexer* lexer, char quote)
+IntegerLiteralValue getCharLiteralValue(Token const& token) // Converts a char-literal token into the integer code point decoded from its unescaped content.
{
+ String unquotedContent = StringEscapeUtil::unquote('\'', token.getContent()); // token text passed through unquote with the single-quote delimiter; presumably strips the enclosing quotes (confirm against StringEscapeUtil)
+ StringBuilder unescaped(4); // destination for the unescaped text; the 4 is presumably an initial capacity (TODO confirm)
+ auto escapeHandler = StringEscapeUtil::getHandler(StringEscapeUtil::Style::Cpp); // escape handler selected for the Cpp style
+ escapeHandler->appendUnescaped(unquotedContent.getUnownedSlice(), unescaped); // appends the unescaped form of the content to the builder
+
+ char const* cursor = unescaped.getBuffer(); // read position for the decoder callback below; NOTE(review): nothing in this function bounds reads to the buffer length, so confirm the decoder cannot run past the end on empty or malformed input
+
+ IntegerLiteralValue codepoint = getUnicodePointFromUTF8([&]() { return *cursor++; }); // the callback hands out one char per call and advances the cursor
+ return codepoint;
+}
+
+static void _lexStringLiteralBody(Lexer* lexer, char quote, bool singleChar)
+{
+ int len = 0;
for (;;)
{
int c = _peek(lexer);
if (c == quote)
{
+ if (singleChar && len == 0)
+ { // Empty char literal - size must be exactly 1.
+ if (auto sink = lexer->getDiagnosticSink())
+ {
+ sink->diagnose(_getSourceLoc(lexer), LexerDiagnostics::illegalCharacterLiteral);
+ }
+ }
_advance(lexer);
return;
}
+ len++;
+
+ if (singleChar && len == 2)
+ { // Char literal about to have more than 1 char.
+ if (auto sink = lexer->getDiagnosticSink())
+ {
+ sink->diagnose(_getSourceLoc(lexer), LexerDiagnostics::illegalCharacterLiteral);
+ }
+ }
+
switch (c)
{
case kEOF:
@@ -1346,12 +1378,12 @@ static TokenType _lexTokenImpl(Lexer* lexer)
case '\"':
_advance(lexer);
- _lexStringLiteralBody(lexer, '\"');
+ _lexStringLiteralBody(lexer, '\"', false);
return TokenType::StringLiteral;
case '\'':
_advance(lexer);
- _lexStringLiteralBody(lexer, '\'');
+ _lexStringLiteralBody(lexer, '\'', true);
return TokenType::CharLiteral;
diff --git a/source/compiler-core/slang-lexer.h b/source/compiler-core/slang-lexer.h
index c39d130b7..99ad87681 100644
--- a/source/compiler-core/slang-lexer.h
+++ b/source/compiler-core/slang-lexer.h
@@ -184,6 +184,8 @@ IntegerLiteralValue getIntegerLiteralValue(
FloatingPointLiteralValue getFloatingPointLiteralValue(
Token const& token,
UnownedStringSlice* outSuffix = 0);
+
+IntegerLiteralValue getCharLiteralValue(Token const& token); // Returns the integer value for a character-literal token.
} // namespace Slang
#endif