From cf8e75fae7702855b3d81ed11b2fb480c31a7fde Mon Sep 17 00:00:00 2001
From: Julius Ikkala
Date: Tue, 6 May 2025 20:56:18 +0300
Subject: Parse char literals as integers (#6989)

* Parse char literals as integers

* Fix formatting

* Parse escaped chars correctly

---------

Co-authored-by: Yong He
---
 source/compiler-core/slang-lexer.cpp | 38 +++++++++++++++++++++++++++++++++++---
 1 file changed, 35 insertions(+), 3 deletions(-)

(limited to 'source/compiler-core/slang-lexer.cpp')

diff --git a/source/compiler-core/slang-lexer.cpp b/source/compiler-core/slang-lexer.cpp
index d19ea388d..358a0ceff 100644
--- a/source/compiler-core/slang-lexer.cpp
+++ b/source/compiler-core/slang-lexer.cpp
@@ -6,6 +6,7 @@
 //
 
 #include "core/slang-char-encode.h"
+#include "core/slang-string-escape-util.h"
 #include "slang-core-diagnostics.h"
 #include "slang-name.h"
 #include "slang-source-loc.h"
@@ -829,17 +830,48 @@ FloatingPointLiteralValue getFloatingPointLiteralValue(
     return value;
 }
 
-static void _lexStringLiteralBody(Lexer* lexer, char quote)
+IntegerLiteralValue getCharLiteralValue(Token const& token)
 {
+    String unquotedContent = StringEscapeUtil::unquote('\'', token.getContent());
+    StringBuilder unescaped(4);
+    auto escapeHandler = StringEscapeUtil::getHandler(StringEscapeUtil::Style::Cpp);
+    escapeHandler->appendUnescaped(unquotedContent.getUnownedSlice(), unescaped);
+
+    char const* cursor = unescaped.getBuffer();
+
+    IntegerLiteralValue codepoint = getUnicodePointFromUTF8([&]() { return *cursor++; });
+    return codepoint;
+}
+
+static void _lexStringLiteralBody(Lexer* lexer, char quote, bool singleChar)
+{
+    int len = 0;
     for (;;)
     {
         int c = _peek(lexer);
         if (c == quote)
         {
+            if (singleChar && len == 0)
+            { // Empty char literal - size must be exactly 1.
+                if (auto sink = lexer->getDiagnosticSink())
+                {
+                    sink->diagnose(_getSourceLoc(lexer), LexerDiagnostics::illegalCharacterLiteral);
+                }
+            }
             _advance(lexer);
             return;
         }
 
+        len++;
+
+        if (singleChar && len == 2)
+        { // Char literal about to have more than 1 char.
+            if (auto sink = lexer->getDiagnosticSink())
+            {
+                sink->diagnose(_getSourceLoc(lexer), LexerDiagnostics::illegalCharacterLiteral);
+            }
+        }
+
         switch (c)
         {
         case kEOF:
@@ -1346,12 +1378,12 @@ static TokenType _lexTokenImpl(Lexer* lexer)
 
     case '\"':
         _advance(lexer);
-        _lexStringLiteralBody(lexer, '\"');
+        _lexStringLiteralBody(lexer, '\"', false);
         return TokenType::StringLiteral;
 
     case '\'':
         _advance(lexer);
-        _lexStringLiteralBody(lexer, '\'');
+        _lexStringLiteralBody(lexer, '\'', true);
         return TokenType::CharLiteral;
 
 
-- 
cgit v1.2.3