From cf8e75fae7702855b3d81ed11b2fb480c31a7fde Mon Sep 17 00:00:00 2001 From: Julius Ikkala Date: Tue, 6 May 2025 20:56:18 +0300 Subject: Parse char literals as integers (#6989) * Parse char literals as integers * Fix formatting * Parse escaped chars correctly --------- Co-authored-by: Yong He --- source/compiler-core/slang-lexer.cpp | 38 +++++++++++++++++++++++++++++++++--- source/compiler-core/slang-lexer.h | 2 ++ source/slang/slang-parser.cpp | 15 ++++++++++++++ 3 files changed, 52 insertions(+), 3 deletions(-) (limited to 'source') diff --git a/source/compiler-core/slang-lexer.cpp b/source/compiler-core/slang-lexer.cpp index d19ea388d..358a0ceff 100644 --- a/source/compiler-core/slang-lexer.cpp +++ b/source/compiler-core/slang-lexer.cpp @@ -6,6 +6,7 @@ // #include "core/slang-char-encode.h" +#include "core/slang-string-escape-util.h" #include "slang-core-diagnostics.h" #include "slang-name.h" #include "slang-source-loc.h" @@ -829,17 +830,48 @@ FloatingPointLiteralValue getFloatingPointLiteralValue( return value; } -static void _lexStringLiteralBody(Lexer* lexer, char quote) +IntegerLiteralValue getCharLiteralValue(Token const& token) { + String unquotedContent = StringEscapeUtil::unquote('\'', token.getContent()); + StringBuilder unescaped(4); + auto escapeHandler = StringEscapeUtil::getHandler(StringEscapeUtil::Style::Cpp); + escapeHandler->appendUnescaped(unquotedContent.getUnownedSlice(), unescaped); + + char const* cursor = unescaped.getBuffer(); + + IntegerLiteralValue codepoint = getUnicodePointFromUTF8([&]() { return *cursor++; }); + return codepoint; +} + +static void _lexStringLiteralBody(Lexer* lexer, char quote, bool singleChar) +{ + int len = 0; for (;;) { int c = _peek(lexer); if (c == quote) { + if (singleChar && len == 0) + { // Closing quote reached with len == 0: the char literal is empty, but it must hold exactly 1 character.
+ if (auto sink = lexer->getDiagnosticSink()) + { + sink->diagnose(_getSourceLoc(lexer), LexerDiagnostics::illegalCharacterLiteral); + } + } _advance(lexer); return; } + len++; + + if (singleChar && len == 2) + { // len just reached 2 before the closing quote: the char literal is about to hold more than 1 char. + if (auto sink = lexer->getDiagnosticSink()) + { + sink->diagnose(_getSourceLoc(lexer), LexerDiagnostics::illegalCharacterLiteral); + } + } + switch (c) { case kEOF: @@ -1346,12 +1378,12 @@ static TokenType _lexTokenImpl(Lexer* lexer) case '\"': _advance(lexer); - _lexStringLiteralBody(lexer, '\"'); + _lexStringLiteralBody(lexer, '\"', false); return TokenType::StringLiteral; case '\'': _advance(lexer); - _lexStringLiteralBody(lexer, '\''); + _lexStringLiteralBody(lexer, '\'', true); return TokenType::CharLiteral; diff --git a/source/compiler-core/slang-lexer.h b/source/compiler-core/slang-lexer.h index c39d130b7..99ad87681 100644 --- a/source/compiler-core/slang-lexer.h +++ b/source/compiler-core/slang-lexer.h @@ -184,6 +184,8 @@ IntegerLiteralValue getIntegerLiteralValue( FloatingPointLiteralValue getFloatingPointLiteralValue( Token const& token, UnownedStringSlice* outSuffix = 0); + +IntegerLiteralValue getCharLiteralValue(Token const& token); } // namespace Slang #endif diff --git a/source/slang/slang-parser.cpp b/source/slang/slang-parser.cpp index c17a086a7..7ff0a6a2f 100644 --- a/source/slang/slang-parser.cpp +++ b/source/slang/slang-parser.cpp @@ -7584,6 +7584,21 @@ static Expr* parseAtomicExpr(Parser* parser) return constExpr; } + + case TokenType::CharLiteral: + { + IntegerLiteralExpr* constExpr = parser->astBuilder->create(); + parser->FillPosition(constExpr); + + auto token = parser->tokenReader.advanceToken(); + constExpr->token = token; + + IntegerLiteralValue value = getCharLiteralValue(token); + constExpr->value = value; + constExpr->suffixType = BaseType::UInt; + return constExpr; + } + case TokenType::CompletionRequest: { VarExpr* varExpr = parser->astBuilder->create(); -- cgit v1.2.3