From 1caef5907d0b0f16f686a8fcca479c6afc09f146 Mon Sep 17 00:00:00 2001
From: Jay Kwak <82421531+jkwak-work@users.noreply.github.com>
Date: Tue, 9 Jul 2024 12:45:57 -0700
Subject: Fix Lexer to recognize swizzling on an integer scalar value (#4515)

* Fix Lexer to recognize swizzling on an integer scalar value

Close #4413
---
 source/compiler-core/slang-lexer.cpp | 78 ++++++++++++++++++++++++------------
 1 file changed, 53 insertions(+), 25 deletions(-)

(limited to 'source/compiler-core')

diff --git a/source/compiler-core/slang-lexer.cpp b/source/compiler-core/slang-lexer.cpp
index 5954dc668..8c428159c 100644
--- a/source/compiler-core/slang-lexer.cpp
+++ b/source/compiler-core/slang-lexer.cpp
@@ -173,38 +173,49 @@ namespace Slang
 
     // Look ahead one code point, dealing with complications like
     // escaped newlines.
-    static int _peek(Lexer* lexer)
+    static int _peek(Lexer* lexer, int offset = 0)
     {
-        // Look at the next raw byte, and decide what to do
-        int c = _peekRaw(lexer);
+        int pos = 0;
+        int c = kEOF;
 
-        if(c == '\\')
+        do
         {
-            // We might have a backslash-escaped newline.
-            // Look at the next byte (if any) to see.
-            //
-            // Note(tfoley): We are assuming a null-terminated input here,
-            // so that we can safely look at the next byte without issue.
-            int d = lexer->m_cursor[1];
-            switch (d)
+            if (lexer->m_cursor + pos == lexer->m_end)
+                return kEOF;
+
+            c = lexer->m_cursor[pos++];
+
+            if (c == '\\')
             {
-            case '\r': case '\n':
+                // We might have a backslash-escaped newline.
+                // Look at the next byte (if any) to see.
+                //
+                // Note(tfoley): We are assuming a null-terminated input here,
+                // so that we can safely look at the next byte without issue.
+                int d = lexer->m_cursor[pos++];
+                switch (d)
+                {
+                case '\r': case '\n':
                 {
                     // The newline was escaped, so return the code point after *that*
 
-                    int e = lexer->m_cursor[2];
+                    int e = lexer->m_cursor[pos++];
                     if ((d ^ e) == ('\r' ^ '\n'))
-                        return lexer->m_cursor[3];
-                    return e;
+                        c = lexer->m_cursor[pos++];
+                    else
+                        c = e;
+                    break;
                 }
 
-            default:
-                break;
+                default:
+                    break;
+                }
             }
-        }
-        // TODO: handle UTF-8 encoding for non-ASCII code points here
+            // TODO: handle UTF-8 encoding for non-ASCII code points here
+
+            // Default case is to just hand along the byte we read as an ASCII code point.
+        } while (offset--);
 
-        // Default case is to just hand along the byte we read as an ASCII code point.
         return c;
     }
 
@@ -494,10 +505,19 @@ namespace Slang
 
         if( _peek(lexer) == '.' )
         {
-            tokenType = TokenType::FloatingPointLiteral;
+            switch (_peek(lexer, 1))
+            {
+                // 123.xxxx or 123.rrrr
+            case 'x':
+            case 'r':
+                break;
 
-            _advance(lexer);
-            _lexDigits(lexer, base);
+            default:
+                tokenType = TokenType::FloatingPointLiteral;
+
+                _advance(lexer);
+                _lexDigits(lexer, base);
+            }
         }
 
         if( _maybeLexNumberExponent(lexer, base))
@@ -1089,8 +1109,16 @@ namespace Slang
                     return _maybeLexNumberSuffix(lexer, TokenType::IntegerLiteral);
 
                 case '.':
-                    _advance(lexer);
-                    return _lexNumberAfterDecimalPoint(lexer, 10);
+                    switch (_peek(lexer, 1))
+                    {
+                        // 0.xxxx or 0.rrrr
+                    case 'x':
+                    case 'r':
+                        return _maybeLexNumberSuffix(lexer, TokenType::IntegerLiteral);
+                    default:
+                        _advance(lexer);
+                        return _lexNumberAfterDecimalPoint(lexer, 10);
+                    }
 
                 case 'x': case 'X':
                     _advance(lexer);
-- 
cgit v1.2.3