Actually respect suffixes on numeric literals.

- Add logic to extract the value and suffix from a numeric literal - This duplicates some of the lexing logic, but this is hard to avoid without redundant runtime work - Note that I'm not using any stdlib string-to-number code. This should be more robust once it is working, but it is obviously error prone in the near term. The main up-sides to this are: - We can handle binary integer literals - We can handle hexadecimal floating-point literals without stdlib support - We can hypothetically support digit separators, if we ever wanted - The parser looks at the suffix characters sliced off by the lexer, and tries to pick a type to use for a literal - It uses `NULL` if there is no suffix, to avoid some nasty order dependencies where the stdlib might need to parse a number before it has seen the definition of `int` - Right now I only handle a few cases, so there may be bugs lurking here - The emit logic needs to handle the fact that a literal node in the AST might have a non-default type attached. - Right now I just quickly check for the most likely types, and emit the literal with a matching suffix. This doesn't seem robust if any source language supports a suffix for a type where a target has no corresponding suffix. In the long term some amount of casting is probably required.
author: Tim Foley <tfoley@nvidia.com> 2017-06-28 10:20:16 -0700
committer: Tim Foley <tfoley@nvidia.com> 2017-06-28 11:08:03 -0700
commit: d601921b71ed44835e8d4fa6f13ff7aefcf7649d (patch)
tree: 79b9227ef038d173d780a440035e616dc31104bb /source/slang/lexer.cpp
parent: 4b3936e2983dcecd36a3437bd6c7eef8d5fbbffa (diff)
1 files changed, 205 insertions, 5 deletions
diff --git a/source/slang/lexer.cpp b/source/slang/lexer.cpp
index 73fbb9605..786376baf 100644
--- a/source/slang/lexer.cpp
+++ b/source/slang/lexer.cpp
@@ -415,25 +415,34 @@ namespace Slang
         return tokenType;
     }
 
-    static bool maybeLexNumberExponent(Lexer* lexer, int base)
+    // Test whether `c` is the exponent marker for a numeric literal written
+    // in the given `base`: `e`/`E` when `base` is 10, `p`/`P` when `base`
+    // is 16. Every other character/base combination yields `false`.
+    // This only classifies the character; it does not consume any input.
+    static bool isNumberExponent(char c, int base)
     {
-        switch( peek(lexer) )
+        switch( c )
         {
         default:
             return false;
 
         case 'e': case 'E':
             if(base != 10) return false;
-            advance(lexer);
             break;
 
         case 'p': case 'P':
             if(base != 16) return false;
-            advance(lexer);
             break;
         }
 
-        // we saw an exponent marker, so we must 
+        return true;
+    }
+
+    static bool maybeLexNumberExponent(Lexer* lexer, int base)
+    {
+        if(!isNumberExponent(peek(lexer), base))
+            return false;
+
+        // we saw an exponent marker
+        advance(lexer);
+
+        // Now start to read the exponent
         switch( peek(lexer) )
         {
         case '+': case '-':
@@ -482,6 +491,197 @@ namespace Slang
         return tokenType;
     }
 
+    // Try to read a single digit of a numeric literal at `*ioCursor`.
+    //
+    // Any `_` characters in front of the digit are skipped (they are
+    // tentatively treated as digit separators). On success the cursor is
+    // advanced past the digit and the digit's numeric value is returned:
+    // 0-9 for decimal digits, and 10-15 for `a`-`f` / `A`-`F`. The letter
+    // digits are only accepted when `base` is greater than 10.
+    //
+    // Returns -1 when the next character is not a digit for this base; the
+    // cursor is then left on that character (after any skipped `_`).
+    //
+    // Note: decimal digits are not range-checked against `base`; the caller
+    // is expected to hand in text that was already accepted by the lexer.
+    static int maybeReadDigit(char const** ioCursor, int base)
+    {
+        auto& cursor = *ioCursor;
+
+        for(;;)
+        {
+            int c = *cursor;
+            switch(c)
+            {
+            default:
+                return -1;
+
+            // TODO: need to decide on digit separator characters
+            case '_':
+                cursor++;
+                continue;
+
+            case '0': case '1': case '2': case '3': case '4':
+            case '5': case '6': case '7': case '8': case '9':
+                cursor++;
+                return c - '0';
+
+            case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
+                if(base > 10)
+                {
+                    cursor++;
+                    // `a` stands for ten, so offset past the decimal digits
+                    return 10 + (c - 'a');
+                }
+                return -1;
+
+            case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
+                if(base > 10)
+                {
+                    cursor++;
+                    // `A` stands for ten, so offset past the decimal digits
+                    return 10 + (c - 'A');
+                }
+                return -1;
+            }
+        }
+    }
+
+    // Consume an optional radix prefix at `*ioCursor` and report the radix.
+    //
+    //   no leading `0`          -> 10, cursor untouched
+    //   `0x` / `0X`             -> 16, cursor moved past the prefix
+    //   `0b` / `0B`             -> 2,  cursor moved past the prefix
+    //   `0` followed by a digit -> 8,  cursor moved past the `0` only
+    //   `0` followed by other   -> 10, cursor moved past the `0` only
+    static int readOptionalBase(char const** ioCursor)
+    {
+        if( **ioCursor != '0' )
+            return 10;
+
+        // Step over the leading zero and inspect what comes after it
+        (*ioCursor)++;
+        char const marker = **ioCursor;
+
+        if( marker == 'x' || marker == 'X' )
+        {
+            (*ioCursor)++;
+            return 16;
+        }
+
+        if( marker == 'b' || marker == 'B' )
+        {
+            (*ioCursor)++;
+            return 2;
+        }
+
+        if( marker >= '0' && marker <= '9' )
+            return 8;
+
+        return 10;
+    }
+
+
+
+    // Decode the numeric value of an integer literal token.
+    //
+    // A radix prefix is consumed first via `readOptionalBase`, and then
+    // digits are accumulated through `maybeReadDigit` until it reports that
+    // no further digit is available. When `outSuffix` is non-null it is set
+    // to the text between the end of the digits and the end of the token.
+    IntegerLiteralValue getIntegerLiteralValue(Token const& token, String* outSuffix)
+    {
+        char const* cursor = token.Content.begin();
+        char const* const end = token.Content.end();
+
+        int const base = readOptionalBase(&cursor);
+
+        // Fold the digits into the result, most significant digit first
+        IntegerLiteralValue result = 0;
+        int digit = maybeReadDigit(&cursor, base);
+        while(digit >= 0)
+        {
+            result = result*base + digit;
+            digit = maybeReadDigit(&cursor, base);
+        }
+
+        // Whatever is left over in the token is the suffix
+        if(outSuffix != nullptr)
+        {
+            *outSuffix = String(cursor, end);
+        }
+
+        return result;
+    }
+
+    // Decode the numeric value of a floating-point literal token.
+    //
+    // The token text is read as: an optional radix prefix, a run of digits
+    // that may contain a `.`, an optional exponent, and finally a suffix.
+    // The exponent marker is `e`/`E` for decimal literals and `p`/`P` for
+    // hexadecimal ones; exponent digits are always decimal, and scale the
+    // value by a power of 10 (decimal) or a power of 2 (hexadecimal).
+    //
+    // When `outSuffix` is non-null it is set to the text between the end of
+    // the numeric part and the end of the token.
+    FloatingPointLiteralValue getFloatingPointLiteralValue(Token const& token, String* outSuffix)
+    {
+        FloatingPointLiteralValue value = 0;
+
+        char const* cursor = token.Content.begin();
+        char const* end = token.Content.end();
+
+        int radix = readOptionalBase(&cursor);
+
+        // `readOptionalBase` reports 8 for a leading `0` followed by another
+        // digit, which is the right call for integer literals. A
+        // floating-point literal with leading zeros (e.g. `01.5`) is still
+        // decimal, though, so undo the octal guess here.
+        if(radix == 8)
+        {
+            radix = 10;
+        }
+
+        // Accumulate all digits as if there were no `.`, and track in
+        // `divisor` how much the digits after the `.` over-scaled the value.
+        bool seenDot = false;
+        FloatingPointLiteralValue divisor = 1;
+        for( ;;)
+        {
+            if(*cursor == '.')
+            {
+                cursor++;
+                seenDot = true;
+                continue;
+            }
+
+            int digit = maybeReadDigit(&cursor, radix);
+            if(digit < 0)
+                break;
+
+            value = value*radix + digit;
+
+            if(seenDot)
+            {
+                divisor *= radix;
+            }
+        }
+
+        // Now read optional exponent
+        if(isNumberExponent(*cursor, radix))
+        {
+            cursor++;
+
+            bool exponentIsNegative = false;
+            switch(*cursor)
+            {
+            default:
+                break;
+
+            case '-':
+                exponentIsNegative = true;
+                cursor++;
+                break;
+
+            case '+':
+                cursor++;
+                break;
+            }
+
+            // The exponent itself is always written in decimal
+            int exponentRadix = 10;
+            int exponent = 0;
+
+            for(;;)
+            {
+                int digit = maybeReadDigit(&cursor, exponentRadix);
+                if(digit < 0)
+                    break;
+
+                exponent = exponent*exponentRadix + digit;
+            }
+
+            // Hexadecimal literals use a binary exponent
+            FloatingPointLiteralValue exponentBase = 10;
+            if(radix == 16)
+            {
+                exponentBase = 2;
+            }
+
+            FloatingPointLiteralValue exponentValue = pow(exponentBase, exponent);
+
+            // A negative exponent is applied by growing the divisor rather
+            // than by negating `exponent`
+            if( exponentIsNegative )
+            {
+                divisor *= exponentValue;
+            }
+            else
+            {
+                value *= exponentValue;
+            }
+        }
+
+        value /= divisor;
+
+        if(outSuffix)
+        {
+            *outSuffix = String(cursor, end);
+        }
+
+        return value;
+    }
+
     static void lexStringLiteralBody(Lexer* lexer, char quote)
     {
         for(;;)
author	Tim Foley <tfoley@nvidia.com>	2017-06-28 10:20:16 -0700
committer	Tim Foley <tfoley@nvidia.com>	2017-06-28 11:08:03 -0700
commit	d601921b71ed44835e8d4fa6f13ff7aefcf7649d (patch)
tree	79b9227ef038d173d780a440035e616dc31104bb /source/slang/lexer.cpp
parent	4b3936e2983dcecd36a3437bd6c7eef8d5fbbffa (diff)