diff options
| author | Tim Foley <tfoley@nvidia.com> | 2017-06-28 10:20:16 -0700 |
|---|---|---|
| committer | Tim Foley <tfoley@nvidia.com> | 2017-06-28 11:08:03 -0700 |
| commit | d601921b71ed44835e8d4fa6f13ff7aefcf7649d (patch) | |
| tree | 79b9227ef038d173d780a440035e616dc31104bb /source/slang/lexer.cpp | |
| parent | 4b3936e2983dcecd36a3437bd6c7eef8d5fbbffa (diff) | |
Actually respect suffixes on numeric literals.
- Add logic to extract the value and suffix from a numeric literal
- This duplicates some of the lexing logic, but this is hard to avoid without redundant runtime work
- Note that I'm not using and stdlib string-to-number code. This should be more robust once it is working, but it is obviously error prone in the near term. The main up-sides to this are:
- We can handle binary integer literals
- We can handle hexadecimal floating-point literals without stdlib support
- We can hypothetically support digit separators, if we ever wanted
- The parser looks at the suffix characters sliced off by the lexer, and tries to pick a type to use for a literal
- It uses `NULL` if there is no suffix, to avoid some nasty order dependencies where the stdlib might need to parse a number before it has seen the definition of `int`
- Right now I only handle a few cases, so there may be bugs lurking here
- The emit logic needs to handle the fact that a literal node in the AST might have a non-default type attached.
- Right now I just quickly check for the most likely types, and emit the literal with a matching suffix. This doesn't seem robust if any source language supports a suffix for a type where a target has no corresponding suffix. In the long term some amount of casting is probably required.
Diffstat (limited to 'source/slang/lexer.cpp')
| -rw-r--r-- | source/slang/lexer.cpp | 210 |
1 files changed, 205 insertions, 5 deletions
diff --git a/source/slang/lexer.cpp b/source/slang/lexer.cpp index 73fbb9605..786376baf 100644 --- a/source/slang/lexer.cpp +++ b/source/slang/lexer.cpp @@ -415,25 +415,34 @@ namespace Slang return tokenType; } - static bool maybeLexNumberExponent(Lexer* lexer, int base) + static bool isNumberExponent(char c, int base) { - switch( peek(lexer) ) + switch( c ) { default: return false; case 'e': case 'E': if(base != 10) return false; - advance(lexer); break; case 'p': case 'P': if(base != 16) return false; - advance(lexer); break; } - // we saw an exponent marker, so we must + return true; + } + + static bool maybeLexNumberExponent(Lexer* lexer, int base) + { + if(!isNumberExponent(peek(lexer), base)) + return false; + + // we saw an exponent marker + advance(lexer); + + // Now start to read the exponent switch( peek(lexer) ) { case '+': case '-': @@ -482,6 +491,197 @@ namespace Slang return tokenType; } + static int maybeReadDigit(char const** ioCursor, int base) + { + auto& cursor = *ioCursor; + + for(;;) + { + int digitVal = 0; + int c = *cursor; + switch(c) + { + default: + return -1; + + // TODO: need to decide on digit separator characters + case '_': + cursor++; + continue; + + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + cursor++; + return c - '0'; + + case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': + if(base > 10) + { + cursor++; + return c - 'a'; + } + return -1; + + case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': + if(base > 10) + { + cursor++; + return c - 'A'; + } + return -1; + } + } + } + + static int readOptionalBase(char const** ioCursor) + { + auto& cursor = *ioCursor; + if( *cursor == '0' ) + { + cursor++; + switch(*cursor) + { + case 'x': case 'X': + cursor++; + return 16; + + case 'b': case 'B': + cursor++; + return 2; + + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + return 8; + + default: + return 10; + } + } + + return 10; + } + + + + IntegerLiteralValue getIntegerLiteralValue(Token const& token, String* outSuffix) + { + IntegerLiteralValue value = 0; + + char const* cursor = token.Content.begin(); + char const* end = token.Content.end(); + + int base = readOptionalBase(&cursor); + + for( ;;) + { + int digit = maybeReadDigit(&cursor, base); + if(digit < 0) + break; + + value = value*base + digit; + } + + if(outSuffix) + { + *outSuffix = String(cursor, end); + } + + return value; + } + + FloatingPointLiteralValue getFloatingPointLiteralValue(Token const& token, String* outSuffix) + { + FloatingPointLiteralValue value = 0; + + char const* cursor = token.Content.begin(); + char const* end = token.Content.end(); + + int radix = readOptionalBase(&cursor); + + bool seenDot = false; + FloatingPointLiteralValue divisor = 1; + for( ;;) + { + if(*cursor == '.') + { + cursor++; + seenDot = true; + continue; + } + + int digit = maybeReadDigit(&cursor, radix); + if(digit < 0) + break; + + value = value*radix + digit; + + if(seenDot) + { + divisor *= radix; + } + } + + // Now read optional exponent + if(isNumberExponent(*cursor, radix)) + { + cursor++; + + bool exponentIsNegative = false; + switch(*cursor) + { + default: + break; + + case '-': + exponentIsNegative = true; + cursor++; + break; + + case '+': + cursor++; + break; + } + + int exponentRadix = 10; + int exponent = 0; + + for(;;) + { + int digit = maybeReadDigit(&cursor, exponentRadix); + if(digit < 0) + break; + + exponent = exponent*exponentRadix + digit; + } + + FloatingPointLiteralValue exponentBase = 10; + if(radix == 16) + { + exponentBase = 2; + } + + FloatingPointLiteralValue exponentValue = pow(exponentBase, exponent); + + if( exponentIsNegative ) + { + divisor *= exponentValue; + } + else + { + value *= exponentValue; + } + } + + value /= divisor; + + if(outSuffix) + { + *outSuffix = String(cursor, end); + } + + return value; + } + static void lexStringLiteralBody(Lexer* lexer, char quote) { for(;;) |
