summaryrefslogtreecommitdiffstats
path: root/source/slang/lexer.cpp
diff options
context:
space:
mode:
authorTim Foley <tfoley@nvidia.com>2017-06-15 13:12:51 -0700
committerTim Foley <tfoley@nvidia.com>2017-06-15 13:12:51 -0700
commit517513645afb8eaf4841e7b7035f1ba3a9c7cd57 (patch)
treeeb0fdf58f5f42c427ade3aac136a9053fbf21d54 /source/slang/lexer.cpp
parentc34a433d7aa3fdbfefee22f20d5aac2d960f392a (diff)
Rename `Slang::Compiler` -> `Slang`
This gets rid of one unnecessary namespace.
Diffstat (limited to 'source/slang/lexer.cpp')
-rw-r--r--source/slang/lexer.cpp1711
1 files changed, 854 insertions, 857 deletions
diff --git a/source/slang/lexer.cpp b/source/slang/lexer.cpp
index 87b3eaf63..cb718b538 100644
--- a/source/slang/lexer.cpp
+++ b/source/slang/lexer.cpp
@@ -4,397 +4,402 @@
namespace Slang
{
- namespace Compiler
+ static Token GetEndOfFileToken()
{
- static Token GetEndOfFileToken()
- {
- return Token(TokenType::EndOfFile, "", 0, 0, 0, "");
- }
+ return Token(TokenType::EndOfFile, "", 0, 0, 0, "");
+ }
- Token* TokenList::begin() const
- {
- assert(mTokens.Count());
- return &mTokens[0];
- }
+ Token* TokenList::begin() const
+ {
+ assert(mTokens.Count());
+ return &mTokens[0];
+ }
- Token* TokenList::end() const
- {
- assert(mTokens.Count());
- assert(mTokens[mTokens.Count()-1].Type == TokenType::EndOfFile);
- return &mTokens[mTokens.Count() - 1];
- }
+ Token* TokenList::end() const
+ {
+ assert(mTokens.Count());
+ assert(mTokens[mTokens.Count()-1].Type == TokenType::EndOfFile);
+ return &mTokens[mTokens.Count() - 1];
+ }
- TokenSpan::TokenSpan()
- : mBegin(NULL)
- , mEnd (NULL)
- {}
+ TokenSpan::TokenSpan()
+ : mBegin(NULL)
+ , mEnd (NULL)
+ {}
- TokenReader::TokenReader()
- : mCursor(NULL)
- , mEnd (NULL)
- {}
+ TokenReader::TokenReader()
+ : mCursor(NULL)
+ , mEnd (NULL)
+ {}
- Token TokenReader::PeekToken() const
- {
- if (!mCursor)
- return GetEndOfFileToken();
+ Token TokenReader::PeekToken() const
+ {
+ if (!mCursor)
+ return GetEndOfFileToken();
- Token token = *mCursor;
- if (mCursor == mEnd)
- token.Type = TokenType::EndOfFile;
- return token;
- }
+ Token token = *mCursor;
+ if (mCursor == mEnd)
+ token.Type = TokenType::EndOfFile;
+ return token;
+ }
- TokenType TokenReader::PeekTokenType() const
- {
- if (mCursor == mEnd)
- return TokenType::EndOfFile;
- assert(mCursor);
- return mCursor->Type;
- }
+ TokenType TokenReader::PeekTokenType() const
+ {
+ if (mCursor == mEnd)
+ return TokenType::EndOfFile;
+ assert(mCursor);
+ return mCursor->Type;
+ }
- CodePosition TokenReader::PeekLoc() const
- {
- if (!mCursor)
- return CodePosition();
- assert(mCursor);
- return mCursor->Position;
- }
+ CodePosition TokenReader::PeekLoc() const
+ {
+ if (!mCursor)
+ return CodePosition();
+ assert(mCursor);
+ return mCursor->Position;
+ }
- Token TokenReader::AdvanceToken()
- {
- if (!mCursor)
- return GetEndOfFileToken();
+ Token TokenReader::AdvanceToken()
+ {
+ if (!mCursor)
+ return GetEndOfFileToken();
+
+ Token token = *mCursor;
+ if (mCursor == mEnd)
+ token.Type = TokenType::EndOfFile;
+ else
+ mCursor++;
+ return token;
+ }
- Token token = *mCursor;
- if (mCursor == mEnd)
- token.Type = TokenType::EndOfFile;
- else
- mCursor++;
- return token;
- }
+ // Lexer
- // Lexer
+ Lexer::Lexer(
+ String const& path,
+ String const& content,
+ DiagnosticSink* sink)
+ : path(path)
+ , content(content)
+ , sink(sink)
+ {
+ cursor = content.begin();
+ end = content.end();
- Lexer::Lexer(
- String const& path,
- String const& content,
- DiagnosticSink* sink)
- : path(path)
- , content(content)
- , sink(sink)
- {
- cursor = content.begin();
- end = content.end();
+ loc = CodePosition(1, 1, 0, path);
+ tokenFlags = TokenFlag::AtStartOfLine | TokenFlag::AfterWhitespace;
+ lexerFlags = 0;
+ }
- loc = CodePosition(1, 1, 0, path);
- tokenFlags = TokenFlag::AtStartOfLine | TokenFlag::AfterWhitespace;
- lexerFlags = 0;
- }
+ Lexer::~Lexer()
+ {
+ }
- Lexer::~Lexer()
- {
- }
+ enum { kEOF = -1 };
- enum { kEOF = -1 };
+ // Get the next input byte, without any handling of
+ // escaped newlines, non-ASCII code points, source locations, etc.
+ static int peekRaw(Lexer* lexer)
+ {
+ // If we are at the end of the input, return a designated end-of-file value
+ if(lexer->cursor == lexer->end)
+ return kEOF;
- // Get the next input byte, without any handling of
- // escaped newlines, non-ASCII code points, source locations, etc.
- static int peekRaw(Lexer* lexer)
- {
- // If we are at the end of the input, return a designated end-of-file value
- if(lexer->cursor == lexer->end)
- return kEOF;
+ // Otherwise, just look at the next byte
+ return *lexer->cursor;
+ }
- // Otherwise, just look at the next byte
- return *lexer->cursor;
- }
+ // Read one input byte without any special handling (similar to `peekRaw`)
+ static int advanceRaw(Lexer* lexer)
+ {
+ // The logic here is basically the same as for `peekRaw()`,
+ // escape we advance `cursor` if we aren't at the end.
- // Read one input byte without any special handling (similar to `peekRaw`)
- static int advanceRaw(Lexer* lexer)
- {
- // The logic here is basically the same as for `peekRaw()`,
- // escape we advance `cursor` if we aren't at the end.
+ if (lexer->cursor == lexer->end)
+ return kEOF;
- if (lexer->cursor == lexer->end)
- return kEOF;
+ return *lexer->cursor++;
+ }
- return *lexer->cursor++;
- }
+ // When the cursor is already at the first byte of an end-of-line sequence,
+ // consume one or two bytes that compose the sequence.
+ //
+ // Basically, a newline is one of:
+ //
+ // "\n"
+ // "\r"
+ // "\r\n"
+ // "\n\r"
+ //
+ // We always look for the longest match possible.
+ //
+ static void handleNewLineInner(Lexer* lexer, int c)
+ {
+ assert(c == '\n' || c == '\r');
- // When the cursor is already at the first byte of an end-of-line sequence,
- // consume one or two bytes that compose the sequence.
- //
- // Basically, a newline is one of:
- //
- // "\n"
- // "\r"
- // "\r\n"
- // "\n\r"
- //
- // We always look for the longest match possible.
- //
- static void handleNewLineInner(Lexer* lexer, int c)
+ int d = peekRaw(lexer);
+ if( (c ^ d) == ('\n' ^ '\r') )
{
- assert(c == '\n' || c == '\r');
+ advanceRaw(lexer);
+ }
+
+ lexer->loc.Line++;
+ lexer->loc.Col = 1;
+ }
- int d = peekRaw(lexer);
- if( (c ^ d) == ('\n' ^ '\r') )
+ // Look ahead one code point, dealing with complications like
+ // escaped newlines.
+ static int peek(Lexer* lexer)
+ {
+ // Look at the next raw byte, and decide what to do
+ int c = peekRaw(lexer);
+
+ if(c == '\\')
+ {
+ // We might have a backslash-escaped newline.
+ // Look at the next byte (if any) to see.
+ //
+ // Note(tfoley): We are assuming a null-terminated input here,
+ // so that we can safely look at the next byte without issue.
+ int d = lexer->cursor[1];
+ switch (d)
{
- advanceRaw(lexer);
- }
+ case '\r': case '\n':
+ {
+ // The newline was escaped, so return the code point after *that*
- lexer->loc.Line++;
- lexer->loc.Col = 1;
+ int e = lexer->cursor[2];
+ if ((d ^ e) == ('\r' ^ '\n'))
+ return lexer->cursor[3];
+ return e;
+ }
+
+ default:
+ break;
+ }
}
+ // TODO: handle UTF-8 encoding for non-ASCII code points here
+
+ // Default case is to just hand along the byte we read as an ASCII code point.
+ return c;
+ }
- // Look ahead one code point, dealing with complications like
- // escaped newlines.
- static int peek(Lexer* lexer)
+ // Get the next code point from the input, and advance the cursor.
+ static int advance(Lexer* lexer)
+ {
+ // We are going to loop, but only as a way of handling
+ // escaped line endings.
+ for (;;)
{
+ // If we are at the end of the input, then the task is easy.
+ if (lexer->cursor == lexer->end)
+ return kEOF;
+
// Look at the next raw byte, and decide what to do
- int c = peekRaw(lexer);
+ int c = *lexer->cursor++;
- if(c == '\\')
+ if (c == '\\')
{
// We might have a backslash-escaped newline.
// Look at the next byte (if any) to see.
//
// Note(tfoley): We are assuming a null-terminated input here,
// so that we can safely look at the next byte without issue.
- int d = lexer->cursor[1];
+ int d = *lexer->cursor;
switch (d)
{
case '\r': case '\n':
- {
- // The newline was escaped, so return the code point after *that*
+ // handle the end-of-line for our source location tracking
+ lexer->cursor++;
+ handleNewLineInner(lexer, d);
- int e = lexer->cursor[2];
- if ((d ^ e) == ('\r' ^ '\n'))
- return lexer->cursor[3];
- return e;
- }
+ // Now try again, looking at the character after the
+ // escaped nmewline.
+ continue;
default:
break;
}
}
- // TODO: handle UTF-8 encoding for non-ASCII code points here
-
- // Default case is to just hand along the byte we read as an ASCII code point.
- return c;
- }
-
- // Get the next code point from the input, and advance the cursor.
- static int advance(Lexer* lexer)
- {
- // We are going to loop, but only as a way of handling
- // escaped line endings.
- for (;;)
- {
- // If we are at the end of the input, then the task is easy.
- if (lexer->cursor == lexer->end)
- return kEOF;
-
- // Look at the next raw byte, and decide what to do
- int c = *lexer->cursor++;
- if (c == '\\')
- {
- // We might have a backslash-escaped newline.
- // Look at the next byte (if any) to see.
- //
- // Note(tfoley): We are assuming a null-terminated input here,
- // so that we can safely look at the next byte without issue.
- int d = *lexer->cursor;
- switch (d)
- {
- case '\r': case '\n':
- // handle the end-of-line for our source location tracking
- lexer->cursor++;
- handleNewLineInner(lexer, d);
-
- // Now try again, looking at the character after the
- // escaped nmewline.
- continue;
+ // TODO: Need to handle non-ASCII code points.
- default:
- break;
- }
- }
+ // Default case is to advance by one location
+ // and return the raw byte we saw.
- // TODO: Need to handle non-ASCII code points.
+ lexer->loc.Col++;
+ lexer->loc.Pos++;
- // Default case is to advance by one location
- // and return the raw byte we saw.
-
- lexer->loc.Col++;
- lexer->loc.Pos++;
-
- return c;
- }
+ return c;
}
+ }
- static void handleNewLine(Lexer* lexer)
- {
- int c = advance(lexer);
- handleNewLineInner(lexer, c);
- }
+ static void handleNewLine(Lexer* lexer)
+ {
+ int c = advance(lexer);
+ handleNewLineInner(lexer, c);
+ }
- static void lexLineComment(Lexer* lexer)
+ static void lexLineComment(Lexer* lexer)
+ {
+ for(;;)
{
- for(;;)
+ switch(peek(lexer))
{
- switch(peek(lexer))
- {
- case '\n': case '\r': case kEOF:
- return;
+ case '\n': case '\r': case kEOF:
+ return;
- default:
- advance(lexer);
- continue;
- }
+ default:
+ advance(lexer);
+ continue;
}
}
+ }
- static void lexBlockComment(Lexer* lexer)
+ static void lexBlockComment(Lexer* lexer)
+ {
+ for(;;)
{
- for(;;)
+ switch(peek(lexer))
{
- switch(peek(lexer))
- {
- case kEOF:
- // TODO(tfoley) diagnostic!
- return;
+ case kEOF:
+ // TODO(tfoley) diagnostic!
+ return;
- case '\n': case '\r':
- handleNewLine(lexer);
- continue;
+ case '\n': case '\r':
+ handleNewLine(lexer);
+ continue;
- case '*':
+ case '*':
+ advance(lexer);
+ switch( peek(lexer) )
+ {
+ case '/':
advance(lexer);
- switch( peek(lexer) )
- {
- case '/':
- advance(lexer);
- return;
-
- default:
- continue;
- }
+ return;
default:
- advance(lexer);
continue;
}
+
+ default:
+ advance(lexer);
+ continue;
}
}
+ }
- static void lexHorizontalSpace(Lexer* lexer)
+ static void lexHorizontalSpace(Lexer* lexer)
+ {
+ for(;;)
{
- for(;;)
+ switch(peek(lexer))
{
- switch(peek(lexer))
- {
- case ' ': case '\t':
- advance(lexer);
- continue;
+ case ' ': case '\t':
+ advance(lexer);
+ continue;
- default:
- return;
- }
+ default:
+ return;
}
}
+ }
- static void lexIdentifier(Lexer* lexer)
+ static void lexIdentifier(Lexer* lexer)
+ {
+ for(;;)
{
- for(;;)
+ int c = peek(lexer);
+ if(('a' <= c ) && (c <= 'z')
+ || ('A' <= c) && (c <= 'Z')
+ || ('0' <= c) && (c <= '9')
+ || (c == '_'))
{
- int c = peek(lexer);
- if(('a' <= c ) && (c <= 'z')
- || ('A' <= c) && (c <= 'Z')
- || ('0' <= c) && (c <= '9')
- || (c == '_'))
- {
- advance(lexer);
- continue;
- }
-
- return;
+ advance(lexer);
+ continue;
}
+
+ return;
}
+ }
- static void lexDigits(Lexer* lexer, int base)
+ static void lexDigits(Lexer* lexer, int base)
+ {
+ for(;;)
{
- for(;;)
- {
- int c = peek(lexer);
-
- int digitVal = 0;
- switch(c)
- {
- case '0': case '1': case '2': case '3': case '4':
- case '5': case '6': case '7': case '8': case '9':
- digitVal = c - '0';
- break;
+ int c = peek(lexer);
- case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
- if(base <= 10) return;
- digitVal = 10 + c - 'a';
- break;
+ int digitVal = 0;
+ switch(c)
+ {
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ digitVal = c - '0';
+ break;
- case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
- if(base <= 10) return;
- digitVal = 10 + c - 'A';
- break;
+ case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
+ if(base <= 10) return;
+ digitVal = 10 + c - 'a';
+ break;
- default:
- // Not more digits!
- return;
- }
+ case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
+ if(base <= 10) return;
+ digitVal = 10 + c - 'A';
+ break;
- if(digitVal >= base)
- {
- char buffer[] = { (char) c, 0 };
- lexer->sink->diagnose(lexer->loc, Diagnostics::invalidDigitForBase, buffer, base);
- }
+ default:
+ // Not more digits!
+ return;
+ }
- advance(lexer);
+ if(digitVal >= base)
+ {
+ char buffer[] = { (char) c, 0 };
+ lexer->sink->diagnose(lexer->loc, Diagnostics::invalidDigitForBase, buffer, base);
}
+
+ advance(lexer);
}
+ }
- static TokenType maybeLexNumberSuffix(Lexer* lexer, TokenType tokenType)
+ static TokenType maybeLexNumberSuffix(Lexer* lexer, TokenType tokenType)
+ {
+ // First check for suffixes that
+ // indicate a floating-point number
+ switch(peek(lexer))
{
- // First check for suffixes that
- // indicate a floating-point number
- switch(peek(lexer))
- {
- case 'f': case 'F':
- advance(lexer);
- return TokenType::DoubleLiterial;
+ case 'f': case 'F':
+ advance(lexer);
+ return TokenType::DoubleLiterial;
- default:
- break;
- }
+ default:
+ break;
+ }
- // Once we've ruled out floating-point
- // suffixes, we can check for the inter cases
+ // Once we've ruled out floating-point
+ // suffixes, we can check for the inter cases
- // TODO: allow integer suffixes in any order...
+ // TODO: allow integer suffixes in any order...
- // Leading `u` or `U` for unsigned
- switch(peek(lexer))
- {
- default:
- break;
+ // Leading `u` or `U` for unsigned
+ switch(peek(lexer))
+ {
+ default:
+ break;
- case 'u': case 'U':
- advance(lexer);
- break;
- }
+ case 'u': case 'U':
+ advance(lexer);
+ break;
+ }
+
+ // Optional `l`, `L`, `ll`, or `LL`
+ switch(peek(lexer))
+ {
+ default:
+ break;
- // Optional `l`, `L`, `ll`, or `LL`
+ case 'l': case 'L':
+ advance(lexer);
switch(peek(lexer))
{
default:
@@ -402,720 +407,712 @@ namespace Slang
case 'l': case 'L':
advance(lexer);
- switch(peek(lexer))
- {
- default:
- break;
-
- case 'l': case 'L':
- advance(lexer);
- break;
- }
break;
}
+ break;
+ }
+
+ return tokenType;
+ }
- return tokenType;
+ static bool maybeLexNumberExponent(Lexer* lexer, int base)
+ {
+ switch( peek(lexer) )
+ {
+ default:
+ return false;
+
+ case 'e': case 'E':
+ if(base != 10) return false;
+ advance(lexer);
+ break;
+
+ case 'p': case 'P':
+ if(base != 16) return false;
+ advance(lexer);
+ break;
}
- static bool maybeLexNumberExponent(Lexer* lexer, int base)
+ // we saw an exponent marker, so we must
+ switch( peek(lexer) )
{
- switch( peek(lexer) )
- {
- default:
- return false;
+ case '+': case '-':
+ advance(lexer);
+ break;
+ }
- case 'e': case 'E':
- if(base != 10) return false;
- advance(lexer);
- break;
+ // TODO(tfoley): it would be an error to not see digits here...
- case 'p': case 'P':
- if(base != 16) return false;
- advance(lexer);
- break;
- }
+ lexDigits(lexer, 10);
- // we saw an exponent marker, so we must
- switch( peek(lexer) )
- {
- case '+': case '-':
- advance(lexer);
- break;
- }
+ return true;
+ }
- // TODO(tfoley): it would be an error to not see digits here...
+ static TokenType lexNumberAfterDecimalPoint(Lexer* lexer, int base)
+ {
+ lexDigits(lexer, base);
+ maybeLexNumberExponent(lexer, base);
+
+ return maybeLexNumberSuffix(lexer, TokenType::DoubleLiterial);
+ }
+
+ static TokenType lexNumber(Lexer* lexer, int base)
+ {
+ // TODO(tfoley): Need to consider whehter to allow any kind of digit separator character.
- lexDigits(lexer, 10);
+ TokenType tokenType = TokenType::IntLiterial;
- return true;
- }
+ // At the start of things, we just concern ourselves with digits
+ lexDigits(lexer, base);
- static TokenType lexNumberAfterDecimalPoint(Lexer* lexer, int base)
+ if( peek(lexer) == '.' )
{
+ tokenType = TokenType::DoubleLiterial;
+
+ advance(lexer);
lexDigits(lexer, base);
- maybeLexNumberExponent(lexer, base);
-
- return maybeLexNumberSuffix(lexer, TokenType::DoubleLiterial);
}
- static TokenType lexNumber(Lexer* lexer, int base)
+ if( maybeLexNumberExponent(lexer, base))
{
- // TODO(tfoley): Need to consider whehter to allow any kind of digit separator character.
-
- TokenType tokenType = TokenType::IntLiterial;
+ tokenType = TokenType::DoubleLiterial;
+ }
- // At the start of things, we just concern ourselves with digits
- lexDigits(lexer, base);
+ maybeLexNumberSuffix(lexer, tokenType);
+ return tokenType;
+ }
- if( peek(lexer) == '.' )
+ static void lexStringLiteralBody(Lexer* lexer, char quote)
+ {
+ for(;;)
+ {
+ int c = peek(lexer);
+ if(c == quote)
{
- tokenType = TokenType::DoubleLiterial;
-
advance(lexer);
- lexDigits(lexer, base);
+ return;
}
- if( maybeLexNumberExponent(lexer, base))
+ switch(c)
{
- tokenType = TokenType::DoubleLiterial;
- }
+ case kEOF:
+ lexer->sink->diagnose(lexer->loc, Diagnostics::endOfFileInLiteral);
+ return;
- maybeLexNumberSuffix(lexer, tokenType);
- return tokenType;
- }
+ case '\n': case '\r':
+ lexer->sink->diagnose(lexer->loc, Diagnostics::newlineInLiteral);
+ return;
- static void lexStringLiteralBody(Lexer* lexer, char quote)
- {
- for(;;)
- {
- int c = peek(lexer);
- if(c == quote)
+ case '\\':
+ // Need to handle various escape sequence cases
+ advance(lexer);
+ switch(peek(lexer))
{
+ case '\'':
+ case '\"':
+ case '\\':
+ case '?':
+ case 'a':
+ case 'b':
+ case 'f':
+ case 'n':
+ case 'r':
+ case 't':
+ case 'v':
advance(lexer);
- return;
- }
-
- switch(c)
- {
- case kEOF:
- lexer->sink->diagnose(lexer->loc, Diagnostics::endOfFileInLiteral);
- return;
-
- case '\n': case '\r':
- lexer->sink->diagnose(lexer->loc, Diagnostics::newlineInLiteral);
- return;
+ break;
- case '\\':
- // Need to handle various escape sequence cases
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7':
+ // octal escape: up to 3 characters
advance(lexer);
- switch(peek(lexer))
+ for(int ii = 0; ii < 3; ++ii)
{
- case '\'':
- case '\"':
- case '\\':
- case '?':
- case 'a':
- case 'b':
- case 'f':
- case 'n':
- case 'r':
- case 't':
- case 'v':
- advance(lexer);
- break;
-
- case '0': case '1': case '2': case '3': case '4':
- case '5': case '6': case '7':
- // octal escape: up to 3 characters
- advance(lexer);
- for(int ii = 0; ii < 3; ++ii)
+ int d = peek(lexer);
+ if(('0' <= d) && (d <= '7'))
{
- int d = peek(lexer);
- if(('0' <= d) && (d <= '7'))
- {
- advance(lexer);
- continue;
- }
- else
- {
- break;
- }
+ advance(lexer);
+ continue;
}
- break;
-
- case 'x':
- // hexadecimal escape: any number of characters
- advance(lexer);
- for(;;)
+ else
{
- int d = peek(lexer);
- if(('0' <= d) && (d <= '9')
- || ('a' <= d) && (d <= 'f')
- || ('A' <= d) && (d <= 'F'))
- {
- advance(lexer);
- continue;
- }
- else
- {
- break;
- }
+ break;
}
- break;
-
- // TODO: Unicode escape sequences
-
}
break;
- default:
+ case 'x':
+ // hexadecimal escape: any number of characters
advance(lexer);
- continue;
+ for(;;)
+ {
+ int d = peek(lexer);
+ if(('0' <= d) && (d <= '9')
+ || ('a' <= d) && (d <= 'f')
+ || ('A' <= d) && (d <= 'F'))
+ {
+ advance(lexer);
+ continue;
+ }
+ else
+ {
+ break;
+ }
+ }
+ break;
+
+ // TODO: Unicode escape sequences
+
}
+ break;
+
+ default:
+ advance(lexer);
+ continue;
}
}
+ }
- String getStringLiteralTokenValue(Token const& token)
- {
- assert(token.Type == TokenType::StringLiterial
- || token.Type == TokenType::CharLiterial);
+ String getStringLiteralTokenValue(Token const& token)
+ {
+ assert(token.Type == TokenType::StringLiterial
+ || token.Type == TokenType::CharLiterial);
- char const* cursor = token.Content.begin();
- char const* end = token.Content.end();
+ char const* cursor = token.Content.begin();
+ char const* end = token.Content.end();
- auto quote = *cursor++;
- assert(quote == '\'' || quote == '"');
+ auto quote = *cursor++;
+ assert(quote == '\'' || quote == '"');
- StringBuilder valueBuilder;
- for(;;)
- {
- assert(cursor != end);
+ StringBuilder valueBuilder;
+ for(;;)
+ {
+ assert(cursor != end);
- auto c = *cursor++;
+ auto c = *cursor++;
- // If we see a closing quote, then we are at the end of the string literal
- if(c == quote)
- {
- assert(cursor == end);
- return valueBuilder.ProduceString();
- }
+ // If we see a closing quote, then we are at the end of the string literal
+ if(c == quote)
+ {
+ assert(cursor == end);
+ return valueBuilder.ProduceString();
+ }
- // Charcters that don't being escape sequences are easy;
- // just append them to the buffer and move on.
- if(c != '\\')
- {
- valueBuilder.Append(c);
- continue;
- }
+ // Charcters that don't being escape sequences are easy;
+ // just append them to the buffer and move on.
+ if(c != '\\')
+ {
+ valueBuilder.Append(c);
+ continue;
+ }
- // Now we look at another character to figure out the kind of
- // escape sequence we are dealing with:
+ // Now we look at another character to figure out the kind of
+ // escape sequence we are dealing with:
- int d = *cursor++;
+ int d = *cursor++;
- switch(d)
+ switch(d)
+ {
+ // Simple characters that just needed to be escaped
+ case '\'':
+ case '\"':
+ case '\\':
+ case '?':
+ valueBuilder.Append(d);
+ continue;
+
+ // Traditional escape sequences for special characters
+ case 'a': valueBuilder.Append('\a'); continue;
+ case 'b': valueBuilder.Append('\b'); continue;
+ case 'f': valueBuilder.Append('\f'); continue;
+ case 'n': valueBuilder.Append('\n'); continue;
+ case 'r': valueBuilder.Append('\r'); continue;
+ case 't': valueBuilder.Append('\t'); continue;
+ case 'v': valueBuilder.Append('\v'); continue;
+
+ // Octal escape: up to 3 characterws
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7':
{
- // Simple characters that just needed to be escaped
- case '\'':
- case '\"':
- case '\\':
- case '?':
- valueBuilder.Append(d);
- continue;
-
- // Traditional escape sequences for special characters
- case 'a': valueBuilder.Append('\a'); continue;
- case 'b': valueBuilder.Append('\b'); continue;
- case 'f': valueBuilder.Append('\f'); continue;
- case 'n': valueBuilder.Append('\n'); continue;
- case 'r': valueBuilder.Append('\r'); continue;
- case 't': valueBuilder.Append('\t'); continue;
- case 'v': valueBuilder.Append('\v'); continue;
-
- // Octal escape: up to 3 characterws
- case '0': case '1': case '2': case '3': case '4':
- case '5': case '6': case '7':
+ cursor--;
+ int value = 0;
+ for(int ii = 0; ii < 3; ++ii)
{
- cursor--;
- int value = 0;
- for(int ii = 0; ii < 3; ++ii)
+ d = *cursor;
+ if(('0' <= d) && (d <= '7'))
{
- d = *cursor;
- if(('0' <= d) && (d <= '7'))
- {
- value = value*8 + (d - '0');
-
- cursor++;
- continue;
- }
- else
- {
- break;
- }
- }
+ value = value*8 + (d - '0');
- // TODO: add support for appending an arbitrary code point?
- valueBuilder.Append((char) value);
+ cursor++;
+ continue;
+ }
+ else
+ {
+ break;
+ }
}
- continue;
- // Hexadecimal escape: any number of characters
- case 'x':
+ // TODO: add support for appending an arbitrary code point?
+ valueBuilder.Append((char) value);
+ }
+ continue;
+
+ // Hexadecimal escape: any number of characters
+ case 'x':
+ {
+ cursor--;
+ int value = 0;
+ for(;;)
{
- cursor--;
- int value = 0;
- for(;;)
+ d = *cursor++;
+ int digitValue = 0;
+ if(('0' <= d) && (d <= '9'))
+ {
+ digitValue = d - '0';
+ }
+ else if( ('a' <= d) && (d <= 'f') )
+ {
+ digitValue = d - 'a';
+ }
+ else if( ('A' <= d) && (d <= 'F') )
{
- d = *cursor++;
- int digitValue = 0;
- if(('0' <= d) && (d <= '9'))
- {
- digitValue = d - '0';
- }
- else if( ('a' <= d) && (d <= 'f') )
- {
- digitValue = d - 'a';
- }
- else if( ('A' <= d) && (d <= 'F') )
- {
- digitValue = d - 'A';
- }
- else
- {
- cursor--;
- break;
- }
-
- value = value*16 + digitValue;
+ digitValue = d - 'A';
+ }
+ else
+ {
+ cursor--;
+ break;
}
- // TODO: add support for appending an arbitrary code point?
- valueBuilder.Append((char) value);
+ value = value*16 + digitValue;
}
- continue;
-
- // TODO: Unicode escape sequences
+ // TODO: add support for appending an arbitrary code point?
+ valueBuilder.Append((char) value);
}
+ continue;
+
+ // TODO: Unicode escape sequences
+
}
}
+ }
- String getFileNameTokenValue(Token const& token)
- {
- // A file name usually doesn't process escape sequences
- // (this is import on Windows, where `\\` is a valid
- // path separator cahracter).
+ String getFileNameTokenValue(Token const& token)
+ {
+ // A file name usually doesn't process escape sequences
+ // (this is import on Windows, where `\\` is a valid
+ // path separator cahracter).
- // Just trim off the first and last characters to remove the quotes
- // (whether they were `""` or `<>`.
- return token.Content.SubString(1, token.Content.Length()-2);
- }
+ // Just trim off the first and last characters to remove the quotes
+ // (whether they were `""` or `<>`.
+ return token.Content.SubString(1, token.Content.Length()-2);
+ }
- static TokenType lexTokenImpl(Lexer* lexer)
+ static TokenType lexTokenImpl(Lexer* lexer)
+ {
+ switch(peek(lexer))
{
+ default:
+ break;
+
+ case kEOF:
+ if((lexer->lexerFlags & kLexerFlag_InDirective) != 0)
+ return TokenType::EndOfDirective;
+ return TokenType::EndOfFile;
+
+ case '\r': case '\n':
+ if((lexer->lexerFlags & kLexerFlag_InDirective) != 0)
+ return TokenType::EndOfDirective;
+ handleNewLine(lexer);
+ return TokenType::NewLine;
+
+ case ' ': case '\t':
+ lexHorizontalSpace(lexer);
+ return TokenType::WhiteSpace;
+
+ case '.':
+ advance(lexer);
switch(peek(lexer))
{
- default:
- break;
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ return lexNumberAfterDecimalPoint(lexer, 10);
- case kEOF:
- if((lexer->lexerFlags & kLexerFlag_InDirective) != 0)
- return TokenType::EndOfDirective;
- return TokenType::EndOfFile;
+ // TODO(tfoley): handle ellipsis (`...`)
- case '\r': case '\n':
- if((lexer->lexerFlags & kLexerFlag_InDirective) != 0)
- return TokenType::EndOfDirective;
- handleNewLine(lexer);
- return TokenType::NewLine;
+ default:
+ return TokenType::Dot;
+ }
- case ' ': case '\t':
- lexHorizontalSpace(lexer);
- return TokenType::WhiteSpace;
+ case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ return lexNumber(lexer, 10);
- case '.':
+ case '0':
+ {
+ auto loc = lexer->loc;
advance(lexer);
switch(peek(lexer))
{
- case '0': case '1': case '2': case '3': case '4':
- case '5': case '6': case '7': case '8': case '9':
- return lexNumberAfterDecimalPoint(lexer, 10);
-
- // TODO(tfoley): handle ellipsis (`...`)
-
default:
- return TokenType::Dot;
- }
+ return TokenType::IntLiterial;
- case '1': case '2': case '3': case '4':
- case '5': case '6': case '7': case '8': case '9':
- return lexNumber(lexer, 10);
-
- case '0':
- {
- auto loc = lexer->loc;
+ case '.':
advance(lexer);
- switch(peek(lexer))
- {
- default:
- return TokenType::IntLiterial;
-
- case '.':
- advance(lexer);
- return lexNumberAfterDecimalPoint(lexer, 10);
+ return lexNumberAfterDecimalPoint(lexer, 10);
- case 'x': case 'X':
- advance(lexer);
- return lexNumber(lexer, 16);
+ case 'x': case 'X':
+ advance(lexer);
+ return lexNumber(lexer, 16);
- case 'b': case 'B':
- advance(lexer);
- return lexNumber(lexer, 2);
+ case 'b': case 'B':
+ advance(lexer);
+ return lexNumber(lexer, 2);
- case '0': case '1': case '2': case '3': case '4':
- case '5': case '6': case '7': case '8': case '9':
- lexer->sink->diagnose(loc, Diagnostics::octalLiteral);
- return lexNumber(lexer, 8);
- }
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ lexer->sink->diagnose(loc, Diagnostics::octalLiteral);
+ return lexNumber(lexer, 8);
}
+ }
- case 'a': case 'b': case 'c': case 'd': case 'e':
- case 'f': case 'g': case 'h': case 'i': case 'j':
- case 'k': case 'l': case 'm': case 'n': case 'o':
- case 'p': case 'q': case 'r': case 's': case 't':
- case 'u': case 'v': case 'w': case 'x': case 'y':
- case 'z':
- case 'A': case 'B': case 'C': case 'D': case 'E':
- case 'F': case 'G': case 'H': case 'I': case 'J':
- case 'K': case 'L': case 'M': case 'N': case 'O':
- case 'P': case 'Q': case 'R': case 'S': case 'T':
- case 'U': case 'V': case 'W': case 'X': case 'Y':
- case 'Z':
- case '_':
- lexIdentifier(lexer);
- return TokenType::Identifier;
-
- case '\"':
- advance(lexer);
- lexStringLiteralBody(lexer, '\"');
- return TokenType::StringLiterial;
-
- case '\'':
- advance(lexer);
- lexStringLiteralBody(lexer, '\'');
- return TokenType::CharLiterial;
-
- case '+':
- advance(lexer);
- switch(peek(lexer))
- {
- case '+': advance(lexer); return TokenType::OpInc;
- case '=': advance(lexer); return TokenType::OpAddAssign;
- default:
- return TokenType::OpAdd;
- }
+ case 'a': case 'b': case 'c': case 'd': case 'e':
+ case 'f': case 'g': case 'h': case 'i': case 'j':
+ case 'k': case 'l': case 'm': case 'n': case 'o':
+ case 'p': case 'q': case 'r': case 's': case 't':
+ case 'u': case 'v': case 'w': case 'x': case 'y':
+ case 'z':
+ case 'A': case 'B': case 'C': case 'D': case 'E':
+ case 'F': case 'G': case 'H': case 'I': case 'J':
+ case 'K': case 'L': case 'M': case 'N': case 'O':
+ case 'P': case 'Q': case 'R': case 'S': case 'T':
+ case 'U': case 'V': case 'W': case 'X': case 'Y':
+ case 'Z':
+ case '_':
+ lexIdentifier(lexer);
+ return TokenType::Identifier;
+
+ case '\"':
+ advance(lexer);
+ lexStringLiteralBody(lexer, '\"');
+ return TokenType::StringLiterial;
+
+ case '\'':
+ advance(lexer);
+ lexStringLiteralBody(lexer, '\'');
+ return TokenType::CharLiterial;
+
+ case '+':
+ advance(lexer);
+ switch(peek(lexer))
+ {
+ case '+': advance(lexer); return TokenType::OpInc;
+ case '=': advance(lexer); return TokenType::OpAddAssign;
+ default:
+ return TokenType::OpAdd;
+ }
- case '-':
- advance(lexer);
- switch(peek(lexer))
- {
- case '-': advance(lexer); return TokenType::OpDec;
- case '=': advance(lexer); return TokenType::OpSubAssign;
- case '>': advance(lexer); return TokenType::RightArrow;
- default:
- return TokenType::OpSub;
- }
+ case '-':
+ advance(lexer);
+ switch(peek(lexer))
+ {
+ case '-': advance(lexer); return TokenType::OpDec;
+ case '=': advance(lexer); return TokenType::OpSubAssign;
+ case '>': advance(lexer); return TokenType::RightArrow;
+ default:
+ return TokenType::OpSub;
+ }
- case '*':
- advance(lexer);
- switch(peek(lexer))
- {
- case '=': advance(lexer); return TokenType::OpMulAssign;
- default:
- return TokenType::OpMul;
- }
+ case '*':
+ advance(lexer);
+ switch(peek(lexer))
+ {
+ case '=': advance(lexer); return TokenType::OpMulAssign;
+ default:
+ return TokenType::OpMul;
+ }
- case '/':
- advance(lexer);
- switch(peek(lexer))
- {
- case '=': advance(lexer); return TokenType::OpDivAssign;
- case '/': advance(lexer); lexLineComment(lexer); return TokenType::LineComment;
- case '*': advance(lexer); lexBlockComment(lexer); return TokenType::BlockComment;
- default:
- return TokenType::OpDiv;
- }
+ case '/':
+ advance(lexer);
+ switch(peek(lexer))
+ {
+ case '=': advance(lexer); return TokenType::OpDivAssign;
+ case '/': advance(lexer); lexLineComment(lexer); return TokenType::LineComment;
+ case '*': advance(lexer); lexBlockComment(lexer); return TokenType::BlockComment;
+ default:
+ return TokenType::OpDiv;
+ }
- case '%':
- advance(lexer);
- switch(peek(lexer))
- {
- case '=': advance(lexer); return TokenType::OpModAssign;
- default:
- return TokenType::OpMod;
- }
+ case '%':
+ advance(lexer);
+ switch(peek(lexer))
+ {
+ case '=': advance(lexer); return TokenType::OpModAssign;
+ default:
+ return TokenType::OpMod;
+ }
- case '|':
- advance(lexer);
- switch(peek(lexer))
- {
- case '|': advance(lexer); return TokenType::OpOr;
- case '=': advance(lexer); return TokenType::OpOrAssign;
- default:
- return TokenType::OpBitOr;
- }
+ case '|':
+ advance(lexer);
+ switch(peek(lexer))
+ {
+ case '|': advance(lexer); return TokenType::OpOr;
+ case '=': advance(lexer); return TokenType::OpOrAssign;
+ default:
+ return TokenType::OpBitOr;
+ }
- case '&':
- advance(lexer);
- switch(peek(lexer))
- {
- case '&': advance(lexer); return TokenType::OpAnd;
- case '=': advance(lexer); return TokenType::OpAndAssign;
- default:
- return TokenType::OpBitAnd;
- }
+ case '&':
+ advance(lexer);
+ switch(peek(lexer))
+ {
+ case '&': advance(lexer); return TokenType::OpAnd;
+ case '=': advance(lexer); return TokenType::OpAndAssign;
+ default:
+ return TokenType::OpBitAnd;
+ }
- case '^':
- advance(lexer);
- switch(peek(lexer))
- {
- case '=': advance(lexer); return TokenType::OpXorAssign;
- default:
- return TokenType::OpBitXor;
- }
+ case '^':
+ advance(lexer);
+ switch(peek(lexer))
+ {
+ case '=': advance(lexer); return TokenType::OpXorAssign;
+ default:
+ return TokenType::OpBitXor;
+ }
+ case '>':
+ advance(lexer);
+ switch(peek(lexer))
+ {
case '>':
advance(lexer);
switch(peek(lexer))
{
- case '>':
- advance(lexer);
- switch(peek(lexer))
- {
- case '=': advance(lexer); return TokenType::OpShrAssign;
- default: return TokenType::OpRsh;
- }
- case '=': advance(lexer); return TokenType::OpGeq;
- default:
- return TokenType::OpGreater;
+ case '=': advance(lexer); return TokenType::OpShrAssign;
+ default: return TokenType::OpRsh;
}
+ case '=': advance(lexer); return TokenType::OpGeq;
+ default:
+ return TokenType::OpGreater;
+ }
+ case '<':
+ advance(lexer);
+ switch(peek(lexer))
+ {
case '<':
advance(lexer);
switch(peek(lexer))
{
- case '<':
- advance(lexer);
- switch(peek(lexer))
- {
- case '=': advance(lexer); return TokenType::OpShlAssign;
- default: return TokenType::OpLsh;
- }
- case '=': advance(lexer); return TokenType::OpLeq;
- default:
- return TokenType::OpLess;
- }
-
- case '=':
- advance(lexer);
- switch(peek(lexer))
- {
- case '=': advance(lexer); return TokenType::OpEql;
- default:
- return TokenType::OpAssign;
+ case '=': advance(lexer); return TokenType::OpShlAssign;
+ default: return TokenType::OpLsh;
}
+ case '=': advance(lexer); return TokenType::OpLeq;
+ default:
+ return TokenType::OpLess;
+ }
- case '!':
- advance(lexer);
- switch(peek(lexer))
- {
- case '=': advance(lexer); return TokenType::OpNeq;
- default:
- return TokenType::OpNot;
- }
+ case '=':
+ advance(lexer);
+ switch(peek(lexer))
+ {
+ case '=': advance(lexer); return TokenType::OpEql;
+ default:
+ return TokenType::OpAssign;
+ }
- case '#':
- advance(lexer);
- switch(peek(lexer))
- {
- case '#': advance(lexer); return TokenType::PoundPound;
- default:
- return TokenType::Pound;
- }
+ case '!':
+ advance(lexer);
+ switch(peek(lexer))
+ {
+ case '=': advance(lexer); return TokenType::OpNeq;
+ default:
+ return TokenType::OpNot;
+ }
- case '~': advance(lexer); return TokenType::OpBitNot;
+ case '#':
+ advance(lexer);
+ switch(peek(lexer))
+ {
+ case '#': advance(lexer); return TokenType::PoundPound;
+ default:
+ return TokenType::Pound;
+ }
- case ':': advance(lexer); return TokenType::Colon;
- case ';': advance(lexer); return TokenType::Semicolon;
- case ',': advance(lexer); return TokenType::Comma;
+ case '~': advance(lexer); return TokenType::OpBitNot;
- case '{': advance(lexer); return TokenType::LBrace;
- case '}': advance(lexer); return TokenType::RBrace;
- case '[': advance(lexer); return TokenType::LBracket;
- case ']': advance(lexer); return TokenType::RBracket;
- case '(': advance(lexer); return TokenType::LParent;
- case ')': advance(lexer); return TokenType::RParent;
+ case ':': advance(lexer); return TokenType::Colon;
+ case ';': advance(lexer); return TokenType::Semicolon;
+ case ',': advance(lexer); return TokenType::Comma;
- case '?': advance(lexer); return TokenType::QuestionMark;
- case '@': advance(lexer); return TokenType::At;
- case '$': advance(lexer); return TokenType::Dollar;
+ case '{': advance(lexer); return TokenType::LBrace;
+ case '}': advance(lexer); return TokenType::RBrace;
+ case '[': advance(lexer); return TokenType::LBracket;
+ case ']': advance(lexer); return TokenType::RBracket;
+ case '(': advance(lexer); return TokenType::LParent;
+ case ')': advance(lexer); return TokenType::RParent;
- }
+ case '?': advance(lexer); return TokenType::QuestionMark;
+ case '@': advance(lexer); return TokenType::At;
+ case '$': advance(lexer); return TokenType::Dollar;
- // TODO(tfoley): If we ever wanted to support proper Unicode
- // in identifiers, etc., then this would be the right place
- // to perform a more expensive dispatch based on the actual
- // code point (and not just the first byte).
+ }
- {
- // If none of the above cases matched, then we have an
- // unexpected/invalid character.
+ // TODO(tfoley): If we ever wanted to support proper Unicode
+ // in identifiers, etc., then this would be the right place
+ // to perform a more expensive dispatch based on the actual
+ // code point (and not just the first byte).
- auto loc = lexer->loc;
- auto sink = lexer->sink;
- int c = advance(lexer);
- if(c >= 0x20 && c <= 0x7E)
- {
- char buffer[] = { (char) c, 0 };
- sink->diagnose(loc, Diagnostics::illegalCharacterPrint, buffer);
- }
- else
- {
- // Fallback: print as hexadecimal
- sink->diagnose(loc, Diagnostics::illegalCharacterHex, String((unsigned char)c, 16));
- }
+ {
+ // If none of the above cases matched, then we have an
+ // unexpected/invalid character.
- return TokenType::Invalid;
+ auto loc = lexer->loc;
+ auto sink = lexer->sink;
+ int c = advance(lexer);
+ if(c >= 0x20 && c <= 0x7E)
+ {
+ char buffer[] = { (char) c, 0 };
+ sink->diagnose(loc, Diagnostics::illegalCharacterPrint, buffer);
}
- }
-
- Token Lexer::lexToken()
- {
- auto flags = this->tokenFlags;
- for(;;)
+ else
{
- Token token;
- token.Position = loc;
-
- char const* textBegin = cursor;
+ // Fallback: print as hexadecimal
+ sink->diagnose(loc, Diagnostics::illegalCharacterHex, String((unsigned char)c, 16));
+ }
- auto tokenType = lexTokenImpl(this);
+ return TokenType::Invalid;
+ }
+ }
- // The low-level lexer produces tokens for things we want
- // to ignore, such as white space, so we skip them here.
- switch(tokenType)
- {
- case TokenType::Invalid:
- flags = 0;
- continue;
+ Token Lexer::lexToken()
+ {
+ auto flags = this->tokenFlags;
+ for(;;)
+ {
+ Token token;
+ token.Position = loc;
- case TokenType::NewLine:
- flags = TokenFlag::AtStartOfLine | TokenFlag::AfterWhitespace;
- continue;
+ char const* textBegin = cursor;
- case TokenType::WhiteSpace:
- case TokenType::LineComment:
- case TokenType::BlockComment:
- flags |= TokenFlag::AfterWhitespace;
- continue;
+ auto tokenType = lexTokenImpl(this);
- // We don't want to skip the end-of-file token, but we *do*
- // want to make sure it has appropriate flags to make our life easier
- case TokenType::EndOfFile:
- flags = TokenFlag::AtStartOfLine | TokenFlag::AfterWhitespace;
- break;
+ // The low-level lexer produces tokens for things we want
+ // to ignore, such as white space, so we skip them here.
+ switch(tokenType)
+ {
+ case TokenType::Invalid:
+ flags = 0;
+ continue;
+
+ case TokenType::NewLine:
+ flags = TokenFlag::AtStartOfLine | TokenFlag::AfterWhitespace;
+ continue;
+
+ case TokenType::WhiteSpace:
+ case TokenType::LineComment:
+ case TokenType::BlockComment:
+ flags |= TokenFlag::AfterWhitespace;
+ continue;
+
+ // We don't want to skip the end-of-file token, but we *do*
+ // want to make sure it has appropriate flags to make our life easier
+ case TokenType::EndOfFile:
+ flags = TokenFlag::AtStartOfLine | TokenFlag::AfterWhitespace;
+ break;
- // We will also do some book-keeping around preprocessor directives here:
- //
- // If we see a `#` at the start of a line, then we are entering a
- // preprocessor directive.
- case TokenType::Pound:
- if((flags & TokenFlag::AtStartOfLine) != 0)
- lexerFlags |= kLexerFlag_InDirective;
- break;
- //
- // And if we saw an end-of-line during a directive, then we are
- // now leaving that directive.
- //
- case TokenType::EndOfDirective:
- lexerFlags &= ~kLexerFlag_InDirective;
- break;
+ // We will also do some book-keeping around preprocessor directives here:
+ //
+ // If we see a `#` at the start of a line, then we are entering a
+ // preprocessor directive.
+ case TokenType::Pound:
+ if((flags & TokenFlag::AtStartOfLine) != 0)
+ lexerFlags |= kLexerFlag_InDirective;
+ break;
+ //
+ // And if we saw an end-of-line during a directive, then we are
+ // now leaving that directive.
+ //
+ case TokenType::EndOfDirective:
+ lexerFlags &= ~kLexerFlag_InDirective;
+ break;
- default:
- break;
- }
+ default:
+ break;
+ }
- token.Type = tokenType;
+ token.Type = tokenType;
- char const* textEnd = cursor;
+ char const* textEnd = cursor;
- // Note(tfoley): `StringBuilder::Append()` seems to crash when appending zero bytes
- if(textEnd != textBegin)
- {
- StringBuilder valueBuilder;
- valueBuilder.Append(textBegin, int(textEnd - textBegin));
- token.Content = valueBuilder.ProduceString();
- }
+ // Note(tfoley): `StringBuilder::Append()` seems to crash when appending zero bytes
+ if(textEnd != textBegin)
+ {
+ StringBuilder valueBuilder;
+ valueBuilder.Append(textBegin, int(textEnd - textBegin));
+ token.Content = valueBuilder.ProduceString();
+ }
- token.flags = flags;
+ token.flags = flags;
- this->tokenFlags = 0;
+ this->tokenFlags = 0;
- return token;
- }
+ return token;
}
+ }
- TokenList Lexer::lexAllTokens()
+ TokenList Lexer::lexAllTokens()
+ {
+ TokenList tokenList;
+ for(;;)
{
- TokenList tokenList;
- for(;;)
- {
- Token token = lexToken();
- tokenList.mTokens.Add(token);
+ Token token = lexToken();
+ tokenList.mTokens.Add(token);
- if(token.Type == TokenType::EndOfFile)
- return tokenList;
- }
+ if(token.Type == TokenType::EndOfFile)
+ return tokenList;
}
+ }
#if 0
- TokenList Lexer::Parse(const String & fileName, const String & str, DiagnosticSink * sink)
+ TokenList Lexer::Parse(const String & fileName, const String & str, DiagnosticSink * sink)
+ {
+ TokenList tokenList;
+ tokenList.mTokens = TokenizeText(fileName, str, [&](TokenizeErrorType errType, CodePosition pos)
{
- TokenList tokenList;
- tokenList.mTokens = TokenizeText(fileName, str, [&](TokenizeErrorType errType, CodePosition pos)
+ auto curChar = str[pos.Pos];
+ switch (errType)
{
- auto curChar = str[pos.Pos];
- switch (errType)
+ case TokenizeErrorType::InvalidCharacter:
+ // Check if inside the ASCII "printable" range
+ if(curChar >= 0x20 && curChar <= 0x7E)
{
- case TokenizeErrorType::InvalidCharacter:
- // Check if inside the ASCII "printable" range
- if(curChar >= 0x20 && curChar <= 0x7E)
- {
- char buffer[] = { curChar, 0 };
- sink->diagnose(pos, Diagnostics::illegalCharacterPrint, buffer);
- }
- else
- {
- // Fallback: print as hexadecimal
- sink->diagnose(pos, Diagnostics::illegalCharacterHex, String((unsigned char)curChar, 16));
- }
- break;
- case TokenizeErrorType::InvalidEscapeSequence:
- sink->diagnose(pos, Diagnostics::illegalCharacterLiteral);
- break;
- default:
- break;
+ char buffer[] = { curChar, 0 };
+ sink->diagnose(pos, Diagnostics::illegalCharacterPrint, buffer);
+ }
+ else
+ {
+ // Fallback: print as hexadecimal
+ sink->diagnose(pos, Diagnostics::illegalCharacterHex, String((unsigned char)curChar, 16));
}
- });
+ break;
+ case TokenizeErrorType::InvalidEscapeSequence:
+ sink->diagnose(pos, Diagnostics::illegalCharacterLiteral);
+ break;
+ default:
+ break;
+ }
+ });
- // Add an end-of-file token so that we can reference it in diagnostic messages
- tokenList.mTokens.Add(Token(TokenType::EndOfFile, "", 0, 0, 0, fileName, TokenFlag::AtStartOfLine | TokenFlag::AfterWhitespace));
- return tokenList;
- }
-#endif
+ // Add an end-of-file token so that we can reference it in diagnostic messages
+ tokenList.mTokens.Add(Token(TokenType::EndOfFile, "", 0, 0, 0, fileName, TokenFlag::AtStartOfLine | TokenFlag::AfterWhitespace));
+ return tokenList;
}
+#endif
} \ No newline at end of file