summary refs log tree commit diff stats
path: root/source/compiler-core/slang-lexer.cpp
diff options
context:
space:
mode:
author Ellie Hermaszewska <ellieh@nvidia.com> 2024-10-29 14:49:26 +0800
committer GitHub <noreply@github.com> 2024-10-29 14:49:26 +0800
commit f65d756bff8d4c5cbc15bd0322a2ae8e6b896a21 (patch)
tree ea1d61342cd29368e19135000ec2948813096205 /source/compiler-core/slang-lexer.cpp
parent a729c15e9dce9f5116a38afc66329ab2ca4cea54 (diff)
format
* format * Minor test fixes * enable checking cpp format in ci
Diffstat (limited to 'source/compiler-core/slang-lexer.cpp')
-rw-r--r-- source/compiler-core/slang-lexer.cpp 2484
1 files changed, 1271 insertions, 1213 deletions
diff --git a/source/compiler-core/slang-lexer.cpp b/source/compiler-core/slang-lexer.cpp
index 366af9114..df95f5f1e 100644
--- a/source/compiler-core/slang-lexer.cpp
+++ b/source/compiler-core/slang-lexer.cpp
@@ -6,196 +6,197 @@
//
#include "core/slang-char-encode.h"
+#include "slang-core-diagnostics.h"
#include "slang-name.h"
#include "slang-source-loc.h"
-#include "slang-core-diagnostics.h"
namespace Slang
{
- Token TokenReader::getEndOfFileToken()
- {
- return Token(TokenType::EndOfFile, UnownedStringSlice::fromLiteral(""), SourceLoc());
- }
+Token TokenReader::getEndOfFileToken()
+{
+ return Token(TokenType::EndOfFile, UnownedStringSlice::fromLiteral(""), SourceLoc());
+}
- const Token* TokenList::begin() const
- {
- SLANG_ASSERT(m_tokens.getCount());
- return &m_tokens[0];
- }
+const Token* TokenList::begin() const
+{
+ SLANG_ASSERT(m_tokens.getCount());
+ return &m_tokens[0];
+}
- const Token* TokenList::end() const
- {
- SLANG_ASSERT(m_tokens.getCount());
- SLANG_ASSERT(m_tokens[m_tokens.getCount() - 1].type == TokenType::EndOfFile);
- return &m_tokens[m_tokens.getCount() - 1];
- }
+const Token* TokenList::end() const
+{
+ SLANG_ASSERT(m_tokens.getCount());
+ SLANG_ASSERT(m_tokens[m_tokens.getCount() - 1].type == TokenType::EndOfFile);
+ return &m_tokens[m_tokens.getCount() - 1];
+}
- TokenSpan::TokenSpan()
- : m_begin(nullptr)
- , m_end (nullptr)
- {}
+TokenSpan::TokenSpan()
+ : m_begin(nullptr), m_end(nullptr)
+{
+}
- TokenReader::TokenReader()
- : m_cursor(nullptr)
- , m_end (nullptr)
- {
- _updateLookaheadToken();
- }
+TokenReader::TokenReader()
+ : m_cursor(nullptr), m_end(nullptr)
+{
+ _updateLookaheadToken();
+}
- Token& TokenReader::peekToken()
- {
- return m_nextToken;
- }
+Token& TokenReader::peekToken()
+{
+ return m_nextToken;
+}
- TokenType TokenReader::peekTokenType() const
- {
- return m_nextToken.type;
- }
+TokenType TokenReader::peekTokenType() const
+{
+ return m_nextToken.type;
+}
- SourceLoc TokenReader::peekLoc() const
- {
- return m_nextToken.loc;
- }
+SourceLoc TokenReader::peekLoc() const
+{
+ return m_nextToken.loc;
+}
- Token TokenReader::advanceToken()
- {
- Token result = m_nextToken;
- if (m_cursor != m_end)
- m_cursor++;
- _updateLookaheadToken();
- return result;
- }
+Token TokenReader::advanceToken()
+{
+ Token result = m_nextToken;
+ if (m_cursor != m_end)
+ m_cursor++;
+ _updateLookaheadToken();
+ return result;
+}
- void TokenReader::_updateLookaheadToken()
- {
- // We assume here that we can read a token from a non-null `m_cursor`
- // *even* in the case where `m_cursor == m_end`, because the invariant
- // for lists of tokens is that they should be terminated with and
- // end-of-file token, so that there is always a token "one past the end."
- //
- m_nextToken = m_cursor ? *m_cursor : getEndOfFileToken();
+void TokenReader::_updateLookaheadToken()
+{
+ // We assume here that we can read a token from a non-null `m_cursor`
+ // *even* in the case where `m_cursor == m_end`, because the invariant
+ // for lists of tokens is that they should be terminated with an
+ // end-of-file token, so that there is always a token "one past the end."
+ //
+ m_nextToken = m_cursor ? *m_cursor : getEndOfFileToken();
- // If the token we read came from the end of the sub-sequence we are
- // reading, then we will change the token type to an end-of-file token
- // so that code that reads from the sequence and expects a terminating
- // EOF will find it.
- //
- // TODO: We might eventually want a way to look at the actual token type
- // and not just use EOF in all cases: e.g., when emitting diagnostic
- // messages that include the token that is seen.
- //
- if(m_cursor == m_end)
- m_nextToken.type = TokenType::EndOfFile;
- }
+ // If the token we read came from the end of the sub-sequence we are
+ // reading, then we will change the token type to an end-of-file token
+ // so that code that reads from the sequence and expects a terminating
+ // EOF will find it.
+ //
+ // TODO: We might eventually want a way to look at the actual token type
+ // and not just use EOF in all cases: e.g., when emitting diagnostic
+ // messages that include the token that is seen.
+ //
+ if (m_cursor == m_end)
+ m_nextToken.type = TokenType::EndOfFile;
+}
- // Lexer
+// Lexer
- void Lexer::initialize(
- SourceView* sourceView,
- DiagnosticSink* sink,
- NamePool* namePool,
- MemoryArena* memoryArena)
- {
- m_sourceView = sourceView;
- m_sink = sink;
- m_namePool = namePool;
- m_memoryArena = memoryArena;
-
- auto content = sourceView->getContent();
-
- m_begin = content.begin();
- m_cursor = content.begin();
- m_end = content.end();
-
- // Set the start location
- m_startLoc = sourceView->getRange().begin;
-
- // The first token read from a translation unit should be considered to be at
- // the start of a line, and *also* as coming after whitespace (conceptually
- // both the end-of-file and beginning-of-file pseudo-tokens are whitespace).
- //
- m_tokenFlags = TokenFlag::AtStartOfLine | TokenFlag::AfterWhitespace;
- m_lexerFlags = 0;
- }
+void Lexer::initialize(
+ SourceView* sourceView,
+ DiagnosticSink* sink,
+ NamePool* namePool,
+ MemoryArena* memoryArena)
+{
+ m_sourceView = sourceView;
+ m_sink = sink;
+ m_namePool = namePool;
+ m_memoryArena = memoryArena;
- Lexer::~Lexer()
- {
- }
+ auto content = sourceView->getContent();
- enum { kEOF = -1 };
+ m_begin = content.begin();
+ m_cursor = content.begin();
+ m_end = content.end();
- // Get the next input byte, without any handling of
- // escaped newlines, non-ASCII code points, source locations, etc.
- static int _peekRaw(Lexer* lexer)
- {
- // If we are at the end of the input, return a designated end-of-file value
- if(lexer->m_cursor == lexer->m_end)
- return kEOF;
+ // Set the start location
+ m_startLoc = sourceView->getRange().begin;
- // Otherwise, just look at the next byte
- return *lexer->m_cursor;
- }
+ // The first token read from a translation unit should be considered to be at
+ // the start of a line, and *also* as coming after whitespace (conceptually
+ // both the end-of-file and beginning-of-file pseudo-tokens are whitespace).
+ //
+ m_tokenFlags = TokenFlag::AtStartOfLine | TokenFlag::AfterWhitespace;
+ m_lexerFlags = 0;
+}
- // Read one input byte without any special handling (similar to `peekRaw`)
- static int _advanceRaw(Lexer* lexer)
- {
- // The logic here is basically the same as for `peekRaw()`,
- // escape we advance `cursor` if we aren't at the end.
+Lexer::~Lexer() {}
- if (lexer->m_cursor == lexer->m_end)
- return kEOF;
+enum
+{
+ kEOF = -1
+};
- return *lexer->m_cursor++;
- }
+// Get the next input byte, without any handling of
+// escaped newlines, non-ASCII code points, source locations, etc.
+static int _peekRaw(Lexer* lexer)
+{
+ // If we are at the end of the input, return a designated end-of-file value
+ if (lexer->m_cursor == lexer->m_end)
+ return kEOF;
- // When the cursor is already at the first byte of an end-of-line sequence,
- // consume one or two bytes that compose the sequence.
- //
- // Basically, a newline is one of:
- //
- // "\n"
- // "\r"
- // "\r\n"
- // "\n\r"
- //
- // We always look for the longest match possible.
- //
- static void _handleNewLineInner(Lexer* lexer, int c)
- {
- SLANG_ASSERT(c == '\n' || c == '\r');
+ // Otherwise, just look at the next byte
+ return *lexer->m_cursor;
+}
- int d = _peekRaw(lexer);
- if( (c ^ d) == ('\n' ^ '\r') )
- {
- _advanceRaw(lexer);
- }
- }
+// Read one input byte without any special handling (similar to `peekRaw`)
+static int _advanceRaw(Lexer* lexer)
+{
+ // The logic here is basically the same as for `peekRaw()`,
+ // except we advance `cursor` if we aren't at the end.
- // Look ahead one code point, dealing with complications like
- // escaped newlines.
- static int _peek(Lexer* lexer, int offset = 0)
+ if (lexer->m_cursor == lexer->m_end)
+ return kEOF;
+
+ return *lexer->m_cursor++;
+}
+
+// When the cursor is already at the first byte of an end-of-line sequence,
+// consume one or two bytes that compose the sequence.
+//
+// Basically, a newline is one of:
+//
+// "\n"
+// "\r"
+// "\r\n"
+// "\n\r"
+//
+// We always look for the longest match possible.
+//
+static void _handleNewLineInner(Lexer* lexer, int c)
+{
+ SLANG_ASSERT(c == '\n' || c == '\r');
+
+ int d = _peekRaw(lexer);
+ if ((c ^ d) == ('\n' ^ '\r'))
{
- int pos = 0;
- int c = kEOF;
+ _advanceRaw(lexer);
+ }
+}
- do
- {
- if (lexer->m_cursor + pos == lexer->m_end)
- return kEOF;
+// Look ahead one code point, dealing with complications like
+// escaped newlines.
+static int _peek(Lexer* lexer, int offset = 0)
+{
+ int pos = 0;
+ int c = kEOF;
+
+ do
+ {
+ if (lexer->m_cursor + pos == lexer->m_end)
+ return kEOF;
- c = lexer->m_cursor[pos++];
+ c = lexer->m_cursor[pos++];
- while (c == '\\')
+ while (c == '\\')
+ {
+ // We might have a backslash-escaped newline.
+ // Look at the next byte (if any) to see.
+ //
+ // Note(tfoley): We are assuming a null-terminated input here,
+ // so that we can safely look at the next byte without issue.
+ int d = lexer->m_cursor[pos++];
+ switch (d)
{
- // We might have a backslash-escaped newline.
- // Look at the next byte (if any) to see.
- //
- // Note(tfoley): We are assuming a null-terminated input here,
- // so that we can safely look at the next byte without issue.
- int d = lexer->m_cursor[pos++];
- switch (d)
- {
- case '\r': case '\n':
+ case '\r':
+ case '\n':
{
// The newline was escaped, so return the code point after *that*
int e = lexer->m_cursor[pos++];
@@ -205,1139 +206,1198 @@ namespace Slang
c = e;
continue;
}
- default:
- break;
- }
-
- // Only continue this while loop in the case where we consumed
- // some newlines
- break;
- }
- if (isUtf8LeadingByte((Byte)c))
- {
- // Consume all unicode characters.
- pos--;
- c = getUnicodePointFromUTF8([&]() {return lexer->m_cursor[pos++]; });
+ default: break;
}
- // Default case is to just hand along the byte we read as an ASCII code point.
- } while (offset--);
- return c;
- }
+ // Only continue this while loop in the case where we consumed
+ // some newlines
+ break;
+ }
+ if (isUtf8LeadingByte((Byte)c))
+ {
+ // Consume all unicode characters.
+ pos--;
+ c = getUnicodePointFromUTF8([&]() { return lexer->m_cursor[pos++]; });
+ }
+ // Default case is to just hand along the byte we read as an ASCII code point.
+ } while (offset--);
+
+ return c;
+}
- // Get the next code point from the input, and advance the cursor.
- static int _advance(Lexer* lexer)
+// Get the next code point from the input, and advance the cursor.
+static int _advance(Lexer* lexer)
+{
+ // We are going to loop, but only as a way of handling
+ // escaped line endings.
+ for (;;)
{
- // We are going to loop, but only as a way of handling
- // escaped line endings.
- for (;;)
- {
- // If we are at the end of the input, then the task is easy.
- if (lexer->m_cursor == lexer->m_end)
- return kEOF;
+ // If we are at the end of the input, then the task is easy.
+ if (lexer->m_cursor == lexer->m_end)
+ return kEOF;
- // Look at the next raw byte, and decide what to do
- int c = *lexer->m_cursor++;
+ // Look at the next raw byte, and decide what to do
+ int c = *lexer->m_cursor++;
- if (c == '\\')
+ if (c == '\\')
+ {
+ // We might have a backslash-escaped newline.
+ // Look at the next byte (if any) to see.
+ //
+ // Note(tfoley): We are assuming a null-terminated input here,
+ // so that we can safely look at the next byte without issue.
+ int d = *lexer->m_cursor;
+ switch (d)
{
- // We might have a backslash-escaped newline.
- // Look at the next byte (if any) to see.
- //
- // Note(tfoley): We are assuming a null-terminated input here,
- // so that we can safely look at the next byte without issue.
- int d = *lexer->m_cursor;
- switch (d)
- {
- case '\r': case '\n':
- // handle the end-of-line for our source location tracking
- lexer->m_cursor++;
- _handleNewLineInner(lexer, d);
-
- lexer->m_tokenFlags |= TokenFlag::ScrubbingNeeded;
+ case '\r':
+ case '\n':
+ // handle the end-of-line for our source location tracking
+ lexer->m_cursor++;
+ _handleNewLineInner(lexer, d);
- // Now try again, looking at the character after the
- // escaped newline.
- continue;
+ lexer->m_tokenFlags |= TokenFlag::ScrubbingNeeded;
- default:
- break;
- }
- }
+ // Now try again, looking at the character after the
+ // escaped newline.
+ continue;
- // Consume all unicode characters.
- if (isUtf8LeadingByte((Byte)c))
- {
- lexer->m_cursor--;
- c = getUnicodePointFromUTF8([&]() {return *lexer->m_cursor++; });
+ default: break;
}
+ }
- // Default case is to return the raw byte we saw.
- return c;
+ // Consume all unicode characters.
+ if (isUtf8LeadingByte((Byte)c))
+ {
+ lexer->m_cursor--;
+ c = getUnicodePointFromUTF8([&]() { return *lexer->m_cursor++; });
}
- }
- static void _handleNewLine(Lexer* lexer)
- {
- int c = _advance(lexer);
- _handleNewLineInner(lexer, c);
+ // Default case is to return the raw byte we saw.
+ return c;
}
+}
+
+static void _handleNewLine(Lexer* lexer)
+{
+ int c = _advance(lexer);
+ _handleNewLineInner(lexer, c);
+}
- static void _lexLineComment(Lexer* lexer)
+static void _lexLineComment(Lexer* lexer)
+{
+ for (;;)
{
- for(;;)
+ switch (_peek(lexer))
{
- switch(_peek(lexer))
- {
- case '\n': case '\r': case kEOF:
- return;
+ case '\n':
+ case '\r':
+ case kEOF: return;
- default:
- _advance(lexer);
- continue;
- }
+ default: _advance(lexer); continue;
}
}
+}
- static void _lexBlockComment(Lexer* lexer)
+static void _lexBlockComment(Lexer* lexer)
+{
+ for (;;)
{
- for(;;)
+ switch (_peek(lexer))
{
- switch(_peek(lexer))
- {
- case kEOF:
- // TODO(tfoley) diagnostic!
- return;
-
- case '\n': case '\r':
- _handleNewLine(lexer);
- continue;
+ case kEOF:
+ // TODO(tfoley) diagnostic!
+ return;
- case '*':
- _advance(lexer);
- switch( _peek(lexer) )
- {
- case '/':
- _advance(lexer);
- return;
+ case '\n':
+ case '\r': _handleNewLine(lexer); continue;
- default:
- continue;
- }
+ case '*':
+ _advance(lexer);
+ switch (_peek(lexer))
+ {
+ case '/': _advance(lexer); return;
- default:
- _advance(lexer);
- continue;
+ default: continue;
}
+
+ default: _advance(lexer); continue;
}
}
+}
- static void _lexHorizontalSpace(Lexer* lexer)
+static void _lexHorizontalSpace(Lexer* lexer)
+{
+ for (;;)
{
- for(;;)
+ switch (_peek(lexer))
{
- switch(_peek(lexer))
- {
- case ' ': case '\t':
- _advance(lexer);
- continue;
+ case ' ':
+ case '\t': _advance(lexer); continue;
- default:
- return;
- }
+ default: return;
}
}
+}
- static bool isNonAsciiCodePoint(unsigned int codePoint)
- {
- return codePoint != 0xFFFFFFFF && codePoint >= 0x80;
- }
+static bool isNonAsciiCodePoint(unsigned int codePoint)
+{
+ return codePoint != 0xFFFFFFFF && codePoint >= 0x80;
+}
- static void _lexIdentifier(Lexer* lexer)
+static void _lexIdentifier(Lexer* lexer)
+{
+ for (;;)
{
- for(;;)
+ int c = _peek(lexer);
+ if (('a' <= c) && (c <= 'z') || ('A' <= c) && (c <= 'Z') || ('0' <= c) && (c <= '9') ||
+ (c == '_') || isNonAsciiCodePoint((unsigned int)c))
{
- int c = _peek(lexer);
- if(('a' <= c ) && (c <= 'z')
- || ('A' <= c) && (c <= 'Z')
- || ('0' <= c) && (c <= '9')
- || (c == '_')
- || isNonAsciiCodePoint((unsigned int)c))
- {
- _advance(lexer);
- continue;
- }
- return;
+ _advance(lexer);
+ continue;
}
+ return;
}
+}
- static SourceLoc _getSourceLoc(Lexer* lexer)
- {
- return lexer->m_startLoc + (lexer->m_cursor - lexer->m_begin);
- }
+static SourceLoc _getSourceLoc(Lexer* lexer)
+{
+ return lexer->m_startLoc + (lexer->m_cursor - lexer->m_begin);
+}
- static void _lexDigits(Lexer* lexer, int base)
+static void _lexDigits(Lexer* lexer, int base)
+{
+ for (;;)
{
- for(;;)
- {
- int c = _peek(lexer);
-
- int digitVal = 0;
- switch(c)
- {
- case '0': case '1': case '2': case '3': case '4':
- case '5': case '6': case '7': case '8': case '9':
- digitVal = c - '0';
- break;
+ int c = _peek(lexer);
- case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
- if(base <= 10) return;
- digitVal = 10 + c - 'a';
- break;
-
- case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
- if(base <= 10) return;
- digitVal = 10 + c - 'A';
- break;
-
- default:
- // Not more digits!
+ int digitVal = 0;
+ switch (c)
+ {
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9': digitVal = c - '0'; break;
+
+ case 'a':
+ case 'b':
+ case 'c':
+ case 'd':
+ case 'e':
+ case 'f':
+ if (base <= 10)
return;
- }
+ digitVal = 10 + c - 'a';
+ break;
- if(digitVal >= base)
- {
- if (auto sink = lexer->getDiagnosticSink())
- {
- char buffer[] = { (char) c, 0 };
- sink->diagnose(_getSourceLoc(lexer), LexerDiagnostics::invalidDigitForBase, buffer, base);
- }
- }
+ case 'A':
+ case 'B':
+ case 'C':
+ case 'D':
+ case 'E':
+ case 'F':
+ if (base <= 10)
+ return;
+ digitVal = 10 + c - 'A';
+ break;
- _advance(lexer);
+ default:
+ // No more digits!
+ return;
}
- }
- static TokenType _maybeLexNumberSuffix(Lexer* lexer, TokenType tokenType)
- {
- // Be liberal in what we accept here, so that figuring out
- // the semantics of a numeric suffix is left up to the parser
- // and semantic checking logic.
- //
- for( ;;)
+ if (digitVal >= base)
{
- int c = _peek(lexer);
-
- // Accept any alphanumeric character, plus underscores.
- if(('a' <= c ) && (c <= 'z')
- || ('A' <= c) && (c <= 'Z')
- || ('0' <= c) && (c <= '9')
- || (c == '_'))
+ if (auto sink = lexer->getDiagnosticSink())
{
- _advance(lexer);
- continue;
+ char buffer[] = {(char)c, 0};
+ sink->diagnose(
+ _getSourceLoc(lexer),
+ LexerDiagnostics::invalidDigitForBase,
+ buffer,
+ base);
}
-
- // Stop at the first character that isn't
- // alphanumeric.
- return tokenType;
}
+
+ _advance(lexer);
}
+}
- static bool _isNumberExponent(int c, int base)
+static TokenType _maybeLexNumberSuffix(Lexer* lexer, TokenType tokenType)
+{
+ // Be liberal in what we accept here, so that figuring out
+ // the semantics of a numeric suffix is left up to the parser
+ // and semantic checking logic.
+ //
+ for (;;)
{
- switch( c )
- {
- default:
- return false;
-
- case 'e': case 'E':
- if(base != 10) return false;
- break;
+ int c = _peek(lexer);
- case 'p': case 'P':
- if(base != 16) return false;
- break;
+ // Accept any alphanumeric character, plus underscores.
+ if (('a' <= c) && (c <= 'z') || ('A' <= c) && (c <= 'Z') || ('0' <= c) && (c <= '9') ||
+ (c == '_'))
+ {
+ _advance(lexer);
+ continue;
}
- return true;
+ // Stop at the first character that isn't
+ // alphanumeric.
+ return tokenType;
}
+}
- static bool _maybeLexNumberExponent(Lexer* lexer, int base)
+static bool _isNumberExponent(int c, int base)
+{
+ switch (c)
{
- if (_peek(lexer) == '#')
- {
- // Special case #INF
- const auto inf = toSlice("#INF");
- for (auto c : inf)
- {
- if (_peek(lexer) != c)
- {
- return false;
- }
- _advance(lexer);
- }
+ default: return false;
- return true;
- }
+ case 'e':
+ case 'E':
+ if (base != 10)
+ return false;
+ break;
- if(!_isNumberExponent(_peek(lexer), base))
+ case 'p':
+ case 'P':
+ if (base != 16)
return false;
+ break;
+ }
- // we saw an exponent marker
- _advance(lexer);
+ return true;
+}
- // Now start to read the exponent
- switch( _peek(lexer) )
+static bool _maybeLexNumberExponent(Lexer* lexer, int base)
+{
+ if (_peek(lexer) == '#')
+ {
+ // Special case #INF
+ const auto inf = toSlice("#INF");
+ for (auto c : inf)
{
- case '+': case '-':
+ if (_peek(lexer) != c)
+ {
+ return false;
+ }
_advance(lexer);
- break;
}
- // TODO(tfoley): it would be an error to not see digits here...
-
- _lexDigits(lexer, 10);
-
return true;
}
- static TokenType _lexNumberAfterDecimalPoint(Lexer* lexer, int base)
- {
- _lexDigits(lexer, base);
- _maybeLexNumberExponent(lexer, base);
+ if (!_isNumberExponent(_peek(lexer), base))
+ return false;
- return _maybeLexNumberSuffix(lexer, TokenType::FloatingPointLiteral);
- }
+ // we saw an exponent marker
+ _advance(lexer);
- static TokenType _lexNumber(Lexer* lexer, int base)
+ // Now start to read the exponent
+ switch (_peek(lexer))
{
- // TODO(tfoley): Need to consider whether to allow any kind of digit separator character.
+ case '+':
+ case '-': _advance(lexer); break;
+ }
- TokenType tokenType = TokenType::IntegerLiteral;
+ // TODO(tfoley): it would be an error to not see digits here...
- // At the start of things, we just concern ourselves with digits
- _lexDigits(lexer, base);
+ _lexDigits(lexer, 10);
- if( _peek(lexer) == '.' )
- {
- switch (_peek(lexer, 1))
- {
- // 123.xxxx or 123.rrrr
- case 'x':
- case 'r':
- break;
+ return true;
+}
- default:
- tokenType = TokenType::FloatingPointLiteral;
+static TokenType _lexNumberAfterDecimalPoint(Lexer* lexer, int base)
+{
+ _lexDigits(lexer, base);
+ _maybeLexNumberExponent(lexer, base);
- _advance(lexer);
- _lexDigits(lexer, base);
- }
- }
+ return _maybeLexNumberSuffix(lexer, TokenType::FloatingPointLiteral);
+}
+
+static TokenType _lexNumber(Lexer* lexer, int base)
+{
+ // TODO(tfoley): Need to consider whether to allow any kind of digit separator character.
+
+ TokenType tokenType = TokenType::IntegerLiteral;
- if( _maybeLexNumberExponent(lexer, base))
+ // At the start of things, we just concern ourselves with digits
+ _lexDigits(lexer, base);
+
+ if (_peek(lexer) == '.')
+ {
+ switch (_peek(lexer, 1))
{
+ // 123.xxxx or 123.rrrr
+ case 'x':
+ case 'r': break;
+
+ default:
tokenType = TokenType::FloatingPointLiteral;
- }
- _maybeLexNumberSuffix(lexer, tokenType);
- return tokenType;
+ _advance(lexer);
+ _lexDigits(lexer, base);
+ }
}
- static int _maybeReadDigit(char const** ioCursor, int base)
+ if (_maybeLexNumberExponent(lexer, base))
{
- auto& cursor = *ioCursor;
+ tokenType = TokenType::FloatingPointLiteral;
+ }
- for(;;)
- {
- int c = *cursor;
- switch(c)
- {
- default:
- return -1;
+ _maybeLexNumberSuffix(lexer, tokenType);
+ return tokenType;
+}
- // TODO: need to decide on digit separator characters
- case '_':
- cursor++;
- continue;
+static int _maybeReadDigit(char const** ioCursor, int base)
+{
+ auto& cursor = *ioCursor;
- case '0': case '1': case '2': case '3': case '4':
- case '5': case '6': case '7': case '8': case '9':
- cursor++;
- return c - '0';
+ for (;;)
+ {
+ int c = *cursor;
+ switch (c)
+ {
+ default: return -1;
- case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
- if(base > 10)
- {
- cursor++;
- return 10 + c - 'a';
- }
- return -1;
+ // TODO: need to decide on digit separator characters
+ case '_': cursor++; continue;
- case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
- if(base > 10)
- {
- cursor++;
- return 10 + c - 'A';
- }
- return -1;
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9': cursor++; return c - '0';
+
+ case 'a':
+ case 'b':
+ case 'c':
+ case 'd':
+ case 'e':
+ case 'f':
+ if (base > 10)
+ {
+ cursor++;
+ return 10 + c - 'a';
+ }
+ return -1;
+
+ case 'A':
+ case 'B':
+ case 'C':
+ case 'D':
+ case 'E':
+ case 'F':
+ if (base > 10)
+ {
+ cursor++;
+ return 10 + c - 'A';
}
+ return -1;
}
}
+}
- static int _readOptionalBase(char const** ioCursor)
+static int _readOptionalBase(char const** ioCursor)
+{
+ auto& cursor = *ioCursor;
+ if (*cursor == '0')
{
- auto& cursor = *ioCursor;
- if( *cursor == '0' )
+ cursor++;
+ switch (*cursor)
{
- cursor++;
- switch(*cursor)
- {
- case 'x': case 'X':
- cursor++;
- return 16;
+ case 'x':
+ case 'X': cursor++; return 16;
- case 'b': case 'B':
- cursor++;
- return 2;
-
- case '0': case '1': case '2': case '3': case '4':
- case '5': case '6': case '7': case '8': case '9':
- return 8;
+ case 'b':
+ case 'B': cursor++; return 2;
- default:
- return 10;
- }
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9': return 8;
+
+ default: return 10;
}
-
- return 10;
}
+ return 10;
+}
- IntegerLiteralValue getIntegerLiteralValue(Token const& token, UnownedStringSlice* outSuffix)
- {
- IntegerLiteralValue value = 0;
-
- const UnownedStringSlice content = token.getContent();
-
- char const* cursor = content.begin();
- char const* end = content.end();
+IntegerLiteralValue getIntegerLiteralValue(Token const& token, UnownedStringSlice* outSuffix)
+{
+ IntegerLiteralValue value = 0;
- int base = _readOptionalBase(&cursor);
+ const UnownedStringSlice content = token.getContent();
- for( ;;)
- {
- int digit = _maybeReadDigit(&cursor, base);
- if(digit < 0)
- break;
+ char const* cursor = content.begin();
+ char const* end = content.end();
- value = value*base + digit;
- }
+ int base = _readOptionalBase(&cursor);
- if(outSuffix)
- {
- *outSuffix = UnownedStringSlice(cursor, end);
- }
+ for (;;)
+ {
+ int digit = _maybeReadDigit(&cursor, base);
+ if (digit < 0)
+ break;
- return value;
+ value = value * base + digit;
}
- FloatingPointLiteralValue getFloatingPointLiteralValue(Token const& token, UnownedStringSlice* outSuffix)
+ if (outSuffix)
{
- FloatingPointLiteralValue value = 0;
+ *outSuffix = UnownedStringSlice(cursor, end);
+ }
+
+ return value;
+}
+
+FloatingPointLiteralValue getFloatingPointLiteralValue(
+ Token const& token,
+ UnownedStringSlice* outSuffix)
+{
+ FloatingPointLiteralValue value = 0;
- const UnownedStringSlice content = token.getContent();
+ const UnownedStringSlice content = token.getContent();
- char const* cursor = content.begin();
- char const* end = content.end();
+ char const* cursor = content.begin();
+ char const* end = content.end();
- int radix = _readOptionalBase(&cursor);
+ int radix = _readOptionalBase(&cursor);
- bool seenDot = false;
- FloatingPointLiteralValue divisor = 1;
- for( ;;)
+ bool seenDot = false;
+ FloatingPointLiteralValue divisor = 1;
+ for (;;)
+ {
+ if (*cursor == '.')
{
- if(*cursor == '.')
- {
- cursor++;
- seenDot = true;
- continue;
- }
+ cursor++;
+ seenDot = true;
+ continue;
+ }
- int digit = _maybeReadDigit(&cursor, radix);
- if(digit < 0)
- break;
+ int digit = _maybeReadDigit(&cursor, radix);
+ if (digit < 0)
+ break;
- value = value*radix + digit;
+ value = value * radix + digit;
- if(seenDot)
- {
- divisor *= radix;
- }
+ if (seenDot)
+ {
+ divisor *= radix;
}
+ }
- if (*cursor == '#')
- {
- // It must be INF
- const auto inf = toSlice("#INF");
+ if (*cursor == '#')
+ {
+ // It must be INF
+ const auto inf = toSlice("#INF");
- if (UnownedStringSlice(cursor, end).startsWith(inf))
+ if (UnownedStringSlice(cursor, end).startsWith(inf))
+ {
+ if (outSuffix)
{
- if(outSuffix)
- {
- *outSuffix = UnownedStringSlice(cursor + inf.getLength(), end);
- }
+ *outSuffix = UnownedStringSlice(cursor + inf.getLength(), end);
+ }
- value = INFINITY;
+ value = INFINITY;
- return value;
- }
+ return value;
}
+ }
+
+ // Now read optional exponent
+ if (_isNumberExponent(*cursor, radix))
+ {
+ cursor++;
- // Now read optional exponent
- if(_isNumberExponent(*cursor, radix))
+ bool exponentIsNegative = false;
+ switch (*cursor)
{
+ default: break;
+
+ case '-':
+ exponentIsNegative = true;
cursor++;
+ break;
- bool exponentIsNegative = false;
- switch(*cursor)
- {
- default:
- break;
+ case '+': cursor++; break;
+ }
- case '-':
- exponentIsNegative = true;
- cursor++;
- break;
+ int exponentRadix = 10;
+ int exponent = 0;
- case '+':
- cursor++;
+ for (;;)
+ {
+ int digit = _maybeReadDigit(&cursor, exponentRadix);
+ if (digit < 0)
break;
- }
-
- int exponentRadix = 10;
- int exponent = 0;
-
- for(;;)
- {
- int digit = _maybeReadDigit(&cursor, exponentRadix);
- if(digit < 0)
- break;
-
- exponent = exponent*exponentRadix + digit;
- }
-
- FloatingPointLiteralValue exponentBase = 10;
- if(radix == 16)
- {
- exponentBase = 2;
- }
- FloatingPointLiteralValue exponentValue = pow(exponentBase, exponent);
+ exponent = exponent * exponentRadix + digit;
+ }
- if( exponentIsNegative )
- {
- divisor *= exponentValue;
- }
- else
- {
- value *= exponentValue;
- }
+ FloatingPointLiteralValue exponentBase = 10;
+ if (radix == 16)
+ {
+ exponentBase = 2;
}
- value /= divisor;
+ FloatingPointLiteralValue exponentValue = pow(exponentBase, exponent);
- if(outSuffix)
+ if (exponentIsNegative)
{
- *outSuffix = UnownedStringSlice(cursor, end);
+ divisor *= exponentValue;
}
+ else
+ {
+ value *= exponentValue;
+ }
+ }
- return value;
+ value /= divisor;
+
+ if (outSuffix)
+ {
+ *outSuffix = UnownedStringSlice(cursor, end);
}
- static void _lexStringLiteralBody(Lexer* lexer, char quote)
+ return value;
+}
+
+static void _lexStringLiteralBody(Lexer* lexer, char quote)
+{
+ for (;;)
{
- for(;;)
+ int c = _peek(lexer);
+ if (c == quote)
+ {
+ _advance(lexer);
+ return;
+ }
+
+ switch (c)
{
- int c = _peek(lexer);
- if(c == quote)
+ case kEOF:
+ if (auto sink = lexer->getDiagnosticSink())
{
- _advance(lexer);
- return;
+ sink->diagnose(_getSourceLoc(lexer), LexerDiagnostics::endOfFileInLiteral);
}
+ return;
- switch(c)
+ case '\n':
+ case '\r':
+ if (auto sink = lexer->getDiagnosticSink())
{
- case kEOF:
- if (auto sink = lexer->getDiagnosticSink())
- {
- sink->diagnose(_getSourceLoc(lexer), LexerDiagnostics::endOfFileInLiteral);
- }
- return;
+ sink->diagnose(_getSourceLoc(lexer), LexerDiagnostics::newlineInLiteral);
+ }
+ return;
- case '\n': case '\r':
- if (auto sink = lexer->getDiagnosticSink())
+ case '\\':
+ // Need to handle various escape sequence cases
+ _advance(lexer);
+ switch (_peek(lexer))
+ {
+ case '\'':
+ case '\"':
+ case '\\':
+ case '?':
+ case 'a':
+ case 'b':
+ case 'f':
+ case 'n':
+ case 'r':
+ case 't':
+ case 'v': _advance(lexer); break;
+
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ // octal escape: up to 3 characters
+ _advance(lexer);
+ for (int ii = 0; ii < 3; ++ii)
{
- sink->diagnose(_getSourceLoc(lexer), LexerDiagnostics::newlineInLiteral);
+ int d = _peek(lexer);
+ if (('0' <= d) && (d <= '7'))
+ {
+ _advance(lexer);
+ continue;
+ }
+ else
+ {
+ break;
+ }
}
- return;
+ break;
- case '\\':
- // Need to handle various escape sequence cases
+ case 'x':
+ // hexadecimal escape: any number of characters
_advance(lexer);
- switch(_peek(lexer))
+ for (;;)
{
- case '\'':
- case '\"':
- case '\\':
- case '?':
- case 'a':
- case 'b':
- case 'f':
- case 'n':
- case 'r':
- case 't':
- case 'v':
- _advance(lexer);
- break;
-
- case '0': case '1': case '2': case '3': case '4':
- case '5': case '6': case '7':
- // octal escape: up to 3 characters
- _advance(lexer);
- for(int ii = 0; ii < 3; ++ii)
+ int d = _peek(lexer);
+ if (('0' <= d) && (d <= '9') || ('a' <= d) && (d <= 'f') ||
+ ('A' <= d) && (d <= 'F'))
{
- int d = _peek(lexer);
- if(('0' <= d) && (d <= '7'))
- {
- _advance(lexer);
- continue;
- }
- else
- {
- break;
- }
+ _advance(lexer);
+ continue;
}
- break;
-
- case 'x':
- // hexadecimal escape: any number of characters
- _advance(lexer);
- for(;;)
+ else
{
- int d = _peek(lexer);
- if(('0' <= d) && (d <= '9')
- || ('a' <= d) && (d <= 'f')
- || ('A' <= d) && (d <= 'F'))
- {
- _advance(lexer);
- continue;
- }
- else
- {
- break;
- }
+ break;
}
- break;
-
- // TODO: Unicode escape sequences
-
}
break;
- default:
- _advance(lexer);
- continue;
+ // TODO: Unicode escape sequences
}
+ break;
+
+ default: _advance(lexer); continue;
}
}
+}
- static void _lexRawStringLiteralBody(Lexer* lexer)
+static void _lexRawStringLiteralBody(Lexer* lexer)
+{
+ const char* start = lexer->m_cursor;
+ const char* endOfDelimiter = nullptr;
+ for (;;)
{
- const char* start = lexer->m_cursor;
- const char* endOfDelimiter = nullptr;
- for (;;)
+ int c = _peek(lexer);
+ if (c == '(' && endOfDelimiter == nullptr)
+ endOfDelimiter = lexer->m_cursor;
+ if (c == '\"')
{
- int c = _peek(lexer);
- if (c == '(' && endOfDelimiter == nullptr)
- endOfDelimiter = lexer->m_cursor;
- if (c == '\"')
+ if (!endOfDelimiter)
{
- if (!endOfDelimiter)
+ if (auto sink = lexer->getDiagnosticSink())
{
- if (auto sink = lexer->getDiagnosticSink())
- {
- sink->diagnose(_getSourceLoc(lexer), LexerDiagnostics::quoteCannotBeDelimiter);
- }
+ sink->diagnose(_getSourceLoc(lexer), LexerDiagnostics::quoteCannotBeDelimiter);
}
- else
+ }
+ else
+ {
+ auto testStart = lexer->m_cursor - (endOfDelimiter - start);
+ if (testStart > endOfDelimiter)
{
- auto testStart = lexer->m_cursor - (endOfDelimiter - start);
- if (testStart > endOfDelimiter)
+ auto testDelimiter = UnownedStringSlice(testStart, lexer->m_cursor);
+ auto delimiter = UnownedStringSlice(start, endOfDelimiter);
+ if (*(testStart - 1) == ')' && testDelimiter == delimiter)
{
- auto testDelimiter = UnownedStringSlice(testStart, lexer->m_cursor);
- auto delimiter = UnownedStringSlice(start, endOfDelimiter);
- if (*(testStart - 1) == ')' && testDelimiter == delimiter)
- {
- _advance(lexer);
- return;
- }
+ _advance(lexer);
+ return;
}
}
}
+ }
- switch (c)
+ switch (c)
+ {
+ case kEOF:
+ if (auto sink = lexer->getDiagnosticSink())
{
- case kEOF:
- if (auto sink = lexer->getDiagnosticSink())
- {
- sink->diagnose(_getSourceLoc(lexer), LexerDiagnostics::endOfFileInLiteral);
- }
- return;
- default:
- _advance(lexer);
- continue;
+ sink->diagnose(_getSourceLoc(lexer), LexerDiagnostics::endOfFileInLiteral);
}
+ return;
+ default: _advance(lexer); continue;
}
}
+}
- UnownedStringSlice getRawStringLiteralTokenValue(Token const& token)
- {
- auto content = token.getContent();
- if (content.getLength() <= 5)
- return UnownedStringSlice();
- auto start = content.begin() + 2;
- auto delimEnd = start;
- while (delimEnd < content.end() && *delimEnd != '(')
- delimEnd++;
- auto delimLength = delimEnd - start;
- auto contentEnd = content.end() - delimLength - 2;
- auto contentBegin = start + delimLength + 1;
- if (contentEnd <= contentBegin)
- return UnownedStringSlice();
- return UnownedStringSlice(contentBegin, contentEnd);
- }
+UnownedStringSlice getRawStringLiteralTokenValue(Token const& token)
+{
+ auto content = token.getContent();
+ if (content.getLength() <= 5)
+ return UnownedStringSlice();
+ auto start = content.begin() + 2;
+ auto delimEnd = start;
+ while (delimEnd < content.end() && *delimEnd != '(')
+ delimEnd++;
+ auto delimLength = delimEnd - start;
+ auto contentEnd = content.end() - delimLength - 2;
+ auto contentBegin = start + delimLength + 1;
+ if (contentEnd <= contentBegin)
+ return UnownedStringSlice();
+ return UnownedStringSlice(contentBegin, contentEnd);
+}
- String getStringLiteralTokenValue(Token const& token)
- {
- SLANG_ASSERT(token.type == TokenType::StringLiteral
- || token.type == TokenType::CharLiteral);
+String getStringLiteralTokenValue(Token const& token)
+{
+ SLANG_ASSERT(token.type == TokenType::StringLiteral || token.type == TokenType::CharLiteral);
- if (token.getContent().startsWith("R"))
- return getRawStringLiteralTokenValue(token);
+ if (token.getContent().startsWith("R"))
+ return getRawStringLiteralTokenValue(token);
- const UnownedStringSlice content = token.getContent();
+ const UnownedStringSlice content = token.getContent();
- char const* cursor = content.begin();
- char const* end = content.end();
- SLANG_UNREFERENCED_VARIABLE(end);
+ char const* cursor = content.begin();
+ char const* end = content.end();
+ SLANG_UNREFERENCED_VARIABLE(end);
- auto quote = *cursor++;
- SLANG_ASSERT(quote == '\'' || quote == '"');
+ auto quote = *cursor++;
+ SLANG_ASSERT(quote == '\'' || quote == '"');
- StringBuilder valueBuilder;
- for(;;)
- {
- SLANG_ASSERT(cursor != end);
+ StringBuilder valueBuilder;
+ for (;;)
+ {
+ SLANG_ASSERT(cursor != end);
- auto c = *cursor++;
+ auto c = *cursor++;
- // If we see a closing quote, then we are at the end of the string literal
- if(c == quote)
- {
- SLANG_ASSERT(cursor == end);
- return valueBuilder.produceString();
- }
+ // If we see a closing quote, then we are at the end of the string literal
+ if (c == quote)
+ {
+ SLANG_ASSERT(cursor == end);
+ return valueBuilder.produceString();
+ }
- // Characters that don't being escape sequences are easy;
- // just append them to the buffer and move on.
- if(c != '\\')
- {
- valueBuilder.append(c);
- continue;
- }
+ // Characters that don't being escape sequences are easy;
+ // just append them to the buffer and move on.
+ if (c != '\\')
+ {
+ valueBuilder.append(c);
+ continue;
+ }
- // Now we look at another character to figure out the kind of
- // escape sequence we are dealing with:
+ // Now we look at another character to figure out the kind of
+ // escape sequence we are dealing with:
- char d = *cursor++;
+ char d = *cursor++;
- switch(d)
+ switch (d)
+ {
+ // Simple characters that just needed to be escaped
+ case '\'':
+ case '\"':
+ case '\\':
+ case '?': valueBuilder.append(d); continue;
+
+ // Traditional escape sequences for special characters
+ case 'a': valueBuilder.append('\a'); continue;
+ case 'b': valueBuilder.append('\b'); continue;
+ case 'f': valueBuilder.append('\f'); continue;
+ case 'n': valueBuilder.append('\n'); continue;
+ case 'r': valueBuilder.append('\r'); continue;
+ case 't': valueBuilder.append('\t'); continue;
+ case 'v': valueBuilder.append('\v'); continue;
+
+ // Octal escape: up to 3 characters
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
{
- // Simple characters that just needed to be escaped
- case '\'':
- case '\"':
- case '\\':
- case '?':
- valueBuilder.append(d);
- continue;
-
- // Traditional escape sequences for special characters
- case 'a': valueBuilder.append('\a'); continue;
- case 'b': valueBuilder.append('\b'); continue;
- case 'f': valueBuilder.append('\f'); continue;
- case 'n': valueBuilder.append('\n'); continue;
- case 'r': valueBuilder.append('\r'); continue;
- case 't': valueBuilder.append('\t'); continue;
- case 'v': valueBuilder.append('\v'); continue;
-
- // Octal escape: up to 3 characters
- case '0': case '1': case '2': case '3': case '4':
- case '5': case '6': case '7':
+ cursor--;
+ int value = 0;
+ for (int ii = 0; ii < 3; ++ii)
{
- cursor--;
- int value = 0;
- for(int ii = 0; ii < 3; ++ii)
+ d = *cursor;
+ if (('0' <= d) && (d <= '7'))
{
- d = *cursor;
- if(('0' <= d) && (d <= '7'))
- {
- value = value*8 + (d - '0');
+ value = value * 8 + (d - '0');
- cursor++;
- continue;
- }
- else
- {
- break;
- }
+ cursor++;
+ continue;
+ }
+ else
+ {
+ break;
}
-
- // TODO: add support for appending an arbitrary code point?
- valueBuilder.append((char) value);
}
- continue;
- // Hexadecimal escape: any number of characters
- case 'x':
+ // TODO: add support for appending an arbitrary code point?
+ valueBuilder.append((char)value);
+ }
+ continue;
+
+ // Hexadecimal escape: any number of characters
+ case 'x':
+ {
+ int value = 0;
+ for (;;)
{
- int value = 0;
- for(;;)
+ d = *cursor++;
+ int digitValue = 0;
+ if (('0' <= d) && (d <= '9'))
{
- d = *cursor++;
- int digitValue = 0;
- if(('0' <= d) && (d <= '9'))
- {
- digitValue = d - '0';
- }
- else if( ('a' <= d) && (d <= 'f') )
- {
- digitValue = d - 'a';
- }
- else if( ('A' <= d) && (d <= 'F') )
- {
- digitValue = d - 'A';
- }
- else
- {
- cursor--;
- break;
- }
-
- value = value*16 + digitValue;
+ digitValue = d - '0';
+ }
+ else if (('a' <= d) && (d <= 'f'))
+ {
+ digitValue = d - 'a';
+ }
+ else if (('A' <= d) && (d <= 'F'))
+ {
+ digitValue = d - 'A';
+ }
+ else
+ {
+ cursor--;
+ break;
}
- // TODO: add support for appending an arbitrary code point?
- valueBuilder.append((char) value);
+ value = value * 16 + digitValue;
}
- continue;
-
- // TODO: Unicode escape sequences
+ // TODO: add support for appending an arbitrary code point?
+ valueBuilder.append((char)value);
}
+ continue;
+
+ // TODO: Unicode escape sequences
}
}
+}
- String getFileNameTokenValue(Token const& token)
- {
- const UnownedStringSlice content = token.getContent();
+String getFileNameTokenValue(Token const& token)
+{
+ const UnownedStringSlice content = token.getContent();
- // A file name usually doesn't process escape sequences
- // (this is import on Windows, where `\\` is a valid
- // path separator character).
+ // A file name usually doesn't process escape sequences
+ // (this is import on Windows, where `\\` is a valid
+ // path separator character).
- // Just trim off the first and last characters to remove the quotes
- // (whether they were `""` or `<>`.
- return String(content.begin() + 1, content.end() - 1);
- }
+ // Just trim off the first and last characters to remove the quotes
+ // (whether they were `""` or `<>`.
+ return String(content.begin() + 1, content.end() - 1);
+}
- static TokenType _lexTokenImpl(Lexer* lexer)
+static TokenType _lexTokenImpl(Lexer* lexer)
+{
+ int nextCodePoint = _peek(lexer);
+ switch (nextCodePoint)
{
- int nextCodePoint = _peek(lexer);
- switch(nextCodePoint)
- {
- default:
- break;
+ default: break;
- case kEOF:
- return TokenType::EndOfFile;
+ case kEOF: return TokenType::EndOfFile;
+
+ case '\r':
+ case '\n': _handleNewLine(lexer); return TokenType::NewLine;
- case '\r': case '\n':
- _handleNewLine(lexer);
- return TokenType::NewLine;
+ case ' ':
+ case '\t': _lexHorizontalSpace(lexer); return TokenType::WhiteSpace;
- case ' ': case '\t':
- _lexHorizontalSpace(lexer);
- return TokenType::WhiteSpace;
+ case '.':
+ _advance(lexer);
+ switch (_peek(lexer))
+ {
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9': return _lexNumberAfterDecimalPoint(lexer, 10);
case '.':
+ // Note: consuming the second `.` here means that
+ // we cannot back up and return a `.` token by itself
+ // any more. We thus end up having distinct tokens for
+ // `.`, `..`, and `...` even though the `..` case is
+ // not part of HLSL.
+ //
_advance(lexer);
- switch(_peek(lexer))
+ switch (_peek(lexer))
{
- case '0': case '1': case '2': case '3': case '4':
- case '5': case '6': case '7': case '8': case '9':
- return _lexNumberAfterDecimalPoint(lexer, 10);
-
- case '.':
- // Note: consuming the second `.` here means that
- // we cannot back up and return a `.` token by itself
- // any more. We thus end up having distinct tokens for
- // `.`, `..`, and `...` even though the `..` case is
- // not part of HLSL.
- //
- _advance(lexer);
- switch(_peek(lexer))
- {
- case '.':
- _advance(lexer);
- return TokenType::Ellipsis;
+ case '.': _advance(lexer); return TokenType::Ellipsis;
- default:
- return TokenType::DotDot;
- }
-
- default:
- return TokenType::Dot;
+ default: return TokenType::DotDot;
}
- case '1': case '2': case '3': case '4':
- case '5': case '6': case '7': case '8': case '9':
- return _lexNumber(lexer, 10);
+ default: return TokenType::Dot;
+ }
- case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9': return _lexNumber(lexer, 10);
+
+ case '0':
+ {
+ auto loc = _getSourceLoc(lexer);
+ _advance(lexer);
+ switch (_peek(lexer))
{
- auto loc = _getSourceLoc(lexer);
- _advance(lexer);
- switch(_peek(lexer))
- {
- default:
- return _maybeLexNumberSuffix(lexer, TokenType::IntegerLiteral);
-
- case '.':
- switch (_peek(lexer, 1))
- {
- // 0.xxxx or 0.rrrr
- case 'x':
- case 'r':
- return _maybeLexNumberSuffix(lexer, TokenType::IntegerLiteral);
- default:
- _advance(lexer);
- return _lexNumberAfterDecimalPoint(lexer, 10);
- }
-
- case 'x': case 'X':
- _advance(lexer);
- return _lexNumber(lexer, 16);
+ default: return _maybeLexNumberSuffix(lexer, TokenType::IntegerLiteral);
- case 'b': case 'B':
- _advance(lexer);
- return _lexNumber(lexer, 2);
-
- case '0': case '1': case '2': case '3': case '4':
- case '5': case '6': case '7': case '8': case '9':
- if (auto sink = lexer->getDiagnosticSink())
- {
- sink->diagnose(loc, LexerDiagnostics::octalLiteral);
- }
- return _lexNumber(lexer, 8);
+ case '.':
+ switch (_peek(lexer, 1))
+ {
+ // 0.xxxx or 0.rrrr
+ case 'x':
+ case 'r': return _maybeLexNumberSuffix(lexer, TokenType::IntegerLiteral);
+ default: _advance(lexer); return _lexNumberAfterDecimalPoint(lexer, 10);
}
- }
- case 'a': case 'b': case 'c': case 'd': case 'e':
- case 'f': case 'g': case 'h': case 'i': case 'j':
- case 'k': case 'l': case 'm': case 'n': case 'o':
- case 'p': case 'q': case 'r': case 's': case 't':
- case 'u': case 'v': case 'w': case 'x': case 'y':
- case 'z':
- case 'A': case 'B': case 'C': case 'D': case 'E':
- case 'F': case 'G': case 'H': case 'I': case 'J':
- case 'K': case 'L': case 'M': case 'N': case 'O':
- case 'P': case 'Q': case 'S': case 'T':
- case 'U': case 'V': case 'W': case 'X': case 'Y':
- case 'Z':
- case '_':
- _lexIdentifier(lexer);
- return TokenType::Identifier;
- case 'R':
- _advance(lexer);
- switch (_peek(lexer))
- {
- default:
- _lexIdentifier(lexer);
- return TokenType::Identifier;
- case '\"':
- _advance(lexer);
- _lexRawStringLiteralBody(lexer);
- return TokenType::StringLiteral;
+ case 'x':
+ case 'X': _advance(lexer); return _lexNumber(lexer, 16);
+
+ case 'b':
+ case 'B': _advance(lexer); return _lexNumber(lexer, 2);
+
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9':
+ if (auto sink = lexer->getDiagnosticSink())
+ {
+ sink->diagnose(loc, LexerDiagnostics::octalLiteral);
+ }
+ return _lexNumber(lexer, 8);
}
+ }
+ case 'a':
+ case 'b':
+ case 'c':
+ case 'd':
+ case 'e':
+ case 'f':
+ case 'g':
+ case 'h':
+ case 'i':
+ case 'j':
+ case 'k':
+ case 'l':
+ case 'm':
+ case 'n':
+ case 'o':
+ case 'p':
+ case 'q':
+ case 'r':
+ case 's':
+ case 't':
+ case 'u':
+ case 'v':
+ case 'w':
+ case 'x':
+ case 'y':
+ case 'z':
+ case 'A':
+ case 'B':
+ case 'C':
+ case 'D':
+ case 'E':
+ case 'F':
+ case 'G':
+ case 'H':
+ case 'I':
+ case 'J':
+ case 'K':
+ case 'L':
+ case 'M':
+ case 'N':
+ case 'O':
+ case 'P':
+ case 'Q':
+ case 'S':
+ case 'T':
+ case 'U':
+ case 'V':
+ case 'W':
+ case 'X':
+ case 'Y':
+ case 'Z':
+ case '_': _lexIdentifier(lexer); return TokenType::Identifier;
+ case 'R':
+ _advance(lexer);
+ switch (_peek(lexer))
+ {
+ default: _lexIdentifier(lexer); return TokenType::Identifier;
case '\"':
_advance(lexer);
- _lexStringLiteralBody(lexer, '\"');
+ _lexRawStringLiteralBody(lexer);
return TokenType::StringLiteral;
+ }
- case '\'':
- _advance(lexer);
- _lexStringLiteralBody(lexer, '\'');
- return TokenType::CharLiteral;
+ case '\"':
+ _advance(lexer);
+ _lexStringLiteralBody(lexer, '\"');
+ return TokenType::StringLiteral;
+ case '\'':
+ _advance(lexer);
+ _lexStringLiteralBody(lexer, '\'');
+ return TokenType::CharLiteral;
- case '+':
- _advance(lexer);
- switch(_peek(lexer))
- {
- case '+': _advance(lexer); return TokenType::OpInc;
- case '=': _advance(lexer); return TokenType::OpAddAssign;
- default:
- return TokenType::OpAdd;
- }
- case '-':
- _advance(lexer);
- switch(_peek(lexer))
- {
- case '-': _advance(lexer); return TokenType::OpDec;
- case '=': _advance(lexer); return TokenType::OpSubAssign;
- case '>': _advance(lexer); return TokenType::RightArrow;
- default:
- return TokenType::OpSub;
- }
+ case '+':
+ _advance(lexer);
+ switch (_peek(lexer))
+ {
+ case '+': _advance(lexer); return TokenType::OpInc;
+ case '=': _advance(lexer); return TokenType::OpAddAssign;
+ default: return TokenType::OpAdd;
+ }
- case '*':
- _advance(lexer);
- switch(_peek(lexer))
- {
- case '=': _advance(lexer); return TokenType::OpMulAssign;
- default:
- return TokenType::OpMul;
- }
+ case '-':
+ _advance(lexer);
+ switch (_peek(lexer))
+ {
+ case '-': _advance(lexer); return TokenType::OpDec;
+ case '=': _advance(lexer); return TokenType::OpSubAssign;
+ case '>': _advance(lexer); return TokenType::RightArrow;
+ default: return TokenType::OpSub;
+ }
+
+ case '*':
+ _advance(lexer);
+ switch (_peek(lexer))
+ {
+ case '=': _advance(lexer); return TokenType::OpMulAssign;
+ default: return TokenType::OpMul;
+ }
+ case '/':
+ _advance(lexer);
+ switch (_peek(lexer))
+ {
+ case '=': _advance(lexer); return TokenType::OpDivAssign;
case '/':
_advance(lexer);
- switch(_peek(lexer))
- {
- case '=': _advance(lexer); return TokenType::OpDivAssign;
- case '/': _advance(lexer); _lexLineComment(lexer); return TokenType::LineComment;
- case '*': _advance(lexer); _lexBlockComment(lexer); return TokenType::BlockComment;
- default:
- return TokenType::OpDiv;
- }
-
- case '%':
+ _lexLineComment(lexer);
+ return TokenType::LineComment;
+ case '*':
_advance(lexer);
- switch(_peek(lexer))
- {
- case '=': _advance(lexer); return TokenType::OpModAssign;
- default:
- return TokenType::OpMod;
- }
+ _lexBlockComment(lexer);
+ return TokenType::BlockComment;
+ default: return TokenType::OpDiv;
+ }
- case '|':
- _advance(lexer);
- switch(_peek(lexer))
- {
- case '|': _advance(lexer); return TokenType::OpOr;
- case '=': _advance(lexer); return TokenType::OpOrAssign;
- default:
- return TokenType::OpBitOr;
- }
+ case '%':
+ _advance(lexer);
+ switch (_peek(lexer))
+ {
+ case '=': _advance(lexer); return TokenType::OpModAssign;
+ default: return TokenType::OpMod;
+ }
- case '&':
- _advance(lexer);
- switch(_peek(lexer))
- {
- case '&': _advance(lexer); return TokenType::OpAnd;
- case '=': _advance(lexer); return TokenType::OpAndAssign;
- default:
- return TokenType::OpBitAnd;
- }
+ case '|':
+ _advance(lexer);
+ switch (_peek(lexer))
+ {
+ case '|': _advance(lexer); return TokenType::OpOr;
+ case '=': _advance(lexer); return TokenType::OpOrAssign;
+ default: return TokenType::OpBitOr;
+ }
- case '^':
- _advance(lexer);
- switch(_peek(lexer))
- {
- case '=': _advance(lexer); return TokenType::OpXorAssign;
- default:
- return TokenType::OpBitXor;
- }
+ case '&':
+ _advance(lexer);
+ switch (_peek(lexer))
+ {
+ case '&': _advance(lexer); return TokenType::OpAnd;
+ case '=': _advance(lexer); return TokenType::OpAndAssign;
+ default: return TokenType::OpBitAnd;
+ }
+ case '^':
+ _advance(lexer);
+ switch (_peek(lexer))
+ {
+ case '=': _advance(lexer); return TokenType::OpXorAssign;
+ default: return TokenType::OpBitXor;
+ }
+
+ case '>':
+ _advance(lexer);
+ switch (_peek(lexer))
+ {
case '>':
_advance(lexer);
- switch(_peek(lexer))
+ switch (_peek(lexer))
{
- case '>':
- _advance(lexer);
- switch(_peek(lexer))
- {
- case '=': _advance(lexer); return TokenType::OpShrAssign;
- default: return TokenType::OpRsh;
- }
- case '=': _advance(lexer); return TokenType::OpGeq;
- default:
- return TokenType::OpGreater;
+ case '=': _advance(lexer); return TokenType::OpShrAssign;
+ default: return TokenType::OpRsh;
}
+ case '=': _advance(lexer); return TokenType::OpGeq;
+ default: return TokenType::OpGreater;
+ }
+ case '<':
+ _advance(lexer);
+ switch (_peek(lexer))
+ {
case '<':
_advance(lexer);
- switch(_peek(lexer))
+ switch (_peek(lexer))
{
- case '<':
- _advance(lexer);
- switch(_peek(lexer))
- {
- case '=': _advance(lexer); return TokenType::OpShlAssign;
- default: return TokenType::OpLsh;
- }
- case '=': _advance(lexer); return TokenType::OpLeq;
- default:
- return TokenType::OpLess;
+ case '=': _advance(lexer); return TokenType::OpShlAssign;
+ default: return TokenType::OpLsh;
}
+ case '=': _advance(lexer); return TokenType::OpLeq;
+ default: return TokenType::OpLess;
+ }
- case '=':
- _advance(lexer);
- switch(_peek(lexer))
- {
- case '=': _advance(lexer); return TokenType::OpEql;
- default:
- return TokenType::OpAssign;
- }
+ case '=':
+ _advance(lexer);
+ switch (_peek(lexer))
+ {
+ case '=': _advance(lexer); return TokenType::OpEql;
+ default: return TokenType::OpAssign;
+ }
- case '!':
- _advance(lexer);
- switch(_peek(lexer))
- {
- case '=': _advance(lexer); return TokenType::OpNeq;
- default:
- return TokenType::OpNot;
- }
+ case '!':
+ _advance(lexer);
+ switch (_peek(lexer))
+ {
+ case '=': _advance(lexer); return TokenType::OpNeq;
+ default: return TokenType::OpNot;
+ }
- case '#':
- _advance(lexer);
- switch(_peek(lexer))
- {
- case '#': _advance(lexer); return TokenType::PoundPound;
+ case '#':
+ _advance(lexer);
+ switch (_peek(lexer))
+ {
+ case '#': _advance(lexer); return TokenType::PoundPound;
- case '?': _advance(lexer); return TokenType::CompletionRequest;
+ case '?': _advance(lexer); return TokenType::CompletionRequest;
- default:
- return TokenType::Pound;
- }
+ default: return TokenType::Pound;
+ }
- case '~': _advance(lexer); return TokenType::OpBitNot;
+ case '~': _advance(lexer); return TokenType::OpBitNot;
- case ':':
+ case ':':
{
_advance(lexer);
if (_peek(lexer) == ':')
@@ -1347,151 +1407,154 @@ namespace Slang
}
return TokenType::Colon;
}
- case ';': _advance(lexer); return TokenType::Semicolon;
- case ',': _advance(lexer); return TokenType::Comma;
-
- case '{': _advance(lexer); return TokenType::LBrace;
- case '}': _advance(lexer); return TokenType::RBrace;
- case '[': _advance(lexer); return TokenType::LBracket;
- case ']': _advance(lexer); return TokenType::RBracket;
- case '(': _advance(lexer); return TokenType::LParent;
- case ')': _advance(lexer); return TokenType::RParent;
-
- case '?': _advance(lexer); return TokenType::QuestionMark;
- case '@': _advance(lexer); return TokenType::At;
- case '$':
+ case ';': _advance(lexer); return TokenType::Semicolon;
+ case ',': _advance(lexer); return TokenType::Comma;
+
+ case '{': _advance(lexer); return TokenType::LBrace;
+ case '}': _advance(lexer); return TokenType::RBrace;
+ case '[': _advance(lexer); return TokenType::LBracket;
+ case ']': _advance(lexer); return TokenType::RBracket;
+ case '(': _advance(lexer); return TokenType::LParent;
+ case ')': _advance(lexer); return TokenType::RParent;
+
+ case '?': _advance(lexer); return TokenType::QuestionMark;
+ case '@': _advance(lexer); return TokenType::At;
+ case '$':
{
_advance(lexer);
- if(_peek(lexer) == '$')
+ if (_peek(lexer) == '$')
{
_advance(lexer);
return TokenType::DollarDollar;
}
return TokenType::Dollar;
}
+ }
- }
-
- // We treat all unicode characters as a part of an identifier.
- if (isNonAsciiCodePoint(nextCodePoint))
- {
- _lexIdentifier(lexer);
- return TokenType::Identifier;
- }
+ // We treat all unicode characters as a part of an identifier.
+ if (isNonAsciiCodePoint(nextCodePoint))
+ {
+ _lexIdentifier(lexer);
+ return TokenType::Identifier;
+ }
- {
- // If none of the above cases matched, then we have an
- // unexpected/invalid character.
+ {
+ // If none of the above cases matched, then we have an
+ // unexpected/invalid character.
- auto loc = _getSourceLoc(lexer);
- int c = _advance(lexer);
+ auto loc = _getSourceLoc(lexer);
+ int c = _advance(lexer);
- if (auto sink = lexer->getDiagnosticSink())
+ if (auto sink = lexer->getDiagnosticSink())
+ {
+ if (c >= 0x20 && c <= 0x7E)
{
- if(c >= 0x20 && c <= 0x7E)
- {
- char buffer[] = { (char) c, 0 };
- sink->diagnose(loc, LexerDiagnostics::illegalCharacterPrint, buffer);
- }
- else if(c == kEOF)
- {
- sink->diagnose(loc, LexerDiagnostics::unexpectedEndOfInput);
- }
- else
- {
- // Fallback: print as hexadecimal
- sink->diagnose(loc, LexerDiagnostics::illegalCharacterHex, String((unsigned char)c, 16));
- }
+ char buffer[] = {(char)c, 0};
+ sink->diagnose(loc, LexerDiagnostics::illegalCharacterPrint, buffer);
+ }
+ else if (c == kEOF)
+ {
+ sink->diagnose(loc, LexerDiagnostics::unexpectedEndOfInput);
+ }
+ else
+ {
+ // Fallback: print as hexadecimal
+ sink->diagnose(
+ loc,
+ LexerDiagnostics::illegalCharacterHex,
+ String((unsigned char)c, 16));
}
-
- return TokenType::Invalid;
}
+
+ return TokenType::Invalid;
}
+}
- Token Lexer::lexToken()
+Token Lexer::lexToken()
+{
+ for (;;)
{
- for(;;)
- {
- Token token;
- token.loc = _getSourceLoc(this);
+ Token token;
+ token.loc = _getSourceLoc(this);
- char const* textBegin = m_cursor;
+ char const* textBegin = m_cursor;
- auto tokenType = _lexTokenImpl(this);
+ auto tokenType = _lexTokenImpl(this);
- // The flags on the token we just lexed will be based
- // on the current state of the lexer.
- //
- auto tokenFlags = m_tokenFlags;
- //
- // Depending on what kind of token we just lexed, the
- // flags that will be used for the *next* token might
- // need to be updated.
- //
- switch(tokenType)
+ // The flags on the token we just lexed will be based
+ // on the current state of the lexer.
+ //
+ auto tokenFlags = m_tokenFlags;
+ //
+ // Depending on what kind of token we just lexed, the
+ // flags that will be used for the *next* token might
+ // need to be updated.
+ //
+ switch (tokenType)
+ {
+ case TokenType::NewLine:
{
- case TokenType::NewLine:
- {
- // If we just reached the end of a line, then the next token
- // should count as being at the start of a line, and also after
- // whitespace.
- //
- m_tokenFlags = TokenFlag::AtStartOfLine | TokenFlag::AfterWhitespace;
- break;
- }
+ // If we just reached the end of a line, then the next token
+ // should count as being at the start of a line, and also after
+ // whitespace.
+ //
+ m_tokenFlags = TokenFlag::AtStartOfLine | TokenFlag::AfterWhitespace;
+ break;
+ }
- case TokenType::WhiteSpace:
- case TokenType::BlockComment:
- case TokenType::LineComment:
- {
- // True horizontal whitespace and comments both count as whitespace.
- //
- // Note that a line comment does not include the terminating newline,
- // we do not need to set `AtStartOfLine` here.
- //
- m_tokenFlags |= TokenFlag::AfterWhitespace;
- break;
- }
-
- default:
- {
- // If we read some token other then the above cases, then we are
- // neither after whitespace nor at the start of a line.
- //
- m_tokenFlags = 0;
- break;
- }
+ case TokenType::WhiteSpace:
+ case TokenType::BlockComment:
+ case TokenType::LineComment:
+ {
+ // True horizontal whitespace and comments both count as whitespace.
+ //
+ // Note that a line comment does not include the terminating newline,
+ // we do not need to set `AtStartOfLine` here.
+ //
+ m_tokenFlags |= TokenFlag::AfterWhitespace;
+ break;
+ }
+
+ default:
+ {
+ // If we read some token other than the above cases, then we are
+ // neither after whitespace nor at the start of a line.
+ //
+ m_tokenFlags = 0;
+ break;
}
+ }
- token.type = tokenType;
- token.flags = tokenFlags;
+ token.type = tokenType;
+ token.flags = tokenFlags;
- char const* textEnd = m_cursor;
+ char const* textEnd = m_cursor;
- // Note(tfoley): `StringBuilder::Append()` seems to crash when appending zero bytes
- if(textEnd != textBegin)
+ // Note(tfoley): `StringBuilder::Append()` seems to crash when appending zero bytes
+ if (textEnd != textBegin)
+ {
+ // "scrubbing" token value here to remove escaped newlines...
+ //
+ // Only perform this work if we encountered an escaped newline
+ // while lexing this token (e.g., keep a flag on the lexer), or
+ // do it on-demand when the actual value of the token is needed.
+ if (tokenFlags & TokenFlag::ScrubbingNeeded)
{
- // "scrubbing" token value here to remove escaped newlines...
- //
- // Only perform this work if we encountered an escaped newline
- // while lexing this token (e.g., keep a flag on the lexer), or
- // do it on-demand when the actual value of the token is needed.
- if (tokenFlags & TokenFlag::ScrubbingNeeded)
- {
- // Allocate space that will always be more than enough for stripped contents
- char* startDst = (char*)m_memoryArena->allocateUnaligned(textEnd - textBegin);
- char* dst = startDst;
+ // Allocate space that will always be more than enough for stripped contents
+ char* startDst = (char*)m_memoryArena->allocateUnaligned(textEnd - textBegin);
+ char* dst = startDst;
- auto tt = textBegin;
- while (tt != textEnd)
+ auto tt = textBegin;
+ while (tt != textEnd)
+ {
+ char c = *tt++;
+ if (c == '\\')
{
- char c = *tt++;
- if (c == '\\')
+ char d = *tt;
+ switch (d)
{
- char d = *tt;
- switch (d)
- {
- case '\r': case '\n':
+ case '\r':
+ case '\n':
{
tt++;
char e = *tt;
@@ -1502,116 +1565,111 @@ namespace Slang
}
continue;
- default:
- break;
- }
+ default: break;
}
- *dst++ = c;
}
- token.setContent(UnownedStringSlice(startDst, dst));
- }
- else
- {
- token.setContent(UnownedStringSlice(textBegin, textEnd));
+ *dst++ = c;
}
+ token.setContent(UnownedStringSlice(startDst, dst));
}
-
- if (m_namePool)
+ else
{
- if (tokenType == TokenType::Identifier || tokenType == TokenType::CompletionRequest)
- {
- token.setName(m_namePool->getName(token.getContent()));
- }
+ token.setContent(UnownedStringSlice(textBegin, textEnd));
}
-
- return token;
}
- }
- TokenList Lexer::lexAllSemanticTokens()
- {
- TokenList tokenList;
- for(;;)
+ if (m_namePool)
{
- Token token = lexToken();
-
- // We are only interested intokens that are semantically
- // significant, so we will skip over forms of whitespace
- // and comments.
- //
- switch( token.type )
+ if (tokenType == TokenType::Identifier || tokenType == TokenType::CompletionRequest)
{
- default:
- break;
-
- case TokenType::WhiteSpace:
- case TokenType::BlockComment:
- case TokenType::LineComment:
- case TokenType::NewLine:
- continue;
+ token.setName(m_namePool->getName(token.getContent()));
}
-
- tokenList.add(token);
- if(token.type == TokenType::EndOfFile)
- return tokenList;
}
+
+ return token;
}
+}
- TokenList Lexer::lexAllMarkupTokens()
+TokenList Lexer::lexAllSemanticTokens()
+{
+ TokenList tokenList;
+ for (;;)
{
- TokenList tokenList;
- for(;;)
- {
- Token token = lexToken();
- switch( token.type )
- {
- default:
- break;
+ Token token = lexToken();
- case TokenType::WhiteSpace:
- case TokenType::NewLine:
- continue;
- }
+ // We are only interested in tokens that are semantically
+ // significant, so we will skip over forms of whitespace
+ // and comments.
+ //
+ switch (token.type)
+ {
+ default: break;
- tokenList.add(token);
- if(token.type == TokenType::EndOfFile)
- return tokenList;
+ case TokenType::WhiteSpace:
+ case TokenType::BlockComment:
+ case TokenType::LineComment:
+ case TokenType::NewLine: continue;
}
+
+ tokenList.add(token);
+ if (token.type == TokenType::EndOfFile)
+ return tokenList;
}
+}
- /* static */UnownedStringSlice Lexer::sourceLocationLexer(const UnownedStringSlice& in)
+TokenList Lexer::lexAllMarkupTokens()
+{
+ TokenList tokenList;
+ for (;;)
{
- Lexer lexer;
+ Token token = lexToken();
+ switch (token.type)
+ {
+ default: break;
- SourceManager sourceManager;
- sourceManager.initialize(nullptr, nullptr);
+ case TokenType::WhiteSpace:
+ case TokenType::NewLine: continue;
+ }
- auto sourceFile = sourceManager.createSourceFileWithString(PathInfo::makeUnknown(), in);
- auto sourceView = sourceManager.createSourceView(sourceFile, nullptr, SourceLoc::fromRaw(0));
+ tokenList.add(token);
+ if (token.type == TokenType::EndOfFile)
+ return tokenList;
+ }
+}
- DiagnosticSink sink(&sourceManager, nullptr);
+/* static */ UnownedStringSlice Lexer::sourceLocationLexer(const UnownedStringSlice& in)
+{
+ Lexer lexer;
- MemoryArena arena;
+ SourceManager sourceManager;
+ sourceManager.initialize(nullptr, nullptr);
- RootNamePool rootNamePool;
- NamePool namePool;
- namePool.setRootNamePool(&rootNamePool);
+ auto sourceFile = sourceManager.createSourceFileWithString(PathInfo::makeUnknown(), in);
+ auto sourceView = sourceManager.createSourceView(sourceFile, nullptr, SourceLoc::fromRaw(0));
- lexer.initialize(sourceView, &sink, &namePool, &arena);
+ DiagnosticSink sink(&sourceManager, nullptr);
- Token tok = lexer.lexToken();
+ MemoryArena arena;
- if (tok.type == TokenType::Invalid)
- {
- return UnownedStringSlice();
- }
+ RootNamePool rootNamePool;
+ NamePool namePool;
+ namePool.setRootNamePool(&rootNamePool);
- const int offset = sourceView->getRange().getOffset(tok.loc);
+ lexer.initialize(sourceView, &sink, &namePool, &arena);
- SLANG_ASSERT(offset >= 0 && offset <= in.getLength());
- SLANG_ASSERT(Index(offset + tok.charsCount) <= in.getLength());
+ Token tok = lexer.lexToken();
- return UnownedStringSlice(in.begin() + offset, in.begin() + offset + tok.charsCount);
+ if (tok.type == TokenType::Invalid)
+ {
+ return UnownedStringSlice();
}
+ const int offset = sourceView->getRange().getOffset(tok.loc);
+
+ SLANG_ASSERT(offset >= 0 && offset <= in.getLength());
+ SLANG_ASSERT(Index(offset + tok.charsCount) <= in.getLength());
+
+ return UnownedStringSlice(in.begin() + offset, in.begin() + offset + tok.charsCount);
}
+
+} // namespace Slang