diff options
| author | Tim Foley <tfoleyNV@users.noreply.github.com> | 2017-06-12 15:34:12 -0700 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2017-06-12 15:34:12 -0700 |
| commit | 7fc4c40b17f340800d6616e0bae111606cef18cc (patch) | |
| tree | e1c59d0b48397e8e33428e65a2e0f3c6925c65d9 /source/slang/lexer.cpp | |
| parent | ce90fec1c795eaafbd91d7b8a83501a57eeb1946 (diff) | |
| parent | 97fc943b476e2482bd1f99c9e76f0dfe8fdd36e0 (diff) | |
Merge pull request #4 from tfoleyNV/escaped-newlines
Escaped newlines
Diffstat (limited to 'source/slang/lexer.cpp')
| -rw-r--r-- | source/slang/lexer.cpp | 116 |
1 files changed, 107 insertions, 9 deletions
diff --git a/source/slang/lexer.cpp b/source/slang/lexer.cpp index 7234c4983..5127c876c 100644 --- a/source/slang/lexer.cpp +++ b/source/slang/lexer.cpp @@ -99,40 +99,138 @@ namespace Slang enum { kEOF = -1 }; - static int peek(Lexer* lexer) + // Get the next input byte, without any handling of + // escaped newlines, non-ASCII code points, source locations, etc. + static int peekRaw(Lexer* lexer) { + // If we are at the end of the input, return a designated end-of-file value if(lexer->cursor == lexer->end) return kEOF; + // Otherwise, just look at the next byte return *lexer->cursor; } - static int advance(Lexer* lexer) + // Read one input byte without any special handling (similar to `peekRaw`) + static int advanceRaw(Lexer* lexer) { - if(lexer->cursor == lexer->end) - return kEOF; + // The logic here is basically the same as for `peekRaw()`, + // escape we advance `cursor` if we aren't at the end. - lexer->loc.Col++; - lexer->loc.Pos++; + if (lexer->cursor == lexer->end) + return kEOF; return *lexer->cursor++; } + // When the cursor is already at the first byte of an end-of-line sequence, + // consume one or two bytes that compose the sequence. + // + // Basically, a newline is one of: + // + // "\n" + // "\r" + // "\r\n" + // "\n\r" + // + // We always look for the longest match possible. + // static void handleNewLine(Lexer* lexer) { - int c = advance(lexer); + int c = advanceRaw(lexer); assert(c == '\n' || c == '\r'); - int d = peek(lexer); + int d = peekRaw(lexer); if( (c ^ d) == ('\n' ^ '\r') ) { - advance(lexer); + advanceRaw(lexer); } lexer->loc.Line++; lexer->loc.Col = 1; } + // Look ahead one code point, dealing with complications like + // escaped newlines. + static int peek(Lexer* lexer) + { + // Look at the next raw byte, and decide what to do + int c = peekRaw(lexer); + + if(c == '\\') + { + // We might have a backslash-escaped newline. + // Look at the next byte (if any) to see. + // + // Note(tfoley): We are assuming a null-terminated input here, + // so that we can safely look at the next byte without issue. + int d = lexer->cursor[1]; + switch (d) + { + case '\r': case '\n': + // The newline was escaped, so return the character after *that* + return lexer->cursor[2]; + + default: + break; + } + } + // TODO: handle UTF-8 encoding for non-ASCII code points here + + // Default case is to just hand along the byte we read as an ASCII code point. + return c; + } + + // Get the next code point from the input, and advance the cursor. + static int advance(Lexer* lexer) + { + // We are going to loop, but only as a way of handling + // escaped line endings. + for (;;) + { + // If we are at the end of the input, then the task is easy. + if (lexer->cursor == lexer->end) + return kEOF; + + // Look at the next raw byte, and decide what to do + int c = *lexer->cursor++; + + if (c == '\\') + { + // We might have a backslash-escaped newline. + // Look at the next byte (if any) to see. + // + // Note(tfoley): We are assuming a null-terminated input here, + // so that we can safely look at the next byte without issue. + int d = *lexer->cursor; + switch (d) + { + case '\r': case '\n': + // handle the end-of-line for our source location tracking + handleNewLine(lexer); + + // Now try again, looking at the character after the + // escaped nmewline. + continue; + + default: + break; + } + } + + // TODO: Need to handle non-ASCII code points. + + // Default case is to advance by one location + // and return the raw byte we saw. + + lexer->loc.Col++; + lexer->loc.Pos++; + + return c; + } + } + + static void lexLineComment(Lexer* lexer) { for(;;) |
