diff options
| author | Tim Foley <tfoleyNV@users.noreply.github.com> | 2017-06-12 15:34:12 -0700 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2017-06-12 15:34:12 -0700 |
| commit | 7fc4c40b17f340800d6616e0bae111606cef18cc (patch) | |
| tree | e1c59d0b48397e8e33428e65a2e0f3c6925c65d9 | |
| parent | ce90fec1c795eaafbd91d7b8a83501a57eeb1946 (diff) | |
| parent | 97fc943b476e2482bd1f99c9e76f0dfe8fdd36e0 (diff) | |
Merge pull request #4 from tfoleyNV/escaped-newlines
Escaped newlines
| -rw-r--r-- | slang.sln | 2 | ||||
| -rw-r--r-- | source/slang/lexer.cpp | 116 | ||||
| -rw-r--r-- | source/slang/preprocessor.cpp | 29 | ||||
| -rw-r--r-- | tests/preprocessor/escaped-newlines.slang | 23 |
4 files changed, 158 insertions, 12 deletions
@@ -8,8 +8,6 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "hello", "examples\hello\hel EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "core", "source\core\core.vcxproj", "{F9BE7957-8399-899E-0C49-E714FDDD4B65}" EndProject -Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "utils", "utils", "{37016FF6-E6AF-4316-BC2B-0152FC0C969E}" -EndProject Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "tools", "tools", "{74C5F0DC-93BB-4BF3-AC65-8C65491570F7}" EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "slang", "source\slang\slang.vcxproj", "{DB00DA62-0533-4AFD-B59F-A67D5B3A0808}" diff --git a/source/slang/lexer.cpp b/source/slang/lexer.cpp index 7234c4983..5127c876c 100644 --- a/source/slang/lexer.cpp +++ b/source/slang/lexer.cpp @@ -99,40 +99,138 @@ namespace Slang enum { kEOF = -1 }; - static int peek(Lexer* lexer) + // Get the next input byte, without any handling of + // escaped newlines, non-ASCII code points, source locations, etc. + static int peekRaw(Lexer* lexer) { + // If we are at the end of the input, return a designated end-of-file value if(lexer->cursor == lexer->end) return kEOF; + // Otherwise, just look at the next byte return *lexer->cursor; } - static int advance(Lexer* lexer) + // Read one input byte without any special handling (similar to `peekRaw`) + static int advanceRaw(Lexer* lexer) { - if(lexer->cursor == lexer->end) - return kEOF; + // The logic here is basically the same as for `peekRaw()`, + // except we advance `cursor` if we aren't at the end. - lexer->loc.Col++; - lexer->loc.Pos++; + if (lexer->cursor == lexer->end) + return kEOF; return *lexer->cursor++; } + // When the cursor is already at the first byte of an end-of-line sequence, + // consume one or two bytes that compose the sequence. + // + // Basically, a newline is one of: + // + // "\n" + // "\r" + // "\r\n" + // "\n\r" + // + // We always look for the longest match possible. 
+ // static void handleNewLine(Lexer* lexer) { - int c = advance(lexer); + int c = advanceRaw(lexer); assert(c == '\n' || c == '\r'); - int d = peek(lexer); + int d = peekRaw(lexer); if( (c ^ d) == ('\n' ^ '\r') ) { - advance(lexer); + advanceRaw(lexer); } lexer->loc.Line++; lexer->loc.Col = 1; } + // Look ahead one code point, dealing with complications like + // escaped newlines. + static int peek(Lexer* lexer) + { + // Look at the next raw byte, and decide what to do + int c = peekRaw(lexer); + + if(c == '\\') + { + // We might have a backslash-escaped newline. + // Look at the next byte (if any) to see. + // + // Note(tfoley): We are assuming a null-terminated input here, + // so that we can safely look at the next byte without issue. + int d = lexer->cursor[1]; + switch (d) + { + case '\r': case '\n': + // The newline was escaped, so return the character after *that* + return lexer->cursor[2]; + + default: + break; + } + } + // TODO: handle UTF-8 encoding for non-ASCII code points here + + // Default case is to just hand along the byte we read as an ASCII code point. + return c; + } + + // Get the next code point from the input, and advance the cursor. + static int advance(Lexer* lexer) + { + // We are going to loop, but only as a way of handling + // escaped line endings. + for (;;) + { + // If we are at the end of the input, then the task is easy. + if (lexer->cursor == lexer->end) + return kEOF; + + // Look at the next raw byte, and decide what to do + int c = *lexer->cursor++; + + if (c == '\\') + { + // We might have a backslash-escaped newline. + // Look at the next byte (if any) to see. + // + // Note(tfoley): We are assuming a null-terminated input here, + // so that we can safely look at the next byte without issue. + int d = *lexer->cursor; + switch (d) + { + case '\r': case '\n': + // handle the end-of-line for our source location tracking + handleNewLine(lexer); + + // Now try again, looking at the character after the + // escaped newline. 
+ continue; + + default: + break; + } + } + + // TODO: Need to handle non-ASCII code points. + + // Default case is to advance by one location + // and return the raw byte we saw. + + lexer->loc.Col++; + lexer->loc.Pos++; + + return c; + } + } + + static void lexLineComment(Lexer* lexer) { for(;;) diff --git a/source/slang/preprocessor.cpp b/source/slang/preprocessor.cpp index cdde2591d..60329c275 100644 --- a/source/slang/preprocessor.cpp +++ b/source/slang/preprocessor.cpp @@ -450,8 +450,35 @@ static PreprocessorMacro* LookupMacro(PreprocessorEnvironment* environment, Stri static PreprocessorEnvironment* GetCurrentEnvironment(Preprocessor* preprocessor) { + // The environment we will use for looking up a macro is associated + // with the current input stream (because it may include entries + // for macro arguments). + // + // We need to be careful, though, when we are at the end of an + // input stream (e.g., representing one argument), so that we + // don't use its environment. + PreprocessorInputStream* inputStream = preprocessor->inputStream; - return inputStream ? inputStream->environment : &preprocessor->globalEnv; + + for(;;) + { + // If there is no input stream that isn't at its end, + // then fall back to the global environment. + if (!inputStream) + return &preprocessor->globalEnv; + + // If the current input stream is at its end, then + // fall back to its parent stream. + if (inputStream->tokenReader.PeekTokenType() == TokenType::EndOfFile) + { + inputStream = inputStream->parent; + continue; + } + + // If we've found an active stream that isn't at its end, + // then use that for lookup. 
+ return inputStream->environment; + } } static PreprocessorMacro* LookupMacro(Preprocessor* preprocessor, String const& name) diff --git a/tests/preprocessor/escaped-newlines.slang b/tests/preprocessor/escaped-newlines.slang new file mode 100644 index 000000000..1c1fa8f10 --- /dev/null +++ b/tests/preprocessor/escaped-newlines.slang @@ -0,0 +1,23 @@ +//TEST:SIMPLE: + +// Test support for escaped newlines in macro definitions. +// +// A complete lexer would handle backslash-escaped newlines +// in every possible context (including, e.g., in the middle +// of an identifier), but we are not going to go to such +// lengths right now. + +#define FOO(x, y) \ + x \ + y \ + /* */ + +FOO(float, bar)(float a) +{ + FOO(return, a); +} + +float foo(float x) +{ + return bar(x); +} |
