summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Tim Foley <tfoleyNV@users.noreply.github.com> 2017-06-12 15:34:12 -0700
committer GitHub <noreply@github.com> 2017-06-12 15:34:12 -0700
commit 7fc4c40b17f340800d6616e0bae111606cef18cc (patch)
tree e1c59d0b48397e8e33428e65a2e0f3c6925c65d9
parent ce90fec1c795eaafbd91d7b8a83501a57eeb1946 (diff)
parent 97fc943b476e2482bd1f99c9e76f0dfe8fdd36e0 (diff)
Merge pull request #4 from tfoleyNV/escaped-newlines
Escaped newlines
-rw-r--r-- slang.sln 2
-rw-r--r-- source/slang/lexer.cpp 116
-rw-r--r-- source/slang/preprocessor.cpp 29
-rw-r--r-- tests/preprocessor/escaped-newlines.slang 23
4 files changed, 158 insertions, 12 deletions
diff --git a/slang.sln b/slang.sln
index ba96d6014..502bd60a1 100644
--- a/slang.sln
+++ b/slang.sln
@@ -8,8 +8,6 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "hello", "examples\hello\hel
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "core", "source\core\core.vcxproj", "{F9BE7957-8399-899E-0C49-E714FDDD4B65}"
EndProject
-Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "utils", "utils", "{37016FF6-E6AF-4316-BC2B-0152FC0C969E}"
-EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "tools", "tools", "{74C5F0DC-93BB-4BF3-AC65-8C65491570F7}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "slang", "source\slang\slang.vcxproj", "{DB00DA62-0533-4AFD-B59F-A67D5B3A0808}"
diff --git a/source/slang/lexer.cpp b/source/slang/lexer.cpp
index 7234c4983..5127c876c 100644
--- a/source/slang/lexer.cpp
+++ b/source/slang/lexer.cpp
@@ -99,40 +99,138 @@ namespace Slang
enum { kEOF = -1 };
- static int peek(Lexer* lexer)
+ // Get the next input byte, without any handling of
+ // escaped newlines, non-ASCII code points, source locations, etc.
+ static int peekRaw(Lexer* lexer)
{
+ // If we are at the end of the input, return a designated end-of-file value
if(lexer->cursor == lexer->end)
return kEOF;
+ // Otherwise, just look at the next byte
return *lexer->cursor;
}
- static int advance(Lexer* lexer)
+ // Read one input byte without any special handling (similar to `peekRaw`)
+ static int advanceRaw(Lexer* lexer)
{
- if(lexer->cursor == lexer->end)
- return kEOF;
+ // The logic here is basically the same as for `peekRaw()`,
+ // except we advance `cursor` if we aren't at the end.
- lexer->loc.Col++;
- lexer->loc.Pos++;
+ if (lexer->cursor == lexer->end)
+ return kEOF;
return *lexer->cursor++;
}
+ // When the cursor is already at the first byte of an end-of-line sequence,
+ // consume one or two bytes that compose the sequence.
+ //
+ // Basically, a newline is one of:
+ //
+ // "\n"
+ // "\r"
+ // "\r\n"
+ // "\n\r"
+ //
+ // We always look for the longest match possible.
+ //
static void handleNewLine(Lexer* lexer)
{
- int c = advance(lexer);
+ int c = advanceRaw(lexer);
assert(c == '\n' || c == '\r');
- int d = peek(lexer);
+ int d = peekRaw(lexer);
if( (c ^ d) == ('\n' ^ '\r') )
{
- advance(lexer);
+ advanceRaw(lexer);
}
lexer->loc.Line++;
lexer->loc.Col = 1;
}
+ // Look ahead one code point, dealing with complications like
+ // escaped newlines.
+ static int peek(Lexer* lexer)
+ {
+ // Look at the next raw byte, and decide what to do
+ int c = peekRaw(lexer);
+
+ if(c == '\\')
+ {
+ // We might have a backslash-escaped newline.
+ // Look at the next byte (if any) to see.
+ //
+ // Note(tfoley): We are assuming a null-terminated input here,
+ // so that we can safely look at the next byte without issue.
+ int d = lexer->cursor[1];
+ switch (d)
+ {
+ case '\r': case '\n':
+ // The newline was escaped, so return the character after *that*
+ return lexer->cursor[2];
+
+ default:
+ break;
+ }
+ }
+ // TODO: handle UTF-8 encoding for non-ASCII code points here
+
+ // Default case is to just hand along the byte we read as an ASCII code point.
+ return c;
+ }
+
+ // Get the next code point from the input, and advance the cursor.
+ static int advance(Lexer* lexer)
+ {
+ // We are going to loop, but only as a way of handling
+ // escaped line endings.
+ for (;;)
+ {
+ // If we are at the end of the input, then the task is easy.
+ if (lexer->cursor == lexer->end)
+ return kEOF;
+
+ // Look at the next raw byte, and decide what to do
+ int c = *lexer->cursor++;
+
+ if (c == '\\')
+ {
+ // We might have a backslash-escaped newline.
+ // Look at the next byte (if any) to see.
+ //
+ // Note(tfoley): We are assuming a null-terminated input here,
+ // so that we can safely look at the next byte without issue.
+ int d = *lexer->cursor;
+ switch (d)
+ {
+ case '\r': case '\n':
+ // handle the end-of-line for our source location tracking
+ handleNewLine(lexer);
+
+ // Now try again, looking at the character after the
+ // escaped newline.
+ continue;
+
+ default:
+ break;
+ }
+ }
+
+ // TODO: Need to handle non-ASCII code points.
+
+ // Default case is to advance by one location
+ // and return the raw byte we saw.
+
+ lexer->loc.Col++;
+ lexer->loc.Pos++;
+
+ return c;
+ }
+ }
+
+
static void lexLineComment(Lexer* lexer)
{
for(;;)
diff --git a/source/slang/preprocessor.cpp b/source/slang/preprocessor.cpp
index cdde2591d..60329c275 100644
--- a/source/slang/preprocessor.cpp
+++ b/source/slang/preprocessor.cpp
@@ -450,8 +450,35 @@ static PreprocessorMacro* LookupMacro(PreprocessorEnvironment* environment, Stri
static PreprocessorEnvironment* GetCurrentEnvironment(Preprocessor* preprocessor)
{
+ // The environment we will use for looking up a macro is associated
+ // with the current input stream (because it may include entries
+ // for macro arguments).
+ //
+ // We need to be careful, though, when we are at the end of an
+ // input stream (e.g., representing one argument), so that we
+ // don't use its environment.
+
PreprocessorInputStream* inputStream = preprocessor->inputStream;
- return inputStream ? inputStream->environment : &preprocessor->globalEnv;
+
+ for(;;)
+ {
+ // If there is no input stream that isn't at its end,
+ // then fall back to the global environment.
+ if (!inputStream)
+ return &preprocessor->globalEnv;
+
+ // If the current input stream is at its end, then
+ // fall back to its parent stream.
+ if (inputStream->tokenReader.PeekTokenType() == TokenType::EndOfFile)
+ {
+ inputStream = inputStream->parent;
+ continue;
+ }
+
+ // If we've found an active stream that isn't at its end,
+ // then use that for lookup.
+ return inputStream->environment;
+ }
}
static PreprocessorMacro* LookupMacro(Preprocessor* preprocessor, String const& name)
diff --git a/tests/preprocessor/escaped-newlines.slang b/tests/preprocessor/escaped-newlines.slang
new file mode 100644
index 000000000..1c1fa8f10
--- /dev/null
+++ b/tests/preprocessor/escaped-newlines.slang
@@ -0,0 +1,23 @@
+//TEST:SIMPLE:
+
+// Test support for escaped newlines in macro definitions.
+//
+// A complete lexer would handle backslash-escaped newlines
+// in every possible context (including, e.g., in the middle
+// of an identifier), but we are not going to go to such
+// lengths right now.
+
+#define FOO(x, y) \
+ x \
+ y \
+ /* */
+
+FOO(float, bar)(float a)
+{
+ FOO(return, a);
+}
+
+float foo(float x)
+{
+ return bar(x);
+}