Merge pull request #4 from tfoleyNV/escaped-newlines

Escaped newlines
author: Tim Foley <tfoleyNV@users.noreply.github.com> 2017-06-12 15:34:12 -0700
committer: GitHub <noreply@github.com> 2017-06-12 15:34:12 -0700
commit: 7fc4c40b17f340800d6616e0bae111606cef18cc (patch)
tree: e1c59d0b48397e8e33428e65a2e0f3c6925c65d9
parent: ce90fec1c795eaafbd91d7b8a83501a57eeb1946 (diff)
parent: 97fc943b476e2482bd1f99c9e76f0dfe8fdd36e0 (diff)
4 files changed, 158 insertions, 12 deletions
diff --git a/slang.sln b/slang.sln
index ba96d6014..502bd60a1 100644
--- a/slang.sln
+++ b/slang.sln
@@ -8,8 +8,6 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "hello", "examples\hello\hel
 EndProject
 Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "core", "source\core\core.vcxproj", "{F9BE7957-8399-899E-0C49-E714FDDD4B65}"
 EndProject
-Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "utils", "utils", "{37016FF6-E6AF-4316-BC2B-0152FC0C969E}"
-EndProject
 Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "tools", "tools", "{74C5F0DC-93BB-4BF3-AC65-8C65491570F7}"
 EndProject
 Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "slang", "source\slang\slang.vcxproj", "{DB00DA62-0533-4AFD-B59F-A67D5B3A0808}"
diff --git a/source/slang/lexer.cpp b/source/slang/lexer.cpp
index 7234c4983..5127c876c 100644
--- a/source/slang/lexer.cpp
+++ b/source/slang/lexer.cpp
@@ -99,40 +99,138 @@ namespace Slang
 
         enum { kEOF = -1 };
 
-        static int peek(Lexer* lexer)
+        // Get the next input byte, without any handling of
+        // escaped newlines, non-ASCII code points, source locations, etc.
+        static int peekRaw(Lexer* lexer)
         {
+            // If we are at the end of the input, return a designated end-of-file value
             if(lexer->cursor == lexer->end)
                 return kEOF;
 
+            // Otherwise, just look at the next byte
             return *lexer->cursor;
         }
 
-        static int advance(Lexer* lexer)
+        // Read one input byte without any special handling (similar to `peekRaw`)
+        static int advanceRaw(Lexer* lexer)
         {
-            if(lexer->cursor == lexer->end)
-                return kEOF;
+            // The logic here is basically the same as for `peekRaw()`,
+            // except that we advance `cursor` if we aren't at the end.
 
-            lexer->loc.Col++;
-            lexer->loc.Pos++;
+            if (lexer->cursor == lexer->end)
+                return kEOF;
 
             return *lexer->cursor++;
         }
 
+        // When the cursor is already at the first byte of an end-of-line sequence,
+        // consume one or two bytes that compose the sequence.
+        //
+        // Basically, a newline is one of:
+        //
+        //  "\n"
+        //  "\r"
+        //  "\r\n"
+        //  "\n\r"
+        //
+        // We always look for the longest match possible.
+        //
         static void handleNewLine(Lexer* lexer)
         {
-            int c = advance(lexer);
+            int c = advanceRaw(lexer);
             assert(c == '\n' || c == '\r');
 
-            int d = peek(lexer);
+            int d = peekRaw(lexer);
             if( (c ^ d) == ('\n' ^ '\r') )
             {
-                advance(lexer);
+                advanceRaw(lexer);
             }
 
             lexer->loc.Line++;
             lexer->loc.Col = 1;
         }
 
+        // Look ahead one code point, dealing with complications like
+        // escaped newlines.
+        static int peek(Lexer* lexer)
+        {
+            // Look at the next raw byte, and decide what to do
+            int c = peekRaw(lexer);
+
+            if(c == '\\')
+            {
+                // We might have a backslash-escaped newline.
+                // Look at the next byte (if any) to see.
+                //
+                // Note(tfoley): We are assuming a null-terminated input here,
+                // so that we can safely look at the next byte without issue.
+                int d = lexer->cursor[1];
+                switch (d)
+                {
+                case '\r': case '\n':
+                    // The newline byte was escaped, so return the byte after *that* (for "\r\n" this is the '\n')
+                    return lexer->cursor[2];
+
+                default:
+                    break;
+                }
+            }
+            // TODO: handle UTF-8 encoding for non-ASCII code points here
+
+            // Default case is to just hand along the byte we read as an ASCII code point.
+            return c;
+        }
+
+        // Get the next code point from the input, and advance the cursor.
+        static int advance(Lexer* lexer)
+        {
+            // We are going to loop, but only as a way of handling
+            // escaped line endings.
+            for (;;)
+            {
+                // If we are at the end of the input, then the task is easy.
+                if (lexer->cursor == lexer->end)
+                    return kEOF;
+
+                // Look at the next raw byte, and decide what to do
+                int c = *lexer->cursor++;
+
+                if (c == '\\')
+                {
+                    // We might have a backslash-escaped newline.
+                    // Look at the next byte (if any) to see.
+                    //
+                    // Note(tfoley): We are assuming a null-terminated input here,
+                    // so that we can safely look at the next byte without issue.
+                    int d = *lexer->cursor;
+                    switch (d)
+                    {
+                    case '\r': case '\n':
+                        // handle the end-of-line for our source location tracking
+                        handleNewLine(lexer);
+
+                        // Now try again, looking at the character after the
+                        // escaped newline.
+                        continue;
+
+                    default:
+                        break;
+                    }
+                }
+
+                // TODO: Need to handle non-ASCII code points.
+
+                // Default case is to advance by one location
+                // and return the raw byte we saw.
+
+                lexer->loc.Col++;
+                lexer->loc.Pos++;
+
+                return c;
+            }
+        }
+
+
         static void lexLineComment(Lexer* lexer)
         {
             for(;;)
diff --git a/source/slang/preprocessor.cpp b/source/slang/preprocessor.cpp
index cdde2591d..60329c275 100644
--- a/source/slang/preprocessor.cpp
+++ b/source/slang/preprocessor.cpp
@@ -450,8 +450,35 @@ static PreprocessorMacro* LookupMacro(PreprocessorEnvironment* environment, Stri
 
 static PreprocessorEnvironment* GetCurrentEnvironment(Preprocessor* preprocessor)
 {
+    // The environment we will use for looking up a macro is associated
+    // with the current input stream (because it may include entries
+    // for macro arguments).
+    //
+    // We need to be careful, though, when we are at the end of an
+    // input stream (e.g., representing one argument), so that we
+    // don't use its environment.
+
     PreprocessorInputStream* inputStream = preprocessor->inputStream;
-    return inputStream ? inputStream->environment : &preprocessor->globalEnv;
+
+    for(;;)
+    {
+        // If there is no input stream that isn't at its end,
+        // then fall back to the global environment.
+        if (!inputStream)
+            return &preprocessor->globalEnv;
+
+        // If the current input stream is at its end, then
+        // fall back to its parent stream.
+        if (inputStream->tokenReader.PeekTokenType() == TokenType::EndOfFile)
+        {
+            inputStream = inputStream->parent;
+            continue;
+        }
+
+        // If we've found an active stream that isn't at its end,
+        // then use that for lookup.
+        return inputStream->environment;
+    }
 }
 
 static PreprocessorMacro* LookupMacro(Preprocessor* preprocessor, String const& name)
diff --git a/tests/preprocessor/escaped-newlines.slang b/tests/preprocessor/escaped-newlines.slang
new file mode 100644
index 000000000..1c1fa8f10
--- /dev/null
+++ b/tests/preprocessor/escaped-newlines.slang
@@ -0,0 +1,23 @@
+//TEST:SIMPLE:
+
+// Test support for escaped newlines in macro definitions.
+//
+// A complete lexer would handle backslash-escaped newlines
+// in every possible context (including, e.g., in the middle
+// of an identifier), but we are not going to go to such
+// lengths right now.
+
+#define FOO(x, y) 	\
+	x				\
+	y				\
+	/* */
+
+FOO(float, bar)(float a)
+{
+	FOO(return, a);
+}
+
+float foo(float x)
+{
+	return bar(x);
+}
author	Tim Foley <tfoleyNV@users.noreply.github.com>	2017-06-12 15:34:12 -0700
committer	GitHub <noreply@github.com>	2017-06-12 15:34:12 -0700
commit	7fc4c40b17f340800d6616e0bae111606cef18cc (patch)
tree	e1c59d0b48397e8e33428e65a2e0f3c6925c65d9
parent	ce90fec1c795eaafbd91d7b8a83501a57eeb1946 (diff)
parent	97fc943b476e2482bd1f99c9e76f0dfe8fdd36e0 (diff)