summaryrefslogtreecommitdiff
path: root/source/core/slang-string-escape-util.cpp
diff options
context:
space:
mode:
authorjsmall-nvidia <jsmall@nvidia.com>2021-04-29 09:01:46 -0400
committerGitHub <noreply@github.com>2021-04-29 09:01:46 -0400
commit972bd3c4c24b06501c52127416afb763a066b8ad (patch)
treee3874d4952ac557d5c323bb1e43be4584c100afc /source/core/slang-string-escape-util.cpp
parent541d1cab81d895c406fc33cb476e37ce8a6a9702 (diff)
Support for escaped paths in tools (#1823)
* #include an absolute path didn't work - because paths were taken to always be relative. * Split out StringEscapeUtil. * Added StringEscapeUtil. * Fix typo in unix quoting type. * Small comment improvements. * Try to fix linux linking issue. * Fix typo. * Attempt to fix linux link issue. * Update VS proj even though nothing really changed. * Fix another typo issue. * Fix for windows issue. Fixed bug. * Make separate Utils for escaping. * Fix typo. * Split out into StringEscapeHandler. * Windows shell does handle removing quotes (so remove code to remove them). * Handle unescaping if not initiating using the shell. * Slight improvement around shell like decoding. * Simplify command extraction. * Add shared-library category type. * Fix bug in command extraction. * Typo in transcendental category. * Enable unit-test on in smoke test category. * Make parsing failing output as a failing test. * Fixes for transcendental tests. Disable tests that do not work. * Changed category parsing. * Removed the TestResult parameter from _gatherTestsForFile. Made testsList only output. * Remove testing if all tests were disabled. * Fix typo. * Disable path canonical test on linux because CI issue.
Diffstat (limited to 'source/core/slang-string-escape-util.cpp')
-rw-r--r--source/core/slang-string-escape-util.cpp548
1 files changed, 548 insertions, 0 deletions
diff --git a/source/core/slang-string-escape-util.cpp b/source/core/slang-string-escape-util.cpp
new file mode 100644
index 000000000..13fce6dc7
--- /dev/null
+++ b/source/core/slang-string-escape-util.cpp
@@ -0,0 +1,548 @@
+#include "slang-string-escape-util.h"
+
+#include "slang-char-util.h"
+#include "slang-text-io.h"
+
+#include "../../slang-com-helper.h"
+
+namespace Slang {
+
+// !!!!!!!!!!!!!!!!!!!!!!!!!! SpaceStringEscapeHandler !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+/// Escape handler for the 'Space' style.
+///
+/// A value needs quoting (with '"') exactly when it contains a space character.
+/// The style has no escape sequences, so a value that itself contains a '"'
+/// cannot be escaped or unescaped and those operations report failure.
+class SpaceStringEscapeHandler : public StringEscapeHandler
+{
+public:
+    using Super = StringEscapeHandler;
+
+    /// The quote character for this style is the double quote.
+    SpaceStringEscapeHandler() : Super('"') {}
+
+    /// Quoting is needed in exactly the cases where escaping is reported as needed.
+    virtual bool isQuotingNeeded(const UnownedStringSlice& slice) SLANG_OVERRIDE
+    {
+        return isEscapingNeeded(slice);
+    }
+
+    /// True if `slice` contains a space.
+    virtual bool isEscapingNeeded(const UnownedStringSlice& slice) SLANG_OVERRIDE;
+    /// Appends `slice` to `out` as is; fails if `slice` contains a double quote.
+    virtual SlangResult appendEscaped(const UnownedStringSlice& slice, StringBuilder& out) SLANG_OVERRIDE;
+    /// Appends `slice` to `out` as is; fails if `slice` contains a double quote.
+    virtual SlangResult appendUnescaped(const UnownedStringSlice& slice, StringBuilder& out) SLANG_OVERRIDE;
+    /// Scans a quoted section starting at `cursor`, writing the position after it to `outCursor`.
+    virtual SlangResult lexQuoted(const char* cursor, const char** outCursor) SLANG_OVERRIDE;
+};
+
+/// Returns true when `slice` contains at least one space character.
+bool SpaceStringEscapeHandler::isEscapingNeeded(const UnownedStringSlice& slice)
+{
+    const Index spaceIndex = slice.indexOf(' ');
+    return spaceIndex >= 0;
+}
+
+/// Appends `slice` to `out` unchanged.
+///
+/// Returns SLANG_FAIL (appending nothing) if `slice` contains a double quote,
+/// otherwise SLANG_OK.
+SlangResult SpaceStringEscapeHandler::appendUnescaped(const UnownedStringSlice& slice, StringBuilder& out)
+{
+    const bool containsQuote = slice.indexOf('"') >= 0;
+    if (!containsQuote)
+    {
+        out.append(slice);
+        return SLANG_OK;
+    }
+
+    return SLANG_FAIL;
+}
+
+/// Appends `slice` to `out` unchanged, as this style has no escape sequences.
+///
+/// Returns SLANG_FAIL (appending nothing) if `slice` contains a double quote,
+/// otherwise SLANG_OK.
+SlangResult SpaceStringEscapeHandler::appendEscaped(const UnownedStringSlice& slice, StringBuilder& out)
+{
+    const Index quoteIndex = slice.indexOf('"');
+    if (quoteIndex < 0)
+    {
+        out.append(slice);
+        return SLANG_OK;
+    }
+
+    // A quote inside the value can't be represented
+    return SLANG_FAIL;
+}
+
+/// Scans a quoted section that starts at `cursor`.
+///
+/// `cursor` must point at the opening quote character. On success `*outCursor` is set
+/// to the character after the closing quote and SLANG_OK is returned. If `cursor` is not
+/// at a quote, or a NUL, '\n' or '\r' is reached before a closing quote, SLANG_FAIL is
+/// returned and `*outCursor` is left equal to `cursor`.
+SlangResult SpaceStringEscapeHandler::lexQuoted(const char* cursor, const char** outCursor)
+{
+    // Until a complete quoted section has been seen, report the starting position
+    *outCursor = cursor;
+
+    if (*cursor != m_quoteChar)
+    {
+        return SLANG_FAIL;
+    }
+
+    for (const char* pos = cursor + 1;; ++pos)
+    {
+        const char ch = *pos;
+
+        if (ch == m_quoteChar)
+        {
+            // Step past the closing quote
+            *outCursor = pos + 1;
+            return SLANG_OK;
+        }
+
+        if (ch == 0 || ch == '\n' || ch == '\r')
+        {
+            // Didn't hit closing quote!
+            return SLANG_FAIL;
+        }
+    }
+}
+
+// !!!!!!!!!!!!!!!!!!!!!!!!!! CppStringEscapeHandler !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+/// Escape handler for C/C++ style string literals.
+///
+/// Values are always quoted (with '"'), and characters are escaped using
+/// backslash escape sequences.
+class CppStringEscapeHandler : public StringEscapeHandler
+{
+public:
+    using Super = StringEscapeHandler;
+
+    /// The quote character for this style is the double quote.
+    CppStringEscapeHandler() : Super('"') {}
+
+    /// Quoting is always required for this style, whatever the contents.
+    virtual bool isQuotingNeeded(const UnownedStringSlice& slice) SLANG_OVERRIDE
+    {
+        SLANG_UNUSED(slice);
+        return true;
+    }
+
+    /// True if `slice` holds any character that has to be written as an escape sequence.
+    virtual bool isEscapingNeeded(const UnownedStringSlice& slice) SLANG_OVERRIDE;
+    /// Appends `slice` to `out`, replacing characters with escape sequences where required.
+    virtual SlangResult appendEscaped(const UnownedStringSlice& slice, StringBuilder& out) SLANG_OVERRIDE;
+    /// Appends `slice` to `out`, decoding the escape sequences it contains.
+    virtual SlangResult appendUnescaped(const UnownedStringSlice& slice, StringBuilder& out) SLANG_OVERRIDE;
+    /// Scans a quoted section starting at `cursor`, writing the position after it to `outCursor`.
+    virtual SlangResult lexQuoted(const char* cursor, const char** outCursor) SLANG_OVERRIDE;
+};
+
+/// Returns the upper case hexadecimal digit character for `v`.
+/// `v` is expected to be in the range 0-15; callers mask to a nibble before calling.
+static char _getHexChar(int v)
+{
+    if (v > 9)
+    {
+        return char(v - 10 + 'A');
+    }
+    return char(v + '0');
+}
+
+/// Returns the numeric value (0-15) of the hexadecimal digit character `c`.
+/// Both upper and lower case letters are accepted. If `c` is not a hex digit this
+/// asserts, and returns 0.
+static int _getHexDigit(char c)
+{
+    if (c >= '0' && c <= '9')
+    {
+        return c - '0';
+    }
+    if (c >= 'a' && c <= 'f')
+    {
+        return c - 'a' + 10;
+    }
+    if (c >= 'A' && c <= 'F')
+    {
+        return c - 'A' + 10;
+    }
+
+    SLANG_ASSERT(!"Not a hex digit");
+    return 0;
+}
+
+/// Returns the character that follows the backslash in the simple escape sequence
+/// for `c` (for example 'n' for a newline), or 0 if `c` has no simple escape sequence.
+static char _getCppEscapedChar(char c)
+{
+    struct Entry
+    {
+        char raw;       ///< The character as it appears unescaped
+        char code;      ///< The character written after the backslash
+    };
+
+    static const Entry kEntries[] =
+    {
+        { '\b', 'b' },
+        { '\f', 'f' },
+        { '\n', 'n' },
+        { '\r', 'r' },
+        { '\a', 'a' },
+        { '\t', 't' },
+        { '\v', 'v' },
+        { '\'', '\'' },
+        { '\"', '"' },
+        { '\\', '\\' },
+    };
+
+    for (const Entry& entry : kEntries)
+    {
+        if (entry.raw == c)
+        {
+            return entry.code;
+        }
+    }
+    return 0;
+}
+
+/// Given the character `c` that follows a backslash in a simple escape sequence,
+/// returns the character the sequence stands for (for example a newline for 'n').
+/// Returns 0 if `c` does not name a simple escape sequence.
+static char _getCppUnescapedChar(char c)
+{
+    switch (c)
+    {
+        case 'b':   return '\b';
+        case 'f':   return '\f';
+        case 'n':   return '\n';
+        case 'r':   return '\r';
+        case 'a':   return '\a';
+        case 't':   return '\t';
+        case 'v':   return '\v';
+        case '\'':  return '\'';
+        case '\"':  return '"';
+        case '\\':  return '\\';
+        // `\?` is a valid simple escape sequence; without this entry it was
+        // reported as unknown, making unescaping of `\?` fail.
+        case '?':   return '?';
+        default:    return 0;
+    }
+}
+
+/// Returns true if `slice` contains any character that must be written as an
+/// escape sequence: a single quote, double quote or backslash, or any character
+/// outside of the printable ASCII range ' ' to '~'.
+bool CppStringEscapeHandler::isEscapingNeeded(const UnownedStringSlice& slice)
+{
+    const char* const end = slice.end();
+
+    for (const char* cur = slice.begin(); cur < end; ++cur)
+    {
+        const char c = *cur;
+
+        switch (c)
+        {
+            case '\'':
+            case '\"':
+            case '\\':
+            {
+                // Strictly speaking ' shouldn't need a quote if in a C style string.
+                return true;
+            }
+            default:
+            {
+                // '~' (0x7e) is printable and is output as is when escaping, so only
+                // characters strictly outside of ' ' to '~' need an escape.
+                if (c < ' ' || c > 0x7e)
+                {
+                    return true;
+                }
+                break;
+            }
+        }
+    }
+    return false;
+}
+
+/// Appends `slice` to `out` with C/C++ style escaping applied.
+///
+/// Characters with a simple escape sequence are written as a backslash followed by
+/// the sequence character. Any other character outside of the printable ASCII range
+/// ' ' to '~' is written as a two digit hexadecimal escape `\xHH`. Everything else is
+/// copied unchanged. Surrounding quotes are not added. Always returns SLANG_OK.
+///
+/// NOTE: A hexadecimal escape has no terminator, so if the character following an
+/// escaped one is itself a hex digit a decoder will read it as part of the escape.
+SlangResult CppStringEscapeHandler::appendEscaped(const UnownedStringSlice& slice, StringBuilder& out)
+{
+    // [start, cur) is the run of characters seen so far that can be output as is
+    const char* start = slice.begin();
+    const char* const end = slice.end();
+
+    for (const char* cur = start; cur < end; ++cur)
+    {
+        const char c = *cur;
+        const char escapedChar = _getCppEscapedChar(c);
+
+        const bool needsHexEscape = (escapedChar == 0) && (c < ' ' || c > 126);
+        if (escapedChar == 0 && !needsHexEscape)
+        {
+            // Can be output as is, so just extend the current run
+            continue;
+        }
+
+        // Flush
+        if (start < cur)
+        {
+            out.append(start, cur);
+        }
+
+        if (escapedChar)
+        {
+            out.appendChar('\\');
+            out.appendChar(escapedChar);
+        }
+        else
+        {
+            // Go via unsigned char so that the nibbles are those of the byte, whether
+            // or not char is signed.
+            const unsigned int byteValue = static_cast<unsigned char>(c);
+
+            char buf[4];
+            buf[0] = '\\';
+            buf[1] = 'x';
+            buf[2] = _getHexChar(int(byteValue >> 4));
+            buf[3] = _getHexChar(int(byteValue & 0xf));
+
+            out.append(buf, buf + 4);
+        }
+
+        start = cur + 1;
+    }
+
+    // Flush anything remaining
+    if (start < end)
+    {
+        out.append(start, end);
+    }
+    return SLANG_OK;
+}
+
+/// Appends `slice` to `out` with C/C++ style escape sequences decoded.
+///
+/// `slice` is the contents of a string literal, without the surrounding quotes.
+/// Handles
+///  * simple escapes (\' \" \\ \? \a \b \f \n \r \t \v)
+///  * octal escapes of one to three octal digits, appended as a single char
+///  * hexadecimal escapes (\x followed by hex digits), where the value is appended
+///    encoded as UTF8
+///
+/// Returns SLANG_FAIL if `slice` ends directly after a backslash, or a backslash is
+/// followed by a character that does not start a handled escape. In that case `out`
+/// may already hold the part decoded before the error.
+SlangResult CppStringEscapeHandler::appendUnescaped(const UnownedStringSlice& slice, StringBuilder& out)
+{
+    // [start, cur) is the run of characters seen so far that can be output as is
+    const char* start = slice.begin();
+    const char* cur = start;
+    const char* const end = slice.end();
+
+    while (cur < end)
+    {
+        if (*cur != '\\')
+        {
+            ++cur;
+            continue;
+        }
+
+        // Flush only the run before the backslash
+        if (start < cur)
+        {
+            out.append(start, cur);
+        }
+
+        // Skip the backslash
+        ++cur;
+
+        if (cur >= end)
+        {
+            // A backslash must be followed by something
+            return SLANG_FAIL;
+        }
+
+        // Need to handle various escape sequence cases.
+        // Every case leaves cur on the first character after the escape sequence.
+        const char code = *cur;
+        switch (code)
+        {
+            case '\'':
+            case '\"':
+            case '\\':
+            case '?':
+            case 'a':
+            case 'b':
+            case 'f':
+            case 'n':
+            case 'r':
+            case 't':
+            case 'v':
+            {
+                // `\?` is handled directly as it just stands for '?'
+                const char unescapedChar = (code == '?') ? '?' : _getCppUnescapedChar(code);
+                if (unescapedChar == 0)
+                {
+                    // Don't know how to unescape that char
+                    return SLANG_FAIL;
+                }
+                out.appendChar(unescapedChar);
+
+                ++cur;
+                break;
+            }
+            case '0': case '1': case '2': case '3': case '4':
+            case '5': case '6': case '7':
+            {
+                // octal escape: up to 3 octal digits, including the one at cur.
+                // Stops at the first character that isn't an octal digit.
+                const char* const octEnd = (end - cur > 3) ? (cur + 3) : end;
+
+                int value = 0;
+                for (; cur < octEnd && *cur >= '0' && *cur <= '7'; ++cur)
+                {
+                    value = (value << 3) | (*cur - '0');
+                }
+                out.appendChar(char(value));
+                break;
+            }
+            case 'x':
+            {
+                // hexadecimal escape: any number of hex digits
+                uint32_t value = 0;
+                for (++cur; cur < end && CharUtil::isHexDigit(*cur); ++cur)
+                {
+                    value = value << 4 | _getHexDigit(*cur);
+                }
+
+                // It's arguable what is appropriate. We only decode/encode 4, which the current spec has,
+                // but 6 are possible, so lets go large.
+                const Index maxUtf8EncodeCount = 6;
+
+                char* chars = out.prepareForAppend(maxUtf8EncodeCount);
+
+                int numChars = EncodeUnicodePointToUTF8(chars, int(value));
+                out.appendInPlace(chars, numChars);
+                break;
+            }
+            default:
+            {
+                return SLANG_FAIL;
+            }
+        }
+
+        // The next run starts directly after the escape sequence. cur is not advanced
+        // here, so a backslash directly following an escape is still seen.
+        start = cur;
+    }
+
+    // Flush anything remaining
+    if (start < end)
+    {
+        out.append(start, end);
+    }
+
+    return SLANG_OK;
+}
+
+/// Scans a quoted C/C++ style string literal that starts at `cursor`.
+///
+/// `cursor` must point at the opening quote character. Escape sequences are stepped
+/// over, such that an escaped quote does not end the literal. On success `*outCursor`
+/// is set to the character after the closing quote and SLANG_OK is returned. If
+/// `cursor` is not at a quote, or a NUL, '\n' or '\r' is reached before a closing quote,
+/// SLANG_FAIL is returned and `*outCursor` is left equal to `cursor`.
+SlangResult CppStringEscapeHandler::lexQuoted(const char* cursor, const char** outCursor)
+{
+    // Until a complete literal has been seen, report the starting position
+    *outCursor = cursor;
+
+    if (*cursor != m_quoteChar)
+    {
+        return SLANG_FAIL;
+    }
+
+    const char* pos = cursor + 1;
+
+    for (;;)
+    {
+        const char ch = *pos;
+
+        if (ch == m_quoteChar)
+        {
+            // Step past the closing quote
+            *outCursor = pos + 1;
+            return SLANG_OK;
+        }
+
+        if (ch == 0 || ch == '\n' || ch == '\r')
+        {
+            // Didn't hit closing quote!
+            return SLANG_FAIL;
+        }
+
+        if (ch != '\\')
+        {
+            // An ordinary character
+            ++pos;
+            continue;
+        }
+
+        // Step over the backslash, and then the escape sequence that follows
+        ++pos;
+        const char code = *pos;
+
+        if (code >= '0' && code <= '7')
+        {
+            // octal escape: the leading digit, plus up to 3 further octal digits
+            ++pos;
+            for (int count = 0; count < 3 && ('0' <= *pos) && (*pos <= '7'); ++count)
+            {
+                ++pos;
+            }
+        }
+        else if (code == 'x')
+        {
+            // hexadecimal escape: any number of characters
+            ++pos;
+            while (CharUtil::isHexDigit(*pos))
+            {
+                ++pos;
+            }
+
+            // TODO: Unicode escape sequences
+        }
+        else
+        {
+            switch (code)
+            {
+                case '\'':
+                case '\"':
+                case '\\':
+                case '?':
+                case 'a':
+                case 'b':
+                case 'f':
+                case 'n':
+                case 'r':
+                case 't':
+                case 'v':
+                {
+                    // A simple single character escape
+                    ++pos;
+                    break;
+                }
+                default:
+                {
+                    // Anything else is left in place, to be examined as a regular
+                    // character on the next iteration.
+                    break;
+                }
+            }
+        }
+    }
+}
+
+// !!!!!!!!!!!!!!!!!!!!!!!!!! StringEscapeUtil !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+// The handler instances returned by getHandler. There is one per style.
+static CppStringEscapeHandler g_cppHandler;
+static SpaceStringEscapeHandler g_spaceHandler;
+
+/// Returns the handler that implements `style`, or nullptr if there is no handler
+/// for the style.
+StringEscapeUtil::Handler* StringEscapeUtil::getHandler(Style style)
+{
+    if (style == Style::Cpp)
+    {
+        return &g_cppHandler;
+    }
+    if (style == Style::Space)
+    {
+        return &g_spaceHandler;
+    }
+    return nullptr;
+}
+
+/// Appends `slice` to `out`, escaped via `handler` and surrounded by the handler's
+/// quote character.
+///
+/// NOTE: The result of the escaping is not checked. If the handler fails to escape
+/// `slice`, the quotes are still appended.
+/* static */void StringEscapeUtil::appendQuoted(Handler* handler, const UnownedStringSlice& slice, StringBuilder& out)
+{
+    const char quote = handler->getQuoteChar();
+
+    out.appendChar(quote);
+
+    const SlangResult escapeResult = handler->appendEscaped(slice, out);
+    SLANG_UNUSED(escapeResult);
+
+    out.appendChar(quote);
+}
+
+/// Removes the surrounding quotes from `slice`, and appends the unescaped contents
+/// to `out`.
+///
+/// `slice` must start and end with the handler's quote character. Passing a slice
+/// that doesn't is a caller error that asserts; in builds where asserts are disabled
+/// SLANG_FAIL is returned and nothing is appended. Otherwise returns the result of
+/// unescaping the contents between the quotes.
+/* static */SlangResult StringEscapeUtil::appendUnquoted(Handler* handler, const UnownedStringSlice& slice, StringBuilder& out)
+{
+    const Index len = slice.getLength();
+    const char quoteChar = handler->getQuoteChar();
+
+    // Must have quote characters around the contents
+    const bool isQuoted = len >= 2 && slice[0] == quoteChar && slice[len - 1] == quoteChar;
+    SLANG_ASSERT(isQuoted);
+
+    if (!isQuoted)
+    {
+        // Without this, the sub string below would be taken with an invalid range
+        return SLANG_FAIL;
+    }
+
+    return handler->appendUnescaped(slice.subString(1, len - 2), out);
+}
+
+/// Appends `slice` to `out`. If `handler` reports that `slice` needs quoting it is
+/// appended quoted and escaped, otherwise it is appended as is.
+/* static */void StringEscapeUtil::appendMaybeQuoted(Handler* handler, const UnownedStringSlice& slice, StringBuilder& out)
+{
+    if (!handler->isQuotingNeeded(slice))
+    {
+        // Can be output as is
+        out.append(slice);
+        return;
+    }
+
+    appendQuoted(handler, slice, out);
+}
+
+/// Appends `slice` to `out`. If `slice` starts and ends with the handler's quote
+/// character the quotes are removed and the contents unescaped, otherwise `slice` is
+/// appended as is.
+///
+/// Returns the result of the unescaping, or SLANG_OK if `slice` was not quoted.
+/* static */SlangResult StringEscapeUtil::appendMaybeUnquoted(Handler* handler, const UnownedStringSlice& slice, StringBuilder& out)
+{
+    const char quoteChar = handler->getQuoteChar();
+    const Index len = slice.getLength();
+
+    const bool isQuoted = len >= 2 && slice[0] == quoteChar && slice[len - 1] == quoteChar;
+    if (!isQuoted)
+    {
+        out.append(slice);
+        return SLANG_OK;
+    }
+
+    return appendUnquoted(handler, slice, out);
+}
+
+
+/// Appends `slice` to `out`, with every quoted section it contains replaced by the
+/// unquoted and unescaped contents of that section. Text outside of quoted sections
+/// is appended as is.
+///
+/// Returns SLANG_FAIL if a quoted section is not correctly terminated within `slice`,
+/// or its contents cannot be unescaped. In that case `out` may already hold the part
+/// processed before the error.
+/* static */SlangResult StringEscapeUtil::unescapeShellLike(Handler* handler, const UnownedStringSlice& slice, StringBuilder& out)
+{
+    const char quoteChar = handler->getQuoteChar();
+
+    UnownedStringSlice remaining(slice);
+
+    while (remaining.getLength())
+    {
+        const Index index = remaining.indexOf(quoteChar);
+
+        if (index < 0)
+        {
+            // No more quoted sections, so the rest can be output as is
+            out.append(remaining);
+            return SLANG_OK;
+        }
+
+        // Append the bit before
+        out.append(remaining.head(index));
+
+        // Okay we need to lex to the end
+        const char* const quotedStart = remaining.begin() + index;
+
+        const char* quotedEnd = nullptr;
+        SLANG_RETURN_ON_FAIL(handler->lexQuoted(quotedStart, &quotedEnd));
+
+        // Lexing only takes a start position, so it can find a closing quote that is
+        // past the end of the slice. If so the quoted section isn't terminated
+        // within the slice.
+        if (quotedEnd > remaining.end())
+        {
+            return SLANG_FAIL;
+        }
+
+        // Unescape it
+        SLANG_RETURN_ON_FAIL(appendUnquoted(handler, UnownedStringSlice(quotedStart, quotedEnd), out));
+
+        // Fix up remaining
+        remaining = UnownedStringSlice(quotedEnd, remaining.end());
+    }
+
+    return SLANG_OK;
+}
+
+} // namespace Slang