summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorjsmall-nvidia <jsmall@nvidia.com>2022-05-27 17:28:05 -0400
committerGitHub <noreply@github.com>2022-05-27 17:28:05 -0400
commit2d3392f22c894957d17dd13486e0565c4ecea89c (patch)
treece4dadbd85a59e52725fa6f92613553cd5b29859
parentabb89b3e460e11e8f9a134199c2d559190bfc47e (diff)
Added NativeStringType (#2252)
* #include of an absolute path didn't work, because paths were always taken to be relative. * Use UnownedTerminatedStringSlice for literals in output C++. * Remove Escape/Unescape functions used in slang-token-reader.cpp. Add target names 'host-cpp' etc. to map to the target types. * Fix some corner cases around string encoding. * Added unit test for string escaping. Fixed some assorted escaping bugs. * Updated test output. * Added decode test. * Stop using hex output, to get around the 'greedy' parsing of hex escapes. Use octal instead.
-rw-r--r--build/visual-studio/slang-unit-test-tool/slang-unit-test-tool.vcxproj3
-rw-r--r--build/visual-studio/slang-unit-test-tool/slang-unit-test-tool.vcxproj.filters9
-rw-r--r--source/core/slang-char-util.h31
-rw-r--r--source/core/slang-hex-dump-util.cpp23
-rw-r--r--source/core/slang-string-escape-util.cpp195
-rw-r--r--source/core/slang-string-escape-util.h2
-rw-r--r--source/core/slang-string.h66
-rw-r--r--source/core/slang-token-reader.cpp87
-rw-r--r--source/core/slang-token-reader.h2
-rw-r--r--source/core/slang-type-text-util.cpp2
-rw-r--r--source/slang/core.meta.slang5
-rw-r--r--source/slang/slang-ast-builder.cpp10
-rw-r--r--source/slang/slang-ast-builder.h5
-rw-r--r--source/slang/slang-ast-type.h16
-rw-r--r--source/slang/slang-check-conversion.cpp10
-rw-r--r--source/slang/slang-emit-cpp.cpp16
-rw-r--r--source/slang/slang-emit-glsl.cpp7
-rw-r--r--source/slang/slang-emit-hlsl.cpp7
-rw-r--r--source/slang/slang-ir-collect-global-uniforms.cpp5
-rw-r--r--source/slang/slang-ir-inst-defs.h5
-rw-r--r--source/slang/slang-ir-insts.h1
-rw-r--r--source/slang/slang-ir-link.cpp9
-rw-r--r--source/slang/slang-ir.cpp103
-rw-r--r--source/slang/slang-ir.h8
-rw-r--r--tools/slang-unit-test/unit-test-offset-container.cpp (renamed from tools/slang-unit-test/unit-offset-container.cpp)2
-rw-r--r--tools/slang-unit-test/unit-test-string-escape.cpp79
26 files changed, 415 insertions, 293 deletions
diff --git a/build/visual-studio/slang-unit-test-tool/slang-unit-test-tool.vcxproj b/build/visual-studio/slang-unit-test-tool/slang-unit-test-tool.vcxproj
index 9da4294c9..87cd8e9ec 100644
--- a/build/visual-studio/slang-unit-test-tool/slang-unit-test-tool.vcxproj
+++ b/build/visual-studio/slang-unit-test-tool/slang-unit-test-tool.vcxproj
@@ -271,7 +271,6 @@
<ClInclude Include="..\..\..\tools\unit-test\slang-unit-test.h" />
</ItemGroup>
<ItemGroup>
- <ClCompile Include="..\..\..\tools\slang-unit-test\unit-offset-container.cpp" />
<ClCompile Include="..\..\..\tools\slang-unit-test\unit-test-byte-encode.cpp" />
<ClCompile Include="..\..\..\tools\slang-unit-test\unit-test-chunked-list.cpp" />
<ClCompile Include="..\..\..\tools\slang-unit-test\unit-test-command-line-args.cpp" />
@@ -281,11 +280,13 @@
<ClCompile Include="..\..\..\tools\slang-unit-test\unit-test-json-native.cpp" />
<ClCompile Include="..\..\..\tools\slang-unit-test\unit-test-json.cpp" />
<ClCompile Include="..\..\..\tools\slang-unit-test\unit-test-memory-arena.cpp" />
+ <ClCompile Include="..\..\..\tools\slang-unit-test\unit-test-offset-container.cpp" />
<ClCompile Include="..\..\..\tools\slang-unit-test\unit-test-path.cpp" />
<ClCompile Include="..\..\..\tools\slang-unit-test\unit-test-process.cpp" />
<ClCompile Include="..\..\..\tools\slang-unit-test\unit-test-riff.cpp" />
<ClCompile Include="..\..\..\tools\slang-unit-test\unit-test-rtti.cpp" />
<ClCompile Include="..\..\..\tools\slang-unit-test\unit-test-short-list.cpp" />
+ <ClCompile Include="..\..\..\tools\slang-unit-test\unit-test-string-escape.cpp" />
<ClCompile Include="..\..\..\tools\slang-unit-test\unit-test-string.cpp" />
<ClCompile Include="..\..\..\tools\slang-unit-test\unit-test-translation-unit-import.cpp" />
<ClCompile Include="..\..\..\tools\unit-test\slang-unit-test.cpp" />
diff --git a/build/visual-studio/slang-unit-test-tool/slang-unit-test-tool.vcxproj.filters b/build/visual-studio/slang-unit-test-tool/slang-unit-test-tool.vcxproj.filters
index 5f935e3f7..4a4e7bce9 100644
--- a/build/visual-studio/slang-unit-test-tool/slang-unit-test-tool.vcxproj.filters
+++ b/build/visual-studio/slang-unit-test-tool/slang-unit-test-tool.vcxproj.filters
@@ -14,9 +14,6 @@
</ClInclude>
</ItemGroup>
<ItemGroup>
- <ClCompile Include="..\..\..\tools\slang-unit-test\unit-offset-container.cpp">
- <Filter>Source Files</Filter>
- </ClCompile>
<ClCompile Include="..\..\..\tools\slang-unit-test\unit-test-byte-encode.cpp">
<Filter>Source Files</Filter>
</ClCompile>
@@ -44,6 +41,9 @@
<ClCompile Include="..\..\..\tools\slang-unit-test\unit-test-memory-arena.cpp">
<Filter>Source Files</Filter>
</ClCompile>
+ <ClCompile Include="..\..\..\tools\slang-unit-test\unit-test-offset-container.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
<ClCompile Include="..\..\..\tools\slang-unit-test\unit-test-path.cpp">
<Filter>Source Files</Filter>
</ClCompile>
@@ -59,6 +59,9 @@
<ClCompile Include="..\..\..\tools\slang-unit-test\unit-test-short-list.cpp">
<Filter>Source Files</Filter>
</ClCompile>
+ <ClCompile Include="..\..\..\tools\slang-unit-test\unit-test-string-escape.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
<ClCompile Include="..\..\..\tools\slang-unit-test\unit-test-string.cpp">
<Filter>Source Files</Filter>
</ClCompile>
diff --git a/source/core/slang-char-util.h b/source/core/slang-char-util.h
index 8f7f69c90..f831f6d55 100644
--- a/source/core/slang-char-util.h
+++ b/source/core/slang-char-util.h
@@ -31,8 +31,12 @@ struct CharUtil
/// True if it's alpha
SLANG_FORCE_INLINE static bool isAlpha(char c) { return (getFlags(c) & (Flag::Upper | Flag::Lower)) != 0; }
+ /// True if the character is a valid hex digit
SLANG_FORCE_INLINE static bool isHexDigit(char c) { return (getFlags(c) & Flag::HexDigit) != 0; }
+ /// True if the character is an octal digit
+ SLANG_FORCE_INLINE static bool isOctalDigit(char c) { return c >= '0' && c <= '7'; }
+
/// For a given character get the associated flags
SLANG_FORCE_INLINE static Flags getFlags(char c) { return g_charFlagMap.flags[size_t(c)]; }
@@ -41,7 +45,14 @@ struct CharUtil
/// Given a character return the upper case equivalent
SLANG_FORCE_INLINE static char toUpper(char c) { return (c >= 'a' && c <= 'z') ? (c -'a' + 'A') : c; }
-
+ /// Returns the value of c interpreted as a hex digit
+ /// If c is not a valid hex digit, returns -1
+ inline static int getHexDigitValue(char c);
+
+ /// Returns the value of c interpreted as an octal digit
+ /// If c is not a valid octal digit, returns -1
+ inline static int getOctalDigitValue(char c) { return isOctalDigit(c) ? (c - '0') : -1; }
+
struct CharFlagMap
{
Flags flags[0x100];
@@ -57,6 +68,24 @@ struct CharUtil
static const CharFlagMap g_charFlagMap;
};
+// ------------------------------------------------------------------------------------
+inline /* static */int CharUtil::getHexDigitValue(char c)
+{
+ if (c >= '0' && c <= '9')
+ {
+ return c - '0';
+ }
+ else if (c >= 'a' && c <= 'f')
+ {
+ return c - 'a' + 10;
+ }
+ else if (c >= 'A' && c <= 'F')
+ {
+ return c - 'A' + 10;
+ }
+ return -1;
+}
+
} // namespace Slang
#endif // SLANG_CHAR_UTIL_H
diff --git a/source/core/slang-hex-dump-util.cpp b/source/core/slang-hex-dump-util.cpp
index b493141a1..1279dc237 100644
--- a/source/core/slang-hex-dump-util.cpp
+++ b/source/core/slang-hex-dump-util.cpp
@@ -5,6 +5,8 @@
#include "slang-string-util.h"
#include "slang-writer.h"
+#include "slang-char-util.h"
+
#include "../../slang-com-helper.h"
#include "slang-hash.h"
@@ -152,23 +154,6 @@ SlangResult HexDumpUtil::dumpSourceBytes(const uint8_t* data, size_t dataCount,
return SLANG_OK;
}
-static int _parseHexDigit(char c)
-{
- if (c >= '0' && c <= '9')
- {
- return c -'0';
- }
- else if (c >= 'a' && c <= 'f')
- {
- return c - 'a' + 10;
- }
- else if (c >= 'A' && c <= 'F')
- {
- return c - 'A' + 10;
- }
- return -1;
-}
-
/* static */SlangResult HexDumpUtil::parse(const UnownedStringSlice& lines, List<uint8_t>& outBytes)
{
outBytes.clear();
@@ -188,8 +173,8 @@ static int _parseHexDigit(char c)
break;
}
- const int hi = _parseHexDigit(c);
- const int lo = _parseHexDigit(cur[1]);
+ const int hi = CharUtil::getHexDigitValue(c);
+ const int lo = CharUtil::getHexDigitValue(cur[1]);
cur += 2;
if (hi < 0 || lo < 0)
diff --git a/source/core/slang-string-escape-util.cpp b/source/core/slang-string-escape-util.cpp
index 513908c4c..334c1aae5 100644
--- a/source/core/slang-string-escape-util.cpp
+++ b/source/core/slang-string-escape-util.cpp
@@ -115,32 +115,6 @@ public:
CppStringEscapeHandler() : Super('"') {}
};
-static char _getHexChar(int v)
-{
- return (v <= 9) ? char(v + '0') : char(v - 10 + 'A');
-}
-
-static int _getHexDigit(char c)
-{
- if (c >= '0' && c <= '9')
- {
- return c - '0';
- }
- else if (c >= 'a' && c <= 'f')
- {
- return c - 'a' + 10;
- }
- else if (c >= 'A' && c <= 'F')
- {
- return c - 'A' + 10;
- }
- else
- {
- SLANG_ASSERT(!"Not a hex digit");
- return 0;
- }
-}
-
static char _getCppEscapedChar(char c)
{
switch (c)
@@ -177,7 +151,6 @@ static char _getCppUnescapedChar(char c)
}
}
-
bool CppStringEscapeHandler::isUnescapingNeeeded(const UnownedStringSlice& slice)
{
return slice.indexOf('\\') >= 0;
@@ -220,6 +193,9 @@ SlangResult CppStringEscapeHandler::appendEscaped(const UnownedStringSlice& slic
const char* cur = start;
const char*const end = slice.end();
+ // TODO(JS): A cleverer implementation might support U and u prefixing for unicode characters.
+ // For now we just stick with octal escapes if it's not 'regular' ascii.
+
for (; cur < end; ++cur)
{
const char c = *cur;
@@ -232,6 +208,7 @@ SlangResult CppStringEscapeHandler::appendEscaped(const UnownedStringSlice& slic
{
out.append(start, cur);
}
+
out.appendChar('\\');
out.appendChar(escapedChar);
@@ -245,17 +222,56 @@ SlangResult CppStringEscapeHandler::appendEscaped(const UnownedStringSlice& slic
out.append(start, cur);
}
- char buf[5] = "\\0x0";
+ // NOTE! There is a possible flaw around checking the 'next' character (used for outputting oct and hex).
+ // If a string is constructed by appending it in parts, the next character is not available, so the problem
+ // described below can still occur.
+
+ // Another solution to this problem would be to split the literal by outputting "" after the escape, but that
+ // makes some other assumptions. For example Slang doesn't support that style.
+
+ // C++ greedily consumes hex/octal digits. This is a problem if we have bytes
+ // 0, '1' as by default this will output as
+ // "\x001" which is the single character byte 1.
+
+ // Note this claims \x is followed with up to 3 hex digits
+ // https://msdn.microsoft.com/en-us/library/69ze775t.aspx
+ // But the following claims otherwise
+ // https://en.cppreference.com/w/cpp/language/string_literal
+
+ // On testing in Visual Studio hex can indeed be more than 3 digits
+
+ // There is a problem outputting values in hex, because C++ allows *any* number of hex digits.
+ // We could work around with \u \U but they are later extensions (C++11) and have other issues
+
+ // The solution taken here is to always output as octal, because octal can be at most 3 digits.
+
+ // Special case handling of 0
+ if (c == 0 && !(cur + 1 < end && CharUtil::isOctalDigit(cur[1])))
+ {
+ // We can just output as (octal) "\0"
+ out.append("\\0");
+ }
+ else
+ {
+ // A slightly more sophisticated implementation could output fewer digits when the escape is not followed by
+ // an octal digit, but for now we go simple and output all 3 digits
+
+ const uint32_t v = uint32_t(c);
- buf[3] = _getHexChar((int(c) >> 4) & 0xf);
- buf[4] = _getHexChar(c & 0xf);
+ char buf[4];
+ buf[0] = '\\';
+ buf[1] = ((v >> 6) & 3) + '0';
+ buf[2] = ((v >> 3) & 7) + '0';
+ buf[3] = ((v >> 0) & 7) + '0';
- out.append(buf, buf + 4);
+ out.append(buf, buf + 4);
+ }
start = cur + 1;
}
}
+ // Flush anything remaining
if (start < end)
{
out.append(start, end);
@@ -269,16 +285,16 @@ SlangResult CppStringEscapeHandler::appendUnescaped(const UnownedStringSlice& sl
const char* cur = start;
const char*const end = slice.end();
- for (; cur < end; ++cur)
+ while (cur < end)
{
const char c = *cur;
if (c == '\\')
{
// Flush
- if (start < end)
+ if (start < cur)
{
- out.append(start, end);
+ out.append(start, cur);
}
/// Next
@@ -286,11 +302,14 @@ SlangResult CppStringEscapeHandler::appendUnescaped(const UnownedStringSlice& sl
if (cur >= end)
{
+ // Missing character following '\'
return SLANG_FAIL;
}
+ const char nextC = *cur++;
+
// Need to handle various escape sequence cases
- switch (*cur)
+ switch (nextC)
{
case '\'':
case '\"':
@@ -304,7 +323,7 @@ SlangResult CppStringEscapeHandler::appendUnescaped(const UnownedStringSlice& sl
case 't':
case 'v':
{
- const char unescapedChar = _getCppUnescapedChar(*cur);
+ const char unescapedChar = _getCppUnescapedChar(nextC);
if (unescapedChar == 0)
{
// Don't know how to unescape that char
@@ -312,14 +331,18 @@ SlangResult CppStringEscapeHandler::appendUnescaped(const UnownedStringSlice& sl
}
out.appendChar(unescapedChar);
- start = cur + 1;
+ start = cur;
break;
}
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7':
{
+ // Rewind back a character, as first digit is the 'nextC'
+ --cur;
+
+ // Don't need to check for enough characters, because there must be 1 - the nextC
+
// octal escape: up to 3 characters
- ++cur;
int value = 0;
const char* octEnd = cur + 3;
@@ -327,33 +350,99 @@ SlangResult CppStringEscapeHandler::appendUnescaped(const UnownedStringSlice& sl
for (; cur < octEnd; ++cur)
{
- const char d = *cur;
- if (d >= '0' && d <= '7')
+ const int digitValue = CharUtil::getOctalDigitValue(*cur);
+ if (digitValue < 0)
{
- value = (value << 3) | (d - '0');
+ break;
}
+ value = (value << 3) | digitValue;
}
out.appendChar(char(value));
+ // Reset start
start = cur;
break;
}
case 'x':
{
+ /// In the C++ standard we consume hex digits until we hit a non hex digit
+ uint32_t value = 0;
+ for (; cur < end && CharUtil::isHexDigit(*cur); ++cur)
+ {
+ const int digitValue = CharUtil::getHexDigitValue(*cur);
+ if (digitValue < 0)
+ {
+ return SLANG_FAIL;
+ }
+
+ value = (value << 4) | digitValue;
+ }
+
+ // If it's ascii, just output it
+ if (value < 0x80)
+ {
+ out.appendChar(char(value));
+ }
+ else
+ {
+ // It's arguable what is appropriate. We only decode/encode 4, which the current spec has,
+ // but 6 are possible, so lets go large.
+ const Index maxUtf8EncodeCount = 6;
+
+ char* chars = out.prepareForAppend(maxUtf8EncodeCount);
+ int numChars = encodeUnicodePointToUTF8(Char32(value), chars);
+ out.appendInPlace(chars, numChars);
+ }
+
+ // Reset start
+ start = cur;
+ break;
+ }
+ case 'u':
+ case 'U':
+ {
+ // u implies 4 hex digits
+ // U implies 6 here. NOTE(review): standard C++ \U takes 8 hex digits - confirm 6 is intended.
+
+ // Work out how many digits we need
+ const Count digitCount = (nextC == 'u') ? 4 : 6;
+
+ // Do we have enough?
+ if (end - cur < digitCount)
+ {
+ return SLANG_FAIL;
+ }
+
uint32_t value = 0;
- for (++cur; cur < end && CharUtil::isHexDigit(*cur); ++cur)
+ for (Index i = 0; i < digitCount; ++i)
{
- value = value << 4 | _getHexDigit(*cur);
+ const int digitValue = CharUtil::getHexDigitValue(cur[i]);
+ if (digitValue < 0)
+ {
+ return SLANG_FAIL;
+ }
+ value = (value << 4) | digitValue;
}
+ cur += digitCount;
- // It's arguable what is appropriate. We only decode/encode 4, which the current spec has,
- // but 6 are possible, so lets go large.
- const Index maxUtf8EncodeCount = 6;
+ // Encode to Utf8
+ // If it's ascii, just output it
+ if (value < 0x80)
+ {
+ out.appendChar(char(value));
+ }
+ else
+ {
+ // It's arguable what is appropriate. We only decode/encode 4, which the current spec has,
+ // but 6 are possible, so lets go large.
+ const Index maxUtf8EncodeCount = 6;
- char* chars = out.prepareForAppend(maxUtf8EncodeCount);
- int numChars = encodeUnicodePointToUTF8(Char32(value), chars);
- out.appendInPlace(chars, numChars);
+ char* chars = out.prepareForAppend(maxUtf8EncodeCount);
+ int numChars = encodeUnicodePointToUTF8(Char32(value), chars);
+ out.appendInPlace(chars, numChars);
+ }
+ // Reset start
start = cur;
break;
}
@@ -363,6 +452,11 @@ SlangResult CppStringEscapeHandler::appendUnescaped(const UnownedStringSlice& sl
}
}
}
+ else
+ {
+ // Next char
+ ++cur;
+ }
}
if (start < end)
@@ -850,6 +944,9 @@ StringEscapeUtil::Handler* StringEscapeUtil::getHandler(Style style)
case Style::Cpp: return &g_cppHandler;
case Style::Space: return &g_spaceHandler;
case Style::JSON: return &g_jsonHandler;
+ // TODO(JS): For now we make Slang language string encoding/decoding the same as C++
+ // That may not be desirable because C++ has a variety of surprising edge cases (for example around \x)
+ case Style::Slang: return &g_cppHandler;
default: return nullptr;
}
}
diff --git a/source/core/slang-string-escape-util.h b/source/core/slang-string-escape-util.h
index 5f749c423..c3a43b623 100644
--- a/source/core/slang-string-escape-util.h
+++ b/source/core/slang-string-escape-util.h
@@ -55,6 +55,8 @@ struct StringEscapeUtil
Cpp, ///< Cpp style quoting and escape handling
Space, ///< Applies quotes if there are spaces. Does not escape.
JSON, ///< Json encoding
+ Slang, ///< Slang style string encoding (For now same as Cpp but that may change in the future)
+ CountOf,
};
/// Given a style returns a handler
diff --git a/source/core/slang-string.h b/source/core/slang-string.h
index 6dc3275eb..5119dac8f 100644
--- a/source/core/slang-string.h
+++ b/source/core/slang-string.h
@@ -62,6 +62,10 @@ namespace Slang
return (((unsigned char)ch) & 0xC0) == 0x80;
}
+ /* A string slice that doesn't own the contained characters.
+ It is the responsibility of code using the type to keep the memory backing
+ the slice in scope.
+ A slice is generally *not* zero terminated. */
struct SLANG_RT_API UnownedStringSlice
{
public:
@@ -85,15 +89,9 @@ namespace Slang
, m_end(b + len)
{}
- char const* begin() const
- {
- return m_begin;
- }
+ SLANG_FORCE_INLINE char const* begin() const { return m_begin; }
- char const* end() const
- {
- return m_end;
- }
+ SLANG_FORCE_INLINE char const* end() const { return m_end; }
/// True if slice is strictly contained in memory.
bool isMemoryContained(const UnownedStringSlice& slice) const
@@ -105,10 +103,8 @@ namespace Slang
return pos >= m_begin && pos <= m_end;
}
- Index getLength() const
- {
- return Index(m_end - m_begin);
- }
+ /// Get the length in *bytes*
+ Count getLength() const { return Index(m_end - m_begin); }
/// Finds first index of char 'c'. If not found returns -1.
Index indexOf(char c) const;
@@ -179,7 +175,7 @@ namespace Slang
template <size_t SIZE>
SLANG_FORCE_INLINE static UnownedStringSlice fromLiteral(const char (&in)[SIZE]) { return UnownedStringSlice(in, SIZE - 1); }
- private:
+ protected:
char const* m_begin;
char const* m_end;
};
@@ -188,6 +184,40 @@ namespace Slang
template <size_t SIZE>
SLANG_FORCE_INLINE UnownedStringSlice toSlice(const char (&in)[SIZE]) { return UnownedStringSlice(in, SIZE - 1); }
+ /// Same as UnownedStringSlice, but must be zero terminated.
+ /// Zero termination is *not* included in the length.
+ struct SLANG_RT_API UnownedTerminatedStringSlice : public UnownedStringSlice
+ {
+ public:
+ typedef UnownedStringSlice Super;
+ typedef UnownedTerminatedStringSlice ThisType;
+
+ /// We can turn into a regular zero terminated string
+ SLANG_FORCE_INLINE operator const char*() const { return m_begin; }
+
+ /// Exists to match the equivalent function in String.
+ SLANG_FORCE_INLINE char const* getBuffer() const { return m_begin; }
+
+ /// Construct from a literal directly.
+ template <size_t SIZE>
+ SLANG_FORCE_INLINE static ThisType fromLiteral(const char(&in)[SIZE]) { return ThisType(in, SIZE - 1); }
+
+ /// Note, b cannot be null because if it were then the string would not be null terminated
+ UnownedTerminatedStringSlice(char const* b)
+ : Super(b, b + strlen(b))
+ {}
+ UnownedTerminatedStringSlice(char const* b, size_t len)
+ : Super(b, len)
+ {
+ // b must be valid and it must be null terminated
+ SLANG_ASSERT(b && b[len] == 0);
+ }
+ };
+
+ // A more convenient way to make terminated slices from *string literals*
+ template <size_t SIZE>
+ SLANG_FORCE_INLINE UnownedTerminatedStringSlice toTerminatedSlice(const char(&in)[SIZE]) { return UnownedTerminatedStringSlice(in, SIZE - 1); }
+
// A `StringRepresentation` provides the backing storage for
// all reference-counted string-related types.
class SLANG_RT_API StringRepresentation : public RefObject
@@ -284,16 +314,6 @@ namespace Slang
class String;
-
-
- struct SLANG_RT_API UnownedTerminatedStringSlice : public UnownedStringSlice
- {
- public:
- UnownedTerminatedStringSlice(char const* b)
- : UnownedStringSlice(b, b + (b?strlen(b):0))
- {}
- };
-
struct SLANG_RT_API StringSlice
{
public:
diff --git a/source/core/slang-token-reader.cpp b/source/core/slang-token-reader.cpp
index 7ffbc12fa..5acc1736c 100644
--- a/source/core/slang-token-reader.cpp
+++ b/source/core/slang-token-reader.cpp
@@ -671,93 +671,6 @@ namespace Misc {
return TokenizeText("", text);
}
- String EscapeStringLiteral(String str)
- {
- StringBuilder sb;
- sb << "\"";
- const Index length = str.getLength();
- const char*const data = str.getBuffer();
- for (Index i = 0; i < length; i++)
- {
- switch (data[i])
- {
- case '\n':
- sb << "\\n";
- break;
- case '\r':
- sb << "\\r";
- break;
- case '\t':
- sb << "\\t";
- break;
- case '\v':
- sb << "\\v";
- break;
- case '\'':
- sb << "\\\'";
- break;
- case '\"':
- sb << "\\\"";
- break;
- case '\\':
- sb << "\\\\";
- break;
- default:
- sb << data[i];
- break;
- }
- }
- sb << "\"";
- return sb.ProduceString();
- }
-
- String UnescapeStringLiteral(String str)
- {
- StringBuilder sb;
- const Index length = str.getLength();
- const char*const data = str.getBuffer();
- for (Index i = 0; i < length; i++)
- {
- if (data[i] == '\\' && i < length - 1)
- {
- switch (data[i + 1])
- {
- case 's':
- sb << " ";
- break;
- case 't':
- sb << '\t';
- break;
- case 'n':
- sb << '\n';
- break;
- case 'r':
- sb << '\r';
- break;
- case 'v':
- sb << '\v';
- break;
- case '\'':
- sb << '\'';
- break;
- case '\"':
- sb << "\"";
- break;
- case '\\':
- sb << "\\";
- break;
- default:
- i = i - 1;
- sb << data[i];
- }
- i++;
- }
- else
- sb << data[i];
- }
- return sb.ProduceString();
- }
-
TokenReader::TokenReader(String text)
{
this->tokens = TokenizeText("", text);
diff --git a/source/core/slang-token-reader.h b/source/core/slang-token-reader.h
index bf5ca4cdc..26539732c 100644
--- a/source/core/slang-token-reader.h
+++ b/source/core/slang-token-reader.h
@@ -295,8 +295,6 @@ namespace Misc {
return result;
}
- String EscapeStringLiteral(String str);
- String UnescapeStringLiteral(String str);
} // namespace Misc
} // namespace Slang
diff --git a/source/core/slang-type-text-util.cpp b/source/core/slang-type-text-util.cpp
index 13bf439ce..454ca4258 100644
--- a/source/core/slang-type-text-util.cpp
+++ b/source/core/slang-type-text-util.cpp
@@ -68,7 +68,7 @@ static const CompileTargetInfo s_compileTargetInfos[] =
{ SLANG_SPIRV_ASM, "spv.asm", "spirv-asm,spirv-assembly" },
{ SLANG_C_SOURCE, "c", "c" },
{ SLANG_CPP_SOURCE, "cpp,c++,cxx", "cpp,c++,cxx" },
- { SLANG_HOST_CPP_SOURCE, "cpp,c++,cxx", "cpp,c++,cxx"},
+ { SLANG_HOST_CPP_SOURCE, "cpp,c++,cxx", "host-cpp,host-c++,host-cxx"},
{ SLANG_HOST_EXECUTABLE,"exe", "exe,executable" },
{ SLANG_SHADER_SHARED_LIBRARY, "dll,so", "sharedlib,sharedlibrary,dll" },
{ SLANG_CUDA_SOURCE, "cu", "cuda,cu" },
diff --git a/source/slang/core.meta.slang b/source/slang/core.meta.slang
index 476e88e3f..41cfea6af 100644
--- a/source/slang/core.meta.slang
+++ b/source/slang/core.meta.slang
@@ -378,6 +378,11 @@ __intrinsic_type($(kIROp_StringType))
struct String
{};
+__magic_type(NativeStringType)
+__intrinsic_type($(kIROp_NativeStringType))
+struct NativeString
+{};
+
__magic_type(DynamicType)
__intrinsic_type($(kIROp_DynamicType))
struct __Dynamic
diff --git a/source/slang/slang-ast-builder.cpp b/source/slang/slang-ast-builder.cpp
index caf4c020d..7ac039187 100644
--- a/source/slang/slang-ast-builder.cpp
+++ b/source/slang/slang-ast-builder.cpp
@@ -90,6 +90,16 @@ Type* SharedASTBuilder::getStringType()
return m_stringType;
}
+Type* SharedASTBuilder::getNativeStringType()
+{
+ if (!m_nativeStringType)
+ {
+ auto nativeStringTypeDecl = findMagicDecl("NativeStringType");
+ m_nativeStringType = DeclRefType::create(m_astBuilder, makeDeclRef<Decl>(nativeStringTypeDecl));
+ }
+ return m_nativeStringType;
+}
+
Type* SharedASTBuilder::getEnumTypeType()
{
if (!m_enumTypeType)
diff --git a/source/slang/slang-ast-builder.h b/source/slang/slang-ast-builder.h
index 0642455c3..97aefd118 100644
--- a/source/slang/slang-ast-builder.h
+++ b/source/slang/slang-ast-builder.h
@@ -23,6 +23,10 @@ public:
/// Get the string type
Type* getStringType();
+
+ /// Get the native string type
+ Type* getNativeStringType();
+
/// Get the enum type type
Type* getEnumTypeType();
/// Get the __Dynamic type
@@ -65,6 +69,7 @@ protected:
// TODO(tfoley): These should really belong to the compilation context!
//
Type* m_stringType = nullptr;
+ Type* m_nativeStringType = nullptr;
Type* m_enumTypeType = nullptr;
Type* m_dynamicType = nullptr;
Type* m_nullPtrType = nullptr;
diff --git a/source/slang/slang-ast-type.h b/source/slang/slang-ast-type.h
index fee7f7cac..7aa1a36ab 100644
--- a/source/slang/slang-ast-type.h
+++ b/source/slang/slang-ast-type.h
@@ -460,12 +460,24 @@ private:
Type* rowType = nullptr;
};
-// The built-in `String` type
-class StringType : public BuiltinType
+// Base class for built in string types
+class StringTypeBase : public BuiltinType
+{
+ SLANG_AST_CLASS(StringTypeBase)
+};
+
+// The regular built-in `String` type
+class StringType : public StringTypeBase
{
SLANG_AST_CLASS(StringType)
};
+// The string type native to the target
+class NativeStringType : public StringTypeBase
+{
+ SLANG_AST_CLASS(NativeStringType)
+};
+
// The built-in `__Dynamic` type
class DynamicType : public BuiltinType
{
diff --git a/source/slang/slang-check-conversion.cpp b/source/slang/slang-check-conversion.cpp
index a1935d65c..44bb8a610 100644
--- a/source/slang/slang-check-conversion.cpp
+++ b/source/slang/slang-check-conversion.cpp
@@ -639,6 +639,16 @@ namespace Slang
return true;
}
+ // If both are string types we assume they are convertible in both directions
+ if (as<StringTypeBase>(fromType) && as<StringTypeBase>(toType))
+ {
+ if (outToExpr)
+ *outToExpr = fromExpr;
+ if (outCost)
+ *outCost = kConversionCost_None;
+ return true;
+ }
+
// Another important case is when either the "to" or "from" type
// represents an error. In such a case we must have already
// reported the error, so it is better to allow the conversion
diff --git a/source/slang/slang-emit-cpp.cpp b/source/slang/slang-emit-cpp.cpp
index 9887f1ba6..482ada394 100644
--- a/source/slang/slang-emit-cpp.cpp
+++ b/source/slang/slang-emit-cpp.cpp
@@ -532,6 +532,11 @@ SlangResult CPPSourceEmitter::calcTypeName(IRType* type, CodeGenTarget target, S
out << "TypeInfo*";
return SLANG_OK;
}
+ case kIROp_NativeStringType:
+ {
+ out << "const char*";
+ return SLANG_OK;
+ }
case kIROp_StringType:
{
out << "String";
@@ -2411,8 +2416,15 @@ bool CPPSourceEmitter::tryEmitInstExprImpl(IRInst* inst, const EmitOpInfo& inOut
}
case kIROp_StringLit:
{
- m_writer->emit("String(");
- m_writer->emit(Slang::Misc::EscapeStringLiteral(as<IRStringLit>(inst)->getStringSlice()));
+ m_writer->emit("toTerminatedSlice(");
+
+ auto handler = StringEscapeUtil::getHandler(StringEscapeUtil::Style::Cpp);
+
+ StringBuilder buf;
+ const auto slice = as<IRStringLit>(inst)->getStringSlice();
+ StringEscapeUtil::appendQuoted(handler, slice, buf);
+ m_writer->emit(buf);
+
m_writer->emit(")");
return true;
}
diff --git a/source/slang/slang-emit-glsl.cpp b/source/slang/slang-emit-glsl.cpp
index c1bbf813b..b0c10fdc2 100644
--- a/source/slang/slang-emit-glsl.cpp
+++ b/source/slang/slang-emit-glsl.cpp
@@ -1967,7 +1967,12 @@ void GLSLSourceEmitter::emitSimpleTypeImpl(IRType* type)
}
return;
}
- case kIROp_StringType: m_writer->emit("int"); return;
+ case kIROp_NativeStringType:
+ case kIROp_StringType:
+ {
+ m_writer->emit("int");
+ return;
+ }
default: break;
}
diff --git a/source/slang/slang-emit-hlsl.cpp b/source/slang/slang-emit-hlsl.cpp
index 2d42aef83..48fe86fff 100644
--- a/source/slang/slang-emit-hlsl.cpp
+++ b/source/slang/slang-emit-hlsl.cpp
@@ -853,7 +853,12 @@ void HLSLSourceEmitter::emitSimpleTypeImpl(IRType* type)
}
return;
}
- case kIROp_StringType: m_writer->emit("int"); return;
+ case kIROp_NativeStringType:
+ case kIROp_StringType:
+ {
+ m_writer->emit("int");
+ return;
+ }
default: break;
}
diff --git a/source/slang/slang-ir-collect-global-uniforms.cpp b/source/slang/slang-ir-collect-global-uniforms.cpp
index 87b21c819..ca5e56b53 100644
--- a/source/slang/slang-ir-collect-global-uniforms.cpp
+++ b/source/slang/slang-ir-collect-global-uniforms.cpp
@@ -69,6 +69,11 @@ struct CollectGlobalUniformParametersContext
//
void processModule()
{
+ if (!globalScopeVarLayout)
+ {
+ return;
+ }
+
// We start by looking at the layout that was computed for the global-scope
// parameters to determine how the parameters are supposed to be packaged.
//
diff --git a/source/slang/slang-ir-inst-defs.h b/source/slang/slang-ir-inst-defs.h
index f9e0a5f34..c617a0218 100644
--- a/source/slang/slang-ir-inst-defs.h
+++ b/source/slang/slang-ir-inst-defs.h
@@ -24,7 +24,10 @@ INST(Nop, nop, 0, 0)
INST_RANGE(BasicType, VoidType, AfterBaseType)
- INST(StringType, String, 0, 0)
+ /* StringTypeBase */
+ INST(StringType, String, 0, 0)
+ INST(NativeStringType, NativeString, 0, 0)
+ INST_RANGE(StringTypeBase, StringType, NativeStringType)
INST(CapabilitySetType, CapabilitySet, 0, 0)
diff --git a/source/slang/slang-ir-insts.h b/source/slang/slang-ir-insts.h
index 0e54802e5..77b3eabc0 100644
--- a/source/slang/slang-ir-insts.h
+++ b/source/slang/slang-ir-insts.h
@@ -2080,6 +2080,7 @@ public:
IRBasicType* getUInt64Type();
IRBasicType* getCharType();
IRStringType* getStringType();
+ IRNativeStringType* getNativeStringType();
IRType* getCapabilitySetType();
diff --git a/source/slang/slang-ir-link.cpp b/source/slang/slang-ir-link.cpp
index b67d95abf..7984c5037 100644
--- a/source/slang/slang-ir-link.cpp
+++ b/source/slang/slang-ir-link.cpp
@@ -1477,10 +1477,13 @@ LinkedIR linkIR(
// need to operate on all the global parameters can do so.
//
IRVarLayout* irGlobalScopeVarLayout = nullptr;
- if( auto irGlobalScopeLayoutDecoration = irModuleForLayout->getModuleInst()->findDecoration<IRLayoutDecoration>() )
+ if (irModuleForLayout)
{
- auto irOriginalGlobalScopeVarLayout = irGlobalScopeLayoutDecoration->getLayout();
- irGlobalScopeVarLayout = cast<IRVarLayout>(cloneValue(context, irOriginalGlobalScopeVarLayout));
+ if( auto irGlobalScopeLayoutDecoration = irModuleForLayout->getModuleInst()->findDecoration<IRLayoutDecoration>() )
+ {
+ auto irOriginalGlobalScopeVarLayout = irGlobalScopeLayoutDecoration->getLayout();
+ irGlobalScopeVarLayout = cast<IRVarLayout>(cloneValue(context, irOriginalGlobalScopeVarLayout));
+ }
}
// Bindings for global generic parameters are currently represented
diff --git a/source/slang/slang-ir.cpp b/source/slang/slang-ir.cpp
index 9de2f5b4f..d454333e6 100644
--- a/source/slang/slang-ir.cpp
+++ b/source/slang/slang-ir.cpp
@@ -2508,6 +2508,12 @@ namespace Slang
return (IRStringType*)getType(kIROp_StringType);
}
+ IRNativeStringType* IRBuilder::getNativeStringType()
+ {
+ return (IRNativeStringType*)getType(kIROp_NativeStringType);
+ }
+
+
IRType* IRBuilder::getCapabilitySetType()
{
return getType(kIROp_CapabilitySetType);
@@ -4676,106 +4682,13 @@ namespace Slang
dumpDebugID(context, inst);
}
- struct StringEncoder
- {
- static char getHexChar(int v)
- {
- return (v <= 9) ? char(v + '0') : char(v - 10 + 'A');
- }
-
- void flush(const char* pos)
- {
- if (pos > m_runStart)
- {
- m_builder->append(m_runStart, pos);
- }
- m_runStart = pos + 1;
- }
-
- void appendEscapedChar(const char* pos, char encodeChar)
- {
- flush(pos);
- const char chars[] = { '\\', encodeChar };
- m_builder->Append(chars, 2);
- }
-
- void appendAsHex(const char* pos)
- {
- flush(pos);
-
- const int v = *(const uint8_t*)pos;
-
- char buf[5];
- buf[0] = '\\';
- buf[1] = 'x';
- buf[2] = '0';
-
- buf[3] = getHexChar(v >> 4);
- buf[4] = getHexChar(v & 0xf);
-
- m_builder->Append(buf, 5);
- }
-
- StringEncoder(StringBuilder* builder, const char* start):
- m_runStart(start),
- m_builder(builder)
- {}
-
- StringBuilder* m_builder;
- const char* m_runStart;
- };
-
static void dumpEncodeString(
IRDumpContext* context,
const UnownedStringSlice& slice)
{
- // https://msdn.microsoft.com/en-us/library/69ze775t.aspx
-
+ auto handler = StringEscapeUtil::getHandler(StringEscapeUtil::Style::Slang);
StringBuilder& builder = *context->builder;
- builder.Append('"');
-
- {
- const char* cur = slice.begin();
- StringEncoder encoder(&builder, cur);
- const char* end = slice.end();
-
- for (; cur < end; cur++)
- {
- const int8_t c = uint8_t(*cur);
- switch (c)
- {
- case '\\':
- encoder.appendEscapedChar(cur, '\\');
- break;
- case '"':
- encoder.appendEscapedChar(cur, '"');
- break;
- case '\n':
- encoder.appendEscapedChar(cur, 'n');
- break;
- case '\t':
- encoder.appendEscapedChar(cur, 't');
- break;
- case '\r':
- encoder.appendEscapedChar(cur, 'r');
- break;
- case '\0':
- encoder.appendEscapedChar(cur, '0');
- break;
- default:
- {
- if (c < 32)
- {
- encoder.appendAsHex(cur);
- }
- break;
- }
- }
- }
- encoder.flush(end);
- }
-
- builder.Append('"');
+ StringEscapeUtil::appendQuoted(handler, slice, builder);
}
static void dumpType(
diff --git a/source/slang/slang-ir.h b/source/slang/slang-ir.h
index 7a1a0b8aa..6c766542f 100644
--- a/source/slang/slang-ir.h
+++ b/source/slang/slang-ir.h
@@ -786,7 +786,13 @@ struct IRBoolType : IRBasicType
IR_LEAF_ISA(BoolType)
};
-SIMPLE_IR_TYPE(StringType, Type)
+struct IRStringTypeBase : IRType // Parent IR type that StringType and NativeStringType are both declared against.
+{
+ IR_PARENT_ISA(StringTypeBase)
+};
+
+SIMPLE_IR_TYPE(StringType, StringTypeBase)
+SIMPLE_IR_TYPE(NativeStringType, StringTypeBase)
SIMPLE_IR_TYPE(DynamicType, Type)
diff --git a/tools/slang-unit-test/unit-offset-container.cpp b/tools/slang-unit-test/unit-test-offset-container.cpp
index 6a179c319..9d8e3a9ff 100644
--- a/tools/slang-unit-test/unit-offset-container.cpp
+++ b/tools/slang-unit-test/unit-test-offset-container.cpp
@@ -1,4 +1,4 @@
-// unit-test-path.cpp
+// unit-test-offset-container.cpp
#include "../../source/core/slang-offset-container.h"
diff --git a/tools/slang-unit-test/unit-test-string-escape.cpp b/tools/slang-unit-test/unit-test-string-escape.cpp
new file mode 100644
index 000000000..337573081
--- /dev/null
+++ b/tools/slang-unit-test/unit-test-string-escape.cpp
@@ -0,0 +1,79 @@
+// unit-test-string-escape.cpp
+
+#include "../../source/core/slang-string-escape-util.h"
+
+#include "tools/unit-test/slang-unit-test.h"
+
+using namespace Slang;
+
+static bool _checkConversion(StringEscapeHandler* handler, const UnownedStringSlice& check) // Round-trip check: escape `check` with `handler`, unescape that output, and report whether it equals `check`.
+{
+ StringBuilder buf;
+ handler->appendEscaped(check, buf); // buf receives the escaped form of `check`
+
+ StringBuilder decode;
+ handler->appendUnescaped(buf.getUnownedSlice(), decode); // decode receives the unescaped form of buf
+
+ return decode == check; // true only when the round trip reproduced the input
+}
+
+static bool _checkDecode(const UnownedStringSlice& encoded, const UnownedStringSlice& decoded) // Returns true when unquoting `encoded` with the Cpp-style handler produces exactly `decoded`.
+{
+ auto handler = StringEscapeUtil::getHandler(StringEscapeUtil::Style::Cpp);
+
+ StringBuilder buf;
+ StringEscapeUtil::appendUnquoted(handler, encoded, buf); // presumably strips the surrounding quotes and unescapes the contents -- confirm against StringEscapeUtil
+ return buf == decoded;
+}
+
+#define SLANG_ENCODED_DECODED(x) /* Declares two locals in the current scope: `encoded`, a slice of the stringized argument (its source spelling), and `decoded`, a slice of the argument itself. */ \
+ const auto encoded = toSlice(#x); \
+ const auto decoded = toSlice(x);
+
+SLANG_UNIT_TEST(StringEscape)
+{
+ // Check that the compiler treats hex escapes as greedy
+ {
+ // \x consumes every hex digit that follows it, so this literal is one character plus the terminator
+ const char text[] = "\x000001";
+ SLANG_ASSERT(SLANG_COUNT_OF(text) == 2 && text[0] == 1);
+ }
+
+ // Check that the compiler limits octal escapes to three digits
+ {
+ // A backslash followed by at most 3 octal digits: the fourth digit here is an ordinary character
+ const char text[] = "\0011";
+ SLANG_ASSERT(SLANG_COUNT_OF(text) == 3 && text[0] == 1 && text[1] == '1');
+
+ const char text2[] = "\78"; // '8' is not an octal digit, so the escape ends after the 7
+ SLANG_ASSERT(SLANG_COUNT_OF(text2) == 3 && text2[0] == 7 && text2[1] == '8');
+ }
+
+ {
+ auto handler = StringEscapeUtil::getHandler(StringEscapeUtil::Style::Cpp);
+
+ SLANG_CHECK(_checkConversion(handler, toSlice("\0\1\2""2"))); // literal is split so the final '2' is a plain character and not part of the preceding octal escape
+ }
+
+ {
+ auto handler = StringEscapeUtil::getHandler(StringEscapeUtil::Style::Cpp); // NOTE(review): not used below; _checkDecode obtains its own Cpp handler
+
+ // We can't just use '\uxxxx', because MSVC has to be able to translate it into an output character (not into utf8).
+ // It could perhaps be made to work with something like
+ // #pragma execution_character_set("utf-8")
+ // but for now we don't worry about it.
+ //
+ // Visual Studio does not appear to support '\U' by default, presumably because wchar_t is 16 bits
+
+ {
+ SLANG_ENCODED_DECODED("\a\b\0hey~\u0023\n\0"); // encoded = source spelling of the literal, decoded = the literal as compiled
+ SLANG_CHECK(_checkDecode(encoded, decoded));
+ }
+
+ {
+ SLANG_ENCODED_DECODED("\n\v\b\t\1\02\003\x5z\x00007f\0"); // mixes simple, octal (1 to 3 digit) and hex escapes
+ SLANG_CHECK(_checkDecode(encoded, decoded));
+ }
+ }
+}
+