summaryrefslogtreecommitdiffstats
path: root/source/core
diff options
context:
space:
mode:
authorjsmall-nvidia <jsmall@nvidia.com>2019-06-18 19:49:34 -0400
committerTim Foley <tfoleyNV@users.noreply.github.com>2019-06-18 16:49:34 -0700
commit7c9298d8b10b5f4e69e24e3eb933e93e0d92fc37 (patch)
treeab5293052f7e5271fd379b02e2168217aa86c4bc /source/core
parente213e394d5008cb7bf08bbf875acd494259f5847 (diff)
StringUtil::extractLine (#989)
* Added extractLine line parsing to StringUtil. Use for matching lines instead of calcLines. calcLines uses extractLine to extract lines. Fixed problems found in the output of some tests - due to how a final line is handled. Now if a final line has a \r or \n\r combination, but nothing else after it, it is considered the last line (not the line after it). * Use StringUtil::extractLine in slang-generate. * Improved comment on extractLine. * Remove test code from StringUtil::extractLine. * Made StringUtil::extractLine act as if line terminators are 'separators'. Added unit-test-string.cpp - to check behavior. * Adding LineParser - not entirely necessary, but slightly easier to use. * Improved LineParser::Iterator end testing. Added improved tests for LineParser. * Move line comparison after termination case - to fix problem with gcc release build. * Make UnownedStringSlice handle comparison when begin is nullptr - as it uses memcmp and passing nullptr to memcmp is undefined, leading to the optimizer being able to do some unfortunate optimizations on gcc.
Diffstat (limited to 'source/core')
-rw-r--r--source/core/slang-string-util.cpp75
-rw-r--r--source/core/slang-string-util.h47
-rw-r--r--source/core/slang-string.h21
3 files changed, 115 insertions, 28 deletions
diff --git a/source/core/slang-string-util.cpp b/source/core/slang-string-util.cpp
index 60ebc45ba..fa96e4435 100644
--- a/source/core/slang-string-util.cpp
+++ b/source/core/slang-string-util.cpp
@@ -199,15 +199,18 @@ ComPtr<ISlangBlob> StringUtil::createStringBlob(const String& string)
return (fromChar == toChar || string.indexOf(fromChar) == Index(-1)) ? string : calcCharReplaced(string.getUnownedSlice(), fromChar, toChar);
}
-/* static */void StringUtil::calcLines(const UnownedStringSlice& textIn, List<UnownedStringSlice>& outLines)
+/* static */UnownedStringSlice StringUtil::extractLine(UnownedStringSlice& ioText)
{
- char const* begin = textIn.begin();
- char const* end = textIn.end();
-
- char const* cursor = begin;
+ char const*const begin = ioText.begin();
+ char const*const end = ioText.end();
- const char* lineStart = cursor;
+ // If we have hit the end then return the 'special' terminator
+ if (begin == nullptr)
+ {
+ return UnownedStringSlice(nullptr, nullptr);
+ }
+ char const* cursor = begin;
while (cursor < end)
{
int c = *cursor++;
@@ -215,56 +218,76 @@ ComPtr<ISlangBlob> StringUtil::createStringBlob(const String& string)
{
case '\r': case '\n':
{
- outLines.add(UnownedStringSlice(lineStart, cursor - 1));
+ // Remember the end of the line
+ const char*const lineEnd = cursor - 1;
// When we see a line-break character we need
// to record the line break, but we also need
// to deal with the annoying issue of encodings,
// where a multi-byte sequence might encode
// the line break.
-
if (cursor < end)
{
int d = *cursor;
if ((c ^ d) == ('\r' ^ '\n'))
cursor++;
}
- lineStart = cursor;
- break;
+
+ ioText = UnownedStringSlice(cursor, end);
+ return UnownedStringSlice(begin, lineEnd);
}
default:
break;
}
}
- if (cursor > lineStart)
- {
- outLines.add(UnownedStringSlice(lineStart, cursor));
- }
+ // There is nothing remaining
+ ioText = UnownedStringSlice(nullptr, nullptr);
+
+ // Could be empty, or the remaining line (without any line-end terminators)
+ SLANG_ASSERT(begin <= cursor);
+
+ return UnownedStringSlice(begin, cursor);
}
-/* static */bool StringUtil::areLinesEqual(const UnownedStringSlice& a, const UnownedStringSlice& b)
+/* static */void StringUtil::calcLines(const UnownedStringSlice& textIn, List<UnownedStringSlice>& outLines)
{
- List<UnownedStringSlice> slicesA;
- List<UnownedStringSlice> slicesB;
-
- calcLines(a, slicesA);
- calcLines(b, slicesB);
+ outLines.clear();
- const auto linesCount = slicesA.getCount();
- if (linesCount != slicesB.getCount())
+ UnownedStringSlice text(textIn);
+ while (true)
{
- return false;
+ UnownedStringSlice line = extractLine(text);
+ if (line.begin() == nullptr)
+ {
+ return;
+ }
+ outLines.add(line);
}
+}
- for (Index i = 0; i < linesCount; ++i)
+/* static */bool StringUtil::areLinesEqual(const UnownedStringSlice& inA, const UnownedStringSlice& inB)
+{
+ UnownedStringSlice a(inA);
+ UnownedStringSlice b(inB);
+
+ while (true)
{
- if (slicesA[i] != slicesB[i])
+ const UnownedStringSlice lineA = extractLine(a);
+ const UnownedStringSlice lineB = extractLine(b);
+
+ // If either has ended, they both must have ended
+ if (lineA.begin() == nullptr || lineB.begin() == nullptr)
+ {
+ return lineA.begin() == lineB.begin();
+ }
+
+ // The lines must be equal
+ if (lineA != lineB)
{
return false;
}
}
- return true;
}
} // namespace Slang
diff --git a/source/core/slang-string-util.h b/source/core/slang-string-util.h
index 6a3b60240..b9618c17f 100644
--- a/source/core/slang-string-util.h
+++ b/source/core/slang-string-util.h
@@ -78,6 +78,16 @@ struct StringUtil
/// Create a blob from a string
static ComPtr<ISlangBlob> createStringBlob(const String& string);
+ /// Returns a line extracted from the start of ioText.
+ ///
+ /// At the end of all the text a 'special' null UnownedStringSlice with a null 'begin' pointer is returned.
+ /// The slice passed in will be modified on output to contain the remaining text, starting at the beginning of the next line.
+ /// As an empty final line is still a line, the special null UnownedStringSlice is the value left in ioText after the last valid line is returned.
+ ///
+ /// NOTE! That behavior is as if line terminators (like \n) act as separators. Thus input of "\n" will return *two* lines - an empty line
+ /// before and then after the \n.
+ static UnownedStringSlice extractLine(UnownedStringSlice& ioText);
+
/// Given text, splits into lines stored in outLines. NOTE! That lines is only valid as long as textIn remains valid
static void calcLines(const UnownedStringSlice& textIn, List<UnownedStringSlice>& lines);
@@ -85,6 +95,43 @@ struct StringUtil
static bool areLinesEqual(const UnownedStringSlice& a, const UnownedStringSlice& b);
};
+/* A helper class that allows parsing of lines from text with iteration. Uses StringUtil::extractLine for the actual underlying implementation. */
+class LineParser
+{
+public:
+ struct Iterator
+ {
+ const UnownedStringSlice& operator*() const { return m_line; }
+ const UnownedStringSlice* operator->() const { return &m_line; }
+ Iterator& operator++()
+ {
+ m_line = StringUtil::extractLine(m_remaining);
+ return *this;
+ }
+ Iterator operator++(int) { Iterator rs = *this; operator++(); return rs; }
+
+ /// Equal if both are at the same m_line address exactly. Handles termination case correctly where line.begin() == nullptr.
+ bool operator==(const Iterator& rhs) const { return m_line.begin() == rhs.m_line.begin(); }
+ bool operator !=(const Iterator& rhs) const { return !(*this == rhs); }
+
+ /// Ctor
+ Iterator(const UnownedStringSlice& line, const UnownedStringSlice& remaining) : m_line(line), m_remaining(remaining) {}
+
+ protected:
+ UnownedStringSlice m_line;
+ UnownedStringSlice m_remaining;
+ };
+
+ Iterator begin() const { UnownedStringSlice remaining(m_text); UnownedStringSlice line = StringUtil::extractLine(remaining); return Iterator(line, remaining); }
+ Iterator end() const { UnownedStringSlice term(nullptr, nullptr); return Iterator(term, term); }
+
+ /// Ctor
+ LineParser(const UnownedStringSlice& text) : m_text(text) {}
+
+protected:
+ UnownedStringSlice m_text;
+};
+
} // namespace Slang
#endif // SLANG_STRING_UTIL_H
diff --git a/source/core/slang-string.h b/source/core/slang-string.h
index 4975e9ec6..8a9e83cd1 100644
--- a/source/core/slang-string.h
+++ b/source/core/slang-string.h
@@ -119,8 +119,25 @@ namespace Slang
bool operator==(UnownedStringSlice const& other) const
{
- return size() == other.size()
- && memcmp(begin(), other.begin(), size()) == 0;
+ // Note that memcmp is undefined when passed in null ptrs, so if we want to handle
+ // we need to cover that case.
+ // Can only be nullptr if size is 0.
+ auto thisSize = size();
+ auto otherSize = other.size();
+
+ if (thisSize != otherSize)
+ {
+ return false;
+ }
+
+ const char*const thisChars = begin();
+ const char*const otherChars = other.begin();
+ if (thisChars == otherChars || thisSize == 0)
+ {
+ return true;
+ }
+ SLANG_ASSERT(thisChars && otherChars);
+ return memcmp(thisChars, otherChars, thisSize) == 0;
}
bool operator==(char const* str) const