diff options
| author | jsmall-nvidia <jsmall@nvidia.com> | 2019-06-18 19:49:34 -0400 |
|---|---|---|
| committer | Tim Foley <tfoleyNV@users.noreply.github.com> | 2019-06-18 16:49:34 -0700 |
| commit | 7c9298d8b10b5f4e69e24e3eb933e93e0d92fc37 (patch) | |
| tree | ab5293052f7e5271fd379b02e2168217aa86c4bc /source/core | |
| parent | e213e394d5008cb7bf08bbf875acd494259f5847 (diff) | |
StringUtil::extractLine (#989)
* Added extractLine line parsing to StringUtil. Use for matching lines instead of calcLines. calcLines uses extractLine to extract lines.
Fixed problems found in the output of some tests - due to how a final line is handled. Now, if a final line has a \r or \n\r combination but nothing else after it, it is considered the last line (not the line after it).
* Use StringUtil::extractLine in slang-generate.
* Improved comment on extractLine
* Remove test code from StringUtil::extractLine
* Made StringUtil::extractLine act as if line terminators are 'separators'.
Added unit-test-string.cpp - to check behavior.
* Adding LineParser - not entirely necessary, but slightly easier to use.
* Improved LineParser::Iterator end testing.
Added improved tests for LineParser.
* Move line comparison after termination case - to fix problem with gcc release build.
* Make UnownedStringSlice handle comparison when begin is nullptr - it uses memcmp, and passing nullptr to memcmp is undefined behavior, which allows the optimizer to perform some unfortunate optimizations on gcc.
Diffstat (limited to 'source/core')
| -rw-r--r-- | source/core/slang-string-util.cpp | 75 | ||||
| -rw-r--r-- | source/core/slang-string-util.h | 47 | ||||
| -rw-r--r-- | source/core/slang-string.h | 21 |
3 files changed, 115 insertions, 28 deletions
diff --git a/source/core/slang-string-util.cpp b/source/core/slang-string-util.cpp index 60ebc45ba..fa96e4435 100644 --- a/source/core/slang-string-util.cpp +++ b/source/core/slang-string-util.cpp @@ -199,15 +199,18 @@ ComPtr<ISlangBlob> StringUtil::createStringBlob(const String& string) return (fromChar == toChar || string.indexOf(fromChar) == Index(-1)) ? string : calcCharReplaced(string.getUnownedSlice(), fromChar, toChar); } -/* static */void StringUtil::calcLines(const UnownedStringSlice& textIn, List<UnownedStringSlice>& outLines) +/* static */UnownedStringSlice StringUtil::extractLine(UnownedStringSlice& ioText) { - char const* begin = textIn.begin(); - char const* end = textIn.end(); - - char const* cursor = begin; + char const*const begin = ioText.begin(); + char const*const end = ioText.end(); - const char* lineStart = cursor; + // If we have hit the end then return the 'special' terminator + if (begin == nullptr) + { + return UnownedStringSlice(nullptr, nullptr); + } + char const* cursor = begin; while (cursor < end) { int c = *cursor++; @@ -215,56 +218,76 @@ ComPtr<ISlangBlob> StringUtil::createStringBlob(const String& string) { case '\r': case '\n': { - outLines.add(UnownedStringSlice(lineStart, cursor - 1)); + // Remember the end of the line + const char*const lineEnd = cursor - 1; // When we see a line-break character we need // to record the line break, but we also need // to deal with the annoying issue of encodings, // where a multi-byte sequence might encode // the line break. 
- if (cursor < end) { int d = *cursor; if ((c ^ d) == ('\r' ^ '\n')) cursor++; } - lineStart = cursor; - break; + + ioText = UnownedStringSlice(cursor, end); + return UnownedStringSlice(begin, lineEnd); } default: break; } } - if (cursor > lineStart) - { - outLines.add(UnownedStringSlice(lineStart, cursor)); - } + // There is nothing remaining + ioText = UnownedStringSlice(nullptr, nullptr); + + // Could be empty, or the remaining line (without line end terminators of) + SLANG_ASSERT(begin <= cursor); + + return UnownedStringSlice(begin, cursor); } -/* static */bool StringUtil::areLinesEqual(const UnownedStringSlice& a, const UnownedStringSlice& b) +/* static */void StringUtil::calcLines(const UnownedStringSlice& textIn, List<UnownedStringSlice>& outLines) { - List<UnownedStringSlice> slicesA; - List<UnownedStringSlice> slicesB; - - calcLines(a, slicesA); - calcLines(b, slicesB); + outLines.clear(); - const auto linesCount = slicesA.getCount(); - if (linesCount != slicesB.getCount()) + UnownedStringSlice text(textIn); + while (true) { - return false; + UnownedStringSlice line = extractLine(text); + if (line.begin() == nullptr) + { + return; + } + outLines.add(line); } +} - for (Index i = 0; i < linesCount; ++i) +/* static */bool StringUtil::areLinesEqual(const UnownedStringSlice& inA, const UnownedStringSlice& inB) +{ + UnownedStringSlice a(inA); + UnownedStringSlice b(inB); + + while (true) { - if (slicesA[i] != slicesB[i]) + const UnownedStringSlice lineA = extractLine(a); + const UnownedStringSlice lineB = extractLine(b); + + // If either has ended, they both must have ended + if (lineA.begin() == nullptr || lineB.begin() == nullptr) + { + return lineA.begin() == lineB.begin(); + } + + // The lines must be equal + if (lineA != lineB) { return false; } } - return true; } } // namespace Slang diff --git a/source/core/slang-string-util.h b/source/core/slang-string-util.h index 6a3b60240..b9618c17f 100644 --- a/source/core/slang-string-util.h +++ 
b/source/core/slang-string-util.h @@ -78,6 +78,16 @@ struct StringUtil /// Create a blob from a string static ComPtr<ISlangBlob> createStringBlob(const String& string); + /// Returns a line extracted from the start of ioText. + /// + /// At the end of all the text a 'special' null UnownedStringSlice with a null 'begin' pointer is returned. + /// The slice passed in will be modified on output to contain the remaining text, starting at the beginning of the next line. + /// As en empty final line is still a line, the special null UnownedStringSlice is the last value ioText after the last valid line is returned. + /// + /// NOTE! That behavior is as if line terminators (like \n) act as separators. Thus input of "\n" will return *two* lines - an empty line + /// before and then after the \n. + static UnownedStringSlice extractLine(UnownedStringSlice& ioText); + /// Given text, splits into lines stored in outLines. NOTE! That lines is only valid as long as textIn remains valid static void calcLines(const UnownedStringSlice& textIn, List<UnownedStringSlice>& lines); @@ -85,6 +95,43 @@ struct StringUtil static bool areLinesEqual(const UnownedStringSlice& a, const UnownedStringSlice& b); }; +/* A helper class that allows parsing of lines from text with iteration. Uses StringUtil::extractLine for the actual underlying implementation. */ +class LineParser +{ +public: + struct Iterator + { + const UnownedStringSlice& operator*() const { return m_line; } + const UnownedStringSlice* operator->() const { return &m_line; } + Iterator& operator++() + { + m_line = StringUtil::extractLine(m_remaining); + return *this; + } + Iterator operator++(int) { Iterator rs = *this; operator++(); return rs; } + + /// Equal if both are at the same m_line address exactly. Handles termination case correctly where line.begin() == nullptr. 
+ bool operator==(const Iterator& rhs) const { return m_line.begin() == rhs.m_line.begin(); } + bool operator !=(const Iterator& rhs) const { return !(*this == rhs); } + + /// Ctor + Iterator(const UnownedStringSlice& line, const UnownedStringSlice& remaining) : m_line(line), m_remaining(remaining) {} + + protected: + UnownedStringSlice m_line; + UnownedStringSlice m_remaining; + }; + + Iterator begin() const { UnownedStringSlice remaining(m_text); UnownedStringSlice line = StringUtil::extractLine(remaining); return Iterator(line, remaining); } + Iterator end() const { UnownedStringSlice term(nullptr, nullptr); return Iterator(term, term); } + + /// Ctor + LineParser(const UnownedStringSlice& text) : m_text(text) {} + +protected: + UnownedStringSlice m_text; +}; + } // namespace Slang #endif // SLANG_STRING_UTIL_H diff --git a/source/core/slang-string.h b/source/core/slang-string.h index 4975e9ec6..8a9e83cd1 100644 --- a/source/core/slang-string.h +++ b/source/core/slang-string.h @@ -119,8 +119,25 @@ namespace Slang bool operator==(UnownedStringSlice const& other) const { - return size() == other.size() - && memcmp(begin(), other.begin(), size()) == 0; + // Note that memcmp is undefined when passed in null ptrs, so if we want to handle + // we need to cover that case. + // Can only be nullptr if size is 0. + auto thisSize = size(); + auto otherSize = other.size(); + + if (thisSize != otherSize) + { + return false; + } + + const char*const thisChars = begin(); + const char*const otherChars = other.begin(); + if (thisChars == otherChars || thisSize == 0) + { + return true; + } + SLANG_ASSERT(thisChars && otherChars); + return memcmp(thisChars, otherChars, thisSize) == 0; } bool operator==(char const* str) const |
