diff options
| author | jsmall-nvidia <jsmall@nvidia.com> | 2021-02-23 12:36:46 -0500 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2021-02-23 12:36:46 -0500 |
| commit | 55a5ccc559b34b8d2eb9c7b7a2d9efbae40619c2 (patch) | |
| tree | 105e60200bc4f6ac13a1845b448886d777a7398a /source | |
| parent | 4bf01b04cb6bf1df8d4fb2ec5eee0a912ec679dc (diff) | |
Documentation markup extraction (#1724)
* Fix #include of an absolute path, which didn't work because paths were always taken to be relative.
* WIP extracting source documentation.
* WIP doc extraction.
* More stuff around doc markup extraction.
* More WIP around doc extraction.
* Fix some indexing issues.
* Initial doc extraction working.
* Renaming of types in markup extraction process.
* Extracting markup content.
Removing indenting.
Other fixes and improvements around document tools.
* WIP support for documentation system.
* Remove some commented out sections.
* Remove some comments that no longer apply.
* Improvements around SourceFile, providing more granularity for line operations.
Made some functionality explicitly work without source.
Improved Doc types naming.
Diffstat (limited to 'source')
| -rw-r--r-- | source/core/slang-string-util.cpp | 22 | ||||
| -rw-r--r-- | source/core/slang-string-util.h | 3 | ||||
| -rwxr-xr-x | source/slang/slang-compiler.h | 1 | ||||
| -rw-r--r-- | source/slang/slang-doc.cpp | 974 | ||||
| -rw-r--r-- | source/slang/slang-doc.h | 70 | ||||
| -rw-r--r-- | source/slang/slang-lexer.cpp | 46 | ||||
| -rw-r--r-- | source/slang/slang-lexer.h | 21 | ||||
| -rw-r--r-- | source/slang/slang-options.cpp | 4 | ||||
| -rw-r--r-- | source/slang/slang-source-loc.cpp | 49 | ||||
| -rw-r--r-- | source/slang/slang-source-loc.h | 39 | ||||
| -rw-r--r-- | source/slang/slang.cpp | 22 |
11 files changed, 1234 insertions, 17 deletions
diff --git a/source/core/slang-string-util.cpp b/source/core/slang-string-util.cpp index cac7c7cfa..6ce75f3f0 100644 --- a/source/core/slang-string-util.cpp +++ b/source/core/slang-string-util.cpp @@ -360,6 +360,28 @@ ComPtr<ISlangBlob> StringUtil::createStringBlob(const String& string) } } +/* static */UnownedStringSlice StringUtil::trimEndOfLine(const UnownedStringSlice& line) +{ + // Strip CR/LF from end of line if present + + const char* begin = line.begin(); + const char* end = line.end(); + + if (end > begin) + { + const char c = end[-1]; + // If last char is CR/LF move back a char + if (c == '\n' || c == '\r') + { + --end; + // If next char is a match for the CR/LF pair move back an extra char. + end -= Index((end > begin) && (c ^ end[-1]) == ('\r' ^ '\n')); + } + } + + return line.head(Index(end - begin)); +} + /* static */bool StringUtil::areLinesEqual(const UnownedStringSlice& inA, const UnownedStringSlice& inB) { UnownedStringSlice a(inA), b(inB), lineA, lineB; diff --git a/source/core/slang-string-util.h b/source/core/slang-string-util.h index dade8a61d..4672fa1d0 100644 --- a/source/core/slang-string-util.h +++ b/source/core/slang-string-util.h @@ -88,6 +88,9 @@ struct StringUtil /// Given text, splits into lines stored in outLines. NOTE! 
That lines is only valid as long as textIn remains valid static void calcLines(const UnownedStringSlice& textIn, List<UnownedStringSlice>& lines); + /// Given a line that may contain cr/lf, returns the the a slice that doesn't have trailing cr/lf + static UnownedStringSlice trimEndOfLine(const UnownedStringSlice& slice); + /// Equal if the lines are equal (in effect a way to ignore differences in line breaks) static bool areLinesEqual(const UnownedStringSlice& a, const UnownedStringSlice& b); diff --git a/source/slang/slang-compiler.h b/source/slang/slang-compiler.h index 90b1e30f3..5eedb4db6 100755 --- a/source/slang/slang-compiler.h +++ b/source/slang/slang-compiler.h @@ -1522,6 +1522,7 @@ namespace Slang bool shouldValidateIR = false; bool shouldDumpAST = false; + bool shouldDocument = false; /// If true will after lexical analysis output the hierarchy of includes to stdout bool outputIncludes = false; diff --git a/source/slang/slang-doc.cpp b/source/slang/slang-doc.cpp new file mode 100644 index 000000000..fa3e11030 --- /dev/null +++ b/source/slang/slang-doc.cpp @@ -0,0 +1,974 @@ +// slang-doc.cpp +#include "slang-doc.h" + +#include "../core/slang-string-util.h" + +namespace Slang { + +/* TODO(JS): + +* If Decls hand SourceRange, then we could use the range to simplify getting the Post markup, as will be trivial to get to the 'end' +* Need to handle preceeding * in some markup styles +* If we want to be able to disable markup we need a mechanism to do this. Probably define source ranges. + +* Need a way to take the extracted markup and produce suitable markdown +** This will need to display the decoration appropriately +*/ + +/* Extracts 'markup' from comments in Slang source core. The comments are extracted and associated in declarations. The association +is held in DocMarkup type. 
The comment style follows the doxygen style */ +class DocMarkupExtractor +{ +public: + + typedef uint32_t MarkupFlags; + struct MarkupFlag + { + enum Enum : MarkupFlags + { + Before = 0x1, + After = 0x2, + IsMultiToken = 0x4, ///< Can use more than one token + IsBlock = 0x8, ///< + }; + }; + + enum class MarkupType + { + None, + BlockBefore, /// /** */ or /*! */. + BlockAfter, /// /*!< */ or /**< */ + + LineBangBefore, /// //! Can be multiple lines + LineSlashBefore, /// /// Can be multiple lines + + LineBangAfter, /// //!< Can be multiple lines + LineSlashAfter, /// ///< Can be multiple lines + }; + + struct IndexRange + { + SLANG_FORCE_INLINE Index getCount() const { return end - start; } + + Index start; + Index end; + }; + + enum class Location + { + None, ///< No defined location + Before, + AfterParam, ///< Can have trailing , or ) + AfterSemicolon, ///< Can have a trailing ; + }; + + struct FoundMarkup + { + void reset() + { + location = Location::None; + type = MarkupType::None; + range = IndexRange { 0, 0 }; + } + + Location location = Location::None; + MarkupType type = MarkupType::None; + IndexRange range; + }; + + struct FindInfo + { + + SourceView* sourceView; ///< The source view the tokens were generated from + TokenList* tokenList; ///< The token list + Index declTokenIndex; ///< The token index location (where searches start from) + Index declLineIndex; ///< The line number for the decl + }; + + SlangResult extract(DocMarkup* doc, ModuleDecl* moduleDecl, SourceManager* sourceManager, DiagnosticSink* sink); + + static MarkupFlags getFlags(MarkupType type); + static MarkupType findMarkupType(const Token& tok); + static UnownedStringSlice removeStart(MarkupType type, const UnownedStringSlice& comment); + +protected: + /// returns SLANG_E_NOT_FOUND if not found, SLANG_OK on success else an error + SlangResult _findMarkup(const FindInfo& info, Location location, FoundMarkup& out); + + /// Locations are processed in order, and the first successful used. 
If found in another location will issue a warning. + /// returns SLANG_E_NOT_FOUND if not found, SLANG_OK on success else an error + SlangResult _findFirstMarkup(const FindInfo& info, const Location* locs, Index locCount, FoundMarkup& out, Index& outIndex); + + SlangResult _findMarkup(const FindInfo& info, const Location* locs, Index locCount, FoundMarkup& out); + + /// Given the decl, the token stream, and the decls tokenIndex, try to find some associated markup + SlangResult _findMarkup(const FindInfo& info, Decl* decl, FoundMarkup& out); + + /// Given a found markup location extracts the contents of the tokens into out + SlangResult _extractMarkup(const FindInfo& info, const FoundMarkup& foundMarkup, StringBuilder& out); + + /// Given a location, try to find the first token index that could potentially be markup + /// Will return -1 if not found + Index _findStartIndex(const FindInfo& info, Location location); + + /// True if the tok is 'on' lineIndex. Interpretation of 'on' depends on the markup type. + static bool _isTokenOnLineIndex(SourceView* sourceView, MarkupType type, const Token& tok, Index lineIndex); + + void _addDecl(Decl* decl); + void _addDeclRec(Decl* decl); + void _findDecls(ModuleDecl* moduleDecl); + + List<Decl*> m_decls; + + DocMarkup* m_doc; + ModuleDecl* m_moduleDecl; + SourceManager* m_sourceManager; + DiagnosticSink* m_sink; +}; + +/* static */UnownedStringSlice DocMarkupExtractor::removeStart(MarkupType type, const UnownedStringSlice& comment) +{ + switch (type) + { + case MarkupType::BlockBefore: + { + if (comment.startsWith(UnownedStringSlice::fromLiteral("/**")) || + comment.startsWith(UnownedStringSlice::fromLiteral("/*!"))) + { + /// /** */ or /*! */. 
+ return comment.tail(3); + } + return comment; + } + case MarkupType::BlockAfter: + { + + if (comment.startsWith(UnownedStringSlice::fromLiteral("/**<")) || + comment.startsWith(UnownedStringSlice::fromLiteral("/*!<"))) + { + /// /*!< */ or /**< */ + return comment.tail(4); + } + return comment; + } + + case MarkupType::LineBangBefore: + { + return comment.startsWith(UnownedStringSlice::fromLiteral("//!")) ? comment.tail(3) : comment; + } + case MarkupType::LineSlashBefore: + { + return comment.startsWith(UnownedStringSlice::fromLiteral("///")) ? comment.tail(3) : comment; + } + + case MarkupType::LineBangAfter: + { + /// //!< Can be multiple lines + return comment.startsWith(UnownedStringSlice::fromLiteral("//!<")) ? comment.tail(4) : comment; + } + case MarkupType::LineSlashAfter: + { + return comment.startsWith(UnownedStringSlice::fromLiteral("///<")) ? comment.tail(4) : comment; + } + default: break; + } + return comment; +} + +void DocMarkupExtractor::_addDecl(Decl* decl) +{ + if (!decl->loc.isValid()) + { + return; + } + m_decls.add(decl); +} + +void DocMarkupExtractor::_addDeclRec(Decl* decl) +{ + // Just add. + // There may be things we don't want to add, but just add them all of now + _addDecl(decl); + +#if 0 + if (CallableDecl* callableDecl = as<CallableDecl>(decl)) + { + // For callables (like functions), + + m_decls.add(callableDecl); + } + else +#endif + if (ContainerDecl* containerDecl = as<ContainerDecl>(decl)) + { + // Add the container - which could be a class, struct, enum, namespace, extension, generic etc. 
+ // Now add what the container contains + for (Decl* childDecl : containerDecl->members) + { + _addDeclRec(childDecl); + } + } +} + +void DocMarkupExtractor::_findDecls(ModuleDecl* moduleDecl) +{ + for (Decl* decl : moduleDecl->members) + { + _addDeclRec(decl); + } +} + +static Index _findTokenIndex(SourceLoc loc, const Token* toks, Index numToks) +{ + // Use a binary search to find the token + Index lo = 0; + Index hi = numToks; + + while (lo + 1 < hi) + { + const Index mid = (hi + lo) >> 1; + const Token& midToken = toks[mid]; + + if (midToken.loc == loc) + { + return mid; + } + + if (midToken.loc.getRaw() <= loc.getRaw()) + { + lo = mid; + } + else + { + hi = mid; + } + } + + // Not found + return -1; +} + +/* static */DocMarkupExtractor::MarkupFlags DocMarkupExtractor::getFlags(MarkupType type) +{ + switch (type) + { + default: + case MarkupType::None: return 0; + case MarkupType::BlockBefore: return MarkupFlag::Before | MarkupFlag::IsBlock; + case MarkupType::BlockAfter: return MarkupFlag::After | MarkupFlag::IsBlock; + + case MarkupType::LineBangBefore: return MarkupFlag::Before | MarkupFlag::IsMultiToken; + case MarkupType::LineSlashBefore: return MarkupFlag::Before | MarkupFlag::IsMultiToken; + + case MarkupType::LineBangAfter: return MarkupFlag::After | MarkupFlag::IsMultiToken; + case MarkupType::LineSlashAfter: return MarkupFlag::After | MarkupFlag::IsMultiToken; + } +} + +/* static */DocMarkupExtractor::MarkupType DocMarkupExtractor::findMarkupType(const Token& tok) +{ + switch (tok.type) + { + case TokenType::BlockComment: + { + UnownedStringSlice slice = tok.getContent(); + if (slice.getLength() >= 3 && (slice[2] == '!' || slice[2] == '*')) + { + return (slice.getLength() >= 4 && slice[3] == '<') ? 
MarkupType::BlockAfter : MarkupType::BlockBefore; + } + break; + } + case TokenType::LineComment: + { + UnownedStringSlice slice = tok.getContent(); + if (slice.getLength() >= 3) + { + if (slice[2] == '!') + { + return (slice.getLength() >= 4 && slice[3] == '<') ? MarkupType::LineBangAfter : MarkupType::LineBangBefore; + } + else if (slice[2] == '/') + { + return (slice.getLength() >= 4 && slice[3] == '<') ? MarkupType::LineSlashAfter : MarkupType::LineSlashBefore; + } + } + break; + } + default: break; + } + return MarkupType::None; +} + +static Index _calcWhitespaceIndent(const UnownedStringSlice& line) +{ + // TODO(JS): For now we ignore tabs and just work out indentation based on spaces/assume ASCII + Index indent = 0; + const Index count = line.getLength(); + for (; indent < count && line[indent] == ' '; indent++); + return indent; +} + +static Index _calcIndent(const UnownedStringSlice& line) +{ + // TODO(JS): For now we just assume no tabs, and that every char is ASCII + return line.getLength(); +} + +static void _appendUnindenttedLine(const UnownedStringSlice& line, Index maxIndent, StringBuilder& out) +{ + Index indent = _calcWhitespaceIndent(line); + + // We want to remove indenting remove no more than maxIndent + if (maxIndent >= 0) + { + indent = (indent > maxIndent) ? maxIndent : indent; + } + + // Remove the indenting, and append to out + out.append(line.tail(indent)); +} + +SlangResult DocMarkupExtractor::_extractMarkup(const FindInfo& info, const FoundMarkup& foundMarkup, StringBuilder& out) +{ + SourceView* sourceView = info.sourceView; + SourceFile* sourceFile = sourceView->getSourceFile(); + + // Here we want to produce the text that is implied by the markup tokens. 
+ // We want to removing surrounding markup, and to also keep appropriate indentation + + switch (foundMarkup.type) + { + case MarkupType::BlockBefore: + case MarkupType::BlockAfter: + { + // We should only have a single line + SLANG_ASSERT(foundMarkup.range.getCount() == 1); + + const auto& tok = info.tokenList->m_tokens[foundMarkup.range.start]; + uint32_t offset = sourceView->getRange().getOffset(tok.loc); + + const UnownedStringSlice startLine = sourceFile->getLineContainingOffset(offset); + + UnownedStringSlice content = tok.getContent(); + + // Split into lines + List<UnownedStringSlice> lines; + + StringUtil::calcLines(content, lines); + + Index maxIndent = -1; + + StringBuilder unindentedLine; + + const Index linesCount = lines.getCount(); + for (Index i = 0; i < linesCount; ++i) + { + UnownedStringSlice line = lines[i]; + unindentedLine.Clear(); + + if (i == 0) + { + if (startLine.isMemoryContained(line.begin())) + { + // For now we'll ignore tabs, and that the indent amount is, the amount of *byte* + // NOTE! This is only appropriate for ASCII without tabs. 
+ maxIndent = _calcIndent(UnownedStringSlice(startLine.begin(), line.begin())); + + // Let's strip the start stuff + line = removeStart(foundMarkup.type, line); + } + } + + if (i == linesCount - 1) + { + SLANG_ASSERT(line.tail(line.getLength() - 2) == UnownedStringSlice::fromLiteral("*/")); + // Remove the */ at the end of the line + line = line.head(line.getLength() - 2); + } + + if (i > 0) + { + _appendUnindenttedLine(line, maxIndent, unindentedLine); + } + else + { + unindentedLine.append(line); + } + + // If the first or last line are all white space, just ignore them + if ((i == linesCount - 1 || i == 0) && unindentedLine.getUnownedSlice().trim().getLength() == 0) + { + continue; + } + + out.append(unindentedLine); + out.appendChar('\n'); + } + + break; + } + case MarkupType::LineBangBefore: + case MarkupType::LineSlashBefore: + case MarkupType::LineBangAfter: + case MarkupType::LineSlashAfter: + { + // Holds the lines extracted, they may have some white space indenting (like the space at the start of //) + List<UnownedStringSlice> lines; + + const auto& range = foundMarkup.range; + for (Index i = range.start; i < range.end; ++ i) + { + const auto& tok = info.tokenList->m_tokens[i]; + UnownedStringSlice line = tok.getContent(); + line = removeStart(foundMarkup.type, line); + + // If the first or last line are all white space, just ignore them + if ((i == range.start || i == range.end - 1) && line.trim().getLength() == 0) + { + continue; + } + lines.add(line); + } + + if (lines.getCount() == 0) + { + // If there are no lines, theres no content + return SLANG_OK; + } + + Index minIndent = 0x7fffffff; + for (const auto& line : lines) + { + const Index indent = _calcWhitespaceIndent(line); + minIndent = (indent < minIndent) ? 
indent : minIndent; + } + + for (const auto& line : lines) + { + _appendUnindenttedLine(line, minIndent, out); + out.appendChar('\n'); + } + + break; + } + default: return SLANG_FAIL; + } + + return SLANG_OK; +} + +Index DocMarkupExtractor::_findStartIndex(const FindInfo& info, Location location) +{ + Index openParensCount = 0; + Index openBracketCount = 0; + + const TokenList& toks = *info.tokenList; + const Index tokIndex = info.declTokenIndex; + + Index direction = (location == Location::Before) ? -1 : 1; + + const Index count = toks.m_tokens.getCount(); + for (Index i = tokIndex; i >= 0 && i < count; i += direction) + { + const Token& tok = toks.m_tokens[i]; + + switch (tok.type) + { + case TokenType::LParent: + { + ++openParensCount; + break; + } + case TokenType::RBracket: + { + openBracketCount += Index(location == Location::Before); + break; + } + case TokenType::LBracket: + { + openBracketCount -= Index(location == Location::Before); + break; + } + case TokenType::RParent: + { + if (openParensCount == 0 && + location == Location::AfterParam) + { + return i + 1; + } + + --openParensCount; + if (openParensCount < 0) + { + // Not found - or weird parens at least + return -1; + } + break; + } + case TokenType::Comma: + { + if (location == Location::AfterParam) + { + return i + 1; + } + break; + } + case TokenType::RBrace: + { + // If we haven't hit a candidate yet before hitting } it's not going to work + if (location == Location::Before) + { + return -1; + } + break; + } + case TokenType::Semicolon: + { + // If we haven't hit a candidate yet it's not going to work + if (location == Location::Before) + { + return -1; + } + + if (openParensCount == 0 && location == Location::AfterSemicolon) + { + return i + 1; + } + break; + } + case TokenType::LineComment: + case TokenType::BlockComment: + { + // We hit a comment this could be the markup + if (location == Location::Before && openParensCount == 0 && openBracketCount == 0) + { + return i; + } + break; + } + 
default: break; + } + } + + return -1; +} + +/* static */bool DocMarkupExtractor::_isTokenOnLineIndex(SourceView* sourceView, MarkupType type, const Token& tok, Index lineIndex) +{ + SourceFile* sourceFile = sourceView->getSourceFile(); + const int offset = sourceView->getRange().getOffset(tok.loc); + + auto const flags = getFlags(type); + + if (flags & MarkupFlag::IsBlock) + { + // Either the start or the end of the block have to be on the specified line + return sourceFile->isOffsetOnLine(offset, lineIndex) || sourceFile->isOffsetOnLine(offset + tok.charsCount, lineIndex); + } + else + { + // Has to be exactly on the specified line + return sourceFile->isOffsetOnLine(offset, lineIndex); + } +} + + +SlangResult DocMarkupExtractor::_findMarkup(const FindInfo& info, Location location, FoundMarkup& out) +{ + out.reset(); + + const auto& toks = info.tokenList->m_tokens; + const Index tokIndex = info.declTokenIndex; + + // The starting token index + Index startIndex = _findStartIndex(info, location); + if (startIndex <= 0) + { + return SLANG_E_NOT_FOUND; + } + + SourceView* sourceView = info.sourceView; + SourceFile* sourceFile = sourceView->getSourceFile(); + + // Let's lookup the line index where this occurred + const int startOffset = sourceView->getRange().getOffset(toks[startIndex - 1].loc); + + // The line index that the markoff starts from + Index lineIndex = sourceFile->calcLineIndexFromOffset(startOffset); + if (lineIndex < 0) + { + return SLANG_E_NOT_FOUND; + } + + const Index searchDirection = (location == Location::Before) ? -1 : 1; + + // Get the type and flags + const MarkupType type = findMarkupType(toks[startIndex]); + const MarkupFlags flags = getFlags(type); + + const MarkupFlag::Enum requiredFlag = (location == Location::Before) ? 
MarkupFlag::Before : MarkupFlag::After; + if ((flags & requiredFlag) == 0) + { + return SLANG_E_NOT_FOUND; + } + +#if 0 + // The token still isn't accepted, unless it's on the expected line + if (_isTokenOnLineIndex(info.sourceView, type, toks[startIndex], expectedLineIndex)) + { + return SLANG_E_NOT_FOUND; + } +#endif + + Index endIndex = startIndex; + + // If it's multiline, so look for the end index + if (flags & MarkupFlag::IsMultiToken) + { + Index expectedLineIndex = lineIndex; + + // TODO(JS): + // We should probably do the work here to confirm indentation - but that + // requires knowing something about tabs, so for now we leave. + + while (true) + { + endIndex += searchDirection; + expectedLineIndex += searchDirection; + + if (endIndex < 0 || endIndex >= toks.getCount()) + { + break; + } + + // Do we find a token of the right type? + if (findMarkupType(toks[endIndex]) != type) + { + break; + } + + // Is it on the right line? + if (_isTokenOnLineIndex(info.sourceView, type, toks[startIndex], expectedLineIndex)) + { + break; + } + } + + // Fix the end index (it's the last one that worked) + endIndex -= searchDirection; + } + + // Put start < end order + if (endIndex < startIndex) + { + Swap(endIndex, startIndex); + } + // The range excludes end so increase + endIndex++; + + // Okay we've found the markup + out.type = type; + out.location = location; + out.range = IndexRange{ startIndex, endIndex }; + + SLANG_ASSERT(out.range.getCount() > 0); + + return SLANG_OK; +} + +SlangResult DocMarkupExtractor::_findFirstMarkup(const FindInfo& info, const Location* locs, Index locCount, FoundMarkup& out, Index& outIndex) +{ + Index i = 0; + for (; i < locCount; ++i) + { + SlangResult res = _findMarkup(info, locs[i], out); + if (SLANG_SUCCEEDED(res) || (SLANG_FAILED(res) && res != SLANG_E_NOT_FOUND)) + { + outIndex = i; + return res; + } + } + return SLANG_E_NOT_FOUND; +} + +SlangResult DocMarkupExtractor::_findMarkup(const FindInfo& info, const Location* locs, Index 
locCount, FoundMarkup& out) +{ + Index foundIndex; + SLANG_RETURN_ON_FAIL(_findFirstMarkup(info, locs, locCount, out, foundIndex)); + + // Lets see if the remaining ones match + { + FoundMarkup otherMarkup; + for (Index i = foundIndex + 1; i < locCount; ++i) + { + SlangResult res = _findMarkup(info, locs[i], otherMarkup); + if (SLANG_SUCCEEDED(res)) + { + // TODO(JS): Warning found markup in another location + } + } + } + + return SLANG_OK; +} + +SlangResult DocMarkupExtractor::_findMarkup(const FindInfo& info, Decl* decl, FoundMarkup& out) +{ + if (auto paramDecl = as<ParamDecl>(decl)) + { + Location locs[] = { Location::Before, Location::AfterParam }; + return _findMarkup(info, locs, SLANG_COUNT_OF(locs), out); + } + else if (auto callableDecl = as<CallableDecl>(decl)) + { + // We allow it defined before + return _findMarkup(info, Location::Before, out); + } + else if (as<VarDecl>(decl) || as<TypeDefDecl>(decl) || as<AssocTypeDecl>(decl)) + { + Location locs[] = { Location::Before, Location::AfterSemicolon }; + return _findMarkup(info, locs, SLANG_COUNT_OF(locs), out); + } + else + { + // We'll only allow before + return _findMarkup(info, Location::Before, out); + } +} + +SlangResult DocMarkupExtractor::extract(DocMarkup* doc, ModuleDecl* moduleDecl, SourceManager* sourceManager, DiagnosticSink* sink) +{ + m_doc = doc; + m_moduleDecl = moduleDecl; + m_sourceManager = sourceManager; + m_sink = sink; + + _findDecls(moduleDecl); + + struct Entry + { + typedef Entry ThisType; + + bool operator<(const ThisType& rhs) const { return locOrOffset < rhs.locOrOffset; } + + Index viewIndex; ///< The view/file index this loc is found in + SourceLoc::RawValue locOrOffset; ///< Can be a loc or an offset into the file + + Decl* decl; ///< The decl + }; + + List<Entry> entries; + + { + const Index count = m_decls.getCount(); + entries.setCount(count); + + for (Index i = 0; i < count; ++i) + { + Entry& entry = entries[i]; + auto decl = m_decls[i]; + entry.decl = decl; + 
entry.viewIndex = -1; //< We don't know what file/view it's in + entry.locOrOffset = decl->loc.getRaw(); + } + } + + // We hold one view per *SourceFile* + List<SourceView*> views; + + // Sort them into loc order + entries.sort([](Entry& a, Entry& b) { return a.locOrOffset < b.locOrOffset; }); + + { + SourceView* sourceView = nullptr; + Index viewIndex = -1; + + for (auto& entry : entries) + { + const SourceLoc loc = SourceLoc::fromRaw(entry.locOrOffset); + + if (sourceView == nullptr || !sourceView->getRange().contains(loc)) + { + // Find the new view + sourceView = m_sourceManager->findSourceView(loc); + SLANG_ASSERT(sourceView); + + // We want only one view per SourceFile + SourceFile* sourceFile = sourceView->getSourceFile(); + + // NOTE! The view found might be different than sourceView. + viewIndex = views.findFirstIndex([&](SourceView* currentView) -> bool { return currentView->getSourceFile() == sourceFile; }); + + if (viewIndex < 0) + { + viewIndex = views.getCount(); + views.add(sourceView); + } + } + + SLANG_ASSERT(viewIndex >= 0); + SLANG_ASSERT(sourceView && sourceView->getRange().contains(loc)); + + // Set the file index + entry.viewIndex = viewIndex; + // Set as the offset within the file + entry.locOrOffset = sourceView->getRange().getOffset(loc); + } + + // Sort into view/file and then offset order + entries.sort([](Entry& a, Entry& b) { return (a.viewIndex < b.viewIndex) || ((a.viewIndex == b.viewIndex) && a.locOrOffset < b.locOrOffset); }); + } + + { + TokenList tokens; + + MemoryArena memoryArena; + RootNamePool rootNamePool; + NamePool namePool; + namePool.setRootNamePool(&rootNamePool); + + Index viewIndex = -1; + SourceView* sourceView = nullptr; + + for (auto& entry : entries) + { + if (viewIndex != entry.viewIndex) + { + viewIndex = entry.viewIndex; + sourceView = views[viewIndex]; + + // Make all memory free again + memoryArena.reset(); + + // Run the lexer + Lexer lexer; + lexer.initialize(sourceView, sink, &namePool, &memoryArena, 
Lexer::OptionFlag::TokenizeComments); + + // Lex everything + tokens = lexer.lexAllTokens(); + } + + // Get the offset within the source file + const uint32_t offset = entry.locOrOffset; + + // We need to get the loc in the source views space, so we look up appropriately in the list of tokens (which uses the views loc range) + const SourceLoc loc = sourceView->getRange().getSourceLocFromOffset(offset); + + // Work out the line number + SourceFile* sourceFile = sourceView->getSourceFile(); + const Index lineIndex = sourceFile->calcLineIndexFromOffset(int(offset)); + + // Okay, lets find the token index with a binary chop + Index tokenIndex = _findTokenIndex(loc, tokens.m_tokens.getBuffer(), tokens.m_tokens.getCount()); + if (tokenIndex >= 0 && lineIndex >= 0) + { + FindInfo findInfo; + findInfo.declTokenIndex = tokenIndex; + findInfo.declLineIndex = lineIndex; + findInfo.tokenList = &tokens; + findInfo.sourceView = sourceView; + + // Okay let's see if we extract some documentation then for this. 
+ FoundMarkup foundMarkup; + SlangResult res = _findMarkup(findInfo, entry.decl, foundMarkup); + + if (SLANG_SUCCEEDED(res)) + { + // We need to extract + StringBuilder buf; + SLANG_RETURN_ON_FAIL(_extractMarkup(findInfo, foundMarkup, buf)); + + // Add to the documentation + DocMarkup::Entry& docEntry = m_doc->addEntry(entry.decl); + docEntry.m_markup = buf; + } + else if (res != SLANG_E_NOT_FOUND) + { + return res; + } + } + } + } + + return SLANG_OK; +} + +SlangResult DocMarkup::extract(ModuleDecl* moduleDecl, SourceManager* sourceManager, DiagnosticSink* sink) +{ + m_moduleDecl = moduleDecl; + + DocMarkupExtractor context; + return context.extract(this, moduleDecl, sourceManager, sink); +} + +/* static */SlangResult DocumentationUtil::writeMarkdown(DocMarkup* markup, StringBuilder& out) +{ + for (const auto& entry : markup->getEntries()) + { + NodeBase* node = entry.m_node; + Decl* decl = as<Decl>(node); + if (!decl) + { + continue; + } + + // Skip these they will be output as part of their respective 'containers' + if (as<ParamDecl>(decl) || as<EnumCaseDecl>(decl)) + { + continue; + } + + if (CallableDecl* callableDecl = as<CallableDecl>(decl)) + { + out << entry.m_markup; + + // There's code to output sigs in the SemanticsVisitor - we probably need to extract that functionality + // out so can be used here + + // String declString = getDeclSignatureString(item); + + auto params = callableDecl->getParameters(); + //const auto& returnType = callableDecl->returnType; + + // Let's see if we can get markup on the parameters + for (auto param : params) + { + DocMarkup::Entry* paramEntry = markup->getEntry(param); + + if (paramEntry) + { + out << paramEntry->m_markup; + + auto type = param->getType(); + + if (type) + { + out << type->toString(); + } + + Name* name = param->getName(); + if (name) + { + out << " "; + out << name->text; + } + out << "\n\n"; + } + } + } + else if (EnumDecl* enumDecl = as<EnumDecl>(decl)) + { + + } + else if (StructDecl* structDecl = 
as<StructDecl>(decl)) + { + } + else if (ClassDecl* classDecl = as<ClassDecl>(decl)) + { + } + } + + return SLANG_OK; +} + +} // namespace Slang diff --git a/source/slang/slang-doc.h b/source/slang/slang-doc.h new file mode 100644 index 000000000..49251808a --- /dev/null +++ b/source/slang/slang-doc.h @@ -0,0 +1,70 @@ +// slang-doc.h +#ifndef SLANG_DOC_H +#define SLANG_DOC_H + +#include "../core/slang-basic.h" +#include "slang-ast-all.h" + +namespace Slang { + +/* Holds the documentation markup that is associated with each node (typically a decl) from a module */ +class DocMarkup : public RefObject +{ +public: + struct Entry + { + NodeBase* m_node; ///< The node this documentation is associated with + String m_markup; ///< The raw contents of of markup associated with the decoration + }; + + /// Adds an entry, returns the reference to pre-existing node if there is one + Entry& addEntry(NodeBase* base); + /// Gets an entry for a node. Returns nullptr if there is no markup. + Entry* getEntry(NodeBase* base); + + /// Get list of all of the entries in source order + const List<Entry>& getEntries() const { return m_entries; } + + /// Given a module extracts all the associated markup. 
+ SlangResult extract(ModuleDecl* moduleDecl, SourceManager* sourceManager, DiagnosticSink* sink); + +protected: + + /// The module this information was extracted from + ModuleDecl* m_moduleDecl; + /// Map from AST nodes to documentation entries + Dictionary<NodeBase*, Index> m_entryMap; + /// All of the documentation entries in source order + List<Entry> m_entries; +}; + +// --------------------------------------------------------------------------- +SLANG_INLINE DocMarkup::Entry& DocMarkup::addEntry(NodeBase* base) +{ + const Index count = m_entries.getCount(); + const Index index = m_entryMap.GetOrAddValue(base, count); + + if (index == count) + { + Entry entry; + entry.m_node = base; + m_entries.add(entry); + } + return m_entries[index]; +} + +// --------------------------------------------------------------------------- +SLANG_INLINE DocMarkup::Entry* DocMarkup::getEntry(NodeBase* base) +{ + Index* indexPtr = m_entryMap.TryGetValue(base); + return (indexPtr) ? &m_entries[*indexPtr] : nullptr; +} + +struct DocumentationUtil +{ + static SlangResult writeMarkdown(DocMarkup* markup, StringBuilder& out); +}; + +} // namespace Slang + +#endif diff --git a/source/slang/slang-lexer.cpp b/source/slang/slang-lexer.cpp index b0146c5b0..6c8e9474a 100644 --- a/source/slang/slang-lexer.cpp +++ b/source/slang/slang-lexer.cpp @@ -75,27 +75,29 @@ namespace Slang // Lexer void Lexer::initialize( - SourceView* inSourceView, - DiagnosticSink* inSink, - NamePool* inNamePool, - MemoryArena* inMemoryArena) + SourceView* sourceView, + DiagnosticSink* sink, + NamePool* namePool, + MemoryArena* memoryArena, + OptionFlags optionFlags) { - m_sourceView = inSourceView; - m_sink = inSink; - m_namePool = inNamePool; - m_memoryArena = inMemoryArena; + m_sourceView = sourceView; + m_sink = sink; + m_namePool = namePool; + m_memoryArena = memoryArena; - auto content = inSourceView->getContent(); + auto content = sourceView->getContent(); m_begin = content.begin(); m_cursor = content.begin(); 
m_end = content.end(); // Set the start location - m_startLoc = inSourceView->getRange().begin; + m_startLoc = sourceView->getRange().begin; m_tokenFlags = TokenFlag::AtStartOfLine | TokenFlag::AfterWhitespace; m_lexerFlags = 0; + m_optionFlags = optionFlags; } Lexer::~Lexer() @@ -1231,11 +1233,31 @@ namespace Slang continue; case TokenType::WhiteSpace: - case TokenType::LineComment: - case TokenType::BlockComment: + { flags |= TokenFlag::AfterWhitespace; continue; + } + case TokenType::BlockComment: + case TokenType::LineComment: + { + flags |= TokenFlag::AfterWhitespace; + if (m_optionFlags & OptionFlag::TokenizeComments) + { + // We don't break here, and use the normal token adding logic + // because we want the behavior to be identical (in terms of flags etc) + // as if TokenizeComments is not enabled + char const* textEnd = m_cursor; + + token.type = tokenType; + token.flags = m_tokenFlags; + token.setContent(UnownedStringSlice(textBegin, textEnd)); + return token; + } + + continue; + } + // We don't want to skip the end-of-file token, but we *do* // want to make sure it has appropriate flags to make our life easier case TokenType::EndOfFile: diff --git a/source/slang/slang-lexer.h b/source/slang/slang-lexer.h index 957d05fec..f1fe89516 100644 --- a/source/slang/slang-lexer.h +++ b/source/slang/slang-lexer.h @@ -90,19 +90,29 @@ namespace Slang typedef unsigned int LexerFlags; enum { - kLexerFlag_InDirective = 1 << 0, ///< Turn end-of-line and end-of-file into end-of-directive - kLexerFlag_ExpectFileName = 1 << 1, ///< Support `<>` style strings for file paths - kLexerFlag_IgnoreInvalid = 1 << 2, ///< Suppress errors about invalid/unsupported characters - kLexerFlag_ExpectDirectiveMessage = 1 << 3, ///< Don't lexer ordinary tokens, and instead consume rest of line as a string + kLexerFlag_InDirective = 1 << 0, ///< Turn end-of-line and end-of-file into end-of-directive + kLexerFlag_ExpectFileName = 1 << 1, ///< Support `<>` style strings for file paths + 
kLexerFlag_IgnoreInvalid = 1 << 2, ///< Suppress errors about invalid/unsupported characters + kLexerFlag_ExpectDirectiveMessage = 1 << 3, ///< Don't lex ordinary tokens, and instead consume rest of line as a string }; struct Lexer { + typedef uint32_t OptionFlags; + struct OptionFlag + { + enum Enum : OptionFlags + { + TokenizeComments = 1 << 0, ///< If set comments will be output to the token stream + }; + }; + void initialize( SourceView* sourceView, DiagnosticSink* sink, NamePool* namePool, - MemoryArena* memoryArena); + MemoryArena* memoryArena, + OptionFlags optionFlags = 0); ~Lexer(); @@ -134,6 +144,7 @@ namespace Slang TokenFlags m_tokenFlags; LexerFlags m_lexerFlags; + OptionFlags m_optionFlags; MemoryArena* m_memoryArena; }; diff --git a/source/slang/slang-options.cpp b/source/slang/slang-options.cpp index d28b50b88..fb7ba79c6 100644 --- a/source/slang/slang-options.cpp +++ b/source/slang/slang-options.cpp @@ -531,6 +531,10 @@ struct OptionsParser { requestImpl->getFrontEndReq()->shouldDumpAST = true; } + else if (argStr == "-doc") + { + requestImpl->getFrontEndReq()->shouldDocument = true; + } else if (argStr == "-dump-repro") { SLANG_RETURN_ON_FAIL(tryReadCommandLineArgument(sink, arg, &argCursor, argEnd, requestImpl->m_dumpRepro)); diff --git a/source/slang/slang-source-loc.cpp b/source/slang/slang-source-loc.cpp index 4b9e16b8e..3f9f8ad31 100644 --- a/source/slang/slang-source-loc.cpp +++ b/source/slang/slang-source-loc.cpp @@ -279,6 +279,55 @@ const List<uint32_t>& SourceFile::getLineBreakOffsets() return m_lineBreakOffsets; } +SourceFile::OffsetRange SourceFile::getOffsetRangeAtLineIndex(Index lineIndex) +{ + const List<uint32_t>& offsets = getLineBreakOffsets(); + const Index count = offsets.getCount(); + + if (lineIndex >= count - 1) + { + // Work out the line start + const uint32_t offsetEnd = uint32_t(getContentSize()); + const uint32_t offsetStart = (lineIndex >= count) ? 
offsetEnd : offsets[lineIndex]; + // The line is the span from start, to the end of the content + return OffsetRange{ offsetStart, offsetEnd }; + } + else + { + const uint32_t offsetStart = offsets[lineIndex]; + const uint32_t offsetEnd = offsets[lineIndex + 1]; + return OffsetRange { offsetStart, offsetEnd }; + } +} + +UnownedStringSlice SourceFile::getLineAtIndex(Index lineIndex) +{ + const OffsetRange range = getOffsetRangeAtLineIndex(lineIndex); + + if (range.isValid() && hasContent()) + { + const UnownedStringSlice content = getContent(); + SLANG_ASSERT(range.end <= uint32_t(content.getLength())); + + const char*const text = content.begin(); + return UnownedStringSlice(text + range.start, text + range.end); + } + + return UnownedStringSlice(); +} + +UnownedStringSlice SourceFile::getLineContainingOffset(uint32_t offset) +{ + const Index lineIndex = calcLineIndexFromOffset(offset); + return getLineAtIndex(lineIndex); +} + +bool SourceFile::isOffsetOnLine(uint32_t offset, Index lineIndex) +{ + const OffsetRange range = getOffsetRangeAtLineIndex(lineIndex); + return range.isValid() && range.containsInclusive(offset); +} + int SourceFile::calcLineIndexFromOffset(int offset) { SLANG_ASSERT(UInt(offset) <= getContentSize()); diff --git a/source/slang/slang-source-loc.h b/source/slang/slang-source-loc.h index ef8b49c3b..54811918f 100644 --- a/source/slang/slang-source-loc.h +++ b/source/slang/slang-source-loc.h @@ -139,6 +139,9 @@ struct SourceRange /// Get the offset of a loc in this range int getOffset(SourceLoc loc) const { SLANG_ASSERT(contains(loc)); return int(loc.getRaw() - begin.getRaw()); } + /// Convert an offset to a loc + SourceLoc getSourceLocFromOffset(uint32_t offset) const { SLANG_ASSERT(offset <= getSize()); return begin + Int(offset); } + SourceRange() {} @@ -156,6 +159,7 @@ struct SourceRange SourceLoc end; }; + // Pre-declare struct SourceManager; @@ -165,10 +169,45 @@ class SourceFile { public: + struct OffsetRange + { + /// We need a value to 
indicate an invalid range. We can't use 0 as that is valid for an offset range + /// We can't use a negative number, and don't want to make signed so we get the full 32-bits. + /// So we just use the max value as invalid + static const uint32_t kInvalid = 0xffffffff; + + /// True if the range is valid + SLANG_FORCE_INLINE bool isValid() const { return end >= start && start != kInvalid; } + /// True if offset is within range (inclusively) + SLANG_FORCE_INLINE bool containsInclusive(uint32_t offset) const { return offset >= start && offset <= end; } + + /// Get the count + SLANG_FORCE_INLINE uint32_t getCount() const { return end - start; } + + /// Return an invalid range. + static OffsetRange makeInvalid() { return OffsetRange{ kInvalid, kInvalid }; } + + uint32_t start; + uint32_t end; + }; + /// Returns the line break offsets (in bytes from start of content) /// Note that this is lazily evaluated - the line breaks are only calculated on the first request const List<uint32_t>& getLineBreakOffsets(); + /// Returns true if the offset is on the specified line + /// NOTE! If offsets are not fully setup (because we don't have source), will only be correct for lines that have offsets + bool isOffsetOnLine(uint32_t offset, Index lineIndex); + + /// Get the line containing the offset. Requires that content is available, else will return an empty slice. + UnownedStringSlice getLineContainingOffset(uint32_t offset); + + /// Get the line at the specified line index. Requires that content is available, else will return an empty slice. + UnownedStringSlice getLineAtIndex(Index lineIndex); + + /// Get the offset range at the specified line index. Works without content. 
+ OffsetRange getOffsetRangeAtLineIndex(Index lineIndex); + /// Set the line break offsets void setLineBreakOffsets(const uint32_t* offsets, UInt numOffsets); diff --git a/source/slang/slang.cpp b/source/slang/slang.cpp index 14f30c632..e02715015 100644 --- a/source/slang/slang.cpp +++ b/source/slang/slang.cpp @@ -30,6 +30,8 @@ #include "slang-serialize-ir.h" #include "slang-serialize-container.h" +#include "slang-doc.h" + #include "slang-check-impl.h" #include "../../slang-tag-version.h" @@ -1637,6 +1639,26 @@ void FrontEndCompileRequest::parseTranslationUnit( } } + if (shouldDocument) + { + RefPtr<DocMarkup> markup(new DocMarkup); + markup->extract(translationUnit->getModuleDecl(), getSourceManager(), getSink()); + + // Extract to a file + + const String& path = sourceFile->getPathInfo().foundPath; + if (path.getLength()) + { + String fileName = Path::getFileNameWithoutExt(path); + fileName.append(".md"); + + StringBuilder buf; + DocumentationUtil::writeMarkdown(markup, buf); + + File::writeAllText(fileName, buf); + } + } + #if 0 // Test serialization { |
