diff options
| author | jsmall-nvidia <jsmall@nvidia.com> | 2023-03-22 12:04:33 -0400 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2023-03-22 12:04:33 -0400 |
| commit | d4f99c8bac8b28f18c864a717d8833db6a1c872d (patch) | |
| tree | ebea06c019130d8248d5e4f6bccf5e4b2649e3cb /source/slang | |
| parent | d8a40abba5223fbcb56c52b04ccb88c02bbaf79f (diff) | |
Source map obfuscation (#2717)
* #include an absolute path didn't work - because paths were taken to always be relative.
* WIP source map.
* Split out handling of RttiTypeFuncs to a map type.
* Make RttiTypeFuncsMap hold default impls.
* Slightly more sophisticated RttiTypeFuncsMap
* Source map decoding.
* Fix tabs.
* Fix asserts due to negative values.
* Use less obscure mechanisms in SourceMap.
* Source map decoding.
Simplifying SourceMap usage.
* First attempt at outputting a source map as part of emit.
* Added support for -source-map option. SourceMap is added to the artifact.
* Small improvements around column calculation in SourceWriter.
* Source Loc obfuscation WIP.
* Fix some issues around SourceMap obfuscation.
* Split out obfuscation into its own file.
* Keep obfuscated SourceMap even through serialization bottleneck.
Diffstat (limited to 'source/slang')
| -rw-r--r-- | source/slang/slang-emit-source-writer.cpp | 83 | ||||
| -rw-r--r-- | source/slang/slang-emit.cpp | 2 | ||||
| -rw-r--r-- | source/slang/slang-ir-obfuscate-loc.cpp | 279 | ||||
| -rw-r--r-- | source/slang/slang-ir-obfuscate-loc.h | 20 | ||||
| -rw-r--r-- | source/slang/slang-ir.h | 7 | ||||
| -rw-r--r-- | source/slang/slang-lower-to-ir.cpp | 23 | ||||
| -rw-r--r-- | source/slang/slang.cpp | 4 |
7 files changed, 374 insertions, 44 deletions
diff --git a/source/slang/slang-emit-source-writer.cpp b/source/slang/slang-emit-source-writer.cpp index b27e0f8b4..f66f36758 100644 --- a/source/slang/slang-emit-source-writer.cpp +++ b/source/slang/slang-emit-source-writer.cpp @@ -1,6 +1,8 @@ // slang-emit-source-writer.cpp #include "slang-emit-source-writer.h" +#include "../core/slang-char-encode.h" + // Disable warnings about sprintf #ifdef _WIN32 # pragma warning(disable:4996) @@ -516,61 +518,60 @@ void SourceWriter::_emitLineDirective(const HumaneSourceLoc& sourceLocation) void SourceWriter::_calcLocation(Index& outLineIndex, Index& outColumnIndex) { - // If we are at the end, then we are done. - if (m_currentOutputOffset == m_builder.getLength()) - { - outLineIndex = m_currentLineIndex; - outColumnIndex = m_currentColumnIndex; - return; - } - - const char* cur = m_builder.getBuffer() + m_currentOutputOffset; - const char* end = m_builder.end(); - - const char* start = cur; - - while (cur < end) + // If there are move chars we need to update + if (m_currentOutputOffset < m_builder.getLength()) { - // Reset start - start = cur; + const char* cur = m_builder.getBuffer() + m_currentOutputOffset; + const char* end = m_builder.end(); - // Look for the end of the line - while (*cur != '\n' && *cur != '\r' && cur < end) - { - cur++; - } + const char* start = cur; - // If we are not at the total end then we must have hit a \n or \r - if (cur < end) + while (cur < end) { - const auto c = *cur++; + // Reset start + start = cur; - ++m_currentLineIndex; - // Reset the column - m_currentColumnIndex = 0; + // Look for the end of the line + while (*cur != '\n' && *cur != '\r' && cur < end) + { + cur++; + } - // Check the next char to see if it's part of a CR/LF combination + // If we are not at the total end then we must have hit a \n or \r if (cur < end) { - const auto d = *cur; - // If it is combination skip the next byte - cur += ((c ^ d) == ('\r' ^ '\n')); + const auto c = *cur++; + + // Next line + 
++m_currentLineIndex; + + // Check the next char to see if it's part of a CR/LF combination + if (cur < end) + { + const auto d = *cur; + // If it is combination skip the next byte + cur += ((c ^ d) == ('\r' ^ '\n')); + } + + // Calculate the offset to the start of this line + m_currentColumnIndex = 0; + start = cur; } } - } - // Fix up the current index. - // TODO(JS): - // NOTE! This isn't strictly correct because it assumes one byte is a *column* which isn't actually the case with utf8 - // encoding... - m_currentColumnIndex += Index(cur - start); + // Set the current offset to the end + m_currentOutputOffset = m_builder.getLength(); - // Set the current offset is the end - m_currentOutputOffset = m_builder.getLength(); + // Get the bytes remaining on this line (which may not be complete) + const UnownedStringSlice lineRemaining(start, m_builder.end()); - // Output the values - outLineIndex = m_currentLineIndex; + // Offset the column index in codepoints + m_currentColumnIndex += UTF8Util::calcCodePointCount(lineRemaining); + } + + // Output the position outColumnIndex = m_currentColumnIndex; + outLineIndex = m_currentLineIndex; } } // namespace Slang diff --git a/source/slang/slang-emit.cpp b/source/slang/slang-emit.cpp index 335e95c9e..bc62e488f 100644 --- a/source/slang/slang-emit.cpp +++ b/source/slang/slang-emit.cpp @@ -1126,7 +1126,7 @@ SlangResult CodeGenContext::emitEntryPointsSourceFromIR(ComPtr<IArtifact>& outAr auto sourceMapBlob = StringBlob::moveCreate(writer.getBuilder()); - auto sourceMapArtifact = ArtifactUtil::createArtifact(ArtifactDesc::make(ArtifactKind::Text, ArtifactPayload::SourceMap, ArtifactStyle::None)); + auto sourceMapArtifact = ArtifactUtil::createArtifact(ArtifactDesc::make(ArtifactKind::Json, ArtifactPayload::SourceMap, ArtifactStyle::None)); sourceMapArtifact->addRepresentationUnknown(sourceMapBlob); artifact->addAssociated(sourceMapArtifact); diff --git a/source/slang/slang-ir-obfuscate-loc.cpp 
b/source/slang/slang-ir-obfuscate-loc.cpp new file mode 100644 index 000000000..b3f5d5cd3 --- /dev/null +++ b/source/slang/slang-ir-obfuscate-loc.cpp @@ -0,0 +1,279 @@ +// slang-ir-obfuscate-loc.cpp +#include "slang-ir-obfuscate-loc.h" + +#include "../../slang.h" + +#include "../core/slang-random-generator.h" +#include "../core/slang-hash.h" +#include "../core/slang-char-util.h" + +namespace Slang +{ + +namespace { // anonymous + +struct InstWithLoc +{ + typedef InstWithLoc ThisType; + + SLANG_FORCE_INLINE bool operator<(const ThisType& rhs) const { return loc.getRaw() < rhs.loc.getRaw(); } + + IRInst* inst; + SourceLoc loc; +}; + +struct LocPair +{ + SourceLoc originalLoc; + SourceLoc obfuscatedLoc; +}; + +} // anonymous + +static void _findInstsRec(IRInst* inst, List<InstWithLoc>& out) +{ + if (inst->sourceLoc.isValid()) + { + InstWithLoc instWithLoc; + instWithLoc.inst = inst; + instWithLoc.loc = inst->sourceLoc; + out.add(instWithLoc); + } + + for (IRInst* child : inst->getModifiableChildren()) + { + _findInstsRec(child, out); + } +} + +SlangResult obfuscateModuleLocs(IRModule* module, SourceManager* sourceManager) +{ + // There shouldn't be an obfuscated source map set + SLANG_ASSERT(module->getObfuscatedSourceMap() == nullptr); + + List<InstWithLoc> instWithLocs; + + // Find all of the instructions with source locs + _findInstsRec(module->getModuleInst(), instWithLocs); + + // Sort them + instWithLocs.sort(); + + // Lets produce a hash, so we can use as a key for random number generation. + // We could base it on time, or some other thing as there is no requirement for + // stability or consistency. + // We use a hash because it avoids issues around clocks, and availability of a clock + // as a good source of entropy. + // + // An argument *could* be made to generate the name via some mechanism that uniquely identified the + // combination of flags, options, files, names that identified the compilation, but that is + // not easily achieved. 
+ HashCode hash = 0; + + List<LocPair> locPairs; + + { + SourceLoc curLoc; + for (const auto& instWithLoc : instWithLocs) + { + hash = combineHash(hash, getHashCode(instWithLoc.inst)); + hash = combineHash(hash, getHashCode(instWithLoc.loc.getRaw())); + + if (instWithLoc.loc != curLoc) + { + LocPair locPair; + locPair.originalLoc = instWithLoc.loc; + locPairs.add(locPair); + + // This is the current loc + curLoc = instWithLoc.loc; + } + } + } + + const Count uniqueLocCount = locPairs.getCount(); + + // We need a seed to make this random on each run + const uint32_t randomSeed = uint32_t(hash); + RefPtr<RandomGenerator> rand = RandomGenerator::create(randomSeed); + + // We want a random unique name because we could have multiple obfuscated modules + // and we need to identify each + + PathInfo obfusctatedPathInfo; + + { + // We need a pathInfo to *identify* this modules obfuscated locs. + // We are going to use a random number, seeded from the hash to do this. + // Turning the number as hex as the name. + { + StringBuilder buf; + + uint8_t data[4]; + rand->nextData(data, sizeof(data)); + + const Count charsCount = SLANG_COUNT_OF(data) * 2; + + char* dst = buf.prepareForAppend(charsCount); + + for (Index i = 0; i < SLANG_COUNT_OF(data); ++i) + { + dst[i * 2 + 0] = CharUtil::getHexChar(data[i] & 0xf); + dst[i * 2 + 1] = CharUtil::getHexChar(data[i] >> 4); + } + buf.appendInPlace(dst, charsCount); + obfusctatedPathInfo = PathInfo::makePath(buf); + } + } + + SourceFile* obfuscatedFile = sourceManager->createSourceFileWithSize(obfusctatedPathInfo, uniqueLocCount); + + // Create the view we are going to use from the obfusctated "file". + SourceView* obfuscatedView = sourceManager->createSourceView(obfuscatedFile, nullptr, SourceLoc()); + + // Okay now we want to produce a map from these locs to a new source location + { + // Create a "bag" and put all of the indices in it. 
+ List<SourceLoc> bag; + + bag.setCount(uniqueLocCount); + + const SourceLoc baseLoc = obfuscatedView->getRange().begin; + + { + SourceLoc* dst = bag.getBuffer(); + for (Index i = 0; i < uniqueLocCount; ++i) + { + dst[i] = baseLoc + i; + } + } + + // Pull the indices randomly out of the bag to create the map + for (auto& pair : locPairs) + { + // Find an index in the bag + const Index bagIndex = rand->nextInt32InRange(0, int32_t(bag.getCount())); + // Set in the map + pair.obfuscatedLoc = bag[bagIndex]; + // Remove from the bag + bag.fastRemoveAt(bagIndex); + } + } + + // We can now just set all the new locs in the instructions + { + const LocPair* curPair = locPairs.getBuffer(); + LocPair pair = *curPair; + + for (const auto& instWithLoc : instWithLocs) + { + auto inst = instWithLoc.inst; + + if (instWithLoc.loc != pair.originalLoc) + { + SLANG_ASSERT(curPair < locPairs.end()); + curPair++; + pair = *curPair; + } + SLANG_ASSERT(pair.originalLoc == instWithLoc.loc); + + // Set the loc + inst->sourceLoc = pair.obfuscatedLoc; + } + } + + // We can now create a map. The locs are in order in entries, so that should make lookup easier. + // This doesn't "leak" anything as the obfuscated loc map is not distributed. + + RefPtr<SourceMap> sourceMap = new SourceMap; + sourceMap->m_file = obfusctatedPathInfo.getName(); + + // Make sure we have line 0. + // We only end up with one line in the obfuscated map. 
+ sourceMap->advanceToLine(0); + + { + // Current view, with cached "View" based sourceFileIndex + SourceView* curView = nullptr; + Index curViewSourceFileIndex = -1; + + // Current handle, and store cached index in curPathSourceFileIndex + StringSlicePool::Handle curPathHandle = StringSlicePool::Handle(0); + Index curPathSourceFileIndex = -1; + + for (Index i = 0; i < uniqueLocCount; ++i) + { + const auto& pair = locPairs[i]; + + + // First find the view + if (curView == nullptr || + !curView->getRange().contains(pair.originalLoc)) + { + curView = sourceManager->findSourceViewRecursively(pair.originalLoc); + SLANG_ASSERT(curView); + + // Reset the current view path index, to being unset + curViewSourceFileIndex = -1; + + // We have to reset, because the path index is for the source manager + // that holds the view. If the view changes we need to re determine the + // path string, and index. + curPathSourceFileIndex = -1; + } + + // Now get the location + const auto handleLoc = curView->getHandleLoc(pair.originalLoc); + + Index sourceFileIndex = -1; + + if (handleLoc.pathHandle == StringSlicePool::Handle(0)) + { + if (curViewSourceFileIndex < 0) + { + const auto pathInfo = curView->getViewPathInfo(); + curViewSourceFileIndex = sourceMap->getSourceFileIndex(pathInfo.getName().getUnownedSlice()); + } + sourceFileIndex = curViewSourceFileIndex; + } + else + { + if (curPathSourceFileIndex < 0 || + handleLoc.pathHandle != curPathHandle) + { + auto viewSourceManager = curView->getSourceManager(); + const auto filePathSlice = viewSourceManager->getStringSlicePool().getSlice(curPathHandle); + + // Set the handle + curPathHandle = handleLoc.pathHandle; + + // Get the source file index. 
+ curPathSourceFileIndex = sourceMap->getSourceFileIndex(filePathSlice); + } + + sourceFileIndex = curPathSourceFileIndex; + } + + // Create the entry + SourceMap::Entry entry; + entry.init(); + + entry.sourceFileIndex = sourceFileIndex; + + // i is the generated column + entry.generatedColumn = i; + + entry.sourceColumn = handleLoc.column - 1; + entry.sourceLine = handleLoc.line - 1; + + // Add it to the source map + sourceMap->addEntry(entry); + } + } + + module->setObfuscatedSourceMap(sourceMap); + + return SLANG_OK; +} + +} // namespace Slang diff --git a/source/slang/slang-ir-obfuscate-loc.h b/source/slang/slang-ir-obfuscate-loc.h new file mode 100644 index 000000000..03e32a143 --- /dev/null +++ b/source/slang/slang-ir-obfuscate-loc.h @@ -0,0 +1,20 @@ +// slang-ir-obfuscate-loc.h +#ifndef SLANG_IR_OBFUSCATE_LOC_H_INCLUDED +#define SLANG_IR_OBFUSCATE_LOC_H_INCLUDED + +#include "../core/slang-basic.h" + +#include "../compiler-core/slang-source-map.h" + +#include "slang-compiler.h" +#include "slang-ir.h" + +namespace Slang +{ + +/*** Obfuscate locs in module. 
Store the mapping from obfuscated locs to actual locs in the form of a source map */ +SlangResult obfuscateModuleLocs(IRModule* module, SourceManager* sourceManager); + +} + +#endif diff --git a/source/slang/slang-ir.h b/source/slang/slang-ir.h index 14a216fd2..025812f83 100644 --- a/source/slang/slang-ir.h +++ b/source/slang/slang-ir.h @@ -13,6 +13,7 @@ #include "../core/slang-memory-arena.h" #include "../compiler-core/slang-source-loc.h" +#include "../compiler-core/slang-source-map.h" #include "slang-type-system-shared.h" @@ -2008,6 +2009,9 @@ public: SLANG_FORCE_INLINE IRModuleInst* getModuleInst() const { return m_moduleInst; } SLANG_FORCE_INLINE MemoryArena& getMemoryArena() { return m_memoryArena; } + SLANG_FORCE_INLINE SourceMap* getObfuscatedSourceMap() const { return m_obfuscatedSourceMap; } + SLANG_FORCE_INLINE void setObfuscatedSourceMap(SourceMap* sourceMap) { m_obfuscatedSourceMap = sourceMap; } + IRDeduplicationContext* getDeduplicationContext() const { return &m_deduplicationContext; } IRInstListBase getGlobalInsts() const { return getModuleInst()->getChildren(); } @@ -2074,6 +2078,9 @@ private: /// Shared contexts for constructing and deduplicating the IR. 
mutable IRDeduplicationContext m_deduplicationContext; + + /// Holds the obfuscated source map for this module if applicable + RefPtr<SourceMap> m_obfuscatedSourceMap; }; struct IRSpecializationDictionaryItem : public IRInst diff --git a/source/slang/slang-lower-to-ir.cpp b/source/slang/slang-lower-to-ir.cpp index 86df89702..f84f17886 100644 --- a/source/slang/slang-lower-to-ir.cpp +++ b/source/slang/slang-lower-to-ir.cpp @@ -3,6 +3,10 @@ #include "../../slang.h" +#include "../core/slang-random-generator.h" +#include "../core/slang-hash.h" +#include "../core/slang-char-util.h" + #include "slang-check.h" #include "slang-ir.h" #include "slang-ir-constexpr.h" @@ -21,6 +25,8 @@ #include "slang-ir-string-hash.h" #include "slang-ir-clone.h" #include "slang-ir-lower-error-handling.h" +#include "slang-ir-obfuscate-loc.h" + #include "slang-mangle.h" #include "slang-type-layout.h" #include "slang-visitor.h" @@ -9299,10 +9305,17 @@ RefPtr<IRModule> generateIRForTranslationUnit( Linkage* linkage = compileRequest->getLinkage(); stripOptions.shouldStripNameHints = linkage->m_obfuscateCode; - stripOptions.stripSourceLocs = linkage->m_obfuscateCode; + // If we are generating an obfuscated source map, we don't want to strip locs, + // we want to generate *new* locs that can be mapped (via source map) + // back to *actual* source. + // + // We don't do the obfuscation remapping here, because DCE and other passes may + // change what locs are actually needed, we need to be sure + // that if we have obfuscation enabled we don't forget to obfuscate. 
+ stripOptions.stripSourceLocs = linkage->m_obfuscateCode && !linkage->m_generateSourceMap; stripFrontEndOnlyInstructions(module, stripOptions); - + // Stripping out decorations could leave some dead code behind // in the module, and in some cases that extra code is also // undesirable (e.g., the string literals referenced by name-hint @@ -9314,6 +9327,12 @@ RefPtr<IRModule> generateIRForTranslationUnit( IRDeadCodeEliminationOptions options; options.keepExportsAlive = true; eliminateDeadCode(module, options); + + if (linkage->m_obfuscateCode && linkage->m_generateSourceMap) + { + // The obfuscated source map is stored on the module + obfuscateModuleLocs(module, compileRequest->getSourceManager()); + } } // TODO: consider doing some more aggressive optimizations diff --git a/source/slang/slang.cpp b/source/slang/slang.cpp index 14b59d156..bd19670d9 100644 --- a/source/slang/slang.cpp +++ b/source/slang/slang.cpp @@ -2377,6 +2377,9 @@ void FrontEndCompileRequest::generateIR() if (useSerialIRBottleneck) { + // Keep the obfuscated source map (if there is one) + RefPtr<SourceMap> obfuscatedSourceMap = irModule->getObfuscatedSourceMap(); + IRSerialData serialData; { // Write IR out to serialData - copying over SourceLoc information directly @@ -2395,6 +2398,7 @@ void FrontEndCompileRequest::generateIR() // Set irModule to the read module irModule = irReadModule; + irModule->setObfuscatedSourceMap(obfuscatedSourceMap); } // Set the module on the translation unit |
