summaryrefslogtreecommitdiffstats
path: root/source/slang
diff options
context:
space:
mode:
authorjsmall-nvidia <jsmall@nvidia.com>2023-03-22 12:04:33 -0400
committerGitHub <noreply@github.com>2023-03-22 12:04:33 -0400
commitd4f99c8bac8b28f18c864a717d8833db6a1c872d (patch)
treeebea06c019130d8248d5e4f6bccf5e4b2649e3cb /source/slang
parentd8a40abba5223fbcb56c52b04ccb88c02bbaf79f (diff)
Source map obfuscation (#2717)
* #include an absolute path didn't work - because paths were taken to always be relative. * WIP source map. * Split out handling of RttiTypeFuncs to a map type. * Make RttiTypeFuncsMap hold default impls. * Slightly more sophisticated RttiTypeFuncsMap * Source map decoding. * Fix tabs. * Fix asserts due to negative values. * Use less obscure mechanisms in SourceMap. * Source map decoding. Simplifying SourceMap usage. * First attempt at outputting a source map as part of emit. * Added support for -source-map option. SourceMap is added to the artifact. * Small improvements around column calculation in SourceWriter. * Source Loc obfuscation WIP. * Fix some issues around SourceMap obfuscation. * Split out obfuscation into its own file. * Keep obfuscated SourceMap even through serialization bottleneck.
Diffstat (limited to 'source/slang')
-rw-r--r--source/slang/slang-emit-source-writer.cpp83
-rw-r--r--source/slang/slang-emit.cpp2
-rw-r--r--source/slang/slang-ir-obfuscate-loc.cpp279
-rw-r--r--source/slang/slang-ir-obfuscate-loc.h20
-rw-r--r--source/slang/slang-ir.h7
-rw-r--r--source/slang/slang-lower-to-ir.cpp23
-rw-r--r--source/slang/slang.cpp4
7 files changed, 374 insertions, 44 deletions
diff --git a/source/slang/slang-emit-source-writer.cpp b/source/slang/slang-emit-source-writer.cpp
index b27e0f8b4..f66f36758 100644
--- a/source/slang/slang-emit-source-writer.cpp
+++ b/source/slang/slang-emit-source-writer.cpp
@@ -1,6 +1,8 @@
// slang-emit-source-writer.cpp
#include "slang-emit-source-writer.h"
+#include "../core/slang-char-encode.h"
+
// Disable warnings about sprintf
#ifdef _WIN32
# pragma warning(disable:4996)
@@ -516,61 +518,60 @@ void SourceWriter::_emitLineDirective(const HumaneSourceLoc& sourceLocation)
void SourceWriter::_calcLocation(Index& outLineIndex, Index& outColumnIndex)
{
- // If we are at the end, then we are done.
- if (m_currentOutputOffset == m_builder.getLength())
- {
- outLineIndex = m_currentLineIndex;
- outColumnIndex = m_currentColumnIndex;
- return;
- }
-
- const char* cur = m_builder.getBuffer() + m_currentOutputOffset;
- const char* end = m_builder.end();
-
- const char* start = cur;
-
- while (cur < end)
+ // Bring the cached line/column (m_currentLineIndex/m_currentColumnIndex) up to date with
+ // any text appended to m_builder since m_currentOutputOffset was last advanced.
+ // If there are more chars we need to update
+ if (m_currentOutputOffset < m_builder.getLength())
{
- // Reset start
- start = cur;
+ const char* cur = m_builder.getBuffer() + m_currentOutputOffset;
+ const char* end = m_builder.end();
- // Look for the end of the line
- while (*cur != '\n' && *cur != '\r' && cur < end)
- {
- cur++;
- }
+ const char* start = cur;
- // If we are not at the total end then we must have hit a \n or \r
- if (cur < end)
+ while (cur < end)
{
- const auto c = *cur++;
+ // Reset start
+ start = cur;
- ++m_currentLineIndex;
- // Reset the column
- m_currentColumnIndex = 0;
+ // Look for the end of the line.
+ // The bound is tested first, so `end` itself is never dereferenced.
+ while (cur < end && *cur != '\n' && *cur != '\r')
+ {
+ cur++;
+ }
- // Check the next char to see if it's part of a CR/LF combination
+ // If we are not at the total end then we must have hit a \n or \r
if (cur < end)
{
- const auto d = *cur;
- // If it is combination skip the next byte
- cur += ((c ^ d) == ('\r' ^ '\n'));
+ const auto c = *cur++;
+
+ // Next line
+ ++m_currentLineIndex;
+
+ // Check the next char to see if it's part of a CR/LF combination
+ if (cur < end)
+ {
+ const auto d = *cur;
+ // If it is combination skip the next byte
+ cur += ((c ^ d) == ('\r' ^ '\n'));
+ }
+
+ // Calculate the offset to the start of this line
+ m_currentColumnIndex = 0;
+ start = cur;
}
}
- }
- // Fix up the current index.
- // TODO(JS):
- // NOTE! This isn't strictly correct because it assumes one byte is a *column* which isn't actually the case with utf8
- // encoding...
- m_currentColumnIndex += Index(cur - start);
+ // Set the current offset to the end
+ m_currentOutputOffset = m_builder.getLength();
- // Set the current offset is the end
- m_currentOutputOffset = m_builder.getLength();
+ // Get the bytes remaining on this line (which may not be complete)
+ const UnownedStringSlice lineRemaining(start, m_builder.end());
- // Output the values
- outLineIndex = m_currentLineIndex;
+ // Offset the column index in codepoints
+ m_currentColumnIndex += UTF8Util::calcCodePointCount(lineRemaining);
+ }
+
+ // Output the position
outColumnIndex = m_currentColumnIndex;
+ outLineIndex = m_currentLineIndex;
}
} // namespace Slang
diff --git a/source/slang/slang-emit.cpp b/source/slang/slang-emit.cpp
index 335e95c9e..bc62e488f 100644
--- a/source/slang/slang-emit.cpp
+++ b/source/slang/slang-emit.cpp
@@ -1126,7 +1126,7 @@ SlangResult CodeGenContext::emitEntryPointsSourceFromIR(ComPtr<IArtifact>& outAr
auto sourceMapBlob = StringBlob::moveCreate(writer.getBuilder());
- auto sourceMapArtifact = ArtifactUtil::createArtifact(ArtifactDesc::make(ArtifactKind::Text, ArtifactPayload::SourceMap, ArtifactStyle::None));
+ auto sourceMapArtifact = ArtifactUtil::createArtifact(ArtifactDesc::make(ArtifactKind::Json, ArtifactPayload::SourceMap, ArtifactStyle::None));
sourceMapArtifact->addRepresentationUnknown(sourceMapBlob);
artifact->addAssociated(sourceMapArtifact);
diff --git a/source/slang/slang-ir-obfuscate-loc.cpp b/source/slang/slang-ir-obfuscate-loc.cpp
new file mode 100644
index 000000000..b3f5d5cd3
--- /dev/null
+++ b/source/slang/slang-ir-obfuscate-loc.cpp
@@ -0,0 +1,279 @@
+// slang-ir-obfuscate-loc.cpp
+#include "slang-ir-obfuscate-loc.h"
+
+#include "../../slang.h"
+
+#include "../core/slang-random-generator.h"
+#include "../core/slang-hash.h"
+#include "../core/slang-char-util.h"
+
+namespace Slang
+{
+
+namespace { // anonymous
+
+/// An instruction together with the source loc it carried when it was collected.
+struct InstWithLoc
+{
+    using ThisType = InstWithLoc;
+
+    /// Ordering is by the raw loc value only; the instruction pointer does not take part.
+    SLANG_FORCE_INLINE bool operator<(const ThisType& rhs) const
+    {
+        return loc.getRaw() < rhs.loc.getRaw();
+    }
+
+    IRInst* inst;
+    SourceLoc loc;
+};
+
+/// Associates a loc found in the IR with the obfuscated loc that replaces it.
+struct LocPair
+{
+    SourceLoc originalLoc;
+    SourceLoc obfuscatedLoc;
+};
+
+} // anonymous
+
+/// Collect `inst`, and then recursively every instruction below it, into `out`.
+/// Only instructions whose `sourceLoc` is valid are recorded; the parent is
+/// appended before its children are visited.
+static void _findInstsRec(IRInst* inst, List<InstWithLoc>& out)
+{
+    const SourceLoc loc = inst->sourceLoc;
+    if (loc.isValid())
+    {
+        InstWithLoc found;
+        found.inst = inst;
+        found.loc = loc;
+        out.add(found);
+    }
+
+    // Descend into the children regardless of whether this instruction had a loc
+    for (IRInst* child : inst->getModifiableChildren())
+    {
+        _findInstsRec(child, out);
+    }
+}
+
+/// Replace every valid source loc in `module` with an obfuscated loc, and store a
+/// source map on the module that maps the obfuscated locs back to the originals.
+///
+/// The obfuscated locs live in a new source file/view created on `sourceManager`.
+/// That file has one position per *unique* original loc, and the positions are handed
+/// out in a pseudo-random order. In the resulting source map everything is on
+/// generated line 0, and the generated column of an entry is the offset of the
+/// obfuscated loc from the start of the obfuscated view.
+///
+/// @param module         Module whose instructions are updated in place. It must not
+///                       already have an obfuscated source map (asserted).
+/// @param sourceManager  Used to create the obfuscated file/view and to look up the
+///                       views that contain the original locs.
+/// @return SLANG_OK (there are currently no failure paths).
+SlangResult obfuscateModuleLocs(IRModule* module, SourceManager* sourceManager)
+{
+    // There shouldn't be an obfuscated source map set
+    SLANG_ASSERT(module->getObfuscatedSourceMap() == nullptr);
+
+    List<InstWithLoc> instWithLocs;
+
+    // Find all of the instructions with source locs
+    _findInstsRec(module->getModuleInst(), instWithLocs);
+
+    // Sort them by loc, so that instructions that share a loc are adjacent
+    instWithLocs.sort();
+
+    // Lets produce a hash, so we can use as a key for random number generation.
+    // We could base it on time, or some other thing as there is no requirement for
+    // stability or consistency.
+    // We use a hash because it avoids issues around clocks, and availability of a clock
+    // as a good source of entropy.
+    //
+    // An argument *could* be made to generate the name via some mechanism that uniquely identified the
+    // combination of flags, options, files, names that identified the compilation, but that is
+    // not easily achieved.
+    HashCode hash = 0;
+
+    // One entry per unique original loc, in ascending loc order
+    List<LocPair> locPairs;
+
+    {
+        SourceLoc curLoc;
+        for (const auto& instWithLoc : instWithLocs)
+        {
+            hash = combineHash(hash, getHashCode(instWithLoc.inst));
+            hash = combineHash(hash, getHashCode(instWithLoc.loc.getRaw()));
+
+            if (instWithLoc.loc != curLoc)
+            {
+                LocPair locPair;
+                locPair.originalLoc = instWithLoc.loc;
+                locPairs.add(locPair);
+
+                // This is the current loc
+                curLoc = instWithLoc.loc;
+            }
+        }
+    }
+
+    const Count uniqueLocCount = locPairs.getCount();
+
+    // Seed the generator from the hash
+    const uint32_t randomSeed = uint32_t(hash);
+    RefPtr<RandomGenerator> rand = RandomGenerator::create(randomSeed);
+
+    // We want a random unique name because we could have multiple obfuscated modules
+    // and we need to identify each
+    PathInfo obfuscatedPathInfo;
+
+    {
+        // We need a pathInfo to *identify* this modules obfuscated locs.
+        // We are going to use a random number, seeded from the hash to do this.
+        // Turning the number as hex as the name.
+        StringBuilder buf;
+
+        uint8_t data[4];
+        rand->nextData(data, sizeof(data));
+
+        const Index byteCount = Index(SLANG_COUNT_OF(data));
+        const Count charsCount = byteCount * 2;
+
+        char* dst = buf.prepareForAppend(charsCount);
+
+        // The low nibble is written first. The bytes are random, so the nibble
+        // order carries no meaning.
+        for (Index i = 0; i < byteCount; ++i)
+        {
+            dst[i * 2 + 0] = CharUtil::getHexChar(data[i] & 0xf);
+            dst[i * 2 + 1] = CharUtil::getHexChar(data[i] >> 4);
+        }
+        buf.appendInPlace(dst, charsCount);
+        obfuscatedPathInfo = PathInfo::makePath(buf);
+    }
+
+    SourceFile* obfuscatedFile = sourceManager->createSourceFileWithSize(obfuscatedPathInfo, uniqueLocCount);
+
+    // Create the view we are going to use from the obfuscated "file".
+    SourceView* obfuscatedView = sourceManager->createSourceView(obfuscatedFile, nullptr, SourceLoc());
+
+    // For locPairs[i], columnForPair[i] is the offset of its obfuscated loc from the
+    // start of the obfuscated view. This is the "generated column" in the source map.
+    List<Index> columnForPair;
+    columnForPair.setCount(uniqueLocCount);
+
+    // Okay now we want to produce a map from these locs to a new source location
+    {
+        // Create a "bag" and put all of the offsets in it.
+        List<Index> bag;
+        bag.setCount(uniqueLocCount);
+
+        for (Index i = 0; i < uniqueLocCount; ++i)
+        {
+            bag[i] = i;
+        }
+
+        const SourceLoc baseLoc = obfuscatedView->getRange().begin;
+
+        // Pull the offsets randomly out of the bag to create the map.
+        // The bag starts with one offset per pair, so it is never empty here.
+        for (Index i = 0; i < uniqueLocCount; ++i)
+        {
+            // Find an index in the bag
+            // NOTE(review): assumes the upper bound of nextInt32InRange is exclusive - confirm
+            const Index bagIndex = rand->nextInt32InRange(0, int32_t(bag.getCount()));
+
+            const Index column = bag[bagIndex];
+
+            // Set in the map
+            columnForPair[i] = column;
+            locPairs[i].obfuscatedLoc = baseLoc + column;
+
+            // Remove from the bag
+            bag.fastRemoveAt(bagIndex);
+        }
+    }
+
+    // We can now just set all the new locs in the instructions.
+    // Both instWithLocs and locPairs are in ascending original loc order, so a single
+    // forward walk keeps them in step. Nothing is dereferenced if there are no locs.
+    {
+        Index pairIndex = 0;
+
+        for (const auto& instWithLoc : instWithLocs)
+        {
+            if (instWithLoc.loc != locPairs[pairIndex].originalLoc)
+            {
+                ++pairIndex;
+                SLANG_ASSERT(pairIndex < uniqueLocCount);
+            }
+
+            const LocPair& pair = locPairs[pairIndex];
+            SLANG_ASSERT(pair.originalLoc == instWithLoc.loc);
+
+            // Set the loc
+            instWithLoc.inst->sourceLoc = pair.obfuscatedLoc;
+        }
+    }
+
+    // We can now create a map.
+    // This doesn't "leak" anything as the obfuscated loc map is not distributed.
+
+    RefPtr<SourceMap> sourceMap = new SourceMap;
+    sourceMap->m_file = obfuscatedPathInfo.getName();
+
+    // Make sure we have line 0.
+    // We only end up with one line in the obfuscated map.
+    sourceMap->advanceToLine(0);
+
+    {
+        // Entries indexed by generated column. The columns are a permutation of
+        // [0, uniqueLocCount), so every slot is written exactly once below.
+        List<SourceMap::Entry> entries;
+        entries.setCount(uniqueLocCount);
+
+        // Current view, with cached "View" based sourceFileIndex
+        SourceView* curView = nullptr;
+        Index curViewSourceFileIndex = -1;
+
+        // Current handle, and store cached index in curPathSourceFileIndex
+        StringSlicePool::Handle curPathHandle = StringSlicePool::Handle(0);
+        Index curPathSourceFileIndex = -1;
+
+        // Walk in original loc order, such that consecutive locs tend to be in the
+        // same view, and the cached view/path lookups are reused.
+        for (Index i = 0; i < uniqueLocCount; ++i)
+        {
+            const auto& pair = locPairs[i];
+
+            // First find the view
+            if (curView == nullptr ||
+                !curView->getRange().contains(pair.originalLoc))
+            {
+                curView = sourceManager->findSourceViewRecursively(pair.originalLoc);
+                SLANG_ASSERT(curView);
+
+                // Reset the current view path index, to being unset
+                curViewSourceFileIndex = -1;
+
+                // We have to reset, because the path index is for the source manager
+                // that holds the view. If the view changes we need to re determine the
+                // path string, and index.
+                curPathSourceFileIndex = -1;
+            }
+
+            // Now get the location
+            const auto handleLoc = curView->getHandleLoc(pair.originalLoc);
+
+            Index sourceFileIndex = -1;
+
+            if (handleLoc.pathHandle == StringSlicePool::Handle(0))
+            {
+                // No path handle on the loc, so use the path of the view itself
+                if (curViewSourceFileIndex < 0)
+                {
+                    const auto pathInfo = curView->getViewPathInfo();
+                    curViewSourceFileIndex = sourceMap->getSourceFileIndex(pathInfo.getName().getUnownedSlice());
+                }
+                sourceFileIndex = curViewSourceFileIndex;
+            }
+            else
+            {
+                if (curPathSourceFileIndex < 0 ||
+                    handleLoc.pathHandle != curPathHandle)
+                {
+                    // Look the path up with the handle for *this* loc. The cached
+                    // handle is only updated afterwards.
+                    auto viewSourceManager = curView->getSourceManager();
+                    const auto filePathSlice = viewSourceManager->getStringSlicePool().getSlice(handleLoc.pathHandle);
+
+                    // Set the handle
+                    curPathHandle = handleLoc.pathHandle;
+
+                    // Get the source file index.
+                    curPathSourceFileIndex = sourceMap->getSourceFileIndex(filePathSlice);
+                }
+
+                sourceFileIndex = curPathSourceFileIndex;
+            }
+
+            // Create the entry
+            SourceMap::Entry entry;
+            entry.init();
+
+            entry.sourceFileIndex = sourceFileIndex;
+
+            // The generated column is where the obfuscated loc sits in the obfuscated view
+            const Index generatedColumn = columnForPair[i];
+            entry.generatedColumn = generatedColumn;
+
+            // handleLoc line/column are converted to zero based here
+            entry.sourceColumn = handleLoc.column - 1;
+            entry.sourceLine = handleLoc.line - 1;
+
+            entries[generatedColumn] = entry;
+        }
+
+        // Add the entries to the source map in ascending generated column order
+        for (const auto& entry : entries)
+        {
+            sourceMap->addEntry(entry);
+        }
+    }
+
+    module->setObfuscatedSourceMap(sourceMap);
+
+    return SLANG_OK;
+}
+
+} // namespace Slang
diff --git a/source/slang/slang-ir-obfuscate-loc.h b/source/slang/slang-ir-obfuscate-loc.h
new file mode 100644
index 000000000..03e32a143
--- /dev/null
+++ b/source/slang/slang-ir-obfuscate-loc.h
@@ -0,0 +1,20 @@
+// slang-ir-obfuscate-loc.h
+#ifndef SLANG_IR_OBFUSCATE_LOC_H_INCLUDED
+#define SLANG_IR_OBFUSCATE_LOC_H_INCLUDED
+
+#include "../core/slang-basic.h"
+
+#include "../compiler-core/slang-source-map.h"
+
+#include "slang-compiler.h"
+#include "slang-ir.h"
+
+namespace Slang
+{
+
+/*** Obfuscate locs in module. Store the mapping from obfuscated locs to actual locs in the form of a source map.
+
+Every instruction in `module` that has a valid source loc has that loc replaced with a loc
+in a newly created source file/view owned by `sourceManager`. The resulting source map is
+stored on the module itself (see IRModule::setObfuscatedSourceMap/getObfuscatedSourceMap).
+
+The module must not already have an obfuscated source map set (this is asserted).
+
+@param module The module whose instruction locs are rewritten in place
+@param sourceManager Source manager used to create the obfuscated file/view, and to find the views of the original locs
+@return SLANG_OK */
+SlangResult obfuscateModuleLocs(IRModule* module, SourceManager* sourceManager);
+
+}
+
+#endif
diff --git a/source/slang/slang-ir.h b/source/slang/slang-ir.h
index 14a216fd2..025812f83 100644
--- a/source/slang/slang-ir.h
+++ b/source/slang/slang-ir.h
@@ -13,6 +13,7 @@
#include "../core/slang-memory-arena.h"
#include "../compiler-core/slang-source-loc.h"
+#include "../compiler-core/slang-source-map.h"
#include "slang-type-system-shared.h"
@@ -2008,6 +2009,9 @@ public:
SLANG_FORCE_INLINE IRModuleInst* getModuleInst() const { return m_moduleInst; }
SLANG_FORCE_INLINE MemoryArena& getMemoryArena() { return m_memoryArena; }
+ SLANG_FORCE_INLINE SourceMap* getObfuscatedSourceMap() const { return m_obfuscatedSourceMap; }
+ SLANG_FORCE_INLINE void setObfuscatedSourceMap(SourceMap* sourceMap) { m_obfuscatedSourceMap = sourceMap; }
+
IRDeduplicationContext* getDeduplicationContext() const { return &m_deduplicationContext; }
IRInstListBase getGlobalInsts() const { return getModuleInst()->getChildren(); }
@@ -2074,6 +2078,9 @@ private:
/// Shared contexts for constructing and deduplicating the IR.
mutable IRDeduplicationContext m_deduplicationContext;
+
+ /// Holds the obfuscated source map for this module if applicable
+ RefPtr<SourceMap> m_obfuscatedSourceMap;
};
struct IRSpecializationDictionaryItem : public IRInst
diff --git a/source/slang/slang-lower-to-ir.cpp b/source/slang/slang-lower-to-ir.cpp
index 86df89702..f84f17886 100644
--- a/source/slang/slang-lower-to-ir.cpp
+++ b/source/slang/slang-lower-to-ir.cpp
@@ -3,6 +3,10 @@
#include "../../slang.h"
+#include "../core/slang-random-generator.h"
+#include "../core/slang-hash.h"
+#include "../core/slang-char-util.h"
+
#include "slang-check.h"
#include "slang-ir.h"
#include "slang-ir-constexpr.h"
@@ -21,6 +25,8 @@
#include "slang-ir-string-hash.h"
#include "slang-ir-clone.h"
#include "slang-ir-lower-error-handling.h"
+#include "slang-ir-obfuscate-loc.h"
+
#include "slang-mangle.h"
#include "slang-type-layout.h"
#include "slang-visitor.h"
@@ -9299,10 +9305,17 @@ RefPtr<IRModule> generateIRForTranslationUnit(
Linkage* linkage = compileRequest->getLinkage();
stripOptions.shouldStripNameHints = linkage->m_obfuscateCode;
- stripOptions.stripSourceLocs = linkage->m_obfuscateCode;
+ // If we are generating an obfuscated source map, we don't want to strip locs,
+ // we want to generate *new* locs that can be mapped (via source map)
+ // back to *actual* source.
+ //
+ // We don't do the obfuscation remapping here, because DCE and other passes may
+ // change what locs are actually needed, we need to be sure
+ // that if we have obfuscation enabled we don't forget to obfuscate.
+ stripOptions.stripSourceLocs = linkage->m_obfuscateCode && !linkage->m_generateSourceMap;
stripFrontEndOnlyInstructions(module, stripOptions);
-
+
// Stripping out decorations could leave some dead code behind
// in the module, and in some cases that extra code is also
// undesirable (e.g., the string literals referenced by name-hint
@@ -9314,6 +9327,12 @@ RefPtr<IRModule> generateIRForTranslationUnit(
IRDeadCodeEliminationOptions options;
options.keepExportsAlive = true;
eliminateDeadCode(module, options);
+
+ if (linkage->m_obfuscateCode && linkage->m_generateSourceMap)
+ {
+ // The obfuscated source map is stored on the module
+ obfuscateModuleLocs(module, compileRequest->getSourceManager());
+ }
}
// TODO: consider doing some more aggressive optimizations
diff --git a/source/slang/slang.cpp b/source/slang/slang.cpp
index 14b59d156..bd19670d9 100644
--- a/source/slang/slang.cpp
+++ b/source/slang/slang.cpp
@@ -2377,6 +2377,9 @@ void FrontEndCompileRequest::generateIR()
if (useSerialIRBottleneck)
{
+ // Keep the obfuscated source map (if there is one)
+ RefPtr<SourceMap> obfuscatedSourceMap = irModule->getObfuscatedSourceMap();
+
IRSerialData serialData;
{
// Write IR out to serialData - copying over SourceLoc information directly
@@ -2395,6 +2398,7 @@ void FrontEndCompileRequest::generateIR()
// Set irModule to the read module
irModule = irReadModule;
+ irModule->setObfuscatedSourceMap(obfuscatedSourceMap);
}
// Set the module on the translation unit