Use SPIR-V opcode names rather than numbers (#2571)

* s/emititng blobal/emitting global * Use SPIR-V opcode names rather than numbers * regenerate Visual Studio project files * Use names for extended SPIR-V GLSL instructions * Add missing operand for SPIR-V extended instruction * Add warning aginst modifying generated hashing files * Squash warnings on MSVC
author: Ellie Hermaszewska <ellieh@nvidia.com> 2023-02-03 12:18:49 +0800
committer: GitHub <noreply@github.com> 2023-02-03 12:18:49 +0800
commit: a00dc69ca8dfbc346940c7d03d67c1cb229577fb (patch)
tree: ce102791bfe2b052edb4e4fc3453a92fc53fb3a8 /tools
parent: bbd1e1786401bb88c34802b987d4da72e2364503 (diff)
1 files changed, 374 insertions, 0 deletions
diff --git a/tools/slang-lookup-generator/lookup-generator-main.cpp b/tools/slang-lookup-generator/lookup-generator-main.cpp
new file mode 100644
index 000000000..27dae8e54
--- /dev/null
+++ b/tools/slang-lookup-generator/lookup-generator-main.cpp
@@ -0,0 +1,374 @@
+// perfect-hash-main.cpp
+
+#include <stdio.h>
+#include "../../source/compiler-core/slang-json-parser.h"
+#include "../../source/compiler-core/slang-json-value.h"
+#include "../../source/compiler-core/slang-lexer.h"
+#include "../../source/core/slang-io.h"
+#include "../../source/core/slang-secure-crt.h"
+
+using namespace Slang;
+
+static SlangResult parseJson(const char* inputPath, DiagnosticSink* sink, JSONListener& listener)
+{
+    auto sourceManager = sink->getSourceManager();
+
+    String contents;
+    SLANG_RETURN_ON_FAIL(File::readAllText(inputPath, contents));
+    PathInfo    pathInfo = PathInfo::makeFromString(inputPath);
+    SourceFile* sourceFile = sourceManager->createSourceFileWithString(pathInfo, contents);
+    SourceView* sourceView = sourceManager->createSourceView(sourceFile, nullptr, SourceLoc());
+    JSONLexer   lexer;
+    lexer.init(sourceView, sink);
+    JSONParser parser;
+    SLANG_RETURN_ON_FAIL(parser.parse(&lexer, sourceView, &listener, sink));
+    return SLANG_OK;
+}
+
+// Extract from a json value, the "opname" member from all the objects in the
+// "instructions" array.
+// Returns the empty list on failure
+static List<String> extractOpNames(UnownedStringSlice& error, const JSONValue& v, JSONContainer& container)
+{
+    List<String> opnames;
+
+    // Wish we could just write à la jq
+    // List<String> result = match(myJSONValue, "instructions", AsArray, "opname", AsString);
+    const auto instKey = container.findKey(UnownedStringSlice("instructions"));
+    const auto opnameKey = container.findKey(UnownedStringSlice("opname"));
+    if (!instKey)
+    {
+        error = UnownedStringSlice("JSON parsing failed, no \"instructions\" key\n");
+        return {};
+    }
+    if (!opnameKey)
+    {
+        error = UnownedStringSlice("JSON parsing failed, no \"opname\" key\n");
+        return {};
+    }
+
+    const auto instructions = container.findObjectValue(v, instKey);
+    if (!instructions.isValid() || instructions.type != JSONValue::Type::Array)
+    {
+        error = UnownedStringSlice("JSON parsing failed, no \"instructions\" member of array type\n");
+        return {};
+    }
+    for (const auto& inst : container.getArray(instructions))
+    {
+        const auto opname = container.findObjectValue(inst, opnameKey);
+        if (!opname.isValid() || opname.getKind() != JSONValue::Kind::String)
+        {
+            error = UnownedStringSlice("JSON parsing failed, no \"opname\" member of string type for instruction\n");
+            return {};
+        }
+        opnames.add(container.getString(opname));
+    }
+
+    return opnames;
+}
+
+struct HashParams
+{
+    List<UInt32> saltTable;
+    List<String> destTable;
+};
+
+enum HashFindResult {
+    Success,
+    NonUniqueKeys,
+    UnavoidableHashCollision,
+};
+
+// Implemented according to "Hash, displace, and compress"
+// https://cmph.sourceforge.net/papers/esa09.pdf
+static HashFindResult minimalPerfectHash(const List<String>& ss, HashParams& hashParams)
+{
+    // Check for uniqueness
+    for (Index i = 0; i < ss.getCount(); ++i)
+    {
+        for (Index j = i + 1; j < ss.getCount(); ++j)
+        {
+            if (ss[i] == ss[j])
+            {
+                return NonUniqueKeys;
+            }
+        }
+    }
+
+    SLANG_ASSERT(UIndex(ss.getCount()) < std::numeric_limits<UInt32>::max());
+    const UInt32       nBuckets = UInt32(ss.getCount());
+    List<List<String>> initialBuckets;
+    initialBuckets.setCount(nBuckets);
+
+    const auto hash = [&](const String& s, const HashCode64 salt = 0) -> UInt32
+    {
+        //
+        // The current getStableHashCode32 is susceptible to patterns of
+        // collisions causing the search to fail for the SPIR-V opnames
+        //
+        // getStableHashCode64 is better, although it still performs poorly on
+        // short strings, taking over 300000 iterations to diverge on "Ceil"
+        // and "FMix" (and place them in already unoccupied slots)!
+        //
+        // Use FNV Hash here which seem perform much better on these short inputs
+        // https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function
+        //
+        // If you change this, don't forget to also sync the version below in
+        // the printing code.
+        UInt64 h = salt;
+        for (const char c : s) h = ((h * 0x00000100000001B3) ^ c);
+        return h % nBuckets;
+    };
+
+    // Assign the inputs into their buckets according to the hash without salt.
+    // Sort the buckets according to size, so that later we can make these have
+    // unique destinations starting with the largest ones first as they are at
+    // most risk of collision.
+    for (const auto& s : ss)
+    {
+        initialBuckets[hash(s)].add(s);
+    }
+    initialBuckets.sort([](const List<String>& a, const List<String>& b) { return a.getCount() > b.getCount(); });
+
+    // These are our outputs, the salts are calculated such that for all input
+    // word, x, hash(x, salt[hash(x, 0)]) is unique
+    //
+    // We keep the final table as we need to detect when we've been given a
+    // word not in our language.
+    hashParams.saltTable.setCount(nBuckets);
+    for (auto& s : hashParams.saltTable)
+    {
+        s = 0;
+    }
+    hashParams.destTable.setCount(nBuckets);
+    for (auto& s : hashParams.destTable)
+    {
+        s.reduceLength(0);
+    }
+
+    // This mask will, in each salt tryout, be used to prevent collisions
+    // within a single bucket.
+    List<bool> bucketDestinations = List<bool>::makeRepeated(false, nBuckets);
+
+    for (const auto& b : initialBuckets)
+    {
+        // Break if we've reached the empty buckets
+        if (!b.getCount())
+        {
+            break;
+        }
+
+        // Try out all the salts until we get one which has no internal
+        // collisions for this bucket and also no collisions with the buckets
+        // we've processed so far.
+        UInt32 salt = 1;
+        while (true)
+        {
+            bool collision = false;
+            for (auto& d : bucketDestinations)
+            {
+                d = false;
+            }
+
+            for (const auto& s : b)
+            {
+                const auto i = hash(s, salt);
+                if (hashParams.destTable[i].getLength() || bucketDestinations[i])
+                {
+                    collision = true;
+                    break;
+                }
+                bucketDestinations[i] = true;
+            }
+            if (!collision)
+            {
+                break;
+            }
+            salt++;
+
+            // If we fail to find a solution after some massive amount of tries
+            // it's almost certainly because of some property of the hash
+            // function and language causing an irresolvable collision.
+            if (salt > 10000 * nBuckets)
+            {
+                return UnavoidableHashCollision;
+            }
+        }
+        for (const auto& s : b)
+        {
+            hashParams.saltTable[hash(s)] = salt;
+            hashParams.destTable[hash(s, salt)] = s;
+        }
+    }
+    return Success;
+}
+
+void writeHashFile(
+    const char* const  outCppPath,
+    const char*        valueType,
+    const char*        valuePrefix,
+    const List<String> includes,
+    const HashParams&  hashParams)
+{
+    FILE* f = nullptr;
+    fopen_s(&f, outCppPath, "w");
+    if (!f)
+    {
+        return;
+    }
+    FileWriter   writer(f, WriterFlags(0));
+    WriterHelper w(&writer);
+
+    w.print("// Hash function for %s\n", valueType);
+    w.print("//\n");
+    w.print("// This file was thoughtfully generated by a machine,\n");
+    w.print("// don't even think about modifying it yourself!\n");
+    w.print("//\n");
+    w.print("\n");
+    for (const auto& i : includes)
+    {
+        w.print("#include \"%s\"\n", i.getBuffer());
+    }
+    w.print("\n");
+    w.print("\n");
+    w.print("namespace Slang\n");
+    w.print("{\n");
+    w.print("\n");
+
+    w.print("static const unsigned tableSalt[%ld] =", hashParams.saltTable.getCount());
+    w.print("{\n   ");
+    for (Index i = 0; i < hashParams.saltTable.getCount(); ++i)
+    {
+        const auto salt = hashParams.saltTable[i];
+        if (i != hashParams.saltTable.getCount() - 1)
+        {
+            w.print(" %d,", salt);
+            if (i % 16 == 15)
+            {
+                w.print("\n   ");
+            }
+        }
+        else
+        {
+            w.print(" %d", salt);
+        }
+    }
+    w.print("\n};\n");
+    w.print("\n");
+
+    w.print("struct KV\n");
+    w.print("{\n");
+    w.print("    const char* name;\n");
+    w.print("    %s value;\n", valueType);
+    w.print("};\n");
+    w.print("\n");
+
+    w.print("static const KV words[%ld] =\n", hashParams.destTable.getCount());
+    w.print("{\n");
+    for (const auto& s : hashParams.destTable)
+    {
+        w.print("    {\"%s\", %s%s},\n", s.getBuffer(), valuePrefix, s.getBuffer());
+    }
+    w.print("};\n");
+    w.print("\n");
+
+    // Make sure to update the hash function in the search function above if
+    // you change this.
+    w.print("static UInt32 hash(const UnownedStringSlice& str, UInt32 salt)\n");
+    w.print("{\n");
+    w.print("    UInt64 h = salt;\n");
+    w.print("    for(const char c : str)\n");
+    w.print("        h = ((h * 0x00000100000001B3) ^ c);\n");
+    w.print("    return h %% (sizeof(tableSalt)/sizeof(tableSalt[0]));\n");
+    w.print("}\n");
+    w.print("\n");
+
+    w.print("bool lookup%s(const UnownedStringSlice& str, %s& value)\n", valueType, valueType);
+    w.print("{\n");
+    w.print("    const auto i = hash(str, tableSalt[hash(str, 0)]);\n");
+    w.print("    if(str == words[i].name)\n");
+    w.print("    {\n");
+    w.print("        value = words[i].value;\n");
+    w.print("        return true;\n");
+    w.print("    }\n");
+    w.print("    else\n");
+    w.print("    {\n");
+    w.print("        return false;\n");
+    w.print("    }\n");
+    w.print("}\n");
+    w.print("\n");
+
+    w.print("}\n");
+}
+
+int main(int argc, const char* const* argv)
+{
+    using namespace Slang;
+
+    if (argc != 6)
+    {
+        fprintf(
+            stderr,
+            "Usage: %s input.grammar.json output.cpp enum-name enumerant-prefix enum-header-file\n",
+            argc >= 1 ? argv[0] : "slang-lookup-generator");
+        return 1;
+    }
+
+    const char* const inJsonPath = argv[1];
+    const char* const outCppPath = argv[2];
+    const char* const enumName = argv[3];
+    const char* const enumerantPrefix = argv[4];
+    const char* const enumHeader = argv[5];
+
+    RefPtr<FileWriter> writer(new FileWriter(stderr, WriterFlag::AutoFlush));
+    SourceManager      sourceManager;
+    sourceManager.initialize(nullptr, nullptr);
+    DiagnosticSink sink(&sourceManager, Lexer::sourceLocationLexer);
+    sink.writer = writer;
+
+    JSONContainer container(sink.getSourceManager());
+    JSONBuilder   builder(&container);
+    if (SLANG_FAILED(parseJson(inJsonPath, &sink, builder)))
+    {
+        sink.diagnoseRaw(Severity::Error, "Json parsing failed\n");
+        return 1;
+    }
+
+    UnownedStringSlice error;
+    const List<String> opnames = extractOpNames(error, builder.getRootValue(), container);
+    if (error.getLength())
+    {
+        sink.diagnoseRaw(Severity::Error, error);
+        return 1;
+    }
+
+    HashParams hashParams;
+    auto       r = minimalPerfectHash(opnames, hashParams);
+    switch (r)
+    {
+    case UnavoidableHashCollision:
+        {
+            sink.diagnoseRaw(
+                Severity::Error,
+                "Unable to find a non-overlapping hash function.\n"
+                "The hash function probably has a unavoidable "
+                "collision for some input words\n");
+            return 1;
+        }
+    case NonUniqueKeys:
+        {
+            sink.diagnoseRaw(Severity::Error, "Input word list has duplicates\n");
+            return 1;
+        }
+    case Success:;
+    }
+
+    writeHashFile(
+        outCppPath,
+        enumName,
+        enumerantPrefix,
+        { "../core/slang-common.h", "../core/slang-string.h", enumHeader },
+        hashParams);
+
+    return 0;
+}
author	Ellie Hermaszewska <ellieh@nvidia.com>	2023-02-03 12:18:49 +0800
committer	GitHub <noreply@github.com>	2023-02-03 12:18:49 +0800
commit	a00dc69ca8dfbc346940c7d03d67c1cb229577fb (patch)
tree	ce102791bfe2b052edb4e4fc3453a92fc53fb3a8 /tools
parent	bbd1e1786401bb88c34802b987d4da72e2364503 (diff)