Initial steps on GPU printing example (#1197)

* Initial steps on GPU printing example This change is checkpointing work on a new Slang example that shows how a "GPU `printf()`" can be implemented almost entirely in user space thanks to a combination of Slang language and API features. The example is not perfect as it stands today due to limitations in our current handling of hashed string literals: * At call sites where a string literal is passed, we currently have to explicitly invoke `getStringHash()` to get the hash code, because we don't currently support `String` as a function argument/parameter type. * On the implementation side, because strings are passed as their `int` hash codes, we can't tell them apart from ordinary `int` arguments. The current code handles this by assuming an `int` is *always* a hashed string, which obviously isn't appropriate. There are plenty of other limitations in the implementation presented, but the above are the two main things I'd like to address in follow-up work. I would like to checkpoint this work on the application first, in order to keep work on the Slang implementation and the example as separate as possible. * typo
author: Tim Foley <tfoleyNV@users.noreply.github.com> 2020-02-03 17:36:32 -0800
committer: GitHub <noreply@github.com> 2020-02-03 17:36:32 -0800
commit: b415760d7f166eaad7fa27daa09edc9a8964c37e (patch)
tree: 1d9035d18dc6767ed4c4100a289365bd08a02514 /examples/gpu-printing/gpu-printing.cpp
parent: 2c1fbf8330efc34b85e09ee9b101c6a55327778a (diff)
1 files changed, 390 insertions, 0 deletions
diff --git a/examples/gpu-printing/gpu-printing.cpp b/examples/gpu-printing/gpu-printing.cpp
new file mode 100644
index 000000000..ff35fd0b3
--- /dev/null
+++ b/examples/gpu-printing/gpu-printing.cpp
@@ -0,0 +1,390 @@
+// gpu-printing.cpp
+#include "gpu-printing.h"
+
+#include <assert.h>
+
+// This file implements the CPU side of a simple GPU printing
+// library. The CPU code is responsible for scanning through
+// buffers of "print commands" produced by GPU shaders, and
+// executing those commands to print output.
+//
+// The opcodes for the printing commands are shared between
+// CPU and GPU, and also between C++ and Slang, by putting
+// their declarations in the `gpu-printing-ops.h` file
+// and including them into both the host and device code
+// to generate `enum` types.
+//
+enum class GPUPrintingOp : uint32_t
+{
+#define GPU_PRINTING_OP(NAME) NAME,
+#include "gpu-printing-ops.h"
+};
+
+// One of the key ideas in this printing system is that strings
+// are not encoded into the buffer of print commands directly,
+// but are instead encoded using a hash of the string data.
+//
+// In order to map from a hash code back to the original string,
+// the host side code for the printing system needs a way to
+// pre-populate a lookup table with the strings that appear
+// in a shader. The Slang reflection API provides a service to
+// do exactly that.
+//
+void GPUPrinting::loadStrings(slang::ProgramLayout* slangReflection)
+{
+    // Given the Slang-generated reflection and layout information
+    // for a program, we can query the number of string literals
+    // that appear in the linked program.
+    //
+    SlangUInt hashedStringCount = slangReflection->getHashedStringCount();
+    for( SlangUInt ii = 0; ii < hashedStringCount; ++ii )
+    {
+        // For each string we can fetch its bytes from the Slang
+        // reflection data.
+        //
+        size_t stringSize = 0;
+        char const* stringData = slangReflection->getHashedString(ii, &stringSize);
+
+        // Then we can compute the hash code for that string using
+        // another Slang API function.
+        //
+        // Note: the exact hashing algorithm that Slang uses for
+        // string literals is not currently documented, and may
+        // change in future releases of the compiler.
+        //
+        StringHash hash = spComputeStringHash(stringData, stringSize);
+
+        // The `GPUPrinting` implementation will store the mapping
+        // from hash codes back to strings in a simple STL `map`.
+        //
+        m_hashedStrings.insert(std::make_pair(hash, std::string(stringData, stringData + stringSize)));
+    }
+}
+
+// The main service that the host code for the GPU printing library
+// provides is a way to execute the printing commands that have been
+// encoded to a buffer by shader code.
+//
+void GPUPrinting::processGPUPrintCommands(const void* data, size_t dataSize)
+{
+    // Everything that the GPU code writes to the buffer will be in
+    // a granularity of 32-bits words, so we start by computing
+    // how many words, total, will fit in the buffer.
+    //
+    uint32_t dataWordCount = uint32_t(dataSize/ sizeof(uint32_t));
+    //
+    // If the buffer doesn't even have enough space for the leading counter,
+    // then there is nothing to print.
+    //
+    if( dataWordCount < 1 )
+    {
+        fprintf(stderr, "error: expected at least 4 bytes in GPU printing buffer\n");
+        return;
+    }
+    //
+    // Otherwise, we set ourselves up to start reading data from the buffer
+    // at a granularity of 32-bit words.
+    //
+    const uint32_t* dataCursor = (const uint32_t*) data;
+
+    // The first word of a printing buffer gives us the total number of
+    // words that were appended by GPU printing operations.
+    //
+    uint32_t wordsAppended = *dataCursor++;
+    //
+    // Under normal operation, we will stop processing data from
+    // the buffer after we have read everything the GPU wrote.
+    //
+    const uint32_t* dataEnd = dataCursor + wordsAppended;
+
+    // If the number of bytes the GPU code tried to write (including
+    // the counter stored in the first word of the buffer) exceeds what
+    // the buffer could hold, then we will print a warning message,
+    // indicating that the application might want to allocate a
+    // larger buffer.
+    //
+    size_t totalBytesWritten = sizeof(uint32_t) * (wordsAppended + 1);
+    if( totalBytesWritten > dataSize )
+    {
+        fprintf(stderr, "warning: GPU code attempted to write %llu bytes to the printing buffer, but only %llu bytes were available\n", (unsigned long long)totalBytesWritten, (unsigned long long)dataSize);
+
+        // If the buffer is full, then we only want to read through
+        // to the end of what is available.
+        //
+        dataEnd = ((const uint32_t*) data) + dataWordCount;
+    }
+
+    // We will now proceed to read off "commands" from the buffer,
+    // and execute those commands to print things to `stdout`.
+    //
+    while( dataCursor < dataEnd )
+    {
+        // The first word of each command is encoded to hold both
+        // an "opcode" for the command, and the number of "payload"
+        // words that follow the header.
+        //
+        uint32_t cmdHeader = *dataCursor++;
+        GPUPrintingOp op = GPUPrintingOp((cmdHeader >> 16) & 0xFFFF);
+        uint32_t payloadWordCount = cmdHeader & 0xFFFF;
+
+        // It is possible that we are at the end of the buffer,
+        // and not all of the payload words could be written.
+        // In such a case we will bail out of the printing loop to
+        // avoid crashes from a command trying to fetch data past
+        // the end of the buffer.
+        //
+        if( payloadWordCount > size_t(dataCursor - dataEnd) )
+        {
+            break;
+        }
+        //
+        // Otherwise, we can form a pointer to the payload words
+        // for this command, and advance our cursor past the payload
+        // to set up for reading the next command.
+        //
+        const uint32_t* payloadWords = dataCursor;
+        const uint32_t* payloadWordsEnd = payloadWords + payloadWordCount;
+        dataCursor += payloadWordCount;
+
+        // What to do with a command depends a lot on which "op" was selected.
+        switch( op )
+        {
+        default:
+            // If we encounter an op that we don't understand, there is a change
+            // that the buffer is corrupted or invalid, but we will try to
+            // soldier on and process further commands.
+            //
+            fprintf(stderr, "error: unexpected GPU printing op %d\n", op);
+            break;
+
+        case GPUPrintingOp::Nop:
+            // The `Nop` case is a no-op, and allows GPU code to conservatively
+            // allocate bytes in the printing buffer and then overwrite any
+            // excess with zeros to trim their allocation.
+            break;
+
+        case GPUPrintingOp::NewLine:
+            // The `NewLine` case prints a single '\n' and doesn't need any payload.
+            putchar('\n');
+            break;
+
+            // Simple value printing cases can just load the bytes of
+            // a value directly from the payload, and then print it.
+            //
+            // We will use a macro to avoid duplication the code shared
+            // between these cases.
+            //
+    #define CASE(OP, FORMAT, TYPE)                                              \
+        case GPUPrintingOp::OP:                                                 \
+            {                                                                   \
+                TYPE value;                                                     \
+                assert(payloadWordCount >= (sizeof(value) / sizeof(uint32_t))); \
+                memcpy(&value, payloadWords, sizeof(value));                    \
+                printf(FORMAT, value);                                          \
+            }                                                                   \
+            break
+
+        CASE(Int32,     "%d", int);
+        CASE(UInt32,    "%u", unsigned int);
+        CASE(Float32,   "%f", float);
+
+    #undef CASE
+
+        case GPUPrintingOp::String:
+            {
+                // Strings are handled differently than other values because
+                // most GPU graphics APIs do not natively support strings
+                // in shader code.
+                //
+                // Instead, strings are handled by the printing logic in
+                // terms of 32-bit hash codes. When printing a string,
+                // the generated GPU code will write the hash value for
+                // the string to the print buffer.
+                //
+                // On the CPU, we then read the hash code from the payload
+                // for this command:
+                //
+                assert(payloadWordCount >= 1);
+                StringHash hash = *payloadWords++;
+                //
+                // Next, we look up the hash value in a map from hash
+                // codes to strings, that was seeded with strings known
+                // to appear in the GPU code.
+                //
+                auto iter = m_hashedStrings.find(hash);
+                if(iter == m_hashedStrings.end())
+                {
+                    // If we didn't have a string to match that hash code in
+                    // our map, we can continue trying to print, but it is
+                    // likely that the application code needs to be configured
+                    // to pass in the right strings.
+                    //
+                    fprintf(stderr, "error: string with unknown hash %d\n", hash);
+                    continue;
+                }
+
+                // Once we've found a string that matches our hash
+                // code, we can print it.
+                //
+                // TODO: This code isn't robust against strings with
+                // embeded null bytes.
+                //s
+                printf("%s", iter->second.c_str());
+            }
+            break;
+
+        case GPUPrintingOp::PrintF:
+            {
+                // Handling a general-purpose `printf` call requires looking
+                // up the format string, and then processing further payload
+                // words based on the format.
+                //
+                // Finding the format string follows logic similar to the
+                // `GPUPrintingOp::String` case.
+                //
+                assert(payloadWords != payloadWordsEnd);
+                StringHash formatHash = *payloadWords++;
+
+                auto iter = m_hashedStrings.find(formatHash);
+                if(iter == m_hashedStrings.end())
+                {
+                    // If we didn't have a string to match that hash code in
+                    // our map, we can continue trying to print, but it is
+                    // likely that the application code needs to be configured
+                    // to pass in the right strings.
+                    //
+                    fprintf(stderr, "error: string with unknown hash %d\n", formatHash);
+                    continue;
+                }
+                std::string format = iter->second;
+
+                // We can't just route things through to the `printf()` function
+                // provided by standard library on the host CPU, because we don't
+                // have a portable way to translate the payload data into
+                // varargs that match the platform ABI.
+                //
+                // Instead, we have to scan through the string ourselves, and
+                // implement a subset of the full `printf()`.
+                //
+                const char* cursor = format.c_str();
+                const char* end = cursor + format.length();
+                while( cursor != end )
+                {
+                    int c = *cursor++;
+
+                    // If we see a byte other than `%`, then we can just
+                    // output it directly and keep scanning the format string.
+                    //
+                    if( c != '%' )
+                    {
+                        putchar(c);
+                        continue;
+                    }
+
+                    // Otherwise, we have a `%` which is supposed to
+                    // introduce a format specifier.
+                    //
+                    // If we are somehow at the end of the format
+                    // string, then the format was bad.
+                    //
+                    if( cursor == end )
+                    {
+                        fprintf(stderr, "error: unexpected '%%' at and of format string\n");
+                        break;
+                    }
+
+                    // If the next byte in the format string is
+                    // the `%` character, then it is an escaped
+                    // `%` so we should just emit it as-is and move along.
+                    //
+                    if( *cursor == '%' )
+                    {
+                        putchar(*cursor++);
+                        continue;
+                    }
+
+                    // TODO: For proper `printf()` support, we would need
+                    // to read:
+                    //
+                    // * optional flags: `-+#0`
+                    // * optional width specifier: a number or `*`
+                    // * optional precision specifier: `.` and a number or `*`
+                    // * optional length sub-specifiers: `h`, `l`, `ll`, etc.
+                    //
+                    // For now we ignore all those details and just
+                    // read a single-byte specifier.
+                    //
+                    int specifier = *cursor++;
+                    switch( specifier )
+                    {
+                    default:
+                        fprintf(stderr, "error: unexpected format specifier '%c' (0x%X)\n", specifier, specifier);
+                        break;
+
+
+                    // When processing each format speecifier, we will
+                    // read words from the payload, as necessary
+                    // to yield a value of the expected type.
+                    //
+                    // To reduce the amount of boilerplate, we will
+                    // use a macro to capture the shared code for
+                    // common cases.
+                    //
+                #define CASE(CHAR, FORMAT, TYPE)                                \
+                    case CHAR:                                                  \
+                        {                                                       \
+                            assert(payloadWords != payloadWordsEnd);            \
+                            TYPE value;                                         \
+                            memcpy(&value, payloadWords, sizeof(value));        \
+                            payloadWords += sizeof(value) / sizeof(uint32_t);   \
+                            printf(FORMAT, value);                              \
+                        }                                                       \
+                        break
+
+                    case 'i': // `%i` is just an alias for `%d`
+                    CASE('d', "%d", int);
+                    CASE('u', "%u", unsigned int);
+                    CASE('x', "%x", unsigned int);
+                    CASE('X', "%X", unsigned int);
+
+                    // Note: all of our printing support for floating-point
+                    // values will use the `float` type instead of `double`.
+                    // This isn't compatible with C rules, but makes more sense
+                    // for GPU code.
+                    //
+                    CASE('f', "%f", float);
+                    CASE('F', "%F", float);
+                    CASE('e', "%e", float);
+                    CASE('E', "%E", float);
+                    CASE('g', "%g", float);
+                    CASE('G', "%G", float);
+                    CASE('c', "%c", int);
+
+                #undef CASE
+
+                    case 's':
+                        {
+                            // The case for strings is more complicated
+                            // just because it has to deal with our hashing
+                            // scheme.
+                            //
+                            assert(payloadWords != payloadWordsEnd);
+                            StringHash hash = *payloadWords++;
+                            auto iter = m_hashedStrings.find(hash);
+                            if(iter == m_hashedStrings.end())
+                            {
+                                fprintf(stderr, "error: string with unknown hash %d\n", hash);
+                                continue;
+                            }
+                            printf("%s", iter->second.c_str());
+                        }
+                        break;
+                    }
+                }
+            }
+            break;
+        }
+    }
+
+
+}
author	Tim Foley <tfoleyNV@users.noreply.github.com>	2020-02-03 17:36:32 -0800
committer	GitHub <noreply@github.com>	2020-02-03 17:36:32 -0800
commit	b415760d7f166eaad7fa27daa09edc9a8964c37e (patch)
tree	1d9035d18dc6767ed4c4100a289365bd08a02514 /examples/gpu-printing/gpu-printing.cpp
parent	2c1fbf8330efc34b85e09ee9b101c6a55327778a (diff)