diff options
Diffstat (limited to 'examples/gpu-printing/printing.slang')
| -rw-r--r-- | examples/gpu-printing/printing.slang | 392 |
1 files changed, 392 insertions, 0 deletions
diff --git a/examples/gpu-printing/printing.slang b/examples/gpu-printing/printing.slang new file mode 100644 index 000000000..47d102f97 --- /dev/null +++ b/examples/gpu-printing/printing.slang @@ -0,0 +1,392 @@ +// printing.slang + +// This file provides the GPU code for a simple library that +// allows GPU shaders to print values to `stdout`. +// +// The implementation relies on a single buffer that must +// be bound to any shader that uses GPU printing. +// +RWStructuredBuffer<uint> gPrintBuffer; +// +// Encoding +// ======== +// +// The print buffer is organized in terms of 32-bit (`uint`) *words*. +// +// The first word in the print buffer is used as an atomic +// counter, and must be initialized to zero before a shader starts. +// By atomically incrementing this counter, GPU threads can allocate +// space for printing commands in the buffer. All printing +// commands are stored after the first word (so, starting at +// an index of 1). +// +// A printing command starts with a single-word header, where +// the high 32 bits specify the *op* for the command, and the +// low 32 bits specify the number of *payload* words in in +// the command. The payload is the words that immediately +// follow the command header. +// +// Note that the header word for a command is *not* included +// in the count of words in the low 16 bits. +// +// The opcode values need to be shared between CPU and GPU +// code, so we use a bit of preprocessor trickery here to +// generate an `enum` type with all the opcodes. +// +enum PrintingOp +{ +#define GPU_PRINTING_OP(NAME) NAME , +#include "gpu-printing-ops.h" +}; + +// It is critical that when printing something, we allocate +// all the words it requires in the print buffer contiguously. +// For example, if the user writes: +// +// println("Thread number ", threadID, " has value ", someValue); +// +// It would be very bad if the output from different threads +// got interleaved, such that one cannot determine which value +// goes with which thread. +// +// Allocating individual print *commands* atomically is not necessarily +// enough: instead, we need to allocate the storage for all +// the commands that comprise a `print()` call at once. +// +// The core allocation operation here is `_allocatePrintWords()` + + // Allocate space for one or more print commands. +uint _allocatePrintWords(uint wordCount) +{ + // We allocate the required number of words with an atomic, and + // get back the old value of the counter, which tells us the + // offset at which the words for our printing operation should start. + // + uint wordOffset = 0; + InterlockedAdd(gPrintBuffer[0], wordCount, wordOffset); + + // Because the first word of the buffer is reserved for the counter, + // and the counter value starts at zero, we need to add one to + // get to the actual offset for the data to be written. + // + return wordOffset + 1; +} + +// Java-style `println` +// ==================== +// +// We will start by building up a Java-style `println()` function +// that accepts zero or more values to print, and prints them +// atomically (without any other thread being able to interleave +// in the printed output), followed by a newline. +// +// We will define a wrapper around `_allocatePrintWords()` +// that captures the main idiom for `println()`. +// +uint _beginPrintln(uint wordCount) +{ + // The `wordCount` passed in will represent the + // number of words required for the arguments + // to `println`, but won't include the terminating + // newline. + // + // Thus we will allocate one extra word to allow + // us to append a newline to the print command we + // generate. + // + uint wordOffset = _allocatePrintWords(wordCount + 1); + // + // We will then initialize the last word of the command + // that was allocated to a `NewLine` command. + // + gPrintBuffer[wordOffset + wordCount] = uint(PrintingOp.NewLine) << 16; + return wordOffset; +} +// +// With the `_beginPrintLn()` function handling all the heavy-lifting, +// we can define a zero-argument `println()` trivially. +// +void println() +{ + _beginPrintln(0); +} + +// We could continue to build a family of overloaded `println()` functions, like: +// +// void println(); +// void println(int value); +// void println(float value); +// void println(uint value); +// ... +// +// but it should be clear that this approach doesn't scale at all +// to functions with multiple argumenst: +// +// void println(int a, int b); +// void println(float a, int b); +// void println(int a, float b); +// ... +// +// Using the features of the Slang language, we can build a framework +// for a more scalable solution. +// +// We start by defining an `interface` that captures the essence +// of what a type of printable values needs to support. +// + +interface IPrintable +{ + // Every printable value needs to be able to compute the number + // of words required to write it into the print buffer. + // + uint getPrintWordCount(); + + // A printable value must also support writing those words into + // a buffer, once the appropriate offset to write to is known. + // + void writePrintWords(RWStructuredBuffer<uint> buffer, uint offset); +}; + +// With the `IPrintable` interface in place, we can now write +// a generic one-argument `println()` that works with any +// printable value. + +void println<T : IPrintable>(T value) +{ + // In order to print a value we first compute the number of words + // it needs in the print buffer. + // + uint wordCount = value.getPrintWordCount(); + + // Then we can use `_beginPrint()` to allocate those words and + // find the starting offset to write to. + // + uint wordOffset = _beginPrintln(wordCount); + + // And finally we can ask the value to write itself into the + // buffer at the given offset. + // + value.writePrintWords(gPrintBuffer, wordOffset); +} + +// Of course, in order to be able to print things with this `println()` +// operation, we need to have some types that implement `IPrintable`. +// +// In particular, we'd like to be able to print built-in types like +// `uint`, but we don't have access to the declaration of `uint` +// to be able to change it! +// +// It just so happens that another Slang feature, `extension` +// declarations, lets us extend a type with new methods *and* +// allows us to add new interface implementations to it. +// +// We can therefore making the exisint Slang `uint` type be +// printable. + +extension uint : IPrintable // <-- Note: we are adding a conformance to `IPrintable here` +{ + // Printing a `uint` uses up two words in the buffer + // + uint getPrintWordCount() { return 2; } + + // Writing a command to print a `uint` is straightforward, + // given knowledge of our encoding. + // + void writePrintWords(RWStructuredBuffer<uint> buffer, uint offset) + { + buffer[offset++] = (uint(PrintingOp.UInt32) << 16) | 1; + buffer[offset++] = this; + } +} + +// HACK: Because we currently don't have a `String` type that we +// can pass down into subroutines, we will be using the hash +// code of a string to represent the string itself. These hash +// codes currently have type `int`, so our printing library +// will *always* assume that an `int` represents a hashed +// string, and thus we can't print plain old `int`s right now. + +typedef int StringHash; + +extension StringHash : IPrintable +{ + uint getPrintWordCount() { return 2; } + + void writePrintWords(RWStructuredBuffer<uint> buffer, uint offset) + { + buffer[offset++] = (uint(PrintingOp.String) << 16) | 1; + buffer[offset++] = this; + } +} + +// Where generics and interfaces start to pay off is when we want +// to scale up to a two-argument `println()` function that can +// work for any combination of printable types. + + // Print two values, `a` and `b`. + // + // This function ensures that the values of `a` and `b` + // are written out atomically, without values printed + // from other threads spliced in between. + // +void println<A : IPrintable, B : IPrintable>(A a, B b) +{ + // To print two values atomically, we must first + // allocate the total number of words that are + // required to print the values. + // + uint wordCount = 0; + uint aCount = a.getPrintWordCount(); wordCount += aCount; + uint bCount = b.getPrintWordCount(); wordCount += bCount; + + // Then we can allocate those words atomically + // with a single `_beginPrint()`. + // + uint wordOffset = _beginPrintln(wordCount); + + // Finally, we can write the words for each of `a` + // and `b` to an appropriate offset in the print buffer, + // without having to worry about other threads inserting + // print commands between them. + // + a.writePrintWords(gPrintBuffer, wordOffset); wordOffset += aCount; + b.writePrintWords(gPrintBuffer, wordOffset); wordOffset += bCount; +} + +// We can then continue to build up to `println()` functions with +// three or more arguments. + +void println<A : IPrintable, B : IPrintable, C : IPrintable>( + A a, B b, C c) +{ + uint wordCount = 0; + uint aCount = a.getPrintWordCount(); wordCount += aCount; + uint bCount = b.getPrintWordCount(); wordCount += bCount; + uint cCount = c.getPrintWordCount(); wordCount += cCount; + + uint wordOffset = _beginPrintln(wordCount); + + a.writePrintWords(gPrintBuffer, wordOffset); wordOffset += aCount; + b.writePrintWords(gPrintBuffer, wordOffset); wordOffset += bCount; + c.writePrintWords(gPrintBuffer, wordOffset); wordOffset += cCount; +} + +// Further generalizing to four or more arguments is straightforward but tedious. +// +// A future version of Slang may support variadic functions, variadic generics, +// or some other facilities to make writing code like this easier. + +// An important benefit of the approach we have taken here with an `IPrintable` +// interface is that arbitrary user-defined types can implement `IPrintable` +// and will work correctly with the existing `println()` definitions in +// this file. + +// C-style `printf()` +// ================== +// +// Many developers who use C/C++ would prefer to be able to use traditional +// `printf()` with format strings. `printf`-based printing tends to be +// more readable than `println`-style alternatives, but comes at the cost +// of only supported a more restricted set of types for printing. +// +// Similar to the `println()` case, our Slang implementation of `printf()` +// starts with an allocation function that does the behind-the-scenes +// work. +// + +uint _beginPrintf(int formatStrngHash, uint wordCount) +{ + // A printf command will start with the usual command header word, + // along with a word for the (hashed) format string. These + // two header words will be followed by the user-provided payload + // words for all the format arguments. + // + uint wordOffset = _allocatePrintWords(wordCount + 2); + gPrintBuffer[wordOffset++] = (uint(PrintingOp.PrintF) << 16) | (wordCount+1); + gPrintBuffer[wordOffset++] = formatStrngHash; + return wordOffset; +} + +// Now we will define an interface for types that are allowed to +// appear as format arguments to `printf()`. + +interface IPrintf +{ + // A `printf()` format argument must know how many words it encodes into + uint getPrintfWordCount(); + + // A `printf()` format argument must know how to encode itself + void writePrintfWords(RWStructuredBuffer<uint> buffer, uint offset); +}; + +// The extension to make `uint` compatible with `printf()` is straightforward. + +extension uint : IPrintf +{ + // A `uint` only consumes one word in the variadic payload. + // + // Note: unlike the case for `IPrintable` above, the encoding + // for format args for `printf()` doesn't include type information. + // + uint getPrintfWordCount() { return 1; } + + // Writing the required data to the payload for `printf()` is simple + void writePrintfWords(RWStructuredBuffer<uint> buffer, uint offset) + { + buffer[offset++] = this; + } +} + +extension StringHash : IPrintf +{ + uint getPrintfWordCount() { return 1; } + + void writePrintfWords(RWStructuredBuffer<uint> buffer, uint offset) + { + buffer[offset++] = this; + } +} + + +// A `printf()` with no format arguments can just call back to `_beginPrintf()` +void printf(StringHash format) +{ + _beginPrintf(format, 0); +} + +// The `printf()` cases with one or more format arguments are all quite similar. + +void printf<A : IPrintf>(StringHash format, A a) +{ + // We need to compute the words required by each format argument + // and sum them up. + // + uint wordCount = 0; + uint aCount = a.getPrintfWordCount(); wordCount += aCount; + + // We need to allocate a `printf()` command in the buffer with + // the required number of words for format argument payload. + // + uint wordOffset = _beginPrintf(format, wordCount); + + // We need to write each format argument to the appropriate offset + // in the payload part of the `printf()` command. + // + a.writePrintfWords(gPrintBuffer, wordOffset); wordOffset += aCount; +} + +void printf<A : IPrintf, B : IPrintf>(StringHash format, A a, B b) +{ + uint wordCount = 0; + uint aCount = a.getPrintfWordCount(); wordCount += aCount; + uint bCount = b.getPrintfWordCount(); wordCount += bCount; + + uint wordOffset = _beginPrintf(format, wordCount); + + a.writePrintfWords(gPrintBuffer, wordOffset); wordOffset += aCount; + b.writePrintfWords(gPrintBuffer, wordOffset); wordOffset += bCount; +} + +// Extending this `printf()` implementation to handle more format arguments +// is straightforward, but tedious. Future versions of Slang might add +// support for variadic generics, which could make this code more compact. |
