summaryrefslogtreecommitdiffstats
path: root/examples/gpu-printing
diff options
context:
space:
mode:
Diffstat (limited to 'examples/gpu-printing')
-rw-r--r--examples/gpu-printing/README.md47
-rw-r--r--examples/gpu-printing/gpu-printing-ops.h55
-rw-r--r--examples/gpu-printing/gpu-printing.cpp390
-rw-r--r--examples/gpu-printing/gpu-printing.h57
-rw-r--r--examples/gpu-printing/gpu-printing.vcxproj190
-rw-r--r--examples/gpu-printing/gpu-printing.vcxproj.filters32
-rw-r--r--examples/gpu-printing/kernels.slang38
-rw-r--r--examples/gpu-printing/main.cpp225
-rw-r--r--examples/gpu-printing/printing.slang392
9 files changed, 1426 insertions, 0 deletions
diff --git a/examples/gpu-printing/README.md b/examples/gpu-printing/README.md
new file mode 100644
index 000000000..826f4e316
--- /dev/null
+++ b/examples/gpu-printing/README.md
@@ -0,0 +1,47 @@
+GPU Printing
+============
+
+This example demonstrates how support for printing formatted messages from GPU shader code can be implemented in application code, using language and API features provided by Slang.
+
+Overview
+--------
+
+If you want to read the code here, start with `kernels.slang`, which contains a compute shader entry point showing how simple printing operations in shader code can be made.
+Once you see the client code, you will probably want to understand the implementation, so that you can add these features to your own codebase.
+
+The GPU/shader part of the implementation resides in `printing.slang`, which provides a stand-alone Slang module intended to be brought into your code with `import`.
+The comments in that file explain how the low-level encoding of print data into buffers is performed, and then also show how Slang language mechanisms can be used to wrap that low-level implementation in usable and extensible syntax.
+
+The CPU part of the implementation resides in `gpu-printing.{h,cpp}`, which are responsible for taking GPU-generated buffers encoded by the code above, and translating them into host-side calls to C `printf()` and other console printing operations.
+The CPU code also shows how to use the Slang reflection API to extract information from a compiled program to enable printing of strings by their hash codes.
+
+The `main.cpp` file implements a small host application that loads the compute shader and executes it using the D3D11 API.
+The code in this file is not especially relevant to the printing system.
+
+Adding printing support to your own codebase
+--------------------------------------------
+
+The code in this example is meant to provide a starting point for applications/frameworks/engines that want to allow shader code to print messages, for debugging, logging, etc.
+You can start by copying the `gpu-printing.{h,cpp}`, `gpu-printing-ops.h`, and `printing.slang` files into your codebase, and then modifying them to meet your needs.
+
+The implementation presented here is not feature-complete, so you may want to extend and customize it by:
+
+* Adapting it to use the graphics API or wrapper layer appropriate to your codebase
+
+* Making more GPU data types printable (including types specific to your application)
+
+* Adding overloads of `println()` and `printf()` to support more arguments
+
+* Customizing the encoding of print commands to make better use of space based on application-specific constraints
+
+* Handling extended `printf()` formatting (width, precision, etc.) in the CPU code
+
+Caveats
+-------
+
+This code is not battle-tested, and it makes no promises about security.
+It is probable that a malformed or malicious GPU shader could write data into the "print buffer" that causes the CPU code to invoke `printf()` or other C standard library functions with invalid arguments.
+
+In this implementation, GPU printing commands are only "flushed" by the CPU on draw/dispatch boundaries.
+This means that the printing approach here cannot easily be used to diagnose deadlocks, infinite loops, or hardware/driver crashes.
+Extending the implementation to better support such cases would likely depend on using platform- or hardware-specific knowledge or functionality.
diff --git a/examples/gpu-printing/gpu-printing-ops.h b/examples/gpu-printing/gpu-printing-ops.h
new file mode 100644
index 000000000..bace8bc8a
--- /dev/null
+++ b/examples/gpu-printing/gpu-printing-ops.h
@@ -0,0 +1,55 @@
+// gpu-printing-ops.h
+
+// This file defines the various opcodes that
+// will be used for GPU printing commands.
+//
+// Because the CPU will be doing printing on
+// behalf of the GPU, the two processors need
+// to agree on the values of these opcodes.
+// Therefore we have set up this file to be
+// included into both the C++ `gpu-printing.cpp`
+// implementation and the Slang `printing.slang`
+// file.
+//
+// Client code should define the `GPU_PRINTING_OP`
+// macro appropriately, before including this file.
+//
+#ifndef GPU_PRINTING_OP
+#error "Must define 'GPU_PRINTING_OP(NAME)' before including"
+#endif
+
+// The `Nop` opcode is used to represent a vacuous
+// printing command that does nothing.
+//
+// Its main purpose is to allow GPU code to zero
+// out parts of the printing buffer to disable
+// or shorten a printing command that was started.
+//
+GPU_PRINTING_OP(Nop)
+
+// The `NewLine` command is a compact way to
+// print a newline character (`\n`)
+GPU_PRINTING_OP(NewLine)
+
+// Simple value types like `int`, `uint`, and `float`
+// can have their own printing commands for when
+// they will be printed directly.
+//
+GPU_PRINTING_OP(Int32)
+GPU_PRINTING_OP(UInt32)
+GPU_PRINTING_OP(Float32)
+
+// String values are encoded in the print buffer as
+// a 32-bit hash code, and are thus similar to
+// the simple value cases in practice.
+//
+GPU_PRINTING_OP(String)
+
+// The final opcode we define is a complex `printf()`
+// style operation that combines a format string with
+// a variable amount of argument data to be referenced
+// by that format string.
+//
+GPU_PRINTING_OP(PrintF)
+
+#undef GPU_PRINTING_OP
diff --git a/examples/gpu-printing/gpu-printing.cpp b/examples/gpu-printing/gpu-printing.cpp
new file mode 100644
index 000000000..ff35fd0b3
--- /dev/null
+++ b/examples/gpu-printing/gpu-printing.cpp
@@ -0,0 +1,390 @@
+// gpu-printing.cpp
+#include "gpu-printing.h"
+
+#include <assert.h>
+
+// This file implements the CPU side of a simple GPU printing
+// library. The CPU code is responsible for scanning through
+// buffers of "print commands" produced by GPU shaders, and
+// executing those commands to print output.
+//
+// The opcodes for the printing commands are shared between
+// CPU and GPU, and also between C++ and Slang, by putting
+// their declarations in the `gpu-printing-ops.h` file
+// and including them into both the host and device code
+// to generate `enum` types.
+//
+// Opcodes for GPU printing commands, as seen by the host code.
+//
+// The enumerators are not listed here; they are generated by
+// including `gpu-printing-ops.h` with `GPU_PRINTING_OP(NAME)`
+// defined to expand to `NAME,`, so each `GPU_PRINTING_OP(...)`
+// line in that file becomes one enumerator, numbered from zero
+// in the order the lines appear.
+//
+enum class GPUPrintingOp : uint32_t
+{
+#define GPU_PRINTING_OP(NAME) NAME,
+#include "gpu-printing-ops.h"
+};
+
+// Strings never travel through the print buffer as raw characters;
+// GPU code writes a hash of the string data instead. To turn a hash
+// back into text, the host needs a table of every string the shader
+// program might print, keyed by hash code.
+//
+// `loadStrings` fills that table from the Slang reflection data for a
+// program: it enumerates the program's hashed string literals and
+// records each one in `m_hashedStrings` under its hash.
+//
+void GPUPrinting::loadStrings(slang::ProgramLayout* slangReflection)
+{
+    // The reflection object reports how many string literals appear
+    // in the linked program; visit each of them in turn.
+    //
+    const SlangUInt literalCount = slangReflection->getHashedStringCount();
+    for( SlangUInt index = 0; index != literalCount; ++index )
+    {
+        // Fetch the bytes of the literal, along with their count.
+        //
+        size_t byteCount = 0;
+        char const* bytes = slangReflection->getHashedString(index, &byteCount);
+
+        // Ask Slang for the hash code of those bytes.
+        //
+        // Note: the exact hashing algorithm that Slang uses for
+        // string literals is not currently documented, and may
+        // change in future releases of the compiler.
+        //
+        const StringHash key = spComputeStringHash(bytes, byteCount);
+
+        // Record the hash -> string mapping. If an entry with the same
+        // hash is already present, it is left unchanged.
+        //
+        m_hashedStrings.emplace(key, std::string(bytes, bytes + byteCount));
+    }
+}
+
+// The main service that the host code for the GPU printing library
+// provides is a way to execute the printing commands that have been
+// encoded to a buffer by shader code.
+//
+// `data` points to `dataSize` bytes of print buffer contents: one
+// 32-bit counter word (the number of words appended by the GPU),
+// followed by the command words themselves. Each command is a header
+// word (opcode in the high 16 bits, payload word count in the low
+// 16 bits) followed by its payload words.
+//
+// Printed output goes to `stdout`; problems found in the buffer are
+// reported on `stderr`.
+//
+void GPUPrinting::processGPUPrintCommands(const void* data, size_t dataSize)
+{
+    // Everything that the GPU code writes to the buffer will be in
+    // a granularity of 32-bit words, so we start by computing
+    // how many words, total, will fit in the buffer.
+    //
+    uint32_t dataWordCount = uint32_t(dataSize / sizeof(uint32_t));
+    //
+    // If the buffer doesn't even have enough space for the leading counter,
+    // then there is nothing to print.
+    //
+    if( dataWordCount < 1 )
+    {
+        fprintf(stderr, "error: expected at least 4 bytes in GPU printing buffer\n");
+        return;
+    }
+    //
+    // Otherwise, we set ourselves up to start reading data from the buffer
+    // at a granularity of 32-bit words.
+    //
+    const uint32_t* dataCursor = (const uint32_t*) data;
+
+    // The first word of a printing buffer gives us the total number of
+    // words that were appended by GPU printing operations.
+    //
+    uint32_t wordsAppended = *dataCursor++;
+
+    // The buffer itself can hold only this many words after the counter.
+    //
+    uint32_t wordsAvailable = dataWordCount - 1;
+
+    // By default we read through to the end of what the buffer can hold.
+    // The end pointer is always derived from the buffer size (never from
+    // the unvalidated GPU counter alone), so it cannot point outside the
+    // buffer.
+    //
+    const uint32_t* dataEnd = dataCursor + wordsAvailable;
+
+    // If the number of bytes the GPU code tried to write (including
+    // the counter stored in the first word of the buffer) exceeds what
+    // the buffer could hold, then we will print a warning message,
+    // indicating that the application might want to allocate a
+    // larger buffer.
+    //
+    if( wordsAppended > wordsAvailable )
+    {
+        // The byte count is computed in 64 bits so that a counter of
+        // 0xFFFFFFFF cannot wrap around to zero.
+        //
+        unsigned long long totalBytesWritten = sizeof(uint32_t) * ((unsigned long long)wordsAppended + 1);
+        fprintf(stderr, "warning: GPU code attempted to write %llu bytes to the printing buffer, but only %llu bytes were available\n", totalBytesWritten, (unsigned long long)dataSize);
+    }
+    else
+    {
+        // Under normal operation, we will stop processing data from
+        // the buffer after we have read everything the GPU wrote.
+        //
+        dataEnd = dataCursor + wordsAppended;
+    }
+
+    // We will now proceed to read off "commands" from the buffer,
+    // and execute those commands to print things to `stdout`.
+    //
+    while( dataCursor < dataEnd )
+    {
+        // The first word of each command is encoded to hold both
+        // an "opcode" for the command, and the number of "payload"
+        // words that follow the header.
+        //
+        uint32_t cmdHeader = *dataCursor++;
+        GPUPrintingOp op = GPUPrintingOp((cmdHeader >> 16) & 0xFFFF);
+        uint32_t payloadWordCount = cmdHeader & 0xFFFF;
+
+        // It is possible that we are at the end of the buffer,
+        // and not all of the payload words could be written.
+        // In such a case we will bail out of the printing loop to
+        // avoid crashes from a command trying to fetch data past
+        // the end of the buffer.
+        //
+        // Note the operand order: `dataEnd - dataCursor` is the
+        // (non-negative) number of words still available to read.
+        //
+        if( payloadWordCount > size_t(dataEnd - dataCursor) )
+        {
+            break;
+        }
+        //
+        // Otherwise, we can form a pointer to the payload words
+        // for this command, and advance our cursor past the payload
+        // to set up for reading the next command.
+        //
+        const uint32_t* payloadWords = dataCursor;
+        const uint32_t* payloadWordsEnd = payloadWords + payloadWordCount;
+        dataCursor += payloadWordCount;
+
+        // What to do with a command depends a lot on which "op" was selected.
+        switch( op )
+        {
+        default:
+            // If we encounter an op that we don't understand, there is a chance
+            // that the buffer is corrupted or invalid, but we will try to
+            // soldier on and process further commands.
+            //
+            // The scoped enum is converted explicitly so that the argument
+            // matches the `%d` conversion.
+            //
+            fprintf(stderr, "error: unexpected GPU printing op %d\n", int(op));
+            break;
+
+        case GPUPrintingOp::Nop:
+            // The `Nop` case is a no-op, and allows GPU code to conservatively
+            // allocate bytes in the printing buffer and then overwrite any
+            // excess with zeros to trim their allocation.
+            break;
+
+        case GPUPrintingOp::NewLine:
+            // The `NewLine` case prints a single '\n' and doesn't need any payload.
+            putchar('\n');
+            break;
+
+        // Simple value printing cases can just load the bytes of
+        // a value directly from the payload, and then print it.
+        //
+        // We will use a macro to avoid duplicating the code shared
+        // between these cases.
+        //
+        #define CASE(OP, FORMAT, TYPE)                                          \
+        case GPUPrintingOp::OP:                                                 \
+            {                                                                   \
+                TYPE value;                                                     \
+                assert(payloadWordCount >= (sizeof(value) / sizeof(uint32_t))); \
+                memcpy(&value, payloadWords, sizeof(value));                    \
+                printf(FORMAT, value);                                          \
+            }                                                                   \
+            break
+
+        CASE(Int32,     "%d", int);
+        CASE(UInt32,    "%u", unsigned int);
+        CASE(Float32,   "%f", float);
+
+        #undef CASE
+
+        case GPUPrintingOp::String:
+            {
+                // Strings are handled differently than other values because
+                // most GPU graphics APIs do not natively support strings
+                // in shader code.
+                //
+                // Instead, strings are handled by the printing logic in
+                // terms of 32-bit hash codes. When printing a string,
+                // the generated GPU code will write the hash value for
+                // the string to the print buffer.
+                //
+                // On the CPU, we then read the hash code from the payload
+                // for this command:
+                //
+                assert(payloadWordCount >= 1);
+                StringHash hash = *payloadWords++;
+                //
+                // Next, we look up the hash value in a map from hash
+                // codes to strings, that was seeded with strings known
+                // to appear in the GPU code.
+                //
+                auto iter = m_hashedStrings.find(hash);
+                if(iter == m_hashedStrings.end())
+                {
+                    // If we didn't have a string to match that hash code in
+                    // our map, we can continue trying to print, but it is
+                    // likely that the application code needs to be configured
+                    // to pass in the right strings.
+                    //
+                    // (`continue` here moves on to the next command in the buffer.)
+                    //
+                    fprintf(stderr, "error: string with unknown hash %d\n", hash);
+                    continue;
+                }
+
+                // Once we've found a string that matches our hash
+                // code, we can print it.
+                //
+                // TODO: This code isn't robust against strings with
+                // embedded null bytes.
+                //
+                printf("%s", iter->second.c_str());
+            }
+            break;
+
+        case GPUPrintingOp::PrintF:
+            {
+                // Handling a general-purpose `printf` call requires looking
+                // up the format string, and then processing further payload
+                // words based on the format.
+                //
+                // Finding the format string follows logic similar to the
+                // `GPUPrintingOp::String` case.
+                //
+                assert(payloadWords != payloadWordsEnd);
+                StringHash formatHash = *payloadWords++;
+
+                auto iter = m_hashedStrings.find(formatHash);
+                if(iter == m_hashedStrings.end())
+                {
+                    // If we didn't have a string to match that hash code in
+                    // our map, we can continue trying to print, but it is
+                    // likely that the application code needs to be configured
+                    // to pass in the right strings.
+                    //
+                    fprintf(stderr, "error: string with unknown hash %d\n", formatHash);
+                    continue;
+                }
+
+                // The map is not modified while we scan the format, so we
+                // can refer to the stored string directly instead of copying it.
+                //
+                const std::string& format = iter->second;
+
+                // We can't just route things through to the `printf()` function
+                // provided by standard library on the host CPU, because we don't
+                // have a portable way to translate the payload data into
+                // varargs that match the platform ABI.
+                //
+                // Instead, we have to scan through the string ourselves, and
+                // implement a subset of the full `printf()`.
+                //
+                const char* cursor = format.c_str();
+                const char* end = cursor + format.length();
+                while( cursor != end )
+                {
+                    int c = *cursor++;
+
+                    // If we see a byte other than `%`, then we can just
+                    // output it directly and keep scanning the format string.
+                    //
+                    if( c != '%' )
+                    {
+                        putchar(c);
+                        continue;
+                    }
+
+                    // Otherwise, we have a `%` which is supposed to
+                    // introduce a format specifier.
+                    //
+                    // If we are somehow at the end of the format
+                    // string, then the format was bad.
+                    //
+                    if( cursor == end )
+                    {
+                        fprintf(stderr, "error: unexpected '%%' at end of format string\n");
+                        break;
+                    }
+
+                    // If the next byte in the format string is
+                    // the `%` character, then it is an escaped
+                    // `%` so we should just emit it as-is and move along.
+                    //
+                    if( *cursor == '%' )
+                    {
+                        putchar(*cursor++);
+                        continue;
+                    }
+
+                    // TODO: For proper `printf()` support, we would need
+                    // to read:
+                    //
+                    // * optional flags: `-+#0`
+                    // * optional width specifier: a number or `*`
+                    // * optional precision specifier: `.` and a number or `*`
+                    // * optional length sub-specifiers: `h`, `l`, `ll`, etc.
+                    //
+                    // For now we ignore all those details and just
+                    // read a single-byte specifier.
+                    //
+                    int specifier = *cursor++;
+                    switch( specifier )
+                    {
+                    default:
+                        fprintf(stderr, "error: unexpected format specifier '%c' (0x%X)\n", specifier, specifier);
+                        break;
+
+                    // When processing each format specifier, we will
+                    // read words from the payload, as necessary
+                    // to yield a value of the expected type.
+                    //
+                    // To reduce the amount of boilerplate, we will
+                    // use a macro to capture the shared code for
+                    // common cases.
+                    //
+                    #define CASE(CHAR, FORMAT, TYPE)                                \
+                    case CHAR:                                                      \
+                        {                                                           \
+                            assert(payloadWords != payloadWordsEnd);                \
+                            TYPE value;                                             \
+                            memcpy(&value, payloadWords, sizeof(value));            \
+                            payloadWords += sizeof(value) / sizeof(uint32_t);       \
+                            printf(FORMAT, value);                                  \
+                        }                                                           \
+                        break
+
+                    case 'i': // `%i` is just an alias for `%d`
+                    CASE('d', "%d", int);
+                    CASE('u', "%u", unsigned int);
+                    CASE('x', "%x", unsigned int);
+                    CASE('X', "%X", unsigned int);
+
+                    // Note: all of our printing support for floating-point
+                    // values will use the `float` type instead of `double`.
+                    // This isn't compatible with C rules, but makes more sense
+                    // for GPU code.
+                    //
+                    CASE('f', "%f", float);
+                    CASE('F', "%F", float);
+                    CASE('e', "%e", float);
+                    CASE('E', "%E", float);
+                    CASE('g', "%g", float);
+                    CASE('G', "%G", float);
+                    CASE('c', "%c", int);
+
+                    #undef CASE
+
+                    case 's':
+                        {
+                            // The case for strings is more complicated
+                            // just because it has to deal with our hashing
+                            // scheme.
+                            //
+                            assert(payloadWords != payloadWordsEnd);
+                            StringHash hash = *payloadWords++;
+                            auto iter = m_hashedStrings.find(hash);
+                            if(iter == m_hashedStrings.end())
+                            {
+                                // (`continue` here resumes scanning the rest
+                                // of the format string.)
+                                //
+                                fprintf(stderr, "error: string with unknown hash %d\n", hash);
+                                continue;
+                            }
+                            printf("%s", iter->second.c_str());
+                        }
+                        break;
+                    }
+                }
+            }
+            break;
+        }
+    }
+}
diff --git a/examples/gpu-printing/gpu-printing.h b/examples/gpu-printing/gpu-printing.h
new file mode 100644
index 000000000..81c2e615f
--- /dev/null
+++ b/examples/gpu-printing/gpu-printing.h
@@ -0,0 +1,57 @@
+// gpu-printing.h
+#pragma once
+
+// This file provides the CPU side support for a basic GPU
+// printing system. The GPU implementation of the system
+// is in `printing.slang`.
+
+// The host side of the system needs to be able to load
+// strings that were specified in Slang shader code, and
+// for that it will use the Slang reflection API.
+//
+#include <slang.h>
+
+// We also need a way to store the data for strings that
+// were used in shader code, and we will go ahead and
+// use the C++ STL for that, in order to make this
+// code moderately portable.
+//
+#include <map>
+#include <string>
+
+/// Host-side state needed to execute print commands generated by GPU shaders.
+struct GPUPrinting
+{
+    /// Register the string literals used by a Slang program.
+    ///
+    /// `slangReflection` should be the layout and reflection object
+    /// for a Slang shader program that might produce printed output.
+    /// Every string referenced by that program is added to the
+    /// database used to map string hashes back to the original strings.
+    ///
+    void loadStrings(slang::ProgramLayout* slangReflection);
+
+    /// Execute a buffer of GPU printing commands, writing output to `stdout`.
+    ///
+    /// Print commands are read from the buffer pointed to by `data`
+    /// and executed to produce output.
+    ///
+    /// The buffer (`dataSize` bytes starting at `data`) should be
+    /// allocated in host-visible memory.
+    ///
+    /// The first four bytes of the buffer should have been cleared to
+    /// zero before the GPU work was executed.
+    ///
+    /// If GPU work attempted to write more data than the buffer can
+    /// fit, a warning is printed to `stderr`, and printing commands
+    /// that could not fit completely in the buffer are skipped.
+    ///
+    void processGPUPrintCommands(const void* data, size_t dataSize);
+
+private:
+    /// Key type for the string database: the hash code of a string.
+    using StringHash = int;
+
+    /// Maps the hash code of each loaded string back to its text.
+    std::map<StringHash, std::string> m_hashedStrings;
+};
diff --git a/examples/gpu-printing/gpu-printing.vcxproj b/examples/gpu-printing/gpu-printing.vcxproj
new file mode 100644
index 000000000..d97c97914
--- /dev/null
+++ b/examples/gpu-printing/gpu-printing.vcxproj
@@ -0,0 +1,190 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <ItemGroup Label="ProjectConfigurations">
+ <ProjectConfiguration Include="Debug|Win32">
+ <Configuration>Debug</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Debug|x64">
+ <Configuration>Debug</Configuration>
+ <Platform>x64</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Release|Win32">
+ <Configuration>Release</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Release|x64">
+ <Configuration>Release</Configuration>
+ <Platform>x64</Platform>
+ </ProjectConfiguration>
+ </ItemGroup>
+ <PropertyGroup Label="Globals">
+ <ProjectGuid>{57C81DD3-4304-213D-AC16-39349871C957}</ProjectGuid>
+ <IgnoreWarnCompileDuplicatedFilename>true</IgnoreWarnCompileDuplicatedFilename>
+ <Keyword>Win32Proj</Keyword>
+ <RootNamespace>gpu-printing</RootNamespace>
+ </PropertyGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+ <ConfigurationType>Application</ConfigurationType>
+ <UseDebugLibraries>true</UseDebugLibraries>
+ <CharacterSet>Unicode</CharacterSet>
+ <PlatformToolset>v140</PlatformToolset>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+ <ConfigurationType>Application</ConfigurationType>
+ <UseDebugLibraries>true</UseDebugLibraries>
+ <CharacterSet>Unicode</CharacterSet>
+ <PlatformToolset>v140</PlatformToolset>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+ <ConfigurationType>Application</ConfigurationType>
+ <UseDebugLibraries>false</UseDebugLibraries>
+ <CharacterSet>Unicode</CharacterSet>
+ <PlatformToolset>v140</PlatformToolset>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+ <ConfigurationType>Application</ConfigurationType>
+ <UseDebugLibraries>false</UseDebugLibraries>
+ <CharacterSet>Unicode</CharacterSet>
+ <PlatformToolset>v140</PlatformToolset>
+ </PropertyGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+ <ImportGroup Label="ExtensionSettings">
+ </ImportGroup>
+ <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ </ImportGroup>
+ <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ </ImportGroup>
+ <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ </ImportGroup>
+ <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ </ImportGroup>
+ <PropertyGroup Label="UserMacros" />
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <LinkIncremental>true</LinkIncremental>
+ <OutDir>..\..\bin\windows-x86\debug\</OutDir>
+ <IntDir>..\..\intermediate\windows-x86\debug\gpu-printing\</IntDir>
+ <TargetName>gpu-printing</TargetName>
+ <TargetExt>.exe</TargetExt>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+ <LinkIncremental>true</LinkIncremental>
+ <OutDir>..\..\bin\windows-x64\debug\</OutDir>
+ <IntDir>..\..\intermediate\windows-x64\debug\gpu-printing\</IntDir>
+ <TargetName>gpu-printing</TargetName>
+ <TargetExt>.exe</TargetExt>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <LinkIncremental>false</LinkIncremental>
+ <OutDir>..\..\bin\windows-x86\release\</OutDir>
+ <IntDir>..\..\intermediate\windows-x86\release\gpu-printing\</IntDir>
+ <TargetName>gpu-printing</TargetName>
+ <TargetExt>.exe</TargetExt>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+ <LinkIncremental>false</LinkIncremental>
+ <OutDir>..\..\bin\windows-x64\release\</OutDir>
+ <IntDir>..\..\intermediate\windows-x64\release\gpu-printing\</IntDir>
+ <TargetName>gpu-printing</TargetName>
+ <TargetExt>.exe</TargetExt>
+ </PropertyGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <ClCompile>
+ <PrecompiledHeader>NotUsing</PrecompiledHeader>
+ <WarningLevel>Level3</WarningLevel>
+ <PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AdditionalIncludeDirectories>..\..;..\..\tools;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <DebugInformationFormat>EditAndContinue</DebugInformationFormat>
+ <Optimization>Disabled</Optimization>
+ <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+ </ClCompile>
+ <Link>
+ <SubSystem>Console</SubSystem>
+ <GenerateDebugInformation>true</GenerateDebugInformation>
+ </Link>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+ <ClCompile>
+ <PrecompiledHeader>NotUsing</PrecompiledHeader>
+ <WarningLevel>Level3</WarningLevel>
+ <PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AdditionalIncludeDirectories>..\..;..\..\tools;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <DebugInformationFormat>EditAndContinue</DebugInformationFormat>
+ <Optimization>Disabled</Optimization>
+ <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+ </ClCompile>
+ <Link>
+ <SubSystem>Console</SubSystem>
+ <GenerateDebugInformation>true</GenerateDebugInformation>
+ </Link>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <ClCompile>
+ <PrecompiledHeader>NotUsing</PrecompiledHeader>
+ <WarningLevel>Level3</WarningLevel>
+ <PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AdditionalIncludeDirectories>..\..;..\..\tools;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <Optimization>Full</Optimization>
+ <FunctionLevelLinking>true</FunctionLevelLinking>
+ <IntrinsicFunctions>true</IntrinsicFunctions>
+ <MinimalRebuild>false</MinimalRebuild>
+ <StringPooling>true</StringPooling>
+ <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+ </ClCompile>
+ <Link>
+ <SubSystem>Console</SubSystem>
+ <EnableCOMDATFolding>true</EnableCOMDATFolding>
+ <OptimizeReferences>true</OptimizeReferences>
+ </Link>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+ <ClCompile>
+ <PrecompiledHeader>NotUsing</PrecompiledHeader>
+ <WarningLevel>Level3</WarningLevel>
+ <PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AdditionalIncludeDirectories>..\..;..\..\tools;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <Optimization>Full</Optimization>
+ <FunctionLevelLinking>true</FunctionLevelLinking>
+ <IntrinsicFunctions>true</IntrinsicFunctions>
+ <MinimalRebuild>false</MinimalRebuild>
+ <StringPooling>true</StringPooling>
+ <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+ </ClCompile>
+ <Link>
+ <SubSystem>Console</SubSystem>
+ <EnableCOMDATFolding>true</EnableCOMDATFolding>
+ <OptimizeReferences>true</OptimizeReferences>
+ </Link>
+ </ItemDefinitionGroup>
+ <ItemGroup>
+ <ClCompile Include="gpu-printing.cpp" />
+ <ClCompile Include="main.cpp" />
+ </ItemGroup>
+ <ItemGroup>
+ <None Include="kernels.slang" />
+ <None Include="printing.slang" />
+ </ItemGroup>
+ <ItemGroup>
+ <ProjectReference Include="..\..\source\slang\slang.vcxproj">
+ <Project>{DB00DA62-0533-4AFD-B59F-A67D5B3A0808}</Project>
+ </ProjectReference>
+ <ProjectReference Include="..\..\source\core\core.vcxproj">
+ <Project>{F9BE7957-8399-899E-0C49-E714FDDD4B65}</Project>
+ </ProjectReference>
+ <ProjectReference Include="..\..\tools\gfx\gfx.vcxproj">
+ <Project>{222F7498-B40C-4F3F-A704-DDEB91A4484A}</Project>
+ </ProjectReference>
+ </ItemGroup>
+ <ItemGroup>
+ <ClInclude Include="gpu-printing-ops.h" />
+ <ClInclude Include="gpu-printing.h" />
+ </ItemGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+ <ImportGroup Label="ExtensionTargets">
+ </ImportGroup>
+</Project> \ No newline at end of file
diff --git a/examples/gpu-printing/gpu-printing.vcxproj.filters b/examples/gpu-printing/gpu-printing.vcxproj.filters
new file mode 100644
index 000000000..c539443c3
--- /dev/null
+++ b/examples/gpu-printing/gpu-printing.vcxproj.filters
@@ -0,0 +1,32 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <ItemGroup>
+ <Filter Include="Source Files">
+ <UniqueIdentifier>{E9C7FDCE-D52A-8D73-7EB0-C5296AF258F6}</UniqueIdentifier>
+ </Filter>
+ </ItemGroup>
+ <ItemGroup>
+ <ClCompile Include="main.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="gpu-printing.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ </ItemGroup>
+ <ItemGroup>
+ <None Include="kernels.slang">
+ <Filter>Source Files</Filter>
+ </None>
+ <None Include="printing.slang">
+ <Filter>Source Files</Filter>
+ </None>
+ </ItemGroup>
+ <ItemGroup>
+ <ClInclude Include="gpu-printing-ops.h">
+ <Filter>Source Files</Filter>
+ </ClInclude>
+ <ClInclude Include="gpu-printing.h">
+ <Filter>Source Files</Filter>
+ </ClInclude>
+ </ItemGroup>
+</Project> \ No newline at end of file
diff --git a/examples/gpu-printing/kernels.slang b/examples/gpu-printing/kernels.slang
new file mode 100644
index 000000000..ec4533958
--- /dev/null
+++ b/examples/gpu-printing/kernels.slang
@@ -0,0 +1,38 @@
+// kernels.slang
+
+// This file demonstrates how ordinary shader code can make use of
+// a standalone GPU printing library implemented in `printing.slang`.
+//
+// The first step for using a module of Slang code is to `import` it.
+//
+import printing;
+//
+// The `import` declaration above brings all the types and functions
+// declared in the `printing` module (in `printing.slang`) into
+// scope so that our code can use them.
+
+// For simplicity, we will define a single compute shader that does
+// some simple printing.
+//
+[shader("compute")]
+[numthreads(32)]
+void computeMain(uint3 tid : SV_DispatchThreadID)
+{
+    // Both printing calls below report the x component of the
+    // dispatch thread ID, so we fetch it once up front.
+    //
+    uint threadIndex = tid.x;
+
+    // The `printing` module offers two main ways to print.
+    //
+    // First, there is a Java-style `println` function: it accepts
+    // zero or more arguments, prints each of them, and then ends
+    // the line.
+    //
+    // HACK: The string argument is wrapped in an explicit call to
+    // `getStringHash` because the print implementation writes strings
+    // out as hash codes, and the current Slang implementation of
+    // `getStringHash` only applies to string literals.
+    //
+    println(getStringHash("hello from thread number "), threadIndex);
+
+    // Second, there is a C-style `printf()` function that takes a
+    // format string followed by the values to be formatted.
+    //
+    printf(getStringHash("printf from thread 0x%x\n"), threadIndex);
+}
diff --git a/examples/gpu-printing/main.cpp b/examples/gpu-printing/main.cpp
new file mode 100644
index 000000000..49a8912db
--- /dev/null
+++ b/examples/gpu-printing/main.cpp
@@ -0,0 +1,225 @@
+// main.cpp
+
+#include <slang.h>
+
+#include <slang-com-ptr.h>
+using Slang::ComPtr;
+
+#include "gfx/render.h"
+#include "gfx/d3d11/render-d3d11.h"
+#include "gfx/window.h"
+using namespace gfx;
+
+#include <string>
+
+#include "gpu-printing.h"
+
+ComPtr<slang::ISession> createSlangSession(gfx::Renderer* renderer)
+{
+    // Builds a Slang session with a single code generation target:
+    // DXBC using the `sm_5_0` profile. The `renderer` argument is not
+    // consulted by this function.
+    //
+    ComPtr<slang::IGlobalSession> globalSession;
+    globalSession.attach(spCreateSession(nullptr));
+
+    slang::TargetDesc dxbcTarget;
+    dxbcTarget.format = SLANG_DXBC;
+    dxbcTarget.profile = spFindProfile(globalSession, "sm_5_0");
+
+    slang::SessionDesc desc;
+    desc.targets = &dxbcTarget;
+    desc.targetCount = 1;
+
+    ComPtr<slang::ISession> session;
+    globalSession->createSession(desc, session.writeRef());
+    return session;
+}
+
+ComPtr<slang::IModule> compileShaderModuleFromFile(slang::ISession* slangSession, char const* filePath)
+{
+    // Compiles the Slang source file at `filePath` as a single translation
+    // unit and returns the resulting module. Any diagnostic output is passed
+    // to `reportError`. Returns a null `ComPtr` when the compile request
+    // cannot be created or when compilation fails.
+    //
+    SlangCompileRequest* slangRequest = nullptr;
+    slangSession->createCompileRequest(&slangRequest);
+    if(!slangRequest)
+    {
+        return ComPtr<slang::IModule>();
+    }
+
+    int translationUnitIndex = spAddTranslationUnit(slangRequest, SLANG_SOURCE_LANGUAGE_SLANG, filePath);
+    spAddTranslationUnitSourceFile(slangRequest, translationUnitIndex, filePath);
+
+    const SlangResult compileRes = spCompile(slangRequest);
+    if(auto diagnostics = spGetDiagnosticOutput(slangRequest))
+    {
+        reportError("%s", diagnostics);
+    }
+
+    // On failure `slangModule` is left null and returned as-is.
+    //
+    ComPtr<slang::IModule> slangModule;
+    if(SLANG_SUCCEEDED(compileRes))
+    {
+        spCompileRequest_getModule(slangRequest, translationUnitIndex, slangModule.writeRef());
+    }
+
+    // The request is released on every path; previously it was only
+    // destroyed when compilation failed and leaked on success. The module
+    // is kept alive by the reference held in `slangModule`.
+    //
+    spDestroyCompileRequest(slangRequest);
+    return slangModule;
+}
+
+struct ExampleProgram
+{
+int gWindowWidth = 640;
+int gWindowHeight = 480;
+
+gfx::ApplicationContext* gAppContext;
+gfx::Window* gWindow;
+RefPtr<gfx::Renderer> gRenderer;
+
+ComPtr<slang::ISession> gSlangSession;
+ComPtr<slang::IModule> gSlangModule;
+RefPtr<gfx::ShaderProgram> gProgram;
+
+RefPtr<gfx::PipelineLayout> gPipelineLayout;
+RefPtr<gfx::PipelineState> gPipelineState;
+RefPtr<gfx::DescriptorSet> gDescriptorSet;
+
+Dictionary<int, std::string> gHashedStrings;
+
+GPUPrinting gGPUPrinting;
+
+RefPtr<gfx::ShaderProgram> loadComputeProgram(slang::IModule* slangModule, char const* entryPointName)
+{
+    // Looks up `entryPointName` in `slangModule`, links it, registers the
+    // linked program's layout with `gGPUPrinting`, and wraps the generated
+    // kernel code in a compute `gfx::ShaderProgram`.
+    //
+    // Returns a null `RefPtr` if the module is null, the entry point cannot
+    // be found, linking fails, or no kernel code is produced.
+    //
+    ComPtr<slang::IEntryPoint> entryPoint;
+    if(slangModule)
+    {
+        slangModule->findEntryPointByName(entryPointName, entryPoint.writeRef());
+    }
+    if(!entryPoint) return RefPtr<gfx::ShaderProgram>();
+
+    ComPtr<slang::IComponentType> linkedProgram;
+    entryPoint->link(linkedProgram.writeRef());
+    if(!linkedProgram) return RefPtr<gfx::ShaderProgram>();
+
+    gGPUPrinting.loadStrings(linkedProgram->getLayout());
+
+    // Fetch the code for entry point index 0 on target index 0.
+    //
+    ComPtr<ISlangBlob> codeBlob;
+    linkedProgram->getEntryPointCode(0, 0, codeBlob.writeRef());
+    if(!codeBlob) return RefPtr<gfx::ShaderProgram>();
+
+    char const* code = (char const*) codeBlob->getBufferPointer();
+    char const* codeEnd = code + codeBlob->getBufferSize();
+
+    gfx::ShaderProgram::KernelDesc kernelDescs[] =
+    {
+        { gfx::StageType::Compute, code, codeEnd },
+    };
+
+    gfx::ShaderProgram::Desc programDesc;
+    programDesc.pipelineType = gfx::PipelineType::Compute;
+    programDesc.kernels = &kernelDescs[0];
+    //
+    // The count is derived from the array itself: a hard-coded value that
+    // exceeds the array length (it used to be 2 for this one-element array)
+    // makes the renderer read past the end of `kernelDescs`.
+    //
+    programDesc.kernelCount = int(sizeof(kernelDescs) / sizeof(kernelDescs[0]));
+
+    auto shaderProgram = gRenderer->createProgram(programDesc);
+
+    return shaderProgram;
+}
+
+Result execute()
+{
+    // Runs the whole example: creates a window and a D3D11 renderer,
+    // compiles `kernels.slang`, dispatches `computeMain` once with the
+    // print buffer bound, and finally hands the mapped print buffer to
+    // `gGPUPrinting` for decoding.
+    //
+    // Returns `SLANG_OK` on success, and a failure result as soon as any
+    // step cannot produce the object that later steps dereference.
+    //
+    WindowDesc windowDesc;
+    windowDesc.title = "GPU Printing";
+    windowDesc.width = gWindowWidth;
+    windowDesc.height = gWindowHeight;
+    gWindow = createWindow(windowDesc);
+    if(!gWindow) return SLANG_FAIL;
+
+    gRenderer = createD3D11Renderer();
+    if(!gRenderer) return SLANG_FAIL;
+
+    Renderer::Desc rendererDesc;
+    rendererDesc.width = gWindowWidth;
+    rendererDesc.height = gWindowHeight;
+    {
+        Result res = gRenderer->initialize(rendererDesc, getPlatformWindowHandle(gWindow));
+        if(SLANG_FAILED(res)) return res;
+    }
+
+    // `compileShaderModuleFromFile` returns a null module when compilation
+    // fails, and `loadComputeProgram` calls through the module pointer, so
+    // the results must be checked before moving on.
+    //
+    gSlangSession = createSlangSession(gRenderer);
+    if(!gSlangSession) return SLANG_FAIL;
+
+    gSlangModule = compileShaderModuleFromFile(gSlangSession, "kernels.slang");
+    if(!gSlangModule) return SLANG_FAIL;
+
+    gProgram = loadComputeProgram(gSlangModule, "computeMain");
+    if(!gProgram) return SLANG_FAIL;
+
+    // The only shader parameter is the print buffer, so the descriptor
+    // set layout consists of a single storage buffer slot.
+    //
+    DescriptorSetLayout::SlotRangeDesc slotRanges[] =
+    {
+        DescriptorSetLayout::SlotRangeDesc(DescriptorSlotType::StorageBuffer),
+    };
+    DescriptorSetLayout::Desc descriptorSetLayoutDesc;
+    descriptorSetLayoutDesc.slotRangeCount = 1;
+    descriptorSetLayoutDesc.slotRanges = &slotRanges[0];
+    auto descriptorSetLayout = gRenderer->createDescriptorSetLayout(descriptorSetLayoutDesc);
+    if(!descriptorSetLayout) return SLANG_FAIL;
+
+    PipelineLayout::DescriptorSetDesc descriptorSets[] =
+    {
+        PipelineLayout::DescriptorSetDesc( descriptorSetLayout ),
+    };
+    PipelineLayout::Desc pipelineLayoutDesc;
+    pipelineLayoutDesc.renderTargetCount = 1;
+    pipelineLayoutDesc.descriptorSetCount = 1;
+    pipelineLayoutDesc.descriptorSets = &descriptorSets[0];
+    auto pipelineLayout = gRenderer->createPipelineLayout(pipelineLayoutDesc);
+    if(!pipelineLayout) return SLANG_FAIL;
+
+    gPipelineLayout = pipelineLayout;
+
+    // Once we have the descriptor set layout, we can allocate
+    // and fill in a descriptor set to hold our parameters.
+    //
+    auto descriptorSet = gRenderer->createDescriptorSet(descriptorSetLayout);
+    if(!descriptorSet) return SLANG_FAIL;
+
+//    descriptorSet->setConstantBuffer(0, 0, gConstantBuffer);
+
+    gDescriptorSet = descriptorSet;
+
+    ComputePipelineStateDesc desc;
+    desc.pipelineLayout = gPipelineLayout;
+    desc.program = gProgram;
+    auto pipelineState = gRenderer->createComputePipelineState(desc);
+    if(!pipelineState) return SLANG_FAIL;
+
+    gPipelineState = pipelineState;
+
+    size_t printBufferSize = 4 * 1024; // use a small-ish (4KB) buffer for print output
+
+    BufferResource::Desc printBufferDesc;
+    printBufferDesc.init(printBufferSize);
+    printBufferDesc.elementSize = sizeof(uint32_t);
+    printBufferDesc.cpuAccessFlags = Resource::AccessFlag::Read; // | Resource::AccessFlag::Write;
+    auto printBuffer = gRenderer->createBufferResource(Resource::Usage::UnorderedAccess, printBufferDesc);
+    if(!printBuffer) return SLANG_FAIL;
+
+    ResourceView::Desc printBufferViewDesc;
+    printBufferViewDesc.type = ResourceView::Type::UnorderedAccess;
+    auto printBufferView = gRenderer->createBufferView(printBuffer, printBufferViewDesc);
+    if(!printBufferView) return SLANG_FAIL;
+
+    // TODO: need to copy a zero into the start of the print buffer!
+    // (The shader side uses the first word of the buffer as its
+    // allocation counter and expects it to start at zero.)
+
+    gDescriptorSet->setResource(0, 0, printBufferView);
+    gRenderer->setDescriptorSet(PipelineType::Compute, gPipelineLayout, 0, gDescriptorSet);
+
+    gRenderer->setPipelineState(PipelineType::Compute, gPipelineState);
+    gRenderer->dispatchCompute(1, 1, 1);
+
+    // TODO: need to copy from the print buffer to a staging buffer...
+
+    auto printBufferData = (uint32_t*) gRenderer->map(printBuffer, MapFlavor::HostRead);
+    if(!printBufferData) return SLANG_FAIL;
+
+    gGPUPrinting.processGPUPrintCommands(printBufferData, printBufferSize);
+
+    return SLANG_OK;
+}
+
+};
+
+// This "inner" main function is used by the platform abstraction
+// layer to deal with differences in how an entry point needs
+// to be defined for different platforms.
+//
+void innerMain(ApplicationContext* context)
+{
+    // Run the example once; if it fails, call `exitApplication`
+    // with a status of 1.
+    //
+    ExampleProgram app;
+    const auto result = app.execute();
+    if (SLANG_SUCCEEDED(result))
+    {
+        return;
+    }
+    return exitApplication(context, 1);
+}
+
+// This macro instantiates an appropriate main function to
+// invoke the `innerMain` above.
+//
+GFX_CONSOLE_MAIN(innerMain)
diff --git a/examples/gpu-printing/printing.slang b/examples/gpu-printing/printing.slang
new file mode 100644
index 000000000..47d102f97
--- /dev/null
+++ b/examples/gpu-printing/printing.slang
@@ -0,0 +1,392 @@
+// printing.slang
+
+// This file provides the GPU code for a simple library that
+// allows GPU shaders to print values to `stdout`.
+//
+// The implementation relies on a single buffer that must
+// be bound to any shader that uses GPU printing.
+//
+RWStructuredBuffer<uint> gPrintBuffer;
+//
+// Encoding
+// ========
+//
+// The print buffer is organized in terms of 32-bit (`uint`) *words*.
+//
+// The first word in the print buffer is used as an atomic
+// counter, and must be initialized to zero before a shader starts.
+// By atomically incrementing this counter, GPU threads can allocate
+// space for printing commands in the buffer. All printing
+// commands are stored after the first word (so, starting at
+// an index of 1).
+//
+// A printing command starts with a single-word header, where
+// the high 16 bits specify the *op* for the command, and the
+// low 16 bits specify the number of *payload* words in
+// the command. The payload is the words that immediately
+// follow the command header.
+//
+// Note that the header word for a command is *not* included
+// in the count of words in the low 16 bits.
+//
+// The opcode values need to be shared between CPU and GPU
+// code, so we use a bit of preprocessor trickery here to
+// generate an `enum` type with all the opcodes.
+//
+enum PrintingOp
+{
+ // Each `GPU_PRINTING_OP(NAME)` invocation in the included header
+ // expands to one enumerator `NAME ,` in this `enum`.
+ //
+ // NOTE(review): the macro is not `#undef`ed here; presumably
+ // `gpu-printing-ops.h` does that itself -- confirm.
+ //
+#define GPU_PRINTING_OP(NAME) NAME ,
+#include "gpu-printing-ops.h"
+};
+
+// It is critical that when printing something, we allocate
+// all the words it requires in the print buffer contiguously.
+// For example, if the user writes:
+//
+// println("Thread number ", threadID, " has value ", someValue);
+//
+// It would be very bad if the output from different threads
+// got interleaved, such that one cannot determine which value
+// goes with which thread.
+//
+// Allocating individual print *commands* atomically is not necessarily
+// enough: instead, we need to allocate the storage for all
+// the commands that comprise a `print()` call at once.
+//
+// The core allocation operation here is `_allocatePrintWords()`
+
+    // Reserve `wordCount` contiguous words in the print buffer and
+    // return the index of the first reserved word.
+uint _allocatePrintWords(uint wordCount)
+{
+    // Word 0 of the buffer is the allocation counter. Atomically bumping
+    // it by `wordCount` hands back the counter value from before the add,
+    // which is where this allocation begins (relative to the data area).
+    //
+    uint previousCount = 0;
+    InterlockedAdd(gPrintBuffer[0], wordCount, previousCount);
+
+    // The data area starts right after the counter word, and the counter
+    // itself starts at zero, so the actual buffer index is one higher.
+    //
+    return 1 + previousCount;
+}
+
+// Java-style `println`
+// ====================
+//
+// We will start by building up a Java-style `println()` function
+// that accepts zero or more values to print, and prints them
+// atomically (without any other thread being able to interleave
+// in the printed output), followed by a newline.
+//
+// We will define a wrapper around `_allocatePrintWords()`
+// that captures the main idiom for `println()`.
+//
+uint _beginPrintln(uint wordCount)
+{
+    // `wordCount` covers only the arguments of the `println` call; the
+    // terminating newline needs one more word, so that is added to the
+    // allocation here.
+    //
+    uint totalWords = wordCount + 1;
+    uint firstWord = _allocatePrintWords(totalWords);
+
+    // The extra word sits after all the argument words and is filled
+    // in with a `NewLine` command (which has no payload).
+    //
+    uint newLineIndex = firstWord + wordCount;
+    gPrintBuffer[newLineIndex] = uint(PrintingOp.NewLine) << 16;
+
+    // The caller writes its argument words starting at this offset.
+    //
+    return firstWord;
+}
+//
+// With the `_beginPrintln()` function handling all the heavy lifting,
+// we can define a zero-argument `println()` trivially.
+//
+void println()
+{
+ // With zero argument words, the only thing allocated and written
+ // is the `NewLine` command itself; the returned offset is not needed.
+ //
+ _beginPrintln(0);
+}
+
+// We could continue to build a family of overloaded `println()` functions, like:
+//
+// void println();
+// void println(int value);
+// void println(float value);
+// void println(uint value);
+// ...
+//
+// but it should be clear that this approach doesn't scale at all
+// to functions with multiple arguments:
+//
+// void println(int a, int b);
+// void println(float a, int b);
+// void println(int a, float b);
+// ...
+//
+// Using the features of the Slang language, we can build a framework
+// for a more scalable solution.
+//
+// We start by defining an `interface` that captures the essence
+// of what a type of printable values needs to support.
+//
+
+interface IPrintable
+{
+ // The generic `println()` functions call the two methods below in
+ // order: first to size the allocation, then to fill it in.
+ //
+ // Every printable value needs to be able to compute the number
+ // of words required to write it into the print buffer.
+ //
+ uint getPrintWordCount();
+
+ // A printable value must also support writing those words into
+ // a buffer, once the appropriate offset to write to is known.
+ // `offset` is the index of the first word the value may write.
+ //
+ void writePrintWords(RWStructuredBuffer<uint> buffer, uint offset);
+};
+
+// With the `IPrintable` interface in place, we can now write
+// a generic one-argument `println()` that works with any
+// printable value.
+
+void println<T : IPrintable>(T value)
+{
+    // Ask the value how many words it occupies in the print buffer, and
+    // let `_beginPrintln()` allocate that many words (plus the newline)
+    // and report where they start.
+    //
+    uint valueOffset = _beginPrintln(value.getPrintWordCount());
+
+    // The value then encodes itself into the buffer at that offset.
+    //
+    value.writePrintWords(gPrintBuffer, valueOffset);
+}
+
+// Of course, in order to be able to print things with this `println()`
+// operation, we need to have some types that implement `IPrintable`.
+//
+// In particular, we'd like to be able to print built-in types like
+// `uint`, but we don't have access to the declaration of `uint`
+// to be able to change it!
+//
+// It just so happens that another Slang feature, `extension`
+// declarations, lets us extend a type with new methods *and*
+// allows us to add new interface implementations to it.
+//
+// We can therefore make the existing Slang `uint` type
+// printable.
+
+extension uint : IPrintable // <-- Note: we are adding a conformance to `IPrintable` here
+{
+    // A `uint` is encoded as a header word followed by one payload word.
+    //
+    uint getPrintWordCount() { return 2; }
+
+    // The header carries the `UInt32` op in its high 16 bits and a
+    // payload count of 1 in its low 16 bits; the payload is the value.
+    //
+    void writePrintWords(RWStructuredBuffer<uint> buffer, uint offset)
+    {
+        uint header = (uint(PrintingOp.UInt32) << 16) | 1;
+        buffer[offset] = header;
+        buffer[offset + 1] = this;
+    }
+}
+
+// HACK: Because we currently don't have a `String` type that we
+// can pass down into subroutines, we will be using the hash
+// code of a string to represent the string itself. These hash
+// codes currently have type `int`, so our printing library
+// will *always* assume that an `int` represents a hashed
+// string, and thus we can't print plain old `int`s right now.
+
+typedef int StringHash;
+
+extension StringHash : IPrintable
+{
+    // A hashed string is encoded as a header word plus one payload word.
+    //
+    uint getPrintWordCount() { return 2; }
+
+    // Same layout as for `uint`, but with the `String` op in the header
+    // and the hash code as the single payload word.
+    //
+    void writePrintWords(RWStructuredBuffer<uint> buffer, uint offset)
+    {
+        uint header = (uint(PrintingOp.String) << 16) | 1;
+        buffer[offset] = header;
+        buffer[offset + 1] = this;
+    }
+}
+
+// Where generics and interfaces start to pay off is when we want
+// to scale up to a two-argument `println()` function that can
+// work for any combination of printable types.
+
+    // Print two values, `a` and `b`, followed by a newline.
+    //
+    // Both values are written into one contiguous allocation, so
+    // output from other threads cannot be spliced in between them.
+    //
+void println<A : IPrintable, B : IPrintable>(A a, B b)
+{
+    // Find out how many words each value needs.
+    //
+    uint aCount = a.getPrintWordCount();
+    uint bCount = b.getPrintWordCount();
+
+    // Allocate the words for both values with a single
+    // `_beginPrintln()`; `b` is placed directly after `a`.
+    //
+    uint aOffset = _beginPrintln(aCount + bCount);
+    uint bOffset = aOffset + aCount;
+
+    // Each value can now be written to its own offset without
+    // worrying about print commands from other threads.
+    //
+    a.writePrintWords(gPrintBuffer, aOffset);
+    b.writePrintWords(gPrintBuffer, bOffset);
+}
+
+// We can then continue to build up to `println()` functions with
+// three or more arguments.
+
+void println<A : IPrintable, B : IPrintable, C : IPrintable>(
+    A a, B b, C c)
+{
+    // Same scheme as the two-argument case: size every value, allocate
+    // once, then write the values back to back.
+    //
+    uint aCount = a.getPrintWordCount();
+    uint bCount = b.getPrintWordCount();
+    uint cCount = c.getPrintWordCount();
+
+    uint aOffset = _beginPrintln(aCount + bCount + cCount);
+    uint bOffset = aOffset + aCount;
+    uint cOffset = bOffset + bCount;
+
+    a.writePrintWords(gPrintBuffer, aOffset);
+    b.writePrintWords(gPrintBuffer, bOffset);
+    c.writePrintWords(gPrintBuffer, cOffset);
+}
+
+// Further generalizing to four or more arguments is straightforward but tedious.
+//
+// A future version of Slang may support variadic functions, variadic generics,
+// or some other facilities to make writing code like this easier.
+
+// An important benefit of the approach we have taken here with an `IPrintable`
+// interface is that arbitrary user-defined types can implement `IPrintable`
+// and will work correctly with the existing `println()` definitions in
+// this file.
+
+// C-style `printf()`
+// ==================
+//
+// Many developers who use C/C++ would prefer to be able to use traditional
+// `printf()` with format strings. `printf`-based printing tends to be
+// more readable than `println`-style alternatives, but comes at the cost
+// of only supporting a more restricted set of types for printing.
+//
+// Similar to the `println()` case, our Slang implementation of `printf()`
+// starts with an allocation function that does the behind-the-scenes
+// work.
+//
+
+uint _beginPrintf(int formatStrngHash, uint wordCount)
+{
+    // A `printf` command consists of the usual header word, one word
+    // for the (hashed) format string, and then the `wordCount` words
+    // supplied by the caller for the format arguments. Two words are
+    // therefore allocated on top of `wordCount`.
+    //
+    uint headerIndex = _allocatePrintWords(wordCount + 2);
+
+    // The payload count in the header includes the format string word
+    // but, as always, not the header itself.
+    //
+    uint payloadWords = wordCount + 1;
+    gPrintBuffer[headerIndex] = (uint(PrintingOp.PrintF) << 16) | payloadWords;
+    gPrintBuffer[headerIndex + 1] = formatStrngHash;
+
+    // The format arguments start right after those two words.
+    //
+    return headerIndex + 2;
+}
+
+// Now we will define an interface for types that are allowed to
+// appear as format arguments to `printf()`.
+
+interface IPrintf
+{
+ // The generic `printf()` functions call these two methods in order:
+ // first to size the command's payload, then to fill it in.
+ //
+ // A `printf()` format argument must know how many words it encodes into
+ uint getPrintfWordCount();
+
+ // A `printf()` format argument must know how to encode itself;
+ // `offset` is the index of the first word it may write.
+ void writePrintfWords(RWStructuredBuffer<uint> buffer, uint offset);
+};
+
+// The extension to make `uint` compatible with `printf()` is straightforward.
+
+extension uint : IPrintf
+{
+    // As a format argument, a `uint` takes up exactly one payload word.
+    //
+    // Note: in contrast to the `IPrintable` encoding, format arguments
+    // for `printf()` are written without any type information.
+    //
+    uint getPrintfWordCount() { return 1; }
+
+    // The single payload word is simply the value itself.
+    void writePrintfWords(RWStructuredBuffer<uint> buffer, uint offset)
+    {
+        buffer[offset] = this;
+    }
+}
+
+extension StringHash : IPrintf
+{
+    // A hashed string also takes up one payload word as a format argument.
+    //
+    uint getPrintfWordCount() { return 1; }
+
+    // The single payload word is the hash code.
+    void writePrintfWords(RWStructuredBuffer<uint> buffer, uint offset)
+    {
+        buffer[offset] = this;
+    }
+}
+
+
+// A `printf()` with no format arguments can just call back to `_beginPrintf()`
+void printf(StringHash format)
+{
+ // With zero argument words, `_beginPrintf()` writes the complete
+ // command (header plus format string), so the returned offset is unused.
+ //
+ _beginPrintf(format, 0);
+}
+
+// The `printf()` cases with one or more format arguments are all quite similar.
+
+void printf<A : IPrintf>(StringHash format, A a)
+{
+    // Work out how many payload words the format argument needs, and
+    // allocate a `printf()` command with room for exactly that many.
+    // `_beginPrintf()` returns the offset at which the argument
+    // payload begins.
+    //
+    uint aOffset = _beginPrintf(format, a.getPrintfWordCount());
+
+    // The argument then encodes itself into the payload.
+    //
+    a.writePrintfWords(gPrintBuffer, aOffset);
+}
+
+void printf<A : IPrintf, B : IPrintf>(StringHash format, A a, B b)
+{
+    // Size both arguments, allocate one command that holds them both,
+    // then write them back to back into its payload.
+    //
+    uint aCount = a.getPrintfWordCount();
+    uint bCount = b.getPrintfWordCount();
+
+    uint aOffset = _beginPrintf(format, aCount + bCount);
+    uint bOffset = aOffset + aCount;
+
+    a.writePrintfWords(gPrintBuffer, aOffset);
+    b.writePrintfWords(gPrintBuffer, bOffset);
+}
+
+// Extending this `printf()` implementation to handle more format arguments
+// is straightforward, but tedious. Future versions of Slang might add
+// support for variadic generics, which could make this code more compact.