From d6a37a0f151e390808f196998c48a341bc4c7b60 Mon Sep 17 00:00:00 2001 From: Tim Foley Date: Fri, 12 Mar 2021 11:58:14 -0800 Subject: Add a CPU renderer implementation (#1750) * Add a CPU renderer implementation This change adds a CPU back-end to `gfx` and ensures that most of our existing CPU tests pass when using it. Detailed notes: * Most of the CPU renderer implementation is copy-pasted from the CUDA case, so they share a lot of similar logic * The main addition to the CPU renderer is a semi-complete implementation of host-memory textures. The logic here handles all the main shapes (Buffer, 1D, 2D, 3D, Cube) and all the currently-supported `Format`s that are sample-able as-is (no D24S8). The implementation is not intended to be fast, and it currently only does nearest-neighbor sampling, but otherwise it tries to avoid cutting too many corners and should be a reasonable starting point for a more complete (but not performance-oriented) implementation. * Refactored the CPU prelude `IRWTexture` interface to inherit from `ITexture`, since in most cases a single type will end up implementing both. It might be worth it to collapse it all down to a single interface later. * Changed the CPU prelude `ITexture`/`IRWTexture` interface so that it takes both a pointer *and* a size for output arguments. This change seems necessary to allow a shader variable declared as a `Texture2D` to fetch a single `float` when the underlying texture might be using RGBA32F. * Added to the `IComponentType` public API so that we can query a "host callable" for an entry point and not just a binary. * Turned off the `-shaderobj` flag on two tests that weren't yet compatible with shader objects but still had the flag left in on the path (since previously the CPU path always used the non-`gfx` non-shader-object logic anyway) * Disabled one test (`dynamic-dispatch-11`) that relied on the `ConstantBuffer` idiom that we know we are planning to change soon anyway. 
* Made a few changes to the CUDA path to bring it into line with what I added for the CPU path. These were mostly bug fixes around indexing logic for sub-objects and resources. * fixup --- tools/render-test/cpu-compute-util.cpp | 14 ++++++++++---- tools/render-test/render-test-main.cpp | 2 +- tools/render-test/shader-renderer-util.h | 4 ++-- 3 files changed, 13 insertions(+), 7 deletions(-) (limited to 'tools/render-test') diff --git a/tools/render-test/cpu-compute-util.cpp b/tools/render-test/cpu-compute-util.cpp index 7c9103cb3..6682eef1a 100644 --- a/tools/render-test/cpu-compute-util.cpp +++ b/tools/render-test/cpu-compute-util.cpp @@ -52,15 +52,15 @@ struct ValueTexture : public CPUComputeUtil::Resource, public CPPPrelude::ITextu { return _calcMipDims(mipLevel, m_dims); } - virtual void Load(const int32_t* loc, void* out) SLANG_OVERRIDE + virtual void Load(const int32_t* loc, void* out, size_t dataSize) SLANG_OVERRIDE { _set(out); } - virtual void Sample(CPPPrelude::SamplerState samplerState, const float* loc, void* out) SLANG_OVERRIDE + virtual void Sample(CPPPrelude::SamplerState samplerState, const float* loc, void* out, size_t dataSize) SLANG_OVERRIDE { _set(out); } - virtual void SampleLevel(CPPPrelude::SamplerState samplerState, const float* loc, float level, void* out) SLANG_OVERRIDE + virtual void SampleLevel(CPPPrelude::SamplerState samplerState, const float* loc, float level, void* out, size_t dataSize) SLANG_OVERRIDE { _set(out); } @@ -201,9 +201,15 @@ struct FloatRWTexture : public CPUComputeUtil::Resource, public CPPPrelude::IRWT { return _calcMipDims(mipLevel, m_dims); } - virtual void Load(const int32_t* loc, void* out) SLANG_OVERRIDE { m_data.getAt((const uint32_t*)loc, (float*)out); } + virtual void Load(const int32_t* loc, void* out, size_t dataSize) SLANG_OVERRIDE { m_data.getAt((const uint32_t*)loc, (float*)out); } virtual void* refAt(const uint32_t* loc) SLANG_OVERRIDE { return m_data.getAt(loc); } + virtual void 
Sample(CPPPrelude::SamplerState samplerState, const float* loc, void* out, size_t dataSize) SLANG_OVERRIDE + {} + + virtual void SampleLevel(CPPPrelude::SamplerState samplerState, const float* loc, float level, void* out, size_t dataSize) SLANG_OVERRIDE + {} + FloatRWTexture(int elementCount, const CPPPrelude::TextureDimensions& inDims, float initialValue): m_dims(inDims) { diff --git a/tools/render-test/render-test-main.cpp b/tools/render-test/render-test-main.cpp index e13642c5c..15100e2a5 100644 --- a/tools/render-test/render-test-main.cpp +++ b/tools/render-test/render-test-main.cpp @@ -1294,7 +1294,7 @@ static SlangResult _innerMain(Slang::StdWriters* stdWriters, SlangSession* sessi } // If it's CPU testing we don't need a window or a renderer - if (options.deviceType == DeviceType::CPU) + if (options.deviceType == DeviceType::CPU && !options.useShaderObjects) { // Check we have all the required features for (const auto& renderFeature : options.renderFeatures) diff --git a/tools/render-test/shader-renderer-util.h b/tools/render-test/shader-renderer-util.h index ecb8fc8bb..9d583331f 100644 --- a/tools/render-test/shader-renderer-util.h +++ b/tools/render-test/shader-renderer-util.h @@ -73,13 +73,13 @@ struct ShaderRendererUtil bool isOutput, size_t bufferSize, const void* initData, - IDevice* renderer, + IDevice* device, ComPtr& bufferOut); /// Create BindingState::Desc from the contents of layout static Slang::Result createBindingState( const ShaderInputLayout& layout, - IDevice* renderer, + IDevice* device, IBufferResource* addedConstantBuffer, BindingStateImpl** outBindingState); }; -- cgit v1.2.3