// render-test-main.cpp #define _CRT_SECURE_NO_WARNINGS 1 #include "options.h" #include #include #include "slang-support.h" #include "png-serialize-util.h" #include "shader-renderer-util.h" #include "../source/core/slang-io.h" #include "../source/core/slang-string-util.h" #include "core/slang-token-reader.h" #include "shader-input-layout.h" #include #include #include "window.h" #include "../../source/core/slang-test-tool-util.h" #define ENABLE_RENDERDOC_INTEGRATION 0 #if ENABLE_RENDERDOC_INTEGRATION # include "external/renderdoc_app.h" # include #endif namespace renderer_test { using Slang::Result; int gWindowWidth = 1024; int gWindowHeight = 768; // // For the purposes of a small example, we will define the vertex data for a // single triangle directly in the source file. It should be easy to extend // this example to load data from an external source, if desired. // struct Vertex { float position[3]; float color[3]; float uv[2]; }; static const Vertex kVertexData[] = { { { 0, 0, 0.5 }, {1, 0, 0} , {0, 0} }, { { 0, 1, 0.5 }, {0, 0, 1} , {1, 0} }, { { 1, 0, 0.5 }, {0, 1, 0} , {1, 1} }, }; static const int kVertexCount = SLANG_COUNT_OF(kVertexData); using namespace Slang; static void _outputProfileTime(uint64_t startTicks, uint64_t endTicks) { WriterHelper out = StdWriters::getOut(); double time = double(endTicks - startTicks) / Process::getClockFrequency(); out.print("profile-time=%g\n", time); } class ProgramVars; struct ShaderOutputPlan { struct Item { ComPtr resource; slang::TypeLayoutReflection* typeLayout = nullptr; }; List items; }; enum class PipelineType { Graphics, Compute, RayTracing, }; class RenderTestApp { public: Result update(); // At initialization time, we are going to load and compile our Slang shader // code, and then create the API objects we need for rendering. Result initialize( SlangSession* session, IDevice* device, const Options& options, const ShaderCompilerUtil::Input& input); void runCompute(IComputeCommandEncoder* encoder); void renderFrame(IRenderCommandEncoder* encoder); void renderFrameMesh(IRenderCommandEncoder* encoder); void finalize(); Result applyBinding(PipelineType pipelineType, ICommandEncoder* encoder); void setProjectionMatrix(IShaderObject* rootObject); Result writeBindingOutput(const String& fileName); Result writeScreen(const String& filename); protected: /// Called in initialize Result _initializeShaders( SlangSession* session, IDevice* device, Options::ShaderProgramType shaderType, const ShaderCompilerUtil::Input& input); void _initializeRenderPass(); void _initializeAccelerationStructure(); uint64_t m_startTicks; // variables for state to be used for rendering... uintptr_t m_constantBufferSize; IDevice* m_device; ComPtr m_queue; ComPtr m_transientHeap; ComPtr m_inputLayout; ComPtr m_vertexBuffer; ComPtr m_shaderProgram; ComPtr m_pipeline; ComPtr m_depthBuffer; ComPtr m_depthBufferView; ComPtr m_colorBuffer; ComPtr m_colorBufferView; ComPtr m_blasBuffer; ComPtr m_bottomLevelAccelerationStructure; ComPtr m_tlasBuffer; ComPtr m_topLevelAccelerationStructure; ShaderCompilerUtil::OutputAndLayout m_compilationOutput; ShaderInputLayout m_shaderInputLayout; ///< The binding layout Options m_options; ShaderOutputPlan m_outputPlan; }; struct AssignValsFromLayoutContext { IDevice* device; slang::ISession* slangSession; ShaderOutputPlan& outputPlan; slang::ProgramLayout* slangReflection; IAccelerationStructure* accelerationStructure; AssignValsFromLayoutContext( IDevice* device, slang::ISession* slangSession, ShaderOutputPlan& outputPlan, slang::ProgramLayout* slangReflection, IAccelerationStructure* accelerationStructure) : device(device) , slangSession(slangSession) , outputPlan(outputPlan) , slangReflection(slangReflection) , accelerationStructure(accelerationStructure) {} void maybeAddOutput(ShaderCursor const& dstCursor, ShaderInputLayout::Val* srcVal, IResource* resource) { if(srcVal->isOutput) { ShaderOutputPlan::Item item; item.resource = resource; item.typeLayout = dstCursor.getTypeLayout(); outputPlan.items.add(item); } } SlangResult assignData(ShaderCursor const& dstCursor, ShaderInputLayout::DataVal* srcVal) { const size_t bufferSize = srcVal->bufferData.getCount() * sizeof(uint32_t); ShaderCursor dataCursor = dstCursor; switch(dataCursor.getTypeLayout()->getKind()) { case slang::TypeReflection::Kind::ConstantBuffer: case slang::TypeReflection::Kind::ParameterBlock: dataCursor = dataCursor.getDereferenced(); break; default: break; } SLANG_RETURN_ON_FAIL(dataCursor.setData(srcVal->bufferData.getBuffer(), bufferSize)); return SLANG_OK; } SlangResult assignBuffer(ShaderCursor const& dstCursor, ShaderInputLayout::BufferVal* srcVal) { const InputBufferDesc& srcBuffer = srcVal->bufferDesc; auto& bufferData = srcVal->bufferData; const size_t bufferSize = Math::Max((size_t)bufferData.getCount() * sizeof(uint32_t), (size_t)(srcBuffer.elementCount * srcBuffer.stride)); bufferData.reserve(bufferSize / sizeof(uint32_t)); for (size_t i = bufferData.getCount(); i < bufferSize / sizeof(uint32_t); i++) bufferData.add(0); ComPtr bufferResource; SLANG_RETURN_ON_FAIL(ShaderRendererUtil::createBuffer(srcBuffer, /*entry.isOutput,*/ bufferSize, bufferData.getBuffer(), device, bufferResource)); ComPtr counterResource; const auto explicitCounterCursor = dstCursor.getExplicitCounter(); if(srcBuffer.counter != ~0u) { if(explicitCounterCursor.isValid()) { // If this cursor has a full buffer object associated with the // resource, then assign to that. ShaderInputLayout::BufferVal counterVal; counterVal.bufferData.add(srcBuffer.counter); assignBuffer(explicitCounterCursor, &counterVal); } else { // Otherwise, this API (D3D) must be handling the buffer object // specially, in which case create the buffer resource to pass // into `createBufferView` const InputBufferDesc& counterBufferDesc{ InputBufferType::StorageBuffer, sizeof(uint32_t), 1, Format::Unknown, }; SLANG_RETURN_ON_FAIL(ShaderRendererUtil::createBuffer( counterBufferDesc, sizeof(srcBuffer.counter), &srcBuffer.counter, device, counterResource )); } } else if(explicitCounterCursor.isValid()) { // If we know we require a counter for this resource but haven't // been given one, error return SLANG_E_INVALID_ARG; } if (counterResource) { dstCursor.setBinding(Binding(bufferResource, counterResource)); } else { dstCursor.setBinding(bufferResource); } maybeAddOutput(dstCursor, srcVal, bufferResource); return SLANG_OK; } SlangResult assignCombinedTextureSampler(ShaderCursor const& dstCursor, ShaderInputLayout::CombinedTextureSamplerVal* srcVal) { auto& textureEntry = srcVal->textureVal; auto& samplerEntry = srcVal->samplerVal; ComPtr texture; SLANG_RETURN_ON_FAIL(ShaderRendererUtil::generateTexture( textureEntry->textureDesc, ResourceState::ShaderResource, device, texture)); auto sampler = _createSampler(device, samplerEntry->samplerDesc); dstCursor.setBinding(Binding(texture, sampler)); maybeAddOutput(dstCursor, srcVal, texture); return SLANG_OK; } SlangResult assignTexture(ShaderCursor const& dstCursor, ShaderInputLayout::TextureVal* srcVal) { ComPtr texture; ResourceState defaultState = srcVal->textureDesc.isRWTexture ? ResourceState::UnorderedAccess : ResourceState::ShaderResource; SLANG_RETURN_ON_FAIL(ShaderRendererUtil::generateTexture( srcVal->textureDesc, defaultState, device, texture)); dstCursor.setBinding(texture); maybeAddOutput(dstCursor, srcVal, texture); return SLANG_OK; } SlangResult assignSampler(ShaderCursor const& dstCursor, ShaderInputLayout::SamplerVal* srcVal) { auto sampler = _createSampler(device, srcVal->samplerDesc); dstCursor.setBinding(sampler); return SLANG_OK; } SlangResult assignAggregate(ShaderCursor const& dstCursor, ShaderInputLayout::AggVal* srcVal) { Index fieldCount = srcVal->fields.getCount(); for(Index fieldIndex = 0; fieldIndex < fieldCount; ++fieldIndex) { auto& field = srcVal->fields[fieldIndex]; if(field.name.getLength() == 0) { // If no name was given, assume by-indexing matching is requested auto fieldCursor = dstCursor.getElement((GfxIndex)fieldIndex); if(!fieldCursor.isValid()) { StdWriters::getError().print("error: could not find shader parameter at index %d\n", (int)fieldIndex); return SLANG_E_INVALID_ARG; } SLANG_RETURN_ON_FAIL(assign(fieldCursor, field.val)); } else { auto fieldCursor = dstCursor.getPath(field.name.getBuffer()); if(!fieldCursor.isValid()) { StdWriters::getError().print("error: could not find shader parameter matching '%s'\n", field.name.begin()); return SLANG_E_INVALID_ARG; } SLANG_RETURN_ON_FAIL(assign(fieldCursor, field.val)); } } return SLANG_OK; } SlangResult assignObject(ShaderCursor const& dstCursor, ShaderInputLayout::ObjectVal* srcVal) { auto typeName = srcVal->typeName; slang::TypeReflection* slangType = nullptr; if(typeName.getLength() != 0) { // If the input line specified the name of the type // to allocate, then we use it directly. // slangType = slangReflection->findTypeByName(typeName.getBuffer()); } else { // if the user did not specify what type to allocate, // then we will infer the type from the type of the // value pointed to by `entryCursor`. // auto slangTypeLayout = dstCursor.getTypeLayout(); switch(slangTypeLayout->getKind()) { default: break; case slang::TypeReflection::Kind::ConstantBuffer: case slang::TypeReflection::Kind::ParameterBlock: // If the cursor is pointing at a constant buffer // or parameter block, then we assume the user // actually means to allocate an object based on // the element type of the block. // slangTypeLayout = slangTypeLayout->getElementTypeLayout(); break; } slangType = slangTypeLayout->getType(); } ComPtr shaderObject; device->createShaderObject2(slangSession, slangType, ShaderObjectContainerType::None, shaderObject.writeRef()); SLANG_RETURN_ON_FAIL(assign(ShaderCursor(shaderObject), srcVal->contentVal)); dstCursor.setObject(shaderObject); return SLANG_OK; } SlangResult assignValWithSpecializationArg( ShaderCursor const& dstCursor, ShaderInputLayout::SpecializeVal* srcVal) { assign(dstCursor, srcVal->contentVal); List args; for (auto& typeName : srcVal->typeArgs) { auto slangType = slangReflection->findTypeByName(typeName.getBuffer()); if (!slangType) { StdWriters::getError().print("error: could not find shader type '%s'\n", typeName.getBuffer()); return SLANG_E_INVALID_ARG; } args.add(slang::SpecializationArg::fromType(slangType)); } return dstCursor.setSpecializationArgs(args.getBuffer(), (uint32_t)args.getCount()); } SlangResult assignArray(ShaderCursor const& dstCursor, ShaderInputLayout::ArrayVal* srcVal) { Index elementCounter = 0; for(auto elementVal : srcVal->vals) { Index elementIndex = elementCounter++; SLANG_RETURN_ON_FAIL(assign(dstCursor[elementIndex], elementVal)); } return SLANG_OK; } SlangResult assignAccelerationStructure( ShaderCursor const& dstCursor, ShaderInputLayout::AccelerationStructureVal* srcVal) { dstCursor.setBinding(accelerationStructure); return SLANG_OK; } SlangResult assign(ShaderCursor const& dstCursor, ShaderInputLayout::ValPtr const& srcVal) { auto& entryCursor = dstCursor; switch(srcVal->kind) { case ShaderInputType::UniformData: return assignData(dstCursor, (ShaderInputLayout::DataVal*) srcVal.Ptr()); case ShaderInputType::Buffer: return assignBuffer(dstCursor, (ShaderInputLayout::BufferVal*) srcVal.Ptr()); case ShaderInputType::CombinedTextureSampler: return assignCombinedTextureSampler(dstCursor, (ShaderInputLayout::CombinedTextureSamplerVal*) srcVal.Ptr()); case ShaderInputType::Texture: return assignTexture(dstCursor, (ShaderInputLayout::TextureVal*) srcVal.Ptr()); case ShaderInputType::Sampler: return assignSampler(dstCursor, (ShaderInputLayout::SamplerVal*) srcVal.Ptr()); case ShaderInputType::Object: return assignObject(dstCursor, (ShaderInputLayout::ObjectVal*) srcVal.Ptr()); case ShaderInputType::Specialize: return assignValWithSpecializationArg( dstCursor, (ShaderInputLayout::SpecializeVal*)srcVal.Ptr()); case ShaderInputType::Aggregate: return assignAggregate(dstCursor, (ShaderInputLayout::AggVal*) srcVal.Ptr()); case ShaderInputType::Array: return assignArray(dstCursor, (ShaderInputLayout::ArrayVal*) srcVal.Ptr()); case ShaderInputType::AccelerationStructure: return assignAccelerationStructure( dstCursor, (ShaderInputLayout::AccelerationStructureVal*)srcVal.Ptr()); default: assert(!"Unhandled type"); return SLANG_FAIL; } } }; SlangResult _assignVarsFromLayout( IDevice* device, slang::ISession* slangSession, IShaderObject* shaderObject, ShaderInputLayout const& layout, ShaderOutputPlan& ioOutputPlan, slang::ProgramLayout* slangReflection, IAccelerationStructure* accelerationStructure) { AssignValsFromLayoutContext context( device, slangSession, ioOutputPlan, slangReflection, accelerationStructure); ShaderCursor rootCursor = ShaderCursor(shaderObject); return context.assign(rootCursor, layout.rootVal); } Result RenderTestApp::applyBinding(PipelineType pipelineType, ICommandEncoder* encoder) { auto slangReflection = (slang::ProgramLayout*)spGetReflection( m_compilationOutput.output.getRequestForReflection()); ComPtr slangSession; m_compilationOutput.output.m_requestForKernels->getSession(slangSession.writeRef()); switch (pipelineType) { case PipelineType::Compute: { IComputeCommandEncoder* computeEncoder = static_cast(encoder); auto rootObject = computeEncoder->bindPipeline(m_pipeline); SLANG_RETURN_ON_FAIL(_assignVarsFromLayout( m_device, slangSession, rootObject, m_compilationOutput.layout, m_outputPlan, slangReflection, m_topLevelAccelerationStructure)); } break; case PipelineType::Graphics: { IRenderCommandEncoder* renderEncoder = static_cast(encoder); auto rootObject = renderEncoder->bindPipeline(m_pipeline); SLANG_RETURN_ON_FAIL(_assignVarsFromLayout( m_device, slangSession, rootObject, m_compilationOutput.layout, m_outputPlan, slangReflection, m_topLevelAccelerationStructure)); setProjectionMatrix(rootObject); } break; default: throw "unknown pipeline type"; } return SLANG_OK; } SlangResult RenderTestApp::initialize( SlangSession* session, IDevice* device, const Options& options, const ShaderCompilerUtil::Input& input) { m_options = options; // We begin by compiling the shader file and entry points that specified via the options. // SLANG_RETURN_ON_FAIL(ShaderCompilerUtil::compileWithLayout(device->getSlangSession()->getGlobalSession(), options, input, m_compilationOutput)); m_shaderInputLayout = m_compilationOutput.layout; // Once the shaders have been compiled we load them via the underlying API. // ComPtr outDiagnostics; auto result = device->createShaderProgram(m_compilationOutput.output.desc, m_shaderProgram.writeRef(), outDiagnostics.writeRef()); // If there was a failure creating a program, we can't continue // Special case SLANG_E_NOT_AVAILABLE error code to make it a failure, // as it is also used to indicate an attempt setup something failed gracefully (because it couldn't be supported) // but that's not this. if (SLANG_FAILED(result)) { result = (result == SLANG_E_NOT_AVAILABLE) ? SLANG_FAIL : result; return result; } m_device = device; _initializeRenderPass(); _initializeAccelerationStructure(); { switch(m_options.shaderType) { default: assert(!"unexpected test shader type"); return SLANG_FAIL; case Options::ShaderProgramType::Compute: { ComputePipelineDesc desc; desc.program = m_shaderProgram; m_pipeline = device->createComputePipeline(desc); } break; case Options::ShaderProgramType::Graphics: case Options::ShaderProgramType::GraphicsCompute: { // TODO: We should conceivably be able to match up the "available" vertex // attributes, as defined by the vertex stream(s) on the model being // renderer, with the "required" vertex attributes as defiend on the // shader. // // For now we just create a fixed input layout for all graphics tests // since at present they all draw the same single triangle with a // fixed/known set of attributes. // const InputElementDesc inputElements[] = { { "A", 0, Format::R32G32B32_FLOAT, offsetof(Vertex, position) }, { "A", 1, Format::R32G32B32_FLOAT, offsetof(Vertex, color) }, { "A", 2, Format::R32G32_FLOAT, offsetof(Vertex, uv) }, }; ComPtr inputLayout; SLANG_RETURN_ON_FAIL(device->createInputLayout( sizeof(Vertex), inputElements, SLANG_COUNT_OF(inputElements), inputLayout.writeRef())); BufferDesc vertexBufferDesc; vertexBufferDesc.size = kVertexCount * sizeof(Vertex); vertexBufferDesc.memoryType = MemoryType::Upload; vertexBufferDesc.usage = BufferUsage::VertexBuffer; vertexBufferDesc.defaultState = ResourceState::VertexBuffer; SLANG_RETURN_ON_FAIL(device->createBuffer( vertexBufferDesc, kVertexData, m_vertexBuffer.writeRef())); ColorTargetState colorTarget; colorTarget.format = Format::R8G8B8A8_UNORM; RenderPipelineDesc desc; desc.program = m_shaderProgram; desc.inputLayout = inputLayout; desc.targets = &colorTarget; desc.targetCount = 1; desc.depthStencil.format = Format::D32_FLOAT; m_pipeline = device->createRenderPipeline(desc); } break; case Options::ShaderProgramType::GraphicsMeshCompute: case Options::ShaderProgramType::GraphicsTaskMeshCompute: { ColorTargetState colorTarget; colorTarget.format = Format::R8G8B8A8_UNORM; RenderPipelineDesc desc; desc.program = m_shaderProgram; desc.targets = &colorTarget; desc.targetCount = 1; desc.depthStencil.format = Format::D32_FLOAT; m_pipeline = device->createRenderPipeline(desc); } } } // If success must have a pipeline state return m_pipeline ? SLANG_OK : SLANG_FAIL; } Result RenderTestApp::_initializeShaders( SlangSession* session, IDevice* device, Options::ShaderProgramType shaderType, const ShaderCompilerUtil::Input& input) { SLANG_RETURN_ON_FAIL(ShaderCompilerUtil::compileWithLayout(device->getSlangSession()->getGlobalSession(), m_options, input, m_compilationOutput)); m_shaderInputLayout = m_compilationOutput.layout; m_shaderProgram = device->createShaderProgram(m_compilationOutput.output.desc); return m_shaderProgram ? SLANG_OK : SLANG_FAIL; } void RenderTestApp::_initializeRenderPass() { ITransientResourceHeap::Desc transientHeapDesc = {}; transientHeapDesc.constantBufferSize = 4096 * 1024; m_transientHeap = m_device->createTransientResourceHeap(transientHeapDesc); SLANG_ASSERT(m_transientHeap); ICommandQueue::Desc queueDesc = {ICommandQueue::QueueType::Graphics}; m_queue = m_device->createCommandQueue(queueDesc); SLANG_ASSERT(m_queue); rhi::TextureDesc depthBufferDesc; depthBufferDesc.type = TextureType::Texture2D; depthBufferDesc.size.width = gWindowWidth; depthBufferDesc.size.height = gWindowHeight; depthBufferDesc.size.depth = 1; depthBufferDesc.numMipLevels = 1; depthBufferDesc.format = Format::D32_FLOAT; depthBufferDesc.usage = TextureUsage::DepthWrite; depthBufferDesc.defaultState = ResourceState::DepthWrite; m_depthBuffer = m_device->createTexture(depthBufferDesc, nullptr); SLANG_ASSERT(m_depthBuffer); m_depthBufferView = m_device->createTextureView(m_depthBuffer, {}); SLANG_ASSERT(m_depthBufferView); rhi::TextureDesc colorBufferDesc; colorBufferDesc.type = TextureType::Texture2D; colorBufferDesc.size.width = gWindowWidth; colorBufferDesc.size.height = gWindowHeight; colorBufferDesc.size.depth = 1; colorBufferDesc.numMipLevels = 1; colorBufferDesc.format = Format::R8G8B8A8_UNORM; colorBufferDesc.usage = TextureUsage::RenderTarget; colorBufferDesc.defaultState = ResourceState::RenderTarget; m_colorBuffer = m_device->createTexture(colorBufferDesc, nullptr); SLANG_ASSERT(m_colorBuffer); m_colorBufferView = m_device->createTextureView(m_colorBuffer, {}); SLANG_ASSERT(m_colorBufferView); } void RenderTestApp::_initializeAccelerationStructure() { if (!m_device->hasFeature("ray-tracing")) return; BufferDesc vertexBufferDesc = {}; vertexBufferDesc.size = kVertexCount * sizeof(Vertex); vertexBufferDesc.usage = BufferUsage::AccelerationStructureBuildInput; vertexBufferDesc.defaultState = ResourceState::AccelerationStructureBuildInput; ComPtr vertexBuffer = m_device->createBuffer(vertexBufferDesc, &kVertexData[0]); BufferDesc transformBufferDesc = {}; transformBufferDesc.size = sizeof(float) * 12; transformBufferDesc.usage = BufferUsage::AccelerationStructureBuildInput; transformBufferDesc.defaultState = ResourceState::AccelerationStructureBuildInput; float transformData[12] = { 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f}; ComPtr transformBuffer = m_device->createBuffer(transformBufferDesc, &transformData); // Build bottom level acceleration structure. { IAccelerationStructure::BuildInputs accelerationStructureBuildInputs = {}; IAccelerationStructure::PrebuildInfo accelerationStructurePrebuildInfo = {}; accelerationStructureBuildInputs.descCount = 1; accelerationStructureBuildInputs.kind = IAccelerationStructure::Kind::BottomLevel; accelerationStructureBuildInputs.flags = IAccelerationStructure::BuildFlags::AllowCompaction; IAccelerationStructure::GeometryDesc geomDesc = {}; geomDesc.flags = IAccelerationStructure::GeometryFlags::Opaque; geomDesc.type = IAccelerationStructure::GeometryType::Triangles; geomDesc.content.triangles.indexCount = 0; geomDesc.content.triangles.indexData = 0; geomDesc.content.triangles.indexFormat = Format::Unknown; geomDesc.content.triangles.vertexCount = kVertexCount; geomDesc.content.triangles.vertexData = vertexBuffer->getDeviceAddress(); geomDesc.content.triangles.vertexFormat = Format::R32G32B32_FLOAT; geomDesc.content.triangles.vertexStride = sizeof(Vertex); geomDesc.content.triangles.transform3x4 = transformBuffer->getDeviceAddress(); accelerationStructureBuildInputs.geometryDescs = &geomDesc; // Query buffer size for acceleration structure build. m_device->getAccelerationStructurePrebuildInfo( accelerationStructureBuildInputs, &accelerationStructurePrebuildInfo); // Allocate buffers for acceleration structure. BufferDesc asDraftBufferDesc = {}; asDraftBufferDesc.usage = BufferUsage::AccelerationStructure; asDraftBufferDesc.defaultState = ResourceState::AccelerationStructure; asDraftBufferDesc.size = accelerationStructurePrebuildInfo.resultDataMaxSize; ComPtr draftBuffer = m_device->createBuffer(asDraftBufferDesc); BufferDesc scratchBufferDesc = {}; scratchBufferDesc.usage = BufferUsage::UnorderedAccess; scratchBufferDesc.defaultState = ResourceState::UnorderedAccess; scratchBufferDesc.size = accelerationStructurePrebuildInfo.scratchDataSize; ComPtr scratchBuffer = m_device->createBuffer(scratchBufferDesc); // Build acceleration structure. ComPtr compactedSizeQuery; QueryPoolDesc queryPoolDesc = {}; queryPoolDesc.count = 1; queryPoolDesc.type = QueryType::AccelerationStructureCompactedSize; m_device->createQueryPool(queryPoolDesc, compactedSizeQuery.writeRef()); ComPtr draftAS; IAccelerationStructure::CreateDesc draftCreateDesc = {}; draftCreateDesc.buffer = draftBuffer; draftCreateDesc.kind = IAccelerationStructure::Kind::BottomLevel; draftCreateDesc.offset = 0; draftCreateDesc.size = accelerationStructurePrebuildInfo.resultDataMaxSize; m_device->createAccelerationStructure(draftCreateDesc, draftAS.writeRef()); compactedSizeQuery->reset(); auto commandBuffer = m_transientHeap->createCommandBuffer(); auto encoder = commandBuffer->encodeRayTracingCommands(); IAccelerationStructure::BuildDesc buildDesc = {}; buildDesc.dest = draftAS; buildDesc.inputs = accelerationStructureBuildInputs; buildDesc.scratchData = scratchBuffer->getDeviceAddress(); AccelerationStructureQueryDesc compactedSizeQueryDesc = {}; compactedSizeQueryDesc.queryPool = compactedSizeQuery; compactedSizeQueryDesc.queryType = QueryType::AccelerationStructureCompactedSize; encoder->buildAccelerationStructure(buildDesc, 1, &compactedSizeQueryDesc); encoder->endEncoding(); commandBuffer->close(); m_queue->executeCommandBuffer(commandBuffer); m_queue->waitOnHost(); uint64_t compactedSize = 0; compactedSizeQuery->getResult(0, 1, &compactedSize); BufferDesc asBufferDesc = {}; asBufferDesc.usage = BufferUsage::AccelerationStructure; asBufferDesc.defaultState = ResourceState::AccelerationStructure; asBufferDesc.size = (Size)compactedSize; m_blasBuffer = m_device->createBuffer(asBufferDesc); IAccelerationStructure::CreateDesc createDesc; createDesc.buffer = m_blasBuffer; createDesc.kind = IAccelerationStructure::Kind::BottomLevel; createDesc.offset = 0; createDesc.size = (Size)compactedSize; m_device->createAccelerationStructure(createDesc, m_bottomLevelAccelerationStructure.writeRef()); commandBuffer = m_transientHeap->createCommandBuffer(); encoder = commandBuffer->encodeRayTracingCommands(); encoder->copyAccelerationStructure( m_bottomLevelAccelerationStructure, draftAS, AccelerationStructureCopyMode::Compact); encoder->endEncoding(); commandBuffer->close(); m_queue->executeCommandBuffer(commandBuffer); m_queue->waitOnHost(); } // Build top level acceleration structure. { List instanceDescs; instanceDescs.setCount(1); instanceDescs[0].accelerationStructure = m_bottomLevelAccelerationStructure->getDeviceAddress(); instanceDescs[0].flags = IAccelerationStructure::GeometryInstanceFlags::TriangleFacingCullDisable; instanceDescs[0].instanceContributionToHitGroupIndex = 0; instanceDescs[0].instanceID = 0; instanceDescs[0].instanceMask = 0xFF; float transformMatrix[] = { 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f}; memcpy(&instanceDescs[0].transform[0][0], transformMatrix, sizeof(float) * 12); BufferDesc instanceBufferDesc = {}; instanceBufferDesc.size = instanceDescs.getCount() * sizeof(IAccelerationStructure::InstanceDesc); instanceBufferDesc.usage = BufferUsage::AccelerationStructureBuildInput; instanceBufferDesc.defaultState = ResourceState::AccelerationStructureBuildInput; ComPtr instanceBuffer = m_device->createBuffer(instanceBufferDesc, instanceDescs.getBuffer()); IAccelerationStructure::BuildInputs accelerationStructureBuildInputs = {}; IAccelerationStructure::PrebuildInfo accelerationStructurePrebuildInfo = {}; accelerationStructureBuildInputs.descCount = 1; accelerationStructureBuildInputs.kind = IAccelerationStructure::Kind::TopLevel; accelerationStructureBuildInputs.instanceDescs = instanceBuffer->getDeviceAddress(); // Query buffer size for acceleration structure build. m_device->getAccelerationStructurePrebuildInfo( accelerationStructureBuildInputs, &accelerationStructurePrebuildInfo); BufferDesc asBufferDesc = {}; asBufferDesc.usage = BufferUsage::AccelerationStructure; asBufferDesc.defaultState = ResourceState::AccelerationStructure; asBufferDesc.size = (size_t)accelerationStructurePrebuildInfo.resultDataMaxSize; m_tlasBuffer = m_device->createBuffer(asBufferDesc); BufferDesc scratchBufferDesc = {}; scratchBufferDesc.usage = BufferUsage::UnorderedAccess; scratchBufferDesc.defaultState = ResourceState::UnorderedAccess; scratchBufferDesc.size = (size_t)accelerationStructurePrebuildInfo.scratchDataSize; ComPtr scratchBuffer = m_device->createBuffer(scratchBufferDesc); IAccelerationStructure::CreateDesc createDesc = {}; createDesc.buffer = m_tlasBuffer; createDesc.kind = IAccelerationStructure::Kind::TopLevel; createDesc.offset = 0; createDesc.size = accelerationStructurePrebuildInfo.resultDataMaxSize; m_device->createAccelerationStructure( createDesc, m_topLevelAccelerationStructure.writeRef()); auto commandBuffer = m_transientHeap->createCommandBuffer(); auto encoder = commandBuffer->encodeRayTracingCommands(); IAccelerationStructure::BuildDesc buildDesc = {}; buildDesc.dest = m_topLevelAccelerationStructure; buildDesc.inputs = accelerationStructureBuildInputs; buildDesc.scratchData = scratchBuffer->getDeviceAddress(); encoder->buildAccelerationStructure(buildDesc, 0, nullptr); encoder->endEncoding(); commandBuffer->close(); m_queue->executeCommandBuffer(commandBuffer); m_queue->waitOnHost(); } } void RenderTestApp::setProjectionMatrix(IShaderObject* rootObject) { auto info = m_device->getDeviceInfo(); ShaderCursor(rootObject) .getField("Uniforms") .getDereferenced() .setData(info.identityProjectionMatrix, sizeof(float) * 16); } void RenderTestApp::renderFrameMesh(IRenderCommandEncoder* encoder) { auto pipelineType = PipelineType::Graphics; applyBinding(pipelineType, encoder); encoder->drawMeshTasks( m_options.computeDispatchSize[0], m_options.computeDispatchSize[1], m_options.computeDispatchSize[2] ); } void RenderTestApp::renderFrame(IRenderCommandEncoder* encoder) { auto pipelineType = PipelineType::Graphics; applyBinding(pipelineType, encoder); encoder->setPrimitiveTopology(PrimitiveTopology::TriangleList); encoder->setVertexBuffer(0, m_vertexBuffer); encoder->draw(3); } void RenderTestApp::runCompute(IComputeCommandEncoder* encoder) { auto pipelineType = PipelineType::Compute; applyBinding(pipelineType, encoder); encoder->dispatchCompute( m_options.computeDispatchSize[0], m_options.computeDispatchSize[1], m_options.computeDispatchSize[2]); } void RenderTestApp::finalize() { m_compilationOutput.output.reset(); } Result RenderTestApp::writeBindingOutput(const String& fileName) { // Wait until everything is complete m_queue->waitOnHost(); FILE * f = fopen(fileName.getBuffer(), "wb"); if (!f) { return SLANG_FAIL; } FileWriter writer(f, WriterFlags(0)); for(auto outputItem : m_outputPlan.items) { auto resource = outputItem.resource; IBuffer* buffer = nullptr; resource->queryInterface(IBuffer::getTypeGuid(), (void**)&buffer); if (buffer) { const BufferDesc& bufferDesc = buffer->getDesc(); const size_t bufferSize = bufferDesc.size; ComPtr blob; m_device->readBuffer(buffer, 0, bufferSize, blob.writeRef()); buffer->release(); if (!blob) { return SLANG_FAIL; } const SlangResult res = ShaderInputLayout::writeBinding( m_options.outputUsingType ? outputItem.typeLayout : nullptr, // TODO: always output using type blob->getBufferPointer(), bufferSize, &writer); SLANG_RETURN_ON_FAIL(res); } else { auto typeName = outputItem.typeLayout->getName(); printf("invalid output type '%s'.\n", typeName ? typeName : "UNKNOWN"); } } return SLANG_OK; } Result RenderTestApp::writeScreen(const String& filename) { size_t rowPitch, pixelSize; ComPtr blob; SLANG_RETURN_ON_FAIL(m_device->readTexture( m_colorBuffer, ResourceState::RenderTarget, blob.writeRef(), &rowPitch, &pixelSize)); auto bufferSize = blob->getBufferSize(); uint32_t width = static_cast(rowPitch / pixelSize); uint32_t height = static_cast(bufferSize / rowPitch); return PngSerializeUtil::write(filename.getBuffer(), blob, width, height); } Result RenderTestApp::update() { auto commandBuffer = m_transientHeap->createCommandBuffer(); if (m_options.shaderType == Options::ShaderProgramType::Compute) { auto encoder = commandBuffer->encodeComputeCommands(); runCompute(encoder); encoder->endEncoding(); } else { RenderPassColorAttachment colorAttachment = {}; colorAttachment.view = m_colorBufferView; colorAttachment.loadOp = LoadOp::Clear; colorAttachment.storeOp = StoreOp::Store; colorAttachment.initialState = ResourceState::Undefined; colorAttachment.finalState = ResourceState::RenderTarget; RenderPassDepthStencilAttachment depthStencilAttachment = {}; depthStencilAttachment.view = m_depthBufferView; depthStencilAttachment.depthLoadOp = LoadOp::Clear; depthStencilAttachment.depthStoreOp = StoreOp::Store; depthStencilAttachment.initialState = ResourceState::Undefined; depthStencilAttachment.finalState = ResourceState::DepthWrite; RenderPassDesc renderPass = {}; renderPass.colorAttachments = &colorAttachment; renderPass.colorAttachmentCount = 1; renderPass.depthStencilAttachment = &depthStencilAttachment; auto encoder = commandBuffer->encodeRenderCommands(renderPass); rhi::Viewport viewport = {}; viewport.maxZ = 1.0f; viewport.extentX = (float)gWindowWidth; viewport.extentY = (float)gWindowHeight; encoder->setViewportAndScissor(viewport); if(m_options.shaderType == Options::ShaderProgramType::GraphicsMeshCompute || m_options.shaderType == Options::ShaderProgramType::GraphicsTaskMeshCompute) renderFrameMesh(encoder); else renderFrame(encoder); encoder->endEncoding(); } commandBuffer->close(); m_startTicks = Process::getClockTick(); m_queue->executeCommandBuffer(commandBuffer); m_queue->waitOnHost(); // If we are in a mode where output is requested, we need to snapshot the back buffer here if (m_options.outputPath.getLength() || m_options.performanceProfile) { // Wait until everything is complete if (m_options.performanceProfile) { #if 0 // It might not be enough on some APIs to 'waitForGpu' to mean the computation has completed. Let's lock an output // buffer to be sure if (m_bindingState->outputBindings.getCount() > 0) { const auto& binding = m_bindingState->outputBindings[0]; auto i = binding.entryIndex; const auto& layoutBinding = m_shaderInputLayout.entries[i]; assert(layoutBinding.isOutput); if (binding.resource && binding.resource->isBuffer()) { BufferResource* bufferResource = static_cast(binding.resource.Ptr()); const size_t bufferSize = bufferResource->getDesc().size; unsigned int* ptr = (unsigned int*)m_renderer->map(bufferResource, MapFlavor::HostRead); if (!ptr) { return SLANG_FAIL; } m_renderer->unmap(bufferResource); } } #endif // Note we don't do the same with screen rendering -> as that will do a lot of work, which may swamp any computation // so can only really profile compute shaders at the moment const uint64_t endTicks = Process::getClockTick(); _outputProfileTime(m_startTicks, endTicks); } if (m_options.outputPath.getLength()) { if (m_options.shaderType == Options::ShaderProgramType::Compute || m_options.shaderType == Options::ShaderProgramType::GraphicsCompute || m_options.shaderType == Options::ShaderProgramType::GraphicsMeshCompute || m_options.shaderType == Options::ShaderProgramType::GraphicsTaskMeshCompute) { auto request = m_compilationOutput.output.getRequestForReflection(); auto slangReflection = (slang::ShaderReflection*) spGetReflection(request); SLANG_RETURN_ON_FAIL(writeBindingOutput(m_options.outputPath)); } else { SlangResult res = writeScreen(m_options.outputPath); if (SLANG_FAILED(res)) { fprintf(stderr, "ERROR: failed to write screen capture to file\n"); return res; } } } return SLANG_OK; } return SLANG_OK; } static SlangResult _setSessionPrelude(const Options& options, const char* exePath, SlangSession* session) { // Let's see if we need to set up special prelude for HLSL if (options.nvapiExtnSlot.getLength()) { #if !SLANG_WINDOWS_FAMILY // NVAPI is currently only available on Windows return SLANG_E_NOT_AVAILABLE; #else // We want to set the path to NVAPI String rootPath; SLANG_RETURN_ON_FAIL(TestToolUtil::getRootPath(exePath, rootPath)); String includePath; SLANG_RETURN_ON_FAIL(TestToolUtil::getIncludePath(rootPath, "external/nvapi/nvHLSLExtns.h", includePath)) StringBuilder buf; // We have to choose a slot that NVAPI will use. buf << "#define NV_SHADER_EXTN_SLOT " << options.nvapiExtnSlot << "\n"; // Include the NVAPI header buf << "#include "; StringEscapeUtil::appendQuoted(StringEscapeUtil::getHandler(StringEscapeUtil::Style::Cpp), includePath.getUnownedSlice(), buf); buf << "\n\n"; session->setLanguagePrelude(SLANG_SOURCE_LANGUAGE_HLSL, buf.getBuffer()); #endif } else { session->setLanguagePrelude(SLANG_SOURCE_LANGUAGE_HLSL, ""); } return SLANG_OK; } } // namespace renderer_test #if ENABLE_RENDERDOC_INTEGRATION static RENDERDOC_API_1_1_2* rdoc_api = NULL; static void initializeRenderDoc() { if (HMODULE mod = GetModuleHandleA("renderdoc.dll")) { pRENDERDOC_GetAPI RENDERDOC_GetAPI = (pRENDERDOC_GetAPI)GetProcAddress(mod, "RENDERDOC_GetAPI"); int ret = RENDERDOC_GetAPI(eRENDERDOC_API_Version_1_1_2, (void**)&rdoc_api); assert(ret == 1); } } static void renderDocBeginFrame() { if (rdoc_api) rdoc_api->StartFrameCapture(nullptr, nullptr); } static void renderDocEndFrame() { if (rdoc_api) rdoc_api->EndFrameCapture(nullptr, nullptr); _fgetchar(); } #else static void initializeRenderDoc(){} static void renderDocBeginFrame(){} static void renderDocEndFrame(){} #endif class StdWritersDebugCallback : public rhi::IDebugCallback { public: Slang::StdWriters* writers; virtual SLANG_NO_THROW void SLANG_MCALL handleMessage( rhi::DebugMessageType type, rhi::DebugMessageSource source, const char* message) override { SLANG_UNUSED(source); if (type == rhi::DebugMessageType::Error) { writers->getOut().print("%s\n", message); } } }; static SlangResult _innerMain(Slang::StdWriters* stdWriters, SlangSession* session, int argcIn, const char*const* argvIn) { using namespace renderer_test; using namespace Slang; initializeRenderDoc(); StdWriters::setSingleton(stdWriters); Options options; // Parse command-line options SLANG_RETURN_ON_FAIL(Options::parse(argcIn, argvIn, StdWriters::getError(), options)); if (options.deviceType == DeviceType::Default) { return SLANG_OK; } ShaderCompilerUtil::Input input; input.profile = ""; input.target = SLANG_TARGET_NONE; SlangSourceLanguage nativeLanguage = SLANG_SOURCE_LANGUAGE_UNKNOWN; SlangPassThrough slangPassThrough = SLANG_PASS_THROUGH_NONE; char const* profileName = ""; switch (options.deviceType) { case DeviceType::D3D11: input.target = SLANG_DXBC; input.profile = "sm_5_0"; nativeLanguage = SLANG_SOURCE_LANGUAGE_HLSL; slangPassThrough = SLANG_PASS_THROUGH_FXC; break; case DeviceType::D3D12: input.target = SLANG_DXBC; input.profile = "sm_5_0"; nativeLanguage = SLANG_SOURCE_LANGUAGE_HLSL; slangPassThrough = SLANG_PASS_THROUGH_FXC; if( options.useDXIL ) { input.target = SLANG_DXIL; input.profile = "sm_6_5"; slangPassThrough = SLANG_PASS_THROUGH_DXC; } break; case DeviceType::Vulkan: input.target = SLANG_SPIRV; input.profile = ""; nativeLanguage = SLANG_SOURCE_LANGUAGE_GLSL; slangPassThrough = SLANG_PASS_THROUGH_GLSLANG; break; case DeviceType::Metal: input.target = SLANG_METAL_LIB; input.profile = ""; nativeLanguage = SLANG_SOURCE_LANGUAGE_METAL; slangPassThrough = SLANG_PASS_THROUGH_METAL; break; case DeviceType::CPU: input.target = SLANG_SHADER_HOST_CALLABLE; input.profile = ""; nativeLanguage = SLANG_SOURCE_LANGUAGE_CPP; slangPassThrough = SLANG_PASS_THROUGH_GENERIC_C_CPP; break; case DeviceType::CUDA: input.target = SLANG_PTX; input.profile = ""; nativeLanguage = SLANG_SOURCE_LANGUAGE_CUDA; slangPassThrough = SLANG_PASS_THROUGH_NVRTC; break; default: fprintf(stderr, "error: unexpected\n"); return SLANG_FAIL; } switch (options.inputLanguageID) { case Options::InputLanguageID::Slang: input.sourceLanguage = SLANG_SOURCE_LANGUAGE_SLANG; input.passThrough = SLANG_PASS_THROUGH_NONE; break; case Options::InputLanguageID::Native: input.sourceLanguage = nativeLanguage; input.passThrough = slangPassThrough; break; default: break; } if (options.sourceLanguage != SLANG_SOURCE_LANGUAGE_UNKNOWN) { input.sourceLanguage = options.sourceLanguage; if (input.sourceLanguage == SLANG_SOURCE_LANGUAGE_C || input.sourceLanguage == SLANG_SOURCE_LANGUAGE_CPP) { input.passThrough = SLANG_PASS_THROUGH_GENERIC_C_CPP; } } #ifdef _DEBUG rhiEnableDebugLayer(); #endif StdWritersDebugCallback debugCallback; debugCallback.writers = stdWriters; rhiSetDebugCallback(&debugCallback); struct ResetDebugCallbackRAII { ~ResetDebugCallbackRAII() { rhiSetDebugCallback(nullptr); } } resetDebugCallbackRAII; // Use the profile name set on options if set input.profile = options.profileName.getLength() ? options.profileName : input.profile; StringBuilder rendererName; auto info = rendererName << "[" << rhiGetDeviceTypeName(options.deviceType) << "] "; if (options.onlyStartup) { switch (options.deviceType) { case DeviceType::CUDA: { #if RENDER_TEST_CUDA if(SLANG_FAILED(spSessionCheckPassThroughSupport(session, SLANG_PASS_THROUGH_NVRTC))) return SLANG_FAIL; #else return SLANG_FAIL; #endif } case DeviceType::CPU: { // As long as we have CPU, then this should work return spSessionCheckPassThroughSupport(session, SLANG_PASS_THROUGH_GENERIC_C_CPP); } default: break; } } Index nvapiExtnSlot = -1; // Let's see if we need to set up special prelude for HLSL if (options.nvapiExtnSlot.getLength() && options.nvapiExtnSlot[0] == 'u') { // Slang::Int value; UnownedStringSlice slice = options.nvapiExtnSlot.getUnownedSlice(); UnownedStringSlice indexText(slice.begin() + 1 , slice.end()); if (SLANG_SUCCEEDED(StringUtil::parseInt(indexText, value))) { nvapiExtnSlot = Index(value); } } // If can't set up a necessary prelude make not available (which will lead to the test being ignored) if (SLANG_FAILED(_setSessionPrelude(options, argvIn[0], session))) { return SLANG_E_NOT_AVAILABLE; } Slang::ComPtr device; { IDevice::Desc desc = {}; desc.deviceType = options.deviceType; desc.slang.lineDirectiveMode = SLANG_LINE_DIRECTIVE_MODE_NONE; if (options.generateSPIRVDirectly) desc.slang.targetFlags = SLANG_TARGET_FLAG_GENERATE_SPIRV_DIRECTLY; else desc.slang.targetFlags = 0; List requiredFeatureList; for (auto& name : options.renderFeatures) requiredFeatureList.add(name.getBuffer()); desc.requiredFeatures = requiredFeatureList.getBuffer(); desc.requiredFeatureCount = (int)requiredFeatureList.getCount(); // Look for args going to slang { const auto& args = options.downstreamArgs.getArgsByName("slang"); for (const auto& arg : args) { if (arg.value == "-matrix-layout-column-major") { desc.slang.defaultMatrixLayoutMode = SLANG_MATRIX_LAYOUT_COLUMN_MAJOR; break; } } } desc.nvapiExtnSlot = int(nvapiExtnSlot); desc.slang.slangGlobalSession = session; desc.slang.targetProfile = options.profileName.getBuffer(); { SlangResult res = rhiCreateDevice(&desc, device.writeRef()); if (SLANG_FAILED(res)) { // We need to be careful here about SLANG_E_NOT_AVAILABLE. This return value means that the renderer couldn't // be created because it required *features* that were *not available*. It does not mean the renderer in general couldn't // be constructed. // // Returning SLANG_E_NOT_AVAILABLE will lead to the test infrastructure ignoring this test. // // We also don't want to output the 'Unable to create renderer' error, as this isn't an error. if (res == SLANG_E_NOT_AVAILABLE) { return res; } if (!options.onlyStartup) { fprintf(stderr, "Unable to create renderer %s\n", rendererName.getBuffer()); } return res; } SLANG_ASSERT(device); } for (const auto& feature : requiredFeatureList) { // If doesn't have required feature... we have to give up if (!device->hasFeature(feature)) { return SLANG_E_NOT_AVAILABLE; } } } // If the only test is we can startup, then we are done if (options.onlyStartup) { return SLANG_OK; } { RenderTestApp app; renderDocBeginFrame(); SLANG_RETURN_ON_FAIL(app.initialize(session, device, options, input)); app.update(); renderDocEndFrame(); app.finalize(); } return SLANG_OK; } SLANG_TEST_TOOL_API SlangResult innerMain(Slang::StdWriters* stdWriters, SlangSession* sharedSession, int inArgc, const char*const* inArgv) { using namespace Slang; // Assume we will used the shared session ComPtr session(sharedSession); // The sharedSession always has a pre-loaded stdlib. // This differed test checks if the command line has an option to setup the stdlib. // If so we *don't* use the sharedSession, and create a new stdlib-less session just for this compilation. if (TestToolUtil::hasDeferredStdLib(Index(inArgc - 1), inArgv + 1)) { SLANG_RETURN_ON_FAIL(slang_createGlobalSessionWithoutStdLib(SLANG_API_VERSION, session.writeRef())); } SlangResult res = SLANG_FAIL; try { res = _innerMain(stdWriters, session, inArgc, inArgv); } catch (const Slang::Exception& exception) { stdWriters->getOut().put(exception.Message.getUnownedSlice()); return SLANG_FAIL; } catch (...) { stdWriters->getOut().put(UnownedStringSlice::fromLiteral("Unhandled exception")); return SLANG_FAIL; } return res; } int main(int argc, char** argv) { using namespace Slang; SlangSession* session = spCreateSession(nullptr); TestToolUtil::setSessionDefaultPreludeFromExePath(argv[0], session); auto stdWriters = StdWriters::initDefaultSingleton(); SlangResult res = innerMain(stdWriters, session, argc, argv); spDestroySession(session); slang::shutdown(); return (int)TestToolUtil::getReturnCode(res); }