diff options
| author | Tim Foley <tfoleyNV@users.noreply.github.com> | 2021-03-17 12:55:30 -0700 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2021-03-17 12:55:30 -0700 |
| commit | 6e5d85efb9fa5f647f7f0c7ef784a9fd09b29023 (patch) | |
| tree | 6206ef11502a1a5d9c1dc00df359be9aececffdf /tools/render-test | |
| parent | b64a23cccfe9876d53cda773afc796bd975fa7e5 (diff) | |
Remove old code paths from render-test (#1760)
* Remove old code paths from render-test
Historically, the `render-test` tool was using three different code paths:
* One based on `gfx` and manual (non-reflection-based) parameter setting, used for OpenGL, D3D11, D3D12, and Vulkan
* One for CPU that used reflection-based parameter setting but shared no code with the first
* One for CUDA that used reflection-based parameter setting and shared some, but not all, code with the CPU path
Recently we've updated `render-test` to include a fourth option:
* Using `gfx` and the "shader object" system it exposes for a unified reflection-based parameter-setting system taht works across OpenGL, D3D11, D3D12, Vulkan, CUDA, and CPU
This change removes the first three options and leaves only the single unified path. A sa result, a bunch of code in `render-test` is no longer needed, and the codebase no longer relies on things like the `IDescriptorSet`-related APIs in `gfx`.
Several existing tests had to be disabled to make this change possible. Those tests will need to be audited and either re-enabled once we fix issues in the shader object system, or permanently removed if they don't test stuff we intend to support in the long run (e.g., global-scope type parameters, which aren't a clear necessity).
* fixup: CUDA detection logic
Diffstat (limited to 'tools/render-test')
| -rw-r--r-- | tools/render-test/bind-location.cpp | 1254 | ||||
| -rw-r--r-- | tools/render-test/bind-location.h | 452 | ||||
| -rw-r--r-- | tools/render-test/cpu-compute-util.cpp | 930 | ||||
| -rw-r--r-- | tools/render-test/cpu-compute-util.h | 79 | ||||
| -rw-r--r-- | tools/render-test/cuda/cuda-compute-util.cpp | 1872 | ||||
| -rw-r--r-- | tools/render-test/cuda/cuda-compute-util.h | 69 | ||||
| -rw-r--r-- | tools/render-test/options.cpp | 4 | ||||
| -rw-r--r-- | tools/render-test/options.h | 2 | ||||
| -rw-r--r-- | tools/render-test/render-test-main.cpp | 407 | ||||
| -rw-r--r-- | tools/render-test/shader-input-layout.cpp | 196 | ||||
| -rw-r--r-- | tools/render-test/shader-input-layout.h | 20 | ||||
| -rw-r--r-- | tools/render-test/shader-renderer-util.cpp | 295 | ||||
| -rw-r--r-- | tools/render-test/shader-renderer-util.h | 46 |
13 files changed, 26 insertions, 5600 deletions
diff --git a/tools/render-test/bind-location.cpp b/tools/render-test/bind-location.cpp deleted file mode 100644 index f791e56f6..000000000 --- a/tools/render-test/bind-location.cpp +++ /dev/null @@ -1,1254 +0,0 @@ - -#include "bind-location.h" - -#include "../../slang-com-helper.h" - -#include "../../source/core/slang-token-reader.h" - -namespace renderer_test { -using namespace Slang; - -/* static */const BindLocation BindLocation::Invalid; - -// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! BindSet !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - -BindSet::BindSet(): - m_arena(4096, 16) -{ -} - -BindSet::~BindSet() -{ - for (auto value : m_values) - { - value->~Value(); - } -} - -void BindSet::setAt(const BindLocation& loc, Value* value) -{ - SLANG_ASSERT(loc.isValid()); - if (loc.isInvalid()) - { - return; - } - - // Note we don't remove when value == null, such that it is stored if should be nullptr - Value** valuePtr = m_bindings.TryGetValueOrAdd(loc, value); - if (valuePtr) - { - *valuePtr = value; - } -} - -void BindSet::setAt(const BindLocation& loc, SlangParameterCategory category, Value* value) -{ - SLANG_ASSERT(loc.isValid()); - if (loc.isInvalid()) - { - return; - } - - const BindPoint* point = loc.getValidBindPointForCategory(category); - if (point) - { - if (loc.m_bindPointSet == nullptr) - { - // Can only have one category, so just set on that - setAt(loc, value); - } - else - { - - BindLocation catLoc(loc.m_typeLayout, category, *point, loc.m_value); - setAt(catLoc, value); - } - } - else - { - SLANG_ASSERT(!"Does not have category"); - } -} - -BindSet::Value* BindSet::getAt(const BindLocation& loc) const -{ - SLANG_ASSERT(loc.isValid()); - if (loc.isInvalid()) - { - return nullptr; - } - Value** valuePtr = m_bindings.TryGetValue(loc); - return valuePtr ? *valuePtr : nullptr; -} - -BindSet::Value* BindSet::_createBufferValue(slang::TypeReflection::Kind kind, slang::TypeLayoutReflection* typeLayout, size_t bufferSizeInBytes, size_t initialSizeInBytes, const void* initialData) -{ - SLANG_ASSERT(typeLayout == nullptr || typeLayout->getKind() == kind); - - Value* value = new (m_arena.allocateAligned(sizeof(Value), SLANG_ALIGN_OF(Value))) Value(); - - value->m_kind = kind; - value->m_sizeInBytes = bufferSizeInBytes; - value->m_elementCount = 0; - value->m_type = typeLayout; - value->m_userIndex = -1; - - value->m_data = (uint8_t*)m_arena.allocateAligned(bufferSizeInBytes, 16); - - SLANG_ASSERT(initialSizeInBytes <= value->m_sizeInBytes); - if (initialData) - { - ::memcpy(value->m_data, initialData, initialSizeInBytes); - ::memset(value->m_data + initialSizeInBytes, 0, bufferSizeInBytes - initialSizeInBytes); - } - else - { - ::memset(value->m_data, 0, value->m_sizeInBytes); - } - - m_values.add(value); - return value; -} - -/* static */bool BindSet::isTextureType(slang::TypeLayoutReflection* typeLayout) -{ - switch (typeLayout->getKind()) - { - case slang::TypeReflection::Kind::Resource: - { - auto type = typeLayout->getType(); - auto shape = type->getResourceShape(); - - switch (shape & SLANG_RESOURCE_BASE_SHAPE_MASK) - { - case SLANG_TEXTURE_2D: - case SLANG_TEXTURE_1D: - case SLANG_TEXTURE_3D: - case SLANG_TEXTURE_CUBE: - case SLANG_TEXTURE_BUFFER: - { - return true; - } - } - } - default: break; - } - - return false; -} - -BindSet::Value* BindSet::createTextureValue(slang::TypeLayoutReflection* typeLayout) -{ - if (!isTextureType(typeLayout)) - { - SLANG_ASSERT(!"Not a texture type"); - return nullptr; - } - - Value* value = new (m_arena.allocateAligned(sizeof(Value), SLANG_ALIGN_OF(Value))) Value(); - - value->m_kind = typeLayout->getKind(); - value->m_sizeInBytes = 0; - value->m_elementCount = 0; - value->m_type = typeLayout; - value->m_data = nullptr; - value->m_userIndex = -1; - - m_values.add(value); - - return value; -} - -BindSet::Value* BindSet::createBufferValue(slang::TypeReflection::Kind kind, size_t sizeInBytes, const void* initialData) -{ - return _createBufferValue(kind, nullptr, sizeInBytes, sizeInBytes, initialData); -} - -BindSet::Value* BindSet::createBufferValue(slang::TypeLayoutReflection* typeLayout, size_t sizeInBytes, const void* initialData) -{ - const auto kind = typeLayout->getKind(); - switch (kind) - { - case slang::TypeReflection::Kind::ParameterBlock: - case slang::TypeReflection::Kind::ConstantBuffer: - { - return _createBufferValue(kind, typeLayout, sizeInBytes, sizeInBytes, initialData); - } - case slang::TypeReflection::Kind::Resource: - { - auto type = typeLayout->getType(); - auto shape = type->getResourceShape(); - - switch (shape & SLANG_RESOURCE_BASE_SHAPE_MASK) - { - case SLANG_STRUCTURED_BUFFER: - { - auto elementTypeLayout = typeLayout->getElementTypeLayout(); - size_t elementSize = elementTypeLayout->getSize(SLANG_PARAMETER_CATEGORY_UNIFORM); - - // We don't know the size of the buffer, but we can work it out, based on what is initialized - size_t elementCount = size_t((sizeInBytes + elementSize - 1) / elementSize); - size_t bufferSize = elementCount * elementSize; - - Value* value = _createBufferValue(kind, typeLayout, bufferSize, sizeInBytes, initialData); - value->m_elementCount = elementCount; - return value; - } - case SLANG_BYTE_ADDRESS_BUFFER: - { - return _createBufferValue(kind, typeLayout, (sizeInBytes + 3) & ~size_t(3), sizeInBytes, initialData); - } - } - break; - } - - - default: break; - } - - SLANG_ASSERT(!"Unable to construct this type of buffer"); - return nullptr; -} - -void BindSet::destroyValue(Value* value) -{ - // TODO(JS): NOTE we do not free the old buffer. This is not a memory leak, because - // it is tracked elsewhere, but there is an argument to destroy it. - const Index index = m_values.indexOf(value); - SLANG_ASSERT(index >= 0); - if (index >= 0) - { - m_values.fastRemoveAt(index); - - // I guess we should remove any bindings to it whilst we are at it - List<BindLocation> locations; - for (const auto& pair : m_bindings) - { - const auto& location = pair.Key; - if (location.m_value == value) - { - locations.add(location); - } - } - - for (auto location : locations) - { - m_bindings.Remove(location); - } - - // Run the dtor - value->~Value(); - } -} - -void BindSet::calcChildResourceLocations(const BindLocation& location, List<BindLocation>& outLocations) -{ - auto typeLayout = location.getTypeLayout(); - - const auto kind = typeLayout->getKind(); - switch (kind) - { - case slang::TypeReflection::Kind::Array: - { - auto elementTypeLayout = typeLayout->getElementTypeLayout(); - auto elementCount = int(typeLayout->getElementCount()); - - // We only iterate over the array, if it's a fixed array (not an unbounded array) - // as it is then the elements are much like the fields of a struct and so 'children'. - if (elementCount != 0) - { - for (Index i = 0; i < elementCount; ++i) - { - BindLocation elementLocation = toIndex(location, i); - calcChildResourceLocations(elementLocation, outLocations); - } - } - break; - } - case slang::TypeReflection::Kind::Struct: - { - auto structTypeLayout = typeLayout; - - auto fieldCount = structTypeLayout->getFieldCount(); - for (uint32_t ff = 0; ff < fieldCount; ++ff) - { - auto field = structTypeLayout->getFieldByIndex(ff); - BindLocation fieldLocation = toField(location, field); - - calcChildResourceLocations(fieldLocation, outLocations); - } - break; - } - - default: break; - } -} - -void BindSet::calcValueLocations(const BindLocation& location, Slang::List<BindLocation>& outLocations) -{ - auto typeLayout = location.getTypeLayout(); - - const auto kind = typeLayout->getKind(); - switch (kind) - { - case slang::TypeReflection::Kind::Array: - { - auto elementTypeLayout = typeLayout->getElementTypeLayout(); - auto elementCount = int(typeLayout->getElementCount()); - - // If it's unbounded, it could point directly to a value/resource. We can't iterate over it - // as 'children' because being an external value/resource (or in a register space) they - // are not part of the underling location. - if (elementCount == 0) - { - outLocations.add(location); - } - break; - } - - case slang::TypeReflection::Kind::SamplerState: - - case slang::TypeReflection::Kind::ParameterBlock: - case slang::TypeReflection::Kind::ConstantBuffer: - case slang::TypeReflection::Kind::Resource: - case slang::TypeReflection::Kind::TextureBuffer: - case slang::TypeReflection::Kind::ShaderStorageBuffer: - { - //auto elementTypeLayout = typeLayout->getElementTypeLayout(); - //const size_t elementSize = elementTypeLayout->getSize(); - - outLocations.add(location); - break; - } - default: - { - calcChildResourceLocations(location, outLocations); - break; - } - } -} - -// Finds the first category from layout reflection that represents an actual value -// i.e. that is not ExistentialType or ExistentialObject. -template<typename LayoutReflectionType> -slang::ParameterCategory getFirstNonExistentialValueCategory(LayoutReflectionType* layout) -{ - slang::ParameterCategory category = slang::ParameterCategory::None; - for (UInt i = 0; i < layout->getCategoryCount(); i++) - { - auto currentCategory = layout->getCategoryByIndex((unsigned int)i); - if (currentCategory == slang::ParameterCategory::ExistentialTypeParam || - currentCategory == slang::ParameterCategory::ExistentialObjectParam) - continue; - category = currentCategory; - } - return category; -} - -BindLocation BindSet::toField(const BindLocation& loc, slang::VariableLayoutReflection* field) const -{ - const Index categoryCount = Index(field->getCategoryCount()); - if (categoryCount == 0) - { - return BindLocation::Invalid; - } - - if (loc.m_bindPointSet) - { - BindPoints bindPoints; - bindPoints.setInvalid(); - - // Copy over and add the ones found here - for (Index i = 0; i < categoryCount; ++i) - { - auto category = field->getCategoryByIndex((unsigned int)i); - - auto const& point = loc.m_bindPointSet->m_points[category]; - if (point.isInvalid()) - { - return BindLocation::Invalid; - } - - auto space = field->getBindingSpace(category); - auto offset = field->getOffset(category); - - // Set using new space, and offset - bindPoints[category] = BindPoint(space, point.m_offset + offset); - } - - return BindLocation(field->getTypeLayout(), bindPoints, loc.m_value); - } - else - { - slang::ParameterCategory category = getFirstNonExistentialValueCategory(field); - SLANG_ASSERT(category != slang::ParameterCategory::None); - - // If I'm going from mixed, then I will have multiple items being tracked (so won't be here) - // If I'm not, then I'm getting an inplace field. It must be relative - // So it would seem I never need to call getBindingIndex, and since I can't do that it must be relative. - // AND if it's relative well it must be in the same category. - - if (category == loc.m_category) - { - auto space = field->getBindingSpace(category); - auto offset = field->getOffset(category); - - return BindLocation(field->getTypeLayout(), category, BindPoint(space, loc.m_point.m_offset + offset), loc.m_value); - } - } - - return BindLocation::Invalid; -} - -BindLocation BindSet::toField(const BindLocation& loc, const char* name) const -{ - if (!loc.isValid()) - { - return loc; - } - - auto typeLayout = loc.m_typeLayout; - const auto kind = typeLayout->getKind(); - - // Strip constantBuffer wrapping, only really applies when we have handles to value/resource - // embedded in other types (like on CPU and CUDA) - if (loc.m_value && - (kind == slang::TypeReflection::Kind::ConstantBuffer || kind == slang::TypeReflection::Kind::ParameterBlock)) - { - // Follow the to associated value/resource - BindSet::Value* value = getAt(loc); - if (value) - { - typeLayout = typeLayout->getElementTypeLayout(); - return toField(BindLocation(typeLayout, SLANG_PARAMETER_CATEGORY_UNIFORM, BindPoint(0, 0), value), name); - } - } - - if (kind == slang::TypeReflection::Kind::Struct) - { - slang::VariableLayoutReflection* varLayout = nullptr; - auto fieldCount = typeLayout->getFieldCount(); - for (uint32_t ff = 0; ff < fieldCount; ++ff) - { - auto field = typeLayout->getFieldByIndex(ff); - if (strcmp(field->getName(), name) == 0) - { - return toField(loc, field); - } - } - } - - // Invalid - return BindLocation::Invalid; -} - -BindLocation BindSet::toIndex(const BindLocation& loc, Index index) const -{ - if (!loc.isValid()) - { - return loc; - } - SLANG_ASSERT(index >= 0); - if (index < 0) - { - return BindLocation::Invalid; - } - - auto typeLayout = loc.m_typeLayout; - const auto kind = typeLayout->getKind(); - - // If it's a zero sized array, we may need to special case indirecting through a buffer that holds it's contents - if (kind != slang::TypeReflection::Kind::Array) - { - return BindLocation::Invalid; - } - - // Find where the uniform data will be held. If we have a unsized array, for some targets the actual content's might be in a different location - BindSet::Value* uniformValue = loc.m_value; - if (typeLayout->getElementCount() == 0) - { - // If we have a value/resource at this location, then we need to offset through that - BindSet::Value* arrayValue = getAt(loc); - if (arrayValue) - { - uniformValue = arrayValue; - - // Check it's in range. - // NOTE we can't check this if the unbounded binding is in another space for example. - if (index >= Index(uniformValue->m_elementCount)) - { - return BindLocation::Invalid; - } - } - } - - auto elementTypeLayout = typeLayout->getElementTypeLayout(); - - const Index categoryCount = Index(elementTypeLayout->getCategoryCount()); - - if (loc.m_bindPointSet) - { - BindPoints bindPoints; - bindPoints.setInvalid(); - - // Copy over and add the ones found here - for (Index i = 0; i < categoryCount; ++i) - { - auto category = elementTypeLayout->getCategoryByIndex((unsigned int)i); - const auto elementStride = typeLayout->getElementStride(category); - - size_t baseOffset = loc.m_bindPointSet->m_points[category].m_offset; - - if (category == slang::ParameterCategory::Uniform && uniformValue != loc.m_value) - { - baseOffset = 0; - } - - const auto& basePoint = loc.m_bindPointSet->m_points[category]; - SLANG_ASSERT(basePoint.isValid()); - bindPoints[category] = BindPoint(basePoint.m_space, baseOffset + elementStride * index); - } - - return BindLocation(elementTypeLayout, bindPoints, uniformValue); - } - else - { - slang::ParameterCategory category = getFirstNonExistentialValueCategory(elementTypeLayout); - SLANG_ASSERT(category != slang::ParameterCategory::None); - - const auto elementStride = typeLayout->getElementStride(category); - - size_t baseOffset = 0; - if (category == slang::ParameterCategory::Uniform && uniformValue != loc.m_value) - { - // base of 0 is appropriate as it is the child value - } - else - { - // TODO(JS): - // Hmm, if its a different category, then not entirely clear what to do here. - // Just zero as we can't use the base we have. - // This might just be an error - - baseOffset = (category == loc.m_category) ? loc.m_point.m_offset : 0; - } - - BindPoint point(loc.m_point.m_space, baseOffset + elementStride * index); - - return BindLocation(elementTypeLayout, category, point, uniformValue); - } - - return BindLocation::Invalid; -} - - -SlangResult BindSet::setBufferContents(const BindLocation& loc, const void* initialData, size_t sizeInBytes) const -{ - BindSet::Value* value = getAt(loc); - if (value) - { - // Truncate if initial data is larger than the buffer - sizeInBytes = (sizeInBytes > value->m_sizeInBytes) ? value->m_sizeInBytes : sizeInBytes; - - SLANG_ASSERT(value->m_sizeInBytes >= sizeInBytes); - ::memcpy(value->m_data, initialData, sizeInBytes); - return SLANG_OK; - } - return SLANG_FAIL; -} - -void BindSet::getBindings(List<BindLocation>& outLocations, List<Value*>& outResources) const -{ - outResources.clear(); - outLocations.clear(); - for (const auto& pair : m_bindings) - { - outLocations.add(pair.Key); - outResources.add(pair.Value); - } -} - -void BindSet::releaseValueTargets() -{ - for (Value* value : m_values) - { - value->m_target.setNull(); - } -} - -// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! BindLocation !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - -BindLocation::BindLocation(slang::TypeLayoutReflection* typeLayout, const BindPoints& points, BindSet_Value* value) : - m_typeLayout(typeLayout), - m_value(value) -{ - setPoints(points); -} - -BindLocation::BindLocation(slang::TypeLayoutReflection* typeLayout, SlangParameterCategory category, const BindPoint& point, BindSet_Value* value) : - m_category(category), - m_point(point), - m_typeLayout(typeLayout), - m_value(value) -{ -} - -BindLocation::BindLocation(slang::VariableLayoutReflection* varLayout, BindSet_Value* value) -{ - m_value = value; - m_typeLayout = varLayout->getTypeLayout(); - - const Index categoryCount = Index(varLayout->getCategoryCount()); - - if (categoryCount <= 0) - { - *this = BindLocation::Invalid; - return; - } - else if (categoryCount == 1) - { - const auto category = varLayout->getCategoryByIndex(0); - - const auto offset = varLayout->getOffset(category); - const auto space = varLayout->getBindingSpace(category); - - m_category = category; - m_point = BindPoint(Index(space), size_t(offset)); - } - else - { - BindPoints points; - points.setInvalid(); - - for (Index i = 0; i < categoryCount; ++i) - { - const auto category = varLayout->getCategoryByIndex((unsigned int)i); - - const auto offset = varLayout->getOffset(category); - const auto space = varLayout->getBindingSpace(category); - - BindPoint& point = points.m_points[category]; - - point.m_offset = size_t(offset); - point.m_space = Index(space); - } - - setPoints(points); - } -} - -BindPoint* BindLocation::getValidBindPointForCategory(SlangParameterCategory category) -{ - BindPoint* point = nullptr; - if (m_bindPointSet) - { - point = &m_bindPointSet->m_points.m_points[category]; - } - else if (m_category == category) - { - point = &m_point; - } - return (point && point->isValid()) ? point : nullptr; -} - -const BindPoint* BindLocation::getValidBindPointForCategory(SlangParameterCategory category) const -{ - const BindPoint* point = nullptr; - if (m_bindPointSet) - { - point = &m_bindPointSet->m_points.m_points[category]; - } - else if (m_category == category) - { - point = &m_point; - } - return (point && point->isValid()) ? point : nullptr; -} - -BindPoint BindLocation::getBindPointForCategory(SlangParameterCategory category) const -{ - if (m_bindPointSet) - { - return m_bindPointSet->m_points.m_points[category]; - } - else if (m_category == category) - { - return m_point; - } - return BindPoint::makeInvalid(); -} - -void BindLocation::setPoints(const BindPoints& points) -{ - Index found; - auto const validCount = points.calcValidCount(&found); - - // There is nothing tracked, so we are done. - if (validCount == 0) - { - setEmptyBinding(); - return; - } - - if (validCount == 1) - { - m_bindPointSet.setNull(); - m_point = points.m_points[found]; - m_category = SlangParameterCategory(found); - return; - } - - if (m_bindPointSet->isUniquelyReferenced()) - { - m_bindPointSet->m_points = points; - } - else - { - m_bindPointSet = new BindPointSet(points); - } -} - -void BindLocation::addOffset(SlangParameterCategory category, ptrdiff_t offset) -{ - BindPoint* point = getValidBindPointForCategory(category); - if (point) - { - point->m_offset += offset; - } -} - -void* BindLocation::getUniform(size_t sizeInBytes) const -{ - const BindPoint* point = getValidBindPointForCategory(SLANG_PARAMETER_CATEGORY_UNIFORM); - if (m_value && point) - { - size_t offset = point->m_offset; - // Make sure it's in range - if (offset + sizeInBytes <= m_value->m_sizeInBytes) - { - return m_value->m_data + offset; - } - } - return nullptr; -} - -SlangResult BindLocation::setUniform(const void* data, size_t sizeInBytes) const -{ - // It has to be a location with uniform - const BindPoint* point = getValidBindPointForCategory(SLANG_PARAMETER_CATEGORY_UNIFORM); - if (m_value && point) - { - size_t offset = point->m_offset; - ptrdiff_t maxSizeInBytes = m_value->m_sizeInBytes - offset; - SLANG_ASSERT(maxSizeInBytes > 0); - - if (maxSizeInBytes <= 0) - { - return SLANG_FAIL; - } - - // Clamp such that only fill in what's available to write - sizeInBytes = sizeInBytes > size_t(maxSizeInBytes) ? size_t(maxSizeInBytes) : sizeInBytes; - - // Make sure it's in range - SLANG_ASSERT(offset + sizeInBytes <= m_value->m_sizeInBytes); - - // Okay copy the contents - ::memcpy(m_value->m_data + offset, data, sizeInBytes); - return SLANG_OK; - } - return SLANG_FAIL; -} - -bool BindLocation::operator==(const ThisType& rhs) const -{ - if (m_typeLayout != rhs.m_typeLayout || - m_value != rhs.m_value) - { - return false; - } - - // If same, then if it's set they must be equal - // If not set, then must be the same category/point - if (m_bindPointSet == rhs.m_bindPointSet) - { - return m_bindPointSet || (m_category == rhs.m_category && m_point == rhs.m_point); - } - - // Only way these can be equal now, is if both are m_bindPointSet are different pointers, but same value - return (m_bindPointSet && rhs.m_bindPointSet) && (m_bindPointSet->m_points == rhs.m_bindPointSet->m_points); -} - -HashCode BindLocation::getHashCode() const -{ - if (!m_typeLayout) - { - return 1; - } - if (m_bindPointSet) - { - return m_bindPointSet->getHashCode(); - } - else - { - return Slang::combineHash(Slang::combineHash(m_category, Slang::getHashCode(m_typeLayout)), m_point.getHashCode()); - } -} - - -// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! BindRoot !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - -SlangResult BindRoot::parse(const String& text, const String& sourcePath, WriterHelper outStream, BindLocation& outLocation) -{ - SLANG_ASSERT(m_bindSet); - - // We will parse the 'name' as may be path to a value/resource - TokenReader parser(text); - - BindLocation location = BindLocation::Invalid; - - { - Token nameToken = parser.ReadToken(); - if (nameToken.Type != TokenType::Identifier) - { - outStream.print("Invalid input syntax at line %d", int(parser.NextToken().Position.Line)); - return SLANG_FAIL; - } - location = find(nameToken.Content.getBuffer()); - if (location.isInvalid()) - { - outStream.print("Unable to find entry in '%s' for '%s' (for CPU name must be specified) \n", sourcePath.getBuffer(), text.getBuffer()); - return SLANG_FAIL; - } - } - - while (!parser.IsEnd()) - { - Token token = parser.NextToken(0); - - if (token.Type == TokenType::LBracket) - { - parser.ReadToken(); - int index = parser.ReadInt(); - SLANG_ASSERT(index >= 0); - - location = m_bindSet->toIndex(location, index); - if (location.isInvalid()) - { - outStream.print("Unable to find entry in '%d' in '%s'\n", index, text.getBuffer()); - return SLANG_FAIL; - } - parser.ReadMatchingToken(TokenType::RBracket); - } - else if (token.Type == TokenType::Dot) - { - parser.ReadToken(); - Token identifierToken = parser.ReadMatchingToken(TokenType::Identifier); - - location = m_bindSet->toField(location, identifierToken.Content.getBuffer()); - if (location.isInvalid()) - { - outStream.print("Unable to find field '%s' in '%s'\n", identifierToken.Content.getBuffer(), text.getBuffer()); - return SLANG_FAIL; - } - } - else if (token.Type == TokenType::Comma) - { - // Break out - break; - } - else - { - return SLANG_FAIL; - } - } - - outLocation = location; - return SLANG_OK; -} - -slang::VariableLayoutReflection* BindRoot::getParameterByName(const char* name) -{ - const int parameterCount = m_reflection->getParameterCount(); - for (int i = 0; i < parameterCount; ++i) - { - auto parameter = m_reflection->getParameterByIndex(i); - const char* paramName = parameter->getName(); - if (strcmp(name, paramName) == 0) - { - return parameter; - } - } - - return nullptr; -} - -slang::VariableLayoutReflection* BindRoot::getEntryPointParameterByName(const char* name) -{ - const int parameterCount = int(m_entryPoint->getParameterCount()); - for (int i = 0; i < parameterCount; ++i) - { - auto parameter = m_entryPoint->getParameterByIndex(i); - // If has a semantic we will ignore - if (parameter->getSemanticName()) - { - continue; - } - if (strcmp(parameter->getName(), name) == 0) - { - return parameter; - } - } - return nullptr; -} - -SlangResult BindRoot::init(BindSet* bindSet, slang::ShaderReflection* reflection, int entryPointIndex) -{ - m_bindSet = bindSet; - m_reflection = reflection; - m_entryPoint = nullptr; - - { - auto entryPointCount = int(reflection->getEntryPointCount()); - if (entryPointIndex < 0 || entryPointIndex >= entryPointCount) - { - SLANG_ASSERT(!"Entry point index out of range"); - return SLANG_FAIL; - } - m_entryPoint = reflection->getEntryPointByIndex(entryPointIndex); - } - - return SLANG_OK; -} - -// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! CPULikeBindRoot !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - -SlangResult CPULikeBindRoot::init(BindSet* bindSet, slang::ShaderReflection* reflection, int entryPointIndex) -{ - m_rootValue = nullptr; - m_entryPointValue = nullptr; - - SLANG_RETURN_ON_FAIL(Super::init(bindSet, reflection, entryPointIndex)); - - { - size_t globalConstantBuffer = reflection->getGlobalConstantBufferSize(); - - size_t rootSizeInBytes = 0; - const int parameterCount = reflection->getParameterCount(); - for (int i = 0; i < parameterCount; ++i) - { - auto parameter = reflection->getParameterByIndex(i); - - auto offset = parameter->getOffset(); - - auto typeLayout = parameter->getTypeLayout(); - auto sizeInBytes = typeLayout->getSize(); - - size_t endOffset = offset + sizeInBytes; - - rootSizeInBytes = (endOffset > rootSizeInBytes) ? endOffset : rootSizeInBytes; - } - SLANG_ASSERT(rootSizeInBytes == globalConstantBuffer); - - if (rootSizeInBytes) - { - // Allocate the 'root' buffer - m_rootValue = m_bindSet->createBufferValue(slang::TypeReflection::Kind::ConstantBuffer, rootSizeInBytes); - } - } - - { - size_t entryPointParamsSizeInBytes = 0; - - const int parameterCount = int(m_entryPoint->getParameterCount()); - for (int i = 0; i < parameterCount; i++) - { - slang::VariableLayoutReflection* parameter = m_entryPoint->getParameterByIndex(i); - - // If has a semantic, then isn't uniform parameter - if (auto semanticName = parameter->getSemanticName()) - { - continue; - } - - auto offset = parameter->getOffset(); - - auto typeLayout = parameter->getTypeLayout(); - auto sizeInBytes = typeLayout->getSize(); - - size_t endOffset = offset + sizeInBytes; - entryPointParamsSizeInBytes = (endOffset > entryPointParamsSizeInBytes) ? endOffset : entryPointParamsSizeInBytes; - } - - if (entryPointParamsSizeInBytes) - { - m_entryPointValue = m_bindSet->createBufferValue(slang::TypeReflection::Kind::ConstantBuffer, entryPointParamsSizeInBytes); - } - } - - return SLANG_OK; -} - - - -BindLocation CPULikeBindRoot::find(const char* name) -{ - Value* value = nullptr; - slang::VariableLayoutReflection* varLayout = nullptr; - - if (m_rootValue) - { - varLayout = getParameterByName(name); - value = m_rootValue; - } - - if (!varLayout && m_entryPointValue) - { - value = m_entryPointValue; - varLayout = getEntryPointParameterByName(name); - } - - if (!varLayout) - { - return BindLocation::Invalid; - } - - // We don't need to worry about bindSpace because variable must be stored in the buffer - // auto space = varLayout->getBindingSpace(); - // TODO(JS): Where is getBindingIndex supposed to be used. It seems the offset here will do the right thing - auto offset = varLayout->getOffset(SLANG_PARAMETER_CATEGORY_UNIFORM); - - return BindLocation(varLayout->getTypeLayout(), SLANG_PARAMETER_CATEGORY_UNIFORM, BindPoint(0, offset), value); -} - -SlangResult CPULikeBindRoot::setArrayCount(const BindLocation& location, int count) -{ - if (!location.isValid()) - { - return SLANG_FAIL; - } - - // I can see if a resource has already been set - Value* value = m_bindSet->getAt(location); - - auto typeLayout = location.getTypeLayout(); - const auto kind = typeLayout->getKind(); - - if (!(typeLayout->getKind() == slang::TypeReflection::Kind::Array && typeLayout->getElementCount() == 0)) - { - return SLANG_FAIL; - } - - const size_t elementStride = typeLayout->getElementStride(SLANG_PARAMETER_CATEGORY_UNIFORM); - auto elementTypeLayout = typeLayout->getElementTypeLayout(); - - if (value) - { - // Making smaller, just reduce the count. - // NOTE! Nothing is done here about deallocating resources which are perhaps no longer reachable. - // This isn't a leakage problem tho, as all buffers are released automatically when scope is left. - if (count <= int(value->m_elementCount) || count <= int(value->m_sizeInBytes / elementStride)) - { - value->m_elementCount = count; - return SLANG_OK; - } - - const size_t maxElementCount = (value->m_sizeInBytes / elementStride); - if (size_t(count) <= maxElementCount) - { - // Just initialize the space - ::memset(value->m_data + elementStride * value->m_elementCount, 0, (count - value->m_elementCount) * elementStride); - value->m_elementCount = count; - return SLANG_OK; - } - } - - // Ok allocate a buffer that can hold all the elements - - const size_t newBufferSize = count * elementStride; - - Value* newValue = m_bindSet->createBufferValue(slang::TypeReflection::Kind::Array, newBufferSize); - newValue->m_elementCount = count; - - // Copy over the data from the old buffer if there is any - if (value && value->m_elementCount) - { - ::memcpy(newValue->m_data, value->m_data, value->m_elementCount * elementStride); - } - - // Remove the old buffer as no longer needed - - if (value) - { - m_bindSet->destroyValue(value); - } - - // Set the new buffer - m_bindSet->setAt(location, newValue); - return SLANG_OK; -} - - -void CPULikeBindRoot::getRoots(Slang::List<BindLocation>& outLocations) -{ - if (m_entryPointValue) - { - const int parameterCount = int(m_entryPoint->getParameterCount()); - for (int i = 0; i < parameterCount; ++i) - { - auto parameter = m_entryPoint->getParameterByIndex(i); - // If has a semantic we will ignore - if (parameter->getSemanticName()) - { - continue; - } - - auto offset = parameter->getOffset(SLANG_PARAMETER_CATEGORY_UNIFORM); - - BindLocation location(parameter->getTypeLayout(), SLANG_PARAMETER_CATEGORY_UNIFORM, BindPoint(0, offset), m_entryPointValue); - outLocations.add(location); - } - } - - if (m_rootValue) - { - const int parameterCount = m_reflection->getParameterCount(); - for (int i = 0; i < parameterCount; ++i) - { - auto parameter = m_reflection->getParameterByIndex(i); - - auto offset = parameter->getOffset(SLANG_PARAMETER_CATEGORY_UNIFORM); - - BindLocation location(parameter->getTypeLayout(), SLANG_PARAMETER_CATEGORY_UNIFORM, BindPoint(0, offset), m_rootValue); - outLocations.add(location); - } - } -} - -static void _addDefaultBuffersRec(BindSet* bindSet, const BindLocation& loc) -{ - // See if there is a value/resource attached there - auto typeLayout = loc.getTypeLayout(); - - const auto kind = typeLayout->getKind(); - switch (kind) - { - case slang::TypeReflection::Kind::ParameterBlock: - case slang::TypeReflection::Kind::ConstantBuffer: - { - BindSet::Value* value = bindSet->getAt(loc); - - auto elementTypeLayout = typeLayout->getElementTypeLayout(); - - if (!value) - { - //SLANG_ASSERT(typeLayout->getSize() == sizeof(void*)); - const size_t elementSize = elementTypeLayout->getSize(); - - // We create using typeLayout (as opposed to elementTypeLayout), because it also holds the wrapping - // 'resource' type. - value = bindSet->createBufferValue(typeLayout, elementSize); - SLANG_ASSERT(value); - - bindSet->setAt(loc, value); - } - - // Recurse into buffer, using the elementType - BindLocation childLocation(elementTypeLayout, SLANG_PARAMETER_CATEGORY_UNIFORM, BindPoint(0, 0), value ); - _addDefaultBuffersRec(bindSet, childLocation); - return; - } - default: break; - } - - // Recurse - { - List<BindLocation> childLocations; - bindSet->calcChildResourceLocations(loc, childLocations); - for (auto& childLocation : childLocations) - { - _addDefaultBuffersRec(bindSet, childLocation); - } - } -} - -void CPULikeBindRoot::addDefaultValues() -{ - - List<BindLocation> rootLocations; - getRoots(rootLocations); - - for (auto& location : rootLocations) - { - _addDefaultBuffersRec(m_bindSet, location); - } -} - -// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! GPULikeBindRoot !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - -BindLocation GPULikeBindRoot::find(const char* name) -{ - slang::VariableLayoutReflection* varLayout = nullptr; - - varLayout = getParameterByName(name); - if (!varLayout) - { - varLayout = getEntryPointParameterByName(name); - } - - if (!varLayout) - { - return BindLocation::Invalid; - } - - return BindLocation(varLayout, nullptr); -} - -SlangResult GPULikeBindRoot::setArrayCount(const BindLocation& location, int count) -{ - // TODO(JS): - // Not 100% clear how to handle this. If the mechanism uses 'spaces' there is nothing to do. - // If the size is an aspect of the binding, then we need to set up the binding information correctly. Depending on underlying - // API. This could perhaps be handled with a base class for m_target which meant we could just call that and it would - // do the right thing. - // - // For now, lets not worry. - return SLANG_OK; -} - -void GPULikeBindRoot::getRoots(Slang::List<BindLocation>& outLocations) -{ - { - const int parameterCount = int(m_entryPoint->getParameterCount()); - for (int i = 0; i < parameterCount; ++i) - { - auto parameter = m_entryPoint->getParameterByIndex(i); - // If has a semantic we will ignore - if (parameter->getSemanticName()) - { - continue; - } - - auto offset = parameter->getOffset(SLANG_PARAMETER_CATEGORY_UNIFORM); - - BindLocation location(parameter, nullptr); - SLANG_ASSERT(location.isValid()); - - outLocations.add(location); - } - } - { - const int parameterCount = m_reflection->getParameterCount(); - for (int i = 0; i < parameterCount; ++i) - { - auto parameter = m_reflection->getParameterByIndex(i); - - BindLocation location(parameter, nullptr); - SLANG_ASSERT(location.isValid()); - - outLocations.add(location); - } - } -} - -} // renderer_test diff --git a/tools/render-test/bind-location.h b/tools/render-test/bind-location.h deleted file mode 100644 index 33670bf4b..000000000 --- a/tools/render-test/bind-location.h +++ /dev/null @@ -1,452 +0,0 @@ -#ifndef BIND_LOCATION_H -#define BIND_LOCATION_H - -#include "source/core/slang-basic.h" -#include "source/core/slang-free-list.h" -#include "source/core/slang-memory-arena.h" -#include "source/core/slang-writer.h" - -#include "slang.h" - -namespace renderer_test { - -/* -Bind Set/Point/Value -==================== - -The following classes are designed as a mechanism to simplify binding within the test system. The underlying issues are - -* How binding occurs is very dependent on the underlying target (CPU is different from Dx for example) - + CPU everything is just backed by uniform 'memory'/GPU uses different registers for different types - + With unbound arrays CPU can just indirect to a buffer, on GPU it might need use of register spaces or some other mechanism (as in VK) - + CPU groups together global/entry point parameters, GPU typically does not -* Having a mechanism that will the data/binding for the test independent of the actual target, allows that code/implementation to be shared across many targets. -* How a resource/state is configured within binding also varies significantly between targets - -One way to handle this disparity, would be to build an abstraction layer, that could create the device specific -resources/state and set them. This is not the approach taken here though. The idea here is to have a mechanism to -be able to build structures in memory, and record where binding takes place without having to create any -device specific resources or state. This data can then be used to construct and then bind as is appropriate. - -The process broadly for test system is is - -1) Set up any default buffers required for a target (for example the uniform/entry point buffers for CPU) -2) Add any default Value/buffers that are needed by traversing reflection -3) Create/Set the Values for the elements of ShaderInputLayoutEntry -4) Go through the values set on the BindSet, creating Resources/State etc appropriate for the target -5) Go through the bindings setting the Resource bindings as appropriate for the target -6) Execute -7) If the computation takes place outside of Values backing memory, copy back the data for output entries -8) Write the output entries - -To do this we need a mechanism to store a binding location. In the general case a BindingLocation might -track the location of many different categories of data. - -We also need a way to record what we want to create on the device for execution. To do this we have the -BindSet::Value. 'Value' was used instead of 'Resource' because the types of things the Value might represent -may not be resource like or might be multiple resources. In simple use cases though a 'Value' is typically -synonymous with some kind of Resource on the device. - -A Value knows the underlying type it represents as was determined via the slang layout/reflection. That an added -feature of 'Values' is there are able hold a buffer that is typically mapped onto some linear buffer on the -device. Doing so means that we do not need to store BindLocation mappings for say uniform data (like float or -matrix), it can just be stored in the memory buffer. When the resources are constructed for execution, we can -just copy over that data. - -This all sounds well and good but there is a final underlying important aspect. That is that some resource -like bindings may have to be stored in a buffer. For example on a CPU we could have a constant buffer that contained -another constant buffer as a field. On CPU this field would be converted into a pointer which needs to be set up. On CUDA this might be some -device specific value. So before we can copy the memory representation to a device specific buffer we must convert -any such bindings into something appropriate in the memory buffer associated with the Value. To do this we can traverse -a record of all of the bindings (which are held on the BindSet), and then set the appropriate date for the device from -data stored in the associated 'Value'. - -A final observation is that on CPU targets, the memory buffer held in the Value can just be used directly. - -NOTE: - -That these classes are written so they can be used to track locations across multiple categories such that binding -can work across many different types of targets. For the moment the mechanism/s are only tested on CPU like binding, -and there are quirks in how locations are traversed that have knowledge of how such bindings work. It may be necessary -for this to work more generally to only allow certain kinds of transitions based on some well defined specific -binding styles. -*/ - -/* A bind point records a specific binding point (typically for a category). It records a space and an offset. -As with Slangs layout reflection, the offset meaning is dependent on category. It might be an offset to -a 'register'. If category is 'uniform' it might be a memory offset. The space defines the 'space' a register -is in. -Note that m_space is ignored (but must be valid) for uniform offsets. -*/ -struct BindPoint -{ - typedef BindPoint ThisType; - - /// - bool isValid() const { return m_space >= 0; } - bool isInvalid() const { return m_space < 0; } - - void setInvalid() { m_space = -1; m_offset = 0; } - - bool operator==(const ThisType& rhs) const { return m_space == rhs.m_space && m_offset == rhs.m_offset; } - bool operator!=(const ThisType& rhs) const { return !(*this == rhs); } - - Slang::HashCode getHashCode() const { return Slang::combineHash(Slang::getHashCode(m_space), Slang::getHashCode(m_offset)); } - - BindPoint() = default; - BindPoint(Slang::Index space, size_t offset):m_space(space), m_offset(offset) {} - - static BindPoint makeInvalid() { return BindPoint(-1, 0); } - - Slang::Index m_space = 0; ///< The register space - size_t m_offset = 0; ///< The offset, might be a byte address or a register index -}; - -/* Stores the BindPoints by category. */ -struct BindPoints -{ - typedef BindPoints ThisType; - - Slang::Index findSingle() const - { - Slang::Index found; - if (calcValidCount(&found) == 1) - { - return found; - } - return -1; - } - Slang::Index calcValidCount(Slang::Index* outFoundIndex) const - { - using namespace Slang; - Index found = -1; - Index validCount = 0; - for (Index i = 0; i < Index(SLANG_PARAMETER_CATEGORY_COUNT); ++i) - { - const auto& point = m_points[i]; - if (point.isValid()) - { - found = i; - validCount++; - } - } - if (outFoundIndex) - { - *outFoundIndex = found; - } - return validCount; - } - void setInvalid() - { - for (auto& point : m_points) - { - point.setInvalid(); - } - } - - bool operator==(const ThisType& rhs) const - { - for (Slang::Index i = 0; i < SLANG_PARAMETER_CATEGORY_COUNT; ++i) - { - if (m_points[i] != rhs.m_points[i]) - { - return false; - } - } - return true; - } - bool operator!=(const ThisType& rhs) const { return !(*this == rhs); } - - Slang::HashCode getHashCode() const - { - using namespace Slang; - HashCode hash = 0x5435abbc; - int bits = 0; - int bit = 1; - for (Index i = 0; i < SLANG_PARAMETER_CATEGORY_COUNT; ++i, bit += bit) - { - const auto& point = m_points[i]; - if (point.isValid()) - { - hash = combineHash(hash, point.getHashCode()); - bits |= bit; - } - } - // The categories set is important too, so merge that in - return combineHash(bits, hash); - } - - BindPoint& operator[](SlangParameterCategory category) { return m_points[Slang::Index(category)]; } - const BindPoint& operator[](SlangParameterCategory category) const { return m_points[Slang::Index(category)]; } - - BindPoint m_points[SLANG_PARAMETER_CATEGORY_COUNT]; -}; - -/* A BindPointSet is really just a reference counted 'BindPoints'. This allows for BindPoints to be shared between -multiple BindLocations if they hold the same value. */ -class BindPointSet : public Slang::RefObject -{ -public: - typedef Slang::RefObject Super; - - Slang::HashCode getHashCode() const { return m_points.getHashCode(); } - - BindPointSet(const BindPoints& points) : - m_points(points) - { - } - BindPointSet() {} - - BindPoints m_points; -}; - -/* A BindSet::Value represents a 'value' associated with a binding. Typically it will be a Resource type -like a Buffer/Texture on a target device. As well as recording type information, it can also store a chunk -of memory that can hold uniform data, and may hold bindings for some kinds of targets (for example CPU pointers). -Additionally if the Value holds some kind of array, the amount of elements in the array can be stored in m_elementCount. - -All Value are constructed stored and tracked on a BindSet. When a BindSet is destroyed any associated Value will become -destroyed. -*/ -struct BindSet_Value -{ - slang::TypeReflection::Kind m_kind; ///< The kind, used if type is not set. Same as m_type.kind otherwise - slang::TypeLayoutReflection* m_type; ///< The type - uint8_t* m_data; - size_t m_sizeInBytes; ///< Total size in bytes - size_t m_elementCount; ///< Only applicable on an array like type, else 0 - - /// Can be set by user code to indicate the origin of contents/definition of a value, such that actual resource can be later constructed. - /// -1 is used to indicate it is not set. - Slang::Index m_userIndex = -1; - - Slang::RefPtr<Slang::RefObject> m_target; ///< Can be used to store data related to an actual target resource. -}; - -class BindSet; - -/* Specifies a binding location (including the associated slang reflection type information) - -It really can be in 3 type of state. -1) Invalid - not a valid binding (m_typeLayout is null, m_pointSet is not used. -2) Holds a single bind point defined by category and BindPoint m_point (m_category and m_point are used) -3) Hold multiple bind points by category (in this case m_bindPointSet is used) - -NOTE! it is an invariant - that the BindLocation must always be in the 'simplest' form that can represent it. -That is if there is only a single binding it *cannot* be stored as a m_bindPointSet with a single category - -That construction through BindPoints, will do this determination automatically. - -A BindLocation can be stored in a Hash. -*/ -struct BindLocation -{ - typedef BindLocation ThisType; - - bool isValid() const { return m_typeLayout != nullptr; } - bool isInvalid() const { return m_typeLayout == nullptr; } - - const BindPointSet* getPointSet() const { return m_bindPointSet; } - void setPoints(const BindPoints& points); - - /// Add an offset - void addOffset(SlangParameterCategory category, ptrdiff_t offset); - - /// True if holds tracking for this category - bool hasCategory(SlangParameterCategory category) const { return getBindPointForCategory(category).isValid(); } - - BindPoint getBindPointForCategory(SlangParameterCategory category) const; - BindPoint* getValidBindPointForCategory(SlangParameterCategory category); - const BindPoint* getValidBindPointForCategory(SlangParameterCategory category) const; - slang::TypeLayoutReflection* getTypeLayout() const { return m_typeLayout; } - - void setEmptyBinding() { m_bindPointSet.setNull(); m_point = BindPoint::makeInvalid(); m_category = SLANG_PARAMETER_CATEGORY_NONE; } - - template <typename T> - T* getUniform() const { return reinterpret_cast<T*>(getUniform(sizeof(T))); } - void* getUniform(size_t size) const; - - /// Set uniform data - SlangResult setUniform(const void* data, size_t sizeInBytes) const; - - bool operator==(const ThisType& rhs) const; - bool operator!=(const ThisType& rhs) const { return !(*this == rhs); } - - /// Get the hash code - Slang::HashCode getHashCode() const; - - /// Default Ctor - constructs as invalid - BindLocation() {} - BindLocation(slang::TypeLayoutReflection* typeLayout, const BindPoints& points, BindSet_Value* value = nullptr); - BindLocation(slang::TypeLayoutReflection* typeLayout, SlangParameterCategory category, const BindPoint& point, BindSet_Value* value = nullptr); - BindLocation(slang::VariableLayoutReflection* varLayout, BindSet_Value* value = nullptr); - - BindLocation(const ThisType& rhs) = default; - - /// An invalid location. - /// Better to return this than use default Ctor as indicates validity in code directly. - static const BindLocation Invalid; - - slang::TypeLayoutReflection* m_typeLayout = nullptr; ///< The type layout - - BindSet_Value* m_value = nullptr; ///< The value if we are in one. - - SlangParameterCategory m_category = SLANG_PARAMETER_CATEGORY_NONE; ///< If there isn't a set this defines the category - BindPoint m_point; ///< If there isn't a bind point set, this defines the point - - /// Holds multiple BindPoints. - /// To keep invariants (such that getHashCode and == work), it can only be set if - /// there is more than one category. If there is just one, m_category and m_point *MUST* be used. - /// NOTE! Can only be written to if there is a single reference. - Slang::RefPtr<BindPointSet> m_bindPointSet; -}; - -/* A BindSet holds all of the Value and bindings. It is designed to be used such that it can hold -all of the bind state needed for setting up a specific binding. - -Unfortunately it is not enough to lookup via a path for a Binding, because different targets represents the -'root' variables and values in different ways. The BindRoot interface is designed to handle this aspect. -*/ -class BindSet -{ -public: - typedef BindSet_Value Value; - - Value* getAt(const BindLocation& loc) const; - void setAt(const BindLocation& loc, Value* value); - void setAt(const BindLocation& loc, SlangParameterCategory category, Value* value); - - Value* createBufferValue(slang::TypeLayoutReflection* type, size_t sizeInBytes, const void* initialData = nullptr); - Value* createBufferValue(slang::TypeReflection::Kind kind, size_t sizeInBytes, const void* initialData = nullptr); - - Value* createTextureValue(slang::TypeLayoutReflection* type); - - /// Calculate from the current location everything that is referenced - void calcValueLocations(const BindLocation& location, Slang::List<BindLocation>& outLocations); - void calcChildResourceLocations(const BindLocation& location, Slang::List<BindLocation>& outLocations); - - void destroyValue(Value* value); - - BindLocation toField(const BindLocation& loc, slang::VariableLayoutReflection* field) const; - BindLocation toField(const BindLocation& loc, const char* name) const; - BindLocation toIndex(const BindLocation& location, Slang::Index index) const; - - SlangResult setBufferContents(const BindLocation& loc, const void* initialData, size_t sizeInBytes) const; - - /// Get all of the values - const Slang::List<Value*>& getValues() const { return m_values; } - /// Get all of the bindings - void getBindings(Slang::List<BindLocation>& outLocations, Slang::List<Value*>& outValues) const; - - /// - void releaseValueTargets(); - - /// Ctor - BindSet(); - - /// Dtor - ~BindSet(); - - /// True if is a texture type - static bool isTextureType(slang::TypeLayoutReflection* typeLayout); - -protected: - Value* _createBufferValue(slang::TypeReflection::Kind kind, slang::TypeLayoutReflection* typeLayout, size_t bufferSizeInBytes, size_t sizeInBytes, const void* initalData); - - Slang::List<Value*> m_values; - - Slang::Dictionary<BindLocation, Value*> m_bindings; - - Slang::MemoryArena m_arena; -}; - -/* BindRoot is an interface for finding the roots bindings by name. It is an interface because different targets have different ways of -representing how root values are located. -More specifically a CPU target holds the uniform and entry point variables in two buffers. -*/ -class BindRoot : public Slang::RefObject -{ -public: - typedef RefObject Super; - - typedef BindSet::Value Value; - - virtual BindLocation find(const char* name) = 0; - /// The setting of an array count is dependent on the underlying implementation. - /// On the CPU this means making sure there is a buffer that is large enough - /// And using that for storage. - /// But this does NOT set the actual location in the appropriate manner - that is - /// something that has to be done by the process that sets all the 'resource' handles etc elsewhere - virtual SlangResult setArrayCount(const BindLocation& location, int count) = 0; - - /// Find all of the roots - virtual void getRoots(Slang::List<BindLocation>& outLocations) = 0; - - /// Parse (specifying some location in HLSL style expression) slice to get to a location. - SlangResult parse(const Slang::String& text, const Slang::String& sourcePath, Slang::WriterHelper streamOut, BindLocation& outLocation); - - /// Get the bindset - BindSet* getBindSet() const { return m_bindSet; } - - slang::VariableLayoutReflection* getParameterByName(const char* name); - slang::VariableLayoutReflection* getEntryPointParameterByName(const char* name); - - SlangResult init(BindSet* bindSet, slang::ShaderReflection* reflection, int entryPointIndex); - - -protected: - - BindSet* m_bindSet = nullptr; - slang::EntryPointReflection* m_entryPoint = nullptr; - slang::ShaderReflection* m_reflection = nullptr; -}; - -/* A CPULike implementation of the BindRoot. This can be used for any binding that holds -the entry point variables/uniforms in buffers. This type also stores the Value/Buffers for -the 'root', and entry point, so they can be directly accessed. -*/ -class CPULikeBindRoot : public BindRoot -{ -public: - typedef BindRoot Super; - - // BindRoot - virtual BindLocation find(const char* name) SLANG_OVERRIDE; - virtual SlangResult setArrayCount(const BindLocation& location, int count) SLANG_OVERRIDE; - virtual void getRoots(Slang::List<BindLocation>& outLocations) SLANG_OVERRIDE; - - void addDefaultValues(); - - Value* getRootValue() const { return m_rootValue; } - Value* getEntryPointValue() const { return m_entryPointValue; } - - void* getRootData() { return m_rootValue ? m_rootValue->m_data : nullptr; } - void* getEntryPointData() { return m_entryPointValue ? m_entryPointValue->m_data : nullptr; } - - SlangResult init(BindSet* bindSet, slang::ShaderReflection* reflection, int entryPointIndex); - -protected: - // Used when we have uniform buffers (as used on CPU/CUDA) - - Value* m_rootValue = nullptr; - Value* m_entryPointValue = nullptr; -}; - -class GPULikeBindRoot : public BindRoot -{ -public: - typedef BindRoot Super; - - // BindRoot - virtual BindLocation find(const char* name) SLANG_OVERRIDE; - virtual SlangResult setArrayCount(const BindLocation& location, int count) SLANG_OVERRIDE; - virtual void getRoots(Slang::List<BindLocation>& outLocations) SLANG_OVERRIDE; - -protected: -}; - - - -} // renderer_test - -#endif //BIND_LOCATION_H diff --git a/tools/render-test/cpu-compute-util.cpp b/tools/render-test/cpu-compute-util.cpp deleted file mode 100644 index 6682eef1a..000000000 --- a/tools/render-test/cpu-compute-util.cpp +++ /dev/null @@ -1,930 +0,0 @@ -#define _CRT_SECURE_NO_WARNINGS 1 - -#include "cpu-compute-util.h" - -#include "../../slang-com-helper.h" - -#include "../../source/core/slang-std-writers.h" -#include "../../source/core/slang-token-reader.h" - -#include "bind-location.h" - -#define SLANG_PRELUDE_NAMESPACE CPPPrelude -#include "../../prelude/slang-cpp-types.h" - -struct UniformState; - -namespace renderer_test { -using namespace Slang; - -static void _fixMipSize(uint32_t& ioDim, int mipLevel) -{ - uint32_t dim = ioDim; - if (dim > 0) - { - dim >>= mipLevel; - dim = (dim == 0) ? 1 : dim; - ioDim = dim; - } -} - -CPPPrelude::TextureDimensions _calcMipDims(int mipLevel, const CPPPrelude::TextureDimensions& inDims) -{ - if (mipLevel > 0 && mipLevel < int(inDims.numberOfLevels)) - { - CPPPrelude::TextureDimensions dims(inDims); - _fixMipSize(dims.width, mipLevel); - _fixMipSize(dims.height, mipLevel); - _fixMipSize(dims.depth, mipLevel); - return dims; - } - else - { - return inDims; - } -} - -template <int COUNT> -struct ValueTexture : public CPUComputeUtil::Resource, public CPPPrelude::ITexture -{ - // ITexture interface - virtual CPPPrelude::TextureDimensions GetDimensions(int mipLevel) SLANG_OVERRIDE - { - return _calcMipDims(mipLevel, m_dims); - } - virtual void Load(const int32_t* loc, void* out, size_t dataSize) SLANG_OVERRIDE - { - _set(out); - } - virtual void Sample(CPPPrelude::SamplerState samplerState, const float* loc, void* out, size_t dataSize) SLANG_OVERRIDE - { - _set(out); - } - virtual void SampleLevel(CPPPrelude::SamplerState samplerState, const float* loc, float level, void* out, size_t dataSize) SLANG_OVERRIDE - { - _set(out); - } - - ValueTexture(const CPPPrelude::TextureDimensions& dims, float value) : - m_value(value), - m_dims(dims) - { - m_interface = static_cast<CPPPrelude::ITexture*>(this); - } - - void _set(void* out) - { - float* dst = (float*)out; - for (int i = 0; i < COUNT; ++i) - { - dst[i] = m_value; - } - } - - float m_value; - CPPPrelude::TextureDimensions m_dims; -}; - -class FloatTextureData -{ -public: - FloatTextureData() {} - FloatTextureData(int elementCount, int dimCount, const uint32_t* dims) - { - init(elementCount, dimCount, dims); - } - - void init(int elementCount, int dimCount, const uint32_t* dims) - { - SLANG_ASSERT(elementCount >= 1 && elementCount <= 4); - SLANG_ASSERT(dimCount >= 1 && dimCount < 4); - - Index totalSize = 1; - - for (Index i = 0; i < Index(dimCount); ++i) - { - m_dims[i] = (dims[i] <= 0) ? 1 : dims[i]; - totalSize *= m_dims[i]; - } - - m_dimCount = uint8_t(dimCount); - m_elementCount = uint8_t(elementCount); - - // Set the array to hold the total capacity needed - m_values.setCount(totalSize); - } - - void setValue(float value) - { - const Index count = m_values.getCount(); - float* dst = m_values.getBuffer(); - - for (Index i = 0; i < count; ++i) - { - dst[i] = value; - } - } - - void setAt(const uint32_t* location, const float* value) - { - const Index index = _getIndex(location); - float* dst = &m_values[index]; - switch (m_elementCount) - { - case 1: dst[0] = value[0]; break; - case 2: dst[0] = value[0]; dst[1] = value[1]; break; - case 3: dst[0] = value[0]; dst[1] = value[1]; dst[2] = value[2]; break; - case 4: dst[0] = value[0]; dst[1] = value[1]; dst[2] = value[2]; dst[3] = value[3]; break; - } - } - - float* getAt(const uint32_t* location) - { - const Index index = _getIndex(location); - return &m_values[index]; - } - - void getAt(const uint32_t* location, float* dst) - { - const Index index = _getIndex(location); - float* value = &m_values[index]; - switch (m_elementCount) - { - case 1: dst[0] = value[0]; break; - case 2: dst[0] = value[0]; dst[1] = value[1]; break; - case 3: dst[0] = value[0]; dst[1] = value[1]; dst[2] = value[2]; break; - case 4: dst[0] = value[0]; dst[1] = value[1]; dst[2] = value[2]; dst[3] = value[3]; break; - } - } - - bool isLocationValid(const uint32_t* location) const - { - for (Index i = 0; i < m_dimCount; ++i) - { - const auto v = location[i]; - if (v >= m_dims[i]) - { - return false; - } - } - return true; - } - - Index _getIndex(const uint32_t* location) - { - const auto style = (m_dimCount << 2) | m_elementCount; - SLANG_ASSERT(isLocationValid(location)); - switch (m_dimCount) - { - default: return 0; - case 1: return (location[0] )* m_elementCount; - case 2: return (location[0] + location[1] * m_dims[0]) * m_elementCount; - case 3: return (location[0] + (location[1] + location[2] * m_dims[1]) * m_dims[0]) * m_elementCount; - case 4: return (location[0] + (location[1] + (location[2] + location[3] * m_dims[2]) * m_dims[1]) * m_dims[0]) * m_elementCount; - } - } - - uint8_t m_style; - uint8_t m_elementCount; ///< Number of elements in each value - - uint8_t m_dimCount; - uint32_t m_dims[4]; ///< Sizes in each dimension - - List<float> m_values; ///< Holds the contained data -}; - -// For a RWTexture we will define it to have memory, and that it can only be accessed via -struct FloatRWTexture : public CPUComputeUtil::Resource, public CPPPrelude::IRWTexture -{ - // IRWTexture - virtual CPPPrelude::TextureDimensions GetDimensions(int mipLevel) SLANG_OVERRIDE - { - return _calcMipDims(mipLevel, m_dims); - } - virtual void Load(const int32_t* loc, void* out, size_t dataSize) SLANG_OVERRIDE { m_data.getAt((const uint32_t*)loc, (float*)out); } - virtual void* refAt(const uint32_t* loc) SLANG_OVERRIDE { return m_data.getAt(loc); } - - virtual void Sample(CPPPrelude::SamplerState samplerState, const float* loc, void* out, size_t dataSize) SLANG_OVERRIDE - {} - - virtual void SampleLevel(CPPPrelude::SamplerState samplerState, const float* loc, float level, void* out, size_t dataSize) SLANG_OVERRIDE - {} - - FloatRWTexture(int elementCount, const CPPPrelude::TextureDimensions& inDims, float initialValue): - m_dims(inDims) - { - uint32_t dimSizes[4]; - int dimSizesCount = inDims.getDimSizes(dimSizes); - - m_data.init(elementCount, dimSizesCount, dimSizes); - m_data.setValue(initialValue); - m_interface = static_cast<CPPPrelude::IRWTexture*>(this); - } - - FloatTextureData m_data; - CPPPrelude::TextureDimensions m_dims; -}; - -static int _calcDims(const InputTextureDesc& desc, slang::TypeLayoutReflection* typeLayout, CPPPrelude::TextureDimensions& outDims) -{ - outDims.reset(); - SlangResourceShape shape = SLANG_TEXTURE_2D; - if (typeLayout) - { - const auto kind = typeLayout->getKind(); - SLANG_ASSERT(kind == slang::TypeReflection::Kind::Resource); - auto type = typeLayout->getType(); - shape = type->getResourceShape(); - } - else - { - if (desc.isCube) - { - shape = SLANG_TEXTURE_CUBE; - } - else - { - switch (desc.dimension) - { - case 1: - shape = SLANG_TEXTURE_1D; - break; - case 2: - shape = SLANG_TEXTURE_2D; - break; - case 3: - shape = SLANG_TEXTURE_3D; - break; - default: - break; - } - } - } - - outDims.shape = shape; - - const uint32_t size = uint32_t(desc.size); - const auto baseShape = (shape & SLANG_RESOURCE_BASE_SHAPE_MASK); - - int dimsCount = 0; - - switch (baseShape) - { - case SLANG_TEXTURE_1D: - { - outDims.width = size; - break; - } - case SLANG_TEXTURE_2D: - { - outDims.width = size; - outDims.height = size; - break; - } - case SLANG_TEXTURE_3D: - { - outDims.width = size; - outDims.height = size; - outDims.depth = size; - break; - } - case SLANG_TEXTURE_CUBE: - { - outDims.width = size; - outDims.height = size; - break; - } - } - - if (shape & SLANG_TEXTURE_ARRAY_FLAG) - { - outDims.arrayElementCount = uint32_t(desc.arrayLength); - } - - int maxMipCount = outDims.calcMaxMIPLevels(); - SLANG_ASSERT(desc.mipMapCount <= maxMipCount); - - outDims.numberOfLevels = (desc.mipMapCount == 0) ? uint32_t(maxMipCount) : uint32_t(desc.mipMapCount); - - return dimsCount; -} - - -static CPUComputeUtil::Resource* _newReadTexture(int elemCount, const CPPPrelude::TextureDimensions& dims, float initialValue) -{ - switch (elemCount) - { - case 1: return new ValueTexture<1>(dims, initialValue); - case 2: return new ValueTexture<2>(dims, initialValue); - case 3: return new ValueTexture<3>(dims, initialValue); - case 4: return new ValueTexture<4>(dims, initialValue); - default: break; - } - return nullptr; -} - -static SlangResult _newTexture(const InputTextureDesc& desc, slang::TypeLayoutReflection* typeLayout, RefPtr<CPUComputeUtil::Resource>& outResource) -{ - SlangResourceAccess access = SLANG_RESOURCE_ACCESS_READ; - SlangResourceShape shape = SLANG_TEXTURE_2D; - int elemCount = 1; - if (typeLayout) - { - const auto kind = typeLayout->getKind(); - SLANG_ASSERT(kind == slang::TypeReflection::Kind::Resource); - - auto type = typeLayout->getType(); - shape = type->getResourceShape(); - - access = type->getResourceAccess(); - slang::TypeReflection* typeReflection = typeLayout->getResourceResultType(); - if (typeReflection->getKind() == slang::TypeReflection::Kind::Vector) - { - elemCount = int(typeReflection->getElementCount()); - } - } - else - { - if (desc.isCube) - { - shape = SLANG_TEXTURE_CUBE; - } - else - { - switch (desc.dimension) - { - case 1: - shape = SLANG_TEXTURE_1D; - break; - case 2: - shape = SLANG_TEXTURE_2D; - break; - case 3: - shape = SLANG_TEXTURE_3D; - break; - default: - break; - } - } - if (desc.isRWTexture) - access = SLANG_RESOURCE_ACCESS_READ_WRITE; - elemCount = 4; - } - - // TODO(JS): Currently we support only textures who's content is either - // 0 or 1. This is because this is easy to implement. - // Will need to do something better in the future.. - - float initialValue = 0.0f; - - switch (desc.content) - { - case InputTextureContent::One: initialValue = 1.0f; break; - case InputTextureContent::Zero: initialValue = 0.0f; break; - default: break; - } - - CPPPrelude::TextureDimensions dims; - _calcDims(desc, typeLayout, dims); - - // These need a different style of texture if can be written to - if (access == SLANG_RESOURCE_ACCESS_READ_WRITE) - { - - switch (shape) - { - case SLANG_TEXTURE_1D: - case SLANG_TEXTURE_2D: - case SLANG_TEXTURE_3D: - case SLANG_TEXTURE_CUBE: - case SLANG_TEXTURE_1D_ARRAY: - case SLANG_TEXTURE_2D_ARRAY: - { - outResource = new FloatRWTexture(elemCount, dims, initialValue); - return SLANG_OK; - } - } - } - else - { - outResource = _newReadTexture(elemCount, dims, initialValue); - return outResource ? SLANG_OK : SLANG_FAIL; - } - - return SLANG_FAIL; -} - -/* static */bool CPUComputeUtil::hasFeature(const UnownedStringSlice& feature) -{ - SLANG_UNUSED(feature); - // CPU has no specific support requirements - return false; -} - -SlangResult CPUComputeUtil::fillRuntimeHandleInBuffers( - ShaderCompilerUtil::OutputAndLayout& compilationAndLayout, - Context& context, - ISlangSharedLibrary* sharedLib) -{ - auto request = compilationAndLayout.output.getRequestForReflection(); - Slang::ComPtr<slang::ISession> linkage; - spCompileRequest_getSession(request, linkage.writeRef()); - auto& inputLayout = compilationAndLayout.layout; - for (auto& entry : inputLayout.entries) - { - for (auto& rtti : entry.rttiEntries) - { - uint64_t ptrValue = 0; - switch (rtti.type) - { - case RTTIDataEntryType::RTTIObject: - { - auto reflection = - slang::ShaderReflection::get(request); - auto concreteType = reflection->findTypeByName(rtti.typeName.getBuffer()); - ComPtr<ISlangBlob> outName; - linkage->getTypeRTTIMangledName(concreteType, outName.writeRef()); - if (!outName) - return SLANG_FAIL; - ptrValue = (uint64_t)sharedLib->findSymbolAddressByName((char*)outName->getBufferPointer()); - } - break; - case RTTIDataEntryType::WitnessTable: - { - auto reflection = slang::ShaderReflection::get(request); - auto concreteType = reflection->findTypeByName(rtti.typeName.getBuffer()); - if (!concreteType) - return SLANG_FAIL; - auto interfaceType = reflection->findTypeByName(rtti.interfaceName.getBuffer()); - if (!interfaceType) - return SLANG_FAIL; - uint32_t id = -1; - linkage->getTypeConformanceWitnessSequentialID(concreteType, interfaceType, &id); - ptrValue = id; - break; - } - default: - break; - } - if (rtti.offset >= 0 && rtti.offset + sizeof(ptrValue) <= entry.bufferData.getCount() * sizeof(decltype(entry.bufferData[0]))) - { - memcpy( - ((char*)entry.bufferData.getBuffer()) + rtti.offset, - &ptrValue, - sizeof(ptrValue)); - } - else - { - return SLANG_FAIL; - } - } - for (auto& handle : entry.bindlessHandleEntry) - { - RefPtr<Resource> resource; - uint64_t handleValue = 0; - if (context.m_bindlessResources.TryGetValue(handle.name, resource)) - { - handleValue = (uint64_t)resource->getInterface(); - } - else - { - return SLANG_FAIL; - } - if (handle.offset >= 0 && - handle.offset + sizeof(uint64_t) <= - entry.bufferData.getCount() * sizeof(decltype(entry.bufferData[0]))) - { - memcpy( - ((char*)entry.bufferData.getBuffer()) + handle.offset, - &handleValue, - sizeof(handleValue)); - } - else - { - return SLANG_FAIL; - } - } - } - return SLANG_OK; -} - -/* static */SlangResult CPUComputeUtil::calcBindings(const ShaderCompilerUtil::OutputAndLayout& compilationAndLayout, Context& outContext) -{ - auto request = compilationAndLayout.output.getRequestForReflection(); - auto reflection = (slang::ShaderReflection*) spGetReflection(request); - - const auto& sourcePath = compilationAndLayout.sourcePath; - - outContext.m_bindRoot.init(&outContext.m_bindSet, reflection, 0); - - // This will set up constant buffer that are contained from the roots - outContext.m_bindRoot.addDefaultValues(); - - // Okay lets iterate adding buffers - auto outStream = StdWriters::getOut(); - SLANG_RETURN_ON_FAIL(ShaderInputLayout::addBindSetValues(compilationAndLayout.layout.entries, compilationAndLayout.sourcePath, outStream, outContext.m_bindRoot)); - ShaderInputLayout::getValueBuffers(compilationAndLayout.layout.entries, outContext.m_bindSet, outContext.m_buffers); - - // Okay we need to find all of the bindings and match up to those in the layout - const ShaderInputLayout& layout = compilationAndLayout.layout; - - // The final stage is to actual set up the CPU based variables - - { - // First create all of the resources for the values - // We don't need to create anything backed by a buffer on CPU, as the memory buffer as provided - // by BindSet::Resource can just be used - { - const auto& values = outContext.m_bindSet.getValues(); - - for (BindSet::Value* value : values) - { - auto typeLayout = value->m_type; - if (typeLayout == nullptr) - { - // We need type layout here to create anything - continue; - } - - // TODO(JS): - // Here we should be using information about what textures hold to create appropriate - // textures. For now we only support 2d textures that always return 1. - const auto kind = typeLayout->getKind(); - switch (kind) - { - case slang::TypeReflection::Kind::Resource: - { - auto type = typeLayout->getType(); - auto shape = type->getResourceShape(); - - auto access = type->getResourceAccess(); - - auto baseShape = shape & SLANG_RESOURCE_BASE_SHAPE_MASK; - switch (baseShape) - { - case SLANG_TEXTURE_1D: - case SLANG_TEXTURE_2D: - case SLANG_TEXTURE_3D: - case SLANG_TEXTURE_CUBE: - { - SLANG_ASSERT(value->m_userIndex >= 0); - auto& srcEntry = layout.entries[value->m_userIndex]; - - RefPtr<CPUComputeUtil::Resource> resource; - SLANG_RETURN_ON_FAIL(_newTexture(srcEntry.textureDesc, typeLayout, resource)); - value->m_target = resource; - break; - } - case SLANG_TEXTURE_BUFFER: - { - // Need a CPU impl for these... - // For now we can just leave as target will just be nullptr - break; - } - - case SLANG_BYTE_ADDRESS_BUFFER: - case SLANG_STRUCTURED_BUFFER: - { - // On CPU we just use the memory in the BindSet buffer, so don't need to create anything - break; - } - - } - } - default: break; - } - } - } - - // Now we need to go through all of the bindings and set the appropriate data - { - List<BindLocation> locations; - List<BindSet::Value*> values; - outContext.m_bindSet.getBindings(locations, values); - - for (Index i = 0; i < locations.getCount(); ++i) - { - const auto& location = locations[i]; - BindSet::Value* value = values[i]; - - // Okay now we need to set up the actual handles that CPU will follow. - auto typeLayout = location.getTypeLayout(); - - const auto kind = typeLayout->getKind(); - switch (kind) - { - case slang::TypeReflection::Kind::Array: - { - auto elementCount = int(typeLayout->getElementCount()); - if (elementCount == 0) - { - CPPPrelude::Array<uint8_t>* array = location.getUniform<CPPPrelude::Array<uint8_t> >(); - - // If set, we setup the data needed for array on CPU side - if (value && array) - { - array->data = value->m_data; - array->count = value->m_elementCount; - } - } - break; - } - case slang::TypeReflection::Kind::ConstantBuffer: - case slang::TypeReflection::Kind::ParameterBlock: - { - // These map down to pointers. In our case the contents of the resource - void* data = value ? value->m_data : nullptr; - location.setUniform(&data, sizeof(data)); - break; - } - case slang::TypeReflection::Kind::Resource: - { - auto type = typeLayout->getType(); - auto shape = type->getResourceShape(); - - //auto access = type->getResourceAccess(); - - switch (shape & SLANG_RESOURCE_BASE_SHAPE_MASK) - { - default: - assert(!"unhandled case"); - break; - case SLANG_TEXTURE_1D: - case SLANG_TEXTURE_2D: - case SLANG_TEXTURE_3D: - case SLANG_TEXTURE_CUBE: - case SLANG_TEXTURE_BUFFER: - { - Resource* targetResource = value ? static_cast<Resource*>(value->m_target.Ptr()) : nullptr; - void* intf = targetResource ? targetResource->getInterface() : nullptr; - *location.getUniform<void*>() = intf; - break; - } - case SLANG_STRUCTURED_BUFFER: - { - if (value) - { - auto& dstBuf = *location.getUniform<CPPPrelude::StructuredBuffer<uint8_t> >(); - dstBuf.data = (uint8_t*)value->m_data; - dstBuf.count = value->m_elementCount; - } - break; - } - case SLANG_BYTE_ADDRESS_BUFFER: - { - if (value) - { - auto& dstBuf = *location.getUniform<CPPPrelude::ByteAddressBuffer>(); - dstBuf.data = (uint32_t*)value->m_data; - dstBuf.sizeInBytes = value->m_sizeInBytes; - } - break; - } - } - } - } - } - } - } - return SLANG_OK; -} - -/* static */SlangResult CPUComputeUtil::calcExecuteInfo(ExecuteStyle style, ISlangSharedLibrary* sharedLib, const uint32_t dispatchSize[3], const ShaderCompilerUtil::OutputAndLayout& compilationAndLayout, Context& context, ExecuteInfo& out) -{ - auto request = compilationAndLayout.output.getRequestForReflection(); - auto reflection = (slang::ShaderReflection*) spGetReflection(request); - - slang::EntryPointReflection* entryPoint = nullptr; - auto entryPointCount = reflection->getEntryPointCount(); - SLANG_ASSERT(entryPointCount == 1); - - entryPoint = reflection->getEntryPointByIndex(0); - - const char* entryPointName = entryPoint->getName(); - - // Copy dispatch size - for (int i = 0; i < 3; ++i) - { - out.m_dispatchSize[i] = dispatchSize[i]; - } - - out.m_style = style; - out.m_uniformState = (void*)context.m_bindRoot.getRootData(); - out.m_uniformEntryPointParams = (void*)context.m_bindRoot.getEntryPointData(); - - switch (style) - { - case ExecuteStyle::Group: - { - StringBuilder groupEntryPointName; - groupEntryPointName << entryPointName << "_Group"; - - CPPPrelude::ComputeFunc groupFunc = (CPPPrelude::ComputeFunc)sharedLib->findFuncByName(groupEntryPointName.getBuffer()); - if (!groupFunc) - { - return SLANG_FAIL; - } - - out.m_func = (ExecuteInfo::Func)groupFunc; - break; - } - case ExecuteStyle::GroupRange: - { - CPPPrelude::ComputeFunc groupRangeFunc = nullptr; - groupRangeFunc = (CPPPrelude::ComputeFunc)sharedLib->findFuncByName(entryPointName); - if (!groupRangeFunc) - { - return SLANG_FAIL; - } - out.m_func = (ExecuteInfo::Func)groupRangeFunc; - break; - } - case ExecuteStyle::Thread: - { - StringBuilder threadEntryPointName; - threadEntryPointName << entryPointName << "_Thread"; - - CPPPrelude::ComputeThreadFunc threadFunc = (CPPPrelude::ComputeThreadFunc)sharedLib->findFuncByName(threadEntryPointName.getBuffer()); - if (!threadFunc) - { - return SLANG_FAIL; - } - - SlangUInt numThreadsPerAxis[3]; - entryPoint->getComputeThreadGroupSize(3, numThreadsPerAxis); - for (int i = 0; i < 3; ++i) - { - out.m_numThreadsPerAxis[i] = uint32_t(numThreadsPerAxis[i]); - } - out.m_func = (ExecuteInfo::Func)threadFunc; - break; - } - default: - { - return SLANG_FAIL; - } - } - - return SLANG_OK; -} - -/* static */SlangResult CPUComputeUtil::execute(const ExecuteInfo& info) -{ - void* uniformState = info.m_uniformState; - void* uniformEntryPointParams = info.m_uniformEntryPointParams; - - switch (info.m_style) - { - case ExecuteStyle::Group: - { - CPPPrelude::ComputeFunc groupFunc = (CPPPrelude::ComputeFunc)info.m_func; - CPPPrelude::ComputeVaryingInput varying; - - const uint32_t groupXCount = info.m_dispatchSize[0]; - const uint32_t groupYCount = info.m_dispatchSize[1]; - const uint32_t groupZCount = info.m_dispatchSize[2]; - - for (uint32_t groupZ = 0; groupZ < groupZCount; ++groupZ) - { - for (uint32_t groupY = 0; groupY < groupYCount; ++groupY) - { - for (uint32_t groupX = 0; groupX < groupXCount; ++groupX) - { - varying.startGroupID = { groupX, groupY, groupZ }; - groupFunc(&varying, uniformEntryPointParams, uniformState); - } - } - } - break; - } - case ExecuteStyle::GroupRange: - { - CPPPrelude::ComputeFunc groupRangeFunc = (CPPPrelude::ComputeFunc)info.m_func; - CPPPrelude::ComputeVaryingInput varying; - - varying.startGroupID = {}; - varying.endGroupID = { info.m_dispatchSize[0], info.m_dispatchSize[1], info.m_dispatchSize[2] }; - - groupRangeFunc(&varying, uniformEntryPointParams, uniformState); - break; - } - case ExecuteStyle::Thread: - { - CPPPrelude::ComputeThreadFunc threadFunc = (CPPPrelude::ComputeThreadFunc)info.m_func; - CPPPrelude::ComputeThreadVaryingInput varying; - - const uint32_t groupXCount = info.m_dispatchSize[0]; - const uint32_t groupYCount = info.m_dispatchSize[1]; - const uint32_t groupZCount = info.m_dispatchSize[2]; - - const uint32_t threadXCount = uint32_t(info.m_numThreadsPerAxis[0]); - const uint32_t threadYCount = uint32_t(info.m_numThreadsPerAxis[1]); - const uint32_t threadZCount = uint32_t(info.m_numThreadsPerAxis[2]); - - for (uint32_t groupZ = 0; groupZ < groupZCount; ++groupZ) - { - for (uint32_t groupY = 0; groupY < groupYCount; ++groupY) - { - for (uint32_t groupX = 0; groupX < groupXCount; ++groupX) - { - varying.groupID = { groupX, groupY, groupZ }; - - for (uint32_t z = 0; z < threadZCount; ++z) - { - varying.groupThreadID.z = z; - for (uint32_t y = 0; y < threadYCount; ++y) - { - varying.groupThreadID.y = y; - for (uint32_t x = 0; x < threadXCount; ++x) - { - varying.groupThreadID.x = x; - - threadFunc(&varying, uniformEntryPointParams, uniformState); - } - } - } - } - } - } - break; - } - default: return SLANG_FAIL; - } - - return SLANG_OK; -} - - -/* static */ SlangResult CPUComputeUtil::checkStyleConsistency(ISlangSharedLibrary* sharedLib, const uint32_t dispatchSize[3], const ShaderCompilerUtil::OutputAndLayout& compilationAndLayout) -{ - Context context; - SLANG_RETURN_ON_FAIL(CPUComputeUtil::calcBindings(compilationAndLayout, context)); - - // Run the thread style to test against - { - ExecuteInfo info; - SLANG_RETURN_ON_FAIL(calcExecuteInfo(ExecuteStyle::Thread, sharedLib, dispatchSize, compilationAndLayout, context, info)); - SLANG_RETURN_ON_FAIL(execute(info)); - } - - ExecuteStyle styles[] = { ExecuteStyle::Group, ExecuteStyle::GroupRange }; - for (auto style: styles) - { - Context checkContext; - SLANG_RETURN_ON_FAIL(CPUComputeUtil::calcBindings(compilationAndLayout, checkContext)); - - ExecuteInfo info; - SLANG_RETURN_ON_FAIL(calcExecuteInfo(style, sharedLib, dispatchSize, compilationAndLayout, checkContext, info)); - SLANG_RETURN_ON_FAIL(execute(info)); - - // Make sure the out buffers are all the same - - const auto& entries = compilationAndLayout.layout.entries; - - for (int i = 0; i < entries.getCount(); ++i) - { - const auto& entry = entries[i]; - if (entry.isOutput) - { - BindSet::Value* buffer = context.m_buffers[i]; - BindSet::Value* checkBuffer = checkContext.m_buffers[i]; - - if (buffer->m_sizeInBytes != checkBuffer->m_sizeInBytes || - ::memcmp(buffer->m_data, checkBuffer->m_data, buffer->m_sizeInBytes) != 0) - { - return SLANG_FAIL; - } - } - } - } - - return SLANG_OK; -} - -SlangResult renderer_test::CPUComputeUtil::createBindlessResources( - ShaderCompilerUtil::OutputAndLayout& outputAndLayout, Context& context) -{ - auto outStream = StdWriters::getOut(); - for (auto& entry : outputAndLayout.layout.entries) - { - if (!entry.isBindlessObject) - continue; - switch (entry.type) - { - case ShaderInputType::Texture: - { - RefPtr<Resource> resource; - _newTexture(entry.textureDesc, nullptr, resource); - context.m_bindlessResources.Add(entry.name, resource); - break; - } - default: - outStream.print("Unsupported bindless resource type.\n"); - return SLANG_FAIL; - } - } - return SLANG_OK; -} - - -} // renderer_test diff --git a/tools/render-test/cpu-compute-util.h b/tools/render-test/cpu-compute-util.h deleted file mode 100644 index b1de6ce85..000000000 --- a/tools/render-test/cpu-compute-util.h +++ /dev/null @@ -1,79 +0,0 @@ -#ifndef CPU_COMPUTE_UTIL_H -#define CPU_COMPUTE_UTIL_H - -#include "slang-support.h" -#include "options.h" - -#include "bind-location.h" - -#include "../../source/core/slang-basic.h" - -namespace renderer_test { - -struct CPUComputeUtil -{ - enum class ExecuteStyle - { - Unknown, - Thread, - Group, - GroupRange, - }; - - struct Resource : public Slang::RefObject - { - void* getInterface() const { return m_interface; } - void* m_interface; - }; - - struct Context - { - /// Holds the binding information - BindSet m_bindSet; - CPULikeBindRoot m_bindRoot; - - /// Buffers are held in same order as entries in layout (useful for dumping out bindings) - Slang::List<BindSet::Value*> m_buffers; - - /// Bindless resource objects - Slang::OrderedDictionary<Slang::String, Slang::RefPtr<Resource>> m_bindlessResources; - }; - - struct ExecuteInfo - { - typedef void (*Func)(); - - ExecuteStyle m_style; - Func m_func; - uint32_t m_dispatchSize[3]; - uint32_t m_numThreadsPerAxis[3]; - - void* m_uniformState; - void* m_uniformEntryPointParams; - }; - - /// True if this feature is available on CPU - static bool hasFeature(const Slang::UnownedStringSlice& feature); - - /// Runs code across run styles and makes sure output buffers match - static SlangResult checkStyleConsistency(ISlangSharedLibrary* sharedLib, const uint32_t dispatchSize[3], const ShaderCompilerUtil::OutputAndLayout& compilationAndLayout); - - static SlangResult createBindlessResources(ShaderCompilerUtil::OutputAndLayout& compilationAndLayout, Context& context); - - /// Query and fill in the RTTI pointer and runtime resource handle values in data buffers. - static SlangResult fillRuntimeHandleInBuffers( - ShaderCompilerUtil::OutputAndLayout& compilationAndLayout, - Context& context, - ISlangSharedLibrary* sharedLib); - - static SlangResult calcBindings(const ShaderCompilerUtil::OutputAndLayout& compilationAndLayout, Context& outContext); - - static SlangResult calcExecuteInfo(ExecuteStyle style, ISlangSharedLibrary* sharedLib, const uint32_t dispatchSize[3], const ShaderCompilerUtil::OutputAndLayout& compilationAndLayout, Context& context, ExecuteInfo& out); - - static SlangResult execute(const ExecuteInfo& info); -}; - - -} // renderer_test - -#endif //CPU_COMPUTE_UTIL_H diff --git a/tools/render-test/cuda/cuda-compute-util.cpp b/tools/render-test/cuda/cuda-compute-util.cpp deleted file mode 100644 index bd77919a2..000000000 --- a/tools/render-test/cuda/cuda-compute-util.cpp +++ /dev/null @@ -1,1872 +0,0 @@ - -#include "cuda-compute-util.h" - -#include "slang-com-helper.h" - -#include "source/core/slang-std-writers.h" -#include "source/core/slang-token-reader.h" -#include "source/core/slang-semantic-version.h" - -#include "../bind-location.h" - -#include <cuda.h> - -#include <cuda_runtime_api.h> - -// TODO: should conditionalize this on OptiX being present -#ifdef RENDER_TEST_OPTIX - -// The `optix_stubs.h` header produces warnings when compiled with MSVC -#ifdef _MSC_VER -#pragma warning(disable: 4996) -#endif -#include <optix.h> -#include <optix_function_table_definition.h> -#include <optix_stubs.h> -#endif - -namespace renderer_test { -using namespace Slang; - -SLANG_FORCE_INLINE static bool _isError(CUresult result) { return result != 0; } -SLANG_FORCE_INLINE static bool _isError(cudaError_t result) { return result != 0; } - -// A enum used to control if errors are reported on failure of CUDA call. -enum class CUDAReportStyle -{ - Normal, - Silent, -}; - -struct CUDAErrorInfo -{ - CUDAErrorInfo(const char* filePath, int lineNo, const char* errorName = nullptr, const char* errorString = nullptr): - m_filePath(filePath), - m_lineNo(lineNo), - m_errorName(errorName), - m_errorString(errorString) - { - } - SlangResult handle() const - { - StringBuilder builder; - builder << "Error: " << m_filePath << " (" << m_lineNo << ") :"; - - if (m_errorName) - { - builder << m_errorName << " : "; - } - if (m_errorString) - { - builder << m_errorString; - } - - StdWriters::getError().put(builder.getUnownedSlice()); - - //Slang::signalUnexpectedError(builder.getBuffer()); - return SLANG_FAIL; - } - - const char* m_filePath; - int m_lineNo; - const char* m_errorName; - const char* m_errorString; -}; - -#if 1 -// If this code path is enabled, CUDA errors will be reported directly to StdWriter::out stream. - -static SlangResult _handleCUDAError(CUresult cuResult, const char* file, int line) -{ - CUDAErrorInfo info(file, line); - cuGetErrorString(cuResult, &info.m_errorString); - cuGetErrorName(cuResult, &info.m_errorName); - return info.handle(); -} - -static SlangResult _handleCUDAError(cudaError_t error, const char* file, int line) -{ - return CUDAErrorInfo(file, line, cudaGetErrorName(error), cudaGetErrorString(error)).handle(); -} - -#define SLANG_CUDA_HANDLE_ERROR(x) _handleCUDAError(_res, __FILE__, __LINE__) - -#else -// If this code path is enabled, errors are not reported, but can have an assert enabled - -static SlangResult _handleCUDAError(CUresult cuResult) -{ - SLANG_UNUSED(cuResult); - //SLANG_ASSERT(!"Failed CUDA call"); - return SLANG_FAIL; -} - -static SlangResult _handleCUDAError(cudaError_t error) -{ - SLANG_UNUSED(error); - //SLANG_ASSERT(!"Failed CUDA call"); - return SLANG_FAIL; -} - -#define SLANG_CUDA_HANDLE_ERROR(x) _handleCUDAError(_res) -#endif - -#define SLANG_CUDA_RETURN_ON_FAIL(x) { auto _res = x; if (_isError(_res)) return SLANG_CUDA_HANDLE_ERROR(_res); } -#define SLANG_CUDA_RETURN_WITH_REPORT_ON_FAIL(x, r) \ - { \ - auto _res = x; \ - if (_isError(_res)) \ - { \ - return (r == CUDAReportStyle::Normal) ? SLANG_CUDA_HANDLE_ERROR(_res) : SLANG_FAIL; \ - } \ - } \ - -#define SLANG_CUDA_ASSERT_ON_FAIL(x) { auto _res = x; if (_isError(_res)) { SLANG_ASSERT(!"Failed CUDA call"); }; } - -#ifdef RENDER_TEST_OPTIX - -static bool _isError(OptixResult result) { return result != OPTIX_SUCCESS; } - -#if 1 -static SlangResult _handleOptixError(OptixResult result, char const* file, int line) -{ - fprintf(stderr, "%s(%d): optix: %s (%s)\n", - file, - line, - optixGetErrorString(result), - optixGetErrorName(result)); - return SLANG_FAIL; -} -#define SLANG_OPTIX_HANDLE_ERROR(RESULT) _handleOptixError(RESULT, __FILE__, __LINE__) -#else -#define SLANG_OPTIX_HANDLE_ERROR(RESULT) SLANG_FAIL -#endif - -#define SLANG_OPTIX_RETURN_ON_FAIL(EXPR) do { auto _res = EXPR; if(_isError(_res)) return SLANG_OPTIX_HANDLE_ERROR(_res); } while(0) - -void _optixLogCallback(unsigned int level, const char* tag, const char* message, void* userData) -{ - fprintf(stderr, "optix: %s (%s)\n", - message, - tag); -} - -#endif - -class MemoryCUDAResource : public CUDAResource -{ -public: - typedef CUDAResource Super; - - /// Dtor - ~MemoryCUDAResource() - { - if (m_cudaMemory) - { - SLANG_CUDA_ASSERT_ON_FAIL(cuMemFree(m_cudaMemory)); - } - } - - static MemoryCUDAResource* asResource(BindSet::Value* value) - { - return value ? dynamic_cast<MemoryCUDAResource*>(value->m_target.Ptr()) : nullptr; - } - /// Helper function to get the CUDA memory pointer when given a value - static CUdeviceptr getCUDAData(BindSet::Value* value) - { - auto resource = asResource(value); - return resource ? resource->m_cudaMemory : CUdeviceptr(); - } - - virtual uint64_t getBindlessHandle() override - { - return (uint64_t)m_cudaMemory; - } - - CUdeviceptr m_cudaMemory = CUdeviceptr(); -}; - -class TextureCUDAResource : public CUDAResource -{ -public: - typedef CUDAResource Super; - - ~TextureCUDAResource() - { - if (m_cudaSurfObj) - { - SLANG_CUDA_ASSERT_ON_FAIL(cuSurfObjectDestroy(m_cudaSurfObj)); - } - if (m_cudaTexObj) - { - SLANG_CUDA_ASSERT_ON_FAIL(cuTexObjectDestroy(m_cudaTexObj)); - } - if (m_cudaArray) - { - SLANG_CUDA_ASSERT_ON_FAIL(cuArrayDestroy(m_cudaArray)); - } - if (m_cudaMipMappedArray) - { - SLANG_CUDA_ASSERT_ON_FAIL(cuMipmappedArrayDestroy(m_cudaMipMappedArray)); - } - } - - static TextureCUDAResource* asResource(BindSet::Value* value) - { - return value ? dynamic_cast<TextureCUDAResource*>(value->m_target.Ptr()) : nullptr; - } - - static CUtexObject getTexObject(BindSet::Value* value) - { - auto resource = asResource(value); - // It's an assumption here that 0 is okay for null. Seems to work... - return resource ? resource->m_cudaTexObj : CUtexObject(0); - } - - static CUsurfObject getSurfObject(BindSet::Value* value) - { - auto resource = asResource(value); - return resource ? resource->m_cudaSurfObj : CUsurfObject(0); - } - - virtual uint64_t getBindlessHandle() override - { - return (uint64_t)m_cudaTexObj; - } - - // The texObject is for reading 'texture' like things. This is an opaque type, that's backed by a long long - CUtexObject m_cudaTexObj = CUtexObject(); - - // The surfObj is for reading/writing 'texture like' things, but not for sampling. - CUsurfObject m_cudaSurfObj = CUsurfObject(); - - CUarray m_cudaArray = CUarray(); - CUmipmappedArray m_cudaMipMappedArray = CUmipmappedArray(); -}; - -class ScopeCUDAModule -{ -public: - - operator CUmodule () const { return m_module; } - - ScopeCUDAModule(): m_module(nullptr) {} - SlangResult load(const void* image) - { - release(); - SLANG_CUDA_RETURN_ON_FAIL(cuModuleLoadData(&m_module, image)); - return SLANG_OK; - } - void release() - { - if (m_module) - { - cuModuleUnload(m_module); - m_module = nullptr; - } - } - - ~ScopeCUDAModule() { release(); } - - CUmodule m_module; -}; - -class ScopeCUDAStream -{ -public: - - SlangResult init(unsigned int flags) - { - release(); - SLANG_ASSERT(m_stream == nullptr); - SLANG_CUDA_RETURN_ON_FAIL(cuStreamCreate(&m_stream, flags)); - return SLANG_OK; - } - - SlangResult sync() - { - if (m_stream) - { - SLANG_CUDA_RETURN_ON_FAIL(cuStreamSynchronize(m_stream)); - } - else - { - SLANG_CUDA_RETURN_ON_FAIL(cudaDeviceSynchronize()); - } - return SLANG_OK; - } - - void release() - { - if (m_stream) - { - sync(); - SLANG_CUDA_ASSERT_ON_FAIL(cuStreamDestroy(m_stream)); - m_stream = nullptr; - } - } - - ScopeCUDAStream():m_stream(nullptr) {} - - ~ScopeCUDAStream() { release(); } - - operator CUstream () const { return m_stream; } - - CUstream m_stream; -}; - -static int _calcSMCountPerMultiProcessor(int major, int minor) -{ - // Defines for GPU Architecture types (using the SM version to determine - // the # of cores per SM - struct SMInfo - { - int sm; // 0xMm (hexadecimal notation), M = SM Major version, and m = SM minor version - int coreCount; - }; - - static const SMInfo infos[] = - { - {0x30, 192}, - {0x32, 192}, - {0x35, 192}, - {0x37, 192}, - {0x50, 128}, - {0x52, 128}, - {0x53, 128}, - {0x60, 64}, - {0x61, 128}, - {0x62, 128}, - {0x70, 64}, - {0x72, 64}, - {0x75, 64} - }; - - const int sm = ((major << 4) + minor); - for (Index i = 0; i < SLANG_COUNT_OF(infos); ++i) - { - if (infos[i].sm == sm) - { - return infos[i].coreCount; - } - } - - const auto& last = infos[SLANG_COUNT_OF(infos) - 1]; - - // It must be newer presumably - SLANG_ASSERT(sm > last.sm); - - // Default to the last entry - return last.coreCount; -} - -static SlangResult _findMaxFlopsDeviceIndex(int* outDeviceIndex) -{ - int smPerMultiproc = 0; - int maxPerfDevice = -1; - int deviceCount = 0; - int devicesProhibited = 0; - - uint64_t maxComputePerf = 0; - SLANG_CUDA_RETURN_ON_FAIL(cudaGetDeviceCount(&deviceCount)); - - // Find the best CUDA capable GPU device - for (int currentDevice = 0; currentDevice < deviceCount; ++currentDevice) - { - int computeMode = -1, major = 0, minor = 0; - SLANG_CUDA_RETURN_ON_FAIL(cudaDeviceGetAttribute(&computeMode, cudaDevAttrComputeMode, currentDevice)); - SLANG_CUDA_RETURN_ON_FAIL(cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor, currentDevice)); - SLANG_CUDA_RETURN_ON_FAIL(cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor, currentDevice)); - - // If this GPU is not running on Compute Mode prohibited, - // then we can add it to the list - if (computeMode != cudaComputeModeProhibited) - { - if (major == 9999 && minor == 9999) - { - smPerMultiproc = 1; - } - else - { - smPerMultiproc = _calcSMCountPerMultiProcessor(major, minor); - } - - int multiProcessorCount = 0, clockRate = 0; - SLANG_CUDA_RETURN_ON_FAIL(cudaDeviceGetAttribute(&multiProcessorCount, cudaDevAttrMultiProcessorCount, currentDevice)); - SLANG_CUDA_RETURN_ON_FAIL(cudaDeviceGetAttribute(&clockRate, cudaDevAttrClockRate, currentDevice)); - uint64_t compute_perf = uint64_t(multiProcessorCount) * smPerMultiproc * clockRate; - - if (compute_perf > maxComputePerf) - { - maxComputePerf = compute_perf; - maxPerfDevice = currentDevice; - } - } - else - { - devicesProhibited++; - } - } - - if (maxPerfDevice < 0) - { - return SLANG_FAIL; - } - - *outDeviceIndex = maxPerfDevice; - return SLANG_OK; -} - -static SlangResult _initCuda(CUDAReportStyle reportType = CUDAReportStyle::Normal) -{ - static CUresult res = cuInit(0); - SLANG_CUDA_RETURN_WITH_REPORT_ON_FAIL(res, reportType); - return SLANG_OK; -} - -class ScopeCUDAContext -{ -public: - ScopeCUDAContext() : - m_context(nullptr), - m_device(-1), - m_deviceIndex(-1) - {} - - SlangResult init(unsigned int flags, int deviceIndex, CUDAReportStyle reportType = CUDAReportStyle::Normal) - { - SLANG_RETURN_ON_FAIL(_initCuda(reportType)); - - if (m_context) - { - cuCtxDestroy(m_context); - m_context = nullptr; - } - - m_deviceIndex = deviceIndex; - SLANG_CUDA_RETURN_ON_FAIL(cuDeviceGet(&m_device, deviceIndex)); - - SLANG_CUDA_RETURN_WITH_REPORT_ON_FAIL(cuCtxCreate(&m_context, flags, m_device), reportType); - return SLANG_OK; - } - - SlangResult init(unsigned int flags, CUDAReportStyle reportType = CUDAReportStyle::Normal) - { - SLANG_RETURN_ON_FAIL(_initCuda(reportType)); - - SLANG_RETURN_ON_FAIL(_findMaxFlopsDeviceIndex(&m_deviceIndex)); - SLANG_CUDA_RETURN_WITH_REPORT_ON_FAIL(cudaSetDevice(m_deviceIndex), reportType); - - if (m_context) - { - cuCtxDestroy(m_context); - m_context = nullptr; - } - - SLANG_CUDA_RETURN_ON_FAIL(cuDeviceGet(&m_device, m_deviceIndex)); - - SLANG_CUDA_RETURN_WITH_REPORT_ON_FAIL(cuCtxCreate(&m_context, flags, m_device), reportType); - return SLANG_OK; - } - - ~ScopeCUDAContext() - { - if (m_context) - { - cuCtxDestroy(m_context); - } - } - SLANG_FORCE_INLINE operator CUcontext () const { return m_context; } - - int m_deviceIndex; - CUdevice m_device; - CUcontext m_context; -}; - -/* static */SlangResult CUDAComputeUtil::parseFeature(const Slang::UnownedStringSlice& feature, bool& outResult) -{ - outResult = false; - - if (feature.startsWith("cuda_sm_")) - { - const UnownedStringSlice versionSlice = UnownedStringSlice(feature.begin() + 8, feature.end()); - SemanticVersion requiredVersion; - SLANG_RETURN_ON_FAIL(SemanticVersion::parse(versionSlice, '_', requiredVersion)); - - // Need to get the version from the cuda device - ScopeCUDAContext context; - SLANG_RETURN_ON_FAIL(context.init(0, CUDAReportStyle::Silent)); - - const int deviceIndex = context.m_deviceIndex; - - int computeMode = -1; - SLANG_CUDA_RETURN_ON_FAIL(cudaDeviceGetAttribute(&computeMode, cudaDevAttrComputeMode, deviceIndex)); - - // If we don't have compute mode availability, we can't execute - if (computeMode == cudaComputeModeProhibited) - { - return SLANG_FAIL; - } - - int major, minor; - SLANG_CUDA_RETURN_ON_FAIL(cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor, deviceIndex)); - SLANG_CUDA_RETURN_ON_FAIL(cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor, deviceIndex)); - - SemanticVersion actualVersion; - actualVersion.set(major, minor); - - outResult = actualVersion >= requiredVersion; - - return SLANG_OK; - } - - return SLANG_FAIL; -} - -/* static */bool CUDAComputeUtil::hasFeature(const Slang::UnownedStringSlice& feature) -{ - bool res; - return SLANG_SUCCEEDED(parseFeature(feature, res)) ? res : false; -} - -/* static */bool CUDAComputeUtil::canCreateDevice() -{ - ScopeCUDAContext context; - return SLANG_SUCCEEDED(context.init(0, CUDAReportStyle::Silent)); -} - -static bool _hasReadAccess(SlangResourceAccess access) -{ - return access = SLANG_RESOURCE_ACCESS_READ || access == SLANG_RESOURCE_ACCESS_READ_WRITE; -} - -static bool _hasWriteAccess(SlangResourceAccess access) -{ - return access == SLANG_RESOURCE_ACCESS_READ_WRITE; -} - -/* static */SlangResult CUDAComputeUtil::createTextureResource(const ShaderInputLayoutEntry& srcEntry, slang::TypeLayoutReflection* typeLayout, RefPtr<CUDAResource>& outResource) -{ - SlangResourceAccess access = SLANG_RESOURCE_ACCESS_READ; - SlangResourceShape baseShape = SLANG_TEXTURE_2D; - if (typeLayout) - { - auto type = typeLayout->getType(); - auto shape = type->getResourceShape(); - access = type->getResourceAccess(); - - if (!(access == SLANG_RESOURCE_ACCESS_READ || access == SLANG_RESOURCE_ACCESS_READ_WRITE)) - { - SLANG_ASSERT(!"Only read or read write currently supported"); - return SLANG_FAIL; - } - baseShape = shape & SLANG_RESOURCE_BASE_SHAPE_MASK; - } - else - { - if (srcEntry.textureDesc.isCube) - { - baseShape = SLANG_TEXTURE_CUBE; - } - else - { - switch (srcEntry.textureDesc.dimension) - { - case 1: - baseShape = SLANG_TEXTURE_1D; - break; - case 2: - baseShape = SLANG_TEXTURE_2D; - break; - case 3: - baseShape = SLANG_TEXTURE_3D; - break; - default: - break; - } - } - if (srcEntry.textureDesc.isRWTexture) - access = SLANG_RESOURCE_ACCESS_READ_WRITE; - } - CUresourcetype resourceType = CU_RESOURCE_TYPE_ARRAY; - - InputTextureDesc textureDesc = srcEntry.textureDesc; - - if (_hasWriteAccess(access)) - { - textureDesc.mipMapCount = 1; - } - - // CUDA wants the unused dimensions to be 0. - // Might need to specially handle elsewhere - int width = textureDesc.size; - int height = 0; - int depth = 0; - - switch (baseShape) - { - case SLANG_TEXTURE_1D: - { - break; - } - case SLANG_TEXTURE_2D: - { - height = textureDesc.size; - break; - } - case SLANG_TEXTURE_3D: - { - height = textureDesc.size; - depth = textureDesc.size; - break; - } - case SLANG_TEXTURE_CUBE: - { - height = width; - depth = 1; - break; - } - default: - { - SLANG_ASSERT(!"Type not supported"); - return SLANG_FAIL; - } - } - - TextureData texData; - generateTextureData(texData, textureDesc); - - auto mipLevels = texData.mipLevels; - - RefPtr<TextureCUDAResource> tex = new TextureCUDAResource; - - size_t elementSize = 0; - - { - CUarray_format format = CU_AD_FORMAT_FLOAT; - int numChannels = 0; - - switch (textureDesc.format) - { - case Format::R_Float32: - { - format = CU_AD_FORMAT_FLOAT; - numChannels = 1; - elementSize = sizeof(float); - break; - } - case Format::RGBA_Unorm_UInt8: - { - format = CU_AD_FORMAT_UNSIGNED_INT8; - numChannels = 4; - elementSize = sizeof(uint32_t); - break; - } - default: - { - SLANG_ASSERT(!"Only support R_Float32/RGBA_Unorm_UInt8 formats for now"); - return SLANG_FAIL; - } - } - - if (mipLevels > 1) - { - resourceType = CU_RESOURCE_TYPE_MIPMAPPED_ARRAY; - - CUDA_ARRAY3D_DESCRIPTOR arrayDesc; - memset(&arrayDesc, 0, sizeof(arrayDesc)); - - arrayDesc.Width = width; - arrayDesc.Height = height; - arrayDesc.Depth = depth; - arrayDesc.Format = format; - arrayDesc.NumChannels = numChannels; - arrayDesc.Flags = 0; - - if (textureDesc.arrayLength > 1) - { - if (baseShape == SLANG_TEXTURE_1D || - baseShape == SLANG_TEXTURE_2D || - baseShape == SLANG_TEXTURE_CUBE) - { - arrayDesc.Flags |= CUDA_ARRAY3D_LAYERED; - arrayDesc.Depth = textureDesc.arrayLength; - } - else - { - SLANG_ASSERT(!"Arrays only supported for 1D and 2D"); - return SLANG_FAIL; - } - } - - if (baseShape == SLANG_TEXTURE_CUBE) - { - arrayDesc.Flags |= CUDA_ARRAY3D_CUBEMAP; - arrayDesc.Depth *= 6; - } - - SLANG_CUDA_RETURN_ON_FAIL(cuMipmappedArrayCreate(&tex->m_cudaMipMappedArray, &arrayDesc, mipLevels)); - } - else - { - resourceType = CU_RESOURCE_TYPE_ARRAY; - - if (textureDesc.arrayLength > 1) - { - if (baseShape == SLANG_TEXTURE_1D || baseShape == SLANG_TEXTURE_2D || baseShape == SLANG_TEXTURE_CUBE) - { - SLANG_ASSERT(!"Only 1D, 2D and Cube arrays supported"); - return SLANG_FAIL; - } - - CUDA_ARRAY3D_DESCRIPTOR arrayDesc; - memset(&arrayDesc, 0, sizeof(arrayDesc)); - - // Set the depth as the array length - arrayDesc.Depth = textureDesc.arrayLength; - if (baseShape == SLANG_TEXTURE_CUBE) - { - arrayDesc.Depth *= 6; - } - - arrayDesc.Height = height; - arrayDesc.Width = width; - arrayDesc.Format = format; - arrayDesc.NumChannels = numChannels; - - if (baseShape == SLANG_TEXTURE_CUBE) - { - arrayDesc.Flags |= CUDA_ARRAY3D_CUBEMAP; - } - - SLANG_CUDA_RETURN_ON_FAIL(cuArray3DCreate(&tex->m_cudaArray, &arrayDesc)); - } - else if (baseShape == SLANG_TEXTURE_3D || baseShape == SLANG_TEXTURE_CUBE) - { - CUDA_ARRAY3D_DESCRIPTOR arrayDesc; - memset(&arrayDesc, 0, sizeof(arrayDesc)); - - arrayDesc.Depth = depth; - arrayDesc.Height = height; - arrayDesc.Width = width; - arrayDesc.Format = format; - arrayDesc.NumChannels = numChannels; - - arrayDesc.Flags = 0; - - // Handle cube texture - if (baseShape == SLANG_TEXTURE_CUBE) - { - arrayDesc.Depth = 6; - arrayDesc.Flags |= CUDA_ARRAY3D_CUBEMAP; - } - - SLANG_CUDA_RETURN_ON_FAIL(cuArray3DCreate(&tex->m_cudaArray, &arrayDesc)); - } - else - { - CUDA_ARRAY_DESCRIPTOR arrayDesc; - memset(&arrayDesc, 0, sizeof(arrayDesc)); - - arrayDesc.Width = width; - arrayDesc.Height = height; - arrayDesc.Format = format; - arrayDesc.NumChannels = numChannels; - - // Allocate the array, will work for 1D or 2D case - SLANG_CUDA_RETURN_ON_FAIL(cuArrayCreate(&tex->m_cudaArray, &arrayDesc)); - } - } - } - - // Work space for holding data for uploading if it needs to be rearranged - List<uint8_t> workspace; - - for (int mipLevel = 0; mipLevel < mipLevels; ++mipLevel) - { - int mipWidth = width >> mipLevel; - int mipHeight = height >> mipLevel; - int mipDepth = depth >> mipLevel; - - mipWidth = (mipWidth == 0) ? 1 : mipWidth; - mipHeight = (mipHeight == 0) ? 1 : mipHeight; - mipDepth = (mipDepth == 0) ? 1 : mipDepth; - - // If it's a cubemap then the depth is always 6 - if (baseShape == SLANG_TEXTURE_CUBE) - { - mipDepth = 6; - } - - auto dstArray = tex->m_cudaArray; - if (tex->m_cudaMipMappedArray) - { - // Get the array for the mip level - SLANG_CUDA_RETURN_ON_FAIL(cuMipmappedArrayGetLevel(&dstArray, tex->m_cudaMipMappedArray, mipLevel)); - } - SLANG_ASSERT(dstArray); - - // Check using the desc to see if it's plausible - { - CUDA_ARRAY_DESCRIPTOR arrayDesc; - SLANG_CUDA_RETURN_ON_FAIL(cuArrayGetDescriptor(&arrayDesc, dstArray)); - - SLANG_ASSERT(mipWidth == arrayDesc.Width); - SLANG_ASSERT(mipHeight == arrayDesc.Height || (mipHeight == 1 && arrayDesc.Height == 0)); - } - - const void* srcDataPtr = nullptr; - - if (textureDesc.arrayLength > 1) - { - SLANG_ASSERT(baseShape == SLANG_TEXTURE_1D || baseShape == SLANG_TEXTURE_2D || baseShape == SLANG_TEXTURE_CUBE); - - // TODO(JS): Here I assume that arrays are just held contiguously within a 'face' - // This seems reasonable and works with the Copy3D. - const size_t faceSizeInBytes = elementSize * mipWidth * mipHeight; - - Index faceCount = textureDesc.arrayLength; - if (baseShape == SLANG_TEXTURE_CUBE) - { - faceCount *= 6; - } - - const size_t mipSizeInBytes = faceSizeInBytes * faceCount; - workspace.setCount(mipSizeInBytes); - - // We need to add the face data from each mip - // We iterate over face count so we copy all of the cubemap faces - for (Index j = 0; j < faceCount; j++) - { - const auto& srcData = texData.dataBuffer[mipLevel + j * mipLevels]; - // Copy over to the workspace to make contiguous - ::memcpy(workspace.begin() + faceSizeInBytes * j, srcData.getBuffer(), faceSizeInBytes); - } - - srcDataPtr = workspace.getBuffer(); - } - else - { - if (baseShape == SLANG_TEXTURE_CUBE) - { - size_t faceSizeInBytes = elementSize * mipWidth * mipHeight; - - workspace.setCount(faceSizeInBytes * 6); - - // Copy the data over to make contiguous - for (Index j = 0; j < 6; j++) - { - const auto& srcData = texData.dataBuffer[mipLevels * j + mipLevel]; - SLANG_ASSERT(mipWidth * mipHeight == srcData.getCount()); - - ::memcpy(workspace.getBuffer() + faceSizeInBytes * j, srcData.getBuffer(), faceSizeInBytes); - } - - srcDataPtr = workspace.getBuffer(); - } - else - { - const auto& srcData = texData.dataBuffer[mipLevel]; - SLANG_ASSERT(mipWidth * mipHeight * mipDepth == srcData.getCount()); - - srcDataPtr = srcData.getBuffer(); - } - } - - if (textureDesc.arrayLength > 1) - { - SLANG_ASSERT(baseShape == SLANG_TEXTURE_1D || baseShape == SLANG_TEXTURE_2D || baseShape == SLANG_TEXTURE_CUBE); - - CUDA_MEMCPY3D copyParam; - memset(©Param, 0, sizeof(copyParam)); - - copyParam.dstMemoryType = CU_MEMORYTYPE_ARRAY; - copyParam.dstArray = dstArray; - - copyParam.srcMemoryType = CU_MEMORYTYPE_HOST; - copyParam.srcHost = srcDataPtr; - copyParam.srcPitch = mipWidth * elementSize; - copyParam.WidthInBytes = copyParam.srcPitch; - copyParam.Height = mipHeight; - // Set the depth to the array length - copyParam.Depth = textureDesc.arrayLength; - - if (baseShape == SLANG_TEXTURE_CUBE) - { - copyParam.Depth *= 6; - } - - SLANG_CUDA_RETURN_ON_FAIL(cuMemcpy3D(©Param)); - } - else - { - switch (baseShape) - { - case SLANG_TEXTURE_1D: - case SLANG_TEXTURE_2D: - { - CUDA_MEMCPY2D copyParam; - memset(©Param, 0, sizeof(copyParam)); - copyParam.dstMemoryType = CU_MEMORYTYPE_ARRAY; - copyParam.dstArray = dstArray; - copyParam.srcMemoryType = CU_MEMORYTYPE_HOST; - copyParam.srcHost = srcDataPtr; - copyParam.srcPitch = mipWidth * elementSize; - copyParam.WidthInBytes = copyParam.srcPitch; - copyParam.Height = mipHeight; - SLANG_CUDA_RETURN_ON_FAIL(cuMemcpy2D(©Param)); - break; - } - case SLANG_TEXTURE_3D: - case SLANG_TEXTURE_CUBE: - { - CUDA_MEMCPY3D copyParam; - memset(©Param, 0, sizeof(copyParam)); - - copyParam.dstMemoryType = CU_MEMORYTYPE_ARRAY; - copyParam.dstArray = dstArray; - - copyParam.srcMemoryType = CU_MEMORYTYPE_HOST; - copyParam.srcHost = srcDataPtr; - copyParam.srcPitch = mipWidth * elementSize; - copyParam.WidthInBytes = copyParam.srcPitch; - copyParam.Height = mipHeight; - copyParam.Depth = mipDepth; - - SLANG_CUDA_RETURN_ON_FAIL(cuMemcpy3D(©Param)); - break; - } - - default: - { - SLANG_ASSERT(!"Not implemented"); - break; - } - } - } - } - - // Set up texture sampling parameters, and create final texture obj - - { - CUDA_RESOURCE_DESC resDesc; - memset(&resDesc, 0, sizeof(CUDA_RESOURCE_DESC)); - resDesc.resType = resourceType; - - if (tex->m_cudaArray) - { - resDesc.res.array.hArray = tex->m_cudaArray; - } - if (tex->m_cudaMipMappedArray) - { - resDesc.res.mipmap.hMipmappedArray = tex->m_cudaMipMappedArray; - } - - if (_hasWriteAccess(access)) - { - // If has write access it's effectively UAV, and so doesn't have sampling available - SLANG_CUDA_RETURN_ON_FAIL(cuSurfObjectCreate(&tex->m_cudaSurfObj, &resDesc)); - } - else - { - // If read only it's a SRV and can sample, but cannot write - CUDA_TEXTURE_DESC texDesc; - memset(&texDesc, 0, sizeof(CUDA_TEXTURE_DESC)); - texDesc.addressMode[0] = CU_TR_ADDRESS_MODE_WRAP; - texDesc.addressMode[1] = CU_TR_ADDRESS_MODE_WRAP; - texDesc.addressMode[2] = CU_TR_ADDRESS_MODE_WRAP; - texDesc.filterMode = CU_TR_FILTER_MODE_LINEAR; - texDesc.flags = CU_TRSF_NORMALIZED_COORDINATES; - - SLANG_CUDA_RETURN_ON_FAIL(cuTexObjectCreate(&tex->m_cudaTexObj, &resDesc, &texDesc, nullptr)); - } - - } - - outResource = tex; - return SLANG_OK; -} - - /// Load kernel code and invoke a compute program - /// - /// Assumes that data for binding the kernel parameters is already - /// set up in `outContext.` - /// -static SlangResult _invokeComputeProgram( - CUcontext cudaContext, - ScopeCUDAStream& cudaStream, - ScopeCUDAModule& cudaModule, - const ShaderCompilerUtil::OutputAndLayout& outputAndLayout, - const uint32_t dispatchSize[3], - CUDAComputeUtil::Context& outContext) -{ - auto reflection = slang::ProgramLayout::get(outputAndLayout.output.getRequestForReflection()); - - auto& bindSet = outContext.m_bindSet; - auto& bindRoot = outContext.m_bindRoot; - - // The global-scope shader parameters in the input Slang program - // will be collected into a single `__constant__` global variable - // in the output CUDA module. - // - // We need to query the address of the `__constant__` variable - // so that we can copy parameter data into it when invoking - // a kernel. - // - // The Slang compiler always names this symbol `SLANG_globalParams` - // so that it is easy to look up independent of the module or - // entry point in question. - // - CUdeviceptr globalParamsSymbol = 0; - size_t globalParamsSymbolSize = 0; - cuModuleGetGlobal(&globalParamsSymbol, &globalParamsSymbolSize, cudaModule, "SLANG_globalParams"); - - slang::EntryPointReflection* entryPoint = nullptr; - auto entryPointCount = reflection->getEntryPointCount(); - SLANG_ASSERT(entryPointCount == 1); - - entryPoint = reflection->getEntryPointByIndex(0); - - const char* entryPointName = entryPoint->getName(); - - // Get the entry point - CUfunction cudaEntryPoint; - SLANG_CUDA_RETURN_ON_FAIL(cuModuleGetFunction(&cudaEntryPoint, cudaModule, entryPointName)); - - // Get the max threads per block for this function - - int maxTheadsPerBlock; - SLANG_CUDA_RETURN_ON_FAIL(cuFuncGetAttribute(&maxTheadsPerBlock, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, cudaEntryPoint)); - - int sharedSizeInBytes; - SLANG_CUDA_RETURN_ON_FAIL(cuFuncGetAttribute(&sharedSizeInBytes, CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES, cudaEntryPoint)); - - // A single CUDA kernel can be invoked with thread groups - // of different shapes/sizes, but an HLSL/Slang compute - // kernel always has a fixed thread group shape baked in. - // We use reflection to query the thread-group size that - // the kernel expects, so that we can use the right size - // when invoking the kernel. - // - SlangUInt numThreadsPerAxis[3]; - entryPoint->getComputeThreadGroupSize(3, numThreadsPerAxis); - - // The argument data for the kernel has been set up in `bindRoot`, - // which encapsulates global buffers for both the global and - // entry-point parameter data. - // - // In the case of global parameters, we just need to extract the - // device address of the parameter data, so we can copy it into - // the `SLANG_globalParams` symbol. - // - { - CUdeviceptr globalParamsCUDAData = MemoryCUDAResource::getCUDAData(bindRoot.getRootValue()); - cudaMemcpyAsync( - (void*) globalParamsSymbol, - (void*) globalParamsCUDAData, - globalParamsSymbolSize, - cudaMemcpyDeviceToDevice, - cudaStream); - } - // - // In the case of the entry-point parameters, we have to deal with - // two different wrinkles. - // - // First, the `bindRoot` will have the entry-point argument data - // stored in a GPU-memory buffer, but we actually need it to be - // in host CPU memory. We handle that for now by allocating a - // temporary host memory buffer (if needed) and copying the data - // from device to host. - // - auto entryPointBindValue = bindRoot.getEntryPointValue(); - CUdeviceptr entryPointCUDAData = MemoryCUDAResource::getCUDAData(entryPointBindValue); - size_t entryPointDataSize = entryPointBindValue ? entryPointBindValue->m_sizeInBytes : 0; - void* entryPointHostData = nullptr; - if(entryPointDataSize) - { - entryPointHostData = alloca(entryPointDataSize); - cudaMemcpy(entryPointHostData, (void*)entryPointCUDAData, entryPointDataSize, cudaMemcpyDeviceToHost); - } - // - // Second, the argument data for the entry-point parameters has - // been allocated and filled in as a single buffer, but `cuLaunchKernel` - // defaults to taking pointers to each of the kernel arguments. - // - // We could loop over the entry-point parameters using the refleciton - // information, and set up a pointer to each using the offset stored - // for it in the reflection data. Such an approach would require - // us to create and fill in a dynamically-sized array here. - // - // Instead, we take advantage of a documented but seldom-used feature - // of `cuLaunchKernel` that allows the argument data for all of the - // kernel "launch parameters" to be specified as a single buffer. - // - void* extraOptions[] = { - CU_LAUNCH_PARAM_BUFFER_POINTER, (void*) entryPointHostData, - CU_LAUNCH_PARAM_BUFFER_SIZE, &entryPointDataSize, - CU_LAUNCH_PARAM_END, - }; - - // Once we have all the decessary data extracted and/or - // set up, we can launch the kernel and see what happens. - // - auto cudaLaunchResult = cuLaunchKernel(cudaEntryPoint, - dispatchSize[0], dispatchSize[1], dispatchSize[2], - int(numThreadsPerAxis[0]), int(numThreadsPerAxis[1]), int(numThreadsPerAxis[2]), // Threads per block - 0, // Shared memory size - cudaStream, // Stream. 0 is no stream. - nullptr, // Not using traditional argument passing - extraOptions); // Instead passing kernel arguments via "extra" options - SLANG_CUDA_RETURN_ON_FAIL(cudaLaunchResult); - - // Do a sync here. Makes sure any issues are detected early and not on some implicit sync - SLANG_RETURN_ON_FAIL(cudaStream.sync()); - - return SLANG_OK; -} - -#ifdef RENDER_TEST_OPTIX - /// Load kernel code and invoke a ray-tracing program - /// - /// Assumes that data for binding the kernel parameters is already - /// set up in `outContext.` - /// - /// Currently only works for programs that have a single - /// ray generation shader and no other entry points. - /// -static SlangResult _loadAndInvokeRayTracingProgram( - CUcontext cudaContext, - ScopeCUDAStream& cudaStream, - const ShaderCompilerUtil::OutputAndLayout& outputAndLayout, - const uint32_t dispatchSize[3], - CUDAComputeUtil::Context& outContext) -{ - SLANG_OPTIX_RETURN_ON_FAIL(optixInit()); - - OptixDeviceContextOptions optixOptions = {}; - -#if _DEBUG - optixOptions.logCallbackFunction = &_optixLogCallback; - optixOptions.logCallbackLevel = 4; -#endif - - OptixDeviceContext optixContext = nullptr; - SLANG_OPTIX_RETURN_ON_FAIL(optixDeviceContextCreate(cudaContext, &optixOptions, &optixContext)); - - enum { kOptixLogSize = 2*1024 }; - char log[kOptixLogSize]; - size_t logSize = sizeof(log); - - OptixPipelineCompileOptions optixPipelineCompileOptions = {}; - optixPipelineCompileOptions.pipelineLaunchParamsVariableName = "SLANG_globalParams"; - - // We need to load modules from the PTX code available to us, - // and then also create program groups from the kernels - // in those modules. - // - // For now we will only support program groups with a single - // kernel in them, and will create one per entry point. - // - Index entryPointCount = outputAndLayout.output.kernelDescs.getCount(); - List<OptixProgramGroup> optixProgramGroups; - List<String> names; - - OptixShaderBindingTable optixSBT = {}; - - for( Index ee = 0; ee < entryPointCount; ++ee ) - { - auto& kernel = outputAndLayout.output.kernelDescs[ee]; - - // TODO: The logic here assumes that each kernel will - // come from its own independent module, and has no - // provisiion for loading modules that might contain - // multiple entry points. - // - OptixModuleCompileOptions optixModuleCompileOptions = {}; - OptixModule optixModule; - SLANG_OPTIX_RETURN_ON_FAIL(optixModuleCreateFromPTX( - optixContext, - &optixModuleCompileOptions, - &optixPipelineCompileOptions, - (char const*) kernel.codeBegin, - kernel.getCodeSize(), - log, - &logSize, - &optixModule)); - - // TODO: The logic here only handles ray-generation entry points. - // - // It would seem simple to extend this to handle other entry - // point types, by inspecting the stage of the entry points - // being loaded, and this is indeed true for the subset of - // stages that map one-to-one with OptiX "program groups." - // - // The sticking point is "hit groups" which require a collection - // of entry points to be specified together (insersection, - // any hit, and closest hit). A hit group can comprise between - // zero and three entry points. - // - // The catch for us is how to determine which entry points - // should be grouped to form hit groups. Should this be - // implied in the input code (either by naming convention - // or by new Slang language features)? Should this be set - // up via command-line arguments or something akin to - // `//TEST_INPUT` lines? - - OptixProgramGroupOptions optixProgramGroupOptions = {}; - - OptixProgramGroupDesc optixProgramGroupDesc = {}; - optixProgramGroupDesc.kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN; - optixProgramGroupDesc.raygen.module = optixModule; - - String name = String("__raygen__") + kernel.entryPointName; - names.add(name); - optixProgramGroupDesc.raygen.entryFunctionName = name.begin(); - - OptixProgramGroup optixProgramGroup = nullptr; - SLANG_OPTIX_RETURN_ON_FAIL(optixProgramGroupCreate( - optixContext, - &optixProgramGroupDesc, - 1, - &optixProgramGroupOptions, - log, - &logSize, - &optixProgramGroup)); - - optixProgramGroups.add(optixProgramGroup); - - { - CUdeviceptr rayGenRecordPtr; - size_t rayGenRecordSize = OPTIX_SBT_RECORD_HEADER_SIZE; - - SLANG_CUDA_RETURN_ON_FAIL(cudaMalloc((void**) &rayGenRecordPtr, rayGenRecordSize)); - - struct { char data[OPTIX_SBT_RECORD_HEADER_SIZE]; } rayGenRecordData; - SLANG_OPTIX_RETURN_ON_FAIL(optixSbtRecordPackHeader(optixProgramGroup, &rayGenRecordData)); - - SLANG_CUDA_RETURN_ON_FAIL(cudaMemcpy( - (void*) rayGenRecordPtr, - &rayGenRecordData, - rayGenRecordSize, - cudaMemcpyHostToDevice)); - - optixSBT.raygenRecord = rayGenRecordPtr; - } - } - - OptixPipeline optixPipeline = nullptr; - - OptixPipelineLinkOptions optixPipelineLinkOptions = {}; - optixPipelineLinkOptions.maxTraceDepth = 5; - optixPipelineLinkOptions.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_FULL; - optixPipelineLinkOptions.overrideUsesMotionBlur = false; - SLANG_OPTIX_RETURN_ON_FAIL(optixPipelineCreate( - optixContext, - &optixPipelineCompileOptions, - &optixPipelineLinkOptions, - optixProgramGroups.getBuffer(), - (unsigned int)optixProgramGroups.getCount(), - log, - &logSize, - &optixPipeline)); - - - { - // The OptiX API complains if we don't fill in a miss record - // in the SBT, so we will create a dummy one here to represent - // the lack of any miss shaders. - // - OptixProgramGroupOptions optixProgramGroupOptions = {}; - OptixProgramGroupDesc missGroupDesc = {}; - missGroupDesc.kind = OPTIX_PROGRAM_GROUP_KIND_MISS; - OptixProgramGroup missProgramGroup; - SLANG_OPTIX_RETURN_ON_FAIL(optixProgramGroupCreate( - optixContext, - &missGroupDesc, - 1, - &optixProgramGroupOptions, - log, - &logSize, - &missProgramGroup)); - - - CUdeviceptr missRecordPtr; - size_t missRecordSize = OPTIX_SBT_RECORD_HEADER_SIZE; - - SLANG_CUDA_RETURN_ON_FAIL(cudaMalloc((void**) &missRecordPtr, missRecordSize)); - - struct { char data[OPTIX_SBT_RECORD_HEADER_SIZE]; } missRecordData; - SLANG_OPTIX_RETURN_ON_FAIL(optixSbtRecordPackHeader(missProgramGroup, &missRecordData)); - - SLANG_CUDA_RETURN_ON_FAIL(cudaMemcpy( - (void*) missRecordPtr, - &missRecordData, - missRecordSize, - cudaMemcpyHostToDevice)); - - optixSBT.missRecordBase = missRecordPtr; - optixSBT.missRecordCount = 1; - optixSBT.missRecordStrideInBytes = (unsigned int)missRecordSize; - } - { - // Okay, we also need a dummy hit group. - - OptixProgramGroupOptions optixProgramGroupOptions = {}; - OptixProgramGroupDesc hitGroupDesc = {}; - hitGroupDesc.kind = OPTIX_PROGRAM_GROUP_KIND_HITGROUP; - OptixProgramGroup programGroup; - SLANG_OPTIX_RETURN_ON_FAIL(optixProgramGroupCreate( - optixContext, - &hitGroupDesc, - 1, - &optixProgramGroupOptions, - log, - &logSize, - &programGroup)); - - - CUdeviceptr recordPtr; - size_t recordSize = OPTIX_SBT_RECORD_HEADER_SIZE; - - SLANG_CUDA_RETURN_ON_FAIL(cudaMalloc((void**) &recordPtr, recordSize)); - - struct { char data[OPTIX_SBT_RECORD_HEADER_SIZE]; } recordData; - SLANG_OPTIX_RETURN_ON_FAIL(optixSbtRecordPackHeader(programGroup, &recordData)); - - SLANG_CUDA_RETURN_ON_FAIL(cudaMemcpy( - (void*) recordPtr, - &recordData, - recordSize, - cudaMemcpyHostToDevice)); - - optixSBT.hitgroupRecordBase = recordPtr; - optixSBT.hitgroupRecordCount = 1; - optixSBT.hitgroupRecordStrideInBytes = (unsigned int)recordSize; - } - - // Work out the args - - auto& bindRoot = outContext.m_bindRoot; - - CUdeviceptr globalParams = 0; - size_t globalParamsSize; - - if( auto globalArg = bindRoot.getRootValue() ) - { - globalParams = MemoryCUDAResource::getCUDAData(globalArg); - globalParamsSize = globalArg->m_sizeInBytes; - } - - // TODO: The data for entry point parameters needs to be stored - // into the SBT. - // - // The simplest solution here would be to copy data from the `bindRoot` - // into the SBT at the point where we are setting up the SBT, but - // a more optimized approach (more similar to what a real applicaiton - // would do) would be to allocate the SBT first and then have the - // binding logic write directly into its entries. - // - // One big complication here is that there need not necessarily be - // a one-to-one relationship between the entry points (or entry-point - // groups) in a compiled ray-tracing pipeline and the entries in - // the SBT. Each SBT entry is conceptually an "instance" of one - // of the entry-point groups in the program, and there can be - // zero, one, or many instances of a given group. - // - // Modelling this more completely in `render-test` requires that - // we start having a model for the "scene" that is being rendered, - // and how entry point groups are associated with the objects in - // that scene. - // - CUdeviceptr entryPointParams = MemoryCUDAResource::getCUDAData(bindRoot.getEntryPointValue()); - - SLANG_OPTIX_RETURN_ON_FAIL(optixLaunch( - optixPipeline, - cudaStream, - globalParams, - globalParamsSize, - &optixSBT, - dispatchSize[0], - dispatchSize[1], - dispatchSize[2])); - - SLANG_RETURN_ON_FAIL(cudaStream.sync()); - - return SLANG_OK; -} -#endif - - // Fill in runtime handles (e.g. RTTI pointers values and bindless resource handles) in input buffers. -static SlangResult _fillRuntimeHandlesInBuffers( - const ShaderCompilerUtil::OutputAndLayout& compilationAndLayout, - CUDAComputeUtil::Context& context, - ScopeCUDAModule& cudaModule) -{ - Slang::ComPtr<slang::ISession> linkage; - spCompileRequest_getSession(compilationAndLayout.output.getRequestForReflection(), linkage.writeRef()); - auto& inputLayout = compilationAndLayout.layout; - for (auto& entry : inputLayout.entries) - { - for (auto& rtti : entry.rttiEntries) - { - uint64_t ptrValue = 0; - switch (rtti.type) - { - case RTTIDataEntryType::RTTIObject: - { - auto reflection = - slang::ShaderReflection::get(compilationAndLayout.output.getRequestForReflection()); - auto concreteType = reflection->findTypeByName(rtti.typeName.getBuffer()); - ComPtr<ISlangBlob> outName; - linkage->getTypeRTTIMangledName(concreteType, outName.writeRef()); - if (!outName) - return SLANG_FAIL; - SLANG_CUDA_RETURN_ON_FAIL(cuModuleGetGlobal( - (CUdeviceptr*)&ptrValue, - nullptr, - cudaModule.m_module, - (char*)outName->getBufferPointer())); - } - break; - case RTTIDataEntryType::WitnessTable: - { - auto reflection = - slang::ShaderReflection::get(compilationAndLayout.output.getRequestForReflection()); - auto concreteType = reflection->findTypeByName(rtti.typeName.getBuffer()); - if (!concreteType) - return SLANG_FAIL; - auto interfaceType = reflection->findTypeByName(rtti.interfaceName.getBuffer()); - if (!interfaceType) - return SLANG_FAIL; - uint32_t id = 0xFFFFFFFF; - linkage->getTypeConformanceWitnessSequentialID( - concreteType, interfaceType, &id); - ptrValue = id; - break; - } - default: - break; - } - if (rtti.offset >= 0 && - rtti.offset + sizeof(ptrValue) <= - entry.bufferData.getCount() * sizeof(decltype(entry.bufferData[0]))) - { - memcpy( - ((char*)entry.bufferData.getBuffer()) + rtti.offset, - &ptrValue, - sizeof(ptrValue)); - } - else - { - return SLANG_FAIL; - } - } - - for (auto& handle : entry.bindlessHandleEntry) - { - RefPtr<CUDAResource> resource; - uint64_t handleValue = 0; - if (context.m_bindlessResources.TryGetValue(handle.name, resource)) - { - handleValue = resource->getBindlessHandle(); - } - else - { - return SLANG_FAIL; - } - if (handle.offset >= 0 && - handle.offset + sizeof(uint64_t) <= - entry.bufferData.getCount() * sizeof(decltype(entry.bufferData[0]))) - { - memcpy( - ((char*)entry.bufferData.getBuffer()) + handle.offset, - &handleValue, - sizeof(handleValue)); - } - else - { - return SLANG_FAIL; - } - } - } - return SLANG_OK; -} - -static SlangResult _createBindlessResources( - const ShaderCompilerUtil::OutputAndLayout& outputAndLayout, - CUDAComputeUtil::Context& outContext) -{ - auto outStream = StdWriters::getOut(); - for (auto& entry : outputAndLayout.layout.entries) - { - if (!entry.isBindlessObject) - continue; - switch (entry.type) - { - case ShaderInputType::Texture: - { - RefPtr<CUDAResource> resource; - CUDAComputeUtil::createTextureResource(entry, nullptr, resource); - outContext.m_bindlessResources.Add(entry.name, resource); - break; - } - default: - outStream.print("Unsupported bindless resource type.\n"); - return SLANG_FAIL; - } - } - return SLANG_OK; -} - - /// Fill in the binding information for arguments of a CUDA program. -static SlangResult _setUpArguments( - CUcontext cudaContext, - ScopeCUDAStream& cudaStream, - ScopeCUDAModule& cudaModule, - const ShaderCompilerUtil::OutputAndLayout& outputAndLayout, - const uint32_t dispatchSize[3], - CUDAComputeUtil::Context& outContext) -{ - auto reflection = slang::ProgramLayout::get(outputAndLayout.output.getRequestForReflection()); - - auto& bindSet = outContext.m_bindSet; - auto& bindRoot = outContext.m_bindRoot; - - // Okay now we need to set up binding - bindRoot.init(&bindSet, reflection, 0); - - // Will set up any root buffers - bindRoot.addDefaultValues(); - - // Now set up the Values from the test - - auto outStream = StdWriters::getOut(); - - _createBindlessResources(outputAndLayout, outContext); - - // Fill in RTTI pointers and bindless handles in input buffers before copying - // it to GPU memory. - // TODO: enable this for Optix path after it is refactored so that context - // creation and module loading happens before _setUpArguments. - if (outputAndLayout.output.desc.pipelineType == PipelineType::Compute) - { - SLANG_RETURN_ON_FAIL(_fillRuntimeHandlesInBuffers(outputAndLayout, outContext, cudaModule)); - } - - SLANG_RETURN_ON_FAIL(ShaderInputLayout::addBindSetValues(outputAndLayout.layout.entries, outputAndLayout.sourcePath, outStream, bindRoot)); - - ShaderInputLayout::getValueBuffers(outputAndLayout.layout.entries, bindSet, outContext.m_buffers); - - // First create all of the resources for the values - - { - const auto& values = bindSet.getValues(); - const auto& entries = outputAndLayout.layout.entries; - - for (BindSet::Value* value : values) - { - auto typeLayout = value->m_type; - - // Get the type kind, if typeLayout is not set we'll assume a 'constant buffer' will do - slang::TypeReflection::Kind kind = typeLayout ? typeLayout->getKind() : slang::TypeReflection::Kind::ConstantBuffer; - - switch (kind) - { - case slang::TypeReflection::Kind::ConstantBuffer: - case slang::TypeReflection::Kind::ParameterBlock: - { - // We can construct the buffers. We can't copy into yet, as we need to set all of the bindings first - RefPtr<MemoryCUDAResource> resource = new MemoryCUDAResource; - SLANG_CUDA_RETURN_ON_FAIL(cuMemAlloc(&resource->m_cudaMemory, value->m_sizeInBytes)); - value->m_target = resource; - break; - } - case slang::TypeReflection::Kind::Resource: - { - auto type = typeLayout->getType(); - auto shape = type->getResourceShape(); - - auto baseShape = shape & SLANG_RESOURCE_BASE_SHAPE_MASK; - - switch (baseShape) - { - case SLANG_TEXTURE_1D: - case SLANG_TEXTURE_2D: - case SLANG_TEXTURE_3D: - case SLANG_TEXTURE_CUBE: - { - RefPtr<CUDAResource> resource; - SLANG_RETURN_ON_FAIL(CUDAComputeUtil::createTextureResource(entries[value->m_userIndex], typeLayout, resource)); - value->m_target = resource; - break; - } - case SLANG_TEXTURE_BUFFER: - { - // Need a CUDA impl for these... - // For now we can just leave as target will just be nullptr - break; - } - - case SLANG_BYTE_ADDRESS_BUFFER: - case SLANG_STRUCTURED_BUFFER: - { - // On CPU we just use the memory in the BindSet buffer, so don't need to create anything - RefPtr<MemoryCUDAResource> resource = new MemoryCUDAResource; - SLANG_CUDA_RETURN_ON_FAIL(cuMemAlloc(&resource->m_cudaMemory, value->m_sizeInBytes)); - value->m_target = resource; - break; - } - } - } - default: break; - } - } - } - - // Now we need to go through all of the bindings and set the appropriate data - - { - List<BindLocation> locations; - List<BindSet::Value*> values; - bindSet.getBindings(locations, values); - - for (Index i = 0; i < locations.getCount(); ++i) - { - const auto& location = locations[i]; - BindSet::Value* value = values[i]; - - // Okay now we need to set up the actual handles that CPU will follow. - auto typeLayout = location.getTypeLayout(); - - const auto kind = typeLayout->getKind(); - switch (kind) - { - case slang::TypeReflection::Kind::Array: - { - auto elementCount = int(typeLayout->getElementCount()); - if (elementCount == 0) - { - CUDAComputeUtil::Array array = { CUdeviceptr(), 0 }; - auto resource = MemoryCUDAResource::asResource(value); - if (resource) - { - array.data = resource->m_cudaMemory; - array.count = value->m_elementCount; - } - - location.setUniform(&array, sizeof(array)); - } - break; - } - case slang::TypeReflection::Kind::ConstantBuffer: - case slang::TypeReflection::Kind::ParameterBlock: - { - // These map down to just pointers - *location.getUniform<CUdeviceptr>() = MemoryCUDAResource::getCUDAData(value); - break; - } - case slang::TypeReflection::Kind::Resource: - { - auto type = typeLayout->getType(); - auto shape = type->getResourceShape(); - - auto access = type->getResourceAccess(); - - const auto baseShape = shape & SLANG_RESOURCE_BASE_SHAPE_MASK; - - switch (baseShape) - { - case SLANG_STRUCTURED_BUFFER: - { - CUDAComputeUtil::StructuredBuffer buffer = { CUdeviceptr(), 0 }; - auto resource = MemoryCUDAResource::asResource(value); - if (resource) - { - buffer.data = resource->m_cudaMemory; - buffer.count = value->m_elementCount; - } - - location.setUniform(&buffer, sizeof(buffer)); - break; - } - case SLANG_BYTE_ADDRESS_BUFFER: - { - CUDAComputeUtil::ByteAddressBuffer buffer = { CUdeviceptr(), 0 }; - - auto resource = MemoryCUDAResource::asResource(value); - if (resource) - { - buffer.data = resource->m_cudaMemory; - buffer.sizeInBytes = value->m_sizeInBytes; - } - - location.setUniform(&buffer, sizeof(buffer)); - break; - } - case SLANG_TEXTURE_1D: - case SLANG_TEXTURE_2D: - case SLANG_TEXTURE_3D: - case SLANG_TEXTURE_CUBE: - { - if (_hasWriteAccess(access)) - { - *location.getUniform<CUsurfObject>() = TextureCUDAResource::getSurfObject(value); - } - else - { - *location.getUniform<CUtexObject>() = TextureCUDAResource::getTexObject(value); - } - break; - } - - } - break; - } - default: break; - } - } - } - - // Okay now the memory is all set up, we can copy everything over - { - const auto& values = bindSet.getValues(); - for (BindSet::Value* value : values) - { - CUdeviceptr cudaMem = MemoryCUDAResource::getCUDAData(value); - if (value && value->m_data && cudaMem) - { - // Okay copy the data over... - SLANG_CUDA_RETURN_ON_FAIL(cuMemcpyHtoD(cudaMem, value->m_data, value->m_sizeInBytes)); - } - } - } - - return SLANG_OK; -} - - /// Read back any output arguments from a CUDA program. -static SlangResult _readBackOutputs( - CUcontext cudaContext, - ScopeCUDAStream& cudaStream, - const ShaderCompilerUtil::OutputAndLayout& outputAndLayout, - const uint32_t dispatchSize[3], - CUDAComputeUtil::Context& outContext) -{ - const auto& entries = outputAndLayout.layout.entries; - - for (Index i = 0; i < entries.getCount(); ++i) - { - const auto& entry = entries[i]; - BindSet::Value* value = outContext.m_buffers[i]; - - if (entry.isOutput) - { - // Copy back to CPU memory - CUdeviceptr cudaMem = MemoryCUDAResource::getCUDAData(value); - if (value && value->m_data && cudaMem) - { - // Okay copy the data back... - SLANG_CUDA_RETURN_ON_FAIL(cuMemcpyDtoH(value->m_data, cudaMem, value->m_sizeInBytes)); - } - } - } - - return SLANG_OK; -} - -SlangResult _loadCUDAModule( - const ShaderCompilerUtil::OutputAndLayout& outputAndLayout, - ScopeCUDAModule& outModule) -{ - const Index index = outputAndLayout.output.findKernelDescIndex(StageType::Compute); - if (index < 0) - { - return SLANG_FAIL; - } - const auto& kernelDesc = outputAndLayout.output.kernelDescs[index]; - SLANG_RETURN_ON_FAIL(outModule.load(kernelDesc.codeBegin)); - return SLANG_OK; -} - - /// Load and invoke a CUDA program (either compute or ray-tracing) -SlangResult _loadAndInvokeKernel( - CUcontext cudaContext, - ScopeCUDAStream& cudaStream, - ScopeCUDAModule& cudaModule, - const ShaderCompilerUtil::OutputAndLayout& outputAndLayout, - const uint32_t dispatchSize[3], - CUDAComputeUtil::Context& outContext) -{ - switch( outputAndLayout.output.desc.pipelineType ) - { - case PipelineType::Compute: - return _invokeComputeProgram( - cudaContext, cudaStream, cudaModule, outputAndLayout, dispatchSize, outContext); - - case PipelineType::RayTracing: -#ifdef RENDER_TEST_OPTIX - return _loadAndInvokeRayTracingProgram( - cudaContext, cudaStream, outputAndLayout, dispatchSize, outContext); -#endif - break; - - default: break; - } - - return SLANG_FAIL; -} - - /// Execute a CUDA program (either compute or ray-tracing) - /// - /// This function handles loading code and argument data, - /// invoking the kernel(s), and reading back results. - /// -/* static */SlangResult CUDAComputeUtil::execute(const ShaderCompilerUtil::OutputAndLayout& outputAndLayout, const uint32_t dispatchSize[3], Context& outContext) -{ - ScopeCUDAContext cudaContext; - SLANG_RETURN_ON_FAIL(cudaContext.init(0)); - - // A default stream, will act as a global stream. Calling sync will globally sync - ScopeCUDAStream cudaStream; - //SLANG_CUDA_RETURN_ON_FAIL(cudaStream.init(cudaStreamNonBlocking)); - - ScopeCUDAModule cudaModule; - - auto& bindSet = outContext.m_bindSet; - auto& bindRoot = outContext.m_bindRoot; - - auto request = outputAndLayout.output.getRequestForReflection(); - auto reflection = (slang::ShaderReflection*) spGetReflection(request); - - // Load cuda module first so its symbols may be queried and filled into argument buffers. - // TODO: refactor optix path to also front-load its context creation and module loading here. - // For now just front-load compute kernels. - if (outputAndLayout.output.desc.pipelineType == PipelineType::Compute) - { - SLANG_RETURN_ON_FAIL(_loadCUDAModule(outputAndLayout, cudaModule)); - } - - SLANG_RETURN_ON_FAIL(_setUpArguments( - cudaContext, cudaStream, cudaModule, outputAndLayout, dispatchSize, outContext)); - - SLANG_RETURN_ON_FAIL(_loadAndInvokeKernel( - cudaContext, cudaStream, cudaModule, outputAndLayout, dispatchSize, outContext)); - - // Finally we need to copy the data back - SLANG_RETURN_ON_FAIL(_readBackOutputs( - cudaContext, cudaStream, outputAndLayout, dispatchSize, outContext)); - - // Release all othe CUDA resource/allocations - bindSet.releaseValueTargets(); - outContext.releaseBindlessResources(); - - return SLANG_OK; -} - - -void CUDAComputeUtil::Context::releaseBindlessResources() -{ - m_bindlessResources = decltype(m_bindlessResources)(); -} - -} // namespace renderer_test diff --git a/tools/render-test/cuda/cuda-compute-util.h b/tools/render-test/cuda/cuda-compute-util.h deleted file mode 100644 index 9c6c8b9b4..000000000 --- a/tools/render-test/cuda/cuda-compute-util.h +++ /dev/null @@ -1,69 +0,0 @@ -#ifndef CUDA_COMPUTE_UTIL_H -#define CUDA_COMPUTE_UTIL_H - -#include "../slang-support.h" -#include "../options.h" - -#include "source/core/slang-smart-pointer.h" - -namespace renderer_test { - -// Base class for CUDA resources. This includes textures but also -// memory allocations -class CUDAResource : public Slang::RefObject -{ -public: - virtual uint64_t getBindlessHandle() = 0; -}; - -struct CUDAComputeUtil -{ - // Define here, so we don't need to include the CUDA header - typedef size_t CUdeviceptr; - - /// NOTE! MUST match up to definitions in the CUDA prelude - struct ByteAddressBuffer - { - CUdeviceptr data; - size_t sizeInBytes; - }; - struct StructuredBuffer - { - CUdeviceptr data; - size_t count; - }; - struct Array - { - CUdeviceptr data; - size_t count; - }; - - struct Context - { - /// Holds the binding information - BindSet m_bindSet; - CPULikeBindRoot m_bindRoot; - /// Buffers are held in same order as entries in layout (useful for dumping out bindings) - Slang::List<BindSet::Value*> m_buffers; - Slang::OrderedDictionary<Slang::String, Slang::RefPtr<CUDAResource>> m_bindlessResources; - void releaseBindlessResources(); - }; - - static SlangResult parseFeature(const Slang::UnownedStringSlice& feature, bool& outResult); - - static bool hasFeature(const Slang::UnownedStringSlice& feature); - - static SlangResult createTextureResource( - const ShaderInputLayoutEntry& srcEntry, - slang::TypeLayoutReflection* typeLayout, - Slang::RefPtr<CUDAResource>& outResource); - - static SlangResult execute(const ShaderCompilerUtil::OutputAndLayout& outputAndLayout, const uint32_t dispatchSize[3], Context& outContext); - - static bool canCreateDevice(); -}; - - -} // renderer_test - -#endif //CPU_MEMORY_BINDING_H diff --git a/tools/render-test/options.cpp b/tools/render-test/options.cpp index fa32bb9c0..b5d75adf2 100644 --- a/tools/render-test/options.cpp +++ b/tools/render-test/options.cpp @@ -253,7 +253,9 @@ static SlangResult _setRendererType(DeviceType type, const char* arg, Slang::Wri } else if (strcmp(arg, "-shaderobj") == 0) { - outOptions.useShaderObjects = true; + // Note: We ignore this option because it is always enabled now. + // + // TODO: At some point we could warn/error and deprecate this option. } else { diff --git a/tools/render-test/options.h b/tools/render-test/options.h index c051a4d09..6ca1ef499 100644 --- a/tools/render-test/options.h +++ b/tools/render-test/options.h @@ -67,8 +67,6 @@ struct Options bool useDXIL = false; bool onlyStartup = false; - bool useShaderObjects = false; - bool performanceProfile = false; bool dontAddDefaultEntryPoints = false; diff --git a/tools/render-test/render-test-main.cpp b/tools/render-test/render-test-main.cpp index 15100e2a5..02f20dd40 100644 --- a/tools/render-test/render-test-main.cpp +++ b/tools/render-test/render-test-main.cpp @@ -23,8 +23,6 @@ #include "../../source/core/slang-test-tool-util.h" -#include "cpu-compute-util.h" - #define ENABLE_RENDERDOC_INTEGRATION 0 #if ENABLE_RENDERDOC_INTEGRATION @@ -33,10 +31,6 @@ # include <Windows.h> #endif -#if RENDER_TEST_CUDA -# include "cuda/cuda-compute-util.h" -#endif - namespace renderer_test { using Slang::Result; @@ -80,8 +74,9 @@ struct ShaderOutputPlan { struct Item { - Index inputLayoutEntryIndex; - ComPtr<IResource> resource; + Index inputLayoutEntryIndex; + ComPtr<IResource> resource; + slang::TypeLayoutReflection* typeLayout = nullptr; }; List<Item> items; @@ -105,7 +100,7 @@ public: virtual void applyBinding(PipelineType pipelineType, ICommandEncoder* encoder) = 0; virtual void setProjectionMatrix(IResourceCommandEncoder* encoder) = 0; - virtual Result writeBindingOutput(BindRoot* bindRoot, const char* fileName) = 0; + virtual Result writeBindingOutput(const char* fileName) = 0; Result writeScreen(const char* filename); @@ -142,29 +137,6 @@ protected: Options m_options; }; -class LegacyRenderTestApp : public RenderTestApp -{ -public: - virtual void applyBinding(PipelineType pipelineType, ICommandEncoder* encoder) SLANG_OVERRIDE; - virtual void setProjectionMatrix(IResourceCommandEncoder* encoder) SLANG_OVERRIDE; - virtual Result initialize( - SlangSession* session, - IDevice* device, - const Options& options, - const ShaderCompilerUtil::Input& input) SLANG_OVERRIDE; - - BindingStateImpl* getBindingState() const { return m_bindingState; } - - virtual Result writeBindingOutput(BindRoot* bindRoot, const char* fileName) override; - virtual void finalizeImpl() SLANG_OVERRIDE; - -protected: - uintptr_t m_constantBufferSize; - ComPtr<IBufferResource> m_constantBuffer; - RefPtr<BindingStateImpl> m_bindingState; - int m_numAddedConstantBuffers; ///< Constant buffers can be added to the binding directly. Will be added at the end. -}; - class ShaderObjectRenderTestApp : public RenderTestApp { public: @@ -175,7 +147,7 @@ public: IDevice* device, const Options& options, const ShaderCompilerUtil::Input& input) SLANG_OVERRIDE; - virtual Result writeBindingOutput(BindRoot* bindRoot, const char* fileName) override; + virtual Result writeBindingOutput(const char* fileName) override; protected: virtual void finalizeImpl() SLANG_OVERRIDE; @@ -223,6 +195,7 @@ SlangResult _assignVarsFromLayout( StdWriters::getError().print("error: could not find shader parameter matching '%s'\n", entry.name.begin()); return SLANG_E_INVALID_ARG; } + slang::TypeLayoutReflection* typeLayout = entryCursor.getTypeLayout(); ComPtr<IResource> resource; switch(entry.type) @@ -296,35 +269,6 @@ SlangResult _assignVarsFromLayout( } break; } - -#if 0 - switch(srcBuffer.type) - { - case InputBufferType::ConstantBuffer: - descriptorSet->setConstantBuffer(rangeIndex, 0, bufferResource); - break; - - case InputBufferType::StorageBuffer: - { - ResourceView::Desc viewDesc; - viewDesc.type = ResourceView::Type::UnorderedAccess; - viewDesc.format = srcBuffer.format; - auto bufferView = renderer->createBufferView( - bufferResource, - viewDesc); - descriptorSet->setResource(rangeIndex, 0, bufferView); - } - break; - } - - if(srcEntry.isOutput) - { - BindingStateImpl::OutputBinding binding; - binding.entryIndex = i; - binding.resource = bufferResource; - outputBindings.add(binding); - } -#endif } break; @@ -344,18 +288,6 @@ SlangResult _assignVarsFromLayout( viewDesc); entryCursor.setCombinedTextureSampler(textureView, sampler); - -#if 0 - descriptorSet->setCombinedTextureSampler(rangeIndex, 0, textureView, sampler); - - if(srcEntry.isOutput) - { - BindingStateImpl::OutputBinding binding; - binding.entryIndex = i; - binding.resource = texture; - outputBindings.add(binding); - } -#endif } break; @@ -381,18 +313,6 @@ SlangResult _assignVarsFromLayout( } entryCursor.setResource(textureView); - -#if 0 - descriptorSet->setResource(rangeIndex, 0, textureView); - - if(srcEntry.isOutput) - { - BindingStateImpl::OutputBinding binding; - binding.entryIndex = i; - binding.resource = texture; - outputBindings.add(binding); - } -#endif } break; @@ -401,9 +321,6 @@ SlangResult _assignVarsFromLayout( auto sampler = _createSamplerState(device, entry.samplerDesc); entryCursor.setSampler(sampler); -#if 0 - descriptorSet->setSampler(rangeIndex, 0, sampler); -#endif } break; @@ -459,6 +376,7 @@ SlangResult _assignVarsFromLayout( ShaderOutputPlan::Item item; item.inputLayoutEntryIndex = entryIndex; item.resource = resource; + item.typeLayout = typeLayout; ioOutputPlan.items.add(item); } @@ -466,11 +384,6 @@ SlangResult _assignVarsFromLayout( return SLANG_OK; } -void LegacyRenderTestApp::applyBinding(PipelineType pipelineType, ICommandEncoder* encoder) -{ - m_bindingState->apply(encoder, pipelineType); -} - void ShaderObjectRenderTestApp::applyBinding(PipelineType pipelineType, ICommandEncoder* encoder) { switch (pipelineType) @@ -496,112 +409,6 @@ void ShaderObjectRenderTestApp::applyBinding(PipelineType pipelineType, ICommand } } -SlangResult LegacyRenderTestApp::initialize( - SlangSession* session, - IDevice* device, - const Options& options, - const ShaderCompilerUtil::Input& input) -{ - m_options = options; - - m_device = device; - - SLANG_RETURN_ON_FAIL(_initializeShaders(session, device, options.shaderType, input)); - - _initializeRenderPass(); - - m_numAddedConstantBuffers = 0; - - // TODO(tfoley): use each API's reflection interface to query the constant-buffer size needed - m_constantBufferSize = 16 * sizeof(float); - - IBufferResource::Desc constantBufferDesc; - constantBufferDesc.init(m_constantBufferSize); - constantBufferDesc.cpuAccessFlags = IResource::AccessFlag::Write; - - m_constantBuffer = - device->createBufferResource(IResource::Usage::ConstantBuffer, constantBufferDesc); - if (!m_constantBuffer) - return SLANG_FAIL; - - //! Hack -> if doing a graphics test, add an extra binding for our dynamic constant buffer - // - // TODO: Should probably be more sophisticated than this - with 'dynamic' constant buffer/s - // binding always being specified in the test file - ComPtr<IBufferResource> addedConstantBuffer; - switch (m_options.shaderType) - { - default: - break; - - case Options::ShaderProgramType::Graphics: - case Options::ShaderProgramType::GraphicsCompute: - addedConstantBuffer = m_constantBuffer; - m_numAddedConstantBuffers++; - break; - } - - BindingStateImpl* bindingState = nullptr; - SLANG_RETURN_ON_FAIL(ShaderRendererUtil::createBindingState( - m_shaderInputLayout, m_device, addedConstantBuffer, &bindingState)); - m_bindingState = bindingState; - - // Do other initialization that doesn't depend on the source language. - - // Input Assembler (IA) - - const InputElementDesc inputElements[] = { - {"A", 0, Format::RGB_Float32, offsetof(Vertex, position)}, - {"A", 1, Format::RGB_Float32, offsetof(Vertex, color)}, - {"A", 2, Format::RG_Float32, offsetof(Vertex, uv)}, - }; - - m_inputLayout = m_device->createInputLayout(inputElements, SLANG_COUNT_OF(inputElements)); - if (!m_inputLayout) - return SLANG_FAIL; - - IBufferResource::Desc vertexBufferDesc; - vertexBufferDesc.init(kVertexCount * sizeof(Vertex)); - - m_vertexBuffer = m_device->createBufferResource( - IResource::Usage::VertexBuffer, vertexBufferDesc, kVertexData); - if (!m_vertexBuffer) - return SLANG_FAIL; - - { - switch (m_options.shaderType) - { - default: - assert(!"unexpected test shader type"); - return SLANG_FAIL; - - case Options::ShaderProgramType::Compute: - { - ComputePipelineStateDesc desc; - desc.pipelineLayout = m_bindingState->pipelineLayout; - desc.program = m_shaderProgram; - - m_pipelineState = m_device->createComputePipelineState(desc); - } - break; - - case Options::ShaderProgramType::Graphics: - case Options::ShaderProgramType::GraphicsCompute: - { - GraphicsPipelineStateDesc desc; - desc.pipelineLayout = m_bindingState->pipelineLayout; - desc.program = m_shaderProgram; - desc.inputLayout = m_inputLayout; - desc.framebufferLayout = m_framebufferLayout; - m_pipelineState = m_device->createGraphicsPipelineState(desc); - } - break; - } - } - // If success must have a pipeline state - return m_pipelineState ? SLANG_OK : SLANG_FAIL; -} - SlangResult ShaderObjectRenderTestApp::initialize( SlangSession* session, IDevice* device, @@ -700,13 +507,6 @@ SlangResult ShaderObjectRenderTestApp::initialize( return m_pipelineState ? SLANG_OK : SLANG_FAIL; } -void LegacyRenderTestApp::finalizeImpl() -{ - m_constantBuffer = nullptr; - m_bindingState = nullptr; - RenderTestApp::finalizeImpl(); -} - void ShaderObjectRenderTestApp::finalizeImpl() { m_programVars = nullptr; @@ -802,12 +602,6 @@ void RenderTestApp::_initializeRenderPass() m_device->createRenderPassLayout(renderPassDesc, m_renderPass.writeRef()); } -void LegacyRenderTestApp::setProjectionMatrix(IResourceCommandEncoder* encoder) -{ - auto info = m_device->getDeviceInfo(); - encoder->uploadBufferData(m_constantBuffer, 0, sizeof(float) * 16, info.identityProjectionMatrix); -} - void ShaderObjectRenderTestApp::setProjectionMatrix(IResourceCommandEncoder* encoder) { SLANG_UNUSED(encoder); @@ -863,50 +657,7 @@ void RenderTestApp::finalizeImpl() { } -Result LegacyRenderTestApp::writeBindingOutput(BindRoot* bindRoot, const char* fileName) -{ - // Wait until everything is complete - m_queue->wait(); - - FILE * f = fopen(fileName, "wb"); - if (!f) - { - return SLANG_FAIL; - } - FileWriter writer(f, WriterFlags(0)); - - for(auto binding : m_bindingState->outputBindings) - { - auto i = binding.entryIndex; - const auto& layoutBinding = m_shaderInputLayout.entries[i]; - - assert(layoutBinding.isOutput); - - if (binding.resource && binding.resource->getType() == IResource::Type::Buffer) - { - IBufferResource* bufferResource = static_cast<IBufferResource*>(binding.resource.get()); - const size_t bufferSize = bufferResource->getDesc()->sizeInBytes; - ComPtr<ISlangBlob> blob; - m_device->readBufferResource(bufferResource, 0, bufferSize, blob.writeRef()); - if (!blob) - { - return SLANG_FAIL; - } - - const SlangResult res = ShaderInputLayout::writeBinding( - bindRoot, m_shaderInputLayout.entries[i], blob->getBufferPointer(), bufferSize, &writer); - SLANG_RETURN_ON_FAIL(res); - } - else - { - printf("invalid output type at %d.\n", int(i)); - } - } - - return SLANG_OK; -} - -Result ShaderObjectRenderTestApp::writeBindingOutput(BindRoot* bindRoot, const char* fileName) +Result ShaderObjectRenderTestApp::writeBindingOutput(const char* fileName) { // Wait until everything is complete m_queue->wait(); @@ -935,8 +686,12 @@ Result ShaderObjectRenderTestApp::writeBindingOutput(BindRoot* bindRoot, const c { return SLANG_FAIL; } - const SlangResult res = - ShaderInputLayout::writeBinding(bindRoot, inputEntry, blob->getBufferPointer(), bufferSize, &writer); + const SlangResult res = ShaderInputLayout::writeBinding( + inputEntry, + m_options.outputUsingType ? outputItem.typeLayout : nullptr, // TODO: always output using type + blob->getBufferPointer(), + bufferSize, + &writer); SLANG_RETURN_ON_FAIL(res); } else @@ -947,7 +702,6 @@ Result ShaderObjectRenderTestApp::writeBindingOutput(BindRoot* bindRoot, const c return SLANG_OK; } - Result RenderTestApp::writeScreen(const char* filename) { size_t rowPitch, pixelSize; @@ -1037,13 +791,7 @@ Result RenderTestApp::update() auto request = m_compilationOutput.output.getRequestForReflection(); auto slangReflection = (slang::ShaderReflection*) spGetReflection(request); - BindSet bindSet; - GPULikeBindRoot bindRoot; - bindRoot.init(&bindSet, slangReflection, 0); - - BindRoot* outputBindRoot = m_options.outputUsingType ? &bindRoot : nullptr; - - SLANG_RETURN_ON_FAIL(writeBindingOutput(outputBindRoot, m_options.outputPath)); + SLANG_RETURN_ON_FAIL(writeBindingOutput(m_options.outputPath)); } else { @@ -1258,7 +1006,8 @@ static SlangResult _innerMain(Slang::StdWriters* stdWriters, SlangSession* sessi case DeviceType::CUDA: { #if RENDER_TEST_CUDA - return SLANG_SUCCEEDED(spSessionCheckPassThroughSupport(session, SLANG_PASS_THROUGH_NVRTC)) && CUDAComputeUtil::canCreateDevice() ? SLANG_OK : SLANG_FAIL; + if(SLANG_FAILED(spSessionCheckPassThroughSupport(session, SLANG_PASS_THROUGH_NVRTC))) + return SLANG_FAIL; #else return SLANG_FAIL; #endif @@ -1293,119 +1042,6 @@ static SlangResult _innerMain(Slang::StdWriters* stdWriters, SlangSession* sessi return SLANG_E_NOT_AVAILABLE; } - // If it's CPU testing we don't need a window or a renderer - if (options.deviceType == DeviceType::CPU && !options.useShaderObjects) - { - // Check we have all the required features - for (const auto& renderFeature : options.renderFeatures) - { - if (!CPUComputeUtil::hasFeature(renderFeature.getUnownedSlice())) - { - return SLANG_E_NOT_AVAILABLE; - } - } - - ShaderCompilerUtil::OutputAndLayout compilationAndLayout; - SLANG_RETURN_ON_FAIL(ShaderCompilerUtil::compileWithLayout(session, options, input, compilationAndLayout)); - - { - // Get the shared library -> it contains the executable code, we need to keep around if we recompile - ComPtr<ISlangSharedLibrary> sharedLibrary; - SLANG_RETURN_ON_FAIL(spGetEntryPointHostCallable(compilationAndLayout.output.getRequestForKernels(), 0, 0, sharedLibrary.writeRef())); - - // This is a hack to work around, reflection when compiling straight C/C++ code. In that case the code is just passed - // straight through to the C++ compiler so no reflection. In these tests though we should have conditional code - // (performance-profile.slang for example), such that there is both a slang and C++ code, and it is the job - // of the test implementer to *ensure* that the straight C++ code has the same layout as the slang C++ backend. - // - // If we are running c/c++ we still need binding information, so compile again as slang source - if (options.sourceLanguage == SLANG_SOURCE_LANGUAGE_C || input.sourceLanguage == SLANG_SOURCE_LANGUAGE_CPP) - { - ShaderCompilerUtil::Input slangInput = input; - slangInput.sourceLanguage = SLANG_SOURCE_LANGUAGE_SLANG; - slangInput.passThrough = SLANG_PASS_THROUGH_NONE; - // We just want CPP, so we get suitable reflection - slangInput.target = SLANG_CPP_SOURCE; - - SLANG_RETURN_ON_FAIL(ShaderCompilerUtil::compileWithLayout(session, options, slangInput, compilationAndLayout)); - } - - // calculate binding - CPUComputeUtil::Context context; - SLANG_RETURN_ON_FAIL(CPUComputeUtil::createBindlessResources(compilationAndLayout, context)); - SLANG_RETURN_ON_FAIL(CPUComputeUtil::fillRuntimeHandleInBuffers(compilationAndLayout, context, sharedLibrary.get())); - SLANG_RETURN_ON_FAIL(CPUComputeUtil::calcBindings(compilationAndLayout, context)); - - // Get the execution info from the lib - CPUComputeUtil::ExecuteInfo info; - SLANG_RETURN_ON_FAIL(CPUComputeUtil::calcExecuteInfo(CPUComputeUtil::ExecuteStyle::GroupRange, sharedLibrary, options.computeDispatchSize, compilationAndLayout, context, info)); - - const uint64_t startTicks = ProcessUtil::getClockTick(); - - SLANG_RETURN_ON_FAIL(CPUComputeUtil::execute(info)); - - if (options.performanceProfile) - { - const uint64_t endTicks = ProcessUtil::getClockTick(); - _outputProfileTime(startTicks, endTicks); - } - - if (options.outputPath) - { - BindRoot* outputBindRoot = options.outputUsingType ? &context.m_bindRoot : nullptr; - - - // Dump everything out that was written - SLANG_RETURN_ON_FAIL(ShaderInputLayout::writeBindings(outputBindRoot, compilationAndLayout.layout, context.m_buffers, options.outputPath)); - - // Check all execution styles produce the same result - SLANG_RETURN_ON_FAIL(CPUComputeUtil::checkStyleConsistency(sharedLibrary, options.computeDispatchSize, compilationAndLayout)); - } - } - - return SLANG_OK; - } - - if (options.deviceType == DeviceType::CUDA && !options.useShaderObjects) - { -#if RENDER_TEST_CUDA - // Check we have all the required features - for (const auto& renderFeature : options.renderFeatures) - { - if (!CUDAComputeUtil::hasFeature(renderFeature.getUnownedSlice())) - { - return SLANG_E_NOT_AVAILABLE; - } - } - - ShaderCompilerUtil::OutputAndLayout compilationAndLayout; - SLANG_RETURN_ON_FAIL(ShaderCompilerUtil::compileWithLayout(session, options, input, compilationAndLayout)); - - const uint64_t startTicks = ProcessUtil::getClockTick(); - - CUDAComputeUtil::Context context; - SLANG_RETURN_ON_FAIL(CUDAComputeUtil::execute(compilationAndLayout, options.computeDispatchSize, context)); - - if (options.performanceProfile) - { - const uint64_t endTicks = ProcessUtil::getClockTick(); - _outputProfileTime(startTicks, endTicks); - } - - if (options.outputPath) - { - BindRoot* outputBindRoot = options.outputUsingType ? &context.m_bindRoot : nullptr; - - // Dump everything out that was written - SLANG_RETURN_ON_FAIL(ShaderInputLayout::writeBindings(outputBindRoot, compilationAndLayout.layout, context.m_buffers, options.outputPath)); - } - - return SLANG_OK; -#else - return SLANG_FAIL; -#endif - } - Slang::ComPtr<IDevice> device; { IDevice::Desc desc = {}; @@ -1465,11 +1101,10 @@ static SlangResult _innerMain(Slang::StdWriters* stdWriters, SlangSession* sessi } { - RefPtr<RenderTestApp> app; - if (options.useShaderObjects) - app = new ShaderObjectRenderTestApp(); - else - app = new LegacyRenderTestApp(); + // TODO: We shouldn't need to heap-allocate the `ShaderObjectRenderTestApp` + // since there is no longer any meaningful inheritance going on. + // + RefPtr<RenderTestApp> app = new ShaderObjectRenderTestApp(); renderDocBeginFrame(); SLANG_RETURN_ON_FAIL(app->initialize(session, device, options, input)); app->update(); diff --git a/tools/render-test/shader-input-layout.cpp b/tools/render-test/shader-input-layout.cpp index 2f7162f35..071c694b5 100644 --- a/tools/render-test/shader-input-layout.cpp +++ b/tools/render-test/shader-input-layout.cpp @@ -631,180 +631,12 @@ namespace renderer_test } } - /* static */SlangResult ShaderInputLayout::addBindSetValues(const Slang::List<ShaderInputLayoutEntry>& entries, const String& sourcePath, WriterHelper outStream, BindRoot& bindRoot) - { - BindSet* bindSet = bindRoot.getBindSet(); - SLANG_ASSERT(bindSet); - - for (Index entryIndex = 0; entryIndex < entries.getCount(); ++entryIndex) - { - auto& entry = entries[entryIndex]; - if (entry.isBindlessObject) - continue; - - if (entry.name.getLength() == 0) - { - outStream.print("No 'name' specified for value in '%s'\n", sourcePath.getBuffer()); - return SLANG_FAIL; - } - - BindLocation location = BindLocation::Invalid; - SLANG_RETURN_ON_FAIL(bindRoot.parse(entry.name, sourcePath, outStream, location)); - - auto& srcEntry = entries[entryIndex]; - - auto typeLayout = location.getTypeLayout(); - const auto kind = typeLayout->getKind(); - switch (kind) - { - case slang::TypeReflection::Kind::Array: - { - auto elementCount = int(typeLayout->getElementCount()); - if (elementCount == 0) - { - if (srcEntry.type == ShaderInputType::Array) - { - // Set the size - SLANG_RETURN_ON_FAIL(bindRoot.setArrayCount(location, srcEntry.arrayDesc.size)); - } - break; - } - break; - } - case slang::TypeReflection::Kind::Vector: - case slang::TypeReflection::Kind::Matrix: - case slang::TypeReflection::Kind::Scalar: - case slang::TypeReflection::Kind::Struct: - { - SLANG_RETURN_ON_FAIL(location.setUniform(srcEntry.bufferData.getBuffer(), srcEntry.bufferData.getCount() * sizeof(unsigned int))); - break; - } - default: - break; - case slang::TypeReflection::Kind::ConstantBuffer: - { - SLANG_RETURN_ON_FAIL(bindSet->setBufferContents(location, srcEntry.bufferData.getBuffer(), srcEntry.bufferData.getCount() * sizeof(unsigned int))); - break; - } - case slang::TypeReflection::Kind::ParameterBlock: - { - auto elementTypeLayout = typeLayout->getElementTypeLayout(); - SLANG_UNUSED(elementTypeLayout); - break; - } - case slang::TypeReflection::Kind::TextureBuffer: - { - auto elementTypeLayout = typeLayout->getElementTypeLayout(); - SLANG_UNUSED(elementTypeLayout); - break; - } - case slang::TypeReflection::Kind::ShaderStorageBuffer: - { - auto elementTypeLayout = typeLayout->getElementTypeLayout(); - SLANG_UNUSED(elementTypeLayout); - break; - } - case slang::TypeReflection::Kind::GenericTypeParameter: - { - const char* name = typeLayout->getName(); - SLANG_UNUSED(name); - break; - } - case slang::TypeReflection::Kind::Interface: - { - const char* name = typeLayout->getName(); - SLANG_UNUSED(name); - break; - } - case slang::TypeReflection::Kind::Resource: - { - if (BindSet::isTextureType(typeLayout)) - { - // We don't bother setting any data - BindSet::Value* value = bindSet->createTextureValue(typeLayout); - value->m_userIndex = entryIndex; - bindSet->setAt(location, value); - break; - } - - auto type = typeLayout->getType(); - auto shape = type->getResourceShape(); - - //auto access = type->getResourceAccess(); - - switch (shape & SLANG_RESOURCE_BASE_SHAPE_MASK) - { - default: - assert(!"unhandled case"); - break; - case SLANG_BYTE_ADDRESS_BUFFER: - case SLANG_STRUCTURED_BUFFER: - { - size_t bufferSize = srcEntry.bufferData.getCount() * sizeof(unsigned int); - - BindSet::Value* value = bindSet->createBufferValue(typeLayout, bufferSize, srcEntry.bufferData.getBuffer()); - SLANG_ASSERT(value); - - value->m_userIndex = entryIndex; - - bindSet->setAt(location, value); - break; - } - } - if (shape & SLANG_TEXTURE_ARRAY_FLAG) - { - - } - if (shape & SLANG_TEXTURE_MULTISAMPLE_FLAG) - { - - } - - break; - } - } - } - - return SLANG_OK; - } - - /* static */void ShaderInputLayout::getValueBuffers(const Slang::List<ShaderInputLayoutEntry>& entries, const BindSet& bindSet, List<BindSet::Value*>& outBuffers) - { - outBuffers.setCount(entries.getCount()); - - for (Index i = 0; i< outBuffers.getCount(); ++i) - { - outBuffers[i] = nullptr; - } - - const auto& values = bindSet.getValues(); - for (BindSet::Value* value : values) - { - if (value->m_userIndex >= 0) - { - outBuffers[value->m_userIndex] = value; - } - } - } - - /* static */SlangResult ShaderInputLayout::writeBinding(BindRoot* bindRoot, const ShaderInputLayoutEntry& entry, const void* data, size_t sizeInBytes, WriterHelper writer) + /* static */SlangResult ShaderInputLayout::writeBinding(const ShaderInputLayoutEntry& entry, slang::TypeLayoutReflection* typeLayout, const void* data, size_t sizeInBytes, WriterHelper writer) { typedef slang::TypeReflection::ScalarType ScalarType; slang::TypeReflection::ScalarType scalarType = slang::TypeReflection::ScalarType::None; - slang::TypeLayoutReflection* typeLayout = nullptr; - - if (bindRoot && entry.name.getLength()) - { - BindLocation location; - if (SLANG_SUCCEEDED(bindRoot->parse(entry.name, "", writer, location))) - { - // We should have the type of the item - typeLayout = location.m_typeLayout; - } - } - slang::TypeLayoutReflection* elementTypeLayout = nullptr; if (typeLayout) @@ -954,32 +786,6 @@ namespace renderer_test return SLANG_OK; } - /* static */SlangResult ShaderInputLayout::writeBindings(BindRoot* bindRoot, const ShaderInputLayout& layout, const List<BindSet::Value*>& buffers, WriterHelper writer) - { - const auto& entries = layout.entries; - for (int i = 0; i < entries.getCount(); ++i) - { - const auto& entry = entries[i]; - if (entry.isOutput) - { - BindSet::Value* buffer = buffers[i]; - writeBinding(bindRoot, entries[i], buffer->m_data, buffer->m_sizeInBytes, writer); - } - } - - return SLANG_OK; - } - - /* static */SlangResult ShaderInputLayout::writeBindings(BindRoot* bindRoot, const ShaderInputLayout& layout, const List<BindSet::Value*>& buffers, const String& fileName) - { - FILE * f = fopen(fileName.getBuffer(), "wb"); - if (!f) - { - return SLANG_FAIL; - } - FileWriter fileWriter(f, WriterFlags(0)); - return writeBindings(bindRoot, layout, buffers, &fileWriter); - } void generateTextureData(TextureData& output, const InputTextureDesc& desc) { diff --git a/tools/render-test/shader-input-layout.h b/tools/render-test/shader-input-layout.h index 253f065f6..01ef5c443 100644 --- a/tools/render-test/shader-input-layout.h +++ b/tools/render-test/shader-input-layout.h @@ -6,9 +6,6 @@ #include "source/core/slang-writer.h" - -#include "bind-location.h" - #include "slang-gfx.h" namespace renderer_test { @@ -127,23 +124,8 @@ public: void parse(Slang::RandomGenerator* rand, const char* source); - /// Adds to bind set resources as defined in entries. - /// Note: No actual resources are created on a device, these are just the 'Resource' structures that are held on the BindSet - /// For buffers, the Resources will be setup with the contents of the entry. - /// That if a resource is created that maps to an entry, the m_userData member of Resource will be set to it's index - static SlangResult addBindSetValues(const Slang::List<ShaderInputLayoutEntry>& entries, const Slang::String& sourcePath, Slang::WriterHelper outError, BindRoot& bindRoot); - - /// Put into outBuffer the value buffers that were set via addbindSetValues (which will set m_userIndex to be the entries index) - static void getValueBuffers(const Slang::List<ShaderInputLayoutEntry>& entries, const BindSet& bindSet, Slang::List<BindSet::Value*>& outBuffers); - /// Writes a binding, if bindRoot is set, will try to honor the underlying type when outputting. If not will dump as uint32_t hex. - static SlangResult writeBinding(BindRoot* bindRoot, const ShaderInputLayoutEntry& entry, const void* data, size_t sizeInBytes, Slang::WriterHelper writer); - - /// Write all bindings, using data from buffers - static SlangResult writeBindings(BindRoot* bindRoot, const ShaderInputLayout& layout, const Slang::List<BindSet::Value*>& buffers, Slang::WriterHelper writer); - - /// Write bindings from values in memory from buffers - static SlangResult writeBindings(BindRoot* bindRoot, const ShaderInputLayout& layout, const Slang::List<BindSet::Value*>& buffers, const Slang::String& fileName); + static SlangResult writeBinding(const ShaderInputLayoutEntry& entry, slang::TypeLayoutReflection* typeLayout, const void* data, size_t sizeInBytes, Slang::WriterHelper writer); }; void generateTextureDataRGB8(TextureData& output, const InputTextureDesc& desc); diff --git a/tools/render-test/shader-renderer-util.cpp b/tools/render-test/shader-renderer-util.cpp index 903164567..ede744445 100644 --- a/tools/render-test/shader-renderer-util.cpp +++ b/tools/render-test/shader-renderer-util.cpp @@ -7,30 +7,6 @@ namespace renderer_test { using namespace Slang; using Slang::Result; -void BindingStateImpl::apply(ICommandEncoder* encoder, PipelineType pipelineType) -{ - switch (pipelineType) - { - case PipelineType::Compute: - { - ComPtr<IComputeCommandEncoder> computeEncoder; - encoder->queryInterface(SLANG_UUID_IComputeCommandEncoder, (void**)computeEncoder.writeRef()); - computeEncoder->setDescriptorSet(pipelineLayout, 0, descriptorSet); - } - break; - case PipelineType::Graphics: - { - ComPtr<IRenderCommandEncoder> renderEncoder; - encoder->queryInterface( - SLANG_UUID_IRenderCommandEncoder, (void**)renderEncoder.writeRef()); - renderEncoder->setDescriptorSet(pipelineLayout, 0, descriptorSet); - } - break; - default: - throw "unknown pipeline type"; - } -} - /* static */ Result ShaderRendererUtil::generateTextureResource( const InputTextureDesc& inputDesc, int bindFlags, @@ -178,275 +154,4 @@ ComPtr<ISamplerState> _createSamplerState(IDevice* device, return device->createSamplerState(_calcSamplerDesc(srcDesc)); } -/* static */ Result ShaderRendererUtil::createBindingState( - const ShaderInputLayout& layout, - IDevice* device, - IBufferResource* addedConstantBuffer, - BindingStateImpl** outBindingState) -{ - auto srcEntries = layout.entries.getBuffer(); - auto numEntries = layout.entries.getCount(); - - const int textureBindFlags = IResource::BindFlag::NonPixelShaderResource | IResource::BindFlag::PixelShaderResource; - - List<IDescriptorSetLayout::SlotRangeDesc> slotRangeDescs; - List<Index> mapEntryToSlotRange; - - if(addedConstantBuffer) - { - IDescriptorSetLayout::SlotRangeDesc slotRangeDesc; - slotRangeDesc.type = DescriptorSlotType::UniformBuffer; - - slotRangeDescs.add(slotRangeDesc); - } - - for (Index i = 0; i < numEntries; i++) - { - const ShaderInputLayoutEntry& srcEntry = srcEntries[i]; - SLANG_ASSERT(srcEntry.onlyCPULikeBinding == false); - - mapEntryToSlotRange.add(slotRangeDescs.getCount()); - IDescriptorSetLayout::SlotRangeDesc slotRangeDesc; - - switch (srcEntry.type) - { - case ShaderInputType::Buffer: - { - const InputBufferDesc& srcBuffer = srcEntry.bufferDesc; - - switch (srcBuffer.type) - { - case InputBufferType::ConstantBuffer: - slotRangeDesc.type = DescriptorSlotType::UniformBuffer; - break; - - case InputBufferType::StorageBuffer: - slotRangeDesc.type = DescriptorSlotType::StorageBuffer; - break; - - case InputBufferType::RootConstantBuffer: - { - // A root constant buffer maps to a root constant range - // where the `count` of slots is equal to the number - // of bytes of data. - // - Slang::UInt size = srcEntry.bufferData.getCount() * sizeof(srcEntry.bufferData[0]); - slotRangeDesc.type = DescriptorSlotType::RootConstant; - slotRangeDesc.count = size; - } - break; - } - } - break; - - case ShaderInputType::CombinedTextureSampler: - { - slotRangeDesc.type = DescriptorSlotType::CombinedImageSampler; - } - break; - - case ShaderInputType::Texture: - { - if (srcEntry.textureDesc.isRWTexture) - { - slotRangeDesc.type = DescriptorSlotType::StorageImage; - } - else - { - slotRangeDesc.type = DescriptorSlotType::SampledImage; - } - } - break; - - case ShaderInputType::Sampler: - slotRangeDesc.type = DescriptorSlotType::Sampler; - break; - - case ShaderInputType::Object: - // We ignore the `Object` case here, knowing that it is meant for the shader-object path. - continue; - - default: - assert(!"Unhandled type"); - return SLANG_FAIL; - } - slotRangeDescs.add(slotRangeDesc); - } - - IDescriptorSetLayout::Desc descriptorSetLayoutDesc; - descriptorSetLayoutDesc.slotRangeCount = slotRangeDescs.getCount(); - descriptorSetLayoutDesc.slotRanges = slotRangeDescs.getBuffer(); - - auto descriptorSetLayout = device->createDescriptorSetLayout(descriptorSetLayoutDesc); - if(!descriptorSetLayout) return SLANG_FAIL; - - List<IPipelineLayout::DescriptorSetDesc> pipelineDescriptorSets; - pipelineDescriptorSets.add(IPipelineLayout::DescriptorSetDesc(descriptorSetLayout)); - - IPipelineLayout::Desc pipelineLayoutDesc; - pipelineLayoutDesc.renderTargetCount = layout.numRenderTargets; - pipelineLayoutDesc.descriptorSetCount = pipelineDescriptorSets.getCount(); - pipelineLayoutDesc.descriptorSets = pipelineDescriptorSets.getBuffer(); - - auto pipelineLayout = device->createPipelineLayout(pipelineLayoutDesc); - if(!pipelineLayout) return SLANG_FAIL; - - auto descriptorSet = - device->createDescriptorSet(descriptorSetLayout, IDescriptorSet::Flag::Transient); - if(!descriptorSet) return SLANG_FAIL; - - List<BindingStateImpl::OutputBinding> outputBindings; - - if(addedConstantBuffer) - { - descriptorSet->setConstantBuffer(0, 0, addedConstantBuffer); - } - for (int i = 0; i < numEntries; i++) - { - const ShaderInputLayoutEntry& srcEntry = srcEntries[i]; - - auto rangeIndex = mapEntryToSlotRange[i]; - - switch (srcEntry.type) - { - case ShaderInputType::Buffer: - { - const InputBufferDesc& srcBuffer = srcEntry.bufferDesc; - const size_t bufferSize = srcEntry.bufferData.getCount() * sizeof(uint32_t); - - if( srcBuffer.type == InputBufferType::RootConstantBuffer ) - { - // A root constant buffer at the HLSL/Slang level actually - // maps to root constant data stored directly in the descriptor - // set, and thus does not need/want us to allocate a buffer - // to hold the data. - // - // Instead, we set the data directly here and then bypass - // the logic that handles the buffer-backed cases below. - // - descriptorSet->setRootConstants(rangeIndex, 0, bufferSize, srcEntry.bufferData.getBuffer()); - break; - } - - ComPtr<IBufferResource> bufferResource; - SLANG_RETURN_ON_FAIL(createBufferResource( - srcEntry.bufferDesc, - srcEntry.isOutput, - bufferSize, - srcEntry.bufferData.getBuffer(), - device, - bufferResource)); - - switch(srcBuffer.type) - { - case InputBufferType::ConstantBuffer: - descriptorSet->setConstantBuffer(rangeIndex, 0, bufferResource); - break; - - case InputBufferType::StorageBuffer: - { - IResourceView::Desc viewDesc; - viewDesc.type = IResourceView::Type::UnorderedAccess; - viewDesc.format = srcBuffer.format; - auto bufferView = device->createBufferView( - bufferResource, - viewDesc); - descriptorSet->setResource(rangeIndex, 0, bufferView); - } - break; - } - - if(srcEntry.isOutput) - { - BindingStateImpl::OutputBinding binding; - binding.entryIndex = i; - binding.resource = bufferResource; - outputBindings.add(binding); - } - } - break; - - case ShaderInputType::CombinedTextureSampler: - { - ComPtr<ITextureResource> texture; - SLANG_RETURN_ON_FAIL(generateTextureResource( - srcEntry.textureDesc, textureBindFlags, device, texture)); - - auto sampler = _createSamplerState(device, srcEntry.samplerDesc); - - IResourceView::Desc viewDesc; - viewDesc.type = IResourceView::Type::ShaderResource; - auto textureView = device->createTextureView( - texture, - viewDesc); - - descriptorSet->setCombinedTextureSampler(rangeIndex, 0, textureView, sampler); - - if(srcEntry.isOutput) - { - BindingStateImpl::OutputBinding binding; - binding.entryIndex = i; - binding.resource = texture; - outputBindings.add(binding); - } - } - break; - - case ShaderInputType::Texture: - { - ComPtr<ITextureResource> texture; - SLANG_RETURN_ON_FAIL(generateTextureResource( - srcEntry.textureDesc, textureBindFlags, device, texture)); - - // TODO: support UAV textures... - - IResourceView::Desc viewDesc; - viewDesc.type = IResourceView::Type::ShaderResource; - auto textureView = device->createTextureView( - texture, - viewDesc); - - if (!textureView) - { - return SLANG_FAIL; - } - - descriptorSet->setResource(rangeIndex, 0, textureView); - - if(srcEntry.isOutput) - { - BindingStateImpl::OutputBinding binding; - binding.entryIndex = i; - binding.resource = texture; - outputBindings.add(binding); - } - } - break; - - case ShaderInputType::Sampler: - { - auto sampler = _createSamplerState(device, srcEntry.samplerDesc); - descriptorSet->setSampler(rangeIndex, 0, sampler); - } - break; - - case ShaderInputType::Object: - break; - - default: - assert(!"Unhandled type"); - return SLANG_FAIL; - } - } - - BindingStateImpl* bindingState = new BindingStateImpl(); - bindingState->descriptorSet = descriptorSet; - bindingState->pipelineLayout = pipelineLayout; - bindingState->outputBindings = outputBindings; - bindingState->m_numRenderTargets = layout.numRenderTargets; - - *outBindingState = bindingState; - return SLANG_OK; -} - } // renderer_test diff --git a/tools/render-test/shader-renderer-util.h b/tools/render-test/shader-renderer-util.h index 9d583331f..8771d21f6 100644 --- a/tools/render-test/shader-renderer-util.h +++ b/tools/render-test/shader-renderer-util.h @@ -8,45 +8,6 @@ namespace renderer_test { using namespace Slang; -struct BindingStateImpl : public Slang::RefObject -{ - /// A register set consists of one or more contiguous indices. - /// To be valid index >= 0 and size >= 1 - struct RegisterRange - { - /// True if contains valid contents - bool isValid() const { return size > 0; } - /// True if valid single value - bool isSingle() const { return size == 1; } - /// Get as a single index (must be at least one index) - int getSingleIndex() const { return (size == 1) ? index : -1; } - /// Return the first index - int getFirstIndex() const { return (size > 0) ? index : -1; } - /// True if contains register index - bool hasRegister(int registerIndex) const { return registerIndex >= index && registerIndex < index + size; } - - static RegisterRange makeInvalid() { return RegisterRange{ -1, 0 }; } - static RegisterRange makeSingle(int index) { return RegisterRange{ int16_t(index), 1 }; } - static RegisterRange makeRange(int index, int size) { return RegisterRange{ int16_t(index), uint16_t(size) }; } - - int16_t index; ///< The base index - uint16_t size; ///< The amount of register indices - }; - - void apply(ICommandEncoder* encoder, PipelineType pipelineType); - - struct OutputBinding - { - ComPtr<IResource> resource; - Slang::UInt entryIndex; - }; - List<OutputBinding> outputBindings; - - ComPtr<IPipelineLayout> pipelineLayout; - ComPtr<IDescriptorSet> descriptorSet; - int m_numRenderTargets = 1; -}; - ComPtr<ISamplerState> _createSamplerState(IDevice* device, const InputSamplerDesc& srcDesc); /// Utility class containing functions that construct items on the renderer using the ShaderInputLayout representation @@ -75,13 +36,6 @@ struct ShaderRendererUtil const void* initData, IDevice* device, ComPtr<IBufferResource>& bufferOut); - - /// Create BindingState::Desc from the contents of layout - static Slang::Result createBindingState( - const ShaderInputLayout& layout, - IDevice* device, - IBufferResource* addedConstantBuffer, - BindingStateImpl** outBindingState); }; } // renderer_test |
