diff options
| -rw-r--r-- | source/slang/lower-to-ir.cpp | 77 | ||||
| -rw-r--r-- | tests/compute/matrix-layout-structured-buffer.slang | 51 | ||||
| -rw-r--r-- | tests/compute/matrix-layout-structured-buffer.slang.expected.txt | 12 | ||||
| -rw-r--r-- | tools/gfx/render-d3d11.cpp | 45 | ||||
| -rw-r--r-- | tools/gfx/render-d3d12.cpp | 17 |
5 files changed, 163 insertions, 39 deletions
diff --git a/source/slang/lower-to-ir.cpp b/source/slang/lower-to-ir.cpp index e5d62d257..af0664288 100644 --- a/source/slang/lower-to-ir.cpp +++ b/source/slang/lower-to-ir.cpp @@ -1849,6 +1849,43 @@ void addArgs( // +// When we try to turn a `LoweredValInfo` into an address of some temporary storage, +// we can either do it "aggressively" or not (what we'll call the "default" behavior, +// although it isn't strictly more common). +// +// The case that this is mostly there to address is when somebody writes an operation +// like: +// +// foo[a] = b; +// +// In that case, we might as well just use the `set` accessor if there is one, rather +// than complicate things. However, in more complex cases like: +// +// foo[a].x = b; +// +// there is no way to satisfy the semantics of the code the user wrote (in terms of +// only writing one vector component, and not a full vector) by using the `set` +// accessor, and we need to be "aggressive" in turning the lvalue `foo[a]` into +// an address. +// +// TODO: realistically IR lowering is too early to be binding to this choice, +// because different accessors might be supported on different targets. +// +enum class TryGetAddressMode +{ + Default, + Aggressive, +}; + +/// Try to coerce `inVal` into a `LoweredValInfo::ptr()` with a simple address. +LoweredValInfo tryGetAddress( + IRGenContext* context, + LoweredValInfo const& inVal, + TryGetAddressMode mode); + + +// + template<typename Derived> struct ExprLoweringVisitorBase : ExprVisitor<Derived, LoweredValInfo> { @@ -2607,6 +2644,17 @@ struct ExprLoweringVisitorBase : ExprVisitor<Derived, LoweredValInfo> IRInst* indexVal) { auto builder = getBuilder(); + + // The `tryGetAddress` operation will take a complex value representation + // and try to turn it into a single pointer, if possible. + // + baseVal = tryGetAddress(context, baseVal, TryGetAddressMode::Aggressive); + + // The `materialize` operation should ensure that we only have to deal + // with the small number of base cases for lowered value representations. + // + baseVal = materialize(context, baseVal); + switch (baseVal.flavor) { case LoweredValInfo::Flavor::Simple: @@ -3646,35 +3694,6 @@ static LoweredValInfo moveIntoMutableTemp( return var; } -// When we try to turn a `LoweredValInfo` into an address of some temporary storage, -// we can either do it "aggressively" or not (what we'll call the "default" behavior, -// although it isn't strictly more common). -// -// The case that this is mostly there to address is when somebody writes an operation -// like: -// -// foo[a] = b; -// -// In that case, we might as well just use the `set` accessor if there is one, rather -// than complicate things. However, in more complex cases like: -// -// foo[a].x = b; -// -// there is no way to satisfy the semantics of the code the user wrote (in terms of -// only writing one vector component, and not a full vector) by using the `set` -// accessor, and we need to be "aggressive" in turning the lvalue `foo[a]` into -// an address. -// -// TODO: realistically IR lowering is too early to be binding to this choice, -// because different accessors might be supported on different targets. -// -enum class TryGetAddressMode -{ - Default, - Aggressive, -}; - -/// Try to coerce `inVal` into a `LoweredValInfo::ptr()` with a simple address. LoweredValInfo tryGetAddress( IRGenContext* context, LoweredValInfo const& inVal, diff --git a/tests/compute/matrix-layout-structured-buffer.slang b/tests/compute/matrix-layout-structured-buffer.slang new file mode 100644 index 000000000..fc46d7815 --- /dev/null +++ b/tests/compute/matrix-layout-structured-buffer.slang @@ -0,0 +1,51 @@ +// matrix-layout-structured-buffer.slang + +// This test is set up to confirm that `StructuredBuffer` types are +// always laid out column-major by fxc/dxc, even when row-major layout has been +// requested globally. +// +// This behavior should be considered a bug in Slang because either: +// +// 1. we should report reflection layout information that acknowledges this behavior, or +// 2. we should alter our HLSL output passed to fxc/dxc to provide consistent +// behavior that matches our reflection data. +// +// For now this test exists to document the situation. It's output can/should +// be updated if we decide to fix the underlying problem by taking option (2). +// + +//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -xslang -matrix-layout-row-major +//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -xslang -matrix-layout-column-major + + +//TEST_INPUT:ubuffer(data=[0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23], stride=48):dxbinding(0),glbinding(0) +RWStructuredBuffer<int3x4> gMatrices; + +int test(int val) +{ + int N = 256; + + int tmp = 0; + + tmp = tmp*N + gMatrices[val%2][(val )%3][(val )%4]; + tmp = tmp*N + gMatrices[val%2][(val+1)%3][(val )%4]; + tmp = tmp*N + gMatrices[val%2][(val )%3][(val+1)%4]; + tmp = tmp*N + val; + tmp = tmp + 0x80000000; + + return tmp; +} + +//TEST_INPUT:ubuffer(data=[0 0 0 0 0 0 0 0 0 0 0 0], stride=4):dxbinding(0),glbinding(1),out +RWStructuredBuffer<int> buffer; + +[numthreads(12, 1, 1)] +void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) +{ + uint tid = dispatchThreadID.x; + + int val = tid; + val = test(val); + + buffer[tid] = val; +} diff --git a/tests/compute/matrix-layout-structured-buffer.slang.expected.txt b/tests/compute/matrix-layout-structured-buffer.slang.expected.txt new file mode 100644 index 000000000..18d8be654 --- /dev/null +++ b/tests/compute/matrix-layout-structured-buffer.slang.expected.txt @@ -0,0 +1,12 @@ +80010300 +90111301 +88060B02 +95160C03 +81020404 +910F1405 +86070906 +96170D07 +82000508 +8F101209 +87080A0A +97150E0B diff --git a/tools/gfx/render-d3d11.cpp b/tools/gfx/render-d3d11.cpp index 1848755bc..690b869ca 100644 --- a/tools/gfx/render-d3d11.cpp +++ b/tools/gfx/render-d3d11.cpp @@ -778,8 +778,15 @@ Result D3D11Renderer::createBufferResource(Resource::Usage initialUsage, const B BufferResource::Desc srcDesc(descIn); srcDesc.setDefaults(initialUsage); - // Make aligned to 256 bytes... not sure why, but if you remove this the tests do fail. - const size_t alignedSizeInBytes = D3DUtil::calcAligned(srcDesc.sizeInBytes, 256); + auto d3dBindFlags = _calcResourceBindFlags(srcDesc.bindFlags); + + size_t alignedSizeInBytes = srcDesc.sizeInBytes; + + if(d3dBindFlags & D3D11_BIND_CONSTANT_BUFFER) + { + // Make aligned to 256 bytes... not sure why, but if you remove this the tests do fail. + alignedSizeInBytes = D3DUtil::calcAligned(alignedSizeInBytes, 256); + } // Hack to make the initialization never read from out of bounds memory, by copying into a buffer List<uint8_t> initDataBuffer; @@ -792,7 +799,7 @@ Result D3D11Renderer::createBufferResource(Resource::Usage initialUsage, const B D3D11_BUFFER_DESC bufferDesc = { 0 }; bufferDesc.ByteWidth = UINT(alignedSizeInBytes); - bufferDesc.BindFlags = _calcResourceBindFlags(srcDesc.bindFlags); + bufferDesc.BindFlags = d3dBindFlags; // For read we'll need to do some staging bufferDesc.CPUAccessFlags = _calcResourceAccessFlags(descIn.cpuAccessFlags & Resource::AccessFlag::Write); bufferDesc.Usage = D3D11_USAGE_DEFAULT; @@ -1049,7 +1056,6 @@ Result D3D11Renderer::createBufferView(BufferResource* buffer, ResourceView::Des uavDesc.ViewDimension = D3D11_UAV_DIMENSION_BUFFER; uavDesc.Format = D3DUtil::getMapFormat(desc.format); uavDesc.Buffer.FirstElement = 0; - uavDesc.Buffer.NumElements = UINT(resourceDesc.sizeInBytes); if(resourceDesc.elementSize) { @@ -1059,6 +1065,11 @@ Result D3D11Renderer::createBufferView(BufferResource* buffer, ResourceView::Des { uavDesc.Buffer.Flags |= D3D11_BUFFER_UAV_FLAG_RAW; uavDesc.Format = DXGI_FORMAT_R32_TYPELESS; + uavDesc.Buffer.NumElements = UINT(resourceDesc.sizeInBytes / 4); + } + else + { + uavDesc.Buffer.NumElements = UINT(resourceDesc.sizeInBytes / RendererUtil::getFormatSize(desc.format)); } ComPtr<ID3D11UnorderedAccessView> uav; @@ -1077,16 +1088,34 @@ Result D3D11Renderer::createBufferView(BufferResource* buffer, ResourceView::Des D3D11_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; srvDesc.ViewDimension = D3D11_SRV_DIMENSION_BUFFER; srvDesc.Format = D3DUtil::getMapFormat(desc.format); - srvDesc.Buffer.ElementOffset = 0; - srvDesc.Buffer.ElementWidth = 1; srvDesc.Buffer.FirstElement = 0; - srvDesc.Buffer.NumElements = UINT(resourceDesc.sizeInBytes); if(resourceDesc.elementSize) { - srvDesc.Buffer.ElementWidth = resourceDesc.elementSize; srvDesc.Buffer.NumElements = UINT(resourceDesc.sizeInBytes / resourceDesc.elementSize); } + else if(desc.format == Format::Unknown) + { + // We need to switch to a different member of the `union`, + // so that we can set the `BufferEx.Flags` member. + // + srvDesc.ViewDimension = D3D11_SRV_DIMENSION_BUFFEREX; + + // Because we've switched, we need to re-set the `FirstElement` + // field to be valid, since we can't count on all compilers + // to respect that `Buffer.FirstElement` and `BufferEx.FirstElement` + // alias in memory. + // + srvDesc.BufferEx.FirstElement = 0; + + srvDesc.BufferEx.Flags = D3D11_BUFFEREX_SRV_FLAG_RAW; + srvDesc.Format = DXGI_FORMAT_R32_TYPELESS; + srvDesc.BufferEx.NumElements = UINT(resourceDesc.sizeInBytes / 4); + } + else + { + srvDesc.Buffer.NumElements = UINT(resourceDesc.sizeInBytes / RendererUtil::getFormatSize(desc.format)); + } ComPtr<ID3D11ShaderResourceView> srv; SLANG_RETURN_ON_FAIL(m_device->CreateShaderResourceView(resourceImpl->m_buffer, &srvDesc, srv.writeRef())); diff --git a/tools/gfx/render-d3d12.cpp b/tools/gfx/render-d3d12.cpp index a7fda22c8..0b611a219 100644 --- a/tools/gfx/render-d3d12.cpp +++ b/tools/gfx/render-d3d12.cpp @@ -2255,7 +2255,6 @@ Result D3D12Renderer::createBufferView(BufferResource* buffer, ResourceView::Des uavDesc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER; uavDesc.Format = D3DUtil::getMapFormat(desc.format); uavDesc.Buffer.FirstElement = 0; - uavDesc.Buffer.NumElements = UINT(resourceDesc.sizeInBytes); if(resourceDesc.elementSize) { @@ -2266,6 +2265,11 @@ Result D3D12Renderer::createBufferView(BufferResource* buffer, ResourceView::Des { uavDesc.Buffer.Flags |= D3D12_BUFFER_UAV_FLAG_RAW; uavDesc.Format = DXGI_FORMAT_R32_TYPELESS; + uavDesc.Buffer.NumElements = UINT(resourceDesc.sizeInBytes / 4); + } + else + { + uavDesc.Buffer.NumElements = UINT(resourceDesc.sizeInBytes / RendererUtil::getFormatSize(desc.format)); } @@ -2284,13 +2288,22 @@ Result D3D12Renderer::createBufferView(BufferResource* buffer, ResourceView::Des srvDesc.Format = D3DUtil::getMapFormat(desc.format); srvDesc.Buffer.StructureByteStride = 0; srvDesc.Buffer.FirstElement = 0; - srvDesc.Buffer.NumElements = UINT(resourceDesc.sizeInBytes); if(resourceDesc.elementSize) { srvDesc.Buffer.StructureByteStride = resourceDesc.elementSize; srvDesc.Buffer.NumElements = UINT(resourceDesc.sizeInBytes / resourceDesc.elementSize); } + else if(desc.format == Format::Unknown) + { + srvDesc.Buffer.Flags |= D3D12_BUFFER_SRV_FLAG_RAW; + srvDesc.Format = DXGI_FORMAT_R32_TYPELESS; + srvDesc.Buffer.NumElements = UINT(resourceDesc.sizeInBytes / 4); + } + else + { + srvDesc.Buffer.NumElements = UINT(resourceDesc.sizeInBytes / RendererUtil::getFormatSize(desc.format)); + } SLANG_RETURN_ON_FAIL(m_viewAllocator.allocate(&viewImpl->m_descriptor)); m_device->CreateShaderResourceView(resourceImpl->m_resource, &srvDesc, viewImpl->m_descriptor.cpuHandle); |
