summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTim Foley <tfoleyNV@users.noreply.github.com>2019-03-08 14:59:47 -0800
committerGitHub <noreply@github.com>2019-03-08 14:59:47 -0800
commit281c67b8d92899f462695fe75a26467743a497e8 (patch)
tree2e5fda65e02ce19897de22ce0c1485cd9c83bd53
parent4aab9cc79e7b0496ec447bad67225dc1c3486bef (diff)
Confirm layout for structured buffer of matrices, and related fixes (#889)
* Fix up handling of NumElements for D3D buffer views For everything but structured buffers we'd been setting this to the size in bytes, but that isn't really valid at all. The `NumElements` member in the view descs is supposed to be the number of buffer elements, so it would be capped at the byte size divided by the element size. This change fixes the computation of `NumElements` to take the size of the format into account for non-structured views. For the "raw" case, we use a size of 4 bytes since that matches the `DXGI_FORMAT_R32_TYPELESS` format we use (which seems to be required for raw buffers). I also added support for the raw case for SRVs where it didn't seem to be supported before (not that any of our tests cover it). * Fix handling of size padding for D3D11 buffers The existing code was enforcing a 256-byte-aligned size for all buffers, but this can cause problems for a structured buffer. A structured buffer must have a size that is a multiple of the stride, so a structured buffer with a 48-byte stride and a 96-byte size would get rounded up to 256 bytes, which is not an integer multiple of 48. This change makes it so that we only apply the padding to constant buffers. According to MSDN, constant buffers only require padding to a 16-byte aligned size, and no other restrictions are listed for D3D11, but it is difficult to know whether those constrains are exhaustive. I've left in the 256-byte padding for now (rather than switch to 16-byte), even though I suspect that was only needed as a band-aid for the `NumElements` issue fixed by another commit. * Fix an IR generation bug when indxing into a strutured buffer element The problem here arises when we have a structured buffer of matrices (an array type would likely trigger it too): ```hlsl RWStructuredBuffer<float3x4> gMatrices; ``` and then we index into it directly, rather than copying to a temporary: ```hlsl // CRASH: float v = gMatrices[i][j][k]; // OKAY: float3x4 m = gMatrices[i]; float v = m[j][k]; ``` The underlying issue is that our IR lowering pass tries to defer the decision about whether to use a `get` vs. `set` vs. `ref` accessor for a subscript until as late as possible (this is to deal with the fact that sometimes D3D can provide a `ref` accessor where GLSL can only provide a `get` or `set`). We probably need to overhaul that aspect of IR codegen sooner or later, but this change uses some of the existing machinery to try to force the `gMatrices[i]` subexpression to take the form of a pointer when doing sub-indexing like this. This fixes the present case, and hopefully shouldn't break anything else that used to work (because the subroutines I'm using to coerce the `gMatrices[i]` expression should be idempotent on the cases that were already implemented). * Add a test case to confirm fxc/dxc layout for structured buffers of matrices Even when row-major layout is requested globally, fxc and dxc seem to lay out a `StructuredBuffer<float3x4>` with column-major layout on the elements. This commit adds a test that confirms that behavior. This commit does not try to implement a fix for the issue (either fixing Slang's layout reflection information to be correct for what fxc/dxc do in practice, or fixing Slang's HLSL output to work around the fxc/dxc behavior), but just documents the status quo. If/when we decide how we'd like to handle the issue long-term, this test can/should be updated to match the decision we make. * fixup: build breakage on clang/gcc This is one of those cases where the Microsoft compiler is letting through some stuff that isn't technically valid C++ ("delayed template parsing"). Fixed by just moving some declarations to earlier in the file.
-rw-r--r--source/slang/lower-to-ir.cpp77
-rw-r--r--tests/compute/matrix-layout-structured-buffer.slang51
-rw-r--r--tests/compute/matrix-layout-structured-buffer.slang.expected.txt12
-rw-r--r--tools/gfx/render-d3d11.cpp45
-rw-r--r--tools/gfx/render-d3d12.cpp17
5 files changed, 163 insertions, 39 deletions
diff --git a/source/slang/lower-to-ir.cpp b/source/slang/lower-to-ir.cpp
index e5d62d257..af0664288 100644
--- a/source/slang/lower-to-ir.cpp
+++ b/source/slang/lower-to-ir.cpp
@@ -1849,6 +1849,43 @@ void addArgs(
//
+// When we try to turn a `LoweredValInfo` into an address of some temporary storage,
+// we can either do it "aggressively" or not (what we'll call the "default" behavior,
+// although it isn't strictly more common).
+//
+// The case that this is mostly there to address is when somebody writes an operation
+// like:
+//
+// foo[a] = b;
+//
+// In that case, we might as well just use the `set` accessor if there is one, rather
+// than complicate things. However, in more complex cases like:
+//
+// foo[a].x = b;
+//
+// there is no way to satisfy the semantics of the code the user wrote (in terms of
+// only writing one vector component, and not a full vector) by using the `set`
+// accessor, and we need to be "aggressive" in turning the lvalue `foo[a]` into
+// an address.
+//
+// TODO: realistically IR lowering is too early to be binding to this choice,
+// because different accessors might be supported on different targets.
+//
+enum class TryGetAddressMode
+{
+ Default,
+ Aggressive,
+};
+
+/// Try to coerce `inVal` into a `LoweredValInfo::ptr()` with a simple address.
+LoweredValInfo tryGetAddress(
+ IRGenContext* context,
+ LoweredValInfo const& inVal,
+ TryGetAddressMode mode);
+
+
+//
+
template<typename Derived>
struct ExprLoweringVisitorBase : ExprVisitor<Derived, LoweredValInfo>
{
@@ -2607,6 +2644,17 @@ struct ExprLoweringVisitorBase : ExprVisitor<Derived, LoweredValInfo>
IRInst* indexVal)
{
auto builder = getBuilder();
+
+ // The `tryGetAddress` operation will take a complex value representation
+ // and try to turn it into a single pointer, if possible.
+ //
+ baseVal = tryGetAddress(context, baseVal, TryGetAddressMode::Aggressive);
+
+ // The `materialize` operation should ensure that we only have to deal
+ // with the small number of base cases for lowered value representations.
+ //
+ baseVal = materialize(context, baseVal);
+
switch (baseVal.flavor)
{
case LoweredValInfo::Flavor::Simple:
@@ -3646,35 +3694,6 @@ static LoweredValInfo moveIntoMutableTemp(
return var;
}
-// When we try to turn a `LoweredValInfo` into an address of some temporary storage,
-// we can either do it "aggressively" or not (what we'll call the "default" behavior,
-// although it isn't strictly more common).
-//
-// The case that this is mostly there to address is when somebody writes an operation
-// like:
-//
-// foo[a] = b;
-//
-// In that case, we might as well just use the `set` accessor if there is one, rather
-// than complicate things. However, in more complex cases like:
-//
-// foo[a].x = b;
-//
-// there is no way to satisfy the semantics of the code the user wrote (in terms of
-// only writing one vector component, and not a full vector) by using the `set`
-// accessor, and we need to be "aggressive" in turning the lvalue `foo[a]` into
-// an address.
-//
-// TODO: realistically IR lowering is too early to be binding to this choice,
-// because different accessors might be supported on different targets.
-//
-enum class TryGetAddressMode
-{
- Default,
- Aggressive,
-};
-
-/// Try to coerce `inVal` into a `LoweredValInfo::ptr()` with a simple address.
LoweredValInfo tryGetAddress(
IRGenContext* context,
LoweredValInfo const& inVal,
diff --git a/tests/compute/matrix-layout-structured-buffer.slang b/tests/compute/matrix-layout-structured-buffer.slang
new file mode 100644
index 000000000..fc46d7815
--- /dev/null
+++ b/tests/compute/matrix-layout-structured-buffer.slang
@@ -0,0 +1,51 @@
+// matrix-layout-structured-buffer.slang
+
+// This test is set up to confirm that `StructuredBuffer` types are
+// always laid out column-major by fxc/dxc, even when row-major layout has been
+// requested globally.
+//
+// This behavior should be considered a bug in Slang because either:
+//
+// 1. we should report reflection layout information that acknowledges this behavior, or
+// 2. we should alter our HLSL output passed to fxc/dxc to provide consistent
+// behavior that matches our reflection data.
+//
+// For now this test exists to document the situation. It's output can/should
+// be updated if we decide to fix the underlying problem by taking option (2).
+//
+
+//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -xslang -matrix-layout-row-major
+//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -xslang -matrix-layout-column-major
+
+
+//TEST_INPUT:ubuffer(data=[0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23], stride=48):dxbinding(0),glbinding(0)
+RWStructuredBuffer<int3x4> gMatrices;
+
+int test(int val)
+{
+ int N = 256;
+
+ int tmp = 0;
+
+ tmp = tmp*N + gMatrices[val%2][(val )%3][(val )%4];
+ tmp = tmp*N + gMatrices[val%2][(val+1)%3][(val )%4];
+ tmp = tmp*N + gMatrices[val%2][(val )%3][(val+1)%4];
+ tmp = tmp*N + val;
+ tmp = tmp + 0x80000000;
+
+ return tmp;
+}
+
+//TEST_INPUT:ubuffer(data=[0 0 0 0 0 0 0 0 0 0 0 0], stride=4):dxbinding(0),glbinding(1),out
+RWStructuredBuffer<int> buffer;
+
+[numthreads(12, 1, 1)]
+void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID)
+{
+ uint tid = dispatchThreadID.x;
+
+ int val = tid;
+ val = test(val);
+
+ buffer[tid] = val;
+}
diff --git a/tests/compute/matrix-layout-structured-buffer.slang.expected.txt b/tests/compute/matrix-layout-structured-buffer.slang.expected.txt
new file mode 100644
index 000000000..18d8be654
--- /dev/null
+++ b/tests/compute/matrix-layout-structured-buffer.slang.expected.txt
@@ -0,0 +1,12 @@
+80010300
+90111301
+88060B02
+95160C03
+81020404
+910F1405
+86070906
+96170D07
+82000508
+8F101209
+87080A0A
+97150E0B
diff --git a/tools/gfx/render-d3d11.cpp b/tools/gfx/render-d3d11.cpp
index 1848755bc..690b869ca 100644
--- a/tools/gfx/render-d3d11.cpp
+++ b/tools/gfx/render-d3d11.cpp
@@ -778,8 +778,15 @@ Result D3D11Renderer::createBufferResource(Resource::Usage initialUsage, const B
BufferResource::Desc srcDesc(descIn);
srcDesc.setDefaults(initialUsage);
- // Make aligned to 256 bytes... not sure why, but if you remove this the tests do fail.
- const size_t alignedSizeInBytes = D3DUtil::calcAligned(srcDesc.sizeInBytes, 256);
+ auto d3dBindFlags = _calcResourceBindFlags(srcDesc.bindFlags);
+
+ size_t alignedSizeInBytes = srcDesc.sizeInBytes;
+
+ if(d3dBindFlags & D3D11_BIND_CONSTANT_BUFFER)
+ {
+ // Make aligned to 256 bytes... not sure why, but if you remove this the tests do fail.
+ alignedSizeInBytes = D3DUtil::calcAligned(alignedSizeInBytes, 256);
+ }
// Hack to make the initialization never read from out of bounds memory, by copying into a buffer
List<uint8_t> initDataBuffer;
@@ -792,7 +799,7 @@ Result D3D11Renderer::createBufferResource(Resource::Usage initialUsage, const B
D3D11_BUFFER_DESC bufferDesc = { 0 };
bufferDesc.ByteWidth = UINT(alignedSizeInBytes);
- bufferDesc.BindFlags = _calcResourceBindFlags(srcDesc.bindFlags);
+ bufferDesc.BindFlags = d3dBindFlags;
// For read we'll need to do some staging
bufferDesc.CPUAccessFlags = _calcResourceAccessFlags(descIn.cpuAccessFlags & Resource::AccessFlag::Write);
bufferDesc.Usage = D3D11_USAGE_DEFAULT;
@@ -1049,7 +1056,6 @@ Result D3D11Renderer::createBufferView(BufferResource* buffer, ResourceView::Des
uavDesc.ViewDimension = D3D11_UAV_DIMENSION_BUFFER;
uavDesc.Format = D3DUtil::getMapFormat(desc.format);
uavDesc.Buffer.FirstElement = 0;
- uavDesc.Buffer.NumElements = UINT(resourceDesc.sizeInBytes);
if(resourceDesc.elementSize)
{
@@ -1059,6 +1065,11 @@ Result D3D11Renderer::createBufferView(BufferResource* buffer, ResourceView::Des
{
uavDesc.Buffer.Flags |= D3D11_BUFFER_UAV_FLAG_RAW;
uavDesc.Format = DXGI_FORMAT_R32_TYPELESS;
+ uavDesc.Buffer.NumElements = UINT(resourceDesc.sizeInBytes / 4);
+ }
+ else
+ {
+ uavDesc.Buffer.NumElements = UINT(resourceDesc.sizeInBytes / RendererUtil::getFormatSize(desc.format));
}
ComPtr<ID3D11UnorderedAccessView> uav;
@@ -1077,16 +1088,34 @@ Result D3D11Renderer::createBufferView(BufferResource* buffer, ResourceView::Des
D3D11_SHADER_RESOURCE_VIEW_DESC srvDesc = {};
srvDesc.ViewDimension = D3D11_SRV_DIMENSION_BUFFER;
srvDesc.Format = D3DUtil::getMapFormat(desc.format);
- srvDesc.Buffer.ElementOffset = 0;
- srvDesc.Buffer.ElementWidth = 1;
srvDesc.Buffer.FirstElement = 0;
- srvDesc.Buffer.NumElements = UINT(resourceDesc.sizeInBytes);
if(resourceDesc.elementSize)
{
- srvDesc.Buffer.ElementWidth = resourceDesc.elementSize;
srvDesc.Buffer.NumElements = UINT(resourceDesc.sizeInBytes / resourceDesc.elementSize);
}
+ else if(desc.format == Format::Unknown)
+ {
+ // We need to switch to a different member of the `union`,
+ // so that we can set the `BufferEx.Flags` member.
+ //
+ srvDesc.ViewDimension = D3D11_SRV_DIMENSION_BUFFEREX;
+
+ // Because we've switched, we need to re-set the `FirstElement`
+ // field to be valid, since we can't count on all compilers
+ // to respect that `Buffer.FirstElement` and `BufferEx.FirstElement`
+ // alias in memory.
+ //
+ srvDesc.BufferEx.FirstElement = 0;
+
+ srvDesc.BufferEx.Flags = D3D11_BUFFEREX_SRV_FLAG_RAW;
+ srvDesc.Format = DXGI_FORMAT_R32_TYPELESS;
+ srvDesc.BufferEx.NumElements = UINT(resourceDesc.sizeInBytes / 4);
+ }
+ else
+ {
+ srvDesc.Buffer.NumElements = UINT(resourceDesc.sizeInBytes / RendererUtil::getFormatSize(desc.format));
+ }
ComPtr<ID3D11ShaderResourceView> srv;
SLANG_RETURN_ON_FAIL(m_device->CreateShaderResourceView(resourceImpl->m_buffer, &srvDesc, srv.writeRef()));
diff --git a/tools/gfx/render-d3d12.cpp b/tools/gfx/render-d3d12.cpp
index a7fda22c8..0b611a219 100644
--- a/tools/gfx/render-d3d12.cpp
+++ b/tools/gfx/render-d3d12.cpp
@@ -2255,7 +2255,6 @@ Result D3D12Renderer::createBufferView(BufferResource* buffer, ResourceView::Des
uavDesc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER;
uavDesc.Format = D3DUtil::getMapFormat(desc.format);
uavDesc.Buffer.FirstElement = 0;
- uavDesc.Buffer.NumElements = UINT(resourceDesc.sizeInBytes);
if(resourceDesc.elementSize)
{
@@ -2266,6 +2265,11 @@ Result D3D12Renderer::createBufferView(BufferResource* buffer, ResourceView::Des
{
uavDesc.Buffer.Flags |= D3D12_BUFFER_UAV_FLAG_RAW;
uavDesc.Format = DXGI_FORMAT_R32_TYPELESS;
+ uavDesc.Buffer.NumElements = UINT(resourceDesc.sizeInBytes / 4);
+ }
+ else
+ {
+ uavDesc.Buffer.NumElements = UINT(resourceDesc.sizeInBytes / RendererUtil::getFormatSize(desc.format));
}
@@ -2284,13 +2288,22 @@ Result D3D12Renderer::createBufferView(BufferResource* buffer, ResourceView::Des
srvDesc.Format = D3DUtil::getMapFormat(desc.format);
srvDesc.Buffer.StructureByteStride = 0;
srvDesc.Buffer.FirstElement = 0;
- srvDesc.Buffer.NumElements = UINT(resourceDesc.sizeInBytes);
if(resourceDesc.elementSize)
{
srvDesc.Buffer.StructureByteStride = resourceDesc.elementSize;
srvDesc.Buffer.NumElements = UINT(resourceDesc.sizeInBytes / resourceDesc.elementSize);
}
+ else if(desc.format == Format::Unknown)
+ {
+ srvDesc.Buffer.Flags |= D3D12_BUFFER_SRV_FLAG_RAW;
+ srvDesc.Format = DXGI_FORMAT_R32_TYPELESS;
+ srvDesc.Buffer.NumElements = UINT(resourceDesc.sizeInBytes / 4);
+ }
+ else
+ {
+ srvDesc.Buffer.NumElements = UINT(resourceDesc.sizeInBytes / RendererUtil::getFormatSize(desc.format));
+ }
SLANG_RETURN_ON_FAIL(m_viewAllocator.allocate(&viewImpl->m_descriptor));
m_device->CreateShaderResourceView(resourceImpl->m_resource, &srvDesc, viewImpl->m_descriptor.cpuHandle);