diff options
| author | Tim Foley <tfoleyNV@users.noreply.github.com> | 2018-05-02 14:44:13 -0700 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2018-05-02 14:44:13 -0700 |
| commit | 384df864fdd2c518924d32295a13894f16295d43 (patch) | |
| tree | e2e2687ce295dfcf407ec47c0e6ce9231863ec96 /source | |
| parent | 60bcc6809f57e12f3705cc65cb325b0983b08899 (diff) | |
Add support for "swizzled stores" (#544)
This was a known issue in our IR representation, which was now biting a user. The basic problem is that in code like the following:
```hlsl
RWStructureBuffer<float4> buffer;
...
buffer[index].xz = value;
```
we ideally want to be able to reproduce the original HLSL code exactly, but that requires directly encoding the way that this code writes to two elements of a vector, but not the others.
The currently lowering strategy we had produced IR something like:
```hlsl
float4 tmp = buffer[index];
tmp.xz = value;
buffer[index] = tmp;
```
That transformation might seem valid, but it has some big problems:
* It generates UAV reads that are not needed, which could impact performance
* It performs read-modify-write operations on memory that the programmer didn't explicitly write, which could create data races
The fix here is somewhat obvious: if the "base" of a swizzle operation on a left-hand side resolves to a pointer in our IR, then we can output a "swizzled store" instead of the read-modify-write dance. We currently keep the read-modify-write around since it is potentially needed as a fallback in the general case.
Along the way I also tried to make sure that we handle the case where we have a swizzle of a swizzle on the left-hand side:
```hlsl
buffer[index].xz.y = value;
```
That code should behave the same as `buffer[index].z = value`. I am currently detecting and cleaning up this logic in the lowering path for `SwizzleExpr`, because that is the only place in the lowering logic that "swizzled l-values" currently get created.
Diffstat (limited to 'source')
| -rw-r--r-- | source/slang/emit.cpp | 25 | ||||
| -rw-r--r-- | source/slang/ir-inst-defs.h | 19 | ||||
| -rw-r--r-- | source/slang/ir-insts.h | 34 | ||||
| -rw-r--r-- | source/slang/ir.cpp | 39 | ||||
| -rw-r--r-- | source/slang/lower-to-ir.cpp | 161 |
5 files changed, 231 insertions, 47 deletions
diff --git a/source/slang/emit.cpp b/source/slang/emit.cpp index be6112abe..9f9a089c8 100644 --- a/source/slang/emit.cpp +++ b/source/slang/emit.cpp @@ -3317,6 +3317,31 @@ struct EmitVisitor emit(";\n"); } break; + + case kIROp_SwizzledStore: + { + auto ii = cast<IRSwizzledStore>(inst); + emit("("); + emitIROperand(ctx, ii->getDest(), mode); + emit(")."); + UInt elementCount = ii->getElementCount(); + for (UInt ee = 0; ee < elementCount; ++ee) + { + IRInst* irElementIndex = ii->getElementIndex(ee); + assert(irElementIndex->op == kIROp_IntLit); + IRConstant* irConst = (IRConstant*)irElementIndex; + + UInt elementIndex = (UInt)irConst->u.intVal; + assert(elementIndex < 4); + + char const* kComponents[] = { "x", "y", "z", "w" }; + emit(kComponents[elementIndex]); + } + emit(" = "); + emitIROperand(ctx, ii->getSource(), mode); + emit(";\n"); + } + break; } } diff --git a/source/slang/ir-inst-defs.h b/source/slang/ir-inst-defs.h index 3e37259ea..035defa53 100644 --- a/source/slang/ir-inst-defs.h +++ b/source/slang/ir-inst-defs.h @@ -265,6 +265,25 @@ INST(swizzle, swizzle, 1, 0) // INST(swizzleSet, swizzleSet, 2, 0) +// Store to memory with a swizzle +// +// TODO: eventually this should be reduced to just +// a write mask by moving the actual swizzle to the RHS. +// +// swizzleStore %dst %src %idx0 %idx1 ... +// +// where: +// - `dst` is a vector<T,N> +// - `src` is a vector<T,M> +// - `idx0` through `idx[M-1]` are literal integers +// +// The semantics of the op is: +// +// for(ii : 0 ... M-1 ) +// dst[ii] = src[idx[ii]]; +// +INST(SwizzledStore, swizzledStore, 2, 0) + /* IRTerminatorInst */ diff --git a/source/slang/ir-insts.h b/source/slang/ir-insts.h index 6b8a8b21e..22685f316 100644 --- a/source/slang/ir-insts.h +++ b/source/slang/ir-insts.h @@ -322,7 +322,7 @@ struct IRSwitch : IRTerminatorInst IRBlock* getCaseLabel(UInt index) { return (IRBlock*) getOperand(3 + index*2 + 1); } }; -struct IRSwizzle : IRReturn +struct IRSwizzle : IRInst { IRUse base; @@ -337,7 +337,7 @@ struct IRSwizzle : IRReturn } }; -struct IRSwizzleSet : IRReturn +struct IRSwizzleSet : IRInst { IRUse base; IRUse source; @@ -354,6 +354,22 @@ struct IRSwizzleSet : IRReturn } }; +struct IRSwizzledStore : IRInst +{ + IRInst* getDest() { return getOperand(0); } + IRInst* getSource() { return getOperand(1); } + UInt getElementCount() + { + return getOperandCount() - 2; + } + IRInst* getElementIndex(UInt index) + { + return getOperand(index + 2); + } + + IR_LEAF_ISA(SwizzledStore) +}; + // An IR `var` instruction conceptually represents // a stack allocation of some memory. struct IRVar : IRInst @@ -722,6 +738,20 @@ struct IRBuilder UInt elementCount, UInt const* elementIndices); + IRInst* emitSwizzledStore( + IRInst* dest, + IRInst* source, + UInt elementCount, + IRInst* const* elementIndices); + + IRInst* emitSwizzledStore( + IRInst* dest, + IRInst* source, + UInt elementCount, + UInt const* elementIndices); + + + IRInst* emitReturn( IRInst* val); diff --git a/source/slang/ir.cpp b/source/slang/ir.cpp index a4a118250..53050b6b9 100644 --- a/source/slang/ir.cpp +++ b/source/slang/ir.cpp @@ -2033,6 +2033,45 @@ namespace Slang return emitSwizzleSet(type, base, source, elementCount, irElementIndices); } + IRInst* IRBuilder::emitSwizzledStore( + IRInst* dest, + IRInst* source, + UInt elementCount, + IRInst* const* elementIndices) + { + IRInst* fixedArgs[] = { dest, source }; + UInt fixedArgCount = sizeof(fixedArgs) / sizeof(fixedArgs[0]); + + auto inst = createInstImpl<IRSwizzledStore>( + this, + kIROp_SwizzledStore, + nullptr, + fixedArgCount, + fixedArgs, + elementCount, + elementIndices); + + addInst(inst); + return inst; + } + + IRInst* IRBuilder::emitSwizzledStore( + IRInst* dest, + IRInst* source, + UInt elementCount, + UInt const* elementIndices) + { + auto intType = getBasicType(BaseType::Int); + + IRInst* irElementIndices[4]; + for (UInt ii = 0; ii < elementCount; ++ii) + { + irElementIndices[ii] = getIntValue(intType, elementIndices[ii]); + } + + return emitSwizzledStore(dest, source, elementCount, irElementIndices); + } + IRInst* IRBuilder::emitReturn( IRInst* val) { diff --git a/source/slang/lower-to-ir.cpp b/source/slang/lower-to-ir.cpp index 371c68809..2d5f169f3 100644 --- a/source/slang/lower-to-ir.cpp +++ b/source/slang/lower-to-ir.cpp @@ -1914,13 +1914,58 @@ struct LValueExprLoweringVisitor : ExprLoweringVisitorBase<LValueExprLoweringVis RefPtr<SwizzledLValueInfo> swizzledLValue = new SwizzledLValueInfo(); swizzledLValue->type = irType; - swizzledLValue->base = loweredBase; UInt elementCount = (UInt)expr->elementCount; swizzledLValue->elementCount = elementCount; - for (UInt ii = 0; ii < elementCount; ++ii) + + // As a small optimization, we will detect if the base expression + // has also lowered into a swizzle and only return a single + // swizzle instead of nested swizzles. + // + // E.g., if we have input like `foo[i].zw.y` we should optimize it + // down to just `foo[i].w`. + // + if(loweredBase.flavor == LoweredValInfo::Flavor::SwizzledLValue) + { + auto baseSwizzleInfo = loweredBase.getSwizzledLValueInfo(); + + // Our new swizzle witll use the same base expression (e.g., + // `foo[i]` in our example above), but will need to remap + // the swizzle indices it uses. + // + swizzledLValue->base = baseSwizzleInfo->base; + for (UInt ii = 0; ii < elementCount; ++ii) + { + // First we get the swizzle element of the "outer" swizzle, + // as it was written by the user. In our running example of + // `foo[i].zw.y` this is the `y` element reference. + // + UInt originalElementIndex = UInt(expr->elementIndices[ii]); + + // Next we will use that original element index to figure + // out which of the elements of the original swizzle this + // should map to. + // + // In our example, `y` means index 1, and so we fetch + // element 1 from the inner swizzle sequence `zw`, to get `w`. + // + SLANG_ASSERT(originalElementIndex < baseSwizzleInfo->elementCount); + UInt remappedElementIndex = baseSwizzleInfo->elementIndices[originalElementIndex]; + + swizzledLValue->elementIndices[ii] = remappedElementIndex; + } + } + else { - swizzledLValue->elementIndices[ii] = (UInt) expr->elementIndices[ii]; + // In the default case, we can just copy the indices being + // used for the swizzle over directly from the expression, + // and use the base as-is. + // + swizzledLValue->base = loweredBase; + for (UInt ii = 0; ii < elementCount; ++ii) + { + swizzledLValue->elementIndices[ii] = (UInt) expr->elementIndices[ii]; + } } context->shared->extValues.Add(swizzledLValue); @@ -2836,52 +2881,78 @@ top: case LoweredValInfo::Flavor::SwizzledLValue: { - // The `left` value is of the form `<someLValue>.<swizzleElements>`. - // - // We could conceivably define a custom "swizzled store" instruction - // that would handle the common case where the base l-value is - // a simple lvalue (`LowerdValInfo::Flavor::Ptr`): - // - // float4 foo; - // foo.zxy = float3(...); - // - // However, this doesn't handle complex cases like the following: - // - // RWStructureBuffer<float4> foo; - // ... - // foo[index].xzy = float3(...); - // - // In a case like that, we really need to lower through a temp: - // - // float4 tmp = foo[index]; - // tmp.xzy = float3(...); - // foo[index] = tmp; - // - // We want to handle the general case, we we might as well - // try to handle everything uniformly. - // + // The `left` value is of the form `<base>.<swizzleElements>`. + // How we will handle this depends on what `base` looks like: auto swizzleInfo = left.getSwizzledLValueInfo(); auto loweredBase = swizzleInfo->base; - // Load from the base value: - IRInst* irLeftVal = getSimpleVal(context, loweredBase); - IRInst* irRightVal = getSimpleVal(context, right); - - // Now apply the swizzle - IRInst* irSwizzled = builder->emitSwizzleSet( - irLeftVal->getDataType(), - irLeftVal, - irRightVal, - swizzleInfo->elementCount, - swizzleInfo->elementIndices); + switch( loweredBase.flavor ) + { + default: + { + // Our fallback position is to lower via a temporary, e.g.: + // + // float4 tmp = <base>; + // tmp.xyz = float3(...); + // <base> = tmp; + // + + // Load from the base value + IRInst* irLeftVal = getSimpleVal(context, loweredBase); + + // Extract a simple value for the right-hand side + IRInst* irRightVal = getSimpleVal(context, right); + + // Apply the swizzle + IRInst* irSwizzled = builder->emitSwizzleSet( + irLeftVal->getDataType(), + irLeftVal, + irRightVal, + swizzleInfo->elementCount, + swizzleInfo->elementIndices); + + // And finally, store the value back where we got it. + // + // Note: this is effectively a recursive call to + // `assign()`, so we do a simple tail-recursive call here. + left = loweredBase; + right = LoweredValInfo::simple(irSwizzled); + goto top; + } + break; - // And finally, store the value back where we got it. - // - // Note: this is effectively a recursive call to - // `assign()`, so we do a simple tail-recursive call here. - left = loweredBase; - right = LoweredValInfo::simple(irSwizzled); - goto top; + case LoweredValInfo::Flavor::Ptr: + { + // We are writing through a pointer, which might be + // pointing into a UAV or other memory resource, so + // we can't introduce use a temporary like the case + // above, because then we would read and write bytes + // that are not strictly required for the store. + // + // Note that the messy case of a "swizzle of a swizzle" + // was handled already in lowering of a `SwizzleExpr`, + // so that we don't need to deal with that case here. + // + // TODO: we may need to consider whether there is + // enough value in a masked store like this to keep + // it around, in comparison to a simpler model where + // we simply form a pointer to each of the vector + // elements and write to them individually. + // + // TODO: we might also consider just special-casing + // single-element swizzles so that the common case + // can turn into a simple `store` instead of a + // `swizzledStore`. + // + IRInst* irRightVal = getSimpleVal(context, right); + builder->emitSwizzledStore( + loweredBase.val, + irRightVal, + swizzleInfo->elementCount, + swizzleInfo->elementIndices); + } + break; + } } break; |
