summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTim Foley <tfoleyNV@users.noreply.github.com>2018-05-02 14:44:13 -0700
committerGitHub <noreply@github.com>2018-05-02 14:44:13 -0700
commit384df864fdd2c518924d32295a13894f16295d43 (patch)
treee2e2687ce295dfcf407ec47c0e6ce9231863ec96
parent60bcc6809f57e12f3705cc65cb325b0983b08899 (diff)
Add support for "swizzled stores" (#544)
This was a known issue in our IR representation, which was now biting a user. The basic problem is that in code like the following: ```hlsl RWStructureBuffer<float4> buffer; ... buffer[index].xz = value; ``` we ideally want to be able to reproduce the original HLSL code exactly, but that requires directly encoding the way that this code writes to two elements of a vector, but not the others. The currently lowering strategy we had produced IR something like: ```hlsl float4 tmp = buffer[index]; tmp.xz = value; buffer[index] = tmp; ``` That transformation might seem valid, but it has some big problems: * It generates UAV reads that are not needed, which could impact performance * It performs read-modify-write operations on memory that the programmer didn't explicitly write, which could create data races The fix here is somewhat obvious: if the "base" of a swizzle operation on a left-hand side resolves to a pointer in our IR, then we can output a "swizzled store" instead of the read-modify-write dance. We currently keep the read-modify-write around since it is potentially needed as a fallback in the general case. Along the way I also tried to make sure that we handle the case where we have a swizzle of a swizzle on the left-hand side: ```hlsl buffer[index].xz.y = value; ``` That code should behave the same as `buffer[index].z = value`. I am currently detecting and cleaning up this logic in the lowering path for `SwizzleExpr`, because that is the only place in the lowering logic that "swizzled l-values" currently get created.
-rw-r--r--source/slang/emit.cpp25
-rw-r--r--source/slang/ir-inst-defs.h19
-rw-r--r--source/slang/ir-insts.h34
-rw-r--r--source/slang/ir.cpp39
-rw-r--r--source/slang/lower-to-ir.cpp161
5 files changed, 231 insertions, 47 deletions
diff --git a/source/slang/emit.cpp b/source/slang/emit.cpp
index be6112abe..9f9a089c8 100644
--- a/source/slang/emit.cpp
+++ b/source/slang/emit.cpp
@@ -3317,6 +3317,31 @@ struct EmitVisitor
emit(";\n");
}
break;
+
+ case kIROp_SwizzledStore:
+ {
+ auto ii = cast<IRSwizzledStore>(inst);
+ emit("(");
+ emitIROperand(ctx, ii->getDest(), mode);
+ emit(").");
+ UInt elementCount = ii->getElementCount();
+ for (UInt ee = 0; ee < elementCount; ++ee)
+ {
+ IRInst* irElementIndex = ii->getElementIndex(ee);
+ assert(irElementIndex->op == kIROp_IntLit);
+ IRConstant* irConst = (IRConstant*)irElementIndex;
+
+ UInt elementIndex = (UInt)irConst->u.intVal;
+ assert(elementIndex < 4);
+
+ char const* kComponents[] = { "x", "y", "z", "w" };
+ emit(kComponents[elementIndex]);
+ }
+ emit(" = ");
+ emitIROperand(ctx, ii->getSource(), mode);
+ emit(";\n");
+ }
+ break;
}
}
diff --git a/source/slang/ir-inst-defs.h b/source/slang/ir-inst-defs.h
index 3e37259ea..035defa53 100644
--- a/source/slang/ir-inst-defs.h
+++ b/source/slang/ir-inst-defs.h
@@ -265,6 +265,25 @@ INST(swizzle, swizzle, 1, 0)
//
INST(swizzleSet, swizzleSet, 2, 0)
+// Store to memory with a swizzle
+//
+// TODO: eventually this should be reduced to just
+// a write mask by moving the actual swizzle to the RHS.
+//
+// swizzleStore %dst %src %idx0 %idx1 ...
+//
+// where:
+// - `dst` is a vector<T,N>
+// - `src` is a vector<T,M>
+// - `idx0` through `idx[M-1]` are literal integers
+//
+// The semantics of the op is:
+//
+// for(ii : 0 ... M-1 )
+// dst[ii] = src[idx[ii]];
+//
+INST(SwizzledStore, swizzledStore, 2, 0)
+
/* IRTerminatorInst */
diff --git a/source/slang/ir-insts.h b/source/slang/ir-insts.h
index 6b8a8b21e..22685f316 100644
--- a/source/slang/ir-insts.h
+++ b/source/slang/ir-insts.h
@@ -322,7 +322,7 @@ struct IRSwitch : IRTerminatorInst
IRBlock* getCaseLabel(UInt index) { return (IRBlock*) getOperand(3 + index*2 + 1); }
};
-struct IRSwizzle : IRReturn
+struct IRSwizzle : IRInst
{
IRUse base;
@@ -337,7 +337,7 @@ struct IRSwizzle : IRReturn
}
};
-struct IRSwizzleSet : IRReturn
+struct IRSwizzleSet : IRInst
{
IRUse base;
IRUse source;
@@ -354,6 +354,22 @@ struct IRSwizzleSet : IRReturn
}
};
+struct IRSwizzledStore : IRInst
+{
+ IRInst* getDest() { return getOperand(0); }
+ IRInst* getSource() { return getOperand(1); }
+ UInt getElementCount()
+ {
+ return getOperandCount() - 2;
+ }
+ IRInst* getElementIndex(UInt index)
+ {
+ return getOperand(index + 2);
+ }
+
+ IR_LEAF_ISA(SwizzledStore)
+};
+
// An IR `var` instruction conceptually represents
// a stack allocation of some memory.
struct IRVar : IRInst
@@ -722,6 +738,20 @@ struct IRBuilder
UInt elementCount,
UInt const* elementIndices);
+ IRInst* emitSwizzledStore(
+ IRInst* dest,
+ IRInst* source,
+ UInt elementCount,
+ IRInst* const* elementIndices);
+
+ IRInst* emitSwizzledStore(
+ IRInst* dest,
+ IRInst* source,
+ UInt elementCount,
+ UInt const* elementIndices);
+
+
+
IRInst* emitReturn(
IRInst* val);
diff --git a/source/slang/ir.cpp b/source/slang/ir.cpp
index a4a118250..53050b6b9 100644
--- a/source/slang/ir.cpp
+++ b/source/slang/ir.cpp
@@ -2033,6 +2033,45 @@ namespace Slang
return emitSwizzleSet(type, base, source, elementCount, irElementIndices);
}
+ IRInst* IRBuilder::emitSwizzledStore(
+ IRInst* dest,
+ IRInst* source,
+ UInt elementCount,
+ IRInst* const* elementIndices)
+ {
+ IRInst* fixedArgs[] = { dest, source };
+ UInt fixedArgCount = sizeof(fixedArgs) / sizeof(fixedArgs[0]);
+
+ auto inst = createInstImpl<IRSwizzledStore>(
+ this,
+ kIROp_SwizzledStore,
+ nullptr,
+ fixedArgCount,
+ fixedArgs,
+ elementCount,
+ elementIndices);
+
+ addInst(inst);
+ return inst;
+ }
+
+ IRInst* IRBuilder::emitSwizzledStore(
+ IRInst* dest,
+ IRInst* source,
+ UInt elementCount,
+ UInt const* elementIndices)
+ {
+ auto intType = getBasicType(BaseType::Int);
+
+ IRInst* irElementIndices[4];
+ for (UInt ii = 0; ii < elementCount; ++ii)
+ {
+ irElementIndices[ii] = getIntValue(intType, elementIndices[ii]);
+ }
+
+ return emitSwizzledStore(dest, source, elementCount, irElementIndices);
+ }
+
IRInst* IRBuilder::emitReturn(
IRInst* val)
{
diff --git a/source/slang/lower-to-ir.cpp b/source/slang/lower-to-ir.cpp
index 371c68809..2d5f169f3 100644
--- a/source/slang/lower-to-ir.cpp
+++ b/source/slang/lower-to-ir.cpp
@@ -1914,13 +1914,58 @@ struct LValueExprLoweringVisitor : ExprLoweringVisitorBase<LValueExprLoweringVis
RefPtr<SwizzledLValueInfo> swizzledLValue = new SwizzledLValueInfo();
swizzledLValue->type = irType;
- swizzledLValue->base = loweredBase;
UInt elementCount = (UInt)expr->elementCount;
swizzledLValue->elementCount = elementCount;
- for (UInt ii = 0; ii < elementCount; ++ii)
+
+ // As a small optimization, we will detect if the base expression
+ // has also lowered into a swizzle and only return a single
+ // swizzle instead of nested swizzles.
+ //
+ // E.g., if we have input like `foo[i].zw.y` we should optimize it
+ // down to just `foo[i].w`.
+ //
+ if(loweredBase.flavor == LoweredValInfo::Flavor::SwizzledLValue)
+ {
+ auto baseSwizzleInfo = loweredBase.getSwizzledLValueInfo();
+
+ // Our new swizzle witll use the same base expression (e.g.,
+ // `foo[i]` in our example above), but will need to remap
+ // the swizzle indices it uses.
+ //
+ swizzledLValue->base = baseSwizzleInfo->base;
+ for (UInt ii = 0; ii < elementCount; ++ii)
+ {
+ // First we get the swizzle element of the "outer" swizzle,
+ // as it was written by the user. In our running example of
+ // `foo[i].zw.y` this is the `y` element reference.
+ //
+ UInt originalElementIndex = UInt(expr->elementIndices[ii]);
+
+ // Next we will use that original element index to figure
+ // out which of the elements of the original swizzle this
+ // should map to.
+ //
+ // In our example, `y` means index 1, and so we fetch
+ // element 1 from the inner swizzle sequence `zw`, to get `w`.
+ //
+ SLANG_ASSERT(originalElementIndex < baseSwizzleInfo->elementCount);
+ UInt remappedElementIndex = baseSwizzleInfo->elementIndices[originalElementIndex];
+
+ swizzledLValue->elementIndices[ii] = remappedElementIndex;
+ }
+ }
+ else
{
- swizzledLValue->elementIndices[ii] = (UInt) expr->elementIndices[ii];
+ // In the default case, we can just copy the indices being
+ // used for the swizzle over directly from the expression,
+ // and use the base as-is.
+ //
+ swizzledLValue->base = loweredBase;
+ for (UInt ii = 0; ii < elementCount; ++ii)
+ {
+ swizzledLValue->elementIndices[ii] = (UInt) expr->elementIndices[ii];
+ }
}
context->shared->extValues.Add(swizzledLValue);
@@ -2836,52 +2881,78 @@ top:
case LoweredValInfo::Flavor::SwizzledLValue:
{
- // The `left` value is of the form `<someLValue>.<swizzleElements>`.
- //
- // We could conceivably define a custom "swizzled store" instruction
- // that would handle the common case where the base l-value is
- // a simple lvalue (`LowerdValInfo::Flavor::Ptr`):
- //
- // float4 foo;
- // foo.zxy = float3(...);
- //
- // However, this doesn't handle complex cases like the following:
- //
- // RWStructureBuffer<float4> foo;
- // ...
- // foo[index].xzy = float3(...);
- //
- // In a case like that, we really need to lower through a temp:
- //
- // float4 tmp = foo[index];
- // tmp.xzy = float3(...);
- // foo[index] = tmp;
- //
- // We want to handle the general case, we we might as well
- // try to handle everything uniformly.
- //
+ // The `left` value is of the form `<base>.<swizzleElements>`.
+ // How we will handle this depends on what `base` looks like:
auto swizzleInfo = left.getSwizzledLValueInfo();
auto loweredBase = swizzleInfo->base;
- // Load from the base value:
- IRInst* irLeftVal = getSimpleVal(context, loweredBase);
- IRInst* irRightVal = getSimpleVal(context, right);
-
- // Now apply the swizzle
- IRInst* irSwizzled = builder->emitSwizzleSet(
- irLeftVal->getDataType(),
- irLeftVal,
- irRightVal,
- swizzleInfo->elementCount,
- swizzleInfo->elementIndices);
+ switch( loweredBase.flavor )
+ {
+ default:
+ {
+ // Our fallback position is to lower via a temporary, e.g.:
+ //
+ // float4 tmp = <base>;
+ // tmp.xyz = float3(...);
+ // <base> = tmp;
+ //
+
+ // Load from the base value
+ IRInst* irLeftVal = getSimpleVal(context, loweredBase);
+
+ // Extract a simple value for the right-hand side
+ IRInst* irRightVal = getSimpleVal(context, right);
+
+ // Apply the swizzle
+ IRInst* irSwizzled = builder->emitSwizzleSet(
+ irLeftVal->getDataType(),
+ irLeftVal,
+ irRightVal,
+ swizzleInfo->elementCount,
+ swizzleInfo->elementIndices);
+
+ // And finally, store the value back where we got it.
+ //
+ // Note: this is effectively a recursive call to
+ // `assign()`, so we do a simple tail-recursive call here.
+ left = loweredBase;
+ right = LoweredValInfo::simple(irSwizzled);
+ goto top;
+ }
+ break;
- // And finally, store the value back where we got it.
- //
- // Note: this is effectively a recursive call to
- // `assign()`, so we do a simple tail-recursive call here.
- left = loweredBase;
- right = LoweredValInfo::simple(irSwizzled);
- goto top;
+ case LoweredValInfo::Flavor::Ptr:
+ {
+ // We are writing through a pointer, which might be
+ // pointing into a UAV or other memory resource, so
+ // we can't introduce use a temporary like the case
+ // above, because then we would read and write bytes
+ // that are not strictly required for the store.
+ //
+ // Note that the messy case of a "swizzle of a swizzle"
+ // was handled already in lowering of a `SwizzleExpr`,
+ // so that we don't need to deal with that case here.
+ //
+ // TODO: we may need to consider whether there is
+ // enough value in a masked store like this to keep
+ // it around, in comparison to a simpler model where
+ // we simply form a pointer to each of the vector
+ // elements and write to them individually.
+ //
+ // TODO: we might also consider just special-casing
+ // single-element swizzles so that the common case
+ // can turn into a simple `store` instead of a
+ // `swizzledStore`.
+ //
+ IRInst* irRightVal = getSimpleVal(context, right);
+ builder->emitSwizzledStore(
+ loweredBase.val,
+ irRightVal,
+ swizzleInfo->elementCount,
+ swizzleInfo->elementIndices);
+ }
+ break;
+ }
}
break;