From 384df864fdd2c518924d32295a13894f16295d43 Mon Sep 17 00:00:00 2001 From: Tim Foley Date: Wed, 2 May 2018 14:44:13 -0700 Subject: Add support for "swizzled stores" (#544) This was a known issue in our IR representation, which was now biting a user. The basic problem is that in code like the following: ```hlsl RWStructureBuffer buffer; ... buffer[index].xz = value; ``` we ideally want to be able to reproduce the original HLSL code exactly, but that requires directly encoding the way that this code writes to two elements of a vector, but not the others. The currently lowering strategy we had produced IR something like: ```hlsl float4 tmp = buffer[index]; tmp.xz = value; buffer[index] = tmp; ``` That transformation might seem valid, but it has some big problems: * It generates UAV reads that are not needed, which could impact performance * It performs read-modify-write operations on memory that the programmer didn't explicitly write, which could create data races The fix here is somewhat obvious: if the "base" of a swizzle operation on a left-hand side resolves to a pointer in our IR, then we can output a "swizzled store" instead of the read-modify-write dance. We currently keep the read-modify-write around since it is potentially needed as a fallback in the general case. Along the way I also tried to make sure that we handle the case where we have a swizzle of a swizzle on the left-hand side: ```hlsl buffer[index].xz.y = value; ``` That code should behave the same as `buffer[index].z = value`. I am currently detecting and cleaning up this logic in the lowering path for `SwizzleExpr`, because that is the only place in the lowering logic that "swizzled l-values" currently get created. --- source/slang/lower-to-ir.cpp | 161 +++++++++++++++++++++++++++++++------------ 1 file changed, 116 insertions(+), 45 deletions(-) (limited to 'source/slang/lower-to-ir.cpp') diff --git a/source/slang/lower-to-ir.cpp b/source/slang/lower-to-ir.cpp index 371c68809..2d5f169f3 100644 --- a/source/slang/lower-to-ir.cpp +++ b/source/slang/lower-to-ir.cpp @@ -1914,13 +1914,58 @@ struct LValueExprLoweringVisitor : ExprLoweringVisitorBase swizzledLValue = new SwizzledLValueInfo(); swizzledLValue->type = irType; - swizzledLValue->base = loweredBase; UInt elementCount = (UInt)expr->elementCount; swizzledLValue->elementCount = elementCount; - for (UInt ii = 0; ii < elementCount; ++ii) + + // As a small optimization, we will detect if the base expression + // has also lowered into a swizzle and only return a single + // swizzle instead of nested swizzles. + // + // E.g., if we have input like `foo[i].zw.y` we should optimize it + // down to just `foo[i].w`. + // + if(loweredBase.flavor == LoweredValInfo::Flavor::SwizzledLValue) + { + auto baseSwizzleInfo = loweredBase.getSwizzledLValueInfo(); + + // Our new swizzle witll use the same base expression (e.g., + // `foo[i]` in our example above), but will need to remap + // the swizzle indices it uses. + // + swizzledLValue->base = baseSwizzleInfo->base; + for (UInt ii = 0; ii < elementCount; ++ii) + { + // First we get the swizzle element of the "outer" swizzle, + // as it was written by the user. In our running example of + // `foo[i].zw.y` this is the `y` element reference. + // + UInt originalElementIndex = UInt(expr->elementIndices[ii]); + + // Next we will use that original element index to figure + // out which of the elements of the original swizzle this + // should map to. + // + // In our example, `y` means index 1, and so we fetch + // element 1 from the inner swizzle sequence `zw`, to get `w`. + // + SLANG_ASSERT(originalElementIndex < baseSwizzleInfo->elementCount); + UInt remappedElementIndex = baseSwizzleInfo->elementIndices[originalElementIndex]; + + swizzledLValue->elementIndices[ii] = remappedElementIndex; + } + } + else { - swizzledLValue->elementIndices[ii] = (UInt) expr->elementIndices[ii]; + // In the default case, we can just copy the indices being + // used for the swizzle over directly from the expression, + // and use the base as-is. + // + swizzledLValue->base = loweredBase; + for (UInt ii = 0; ii < elementCount; ++ii) + { + swizzledLValue->elementIndices[ii] = (UInt) expr->elementIndices[ii]; + } } context->shared->extValues.Add(swizzledLValue); @@ -2836,52 +2881,78 @@ top: case LoweredValInfo::Flavor::SwizzledLValue: { - // The `left` value is of the form `.`. - // - // We could conceivably define a custom "swizzled store" instruction - // that would handle the common case where the base l-value is - // a simple lvalue (`LowerdValInfo::Flavor::Ptr`): - // - // float4 foo; - // foo.zxy = float3(...); - // - // However, this doesn't handle complex cases like the following: - // - // RWStructureBuffer foo; - // ... - // foo[index].xzy = float3(...); - // - // In a case like that, we really need to lower through a temp: - // - // float4 tmp = foo[index]; - // tmp.xzy = float3(...); - // foo[index] = tmp; - // - // We want to handle the general case, we we might as well - // try to handle everything uniformly. - // + // The `left` value is of the form `.`. + // How we will handle this depends on what `base` looks like: auto swizzleInfo = left.getSwizzledLValueInfo(); auto loweredBase = swizzleInfo->base; - // Load from the base value: - IRInst* irLeftVal = getSimpleVal(context, loweredBase); - IRInst* irRightVal = getSimpleVal(context, right); - - // Now apply the swizzle - IRInst* irSwizzled = builder->emitSwizzleSet( - irLeftVal->getDataType(), - irLeftVal, - irRightVal, - swizzleInfo->elementCount, - swizzleInfo->elementIndices); + switch( loweredBase.flavor ) + { + default: + { + // Our fallback position is to lower via a temporary, e.g.: + // + // float4 tmp = ; + // tmp.xyz = float3(...); + // = tmp; + // + + // Load from the base value + IRInst* irLeftVal = getSimpleVal(context, loweredBase); + + // Extract a simple value for the right-hand side + IRInst* irRightVal = getSimpleVal(context, right); + + // Apply the swizzle + IRInst* irSwizzled = builder->emitSwizzleSet( + irLeftVal->getDataType(), + irLeftVal, + irRightVal, + swizzleInfo->elementCount, + swizzleInfo->elementIndices); + + // And finally, store the value back where we got it. + // + // Note: this is effectively a recursive call to + // `assign()`, so we do a simple tail-recursive call here. + left = loweredBase; + right = LoweredValInfo::simple(irSwizzled); + goto top; + } + break; - // And finally, store the value back where we got it. - // - // Note: this is effectively a recursive call to - // `assign()`, so we do a simple tail-recursive call here. - left = loweredBase; - right = LoweredValInfo::simple(irSwizzled); - goto top; + case LoweredValInfo::Flavor::Ptr: + { + // We are writing through a pointer, which might be + // pointing into a UAV or other memory resource, so + // we can't introduce use a temporary like the case + // above, because then we would read and write bytes + // that are not strictly required for the store. + // + // Note that the messy case of a "swizzle of a swizzle" + // was handled already in lowering of a `SwizzleExpr`, + // so that we don't need to deal with that case here. + // + // TODO: we may need to consider whether there is + // enough value in a masked store like this to keep + // it around, in comparison to a simpler model where + // we simply form a pointer to each of the vector + // elements and write to them individually. + // + // TODO: we might also consider just special-casing + // single-element swizzles so that the common case + // can turn into a simple `store` instead of a + // `swizzledStore`. + // + IRInst* irRightVal = getSimpleVal(context, right); + builder->emitSwizzledStore( + loweredBase.val, + irRightVal, + swizzleInfo->elementCount, + swizzleInfo->elementIndices); + } + break; + } } break; -- cgit v1.2.3