From a1e79e4b6d40387fc9b43e0dfa1c7dd3fb0f6e58 Mon Sep 17 00:00:00 2001
From: kaizhangNV <149626564+kaizhangNV@users.noreply.github.com>
Date: Thu, 7 Mar 2024 15:20:13 +0800
Subject: Fix the cuda left-hand swizzle issue (#3538) (#3691)

---
 source/slang/slang-emit-c-like.cpp | 104 +++++++++++++++++++++++++++++--------
 source/slang/slang-emit-c-like.h   |   4 ++
 2 files changed, 87 insertions(+), 21 deletions(-)

(limited to 'source')

diff --git a/source/slang/slang-emit-c-like.cpp b/source/slang/slang-emit-c-like.cpp
index c7561e611..451147dd0 100644
--- a/source/slang/slang-emit-c-like.cpp
+++ b/source/slang/slang-emit-c-like.cpp
@@ -34,6 +34,9 @@
 
 namespace Slang {
 
+bool isCPUTarget(TargetRequest* targetReq);
+bool isCUDATarget(TargetRequest* targetReq);
+
 struct CLikeSourceEmitter::ComputeEmitActionsContext
 {
     IRInst* moduleInst;
@@ -352,6 +355,43 @@ void CLikeSourceEmitter::_emitType(IRType* type, DeclaratorInfo* declarator)
     }
 }
 
+void CLikeSourceEmitter::_emitSwizzleStorePerElement(IRInst* inst)
+{
+    auto subscriptOuter = getInfo(EmitOp::General);
+    auto subscriptPrec = getInfo(EmitOp::Postfix);
+
+    auto ii = cast<IRSwizzledStore>(inst);
+
+    UInt elementCount = ii->getElementCount();
+    UInt dstIndex = 0;
+    for (UInt ee = 0; ee < elementCount; ++ee)
+    {
+        bool needCloseSubscript = maybeEmitParens(subscriptOuter, subscriptPrec);
+
+        emitDereferenceOperand(ii->getDest(), leftSide(subscriptOuter, subscriptPrec));
+        m_writer->emit(".");
+
+        IRInst* irElementIndex = ii->getElementIndex(ee);
+        SLANG_RELEASE_ASSERT(irElementIndex->getOp() == kIROp_IntLit);
+
+        IRConstant* irConst = (IRConstant*)irElementIndex;
+
+        UInt elementIndex = (UInt)irConst->value.intVal;
+        SLANG_RELEASE_ASSERT(elementIndex < 4);
+
+        char const* kComponents[] = { "x", "y", "z", "w" };
+        m_writer->emit(kComponents[elementIndex]);
+
+        maybeCloseParens(needCloseSubscript);
+
+        m_writer->emit(" = ");
+        emitOperand(ii->getSource(), getInfo(EmitOp::General));
+        m_writer->emit(".");
+        m_writer->emit(kComponents[dstIndex++]);
+        m_writer->emit(";\n");
+    }
+}
+
 void CLikeSourceEmitter::emitWitnessTable(IRWitnessTable* witnessTable)
 {
     SLANG_UNUSED(witnessTable);
@@ -1494,6 +1534,19 @@ bool CLikeSourceEmitter::shouldFoldInstIntoUseSites(IRInst* inst)
             }
         }
     }
+
+    // CUDA and CPU targets don't support a swizzle on the left-hand side of an
+    // assignment, e.g. `vec4.xy = vec2` is not allowed.
+    // Therefore, we don't want to fold the right-hand-side expression.
+    // Instead, the right-hand-side expression should be emitted as a separate
+    // statement and stored in a temporary variable, which is then assigned to the
+    // left-hand-side variable per element, e.g. `vec4.x = vec2.x; vec4.y = vec2.y`.
+    if (as<IRSwizzledStore>(user))
+    {
+        if (isCPUTarget(getTargetReq()) || isCUDATarget(getTargetReq()))
+            return false;
+    }
+
     // We'd like to figure out if it is safe to fold our instruction into `user`
 
     // First, let's make sure they are in the same block/parent:
@@ -2760,32 +2813,41 @@ void CLikeSourceEmitter::_emitInst(IRInst* inst)
 
     case kIROp_SwizzledStore:
         {
-            auto subscriptOuter = getInfo(EmitOp::General);
-            auto subscriptPrec = getInfo(EmitOp::Postfix);
-            bool needCloseSubscript = maybeEmitParens(subscriptOuter, subscriptPrec);
+            // C++ and CUDA targets don't support a swizzle on the left-hand side,
+            // so we have to assign the elements one by one.
+            if (isCPUTarget(getTargetReq()) || isCUDATarget(getTargetReq()))
+            {
+                _emitSwizzleStorePerElement(inst);
+            }
+            else
+            {
+                auto subscriptOuter = getInfo(EmitOp::General);
+                auto subscriptPrec = getInfo(EmitOp::Postfix);
+                bool needCloseSubscript = maybeEmitParens(subscriptOuter, subscriptPrec);
 
 
-            auto ii = cast<IRSwizzledStore>(inst);
-            emitDereferenceOperand(ii->getDest(), leftSide(subscriptOuter, subscriptPrec));
-            m_writer->emit(".");
-            UInt elementCount = ii->getElementCount();
-            for (UInt ee = 0; ee < elementCount; ++ee)
-            {
-                IRInst* irElementIndex = ii->getElementIndex(ee);
-                SLANG_RELEASE_ASSERT(irElementIndex->getOp() == kIROp_IntLit);
-                IRConstant* irConst = (IRConstant*)irElementIndex;
+                auto ii = cast<IRSwizzledStore>(inst);
+                emitDereferenceOperand(ii->getDest(), leftSide(subscriptOuter, subscriptPrec));
+                m_writer->emit(".");
+                UInt elementCount = ii->getElementCount();
+                for (UInt ee = 0; ee < elementCount; ++ee)
+                {
+                    IRInst* irElementIndex = ii->getElementIndex(ee);
+                    SLANG_RELEASE_ASSERT(irElementIndex->getOp() == kIROp_IntLit);
+                    IRConstant* irConst = (IRConstant*)irElementIndex;
 
-                UInt elementIndex = (UInt)irConst->value.intVal;
-                SLANG_RELEASE_ASSERT(elementIndex < 4);
+                    UInt elementIndex = (UInt)irConst->value.intVal;
+                    SLANG_RELEASE_ASSERT(elementIndex < 4);
 
-                char const* kComponents[] = { "x", "y", "z", "w" };
-                m_writer->emit(kComponents[elementIndex]);
-            }
-            maybeCloseParens(needCloseSubscript);
+                    char const* kComponents[] = { "x", "y", "z", "w" };
+                    m_writer->emit(kComponents[elementIndex]);
+                }
+                maybeCloseParens(needCloseSubscript);
 
-            m_writer->emit(" = ");
-            emitOperand(ii->getSource(), getInfo(EmitOp::General));
-            m_writer->emit(";\n");
+                m_writer->emit(" = ");
+                emitOperand(ii->getSource(), getInfo(EmitOp::General));
+                m_writer->emit(";\n");
+            }
         }
         break;
 
diff --git a/source/slang/slang-emit-c-like.h b/source/slang/slang-emit-c-like.h
index 01b5681b7..46cdf2145 100644
--- a/source/slang/slang-emit-c-like.h
+++ b/source/slang/slang-emit-c-like.h
@@ -531,6 +531,10 @@ public:
     // Sort witnessTable entries according to the order defined in the witnessed interface type.
     List<IRWitnessTableEntry*> getSortedWitnessTableEntries(IRWitnessTable* witnessTable);
 
+    // Emits a swizzled store as one assignment per element (`dest.<c> = source.<c>;`), for targets
+    // that do not support a swizzle on the left-hand side of an assignment.
+    void _emitSwizzleStorePerElement(IRInst* inst);
+
     CodeGenContext* m_codeGenContext = nullptr;
     IRModule* m_irModule = nullptr;
 
-- 
cgit v1.2.3