From fbac017938343724407ab036abd736c942b4e187 Mon Sep 17 00:00:00 2001 From: jsmall-nvidia Date: Tue, 14 Apr 2020 17:00:11 -0400 Subject: CUDA global scope initialization of arrays without function calls. (#1320) * Fix CUDA output of a static const array if values are all literals. * Fix bug in Convert definition. * Output makeArray such that it is deconstructed on CUDA to fill in based on what the target type is. Tries to expand such that there are no function calls so that static const global scope definitions work. * Fix unbounded-array-of-array-syntax.slang to work correctly on CUDA. * Remove tabs. * Check works with static const vector/matrix. * Fix typo in type comparison. * Shorten _areEquivalent test. * Rename _emitInitializerList. Some small comment fixes. Co-authored-by: Tim Foley --- source/slang/slang-emit-cpp.cpp | 2 +- source/slang/slang-emit-cuda.cpp | 149 +++++++++++++++++++++ source/slang/slang-emit-cuda.h | 3 + tests/compute/static-const-matrix-array.slang | 36 +++++ .../static-const-matrix-array.slang.expected.txt | 9 ++ .../compute/unbounded-array-of-array-syntax.slang | 1 + 6 files changed, 199 insertions(+), 1 deletion(-) create mode 100644 tests/compute/static-const-matrix-array.slang create mode 100644 tests/compute/static-const-matrix-array.slang.expected.txt diff --git a/source/slang/slang-emit-cpp.cpp b/source/slang/slang-emit-cpp.cpp index 8f59da214..716a8f046 100644 --- a/source/slang/slang-emit-cpp.cpp +++ b/source/slang/slang-emit-cpp.cpp @@ -999,7 +999,7 @@ void CPPSourceEmitter::_emitConstructConvertDefinition(const UnownedStringSlice& IRType* dstElemType = _getElementType(retType); //IRType* srcElemType = _getElementType(srcType); - TypeDimension dim = _getTypeDimension(srcType, false); + TypeDimension dim = _getTypeDimension(retType, false); UnownedStringSlice rowTypeName; if (dim.rowCount > 1) diff --git a/source/slang/slang-emit-cuda.cpp b/source/slang/slang-emit-cuda.cpp index 702543fc8..ac1e1ea63 100644 --- 
a/source/slang/slang-emit-cuda.cpp +++ b/source/slang/slang-emit-cuda.cpp @@ -349,6 +349,138 @@ void CUDASourceEmitter::emitLoopControlDecorationImpl(IRLoopControlDecoration* d } } +static bool _areEquivalent(IRType* a, IRType* b) +{ + if (a == b) + { + return true; + } + if (a->op != b->op) + { + return false; + } + + switch (a->op) + { + case kIROp_VectorType: + { + IRVectorType* vecA = static_cast<IRVectorType*>(a); + IRVectorType* vecB = static_cast<IRVectorType*>(b); + return GetIntVal(vecA->getElementCount()) == GetIntVal(vecB->getElementCount()) && + _areEquivalent(vecA->getElementType(), vecB->getElementType()); + } + case kIROp_MatrixType: + { + IRMatrixType* matA = static_cast<IRMatrixType*>(a); + IRMatrixType* matB = static_cast<IRMatrixType*>(b); + return GetIntVal(matA->getColumnCount()) == GetIntVal(matB->getColumnCount()) && + GetIntVal(matA->getRowCount()) == GetIntVal(matB->getRowCount()) && + _areEquivalent(matA->getElementType(), matB->getElementType()); + } + default: + { + return as<IRBasicType>(a) != nullptr; + } + } +} + +void CUDASourceEmitter::_emitInitializerListValue(IRType* dstType, IRInst* value) +{ + // When constructing a matrix or vector from a single value this is handled by the default path + + switch (value->op) + { + case kIROp_Construct: + case kIROp_MakeMatrix: + case kIROp_makeVector: + { + IRType* type = value->getDataType(); + + // If the types are the same, we can just break the value down and use an initializer list + if (_areEquivalent(dstType, type)) + { + if (auto vecType = as<IRVectorType>(type)) + { + if (UInt(GetIntVal(vecType->getElementCount())) == value->getOperandCount()) + { + _emitInitializerList(vecType->getElementType(), value->getOperands(), value->getOperandCount()); + return; + } + } + else if (auto matType = as<IRMatrixType>(type)) + { + const Index colCount = Index(GetIntVal(matType->getColumnCount())); + const Index rowCount = Index(GetIntVal(matType->getRowCount())); + + // TODO(JS): If num cols = 1, then it *doesn't* actually return a vector. + // That could be argued to be an error because we want swizzling or [] to work. 
+ IRType* rowType = m_typeSet.addVectorType(matType->getElementType(), int(colCount)); + IRVectorType* rowVectorType = as<IRVectorType>(rowType); + const Index operandCount = Index(value->getOperandCount()); + + // Can init, with vectors. + // For now special case if the rowVectorType is not actually a vector (when elementSize == 1) + if (operandCount == rowCount || rowVectorType == nullptr) + { + // We have to output vectors + + // Emit the braces for the Matrix struct, which contains a row array. + m_writer->emit("{\n"); + m_writer->indent(); + _emitInitializerList(rowType, value->getOperands(), rowCount); + m_writer->dedent(); + m_writer->emit("\n}"); + return; + } + else if (operandCount == rowCount * colCount) + { + // Handle if all are explicitly defined + IRType* elementType = matType->getElementType(); + IRUse* operands = value->getOperands(); + + // Emit the braces for the Matrix struct, and the array of rows + m_writer->emit("{\n"); + m_writer->indent(); + m_writer->emit("{\n"); + m_writer->indent(); + for (Index i = 0; i < rowCount; ++i) + { + if (i != 0) m_writer->emit(", "); + _emitInitializerList(elementType, operands, colCount); + operands += colCount; + } + m_writer->dedent(); + m_writer->emit("\n}"); + m_writer->dedent(); + m_writer->emit("\n}"); + return; + } + } + } + + break; + } + } + + // All other cases we just use the default emitting - might not work on arrays defined in global scope on CUDA though + emitOperand(value, getInfo(EmitOp::General)); +} + +void CUDASourceEmitter::_emitInitializerList(IRType* elementType, IRUse* operands, Index operandCount) +{ + m_writer->emit("{\n"); + m_writer->indent(); + + for (Index i = 0; i < operandCount; ++i) + { + if (i != 0) m_writer->emit(", "); + _emitInitializerListValue(elementType, operands[i].get()); + } + + m_writer->dedent(); + m_writer->emit("\n}"); +} + bool CUDASourceEmitter::tryEmitInstExprImpl(IRInst* inst, const EmitOpInfo& inOuterPrec) { switch(inst->op) @@ -369,6 +501,23 @@ bool 
CUDASourceEmitter::tryEmitInstExprImpl(IRInst* inst, const EmitOpInfo& inOu } break; } + case kIROp_makeArray: + { + IRType* dataType = inst->getDataType(); + IRArrayType* arrayType = as<IRArrayType>(dataType); + + IRType* elementType = arrayType->getElementType(); + + // Emit braces for the FixedArray struct. + m_writer->emit("{\n"); + m_writer->indent(); + + _emitInitializerList(elementType, inst->getOperands(), Index(inst->getOperandCount())); + + m_writer->dedent(); + m_writer->emit("\n}"); + return true; + } default: break; } diff --git a/source/slang/slang-emit-cuda.h b/source/slang/slang-emit-cuda.h index dce3b4eb8..669bf2d20 100644 --- a/source/slang/slang-emit-cuda.h +++ b/source/slang/slang-emit-cuda.h @@ -76,6 +76,9 @@ protected: SlangResult _calcCUDATextureTypeName(IRTextureTypeBase* texType, StringBuilder& outName); + void _emitInitializerList(IRType* elementType, IRUse* operands, Index operandCount); + void _emitInitializerListValue(IRType* elementType, IRInst* value); + RefPtr<CUDAExtensionTracker> m_extensionTracker; }; diff --git a/tests/compute/static-const-matrix-array.slang b/tests/compute/static-const-matrix-array.slang new file mode 100644 index 000000000..2ac132121 --- /dev/null +++ b/tests/compute/static-const-matrix-array.slang @@ -0,0 +1,36 @@ +// static-const-matrix-array.slang + +//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -output-using-type +//TEST(compute, vulkan):COMPARE_COMPUTE_EX:-vk -compute -output-using-type +//TEST(compute):COMPARE_COMPUTE_EX:-cpu -slang -compute -output-using-type + +//TEST_INPUT:ubuffer(data=[0 0 0 0 0 0 0 0], stride=4):out, name outputBuffer +RWStructuredBuffer<float> outputBuffer; + + +static const matrix<float, 2, 3> kMatrix = { 1, 2, 4, -1, -2, -3 }; +static const float3 kVector = { -1, -2, -3 }; + +static const matrix<float, 2, 3> kArray[2] = +{ + matrix<float, 2, 3>(0, 1, 2, 3, 4, 5), + matrix<float, 2, 3>(float3(6, 7, 8), float3(9, 10, 11)), +}; + +float test(int inVal, int index) +{ + matrix<float, 2, 3> mat = kArray[index] + kMatrix; + mat[0] =+ kVector; + + float2 a = { inVal, inVal + 1}; + float3 v = 
mul(a, mat); + return v.x + v.y + v.z; +} + +[numthreads(8, 1, 1)] +void computeMain(uint3 tid : SV_DispatchThreadID) +{ + int inVal = tid.x; + float outVal = test(inVal, inVal & 1); + outputBuffer[inVal] = outVal; +} \ No newline at end of file diff --git a/tests/compute/static-const-matrix-array.slang.expected.txt b/tests/compute/static-const-matrix-array.slang.expected.txt new file mode 100644 index 000000000..7c72762a0 --- /dev/null +++ b/tests/compute/static-const-matrix-array.slang.expected.txt @@ -0,0 +1,9 @@ +type: float +6.000000 +42.000000 +6.000000 +78.000000 +6.000000 +114.000000 +6.000000 +150.000000 diff --git a/tests/compute/unbounded-array-of-array-syntax.slang b/tests/compute/unbounded-array-of-array-syntax.slang index 21b05dc71..17bb5eb2e 100644 --- a/tests/compute/unbounded-array-of-array-syntax.slang +++ b/tests/compute/unbounded-array-of-array-syntax.slang @@ -2,6 +2,7 @@ //TEST(compute):COMPARE_COMPUTE_EX:-cpu -compute //TEST:CROSS_COMPILE:-target dxbc-assembly -entry computeMain -profile cs_5_1 //TEST:CROSS_COMPILE:-target spirv-assembly -entry computeMain -profile cs_5_1 +//TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute //TEST_INPUT:ubuffer(data=[0 0 0 0 0 0 0 0], stride=4):out,name outputBuffer RWStructuredBuffer outputBuffer; -- cgit v1.2.3