diff options
| author | jsmall-nvidia <jsmall@nvidia.com> | 2021-04-30 16:51:25 -0400 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2021-04-30 13:51:25 -0700 |
| commit | 1a4a51301d084dd1c8c5906eb810eb6caf6f3963 (patch) | |
| tree | 3eac138d918853f88bb8e2b5f14ed36a57e54d7a /source | |
| parent | c45f368ae404798db67a601749c6e0047fba75ef (diff) | |
Preliminary CUDA half maths (#1827)
* #include an absolute path didn't work - because paths were taken to always be relative.
* Split out StringEscapeUtil.
* Added StringEscapeUtil.
* Fix typo in unix quoting type.
* Small comment improvements.
* Try to fix linux linking issue.
* Fix typo.
* Attempt to fix linux link issue.
* Update VS proj even though nothing really changed.
* Fix another typo issue.
* Fix for windows issue.
Fixed bug.
* Make separate Utils for escaping.
* Fix typo.
* Split out into StringEscapeHandler.
* Windows shell does handle removing quotes (so remove code to remove them).
* Handle unescaping if not initiating using the shell.
* Slight improvement around shell like decoding.
* Simplify command extraction.
* Add shared-library category type.
* Fix bug in command extraction.
* Typo in transcendental category.
* Enable unit-test on in smoke test category.
* Make parsing failing output as a failing test.
* Fixes for transcendental tests. Disable tests that do not work.
* Changed category parsing.
* Removed the TestResult parameter from _gatherTestsForFile.
Made testsList only output.
* Remove testing if all tests were disabled.
* Make args of CommandLine always unescaped.
* Add category.
* Don't need escaping on unix/linux.
* Remove some no longer used functions.
* Add requireSMVersion to CUDAExtensionTracker.
* half-calc.slang now works for CUDA.
* bit-cast-16-bit works on CUDA.
* WIP handling of CUDA vector<half> types.
* Half swizzle CUDA.
* Half vector test.
* Fix swizzle half bug.
* Fix compilation issue with narrowing to Index.
Co-authored-by: Tim Foley <tfoleyNV@users.noreply.github.com>
Diffstat (limited to 'source')
| -rwxr-xr-x | source/slang/slang-compiler.cpp | 2 | ||||
| -rw-r--r-- | source/slang/slang-emit-cuda.cpp | 214 | ||||
| -rw-r--r-- | source/slang/slang-emit-cuda.h | 16 |
3 files changed, 187 insertions, 45 deletions
diff --git a/source/slang/slang-compiler.cpp b/source/slang/slang-compiler.cpp index 1d416634a..736250219 100755 --- a/source/slang/slang-compiler.cpp +++ b/source/slang/slang-compiler.cpp @@ -1413,6 +1413,8 @@ SlangResult dissassembleDXILUsingDXC( // Look for the version if (auto cudaTracker = as<CUDAExtensionTracker>(source.extensionTracker)) { + cudaTracker->finalize(); + if (cudaTracker->m_smVersion.isSet()) { DownstreamCompiler::CapabilityVersion version; diff --git a/source/slang/slang-emit-cuda.cpp b/source/slang/slang-emit-cuda.cpp index a259ea933..5f7eada68 100644 --- a/source/slang/slang-emit-cuda.cpp +++ b/source/slang/slang-emit-cuda.cpp @@ -10,6 +10,21 @@ namespace Slang { + + +void CUDAExtensionTracker::finalize() +{ + if (isBaseTypeRequired(BaseType::Half)) + { + // The cuda_fp16.hpp header indicates the need is for version 5.3, but when this is tried + // NVRTC says it cannot load builtins. + // The lowest version that this does work for is 6.0, so that's what we use here. + + // https://docs.nvidia.com/cuda/nvrtc/index.html#group__options + requireSMVersion(SemanticVersion(6, 0)); + } +} + static bool _isSingleNameBasicType(IROp op) { switch (op) @@ -152,17 +167,74 @@ SlangResult CUDASourceEmitter::calcScalarFuncName(HLSLIntrinsic::Op op, IRBasicT return Super::calcScalarFuncName(op, type, outBuilder); } -SlangResult CUDASourceEmitter::calcTypeName(IRType* type, CodeGenTarget target, StringBuilder& out) +void CUDASourceEmitter::emitSpecializedOperationDefinition(const HLSLIntrinsic* specOp) { - SLANG_UNUSED(target); + typedef HLSLIntrinsic::Op Op; + + if (auto vecType = as <IRVectorType>(specOp->returnType)) + { + if (auto baseType = as<IRBasicType>(vecType->getElementType())) + { + if (baseType->getBaseType() == BaseType::Half) + { + switch (specOp->op) + { + case Op::Init: + case Op::Add: + case Op::Mul: + case Op::Div: + + case Op::Neg: + + case Op::ConstructFromScalar: + + case Op::Leq: + case Op::Less: + case Op::Greater: + case Op::Geq: + case Op::Neq: + case Op::Eql: + { + return; + } + } + } + } + } - if (target == CodeGenTarget::CSource) + switch (specOp->op) { - return Super::calcTypeName(type, target, out); + case Op::Init: + { + // Special case handling + auto returnType = specOp->returnType; + + if (auto vecType = as <IRVectorType>(returnType)) + { + if (auto baseType = as<IRBasicType>(vecType->getElementType())) + { + if (baseType->getBaseType() == BaseType::Half) + { + // Defined already in cuda-prelude.h + return; + } + } + } + + break; + } + default: break; } - // We allow C source, because if we need a name - SLANG_ASSERT(target == CodeGenTarget::CUDASource); + Super::emitSpecializedOperationDefinition(specOp); +} + +SlangResult CUDASourceEmitter::calcTypeName(IRType* type, CodeGenTarget target, StringBuilder& out) +{ + SLANG_UNUSED(target); + + // The names CUDA produces are all compatible with 'C' (ie they aren't templated types) + SLANG_ASSERT(target == CodeGenTarget::CUDASource || target == CodeGenTarget::CSource); switch (type->getOp()) { @@ -180,30 +252,6 @@ SlangResult CUDASourceEmitter::calcTypeName(IRType* type, CodeGenTarget target, out << prefix << vecCount; return SLANG_OK; } - -#if 0 - case kIROp_MatrixType: - { - auto matType = static_cast<IRMatrixType*>(type); - - auto elementType = matType->getElementType(); - const auto rowCount = int(getIntVal(matType->getRowCount())); - const auto colCount = int(getIntVal(matType->getColumnCount())); - - out << "Matrix<" << getBuiltinTypeName(elementType->op) << ", " << rowCount << ", " << colCount << ">"; - return SLANG_OK; - } - case kIROp_UnsizedArrayType: - { - auto arrayType = static_cast<IRUnsizedArrayType*>(type); - auto elementType = arrayType->getElementType(); - - out << "Array<"; - SLANG_RETURN_ON_FAIL(_calcTypeName(elementType, target, out)); - out << ">"; - return SLANG_OK; - } -#endif default: { if (isNominalOp(type->getOp())) @@ -519,10 +567,102 @@ void CUDASourceEmitter::_emitInitializerList(IRType* elementType, IRUse* operand m_writer->emit("\n}"); } +void CUDASourceEmitter::_emitGetHalfVectorElement(IRInst* base, Index index, Index vecSize, const EmitOpInfo& inOuterPrec) +{ + SLANG_ASSERT(index < vecSize); + + EmitOpInfo outerPrec = inOuterPrec; + + auto prec = getInfo(EmitOp::Postfix); + const bool needClose = maybeEmitParens(outerPrec, prec); + + emitOperand(base, leftSide(outerPrec, prec)); + + m_writer->emit("."); + + switch (vecSize) + { + default: + { + char const* kComponents[] = { "x", "y", "z", "w" }; + m_writer->emit(kComponents[index]); + break; + } + case 3: + { + char const* kComponents[] = { "xy.x", "xy.y", "z"}; + m_writer->emit(kComponents[index]); + break; + } + case 4: + { + char const* kComponents[] = { "xy.x", "xy.y", "zw.x", "zw.y" }; + m_writer->emit(kComponents[index]); + break; + } + } + + maybeCloseParens(needClose); +} + bool CUDASourceEmitter::tryEmitInstExprImpl(IRInst* inst, const EmitOpInfo& inOuterPrec) { switch(inst->getOp()) { + case kIROp_swizzle: + { + // We need to special case for half types. + auto swizzleInst = static_cast<IRSwizzle*>(inst); + + IRInst* baseInst = swizzleInst->getBase(); + IRType* baseType = baseInst->getDataType(); + + // If we are swizzling from a built in type, + if (as<IRBasicType>(baseType)) + { + // Just use the default behavior + } + else if (auto vecType = as<IRVectorType>(baseType)) + { + if (auto basicType = as<IRBasicType>(vecType->getElementType())) + { + if (basicType->getBaseType() == BaseType::Half) + { + const Index vecElementCount = Index(getIntVal(vecType->getElementCount())); + + const Index elementCount = Index(swizzleInst->getElementCount()); + if (elementCount == 1) + { + const Index index = Index(getIntVal(swizzleInst->getElementIndex(0))); + _emitGetHalfVectorElement(baseInst, index, vecElementCount, inOuterPrec); + } + else + { + auto outerPrec = getInfo(EmitOp::General); + + m_writer->emit("make___half"); + m_writer->emitInt64(elementCount); + m_writer->emit("("); + + for (Index i = 0; i < elementCount; ++i) + { + if (i) + { + m_writer->emit(", "); + } + + const Index index = Index(getIntVal(swizzleInst->getElementIndex(i))); + _emitGetHalfVectorElement(baseInst, index, vecElementCount, outerPrec); + } + + m_writer->emit(")"); + } + return true; + } + } + } + break; + } case kIROp_Construct: { // Simple constructor call @@ -558,7 +698,7 @@ bool CUDASourceEmitter::tryEmitInstExprImpl(IRInst* inst, const EmitOpInfo& inOu } case kIROp_WaveMaskBallot: { - _requireCUDASMVersion(SemanticVersion(7, 0)); + m_extensionTracker->requireSMVersion(SemanticVersion(7, 0)); m_writer->emit("__ballot_sync("); emitOperand(inst->getOperand(0), getInfo(EmitOp::General)); @@ -569,7 +709,7 @@ bool CUDASourceEmitter::tryEmitInstExprImpl(IRInst* inst, const EmitOpInfo& inOu } case kIROp_WaveMaskMatch: { - _requireCUDASMVersion(SemanticVersion(7, 0)); + m_extensionTracker->requireSMVersion(SemanticVersion(7, 0)); m_writer->emit("__match_any_sync("); emitOperand(inst->getOperand(0), getInfo(EmitOp::General)); @@ -584,14 +724,6 @@ bool CUDASourceEmitter::tryEmitInstExprImpl(IRInst* inst, const EmitOpInfo& inOu return Super::tryEmitInstExprImpl(inst, inOuterPrec); } -void CUDASourceEmitter::_requireCUDASMVersion(SemanticVersion const& version) -{ - if (version > m_extensionTracker->m_smVersion) - { - m_extensionTracker->m_smVersion = version; - } -} - void CUDASourceEmitter::handleRequiredCapabilitiesImpl(IRInst* inst) { // Does this function declare any requirements on CUDA capabilities @@ -603,7 +735,7 @@ void CUDASourceEmitter::handleRequiredCapabilitiesImpl(IRInst* inst) { SemanticVersion version; version.setFromInteger(SemanticVersion::IntegerType(smDecoration->getCUDASMVersion())); - _requireCUDASMVersion(version); + m_extensionTracker->requireSMVersion(version); } } } diff --git a/source/slang/slang-emit-cuda.h b/source/slang/slang-emit-cuda.h index a5d227c6b..b73948525 100644 --- a/source/slang/slang-emit-cuda.h +++ b/source/slang/slang-emit-cuda.h @@ -18,7 +18,14 @@ public: void requireBaseType(BaseType baseType) { m_baseTypeFlags |= _getFlag(baseType); } bool isBaseTypeRequired(BaseType baseType) { return (m_baseTypeFlags & _getFlag(baseType)) != 0; } + /// Ensure that the generated code is compiled for at least CUDA SM `version` + void requireSMVersion(const SemanticVersion& smVersion) { m_smVersion = (smVersion > m_smVersion) ? smVersion : m_smVersion; } + + /// Should be called before reading out values. + void finalize(); + protected: + static BaseTypeFlags _getFlag(BaseType baseType) { return BaseTypeFlags(1) << int(baseType); } BaseTypeFlags m_baseTypeFlags = 0; @@ -86,15 +93,16 @@ protected: // CPPSourceEmitter overrides virtual SlangResult calcTypeName(IRType* type, CodeGenTarget target, StringBuilder& out) SLANG_OVERRIDE; virtual SlangResult calcScalarFuncName(HLSLIntrinsic::Op op, IRBasicType* type, StringBuilder& outBuilder) SLANG_OVERRIDE; - + + virtual void emitSpecializedOperationDefinition(const HLSLIntrinsic* specOp) SLANG_OVERRIDE; + SlangResult _calcCUDATextureTypeName(IRTextureTypeBase* texType, StringBuilder& outName); void _emitInitializerList(IRType* elementType, IRUse* operands, Index operandCount); void _emitInitializerListValue(IRType* elementType, IRInst* value); - /// Ensure that the generated code is compiled for at least CUDA SM `version` - void _requireCUDASMVersion(SemanticVersion const& version); - + void _emitGetHalfVectorElement(IRInst* baseInst, Index index, Index vecSize, const EmitOpInfo& inOuterPrec); + RefPtr<CUDAExtensionTracker> m_extensionTracker; }; |
