summaryrefslogtreecommitdiffstats
path: root/source
diff options
context:
space:
mode:
authorjsmall-nvidia <jsmall@nvidia.com>2021-04-30 16:51:25 -0400
committerGitHub <noreply@github.com>2021-04-30 13:51:25 -0700
commit1a4a51301d084dd1c8c5906eb810eb6caf6f3963 (patch)
tree3eac138d918853f88bb8e2b5f14ed36a57e54d7a /source
parentc45f368ae404798db67a601749c6e0047fba75ef (diff)
Preliminary CUDA half maths (#1827)
* #include an absolute path didn't work - because paths were taken to always be relative. * Split out StringEscapeUtil. * Added StringEscapeUtil. * Fix typo in unix quoting type. * Small comment improvements. * Try to fix linux linking issue. * Fix typo. * Attempt to fix linux link issue. * Update VS proj even though nothing really changed. * Fix another typo issue. * Fix for windows issue. Fixed bug. * Make separate Utils for escaping. * Fix typo. * Split out into StringEscapeHandler. * Windows shell does handle removing quotes (so remove code to remove them). * Handle unescaping if not initiating using the shell. * Slight improvement around shell like decoding. * Simplify command extraction. * Add shared-library category type. * Fix bug in command extraction. * Typo in transcendental category. * Enable unit-test on in smoke test category. * Make parsing failing output as a failing test. * Fixes for transcendental tests. Disable tests that do not work. * Changed category parsing. * Removed the TestResult parameter from _gatherTestsForFile. Made testsList only output. * Remove testing if all tests were disabled. * Make args of CommandLine always unescaped. * Add category. * Don't need escaping on unix/linux. * Remove some no longer used functions. * Add requireSMVersion to CUDAExtensionTracker. * half-calc.slang now works for CUDA. * bit-cast-16-bit works on CUDA. * WIP handling of CUDA vector<half> types. * Half swizzle CUDA. * Half vector test. * Fix swizzle half bug. * Fix compilation issue with narrowing to Index. Co-authored-by: Tim Foley <tfoleyNV@users.noreply.github.com>
Diffstat (limited to 'source')
-rwxr-xr-xsource/slang/slang-compiler.cpp2
-rw-r--r--source/slang/slang-emit-cuda.cpp214
-rw-r--r--source/slang/slang-emit-cuda.h16
3 files changed, 187 insertions, 45 deletions
diff --git a/source/slang/slang-compiler.cpp b/source/slang/slang-compiler.cpp
index 1d416634a..736250219 100755
--- a/source/slang/slang-compiler.cpp
+++ b/source/slang/slang-compiler.cpp
@@ -1413,6 +1413,8 @@ SlangResult dissassembleDXILUsingDXC(
// Look for the version
if (auto cudaTracker = as<CUDAExtensionTracker>(source.extensionTracker))
{
+ cudaTracker->finalize();
+
if (cudaTracker->m_smVersion.isSet())
{
DownstreamCompiler::CapabilityVersion version;
diff --git a/source/slang/slang-emit-cuda.cpp b/source/slang/slang-emit-cuda.cpp
index a259ea933..5f7eada68 100644
--- a/source/slang/slang-emit-cuda.cpp
+++ b/source/slang/slang-emit-cuda.cpp
@@ -10,6 +10,21 @@
namespace Slang {
+
+
+void CUDAExtensionTracker::finalize()
+{
+ if (isBaseTypeRequired(BaseType::Half))
+ {
+ // The cuda_fp16.hpp header indicates the need is for version 5.3, but when this is tried
+ // NVRTC says it cannot load builtins.
+ // The lowest version that this does work for is 6.0, so that's what we use here.
+
+ // https://docs.nvidia.com/cuda/nvrtc/index.html#group__options
+ requireSMVersion(SemanticVersion(6, 0));
+ }
+}
+
static bool _isSingleNameBasicType(IROp op)
{
switch (op)
@@ -152,17 +167,74 @@ SlangResult CUDASourceEmitter::calcScalarFuncName(HLSLIntrinsic::Op op, IRBasicT
return Super::calcScalarFuncName(op, type, outBuilder);
}
-SlangResult CUDASourceEmitter::calcTypeName(IRType* type, CodeGenTarget target, StringBuilder& out)
+void CUDASourceEmitter::emitSpecializedOperationDefinition(const HLSLIntrinsic* specOp)
{
- SLANG_UNUSED(target);
+ typedef HLSLIntrinsic::Op Op;
+
+ if (auto vecType = as <IRVectorType>(specOp->returnType))
+ {
+ if (auto baseType = as<IRBasicType>(vecType->getElementType()))
+ {
+ if (baseType->getBaseType() == BaseType::Half)
+ {
+ switch (specOp->op)
+ {
+ case Op::Init:
+ case Op::Add:
+ case Op::Mul:
+ case Op::Div:
+
+ case Op::Neg:
+
+ case Op::ConstructFromScalar:
+
+ case Op::Leq:
+ case Op::Less:
+ case Op::Greater:
+ case Op::Geq:
+ case Op::Neq:
+ case Op::Eql:
+ {
+ return;
+ }
+ }
+ }
+ }
+ }
- if (target == CodeGenTarget::CSource)
+ switch (specOp->op)
{
- return Super::calcTypeName(type, target, out);
+ case Op::Init:
+ {
+ // Special case handling
+ auto returnType = specOp->returnType;
+
+ if (auto vecType = as <IRVectorType>(returnType))
+ {
+ if (auto baseType = as<IRBasicType>(vecType->getElementType()))
+ {
+ if (baseType->getBaseType() == BaseType::Half)
+ {
+ // Defined already in cuda-prelude.h
+ return;
+ }
+ }
+ }
+
+ break;
+ }
+ default: break;
}
- // We allow C source, because if we need a name
- SLANG_ASSERT(target == CodeGenTarget::CUDASource);
+ Super::emitSpecializedOperationDefinition(specOp);
+}
+
+SlangResult CUDASourceEmitter::calcTypeName(IRType* type, CodeGenTarget target, StringBuilder& out)
+{
+ SLANG_UNUSED(target);
+
+ // The names CUDA produces are all compatible with 'C' (ie they aren't templated types)
+ SLANG_ASSERT(target == CodeGenTarget::CUDASource || target == CodeGenTarget::CSource);
switch (type->getOp())
{
@@ -180,30 +252,6 @@ SlangResult CUDASourceEmitter::calcTypeName(IRType* type, CodeGenTarget target,
out << prefix << vecCount;
return SLANG_OK;
}
-
-#if 0
- case kIROp_MatrixType:
- {
- auto matType = static_cast<IRMatrixType*>(type);
-
- auto elementType = matType->getElementType();
- const auto rowCount = int(getIntVal(matType->getRowCount()));
- const auto colCount = int(getIntVal(matType->getColumnCount()));
-
- out << "Matrix<" << getBuiltinTypeName(elementType->op) << ", " << rowCount << ", " << colCount << ">";
- return SLANG_OK;
- }
- case kIROp_UnsizedArrayType:
- {
- auto arrayType = static_cast<IRUnsizedArrayType*>(type);
- auto elementType = arrayType->getElementType();
-
- out << "Array<";
- SLANG_RETURN_ON_FAIL(_calcTypeName(elementType, target, out));
- out << ">";
- return SLANG_OK;
- }
-#endif
default:
{
if (isNominalOp(type->getOp()))
@@ -519,10 +567,102 @@ void CUDASourceEmitter::_emitInitializerList(IRType* elementType, IRUse* operand
m_writer->emit("\n}");
}
+void CUDASourceEmitter::_emitGetHalfVectorElement(IRInst* base, Index index, Index vecSize, const EmitOpInfo& inOuterPrec)
+{
+ SLANG_ASSERT(index < vecSize);
+
+ EmitOpInfo outerPrec = inOuterPrec;
+
+ auto prec = getInfo(EmitOp::Postfix);
+ const bool needClose = maybeEmitParens(outerPrec, prec);
+
+ emitOperand(base, leftSide(outerPrec, prec));
+
+ m_writer->emit(".");
+
+ switch (vecSize)
+ {
+ default:
+ {
+ char const* kComponents[] = { "x", "y", "z", "w" };
+ m_writer->emit(kComponents[index]);
+ break;
+ }
+ case 3:
+ {
+ char const* kComponents[] = { "xy.x", "xy.y", "z"};
+ m_writer->emit(kComponents[index]);
+ break;
+ }
+ case 4:
+ {
+ char const* kComponents[] = { "xy.x", "xy.y", "zw.x", "zw.y" };
+ m_writer->emit(kComponents[index]);
+ break;
+ }
+ }
+
+ maybeCloseParens(needClose);
+}
+
bool CUDASourceEmitter::tryEmitInstExprImpl(IRInst* inst, const EmitOpInfo& inOuterPrec)
{
switch(inst->getOp())
{
+ case kIROp_swizzle:
+ {
+ // We need to special case for half types.
+ auto swizzleInst = static_cast<IRSwizzle*>(inst);
+
+ IRInst* baseInst = swizzleInst->getBase();
+ IRType* baseType = baseInst->getDataType();
+
+ // If we are swizzling from a built in type,
+ if (as<IRBasicType>(baseType))
+ {
+ // Just use the default behavior
+ }
+ else if (auto vecType = as<IRVectorType>(baseType))
+ {
+ if (auto basicType = as<IRBasicType>(vecType->getElementType()))
+ {
+ if (basicType->getBaseType() == BaseType::Half)
+ {
+ const Index vecElementCount = Index(getIntVal(vecType->getElementCount()));
+
+ const Index elementCount = Index(swizzleInst->getElementCount());
+ if (elementCount == 1)
+ {
+ const Index index = Index(getIntVal(swizzleInst->getElementIndex(0)));
+ _emitGetHalfVectorElement(baseInst, index, vecElementCount, inOuterPrec);
+ }
+ else
+ {
+ auto outerPrec = getInfo(EmitOp::General);
+
+ m_writer->emit("make___half");
+ m_writer->emitInt64(elementCount);
+ m_writer->emit("(");
+
+ for (Index i = 0; i < elementCount; ++i)
+ {
+ if (i)
+ {
+ m_writer->emit(", ");
+ }
+
+ const Index index = Index(getIntVal(swizzleInst->getElementIndex(i)));
+ _emitGetHalfVectorElement(baseInst, index, vecElementCount, outerPrec);
+ }
+
+ m_writer->emit(")");
+ }
+ return true;
+ }
+ }
+ }
+ break;
+ }
case kIROp_Construct:
{
// Simple constructor call
@@ -558,7 +698,7 @@ bool CUDASourceEmitter::tryEmitInstExprImpl(IRInst* inst, const EmitOpInfo& inOu
}
case kIROp_WaveMaskBallot:
{
- _requireCUDASMVersion(SemanticVersion(7, 0));
+ m_extensionTracker->requireSMVersion(SemanticVersion(7, 0));
m_writer->emit("__ballot_sync(");
emitOperand(inst->getOperand(0), getInfo(EmitOp::General));
@@ -569,7 +709,7 @@ bool CUDASourceEmitter::tryEmitInstExprImpl(IRInst* inst, const EmitOpInfo& inOu
}
case kIROp_WaveMaskMatch:
{
- _requireCUDASMVersion(SemanticVersion(7, 0));
+ m_extensionTracker->requireSMVersion(SemanticVersion(7, 0));
m_writer->emit("__match_any_sync(");
emitOperand(inst->getOperand(0), getInfo(EmitOp::General));
@@ -584,14 +724,6 @@ bool CUDASourceEmitter::tryEmitInstExprImpl(IRInst* inst, const EmitOpInfo& inOu
return Super::tryEmitInstExprImpl(inst, inOuterPrec);
}
-void CUDASourceEmitter::_requireCUDASMVersion(SemanticVersion const& version)
-{
- if (version > m_extensionTracker->m_smVersion)
- {
- m_extensionTracker->m_smVersion = version;
- }
-}
-
void CUDASourceEmitter::handleRequiredCapabilitiesImpl(IRInst* inst)
{
// Does this function declare any requirements on CUDA capabilities
@@ -603,7 +735,7 @@ void CUDASourceEmitter::handleRequiredCapabilitiesImpl(IRInst* inst)
{
SemanticVersion version;
version.setFromInteger(SemanticVersion::IntegerType(smDecoration->getCUDASMVersion()));
- _requireCUDASMVersion(version);
+ m_extensionTracker->requireSMVersion(version);
}
}
}
diff --git a/source/slang/slang-emit-cuda.h b/source/slang/slang-emit-cuda.h
index a5d227c6b..b73948525 100644
--- a/source/slang/slang-emit-cuda.h
+++ b/source/slang/slang-emit-cuda.h
@@ -18,7 +18,14 @@ public:
void requireBaseType(BaseType baseType) { m_baseTypeFlags |= _getFlag(baseType); }
bool isBaseTypeRequired(BaseType baseType) { return (m_baseTypeFlags & _getFlag(baseType)) != 0; }
+ /// Ensure that the generated code is compiled for at least CUDA SM `version`
+ void requireSMVersion(const SemanticVersion& smVersion) { m_smVersion = (smVersion > m_smVersion) ? smVersion : m_smVersion; }
+
+ /// Should be called before reading out values.
+ void finalize();
+
protected:
+
static BaseTypeFlags _getFlag(BaseType baseType) { return BaseTypeFlags(1) << int(baseType); }
BaseTypeFlags m_baseTypeFlags = 0;
@@ -86,15 +93,16 @@ protected:
// CPPSourceEmitter overrides
virtual SlangResult calcTypeName(IRType* type, CodeGenTarget target, StringBuilder& out) SLANG_OVERRIDE;
virtual SlangResult calcScalarFuncName(HLSLIntrinsic::Op op, IRBasicType* type, StringBuilder& outBuilder) SLANG_OVERRIDE;
-
+
+ virtual void emitSpecializedOperationDefinition(const HLSLIntrinsic* specOp) SLANG_OVERRIDE;
+
SlangResult _calcCUDATextureTypeName(IRTextureTypeBase* texType, StringBuilder& outName);
void _emitInitializerList(IRType* elementType, IRUse* operands, Index operandCount);
void _emitInitializerListValue(IRType* elementType, IRInst* value);
- /// Ensure that the generated code is compiled for at least CUDA SM `version`
- void _requireCUDASMVersion(SemanticVersion const& version);
-
+ void _emitGetHalfVectorElement(IRInst* baseInst, Index index, Index vecSize, const EmitOpInfo& inOuterPrec);
+
RefPtr<CUDAExtensionTracker> m_extensionTracker;
};