summaryrefslogtreecommitdiff
path: root/source/slang/slang-emit-cuda.cpp
diff options
context:
space:
mode:
authorjsmall-nvidia <jsmall@nvidia.com>2020-01-21 09:38:10 -0500
committerGitHub <noreply@github.com>2020-01-21 09:38:10 -0500
commit47392bc72b826b4ad427b703391a77e697735a65 (patch)
tree7c541c4295742b765124f42bab9f713276c83580 /source/slang/slang-emit-cuda.cpp
parenta8669ade5cb3add8b9ce08e2c3bd96e93190bca8 (diff)
CUDA support improvements (#1168)
* Add test result for compile-to-cuda * Add RAII for some CUDA types to simplify usage. * First pass handling of some instrinsics on CUDA (for example transcendentals) * CUDA working with built in intrinsics. * Add missing CUDA prelude intrinsics. * CUDA matches CPU output on simple-cross-compile.slang * First pass at hlsl-scalar-float-intrinsic.slang test. * Fix smoothstep impl on CUDA and CPU. * Fixed step intrinsic on CUDA/CPU. * Added operator[] to Matrix for C++, to allow row access. Needs a fix for CUDA. * Fixed warning on clang build.
Diffstat (limited to 'source/slang/slang-emit-cuda.cpp')
-rw-r--r--source/slang/slang-emit-cuda.cpp212
1 files changed, 156 insertions, 56 deletions
diff --git a/source/slang/slang-emit-cuda.cpp b/source/slang/slang-emit-cuda.cpp
index 39a25aafa..c72b9125a 100644
--- a/source/slang/slang-emit-cuda.cpp
+++ b/source/slang/slang-emit-cuda.cpp
@@ -10,6 +10,23 @@
namespace Slang {
+static bool _isSingleNameBasicType(IROp op)
+{
+ switch (op)
+ {
+ case kIROp_Int64Type:
+ case kIROp_UInt8Type:
+ case kIROp_UInt16Type:
+ case kIROp_UIntType:
+ case kIROp_UInt64Type:
+ {
+ return false;
+ }
+ default: return true;
+
+ }
+}
+
/* static */ UnownedStringSlice CUDASourceEmitter::getBuiltinTypeName(IROp op)
{
switch (op)
@@ -110,10 +127,93 @@ SlangResult CUDASourceEmitter::_calcCUDATextureTypeName(IRTextureTypeBase* texTy
return SLANG_OK;
}
-void CUDASourceEmitter::emitSpecializedOperationDefinition(const HLSLIntrinsic* specOp)
+
+SlangResult CUDASourceEmitter::calcScalarFuncName(HLSLIntrinsic::Op op, IRBasicType* type, StringBuilder& outBuilder)
{
- m_writer->emit("__device__ ");
- Super::emitSpecializedOperationDefinition(specOp);
+ typedef HLSLIntrinsic::Op Op;
+
+ UnownedStringSlice funcName;
+
+ switch (op)
+ {
+ case Op::Sin:
+ case Op::Cos:
+ case Op::Tan:
+ case Op::ArcSin:
+ case Op::ArcCos:
+ case Op::ArcTan:
+ case Op::ArcTan2:
+ case Op::Floor:
+ case Op::Ceil:
+ case Op::FMod:
+ case Op::Exp2:
+ case Op::Exp:
+ case Op::Log:
+ case Op::Log2:
+ case Op::Log10:
+ case Op::FRem:
+ case Op::Sqrt:
+ case Op::RecipSqrt:
+ case Op::Pow:
+ case Op::Trunc:
+ {
+ if (type->op == kIROp_FloatType || type->op == kIROp_DoubleType)
+ {
+ funcName = HLSLIntrinsic::getInfo(op).funcName;
+ }
+ break;
+ }
+ case Op::Max:
+ case Op::Min:
+ case Op::Abs:
+ {
+ // There are only floating point built in versions of these, prefixed with f
+ if (type->op == kIROp_FloatType || type->op == kIROp_DoubleType)
+ {
+ outBuilder << "f";
+ outBuilder << HLSLIntrinsic::getInfo(op).funcName;
+
+ if (type->op == kIROp_FloatType)
+ {
+ outBuilder << "f";
+ }
+ return SLANG_OK;
+ }
+ break;
+ }
+
+ default: break;
+ }
+
+ if (funcName.size())
+ {
+ outBuilder << funcName;
+ if (type->op == kIROp_FloatType)
+ {
+ outBuilder << "f";
+ }
+ return SLANG_OK;
+ }
+
+ // Missing ones:
+ //
+ // sincos - the built in uses pointer, so we'll just define in prelude
+ // rcp
+ // sign
+ // saturate
+ // frac
+ // smoothstep
+ // lerp
+ // clamp
+ // step
+ //
+ // For integer types
+ // abs
+ // min
+ // max
+
+ // Defer to the supers impl
+ return Super::calcScalarFuncName(op, type, outBuilder);
}
SlangResult CUDASourceEmitter::calcTypeName(IRType* type, CodeGenTarget target, StringBuilder& out)
@@ -278,73 +378,73 @@ void CUDASourceEmitter::emitOperandImpl(IRInst* inst, EmitOpInfo const& outerPre
Super::emitOperandImpl(inst, outerPrec);
}
-bool CUDASourceEmitter::tryEmitInstExprImpl(IRInst* inst, const EmitOpInfo& inOuterPrec)
+void CUDASourceEmitter::emitCall(const HLSLIntrinsic* specOp, IRInst* inst, const IRUse* operands, int numOperands, const EmitOpInfo& inOuterPrec)
{
- switch (inst->op)
+ switch (specOp->op)
{
- case kIROp_Construct:
- case kIROp_makeVector:
+ case HLSLIntrinsic::Op::Init:
{
- if (inst->getOperandCount() == 1)
- {
- EmitOpInfo outerPrec = inOuterPrec;
- bool needClose = false;
+ // For CUDA vector types we construct with make_
- auto prec = getInfo(EmitOp::Prefix);
- needClose = maybeEmitParens(outerPrec, prec);
+ auto writer = m_writer;
- // Need to emit as cast for HLSL
- m_writer->emit("(");
- emitType(inst->getDataType());
- m_writer->emit(") ");
- emitOperand(inst->getOperand(0), rightSide(outerPrec, prec));
+ IRType* retType = specOp->returnType;
- maybeCloseParens(needClose);
- // Handled
- return true;
- }
- else
+ switch (retType->op)
{
- m_writer->emit("make_");
- m_writer->emit(_getTypeName(inst->getDataType()));
- emitArgs(inst);
- return true;
+ case kIROp_VectorType:
+ {
+ // Get the type name
+ writer->emit("make_");
+ emitType(retType);
+ writer->emitChar('(');
+
+ for (int i = 0; i < numOperands; ++i)
+ {
+ if (i > 0)
+ {
+ writer->emit(", ");
+ }
+ emitOperand(operands[i].get(), getInfo(EmitOp::General));
+ }
+
+ writer->emitChar(')');
+ return;
+ }
+ default: break;
}
break;
}
- case kIROp_MakeMatrix:
- {
- return false;
- }
- case kIROp_BitCast:
+ default: break;
+ }
+
+ return Super::emitCall(specOp, inst, operands, numOperands, inOuterPrec);
+}
+
+bool CUDASourceEmitter::tryEmitInstExprImpl(IRInst* inst, const EmitOpInfo& inOuterPrec)
+{
+ switch(inst->op)
+ {
+ case kIROp_Construct:
{
- auto toType = extractBaseType(inst->getDataType());
- switch (toType)
+ // Simple constructor call
+ // On CUDA some of the built in types can't be used as constructors directly
+
+ IRType* type = inst->getDataType();
+ if (auto basicType = as<IRBasicType>(type) && !_isSingleNameBasicType(type->op))
{
- default:
- m_writer->emit("/* unhandled */");
- break;
- case BaseType::UInt:
- break;
- case BaseType::Int:
- m_writer->emit("(");
- emitType(inst->getDataType());
- m_writer->emit(")");
- break;
- case BaseType::Float:
- m_writer->emit("asfloat");
- break;
+ m_writer->emit("(");
+ emitType(inst->getDataType());
+ m_writer->emit(")");
+ emitArgs(inst);
+ return true;
}
-
- m_writer->emit("(");
- emitOperand(inst->getOperand(0), getInfo(EmitOp::General));
- m_writer->emit(")");
- return true;
+ break;
}
default: break;
}
- // Not handled
- return false;
+
+ return Super::tryEmitInstExprImpl(inst, inOuterPrec);
}
void CUDASourceEmitter::emitLayoutDirectivesImpl(TargetRequest* targetReq)
@@ -398,7 +498,7 @@ void CUDASourceEmitter::emitSimpleFuncParamsImpl(IRFunc* func)
void CUDASourceEmitter::emitSimpleFuncImpl(IRFunc* func)
{
- // Mark as run on device. Don't need to worry about entry point, as that is output separtely to call the __device_ implementation
+ // Mark as run on device. Don't need to worry about entry point, as that is output separately to call the __device_ implementation
m_writer->emit("__device__ ");
CLikeSourceEmitter::emitSimpleFuncImpl(func);
@@ -444,7 +544,7 @@ void CUDASourceEmitter::emitPreprocessorDirectivesImpl()
// Emit all the intrinsics that were used
for (const auto& keyValue : m_intrinsicNameMap)
{
- emitSpecializedOperationDefinition(keyValue.Key);
+ _maybeEmitSpecializedOperationDefinition(keyValue.Key);
}
}