CUDA support improvements (#1168)

* Add test result for compile-to-cuda * Add RAII for some CUDA types to simplify usage. * First pass handling of some instrinsics on CUDA (for example transcendentals) * CUDA working with built in intrinsics. * Add missing CUDA prelude intrinsics. * CUDA matches CPU output on simple-cross-compile.slang * First pass at hlsl-scalar-float-intrinsic.slang test. * Fix smoothstep impl on CUDA and CPU. * Fixed step intrinsic on CUDA/CPU. * Added operator[] to Matrix for C++, to allow row access. Needs a fix for CUDA. * Fixed warning on clang build.
author: jsmall-nvidia <jsmall@nvidia.com> 2020-01-21 09:38:10 -0500
committer: GitHub <noreply@github.com> 2020-01-21 09:38:10 -0500
commit: 47392bc72b826b4ad427b703391a77e697735a65 (patch)
tree: 7c541c4295742b765124f42bab9f713276c83580 /source/slang/slang-emit-cuda.cpp
parent: a8669ade5cb3add8b9ce08e2c3bd96e93190bca8 (diff)
1 files changed, 156 insertions, 56 deletions
diff --git a/source/slang/slang-emit-cuda.cpp b/source/slang/slang-emit-cuda.cpp
index 39a25aafa..c72b9125a 100644
--- a/source/slang/slang-emit-cuda.cpp
+++ b/source/slang/slang-emit-cuda.cpp
@@ -10,6 +10,23 @@
 
 namespace Slang {
 
+static bool _isSingleNameBasicType(IROp op)
+{
+    switch (op)
+    {
+        case kIROp_Int64Type:   
+        case kIROp_UInt8Type: 
+        case kIROp_UInt16Type:
+        case kIROp_UIntType: 
+        case kIROp_UInt64Type:
+        {
+            return false;
+        }
+        default: return true;
+
+    }
+}
+
 /* static */ UnownedStringSlice CUDASourceEmitter::getBuiltinTypeName(IROp op)
 {
     switch (op)
@@ -110,10 +127,93 @@ SlangResult CUDASourceEmitter::_calcCUDATextureTypeName(IRTextureTypeBase* texTy
     return SLANG_OK;
 }
 
-void CUDASourceEmitter::emitSpecializedOperationDefinition(const HLSLIntrinsic* specOp)
+
+SlangResult CUDASourceEmitter::calcScalarFuncName(HLSLIntrinsic::Op op, IRBasicType* type, StringBuilder& outBuilder)
 {
-    m_writer->emit("__device__ ");
-    Super::emitSpecializedOperationDefinition(specOp);
+    typedef HLSLIntrinsic::Op Op;
+
+    UnownedStringSlice funcName;
+    
+    switch (op)
+    {
+        case Op::Sin:
+        case Op::Cos:
+        case Op::Tan:
+        case Op::ArcSin:
+        case Op::ArcCos:
+        case Op::ArcTan:
+        case Op::ArcTan2:
+        case Op::Floor:
+        case Op::Ceil:
+        case Op::FMod:
+        case Op::Exp2:
+        case Op::Exp:
+        case Op::Log:
+        case Op::Log2:
+        case Op::Log10:
+        case Op::FRem:
+        case Op::Sqrt:
+        case Op::RecipSqrt:
+        case Op::Pow:
+        case Op::Trunc:
+        {
+            if (type->op == kIROp_FloatType || type->op == kIROp_DoubleType)
+            {
+                funcName = HLSLIntrinsic::getInfo(op).funcName;
+            }
+            break;
+        }
+        case Op::Max:
+        case Op::Min:
+        case Op::Abs:
+        {
+            // There are only floating point built in versions of these, prefixed with f
+            if (type->op == kIROp_FloatType || type->op == kIROp_DoubleType)
+            {
+                outBuilder << "f";
+                outBuilder << HLSLIntrinsic::getInfo(op).funcName;
+
+                if (type->op == kIROp_FloatType)
+                {
+                    outBuilder << "f";
+                }
+                return SLANG_OK;
+            }
+            break;
+        }
+
+        default: break;
+    }
+
+    if (funcName.size())
+    {
+        outBuilder << funcName;
+        if (type->op == kIROp_FloatType)
+        {
+            outBuilder << "f";
+        }
+        return SLANG_OK;
+    }
+
+    // Missing ones:
+    // 
+    // sincos - the built in uses pointer, so we'll just define in prelude
+    // rcp
+    // sign
+    // saturate
+    // frac
+    // smoothstep
+    // lerp
+    // clamp
+    // step
+    // 
+    // For integer types
+    // abs
+    // min
+    // max
+
+    // Defer to the supers impl
+    return Super::calcScalarFuncName(op, type, outBuilder);
 }
 
 SlangResult CUDASourceEmitter::calcTypeName(IRType* type, CodeGenTarget target, StringBuilder& out)
@@ -278,73 +378,73 @@ void CUDASourceEmitter::emitOperandImpl(IRInst* inst, EmitOpInfo const& outerPre
     Super::emitOperandImpl(inst, outerPrec);
 }
 
-bool CUDASourceEmitter::tryEmitInstExprImpl(IRInst* inst, const EmitOpInfo& inOuterPrec)
+void CUDASourceEmitter::emitCall(const HLSLIntrinsic* specOp, IRInst* inst, const IRUse* operands, int numOperands, const EmitOpInfo& inOuterPrec)
 {
-    switch (inst->op)
+    switch (specOp->op)
     {
-        case kIROp_Construct:
-        case kIROp_makeVector:
+        case HLSLIntrinsic::Op::Init:
         {
-            if (inst->getOperandCount() == 1)
-            {
-                EmitOpInfo outerPrec = inOuterPrec;
-                bool needClose = false;
+            // For CUDA vector types we construct with make_
 
-                auto prec = getInfo(EmitOp::Prefix);
-                needClose = maybeEmitParens(outerPrec, prec);
+            auto writer = m_writer;
 
-                // Need to emit as cast for HLSL
-                m_writer->emit("(");
-                emitType(inst->getDataType());
-                m_writer->emit(") ");
-                emitOperand(inst->getOperand(0), rightSide(outerPrec, prec));
+            IRType* retType = specOp->returnType;
 
-                maybeCloseParens(needClose);
-                // Handled
-                return true;
-            }
-            else
+            switch (retType->op)
             {
-                m_writer->emit("make_");
-                m_writer->emit(_getTypeName(inst->getDataType()));
-                emitArgs(inst);
-                return true;
+                case kIROp_VectorType:
+                {
+                    // Get the type name
+                    writer->emit("make_");
+                    emitType(retType);
+                    writer->emitChar('(');
+
+                    for (int i = 0; i < numOperands; ++i)
+                    {
+                        if (i > 0)
+                        {
+                            writer->emit(", ");
+                        }
+                        emitOperand(operands[i].get(), getInfo(EmitOp::General));
+                    }
+
+                    writer->emitChar(')');
+                    return;
+                }
+                default: break; 
             }
             break;
         }
-        case kIROp_MakeMatrix:
-        {
-            return false;
-        }
-        case kIROp_BitCast:
+        default: break;
+    }
+
+    return Super::emitCall(specOp, inst, operands, numOperands, inOuterPrec);
+}
+
+bool CUDASourceEmitter::tryEmitInstExprImpl(IRInst* inst, const EmitOpInfo& inOuterPrec)
+{
+    switch(inst->op)
+    {
+        case kIROp_Construct:
         {
-            auto toType = extractBaseType(inst->getDataType());
-            switch (toType)
+            // Simple constructor call
+            // On CUDA some of the built in types can't be used as constructors directly
+
+            IRType* type = inst->getDataType();
+            if (auto basicType = as<IRBasicType>(type) && !_isSingleNameBasicType(type->op))
             {
-                default:
-                    m_writer->emit("/* unhandled */");
-                    break;
-                case BaseType::UInt:
-                    break;
-                case BaseType::Int:
-                    m_writer->emit("(");
-                    emitType(inst->getDataType());
-                    m_writer->emit(")");
-                    break;
-                case BaseType::Float:
-                    m_writer->emit("asfloat");
-                    break;
+                m_writer->emit("(");
+                emitType(inst->getDataType());
+                m_writer->emit(")");
+                emitArgs(inst);
+                return true;
             }
-
-            m_writer->emit("(");
-            emitOperand(inst->getOperand(0), getInfo(EmitOp::General));
-            m_writer->emit(")");
-            return true;
+            break;
         }
         default: break;
     }
-    // Not handled
-    return false;
+
+    return Super::tryEmitInstExprImpl(inst, inOuterPrec);
 }
 
 void CUDASourceEmitter::emitLayoutDirectivesImpl(TargetRequest* targetReq)
@@ -398,7 +498,7 @@ void CUDASourceEmitter::emitSimpleFuncParamsImpl(IRFunc* func)
 
 void CUDASourceEmitter::emitSimpleFuncImpl(IRFunc* func)
 {
-    // Mark as run on device. Don't need to worry about entry point, as that is output separtely to call the __device_ implementation
+    // Mark as run on device. Don't need to worry about entry point, as that is output separately to call the __device_ implementation
     m_writer->emit("__device__ ");    
 
     CLikeSourceEmitter::emitSimpleFuncImpl(func);
@@ -444,7 +544,7 @@ void CUDASourceEmitter::emitPreprocessorDirectivesImpl()
     // Emit all the intrinsics that were used
     for (const auto& keyValue : m_intrinsicNameMap)
     {
-        emitSpecializedOperationDefinition(keyValue.Key);
+        _maybeEmitSpecializedOperationDefinition(keyValue.Key);
     }
 }
author	jsmall-nvidia <jsmall@nvidia.com>	2020-01-21 09:38:10 -0500
committer	GitHub <noreply@github.com>	2020-01-21 09:38:10 -0500
commit	47392bc72b826b4ad427b703391a77e697735a65 (patch)
tree	7c541c4295742b765124f42bab9f713276c83580 /source/slang/slang-emit-cuda.cpp
parent	a8669ade5cb3add8b9ce08e2c3bd96e93190bca8 (diff)