WIP CUDA source emit (#1157)

* CPPCompiler -> DownstreamCompiler * Added DownstreamCompileResult to start abstraction such that we don't need files. * * Split out slang-blob.cpp * Made CompileResult hold a DownstreamCompileResult - for access to binary or ISlangSharedLibrary * Keep temporary files in scope. * Add a hash to the hex dump stream. * Move all file tracking into DownstreamCompiler. * WIP support for nvrtc. * WIP: Adding support for nvrtc compiler. Adding enum types, wiring up the nvrtc into slang. * Fix remaining CPPCompiler references. * Fix order issue on target string matching. * Use ISlangSharedLibrary for nvrtc. * Use DownstreamCompiler for nvrtc. * WIP first pass at compilation win nvrtc. * Added testing if file is on file system into CommandLineDownstreamCompiler. Added sourceContentsPath. * Make test cuda-compile.cu work by just compiling not comparing output. * Genearlize DownstreamCompiler usage. * Fix warning on clang. * Remove CompilerType from DownstreamCompiler. * Use DownstreamCompiler interface for all compilers. NOTE for FXC, DXC and GLSLANG this doesn't mean using 'compile' - it's still extracting functions from shared library. * Replace DownstreamCompiler::SourceType -> SlangSourceLanguage * Replace _canCompile with something data driven. * Fix compiling on gcc/clang for DownstreamCompiler. * Moved some text conversions into DownstreamCompiler. * Fix problem on non-vc builds with not having return on locateCompilers for VS. * Change so no warning for code not reachable on locateCompilers for vs. * WIP: CUDA code generation - currently just using CPU layout and HLSL. * emitXXXForEntryPoint -> emitEntryPointSource emitSourceForEntryPoint -> emitEntryPointSourceFromIR Fix up generating cuda to get PTX. * WIP emitting cuda for IR. * Small improvements to CUDA ouput. * Disable the CUDA emit test, as output not currently compilable.
author: jsmall-nvidia <jsmall@nvidia.com> 2019-12-19 11:23:14 -0500
committer: GitHub <noreply@github.com> 2019-12-19 11:23:14 -0500
commit: e3fe0319467546bae070137c58dcf8f9fbe93c79 (patch)
tree: 6cc26ccda33725e98c4a9a0408cf31a1348db268 /source/slang/slang-emit-cuda.cpp
parent: 60934d98fbc20d83b5e149e72a197ec4f5c61580 (diff)
1 files changed, 844 insertions, 0 deletions
diff --git a/source/slang/slang-emit-cuda.cpp b/source/slang/slang-emit-cuda.cpp
new file mode 100644
index 000000000..feafc4e4e
--- /dev/null
+++ b/source/slang/slang-emit-cuda.cpp
@@ -0,0 +1,844 @@
+// slang-emit-cuda.cpp
+#include "slang-emit-cuda.h"
+
+#include "../core/slang-writer.h"
+
+#include "slang-emit-source-writer.h"
+#include "slang-mangled-lexer.h"
+
+#include <assert.h>
+
+namespace Slang {
+
+/* static */ UnownedStringSlice CUDASourceEmitter::getBuiltinTypeName(IROp op)
+{
+    switch (op)
+    {
+        case kIROp_VoidType:    return UnownedStringSlice("void");
+        case kIROp_BoolType:    return UnownedStringSlice("bool");
+
+        case kIROp_Int8Type:    return UnownedStringSlice("char");
+        case kIROp_Int16Type:   return UnownedStringSlice("short");
+        case kIROp_IntType:     return UnownedStringSlice("int");
+        case kIROp_Int64Type:   return UnownedStringSlice("long long");
+
+        case kIROp_UInt8Type:   return UnownedStringSlice("unsigned char");
+        case kIROp_UInt16Type:  return UnownedStringSlice("unsigned short");
+        case kIROp_UIntType:    return UnownedStringSlice("unsigned int");
+        case kIROp_UInt64Type:  return UnownedStringSlice("unsigned long long");
+
+            // Not clear just yet how we should handle half... we want all processing as float probly, but when reading/writing to memory converting
+        case kIROp_HalfType:    return UnownedStringSlice("half");
+
+        case kIROp_FloatType:   return UnownedStringSlice("float");
+        case kIROp_DoubleType:  return UnownedStringSlice("double");
+        default:                return UnownedStringSlice();
+    }
+}
+
+
+/* static */ UnownedStringSlice CUDASourceEmitter::getVectorPrefix(IROp op)
+{
+    switch (op)
+    {
+        case kIROp_BoolType:    return UnownedStringSlice("bool");
+
+        case kIROp_Int8Type:    return UnownedStringSlice("char");
+        case kIROp_Int16Type:   return UnownedStringSlice("short");
+        case kIROp_IntType:     return UnownedStringSlice("int");
+        case kIROp_Int64Type:   return UnownedStringSlice("longlong");
+
+        case kIROp_UInt8Type:   return UnownedStringSlice("uchar");
+        case kIROp_UInt16Type:  return UnownedStringSlice("ushort");
+        case kIROp_UIntType:    return UnownedStringSlice("uint");
+        case kIROp_UInt64Type:  return UnownedStringSlice("ulonglong");
+
+            // Not clear just yet how we should handle half... we want all processing as float probly, but when reading/writing to memory converting
+        case kIROp_HalfType:    return UnownedStringSlice("half");
+
+        case kIROp_FloatType:   return UnownedStringSlice("float");
+        case kIROp_DoubleType:  return UnownedStringSlice("double");
+        default:                return UnownedStringSlice();
+    }
+}
+
+SlangResult CUDASourceEmitter::_calcCUDATextureTypeName(IRTextureTypeBase* texType, StringBuilder& outName)
+{
+    // texture<float, cudaTextureType2D, cudaReadModeElementType> texRef;
+
+    // Not clear how to do this yet
+    if (texType->isMultisample() || texType->isArray())
+    {
+        return SLANG_FAIL;
+    }
+
+    outName << "texture<";
+    outName << _getCUDATypeName(texType->getElementType());
+    outName << ", ";
+
+    switch (texType->GetBaseShape())
+    {
+        case TextureFlavor::Shape::Shape1D:		outName << "cudaTextureType1D";		break;
+        case TextureFlavor::Shape::Shape2D:		outName << "cudaTextureType2D";		break;
+        case TextureFlavor::Shape::Shape3D:		outName << "cudaTextureType3D";		break;
+        case TextureFlavor::Shape::ShapeCube:	outName << "cudaTextureTypeCubemap";	break;
+        case TextureFlavor::Shape::ShapeBuffer: outName << "Buffer";         break;
+        default:
+            SLANG_DIAGNOSE_UNEXPECTED(getSink(), SourceLoc(), "unhandled resource shape");
+            return SLANG_FAIL;
+    }
+
+    outName << ", ";
+
+    switch (texType->getAccess())
+    {
+        case SLANG_RESOURCE_ACCESS_READ:
+        {
+            // Other value is cudaReadModeNormalizedFloat 
+
+            outName << "cudaReadModeElementType";
+            break;
+        }
+        default:
+        {
+            SLANG_DIAGNOSE_UNEXPECTED(getSink(), SourceLoc(), "unhandled resource access mode");
+            return SLANG_FAIL;
+        }
+    }
+
+    outName << ">";
+    return SLANG_OK;
+}
+
+// This is junk.. 
+static UnownedStringSlice _getCUDAResourceTypePrefix(IROp op)
+{
+    switch (op)
+    {
+        case kIROp_HLSLStructuredBufferType:            return UnownedStringSlice::fromLiteral("StructuredBuffer");
+        case kIROp_HLSLRWStructuredBufferType:          return UnownedStringSlice::fromLiteral("RWStructuredBuffer");
+        case kIROp_HLSLRWByteAddressBufferType:         return UnownedStringSlice::fromLiteral("RWByteAddressBuffer");
+        case kIROp_HLSLByteAddressBufferType:           return UnownedStringSlice::fromLiteral("ByteAddressBuffer");
+        case kIROp_SamplerStateType:                    return UnownedStringSlice::fromLiteral("SamplerState");
+        case kIROp_SamplerComparisonStateType:                  return UnownedStringSlice::fromLiteral("SamplerComparisonState");
+        case kIROp_HLSLRasterizerOrderedStructuredBufferType:   return UnownedStringSlice::fromLiteral("RasterizerOrderedStructuredBuffer");
+        case kIROp_HLSLAppendStructuredBufferType:              return UnownedStringSlice::fromLiteral("AppendStructuredBuffer");
+        case kIROp_HLSLConsumeStructuredBufferType:             return UnownedStringSlice::fromLiteral("ConsumeStructuredBuffer");
+        case kIROp_HLSLRasterizerOrderedByteAddressBufferType:  return UnownedStringSlice::fromLiteral("RasterizerOrderedByteAddressBuffer");
+        case kIROp_RaytracingAccelerationStructureType:         return UnownedStringSlice::fromLiteral("RaytracingAccelerationStructure");
+
+        default:                                        return UnownedStringSlice();
+    }
+}
+
+SlangResult CUDASourceEmitter::_calcCUDATypeName(IRType* type, StringBuilder& out)
+{
+    switch (type->op)
+    {
+        case kIROp_HalfType:
+        {
+            // Special case half
+            out << getBuiltinTypeName(kIROp_FloatType);
+            return SLANG_OK;
+        }
+        case kIROp_VectorType:
+        {
+            auto vecType = static_cast<IRVectorType*>(type);
+            auto vecCount = int(GetIntVal(vecType->getElementCount()));
+            const IROp elemType = vecType->getElementType()->op;
+
+            UnownedStringSlice prefix = getVectorPrefix(elemType);
+            if (prefix.size() <= 0)
+            {
+                return SLANG_FAIL;
+            }
+            out << prefix << vecCount;
+            return SLANG_OK;
+        }
+#if 0
+        case kIROp_MatrixType:
+        {
+            auto matType = static_cast<IRMatrixType*>(type);
+
+            auto elementType = matType->getElementType();
+            const auto rowCount = int(GetIntVal(matType->getRowCount()));
+            const auto colCount = int(GetIntVal(matType->getColumnCount()));
+
+            if (target == CodeGenTarget::CPPSource)
+            {
+                out << "Matrix<" << getBuiltinTypeName(elementType->op) << ", " << rowCount << ", " << colCount << ">";
+            }
+            else
+            {
+                out << "Mat";
+                const UnownedStringSlice postFix = _getCTypeVecPostFix(_getCType(elementType->op));
+                out << postFix;
+                if (postFix.size() > 1)
+                {
+                    out << "_";
+                }
+                out << rowCount;
+                out << colCount;
+            }
+            return SLANG_OK;
+        }
+        case kIROp_ArrayType:
+        {
+            auto arrayType = static_cast<IRArrayType*>(type);
+            auto elementType = arrayType->getElementType();
+            int elementCount = int(GetIntVal(arrayType->getElementCount()));
+
+            out << "FixedArray<";
+            SLANG_RETURN_ON_FAIL(_calcTypeName(elementType, target, out));
+            out << ", " << elementCount << ">";
+            return SLANG_OK;
+        }
+        case kIROp_UnsizedArrayType:
+        {
+            auto arrayType = static_cast<IRUnsizedArrayType*>(type);
+            auto elementType = arrayType->getElementType();
+
+            out << "Array<";
+            SLANG_RETURN_ON_FAIL(_calcTypeName(elementType, target, out));
+            out << ">";
+            return SLANG_OK;
+        }
+#endif
+        default:
+        {
+            if (isNominalOp(type->op))
+            {
+                out << getName(type);
+                return SLANG_OK;
+            }
+
+            if (IRBasicType::isaImpl(type->op))
+            {
+                out << getBuiltinTypeName(type->op);
+                return SLANG_OK;
+            }
+
+            if (auto texType = as<IRTextureTypeBase>(type))
+            {
+                // We don't support TextureSampler, so ignore that
+                if (texType->op != kIROp_TextureSamplerType)
+                {
+                    return _calcCUDATextureTypeName(texType, out);
+                }
+            }
+
+            // If _getResourceTypePrefix returns something, we assume can output any specialization after it in order.
+            {
+                UnownedStringSlice prefix = _getCUDAResourceTypePrefix(type->op);
+                if (prefix.size() > 0)
+                {
+                    auto oldWriter = m_writer;
+                    SourceManager* sourceManager = oldWriter->getSourceManager();
+
+                    // TODO(JS): This is a bit of a hack. We don't want to emit the result here,
+                    // so we replace the writer, write out the type, grab the contents, and restore the writer
+
+                    SourceWriter writer(sourceManager, LineDirectiveMode::None);
+                    m_writer = &writer;
+
+                    m_writer->emit(prefix);
+
+                    // TODO(JS).
+                    // Assumes ordering of types matches ordering of operands.
+
+                    UInt operandCount = type->getOperandCount();
+                    if (operandCount)
+                    {
+                        m_writer->emit("<");
+                        for (UInt ii = 0; ii < operandCount; ++ii)
+                        {
+                            if (ii != 0)
+                            {
+                                m_writer->emit(", ");
+                            }
+                            emitVal(type->getOperand(ii), getInfo(EmitOp::General));
+                        }
+                        m_writer->emit(">");
+                    }
+
+                    out << writer.getContent();
+
+                    m_writer = oldWriter;
+                    return SLANG_OK;
+                }
+            }
+
+            break;
+        }
+    }
+
+    SLANG_DIAGNOSE_UNEXPECTED(getSink(), SourceLoc(), "unhandled type for CUDA emit");
+    return SLANG_FAIL;
+}
+
+
+UnownedStringSlice CUDASourceEmitter::_getCUDATypeName(IRType* type)
+{
+    StringSlicePool::Handle handle = StringSlicePool::kNullHandle;
+    if (m_typeNameMap.TryGetValue(type, handle))
+    {
+        return m_slicePool.getSlice(handle);
+    }
+
+#if 0
+    if (type->op == kIROp_MatrixType)
+    {
+        auto matType = static_cast<IRMatrixType*>(type);
+
+        auto elementType = matType->getElementType();
+        const auto rowCount = int(GetIntVal(matType->getRowCount()));
+        const auto colCount = int(GetIntVal(matType->getColumnCount()));
+
+        // Make sure the vector type the matrix is built on is added
+        useType(_getVecType(elementType, colCount));
+    }
+#endif
+
+    StringBuilder builder;
+    if (SLANG_SUCCEEDED(_calcCUDATypeName(type, builder)))
+    {
+        handle = m_slicePool.add(builder);
+    }
+
+    m_typeNameMap.Add(type, handle);
+
+    SLANG_ASSERT(handle != StringSlicePool::kNullHandle);
+    return m_slicePool.getSlice(handle);
+}
+
+void CUDASourceEmitter::_emitCUDADecorationSingleString(const char* name, IRFunc* entryPoint, IRStringLit* val)
+{
+    SLANG_UNUSED(entryPoint);
+    assert(val);
+
+    m_writer->emit("[");
+    m_writer->emit(name);
+    m_writer->emit("(\"");
+    m_writer->emit(val->getStringSlice());
+    m_writer->emit("\")]\n");
+}
+
+void CUDASourceEmitter::_emitCUDADecorationSingleInt(const char* name, IRFunc* entryPoint, IRIntLit* val)
+{
+    SLANG_UNUSED(entryPoint);
+    SLANG_ASSERT(val);
+
+    auto intVal = GetIntVal(val);
+
+    m_writer->emit("[");
+    m_writer->emit(name);
+    m_writer->emit("(");
+    m_writer->emit(intVal);
+    m_writer->emit(")]\n");
+}
+
+void CUDASourceEmitter::_emitCUDARegisterSemantic(LayoutResourceKind kind, EmitVarChain* chain, char const* uniformSemanticSpelling)
+{
+    if (!chain)
+        return;
+    if (!chain->varLayout->usesResourceKind(kind))
+        return;
+
+    UInt index = getBindingOffset(chain, kind);
+    UInt space = getBindingSpace(chain, kind);
+
+    switch (kind)
+    {
+        case LayoutResourceKind::Uniform:
+        {
+            UInt offset = index;
+
+            // The HLSL `c` register space is logically grouped in 16-byte registers,
+            // while we try to traffic in byte offsets. That means we need to pick
+            // a register number, based on the starting offset in 16-byte register
+            // units, and then a "component" within that register, based on 4-byte
+            // offsets from there. We cannot support more fine-grained offsets than that.
+
+            m_writer->emit(" : ");
+            m_writer->emit(uniformSemanticSpelling);
+            m_writer->emit("(c");
+
+            // Size of a logical `c` register in bytes
+            auto registerSize = 16;
+
+            // Size of each component of a logical `c` register, in bytes
+            auto componentSize = 4;
+
+            size_t startRegister = offset / registerSize;
+            m_writer->emit(int(startRegister));
+
+            size_t byteOffsetInRegister = offset % registerSize;
+
+            // If this field doesn't start on an even register boundary,
+            // then we need to emit additional information to pick the
+            // right component to start from
+            if (byteOffsetInRegister != 0)
+            {
+                // The value had better occupy a whole number of components.
+                SLANG_RELEASE_ASSERT(byteOffsetInRegister % componentSize == 0);
+
+                size_t startComponent = byteOffsetInRegister / componentSize;
+
+                static const char* kComponentNames[] = { "x", "y", "z", "w" };
+                m_writer->emit(".");
+                m_writer->emit(kComponentNames[startComponent]);
+            }
+            m_writer->emit(")");
+        }
+        break;
+
+        case LayoutResourceKind::RegisterSpace:
+        case LayoutResourceKind::GenericResource:
+        case LayoutResourceKind::ExistentialTypeParam:
+        case LayoutResourceKind::ExistentialObjectParam:
+            // ignore
+            break;
+        default:
+        {
+            m_writer->emit(" : register(");
+            switch (kind)
+            {
+                case LayoutResourceKind::ConstantBuffer:
+                    m_writer->emit("b");
+                    break;
+                case LayoutResourceKind::ShaderResource:
+                    m_writer->emit("t");
+                    break;
+                case LayoutResourceKind::UnorderedAccess:
+                    m_writer->emit("u");
+                    break;
+                case LayoutResourceKind::SamplerState:
+                    m_writer->emit("s");
+                    break;
+                default:
+                    SLANG_DIAGNOSE_UNEXPECTED(getSink(), SourceLoc(), "unhandled HLSL register type");
+                    break;
+            }
+            m_writer->emit(index);
+            if (space)
+            {
+                m_writer->emit(", space");
+                m_writer->emit(space);
+            }
+            m_writer->emit(")");
+        }
+    }
+}
+
+void CUDASourceEmitter::_emitCUDARegisterSemantics(EmitVarChain* chain, char const* uniformSemanticSpelling)
+{
+    if (!chain) return;
+
+    auto layout = chain->varLayout;
+
+    switch (getSourceStyle())
+    {
+        default:
+            return;
+
+        case SourceStyle::HLSL:
+            break;
+    }
+
+    for (auto rr : layout->getOffsetAttrs())
+    {
+        _emitCUDARegisterSemantic(rr->getResourceKind(), chain, uniformSemanticSpelling);
+    }
+}
+
+void CUDASourceEmitter::_emitCUDARegisterSemantics(IRVarLayout* varLayout, char const* uniformSemanticSpelling)
+{
+    if (!varLayout)
+        return;
+
+    EmitVarChain chain(varLayout);
+    _emitCUDARegisterSemantics(&chain, uniformSemanticSpelling);
+}
+
+void CUDASourceEmitter::_emitCUDAParameterGroupFieldLayoutSemantics(EmitVarChain* chain)
+{
+    if (!chain)
+        return;
+
+    auto layout = chain->varLayout;
+    for (auto rr : layout->getOffsetAttrs())
+    {
+        _emitCUDARegisterSemantic(rr->getResourceKind(), chain, "packoffset");
+    }
+}
+
+void CUDASourceEmitter::_emitCUDAParameterGroupFieldLayoutSemantics(IRVarLayout* fieldLayout, EmitVarChain* inChain)
+{
+    EmitVarChain chain(fieldLayout, inChain);
+    _emitCUDAParameterGroupFieldLayoutSemantics(&chain);
+}
+
+void CUDASourceEmitter::_emitCUDAParameterGroup(IRGlobalParam* varDecl, IRUniformParameterGroupType* type)
+{
+    if (as<IRTextureBufferType>(type))
+    {
+        m_writer->emit("tbuffer ");
+    }
+    else
+    {
+        m_writer->emit("cbuffer ");
+    }
+    m_writer->emit(getName(varDecl));
+
+    auto varLayout = getVarLayout(varDecl);
+    SLANG_RELEASE_ASSERT(varLayout);
+
+    EmitVarChain blockChain(varLayout);
+
+    EmitVarChain containerChain = blockChain;
+    EmitVarChain elementChain = blockChain;
+
+    auto typeLayout = varLayout->getTypeLayout();
+    if (auto parameterGroupTypeLayout = as<IRParameterGroupTypeLayout>(typeLayout))
+    {
+        containerChain = EmitVarChain(parameterGroupTypeLayout->getContainerVarLayout(), &blockChain);
+        elementChain = EmitVarChain(parameterGroupTypeLayout->getElementVarLayout(), &blockChain);
+
+        typeLayout = parameterGroupTypeLayout->getElementVarLayout()->getTypeLayout();
+    }
+
+    _emitCUDARegisterSemantic(LayoutResourceKind::ConstantBuffer, &containerChain);
+
+    m_writer->emit("\n{\n");
+    m_writer->indent();
+
+    auto elementType = type->getElementType();
+
+    emitType(elementType, getName(varDecl));
+    m_writer->emit(";\n");
+
+    m_writer->dedent();
+    m_writer->emit("}\n");
+}
+
+void CUDASourceEmitter::emitLayoutSemanticsImpl(IRInst* inst, char const* uniformSemanticSpelling)
+{
+    auto layout = getVarLayout(inst); 
+    if (layout)
+    {
+        _emitCUDARegisterSemantics(layout, uniformSemanticSpelling);
+    }
+}
+
+void CUDASourceEmitter::emitParameterGroupImpl(IRGlobalParam* varDecl, IRUniformParameterGroupType* type)
+{
+    _emitCUDAParameterGroup(varDecl, type);
+}
+
+void CUDASourceEmitter::emitEntryPointAttributesImpl(IRFunc* irFunc, IREntryPointDecoration* entryPointDecor)
+{
+    auto profile = m_effectiveProfile;
+    auto stage = entryPointDecor->getProfile().GetStage();
+
+    switch (stage)
+    {
+        case Stage::Compute:
+        {
+            Int sizeAlongAxis[kThreadGroupAxisCount];
+            getComputeThreadGroupSize(irFunc, sizeAlongAxis);
+
+#if 0
+            m_writer->emit("[numthreads(");
+            for (int ii = 0; ii < kThreadGroupAxisCount; ++ii)
+            {
+                if (ii != 0) m_writer->emit(", ");
+                m_writer->emit(sizeAlongAxis[ii]);
+            }
+            m_writer->emit(")]\n");
+#endif
+
+            m_writer->emit("__global__ ");
+            break;
+        }
+        
+        // TODO: There are other stages that will need this kind of handling.
+        default:
+            break;
+    }
+}
+
+void CUDASourceEmitter::emitOperandImpl(IRInst* inst, EmitOpInfo const&  outerPrec)
+{
+    if (shouldFoldInstIntoUseSites(inst))
+    {
+        emitInstExpr(inst, outerPrec);
+        return;
+    }
+
+    switch (inst->op)
+    {
+        case kIROp_Param:
+        {
+            auto varLayout = getVarLayout(inst);
+            if (varLayout)
+            {
+                if (auto systemValueSemantic = varLayout->findSystemValueSemanticAttr())
+                {
+                    String semanticNameSpelling = systemValueSemantic->getName();
+                    semanticNameSpelling = semanticNameSpelling.toLower();
+
+                    if (semanticNameSpelling == "sv_dispatchthreadid")
+                    {
+                        m_semanticUsedFlags |= SemanticUsedFlag::DispatchThreadID;
+                        m_writer->emit("((blockIdx * blockDim) + threadIdx)");
+
+                        return;
+                    }
+                    else if (semanticNameSpelling == "sv_groupid")
+                    {
+                        m_semanticUsedFlags |= SemanticUsedFlag::GroupID;
+                        m_writer->emit("blockIdx");
+                        return;
+                    }
+                    else if (semanticNameSpelling == "sv_groupthreadid")
+                    {
+                        m_semanticUsedFlags |= SemanticUsedFlag::GroupThreadID;
+                        m_writer->emit("threadIdx");
+                        return;
+                    }
+                }
+            }
+
+            break;
+        }
+        default: break;
+    }
+    m_writer->emit(getName(inst));
+}
+
+bool CUDASourceEmitter::tryEmitInstExprImpl(IRInst* inst, const EmitOpInfo& inOuterPrec)
+{
+    switch (inst->op)
+    {
+        case kIROp_Construct:
+        case kIROp_makeVector:
+        {
+            if (inst->getOperandCount() == 1)
+            {
+                EmitOpInfo outerPrec = inOuterPrec;
+                bool needClose = false;
+
+                auto prec = getInfo(EmitOp::Prefix);
+                needClose = maybeEmitParens(outerPrec, prec);
+
+                // Need to emit as cast for HLSL
+                m_writer->emit("(");
+                emitType(inst->getDataType());
+                m_writer->emit(") ");
+                emitOperand(inst->getOperand(0), rightSide(outerPrec, prec));
+
+                maybeCloseParens(needClose);
+                // Handled
+                return true;
+            }
+            else
+            {
+                m_writer->emit("make_");
+                m_writer->emit(_getCUDATypeName(inst->getDataType()));
+                emitArgs(inst);
+                return true;
+            }
+            break;
+        }
+        case kIROp_MakeMatrix:
+        {
+            return false;
+        }
+        case kIROp_BitCast:
+        {
+            auto toType = extractBaseType(inst->getDataType());
+            switch (toType)
+            {
+                default:
+                    m_writer->emit("/* unhandled */");
+                    break;
+                case BaseType::UInt:
+                    break;
+                case BaseType::Int:
+                    m_writer->emit("(");
+                    emitType(inst->getDataType());
+                    m_writer->emit(")");
+                    break;
+                case BaseType::Float:
+                    m_writer->emit("asfloat");
+                    break;
+            }
+
+            m_writer->emit("(");
+            emitOperand(inst->getOperand(0), getInfo(EmitOp::General));
+            m_writer->emit(")");
+            return true;
+        }
+        default: break;
+    }
+    // Not handled
+    return false;
+}
+
+void CUDASourceEmitter::emitLayoutDirectivesImpl(TargetRequest* targetReq)
+{
+    SLANG_UNUSED(targetReq);
+}
+
+void CUDASourceEmitter::emitVectorTypeNameImpl(IRType* elementType, IRIntegerValue elementCount)
+{
+    m_writer->emit(getVectorPrefix(elementType->op));
+    m_writer->emit(elementCount);
+}
+
+void CUDASourceEmitter::emitSimpleTypeImpl(IRType* type)
+{
+    m_writer->emit(_getCUDATypeName(type));
+}
+
+void CUDASourceEmitter::emitRateQualifiersImpl(IRRate* rate)
+{
+    if (as<IRGroupSharedRate>(rate))
+    {
+        m_writer->emit("groupshared ");
+    }
+}
+
+void CUDASourceEmitter::emitSimpleFuncParamsImpl(IRFunc* func)
+{
+    m_writer->emit("(");
+
+    bool hasEmittedParam = false;
+    auto firstParam = func->getFirstParam();
+    for (auto pp = firstParam; pp; pp = pp->getNextParam())
+    {
+        auto varLayout = getVarLayout(pp);
+        if (varLayout && varLayout->findSystemValueSemanticAttr())
+        {
+            // If it has a semantic don't output, it will be accessed via a global
+            continue;
+        }
+
+        if (hasEmittedParam)
+            m_writer->emit(", ");
+
+        emitSimpleFuncParamImpl(pp);
+        hasEmittedParam = true;
+    }
+
+    m_writer->emit(")");
+}
+
+void CUDASourceEmitter::emitSimpleFuncImpl(IRFunc* func)
+{
+    if (IREntryPointDecoration* entryPointDecor = func->findDecoration<IREntryPointDecoration>())
+    {
+        // If its an entry point, we let the entry point attribute control the output
+        Super::emitSimpleFuncImpl(func);
+    }
+    else
+    {
+        // If it's not an entry point mark as device
+        m_writer->emit("__device__ ");
+        Super::emitSimpleFuncImpl(func);
+    }
+}
+
+void CUDASourceEmitter::emitSemanticsImpl(IRInst* inst)
+{
+    if (auto semanticDecoration = inst->findDecoration<IRSemanticDecoration>())
+    {
+        m_writer->emit(" : ");
+        m_writer->emit(semanticDecoration->getSemanticName());
+        return;
+    }
+
+    if (auto layoutDecoration = inst->findDecoration<IRLayoutDecoration>())
+    {
+        auto layout = layoutDecoration->getLayout();
+        if (auto varLayout = as<IRVarLayout>(layout))
+        {
+            emitSemanticsUsingVarLayout(varLayout);
+        }
+        else if (auto entryPointLayout = as<IREntryPointLayout>(layout))
+        {
+            if (auto resultLayout = entryPointLayout->getResultLayout())
+            {
+                emitSemanticsUsingVarLayout(resultLayout);
+            }
+        }
+    }
+}
+
+static UnownedStringSlice _getInterpolationModifierText(IRInterpolationMode mode)
+{
+    switch (mode)
+    {
+        case IRInterpolationMode::NoInterpolation:      return UnownedStringSlice::fromLiteral("nointerpolation");
+        case IRInterpolationMode::NoPerspective:        return UnownedStringSlice::fromLiteral("noperspective");
+        case IRInterpolationMode::Linear:               return UnownedStringSlice::fromLiteral("linear");
+        case IRInterpolationMode::Sample:               return UnownedStringSlice::fromLiteral("sample");
+        case IRInterpolationMode::Centroid:             return UnownedStringSlice::fromLiteral("centroid");
+        default:                                        return UnownedStringSlice();
+    }
+}
+
+void CUDASourceEmitter::emitInterpolationModifiersImpl(IRInst* varInst, IRType* valueType, IRVarLayout* layout)
+{
+    SLANG_UNUSED(layout);
+    SLANG_UNUSED(valueType);
+
+    for (auto dd : varInst->getDecorations())
+    {
+        if (dd->op != kIROp_InterpolationModeDecoration)
+            continue;
+
+        auto decoration = (IRInterpolationModeDecoration*)dd;
+  
+        UnownedStringSlice modeText = _getInterpolationModifierText(decoration->getMode());
+        if (modeText.size() > 0)
+        {
+            m_writer->emit(modeText);
+            m_writer->emitChar(' ');
+        }
+    }
+}
+
+void CUDASourceEmitter::emitVarDecorationsImpl(IRInst* varDecl)
+{
+    if (varDecl->findDecoration<IRGloballyCoherentDecoration>())
+    {
+        m_writer->emit("globallycoherent\n");
+    }
+}
+
+void CUDASourceEmitter::emitMatrixLayoutModifiersImpl(IRVarLayout* layout)
+{
+    // When a variable has a matrix type, we want to emit an explicit
+    // layout qualifier based on what the layout has been computed to be.
+    //
+
+    auto typeLayout = layout->getTypeLayout()->unwrapArray();
+
+    if (auto matrixTypeLayout = as<IRMatrixTypeLayout>(typeLayout))
+    {
+        switch (matrixTypeLayout->getMode())
+        {
+            case kMatrixLayoutMode_ColumnMajor:
+                m_writer->emit("column_major ");
+                break;
+
+            case kMatrixLayoutMode_RowMajor:
+                m_writer->emit("row_major ");
+                break;
+        }
+    }
+}
+
+
+} // namespace Slang
author	jsmall-nvidia <jsmall@nvidia.com>	2019-12-19 11:23:14 -0500
committer	GitHub <noreply@github.com>	2019-12-19 11:23:14 -0500
commit	e3fe0319467546bae070137c58dcf8f9fbe93c79 (patch)
tree	6cc26ccda33725e98c4a9a0408cf31a1348db268 /source/slang/slang-emit-cuda.cpp
parent	60934d98fbc20d83b5e149e72a197ec4f5c61580 (diff)