diff options
| author | jsmall-nvidia <jsmall@nvidia.com> | 2019-12-19 11:23:14 -0500 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2019-12-19 11:23:14 -0500 |
| commit | e3fe0319467546bae070137c58dcf8f9fbe93c79 (patch) | |
| tree | 6cc26ccda33725e98c4a9a0408cf31a1348db268 /source/slang/slang-emit-cuda.cpp | |
| parent | 60934d98fbc20d83b5e149e72a197ec4f5c61580 (diff) | |
WIP CUDA source emit (#1157)
* CPPCompiler -> DownstreamCompiler
* Added DownstreamCompileResult to start abstraction such that we don't need files.
* * Split out slang-blob.cpp
* Made CompileResult hold a DownstreamCompileResult - for access to binary or ISlangSharedLibrary
* Keep temporary files in scope.
* Add a hash to the hex dump stream.
* Move all file tracking into DownstreamCompiler.
* WIP support for nvrtc.
* WIP: Adding support for nvrtc compiler.
Adding enum types, wiring up the nvrtc into slang.
* Fix remaining CPPCompiler references.
* Fix order issue on target string matching.
* Use ISlangSharedLibrary for nvrtc.
* Use DownstreamCompiler for nvrtc.
* WIP first pass at compilation win nvrtc.
* Added testing if file is on file system into CommandLineDownstreamCompiler.
Added sourceContentsPath.
* Make test cuda-compile.cu work by just compiling not comparing output.
* Genearlize DownstreamCompiler usage.
* Fix warning on clang.
* Remove CompilerType from DownstreamCompiler.
* Use DownstreamCompiler interface for all compilers.
NOTE for FXC, DXC and GLSLANG this doesn't mean using 'compile' - it's still extracting functions from shared library.
* Replace DownstreamCompiler::SourceType -> SlangSourceLanguage
* Replace _canCompile with something data driven.
* Fix compiling on gcc/clang for DownstreamCompiler.
* Moved some text conversions into DownstreamCompiler.
* Fix problem on non-vc builds with not having return on locateCompilers for VS.
* Change so no warning for code not reachable on locateCompilers for vs.
* WIP: CUDA code generation - currently just using CPU layout and HLSL.
* emitXXXForEntryPoint -> emitEntryPointSource
emitSourceForEntryPoint -> emitEntryPointSourceFromIR
Fix up generating cuda to get PTX.
* WIP emitting cuda for IR.
* Small improvements to CUDA ouput.
* Disable the CUDA emit test, as output not currently compilable.
Diffstat (limited to 'source/slang/slang-emit-cuda.cpp')
| -rw-r--r-- | source/slang/slang-emit-cuda.cpp | 844 |
1 files changed, 844 insertions, 0 deletions
diff --git a/source/slang/slang-emit-cuda.cpp b/source/slang/slang-emit-cuda.cpp new file mode 100644 index 000000000..feafc4e4e --- /dev/null +++ b/source/slang/slang-emit-cuda.cpp @@ -0,0 +1,844 @@ +// slang-emit-cuda.cpp +#include "slang-emit-cuda.h" + +#include "../core/slang-writer.h" + +#include "slang-emit-source-writer.h" +#include "slang-mangled-lexer.h" + +#include <assert.h> + +namespace Slang { + +/* static */ UnownedStringSlice CUDASourceEmitter::getBuiltinTypeName(IROp op) +{ + switch (op) + { + case kIROp_VoidType: return UnownedStringSlice("void"); + case kIROp_BoolType: return UnownedStringSlice("bool"); + + case kIROp_Int8Type: return UnownedStringSlice("char"); + case kIROp_Int16Type: return UnownedStringSlice("short"); + case kIROp_IntType: return UnownedStringSlice("int"); + case kIROp_Int64Type: return UnownedStringSlice("long long"); + + case kIROp_UInt8Type: return UnownedStringSlice("unsigned char"); + case kIROp_UInt16Type: return UnownedStringSlice("unsigned short"); + case kIROp_UIntType: return UnownedStringSlice("unsigned int"); + case kIROp_UInt64Type: return UnownedStringSlice("unsigned long long"); + + // Not clear just yet how we should handle half... we want all processing as float probly, but when reading/writing to memory converting + case kIROp_HalfType: return UnownedStringSlice("half"); + + case kIROp_FloatType: return UnownedStringSlice("float"); + case kIROp_DoubleType: return UnownedStringSlice("double"); + default: return UnownedStringSlice(); + } +} + + +/* static */ UnownedStringSlice CUDASourceEmitter::getVectorPrefix(IROp op) +{ + switch (op) + { + case kIROp_BoolType: return UnownedStringSlice("bool"); + + case kIROp_Int8Type: return UnownedStringSlice("char"); + case kIROp_Int16Type: return UnownedStringSlice("short"); + case kIROp_IntType: return UnownedStringSlice("int"); + case kIROp_Int64Type: return UnownedStringSlice("longlong"); + + case kIROp_UInt8Type: return UnownedStringSlice("uchar"); + case kIROp_UInt16Type: return UnownedStringSlice("ushort"); + case kIROp_UIntType: return UnownedStringSlice("uint"); + case kIROp_UInt64Type: return UnownedStringSlice("ulonglong"); + + // Not clear just yet how we should handle half... we want all processing as float probly, but when reading/writing to memory converting + case kIROp_HalfType: return UnownedStringSlice("half"); + + case kIROp_FloatType: return UnownedStringSlice("float"); + case kIROp_DoubleType: return UnownedStringSlice("double"); + default: return UnownedStringSlice(); + } +} + +SlangResult CUDASourceEmitter::_calcCUDATextureTypeName(IRTextureTypeBase* texType, StringBuilder& outName) +{ + // texture<float, cudaTextureType2D, cudaReadModeElementType> texRef; + + // Not clear how to do this yet + if (texType->isMultisample() || texType->isArray()) + { + return SLANG_FAIL; + } + + outName << "texture<"; + outName << _getCUDATypeName(texType->getElementType()); + outName << ", "; + + switch (texType->GetBaseShape()) + { + case TextureFlavor::Shape::Shape1D: outName << "cudaTextureType1D"; break; + case TextureFlavor::Shape::Shape2D: outName << "cudaTextureType2D"; break; + case TextureFlavor::Shape::Shape3D: outName << "cudaTextureType3D"; break; + case TextureFlavor::Shape::ShapeCube: outName << "cudaTextureTypeCubemap"; break; + case TextureFlavor::Shape::ShapeBuffer: outName << "Buffer"; break; + default: + SLANG_DIAGNOSE_UNEXPECTED(getSink(), SourceLoc(), "unhandled resource shape"); + return SLANG_FAIL; + } + + outName << ", "; + + switch (texType->getAccess()) + { + case SLANG_RESOURCE_ACCESS_READ: + { + // Other value is cudaReadModeNormalizedFloat + + outName << "cudaReadModeElementType"; + break; + } + default: + { + SLANG_DIAGNOSE_UNEXPECTED(getSink(), SourceLoc(), "unhandled resource access mode"); + return SLANG_FAIL; + } + } + + outName << ">"; + return SLANG_OK; +} + +// This is junk.. +static UnownedStringSlice _getCUDAResourceTypePrefix(IROp op) +{ + switch (op) + { + case kIROp_HLSLStructuredBufferType: return UnownedStringSlice::fromLiteral("StructuredBuffer"); + case kIROp_HLSLRWStructuredBufferType: return UnownedStringSlice::fromLiteral("RWStructuredBuffer"); + case kIROp_HLSLRWByteAddressBufferType: return UnownedStringSlice::fromLiteral("RWByteAddressBuffer"); + case kIROp_HLSLByteAddressBufferType: return UnownedStringSlice::fromLiteral("ByteAddressBuffer"); + case kIROp_SamplerStateType: return UnownedStringSlice::fromLiteral("SamplerState"); + case kIROp_SamplerComparisonStateType: return UnownedStringSlice::fromLiteral("SamplerComparisonState"); + case kIROp_HLSLRasterizerOrderedStructuredBufferType: return UnownedStringSlice::fromLiteral("RasterizerOrderedStructuredBuffer"); + case kIROp_HLSLAppendStructuredBufferType: return UnownedStringSlice::fromLiteral("AppendStructuredBuffer"); + case kIROp_HLSLConsumeStructuredBufferType: return UnownedStringSlice::fromLiteral("ConsumeStructuredBuffer"); + case kIROp_HLSLRasterizerOrderedByteAddressBufferType: return UnownedStringSlice::fromLiteral("RasterizerOrderedByteAddressBuffer"); + case kIROp_RaytracingAccelerationStructureType: return UnownedStringSlice::fromLiteral("RaytracingAccelerationStructure"); + + default: return UnownedStringSlice(); + } +} + +SlangResult CUDASourceEmitter::_calcCUDATypeName(IRType* type, StringBuilder& out) +{ + switch (type->op) + { + case kIROp_HalfType: + { + // Special case half + out << getBuiltinTypeName(kIROp_FloatType); + return SLANG_OK; + } + case kIROp_VectorType: + { + auto vecType = static_cast<IRVectorType*>(type); + auto vecCount = int(GetIntVal(vecType->getElementCount())); + const IROp elemType = vecType->getElementType()->op; + + UnownedStringSlice prefix = getVectorPrefix(elemType); + if (prefix.size() <= 0) + { + return SLANG_FAIL; + } + out << prefix << vecCount; + return SLANG_OK; + } +#if 0 + case kIROp_MatrixType: + { + auto matType = static_cast<IRMatrixType*>(type); + + auto elementType = matType->getElementType(); + const auto rowCount = int(GetIntVal(matType->getRowCount())); + const auto colCount = int(GetIntVal(matType->getColumnCount())); + + if (target == CodeGenTarget::CPPSource) + { + out << "Matrix<" << getBuiltinTypeName(elementType->op) << ", " << rowCount << ", " << colCount << ">"; + } + else + { + out << "Mat"; + const UnownedStringSlice postFix = _getCTypeVecPostFix(_getCType(elementType->op)); + out << postFix; + if (postFix.size() > 1) + { + out << "_"; + } + out << rowCount; + out << colCount; + } + return SLANG_OK; + } + case kIROp_ArrayType: + { + auto arrayType = static_cast<IRArrayType*>(type); + auto elementType = arrayType->getElementType(); + int elementCount = int(GetIntVal(arrayType->getElementCount())); + + out << "FixedArray<"; + SLANG_RETURN_ON_FAIL(_calcTypeName(elementType, target, out)); + out << ", " << elementCount << ">"; + return SLANG_OK; + } + case kIROp_UnsizedArrayType: + { + auto arrayType = static_cast<IRUnsizedArrayType*>(type); + auto elementType = arrayType->getElementType(); + + out << "Array<"; + SLANG_RETURN_ON_FAIL(_calcTypeName(elementType, target, out)); + out << ">"; + return SLANG_OK; + } +#endif + default: + { + if (isNominalOp(type->op)) + { + out << getName(type); + return SLANG_OK; + } + + if (IRBasicType::isaImpl(type->op)) + { + out << getBuiltinTypeName(type->op); + return SLANG_OK; + } + + if (auto texType = as<IRTextureTypeBase>(type)) + { + // We don't support TextureSampler, so ignore that + if (texType->op != kIROp_TextureSamplerType) + { + return _calcCUDATextureTypeName(texType, out); + } + } + + // If _getResourceTypePrefix returns something, we assume can output any specialization after it in order. + { + UnownedStringSlice prefix = _getCUDAResourceTypePrefix(type->op); + if (prefix.size() > 0) + { + auto oldWriter = m_writer; + SourceManager* sourceManager = oldWriter->getSourceManager(); + + // TODO(JS): This is a bit of a hack. We don't want to emit the result here, + // so we replace the writer, write out the type, grab the contents, and restore the writer + + SourceWriter writer(sourceManager, LineDirectiveMode::None); + m_writer = &writer; + + m_writer->emit(prefix); + + // TODO(JS). + // Assumes ordering of types matches ordering of operands. + + UInt operandCount = type->getOperandCount(); + if (operandCount) + { + m_writer->emit("<"); + for (UInt ii = 0; ii < operandCount; ++ii) + { + if (ii != 0) + { + m_writer->emit(", "); + } + emitVal(type->getOperand(ii), getInfo(EmitOp::General)); + } + m_writer->emit(">"); + } + + out << writer.getContent(); + + m_writer = oldWriter; + return SLANG_OK; + } + } + + break; + } + } + + SLANG_DIAGNOSE_UNEXPECTED(getSink(), SourceLoc(), "unhandled type for CUDA emit"); + return SLANG_FAIL; +} + + +UnownedStringSlice CUDASourceEmitter::_getCUDATypeName(IRType* type) +{ + StringSlicePool::Handle handle = StringSlicePool::kNullHandle; + if (m_typeNameMap.TryGetValue(type, handle)) + { + return m_slicePool.getSlice(handle); + } + +#if 0 + if (type->op == kIROp_MatrixType) + { + auto matType = static_cast<IRMatrixType*>(type); + + auto elementType = matType->getElementType(); + const auto rowCount = int(GetIntVal(matType->getRowCount())); + const auto colCount = int(GetIntVal(matType->getColumnCount())); + + // Make sure the vector type the matrix is built on is added + useType(_getVecType(elementType, colCount)); + } +#endif + + StringBuilder builder; + if (SLANG_SUCCEEDED(_calcCUDATypeName(type, builder))) + { + handle = m_slicePool.add(builder); + } + + m_typeNameMap.Add(type, handle); + + SLANG_ASSERT(handle != StringSlicePool::kNullHandle); + return m_slicePool.getSlice(handle); +} + +void CUDASourceEmitter::_emitCUDADecorationSingleString(const char* name, IRFunc* entryPoint, IRStringLit* val) +{ + SLANG_UNUSED(entryPoint); + assert(val); + + m_writer->emit("["); + m_writer->emit(name); + m_writer->emit("(\""); + m_writer->emit(val->getStringSlice()); + m_writer->emit("\")]\n"); +} + +void CUDASourceEmitter::_emitCUDADecorationSingleInt(const char* name, IRFunc* entryPoint, IRIntLit* val) +{ + SLANG_UNUSED(entryPoint); + SLANG_ASSERT(val); + + auto intVal = GetIntVal(val); + + m_writer->emit("["); + m_writer->emit(name); + m_writer->emit("("); + m_writer->emit(intVal); + m_writer->emit(")]\n"); +} + +void CUDASourceEmitter::_emitCUDARegisterSemantic(LayoutResourceKind kind, EmitVarChain* chain, char const* uniformSemanticSpelling) +{ + if (!chain) + return; + if (!chain->varLayout->usesResourceKind(kind)) + return; + + UInt index = getBindingOffset(chain, kind); + UInt space = getBindingSpace(chain, kind); + + switch (kind) + { + case LayoutResourceKind::Uniform: + { + UInt offset = index; + + // The HLSL `c` register space is logically grouped in 16-byte registers, + // while we try to traffic in byte offsets. That means we need to pick + // a register number, based on the starting offset in 16-byte register + // units, and then a "component" within that register, based on 4-byte + // offsets from there. We cannot support more fine-grained offsets than that. + + m_writer->emit(" : "); + m_writer->emit(uniformSemanticSpelling); + m_writer->emit("(c"); + + // Size of a logical `c` register in bytes + auto registerSize = 16; + + // Size of each component of a logical `c` register, in bytes + auto componentSize = 4; + + size_t startRegister = offset / registerSize; + m_writer->emit(int(startRegister)); + + size_t byteOffsetInRegister = offset % registerSize; + + // If this field doesn't start on an even register boundary, + // then we need to emit additional information to pick the + // right component to start from + if (byteOffsetInRegister != 0) + { + // The value had better occupy a whole number of components. + SLANG_RELEASE_ASSERT(byteOffsetInRegister % componentSize == 0); + + size_t startComponent = byteOffsetInRegister / componentSize; + + static const char* kComponentNames[] = { "x", "y", "z", "w" }; + m_writer->emit("."); + m_writer->emit(kComponentNames[startComponent]); + } + m_writer->emit(")"); + } + break; + + case LayoutResourceKind::RegisterSpace: + case LayoutResourceKind::GenericResource: + case LayoutResourceKind::ExistentialTypeParam: + case LayoutResourceKind::ExistentialObjectParam: + // ignore + break; + default: + { + m_writer->emit(" : register("); + switch (kind) + { + case LayoutResourceKind::ConstantBuffer: + m_writer->emit("b"); + break; + case LayoutResourceKind::ShaderResource: + m_writer->emit("t"); + break; + case LayoutResourceKind::UnorderedAccess: + m_writer->emit("u"); + break; + case LayoutResourceKind::SamplerState: + m_writer->emit("s"); + break; + default: + SLANG_DIAGNOSE_UNEXPECTED(getSink(), SourceLoc(), "unhandled HLSL register type"); + break; + } + m_writer->emit(index); + if (space) + { + m_writer->emit(", space"); + m_writer->emit(space); + } + m_writer->emit(")"); + } + } +} + +void CUDASourceEmitter::_emitCUDARegisterSemantics(EmitVarChain* chain, char const* uniformSemanticSpelling) +{ + if (!chain) return; + + auto layout = chain->varLayout; + + switch (getSourceStyle()) + { + default: + return; + + case SourceStyle::HLSL: + break; + } + + for (auto rr : layout->getOffsetAttrs()) + { + _emitCUDARegisterSemantic(rr->getResourceKind(), chain, uniformSemanticSpelling); + } +} + +void CUDASourceEmitter::_emitCUDARegisterSemantics(IRVarLayout* varLayout, char const* uniformSemanticSpelling) +{ + if (!varLayout) + return; + + EmitVarChain chain(varLayout); + _emitCUDARegisterSemantics(&chain, uniformSemanticSpelling); +} + +void CUDASourceEmitter::_emitCUDAParameterGroupFieldLayoutSemantics(EmitVarChain* chain) +{ + if (!chain) + return; + + auto layout = chain->varLayout; + for (auto rr : layout->getOffsetAttrs()) + { + _emitCUDARegisterSemantic(rr->getResourceKind(), chain, "packoffset"); + } +} + +void CUDASourceEmitter::_emitCUDAParameterGroupFieldLayoutSemantics(IRVarLayout* fieldLayout, EmitVarChain* inChain) +{ + EmitVarChain chain(fieldLayout, inChain); + _emitCUDAParameterGroupFieldLayoutSemantics(&chain); +} + +void CUDASourceEmitter::_emitCUDAParameterGroup(IRGlobalParam* varDecl, IRUniformParameterGroupType* type) +{ + if (as<IRTextureBufferType>(type)) + { + m_writer->emit("tbuffer "); + } + else + { + m_writer->emit("cbuffer "); + } + m_writer->emit(getName(varDecl)); + + auto varLayout = getVarLayout(varDecl); + SLANG_RELEASE_ASSERT(varLayout); + + EmitVarChain blockChain(varLayout); + + EmitVarChain containerChain = blockChain; + EmitVarChain elementChain = blockChain; + + auto typeLayout = varLayout->getTypeLayout(); + if (auto parameterGroupTypeLayout = as<IRParameterGroupTypeLayout>(typeLayout)) + { + containerChain = EmitVarChain(parameterGroupTypeLayout->getContainerVarLayout(), &blockChain); + elementChain = EmitVarChain(parameterGroupTypeLayout->getElementVarLayout(), &blockChain); + + typeLayout = parameterGroupTypeLayout->getElementVarLayout()->getTypeLayout(); + } + + _emitCUDARegisterSemantic(LayoutResourceKind::ConstantBuffer, &containerChain); + + m_writer->emit("\n{\n"); + m_writer->indent(); + + auto elementType = type->getElementType(); + + emitType(elementType, getName(varDecl)); + m_writer->emit(";\n"); + + m_writer->dedent(); + m_writer->emit("}\n"); +} + +void CUDASourceEmitter::emitLayoutSemanticsImpl(IRInst* inst, char const* uniformSemanticSpelling) +{ + auto layout = getVarLayout(inst); + if (layout) + { + _emitCUDARegisterSemantics(layout, uniformSemanticSpelling); + } +} + +void CUDASourceEmitter::emitParameterGroupImpl(IRGlobalParam* varDecl, IRUniformParameterGroupType* type) +{ + _emitCUDAParameterGroup(varDecl, type); +} + +void CUDASourceEmitter::emitEntryPointAttributesImpl(IRFunc* irFunc, IREntryPointDecoration* entryPointDecor) +{ + auto profile = m_effectiveProfile; + auto stage = entryPointDecor->getProfile().GetStage(); + + switch (stage) + { + case Stage::Compute: + { + Int sizeAlongAxis[kThreadGroupAxisCount]; + getComputeThreadGroupSize(irFunc, sizeAlongAxis); + +#if 0 + m_writer->emit("[numthreads("); + for (int ii = 0; ii < kThreadGroupAxisCount; ++ii) + { + if (ii != 0) m_writer->emit(", "); + m_writer->emit(sizeAlongAxis[ii]); + } + m_writer->emit(")]\n"); +#endif + + m_writer->emit("__global__ "); + break; + } + + // TODO: There are other stages that will need this kind of handling. + default: + break; + } +} + +void CUDASourceEmitter::emitOperandImpl(IRInst* inst, EmitOpInfo const& outerPrec) +{ + if (shouldFoldInstIntoUseSites(inst)) + { + emitInstExpr(inst, outerPrec); + return; + } + + switch (inst->op) + { + case kIROp_Param: + { + auto varLayout = getVarLayout(inst); + if (varLayout) + { + if (auto systemValueSemantic = varLayout->findSystemValueSemanticAttr()) + { + String semanticNameSpelling = systemValueSemantic->getName(); + semanticNameSpelling = semanticNameSpelling.toLower(); + + if (semanticNameSpelling == "sv_dispatchthreadid") + { + m_semanticUsedFlags |= SemanticUsedFlag::DispatchThreadID; + m_writer->emit("((blockIdx * blockDim) + threadIdx)"); + + return; + } + else if (semanticNameSpelling == "sv_groupid") + { + m_semanticUsedFlags |= SemanticUsedFlag::GroupID; + m_writer->emit("blockIdx"); + return; + } + else if (semanticNameSpelling == "sv_groupthreadid") + { + m_semanticUsedFlags |= SemanticUsedFlag::GroupThreadID; + m_writer->emit("threadIdx"); + return; + } + } + } + + break; + } + default: break; + } + m_writer->emit(getName(inst)); +} + +bool CUDASourceEmitter::tryEmitInstExprImpl(IRInst* inst, const EmitOpInfo& inOuterPrec) +{ + switch (inst->op) + { + case kIROp_Construct: + case kIROp_makeVector: + { + if (inst->getOperandCount() == 1) + { + EmitOpInfo outerPrec = inOuterPrec; + bool needClose = false; + + auto prec = getInfo(EmitOp::Prefix); + needClose = maybeEmitParens(outerPrec, prec); + + // Need to emit as cast for HLSL + m_writer->emit("("); + emitType(inst->getDataType()); + m_writer->emit(") "); + emitOperand(inst->getOperand(0), rightSide(outerPrec, prec)); + + maybeCloseParens(needClose); + // Handled + return true; + } + else + { + m_writer->emit("make_"); + m_writer->emit(_getCUDATypeName(inst->getDataType())); + emitArgs(inst); + return true; + } + break; + } + case kIROp_MakeMatrix: + { + return false; + } + case kIROp_BitCast: + { + auto toType = extractBaseType(inst->getDataType()); + switch (toType) + { + default: + m_writer->emit("/* unhandled */"); + break; + case BaseType::UInt: + break; + case BaseType::Int: + m_writer->emit("("); + emitType(inst->getDataType()); + m_writer->emit(")"); + break; + case BaseType::Float: + m_writer->emit("asfloat"); + break; + } + + m_writer->emit("("); + emitOperand(inst->getOperand(0), getInfo(EmitOp::General)); + m_writer->emit(")"); + return true; + } + default: break; + } + // Not handled + return false; +} + +void CUDASourceEmitter::emitLayoutDirectivesImpl(TargetRequest* targetReq) +{ + SLANG_UNUSED(targetReq); +} + +void CUDASourceEmitter::emitVectorTypeNameImpl(IRType* elementType, IRIntegerValue elementCount) +{ + m_writer->emit(getVectorPrefix(elementType->op)); + m_writer->emit(elementCount); +} + +void CUDASourceEmitter::emitSimpleTypeImpl(IRType* type) +{ + m_writer->emit(_getCUDATypeName(type)); +} + +void CUDASourceEmitter::emitRateQualifiersImpl(IRRate* rate) +{ + if (as<IRGroupSharedRate>(rate)) + { + m_writer->emit("groupshared "); + } +} + +void CUDASourceEmitter::emitSimpleFuncParamsImpl(IRFunc* func) +{ + m_writer->emit("("); + + bool hasEmittedParam = false; + auto firstParam = func->getFirstParam(); + for (auto pp = firstParam; pp; pp = pp->getNextParam()) + { + auto varLayout = getVarLayout(pp); + if (varLayout && varLayout->findSystemValueSemanticAttr()) + { + // If it has a semantic don't output, it will be accessed via a global + continue; + } + + if (hasEmittedParam) + m_writer->emit(", "); + + emitSimpleFuncParamImpl(pp); + hasEmittedParam = true; + } + + m_writer->emit(")"); +} + +void CUDASourceEmitter::emitSimpleFuncImpl(IRFunc* func) +{ + if (IREntryPointDecoration* entryPointDecor = func->findDecoration<IREntryPointDecoration>()) + { + // If its an entry point, we let the entry point attribute control the output + Super::emitSimpleFuncImpl(func); + } + else + { + // If it's not an entry point mark as device + m_writer->emit("__device__ "); + Super::emitSimpleFuncImpl(func); + } +} + +void CUDASourceEmitter::emitSemanticsImpl(IRInst* inst) +{ + if (auto semanticDecoration = inst->findDecoration<IRSemanticDecoration>()) + { + m_writer->emit(" : "); + m_writer->emit(semanticDecoration->getSemanticName()); + return; + } + + if (auto layoutDecoration = inst->findDecoration<IRLayoutDecoration>()) + { + auto layout = layoutDecoration->getLayout(); + if (auto varLayout = as<IRVarLayout>(layout)) + { + emitSemanticsUsingVarLayout(varLayout); + } + else if (auto entryPointLayout = as<IREntryPointLayout>(layout)) + { + if (auto resultLayout = entryPointLayout->getResultLayout()) + { + emitSemanticsUsingVarLayout(resultLayout); + } + } + } +} + +static UnownedStringSlice _getInterpolationModifierText(IRInterpolationMode mode) +{ + switch (mode) + { + case IRInterpolationMode::NoInterpolation: return UnownedStringSlice::fromLiteral("nointerpolation"); + case IRInterpolationMode::NoPerspective: return UnownedStringSlice::fromLiteral("noperspective"); + case IRInterpolationMode::Linear: return UnownedStringSlice::fromLiteral("linear"); + case IRInterpolationMode::Sample: return UnownedStringSlice::fromLiteral("sample"); + case IRInterpolationMode::Centroid: return UnownedStringSlice::fromLiteral("centroid"); + default: return UnownedStringSlice(); + } +} + +void CUDASourceEmitter::emitInterpolationModifiersImpl(IRInst* varInst, IRType* valueType, IRVarLayout* layout) +{ + SLANG_UNUSED(layout); + SLANG_UNUSED(valueType); + + for (auto dd : varInst->getDecorations()) + { + if (dd->op != kIROp_InterpolationModeDecoration) + continue; + + auto decoration = (IRInterpolationModeDecoration*)dd; + + UnownedStringSlice modeText = _getInterpolationModifierText(decoration->getMode()); + if (modeText.size() > 0) + { + m_writer->emit(modeText); + m_writer->emitChar(' '); + } + } +} + +void CUDASourceEmitter::emitVarDecorationsImpl(IRInst* varDecl) +{ + if (varDecl->findDecoration<IRGloballyCoherentDecoration>()) + { + m_writer->emit("globallycoherent\n"); + } +} + +void CUDASourceEmitter::emitMatrixLayoutModifiersImpl(IRVarLayout* layout) +{ + // When a variable has a matrix type, we want to emit an explicit + // layout qualifier based on what the layout has been computed to be. + // + + auto typeLayout = layout->getTypeLayout()->unwrapArray(); + + if (auto matrixTypeLayout = as<IRMatrixTypeLayout>(typeLayout)) + { + switch (matrixTypeLayout->getMode()) + { + case kMatrixLayoutMode_ColumnMajor: + m_writer->emit("column_major "); + break; + + case kMatrixLayoutMode_RowMajor: + m_writer->emit("row_major "); + break; + } + } +} + + +} // namespace Slang |
