diff options
| author | jsmall-nvidia <jsmall@nvidia.com> | 2019-12-19 11:23:14 -0500 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2019-12-19 11:23:14 -0500 |
| commit | e3fe0319467546bae070137c58dcf8f9fbe93c79 (patch) | |
| tree | 6cc26ccda33725e98c4a9a0408cf31a1348db268 | |
| parent | 60934d98fbc20d83b5e149e72a197ec4f5c61580 (diff) | |
WIP CUDA source emit (#1157)
* CPPCompiler -> DownstreamCompiler
* Added DownstreamCompileResult to start abstraction such that we don't need files.
* * Split out slang-blob.cpp
* Made CompileResult hold a DownstreamCompileResult - for access to binary or ISlangSharedLibrary
* Keep temporary files in scope.
* Add a hash to the hex dump stream.
* Move all file tracking into DownstreamCompiler.
* WIP support for nvrtc.
* WIP: Adding support for nvrtc compiler.
Adding enum types, wiring up the nvrtc into slang.
* Fix remaining CPPCompiler references.
* Fix order issue on target string matching.
* Use ISlangSharedLibrary for nvrtc.
* Use DownstreamCompiler for nvrtc.
* WIP first pass at compilation win nvrtc.
* Added testing if file is on file system into CommandLineDownstreamCompiler.
Added sourceContentsPath.
* Make test cuda-compile.cu work by just compiling not comparing output.
* Genearlize DownstreamCompiler usage.
* Fix warning on clang.
* Remove CompilerType from DownstreamCompiler.
* Use DownstreamCompiler interface for all compilers.
NOTE for FXC, DXC and GLSLANG this doesn't mean using 'compile' - it's still extracting functions from shared library.
* Replace DownstreamCompiler::SourceType -> SlangSourceLanguage
* Replace _canCompile with something data driven.
* Fix compiling on gcc/clang for DownstreamCompiler.
* Moved some text conversions into DownstreamCompiler.
* Fix problem on non-vc builds with not having return on locateCompilers for VS.
* Change so no warning for code not reachable on locateCompilers for vs.
* WIP: CUDA code generation - currently just using CPU layout and HLSL.
* emitXXXForEntryPoint -> emitEntryPointSource
emitSourceForEntryPoint -> emitEntryPointSourceFromIR
Fix up generating cuda to get PTX.
* WIP emitting cuda for IR.
* Small improvements to CUDA ouput.
* Disable the CUDA emit test, as output not currently compilable.
| -rw-r--r-- | source/core/slang-downstream-compiler.cpp | 13 | ||||
| -rw-r--r-- | source/core/slang-downstream-compiler.h | 3 | ||||
| -rw-r--r-- | source/slang/slang-compiler.cpp | 186 | ||||
| -rw-r--r-- | source/slang/slang-compiler.h | 9 | ||||
| -rw-r--r-- | source/slang/slang-dxc-support.cpp | 9 | ||||
| -rw-r--r-- | source/slang/slang-emit-c-like.cpp | 36 | ||||
| -rw-r--r-- | source/slang/slang-emit-c-like.h | 2 | ||||
| -rw-r--r-- | source/slang/slang-emit-cuda.cpp | 844 | ||||
| -rw-r--r-- | source/slang/slang-emit-cuda.h | 80 | ||||
| -rw-r--r-- | source/slang/slang-emit.cpp | 20 | ||||
| -rw-r--r-- | source/slang/slang-emit.h | 2 | ||||
| -rw-r--r-- | source/slang/slang-ir-link.cpp | 4 | ||||
| -rw-r--r-- | source/slang/slang-parameter-binding.cpp | 16 | ||||
| -rw-r--r-- | source/slang/slang-type-layout.cpp | 113 | ||||
| -rw-r--r-- | source/slang/slang.vcxproj | 4 | ||||
| -rw-r--r-- | source/slang/slang.vcxproj.filters | 6 | ||||
| -rw-r--r-- | tests/cuda/compile-to-cuda.slang | 29 | ||||
| -rw-r--r-- | tools/slang-test/slang-test-main.cpp | 1 |
18 files changed, 1217 insertions, 160 deletions
diff --git a/source/core/slang-downstream-compiler.cpp b/source/core/slang-downstream-compiler.cpp index 2e78ea22b..2f0cff1a9 100644 --- a/source/core/slang-downstream-compiler.cpp +++ b/source/core/slang-downstream-compiler.cpp @@ -182,6 +182,19 @@ void DownstreamCompiler::Desc::appendAsText(StringBuilder& out) const return UnownedStringSlice::fromLiteral("unknown"); } +/* static */SlangCompileTarget DownstreamCompiler::getCompileTarget(SlangSourceLanguage sourceLanguage) +{ + switch (sourceLanguage) + { + case SLANG_SOURCE_LANGUAGE_HLSL: return SLANG_HLSL; + case SLANG_SOURCE_LANGUAGE_GLSL: return SLANG_GLSL; + case SLANG_SOURCE_LANGUAGE_C: return SLANG_C_SOURCE; + case SLANG_SOURCE_LANGUAGE_CPP: return SLANG_CPP_SOURCE; + case SLANG_SOURCE_LANGUAGE_CUDA: return SLANG_CUDA_SOURCE; + default: return SLANG_TARGET_UNKNOWN; + } +} + /* !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! DownstreamDiagnostics !!!!!!!!!!!!!!!!!!!!!!*/ Index DownstreamDiagnostics::getCountByType(Diagnostic::Type type) const diff --git a/source/core/slang-downstream-compiler.h b/source/core/slang-downstream-compiler.h index 57fb88a3c..a4238ce38 100644 --- a/source/core/slang-downstream-compiler.h +++ b/source/core/slang-downstream-compiler.h @@ -296,6 +296,9 @@ public: /// Get the compilers name static UnownedStringSlice getPassThroughName(SlangPassThrough passThru); + /// Given a source language return as the equivalent compile target + static SlangCompileTarget getCompileTarget(SlangSourceLanguage sourceLanguage); + protected: static Infos s_infos; diff --git a/source/slang/slang-compiler.cpp b/source/slang/slang-compiler.cpp index 07bfaacc9..d0fca1ab5 100644 --- a/source/slang/slang-compiler.cpp +++ b/source/slang/slang-compiler.cpp @@ -478,8 +478,6 @@ namespace Slang return PassThroughMode::None; } case CodeGenTarget::GLSL: - case CodeGenTarget::GLSL_Vulkan: - case CodeGenTarget::GLSL_Vulkan_OneDesc: { // Can always output GLSL return PassThroughMode::None; @@ -489,6 +487,11 @@ namespace Slang // Can always output HLSL return PassThroughMode::None; } + case CodeGenTarget::CUDASource: + { + // Can always output CUDA + return PassThroughMode::None; + } case CodeGenTarget::SPIRVAssembly: case CodeGenTarget::SPIRV: { @@ -504,6 +507,11 @@ namespace Slang { return PassThroughMode::Dxc; } + case CodeGenTarget::GLSL_Vulkan: + case CodeGenTarget::GLSL_Vulkan_OneDesc: + { + return PassThroughMode::Glslang; + } case CodeGenTarget::CPPSource: case CodeGenTarget::CSource: { @@ -581,10 +589,11 @@ namespace Slang outCodeBuilder << fileContent << "\n"; } - String emitHLSLForEntryPoint( + String emitEntryPointSource( BackEndCompileRequest* compileRequest, Int entryPointIndex, TargetRequest* targetReq, + CodeGenTarget target, EndToEndCompileRequest* endToEndReq) { if(auto translationUnit = findPassThroughTranslationUnit(endToEndReq, entryPointIndex)) @@ -596,91 +605,41 @@ namespace Slang // mode. StringBuilder codeBuilder; - for(auto sourceFile : translationUnit->getSourceFiles()) - { - _appendCodeWithPath(sourceFile->getPathInfo().foundPath.getUnownedSlice(), sourceFile->getContent(), codeBuilder); - } - - return codeBuilder.ProduceString(); - } - else - { - return emitEntryPointSource( - compileRequest, - entryPointIndex, - CodeGenTarget::HLSL, - targetReq); - } - } - - String emitCPPForEntryPoint( - BackEndCompileRequest* compileRequest, - Int entryPointIndex, - TargetRequest* targetReq, - EndToEndCompileRequest* endToEndReq) - { - if (auto translationUnit = findPassThroughTranslationUnit(endToEndReq, entryPointIndex)) - { - // Generate a string that includes the content of - // the source file(s), along with a line directive - // to ensure that we get reasonable messages - // from the downstream compiler when in pass-through - // mode. - - StringBuilder codeBuilder; - for (auto sourceFile : translationUnit->getSourceFiles()) + if (target == CodeGenTarget::GLSL) { - _appendCodeWithPath(sourceFile->getPathInfo().foundPath.getUnownedSlice(), sourceFile->getContent(), codeBuilder); + // Special case GLSL + int translationUnitCounter = 0; + for (auto sourceFile : translationUnit->getSourceFiles()) + { + int translationUnitIndex = translationUnitCounter++; + + // We want to output `#line` directives, but we need + // to skip this for the first file, since otherwise + // some GLSL implementations will get tripped up by + // not having the `#version` directive be the first + // thing in the file. + if (translationUnitIndex != 0) + { + codeBuilder << "#line 1 " << translationUnitIndex << "\n"; + } + codeBuilder << sourceFile->getContent() << "\n"; + } } - - return codeBuilder.ProduceString(); - } - else - { - return emitEntryPointSource(compileRequest, entryPointIndex, CodeGenTarget::CPPSource, targetReq); - } - } - - String emitGLSLForEntryPoint( - BackEndCompileRequest* compileRequest, - Int entryPointIndex, - TargetRequest* targetReq, - EndToEndCompileRequest* endToEndReq) - { - if(auto translationUnit = findPassThroughTranslationUnit(endToEndReq, entryPointIndex)) - { - // Generate a string that includes the content of - // the source file(s), along with a line directive - // to ensure that we get reasonable messages - // from the downstream compiler when in pass-through - // mode. - - StringBuilder codeBuilder; - int translationUnitCounter = 0; - for(auto sourceFile : translationUnit->getSourceFiles()) + else { - int translationUnitIndex = translationUnitCounter++; - - // We want to output `#line` directives, but we need - // to skip this for the first file, since otherwise - // some GLSL implementations will get tripped up by - // not having the `#version` directive be the first - // thing in the file. - if(translationUnitIndex != 0) + for(auto sourceFile : translationUnit->getSourceFiles()) { - codeBuilder << "#line 1 " << translationUnitIndex << "\n"; + _appendCodeWithPath(sourceFile->getPathInfo().foundPath.getUnownedSlice(), sourceFile->getContent(), codeBuilder); } - codeBuilder << sourceFile->getContent() << "\n"; } - return codeBuilder.ProduceString(); } else { - return emitEntryPointSource( + return emitEntryPointSourceFromIR( compileRequest, entryPointIndex, - CodeGenTarget::GLSL, + target, targetReq); } } @@ -964,7 +923,7 @@ namespace Slang return SLANG_FAIL; } - auto hlslCode = emitHLSLForEntryPoint(compileRequest, entryPointIndex, targetReq, endToEndReq); + auto hlslCode = emitEntryPointSource(compileRequest, entryPointIndex, targetReq, CodeGenTarget::HLSL, endToEndReq); maybeDumpIntermediate(compileRequest, hlslCode.getBuffer(), CodeGenTarget::HLSL); auto profile = getEffectiveProfile(entryPoint, targetReq); @@ -1251,13 +1210,16 @@ SlangResult dissassembleDXILUsingDXC( EndToEndCompileRequest* endToEndReq, RefPtr<DownstreamCompileResult>& outResult) { + outResult.setNull(); + auto sink = slangRequest->getSink(); auto session = slangRequest->getSession(); const String originalSourcePath = calcSourcePathForEntryPoint(endToEndReq, entryPointIndex); - outResult.setNull(); + CodeGenTarget sourceTarget = CodeGenTarget::None; + SourceLanguage sourceLanguage = SourceLanguage::Unknown; PassThroughMode downstreamCompiler = endToEndReq->passThrough; @@ -1265,23 +1227,26 @@ SlangResult dissassembleDXILUsingDXC( if (downstreamCompiler == PassThroughMode::None) { auto target = targetReq->target; - switch (target) { case CodeGenTarget::PTX: { - downstreamCompiler = PassThroughMode(session->getDefaultDownstreamCompiler(SLANG_SOURCE_LANGUAGE_CUDA)); + sourceTarget = CodeGenTarget::CUDASource; + sourceLanguage = SourceLanguage::CUDA; break; } case CodeGenTarget::HostCallable: case CodeGenTarget::SharedLibrary: case CodeGenTarget::Executable: { - downstreamCompiler = PassThroughMode(session->getDefaultDownstreamCompiler(SLANG_SOURCE_LANGUAGE_CPP)); + sourceTarget = CodeGenTarget::CPPSource; + sourceLanguage = SourceLanguage::CPP; break; } default: break; } + + downstreamCompiler = PassThroughMode(session->getDefaultDownstreamCompiler(SlangSourceLanguage(sourceLanguage))); } // Get the required downstream compiler @@ -1301,8 +1266,6 @@ SlangResult dissassembleDXILUsingDXC( return SLANG_FAIL; } - SourceLanguage rawSourceLanguage = SourceLanguage::Unknown; - Dictionary<String, String> preprocessorDefinitions; List<String> includePaths; @@ -1357,8 +1320,10 @@ SlangResult dissassembleDXILUsingDXC( } // We are just passing thru, so it's whatever it originally was - rawSourceLanguage = translationUnit->sourceLanguage; + sourceLanguage = translationUnit->sourceLanguage; + sourceTarget = CodeGenTarget(DownstreamCompiler::getCompileTarget(SlangSourceLanguage(sourceLanguage))); + // Special case if we have a single file, so that we pass the path, and the contents const auto& sourceFiles = translationUnit->getSourceFiles(); if (sourceFiles.getCount() == 1) { @@ -1372,30 +1337,18 @@ SlangResult dissassembleDXILUsingDXC( } else { - // If can't just use file, concat together and make - StringBuilder codeBuilder; - for (auto sourceFile : translationUnit->getSourceFiles()) - { - _appendCodeWithPath(sourceFile->getPathInfo().foundPath.getUnownedSlice(), sourceFile->getContent(), codeBuilder); - } - options.sourceContents = codeBuilder.ProduceString(); + options.sourceContents = emitEntryPointSource(slangRequest, entryPointIndex, targetReq, sourceTarget, endToEndReq); } } else { - options.sourceContents = emitCPPForEntryPoint( - slangRequest, - entryPointIndex, - targetReq, - endToEndReq); - - maybeDumpIntermediate(slangRequest, options.sourceContents.getBuffer(), CodeGenTarget::CPPSource); + options.sourceContents = emitEntryPointSource(slangRequest, entryPointIndex, targetReq, sourceTarget, endToEndReq); - rawSourceLanguage = SourceLanguage::CPP; + maybeDumpIntermediate(slangRequest, options.sourceContents.getBuffer(), sourceTarget); } // Set the source type - options.sourceLanguage = SlangSourceLanguage(rawSourceLanguage); + options.sourceLanguage = SlangSourceLanguage(sourceLanguage); // Disable exceptions and security checks options.flags &= ~(CompileOptions::Flag::EnableExceptionHandling | CompileOptions::Flag::EnableSecurityChecks); @@ -1556,10 +1509,11 @@ SlangResult dissassembleDXILUsingDXC( { spirvOut.clear(); - String rawGLSL = emitGLSLForEntryPoint( + String rawGLSL = emitEntryPointSource( slangRequest, entryPointIndex, targetReq, + CodeGenTarget::GLSL, endToEndReq); maybeDumpIntermediate(slangRequest, rawGLSL.getBuffer(), CodeGenTarget::GLSL); @@ -1671,38 +1625,18 @@ SlangResult dissassembleDXILUsingDXC( } } break; - case CodeGenTarget::HLSL: - { - String code = emitHLSLForEntryPoint( - compileRequest, - entryPointIndex, - targetReq, - endToEndReq); - maybeDumpIntermediate(compileRequest, code.getBuffer(), target); - result = CompileResult(code); - } - break; - case CodeGenTarget::GLSL: - { - String code = emitGLSLForEntryPoint( - compileRequest, - entryPointIndex, - targetReq, - endToEndReq); - maybeDumpIntermediate(compileRequest, code.getBuffer(), target); - result = CompileResult(code); - } - break; - + case CodeGenTarget::HLSL: + case CodeGenTarget::CUDASource: case CodeGenTarget::CPPSource: case CodeGenTarget::CSource: { String code = emitEntryPointSource( compileRequest, entryPointIndex, - target, - targetReq); + targetReq, + target, + endToEndReq); maybeDumpIntermediate(compileRequest, code.getBuffer(), target); result = CompileResult(code); } diff --git a/source/slang/slang-compiler.h b/source/slang/slang-compiler.h index 1a385ab23..2783197af 100644 --- a/source/slang/slang-compiler.h +++ b/source/slang/slang-compiler.h @@ -1797,6 +1797,15 @@ namespace Slang @return the appropriate source filename */ String calcSourcePathForEntryPoint(EndToEndCompileRequest* endToEndReq, UInt entryPointIndex); + /* Emits entry point source taking into account if a pass-through or not. Uses 'target' to determine + the target (not targetReq) */ + String emitEntryPointSource( + BackEndCompileRequest* compileRequest, + Int entryPointIndex, + TargetRequest* targetReq, + CodeGenTarget target, + EndToEndCompileRequest* endToEndReq); + struct TypeCheckingCache; // diff --git a/source/slang/slang-dxc-support.cpp b/source/slang/slang-dxc-support.cpp index e0ca4df82..828138143 100644 --- a/source/slang/slang-dxc-support.cpp +++ b/source/slang/slang-dxc-support.cpp @@ -29,11 +29,7 @@ namespace Slang { String GetHLSLProfileName(Profile profile); - String emitHLSLForEntryPoint( - BackEndCompileRequest* compileRequest, - Int entryPointIndex, - TargetRequest* targetReq, - EndToEndCompileRequest* endToEndReq); + SlangResult locateDXCCompilers(const String& path, ISlangSharedLibraryLoader* loader, DownstreamCompilerSet* set); @@ -85,10 +81,11 @@ namespace Slang // Now let's go ahead and generate HLSL for the entry // point, since we'll need that to feed into dxc. - auto hlslCode = emitHLSLForEntryPoint( + auto hlslCode = emitEntryPointSource( compileRequest, entryPointIndex, targetReq, + CodeGenTarget::HLSL, endToEndReq); maybeDumpIntermediate(compileRequest, hlslCode.getBuffer(), CodeGenTarget::HLSL); diff --git a/source/slang/slang-emit-c-like.cpp b/source/slang/slang-emit-c-like.cpp index 2c1bcbe6d..1a2fbb0f4 100644 --- a/source/slang/slang-emit-c-like.cpp +++ b/source/slang/slang-emit-c-like.cpp @@ -98,6 +98,7 @@ struct CLikeSourceEmitter::ComputeEmitActionsContext { return SourceStyle::HLSL; } + case CodeGenTarget::PTX: case CodeGenTarget::SPIRV: case CodeGenTarget::SPIRVAssembly: case CodeGenTarget::DXBytecode: @@ -115,6 +116,10 @@ struct CLikeSourceEmitter::ComputeEmitActionsContext { return SourceStyle::CPP; } + case CodeGenTarget::CUDASource: + { + return SourceStyle::CUDA; + } } } @@ -343,6 +348,7 @@ bool CLikeSourceEmitter::isTargetIntrinsicModifierApplicable(const String& targe case SourceStyle::CPP: return targetName == "cpp"; case SourceStyle::GLSL: return targetName == "glsl"; case SourceStyle::HLSL: return targetName == "hlsl"; + case SourceStyle::CUDA: return targetName == "cuda"; } } @@ -1021,6 +1027,7 @@ void CLikeSourceEmitter::emitInstResultDecl(IRInst* inst) switch (getSourceStyle()) { + case SourceStyle::CUDA: case SourceStyle::HLSL: case SourceStyle::C: case SourceStyle::CPP: @@ -2507,6 +2514,22 @@ void CLikeSourceEmitter::emitSimpleFuncParamImpl(IRParam* param) emitSemantics(param); } +void CLikeSourceEmitter::emitSimpleFuncParamsImpl(IRFunc* func) +{ + m_writer->emit("("); + + auto firstParam = func->getFirstParam(); + for (auto pp = firstParam; pp; pp = pp->getNextParam()) + { + if (pp != firstParam) + m_writer->emit(", "); + + emitSimpleFuncParamImpl(pp); + } + + m_writer->emit(")"); +} + void CLikeSourceEmitter::emitSimpleFuncImpl(IRFunc* func) { auto resultType = func->getResultType(); @@ -2521,18 +2544,7 @@ void CLikeSourceEmitter::emitSimpleFuncImpl(IRFunc* func) auto name = getName(func); emitType(resultType, name); - - m_writer->emit("("); - auto firstParam = func->getFirstParam(); - for( auto pp = firstParam; pp; pp = pp->getNextParam()) - { - if(pp != firstParam) - m_writer->emit(", "); - - emitSimpleFuncParamImpl(pp); - } - m_writer->emit(")"); - + emitSimpleFuncParamsImpl(func); emitSemantics(func); // TODO: encode declaration vs. definition diff --git a/source/slang/slang-emit-c-like.h b/source/slang/slang-emit-c-like.h index 4b5c68b6c..a6c48dc73 100644 --- a/source/slang/slang-emit-c-like.h +++ b/source/slang/slang-emit-c-like.h @@ -47,6 +47,7 @@ public: HLSL, C, CPP, + CUDA, CountOf, }; @@ -315,6 +316,7 @@ public: virtual void emitRateQualifiersImpl(IRRate* rate) { SLANG_UNUSED(rate); } virtual void emitSemanticsImpl(IRInst* inst) { SLANG_UNUSED(inst); } virtual void emitSimpleFuncParamImpl(IRParam* param); + virtual void emitSimpleFuncParamsImpl(IRFunc* func); virtual void emitInterpolationModifiersImpl(IRInst* varInst, IRType* valueType, IRVarLayout* layout) { SLANG_UNUSED(varInst); SLANG_UNUSED(valueType); SLANG_UNUSED(layout); } virtual void emitSimpleTypeImpl(IRType* type) = 0; virtual void emitVarDecorationsImpl(IRInst* varDecl) { SLANG_UNUSED(varDecl); } diff --git a/source/slang/slang-emit-cuda.cpp b/source/slang/slang-emit-cuda.cpp new file mode 100644 index 000000000..feafc4e4e --- /dev/null +++ b/source/slang/slang-emit-cuda.cpp @@ -0,0 +1,844 @@ +// slang-emit-cuda.cpp +#include "slang-emit-cuda.h" + +#include "../core/slang-writer.h" + +#include "slang-emit-source-writer.h" +#include "slang-mangled-lexer.h" + +#include <assert.h> + +namespace Slang { + +/* static */ UnownedStringSlice CUDASourceEmitter::getBuiltinTypeName(IROp op) +{ + switch (op) + { + case kIROp_VoidType: return UnownedStringSlice("void"); + case kIROp_BoolType: return UnownedStringSlice("bool"); + + case kIROp_Int8Type: return UnownedStringSlice("char"); + case kIROp_Int16Type: return UnownedStringSlice("short"); + case kIROp_IntType: return UnownedStringSlice("int"); + case kIROp_Int64Type: return UnownedStringSlice("long long"); + + case kIROp_UInt8Type: return UnownedStringSlice("unsigned char"); + case kIROp_UInt16Type: return UnownedStringSlice("unsigned short"); + case kIROp_UIntType: return UnownedStringSlice("unsigned int"); + case kIROp_UInt64Type: return UnownedStringSlice("unsigned long long"); + + // Not clear just yet how we should handle half... we want all processing as float probly, but when reading/writing to memory converting + case kIROp_HalfType: return UnownedStringSlice("half"); + + case kIROp_FloatType: return UnownedStringSlice("float"); + case kIROp_DoubleType: return UnownedStringSlice("double"); + default: return UnownedStringSlice(); + } +} + + +/* static */ UnownedStringSlice CUDASourceEmitter::getVectorPrefix(IROp op) +{ + switch (op) + { + case kIROp_BoolType: return UnownedStringSlice("bool"); + + case kIROp_Int8Type: return UnownedStringSlice("char"); + case kIROp_Int16Type: return UnownedStringSlice("short"); + case kIROp_IntType: return UnownedStringSlice("int"); + case kIROp_Int64Type: return UnownedStringSlice("longlong"); + + case kIROp_UInt8Type: return UnownedStringSlice("uchar"); + case kIROp_UInt16Type: return UnownedStringSlice("ushort"); + case kIROp_UIntType: return UnownedStringSlice("uint"); + case kIROp_UInt64Type: return UnownedStringSlice("ulonglong"); + + // Not clear just yet how we should handle half... we want all processing as float probly, but when reading/writing to memory converting + case kIROp_HalfType: return UnownedStringSlice("half"); + + case kIROp_FloatType: return UnownedStringSlice("float"); + case kIROp_DoubleType: return UnownedStringSlice("double"); + default: return UnownedStringSlice(); + } +} + +SlangResult CUDASourceEmitter::_calcCUDATextureTypeName(IRTextureTypeBase* texType, StringBuilder& outName) +{ + // texture<float, cudaTextureType2D, cudaReadModeElementType> texRef; + + // Not clear how to do this yet + if (texType->isMultisample() || texType->isArray()) + { + return SLANG_FAIL; + } + + outName << "texture<"; + outName << _getCUDATypeName(texType->getElementType()); + outName << ", "; + + switch (texType->GetBaseShape()) + { + case TextureFlavor::Shape::Shape1D: outName << "cudaTextureType1D"; break; + case TextureFlavor::Shape::Shape2D: outName << "cudaTextureType2D"; break; + case TextureFlavor::Shape::Shape3D: outName << "cudaTextureType3D"; break; + case TextureFlavor::Shape::ShapeCube: outName << "cudaTextureTypeCubemap"; break; + case TextureFlavor::Shape::ShapeBuffer: outName << "Buffer"; break; + default: + SLANG_DIAGNOSE_UNEXPECTED(getSink(), SourceLoc(), "unhandled resource shape"); + return SLANG_FAIL; + } + + outName << ", "; + + switch (texType->getAccess()) + { + case SLANG_RESOURCE_ACCESS_READ: + { + // Other value is cudaReadModeNormalizedFloat + + outName << "cudaReadModeElementType"; + break; + } + default: + { + SLANG_DIAGNOSE_UNEXPECTED(getSink(), SourceLoc(), "unhandled resource access mode"); + return SLANG_FAIL; + } + } + + outName << ">"; + return SLANG_OK; +} + +// This is junk.. +static UnownedStringSlice _getCUDAResourceTypePrefix(IROp op) +{ + switch (op) + { + case kIROp_HLSLStructuredBufferType: return UnownedStringSlice::fromLiteral("StructuredBuffer"); + case kIROp_HLSLRWStructuredBufferType: return UnownedStringSlice::fromLiteral("RWStructuredBuffer"); + case kIROp_HLSLRWByteAddressBufferType: return UnownedStringSlice::fromLiteral("RWByteAddressBuffer"); + case kIROp_HLSLByteAddressBufferType: return UnownedStringSlice::fromLiteral("ByteAddressBuffer"); + case kIROp_SamplerStateType: return UnownedStringSlice::fromLiteral("SamplerState"); + case kIROp_SamplerComparisonStateType: return UnownedStringSlice::fromLiteral("SamplerComparisonState"); + case kIROp_HLSLRasterizerOrderedStructuredBufferType: return UnownedStringSlice::fromLiteral("RasterizerOrderedStructuredBuffer"); + case kIROp_HLSLAppendStructuredBufferType: return UnownedStringSlice::fromLiteral("AppendStructuredBuffer"); + case kIROp_HLSLConsumeStructuredBufferType: return UnownedStringSlice::fromLiteral("ConsumeStructuredBuffer"); + case kIROp_HLSLRasterizerOrderedByteAddressBufferType: return UnownedStringSlice::fromLiteral("RasterizerOrderedByteAddressBuffer"); + case kIROp_RaytracingAccelerationStructureType: return UnownedStringSlice::fromLiteral("RaytracingAccelerationStructure"); + + default: return UnownedStringSlice(); + } +} + +SlangResult CUDASourceEmitter::_calcCUDATypeName(IRType* type, StringBuilder& out) +{ + switch (type->op) + { + case kIROp_HalfType: + { + // Special case half + out << getBuiltinTypeName(kIROp_FloatType); + return SLANG_OK; + } + case kIROp_VectorType: + { + auto vecType = static_cast<IRVectorType*>(type); + auto vecCount = int(GetIntVal(vecType->getElementCount())); + const IROp elemType = vecType->getElementType()->op; + + UnownedStringSlice prefix = getVectorPrefix(elemType); + if (prefix.size() <= 0) + { + return SLANG_FAIL; + } + out << prefix << vecCount; + return SLANG_OK; + } +#if 0 + case kIROp_MatrixType: + { + auto matType = static_cast<IRMatrixType*>(type); + + auto elementType = matType->getElementType(); + const auto rowCount = int(GetIntVal(matType->getRowCount())); + const auto colCount = int(GetIntVal(matType->getColumnCount())); + + if (target == CodeGenTarget::CPPSource) + { + out << "Matrix<" << getBuiltinTypeName(elementType->op) << ", " << rowCount << ", " << colCount << ">"; + } + else + { + out << "Mat"; + const UnownedStringSlice postFix = _getCTypeVecPostFix(_getCType(elementType->op)); + out << postFix; + if (postFix.size() > 1) + { + out << "_"; + } + out << rowCount; + out << colCount; + } + return SLANG_OK; + } + case kIROp_ArrayType: + { + auto arrayType = static_cast<IRArrayType*>(type); + auto elementType = arrayType->getElementType(); + int elementCount = int(GetIntVal(arrayType->getElementCount())); + + out << "FixedArray<"; + SLANG_RETURN_ON_FAIL(_calcTypeName(elementType, target, out)); + out << ", " << elementCount << ">"; + return SLANG_OK; + } + case kIROp_UnsizedArrayType: + { + auto arrayType = static_cast<IRUnsizedArrayType*>(type); + auto elementType = arrayType->getElementType(); + + out << "Array<"; + SLANG_RETURN_ON_FAIL(_calcTypeName(elementType, target, out)); + out << ">"; + return SLANG_OK; + } +#endif + default: + { + if (isNominalOp(type->op)) + { + out << getName(type); + return SLANG_OK; + } + + if (IRBasicType::isaImpl(type->op)) + { + out << getBuiltinTypeName(type->op); + return SLANG_OK; + } + + if (auto texType = as<IRTextureTypeBase>(type)) + { + // We don't support TextureSampler, so ignore that + if (texType->op != kIROp_TextureSamplerType) + { + return _calcCUDATextureTypeName(texType, out); + } + } + + // If _getResourceTypePrefix returns something, we assume can output any specialization after it in order. + { + UnownedStringSlice prefix = _getCUDAResourceTypePrefix(type->op); + if (prefix.size() > 0) + { + auto oldWriter = m_writer; + SourceManager* sourceManager = oldWriter->getSourceManager(); + + // TODO(JS): This is a bit of a hack. We don't want to emit the result here, + // so we replace the writer, write out the type, grab the contents, and restore the writer + + SourceWriter writer(sourceManager, LineDirectiveMode::None); + m_writer = &writer; + + m_writer->emit(prefix); + + // TODO(JS). + // Assumes ordering of types matches ordering of operands. + + UInt operandCount = type->getOperandCount(); + if (operandCount) + { + m_writer->emit("<"); + for (UInt ii = 0; ii < operandCount; ++ii) + { + if (ii != 0) + { + m_writer->emit(", "); + } + emitVal(type->getOperand(ii), getInfo(EmitOp::General)); + } + m_writer->emit(">"); + } + + out << writer.getContent(); + + m_writer = oldWriter; + return SLANG_OK; + } + } + + break; + } + } + + SLANG_DIAGNOSE_UNEXPECTED(getSink(), SourceLoc(), "unhandled type for CUDA emit"); + return SLANG_FAIL; +} + + +UnownedStringSlice CUDASourceEmitter::_getCUDATypeName(IRType* type) +{ + StringSlicePool::Handle handle = StringSlicePool::kNullHandle; + if (m_typeNameMap.TryGetValue(type, handle)) + { + return m_slicePool.getSlice(handle); + } + +#if 0 + if (type->op == kIROp_MatrixType) + { + auto matType = static_cast<IRMatrixType*>(type); + + auto elementType = matType->getElementType(); + const auto rowCount = int(GetIntVal(matType->getRowCount())); + const auto colCount = int(GetIntVal(matType->getColumnCount())); + + // Make sure the vector type the matrix is built on is added + useType(_getVecType(elementType, colCount)); + } +#endif + + StringBuilder builder; + if (SLANG_SUCCEEDED(_calcCUDATypeName(type, builder))) + { + handle = m_slicePool.add(builder); + } + + m_typeNameMap.Add(type, handle); + + SLANG_ASSERT(handle != StringSlicePool::kNullHandle); + return m_slicePool.getSlice(handle); +} + +void CUDASourceEmitter::_emitCUDADecorationSingleString(const char* name, IRFunc* entryPoint, IRStringLit* val) +{ + SLANG_UNUSED(entryPoint); + assert(val); + + m_writer->emit("["); + m_writer->emit(name); + m_writer->emit("(\""); + m_writer->emit(val->getStringSlice()); + m_writer->emit("\")]\n"); +} + +void CUDASourceEmitter::_emitCUDADecorationSingleInt(const char* name, IRFunc* entryPoint, IRIntLit* val) +{ + SLANG_UNUSED(entryPoint); + SLANG_ASSERT(val); + + auto intVal = GetIntVal(val); + + m_writer->emit("["); + m_writer->emit(name); + m_writer->emit("("); + m_writer->emit(intVal); + m_writer->emit(")]\n"); +} + +void CUDASourceEmitter::_emitCUDARegisterSemantic(LayoutResourceKind kind, EmitVarChain* chain, char const* uniformSemanticSpelling) +{ + if (!chain) + return; + if (!chain->varLayout->usesResourceKind(kind)) + return; + + UInt index = getBindingOffset(chain, kind); + UInt space = getBindingSpace(chain, kind); + + switch (kind) + { + case LayoutResourceKind::Uniform: + { + UInt offset = index; + + // The HLSL `c` register space is logically grouped in 16-byte registers, + // while we try to traffic in byte offsets. That means we need to pick + // a register number, based on the starting offset in 16-byte register + // units, and then a "component" within that register, based on 4-byte + // offsets from there. We cannot support more fine-grained offsets than that. + + m_writer->emit(" : "); + m_writer->emit(uniformSemanticSpelling); + m_writer->emit("(c"); + + // Size of a logical `c` register in bytes + auto registerSize = 16; + + // Size of each component of a logical `c` register, in bytes + auto componentSize = 4; + + size_t startRegister = offset / registerSize; + m_writer->emit(int(startRegister)); + + size_t byteOffsetInRegister = offset % registerSize; + + // If this field doesn't start on an even register boundary, + // then we need to emit additional information to pick the + // right component to start from + if (byteOffsetInRegister != 0) + { + // The value had better occupy a whole number of components. + SLANG_RELEASE_ASSERT(byteOffsetInRegister % componentSize == 0); + + size_t startComponent = byteOffsetInRegister / componentSize; + + static const char* kComponentNames[] = { "x", "y", "z", "w" }; + m_writer->emit("."); + m_writer->emit(kComponentNames[startComponent]); + } + m_writer->emit(")"); + } + break; + + case LayoutResourceKind::RegisterSpace: + case LayoutResourceKind::GenericResource: + case LayoutResourceKind::ExistentialTypeParam: + case LayoutResourceKind::ExistentialObjectParam: + // ignore + break; + default: + { + m_writer->emit(" : register("); + switch (kind) + { + case LayoutResourceKind::ConstantBuffer: + m_writer->emit("b"); + break; + case LayoutResourceKind::ShaderResource: + m_writer->emit("t"); + break; + case LayoutResourceKind::UnorderedAccess: + m_writer->emit("u"); + break; + case LayoutResourceKind::SamplerState: + m_writer->emit("s"); + break; + default: + SLANG_DIAGNOSE_UNEXPECTED(getSink(), SourceLoc(), "unhandled HLSL register type"); + break; + } + m_writer->emit(index); + if (space) + { + m_writer->emit(", space"); + m_writer->emit(space); + } + m_writer->emit(")"); + } + } +} + +void CUDASourceEmitter::_emitCUDARegisterSemantics(EmitVarChain* chain, char const* uniformSemanticSpelling) +{ + if (!chain) return; + + auto layout = chain->varLayout; + + switch (getSourceStyle()) + { + default: + return; + + case SourceStyle::HLSL: + break; + } + + for (auto rr : layout->getOffsetAttrs()) + { + _emitCUDARegisterSemantic(rr->getResourceKind(), chain, uniformSemanticSpelling); + } +} + +void CUDASourceEmitter::_emitCUDARegisterSemantics(IRVarLayout* varLayout, char const* uniformSemanticSpelling) +{ + if (!varLayout) + return; + + EmitVarChain chain(varLayout); + _emitCUDARegisterSemantics(&chain, uniformSemanticSpelling); +} + +void CUDASourceEmitter::_emitCUDAParameterGroupFieldLayoutSemantics(EmitVarChain* chain) +{ + if (!chain) + return; + + auto layout = chain->varLayout; + for (auto rr : layout->getOffsetAttrs()) + { + _emitCUDARegisterSemantic(rr->getResourceKind(), chain, "packoffset"); + } +} + +void CUDASourceEmitter::_emitCUDAParameterGroupFieldLayoutSemantics(IRVarLayout* fieldLayout, EmitVarChain* inChain) +{ + EmitVarChain chain(fieldLayout, inChain); + _emitCUDAParameterGroupFieldLayoutSemantics(&chain); +} + +void CUDASourceEmitter::_emitCUDAParameterGroup(IRGlobalParam* varDecl, IRUniformParameterGroupType* type) +{ + if (as<IRTextureBufferType>(type)) + { + m_writer->emit("tbuffer "); + } + else + { + m_writer->emit("cbuffer "); + } + m_writer->emit(getName(varDecl)); + + auto varLayout = getVarLayout(varDecl); + SLANG_RELEASE_ASSERT(varLayout); + + EmitVarChain blockChain(varLayout); + + EmitVarChain containerChain = blockChain; + EmitVarChain elementChain = blockChain; + + auto typeLayout = varLayout->getTypeLayout(); + if (auto parameterGroupTypeLayout = as<IRParameterGroupTypeLayout>(typeLayout)) + { + containerChain = EmitVarChain(parameterGroupTypeLayout->getContainerVarLayout(), &blockChain); + elementChain = EmitVarChain(parameterGroupTypeLayout->getElementVarLayout(), &blockChain); + + typeLayout = parameterGroupTypeLayout->getElementVarLayout()->getTypeLayout(); + } + + _emitCUDARegisterSemantic(LayoutResourceKind::ConstantBuffer, &containerChain); + + m_writer->emit("\n{\n"); + m_writer->indent(); + + auto elementType = type->getElementType(); + + emitType(elementType, getName(varDecl)); + m_writer->emit(";\n"); + + m_writer->dedent(); + m_writer->emit("}\n"); +} + +void CUDASourceEmitter::emitLayoutSemanticsImpl(IRInst* inst, char const* uniformSemanticSpelling) +{ + auto layout = getVarLayout(inst); + if (layout) + { + _emitCUDARegisterSemantics(layout, uniformSemanticSpelling); + } +} + +void CUDASourceEmitter::emitParameterGroupImpl(IRGlobalParam* varDecl, IRUniformParameterGroupType* type) +{ + _emitCUDAParameterGroup(varDecl, type); +} + +void CUDASourceEmitter::emitEntryPointAttributesImpl(IRFunc* irFunc, IREntryPointDecoration* entryPointDecor) +{ + auto profile = m_effectiveProfile; + auto stage = entryPointDecor->getProfile().GetStage(); + + switch (stage) + { + case Stage::Compute: + { + Int sizeAlongAxis[kThreadGroupAxisCount]; + getComputeThreadGroupSize(irFunc, sizeAlongAxis); + +#if 0 + m_writer->emit("[numthreads("); + for (int ii = 0; ii < kThreadGroupAxisCount; ++ii) + { + if (ii != 0) m_writer->emit(", "); + m_writer->emit(sizeAlongAxis[ii]); + } + m_writer->emit(")]\n"); +#endif + + m_writer->emit("__global__ "); + break; + } + + // TODO: There are other stages that will need this kind of handling. + default: + break; + } +} + +void CUDASourceEmitter::emitOperandImpl(IRInst* inst, EmitOpInfo const& outerPrec) +{ + if (shouldFoldInstIntoUseSites(inst)) + { + emitInstExpr(inst, outerPrec); + return; + } + + switch (inst->op) + { + case kIROp_Param: + { + auto varLayout = getVarLayout(inst); + if (varLayout) + { + if (auto systemValueSemantic = varLayout->findSystemValueSemanticAttr()) + { + String semanticNameSpelling = systemValueSemantic->getName(); + semanticNameSpelling = semanticNameSpelling.toLower(); + + if (semanticNameSpelling == "sv_dispatchthreadid") + { + m_semanticUsedFlags |= SemanticUsedFlag::DispatchThreadID; + m_writer->emit("((blockIdx * blockDim) + threadIdx)"); + + return; + } + else if (semanticNameSpelling == "sv_groupid") + { + m_semanticUsedFlags |= SemanticUsedFlag::GroupID; + m_writer->emit("blockIdx"); + return; + } + else if (semanticNameSpelling == "sv_groupthreadid") + { + m_semanticUsedFlags |= SemanticUsedFlag::GroupThreadID; + m_writer->emit("threadIdx"); + return; + } + } + } + + break; + } + default: break; + } + m_writer->emit(getName(inst)); +} + +bool CUDASourceEmitter::tryEmitInstExprImpl(IRInst* inst, const EmitOpInfo& inOuterPrec) +{ + switch (inst->op) + { + case kIROp_Construct: + case kIROp_makeVector: + { + if (inst->getOperandCount() == 1) + { + EmitOpInfo outerPrec = inOuterPrec; + bool needClose = false; + + auto prec = getInfo(EmitOp::Prefix); + needClose = maybeEmitParens(outerPrec, prec); + + // Need to emit as cast for HLSL + m_writer->emit("("); + emitType(inst->getDataType()); + m_writer->emit(") "); + emitOperand(inst->getOperand(0), rightSide(outerPrec, prec)); + + maybeCloseParens(needClose); + // Handled + return true; + } + else + { + m_writer->emit("make_"); + m_writer->emit(_getCUDATypeName(inst->getDataType())); + emitArgs(inst); + return true; + } + break; + } + case kIROp_MakeMatrix: + { + return false; + } + case kIROp_BitCast: + { + auto toType = extractBaseType(inst->getDataType()); + switch (toType) + { + default: + m_writer->emit("/* unhandled */"); + break; + case BaseType::UInt: + break; + case BaseType::Int: + m_writer->emit("("); + emitType(inst->getDataType()); + m_writer->emit(")"); + break; + case BaseType::Float: + m_writer->emit("asfloat"); + break; + } + + m_writer->emit("("); + emitOperand(inst->getOperand(0), getInfo(EmitOp::General)); + m_writer->emit(")"); + return true; + } + default: break; + } + // Not handled + return false; +} + +void CUDASourceEmitter::emitLayoutDirectivesImpl(TargetRequest* targetReq) +{ + SLANG_UNUSED(targetReq); +} + +void CUDASourceEmitter::emitVectorTypeNameImpl(IRType* elementType, IRIntegerValue elementCount) +{ + m_writer->emit(getVectorPrefix(elementType->op)); + m_writer->emit(elementCount); +} + +void CUDASourceEmitter::emitSimpleTypeImpl(IRType* type) +{ + m_writer->emit(_getCUDATypeName(type)); +} + +void CUDASourceEmitter::emitRateQualifiersImpl(IRRate* rate) +{ + if (as<IRGroupSharedRate>(rate)) + { + m_writer->emit("groupshared "); + } +} + +void CUDASourceEmitter::emitSimpleFuncParamsImpl(IRFunc* func) +{ + m_writer->emit("("); + + bool hasEmittedParam = false; + auto firstParam = func->getFirstParam(); + for (auto pp = firstParam; pp; pp = pp->getNextParam()) + { + auto varLayout = getVarLayout(pp); + if (varLayout && varLayout->findSystemValueSemanticAttr()) + { + // If it has a semantic don't output, it will be accessed via a global + continue; + } + + if (hasEmittedParam) + m_writer->emit(", "); + + emitSimpleFuncParamImpl(pp); + hasEmittedParam = true; + } + + m_writer->emit(")"); +} + +void CUDASourceEmitter::emitSimpleFuncImpl(IRFunc* func) +{ + if (IREntryPointDecoration* entryPointDecor = func->findDecoration<IREntryPointDecoration>()) + { + // If its an entry point, we let the entry point attribute control the output + Super::emitSimpleFuncImpl(func); + } + else + { + // If it's not an entry point mark as device + m_writer->emit("__device__ "); + Super::emitSimpleFuncImpl(func); + } +} + +void CUDASourceEmitter::emitSemanticsImpl(IRInst* inst) +{ + if (auto semanticDecoration = inst->findDecoration<IRSemanticDecoration>()) + { + m_writer->emit(" : "); + m_writer->emit(semanticDecoration->getSemanticName()); + return; + } + + if (auto layoutDecoration = inst->findDecoration<IRLayoutDecoration>()) + { + auto layout = layoutDecoration->getLayout(); + if (auto varLayout = as<IRVarLayout>(layout)) + { + emitSemanticsUsingVarLayout(varLayout); + } + else if (auto entryPointLayout = as<IREntryPointLayout>(layout)) + { + if (auto resultLayout = entryPointLayout->getResultLayout()) + { + emitSemanticsUsingVarLayout(resultLayout); + } + } + } +} + +static UnownedStringSlice _getInterpolationModifierText(IRInterpolationMode mode) +{ + switch (mode) + { + case IRInterpolationMode::NoInterpolation: return UnownedStringSlice::fromLiteral("nointerpolation"); + case IRInterpolationMode::NoPerspective: return UnownedStringSlice::fromLiteral("noperspective"); + case IRInterpolationMode::Linear: return UnownedStringSlice::fromLiteral("linear"); + case IRInterpolationMode::Sample: return UnownedStringSlice::fromLiteral("sample"); + case IRInterpolationMode::Centroid: return UnownedStringSlice::fromLiteral("centroid"); + default: return UnownedStringSlice(); + } +} + +void CUDASourceEmitter::emitInterpolationModifiersImpl(IRInst* varInst, IRType* valueType, IRVarLayout* layout) +{ + SLANG_UNUSED(layout); + SLANG_UNUSED(valueType); + + for (auto dd : varInst->getDecorations()) + { + if (dd->op != kIROp_InterpolationModeDecoration) + continue; + + auto decoration = (IRInterpolationModeDecoration*)dd; + + UnownedStringSlice modeText = _getInterpolationModifierText(decoration->getMode()); + if (modeText.size() > 0) + { + m_writer->emit(modeText); + m_writer->emitChar(' '); + } + } +} + +void CUDASourceEmitter::emitVarDecorationsImpl(IRInst* varDecl) +{ + if (varDecl->findDecoration<IRGloballyCoherentDecoration>()) + { + m_writer->emit("globallycoherent\n"); + } +} + +void CUDASourceEmitter::emitMatrixLayoutModifiersImpl(IRVarLayout* layout) +{ + // When a variable has a matrix type, we want to emit an explicit + // layout qualifier based on what the layout has been computed to be. + // + + auto typeLayout = layout->getTypeLayout()->unwrapArray(); + + if (auto matrixTypeLayout = as<IRMatrixTypeLayout>(typeLayout)) + { + switch (matrixTypeLayout->getMode()) + { + case kMatrixLayoutMode_ColumnMajor: + m_writer->emit("column_major "); + break; + + case kMatrixLayoutMode_RowMajor: + m_writer->emit("row_major "); + break; + } + } +} + + +} // namespace Slang diff --git a/source/slang/slang-emit-cuda.h b/source/slang/slang-emit-cuda.h new file mode 100644 index 000000000..6de2f5ea3 --- /dev/null +++ b/source/slang/slang-emit-cuda.h @@ -0,0 +1,80 @@ +// slang-emit-cuda.h +#ifndef SLANG_EMIT_CUDA_H +#define SLANG_EMIT_CUDA_H + +#include "slang-emit-c-like.h" + +namespace Slang +{ + +class CUDASourceEmitter : public CLikeSourceEmitter +{ +public: + typedef CLikeSourceEmitter Super; + + typedef uint32_t SemanticUsedFlags; + struct SemanticUsedFlag + { + enum Enum : SemanticUsedFlags + { + DispatchThreadID = 0x01, + GroupThreadID = 0x02, + GroupID = 0x04, + }; + }; + + static UnownedStringSlice getBuiltinTypeName(IROp op); + static UnownedStringSlice getVectorPrefix(IROp op); + + CUDASourceEmitter(const Desc& desc) : + Super(desc), + m_slicePool(StringSlicePool::Style::Default) + {} + +protected: + + virtual void emitLayoutSemanticsImpl(IRInst* inst, char const* uniformSemanticSpelling) SLANG_OVERRIDE; + virtual void emitParameterGroupImpl(IRGlobalParam* varDecl, IRUniformParameterGroupType* type) SLANG_OVERRIDE; + virtual void emitEntryPointAttributesImpl(IRFunc* irFunc, IREntryPointDecoration* entryPointDecor) SLANG_OVERRIDE; + virtual void emitLayoutDirectivesImpl(TargetRequest* targetReq) SLANG_OVERRIDE; + virtual void emitRateQualifiersImpl(IRRate* rate) SLANG_OVERRIDE; + virtual void emitSemanticsImpl(IRInst* inst) SLANG_OVERRIDE; + virtual void emitSimpleFuncImpl(IRFunc* func) SLANG_OVERRIDE; + virtual void emitSimpleFuncParamsImpl(IRFunc* func) SLANG_OVERRIDE; + virtual void emitInterpolationModifiersImpl(IRInst* varInst, IRType* valueType, IRVarLayout* layout) SLANG_OVERRIDE; + virtual void emitSimpleTypeImpl(IRType* type) SLANG_OVERRIDE; + virtual void emitVectorTypeNameImpl(IRType* elementType, IRIntegerValue elementCount) SLANG_OVERRIDE; + virtual void emitVarDecorationsImpl(IRInst* varDecl) SLANG_OVERRIDE; + virtual void emitMatrixLayoutModifiersImpl(IRVarLayout* layout) SLANG_OVERRIDE; + virtual void emitOperandImpl(IRInst* inst, EmitOpInfo const& outerPrec) SLANG_OVERRIDE; + + virtual bool tryEmitInstExprImpl(IRInst* inst, const EmitOpInfo& inOuterPrec) SLANG_OVERRIDE; + + // Emit a single `register` semantic, as appropriate for a given resource-type-specific layout info + // Keyword to use in the uniform case (`register` for globals, `packoffset` inside a `cbuffer`) + void _emitCUDARegisterSemantic(LayoutResourceKind kind, EmitVarChain* chain, char const* uniformSemanticSpelling = "register"); + + // Emit all the `register` semantics that are appropriate for a particular variable layout + void _emitCUDARegisterSemantics(EmitVarChain* chain, char const* uniformSemanticSpelling = "register"); + void _emitCUDARegisterSemantics(IRVarLayout* varLayout, char const* uniformSemanticSpelling = "register"); + + void _emitCUDAParameterGroupFieldLayoutSemantics(EmitVarChain* chain); + void _emitCUDAParameterGroupFieldLayoutSemantics(IRVarLayout* fieldLayout, EmitVarChain* inChain); + + void _emitCUDAParameterGroup(IRGlobalParam* varDecl, IRUniformParameterGroupType* type); + + void _emitCUDADecorationSingleString(const char* name, IRFunc* entryPoint, IRStringLit* val); + void _emitCUDADecorationSingleInt(const char* name, IRFunc* entryPoint, IRIntLit* val); + + SlangResult _calcCUDATypeName(IRType* type, StringBuilder& out); + UnownedStringSlice _getCUDATypeName(IRType* inType); + SlangResult _calcCUDATextureTypeName(IRTextureTypeBase* texType, StringBuilder& outName); + + Dictionary<IRType*, StringSlicePool::Handle> m_typeNameMap; + StringSlicePool m_slicePool; + + UInt m_semanticUsedFlags = 0; +}; + +} +#endif diff --git a/source/slang/slang-emit.cpp b/source/slang/slang-emit.cpp index 9aa7b1203..19c3cad68 100644 --- a/source/slang/slang-emit.cpp +++ b/source/slang/slang-emit.cpp @@ -32,23 +32,12 @@ #include "slang-emit-glsl.h" #include "slang-emit-hlsl.h" #include "slang-emit-cpp.h" +#include "slang-emit-cuda.h" #include <assert.h> namespace Slang { -enum class BuiltInCOp -{ - Splat, //< Splat a single value to all values of a vector or matrix type - Init, //< Initialize with parameters (must match the type) -}; - - -// - - -// - EntryPointLayout* findEntryPointLayout( ProgramLayout* programLayout, EntryPoint* entryPoint) @@ -444,7 +433,7 @@ Result linkAndOptimizeIR( return SLANG_OK; } -String emitEntryPointSource( +String emitEntryPointSourceFromIR( BackEndCompileRequest* compileRequest, Int entryPointIndex, CodeGenTarget target, @@ -499,6 +488,11 @@ String emitEntryPointSource( sourceEmitter = new HLSLSourceEmitter(desc); break; } + case SourceStyle::CUDA: + { + sourceEmitter = new CUDASourceEmitter(desc); + break; + } default: break; } diff --git a/source/slang/slang-emit.h b/source/slang/slang-emit.h index e9ee361d7..1f3298316 100644 --- a/source/slang/slang-emit.h +++ b/source/slang/slang-emit.h @@ -34,7 +34,7 @@ namespace Slang /// generate different HLSL output if we know it /// will be used to generate SPIR-V). /// - String emitEntryPointSource( + String emitEntryPointSourceFromIR( BackEndCompileRequest* compileRequest, Int entryPointIndex, CodeGenTarget target, diff --git a/source/slang/slang-ir-link.cpp b/source/slang/slang-ir-link.cpp index 6399e3f6b..904a88d78 100644 --- a/source/slang/slang-ir-link.cpp +++ b/source/slang/slang-ir-link.cpp @@ -919,9 +919,13 @@ String getTargetName(IRSpecContext* context) case CodeGenTarget::CPPSource: return "cpp"; + case CodeGenTarget::CUDASource: + return "cuda"; + case CodeGenTarget::SPIRV: return "spirv"; + default: SLANG_UNEXPECTED("unhandled case"); UNREACHABLE_RETURN("unknown"); diff --git a/source/slang/slang-parameter-binding.cpp b/source/slang/slang-parameter-binding.cpp index 5cd4156c3..8ebc4f420 100644 --- a/source/slang/slang-parameter-binding.cpp +++ b/source/slang/slang-parameter-binding.cpp @@ -2585,6 +2585,19 @@ static bool _isCPUTarget(CodeGenTarget target) } } +static bool _isPTXTarget(CodeGenTarget target) +{ + switch (target) + { + case CodeGenTarget::CUDASource: + case CodeGenTarget::PTX: + { + return true; + } + default: return false; + } +} + /// Keep track of the running global counter for entry points and global parameters visited. /// /// Because of explicit `register` and `[[vk::binding(...)]]` support, parameter binding @@ -2970,7 +2983,8 @@ RefPtr<ProgramLayout> generateParameterBindings( // On a CPU target, it's okay to have global scope parameters that use Uniform resources (because on CPU // all resources are 'Uniform') - if (!_isCPUTarget(targetReq->target)) + // TODO(JS): We'll just assume the same with CUDA target for now.. + if (!_isCPUTarget(targetReq->target) && !_isPTXTarget(targetReq->target)) { for( auto& parameterInfo : sharedContext.parameters ) { diff --git a/source/slang/slang-type-layout.cpp b/source/slang/slang-type-layout.cpp index 88e62323b..772686163 100644 --- a/source/slang/slang-type-layout.cpp +++ b/source/slang/slang-type-layout.cpp @@ -398,6 +398,13 @@ struct CPULayoutRulesImpl : DefaultLayoutRulesImpl } }; +// TODO(JS): Most likely wrong. For layout for CUDA, we'll just do the default to get things up and running +struct CUDALayoutRulesImpl : DefaultLayoutRulesImpl +{ + typedef DefaultLayoutRulesImpl Super; +}; + + struct HLSLStructuredBufferLayoutRulesImpl : DefaultLayoutRulesImpl { // HLSL structured buffers drop the restrictions added for constant buffers, @@ -653,9 +660,29 @@ struct CPULayoutRulesFamilyImpl : LayoutRulesFamilyImpl LayoutRulesImpl* getStructuredBufferRules() override; }; +struct CUDALayoutRulesFamilyImpl : LayoutRulesFamilyImpl +{ + virtual LayoutRulesImpl* getConstantBufferRules() override; + virtual LayoutRulesImpl* getPushConstantBufferRules() override; + virtual LayoutRulesImpl* getTextureBufferRules() override; + virtual LayoutRulesImpl* getVaryingInputRules() override; + virtual LayoutRulesImpl* getVaryingOutputRules() override; + virtual LayoutRulesImpl* getSpecializationConstantRules() override; + virtual LayoutRulesImpl* getShaderStorageBufferRules() override; + virtual LayoutRulesImpl* getParameterBlockRules() override; + + LayoutRulesImpl* getRayPayloadParameterRules() override; + LayoutRulesImpl* getCallablePayloadParameterRules() override; + LayoutRulesImpl* getHitAttributesParameterRules() override; + + LayoutRulesImpl* getShaderRecordConstantBufferRules() override; + LayoutRulesImpl* getStructuredBufferRules() override; +}; + GLSLLayoutRulesFamilyImpl kGLSLLayoutRulesFamilyImpl; HLSLLayoutRulesFamilyImpl kHLSLLayoutRulesFamilyImpl; CPULayoutRulesFamilyImpl kCPULayoutRulesFamilyImpl; +CUDALayoutRulesFamilyImpl kCUDALayoutRulesFamilyImpl; // CPU case @@ -703,6 +730,12 @@ struct CPUObjectLayoutRulesImpl : ObjectLayoutRulesImpl }; +// TODO(JS): Most likely wrong! Use CPU layout for CUDA for now +struct CUDAObjectLayoutRulesImpl : CPUObjectLayoutRulesImpl +{ + typedef CPUObjectLayoutRulesImpl Super; + +}; static CPUObjectLayoutRulesImpl kCPUObjectLayoutRulesImpl; static CPULayoutRulesImpl kCPULayoutRulesImpl; @@ -711,6 +744,16 @@ LayoutRulesImpl kCPULayoutRulesImpl_ = { &kCPULayoutRulesFamilyImpl, &kCPULayoutRulesImpl, &kCPUObjectLayoutRulesImpl, }; +// CUDA + +static CUDAObjectLayoutRulesImpl kCUDAObjectLayoutRulesImpl; +static CUDALayoutRulesImpl kCUALayoutRulesImpl; + +LayoutRulesImpl kCUDALayoutRulesImpl_ = { + &kCPULayoutRulesFamilyImpl, &kCUALayoutRulesImpl, &kCUDAObjectLayoutRulesImpl, +}; + + // GLSL cases LayoutRulesImpl kStd140LayoutRulesImpl_ = { @@ -986,6 +1029,69 @@ LayoutRulesImpl* CPULayoutRulesFamilyImpl::getStructuredBufferRules() return &kCPULayoutRulesImpl_; } +// CUDA Family + +LayoutRulesImpl* CUDALayoutRulesFamilyImpl::getConstantBufferRules() +{ + return &kCPULayoutRulesImpl_; +} + +LayoutRulesImpl* CUDALayoutRulesFamilyImpl::getPushConstantBufferRules() +{ + return &kCPULayoutRulesImpl_; +} + +LayoutRulesImpl* CUDALayoutRulesFamilyImpl::getTextureBufferRules() +{ + return nullptr; +} + +LayoutRulesImpl* CUDALayoutRulesFamilyImpl::getVaryingInputRules() +{ + return nullptr; +} +LayoutRulesImpl* CUDALayoutRulesFamilyImpl::getVaryingOutputRules() +{ + return nullptr; +} +LayoutRulesImpl* CUDALayoutRulesFamilyImpl::getSpecializationConstantRules() +{ + return nullptr; +} +LayoutRulesImpl* CUDALayoutRulesFamilyImpl::getShaderStorageBufferRules() +{ + return nullptr; +} +LayoutRulesImpl* CUDALayoutRulesFamilyImpl::getParameterBlockRules() +{ + // Not clear - just use similar to CPU + return &kCUDALayoutRulesImpl_; +} +LayoutRulesImpl* CUDALayoutRulesFamilyImpl::getRayPayloadParameterRules() +{ + return nullptr; +} +LayoutRulesImpl* CUDALayoutRulesFamilyImpl::getCallablePayloadParameterRules() +{ + return nullptr; +} +LayoutRulesImpl* CUDALayoutRulesFamilyImpl::getHitAttributesParameterRules() +{ + return nullptr; +} +LayoutRulesImpl* CUDALayoutRulesFamilyImpl::getShaderRecordConstantBufferRules() +{ + // Just following HLSLs lead for the moment + return &kCUDALayoutRulesImpl_; +} + +LayoutRulesImpl* CUDALayoutRulesFamilyImpl::getStructuredBufferRules() +{ + return &kCUDALayoutRulesImpl_; +} + + + LayoutRulesFamilyImpl* getDefaultLayoutRulesFamilyForTarget(TargetRequest* targetReq) { switch (targetReq->getTarget()) @@ -1019,6 +1125,13 @@ LayoutRulesFamilyImpl* getDefaultLayoutRulesFamilyForTarget(TargetRequest* targe return &kCPULayoutRulesFamilyImpl; } + case CodeGenTarget::PTX: + case CodeGenTarget::CUDASource: + { + return &kCUDALayoutRulesFamilyImpl; + } + + default: return nullptr; } diff --git a/source/slang/slang.vcxproj b/source/slang/slang.vcxproj index 8c8a712ce..e120e006f 100644 --- a/source/slang/slang.vcxproj +++ b/source/slang/slang.vcxproj @@ -199,6 +199,7 @@ <ClInclude Include="slang-diagnostics.h" /> <ClInclude Include="slang-emit-c-like.h" /> <ClInclude Include="slang-emit-cpp.h" /> + <ClInclude Include="slang-emit-cuda.h" /> <ClInclude Include="slang-emit-glsl-extension-tracker.h" /> <ClInclude Include="slang-emit-glsl.h" /> <ClInclude Include="slang-emit-hlsl.h" /> @@ -280,6 +281,7 @@ <ClCompile Include="slang-dxc-support.cpp" /> <ClCompile Include="slang-emit-c-like.cpp" /> <ClCompile Include="slang-emit-cpp.cpp" /> + <ClCompile Include="slang-emit-cuda.cpp" /> <ClCompile Include="slang-emit-glsl-extension-tracker.cpp" /> <ClCompile Include="slang-emit-glsl.cpp" /> <ClCompile Include="slang-emit-hlsl.cpp" /> @@ -373,4 +375,4 @@ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> <ImportGroup Label="ExtensionTargets"> </ImportGroup> -</Project>
\ No newline at end of file +</Project>
\ No newline at end of file diff --git a/source/slang/slang.vcxproj.filters b/source/slang/slang.vcxproj.filters index c562d28e3..1c619a3d5 100644 --- a/source/slang/slang.vcxproj.filters +++ b/source/slang/slang.vcxproj.filters @@ -48,6 +48,9 @@ <ClInclude Include="slang-emit-cpp.h"> <Filter>Header Files</Filter> </ClInclude> + <ClInclude Include="slang-emit-cuda.h"> + <Filter>Header Files</Filter> + </ClInclude> <ClInclude Include="slang-emit-glsl-extension-tracker.h"> <Filter>Header Files</Filter> </ClInclude> @@ -287,6 +290,9 @@ <ClCompile Include="slang-emit-cpp.cpp"> <Filter>Source Files</Filter> </ClCompile> + <ClCompile Include="slang-emit-cuda.cpp"> + <Filter>Source Files</Filter> + </ClCompile> <ClCompile Include="slang-emit-glsl-extension-tracker.cpp"> <Filter>Source Files</Filter> </ClCompile> diff --git a/tests/cuda/compile-to-cuda.slang b/tests/cuda/compile-to-cuda.slang new file mode 100644 index 000000000..6166aaf0b --- /dev/null +++ b/tests/cuda/compile-to-cuda.slang @@ -0,0 +1,29 @@ +//DISABLE_TEST(smoke):SIMPLE: -target ptx -entry computeMain -stage compute + +//TEST_INPUT:ubuffer(data=[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0], stride=4):out,name=outputBuffer +RWStructuredBuffer<int> outputBuffer : register(u0); + +int quantize(double value) +{ + return int(value * 256); +} + +int quantize(float value) +{ + return int(value * 256); +} + +[numthreads(4, 1, 1)] +void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) +{ + float values[] = { -9, 9, -3, 3 }; + + int tid = int(dispatchThreadID.x); + float value = values[tid]; + + outputBuffer[tid * 4] = quantize(sin(value)); + outputBuffer[tid * 4 + 1] = quantize(cos(value)); + + outputBuffer[tid * 4 + 2] = quantize(sin(double(value))); + outputBuffer[tid * 4 + 3] = quantize(cos(double(value))); +} diff --git a/tools/slang-test/slang-test-main.cpp b/tools/slang-test/slang-test-main.cpp index cdfe6a1a7..6401ac852 100644 --- a/tools/slang-test/slang-test-main.cpp +++ b/tools/slang-test/slang-test-main.cpp @@ -505,6 +505,7 @@ static PassThroughFlags _getPassThroughFlagsForTarget(SlangCompileTarget target) case SLANG_GLSL: case SLANG_C_SOURCE: case SLANG_CPP_SOURCE: + case SLANG_CUDA_SOURCE: { return 0; } |
