summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorjsmall-nvidia <jsmall@nvidia.com>2019-12-19 11:23:14 -0500
committerGitHub <noreply@github.com>2019-12-19 11:23:14 -0500
commite3fe0319467546bae070137c58dcf8f9fbe93c79 (patch)
tree6cc26ccda33725e98c4a9a0408cf31a1348db268
parent60934d98fbc20d83b5e149e72a197ec4f5c61580 (diff)
WIP CUDA source emit (#1157)
* CPPCompiler -> DownstreamCompiler * Added DownstreamCompileResult to start abstraction such that we don't need files. * * Split out slang-blob.cpp * Made CompileResult hold a DownstreamCompileResult - for access to binary or ISlangSharedLibrary * Keep temporary files in scope. * Add a hash to the hex dump stream. * Move all file tracking into DownstreamCompiler. * WIP support for nvrtc. * WIP: Adding support for nvrtc compiler. Adding enum types, wiring up the nvrtc into slang. * Fix remaining CPPCompiler references. * Fix order issue on target string matching. * Use ISlangSharedLibrary for nvrtc. * Use DownstreamCompiler for nvrtc. * WIP first pass at compilation win nvrtc. * Added testing if file is on file system into CommandLineDownstreamCompiler. Added sourceContentsPath. * Make test cuda-compile.cu work by just compiling not comparing output. * Genearlize DownstreamCompiler usage. * Fix warning on clang. * Remove CompilerType from DownstreamCompiler. * Use DownstreamCompiler interface for all compilers. NOTE for FXC, DXC and GLSLANG this doesn't mean using 'compile' - it's still extracting functions from shared library. * Replace DownstreamCompiler::SourceType -> SlangSourceLanguage * Replace _canCompile with something data driven. * Fix compiling on gcc/clang for DownstreamCompiler. * Moved some text conversions into DownstreamCompiler. * Fix problem on non-vc builds with not having return on locateCompilers for VS. * Change so no warning for code not reachable on locateCompilers for vs. * WIP: CUDA code generation - currently just using CPU layout and HLSL. * emitXXXForEntryPoint -> emitEntryPointSource emitSourceForEntryPoint -> emitEntryPointSourceFromIR Fix up generating cuda to get PTX. * WIP emitting cuda for IR. * Small improvements to CUDA ouput. * Disable the CUDA emit test, as output not currently compilable.
-rw-r--r--source/core/slang-downstream-compiler.cpp13
-rw-r--r--source/core/slang-downstream-compiler.h3
-rw-r--r--source/slang/slang-compiler.cpp186
-rw-r--r--source/slang/slang-compiler.h9
-rw-r--r--source/slang/slang-dxc-support.cpp9
-rw-r--r--source/slang/slang-emit-c-like.cpp36
-rw-r--r--source/slang/slang-emit-c-like.h2
-rw-r--r--source/slang/slang-emit-cuda.cpp844
-rw-r--r--source/slang/slang-emit-cuda.h80
-rw-r--r--source/slang/slang-emit.cpp20
-rw-r--r--source/slang/slang-emit.h2
-rw-r--r--source/slang/slang-ir-link.cpp4
-rw-r--r--source/slang/slang-parameter-binding.cpp16
-rw-r--r--source/slang/slang-type-layout.cpp113
-rw-r--r--source/slang/slang.vcxproj4
-rw-r--r--source/slang/slang.vcxproj.filters6
-rw-r--r--tests/cuda/compile-to-cuda.slang29
-rw-r--r--tools/slang-test/slang-test-main.cpp1
18 files changed, 1217 insertions, 160 deletions
diff --git a/source/core/slang-downstream-compiler.cpp b/source/core/slang-downstream-compiler.cpp
index 2e78ea22b..2f0cff1a9 100644
--- a/source/core/slang-downstream-compiler.cpp
+++ b/source/core/slang-downstream-compiler.cpp
@@ -182,6 +182,19 @@ void DownstreamCompiler::Desc::appendAsText(StringBuilder& out) const
return UnownedStringSlice::fromLiteral("unknown");
}
+/* static */SlangCompileTarget DownstreamCompiler::getCompileTarget(SlangSourceLanguage sourceLanguage)
+{
+ switch (sourceLanguage)
+ {
+ case SLANG_SOURCE_LANGUAGE_HLSL: return SLANG_HLSL;
+ case SLANG_SOURCE_LANGUAGE_GLSL: return SLANG_GLSL;
+ case SLANG_SOURCE_LANGUAGE_C: return SLANG_C_SOURCE;
+ case SLANG_SOURCE_LANGUAGE_CPP: return SLANG_CPP_SOURCE;
+ case SLANG_SOURCE_LANGUAGE_CUDA: return SLANG_CUDA_SOURCE;
+ default: return SLANG_TARGET_UNKNOWN;
+ }
+}
+
/* !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! DownstreamDiagnostics !!!!!!!!!!!!!!!!!!!!!!*/
Index DownstreamDiagnostics::getCountByType(Diagnostic::Type type) const
diff --git a/source/core/slang-downstream-compiler.h b/source/core/slang-downstream-compiler.h
index 57fb88a3c..a4238ce38 100644
--- a/source/core/slang-downstream-compiler.h
+++ b/source/core/slang-downstream-compiler.h
@@ -296,6 +296,9 @@ public:
/// Get the compilers name
static UnownedStringSlice getPassThroughName(SlangPassThrough passThru);
+ /// Given a source language return as the equivalent compile target
+ static SlangCompileTarget getCompileTarget(SlangSourceLanguage sourceLanguage);
+
protected:
static Infos s_infos;
diff --git a/source/slang/slang-compiler.cpp b/source/slang/slang-compiler.cpp
index 07bfaacc9..d0fca1ab5 100644
--- a/source/slang/slang-compiler.cpp
+++ b/source/slang/slang-compiler.cpp
@@ -478,8 +478,6 @@ namespace Slang
return PassThroughMode::None;
}
case CodeGenTarget::GLSL:
- case CodeGenTarget::GLSL_Vulkan:
- case CodeGenTarget::GLSL_Vulkan_OneDesc:
{
// Can always output GLSL
return PassThroughMode::None;
@@ -489,6 +487,11 @@ namespace Slang
// Can always output HLSL
return PassThroughMode::None;
}
+ case CodeGenTarget::CUDASource:
+ {
+ // Can always output CUDA
+ return PassThroughMode::None;
+ }
case CodeGenTarget::SPIRVAssembly:
case CodeGenTarget::SPIRV:
{
@@ -504,6 +507,11 @@ namespace Slang
{
return PassThroughMode::Dxc;
}
+ case CodeGenTarget::GLSL_Vulkan:
+ case CodeGenTarget::GLSL_Vulkan_OneDesc:
+ {
+ return PassThroughMode::Glslang;
+ }
case CodeGenTarget::CPPSource:
case CodeGenTarget::CSource:
{
@@ -581,10 +589,11 @@ namespace Slang
outCodeBuilder << fileContent << "\n";
}
- String emitHLSLForEntryPoint(
+ String emitEntryPointSource(
BackEndCompileRequest* compileRequest,
Int entryPointIndex,
TargetRequest* targetReq,
+ CodeGenTarget target,
EndToEndCompileRequest* endToEndReq)
{
if(auto translationUnit = findPassThroughTranslationUnit(endToEndReq, entryPointIndex))
@@ -596,91 +605,41 @@ namespace Slang
// mode.
StringBuilder codeBuilder;
- for(auto sourceFile : translationUnit->getSourceFiles())
- {
- _appendCodeWithPath(sourceFile->getPathInfo().foundPath.getUnownedSlice(), sourceFile->getContent(), codeBuilder);
- }
-
- return codeBuilder.ProduceString();
- }
- else
- {
- return emitEntryPointSource(
- compileRequest,
- entryPointIndex,
- CodeGenTarget::HLSL,
- targetReq);
- }
- }
-
- String emitCPPForEntryPoint(
- BackEndCompileRequest* compileRequest,
- Int entryPointIndex,
- TargetRequest* targetReq,
- EndToEndCompileRequest* endToEndReq)
- {
- if (auto translationUnit = findPassThroughTranslationUnit(endToEndReq, entryPointIndex))
- {
- // Generate a string that includes the content of
- // the source file(s), along with a line directive
- // to ensure that we get reasonable messages
- // from the downstream compiler when in pass-through
- // mode.
-
- StringBuilder codeBuilder;
- for (auto sourceFile : translationUnit->getSourceFiles())
+ if (target == CodeGenTarget::GLSL)
{
- _appendCodeWithPath(sourceFile->getPathInfo().foundPath.getUnownedSlice(), sourceFile->getContent(), codeBuilder);
+ // Special case GLSL
+ int translationUnitCounter = 0;
+ for (auto sourceFile : translationUnit->getSourceFiles())
+ {
+ int translationUnitIndex = translationUnitCounter++;
+
+ // We want to output `#line` directives, but we need
+ // to skip this for the first file, since otherwise
+ // some GLSL implementations will get tripped up by
+ // not having the `#version` directive be the first
+ // thing in the file.
+ if (translationUnitIndex != 0)
+ {
+ codeBuilder << "#line 1 " << translationUnitIndex << "\n";
+ }
+ codeBuilder << sourceFile->getContent() << "\n";
+ }
}
-
- return codeBuilder.ProduceString();
- }
- else
- {
- return emitEntryPointSource(compileRequest, entryPointIndex, CodeGenTarget::CPPSource, targetReq);
- }
- }
-
- String emitGLSLForEntryPoint(
- BackEndCompileRequest* compileRequest,
- Int entryPointIndex,
- TargetRequest* targetReq,
- EndToEndCompileRequest* endToEndReq)
- {
- if(auto translationUnit = findPassThroughTranslationUnit(endToEndReq, entryPointIndex))
- {
- // Generate a string that includes the content of
- // the source file(s), along with a line directive
- // to ensure that we get reasonable messages
- // from the downstream compiler when in pass-through
- // mode.
-
- StringBuilder codeBuilder;
- int translationUnitCounter = 0;
- for(auto sourceFile : translationUnit->getSourceFiles())
+ else
{
- int translationUnitIndex = translationUnitCounter++;
-
- // We want to output `#line` directives, but we need
- // to skip this for the first file, since otherwise
- // some GLSL implementations will get tripped up by
- // not having the `#version` directive be the first
- // thing in the file.
- if(translationUnitIndex != 0)
+ for(auto sourceFile : translationUnit->getSourceFiles())
{
- codeBuilder << "#line 1 " << translationUnitIndex << "\n";
+ _appendCodeWithPath(sourceFile->getPathInfo().foundPath.getUnownedSlice(), sourceFile->getContent(), codeBuilder);
}
- codeBuilder << sourceFile->getContent() << "\n";
}
-
return codeBuilder.ProduceString();
}
else
{
- return emitEntryPointSource(
+ return emitEntryPointSourceFromIR(
compileRequest,
entryPointIndex,
- CodeGenTarget::GLSL,
+ target,
targetReq);
}
}
@@ -964,7 +923,7 @@ namespace Slang
return SLANG_FAIL;
}
- auto hlslCode = emitHLSLForEntryPoint(compileRequest, entryPointIndex, targetReq, endToEndReq);
+ auto hlslCode = emitEntryPointSource(compileRequest, entryPointIndex, targetReq, CodeGenTarget::HLSL, endToEndReq);
maybeDumpIntermediate(compileRequest, hlslCode.getBuffer(), CodeGenTarget::HLSL);
auto profile = getEffectiveProfile(entryPoint, targetReq);
@@ -1251,13 +1210,16 @@ SlangResult dissassembleDXILUsingDXC(
EndToEndCompileRequest* endToEndReq,
RefPtr<DownstreamCompileResult>& outResult)
{
+ outResult.setNull();
+
auto sink = slangRequest->getSink();
auto session = slangRequest->getSession();
const String originalSourcePath = calcSourcePathForEntryPoint(endToEndReq, entryPointIndex);
- outResult.setNull();
+ CodeGenTarget sourceTarget = CodeGenTarget::None;
+ SourceLanguage sourceLanguage = SourceLanguage::Unknown;
PassThroughMode downstreamCompiler = endToEndReq->passThrough;
@@ -1265,23 +1227,26 @@ SlangResult dissassembleDXILUsingDXC(
if (downstreamCompiler == PassThroughMode::None)
{
auto target = targetReq->target;
-
switch (target)
{
case CodeGenTarget::PTX:
{
- downstreamCompiler = PassThroughMode(session->getDefaultDownstreamCompiler(SLANG_SOURCE_LANGUAGE_CUDA));
+ sourceTarget = CodeGenTarget::CUDASource;
+ sourceLanguage = SourceLanguage::CUDA;
break;
}
case CodeGenTarget::HostCallable:
case CodeGenTarget::SharedLibrary:
case CodeGenTarget::Executable:
{
- downstreamCompiler = PassThroughMode(session->getDefaultDownstreamCompiler(SLANG_SOURCE_LANGUAGE_CPP));
+ sourceTarget = CodeGenTarget::CPPSource;
+ sourceLanguage = SourceLanguage::CPP;
break;
}
default: break;
}
+
+ downstreamCompiler = PassThroughMode(session->getDefaultDownstreamCompiler(SlangSourceLanguage(sourceLanguage)));
}
// Get the required downstream compiler
@@ -1301,8 +1266,6 @@ SlangResult dissassembleDXILUsingDXC(
return SLANG_FAIL;
}
- SourceLanguage rawSourceLanguage = SourceLanguage::Unknown;
-
Dictionary<String, String> preprocessorDefinitions;
List<String> includePaths;
@@ -1357,8 +1320,10 @@ SlangResult dissassembleDXILUsingDXC(
}
// We are just passing thru, so it's whatever it originally was
- rawSourceLanguage = translationUnit->sourceLanguage;
+ sourceLanguage = translationUnit->sourceLanguage;
+ sourceTarget = CodeGenTarget(DownstreamCompiler::getCompileTarget(SlangSourceLanguage(sourceLanguage)));
+ // Special case if we have a single file, so that we pass the path, and the contents
const auto& sourceFiles = translationUnit->getSourceFiles();
if (sourceFiles.getCount() == 1)
{
@@ -1372,30 +1337,18 @@ SlangResult dissassembleDXILUsingDXC(
}
else
{
- // If can't just use file, concat together and make
- StringBuilder codeBuilder;
- for (auto sourceFile : translationUnit->getSourceFiles())
- {
- _appendCodeWithPath(sourceFile->getPathInfo().foundPath.getUnownedSlice(), sourceFile->getContent(), codeBuilder);
- }
- options.sourceContents = codeBuilder.ProduceString();
+ options.sourceContents = emitEntryPointSource(slangRequest, entryPointIndex, targetReq, sourceTarget, endToEndReq);
}
}
else
{
- options.sourceContents = emitCPPForEntryPoint(
- slangRequest,
- entryPointIndex,
- targetReq,
- endToEndReq);
-
- maybeDumpIntermediate(slangRequest, options.sourceContents.getBuffer(), CodeGenTarget::CPPSource);
+ options.sourceContents = emitEntryPointSource(slangRequest, entryPointIndex, targetReq, sourceTarget, endToEndReq);
- rawSourceLanguage = SourceLanguage::CPP;
+ maybeDumpIntermediate(slangRequest, options.sourceContents.getBuffer(), sourceTarget);
}
// Set the source type
- options.sourceLanguage = SlangSourceLanguage(rawSourceLanguage);
+ options.sourceLanguage = SlangSourceLanguage(sourceLanguage);
// Disable exceptions and security checks
options.flags &= ~(CompileOptions::Flag::EnableExceptionHandling | CompileOptions::Flag::EnableSecurityChecks);
@@ -1556,10 +1509,11 @@ SlangResult dissassembleDXILUsingDXC(
{
spirvOut.clear();
- String rawGLSL = emitGLSLForEntryPoint(
+ String rawGLSL = emitEntryPointSource(
slangRequest,
entryPointIndex,
targetReq,
+ CodeGenTarget::GLSL,
endToEndReq);
maybeDumpIntermediate(slangRequest, rawGLSL.getBuffer(), CodeGenTarget::GLSL);
@@ -1671,38 +1625,18 @@ SlangResult dissassembleDXILUsingDXC(
}
}
break;
- case CodeGenTarget::HLSL:
- {
- String code = emitHLSLForEntryPoint(
- compileRequest,
- entryPointIndex,
- targetReq,
- endToEndReq);
- maybeDumpIntermediate(compileRequest, code.getBuffer(), target);
- result = CompileResult(code);
- }
- break;
-
case CodeGenTarget::GLSL:
- {
- String code = emitGLSLForEntryPoint(
- compileRequest,
- entryPointIndex,
- targetReq,
- endToEndReq);
- maybeDumpIntermediate(compileRequest, code.getBuffer(), target);
- result = CompileResult(code);
- }
- break;
-
+ case CodeGenTarget::HLSL:
+ case CodeGenTarget::CUDASource:
case CodeGenTarget::CPPSource:
case CodeGenTarget::CSource:
{
String code = emitEntryPointSource(
compileRequest,
entryPointIndex,
- target,
- targetReq);
+ targetReq,
+ target,
+ endToEndReq);
maybeDumpIntermediate(compileRequest, code.getBuffer(), target);
result = CompileResult(code);
}
diff --git a/source/slang/slang-compiler.h b/source/slang/slang-compiler.h
index 1a385ab23..2783197af 100644
--- a/source/slang/slang-compiler.h
+++ b/source/slang/slang-compiler.h
@@ -1797,6 +1797,15 @@ namespace Slang
@return the appropriate source filename */
String calcSourcePathForEntryPoint(EndToEndCompileRequest* endToEndReq, UInt entryPointIndex);
+ /* Emits entry point source taking into account if a pass-through or not. Uses 'target' to determine
+ the target (not targetReq) */
+ String emitEntryPointSource(
+ BackEndCompileRequest* compileRequest,
+ Int entryPointIndex,
+ TargetRequest* targetReq,
+ CodeGenTarget target,
+ EndToEndCompileRequest* endToEndReq);
+
struct TypeCheckingCache;
//
diff --git a/source/slang/slang-dxc-support.cpp b/source/slang/slang-dxc-support.cpp
index e0ca4df82..828138143 100644
--- a/source/slang/slang-dxc-support.cpp
+++ b/source/slang/slang-dxc-support.cpp
@@ -29,11 +29,7 @@
namespace Slang
{
String GetHLSLProfileName(Profile profile);
- String emitHLSLForEntryPoint(
- BackEndCompileRequest* compileRequest,
- Int entryPointIndex,
- TargetRequest* targetReq,
- EndToEndCompileRequest* endToEndReq);
+
SlangResult locateDXCCompilers(const String& path, ISlangSharedLibraryLoader* loader, DownstreamCompilerSet* set);
@@ -85,10 +81,11 @@ namespace Slang
// Now let's go ahead and generate HLSL for the entry
// point, since we'll need that to feed into dxc.
- auto hlslCode = emitHLSLForEntryPoint(
+ auto hlslCode = emitEntryPointSource(
compileRequest,
entryPointIndex,
targetReq,
+ CodeGenTarget::HLSL,
endToEndReq);
maybeDumpIntermediate(compileRequest, hlslCode.getBuffer(), CodeGenTarget::HLSL);
diff --git a/source/slang/slang-emit-c-like.cpp b/source/slang/slang-emit-c-like.cpp
index 2c1bcbe6d..1a2fbb0f4 100644
--- a/source/slang/slang-emit-c-like.cpp
+++ b/source/slang/slang-emit-c-like.cpp
@@ -98,6 +98,7 @@ struct CLikeSourceEmitter::ComputeEmitActionsContext
{
return SourceStyle::HLSL;
}
+ case CodeGenTarget::PTX:
case CodeGenTarget::SPIRV:
case CodeGenTarget::SPIRVAssembly:
case CodeGenTarget::DXBytecode:
@@ -115,6 +116,10 @@ struct CLikeSourceEmitter::ComputeEmitActionsContext
{
return SourceStyle::CPP;
}
+ case CodeGenTarget::CUDASource:
+ {
+ return SourceStyle::CUDA;
+ }
}
}
@@ -343,6 +348,7 @@ bool CLikeSourceEmitter::isTargetIntrinsicModifierApplicable(const String& targe
case SourceStyle::CPP: return targetName == "cpp";
case SourceStyle::GLSL: return targetName == "glsl";
case SourceStyle::HLSL: return targetName == "hlsl";
+ case SourceStyle::CUDA: return targetName == "cuda";
}
}
@@ -1021,6 +1027,7 @@ void CLikeSourceEmitter::emitInstResultDecl(IRInst* inst)
switch (getSourceStyle())
{
+ case SourceStyle::CUDA:
case SourceStyle::HLSL:
case SourceStyle::C:
case SourceStyle::CPP:
@@ -2507,6 +2514,22 @@ void CLikeSourceEmitter::emitSimpleFuncParamImpl(IRParam* param)
emitSemantics(param);
}
+void CLikeSourceEmitter::emitSimpleFuncParamsImpl(IRFunc* func)
+{
+ m_writer->emit("(");
+
+ auto firstParam = func->getFirstParam();
+ for (auto pp = firstParam; pp; pp = pp->getNextParam())
+ {
+ if (pp != firstParam)
+ m_writer->emit(", ");
+
+ emitSimpleFuncParamImpl(pp);
+ }
+
+ m_writer->emit(")");
+}
+
void CLikeSourceEmitter::emitSimpleFuncImpl(IRFunc* func)
{
auto resultType = func->getResultType();
@@ -2521,18 +2544,7 @@ void CLikeSourceEmitter::emitSimpleFuncImpl(IRFunc* func)
auto name = getName(func);
emitType(resultType, name);
-
- m_writer->emit("(");
- auto firstParam = func->getFirstParam();
- for( auto pp = firstParam; pp; pp = pp->getNextParam())
- {
- if(pp != firstParam)
- m_writer->emit(", ");
-
- emitSimpleFuncParamImpl(pp);
- }
- m_writer->emit(")");
-
+ emitSimpleFuncParamsImpl(func);
emitSemantics(func);
// TODO: encode declaration vs. definition
diff --git a/source/slang/slang-emit-c-like.h b/source/slang/slang-emit-c-like.h
index 4b5c68b6c..a6c48dc73 100644
--- a/source/slang/slang-emit-c-like.h
+++ b/source/slang/slang-emit-c-like.h
@@ -47,6 +47,7 @@ public:
HLSL,
C,
CPP,
+ CUDA,
CountOf,
};
@@ -315,6 +316,7 @@ public:
virtual void emitRateQualifiersImpl(IRRate* rate) { SLANG_UNUSED(rate); }
virtual void emitSemanticsImpl(IRInst* inst) { SLANG_UNUSED(inst); }
virtual void emitSimpleFuncParamImpl(IRParam* param);
+ virtual void emitSimpleFuncParamsImpl(IRFunc* func);
virtual void emitInterpolationModifiersImpl(IRInst* varInst, IRType* valueType, IRVarLayout* layout) { SLANG_UNUSED(varInst); SLANG_UNUSED(valueType); SLANG_UNUSED(layout); }
virtual void emitSimpleTypeImpl(IRType* type) = 0;
virtual void emitVarDecorationsImpl(IRInst* varDecl) { SLANG_UNUSED(varDecl); }
diff --git a/source/slang/slang-emit-cuda.cpp b/source/slang/slang-emit-cuda.cpp
new file mode 100644
index 000000000..feafc4e4e
--- /dev/null
+++ b/source/slang/slang-emit-cuda.cpp
@@ -0,0 +1,844 @@
+// slang-emit-cuda.cpp
+#include "slang-emit-cuda.h"
+
+#include "../core/slang-writer.h"
+
+#include "slang-emit-source-writer.h"
+#include "slang-mangled-lexer.h"
+
+#include <assert.h>
+
+namespace Slang {
+
+/* static */ UnownedStringSlice CUDASourceEmitter::getBuiltinTypeName(IROp op)
+{
+ switch (op)
+ {
+ case kIROp_VoidType: return UnownedStringSlice("void");
+ case kIROp_BoolType: return UnownedStringSlice("bool");
+
+ case kIROp_Int8Type: return UnownedStringSlice("char");
+ case kIROp_Int16Type: return UnownedStringSlice("short");
+ case kIROp_IntType: return UnownedStringSlice("int");
+ case kIROp_Int64Type: return UnownedStringSlice("long long");
+
+ case kIROp_UInt8Type: return UnownedStringSlice("unsigned char");
+ case kIROp_UInt16Type: return UnownedStringSlice("unsigned short");
+ case kIROp_UIntType: return UnownedStringSlice("unsigned int");
+ case kIROp_UInt64Type: return UnownedStringSlice("unsigned long long");
+
+ // Not clear just yet how we should handle half... we want all processing as float probly, but when reading/writing to memory converting
+ case kIROp_HalfType: return UnownedStringSlice("half");
+
+ case kIROp_FloatType: return UnownedStringSlice("float");
+ case kIROp_DoubleType: return UnownedStringSlice("double");
+ default: return UnownedStringSlice();
+ }
+}
+
+
+/* static */ UnownedStringSlice CUDASourceEmitter::getVectorPrefix(IROp op)
+{
+ switch (op)
+ {
+ case kIROp_BoolType: return UnownedStringSlice("bool");
+
+ case kIROp_Int8Type: return UnownedStringSlice("char");
+ case kIROp_Int16Type: return UnownedStringSlice("short");
+ case kIROp_IntType: return UnownedStringSlice("int");
+ case kIROp_Int64Type: return UnownedStringSlice("longlong");
+
+ case kIROp_UInt8Type: return UnownedStringSlice("uchar");
+ case kIROp_UInt16Type: return UnownedStringSlice("ushort");
+ case kIROp_UIntType: return UnownedStringSlice("uint");
+ case kIROp_UInt64Type: return UnownedStringSlice("ulonglong");
+
+ // Not clear just yet how we should handle half... we want all processing as float probly, but when reading/writing to memory converting
+ case kIROp_HalfType: return UnownedStringSlice("half");
+
+ case kIROp_FloatType: return UnownedStringSlice("float");
+ case kIROp_DoubleType: return UnownedStringSlice("double");
+ default: return UnownedStringSlice();
+ }
+}
+
+SlangResult CUDASourceEmitter::_calcCUDATextureTypeName(IRTextureTypeBase* texType, StringBuilder& outName)
+{
+ // texture<float, cudaTextureType2D, cudaReadModeElementType> texRef;
+
+ // Not clear how to do this yet
+ if (texType->isMultisample() || texType->isArray())
+ {
+ return SLANG_FAIL;
+ }
+
+ outName << "texture<";
+ outName << _getCUDATypeName(texType->getElementType());
+ outName << ", ";
+
+ switch (texType->GetBaseShape())
+ {
+ case TextureFlavor::Shape::Shape1D: outName << "cudaTextureType1D"; break;
+ case TextureFlavor::Shape::Shape2D: outName << "cudaTextureType2D"; break;
+ case TextureFlavor::Shape::Shape3D: outName << "cudaTextureType3D"; break;
+ case TextureFlavor::Shape::ShapeCube: outName << "cudaTextureTypeCubemap"; break;
+ case TextureFlavor::Shape::ShapeBuffer: outName << "Buffer"; break;
+ default:
+ SLANG_DIAGNOSE_UNEXPECTED(getSink(), SourceLoc(), "unhandled resource shape");
+ return SLANG_FAIL;
+ }
+
+ outName << ", ";
+
+ switch (texType->getAccess())
+ {
+ case SLANG_RESOURCE_ACCESS_READ:
+ {
+ // Other value is cudaReadModeNormalizedFloat
+
+ outName << "cudaReadModeElementType";
+ break;
+ }
+ default:
+ {
+ SLANG_DIAGNOSE_UNEXPECTED(getSink(), SourceLoc(), "unhandled resource access mode");
+ return SLANG_FAIL;
+ }
+ }
+
+ outName << ">";
+ return SLANG_OK;
+}
+
+// This is junk..
+static UnownedStringSlice _getCUDAResourceTypePrefix(IROp op)
+{
+ switch (op)
+ {
+ case kIROp_HLSLStructuredBufferType: return UnownedStringSlice::fromLiteral("StructuredBuffer");
+ case kIROp_HLSLRWStructuredBufferType: return UnownedStringSlice::fromLiteral("RWStructuredBuffer");
+ case kIROp_HLSLRWByteAddressBufferType: return UnownedStringSlice::fromLiteral("RWByteAddressBuffer");
+ case kIROp_HLSLByteAddressBufferType: return UnownedStringSlice::fromLiteral("ByteAddressBuffer");
+ case kIROp_SamplerStateType: return UnownedStringSlice::fromLiteral("SamplerState");
+ case kIROp_SamplerComparisonStateType: return UnownedStringSlice::fromLiteral("SamplerComparisonState");
+ case kIROp_HLSLRasterizerOrderedStructuredBufferType: return UnownedStringSlice::fromLiteral("RasterizerOrderedStructuredBuffer");
+ case kIROp_HLSLAppendStructuredBufferType: return UnownedStringSlice::fromLiteral("AppendStructuredBuffer");
+ case kIROp_HLSLConsumeStructuredBufferType: return UnownedStringSlice::fromLiteral("ConsumeStructuredBuffer");
+ case kIROp_HLSLRasterizerOrderedByteAddressBufferType: return UnownedStringSlice::fromLiteral("RasterizerOrderedByteAddressBuffer");
+ case kIROp_RaytracingAccelerationStructureType: return UnownedStringSlice::fromLiteral("RaytracingAccelerationStructure");
+
+ default: return UnownedStringSlice();
+ }
+}
+
+SlangResult CUDASourceEmitter::_calcCUDATypeName(IRType* type, StringBuilder& out)
+{
+ switch (type->op)
+ {
+ case kIROp_HalfType:
+ {
+ // Special case half
+ out << getBuiltinTypeName(kIROp_FloatType);
+ return SLANG_OK;
+ }
+ case kIROp_VectorType:
+ {
+ auto vecType = static_cast<IRVectorType*>(type);
+ auto vecCount = int(GetIntVal(vecType->getElementCount()));
+ const IROp elemType = vecType->getElementType()->op;
+
+ UnownedStringSlice prefix = getVectorPrefix(elemType);
+ if (prefix.size() <= 0)
+ {
+ return SLANG_FAIL;
+ }
+ out << prefix << vecCount;
+ return SLANG_OK;
+ }
+#if 0
+ case kIROp_MatrixType:
+ {
+ auto matType = static_cast<IRMatrixType*>(type);
+
+ auto elementType = matType->getElementType();
+ const auto rowCount = int(GetIntVal(matType->getRowCount()));
+ const auto colCount = int(GetIntVal(matType->getColumnCount()));
+
+ if (target == CodeGenTarget::CPPSource)
+ {
+ out << "Matrix<" << getBuiltinTypeName(elementType->op) << ", " << rowCount << ", " << colCount << ">";
+ }
+ else
+ {
+ out << "Mat";
+ const UnownedStringSlice postFix = _getCTypeVecPostFix(_getCType(elementType->op));
+ out << postFix;
+ if (postFix.size() > 1)
+ {
+ out << "_";
+ }
+ out << rowCount;
+ out << colCount;
+ }
+ return SLANG_OK;
+ }
+ case kIROp_ArrayType:
+ {
+ auto arrayType = static_cast<IRArrayType*>(type);
+ auto elementType = arrayType->getElementType();
+ int elementCount = int(GetIntVal(arrayType->getElementCount()));
+
+ out << "FixedArray<";
+ SLANG_RETURN_ON_FAIL(_calcTypeName(elementType, target, out));
+ out << ", " << elementCount << ">";
+ return SLANG_OK;
+ }
+ case kIROp_UnsizedArrayType:
+ {
+ auto arrayType = static_cast<IRUnsizedArrayType*>(type);
+ auto elementType = arrayType->getElementType();
+
+ out << "Array<";
+ SLANG_RETURN_ON_FAIL(_calcTypeName(elementType, target, out));
+ out << ">";
+ return SLANG_OK;
+ }
+#endif
+ default:
+ {
+ if (isNominalOp(type->op))
+ {
+ out << getName(type);
+ return SLANG_OK;
+ }
+
+ if (IRBasicType::isaImpl(type->op))
+ {
+ out << getBuiltinTypeName(type->op);
+ return SLANG_OK;
+ }
+
+ if (auto texType = as<IRTextureTypeBase>(type))
+ {
+ // We don't support TextureSampler, so ignore that
+ if (texType->op != kIROp_TextureSamplerType)
+ {
+ return _calcCUDATextureTypeName(texType, out);
+ }
+ }
+
+ // If _getResourceTypePrefix returns something, we assume can output any specialization after it in order.
+ {
+ UnownedStringSlice prefix = _getCUDAResourceTypePrefix(type->op);
+ if (prefix.size() > 0)
+ {
+ auto oldWriter = m_writer;
+ SourceManager* sourceManager = oldWriter->getSourceManager();
+
+ // TODO(JS): This is a bit of a hack. We don't want to emit the result here,
+ // so we replace the writer, write out the type, grab the contents, and restore the writer
+
+ SourceWriter writer(sourceManager, LineDirectiveMode::None);
+ m_writer = &writer;
+
+ m_writer->emit(prefix);
+
+ // TODO(JS).
+ // Assumes ordering of types matches ordering of operands.
+
+ UInt operandCount = type->getOperandCount();
+ if (operandCount)
+ {
+ m_writer->emit("<");
+ for (UInt ii = 0; ii < operandCount; ++ii)
+ {
+ if (ii != 0)
+ {
+ m_writer->emit(", ");
+ }
+ emitVal(type->getOperand(ii), getInfo(EmitOp::General));
+ }
+ m_writer->emit(">");
+ }
+
+ out << writer.getContent();
+
+ m_writer = oldWriter;
+ return SLANG_OK;
+ }
+ }
+
+ break;
+ }
+ }
+
+ SLANG_DIAGNOSE_UNEXPECTED(getSink(), SourceLoc(), "unhandled type for CUDA emit");
+ return SLANG_FAIL;
+}
+
+
+UnownedStringSlice CUDASourceEmitter::_getCUDATypeName(IRType* type)
+{
+ StringSlicePool::Handle handle = StringSlicePool::kNullHandle;
+ if (m_typeNameMap.TryGetValue(type, handle))
+ {
+ return m_slicePool.getSlice(handle);
+ }
+
+#if 0
+ if (type->op == kIROp_MatrixType)
+ {
+ auto matType = static_cast<IRMatrixType*>(type);
+
+ auto elementType = matType->getElementType();
+ const auto rowCount = int(GetIntVal(matType->getRowCount()));
+ const auto colCount = int(GetIntVal(matType->getColumnCount()));
+
+ // Make sure the vector type the matrix is built on is added
+ useType(_getVecType(elementType, colCount));
+ }
+#endif
+
+ StringBuilder builder;
+ if (SLANG_SUCCEEDED(_calcCUDATypeName(type, builder)))
+ {
+ handle = m_slicePool.add(builder);
+ }
+
+ m_typeNameMap.Add(type, handle);
+
+ SLANG_ASSERT(handle != StringSlicePool::kNullHandle);
+ return m_slicePool.getSlice(handle);
+}
+
+void CUDASourceEmitter::_emitCUDADecorationSingleString(const char* name, IRFunc* entryPoint, IRStringLit* val)
+{
+ SLANG_UNUSED(entryPoint);
+ assert(val);
+
+ m_writer->emit("[");
+ m_writer->emit(name);
+ m_writer->emit("(\"");
+ m_writer->emit(val->getStringSlice());
+ m_writer->emit("\")]\n");
+}
+
+void CUDASourceEmitter::_emitCUDADecorationSingleInt(const char* name, IRFunc* entryPoint, IRIntLit* val)
+{
+ SLANG_UNUSED(entryPoint);
+ SLANG_ASSERT(val);
+
+ auto intVal = GetIntVal(val);
+
+ m_writer->emit("[");
+ m_writer->emit(name);
+ m_writer->emit("(");
+ m_writer->emit(intVal);
+ m_writer->emit(")]\n");
+}
+
+void CUDASourceEmitter::_emitCUDARegisterSemantic(LayoutResourceKind kind, EmitVarChain* chain, char const* uniformSemanticSpelling)
+{
+ if (!chain)
+ return;
+ if (!chain->varLayout->usesResourceKind(kind))
+ return;
+
+ UInt index = getBindingOffset(chain, kind);
+ UInt space = getBindingSpace(chain, kind);
+
+ switch (kind)
+ {
+ case LayoutResourceKind::Uniform:
+ {
+ UInt offset = index;
+
+ // The HLSL `c` register space is logically grouped in 16-byte registers,
+ // while we try to traffic in byte offsets. That means we need to pick
+ // a register number, based on the starting offset in 16-byte register
+ // units, and then a "component" within that register, based on 4-byte
+ // offsets from there. We cannot support more fine-grained offsets than that.
+
+ m_writer->emit(" : ");
+ m_writer->emit(uniformSemanticSpelling);
+ m_writer->emit("(c");
+
+ // Size of a logical `c` register in bytes
+ auto registerSize = 16;
+
+ // Size of each component of a logical `c` register, in bytes
+ auto componentSize = 4;
+
+ size_t startRegister = offset / registerSize;
+ m_writer->emit(int(startRegister));
+
+ size_t byteOffsetInRegister = offset % registerSize;
+
+ // If this field doesn't start on an even register boundary,
+ // then we need to emit additional information to pick the
+ // right component to start from
+ if (byteOffsetInRegister != 0)
+ {
+ // The value had better occupy a whole number of components.
+ SLANG_RELEASE_ASSERT(byteOffsetInRegister % componentSize == 0);
+
+ size_t startComponent = byteOffsetInRegister / componentSize;
+
+ static const char* kComponentNames[] = { "x", "y", "z", "w" };
+ m_writer->emit(".");
+ m_writer->emit(kComponentNames[startComponent]);
+ }
+ m_writer->emit(")");
+ }
+ break;
+
+ case LayoutResourceKind::RegisterSpace:
+ case LayoutResourceKind::GenericResource:
+ case LayoutResourceKind::ExistentialTypeParam:
+ case LayoutResourceKind::ExistentialObjectParam:
+ // ignore
+ break;
+ default:
+ {
+ m_writer->emit(" : register(");
+ switch (kind)
+ {
+ case LayoutResourceKind::ConstantBuffer:
+ m_writer->emit("b");
+ break;
+ case LayoutResourceKind::ShaderResource:
+ m_writer->emit("t");
+ break;
+ case LayoutResourceKind::UnorderedAccess:
+ m_writer->emit("u");
+ break;
+ case LayoutResourceKind::SamplerState:
+ m_writer->emit("s");
+ break;
+ default:
+ SLANG_DIAGNOSE_UNEXPECTED(getSink(), SourceLoc(), "unhandled HLSL register type");
+ break;
+ }
+ m_writer->emit(index);
+ if (space)
+ {
+ m_writer->emit(", space");
+ m_writer->emit(space);
+ }
+ m_writer->emit(")");
+ }
+ }
+}
+
+void CUDASourceEmitter::_emitCUDARegisterSemantics(EmitVarChain* chain, char const* uniformSemanticSpelling)
+{
+ if (!chain) return;
+
+ auto layout = chain->varLayout;
+
+ switch (getSourceStyle())
+ {
+ default:
+ return;
+
+ case SourceStyle::HLSL:
+ break;
+ }
+
+ for (auto rr : layout->getOffsetAttrs())
+ {
+ _emitCUDARegisterSemantic(rr->getResourceKind(), chain, uniformSemanticSpelling);
+ }
+}
+
+void CUDASourceEmitter::_emitCUDARegisterSemantics(IRVarLayout* varLayout, char const* uniformSemanticSpelling)
+{
+ if (!varLayout)
+ return;
+
+ EmitVarChain chain(varLayout);
+ _emitCUDARegisterSemantics(&chain, uniformSemanticSpelling);
+}
+
+void CUDASourceEmitter::_emitCUDAParameterGroupFieldLayoutSemantics(EmitVarChain* chain)
+{
+ if (!chain)
+ return;
+
+ auto layout = chain->varLayout;
+ for (auto rr : layout->getOffsetAttrs())
+ {
+ _emitCUDARegisterSemantic(rr->getResourceKind(), chain, "packoffset");
+ }
+}
+
+void CUDASourceEmitter::_emitCUDAParameterGroupFieldLayoutSemantics(IRVarLayout* fieldLayout, EmitVarChain* inChain)
+{
+ EmitVarChain chain(fieldLayout, inChain);
+ _emitCUDAParameterGroupFieldLayoutSemantics(&chain);
+}
+
+void CUDASourceEmitter::_emitCUDAParameterGroup(IRGlobalParam* varDecl, IRUniformParameterGroupType* type)
+{
+ if (as<IRTextureBufferType>(type))
+ {
+ m_writer->emit("tbuffer ");
+ }
+ else
+ {
+ m_writer->emit("cbuffer ");
+ }
+ m_writer->emit(getName(varDecl));
+
+ auto varLayout = getVarLayout(varDecl);
+ SLANG_RELEASE_ASSERT(varLayout);
+
+ EmitVarChain blockChain(varLayout);
+
+ EmitVarChain containerChain = blockChain;
+ EmitVarChain elementChain = blockChain;
+
+ auto typeLayout = varLayout->getTypeLayout();
+ if (auto parameterGroupTypeLayout = as<IRParameterGroupTypeLayout>(typeLayout))
+ {
+ containerChain = EmitVarChain(parameterGroupTypeLayout->getContainerVarLayout(), &blockChain);
+ elementChain = EmitVarChain(parameterGroupTypeLayout->getElementVarLayout(), &blockChain);
+
+ typeLayout = parameterGroupTypeLayout->getElementVarLayout()->getTypeLayout();
+ }
+
+ _emitCUDARegisterSemantic(LayoutResourceKind::ConstantBuffer, &containerChain);
+
+ m_writer->emit("\n{\n");
+ m_writer->indent();
+
+ auto elementType = type->getElementType();
+
+ emitType(elementType, getName(varDecl));
+ m_writer->emit(";\n");
+
+ m_writer->dedent();
+ m_writer->emit("}\n");
+}
+
+void CUDASourceEmitter::emitLayoutSemanticsImpl(IRInst* inst, char const* uniformSemanticSpelling)
+{
+ auto layout = getVarLayout(inst);
+ if (layout)
+ {
+ _emitCUDARegisterSemantics(layout, uniformSemanticSpelling);
+ }
+}
+
+void CUDASourceEmitter::emitParameterGroupImpl(IRGlobalParam* varDecl, IRUniformParameterGroupType* type)
+{
+ _emitCUDAParameterGroup(varDecl, type);
+}
+
+void CUDASourceEmitter::emitEntryPointAttributesImpl(IRFunc* irFunc, IREntryPointDecoration* entryPointDecor)
+{
+ auto profile = m_effectiveProfile;
+ auto stage = entryPointDecor->getProfile().GetStage();
+
+ switch (stage)
+ {
+ case Stage::Compute:
+ {
+ Int sizeAlongAxis[kThreadGroupAxisCount];
+ getComputeThreadGroupSize(irFunc, sizeAlongAxis);
+
+#if 0
+ m_writer->emit("[numthreads(");
+ for (int ii = 0; ii < kThreadGroupAxisCount; ++ii)
+ {
+ if (ii != 0) m_writer->emit(", ");
+ m_writer->emit(sizeAlongAxis[ii]);
+ }
+ m_writer->emit(")]\n");
+#endif
+
+ m_writer->emit("__global__ ");
+ break;
+ }
+
+ // TODO: There are other stages that will need this kind of handling.
+ default:
+ break;
+ }
+}
+
+void CUDASourceEmitter::emitOperandImpl(IRInst* inst, EmitOpInfo const& outerPrec)
+{
+ if (shouldFoldInstIntoUseSites(inst))
+ {
+ emitInstExpr(inst, outerPrec);
+ return;
+ }
+
+ switch (inst->op)
+ {
+ case kIROp_Param:
+ {
+ auto varLayout = getVarLayout(inst);
+ if (varLayout)
+ {
+ if (auto systemValueSemantic = varLayout->findSystemValueSemanticAttr())
+ {
+ String semanticNameSpelling = systemValueSemantic->getName();
+ semanticNameSpelling = semanticNameSpelling.toLower();
+
+ if (semanticNameSpelling == "sv_dispatchthreadid")
+ {
+ m_semanticUsedFlags |= SemanticUsedFlag::DispatchThreadID;
+ m_writer->emit("((blockIdx * blockDim) + threadIdx)");
+
+ return;
+ }
+ else if (semanticNameSpelling == "sv_groupid")
+ {
+ m_semanticUsedFlags |= SemanticUsedFlag::GroupID;
+ m_writer->emit("blockIdx");
+ return;
+ }
+ else if (semanticNameSpelling == "sv_groupthreadid")
+ {
+ m_semanticUsedFlags |= SemanticUsedFlag::GroupThreadID;
+ m_writer->emit("threadIdx");
+ return;
+ }
+ }
+ }
+
+ break;
+ }
+ default: break;
+ }
+ m_writer->emit(getName(inst));
+}
+
+bool CUDASourceEmitter::tryEmitInstExprImpl(IRInst* inst, const EmitOpInfo& inOuterPrec)
+{
+ switch (inst->op)
+ {
+ case kIROp_Construct:
+ case kIROp_makeVector:
+ {
+ if (inst->getOperandCount() == 1)
+ {
+ EmitOpInfo outerPrec = inOuterPrec;
+ bool needClose = false;
+
+ auto prec = getInfo(EmitOp::Prefix);
+ needClose = maybeEmitParens(outerPrec, prec);
+
+ // Need to emit as cast for HLSL
+ m_writer->emit("(");
+ emitType(inst->getDataType());
+ m_writer->emit(") ");
+ emitOperand(inst->getOperand(0), rightSide(outerPrec, prec));
+
+ maybeCloseParens(needClose);
+ // Handled
+ return true;
+ }
+ else
+ {
+ m_writer->emit("make_");
+ m_writer->emit(_getCUDATypeName(inst->getDataType()));
+ emitArgs(inst);
+ return true;
+ }
+ break;
+ }
+ case kIROp_MakeMatrix:
+ {
+ return false;
+ }
+ case kIROp_BitCast:
+ {
+ auto toType = extractBaseType(inst->getDataType());
+ switch (toType)
+ {
+ default:
+ m_writer->emit("/* unhandled */");
+ break;
+ case BaseType::UInt:
+ break;
+ case BaseType::Int:
+ m_writer->emit("(");
+ emitType(inst->getDataType());
+ m_writer->emit(")");
+ break;
+ case BaseType::Float:
+ m_writer->emit("asfloat");
+ break;
+ }
+
+ m_writer->emit("(");
+ emitOperand(inst->getOperand(0), getInfo(EmitOp::General));
+ m_writer->emit(")");
+ return true;
+ }
+ default: break;
+ }
+ // Not handled
+ return false;
+}
+
+void CUDASourceEmitter::emitLayoutDirectivesImpl(TargetRequest* targetReq)
+{
+ SLANG_UNUSED(targetReq);
+}
+
+void CUDASourceEmitter::emitVectorTypeNameImpl(IRType* elementType, IRIntegerValue elementCount)
+{
+ m_writer->emit(getVectorPrefix(elementType->op));
+ m_writer->emit(elementCount);
+}
+
+void CUDASourceEmitter::emitSimpleTypeImpl(IRType* type)
+{
+ m_writer->emit(_getCUDATypeName(type));
+}
+
+void CUDASourceEmitter::emitRateQualifiersImpl(IRRate* rate)
+{
+ if (as<IRGroupSharedRate>(rate))
+ {
+ m_writer->emit("groupshared ");
+ }
+}
+
+void CUDASourceEmitter::emitSimpleFuncParamsImpl(IRFunc* func)
+{
+ m_writer->emit("(");
+
+ bool hasEmittedParam = false;
+ auto firstParam = func->getFirstParam();
+ for (auto pp = firstParam; pp; pp = pp->getNextParam())
+ {
+ auto varLayout = getVarLayout(pp);
+ if (varLayout && varLayout->findSystemValueSemanticAttr())
+ {
+ // If it has a semantic don't output, it will be accessed via a global
+ continue;
+ }
+
+ if (hasEmittedParam)
+ m_writer->emit(", ");
+
+ emitSimpleFuncParamImpl(pp);
+ hasEmittedParam = true;
+ }
+
+ m_writer->emit(")");
+}
+
+void CUDASourceEmitter::emitSimpleFuncImpl(IRFunc* func)
+{
+ if (IREntryPointDecoration* entryPointDecor = func->findDecoration<IREntryPointDecoration>())
+ {
+ // If its an entry point, we let the entry point attribute control the output
+ Super::emitSimpleFuncImpl(func);
+ }
+ else
+ {
+ // If it's not an entry point mark as device
+ m_writer->emit("__device__ ");
+ Super::emitSimpleFuncImpl(func);
+ }
+}
+
+void CUDASourceEmitter::emitSemanticsImpl(IRInst* inst)
+{
+ if (auto semanticDecoration = inst->findDecoration<IRSemanticDecoration>())
+ {
+ m_writer->emit(" : ");
+ m_writer->emit(semanticDecoration->getSemanticName());
+ return;
+ }
+
+ if (auto layoutDecoration = inst->findDecoration<IRLayoutDecoration>())
+ {
+ auto layout = layoutDecoration->getLayout();
+ if (auto varLayout = as<IRVarLayout>(layout))
+ {
+ emitSemanticsUsingVarLayout(varLayout);
+ }
+ else if (auto entryPointLayout = as<IREntryPointLayout>(layout))
+ {
+ if (auto resultLayout = entryPointLayout->getResultLayout())
+ {
+ emitSemanticsUsingVarLayout(resultLayout);
+ }
+ }
+ }
+}
+
+static UnownedStringSlice _getInterpolationModifierText(IRInterpolationMode mode)
+{
+ switch (mode)
+ {
+ case IRInterpolationMode::NoInterpolation: return UnownedStringSlice::fromLiteral("nointerpolation");
+ case IRInterpolationMode::NoPerspective: return UnownedStringSlice::fromLiteral("noperspective");
+ case IRInterpolationMode::Linear: return UnownedStringSlice::fromLiteral("linear");
+ case IRInterpolationMode::Sample: return UnownedStringSlice::fromLiteral("sample");
+ case IRInterpolationMode::Centroid: return UnownedStringSlice::fromLiteral("centroid");
+ default: return UnownedStringSlice();
+ }
+}
+
+void CUDASourceEmitter::emitInterpolationModifiersImpl(IRInst* varInst, IRType* valueType, IRVarLayout* layout)
+{
+ SLANG_UNUSED(layout);
+ SLANG_UNUSED(valueType);
+
+ for (auto dd : varInst->getDecorations())
+ {
+ if (dd->op != kIROp_InterpolationModeDecoration)
+ continue;
+
+ auto decoration = (IRInterpolationModeDecoration*)dd;
+
+ UnownedStringSlice modeText = _getInterpolationModifierText(decoration->getMode());
+ if (modeText.size() > 0)
+ {
+ m_writer->emit(modeText);
+ m_writer->emitChar(' ');
+ }
+ }
+}
+
+void CUDASourceEmitter::emitVarDecorationsImpl(IRInst* varDecl)
+{
+ if (varDecl->findDecoration<IRGloballyCoherentDecoration>())
+ {
+ m_writer->emit("globallycoherent\n");
+ }
+}
+
+void CUDASourceEmitter::emitMatrixLayoutModifiersImpl(IRVarLayout* layout)
+{
+ // When a variable has a matrix type, we want to emit an explicit
+ // layout qualifier based on what the layout has been computed to be.
+ //
+
+ auto typeLayout = layout->getTypeLayout()->unwrapArray();
+
+ if (auto matrixTypeLayout = as<IRMatrixTypeLayout>(typeLayout))
+ {
+ switch (matrixTypeLayout->getMode())
+ {
+ case kMatrixLayoutMode_ColumnMajor:
+ m_writer->emit("column_major ");
+ break;
+
+ case kMatrixLayoutMode_RowMajor:
+ m_writer->emit("row_major ");
+ break;
+ }
+ }
+}
+
+
+} // namespace Slang
diff --git a/source/slang/slang-emit-cuda.h b/source/slang/slang-emit-cuda.h
new file mode 100644
index 000000000..6de2f5ea3
--- /dev/null
+++ b/source/slang/slang-emit-cuda.h
@@ -0,0 +1,80 @@
+// slang-emit-cuda.h
+#ifndef SLANG_EMIT_CUDA_H
+#define SLANG_EMIT_CUDA_H
+
+#include "slang-emit-c-like.h"
+
+namespace Slang
+{
+
+class CUDASourceEmitter : public CLikeSourceEmitter
+{
+public:
+ typedef CLikeSourceEmitter Super;
+
+ typedef uint32_t SemanticUsedFlags;
+ struct SemanticUsedFlag
+ {
+ enum Enum : SemanticUsedFlags
+ {
+ DispatchThreadID = 0x01,
+ GroupThreadID = 0x02,
+ GroupID = 0x04,
+ };
+ };
+
+ static UnownedStringSlice getBuiltinTypeName(IROp op);
+ static UnownedStringSlice getVectorPrefix(IROp op);
+
+ CUDASourceEmitter(const Desc& desc) :
+ Super(desc),
+ m_slicePool(StringSlicePool::Style::Default)
+ {}
+
+protected:
+
+ virtual void emitLayoutSemanticsImpl(IRInst* inst, char const* uniformSemanticSpelling) SLANG_OVERRIDE;
+ virtual void emitParameterGroupImpl(IRGlobalParam* varDecl, IRUniformParameterGroupType* type) SLANG_OVERRIDE;
+ virtual void emitEntryPointAttributesImpl(IRFunc* irFunc, IREntryPointDecoration* entryPointDecor) SLANG_OVERRIDE;
+ virtual void emitLayoutDirectivesImpl(TargetRequest* targetReq) SLANG_OVERRIDE;
+ virtual void emitRateQualifiersImpl(IRRate* rate) SLANG_OVERRIDE;
+ virtual void emitSemanticsImpl(IRInst* inst) SLANG_OVERRIDE;
+ virtual void emitSimpleFuncImpl(IRFunc* func) SLANG_OVERRIDE;
+ virtual void emitSimpleFuncParamsImpl(IRFunc* func) SLANG_OVERRIDE;
+ virtual void emitInterpolationModifiersImpl(IRInst* varInst, IRType* valueType, IRVarLayout* layout) SLANG_OVERRIDE;
+ virtual void emitSimpleTypeImpl(IRType* type) SLANG_OVERRIDE;
+ virtual void emitVectorTypeNameImpl(IRType* elementType, IRIntegerValue elementCount) SLANG_OVERRIDE;
+ virtual void emitVarDecorationsImpl(IRInst* varDecl) SLANG_OVERRIDE;
+ virtual void emitMatrixLayoutModifiersImpl(IRVarLayout* layout) SLANG_OVERRIDE;
+ virtual void emitOperandImpl(IRInst* inst, EmitOpInfo const& outerPrec) SLANG_OVERRIDE;
+
+ virtual bool tryEmitInstExprImpl(IRInst* inst, const EmitOpInfo& inOuterPrec) SLANG_OVERRIDE;
+
+ // Emit a single `register` semantic, as appropriate for a given resource-type-specific layout info
+ // Keyword to use in the uniform case (`register` for globals, `packoffset` inside a `cbuffer`)
+ void _emitCUDARegisterSemantic(LayoutResourceKind kind, EmitVarChain* chain, char const* uniformSemanticSpelling = "register");
+
+ // Emit all the `register` semantics that are appropriate for a particular variable layout
+ void _emitCUDARegisterSemantics(EmitVarChain* chain, char const* uniformSemanticSpelling = "register");
+ void _emitCUDARegisterSemantics(IRVarLayout* varLayout, char const* uniformSemanticSpelling = "register");
+
+ void _emitCUDAParameterGroupFieldLayoutSemantics(EmitVarChain* chain);
+ void _emitCUDAParameterGroupFieldLayoutSemantics(IRVarLayout* fieldLayout, EmitVarChain* inChain);
+
+ void _emitCUDAParameterGroup(IRGlobalParam* varDecl, IRUniformParameterGroupType* type);
+
+ void _emitCUDADecorationSingleString(const char* name, IRFunc* entryPoint, IRStringLit* val);
+ void _emitCUDADecorationSingleInt(const char* name, IRFunc* entryPoint, IRIntLit* val);
+
+ SlangResult _calcCUDATypeName(IRType* type, StringBuilder& out);
+ UnownedStringSlice _getCUDATypeName(IRType* inType);
+ SlangResult _calcCUDATextureTypeName(IRTextureTypeBase* texType, StringBuilder& outName);
+
+ Dictionary<IRType*, StringSlicePool::Handle> m_typeNameMap;
+ StringSlicePool m_slicePool;
+
+ UInt m_semanticUsedFlags = 0;
+};
+
+}
+#endif
diff --git a/source/slang/slang-emit.cpp b/source/slang/slang-emit.cpp
index 9aa7b1203..19c3cad68 100644
--- a/source/slang/slang-emit.cpp
+++ b/source/slang/slang-emit.cpp
@@ -32,23 +32,12 @@
#include "slang-emit-glsl.h"
#include "slang-emit-hlsl.h"
#include "slang-emit-cpp.h"
+#include "slang-emit-cuda.h"
#include <assert.h>
namespace Slang {
-enum class BuiltInCOp
-{
- Splat, //< Splat a single value to all values of a vector or matrix type
- Init, //< Initialize with parameters (must match the type)
-};
-
-
-//
-
-
-//
-
EntryPointLayout* findEntryPointLayout(
ProgramLayout* programLayout,
EntryPoint* entryPoint)
@@ -444,7 +433,7 @@ Result linkAndOptimizeIR(
return SLANG_OK;
}
-String emitEntryPointSource(
+String emitEntryPointSourceFromIR(
BackEndCompileRequest* compileRequest,
Int entryPointIndex,
CodeGenTarget target,
@@ -499,6 +488,11 @@ String emitEntryPointSource(
sourceEmitter = new HLSLSourceEmitter(desc);
break;
}
+ case SourceStyle::CUDA:
+ {
+ sourceEmitter = new CUDASourceEmitter(desc);
+ break;
+ }
default: break;
}
diff --git a/source/slang/slang-emit.h b/source/slang/slang-emit.h
index e9ee361d7..1f3298316 100644
--- a/source/slang/slang-emit.h
+++ b/source/slang/slang-emit.h
@@ -34,7 +34,7 @@ namespace Slang
/// generate different HLSL output if we know it
/// will be used to generate SPIR-V).
///
- String emitEntryPointSource(
+ String emitEntryPointSourceFromIR(
BackEndCompileRequest* compileRequest,
Int entryPointIndex,
CodeGenTarget target,
diff --git a/source/slang/slang-ir-link.cpp b/source/slang/slang-ir-link.cpp
index 6399e3f6b..904a88d78 100644
--- a/source/slang/slang-ir-link.cpp
+++ b/source/slang/slang-ir-link.cpp
@@ -919,9 +919,13 @@ String getTargetName(IRSpecContext* context)
case CodeGenTarget::CPPSource:
return "cpp";
+ case CodeGenTarget::CUDASource:
+ return "cuda";
+
case CodeGenTarget::SPIRV:
return "spirv";
+
default:
SLANG_UNEXPECTED("unhandled case");
UNREACHABLE_RETURN("unknown");
diff --git a/source/slang/slang-parameter-binding.cpp b/source/slang/slang-parameter-binding.cpp
index 5cd4156c3..8ebc4f420 100644
--- a/source/slang/slang-parameter-binding.cpp
+++ b/source/slang/slang-parameter-binding.cpp
@@ -2585,6 +2585,19 @@ static bool _isCPUTarget(CodeGenTarget target)
}
}
+static bool _isPTXTarget(CodeGenTarget target)
+{
+ switch (target)
+ {
+ case CodeGenTarget::CUDASource:
+ case CodeGenTarget::PTX:
+ {
+ return true;
+ }
+ default: return false;
+ }
+}
+
/// Keep track of the running global counter for entry points and global parameters visited.
///
/// Because of explicit `register` and `[[vk::binding(...)]]` support, parameter binding
@@ -2970,7 +2983,8 @@ RefPtr<ProgramLayout> generateParameterBindings(
// On a CPU target, it's okay to have global scope parameters that use Uniform resources (because on CPU
// all resources are 'Uniform')
- if (!_isCPUTarget(targetReq->target))
+ // TODO(JS): We'll just assume the same with CUDA target for now..
+ if (!_isCPUTarget(targetReq->target) && !_isPTXTarget(targetReq->target))
{
for( auto& parameterInfo : sharedContext.parameters )
{
diff --git a/source/slang/slang-type-layout.cpp b/source/slang/slang-type-layout.cpp
index 88e62323b..772686163 100644
--- a/source/slang/slang-type-layout.cpp
+++ b/source/slang/slang-type-layout.cpp
@@ -398,6 +398,13 @@ struct CPULayoutRulesImpl : DefaultLayoutRulesImpl
}
};
+// TODO(JS): Most likely wrong. For layout for CUDA, we'll just do the default to get things up and running
+struct CUDALayoutRulesImpl : DefaultLayoutRulesImpl
+{
+ typedef DefaultLayoutRulesImpl Super;
+};
+
+
struct HLSLStructuredBufferLayoutRulesImpl : DefaultLayoutRulesImpl
{
// HLSL structured buffers drop the restrictions added for constant buffers,
@@ -653,9 +660,29 @@ struct CPULayoutRulesFamilyImpl : LayoutRulesFamilyImpl
LayoutRulesImpl* getStructuredBufferRules() override;
};
+struct CUDALayoutRulesFamilyImpl : LayoutRulesFamilyImpl
+{
+ virtual LayoutRulesImpl* getConstantBufferRules() override;
+ virtual LayoutRulesImpl* getPushConstantBufferRules() override;
+ virtual LayoutRulesImpl* getTextureBufferRules() override;
+ virtual LayoutRulesImpl* getVaryingInputRules() override;
+ virtual LayoutRulesImpl* getVaryingOutputRules() override;
+ virtual LayoutRulesImpl* getSpecializationConstantRules() override;
+ virtual LayoutRulesImpl* getShaderStorageBufferRules() override;
+ virtual LayoutRulesImpl* getParameterBlockRules() override;
+
+ LayoutRulesImpl* getRayPayloadParameterRules() override;
+ LayoutRulesImpl* getCallablePayloadParameterRules() override;
+ LayoutRulesImpl* getHitAttributesParameterRules() override;
+
+ LayoutRulesImpl* getShaderRecordConstantBufferRules() override;
+ LayoutRulesImpl* getStructuredBufferRules() override;
+};
+
GLSLLayoutRulesFamilyImpl kGLSLLayoutRulesFamilyImpl;
HLSLLayoutRulesFamilyImpl kHLSLLayoutRulesFamilyImpl;
CPULayoutRulesFamilyImpl kCPULayoutRulesFamilyImpl;
+CUDALayoutRulesFamilyImpl kCUDALayoutRulesFamilyImpl;
// CPU case
@@ -703,6 +730,12 @@ struct CPUObjectLayoutRulesImpl : ObjectLayoutRulesImpl
};
+// TODO(JS): Most likely wrong! Use CPU layout for CUDA for now
+struct CUDAObjectLayoutRulesImpl : CPUObjectLayoutRulesImpl
+{
+ typedef CPUObjectLayoutRulesImpl Super;
+
+};
static CPUObjectLayoutRulesImpl kCPUObjectLayoutRulesImpl;
static CPULayoutRulesImpl kCPULayoutRulesImpl;
@@ -711,6 +744,16 @@ LayoutRulesImpl kCPULayoutRulesImpl_ = {
&kCPULayoutRulesFamilyImpl, &kCPULayoutRulesImpl, &kCPUObjectLayoutRulesImpl,
};
+// CUDA
+
+static CUDAObjectLayoutRulesImpl kCUDAObjectLayoutRulesImpl;
+static CUDALayoutRulesImpl kCUALayoutRulesImpl;
+
+LayoutRulesImpl kCUDALayoutRulesImpl_ = {
+ &kCPULayoutRulesFamilyImpl, &kCUALayoutRulesImpl, &kCUDAObjectLayoutRulesImpl,
+};
+
+
// GLSL cases
LayoutRulesImpl kStd140LayoutRulesImpl_ = {
@@ -986,6 +1029,69 @@ LayoutRulesImpl* CPULayoutRulesFamilyImpl::getStructuredBufferRules()
return &kCPULayoutRulesImpl_;
}
+// CUDA Family
+
+LayoutRulesImpl* CUDALayoutRulesFamilyImpl::getConstantBufferRules()
+{
+ return &kCPULayoutRulesImpl_;
+}
+
+LayoutRulesImpl* CUDALayoutRulesFamilyImpl::getPushConstantBufferRules()
+{
+ return &kCPULayoutRulesImpl_;
+}
+
+LayoutRulesImpl* CUDALayoutRulesFamilyImpl::getTextureBufferRules()
+{
+ return nullptr;
+}
+
+LayoutRulesImpl* CUDALayoutRulesFamilyImpl::getVaryingInputRules()
+{
+ return nullptr;
+}
+LayoutRulesImpl* CUDALayoutRulesFamilyImpl::getVaryingOutputRules()
+{
+ return nullptr;
+}
+LayoutRulesImpl* CUDALayoutRulesFamilyImpl::getSpecializationConstantRules()
+{
+ return nullptr;
+}
+LayoutRulesImpl* CUDALayoutRulesFamilyImpl::getShaderStorageBufferRules()
+{
+ return nullptr;
+}
+LayoutRulesImpl* CUDALayoutRulesFamilyImpl::getParameterBlockRules()
+{
+ // Not clear - just use similar to CPU
+ return &kCUDALayoutRulesImpl_;
+}
+LayoutRulesImpl* CUDALayoutRulesFamilyImpl::getRayPayloadParameterRules()
+{
+ return nullptr;
+}
+LayoutRulesImpl* CUDALayoutRulesFamilyImpl::getCallablePayloadParameterRules()
+{
+ return nullptr;
+}
+LayoutRulesImpl* CUDALayoutRulesFamilyImpl::getHitAttributesParameterRules()
+{
+ return nullptr;
+}
+LayoutRulesImpl* CUDALayoutRulesFamilyImpl::getShaderRecordConstantBufferRules()
+{
+ // Just following HLSLs lead for the moment
+ return &kCUDALayoutRulesImpl_;
+}
+
+LayoutRulesImpl* CUDALayoutRulesFamilyImpl::getStructuredBufferRules()
+{
+ return &kCUDALayoutRulesImpl_;
+}
+
+
+
LayoutRulesFamilyImpl* getDefaultLayoutRulesFamilyForTarget(TargetRequest* targetReq)
{
switch (targetReq->getTarget())
@@ -1019,6 +1125,13 @@ LayoutRulesFamilyImpl* getDefaultLayoutRulesFamilyForTarget(TargetRequest* targe
return &kCPULayoutRulesFamilyImpl;
}
+ case CodeGenTarget::PTX:
+ case CodeGenTarget::CUDASource:
+ {
+ return &kCUDALayoutRulesFamilyImpl;
+ }
+
+
default:
return nullptr;
}
diff --git a/source/slang/slang.vcxproj b/source/slang/slang.vcxproj
index 8c8a712ce..e120e006f 100644
--- a/source/slang/slang.vcxproj
+++ b/source/slang/slang.vcxproj
@@ -199,6 +199,7 @@
<ClInclude Include="slang-diagnostics.h" />
<ClInclude Include="slang-emit-c-like.h" />
<ClInclude Include="slang-emit-cpp.h" />
+ <ClInclude Include="slang-emit-cuda.h" />
<ClInclude Include="slang-emit-glsl-extension-tracker.h" />
<ClInclude Include="slang-emit-glsl.h" />
<ClInclude Include="slang-emit-hlsl.h" />
@@ -280,6 +281,7 @@
<ClCompile Include="slang-dxc-support.cpp" />
<ClCompile Include="slang-emit-c-like.cpp" />
<ClCompile Include="slang-emit-cpp.cpp" />
+ <ClCompile Include="slang-emit-cuda.cpp" />
<ClCompile Include="slang-emit-glsl-extension-tracker.cpp" />
<ClCompile Include="slang-emit-glsl.cpp" />
<ClCompile Include="slang-emit-hlsl.cpp" />
@@ -373,4 +375,4 @@
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
-</Project> \ No newline at end of file
+</Project> \ No newline at end of file
diff --git a/source/slang/slang.vcxproj.filters b/source/slang/slang.vcxproj.filters
index c562d28e3..1c619a3d5 100644
--- a/source/slang/slang.vcxproj.filters
+++ b/source/slang/slang.vcxproj.filters
@@ -48,6 +48,9 @@
<ClInclude Include="slang-emit-cpp.h">
<Filter>Header Files</Filter>
</ClInclude>
+ <ClInclude Include="slang-emit-cuda.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
<ClInclude Include="slang-emit-glsl-extension-tracker.h">
<Filter>Header Files</Filter>
</ClInclude>
@@ -287,6 +290,9 @@
<ClCompile Include="slang-emit-cpp.cpp">
<Filter>Source Files</Filter>
</ClCompile>
+ <ClCompile Include="slang-emit-cuda.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
<ClCompile Include="slang-emit-glsl-extension-tracker.cpp">
<Filter>Source Files</Filter>
</ClCompile>
diff --git a/tests/cuda/compile-to-cuda.slang b/tests/cuda/compile-to-cuda.slang
new file mode 100644
index 000000000..6166aaf0b
--- /dev/null
+++ b/tests/cuda/compile-to-cuda.slang
@@ -0,0 +1,29 @@
+//DISABLE_TEST(smoke):SIMPLE: -target ptx -entry computeMain -stage compute
+
+//TEST_INPUT:ubuffer(data=[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0], stride=4):out,name=outputBuffer
+RWStructuredBuffer<int> outputBuffer : register(u0);
+
+int quantize(double value)
+{
+ return int(value * 256);
+}
+
+int quantize(float value)
+{
+ return int(value * 256);
+}
+
+[numthreads(4, 1, 1)]
+void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID)
+{
+ float values[] = { -9, 9, -3, 3 };
+
+ int tid = int(dispatchThreadID.x);
+ float value = values[tid];
+
+ outputBuffer[tid * 4] = quantize(sin(value));
+ outputBuffer[tid * 4 + 1] = quantize(cos(value));
+
+ outputBuffer[tid * 4 + 2] = quantize(sin(double(value)));
+ outputBuffer[tid * 4 + 3] = quantize(cos(double(value)));
+}
diff --git a/tools/slang-test/slang-test-main.cpp b/tools/slang-test/slang-test-main.cpp
index cdfe6a1a7..6401ac852 100644
--- a/tools/slang-test/slang-test-main.cpp
+++ b/tools/slang-test/slang-test-main.cpp
@@ -505,6 +505,7 @@ static PassThroughFlags _getPassThroughFlagsForTarget(SlangCompileTarget target)
case SLANG_GLSL:
case SLANG_C_SOURCE:
case SLANG_CPP_SOURCE:
+ case SLANG_CUDA_SOURCE:
{
return 0;
}