WIP CUDA source emit (#1157)

* CPPCompiler -> DownstreamCompiler * Added DownstreamCompileResult to start abstraction such that we don't need files. * * Split out slang-blob.cpp * Made CompileResult hold a DownstreamCompileResult - for access to binary or ISlangSharedLibrary * Keep temporary files in scope. * Add a hash to the hex dump stream. * Move all file tracking into DownstreamCompiler. * WIP support for nvrtc. * WIP: Adding support for nvrtc compiler. Adding enum types, wiring up the nvrtc into slang. * Fix remaining CPPCompiler references. * Fix order issue on target string matching. * Use ISlangSharedLibrary for nvrtc. * Use DownstreamCompiler for nvrtc. * WIP first pass at compilation win nvrtc. * Added testing if file is on file system into CommandLineDownstreamCompiler. Added sourceContentsPath. * Make test cuda-compile.cu work by just compiling not comparing output. * Genearlize DownstreamCompiler usage. * Fix warning on clang. * Remove CompilerType from DownstreamCompiler. * Use DownstreamCompiler interface for all compilers. NOTE for FXC, DXC and GLSLANG this doesn't mean using 'compile' - it's still extracting functions from shared library. * Replace DownstreamCompiler::SourceType -> SlangSourceLanguage * Replace _canCompile with something data driven. * Fix compiling on gcc/clang for DownstreamCompiler. * Moved some text conversions into DownstreamCompiler. * Fix problem on non-vc builds with not having return on locateCompilers for VS. * Change so no warning for code not reachable on locateCompilers for vs. * WIP: CUDA code generation - currently just using CPU layout and HLSL. * emitXXXForEntryPoint -> emitEntryPointSource emitSourceForEntryPoint -> emitEntryPointSourceFromIR Fix up generating cuda to get PTX. * WIP emitting cuda for IR. * Small improvements to CUDA ouput. * Disable the CUDA emit test, as output not currently compilable.
author: jsmall-nvidia <jsmall@nvidia.com> 2019-12-19 11:23:14 -0500
committer: GitHub <noreply@github.com> 2019-12-19 11:23:14 -0500
commit: e3fe0319467546bae070137c58dcf8f9fbe93c79 (patch)
tree: 6cc26ccda33725e98c4a9a0408cf31a1348db268
parent: 60934d98fbc20d83b5e149e72a197ec4f5c61580 (diff)
18 files changed, 1217 insertions, 160 deletions
diff --git a/source/core/slang-downstream-compiler.cpp b/source/core/slang-downstream-compiler.cpp
index 2e78ea22b..2f0cff1a9 100644
--- a/source/core/slang-downstream-compiler.cpp
+++ b/source/core/slang-downstream-compiler.cpp
@@ -182,6 +182,19 @@ void DownstreamCompiler::Desc::appendAsText(StringBuilder& out) const
     return UnownedStringSlice::fromLiteral("unknown");
 }
 
+/* static */SlangCompileTarget DownstreamCompiler::getCompileTarget(SlangSourceLanguage sourceLanguage)
+{
+    switch (sourceLanguage)
+    {
+        case SLANG_SOURCE_LANGUAGE_HLSL:    return SLANG_HLSL;
+        case SLANG_SOURCE_LANGUAGE_GLSL:    return SLANG_GLSL;
+        case SLANG_SOURCE_LANGUAGE_C:       return SLANG_C_SOURCE;
+        case SLANG_SOURCE_LANGUAGE_CPP:     return SLANG_CPP_SOURCE;
+        case SLANG_SOURCE_LANGUAGE_CUDA:    return SLANG_CUDA_SOURCE;
+        default:                            return SLANG_TARGET_UNKNOWN;
+    }
+}
+
 /* !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! DownstreamDiagnostics !!!!!!!!!!!!!!!!!!!!!!*/
 
 Index DownstreamDiagnostics::getCountByType(Diagnostic::Type type) const
diff --git a/source/core/slang-downstream-compiler.h b/source/core/slang-downstream-compiler.h
index 57fb88a3c..a4238ce38 100644
--- a/source/core/slang-downstream-compiler.h
+++ b/source/core/slang-downstream-compiler.h
@@ -296,6 +296,9 @@ public:
         /// Get the compilers name
     static UnownedStringSlice getPassThroughName(SlangPassThrough passThru);
 
+        /// Given a source language return as the equivalent compile target
+    static SlangCompileTarget getCompileTarget(SlangSourceLanguage sourceLanguage);
+
 protected:
     static Infos s_infos;
 
diff --git a/source/slang/slang-compiler.cpp b/source/slang/slang-compiler.cpp
index 07bfaacc9..d0fca1ab5 100644
--- a/source/slang/slang-compiler.cpp
+++ b/source/slang/slang-compiler.cpp
@@ -478,8 +478,6 @@ namespace Slang
                 return PassThroughMode::None;
             }
             case CodeGenTarget::GLSL:
-            case CodeGenTarget::GLSL_Vulkan:
-            case CodeGenTarget::GLSL_Vulkan_OneDesc:
             {
                 // Can always output GLSL
                 return PassThroughMode::None; 
@@ -489,6 +487,11 @@ namespace Slang
                 // Can always output HLSL
                 return PassThroughMode::None;
             }
+            case CodeGenTarget::CUDASource:
+            {
+                // Can always output CUDA
+                return PassThroughMode::None;
+            }
             case CodeGenTarget::SPIRVAssembly:
             case CodeGenTarget::SPIRV:
             {
@@ -504,6 +507,11 @@ namespace Slang
             {
                 return PassThroughMode::Dxc;
             }
+            case CodeGenTarget::GLSL_Vulkan:
+            case CodeGenTarget::GLSL_Vulkan_OneDesc:
+            {
+                return PassThroughMode::Glslang;
+            }
             case CodeGenTarget::CPPSource:
             case CodeGenTarget::CSource:
             {
@@ -581,10 +589,11 @@ namespace Slang
         outCodeBuilder << fileContent << "\n";
     }
 
-    String emitHLSLForEntryPoint(
+    String emitEntryPointSource(
         BackEndCompileRequest*  compileRequest,
         Int                     entryPointIndex,
         TargetRequest*          targetReq,
+        CodeGenTarget           target,
         EndToEndCompileRequest* endToEndReq)
     {
         if(auto translationUnit = findPassThroughTranslationUnit(endToEndReq, entryPointIndex))
@@ -596,91 +605,41 @@ namespace Slang
             // mode.
 
             StringBuilder codeBuilder;
-            for(auto sourceFile : translationUnit->getSourceFiles())
-            {
-                _appendCodeWithPath(sourceFile->getPathInfo().foundPath.getUnownedSlice(), sourceFile->getContent(), codeBuilder);
-            }
-
-            return codeBuilder.ProduceString();
-        }
-        else
-        {
-            return emitEntryPointSource(
-                compileRequest,
-                entryPointIndex,
-                CodeGenTarget::HLSL,
-                targetReq);
-        }
-    }
-
-    String emitCPPForEntryPoint(
-        BackEndCompileRequest*  compileRequest,
-        Int                     entryPointIndex,
-        TargetRequest*          targetReq,
-        EndToEndCompileRequest* endToEndReq)
-    {
-        if (auto translationUnit = findPassThroughTranslationUnit(endToEndReq, entryPointIndex))
-        {
-            // Generate a string that includes the content of
-            // the source file(s), along with a line directive
-            // to ensure that we get reasonable messages
-            // from the downstream compiler when in pass-through
-            // mode.
-
-            StringBuilder codeBuilder;
-            for (auto sourceFile : translationUnit->getSourceFiles())
+            if (target == CodeGenTarget::GLSL)
             {
-                _appendCodeWithPath(sourceFile->getPathInfo().foundPath.getUnownedSlice(), sourceFile->getContent(), codeBuilder);
+                // Special case GLSL
+                int translationUnitCounter = 0;
+                for (auto sourceFile : translationUnit->getSourceFiles())
+                {
+                    int translationUnitIndex = translationUnitCounter++;
+
+                    // We want to output `#line` directives, but we need
+                    // to skip this for the first file, since otherwise
+                    // some GLSL implementations will get tripped up by
+                    // not having the `#version` directive be the first
+                    // thing in the file.
+                    if (translationUnitIndex != 0)
+                    {
+                        codeBuilder << "#line 1 " << translationUnitIndex << "\n";
+                    }
+                    codeBuilder << sourceFile->getContent() << "\n";
+                }
             }
-
-            return codeBuilder.ProduceString();
-        }
-        else
-        {
-            return emitEntryPointSource(compileRequest, entryPointIndex, CodeGenTarget::CPPSource, targetReq);
-        }
-    }
-
-    String emitGLSLForEntryPoint(
-        BackEndCompileRequest*  compileRequest,
-        Int                     entryPointIndex,
-        TargetRequest*          targetReq,
-        EndToEndCompileRequest* endToEndReq)
-    {
-        if(auto translationUnit = findPassThroughTranslationUnit(endToEndReq, entryPointIndex))
-        {
-            // Generate a string that includes the content of
-            // the source file(s), along with a line directive
-            // to ensure that we get reasonable messages
-            // from the downstream compiler when in pass-through
-            // mode.
-
-            StringBuilder codeBuilder;
-            int translationUnitCounter = 0;
-            for(auto sourceFile : translationUnit->getSourceFiles())
+            else
             {
-                int translationUnitIndex = translationUnitCounter++;
-
-                // We want to output `#line` directives, but we need
-                // to skip this for the first file, since otherwise
-                // some GLSL implementations will get tripped up by
-                // not having the `#version` directive be the first
-                // thing in the file.
-                if(translationUnitIndex != 0)
+                for(auto sourceFile : translationUnit->getSourceFiles())
                 {
-                    codeBuilder << "#line 1 " << translationUnitIndex << "\n";
+                    _appendCodeWithPath(sourceFile->getPathInfo().foundPath.getUnownedSlice(), sourceFile->getContent(), codeBuilder);
                 }
-                codeBuilder << sourceFile->getContent() << "\n";
             }
-
             return codeBuilder.ProduceString();
         }
         else
         {
-            return emitEntryPointSource(
+            return emitEntryPointSourceFromIR(
                 compileRequest,
                 entryPointIndex,
-                CodeGenTarget::GLSL,
+                target,
                 targetReq);
         }
     }
@@ -964,7 +923,7 @@ namespace Slang
             return SLANG_FAIL;
         }
 
-        auto hlslCode = emitHLSLForEntryPoint(compileRequest, entryPointIndex, targetReq, endToEndReq);
+        auto hlslCode = emitEntryPointSource(compileRequest, entryPointIndex, targetReq, CodeGenTarget::HLSL, endToEndReq);
         maybeDumpIntermediate(compileRequest, hlslCode.getBuffer(), CodeGenTarget::HLSL);
 
         auto profile = getEffectiveProfile(entryPoint, targetReq);
@@ -1251,13 +1210,16 @@ SlangResult dissassembleDXILUsingDXC(
         EndToEndCompileRequest* endToEndReq,
         RefPtr<DownstreamCompileResult>& outResult)
     {
+        outResult.setNull();
+
         auto sink = slangRequest->getSink();
 
         auto session = slangRequest->getSession();
 
         const String originalSourcePath = calcSourcePathForEntryPoint(endToEndReq, entryPointIndex);
 
-        outResult.setNull();
+        CodeGenTarget sourceTarget = CodeGenTarget::None;
+        SourceLanguage sourceLanguage = SourceLanguage::Unknown;
 
         PassThroughMode downstreamCompiler = endToEndReq->passThrough;
 
@@ -1265,23 +1227,26 @@ SlangResult dissassembleDXILUsingDXC(
         if (downstreamCompiler == PassThroughMode::None)
         {
             auto target = targetReq->target;
-            
             switch (target)
             {
                 case CodeGenTarget::PTX:
                 {
-                    downstreamCompiler = PassThroughMode(session->getDefaultDownstreamCompiler(SLANG_SOURCE_LANGUAGE_CUDA));
+                    sourceTarget = CodeGenTarget::CUDASource;
+                    sourceLanguage = SourceLanguage::CUDA;
                     break;
                 }
                 case CodeGenTarget::HostCallable:
                 case CodeGenTarget::SharedLibrary:
                 case CodeGenTarget::Executable:
                 {
-                    downstreamCompiler = PassThroughMode(session->getDefaultDownstreamCompiler(SLANG_SOURCE_LANGUAGE_CPP));
+                    sourceTarget = CodeGenTarget::CPPSource;
+                    sourceLanguage = SourceLanguage::CPP;
                     break;
                 }
                 default: break;
             }
+
+            downstreamCompiler = PassThroughMode(session->getDefaultDownstreamCompiler(SlangSourceLanguage(sourceLanguage)));
         }
         
         // Get the required downstream compiler
@@ -1301,8 +1266,6 @@ SlangResult dissassembleDXILUsingDXC(
             return SLANG_FAIL;
         }
 
-        SourceLanguage rawSourceLanguage = SourceLanguage::Unknown;
-
         Dictionary<String, String> preprocessorDefinitions;
         List<String> includePaths;
 
@@ -1357,8 +1320,10 @@ SlangResult dissassembleDXILUsingDXC(
             }
 
             // We are just passing thru, so it's whatever it originally was
-            rawSourceLanguage = translationUnit->sourceLanguage;
+            sourceLanguage = translationUnit->sourceLanguage;
+            sourceTarget = CodeGenTarget(DownstreamCompiler::getCompileTarget(SlangSourceLanguage(sourceLanguage)));
 
+            // Special case if we have a single file, so that we pass the path, and the contents
             const auto& sourceFiles = translationUnit->getSourceFiles();
             if (sourceFiles.getCount() == 1)
             {
@@ -1372,30 +1337,18 @@ SlangResult dissassembleDXILUsingDXC(
             }
             else
             {
-                // If can't just use file, concat together and make
-                StringBuilder codeBuilder;
-                for (auto sourceFile : translationUnit->getSourceFiles())
-                {
-                    _appendCodeWithPath(sourceFile->getPathInfo().foundPath.getUnownedSlice(), sourceFile->getContent(), codeBuilder);
-                }
-                options.sourceContents = codeBuilder.ProduceString();
+                options.sourceContents = emitEntryPointSource(slangRequest, entryPointIndex, targetReq, sourceTarget, endToEndReq);
             }
         }
         else
         {
-            options.sourceContents = emitCPPForEntryPoint(
-                slangRequest,
-                entryPointIndex,
-                targetReq,
-                endToEndReq);
-
-            maybeDumpIntermediate(slangRequest, options.sourceContents.getBuffer(), CodeGenTarget::CPPSource);
+            options.sourceContents = emitEntryPointSource(slangRequest, entryPointIndex, targetReq, sourceTarget, endToEndReq);
 
-            rawSourceLanguage = SourceLanguage::CPP;
+            maybeDumpIntermediate(slangRequest, options.sourceContents.getBuffer(), sourceTarget);
         }
 
         // Set the source type
-        options.sourceLanguage = SlangSourceLanguage(rawSourceLanguage);
+        options.sourceLanguage = SlangSourceLanguage(sourceLanguage);
 
         // Disable exceptions and security checks
         options.flags &= ~(CompileOptions::Flag::EnableExceptionHandling | CompileOptions::Flag::EnableSecurityChecks);
@@ -1556,10 +1509,11 @@ SlangResult dissassembleDXILUsingDXC(
     {
         spirvOut.clear();
 
-        String rawGLSL = emitGLSLForEntryPoint(
+        String rawGLSL = emitEntryPointSource(
             slangRequest,
             entryPointIndex,
             targetReq,
+            CodeGenTarget::GLSL,
             endToEndReq);
         maybeDumpIntermediate(slangRequest, rawGLSL.getBuffer(), CodeGenTarget::GLSL);
 
@@ -1671,38 +1625,18 @@ SlangResult dissassembleDXILUsingDXC(
                 }
             }
             break;
-        case CodeGenTarget::HLSL:
-            {
-                String code = emitHLSLForEntryPoint(
-                    compileRequest,
-                    entryPointIndex,
-                    targetReq,
-                    endToEndReq);
-                maybeDumpIntermediate(compileRequest, code.getBuffer(), target);
-                result = CompileResult(code);
-            }
-            break;
-
         case CodeGenTarget::GLSL:
-            {
-                String code = emitGLSLForEntryPoint(
-                    compileRequest,
-                    entryPointIndex,
-                    targetReq,
-                    endToEndReq);
-                maybeDumpIntermediate(compileRequest, code.getBuffer(), target);
-                result = CompileResult(code);
-            }
-            break;
-
+        case CodeGenTarget::HLSL:
+        case CodeGenTarget::CUDASource:
         case CodeGenTarget::CPPSource:
         case CodeGenTarget::CSource:
             {
                 String code = emitEntryPointSource(
                     compileRequest,
                     entryPointIndex,
-                    target, 
-                    targetReq);
+                    targetReq,
+                    target,
+                    endToEndReq);
                 maybeDumpIntermediate(compileRequest, code.getBuffer(), target);
                 result = CompileResult(code);
             }
diff --git a/source/slang/slang-compiler.h b/source/slang/slang-compiler.h
index 1a385ab23..2783197af 100644
--- a/source/slang/slang-compiler.h
+++ b/source/slang/slang-compiler.h
@@ -1797,6 +1797,15 @@ namespace Slang
     @return the appropriate source filename */
     String calcSourcePathForEntryPoint(EndToEndCompileRequest* endToEndReq, UInt entryPointIndex);
 
+    /* Emits entry point source taking into account if a pass-through or not. Uses 'target' to determine
+    the target (not targetReq) */
+    String emitEntryPointSource(
+        BackEndCompileRequest*  compileRequest,
+        Int                     entryPointIndex,
+        TargetRequest*          targetReq,
+        CodeGenTarget           target,
+        EndToEndCompileRequest* endToEndReq);
+
     struct TypeCheckingCache;
     //
 
diff --git a/source/slang/slang-dxc-support.cpp b/source/slang/slang-dxc-support.cpp
index e0ca4df82..828138143 100644
--- a/source/slang/slang-dxc-support.cpp
+++ b/source/slang/slang-dxc-support.cpp
@@ -29,11 +29,7 @@
 namespace Slang
 {
     String GetHLSLProfileName(Profile profile);
-    String emitHLSLForEntryPoint(
-        BackEndCompileRequest*  compileRequest,
-        Int                     entryPointIndex,
-        TargetRequest*          targetReq,
-        EndToEndCompileRequest* endToEndReq);
+
 
     SlangResult locateDXCCompilers(const String& path, ISlangSharedLibraryLoader* loader, DownstreamCompilerSet* set);
 
@@ -85,10 +81,11 @@ namespace Slang
 
         // Now let's go ahead and generate HLSL for the entry
         // point, since we'll need that to feed into dxc.
-        auto hlslCode = emitHLSLForEntryPoint(
+        auto hlslCode = emitEntryPointSource(
             compileRequest,
             entryPointIndex,
             targetReq,
+            CodeGenTarget::HLSL,
             endToEndReq);
         maybeDumpIntermediate(compileRequest, hlslCode.getBuffer(), CodeGenTarget::HLSL);
 
diff --git a/source/slang/slang-emit-c-like.cpp b/source/slang/slang-emit-c-like.cpp
index 2c1bcbe6d..1a2fbb0f4 100644
--- a/source/slang/slang-emit-c-like.cpp
+++ b/source/slang/slang-emit-c-like.cpp
@@ -98,6 +98,7 @@ struct CLikeSourceEmitter::ComputeEmitActionsContext
         {
             return SourceStyle::HLSL;
         }
+        case CodeGenTarget::PTX:
         case CodeGenTarget::SPIRV:
         case CodeGenTarget::SPIRVAssembly:
         case CodeGenTarget::DXBytecode:
@@ -115,6 +116,10 @@ struct CLikeSourceEmitter::ComputeEmitActionsContext
         {
             return SourceStyle::CPP;
         }
+        case CodeGenTarget::CUDASource:
+        {
+            return SourceStyle::CUDA;
+        }
     }
 }
 
@@ -343,6 +348,7 @@ bool CLikeSourceEmitter::isTargetIntrinsicModifierApplicable(const String& targe
     case SourceStyle::CPP:  return targetName == "cpp";
     case SourceStyle::GLSL: return targetName == "glsl";
     case SourceStyle::HLSL: return targetName == "hlsl";
+    case SourceStyle::CUDA: return targetName == "cuda";
     }
 }
 
@@ -1021,6 +1027,7 @@ void CLikeSourceEmitter::emitInstResultDecl(IRInst* inst)
 
         switch (getSourceStyle())
         {
+        case SourceStyle::CUDA:
         case SourceStyle::HLSL:
         case SourceStyle::C:
         case SourceStyle::CPP:
@@ -2507,6 +2514,22 @@ void CLikeSourceEmitter::emitSimpleFuncParamImpl(IRParam* param)
     emitSemantics(param);
 }
 
+void CLikeSourceEmitter::emitSimpleFuncParamsImpl(IRFunc* func)
+{
+    m_writer->emit("(");
+
+    auto firstParam = func->getFirstParam();
+    for (auto pp = firstParam; pp; pp = pp->getNextParam())
+    {
+        if (pp != firstParam)
+            m_writer->emit(", ");
+
+        emitSimpleFuncParamImpl(pp);
+    }
+
+    m_writer->emit(")");
+}
+
 void CLikeSourceEmitter::emitSimpleFuncImpl(IRFunc* func)
 {
     auto resultType = func->getResultType();
@@ -2521,18 +2544,7 @@ void CLikeSourceEmitter::emitSimpleFuncImpl(IRFunc* func)
     auto name = getName(func);
 
     emitType(resultType, name);
-
-    m_writer->emit("(");
-    auto firstParam = func->getFirstParam();
-    for( auto pp = firstParam; pp; pp = pp->getNextParam())
-    {
-        if(pp != firstParam)
-            m_writer->emit(", ");
-
-        emitSimpleFuncParamImpl(pp);
-    }
-    m_writer->emit(")");
-
+    emitSimpleFuncParamsImpl(func);
     emitSemantics(func);
 
     // TODO: encode declaration vs. definition
diff --git a/source/slang/slang-emit-c-like.h b/source/slang/slang-emit-c-like.h
index 4b5c68b6c..a6c48dc73 100644
--- a/source/slang/slang-emit-c-like.h
+++ b/source/slang/slang-emit-c-like.h
@@ -47,6 +47,7 @@ public:
         HLSL,
         C,
         CPP,
+        CUDA,
         CountOf,
     };
     
@@ -315,6 +316,7 @@ public:
     virtual void emitRateQualifiersImpl(IRRate* rate) { SLANG_UNUSED(rate); }
     virtual void emitSemanticsImpl(IRInst* inst) { SLANG_UNUSED(inst);  }
     virtual void emitSimpleFuncParamImpl(IRParam* param);
+    virtual void emitSimpleFuncParamsImpl(IRFunc* func);
     virtual void emitInterpolationModifiersImpl(IRInst* varInst, IRType* valueType, IRVarLayout* layout) { SLANG_UNUSED(varInst); SLANG_UNUSED(valueType); SLANG_UNUSED(layout); }
     virtual void emitSimpleTypeImpl(IRType* type) = 0;
     virtual void emitVarDecorationsImpl(IRInst* varDecl) { SLANG_UNUSED(varDecl);  }
diff --git a/source/slang/slang-emit-cuda.cpp b/source/slang/slang-emit-cuda.cpp
new file mode 100644
index 000000000..feafc4e4e
--- /dev/null
+++ b/source/slang/slang-emit-cuda.cpp
@@ -0,0 +1,844 @@
+// slang-emit-cuda.cpp
+#include "slang-emit-cuda.h"
+
+#include "../core/slang-writer.h"
+
+#include "slang-emit-source-writer.h"
+#include "slang-mangled-lexer.h"
+
+#include <assert.h>
+
+namespace Slang {
+
+/* static */ UnownedStringSlice CUDASourceEmitter::getBuiltinTypeName(IROp op)
+{
+    switch (op)
+    {
+        case kIROp_VoidType:    return UnownedStringSlice("void");
+        case kIROp_BoolType:    return UnownedStringSlice("bool");
+
+        case kIROp_Int8Type:    return UnownedStringSlice("char");
+        case kIROp_Int16Type:   return UnownedStringSlice("short");
+        case kIROp_IntType:     return UnownedStringSlice("int");
+        case kIROp_Int64Type:   return UnownedStringSlice("long long");
+
+        case kIROp_UInt8Type:   return UnownedStringSlice("unsigned char");
+        case kIROp_UInt16Type:  return UnownedStringSlice("unsigned short");
+        case kIROp_UIntType:    return UnownedStringSlice("unsigned int");
+        case kIROp_UInt64Type:  return UnownedStringSlice("unsigned long long");
+
+            // Not clear just yet how we should handle half... we want all processing as float probly, but when reading/writing to memory converting
+        case kIROp_HalfType:    return UnownedStringSlice("half");
+
+        case kIROp_FloatType:   return UnownedStringSlice("float");
+        case kIROp_DoubleType:  return UnownedStringSlice("double");
+        default:                return UnownedStringSlice();
+    }
+}
+
+
+/* static */ UnownedStringSlice CUDASourceEmitter::getVectorPrefix(IROp op)
+{
+    switch (op)
+    {
+        case kIROp_BoolType:    return UnownedStringSlice("bool");
+
+        case kIROp_Int8Type:    return UnownedStringSlice("char");
+        case kIROp_Int16Type:   return UnownedStringSlice("short");
+        case kIROp_IntType:     return UnownedStringSlice("int");
+        case kIROp_Int64Type:   return UnownedStringSlice("longlong");
+
+        case kIROp_UInt8Type:   return UnownedStringSlice("uchar");
+        case kIROp_UInt16Type:  return UnownedStringSlice("ushort");
+        case kIROp_UIntType:    return UnownedStringSlice("uint");
+        case kIROp_UInt64Type:  return UnownedStringSlice("ulonglong");
+
+            // Not clear just yet how we should handle half... we want all processing as float probly, but when reading/writing to memory converting
+        case kIROp_HalfType:    return UnownedStringSlice("half");
+
+        case kIROp_FloatType:   return UnownedStringSlice("float");
+        case kIROp_DoubleType:  return UnownedStringSlice("double");
+        default:                return UnownedStringSlice();
+    }
+}
+
+SlangResult CUDASourceEmitter::_calcCUDATextureTypeName(IRTextureTypeBase* texType, StringBuilder& outName)
+{
+    // texture<float, cudaTextureType2D, cudaReadModeElementType> texRef;
+
+    // Not clear how to do this yet
+    if (texType->isMultisample() || texType->isArray())
+    {
+        return SLANG_FAIL;
+    }
+
+    outName << "texture<";
+    outName << _getCUDATypeName(texType->getElementType());
+    outName << ", ";
+
+    switch (texType->GetBaseShape())
+    {
+        case TextureFlavor::Shape::Shape1D:		outName << "cudaTextureType1D";		break;
+        case TextureFlavor::Shape::Shape2D:		outName << "cudaTextureType2D";		break;
+        case TextureFlavor::Shape::Shape3D:		outName << "cudaTextureType3D";		break;
+        case TextureFlavor::Shape::ShapeCube:	outName << "cudaTextureTypeCubemap";	break;
+        case TextureFlavor::Shape::ShapeBuffer: outName << "Buffer";         break;
+        default:
+            SLANG_DIAGNOSE_UNEXPECTED(getSink(), SourceLoc(), "unhandled resource shape");
+            return SLANG_FAIL;
+    }
+
+    outName << ", ";
+
+    switch (texType->getAccess())
+    {
+        case SLANG_RESOURCE_ACCESS_READ:
+        {
+            // Other value is cudaReadModeNormalizedFloat 
+
+            outName << "cudaReadModeElementType";
+            break;
+        }
+        default:
+        {
+            SLANG_DIAGNOSE_UNEXPECTED(getSink(), SourceLoc(), "unhandled resource access mode");
+            return SLANG_FAIL;
+        }
+    }
+
+    outName << ">";
+    return SLANG_OK;
+}
+
+// This is junk.. 
+static UnownedStringSlice _getCUDAResourceTypePrefix(IROp op)
+{
+    switch (op)
+    {
+        case kIROp_HLSLStructuredBufferType:            return UnownedStringSlice::fromLiteral("StructuredBuffer");
+        case kIROp_HLSLRWStructuredBufferType:          return UnownedStringSlice::fromLiteral("RWStructuredBuffer");
+        case kIROp_HLSLRWByteAddressBufferType:         return UnownedStringSlice::fromLiteral("RWByteAddressBuffer");
+        case kIROp_HLSLByteAddressBufferType:           return UnownedStringSlice::fromLiteral("ByteAddressBuffer");
+        case kIROp_SamplerStateType:                    return UnownedStringSlice::fromLiteral("SamplerState");
+        case kIROp_SamplerComparisonStateType:                  return UnownedStringSlice::fromLiteral("SamplerComparisonState");
+        case kIROp_HLSLRasterizerOrderedStructuredBufferType:   return UnownedStringSlice::fromLiteral("RasterizerOrderedStructuredBuffer");
+        case kIROp_HLSLAppendStructuredBufferType:              return UnownedStringSlice::fromLiteral("AppendStructuredBuffer");
+        case kIROp_HLSLConsumeStructuredBufferType:             return UnownedStringSlice::fromLiteral("ConsumeStructuredBuffer");
+        case kIROp_HLSLRasterizerOrderedByteAddressBufferType:  return UnownedStringSlice::fromLiteral("RasterizerOrderedByteAddressBuffer");
+        case kIROp_RaytracingAccelerationStructureType:         return UnownedStringSlice::fromLiteral("RaytracingAccelerationStructure");
+
+        default:                                        return UnownedStringSlice();
+    }
+}
+
+SlangResult CUDASourceEmitter::_calcCUDATypeName(IRType* type, StringBuilder& out)
+{
+    switch (type->op)
+    {
+        case kIROp_HalfType:
+        {
+            // Special case half
+            out << getBuiltinTypeName(kIROp_FloatType);
+            return SLANG_OK;
+        }
+        case kIROp_VectorType:
+        {
+            auto vecType = static_cast<IRVectorType*>(type);
+            auto vecCount = int(GetIntVal(vecType->getElementCount()));
+            const IROp elemType = vecType->getElementType()->op;
+
+            UnownedStringSlice prefix = getVectorPrefix(elemType);
+            if (prefix.size() <= 0)
+            {
+                return SLANG_FAIL;
+            }
+            out << prefix << vecCount;
+            return SLANG_OK;
+        }
+#if 0
+        case kIROp_MatrixType:
+        {
+            auto matType = static_cast<IRMatrixType*>(type);
+
+            auto elementType = matType->getElementType();
+            const auto rowCount = int(GetIntVal(matType->getRowCount()));
+            const auto colCount = int(GetIntVal(matType->getColumnCount()));
+
+            if (target == CodeGenTarget::CPPSource)
+            {
+                out << "Matrix<" << getBuiltinTypeName(elementType->op) << ", " << rowCount << ", " << colCount << ">";
+            }
+            else
+            {
+                out << "Mat";
+                const UnownedStringSlice postFix = _getCTypeVecPostFix(_getCType(elementType->op));
+                out << postFix;
+                if (postFix.size() > 1)
+                {
+                    out << "_";
+                }
+                out << rowCount;
+                out << colCount;
+            }
+            return SLANG_OK;
+        }
+        case kIROp_ArrayType:
+        {
+            auto arrayType = static_cast<IRArrayType*>(type);
+            auto elementType = arrayType->getElementType();
+            int elementCount = int(GetIntVal(arrayType->getElementCount()));
+
+            out << "FixedArray<";
+            SLANG_RETURN_ON_FAIL(_calcTypeName(elementType, target, out));
+            out << ", " << elementCount << ">";
+            return SLANG_OK;
+        }
+        case kIROp_UnsizedArrayType:
+        {
+            auto arrayType = static_cast<IRUnsizedArrayType*>(type);
+            auto elementType = arrayType->getElementType();
+
+            out << "Array<";
+            SLANG_RETURN_ON_FAIL(_calcTypeName(elementType, target, out));
+            out << ">";
+            return SLANG_OK;
+        }
+#endif
+        default:
+        {
+            if (isNominalOp(type->op))
+            {
+                out << getName(type);
+                return SLANG_OK;
+            }
+
+            if (IRBasicType::isaImpl(type->op))
+            {
+                out << getBuiltinTypeName(type->op);
+                return SLANG_OK;
+            }
+
+            if (auto texType = as<IRTextureTypeBase>(type))
+            {
+                // We don't support TextureSampler, so ignore that
+                if (texType->op != kIROp_TextureSamplerType)
+                {
+                    return _calcCUDATextureTypeName(texType, out);
+                }
+            }
+
+            // If _getResourceTypePrefix returns something, we assume can output any specialization after it in order.
+            {
+                UnownedStringSlice prefix = _getCUDAResourceTypePrefix(type->op);
+                if (prefix.size() > 0)
+                {
+                    auto oldWriter = m_writer;
+                    SourceManager* sourceManager = oldWriter->getSourceManager();
+
+                    // TODO(JS): This is a bit of a hack. We don't want to emit the result here,
+                    // so we replace the writer, write out the type, grab the contents, and restore the writer
+
+                    SourceWriter writer(sourceManager, LineDirectiveMode::None);
+                    m_writer = &writer;
+
+                    m_writer->emit(prefix);
+
+                    // TODO(JS).
+                    // Assumes ordering of types matches ordering of operands.
+
+                    UInt operandCount = type->getOperandCount();
+                    if (operandCount)
+                    {
+                        m_writer->emit("<");
+                        for (UInt ii = 0; ii < operandCount; ++ii)
+                        {
+                            if (ii != 0)
+                            {
+                                m_writer->emit(", ");
+                            }
+                            emitVal(type->getOperand(ii), getInfo(EmitOp::General));
+                        }
+                        m_writer->emit(">");
+                    }
+
+                    out << writer.getContent();
+
+                    m_writer = oldWriter;
+                    return SLANG_OK;
+                }
+            }
+
+            break;
+        }
+    }
+
+    SLANG_DIAGNOSE_UNEXPECTED(getSink(), SourceLoc(), "unhandled type for CUDA emit");
+    return SLANG_FAIL;
+}
+
+
+UnownedStringSlice CUDASourceEmitter::_getCUDATypeName(IRType* type)
+{
+    StringSlicePool::Handle handle = StringSlicePool::kNullHandle;
+    if (m_typeNameMap.TryGetValue(type, handle))
+    {
+        return m_slicePool.getSlice(handle);
+    }
+
+#if 0
+    if (type->op == kIROp_MatrixType)
+    {
+        auto matType = static_cast<IRMatrixType*>(type);
+
+        auto elementType = matType->getElementType();
+        const auto rowCount = int(GetIntVal(matType->getRowCount()));
+        const auto colCount = int(GetIntVal(matType->getColumnCount()));
+
+        // Make sure the vector type the matrix is built on is added
+        useType(_getVecType(elementType, colCount));
+    }
+#endif
+
+    StringBuilder builder;
+    if (SLANG_SUCCEEDED(_calcCUDATypeName(type, builder)))
+    {
+        handle = m_slicePool.add(builder);
+    }
+
+    m_typeNameMap.Add(type, handle);
+
+    SLANG_ASSERT(handle != StringSlicePool::kNullHandle);
+    return m_slicePool.getSlice(handle);
+}
+
+void CUDASourceEmitter::_emitCUDADecorationSingleString(const char* name, IRFunc* entryPoint, IRStringLit* val)
+{
+    SLANG_UNUSED(entryPoint);
+    assert(val);
+
+    m_writer->emit("[");
+    m_writer->emit(name);
+    m_writer->emit("(\"");
+    m_writer->emit(val->getStringSlice());
+    m_writer->emit("\")]\n");
+}
+
+void CUDASourceEmitter::_emitCUDADecorationSingleInt(const char* name, IRFunc* entryPoint, IRIntLit* val)
+{
+    SLANG_UNUSED(entryPoint);
+    SLANG_ASSERT(val);
+
+    auto intVal = GetIntVal(val);
+
+    m_writer->emit("[");
+    m_writer->emit(name);
+    m_writer->emit("(");
+    m_writer->emit(intVal);
+    m_writer->emit(")]\n");
+}
+
+void CUDASourceEmitter::_emitCUDARegisterSemantic(LayoutResourceKind kind, EmitVarChain* chain, char const* uniformSemanticSpelling)
+{
+    if (!chain)
+        return;
+    if (!chain->varLayout->usesResourceKind(kind))
+        return;
+
+    UInt index = getBindingOffset(chain, kind);
+    UInt space = getBindingSpace(chain, kind);
+
+    switch (kind)
+    {
+        case LayoutResourceKind::Uniform:
+        {
+            UInt offset = index;
+
+            // The HLSL `c` register space is logically grouped in 16-byte registers,
+            // while we try to traffic in byte offsets. That means we need to pick
+            // a register number, based on the starting offset in 16-byte register
+            // units, and then a "component" within that register, based on 4-byte
+            // offsets from there. We cannot support more fine-grained offsets than that.
+
+            m_writer->emit(" : ");
+            m_writer->emit(uniformSemanticSpelling);
+            m_writer->emit("(c");
+
+            // Size of a logical `c` register in bytes
+            auto registerSize = 16;
+
+            // Size of each component of a logical `c` register, in bytes
+            auto componentSize = 4;
+
+            size_t startRegister = offset / registerSize;
+            m_writer->emit(int(startRegister));
+
+            size_t byteOffsetInRegister = offset % registerSize;
+
+            // If this field doesn't start on an even register boundary,
+            // then we need to emit additional information to pick the
+            // right component to start from
+            if (byteOffsetInRegister != 0)
+            {
+                // The value had better occupy a whole number of components.
+                SLANG_RELEASE_ASSERT(byteOffsetInRegister % componentSize == 0);
+
+                size_t startComponent = byteOffsetInRegister / componentSize;
+
+                static const char* kComponentNames[] = { "x", "y", "z", "w" };
+                m_writer->emit(".");
+                m_writer->emit(kComponentNames[startComponent]);
+            }
+            m_writer->emit(")");
+        }
+        break;
+
+        case LayoutResourceKind::RegisterSpace:
+        case LayoutResourceKind::GenericResource:
+        case LayoutResourceKind::ExistentialTypeParam:
+        case LayoutResourceKind::ExistentialObjectParam:
+            // ignore
+            break;
+        default:
+        {
+            m_writer->emit(" : register(");
+            switch (kind)
+            {
+                case LayoutResourceKind::ConstantBuffer:
+                    m_writer->emit("b");
+                    break;
+                case LayoutResourceKind::ShaderResource:
+                    m_writer->emit("t");
+                    break;
+                case LayoutResourceKind::UnorderedAccess:
+                    m_writer->emit("u");
+                    break;
+                case LayoutResourceKind::SamplerState:
+                    m_writer->emit("s");
+                    break;
+                default:
+                    SLANG_DIAGNOSE_UNEXPECTED(getSink(), SourceLoc(), "unhandled HLSL register type");
+                    break;
+            }
+            m_writer->emit(index);
+            if (space)
+            {
+                m_writer->emit(", space");
+                m_writer->emit(space);
+            }
+            m_writer->emit(")");
+        }
+    }
+}
+
+void CUDASourceEmitter::_emitCUDARegisterSemantics(EmitVarChain* chain, char const* uniformSemanticSpelling)
+{
+    if (!chain) return;
+
+    auto layout = chain->varLayout;
+
+    switch (getSourceStyle())
+    {
+        default:
+            return;
+
+        case SourceStyle::HLSL:
+            break;
+    }
+
+    for (auto rr : layout->getOffsetAttrs())
+    {
+        _emitCUDARegisterSemantic(rr->getResourceKind(), chain, uniformSemanticSpelling);
+    }
+}
+
+void CUDASourceEmitter::_emitCUDARegisterSemantics(IRVarLayout* varLayout, char const* uniformSemanticSpelling)
+{
+    if (!varLayout)
+        return;
+
+    EmitVarChain chain(varLayout);
+    _emitCUDARegisterSemantics(&chain, uniformSemanticSpelling);
+}
+
+void CUDASourceEmitter::_emitCUDAParameterGroupFieldLayoutSemantics(EmitVarChain* chain)
+{
+    if (!chain)
+        return;
+
+    auto layout = chain->varLayout;
+    for (auto rr : layout->getOffsetAttrs())
+    {
+        _emitCUDARegisterSemantic(rr->getResourceKind(), chain, "packoffset");
+    }
+}
+
+void CUDASourceEmitter::_emitCUDAParameterGroupFieldLayoutSemantics(IRVarLayout* fieldLayout, EmitVarChain* inChain)
+{
+    EmitVarChain chain(fieldLayout, inChain);
+    _emitCUDAParameterGroupFieldLayoutSemantics(&chain);
+}
+
+void CUDASourceEmitter::_emitCUDAParameterGroup(IRGlobalParam* varDecl, IRUniformParameterGroupType* type)
+{
+    if (as<IRTextureBufferType>(type))
+    {
+        m_writer->emit("tbuffer ");
+    }
+    else
+    {
+        m_writer->emit("cbuffer ");
+    }
+    m_writer->emit(getName(varDecl));
+
+    auto varLayout = getVarLayout(varDecl);
+    SLANG_RELEASE_ASSERT(varLayout);
+
+    EmitVarChain blockChain(varLayout);
+
+    EmitVarChain containerChain = blockChain;
+    EmitVarChain elementChain = blockChain;
+
+    auto typeLayout = varLayout->getTypeLayout();
+    if (auto parameterGroupTypeLayout = as<IRParameterGroupTypeLayout>(typeLayout))
+    {
+        containerChain = EmitVarChain(parameterGroupTypeLayout->getContainerVarLayout(), &blockChain);
+        elementChain = EmitVarChain(parameterGroupTypeLayout->getElementVarLayout(), &blockChain);
+
+        typeLayout = parameterGroupTypeLayout->getElementVarLayout()->getTypeLayout();
+    }
+
+    _emitCUDARegisterSemantic(LayoutResourceKind::ConstantBuffer, &containerChain);
+
+    m_writer->emit("\n{\n");
+    m_writer->indent();
+
+    auto elementType = type->getElementType();
+
+    emitType(elementType, getName(varDecl));
+    m_writer->emit(";\n");
+
+    m_writer->dedent();
+    m_writer->emit("}\n");
+}
+
+void CUDASourceEmitter::emitLayoutSemanticsImpl(IRInst* inst, char const* uniformSemanticSpelling)
+{
+    auto layout = getVarLayout(inst); 
+    if (layout)
+    {
+        _emitCUDARegisterSemantics(layout, uniformSemanticSpelling);
+    }
+}
+
+void CUDASourceEmitter::emitParameterGroupImpl(IRGlobalParam* varDecl, IRUniformParameterGroupType* type)
+{
+    _emitCUDAParameterGroup(varDecl, type);
+}
+
+void CUDASourceEmitter::emitEntryPointAttributesImpl(IRFunc* irFunc, IREntryPointDecoration* entryPointDecor)
+{
+    auto profile = m_effectiveProfile;
+    auto stage = entryPointDecor->getProfile().GetStage();
+
+    switch (stage)
+    {
+        case Stage::Compute:
+        {
+            Int sizeAlongAxis[kThreadGroupAxisCount];
+            getComputeThreadGroupSize(irFunc, sizeAlongAxis);
+
+#if 0
+            m_writer->emit("[numthreads(");
+            for (int ii = 0; ii < kThreadGroupAxisCount; ++ii)
+            {
+                if (ii != 0) m_writer->emit(", ");
+                m_writer->emit(sizeAlongAxis[ii]);
+            }
+            m_writer->emit(")]\n");
+#endif
+
+            m_writer->emit("__global__ ");
+            break;
+        }
+        
+        // TODO: There are other stages that will need this kind of handling.
+        default:
+            break;
+    }
+}
+
+void CUDASourceEmitter::emitOperandImpl(IRInst* inst, EmitOpInfo const&  outerPrec)
+{
+    if (shouldFoldInstIntoUseSites(inst))
+    {
+        emitInstExpr(inst, outerPrec);
+        return;
+    }
+
+    switch (inst->op)
+    {
+        case kIROp_Param:
+        {
+            auto varLayout = getVarLayout(inst);
+            if (varLayout)
+            {
+                if (auto systemValueSemantic = varLayout->findSystemValueSemanticAttr())
+                {
+                    String semanticNameSpelling = systemValueSemantic->getName();
+                    semanticNameSpelling = semanticNameSpelling.toLower();
+
+                    if (semanticNameSpelling == "sv_dispatchthreadid")
+                    {
+                        m_semanticUsedFlags |= SemanticUsedFlag::DispatchThreadID;
+                        m_writer->emit("((blockIdx * blockDim) + threadIdx)");
+
+                        return;
+                    }
+                    else if (semanticNameSpelling == "sv_groupid")
+                    {
+                        m_semanticUsedFlags |= SemanticUsedFlag::GroupID;
+                        m_writer->emit("blockIdx");
+                        return;
+                    }
+                    else if (semanticNameSpelling == "sv_groupthreadid")
+                    {
+                        m_semanticUsedFlags |= SemanticUsedFlag::GroupThreadID;
+                        m_writer->emit("threadIdx");
+                        return;
+                    }
+                }
+            }
+
+            break;
+        }
+        default: break;
+    }
+    m_writer->emit(getName(inst));
+}
+
+bool CUDASourceEmitter::tryEmitInstExprImpl(IRInst* inst, const EmitOpInfo& inOuterPrec)
+{
+    switch (inst->op)
+    {
+        case kIROp_Construct:
+        case kIROp_makeVector:
+        {
+            if (inst->getOperandCount() == 1)
+            {
+                EmitOpInfo outerPrec = inOuterPrec;
+                bool needClose = false;
+
+                auto prec = getInfo(EmitOp::Prefix);
+                needClose = maybeEmitParens(outerPrec, prec);
+
+                // Need to emit as cast for HLSL
+                m_writer->emit("(");
+                emitType(inst->getDataType());
+                m_writer->emit(") ");
+                emitOperand(inst->getOperand(0), rightSide(outerPrec, prec));
+
+                maybeCloseParens(needClose);
+                // Handled
+                return true;
+            }
+            else
+            {
+                m_writer->emit("make_");
+                m_writer->emit(_getCUDATypeName(inst->getDataType()));
+                emitArgs(inst);
+                return true;
+            }
+            break;
+        }
+        case kIROp_MakeMatrix:
+        {
+            return false;
+        }
+        case kIROp_BitCast:
+        {
+            auto toType = extractBaseType(inst->getDataType());
+            switch (toType)
+            {
+                default:
+                    m_writer->emit("/* unhandled */");
+                    break;
+                case BaseType::UInt:
+                    break;
+                case BaseType::Int:
+                    m_writer->emit("(");
+                    emitType(inst->getDataType());
+                    m_writer->emit(")");
+                    break;
+                case BaseType::Float:
+                    m_writer->emit("asfloat");
+                    break;
+            }
+
+            m_writer->emit("(");
+            emitOperand(inst->getOperand(0), getInfo(EmitOp::General));
+            m_writer->emit(")");
+            return true;
+        }
+        default: break;
+    }
+    // Not handled
+    return false;
+}
+
+void CUDASourceEmitter::emitLayoutDirectivesImpl(TargetRequest* targetReq)
+{
+    SLANG_UNUSED(targetReq);
+}
+
+void CUDASourceEmitter::emitVectorTypeNameImpl(IRType* elementType, IRIntegerValue elementCount)
+{
+    m_writer->emit(getVectorPrefix(elementType->op));
+    m_writer->emit(elementCount);
+}
+
+void CUDASourceEmitter::emitSimpleTypeImpl(IRType* type)
+{
+    m_writer->emit(_getCUDATypeName(type));
+}
+
+void CUDASourceEmitter::emitRateQualifiersImpl(IRRate* rate)
+{
+    if (as<IRGroupSharedRate>(rate))
+    {
+        m_writer->emit("groupshared ");
+    }
+}
+
+void CUDASourceEmitter::emitSimpleFuncParamsImpl(IRFunc* func)
+{
+    m_writer->emit("(");
+
+    bool hasEmittedParam = false;
+    auto firstParam = func->getFirstParam();
+    for (auto pp = firstParam; pp; pp = pp->getNextParam())
+    {
+        auto varLayout = getVarLayout(pp);
+        if (varLayout && varLayout->findSystemValueSemanticAttr())
+        {
+            // If it has a semantic don't output, it will be accessed via a global
+            continue;
+        }
+
+        if (hasEmittedParam)
+            m_writer->emit(", ");
+
+        emitSimpleFuncParamImpl(pp);
+        hasEmittedParam = true;
+    }
+
+    m_writer->emit(")");
+}
+
+void CUDASourceEmitter::emitSimpleFuncImpl(IRFunc* func)
+{
+    if (IREntryPointDecoration* entryPointDecor = func->findDecoration<IREntryPointDecoration>())
+    {
+        // If its an entry point, we let the entry point attribute control the output
+        Super::emitSimpleFuncImpl(func);
+    }
+    else
+    {
+        // If it's not an entry point mark as device
+        m_writer->emit("__device__ ");
+        Super::emitSimpleFuncImpl(func);
+    }
+}
+
+void CUDASourceEmitter::emitSemanticsImpl(IRInst* inst)
+{
+    if (auto semanticDecoration = inst->findDecoration<IRSemanticDecoration>())
+    {
+        m_writer->emit(" : ");
+        m_writer->emit(semanticDecoration->getSemanticName());
+        return;
+    }
+
+    if (auto layoutDecoration = inst->findDecoration<IRLayoutDecoration>())
+    {
+        auto layout = layoutDecoration->getLayout();
+        if (auto varLayout = as<IRVarLayout>(layout))
+        {
+            emitSemanticsUsingVarLayout(varLayout);
+        }
+        else if (auto entryPointLayout = as<IREntryPointLayout>(layout))
+        {
+            if (auto resultLayout = entryPointLayout->getResultLayout())
+            {
+                emitSemanticsUsingVarLayout(resultLayout);
+            }
+        }
+    }
+}
+
+static UnownedStringSlice _getInterpolationModifierText(IRInterpolationMode mode)
+{
+    switch (mode)
+    {
+        case IRInterpolationMode::NoInterpolation:      return UnownedStringSlice::fromLiteral("nointerpolation");
+        case IRInterpolationMode::NoPerspective:        return UnownedStringSlice::fromLiteral("noperspective");
+        case IRInterpolationMode::Linear:               return UnownedStringSlice::fromLiteral("linear");
+        case IRInterpolationMode::Sample:               return UnownedStringSlice::fromLiteral("sample");
+        case IRInterpolationMode::Centroid:             return UnownedStringSlice::fromLiteral("centroid");
+        default:                                        return UnownedStringSlice();
+    }
+}
+
+void CUDASourceEmitter::emitInterpolationModifiersImpl(IRInst* varInst, IRType* valueType, IRVarLayout* layout)
+{
+    SLANG_UNUSED(layout);
+    SLANG_UNUSED(valueType);
+
+    for (auto dd : varInst->getDecorations())
+    {
+        if (dd->op != kIROp_InterpolationModeDecoration)
+            continue;
+
+        auto decoration = (IRInterpolationModeDecoration*)dd;
+  
+        UnownedStringSlice modeText = _getInterpolationModifierText(decoration->getMode());
+        if (modeText.size() > 0)
+        {
+            m_writer->emit(modeText);
+            m_writer->emitChar(' ');
+        }
+    }
+}
+
+void CUDASourceEmitter::emitVarDecorationsImpl(IRInst* varDecl)
+{
+    if (varDecl->findDecoration<IRGloballyCoherentDecoration>())
+    {
+        m_writer->emit("globallycoherent\n");
+    }
+}
+
+void CUDASourceEmitter::emitMatrixLayoutModifiersImpl(IRVarLayout* layout)
+{
+    // When a variable has a matrix type, we want to emit an explicit
+    // layout qualifier based on what the layout has been computed to be.
+    //
+
+    auto typeLayout = layout->getTypeLayout()->unwrapArray();
+
+    if (auto matrixTypeLayout = as<IRMatrixTypeLayout>(typeLayout))
+    {
+        switch (matrixTypeLayout->getMode())
+        {
+            case kMatrixLayoutMode_ColumnMajor:
+                m_writer->emit("column_major ");
+                break;
+
+            case kMatrixLayoutMode_RowMajor:
+                m_writer->emit("row_major ");
+                break;
+        }
+    }
+}
+
+
+} // namespace Slang
diff --git a/source/slang/slang-emit-cuda.h b/source/slang/slang-emit-cuda.h
new file mode 100644
index 000000000..6de2f5ea3
--- /dev/null
+++ b/source/slang/slang-emit-cuda.h
@@ -0,0 +1,80 @@
+// slang-emit-cuda.h
+#ifndef SLANG_EMIT_CUDA_H
+#define SLANG_EMIT_CUDA_H
+
+#include "slang-emit-c-like.h"
+
+namespace Slang
+{
+
+class CUDASourceEmitter : public CLikeSourceEmitter
+{
+public:
+    typedef CLikeSourceEmitter Super;
+
+    typedef uint32_t SemanticUsedFlags;
+    struct SemanticUsedFlag
+    {
+        enum Enum : SemanticUsedFlags
+        {
+            DispatchThreadID = 0x01,
+            GroupThreadID = 0x02,
+            GroupID = 0x04,
+        };
+    };
+
+    static UnownedStringSlice getBuiltinTypeName(IROp op);
+    static UnownedStringSlice getVectorPrefix(IROp op);
+
+    CUDASourceEmitter(const Desc& desc) :
+        Super(desc),
+        m_slicePool(StringSlicePool::Style::Default)
+    {}
+
+protected:
+
+    virtual void emitLayoutSemanticsImpl(IRInst* inst, char const* uniformSemanticSpelling) SLANG_OVERRIDE;
+    virtual void emitParameterGroupImpl(IRGlobalParam* varDecl, IRUniformParameterGroupType* type) SLANG_OVERRIDE;
+    virtual void emitEntryPointAttributesImpl(IRFunc* irFunc, IREntryPointDecoration* entryPointDecor) SLANG_OVERRIDE;
+    virtual void emitLayoutDirectivesImpl(TargetRequest* targetReq) SLANG_OVERRIDE;
+    virtual void emitRateQualifiersImpl(IRRate* rate) SLANG_OVERRIDE;
+    virtual void emitSemanticsImpl(IRInst* inst) SLANG_OVERRIDE;
+    virtual void emitSimpleFuncImpl(IRFunc* func) SLANG_OVERRIDE;
+    virtual void emitSimpleFuncParamsImpl(IRFunc* func) SLANG_OVERRIDE;
+    virtual void emitInterpolationModifiersImpl(IRInst* varInst, IRType* valueType, IRVarLayout* layout) SLANG_OVERRIDE;
+    virtual void emitSimpleTypeImpl(IRType* type) SLANG_OVERRIDE;
+    virtual void emitVectorTypeNameImpl(IRType* elementType, IRIntegerValue elementCount) SLANG_OVERRIDE;
+    virtual void emitVarDecorationsImpl(IRInst* varDecl) SLANG_OVERRIDE;
+    virtual void emitMatrixLayoutModifiersImpl(IRVarLayout* layout) SLANG_OVERRIDE;
+    virtual void emitOperandImpl(IRInst* inst, EmitOpInfo const&  outerPrec) SLANG_OVERRIDE;
+
+    virtual bool tryEmitInstExprImpl(IRInst* inst, const EmitOpInfo& inOuterPrec) SLANG_OVERRIDE;
+
+        // Emit a single `register` semantic, as appropriate for a given resource-type-specific layout info
+        // Keyword to use in the uniform case (`register` for globals, `packoffset` inside a `cbuffer`)
+    void _emitCUDARegisterSemantic(LayoutResourceKind kind, EmitVarChain* chain, char const* uniformSemanticSpelling = "register");
+
+        // Emit all the `register` semantics that are appropriate for a particular variable layout
+    void _emitCUDARegisterSemantics(EmitVarChain* chain, char const* uniformSemanticSpelling = "register");
+    void _emitCUDARegisterSemantics(IRVarLayout* varLayout, char const* uniformSemanticSpelling = "register");
+
+    void _emitCUDAParameterGroupFieldLayoutSemantics(EmitVarChain* chain);
+    void _emitCUDAParameterGroupFieldLayoutSemantics(IRVarLayout* fieldLayout, EmitVarChain* inChain);
+
+    void _emitCUDAParameterGroup(IRGlobalParam* varDecl, IRUniformParameterGroupType* type);
+
+    void _emitCUDADecorationSingleString(const char* name, IRFunc* entryPoint, IRStringLit* val);
+    void _emitCUDADecorationSingleInt(const char* name, IRFunc* entryPoint, IRIntLit* val);
+
+    SlangResult _calcCUDATypeName(IRType* type, StringBuilder& out);
+    UnownedStringSlice _getCUDATypeName(IRType* inType);
+    SlangResult _calcCUDATextureTypeName(IRTextureTypeBase* texType, StringBuilder& outName);
+
+    Dictionary<IRType*, StringSlicePool::Handle> m_typeNameMap;
+    StringSlicePool m_slicePool;
+
+    UInt m_semanticUsedFlags = 0;
+};
+
+}
+#endif
diff --git a/source/slang/slang-emit.cpp b/source/slang/slang-emit.cpp
index 9aa7b1203..19c3cad68 100644
--- a/source/slang/slang-emit.cpp
+++ b/source/slang/slang-emit.cpp
@@ -32,23 +32,12 @@
 #include "slang-emit-glsl.h"
 #include "slang-emit-hlsl.h"
 #include "slang-emit-cpp.h"
+#include "slang-emit-cuda.h"
 
 #include <assert.h>
 
 namespace Slang {
 
-enum class BuiltInCOp
-{
-    Splat,                  //< Splat a single value to all values of a vector or matrix type
-    Init,                   //< Initialize with parameters (must match the type)
-};
-
-
-//
-
-
-//
-
 EntryPointLayout* findEntryPointLayout(
     ProgramLayout*          programLayout,
     EntryPoint*             entryPoint)
@@ -444,7 +433,7 @@ Result linkAndOptimizeIR(
     return SLANG_OK;
 }
 
-String emitEntryPointSource(
+String emitEntryPointSourceFromIR(
     BackEndCompileRequest*  compileRequest,
     Int                     entryPointIndex,
     CodeGenTarget           target,
@@ -499,6 +488,11 @@ String emitEntryPointSource(
             sourceEmitter = new HLSLSourceEmitter(desc);
             break;
         }
+        case SourceStyle::CUDA:
+        {
+            sourceEmitter = new CUDASourceEmitter(desc);
+            break;
+        }
         default: break;
     }
 
diff --git a/source/slang/slang-emit.h b/source/slang/slang-emit.h
index e9ee361d7..1f3298316 100644
--- a/source/slang/slang-emit.h
+++ b/source/slang/slang-emit.h
@@ -34,7 +34,7 @@ namespace Slang
         /// generate different HLSL output if we know it
         /// will be used to generate SPIR-V).
         ///
-    String emitEntryPointSource(
+    String emitEntryPointSourceFromIR(
         BackEndCompileRequest*  compileRequest,
         Int                     entryPointIndex,
         CodeGenTarget           target,
diff --git a/source/slang/slang-ir-link.cpp b/source/slang/slang-ir-link.cpp
index 6399e3f6b..904a88d78 100644
--- a/source/slang/slang-ir-link.cpp
+++ b/source/slang/slang-ir-link.cpp
@@ -919,9 +919,13 @@ String getTargetName(IRSpecContext* context)
     case CodeGenTarget::CPPSource:
         return "cpp";
 
+    case CodeGenTarget::CUDASource:
+        return "cuda";
+
     case CodeGenTarget::SPIRV:
         return "spirv";
 
+
     default:
         SLANG_UNEXPECTED("unhandled case");
         UNREACHABLE_RETURN("unknown");
diff --git a/source/slang/slang-parameter-binding.cpp b/source/slang/slang-parameter-binding.cpp
index 5cd4156c3..8ebc4f420 100644
--- a/source/slang/slang-parameter-binding.cpp
+++ b/source/slang/slang-parameter-binding.cpp
@@ -2585,6 +2585,19 @@ static bool _isCPUTarget(CodeGenTarget target)
     }
 }
 
+static bool _isPTXTarget(CodeGenTarget target)
+{
+    switch (target)
+    {
+        case CodeGenTarget::CUDASource:
+        case CodeGenTarget::PTX:
+        {
+            return true;
+        }
+        default: return false;
+    }
+}
+
     /// Keep track of the running global counter for entry points and global parameters visited.
     ///
     /// Because of explicit `register` and `[[vk::binding(...)]]` support, parameter binding
@@ -2970,7 +2983,8 @@ RefPtr<ProgramLayout> generateParameterBindings(
 
     // On a CPU target, it's okay to have global scope parameters that use Uniform resources (because on CPU
     // all resources are 'Uniform')
-    if (!_isCPUTarget(targetReq->target))
+    // TODO(JS): We'll just assume the same with CUDA target for now..
+    if (!_isCPUTarget(targetReq->target) && !_isPTXTarget(targetReq->target))
     {
         for( auto& parameterInfo : sharedContext.parameters )
         {
diff --git a/source/slang/slang-type-layout.cpp b/source/slang/slang-type-layout.cpp
index 88e62323b..772686163 100644
--- a/source/slang/slang-type-layout.cpp
+++ b/source/slang/slang-type-layout.cpp
@@ -398,6 +398,13 @@ struct CPULayoutRulesImpl : DefaultLayoutRulesImpl
     }
 };
 
+// TODO(JS): Most likely wrong. For layout for CUDA, we'll just do the default to get things up and running
+struct CUDALayoutRulesImpl : DefaultLayoutRulesImpl
+{
+    typedef DefaultLayoutRulesImpl Super;
+};
+
+
 struct HLSLStructuredBufferLayoutRulesImpl : DefaultLayoutRulesImpl
 {
     // HLSL structured buffers drop the restrictions added for constant buffers,
@@ -653,9 +660,29 @@ struct CPULayoutRulesFamilyImpl : LayoutRulesFamilyImpl
     LayoutRulesImpl* getStructuredBufferRules() override;
 };
 
+struct CUDALayoutRulesFamilyImpl : LayoutRulesFamilyImpl
+{
+    virtual LayoutRulesImpl* getConstantBufferRules() override;
+    virtual LayoutRulesImpl* getPushConstantBufferRules() override;
+    virtual LayoutRulesImpl* getTextureBufferRules() override;
+    virtual LayoutRulesImpl* getVaryingInputRules() override;
+    virtual LayoutRulesImpl* getVaryingOutputRules() override;
+    virtual LayoutRulesImpl* getSpecializationConstantRules() override;
+    virtual LayoutRulesImpl* getShaderStorageBufferRules() override;
+    virtual LayoutRulesImpl* getParameterBlockRules() override;
+
+    LayoutRulesImpl* getRayPayloadParameterRules()      override;
+    LayoutRulesImpl* getCallablePayloadParameterRules() override;
+    LayoutRulesImpl* getHitAttributesParameterRules()   override;
+
+    LayoutRulesImpl* getShaderRecordConstantBufferRules() override;
+    LayoutRulesImpl* getStructuredBufferRules() override;
+};
+
 GLSLLayoutRulesFamilyImpl kGLSLLayoutRulesFamilyImpl;
 HLSLLayoutRulesFamilyImpl kHLSLLayoutRulesFamilyImpl;
 CPULayoutRulesFamilyImpl kCPULayoutRulesFamilyImpl;
+CUDALayoutRulesFamilyImpl kCUDALayoutRulesFamilyImpl;
 
 // CPU case
 
@@ -703,6 +730,12 @@ struct CPUObjectLayoutRulesImpl : ObjectLayoutRulesImpl
 };
 
 
+// TODO(JS): Most likely wrong! Use CPU layout for CUDA for now
+struct CUDAObjectLayoutRulesImpl : CPUObjectLayoutRulesImpl
+{
+    typedef CPUObjectLayoutRulesImpl Super;
+
+};
 
 static CPUObjectLayoutRulesImpl kCPUObjectLayoutRulesImpl;
 static CPULayoutRulesImpl kCPULayoutRulesImpl;
@@ -711,6 +744,16 @@ LayoutRulesImpl kCPULayoutRulesImpl_ = {
     &kCPULayoutRulesFamilyImpl, &kCPULayoutRulesImpl, &kCPUObjectLayoutRulesImpl,
 };
 
+// CUDA
+
+static CUDAObjectLayoutRulesImpl kCUDAObjectLayoutRulesImpl;
+static CUDALayoutRulesImpl kCUALayoutRulesImpl;
+
+LayoutRulesImpl kCUDALayoutRulesImpl_ = {
+    &kCPULayoutRulesFamilyImpl, &kCUALayoutRulesImpl, &kCUDAObjectLayoutRulesImpl,
+};
+
+
 // GLSL cases
 
 LayoutRulesImpl kStd140LayoutRulesImpl_ = {
@@ -986,6 +1029,69 @@ LayoutRulesImpl* CPULayoutRulesFamilyImpl::getStructuredBufferRules()
     return &kCPULayoutRulesImpl_;
 }
 
+// CUDA Family
+
+LayoutRulesImpl* CUDALayoutRulesFamilyImpl::getConstantBufferRules()
+{
+    return &kCPULayoutRulesImpl_;
+}
+
+LayoutRulesImpl* CUDALayoutRulesFamilyImpl::getPushConstantBufferRules()
+{
+    return &kCPULayoutRulesImpl_;
+}
+
+LayoutRulesImpl* CUDALayoutRulesFamilyImpl::getTextureBufferRules()
+{
+    return nullptr;
+}
+
+LayoutRulesImpl* CUDALayoutRulesFamilyImpl::getVaryingInputRules()
+{
+    return nullptr;
+}
+LayoutRulesImpl* CUDALayoutRulesFamilyImpl::getVaryingOutputRules()
+{
+    return nullptr;
+}
+LayoutRulesImpl* CUDALayoutRulesFamilyImpl::getSpecializationConstantRules()
+{
+    return nullptr;
+}
+LayoutRulesImpl* CUDALayoutRulesFamilyImpl::getShaderStorageBufferRules()
+{
+    return nullptr;
+}
+LayoutRulesImpl* CUDALayoutRulesFamilyImpl::getParameterBlockRules()
+{
+    // Not clear - just use similar to CPU 
+    return &kCUDALayoutRulesImpl_;
+}
+LayoutRulesImpl* CUDALayoutRulesFamilyImpl::getRayPayloadParameterRules()
+{
+    return nullptr;
+}
+LayoutRulesImpl* CUDALayoutRulesFamilyImpl::getCallablePayloadParameterRules()
+{
+    return nullptr;
+}
+LayoutRulesImpl* CUDALayoutRulesFamilyImpl::getHitAttributesParameterRules()
+{
+    return nullptr;
+}
+LayoutRulesImpl* CUDALayoutRulesFamilyImpl::getShaderRecordConstantBufferRules()
+{
+    // Just following HLSLs lead for the moment
+    return &kCUDALayoutRulesImpl_;
+}
+
+LayoutRulesImpl* CUDALayoutRulesFamilyImpl::getStructuredBufferRules()
+{
+    return &kCUDALayoutRulesImpl_;
+}
+
+
+
 LayoutRulesFamilyImpl* getDefaultLayoutRulesFamilyForTarget(TargetRequest* targetReq)
 {
     switch (targetReq->getTarget())
@@ -1019,6 +1125,13 @@ LayoutRulesFamilyImpl* getDefaultLayoutRulesFamilyForTarget(TargetRequest* targe
         return &kCPULayoutRulesFamilyImpl;
     }
 
+    case CodeGenTarget::PTX:
+    case CodeGenTarget::CUDASource:
+    {
+        return &kCUDALayoutRulesFamilyImpl;
+    }
+
+
     default:
         return nullptr;
     }
diff --git a/source/slang/slang.vcxproj b/source/slang/slang.vcxproj
index 8c8a712ce..e120e006f 100644
--- a/source/slang/slang.vcxproj
+++ b/source/slang/slang.vcxproj
@@ -199,6 +199,7 @@
     <ClInclude Include="slang-diagnostics.h" />
     <ClInclude Include="slang-emit-c-like.h" />
     <ClInclude Include="slang-emit-cpp.h" />
+    <ClInclude Include="slang-emit-cuda.h" />
     <ClInclude Include="slang-emit-glsl-extension-tracker.h" />
     <ClInclude Include="slang-emit-glsl.h" />
     <ClInclude Include="slang-emit-hlsl.h" />
@@ -280,6 +281,7 @@
     <ClCompile Include="slang-dxc-support.cpp" />
     <ClCompile Include="slang-emit-c-like.cpp" />
     <ClCompile Include="slang-emit-cpp.cpp" />
+    <ClCompile Include="slang-emit-cuda.cpp" />
     <ClCompile Include="slang-emit-glsl-extension-tracker.cpp" />
     <ClCompile Include="slang-emit-glsl.cpp" />
     <ClCompile Include="slang-emit-hlsl.cpp" />
@@ -373,4 +375,4 @@
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
   </ImportGroup>
-</Project> 
-\ No newline at end of file
+</Project>
+\ No newline at end of file
diff --git a/source/slang/slang.vcxproj.filters b/source/slang/slang.vcxproj.filters
index c562d28e3..1c619a3d5 100644
--- a/source/slang/slang.vcxproj.filters
+++ b/source/slang/slang.vcxproj.filters
@@ -48,6 +48,9 @@
     <ClInclude Include="slang-emit-cpp.h">
       <Filter>Header Files</Filter>
     </ClInclude>
+    <ClInclude Include="slang-emit-cuda.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
     <ClInclude Include="slang-emit-glsl-extension-tracker.h">
       <Filter>Header Files</Filter>
     </ClInclude>
@@ -287,6 +290,9 @@
     <ClCompile Include="slang-emit-cpp.cpp">
       <Filter>Source Files</Filter>
     </ClCompile>
+    <ClCompile Include="slang-emit-cuda.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
     <ClCompile Include="slang-emit-glsl-extension-tracker.cpp">
       <Filter>Source Files</Filter>
     </ClCompile>
diff --git a/tests/cuda/compile-to-cuda.slang b/tests/cuda/compile-to-cuda.slang
new file mode 100644
index 000000000..6166aaf0b
--- /dev/null
+++ b/tests/cuda/compile-to-cuda.slang
@@ -0,0 +1,29 @@
+//DISABLE_TEST(smoke):SIMPLE: -target ptx -entry computeMain -stage compute 
+
+//TEST_INPUT:ubuffer(data=[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0], stride=4):out,name=outputBuffer
+RWStructuredBuffer<int> outputBuffer : register(u0);
+
+int quantize(double value)
+{
+    return int(value * 256);
+}
+
+int quantize(float value)
+{
+    return int(value * 256);
+}
+
+[numthreads(4, 1, 1)]
+void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID)
+{
+    float values[] = { -9, 9, -3, 3 };
+
+	int tid = int(dispatchThreadID.x);
+    float value = values[tid];
+    
+    outputBuffer[tid * 4] = quantize(sin(value));
+    outputBuffer[tid * 4 + 1] = quantize(cos(value));
+    
+    outputBuffer[tid * 4 + 2] = quantize(sin(double(value)));
+    outputBuffer[tid * 4 + 3] = quantize(cos(double(value)));
+}
diff --git a/tools/slang-test/slang-test-main.cpp b/tools/slang-test/slang-test-main.cpp
index cdfe6a1a7..6401ac852 100644
--- a/tools/slang-test/slang-test-main.cpp
+++ b/tools/slang-test/slang-test-main.cpp
@@ -505,6 +505,7 @@ static PassThroughFlags _getPassThroughFlagsForTarget(SlangCompileTarget target)
         case SLANG_GLSL:
         case SLANG_C_SOURCE:
         case SLANG_CPP_SOURCE:
+        case SLANG_CUDA_SOURCE:
         {
             return 0;
         }
author	jsmall-nvidia <jsmall@nvidia.com>	2019-12-19 11:23:14 -0500
committer	GitHub <noreply@github.com>	2019-12-19 11:23:14 -0500
commit	e3fe0319467546bae070137c58dcf8f9fbe93c79 (patch)
tree	6cc26ccda33725e98c4a9a0408cf31a1348db268
parent	60934d98fbc20d83b5e149e72a197ec4f5c61580 (diff)