summaryrefslogtreecommitdiffstats
path: root/source
diff options
context:
space:
mode:
Diffstat (limited to 'source')
-rw-r--r--source/slang/slang-emit-c-like.cpp7
-rw-r--r--source/slang/slang-emit-c-like.h2
-rw-r--r--source/slang/slang-emit-cuda.cpp127
-rw-r--r--source/slang/slang-emit-cuda.h3
-rw-r--r--source/slang/slang-emit.cpp20
-rw-r--r--source/slang/slang-ir-explicit-global-context.cpp50
-rw-r--r--source/slang/slang.vcxproj2
7 files changed, 139 insertions, 72 deletions
diff --git a/source/slang/slang-emit-c-like.cpp b/source/slang/slang-emit-c-like.cpp
index ce3dc8957..4c6c89ef5 100644
--- a/source/slang/slang-emit-c-like.cpp
+++ b/source/slang/slang-emit-c-like.cpp
@@ -660,6 +660,11 @@ String CLikeSourceEmitter::scrubName(const String& name)
return sb.ProduceString();
}
+String CLikeSourceEmitter::generateEntryPointNameImpl(IREntryPointDecoration* entryPointDecor)
+{
+ return entryPointDecor->getName()->getStringSlice();
+}
+
String CLikeSourceEmitter::generateName(IRInst* inst)
{
// If the instruction names something
@@ -686,7 +691,7 @@ String CLikeSourceEmitter::generateName(IRInst* inst)
return "main";
}
- return entryPointDecor->getName()->getStringSlice();
+ return generateEntryPointNameImpl(entryPointDecor);
}
// If we have a name hint on the instruction, then we will try to use that
diff --git a/source/slang/slang-emit-c-like.h b/source/slang/slang-emit-c-like.h
index d813a819e..b89d5d1c4 100644
--- a/source/slang/slang-emit-c-like.h
+++ b/source/slang/slang-emit-c-like.h
@@ -156,6 +156,8 @@ public:
String scrubName(const String& name);
String generateName(IRInst* inst);
+ virtual String generateEntryPointNameImpl(IREntryPointDecoration* entryPointDecor);
+
String getName(IRInst* inst);
void emitDeclarator(IRDeclaratorInfo* declarator);
diff --git a/source/slang/slang-emit-cuda.cpp b/source/slang/slang-emit-cuda.cpp
index 6f24d5b74..acd913865 100644
--- a/source/slang/slang-emit-cuda.cpp
+++ b/source/slang/slang-emit-cuda.cpp
@@ -239,7 +239,15 @@ void CUDASourceEmitter::emitLayoutSemanticsImpl(IRInst* inst, char const* unifor
void CUDASourceEmitter::emitParameterGroupImpl(IRGlobalParam* varDecl, IRUniformParameterGroupType* type)
{
- Super::emitParameterGroupImpl(varDecl, type);
+ auto elementType = type->getElementType();
+
+ m_writer->emit("extern \"C\" __constant__ ");
+ emitType(elementType, "SLANG_globalParams");
+ m_writer->emit(";\n");
+
+ m_writer->emit("#define ");
+ m_writer->emit(getName(varDecl));
+ m_writer->emit(" (&SLANG_globalParams)\n");
}
void CUDASourceEmitter::emitEntryPointAttributesImpl(IRFunc* irFunc, IREntryPointDecoration* entryPointDecor)
@@ -260,6 +268,59 @@ void CUDASourceEmitter::emitFunctionPreambleImpl(IRInst* inst)
}
}
+String CUDASourceEmitter::generateEntryPointNameImpl(IREntryPointDecoration* entryPointDecor)
+{
+ // We have an entry-point function in the IR module, which we
+ // will want to emit as a `__global__` function in the generated
+ // CUDA C++.
+ //
+ // The most common case will be a compute kernel, in which case
+ // we will emit the function more or less as-is, including
+ // usingits original name as the name of the global symbol.
+ //
+ String funcName = Super::generateEntryPointNameImpl(entryPointDecor);
+ String globalSymbolName = funcName;
+
+ // We also suport emitting ray tracing kernels for use with
+ // OptiX, and in that case the name of the global symbol
+ // must be prefixed to indicate to the OptiX runtime what
+ // stage it is to be compiled for.
+ //
+ auto stage = entryPointDecor->getProfile().getStage();
+ switch( stage )
+ {
+ default:
+ break;
+
+#define CASE(STAGE, PREFIX) \
+ case Stage::STAGE: globalSymbolName = #PREFIX + funcName; break
+
+ // Optix 7 Guide, Section 6.1 (Program input)
+ //
+ // > The input PTX should include one or more NVIDIA OptiX programs.
+ // > The type of program affects how the program can be used during
+ // > the execution of the pipeline. These program types are specified
+ // by prefixing the program’s name with the following:
+ //
+ // > Program type Function name prefix
+ CASE( RayGeneration, __raygen__);
+ CASE( Intersection, __intersection__);
+ CASE( AnyHit, __anyhit__);
+ CASE( ClosestHit, __closesthit__);
+ CASE( Miss, __miss__);
+ CASE( Callable, __direct_callable__);
+ //
+ // There are two stages (or "program types") supported by OptiX
+ // that Slang currently cannot target:
+ //
+ // CASE(ContinuationCallable, __continuation_callable__);
+ // CASE(Exception, __exception__);
+ //
+#undef CASE
+ }
+
+ return globalSymbolName;
+}
void CUDASourceEmitter::emitCall(const HLSLIntrinsic* specOp, IRInst* inst, const IRUse* operands, int numOperands, const EmitOpInfo& inOuterPrec)
{
@@ -642,6 +703,24 @@ void CUDASourceEmitter::emitPreprocessorDirectivesImpl()
}
}
+bool CUDASourceEmitter::tryEmitGlobalParamImpl(IRGlobalParam* varDecl, IRType* varType)
+{
+ // A global shader parameter in the IR for CUDA output will
+ // either be the unique constant buffer that wraps all the
+ // global-scope parameters in the original code (which is
+ // handled as a special-case before this routine would be
+ // called), or it is one of the system-defined varying inputs
+ // like `threadIdx`. We won't need to emit anything in the
+ // output code for the latter case, so we need to emit
+ // nothing here and return `true` so that the base class
+ // uses our logic instead of the default.
+ //
+ SLANG_UNUSED(varDecl);
+ SLANG_UNUSED(varType);
+ return true;
+}
+
+
void CUDASourceEmitter::emitModuleImpl(IRModule* module)
{
// Setup all built in types used in the module
@@ -660,51 +739,7 @@ void CUDASourceEmitter::emitModuleImpl(IRModule* module)
// TODO(JS): We may need to generate types (for example for matrices)
- // TODO(JS): We need to determine which functions we need to inline
-
- // The IR will usually come in an order that respects
- // dependencies between global declarations, but this
- // isn't guaranteed, so we need to be careful about
- // the order in which we emit things.
-
- List<EmitAction> actions;
-
- computeEmitActions(module, actions);
-
-
- _emitForwardDeclarations(actions);
-
- // Output group shared variables
-
- {
- for (auto action : actions)
- {
- if (action.level == EmitAction::Level::Definition && action.inst->op == kIROp_GlobalVar && as<IRGroupSharedRate>(action.inst->getRate()))
- {
- emitGlobalInst(action.inst);
- }
- }
- }
-
- {
- // Output all the thread locals
- for (auto action : actions)
- {
- if (action.level == EmitAction::Level::Definition && action.inst->op == kIROp_GlobalVar && !as<IRGroupSharedRate>(action.inst->getRate()))
- {
- emitGlobalInst(action.inst);
- }
- }
-
- // Finally output the functions as methods on the context
- for (auto action : actions)
- {
- if (action.level == EmitAction::Level::Definition && as<IRFunc>(action.inst))
- {
- emitGlobalInst(action.inst);
- }
- }
- }
+ CLikeSourceEmitter::emitModuleImpl(module);
}
diff --git a/source/slang/slang-emit-cuda.h b/source/slang/slang-emit-cuda.h
index ae78a5e57..156d5fab1 100644
--- a/source/slang/slang-emit-cuda.h
+++ b/source/slang/slang-emit-cuda.h
@@ -57,12 +57,13 @@ protected:
virtual void emitMatrixLayoutModifiersImpl(IRVarLayout* layout) SLANG_OVERRIDE;
virtual void emitCall(const HLSLIntrinsic* specOp, IRInst* inst, const IRUse* operands, int numOperands, const EmitOpInfo& inOuterPrec) SLANG_OVERRIDE;
virtual void emitFunctionPreambleImpl(IRInst* inst) SLANG_OVERRIDE;
+ virtual String generateEntryPointNameImpl(IREntryPointDecoration* entryPointDecor) SLANG_OVERRIDE;
virtual void emitLoopControlDecorationImpl(IRLoopControlDecoration* decl) SLANG_OVERRIDE;
virtual void handleCallExprDecorationsImpl(IRInst* funcValue) SLANG_OVERRIDE;
- //virtual bool tryEmitGlobalParamImpl(IRGlobalParam* varDecl, IRType* varType) SLANG_OVERRIDE;
+ virtual bool tryEmitGlobalParamImpl(IRGlobalParam* varDecl, IRType* varType) SLANG_OVERRIDE;
virtual bool tryEmitInstExprImpl(IRInst* inst, const EmitOpInfo& inOuterPrec) SLANG_OVERRIDE;
virtual void emitPreprocessorDirectivesImpl() SLANG_OVERRIDE;
diff --git a/source/slang/slang-emit.cpp b/source/slang/slang-emit.cpp
index 7d8a4074e..af01e4b0d 100644
--- a/source/slang/slang-emit.cpp
+++ b/source/slang/slang-emit.cpp
@@ -242,20 +242,19 @@ Result linkAndOptimizeIR(
CollectEntryPointUniformParamsOptions passOptions;
switch( target )
{
- default:
+ case CodeGenTarget::CUDASource:
break;
case CodeGenTarget::CPPSource:
- case CodeGenTarget::CUDASource:
passOptions.alwaysCreateCollectedParam = true;
+ default:
+ collectEntryPointUniformParams(irModule, passOptions);
+ #if 0
+ dumpIRIfEnabled(compileRequest, irModule, "ENTRY POINT UNIFORMS COLLECTED");
+ #endif
+ validateIRModuleIfEnabled(compileRequest, irModule);
break;
}
-
- collectEntryPointUniformParams(irModule, passOptions);
- #if 0
- dumpIRIfEnabled(compileRequest, irModule, "ENTRY POINT UNIFORMS COLLECTED");
- #endif
- validateIRModuleIfEnabled(compileRequest, irModule);
}
switch( target )
@@ -637,7 +636,10 @@ Result linkAndOptimizeIR(
case CodeGenTarget::CUDASource:
moveGlobalVarInitializationToEntryPoints(irModule);
introduceExplicitGlobalContext(irModule, target);
- convertEntryPointPtrParamsToRawPtrs(irModule);
+ if(target == CodeGenTarget::CPPSource)
+ {
+ convertEntryPointPtrParamsToRawPtrs(irModule);
+ }
#if 0
dumpIRIfEnabled(compileRequest, irModule, "EXPLICIT GLOBAL CONTEXT INTRODUCED");
#endif
diff --git a/source/slang/slang-ir-explicit-global-context.cpp b/source/slang/slang-ir-explicit-global-context.cpp
index 8f11bce2c..32efd51e8 100644
--- a/source/slang/slang-ir-explicit-global-context.cpp
+++ b/source/slang/slang-ir-explicit-global-context.cpp
@@ -31,17 +31,6 @@ struct IntroduceExplicitGlobalContextPass
IRBuilder builder(&sharedBuilder);
- // The global context will be represneted by a `struct`
- // type with a name hint of `KernelContext`.
- //
- m_contextStructType = builder.createStructType();
- builder.addNameHintDecoration(m_contextStructType, UnownedTerminatedStringSlice("KernelContext"));
-
- // The context will usually be passed around by pointer,
- // so we get and cache that pointer type up front.
- //
- m_contextStructPtrType = builder.getPtrType(m_contextStructType);
-
// The transformation we will perform will need to affect
// global variables, global shader parameters, and entry-point
// function (at the very least), and we start with an explicit
@@ -107,7 +96,13 @@ struct IntroduceExplicitGlobalContextPass
// Note: If we ever changed out mind about the representation
// and wanted to support multiple global parameters, we could
// easily generalize this code to work with a list.
- //
+
+ // For CUDA output, we want to leave the global uniform
+ // parameter where it is, because it will translate to
+ // a global `__constant__` variable.
+ if(m_target == CodeGenTarget::CUDASource)
+ continue;
+
SLANG_ASSERT(!m_globalUniformsParam);
m_globalUniformsParam = globalParam;
}
@@ -132,9 +127,36 @@ struct IntroduceExplicitGlobalContextPass
}
}
+ // If there are no global-scope entities that require processing,
+ // then we can completely skip the work of this pass for CUDA.
+ //
+ // Note: We cannot skip the rest of the pass for CPU, because
+ // it is responsible for introducing the explicit entry-point
+ // parameter that is used for passing in the global param(s).
+ //
+ if( m_target == CodeGenTarget::CUDASource )
+ {
+ if( !m_globalUniformsParam && (m_globalVars.getCount() == 0) )
+ {
+ return;
+ }
+ }
+
// Now that we've capture all the relevant global entities from the IR,
// we can being to transform them in an appropriate order.
//
+ // The global context will be represneted by a `struct`
+ // type with a name hint of `KernelContext`.
+ //
+ m_contextStructType = builder.createStructType();
+ builder.addNameHintDecoration(m_contextStructType, UnownedTerminatedStringSlice("KernelContext"));
+
+ // The context will usually be passed around by pointer,
+ // so we get and cache that pointer type up front.
+ //
+ m_contextStructPtrType = builder.getPtrType(m_contextStructType);
+
+
// The first step will be to create fields in the `KernelContext`
// type to represent any global parameters or global variables.
//
@@ -270,9 +292,9 @@ struct IntroduceExplicitGlobalContextPass
//
globalUniformsParam->insertBefore(firstOrdinary);
}
- else
+ else if(m_target == CodeGenTarget::CPPSource)
{
- // The nature of our current ABI for entry points on CPU/CUDA
+ // The nature of our current ABI for entry points on CPU
// means that we need an explicit parameter to be *declared*
// for the global uniforms, even if it is never used.
//
diff --git a/source/slang/slang.vcxproj b/source/slang/slang.vcxproj
index f20a4a322..aaece7095 100644
--- a/source/slang/slang.vcxproj
+++ b/source/slang/slang.vcxproj
@@ -404,4 +404,4 @@
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
-</Project> \ No newline at end of file
+</Project> \ No newline at end of file