summary | refs | log | tree | commit | diff | stats
path: root/source/slang
diff options
context:
space:
mode:
author: jsmall-nvidia <jsmall@nvidia.com> 2020-01-17 09:15:06 -0500
committer: GitHub <noreply@github.com> 2020-01-17 09:15:06 -0500
commit: a8669ade5cb3add8b9ce08e2c3bd96e93190bca8 (patch)
tree: 63be2fa7829c5bf956a5ce4d52af4e1d4073bf84 /source/slang
parent: 662721ba4ab0e38924701df4c876a86eb8390968 (diff)
Slang -> CUDA kernel runs correctly in test infrastructure (#1167)
* First pass at BindLocation. * Added BindSet::init - for initializing with two input constant buffers. Needs better name, and perhaps should be another class. * Fix handling of constant buffer stripping. Improved initialization. * Trying to generalize BindLocation a little more. Split out CPULikeBindRoot. * More work to make BindLocation et al work with non-uniform bindings. * Added parsing to a location. * WIP: Trying to get CPU working with BindLocation. * Describe problem of knowing the type of the reference point in the binding table. * More ideas on getBindings fix. * Remove BindSet as member of BindLocation. * Added BindLocation::Invalid * Made BindLocation able to be key in hash * Use BindLocation for bindings on BindingSet. * Added cuda and nvrtc categories to test infrastructure. Disabled CUDA synthetic tests by default. Fixed such that all tests now produce something in BindLocation style. * Use m_userIndex instead of m_userData on Resource. Move the binding setup out of cpu-compute-util (as no longer CPU specific) * Removed CPUBinding - used BindLocation/BindSet instead. Fixed some bugs around indexOf around uniform indirection. * Renamed BindSet::Resource -> BindSet::Value. * Document BindLocation. * Fixes for Clang/GCC. Improve invariant requirement handling when constructing from BindPoints. * WIP: First attempt to run CUDA kernel. * Fix some issues around doing CUDA kernel launch. * Fix issues around use of cudaMemcpy. * Better cuda runtime error checking mechanism. * Fixed bug in passing parameters to cuda kernel launch. Simplified initialisation of context. * WIP: Fix CUDA runtime issues. * Add explicit CUDA synchronize so failures don't appear on implicit ones. * Fix problem emitting non-shared variable on CUDA. * Fix some typos in CUDA layout. Use just a pointer for now for CUDA StructuredBuffer. * Arg order for CUDA launch was wrong. * First compute kernel runs on CUDA.
Diffstat (limited to 'source/slang')
-rw-r--r-- source/slang/slang-emit-cuda.cpp 2
-rw-r--r-- source/slang/slang-ir-entry-point-uniforms.cpp 2
-rw-r--r-- source/slang/slang-type-layout.cpp 54
3 files changed, 52 insertions, 6 deletions
diff --git a/source/slang/slang-emit-cuda.cpp b/source/slang/slang-emit-cuda.cpp
index 12807e9e2..39a25aafa 100644
--- a/source/slang/slang-emit-cuda.cpp
+++ b/source/slang/slang-emit-cuda.cpp
@@ -509,7 +509,7 @@ void CUDASourceEmitter::emitModuleImpl(IRModule* module)
// Output all the thread locals
for (auto action : actions)
{
- if (action.level == EmitAction::Level::Definition && _isVariable(action.inst->op))
+ if (action.level == EmitAction::Level::Definition && action.inst->op == kIROp_GlobalVar)
{
emitGlobalInst(action.inst);
}
diff --git a/source/slang/slang-ir-entry-point-uniforms.cpp b/source/slang/slang-ir-entry-point-uniforms.cpp
index ad535b747..388a7004d 100644
--- a/source/slang/slang-ir-entry-point-uniforms.cpp
+++ b/source/slang/slang-ir-entry-point-uniforms.cpp
@@ -452,6 +452,8 @@ void moveEntryPointUniformParamsToGlobalScope(
case CodeGenTarget::Executable:
case CodeGenTarget::SharedLibrary:
case CodeGenTarget::HostCallable:
+ case CodeGenTarget::CUDASource:
+ case CodeGenTarget::PTX:
{
context.targetNeedsConstantBuffer = false;
break;
diff --git a/source/slang/slang-type-layout.cpp b/source/slang/slang-type-layout.cpp
index 772686163..644f54a95 100644
--- a/source/slang/slang-type-layout.cpp
+++ b/source/slang/slang-type-layout.cpp
@@ -730,11 +730,55 @@ struct CPUObjectLayoutRulesImpl : ObjectLayoutRulesImpl
};
-// TODO(JS): Most likely wrong! Use CPU layout for CUDA for now
+// TODO(JS): Most likely wrong! Assumes largely CPU layout which is probably not right
struct CUDAObjectLayoutRulesImpl : CPUObjectLayoutRulesImpl
{
typedef CPUObjectLayoutRulesImpl Super;
+ virtual SimpleLayoutInfo GetObjectLayout(ShaderParameterKind kind) override
+ {
+ switch (kind)
+ {
+ case ShaderParameterKind::ConstantBuffer:
+ // It's a pointer to the actual uniform data
+ return SimpleLayoutInfo(LayoutResourceKind::Uniform, sizeof(void*), sizeof(void*));
+
+ case ShaderParameterKind::MutableTexture:
+ case ShaderParameterKind::TextureUniformBuffer:
+ case ShaderParameterKind::Texture:
+ // It's a pointer to a texture interface
+ return SimpleLayoutInfo(LayoutResourceKind::Uniform, sizeof(void*), sizeof(void*));
+
+ case ShaderParameterKind::StructuredBuffer:
+ case ShaderParameterKind::MutableStructuredBuffer:
+ // TODO(JS): We are just storing as a pointer for now, so the layout below is a single pointer.
+ // (A ptr plus a count of the amount of elements is not what is stored yet.)
+ return SimpleLayoutInfo(LayoutResourceKind::Uniform, sizeof(void*), sizeof(void*));
+
+ case ShaderParameterKind::RawBuffer:
+ case ShaderParameterKind::Buffer:
+ case ShaderParameterKind::MutableRawBuffer:
+ case ShaderParameterKind::MutableBuffer:
+
+ // TODO(JS): We are storing as a pointer for now, so the layout below is a single pointer.
+
+ // (A pointer plus a size in bytes is not what is stored yet.)
+ return SimpleLayoutInfo(LayoutResourceKind::Uniform, sizeof(void*), sizeof(void*));
+
+ case ShaderParameterKind::SamplerState:
+ // It's a pointer
+ return SimpleLayoutInfo(LayoutResourceKind::Uniform, sizeof(void*), sizeof(void*));
+
+ case ShaderParameterKind::TextureSampler:
+ case ShaderParameterKind::MutableTextureSampler:
+ case ShaderParameterKind::InputRenderTarget:
+ // TODO: how to handle these?
+ default:
+ SLANG_UNEXPECTED("unhandled shader parameter kind");
+ UNREACHABLE_RETURN(SimpleLayoutInfo());
+ }
+ }
+
};
static CPUObjectLayoutRulesImpl kCPUObjectLayoutRulesImpl;
@@ -747,10 +791,10 @@ LayoutRulesImpl kCPULayoutRulesImpl_ = {
// CUDA
static CUDAObjectLayoutRulesImpl kCUDAObjectLayoutRulesImpl;
-static CUDALayoutRulesImpl kCUALayoutRulesImpl;
+static CUDALayoutRulesImpl kCUDALayoutRulesImpl;
LayoutRulesImpl kCUDALayoutRulesImpl_ = {
- &kCPULayoutRulesFamilyImpl, &kCUALayoutRulesImpl, &kCUDAObjectLayoutRulesImpl,
+ &kCUDALayoutRulesFamilyImpl, &kCUDALayoutRulesImpl, &kCUDAObjectLayoutRulesImpl,
};
@@ -1033,12 +1077,12 @@ LayoutRulesImpl* CPULayoutRulesFamilyImpl::getStructuredBufferRules()
LayoutRulesImpl* CUDALayoutRulesFamilyImpl::getConstantBufferRules()
{
- return &kCPULayoutRulesImpl_;
+ return &kCUDALayoutRulesImpl_;
}
LayoutRulesImpl* CUDALayoutRulesFamilyImpl::getPushConstantBufferRules()
{
- return &kCPULayoutRulesImpl_;
+ return &kCUDALayoutRulesImpl_;
}
LayoutRulesImpl* CUDALayoutRulesFamilyImpl::getTextureBufferRules()