summary | refs | log | tree | commit | diff | stats
path: root/source/slang/slang-emit-cuda.cpp
diff options
context:
space:
mode:
author jsmall-nvidia <jsmall@nvidia.com> 2020-02-12 09:15:47 -0500
committer GitHub <noreply@github.com> 2020-02-12 09:15:47 -0500
commit fe9d27af9de047ea75db1334c961bb025fb732f6 (patch)
tree 3d7a3e1e693f0fe66930db87a5833eaea993620a /source/slang/slang-emit-cuda.cpp
parent 9b3e768bceae562deeb330067f3ef5febc2e5244 (diff)
CUDA barrier/atomic support (#1218)
* * Improved fastRemoveAt
  * Fixed off by one bug
  * Fixed const safeness with List<>
* Made List begin and end const safe.
* Revert to previous RefPtr usage.
* Fix bug with casting.
* Tabs -> spaces. Small fixes/improvements to List.
* Improve comment on List.
* Group shared/atomic test works on CUDA.
* * Enabled CUDA tests for atomics tests
  * Enabled DX12 test for atomics-buffer.slang
    Not clear just yet how to implement that for CUDA - it will work with StructuredBuffer.
* hasContent -> isNonEmpty
* Remove unneeded comment.
Diffstat (limited to 'source/slang/slang-emit-cuda.cpp')
-rw-r--r-- source/slang/slang-emit-cuda.cpp 16
1 files changed, 14 insertions, 2 deletions
diff --git a/source/slang/slang-emit-cuda.cpp b/source/slang/slang-emit-cuda.cpp
index c628e5caf..0bbaafa5b 100644
--- a/source/slang/slang-emit-cuda.cpp
+++ b/source/slang/slang-emit-cuda.cpp
@@ -470,7 +470,7 @@ void CUDASourceEmitter::emitRateQualifiersImpl(IRRate* rate)
{
if (as<IRGroupSharedRate>(rate))
{
- m_writer->emit("groupshared ");
+ m_writer->emit("__shared__ ");
}
}
@@ -599,6 +599,18 @@ void CUDASourceEmitter::emitModuleImpl(IRModule* module)
m_writer->emit("\n};\n\n");
}
+ // Output group shared variables
+
+ {
+ for (auto action : actions)
+ {
+ if (action.level == EmitAction::Level::Definition && action.inst->op == kIROp_GlobalVar && as<IRGroupSharedRate>(action.inst->getRate()))
+ {
+ emitGlobalInst(action.inst);
+ }
+ }
+ }
+
// Output the 'Context' which will be used for execution
{
m_writer->emit("struct Context\n{\n");
@@ -614,7 +626,7 @@ void CUDASourceEmitter::emitModuleImpl(IRModule* module)
// Output all the thread locals
for (auto action : actions)
{
- if (action.level == EmitAction::Level::Definition && action.inst->op == kIROp_GlobalVar)
+ if (action.level == EmitAction::Level::Definition && action.inst->op == kIROp_GlobalVar && !as<IRGroupSharedRate>(action.inst->getRate()))
{
emitGlobalInst(action.inst);
}