From fe9d27af9de047ea75db1334c961bb025fb732f6 Mon Sep 17 00:00:00 2001 From: jsmall-nvidia Date: Wed, 12 Feb 2020 09:15:47 -0500 Subject: CUDA barrier/atomic support (#1218) * * Improved fastRemoveAt * Fixed off by one bug * Fixed const safeness with List<> * Made List begin and end const safe. * Revert to previous RefPtr usage. * Fix bug with casting. * Tabs -> spaces. Small fixes/improvements to List. * Improve comment on List. * Group shared/atomic test works on CUDA. * * Enabled CUDA tests for atomics tests * Enabled DX12 test for atomics-buffer.slang Not clear just yet how to implement that for CUDA - it will work with StructuredBuffer. * hasContent -> isNonEmpty * Remove unneeded comment. --- source/slang/slang-emit-cuda.cpp | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'source/slang/slang-emit-cuda.cpp') diff --git a/source/slang/slang-emit-cuda.cpp b/source/slang/slang-emit-cuda.cpp index c628e5caf..0bbaafa5b 100644 --- a/source/slang/slang-emit-cuda.cpp +++ b/source/slang/slang-emit-cuda.cpp @@ -470,7 +470,7 @@ void CUDASourceEmitter::emitRateQualifiersImpl(IRRate* rate) { if (as<IRGroupSharedRate>(rate)) { - m_writer->emit("groupshared "); + m_writer->emit("__shared__ "); } } @@ -599,6 +599,18 @@ void CUDASourceEmitter::emitModuleImpl(IRModule* module) m_writer->emit("\n};\n\n"); } + // Output group shared variables + + { + for (auto action : actions) + { + if (action.level == EmitAction::Level::Definition && action.inst->op == kIROp_GlobalVar && as<IRGroupSharedRate>(action.inst->getRate())) + { + emitGlobalInst(action.inst); + } + } + } + // Output the 'Context' which will be used for execution { m_writer->emit("struct Context\n{\n"); @@ -614,7 +626,7 @@ void CUDASourceEmitter::emitModuleImpl(IRModule* module) // Output all the thread locals for (auto action : actions) { - if (action.level == EmitAction::Level::Definition && action.inst->op == kIROp_GlobalVar) + if (action.level == EmitAction::Level::Definition && action.inst->op == kIROp_GlobalVar && 
!as<IRGroupSharedRate>(action.inst->getRate())) { emitGlobalInst(action.inst); } -- cgit v1.2.3