From b3e6f1b2cffa8def593e97a00576eeba0f947ebc Mon Sep 17 00:00:00 2001 From: jsmall-nvidia Date: Wed, 25 Mar 2020 16:45:56 -0400 Subject: Unroll target improvements (#1291) * Add unroll support for CUDA, and preliminary for C++. Document [unroll] support. * Fix loop-unroll to run on CPU, and test on CPU and elsewhere. Fix bug in emitting loop unroll condition. * Improved comment. * Added support for vk/glsl loop unrolling. --- docs/target-compatibility.md | 12 ++++++++++++ prelude/slang-cpp-prelude.h | 4 ++++ source/slang/slang-emit-c-like.cpp | 16 ++-------------- source/slang/slang-emit-c-like.h | 1 + source/slang/slang-emit-cpp.cpp | 9 +++++++++ source/slang/slang-emit-cpp.h | 1 + source/slang/slang-emit-cuda.cpp | 8 ++++++++ source/slang/slang-emit-cuda.h | 2 ++ source/slang/slang-emit-glsl.cpp | 10 ++++++++++ source/slang/slang-emit-glsl.h | 2 +- source/slang/slang-emit-hlsl.cpp | 8 ++++++++ source/slang/slang-emit-hlsl.h | 1 + tests/compute/loop-unroll.slang | 18 +++++++++++++----- 13 files changed, 72 insertions(+), 20 deletions(-) diff --git a/docs/target-compatibility.md b/docs/target-compatibility.md index 64695c09c..e8edeeaf5 100644 --- a/docs/target-compatibility.md +++ b/docs/target-compatibility.md @@ -34,6 +34,7 @@ Items with ^ means there is some discussion about support later in the document | tex.Load | Yes | Yes | Yes | Limited ^ | Yes | Full bool | Yes | Yes | Yes | No | Yes ^ | Mesh Shader | No | No + | No + | No | No +| `[unroll]` | Yes | Yes | Yes ^ | Yes | Limited + ## Half Type @@ -114,3 +115,14 @@ tex.Load is only supported on CUDA for Texture1D. Additionally CUDA only allows Means fully featured bool support. CUDA has issues around bool because there isn't a vector bool type built in. Currently bool aliases to an int vector type. On CPU there are some issues in so far as bool's size is not well defined in size an alignment. Most C++ compilers now use a byte to represent a bool. In the past it has been backed by an int on some compilers. + +## `[unroll]` + +The unroll attribute allows for unrolling `for` loops. At the moment the feature is dependent on downstream compiler support which is mixed. In the longer term the intention is for Slang to contain it's own loop unroller - and therefore not be dependent on the feature on downstream compilers. + +On C++ this attribute becomes SLANG_UNROLL which is defined in the prelude. This can be predefined if there is a suitable mechanism, if there isn't a definition SLANG_UNROLL will be an empty definition. + +On GLSL and VK targets loop unrolling uses the [GL_EXT_control_flow_attributes](https://github.com/KhronosGroup/GLSL/blob/master/extensions/ext/GL_EXT_control_flow_attributes.txt) extension. + +Slang does have a cross target mechanism to [unroll loops](language-reference/06-statements.md), in the section `Compile-Time For Statement`. + diff --git a/prelude/slang-cpp-prelude.h b/prelude/slang-cpp-prelude.h index 77c738620..e85ea1173 100644 --- a/prelude/slang-cpp-prelude.h +++ b/prelude/slang-cpp-prelude.h @@ -34,4 +34,8 @@ # pragma warning(disable : 4700) #endif +#ifndef SLANG_UNROLL +# define SLANG_UNROLL +#endif + #endif diff --git a/source/slang/slang-emit-c-like.cpp b/source/slang/slang-emit-c-like.cpp index 3631040b8..b217a2a1b 100644 --- a/source/slang/slang-emit-c-like.cpp +++ b/source/slang/slang-emit-c-like.cpp @@ -2592,20 +2592,8 @@ void CLikeSourceEmitter::emitRegion(Region* inRegion) // if (auto loopControlDecoration = loopInst->findDecoration()) { - switch (loopControlDecoration->getMode()) - { - case kIRLoopControl_Unroll: - // Note: loop unrolling control is only available in HLSL, not GLSL - if(getSourceStyle() == SourceStyle::HLSL) - { - m_writer->emit("[unroll]\n"); - } - break; - - default: - break; - } - } + emitLoopControlDecorationImpl(loopControlDecoration); + } m_writer->emit("for(;;)\n{\n"); m_writer->indent(); diff --git a/source/slang/slang-emit-c-like.h b/source/slang/slang-emit-c-like.h index ddca69462..e5a8de9ec 100644 --- a/source/slang/slang-emit-c-like.h +++ b/source/slang/slang-emit-c-like.h @@ -328,6 +328,7 @@ public: virtual void emitParamTypeImpl(IRType* type, String const& name); virtual void emitIntrinsicCallExprImpl(IRCall* inst, IRTargetIntrinsicDecoration* targetIntrinsic, EmitOpInfo const& inOuterPrec); virtual void emitFunctionPreambleImpl(IRInst* inst) { SLANG_UNUSED(inst); } + virtual void emitLoopControlDecorationImpl(IRLoopControlDecoration* decl) { SLANG_UNUSED(decl); } // Only needed for glsl output with $ prefix intrinsics - so perhaps removable in the future virtual void emitTextureOrTextureSamplerTypeImpl(IRTextureTypeBase* type, char const* baseName) { SLANG_UNUSED(type); SLANG_UNUSED(baseName); } diff --git a/source/slang/slang-emit-cpp.cpp b/source/slang/slang-emit-cpp.cpp index bece6c2d0..8f59da214 100644 --- a/source/slang/slang-emit-cpp.cpp +++ b/source/slang/slang-emit-cpp.cpp @@ -1868,6 +1868,15 @@ void CPPSourceEmitter::emitIntrinsicCallExprImpl( return Super::emitIntrinsicCallExprImpl(inst, targetIntrinsic, inOuterPrec); } +void CPPSourceEmitter::emitLoopControlDecorationImpl(IRLoopControlDecoration* decl) +{ + if (decl->getMode() == kIRLoopControl_Unroll) + { + // This relies on a suitable definition in slang-cpp-prelude.h or defined in C++ compiler invocation. + m_writer->emit("SLANG_UNROLL\n"); + } +} + bool CPPSourceEmitter::_tryEmitInstExprAsIntrinsic(IRInst* inst, const EmitOpInfo& inOuterPrec) { HLSLIntrinsic* specOp = m_intrinsicSet.add(inst); diff --git a/source/slang/slang-emit-cpp.h b/source/slang/slang-emit-cpp.h index 99f180850..aeef30804 100644 --- a/source/slang/slang-emit-cpp.h +++ b/source/slang/slang-emit-cpp.h @@ -79,6 +79,7 @@ protected: virtual bool tryEmitGlobalParamImpl(IRGlobalParam* varDecl, IRType* varType) SLANG_OVERRIDE; virtual void emitIntrinsicCallExprImpl(IRCall* inst, IRTargetIntrinsicDecoration* targetIntrinsic, EmitOpInfo const& inOuterPrec) SLANG_OVERRIDE; + virtual void emitLoopControlDecorationImpl(IRLoopControlDecoration* decl) SLANG_OVERRIDE; // Replaceable for classes derived from CPPSourceEmitter virtual SlangResult calcTypeName(IRType* type, CodeGenTarget target, StringBuilder& out); diff --git a/source/slang/slang-emit-cuda.cpp b/source/slang/slang-emit-cuda.cpp index 91439d5d3..64cb240fc 100644 --- a/source/slang/slang-emit-cuda.cpp +++ b/source/slang/slang-emit-cuda.cpp @@ -341,6 +341,14 @@ void CUDASourceEmitter::emitCall(const HLSLIntrinsic* specOp, IRInst* inst, cons return Super::emitCall(specOp, inst, operands, numOperands, inOuterPrec); } +void CUDASourceEmitter::emitLoopControlDecorationImpl(IRLoopControlDecoration* decl) +{ + if (decl->getMode() == kIRLoopControl_Unroll) + { + m_writer->emit("#pragma unroll\n"); + } +} + bool CUDASourceEmitter::tryEmitInstExprImpl(IRInst* inst, const EmitOpInfo& inOuterPrec) { switch(inst->op) diff --git a/source/slang/slang-emit-cuda.h b/source/slang/slang-emit-cuda.h index 392e4f98d..3d23fd80f 100644 --- a/source/slang/slang-emit-cuda.h +++ b/source/slang/slang-emit-cuda.h @@ -49,6 +49,8 @@ protected: virtual void emitCall(const HLSLIntrinsic* specOp, IRInst* inst, const IRUse* operands, int numOperands, const EmitOpInfo& inOuterPrec) SLANG_OVERRIDE; virtual void emitFunctionPreambleImpl(IRInst* inst) SLANG_OVERRIDE { SLANG_UNUSED(inst); m_writer->emit("__device__ "); } + virtual void emitLoopControlDecorationImpl(IRLoopControlDecoration* decl) SLANG_OVERRIDE; + //virtual bool tryEmitGlobalParamImpl(IRGlobalParam* varDecl, IRType* varType) SLANG_OVERRIDE; virtual bool tryEmitInstExprImpl(IRInst* inst, const EmitOpInfo& inOuterPrec) SLANG_OVERRIDE; diff --git a/source/slang/slang-emit-glsl.cpp b/source/slang/slang-emit-glsl.cpp index b433b4d94..af1cfd897 100644 --- a/source/slang/slang-emit-glsl.cpp +++ b/source/slang/slang-emit-glsl.cpp @@ -668,6 +668,16 @@ void GLSLSourceEmitter::_maybeEmitGLSLFlatModifier(IRType* valueType) } } +void GLSLSourceEmitter::emitLoopControlDecorationImpl(IRLoopControlDecoration* decl) +{ + if (decl->getMode() == kIRLoopControl_Unroll) + { + // https://github.com/KhronosGroup/GLSL/blob/master/extensions/ext/GL_EXT_control_flow_attributes.txt + m_glslExtensionTracker->requireExtension(UnownedStringSlice::fromLiteral("GL_EXT_control_flow_attributes")); + m_writer->emit("[[unroll]]\n"); + } +} + void GLSLSourceEmitter::emitSimpleValueImpl(IRInst* inst) { switch (inst->op) diff --git a/source/slang/slang-emit-glsl.h b/source/slang/slang-emit-glsl.h index a88a3b39b..bc430c9a8 100644 --- a/source/slang/slang-emit-glsl.h +++ b/source/slang/slang-emit-glsl.h @@ -46,7 +46,7 @@ protected: virtual bool tryEmitInstExprImpl(IRInst* inst, const EmitOpInfo& inOuterPrec) SLANG_OVERRIDE; virtual void emitSimpleValueImpl(IRInst* inst) SLANG_OVERRIDE; - + virtual void emitLoopControlDecorationImpl(IRLoopControlDecoration* decl) SLANG_OVERRIDE; void _emitGLSLTextureOrTextureSamplerType(IRTextureTypeBase* type, char const* baseName); void _emitGLSLStructuredBuffer(IRGlobalParam* varDecl, IRHLSLStructuredBufferTypeBase* structuredBufferType); diff --git a/source/slang/slang-emit-hlsl.cpp b/source/slang/slang-emit-hlsl.cpp index a0e6e872d..489e17c8e 100644 --- a/source/slang/slang-emit-hlsl.cpp +++ b/source/slang/slang-emit-hlsl.cpp @@ -504,6 +504,14 @@ void HLSLSourceEmitter::emitVectorTypeNameImpl(IRType* elementType, IRIntegerVal m_writer->emit(">"); } +void HLSLSourceEmitter::emitLoopControlDecorationImpl(IRLoopControlDecoration* decl) +{ + if (decl->getMode() == kIRLoopControl_Unroll) + { + m_writer->emit("[unroll]\n"); + } +} + void HLSLSourceEmitter::emitSimpleValueImpl(IRInst* inst) { switch (inst->op) diff --git a/source/slang/slang-emit-hlsl.h b/source/slang/slang-emit-hlsl.h index 05fa3bb11..f9b0ad0fb 100644 --- a/source/slang/slang-emit-hlsl.h +++ b/source/slang/slang-emit-hlsl.h @@ -33,6 +33,7 @@ protected: virtual bool tryEmitInstExprImpl(IRInst* inst, const EmitOpInfo& inOuterPrec) SLANG_OVERRIDE; virtual void emitSimpleValueImpl(IRInst* inst) SLANG_OVERRIDE; + virtual void emitLoopControlDecorationImpl(IRLoopControlDecoration* decl) SLANG_OVERRIDE; // Emit a single `register` semantic, as appropriate for a given resource-type-specific layout info // Keyword to use in the uniform case (`register` for globals, `packoffset` inside a `cbuffer`) diff --git a/tests/compute/loop-unroll.slang b/tests/compute/loop-unroll.slang index b8ec06768..25f25b0ec 100644 --- a/tests/compute/loop-unroll.slang +++ b/tests/compute/loop-unroll.slang @@ -1,7 +1,15 @@ //TEST(compute):COMPARE_COMPUTE: +//TEST(compute):COMPARE_COMPUTE:-dx12 +//TODO(JS): This test fails with a crash in CreateComputePipelineState, so disabled for now +//DISABLE_TEST(compute):COMPARE_COMPUTE:-dx12 -use-dxil +//TEST(compute):COMPARE_COMPUTE:-cpu +//TEST(compute):COMPARE_COMPUTE:-cuda +// Note VK output is not loop unrolled +//TEST(compute):COMPARE_COMPUTE:-vk -//TEST_INPUT:ubuffer(data=[0 1 2 3], stride=4):out -//TEST_INPUT:ubuffer(data=[1 2 3 0], stride=4): +//TEST_INPUT:ubuffer(data=[0 0 0 0], stride=4):out, name buffers[0] +//TEST_INPUT:ubuffer(data=[0 1 2 3], stride=4):name buffers[1] +//TEST_INPUT:ubuffer(data=[1 2 3 0], stride=4):name buffers[2] // Check that we propagate the `[unroll]` attribute // through to HLSL output correctly. @@ -10,7 +18,7 @@ // it will generate a warning output from fxc, and the // test will fail to match the expected output. -RWStructuredBuffer buffers[2]; +RWStructuredBuffer buffers[3]; [numthreads(4, 1, 1)] void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) @@ -20,12 +28,12 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) // Note: using `unroll` as a variable name to validate that // the lookup process for attribute names doesn't run into // problems because of local declarations with the same name. - int unroll = buffers[1][tid]; + int unroll = buffers[2][tid]; [unroll] for(int ii = 0; ii < 2; ii++) { - unroll = buffers[ii][unroll]; + unroll = buffers[ii + 1][unroll]; } buffers[0][tid] = unroll; -- cgit v1.2.3