summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorjsmall-nvidia <jsmall@nvidia.com>2020-03-25 16:45:56 -0400
committerGitHub <noreply@github.com>2020-03-25 16:45:56 -0400
commitb3e6f1b2cffa8def593e97a00576eeba0f947ebc (patch)
treef953b64922bb3fe69ef1ac26bef0eda2741626d3
parent28a0ca96a1ad2a3f0e09cc97b866f3b6338a09fa (diff)
Unroll target improvements (#1291)
* Add unroll support for CUDA, and preliminary for C++. Document [unroll] support. * Fix loop-unroll to run on CPU, and test on CPU and elsewhere. Fix bug in emitting loop unroll condition. * Improved comment. * Added support for vk/glsl loop unrolling.
-rw-r--r--docs/target-compatibility.md12
-rw-r--r--prelude/slang-cpp-prelude.h4
-rw-r--r--source/slang/slang-emit-c-like.cpp16
-rw-r--r--source/slang/slang-emit-c-like.h1
-rw-r--r--source/slang/slang-emit-cpp.cpp9
-rw-r--r--source/slang/slang-emit-cpp.h1
-rw-r--r--source/slang/slang-emit-cuda.cpp8
-rw-r--r--source/slang/slang-emit-cuda.h2
-rw-r--r--source/slang/slang-emit-glsl.cpp10
-rw-r--r--source/slang/slang-emit-glsl.h2
-rw-r--r--source/slang/slang-emit-hlsl.cpp8
-rw-r--r--source/slang/slang-emit-hlsl.h1
-rw-r--r--tests/compute/loop-unroll.slang18
13 files changed, 72 insertions, 20 deletions
diff --git a/docs/target-compatibility.md b/docs/target-compatibility.md
index 64695c09c..e8edeeaf5 100644
--- a/docs/target-compatibility.md
+++ b/docs/target-compatibility.md
@@ -34,6 +34,7 @@ Items with ^ means there is some discussion about support later in the document
| tex.Load | Yes | Yes | Yes | Limited ^ | Yes
| Full bool | Yes | Yes | Yes | No | Yes ^
| Mesh Shader | No | No + | No + | No | No
+| `[unroll]` | Yes | Yes | Yes ^ | Yes | Limited +
## Half Type
@@ -114,3 +115,14 @@ tex.Load is only supported on CUDA for Texture1D. Additionally CUDA only allows
Means fully featured bool support. CUDA has issues around bool because there isn't a vector bool type built in. Currently bool aliases to an int vector type.
On CPU there are some issues in so far as bool's size is not well defined in size an alignment. Most C++ compilers now use a byte to represent a bool. In the past it has been backed by an int on some compilers.
+
+## `[unroll]`
+
+The unroll attribute allows for unrolling `for` loops. At the moment the feature is dependent on downstream compiler support which is mixed. In the longer term the intention is for Slang to contain it's own loop unroller - and therefore not be dependent on the feature on downstream compilers.
+
+On C++ this attribute becomes SLANG_UNROLL which is defined in the prelude. This can be predefined if there is a suitable mechanism, if there isn't a definition SLANG_UNROLL will be an empty definition.
+
+On GLSL and VK targets loop unrolling uses the [GL_EXT_control_flow_attributes](https://github.com/KhronosGroup/GLSL/blob/master/extensions/ext/GL_EXT_control_flow_attributes.txt) extension.
+
+Slang does have a cross target mechanism to [unroll loops](language-reference/06-statements.md), in the section `Compile-Time For Statement`.
+
diff --git a/prelude/slang-cpp-prelude.h b/prelude/slang-cpp-prelude.h
index 77c738620..e85ea1173 100644
--- a/prelude/slang-cpp-prelude.h
+++ b/prelude/slang-cpp-prelude.h
@@ -34,4 +34,8 @@
# pragma warning(disable : 4700)
#endif
+#ifndef SLANG_UNROLL
+# define SLANG_UNROLL
+#endif
+
#endif
diff --git a/source/slang/slang-emit-c-like.cpp b/source/slang/slang-emit-c-like.cpp
index 3631040b8..b217a2a1b 100644
--- a/source/slang/slang-emit-c-like.cpp
+++ b/source/slang/slang-emit-c-like.cpp
@@ -2592,20 +2592,8 @@ void CLikeSourceEmitter::emitRegion(Region* inRegion)
//
if (auto loopControlDecoration = loopInst->findDecoration<IRLoopControlDecoration>())
{
- switch (loopControlDecoration->getMode())
- {
- case kIRLoopControl_Unroll:
- // Note: loop unrolling control is only available in HLSL, not GLSL
- if(getSourceStyle() == SourceStyle::HLSL)
- {
- m_writer->emit("[unroll]\n");
- }
- break;
-
- default:
- break;
- }
- }
+ emitLoopControlDecorationImpl(loopControlDecoration);
+ }
m_writer->emit("for(;;)\n{\n");
m_writer->indent();
diff --git a/source/slang/slang-emit-c-like.h b/source/slang/slang-emit-c-like.h
index ddca69462..e5a8de9ec 100644
--- a/source/slang/slang-emit-c-like.h
+++ b/source/slang/slang-emit-c-like.h
@@ -328,6 +328,7 @@ public:
virtual void emitParamTypeImpl(IRType* type, String const& name);
virtual void emitIntrinsicCallExprImpl(IRCall* inst, IRTargetIntrinsicDecoration* targetIntrinsic, EmitOpInfo const& inOuterPrec);
virtual void emitFunctionPreambleImpl(IRInst* inst) { SLANG_UNUSED(inst); }
+ virtual void emitLoopControlDecorationImpl(IRLoopControlDecoration* decl) { SLANG_UNUSED(decl); }
// Only needed for glsl output with $ prefix intrinsics - so perhaps removable in the future
virtual void emitTextureOrTextureSamplerTypeImpl(IRTextureTypeBase* type, char const* baseName) { SLANG_UNUSED(type); SLANG_UNUSED(baseName); }
diff --git a/source/slang/slang-emit-cpp.cpp b/source/slang/slang-emit-cpp.cpp
index bece6c2d0..8f59da214 100644
--- a/source/slang/slang-emit-cpp.cpp
+++ b/source/slang/slang-emit-cpp.cpp
@@ -1868,6 +1868,15 @@ void CPPSourceEmitter::emitIntrinsicCallExprImpl(
return Super::emitIntrinsicCallExprImpl(inst, targetIntrinsic, inOuterPrec);
}
+void CPPSourceEmitter::emitLoopControlDecorationImpl(IRLoopControlDecoration* decl)
+{
+ if (decl->getMode() == kIRLoopControl_Unroll)
+ {
+ // This relies on a suitable definition in slang-cpp-prelude.h or defined in C++ compiler invocation.
+ m_writer->emit("SLANG_UNROLL\n");
+ }
+}
+
bool CPPSourceEmitter::_tryEmitInstExprAsIntrinsic(IRInst* inst, const EmitOpInfo& inOuterPrec)
{
HLSLIntrinsic* specOp = m_intrinsicSet.add(inst);
diff --git a/source/slang/slang-emit-cpp.h b/source/slang/slang-emit-cpp.h
index 99f180850..aeef30804 100644
--- a/source/slang/slang-emit-cpp.h
+++ b/source/slang/slang-emit-cpp.h
@@ -79,6 +79,7 @@ protected:
virtual bool tryEmitGlobalParamImpl(IRGlobalParam* varDecl, IRType* varType) SLANG_OVERRIDE;
virtual void emitIntrinsicCallExprImpl(IRCall* inst, IRTargetIntrinsicDecoration* targetIntrinsic, EmitOpInfo const& inOuterPrec) SLANG_OVERRIDE;
+ virtual void emitLoopControlDecorationImpl(IRLoopControlDecoration* decl) SLANG_OVERRIDE;
// Replaceable for classes derived from CPPSourceEmitter
virtual SlangResult calcTypeName(IRType* type, CodeGenTarget target, StringBuilder& out);
diff --git a/source/slang/slang-emit-cuda.cpp b/source/slang/slang-emit-cuda.cpp
index 91439d5d3..64cb240fc 100644
--- a/source/slang/slang-emit-cuda.cpp
+++ b/source/slang/slang-emit-cuda.cpp
@@ -341,6 +341,14 @@ void CUDASourceEmitter::emitCall(const HLSLIntrinsic* specOp, IRInst* inst, cons
return Super::emitCall(specOp, inst, operands, numOperands, inOuterPrec);
}
+void CUDASourceEmitter::emitLoopControlDecorationImpl(IRLoopControlDecoration* decl)
+{
+ if (decl->getMode() == kIRLoopControl_Unroll)
+ {
+ m_writer->emit("#pragma unroll\n");
+ }
+}
+
bool CUDASourceEmitter::tryEmitInstExprImpl(IRInst* inst, const EmitOpInfo& inOuterPrec)
{
switch(inst->op)
diff --git a/source/slang/slang-emit-cuda.h b/source/slang/slang-emit-cuda.h
index 392e4f98d..3d23fd80f 100644
--- a/source/slang/slang-emit-cuda.h
+++ b/source/slang/slang-emit-cuda.h
@@ -49,6 +49,8 @@ protected:
virtual void emitCall(const HLSLIntrinsic* specOp, IRInst* inst, const IRUse* operands, int numOperands, const EmitOpInfo& inOuterPrec) SLANG_OVERRIDE;
virtual void emitFunctionPreambleImpl(IRInst* inst) SLANG_OVERRIDE { SLANG_UNUSED(inst); m_writer->emit("__device__ "); }
+ virtual void emitLoopControlDecorationImpl(IRLoopControlDecoration* decl) SLANG_OVERRIDE;
+
//virtual bool tryEmitGlobalParamImpl(IRGlobalParam* varDecl, IRType* varType) SLANG_OVERRIDE;
virtual bool tryEmitInstExprImpl(IRInst* inst, const EmitOpInfo& inOuterPrec) SLANG_OVERRIDE;
diff --git a/source/slang/slang-emit-glsl.cpp b/source/slang/slang-emit-glsl.cpp
index b433b4d94..af1cfd897 100644
--- a/source/slang/slang-emit-glsl.cpp
+++ b/source/slang/slang-emit-glsl.cpp
@@ -668,6 +668,16 @@ void GLSLSourceEmitter::_maybeEmitGLSLFlatModifier(IRType* valueType)
}
}
+void GLSLSourceEmitter::emitLoopControlDecorationImpl(IRLoopControlDecoration* decl)
+{
+ if (decl->getMode() == kIRLoopControl_Unroll)
+ {
+ // https://github.com/KhronosGroup/GLSL/blob/master/extensions/ext/GL_EXT_control_flow_attributes.txt
+ m_glslExtensionTracker->requireExtension(UnownedStringSlice::fromLiteral("GL_EXT_control_flow_attributes"));
+ m_writer->emit("[[unroll]]\n");
+ }
+}
+
void GLSLSourceEmitter::emitSimpleValueImpl(IRInst* inst)
{
switch (inst->op)
diff --git a/source/slang/slang-emit-glsl.h b/source/slang/slang-emit-glsl.h
index a88a3b39b..bc430c9a8 100644
--- a/source/slang/slang-emit-glsl.h
+++ b/source/slang/slang-emit-glsl.h
@@ -46,7 +46,7 @@ protected:
virtual bool tryEmitInstExprImpl(IRInst* inst, const EmitOpInfo& inOuterPrec) SLANG_OVERRIDE;
virtual void emitSimpleValueImpl(IRInst* inst) SLANG_OVERRIDE;
-
+ virtual void emitLoopControlDecorationImpl(IRLoopControlDecoration* decl) SLANG_OVERRIDE;
void _emitGLSLTextureOrTextureSamplerType(IRTextureTypeBase* type, char const* baseName);
void _emitGLSLStructuredBuffer(IRGlobalParam* varDecl, IRHLSLStructuredBufferTypeBase* structuredBufferType);
diff --git a/source/slang/slang-emit-hlsl.cpp b/source/slang/slang-emit-hlsl.cpp
index a0e6e872d..489e17c8e 100644
--- a/source/slang/slang-emit-hlsl.cpp
+++ b/source/slang/slang-emit-hlsl.cpp
@@ -504,6 +504,14 @@ void HLSLSourceEmitter::emitVectorTypeNameImpl(IRType* elementType, IRIntegerVal
m_writer->emit(">");
}
+void HLSLSourceEmitter::emitLoopControlDecorationImpl(IRLoopControlDecoration* decl)
+{
+ if (decl->getMode() == kIRLoopControl_Unroll)
+ {
+ m_writer->emit("[unroll]\n");
+ }
+}
+
void HLSLSourceEmitter::emitSimpleValueImpl(IRInst* inst)
{
switch (inst->op)
diff --git a/source/slang/slang-emit-hlsl.h b/source/slang/slang-emit-hlsl.h
index 05fa3bb11..f9b0ad0fb 100644
--- a/source/slang/slang-emit-hlsl.h
+++ b/source/slang/slang-emit-hlsl.h
@@ -33,6 +33,7 @@ protected:
virtual bool tryEmitInstExprImpl(IRInst* inst, const EmitOpInfo& inOuterPrec) SLANG_OVERRIDE;
virtual void emitSimpleValueImpl(IRInst* inst) SLANG_OVERRIDE;
+ virtual void emitLoopControlDecorationImpl(IRLoopControlDecoration* decl) SLANG_OVERRIDE;
// Emit a single `register` semantic, as appropriate for a given resource-type-specific layout info
// Keyword to use in the uniform case (`register` for globals, `packoffset` inside a `cbuffer`)
diff --git a/tests/compute/loop-unroll.slang b/tests/compute/loop-unroll.slang
index b8ec06768..25f25b0ec 100644
--- a/tests/compute/loop-unroll.slang
+++ b/tests/compute/loop-unroll.slang
@@ -1,7 +1,15 @@
//TEST(compute):COMPARE_COMPUTE:
+//TEST(compute):COMPARE_COMPUTE:-dx12
+//TODO(JS): This test fails with a crash in CreateComputePipelineState, so disabled for now
+//DISABLE_TEST(compute):COMPARE_COMPUTE:-dx12 -use-dxil
+//TEST(compute):COMPARE_COMPUTE:-cpu
+//TEST(compute):COMPARE_COMPUTE:-cuda
+// Note VK output is not loop unrolled
+//TEST(compute):COMPARE_COMPUTE:-vk
-//TEST_INPUT:ubuffer(data=[0 1 2 3], stride=4):out
-//TEST_INPUT:ubuffer(data=[1 2 3 0], stride=4):
+//TEST_INPUT:ubuffer(data=[0 0 0 0], stride=4):out, name buffers[0]
+//TEST_INPUT:ubuffer(data=[0 1 2 3], stride=4):name buffers[1]
+//TEST_INPUT:ubuffer(data=[1 2 3 0], stride=4):name buffers[2]
// Check that we propagate the `[unroll]` attribute
// through to HLSL output correctly.
@@ -10,7 +18,7 @@
// it will generate a warning output from fxc, and the
// test will fail to match the expected output.
-RWStructuredBuffer<int> buffers[2];
+RWStructuredBuffer<int> buffers[3];
[numthreads(4, 1, 1)]
void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID)
@@ -20,12 +28,12 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID)
// Note: using `unroll` as a variable name to validate that
// the lookup process for attribute names doesn't run into
// problems because of local declarations with the same name.
- int unroll = buffers[1][tid];
+ int unroll = buffers[2][tid];
[unroll]
for(int ii = 0; ii < 2; ii++)
{
- unroll = buffers[ii][unroll];
+ unroll = buffers[ii + 1][unroll];
}
buffers[0][tid] = unroll;