summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDarren Wihandi <65404740+fairywreath@users.noreply.github.com>2025-01-18 02:07:16 -0500
committerGitHub <noreply@github.com>2025-01-17 23:07:16 -0800
commita85c350df03c6cdf9b433f58fc0e66affda03e9e (patch)
tree3e80ea7121b3b5bb5c17aee88599212b78abec38
parent87a08160289c194ddfb337d521893f576ceb9f97 (diff)
Implement Quad Control intrinsics (#5981)
-rw-r--r--docs/user-guide/a3-02-reference-capability-atoms.md21
-rw-r--r--source/slang/core.meta.slang21
-rw-r--r--source/slang/hlsl.meta.slang57
-rw-r--r--source/slang/slang-ast-modifier.h15
-rw-r--r--source/slang/slang-capabilities.capdef33
-rw-r--r--source/slang/slang-emit-c-like.cpp8
-rw-r--r--source/slang/slang-emit-c-like.h6
-rw-r--r--source/slang/slang-emit-glsl.cpp308
-rw-r--r--source/slang/slang-emit-glsl.h2
-rw-r--r--source/slang/slang-emit-spirv.cpp45
-rw-r--r--source/slang/slang-emit.cpp2
-rw-r--r--source/slang/slang-ir-inst-defs.h8
-rw-r--r--source/slang/slang-ir-insts.h13
-rw-r--r--source/slang/slang-lower-to-ir.cpp12
-rw-r--r--tests/hlsl-intrinsic/quad-control/quad-control-comp-functionality.slang40
-rw-r--r--tests/hlsl-intrinsic/quad-control/quad-control-comp-functionality.slang.expected.txt16
-rw-r--r--tests/hlsl-intrinsic/quad-control/quad-control-frag-many-entry-points.slang96
-rw-r--r--tests/hlsl-intrinsic/quad-control/quad-control-frag.slang53
18 files changed, 632 insertions, 124 deletions
diff --git a/docs/user-guide/a3-02-reference-capability-atoms.md b/docs/user-guide/a3-02-reference-capability-atoms.md
index e7a9b1bb4..a70a9f88c 100644
--- a/docs/user-guide/a3-02-reference-capability-atoms.md
+++ b/docs/user-guide/a3-02-reference-capability-atoms.md
@@ -363,6 +363,12 @@ Extensions
`SPV_EXT_demote_to_helper_invocation`
> Represents the SPIR-V extension for demoting to helper invocation.
+`SPV_KHR_maximal_reconvergence`
+> Represents the SPIR-V extension for maximal reconvergence.
+
+`SPV_KHR_quad_control`
+> Represents the SPIR-V extension for quad group control.
+
`SPV_KHR_fragment_shader_barycentric`
> Represents the SPIR-V extension for fragment shader barycentric.
@@ -503,6 +509,12 @@ Extensions
`spvDemoteToHelperInvocation`
> Represents the SPIR-V capability for demoting to helper invocation.
+`spvMaximalReconvergenceKHR`
+> Represents the SPIR-V capability for maximal reconvergence.
+
+`spvQuadControlKHR`
+> Represents the SPIR-V capability for quad group control.
+
`GL_EXT_buffer_reference`
> Represents the GL_EXT_buffer_reference extension.
@@ -515,6 +527,12 @@ Extensions
`GL_EXT_demote_to_helper_invocation`
> Represents the GL_EXT_demote_to_helper_invocation extension.
+`GL_EXT_maximal_reconvergence`
+> Represents the GL_EXT_maximal_reconvergence extension.
+
+`GL_EXT_shader_quad_control`
+> Represents the GL_EXT_shader_quad_control extension.
+
`GL_EXT_fragment_shader_barycentric`
> Represents the GL_EXT_fragment_shader_barycentric extension.
@@ -1078,6 +1096,9 @@ Compound Capabilities
`helper_lane`
> Capabilities required to enable helper-lane demotion
+`quad_control`
+> Capabilities required to enable quad group control
+
`breakpoint`
> Capabilities required to enable shader breakpoints
diff --git a/source/slang/core.meta.slang b/source/slang/core.meta.slang
index 5c30da1e7..2224b8e82 100644
--- a/source/slang/core.meta.slang
+++ b/source/slang/core.meta.slang
@@ -3244,6 +3244,12 @@ __Addr<T> __getLegalizedSPIRVGlobalParamAddr(T val);
__intrinsic_op($(kIROp_RequireComputeDerivative))
void __requireComputeDerivative();
+__intrinsic_op($(kIROp_RequireMaximallyReconverges))
+void __requireMaximallyReconverges();
+
+__intrinsic_op($(kIROp_RequireQuadDerivatives))
+void __requireQuadDerivatives();
+
//@ public:
/// @category misc_types
enum MemoryOrder
@@ -3978,6 +3984,21 @@ attribute_syntax [DerivativeGroupQuad] : DerivativeGroupQuadAttribute;
__attributeTarget(FuncDecl)
attribute_syntax [DerivativeGroupLinear] : DerivativeGroupLinearAttribute;
+/// Emits `MaximallyReconvergesKHR` execution mode when producing SPIR-V.
+/// This attribute has no effect on other targets.
+__attributeTarget(FuncDecl)
+attribute_syntax [MaximallyReconverges] : MaximallyReconvergesAttribute;
+
+/// Emits `QuadDerivativesKHR` execution mode when producing SPIR-V.
+/// This attribute has no effect on other targets.
+__attributeTarget(FuncDecl)
+attribute_syntax [QuadDerivatives] : QuadDerivativesAttribute;
+
+/// Emits `RequireFullQuadsKHR` execution mode when producing SPIR-V.
+/// This attribute has no effect on other targets.
+__attributeTarget(FuncDecl)
+attribute_syntax [RequireFullQuads] : RequireFullQuadsAttribute;
+
__generic<T>
typealias NodePayloadPtr = Ptr<T, $( (uint64_t)AddressSpace::NodePayloadAMDX)>;
diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang
index 9d6a81f84..371659fc9 100644
--- a/source/slang/hlsl.meta.slang
+++ b/source/slang/hlsl.meta.slang
@@ -15687,6 +15687,63 @@ bool IsHelperLane()
}
}
+//
+// Quad Control intrinsics
+//
+// For SPIRV and GLSL targets, the behavior is taken from Vulkan's `VK_KHR_shader_quad_control` spec.
+// QuadAny/QuadAll will map to OpGroupNonUniformQuadAny/All, and using either of these functions will
+// result in the QuadDerivativesKHR execution mode being used. If MaximallyReconvergesKHR is not already
+// specified by other means, it will be added when using either of QuadAny/QuadAll.
+//
+
+//@public:
+/// Returns true if `expr` is true in any lane of the current quad.
+__glsl_extension(GL_KHR_shader_subgroup_vote)
+__glsl_extension(GL_EXT_maximal_reconvergence)
+__glsl_extension(GL_EXT_shader_quad_control)
+[ForceInline]
+[require(glsl_hlsl_metal_spirv, quad_control)]
+bool QuadAny(bool expr)
+{
+ __requireMaximallyReconverges();
+ __requireQuadDerivatives();
+ __target_switch
+ {
+ case hlsl: __intrinsic_asm "QuadAny";
+ case glsl: __intrinsic_asm "subgroupQuadAny";
+ case metal: __intrinsic_asm "quad_any";
+ case spirv:
+ return spirv_asm
+ {
+ result:$$bool = OpGroupNonUniformQuadAnyKHR $expr;
+ };
+ }
+}
+
+//@public:
+/// Returns true if `expr` is true in all lanes of the current quad.
+__glsl_extension(GL_KHR_shader_subgroup_vote)
+__glsl_extension(GL_EXT_maximal_reconvergence)
+__glsl_extension(GL_EXT_shader_quad_control)
+[ForceInline]
+[require(glsl_hlsl_metal_spirv, quad_control)]
+bool QuadAll(bool expr)
+{
+ __requireMaximallyReconverges();
+ __requireQuadDerivatives();
+ __target_switch
+ {
+ case hlsl: __intrinsic_asm "QuadAll";
+ case glsl: __intrinsic_asm "subgroupQuadAll";
+ case metal: __intrinsic_asm "quad_all";
+ case spirv:
+ return spirv_asm
+ {
+ result:$$bool = OpGroupNonUniformQuadAllKHR $expr;
+ };
+ }
+}
+
// `typedef`s to help with the fact that HLSL has been sorta-kinda case insensitive at various points
//@hidden:
typedef Texture2D texture2D;
diff --git a/source/slang/slang-ast-modifier.h b/source/slang/slang-ast-modifier.h
index ee29750a6..36dddd15f 100644
--- a/source/slang/slang-ast-modifier.h
+++ b/source/slang/slang-ast-modifier.h
@@ -1664,6 +1664,21 @@ class DerivativeGroupLinearAttribute : public Attribute
SLANG_AST_CLASS(DerivativeGroupLinearAttribute)
};
+class MaximallyReconvergesAttribute : public Attribute
+{
+ SLANG_AST_CLASS(MaximallyReconvergesAttribute)
+};
+
+class QuadDerivativesAttribute : public Attribute
+{
+ SLANG_AST_CLASS(QuadDerivativesAttribute)
+};
+
+class RequireFullQuadsAttribute : public Attribute
+{
+ SLANG_AST_CLASS(RequireFullQuadsAttribute)
+};
+
/// A `[payload]` attribute indicates that a `struct` type will be used as
/// a ray payload for `TraceRay()` calls, and thus also as input/output
/// for shaders in the ray tracing pipeline that might be invoked for
diff --git a/source/slang/slang-capabilities.capdef b/source/slang/slang-capabilities.capdef
index c8a66448d..4f6357779 100644
--- a/source/slang/slang-capabilities.capdef
+++ b/source/slang/slang-capabilities.capdef
@@ -466,6 +466,14 @@ def SPV_EXT_mesh_shader : _spirv_1_4;
/// [EXT]
def SPV_EXT_demote_to_helper_invocation : _spirv_1_4;
+/// Represents the SPIR-V extension for maximal reconvergence.
+/// [EXT]
+def SPV_KHR_maximal_reconvergence : _spirv_1_0;
+
+/// Represents the SPIR-V extension for quad group control.
+/// [EXT]
+def SPV_KHR_quad_control : _spirv_1_3;
+
/// Represents the SPIR-V extension for fragment shader barycentric.
/// [EXT]
def SPV_KHR_fragment_shader_barycentric : _spirv_1_0;
@@ -654,6 +662,14 @@ def spvDemoteToHelperInvocationEXT : SPV_EXT_demote_to_helper_invocation;
/// [EXT]
def spvDemoteToHelperInvocation : spvDemoteToHelperInvocationEXT;
+/// Represents the SPIR-V capability for maximal reconvergence.
+/// [EXT]
+def spvMaximalReconvergenceKHR : SPV_KHR_maximal_reconvergence;
+
+/// Represents the SPIR-V capability for quad group control.
+/// [EXT]
+def spvQuadControlKHR : SPV_KHR_quad_control;
+
// The following capabilities all pertain to how ray tracing shaders are translated
// to GLSL, where there are two different extensions that can provide the core
// functionality of `TraceRay` and the related operations.
@@ -691,6 +707,8 @@ def _GL_EXT_shader_image_load_store : _GLSL_130;
def _GL_EXT_shader_realtime_clock : glsl;
def _GL_EXT_texture_query_lod : glsl;
def _GL_EXT_texture_shadow_lod : _GLSL_130;
+def _GL_EXT_maximal_reconvergence : _GLSL_140;
+def _GL_EXT_shader_quad_control : _GLSL_140;
def _GL_ARB_derivative_control : _GLSL_400;
def _GL_ARB_fragment_shader_interlock : _GLSL_450;
@@ -746,6 +764,14 @@ alias GL_EXT_debug_printf = _GL_EXT_debug_printf | SPV_KHR_non_semantic_info;
/// [EXT]
alias GL_EXT_demote_to_helper_invocation = _GL_EXT_demote_to_helper_invocation | spvDemoteToHelperInvocationEXT;
+/// Represents the GL_EXT_maximal_reconvergence extension.
+/// [EXT]
+alias GL_EXT_maximal_reconvergence = _GL_EXT_maximal_reconvergence | spvMaximalReconvergenceKHR;
+
+/// Represents the GL_EXT_shader_quad_control extension.
+/// [EXT]
+alias GL_EXT_shader_quad_control = _GL_EXT_shader_quad_control | spvQuadControlKHR;
+
/// Represents the GL_EXT_fragment_shader_barycentric extension.
/// [EXT]
alias GL_EXT_fragment_shader_barycentric = _GL_EXT_fragment_shader_barycentric | spvFragmentBarycentricKHR;
@@ -1925,6 +1951,13 @@ alias helper_lane = _sm_6_0 + fragment
| metal + fragment
;
+/// Capabilities required to enable quad group control
+/// [Compound]
+alias quad_control = _sm_6_7
+ | GL_EXT_shader_quad_control + GL_EXT_maximal_reconvergence + GL_KHR_shader_subgroup_vote
+ | metal
+ ;
+
/// Capabilities required to enable shader breakpoints
/// [Compound]
alias breakpoint = GL_EXT_debug_printf | hlsl | _cuda_sm_8_0 | cpp;
diff --git a/source/slang/slang-emit-c-like.cpp b/source/slang/slang-emit-c-like.cpp
index eaf7ef028..ac548e354 100644
--- a/source/slang/slang-emit-c-like.cpp
+++ b/source/slang/slang-emit-c-like.cpp
@@ -129,14 +129,6 @@ void CLikeSourceEmitter::emitPreModuleImpl()
m_writer->emit("\n");
}
}
-void CLikeSourceEmitter::emitPostModuleImpl()
-{
- if (m_requiredAfter.requireComputeDerivatives.getLength() > 0)
- {
- m_writer->emit(m_requiredAfter.requireComputeDerivatives);
- m_writer->emit("\n");
- }
-}
//
// Types
diff --git a/source/slang/slang-emit-c-like.h b/source/slang/slang-emit-c-like.h
index 1354b7cbd..e83b6e586 100644
--- a/source/slang/slang-emit-c-like.h
+++ b/source/slang/slang-emit-c-like.h
@@ -470,7 +470,6 @@ public:
void emitFrontMatter(TargetRequest* targetReq) { emitFrontMatterImpl(targetReq); }
void emitPreModule() { emitPreModuleImpl(); }
- void emitPostModule() { emitPostModuleImpl(); }
void emitModule(IRModule* module, DiagnosticSink* sink)
{
m_irModule = module;
@@ -555,7 +554,6 @@ protected:
/// For example on targets that don't have built in vector/matrix support, this is where
/// the appropriate generated declarations occur.
virtual void emitPreModuleImpl();
- virtual void emitPostModuleImpl();
virtual void emitSimpleTypeAndDeclaratorImpl(IRType* type, DeclaratorInfo* declarator);
void emitSimpleTypeAndDeclarator(IRType* type, DeclaratorInfo* declarator)
@@ -736,10 +734,6 @@ protected:
Dictionary<IRInst*, String> m_mapInstToName;
OrderedHashSet<IRStringLit*> m_requiredPreludes;
- struct RequiredAfter
- {
- String requireComputeDerivatives;
- } m_requiredAfter;
Dictionary<const char*, IRStringLit*> m_builtinPreludes;
};
diff --git a/source/slang/slang-emit-glsl.cpp b/source/slang/slang-emit-glsl.cpp
index 0dab07cfc..b94f44690 100644
--- a/source/slang/slang-emit-glsl.cpp
+++ b/source/slang/slang-emit-glsl.cpp
@@ -25,9 +25,78 @@ GLSLSourceEmitter::GLSLSourceEmitter(const Desc& desc)
SLANG_ASSERT(m_glslExtensionTracker);
}
+void GLSLSourceEmitter::_beforeComputeEmitProcessInstruction(
+ IRInst* parentFunc,
+ IRInst* inst,
+ IRBuilder& builder)
+{
+ if (auto requireGLSLExt = as<IRRequireGLSLExtension>(inst))
+ {
+ _requireGLSLExtension(requireGLSLExt->getExtensionName());
+ return;
+ }
+
+ // Early exit on instructions we are not interested in.
+ if (!as<IRRequireMaximallyReconverges>(inst) && !as<IRRequireQuadDerivatives>(inst) &&
+ !(as<IRRequireComputeDerivative>(inst) && (m_entryPointStage == Stage::Compute)))
+ {
+ return;
+ }
+
+ // Check for entry point specific decorations.
+ //
+ // Handle cases where "require" IR operations exist in the function body and are required
+ // as entry point decorations.
+ auto entryPoints = getReferencingEntryPoints(m_referencingEntryPoints, parentFunc);
+ if (entryPoints == nullptr)
+ return;
+
+ for (auto entryPoint : *entryPoints)
+ {
+ if (as<IRRequireMaximallyReconverges>(inst))
+ {
+ builder.addDecoration(entryPoint, kIROp_MaximallyReconvergesDecoration);
+ }
+ else if (as<IRRequireQuadDerivatives>(inst))
+ {
+ builder.addDecoration(entryPoint, kIROp_QuadDerivativesDecoration);
+ }
+ else
+ {
+ const auto requireComputeDerivative = as<IRRequireComputeDerivative>(inst);
+
+ SLANG_ASSERT(requireComputeDerivative);
+ SLANG_ASSERT(m_entryPointStage == Stage::Compute);
+
+ // Compute derivatives are quad by default; add the decoration if the entry point
+ // does not have an explicit linear decoration.
+ bool isQuad = !entryPoint->findDecoration<IRDerivativeGroupLinearDecoration>();
+ if (isQuad)
+ {
+ builder.addDecoration(entryPoint, kIROp_DerivativeGroupQuadDecoration);
+ }
+ }
+ }
+}
+
void GLSLSourceEmitter::beforeComputeEmitActions(IRModule* module)
{
buildEntryPointReferenceGraph(this->m_referencingEntryPoints, module);
+
+ IRBuilder builder(module);
+ for (auto globalInst : module->getGlobalInsts())
+ {
+ if (auto func = as<IRGlobalValueWithCode>(globalInst))
+ {
+ for (auto block : func->getBlocks())
+ {
+ for (auto inst = block->getFirstInst(); inst; inst = inst->next)
+ {
+ _beforeComputeEmitProcessInstruction(func, inst, builder);
+ }
+ }
+ }
+ }
}
SlangResult GLSLSourceEmitter::init()
@@ -78,8 +147,8 @@ void GLSLSourceEmitter::_requireRayQuery()
{
m_glslExtensionTracker->requireExtension(UnownedStringSlice::fromLiteral("GL_EXT_ray_query"));
m_glslExtensionTracker->requireSPIRVVersion(
- SemanticVersion(1, 4)); // required due to glslang bug which enables `SPV_KHR_ray_tracing`
- // regardless of context
+ SemanticVersion(1, 4)); // required due to glslang bug which enables
+ // `SPV_KHR_ray_tracing` regardless of context
m_glslExtensionTracker->requireVersion(ProfileVersion::GLSL_460);
}
@@ -226,8 +295,8 @@ void GLSLSourceEmitter::_emitGLSLStructuredBuffer(
m_writer->emit(") ");
/*
- If the output type is a buffer, and we can determine it is only readonly we can prefix before
- buffer with 'readonly'
+ If the output type is a buffer, and we can determine it is only readonly we can prefix
+ before buffer with 'readonly'
The actual structuredBufferType could be
@@ -349,8 +418,8 @@ void GLSLSourceEmitter::emitSSBOHeader(IRGlobalParam* varDecl, IRType* bufferTyp
_emitMemoryQualifierDecorations(varDecl);
/*
- If the output type is a buffer, and we can determine it is only readonly we can prefix before
- buffer with 'readonly'
+ If the output type is a buffer, and we can determine it is only readonly we can prefix
+ before buffer with 'readonly'
HLSLByteAddressBufferType - This is unambiguously read only
HLSLRWByteAddressBufferType - Read write
@@ -437,11 +506,11 @@ void GLSLSourceEmitter::_emitGLSLParameterGroup(
}
/*
- With resources backed by 'buffer' on glsl, we want to output 'readonly' if that is a good match
- for the underlying type. If uniform it's implicit it's readonly
+ With resources backed by 'buffer' on glsl, we want to output 'readonly' if that is a good
+ match for the underlying type. If uniform it's implicit it's readonly
- Here this only happens with isShaderRecord which is a 'constant buffer' (ie implicitly readonly)
- or IRGLSLShaderStorageBufferType which is read write.
+ Here this only happens with isShaderRecord which is a 'constant buffer' (ie implicitly
+ readonly) or IRGLSLShaderStorageBufferType which is read write.
*/
{
@@ -653,20 +722,21 @@ void GLSLSourceEmitter::_emitGLSLImageFormatModifier(IRInst* var, IRTextureType*
// default to rgba
//
// The SPIR-V spec
- // (https://www.khronos.org/registry/spir-v/specs/unified1/SPIRV.pdf) section 3.11
- // on Image Formats it does not list rgbf32.
+ // (https://www.khronos.org/registry/spir-v/specs/unified1/SPIRV.pdf)
+ // section 3.11 on Image Formats it does not list rgbf32.
//
// It seems SPIR-V can support having an image with an unknown-at-compile-time
- // format, so long as the underlying API supports it. Ideally this would mean that
- // we can just drop all these qualifiers when emitting GLSL for Vulkan targets.
+ // format, so long as the underlying API supports it. Ideally this would mean
+ // that we can just drop all these qualifiers when emitting GLSL for Vulkan
+ // targets.
//
- // This raises the question of what to do more long term. For Vulkan hopefully we
- // can just drop the layout. For OpenGL targets it would seem reasonable to have
- // well-defined rules for inferring the format (and just document that 3-component
- // formats map to 4-component formats, but that shouldn't matter because the API
- // wouldn't let the user allocate those 3-component formats anyway), and add an
- // attribute for specifying the format manually if you really want to override our
- // inference (e.g., to specify r11fg11fb10f).
+ // This raises the question of what to do more long term. For Vulkan hopefully
+ // we can just drop the layout. For OpenGL targets it would seem reasonable to
+ // have well-defined rules for inferring the format (and just document that
+ // 3-component formats map to 4-component formats, but that shouldn't matter
+ // because the API wouldn't let the user allocate those 3-component formats
+ // anyway), and add an attribute for specifying the format manually if you
+ // really want to override our inference (e.g., to specify r11fg11fb10f).
m_writer->emit("rgba");
// Emit("rgb");
@@ -1332,12 +1402,13 @@ void GLSLSourceEmitter::emitEntryPointAttributesImpl(
auto profile = entryPointDecor->getProfile();
auto stage = profile.getStage();
+ IRNumThreadsDecoration* numThreadsDecor = nullptr;
auto emitLocalSizeLayout = [&]()
{
Int sizeAlongAxis[kThreadGroupAxisCount];
Int specializationConstantIds[kThreadGroupAxisCount];
- getComputeThreadGroupSize(irFunc, sizeAlongAxis, specializationConstantIds);
-
+ numThreadsDecor =
+ getComputeThreadGroupSize(irFunc, sizeAlongAxis, specializationConstantIds);
m_writer->emit("layout(");
char const* axes[] = {"x", "y", "z"};
for (int ii = 0; ii < kThreadGroupAxisCount; ++ii)
@@ -1364,13 +1435,51 @@ void GLSLSourceEmitter::emitEntryPointAttributesImpl(
switch (stage)
{
case Stage::Compute:
+ case Stage::Mesh:
+ case Stage::Amplification:
+ emitLocalSizeLayout();
+ default:
+ break;
+ }
+
+ /// Structure to track (some) entry point attributes, to allow ordering when emitting and to
+ /// ensure decorations are only emitted once.
+ ///
+ /// These entry point attributes may be implicitly added by built-in functions and the same
+ /// function may be called multiple times, hence the need to ensure they are only emitted
+ /// once.
+ struct GLSLEntryPointAttributes
+ {
+ bool quadDerivatives;
+ bool requireFullQuads;
+ bool maximallyReconverges;
+ String computeDerivatives;
+ } attributes{};
+
+ const auto requireQuadControlExtensions = [&]()
+ {
+ _requireGLSLExtension(UnownedStringSlice("GL_KHR_shader_subgroup_vote"));
+ _requireGLSLExtension(UnownedStringSlice("GL_EXT_shader_quad_control"));
+ };
+
+ for (auto decoration : irFunc->getDecorations())
+ {
+ // Stage agnostic decorations.
+ if (as<IRMaximallyReconvergesDecoration>(decoration))
{
- emitLocalSizeLayout();
+ _requireGLSLExtension(UnownedStringSlice("GL_EXT_maximal_reconvergence"));
+ attributes.maximallyReconverges = true;
}
- break;
- case Stage::Geometry:
+ else if (as<IRQuadDerivativesDecoration>(decoration))
{
- if (auto decor = irFunc->findDecoration<IRMaxVertexCountDecoration>())
+ requireQuadControlExtensions();
+ attributes.quadDerivatives = true;
+ }
+
+ switch (stage)
+ {
+ case Stage::Geometry:
+ if (auto decor = as<IRMaxVertexCountDecoration>(decoration))
{
auto count = getIntVal(decor->getCount());
m_writer->emit("layout(max_vertices = ");
@@ -1378,7 +1487,7 @@ void GLSLSourceEmitter::emitEntryPointAttributesImpl(
m_writer->emit(") out;\n");
}
- if (auto decor = irFunc->findDecoration<IRInstanceDecoration>())
+ if (auto decor = as<IRInstanceDecoration>(decoration))
{
auto count = getIntVal(decor->getCount());
m_writer->emit("layout(invocations = ");
@@ -1389,7 +1498,7 @@ void GLSLSourceEmitter::emitEntryPointAttributesImpl(
// These decorations were moved from the parameters to the entry point by
// ir-glsl-legalize. The actual parameters have become potentially multiple global
// parameters.
- if (auto decor = irFunc->findDecoration<IRGeometryInputPrimitiveTypeDecoration>())
+ if (auto decor = as<IRGeometryInputPrimitiveTypeDecoration>(decoration))
{
switch (decor->getOp())
{
@@ -1415,7 +1524,7 @@ void GLSLSourceEmitter::emitEntryPointAttributesImpl(
}
}
- if (auto decor = irFunc->findDecoration<IRStreamOutputTypeDecoration>())
+ if (auto decor = as<IRStreamOutputTypeDecoration>(decoration))
{
IRType* type = decor->getStreamType();
@@ -1434,33 +1543,57 @@ void GLSLSourceEmitter::emitEntryPointAttributesImpl(
SLANG_ASSERT(!"Unknown stream out type");
}
}
- }
- break;
- case Stage::Pixel:
- {
- if (irFunc->findDecoration<IREarlyDepthStencilDecoration>())
+ break;
+ case Stage::Pixel:
+ if (as<IREarlyDepthStencilDecoration>(decoration))
{
// https://www.khronos.org/opengl/wiki/Early_Fragment_Test
m_writer->emit("layout(early_fragment_tests) in;\n");
}
+ else if (as<IRRequireFullQuadsDecoration>(decoration))
+ {
+ requireQuadControlExtensions();
+ attributes.requireFullQuads = true;
+ }
break;
- }
- case Stage::Mesh:
- {
- emitLocalSizeLayout();
- if (auto decor = irFunc->findDecoration<IRVerticesDecoration>())
+ case Stage::Compute:
+ if (as<IRDerivativeGroupQuadDecoration>(decoration))
+ {
+ _requireGLSLExtension(UnownedStringSlice("GL_NV_compute_shader_derivatives"));
+ verifyComputeDerivativeGroupModifiers(
+ getSink(),
+ decoration->sourceLoc,
+ true,
+ false,
+ numThreadsDecor);
+ attributes.computeDerivatives = "layout(derivative_group_quadsNV) in;\n";
+ }
+ else if (as<IRDerivativeGroupLinearDecoration>(decoration))
+ {
+ _requireGLSLExtension(UnownedStringSlice("GL_NV_compute_shader_derivatives"));
+ verifyComputeDerivativeGroupModifiers(
+ getSink(),
+ decoration->sourceLoc,
+ false,
+ true,
+ numThreadsDecor);
+ attributes.computeDerivatives = "layout(derivative_group_linearNV) in;\n";
+ }
+ break;
+ case Stage::Mesh:
+ if (auto decor = as<IRVerticesDecoration>(decoration))
{
m_writer->emit("layout(max_vertices = ");
m_writer->emit(decor->getMaxSize()->getValue());
m_writer->emit(") out;\n");
}
- if (auto decor = irFunc->findDecoration<IRPrimitivesDecoration>())
+ if (auto decor = as<IRPrimitivesDecoration>(decoration))
{
m_writer->emit("layout(max_primitives = ");
m_writer->emit(decor->getMaxSize()->getValue());
m_writer->emit(") out;\n");
}
- if (auto decor = irFunc->findDecoration<IROutputTopologyDecoration>())
+ if (auto decor = as<IROutputTopologyDecoration>(decoration))
{
// TODO: Ellie validate here/elsewhere, what's allowed here is
// different from the tesselator
@@ -1469,16 +1602,32 @@ void GLSLSourceEmitter::emitEntryPointAttributesImpl(
m_writer->emit(decor->getTopology()->getStringSlice());
m_writer->emit("s) out;\n");
}
+ break;
+ default:
+ break;
}
- break;
- case Stage::Amplification:
- {
- emitLocalSizeLayout();
- }
- break;
- // TODO: There are other stages that will need this kind of handling.
- default:
- break;
+ }
+
+ if (attributes.quadDerivatives)
+ {
+ m_writer->emit("layout(quad_derivatives) in;\n");
+ }
+ if (attributes.requireFullQuads)
+ {
+ m_writer->emit("layout(full_quads) in;\n");
+ }
+
+ // This must be emitted after local size when using glslang.
+ if (attributes.computeDerivatives.getLength() > 0)
+ {
+ m_writer->emit(attributes.computeDerivatives);
+ }
+
+ // This must be emitted last because GLSL's `[[..]]` attribute syntax must come right
+ // before the entry point function declaration.
+ if (attributes.maximallyReconverges)
+ {
+ m_writer->emit("[[maximally_reconverges]]\n");
}
}
@@ -2755,63 +2904,6 @@ void GLSLSourceEmitter::handleRequiredCapabilitiesImpl(IRInst* inst)
}
}
}
-
- // The function may have various requirment declaring functions its body. We also need to look
- // for them.
- auto func = as<IRFunc>(inst);
- if (!func)
- return;
- auto block = func->getFirstBlock();
- if (!block)
- return;
- for (auto childInst : block->getChildren())
- {
- if (auto requireGLSLExt = as<IRRequireGLSLExtension>(childInst))
- {
- _requireGLSLExtension(requireGLSLExt->getExtensionName());
- }
- else if (const auto requireComputeDerivative = as<IRRequireComputeDerivative>(childInst))
- {
- // only allowed 1 of derivative_group_quadsNV or derivative_group_linearNV
- if (m_entryPointStage != Stage::Compute ||
- m_requiredAfter.requireComputeDerivatives.getLength() > 0)
- return;
-
- _requireGLSLExtension(UnownedStringSlice("GL_NV_compute_shader_derivatives"));
-
- // This will only run once per program.
- HashSet<IRFunc*>* entryPointsUsingInst =
- getReferencingEntryPoints(m_referencingEntryPoints, func);
-
- for (auto entryPoint : *entryPointsUsingInst)
- {
- bool isQuad = !entryPoint->findDecoration<IRDerivativeGroupLinearDecoration>();
- auto numThreadsDecor = entryPoint->findDecoration<IRNumThreadsDecoration>();
- if (isQuad)
- {
- verifyComputeDerivativeGroupModifiers(
- getSink(),
- inst->sourceLoc,
- true,
- false,
- numThreadsDecor);
- m_requiredAfter.requireComputeDerivatives =
- "layout(derivative_group_quadsNV) in;";
- }
- else
- {
- verifyComputeDerivativeGroupModifiers(
- getSink(),
- inst->sourceLoc,
- false,
- true,
- numThreadsDecor);
- m_requiredAfter.requireComputeDerivatives =
- "layout(derivative_group_linearNV) in;";
- }
- }
- }
- }
}
static Index _getGLSLVersion(ProfileVersion profile)
diff --git a/source/slang/slang-emit-glsl.h b/source/slang/slang-emit-glsl.h
index 49a7884c2..b07b410ca 100644
--- a/source/slang/slang-emit-glsl.h
+++ b/source/slang/slang-emit-glsl.h
@@ -176,6 +176,8 @@ protected:
void emitAtomicImageCoord(IRImageSubscript* operand);
+ void _beforeComputeEmitProcessInstruction(IRInst* parentFunc, IRInst* inst, IRBuilder& builder);
+
Dictionary<IRInst*, HashSet<IRFunc*>> m_referencingEntryPoints;
RefPtr<GLSLExtensionTracker> m_glslExtensionTracker;
diff --git a/source/slang/slang-emit-spirv.cpp b/source/slang/slang-emit-spirv.cpp
index 2cf84a854..951507ab9 100644
--- a/source/slang/slang-emit-spirv.cpp
+++ b/source/slang/slang-emit-spirv.cpp
@@ -3544,6 +3544,37 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex
break;
}
+
+ case kIROp_RequireMaximallyReconverges:
+ if (auto entryPointsUsingInst =
+ getReferencingEntryPoints(m_referencingEntryPoints, getParentFunc(inst)))
+ {
+ ensureExtensionDeclaration(UnownedStringSlice("SPV_KHR_maximal_reconvergence"));
+ for (IRFunc* entryPoint : *entryPointsUsingInst)
+ {
+ requireSPIRVExecutionMode(
+ nullptr,
+ getIRInstSpvID(entryPoint),
+ SpvExecutionModeMaximallyReconvergesKHR);
+ }
+ }
+ break;
+ case kIROp_RequireQuadDerivatives:
+ if (auto entryPointsUsingInst =
+ getReferencingEntryPoints(m_referencingEntryPoints, getParentFunc(inst)))
+ {
+ ensureExtensionDeclaration(UnownedStringSlice("SPV_KHR_quad_control"));
+ requireSPIRVCapability(SpvCapabilityQuadControlKHR);
+ for (IRFunc* entryPoint : *entryPointsUsingInst)
+ {
+ requireSPIRVExecutionMode(
+ nullptr,
+ getIRInstSpvID(entryPoint),
+ SpvExecutionModeQuadDerivativesKHR);
+ }
+ }
+ break;
+
case kIROp_Return:
if (as<IRReturn>(inst)->getVal()->getOp() == kIROp_VoidLit)
result = emitOpReturn(parent, inst);
@@ -4472,6 +4503,20 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex
}
}
break;
+ case kIROp_MaximallyReconvergesDecoration: // set by the [MaximallyReconverges] attribute
+ ensureExtensionDeclaration(UnownedStringSlice("SPV_KHR_maximal_reconvergence"));
+ requireSPIRVExecutionMode(nullptr, dstID, SpvExecutionModeMaximallyReconvergesKHR);
+ break;
+ case kIROp_QuadDerivativesDecoration:
+ ensureExtensionDeclaration(UnownedStringSlice("SPV_KHR_quad_control"));
+ requireSPIRVCapability(SpvCapabilityQuadControlKHR);
+ requireSPIRVExecutionMode(nullptr, dstID, SpvExecutionModeQuadDerivativesKHR);
+ break;
+ case kIROp_RequireFullQuadsDecoration:
+ ensureExtensionDeclaration(UnownedStringSlice("SPV_KHR_quad_control"));
+ requireSPIRVCapability(SpvCapabilityQuadControlKHR);
+ requireSPIRVExecutionMode(nullptr, dstID, SpvExecutionModeRequireFullQuadsKHR);
+ break;
case kIROp_SPIRVBufferBlockDecoration:
{
emitOpDecorate(
diff --git a/source/slang/slang-emit.cpp b/source/slang/slang-emit.cpp
index 4fee5c440..d5c13121a 100644
--- a/source/slang/slang-emit.cpp
+++ b/source/slang/slang-emit.cpp
@@ -1955,8 +1955,6 @@ SlangResult CodeGenContext::emitEntryPointsSourceFromIR(ComPtr<IArtifact>& outAr
// Append the modules output code
finalResult.append(code);
- // Append all content that should be at the end of a module
- sourceEmitter->emitPostModule();
finalResult.append(sourceWriter.getContentAndClear());
// Write out the result
diff --git a/source/slang/slang-ir-inst-defs.h b/source/slang/slang-ir-inst-defs.h
index 2f4c69820..179e3a5f4 100644
--- a/source/slang/slang-ir-inst-defs.h
+++ b/source/slang/slang-ir-inst-defs.h
@@ -663,6 +663,10 @@ INST(RequireComputeDerivative, RequireComputeDerivative, 0, 0)
INST(StaticAssert, StaticAssert, 2, 0)
INST(Printf, Printf, 1, 0)
+// Quad control execution modes.
+INST(RequireMaximallyReconverges, RequireMaximallyReconverges, 0, 0)
+INST(RequireQuadDerivatives, RequireQuadDerivatives, 0, 0)
+
// TODO: We should consider splitting the basic arithmetic/comparison
// ops into cases for signed integers, unsigned integers, and floating-point
// values, to better match downstream targets that want to treat them
@@ -960,6 +964,10 @@ INST_RANGE(BindingQuery, GetRegisterIndex, GetRegisterSpace)
INST(DerivativeGroupQuadDecoration, DerivativeGroupQuad, 0, 0)
INST(DerivativeGroupLinearDecoration, DerivativeGroupLinear, 0, 0)
+ INST(MaximallyReconvergesDecoration, MaximallyReconverges, 0, 0)
+ INST(QuadDerivativesDecoration, QuadDerivatives, 0, 0)
+ INST(RequireFullQuadsDecoration, RequireFullQuads, 0, 0)
+
// Marks a type to be non copyable, causing SSA pass to skip turning variables of the the type into SSA values.
INST(NonCopyableTypeDecoration, nonCopyable, 0, 0)
diff --git a/source/slang/slang-ir-insts.h b/source/slang/slang-ir-insts.h
index 2fa4de612..eebb8f119 100644
--- a/source/slang/slang-ir-insts.h
+++ b/source/slang/slang-ir-insts.h
@@ -453,6 +453,9 @@ IR_SIMPLE_DECORATION(GlobalInputDecoration)
IR_SIMPLE_DECORATION(GlobalOutputDecoration)
IR_SIMPLE_DECORATION(DownstreamModuleExportDecoration)
IR_SIMPLE_DECORATION(DownstreamModuleImportDecoration)
+IR_SIMPLE_DECORATION(MaximallyReconvergesDecoration)
+IR_SIMPLE_DECORATION(QuadDerivativesDecoration)
+IR_SIMPLE_DECORATION(RequireFullQuadsDecoration)
struct IRAvailableInDownstreamIRDecoration : IRDecoration
{
@@ -3436,6 +3439,16 @@ struct IRRequireComputeDerivative : IRInst
IR_LEAF_ISA(RequireComputeDerivative)
};
+struct IRRequireMaximallyReconverges : IRInst
+{
+ IR_LEAF_ISA(RequireMaximallyReconverges)
+};
+
+struct IRRequireQuadDerivatives : IRInst
+{
+ IR_LEAF_ISA(RequireQuadDerivatives)
+};
+
struct IRStaticAssert : IRInst
{
IR_LEAF_ISA(StaticAssert)
diff --git a/source/slang/slang-lower-to-ir.cpp b/source/slang/slang-lower-to-ir.cpp
index 54540a3f8..a12a09a2b 100644
--- a/source/slang/slang-lower-to-ir.cpp
+++ b/source/slang/slang-lower-to-ir.cpp
@@ -10347,6 +10347,18 @@ struct DeclLoweringVisitor : DeclVisitor<DeclLoweringVisitor, LoweredValInfo>
derivativeGroupLinearDecor =
getBuilder()->addSimpleDecoration<IRDerivativeGroupLinearDecoration>(irFunc);
}
+ else if (as<MaximallyReconvergesAttribute>(modifier))
+ {
+ getBuilder()->addSimpleDecoration<IRMaximallyReconvergesDecoration>(irFunc);
+ }
+ else if (as<QuadDerivativesAttribute>(modifier))
+ {
+ getBuilder()->addSimpleDecoration<IRQuadDerivativesDecoration>(irFunc);
+ }
+ else if (as<RequireFullQuadsAttribute>(modifier))
+ {
+ getBuilder()->addSimpleDecoration<IRRequireFullQuadsDecoration>(irFunc);
+ }
else if (as<NoRefInlineAttribute>(modifier))
{
getBuilder()->addSimpleDecoration<IRNoRefInlineDecoration>(irFunc);
diff --git a/tests/hlsl-intrinsic/quad-control/quad-control-comp-functionality.slang b/tests/hlsl-intrinsic/quad-control/quad-control-comp-functionality.slang
new file mode 100644
index 000000000..c8f82772d
--- /dev/null
+++ b/tests/hlsl-intrinsic/quad-control/quad-control-comp-functionality.slang
@@ -0,0 +1,40 @@
+//TEST(compute):COMPARE_COMPUTE_EX:-vk -compute -shaderobj -emit-spirv-directly
+//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -profile cs_6_7 -dx12 -use-dxil -shaderobj -render-feature hardware-device
+//TEST(compute):COMPARE_COMPUTE_EX:-metal -compute -shaderobj
+
+//TEST_INPUT:ubuffer(data=[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0], stride=4):out,name outputBuffer
+RWStructuredBuffer<uint> outputBuffer;
+
+[numthreads(16, 1, 1)]
+void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID)
+{
+ uint index = dispatchThreadID.x;
+
+ if (index < 4)
+ {
+ // Quad 1.
+ // Should return true, index 0's expr is true while all other indices' expr are false.
+ outputBuffer[index] = uint(QuadAny((index % 4) == 0));
+ }
+ else if (index < 8)
+ {
+ // Quad 2.
+ // Should return false, all indices' expr are false.
+ bool falseCondition = (5 == 4);
+ outputBuffer[index] = uint(QuadAny(falseCondition));
+ }
+ else if (index < 12)
+ {
+ // Quad 3.
+ // Should return false, only index 8's expr (the first lane of this quad) is true while all other indices' expr are false.
+ outputBuffer[index] = uint(QuadAll((index % 4) == 0));
+ }
+ else
+ {
+ // Quad 4.
+ // Should return true, all indices' expr are true.
+ bool trueCondition = (5 == 5);
+ outputBuffer[index] = uint(QuadAll(trueCondition));
+ }
+}
+
diff --git a/tests/hlsl-intrinsic/quad-control/quad-control-comp-functionality.slang.expected.txt b/tests/hlsl-intrinsic/quad-control/quad-control-comp-functionality.slang.expected.txt
new file mode 100644
index 000000000..945f08f2c
--- /dev/null
+++ b/tests/hlsl-intrinsic/quad-control/quad-control-comp-functionality.slang.expected.txt
@@ -0,0 +1,16 @@
+1
+1
+1
+1
+0
+0
+0
+0
+0
+0
+0
+0
+1
+1
+1
+1
diff --git a/tests/hlsl-intrinsic/quad-control/quad-control-frag-many-entry-points.slang b/tests/hlsl-intrinsic/quad-control/quad-control-frag-many-entry-points.slang
new file mode 100644
index 000000000..2312f0c95
--- /dev/null
+++ b/tests/hlsl-intrinsic/quad-control/quad-control-frag-many-entry-points.slang
@@ -0,0 +1,96 @@
+//TEST:SIMPLE(filecheck=CHECK_SPIRV): -target spirv -fvk-use-entrypoint-name
+//TEST:SIMPLE(filecheck=CHECK_GLSL): -target glsl -fvk-use-entrypoint-name
+
+//
+// Check that SPIRV quad control execution modes and GLSL layout/attribute decorations are only
+// set on entry points that contain quad control functions and/or quad control decorations.
+//
+
+Texture2D colorTexture1;
+SamplerState samplerState;
+
+struct FragmentInput {
+ float2 uv : TEXCOORD0;
+};
+
+float4 getFragColor(float2 uv) {
+ float4 fragColor = float4(1.0, 1.0, 1.0, 1.0);
+ bool nonUniformCondition = uv.x > 0.5;
+
+ if (QuadAny(nonUniformCondition)) {
+ float4 color = colorTexture1.Sample(samplerState, uv);
+ if (nonUniformCondition) {
+ fragColor = color;
+ }
+ }
+
+ return fragColor;
+}
+
+// CHECK_SPIRV: OpExecutionMode %fragmentMain1 MaximallyReconvergesKHR
+// CHECK_SPIRV: OpExecutionMode %fragmentMain1 QuadDerivativesKHR
+// CHECK_GLSL: layout(quad_derivatives) in
+// CHECK_GLSL: [maximally_reconverges]
+[shader("fragment")]
+float4 fragmentMain1(FragmentInput input) : SV_Target
+{
+ bool nonUniformCondition = input.uv.x > 0.5;
+
+ float4 fragColor = float4(1.0, 1.0, 1.0, 1.0);
+
+ if (QuadAny(nonUniformCondition)) {
+ float4 color = colorTexture1.Sample(samplerState, input.uv);
+ if (nonUniformCondition) {
+ fragColor = color;
+ }
+ }
+
+ return fragColor;
+}
+
+// CHECK_SPIRV-NOT: OpExecutionMode %fragmentMain2 QuadDerivativesKHR
+// CHECK_SPIRV: OpExecutionMode %fragmentMain2 MaximallyReconvergesKHR
+// CHECK_GLSL-NOT: layout(quad_derivatives) in
+// CHECK_GLSL: [maximally_reconverges]
+[MaximallyReconverges]
+[shader("fragment")]
+float4 fragmentMain2(FragmentInput input) : SV_Target
+{
+ return float4(1.0, 1.0, 1.0, 1.0);
+}
+
+
+// CHECK_SPIRV-NOT: OpExecutionMode %fragmentMain3 MaximallyReconvergesKHR
+// CHECK_SPIRV-NOT: OpExecutionMode %fragmentMain3 QuadDerivativesKHR
+// CHECK_SPIRV: OpExecutionMode %fragmentMain3 RequireFullQuadsKHR
+// CHECK_GLSL-NOT: layout(quad_derivatives) in
+// CHECK_GLSL: layout(full_quads) in
+// CHECK_GLSL-NOT: [maximally_reconverges]
+[RequireFullQuads]
+[shader("fragment")]
+float4 fragmentMain3(FragmentInput input) : SV_Target
+{
+ return float4(1.0, 1.0, 1.0, 1.0);
+}
+
+// CHECK_SPIRV: OpExecutionMode %fragmentMain4 MaximallyReconvergesKHR
+// CHECK_SPIRV: OpExecutionMode %fragmentMain4 QuadDerivativesKHR
+// CHECK_GLSL: layout(quad_derivatives) in
+// CHECK_GLSL: [maximally_reconverges]
+[shader("fragment")]
+float4 fragmentMain4(FragmentInput input) : SV_Target
+{
+ return getFragColor(input.uv);
+}
+
+// CHECK_SPIRV-NOT: OpExecutionMode %fragmentMain5 MaximallyReconvergesKHR
+// CHECK_SPIRV-NOT: OpExecutionMode %fragmentMain5 QuadDerivativesKHR
+// CHECK_SPIRV-NOT: OpExecutionMode %fragmentMain5 RequireFullQuadsKHR
+// CHECK_GLSL-NOT: layout(quad_derivatives) in
+// CHECK_GLSL-NOT: layout(full_quads) in
+// CHECK_GLSL-NOT: [maximally_reconverges]
+[shader("fragment")]
+float4 fragmentMain5(FragmentInput input) : SV_Target
+{
+ return float4(1.0, 1.0, 1.0, 1.0);
+}
diff --git a/tests/hlsl-intrinsic/quad-control/quad-control-frag.slang b/tests/hlsl-intrinsic/quad-control/quad-control-frag.slang
new file mode 100644
index 000000000..29a9546a0
--- /dev/null
+++ b/tests/hlsl-intrinsic/quad-control/quad-control-frag.slang
@@ -0,0 +1,53 @@
+//TEST:SIMPLE(filecheck=CHECK_SPIRV): -entry fragmentMain -stage fragment -target spirv
+//TEST:SIMPLE(filecheck=CHECK_GLSL): -entry fragmentMain -stage fragment -target glsl
+//TEST:SIMPLE(filecheck=CHECK_HLSL): -entry fragmentMain -stage fragment -target hlsl
+//TEST:SIMPLE(filecheck=CHECK_METAL): -entry fragmentMain -stage fragment -target metal
+
+Texture2D colorTexture1;
+Texture2D colorTexture2;
+SamplerState samplerState;
+
+struct FragmentInput {
+ float2 uv : TEXCOORD0;
+};
+
+// CHECK_SPIRV: OpExecutionMode %fragmentMain MaximallyReconvergesKHR
+// CHECK_SPIRV: OpExecutionMode %fragmentMain QuadDerivativesKHR
+// CHECK_SPIRV: OpExecutionMode %fragmentMain RequireFullQuadsKHR
+// CHECK_GLSL: layout(quad_derivatives) in
+// CHECK_GLSL: layout(full_quads) in
+// CHECK_GLSL: [maximally_reconverges]
+[QuadDerivatives]
+[RequireFullQuads]
+float4 fragmentMain(FragmentInput input) : SV_Target
+{
+ bool nonUniformCondition1 = input.uv.x > 0.5;
+ bool nonUniformCondition2 = input.uv.y > 0.8;
+
+ float4 fragColor = float4(1.0, 1.0, 1.0, 1.0);
+
+ // CHECK_SPIRV: OpGroupNonUniformQuadAnyKHR
+ // CHECK_GLSL: subgroupQuadAny
+ // CHECK_HLSL: QuadAny
+ // CHECK_METAL: quad_any
+ if (QuadAny(nonUniformCondition1)) {
+ float4 color = colorTexture1.Sample(samplerState, input.uv);
+ if (nonUniformCondition1) {
+ fragColor = color;
+ }
+ }
+
+ // CHECK_SPIRV: OpGroupNonUniformQuadAllKHR
+ // CHECK_GLSL: subgroupQuadAll
+ // CHECK_HLSL: QuadAll
+ // CHECK_METAL: quad_all
+ if (QuadAll(nonUniformCondition2)) {
+ float4 color = colorTexture2.Sample(samplerState, input.uv);
+ if (nonUniformCondition2) {
+ fragColor += color * 0.5;
+ }
+ }
+
+ return fragColor;
+}
+