diff options
| author | Darren Wihandi <65404740+fairywreath@users.noreply.github.com> | 2025-01-18 02:07:16 -0500 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-01-17 23:07:16 -0800 |
| commit | a85c350df03c6cdf9b433f58fc0e66affda03e9e (patch) | |
| tree | 3e80ea7121b3b5bb5c17aee88599212b78abec38 /source/slang/slang-emit-glsl.cpp | |
| parent | 87a08160289c194ddfb337d521893f576ceb9f97 (diff) | |
Implement Quad Control intrinsics (#5981)
Diffstat (limited to 'source/slang/slang-emit-glsl.cpp')
| -rw-r--r-- | source/slang/slang-emit-glsl.cpp | 308 |
1 files changed, 200 insertions, 108 deletions
diff --git a/source/slang/slang-emit-glsl.cpp b/source/slang/slang-emit-glsl.cpp index 0dab07cfc..b94f44690 100644 --- a/source/slang/slang-emit-glsl.cpp +++ b/source/slang/slang-emit-glsl.cpp @@ -25,9 +25,78 @@ GLSLSourceEmitter::GLSLSourceEmitter(const Desc& desc) SLANG_ASSERT(m_glslExtensionTracker); } +void GLSLSourceEmitter::_beforeComputeEmitProcessInstruction( + IRInst* parentFunc, + IRInst* inst, + IRBuilder& builder) +{ + if (auto requireGLSLExt = as<IRRequireGLSLExtension>(inst)) + { + _requireGLSLExtension(requireGLSLExt->getExtensionName()); + return; + } + + // Early exit on instructions we are not interested in. + if (!as<IRRequireMaximallyReconverges>(inst) && !as<IRRequireQuadDerivatives>(inst) && + !(as<IRRequireComputeDerivative>(inst) && (m_entryPointStage == Stage::Compute))) + { + return; + } + + // Check for entry point specific decorations. + // + // Handle cases where "require" IR operations exist in the function body and are required + // as entry point decorations. + auto entryPoints = getReferencingEntryPoints(m_referencingEntryPoints, parentFunc); + if (entryPoints == nullptr) + return; + + for (auto entryPoint : *entryPoints) + { + if (as<IRRequireMaximallyReconverges>(inst)) + { + builder.addDecoration(entryPoint, kIROp_MaximallyReconvergesDecoration); + } + else if (as<IRRequireQuadDerivatives>(inst)) + { + builder.addDecoration(entryPoint, kIROp_QuadDerivativesDecoration); + } + else + { + const auto requireComputeDerivative = as<IRRequireComputeDerivative>(inst); + + SLANG_ASSERT(requireComputeDerivative); + SLANG_ASSERT(m_entryPointStage == Stage::Compute); + + // Compute derivatives are quad by default, add the decoration if entry point + // does not not explicit linear decoration. + bool isQuad = !entryPoint->findDecoration<IRDerivativeGroupLinearDecoration>(); + if (isQuad) + { + builder.addDecoration(entryPoint, kIROp_DerivativeGroupQuadDecoration); + } + } + } +} + void GLSLSourceEmitter::beforeComputeEmitActions(IRModule* module) { buildEntryPointReferenceGraph(this->m_referencingEntryPoints, module); + + IRBuilder builder(module); + for (auto globalInst : module->getGlobalInsts()) + { + if (auto func = as<IRGlobalValueWithCode>(globalInst)) + { + for (auto block : func->getBlocks()) + { + for (auto inst = block->getFirstInst(); inst; inst = inst->next) + { + _beforeComputeEmitProcessInstruction(func, inst, builder); + } + } + } + } } SlangResult GLSLSourceEmitter::init() @@ -78,8 +147,8 @@ void GLSLSourceEmitter::_requireRayQuery() { m_glslExtensionTracker->requireExtension(UnownedStringSlice::fromLiteral("GL_EXT_ray_query")); m_glslExtensionTracker->requireSPIRVVersion( - SemanticVersion(1, 4)); // required due to glslang bug which enables `SPV_KHR_ray_tracing` - // regardless of context + SemanticVersion(1, 4)); // required due to glslang bug which enables + // `SPV_KHR_ray_tracing` regardless of context m_glslExtensionTracker->requireVersion(ProfileVersion::GLSL_460); } @@ -226,8 +295,8 @@ void GLSLSourceEmitter::_emitGLSLStructuredBuffer( m_writer->emit(") "); /* - If the output type is a buffer, and we can determine it is only readonly we can prefix before - buffer with 'readonly' + If the output type is a buffer, and we can determine it is only readonly we can prefix + before buffer with 'readonly' The actual structuredBufferType could be @@ -349,8 +418,8 @@ void GLSLSourceEmitter::emitSSBOHeader(IRGlobalParam* varDecl, IRType* bufferTyp _emitMemoryQualifierDecorations(varDecl); /* - If the output type is a buffer, and we can determine it is only readonly we can prefix before - buffer with 'readonly' + If the output type is a buffer, and we can determine it is only readonly we can prefix + before buffer with 'readonly' HLSLByteAddressBufferType - This is unambiguously read only HLSLRWByteAddressBufferType - Read write @@ -437,11 +506,11 @@ void GLSLSourceEmitter::_emitGLSLParameterGroup( } /* - With resources backed by 'buffer' on glsl, we want to output 'readonly' if that is a good match - for the underlying type. If uniform it's implicit it's readonly + With resources backed by 'buffer' on glsl, we want to output 'readonly' if that is a good + match for the underlying type. If uniform it's implicit it's readonly - Here this only happens with isShaderRecord which is a 'constant buffer' (ie implicitly readonly) - or IRGLSLShaderStorageBufferType which is read write. + Here this only happens with isShaderRecord which is a 'constant buffer' (ie implicitly + readonly) or IRGLSLShaderStorageBufferType which is read write. */ { @@ -653,20 +722,21 @@ void GLSLSourceEmitter::_emitGLSLImageFormatModifier(IRInst* var, IRTextureType* // default to rgba // // The SPIR-V spec - // (https://www.khronos.org/registry/spir-v/specs/unified1/SPIRV.pdf) section 3.11 - // on Image Formats it does not list rgbf32. + // (https://www.khronos.org/registry/spir-v/specs/unified1/SPIRV.pdf) + // section 3.11 on Image Formats it does not list rgbf32. // // It seems SPIR-V can support having an image with an unknown-at-compile-time - // format, so long as the underlying API supports it. Ideally this would mean that - // we can just drop all these qualifiers when emitting GLSL for Vulkan targets. + // format, so long as the underlying API supports it. Ideally this would mean + // that we can just drop all these qualifiers when emitting GLSL for Vulkan + // targets. // - // This raises the question of what to do more long term. For Vulkan hopefully we - // can just drop the layout. For OpenGL targets it would seem reasonable to have - // well-defined rules for inferring the format (and just document that 3-component - // formats map to 4-component formats, but that shouldn't matter because the API - // wouldn't let the user allocate those 3-component formats anyway), and add an - // attribute for specifying the format manually if you really want to override our - // inference (e.g., to specify r11fg11fb10f). + // This raises the question of what to do more long term. For Vulkan hopefully + // we can just drop the layout. For OpenGL targets it would seem reasonable to + // have well-defined rules for inferring the format (and just document that + // 3-component formats map to 4-component formats, but that shouldn't matter + // because the API wouldn't let the user allocate those 3-component formats + // anyway), and add an attribute for specifying the format manually if you + // really want to override our inference (e.g., to specify r11fg11fb10f). m_writer->emit("rgba"); // Emit("rgb"); @@ -1332,12 +1402,13 @@ void GLSLSourceEmitter::emitEntryPointAttributesImpl( auto profile = entryPointDecor->getProfile(); auto stage = profile.getStage(); + IRNumThreadsDecoration* numThreadsDecor = nullptr; auto emitLocalSizeLayout = [&]() { Int sizeAlongAxis[kThreadGroupAxisCount]; Int specializationConstantIds[kThreadGroupAxisCount]; - getComputeThreadGroupSize(irFunc, sizeAlongAxis, specializationConstantIds); - + numThreadsDecor = + getComputeThreadGroupSize(irFunc, sizeAlongAxis, specializationConstantIds); m_writer->emit("layout("); char const* axes[] = {"x", "y", "z"}; for (int ii = 0; ii < kThreadGroupAxisCount; ++ii) @@ -1364,13 +1435,51 @@ void GLSLSourceEmitter::emitEntryPointAttributesImpl( switch (stage) { case Stage::Compute: + case Stage::Mesh: + case Stage::Amplification: + emitLocalSizeLayout(); + default: + break; + } + + /// Structure to track (some) entry point attributes, to allow ordering when emitting and to + /// ensure decorations are only emitted once. + /// + /// These entry points attributes may be implicitly added by built-in functions and the same + /// function may be called multiple times, hence the need to ensure they are only emitted + /// once. + struct GLSLEntryPointAttributes + { + bool quadDerivatives; + bool requireFullQuads; + bool maximallyReconverges; + String computeDerivatives; + } attributes{}; + + const auto requireQuadControlExtensions = [&]() + { + _requireGLSLExtension(UnownedStringSlice("GL_KHR_shader_subgroup_vote")); + _requireGLSLExtension(UnownedStringSlice("GL_EXT_shader_quad_control")); + }; + + for (auto decoration : irFunc->getDecorations()) + { + // Stage agnostic decorations. + if (as<IRMaximallyReconvergesDecoration>(decoration)) { - emitLocalSizeLayout(); + _requireGLSLExtension(UnownedStringSlice("GL_EXT_maximal_reconvergence")); + attributes.maximallyReconverges = true; } - break; - case Stage::Geometry: + else if (as<IRQuadDerivativesDecoration>(decoration)) { - if (auto decor = irFunc->findDecoration<IRMaxVertexCountDecoration>()) + requireQuadControlExtensions(); + attributes.quadDerivatives = true; + } + + switch (stage) + { + case Stage::Geometry: + if (auto decor = as<IRMaxVertexCountDecoration>(decoration)) { auto count = getIntVal(decor->getCount()); m_writer->emit("layout(max_vertices = "); @@ -1378,7 +1487,7 @@ void GLSLSourceEmitter::emitEntryPointAttributesImpl( m_writer->emit(") out;\n"); } - if (auto decor = irFunc->findDecoration<IRInstanceDecoration>()) + if (auto decor = as<IRInstanceDecoration>(decoration)) { auto count = getIntVal(decor->getCount()); m_writer->emit("layout(invocations = "); @@ -1389,7 +1498,7 @@ void GLSLSourceEmitter::emitEntryPointAttributesImpl( // These decorations were moved from the parameters to the entry point by // ir-glsl-legalize. The actual parameters have become potentially multiple global // parameters. - if (auto decor = irFunc->findDecoration<IRGeometryInputPrimitiveTypeDecoration>()) + if (auto decor = as<IRGeometryInputPrimitiveTypeDecoration>(decoration)) { switch (decor->getOp()) { @@ -1415,7 +1524,7 @@ void GLSLSourceEmitter::emitEntryPointAttributesImpl( } } - if (auto decor = irFunc->findDecoration<IRStreamOutputTypeDecoration>()) + if (auto decor = as<IRStreamOutputTypeDecoration>(decoration)) { IRType* type = decor->getStreamType(); @@ -1434,33 +1543,57 @@ void GLSLSourceEmitter::emitEntryPointAttributesImpl( SLANG_ASSERT(!"Unknown stream out type"); } } - } - break; - case Stage::Pixel: - { - if (irFunc->findDecoration<IREarlyDepthStencilDecoration>()) + break; + case Stage::Pixel: + if (as<IREarlyDepthStencilDecoration>(decoration)) { // https://www.khronos.org/opengl/wiki/Early_Fragment_Test m_writer->emit("layout(early_fragment_tests) in;\n"); } + else if (as<IRRequireFullQuadsDecoration>(decoration)) + { + requireQuadControlExtensions(); + attributes.requireFullQuads = true; + } break; - } - case Stage::Mesh: - { - emitLocalSizeLayout(); - if (auto decor = irFunc->findDecoration<IRVerticesDecoration>()) + case Stage::Compute: + if (as<IRDerivativeGroupQuadDecoration>(decoration)) + { + _requireGLSLExtension(UnownedStringSlice("GL_NV_compute_shader_derivatives")); + verifyComputeDerivativeGroupModifiers( + getSink(), + decoration->sourceLoc, + true, + false, + numThreadsDecor); + attributes.computeDerivatives = "layout(derivative_group_quadsNV) in;\n"; + } + else if (as<IRDerivativeGroupLinearDecoration>(decoration)) + { + _requireGLSLExtension(UnownedStringSlice("GL_NV_compute_shader_derivatives")); + verifyComputeDerivativeGroupModifiers( + getSink(), + decoration->sourceLoc, + false, + true, + numThreadsDecor); + attributes.computeDerivatives = "layout(derivative_group_linearNV) in;\n"; + } + break; + case Stage::Mesh: + if (auto decor = as<IRVerticesDecoration>(decoration)) { m_writer->emit("layout(max_vertices = "); m_writer->emit(decor->getMaxSize()->getValue()); m_writer->emit(") out;\n"); } - if (auto decor = irFunc->findDecoration<IRPrimitivesDecoration>()) + if (auto decor = as<IRPrimitivesDecoration>(decoration)) { m_writer->emit("layout(max_primitives = "); m_writer->emit(decor->getMaxSize()->getValue()); m_writer->emit(") out;\n"); } - if (auto decor = irFunc->findDecoration<IROutputTopologyDecoration>()) + if (auto decor = as<IROutputTopologyDecoration>(decoration)) { // TODO: Ellie validate here/elsewhere, what's allowed here is // different from the tesselator @@ -1469,16 +1602,32 @@ void GLSLSourceEmitter::emitEntryPointAttributesImpl( m_writer->emit(decor->getTopology()->getStringSlice()); m_writer->emit("s) out;\n"); } + break; + default: + break; } - break; - case Stage::Amplification: - { - emitLocalSizeLayout(); - } - break; - // TODO: There are other stages that will need this kind of handling. - default: - break; + } + + if (attributes.quadDerivatives) + { + m_writer->emit("layout(quad_derivatives) in;\n"); + } + if (attributes.requireFullQuads) + { + m_writer->emit("layout(full_quads) in;\n"); + } + + // This must be emitted after local size when using glslang. + if (attributes.computeDerivatives.getLength() > 0) + { + m_writer->emit(attributes.computeDerivatives); + } + + // This must be emitted last because GLSL's `[[..]]` attribute syntax must come right + // before the entry point function declaration. + if (attributes.maximallyReconverges) + { + m_writer->emit("[[maximally_reconverges]]\n"); } } @@ -2755,63 +2904,6 @@ void GLSLSourceEmitter::handleRequiredCapabilitiesImpl(IRInst* inst) } } } - - // The function may have various requirment declaring functions its body. We also need to look - // for them. - auto func = as<IRFunc>(inst); - if (!func) - return; - auto block = func->getFirstBlock(); - if (!block) - return; - for (auto childInst : block->getChildren()) - { - if (auto requireGLSLExt = as<IRRequireGLSLExtension>(childInst)) - { - _requireGLSLExtension(requireGLSLExt->getExtensionName()); - } - else if (const auto requireComputeDerivative = as<IRRequireComputeDerivative>(childInst)) - { - // only allowed 1 of derivative_group_quadsNV or derivative_group_linearNV - if (m_entryPointStage != Stage::Compute || - m_requiredAfter.requireComputeDerivatives.getLength() > 0) - return; - - _requireGLSLExtension(UnownedStringSlice("GL_NV_compute_shader_derivatives")); - - // This will only run once per program. - HashSet<IRFunc*>* entryPointsUsingInst = - getReferencingEntryPoints(m_referencingEntryPoints, func); - - for (auto entryPoint : *entryPointsUsingInst) - { - bool isQuad = !entryPoint->findDecoration<IRDerivativeGroupLinearDecoration>(); - auto numThreadsDecor = entryPoint->findDecoration<IRNumThreadsDecoration>(); - if (isQuad) - { - verifyComputeDerivativeGroupModifiers( - getSink(), - inst->sourceLoc, - true, - false, - numThreadsDecor); - m_requiredAfter.requireComputeDerivatives = - "layout(derivative_group_quadsNV) in;"; - } - else - { - verifyComputeDerivativeGroupModifiers( - getSink(), - inst->sourceLoc, - false, - true, - numThreadsDecor); - m_requiredAfter.requireComputeDerivatives = - "layout(derivative_group_linearNV) in;"; - } - } - } - } } static Index _getGLSLVersion(ProfileVersion profile) |
