// slang-emit.cpp #include "slang-emit.h" #include "../core/slang-writer.h" #include "../core/slang-type-text-util.h" #include "slang-ir-bind-existentials.h" #include "slang-ir-byte-address-legalize.h" #include "slang-ir-dce.h" #include "slang-ir-entry-point-uniforms.h" #include "slang-ir-glsl-legalize.h" #include "slang-ir-insts.h" #include "slang-ir-link.h" #include "slang-ir-lower-generics.h" #include "slang-ir-restructure.h" #include "slang-ir-restructure-scoping.h" #include "slang-ir-specialize.h" #include "slang-ir-specialize-resources.h" #include "slang-ir-ssa.h" #include "slang-ir-strip-witness-tables.h" #include "slang-ir-synthesize-active-mask.h" #include "slang-ir-union.h" #include "slang-ir-validate.h" #include "slang-ir-wrap-structured-buffers.h" #include "slang-legalize-types.h" #include "slang-lower-to-ir.h" #include "slang-mangle.h" #include "slang-name.h" #include "slang-syntax.h" #include "slang-type-layout.h" #include "slang-visitor.h" #include "slang-ir-strip.h" #include "slang-emit-source-writer.h" #include "slang-emit-c-like.h" #include "slang-emit-glsl.h" #include "slang-emit-hlsl.h" #include "slang-emit-cpp.h" #include "slang-emit-cuda.h" #include namespace Slang { EntryPointLayout* findEntryPointLayout( ProgramLayout* programLayout, EntryPoint* entryPoint) { // TODO: This function shouldn't need to exist, and it // somewhat hampers the capabilities of the compiler (e.g., // it isn't supported to have a single program contain // two different "instances" of the same entry point). // // Code that cares about layouts should be looking up // the entry point layout by index on a `ProgramLayout`, // knowing that those indices will align with the order // of entry points on the `ComponentType` for the program. for( auto entryPointLayout : programLayout->entryPoints ) { if(entryPointLayout->entryPoint.getName() != entryPoint->getName()) continue; // TODO: We need to be careful about this check, since it relies on // the profile information in the layout matching that in the request. // // What we really seem to want here is some dictionary mapping the // `EntryPoint` directly to the `EntryPointLayout`, and maybe // that is precisely what we should build... // if(entryPointLayout->profile != entryPoint->getProfile()) continue; return entryPointLayout; } return nullptr; } /// Given a layout computed for a scope, get the layout to use when lookup up variables. /// /// A scope (such as the global scope of a program) groups its /// parameters into a pseudo-`struct` type for layout purposes, /// and in some cases that type will in turn be wrapped in a /// `ConstantBuffer` type to indicate that the parameters needed /// an implicit constant buffer to be allocated. /// /// This function "unwraps" the type layout to find the structure /// type layout that must be stored inside. /// StructTypeLayout* getScopeStructLayout( ScopeLayout* scopeLayout) { auto scopeTypeLayout = scopeLayout->parametersLayout->typeLayout; if( auto constantBufferTypeLayout = as(scopeTypeLayout) ) { scopeTypeLayout = constantBufferTypeLayout->offsetElementTypeLayout; } if( auto structTypeLayout = as(scopeTypeLayout) ) { return structTypeLayout; } SLANG_UNEXPECTED("uhandled global-scope binding layout"); return nullptr; } /// Given a layout computed for a program, get the layout to use when lookup up variables. /// /// This is just an alias of `getScopeStructLayout`. /// StructTypeLayout* getGlobalStructLayout( ProgramLayout* programLayout) { return getScopeStructLayout(programLayout); } static void dumpIR( BackEndCompileRequest* compileRequest, IRModule* irModule, char const* label) { DiagnosticSinkWriter writerImpl(compileRequest->getSink()); WriterHelper writer(&writerImpl); if(label) { writer.put("### "); writer.put(label); writer.put(":\n"); } dumpIR(irModule, writer.getWriter()); if( label ) { writer.put("###\n"); } } static void dumpIRIfEnabled( BackEndCompileRequest* compileRequest, IRModule* irModule, char const* label = nullptr) { if(compileRequest->shouldDumpIR) { dumpIR(compileRequest, irModule, label); } } struct LinkingAndOptimizationOptions { bool shouldLegalizeExistentialAndResourceTypes = true; CLikeSourceEmitter* sourceEmitter = nullptr; }; Result linkAndOptimizeIR( BackEndCompileRequest* compileRequest, Int entryPointIndex, CodeGenTarget target, TargetRequest* targetRequest, LinkingAndOptimizationOptions const& options, LinkedIR& outLinkedIR) { auto sink = compileRequest->getSink(); auto program = compileRequest->getProgram(); auto targetProgram = program->getTargetProgram(targetRequest); auto session = targetRequest->getSession(); // We start out by performing "linking" at the level of the IR. // This step will create a fresh IR module to be used for // code generation, and will copy in any IR definitions that // the desired entry point requires. Along the way it will // resolve references to imported/exported symbols across // modules, and also select between the definitions of // any "profile-overloaded" symbols. // outLinkedIR = linkIR( compileRequest, entryPointIndex, target, targetProgram); auto irModule = outLinkedIR.module; auto irEntryPoint = outLinkedIR.entryPoint; #if 0 dumpIRIfEnabled(compileRequest, irModule, "LINKED"); #endif validateIRModuleIfEnabled(compileRequest, irModule); // If the user specified the flag that they want us to dump // IR, then do it here, for the target-specific, but // un-specialized IR. dumpIRIfEnabled(compileRequest, irModule); // Replace any global constants with their values. // replaceGlobalConstants(irModule); #if 0 dumpIRIfEnabled(compileRequest, irModule, "GLOBAL CONSTANTS REPLACED"); #endif validateIRModuleIfEnabled(compileRequest, irModule); // When there are top-level existential-type parameters // to the shader, we need to take the side-band information // on how the existential "slots" were bound to concrete // types, and use it to introduce additional explicit // shader parameters for those slots, to be wired up to // use sites. // bindExistentialSlots(irModule, sink); #if 0 dumpIRIfEnabled(compileRequest, irModule, "EXISTENTIALS BOUND"); #endif validateIRModuleIfEnabled(compileRequest, irModule); // Now that we've linked the IR code, any layout/binding // information has been attached to shader parameters // and entry points. Now we are safe to make transformations // that might move code without worrying about losing // the connection between a parameter and its layout. // // An easy transformation of this kind is to take uniform // parameters of a shader entry point and move them into // the global scope instead. // moveEntryPointUniformParamsToGlobalScope(irModule, target); #if 0 dumpIRIfEnabled(compileRequest, irModule, "ENTRY POINT UNIFORMS MOVED"); #endif validateIRModuleIfEnabled(compileRequest, irModule); // Desguar any union types, since these will be illegal on // various targets. // desugarUnionTypes(irModule); #if 0 dumpIRIfEnabled(compileRequest, irModule, "UNIONS DESUGARED"); #endif validateIRModuleIfEnabled(compileRequest, irModule); // Next, we need to ensure that the code we emit for // the target doesn't contain any operations that would // be illegal on the target platform. For example, // none of our target supports generics, or interfaces, // so we need to specialize those away. // // Simplification of existential-based and generics-based // code may each open up opportunities for the other, so // the relevant specialization transformations are handled in a // single pass that looks for all simplification opportunities. // // TODO: We also need to extend this pass so that it will "expose" // existential values that are nested inside of other types, // so that the simplifications can be applied. // // TODO: This pass is *also* likely to be the place where we // perform specialization of functions based on parameter // values that need to be compile-time constants. // if (!compileRequest->allowDynamicCode) specializeModule(irModule); switch (target) { case CodeGenTarget::CPPSource: // For targets that supports dynamic dispatch, we need to lower the // generics / interface types to ordinary functions and types using // function pointers. lowerGenerics(irModule); break; default: break; } // Debugging code for IR transformations... #if 0 dumpIRIfEnabled(compileRequest, irModule, "SPECIALIZED"); #endif validateIRModuleIfEnabled(compileRequest, irModule); // Specialization can introduce dead code that could trip // up downstream passes like type legalization, so we // will run a DCE pass to clean up after the specialization. // // TODO: Are there other cleanup optimizations we should // apply at this point? // eliminateDeadCode(irModule); #if 0 dumpIRIfEnabled(compileRequest, irModule, "AFTER DCE"); #endif validateIRModuleIfEnabled(compileRequest, irModule); // We don't need the legalize pass for C/C++ based types if(options.shouldLegalizeExistentialAndResourceTypes ) // if (!(sourceStyle == SourceStyle::CPP || sourceStyle == SourceStyle::C)) { // The Slang language allows interfaces to be used like // ordinary types (including placing them in constant // buffers and entry-point parameter lists), but then // getting them to lay out in a reasonable way requires // us to treat fields/variables with interface type // *as if* they were pointers to heap-allocated "objects." // // Specialization will have replaced fields/variables // with interface types like `IFoo` with fields/variables // with pointer-like types like `ExistentialBox`. // // We need to legalize these pointer-like types away, // which involves two main changes: // // 1. Any `ExistentialBox<...>` fields need to be moved // out of their enclosing `struct` type, so that the layout // of the enclosing type is computed as if the field had // zero size. // // 2. Once an `ExistentialBox` has been floated out // of its parent and landed somwhere permanent (e.g., either // a dedicated variable, or a field of constant buffer), // we need to replace it with just an `X`, after which we // will have (more) legal shader code. // legalizeExistentialTypeLayout( irModule, sink); eliminateDeadCode(irModule); #if 0 dumpIRIfEnabled(compileRequest, irModule, "EXISTENTIALS LEGALIZED"); #endif validateIRModuleIfEnabled(compileRequest, irModule); // Many of our target languages and/or downstream compilers // don't support `struct` types that have resource-type fields. // In order to work around this limitation, we will rewrite the // IR so that any structure types with resource-type fields get // split into a "tuple" that comprises the ordinary fields (still // bundles up as a `struct`) and one element for each resource-type // field (recursively). // // What used to be individual variables/parameters/arguments/etc. // then become multiple variables/parameters/arguments/etc. // legalizeResourceTypes( irModule, sink); eliminateDeadCode(irModule); // Debugging output of legalization #if 0 dumpIRIfEnabled(compileRequest, irModule, "LEGALIZED"); #endif validateIRModuleIfEnabled(compileRequest, irModule); } // Once specialization and type legalization have been performed, // we should perform some of our basic optimization steps again, // to see if we can clean up any temporaries created by legalization. // (e.g., things that used to be aggregated might now be split up, // so that we can work with the individual fields). constructSSA(irModule); #if 0 dumpIRIfEnabled(compileRequest, irModule, "AFTER SSA"); #endif validateIRModuleIfEnabled(compileRequest, irModule); // After type legalization and subsequent SSA cleanup we expect // that any resource types passed to functions are exposed // as their own top-level parameters (which might have // resource or array-of-...-resource types). // // Many of our targets place restrictions on how certain // resource types can be used, so that having them as // function parameters is invalid. To clean this up, // we will try to specialize called functions based // on the actual resources that are being passed to them // at specific call sites. // // Because the legalization may depend on what target // we are compiling for (certain things might be okay // for D3D targets that are not okay for Vulkan), we // pass down the target request along with the IR. // specializeResourceParameters(compileRequest, targetRequest, irModule); #if 0 dumpIRIfEnabled(compileRequest, irModule, "AFTER RESOURCE SPECIALIZATION"); #endif validateIRModuleIfEnabled(compileRequest, irModule); // For HLSL (and fxc/dxc) only, we need to "wrap" any // structured buffers defined over matrix types so // that they instead use an intermediate `struct`. // This is required to get those targets to respect // the options for matrix layout set via `#pragma` // or command-line options. // switch(target) { case CodeGenTarget::HLSL: { wrapStructuredBuffersOfMatrices(irModule); #if 0 dumpIRIfEnabled(compileRequest, irModule, "STRUCTURED BUFFERS WRAPPED"); #endif validateIRModuleIfEnabled(compileRequest, irModule); } break; default: break; } // For all targets, we translate load/store operations // of aggregate types from/to byte-address buffers into // stores of individual scalar or vector values. // { ByteAddressBufferLegalizationOptions byteAddressBufferOptions; // Depending on the target, we may decide to do // more aggressive translation that reduces the // load/store operations down to invididual scalars // (splitting up vector ops). // switch( target ) { default: break; case CodeGenTarget::GLSL: // For GLSL targets, we want to translate the vector load/store // operations into scalar ops. This is in part as a simplification, // but it also ensures that our generated code respects the lax // alignment rules for D3D byte-address buffers (the base address // of a buffer need not be more than 4-byte aligned, and loads // of vectors need only be aligned based on their element type). // // TODO: We should consider having an extended variant of `Load` // on byte-address buffers which expresses a programmer's knowledge // that the load will have greater alignment than required by D3D. // That could either come as an explicit guaranteed-alignment // operand, or instead as something like a `Load4Aligned` operation // that returns a `vector<4,T>` and assumes `4*sizeof(T)` alignemtn. // byteAddressBufferOptions.scalarizeVectorLoadStore = true; // For GLSL targets, there really isn't a low-level concept // of a byte-address buffer at all, and the standard "shader storage // buffer" (SSBO) feature is a lot closer to an HLSL structured // buffer for our purposes. // // In particular, each SSBO can only have a single element type, // so that even with bitcasts we can't have a single buffer declaration // (e.g., one with `uint` elements) service all load/store operations // (e.g., a `half` value can't be stored atomically if there are // `uint` elements, unless we use explicit atomics). // // In order to simplify things, we will translate byte-address buffer // ops to equivalent structured-buffer ops for GLSL targets, where // each unique type being loaded/stored yields a different global // parameter declaration of the buffer. // byteAddressBufferOptions.translateToStructuredBufferOps = true; break; } // We also need to decide whether to translate // any "leaf" load/store operations over to // use only unsigned-integer types and then // bit-cast, or if we prefer to leave them // as load/store of the original type. // switch( target ) { case CodeGenTarget::HLSL: { auto profile = targetRequest->targetProfile; if( profile.getFamily() == ProfileFamily::DX ) { if(profile.getVersion() <= ProfileVersion::DX_5_0) { // Fxc and earlier dxc versions do not support // a templates `.Load` operation on byte-address // buffers, and instead need us to emit separate // `uint` loads and then bit-cast over to // the correct type. // byteAddressBufferOptions.useBitCastFromUInt = true; } } } break; default: break; } legalizeByteAddressBufferOps(session, irModule, byteAddressBufferOptions); } // For CUDA targets only, we will need to turn operations // the implicitly reference the "active mask" into ones // that use (and pass around) an explicit mask instead. // switch(target) { case CodeGenTarget::CUDASource: case CodeGenTarget::PTX: { synthesizeActiveMask( irModule, compileRequest->getSink()); #if 0 dumpIRIfEnabled(compileRequest, irModule, "AFTER synthesizeActiveMask"); #endif validateIRModuleIfEnabled(compileRequest, irModule); } break; default: break; } // For GLSL only, we will need to perform "legalization" of // the entry point and any entry-point parameters. // // TODO: We should consider moving this legalization work // as late as possible, so that it doesn't affect how other // optimization passes need to work. // switch (target) { case CodeGenTarget::GLSL: { auto glslExtensionTracker = as(options.sourceEmitter->getExtensionTracker()); legalizeEntryPointForGLSL( session, irModule, irEntryPoint, compileRequest->getSink(), glslExtensionTracker); #if 0 dumpIRIfEnabled(compileRequest, irModule, "GLSL LEGALIZED"); #endif validateIRModuleIfEnabled(compileRequest, irModule); } break; default: break; } if (!compileRequest->allowDynamicCode) { // For all targets that don't support true dynamic dispatch through // witness tables, we need to eliminate witness tables from the IR so // that they don't keep symbols live that we don't actually need. stripWitnessTables(irModule); } #if 0 dumpIRIfEnabled(compileRequest, irModule, "AFTER STRIP WITNESS TABLES"); #endif validateIRModuleIfEnabled(compileRequest, irModule); // The resource-based specialization pass above // may create specialized versions of functions, but // it does not try to completely eliminate the original // functions, so there might still be invalid code in // our IR module. // // To clean up the code, we will apply a fairly general // dead-code-elimination (DCE) pass that only retains // whatever code is "live." // eliminateDeadCode(irModule); #if 0 dumpIRIfEnabled(compileRequest, irModule, "AFTER DCE"); #endif validateIRModuleIfEnabled(compileRequest, irModule); return SLANG_OK; } SlangResult emitEntryPointSourceFromIR( BackEndCompileRequest* compileRequest, Int entryPointIndex, CodeGenTarget target, TargetRequest* targetRequest, SourceResult& outSource) { outSource.reset(); auto sink = compileRequest->getSink(); auto program = compileRequest->getProgram(); auto entryPoint = program->getEntryPoint(entryPointIndex); auto lineDirectiveMode = compileRequest->getLineDirectiveMode(); // To try to make the default behavior reasonable, we will // always use C-style line directives (to give the user // good source locations on error messages from downstream // compilers) *unless* they requested raw GLSL as the // output (in which case we want to maximize compatibility // with downstream tools). if (lineDirectiveMode == LineDirectiveMode::Default && targetRequest->getTarget() == CodeGenTarget::GLSL) { lineDirectiveMode = LineDirectiveMode::GLSL; } SourceWriter sourceWriter(compileRequest->getSourceManager(), lineDirectiveMode ); CLikeSourceEmitter::Desc desc; desc.compileRequest = compileRequest; desc.target = target; desc.entryPointStage = entryPoint->getStage(); desc.effectiveProfile = getEffectiveProfile(entryPoint, targetRequest); desc.sourceWriter = &sourceWriter; // Define here, because must be in scope longer than the sourceEmitter, as sourceEmitter might reference // items in the linkedIR module LinkedIR linkedIR; RefPtr sourceEmitter; typedef CLikeSourceEmitter::SourceStyle SourceStyle; SourceStyle sourceStyle = CLikeSourceEmitter::getSourceStyle(target); switch (sourceStyle) { case SourceStyle::CPP: { sourceEmitter = new CPPSourceEmitter(desc); break; } case SourceStyle::GLSL: { sourceEmitter = new GLSLSourceEmitter(desc); break; } case SourceStyle::HLSL: { sourceEmitter = new HLSLSourceEmitter(desc); break; } case SourceStyle::CUDA: { sourceEmitter = new CUDASourceEmitter(desc); break; } default: break; } if (!sourceEmitter) { sink->diagnose(SourceLoc(), Diagnostics::unableToGenerateCodeForTarget, TypeTextUtil::getCompileTargetName(SlangCompileTarget(target))); return SLANG_FAIL; } SLANG_RETURN_ON_FAIL(sourceEmitter->init()); { LinkingAndOptimizationOptions linkingAndOptimizationOptions; linkingAndOptimizationOptions.sourceEmitter = sourceEmitter; switch( sourceStyle ) { default: break; case SourceStyle::CPP: case SourceStyle::C: case SourceStyle::CUDA: linkingAndOptimizationOptions.shouldLegalizeExistentialAndResourceTypes = false; break; } linkAndOptimizeIR( compileRequest, entryPointIndex, target, targetRequest, linkingAndOptimizationOptions, linkedIR); auto irModule = linkedIR.module; // After all of the required optimization and legalization // passes have been performed, we can emit target code from // the IR module. // // TODO: do we want to emit directly from IR, or translate the // IR back into AST for emission? #if 0 dumpIR(compileRequest, irModule, "PRE-EMIT"); #endif sourceEmitter->emitModule(irModule); } String code = sourceWriter.getContent(); sourceWriter.clearContent(); // Now that we've emitted the code for all the declarations in the file, // it is time to stitch together the final output. { Session* session = compileRequest->getSession(); // Get the downstream compiler needed for final target PassThroughMode passThru = getDownstreamCompilerRequiredForTarget(session, targetRequest->target); // If nothing was *needed*, we still need some idea of which downstream compiler is going to be used for the // prelude (the prelude is associated with each specific downstream compiler) if (passThru == PassThroughMode::None) { // TODO(JS): // NOTE! This makes the *assumption* that if we are outputting source, we don't want the // prelude for a specific compiler, we are happy to just use the GenericCCpp // If we didn't do this, we would have to do the work out what's available etc. // // This is all a bit unfortunate. The decision to associate preludes on compilers seemed like a good idea, but was perhaps a mistake. // That it would be simpler if prelude was based on output source type. passThru = getPreludeDownstreamCompilerForTarget(session, targetRequest->target); } else { // If a compiler was required that means a downstream compiler *will* be used // so lookup which specific compiler is used, and use it's prelude // Currently this distinction is only applicable to C++ if (passThru == PassThroughMode::GenericCCpp) { const SourceLanguage sourceLanguage = (sourceStyle == SourceStyle::C) ? SourceLanguage::C : SourceLanguage::CPP; // Get the compiler used for the language DownstreamCompiler* compiler = session->getDefaultDownstreamCompiler(sourceLanguage); if (compiler) { passThru = PassThroughMode(compiler->getDesc().type); } } } // If there is a prelude emit it const auto& prelude = compileRequest->getSession()->getDownstreamCompilerPrelude(passThru); if (prelude.getLength() > 0) { sourceWriter.emit(prelude.getUnownedSlice()); } } // There may be global-scope modifiers that we should emit now sourceEmitter->emitPreprocessorDirectives(); RefObject* extensionTracker = sourceEmitter->getExtensionTracker(); if (auto glslExtensionTracker = as(extensionTracker)) { StringBuilder builder; glslExtensionTracker->appendExtensionRequireLines(builder); sourceWriter.emit(builder.getUnownedSlice()); } sourceEmitter->emitLayoutDirectives(targetRequest); String prefix = sourceWriter.getContent(); StringBuilder finalResultBuilder; finalResultBuilder << prefix; finalResultBuilder << code; // Write out the result outSource.source = finalResultBuilder.ProduceString(); outSource.extensionTracker = extensionTracker; return SLANG_OK; } SlangResult emitSPIRVFromIR( BackEndCompileRequest* compileRequest, IRModule* irModule, IRFunc* irEntryPoint, List& spirvOut); SlangResult emitSPIRVForEntryPointDirectly( BackEndCompileRequest* compileRequest, Int entryPointIndex, TargetRequest* targetRequest, List& spirvOut) { auto sink = compileRequest->getSink(); auto program = compileRequest->getProgram(); auto targetProgram = program->getTargetProgram(targetRequest); auto programLayout = targetProgram->getOrCreateLayout(sink); RefPtr entryPointLayout = programLayout->entryPoints[entryPointIndex]; // Outside because we want to keep IR in scope whilst we are processing emits LinkedIR linkedIR; LinkingAndOptimizationOptions linkingAndOptimizationOptions; linkAndOptimizeIR( compileRequest, entryPointIndex, targetRequest->getTarget(), targetRequest, linkingAndOptimizationOptions, linkedIR); auto irModule = linkedIR.module; auto irEntryPoint = linkedIR.entryPoint; emitSPIRVFromIR( compileRequest, irModule, irEntryPoint, spirvOut); return SLANG_OK; } } // namespace Slang