// slang-emit.cpp #include "../compiler-core/slang-artifact-associated-impl.h" #include "../compiler-core/slang-artifact-desc-util.h" #include "../compiler-core/slang-artifact-impl.h" #include "../compiler-core/slang-artifact-util.h" #include "../compiler-core/slang-name.h" #include "../core/slang-castable.h" #include "../core/slang-performance-profiler.h" #include "../core/slang-type-text-util.h" #include "../core/slang-writer.h" #include "slang-check-out-of-bound-access.h" #include "slang-emit-c-like.h" #include "slang-emit-cpp.h" #include "slang-emit-cuda.h" #include "slang-emit-glsl.h" #include "slang-emit-hlsl.h" #include "slang-emit-metal.h" #include "slang-emit-slang.h" #include "slang-emit-source-writer.h" #include "slang-emit-torch.h" #include "slang-emit-vm.h" #include "slang-emit-wgsl.h" #include "slang-ir-any-value-inference.h" #include "slang-ir-autodiff.h" #include "slang-ir-bind-existentials.h" #include "slang-ir-byte-address-legalize.h" #include "slang-ir-check-recursion.h" #include "slang-ir-check-shader-parameter-type.h" #include "slang-ir-check-unsupported-inst.h" #include "slang-ir-cleanup-void.h" #include "slang-ir-collect-global-uniforms.h" #include "slang-ir-com-interface.h" #include "slang-ir-composite-reg-to-mem.h" #include "slang-ir-dce.h" #include "slang-ir-defer-buffer-load.h" #include "slang-ir-defunctionalization.h" #include "slang-ir-detect-uninitialized-resources.h" #include "slang-ir-diff-call.h" #include "slang-ir-dll-export.h" #include "slang-ir-dll-import.h" #include "slang-ir-early-raytracing-intrinsic-simplification.h" #include "slang-ir-eliminate-multilevel-break.h" #include "slang-ir-eliminate-phis.h" #include "slang-ir-entry-point-decorations.h" #include "slang-ir-entry-point-raw-ptr-params.h" #include "slang-ir-entry-point-uniforms.h" #include "slang-ir-explicit-global-context.h" #include "slang-ir-explicit-global-init.h" #include "slang-ir-fix-entrypoint-callsite.h" #include "slang-ir-float-non-uniform-resource-index.h" #include "slang-ir-fuse-satcoop.h" #include "slang-ir-glsl-legalize.h" #include "slang-ir-glsl-liveness.h" #include "slang-ir-hlsl-legalize.h" #include "slang-ir-inline.h" #include "slang-ir-insts.h" #include "slang-ir-layout.h" #include "slang-ir-legalize-array-return-type.h" #include "slang-ir-legalize-binary-operator.h" #include "slang-ir-legalize-composite-select.h" #include "slang-ir-legalize-empty-array.h" #include "slang-ir-legalize-global-values.h" #include "slang-ir-legalize-image-subscript.h" #include "slang-ir-legalize-matrix-types.h" #include "slang-ir-legalize-mesh-outputs.h" #include "slang-ir-legalize-uniform-buffer-load.h" #include "slang-ir-legalize-varying-params.h" #include "slang-ir-legalize-vector-types.h" #include "slang-ir-link.h" #include "slang-ir-liveness.h" #include "slang-ir-loop-unroll.h" #include "slang-ir-lower-append-consume-structured-buffer.h" #include "slang-ir-lower-binding-query.h" #include "slang-ir-lower-bit-cast.h" #include "slang-ir-lower-buffer-element-type.h" #include "slang-ir-lower-combined-texture-sampler.h" #include "slang-ir-lower-coopvec.h" #include "slang-ir-lower-dynamic-resource-heap.h" #include "slang-ir-lower-enum-type.h" #include "slang-ir-lower-generics.h" #include "slang-ir-lower-glsl-ssbo-types.h" #include "slang-ir-lower-l-value-cast.h" #include "slang-ir-lower-optional-type.h" #include "slang-ir-lower-reinterpret.h" #include "slang-ir-lower-result-type.h" #include "slang-ir-lower-tuple-types.h" #include "slang-ir-metadata.h" #include "slang-ir-metal-legalize.h" #include "slang-ir-missing-return.h" #include "slang-ir-optix-entry-point-uniforms.h" #include "slang-ir-pytorch-cpp-binding.h" #include "slang-ir-redundancy-removal.h" #include "slang-ir-resolve-texture-format.h" #include "slang-ir-resolve-varying-input-ref.h" #include "slang-ir-restructure-scoping.h" #include "slang-ir-restructure.h" #include "slang-ir-sccp.h" #include "slang-ir-simplify-for-emit.h" #include "slang-ir-specialize-address-space.h" #include "slang-ir-specialize-arrays.h" #include "slang-ir-specialize-buffer-load-arg.h" #include "slang-ir-specialize-matrix-layout.h" #include "slang-ir-specialize-resources.h" #include "slang-ir-specialize-stage-switch.h" #include "slang-ir-specialize.h" #include "slang-ir-ssa-simplification.h" #include "slang-ir-ssa.h" #include "slang-ir-string-hash.h" #include "slang-ir-strip-debug-info.h" #include "slang-ir-strip-default-construct.h" #include "slang-ir-strip-legalization-insts.h" #include "slang-ir-synthesize-active-mask.h" #include "slang-ir-transform-params-to-constref.h" #include "slang-ir-translate-global-varying-var.h" #include "slang-ir-undo-param-copy.h" #include "slang-ir-uniformity.h" #include "slang-ir-user-type-hint.h" #include "slang-ir-validate.h" #include "slang-ir-variable-scope-correction.h" #include "slang-ir-vk-invert-y.h" #include "slang-ir-wgsl-legalize.h" #include "slang-ir-wrap-cbuffer-element.h" #include "slang-ir-wrap-structured-buffers.h" #include "slang-legalize-types.h" #include "slang-lower-to-ir.h" #include "slang-mangle.h" #include "slang-syntax.h" #include "slang-type-layout.h" #include "slang-visitor.h" #include "slang-vm-bytecode.h" #include Slang::String get_slang_cpp_host_prelude(); Slang::String get_slang_torch_prelude(); namespace Slang { EntryPointLayout* findEntryPointLayout(ProgramLayout* programLayout, EntryPoint* entryPoint) { // TODO: This function shouldn't need to exist, and it // somewhat hampers the capabilities of the compiler (e.g., // it isn't supported to have a single program contain // two different "instances" of the same entry point). // // Code that cares about layouts should be looking up // the entry point layout by index on a `ProgramLayout`, // knowing that those indices will align with the order // of entry points on the `ComponentType` for the program. for (auto entryPointLayout : programLayout->entryPoints) { if (entryPointLayout->entryPoint.getName() != entryPoint->getName()) continue; // TODO: We need to be careful about this check, since it relies on // the profile information in the layout matching that in the request. // // What we really seem to want here is some dictionary mapping the // `EntryPoint` directly to the `EntryPointLayout`, and maybe // that is precisely what we should build... // if (entryPointLayout->profile != entryPoint->getProfile()) continue; return entryPointLayout; } return nullptr; } /// Given a layout computed for a scope, get the layout to use when lookup up variables. /// /// A scope (such as the global scope of a program) groups its /// parameters into a pseudo-`struct` type for layout purposes, /// and in some cases that type will in turn be wrapped in a /// `ConstantBuffer` type to indicate that the parameters needed /// an implicit constant buffer to be allocated. /// /// This function "unwraps" the type layout to find the structure /// type layout that must be stored inside. /// StructTypeLayout* getScopeStructLayout(ScopeLayout* scopeLayout) { auto scopeTypeLayout = scopeLayout->parametersLayout->typeLayout; if (auto constantBufferTypeLayout = as(scopeTypeLayout)) { scopeTypeLayout = constantBufferTypeLayout->offsetElementTypeLayout; } if (auto structTypeLayout = as(scopeTypeLayout)) { return structTypeLayout; } SLANG_UNEXPECTED("uhandled global-scope binding layout"); return nullptr; } /// Given a layout computed for a program, get the layout to use when lookup up variables. /// /// This is just an alias of `getScopeStructLayout`. /// StructTypeLayout* getGlobalStructLayout(ProgramLayout* programLayout) { return getScopeStructLayout(programLayout); } static void dumpIRIfEnabled( CodeGenContext* codeGenContext, IRModule* irModule, char const* label = nullptr) { if (codeGenContext->shouldDumpIR()) { DiagnosticSinkWriter writer(codeGenContext->getSink()); // FILE* f = nullptr; // fopen_s(&f, (String("dump-") + label + ".txt").getBuffer(), "wt"); // FileWriter writer(f, 0); dumpIR( irModule, codeGenContext->getIRDumpOptions(), label, codeGenContext->getSourceManager(), &writer); // fclose(f); } } static void reportCheckpointIntermediates( CodeGenContext* codeGenContext, DiagnosticSink* sink, IRModule* irModule) { // Report checkpointing information CompilerOptionSet& optionSet = codeGenContext->getTargetProgram()->getOptionSet(); SourceManager* sourceManager = sink->getSourceManager(); SourceWriter typeWriter(sourceManager, LineDirectiveMode::None, nullptr); CLikeSourceEmitter::Desc description; description.codeGenContext = codeGenContext; description.sourceWriter = &typeWriter; CPPSourceEmitter emitter(description); int nonEmptyStructs = 0; for (auto inst : irModule->getGlobalInsts()) { IRStructType* structType = as(inst); if (!structType) continue; auto checkpointDecoration = structType->findDecoration(); if (!checkpointDecoration) continue; IRSizeAndAlignment structSize; getNaturalSizeAndAlignment(optionSet, structType, &structSize); // Reporting happens before empty structs are optimized out // and we still want to keep the checkpointing decorations, // so we end up needing to check for non-zero-ness if (structSize.size == 0) continue; auto func = checkpointDecoration->getSourceFunction(); sink->diagnose( structType, Diagnostics::reportCheckpointIntermediates, func, structSize.size); nonEmptyStructs++; for (auto field : structType->getFields()) { IRType* fieldType = field->getFieldType(); IRSizeAndAlignment fieldSize; getNaturalSizeAndAlignment(optionSet, fieldType, &fieldSize); if (fieldSize.size == 0) continue; typeWriter.clearContent(); emitter.emitType(fieldType); sink->diagnose( field->sourceLoc, field->findDecoration() ? Diagnostics::reportCheckpointCounter : Diagnostics::reportCheckpointVariable, fieldSize.size, typeWriter.getContent()); } } if (nonEmptyStructs == 0) sink->diagnose(SourceLoc(), Diagnostics::reportCheckpointNone); } struct LinkingAndOptimizationOptions { bool shouldLegalizeExistentialAndResourceTypes = true; CLikeSourceEmitter* sourceEmitter = nullptr; }; // Scan the IR module and determine which lowering/legalization passes are needed based // on the instructions we see. // void calcRequiredLoweringPassSet( RequiredLoweringPassSet& result, CodeGenContext* codeGenContext, IRInst* inst) { switch (inst->getOp()) { case kIROp_DebugValue: case kIROp_DebugVar: case kIROp_DebugLine: case kIROp_DebugLocationDecoration: case kIROp_DebugSource: case kIROp_DebugInlinedAt: case kIROp_DebugScope: case kIROp_DebugNoScope: case kIROp_DebugFunction: case kIROp_DebugBuildIdentifier: result.debugInfo = true; break; case kIROp_ResultType: result.resultType = true; break; case kIROp_OptionalType: result.optionalType = true; break; case kIROp_EnumType: result.enumType = true; break; case kIROp_TextureType: if (!isKhronosTarget(codeGenContext->getTargetReq())) { if (auto texType = as(inst)) { auto isCombined = texType->getIsCombinedInst(); if (auto isCombinedVal = as(isCombined)) { if (isCombinedVal->getValue() != 0) { result.combinedTextureSamplers = true; } } else { result.combinedTextureSamplers = true; } } } break; case kIROp_PseudoPtrType: case kIROp_BoundInterfaceType: case kIROp_BindExistentialsType: result.generics = true; result.existentialTypeLayout = true; break; case kIROp_GetRegisterIndex: case kIROp_GetRegisterSpace: result.bindingQuery = true; break; case kIROp_BackwardDifferentiate: case kIROp_ForwardDifferentiate: case kIROp_MakeDifferentialPairUserCode: result.autodiff = true; break; case kIROp_VerticesType: case kIROp_IndicesType: case kIROp_PrimitivesType: result.meshOutput = true; break; case kIROp_CreateExistentialObject: case kIROp_MakeExistential: case kIROp_ExtractExistentialType: case kIROp_ExtractExistentialValue: case kIROp_ExtractExistentialWitnessTable: case kIROp_WrapExistential: case kIROp_LookupWitnessMethod: result.generics = true; break; case kIROp_Specialize: { auto specInst = as(inst); if (!findAnyTargetIntrinsicDecoration(getResolvedInstForDecorations(specInst))) result.generics = true; } break; case kIROp_Reinterpret: result.reinterpret = true; break; case kIROp_BitCast: result.bitcast = true; break; case kIROp_AutoPyBindCudaDecoration: result.derivativePyBindWrapper = true; break; case kIROp_Param: if (as(inst->getDataType())) result.higherOrderFunc = true; break; case kIROp_GlobalInputDecoration: case kIROp_GlobalOutputDecoration: case kIROp_GetWorkGroupSize: result.globalVaryingVar = true; break; case kIROp_BindExistentialSlotsDecoration: result.bindExistential = true; result.generics = true; result.existentialTypeLayout = true; break; case kIROp_GLSLShaderStorageBufferType: result.glslSSBO = true; break; case kIROp_ByteAddressBufferLoad: case kIROp_ByteAddressBufferStore: case kIROp_HLSLRWByteAddressBufferType: case kIROp_HLSLByteAddressBufferType: result.byteAddressBuffer = true; break; case kIROp_DynamicResourceType: result.dynamicResource = true; break; case kIROp_GetDynamicResourceHeap: result.dynamicResourceHeap = true; break; case kIROp_ResolveVaryingInputRef: result.resolveVaryingInputRef = true; break; case kIROp_GetCurrentStage: result.specializeStageSwitch = true; break; case kIROp_MissingReturn: result.missingReturn = true; break; case kIROp_Select: if (!isScalarOrVectorType(inst->getFullType())) result.nonVectorCompositeSelect = true; break; } if (!result.generics || !result.existentialTypeLayout) { // If any instruction has an interface type, we need to run // the generics lowering pass. auto type = as(inst) ? inst : inst->getDataType(); for (;;) { if (auto ptrType = as(type)) type = ptrType->getValueType(); else break; } if (type && type->getOp() == kIROp_InterfaceType) { result.generics = true; result.existentialTypeLayout = true; } } for (auto child : inst->getDecorationsAndChildren()) { calcRequiredLoweringPassSet( codeGenContext->getRequiredLoweringPassSet(), codeGenContext, child); } } void diagnoseCallStack(IRInst* inst, DiagnosticSink* sink) { static const int maxDepth = 5; for (int i = 0; i < maxDepth; i++) { auto func = getParentFunc(inst); if (!func) return; bool shouldContinue = false; for (auto use = func->firstUse; use; use = use->nextUse) { auto user = use->getUser(); if (auto call = as(user)) { sink->diagnose(call, Diagnostics::seeCallOfFunc, func); inst = call; shouldContinue = true; break; } } if (!shouldContinue) return; } } bool checkStaticAssert(IRInst* inst, DiagnosticSink* sink) { switch (inst->getOp()) { case kIROp_StaticAssert: { IRInst* condi = inst->getOperand(0); if (auto condiLit = as(condi)) { if (!condiLit->getValue()) { IRInst* msg = inst->getOperand(1); if (auto msgLit = as(msg)) { sink->diagnose( inst, Diagnostics::staticAssertionFailure, msgLit->getStringSlice()); } else { sink->diagnose(inst, Diagnostics::staticAssertionFailureWithoutMessage); } diagnoseCallStack(inst, sink); } } else { sink->diagnose(condi, Diagnostics::staticAssertionConditionNotConstant); } return true; } } List removeList; for (auto child : inst->getChildren()) { if (checkStaticAssert(child, sink)) removeList.add(child); } for (auto child : removeList) { child->removeAndDeallocate(); } return false; } static void unexportNonEmbeddableIR(CodeGenTarget target, IRModule* irModule) { for (auto inst : irModule->getGlobalInsts()) { if (inst->getOp() == kIROp_Func) { bool remove = false; if (target == CodeGenTarget::HLSL) { // DXIL does not permit HLSLStructureBufferType in exported functions // or sadly Matrices (https://github.com/shader-slang/slang/issues/4880) auto type = as(inst->getFullType()); auto argCount = type->getOperandCount(); for (UInt aa = 0; aa < argCount; ++aa) { auto operand = type->getOperand(aa); if (operand->getOp() == kIROp_HLSLStructuredBufferType || operand->getOp() == kIROp_MatrixType) { remove = true; break; } } } else if (target == CodeGenTarget::SPIRV) { // SPIR-V does not allow exporting entry points if (inst->findDecoration()) { remove = true; } } if (remove) { if (auto dec = inst->findDecoration()) { dec->removeAndDeallocate(); } if (auto dec = inst->findDecoration()) { dec->removeAndDeallocate(); } } } } } // Add DenormPreserve and DenormFlushToZero decorations to all entry point functions static void addDenormalModeDecorations(IRModule* irModule, CodeGenContext* codeGenContext) { auto optionSet = codeGenContext->getTargetProgram()->getOptionSet(); // Only add decorations if we have floating point denormal handling mode options set auto denormalModeFp16 = optionSet.getDenormalModeFp16(); auto denormalModeFp32 = optionSet.getDenormalModeFp32(); auto denormalModeFp64 = optionSet.getDenormalModeFp64(); if (denormalModeFp16 == FloatingPointDenormalMode::Any && denormalModeFp32 == FloatingPointDenormalMode::Any && denormalModeFp64 == FloatingPointDenormalMode::Any) return; IRBuilder builder(irModule); // Apply floating point denormal handling mode decorations to all entry point functions for (auto inst : irModule->getGlobalInsts()) { IRFunc* func = nullptr; // Check if this is a direct function if (auto directFunc = as(inst)) { func = directFunc; } // Check if this is a generic that contains an entry point function else if (auto generic = as(inst)) { if (auto innerFunc = as(findGenericReturnVal(generic))) { func = innerFunc; } } if (!func) continue; // Check if this is an entry point function auto entryPoint = func->findDecoration(); if (!entryPoint) continue; // Handle FP16 denormal handling mode auto width16 = builder.getIntValue(builder.getUIntType(), 16); if (denormalModeFp16 == FloatingPointDenormalMode::Preserve) { builder.addFpDenormalPreserveDecoration(func, width16); } else if (denormalModeFp16 == FloatingPointDenormalMode::FlushToZero) { builder.addFpDenormalFlushToZeroDecoration(func, width16); } // Handle FP32 denormal handling mode auto width32 = builder.getIntValue(builder.getUIntType(), 32); if (denormalModeFp32 == FloatingPointDenormalMode::Preserve) { builder.addFpDenormalPreserveDecoration(func, width32); } else if (denormalModeFp32 == FloatingPointDenormalMode::FlushToZero) { builder.addFpDenormalFlushToZeroDecoration(func, width32); } // Handle FP64 denormal handling mode auto width64 = builder.getIntValue(builder.getUIntType(), 64); if (denormalModeFp64 == FloatingPointDenormalMode::Preserve) { builder.addFpDenormalPreserveDecoration(func, width64); } else if (denormalModeFp64 == FloatingPointDenormalMode::FlushToZero) { builder.addFpDenormalFlushToZeroDecoration(func, width64); } } } // Helper function to convert a 20 byte SHA1 to a hexadecimal string, // needed for the build identifier instruction. String getBuildIdentifierString(ComponentType* component) { ComPtr hashBlob; component->getEntryPointHash(0, 0, hashBlob.writeRef()); const uint8_t* data = reinterpret_cast(hashBlob->getBufferPointer()); size_t size = hashBlob->getBufferSize(); StringBuilder sb; for (size_t i = 0; i < size; ++i) sb << StringUtil::makeStringWithFormat("%02x", data[i]); return sb.produceString(); } Result linkAndOptimizeIR( CodeGenContext* codeGenContext, LinkingAndOptimizationOptions const& options, LinkedIR& outLinkedIR) { SLANG_PROFILE; auto session = codeGenContext->getSession(); auto sink = codeGenContext->getSink(); auto target = codeGenContext->getTargetFormat(); auto targetRequest = codeGenContext->getTargetReq(); auto targetProgram = codeGenContext->getTargetProgram(); auto targetCompilerOptions = targetRequest->getOptionSet(); // Get the artifact desc for the target const auto artifactDesc = ArtifactDescUtil::makeDescForCompileTarget(asExternal(target)); // We start out by performing "linking" at the level of the IR. // This step will create a fresh IR module to be used for // code generation, and will copy in any IR definitions that // the desired entry point requires. Along the way it will // resolve references to imported/exported symbols across // modules, and also select between the definitions of // any "profile-overloaded" symbols. // outLinkedIR = linkIR(codeGenContext); auto irModule = outLinkedIR.module; auto irEntryPoints = outLinkedIR.entryPoints; // For now, only emit the debug build identifier if separate debug info is enabled // and only if there are targets. // TODO: We will ultimately need to change this to always emit the instruction. if (targetCompilerOptions.shouldEmitSeparateDebugInfo()) { // Build identifier is a hash of the source code and compile options. String buildIdentifier = getBuildIdentifierString(codeGenContext->getProgram()); int buildIdentifierFlags = 0; IRBuilder builder(irModule); builder.setInsertInto(irModule->getModuleInst()); builder.emitDebugBuildIdentifier(buildIdentifier.getUnownedSlice(), buildIdentifierFlags); } #if 0 dumpIRIfEnabled(codeGenContext, irModule, "LINKED"); #endif validateIRModuleIfEnabled(codeGenContext, irModule); // If the user specified the flag that they want us to dump // IR, then do it here, for the target-specific, but // un-specialized IR. dumpIRIfEnabled(codeGenContext, irModule, "POST IR VALIDATION"); // Scan the IR module and determine which lowering/legalization passes are needed. RequiredLoweringPassSet& requiredLoweringPassSet = codeGenContext->getRequiredLoweringPassSet(); requiredLoweringPassSet = {}; calcRequiredLoweringPassSet(requiredLoweringPassSet, codeGenContext, irModule->getModuleInst()); // Debug info is added by the front-end, and therefore needs to be stripped out by targets that // opt out of debug info. if (requiredLoweringPassSet.debugInfo && (targetCompilerOptions.getIntOption(CompilerOptionName::DebugInformation) == SLANG_DEBUG_INFO_LEVEL_NONE)) stripDebugInfo(irModule); if (!isKhronosTarget(targetRequest) && requiredLoweringPassSet.glslSSBO) lowerGLSLShaderStorageBufferObjectsToStructuredBuffers(irModule, sink); if (requiredLoweringPassSet.globalVaryingVar) translateGlobalVaryingVar(codeGenContext, irModule); if (requiredLoweringPassSet.resolveVaryingInputRef) resolveVaryingInputRef(irModule); fixEntryPointCallsites(irModule); // Replace any global constants with their values. // replaceGlobalConstants(irModule); #if 0 dumpIRIfEnabled(codeGenContext, irModule, "GLOBAL CONSTANTS REPLACED"); #endif validateIRModuleIfEnabled(codeGenContext, irModule); // When there are top-level existential-type parameters // to the shader, we need to take the side-band information // on how the existential "slots" were bound to concrete // types, and use it to introduce additional explicit // shader parameters for those slots, to be wired up to // use sites. // if (requiredLoweringPassSet.bindExistential) bindExistentialSlots(irModule, sink); #if 0 dumpIRIfEnabled(codeGenContext, irModule, "EXISTENTIALS BOUND"); #endif validateIRModuleIfEnabled(codeGenContext, irModule); // Now that we've linked the IR code, any layout/binding // information has been attached to shader parameters // and entry points. Now we are safe to make transformations // that might move code without worrying about losing // the connection between a parameter and its layout. // One example of a transformation that needs to wait until // we have layout information is the step where we collect // any global-scope shader parameters with ordinary/uniform // type into an aggregate `struct`, and then (optionally) // wrap that `struct` up in a constant buffer. // // This step allows shaders to declare parameters of ordinary // type as globals in the input file, while ensuring that // downstream passes for graphics APIs like Vulkan and D3D // can assume that all ordinary/uniform data is strictly // passed using constant buffers. // collectGlobalUniformParameters(irModule, outLinkedIR.globalScopeVarLayout, target); #if 0 dumpIRIfEnabled(codeGenContext, irModule, "GLOBAL UNIFORMS COLLECTED"); #endif validateIRModuleIfEnabled(codeGenContext, irModule); checkEntryPointDecorations(irModule, target, sink); // Add floating point denormal handling mode decorations to entry point functions based on // compiler options. This is done post-linking to ensure all entry points from linked modules // are processed. addDenormalModeDecorations(irModule, codeGenContext); #if 0 dumpIRIfEnabled(codeGenContext, irModule, "FP DENORMAL MODE DECORATIONS ADDED"); #endif validateIRModuleIfEnabled(codeGenContext, irModule); // Another transformation that needed to wait until we // had layout information on parameters is to take uniform // parameters of a shader entry point and move them into // the global scope instead. // // TODO: We should skip this step for CUDA targets. // (NM): we actually do need to do this step for OptiX based CUDA targets // { CollectEntryPointUniformParamsOptions passOptions; passOptions.targetReq = targetRequest; switch (target) { case CodeGenTarget::HostCPPSource: case CodeGenTarget::HostVM: break; case CodeGenTarget::CUDASource: collectOptiXEntryPointUniformParams(irModule); #if 0 dumpIRIfEnabled(codeGenContext, irModule, "OPTIX ENTRY POINT UNIFORMS COLLECTED"); #endif validateIRModuleIfEnabled(codeGenContext, irModule); break; case CodeGenTarget::CPPSource: passOptions.alwaysCreateCollectedParam = true; [[fallthrough]]; default: collectEntryPointUniformParams(irModule, passOptions); #if 0 dumpIRIfEnabled(codeGenContext, irModule, "ENTRY POINT UNIFORMS COLLECTED"); #endif validateIRModuleIfEnabled(codeGenContext, irModule); break; } } switch (target) { default: moveEntryPointUniformParamsToGlobalScope(irModule); #if 0 dumpIRIfEnabled(codeGenContext, irModule, "ENTRY POINT UNIFORMS MOVED"); #endif validateIRModuleIfEnabled(codeGenContext, irModule); break; case CodeGenTarget::HostCPPSource: case CodeGenTarget::CPPSource: case CodeGenTarget::CUDASource: case CodeGenTarget::HostVM: break; } switch (target) { case CodeGenTarget::CUDASource: case CodeGenTarget::PyTorchCppBinding: break; default: removeTorchAndCUDAEntryPoints(irModule); break; } #if 0 dumpIRIfEnabled(codeGenContext, irModule, "UNIONS DESUGARED"); #endif validateIRModuleIfEnabled(codeGenContext, irModule); // Lower all the LValue implict casts (used for out/inout/ref scenarios) lowerLValueCast(targetProgram, irModule); IRSimplificationOptions defaultIRSimplificationOptions = IRSimplificationOptions::getDefault(targetProgram); IRSimplificationOptions fastIRSimplificationOptions = IRSimplificationOptions::getFast(targetProgram); IRDeadCodeEliminationOptions deadCodeEliminationOptions = IRDeadCodeEliminationOptions(); fastIRSimplificationOptions.minimalOptimization = defaultIRSimplificationOptions.minimalOptimization; deadCodeEliminationOptions.useFastAnalysis = fastIRSimplificationOptions.minimalOptimization; deadCodeEliminationOptions.keepGlobalParamsAlive = targetProgram->getOptionSet().getBoolOption(CompilerOptionName::PreserveParameters); simplifyIR(targetProgram, irModule, defaultIRSimplificationOptions, sink); if (targetProgram->getOptionSet().getBoolOption(CompilerOptionName::ValidateUniformity)) { validateUniformity(irModule, sink); if (sink->getErrorCount() != 0) return SLANG_FAIL; } // Fill in default matrix layout into matrix types that left layout unspecified. specializeMatrixLayout(targetProgram, irModule); // It's important that this takes place before defunctionalization as we // want to be able to easily discover the cooperate and fallback funcitons // being passed to saturated_cooperation if (!targetProgram->getOptionSet().shouldPerformMinimumOptimizations()) fuseCallsToSaturatedCooperation(irModule); switch (target) { case CodeGenTarget::CUDASource: case CodeGenTarget::PyTorchCppBinding: { // Generate any requested derivative wrappers if (requiredLoweringPassSet.derivativePyBindWrapper) generateDerivativeWrappers(irModule, sink); break; } default: break; } if (requiredLoweringPassSet.autodiff) { // Generate warnings for potentially incorrect or badly-performing autodiff patterns. checkAutodiffPatterns(targetProgram, irModule, sink); } // Next, we need to ensure that the code we emit for // the target doesn't contain any operations that would // be illegal on the target platform. For example, // none of our target supports generics, or interfaces, // so we need to specialize those away. // // Simplification of existential-based and generics-based // code may each open up opportunities for the other, so // the relevant specialization transformations are handled in a // single pass that looks for all simplification opportunities. // // TODO: We also need to extend this pass so that it will "expose" // existential values that are nested inside of other types, // so that the simplifications can be applied. // // TODO: This pass is *also* likely to be the place where we // perform specialization of functions based on parameter // values that need to be compile-time constants. // // Specialization passes and auto-diff passes runs in an iterative loop // since each pass can enable the other pass to progress further. for (;;) { bool changed = false; dumpIRIfEnabled(codeGenContext, irModule, "BEFORE-SPECIALIZE"); if (!codeGenContext->isSpecializationDisabled()) { // Pre-autodiff, we will attempt to specialize as much as possible. // // Note: Lowered dynamic-dispatch code cannot be differentiated correctly due to // missing information, so we defer that to after the auto-dff step. // SpecializationOptions specOptions; specOptions.lowerWitnessLookups = false; changed |= specializeModule(targetProgram, irModule, codeGenContext->getSink(), specOptions); } if (codeGenContext->getSink()->getErrorCount() != 0) return SLANG_FAIL; dumpIRIfEnabled(codeGenContext, irModule, "AFTER-SPECIALIZE"); if (changed) { applySparseConditionalConstantPropagation(irModule, codeGenContext->getSink()); } validateIRModuleIfEnabled(codeGenContext, irModule); // Inline calls to any functions marked with [__unsafeInlineEarly] again, // since we may be missing out cases prevented by the functions that we just specialzied. performMandatoryEarlyInlining(irModule); eliminateDeadCode(irModule, deadCodeEliminationOptions); // Unroll loops. if (!fastIRSimplificationOptions.minimalOptimization) { if (codeGenContext->getSink()->getErrorCount() == 0) { if (!unrollLoopsInModule(targetProgram, irModule, codeGenContext->getSink())) return SLANG_FAIL; } } // Few of our targets support higher order functions, and // we don't have the backend code to emit higher order functions for those // which do. // Specialize away these parameters // TODO: We should implement a proper defunctionalization pass if (requiredLoweringPassSet.higherOrderFunc) changed |= specializeHigherOrderParameters(codeGenContext, irModule); if (requiredLoweringPassSet.autodiff) { dumpIRIfEnabled(codeGenContext, irModule, "BEFORE-AUTODIFF"); { auto validationScope = enableIRValidationScope(); changed |= processAutodiffCalls(targetProgram, irModule, sink); } dumpIRIfEnabled(codeGenContext, irModule, "AFTER-AUTODIFF"); } if (!changed) break; } // Report checkpointing information if (codeGenContext->shouldReportCheckpointIntermediates()) { simplifyIR(targetProgram, irModule, fastIRSimplificationOptions, sink); reportCheckpointIntermediates(codeGenContext, sink, irModule); } // Finalization is always run so AD-related instructions can be removed, // even if the AD pass itself is not run. // finalizeAutoDiffPass(targetProgram, irModule); eliminateDeadCode(irModule, deadCodeEliminationOptions); // After auto-diff, we can perform more aggressive specialization with dynamic-dispatch // lowering. // if (!codeGenContext->isSpecializationDisabled()) { SpecializationOptions specOptions; specOptions.lowerWitnessLookups = true; specializeModule(targetProgram, irModule, codeGenContext->getSink(), specOptions); } finalizeSpecialization(irModule); // Lower `Result` types into ordinary struct types. This must happen // after specialization, since otherwise incompatible copies of the lowered // result structure are generated. if (requiredLoweringPassSet.resultType) lowerResultType(irModule, sink); if (requiredLoweringPassSet.optionalType) lowerOptionalType(irModule, sink); if (requiredLoweringPassSet.nonVectorCompositeSelect) { switch (target) { case CodeGenTarget::HLSL: legalizeNonVectorCompositeSelect(irModule); break; default: break; } } switch (target) { case CodeGenTarget::CPPSource: case CodeGenTarget::HostCPPSource: { lowerComInterfaces(irModule, artifactDesc.style, sink); generateDllImportFuncs(codeGenContext->getTargetProgram(), irModule, sink); generateDllExportFuncs(irModule, sink); break; } default: break; } calcRequiredLoweringPassSet(requiredLoweringPassSet, codeGenContext, irModule->getModuleInst()); switch (target) { case CodeGenTarget::PyTorchCppBinding: generateHostFunctionsForAutoBindCuda(irModule, sink); lowerBuiltinTypesForKernelEntryPoints(irModule, sink); generatePyTorchCppBinding(irModule, sink); handleAutoBindNames(irModule); break; case CodeGenTarget::CUDASource: lowerBuiltinTypesForKernelEntryPoints(irModule, sink); removeTorchKernels(irModule); handleAutoBindNames(irModule); break; default: break; } detectUninitializedResources(irModule, sink); if (codeGenContext->removeAvailableInDownstreamIR) { removeAvailableInDownstreamModuleDecorations(target, irModule); } if (targetProgram->getOptionSet().shouldRunNonEssentialValidation()) { checkForRecursiveTypes(irModule, sink); checkForRecursiveFunctions(codeGenContext->getTargetReq(), irModule, sink); checkForOutOfBoundAccess(irModule, sink); if (requiredLoweringPassSet.missingReturn) checkForMissingReturns(irModule, sink, target, false); // For some targets, we are more restrictive about what types are allowed // to be used as shader parameters in ConstantBuffer/ParameterBlock. // We will check for these restrictions here. checkForInvalidShaderParameterType(targetRequest, irModule, sink); } if (sink->getErrorCount() != 0) return SLANG_FAIL; // If we have a target that is GPU like we use the string hashing mechanism // but for that to work we need to inline such that calls (or returns) of strings // boil down into getStringHash(stringLiteral) if (!ArtifactDescUtil::isCpuLikeTarget(artifactDesc)) { // We could fail because // 1) It's not inlinable for some reason (for example if it's recursive) SLANG_RETURN_ON_FAIL(performTypeInlining(irModule, sink)); } if (requiredLoweringPassSet.reinterpret) lowerReinterpret(targetProgram, irModule, sink); if (sink->getErrorCount() != 0) return SLANG_FAIL; validateIRModuleIfEnabled(codeGenContext, irModule); inferAnyValueSizeWhereNecessary(targetProgram, irModule); // If we have any witness tables that are marked as `KeepAlive`, // but are not used for dynamic dispatch, unpin them so we don't // do unnecessary work to lower them. unpinWitnessTables(irModule); if (!fastIRSimplificationOptions.minimalOptimization) { simplifyIR(targetProgram, irModule, fastIRSimplificationOptions, sink); } else if (requiredLoweringPassSet.generics) { eliminateDeadCode(irModule, fastIRSimplificationOptions.deadCodeElimOptions); } if (!ArtifactDescUtil::isCpuLikeTarget(artifactDesc) && targetProgram->getOptionSet().shouldRunNonEssentialValidation()) { // We could fail because (perhaps, somehow) end up with getStringHash that the operand is // not a string literal SLANG_RETURN_ON_FAIL(checkGetStringHashInsts(irModule, sink)); } // For targets that supports dynamic dispatch, we need to lower the // generics / interface types to ordinary functions and types using // function pointers. dumpIRIfEnabled(codeGenContext, irModule, "BEFORE-LOWER-GENERICS"); if (requiredLoweringPassSet.generics) lowerGenerics(targetProgram, irModule, sink); else cleanupGenerics(targetProgram, irModule, sink); dumpIRIfEnabled(codeGenContext, irModule, "AFTER-LOWER-GENERICS"); if (requiredLoweringPassSet.enumType) lowerEnumType(irModule, sink); // Don't need to run any further target-dependent passes if we are generating code // for host vm. if (target == CodeGenTarget::HostVM) { performForceInlining(irModule); simplifyIR(targetProgram, irModule, defaultIRSimplificationOptions, sink); return SLANG_OK; } // After dynamic dispatch logic is resolved into ordinary function calls, // we can now run our stage specialization logic. if (requiredLoweringPassSet.specializeStageSwitch) specializeStageSwitch(irModule); if (sink->getErrorCount() != 0) return SLANG_FAIL; #if 0 dumpIRIfEnabled(codeGenContext, irModule, "SPECIALIZED"); #endif validateIRModuleIfEnabled(codeGenContext, irModule); switch (target) { case CodeGenTarget::SPIRV: case CodeGenTarget::SPIRVAssembly: case CodeGenTarget::HLSL: break; default: lowerCooperativeVectors(irModule, sink); } // Inline calls to any functions marked with [__unsafeInlineEarly] or [ForceInline]. performForceInlining(irModule); // Specialization can introduce dead code that could trip // up downstream passes like type legalization, so we // will run a DCE pass to clean up after the specialization. // if (fastIRSimplificationOptions.minimalOptimization) { eliminateDeadCode(irModule, deadCodeEliminationOptions); } else { simplifyIR(targetProgram, irModule, defaultIRSimplificationOptions, sink); } validateIRModuleIfEnabled(codeGenContext, irModule); // On non-HLSL targets, there isn't an implementation of `AppendStructuredBuffer` // and `ConsumeStructuredBuffer` types, so we lower them into normal struct types // of `RWStructuredBuffer` typed fields now. if (target != CodeGenTarget::HLSL) { lowerAppendConsumeStructuredBuffers(targetProgram, irModule, sink); } switch (target) { default: if (!ArtifactDescUtil::isCpuLikeTarget(artifactDesc)) break; [[fallthrough]]; case CodeGenTarget::HLSL: case CodeGenTarget::Metal: case CodeGenTarget::MetalLib: case CodeGenTarget::MetalLibAssembly: case CodeGenTarget::WGSL: if (requiredLoweringPassSet.combinedTextureSamplers) lowerCombinedTextureSamplers(codeGenContext, irModule, sink); break; } if (codeGenContext->getTargetProgram()->getOptionSet().getBoolOption( CompilerOptionName::VulkanEmitReflection)) { addUserTypeHintDecorations(irModule); } legalizeEmptyArray(irModule, sink); // For CUDA targets, always inline global constants to avoid dynamic initialization // of __device__ variables rejected by NVRTC. This runs independently of the broader // resource/existential type legalization, which remains disabled for CUDA. if (target == CodeGenTarget::CUDASource || options.shouldLegalizeExistentialAndResourceTypes) { inlineGlobalConstantsForLegalization(irModule); } // We don't need the legalize pass for C/C++ based types if (options.shouldLegalizeExistentialAndResourceTypes) { if (isMetalTarget(targetRequest)) { // Metal is a special target in that we want to legalize constant buffer // types as if it is a bindful target, and skip legalizing parameter block // types as if it is a bindless target. // To achieve this, we want to ensure that all resource typed fields in parameter blocks // are translated into descriptor handles first before running the resource type // legalization pass for metal, so that type legalization pass won't mess around with // them. BufferElementTypeLoweringOptions bufferElementTypeLoweringOptions = {}; bufferElementTypeLoweringOptions.loweringPolicyKind = BufferElementTypeLoweringPolicyKind::MetalParameterBlock; lowerBufferElementTypeToStorageType( targetProgram, irModule, bufferElementTypeLoweringOptions); } // The Slang language allows interfaces to be used like // ordinary types (including placing them in constant // buffers and entry-point parameter lists), but then // getting them to lay out in a reasonable way requires // us to treat fields/variables with interface type // *as if* they were pointers to heap-allocated "objects." // // Specialization will have replaced fields/variables // with interface types like `IFoo` with fields/variables // with pointer-like types like `ExistentialBox`. // // We need to legalize these pointer-like types away, // which involves two main changes: // // 1. Any `ExistentialBox<...>` fields need to be moved // out of their enclosing `struct` type, so that the layout // of the enclosing type is computed as if the field had // zero size. // // 2. Once an `ExistentialBox` has been floated out // of its parent and landed somwhere permanent (e.g., either // a dedicated variable, or a field of constant buffer), // we need to replace it with just an `X`, after which we // will have (more) legal shader code. // if (requiredLoweringPassSet.existentialTypeLayout) { legalizeExistentialTypeLayout(targetProgram, irModule, sink); } #if 0 dumpIRIfEnabled(codeGenContext, irModule, "EXISTENTIALS LEGALIZED"); #endif validateIRModuleIfEnabled(codeGenContext, irModule); if (!validateStructuredBufferResourceTypes(irModule, sink, targetRequest)) return SLANG_FAIL; // Many of our target languages and/or downstream compilers // don't support `struct` types that have resource-type fields. // In order to work around this limitation, we will rewrite the // IR so that any structure types with resource-type fields get // split into a "tuple" that comprises the ordinary fields (still // bundles up as a `struct`) and one element for each resource-type // field (recursively). // // What used to be individual variables/parameters/arguments/etc. // then become multiple variables/parameters/arguments/etc. // legalizeResourceTypes(targetProgram, irModule, sink); // We also need to legalize empty types for Metal targets. switch (target) { case CodeGenTarget::Metal: case CodeGenTarget::MetalLib: case CodeGenTarget::MetalLibAssembly: legalizeEmptyTypes(targetProgram, irModule, sink); break; } // Debugging output of legalization #if 0 dumpIRIfEnabled(codeGenContext, irModule, "LEGALIZED"); #endif validateIRModuleIfEnabled(codeGenContext, irModule); } else { // On CPU/CUDA targets, we simply elminate any empty types if // they are not part of public interface. legalizeEmptyTypes(targetProgram, irModule, sink); } legalizeMatrixTypes(targetProgram, irModule, sink); dumpIRIfEnabled(codeGenContext, irModule, "AFTER-MATRIX-LEGALIZATION"); legalizeVectorTypes(irModule, sink); dumpIRIfEnabled(codeGenContext, irModule, "AFTER-VECTOR-LEGALIZATION"); // Once specialization and type legalization have been performed, // we should perform some of our basic optimization steps again, // to see if we can clean up any temporaries created by legalization. // (e.g., things that used to be aggregated might now be split up, // so that we can work with the individual fields). if (fastIRSimplificationOptions.minimalOptimization) eliminateDeadCode(irModule, deadCodeEliminationOptions); else simplifyIR(targetProgram, irModule, fastIRSimplificationOptions, sink); if (requiredLoweringPassSet.dynamicResourceHeap) lowerDynamicResourceHeap(targetProgram, irModule, sink); #if 0 dumpIRIfEnabled(codeGenContext, irModule, "AFTER SSA"); #endif validateIRModuleIfEnabled(codeGenContext, irModule); // After type legalization and subsequent SSA cleanup we expect // that any resource types passed to functions are exposed // as their own top-level parameters (which might have // resource or array-of-...-resource types). // // Many of our targets place restrictions on how certain // resource types can be used, so that having them as // function parameters, reults, etc. is invalid. // We clean up the usages of resource values here. specializeResourceUsage(codeGenContext, irModule); // Specialize calls to functions with values loaded from an immutable location, // so that we directly load the value inside the callee, instead of loading the // value outside of the callee and copy it in. This is necessary to avoid copying // large values (e.g. arrays) in registers, where most of the elements are not // actually used. specializeFuncsForBufferLoadArgs(codeGenContext, irModule); // Push `structuredBufferLoad` to the end of access chain to avoid loading unnecessary data. deferBufferLoad(codeGenContext, irModule); // We also want to specialize calls to functions that // takes unsized array parameters if possible. specializeArrayParameters(codeGenContext, irModule); #if 0 dumpIRIfEnabled(codeGenContext, irModule, "AFTER RESOURCE SPECIALIZATION"); #endif validateIRModuleIfEnabled(codeGenContext, irModule); // Process `static_assert` after the specialization is done. // Some information for `static_assert` is available only after the specialization. checkStaticAssert(irModule->getModuleInst(), sink); switch (target) { case CodeGenTarget::HLSL: { // For HLSL(fxc) only, we need to "wrap" any // structured buffers defined over matrix types so // that they instead use an intermediate `struct`. // This is required to get those targets to respect // the options for matrix layout set via `#pragma` // or command-line options. // wrapStructuredBuffersOfMatrices(irModule); #if 0 dumpIRIfEnabled(codeGenContext, irModule, "STRUCTURED BUFFERS WRAPPED"); #endif validateIRModuleIfEnabled(codeGenContext, irModule); } break; case CodeGenTarget::Metal: case CodeGenTarget::MetalLib: // Metal does not allow `ConstantBuffer>`, so we need to create // a wrapper struct for the `StructuredBuffer`. wrapCBufferElementsForMetal(irModule); break; default: break; } // For all targets, we translate load/store operations // of aggregate types from/to byte-address buffers into // stores of individual scalar or vector values. // if (requiredLoweringPassSet.byteAddressBuffer) { ByteAddressBufferLegalizationOptions byteAddressBufferOptions; // Depending on the target, we may decide to do // more aggressive translation that reduces the // load/store operations down to invididual scalars // (splitting up vector ops). // switch (target) { default: break; case CodeGenTarget::GLSL: case CodeGenTarget::SPIRV: case CodeGenTarget::SPIRVAssembly: // For GLSL targets, we want to translate the vector load/store // operations into scalar ops. This is in part as a simplification, // but it also ensures that our generated code respects the lax // alignment rules for D3D byte-address buffers (the base address // of a buffer need not be more than 4-byte aligned, and loads // of vectors need only be aligned based on their element type). // // Slang IR supports a variant of `Load` on byte-address buffers // that will have greater alignment than required by D3D. The // alignment information is inferred from the operation like a // `Load4Aligned` that returns a `vector<4,T>` that assumes a // `4*sizeof(T)` alignment. We may choose to disable that in favor // of byte-address indexing by setting this flag to true. byteAddressBufferOptions.scalarizeVectorLoadStore = false; // For GLSL targets, there really isn't a low-level concept // of a byte-address buffer at all, and the standard "shader storage // buffer" (SSBO) feature is a lot closer to an HLSL structured // buffer for our purposes. // // In particular, each SSBO can only have a single element type, // so that even with bitcasts we can't have a single buffer declaration // (e.g., one with `uint` elements) service all load/store operations // (e.g., a `half` value can't be stored atomically if there are // `uint` elements, unless we use explicit atomics). // // In order to simplify things, we will translate byte-address buffer // ops to equivalent structured-buffer ops for GLSL targets, where // each unique type being loaded/stored yields a different global // parameter declaration of the buffer. // byteAddressBufferOptions.translateToStructuredBufferOps = true; break; case CodeGenTarget::Metal: case CodeGenTarget::MetalLib: case CodeGenTarget::MetalLibAssembly: byteAddressBufferOptions.scalarizeVectorLoadStore = true; byteAddressBufferOptions.treatGetEquivalentStructuredBufferAsGetThis = true; byteAddressBufferOptions.translateToStructuredBufferOps = false; byteAddressBufferOptions.lowerBasicTypeOps = true; break; case CodeGenTarget::WGSL: case CodeGenTarget::WGSLSPIRV: case CodeGenTarget::WGSLSPIRVAssembly: byteAddressBufferOptions.scalarizeVectorLoadStore = true; byteAddressBufferOptions.treatGetEquivalentStructuredBufferAsGetThis = true; byteAddressBufferOptions.translateToStructuredBufferOps = false; byteAddressBufferOptions.lowerBasicTypeOps = true; byteAddressBufferOptions.useBitCastFromUInt = true; break; } // We also need to decide whether to translate // any "leaf" load/store operations over to // use only unsigned-integer types and then // bit-cast, or if we prefer to leave them // as load/store of the original type. // switch (target) { case CodeGenTarget::HLSL: { auto profile = codeGenContext->getTargetProgram()->getOptionSet().getProfile(); if (profile.getFamily() == ProfileFamily::DX) { if (profile.getVersion() <= ProfileVersion::DX_5_0) { // Fxc and earlier dxc versions do not support // a templates `.Load` operation on byte-address // buffers, and instead need us to emit separate // `uint` loads and then bit-cast over to // the correct type. // byteAddressBufferOptions.useBitCastFromUInt = true; } } } break; default: break; } legalizeByteAddressBufferOps( session, targetProgram, irModule, codeGenContext->getSink(), byteAddressBufferOptions); } // For SPIR-V, this function is called elsewhere, so that it can happen after address space // specialization if (target != CodeGenTarget::SPIRV && target != CodeGenTarget::SPIRVAssembly) { bool skipFuncParamValidation = true; validateAtomicOperations(skipFuncParamValidation, sink, irModule->getModuleInst()); } // For CUDA targets only, we will need to turn operations // the implicitly reference the "active mask" into ones // that use (and pass around) an explicit mask instead. // switch (target) { case CodeGenTarget::CUDASource: case CodeGenTarget::PTX: { synthesizeActiveMask(irModule, codeGenContext->getSink()); #if 0 dumpIRIfEnabled(codeGenContext, irModule, "AFTER synthesizeActiveMask"); #endif validateIRModuleIfEnabled(codeGenContext, irModule); } break; default: break; } switch (target) { case CodeGenTarget::GLSL: case CodeGenTarget::SPIRV: case CodeGenTarget::WGSL: resolveTextureFormat(irModule); break; } // For GLSL only, we will need to perform "legalization" of // the entry point and any entry-point parameters. // // TODO: We should consider moving this legalization work // as late as possible, so that it doesn't affect how other // optimization passes need to work. // switch (target) { case CodeGenTarget::GLSL: case CodeGenTarget::SPIRV: case CodeGenTarget::SPIRVAssembly: { ShaderExtensionTracker glslExtensionTracker; ShaderExtensionTracker* glslExtensionTrackerPtr = options.sourceEmitter ? as(options.sourceEmitter->getExtensionTracker()) : &glslExtensionTracker; #if 0 dumpIRIfEnabled(codeGenContext, irModule, "PRE GLSL LEGALIZED"); #endif legalizeEntryPointsForGLSL( session, irModule, irEntryPoints, codeGenContext, glslExtensionTrackerPtr); #if 0 dumpIRIfEnabled(codeGenContext, irModule, "GLSL LEGALIZED"); #endif validateIRModuleIfEnabled(codeGenContext, irModule); } break; case CodeGenTarget::Metal: case CodeGenTarget::MetalLib: case CodeGenTarget::MetalLibAssembly: { legalizeIRForMetal(irModule, sink); } break; case CodeGenTarget::CSource: case CodeGenTarget::CPPSource: { legalizeEntryPointVaryingParamsForCPU(irModule, codeGenContext->getSink()); } break; case CodeGenTarget::CUDASource: { legalizeEntryPointVaryingParamsForCUDA(irModule, codeGenContext->getSink()); } break; case CodeGenTarget::WGSL: case CodeGenTarget::WGSLSPIRV: case CodeGenTarget::WGSLSPIRVAssembly: { legalizeIRForWGSL(irModule, sink); } break; default: break; } if (!isSPIRV(targetRequest->getTarget())) { floatNonUniformResourceIndex(irModule, NonUniformResourceIndexFloatMode::Textual); } if (isD3DTarget(targetRequest) || isKhronosTarget(targetRequest) || isWGPUTarget(targetRequest) || isMetalTarget(targetRequest)) legalizeLogicalAndOr(irModule->getModuleInst()); // Legalize non struct parameters that are expected to be structs for HLSL. if (isD3DTarget(targetRequest)) legalizeNonStructParameterToStructForHLSL(irModule); // Create aliases for all dynamic resource parameters. if (requiredLoweringPassSet.dynamicResource && isKhronosTarget(targetRequest)) legalizeDynamicResourcesForGLSL(codeGenContext, irModule); // Legalize `ImageSubscript` loads. switch (target) { case CodeGenTarget::MetalLibAssembly: case CodeGenTarget::MetalLib: case CodeGenTarget::Metal: case CodeGenTarget::GLSL: case CodeGenTarget::SPIRV: case CodeGenTarget::SPIRVAssembly: { legalizeImageSubscript(targetRequest, irModule, sink); } break; default: break; } // Legalize constant buffer loads. switch (target) { case CodeGenTarget::GLSL: case CodeGenTarget::SPIRV: case CodeGenTarget::SPIRVAssembly: { legalizeConstantBufferLoadForGLSL(irModule); legalizeDispatchMeshPayloadForGLSL(irModule); } break; default: break; } switch (target) { default: break; case CodeGenTarget::HLSL: case CodeGenTarget::GLSL: case CodeGenTarget::WGSL: moveGlobalVarInitializationToEntryPoints(irModule, targetProgram); break; // For SPIR-V to SROA across 2 entry-points a value must not be a global case CodeGenTarget::SPIRV: case CodeGenTarget::SPIRVAssembly: moveGlobalVarInitializationToEntryPoints(irModule, targetProgram); if (targetProgram->getOptionSet().getBoolOption( CompilerOptionName::EnableExperimentalPasses)) introduceExplicitGlobalContext(irModule, target); transformParamsToConstRef(irModule, codeGenContext->getSink()); #if 0 dumpIRIfEnabled(codeGenContext, irModule, "EXPLICIT GLOBAL CONTEXT INTRODUCED"); #endif break; case CodeGenTarget::Metal: case CodeGenTarget::CPPSource: case CodeGenTarget::CUDASource: // For CUDA/OptiX like targets, add our pass to replace inout parameter copies with direct // pointers undoParameterCopy(irModule); // Transform struct parameters to use ConstRef for better performance if (isCPUTarget(targetRequest) || isCUDATarget(targetRequest) || isMetalTarget(targetRequest)) { transformParamsToConstRef(irModule, codeGenContext->getSink()); } #if 0 dumpIRIfEnabled(codeGenContext, irModule, "PARAMETER COPIES REPLACED WITH DIRECT POINTERS"); #endif validateIRModuleIfEnabled(codeGenContext, irModule); moveGlobalVarInitializationToEntryPoints(irModule, targetProgram); introduceExplicitGlobalContext(irModule, target); if (target == CodeGenTarget::CPPSource) { convertEntryPointPtrParamsToRawPtrs(irModule); } #if 0 dumpIRIfEnabled(codeGenContext, irModule, "EXPLICIT GLOBAL CONTEXT INTRODUCED"); #endif validateIRModuleIfEnabled(codeGenContext, irModule); break; } // TODO: our current dynamic dispatch pass will remove all uses of witness tables. // If we are going to support function-pointer based, "real" modular dynamic dispatch, // we will need to disable this pass. stripLegalizationOnlyInstructions(irModule); switch (target) { // On targets that don't support default initialization, remove 'raw' default construct // insts because our code-gen will not have any way to emit them. // case CodeGenTarget::SPIRV: if (targetProgram->shouldEmitSPIRVDirectly()) removeRawDefaultConstructors(irModule); break; default: break; } #if 0 dumpIRIfEnabled(codeGenContext, irModule, "AFTER STRIP WITNESS TABLES"); #endif validateIRModuleIfEnabled(codeGenContext, irModule); // Validate vectors and matrices according to what the target allows validateVectorsAndMatrices(irModule, sink, targetRequest); // The resource-based specialization pass above // may create specialized versions of functions, but // it does not try to completely eliminate the original // functions, so there might still be invalid code in // our IR module. // // We run DCE pass again to clean things up. // eliminateDeadCode(irModule, deadCodeEliminationOptions); cleanUpVoidType(irModule); if (isKhronosTarget(targetRequest)) { // As a fallback, if the above specialization steps failed to remove resource type // parameters, we will inline the functions in question to make sure we can produce valid // GLSL. performGLSLResourceReturnFunctionInlining(targetProgram, irModule); } #if 0 dumpIRIfEnabled(codeGenContext, irModule, "AFTER DCE"); #endif validateIRModuleIfEnabled(codeGenContext, irModule); // Lower the `getRegisterIndex` and `getRegisterSpace` intrinsics. // if (requiredLoweringPassSet.bindingQuery) lowerBindingQueries(irModule, sink); // For some small improvement in type safety we represent these as opaque // structs instead of regular arrays. // // If any have survived this far, change them back to regular (decorated) // arrays that the emitters can deal with. if (requiredLoweringPassSet.meshOutput) legalizeMeshOutputTypes(irModule); // Lower all bit_cast operations on complex types into leaf-level // bit_cast on basic types. if (requiredLoweringPassSet.bitcast) lowerBitCast(targetProgram, irModule, sink); // Rewrite functions that return arrays to return them via `out` parameter, // since our target languages doesn't allow returning arrays. if (!isMetalTarget(targetRequest) && !isSPIRV(target)) legalizeArrayReturnType(irModule); if (isKhronosTarget(targetRequest) || target == CodeGenTarget::HLSL) { legalizeUniformBufferLoad(irModule); if (targetProgram->getOptionSet().getBoolOption(CompilerOptionName::VulkanInvertY)) invertYOfPositionOutput(irModule); if (targetProgram->getOptionSet().getBoolOption(CompilerOptionName::VulkanUseDxPositionW)) rcpWOfPositionInput(irModule); } BufferElementTypeLoweringOptions bufferElementTypeLoweringOptions = {}; if (isWGPUTarget(targetRequest)) bufferElementTypeLoweringOptions.loweringPolicyKind = BufferElementTypeLoweringPolicyKind::WGSL; else if (isKhronosTarget(targetRequest)) bufferElementTypeLoweringOptions.loweringPolicyKind = BufferElementTypeLoweringPolicyKind::KhronosTarget; else bufferElementTypeLoweringOptions.loweringPolicyKind = BufferElementTypeLoweringPolicyKind::Default; lowerBufferElementTypeToStorageType(targetProgram, irModule, bufferElementTypeLoweringOptions); // If we are generating code for glsl or metal, perform address space propagation now. // For SPIRV, we will do that during spirv legalization that happens after // `linkAndOptimizeIR`. if (target == CodeGenTarget::GLSL) { NoOpInitialAddressSpaceAssigner addrSpaceAssigner; specializeAddressSpace(irModule, &addrSpaceAssigner); } else if (isMetalTarget(targetRequest)) { specializeAddressSpaceForMetal(irModule); } else if (isWGPUTarget(targetRequest)) { specializeAddressSpaceForWGSL(irModule); } performForceInlining(irModule); bool emitSpirvDirectly = targetProgram->shouldEmitSPIRVDirectly(); if (emitSpirvDirectly) { performIntrinsicFunctionInlining(irModule); } eliminateMultiLevelBreak(irModule); if (!fastIRSimplificationOptions.minimalOptimization) { IRSimplificationOptions simplificationOptions = fastIRSimplificationOptions; simplificationOptions.cfgOptions.removeTrivialSingleIterationLoops = true; simplifyIR(targetProgram, irModule, simplificationOptions, sink); } // As a late step, we need to take the SSA-form IR and move things *out* // of SSA form, by eliminating all "phi nodes" (block parameters) and // introducing explicit temporaries instead. Doing this at the IR level // means that subsequent emit logic doesn't need to contend with the // complexities of blocks with parameters. // { // Get the liveness mode. const LivenessMode livenessMode = codeGenContext->shouldTrackLiveness() ? LivenessMode::Enabled : LivenessMode::Disabled; // // Downstream targets may benefit from having live-range information for // local variables, and our IR currently encodes a reasonably good version // of that information. At this point we will insert live-range markers // for local variables, on when such markers are requested. // // After this point in optimization, any passes that introduce new // temporary variables into the IR module should take responsibility for // producing their own live-range information. // if (isEnabled(livenessMode)) { LivenessUtil::addVariableRangeStarts(irModule, livenessMode); } // We only want to accumulate locations if liveness tracking is enabled. PhiEliminationOptions phiEliminationOptions; if (isKhronosTarget(targetRequest) && emitSpirvDirectly) { phiEliminationOptions.eliminateCompositeTypedPhiOnly = false; phiEliminationOptions.useRegisterAllocation = true; } eliminatePhis(livenessMode, irModule, phiEliminationOptions); #if 0 dumpIRIfEnabled(codeGenContext, irModule, "PHIS ELIMINATED"); #endif // If liveness is enabled add liveness ranges based on the accumulated liveness locations if (isEnabled(livenessMode)) { LivenessUtil::addRangeEnds(irModule, livenessMode); #if 0 dumpIRIfEnabled(codeGenContext, irModule, "LIVENESS"); #endif } } // TODO: We need to insert the logic that fixes variable scoping issues // here (rather than doing it very late in the emit process), because // otherwise the `applyGLSLLiveness()` operation below wouldn't be // able to see the live-range information that pass would need to add. // For now we are avoiding that problem by simply *not* emitting live-range // information when we fix variable scoping later on. // Depending on the target, certain things that were represented ass // single IR instructions will need to be emitted with the help of // function declaratons in output high-level code. // // One example of this is the live-range information, which needs // to be output to GLSL code that uses a glslang extension for // supporting function declarations that map directly to SPIR-V opcodes. // // We execute a pass here to transform any live-range instructions // in the module into function calls, for the targets that require it. // if (codeGenContext->shouldTrackLiveness()) { if (isKhronosTarget(targetRequest)) { applyGLSLLiveness(irModule); } } if (isKhronosTarget(targetRequest) && emitSpirvDirectly) { replaceLocationIntrinsicsWithRaytracingObject(targetProgram, irModule, sink); } validateIRModuleIfEnabled(codeGenContext, irModule); // Run a final round of simplifications to clean up unused things after phi-elimination. simplifyNonSSAIR(targetProgram, irModule, fastIRSimplificationOptions); // We include one final step to (optionally) dump the IR and validate // it after all of the optimization passes are complete. This should // reflect the IR that code is generated from as closely as possible. // #if 0 dumpIRIfEnabled(codeGenContext, irModule, "OPTIMIZED"); #endif validateIRModuleIfEnabled(codeGenContext, irModule); if ((target != CodeGenTarget::SPIRV) && (target != CodeGenTarget::SPIRVAssembly)) { // We need to perform a final pass to ensure that all the // variables in the IR module have their scopes set correctly. // // This is a separate pass because it needs to run after // all the other optimization passes have been performed. applyVariableScopeCorrection(irModule, targetRequest); validateIRModuleIfEnabled(codeGenContext, irModule); } auto metadata = new ArtifactPostEmitMetadata; outLinkedIR.metadata = metadata; if (targetProgram->getOptionSet().getBoolOption(CompilerOptionName::EmbedDownstreamIR)) { unexportNonEmbeddableIR(target, irModule); } collectMetadata(irModule, *metadata); outLinkedIR.metadata = metadata; if (!targetProgram->getOptionSet().shouldPerformMinimumOptimizations()) checkUnsupportedInst(codeGenContext->getTargetReq(), irModule, sink); return sink->getErrorCount() == 0 ? SLANG_OK : SLANG_FAIL; } SlangResult CodeGenContext::emitEntryPointsSourceFromIR(ComPtr& outArtifact) { SLANG_PROFILE; outArtifact.setNull(); auto session = getSession(); auto sink = getSink(); auto sourceManager = getSourceManager(); auto target = getTargetFormat(); auto targetRequest = getTargetReq(); auto targetProgram = getTargetProgram(); auto lineDirectiveMode = targetProgram->getOptionSet().getEnumOption( CompilerOptionName::LineDirectiveMode); // We will generally use C-style line directives in order to give the user good // source locations on error messages from downstream compilers, but there are // a few exceptions. if (lineDirectiveMode == LineDirectiveMode::Default) { switch (targetRequest->getTarget()) { case CodeGenTarget::GLSL: // We want to maximize compatibility with downstream tools. lineDirectiveMode = LineDirectiveMode::GLSL; break; case CodeGenTarget::WGSLSPIRVAssembly: case CodeGenTarget::WGSLSPIRV: case CodeGenTarget::WGSL: // WGSL doesn't support line directives. // See https://github.com/gpuweb/gpuweb/issues/606. lineDirectiveMode = LineDirectiveMode::None; break; } } ComPtr> sourceMap; // If SourceMap is enabled, we create one and associate it with the sourceWriter if (lineDirectiveMode == LineDirectiveMode::SourceMap) { sourceMap = new BoxValue; } SourceWriter sourceWriter(sourceManager, lineDirectiveMode, sourceMap); CLikeSourceEmitter::Desc desc; desc.codeGenContext = this; if (getEntryPointCount() == 1) { auto entryPoint = getEntryPoint(getSingleEntryPointIndex()); desc.entryPointStage = entryPoint->getStage(); desc.effectiveProfile = getEffectiveProfile(entryPoint, targetRequest); } else { desc.entryPointStage = Stage::Unknown; desc.effectiveProfile = targetProgram->getOptionSet().getProfile(); } desc.sourceWriter = &sourceWriter; // Define here, because must be in scope longer than the sourceEmitter, as sourceEmitter might // reference items in the linkedIR module LinkedIR linkedIR; RefPtr sourceEmitter; SourceLanguage sourceLanguage = CLikeSourceEmitter::getSourceLanguage(target); switch (target) { default: switch (sourceLanguage) { case SourceLanguage::CPP: { sourceEmitter = new CPPSourceEmitter(desc); break; } case SourceLanguage::GLSL: { sourceEmitter = new GLSLSourceEmitter(desc); break; } case SourceLanguage::HLSL: { sourceEmitter = new HLSLSourceEmitter(desc); break; } case SourceLanguage::CUDA: { sourceEmitter = new CUDASourceEmitter(desc); break; } case SourceLanguage::Metal: { sourceEmitter = new MetalSourceEmitter(desc); break; } case SourceLanguage::WGSL: { sourceEmitter = new WGSLSourceEmitter(desc); break; } default: break; } break; case CodeGenTarget::PyTorchCppBinding: sourceEmitter = new TorchCppSourceEmitter(desc); break; } if (!sourceEmitter) { sink->diagnose( SourceLoc(), Diagnostics::unableToGenerateCodeForTarget, TypeTextUtil::getCompileTargetName(SlangCompileTarget(target))); return SLANG_FAIL; } SLANG_RETURN_ON_FAIL(sourceEmitter->init()); ComPtr metadata; { LinkingAndOptimizationOptions linkingAndOptimizationOptions; linkingAndOptimizationOptions.sourceEmitter = sourceEmitter; switch (sourceLanguage) { default: break; case SourceLanguage::CPP: case SourceLanguage::C: case SourceLanguage::CUDA: linkingAndOptimizationOptions.shouldLegalizeExistentialAndResourceTypes = false; break; } SLANG_RETURN_ON_FAIL(linkAndOptimizeIR(this, linkingAndOptimizationOptions, linkedIR)); auto irModule = linkedIR.module; // Perform final simplifications to help emit logic to generate more compact code. simplifyForEmit(irModule, targetRequest); metadata = linkedIR.metadata; // After all of the required optimization and legalization // passes have been performed, we can emit target code from // the IR module. // sourceEmitter->emitModule(irModule, sink); } String code = sourceWriter.getContent(); sourceWriter.clearContent(); // Now that we've emitted the code for all the declarations in the file, // it is time to stitch together the final output. // There may be global-scope modifiers that we should emit now // Supress emitting line directives when emitting preprocessor directives since // these preprocessor directives may be required to appear in the first line // of the output. An example is that the "#version" line in a GLSL source must // appear before anything else. sourceWriter.supressLineDirective(); // When emitting front matter we can emit the target-language-specific directives // needed to get the default matrix layout to match what was requested // for the given target. // // Note: we do not rely on the defaults for the target language, // because a user could take the HLSL/GLSL generated by Slang and pass // it to another compiler with non-default options specified on // the command line, leading to all kinds of trouble. // // TODO: We need an approach to "global" layout directives that will work // in the presence of multiple modules. If modules A and B were each // compiled with different assumptions about how layout is performed, // then types/variables defined in those modules should be emitted in // a way that is consistent with that layout... // Emit any front matter sourceEmitter->emitFrontMatter(targetRequest); switch (target) { case CodeGenTarget::PyTorchCppBinding: sourceWriter.emit(get_slang_torch_prelude()); break; default: if (isHeterogeneousTarget(target)) { sourceWriter.emit(get_slang_cpp_host_prelude()); } else { // Get the prelude String prelude = session->getPreludeForLanguage(sourceLanguage); sourceWriter.emit(prelude); } break; } // Emit anything that goes before the contents of the code generated for the module sourceEmitter->emitPreModule(); sourceWriter.resumeLineDirective(); // Get the content built so far from the front matter/prelude/preModule // By getting in this way, the content is no longer referenced by the sourceWriter. String finalResult = sourceWriter.getContentAndClear(); // Append the modules output code finalResult.append(code); finalResult.append(sourceWriter.getContentAndClear()); // Write out the result auto artifact = ArtifactUtil::createArtifactForCompileTarget(asExternal(target)); artifact->addRepresentationUnknown(StringBlob::moveCreate(finalResult)); ArtifactUtil::addAssociated(artifact, metadata); if (sourceMap) { auto sourceMapArtifact = ArtifactUtil::createArtifact(ArtifactDesc::make( ArtifactKind::Json, ArtifactPayload::SourceMap, ArtifactStyle::None)); sourceMapArtifact->addRepresentation(sourceMap); artifact->addAssociated(sourceMapArtifact); } outArtifact.swap(artifact); return SLANG_OK; } SlangResult emitSPIRVFromIR( CodeGenContext* codeGenContext, IRModule* irModule, const List& irEntryPoints, List& spirvOut); // Helper class to assist in stepping through the SPIRV instructions. class SpirvInstructionHelper { public: // The result id of the OpExtInstImport instruction, eg.g %2 below // %2 = OpExtInstImport "NonSemantic.Shader.DebugInfo.100" uint32_t m_nonSemanticDebugInfoExtSetId = 0; // The index of the SPIRV words in the blob. // The first 5 words are the header as defined by the SPIRV spec. enum SpvWordIndex { SPV_INDEX_MAGIC_NUMBER, SPV_INDEX_VERSION_NUMBER, SPV_INDEX_GENERATOR_NUMBER, SPV_INDEX_BOUND, SPV_INDEX_SCHEMA, SPV_INDEX_INSTRUCTION_START, }; // An instruction in the SPIRV blob. This points to the first word of the instruction. struct SpvInstruction { SpvInstruction(SpvWord* word) : word(word) { } uint16_t getOpCode() const { return word[0] & 0xFFFF; } uint16_t getWordCountForInst() const { return (word[0] >> 16) & 0xFFFF; } SpvWord getOperand(uint32_t index) const { return word[index + 1]; } // Helper function to interpret the instruction as a string and // extract the string value. String getStringFromInst() const { String result; for (uint32_t i = 1; i < getWordCountForInst(); i++) { SpvWord op = getOperand(i); for (int b = 0; b < 4; ++b) { char c = (char)((op >> (b * 8)) & 0xFF); if (c == '\0') return result; result.append(c); } } return result; } SpvWord* word = nullptr; }; SpirvInstructionHelper() {} // Load the SPIRV instructions from the artifact into a data blob that // we can read. SlangResult loadBlob(ComPtr& artifact) { m_nonSemanticDebugInfoExtSetId = 0; m_words.clear(); m_headerWords.clear(); ComPtr spirvBlob; SlangResult res = artifact->loadBlob(ArtifactKeep::Yes, spirvBlob.writeRef()); if (SLANG_FAILED(res) || !spirvBlob || spirvBlob->getBufferSize() < SPV_INDEX_INSTRUCTION_START * sizeof(SpvWord)) return SLANG_FAIL; // Populate the full array of SPIR-V words. m_words.addRange( reinterpret_cast(spirvBlob->getBufferPointer()), spirvBlob->getBufferSize() / sizeof(SpvWord)); // Populate the header words. These are the first 5 words of the SPIR-V // blob and are treated differently from the rest of the instructions. m_headerWords.clear(); m_headerWords.addRange(m_words.getBuffer(), SPV_INDEX_INSTRUCTION_START); return SLANG_OK; } // Get the header words. const List& getHeaderWords() const { return m_headerWords; } Index getWordCount() const { return m_words.getCount(); } // Visit all SPIRV instructions (excluding header words), invoking the callback for each // instruction. The callback should be a function or lambda with signature: void(const // SpvInstruction&). template void visitInstructions(Func&& callback) { // Instructions start after the header (first 5 words) constexpr size_t kHeaderWordCount = static_cast(SpvWordIndex::SPV_INDEX_INSTRUCTION_START); size_t i = kHeaderWordCount; while (i < (size_t)m_words.getCount()) { SpvWord* wordPtr = m_words.getBuffer() + i; SpvInstruction inst(wordPtr); callback(inst); uint16_t wordCount = inst.getWordCountForInst(); if (wordCount == 0) break; // Prevent infinite loop on malformed input i += wordCount; } } private: // The full array of SPIRV words. List m_words; // The header words. List m_headerWords; }; // Helper function that takes an artifact populated with SPIRV instructions // after the spirv-opt step, and a previously created but empty // strippedArtifact. The artifact is unmodified, and the strippedArtifact // will contain all the artifact's instructions except for debug instructions. static SlangResult stripDbgSpirvFromArtifact( ComPtr& artifact, ComPtr& strippedArtifact) { // Standard debug opcodes to strip out. This mimics the behavior of // spirv-opt. static const uint16_t debugOpCodeVals[] = { SpvOpSourceContinued, SpvOpSource, SpvOpSourceExtension, SpvOpString, SpvOpName, SpvOpMemberName, SpvOpModuleProcessed, SpvOpLine, SpvOpNoLine}; // If the instruction is an extended instruction, then we also need // to check if the instruction number is for a debug instruction as // listed in slang-emit-spirv-ops-debug-info-ext.h static const uint32_t debugExtInstVals[] = { NonSemanticShaderDebugInfo100DebugCompilationUnit, NonSemanticShaderDebugInfo100DebugTypeBasic, NonSemanticShaderDebugInfo100DebugTypePointer, NonSemanticShaderDebugInfo100DebugTypeQualifier, NonSemanticShaderDebugInfo100DebugTypeArray, NonSemanticShaderDebugInfo100DebugTypeVector, NonSemanticShaderDebugInfo100DebugTypeFunction, NonSemanticShaderDebugInfo100DebugTypeComposite, NonSemanticShaderDebugInfo100DebugTypeMember, NonSemanticShaderDebugInfo100DebugFunction, NonSemanticShaderDebugInfo100DebugScope, NonSemanticShaderDebugInfo100DebugNoScope, NonSemanticShaderDebugInfo100DebugInlinedAt, NonSemanticShaderDebugInfo100DebugLocalVariable, NonSemanticShaderDebugInfo100DebugGlobalVariable, NonSemanticShaderDebugInfo100DebugInlinedVariable, NonSemanticShaderDebugInfo100DebugDeclare, NonSemanticShaderDebugInfo100DebugValue, NonSemanticShaderDebugInfo100DebugExpression, NonSemanticShaderDebugInfo100DebugSource, NonSemanticShaderDebugInfo100DebugFunctionDefinition, NonSemanticShaderDebugInfo100DebugSourceContinued, NonSemanticShaderDebugInfo100DebugLine, NonSemanticShaderDebugInfo100DebugEntryPoint, NonSemanticShaderDebugInfo100DebugTypeMatrix, }; // Hash sets for easier lookup. HashSet debugOpCodes; for (auto val : debugOpCodeVals) debugOpCodes.add(val); HashSet debugExtInstNumbers; for (auto val : debugExtInstVals) debugExtInstNumbers.add(val); SpirvInstructionHelper spirvInstructionHelper; SLANG_RETURN_ON_FAIL(spirvInstructionHelper.loadBlob(artifact)); const auto& headerWords = spirvInstructionHelper.getHeaderWords(); List spirvWordsList; if (auto totalWordCapacity = headerWords.getCount() + spirvInstructionHelper.getWordCount()) { const Index byteCapacity = totalWordCapacity * sizeof(SpvWord); spirvWordsList.reserve(byteCapacity); } spirvWordsList.addRange( reinterpret_cast(headerWords.getBuffer()), headerWords.getCount() * sizeof(SpvWord)); // First find the DebugBuildIdentifier instruction, and keep track of which string // it refers to, this string needs to be kept in the final output. SpvWord debugStringId = 0; spirvInstructionHelper.visitInstructions( [&](const SpirvInstructionHelper::SpvInstruction& inst) { if (inst.getOpCode() == SpvOpExtInst) { if (inst.getOperand(3) == NonSemanticShaderDebugInfo100DebugBuildIdentifier) { debugStringId = inst.getOperand(4); return; } } else if (inst.getOpCode() == SpvOpExtInstImport) { // looking for result id of "OpExtInstImport "NonSemantic.Shader.DebugInfo.100" auto importName = inst.getStringFromInst(); if (importName == "NonSemantic.Shader.DebugInfo.100") { spirvInstructionHelper.m_nonSemanticDebugInfoExtSetId = inst.getOperand(0); } } }); // Iterate over the instructions from the artifact and add them to the list // only if they are not debug instructions. We also get the debug build hash // to use as the filename for the debug spirv file. String debugBuildHash; spirvInstructionHelper.visitInstructions( [&](const SpirvInstructionHelper::SpvInstruction& inst) { if (debugOpCodes.contains(inst.getOpCode())) { // We can only strip strings if they are not being used by the // DebugBuildIdentifier instruction. bool foundDebugString = false; if (inst.getOpCode() == SpvOpString && inst.getOperand(0) == debugStringId) { debugBuildHash = inst.getStringFromInst(); foundDebugString = true; } if (!foundDebugString) { return; } } // Also check if the instruction is an extended instruction containing DebugInfo. if (inst.getOpCode() == SpvOpExtInst) { // Ignore this if the instruction contains DebugInfo and is from the debug import if (debugExtInstNumbers.contains(inst.getOperand(3)) && inst.getOperand(2) == spirvInstructionHelper.m_nonSemanticDebugInfoExtSetId) { return; } } // Otherwise this is a non-debug instruction and should be included. spirvWordsList.addRange( reinterpret_cast(inst.word), inst.getWordCountForInst() * sizeof(SpvWord)); }); // Create the stripped artifact using the above created instruction list. strippedArtifact->addRepresentationUnknown(ListBlob::moveCreate(spirvWordsList)); // Set the name of the artifact to the debug build hash so it can be used // as the filename for the debug spirv file. artifact->setName(debugBuildHash.getBuffer()); return SLANG_OK; } // Helper function to create an artifact from IR used internally by // emitSPIRVForEntryPointsDirectly. static SlangResult createArtifactFromIR( CodeGenContext* codeGenContext, IRModule* irModule, List irEntryPoints, ComPtr& artifact, ComPtr& dbgArtifact) { List spirv, outSpirv; emitSPIRVFromIR(codeGenContext, irModule, irEntryPoints, spirv); auto targetRequest = codeGenContext->getTargetReq(); auto targetCompilerOptions = targetRequest->getOptionSet(); #if 0 String optErr; if (SLANG_FAILED(optimizeSPIRV(spirv, optErr, outSpirv))) { codeGenContext->getSink()->diagnose(SourceLoc(), Diagnostics::spirvOptFailed, optErr); spirv = _Move(outSpirv); } #endif artifact->addRepresentationUnknown(ListBlob::moveCreate(spirv)); IDownstreamCompiler* compiler = codeGenContext->getSession()->getOrLoadDownstreamCompiler( PassThroughMode::SpirvOpt, codeGenContext->getSink()); if (compiler) { #if 0 // Dump the unoptimized/unlinked SPIRV after lowering from slang IR -> SPIRV compiler->disassemble((uint32_t*)spirv.getBuffer(), int(spirv.getCount() / 4)); #endif bool isPrecompilation = codeGenContext->getTargetProgram()->getOptionSet().getBoolOption( CompilerOptionName::EmbedDownstreamIR); if (!isPrecompilation && !codeGenContext->shouldSkipDownstreamLinking()) { ComPtr linkedArtifact; // collect spirv files List spirvFiles; List spirvSizes; // Start with the SPIR-V we just generated. // SPIRV-Tools-link expects the size in 32-bit words // whereas the spirv blob size is in bytes. spirvFiles.add((uint32_t*)spirv.getBuffer()); spirvSizes.add(int(spirv.getCount()) / 4); // Iterate over all modules in the linkedIR. For each module, if it // contains an embedded downstream ir instruction, add it to the list // of spirv files. auto program = codeGenContext->getProgram(); program->enumerateIRModules( [&](IRModule* irModule) { for (auto globalInst : irModule->getModuleInst()->getChildren()) { if (auto inst = as(globalInst)) { if (inst->getTarget() == CodeGenTarget::SPIRV) { auto slice = inst->getBlob()->getStringSlice(); spirvFiles.add((uint32_t*)slice.begin()); spirvSizes.add(int(slice.getLength()) / 4); } } } }); SLANG_ASSERT(int(spirv.getCount()) % 4 == 0); SLANG_ASSERT(spirvFiles.getCount() == spirvSizes.getCount()); if (spirvFiles.getCount() > 1) { SlangResult linkresult = compiler->link( (const uint32_t**)spirvFiles.getBuffer(), (const uint32_t*)spirvSizes.getBuffer(), (uint32_t)spirvFiles.getCount(), linkedArtifact.writeRef()); if (linkresult != SLANG_OK) { return SLANG_FAIL; } ComPtr blob; linkedArtifact->loadBlob(ArtifactKeep::No, blob.writeRef()); artifact = _Move(linkedArtifact); } } if (!codeGenContext->shouldSkipSPIRVValidation()) { StringBuilder runSpirvValEnvVar; PlatformUtil::getEnvironmentVariable( UnownedStringSlice("SLANG_RUN_SPIRV_VALIDATION"), runSpirvValEnvVar); if (runSpirvValEnvVar.getUnownedSlice() == "1") { if (SLANG_FAILED(compiler->validate( (uint32_t*)spirv.getBuffer(), int(spirv.getCount() / 4)))) { compiler->disassemble((uint32_t*)spirv.getBuffer(), int(spirv.getCount() / 4)); codeGenContext->getSink()->diagnoseWithoutSourceView( SourceLoc{}, Diagnostics::spirvValidationFailed); } } } ComPtr optimizedArtifact; DownstreamCompileOptions downstreamOptions; downstreamOptions.sourceArtifacts = makeSlice(artifact.readRef(), 1); downstreamOptions.targetType = SLANG_SPIRV; downstreamOptions.sourceLanguage = SLANG_SOURCE_LANGUAGE_SPIRV; switch (codeGenContext->getTargetProgram()->getOptionSet().getEnumOption( CompilerOptionName::Optimization)) { case OptimizationLevel::None: downstreamOptions.optimizationLevel = DownstreamCompileOptions::OptimizationLevel::None; break; case OptimizationLevel::Default: downstreamOptions.optimizationLevel = DownstreamCompileOptions::OptimizationLevel::Default; break; case OptimizationLevel::High: downstreamOptions.optimizationLevel = DownstreamCompileOptions::OptimizationLevel::High; break; case OptimizationLevel::Maximal: downstreamOptions.optimizationLevel = DownstreamCompileOptions::OptimizationLevel::Maximal; break; default: SLANG_ASSERT(!"Unhandled optimization level"); break; } auto downstreamStartTime = std::chrono::high_resolution_clock::now(); if (SLANG_SUCCEEDED(compiler->compile(downstreamOptions, optimizedArtifact.writeRef()))) { // Check if we need to output a separate SPIRV file containing debug info. If so // then strip all debug instructions from the artifact. The dbgArtifact will still // contain all instructions. if (targetCompilerOptions.shouldEmitSeparateDebugInfo()) { auto strippedArtifact = ArtifactUtil::createArtifactForCompileTarget(SLANG_SPIRV); SLANG_RETURN_ON_FAIL( stripDbgSpirvFromArtifact(optimizedArtifact, strippedArtifact)); artifact = _Move(strippedArtifact); dbgArtifact = _Move(optimizedArtifact); } else artifact = _Move(optimizedArtifact); } auto downstreamElapsedTime = (std::chrono::high_resolution_clock::now() - downstreamStartTime).count() * 0.000000001; codeGenContext->getSession()->addDownstreamCompileTime(downstreamElapsedTime); SLANG_RETURN_ON_FAIL( passthroughDownstreamDiagnostics(codeGenContext->getSink(), compiler, artifact)); } return SLANG_OK; } SlangResult emitSPIRVForEntryPointsDirectly( CodeGenContext* codeGenContext, ComPtr& outArtifact) { // Outside because we want to keep IR in scope whilst we are processing emits LinkedIR linkedIR; LinkingAndOptimizationOptions linkingAndOptimizationOptions; SLANG_RETURN_ON_FAIL( linkAndOptimizeIR(codeGenContext, linkingAndOptimizationOptions, linkedIR)); auto irModule = linkedIR.module; auto irEntryPoints = linkedIR.entryPoints; dumpIRIfEnabled(codeGenContext, irModule, "POST LINK AND OPTIMIZE"); auto targetRequest = codeGenContext->getTargetReq(); auto targetCompilerOptions = targetRequest->getOptionSet(); // Create the artifact containing the main SPIRV data, and the debug SPIRV // data if requested by the command line arg -separate-debug-info. Slang::ComPtr dbgArtifact; auto artifact = ArtifactUtil::createArtifactForCompileTarget(asExternal(codeGenContext->getTargetFormat())); SLANG_RETURN_ON_FAIL( createArtifactFromIR(codeGenContext, irModule, irEntryPoints, artifact, dbgArtifact)); ArtifactUtil::addAssociated(artifact, linkedIR.metadata); // Associate the debug artifact with the main artifact. // EndToEndCompileRequest::generateOutput will read this data // and produce a .dbg.spv file for this child artifact. if (targetCompilerOptions.shouldEmitSeparateDebugInfo()) { artifact->addAssociated(dbgArtifact); auto artifactPostEmitMetadata = static_cast(linkedIR.metadata.get()); artifactPostEmitMetadata->addRef(); artifactPostEmitMetadata->m_debugBuildIdentifier = dbgArtifact->getName(); } outArtifact.swap(artifact); return SLANG_OK; } SlangResult emitHostVMCode(CodeGenContext* codeGenContext, ComPtr& outArtifact) { LinkedIR linkedIR; LinkingAndOptimizationOptions linkingAndOptimizationOptions; SLANG_RETURN_ON_FAIL( linkAndOptimizeIR(codeGenContext, linkingAndOptimizationOptions, linkedIR)); VMByteCodeBuilder byteCode; SLANG_RETURN_ON_FAIL(emitVMByteCodeForEntryPoints(codeGenContext, linkedIR, byteCode)); String slangDeclaration; SLANG_RETURN_ON_FAIL( emitSlangDeclarationsForEntryPoints(codeGenContext, linkedIR, slangDeclaration)); slang::SessionDesc sessionDesc = {}; ComPtr slangSession; SLANG_RETURN_ON_FAIL( codeGenContext->getSession()->createSession(sessionDesc, slangSession.writeRef())); auto linkage = static_cast(slangSession.get()); ComPtr diagnostics; auto module = slangSession->loadModuleFromSource( "kernel", "kernel.slang", StringBlob::create(slangDeclaration), diagnostics.writeRef()); if (!module) return SLANG_FAIL; RefPtr newModule = new Module(linkage); newModule->setModuleDecl(static_cast(module)->getModuleDecl()); newModule->setIRModule(linkedIR.module); newModule->setName("kernels"); SLANG_RETURN_ON_FAIL(newModule->serialize(byteCode.kernelBlob.writeRef())); ComPtr byteCodeBlob; SLANG_RETURN_ON_FAIL(byteCode.serialize(byteCodeBlob.writeRef())); outArtifact = ArtifactUtil::createArtifactForCompileTarget(SLANG_HOST_VM); outArtifact->addRepresentationUnknown(byteCodeBlob); return SLANG_OK; } } // namespace Slang