diff options
Diffstat (limited to 'source')
| -rw-r--r-- | source/slang/emit.cpp | 42 | ||||
| -rw-r--r-- | source/slang/ir-clone.cpp | 265 | ||||
| -rw-r--r-- | source/slang/ir-clone.h | 164 | ||||
| -rw-r--r-- | source/slang/ir-glsl-legalize.cpp | 1548 | ||||
| -rw-r--r-- | source/slang/ir-glsl-legalize.h | 21 | ||||
| -rw-r--r-- | source/slang/ir-insts.h | 55 | ||||
| -rw-r--r-- | source/slang/ir-link.cpp | 1297 | ||||
| -rw-r--r-- | source/slang/ir-link.h | 33 | ||||
| -rw-r--r-- | source/slang/ir-specialize-resources.cpp | 268 | ||||
| -rw-r--r-- | source/slang/ir-specialize.cpp | 685 | ||||
| -rw-r--r-- | source/slang/ir-specialize.h | 13 | ||||
| -rw-r--r-- | source/slang/ir.cpp | 3404 | ||||
| -rw-r--r-- | source/slang/ir.h | 1 | ||||
| -rw-r--r-- | source/slang/slang.vcxproj | 8 | ||||
| -rw-r--r-- | source/slang/slang.vcxproj.filters | 24 |
15 files changed, 4153 insertions, 3675 deletions
diff --git a/source/slang/emit.cpp b/source/slang/emit.cpp index a1235f03a..49eaf917b 100644 --- a/source/slang/emit.cpp +++ b/source/slang/emit.cpp @@ -4,9 +4,12 @@ #include "../core/slang-writer.h" #include "ir-dce.h" #include "ir-existential.h" +#include "ir-glsl-legalize.h" #include "ir-insts.h" +#include "ir-link.h" #include "ir-restructure.h" #include "ir-restructure-scoping.h" +#include "ir-specialize.h" #include "ir-specialize-resources.h" #include "ir-ssa.h" #include "ir-validate.h" @@ -6517,10 +6520,9 @@ String emitEntryPoint( session, irModule); - specializeIRForEntryPoint( + auto irEntryPoint = specializeIRForEntryPoint( irSpecializationState, - entryPoint, - &sharedContext.extensionUsageTracker); + entryPoint); #if 0 dumpIRIfEnabled(compileRequest, irModule, "CLONED"); @@ -6533,6 +6535,38 @@ String emitEntryPoint( // un-specialized IR. dumpIRIfEnabled(compileRequest, irModule); + + + // For GLSL only, we will need to perform "legalization" of + // the entry point and any entry-point parameters. + // + // TODO: We should consider moving this legalization work + // as late as possible, so that it doesn't affect how other + // optimization passes need to work. + // + switch (target) + { + case CodeGenTarget::GLSL: + { + legalizeEntryPointForGLSL( + session, + irModule, + irEntryPoint, + &compileRequest->mSink, + &sharedContext.extensionUsageTracker); + } + break; + + default: + break; + } +#if 0 + dumpIRIfEnabled(compileRequest, irModule, "GLSL LEGALIZED"); +#endif + validateIRModuleIfEnabled(compileRequest, irModule); + + + // Any code that makes use of existential (interface) types // needs to be simplified to use concrete types instead, // wherever this is possible. @@ -6559,7 +6593,7 @@ String emitEntryPoint( // none of our target supports generics, or interfaces, // so we need to specialize those away. 
// - specializeGenerics(irModule, sharedContext.target); + specializeGenerics(irModule); diff --git a/source/slang/ir-clone.cpp b/source/slang/ir-clone.cpp new file mode 100644 index 000000000..df9cf2bf6 --- /dev/null +++ b/source/slang/ir-clone.cpp @@ -0,0 +1,265 @@ +// ir-clone.cpp +#include "ir-clone.h" + +#include "ir.h" +#include "ir-insts.h" + +namespace Slang +{ + +IRInst* lookUp(IRCloneEnv* env, IRInst* oldVal) +{ + for( auto ee = env; ee; ee = ee->parent ) + { + IRInst* newVal = nullptr; + if(ee->mapOldValToNew.TryGetValue(oldVal, newVal)) + return newVal; + } + return nullptr; +} + +IRInst* findCloneForOperand( + IRCloneEnv* env, + IRInst* oldOperand) +{ + if(!oldOperand) return nullptr; + + // If there is a registered replacement for + // the existing operand, then use it. + // + if( IRInst* newVal = lookUp(env, oldOperand) ) + return newVal; + + // Otherwise, we assume that the caller wants + // to default to using existing values wherever + // an explicit replacement hasn't been registered. + // + // This is, notably, the right default whenever + // `oldOperand` is a global value or constant + // and our cloned code will sit in the same + // module as the original. + // + // TODO: We could make this a customization point + // down the road, if we ever had a case where + // we want to clone things with a different policy. + // + return oldOperand; +} + +IRInst* cloneInstAndOperands( + IRCloneEnv* env, + IRBuilder* builder, + IRInst* oldInst) +{ + SLANG_ASSERT(env); + SLANG_ASSERT(builder); + SLANG_ASSERT(oldInst); + + // This logic will not handle any instructions + // with special-case data attached, but that only + // applies to `IRConstant`s at this point, and those + // should only appear at the global scope rather than + // in function bodies. + // + // TODO: It would be easy enough to extend this logic + // to handle constants gracefully, if it ever comes up. 
+ // + SLANG_ASSERT(!as<IRConstant>(oldInst)); + + // We start by mapping the type of the original instruction + // to its replacement value, if any. + // + auto oldType = oldInst->getFullType(); + auto newType = (IRType*) findCloneForOperand(env, oldType); + + // Next we will create an empty shell of the instruction, + // with space for the operands, but no actual operand + // values attached. + // + UInt operandCount = oldInst->getOperandCount(); + auto newInst = builder->emitIntrinsicInst( + newType, + oldInst->op, + operandCount, + nullptr); + + // Finally we will iterate over the operands of `oldInst` + // to find their replacements and install them as + // the operands of `newInst`. + // + for(UInt ii = 0; ii < operandCount; ++ii) + { + auto oldOperand = oldInst->getOperand(ii); + auto newOperand = findCloneForOperand(env, oldOperand); + + newInst->getOperands()[ii].init(newInst, newOperand); + } + + return newInst; +} + +// The complexity of the second phase of cloning (the +// one that deals with decorations and children) comes +// from the fact that it needs to sequence the two phases +// of cloning for any child instructions. We will do this +// by performing the first phase of cloning, and building +// up a list of children that require the second phase of processing. +// Each entry in that list will be a pair of an old instruction +// and its new clone. +// +struct IRCloningOldNewPair +{ + IRInst* oldInst; + IRInst* newInst; +}; + +// We will use an internal variant of `cloneInstDecorationsAndChildren` +// that modifies the provided `env` as it goes as the main +// workhorse, since we need to make sure that instructions in +// earlier blocks are visible to those in other, later, blocks +// when cloning a function, so that strict scoping along the +// lines of the nesting of instructions isn't sufficient. 
+// +static void _cloneInstDecorationsAndChildren( + IRCloneEnv* env, + SharedIRBuilder* sharedBuilder, + IRInst* oldInst, + IRInst* newInst) +{ + SLANG_ASSERT(env); + SLANG_ASSERT(sharedBuilder); + SLANG_ASSERT(oldInst); + SLANG_ASSERT(newInst); + + // We will set up an IR builder that inserts + // into the new parent instruction. + // + IRBuilder builderStorage; + auto builder = &builderStorage; + builder->sharedBuilder = sharedBuilder; + builder->setInsertInto(newInst); + + // When applying the first phase of cloning to + // children, we will keep track of those that + // require the second phase. + // + List<IRCloningOldNewPair> pairs; + + for( auto oldChild : oldInst->getDecorationsAndChildren() ) + { + // As a very subtle special case, if one of the children + // of our `oldInst` already has a registered replacement, + // then we don't want to clone it (not least because + // the `Dictionary::Add` method would give us an error + // when we try to insert a new value for the same key). + // + // This arises for entries in `mapOldValToNew` that were + // seeded before cloning began (e.g., function + // parameters that are to be replaced). + // + if(lookUp(env, oldChild)) + continue; + + // Now we can perform the first phase of cloning + // on the child, and register it in our map from + // old to new values. + // + auto newChild = cloneInstAndOperands(env, builder, oldChild); + env->mapOldValToNew.Add(oldChild, newChild); + + // If and only if the old child had decorations + // or children, we will register it into our + // list for processing in the second phase. + // + if( oldChild->getFirstDecorationOrChild() ) + { + IRCloningOldNewPair pair; + pair.oldInst = oldChild; + pair.newInst = newChild; + pairs.Add(pair); + } + } + + // Once we have done first-phase processing for + // all child instructions, we scan through those + // in the list that required second-phase processing, + // and clone their decorations and/or children recursively. 
+ // + for( auto pair : pairs ) + { + auto oldChild = pair.oldInst; + auto newChild = pair.newInst; + + _cloneInstDecorationsAndChildren(env, sharedBuilder, oldChild, newChild); + } +} + +// The public version of `cloneInstDecorationsAndChildren` is then +// just a wrapper over the internal one that sets up a temporary +// environment to use for the cloning process, so that we do +// not leave any lasting changes in the user-provided `env`. +// +void cloneInstDecorationsAndChildren( + IRCloneEnv* env, + SharedIRBuilder* sharedBuilder, + IRInst* oldInst, + IRInst* newInst) +{ + SLANG_ASSERT(sharedBuilder); + SLANG_ASSERT(oldInst); + SLANG_ASSERT(newInst); + + IRCloneEnv subEnvStorage; + auto subEnv = &subEnvStorage; + subEnv->parent = env; + + _cloneInstDecorationsAndChildren(subEnv, sharedBuilder, oldInst, newInst); +} + +// The convenience function `cloneInst` just sequences the +// operations that have already been defined. +// +IRInst* cloneInst( + IRCloneEnv* env, + IRBuilder* builder, + IRInst* oldInst) +{ + SLANG_ASSERT(env); + SLANG_ASSERT(builder); + SLANG_ASSERT(oldInst); + + auto newInst = cloneInstAndOperands( + env, builder, oldInst); + + env->mapOldValToNew.Add(oldInst, newInst); + + cloneInstDecorationsAndChildren( + env, builder->sharedBuilder, oldInst, newInst); + + return newInst; +} + +bool IRSimpleSpecializationKey::operator==(IRSimpleSpecializationKey const& other) const +{ + auto valCount = vals.Count(); + if(valCount != other.vals.Count()) return false; + for( UInt ii = 0; ii < valCount; ++ii ) + { + if(vals[ii] != other.vals[ii]) return false; + } + return true; +} + +int IRSimpleSpecializationKey::GetHashCode() const +{ + auto valCount = vals.Count(); + int hash = Slang::GetHashCode(valCount); + for( UInt ii = 0; ii < valCount; ++ii ) + { + hash = combineHash(hash, Slang::GetHashCode(vals[ii])); + } + return hash; +} + + +} // namespace Slang diff --git a/source/slang/ir-clone.h b/source/slang/ir-clone.h new file mode 100644 index 
000000000..f1e27b785 --- /dev/null +++ b/source/slang/ir-clone.h @@ -0,0 +1,164 @@ +// ir-clone.h +#pragma once + +#include "../core/dictionary.h" + +#include "ir.h" + +namespace Slang +{ +struct IRBuilder; +struct IRInst; +struct SharedIRBuilder; + +// This file provides an interface to simplify the task of +// correctly "cloning" IR code, whether individual +// instructions, or whole functions. + + /// An environment for mapping existing values to their cloned replacements. + /// + /// This type serves two main roles in the process of IR cloning: + /// + /// * Before cloning begins, a client will usually + /// register the mapping from things that are to be + /// replaced entirely (like function parameters to + /// be specialized away) to their replacements (e.g., + /// a constant value). + /// + /// * During the process of cloning, the environment + /// will be maintained and updated so that when, e.g., + /// an instruction later in a function refers to + /// something from earlier, we can look up the + /// replacement. + /// +struct IRCloneEnv +{ + /// A mapping from old values to their replacements. + Dictionary<IRInst*, IRInst*> mapOldValToNew; + + /// A parent environment to fall back to if `mapOldValToNew` doesn't contain a key. + IRCloneEnv* parent = nullptr; +}; + + /// Look up the replacement for `oldVal`, if any, registered in `env`. + /// + /// Returns `nullptr` if `oldVal` has no registered replacement. + /// +IRInst* lookUp(IRCloneEnv* env, IRInst* oldVal); + +// The SSA property and the way we have structured +// our "phi nodes" (block parameters) means that +// just going through the children of a function, +// and then the children of a block will generally +// do the Right Thing and always visit an instruction +// before its uses. +// +// The big exception to this is that branch instructions +// can refer to blocks later in the same function. 
+// +// We work around this sort of problem in a fairly +// general fashion, by splitting the cloning of +// an instruction into two steps. +// +// The first step is just to clone the instruction +// and its direct operands, but not any decorations +// or children. + + /// Clone `oldInst` and its direct operands. + /// + /// The "direct operands" include the type of the instruction. + /// The type and operands of `oldInst` will be mapped to new + /// values using `findCloneForOperand` with the given `env`. + /// + /// Any new instruction that gets emitted will be output to + /// the provided `builder`, which must be non-null. + /// + /// This operation does *not* clone any children or decorations on `oldInst`. + /// This operation does *not* register its result as a replacement + /// for `oldInst` in the given `env`. + /// +IRInst* cloneInstAndOperands( + IRCloneEnv* env, + IRBuilder* builder, + IRInst* oldInst); + +// The second phase of cloning an instruction is to clone +// its decorations and children. This step only needs to +// be performed on those instructions that *have* decorations +// and/or children. + + /// Clone any decorations and/or children of `oldInst` onto `newInst` + /// + /// Any new instructions that get emitted will use the + /// provided `sharedBuilder`, which must be non-null. + /// + /// During the process of cloning decorations/children, operand values + /// will be looked up in the provided `env`, which should provide + /// replacement values for instructions that should have a different + /// identity in the clone. + /// The provided `env` will *not* be updated/modified during the + /// process of cloning decorations/children. + /// + /// If any child or decoration on `oldInst` already has a replacement + /// registered in `env`, it will *not* be cloned into `newInst`. 
+ /// +void cloneInstDecorationsAndChildren( + IRCloneEnv* env, + SharedIRBuilder* sharedBuilder, + IRInst* oldInst, + IRInst* newInst); + +// For the case where the user knows the sequencing constraints +// on cloning operands before uses can be satisfied, we provide +// a convenience wrapper around the two phases of cloning: + + /// Clone `oldInst` and return the cloned value. + /// + /// This function is a convenience wrapper around + /// `cloneInstAndOperands` and `cloneInstDecorationsAndChildren`. + /// It also registers the resulting instruction as + /// the replacement value for `oldInst` in the given `env` + /// which must therefore be non-null. + /// +IRInst* cloneInst( + IRCloneEnv* env, + IRBuilder* builder, + IRInst* oldInst); + + + /// Find the "cloned" value to use for an operand. + /// + /// This either returns the value registered for `oldOperand` + /// in `env`, or else `oldOperand` itself. +IRInst* findCloneForOperand( + IRCloneEnv* env, + IRInst* oldOperand); + +// It isn't technically part of the cloning infrastructure, +// but when making specialized copies of IR instructions via +// cloning we often need a simple kind of key suitable +// for caching existing specializations, so we'll define +// it here so that it is easily accessible to code that +// needs it. + +struct IRSimpleSpecializationKey +{ + // The structure of a specialization key will be a list + // of instructions, typically starting with the function, + // generic, or other object to be specialized, and then + // having one or more entries to represent the specialization + // arguments. + // + List<IRInst*> vals; + + // In order to use this type as a `Dictionary` key we + // need it to support equality and hashing. + // + // TODO: honestly we might consider having `GetHashCode` + // and `operator==` defined for `List<T>`. 
+ + bool operator==(IRSimpleSpecializationKey const& other) const; + int GetHashCode() const; +}; + +} diff --git a/source/slang/ir-glsl-legalize.cpp b/source/slang/ir-glsl-legalize.cpp new file mode 100644 index 000000000..b659cf293 --- /dev/null +++ b/source/slang/ir-glsl-legalize.cpp @@ -0,0 +1,1548 @@ +// ir-glsl-legalize.cpp +#include "ir-glsl-legalize.h" + +#include "ir.h" +#include "ir-insts.h" + +namespace Slang +{ + +// +// Legalization of entry points for GLSL: +// + +IRGlobalParam* addGlobalParam( + IRModule* module, + IRType* valueType) +{ + auto session = module->session; + + SharedIRBuilder shared; + shared.module = module; + shared.session = session; + + IRBuilder builder; + builder.sharedBuilder = &shared; + return builder.createGlobalParam(valueType); +} + +void moveValueBefore( + IRInst* valueToMove, + IRInst* placeBefore) +{ + valueToMove->removeFromParent(); + valueToMove->insertBefore(placeBefore); +} + +IRType* getFieldType( + IRType* baseType, + IRStructKey* fieldKey) +{ + if(auto structType = as<IRStructType>(baseType)) + { + for(auto ff : structType->getFields()) + { + if(ff->getKey() == fieldKey) + return ff->getFieldType(); + } + } + + SLANG_UNEXPECTED("no such field"); + UNREACHABLE_RETURN(nullptr); +} + + + +// When scalarizing shader inputs/outputs for GLSL, we need a way +// to refer to a conceptual "value" that might comprise multiple +// IR-level values. We could in principle introduce tuple types +// into the IR so that everything stays at the IR level, but +// it seems easier to just layer it over the top for now. +// +// The `ScalarizedVal` type deals with the "tuple or single value?" +// question, and also the "l-value or r-value?" question. 
+struct ScalarizedValImpl : RefObject +{}; +struct ScalarizedTupleValImpl; +struct ScalarizedTypeAdapterValImpl; +struct ScalarizedVal +{ + enum class Flavor + { + // no value (null pointer) + none, + + // A simple `IRInst*` that represents the actual value + value, + + // An `IRInst*` that represents the address of the actual value + address, + + // A `TupleValImpl` that represents zero or more `ScalarizedVal`s + tuple, + + // A `TypeAdapterValImpl` that wraps a single `ScalarizedVal` and + // represents an implicit type conversion applied to it on read + // or write. + typeAdapter, + }; + + // Create a value representing a simple value + static ScalarizedVal value(IRInst* irValue) + { + ScalarizedVal result; + result.flavor = Flavor::value; + result.irValue = irValue; + return result; + } + + + // Create a value representing an address + static ScalarizedVal address(IRInst* irValue) + { + ScalarizedVal result; + result.flavor = Flavor::address; + result.irValue = irValue; + return result; + } + + static ScalarizedVal tuple(ScalarizedTupleValImpl* impl) + { + ScalarizedVal result; + result.flavor = Flavor::tuple; + result.impl = (ScalarizedValImpl*)impl; + return result; + } + + static ScalarizedVal typeAdapter(ScalarizedTypeAdapterValImpl* impl) + { + ScalarizedVal result; + result.flavor = Flavor::typeAdapter; + result.impl = (ScalarizedValImpl*)impl; + return result; + } + + Flavor flavor = Flavor::none; + IRInst* irValue = nullptr; + RefPtr<ScalarizedValImpl> impl; +}; + +// This is the case for a value that is a "tuple" of other values +struct ScalarizedTupleValImpl : ScalarizedValImpl +{ + struct Element + { + IRStructKey* key; + ScalarizedVal val; + }; + + IRType* type; + List<Element> elements; +}; + +// This is the case for a value that is stored with one type, +// but needs to present itself as having a different type +struct ScalarizedTypeAdapterValImpl : ScalarizedValImpl +{ + ScalarizedVal val; + IRType* actualType; // the actual type of `val` + 
IRType* pretendType; // the type this value pretends to have +}; + +struct GlobalVaryingDeclarator +{ + enum class Flavor + { + array, + }; + + Flavor flavor; + IRInst* elementCount; + GlobalVaryingDeclarator* next; +}; + +struct GLSLSystemValueInfo +{ + // The name of the built-in GLSL variable + char const* name; + + // The name of an outer array that wraps + // the variable, in the case of a GS input + char const* outerArrayName; + + // The required type of the built-in variable + IRType* requiredType; +}; + +void requireGLSLVersionImpl( + ExtensionUsageTracker* tracker, + ProfileVersion version); + +void requireGLSLExtension( + ExtensionUsageTracker* tracker, + String const& name); + +struct GLSLLegalizationContext +{ + Session* session; + ExtensionUsageTracker* extensionUsageTracker; + DiagnosticSink* sink; + Stage stage; + + void requireGLSLExtension(String const& name) + { + Slang::requireGLSLExtension(extensionUsageTracker, name); + } + + void requireGLSLVersion(ProfileVersion version) + { + Slang::requireGLSLVersionImpl(extensionUsageTracker, version); + } + + Stage getStage() + { + return stage; + } + + DiagnosticSink* getSink() + { + return sink; + } + + IRBuilder* builder; + IRBuilder* getBuilder() { return builder; } +}; + +GLSLSystemValueInfo* getGLSLSystemValueInfo( + GLSLLegalizationContext* context, + VarLayout* varLayout, + LayoutResourceKind kind, + Stage stage, + GLSLSystemValueInfo* inStorage) +{ + char const* name = nullptr; + char const* outerArrayName = nullptr; + + auto semanticNameSpelling = varLayout->systemValueSemantic; + if(semanticNameSpelling.Length() == 0) + return nullptr; + + auto semanticName = semanticNameSpelling.ToLower(); + + IRType* requiredType = nullptr; + + if(semanticName == "sv_position") + { + // This semantic can either work like `gl_FragCoord` + // when it is used as a fragment shader input, or + // like `gl_Position` when used in other stages. 
+ // + // Note: This isn't as simple as testing input-vs-output, + // because a user might have a VS output `SV_Position`, + // and then pass it along to a GS that reads it as input. + // + if( stage == Stage::Fragment + && kind == LayoutResourceKind::VaryingInput ) + { + name = "gl_FragCoord"; + } + else if( stage == Stage::Geometry + && kind == LayoutResourceKind::VaryingInput ) + { + // As a GS input, the correct syntax is `gl_in[...].gl_Position`, + // but that is not compatible with picking the array dimension later, + // of course. + outerArrayName = "gl_in"; + name = "gl_Position"; + } + else + { + name = "gl_Position"; + } + } + else if(semanticName == "sv_target") + { + // Note: we do *not* need to generate some kind of `gl_` + // builtin for fragment-shader outputs: they are just + // ordinary `out` variables, with ordinary `location`s, + // as far as GLSL is concerned. + return nullptr; + } + else if(semanticName == "sv_clipdistance") + { + // TODO: type conversion is required here. + name = "gl_ClipDistance"; + } + else if(semanticName == "sv_culldistance") + { + context->requireGLSLExtension("ARB_cull_distance"); + + // TODO: type conversion is required here. + name = "gl_CullDistance"; + } + else if(semanticName == "sv_coverage") + { + // TODO: deal with `gl_SampleMaskIn` when used as an input. + + // TODO: type conversion is required here. 
+ name = "gl_SampleMask"; + } + else if(semanticName == "sv_depth") + { + name = "gl_FragDepth"; + } + else if(semanticName == "sv_depthgreaterequal") + { + // TODO: layout(depth_greater) out float gl_FragDepth; + name = "gl_FragDepth"; + } + else if(semanticName == "sv_depthlessequal") + { + // TODO: layout(depth_less) out float gl_FragDepth; + name = "gl_FragDepth"; + } + else if(semanticName == "sv_dispatchthreadid") + { + name = "gl_GlobalInvocationID"; + } + else if(semanticName == "sv_domainlocation") + { + name = "gl_TessCoord"; + } + else if(semanticName == "sv_groupid") + { + name = "gl_WorkGroupID"; + } + else if(semanticName == "sv_groupindex") + { + name = "gl_LocalInvocationIndex"; + } + else if(semanticName == "sv_groupthreadid") + { + name = "gl_LocalInvocationID"; + } + else if(semanticName == "sv_gsinstanceid") + { + name = "gl_InvocationID"; + } + else if(semanticName == "sv_instanceid") + { + name = "gl_InstanceIndex"; + } + else if(semanticName == "sv_isfrontface") + { + name = "gl_FrontFacing"; + } + else if(semanticName == "sv_outputcontrolpointid") + { + name = "gl_InvocationID"; + } + else if(semanticName == "sv_primitiveid") + { + name = "gl_PrimitiveID"; + } + else if (semanticName == "sv_rendertargetarrayindex") + { + switch (context->getStage()) + { + case Stage::Geometry: + context->requireGLSLVersion(ProfileVersion::GLSL_150); + break; + + case Stage::Fragment: + context->requireGLSLVersion(ProfileVersion::GLSL_430); + break; + + default: + context->requireGLSLVersion(ProfileVersion::GLSL_450); + context->requireGLSLExtension("GL_ARB_shader_viewport_layer_array"); + break; + } + + name = "gl_Layer"; + requiredType = context->getBuilder()->getBasicType(BaseType::Int); + } + else if (semanticName == "sv_sampleindex") + { + name = "gl_SampleID"; + } + else if (semanticName == "sv_stencilref") + { + context->requireGLSLExtension("ARB_shader_stencil_export"); + name = "gl_FragStencilRef"; + } + else if (semanticName == "sv_tessfactor") + 
{ + name = "gl_TessLevelOuter"; + } + else if (semanticName == "sv_vertexid") + { + name = "gl_VertexIndex"; + } + else if (semanticName == "sv_viewportarrayindex") + { + name = "gl_ViewportIndex"; + } + else if (semanticName == "nv_x_right") + { + context->requireGLSLVersion(ProfileVersion::GLSL_450); + context->requireGLSLExtension("GL_NVX_multiview_per_view_attributes"); + + // The actual output in GLSL is: + // + // vec4 gl_PositionPerViewNV[]; + // + // and is meant to support an arbitrary number of views, + // while the HLSL case just defines a second position + // output. + // + // For now we will hack this by: + // 1. Mapping an `NV_X_Right` output to `gl_PositionPerViewNV[1]` + // (that is, just one element of the output array) + // 2. Adding logic to copy the traditional `gl_Position` output + // over to `gl_PositionPerViewNV[0]` + // + + name = "gl_PositionPerViewNV[1]"; + +// shared->requiresCopyGLPositionToPositionPerView = true; + } + else if (semanticName == "nv_viewport_mask") + { + context->requireGLSLVersion(ProfileVersion::GLSL_450); + context->requireGLSLExtension("GL_NVX_multiview_per_view_attributes"); + + name = "gl_ViewportMaskPerViewNV"; +// globalVarExpr = createGLSLBuiltinRef("gl_ViewportMaskPerViewNV", +// getUnsizedArrayType(getIntType())); + } + + if( name ) + { + inStorage->name = name; + inStorage->outerArrayName = outerArrayName; + inStorage->requiredType = requiredType; + return inStorage; + } + + context->getSink()->diagnose(varLayout->varDecl.getDecl()->loc, Diagnostics::unknownSystemValueSemantic, semanticNameSpelling); + return nullptr; +} + +ScalarizedVal createSimpleGLSLGlobalVarying( + GLSLLegalizationContext* context, + IRBuilder* builder, + IRType* inType, + VarLayout* inVarLayout, + TypeLayout* inTypeLayout, + LayoutResourceKind kind, + Stage stage, + UInt bindingIndex, + GlobalVaryingDeclarator* declarator) +{ + // Check if we have a system value on our hands. 
+ GLSLSystemValueInfo systemValueInfoStorage; + auto systemValueInfo = getGLSLSystemValueInfo( + context, + inVarLayout, + kind, + stage, + &systemValueInfoStorage); + + IRType* type = inType; + + // A system-value semantic might end up needing to override the type + // that the user specified. + if( systemValueInfo && systemValueInfo->requiredType ) + { + type = systemValueInfo->requiredType; + } + + // Construct the actual type and type-layout for the global variable + // + RefPtr<TypeLayout> typeLayout = inTypeLayout; + for( auto dd = declarator; dd; dd = dd->next ) + { + // We only have one declarator case right now... + SLANG_ASSERT(dd->flavor == GlobalVaryingDeclarator::Flavor::array); + + auto arrayType = builder->getArrayType( + type, + dd->elementCount); + + RefPtr<ArrayTypeLayout> arrayTypeLayout = new ArrayTypeLayout(); +// arrayTypeLayout->type = arrayType; + arrayTypeLayout->rules = typeLayout->rules; + arrayTypeLayout->originalElementTypeLayout = typeLayout; + arrayTypeLayout->elementTypeLayout = typeLayout; + arrayTypeLayout->uniformStride = 0; + + if( auto resInfo = inTypeLayout->FindResourceInfo(kind) ) + { + // TODO: it is kind of gross to be re-running some + // of the type layout logic here. + + UInt elementCount = (UInt) GetIntVal(dd->elementCount); + arrayTypeLayout->addResourceUsage( + kind, + resInfo->count * elementCount); + } + + type = arrayType; + typeLayout = arrayTypeLayout; + } + + // We need to construct a fresh layout for the variable, even + // if the original had its own layout, because it might be + // an `inout` parameter, and we only want to deal with the case + // described by our `kind` parameter. 
+ RefPtr<VarLayout> varLayout = new VarLayout(); + varLayout->varDecl = inVarLayout->varDecl; + varLayout->typeLayout = typeLayout; + varLayout->flags = inVarLayout->flags; + varLayout->systemValueSemantic = inVarLayout->systemValueSemantic; + varLayout->systemValueSemanticIndex = inVarLayout->systemValueSemanticIndex; + varLayout->semanticName = inVarLayout->semanticName; + varLayout->semanticIndex = inVarLayout->semanticIndex; + varLayout->stage = inVarLayout->stage; + varLayout->AddResourceInfo(kind)->index = bindingIndex; + + // We are going to be creating a global parameter to replace + // the function parameter, but we need to handle the case + // where the parameter represents a varying *output* and not + // just an input. + // + // Our IR global shader parameters are read-only, just + // like our IR function parameters, and need a wrapper + // `Out<...>` type to represent outputs. + // + bool isOutput = kind == LayoutResourceKind::VaryingOutput; + IRType* paramType = isOutput ? builder->getOutType(type) : type; + + auto globalParam = addGlobalParam(builder->getModule(), paramType); + moveValueBefore(globalParam, builder->getFunc()); + + ScalarizedVal val = isOutput ? ScalarizedVal::address(globalParam) : ScalarizedVal::value(globalParam); + + if( systemValueInfo ) + { + builder->addImportDecoration(globalParam, UnownedTerminatedStringSlice(systemValueInfo->name)); + + if( auto fromType = systemValueInfo->requiredType ) + { + // We may need to adapt from the declared type to/from + // the actual type of the GLSL global. 
+            auto toType = inType;
+
+            if( fromType != toType )
+            {
+                RefPtr<ScalarizedTypeAdapterValImpl> typeAdapter = new ScalarizedTypeAdapterValImpl;
+                typeAdapter->actualType = systemValueInfo->requiredType;
+                typeAdapter->pretendType = inType;
+                typeAdapter->val = val;
+
+                val = ScalarizedVal::typeAdapter(typeAdapter);
+            }
+        }
+
+        if(auto outerArrayName = systemValueInfo->outerArrayName)
+        {
+            builder->addGLSLOuterArrayDecoration(globalParam, UnownedTerminatedStringSlice(outerArrayName));
+        }
+    }
+
+    builder->addLayoutDecoration(globalParam, varLayout);
+
+    return val;
+}
+
+// Build the `ScalarizedVal` that stands in for a varying of type `type`.
+//
+// The function dispatches on the IR type:
+//
+// * basic, vector and matrix types (and anything not matched below) are
+//   handed straight to `createSimpleGLSLGlobalVarying`;
+// * array types recurse on the element type with one more array
+//   declarator pushed onto the front of the `declarator` chain;
+// * HLSL stream-output types recurse on their element type with the
+//   declarator chain unchanged;
+// * struct types produce a tuple with one element per field, each
+//   created by a recursive call.
+//
+// `varLayout`/`typeLayout` are the layout for the value being processed,
+// `kind` selects which resource info is consulted for field offsets, and
+// `bindingIndex` is the index accumulated so far for that kind.
+ScalarizedVal createGLSLGlobalVaryingsImpl(
+    GLSLLegalizationContext*    context,
+    IRBuilder*                  builder,
+    IRType*                     type,
+    VarLayout*                  varLayout,
+    TypeLayout*                 typeLayout,
+    LayoutResourceKind          kind,
+    Stage                       stage,
+    UInt                        bindingIndex,
+    GlobalVaryingDeclarator*    declarator)
+{
+    if( as<IRBasicType>(type) )
+    {
+        return createSimpleGLSLGlobalVarying(
+            context,
+            builder, type, varLayout, typeLayout, kind, stage, bindingIndex, declarator);
+    }
+    else if( as<IRVectorType>(type) )
+    {
+        return createSimpleGLSLGlobalVarying(
+            context,
+            builder, type, varLayout, typeLayout, kind, stage, bindingIndex, declarator);
+    }
+    else if( as<IRMatrixType>(type) )
+    {
+        // TODO: a matrix-type varying should probably be handled like an array of rows
+        return createSimpleGLSLGlobalVarying(
+            context,
+            builder, type, varLayout, typeLayout, kind, stage, bindingIndex, declarator);
+    }
+    else if( auto arrayType = as<IRArrayType>(type) )
+    {
+        // We will need to SOA-ize any nested types.
+
+        auto elementType = arrayType->getElementType();
+        auto elementCount = arrayType->getElementCount();
+        auto arrayLayout = dynamic_cast<ArrayTypeLayout*>(typeLayout);
+        SLANG_ASSERT(arrayLayout);
+        auto elementTypeLayout = arrayLayout->elementTypeLayout;
+
+        // The declarator lives on this stack frame and links to the
+        // caller's chain, so it is only valid for the recursive call below.
+        GlobalVaryingDeclarator arrayDeclarator;
+        arrayDeclarator.flavor = GlobalVaryingDeclarator::Flavor::array;
+        arrayDeclarator.elementCount = elementCount;
+        arrayDeclarator.next = declarator;
+
+        return createGLSLGlobalVaryingsImpl(
+            context,
+            builder,
+            elementType,
+            varLayout,
+            elementTypeLayout,
+            kind,
+            stage,
+            bindingIndex,
+            &arrayDeclarator);
+    }
+    else if( auto streamType = as<IRHLSLStreamOutputType>(type))
+    {
+        // A stream-output wrapper contributes nothing of its own:
+        // recurse on the element type with the same declarators.
+        auto elementType = streamType->getElementType();
+        auto streamLayout = dynamic_cast<StreamOutputTypeLayout*>(typeLayout);
+        SLANG_ASSERT(streamLayout);
+        auto elementTypeLayout = streamLayout->elementTypeLayout;
+
+        return createGLSLGlobalVaryingsImpl(
+            context,
+            builder,
+            elementType,
+            varLayout,
+            elementTypeLayout,
+            kind,
+            stage,
+            bindingIndex,
+            declarator);
+    }
+    else if(auto structType = as<IRStructType>(type))
+    {
+        // We need to recurse down into the individual fields,
+        // and generate a variable for each of them.
+
+        auto structTypeLayout = dynamic_cast<StructTypeLayout*>(typeLayout);
+        SLANG_ASSERT(structTypeLayout);
+        RefPtr<ScalarizedTupleValImpl> tupleValImpl = new ScalarizedTupleValImpl();
+
+
+        // Construct the actual type for the tuple (including any outer arrays)
+        //
+        // The chain is walked from its head (the most recently pushed,
+        // i.e. innermost, array) outwards, wrapping one array type per entry.
+        IRType* fullType = type;
+        for( auto dd = declarator; dd; dd = dd->next )
+        {
+            SLANG_ASSERT(dd->flavor == GlobalVaryingDeclarator::Flavor::array);
+            fullType = builder->getArrayType(
+                fullType,
+                dd->elementCount);
+        }
+
+        tupleValImpl->type = fullType;
+
+        // Okay, we want to walk through the fields here, and
+        // generate one variable for each.
+        //
+        // The IR fields and the layout's `fields` array are indexed
+        // in lock-step via `fieldCounter`.
+        UInt fieldCounter = 0;
+        for(auto field : structType->getFields())
+        {
+            UInt fieldIndex = fieldCounter++;
+
+            auto fieldLayout = structTypeLayout->fields[fieldIndex];
+
+            // Offset the binding index by the field's own index for
+            // this resource kind, when the field has one.
+            UInt fieldBindingIndex = bindingIndex;
+            if(auto fieldResInfo = fieldLayout->FindResourceInfo(kind))
+                fieldBindingIndex += fieldResInfo->index;
+
+            auto fieldVal = createGLSLGlobalVaryingsImpl(
+                context,
+                builder,
+                field->getFieldType(),
+                fieldLayout,
+                fieldLayout->typeLayout,
+                kind,
+                stage,
+                fieldBindingIndex,
+                declarator);
+
+            ScalarizedTupleValImpl::Element element;
+            element.val = fieldVal;
+            element.key = field->getKey();
+
+            tupleValImpl->elements.Add(element);
+        }
+
+        return ScalarizedVal::tuple(tupleValImpl);
+    }
+
+    // Default case is to fall back on the simple behavior
+    return createSimpleGLSLGlobalVarying(
+        context,
+        builder, type, varLayout, typeLayout, kind, stage, bindingIndex, declarator);
+}
+
+// Create the global varying(s) for a value of type `type` described by `layout`.
+//
+// This is the non-recursive entry point: it starts from the index that
+// `layout` records for resource kind `kind` (or zero when there is none)
+// and from an empty declarator chain.
+ScalarizedVal createGLSLGlobalVaryings(
+    GLSLLegalizationContext*    context,
+    IRBuilder*                  builder,
+    IRType*                     type,
+    VarLayout*                  layout,
+    LayoutResourceKind          kind,
+    Stage                       stage)
+{
+    UInt bindingIndex = 0;
+    if(auto rr = layout->FindResourceInfo(kind))
+        bindingIndex = rr->index;
+    return createGLSLGlobalVaryingsImpl(
+        context,
+        builder, type, layout, layout->typeLayout, kind, stage, bindingIndex, nullptr);
+}
+
+ScalarizedVal extractField(
+    IRBuilder*              builder,
+    ScalarizedVal const&    val,
+    UInt                    fieldIndex,
+    IRStructKey*            fieldKey)
+{
+    switch( val.flavor )
+    {
+    case ScalarizedVal::Flavor::value:
+        return ScalarizedVal::value(
+            builder->emitFieldExtract(
+                getFieldType(val.irValue->getDataType(), fieldKey),
+                val.irValue,
+                fieldKey));
+
+    case ScalarizedVal::Flavor::address:
+        {
+            auto ptrType = as<IRPtrTypeBase>(val.irValue->getDataType());
+            auto valType = ptrType->getValueType();
+            auto fieldType = getFieldType(valType, fieldKey);
+            auto fieldPtrType = builder->getPtrType(ptrType->op, fieldType);
+            return ScalarizedVal::address(
+                builder->emitFieldAddress(
+
fieldPtrType,
+                    val.irValue,
+                    fieldKey));
+        }
+
+    case ScalarizedVal::Flavor::tuple:
+        {
+            auto tupleVal = val.impl.As<ScalarizedTupleValImpl>();
+            return tupleVal->elements[fieldIndex].val;
+        }
+
+    default:
+        SLANG_UNEXPECTED("unimplemented");
+        UNREACHABLE_RETURN(ScalarizedVal());
+    }
+
+}
+
+// Convert the IR value `val` to `toType` by emitting a one-argument
+// constructor instruction. The source type is currently ignored.
+ScalarizedVal adaptType(
+    IRBuilder*  builder,
+    IRInst*     val,
+    IRType*     toType,
+    IRType*     /*fromType*/)
+{
+    // TODO: actually consider what needs to go on here...
+    return ScalarizedVal::value(builder->emitConstructorInst(
+        toType,
+        1,
+        &val));
+}
+
+// Convert a scalarized value from `fromType` to `toType`.
+//
+// A plain value is converted directly; an address is loaded first and
+// the loaded value is converted. Other flavors are not implemented.
+ScalarizedVal adaptType(
+    IRBuilder*              builder,
+    ScalarizedVal const&    val,
+    IRType*                 toType,
+    IRType*                 fromType)
+{
+    switch( val.flavor )
+    {
+    case ScalarizedVal::Flavor::value:
+        return adaptType(builder, val.irValue, toType, fromType);
+
+    case ScalarizedVal::Flavor::address:
+        {
+            auto loaded = builder->emitLoad(val.irValue);
+            return adaptType(builder, loaded, toType, fromType);
+        }
+
+    default:
+        SLANG_UNEXPECTED("unimplemented");
+        UNREACHABLE_RETURN(ScalarizedVal());
+    }
+}
+
+// Emit the code that assigns `right` to `left`.
+//
+// * An address on the left is stored to directly (loading `right` first
+//   when it is itself an address); a tuple on the right is assigned
+//   field by field.
+// * A tuple on the left is assigned element by element, extracting the
+//   matching field from `right`.
+// * A type adapter on the left converts `right` to the adapter's actual
+//   type and assigns to the wrapped value.
+//
+// Any other combination is reported as unimplemented.
+void assign(
+    IRBuilder*              builder,
+    ScalarizedVal const&    left,
+    ScalarizedVal const&    right)
+{
+    switch( left.flavor )
+    {
+    case ScalarizedVal::Flavor::address:
+        switch( right.flavor )
+        {
+        case ScalarizedVal::Flavor::value:
+            {
+                builder->emitStore(left.irValue, right.irValue);
+            }
+            break;
+
+        case ScalarizedVal::Flavor::address:
+            {
+                auto val = builder->emitLoad(right.irValue);
+                builder->emitStore(left.irValue, val);
+            }
+            break;
+
+        case ScalarizedVal::Flavor::tuple:
+            {
+                // We are assigning from a tuple to a destination
+                // that is not a tuple. We will perform assignment
+                // element-by-element.
+                auto rightTupleVal = right.impl.As<ScalarizedTupleValImpl>();
+                UInt elementCount = rightTupleVal->elements.Count();
+
+                for( UInt ee = 0; ee < elementCount; ++ee )
+                {
+                    auto rightElement = rightTupleVal->elements[ee];
+                    auto leftElementVal = extractField(
+                        builder,
+                        left,
+                        ee,
+                        rightElement.key);
+                    assign(builder, leftElementVal, rightElement.val);
+                }
+            }
+            break;
+
+        default:
+            SLANG_UNEXPECTED("unimplemented");
+            break;
+        }
+        break;
+
+    case ScalarizedVal::Flavor::tuple:
+        {
+            // We have a tuple, so we are going to need to try and assign
+            // to each of its constituent fields.
+            auto leftTupleVal = left.impl.As<ScalarizedTupleValImpl>();
+            UInt elementCount = leftTupleVal->elements.Count();
+
+            for( UInt ee = 0; ee < elementCount; ++ee )
+            {
+                auto rightElementVal = extractField(
+                    builder,
+                    right,
+                    ee,
+                    leftTupleVal->elements[ee].key);
+                assign(builder, leftTupleVal->elements[ee].val, rightElementVal);
+            }
+        }
+        break;
+
+    case ScalarizedVal::Flavor::typeAdapter:
+        {
+            // We are trying to assign to something that had its type adjusted,
+            // so we will need to adjust the type of the right-hand side first.
+            //
+            // In this case we are converting to the actual type of the GLSL variable,
+            // from the "pretend" type that it had in the IR before.
+            auto typeAdapter = left.impl.As<ScalarizedTypeAdapterValImpl>();
+            auto adaptedRight = adaptType(builder, right, typeAdapter->actualType, typeAdapter->pretendType);
+            assign(builder, typeAdapter->val, adaptedRight);
+        }
+        break;
+
+    default:
+        SLANG_UNEXPECTED("unimplemented");
+        break;
+    }
+}
+
+// Index into a scalarized array value with the IR value `indexVal`.
+//
+// `elementType` is the type of one element of the array. A plain value
+// or address is subscripted with a single instruction. A tuple holds one
+// array per struct field, so each element of the tuple is subscripted in
+// turn and the results are gathered into a new tuple of type `elementType`.
+ScalarizedVal getSubscriptVal(
+    IRBuilder*      builder,
+    IRType*         elementType,
+    ScalarizedVal   val,
+    IRInst*         indexVal)
+{
+    switch( val.flavor )
+    {
+    case ScalarizedVal::Flavor::value:
+        return ScalarizedVal::value(
+            builder->emitElementExtract(
+                elementType,
+                val.irValue,
+                indexVal));
+
+    case ScalarizedVal::Flavor::address:
+        // NOTE(review): unlike `extractField`, this always builds a plain
+        // pointer type rather than reusing the op of the source pointer
+        // type; confirm that this is intended.
+        return ScalarizedVal::address(
+            builder->emitElementAddress(
+                builder->getPtrType(elementType),
+                val.irValue,
+                indexVal));
+
+    case ScalarizedVal::Flavor::tuple:
+        {
+            auto inputTuple = val.impl.As<ScalarizedTupleValImpl>();
+
+            RefPtr<ScalarizedTupleValImpl> resultTuple = new ScalarizedTupleValImpl();
+            resultTuple->type = elementType;
+
+            UInt elementCount = inputTuple->elements.Count();
+            UInt elementCounter = 0;
+
+            // The tuple elements are matched up with the fields of the
+            // element type, so that type must be a struct. Fail loudly
+            // instead of dereferencing a null pointer when it is not
+            // (e.g., when the element is itself an array).
+            auto structType = as<IRStructType>(elementType);
+            SLANG_RELEASE_ASSERT(structType);
+            for(auto field : structType->getFields())
+            {
+                auto tupleElementType = field->getFieldType();
+
+                UInt elementIndex = elementCounter++;
+
+                SLANG_RELEASE_ASSERT(elementIndex < elementCount);
+                auto inputElement = inputTuple->elements[elementIndex];
+
+                ScalarizedTupleValImpl::Element resultElement;
+                resultElement.key = inputElement.key;
+                resultElement.val = getSubscriptVal(
+                    builder,
+                    tupleElementType,
+                    inputElement.val,
+                    indexVal);
+
+                resultTuple->elements.Add(resultElement);
+            }
+            SLANG_RELEASE_ASSERT(elementCounter == elementCount);
+
+            return ScalarizedVal::tuple(resultTuple);
+        }
+
+    default:
+        SLANG_UNEXPECTED("unimplemented");
+        UNREACHABLE_RETURN(ScalarizedVal());
+    }
+}
+
+ScalarizedVal getSubscriptVal(
+    IRBuilder*      builder,
+    IRType*         elementType,
+    ScalarizedVal   val,
+    UInt            index)
+{
+    return getSubscriptVal(
+        builder,
+        elementType,
+        val,
+
builder->getIntValue(
+            builder->getIntType(),
+            index));
+}
+
+// Forward declaration: `materializeTupleValue` and `materializeValue`
+// are mutually recursive.
+IRInst* materializeValue(
+    IRBuilder*              builder,
+    ScalarizedVal const&    val);
+
+// Turn a tuple-flavored scalarized value into a single IR value.
+//
+// When the tuple's type is an array, one value is materialized per array
+// element and the results are combined with a make-array instruction.
+// Otherwise each tuple element is materialized and the results are passed
+// to a constructor instruction for the tuple's type.
+IRInst* materializeTupleValue(
+    IRBuilder*      builder,
+    ScalarizedVal   val)
+{
+    auto tupleVal = val.impl.As<ScalarizedTupleValImpl>();
+    SLANG_ASSERT(tupleVal);
+
+    UInt elementCount = tupleVal->elements.Count();
+    auto type = tupleVal->type;
+
+    if( auto arrayType = as<IRArrayType>(type))
+    {
+        // The tuple represents an array, which means that the
+        // individual elements are expected to yield arrays as well.
+        //
+        // We will extract a value for each array element, and
+        // then use these to construct our result.
+
+        List<IRInst*> arrayElementVals;
+        UInt arrayElementCount = (UInt) GetIntVal(arrayType->getElementCount());
+
+        for( UInt ii = 0; ii < arrayElementCount; ++ii )
+        {
+            auto arrayElementPseudoVal = getSubscriptVal(
+                builder,
+                arrayType->getElementType(),
+                val,
+                ii);
+
+            auto arrayElementVal = materializeValue(
+                builder,
+                arrayElementPseudoVal);
+
+            arrayElementVals.Add(arrayElementVal);
+        }
+
+        return builder->emitMakeArray(
+            arrayType,
+            arrayElementVals.Count(),
+            arrayElementVals.Buffer());
+    }
+    else
+    {
+        // The tuple represents a value of some aggregate type,
+        // so we can simply materialize the elements and then
+        // construct a value of that type.
+        //
+        // TODO: this should be using a `makeStruct` instruction.
+
+        List<IRInst*> elementVals;
+        for( UInt ee = 0; ee < elementCount; ++ee )
+        {
+            auto elementVal = materializeValue(builder, tupleVal->elements[ee].val);
+            elementVals.Add(elementVal);
+        }
+
+        return builder->emitConstructorInst(
+            tupleVal->type,
+            elementVals.Count(),
+            elementVals.Buffer());
+    }
+}
+
+// Produce a single IR value for a scalarized value of any flavor.
+//
+// A plain value is returned as-is, an address is loaded, a tuple is
+// rebuilt by `materializeTupleValue`, and a type adapter is converted
+// from its actual type to its pretend type before being materialized.
+//
+// Every path now returns a value: the unexpected-flavor case ends with
+// `UNREACHABLE_RETURN`, like the sibling helpers in this file, instead of
+// flowing off the end of a function that returns `IRInst*`.
+IRInst* materializeValue(
+    IRBuilder*              builder,
+    ScalarizedVal const&    val)
+{
+    switch( val.flavor )
+    {
+    case ScalarizedVal::Flavor::value:
+        return val.irValue;
+
+    case ScalarizedVal::Flavor::address:
+        {
+            auto loadInst = builder->emitLoad(val.irValue);
+            return loadInst;
+        }
+
+    case ScalarizedVal::Flavor::tuple:
+        return materializeTupleValue(builder, val);
+
+    case ScalarizedVal::Flavor::typeAdapter:
+        {
+            // Somebody is trying to use a value where its actual type
+            // doesn't match the type it pretends to have. To make this
+            // work we need to adapt the type from its actual type over
+            // to its pretend type.
+            auto typeAdapter = val.impl.As<ScalarizedTypeAdapterValImpl>();
+            auto adapted = adaptType(builder, typeAdapter->val, typeAdapter->pretendType, typeAdapter->actualType);
+            return materializeValue(builder, adapted);
+        }
+
+    default:
+        SLANG_UNEXPECTED("unimplemented");
+        UNREACHABLE_RETURN(nullptr);
+    }
+}
+
+void legalizeRayTracingEntryPointParameterForGLSL(
+    GLSLLegalizationContext*    context,
+    IRFunc*                     func,
+    IRParam*                    pp,
+    VarLayout*                  paramLayout)
+{
+    auto builder = context->getBuilder();
+    auto paramType = pp->getDataType();
+
+    // The parameter might be either an `in` parameter,
+    // or an `out` or `in out` parameter, and in those
+    // latter cases its IR-level type will include a
+    // wrapping "pointer-like" type (e.g., `Out<Float>`
+    // instead of just `Float`).
+    //
+    // Because global shader parameters are read-only
+    // in the same way function types are, we can take
+    // care of that detail here just by allocating a
+    // global shader parameter with exactly the type
+    // of the original function parameter.
+    //
+    auto globalParam = addGlobalParam(builder->getModule(), paramType);
+    builder->addLayoutDecoration(globalParam, paramLayout);
+    moveValueBefore(globalParam, builder->getFunc());
+    pp->replaceUsesWith(globalParam);
+
+    // Because linkage between ray-tracing shaders is
+    // based on the type of incoming/outgoing payload
+    // and attribute parameters, it would be an error to
+    // eliminate the global parameter *even if* it is
+    // not actually used inside the entry point.
+    //
+    // We attach a decoration to the entry point that
+    // makes note of the dependency, so that steps
+    // like dead code elimination cannot get rid of
+    // the parameter.
+    //
+    // TODO: We could consider using a structure like
+    // this for *all* of the entry point parameters
+    // that get moved to the global scope, since SPIR-V
+    // ends up requiring such information on an `OpEntryPoint`.
+    //
+    // As a further alternative, we could decide to
+    // keep entry point varying input/output attached
+    // to the parameter list through all of the Slang IR
+    // steps, and only declare it as global variables at
+    // the last minute when emitting a GLSL `main` or
+    // SPIR-V for an entry point.
+    //
+    builder->addDependsOnDecoration(func, globalParam);
+}
+
+// Replace one entry-point parameter `pp` of `func` with global state.
+//
+// Four cases are handled, in this order:
+//
+// 1. An `out`-style parameter whose value type is an HLSL stream-output
+//    type: global outputs are created, and every call in `func` to a
+//    function whose GLSL target intrinsic is `EmitVertex()` is preceded
+//    by an assignment to those outputs.
+// 2. Ray-tracing stages: handled by
+//    `legalizeRayTracingEntryPointParameterForGLSL`.
+// 3. Any other `out`/`in out` parameter: a local variable replaces the
+//    parameter, is initialized from global inputs for `in out`, and is
+//    copied to global outputs before each return.
+// 4. A plain input parameter: global inputs are created and a
+//    materialized value replaces the parameter.
+//
+// `paramLayout` is the layout for `pp`. New instructions are emitted at
+// the current insertion point of the builder held by `context`.
+void legalizeEntryPointParameterForGLSL(
+    GLSLLegalizationContext*    context,
+    IRFunc*                     func,
+    IRParam*                    pp,
+    VarLayout*                  paramLayout)
+{
+    auto builder = context->getBuilder();
+    auto stage = context->getStage();
+
+    // We need to create a global variable that will replace the parameter.
+    // It seems superficially obvious that the variable should have
+    // the same type as the parameter.
+    // However, if the parameter was a pointer, in order to
+    // support `out` or `in out` parameter passing, we need
+    // to be sure to allocate a variable of the pointed-to
+    // type instead.
+    //
+    // We also need to replace uses of the parameter with
+    // uses of the variable, and the exact logic there
+    // will differ a bit between the pointer and non-pointer
+    // cases.
+    auto paramType = pp->getDataType();
+
+    // First we will special-case stage input/outputs that
+    // don't fit into the standard varying model.
+    // For right now we are only doing special-case handling
+    // of geometry shader output streams.
+    if( auto paramPtrType = as<IROutTypeBase>(paramType) )
+    {
+        auto valueType = paramPtrType->getValueType();
+        if( auto gsStreamType = as<IRHLSLStreamOutputType>(valueType) )
+        {
+            // An output stream type like `TriangleStream<Foo>` should
+            // more or less translate into `out Foo` (plus scalarization).
+
+            auto globalOutputVal = createGLSLGlobalVaryings(
+                context,
+                builder,
+                valueType,
+                paramLayout,
+                LayoutResourceKind::VaryingOutput,
+                stage);
+
+            // TODO: a GS output stream might be passed into other
+            // functions, so that we should really be modifying
+            // any function that has one of these in its parameter
+            // list (and in the limit we should be legalizing any
+            // type that nests these...).
+            //
+            // For now we will just try to deal with `Append` calls
+            // directly in this function.
+
+
+
+            for( auto bb = func->getFirstBlock(); bb; bb = bb->getNextBlock() )
+            {
+                for( auto ii = bb->getFirstInst(); ii; ii = ii->getNextInst() )
+                {
+                    // Is it a call?
+                    if(ii->op != kIROp_Call)
+                        continue;
+
+                    // Is it calling the append operation?
+                    auto callee = ii->getOperand(0);
+                    for(;;)
+                    {
+                        // If the instruction is `specialize(X,...)` then
+                        // we want to look at `X`, and if it is `generic { ... return R; }`
+                        // then we want to look at `R`. We handle this
+                        // iteratively here.
+                        //
+                        // TODO: This idiom seems to come up enough that we
+                        // should probably have a dedicated convenience routine
+                        // for this.
+                        //
+                        // Alternatively, we could switch the IR encoding so
+                        // that decorations are added to the generic instead of the
+                        // value it returns.
+                        //
+                        switch(callee->op)
+                        {
+                        case kIROp_Specialize:
+                            {
+                                callee = cast<IRSpecialize>(callee)->getOperand(0);
+                                continue;
+                            }
+
+                        case kIROp_Generic:
+                            {
+                                auto genericResult = findGenericReturnVal(cast<IRGeneric>(callee));
+                                if(genericResult)
+                                {
+                                    callee = genericResult;
+                                    continue;
+                                }
+                            }
+                            // No return value was found: control falls
+                            // through to `default` and the search stops.
+
+                        default:
+                            break;
+                        }
+                        break;
+                    }
+                    if(callee->op != kIROp_Func)
+                        continue;
+
+                    // HACK: we will identify the operation based
+                    // on the target-intrinsic definition that was
+                    // given to it.
+                    auto decoration = findTargetIntrinsicDecoration(callee, "glsl");
+                    if(!decoration)
+                        continue;
+
+                    if(decoration->getDefinition() != UnownedStringSlice::fromLiteral("EmitVertex()"))
+                    {
+                        continue;
+                    }
+
+                    // Okay, we have a declaration, and we want to modify it!
+
+                    builder->setInsertBefore(ii);
+
+                    // NOTE(review): operand 0 is the callee (see above);
+                    // operand 2 is presumably the vertex being appended,
+                    // after the stream argument - confirm.
+                    assign(builder, globalOutputVal, ScalarizedVal::value(ii->getOperand(2)));
+                }
+            }
+
+            return;
+        }
+    }
+
+    // When we have an HLSL ray tracing shader entry point,
+    // we don't want to translate the inputs/outputs for GLSL/SPIR-V
+    // according to our default rules, for two reasons:
+    //
+    // 1. The input and output for these stages are expected to
+    // be packaged into `struct` types rather than be scalarized,
+    // so the usual scalarization approach we take here should
+    // not be applied.
+    //
+    // 2. An `in out` parameter isn't just sugar for a combination
+    // of an `in` and an `out` parameter, and instead represents the
+    // read/write "payload" that was passed in. It should legalize
+    // to a single variable, and we can lower reads/writes of it
+    // directly, rather than introduce an intermediate temporary.
+    //
+    switch( stage )
+    {
+    default:
+        break;
+
+    case Stage::AnyHit:
+    case Stage::Callable:
+    case Stage::ClosestHit:
+    case Stage::Intersection:
+    case Stage::Miss:
+    case Stage::RayGeneration:
+        legalizeRayTracingEntryPointParameterForGLSL(context, func, pp, paramLayout);
+        return;
+    }
+
+    // Is the parameter type a special pointer type
+    // that indicates the parameter is used for `out`
+    // or `inout` access?
+    if(auto paramPtrType = as<IROutTypeBase>(paramType) )
+    {
+        // Okay, we have the more interesting case here,
+        // where the parameter was being passed by reference.
+        // We are going to create a local variable of the appropriate
+        // type, which will replace the parameter, along with
+        // one or more global variables for the actual input/output.
+
+        auto valueType = paramPtrType->getValueType();
+
+        auto localVariable = builder->emitVar(valueType);
+        auto localVal = ScalarizedVal::address(localVariable);
+
+        if( auto inOutType = as<IRInOutType>(paramPtrType) )
+        {
+            // In the `in out` case we need to declare two
+            // sets of global variables: one for the `in`
+            // side and one for the `out` side.
+            auto globalInputVal = createGLSLGlobalVaryings(
+                context,
+                builder, valueType, paramLayout, LayoutResourceKind::VaryingInput, stage);
+
+            assign(builder, localVal, globalInputVal);
+        }
+
+        // Any places where the original parameter was used inside
+        // the function body should instead use the new local variable.
+        // Since the parameter was a pointer, we use the variable instruction
+        // itself (which is an `alloca`d pointer) directly:
+        pp->replaceUsesWith(localVariable);
+
+        // We also need one or more global variables to write the output to
+        // when the function is done. We create them here.
+        auto globalOutputVal = createGLSLGlobalVaryings(
+            context,
+            builder, valueType, paramLayout, LayoutResourceKind::VaryingOutput, stage);
+
+        // Now we need to iterate over all the blocks in the function looking
+        // for any `return*` instructions, so that we can write to the output variable
+        for( auto bb = func->getFirstBlock(); bb; bb = bb->getNextBlock() )
+        {
+            auto terminatorInst = bb->getLastInst();
+            if(!terminatorInst)
+                continue;
+
+            switch( terminatorInst->op )
+            {
+            default:
+                continue;
+
+            case kIROp_ReturnVal:
+            case kIROp_ReturnVoid:
+                break;
+            }
+
+            // We don't re-use `builder` here because we don't want to
+            // disrupt the source location it is using for inserting
+            // temporary variables at the top of the function.
+            //
+            IRBuilder terminatorBuilder;
+            terminatorBuilder.sharedBuilder = builder->sharedBuilder;
+            terminatorBuilder.setInsertBefore(terminatorInst);
+
+            // Assign from the local variable to the global output
+            // variable before the actual `return` takes place.
+            assign(&terminatorBuilder, globalOutputVal, localVal);
+        }
+    }
+    else
+    {
+        // This is the "easy" case where the parameter wasn't
+        // being passed by reference. We start by just creating
+        // one or more global variables to represent the parameter,
+        // and attach the required layout information to it along
+        // the way.
+
+        auto globalValue = createGLSLGlobalVaryings(
+            context,
+            builder, paramType, paramLayout, LayoutResourceKind::VaryingInput, stage);
+
+        // Next we need to replace uses of the parameter with
+        // references to the variable(s). We are going to do that
+        // somewhat naively, by simply materializing the
+        // variables at the start.
+        IRInst* materialized = materializeValue(builder, globalValue);
+
+        pp->replaceUsesWith(materialized);
+    }
+}
+
+void legalizeEntryPointForGLSL(
+    Session*                session,
+    IRModule*               module,
+    IRFunc*                 func,
+    DiagnosticSink*         sink,
+    ExtensionUsageTracker*  extensionUsageTracker)
+{
+    auto layoutDecoration = func->findDecoration<IRLayoutDecoration>();
+    SLANG_ASSERT(layoutDecoration);
+
+    auto entryPointLayout = dynamic_cast<EntryPointLayout*>(layoutDecoration->getLayout());
+    SLANG_ASSERT(entryPointLayout);
+
+    GLSLLegalizationContext context;
+    context.session = session;
+    context.stage = entryPointLayout->profile.GetStage();
+    context.sink = sink;
+    context.extensionUsageTracker = extensionUsageTracker;
+
+    Stage stage = entryPointLayout->profile.GetStage();
+
+    // We require that the entry-point function has no uses,
+    // because otherwise we'd invalidate the signature
+    // at all existing call sites.
+    //
+    // TODO: the right thing to do here is to split any
+    // function that both gets called as an entry point
+    // and as an ordinary function.
+    SLANG_ASSERT(!func->firstUse);
+
+    // We create a dummy IR builder, since some of
+    // the functions require it.
+    //
+    // TODO: make some of these free functions...
+    //
+    SharedIRBuilder shared;
+    shared.module = module;
+    shared.session = session;
+    IRBuilder builder;
+    builder.sharedBuilder = &shared;
+    builder.setInsertInto(func);
+
+    context.builder = &builder;
+
+    // We will start by looking at the return type of the
+    // function, because that will enable us to do an
+    // early-out check to avoid more work.
+    //
+    // Specifically, we need to check if the function has
+    // a `void` return type, because there is no work
+    // to be done on its return value in that case.
+    auto resultType = func->getResultType();
+    if(as<IRVoidType>(resultType))
+    {
+        // In this case, the function doesn't return a value
+        // so we don't need to transform its `return` sites.
+ // + // We can also use this opportunity to quickly + // check if the function has any parameters, and if + // it doesn't use the chance to bail out immediately. + if( func->getParamCount() == 0 ) + { + // This function is already legal for GLSL + // (at least in terms of parameter/result signature), + // so we won't bother doing anything at all. + return; + } + + // If the function does have parameters, then we need + // to let the logic later in this function handle them. + } + else + { + // Function returns a value, so we need + // to introduce a new global variable + // to hold that value, and then replace + // any `returnVal` instructions with + // code to write to that variable. + + auto resultGlobal = createGLSLGlobalVaryings( + &context, + &builder, + resultType, + entryPointLayout->resultLayout, + LayoutResourceKind::VaryingOutput, + stage); + + for( auto bb = func->getFirstBlock(); bb; bb = bb->getNextBlock() ) + { + // TODO: This is silly, because we are looking at every instruction, + // when we know that a `returnVal` should only ever appear as a + // terminator... + for( auto ii = bb->getFirstInst(); ii; ii = ii->getNextInst() ) + { + if(ii->op != kIROp_ReturnVal) + continue; + + IRReturnVal* returnInst = (IRReturnVal*) ii; + IRInst* returnValue = returnInst->getVal(); + + // Make sure we add these instructions to the right block + builder.setInsertInto(bb); + + // Write to our global variable(s) from the value being returned. + assign(&builder, resultGlobal, ScalarizedVal::value(returnValue)); + + // Emit a `returnVoid` to end the block + auto returnVoid = builder.emitReturn(); + + // Remove the old `returnVal` instruction. + returnInst->removeAndDeallocate(); + + // Make sure to resume our iteration at an + // appropriate instruciton, since we deleted + // the one we had been using. + ii = returnVoid; + } + } + } + + // Next we will walk through any parameters of the entry-point function, + // and turn them into global variables. 
+ if( auto firstBlock = func->getFirstBlock() ) + { + // Any initialization code we insert for parameters needs + // to be at the start of the "ordinary" instructions in the block: + builder.setInsertBefore(firstBlock->getFirstOrdinaryInst()); + + UInt paramCounter = 0; + for( auto pp = firstBlock->getFirstParam(); pp; pp = pp->getNextParam() ) + { + UInt paramIndex = paramCounter++; + + // We assume that the entry-point layout includes information + // on each parameter, and that these arrays are kept aligned. + // Note that this means that any transformations that mess + // with function signatures will need to also update layout info... + // + SLANG_ASSERT(entryPointLayout->fields.Count() > paramIndex); + auto paramLayout = entryPointLayout->fields[paramIndex]; + + legalizeEntryPointParameterForGLSL( + &context, + func, + pp, + paramLayout); + } + + // At this point we should have eliminated all uses of the + // parameters of the entry block. Also, our control-flow + // rules mean that the entry block cannot be the target + // of any branches in the code, so there can't be + // any control-flow ops that try to match the parameter + // list. + // + // We can safely go through and destroy the parameters + // themselves, and then clear out the parameter list. + + for( auto pp = firstBlock->getFirstParam(); pp; ) + { + auto next = pp->getNextParam(); + pp->removeAndDeallocate(); + pp = next; + } + } + + // Finally, we need to patch up the type of the entry point, + // because it is no longer accurate. + + IRFuncType* voidFuncType = builder.getFuncType( + 0, + nullptr, + builder.getVoidType()); + func->setFullType(voidFuncType); + + // TODO: we should technically be constructing + // a new `EntryPointLayout` here to reflect + // the way that things have been moved around. 
+} + +} // namespace Slang diff --git a/source/slang/ir-glsl-legalize.h b/source/slang/ir-glsl-legalize.h new file mode 100644 index 000000000..7fabac869 --- /dev/null +++ b/source/slang/ir-glsl-legalize.h @@ -0,0 +1,21 @@ +// ir-glsl-legalize.h +#pragma once + +namespace Slang +{ + +class DiagnosticSink; +class Session; + +struct ExtensionUsageTracker; +struct IRFunc; +struct IRModule; + +void legalizeEntryPointForGLSL( + Session* session, + IRModule* module, + IRFunc* func, + DiagnosticSink* sink, + ExtensionUsageTracker* extensionUsageTracker); + +} diff --git a/source/slang/ir-insts.h b/source/slang/ir-insts.h index 29743eebc..cd5407acd 100644 --- a/source/slang/ir-insts.h +++ b/source/slang/ir-insts.h @@ -573,6 +573,9 @@ struct IRWitnessTableEntry : IRInst // The IR-level value that satisfies the requirement IRUse satisfyingVal; + IRInst* getRequirementKey() { return getOperand(0); } + IRInst* getSatisfyingVal() { return getOperand(1); } + IR_LEAF_ISA(WitnessTableEntry) }; @@ -791,6 +794,12 @@ struct IRBuilder UInt argCount, IRInst* const* args); + IRInst* createIntrinsicInst( + IRType* type, + IROp op, + UInt argCount, + IRInst* const* args); + IRInst* emitIntrinsicInst( IRType* type, IROp op, @@ -1142,6 +1151,10 @@ struct IRBuilder } }; +void addHoistableInst( + IRBuilder* builder, + IRInst* inst); + // Helper to establish the source location that will be used // by an IRBuilder. struct IRBuilderSourceLocRAII @@ -1168,44 +1181,6 @@ struct IRBuilderSourceLocRAII } }; - -// - -// Interface to IR specialization for use when cloning target-specific -// IR as part of compiling an entry point. -// -// TODO: we really need to move all of this logic to its own files. - -// `IRSpecializationState` is used as an opaque type to wrap up all -// the data needed to perform IR specialization, without exposing -// implementation details. 
-struct IRSpecializationState; -IRSpecializationState* createIRSpecializationState( - EntryPointRequest* entryPointRequest, - ProgramLayout* programLayout, - CodeGenTarget target, - TargetRequest* targetReq); -void destroyIRSpecializationState(IRSpecializationState* state); -IRModule* getIRModule(IRSpecializationState* state); - -struct ExtensionUsageTracker; - -// Clone the IR values reachable from the given entry point -// into the IR module associated with the specialization state. -// When multiple definitions of a symbol are found, the one -// that is best specialized for the given `targetReq` will be -// used. -void specializeIRForEntryPoint( - IRSpecializationState* state, - EntryPointRequest* entryPointRequest, - ExtensionUsageTracker* extensionUsageTracker); - -// Find suitable uses of the `specialize` instruction that -// can be replaced with references to specialized functions. -void specializeGenerics( - IRModule* module, - CodeGenTarget target); - // void markConstExpr( @@ -1214,6 +1189,10 @@ void markConstExpr( // +IRTargetIntrinsicDecoration* findTargetIntrinsicDecoration( + IRInst* val, + String const& targetName); + } #endif diff --git a/source/slang/ir-link.cpp b/source/slang/ir-link.cpp new file mode 100644 index 000000000..586172cb2 --- /dev/null +++ b/source/slang/ir-link.cpp @@ -0,0 +1,1297 @@ +// ir-link.cpp +#include "ir-link.h" + +#include "ir.h" +#include "ir-insts.h" +#include "mangle.h" + +namespace Slang +{ + +StructTypeLayout* getGlobalStructLayout( + ProgramLayout* programLayout); + +// Needed for lookup up entry-point layouts. +// +// TODO: maybe arrange so that codegen is driven from the layout layer +// instead of the input/request layer. 
+EntryPointLayout* findEntryPointLayout( + ProgramLayout* programLayout, + EntryPointRequest* entryPointRequest); + +struct IRSpecSymbol : RefObject +{ + IRInst* irGlobalValue; + RefPtr<IRSpecSymbol> nextWithSameName; +}; + +struct IRSpecEnv +{ + IRSpecEnv* parent = nullptr; + + // A map from original values to their cloned equivalents. + typedef Dictionary<IRInst*, IRInst*> ClonedValueDictionary; + ClonedValueDictionary clonedValues; +}; + +struct IRSharedSpecContext +{ + // The code-generation target in use + CodeGenTarget target; + + // The specialized module we are building + RefPtr<IRModule> module; + + // The original, unspecialized module we are copying + IRModule* originalModule; + + // A map from mangled symbol names to zero or + // more global IR values that have that name, + // in the *original* module. + typedef Dictionary<String, RefPtr<IRSpecSymbol>> SymbolDictionary; + SymbolDictionary symbols; + + SharedIRBuilder sharedBuilderStorage; + IRBuilder builderStorage; + + // The "global" specialization environment. + IRSpecEnv globalEnv; +}; + +struct IRSpecContextBase +{ + // A map from the mangled name of a global variable + // to the layout to use for it. + Dictionary<String, VarLayout*> globalVarLayouts; + + IRSharedSpecContext* shared; + + IRSharedSpecContext* getShared() { return shared; } + + IRModule* getModule() { return getShared()->module; } + + IRModule* getOriginalModule() { return getShared()->originalModule; } + + IRSharedSpecContext::SymbolDictionary& getSymbols() { return getShared()->symbols; } + + // The current specialization environment to use. + IRSpecEnv* env = nullptr; + IRSpecEnv* getEnv() + { + // TODO: need to actually establish environments on contexts we create. + // + // Or more realistically we need to change the whole approach + // to specialization and cloning so that we don't try to share + // logic between two very different cases. 
+ + + return env; + } + + // The IR builder to use for creating nodes + IRBuilder* builder; + + // A callback to be used when a value that is not registerd in `clonedValues` + // is needed during cloning. This gives the subtype a chance to intercept + // the operation and clone (or not) as needed. + virtual IRInst* maybeCloneValue(IRInst* originalVal) + { + return originalVal; + } +}; + +void registerClonedValue( + IRSpecContextBase* context, + IRInst* clonedValue, + IRInst* originalValue) +{ + if(!originalValue) + return; + + // TODO: now that things are scoped using environments, we + // shouldn't be running into the cases where a value with + // the same key already exists. This should be changed to + // an `Add()` call. + // + context->getEnv()->clonedValues[originalValue] = clonedValue; +} + +// Information on values to use when registering a cloned value +struct IROriginalValuesForClone +{ + IRInst* originalVal = nullptr; + IRSpecSymbol* sym = nullptr; + + IROriginalValuesForClone() {} + + IROriginalValuesForClone(IRInst* originalValue) + : originalVal(originalValue) + {} + + IROriginalValuesForClone(IRSpecSymbol* symbol) + : sym(symbol) + {} +}; + +void registerClonedValue( + IRSpecContextBase* context, + IRInst* clonedValue, + IROriginalValuesForClone const& originalValues) +{ + registerClonedValue(context, clonedValue, originalValues.originalVal); + for( auto s = originalValues.sym; s; s = s->nextWithSameName ) + { + registerClonedValue(context, clonedValue, s->irGlobalValue); + } +} + +IRInst* cloneInst( + IRSpecContextBase* context, + IRBuilder* builder, + IRInst* originalInst, + IROriginalValuesForClone const& originalValues); + +IRInst* cloneInst( + IRSpecContextBase* context, + IRBuilder* builder, + IRInst* originalInst) +{ + return cloneInst(context, builder, originalInst, originalInst); +} + + /// Clone any decorations from `originalValue` onto `clonedValue` +void cloneDecorations( + IRSpecContextBase* context, + IRInst* clonedValue, + IRInst* 
originalValue) +{ + // TODO: In many cases we might be able to use this as a general-purpose + // place to do cloning of *all* the children of an instruction, and + // not just its decorations. We should look to refactor this code + // later. + + IRBuilder builderStorage = *context->builder; + IRBuilder* builder = &builderStorage; + builder->setInsertInto(clonedValue); + + + SLANG_UNUSED(context); + for(auto originalDecoration : originalValue->getDecorations()) + { + cloneInst(context, builder, originalDecoration); + } + + // We will also clone the location here, just because this is a convenient bottleneck + clonedValue->sourceLoc = originalValue->sourceLoc; +} + + /// Clone any decorations and children from `originalValue` onto `clonedValue` +void cloneDecorationsAndChildren( + IRSpecContextBase* context, + IRInst* clonedValue, + IRInst* originalValue) +{ + IRBuilder builderStorage = *context->builder; + IRBuilder* builder = &builderStorage; + builder->setInsertInto(clonedValue); + + SLANG_UNUSED(context); + for(auto originalItem : originalValue->getDecorationsAndChildren()) + { + cloneInst(context, builder, originalItem); + } + + // We will also clone the location here, just because this is a convenient bottleneck + clonedValue->sourceLoc = originalValue->sourceLoc; +} + +// We use an `IRSpecContext` for the case where we are cloning +// code from one or more input modules to create a "linked" output +// module. Along the way, we will resolve profile-specific functions +// to the best definition for a given target. 
+// +struct IRSpecContext : IRSpecContextBase +{ + // Override the "maybe clone" logic so that we always clone + virtual IRInst* maybeCloneValue(IRInst* originalVal) override; +}; + + +IRInst* cloneGlobalValue(IRSpecContext* context, IRInst* originalVal); + +IRInst* cloneValue( + IRSpecContextBase* context, + IRInst* originalValue); + +IRType* cloneType( + IRSpecContextBase* context, + IRType* originalType); + +IRInst* IRSpecContext::maybeCloneValue(IRInst* originalValue) +{ + switch (originalValue->op) + { + case kIROp_StructType: + case kIROp_Func: + case kIROp_Generic: + case kIROp_GlobalVar: + case kIROp_GlobalConstant: + case kIROp_GlobalParam: + case kIROp_StructKey: + case kIROp_GlobalGenericParam: + case kIROp_WitnessTable: + return cloneGlobalValue(this, originalValue); + + case kIROp_BoolLit: + { + IRConstant* c = (IRConstant*)originalValue; + return builder->getBoolValue(c->value.intVal != 0); + } + break; + + + case kIROp_IntLit: + { + IRConstant* c = (IRConstant*)originalValue; + return builder->getIntValue(cloneType(this, c->getDataType()), c->value.intVal); + } + break; + + case kIROp_FloatLit: + { + IRConstant* c = (IRConstant*)originalValue; + return builder->getFloatValue(cloneType(this, c->getDataType()), c->value.floatVal); + } + break; + + case kIROp_StringLit: + { + IRConstant* c = (IRConstant*)originalValue; + return builder->getStringValue(c->getStringSlice()); + } + break; + + case kIROp_PtrLit: + { + IRConstant* c = (IRConstant*)originalValue; + return builder->getPtrValue(c->value.ptrVal); + } + break; + + default: + { + // In the deafult case, assume that we have some sort of "hoistable" + // instruction that requires us to create a clone of it. 
+ + UInt argCount = originalValue->getOperandCount(); + IRInst* clonedValue = builder->createIntrinsicInst( + cloneType(this, originalValue->getFullType()), + originalValue->op, + argCount, nullptr); + registerClonedValue(this, clonedValue, originalValue); + for (UInt aa = 0; aa < argCount; ++aa) + { + IRInst* originalArg = originalValue->getOperand(aa); + IRInst* clonedArg = cloneValue(this, originalArg); + clonedValue->getOperands()[aa].init(clonedValue, clonedArg); + } + cloneDecorationsAndChildren(this, clonedValue, originalValue); + + addHoistableInst(builder, clonedValue); + + return clonedValue; + } + break; + } +} + +IRInst* cloneValue( + IRSpecContextBase* context, + IRInst* originalValue); + +// Find a pre-existing cloned value, or return null if none is available. +IRInst* findClonedValue( + IRSpecContextBase* context, + IRInst* originalValue) +{ + IRInst* clonedValue = nullptr; + for (auto env = context->getEnv(); env; env = env->parent) + { + if (env->clonedValues.TryGetValue(originalValue, clonedValue)) + { + return clonedValue; + } + } + + return nullptr; +} + +IRInst* cloneValue( + IRSpecContextBase* context, + IRInst* originalValue) +{ + if (!originalValue) + return nullptr; + + if (IRInst* clonedValue = findClonedValue(context, originalValue)) + return clonedValue; + + return context->maybeCloneValue(originalValue); +} + +IRType* cloneType( + IRSpecContextBase* context, + IRType* originalType) +{ + return (IRType*)cloneValue(context, originalType); +} + +void cloneGlobalValueWithCodeCommon( + IRSpecContextBase* context, + IRGlobalValueWithCode* clonedValue, + IRGlobalValueWithCode* originalValue); + +IRRate* cloneRate( + IRSpecContextBase* context, + IRRate* rate) +{ + return (IRRate*) cloneType(context, rate); +} + +void maybeSetClonedRate( + IRSpecContextBase* context, + IRBuilder* builder, + IRInst* clonedValue, + IRInst* originalValue) +{ + if(auto rate = originalValue->getRate() ) + { + clonedValue->setFullType(builder->getRateQualifiedType( 
+ cloneRate(context, rate), + clonedValue->getFullType())); + } +} + +IRGlobalVar* cloneGlobalVarImpl( + IRSpecContextBase* context, + IRBuilder* builder, + IRGlobalVar* originalVar, + IROriginalValuesForClone const& originalValues) +{ + auto clonedVar = builder->createGlobalVar( + cloneType(context, originalVar->getDataType()->getValueType())); + + maybeSetClonedRate(context, builder, clonedVar, originalVar); + + registerClonedValue(context, clonedVar, originalValues); + + // Clone any code in the body of the variable, since this + // represents the initializer. + cloneGlobalValueWithCodeCommon( + context, + clonedVar, + originalVar); + + return clonedVar; +} + +IRGlobalConstant* cloneGlobalConstantImpl( + IRSpecContextBase* context, + IRBuilder* builder, + IRGlobalConstant* originalVal, + IROriginalValuesForClone const& originalValues) +{ + auto clonedVal = builder->createGlobalConstant( + cloneType(context, originalVal->getFullType())); + registerClonedValue(context, clonedVal, originalValues); + + // Clone any code in the body of the constant, since this + // represents the initializer. 
+ cloneGlobalValueWithCodeCommon( + context, + clonedVal, + originalVal); + + return clonedVal; +} + +void cloneSimpleGlobalValueImpl( + IRSpecContextBase* context, + IRInst* originalInst, + IROriginalValuesForClone const& originalValues, + IRInst* clonedInst, + bool registerValue = true) +{ + if (registerValue) + registerClonedValue(context, clonedInst, originalValues); + + // Set up an IR builder for inserting into the inst + IRBuilder builderStorage = *context->builder; + IRBuilder* builder = &builderStorage; + builder->setInsertInto(clonedInst); + + // Clone any children of the instruction + for (auto child : originalInst->getDecorationsAndChildren()) + { + cloneInst(context, builder, child); + } +} + +IRGlobalParam* cloneGlobalParamImpl( + IRSpecContextBase* context, + IRBuilder* builder, + IRGlobalParam* originalVal, + IROriginalValuesForClone const& originalValues) +{ + auto clonedVal = builder->createGlobalParam( + cloneType(context, originalVal->getFullType())); + cloneSimpleGlobalValueImpl(context, originalVal, originalValues, clonedVal); + + if(auto linkage = originalVal->findDecoration<IRLinkageDecoration>()) + { + auto mangledName = String(linkage->getMangledName()); + VarLayout* layout = nullptr; + if (context->globalVarLayouts.TryGetValue(mangledName, layout)) + { + builder->addLayoutDecoration(clonedVal, layout); + } + } + + return clonedVal; +} + +IRGeneric* cloneGenericImpl( + IRSpecContextBase* context, + IRBuilder* builder, + IRGeneric* originalVal, + IROriginalValuesForClone const& originalValues) +{ + auto clonedVal = builder->emitGeneric(); + registerClonedValue(context, clonedVal, originalValues); + + // Clone any code in the body of the generic, since this + // computes its result value. 
+ cloneGlobalValueWithCodeCommon( + context, + clonedVal, + originalVal); + + return clonedVal; +} + +IRStructKey* cloneStructKeyImpl( + IRSpecContextBase* context, + IRBuilder* builder, + IRStructKey* originalVal, + IROriginalValuesForClone const& originalValues) +{ + auto clonedVal = builder->createStructKey(); + cloneSimpleGlobalValueImpl(context, originalVal, originalValues, clonedVal); + return clonedVal; +} + +IRGlobalGenericParam* cloneGlobalGenericParamImpl( + IRSpecContextBase* context, + IRBuilder* builder, + IRGlobalGenericParam* originalVal, + IROriginalValuesForClone const& originalValues) +{ + auto clonedVal = builder->emitGlobalGenericParam(); + cloneSimpleGlobalValueImpl(context, originalVal, originalValues, clonedVal); + return clonedVal; +} + + +IRWitnessTable* cloneWitnessTableImpl( + IRSpecContextBase* context, + IRBuilder* builder, + IRWitnessTable* originalTable, + IROriginalValuesForClone const& originalValues, + IRWitnessTable* dstTable = nullptr, + bool registerValue = true) +{ + auto clonedTable = dstTable ? 
dstTable : builder->createWitnessTable(); + cloneSimpleGlobalValueImpl(context, originalTable, originalValues, clonedTable, registerValue); + return clonedTable; +} + +IRWitnessTable* cloneWitnessTableWithoutRegistering( + IRSpecContextBase* context, + IRBuilder* builder, + IRWitnessTable* originalTable, + IRWitnessTable* dstTable = nullptr) +{ + return cloneWitnessTableImpl(context, builder, originalTable, IROriginalValuesForClone(), dstTable, false); +} + +IRStructType* cloneStructTypeImpl( + IRSpecContextBase* context, + IRBuilder* builder, + IRStructType* originalStruct, + IROriginalValuesForClone const& originalValues) +{ + auto clonedStruct = builder->createStructType(); + cloneSimpleGlobalValueImpl(context, originalStruct, originalValues, clonedStruct); + return clonedStruct; +} + + +IRInterfaceType* cloneInterfaceTypeImpl( + IRSpecContextBase* context, + IRBuilder* builder, + IRInterfaceType* originalInterface, + IROriginalValuesForClone const& originalValues) +{ + auto clonedInterface = builder->createInterfaceType(); + cloneSimpleGlobalValueImpl(context, originalInterface, originalValues, clonedInterface); + return clonedInterface; +} + +void cloneGlobalValueWithCodeCommon( + IRSpecContextBase* context, + IRGlobalValueWithCode* clonedValue, + IRGlobalValueWithCode* originalValue) +{ + // Next we are going to clone the actual code. + IRBuilder builderStorage = *context->builder; + IRBuilder* builder = &builderStorage; + builder->setInsertInto(clonedValue); + + cloneDecorations(context, clonedValue, originalValue); + + // We will walk through the blocks of the function, and clone each of them. + // + // We need to create the cloned blocks first, and then walk through them, + // because blocks might be forward referenced (this is not possible + // for other cases of instructions). 
+ for (auto originalBlock = originalValue->getFirstBlock(); + originalBlock; + originalBlock = originalBlock->getNextBlock()) + { + IRBlock* clonedBlock = builder->createBlock(); + clonedValue->addBlock(clonedBlock); + registerClonedValue(context, clonedBlock, originalBlock); + +#if 0 + // We can go ahead and clone parameters here, while we are at it. + builder->curBlock = clonedBlock; + for (auto originalParam = originalBlock->getFirstParam(); + originalParam; + originalParam = originalParam->getNextParam()) + { + IRParam* clonedParam = builder->emitParam( + context->maybeCloneType( + originalParam->getFullType())); + cloneDecorations(context, clonedParam, originalParam); + registerClonedValue(context, clonedParam, originalParam); + } +#endif + } + + // Okay, now we are in a good position to start cloning + // the instructions inside the blocks. + { + IRBlock* ob = originalValue->getFirstBlock(); + IRBlock* cb = clonedValue->getFirstBlock(); + while (ob) + { + SLANG_ASSERT(cb); + + builder->setInsertInto(cb); + for (auto oi = ob->getFirstInst(); oi; oi = oi->getNextInst()) + { + cloneInst(context, builder, oi); + } + + ob = ob->getNextBlock(); + cb = cb->getNextBlock(); + } + } + +} + +void checkIRDuplicate(IRInst* inst, IRInst* moduleInst, UnownedStringSlice const& mangledName) +{ +#ifdef _DEBUG + for (auto child : moduleInst->getDecorationsAndChildren()) + { + if (child == inst) + continue; + + if(auto childLinkage = child->findDecoration<IRLinkageDecoration>()) + { + if(mangledName == childLinkage->getMangledName()) + { + SLANG_UNEXPECTED("duplicate global instruction"); + } + } + } +#else + SLANG_UNREFERENCED_PARAMETER(inst); + SLANG_UNREFERENCED_PARAMETER(moduleInst); + SLANG_UNREFERENCED_PARAMETER(mangledName); +#endif +} + +void cloneFunctionCommon( + IRSpecContextBase* context, + IRFunc* clonedFunc, + IRFunc* originalFunc, + bool checkDuplicate = true) +{ + // First clone all the simple properties. 
+ clonedFunc->setFullType(cloneType(context, originalFunc->getFullType())); + + cloneGlobalValueWithCodeCommon( + context, + clonedFunc, + originalFunc); + + // Shuffle the function to the end of the list, because + // it needs to follow its dependencies. + // + // TODO: This isn't really a good requirement to place on the IR... + clonedFunc->moveToEnd(); + + if( checkDuplicate ) + { + if( auto linkage = clonedFunc->findDecoration<IRLinkageDecoration>() ) + { + checkIRDuplicate(clonedFunc, context->getModule()->getModuleInst(), linkage->getMangledName()); + } + } +} + +IRFunc* specializeIRForEntryPoint( + IRSpecContext* context, + EntryPointRequest* entryPointRequest, + EntryPointLayout* entryPointLayout) +{ + // Look up the IR symbol by name + auto mangledName = getMangledName(entryPointRequest->decl); + RefPtr<IRSpecSymbol> sym; + if (!context->getSymbols().TryGetValue(mangledName, sym)) + { + SLANG_UNEXPECTED("no matching IR symbol"); + return nullptr; + } + + // TODO: deal with the case where we might + // have multiple versions... + + auto globalValue = sym->irGlobalValue; + if (globalValue->op != kIROp_Func) + { + SLANG_UNEXPECTED("expected an IR function"); + return nullptr; + } + auto originalFunc = (IRFunc*)globalValue; + + // Create a clone for the IR function + auto clonedFunc = context->builder->createFunc(); + + // Note: we do *not* register this cloned declaration + // as the cloned value for the original symbol. + // This is kind of a kludge, but it ensures that + // in the unlikely case that the function is both + // used as an entry point and a callable function + // (yes, this would imply recursion...) we actually + // have two copies, which lets us arbitrarily + // transform the entry point to meet target requirements. + // + // TODO: The above statement is kind of bunk, though, + // because both versions of the function would have + // the same mangled name... 
:( + + // We need to clone all the properties of the original + // function, including any blocks, their parameters, + // and their instructions. + cloneFunctionCommon(context, clonedFunc, originalFunc); + + // We need to attach the layout information for + // the entry point to this declaration, so that + // we can use it to inform downstream code emit. + context->builder->addLayoutDecoration( + clonedFunc, + entryPointLayout); + + // We will also go on and attach layout information + // to the function parameters, so that we have it + // available directly on the parameters, rather + // than having to look it up on the original entry-point layout. + if( auto firstBlock = clonedFunc->getFirstBlock() ) + { + UInt paramLayoutCount = entryPointLayout->fields.Count(); + UInt paramCounter = 0; + for( auto pp = firstBlock->getFirstParam(); pp; pp = pp->getNextParam() ) + { + UInt paramIndex = paramCounter++; + if( paramIndex < paramLayoutCount ) + { + auto paramLayout = entryPointLayout->fields[paramIndex]; + context->builder->addLayoutDecoration( + pp, + paramLayout); + } + else + { + SLANG_UNEXPECTED("too many parameters"); + } + } + } + + return clonedFunc; +} + +// Get a string form of the target so that we can +// use it to match against target-specialization modifiers +// +// TODO: We shouldn't be using strings for this. +String getTargetName(IRSpecContext* context) +{ + switch( context->shared->target ) + { + case CodeGenTarget::HLSL: + return "hlsl"; + + case CodeGenTarget::GLSL: + return "glsl"; + + default: + SLANG_UNEXPECTED("unhandled case"); + UNREACHABLE_RETURN("unknown"); + } +} + +// How specialized is a given declaration for the chosen target? 
+enum class TargetSpecializationLevel +{ + specializedForOtherTarget = 0, + notSpecialized, + specializedForTarget, +}; + +TargetSpecializationLevel getTargetSpecialiationLevel( + IRInst* inVal, + String const& targetName) +{ + // HACK: Currently the front-end is placing modifiers related + // to target specialization on nodes like functions, even when + // those functions are being returned by a generic. This + // means that we need to try and inspect the value being + // returned by the generic if we are looking at a generic. + IRInst* val = inVal; + while( auto genericVal = as<IRGeneric>(val) ) + { + auto firstBlock = genericVal->getFirstBlock(); + if(!firstBlock) break; + + auto returnInst = as<IRReturnVal>(firstBlock->getLastInst()); + if(!returnInst) break; + + val = returnInst->getVal(); + } + + TargetSpecializationLevel result = TargetSpecializationLevel::notSpecialized; + for(auto dd : val->getDecorations()) + { + if(dd->op != kIROp_TargetDecoration) + continue; + + auto decoration = (IRTargetDecoration*) dd; + if(String(decoration->getTargetName()) == targetName) + return TargetSpecializationLevel::specializedForTarget; + + result = TargetSpecializationLevel::specializedForOtherTarget; + } + + return result; +} + +// Is `newVal` marked as being a better match for our +// chosen code-generation target? +// +// TODO: there is a missing step here where we need +// to check if things are even available in the first place... +bool isBetterForTarget( + IRSpecContext* context, + IRInst* newVal, + IRInst* oldVal) +{ + String targetName = getTargetName(context); + + // For right now every declaration might have zero or more + // modifiers, representing the targets for which it is specialized. + // Each modifier has a single string "tag" to represent a target. + // We thus decide that a declaration is "more specialized" by: + // + // - Does it have a modifier with a tag with the string for the current target? + // If yes, it is the most specialized it can be. 
+ // + // - Does it have a no tags? Then it is "unspecialized" and that is okay. + // + // - Does it have a modifier with a tag for a *different* target? + // If yes, then it shouldn't even be usable on this target. + // + // Longer term a better approach is to think of this in terms + // of a "disjunction of conjunctions" that is: + // + // (A and B and C) or (A and D) or (E) or (F and G) ... + // + // A code generation target would then consist of a + // conjunction of invidual tags: + // + // (HLSL and SM_4_0 and Vertex and ...) + // + // A declaration is *applicable* on a target if one of + // its conjunctions of tags is a subset of the target's. + // + // One declaration is *better* than another on a target + // if it is applicable and its tags are a superset + // of the other's. + + auto newLevel = getTargetSpecialiationLevel(newVal, targetName); + auto oldLevel = getTargetSpecialiationLevel(oldVal, targetName); + if(newLevel != oldLevel) + return UInt(newLevel) > UInt(oldLevel); + + // All other factors being equal, a definition is + // better than a declaration. + auto newIsDef = isDefinition(newVal); + auto oldIsDef = isDefinition(oldVal); + if (newIsDef != oldIsDef) + return newIsDef; + + return false; +} + +IRFunc* cloneFuncImpl( + IRSpecContextBase* context, + IRBuilder* builder, + IRFunc* originalFunc, + IROriginalValuesForClone const& originalValues) +{ + auto clonedFunc = builder->createFunc(); + registerClonedValue(context, clonedFunc, originalValues); + cloneFunctionCommon(context, clonedFunc, originalFunc); + return clonedFunc; +} + + +IRInst* cloneInst( + IRSpecContextBase* context, + IRBuilder* builder, + IRInst* originalInst, + IROriginalValuesForClone const& originalValues) +{ + switch (originalInst->op) + { + // We need to special-case any instruction that is not + // allocated like an ordinary `IRInst` with trailing args. 
+ case kIROp_Func: + return cloneFuncImpl(context, builder, cast<IRFunc>(originalInst), originalValues); + + case kIROp_GlobalVar: + return cloneGlobalVarImpl(context, builder, cast<IRGlobalVar>(originalInst), originalValues); + + case kIROp_GlobalConstant: + return cloneGlobalConstantImpl(context, builder, cast<IRGlobalConstant>(originalInst), originalValues); + + case kIROp_GlobalParam: + return cloneGlobalParamImpl(context, builder, cast<IRGlobalParam>(originalInst), originalValues); + + case kIROp_WitnessTable: + return cloneWitnessTableImpl(context, builder, cast<IRWitnessTable>(originalInst), originalValues); + + case kIROp_StructType: + return cloneStructTypeImpl(context, builder, cast<IRStructType>(originalInst), originalValues); + + case kIROp_InterfaceType: + return cloneInterfaceTypeImpl(context, builder, cast<IRInterfaceType>(originalInst), originalValues); + + case kIROp_Generic: + return cloneGenericImpl(context, builder, cast<IRGeneric>(originalInst), originalValues); + + case kIROp_StructKey: + return cloneStructKeyImpl(context, builder, cast<IRStructKey>(originalInst), originalValues); + + case kIROp_GlobalGenericParam: + return cloneGlobalGenericParamImpl(context, builder, cast<IRGlobalGenericParam>(originalInst), originalValues); + + default: + break; + } + + // The common case is that we just need to construct a cloned + // instruction with the right number of operands, intialize + // it, and then add it to the sequence. 
+ UInt argCount = originalInst->getOperandCount(); + IRInst* clonedInst = builder->createIntrinsicInst( + cloneType(context, originalInst->getFullType()), + originalInst->op, + argCount, nullptr); + registerClonedValue(context, clonedInst, originalValues); + auto oldBuilder = context->builder; + context->builder = builder; + for (UInt aa = 0; aa < argCount; ++aa) + { + IRInst* originalArg = originalInst->getOperand(aa); + IRInst* clonedArg = cloneValue(context, originalArg); + clonedInst->getOperands()[aa].init(clonedInst, clonedArg); + } + builder->addInst(clonedInst); + context->builder = oldBuilder; + cloneDecorations(context, clonedInst, originalInst); + + return clonedInst; +} + +IRInst* cloneGlobalValueImpl( + IRSpecContext* context, + IRInst* originalInst, + IROriginalValuesForClone const& originalValues) +{ + auto clonedValue = cloneInst(context, &context->shared->builderStorage, originalInst, originalValues); + clonedValue->moveToEnd(); + return clonedValue; +} + + + /// Clone a global value, which has the given `originalLinkage`. + /// + /// The `originalVal` is a known global IR value with that linkage, if one is available. + /// (It is okay for this parameter to be null). + /// +IRInst* cloneGlobalValueWithLinkage( + IRSpecContext* context, + IRInst* originalVal, + IRLinkageDecoration* originalLinkage) +{ + // If the global value being cloned is already in target module, don't clone + // Why checking this? + // When specializing a generic function G (which is already in target module), + // where G calls a normal function F (which is already in target module), + // then when we are making a copy of G via cloneFuncCommom(), it will recursively clone F, + // however we don't want to make a duplicate of F in the target module. + if (originalVal->getParent() == context->getModule()->getModuleInst()) + return originalVal; + + // Check if we've already cloned this value, for the case where + // an original value has already been established. 
+ if (originalVal) + { + if (IRInst* clonedVal = findClonedValue(context, originalVal)) + { + return clonedVal; + } + } + + if(!originalLinkage) + { + // If there is no mangled name, then we assume this is a local symbol, + // and it can't possibly have multiple declarations. + return cloneGlobalValueImpl(context, originalVal, IROriginalValuesForClone()); + } + + // + // We will scan through all of the available declarations + // with the same mangled name as `originalVal` and try + // to pick the "best" one for our target. + + auto mangledName = String(originalLinkage->getMangledName()); + RefPtr<IRSpecSymbol> sym; + if( !context->getSymbols().TryGetValue(mangledName, sym) ) + { + if(!originalVal) + return nullptr; + + // This shouldn't happen! + SLANG_UNEXPECTED("no matching values registered"); + UNREACHABLE_RETURN(cloneGlobalValueImpl(context, originalVal, IROriginalValuesForClone())); + } + + // We will try to track the "best" declaration we can find. + // + // Generally, one declaration wil lbe better than another if it is + // more specialized for the chosen target. Otherwise, we simply favor + // definitions over declarations. + // + IRInst* bestVal = sym->irGlobalValue; + for( auto ss = sym->nextWithSameName; ss; ss = ss->nextWithSameName ) + { + IRInst* newVal = ss->irGlobalValue; + if(isBetterForTarget(context, newVal, bestVal)) + bestVal = newVal; + } + + // Check if we've already cloned this value, for the case where + // we didn't have an original value (just a name), but we've + // now found a representative value. + if (!originalVal) + { + if (IRInst* clonedVal = findClonedValue(context, bestVal)) + { + return clonedVal; + } + } + + return cloneGlobalValueImpl(context, bestVal, IROriginalValuesForClone(sym)); +} + +// Clone a global value, where `originalVal` is one declaration/definition, but we might +// have to consider others, in order to find the "best" version of the symbol. 
+IRInst* cloneGlobalValue(IRSpecContext* context, IRInst* originalVal) +{ + // We are being asked to clone a particular global value, but in + // the IR that comes out of the front-end there could still + // be multiple, target-specific, declarations of any given + // global value, all of which share the same mangled name. + return cloneGlobalValueWithLinkage( + context, + originalVal, + originalVal->findDecoration<IRLinkageDecoration>()); +} + +void insertGlobalValueSymbol( + IRSharedSpecContext* sharedContext, + IRInst* gv) +{ + auto linkage = gv->findDecoration<IRLinkageDecoration>(); + + // Don't try to register a symbol for global values + // that don't have linkage. + // + if (!linkage) + return; + + auto mangledName = String(linkage->getMangledName()); + + RefPtr<IRSpecSymbol> sym = new IRSpecSymbol(); + sym->irGlobalValue = gv; + + RefPtr<IRSpecSymbol> prev; + if (sharedContext->symbols.TryGetValue(mangledName, prev)) + { + sym->nextWithSameName = prev->nextWithSameName; + prev->nextWithSameName = sym; + } + else + { + sharedContext->symbols.Add(mangledName, sym); + } +} + +void insertGlobalValueSymbols( + IRSharedSpecContext* sharedContext, + IRModule* originalModule) +{ + if (!originalModule) + return; + + for(auto ii : originalModule->getGlobalInsts()) + { + insertGlobalValueSymbol(sharedContext, ii); + } +} + +void initializeSharedSpecContext( + IRSharedSpecContext* sharedContext, + Session* session, + IRModule* module, + IRModule* originalModule, + CodeGenTarget target) +{ + + SharedIRBuilder* sharedBuilder = &sharedContext->sharedBuilderStorage; + sharedBuilder->module = nullptr; + sharedBuilder->session = session; + + IRBuilder* builder = &sharedContext->builderStorage; + builder->sharedBuilder = sharedBuilder; + + if( !module ) + { + module = builder->createModule(); + } + + sharedBuilder->module = module; + sharedContext->module = module; + sharedContext->originalModule = originalModule; + sharedContext->target = target; + // We will populate a map 
with all of the IR values + // that use the same mangled name, to make lookup easier + // in other steps. + insertGlobalValueSymbols(sharedContext, originalModule); +} + +// implementation provided in parameter-binding.cpp +RefPtr<ProgramLayout> specializeProgramLayout( + TargetRequest * targetReq, + ProgramLayout* programLayout, + SubstitutionSet typeSubst); + +struct IRSpecializationState +{ + ProgramLayout* programLayout; + CodeGenTarget target; + TargetRequest* targetReq; + + IRModule* irModule = nullptr; + RefPtr<ProgramLayout> newProgramLayout; + + IRSharedSpecContext sharedContextStorage; + IRSpecContext contextStorage; + + IRSpecEnv globalEnv; + + IRSharedSpecContext* getSharedContext() { return &sharedContextStorage; } + IRSpecContext* getContext() { return &contextStorage; } + + IRSpecializationState() + { + contextStorage.env = &globalEnv; + } + + ~IRSpecializationState() + { + newProgramLayout = nullptr; + contextStorage = IRSpecContext(); + sharedContextStorage = IRSharedSpecContext(); + } +}; + +IRSpecializationState* createIRSpecializationState( + EntryPointRequest* entryPointRequest, + ProgramLayout* programLayout, + CodeGenTarget target, + TargetRequest* targetReq) +{ + IRSpecializationState* state = new IRSpecializationState(); + + state->programLayout = programLayout; + state->target = target; + state->targetReq = targetReq; + + + auto compileRequest = entryPointRequest->compileRequest; + auto translationUnit = entryPointRequest->getTranslationUnit(); + auto originalIRModule = translationUnit->irModule; + + auto sharedContext = state->getSharedContext(); + initializeSharedSpecContext( + sharedContext, + compileRequest->mSession, + nullptr, + originalIRModule, + target); + + state->irModule = sharedContext->module; + + // We also need to attach the IR definitions for symbols from + // any loaded modules: + for (auto loadedModule : compileRequest->loadedModulesList) + { + insertGlobalValueSymbols(sharedContext, loadedModule->irModule); + } + + auto 
context = state->getContext(); + context->shared = sharedContext; + context->builder = &sharedContext->builderStorage; + + // Now specialize the program layout using the substitution + // + // TODO: The specialization of the layout is conceptually an AST-level operations, + // and shouldn't be done here in the IR at all. + // + RefPtr<ProgramLayout> newProgramLayout = specializeProgramLayout( + targetReq, + programLayout, + SubstitutionSet(entryPointRequest->globalGenericSubst)); + + // TODO: we need to register the (IR-level) arguments of the global generic parameters as the + // substitutions for the generic parameters in the original IR. + + // applyGlobalGenericParamSubsitution(...); + + + state->newProgramLayout = newProgramLayout; + + // Next, we want to optimize lookup for layout infromation + // associated with global declarations, so that we can + // look things up based on the IR values (using mangled names) + auto globalStructLayout = getGlobalStructLayout(newProgramLayout); + for (auto globalVarLayout : globalStructLayout->fields) + { + auto mangledName = getMangledName(globalVarLayout->varDecl); + context->globalVarLayouts.AddIfNotExists(mangledName, globalVarLayout); + } + + // for now, clone all unreferenced witness tables + for (auto sym :context->getSymbols()) + { + if (sym.Value->irGlobalValue->op == kIROp_WitnessTable) + cloneGlobalValue(context, (IRWitnessTable*)sym.Value->irGlobalValue); + } + return state; +} + +void destroyIRSpecializationState(IRSpecializationState* state) +{ + delete state; +} + +IRModule* getIRModule(IRSpecializationState* state) +{ + return state->irModule; +} + +IRFunc* specializeIRForEntryPoint( + IRSpecializationState* state, + EntryPointRequest* entryPointRequest) +{ + auto translationUnit = entryPointRequest->getTranslationUnit(); + auto originalIRModule = translationUnit->irModule; + if (!originalIRModule) + { + // We should already have emitted IR for the original + // translation unit, and it we don't have it, 
then + // we are now in trouble. + return nullptr; + } + + auto context = state->getContext(); + auto newProgramLayout = state->newProgramLayout; + + auto entryPointLayout = findEntryPointLayout(newProgramLayout, entryPointRequest); + + + // Next, we make sure to clone the global value for + // the entry point function itself, and rely on + // this step to recursively copy over anything else + // it might reference. + auto irEntryPoint = specializeIRForEntryPoint(context, entryPointRequest, entryPointLayout); + + // HACK: right now the bindings for global generic parameters are coming in + // as part of the original IR module, and we need to make sure these get + // copied over, even if they aren't referenced. + // + for(auto inst : originalIRModule->getGlobalInsts()) + { + auto bindInst = as<IRBindGlobalGenericParam>(inst); + if(!bindInst) + continue; + + cloneValue(context, bindInst); + } + + + // TODO: *technically* we should consider the case where + // we have global variables with initializers, since + // these should get run whether or not the entry point + // references them. + return irEntryPoint; +} + + + +} // namespace Slang diff --git a/source/slang/ir-link.h b/source/slang/ir-link.h new file mode 100644 index 000000000..18940f825 --- /dev/null +++ b/source/slang/ir-link.h @@ -0,0 +1,33 @@ +// ir-link.h +#pragma once + +#include "compiler.h" + +namespace Slang +{ + // Interface to IR specialization for use when cloning target-specific + // IR as part of compiling an entry point. + + // `IRSpecializationState` is used as an opaque type to wrap up all + // the data needed to perform IR specialization, without exposing + // implementation details. 
+ struct IRSpecializationState; + IRSpecializationState* createIRSpecializationState( + EntryPointRequest* entryPointRequest, + ProgramLayout* programLayout, + CodeGenTarget target, + TargetRequest* targetReq); + void destroyIRSpecializationState(IRSpecializationState* state); + IRModule* getIRModule(IRSpecializationState* state); + + struct ExtensionUsageTracker; + + // Clone the IR values reachable from the given entry point + // into the IR module associated with the specialization state. + // When multiple definitions of a symbol are found, the one + // that is best specialized for the given `targetReq` will be + // used. + IRFunc* specializeIRForEntryPoint( + IRSpecializationState* state, + EntryPointRequest* entryPointRequest); +} diff --git a/source/slang/ir-specialize-resources.cpp b/source/slang/ir-specialize-resources.cpp index e6d4351f2..0108a91f8 100644 --- a/source/slang/ir-specialize-resources.cpp +++ b/source/slang/ir-specialize-resources.cpp @@ -2,6 +2,7 @@ #include "ir-specialize-resources.h" #include "ir.h" +#include "ir-clone.h" #include "ir-insts.h" namespace Slang @@ -297,50 +298,11 @@ struct ResourceParameterSpecializationContext // well as a "key" to identify the specialized function // that is required. // - // The key type is similar to that used for generic specialization - // elsewhere in the IR code. It might be worth pulling this - // notion out somewhere more centralized, but we are dealing - // with the code duplication for now. + // We will use the key type defined as part of the IR cloning + // infrastructure, which uses a sequence of `IRInst*`s + // to hold the state of the key: // - struct Key - { - // The structure of a specialization key will be a list - // of instructions starting with the function to be specialized, - // and then having one or more entries for each parameter - // that is being specialized to indicate the value to which - // it is being specialized (e.g. the global shader parameter). 
- // - List<IRInst*> vals; - - // In order to use this type as a `Dictionary` key we - // need it to support equality and hashing, but the - // implementaitons are straightforward. - // - // TODO: honestly we might consider having `GetHashCode` - // and `operator==` defined for `List<T>`. - - bool operator==(Key const& other) const - { - auto valCount = vals.Count(); - if(valCount != other.vals.Count()) return false; - for( UInt ii = 0; ii < valCount; ++ii ) - { - if(vals[ii] != other.vals[ii]) return false; - } - return true; - } - - int GetHashCode() const - { - auto valCount = vals.Count(); - int hash = Slang::GetHashCode(valCount); - for( UInt ii = 0; ii < valCount; ++ii ) - { - hash = combineHash(hash, Slang::GetHashCode(vals[ii])); - } - return hash; - } - }; + typedef IRSimpleSpecializationKey Key; // As indicated above, the information we collect about a call // site consists of the key for the specialized function we @@ -768,199 +730,7 @@ struct ResourceParameterSpecializationContext } } - // Now that we've covered how all the relevant information - // gets gathered, we can turn our attention to the - // meat of actually generating a specialized version - // of a function. - // - // For the most part, this is just a matter of *cloning* - // the original function, while keeping around a mapping - // from original values/instructions to their replacements. - // - // Because we might perform specialization many times, - // it will get is own nested context type. - // - struct CloneContext - { - // When cloning, we need an IR builder to use for - // making new instructions. - // - IRBuilder* builder; - - // We also need a mapping from old instruction to their - // new equivalents, which will serve double duty: - // - // * Before we start cloning, this will be used to - // register the mapping from things that are to be - // replaced entirely (like function parameters to - // be specialized away) to their replacements (like - // a global shader parameter). 
- // - // * During the process of cloning, this will be - // updated as we clone instructions so that when - // an instruction later in the function refers to - // something from earlier, we can look up the - // replacement. - // - Dictionary<IRInst*, IRInst*> mapOldValToNew; - - // Whenever we need to look up an operand value - // during the cloning process we'll use `cloneOperand`, - // which mostly just uses `mapOldValToNew`. - // - IRInst* cloneOperand(IRInst* oldOperand) - { - IRInst* newOperand = nullptr; - if(mapOldValToNew.TryGetValue(oldOperand, newOperand)) - return newOperand; - - // The one wrinkle here, and the place where - // this cloning logic differs from some other - // IR cloning implementations we have lying around, - // is that when we *don't* find an instruction in - // our map, we automatically assume it is not - // something taht needs to be cloned, so that the old - // value is fine to use as-is. - // - // Note that this puts an ordering constraint on - // our work: if we are going to clone some instruction - // A, then we had better clone it *before* anything - // that uses A as an operand. - // - return oldOperand; - } - - // The SSA property and the way we have structured - // our "phi nodes" (block parameters) means that - // just going through the children of a function, - // and then the children of a block will generally - // do the Right Thing and always visit an instruction - // before its uses. - // - // The big exception to this is that branch instructions - // can refer to blocks later in the same function. - // - // We work around this sort of problem in a fairly - // general fashion, by splitting the cloning of - // an instruction into two steps. - // - // The first step is just to clone the instruction - // and its direct operands, but not any decorations - // or children. 
- // - IRInst* cloneInstAndOperands(IRInst* oldInst) - { - // In order to clone an instruction we first - // need to map its operands over to their - // new values. - // - List<IRInst*> newOperands; - UInt operandCount = oldInst->getOperandCount(); - for(UInt ii = 0; ii < operandCount; ++ii) - { - auto oldOperand = oldInst->getOperand(ii); - auto newOperand = cloneOperand(oldOperand); - newOperands.Add(newOperand); - } - - // Now we can just tell the IR builder to - // go and create an instruction directly - // - // Note: this logic would not handle any instructions - // with special-case data attached, but that only - // applies to `IRConstant`s at this point, and those - // should only appear at the global scope rather than - // in function bodies. - // - SLANG_ASSERT(!as<IRConstant>(oldInst)); - auto newInst = builder->emitIntrinsicInst( - oldInst->getFullType(), - oldInst->op, - newOperands.Count(), - newOperands.Buffer()); - - return newInst; - } - - // The second phase of cloning an instruction is to clone - // its decorations and children. This step only needs to - // be performed on those instructions that *have* decorations - // and/or children. - // - // The complexity of this step comes from the fact that it - // needs to sequence the two phases of cloning for any - // child instructions. We will do this by performing the - // first phase of cloning, and building up a list of - // children that require the second phase of processing. - // Each entry in that list will be a pair of an old instruction - // and its new clone. 
- // - struct OldNewPair - { - IRInst* oldInst; - IRInst* newInst; - }; - void cloneInstDecorationsAndChildren(IRInst* oldInst, IRInst* newInst) - { - List<OldNewPair> pairs; - for( auto oldChild : oldInst->getDecorationsAndChildren() ) - { - // As a very subtle special case, if one of the children - // of our `oldInst` already has a registered replacement, - // then we don't want to clone it (not least because - // the `Dictionary::Add` method would give us an error - // when we try to insert a new value for the same key). - // - // This arises for entries in `mapOldValToNew` that were - // seeded before cloning begain (e.g., the function - // parameters that are to be replaced). - // - if(mapOldValToNew.ContainsKey(oldChild)) - continue; - - // Because we are re-using the same IR builder in - // multiple places, we need to make sure to set - // its insertion location before creating the - // child instruction. - // - builder->setInsertInto(newInst); - - // Now we can perform the first phase of cloning - // on the child, and register it in our map from - // old to new values. - // - auto newChild = cloneInstAndOperands(oldChild); - mapOldValToNew.Add(oldChild, newChild); - - // If an only if the old child had decorations - // or children, we will register it into our - // list for processing in the second phase. - // - if( oldChild->getFirstDecorationOrChild() ) - { - OldNewPair pair; - pair.oldInst = oldChild; - pair.newInst = newChild; - pairs.Add(pair); - } - } - - // Once we have done first-phase processing for - // all child instructions, we scan through those - // in the list that required second-phase processing, - // and clone their decorations and/or children recursively. 
- // - for( auto pair : pairs ) - { - auto oldChild = pair.oldInst; - auto newChild = pair.newInst; - - cloneInstDecorationsAndChildren(oldChild, newChild); - } - } - }; - - // With all of that machinery out of the way, + // With all of that data-gathering code out of the way, // we are now prepared to walk through the process of // specializing a given callee function based on // the information we have gathered. @@ -969,12 +739,14 @@ struct ResourceParameterSpecializationContext IRFunc* oldFunc, FuncSpecializationInfo const& funcInfo) { - // We start by setting up our context for cloning - // the blocks and instructions in the old function. + // We will make use of the infrastructure for cloning + // IR code, that is defined in `ir-clone.{h,cpp}`. // - auto builder = getBuilder(); - CloneContext cloneContext; - cloneContext.builder = builder; + // In order to do the cloning work we need an + // "environment" that will map old values to + // their replacements. + // + IRCloneEnv cloneEnv; // Next we iterate over the parameters of the old // function, and register each as being mapped @@ -986,7 +758,7 @@ struct ResourceParameterSpecializationContext { UInt paramIndex = paramCounter++; auto newVal = funcInfo.replacementsForOldParameters[paramIndex]; - cloneContext.mapOldValToNew.Add(oldParam, newVal); + cloneEnv.mapOldValToNew.Add(oldParam, newVal); } // Next we will create the skeleton of the new @@ -1003,6 +775,8 @@ struct ResourceParameterSpecializationContext { paramTypes.Add(param->getFullType()); } + + auto builder = getBuilder(); IRType* funcType = builder->getFuncType( paramTypes.Count(), paramTypes.Buffer(), @@ -1015,11 +789,15 @@ struct ResourceParameterSpecializationContext // of cloning the function (since `IRFunc`s have no // operands). 
// - // We can now call into our `CloneContext` to perform - // the second phase of cloning, which will recursively + // We can now use the shared IR cloning infrastructure + // to perform the second phase of cloning, which will recursively // clone any nested decorations, blocks, and instructions. // - cloneContext.cloneInstDecorationsAndChildren(oldFunc, newFunc); + cloneInstDecorationsAndChildren( + &cloneEnv, + builder->sharedBuilder, + oldFunc, + newFunc); // We are almost done at this point, except that `newFunc` // is lacking its parameters, as well as any of the body diff --git a/source/slang/ir-specialize.cpp b/source/slang/ir-specialize.cpp new file mode 100644 index 000000000..4a20c6195 --- /dev/null +++ b/source/slang/ir-specialize.cpp @@ -0,0 +1,685 @@ +// ir-specialize.cpp +#include "ir-specialize.h" + +#include "ir.h" +#include "ir-clone.h" +#include "ir-insts.h" + +namespace Slang +{ + +// This file implements the primary specialization pass, that takes +// generic/polymorphic Slang code and specializes/monomorphises it. +// +// At present this primarily means generating specialized copies +// of generic functions/types based on the concrete types used +// at specialization sites, and also specializing instances +// of witness-table lookup to directly refer to the concrete +// values for witnesses when witness tables are known. +// +// Eventually, this pass will also need to perform specialization +// of functions to argument values for parameters that must +// be compile-time constants, and simplification of code using +// existential (interface) types for function parameters/results. + +struct SpecializationContext +{ + // We know that we can only perform specialization when all + // of the arguments to a generic are also fully specialized. 
+ // The "is fully specialized" condition is something we + // need to solve for over the program, because the fully- + // specialized-ness of an instruction depends on the + // fully-specialized-ness of its operands. + // + // We will build an explicit hash set to encode those + // instructions that are fully specialized. + // + HashSet<IRInst*> fullySpecializedInsts; + + // An instruction is then fully specialized if and only + // if it is in our set. + // + bool isInstFullySpecialized( + IRInst* inst) + { + // A small wrinkle is that a null instruction pointer + // sometimes appears a a type, and so should be treated + // as fully specialized too. + // + // TODO: It would be nice to remove this wrinkle. + // + if(!inst) return true; + + return fullySpecializedInsts.Contains(inst); + } + + // When an instruction isn't fully specialized, but its operands *are* + // then it is a candidate for specialization itself, so we will have + // a query to check for the "all operands fully specialized" case. + // + bool areAllOperandsFullySpecialized( + IRInst* inst) + { + if(!isInstFullySpecialized(inst->getFullType())) + return false; + + UInt operandCount = inst->getOperandCount(); + for(UInt ii = 0; ii < operandCount; ++ii) + { + IRInst* operand = inst->getOperand(ii); + if(!isInstFullySpecialized(operand)) + return false; + } + + return true; + } + + // We will also maintain a work list of instructions that are + // not fully specialized, and that we want to consider for + // specialization. + // + List<IRInst*> workList; + + // When we consider adding an instruction to our work list + // we will try to be careful and only add things that aren't + // already fully specialized. 
+ // + void maybeAddToWorkList(IRInst* inst) + { + if(isInstFullySpecialized(inst)) + return; + + workList.Add(inst); + } + + // When we go to populate the work list by recursively + // traversing some code, we will be careful to *not* + // add generics or their children to the work list, + // and will instead consider a generic to be "fully + // specialized" already (because uses of that generic + // as an *operand* should be seen as fully specialized + // references). + // + void populateWorkListRec( + IRInst* inst) + { + if(auto genericInst = as<IRGeneric>(inst)) + { + fullySpecializedInsts.Add(genericInst); + } + else + { + maybeAddToWorkList(inst); + + for(auto child : inst->getChildren()) + { + populateWorkListRec(child); + } + } + } + + // Of course, somewhere along the way we expect + // to run into uses of `specialize(...)` instructions + // to bind a generic to arguments that we want to + // specialize into concrete code. + // + // We also know that if we encouter `specialize(g, a, b, c)` + // and then later `specialize(g, a, b, c)` again, we + // only want to generate the specialized code for `g<a,b,c>` + // *once*, and re-use it for both versions. + // + // We will cache existing specializations of generic function/types + // using the simple key type defined as part of the IR cloning infrastructure. + // + typedef IRSimpleSpecializationKey Key; + Dictionary<Key, IRInst*> genericSpecializations; + + // We will also use some shared IR building state across + // all of our specialization/cloning steps. + // + SharedIRBuilder sharedBuilderStorage; + + // Now let's look at the task of finding or generation a + // specialization of some generic `g`, given a specialization + // instruction like `specialize(g, a, b, c)`. + // + // The `specializeGeneric` function will return a value + // suitable for use as a replacement for the `specialize(...)` + // instruction. 
+ // + IRInst* specializeGeneric( + IRGeneric* genericVal, + IRSpecialize* specializeInst) + { + // First, we want to see if an existing specialization + // has already been made. To do that we will construct a key + // for lookup in the generic specialization context. + // + // Our key will consist of the identity of the generic + // being specialized, and each of the argument values + // being pased to it. In our hypothetical example of + // `specialize(g, a, b, c)` the key will then be + // the array `[g, a, b, c]`. + // + Key key; + key.vals.Add(specializeInst->getBase()); + UInt argCount = specializeInst->getArgCount(); + for( UInt ii = 0; ii < argCount; ++ii ) + { + key.vals.Add(specializeInst->getArg(ii)); + } + + { + // We use our generated key to look for an + // existing specialization that has been registered. + // If one is found, our work is done. + // + IRInst* specializedVal = nullptr; + if(genericSpecializations.TryGetValue(key, specializedVal)) + return specializedVal; + } + + // If no existing specialization is found, we need + // to create the specialization instead. + // + // Effectively this amounts to "calling" the generic + // on its concrete argument values and computing the + // result it returns. + // + // For now, all of our generics consist of a single + // basic block, so we can "call" them just by + // cloning the instructions in their single block + // into the global scope, using an environment for + // cloning that maps the generic parameters to + // the concrete arguments that were provided + // by the `specialize(...)` instruction. + // + IRCloneEnv env; + + // We will walk through the parameters of the generic and + // register the corresponding argument of the `specialize` + // instruction to be used as the "cloned" value for each + // parameter. + // + // Suppose we are looking at `specialize(g, a, b, c)` and `g` has + // three generic parameters: `T`, `U`, and `V`. 
Then we will + // be initializing our environment to map `T -> a`, `U -> b`, + // and `V -> c`. + // + UInt argCounter = 0; + for( auto param : genericVal->getParams() ) + { + UInt argIndex = argCounter++; + SLANG_ASSERT(argIndex < specializeInst->getArgCount()); + + IRInst* arg = specializeInst->getArg(argIndex); + + env.mapOldValToNew.Add(param, arg); + } + + // We will set up an IR builder for insertion + // into the global scope, at the same location + // as the original generic. + // + IRBuilder builderStorage; + IRBuilder* builder = &builderStorage; + builder->sharedBuilder = &sharedBuilderStorage; + builder->setInsertBefore(genericVal); + + // Now we will run through the body of the generic and + // clone each of its instructions into the global scope, + // until we reach a `return` instruction. + // + for( auto bb : genericVal->getBlocks() ) + { + // We expect a generic to only ever contain a single block. + // + SLANG_ASSERT(bb == genericVal->getFirstBlock()); + + // We will iterate over the non-parameter ("ordinary") + // instructions only, because parameters were dealt + // with explictly at an earlier point. + // + for( auto ii : bb->getOrdinaryInsts() ) + { + // The last block of the generic is expected to end with + // a `return` instruction for the specialized value that + // comes out of the abstraction. + // + // We thus use that cloned value as the result of the + // specialization step. + // + if( auto returnValInst = as<IRReturnVal>(ii) ) + { + auto specializedVal = findCloneForOperand(&env, returnValInst->getVal()); + + // The value that was returned from evaluating + // the generic is the specialized value, and we + // need to remember it in our dictionary of + // specializations so that we don't instantiate + // this generic again for the same arguments. 
+ // + genericSpecializations.Add(key, specializedVal); + + return specializedVal; + } + + // For any instruction other than a `return`, we will + // simply clone it completely into the global scope. + // + IRInst* clonedInst = cloneInst(&env, builder, ii); + + // Any new instructions we create during cloning were + // not present when we initially built our work list, + // so we need to make sure to consider them now. + // + // This is important for the cases where one generic + // invokes another, because there will be `specialize` + // operations nested inside the first generic that refer + // to the second. + // + populateWorkListRec(clonedInst); + } + } + + // If we reach this point, something went wrong, because we + // never encountered a `return` inside the body of the generic. + // + SLANG_UNEXPECTED("no return from generic"); + UNREACHABLE_RETURN(nullptr); + } + + // The logic for generating a specialization of an IR generic + // relies on the ability to "evaluate" the code in the body of + // the generic, but that obviously doesn't work if we don't + // actually have the full definition for the body. + // + // This can arise in particular for builtin operations/types. + // + // Before calling `specializeGeneric()` we need to make sure + // that the generic is actually amenable to specialization, + // by looking at whether it is a definition or a declaration. + // + bool canSpecializeGeneric( + IRGeneric* generic) + { + // It is possible to have multiple "layers" of generics + // (e.g., when a generic function is nested in a generic + // type). Therefore we need to drill down through all + // of the layers present to see if at the leaf we have + // something that looks like a definition. + // + IRGeneric* g = generic; + for(;;) + { + // Given the generic `g`, we will find the value + // it appears to return in its body. 
+ // + auto val = findGenericReturnVal(g); + if(!val) + return false; + + // If `g` returns an inner generic, then we need + // to drill down further. + // + if (auto nestedGeneric = as<IRGeneric>(val)) + { + g = nestedGeneric; + continue; + } + + // Once we've found the leaf value that will be produced + // after all specialization is complete, we can check + // whether it looks like a definition or not. + // + return isDefinition(val); + } + } + + // Now that we know when we can specialize a generic, and how + // to do it, we can write a subroutine that takes a + // `specialize(g, a, b, c, ...)` instruction and performs + // specialization if it is possible. + // + void maybeSpecializeGeneric( + IRSpecialize* specInst) + { + // The invariant that the arguments are fully specialized + // should mean that `a, b, c, ...` are in a form that + // we can work with, but it does *not* guarantee + // that the `g` operand is something we can work with. + // + // We can only perform specialization in the case where + // the base `g` is a known `generic` instruction. + // + auto baseVal = specInst->getBase(); + auto genericVal = as<IRGeneric>(baseVal); + if(!genericVal) + return; + + // We can also only specialize a generic if it + // represents a definition rather than a declaration. + // + if(!canSpecializeGeneric(genericVal)) + return; + + // Once we know that specialization is possible, + // the actual work is fairly simple. + // + // First, we find or generate a specialized + // version of the result of the generic (a specialized + // type, function, or whatever). + // + auto specializedVal = specializeGeneric(genericVal, specInst); + + // Then we simply replace any uses of the `specialize(...)` + // instruction with the specialized value and delete + // the `specialize(...)` instruction from existence. 
+ // + specInst->replaceUsesWith(specializedVal); + specInst->removeAndDeallocate(); + } + + // The basic rule we are following is that once all the operands + // to an instruction are fully specialized, we are safe + // to specialize the instruction itself, but the work + // required to specialize an instruction depends on the + // form of the instruction. + // + void fullySpecializeInst( + IRInst* inst) + { + // A precondition of the `fullySpecializeInst` operation + // is that the operands to `inst` have all been fully + // specialized. + // + SLANG_ASSERT(areAllOperandsFullySpecialized(inst)); + + switch(inst->op) + { + default: + // The default case is that there is nothing to + // be done to specialize an instruction; once all + // of its operands are specialized it is safe + // to consider the instruction itself as fully + // specialized. + // + break; + + case kIROp_Specialize: + // The logic for specializing a `specialize(...)` + // instruction has already been elaborated above. + // + maybeSpecializeGeneric(cast<IRSpecialize>(inst)); + break; + + case kIROp_lookup_interface_method: + // The remaining case we need to consider here + // is when we have a `lookup_witness_method` instruction + // that is being applied to a concrete witness table, + // because we can specialize it to just be a direct + // reference to the actual witness value from the table. + // + maybeSpecializeWitnessLookup(cast<IRLookupWitnessMethod>(inst)); + break; + } + } + + void maybeSpecializeWitnessLookup( + IRLookupWitnessMethod* lookupInst) + { + // Note: While we currently have named the instruction + // `lookup_witness_method`, the `method` part is a misnomer + // and the same instruction can look up *any* interface + // requirement based on the witness table that provides + // a conformance, and the "key" that indicates the interface + // requirement. 
+ + // We can only specialize in the case where the lookup + // is being done on a concrete witness table, and not + // the result of a `specialize` instruction or other + // operation that will yield such a table. + // + auto witnessTable = as<IRWitnessTable>(lookupInst->getWitnessTable()); + if(!witnessTable) + return; + + // Because we have a concrete witness table, we can + // use it to look up the IR value that satisfies + // the given interface requirement. + // + auto requirementKey = lookupInst->getRequirementKey(); + auto satisfyingVal = findWitnessVal(witnessTable, requirementKey); + + // We expect to always find a satisfying value, but + // we will go ahead and code defensively so that + // we leave "correct" but unspecialized code if + // we cannot find a concrete value to use. + // + if(!satisfyingVal) + return; + + // At this point, we know that `satisfyingVal` is what + // would result from executing this `lookup_witness_method` + // instruciton dynamically, so we can go ahead and + // replace the original instruction with that value. + // + lookupInst->replaceUsesWith(satisfyingVal); + lookupInst->removeAndDeallocate(); + } + + // The above subroutine needed a way to look up + // the satisfying value for a given requirement + // key in a concrete witness table, so let's + // define that now. + // + IRInst* findWitnessVal( + IRWitnessTable* witnessTable, + IRInst* requirementKey) + { + // A witness table is basically just a container + // for key-value pairs, and so the best we can + // do for now is a naive linear search. + // + for( auto entry : witnessTable->getEntries() ) + { + if (requirementKey == entry->getRequirementKey()) + { + return entry->getSatisfyingVal(); + } + } + return nullptr; + } + + // All of the machinery has been defined above, so + // we can now walk through the flow of the overall + // specialization pass. 
+ // + void processModule(IRModule* module) + { + // We start by initializing our shared IR building state, + // since we will re-use that state for any code we + // generate along the way. + // + SharedIRBuilder* sharedBuilder = &sharedBuilderStorage; + sharedBuilder->module = module; + sharedBuilder->session = module->session; + + // The unspecialized IR we receive as input will have + // `IRBindGlobalGenericParam` instructions that associate + // each global-scope generic parameter (a type, witness + // table, or what-have-you) with the value that it should + // be bound to for the purposes of this code-generation + // pass. + // + // Before doing any other specialization work, we will + // iterate over these instructions (which may only + // appear at the global scope) and use them to drive + // replacement of the given generic type parameter with + // the desired concrete value. + // + auto moduleInst = module->getModuleInst(); + for(auto inst : moduleInst->getChildren()) + { + // We only want to consider the `bind_global_generic_param` + // instructions, and ignore everything else. + // + auto bindInst = as<IRBindGlobalGenericParam>(inst); + if(!bindInst) + continue; + + // HACK: Our current front-end emit logic can end up emitting multiple + // `bind_global_generic_param` instructions for the same parameter. This is + // a buggy behavior, but a real fix would require refactoring the way + // global generic arguments are specified today. + // + // For now we will do a sanity check to detect parameters that + // have already been specialized. + // + if( !as<IRGlobalGenericParam>(bindInst->getOperand(0)) ) + { + // The "parameter" operand is no longer a parameter, so it + // seems things must have been specialized already. + // + continue; + } + + // The actual logic for applying the substitution is + // almost trivial: we will replace any uses of the + // global generic parameter with its desired value. 
+ // + auto param = bindInst->getParam(); + auto val = bindInst->getVal(); + param->replaceUsesWith(val); + } + { + // Now that we've replaced any uses of global generic + // parameters, we will do a second pass to remove + // the parameters and any `bind_global_generic_param` + // instructions, since both should be dead/unused. + // + IRInst* next = nullptr; + for(auto inst = moduleInst->getFirstChild(); inst; inst = next) + { + next = inst->getNextInst(); + + switch(inst->op) + { + default: + break; + + case kIROp_GlobalGenericParam: + case kIROp_BindGlobalGenericParam: + // A `bind_global_generic_param` instruction should + // have no uses in the first place, and all the global + // generic parameters should have had their uses replaced. + // + SLANG_ASSERT(!inst->firstUse); + inst->removeAndDeallocate(); + break; + } + } + } + + // Now that we've eliminated all cases of global generic parameters, + // we should now have the properties that: + // + // 1. Execution starts in non-generic code, with no unbound + // generic parameters in scope. + // + // 2. Any case where non-generic code makes use of a generic + // type/function, there will be a `specialize` instruction + // that specifies both the generic and the (concrete) type + // arguments that should be provided to it. + // + // Our primary goal is then to find `specialize` instructions that + // can be replaced with references to, e.g., a suitably + // specialized function, and to resolve any `lookup_interface_method` + // instructions to the concrete value fetched from a witness + // table. + // + // We need to be careful of a few things: + // + // * It would not in general make sense to consider specialize-able + // instructions under an `IRGeneric`, since that could mean "specializing" + // code to parameter values that are still unknown. 
+ // + // * We *also* need to be careful not to specialize something when one + // or more of its inputs is also a `specialize` or `lookup_interface_method` + // instruction, because then we'd be propagating through non-concrete + // values. + // + // The approach we use here is to build a work list of instructions + // that are candidates for specialization, and then process them one + // at a time to see if we can make some forward progress. + // + // We will start by recursively walking all the instructions to add + // the appropriate ones to our work list: + // + populateWorkListRec(moduleInst); + + // We want to treat our work list like a queue rather than + // a stack, because we populated it in program order, + // and fully-specialized-ness will tend to flow top-down. + // + // To accomplish this we will ping-pong between the + // real work list and a copy so that we can iterate over + // one list while adding to the other. + // + List<IRInst*> workListCopy; + while(workList.Count() != 0) + { + workListCopy.Clear(); + workListCopy.SwapWith(workList); + + for( auto inst : workListCopy ) + { + // We need to check whether it is possible to specialize + // the instruction yet. It might not be specializable + // because its operands haven't been specialized. + // + if(!areAllOperandsFullySpecialized(inst)) + { + // If we can't fully specialize this instruction + // yet, then we need to toss it back onto the + // work list to be considered in the next round. + // + // TODO: We need to carefully vet that this can + // never lead to infinite looping + // + workList.Add(inst); + } + else + { + // If all of the operands to `inst` are + // fully specialized, then we can go + // ahead and do whatever is required + // to "fully specialize" `inst` itself. + // + fullySpecializeInst(inst); + + // At this point, we want to start + // considering `inst` as fully specialized, + // so let's add it to our set. 
+ // + fullySpecializedInsts.Add(inst); + } + } + } + + // Once the work list has gone dry, we should have the invariant + // that there are no `specialize` instructions inside of non-generic + // functions that in turn reference a generic type/function, *except* + // in the case where that generic is for a builtin type/function, in + // which case we wouldn't want to specialize it anyway. + } +}; + +void specializeGenerics( + IRModule* module) +{ + SpecializationContext context; + context.processModule(module); +} + +} // namespace Slang diff --git a/source/slang/ir-specialize.h b/source/slang/ir-specialize.h new file mode 100644 index 000000000..dc1f07481 --- /dev/null +++ b/source/slang/ir-specialize.h @@ -0,0 +1,13 @@ +// ir-specialize.h +#pragma once + +namespace Slang +{ +struct IRModule; + +// Find suitable uses of the `specialize` instruction that +// can be replaced with references to specialized functions. +void specializeGenerics( + IRModule* module); + +} diff --git a/source/slang/ir.cpp b/source/slang/ir.cpp index 6899e1494..ca424b4a4 100644 --- a/source/slang/ir.cpp +++ b/source/slang/ir.cpp @@ -1858,18 +1858,32 @@ namespace Slang return inst; } - IRInst* IRBuilder::emitIntrinsicInst( + IRInst* IRBuilder::createIntrinsicInst( IRType* type, IROp op, UInt argCount, IRInst* const* args) { - auto inst = createInstWithTrailingArgs<IRInst>( + return createInstWithTrailingArgs<IRInst>( this, op, type, argCount, args); + } + + + IRInst* IRBuilder::emitIntrinsicInst( + IRType* type, + IROp op, + UInt argCount, + IRInst* const* args) + { + auto inst = createIntrinsicInst( + type, + op, + argCount, + args); addInst(inst); return inst; } @@ -3909,1058 +3923,6 @@ namespace Slang return t; } - // - // Legalization of entry points for GLSL: - // - - IRGlobalParam* addGlobalParam( - IRModule* module, - IRType* valueType) - { - auto session = module->session; - - SharedIRBuilder shared; - shared.module = module; - shared.session = session; - - IRBuilder builder; - 
builder.sharedBuilder = &shared; - return builder.createGlobalParam(valueType); - } - - void moveValueBefore( - IRInst* valueToMove, - IRInst* placeBefore) - { - valueToMove->removeFromParent(); - valueToMove->insertBefore(placeBefore); - } - - // When scalarizing shader inputs/outputs for GLSL, we need a way - // to refer to a conceptual "value" that might comprise multiple - // IR-level values. We could in principle introduce tuple types - // into the IR so that everything stays at the IR level, but - // it seems easier to just layer it over the top for now. - // - // The `ScalarizedVal` type deals with the "tuple or single value?" - // question, and also the "l-value or r-value?" question. - struct ScalarizedValImpl : RefObject - {}; - struct ScalarizedTupleValImpl; - struct ScalarizedTypeAdapterValImpl; - struct ScalarizedVal - { - enum class Flavor - { - // no value (null pointer) - none, - - // A simple `IRInst*` that represents the actual value - value, - - // An `IRInst*` that represents the address of the actual value - address, - - // A `TupleValImpl` that represents zero or more `ScalarizedVal`s - tuple, - - // A `TypeAdapterValImpl` that wraps a single `ScalarizedVal` and - // represents an implicit type conversion applied to it on read - // or write. 
- typeAdapter, - }; - - // Create a value representing a simple value - static ScalarizedVal value(IRInst* irValue) - { - ScalarizedVal result; - result.flavor = Flavor::value; - result.irValue = irValue; - return result; - } - - - // Create a value representing an address - static ScalarizedVal address(IRInst* irValue) - { - ScalarizedVal result; - result.flavor = Flavor::address; - result.irValue = irValue; - return result; - } - - static ScalarizedVal tuple(ScalarizedTupleValImpl* impl) - { - ScalarizedVal result; - result.flavor = Flavor::tuple; - result.impl = (ScalarizedValImpl*)impl; - return result; - } - - static ScalarizedVal typeAdapter(ScalarizedTypeAdapterValImpl* impl) - { - ScalarizedVal result; - result.flavor = Flavor::typeAdapter; - result.impl = (ScalarizedValImpl*)impl; - return result; - } - - Flavor flavor = Flavor::none; - IRInst* irValue = nullptr; - RefPtr<ScalarizedValImpl> impl; - }; - - // This is the case for a value that is a "tuple" of other values - struct ScalarizedTupleValImpl : ScalarizedValImpl - { - struct Element - { - IRStructKey* key; - ScalarizedVal val; - }; - - IRType* type; - List<Element> elements; - }; - - // This is the case for a value that is stored with one type, - // but needs to present itself as having a different type - struct ScalarizedTypeAdapterValImpl : ScalarizedValImpl - { - ScalarizedVal val; - IRType* actualType; // the actual type of `val` - IRType* pretendType; // the type this value pretends to have - }; - - struct GlobalVaryingDeclarator - { - enum class Flavor - { - array, - }; - - Flavor flavor; - IRInst* elementCount; - GlobalVaryingDeclarator* next; - }; - - struct GLSLSystemValueInfo - { - // The name of the built-in GLSL variable - char const* name; - - // The name of an outer array that wraps - // the variable, in the case of a GS input - char const* outerArrayName; - - // The required type of the built-in variable - IRType* requiredType; - }; - - void requireGLSLVersionImpl( - 
ExtensionUsageTracker* tracker, - ProfileVersion version); - - void requireGLSLExtension( - ExtensionUsageTracker* tracker, - String const& name); - - struct GLSLLegalizationContext - { - Session* session; - ExtensionUsageTracker* extensionUsageTracker; - DiagnosticSink* sink; - Stage stage; - - void requireGLSLExtension(String const& name) - { - Slang::requireGLSLExtension(extensionUsageTracker, name); - } - - void requireGLSLVersion(ProfileVersion version) - { - Slang::requireGLSLVersionImpl(extensionUsageTracker, version); - } - - Stage getStage() - { - return stage; - } - - DiagnosticSink* getSink() - { - return sink; - } - - IRBuilder* builder; - IRBuilder* getBuilder() { return builder; } - }; - - GLSLSystemValueInfo* getGLSLSystemValueInfo( - GLSLLegalizationContext* context, - VarLayout* varLayout, - LayoutResourceKind kind, - Stage stage, - GLSLSystemValueInfo* inStorage) - { - char const* name = nullptr; - char const* outerArrayName = nullptr; - - auto semanticNameSpelling = varLayout->systemValueSemantic; - if(semanticNameSpelling.Length() == 0) - return nullptr; - - auto semanticName = semanticNameSpelling.ToLower(); - - IRType* requiredType = nullptr; - - if(semanticName == "sv_position") - { - // This semantic can either work like `gl_FragCoord` - // when it is used as a fragment shader input, or - // like `gl_Position` when used in other stages. - // - // Note: This isn't as simple as testing input-vs-output, - // because a user might have a VS output `SV_Position`, - // and then pass it along to a GS that reads it as input. - // - if( stage == Stage::Fragment - && kind == LayoutResourceKind::VaryingInput ) - { - name = "gl_FragCoord"; - } - else if( stage == Stage::Geometry - && kind == LayoutResourceKind::VaryingInput ) - { - // As a GS input, the correct syntax is `gl_in[...].gl_Position`, - // but that is not compatible with picking the array dimension later, - // of course. 
- outerArrayName = "gl_in"; - name = "gl_Position"; - } - else - { - name = "gl_Position"; - } - } - else if(semanticName == "sv_target") - { - // Note: we do *not* need to generate some kind of `gl_` - // builtin for fragment-shader outputs: they are just - // ordinary `out` variables, with ordinary `location`s, - // as far as GLSL is concerned. - return nullptr; - } - else if(semanticName == "sv_clipdistance") - { - // TODO: type conversion is required here. - name = "gl_ClipDistance"; - } - else if(semanticName == "sv_culldistance") - { - context->requireGLSLExtension("ARB_cull_distance"); - - // TODO: type conversion is required here. - name = "gl_CullDistance"; - } - else if(semanticName == "sv_coverage") - { - // TODO: deal with `gl_SampleMaskIn` when used as an input. - - // TODO: type conversion is required here. - name = "gl_SampleMask"; - } - else if(semanticName == "sv_depth") - { - name = "gl_FragDepth"; - } - else if(semanticName == "sv_depthgreaterequal") - { - // TODO: layout(depth_greater) out float gl_FragDepth; - name = "gl_FragDepth"; - } - else if(semanticName == "sv_depthlessequal") - { - // TODO: layout(depth_greater) out float gl_FragDepth; - name = "gl_FragDepth"; - } - else if(semanticName == "sv_dispatchthreadid") - { - name = "gl_GlobalInvocationID"; - } - else if(semanticName == "sv_domainlocation") - { - name = "gl_TessCoord"; - } - else if(semanticName == "sv_groupid") - { - name = "gl_WorkGroupID"; - } - else if(semanticName == "sv_groupindex") - { - name = "gl_LocalInvocationIndex"; - } - else if(semanticName == "sv_groupthreadid") - { - name = "gl_LocalInvocationID"; - } - else if(semanticName == "sv_gsinstanceid") - { - name = "gl_InvocationID"; - } - else if(semanticName == "sv_instanceid") - { - name = "gl_InstanceIndex"; - } - else if(semanticName == "sv_isfrontface") - { - name = "gl_FrontFacing"; - } - else if(semanticName == "sv_outputcontrolpointid") - { - name = "gl_InvocationID"; - } - else if(semanticName == 
"sv_primitiveid") - { - name = "gl_PrimitiveID"; - } - else if (semanticName == "sv_rendertargetarrayindex") - { - switch (context->getStage()) - { - case Stage::Geometry: - context->requireGLSLVersion(ProfileVersion::GLSL_150); - break; - - case Stage::Fragment: - context->requireGLSLVersion(ProfileVersion::GLSL_430); - break; - - default: - context->requireGLSLVersion(ProfileVersion::GLSL_450); - context->requireGLSLExtension("GL_ARB_shader_viewport_layer_array"); - break; - } - - name = "gl_Layer"; - requiredType = context->getBuilder()->getBasicType(BaseType::Int); - } - else if (semanticName == "sv_sampleindex") - { - name = "gl_SampleID"; - } - else if (semanticName == "sv_stencilref") - { - context->requireGLSLExtension("ARB_shader_stencil_export"); - name = "gl_FragStencilRef"; - } - else if (semanticName == "sv_tessfactor") - { - name = "gl_TessLevelOuter"; - } - else if (semanticName == "sv_vertexid") - { - name = "gl_VertexIndex"; - } - else if (semanticName == "sv_viewportarrayindex") - { - name = "gl_ViewportIndex"; - } - else if (semanticName == "nv_x_right") - { - context->requireGLSLVersion(ProfileVersion::GLSL_450); - context->requireGLSLExtension("GL_NVX_multiview_per_view_attributes"); - - // The actual output in GLSL is: - // - // vec4 gl_PositionPerViewNV[]; - // - // and is meant to support an arbitrary number of views, - // while the HLSL case just defines a second position - // output. - // - // For now we will hack this by: - // 1. Mapping an `NV_X_Right` output to `gl_PositionPerViewNV[1]` - // (that is, just one element of the output array) - // 2. 
Adding logic to copy the traditional `gl_Position` output - // over to `gl_PositionPerViewNV[0]` - // - - name = "gl_PositionPerViewNV[1]"; - -// shared->requiresCopyGLPositionToPositionPerView = true; - } - else if (semanticName == "nv_viewport_mask") - { - context->requireGLSLVersion(ProfileVersion::GLSL_450); - context->requireGLSLExtension("GL_NVX_multiview_per_view_attributes"); - - name = "gl_ViewportMaskPerViewNV"; -// globalVarExpr = createGLSLBuiltinRef("gl_ViewportMaskPerViewNV", -// getUnsizedArrayType(getIntType())); - } - - if( name ) - { - inStorage->name = name; - inStorage->outerArrayName = outerArrayName; - inStorage->requiredType = requiredType; - return inStorage; - } - - context->getSink()->diagnose(varLayout->varDecl.getDecl()->loc, Diagnostics::unknownSystemValueSemantic, semanticNameSpelling); - return nullptr; - } - - ScalarizedVal createSimpleGLSLGlobalVarying( - GLSLLegalizationContext* context, - IRBuilder* builder, - IRType* inType, - VarLayout* inVarLayout, - TypeLayout* inTypeLayout, - LayoutResourceKind kind, - Stage stage, - UInt bindingIndex, - GlobalVaryingDeclarator* declarator) - { - // Check if we have a system value on our hands. - GLSLSystemValueInfo systemValueInfoStorage; - auto systemValueInfo = getGLSLSystemValueInfo( - context, - inVarLayout, - kind, - stage, - &systemValueInfoStorage); - - IRType* type = inType; - - // A system-value semantic might end up needing to override the type - // that the user specified. - if( systemValueInfo && systemValueInfo->requiredType ) - { - type = systemValueInfo->requiredType; - } - - // Construct the actual type and type-layout for the global variable - // - RefPtr<TypeLayout> typeLayout = inTypeLayout; - for( auto dd = declarator; dd; dd = dd->next ) - { - // We only have one declarator case right now... 
- SLANG_ASSERT(dd->flavor == GlobalVaryingDeclarator::Flavor::array); - - auto arrayType = builder->getArrayType( - type, - dd->elementCount); - - RefPtr<ArrayTypeLayout> arrayTypeLayout = new ArrayTypeLayout(); -// arrayTypeLayout->type = arrayType; - arrayTypeLayout->rules = typeLayout->rules; - arrayTypeLayout->originalElementTypeLayout = typeLayout; - arrayTypeLayout->elementTypeLayout = typeLayout; - arrayTypeLayout->uniformStride = 0; - - if( auto resInfo = inTypeLayout->FindResourceInfo(kind) ) - { - // TODO: it is kind of gross to be re-running some - // of the type layout logic here. - - UInt elementCount = (UInt) GetIntVal(dd->elementCount); - arrayTypeLayout->addResourceUsage( - kind, - resInfo->count * elementCount); - } - - type = arrayType; - typeLayout = arrayTypeLayout; - } - - // We need to construct a fresh layout for the variable, even - // if the original had its own layout, because it might be - // an `inout` parameter, and we only want to deal with the case - // described by our `kind` parameter. - RefPtr<VarLayout> varLayout = new VarLayout(); - varLayout->varDecl = inVarLayout->varDecl; - varLayout->typeLayout = typeLayout; - varLayout->flags = inVarLayout->flags; - varLayout->systemValueSemantic = inVarLayout->systemValueSemantic; - varLayout->systemValueSemanticIndex = inVarLayout->systemValueSemanticIndex; - varLayout->semanticName = inVarLayout->semanticName; - varLayout->semanticIndex = inVarLayout->semanticIndex; - varLayout->stage = inVarLayout->stage; - varLayout->AddResourceInfo(kind)->index = bindingIndex; - - // We are going to be creating a global parameter to replace - // the function parameter, but we need to handle the case - // where the parameter represents a varying *output* and not - // just an input. - // - // Our IR global shader parameters are read-only, just - // like our IR function parameters, and need a wrapper - // `Out<...>` type to represent otuputs. 
- // - bool isOutput = kind == LayoutResourceKind::VaryingOutput; - IRType* paramType = isOutput ? builder->getOutType(type) : type; - - auto globalParam = addGlobalParam(builder->getModule(), paramType); - moveValueBefore(globalParam, builder->getFunc()); - - ScalarizedVal val = isOutput ? ScalarizedVal::address(globalParam) : ScalarizedVal::value(globalParam); - - if( systemValueInfo ) - { - builder->addImportDecoration(globalParam, UnownedTerminatedStringSlice(systemValueInfo->name)); - - if( auto fromType = systemValueInfo->requiredType ) - { - // We may need to adapt from the declared type to/from - // the actual type of the GLSL global. - auto toType = inType; - - if( fromType != toType ) - { - RefPtr<ScalarizedTypeAdapterValImpl> typeAdapter = new ScalarizedTypeAdapterValImpl; - typeAdapter->actualType = systemValueInfo->requiredType; - typeAdapter->pretendType = inType; - typeAdapter->val = val; - - val = ScalarizedVal::typeAdapter(typeAdapter); - } - } - - if(auto outerArrayName = systemValueInfo->outerArrayName) - { - builder->addGLSLOuterArrayDecoration(globalParam, UnownedTerminatedStringSlice(outerArrayName)); - } - } - - builder->addLayoutDecoration(globalParam, varLayout); - - return val; - } - - ScalarizedVal createGLSLGlobalVaryingsImpl( - GLSLLegalizationContext* context, - IRBuilder* builder, - IRType* type, - VarLayout* varLayout, - TypeLayout* typeLayout, - LayoutResourceKind kind, - Stage stage, - UInt bindingIndex, - GlobalVaryingDeclarator* declarator) - { - if( as<IRBasicType>(type) ) - { - return createSimpleGLSLGlobalVarying( - context, - builder, type, varLayout, typeLayout, kind, stage, bindingIndex, declarator); - } - else if( as<IRVectorType>(type) ) - { - return createSimpleGLSLGlobalVarying( - context, - builder, type, varLayout, typeLayout, kind, stage, bindingIndex, declarator); - } - else if( as<IRMatrixType>(type) ) - { - // TODO: a matrix-type varying should probably be handled like an array of rows - return 
createSimpleGLSLGlobalVarying( - context, - builder, type, varLayout, typeLayout, kind, stage, bindingIndex, declarator); - } - else if( auto arrayType = as<IRArrayType>(type) ) - { - // We will need to SOA-ize any nested types. - - auto elementType = arrayType->getElementType(); - auto elementCount = arrayType->getElementCount(); - auto arrayLayout = dynamic_cast<ArrayTypeLayout*>(typeLayout); - SLANG_ASSERT(arrayLayout); - auto elementTypeLayout = arrayLayout->elementTypeLayout; - - GlobalVaryingDeclarator arrayDeclarator; - arrayDeclarator.flavor = GlobalVaryingDeclarator::Flavor::array; - arrayDeclarator.elementCount = elementCount; - arrayDeclarator.next = declarator; - - return createGLSLGlobalVaryingsImpl( - context, - builder, - elementType, - varLayout, - elementTypeLayout, - kind, - stage, - bindingIndex, - &arrayDeclarator); - } - else if( auto streamType = as<IRHLSLStreamOutputType>(type)) - { - auto elementType = streamType->getElementType(); - auto streamLayout = dynamic_cast<StreamOutputTypeLayout*>(typeLayout); - SLANG_ASSERT(streamLayout); - auto elementTypeLayout = streamLayout->elementTypeLayout; - - return createGLSLGlobalVaryingsImpl( - context, - builder, - elementType, - varLayout, - elementTypeLayout, - kind, - stage, - bindingIndex, - declarator); - } - else if(auto structType = as<IRStructType>(type)) - { - // We need to recurse down into the individual fields, - // and generate a variable for each of them. 
- - auto structTypeLayout = dynamic_cast<StructTypeLayout*>(typeLayout); - SLANG_ASSERT(structTypeLayout); - RefPtr<ScalarizedTupleValImpl> tupleValImpl = new ScalarizedTupleValImpl(); - - - // Construct the actual type for the tuple (including any outer arrays) - IRType* fullType = type; - for( auto dd = declarator; dd; dd = dd->next ) - { - SLANG_ASSERT(dd->flavor == GlobalVaryingDeclarator::Flavor::array); - fullType = builder->getArrayType( - fullType, - dd->elementCount); - } - - tupleValImpl->type = fullType; - - // Okay, we want to walk through the fields here, and - // generate one variable for each. - UInt fieldCounter = 0; - for(auto field : structType->getFields()) - { - UInt fieldIndex = fieldCounter++; - - auto fieldLayout = structTypeLayout->fields[fieldIndex]; - - UInt fieldBindingIndex = bindingIndex; - if(auto fieldResInfo = fieldLayout->FindResourceInfo(kind)) - fieldBindingIndex += fieldResInfo->index; - - auto fieldVal = createGLSLGlobalVaryingsImpl( - context, - builder, - field->getFieldType(), - fieldLayout, - fieldLayout->typeLayout, - kind, - stage, - fieldBindingIndex, - declarator); - - ScalarizedTupleValImpl::Element element; - element.val = fieldVal; - element.key = field->getKey(); - - tupleValImpl->elements.Add(element); - } - - return ScalarizedVal::tuple(tupleValImpl); - } - - // Default case is to fall back on the simple behavior - return createSimpleGLSLGlobalVarying( - context, - builder, type, varLayout, typeLayout, kind, stage, bindingIndex, declarator); - } - - ScalarizedVal createGLSLGlobalVaryings( - GLSLLegalizationContext* context, - IRBuilder* builder, - IRType* type, - VarLayout* layout, - LayoutResourceKind kind, - Stage stage) - { - UInt bindingIndex = 0; - if(auto rr = layout->FindResourceInfo(kind)) - bindingIndex = rr->index; - return createGLSLGlobalVaryingsImpl( - context, - builder, type, layout, layout->typeLayout, kind, stage, bindingIndex, nullptr); - } - - IRType* getFieldType( - IRType* baseType, - 
IRStructKey* fieldKey) - { - if(auto structType = as<IRStructType>(baseType)) - { - for(auto ff : structType->getFields()) - { - if(ff->getKey() == fieldKey) - return ff->getFieldType(); - } - } - - SLANG_UNEXPECTED("no such field"); - UNREACHABLE_RETURN(nullptr); - } - - ScalarizedVal extractField( - IRBuilder* builder, - ScalarizedVal const& val, - UInt fieldIndex, - IRStructKey* fieldKey) - { - switch( val.flavor ) - { - case ScalarizedVal::Flavor::value: - return ScalarizedVal::value( - builder->emitFieldExtract( - getFieldType(val.irValue->getDataType(), fieldKey), - val.irValue, - fieldKey)); - - case ScalarizedVal::Flavor::address: - { - auto ptrType = as<IRPtrTypeBase>(val.irValue->getDataType()); - auto valType = ptrType->getValueType(); - auto fieldType = getFieldType(valType, fieldKey); - auto fieldPtrType = builder->getPtrType(ptrType->op, fieldType); - return ScalarizedVal::address( - builder->emitFieldAddress( - fieldPtrType, - val.irValue, - fieldKey)); - } - - case ScalarizedVal::Flavor::tuple: - { - auto tupleVal = val.impl.As<ScalarizedTupleValImpl>(); - return tupleVal->elements[fieldIndex].val; - } - - default: - SLANG_UNEXPECTED("unimplemented"); - UNREACHABLE_RETURN(ScalarizedVal()); - } - - } - - ScalarizedVal adaptType( - IRBuilder* builder, - IRInst* val, - IRType* toType, - IRType* /*fromType*/) - { - // TODO: actually consider what needs to go on here... 
- return ScalarizedVal::value(builder->emitConstructorInst( - toType, - 1, - &val)); - } - - ScalarizedVal adaptType( - IRBuilder* builder, - ScalarizedVal const& val, - IRType* toType, - IRType* fromType) - { - switch( val.flavor ) - { - case ScalarizedVal::Flavor::value: - return adaptType(builder, val.irValue, toType, fromType); - break; - - case ScalarizedVal::Flavor::address: - { - auto loaded = builder->emitLoad(val.irValue); - return adaptType(builder, loaded, toType, fromType); - } - break; - - default: - SLANG_UNEXPECTED("unimplemented"); - UNREACHABLE_RETURN(ScalarizedVal()); - } - } - - void assign( - IRBuilder* builder, - ScalarizedVal const& left, - ScalarizedVal const& right) - { - switch( left.flavor ) - { - case ScalarizedVal::Flavor::address: - switch( right.flavor ) - { - case ScalarizedVal::Flavor::value: - { - builder->emitStore(left.irValue, right.irValue); - } - break; - - case ScalarizedVal::Flavor::address: - { - auto val = builder->emitLoad(right.irValue); - builder->emitStore(left.irValue, val); - } - break; - - case ScalarizedVal::Flavor::tuple: - { - // We are assigning from a tuple to a destination - // that is not a tuple. We will perform assignment - // element-by-element. - auto rightTupleVal = right.impl.As<ScalarizedTupleValImpl>(); - UInt elementCount = rightTupleVal->elements.Count(); - - for( UInt ee = 0; ee < elementCount; ++ee ) - { - auto rightElement = rightTupleVal->elements[ee]; - auto leftElementVal = extractField( - builder, - left, - ee, - rightElement.key); - assign(builder, leftElementVal, rightElement.val); - } - } - break; - - default: - SLANG_UNEXPECTED("unimplemented"); - break; - } - break; - - case ScalarizedVal::Flavor::tuple: - { - // We have a tuple, so we are going to need to try and assign - // to each of its constituent fields. 
- auto leftTupleVal = left.impl.As<ScalarizedTupleValImpl>(); - UInt elementCount = leftTupleVal->elements.Count(); - - for( UInt ee = 0; ee < elementCount; ++ee ) - { - auto rightElementVal = extractField( - builder, - right, - ee, - leftTupleVal->elements[ee].key); - assign(builder, leftTupleVal->elements[ee].val, rightElementVal); - } - } - break; - - case ScalarizedVal::Flavor::typeAdapter: - { - // We are trying to assign to something that had its type adjusted, - // so we will need to adjust the type of the right-hand side first. - // - // In this case we are converting to the actual type of the GLSL variable, - // from the "pretend" type that it had in the IR before. - auto typeAdapter = left.impl.As<ScalarizedTypeAdapterValImpl>(); - auto adaptedRight = adaptType(builder, right, typeAdapter->actualType, typeAdapter->pretendType); - assign(builder, typeAdapter->val, adaptedRight); - } - break; - - default: - SLANG_UNEXPECTED("unimplemented"); - break; - } - } - - ScalarizedVal getSubscriptVal( - IRBuilder* builder, - IRType* elementType, - ScalarizedVal val, - IRInst* indexVal) - { - switch( val.flavor ) - { - case ScalarizedVal::Flavor::value: - return ScalarizedVal::value( - builder->emitElementExtract( - elementType, - val.irValue, - indexVal)); - - case ScalarizedVal::Flavor::address: - return ScalarizedVal::address( - builder->emitElementAddress( - builder->getPtrType(elementType), - val.irValue, - indexVal)); - - case ScalarizedVal::Flavor::tuple: - { - auto inputTuple = val.impl.As<ScalarizedTupleValImpl>(); - - RefPtr<ScalarizedTupleValImpl> resultTuple = new ScalarizedTupleValImpl(); - resultTuple->type = elementType; - - UInt elementCount = inputTuple->elements.Count(); - UInt elementCounter = 0; - - auto structType = as<IRStructType>(elementType); - for(auto field : structType->getFields()) - { - auto tupleElementType = field->getFieldType(); - - UInt elementIndex = elementCounter++; - - SLANG_RELEASE_ASSERT(elementIndex < elementCount); - auto 
inputElement = inputTuple->elements[elementIndex]; - - ScalarizedTupleValImpl::Element resultElement; - resultElement.key = inputElement.key; - resultElement.val = getSubscriptVal( - builder, - tupleElementType, - inputElement.val, - indexVal); - - resultTuple->elements.Add(resultElement); - } - SLANG_RELEASE_ASSERT(elementCounter == elementCount); - - return ScalarizedVal::tuple(resultTuple); - } - - default: - SLANG_UNEXPECTED("unimplemented"); - UNREACHABLE_RETURN(ScalarizedVal()); - } - } - - ScalarizedVal getSubscriptVal( - IRBuilder* builder, - IRType* elementType, - ScalarizedVal val, - UInt index) - { - return getSubscriptVal( - builder, - elementType, - val, - builder->getIntValue( - builder->getIntType(), - index)); - } - - IRInst* materializeValue( - IRBuilder* builder, - ScalarizedVal const& val); - - IRInst* materializeTupleValue( - IRBuilder* builder, - ScalarizedVal val) - { - auto tupleVal = val.impl.As<ScalarizedTupleValImpl>(); - SLANG_ASSERT(tupleVal); - - UInt elementCount = tupleVal->elements.Count(); - auto type = tupleVal->type; - - if( auto arrayType = as<IRArrayType>(type)) - { - // The tuple represent an array, which means that the - // individual elements are expected to yield arrays as well. - // - // We will extract a value for each array element, and - // then use these to construct our result. 
- - List<IRInst*> arrayElementVals; - UInt arrayElementCount = (UInt) GetIntVal(arrayType->getElementCount()); - - for( UInt ii = 0; ii < arrayElementCount; ++ii ) - { - auto arrayElementPseudoVal = getSubscriptVal( - builder, - arrayType->getElementType(), - val, - ii); - - auto arrayElementVal = materializeValue( - builder, - arrayElementPseudoVal); - - arrayElementVals.Add(arrayElementVal); - } - - return builder->emitMakeArray( - arrayType, - arrayElementVals.Count(), - arrayElementVals.Buffer()); - } - else - { - // The tuple represents a value of some aggregate type, - // so we can simply materialize the elements and then - // construct a value of that type. - // - // TODO: this should be using a `makeStruct` instruction. - - List<IRInst*> elementVals; - for( UInt ee = 0; ee < elementCount; ++ee ) - { - auto elementVal = materializeValue(builder, tupleVal->elements[ee].val); - elementVals.Add(elementVal); - } - - return builder->emitConstructorInst( - tupleVal->type, - elementVals.Count(), - elementVals.Buffer()); - } - } - - IRInst* materializeValue( - IRBuilder* builder, - ScalarizedVal const& val) - { - switch( val.flavor ) - { - case ScalarizedVal::Flavor::value: - return val.irValue; - - case ScalarizedVal::Flavor::address: - { - auto loadInst = builder->emitLoad(val.irValue); - return loadInst; - } - break; - - case ScalarizedVal::Flavor::tuple: - { - auto tupleVal = val.impl.As<ScalarizedTupleValImpl>(); - return materializeTupleValue(builder, val); - } - break; - - case ScalarizedVal::Flavor::typeAdapter: - { - // Somebody is trying to use a value where its actual type - // doesn't match the type it pretends to have. To make this - // work we need to adapt the type from its actual type over - // to its pretend type. 
- auto typeAdapter = val.impl.As<ScalarizedTypeAdapterValImpl>(); - auto adapted = adaptType(builder, typeAdapter->val, typeAdapter->pretendType, typeAdapter->actualType); - return materializeValue(builder, adapted); - } - break; - - default: - SLANG_UNEXPECTED("unimplemented"); - break; - } - } - IRTargetIntrinsicDecoration* findTargetIntrinsicDecoration( IRInst* val, String const& targetName) @@ -4978,1360 +3940,14 @@ namespace Slang return nullptr; } - void legalizeRayTracingEntryPointParameterForGLSL( - GLSLLegalizationContext* context, - IRFunc* func, - IRParam* pp, - VarLayout* paramLayout) - { - auto builder = context->getBuilder(); - auto paramType = pp->getDataType(); - - // The parameter might be either an `in` parameter, - // or an `out` or `in out` parameter, and in those - // latter cases its IR-level type will include a - // wrapping "pointer-like" type (e.g., `Out<Float>` - // instead of just `Float`). - // - // Because global shader parameters are read-only - // in the same way function types are, we can take - // care of that detail here just by allocating a - // global shader parameter with exactly the type - // of the original function parameter. - // - auto globalParam = addGlobalParam(builder->getModule(), paramType); - builder->addLayoutDecoration(globalParam, paramLayout); - moveValueBefore(globalParam, builder->getFunc()); - pp->replaceUsesWith(globalParam); - - // Because linkage between ray-tracing shaders is - // based on the type of incoming/outgoing payload - // and attribute parameters, it would be an error to - // eliminate the global parameter *even if* it is - // not actually used inside the entry point. - // - // We attach a decoration to the entry point that - // makes note of the dependency, so that steps - // like dead code elimination cannot get rid of - // the parameter. 
- // - // TODO: We could consider using a structure like - // this for *all* of the entry point parameters - // that get moved to the global scope, since SPIR-V - // ends up requiring such information on an `OpEntryPoint`. - // - // As a further alternative, we could decide to - // keep entry point varying input/outtput attached - // to the parameter list through all of the Slang IR - // steps, and only declare it as global variables at - // the last minute when emitting a GLSL `main` or - // SPIR-V for an entry point. - // - builder->addDependsOnDecoration(func, globalParam); - } - - void legalizeEntryPointParameterForGLSL( - GLSLLegalizationContext* context, - IRFunc* func, - IRParam* pp, - VarLayout* paramLayout) - { - auto builder = context->getBuilder(); - auto stage = context->getStage(); - - // We need to create a global variable that will replace the parameter. - // It seems superficially obvious that the variable should have - // the same type as the parameter. - // However, if the parameter was a pointer, in order to - // support `out` or `in out` parameter passing, we need - // to be sure to allocate a variable of the pointed-to - // type instead. - // - // We also need to replace uses of the parameter with - // uses of the variable, and the exact logic there - // will differ a bit between the pointer and non-pointer - // cases. - auto paramType = pp->getDataType(); - - // First we will special-case stage input/outputs that - // don't fit into the standard varying model. - // For right now we are only doing special-case handling - // of geometry shader output streams. - if( auto paramPtrType = as<IROutTypeBase>(paramType) ) - { - auto valueType = paramPtrType->getValueType(); - if( auto gsStreamType = as<IRHLSLStreamOutputType>(valueType) ) - { - // An output stream type like `TriangleStream<Foo>` should - // more or less translate into `out Foo` (plus scalarization). 
- - auto globalOutputVal = createGLSLGlobalVaryings( - context, - builder, - valueType, - paramLayout, - LayoutResourceKind::VaryingOutput, - stage); - - // TODO: a GS output stream might be passed into other - // functions, so that we should really be modifying - // any function that has one of these in its parameter - // list (and in the limit we should be leagalizing any - // type that nests these...). - // - // For now we will just try to deal with `Append` calls - // directly in this function. - - - - for( auto bb = func->getFirstBlock(); bb; bb = bb->getNextBlock() ) - { - for( auto ii = bb->getFirstInst(); ii; ii = ii->getNextInst() ) - { - // Is it a call? - if(ii->op != kIROp_Call) - continue; - - // Is it calling the append operation? - auto callee = ii->getOperand(0); - for(;;) - { - // If the instruction is `specialize(X,...)` then - // we want to look at `X`, and if it is `generic { ... return R; }` - // then we want to look at `R`. We handle this - // iteratively here. - // - // TODO: This idiom seems to come up enough that we - // should probably have a dedicated convenience routine - // for this. - // - // Alternatively, we could switch the IR encoding so - // that decorations are added to the generic instead of the - // value it returns. - // - switch(callee->op) - { - case kIROp_Specialize: - { - callee = cast<IRSpecialize>(callee)->getOperand(0); - continue; - } - - case kIROp_Generic: - { - auto genericResult = findGenericReturnVal(cast<IRGeneric>(callee)); - if(genericResult) - { - callee = genericResult; - continue; - } - } - - default: - break; - } - break; - } - if(callee->op != kIROp_Func) - continue; - - // HACK: we will identify the operation based - // on the target-intrinsic definition that was - // given to it. 
- auto decoration = findTargetIntrinsicDecoration(callee, "glsl"); - if(!decoration) - continue; - - if(decoration->getDefinition() != UnownedStringSlice::fromLiteral("EmitVertex()")) - { - continue; - } - - // Okay, we have a declaration, and we want to modify it! - - builder->setInsertBefore(ii); - - assign(builder, globalOutputVal, ScalarizedVal::value(ii->getOperand(2))); - } - } - - return; - } - } - - // When we have an HLSL ray tracing shader entry point, - // we don't want to translate the inputs/outputs for GLSL/SPIR-V - // according to our default rules, for two reasons: - // - // 1. The input and output for these stages are expected to - // be packaged into `struct` types rather than be scalarized, - // so the usual scalarization approach we take here should - // not be applied. - // - // 2. An `in out` parameter isn't just sugar for a combination - // of an `in` and an `out` parameter, and instead represents the - // read/write "payload" that was passed in. It should legalize - // to a single variable, and we can lower reads/writes of it - // directly, rather than introduce an intermediate temporary. - // - switch( stage ) - { - default: - break; - - case Stage::AnyHit: - case Stage::Callable: - case Stage::ClosestHit: - case Stage::Intersection: - case Stage::Miss: - case Stage::RayGeneration: - legalizeRayTracingEntryPointParameterForGLSL(context, func, pp, paramLayout); - return; - } - - // Is the parameter type a special pointer type - // that indicates the parameter is used for `out` - // or `inout` access? - if(auto paramPtrType = as<IROutTypeBase>(paramType) ) - { - // Okay, we have the more interesting case here, - // where the parameter was being passed by reference. - // We are going to create a local variable of the appropriate - // type, which will replace the parameter, along with - // one or more global variables for the actual input/output. 
- - auto valueType = paramPtrType->getValueType(); - - auto localVariable = builder->emitVar(valueType); - auto localVal = ScalarizedVal::address(localVariable); - - if( auto inOutType = as<IRInOutType>(paramPtrType) ) - { - // In the `in out` case we need to declare two - // sets of global variables: one for the `in` - // side and one for the `out` side. - auto globalInputVal = createGLSLGlobalVaryings( - context, - builder, valueType, paramLayout, LayoutResourceKind::VaryingInput, stage); - - assign(builder, localVal, globalInputVal); - } - - // Any places where the original parameter was used inside - // the function body should instead use the new local variable. - // Since the parameter was a pointer, we use the variable instruction - // itself (which is an `alloca`d pointer) directly: - pp->replaceUsesWith(localVariable); - - // We also need one or more global variables to write the output to - // when the function is done. We create them here. - auto globalOutputVal = createGLSLGlobalVaryings( - context, - builder, valueType, paramLayout, LayoutResourceKind::VaryingOutput, stage); - - // Now we need to iterate over all the blocks in the function looking - // for any `return*` instructions, so that we can write to the output variable - for( auto bb = func->getFirstBlock(); bb; bb = bb->getNextBlock() ) - { - auto terminatorInst = bb->getLastInst(); - if(!terminatorInst) - continue; - - switch( terminatorInst->op ) - { - default: - continue; - - case kIROp_ReturnVal: - case kIROp_ReturnVoid: - break; - } - - // We dont' re-use `builder` here because we don't want to - // disrupt the source location it is using for inserting - // temporary variables at the top of the function. - // - IRBuilder terminatorBuilder; - terminatorBuilder.sharedBuilder = builder->sharedBuilder; - terminatorBuilder.setInsertBefore(terminatorInst); - - // Assign from the local variabel to the global output - // variable before the actual `return` takes place. 
- assign(&terminatorBuilder, globalOutputVal, localVal); - } - } - else - { - // This is the "easy" case where the parameter wasn't - // being passed by reference. We start by just creating - // one or more global variables to represent the parameter, - // and attach the required layout information to it along - // the way. - - auto globalValue = createGLSLGlobalVaryings( - context, - builder, paramType, paramLayout, LayoutResourceKind::VaryingInput, stage); - - // Next we need to replace uses of the parameter with - // references to the variable(s). We are going to do that - // somewhat naively, by simply materializing the - // variables at the start. - IRInst* materialized = materializeValue(builder, globalValue); - - pp->replaceUsesWith(materialized); - } - } - - void legalizeEntryPointForGLSL( - Session* session, - IRModule* module, - IRFunc* func, - EntryPointLayout* entryPointLayout, - DiagnosticSink* sink, - ExtensionUsageTracker* extensionUsageTracker) - { - GLSLLegalizationContext context; - context.session = session; - context.stage = entryPointLayout->profile.GetStage(); - context.sink = sink; - context.extensionUsageTracker = extensionUsageTracker; - - Stage stage = entryPointLayout->profile.GetStage(); - - // We require that the entry-point function has no uses, - // because otherwise we'd invalidate the signature - // at all existing call sites. - // - // TODO: the right thing to do here is to split any - // function that both gets called as an entry point - // and as an ordinary function. - SLANG_ASSERT(!func->firstUse); - - // We create a dummy IR builder, since some of - // the functions require it. - // - // TODO: make some of these free functions... 
- // - SharedIRBuilder shared; - shared.module = module; - shared.session = session; - IRBuilder builder; - builder.sharedBuilder = &shared; - builder.setInsertInto(func); - - context.builder = &builder; - - // We will start by looking at the return type of the - // function, because that will enable us to do an - // early-out check to avoid more work. - // - // Specifically, we need to check if the function has - // a `void` return type, because there is no work - // to be done on its return value in that case. - auto resultType = func->getResultType(); - if(as<IRVoidType>(resultType)) - { - // In this case, the function doesn't return a value - // so we don't need to transform its `return` sites. - // - // We can also use this opportunity to quickly - // check if the function has any parameters, and if - // it doesn't use the chance to bail out immediately. - if( func->getParamCount() == 0 ) - { - // This function is already legal for GLSL - // (at least in terms of parameter/result signature), - // so we won't bother doing anything at all. - return; - } - - // If the function does have parameters, then we need - // to let the logic later in this function handle them. - } - else - { - // Function returns a value, so we need - // to introduce a new global variable - // to hold that value, and then replace - // any `returnVal` instructions with - // code to write to that variable. - - auto resultGlobal = createGLSLGlobalVaryings( - &context, - &builder, - resultType, - entryPointLayout->resultLayout, - LayoutResourceKind::VaryingOutput, - stage); - - for( auto bb = func->getFirstBlock(); bb; bb = bb->getNextBlock() ) - { - // TODO: This is silly, because we are looking at every instruction, - // when we know that a `returnVal` should only ever appear as a - // terminator... 
- for( auto ii = bb->getFirstInst(); ii; ii = ii->getNextInst() ) - { - if(ii->op != kIROp_ReturnVal) - continue; - - IRReturnVal* returnInst = (IRReturnVal*) ii; - IRInst* returnValue = returnInst->getVal(); - - // Make sure we add these instructions to the right block - builder.setInsertInto(bb); - - // Write to our global variable(s) from the value being returned. - assign(&builder, resultGlobal, ScalarizedVal::value(returnValue)); - - // Emit a `returnVoid` to end the block - auto returnVoid = builder.emitReturn(); - - // Remove the old `returnVal` instruction. - returnInst->removeAndDeallocate(); - - // Make sure to resume our iteration at an - // appropriate instruciton, since we deleted - // the one we had been using. - ii = returnVoid; - } - } - } - - // Next we will walk through any parameters of the entry-point function, - // and turn them into global variables. - if( auto firstBlock = func->getFirstBlock() ) - { - // Any initialization code we insert for parameters needs - // to be at the start of the "ordinary" instructions in the block: - builder.setInsertBefore(firstBlock->getFirstOrdinaryInst()); - - UInt paramCounter = 0; - for( auto pp = firstBlock->getFirstParam(); pp; pp = pp->getNextParam() ) - { - UInt paramIndex = paramCounter++; - - // We assume that the entry-point layout includes information - // on each parameter, and that these arrays are kept aligned. - // Note that this means that any transformations that mess - // with function signatures will need to also update layout info... - // - SLANG_ASSERT(entryPointLayout->fields.Count() > paramIndex); - auto paramLayout = entryPointLayout->fields[paramIndex]; - - legalizeEntryPointParameterForGLSL( - &context, - func, - pp, - paramLayout); - } - - // At this point we should have eliminated all uses of the - // parameters of the entry block. 
Also, our control-flow - // rules mean that the entry block cannot be the target - // of any branches in the code, so there can't be - // any control-flow ops that try to match the parameter - // list. - // - // We can safely go through and destroy the parameters - // themselves, and then clear out the parameter list. - - for( auto pp = firstBlock->getFirstParam(); pp; ) - { - auto next = pp->getNextParam(); - pp->removeAndDeallocate(); - pp = next; - } - } - - // Finally, we need to patch up the type of the entry point, - // because it is no longer accurate. - - IRFuncType* voidFuncType = builder.getFuncType( - 0, - nullptr, - builder.getVoidType()); - func->setFullType(voidFuncType); - - // TODO: we should technically be constructing - // a new `EntryPointLayout` here to reflect - // the way that things have been moved around. - } - - // Needed for lookup up entry-point layouts. - // - // TODO: maybe arrange so that codegen is driven from the layout layer - // instead of the input/request layer. - EntryPointLayout* findEntryPointLayout( - ProgramLayout* programLayout, - EntryPointRequest* entryPointRequest); - - struct IRSpecSymbol : RefObject - { - IRInst* irGlobalValue; - RefPtr<IRSpecSymbol> nextWithSameName; - }; - - struct IRSpecEnv - { - IRSpecEnv* parent = nullptr; - - // A map from original values to their cloned equivalents. - typedef Dictionary<IRInst*, IRInst*> ClonedValueDictionary; - ClonedValueDictionary clonedValues; - }; - - struct IRSharedSpecContext - { - // The code-generation target in use - CodeGenTarget target; - - // The specialized module we are building - RefPtr<IRModule> module; - - // The original, unspecialized module we are copying - IRModule* originalModule; - - // A map from mangled symbol names to zero or - // more global IR values that have that name, - // in the *original* module. 
- typedef Dictionary<String, RefPtr<IRSpecSymbol>> SymbolDictionary; - SymbolDictionary symbols; - - SharedIRBuilder sharedBuilderStorage; - IRBuilder builderStorage; - - // The "global" specialization environment. - IRSpecEnv globalEnv; - }; - - struct IRGenericSpecKey - { - // Note: Slang::Dictionary requires key types to have default constructors - IRGenericSpecKey() - {} - - IRGenericSpecKey(IRSpecialize* specializeInst) - { - m_values.Add(specializeInst->getBase()); - auto argCount = specializeInst->getArgCount(); - for(UInt aa = 0; aa < argCount; ++aa) - { - m_values.Add(specializeInst->getArg(aa)); - } - } - - List<IRInst*> m_values; - - bool operator==(IRGenericSpecKey const& other) const - { - auto valueCount = m_values.Count(); - if(valueCount != other.m_values.Count()) return false; - for(UInt ii = 0; ii < valueCount; ++ii) - { - if(m_values[ii] != other.m_values[ii]) return false; - } - return true; - } - - UInt GetHashCode() const - { - auto hash = 0; - auto valueCount = m_values.Count(); - for(UInt ii = 0; ii < valueCount; ++ii) - { - hash = combineHash(hash, Slang::GetHashCode(m_values[ii])); - } - return hash; - } - }; - - struct IRSharedGenericSpecContext : IRSharedSpecContext - { - // Instructions to be processed (for generic specialization context) - List<IRInst*> workList; - HashSet<IRInst*> workListSet; - void addToWorkList(IRInst* inst) - { - if(!workListSet.Contains(inst)) - { - workList.Add(inst); - workListSet.Add(inst); - } - } - IRInst* popWorkList() - { - UInt count = workList.Count(); - if(count != 0) - { - IRInst* inst = workList[count - 1]; - workList.FastRemoveAt(count - 1); - workListSet.Remove(inst); - return inst; - } - return nullptr; - } - - Dictionary<IRGenericSpecKey, IRInst*> specializations; - }; - - struct IRSpecContextBase - { - // A map from the mangled name of a global variable - // to the layout to use for it. 
- Dictionary<String, VarLayout*> globalVarLayouts; - - IRSharedSpecContext* shared; - - IRSharedSpecContext* getShared() { return shared; } - - IRModule* getModule() { return getShared()->module; } - - IRModule* getOriginalModule() { return getShared()->originalModule; } - - IRSharedSpecContext::SymbolDictionary& getSymbols() { return getShared()->symbols; } - - // The current specialization environment to use. - IRSpecEnv* env = nullptr; - IRSpecEnv* getEnv() - { - // TODO: need to actually establish environments on contexts we create. - // - // Or more realistically we need to change the whole approach - // to specialization and cloning so that we don't try to share - // logic between two very different cases. - - - return env; - } - - // The IR builder to use for creating nodes - IRBuilder* builder; - - // A callback to be used when a value that is not registerd in `clonedValues` - // is needed during cloning. This gives the subtype a chance to intercept - // the operation and clone (or not) as needed. - virtual IRInst* maybeCloneValue(IRInst* originalVal) - { - return originalVal; - } - }; - - void registerClonedValue( - IRSpecContextBase* context, - IRInst* clonedValue, - IRInst* originalValue) - { - if(!originalValue) - return; - - // TODO: now that things are scoped using environments, we - // shouldn't be running into the cases where a value with - // the same key already exists. This should be changed to - // an `Add()` call. 
- // - context->getEnv()->clonedValues[originalValue] = clonedValue; - } - - // Information on values to use when registering a cloned value - struct IROriginalValuesForClone - { - IRInst* originalVal = nullptr; - IRSpecSymbol* sym = nullptr; - - IROriginalValuesForClone() {} - - IROriginalValuesForClone(IRInst* originalValue) - : originalVal(originalValue) - {} - - IROriginalValuesForClone(IRSpecSymbol* symbol) - : sym(symbol) - {} - }; - - void registerClonedValue( - IRSpecContextBase* context, - IRInst* clonedValue, - IROriginalValuesForClone const& originalValues) - { - registerClonedValue(context, clonedValue, originalValues.originalVal); - for( auto s = originalValues.sym; s; s = s->nextWithSameName ) - { - registerClonedValue(context, clonedValue, s->irGlobalValue); - } - } - - IRInst* cloneInst( - IRSpecContextBase* context, - IRBuilder* builder, - IRInst* originalInst, - IROriginalValuesForClone const& originalValues); - - IRInst* cloneInst( - IRSpecContextBase* context, - IRBuilder* builder, - IRInst* originalInst) - { - return cloneInst(context, builder, originalInst, originalInst); - } - - /// Clone any decorations from `originalValue` onto `clonedValue` - void cloneDecorations( - IRSpecContextBase* context, - IRInst* clonedValue, - IRInst* originalValue) - { - // TODO: In many cases we might be able to use this as a general-purpose - // place to do cloning of *all* the children of an instruction, and - // not just its decorations. We should look to refactor this code - // later. 
- - IRBuilder builderStorage = *context->builder; - IRBuilder* builder = &builderStorage; - builder->setInsertInto(clonedValue); - - - SLANG_UNUSED(context); - for(auto originalDecoration : originalValue->getDecorations()) - { - cloneInst(context, builder, originalDecoration); - } - - // We will also clone the location here, just because this is a convenient bottleneck - clonedValue->sourceLoc = originalValue->sourceLoc; - } - - /// Clone any decorations and children from `originalValue` onto `clonedValue` - void cloneDecorationsAndChildren( - IRSpecContextBase* context, - IRInst* clonedValue, - IRInst* originalValue) - { - IRBuilder builderStorage = *context->builder; - IRBuilder* builder = &builderStorage; - builder->setInsertInto(clonedValue); - - SLANG_UNUSED(context); - for(auto originalItem : originalValue->getDecorationsAndChildren()) - { - cloneInst(context, builder, originalItem); - } - - // We will also clone the location here, just because this is a convenient bottleneck - clonedValue->sourceLoc = originalValue->sourceLoc; - } - - // We use an `IRSpecContext` for the case where we are cloning - // code from one or more input modules to create a "linked" output - // module. Along the way, we will resolve profile-specific functions - // to the best definition for a given target. 
- // - struct IRSpecContext : IRSpecContextBase - { - // Override the "maybe clone" logic so that we always clone - virtual IRInst* maybeCloneValue(IRInst* originalVal) override; - }; - - - IRInst* cloneGlobalValue(IRSpecContext* context, IRInst* originalVal); - - IRInst* cloneValue( - IRSpecContextBase* context, - IRInst* originalValue); - - IRType* cloneType( - IRSpecContextBase* context, - IRType* originalType); - - IRInst* IRSpecContext::maybeCloneValue(IRInst* originalValue) - { - switch (originalValue->op) - { - case kIROp_StructType: - case kIROp_Func: - case kIROp_Generic: - case kIROp_GlobalVar: - case kIROp_GlobalConstant: - case kIROp_GlobalParam: - case kIROp_StructKey: - case kIROp_GlobalGenericParam: - case kIROp_WitnessTable: - return cloneGlobalValue(this, originalValue); - - case kIROp_BoolLit: - { - IRConstant* c = (IRConstant*)originalValue; - return builder->getBoolValue(c->value.intVal != 0); - } - break; - - - case kIROp_IntLit: - { - IRConstant* c = (IRConstant*)originalValue; - return builder->getIntValue(cloneType(this, c->getDataType()), c->value.intVal); - } - break; - - case kIROp_FloatLit: - { - IRConstant* c = (IRConstant*)originalValue; - return builder->getFloatValue(cloneType(this, c->getDataType()), c->value.floatVal); - } - break; - - case kIROp_StringLit: - { - IRConstant* c = (IRConstant*)originalValue; - return builder->getStringValue(c->getStringSlice()); - } - break; - - case kIROp_PtrLit: - { - IRConstant* c = (IRConstant*)originalValue; - return builder->getPtrValue(c->value.ptrVal); - } - break; - - default: - { - // In the deafult case, assume that we have some sort of "hoistable" - // instruction that requires us to create a clone of it. 
- - UInt argCount = originalValue->getOperandCount(); - IRInst* clonedValue = createInstWithTrailingArgs<IRInst>( - builder, - originalValue->op, - cloneType(this, originalValue->getFullType()), - 0, nullptr, - argCount, nullptr); - registerClonedValue(this, clonedValue, originalValue); - for (UInt aa = 0; aa < argCount; ++aa) - { - IRInst* originalArg = originalValue->getOperand(aa); - IRInst* clonedArg = cloneValue(this, originalArg); - clonedValue->getOperands()[aa].init(clonedValue, clonedArg); - } - cloneDecorationsAndChildren(this, clonedValue, originalValue); - - addHoistableInst(builder, clonedValue); - - return clonedValue; - } - break; - } - } - - IRInst* cloneValue( - IRSpecContextBase* context, - IRInst* originalValue); - - // Find a pre-existing cloned value, or return null if none is available. - IRInst* findClonedValue( - IRSpecContextBase* context, - IRInst* originalValue) - { - IRInst* clonedValue = nullptr; - for (auto env = context->getEnv(); env; env = env->parent) - { - if (env->clonedValues.TryGetValue(originalValue, clonedValue)) - { - return clonedValue; - } - } - - return nullptr; - } - - IRInst* cloneValue( - IRSpecContextBase* context, - IRInst* originalValue) - { - if (!originalValue) - return nullptr; - - if (IRInst* clonedValue = findClonedValue(context, originalValue)) - return clonedValue; - - return context->maybeCloneValue(originalValue); - } - - IRType* cloneType( - IRSpecContextBase* context, - IRType* originalType) - { - return (IRType*)cloneValue(context, originalType); - } - - void cloneGlobalValueWithCodeCommon( - IRSpecContextBase* context, - IRGlobalValueWithCode* clonedValue, - IRGlobalValueWithCode* originalValue); - - IRRate* cloneRate( - IRSpecContextBase* context, - IRRate* rate) - { - return (IRRate*) cloneType(context, rate); - } - - void maybeSetClonedRate( - IRSpecContextBase* context, - IRBuilder* builder, - IRInst* clonedValue, - IRInst* originalValue) - { - if(auto rate = originalValue->getRate() ) - { - 
clonedValue->setFullType(builder->getRateQualifiedType( - cloneRate(context, rate), - clonedValue->getFullType())); - } - } - - IRGlobalVar* cloneGlobalVarImpl( - IRSpecContextBase* context, - IRBuilder* builder, - IRGlobalVar* originalVar, - IROriginalValuesForClone const& originalValues) - { - auto clonedVar = builder->createGlobalVar( - cloneType(context, originalVar->getDataType()->getValueType())); - - maybeSetClonedRate(context, builder, clonedVar, originalVar); - - registerClonedValue(context, clonedVar, originalValues); - - // Clone any code in the body of the variable, since this - // represents the initializer. - cloneGlobalValueWithCodeCommon( - context, - clonedVar, - originalVar); - - return clonedVar; - } - - IRGlobalConstant* cloneGlobalConstantImpl( - IRSpecContextBase* context, - IRBuilder* builder, - IRGlobalConstant* originalVal, - IROriginalValuesForClone const& originalValues) - { - auto clonedVal = builder->createGlobalConstant( - cloneType(context, originalVal->getFullType())); - registerClonedValue(context, clonedVal, originalValues); - - // Clone any code in the body of the constant, since this - // represents the initializer. 
- cloneGlobalValueWithCodeCommon( - context, - clonedVal, - originalVal); - - return clonedVal; - } - - void cloneSimpleGlobalValueImpl( - IRSpecContextBase* context, - IRInst* originalInst, - IROriginalValuesForClone const& originalValues, - IRInst* clonedInst, - bool registerValue = true) - { - if (registerValue) - registerClonedValue(context, clonedInst, originalValues); - - // Set up an IR builder for inserting into the inst - IRBuilder builderStorage = *context->builder; - IRBuilder* builder = &builderStorage; - builder->setInsertInto(clonedInst); - - // Clone any children of the instruction - for (auto child : originalInst->getDecorationsAndChildren()) - { - cloneInst(context, builder, child); - } - } - - IRGlobalParam* cloneGlobalParamImpl( - IRSpecContextBase* context, - IRBuilder* builder, - IRGlobalParam* originalVal, - IROriginalValuesForClone const& originalValues) - { - auto clonedVal = builder->createGlobalParam( - cloneType(context, originalVal->getFullType())); - cloneSimpleGlobalValueImpl(context, originalVal, originalValues, clonedVal); - - if(auto linkage = originalVal->findDecoration<IRLinkageDecoration>()) - { - auto mangledName = String(linkage->getMangledName()); - VarLayout* layout = nullptr; - if (context->globalVarLayouts.TryGetValue(mangledName, layout)) - { - builder->addLayoutDecoration(clonedVal, layout); - } - } - - return clonedVal; - } - - IRGeneric* cloneGenericImpl( - IRSpecContextBase* context, - IRBuilder* builder, - IRGeneric* originalVal, - IROriginalValuesForClone const& originalValues) - { - auto clonedVal = builder->emitGeneric(); - registerClonedValue(context, clonedVal, originalValues); - - // Clone any code in the body of the generic, since this - // computes its result value. 
- cloneGlobalValueWithCodeCommon( - context, - clonedVal, - originalVal); - - return clonedVal; - } - - IRStructKey* cloneStructKeyImpl( - IRSpecContextBase* context, - IRBuilder* builder, - IRStructKey* originalVal, - IROriginalValuesForClone const& originalValues) - { - auto clonedVal = builder->createStructKey(); - cloneSimpleGlobalValueImpl(context, originalVal, originalValues, clonedVal); - return clonedVal; - } - - IRGlobalGenericParam* cloneGlobalGenericParamImpl( - IRSpecContextBase* context, - IRBuilder* builder, - IRGlobalGenericParam* originalVal, - IROriginalValuesForClone const& originalValues) - { - auto clonedVal = builder->emitGlobalGenericParam(); - cloneSimpleGlobalValueImpl(context, originalVal, originalValues, clonedVal); - return clonedVal; - } - - - IRWitnessTable* cloneWitnessTableImpl( - IRSpecContextBase* context, - IRBuilder* builder, - IRWitnessTable* originalTable, - IROriginalValuesForClone const& originalValues, - IRWitnessTable* dstTable = nullptr, - bool registerValue = true) - { - auto clonedTable = dstTable ? 
dstTable : builder->createWitnessTable(); - cloneSimpleGlobalValueImpl(context, originalTable, originalValues, clonedTable, registerValue); - return clonedTable; - } - - IRWitnessTable* cloneWitnessTableWithoutRegistering( - IRSpecContextBase* context, - IRBuilder* builder, - IRWitnessTable* originalTable, - IRWitnessTable* dstTable = nullptr) - { - return cloneWitnessTableImpl(context, builder, originalTable, IROriginalValuesForClone(), dstTable, false); - } - - IRStructType* cloneStructTypeImpl( - IRSpecContextBase* context, - IRBuilder* builder, - IRStructType* originalStruct, - IROriginalValuesForClone const& originalValues) - { - auto clonedStruct = builder->createStructType(); - cloneSimpleGlobalValueImpl(context, originalStruct, originalValues, clonedStruct); - return clonedStruct; - } - - - IRInterfaceType* cloneInterfaceTypeImpl( - IRSpecContextBase* context, - IRBuilder* builder, - IRInterfaceType* originalInterface, - IROriginalValuesForClone const& originalValues) - { - auto clonedInterface = builder->createInterfaceType(); - cloneSimpleGlobalValueImpl(context, originalInterface, originalValues, clonedInterface); - return clonedInterface; - } - - void cloneGlobalValueWithCodeCommon( - IRSpecContextBase* context, - IRGlobalValueWithCode* clonedValue, - IRGlobalValueWithCode* originalValue) - { - // Next we are going to clone the actual code. - IRBuilder builderStorage = *context->builder; - IRBuilder* builder = &builderStorage; - builder->setInsertInto(clonedValue); - - cloneDecorations(context, clonedValue, originalValue); - - // We will walk through the blocks of the function, and clone each of them. - // - // We need to create the cloned blocks first, and then walk through them, - // because blocks might be forward referenced (this is not possible - // for other cases of instructions). 
- for (auto originalBlock = originalValue->getFirstBlock(); - originalBlock; - originalBlock = originalBlock->getNextBlock()) - { - IRBlock* clonedBlock = builder->createBlock(); - clonedValue->addBlock(clonedBlock); - registerClonedValue(context, clonedBlock, originalBlock); - #if 0 - // We can go ahead and clone parameters here, while we are at it. - builder->curBlock = clonedBlock; - for (auto originalParam = originalBlock->getFirstParam(); - originalParam; - originalParam = originalParam->getNextParam()) - { - IRParam* clonedParam = builder->emitParam( - context->maybeCloneType( - originalParam->getFullType())); - cloneDecorations(context, clonedParam, originalParam); - registerClonedValue(context, clonedParam, originalParam); - } -#endif - } - - // Okay, now we are in a good position to start cloning - // the instructions inside the blocks. - { - IRBlock* ob = originalValue->getFirstBlock(); - IRBlock* cb = clonedValue->getFirstBlock(); - while (ob) - { - SLANG_ASSERT(cb); - - builder->setInsertInto(cb); - for (auto oi = ob->getFirstInst(); oi; oi = oi->getNextInst()) - { - cloneInst(context, builder, oi); - } - - ob = ob->getNextBlock(); - cb = cb->getNextBlock(); - } - } - - } - - void checkIRDuplicate(IRInst* inst, IRInst* moduleInst, UnownedStringSlice const& mangledName) - { -#ifdef _DEBUG - for (auto child : moduleInst->getDecorationsAndChildren()) - { - if (child == inst) - continue; - - if(auto childLinkage = child->findDecoration<IRLinkageDecoration>()) - { - if(mangledName == childLinkage->getMangledName()) - { - SLANG_UNEXPECTED("duplicate global instruction"); - } - } - } -#else - SLANG_UNREFERENCED_PARAMETER(inst); - SLANG_UNREFERENCED_PARAMETER(moduleInst); - SLANG_UNREFERENCED_PARAMETER(mangledName); -#endif - } - - void cloneFunctionCommon( - IRSpecContextBase* context, - IRFunc* clonedFunc, - IRFunc* originalFunc, - bool checkDuplicate = true) - { - // First clone all the simple properties. 
- clonedFunc->setFullType(cloneType(context, originalFunc->getFullType())); - - cloneGlobalValueWithCodeCommon( - context, - clonedFunc, - originalFunc); - - // Shuffle the function to the end of the list, because - // it needs to follow its dependencies. - // - // TODO: This isn't really a good requirement to place on the IR... - clonedFunc->moveToEnd(); - - if( checkDuplicate ) - { - if( auto linkage = clonedFunc->findDecoration<IRLinkageDecoration>() ) - { - checkIRDuplicate(clonedFunc, context->getModule()->getModuleInst(), linkage->getMangledName()); - } - } - } - - IRFunc* specializeIRForEntryPoint( - IRSpecContext* context, - EntryPointRequest* entryPointRequest, - EntryPointLayout* entryPointLayout) - { - // Look up the IR symbol by name - auto mangledName = getMangledName(entryPointRequest->decl); - RefPtr<IRSpecSymbol> sym; - if (!context->getSymbols().TryGetValue(mangledName, sym)) - { - SLANG_UNEXPECTED("no matching IR symbol"); - return nullptr; - } - - // TODO: deal with the case where we might - // have multiple versions... - - auto globalValue = sym->irGlobalValue; - if (globalValue->op != kIROp_Func) - { - SLANG_UNEXPECTED("expected an IR function"); - return nullptr; - } - auto originalFunc = (IRFunc*)globalValue; - - // Create a clone for the IR function - auto clonedFunc = context->builder->createFunc(); - - // Note: we do *not* register this cloned declaration - // as the cloned value for the original symbol. - // This is kind of a kludge, but it ensures that - // in the unlikely case that the function is both - // used as an entry point and a callable function - // (yes, this would imply recursion...) we actually - // have two copies, which lets us arbitrarily - // transform the entry point to meet target requirements. - // - // TODO: The above statement is kind of bunk, though, - // because both versions of the function would have - // the same mangled name... 
:( - - // We need to clone all the properties of the original - // function, including any blocks, their parameters, - // and their instructions. - cloneFunctionCommon(context, clonedFunc, originalFunc); - - // We need to attach the layout information for - // the entry point to this declaration, so that - // we can use it to inform downstream code emit. - context->builder->addLayoutDecoration( - clonedFunc, - entryPointLayout); - - // We will also go on and attach layout information - // to the function parameters, so that we have it - // available directly on the parameters, rather - // than having to look it up on the original entry-point layout. - if( auto firstBlock = clonedFunc->getFirstBlock() ) - { - UInt paramLayoutCount = entryPointLayout->fields.Count(); - UInt paramCounter = 0; - for( auto pp = firstBlock->getFirstParam(); pp; pp = pp->getNextParam() ) - { - UInt paramIndex = paramCounter++; - if( paramIndex < paramLayoutCount ) - { - auto paramLayout = entryPointLayout->fields[paramIndex]; - context->builder->addLayoutDecoration( - pp, - paramLayout); - } - else - { - SLANG_UNEXPECTED("too many parameters"); - } - } - } - - return clonedFunc; - } - IRFunc* cloneSimpleFuncWithoutRegistering(IRSpecContextBase* context, IRFunc* originalFunc) { auto clonedFunc = context->builder->createFunc(); cloneFunctionCommon(context, clonedFunc, originalFunc, false); return clonedFunc; } - - // Get a string form of the target so that we can - // use it to match against target-specialization modifiers - // - // TODO: We shouldn't be using strings for this. - String getTargetName(IRSpecContext* context) - { - switch( context->shared->target ) - { - case CodeGenTarget::HLSL: - return "hlsl"; - - case CodeGenTarget::GLSL: - return "glsl"; - - default: - SLANG_UNEXPECTED("unhandled case"); - UNREACHABLE_RETURN("unknown"); - } - } - - // How specialized is a given declaration for the chosen target? 
- enum class TargetSpecializationLevel - { - specializedForOtherTarget = 0, - notSpecialized, - specializedForTarget, - }; - - TargetSpecializationLevel getTargetSpecialiationLevel( - IRInst* inVal, - String const& targetName) - { - // HACK: Currently the front-end is placing modifiers related - // to target specialization on nodes like functions, even when - // those functions are being returned by a generic. This - // means that we need to try and inspect the value being - // returned by the generic if we are looking at a generic. - IRInst* val = inVal; - while( auto genericVal = as<IRGeneric>(val) ) - { - auto firstBlock = genericVal->getFirstBlock(); - if(!firstBlock) break; - - auto returnInst = as<IRReturnVal>(firstBlock->getLastInst()); - if(!returnInst) break; - - val = returnInst->getVal(); - } - - TargetSpecializationLevel result = TargetSpecializationLevel::notSpecialized; - for(auto dd : val->getDecorations()) - { - if(dd->op != kIROp_TargetDecoration) - continue; - - auto decoration = (IRTargetDecoration*) dd; - if(String(decoration->getTargetName()) == targetName) - return TargetSpecializationLevel::specializedForTarget; - - result = TargetSpecializationLevel::specializedForOtherTarget; - } - - return result; - } +#endif IRInst* findGenericReturnVal(IRGeneric* generic) { @@ -6406,992 +4022,6 @@ namespace Slang } } - // Is `newVal` marked as being a better match for our - // chosen code-generation target? - // - // TODO: there is a missing step here where we need - // to check if things are even available in the first place... - bool isBetterForTarget( - IRSpecContext* context, - IRInst* newVal, - IRInst* oldVal) - { - String targetName = getTargetName(context); - - // For right now every declaration might have zero or more - // modifiers, representing the targets for which it is specialized. - // Each modifier has a single string "tag" to represent a target. 
- // We thus decide that a declaration is "more specialized" by: - // - // - Does it have a modifier with a tag with the string for the current target? - // If yes, it is the most specialized it can be. - // - // - Does it have a no tags? Then it is "unspecialized" and that is okay. - // - // - Does it have a modifier with a tag for a *different* target? - // If yes, then it shouldn't even be usable on this target. - // - // Longer term a better approach is to think of this in terms - // of a "disjunction of conjunctions" that is: - // - // (A and B and C) or (A and D) or (E) or (F and G) ... - // - // A code generation target would then consist of a - // conjunction of invidual tags: - // - // (HLSL and SM_4_0 and Vertex and ...) - // - // A declaration is *applicable* on a target if one of - // its conjunctions of tags is a subset of the target's. - // - // One declaration is *better* than another on a target - // if it is applicable and its tags are a superset - // of the other's. - - auto newLevel = getTargetSpecialiationLevel(newVal, targetName); - auto oldLevel = getTargetSpecialiationLevel(oldVal, targetName); - if(newLevel != oldLevel) - return UInt(newLevel) > UInt(oldLevel); - - // All other factors being equal, a definition is - // better than a declaration. 
- auto newIsDef = isDefinition(newVal); - auto oldIsDef = isDefinition(oldVal); - if (newIsDef != oldIsDef) - return newIsDef; - - return false; - } - - IRFunc* cloneFuncImpl( - IRSpecContextBase* context, - IRBuilder* builder, - IRFunc* originalFunc, - IROriginalValuesForClone const& originalValues) - { - auto clonedFunc = builder->createFunc(); - registerClonedValue(context, clonedFunc, originalValues); - cloneFunctionCommon(context, clonedFunc, originalFunc); - return clonedFunc; - } - - - IRInst* cloneInst( - IRSpecContextBase* context, - IRBuilder* builder, - IRInst* originalInst, - IROriginalValuesForClone const& originalValues) - { - switch (originalInst->op) - { - // We need to special-case any instruction that is not - // allocated like an ordinary `IRInst` with trailing args. - case kIROp_Func: - return cloneFuncImpl(context, builder, cast<IRFunc>(originalInst), originalValues); - - case kIROp_GlobalVar: - return cloneGlobalVarImpl(context, builder, cast<IRGlobalVar>(originalInst), originalValues); - - case kIROp_GlobalConstant: - return cloneGlobalConstantImpl(context, builder, cast<IRGlobalConstant>(originalInst), originalValues); - - case kIROp_GlobalParam: - return cloneGlobalParamImpl(context, builder, cast<IRGlobalParam>(originalInst), originalValues); - - case kIROp_WitnessTable: - return cloneWitnessTableImpl(context, builder, cast<IRWitnessTable>(originalInst), originalValues); - - case kIROp_StructType: - return cloneStructTypeImpl(context, builder, cast<IRStructType>(originalInst), originalValues); - - case kIROp_InterfaceType: - return cloneInterfaceTypeImpl(context, builder, cast<IRInterfaceType>(originalInst), originalValues); - - case kIROp_Generic: - return cloneGenericImpl(context, builder, cast<IRGeneric>(originalInst), originalValues); - - case kIROp_StructKey: - return cloneStructKeyImpl(context, builder, cast<IRStructKey>(originalInst), originalValues); - - case kIROp_GlobalGenericParam: - return cloneGlobalGenericParamImpl(context, 
builder, cast<IRGlobalGenericParam>(originalInst), originalValues); - - default: - break; - } - - // The common case is that we just need to construct a cloned - // instruction with the right number of operands, intialize - // it, and then add it to the sequence. - UInt argCount = originalInst->getOperandCount(); - IRInst* clonedInst = createInstWithTrailingArgs<IRInst>( - builder, originalInst->op, - cloneType(context, originalInst->getFullType()), - 0, nullptr, - argCount, nullptr); - registerClonedValue(context, clonedInst, originalValues); - auto oldBuilder = context->builder; - context->builder = builder; - for (UInt aa = 0; aa < argCount; ++aa) - { - IRInst* originalArg = originalInst->getOperand(aa); - IRInst* clonedArg = cloneValue(context, originalArg); - clonedInst->getOperands()[aa].init(clonedInst, clonedArg); - } - builder->addInst(clonedInst); - context->builder = oldBuilder; - cloneDecorations(context, clonedInst, originalInst); - - return clonedInst; - } - - IRInst* cloneGlobalValueImpl( - IRSpecContext* context, - IRInst* originalInst, - IROriginalValuesForClone const& originalValues) - { - auto clonedValue = cloneInst(context, &context->shared->builderStorage, originalInst, originalValues); - clonedValue->moveToEnd(); - return clonedValue; - } - - - /// Clone a global value, which has the given `originalLinkage`. - /// - /// The `originalVal` is a known global IR value with that linkage, if one is available. - /// (It is okay for this parameter to be null). - /// - IRInst* cloneGlobalValueWithLinkage( - IRSpecContext* context, - IRInst* originalVal, - IRLinkageDecoration* originalLinkage) - { - // If the global value being cloned is already in target module, don't clone - // Why checking this? 
- // When specializing a generic function G (which is already in target module), - // where G calls a normal function F (which is already in target module), - // then when we are making a copy of G via cloneFuncCommom(), it will recursively clone F, - // however we don't want to make a duplicate of F in the target module. - if (originalVal->getParent() == context->getModule()->getModuleInst()) - return originalVal; - - // Check if we've already cloned this value, for the case where - // an original value has already been established. - if (originalVal) - { - if (IRInst* clonedVal = findClonedValue(context, originalVal)) - { - return clonedVal; - } - } - - if(!originalLinkage) - { - // If there is no mangled name, then we assume this is a local symbol, - // and it can't possibly have multiple declarations. - return cloneGlobalValueImpl(context, originalVal, IROriginalValuesForClone()); - } - - // - // We will scan through all of the available declarations - // with the same mangled name as `originalVal` and try - // to pick the "best" one for our target. - - auto mangledName = String(originalLinkage->getMangledName()); - RefPtr<IRSpecSymbol> sym; - if( !context->getSymbols().TryGetValue(mangledName, sym) ) - { - if(!originalVal) - return nullptr; - - // This shouldn't happen! - SLANG_UNEXPECTED("no matching values registered"); - UNREACHABLE_RETURN(cloneGlobalValueImpl(context, originalVal, IROriginalValuesForClone())); - } - - // We will try to track the "best" declaration we can find. - // - // Generally, one declaration wil lbe better than another if it is - // more specialized for the chosen target. Otherwise, we simply favor - // definitions over declarations. 
- // - IRInst* bestVal = sym->irGlobalValue; - for( auto ss = sym->nextWithSameName; ss; ss = ss->nextWithSameName ) - { - IRInst* newVal = ss->irGlobalValue; - if(isBetterForTarget(context, newVal, bestVal)) - bestVal = newVal; - } - - // Check if we've already cloned this value, for the case where - // we didn't have an original value (just a name), but we've - // now found a representative value. - if (!originalVal) - { - if (IRInst* clonedVal = findClonedValue(context, bestVal)) - { - return clonedVal; - } - } - - return cloneGlobalValueImpl(context, bestVal, IROriginalValuesForClone(sym)); - } - - // Clone a global value, where `originalVal` is one declaration/definition, but we might - // have to consider others, in order to find the "best" version of the symbol. - IRInst* cloneGlobalValue(IRSpecContext* context, IRInst* originalVal) - { - // We are being asked to clone a particular global value, but in - // the IR that comes out of the front-end there could still - // be multiple, target-specific, declarations of any given - // global value, all of which share the same mangled name. - return cloneGlobalValueWithLinkage( - context, - originalVal, - originalVal->findDecoration<IRLinkageDecoration>()); - } - - StructTypeLayout* getGlobalStructLayout( - ProgramLayout* programLayout); - - void insertGlobalValueSymbol( - IRSharedSpecContext* sharedContext, - IRInst* gv) - { - auto linkage = gv->findDecoration<IRLinkageDecoration>(); - - // Don't try to register a symbol for global values - // that don't have linkage. 
- // - if (!linkage) - return; - - auto mangledName = String(linkage->getMangledName()); - - RefPtr<IRSpecSymbol> sym = new IRSpecSymbol(); - sym->irGlobalValue = gv; - - RefPtr<IRSpecSymbol> prev; - if (sharedContext->symbols.TryGetValue(mangledName, prev)) - { - sym->nextWithSameName = prev->nextWithSameName; - prev->nextWithSameName = sym; - } - else - { - sharedContext->symbols.Add(mangledName, sym); - } - } - - void insertGlobalValueSymbols( - IRSharedSpecContext* sharedContext, - IRModule* originalModule) - { - if (!originalModule) - return; - - for(auto ii : originalModule->getGlobalInsts()) - { - insertGlobalValueSymbol(sharedContext, ii); - } - } - - void initializeSharedSpecContext( - IRSharedSpecContext* sharedContext, - Session* session, - IRModule* module, - IRModule* originalModule, - CodeGenTarget target) - { - - SharedIRBuilder* sharedBuilder = &sharedContext->sharedBuilderStorage; - sharedBuilder->module = nullptr; - sharedBuilder->session = session; - - IRBuilder* builder = &sharedContext->builderStorage; - builder->sharedBuilder = sharedBuilder; - - if( !module ) - { - module = builder->createModule(); - } - - sharedBuilder->module = module; - sharedContext->module = module; - sharedContext->originalModule = originalModule; - sharedContext->target = target; - // We will populate a map with all of the IR values - // that use the same mangled name, to make lookup easier - // in other steps. 
- insertGlobalValueSymbols(sharedContext, originalModule); - } - - // implementation provided in parameter-binding.cpp - RefPtr<ProgramLayout> specializeProgramLayout( - TargetRequest * targetReq, - ProgramLayout* programLayout, - SubstitutionSet typeSubst); - - struct IRSpecializationState - { - ProgramLayout* programLayout; - CodeGenTarget target; - TargetRequest* targetReq; - - IRModule* irModule = nullptr; - RefPtr<ProgramLayout> newProgramLayout; - - IRSharedSpecContext sharedContextStorage; - IRSpecContext contextStorage; - - IRSpecEnv globalEnv; - - IRSharedSpecContext* getSharedContext() { return &sharedContextStorage; } - IRSpecContext* getContext() { return &contextStorage; } - - IRSpecializationState() - { - contextStorage.env = &globalEnv; - } - - ~IRSpecializationState() - { - newProgramLayout = nullptr; - contextStorage = IRSpecContext(); - sharedContextStorage = IRSharedSpecContext(); - } - }; - - IRSpecializationState* createIRSpecializationState( - EntryPointRequest* entryPointRequest, - ProgramLayout* programLayout, - CodeGenTarget target, - TargetRequest* targetReq) - { - IRSpecializationState* state = new IRSpecializationState(); - - state->programLayout = programLayout; - state->target = target; - state->targetReq = targetReq; - - - auto compileRequest = entryPointRequest->compileRequest; - auto translationUnit = entryPointRequest->getTranslationUnit(); - auto originalIRModule = translationUnit->irModule; - - auto sharedContext = state->getSharedContext(); - initializeSharedSpecContext( - sharedContext, - compileRequest->mSession, - nullptr, - originalIRModule, - target); - - state->irModule = sharedContext->module; - - // We also need to attach the IR definitions for symbols from - // any loaded modules: - for (auto loadedModule : compileRequest->loadedModulesList) - { - insertGlobalValueSymbols(sharedContext, loadedModule->irModule); - } - - auto context = state->getContext(); - context->shared = sharedContext; - context->builder = 
&sharedContext->builderStorage; - - // Now specialize the program layout using the substitution - // - // TODO: The specialization of the layout is conceptually an AST-level operations, - // and shouldn't be done here in the IR at all. - // - RefPtr<ProgramLayout> newProgramLayout = specializeProgramLayout( - targetReq, - programLayout, - SubstitutionSet(entryPointRequest->globalGenericSubst)); - - // TODO: we need to register the (IR-level) arguments of the global generic parameters as the - // substitutions for the generic parameters in the original IR. - - // applyGlobalGenericParamSubsitution(...); - - - state->newProgramLayout = newProgramLayout; - - // Next, we want to optimize lookup for layout infromation - // associated with global declarations, so that we can - // look things up based on the IR values (using mangled names) - auto globalStructLayout = getGlobalStructLayout(newProgramLayout); - for (auto globalVarLayout : globalStructLayout->fields) - { - auto mangledName = getMangledName(globalVarLayout->varDecl); - context->globalVarLayouts.AddIfNotExists(mangledName, globalVarLayout); - } - - // for now, clone all unreferenced witness tables - for (auto sym :context->getSymbols()) - { - if (sym.Value->irGlobalValue->op == kIROp_WitnessTable) - cloneGlobalValue(context, (IRWitnessTable*)sym.Value->irGlobalValue); - } - return state; - } - - void destroyIRSpecializationState(IRSpecializationState* state) - { - delete state; - } - - IRModule* getIRModule(IRSpecializationState* state) - { - return state->irModule; - } - - void specializeIRForEntryPoint( - IRSpecializationState* state, - EntryPointRequest* entryPointRequest, - ExtensionUsageTracker* extensionUsageTracker) - { - auto target = state->target; - - auto compileRequest = entryPointRequest->compileRequest; - auto session = compileRequest->mSession; - auto translationUnit = entryPointRequest->getTranslationUnit(); - auto originalIRModule = translationUnit->irModule; - if (!originalIRModule) - { - // 
We should already have emitted IR for the original - // translation unit, and it we don't have it, then - // we are now in trouble. - return; - } - - auto context = state->getContext(); - auto newProgramLayout = state->newProgramLayout; - - auto entryPointLayout = findEntryPointLayout(newProgramLayout, entryPointRequest); - - - // Next, we make sure to clone the global value for - // the entry point function itself, and rely on - // this step to recursively copy over anything else - // it might reference. - auto irEntryPoint = specializeIRForEntryPoint(context, entryPointRequest, entryPointLayout); - - // HACK: right now the bindings for global generic parameters are coming in - // as part of the original IR module, and we need to make sure these get - // copied over, even if they aren't referenced. - // - for(auto inst : originalIRModule->getGlobalInsts()) - { - auto bindInst = as<IRBindGlobalGenericParam>(inst); - if(!bindInst) - continue; - - cloneValue(context, bindInst); - } - - - // TODO: *technically* we should consider the case where - // we have global variables with initializers, since - // these should get run whether or not the entry point - // references them. - - // For GLSL only, we will need to perform "legalization" of - // the entry point and any entry-point parameters. 
- switch (target) - { - case CodeGenTarget::GLSL: - { - legalizeEntryPointForGLSL( - session, - context->getModule(), - irEntryPoint, - entryPointLayout, - &compileRequest->mSink, - extensionUsageTracker); - } - break; - - default: - break; - } - } - - struct IRGenericSpecContext : IRSpecContextBase - { - IRSpecContextBase* parent = nullptr; - - IRSharedSpecContext* getShared() { return shared; } - - // Override the "maybe clone" logic so that we always clone - virtual IRInst* maybeCloneValue(IRInst* originalVal) override; - }; - - IRInst* IRGenericSpecContext::maybeCloneValue(IRInst* originalVal) - { - if (parent) - { - return parent->maybeCloneValue(originalVal); - } - else - { - return originalVal; - } - } - - // See the work list for the generic spec context with - // every relevant instruction from `inst` through its - // descendents. - void addToSpecializationWorkListRec( - IRSharedGenericSpecContext* sharedContext, - IRInst* inst) - { - if(auto genericInst = as<IRGeneric>(inst)) - { - // We do *not* consider generics, or instructions nested under them. - return; - } - else - { - for(auto child : inst->getChildren()) - { - addToSpecializationWorkListRec(sharedContext, child); - } - - // Default case: consider this instruction for specialization. - sharedContext->addToWorkList(inst); - } - } - - IRInst* specializeGeneric( - IRSharedGenericSpecContext* sharedContext, - IRSpecContextBase* parentContext, - IRGeneric* genericVal, - IRSpecialize* specializeInst) - { - // First, we want to see if an existing specialization - // has already been made. To do that we will construct a key - // for lookup in the generic specialization context. 
- // - IRGenericSpecKey specializationKey(specializeInst); - { - IRInst* specializedValue = nullptr; - if(sharedContext->specializations.TryGetValue(specializationKey, specializedValue)) - return specializedValue; - } - - // If we get to this point, then we need to construct a - // new IR value to represent the result of specialization. - - // We need to establish a new mapping from inst->inst to - // handle the specialization, because we don't want the - // clones we register in this pass to cause confusion - // in later steps that might clone the same code. - - IRSpecEnv env; - env.parent = &sharedContext->globalEnv; - if (parentContext) - { - env.parent = parentContext->getEnv(); - } - - // The result of specialization should be inserted - // into the global scope, at the same location as - // the original generic. - IRBuilder builderStorage; - IRBuilder* builder = &builderStorage; - builder->sharedBuilder = &sharedContext->sharedBuilderStorage; - builder->setInsertBefore(genericVal); - - IRGenericSpecContext context; - context.shared = sharedContext; - context.parent = parentContext; - context.builder = builder; - context.env = &env; - - // Register the arguments of the `specialize` instruction to be used - // as the "cloned" value for each of the parameters of the generic. - // - UInt argCounter = 0; - for (auto param = genericVal->getFirstParam(); param; param = param->getNextParam()) - { - UInt argIndex = argCounter++; - SLANG_ASSERT(argIndex < specializeInst->getArgCount()); - - IRInst* arg = specializeInst->getArg(argIndex); - - registerClonedValue(&context, arg, param); - } - - // Okay, now we want to run through the body of the generic - // and clone stuff into the parent scope (which had - // better be the global scope). - for (auto bb : genericVal->getBlocks()) - { - // We expect a generic to only ever contain a single block. - SLANG_ASSERT(bb == genericVal->getFirstBlock()); - - // Iterate over the non-parameter ("ordinary") instructions. 
- for (auto ii : bb->getOrdinaryInsts()) - { - // The last block of the generic is expected to end with - // a `return` instruction for the specialized value that - // comes out of the abstraction. - // - // We thus use that cloned value as the result of the - // specialization step. - if (auto returnValInst = as<IRReturnVal>(ii)) - { - auto clonedResult = cloneValue(&context, returnValInst->getVal()); - - sharedContext->specializations.Add(specializationKey, clonedResult); - - return clonedResult; - } - - // Otherwise, clone the instruction into the global scope - IRInst* clonedInst = cloneInst(&context, context.builder, ii); - - // Now that we've cloned the instruction to a location outside - // of a generic, we should consider whether it can now be specialized. - addToSpecializationWorkListRec(sharedContext, clonedInst); - } - } - - // If we reach this point, something went wrong, because we - // never encountered a `return` inside the body of the generic. - SLANG_UNEXPECTED("no return from generic"); - UNREACHABLE_RETURN(nullptr); - } - - // Find the value in the given witness table that - // satisfies the given requirement (or return - // null if not found). - IRInst* findWitnessVal( - IRWitnessTable* witnessTable, - IRInst* requirementKey) - { - // For now we will do a dumb linear search - for( auto entry : witnessTable->getEntries() ) - { - // If the keys matched, then we use the value from this entry. - if (requirementKey == entry->requirementKey.get()) - { - auto satisfyingVal = entry->satisfyingVal.get(); - return satisfyingVal; - } - } - - // No matching entry found. - return nullptr; - } - - static bool canSpecializeGeneric( - IRGeneric* generic) - { - IRGeneric* g = generic; - for(;;) - { - auto val = findGenericReturnVal(g); - if(!val) - return false; - - if (auto nestedGeneric = as<IRGeneric>(val)) - { - // The outer generic returns an *inner* generic - // (so that multiple calls to `specialize` are - // needed to resolve it). 
We should look at - // what the nested generic returns to figure - // out whether specialization is allowed. - g = nestedGeneric; - continue; - } - - // We've found the leaf value that will be produced after - // all of the specialization is done. Now we want to know - // if that is a value suitable for actually specializing - // - if (isDefinition(val)) - return true; - return false; - } - } - - // Add any instruction that uses `inst` to the work list, - // so that it can be evaluated (or re-evaluated) for specialization. - void addUsesToWorkList( - IRSharedGenericSpecContext* sharedContext, - IRInst* inst) - { - for(auto u = inst->firstUse; u; u = u->nextUse) - { - sharedContext->addToWorkList(u->getUser()); - } - } - - void specializeGenericsForInst( - IRSharedGenericSpecContext* sharedContext, - IRInst* inst) - { - switch(inst->op) - { - default: - // The default behavior is to do nothing. - // An instruction is specialize-able once its operands - // are specialized, and after that it is also safe - // to consider the instruction specialized. - break; - - case kIROp_Specialize: - { - // We have a `specialize` instruction, so lets see - // whether we have an opportunity to perform the - // specialization here and now. - IRSpecialize* specInst = cast<IRSpecialize>(inst); - - // Look at the base of the `specialize`, and see if - // it directly names a generic, so that we can apply - // specialization here and now. - auto baseVal = specInst->getBase(); - if(auto genericVal = as<IRGeneric>(baseVal)) - { - if (canSpecializeGeneric(genericVal)) - { - // Okay, we have a candidate for specialization here. - // - // We will apply the specialization logic to the body of the generic, - // which will yield, e.g., a specialized `IRFunc`. - // - auto specializedVal = specializeGeneric(sharedContext, nullptr, genericVal, specInst); - // - // Then we will replace the use sites for the `specialize` - // instruction with uses of the specialized value. 
- // - addUsesToWorkList(sharedContext, specInst); - specInst->replaceUsesWith(specializedVal); - specInst->removeAndDeallocate(); - } - } - } - break; - - case kIROp_lookup_interface_method: - { - // We have a `lookup_interface_method` instruction, - // so let's see whether it is a lookup in a known - // witness table. - IRLookupWitnessMethod* lookupInst = cast<IRLookupWitnessMethod>(inst); - - // We only want to deal with the case where the witness-table - // argument points to a concrete global table (and not, e.g., a - // `specialize` instruction that will yield a table) - auto witnessTable = as<IRWitnessTable>(lookupInst->witnessTable.get()); - if(!witnessTable) - break; - - // Use the witness table to look up the value that - // satisfies the requirement. - auto requirementKey = lookupInst->getRequirementKey(); - auto satisfyingVal = findWitnessVal(witnessTable, requirementKey); - // We expect to always find something, but lets just - // be careful here. - if(!satisfyingVal) - break; - - // If we get through all of the above checks, then we - // have a (more) concrete method that implements the interface, - // and so we should dispatch to that directly, rather than - // use the `lookup_interface_method` instruction. - addUsesToWorkList(sharedContext, lookupInst); - lookupInst->replaceUsesWith(satisfyingVal); - lookupInst->removeAndDeallocate(); - } - break; - } - } - - static bool isInstSpecialized( - IRSharedGenericSpecContext* sharedContext, - IRInst* inst) - { - // If an instruction is still on our work list, then - // it isn't specialized, and conversely we say that - // if it *isn't* on the work list, it must be specialized. - // - // Note: if we end up with bugs in this logic, we could - // maintain an explicit set of specialized insts instead. 
- // - return !sharedContext->workListSet.Contains(inst); - } - - static bool canSpecializeInst( - IRSharedGenericSpecContext* sharedContext, - IRInst* inst) - { - // We can specialize an instruction once all its - // operands are specialized. - - UInt operandCount = inst->getOperandCount(); - for(UInt ii = 0; ii < operandCount; ++ii) - { - IRInst* operand = inst->getOperand(ii); - if(!isInstSpecialized(sharedContext, operand)) - return false; - } - return true; - } - - // Go through the code in the module and try to identify - // calls to generic functions where the generic arguments - // are known, and specialize the callee based on those - // known values. - void specializeGenerics( - IRModule* module, - CodeGenTarget target) - { - IRSharedGenericSpecContext sharedContextStorage; - auto sharedContext = &sharedContextStorage; - - initializeSharedSpecContext( - sharedContext, - module->session, - module, - module, - target); - - auto moduleInst = module->getModuleInst(); - - // First things first, let's deal with any bindings for global generic parameters. - for(auto inst : moduleInst->getChildren()) - { - auto bindInst = as<IRBindGlobalGenericParam>(inst); - if(!bindInst) - continue; - - // HACK: Our current front-end emit logic can end up emitting multiple - // `bindGlobalGeneric` instructions for the same parameter. This is - // a buggy behavior, but a real fix would require refactoring the way - // global generic arguments are specified today. - // - // For now we will do a sanity check to detect parameters that - // have already been specialized. - if( !as<IRGlobalGenericParam>(bindInst->getOperand(0)) ) - { - // parameter operand is no longer a parameter, so it - // seems things must have been specialized already. - continue; - } - - auto param = bindInst->getParam(); - auto val = bindInst->getVal(); - - param->replaceUsesWith(val); - } - { - // Now we will do a second pass to clean up the - // generic parameters and their bindings. 
- IRInst* next = nullptr; - for(auto inst = moduleInst->getFirstChild(); inst; inst = next) - { - next = inst->getNextInst(); - - switch(inst->op) - { - default: - break; - - case kIROp_GlobalGenericParam: - case kIROp_BindGlobalGenericParam: - // A "bind" instruction should have no uses in the - // first place, and all the global generic parameters - // should have had their uses replaced. - SLANG_ASSERT(!inst->firstUse); - inst->removeAndDeallocate(); - break; - } - } - } - - // Our goal here is to find `specialize` instructions that - // can be replaced with references to, e.g., a suitably - // specialized function, and to resolve any `lookup_interface_method` - // instructions to the concrete value fetched from a witness - // table. - // - // We need to be careful of a few things: - // - // * It would not in general make sense to consider specialize-able - // instructions under an `IRGeneric`, since that could mean "specialziing" - // code to parameter values that are still unknown. - // - // * We *also* need to be careful not to specialize something when one - // or more of its inputs is also a `specialize` or `lookup_interface_method` - // instruction, because then we'd be propagating through non-concrete - // values. - // - // The approach we use here is to build a work list of instructions - // that *can* become fully specialized, but aren't yet. Any - // instruction on the work list will be considered to be "unspecialized" - // and any instruction not on the work list is considered specialized. - // - // We will start by recursively walking all the instructions to add - // the appropriate ones to our work list: - // - addToSpecializationWorkListRec(sharedContext, moduleInst); - - // Now we are going to repeatedly walk our work list, and filter - // it to create a new work list. - List<IRInst*> workListCopy; - for(;;) - { - // Swap out the work list on the context so we can - // process it here without worrying about concurrent - // modifications. 
- workListCopy.Clear(); - workListCopy.SwapWith(sharedContext->workList); - - if(workListCopy.Count() == 0) - break; - - for(auto inst : workListCopy) - { - // We need to check whether it is possible to specialize - // the instruction yet (it might not be because its - // operands haven't been specialized) - if(!canSpecializeInst(sharedContext, inst)) - { - // Put it back on the fresh work list, so that - // we can re-consider it in another iteration. - sharedContext->workList.Add(inst); - } - else - { - // Okay, perform any specialization step on this - // instruction that makes sense (which might be - // doing nothing). - specializeGenericsForInst(sharedContext, inst); - - // Remove the instruction from consideration. - sharedContext->workListSet.Remove(inst); - } - } - } - - // Once the work list has gone dry, we should have the invariant - // that there are no `specialize` instructions inside of non-generic - // functions that in turn reference a generic function, *except* - // in the case where that generic is for a builtin function, in - // which case we wouldn't want to specialize it anyway. 
- } - - void applyGlobalGenericParamSubstitution( - IRSpecContext* /*context*/) - { - // TODO: we need to figure out how to apply this - } - - void markConstExpr( IRBuilder* builder, IRInst* irValue) diff --git a/source/slang/ir.h b/source/slang/ir.h index cf0ccae9a..86d1d1eb4 100644 --- a/source/slang/ir.h +++ b/source/slang/ir.h @@ -1133,5 +1133,4 @@ IRInst* createEmptyInstWithSize( size_t totalSizeInBytes); } - #endif diff --git a/source/slang/slang.vcxproj b/source/slang/slang.vcxproj index 3ce172271..a36d9bfbf 100644 --- a/source/slang/slang.vcxproj +++ b/source/slang/slang.vcxproj @@ -181,18 +181,22 @@ <ClInclude Include="expr-defs.h" /> <ClInclude Include="glsl.meta.slang.h" /> <ClInclude Include="hlsl.meta.slang.h" /> + <ClInclude Include="ir-clone.h" /> <ClInclude Include="ir-constexpr.h" /> <ClInclude Include="ir-dce.h" /> <ClInclude Include="ir-dominators.h" /> <ClInclude Include="ir-existential.h" /> + <ClInclude Include="ir-glsl-legalize.h" /> <ClInclude Include="ir-inst-defs.h" /> <ClInclude Include="ir-insts.h" /> + <ClInclude Include="ir-link.h" /> <ClInclude Include="ir-missing-return.h" /> <ClInclude Include="ir-restructure-scoping.h" /> <ClInclude Include="ir-restructure.h" /> <ClInclude Include="ir-sccp.h" /> <ClInclude Include="ir-serialize.h" /> <ClInclude Include="ir-specialize-resources.h" /> + <ClInclude Include="ir-specialize.h" /> <ClInclude Include="ir-ssa.h" /> <ClInclude Include="ir-validate.h" /> <ClInclude Include="ir.h" /> @@ -232,17 +236,21 @@ <ClCompile Include="diagnostics.cpp" /> <ClCompile Include="dxc-support.cpp" /> <ClCompile Include="emit.cpp" /> + <ClCompile Include="ir-clone.cpp" /> <ClCompile Include="ir-constexpr.cpp" /> <ClCompile Include="ir-dce.cpp" /> <ClCompile Include="ir-dominators.cpp" /> <ClCompile Include="ir-existential.cpp" /> + <ClCompile Include="ir-glsl-legalize.cpp" /> <ClCompile Include="ir-legalize-types.cpp" /> + <ClCompile Include="ir-link.cpp" /> <ClCompile Include="ir-missing-return.cpp" /> 
<ClCompile Include="ir-restructure-scoping.cpp" /> <ClCompile Include="ir-restructure.cpp" /> <ClCompile Include="ir-sccp.cpp" /> <ClCompile Include="ir-serialize.cpp" /> <ClCompile Include="ir-specialize-resources.cpp" /> + <ClCompile Include="ir-specialize.cpp" /> <ClCompile Include="ir-ssa.cpp" /> <ClCompile Include="ir-validate.cpp" /> <ClCompile Include="ir.cpp" /> diff --git a/source/slang/slang.vcxproj.filters b/source/slang/slang.vcxproj.filters index 9f3666fc1..390c0cc5f 100644 --- a/source/slang/slang.vcxproj.filters +++ b/source/slang/slang.vcxproj.filters @@ -42,6 +42,9 @@ <ClInclude Include="hlsl.meta.slang.h"> <Filter>Header Files</Filter> </ClInclude> + <ClInclude Include="ir-clone.h"> + <Filter>Header Files</Filter> + </ClInclude> <ClInclude Include="ir-constexpr.h"> <Filter>Header Files</Filter> </ClInclude> @@ -54,12 +57,18 @@ <ClInclude Include="ir-existential.h"> <Filter>Header Files</Filter> </ClInclude> + <ClInclude Include="ir-glsl-legalize.h"> + <Filter>Header Files</Filter> + </ClInclude> <ClInclude Include="ir-inst-defs.h"> <Filter>Header Files</Filter> </ClInclude> <ClInclude Include="ir-insts.h"> <Filter>Header Files</Filter> </ClInclude> + <ClInclude Include="ir-link.h"> + <Filter>Header Files</Filter> + </ClInclude> <ClInclude Include="ir-missing-return.h"> <Filter>Header Files</Filter> </ClInclude> @@ -78,6 +87,9 @@ <ClInclude Include="ir-specialize-resources.h"> <Filter>Header Files</Filter> </ClInclude> + <ClInclude Include="ir-specialize.h"> + <Filter>Header Files</Filter> + </ClInclude> <ClInclude Include="ir-ssa.h"> <Filter>Header Files</Filter> </ClInclude> @@ -191,6 +203,9 @@ <ClCompile Include="emit.cpp"> <Filter>Source Files</Filter> </ClCompile> + <ClCompile Include="ir-clone.cpp"> + <Filter>Source Files</Filter> + </ClCompile> <ClCompile Include="ir-constexpr.cpp"> <Filter>Source Files</Filter> </ClCompile> @@ -203,9 +218,15 @@ <ClCompile Include="ir-existential.cpp"> <Filter>Source Files</Filter> </ClCompile> + <ClCompile 
Include="ir-glsl-legalize.cpp"> + <Filter>Source Files</Filter> + </ClCompile> <ClCompile Include="ir-legalize-types.cpp"> <Filter>Source Files</Filter> </ClCompile> + <ClCompile Include="ir-link.cpp"> + <Filter>Source Files</Filter> + </ClCompile> <ClCompile Include="ir-missing-return.cpp"> <Filter>Source Files</Filter> </ClCompile> @@ -224,6 +245,9 @@ <ClCompile Include="ir-specialize-resources.cpp"> <Filter>Source Files</Filter> </ClCompile> + <ClCompile Include="ir-specialize.cpp"> + <Filter>Source Files</Filter> + </ClCompile> <ClCompile Include="ir-ssa.cpp"> <Filter>Source Files</Filter> </ClCompile> |
