diff options
Diffstat (limited to 'source/slang')
| -rw-r--r-- | source/slang/hlsl.meta.slang | 86 | ||||
| -rw-r--r-- | source/slang/slang-api.cpp | 9 | ||||
| -rw-r--r-- | source/slang/slang-capability-defs.h | 70 | ||||
| -rw-r--r-- | source/slang/slang-capability.cpp | 903 | ||||
| -rw-r--r-- | source/slang/slang-capability.h | 31 | ||||
| -rwxr-xr-x | source/slang/slang-compiler.cpp | 37 | ||||
| -rwxr-xr-x | source/slang/slang-compiler.h | 38 | ||||
| -rw-r--r-- | source/slang/slang-emit-c-like.cpp | 31 | ||||
| -rw-r--r-- | source/slang/slang-emit-c-like.h | 2 | ||||
| -rw-r--r-- | source/slang/slang-emit-glsl.cpp | 91 | ||||
| -rw-r--r-- | source/slang/slang-emit.cpp | 2 | ||||
| -rw-r--r-- | source/slang/slang-ir.cpp | 36 | ||||
| -rw-r--r-- | source/slang/slang-options.cpp | 56 | ||||
| -rw-r--r-- | source/slang/slang-parameter-binding.cpp | 4 | ||||
| -rw-r--r-- | source/slang/slang-profile.h | 1 | ||||
| -rw-r--r-- | source/slang/slang-repro.cpp | 10 | ||||
| -rw-r--r-- | source/slang/slang-serialize-container.cpp | 8 | ||||
| -rw-r--r-- | source/slang/slang.cpp | 60 |
18 files changed, 1041 insertions, 434 deletions
diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang index 29779e796..851af7d3f 100644 --- a/source/slang/hlsl.meta.slang +++ b/source/slang/hlsl.meta.slang @@ -4158,19 +4158,21 @@ struct BuiltInTriangleIntersectionAttributes // 10.3.1 +__target_intrinsic(hlsl) void CallShader<Payload>(uint shaderIndex, inout Payload payload); // `executeCallableNV` is the GLSL intrinsic that will be used to implement // `CallShader()` for GLSL-based targets. // -__target_intrinsic(glsl, "executeCallableNV") -void __executeCallableNV(uint shaderIndex, int payloadLocation); +__target_intrinsic(GL_NV_ray_tracing, "executeCallableNV") +__target_intrinsic(GL_EXT_ray_tracing, "executeCallableEXT") +void __executeCallable(uint shaderIndex, int payloadLocation); // Next is the custom intrinsic that will compute the payload location // for a type being used in a `CallShader()` call for GLSL-based targets. // __generic<Payload> -__target_intrinsic(glsl, "$XC") +__target_intrinsic(__glslRayTracing, "$XC") [__readNone] int __callablePayloadLocation(Payload payload); @@ -4186,11 +4188,12 @@ void CallShader(uint shaderIndex, inout Payload payload) static Payload p; p = payload; - __executeCallableNV(shaderIndex, __callablePayloadLocation(p)); + __executeCallable(shaderIndex, __callablePayloadLocation(p)); payload = p; } // 10.3.2 +__target_intrinsic(hlsl) void TraceRay<payload_t>( RaytracingAccelerationStructure AccelerationStructure, uint RayFlags, @@ -4201,8 +4204,9 @@ void TraceRay<payload_t>( RayDesc Ray, inout payload_t Payload); -__target_intrinsic(glsl, "traceNV") -void __traceNV( +__target_intrinsic(GL_NV_ray_tracing, "traceNV") +__target_intrinsic(GL_EXT_ray_tracing, "traceRayEXT") +void __traceRay( RaytracingAccelerationStructure AccelerationStructure, uint RayFlags, uint InstanceInclusionMask, @@ -4222,7 +4226,7 @@ void __traceNV( // syntax works in a pinch. 
// __generic<Payload> -__target_intrinsic(glsl, "$XP") +__target_intrinsic(__glslRayTracing, "$XP") [__readNone] int __rayPayloadLocation(Payload payload); @@ -4242,7 +4246,7 @@ void TraceRay( static payload_t p; p = Payload; - __traceNV( + __traceRay( AccelerationStructure, RayFlags, InstanceInclusionMask, @@ -4258,10 +4262,12 @@ void TraceRay( } // 10.3.3 +__target_intrinsic(hlsl) bool ReportHit<A>(float tHit, uint hitKind, A attributes); -__target_intrinsic(glsl, "reportIntersectionNV") -bool __reportIntersectionNV(float tHit, uint hitKind); +__target_intrinsic(GL_NV_ray_tracing, "reportIntersectionNV") +__target_intrinsic(GL_EXT_ray_tracing, "reportIntersectionEXT") +bool __reportIntersection(float tHit, uint hitKind); __generic<A> __specialized_for_target(glsl) @@ -4271,15 +4277,19 @@ bool ReportHit(float tHit, uint hitKind, A attributes) static A a; a = attributes; - return __reportIntersectionNV(tHit, hitKind); + return __reportIntersection(tHit, hitKind); } // 10.3.4 -__target_intrinsic(glsl, ignoreIntersectionNV) +__target_intrinsic(hlsl) +__target_intrinsic(GL_NV_ray_tracing, ignoreIntersectionNV) +__target_intrinsic(GL_EXT_ray_tracing, ignoreIntersectionEXT) void IgnoreHit(); // 10.3.5 -__target_intrinsic(glsl, terminateRayNV) +__target_intrinsic(hlsl) +__target_intrinsic(GL_NV_ray_tracing, terminateRayNV) +__target_intrinsic(GL_EXT_ray_tracing, terminateRayEXT) void AcceptHitAndEndSearch(); // 10.4 - System Values and Special Semantics @@ -4289,22 +4299,27 @@ void AcceptHitAndEndSearch(); // 10.4.1 - Ray Dispatch System Values -__target_intrinsic(glsl, "(gl_LaunchIDNV)") +__target_intrinsic(GL_NV_ray_tracing, "(gl_LaunchIDNV)") +__target_intrinsic(GL_EXT_ray_tracing, "(gl_LaunchIDEXT)") __target_intrinsic(cuda, "optixGetLaunchIndex") uint3 DispatchRaysIndex(); -__target_intrinsic(glsl, "(gl_LaunchSizeNV)") +__target_intrinsic(GL_NV_ray_tracing, "(gl_LaunchSizeNV)") +__target_intrinsic(GL_EXT_ray_tracing, "(gl_LaunchSizeEXT)") uint3 
DispatchRaysDimensions(); // 10.4.2 - Ray System Values -__target_intrinsic(glsl, "(gl_WorldRayOriginNV)") +__target_intrinsic(GL_NV_ray_tracing, "(gl_WorldRayOriginNV)") +__target_intrinsic(GL_EXT_ray_tracing, "(gl_WorldRayOriginEXT)") float3 WorldRayOrigin(); -__target_intrinsic(glsl, "(gl_WorldRayDirectionNV)") +__target_intrinsic(GL_NV_ray_tracing, "(gl_WorldRayDirectionNV)") +__target_intrinsic(GL_EXT_ray_tracing, "(gl_WorldRayDirectionEXT)") float3 WorldRayDirection(); -__target_intrinsic(glsl, "(gl_RayTminNV)") +__target_intrinsic(GL_NV_ray_tracing, "(gl_RayTminNV)") +__target_intrinsic(GL_EXT_ray_tracing, "(gl_RayTminEXT)") float RayTMin(); // Note: The `RayTCurrent()` intrinsic should translate to @@ -4317,39 +4332,48 @@ float RayTMin(); // we should simply provide two overloads here, specialized // to the appropriate Vulkan stages. // -__target_intrinsic(glsl, "$XT") +__target_intrinsic(GL_NV_ray_tracing, "(gl_RayTmaxNV)") +__target_intrinsic(GL_EXT_ray_tracing, "(gl_RayTmaxEXT)") float RayTCurrent(); -__target_intrinsic(glsl, "(gl_IncomingRayFlagsNV)") +__target_intrinsic(GL_NV_ray_tracing, "(gl_IncomingRayFlagsNV)") +__target_intrinsic(GL_EXT_ray_tracing, "(gl_IncomingRayFlagsEXT)") uint RayFlags(); // 10.4.3 - Primitive/Object Space System Values -__target_intrinsic(glsl, "(gl_InstanceCustomIndexNV)") +__target_intrinsic(GL_NV_ray_tracing, "(gl_InstanceCustomIndexNV)") +__target_intrinsic(GL_EXT_ray_tracing, "(gl_InstanceCustomIndexEXT)") uint InstanceIndex(); -__target_intrinsic(glsl, "(gl_InstanceID)") +__target_intrinsic(__glslRayTracing, "(gl_InstanceID)") uint InstanceID(); -__target_intrinsic(glsl, "(gl_PrimitiveID)") +__target_intrinsic(__glslRayTracing, "(gl_PrimitiveID)") uint PrimitiveIndex(); -__target_intrinsic(glsl, "(gl_ObjectRayOriginNV)") +__target_intrinsic(GL_NV_ray_tracing, "(gl_ObjectRayOriginNV)") +__target_intrinsic(GL_EXT_ray_tracing, "(gl_ObjectRayOriginEXT)") float3 ObjectRayOrigin(); -__target_intrinsic(glsl, 
"(gl_ObjectRayDirectionNV)") +__target_intrinsic(GL_NV_ray_tracing, "(gl_ObjectRayDirectionNV)") +__target_intrinsic(GL_EXT_ray_tracing, "(gl_ObjectRayDirectionEXT)") float3 ObjectRayDirection(); -__target_intrinsic(glsl, "transpose(gl_ObjectToWorldNV)") +__target_intrinsic(GL_NV_ray_tracing, "transpose(gl_ObjectToWorldNV)") +__target_intrinsic(GL_EXT_ray_tracing, "transpose(gl_ObjectToWorldEXT)") float3x4 ObjectToWorld3x4(); -__target_intrinsic(glsl, "transpose(gl_WorldToObjectNV)") +__target_intrinsic(GL_NV_ray_tracing, "transpose(gl_WorldToObjectNV)") +__target_intrinsic(GL_EXT_ray_tracing, "transpose(gl_WorldToObjectEXT)") float3x4 WorldToObject3x4(); -__target_intrinsic(glsl, "(gl_ObjectToWorldNV)") +__target_intrinsic(GL_NV_ray_tracing, "(gl_ObjectToWorldNV)") +__target_intrinsic(GL_EXT_ray_tracing, "(gl_ObjectToWorld3x4EXT)") float4x3 ObjectToWorld4x3(); -__target_intrinsic(glsl, "(gl_WorldToObjectNV)") +__target_intrinsic(GL_NV_ray_tracing, "(gl_WorldToObjectNV)") +__target_intrinsic(GL_EXT_ray_tracing, "(gl_WorldToObject3x4EXT)") float4x3 WorldToObject4x3(); // Note: The provisional DXR spec included these unadorned @@ -4365,7 +4389,8 @@ float3x4 ObjectToWorld() { return ObjectToWorld3x4(); } float3x4 WorldToObject() { return WorldToObject3x4(); } // 10.4.4 - Hit Specific System values -__target_intrinsic(glsl, "(gl_HitKindNV)") +__target_intrinsic(GL_NV_ray_tracing, "(gl_HitKindNV)") +__target_intrinsic(GL_EXT_ray_tracing, "(gl_HitKindEXT)") uint HitKind(); // Pre-defined hit kinds (not documented explicitly) @@ -4543,6 +4568,7 @@ struct FeedbackTexture2DArray<T : __BuiltinSamplerFeedbackType> // // Get the index of the geometry that was hit in an intersection, any-hit, or closest-hit shader +__target_intrinsic(GL_EXT_ray_tracing, "(gl_GeometryIndexEXT)") uint GeometryIndex(); // Status of whether a (closest) hit has been committed in a `RayQuery`. 
diff --git a/source/slang/slang-api.cpp b/source/slang/slang-api.cpp index c8b932306..e1eee66dd 100644 --- a/source/slang/slang-api.cpp +++ b/source/slang/slang-api.cpp @@ -233,6 +233,15 @@ SLANG_API void spSetTargetFloatingPointMode( request->setTargetFloatingPointMode(targetIndex, mode); } +SLANG_API void spAddTargetCapability( + slang::ICompileRequest* request, + int targetIndex, + SlangCapabilityID capability) +{ + SLANG_ASSERT(request); + request->addTargetCapability(targetIndex, capability); +} + SLANG_API void spSetMatrixLayoutMode( slang::ICompileRequest* request, SlangMatrixLayoutMode mode) diff --git a/source/slang/slang-capability-defs.h b/source/slang/slang-capability-defs.h index 8bf1d80e9..003fd3125 100644 --- a/source/slang/slang-capability-defs.h +++ b/source/slang/slang-capability-defs.h @@ -24,34 +24,64 @@ // TODO: There is probably a way to handle this with // variadic macros. // -#define SLANG_CAPABILITY_ATOM4(ENUMERATOR, NAME, FLAGS, BASE0, BASE1, BASE2, BASE3) \ - SLANG_CAPABILITY_ATOM(ENUMERATOR, NAME, FLAGS, BASE0, BASE1, BASE2, BASE3) +#define SLANG_CAPABILITY_ATOM4(ENUMERATOR, NAME, KIND, CONFLICTS, RANK, BASE0, BASE1, BASE2, BASE3) \ + SLANG_CAPABILITY_ATOM(ENUMERATOR, NAME, KIND, CONFLICTS, RANK, BASE0, BASE1, BASE2, BASE3) -#define SLANG_CAPABILITY_ATOM3(ENUMERATOR, NAME, FLAGS, BASE0, BASE1, BASE2) \ - SLANG_CAPABILITY_ATOM(ENUMERATOR, NAME, FLAGS, BASE0, BASE1, BASE2, Invalid) +#define SLANG_CAPABILITY_ATOM3(ENUMERATOR, NAME, KIND, CONFLICTS, RANK, BASE0, BASE1, BASE2) \ + SLANG_CAPABILITY_ATOM(ENUMERATOR, NAME, KIND, CONFLICTS, RANK, BASE0, BASE1, BASE2, Invalid) -#define SLANG_CAPABILITY_ATOM2(ENUMERATOR, NAME, FLAGS, BASE0, BASE1) \ - SLANG_CAPABILITY_ATOM(ENUMERATOR, NAME, FLAGS, BASE0, BASE1, Invalid, Invalid) +#define SLANG_CAPABILITY_ATOM2(ENUMERATOR, NAME, KIND, CONFLICTS, RANK, BASE0, BASE1) \ + SLANG_CAPABILITY_ATOM(ENUMERATOR, NAME, KIND, CONFLICTS, RANK, BASE0, BASE1, Invalid, Invalid) -#define 
SLANG_CAPABILITY_ATOM1(ENUMERATOR, NAME, FLAGS, BASE0) \ - SLANG_CAPABILITY_ATOM(ENUMERATOR, NAME, FLAGS, BASE0, Invalid, Invalid, Invalid) +#define SLANG_CAPABILITY_ATOM1(ENUMERATOR, NAME, KIND, CONFLICTS, RANK, BASE0) \ + SLANG_CAPABILITY_ATOM(ENUMERATOR, NAME, KIND, CONFLICTS, RANK, BASE0, Invalid, Invalid, Invalid) -#define SLANG_CAPABILITY_ATOM0(ENUMERATOR, NAME, FLAGS) \ - SLANG_CAPABILITY_ATOM(ENUMERATOR, NAME, FLAGS, Invalid, Invalid, Invalid, Invalid) +#define SLANG_CAPABILITY_ATOM0(ENUMERATOR, NAME, KIND, CONFLICTS, RANK) \ + SLANG_CAPABILITY_ATOM(ENUMERATOR, NAME, KIND, CONFLICTS, RANK, Invalid, Invalid, Invalid, Invalid) -// The `__target` capability exists only to provide a common -// abstract base for the capabilities that represent each -// of our compilation targets. +// Several capabilities represent the target formats in which we generate code. +// Because we can only generate code in one format at a time, all of these are +// marked as conflicting with one another along the `TargetFormat` axis. // -SLANG_CAPABILITY_ATOM0(Target, __target, Abstract) +// Note: We are only including here the source code formats we initially generate +// code in and not the formats that code might be translated into "downstream." +// Trying to figure out how to integrate both kinds of formats into our capability +// system will be an interesting challenge (e.g., can we compile code for `hlsl+spirv` +// and for `glsl+spirv` or even just for `spirv`, and how should all of those impact +// overloading). 
+// +SLANG_CAPABILITY_ATOM0(HLSL, hlsl, Concrete,TargetFormat,0) +SLANG_CAPABILITY_ATOM0(GLSL, glsl, Concrete,TargetFormat,0) +SLANG_CAPABILITY_ATOM0(C, c, Concrete,TargetFormat,0) +SLANG_CAPABILITY_ATOM0(CPP, cpp, Concrete,TargetFormat,0) +SLANG_CAPABILITY_ATOM0(CUDA, cuda, Concrete,TargetFormat,0) -SLANG_CAPABILITY_ATOM1(HLSL, hlsl, Concrete, Target) -SLANG_CAPABILITY_ATOM1(GLSL, glsl, Concrete, Target) -SLANG_CAPABILITY_ATOM1(C, c, Concrete, Target) -SLANG_CAPABILITY_ATOM1(CPP, cpp, Concrete, Target) -SLANG_CAPABILITY_ATOM1(CUDA, cuda, Concrete, Target) -SLANG_CAPABILITY_ATOM1(SPIRV, spirv, Concrete, Target) +// TODO: We should have multiple capabilities for the various SPIR-V versions, +// arranged so that they inherit from one another to represent which versions +// provide a super-set of the features of earlier ones (e.g., SPIR-V 1.4 should +// be expressed as inheriting from SPIR-V 1.3). +// +// For now we are only including the version(s) that are relevant to the +// features controlled by the capability system. +// +SLANG_CAPABILITY_ATOM1(SPIRV_1_4, spirv_1_4, Concrete,None,0, GLSL) +// The following capabilities all pertain to how ray tracing shaders are translated +// to GLSL, where there are two different extensions that can provide the core +// functionality of `TraceRay` and the related operations. +// +// The two extensions are expressed as distinct capabilities that both are marked +// as conflicting on the `RayTracingExtension` axis, so that a compilation target +// cannot have both enabled at once. +// +// The `GL_EXT_ray_tracing` extension should be favored, so it has a rank of `1` +// instead of `0`, which means that when comparing overloads that require these +// extensions, the `EXT` extension will be favored over the `NV` extension, if +// all other factors are equal. 
+// +SLANG_CAPABILITY_ATOM1(GLSLRayTracing, __glslRayTracing, Abstract,None,0, GLSL) +SLANG_CAPABILITY_ATOM1(GL_NV_ray_tracing, GL_NV_ray_tracing, Concrete,RayTracingExtension,0, GLSLRayTracing) +SLANG_CAPABILITY_ATOM2(GL_EXT_ray_tracing, GL_EXT_ray_tracing, Concrete,RayTracingExtension,1, GLSLRayTracing, SPIRV_1_4) #undef SLANG_CAPABILITY_ATOM0 #undef SLANG_CAPABILITY_ATOM1 diff --git a/source/slang/slang-capability.cpp b/source/slang/slang-capability.cpp index 7b4361a58..a75f6131c 100644 --- a/source/slang/slang-capability.cpp +++ b/source/slang/slang-capability.cpp @@ -1,6 +1,8 @@ // slang-capability.cpp #include "slang-capability.h" +#include "../core/slang-dictionary.h" + // This file implements the core of the "capability" system. namespace Slang @@ -10,37 +12,69 @@ namespace Slang // CapabilityAtom // -// We are going to divide capabilities into a few categories, -// which will be represented as flags for now. -// -// Every capability will be either concrete or abstract. -// An abstract capability basically represents a category -// of related capabilities that all fill a similar role. -// For example, we could have an abstract capability that -// represents "stages" and then the concrete capabilities -// `vertex`, `fragment`, etc. would inherit from it. +// We are going to divide capability atoms into a few categories. // -// Abstract capabilities are critical in our model for -// knowing when two capabilities are fundamentally incompatible. -// For example, it is meaningless to compile code for both -// the `vertex` and `fragment` capabilities at the same time, -// because no target processor supports both at once. +enum class CapabilityAtomFlavor : int32_t +{ + // A concrete capability atom is something that a target + // can directly support, where the presence of the feature + // directly provides functionality. A specific OpenGL + // or Vulkan extension would be an example of a concrete + // capability. 
+ // + Concrete, + + // An abstract capability represents a class of feature + // where multiple different implementations might be possible. + // For example, "ray tracing" might be an abstract feature + // that a function can require, but a specific target will + // only be able to provide that abstract feature via some + // specific concrete feature (e.g., `GL_EXT_ray_tracing`). + Abstract, + + // An alias capability atom is one that is exactly equivalent + // to the things it inherits from. + // + // For example, a `ps_5_1` capability would just be an + // alias for the combination of the `fragment` capability + // and the `sm_5_1` capability. + // + Alias, +}; + +// Certain capability atoms will conflict with one another, +// such that a concrete target should never be able to support +// both. // -// TODO: It is possible that instead of flags this could simply -// identify a "kind" of atom, with two different states. +// It is possible in theory to define "conflicting" capabilities +// in terms of the inheritance graph, but that makes checking +// for conflicts more difficult. // -// TODO: It is likely that in a future change we will want to -// add a third case here for "alias" capabilities, which are -// pseudo-atomic capabilities that are just equivalent to -// the set of their bases. +// Instead, we are going to allow each capability to define a +// mask to indicate group(s) of conflicting capabilities it +// belongs to. Two different capability atoms that have +// overlapping masks will be considered to conflict. // -typedef uint32_t CapabilityAtomFlags; -enum : CapabilityAtomFlags +enum class CapabilityAtomConflictMask : uint32_t { - kCapabilityAtomFlags_Concrete = 0, - kCapabilityAtomFlags_Abstract = 1 << 0, + // By default, most capability atoms do not conflict with one another. + None = 0, + + // Capability atoms that represent target code generation formats always conflict.
+ // (e.g., you cannot generate both HLSL and C++ output at once) + TargetFormat = 1 << 0, + + // Capability atoms that represent GLSL ray tracing extensions conflict with + // one another (we only want to use one such extension at a time). + RayTracingExtension = 1 << 1, }; +// For simplicity in building up our data structure representing +// all capability atoms, we will limit the number of bases that +// a capability atom is allowed to inherit from. +// +static const int kCapabilityAtom_MaxBases = 4; + // The macros in the `slang-capability-defs.h` file will be used // to fill out a `static const` array of information about each // capability atom. @@ -48,11 +82,19 @@ enum : CapabilityAtomFlags struct CapabilityAtomInfo { /// The API-/language-exposed name of the capability. - char const* name; + char const* name; + + /// Flavor of atom: concrete, abstract, or alias + CapabilityAtomFlavor flavor; + + /// A mask to indicate which other categories of atoms this one conflicts with + CapabilityAtomConflictMask conflictMask; - /// Flags to determine if the capability is concrete-vs-abstract, etc. - CapabilityAtomFlags flags; - CapabilityAtom bases[4]; + /// Ranking to use when deciding if this atom is a "better" one to select. + uint32_t rank; + + /// Base atoms this one "inherits" from (terminated with `Invalid` if not all entries used). 
+ CapabilityAtom bases[kCapabilityAtom_MaxBases]; }; // // The array is going to be sized to include an entry for `CapabilityAtom::Invalid` @@ -61,10 +103,10 @@ struct CapabilityAtomInfo // static const CapabilityAtomInfo kCapabilityAtoms[Int(CapabilityAtom::Count) + 1] = { - { "invalid", 0, { CapabilityAtom::Invalid, CapabilityAtom::Invalid, CapabilityAtom::Invalid, CapabilityAtom::Invalid } }, + { "invalid", CapabilityAtomFlavor::Concrete, CapabilityAtomConflictMask::None, 0, { CapabilityAtom::Invalid, CapabilityAtom::Invalid, CapabilityAtom::Invalid, CapabilityAtom::Invalid } }, -#define SLANG_CAPABILITY_ATOM(ENUMERATOR, NAME, FLAGS, BASE0, BASE1, BASE2, BASE3) \ - { #NAME, kCapabilityAtomFlags_##FLAGS, { CapabilityAtom::BASE0, CapabilityAtom::BASE1, CapabilityAtom::BASE2, CapabilityAtom::BASE3 } }, +#define SLANG_CAPABILITY_ATOM(ENUMERATOR, NAME, FLAVOR, CONFLICT, RANK, BASE0, BASE1, BASE2, BASE3) \ + { #NAME, CapabilityAtomFlavor::FLAVOR, CapabilityAtomConflictMask::CONFLICT, RANK, { CapabilityAtom::BASE0, CapabilityAtom::BASE1, CapabilityAtom::BASE2, CapabilityAtom::BASE3 } }, #include "slang-capability-defs.h" }; @@ -75,145 +117,6 @@ static CapabilityAtomInfo const& _getInfo(CapabilityAtom atom) return kCapabilityAtoms[Int(atom) + 1]; } -// One capability set or capability atom A implies another set/atom B -// if any target that supports all of the atoms in A must also support -// all of those in B. - - /// Does `thisAtom` imply `thatAtom`? -static bool _implies(CapabilityAtom thisAtom, CapabilityAtom thatAtom) -{ - // When looking at atoms, the immediate easy case is when - // the two atoms are the same: an atomic capability always - // implies itself. - // - if(thisAtom == thatAtom) - return true; - - // Otherwise, we want to look at the bases of `thisAtom` - // to see if any of them imply `thatAtom`, since `thisAtom` - // implies each of its bases. 
- // - auto& thisAtomInfo = _getInfo(thisAtom); - for( auto thisAtomBase : thisAtomInfo.bases ) - { - // The lists of bases are currently using `Invalid` as - // a sentinel value to terminate them, so we need to - // bail out of the loop when we see the sentinel. - // - if(thisAtomBase == CapabilityAtom::Invalid) - break; - - if(_implies(thisAtomBase, thatAtom)) - return true; - } - - return false; -} - - /// Does `base` have any abstract capabilities in common with `otherAtom` - /// - /// This subroutine is a helper for `_isIncompatible`. -static bool _hasAbstractBaseInCommon(CapabilityAtom base, CapabilityAtom otherAtom) -{ - // First we check the case where `base` itself is an abstract - // capability atom. - // - auto& baseAtomInfo = _getInfo(base); - if(baseAtomInfo.flags & kCapabilityAtomFlags_Abstract) - { - // If `base` is abstract, and `otherAtom` implies `base`, - // then that means that `otherAtom` includes one or - // more atoms that inherit from `base`, and thus the - // two have an abstract base in common. - // - if( _implies(otherAtom, base) ) - return true; - } - - // If `base` itself has bases, then we want to check if any - // of *those* are abstract bases that overlap with `otherAtom`. - // - for( auto baseBase : baseAtomInfo.bases ) - { - if(baseBase == CapabilityAtom::Invalid) - break; - - if(_hasAbstractBaseInCommon(baseBase, otherAtom)) - return true; - } - - // If we didn't manage to find any overlaps, then we conclude - // that there are no shared abstract bases. - // - return false; -} - - /// Is `thisAtom` incompatible with `thatAtom` (such that no target could ever support both at once) -static bool _isIncompatible(CapabilityAtom thisAtom, CapabilityAtom thatAtom) -{ - // If either atom implies the other, then they aren't incompatible. 
- // - // For example, if there is an atom representing `sm_5_1` that inherits - // from an atom representing `sm_5_0`, then clearly the two aren't - // in any way incompatible (a single target can support both). - // - if(_implies(thisAtom, thatAtom) || _implies(thatAtom, thisAtom)) - return false; - - // If the two atoms are not in an inheritance relationship, then one of - // a few cases can apply: - // - // * They have no common bases; in this case they are compatible. - // An example would be `vertex` and `sm_5_0`. - // - // * They have a common base, but it is not marked abstract; in - // this case they are compatible. E.g., two GLSL extensions that - // both inherit from the `glsl` capability should not conflict. - // - // * They have a common base that is marked abstract; in this - // case they are incompatible. An example would be `vertex` - // and `fragment` both inheriting from the abstract atom - // `__stage`. - // - // To summarize the above list, we note that two atoms are - // incompatible with they have an abstract base in common. - // - return _hasAbstractBaseInCommon(thisAtom, thatAtom); - - // TODO: The above logic is a bit off, but in a way that doesn't - // matter just yet. - // - // We currently have capabilities like: - // - // abstract capability __target; - // capability hlsl : __target; - // capability glsl : __target; - // - // In this case it is clear that `hlsl` and `glsl` should - // be incompatible, and that the rules as implemented - // make that the case. - // - // A problem arises when we start to add things like extensions: - // - // capability EXT_cool_thing : glsl; - // capability EXT_other_stuff : glsl; - // - // In this case, it also seems clear that `EXT_cool_thing` - // and `EXT_other_stuff` should be mutually compatible. - // However, with the rules implemented here right now, they - // would be found incompatible because they share the - // abstract base `__target`. 
- // - // In this specific case, we know that the relationship - // between the extensions is fine because they both inherit - // from `__target` *through* the concrete atom `glsl`. - // - // Before adding capabilities that represent optional - // extensions like this we need to codify the semantics - // for how incompatibility checks should work in terms - // of the inheritance graph of capability atoms. -} - CapabilityAtom findCapabilityAtom(UnownedStringSlice const& name) { // For now we are implementing a linear search over the @@ -237,51 +140,33 @@ CapabilityAtom findCapabilityAtom(UnownedStringSlice const& name) // CapabilitySet // -// The current design choice in `CapabilitySet` is that it blindly -// stores exactly the atoms it is told to, without any up-front -// processing. -// -// This choice has some down-sides, and there are other representations -// that could be much nicer in the future. Possible improcements include: -// -// * The list of atoms could be *expanded* so that if it contains atom A -// and atom A implies atom B, then the list should also include B. +// The current design choice in `CapabilitySet` is that it stores +// an expanded, deduplicated, and sorted list of the capability +// atoms in the set. "Expanded" here means that it includes the +// transitive closure of the inheritance graph of those atoms. // -// * The list of atoms could be *minimized*, such that if atom A implies -// atom B, then any list that contains A does not include B (both -// expanded and minimized lists have different benefits). -// -// * The list of atoms could be deduplicated. -// -// * The list of atoms could be sorted. -// -// * The lists could be deduplicated and cached in some central place -// (the like the session) so that repreated attempts to create the -// same capability sets return the same objects. -// -// In some parts of the code below we will call out how these improvements -// could affect the algorithms used. 
- -// Given our simple choices right now, the constructors for `CapabilitySet` -// are all straightforward: just adding the right atoms to the list. +// This choice is intended to make certain operations on +// capability sets more efficient, since we can use things like +// binary searches to efficiently detect whether an atom +// is present in a set. CapabilitySet::CapabilitySet() {} CapabilitySet::CapabilitySet(Int atomCount, CapabilityAtom const* atoms) { - m_atoms.addRange(atoms, atomCount); + _init(atomCount, atoms); } CapabilitySet::CapabilitySet(CapabilityAtom atom) { - m_atoms.add(atom); + _init(1, &atom); } CapabilitySet::CapabilitySet(List<CapabilityAtom> const& atoms) - : m_atoms(atoms) -{} - +{ + _init(atoms.getCount(), atoms.getBuffer()); +} CapabilitySet CapabilitySet::makeEmpty() { @@ -290,7 +175,118 @@ CapabilitySet CapabilitySet::makeEmpty() CapabilitySet CapabilitySet::makeInvalid() { - return CapabilitySet(CapabilityAtom::Invalid); + // An invalid capability set will always be a singleton + // set of the `Invalid` atom, and we will construct + // the set directly rather than use the more expensive + // logic in `_init()`. + // + CapabilitySet result; + result.m_expandedAtoms.add(CapabilityAtom::Invalid); + return result; +} + + /// Helper routine for `CapabilitySet::_init`. + /// + /// Recursively add all atoms implied by `atom` to `ioExpandedAtoms`. + /// +static void _addAtomsRec( + CapabilityAtom atom, + HashSet<CapabilityAtom>& ioExpandedAtoms) +{ + auto& atomInfo = _getInfo(atom); + + // The first step is to add `atom` itself, *unless* + // it is an alias, because an alias shouldn't impact + // whether one set is considered a subset/superset of + // another. + // + if(atomInfo.flavor != CapabilityAtomFlavor::Alias) + { + ioExpandedAtoms.Add(atom); + } + + // Next we add all the atoms transitively implied by `atom`.
+ // + for(auto baseAtom : atomInfo.bases) + { + // Note: the list of `bases` is a fixed-size array, but + // can be terminated with `Invalid` to indicate that + // not all of the entries are being used. + // + // If we see the sentinel, then we know we are at the end + // of the list. + // + if(baseAtom == CapabilityAtom::Invalid) + break; + + _addAtomsRec(baseAtom, ioExpandedAtoms); + } +} + +void CapabilitySet::_init(Int atomCount, CapabilityAtom const* atoms) +{ + // In order to fill in the expanded and deduplicated + // set of atoms, we will use an explicit hash set + // and then recursively walk the tree of atoms and + // their bases. + // + HashSet<CapabilityAtom> expandedAtomsSet; + for(Int i = 0; i < atomCount; ++i) + { + _addAtomsRec(atoms[i], expandedAtomsSet); + } + + // We can then translate the set of atoms into a list, + // and then sort that list to produce the data that + // we use in all our other queries. + // + for(auto atom : expandedAtomsSet) + { + m_expandedAtoms.add(atom); + } + m_expandedAtoms.sort(); +} + +void CapabilitySet::calcCompactedAtoms(List<CapabilityAtom>& outAtoms) const +{ + // A "compacted" list of atoms is one that starts with + // the "expanded" list and removes any atoms that are + // implied by another atom already in the list. + // + // If the expanded list contains atom A, and A inherits + // from B, then we know that the expanded list also contains B, + // but the compacted list should not. + // + // We can thus look through the list of atoms A and for + // each base B of A, add it to a set of "redundant" atoms + // that need not appear in the compacted list. + // + HashSet<CapabilityAtom> redundantAtomsSet; + for( auto atom : m_expandedAtoms ) + { + auto& atomInfo = _getInfo(atom); + for(auto baseAtom : atomInfo.bases) + { + // Note: dealing with possible early termination of the `bases` list. 
+ if(baseAtom == CapabilityAtom::Invalid) + break; + + redundantAtomsSet.Add(baseAtom); + } + } + + // Once we are done figuring out which atoms are redundant, + // we can iterate over the expanded list and add all the + // non-redundant ones to the compacted output list. + // + outAtoms.clear(); + for( auto atom : m_expandedAtoms ) + { + if(!redundantAtomsSet.Contains(atom)) + { + outAtoms.add(atom); + } + } } bool CapabilitySet::isEmpty() const @@ -298,7 +294,7 @@ bool CapabilitySet::isEmpty() const // Checking if a capability set is empty is trivial in any representation; // all we need to know is if it has zero atoms in its definition. // - return m_atoms.getCount() == 0; + return m_expandedAtoms.getCount() == 0; } bool CapabilitySet::isInvalid() const @@ -313,115 +309,484 @@ bool CapabilitySet::isInvalid() const // invalid (e.g., a set {A,B} would be invalid if A and B are incompatible, // but it would not be in the canonical form this subroutine checks). // - if(m_atoms.getCount() != 1) return false; - return m_atoms[0] == CapabilityAtom::Invalid; + if(m_expandedAtoms.getCount() != 1) return false; + return m_expandedAtoms[0] == CapabilityAtom::Invalid; } bool CapabilitySet::isIncompatibleWith(CapabilityAtom that) const { - // We know that capabilities that are in an inheritnace - // relationship with one another can't be incompatible. + // Checking for incompatibility is complicated, and it is best + // to only implement it for full (expanded) sets. // - if(this->implies(that) || CapabilitySet(that).implies(*this)) - return false; + return isIncompatibleWith(CapabilitySet(that)); +} - // Othwerise, we want to perform a check for each of the - // atoms in this set, whether it is incompatible with any - // of the atoms in the other set (which in this case is one atom). 
+uint32_t CapabilitySet::_calcConflictMask() const +{ + // Given a capbility set, we want to compute the mask representing + // all groups of features for which it holds a potentially-conflicting atom. // - for( auto thisAtom : this->m_atoms ) + uint32_t mask = 0; + for( auto atom : m_expandedAtoms ) { - if(_isIncompatible(thisAtom, that)) - return true; + mask |= uint32_t(_getInfo(atom).conflictMask); } - - return false; + return mask; } bool CapabilitySet::isIncompatibleWith(CapabilitySet const& that) const { - // We need to look at the atoms in `this` that are not - // present in `that`, and vice versa. For each such atom - // we will check if it is incompatible with the other, by - // virtue of the other already including a concrete atom - // that cannot co-exist with it. + // The `this` and `that` sets are incompatible if there exists + // an atom A in `this` and an atom `B` in `that` such that + // A and B are not equal, but the two have overlapping "conflict mask." // - for( auto thisAtom : this->m_atoms ) - { - if(that.isIncompatibleWith(thisAtom)) - return true; - } - for( auto thatAtom : that.m_atoms ) + // Equivalently, we can say that the two are in conflict if + // + // * One of the two sets contains an atom A with conflict mask M + // * The other set contains at least one atom that conflicts with M + // * The other set does not contain A + // + // Our approach here is all about minimizing the number of + // iterations we take over lists of atoms, and trying to + // avoid anything super-linear. + + // We start by identifying the OR of the conflict masks for + // all features in `this` and `that`. + // + uint32_t thisMask = this->_calcConflictMask(); + uint32_t thatMask = that._calcConflictMask(); + + // Note: there is a possible early-exit opportunity here if + // `thisMask` and `thatMask` have no overlap: there could + // be no conflicts in that case. 
+ + // Next we will iterate over the two sets in tandem (O(N) time + // in the size of the larger set), and identify any elements + // that are present in one and not the other. + // + Index thisCount = this->m_expandedAtoms.getCount(); + Index thatCount = that.m_expandedAtoms.getCount(); + Index thisIndex = 0; + Index thatIndex = 0; + for(;;) { - if(this->isIncompatibleWith(thatAtom)) - return true; + if(thisIndex == thisCount) break; + if(thatIndex == thatCount) break; + + auto thisAtom = this->m_expandedAtoms[thisIndex]; + auto thatAtom = that.m_expandedAtoms[thatIndex]; + + if(thisAtom == thatAtom) + { + thisIndex++; + thatIndex++; + continue; + } + + if( thisAtom < thatAtom ) + { + // `thisAtom` is present in `this` but not `that. + // + // If `thisAtom` has a conflict mask that overlaps + // with `thatMask`, then we have a conflict: the + // other set doesn't include `thisAtom`, but *does* + // include something with an overlapping mask + // (we don't know what at this point in the code). + // + auto thisAtomMask = uint32_t(_getInfo(thisAtom).conflictMask); + if(thisAtomMask & thatMask) + return true; + thisIndex++; + } + else + { + SLANG_ASSERT(thisAtom > thatAtom); + + // `thatAtom` is present in `that` but not `this. + // + // The logic here is the mirror image of the case above. + // + auto thatAtomMask = uint32_t(_getInfo(thatAtom).conflictMask); + if(thatAtomMask & thisMask) + return true; + thatIndex++; + } } - return false; - // TODO: If we had a representation that stored a minified, - // sorted, deduplicated list of atoms, then it would be easy - // to iterate over the two lists in tandem and identify any - // element that is present in one list but not the other. - // - // Those elements would be the candidates that could cause - // incompatiblity, so that we wouldn't need to perform - // the check on each atom like we do above. 
+ return false; } bool CapabilitySet::implies(CapabilitySet const& that) const { - // This capability set implies `other` if for every atom in `other`, - // that atom is present in this sets list of atoms or it is - // implies by something in the list of atoms. + // One capability set implies another if it is a super-set + // of the other one. Think of it this way: if your target + // supports features {X, Y, Z}, then that implies it also + // supports features {X,Z}. + // + // Because both `this` and `that` have expanded lists + // of all the capability atoms they imply *and* those + // lists are sorted, we can simply walk through the + // lists in tandem and see if there are any entries + // in `that` which are not present in `this. + + Index thisCount = this->m_expandedAtoms.getCount(); + Index thatCount = that.m_expandedAtoms.getCount(); + + // We cannot possibly have `this` contain all the atoms + // in `that` if the latter is has more atoms. + // + if(thatCount > thisCount) + return false; + + // Note: the following iteration is O(N) in the size + // of the larger of the two sets, which is probably + // needlessly inefficient. We might expect that `that` + // will often be a much smaller set, and we'd like to + // scale in its size rather than the size of `this`. + // + // A more advanced algorithm here would be to do + // something recursive: // - for( auto atom : that.m_atoms ) + // * If `that` is singleton set, then we can find + // whether `this` contains it via binary search. + // + // * Otherwise, we can split `that` into two + // equally-sized subsets. By taking a "pivot" value + // from where that split took place we can then + // use a binary search to partition `this` into + // two subsets and recurse on each side of that + // partition. + // + // In practice, the size of the sets we are dealing + // with right now doesn't justify such a "clever" algorithm. 
+ + Index thisIndex = 0; + Index thatIndex = 0; + for(;;) { - if(!this->implies(atom)) + if(thisIndex == thisCount) break; + if(thatIndex == thatCount) break; + + auto thisAtom = this->m_expandedAtoms[thisIndex]; + auto thatAtom = that.m_expandedAtoms[thatIndex]; + + if( thisAtom == thatAtom ) + { + // We have an atom that both sets contain; + // we should skip past it and keep looking. + // + thisIndex++; + thatIndex++; + continue; + } + + if( thisAtom < thatAtom ) + { + // We have an atom that `this` contains, + // but `that` doesn't; that is consistent + // with `this` being a super-set, so we + // just skip the item and keep searching. + // + thisIndex++; + } + else + { + SLANG_ASSERT(thisAtom > thatAtom); + + // We have an atom in `that` which isn't + // also in `this`, so we know it cannot + // be a subset. + // return false; + } } return true; - - // TODO: If we had a representation that stored an expanded - // sorted, deduplicated list of atoms, then we could - // check the `implies` relationship by scanning through - // the two lists in tandem and identifying any element - // in the `that` list that isn't in the `this` list. - // Such elements would indicate that `that` is not a subset - // of `this`. } + /// Helper functor for binary search on lists of `CapabilityAtom` +struct CapabilityAtomComparator +{ + int operator()(CapabilityAtom left, CapabilityAtom right) + { + return int(Int(left) - Int(right)); + } +}; bool CapabilitySet::implies(CapabilityAtom atom) const { - // If our list of explicit atoms contains `atom`, then - // we definitely imply it. + // The common case here is when `atom` is not an alias. // - // TODO: If we stored our atom lists sorted, then - // this operation could be logarithmic rather than - // linear. - // - if(m_atoms.contains(atom)) - return true; + if( _getInfo(atom).flavor != CapabilityAtomFlavor::Alias ) + { + // Every non-alias atom that `this` implies should + // be presented in the `m_expandedAtoms` list. 
+ // + // Because the list is sorted, we can find out whether + // it contains `atom` with a binary search. + // + Index result = m_expandedAtoms.binarySearch(atom, CapabilityAtomComparator()); + return result >= 0; + } + else + { + // In the case where `atom` is an alias, then it won't + // appear in the expanded list, and we need to check + // whether `this` set implies everything that `atom` + // transitively inherits from. + // + // The simplest way to do that is to expand `atom` + // into the full capability set it stands for and + // check that. + // + return implies(CapabilitySet(atom)); + } +} - // If any of our atoms implies `atom` then we - // also imply it. +Int CapabilitySet::countIntersectionWith(CapabilitySet const& that) const +{ + // The goal of this subroutine is to count the number of + // elements in the intersection of `this` and `that`, + // without explicitly forming that intersection. // - // TODO: If we stored an expanded atom list, then - // this recursion could be skipped completely, since - // the containment check above would cover inheirtance - // relationships too. + // Our approach here will be to iterate over the two + // sets in tandem (O(N) in the size of the larger set) + // and check for elements that both contain. // - for( auto thisAtom : m_atoms ) + // TODO: There should be an asymptotically faster + // recursive algorithm here. + + Int intersectionCount = 0; + + Index thisCount = this->m_expandedAtoms.getCount(); + Index thatCount = that.m_expandedAtoms.getCount(); + Index thisIndex = 0; + Index thatIndex = 0; + for(;;) { - if(_implies(thisAtom, atom)) - return true; + if(thisIndex == thisCount) break; + if(thatIndex == thatCount) break; + + auto thisAtom = this->m_expandedAtoms[thisIndex]; + auto thatAtom = that.m_expandedAtoms[thatIndex]; + + if( thisAtom == thatAtom ) + { + // An item both contain. 
+ + intersectionCount++; + thisIndex++; + thatIndex++; + continue; + } + + if( thisAtom < thatAtom ) + { + // An item in `this` but not `that`. + + thisIndex++; + } + else + { + SLANG_ASSERT(thisAtom > thatAtom); + + // An item in `that` but not `this`. + + thatIndex++; + } } + return intersectionCount; +} + +bool CapabilitySet::isBetterForTarget( + CapabilitySet const& existingCaps, + CapabilitySet const& targetCaps) +{ + auto& candidateCaps = *this; + + // The task here is to determine if `candidateCaps` should + // be considered "better" than `existingCaps` in the context + // of compilation for a target with the given `targetCaps`. + // + // In an ideal world, this computation could be quite simple: + // + // * If either `candidateCaps` or `existingCaps` is not implied by + // `targetCaps` (that is, they include requirements that aren't + // provided by the target), then the other is automatically "better." + // + // * Otherwise, one set is "better" than the other if it is a + // super-set (which is what `implies()` tests). + // + // There are two main reasons we can't use that simple logic: + // + // 1. Currently a user of Slang can compile for a target but + // not actually spell out its capabilities fully or correctly. + // They might compile for `sm_5_0` but use ray tracing features + // that require `sm_6_2` and expect the compiler to figure out + // what they "obviously" meant. Thus we cannot assume that + // `targetCaps` can be used to rule out candidates fully. + // + // 2. Sometimes there are multiple ways for a target to provide + // the same feature (e.g., multiple extensions) and because of (1) + // we cannot always rely on the `targetCaps` to tell us which to + // use. Thus we cannot rely on pure subset/`implies()` to define + // better-ness, and need some way to break ties. 
+ // + // The following logic is a bunch of "do what I mean" nonsense that + // tries to capture a reasonable intuition of what "better"-ness + // should mean with these caveats. + + // First, if either candidate is fundamentally incompatible + // with the target, we shouldn't favor it. + // + if(candidateCaps.isIncompatibleWith(targetCaps)) return false; + if(existingCaps.isIncompatibleWith(targetCaps)) return true; + + // Next, we want to compare the candidates to the `targetCaps` + // to figure out whether one is obviously "more specialized" for + // the target. + // + // We measure the degree to which a candidate is specialized for + // the target as the size of its set intersection with `targetCaps`. + // + // TODO: If both `candidateCaps` and `existingCaps` are implied + // by `targetCaps`, then this amounts to just measuring the + // size of each set. We probably want this size-based check to + // come later in the overall process. + // + // TODO: A better model here might be to actually compute the actual + // intersected sets, and then check if one is a super-set of the other. + // + auto candidateIntersectionSize = targetCaps.countIntersectionWith(candidateCaps); + auto existingIntersectionSize = targetCaps.countIntersectionWith(existingCaps); + if(candidateIntersectionSize != existingIntersectionSize) + return candidateIntersectionSize > existingIntersectionSize; + + // Next we want to consider that if one of the two candidates + // is actually available on the target (meaning that it is + // implied by `targetCaps`) then we probably want to pick that one + // (since we can use that candidate on the chosen target without + // enabling any additional features the user didn't ask for). 
+ // + // TODO: This step currently needs to come after the preceeding + // one because otherwise we risk selecting a `__target_intrinsic` + // decoration with *no* requirements (which are currently being + // added implicitly in many places) over any one with explicit + // requirements (since every target implies the empty set of + // requirements). + // + // In many ways the counting-based logic above amounts to a quick + // fix to prefer a non-empty set of requirements over an empty one, + // so long as something in that non-empty set overlaps with the target. + // + // TODO: The best fix is probably to figure out how "catch-all" + // intrinsic function definitions should be encoded; we clearly + // want them to be used only as a fallback when no target-specific + // variants are present. + // + bool candidateIsAvailable = targetCaps.implies(candidateCaps); + bool existingIsAvailable = targetCaps.implies(existingCaps); + if(candidateIsAvailable != existingIsAvailable) + return candidateIsAvailable; + + // All preceding factors being equal, we prefer + // a candidate that is strictly more specialized than the other. + // + // TODO: This logic has the negative effect of always preferring + // to enable optional features even if they aren't necessary. + // It would prefer the set {glsl, optionalFeature} over the set + // {glsl}, even though we might argue that a default implementaton + // that works without any optional features is "obviously" what + // the user means if they didn't enable those features. + // + // TODO: The right answer is possibly that we want to partition + // `candidateCaps` and `existingCaps` into two parts: their + // intersection with `targetCaps` and their difference with it. + // + // For the intersection part of things, we'd want to favor a + // definition that is more specialized, while for the difference + // part we'd actually wnat to favor a definition that is less + // specialized. 
+ // + if(candidateCaps.implies(existingCaps)) return true; + if(existingCaps.implies(candidateCaps)) return true; + + // At this point we have the problem that neither candidate + // appears to be "obviously" better for the target, but we + // want some way to disambiguate them. + // + // What we want to do now is scan through what makes each candidate + // different from the other, and see if anything in either case + // has a ranking that should make it be preferred. + // + // TODO: This should probably *not* be considering anything that + // is implied/supported by the target. + // + auto candidateScore = candidateCaps._calcDifferenceScoreWith(existingCaps); + auto existingScore = existingCaps._calcDifferenceScoreWith(candidateCaps); + if(candidateScore != existingScore) + return candidateScore > existingScore; return false; } +uint32_t CapabilitySet::_calcDifferenceScoreWith(CapabilitySet const& that) const +{ + uint32_t score = 0; + + // Our approach here will be to scan through `this` and `that` + // to identify atoms that are in `this` but not `that` (that is, + // the atoms that would be present in the set difference `this - that`) + // and then compute the maximum rank/score of those atoms. + + Index thisCount = this->m_expandedAtoms.getCount(); + Index thatCount = that.m_expandedAtoms.getCount(); + Index thisIndex = 0; + Index thatIndex = 0; + for(;;) + { + if(thisIndex == thisCount) break; + if(thatIndex == thatCount) break; + + auto thisAtom = this->m_expandedAtoms[thisIndex]; + auto thatAtom = that.m_expandedAtoms[thatIndex]; + + if( thisAtom == thatAtom ) + { + thisIndex++; + thatIndex++; + continue; + } + + if( thisAtom < thatAtom ) + { + // `thisAtom` is not present in `that`, so it + // should contribute to our ranking of the difference. 
+ // + auto thisAtomInfo = _getInfo(thisAtom); + auto thisAtomRank = thisAtomInfo.rank; + + if( thisAtomRank > score ) + { + score = thisAtomRank; + } + + thisIndex++; + } + else + { + SLANG_ASSERT(thisAtom > thatAtom); + thatIndex++; + } + } + return score; +} + + bool CapabilitySet::operator==(CapabilitySet const& other) const { + // TODO: We should be able to implement this more efficiently + // by scanning over the two sets in tandem. + return this->implies(other) && other.implies(*this); } diff --git a/source/slang/slang-capability.h b/source/slang/slang-capability.h index 662f7eed8..5392a669a 100644 --- a/source/slang/slang-capability.h +++ b/source/slang/slang-capability.h @@ -34,7 +34,7 @@ enum class CapabilityAtom : int32_t // Invalid = -1, -#define SLANG_CAPABILITY_ATOM(ENUMERATOR, NAME, FLAGS, BASE0, BASE1, BASE2, BASE3) \ +#define SLANG_CAPABILITY_ATOM(ENUMERATOR, NAME, FLAVOR, CONFLICT, RANK, BASE0, BASE1, BASE2, BASE3) \ ENUMERATOR, #include "slang-capability-defs.h" @@ -127,22 +127,27 @@ public: bool operator==(CapabilitySet const& that) const; /// Get access to the raw atomic capabilities that define this set. - List<CapabilityAtom> const& getAtoms() const { return m_atoms; } + List<CapabilityAtom> const& getExpandedAtoms() const { return m_expandedAtoms; } + + /// Calculate a list of "compacted" atoms, which excludes any atoms from the expanded list that are implies by another item in the list. + void calcCompactedAtoms(List<CapabilityAtom>& outAtoms) const; + + Int countIntersectionWith(CapabilitySet const& that) const; + + bool isBetterForTarget(CapabilitySet const& that, CapabilitySet const& targetCaps); private: + void _init(Int atomCount, CapabilityAtom const* atoms); - // The underlying representation we are using is currently very simple: - // a capability set is stored as a list of the atoms that were passed - // in at the time the set was constructed. 
- // - // Currently, no effort is made to sort the atoms, remove duplicates, - // or to expand the list when one atom entails another. - // - // TODO: Much more efficient representations are possible, and we - // should consider them if the performance of `CapabilitySet` ever - // prooves to be an issue. + uint32_t _calcConflictMask() const; + uint32_t _calcDifferenceScoreWith(CapabilitySet const& other) const; + + // The underlying representation we use is a sorted and deduplicated + // list of all the (non-alias) atoms that are present in the set. + // This "expanded" list uses the transitive closure over the inheritnace + // relationship between the atoms. // - List<CapabilityAtom> m_atoms; + List<CapabilityAtom> m_expandedAtoms; }; /// Are the `left` and `right` capability sets unequal? diff --git a/source/slang/slang-compiler.cpp b/source/slang/slang-compiler.cpp index 158aac141..1e9357f4f 100755 --- a/source/slang/slang-compiler.cpp +++ b/source/slang/slang-compiler.cpp @@ -335,15 +335,20 @@ namespace Slang // - Profile Profile::lookUp(char const* name) + Profile Profile::lookUp(UnownedStringSlice const& name) { - #define PROFILE(TAG, NAME, STAGE, VERSION) if(strcmp(name, #NAME) == 0) return Profile::TAG; - #define PROFILE_ALIAS(TAG, DEF, NAME) if(strcmp(name, #NAME) == 0) return Profile::TAG; + #define PROFILE(TAG, NAME, STAGE, VERSION) if(name == UnownedTerminatedStringSlice(#NAME)) return Profile::TAG; + #define PROFILE_ALIAS(TAG, DEF, NAME) if(name == UnownedTerminatedStringSlice(#NAME)) return Profile::TAG; #include "slang-profile-defs.h" return Profile::Unknown; } + Profile Profile::lookUp(char const* name) + { + return lookUp(UnownedTerminatedStringSlice(name)); + } + char const* Profile::getName() { switch( raw ) @@ -968,7 +973,7 @@ namespace Slang DWORD flags = 0; - switch( targetReq->floatingPointMode ) + switch( targetReq->getFloatingPointMode() ) { default: break; @@ -1237,7 +1242,7 @@ SlangResult dissassembleDXILUsingDXC( // If we are not in pass 
through, lookup the default compiler for the emitted source type if (downstreamCompiler == PassThroughMode::None) { - auto target = targetReq->target; + auto target = targetReq->getTarget(); switch (target) { case CodeGenTarget::PTX: @@ -1387,7 +1392,7 @@ SlangResult dissassembleDXILUsingDXC( options.flags &= ~(CompileOptions::Flag::EnableExceptionHandling | CompileOptions::Flag::EnableSecurityChecks); // Set what kind of target we should build - switch (targetReq->target) + switch (targetReq->getTarget()) { case CodeGenTarget::HostCallable: case CodeGenTarget::SharedLibrary: @@ -1434,7 +1439,7 @@ SlangResult dissassembleDXILUsingDXC( default: SLANG_ASSERT(!"Unhandled debug level"); break; } - switch( targetReq->floatingPointMode ) + switch( targetReq->getFloatingPointMode() ) { case FloatingPointMode::Default: options.floatingPointMode = DownstreamCompiler::FloatingPointMode::Default; break; case FloatingPointMode::Precise: options.floatingPointMode = DownstreamCompiler::FloatingPointMode::Precise; break; @@ -1628,6 +1633,20 @@ SlangResult dissassembleDXILUsingDXC( request.spirvVersion.minor = spirvLanguageVersion.m_minor; request.spirvVersion.patch = spirvLanguageVersion.m_patch; } + else + { + // HACK: look at the requested capabilities of the target, + // and see if they specify a SPIR-V version that we should + // pass down. + // + auto targetCaps = targetReq->getTargetCaps(); + if(targetCaps.implies(CapabilityAtom::SPIRV_1_4)) + { + request.spirvVersion.major = 1; + request.spirvVersion.minor = 4; + request.spirvVersion.patch = 0; + } + } request.outputFunc = outputFunc; request.outputUserData = &spirvOut; @@ -1698,7 +1717,7 @@ SlangResult dissassembleDXILUsingDXC( { CompileResult result; - auto target = targetReq->target; + auto target = targetReq->getTarget(); switch (target) { @@ -2041,7 +2060,7 @@ SlangResult dissassembleDXILUsingDXC( { // Writing to console, so we need to generate text output. 
- switch (targetReq->target) + switch (targetReq->getTarget()) { #if SLANG_ENABLE_DXBC_SUPPORT case CodeGenTarget::DXBytecode: diff --git a/source/slang/slang-compiler.h b/source/slang/slang-compiler.h index ace2cb842..008c5eb5a 100755 --- a/source/slang/slang-compiler.h +++ b/source/slang/slang-compiler.h @@ -1141,12 +1141,22 @@ namespace Slang class TargetRequest : public RefObject { public: - Linkage* linkage; - CodeGenTarget target; - SlangTargetFlags targetFlags = 0; - Slang::Profile targetProfile = Slang::Profile(); - FloatingPointMode floatingPointMode = FloatingPointMode::Default; - CapabilitySet targetCaps = CapabilitySet::makeInvalid(); + TargetRequest(Linkage* linkage, CodeGenTarget format); + + void addTargetFlags(SlangTargetFlags flags) + { + targetFlags |= flags; + } + void setTargetProfile(Slang::Profile profile) + { + targetProfile = profile; + } + void setFloatingPointMode(FloatingPointMode mode) + { + floatingPointMode = mode; + } + void addCapability(CapabilityAtom capability); + bool isWholeProgramRequest() { @@ -1154,9 +1164,10 @@ namespace Slang } Linkage* getLinkage() { return linkage; } - CodeGenTarget getTarget() { return target; } + CodeGenTarget getTarget() { return format; } Profile getTargetProfile() { return targetProfile; } FloatingPointMode getFloatingPointMode() { return floatingPointMode; } + SlangTargetFlags getTargetFlags() { return targetFlags; } CapabilitySet getTargetCaps(); Session* getSession(); @@ -1168,6 +1179,15 @@ namespace Slang Dictionary<Type*, RefPtr<TypeLayout>>& getTypeLayouts() { return typeLayouts; } TypeLayout* getTypeLayout(Type* type); + + private: + Linkage* linkage = nullptr; + CodeGenTarget format = CodeGenTarget::Unknown; + SlangTargetFlags targetFlags = 0; + Slang::Profile targetProfile = Slang::Profile(); + FloatingPointMode floatingPointMode = FloatingPointMode::Default; + List<CapabilityAtom> rawCapabilities; + CapabilitySet cookedCapabilities; }; /// Are we generating code for a D3D API? 
@@ -1898,6 +1918,8 @@ namespace Slang virtual SLANG_NO_THROW SlangResult SLANG_MCALL getSession(slang::ISession** outSession) SLANG_OVERRIDE; virtual SLANG_NO_THROW SlangReflection* SLANG_MCALL getReflection() SLANG_OVERRIDE; virtual SLANG_NO_THROW void SLANG_MCALL setCommandLineCompilerMode() SLANG_OVERRIDE; + virtual SLANG_NO_THROW SlangResult SLANG_MCALL addTargetCapability(SlangInt targetIndex, SlangCapabilityID capability) SLANG_OVERRIDE; + EndToEndCompileRequest( Session* session); @@ -2158,6 +2180,8 @@ namespace Slang SLANG_NO_THROW SlangResult SLANG_MCALL loadStdLib(const void* stdLib, size_t stdLibSizeInBytes) override; SLANG_NO_THROW SlangResult SLANG_MCALL saveStdLib(ISlangBlob** outBlob) override; + SLANG_NO_THROW SlangCapabilityID SLANG_MCALL findCapability(char const* name) override; + /// Get the default compiler for a language DownstreamCompiler* getDefaultDownstreamCompiler(SourceLanguage sourceLanguage); diff --git a/source/slang/slang-emit-c-like.cpp b/source/slang/slang-emit-c-like.cpp index 9d208b8a3..facb8a710 100644 --- a/source/slang/slang-emit-c-like.cpp +++ b/source/slang/slang-emit-c-like.cpp @@ -630,7 +630,7 @@ String CLikeSourceEmitter::generateName(IRInst* inst) // If the instruction names something // that should be emitted as a target intrinsic, // then use that name instead. - if(auto intrinsicDecoration = findBestTargetIntrinsicDecorationXXX(inst)) + if(auto intrinsicDecoration = findBestTargetIntrinsicDecoration(inst)) { return String(intrinsicDecoration->getDefinition()); } @@ -1092,7 +1092,7 @@ bool CLikeSourceEmitter::shouldFoldInstIntoUseSites(IRInst* inst) // This is significant, because we can within a target intrinsics definition multiple accesses to the same // parameter. This is not indicated into the call, and can lead to output code computes something multiple // times as it is folding into the expression of the the target intrinsic, which we don't want. 
- if (auto targetIntrinsicDecoration = findBestTargetIntrinsicDecorationXXX(funcValue)) + if (auto targetIntrinsicDecoration = findBestTargetIntrinsicDecoration(funcValue)) { // Find the index of the original instruction, to see if it's multiply used. IRUse* args = callInst->getArgs(); @@ -1300,7 +1300,7 @@ IRTargetSpecificDecoration* CLikeSourceEmitter::findBestTargetDecoration(IRInst* return Slang::findBestTargetDecoration(inInst, getTargetCaps()); } -IRTargetIntrinsicDecoration* CLikeSourceEmitter::findBestTargetIntrinsicDecorationXXX(IRInst* inInst) +IRTargetIntrinsicDecoration* CLikeSourceEmitter::findBestTargetIntrinsicDecoration(IRInst* inInst) { return as<IRTargetIntrinsicDecoration>(findBestTargetDecoration(inInst)); } @@ -1834,25 +1834,6 @@ void CLikeSourceEmitter::emitIntrinsicCallExprImpl( } break; - case 'T': - { - // The `$XT` case handles selecting between - // the `gl_HitTNV` and `gl_RayTmaxNV` builtins, - // based on what stage we are using: - switch( m_entryPointStage ) - { - default: - m_writer->emit("gl_RayTmaxNV"); - break; - - case Stage::AnyHit: - case Stage::ClosestHit: - m_writer->emit("gl_HitTNV"); - break; - } - } - break; - default: SLANG_RELEASE_ASSERT(false); break; @@ -1955,7 +1936,7 @@ void CLikeSourceEmitter::emitCallExpr(IRCall* inst, EmitOpInfo outerPrec) // We want to detect any call to an intrinsic operation, // that we can emit it directly without mangling, etc. - if(auto targetIntrinsic = findBestTargetIntrinsicDecorationXXX(funcValue)) + if(auto targetIntrinsic = findBestTargetIntrinsicDecoration(funcValue)) { emitIntrinsicCallExpr(inst, targetIntrinsic, outerPrec); } @@ -3334,7 +3315,7 @@ bool CLikeSourceEmitter::isTargetIntrinsic(IRFunc* func) // it has a suitable decoration marking it as a // target intrinsic for the current compilation target. 
// - return findBestTargetIntrinsicDecorationXXX(func) != nullptr; + return findBestTargetIntrinsicDecoration(func) != nullptr; } void CLikeSourceEmitter::emitFunc(IRFunc* func) @@ -3367,7 +3348,7 @@ void CLikeSourceEmitter::emitStruct(IRStructType* structType) { // If the selected `struct` type is actually an intrinsic // on our target, then we don't want to emit anything at all. - if(auto intrinsicDecoration = findBestTargetIntrinsicDecorationXXX(structType)) + if(auto intrinsicDecoration = findBestTargetIntrinsicDecoration(structType)) { return; } diff --git a/source/slang/slang-emit-c-like.h b/source/slang/slang-emit-c-like.h index 0b6e63110..a26959e54 100644 --- a/source/slang/slang-emit-c-like.h +++ b/source/slang/slang-emit-c-like.h @@ -176,7 +176,7 @@ public: void emitInstResultDecl(IRInst* inst); IRTargetSpecificDecoration* findBestTargetDecoration(IRInst* inst); - IRTargetIntrinsicDecoration* findBestTargetIntrinsicDecorationXXX(IRInst* inst); + IRTargetIntrinsicDecoration* findBestTargetIntrinsicDecoration(IRInst* inst); // Check if the string being used to define a target intrinsic // is an "ordinary" name, such that we can simply emit a call diff --git a/source/slang/slang-emit-glsl.cpp b/source/slang/slang-emit-glsl.cpp index cc2494455..2a28be70a 100644 --- a/source/slang/slang-emit-glsl.cpp +++ b/source/slang/slang-emit-glsl.cpp @@ -38,7 +38,23 @@ SlangResult GLSLSourceEmitter::init() void GLSLSourceEmitter::_requireRayTracing() { - m_glslExtensionTracker->requireExtension(UnownedStringSlice::fromLiteral("GL_NV_ray_tracing")); + // There is more than one extension that provides ray-tracing capabilities, + // and we need to pick which one to enable. + // + // By default, we will use the `GL_EXT_ray_tracing` extension, but if + // the user has explicitly opted in to the `GL_NV_ray_tracing` extension + // we will use that one instead. 
+ // + if( getTargetCaps().implies(CapabilityAtom::GL_NV_ray_tracing) ) + { + m_glslExtensionTracker->requireExtension(UnownedStringSlice::fromLiteral("GL_NV_ray_tracing")); + } + else + { + m_glslExtensionTracker->requireExtension(UnownedStringSlice::fromLiteral("GL_EXT_ray_tracing")); + m_glslExtensionTracker->requireSPIRVVersion(SemanticVersion(1, 4)); + } + m_glslExtensionTracker->requireVersion(ProfileVersion::GLSL_460); } @@ -542,7 +558,14 @@ bool GLSLSourceEmitter::_emitGLSLLayoutQualifier(LayoutResourceKind kind, EmitVa m_writer->emit("layout(push_constant)\n"); break; case LayoutResourceKind::ShaderRecord: - m_writer->emit("layout(shaderRecordNV)\n"); + if( getTargetCaps().implies(CapabilityAtom::GL_NV_ray_tracing) ) + { + m_writer->emit("layout(shaderRecordNV)\n"); + } + else + { + m_writer->emit("layout(shaderRecordEXT)\n"); + } break; } @@ -1029,19 +1052,40 @@ void GLSLSourceEmitter::emitLayoutQualifiersImpl(IRVarLayout* layout) case LayoutResourceKind::RayPayload: { - m_writer->emit("rayPayloadInNV "); + if( getTargetCaps().implies(CapabilityAtom::GL_NV_ray_tracing) ) + { + m_writer->emit("rayPayloadInNV "); + } + else + { + m_writer->emit("rayPayloadInEXT "); + } } break; case LayoutResourceKind::CallablePayload: { - m_writer->emit("callableDataInNV "); + if( getTargetCaps().implies(CapabilityAtom::GL_NV_ray_tracing) ) + { + m_writer->emit("callableDataInNV "); + } + else + { + m_writer->emit("callableDataInEXT "); + } } break; case LayoutResourceKind::HitAttributes: { - m_writer->emit("hitAttributeNV "); + if( getTargetCaps().implies(CapabilityAtom::GL_NV_ray_tracing) ) + { + m_writer->emit("hitAttributeNV "); + } + else + { + m_writer->emit("hitAttributeEXT "); + } } break; @@ -1704,7 +1748,15 @@ void GLSLSourceEmitter::emitSimpleTypeImpl(IRType* type) case kIROp_RaytracingAccelerationStructureType: { _requireRayTracing(); - m_writer->emit("accelerationStructureNV"); + + if( getTargetCaps().implies(CapabilityAtom::GL_NV_ray_tracing) ) + { + 
m_writer->emit("accelerationStructureNV"); + } + else + { + m_writer->emit("accelerationStructureEXT"); + } break; } @@ -1806,19 +1858,40 @@ void GLSLSourceEmitter::emitVarDecorationsImpl(IRInst* varDecl) m_writer->emit("layout(location = "); m_writer->emit(getRayPayloadLocation(varDecl)); m_writer->emit(")\n"); - m_writer->emit("rayPayloadNV\n"); + if( getTargetCaps().implies(CapabilityAtom::GL_NV_ray_tracing) ) + { + m_writer->emit("rayPayloadNV\n"); + } + else + { + m_writer->emit("rayPayloadEXT\n"); + } } if (varDecl->findDecoration<IRVulkanCallablePayloadDecoration>()) { m_writer->emit("layout(location = "); m_writer->emit(getCallablePayloadLocation(varDecl)); m_writer->emit(")\n"); - m_writer->emit("callableDataNV\n"); + if( getTargetCaps().implies(CapabilityAtom::GL_NV_ray_tracing) ) + { + m_writer->emit("callableDataNV\n"); + } + else + { + m_writer->emit("callableDataEXT\n"); + } } if (varDecl->findDecoration<IRVulkanHitAttributesDecoration>()) { - m_writer->emit("hitAttributeNV\n"); + if( getTargetCaps().implies(CapabilityAtom::GL_NV_ray_tracing) ) + { + m_writer->emit("hitAttributeNV\n"); + } + else + { + m_writer->emit("hitAttributeEXT\n"); + } } if (varDecl->findDecoration<IRGloballyCoherentDecoration>()) diff --git a/source/slang/slang-emit.cpp b/source/slang/slang-emit.cpp index ef1442a1b..20e8c0beb 100644 --- a/source/slang/slang-emit.cpp +++ b/source/slang/slang-emit.cpp @@ -542,7 +542,7 @@ Result linkAndOptimizeIR( { case CodeGenTarget::HLSL: { - auto profile = targetRequest->targetProfile; + auto profile = targetRequest->getTargetProfile(); if( profile.getFamily() == ProfileFamily::DX ) { if(profile.getVersion() <= ProfileVersion::DX_5_0) diff --git a/source/slang/slang-ir.cpp b/source/slang/slang-ir.cpp index aa72cc0c3..7e84ca66a 100644 --- a/source/slang/slang-ir.cpp +++ b/source/slang/slang-ir.cpp @@ -2160,8 +2160,24 @@ namespace Slang IRType* capabilityAtomType = getIntType(); IRType* capabilitySetType = getCapabilitySetType(); + // Not: Our 
`CapabilitySet` representation consists of a list + // of `CapabilityAtom`s, and by default the list is stored + // "expanded" so that it includes atoms that are transitively + // implied by one another. + // + // For representation in the IR, it is preferable to include + // as few atoms as possible, so that we don't store anything + // redundant in, e.g., serialized modules. + // + // We thus requqest a list of "compacted" atoms which should + // be a minimal list of atoms such that they will produce + // the same `CapabilitySet` when expanded. + + List<CapabilityAtom> compactedAtoms; + caps.calcCompactedAtoms(compactedAtoms); + List<IRInst*> args; - for( auto atom : caps.getAtoms() ) + for( auto atom : compactedAtoms ) { args.add(getIntValue(capabilityAtomType, Int(atom))); } @@ -5656,22 +5672,6 @@ namespace Slang // IRTargetIntrinsicDecoration // - static bool _areIntrinsicCapsBetterForTarget( - CapabilitySet const& candidateCaps, - CapabilitySet const& existingCaps, - CapabilitySet const& targetCaps) - { - bool candidateIsAvailable = targetCaps.implies(candidateCaps); - bool existingIsAvailable = targetCaps.implies(existingCaps); - if(candidateIsAvailable != existingIsAvailable) - return candidateIsAvailable; - - if(candidateCaps.implies(existingCaps)) - return true; - - return false; - } - IRTargetIntrinsicDecoration* findAnyTargetIntrinsicDecoration( IRInst* val) { @@ -5704,7 +5704,7 @@ namespace Slang if (decorationCaps.isIncompatibleWith(targetCaps)) continue; - if(!bestDecoration || _areIntrinsicCapsBetterForTarget(decorationCaps, bestCaps, targetCaps)) + if(!bestDecoration || decorationCaps.isBetterForTarget(bestCaps, targetCaps)) { bestDecoration = decoration; bestCaps = decorationCaps; diff --git a/source/slang/slang-options.cpp b/source/slang/slang-options.cpp index c3de87d87..c3db39773 100644 --- a/source/slang/slang-options.cpp +++ b/source/slang/slang-options.cpp @@ -162,6 +162,8 @@ struct OptionsParser int targetID = -1; FloatingPointMode 
floatingPointMode = FloatingPointMode::Default; + List<CapabilityAtom> capabilityAtoms; + // State for tracking command-line errors bool conflictingProfilesSet = false; bool redundantProfileSet = false; @@ -393,6 +395,11 @@ struct OptionsParser rawTarget->profileVersion = profileVersion; } + void addCapabilityAtom(RawTarget* rawTarget, CapabilityAtom atom) + { + rawTarget->capabilityAtoms.add(atom); + } + void setFloatingPointMode(RawTarget* rawTarget, FloatingPointMode mode) { rawTarget->floatingPointMode = mode; @@ -655,13 +662,28 @@ struct OptionsParser // specific stage to use for an entry point. else if (argStr == "-profile") { - String name; - SLANG_RETURN_ON_FAIL(tryReadCommandLineArgument(sink, arg, &argCursor, argEnd, name)); + String operand; + SLANG_RETURN_ON_FAIL(tryReadCommandLineArgument(sink, arg, &argCursor, argEnd, operand)); + + // A a convenience, the `-profile` option supporst an operand that consists + // of multiple tokens separated with `+`. The eventual goal is that each + // of these tokens will represent a capability that should be assumed to + // be present on the target. + // + List<UnownedStringSlice> slices; + StringUtil::split(operand.getUnownedSlice(), '+', slices); + Index sliceCount = slices.getCount(); + + // For now, we will require that the *first* capability in the list is + // special, and reprsents the traditional `Profile` to compile for in + // the existing Slang model. + // + UnownedStringSlice profileName = sliceCount >= 1 ? 
slices[0] : UnownedTerminatedStringSlice(""); - SlangProfileID profileID = session->findProfile(name.begin()); + SlangProfileID profileID = Slang::Profile::lookUp(profileName).raw; if( profileID == SLANG_PROFILE_UNKNOWN ) { - sink->diagnose(SourceLoc(), Diagnostics::unknownProfile, name); + sink->diagnose(SourceLoc(), Diagnostics::unknownProfile, profileName); return SLANG_FAIL; } else @@ -678,6 +700,22 @@ struct OptionsParser setStage(getCurrentEntryPoint(), stage); } } + + // Any additional capability tokens will be assumed to represent `CapabilityAtom`s. + // Those atoms will need to be added to the supported capabilities of the target. + // + for(Index i = 1; i < sliceCount; ++i) + { + UnownedStringSlice atomName = slices[i]; + CapabilityAtom atom = findCapabilityAtom(atomName); + if( atom == CapabilityAtom::Invalid ) + { + sink->diagnose(SourceLoc(), Diagnostics::unknownProfile, atomName); + return SLANG_FAIL; + } + + addCapabilityAtom(getCurrentTarget(), atom); + } } else if (argStr == "-stage") { @@ -1329,6 +1367,10 @@ struct OptionsParser { setProfileVersion(getCurrentTarget(), defaultTarget.profileVersion); } + for( auto atom : defaultTarget.capabilityAtoms ) + { + addCapabilityAtom(getCurrentTarget(), atom); + } getCurrentTarget()->targetFlags |= defaultTarget.targetFlags; @@ -1412,6 +1454,10 @@ struct OptionsParser { compileRequest->setTargetProfile(targetID, Profile(rawTarget.profileVersion).raw); } + for( auto atom : rawTarget.capabilityAtoms ) + { + requestImpl->addTargetCapability(targetID, SlangCapabilityID(atom)); + } if( rawTarget.targetFlags ) { @@ -1539,7 +1585,7 @@ struct OptionsParser } else { - target->targetFlags |= SLANG_TARGET_FLAG_GENERATE_WHOLE_PROGRAM; + target->addTargetFlags(SLANG_TARGET_FLAG_GENERATE_WHOLE_PROGRAM); targetInfo->wholeTargetOutputPath = rawOutput.path; } } diff --git a/source/slang/slang-parameter-binding.cpp b/source/slang/slang-parameter-binding.cpp index 73c94f722..9b080a11c 100644 --- 
a/source/slang/slang-parameter-binding.cpp +++ b/source/slang/slang-parameter-binding.cpp @@ -1450,7 +1450,7 @@ static RefPtr<TypeLayout> processSimpleEntryPointParameter( // if( isD3DTarget(context->getTargetRequest()) ) { - auto version = context->getTargetRequest()->targetProfile.getVersion(); + auto version = context->getTargetRequest()->getTargetProfile().getVersion(); if( version <= ProfileVersion::DX_5_0 ) { // We will address the conflict here by claiming the corresponding @@ -3486,7 +3486,7 @@ RefPtr<ProgramLayout> generateParameterBindings( // On a CPU target, it's okay to have global scope parameters that use Uniform resources (because on CPU // all resources are 'Uniform') // TODO(JS): We'll just assume the same with CUDA target for now.. - if (!_isCPUTarget(targetReq->target) && !_isPTXTarget(targetReq->target)) + if (!_isCPUTarget(targetReq->getTarget()) && !_isPTXTarget(targetReq->getTarget())) { for( auto& parameterInfo : sharedContext.parameters ) { diff --git a/source/slang/slang-profile.h b/source/slang/slang-profile.h index f5b15eda6..5150da27a 100644 --- a/source/slang/slang-profile.h +++ b/source/slang/slang-profile.h @@ -103,6 +103,7 @@ namespace Slang ProfileFamily getFamily() const { return getProfileFamily(getVersion()); } + static Profile lookUp(UnownedStringSlice const& name); static Profile lookUp(char const* name); char const* getName(); diff --git a/source/slang/slang-repro.cpp b/source/slang/slang-repro.cpp index e47fade70..61ab3b75d 100644 --- a/source/slang/slang-repro.cpp +++ b/source/slang/slang-repro.cpp @@ -418,8 +418,8 @@ static bool _isStorable(const PathInfo::Type type) auto& dst = base[dstTargets[i]]; dst.target = srcTargetRequest->getTarget(); dst.profile = srcTargetRequest->getTargetProfile(); - dst.targetFlags = srcTargetRequest->targetFlags; - dst.floatingPointMode = srcTargetRequest->floatingPointMode; + dst.targetFlags = srcTargetRequest->getTargetFlags(); + dst.floatingPointMode = 
srcTargetRequest->getFloatingPointMode(); } // Copy the entry point/target output names @@ -906,9 +906,9 @@ struct LoadContext auto dstTarget = linkage->targets[index]; SLANG_ASSERT(dstTarget->getTarget() == src.target); - dstTarget->targetProfile = src.profile; - dstTarget->targetFlags = src.targetFlags; - dstTarget->floatingPointMode = src.floatingPointMode; + dstTarget->setTargetProfile(src.profile); + dstTarget->addTargetFlags(src.targetFlags); + dstTarget->setFloatingPointMode(src.floatingPointMode); // If there is output state (like output filenames) add here if (src.outputStates.getCount()) diff --git a/source/slang/slang-serialize-container.cpp b/source/slang/slang-serialize-container.cpp index abdc382f9..344b4aa02 100644 --- a/source/slang/slang-serialize-container.cpp +++ b/source/slang/slang-serialize-container.cpp @@ -123,10 +123,10 @@ namespace Slang { auto& dstTarget = targetComponent.target; - dstTarget.floatingPointMode = target->floatingPointMode; - dstTarget.profile = target->targetProfile; - dstTarget.flags = target->targetFlags; - dstTarget.codeGenTarget = target->target; + dstTarget.floatingPointMode = target->getFloatingPointMode(); + dstTarget.profile = target->getTargetProfile(); + dstTarget.flags = target->getTargetFlags(); + dstTarget.codeGenTarget = target->getTarget(); out.targetComponents.add(targetComponent); } diff --git a/source/slang/slang.cpp b/source/slang/slang.cpp index ef838b871..fbcc97c51 100644 --- a/source/slang/slang.cpp +++ b/source/slang/slang.cpp @@ -474,6 +474,12 @@ SLANG_NO_THROW SlangProfileID SLANG_MCALL Session::findProfile( return Slang::Profile::lookUp(name).raw; } +SLANG_NO_THROW SlangCapabilityID SLANG_MCALL Session::findCapability( + char const* name) +{ + return SlangCapabilityID(Slang::findCapabilityAtom(UnownedTerminatedStringSlice(name))); +} + SLANG_NO_THROW void SLANG_MCALL Session::setDownstreamCompilerPath( SlangPassThrough inPassThrough, char const* path) @@ -571,7 +577,7 @@ DownstreamCompiler* 
Session::getDefaultDownstreamCompiler(SourceLanguage sourceL Profile getEffectiveProfile(EntryPoint* entryPoint, TargetRequest* target) { auto entryPointProfile = entryPoint->getProfile(); - auto targetProfile = target->targetProfile; + auto targetProfile = target->getTargetProfile(); // Depending on the target *format* we might have to restrict the // profile family to one that makes sense. @@ -579,7 +585,7 @@ Profile getEffectiveProfile(EntryPoint* entryPoint, TargetRequest* target) // TODO: Some of this should really be handled as validation at // the front-end. People shouldn't be allowed to ask for SPIR-V // output with Shader Model 5.0... - switch(target->target) + switch(target->getTarget()) { default: break; @@ -747,9 +753,9 @@ void Linkage::addTarget( auto targetIndex = addTarget(CodeGenTarget(desc.format)); auto target = targets[targetIndex]; - target->floatingPointMode = FloatingPointMode(desc.floatingPointMode); - target->targetFlags = desc.flags; - target->targetProfile = Profile(desc.profile); + target->setFloatingPointMode(FloatingPointMode(desc.floatingPointMode)); + target->addTargetFlags(desc.flags); + target->setTargetProfile(Profile(desc.profile)); } #if 0 @@ -961,6 +967,12 @@ SlangResult Linkage::setMatrixLayoutMode( // TargetRequest // +TargetRequest::TargetRequest(Linkage* linkage, CodeGenTarget format) + : linkage(linkage) + , format(format) +{} + + Session* TargetRequest::getSession() { return linkage->getSessionImpl(); @@ -971,10 +983,17 @@ MatrixLayoutMode TargetRequest::getDefaultMatrixLayoutMode() return linkage->getDefaultMatrixLayoutMode(); } +void TargetRequest::addCapability(CapabilityAtom capability) +{ + rawCapabilities.add(capability); + cookedCapabilities = CapabilitySet::makeEmpty(); +} + + CapabilitySet TargetRequest::getTargetCaps() { - if(!targetCaps.isInvalid()) - return targetCaps; + if(!cookedCapabilities.isEmpty()) + return cookedCapabilities; // The full `CapabilitySet` for the target will be computed // from the 
combination of the code generation format, and @@ -996,7 +1015,7 @@ CapabilitySet TargetRequest::getTargetCaps() // are available where can be directly encoded on the declarations. List<CapabilityAtom> atoms; - switch(target) + switch(format) { case CodeGenTarget::GLSL: case CodeGenTarget::GLSL_Vulkan: @@ -1033,9 +1052,11 @@ CapabilitySet TargetRequest::getTargetCaps() default: break; } + for(auto atom : rawCapabilities) + atoms.add(atom); - targetCaps = CapabilitySet(atoms); - return targetCaps; + cookedCapabilities = CapabilitySet(atoms); + return cookedCapabilities; } @@ -2136,9 +2157,7 @@ int EndToEndCompileRequest::addEntryPoint( UInt Linkage::addTarget( CodeGenTarget target) { - RefPtr<TargetRequest> targetReq = new TargetRequest(); - targetReq->linkage = this; - targetReq->target = target; + RefPtr<TargetRequest> targetReq = new TargetRequest(this, target); Index result = targets.getCount(); targets.add(targetReq); @@ -3681,17 +3700,17 @@ int EndToEndCompileRequest::addCodeGenTarget(SlangCompileTarget target) void EndToEndCompileRequest::setTargetProfile(int targetIndex, SlangProfileID profile) { - getLinkage()->targets[targetIndex]->targetProfile = Profile(profile); + getLinkage()->targets[targetIndex]->setTargetProfile(Profile(profile)); } void EndToEndCompileRequest::setTargetFlags(int targetIndex, SlangTargetFlags flags) { - getLinkage()->targets[targetIndex]->targetFlags = flags; + getLinkage()->targets[targetIndex]->addTargetFlags(flags); } void EndToEndCompileRequest::setTargetFloatingPointMode(int targetIndex, SlangFloatingPointMode mode) { - getLinkage()->targets[targetIndex]->floatingPointMode = FloatingPointMode(mode); + getLinkage()->targets[targetIndex]->setFloatingPointMode(FloatingPointMode(mode)); } void EndToEndCompileRequest::setMatrixLayoutMode(SlangMatrixLayoutMode mode) @@ -3705,6 +3724,15 @@ void EndToEndCompileRequest::setTargetMatrixLayoutMode(int targetIndex, SlangMat setMatrixLayoutMode(mode); } +SlangResult 
EndToEndCompileRequest::addTargetCapability(SlangInt targetIndex, SlangCapabilityID capability) +{ + auto& targets = getLinkage()->targets; + if(targetIndex < 0 || targetIndex >= targets.getCount()) + return SLANG_E_INVALID_ARG; + targets[targetIndex]->addCapability(CapabilityAtom(capability)); + return SLANG_OK; +} + void EndToEndCompileRequest::setDebugInfoLevel(SlangDebugInfoLevel level) { getLinkage()->debugInfoLevel = DebugInfoLevel(level); |
