summaryrefslogtreecommitdiff
path: root/source/slang/slang-capability.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'source/slang/slang-capability.cpp')
-rw-r--r--source/slang/slang-capability.cpp903
1 files changed, 634 insertions, 269 deletions
diff --git a/source/slang/slang-capability.cpp b/source/slang/slang-capability.cpp
index 7b4361a58..a75f6131c 100644
--- a/source/slang/slang-capability.cpp
+++ b/source/slang/slang-capability.cpp
@@ -1,6 +1,8 @@
// slang-capability.cpp
#include "slang-capability.h"
+#include "../core/slang-dictionary.h"
+
// This file implements the core of the "capability" system.
namespace Slang
@@ -10,37 +12,69 @@ namespace Slang
// CapabilityAtom
//
-// We are going to divide capabilities into a few categories,
-// which will be represented as flags for now.
-//
-// Every capability will be either concrete or abstract.
-// An abstract capability basically represents a category
-// of related capabilities that all fill a similar role.
-// For example, we could have an abstract capability that
-// represents "stages" and then the concrete capabilities
-// `vertex`, `fragment`, etc. would inherit from it.
+// We are going to divide capability atoms into a few categories.
//
-// Abstract capabilities are critical in our model for
-// knowing when two capabilities are fundamentally incompatible.
-// For example, it is meaningless to compile code for both
-// the `vertex` and `fragment` capabilities at the same time,
-// because no target processor supports both at once.
+enum class CapabilityAtomFlavor : int32_t
+{
+ // A concrete capability atom is something that a target
+ // can directly support, where the presence of the feature
+ // directly provides functionality. A specific OpenGL
+ // or Vulkan extension would be an example of a concrete
+ // capability.
+ //
+ Concrete,
+
+ // An abstract capability represents a class of feature
+ // where multiple different implementations might be possible.
+ // For example, "ray tracing" might be an abstract feature
+ // that a function can require, but a specific target will
+ // only be able to provide that abstract feature via some
+ // specific concrete feature (e.g., `GL_EXT_ray_tracing`).
+ Abstract,
+
+ // An alias capability atom is one that is exactly equivalent
+ // to the things it inherits from.
+ //
+ // For example, a `ps_5_1` capability would just be an
+ // alias for the combination of the `fragment` capability
+ // and the `sm_5_1` capability.
+ //
+ Alias,
+};
+
+// Certain capability atoms will conflict with one another,
+// such that a concrete target should never be able to support
+// both.
//
-// TODO: It is possible that instead of flags this could simply
-// identify a "kind" of atom, with two different states.
+// It is possible in theory to define "conflicting" capabilities
+// in terms of the inheritance graph, but that makes checking
+// for conflicts more difficult.
//
-// TODO: It is likely that in a future change we will want to
-// add a third case here for "alias" capabilities, which are
-// pseudo-atomic capabilities that are just equivalent to
-// the set of their bases.
+// Instead, we are going to allow each capability to define a
+// mask to indicate group(s) of conflicting capabilities it
+// belongs to. Two different capability atoms that have
+// overlapping masks will be considered to conflict.
//
-typedef uint32_t CapabilityAtomFlags;
-enum : CapabilityAtomFlags
+enum class CapabilityAtomConflictMask : uint32_t
{
- kCapabilityAtomFlags_Concrete = 0,
- kCapabilityAtomFlags_Abstract = 1 << 0,
+ // By default, most capability atoms do not conflict with one another.
+ None = 0,
+
+ // Capability atoms that reprsent target code generation formats always conflict.
+ // (e.g., you cannot generate both HLSL and C++ output at once)
+ TargetFormat = 1 << 0,
+
+ // Capability atoms that represent GLSL ray tracing extensions conflict with
+ // one another (we only want to use one such extension at a time).
+ RayTracingExtension = 1 << 1,
};
+// For simplicity in building up our data structure representing
+// all capability atoms, we will limit the number of bases that
+// a capability atom is allowed to inherit from.
+//
+static const int kCapabilityAtom_MaxBases = 4;
+
// The macros in the `slang-capability-defs.h` file will be used
// to fill out a `static const` array of information about each
// capability atom.
@@ -48,11 +82,19 @@ enum : CapabilityAtomFlags
struct CapabilityAtomInfo
{
/// The API-/language-exposed name of the capability.
- char const* name;
+ char const* name;
+
+ /// Flavor of atom: concrete, abstract, or alias
+ CapabilityAtomFlavor flavor;
+
+ /// A mask to indicate which other categories of atoms this one conflicts with
+ CapabilityAtomConflictMask conflictMask;
- /// Flags to determine if the capability is concrete-vs-abstract, etc.
- CapabilityAtomFlags flags;
- CapabilityAtom bases[4];
+ /// Ranking to use when deciding if this atom is a "better" one to select.
+ uint32_t rank;
+
+ /// Base atoms this one "inherits" from (terminated with `Invalid` if not all entries used).
+ CapabilityAtom bases[kCapabilityAtom_MaxBases];
};
//
// The array is going to be sized to include an entry for `CapabilityAtom::Invalid`
@@ -61,10 +103,10 @@ struct CapabilityAtomInfo
//
static const CapabilityAtomInfo kCapabilityAtoms[Int(CapabilityAtom::Count) + 1] =
{
- { "invalid", 0, { CapabilityAtom::Invalid, CapabilityAtom::Invalid, CapabilityAtom::Invalid, CapabilityAtom::Invalid } },
+ { "invalid", CapabilityAtomFlavor::Concrete, CapabilityAtomConflictMask::None, 0, { CapabilityAtom::Invalid, CapabilityAtom::Invalid, CapabilityAtom::Invalid, CapabilityAtom::Invalid } },
-#define SLANG_CAPABILITY_ATOM(ENUMERATOR, NAME, FLAGS, BASE0, BASE1, BASE2, BASE3) \
- { #NAME, kCapabilityAtomFlags_##FLAGS, { CapabilityAtom::BASE0, CapabilityAtom::BASE1, CapabilityAtom::BASE2, CapabilityAtom::BASE3 } },
+#define SLANG_CAPABILITY_ATOM(ENUMERATOR, NAME, FLAVOR, CONFLICT, RANK, BASE0, BASE1, BASE2, BASE3) \
+ { #NAME, CapabilityAtomFlavor::FLAVOR, CapabilityAtomConflictMask::CONFLICT, RANK, { CapabilityAtom::BASE0, CapabilityAtom::BASE1, CapabilityAtom::BASE2, CapabilityAtom::BASE3 } },
#include "slang-capability-defs.h"
};
@@ -75,145 +117,6 @@ static CapabilityAtomInfo const& _getInfo(CapabilityAtom atom)
return kCapabilityAtoms[Int(atom) + 1];
}
-// One capability set or capability atom A implies another set/atom B
-// if any target that supports all of the atoms in A must also support
-// all of those in B.
-
- /// Does `thisAtom` imply `thatAtom`?
-static bool _implies(CapabilityAtom thisAtom, CapabilityAtom thatAtom)
-{
- // When looking at atoms, the immediate easy case is when
- // the two atoms are the same: an atomic capability always
- // implies itself.
- //
- if(thisAtom == thatAtom)
- return true;
-
- // Otherwise, we want to look at the bases of `thisAtom`
- // to see if any of them imply `thatAtom`, since `thisAtom`
- // implies each of its bases.
- //
- auto& thisAtomInfo = _getInfo(thisAtom);
- for( auto thisAtomBase : thisAtomInfo.bases )
- {
- // The lists of bases are currently using `Invalid` as
- // a sentinel value to terminate them, so we need to
- // bail out of the loop when we see the sentinel.
- //
- if(thisAtomBase == CapabilityAtom::Invalid)
- break;
-
- if(_implies(thisAtomBase, thatAtom))
- return true;
- }
-
- return false;
-}
-
- /// Does `base` have any abstract capabilities in common with `otherAtom`
- ///
- /// This subroutine is a helper for `_isIncompatible`.
-static bool _hasAbstractBaseInCommon(CapabilityAtom base, CapabilityAtom otherAtom)
-{
- // First we check the case where `base` itself is an abstract
- // capability atom.
- //
- auto& baseAtomInfo = _getInfo(base);
- if(baseAtomInfo.flags & kCapabilityAtomFlags_Abstract)
- {
- // If `base` is abstract, and `otherAtom` implies `base`,
- // then that means that `otherAtom` includes one or
- // more atoms that inherit from `base`, and thus the
- // two have an abstract base in common.
- //
- if( _implies(otherAtom, base) )
- return true;
- }
-
- // If `base` itself has bases, then we want to check if any
- // of *those* are abstract bases that overlap with `otherAtom`.
- //
- for( auto baseBase : baseAtomInfo.bases )
- {
- if(baseBase == CapabilityAtom::Invalid)
- break;
-
- if(_hasAbstractBaseInCommon(baseBase, otherAtom))
- return true;
- }
-
- // If we didn't manage to find any overlaps, then we conclude
- // that there are no shared abstract bases.
- //
- return false;
-}
-
- /// Is `thisAtom` incompatible with `thatAtom` (such that no target could ever support both at once)
-static bool _isIncompatible(CapabilityAtom thisAtom, CapabilityAtom thatAtom)
-{
- // If either atom implies the other, then they aren't incompatible.
- //
- // For example, if there is an atom representing `sm_5_1` that inherits
- // from an atom representing `sm_5_0`, then clearly the two aren't
- // in any way incompatible (a single target can support both).
- //
- if(_implies(thisAtom, thatAtom) || _implies(thatAtom, thisAtom))
- return false;
-
- // If the two atoms are not in an inheritance relationship, then one of
- // a few cases can apply:
- //
- // * They have no common bases; in this case they are compatible.
- // An example would be `vertex` and `sm_5_0`.
- //
- // * They have a common base, but it is not marked abstract; in
- // this case they are compatible. E.g., two GLSL extensions that
- // both inherit from the `glsl` capability should not conflict.
- //
- // * They have a common base that is marked abstract; in this
- // case they are incompatible. An example would be `vertex`
- // and `fragment` both inheriting from the abstract atom
- // `__stage`.
- //
- // To summarize the above list, we note that two atoms are
- // incompatible with they have an abstract base in common.
- //
- return _hasAbstractBaseInCommon(thisAtom, thatAtom);
-
- // TODO: The above logic is a bit off, but in a way that doesn't
- // matter just yet.
- //
- // We currently have capabilities like:
- //
- // abstract capability __target;
- // capability hlsl : __target;
- // capability glsl : __target;
- //
- // In this case it is clear that `hlsl` and `glsl` should
- // be incompatible, and that the rules as implemented
- // make that the case.
- //
- // A problem arises when we start to add things like extensions:
- //
- // capability EXT_cool_thing : glsl;
- // capability EXT_other_stuff : glsl;
- //
- // In this case, it also seems clear that `EXT_cool_thing`
- // and `EXT_other_stuff` should be mutually compatible.
- // However, with the rules implemented here right now, they
- // would be found incompatible because they share the
- // abstract base `__target`.
- //
- // In this specific case, we know that the relationship
- // between the extensions is fine because they both inherit
- // from `__target` *through* the concrete atom `glsl`.
- //
- // Before adding capabilities that represent optional
- // extensions like this we need to codify the semantics
- // for how incompatibility checks should work in terms
- // of the inheritance graph of capability atoms.
-}
-
CapabilityAtom findCapabilityAtom(UnownedStringSlice const& name)
{
// For now we are implementing a linear search over the
@@ -237,51 +140,33 @@ CapabilityAtom findCapabilityAtom(UnownedStringSlice const& name)
// CapabilitySet
//
-// The current design choice in `CapabilitySet` is that it blindly
-// stores exactly the atoms it is told to, without any up-front
-// processing.
-//
-// This choice has some down-sides, and there are other representations
-// that could be much nicer in the future. Possible improcements include:
-//
-// * The list of atoms could be *expanded* so that if it contains atom A
-// and atom A implies atom B, then the list should also include B.
+// The current design choice in `CapabilitySet` is that it stores
+// an expanded, deduplicated, and sorted list of the capability
+// atoms in the set. "Expanded" here means that it includes the
+// transitive closure of the inheritance graph of those atoms.
//
-// * The list of atoms could be *minimized*, such that if atom A implies
-// atom B, then any list that contains A does not include B (both
-// expanded and minimized lists have different benefits).
-//
-// * The list of atoms could be deduplicated.
-//
-// * The list of atoms could be sorted.
-//
-// * The lists could be deduplicated and cached in some central place
-// (the like the session) so that repreated attempts to create the
-// same capability sets return the same objects.
-//
-// In some parts of the code below we will call out how these improvements
-// could affect the algorithms used.
-
-// Given our simple choices right now, the constructors for `CapabilitySet`
-// are all straightforward: just adding the right atoms to the list.
+// This choice is intended to make certain operations on
+// capability sets more efficient, since use things like
+// binary searches to efficiently detect whether an atom
+// is present in a set.
CapabilitySet::CapabilitySet()
{}
CapabilitySet::CapabilitySet(Int atomCount, CapabilityAtom const* atoms)
{
- m_atoms.addRange(atoms, atomCount);
+ _init(atomCount, atoms);
}
CapabilitySet::CapabilitySet(CapabilityAtom atom)
{
- m_atoms.add(atom);
+ _init(1, &atom);
}
CapabilitySet::CapabilitySet(List<CapabilityAtom> const& atoms)
- : m_atoms(atoms)
-{}
-
+{
+ _init(atoms.getCount(), atoms.getBuffer());
+}
CapabilitySet CapabilitySet::makeEmpty()
{
@@ -290,7 +175,118 @@ CapabilitySet CapabilitySet::makeEmpty()
CapabilitySet CapabilitySet::makeInvalid()
{
- return CapabilitySet(CapabilityAtom::Invalid);
+ // An invalid capability set will always be a singleton
+ // set of the `Invalid` atom, and we will construct
+ // the set directly rather than use the more expensive
+ // logic in `_init()`.
+ //
+ CapabilitySet result;
+ result.m_expandedAtoms.add(CapabilityAtom::Invalid);
+ return result;
+}
+
+ /// Helper routine for `CapabilitySet::_init`.
+ ///
+ /// Recursively add all atoms implied by `atom` to `ioExpandedAtoms`.
+ ///
+static void _addAtomsRec(
+ CapabilityAtom atom,
+ HashSet<CapabilityAtom>& ioExpandedAtoms)
+{
+ auto& atomInfo = _getInfo(atom);
+
+ // The first step is to add `atom` itself, *unless*
+ // it is an alias, because an alias shouldn't impact
+ // whether one set is considered a subset/superset of
+ // another.
+ //
+ if(atomInfo.flavor != CapabilityAtomFlavor::Alias)
+ {
+ ioExpandedAtoms.Add(atom);
+ }
+
+ // Next we add all the atoms transitively implied by `atom`.
+ //
+ for(auto baseAtom : atomInfo.bases)
+ {
+ // Note: the list of `bases` is a fixed-size array, but
+ // can be terminated with `Invalid` to indicate that
+ // not all of the entries are being used.
+ //
+ // If we see the sentinel, then we know we are at the end
+ // of the list.
+ //
+ if(baseAtom == CapabilityAtom::Invalid)
+ break;
+
+ _addAtomsRec(baseAtom, ioExpandedAtoms);
+ }
+}
+
+void CapabilitySet::_init(Int atomCount, CapabilityAtom const* atoms)
+{
+ // In order to fill in the expanded and deduplicated
+ // set of atoms, we will use an explicit hash set
+ // and then recursively walk the tree of atoms and
+ // their bases.
+ //
+ HashSet<CapabilityAtom> expandedAtomsSet;
+ for(Int i = 0; i < atomCount; ++i)
+ {
+ _addAtomsRec(atoms[i], expandedAtomsSet);
+ }
+
+ // We can then translate the set of atoms into a list,
+ // and then sort that list to produce the data that
+ // we use in all our other queries.
+ //
+ for(auto atom : expandedAtomsSet)
+ {
+ m_expandedAtoms.add(atom);
+ }
+ m_expandedAtoms.sort();
+}
+
+void CapabilitySet::calcCompactedAtoms(List<CapabilityAtom>& outAtoms) const
+{
+ // A "compacted" list of atoms is one that starts with
+ // the "expanded" list and removes any atoms that are
+ // implied by another atom already in the list.
+ //
+ // If the expanded list contains atom A, and A inherits
+ // from B, then we know that the expanded list also contains B,
+ // but the compacted list should not.
+ //
+ // We can thus look through the list of atoms A and for
+ // each base B of A, add it to a set of "redundant" atoms
+ // that need not appear in the compacted list.
+ //
+ HashSet<CapabilityAtom> redundantAtomsSet;
+ for( auto atom : m_expandedAtoms )
+ {
+ auto& atomInfo = _getInfo(atom);
+ for(auto baseAtom : atomInfo.bases)
+ {
+ // Note: dealing with possible early termination of the `bases` list.
+ if(baseAtom == CapabilityAtom::Invalid)
+ break;
+
+ redundantAtomsSet.Add(baseAtom);
+ }
+ }
+
+ // Once we are done figuring out which atoms are redundant,
+ // we can iterate over the expanded list and add all the
+ // non-redundant ones to the compacted output list.
+ //
+ outAtoms.clear();
+ for( auto atom : m_expandedAtoms )
+ {
+ if(!redundantAtomsSet.Contains(atom))
+ {
+ outAtoms.add(atom);
+ }
+ }
}
bool CapabilitySet::isEmpty() const
@@ -298,7 +294,7 @@ bool CapabilitySet::isEmpty() const
// Checking if a capability set is empty is trivial in any representation;
// all we need to know is if it has zero atoms in its definition.
//
- return m_atoms.getCount() == 0;
+ return m_expandedAtoms.getCount() == 0;
}
bool CapabilitySet::isInvalid() const
@@ -313,115 +309,484 @@ bool CapabilitySet::isInvalid() const
// invalid (e.g., a set {A,B} would be invalid if A and B are incompatible,
// but it would not be in the canonical form this subroutine checks).
//
- if(m_atoms.getCount() != 1) return false;
- return m_atoms[0] == CapabilityAtom::Invalid;
+ if(m_expandedAtoms.getCount() != 1) return false;
+ return m_expandedAtoms[0] == CapabilityAtom::Invalid;
}
bool CapabilitySet::isIncompatibleWith(CapabilityAtom that) const
{
- // We know that capabilities that are in an inheritnace
- // relationship with one another can't be incompatible.
+ // Checking for incompatibility is complicated, and it is best
+ // to only implement it for full (expanded) sets.
//
- if(this->implies(that) || CapabilitySet(that).implies(*this))
- return false;
+ return isIncompatibleWith(CapabilitySet(that));
+}
- // Othwerise, we want to perform a check for each of the
- // atoms in this set, whether it is incompatible with any
- // of the atoms in the other set (which in this case is one atom).
+uint32_t CapabilitySet::_calcConflictMask() const
+{
+ // Given a capbility set, we want to compute the mask representing
+ // all groups of features for which it holds a potentially-conflicting atom.
//
- for( auto thisAtom : this->m_atoms )
+ uint32_t mask = 0;
+ for( auto atom : m_expandedAtoms )
{
- if(_isIncompatible(thisAtom, that))
- return true;
+ mask |= uint32_t(_getInfo(atom).conflictMask);
}
-
- return false;
+ return mask;
}
bool CapabilitySet::isIncompatibleWith(CapabilitySet const& that) const
{
- // We need to look at the atoms in `this` that are not
- // present in `that`, and vice versa. For each such atom
- // we will check if it is incompatible with the other, by
- // virtue of the other already including a concrete atom
- // that cannot co-exist with it.
+ // The `this` and `that` sets are incompatible if there exists
+ // an atom A in `this` and an atom `B` in `that` such that
+ // A and B are not equal, but the two have overlapping "conflict mask."
//
- for( auto thisAtom : this->m_atoms )
- {
- if(that.isIncompatibleWith(thisAtom))
- return true;
- }
- for( auto thatAtom : that.m_atoms )
+ // Equivalently, we can say that the two are in conflict if
+ //
+ // * One of the two sets contains an atom A with conflict mask M
+ // * The other set contains at least one atom that conflicts with M
+ // * The other set does not contain A
+ //
+ // Our approach here is all about minimizing the number of
+ // iterations we take over lists of atoms, and trying to
+ // avoid anything super-linear.
+
+ // We start by identifying the OR of the conflict masks for
+ // all features in `this` and `that`.
+ //
+ uint32_t thisMask = this->_calcConflictMask();
+ uint32_t thatMask = that._calcConflictMask();
+
+ // Note: there is a possible early-exit opportunity here if
+ // `thisMask` and `thatMask` have no overlap: there could
+ // be no conflicts in that case.
+
+ // Next we will iterate over the two sets in tandem (O(N) time
+ // in the size of the larger set), and identify any elements
+ // that are present in one and not the other.
+ //
+ Index thisCount = this->m_expandedAtoms.getCount();
+ Index thatCount = that.m_expandedAtoms.getCount();
+ Index thisIndex = 0;
+ Index thatIndex = 0;
+ for(;;)
{
- if(this->isIncompatibleWith(thatAtom))
- return true;
+ if(thisIndex == thisCount) break;
+ if(thatIndex == thatCount) break;
+
+ auto thisAtom = this->m_expandedAtoms[thisIndex];
+ auto thatAtom = that.m_expandedAtoms[thatIndex];
+
+ if(thisAtom == thatAtom)
+ {
+ thisIndex++;
+ thatIndex++;
+ continue;
+ }
+
+ if( thisAtom < thatAtom )
+ {
+ // `thisAtom` is present in `this` but not `that.
+ //
+ // If `thisAtom` has a conflict mask that overlaps
+ // with `thatMask`, then we have a conflict: the
+ // other set doesn't include `thisAtom`, but *does*
+ // include something with an overlapping mask
+ // (we don't know what at this point in the code).
+ //
+ auto thisAtomMask = uint32_t(_getInfo(thisAtom).conflictMask);
+ if(thisAtomMask & thatMask)
+ return true;
+ thisIndex++;
+ }
+ else
+ {
+ SLANG_ASSERT(thisAtom > thatAtom);
+
+ // `thatAtom` is present in `that` but not `this.
+ //
+ // The logic here is the mirror image of the case above.
+ //
+ auto thatAtomMask = uint32_t(_getInfo(thatAtom).conflictMask);
+ if(thatAtomMask & thisMask)
+ return true;
+ thatIndex++;
+ }
}
- return false;
- // TODO: If we had a representation that stored a minified,
- // sorted, deduplicated list of atoms, then it would be easy
- // to iterate over the two lists in tandem and identify any
- // element that is present in one list but not the other.
- //
- // Those elements would be the candidates that could cause
- // incompatiblity, so that we wouldn't need to perform
- // the check on each atom like we do above.
+ return false;
}
bool CapabilitySet::implies(CapabilitySet const& that) const
{
- // This capability set implies `other` if for every atom in `other`,
- // that atom is present in this sets list of atoms or it is
- // implies by something in the list of atoms.
+ // One capability set implies another if it is a super-set
+ // of the other one. Think of it this way: if your target
+ // supports features {X, Y, Z}, then that implies it also
+ // supports features {X,Z}.
+ //
+ // Because both `this` and `that` have expanded lists
+ // of all the capability atoms they imply *and* those
+ // lists are sorted, we can simply walk through the
+ // lists in tandem and see if there are any entries
+ // in `that` which are not present in `this.
+
+ Index thisCount = this->m_expandedAtoms.getCount();
+ Index thatCount = that.m_expandedAtoms.getCount();
+
+ // We cannot possibly have `this` contain all the atoms
+ // in `that` if the latter is has more atoms.
+ //
+ if(thatCount > thisCount)
+ return false;
+
+ // Note: the following iteration is O(N) in the size
+ // of the larger of the two sets, which is probably
+ // needlessly inefficient. We might expect that `that`
+ // will often be a much smaller set, and we'd like to
+ // scale in its size rather than the size of `this`.
+ //
+ // A more advanced algorithm here would be to do
+ // something recursive:
//
- for( auto atom : that.m_atoms )
+ // * If `that` is singleton set, then we can find
+ // whether `this` contains it via binary search.
+ //
+ // * Otherwise, we can split `that` into two
+ // equally-sized subsets. By taking a "pivot" value
+ // from where that split took place we can then
+ // use a binary search to partition `this` into
+ // two subsets and recurse on each side of that
+ // partition.
+ //
+ // In practice, the size of the sets we are dealing
+ // with right now doesn't justify such a "clever" algorithm.
+
+ Index thisIndex = 0;
+ Index thatIndex = 0;
+ for(;;)
{
- if(!this->implies(atom))
+ if(thisIndex == thisCount) break;
+ if(thatIndex == thatCount) break;
+
+ auto thisAtom = this->m_expandedAtoms[thisIndex];
+ auto thatAtom = that.m_expandedAtoms[thatIndex];
+
+ if( thisAtom == thatAtom )
+ {
+ // We have an atom that both sets contain;
+ // we should skip past it and keep looking.
+ //
+ thisIndex++;
+ thatIndex++;
+ continue;
+ }
+
+ if( thisAtom < thatAtom )
+ {
+ // We have an atom that `this` contains,
+ // but `that` doesn't; that is consistent
+ // with `this` being a super-set, so we
+ // just skip the item and keep searching.
+ //
+ thisIndex++;
+ }
+ else
+ {
+ SLANG_ASSERT(thisAtom > thatAtom);
+
+ // We have an atom in `that` which isn't
+ // also in `this`, so we know it cannot
+ // be a subset.
+ //
return false;
+ }
}
return true;
-
- // TODO: If we had a representation that stored an expanded
- // sorted, deduplicated list of atoms, then we could
- // check the `implies` relationship by scanning through
- // the two lists in tandem and identifying any element
- // in the `that` list that isn't in the `this` list.
- // Such elements would indicate that `that` is not a subset
- // of `this`.
}
+ /// Helper functor for binary search on lists of `CapabilityAtom`
+struct CapabilityAtomComparator
+{
+ int operator()(CapabilityAtom left, CapabilityAtom right)
+ {
+ return int(Int(left) - Int(right));
+ }
+};
bool CapabilitySet::implies(CapabilityAtom atom) const
{
- // If our list of explicit atoms contains `atom`, then
- // we definitely imply it.
+ // The common case here is when `atom` is not an alias.
//
- // TODO: If we stored our atom lists sorted, then
- // this operation could be logarithmic rather than
- // linear.
- //
- if(m_atoms.contains(atom))
- return true;
+ if( _getInfo(atom).flavor != CapabilityAtomFlavor::Alias )
+ {
+ // Every non-alias atom that `this` implies should
+ // be presented in the `m_expandedAtoms` list.
+ //
+ // Because the list is sorted, we can find out whether
+ // it contains `atom` with a binary search.
+ //
+ Index result = m_expandedAtoms.binarySearch(atom, CapabilityAtomComparator());
+ return result >= 0;
+ }
+ else
+ {
+ // In the case where `atom` is an alias, then it won't
+ // appear in the expanded list, and we need to check
+ // whether `this` set implies everything that `atom`
+ // transitively inherits from.
+ //
+ // The simplest way to do that is to expand `atom`
+ // into the full capability set it stands for and
+ // check that.
+ //
+ return implies(CapabilitySet(atom));
+ }
+}
- // If any of our atoms implies `atom` then we
- // also imply it.
+Int CapabilitySet::countIntersectionWith(CapabilitySet const& that) const
+{
+ // The goal of this subroutine is to count the number of
+ // elements in the intersection of `this` and `that`,
+ // without explicitly forming that intersection.
//
- // TODO: If we stored an expanded atom list, then
- // this recursion could be skipped completely, since
- // the containment check above would cover inheirtance
- // relationships too.
+ // Our approach here will be to iterate over the two
+ // sets in tandem (O(N) in the size of the larger set)
+ // and check for elements that both contain.
//
- for( auto thisAtom : m_atoms )
+ // TODO: There should be an asymptotically faster
+ // recursive algorithm here.
+
+ Int intersectionCount = 0;
+
+ Index thisCount = this->m_expandedAtoms.getCount();
+ Index thatCount = that.m_expandedAtoms.getCount();
+ Index thisIndex = 0;
+ Index thatIndex = 0;
+ for(;;)
{
- if(_implies(thisAtom, atom))
- return true;
+ if(thisIndex == thisCount) break;
+ if(thatIndex == thatCount) break;
+
+ auto thisAtom = this->m_expandedAtoms[thisIndex];
+ auto thatAtom = that.m_expandedAtoms[thatIndex];
+
+ if( thisAtom == thatAtom )
+ {
+ // An item both contain.
+
+ intersectionCount++;
+ thisIndex++;
+ thatIndex++;
+ continue;
+ }
+
+ if( thisAtom < thatAtom )
+ {
+ // An item in `this` but not `that`.
+
+ thisIndex++;
+ }
+ else
+ {
+ SLANG_ASSERT(thisAtom > thatAtom);
+
+ // An item in `that` but not `this`.
+
+ thatIndex++;
+ }
}
+ return intersectionCount;
+}
+
+bool CapabilitySet::isBetterForTarget(
+ CapabilitySet const& existingCaps,
+ CapabilitySet const& targetCaps)
+{
+ auto& candidateCaps = *this;
+
+ // The task here is to determine if `candidateCaps` should
+ // be considered "better" than `existingCaps` in the context
+ // of compilation for a target with the given `targetCaps`.
+ //
+ // In an ideal world, this computation could be quite simple:
+ //
+ // * If either `candidateCaps` or `existingCaps` is not implied by
+ // `targetCaps` (that is, they include requirements that aren't
+ // provided by the target), then the other is automatically "better."
+ //
+ // * Otherwise, one set is "better" than the other if it is a
+ // super-set (which is what `implies()` tests).
+ //
+ // There are two main reasons we can't use that simple logic:
+ //
+ // 1. Currently a user of Slang can compile for a target but
+ // not actually spell out its capabilities fully or correctly.
+ // They might compile for `sm_5_0` but use ray tracing features
+ // that require `sm_6_2` and expect the compiler to figure out
+ // what they "obviously" meant. Thus we cannot assume that
+ // `targetCaps` can be used to rule out candidates fully.
+ //
+ // 2. Sometimes there are multiple ways for a target to provide
+ // the same feature (e.g., multiple extensions) and because of (1)
+ // we cannot always rely on the `targetCaps` to tell us which to
+ // use. Thus we cannot rely on pure subset/`implies()` to define
+ // better-ness, and need some way to break ties.
+ //
+ // The following logic is a bunch of "do what I mean" nonsense that
+ // tries to capture a reasonable intuition of what "better"-ness
+ // should mean with these caveats.
+
+ // First, if either candidate is fundamentally incompatible
+ // with the target, we shouldn't favor it.
+ //
+ if(candidateCaps.isIncompatibleWith(targetCaps)) return false;
+ if(existingCaps.isIncompatibleWith(targetCaps)) return true;
+
+ // Next, we want to compare the candidates to the `targetCaps`
+ // to figure out whether one is obviously "more specialized" for
+ // the target.
+ //
+ // We measure the degree to which a candidate is specialized for
+ // the target as the size of its set intersection with `targetCaps`.
+ //
+ // TODO: If both `candidateCaps` and `existingCaps` are implied
+ // by `targetCaps`, then this amounts to just measuring the
+ // size of each set. We probably want this size-based check to
+ // come later in the overall process.
+ //
+ // TODO: A better model here might be to actually compute the actual
+ // intersected sets, and then check if one is a super-set of the other.
+ //
+ auto candidateIntersectionSize = targetCaps.countIntersectionWith(candidateCaps);
+ auto existingIntersectionSize = targetCaps.countIntersectionWith(existingCaps);
+ if(candidateIntersectionSize != existingIntersectionSize)
+ return candidateIntersectionSize > existingIntersectionSize;
+
+ // Next we want to consider that if one of the two candidates
+ // is actually available on the target (meaning that it is
+ // implied by `targetCaps`) then we probably want to pick that one
+ // (since we can use that candidate on the chosen target without
+ // enabling any additional features the user didn't ask for).
+ //
+ // TODO: This step currently needs to come after the preceeding
+ // one because otherwise we risk selecting a `__target_intrinsic`
+ // decoration with *no* requirements (which are currently being
+ // added implicitly in many places) over any one with explicit
+ // requirements (since every target implies the empty set of
+ // requirements).
+ //
+ // In many ways the counting-based logic above amounts to a quick
+ // fix to prefer a non-empty set of requirements over an empty one,
+ // so long as something in that non-empty set overlaps with the target.
+ //
+ // TODO: The best fix is probably to figure out how "catch-all"
+ // intrinsic function definitions should be encoded; we clearly
+ // want them to be used only as a fallback when no target-specific
+ // variants are present.
+ //
+ bool candidateIsAvailable = targetCaps.implies(candidateCaps);
+ bool existingIsAvailable = targetCaps.implies(existingCaps);
+ if(candidateIsAvailable != existingIsAvailable)
+ return candidateIsAvailable;
+
+ // All preceding factors being equal, we prefer
+ // a candidate that is strictly more specialized than the other.
+ //
+ // TODO: This logic has the negative effect of always preferring
+ // to enable optional features even if they aren't necessary.
+ // It would prefer the set {glsl, optionalFeature} over the set
+ // {glsl}, even though we might argue that a default implementaton
+ // that works without any optional features is "obviously" what
+ // the user means if they didn't enable those features.
+ //
+ // TODO: The right answer is possibly that we want to partition
+ // `candidateCaps` and `existingCaps` into two parts: their
+ // intersection with `targetCaps` and their difference with it.
+ //
+ // For the intersection part of things, we'd want to favor a
+ // definition that is more specialized, while for the difference
+ // part we'd actually wnat to favor a definition that is less
+ // specialized.
+ //
+ if(candidateCaps.implies(existingCaps)) return true;
+ if(existingCaps.implies(candidateCaps)) return true;
+
+ // At this point we have the problem that neither candidate
+ // appears to be "obviously" better for the target, but we
+ // want some way to disambiguate them.
+ //
+ // What we want to do now is scan through what makes each candidate
+ // different from the other, and see if anything in either case
+ // has a ranking that should make it be preferred.
+ //
+ // TODO: This should probably *not* be considering anything that
+ // is implied/supported by the target.
+ //
+ auto candidateScore = candidateCaps._calcDifferenceScoreWith(existingCaps);
+ auto existingScore = existingCaps._calcDifferenceScoreWith(candidateCaps);
+ if(candidateScore != existingScore)
+ return candidateScore > existingScore;
return false;
}
+uint32_t CapabilitySet::_calcDifferenceScoreWith(CapabilitySet const& that) const
+{
+ uint32_t score = 0;
+
+ // Our approach here will be to scan through `this` and `that`
+ // to identify atoms that are in `this` but not `that` (that is,
+ // the atoms that would be present in the set difference `this - that`)
+ // and then compute the maximum rank/score of those atoms.
+
+ Index thisCount = this->m_expandedAtoms.getCount();
+ Index thatCount = that.m_expandedAtoms.getCount();
+ Index thisIndex = 0;
+ Index thatIndex = 0;
+ for(;;)
+ {
+ if(thisIndex == thisCount) break;
+ if(thatIndex == thatCount) break;
+
+ auto thisAtom = this->m_expandedAtoms[thisIndex];
+ auto thatAtom = that.m_expandedAtoms[thatIndex];
+
+ if( thisAtom == thatAtom )
+ {
+ thisIndex++;
+ thatIndex++;
+ continue;
+ }
+
+ if( thisAtom < thatAtom )
+ {
+ // `thisAtom` is not present in `that`, so it
+ // should contribute to our ranking of the difference.
+ //
+ auto thisAtomInfo = _getInfo(thisAtom);
+ auto thisAtomRank = thisAtomInfo.rank;
+
+ if( thisAtomRank > score )
+ {
+ score = thisAtomRank;
+ }
+
+ thisIndex++;
+ }
+ else
+ {
+ SLANG_ASSERT(thisAtom > thatAtom);
+ thatIndex++;
+ }
+ }
+ return score;
+}
+
+
bool CapabilitySet::operator==(CapabilitySet const& other) const
{
+ // TODO: We should be able to implement this more efficiently
+ // by scanning over the two sets in tandem.
+
return this->implies(other) && other.implies(*this);
}