summaryrefslogtreecommitdiff
path: root/source/slang
diff options
context:
space:
mode:
Diffstat (limited to 'source/slang')
-rw-r--r--source/slang/hlsl.meta.slang86
-rw-r--r--source/slang/slang-api.cpp9
-rw-r--r--source/slang/slang-capability-defs.h70
-rw-r--r--source/slang/slang-capability.cpp903
-rw-r--r--source/slang/slang-capability.h31
-rwxr-xr-xsource/slang/slang-compiler.cpp37
-rwxr-xr-xsource/slang/slang-compiler.h38
-rw-r--r--source/slang/slang-emit-c-like.cpp31
-rw-r--r--source/slang/slang-emit-c-like.h2
-rw-r--r--source/slang/slang-emit-glsl.cpp91
-rw-r--r--source/slang/slang-emit.cpp2
-rw-r--r--source/slang/slang-ir.cpp36
-rw-r--r--source/slang/slang-options.cpp56
-rw-r--r--source/slang/slang-parameter-binding.cpp4
-rw-r--r--source/slang/slang-profile.h1
-rw-r--r--source/slang/slang-repro.cpp10
-rw-r--r--source/slang/slang-serialize-container.cpp8
-rw-r--r--source/slang/slang.cpp60
18 files changed, 1041 insertions, 434 deletions
diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang
index 29779e796..851af7d3f 100644
--- a/source/slang/hlsl.meta.slang
+++ b/source/slang/hlsl.meta.slang
@@ -4158,19 +4158,21 @@ struct BuiltInTriangleIntersectionAttributes
// 10.3.1
+__target_intrinsic(hlsl)
void CallShader<Payload>(uint shaderIndex, inout Payload payload);
// `executeCallableNV` is the GLSL intrinsic that will be used to implement
// `CallShader()` for GLSL-based targets.
//
-__target_intrinsic(glsl, "executeCallableNV")
-void __executeCallableNV(uint shaderIndex, int payloadLocation);
+__target_intrinsic(GL_NV_ray_tracing, "executeCallableNV")
+__target_intrinsic(GL_EXT_ray_tracing, "executeCallableEXT")
+void __executeCallable(uint shaderIndex, int payloadLocation);
// Next is the custom intrinsic that will compute the payload location
// for a type being used in a `CallShader()` call for GLSL-based targets.
//
__generic<Payload>
-__target_intrinsic(glsl, "$XC")
+__target_intrinsic(__glslRayTracing, "$XC")
[__readNone]
int __callablePayloadLocation(Payload payload);
@@ -4186,11 +4188,12 @@ void CallShader(uint shaderIndex, inout Payload payload)
static Payload p;
p = payload;
- __executeCallableNV(shaderIndex, __callablePayloadLocation(p));
+ __executeCallable(shaderIndex, __callablePayloadLocation(p));
payload = p;
}
// 10.3.2
+__target_intrinsic(hlsl)
void TraceRay<payload_t>(
RaytracingAccelerationStructure AccelerationStructure,
uint RayFlags,
@@ -4201,8 +4204,9 @@ void TraceRay<payload_t>(
RayDesc Ray,
inout payload_t Payload);
-__target_intrinsic(glsl, "traceNV")
-void __traceNV(
+__target_intrinsic(GL_NV_ray_tracing, "traceNV")
+__target_intrinsic(GL_EXT_ray_tracing, "traceRayEXT")
+void __traceRay(
RaytracingAccelerationStructure AccelerationStructure,
uint RayFlags,
uint InstanceInclusionMask,
@@ -4222,7 +4226,7 @@ void __traceNV(
// syntax works in a pinch.
//
__generic<Payload>
-__target_intrinsic(glsl, "$XP")
+__target_intrinsic(__glslRayTracing, "$XP")
[__readNone]
int __rayPayloadLocation(Payload payload);
@@ -4242,7 +4246,7 @@ void TraceRay(
static payload_t p;
p = Payload;
- __traceNV(
+ __traceRay(
AccelerationStructure,
RayFlags,
InstanceInclusionMask,
@@ -4258,10 +4262,12 @@ void TraceRay(
}
// 10.3.3
+__target_intrinsic(hlsl)
bool ReportHit<A>(float tHit, uint hitKind, A attributes);
-__target_intrinsic(glsl, "reportIntersectionNV")
-bool __reportIntersectionNV(float tHit, uint hitKind);
+__target_intrinsic(GL_NV_ray_tracing, "reportIntersectionNV")
+__target_intrinsic(GL_EXT_ray_tracing, "reportIntersectionEXT")
+bool __reportIntersection(float tHit, uint hitKind);
__generic<A>
__specialized_for_target(glsl)
@@ -4271,15 +4277,19 @@ bool ReportHit(float tHit, uint hitKind, A attributes)
static A a;
a = attributes;
- return __reportIntersectionNV(tHit, hitKind);
+ return __reportIntersection(tHit, hitKind);
}
// 10.3.4
-__target_intrinsic(glsl, ignoreIntersectionNV)
+__target_intrinsic(hlsl)
+__target_intrinsic(GL_NV_ray_tracing, ignoreIntersectionNV)
+__target_intrinsic(GL_EXT_ray_tracing, ignoreIntersectionEXT)
void IgnoreHit();
// 10.3.5
-__target_intrinsic(glsl, terminateRayNV)
+__target_intrinsic(hlsl)
+__target_intrinsic(GL_NV_ray_tracing, terminateRayNV)
+__target_intrinsic(GL_EXT_ray_tracing, terminateRayEXT)
void AcceptHitAndEndSearch();
// 10.4 - System Values and Special Semantics
@@ -4289,22 +4299,27 @@ void AcceptHitAndEndSearch();
// 10.4.1 - Ray Dispatch System Values
-__target_intrinsic(glsl, "(gl_LaunchIDNV)")
+__target_intrinsic(GL_NV_ray_tracing, "(gl_LaunchIDNV)")
+__target_intrinsic(GL_EXT_ray_tracing, "(gl_LaunchIDEXT)")
__target_intrinsic(cuda, "optixGetLaunchIndex")
uint3 DispatchRaysIndex();
-__target_intrinsic(glsl, "(gl_LaunchSizeNV)")
+__target_intrinsic(GL_NV_ray_tracing, "(gl_LaunchSizeNV)")
+__target_intrinsic(GL_EXT_ray_tracing, "(gl_LaunchSizeEXT)")
uint3 DispatchRaysDimensions();
// 10.4.2 - Ray System Values
-__target_intrinsic(glsl, "(gl_WorldRayOriginNV)")
+__target_intrinsic(GL_NV_ray_tracing, "(gl_WorldRayOriginNV)")
+__target_intrinsic(GL_EXT_ray_tracing, "(gl_WorldRayOriginEXT)")
float3 WorldRayOrigin();
-__target_intrinsic(glsl, "(gl_WorldRayDirectionNV)")
+__target_intrinsic(GL_NV_ray_tracing, "(gl_WorldRayDirectionNV)")
+__target_intrinsic(GL_EXT_ray_tracing, "(gl_WorldRayDirectionEXT)")
float3 WorldRayDirection();
-__target_intrinsic(glsl, "(gl_RayTminNV)")
+__target_intrinsic(GL_NV_ray_tracing, "(gl_RayTminNV)")
+__target_intrinsic(GL_EXT_ray_tracing, "(gl_RayTminEXT)")
float RayTMin();
// Note: The `RayTCurrent()` intrinsic should translate to
@@ -4317,39 +4332,48 @@ float RayTMin();
// we should simply provide two overloads here, specialized
// to the appropriate Vulkan stages.
//
-__target_intrinsic(glsl, "$XT")
+__target_intrinsic(GL_NV_ray_tracing, "(gl_RayTmaxNV)")
+__target_intrinsic(GL_EXT_ray_tracing, "(gl_RayTmaxEXT)")
float RayTCurrent();
-__target_intrinsic(glsl, "(gl_IncomingRayFlagsNV)")
+__target_intrinsic(GL_NV_ray_tracing, "(gl_IncomingRayFlagsNV)")
+__target_intrinsic(GL_EXT_ray_tracing, "(gl_IncomingRayFlagsEXT)")
uint RayFlags();
// 10.4.3 - Primitive/Object Space System Values
-__target_intrinsic(glsl, "(gl_InstanceCustomIndexNV)")
+__target_intrinsic(GL_NV_ray_tracing, "(gl_InstanceCustomIndexNV)")
+__target_intrinsic(GL_EXT_ray_tracing, "(gl_InstanceCustomIndexEXT)")
uint InstanceIndex();
-__target_intrinsic(glsl, "(gl_InstanceID)")
+__target_intrinsic(__glslRayTracing, "(gl_InstanceID)")
uint InstanceID();
-__target_intrinsic(glsl, "(gl_PrimitiveID)")
+__target_intrinsic(__glslRayTracing, "(gl_PrimitiveID)")
uint PrimitiveIndex();
-__target_intrinsic(glsl, "(gl_ObjectRayOriginNV)")
+__target_intrinsic(GL_NV_ray_tracing, "(gl_ObjectRayOriginNV)")
+__target_intrinsic(GL_EXT_ray_tracing, "(gl_ObjectRayOriginEXT)")
float3 ObjectRayOrigin();
-__target_intrinsic(glsl, "(gl_ObjectRayDirectionNV)")
+__target_intrinsic(GL_NV_ray_tracing, "(gl_ObjectRayDirectionNV)")
+__target_intrinsic(GL_EXT_ray_tracing, "(gl_ObjectRayDirectionEXT)")
float3 ObjectRayDirection();
-__target_intrinsic(glsl, "transpose(gl_ObjectToWorldNV)")
+__target_intrinsic(GL_NV_ray_tracing, "transpose(gl_ObjectToWorldNV)")
+__target_intrinsic(GL_EXT_ray_tracing, "transpose(gl_ObjectToWorldEXT)")
float3x4 ObjectToWorld3x4();
-__target_intrinsic(glsl, "transpose(gl_WorldToObjectNV)")
+__target_intrinsic(GL_NV_ray_tracing, "transpose(gl_WorldToObjectNV)")
+__target_intrinsic(GL_EXT_ray_tracing, "transpose(gl_WorldToObjectEXT)")
float3x4 WorldToObject3x4();
-__target_intrinsic(glsl, "(gl_ObjectToWorldNV)")
+__target_intrinsic(GL_NV_ray_tracing, "(gl_ObjectToWorldNV)")
+__target_intrinsic(GL_EXT_ray_tracing, "(gl_ObjectToWorld3x4EXT)")
float4x3 ObjectToWorld4x3();
-__target_intrinsic(glsl, "(gl_WorldToObjectNV)")
+__target_intrinsic(GL_NV_ray_tracing, "(gl_WorldToObjectNV)")
+__target_intrinsic(GL_EXT_ray_tracing, "(gl_WorldToObject3x4EXT)")
float4x3 WorldToObject4x3();
// Note: The provisional DXR spec included these unadorned
@@ -4365,7 +4389,8 @@ float3x4 ObjectToWorld() { return ObjectToWorld3x4(); }
float3x4 WorldToObject() { return WorldToObject3x4(); }
// 10.4.4 - Hit Specific System values
-__target_intrinsic(glsl, "(gl_HitKindNV)")
+__target_intrinsic(GL_NV_ray_tracing, "(gl_HitKindNV)")
+__target_intrinsic(GL_EXT_ray_tracing, "(gl_HitKindEXT)")
uint HitKind();
// Pre-defined hit kinds (not documented explicitly)
@@ -4543,6 +4568,7 @@ struct FeedbackTexture2DArray<T : __BuiltinSamplerFeedbackType>
//
// Get the index of the geometry that was hit in an intersection, any-hit, or closest-hit shader
+__target_intrinsic(GL_EXT_ray_tracing, "(gl_GeometryIndexEXT)")
uint GeometryIndex();
// Status of whether a (closest) hit has been committed in a `RayQuery`.
diff --git a/source/slang/slang-api.cpp b/source/slang/slang-api.cpp
index c8b932306..e1eee66dd 100644
--- a/source/slang/slang-api.cpp
+++ b/source/slang/slang-api.cpp
@@ -233,6 +233,15 @@ SLANG_API void spSetTargetFloatingPointMode(
request->setTargetFloatingPointMode(targetIndex, mode);
}
+SLANG_API void spAddTargetCapability(
+ slang::ICompileRequest* request,
+ int targetIndex,
+ SlangCapabilityID capability)
+{
+ SLANG_ASSERT(request);
+ request->addTargetCapability(targetIndex, capability);
+}
+
SLANG_API void spSetMatrixLayoutMode(
slang::ICompileRequest* request,
SlangMatrixLayoutMode mode)
diff --git a/source/slang/slang-capability-defs.h b/source/slang/slang-capability-defs.h
index 8bf1d80e9..003fd3125 100644
--- a/source/slang/slang-capability-defs.h
+++ b/source/slang/slang-capability-defs.h
@@ -24,34 +24,64 @@
// TODO: There is probably a way to handle this with
// variadic macros.
//
-#define SLANG_CAPABILITY_ATOM4(ENUMERATOR, NAME, FLAGS, BASE0, BASE1, BASE2, BASE3) \
- SLANG_CAPABILITY_ATOM(ENUMERATOR, NAME, FLAGS, BASE0, BASE1, BASE2, BASE3)
+#define SLANG_CAPABILITY_ATOM4(ENUMERATOR, NAME, KIND, CONFLICTS, RANK, BASE0, BASE1, BASE2, BASE3) \
+ SLANG_CAPABILITY_ATOM(ENUMERATOR, NAME, KIND, CONFLICTS, RANK, BASE0, BASE1, BASE2, BASE3)
-#define SLANG_CAPABILITY_ATOM3(ENUMERATOR, NAME, FLAGS, BASE0, BASE1, BASE2) \
- SLANG_CAPABILITY_ATOM(ENUMERATOR, NAME, FLAGS, BASE0, BASE1, BASE2, Invalid)
+#define SLANG_CAPABILITY_ATOM3(ENUMERATOR, NAME, KIND, CONFLICTS, RANK, BASE0, BASE1, BASE2) \
+ SLANG_CAPABILITY_ATOM(ENUMERATOR, NAME, KIND, CONFLICTS, RANK, BASE0, BASE1, BASE2, Invalid)
-#define SLANG_CAPABILITY_ATOM2(ENUMERATOR, NAME, FLAGS, BASE0, BASE1) \
- SLANG_CAPABILITY_ATOM(ENUMERATOR, NAME, FLAGS, BASE0, BASE1, Invalid, Invalid)
+#define SLANG_CAPABILITY_ATOM2(ENUMERATOR, NAME, KIND, CONFLICTS, RANK, BASE0, BASE1) \
+ SLANG_CAPABILITY_ATOM(ENUMERATOR, NAME, KIND, CONFLICTS, RANK, BASE0, BASE1, Invalid, Invalid)
-#define SLANG_CAPABILITY_ATOM1(ENUMERATOR, NAME, FLAGS, BASE0) \
- SLANG_CAPABILITY_ATOM(ENUMERATOR, NAME, FLAGS, BASE0, Invalid, Invalid, Invalid)
+#define SLANG_CAPABILITY_ATOM1(ENUMERATOR, NAME, KIND, CONFLICTS, RANK, BASE0) \
+ SLANG_CAPABILITY_ATOM(ENUMERATOR, NAME, KIND, CONFLICTS, RANK, BASE0, Invalid, Invalid, Invalid)
-#define SLANG_CAPABILITY_ATOM0(ENUMERATOR, NAME, FLAGS) \
- SLANG_CAPABILITY_ATOM(ENUMERATOR, NAME, FLAGS, Invalid, Invalid, Invalid, Invalid)
+#define SLANG_CAPABILITY_ATOM0(ENUMERATOR, NAME, KIND, CONFLICTS, RANK) \
+ SLANG_CAPABILITY_ATOM(ENUMERATOR, NAME, KIND, CONFLICTS, RANK, Invalid, Invalid, Invalid, Invalid)
-// The `__target` capability exists only to provide a common
-// abstract base for the capabilities that represent each
-// of our compilation targets.
+// Several capabilities represent the target formats in which we generate code.
+// Because we can only generate code in one format at a time, all of these are
+// marked as conflicting with one another along the `TargetFormat` axis.
//
-SLANG_CAPABILITY_ATOM0(Target, __target, Abstract)
+// Note: We are only including here the source code formats we initially generate
+// code in and not the formats that code might be translated into "downstream."
+// Trying to figure out how to integrate both kinds of formats into our capability
+// system will be an interesting challenge (e.g., can we compile code for `hlsl+spirv`
+// and for `glsl+spirv` or even just for `spirv`, and how should all of those impact
+// overloading).
+//
+SLANG_CAPABILITY_ATOM0(HLSL, hlsl, Concrete,TargetFormat,0)
+SLANG_CAPABILITY_ATOM0(GLSL, glsl, Concrete,TargetFormat,0)
+SLANG_CAPABILITY_ATOM0(C, c, Concrete,TargetFormat,0)
+SLANG_CAPABILITY_ATOM0(CPP, cpp, Concrete,TargetFormat,0)
+SLANG_CAPABILITY_ATOM0(CUDA, cuda, Concrete,TargetFormat,0)
-SLANG_CAPABILITY_ATOM1(HLSL, hlsl, Concrete, Target)
-SLANG_CAPABILITY_ATOM1(GLSL, glsl, Concrete, Target)
-SLANG_CAPABILITY_ATOM1(C, c, Concrete, Target)
-SLANG_CAPABILITY_ATOM1(CPP, cpp, Concrete, Target)
-SLANG_CAPABILITY_ATOM1(CUDA, cuda, Concrete, Target)
-SLANG_CAPABILITY_ATOM1(SPIRV, spirv, Concrete, Target)
+// TODO: We should have multiple capabilities for the various SPIR-V versions,
+// arranged so that they inherit from one another to represent which versions
+// provide a super-set of the features of earlier ones (e.g., SPIR-V 1.4 should
+// be expressed as inheriting from SPIR-V 1.3).
+//
+// For now we are only including the version(s) that are relevant to the
+// features controlled by the capability system.
+//
+SLANG_CAPABILITY_ATOM1(SPIRV_1_4, spirv_1_4, Concrete,None,0, GLSL)
+// The following capabilities all pertain to how ray tracing shaders are translated
+// to GLSL, where there are two different extensions that can provide the core
+// functionality of `TraceRay` and the related operations.
+//
+// The two extensions are expressed as distinct capabilities that both are marked
+// as conflicting on the `RayTracingExtension` axis, so that a compilation target
+// cannot have both enabled at once.
+//
+// The `GL_EXT_ray_tracing` extension should be favored, so it has a rank of `1`
+// instead of `0`, which means that when comparing overloads that require these
+// extensions, the `EXT` extension will be favored over the `NV` extension, if
+// all other factors are equal.
+//
+SLANG_CAPABILITY_ATOM1(GLSLRayTracing, __glslRayTracing, Abstract,None,0, GLSL)
+SLANG_CAPABILITY_ATOM1(GL_NV_ray_tracing, GL_NV_ray_tracing, Concrete,RayTracingExtension,0, GLSLRayTracing)
+SLANG_CAPABILITY_ATOM2(GL_EXT_ray_tracing, GL_EXT_ray_tracing, Concrete,RayTracingExtension,1, GLSLRayTracing, SPIRV_1_4)
#undef SLANG_CAPABILITY_ATOM0
#undef SLANG_CAPABILITY_ATOM1
diff --git a/source/slang/slang-capability.cpp b/source/slang/slang-capability.cpp
index 7b4361a58..a75f6131c 100644
--- a/source/slang/slang-capability.cpp
+++ b/source/slang/slang-capability.cpp
@@ -1,6 +1,8 @@
// slang-capability.cpp
#include "slang-capability.h"
+#include "../core/slang-dictionary.h"
+
// This file implements the core of the "capability" system.
namespace Slang
@@ -10,37 +12,69 @@ namespace Slang
// CapabilityAtom
//
-// We are going to divide capabilities into a few categories,
-// which will be represented as flags for now.
-//
-// Every capability will be either concrete or abstract.
-// An abstract capability basically represents a category
-// of related capabilities that all fill a similar role.
-// For example, we could have an abstract capability that
-// represents "stages" and then the concrete capabilities
-// `vertex`, `fragment`, etc. would inherit from it.
+// We are going to divide capability atoms into a few categories.
//
-// Abstract capabilities are critical in our model for
-// knowing when two capabilities are fundamentally incompatible.
-// For example, it is meaningless to compile code for both
-// the `vertex` and `fragment` capabilities at the same time,
-// because no target processor supports both at once.
+enum class CapabilityAtomFlavor : int32_t
+{
+ // A concrete capability atom is something that a target
+ // can directly support, where the presence of the feature
+ // directly provides functionality. A specific OpenGL
+ // or Vulkan extension would be an example of a concrete
+ // capability.
+ //
+ Concrete,
+
+ // An abstract capability represents a class of feature
+ // where multiple different implementations might be possible.
+ // For example, "ray tracing" might be an abstract feature
+ // that a function can require, but a specific target will
+ // only be able to provide that abstract feature via some
+ // specific concrete feature (e.g., `GL_EXT_ray_tracing`).
+ Abstract,
+
+ // An alias capability atom is one that is exactly equivalent
+ // to the things it inherits from.
+ //
+ // For example, a `ps_5_1` capability would just be an
+ // alias for the combination of the `fragment` capability
+ // and the `sm_5_1` capability.
+ //
+ Alias,
+};
+
+// Certain capability atoms will conflict with one another,
+// such that a concrete target should never be able to support
+// both.
//
-// TODO: It is possible that instead of flags this could simply
-// identify a "kind" of atom, with two different states.
+// It is possible in theory to define "conflicting" capabilities
+// in terms of the inheritance graph, but that makes checking
+// for conflicts more difficult.
//
-// TODO: It is likely that in a future change we will want to
-// add a third case here for "alias" capabilities, which are
-// pseudo-atomic capabilities that are just equivalent to
-// the set of their bases.
+// Instead, we are going to allow each capability to define a
+// mask to indicate group(s) of conflicting capabilities it
+// belongs to. Two different capability atoms that have
+// overlapping masks will be considered to conflict.
//
-typedef uint32_t CapabilityAtomFlags;
-enum : CapabilityAtomFlags
+enum class CapabilityAtomConflictMask : uint32_t
{
- kCapabilityAtomFlags_Concrete = 0,
- kCapabilityAtomFlags_Abstract = 1 << 0,
+ // By default, most capability atoms do not conflict with one another.
+ None = 0,
+
+ // Capability atoms that represent target code generation formats always conflict.
+ // (e.g., you cannot generate both HLSL and C++ output at once)
+ TargetFormat = 1 << 0,
+
+ // Capability atoms that represent GLSL ray tracing extensions conflict with
+ // one another (we only want to use one such extension at a time).
+ RayTracingExtension = 1 << 1,
};
+// For simplicity in building up our data structure representing
+// all capability atoms, we will limit the number of bases that
+// a capability atom is allowed to inherit from.
+//
+static const int kCapabilityAtom_MaxBases = 4;
+
// The macros in the `slang-capability-defs.h` file will be used
// to fill out a `static const` array of information about each
// capability atom.
@@ -48,11 +82,19 @@ enum : CapabilityAtomFlags
struct CapabilityAtomInfo
{
/// The API-/language-exposed name of the capability.
- char const* name;
+ char const* name;
+
+ /// Flavor of atom: concrete, abstract, or alias
+ CapabilityAtomFlavor flavor;
+
+ /// A mask to indicate which other categories of atoms this one conflicts with
+ CapabilityAtomConflictMask conflictMask;
- /// Flags to determine if the capability is concrete-vs-abstract, etc.
- CapabilityAtomFlags flags;
- CapabilityAtom bases[4];
+ /// Ranking to use when deciding if this atom is a "better" one to select.
+ uint32_t rank;
+
+ /// Base atoms this one "inherits" from (terminated with `Invalid` if not all entries used).
+ CapabilityAtom bases[kCapabilityAtom_MaxBases];
};
//
// The array is going to be sized to include an entry for `CapabilityAtom::Invalid`
@@ -61,10 +103,10 @@ struct CapabilityAtomInfo
//
static const CapabilityAtomInfo kCapabilityAtoms[Int(CapabilityAtom::Count) + 1] =
{
- { "invalid", 0, { CapabilityAtom::Invalid, CapabilityAtom::Invalid, CapabilityAtom::Invalid, CapabilityAtom::Invalid } },
+ { "invalid", CapabilityAtomFlavor::Concrete, CapabilityAtomConflictMask::None, 0, { CapabilityAtom::Invalid, CapabilityAtom::Invalid, CapabilityAtom::Invalid, CapabilityAtom::Invalid } },
-#define SLANG_CAPABILITY_ATOM(ENUMERATOR, NAME, FLAGS, BASE0, BASE1, BASE2, BASE3) \
- { #NAME, kCapabilityAtomFlags_##FLAGS, { CapabilityAtom::BASE0, CapabilityAtom::BASE1, CapabilityAtom::BASE2, CapabilityAtom::BASE3 } },
+#define SLANG_CAPABILITY_ATOM(ENUMERATOR, NAME, FLAVOR, CONFLICT, RANK, BASE0, BASE1, BASE2, BASE3) \
+ { #NAME, CapabilityAtomFlavor::FLAVOR, CapabilityAtomConflictMask::CONFLICT, RANK, { CapabilityAtom::BASE0, CapabilityAtom::BASE1, CapabilityAtom::BASE2, CapabilityAtom::BASE3 } },
#include "slang-capability-defs.h"
};
@@ -75,145 +117,6 @@ static CapabilityAtomInfo const& _getInfo(CapabilityAtom atom)
return kCapabilityAtoms[Int(atom) + 1];
}
-// One capability set or capability atom A implies another set/atom B
-// if any target that supports all of the atoms in A must also support
-// all of those in B.
-
- /// Does `thisAtom` imply `thatAtom`?
-static bool _implies(CapabilityAtom thisAtom, CapabilityAtom thatAtom)
-{
- // When looking at atoms, the immediate easy case is when
- // the two atoms are the same: an atomic capability always
- // implies itself.
- //
- if(thisAtom == thatAtom)
- return true;
-
- // Otherwise, we want to look at the bases of `thisAtom`
- // to see if any of them imply `thatAtom`, since `thisAtom`
- // implies each of its bases.
- //
- auto& thisAtomInfo = _getInfo(thisAtom);
- for( auto thisAtomBase : thisAtomInfo.bases )
- {
- // The lists of bases are currently using `Invalid` as
- // a sentinel value to terminate them, so we need to
- // bail out of the loop when we see the sentinel.
- //
- if(thisAtomBase == CapabilityAtom::Invalid)
- break;
-
- if(_implies(thisAtomBase, thatAtom))
- return true;
- }
-
- return false;
-}
-
- /// Does `base` have any abstract capabilities in common with `otherAtom`
- ///
- /// This subroutine is a helper for `_isIncompatible`.
-static bool _hasAbstractBaseInCommon(CapabilityAtom base, CapabilityAtom otherAtom)
-{
- // First we check the case where `base` itself is an abstract
- // capability atom.
- //
- auto& baseAtomInfo = _getInfo(base);
- if(baseAtomInfo.flags & kCapabilityAtomFlags_Abstract)
- {
- // If `base` is abstract, and `otherAtom` implies `base`,
- // then that means that `otherAtom` includes one or
- // more atoms that inherit from `base`, and thus the
- // two have an abstract base in common.
- //
- if( _implies(otherAtom, base) )
- return true;
- }
-
- // If `base` itself has bases, then we want to check if any
- // of *those* are abstract bases that overlap with `otherAtom`.
- //
- for( auto baseBase : baseAtomInfo.bases )
- {
- if(baseBase == CapabilityAtom::Invalid)
- break;
-
- if(_hasAbstractBaseInCommon(baseBase, otherAtom))
- return true;
- }
-
- // If we didn't manage to find any overlaps, then we conclude
- // that there are no shared abstract bases.
- //
- return false;
-}
-
- /// Is `thisAtom` incompatible with `thatAtom` (such that no target could ever support both at once)
-static bool _isIncompatible(CapabilityAtom thisAtom, CapabilityAtom thatAtom)
-{
- // If either atom implies the other, then they aren't incompatible.
- //
- // For example, if there is an atom representing `sm_5_1` that inherits
- // from an atom representing `sm_5_0`, then clearly the two aren't
- // in any way incompatible (a single target can support both).
- //
- if(_implies(thisAtom, thatAtom) || _implies(thatAtom, thisAtom))
- return false;
-
- // If the two atoms are not in an inheritance relationship, then one of
- // a few cases can apply:
- //
- // * They have no common bases; in this case they are compatible.
- // An example would be `vertex` and `sm_5_0`.
- //
- // * They have a common base, but it is not marked abstract; in
- // this case they are compatible. E.g., two GLSL extensions that
- // both inherit from the `glsl` capability should not conflict.
- //
- // * They have a common base that is marked abstract; in this
- // case they are incompatible. An example would be `vertex`
- // and `fragment` both inheriting from the abstract atom
- // `__stage`.
- //
- // To summarize the above list, we note that two atoms are
- // incompatible with they have an abstract base in common.
- //
- return _hasAbstractBaseInCommon(thisAtom, thatAtom);
-
- // TODO: The above logic is a bit off, but in a way that doesn't
- // matter just yet.
- //
- // We currently have capabilities like:
- //
- // abstract capability __target;
- // capability hlsl : __target;
- // capability glsl : __target;
- //
- // In this case it is clear that `hlsl` and `glsl` should
- // be incompatible, and that the rules as implemented
- // make that the case.
- //
- // A problem arises when we start to add things like extensions:
- //
- // capability EXT_cool_thing : glsl;
- // capability EXT_other_stuff : glsl;
- //
- // In this case, it also seems clear that `EXT_cool_thing`
- // and `EXT_other_stuff` should be mutually compatible.
- // However, with the rules implemented here right now, they
- // would be found incompatible because they share the
- // abstract base `__target`.
- //
- // In this specific case, we know that the relationship
- // between the extensions is fine because they both inherit
- // from `__target` *through* the concrete atom `glsl`.
- //
- // Before adding capabilities that represent optional
- // extensions like this we need to codify the semantics
- // for how incompatibility checks should work in terms
- // of the inheritance graph of capability atoms.
-}
-
CapabilityAtom findCapabilityAtom(UnownedStringSlice const& name)
{
// For now we are implementing a linear search over the
@@ -237,51 +140,33 @@ CapabilityAtom findCapabilityAtom(UnownedStringSlice const& name)
// CapabilitySet
//
-// The current design choice in `CapabilitySet` is that it blindly
-// stores exactly the atoms it is told to, without any up-front
-// processing.
-//
-// This choice has some down-sides, and there are other representations
-// that could be much nicer in the future. Possible improcements include:
-//
-// * The list of atoms could be *expanded* so that if it contains atom A
-// and atom A implies atom B, then the list should also include B.
+// The current design choice in `CapabilitySet` is that it stores
+// an expanded, deduplicated, and sorted list of the capability
+// atoms in the set. "Expanded" here means that it includes the
+// transitive closure of the inheritance graph of those atoms.
//
-// * The list of atoms could be *minimized*, such that if atom A implies
-// atom B, then any list that contains A does not include B (both
-// expanded and minimized lists have different benefits).
-//
-// * The list of atoms could be deduplicated.
-//
-// * The list of atoms could be sorted.
-//
-// * The lists could be deduplicated and cached in some central place
-// (the like the session) so that repreated attempts to create the
-// same capability sets return the same objects.
-//
-// In some parts of the code below we will call out how these improvements
-// could affect the algorithms used.
-
-// Given our simple choices right now, the constructors for `CapabilitySet`
-// are all straightforward: just adding the right atoms to the list.
+// This choice is intended to make certain operations on
+// capability sets more efficient, since we can use things like
+// binary searches to efficiently detect whether an atom
+// is present in a set.
CapabilitySet::CapabilitySet()
{}
CapabilitySet::CapabilitySet(Int atomCount, CapabilityAtom const* atoms)
{
- m_atoms.addRange(atoms, atomCount);
+ _init(atomCount, atoms);
}
CapabilitySet::CapabilitySet(CapabilityAtom atom)
{
- m_atoms.add(atom);
+ _init(1, &atom);
}
CapabilitySet::CapabilitySet(List<CapabilityAtom> const& atoms)
- : m_atoms(atoms)
-{}
-
+{
+ _init(atoms.getCount(), atoms.getBuffer());
+}
CapabilitySet CapabilitySet::makeEmpty()
{
@@ -290,7 +175,118 @@ CapabilitySet CapabilitySet::makeEmpty()
CapabilitySet CapabilitySet::makeInvalid()
{
- return CapabilitySet(CapabilityAtom::Invalid);
+ // An invalid capability set will always be a singleton
+ // set of the `Invalid` atom, and we will construct
+ // the set directly rather than use the more expensive
+ // logic in `_init()`.
+ //
+ CapabilitySet result;
+ result.m_expandedAtoms.add(CapabilityAtom::Invalid);
+ return result;
+}
+
+ /// Helper routine for `CapabilitySet::_init`.
+ ///
+ /// Recursively add all atoms implied by `atom` to `ioExpandedAtoms`.
+ ///
+static void _addAtomsRec(
+ CapabilityAtom atom,
+ HashSet<CapabilityAtom>& ioExpandedAtoms)
+{
+ auto& atomInfo = _getInfo(atom);
+
+ // The first step is to add `atom` itself, *unless*
+ // it is an alias, because an alias shouldn't impact
+ // whether one set is considered a subset/superset of
+ // another.
+ //
+ if(atomInfo.flavor != CapabilityAtomFlavor::Alias)
+ {
+ ioExpandedAtoms.Add(atom);
+ }
+
+ // Next we add all the atoms transitively implied by `atom`.
+ //
+ for(auto baseAtom : atomInfo.bases)
+ {
+ // Note: the list of `bases` is a fixed-size array, but
+ // can be terminated with `Invalid` to indicate that
+ // not all of the entries are being used.
+ //
+ // If we see the sentinel, then we know we are at the end
+ // of the list.
+ //
+ if(baseAtom == CapabilityAtom::Invalid)
+ break;
+
+ _addAtomsRec(baseAtom, ioExpandedAtoms);
+ }
+}
+
+void CapabilitySet::_init(Int atomCount, CapabilityAtom const* atoms)
+{
+ // In order to fill in the expanded and deduplicated
+ // set of atoms, we will use an explicit hash set
+ // and then recursively walk the tree of atoms and
+ // their bases.
+ //
+ HashSet<CapabilityAtom> expandedAtomsSet;
+ for(Int i = 0; i < atomCount; ++i)
+ {
+ _addAtomsRec(atoms[i], expandedAtomsSet);
+ }
+
+ // We can then translate the set of atoms into a list,
+ // and then sort that list to produce the data that
+ // we use in all our other queries.
+ //
+ for(auto atom : expandedAtomsSet)
+ {
+ m_expandedAtoms.add(atom);
+ }
+ m_expandedAtoms.sort();
+}
+
+void CapabilitySet::calcCompactedAtoms(List<CapabilityAtom>& outAtoms) const
+{
+ // A "compacted" list of atoms is one that starts with
+ // the "expanded" list and removes any atoms that are
+ // implied by another atom already in the list.
+ //
+ // If the expanded list contains atom A, and A inherits
+ // from B, then we know that the expanded list also contains B,
+ // but the compacted list should not.
+ //
+ // We can thus look through the list of atoms A and for
+ // each base B of A, add it to a set of "redundant" atoms
+ // that need not appear in the compacted list.
+ //
+ HashSet<CapabilityAtom> redundantAtomsSet;
+ for( auto atom : m_expandedAtoms )
+ {
+ auto& atomInfo = _getInfo(atom);
+ for(auto baseAtom : atomInfo.bases)
+ {
+ // Note: dealing with possible early termination of the `bases` list.
+ if(baseAtom == CapabilityAtom::Invalid)
+ break;
+
+ redundantAtomsSet.Add(baseAtom);
+ }
+ }
+
+ // Once we are done figuring out which atoms are redundant,
+ // we can iterate over the expanded list and add all the
+ // non-redundant ones to the compacted output list.
+ //
+ outAtoms.clear();
+ for( auto atom : m_expandedAtoms )
+ {
+ if(!redundantAtomsSet.Contains(atom))
+ {
+ outAtoms.add(atom);
+ }
+ }
}
bool CapabilitySet::isEmpty() const
@@ -298,7 +294,7 @@ bool CapabilitySet::isEmpty() const
// Checking if a capability set is empty is trivial in any representation;
// all we need to know is if it has zero atoms in its definition.
//
- return m_atoms.getCount() == 0;
+ return m_expandedAtoms.getCount() == 0;
}
bool CapabilitySet::isInvalid() const
@@ -313,115 +309,484 @@ bool CapabilitySet::isInvalid() const
// invalid (e.g., a set {A,B} would be invalid if A and B are incompatible,
// but it would not be in the canonical form this subroutine checks).
//
- if(m_atoms.getCount() != 1) return false;
- return m_atoms[0] == CapabilityAtom::Invalid;
+ if(m_expandedAtoms.getCount() != 1) return false;
+ return m_expandedAtoms[0] == CapabilityAtom::Invalid;
}
bool CapabilitySet::isIncompatibleWith(CapabilityAtom that) const
{
- // We know that capabilities that are in an inheritnace
- // relationship with one another can't be incompatible.
+ // Checking for incompatibility is complicated, so it is only implemented
+ // for full (expanded) sets; wrap `that` in a `CapabilitySet` and delegate.
//
- if(this->implies(that) || CapabilitySet(that).implies(*this))
- return false;
+ return isIncompatibleWith(CapabilitySet(that));
+}
- // Othwerise, we want to perform a check for each of the
- // atoms in this set, whether it is incompatible with any
- // of the atoms in the other set (which in this case is one atom).
+uint32_t CapabilitySet::_calcConflictMask() const
+{
+ // Given a capability set, we want to compute the mask representing all groups of
+ // features for which it holds a potentially-conflicting atom (the OR of each atom's `conflictMask`).
//
- for( auto thisAtom : this->m_atoms )
+ uint32_t mask = 0;
+ for( auto atom : m_expandedAtoms )
{
- if(_isIncompatible(thisAtom, that))
- return true;
+ mask |= uint32_t(_getInfo(atom).conflictMask);
}
-
- return false;
+ return mask;
}
bool CapabilitySet::isIncompatibleWith(CapabilitySet const& that) const
{
- // We need to look at the atoms in `this` that are not
- // present in `that`, and vice versa. For each such atom
- // we will check if it is incompatible with the other, by
- // virtue of the other already including a concrete atom
- // that cannot co-exist with it.
+ // The `this` and `that` sets are incompatible if there exists
+ // an atom A in `this` and an atom B in `that` such that
+ // A and B are not equal, but the two have overlapping "conflict masks."
//
- for( auto thisAtom : this->m_atoms )
- {
- if(that.isIncompatibleWith(thisAtom))
- return true;
- }
- for( auto thatAtom : that.m_atoms )
+ // Equivalently, we can say that the two are in conflict if
+ //
+ // * One of the two sets contains an atom A with conflict mask M
+ // * The other set contains at least one atom that conflicts with M
+ // * The other set does not contain A
+ //
+ // Our approach here is all about minimizing the number of
+ // iterations we take over lists of atoms, and trying to
+ // avoid anything super-linear.
+
+ // We start by identifying the OR of the conflict masks for
+ // all features in `this` and `that`.
+ //
+ uint32_t thisMask = this->_calcConflictMask();
+ uint32_t thatMask = that._calcConflictMask();
+
+ // Early exit: the mask of any single atom is a subset of the mask
+ // of the set that holds it, so if `thisMask` and `thatMask` have no
+ // overlap then none of the per-atom tests in the loop below could
+ // succeed, and there can be no conflict.
+ //
+ if((thisMask & thatMask) == 0)
+ return false;
+
+ // Next we will iterate over the two sets in tandem (O(N) time
+ // in the size of the larger set), and identify any elements
+ // that are present in one and not the other.
+ //
+ Index thisCount = this->m_expandedAtoms.getCount();
+ Index thatCount = that.m_expandedAtoms.getCount();
+ Index thisIndex = 0;
+ Index thatIndex = 0;
+ for(;;)
{
- if(this->isIncompatibleWith(thatAtom))
- return true;
+ if(thisIndex == thisCount) break;
+ if(thatIndex == thatCount) break;
+
+ auto thisAtom = this->m_expandedAtoms[thisIndex];
+ auto thatAtom = that.m_expandedAtoms[thatIndex];
+
+ if(thisAtom == thatAtom)
+ {
+ // An atom shared by both sets cannot be a source of conflict.
+ //
+ thisIndex++;
+ thatIndex++;
+ continue;
+ }
+
+ if( thisAtom < thatAtom )
+ {
+ // `thisAtom` is present in `this` but not `that`.
+ //
+ // If `thisAtom` has a conflict mask that overlaps
+ // with `thatMask`, then we have a conflict: the
+ // other set doesn't include `thisAtom`, but *does*
+ // include something with an overlapping mask
+ // (we don't know what at this point in the code).
+ //
+ auto thisAtomMask = uint32_t(_getInfo(thisAtom).conflictMask);
+ if(thisAtomMask & thatMask)
+ return true;
+ thisIndex++;
+ }
+ else
+ {
+ SLANG_ASSERT(thisAtom > thatAtom);
+
+ // `thatAtom` is present in `that` but not `this`.
+ //
+ // The logic here is the mirror image of the case above.
+ //
+ auto thatAtomMask = uint32_t(_getInfo(thatAtom).conflictMask);
+ if(thatAtomMask & thisMask)
+ return true;
+ thatIndex++;
+ }
}
- return false;
- // TODO: If we had a representation that stored a minified,
- // sorted, deduplicated list of atoms, then it would be easy
- // to iterate over the two lists in tandem and identify any
- // element that is present in one list but not the other.
- //
- // Those elements would be the candidates that could cause
- // incompatiblity, so that we wouldn't need to perform
- // the check on each atom like we do above.
+ return false;
}
bool CapabilitySet::implies(CapabilitySet const& that) const
{
- // This capability set implies `other` if for every atom in `other`,
- // that atom is present in this sets list of atoms or it is
- // implies by something in the list of atoms.
+ // One capability set implies another if it is a super-set
+ // of the other one. Think of it this way: if your target
+ // supports features {X, Y, Z}, then that implies it also
+ // supports features {X,Z}.
+ //
+ // Because both `this` and `that` have expanded lists
+ // of all the capability atoms they imply *and* those
+ // lists are sorted, we can simply walk through the
+ // lists in tandem and see if there are any entries
+ // in `that` which are not present in `this`.
+
+ Index thisCount = this->m_expandedAtoms.getCount();
+ Index thatCount = that.m_expandedAtoms.getCount();
+
+ // We cannot possibly have `this` contain all the atoms
+ // in `that` if the latter has more atoms.
+ //
+ if(thatCount > thisCount)
+ return false;
+
+ // Note: the following iteration is O(N) in the size
+ // of the larger of the two sets, which is probably
+ // needlessly inefficient. We might expect that `that`
+ // will often be a much smaller set, and we'd like to
+ // scale in its size rather than the size of `this`.
+ //
+ // A more advanced algorithm here would be to do
+ // something recursive:
//
- for( auto atom : that.m_atoms )
+ // * If `that` is singleton set, then we can find
+ // whether `this` contains it via binary search.
+ //
+ // * Otherwise, we can split `that` into two
+ // equally-sized subsets. By taking a "pivot" value
+ // from where that split took place we can then
+ // use a binary search to partition `this` into
+ // two subsets and recurse on each side of that
+ // partition.
+ //
+ // In practice, the size of the sets we are dealing
+ // with right now doesn't justify such a "clever" algorithm.
+
+ Index thisIndex = 0;
+ Index thatIndex = 0;
+ for(;;)
{
- if(!this->implies(atom))
+ if(thisIndex == thisCount) break;
+ if(thatIndex == thatCount) break;
+
+ auto thisAtom = this->m_expandedAtoms[thisIndex];
+ auto thatAtom = that.m_expandedAtoms[thatIndex];
+
+ if( thisAtom == thatAtom )
+ {
+ // We have an atom that both sets contain;
+ // we should skip past it and keep looking.
+ //
+ thisIndex++;
+ thatIndex++;
+ continue;
+ }
+
+ if( thisAtom < thatAtom )
+ {
+ // We have an atom that `this` contains,
+ // but `that` doesn't; that is consistent
+ // with `this` being a super-set, so we
+ // just skip the item and keep searching.
+ //
+ thisIndex++;
+ }
+ else
+ {
+ SLANG_ASSERT(thisAtom > thatAtom);
+
+ // We have an atom in `that` which isn't
+ // also in `this`, so we know it cannot
+ // be a subset.
+ //
return false;
+ }
+ }
+
+ // The loop stops as soon as *either* list is exhausted. If `this`
+ // ran out first, the atoms of `that` we never visited are all larger
+ // than every atom in `this`, so they cannot be present in `this`
+ // (e.g., {A,C} must not imply {A,D}). Thus `this` is a super-set
+ // only if every atom of `that` was matched.
+ //
+ return thatIndex == thatCount;
- return true;
-
- // TODO: If we had a representation that stored an expanded
- // sorted, deduplicated list of atoms, then we could
- // check the `implies` relationship by scanning through
- // the two lists in tandem and identifying any element
- // in the `that` list that isn't in the `this` list.
- // Such elements would indicate that `that` is not a subset
- // of `this`.
}
+ /// Helper functor for binary search on lists of `CapabilityAtom`.
+ ///
+ /// Returns a negative value when `left` orders before `right`, zero when
+ /// the two are equal, and a positive value otherwise. The call operator
+ /// is `const` so the functor can also be invoked through a const object.
+struct CapabilityAtomComparator
+{
+ int operator()(CapabilityAtom left, CapabilityAtom right) const
+ {
+ return int(Int(left) - Int(right));
+ }
+};
bool CapabilitySet::implies(CapabilityAtom atom) const
{
- // If our list of explicit atoms contains `atom`, then
- // we definitely imply it.
+ // The common case here is when `atom` is not an alias.
//
- // TODO: If we stored our atom lists sorted, then
- // this operation could be logarithmic rather than
- // linear.
- //
- if(m_atoms.contains(atom))
- return true;
+ if( _getInfo(atom).flavor != CapabilityAtomFlavor::Alias )
+ {
+ // Every non-alias atom that `this` implies should
+ // be present in the `m_expandedAtoms` list.
+ //
+ // Because the list is sorted, we can find out whether
+ // it contains `atom` with a binary search.
+ //
+ Index result = m_expandedAtoms.binarySearch(atom, CapabilityAtomComparator());
+ return result >= 0;
+ }
+ else
+ {
+ // In the case where `atom` is an alias, it won't
+ // appear in the expanded list, and we need to check
+ // whether `this` set implies everything that `atom`
+ // transitively inherits from.
+ //
+ // The simplest way to do that is to expand `atom`
+ // into the full capability set it stands for and
+ // check that.
+ //
+ return implies(CapabilitySet(atom));
+ }
+}
- // If any of our atoms implies `atom` then we
- // also imply it.
+Int CapabilitySet::countIntersectionWith(CapabilitySet const& that) const
+{
+ // The goal of this subroutine is to count the number of
+ // atoms in the intersection of `this` and `that`,
+ // without explicitly forming that intersection.
//
- // TODO: If we stored an expanded atom list, then
- // this recursion could be skipped completely, since
- // the containment check above would cover inheirtance
- // relationships too.
+ // Our approach here will be to iterate over the two sorted
+ // lists in tandem (O(N) in the size of the larger set)
+ // and count the atoms that appear in both.
//
- for( auto thisAtom : m_atoms )
+ // TODO: There should be an asymptotically faster
+ // recursive algorithm here.
+
+ Int intersectionCount = 0;
+
+ Index thisCount = this->m_expandedAtoms.getCount();
+ Index thatCount = that.m_expandedAtoms.getCount();
+ Index thisIndex = 0;
+ Index thatIndex = 0;
+ for(;;)
{
- if(_implies(thisAtom, atom))
- return true;
+ if(thisIndex == thisCount) break;
+ if(thatIndex == thatCount) break;
+
+ auto thisAtom = this->m_expandedAtoms[thisIndex];
+ auto thatAtom = that.m_expandedAtoms[thatIndex];
+
+ if( thisAtom == thatAtom )
+ {
+ // An atom that both sets contain: count it and advance both lists.
+
+ intersectionCount++;
+ thisIndex++;
+ thatIndex++;
+ continue;
+ }
+
+ if( thisAtom < thatAtom )
+ {
+ // An atom in `this` but not `that`: skip it.
+
+ thisIndex++;
+ }
+ else
+ {
+ SLANG_ASSERT(thisAtom > thatAtom);
+
+ // An atom in `that` but not `this`: skip it.
+
+ thatIndex++;
+ }
}
+ return intersectionCount;
+}
+
+bool CapabilitySet::isBetterForTarget(
+ CapabilitySet const& existingCaps,
+ CapabilitySet const& targetCaps)
+{
+ auto& candidateCaps = *this;
+
+ // The task here is to determine if `candidateCaps` should
+ // be considered "better" than `existingCaps` in the context
+ // of compilation for a target with the given `targetCaps`.
+ //
+ // In an ideal world, this computation could be quite simple:
+ //
+ // * If either `candidateCaps` or `existingCaps` is not implied by
+ // `targetCaps` (that is, they include requirements that aren't
+ // provided by the target), then the other is automatically "better."
+ //
+ // * Otherwise, one set is "better" than the other if it is a
+ // super-set (which is what `implies()` tests).
+ //
+ // There are two main reasons we can't use that simple logic:
+ //
+ // 1. Currently a user of Slang can compile for a target but
+ // not actually spell out its capabilities fully or correctly.
+ // They might compile for `sm_5_0` but use ray tracing features
+ // that require `sm_6_2` and expect the compiler to figure out
+ // what they "obviously" meant. Thus we cannot assume that
+ // `targetCaps` can be used to rule out candidates fully.
+ //
+ // 2. Sometimes there are multiple ways for a target to provide
+ // the same feature (e.g., multiple extensions) and because of (1)
+ // we cannot always rely on the `targetCaps` to tell us which to
+ // use. Thus we cannot rely on pure subset/`implies()` to define
+ // better-ness, and need some way to break ties.
+ //
+ // The following logic is a bunch of "do what I mean" nonsense that
+ // tries to capture a reasonable intuition of what "better"-ness
+ // should mean with these caveats.
+
+ // First, if either candidate is fundamentally incompatible
+ // with the target, we shouldn't favor it.
+ //
+ if(candidateCaps.isIncompatibleWith(targetCaps)) return false;
+ if(existingCaps.isIncompatibleWith(targetCaps)) return true;
+
+ // Next, we want to compare the candidates to the `targetCaps`
+ // to figure out whether one is obviously "more specialized" for
+ // the target.
+ //
+ // We measure the degree to which a candidate is specialized for
+ // the target as the size of its set intersection with `targetCaps`.
+ //
+ // TODO: If both `candidateCaps` and `existingCaps` are implied
+ // by `targetCaps`, then this amounts to just measuring the
+ // size of each set. We probably want this size-based check to
+ // come later in the overall process.
+ //
+ // TODO: A better model here might be to actually compute the actual
+ // intersected sets, and then check if one is a super-set of the other.
+ //
+ auto candidateIntersectionSize = targetCaps.countIntersectionWith(candidateCaps);
+ auto existingIntersectionSize = targetCaps.countIntersectionWith(existingCaps);
+ if(candidateIntersectionSize != existingIntersectionSize)
+ return candidateIntersectionSize > existingIntersectionSize;
+
+ // Next we want to consider that if one of the two candidates
+ // is actually available on the target (meaning that it is
+ // implied by `targetCaps`) then we probably want to pick that one
+ // (since we can use that candidate on the chosen target without
+ // enabling any additional features the user didn't ask for).
+ //
+ // TODO: This step currently needs to come after the preceding
+ // one because otherwise we risk selecting a `__target_intrinsic`
+ // decoration with *no* requirements (which are currently being
+ // added implicitly in many places) over any one with explicit
+ // requirements (since every target implies the empty set of
+ // requirements).
+ //
+ // In many ways the counting-based logic above amounts to a quick
+ // fix to prefer a non-empty set of requirements over an empty one,
+ // so long as something in that non-empty set overlaps with the target.
+ //
+ // TODO: The best fix is probably to figure out how "catch-all"
+ // intrinsic function definitions should be encoded; we clearly
+ // want them to be used only as a fallback when no target-specific
+ // variants are present.
+ //
+ bool candidateIsAvailable = targetCaps.implies(candidateCaps);
+ bool existingIsAvailable = targetCaps.implies(existingCaps);
+ if(candidateIsAvailable != existingIsAvailable)
+ return candidateIsAvailable;
+
+ // All preceding factors being equal, we prefer
+ // a candidate that is strictly more specialized than the other:
+ // the candidate wins if it implies the existing choice, and it
+ // loses if only the existing choice implies the candidate.
+ //
+ // TODO: This logic has the negative effect of always preferring
+ // to enable optional features even if they aren't necessary.
+ // It would prefer the set {glsl, optionalFeature} over the set
+ // {glsl}, even though we might argue that a default implementation
+ // that works without any optional features is "obviously" what
+ // the user means if they didn't enable those features.
+ //
+ // TODO: The right answer is possibly that we want to partition
+ // `candidateCaps` and `existingCaps` into two parts: their
+ // intersection with `targetCaps` and their difference with it.
+ //
+ // For the intersection part of things, we'd want to favor a
+ // definition that is more specialized, while for the difference
+ // part we'd actually want to favor a definition that is less
+ // specialized.
+ //
+ if(candidateCaps.implies(existingCaps)) return true;
+ if(existingCaps.implies(candidateCaps)) return false;
+
+ // At this point we have the problem that neither candidate
+ // appears to be "obviously" better for the target, but we
+ // want some way to disambiguate them.
+ //
+ // What we want to do now is scan through what makes each candidate
+ // different from the other, and see if anything in either case
+ // has a ranking that should make it be preferred.
+ //
+ // TODO: This should probably *not* be considering anything that
+ // is implied/supported by the target.
+ //
+ auto candidateScore = candidateCaps._calcDifferenceScoreWith(existingCaps);
+ auto existingScore = existingCaps._calcDifferenceScoreWith(candidateCaps);
+ if(candidateScore != existingScore)
+ return candidateScore > existingScore;
return false;
}
+uint32_t CapabilitySet::_calcDifferenceScoreWith(CapabilitySet const& that) const
+{
+ uint32_t score = 0;
+
+ // Our approach here will be to scan through `this` and `that`
+ // to identify atoms that are in `this` but not `that` (that is,
+ // the atoms that would be present in the set difference `this - that`)
+ // and then compute the maximum rank/score of those atoms.
+ //
+ // The scan must visit *every* atom of `this`: once `that` has been
+ // exhausted, all the atoms left in `this` are necessarily absent from
+ // `that`, and so they belong to the difference as well.
+
+ Index thisCount = this->m_expandedAtoms.getCount();
+ Index thatCount = that.m_expandedAtoms.getCount();
+ Index thisIndex = 0;
+ Index thatIndex = 0;
+ for(;;)
+ {
+ if(thisIndex == thisCount) break;
+
+ auto thisAtom = this->m_expandedAtoms[thisIndex];
+
+ // Only consult `that` while it still has unvisited atoms.
+ //
+ if( thatIndex != thatCount )
+ {
+ auto thatAtom = that.m_expandedAtoms[thatIndex];
+
+ if( thisAtom == thatAtom )
+ {
+ // Present in both sets, so not part of the difference.
+ //
+ thisIndex++;
+ thatIndex++;
+ continue;
+ }
+
+ if( thisAtom > thatAtom )
+ {
+ // `thatAtom` is only in `that`; it is irrelevant to
+ // `this - that`, so just step past it.
+ //
+ thatIndex++;
+ continue;
+ }
+
+ SLANG_ASSERT(thisAtom < thatAtom);
+ }
+
+ // `thisAtom` is not present in `that`, so it
+ // should contribute to our ranking of the difference.
+ //
+ auto& thisAtomInfo = _getInfo(thisAtom);
+ auto thisAtomRank = thisAtomInfo.rank;
+
+ if( thisAtomRank > score )
+ {
+ score = thisAtomRank;
+ }
+
+ thisIndex++;
+ }
+ return score;
+}
+
+
bool CapabilitySet::operator==(CapabilitySet const& other) const
{
+ // Each set stores its atoms as a sorted, deduplicated, fully
+ // expanded list, so two sets are equal exactly when those lists
+ // match element-for-element. Comparing the lists directly takes
+ // a single linear scan instead of two `implies()` passes.
+ //
+ Index count = m_expandedAtoms.getCount();
+ if(count != other.m_expandedAtoms.getCount())
+ return false;
+
+ for(Index i = 0; i < count; ++i)
+ {
+ if(m_expandedAtoms[i] != other.m_expandedAtoms[i])
+ return false;
+ }
+ return true;
- return this->implies(other) && other.implies(*this);
}
diff --git a/source/slang/slang-capability.h b/source/slang/slang-capability.h
index 662f7eed8..5392a669a 100644
--- a/source/slang/slang-capability.h
+++ b/source/slang/slang-capability.h
@@ -34,7 +34,7 @@ enum class CapabilityAtom : int32_t
//
Invalid = -1,
-#define SLANG_CAPABILITY_ATOM(ENUMERATOR, NAME, FLAGS, BASE0, BASE1, BASE2, BASE3) \
+#define SLANG_CAPABILITY_ATOM(ENUMERATOR, NAME, FLAVOR, CONFLICT, RANK, BASE0, BASE1, BASE2, BASE3) \
ENUMERATOR,
#include "slang-capability-defs.h"
@@ -127,22 +127,27 @@ public:
bool operator==(CapabilitySet const& that) const;
/// Get access to the raw atomic capabilities that define this set.
- List<CapabilityAtom> const& getAtoms() const { return m_atoms; }
+ List<CapabilityAtom> const& getExpandedAtoms() const { return m_expandedAtoms; }
+
+ /// Calculate a list of "compacted" atoms, which excludes any atoms from the expanded list that are implied by another item in the list.
+ void calcCompactedAtoms(List<CapabilityAtom>& outAtoms) const;
+
+ Int countIntersectionWith(CapabilitySet const& that) const;
+
+ bool isBetterForTarget(CapabilitySet const& that, CapabilitySet const& targetCaps);
private:
+ void _init(Int atomCount, CapabilityAtom const* atoms);
- // The underlying representation we are using is currently very simple:
- // a capability set is stored as a list of the atoms that were passed
- // in at the time the set was constructed.
- //
- // Currently, no effort is made to sort the atoms, remove duplicates,
- // or to expand the list when one atom entails another.
- //
- // TODO: Much more efficient representations are possible, and we
- // should consider them if the performance of `CapabilitySet` ever
- // prooves to be an issue.
+ uint32_t _calcConflictMask() const;
+ uint32_t _calcDifferenceScoreWith(CapabilitySet const& other) const;
+
+ // The underlying representation we use is a sorted and deduplicated
+ // list of all the (non-alias) atoms that are present in the set.
+ // This "expanded" list uses the transitive closure over the inheritance
+ // relationship between the atoms.
//
- List<CapabilityAtom> m_atoms;
+ List<CapabilityAtom> m_expandedAtoms;
};
/// Are the `left` and `right` capability sets unequal?
diff --git a/source/slang/slang-compiler.cpp b/source/slang/slang-compiler.cpp
index 158aac141..1e9357f4f 100755
--- a/source/slang/slang-compiler.cpp
+++ b/source/slang/slang-compiler.cpp
@@ -335,15 +335,20 @@ namespace Slang
//
- Profile Profile::lookUp(char const* name)
+ Profile Profile::lookUp(UnownedStringSlice const& name)
{
- #define PROFILE(TAG, NAME, STAGE, VERSION) if(strcmp(name, #NAME) == 0) return Profile::TAG;
- #define PROFILE_ALIAS(TAG, DEF, NAME) if(strcmp(name, #NAME) == 0) return Profile::TAG;
+ #define PROFILE(TAG, NAME, STAGE, VERSION) if(name == UnownedTerminatedStringSlice(#NAME)) return Profile::TAG;
+ #define PROFILE_ALIAS(TAG, DEF, NAME) if(name == UnownedTerminatedStringSlice(#NAME)) return Profile::TAG;
#include "slang-profile-defs.h"
return Profile::Unknown;
}
+ Profile Profile::lookUp(char const* name)
+ {
+ return lookUp(UnownedTerminatedStringSlice(name));
+ }
+
char const* Profile::getName()
{
switch( raw )
@@ -968,7 +973,7 @@ namespace Slang
DWORD flags = 0;
- switch( targetReq->floatingPointMode )
+ switch( targetReq->getFloatingPointMode() )
{
default:
break;
@@ -1237,7 +1242,7 @@ SlangResult dissassembleDXILUsingDXC(
// If we are not in pass through, lookup the default compiler for the emitted source type
if (downstreamCompiler == PassThroughMode::None)
{
- auto target = targetReq->target;
+ auto target = targetReq->getTarget();
switch (target)
{
case CodeGenTarget::PTX:
@@ -1387,7 +1392,7 @@ SlangResult dissassembleDXILUsingDXC(
options.flags &= ~(CompileOptions::Flag::EnableExceptionHandling | CompileOptions::Flag::EnableSecurityChecks);
// Set what kind of target we should build
- switch (targetReq->target)
+ switch (targetReq->getTarget())
{
case CodeGenTarget::HostCallable:
case CodeGenTarget::SharedLibrary:
@@ -1434,7 +1439,7 @@ SlangResult dissassembleDXILUsingDXC(
default: SLANG_ASSERT(!"Unhandled debug level"); break;
}
- switch( targetReq->floatingPointMode )
+ switch( targetReq->getFloatingPointMode() )
{
case FloatingPointMode::Default: options.floatingPointMode = DownstreamCompiler::FloatingPointMode::Default; break;
case FloatingPointMode::Precise: options.floatingPointMode = DownstreamCompiler::FloatingPointMode::Precise; break;
@@ -1628,6 +1633,20 @@ SlangResult dissassembleDXILUsingDXC(
request.spirvVersion.minor = spirvLanguageVersion.m_minor;
request.spirvVersion.patch = spirvLanguageVersion.m_patch;
}
+ else
+ {
+ // HACK: look at the requested capabilities of the target,
+ // and see if they specify a SPIR-V version that we should
+ // pass down.
+ //
+ auto targetCaps = targetReq->getTargetCaps();
+ if(targetCaps.implies(CapabilityAtom::SPIRV_1_4))
+ {
+ request.spirvVersion.major = 1;
+ request.spirvVersion.minor = 4;
+ request.spirvVersion.patch = 0;
+ }
+ }
request.outputFunc = outputFunc;
request.outputUserData = &spirvOut;
@@ -1698,7 +1717,7 @@ SlangResult dissassembleDXILUsingDXC(
{
CompileResult result;
- auto target = targetReq->target;
+ auto target = targetReq->getTarget();
switch (target)
{
@@ -2041,7 +2060,7 @@ SlangResult dissassembleDXILUsingDXC(
{
// Writing to console, so we need to generate text output.
- switch (targetReq->target)
+ switch (targetReq->getTarget())
{
#if SLANG_ENABLE_DXBC_SUPPORT
case CodeGenTarget::DXBytecode:
diff --git a/source/slang/slang-compiler.h b/source/slang/slang-compiler.h
index ace2cb842..008c5eb5a 100755
--- a/source/slang/slang-compiler.h
+++ b/source/slang/slang-compiler.h
@@ -1141,12 +1141,22 @@ namespace Slang
class TargetRequest : public RefObject
{
public:
- Linkage* linkage;
- CodeGenTarget target;
- SlangTargetFlags targetFlags = 0;
- Slang::Profile targetProfile = Slang::Profile();
- FloatingPointMode floatingPointMode = FloatingPointMode::Default;
- CapabilitySet targetCaps = CapabilitySet::makeInvalid();
+ TargetRequest(Linkage* linkage, CodeGenTarget format);
+
+ void addTargetFlags(SlangTargetFlags flags)
+ {
+ targetFlags |= flags;
+ }
+ void setTargetProfile(Slang::Profile profile)
+ {
+ targetProfile = profile;
+ }
+ void setFloatingPointMode(FloatingPointMode mode)
+ {
+ floatingPointMode = mode;
+ }
+ void addCapability(CapabilityAtom capability);
+
bool isWholeProgramRequest()
{
@@ -1154,9 +1164,10 @@ namespace Slang
}
Linkage* getLinkage() { return linkage; }
- CodeGenTarget getTarget() { return target; }
+ CodeGenTarget getTarget() { return format; }
Profile getTargetProfile() { return targetProfile; }
FloatingPointMode getFloatingPointMode() { return floatingPointMode; }
+ SlangTargetFlags getTargetFlags() { return targetFlags; }
CapabilitySet getTargetCaps();
Session* getSession();
@@ -1168,6 +1179,15 @@ namespace Slang
Dictionary<Type*, RefPtr<TypeLayout>>& getTypeLayouts() { return typeLayouts; }
TypeLayout* getTypeLayout(Type* type);
+
+ private:
+ Linkage* linkage = nullptr;
+ CodeGenTarget format = CodeGenTarget::Unknown;
+ SlangTargetFlags targetFlags = 0;
+ Slang::Profile targetProfile = Slang::Profile();
+ FloatingPointMode floatingPointMode = FloatingPointMode::Default;
+ List<CapabilityAtom> rawCapabilities;
+ CapabilitySet cookedCapabilities;
};
/// Are we generating code for a D3D API?
@@ -1898,6 +1918,8 @@ namespace Slang
virtual SLANG_NO_THROW SlangResult SLANG_MCALL getSession(slang::ISession** outSession) SLANG_OVERRIDE;
virtual SLANG_NO_THROW SlangReflection* SLANG_MCALL getReflection() SLANG_OVERRIDE;
virtual SLANG_NO_THROW void SLANG_MCALL setCommandLineCompilerMode() SLANG_OVERRIDE;
+ virtual SLANG_NO_THROW SlangResult SLANG_MCALL addTargetCapability(SlangInt targetIndex, SlangCapabilityID capability) SLANG_OVERRIDE;
+
EndToEndCompileRequest(
Session* session);
@@ -2158,6 +2180,8 @@ namespace Slang
SLANG_NO_THROW SlangResult SLANG_MCALL loadStdLib(const void* stdLib, size_t stdLibSizeInBytes) override;
SLANG_NO_THROW SlangResult SLANG_MCALL saveStdLib(ISlangBlob** outBlob) override;
+ SLANG_NO_THROW SlangCapabilityID SLANG_MCALL findCapability(char const* name) override;
+
/// Get the default compiler for a language
DownstreamCompiler* getDefaultDownstreamCompiler(SourceLanguage sourceLanguage);
diff --git a/source/slang/slang-emit-c-like.cpp b/source/slang/slang-emit-c-like.cpp
index 9d208b8a3..facb8a710 100644
--- a/source/slang/slang-emit-c-like.cpp
+++ b/source/slang/slang-emit-c-like.cpp
@@ -630,7 +630,7 @@ String CLikeSourceEmitter::generateName(IRInst* inst)
// If the instruction names something
// that should be emitted as a target intrinsic,
// then use that name instead.
- if(auto intrinsicDecoration = findBestTargetIntrinsicDecorationXXX(inst))
+ if(auto intrinsicDecoration = findBestTargetIntrinsicDecoration(inst))
{
return String(intrinsicDecoration->getDefinition());
}
@@ -1092,7 +1092,7 @@ bool CLikeSourceEmitter::shouldFoldInstIntoUseSites(IRInst* inst)
// This is significant, because we can within a target intrinsics definition multiple accesses to the same
// parameter. This is not indicated into the call, and can lead to output code computes something multiple
// times as it is folding into the expression of the the target intrinsic, which we don't want.
- if (auto targetIntrinsicDecoration = findBestTargetIntrinsicDecorationXXX(funcValue))
+ if (auto targetIntrinsicDecoration = findBestTargetIntrinsicDecoration(funcValue))
{
// Find the index of the original instruction, to see if it's multiply used.
IRUse* args = callInst->getArgs();
@@ -1300,7 +1300,7 @@ IRTargetSpecificDecoration* CLikeSourceEmitter::findBestTargetDecoration(IRInst*
return Slang::findBestTargetDecoration(inInst, getTargetCaps());
}
-IRTargetIntrinsicDecoration* CLikeSourceEmitter::findBestTargetIntrinsicDecorationXXX(IRInst* inInst)
+IRTargetIntrinsicDecoration* CLikeSourceEmitter::findBestTargetIntrinsicDecoration(IRInst* inInst)
{
return as<IRTargetIntrinsicDecoration>(findBestTargetDecoration(inInst));
}
@@ -1834,25 +1834,6 @@ void CLikeSourceEmitter::emitIntrinsicCallExprImpl(
}
break;
- case 'T':
- {
- // The `$XT` case handles selecting between
- // the `gl_HitTNV` and `gl_RayTmaxNV` builtins,
- // based on what stage we are using:
- switch( m_entryPointStage )
- {
- default:
- m_writer->emit("gl_RayTmaxNV");
- break;
-
- case Stage::AnyHit:
- case Stage::ClosestHit:
- m_writer->emit("gl_HitTNV");
- break;
- }
- }
- break;
-
default:
SLANG_RELEASE_ASSERT(false);
break;
@@ -1955,7 +1936,7 @@ void CLikeSourceEmitter::emitCallExpr(IRCall* inst, EmitOpInfo outerPrec)
// We want to detect any call to an intrinsic operation,
// that we can emit it directly without mangling, etc.
- if(auto targetIntrinsic = findBestTargetIntrinsicDecorationXXX(funcValue))
+ if(auto targetIntrinsic = findBestTargetIntrinsicDecoration(funcValue))
{
emitIntrinsicCallExpr(inst, targetIntrinsic, outerPrec);
}
@@ -3334,7 +3315,7 @@ bool CLikeSourceEmitter::isTargetIntrinsic(IRFunc* func)
// it has a suitable decoration marking it as a
// target intrinsic for the current compilation target.
//
- return findBestTargetIntrinsicDecorationXXX(func) != nullptr;
+ return findBestTargetIntrinsicDecoration(func) != nullptr;
}
void CLikeSourceEmitter::emitFunc(IRFunc* func)
@@ -3367,7 +3348,7 @@ void CLikeSourceEmitter::emitStruct(IRStructType* structType)
{
// If the selected `struct` type is actually an intrinsic
// on our target, then we don't want to emit anything at all.
- if(auto intrinsicDecoration = findBestTargetIntrinsicDecorationXXX(structType))
+ if(auto intrinsicDecoration = findBestTargetIntrinsicDecoration(structType))
{
return;
}
diff --git a/source/slang/slang-emit-c-like.h b/source/slang/slang-emit-c-like.h
index 0b6e63110..a26959e54 100644
--- a/source/slang/slang-emit-c-like.h
+++ b/source/slang/slang-emit-c-like.h
@@ -176,7 +176,7 @@ public:
void emitInstResultDecl(IRInst* inst);
IRTargetSpecificDecoration* findBestTargetDecoration(IRInst* inst);
- IRTargetIntrinsicDecoration* findBestTargetIntrinsicDecorationXXX(IRInst* inst);
+ IRTargetIntrinsicDecoration* findBestTargetIntrinsicDecoration(IRInst* inst);
// Check if the string being used to define a target intrinsic
// is an "ordinary" name, such that we can simply emit a call
diff --git a/source/slang/slang-emit-glsl.cpp b/source/slang/slang-emit-glsl.cpp
index cc2494455..2a28be70a 100644
--- a/source/slang/slang-emit-glsl.cpp
+++ b/source/slang/slang-emit-glsl.cpp
@@ -38,7 +38,23 @@ SlangResult GLSLSourceEmitter::init()
void GLSLSourceEmitter::_requireRayTracing()
{
- m_glslExtensionTracker->requireExtension(UnownedStringSlice::fromLiteral("GL_NV_ray_tracing"));
+ // There is more than one extension that provides ray-tracing capabilities,
+ // and we need to pick which one to enable.
+ //
+ // By default, we will use the `GL_EXT_ray_tracing` extension, but if
+ // the user has explicitly opted in to the `GL_NV_ray_tracing` extension
+ // we will use that one instead.
+ //
+ if( getTargetCaps().implies(CapabilityAtom::GL_NV_ray_tracing) )
+ {
+ m_glslExtensionTracker->requireExtension(UnownedStringSlice::fromLiteral("GL_NV_ray_tracing"));
+ }
+ else
+ {
+ m_glslExtensionTracker->requireExtension(UnownedStringSlice::fromLiteral("GL_EXT_ray_tracing"));
+ m_glslExtensionTracker->requireSPIRVVersion(SemanticVersion(1, 4));
+ }
+
m_glslExtensionTracker->requireVersion(ProfileVersion::GLSL_460);
}
@@ -542,7 +558,14 @@ bool GLSLSourceEmitter::_emitGLSLLayoutQualifier(LayoutResourceKind kind, EmitVa
m_writer->emit("layout(push_constant)\n");
break;
case LayoutResourceKind::ShaderRecord:
- m_writer->emit("layout(shaderRecordNV)\n");
+ if( getTargetCaps().implies(CapabilityAtom::GL_NV_ray_tracing) )
+ {
+ m_writer->emit("layout(shaderRecordNV)\n");
+ }
+ else
+ {
+ m_writer->emit("layout(shaderRecordEXT)\n");
+ }
break;
}
@@ -1029,19 +1052,40 @@ void GLSLSourceEmitter::emitLayoutQualifiersImpl(IRVarLayout* layout)
case LayoutResourceKind::RayPayload:
{
- m_writer->emit("rayPayloadInNV ");
+ if( getTargetCaps().implies(CapabilityAtom::GL_NV_ray_tracing) )
+ {
+ m_writer->emit("rayPayloadInNV ");
+ }
+ else
+ {
+ m_writer->emit("rayPayloadInEXT ");
+ }
}
break;
case LayoutResourceKind::CallablePayload:
{
- m_writer->emit("callableDataInNV ");
+ if( getTargetCaps().implies(CapabilityAtom::GL_NV_ray_tracing) )
+ {
+ m_writer->emit("callableDataInNV ");
+ }
+ else
+ {
+ m_writer->emit("callableDataInEXT ");
+ }
}
break;
case LayoutResourceKind::HitAttributes:
{
- m_writer->emit("hitAttributeNV ");
+ if( getTargetCaps().implies(CapabilityAtom::GL_NV_ray_tracing) )
+ {
+ m_writer->emit("hitAttributeNV ");
+ }
+ else
+ {
+ m_writer->emit("hitAttributeEXT ");
+ }
}
break;
@@ -1704,7 +1748,15 @@ void GLSLSourceEmitter::emitSimpleTypeImpl(IRType* type)
case kIROp_RaytracingAccelerationStructureType:
{
_requireRayTracing();
- m_writer->emit("accelerationStructureNV");
+
+ if( getTargetCaps().implies(CapabilityAtom::GL_NV_ray_tracing) )
+ {
+ m_writer->emit("accelerationStructureNV");
+ }
+ else
+ {
+ m_writer->emit("accelerationStructureEXT");
+ }
break;
}
@@ -1806,19 +1858,40 @@ void GLSLSourceEmitter::emitVarDecorationsImpl(IRInst* varDecl)
m_writer->emit("layout(location = ");
m_writer->emit(getRayPayloadLocation(varDecl));
m_writer->emit(")\n");
- m_writer->emit("rayPayloadNV\n");
+ if( getTargetCaps().implies(CapabilityAtom::GL_NV_ray_tracing) )
+ {
+ m_writer->emit("rayPayloadNV\n");
+ }
+ else
+ {
+ m_writer->emit("rayPayloadEXT\n");
+ }
}
if (varDecl->findDecoration<IRVulkanCallablePayloadDecoration>())
{
m_writer->emit("layout(location = ");
m_writer->emit(getCallablePayloadLocation(varDecl));
m_writer->emit(")\n");
- m_writer->emit("callableDataNV\n");
+ if( getTargetCaps().implies(CapabilityAtom::GL_NV_ray_tracing) )
+ {
+ m_writer->emit("callableDataNV\n");
+ }
+ else
+ {
+ m_writer->emit("callableDataEXT\n");
+ }
}
if (varDecl->findDecoration<IRVulkanHitAttributesDecoration>())
{
- m_writer->emit("hitAttributeNV\n");
+ if( getTargetCaps().implies(CapabilityAtom::GL_NV_ray_tracing) )
+ {
+ m_writer->emit("hitAttributeNV\n");
+ }
+ else
+ {
+ m_writer->emit("hitAttributeEXT\n");
+ }
}
if (varDecl->findDecoration<IRGloballyCoherentDecoration>())
diff --git a/source/slang/slang-emit.cpp b/source/slang/slang-emit.cpp
index ef1442a1b..20e8c0beb 100644
--- a/source/slang/slang-emit.cpp
+++ b/source/slang/slang-emit.cpp
@@ -542,7 +542,7 @@ Result linkAndOptimizeIR(
{
case CodeGenTarget::HLSL:
{
- auto profile = targetRequest->targetProfile;
+ auto profile = targetRequest->getTargetProfile();
if( profile.getFamily() == ProfileFamily::DX )
{
if(profile.getVersion() <= ProfileVersion::DX_5_0)
diff --git a/source/slang/slang-ir.cpp b/source/slang/slang-ir.cpp
index aa72cc0c3..7e84ca66a 100644
--- a/source/slang/slang-ir.cpp
+++ b/source/slang/slang-ir.cpp
@@ -2160,8 +2160,24 @@ namespace Slang
IRType* capabilityAtomType = getIntType();
IRType* capabilitySetType = getCapabilitySetType();
+ // Note: Our `CapabilitySet` representation consists of a list
+ // of `CapabilityAtom`s, and by default the list is stored
+ // "expanded" so that it includes atoms that are transitively
+ // implied by one another.
+ //
+ // For representation in the IR, it is preferable to include
+ // as few atoms as possible, so that we don't store anything
+ // redundant in, e.g., serialized modules.
+ //
+ // We thus request a list of "compacted" atoms which should
+ // be a minimal list of atoms such that they will produce
+ // the same `CapabilitySet` when expanded.
+
+ List<CapabilityAtom> compactedAtoms;
+ caps.calcCompactedAtoms(compactedAtoms);
+
List<IRInst*> args;
- for( auto atom : caps.getAtoms() )
+ for( auto atom : compactedAtoms )
{
args.add(getIntValue(capabilityAtomType, Int(atom)));
}
@@ -5656,22 +5672,6 @@ namespace Slang
// IRTargetIntrinsicDecoration
//
- static bool _areIntrinsicCapsBetterForTarget(
- CapabilitySet const& candidateCaps,
- CapabilitySet const& existingCaps,
- CapabilitySet const& targetCaps)
- {
- bool candidateIsAvailable = targetCaps.implies(candidateCaps);
- bool existingIsAvailable = targetCaps.implies(existingCaps);
- if(candidateIsAvailable != existingIsAvailable)
- return candidateIsAvailable;
-
- if(candidateCaps.implies(existingCaps))
- return true;
-
- return false;
- }
-
IRTargetIntrinsicDecoration* findAnyTargetIntrinsicDecoration(
IRInst* val)
{
@@ -5704,7 +5704,7 @@ namespace Slang
if (decorationCaps.isIncompatibleWith(targetCaps))
continue;
- if(!bestDecoration || _areIntrinsicCapsBetterForTarget(decorationCaps, bestCaps, targetCaps))
+ if(!bestDecoration || decorationCaps.isBetterForTarget(bestCaps, targetCaps))
{
bestDecoration = decoration;
bestCaps = decorationCaps;
diff --git a/source/slang/slang-options.cpp b/source/slang/slang-options.cpp
index c3de87d87..c3db39773 100644
--- a/source/slang/slang-options.cpp
+++ b/source/slang/slang-options.cpp
@@ -162,6 +162,8 @@ struct OptionsParser
int targetID = -1;
FloatingPointMode floatingPointMode = FloatingPointMode::Default;
+ List<CapabilityAtom> capabilityAtoms;
+
// State for tracking command-line errors
bool conflictingProfilesSet = false;
bool redundantProfileSet = false;
@@ -393,6 +395,11 @@ struct OptionsParser
rawTarget->profileVersion = profileVersion;
}
+ void addCapabilityAtom(RawTarget* rawTarget, CapabilityAtom atom)
+ {
+ rawTarget->capabilityAtoms.add(atom);
+ }
+
void setFloatingPointMode(RawTarget* rawTarget, FloatingPointMode mode)
{
rawTarget->floatingPointMode = mode;
@@ -655,13 +662,28 @@ struct OptionsParser
// specific stage to use for an entry point.
else if (argStr == "-profile")
{
- String name;
- SLANG_RETURN_ON_FAIL(tryReadCommandLineArgument(sink, arg, &argCursor, argEnd, name));
+ String operand;
+ SLANG_RETURN_ON_FAIL(tryReadCommandLineArgument(sink, arg, &argCursor, argEnd, operand));
+
+ // As a convenience, the `-profile` option supports an operand that consists
+ // of multiple tokens separated with `+`. The eventual goal is that each
+ // of these tokens will represent a capability that should be assumed to
+ // be present on the target.
+ //
+ List<UnownedStringSlice> slices;
+ StringUtil::split(operand.getUnownedSlice(), '+', slices);
+ Index sliceCount = slices.getCount();
+
+ // For now, we will require that the *first* capability in the list is
+ // special, and represents the traditional `Profile` to compile for in
+ // the existing Slang model.
+ //
+ UnownedStringSlice profileName = sliceCount >= 1 ? slices[0] : UnownedTerminatedStringSlice("");
- SlangProfileID profileID = session->findProfile(name.begin());
+ SlangProfileID profileID = Slang::Profile::lookUp(profileName).raw;
if( profileID == SLANG_PROFILE_UNKNOWN )
{
- sink->diagnose(SourceLoc(), Diagnostics::unknownProfile, name);
+ sink->diagnose(SourceLoc(), Diagnostics::unknownProfile, profileName);
return SLANG_FAIL;
}
else
@@ -678,6 +700,22 @@ struct OptionsParser
setStage(getCurrentEntryPoint(), stage);
}
}
+
+ // Any additional capability tokens will be assumed to represent `CapabilityAtom`s.
+ // Those atoms will need to be added to the supported capabilities of the target.
+ //
+ for(Index i = 1; i < sliceCount; ++i)
+ {
+ UnownedStringSlice atomName = slices[i];
+ CapabilityAtom atom = findCapabilityAtom(atomName);
+ if( atom == CapabilityAtom::Invalid )
+ {
+ sink->diagnose(SourceLoc(), Diagnostics::unknownProfile, atomName);
+ return SLANG_FAIL;
+ }
+
+ addCapabilityAtom(getCurrentTarget(), atom);
+ }
}
else if (argStr == "-stage")
{
@@ -1329,6 +1367,10 @@ struct OptionsParser
{
setProfileVersion(getCurrentTarget(), defaultTarget.profileVersion);
}
+ for( auto atom : defaultTarget.capabilityAtoms )
+ {
+ addCapabilityAtom(getCurrentTarget(), atom);
+ }
getCurrentTarget()->targetFlags |= defaultTarget.targetFlags;
@@ -1412,6 +1454,10 @@ struct OptionsParser
{
compileRequest->setTargetProfile(targetID, Profile(rawTarget.profileVersion).raw);
}
+ for( auto atom : rawTarget.capabilityAtoms )
+ {
+ requestImpl->addTargetCapability(targetID, SlangCapabilityID(atom));
+ }
if( rawTarget.targetFlags )
{
@@ -1539,7 +1585,7 @@ struct OptionsParser
}
else
{
- target->targetFlags |= SLANG_TARGET_FLAG_GENERATE_WHOLE_PROGRAM;
+ target->addTargetFlags(SLANG_TARGET_FLAG_GENERATE_WHOLE_PROGRAM);
targetInfo->wholeTargetOutputPath = rawOutput.path;
}
}
diff --git a/source/slang/slang-parameter-binding.cpp b/source/slang/slang-parameter-binding.cpp
index 73c94f722..9b080a11c 100644
--- a/source/slang/slang-parameter-binding.cpp
+++ b/source/slang/slang-parameter-binding.cpp
@@ -1450,7 +1450,7 @@ static RefPtr<TypeLayout> processSimpleEntryPointParameter(
//
if( isD3DTarget(context->getTargetRequest()) )
{
- auto version = context->getTargetRequest()->targetProfile.getVersion();
+ auto version = context->getTargetRequest()->getTargetProfile().getVersion();
if( version <= ProfileVersion::DX_5_0 )
{
// We will address the conflict here by claiming the corresponding
@@ -3486,7 +3486,7 @@ RefPtr<ProgramLayout> generateParameterBindings(
// On a CPU target, it's okay to have global scope parameters that use Uniform resources (because on CPU
// all resources are 'Uniform')
// TODO(JS): We'll just assume the same with CUDA target for now..
- if (!_isCPUTarget(targetReq->target) && !_isPTXTarget(targetReq->target))
+ if (!_isCPUTarget(targetReq->getTarget()) && !_isPTXTarget(targetReq->getTarget()))
{
for( auto& parameterInfo : sharedContext.parameters )
{
diff --git a/source/slang/slang-profile.h b/source/slang/slang-profile.h
index f5b15eda6..5150da27a 100644
--- a/source/slang/slang-profile.h
+++ b/source/slang/slang-profile.h
@@ -103,6 +103,7 @@ namespace Slang
ProfileFamily getFamily() const { return getProfileFamily(getVersion()); }
+ static Profile lookUp(UnownedStringSlice const& name);
static Profile lookUp(char const* name);
char const* getName();
diff --git a/source/slang/slang-repro.cpp b/source/slang/slang-repro.cpp
index e47fade70..61ab3b75d 100644
--- a/source/slang/slang-repro.cpp
+++ b/source/slang/slang-repro.cpp
@@ -418,8 +418,8 @@ static bool _isStorable(const PathInfo::Type type)
auto& dst = base[dstTargets[i]];
dst.target = srcTargetRequest->getTarget();
dst.profile = srcTargetRequest->getTargetProfile();
- dst.targetFlags = srcTargetRequest->targetFlags;
- dst.floatingPointMode = srcTargetRequest->floatingPointMode;
+ dst.targetFlags = srcTargetRequest->getTargetFlags();
+ dst.floatingPointMode = srcTargetRequest->getFloatingPointMode();
}
// Copy the entry point/target output names
@@ -906,9 +906,9 @@ struct LoadContext
auto dstTarget = linkage->targets[index];
SLANG_ASSERT(dstTarget->getTarget() == src.target);
- dstTarget->targetProfile = src.profile;
- dstTarget->targetFlags = src.targetFlags;
- dstTarget->floatingPointMode = src.floatingPointMode;
+ dstTarget->setTargetProfile(src.profile);
+ dstTarget->addTargetFlags(src.targetFlags);
+ dstTarget->setFloatingPointMode(src.floatingPointMode);
// If there is output state (like output filenames) add here
if (src.outputStates.getCount())
diff --git a/source/slang/slang-serialize-container.cpp b/source/slang/slang-serialize-container.cpp
index abdc382f9..344b4aa02 100644
--- a/source/slang/slang-serialize-container.cpp
+++ b/source/slang/slang-serialize-container.cpp
@@ -123,10 +123,10 @@ namespace Slang {
auto& dstTarget = targetComponent.target;
- dstTarget.floatingPointMode = target->floatingPointMode;
- dstTarget.profile = target->targetProfile;
- dstTarget.flags = target->targetFlags;
- dstTarget.codeGenTarget = target->target;
+ dstTarget.floatingPointMode = target->getFloatingPointMode();
+ dstTarget.profile = target->getTargetProfile();
+ dstTarget.flags = target->getTargetFlags();
+ dstTarget.codeGenTarget = target->getTarget();
out.targetComponents.add(targetComponent);
}
diff --git a/source/slang/slang.cpp b/source/slang/slang.cpp
index ef838b871..fbcc97c51 100644
--- a/source/slang/slang.cpp
+++ b/source/slang/slang.cpp
@@ -474,6 +474,12 @@ SLANG_NO_THROW SlangProfileID SLANG_MCALL Session::findProfile(
return Slang::Profile::lookUp(name).raw;
}
+SLANG_NO_THROW SlangCapabilityID SLANG_MCALL Session::findCapability(
+ char const* name)
+{
+ return SlangCapabilityID(Slang::findCapabilityAtom(UnownedTerminatedStringSlice(name)));
+}
+
SLANG_NO_THROW void SLANG_MCALL Session::setDownstreamCompilerPath(
SlangPassThrough inPassThrough,
char const* path)
@@ -571,7 +577,7 @@ DownstreamCompiler* Session::getDefaultDownstreamCompiler(SourceLanguage sourceL
Profile getEffectiveProfile(EntryPoint* entryPoint, TargetRequest* target)
{
auto entryPointProfile = entryPoint->getProfile();
- auto targetProfile = target->targetProfile;
+ auto targetProfile = target->getTargetProfile();
// Depending on the target *format* we might have to restrict the
// profile family to one that makes sense.
@@ -579,7 +585,7 @@ Profile getEffectiveProfile(EntryPoint* entryPoint, TargetRequest* target)
// TODO: Some of this should really be handled as validation at
// the front-end. People shouldn't be allowed to ask for SPIR-V
// output with Shader Model 5.0...
- switch(target->target)
+ switch(target->getTarget())
{
default:
break;
@@ -747,9 +753,9 @@ void Linkage::addTarget(
auto targetIndex = addTarget(CodeGenTarget(desc.format));
auto target = targets[targetIndex];
- target->floatingPointMode = FloatingPointMode(desc.floatingPointMode);
- target->targetFlags = desc.flags;
- target->targetProfile = Profile(desc.profile);
+ target->setFloatingPointMode(FloatingPointMode(desc.floatingPointMode));
+ target->addTargetFlags(desc.flags);
+ target->setTargetProfile(Profile(desc.profile));
}
#if 0
@@ -961,6 +967,12 @@ SlangResult Linkage::setMatrixLayoutMode(
// TargetRequest
//
+TargetRequest::TargetRequest(Linkage* linkage, CodeGenTarget format)
+ : linkage(linkage)
+ , format(format)
+{}
+
+
Session* TargetRequest::getSession()
{
return linkage->getSessionImpl();
@@ -971,10 +983,17 @@ MatrixLayoutMode TargetRequest::getDefaultMatrixLayoutMode()
return linkage->getDefaultMatrixLayoutMode();
}
+void TargetRequest::addCapability(CapabilityAtom capability)
+{
+ rawCapabilities.add(capability);
+ cookedCapabilities = CapabilitySet::makeEmpty();
+}
+
+
CapabilitySet TargetRequest::getTargetCaps()
{
- if(!targetCaps.isInvalid())
- return targetCaps;
+ if(!cookedCapabilities.isEmpty())
+ return cookedCapabilities;
// The full `CapabilitySet` for the target will be computed
// from the combination of the code generation format, and
@@ -996,7 +1015,7 @@ CapabilitySet TargetRequest::getTargetCaps()
// are available where can be directly encoded on the declarations.
List<CapabilityAtom> atoms;
- switch(target)
+ switch(format)
{
case CodeGenTarget::GLSL:
case CodeGenTarget::GLSL_Vulkan:
@@ -1033,9 +1052,11 @@ CapabilitySet TargetRequest::getTargetCaps()
default:
break;
}
+ for(auto atom : rawCapabilities)
+ atoms.add(atom);
- targetCaps = CapabilitySet(atoms);
- return targetCaps;
+ cookedCapabilities = CapabilitySet(atoms);
+ return cookedCapabilities;
}
@@ -2136,9 +2157,7 @@ int EndToEndCompileRequest::addEntryPoint(
UInt Linkage::addTarget(
CodeGenTarget target)
{
- RefPtr<TargetRequest> targetReq = new TargetRequest();
- targetReq->linkage = this;
- targetReq->target = target;
+ RefPtr<TargetRequest> targetReq = new TargetRequest(this, target);
Index result = targets.getCount();
targets.add(targetReq);
@@ -3681,17 +3700,17 @@ int EndToEndCompileRequest::addCodeGenTarget(SlangCompileTarget target)
void EndToEndCompileRequest::setTargetProfile(int targetIndex, SlangProfileID profile)
{
- getLinkage()->targets[targetIndex]->targetProfile = Profile(profile);
+ getLinkage()->targets[targetIndex]->setTargetProfile(Profile(profile));
}
void EndToEndCompileRequest::setTargetFlags(int targetIndex, SlangTargetFlags flags)
{
- getLinkage()->targets[targetIndex]->targetFlags = flags;
+ getLinkage()->targets[targetIndex]->addTargetFlags(flags);
}
void EndToEndCompileRequest::setTargetFloatingPointMode(int targetIndex, SlangFloatingPointMode mode)
{
- getLinkage()->targets[targetIndex]->floatingPointMode = FloatingPointMode(mode);
+ getLinkage()->targets[targetIndex]->setFloatingPointMode(FloatingPointMode(mode));
}
void EndToEndCompileRequest::setMatrixLayoutMode(SlangMatrixLayoutMode mode)
@@ -3705,6 +3724,15 @@ void EndToEndCompileRequest::setTargetMatrixLayoutMode(int targetIndex, SlangMat
setMatrixLayoutMode(mode);
}
+SlangResult EndToEndCompileRequest::addTargetCapability(SlangInt targetIndex, SlangCapabilityID capability)
+{
+ auto& targets = getLinkage()->targets;
+ if(targetIndex < 0 || targetIndex >= targets.getCount())
+ return SLANG_E_INVALID_ARG;
+ targets[targetIndex]->addCapability(CapabilityAtom(capability));
+ return SLANG_OK;
+}
+
void EndToEndCompileRequest::setDebugInfoLevel(SlangDebugInfoLevel level)
{
getLinkage()->debugInfoLevel = DebugInfoLevel(level);