diff options
| author | ArielG-NV <159081215+ArielG-NV@users.noreply.github.com> | 2024-06-12 16:38:23 -0400 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-06-12 13:38:23 -0700 |
| commit | 8813c610562b1c30222ec3ef0734ef601d43b617 (patch) | |
| tree | b012aa178a52a0313ce9deab31006556a2637011 /source | |
| parent | 7447fcafa71440336f553d6e0af21b12fc74d138 (diff) | |
Capability System: Implicit capability upgrade warning/error (#4241)
* capability upgrade warning/error
adjusted implementation + tests to support a warning/error if capabilities are implicitly upgraded and test accordingly.
* add glsl profile caps
* add GLSL and HLSL capabilities to the associated capability
* syntax error in capdef
* only error if user explicitly enables capabilities
1. changed testing infrastructure to not set a `profile` explicitly,
2. Added tests to be sure this works as intended with user API and with slangc command line
* Change capability atom definitions and how Slang manages them to fix errors
1. most `glsl_spirv` version atoms have been removed from `.capdef`, instead we will translate `spirv` version atoms into `glsl_spirv` since there is no point in writing the same code twice in `.capdef` files to define `spirv` versions.
2. add spirv version, and hlsl sm version (and equivalent) capability dependencies
3. removed some stage requirements which were set on objects, keep the wrapper capabilities. I am keeping the wrapper capabilities since I am unaware of whether there are stage limitations (spec says code in practice does not work).
* check internal version instead of version profile (_spirv_1_5 vs. spirv_1_5)
* remove unused OpCapability. adjust SPIRV versioning again for glsl_spirv
* apply workaround for glslang bug with rayquery usage
* ensure capabilities targeted by a profile and added together by a user are valid
* remove additions to `spirv_1_*` wrapper
* spirv_* -> glsl_spirv fix
* fix bug where incompatible profiles would cause invalid target caps
* try to avoid joining invalid capabilities
* fix the warning/error & printing
* run through tests to fix capability system and test mistakes
many mistakes were mesh shaders doing `-profile glsl_450+spirv_1_4`. This is not allowed for a few reasons
1. the test tooling does not handle arguments the same as `slangc`
2. glsl_450 core profile does not support mesh shaders, nor does spirv_1_4. sm_6_5 does work in this scenario
* set some sm_4_1 intrinsics to sm_4_0
* replace `GLSL_` defs with `glsl_`
* swap the unsupported render-test syntax for working syntax
* set d3d11/d3d12 profile defaults
this is required since sm version changes compiled code & behavior
* adjusted nvapi capabilities with atomics + d3d11 set to use sm_5_0 as per default
* cleanup
* address review
* incorrect styling
* change `bitscanForward` to work as intended on 32 bit targets
---------
Co-authored-by: Yong He <yonghe@outlook.com>
Diffstat (limited to 'source')
23 files changed, 1022 insertions, 743 deletions
diff --git a/source/core/slang-uint-set.cpp b/source/core/slang-uint-set.cpp index b6871c192..ba71254e1 100644 --- a/source/core/slang-uint-set.cpp +++ b/source/core/slang-uint-set.cpp @@ -106,7 +106,7 @@ void UIntSet::subtractWith(const UIntSet& set) /* static */void UIntSet::calcUnion(UIntSet& outRs, const UIntSet& set1, const UIntSet& set2) { - outRs.m_buffer.setCount(Math::Max(set1.m_buffer.getCount(), set2.m_buffer.getCount())); + outRs.resizeBackingBufferDirectly(Math::Max(set1.m_buffer.getCount(), set2.m_buffer.getCount())); outRs.clear(); for (Index i = 0; i < set1.m_buffer.getCount(); i++) outRs.m_buffer[i] |= set1.m_buffer[i]; @@ -117,7 +117,7 @@ void UIntSet::subtractWith(const UIntSet& set) /* static */void UIntSet::calcIntersection(UIntSet& outRs, const UIntSet& set1, const UIntSet& set2) { const Index minCount = Math::Min(set1.m_buffer.getCount(), set2.m_buffer.getCount()); - outRs.m_buffer.setCount(minCount); + outRs.resizeBackingBufferDirectly(minCount); for (Index i = 0; i < minCount; i++) outRs.m_buffer[i] = set1.m_buffer[i] & set2.m_buffer[i]; @@ -125,7 +125,7 @@ void UIntSet::subtractWith(const UIntSet& set) /* static */void UIntSet::calcSubtract(UIntSet& outRs, const UIntSet& set1, const UIntSet& set2) { - outRs.m_buffer.setCount(set1.m_buffer.getCount()); + outRs.resizeBackingBufferDirectly(set1.m_buffer.getCount()); const Index minCount = Math::Min(set1.m_buffer.getCount(), set2.m_buffer.getCount()); for (Index i = 0; i < minCount; i++) diff --git a/source/core/slang-uint-set.h b/source/core/slang-uint-set.h index 077bc7981..4ba067871 100644 --- a/source/core/slang-uint-set.h +++ b/source/core/slang-uint-set.h @@ -32,10 +32,10 @@ static inline Index bitscanForward(uint64_t in) #else uint32_t out; // check for 0s in 0bit->31bit. 
If all 0's, check for 0s in 32bit->63bit - if (_BitScanForward((unsigned long*)&out, *(((uint32_t*)&in) + 1))) + if (_BitScanForward((unsigned long*)&out, *(((uint32_t*)&in)))) return Index(out); - _BitScanForward((unsigned long*)&out, *(((uint32_t*)&in))); - return Index(out); + _BitScanForward((unsigned long*)&out, *(((uint32_t*)&in)+1)); + return Index(out)+32; #endif// #ifdef _WIN64 #else diff --git a/source/slang/glsl.meta.slang b/source/slang/glsl.meta.slang index db5f9c2fa..d78ab3c0e 100644 --- a/source/slang/glsl.meta.slang +++ b/source/slang/glsl.meta.slang @@ -152,7 +152,7 @@ public in int gl_ViewportIndex : SV_ViewportArrayIndex; [OverloadRank(15)] [ForceInline] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_spirv, sm_4_0_version)] public matrix<float, N, N> operator*<let N:int>(matrix<float, N, N> m1, matrix<float, N, N> m2) { return mul(m2, m1); @@ -160,7 +160,7 @@ public matrix<float, N, N> operator*<let N:int>(matrix<float, N, N> m1, matrix<f [OverloadRank(15)] [ForceInline] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_spirv, sm_4_0_version)] public matrix<half, N, N> operator*<let N:int>(matrix<half, N, N> m1, matrix<half, N, N> m2) { return mul(m2, m1); @@ -168,7 +168,7 @@ public matrix<half, N, N> operator*<let N:int>(matrix<half, N, N> m1, matrix<hal [OverloadRank(15)] [ForceInline] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_spirv, sm_4_0_version)] public matrix<double, N, N> operator*<let N:int>(matrix<double, N, N> m1, matrix<double, N, N> m2) { return mul(m2, m1); @@ -176,7 +176,7 @@ public matrix<double, N, N> operator*<let N:int>(matrix<double, N, N> m1, matrix [ForceInline] [OverloadRank(15)] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_spirv, sm_4_0_version)] public matrix<T, R, L> operator*<T:__BuiltinFloatingPointType, let L : int, let C : int, let R : int>(matrix<T, C, L> m1, matrix<T, 
R, C> m2) { return mul(m2, m1); @@ -184,7 +184,7 @@ public matrix<T, R, L> operator*<T:__BuiltinFloatingPointType, let L : int, let [ForceInline] [OverloadRank(15)] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_spirv, sm_4_0_version)] public vector<T, R> operator*<T:__BuiltinFloatingPointType, let C : int, let R : int>(vector<T, C> v, matrix<T, R, C> m) { return mul(m, v); @@ -192,7 +192,7 @@ public vector<T, R> operator*<T:__BuiltinFloatingPointType, let C : int, let R : [ForceInline] [OverloadRank(15)] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_spirv, sm_4_0_version)] public vector<T, C> operator*<T:__BuiltinFloatingPointType, let C : int, let R : int>(matrix<T, R, C> m, vector<T, R> v) { return mul(v, m); @@ -306,7 +306,7 @@ ${{{{ __generic<T : __BuiltinFloatingPointType> [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_spirv, sm_4_0_version)] public T atan(T y, T x) { return atan2(y, x); @@ -315,7 +315,7 @@ public T atan(T y, T x) __generic<T : __BuiltinFloatingPointType, let N:int> [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_spirv, sm_4_0_version)] public vector<T,N> atan(vector<T,N> y, vector<T,N> x) { return atan2(y, x); @@ -328,7 +328,7 @@ public vector<T,N> atan(vector<T,N> y, vector<T,N> x) __generic<T : __BuiltinFloatingPointType> [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_spirv, sm_4_0_version)] public T inversesqrt(T x) { return rsqrt(x); @@ -337,7 +337,7 @@ public T inversesqrt(T x) __generic<T : __BuiltinFloatingPointType, let N:int> [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_spirv, sm_4_0_version)] public vector<T, N> inversesqrt(vector<T, N> x) { return rsqrt(x); @@ -350,7 +350,7 @@ public vector<T, N> 
inversesqrt(vector<T, N> x) __generic<T : __BuiltinFloatingPointType> [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] public T roundEven(T x) { return rint(x); @@ -359,7 +359,7 @@ public T roundEven(T x) __generic<T : __BuiltinFloatingPointType, let N:int> [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] public vector<T,N> roundEven(vector<T,N> x) { return rint(x); @@ -368,7 +368,7 @@ public vector<T,N> roundEven(vector<T,N> x) __generic<T : __BuiltinFloatingPointType> [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_spirv, sm_4_0_version)] public T mod(T x, T y) { // SPIR-V doesn't have "modulus". @@ -385,7 +385,7 @@ public T mod(T x, T y) __generic<T : __BuiltinFloatingPointType, let N:int> [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_spirv, sm_4_0_version)] public vector<T, N> mod(vector<T, N> x, T y) { __target_switch @@ -399,7 +399,7 @@ public vector<T, N> mod(vector<T, N> x, T y) __generic<T : __BuiltinFloatingPointType, let N:int> [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_spirv, sm_4_0_version)] public vector<T, N> mod(vector<T, N> x, vector<T, N> y) { __target_switch @@ -412,7 +412,7 @@ public vector<T, N> mod(vector<T, N> x, vector<T, N> y) __generic<T : __BuiltinFloatingPointType, let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_spirv, sm_4_0_version)] public vector<T,N> min(vector<T,N> x, T y) { __target_switch @@ -425,7 +425,7 @@ public vector<T,N> min(vector<T,N> x, T y) __generic<T : __BuiltinFloatingPointType, let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] 
+[require(cpp_cuda_glsl_hlsl_spirv, sm_4_0_version)] public vector<T,N> max(vector<T,N> x, T y) { __target_switch @@ -438,7 +438,7 @@ public vector<T,N> max(vector<T,N> x, T y) __generic<T : __BuiltinFloatingPointType, let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_spirv, sm_4_0_version)] public vector<T,N> clamp(vector<T,N> x, T minBound, T maxBound) { __target_switch @@ -452,7 +452,7 @@ public vector<T,N> clamp(vector<T,N> x, T minBound, T maxBound) __generic<T : __BuiltinFloatingPointType> [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_spirv, sm_4_0_version)] public T mix(T x, T y, T a) { return lerp(x, y, a); @@ -461,7 +461,7 @@ public T mix(T x, T y, T a) __generic<T : __BuiltinFloatingPointType, let N:int> [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_spirv, sm_4_0_version)] public vector<T, N> mix(vector<T, N> x, vector<T, N> y, T a) { __target_switch @@ -475,7 +475,7 @@ public vector<T, N> mix(vector<T, N> x, vector<T, N> y, T a) __generic<T : __BuiltinFloatingPointType, let N:int> [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_spirv, sm_4_0_version)] public vector<T, N> mix(vector<T, N> x, vector<T, N> y, vector<T, N> a) { return lerp(x, y, a); @@ -484,7 +484,7 @@ public vector<T, N> mix(vector<T, N> x, vector<T, N> y, vector<T, N> a) __generic<T> [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_spirv, sm_4_0_version)] public T mix(T x, T y, bool a) { __target_switch @@ -501,7 +501,7 @@ public T mix(T x, T y, bool a) __generic<T, let N:int> [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_spirv, sm_4_0_version)] public vector<T, N> mix(vector<T, N> x, vector<T, N> y, vector<bool, N> a) { 
__target_switch @@ -2121,7 +2121,7 @@ public int textureSamples(Sampler2DMSArray<T,sampleCount> sampler) __generic<T:__BuiltinArithmeticType, let N:int> [ForceInline] -[require(cpp_cuda_glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(cpp_cuda_glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vector<T,4> texture(Sampler1D<vector<T,N>> sampler, float p) { return __vectorReshape<4>(sampler.Sample(p)); @@ -2129,7 +2129,7 @@ public vector<T,4> texture(Sampler1D<vector<T,N>> sampler, float p) __generic<T:__BuiltinArithmeticType, let N:int> [ForceInline] -[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vector<T,4> texture(Sampler1D<vector<T,N>> sampler, float p, constexpr float bias) { return __vectorReshape<4>(sampler.SampleBias(p, bias)); @@ -2137,7 +2137,7 @@ public vector<T,4> texture(Sampler1D<vector<T,N>> sampler, float p, constexpr fl __generic<T:__BuiltinArithmeticType, let N:int, Shape: __ITextureShape, let isArray:int, let sampleCount:int, let format:int> [ForceInline] -[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vector<T,4> texture(__TextureImpl< vector<T,N>, Shape, @@ -2155,7 +2155,7 @@ public vector<T,4> texture(__TextureImpl< __generic<T:__BuiltinArithmeticType, let N:int, Shape: __ITextureShape, let isArray:int, let sampleCount:int, let format:int> [ForceInline] -[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vector<T,4> texture(__TextureImpl< vector<T,N>, Shape, @@ -2229,14 +2229,14 @@ public float texture(sampler2DShadow sampler, vec3 p, float bias) } } -[require(glsl_hlsl_spirv, texture_shadowlod_cube)] +[require(glsl_hlsl_spirv, texture_shadowlod)] [ForceInline] public float texture(samplerCubeShadow sampler, vec4 p) { return sampler.SampleCmp(p.xyz, p.w); } -[require(glsl_hlsl_spirv, texture_shadowlod_cube)] 
+[require(glsl_hlsl_spirv, texture_shadowlod)] [ForceInline] public float texture(samplerCubeShadow sampler, vec4 p, float bias) { @@ -2295,7 +2295,7 @@ public float texture(sampler2DArrayShadow sampler, vec4 p) } [ForceInline] -[require(glsl_hlsl_spirv, texture_shadowlod_cube)] +[require(glsl_hlsl_spirv, texture_shadowlod)] public float texture(samplerCubeArrayShadow sampler, vec4 p, float compare) { return sampler.SampleCmp(p, compare); @@ -2307,7 +2307,7 @@ public float texture(samplerCubeArrayShadow sampler, vec4 p, float compare) __generic<T:__BuiltinArithmeticType, let N:int> [ForceInline] -[require(glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vector<T,4> textureProj(Sampler1D<vector<T,N>> sampler, vec2 p) { __requireComputeDerivative(); @@ -2324,7 +2324,7 @@ public vector<T,4> textureProj(Sampler1D<vector<T,N>> sampler, vec2 p) __generic<T:__BuiltinArithmeticType, let N:int> [ForceInline] -[require(glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vector<T,4> textureProj(Sampler1D<vector<T,N>> sampler, vec2 p, float bias) { __requireComputeDerivative(); @@ -2341,7 +2341,7 @@ public vector<T,4> textureProj(Sampler1D<vector<T,N>> sampler, vec2 p, float bia __generic<T:__BuiltinArithmeticType, let N:int> [ForceInline] -[require(glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vector<T,4> textureProj(Sampler1D<vector<T,N>> sampler, vec4 p) { __requireComputeDerivative(); @@ -2358,7 +2358,7 @@ public vector<T,4> textureProj(Sampler1D<vector<T,N>> sampler, vec4 p) __generic<T:__BuiltinArithmeticType, let N:int> [ForceInline] -[require(glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vector<T,4> textureProj(Sampler1D<vector<T,N>> sampler, vec4 p, float bias) { __requireComputeDerivative(); @@ -2375,7 +2375,7 @@ public vector<T,4> 
textureProj(Sampler1D<vector<T,N>> sampler, vec4 p, float bia __generic<T:__BuiltinArithmeticType, let N:int> [ForceInline] -[require(glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vector<T,4> textureProj(Sampler2D<vector<T,N>> sampler, vec3 p) { __requireComputeDerivative(); @@ -2392,7 +2392,7 @@ public vector<T,4> textureProj(Sampler2D<vector<T,N>> sampler, vec3 p) __generic<T:__BuiltinArithmeticType, let N:int> [ForceInline] -[require(glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vector<T,4> textureProj(Sampler2D<vector<T,N>> sampler, vec3 p, float bias) { __requireComputeDerivative(); @@ -2409,7 +2409,7 @@ public vector<T,4> textureProj(Sampler2D<vector<T,N>> sampler, vec3 p, float bia __generic<T:__BuiltinArithmeticType, let N:int> [ForceInline] -[require(glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vector<T,4> textureProj(Sampler2D<vector<T,N>> sampler, vec4 p) { __requireComputeDerivative(); @@ -2426,7 +2426,7 @@ public vector<T,4> textureProj(Sampler2D<vector<T,N>> sampler, vec4 p) __generic<T:__BuiltinArithmeticType, let N:int> [ForceInline] -[require(glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vector<T,4> textureProj(Sampler2D<vector<T,N>> sampler, vec4 p, float bias) { __requireComputeDerivative(); @@ -2443,7 +2443,7 @@ public vector<T,4> textureProj(Sampler2D<vector<T,N>> sampler, vec4 p, float bia __generic<T:__BuiltinArithmeticType, let N:int> [ForceInline] -[require(glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vector<T,4> textureProj(Sampler3D<vector<T,N>> sampler, vec4 p) { __requireComputeDerivative(); @@ -2460,7 +2460,7 @@ public vector<T,4> textureProj(Sampler3D<vector<T,N>> sampler, vec4 p) __generic<T:__BuiltinArithmeticType, let N:int> [ForceInline] 
-[require(glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vector<T,4> textureProj(Sampler3D<vector<T,N>> sampler, vec4 p, float bias) { __requireComputeDerivative(); @@ -2565,7 +2565,7 @@ public float textureProj(sampler2DShadow sampler, vec4 p, float bias) __generic<T:__BuiltinArithmeticType, let N:int> [ForceInline] -[require(cpp_cuda_glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(cpp_cuda_glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vector<T,4> textureLod(Sampler1D<vector<T,N>> sampler, float p, float lod) { return __vectorReshape<4>(sampler.SampleLevel(p, lod)); @@ -2573,7 +2573,7 @@ public vector<T,4> textureLod(Sampler1D<vector<T,N>> sampler, float p, float lod __generic<T:__BuiltinArithmeticType, let N:int, Shape: __ITextureShape, let isArray:int, let sampleCount:int, let format:int> [ForceInline] -[require(cpp_cuda_glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(cpp_cuda_glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vector<T,4> textureLod(__TextureImpl< vector<T,N>, Shape, @@ -2652,7 +2652,7 @@ public float textureLod(sampler1DArrayShadow sampler, vec3 p, float lod) __generic<T:__BuiltinArithmeticType, let N:int> [ForceInline] -[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vector<T,4> textureOffset(Sampler1D<vector<T,N>> sampler, float p, constexpr int offset, float bias = 0.0) { return __vectorReshape<4>(sampler.SampleBias(p, bias, offset)); @@ -2660,7 +2660,7 @@ public vector<T,4> textureOffset(Sampler1D<vector<T,N>> sampler, float p, conste __generic<T:__BuiltinArithmeticType, let N:int> [ForceInline] -[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vector<T,4> textureOffset(Sampler2D<vector<T,N>> sampler, vec2 p, constexpr ivec2 offset, float bias = 0.0) { return __vectorReshape<4>(sampler.SampleBias(p, bias, offset)); @@ -2668,7 
+2668,7 @@ public vector<T,4> textureOffset(Sampler2D<vector<T,N>> sampler, vec2 p, constex __generic<T:__BuiltinArithmeticType, let N:int> [ForceInline] -[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vector<T,4> textureOffset(Sampler3D<vector<T,N>> sampler, vec3 p, constexpr ivec3 offset, float bias = 0.0) { return __vectorReshape<4>(sampler.SampleBias(p, bias, offset)); @@ -2742,7 +2742,7 @@ public float textureOffset(sampler1DShadow sampler, vec3 p, constexpr int offset __generic<T:__BuiltinArithmeticType, let N:int> [ForceInline] -[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vector<T,4> textureOffset(Sampler1DArray<vector<T,N>> sampler, vec2 p, constexpr int offset, float bias = 0.0) { return __vectorReshape<4>(sampler.SampleBias(p, bias, offset)); @@ -2750,7 +2750,7 @@ public vector<T,4> textureOffset(Sampler1DArray<vector<T,N>> sampler, vec2 p, co __generic<T:__BuiltinArithmeticType, let N:int> [ForceInline] -[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vector<T,4> textureOffset(Sampler2DArray<vector<T,N>> sampler, vec3 p, constexpr ivec2 offset, float bias = 0.0) { return __vectorReshape<4>(sampler.SampleBias(p, bias, offset)); @@ -2921,7 +2921,7 @@ public vector<T,4> texelFetchOffset(Sampler2DRect<vector<T,N>> sampler, ivec2 p, __generic<T:__BuiltinArithmeticType, let N:int> [ForceInline] -[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vector<T,4> textureProjOffset(Sampler1D<vector<T,N>> sampler, vec2 p, constexpr int offset) { __requireComputeDerivative(); @@ -2938,7 +2938,7 @@ public vector<T,4> textureProjOffset(Sampler1D<vector<T,N>> sampler, vec2 p, con __generic<T:__BuiltinArithmeticType, let N:int> [ForceInline] -[require(cpp_glsl_hlsl_spirv, 
texture_sm_4_1_fragment)] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vector<T,4> textureProjOffset(Sampler1D<vector<T,N>> sampler, vec2 p, constexpr int offset, float bias) { __requireComputeDerivative(); @@ -2955,7 +2955,7 @@ public vector<T,4> textureProjOffset(Sampler1D<vector<T,N>> sampler, vec2 p, con __generic<T:__BuiltinArithmeticType, let N:int> [ForceInline] -[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vector<T,4> textureProjOffset(Sampler1D<vector<T,N>> sampler, vec4 p, constexpr int offset) { __requireComputeDerivative(); @@ -2976,7 +2976,7 @@ public vector<T,4> textureProjOffset(Sampler1D<vector<T,N>> sampler, vec4 p, con __generic<T:__BuiltinArithmeticType, let N:int> [ForceInline] -[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vector<T,4> textureProjOffset(Sampler1D<vector<T,N>> sampler, vec4 p, constexpr int offset, float bias) { __requireComputeDerivative(); @@ -2997,7 +2997,7 @@ public vector<T,4> textureProjOffset(Sampler1D<vector<T,N>> sampler, vec4 p, con __generic<T:__BuiltinArithmeticType, let N:int> [ForceInline] -[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vector<T,4> textureProjOffset(Sampler2D<vector<T,N>> sampler, vec3 p, constexpr ivec2 offset) { __requireComputeDerivative(); @@ -3014,7 +3014,7 @@ public vector<T,4> textureProjOffset(Sampler2D<vector<T,N>> sampler, vec3 p, con __generic<T:__BuiltinArithmeticType, let N:int> [ForceInline] -[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vector<T,4> textureProjOffset(Sampler2D<vector<T,N>> sampler, vec3 p, constexpr ivec2 offset, float bias) { __requireComputeDerivative(); @@ -3031,7 +3031,7 @@ public vector<T,4> textureProjOffset(Sampler2D<vector<T,N>> sampler, vec3 p, con 
__generic<T:__BuiltinArithmeticType, let N:int> [ForceInline] -[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vector<T,4> textureProjOffset(Sampler2D<vector<T,N>> sampler, vec4 p, constexpr ivec2 offset) { __requireComputeDerivative(); @@ -3052,7 +3052,7 @@ public vector<T,4> textureProjOffset(Sampler2D<vector<T,N>> sampler, vec4 p, con __generic<T:__BuiltinArithmeticType, let N:int> [ForceInline] -[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vector<T,4> textureProjOffset(Sampler2D<vector<T,N>> sampler, vec4 p, constexpr ivec2 offset, float bias) { __requireComputeDerivative(); @@ -3073,7 +3073,7 @@ public vector<T,4> textureProjOffset(Sampler2D<vector<T,N>> sampler, vec4 p, con __generic<T:__BuiltinArithmeticType, let N:int> [ForceInline] -[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vector<T,4> textureProjOffset(Sampler3D<vector<T,N>> sampler, vec4 p, constexpr ivec3 offset) { __requireComputeDerivative(); @@ -3090,7 +3090,7 @@ public vector<T,4> textureProjOffset(Sampler3D<vector<T,N>> sampler, vec4 p, con __generic<T:__BuiltinArithmeticType, let N:int> [ForceInline] -[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vector<T,4> textureProjOffset(Sampler3D<vector<T,N>> sampler, vec4 p, constexpr ivec3 offset, float bias) { __requireComputeDerivative(); @@ -3195,7 +3195,7 @@ public float textureProjOffset(sampler2DShadow sampler, vec4 p, constexpr ivec2 __generic<T:__BuiltinArithmeticType, let N:int> [ForceInline] -[require(cpp_glsl_hlsl_spirv, texture_sm_4_1)] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_0)] public vector<T,4> textureLodOffset(Sampler1D<vector<T,N>> sampler, float p, float lod, constexpr int offset) { return __vectorReshape<4>(sampler.SampleLevel(p, lod, 
offset)); @@ -3203,7 +3203,7 @@ public vector<T,4> textureLodOffset(Sampler1D<vector<T,N>> sampler, float p, flo __generic<T:__BuiltinArithmeticType, let N:int, Shape:__ITextureShape, let isArray:int, let sampleCount:int, let format:int> [ForceInline] -[require(cpp_glsl_hlsl_spirv, texture_sm_4_1)] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_0)] public vector<T,4> textureLodOffset(__TextureImpl< vector<T,N>, Shape, @@ -3285,7 +3285,7 @@ public float textureLodOffset(sampler1DArrayShadow sampler, vec3 p, float lod, c __generic<T:__BuiltinArithmeticType, let N:int> [ForceInline] -[require(cpp_cuda_glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(cpp_cuda_glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vector<T,4> textureProjLod(Sampler1D<vector<T,N>> sampler, vec2 p, float lod) { __target_switch @@ -3301,7 +3301,7 @@ public vector<T,4> textureProjLod(Sampler1D<vector<T,N>> sampler, vec2 p, float __generic<T:__BuiltinArithmeticType, let N:int> [ForceInline] -[require(cpp_cuda_glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(cpp_cuda_glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vector<T,4> textureProjLod(Sampler1D<vector<T,N>> sampler, vec4 p, float lod) { __target_switch @@ -3321,7 +3321,7 @@ public vector<T,4> textureProjLod(Sampler1D<vector<T,N>> sampler, vec4 p, float __generic<T:__BuiltinArithmeticType, let N:int> [ForceInline] -[require(cpp_cuda_glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(cpp_cuda_glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vector<T,4> textureProjLod(Sampler2D<vector<T,N>> sampler, vec3 p, float lod) { __target_switch @@ -3337,7 +3337,7 @@ public vector<T,4> textureProjLod(Sampler2D<vector<T,N>> sampler, vec3 p, float __generic<T:__BuiltinArithmeticType, let N:int> [ForceInline] -[require(cpp_cuda_glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(cpp_cuda_glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vector<T,4> textureProjLod(Sampler2D<vector<T,N>> sampler, vec4 p, float lod) { __target_switch @@ -3357,7 
+3357,7 @@ public vector<T,4> textureProjLod(Sampler2D<vector<T,N>> sampler, vec4 p, float __generic<T:__BuiltinArithmeticType, let N:int> [ForceInline] -[require(cpp_cuda_glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(cpp_cuda_glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vector<T,4> textureProjLod(Sampler3D<vector<T,N>> sampler, vec4 p, float lod) { __target_switch @@ -3417,7 +3417,7 @@ public float textureProjLod(sampler2DShadow sampler, vec4 p, float lod) __generic<T:__BuiltinArithmeticType, let N:int> [ForceInline] -[require(glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vector<T,4> textureProjLodOffset(Sampler1D<vector<T,N>> sampler, vec2 p, float lod, constexpr int offset) { __target_switch @@ -3433,7 +3433,7 @@ public vector<T,4> textureProjLodOffset(Sampler1D<vector<T,N>> sampler, vec2 p, __generic<T:__BuiltinArithmeticType, let N:int> [ForceInline] -[require(glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vector<T,4> textureProjLodOffset(Sampler1D<vector<T,N>> sampler, vec4 p, float lod, constexpr int offset) { __target_switch @@ -3453,7 +3453,7 @@ public vector<T,4> textureProjLodOffset(Sampler1D<vector<T,N>> sampler, vec4 p, __generic<T:__BuiltinArithmeticType, let N:int> [ForceInline] -[require(glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vector<T,4> textureProjLodOffset(Sampler2D<vector<T,N>> sampler, vec3 p, float lod, constexpr ivec2 offset) { __target_switch @@ -3469,7 +3469,7 @@ public vector<T,4> textureProjLodOffset(Sampler2D<vector<T,N>> sampler, vec3 p, __generic<T:__BuiltinArithmeticType, let N:int> [ForceInline] -[require(glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vector<T,4> textureProjLodOffset(Sampler2D<vector<T,N>> sampler, vec4 p, float lod, constexpr ivec2 offset) { __target_switch @@ -3489,7 +3489,7 
@@ public vector<T,4> textureProjLodOffset(Sampler2D<vector<T,N>> sampler, vec4 p, __generic<T:__BuiltinArithmeticType, let N:int> [ForceInline] -[require(glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vector<T,4> textureProjLodOffset(Sampler3D<vector<T,N>> sampler, vec4 p, float lod, constexpr ivec3 offset) { __target_switch @@ -3626,7 +3626,7 @@ public float textureGrad(sampler2DShadow sampler, vec3 p, vec2 dPdx, vec2 dPdy) } [ForceInline] -[require(glsl_spirv, texture_shadowlod_cube)] +[require(glsl_spirv, texture_shadowlod)] public float textureGrad(samplerCubeShadow sampler, vec4 p, vec3 dPdx, vec3 dPdy) { __target_switch @@ -4171,163 +4171,163 @@ public vec4 textureGatherOffsets(__TextureImpl< // error when we try to translate the GLSL to SPIR-V. // So we cannot use them. -[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vec4 texture1D(sampler1D sampler, float coord) { return texture(sampler, coord); } -[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vec4 texture1D(sampler1D sampler, float coord, float bias) { return texture(sampler, coord, bias); } -[require(glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vec4 texture1DProj(sampler1D sampler, vec2 coord) { return textureProj(sampler, coord); } -[require(glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vec4 texture1DProj(sampler1D sampler, vec2 coord, float bias) { return textureProj(sampler, coord, bias); } -[require(glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vec4 texture1DProj(sampler1D sampler, vec4 coord) { return textureProj(sampler, coord); } -[require(glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(glsl_hlsl_spirv, 
texture_sm_4_0_fragment)] public vec4 texture1DProj(sampler1D sampler, vec4 coord, float bias) { return textureProj(sampler, coord, bias); } -[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vec4 texture1DLod(sampler1D sampler, float coord, float lod) { return textureLod(sampler, coord, lod); } -[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vec4 texture1DProjLod(sampler1D sampler, vec2 coord, float lod) { return textureProjLod(sampler, coord, lod); } -[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vec4 texture1DProjLod(sampler1D sampler, vec4 coord, float lod) { return textureProjLod(sampler, coord, lod); } -[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vec4 texture2D(sampler2D sampler, vec2 coord) { return texture(sampler, coord); } -[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vec4 texture2D(sampler2D sampler, vec2 coord, float bias) { return texture(sampler, coord, bias); } -[require(glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vec4 texture2DProj(sampler2D sampler, vec3 coord) { return textureProj(sampler, coord); } -[require(glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vec4 texture2DProj(sampler2D sampler, vec3 coord, float bias) { return textureProj(sampler, coord, bias); } -[require(glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vec4 texture2DProj(sampler2D sampler, vec4 coord) { return textureProj(sampler, coord); } -[require(glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(glsl_hlsl_spirv, texture_sm_4_0_fragment)] public 
vec4 texture2DProj(sampler2D sampler, vec4 coord, float bias) { return textureProj(sampler, coord, bias); } -[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vec4 texture2DLod(sampler2D sampler, vec2 coord, float lod) { return textureLod(sampler, coord, lod); } -[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vec4 texture2DProjLod(sampler2D sampler, vec3 coord, float lod) { return textureProjLod(sampler, coord, lod); } -[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vec4 texture2DProjLod(sampler2D sampler, vec4 coord, float lod) { return textureProjLod(sampler, coord, lod); } -[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vec4 texture3D(sampler3D sampler, vec3 coord) { return texture(sampler, coord); } -[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vec4 texture3D(sampler3D sampler, vec3 coord, float bias) { return texture(sampler, coord, bias); } -[require(glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vec4 texture3DProj(sampler3D sampler, vec4 coord) { return textureProj(sampler, coord); } -[require(glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vec4 texture3DProj(sampler3D sampler, vec4 coord, float bias) { return textureProj(sampler, coord, bias); } -[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vec4 texture3DLod(sampler3D sampler, vec3 coord, float lod) { return textureLod(sampler, coord, lod); } -[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vec4 
texture3DProjLod(sampler3D sampler, vec4 coord, float lod) { return textureProjLod(sampler, coord, lod); } -[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vec4 textureCube(samplerCube sampler, vec3 coord) { return texture(sampler, coord); } -[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vec4 textureCube(samplerCube sampler, vec3 coord, float bias) { return texture(sampler, coord, bias); } -[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vec4 textureCubeLod(samplerCube sampler, vec3 coord, float lod) { return textureLod(sampler, coord, lod); @@ -9390,7 +9390,7 @@ public vec4 noise4(vector<float, N> x) // TODO: if called after a return, error. [ForceInline] -[require(glsl_hlsl_spirv, glsl_barrier)] +[require(glsl_hlsl_spirv, memorybarrier)] public void barrier() { __target_switch diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang index c03c47703..9a87604ae 100644 --- a/source/slang/hlsl.meta.slang +++ b/source/slang/hlsl.meta.slang @@ -37,12 +37,12 @@ struct GLSLShaderStorageBuffer {} __generic<T,L:IBufferDataLayout> __intrinsic_op($(kIROp_StructuredBufferGetDimensions)) -[require(cpp_cuda_glsl_hlsl_metal_spirv, structuredbuffer_rw)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, appendstructuredbuffer)] uint2 __structuredBufferGetDimensions(AppendStructuredBuffer<T,L> buffer); __generic<T,L:IBufferDataLayout> __intrinsic_op($(kIROp_StructuredBufferGetDimensions)) -[require(cpp_cuda_glsl_hlsl_metal_spirv, structuredbuffer_rw)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, consumestructuredbuffer)] uint2 __structuredBufferGetDimensions(ConsumeStructuredBuffer<T,L> buffer); __intrinsic_op($(kIROp_StructuredBufferGetDimensions)) @@ -578,7 +578,7 @@ extension __TextureImpl<T,Shape,isArray,isMS,sampleCount,0,isShadow,1,format> [__readNone] 
[ForceInline] - [require(cpp_cuda_glsl_hlsl_metal_spirv, texture_sm_4_1_fragment)] + [require(cpp_cuda_glsl_hlsl_metal_spirv, texture_sm_4_0_fragment)] T Sample(vector<float, Shape.dimensions+isArray> location) { __requireComputeDerivative(); @@ -634,7 +634,7 @@ extension __TextureImpl<T,Shape,isArray,isMS,sampleCount,0,isShadow,1,format> [__readNone] [ForceInline] - [require(cpp_glsl_hlsl_metal_spirv, texture_sm_4_1_fragment)] + [require(cpp_glsl_hlsl_metal_spirv, texture_sm_4_0_fragment)] T Sample(vector<float, Shape.dimensions+isArray> location, constexpr vector<int, Shape.planeDimensions> offset) { __requireComputeDerivative(); @@ -663,7 +663,7 @@ extension __TextureImpl<T,Shape,isArray,isMS,sampleCount,0,isShadow,1,format> [__readNone] [ForceInline] __glsl_extension(GL_ARB_sparse_texture_clamp) - [require(cpp_glsl_hlsl_metal_spirv, texture_sm_4_1_fragment)] + [require(cpp_glsl_hlsl_metal_spirv, texture_sm_4_0_fragment)] T Sample(vector<float, Shape.dimensions+isArray> location, vector<int, Shape.planeDimensions> offset, float clamp) { __requireComputeDerivative(); @@ -691,7 +691,7 @@ extension __TextureImpl<T,Shape,isArray,isMS,sampleCount,0,isShadow,1,format> [__readNone] [ForceInline] - [require(cpp_glsl_hlsl_metal_spirv, texture_sm_4_1_fragment)] + [require(cpp_glsl_hlsl_metal_spirv, texture_sm_4_0_fragment)] T Sample(vector<float, Shape.dimensions+isArray> location, vector<int, Shape.planeDimensions> offset, float clamp, out uint status) { __target_switch @@ -709,7 +709,7 @@ extension __TextureImpl<T,Shape,isArray,isMS,sampleCount,0,isShadow,1,format> [__readNone] [ForceInline] - [require(cpp_glsl_hlsl_metal_spirv, texture_sm_4_1_fragment)] + [require(cpp_glsl_hlsl_metal_spirv, texture_sm_4_0_fragment)] T SampleBias(vector<float, Shape.dimensions+isArray> location, float bias) { __requireComputeDerivative(); @@ -737,7 +737,7 @@ extension __TextureImpl<T,Shape,isArray,isMS,sampleCount,0,isShadow,1,format> [__readNone] [ForceInline] - 
[require(cpp_glsl_hlsl_metal_spirv, texture_sm_4_1_fragment)] + [require(cpp_glsl_hlsl_metal_spirv, texture_sm_4_0_fragment)] T SampleBias(vector<float, Shape.dimensions+isArray> location, float bias, constexpr vector<int, Shape.planeDimensions> offset) { __requireComputeDerivative(); @@ -896,7 +896,7 @@ extension __TextureImpl<T,Shape,isArray,isMS,sampleCount,0,isShadow,1,format> [__readNone] [ForceInline] - [require(cpp_glsl_hlsl_metal_spirv, texture_sm_4_1)] + [require(cpp_glsl_hlsl_metal_spirv, texture_sm_4_0)] T SampleGrad(vector<float, Shape.dimensions+isArray> location, vector<float, Shape.dimensions> gradX, vector<float, Shape.dimensions> gradY) { __target_switch @@ -923,7 +923,7 @@ extension __TextureImpl<T,Shape,isArray,isMS,sampleCount,0,isShadow,1,format> [__readNone] [ForceInline] - [require(cpp_glsl_hlsl_metal_spirv, texture_sm_4_1)] + [require(cpp_glsl_hlsl_metal_spirv, texture_sm_4_0)] T SampleGrad(vector<float, Shape.dimensions+isArray> location, vector<float, Shape.dimensions> gradX, vector<float, Shape.dimensions> gradY, constexpr vector<int, Shape.dimensions> offset) { __target_switch @@ -950,7 +950,7 @@ extension __TextureImpl<T,Shape,isArray,isMS,sampleCount,0,isShadow,1,format> [__readNone] [ForceInline] __glsl_extension(GL_ARB_sparse_texture_clamp) - [require(cpp_glsl_hlsl_metal_spirv, texture_sm_4_1)] + [require(cpp_glsl_hlsl_metal_spirv, texture_sm_4_0)] T SampleGrad(vector<float, Shape.dimensions+isArray> location, vector<float, Shape.dimensions> gradX, vector<float, Shape.dimensions> gradY, constexpr vector<int, Shape.dimensions> offset, float lodClamp) { __target_switch @@ -977,7 +977,7 @@ extension __TextureImpl<T,Shape,isArray,isMS,sampleCount,0,isShadow,1,format> [__readNone] [ForceInline] - [require(cpp_cuda_glsl_hlsl_metal_spirv, texture_sm_4_1)] + [require(cpp_cuda_glsl_hlsl_metal_spirv, texture_sm_4_0)] T SampleLevel(vector<float, Shape.dimensions+isArray> location, float level) { __target_switch @@ -1034,7 +1034,7 @@ extension 
__TextureImpl<T,Shape,isArray,isMS,sampleCount,0,isShadow,1,format> [__readNone] [ForceInline] - [require(cpp_glsl_hlsl_metal_spirv, texture_sm_4_1)] + [require(cpp_glsl_hlsl_metal_spirv, texture_sm_4_0)] T SampleLevel(vector<float, Shape.dimensions+isArray> location, float level, constexpr vector<int, Shape.planeDimensions> offset) { __target_switch @@ -1116,7 +1116,7 @@ extension __TextureImpl<T,Shape,isArray,isMS,sampleCount,0,isShadow,0,format> { [__readNone] [ForceInline] - [require(cpp_cuda_glsl_hlsl_metal_spirv, texture_sm_4_1_fragment)] + [require(cpp_cuda_glsl_hlsl_metal_spirv, texture_sm_4_0_fragment)] T Sample(SamplerState s, vector<float, Shape.dimensions+isArray> location) { __requireComputeDerivative(); @@ -1200,7 +1200,7 @@ extension __TextureImpl<T,Shape,isArray,isMS,sampleCount,0,isShadow,0,format> [__readNone] [ForceInline] - [require(cpp_glsl_hlsl_metal_spirv, texture_sm_4_1_fragment)] + [require(cpp_glsl_hlsl_metal_spirv, texture_sm_4_0_fragment)] T Sample(SamplerState s, vector<float, Shape.dimensions+isArray> location, constexpr vector<int, Shape.planeDimensions> offset) { __requireComputeDerivative(); @@ -1248,7 +1248,7 @@ extension __TextureImpl<T,Shape,isArray,isMS,sampleCount,0,isShadow,0,format> [__readNone] [ForceInline] __glsl_extension(GL_ARB_sparse_texture_clamp) - [require(cpp_glsl_hlsl_metal_spirv, texture_sm_4_1_fragment)] + [require(cpp_glsl_hlsl_metal_spirv, texture_sm_4_0_fragment)] T Sample(SamplerState s, vector<float, Shape.dimensions+isArray> location, constexpr vector<int, Shape.planeDimensions> offset, float clamp) { __requireComputeDerivative(); @@ -1296,7 +1296,7 @@ extension __TextureImpl<T,Shape,isArray,isMS,sampleCount,0,isShadow,0,format> [__readNone] [ForceInline] - [require(cpp_glsl_hlsl_metal_spirv, texture_sm_4_1_fragment)] + [require(cpp_glsl_hlsl_metal_spirv, texture_sm_4_0_fragment)] T Sample(SamplerState s, vector<float, Shape.dimensions+isArray> location, constexpr vector<int, Shape.planeDimensions> offset, 
float clamp, out uint status) { __target_switch @@ -1314,7 +1314,7 @@ extension __TextureImpl<T,Shape,isArray,isMS,sampleCount,0,isShadow,0,format> [__readNone] [ForceInline] - [require(cpp_glsl_hlsl_metal_spirv, texture_sm_4_1_fragment)] + [require(cpp_glsl_hlsl_metal_spirv, texture_sm_4_0_fragment)] T SampleBias(SamplerState s, vector<float, Shape.dimensions+isArray> location, float bias) { __requireComputeDerivative(); @@ -1364,7 +1364,7 @@ extension __TextureImpl<T,Shape,isArray,isMS,sampleCount,0,isShadow,0,format> [__readNone] [ForceInline] - [require(cpp_glsl_hlsl_metal_spirv, texture_sm_4_1_fragment)] + [require(cpp_glsl_hlsl_metal_spirv, texture_sm_4_0_fragment)] T SampleBias(SamplerState s, vector<float, Shape.dimensions+isArray> location, float bias, constexpr vector<int, Shape.planeDimensions> offset) { __requireComputeDerivative(); @@ -1619,7 +1619,7 @@ extension __TextureImpl<T,Shape,isArray,isMS,sampleCount,0,isShadow,0,format> [__readNone] [ForceInline] - [require(cpp_glsl_hlsl_metal_spirv, texture_sm_4_1)] + [require(cpp_glsl_hlsl_metal_spirv, texture_sm_4_0)] T SampleGrad(SamplerState s, vector<float, Shape.dimensions+isArray> location, vector<float, Shape.dimensions> gradX, vector<float, Shape.dimensions> gradY) { __target_switch @@ -1670,7 +1670,7 @@ extension __TextureImpl<T,Shape,isArray,isMS,sampleCount,0,isShadow,0,format> [__readNone] [ForceInline] - [require(cpp_glsl_hlsl_metal_spirv, texture_sm_4_1)] + [require(cpp_glsl_hlsl_metal_spirv, texture_sm_4_0)] T SampleGrad(SamplerState s, vector<float, Shape.dimensions+isArray> location, vector<float, Shape.dimensions> gradX, vector<float, Shape.dimensions> gradY, constexpr vector<int, Shape.dimensions> offset) { __target_switch @@ -1719,7 +1719,7 @@ extension __TextureImpl<T,Shape,isArray,isMS,sampleCount,0,isShadow,0,format> [__readNone] [ForceInline] __glsl_extension(GL_ARB_sparse_texture_clamp) - [require(cpp_glsl_hlsl_metal_spirv, texture_sm_4_1)] + [require(cpp_glsl_hlsl_metal_spirv, 
texture_sm_4_0)] T SampleGrad(SamplerState s, vector<float, Shape.dimensions+isArray> location, vector<float, Shape.dimensions> gradX, vector<float, Shape.dimensions> gradY, constexpr vector<int, Shape.dimensions> offset, float lodClamp) { __target_switch @@ -1767,7 +1767,7 @@ extension __TextureImpl<T,Shape,isArray,isMS,sampleCount,0,isShadow,0,format> [__readNone] [ForceInline] - [require(cpp_cuda_glsl_hlsl_metal_spirv, texture_sm_4_1)] + [require(cpp_cuda_glsl_hlsl_metal_spirv, texture_sm_4_0)] T SampleLevel(SamplerState s, vector<float, Shape.dimensions+isArray> location, float level) { __target_switch @@ -1847,7 +1847,8 @@ extension __TextureImpl<T,Shape,isArray,isMS,sampleCount,0,isShadow,0,format> [__readNone] [ForceInline] - [require(cpp_glsl_hlsl_metal_spirv, texture_sm_4_1)] + + [require(cpp_glsl_hlsl_metal_spirv, texture_sm_4_0)] T SampleLevel(SamplerState s, vector<float, Shape.dimensions+isArray> location, float level, constexpr vector<int, Shape.planeDimensions> offset) { __target_switch @@ -3228,8 +3229,6 @@ ${{{{ const char* textureTypeName = isCombined ? 
"Sampler" : "Texture"; StringBuilder requireStringBuilder; - if (shape == kStdlibShapeIndexCube) - requireStringBuilder << "[require(any_target, texture_cube)]"; auto requireString = requireStringBuilder.toString(); }}}} $(requireString) @@ -3710,13 +3709,13 @@ uint64_t __atomicExchange(__ref uint64_t ioValue, uint64_t value) // Conversion between uint64_t and uint2 -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_spirv, sm_4_0_version)] uint2 __asuint2(uint64_t i) { return uint2(uint(i), uint(uint64_t(i) >> 32)); } -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_spirv, sm_4_0_version)] uint64_t __asuint64(uint2 i) { return (uint64_t(i.y) << 32) | i.x; @@ -4072,7 +4071,7 @@ ${{{{ __cuda_sm_version(2.0) [__requiresNVAPI] [ForceInline] - [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda_float1)] + [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_nvapi_cuda_float1)] void InterlockedAddF32(uint byteAddress, float valueToAdd, out float originalValue) { __target_switch @@ -4143,7 +4142,7 @@ ${{{{ [__requiresNVAPI] [ForceInline] __cuda_sm_version(2.0) - [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda_float1)] + [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_nvapi_cuda_float1)] void InterlockedAddF32(uint byteAddress, float valueToAdd) { __target_switch @@ -4163,7 +4162,7 @@ ${{{{ // Int64 Add [ForceInline] __cuda_sm_version(6.0) - [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda6_int64)] + [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_nvapi_cuda6_int64)] void InterlockedAddI64(uint byteAddress, int64_t valueToAdd, out int64_t originalValue) { __target_switch @@ -4182,7 +4181,7 @@ ${{{{ // Without returning original value __cuda_sm_version(6.0) - [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda6_int64)] + [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_nvapi_cuda6_int64)] void InterlockedAddI64(uint byteAddress, int64_t valueToAdd) { __target_switch @@ -4199,7 +4198,7 @@ ${{{{ // Cas 
uint64_t - [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda9_int64)] + [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_nvapi_cuda9_int64)] void InterlockedCompareExchangeU64(uint byteAddress, uint64_t compareValue, uint64_t value, out uint64_t outOriginalValue) { __target_switch @@ -4217,7 +4216,7 @@ ${{{{ // Max __cuda_sm_version(5.0) - [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda5_int64)] + [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_nvapi_cuda5_int64)] uint64_t InterlockedMaxU64(uint byteAddress, uint64_t value) { __target_switch @@ -4275,7 +4274,7 @@ ${{{{ // Min __cuda_sm_version(5.0) - [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda5_int64)] + [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_nvapi_cuda5_int64)] uint64_t InterlockedMinU64(uint byteAddress, uint64_t value) { __target_switch @@ -4333,7 +4332,7 @@ ${{{{ // And __cuda_sm_version(5.0) - [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda5_int64)] + [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_nvapi_cuda5_int64)] uint64_t InterlockedAndU64(uint byteAddress, uint64_t value) { __target_switch @@ -4371,7 +4370,7 @@ ${{{{ // Or __cuda_sm_version(5.0) - [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda5_int64)] + [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_nvapi_cuda5_int64)] uint64_t InterlockedOrU64(uint byteAddress, uint64_t value) { __target_switch @@ -4409,7 +4408,7 @@ ${{{{ // Xor __cuda_sm_version(5.0) - [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda5_int64)] + [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_nvapi_cuda5_int64)] uint64_t InterlockedXorU64(uint byteAddress, uint64_t value) { __target_switch @@ -4446,7 +4445,7 @@ ${{{{ // Exchange - [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda9_int64)] + [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_nvapi_cuda9_int64)] uint64_t InterlockedExchangeU64(uint byteAddress, uint64_t value) { __target_switch @@ -4551,6 +4550,7 @@ ${{{{ } [ForceInline] + [require(glsl_hlsl_spirv, atomic_glsl_hlsl_cuda9_int64)] 
void InterlockedCompareExchange64(uint byteAddress, int64_t compareValue, int64_t value, out int64_t outOriginalValue) { __target_switch @@ -4567,6 +4567,7 @@ ${{{{ } [ForceInline] + [require(glsl_hlsl_spirv, atomic_glsl_hlsl_cuda9_int64)] void InterlockedCompareExchange64(uint byteAddress, uint64_t compareValue, uint64_t value, out uint64_t outOriginalValue) { __target_switch @@ -5211,7 +5212,7 @@ void abort(); __generic<T : __BuiltinIntegerType> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T abs(T x) { __target_switch @@ -5232,7 +5233,7 @@ T abs(T x) __generic<T : __BuiltinIntegerType, let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector<T, N> abs(vector<T, N> x) { __target_switch @@ -5250,7 +5251,7 @@ vector<T, N> abs(vector<T, N> x) __generic<T : __BuiltinIntegerType, let N : int, let M : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix<T,N,M> abs(matrix<T,N,M> x) { __target_switch @@ -5263,7 +5264,7 @@ matrix<T,N,M> abs(matrix<T,N,M> x) __generic<T : __BuiltinFloatingPointType> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T abs(T x) { __target_switch @@ -5281,7 +5282,7 @@ T abs(T x) __generic<T : __BuiltinFloatingPointType, let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector<T, N> abs(vector<T, N> x) { __target_switch @@ -5299,7 +5300,7 @@ vector<T, N> abs(vector<T, N> x) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix<T,N,M> abs(matrix<T,N,M> 
x) { __target_switch @@ -5313,7 +5314,7 @@ matrix<T,N,M> abs(matrix<T,N,M> x) __generic<T : __BuiltinFloatingPointType> [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T fabs(T x) { __target_switch @@ -5327,7 +5328,7 @@ T fabs(T x) __generic<T : __BuiltinFloatingPointType, let N : int> [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector<T, N> fabs(vector<T, N> x) { __target_switch @@ -5343,7 +5344,7 @@ vector<T, N> fabs(vector<T, N> x) __generic<T : __BuiltinFloatingPointType> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T acos(T x) { __target_switch @@ -5361,7 +5362,7 @@ T acos(T x) __generic<T : __BuiltinFloatingPointType, let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector<T, N> acos(vector<T, N> x) { __target_switch @@ -5379,7 +5380,7 @@ vector<T, N> acos(vector<T, N> x) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix<T, N, M> acos(matrix<T, N, M> x) { __target_switch @@ -5395,7 +5396,7 @@ matrix<T, N, M> acos(matrix<T, N, M> x) __generic<T : __BuiltinFloatingPointType> [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T acosh(T x) { __target_switch @@ -5415,7 +5416,7 @@ T acosh(T x) __generic<T : __BuiltinFloatingPointType, let N:int> [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector<T,N> acosh(vector<T,N> x) { __target_switch @@ 
-5527,7 +5528,7 @@ bool all(matrix<T,N,M> x) // Barrier for writes to all memory spaces (HLSL SM 5.0) __glsl_extension(GL_KHR_memory_scope_semantics) -[require(cuda_glsl_hlsl_metal_spirv, memorybarrier_compute)] +[require(cuda_glsl_hlsl_metal_spirv, memorybarrier)] void AllMemoryBarrier() { __target_switch @@ -5545,7 +5546,7 @@ void AllMemoryBarrier() // Thread-group sync and barrier for writes to all memory spaces (HLSL SM 5.0) __glsl_extension(GL_KHR_memory_scope_semantics) -[require(cuda_glsl_hlsl_metal_spirv, memorybarrier_compute)] +[require(cuda_glsl_hlsl_metal_spirv, memorybarrier)] void AllMemoryBarrierWithGroupSync() { __target_switch @@ -5778,28 +5779,28 @@ matrix<float,N,M> asfloat(matrix<uint,N,M> x) // No op [__unsafeForceInlineEarly] [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_spirv, sm_4_0_version)] float asfloat(float x) { return x; } __generic<let N : int> [__unsafeForceInlineEarly] [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_spirv, sm_4_0_version)] vector<float,N> asfloat(vector<float,N> x) { return x; } __generic<let N : int, let M : int> [__unsafeForceInlineEarly] [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_spirv, sm_4_0_version)] matrix<float,N,M> asfloat(matrix<float,N,M> x) { return x; } // Inverse sine (HLSL SM 1.0) __generic<T : __BuiltinFloatingPointType> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T asin(T x) { __target_switch @@ -5817,7 +5818,7 @@ T asin(T x) __generic<T : __BuiltinFloatingPointType, let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector<T, N> asin(vector<T, N> x) { __target_switch @@ -5835,7 +5836,7 @@ vector<T, N> asin(vector<T, N> x) __generic<T : __BuiltinFloatingPointType, let N : 
int, let M : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix<T, N, M> asin(matrix<T, N, M> x) { __target_switch @@ -5851,7 +5852,7 @@ matrix<T, N, M> asin(matrix<T, N, M> x) __generic<T : __BuiltinFloatingPointType> [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T asinh(T x) { __target_switch @@ -5871,7 +5872,7 @@ T asinh(T x) __generic<T : __BuiltinFloatingPointType, let N:int> [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector<T,N> asinh(vector<T,N> x) { __target_switch @@ -6329,7 +6330,7 @@ matrix<float16_t,R,C> asfloat16<let R : int, let C : int>(matrix<int16_t,R,C> va // Inverse tangent (HLSL SM 1.0) __generic<T : __BuiltinFloatingPointType> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T atan(T x) { __target_switch @@ -6347,7 +6348,7 @@ T atan(T x) __generic<T : __BuiltinFloatingPointType, let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector<T, N> atan(vector<T, N> x) { __target_switch @@ -6365,7 +6366,7 @@ vector<T, N> atan(vector<T, N> x) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix<T, N, M> atan(matrix<T, N, M> x) { __target_switch @@ -6378,7 +6379,7 @@ matrix<T, N, M> atan(matrix<T, N, M> x) __generic<T : __BuiltinFloatingPointType> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T atan2(T y, T x) { __target_switch @@ -6396,7 +6397,7 @@ T 
atan2(T y, T x) __generic<T : __BuiltinFloatingPointType, let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector<T, N> atan2(vector<T, N> y, vector<T, N> x) { __target_switch @@ -6414,7 +6415,7 @@ vector<T, N> atan2(vector<T, N> y, vector<T, N> x) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix<T,N,M> atan2(matrix<T,N,M> y, matrix<T,N,M> x) { __target_switch @@ -6430,7 +6431,7 @@ matrix<T,N,M> atan2(matrix<T,N,M> y, matrix<T,N,M> x) __generic<T : __BuiltinFloatingPointType> [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T atanh(T x) { __target_switch @@ -6450,7 +6451,7 @@ T atanh(T x) __generic<T : __BuiltinFloatingPointType, let N:int> [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector<T,N> atanh(vector<T,N> x) { __target_switch @@ -6468,7 +6469,7 @@ vector<T,N> atanh(vector<T,N> x) // Ceiling (HLSL SM 1.0) __generic<T : __BuiltinFloatingPointType> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T ceil(T x) { __target_switch @@ -6486,7 +6487,7 @@ T ceil(T x) __generic<T : __BuiltinFloatingPointType, let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector<T, N> ceil(vector<T, N> x) { __target_switch @@ -6504,7 +6505,7 @@ vector<T, N> ceil(vector<T, N> x) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, 
sm_4_0_version)] matrix<T, N, M> ceil(matrix<T, N, M> x) { __target_switch @@ -6603,7 +6604,7 @@ bool CheckAccessFullyMapped(uint status); // Clamp (HLSL SM 1.0) __generic<T : __BuiltinIntegerType> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T clamp(T x, T minBound, T maxBound) { __target_switch @@ -6627,7 +6628,7 @@ T clamp(T x, T minBound, T maxBound) __generic<T : __BuiltinIntegerType, let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector<T, N> clamp(vector<T, N> x, vector<T, N> minBound, vector<T, N> maxBound) { __target_switch @@ -6651,7 +6652,7 @@ vector<T, N> clamp(vector<T, N> x, vector<T, N> minBound, vector<T, N> maxBound) __generic<T : __BuiltinIntegerType, let N : int, let M : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix<T,N,M> clamp(matrix<T,N,M> x, matrix<T,N,M> minBound, matrix<T,N,M> maxBound) { __target_switch @@ -6664,7 +6665,7 @@ matrix<T,N,M> clamp(matrix<T,N,M> x, matrix<T,N,M> minBound, matrix<T,N,M> maxBo __generic<T : __BuiltinFloatingPointType> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T clamp(T x, T minBound, T maxBound) { __target_switch @@ -6682,7 +6683,7 @@ T clamp(T x, T minBound, T maxBound) __generic<T : __BuiltinFloatingPointType, let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector<T, N> clamp(vector<T, N> x, vector<T, N> minBound, vector<T, N> maxBound) { __target_switch @@ -6700,7 +6701,7 @@ vector<T, N> clamp(vector<T, N> x, vector<T, N> minBound, vector<T, N> maxBound) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> [__readNone] 
-[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix<T,N,M> clamp(matrix<T,N,M> x, matrix<T,N,M> minBound, matrix<T,N,M> maxBound) { __target_switch @@ -6751,7 +6752,7 @@ void clip(matrix<T,N,M> x) // Cosine __generic<T : __BuiltinFloatingPointType> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T cos(T x) { __target_switch @@ -6769,7 +6770,7 @@ T cos(T x) __generic<T : __BuiltinFloatingPointType, let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector<T, N> cos(vector<T, N> x) { __target_switch @@ -6787,7 +6788,7 @@ vector<T, N> cos(vector<T, N> x) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix<T, N, M> cos(matrix<T, N, M> x) { __target_switch @@ -6801,7 +6802,7 @@ matrix<T, N, M> cos(matrix<T, N, M> x) // Hyperbolic cosine __generic<T : __BuiltinFloatingPointType> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T cosh(T x) { __target_switch @@ -6819,7 +6820,7 @@ T cosh(T x) __generic<T : __BuiltinFloatingPointType, let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector<T,N> cosh(vector<T,N> x) { __target_switch @@ -6837,7 +6838,7 @@ vector<T,N> cosh(vector<T,N> x) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix<T, N, M> cosh(matrix<T, N, M> x) { __target_switch @@ -6852,7 +6853,7 @@ matrix<T, N, M> cosh(matrix<T, N, M> x) 
__generic<T : __BuiltinFloatingPointType> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T cospi(T x) { __target_switch @@ -6865,7 +6866,7 @@ T cospi(T x) __generic<T : __BuiltinFloatingPointType, let N: int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector<T,N> cospi(vector<T,N> x) { __target_switch @@ -6902,7 +6903,7 @@ uint countbits(uint value) // TODO: SPIRV does not support integer vectors. __generic<T : __BuiltinFloatingPointType> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector<T,3> cross(vector<T,3> left, vector<T,3> right) { __target_switch @@ -6923,7 +6924,7 @@ vector<T,3> cross(vector<T,3> left, vector<T,3> right) __generic<T : __BuiltinIntegerType> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector<T, 3> cross(vector<T, 3> left, vector<T, 3> right) { __target_switch @@ -6943,7 +6944,7 @@ vector<T, 3> cross(vector<T, 3> left, vector<T, 3> right) // Convert encoded color [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] int4 D3DCOLORtoUBYTE4(float4 color) { __target_switch @@ -7182,7 +7183,7 @@ T determinant(matrix<T,N,N> m) // Barrier for device memory __glsl_extension(GL_KHR_memory_scope_semantics) -[require(cuda_glsl_hlsl_metal_spirv, memorybarrier_compute)] +[require(cuda_glsl_hlsl_metal_spirv, memorybarrier)] void DeviceMemoryBarrier() { __target_switch @@ -7199,7 +7200,7 @@ void DeviceMemoryBarrier() } __glsl_extension(GL_KHR_memory_scope_semantics) -[require(cuda_glsl_hlsl_metal_spirv, memorybarrier_compute)] +[require(cuda_glsl_hlsl_metal_spirv, memorybarrier)] void DeviceMemoryBarrierWithGroupSync() { __target_switch 
@@ -7219,7 +7220,7 @@ void DeviceMemoryBarrierWithGroupSync() __generic<T : __BuiltinFloatingPointType, let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T distance(vector<T, N> x, vector<T, N> y) { __target_switch @@ -7237,7 +7238,7 @@ T distance(vector<T, N> x, vector<T, N> y) __generic<T : __BuiltinFloatingPointType> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T distance(T x, T y) { __target_switch @@ -7255,7 +7256,7 @@ T distance(T x, T y) __generic<T : __BuiltinFloatingPointType> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T fdim(T x, T y) { __target_switch @@ -7268,7 +7269,7 @@ T fdim(T x, T y) __generic<T : __BuiltinFloatingPointType, let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector<T,N> fdim(vector<T,N> x, vector<T,N> y) { __target_switch @@ -7313,7 +7314,7 @@ vector<T,N> divide(vector<T,N> x, vector<T,N> y) __generic<T : __BuiltinFloatingPointType> [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T dot(T x, T y) { __target_switch @@ -7327,7 +7328,7 @@ T dot(T x, T y) __generic<T : __BuiltinFloatingPointType, let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T dot(vector<T, N> x, vector<T, N> y) { __target_switch @@ -7348,7 +7349,7 @@ T dot(vector<T, N> x, vector<T, N> y) __generic<T : __BuiltinIntegerType, let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T dot(vector<T, N> x, vector<T, N> y) { 
__target_switch @@ -7538,7 +7539,7 @@ matrix<T,N,M> EvaluateAttributeSnapped(matrix<T,N,M> x, int2 offset) __generic<T : __BuiltinFloatingPointType> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T exp(T x) { __target_switch @@ -7556,7 +7557,7 @@ T exp(T x) __generic<T : __BuiltinFloatingPointType, let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector<T, N> exp(vector<T, N> x) { __target_switch @@ -7574,7 +7575,7 @@ vector<T, N> exp(vector<T, N> x) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix<T, N, M> exp(matrix<T, N, M> x) { __target_switch @@ -7589,7 +7590,7 @@ matrix<T, N, M> exp(matrix<T, N, M> x) __generic<T : __BuiltinFloatingPointType> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T exp2(T x) { __target_switch @@ -7621,7 +7622,7 @@ T exp2(T x) __generic<T : __BuiltinFloatingPointType, let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector<T,N> exp2(vector<T,N> x) { __target_switch @@ -7640,7 +7641,7 @@ vector<T,N> exp2(vector<T,N> x) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix<T,N,M> exp2(matrix<T,N,M> x) { __target_switch @@ -7655,7 +7656,7 @@ matrix<T,N,M> exp2(matrix<T,N,M> x) __generic<T : __BuiltinFloatingPointType> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T exp10(T x) { __target_switch 
@@ -7669,7 +7670,7 @@ T exp10(T x) __generic<T : __BuiltinFloatingPointType, let N: int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector<T,N> exp10(vector<T,N> x) { __target_switch @@ -7853,7 +7854,7 @@ vector<float16_t, N> f32tof16_(vector<float, N> value) // Flip surface normal to face forward, if needed __generic<T : __BuiltinFloatingPointType, let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_400)] +[require(cpp_cuda_glsl_hlsl_spirv, sm_4_0_version)] vector<T,N> faceforward(vector<T,N> n, vector<T,N> i, vector<T,N> ng) { __target_switch @@ -8006,7 +8007,7 @@ vector<uint,N> firstbitlow(vector<uint,N> value) __generic<T : __BuiltinFloatingPointType> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T floor(T x) { __target_switch @@ -8024,7 +8025,7 @@ T floor(T x) __generic<T : __BuiltinFloatingPointType, let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector<T, N> floor(vector<T, N> x) { __target_switch @@ -8042,7 +8043,7 @@ vector<T, N> floor(vector<T, N> x) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix<T, N, M> floor(matrix<T, N, M> x) { __target_switch @@ -8113,7 +8114,7 @@ matrix<T, N, M> fma(matrix<T, N, M> a, matrix<T, N, M> b, matrix<T, N, M> c) __generic<T : __BuiltinFloatingPointType> [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T fmod(T x, T y) { // In HLSL, `fmod` returns a remainder. 
@@ -8184,7 +8185,7 @@ T fmod(T x, T y) __generic<T : __BuiltinFloatingPointType, let N : int> [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector<T, N> fmod(vector<T, N> x, vector<T, N> y) { __target_switch @@ -8201,7 +8202,7 @@ vector<T, N> fmod(vector<T, N> x, vector<T, N> y) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix<T, N, M> fmod(matrix<T, N, M> x, matrix<T, N, M> y) { __target_switch @@ -8215,7 +8216,7 @@ matrix<T, N, M> fmod(matrix<T, N, M> x, matrix<T, N, M> y) // Fractional part __generic<T : __BuiltinFloatingPointType> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T frac(T x) { __target_switch @@ -8233,7 +8234,7 @@ T frac(T x) __generic<T : __BuiltinFloatingPointType, let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector<T, N> frac(vector<T, N> x) { __target_switch @@ -8259,7 +8260,7 @@ matrix<T, N, M> frac(matrix<T, N, M> x) __generic<T : __BuiltinFloatingPointType> [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T fract(T x) { return frac(x); @@ -8268,7 +8269,7 @@ T fract(T x) __generic<T : __BuiltinFloatingPointType, let N:int> [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector<T, N> fract(vector<T, N> x) { return frac(x); @@ -8278,7 +8279,7 @@ vector<T, N> fract(vector<T, N> x) // Split float into mantissa and exponent __generic<T : __BuiltinFloatingPointType> [__readNone] 
-[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T frexp(T x, out int exp) { __target_switch @@ -8296,7 +8297,7 @@ T frexp(T x, out int exp) __generic<T : __BuiltinFloatingPointType, let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector<T, N> frexp(vector<T, N> x, out vector<int, N> exp) { __target_switch @@ -8314,7 +8315,7 @@ vector<T, N> frexp(vector<T, N> x, out vector<int, N> exp) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int, let L : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix<T, N, M> frexp(matrix<T, N, M> x, out matrix<int, N, M, L> exp) { __target_switch @@ -8512,7 +8513,7 @@ float2 GetRenderTargetSamplePosition(int Index) // Group memory barrier __glsl_extension(GL_KHR_memory_scope_semantics) -[require(cuda_glsl_hlsl_metal_spirv, memorybarrier_compute)] +[require(cuda_glsl_hlsl_metal_spirv, memorybarrier)] void GroupMemoryBarrier() { __target_switch @@ -8529,7 +8530,7 @@ void GroupMemoryBarrier() } } -[require(cuda_glsl_hlsl_metal_spirv, memorybarrier_compute)] +[require(cuda_glsl_hlsl_metal_spirv, memorybarrier)] void __subgroupBarrier() { __target_switch @@ -8547,7 +8548,7 @@ void __subgroupBarrier() } __glsl_extension(GL_KHR_memory_scope_semantics) -[require(cuda_glsl_hlsl_metal_spirv, memorybarrier_compute)] +[require(cuda_glsl_hlsl_metal_spirv, memorybarrier)] void GroupMemoryBarrierWithGroupSync() { __target_switch @@ -9449,7 +9450,7 @@ void InterlockedXor(__ref uint64_t dest, uint64_t value, out uint64_t origina __generic<T : __BuiltinFloatingPointType> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] bool isfinite(T x) { __target_switch @@ -9467,7 +9468,7 @@ bool isfinite(T x) 
__generic<T : __BuiltinFloatingPointType, let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector<bool, N> isfinite(vector<T, N> x) { __target_switch @@ -9485,7 +9486,7 @@ vector<bool, N> isfinite(vector<T, N> x) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix<bool, N, M> isfinite(matrix<T, N, M> x) { __target_switch @@ -9499,7 +9500,7 @@ matrix<bool, N, M> isfinite(matrix<T, N, M> x) // Is floating-point value infinite? __generic<T : __BuiltinFloatingPointType> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] bool isinf(T x) { __target_switch @@ -9518,7 +9519,7 @@ bool isinf(T x) __generic<T : __BuiltinFloatingPointType, let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector<bool, N> isinf(vector<T, N> x) { __target_switch @@ -9536,7 +9537,7 @@ vector<bool, N> isinf(vector<T, N> x) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix<bool, N, M> isinf(matrix<T, N, M> x) { __target_switch @@ -9550,7 +9551,7 @@ matrix<bool, N, M> isinf(matrix<T, N, M> x) // Is floating-point value not-a-number? 
__generic<T : __BuiltinFloatingPointType> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] bool isnan(T x) { __target_switch @@ -9569,7 +9570,7 @@ bool isnan(T x) __generic<T : __BuiltinFloatingPointType, let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector<bool, N> isnan(vector<T, N> x) { __target_switch @@ -9587,7 +9588,7 @@ vector<bool, N> isnan(vector<T, N> x) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix<bool, N, M> isnan(matrix<T, N, M> x) { __target_switch @@ -9602,7 +9603,7 @@ matrix<bool, N, M> isnan(matrix<T, N, M> x) __generic<T : __BuiltinFloatingPointType> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T ldexp(T x, T exp) { __target_switch @@ -9615,7 +9616,7 @@ T ldexp(T x, T exp) __generic<T : __BuiltinFloatingPointType, let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector<T, N> ldexp(vector<T, N> x, vector<T, N> exp) { __target_switch @@ -9628,7 +9629,7 @@ vector<T, N> ldexp(vector<T, N> x, vector<T, N> exp) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix<T, N, M> ldexp(matrix<T, N, M> x, matrix<T, N, M> exp) { __target_switch @@ -9641,7 +9642,7 @@ matrix<T, N, M> ldexp(matrix<T, N, M> x, matrix<T, N, M> exp) __generic<T : __BuiltinFloatingPointType, E : __BuiltinIntegerType> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] 
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T ldexp(T x, E exp) { __target_switch @@ -9659,7 +9660,7 @@ T ldexp(T x, E exp) __generic<T : __BuiltinFloatingPointType, E : __BuiltinIntegerType, let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector<T, N> ldexp(vector<T, N> x, vector<E, N> exp) { __target_switch @@ -9683,7 +9684,7 @@ vector<T, N> ldexp(vector<T, N> x, vector<E, N> exp) // Vector length __generic<T : __BuiltinFloatingPointType, let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T length(vector<T, N> x) { __target_switch @@ -9701,7 +9702,7 @@ T length(vector<T, N> x) // Scalar float length __generic<T : __BuiltinFloatingPointType> -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T length(T x) { __target_switch @@ -9718,7 +9719,7 @@ T length(T x) // Linear interpolation __generic<T : __BuiltinFloatingPointType> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T lerp(T x, T y, T s) { __target_switch @@ -9736,7 +9737,7 @@ T lerp(T x, T y, T s) __generic<T : __BuiltinFloatingPointType, let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector<T, N> lerp(vector<T, N> x, vector<T, N> y, vector<T, N> s) { __target_switch @@ -9754,7 +9755,7 @@ vector<T, N> lerp(vector<T, N> x, vector<T, N> y, vector<T, N> s) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix<T,N,M> lerp(matrix<T,N,M> x, matrix<T,N,M> y, matrix<T,N,M> s) { __target_switch @@ -9767,7 +9768,7 @@ 
matrix<T,N,M> lerp(matrix<T,N,M> x, matrix<T,N,M> y, matrix<T,N,M> s) // Legacy lighting function (obsolete) [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] float4 lit(float n_dot_l, float n_dot_h, float m) { __target_switch @@ -9784,7 +9785,7 @@ float4 lit(float n_dot_l, float n_dot_h, float m) // Base-e logarithm __generic<T : __BuiltinFloatingPointType> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T log(T x) { __target_switch @@ -9802,7 +9803,7 @@ T log(T x) __generic<T : __BuiltinFloatingPointType, let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector<T, N> log(vector<T, N> x) { __target_switch @@ -9820,7 +9821,7 @@ vector<T, N> log(vector<T, N> x) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix<T, N, M> log(matrix<T, N, M> x) { __target_switch @@ -9834,7 +9835,7 @@ matrix<T, N, M> log(matrix<T, N, M> x) // Base-10 logarithm __generic<T : __BuiltinFloatingPointType> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T log10(T x) { __target_switch @@ -9857,7 +9858,7 @@ T log10(T x) __generic<T : __BuiltinFloatingPointType, let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector<T,N> log10(vector<T,N> x) { __target_switch @@ -9880,7 +9881,7 @@ vector<T,N> log10(vector<T,N> x) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, 
sm_4_0_version)] matrix<T,N,M> log10(matrix<T,N,M> x) { __target_switch @@ -9894,7 +9895,7 @@ matrix<T,N,M> log10(matrix<T,N,M> x) // Base-2 logarithm __generic<T : __BuiltinFloatingPointType> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T log2(T x) { __target_switch @@ -9912,7 +9913,7 @@ T log2(T x) __generic<T : __BuiltinFloatingPointType, let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector<T,N> log2(vector<T,N> x) { __target_switch @@ -9930,7 +9931,7 @@ vector<T,N> log2(vector<T,N> x) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix<T,N,M> log2(matrix<T,N,M> x) { __target_switch @@ -10043,7 +10044,7 @@ matrix<T, N, M> mad(matrix<T, N, M> mvalue, matrix<T, N, M> avalue, matrix<T, N, // maximum __generic<T : __BuiltinIntegerType> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T max(T x, T y) { // Note: a stdlib implementation of `max` (or `min`) will require splitting @@ -10078,7 +10079,7 @@ T max(T x, T y) __generic<T : __BuiltinIntegerType, let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector<T, N> max(vector<T, N> x, vector<T, N> y) { __target_switch @@ -10108,7 +10109,7 @@ vector<T, N> max(vector<T, N> x, vector<T, N> y) __generic<T : __BuiltinIntegerType, let N : int, let M : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix<T, N, M> max(matrix<T, N, M> x, matrix<T, N, M> y) { __target_switch @@ -10121,7 +10122,7 @@ matrix<T, N, M> max(matrix<T, N, 
M> x, matrix<T, N, M> y) __generic<T : __BuiltinFloatingPointType> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T max(T x, T y) { __target_switch @@ -10139,7 +10140,7 @@ T max(T x, T y) __generic<T : __BuiltinFloatingPointType, let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector<T, N> max(vector<T, N> x, vector<T, N> y) { __target_switch @@ -10157,7 +10158,7 @@ vector<T, N> max(vector<T, N> x, vector<T, N> y) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix<T, N, M> max(matrix<T, N, M> x, matrix<T, N, M> y) { __target_switch @@ -10170,7 +10171,7 @@ matrix<T, N, M> max(matrix<T, N, M> x, matrix<T, N, M> y) __generic<T : __BuiltinFloatingPointType> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T max3(T x, T y, T z) { __target_switch @@ -10183,7 +10184,7 @@ T max3(T x, T y, T z) __generic<T : __BuiltinFloatingPointType, let N: int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector<T,N> max3(vector<T,N> x, vector<T,N> y, vector<T,N> z) { __target_switch @@ -10196,7 +10197,7 @@ vector<T,N> max3(vector<T,N> x, vector<T,N> y, vector<T,N> z) __generic<T : __BuiltinFloatingPointType> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T fmax(T x, T y) { __target_switch @@ -10210,7 +10211,7 @@ T fmax(T x, T y) __generic<T : __BuiltinFloatingPointType, let N: int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, 
sm_4_0_version)] vector<T,N> fmax(vector<T,N> x, vector<T,N> y) { __target_switch @@ -10223,7 +10224,7 @@ vector<T,N> fmax(vector<T,N> x, vector<T,N> y) __generic<T : __BuiltinFloatingPointType> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T fmax3(T x, T y, T z) { __target_switch @@ -10257,7 +10258,7 @@ T fmax3(T x, T y, T z) __generic<T : __BuiltinFloatingPointType, let N: int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector<T,N> fmax3(vector<T,N> x, vector<T,N> y, vector<T,N> z) { __target_switch @@ -10272,7 +10273,7 @@ vector<T,N> fmax3(vector<T,N> x, vector<T,N> y, vector<T,N> z) // minimum __generic<T : __BuiltinIntegerType> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T min(T x, T y) { __target_switch @@ -10300,7 +10301,7 @@ T min(T x, T y) __generic<T : __BuiltinIntegerType, let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector<T,N> min(vector<T,N> x, vector<T,N> y) { __target_switch @@ -10326,7 +10327,7 @@ vector<T,N> min(vector<T,N> x, vector<T,N> y) __generic<T : __BuiltinIntegerType, let N : int, let M : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix<T,N,M> min(matrix<T,N,M> x, matrix<T,N,M> y) { __target_switch @@ -10339,7 +10340,7 @@ matrix<T,N,M> min(matrix<T,N,M> x, matrix<T,N,M> y) __generic<T : __BuiltinFloatingPointType> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T min(T x, T y) { __target_switch @@ -10357,7 +10358,7 @@ T min(T x, T y) __generic<T : __BuiltinFloatingPointType, let N : int> [__readNone] 
-[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector<T,N> min(vector<T,N> x, vector<T,N> y) { __target_switch @@ -10375,7 +10376,7 @@ vector<T,N> min(vector<T,N> x, vector<T,N> y) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix<T,N,M> min(matrix<T,N,M> x, matrix<T,N,M> y) { __target_switch @@ -10388,7 +10389,7 @@ matrix<T,N,M> min(matrix<T,N,M> x, matrix<T,N,M> y) __generic<T : __BuiltinFloatingPointType> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T min3(T x, T y, T z) { __target_switch @@ -10401,7 +10402,7 @@ T min3(T x, T y, T z) __generic<T : __BuiltinFloatingPointType, let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector<T,N> min3(vector<T,N> x, vector<T,N> y, vector<T,N> z) { __target_switch @@ -10414,7 +10415,7 @@ vector<T,N> min3(vector<T,N> x, vector<T,N> y, vector<T,N> z) __generic<T : __BuiltinFloatingPointType> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T fmin(T x, T y) { __target_switch @@ -10428,7 +10429,7 @@ T fmin(T x, T y) __generic<T : __BuiltinFloatingPointType, let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector<T,N> fmin(vector<T,N> x, vector<T,N> y) { __target_switch @@ -10442,7 +10443,7 @@ vector<T,N> fmin(vector<T,N> x, vector<T,N> y) __generic<T : __BuiltinFloatingPointType> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T fmin3(T x, T y, T z) { __target_switch 
@@ -10476,7 +10477,7 @@ T fmin3(T x, T y, T z) __generic<T : __BuiltinFloatingPointType, let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector<T,N> fmin3(vector<T,N> x, vector<T,N> y, vector<T,N> z) { __target_switch @@ -10491,7 +10492,7 @@ vector<T,N> fmin3(vector<T,N> x, vector<T,N> y, vector<T,N> z) // Median __generic<T : __BuiltinFloatingPointType> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T median3(T x, T y, T z) { __target_switch @@ -10519,7 +10520,7 @@ T median3(T x, T y, T z) __generic<T : __BuiltinFloatingPointType, let N: int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector<T,N> median3(vector<T,N> x, vector<T,N> y, vector<T,N> z) { __target_switch @@ -10538,7 +10539,7 @@ vector<T,N> median3(vector<T,N> x, vector<T,N> y, vector<T,N> z) __generic<T : __BuiltinFloatingPointType> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T fmedian3(T x, T y, T z) { __target_switch @@ -10567,7 +10568,7 @@ T fmedian3(T x, T y, T z) __generic<T : __BuiltinFloatingPointType, let N: int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector<T,N> fmedian3(vector<T,N> x, vector<T,N> y, vector<T,N> z) { __target_switch @@ -10582,7 +10583,7 @@ vector<T,N> fmedian3(vector<T,N> x, vector<T,N> y, vector<T,N> z) // split into integer and fractional parts (both with same sign) __generic<T : __BuiltinFloatingPointType> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T modf(T x, out T ip) { __target_switch @@ -10600,7 +10601,7 @@ T modf(T x, out T ip) 
__generic<T : __BuiltinFloatingPointType, let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector<T,N> modf(vector<T,N> x, out vector<T,N> ip) { __target_switch @@ -10618,7 +10619,7 @@ vector<T,N> modf(vector<T,N> x, out vector<T,N> ip) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int, let L : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix<T,N,M> modf(matrix<T,N,M> x, out matrix<T,N,M,L> ip) { __target_switch @@ -10631,7 +10632,7 @@ matrix<T,N,M> modf(matrix<T,N,M> x, out matrix<T,N,M,L> ip) // msad4 (whatever that is) [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_spirv, sm_4_0_version)] uint4 msad4(uint reference, uint2 source, uint4 accum) { __target_switch @@ -10659,39 +10660,39 @@ uint4 msad4(uint reference, uint2 source, uint4 accum) __generic<T : __BuiltinArithmeticType> __intrinsic_op($(kIROp_Mul)) [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T mul(T x, T y); // scalar-vector and vector-scalar __generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op($(kIROp_Mul)) [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector<T, N> mul(vector<T, N> x, T y); __generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op($(kIROp_Mul)) [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector<T, N> mul(T x, vector<T, N> y); // scalar-matrix and matrix-scalar __generic<T : __BuiltinArithmeticType, let N : int, let M :int> __intrinsic_op($(kIROp_Mul)) [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] 
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix<T, N, M> mul(matrix<T, N, M> x, T y); __generic<T : __BuiltinArithmeticType, let N : int, let M :int> __intrinsic_op($(kIROp_Mul)) [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix<T, N, M> mul(T x, matrix<T, N, M> y); // vector-vector (dot product) __generic<T : __BuiltinFloatingPointType, let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T mul(vector<T, N> x, vector<T, N> y) { __target_switch @@ -10705,7 +10706,7 @@ T mul(vector<T, N> x, vector<T, N> y) } __generic<T : __BuiltinIntegerType, let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T mul(vector<T, N> x, vector<T, N> y) { __target_switch @@ -10719,7 +10720,7 @@ T mul(vector<T, N> x, vector<T, N> y) // vector-matrix __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector<T, M> mul(vector<T, N> left, matrix<T, N, M> right) { __target_switch @@ -10746,7 +10747,7 @@ vector<T, M> mul(vector<T, N> left, matrix<T, N, M> right) } __generic<T : __BuiltinIntegerType, let N : int, let M : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector<T, M> mul(vector<T, N> left, matrix<T, N, M> right) { __target_switch @@ -10770,7 +10771,7 @@ vector<T, M> mul(vector<T, N> left, matrix<T, N, M> right) } __generic<T : __BuiltinLogicalType, let N : int, let M : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector<T, M> mul(vector<T, N> left, matrix<T, N, M> right) 
{ __target_switch @@ -10796,7 +10797,7 @@ vector<T, M> mul(vector<T, N> left, matrix<T, N, M> right) // matrix-vector __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector<T,N> mul(matrix<T,N,M> left, vector<T,M> right) { __target_switch @@ -10823,7 +10824,7 @@ vector<T,N> mul(matrix<T,N,M> left, vector<T,M> right) } __generic<T : __BuiltinIntegerType, let N : int, let M : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector<T,N> mul(matrix<T,N,M> left, vector<T,M> right) { __target_switch @@ -10847,7 +10848,7 @@ vector<T,N> mul(matrix<T,N,M> left, vector<T,M> right) } __generic<T : __BuiltinLogicalType, let N : int, let M : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector<T,N> mul(matrix<T,N,M> left, vector<T,M> right) { __target_switch @@ -10873,7 +10874,7 @@ vector<T,N> mul(matrix<T,N,M> left, vector<T,M> right) // matrix-matrix __generic<T : __BuiltinFloatingPointType, let R : int, let N : int, let C : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix<T,R,C> mul(matrix<T,R,N> left, matrix<T,N,C> right) { __target_switch @@ -10901,7 +10902,7 @@ matrix<T,R,C> mul(matrix<T,R,N> left, matrix<T,N,C> right) } __generic<T : __BuiltinIntegerType, let R : int, let N : int, let C : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix<T,R,C> mul(matrix<T,R,N> left, matrix<T,N,C> right) { __target_switch @@ -10926,7 +10927,7 @@ matrix<T,R,C> mul(matrix<T,R,N> left, matrix<T,N,C> right) } __generic<T : __BuiltinLogicalType, let R : int, let N : int, let C : 
int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix<T,R,C> mul(matrix<T,R,N> left, matrix<T,N,C> right) { __target_switch @@ -11057,7 +11058,7 @@ T NonUniformResourceIndex<T>(T value) { return value; } // Normalize a vector __generic<T : __BuiltinFloatingPointType, let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector<T,N> normalize(vector<T,N> x) { __target_switch @@ -11075,7 +11076,7 @@ vector<T,N> normalize(vector<T,N> x) __generic<T : __BuiltinFloatingPointType> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T normalize(T x) { __target_switch @@ -11094,7 +11095,7 @@ T normalize(T x) // Raise to a power __generic<T : __BuiltinFloatingPointType> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T pow(T x, T y) { __target_switch @@ -11112,7 +11113,7 @@ T pow(T x, T y) __generic<T : __BuiltinFloatingPointType, let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector<T, N> pow(vector<T, N> x, vector<T, N> y) { __target_switch @@ -11130,7 +11131,7 @@ vector<T, N> pow(vector<T, N> x, vector<T, N> y) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix<T,N,M> pow(matrix<T,N,M> x, matrix<T,N,M> y) { __target_switch @@ -11143,7 +11144,7 @@ matrix<T,N,M> pow(matrix<T,N,M> x, matrix<T,N,M> y) __generic<T : __BuiltinFloatingPointType> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T powr(T x, T 
y) { __target_switch @@ -11156,7 +11157,7 @@ T powr(T x, T y) __generic<T : __BuiltinFloatingPointType, let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector<T, N> powr(vector<T, N> x, vector<T, N> y) { __target_switch @@ -11302,7 +11303,7 @@ void ProcessTriTessFactorsMin( // Degrees to radians __generic<T : __BuiltinFloatingPointType> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T radians(T x) { __target_switch @@ -11319,7 +11320,7 @@ T radians(T x) __generic<T : __BuiltinFloatingPointType, let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector<T, N> radians(vector<T, N> x) { __target_switch @@ -11336,7 +11337,7 @@ vector<T, N> radians(vector<T, N> x) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix<T, N, M> radians(matrix<T, N, M> x) { __target_switch @@ -11350,7 +11351,7 @@ matrix<T, N, M> radians(matrix<T, N, M> x) // Approximate reciprocal __generic<T : __BuiltinFloatingPointType> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T rcp(T x) { __target_switch @@ -11363,7 +11364,7 @@ T rcp(T x) __generic<T : __BuiltinFloatingPointType, let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector<T, N> rcp(vector<T, N> x) { __target_switch @@ -11379,7 +11380,7 @@ vector<T, N> rcp(vector<T, N> x) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] 
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix<T, N, M> rcp(matrix<T, N, M> x) { __target_switch @@ -11393,7 +11394,7 @@ matrix<T, N, M> rcp(matrix<T, N, M> x) // Reflect incident vector across plane with given normal __generic<T : __BuiltinFloatingPointType> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T reflect(T i, T n) { __target_switch @@ -11411,7 +11412,7 @@ T reflect(T i, T n) __generic<T : __BuiltinFloatingPointType, let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector<T,N> reflect(vector<T,N> i, vector<T,N> n) { __target_switch @@ -11430,7 +11431,7 @@ vector<T,N> reflect(vector<T,N> i, vector<T,N> n) // Refract incident vector given surface normal and index of refraction __generic<T : __BuiltinFloatingPointType, let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector<T,N> refract(vector<T,N> i, vector<T,N> n, T eta) { __target_switch @@ -11451,7 +11452,7 @@ vector<T,N> refract(vector<T,N> i, vector<T,N> n, T eta) __generic<T : __BuiltinFloatingPointType> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T refract(T i, T n, T eta) { __target_switch @@ -11513,7 +11514,7 @@ vector<uint, N> reversebits(vector<uint, N> value) __generic<T : __BuiltinFloatingPointType> [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T rint(T x) { __target_switch @@ -11544,7 +11545,7 @@ T rint(T x) __generic<T : __BuiltinFloatingPointType, let N:int> [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector<T,N> 
rint(vector<T,N> x) { __target_switch @@ -11562,7 +11563,7 @@ vector<T,N> rint(vector<T,N> x) // Round-to-nearest __generic<T : __BuiltinFloatingPointType> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T round(T x) { __target_switch @@ -11580,7 +11581,7 @@ T round(T x) __generic<T : __BuiltinFloatingPointType, let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector<T, N> round(vector<T, N> x) { __target_switch @@ -11598,7 +11599,7 @@ vector<T, N> round(vector<T, N> x) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix<T,N,M> round(matrix<T,N,M> x) { __target_switch @@ -11612,7 +11613,7 @@ matrix<T,N,M> round(matrix<T,N,M> x) // Reciprocal of square root __generic<T : __BuiltinFloatingPointType> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T rsqrt(T x) { __target_switch @@ -11632,7 +11633,7 @@ T rsqrt(T x) __generic<T : __BuiltinFloatingPointType, let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector<T, N> rsqrt(vector<T, N> x) { __target_switch @@ -11650,7 +11651,7 @@ vector<T, N> rsqrt(vector<T, N> x) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix<T, N, M> rsqrt(matrix<T, N, M> x) { __target_switch @@ -11665,7 +11666,7 @@ matrix<T, N, M> rsqrt(matrix<T, N, M> x) __generic<T : __BuiltinFloatingPointType> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] 
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T saturate(T x) { __target_switch @@ -11679,7 +11680,7 @@ T saturate(T x) __generic<T : __BuiltinFloatingPointType, let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector<T,N> saturate(vector<T,N> x) { __target_switch @@ -11695,7 +11696,7 @@ vector<T,N> saturate(vector<T,N> x) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix<T,N,M> saturate(matrix<T,N,M> x) { __target_switch @@ -11764,7 +11765,7 @@ vector<int, N> sign(vector<T, N> x) __generic<T : __BuiltinSignedArithmeticType, let N : int, let M : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_spirv, sm_4_0_version)] matrix<int, N, M> sign(matrix<T, N, M> x) { __target_switch @@ -11779,7 +11780,7 @@ matrix<int, N, M> sign(matrix<T, N, M> x) __generic<T : __BuiltinFloatingPointType> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T sin(T x) { __target_switch @@ -11797,7 +11798,7 @@ T sin(T x) __generic<T : __BuiltinFloatingPointType, let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector<T, N> sin(vector<T, N> x) { __target_switch @@ -11815,7 +11816,7 @@ vector<T, N> sin(vector<T, N> x) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix<T, N, M> sin(matrix<T, N, M> x) { __target_switch @@ -11852,7 +11853,7 @@ vector<T,N> __sincos_metal(vector<T,N> x, out vector<T,N> c) __generic<T : __BuiltinFloatingPointType> [__readNone] 
[ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] void sincos(T x, out T s, out T c) { __target_switch @@ -11872,7 +11873,7 @@ void sincos(T x, out T s, out T c) __generic<T : __BuiltinFloatingPointType, let N : int> [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] void sincos(vector<T,N> x, out vector<T,N> s, out vector<T,N> c) { __target_switch @@ -11891,7 +11892,7 @@ void sincos(vector<T,N> x, out vector<T,N> s, out vector<T,N> c) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int, let L1: int, let L2 : int> [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] void sincos(matrix<T,N,M> x, out matrix<T,N,M,L1> s, out matrix<T,N,M,L2> c) { __target_switch @@ -11906,7 +11907,7 @@ void sincos(matrix<T,N,M> x, out matrix<T,N,M,L1> s, out matrix<T,N,M,L2> c) // Hyperbolic Sine __generic<T : __BuiltinFloatingPointType> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T sinh(T x) { __target_switch @@ -11924,7 +11925,7 @@ T sinh(T x) __generic<T : __BuiltinFloatingPointType, let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector<T, N> sinh(vector<T, N> x) { __target_switch @@ -11942,7 +11943,7 @@ vector<T, N> sinh(vector<T, N> x) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix<T, N, M> sinh(matrix<T, N, M> x) { __target_switch @@ -11957,7 +11958,7 @@ matrix<T, N, M> sinh(matrix<T, N, M> x) __generic<T : __BuiltinFloatingPointType> [__readNone] 
-[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T sinpi(T x) { __target_switch @@ -11970,7 +11971,7 @@ T sinpi(T x) __generic<T : __BuiltinFloatingPointType, let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector<T,N> sinpi(vector<T,N> x) { __target_switch @@ -11985,7 +11986,7 @@ vector<T,N> sinpi(vector<T,N> x) // Smooth step (Hermite interpolation) __generic<T : __BuiltinFloatingPointType> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T smoothstep(T min, T max, T x) { __target_switch @@ -12004,7 +12005,7 @@ T smoothstep(T min, T max, T x) __generic<T : __BuiltinFloatingPointType, let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector<T, N> smoothstep(vector<T, N> min, vector<T, N> max, vector<T, N> x) { __target_switch @@ -12022,7 +12023,7 @@ vector<T, N> smoothstep(vector<T, N> min, vector<T, N> max, vector<T, N> x) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix<T, N, M> smoothstep(matrix<T, N, M> min, matrix<T, N, M> max, matrix<T, N, M> x) { __target_switch @@ -12036,7 +12037,7 @@ matrix<T, N, M> smoothstep(matrix<T, N, M> min, matrix<T, N, M> max, matrix<T, N // Square root __generic<T : __BuiltinFloatingPointType> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T sqrt(T x) { __target_switch @@ -12054,7 +12055,7 @@ T sqrt(T x) __generic<T : __BuiltinFloatingPointType, let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] 
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector<T, N> sqrt(vector<T, N> x) { __target_switch @@ -12072,7 +12073,7 @@ vector<T, N> sqrt(vector<T, N> x) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix<T, N, M> sqrt(matrix<T, N, M> x) { __target_switch @@ -12086,7 +12087,7 @@ matrix<T, N, M> sqrt(matrix<T, N, M> x) // Step function __generic<T : __BuiltinFloatingPointType> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T step(T y, T x) { __target_switch @@ -12104,7 +12105,7 @@ T step(T y, T x) __generic<T : __BuiltinFloatingPointType, let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector<T,N> step(vector<T,N> y, vector<T,N> x) { __target_switch @@ -12122,7 +12123,7 @@ vector<T,N> step(vector<T,N> y, vector<T,N> x) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix<T, N, M> step(matrix<T, N, M> y, matrix<T, N, M> x) { __target_switch @@ -12136,7 +12137,7 @@ matrix<T, N, M> step(matrix<T, N, M> y, matrix<T, N, M> x) // Tangent __generic<T : __BuiltinFloatingPointType> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T tan(T x) { __target_switch @@ -12154,7 +12155,7 @@ T tan(T x) __generic<T : __BuiltinFloatingPointType, let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector<T, N> tan(vector<T, N> x) { __target_switch @@ -12172,7 +12173,7 @@ vector<T, N> tan(vector<T, N> x) __generic<T : 
__BuiltinFloatingPointType, let N : int, let M : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix<T, N, M> tan(matrix<T, N, M> x) { __target_switch @@ -12186,7 +12187,7 @@ matrix<T, N, M> tan(matrix<T, N, M> x) // Hyperbolic tangent __generic<T : __BuiltinFloatingPointType> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T tanh(T x) { __target_switch @@ -12204,7 +12205,7 @@ T tanh(T x) __generic<T : __BuiltinFloatingPointType, let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector<T,N> tanh(vector<T,N> x) { __target_switch @@ -12222,7 +12223,7 @@ vector<T,N> tanh(vector<T,N> x) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix<T,N,M> tanh(matrix<T,N,M> x) { __target_switch @@ -12237,7 +12238,7 @@ matrix<T,N,M> tanh(matrix<T,N,M> x) __generic<T : __BuiltinFloatingPointType> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T tanpi(T x) { __target_switch @@ -12250,7 +12251,7 @@ T tanpi(T x) __generic<T : __BuiltinFloatingPointType, let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector<T,N> tanpi(vector<T,N> x) { __target_switch @@ -12265,7 +12266,7 @@ vector<T,N> tanpi(vector<T,N> x) // Matrix transpose __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_spirv, sm_4_0_version)] [PreferRecompute] matrix<T, M, N> transpose(matrix<T, N, M> x) { @@ -12286,7 
+12287,7 @@ matrix<T, M, N> transpose(matrix<T, N, M> x) } __generic<T : __BuiltinIntegerType, let N : int, let M : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_spirv, sm_4_0_version)] [PreferRecompute] matrix<T, M, N> transpose(matrix<T, N, M> x) { @@ -12307,7 +12308,7 @@ matrix<T, M, N> transpose(matrix<T, N, M> x) } __generic<T : __BuiltinLogicalType, let N : int, let M : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_spirv, sm_4_0_version)] [PreferRecompute] [OverloadRank(-1)] matrix<T, M, N> transpose(matrix<T, N, M> x) @@ -12331,7 +12332,7 @@ matrix<T, M, N> transpose(matrix<T, N, M> x) // Truncate to integer __generic<T : __BuiltinFloatingPointType> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T trunc(T x) { __target_switch @@ -12349,7 +12350,7 @@ T trunc(T x) __generic<T : __BuiltinFloatingPointType, let N : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector<T, N> trunc(vector<T, N> x) { __target_switch @@ -12367,7 +12368,7 @@ vector<T, N> trunc(vector<T, N> x) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix<T, N, M> trunc(matrix<T, N, M> x) { __target_switch @@ -12548,7 +12549,7 @@ uint WaveMaskCountBits(WaveMask mask, bool value) // It seems this can only mean the active threads are the "threads the program flow would lead to". This implies a lockstep // "straight SIMD" style interpretation. That being the case this op on HLSL is just a memory barrier without any Sync. 
-[require(cuda_glsl_hlsl_spirv, memorybarrier_compute)] +[require(cuda_glsl_hlsl_spirv, memorybarrier)] void AllMemoryBarrierWithWaveMaskSync(WaveMask mask) { __target_switch @@ -12580,7 +12581,7 @@ void AllMemoryBarrierWithWaveMaskSync(WaveMask mask) // aspect of HLSL seems to make everything in lock step - but that's not quite so, it only has to apparently be that way as far as the programmers // model appears - divergence could perhaps potentially still happen. -[require(cuda_glsl_hlsl_spirv, memorybarrier_compute)] +[require(cuda_glsl_hlsl_spirv, memorybarrier)] void GroupMemoryBarrierWithWaveMaskSync(WaveMask mask) { __target_switch @@ -12596,7 +12597,7 @@ void GroupMemoryBarrierWithWaveMaskSync(WaveMask mask) } } -[require(cuda_glsl_hlsl_spirv, memorybarrier_compute)] +[require(cuda_glsl_hlsl_spirv, memorybarrier)] void AllMemoryBarrierWithWaveSync() { __target_switch @@ -12612,7 +12613,7 @@ void AllMemoryBarrierWithWaveSync() } } -[require(cuda_glsl_hlsl_spirv, memorybarrier_compute)] +[require(cuda_glsl_hlsl_spirv, memorybarrier)] void GroupMemoryBarrierWithWaveSync() { __target_switch @@ -15095,7 +15096,7 @@ struct RaytracingAccelerationStructure {}; // 10.1.5 - Intersection Attributes Structure __target_intrinsic(hlsl, BuiltInTriangleIntersectionAttributes) -[require(cpp_cuda_glsl_hlsl_spirv, rayobject)] +[require(cpp_cuda_glsl_hlsl_spirv, raytracing)] struct BuiltInTriangleIntersectionAttributes { __target_intrinsic(hlsl, barycentrics) @@ -16888,7 +16889,6 @@ ${{{{ return spirv_asm { OpCapability RayQueryKHR; - OpCapability RayTracingPositionFetchKHR; OpCapability RayQueryPositionFetchKHR; OpExtension "SPV_KHR_ray_query"; OpExtension "SPV_KHR_ray_tracing_position_fetch"; @@ -19742,7 +19742,7 @@ extension __TextureImpl<float, Shape, 0, 0, 0, $(kStdlibResourceAccessReadWrite) [__requiresNVAPI] [ForceInline] __glsl_extension(GL_EXT_shader_atomic_float) - [require(glsl_hlsl_spirv, atomic_glsl_hlsl_cuda_float1)] + [require(glsl_hlsl_spirv, 
atomic_glsl_hlsl_nvapi_cuda_float1)] void InterlockedAddF32(vector<uint, Shape.dimensions> coord, float value, out float originalValue) { __target_switch diff --git a/source/slang/slang-ast-support-types.h b/source/slang/slang-ast-support-types.h index 648baa820..76c638693 100644 --- a/source/slang/slang-ast-support-types.h +++ b/source/slang/slang-ast-support-types.h @@ -67,7 +67,7 @@ namespace Slang void printDiagnosticArg(StringBuilder& sb, DeclRefBase* declRefBase); void printDiagnosticArg(StringBuilder& sb, ASTNodeType nodeType); void printDiagnosticArg(StringBuilder& sb, const CapabilitySet& set); - + void printDiagnosticArg(StringBuilder& sb, List<CapabilityAtom>& set); struct QualifiedDeclPath { diff --git a/source/slang/slang-capabilities.capdef b/source/slang/slang-capabilities.capdef index 6d4e5b3f4..53679be35 100644 --- a/source/slang/slang-capabilities.capdef +++ b/source/slang/slang-capabilities.capdef @@ -58,53 +58,10 @@ def c : target + textualTarget; def cpp : target + textualTarget; def cuda : target + textualTarget; def metal : target + textualTarget; -def spirv_1_0 : target; - -// We have multiple capabilities for the various SPIR-V versions, -// arranged so that they inherit from one another to represent which versions -// provide a super-set of the features of earlier ones (e.g., SPIR-V 1.4 is -// expressed as inheriting from SPIR-V 1.3). 
-// -def spirv_1_1 : spirv_1_0; -def spirv_1_2 : spirv_1_1; -def spirv_1_3 : spirv_1_2; -def spirv_1_4 : spirv_1_3; -def spirv_1_5 : spirv_1_4; -def spirv_1_6 : spirv_1_5; -alias spirv = spirv_1_0; -alias spirv_latest = spirv_1_6; - -alias any_target = hlsl | metal | glsl | c | cpp | cuda | spirv; -alias any_textual_target = hlsl | metal | glsl | c | cpp | cuda; -alias any_gfx_target = hlsl | metal | glsl | spirv; -alias any_cpp_target = cpp | cuda; - -alias cpp_cuda = cpp | cuda; -alias cpp_cuda_glsl_spirv = cpp | cuda | glsl | spirv; -alias cpp_cuda_glsl_hlsl = cpp | cuda | glsl | hlsl; -alias cpp_cuda_glsl_hlsl_spirv = cpp | cuda | glsl | hlsl | spirv_1_0; -alias cpp_cuda_glsl_hlsl_metal_spirv = cpp | cuda | glsl | hlsl | metal | spirv_1_0; -alias cpp_cuda_hlsl = cpp | cuda | hlsl; -alias cpp_cuda_hlsl_spirv = cpp | cuda | hlsl | spirv_1_0; -alias cpp_cuda_hlsl_metal_spirv = cpp | cuda | hlsl | metal | spirv_1_0; -alias cpp_glsl = cpp | glsl; -alias cpp_glsl_hlsl_spirv = cpp | glsl | hlsl | spirv_1_0; -alias cpp_glsl_hlsl_metal_spirv = cpp | glsl | hlsl | metal | spirv_1_0; -alias cpp_hlsl = cpp | hlsl; -alias cuda_glsl_hlsl = cuda | glsl | hlsl; -alias cuda_glsl_hlsl_spirv = cuda | glsl | hlsl | spirv_1_0; -alias cuda_glsl_hlsl_metal_spirv = cuda | glsl | hlsl | metal | spirv_1_0; -alias cuda_glsl_spirv = cuda | glsl | spirv; -alias cuda_hlsl = cuda | hlsl; -alias cuda_hlsl_spirv = cuda | hlsl | spirv; -alias glsl_hlsl_spirv = glsl | hlsl | spirv; -alias glsl_hlsl_metal_spirv = glsl | hlsl | metal | spirv; -alias glsl_metal_spirv = glsl | metal | spirv; -alias glsl_spirv = glsl | spirv; -alias hlsl_spirv = hlsl | spirv; +def spirv : target; // Capabilities that stand for target spirv version for GLSL backend. -// These are not compilation targets. +// These are not compilation targets. We will convert `_spirv_*`->`glsl_spirv_*` during a compile. 
def glsl_spirv_1_0 : glsl; def glsl_spirv_1_1 : glsl_spirv_1_0; def glsl_spirv_1_2 : glsl_spirv_1_1; @@ -113,6 +70,20 @@ def glsl_spirv_1_4 : glsl_spirv_1_3; def glsl_spirv_1_5 : glsl_spirv_1_4; def glsl_spirv_1_6 : glsl_spirv_1_5; +// We have multiple capabilities for the various SPIR-V versions, +// arranged so that they inherit from one another to represent which versions +// provide a super-set of the features of earlier ones (e.g., SPIR-V 1.4 is +// expressed as inheriting from SPIR-V 1.3). +// +def _spirv_1_0 : spirv; +def _spirv_1_1 : _spirv_1_0; +def _spirv_1_2 : _spirv_1_1; +def _spirv_1_3 : _spirv_1_2; +def _spirv_1_4 : _spirv_1_3; +def _spirv_1_5 : _spirv_1_4; +def _spirv_1_6 : _spirv_1_5; +alias _spirv_latest = _spirv_1_6; + def _GLSL_130 : glsl; def _GLSL_140 : _GLSL_130; def _GLSL_150 : _GLSL_140; @@ -125,7 +96,6 @@ def _GLSL_440 : _GLSL_430; def _GLSL_450 : _GLSL_440; def _GLSL_460 : _GLSL_450; - // metal versions def metallib_2_3 : metal; def metallib_2_4 : metallib_2_3; @@ -161,6 +131,35 @@ def _cuda_sm_7_0 : _cuda_sm_6_0; def _cuda_sm_8_0 : _cuda_sm_7_0; def _cuda_sm_9_0 : _cuda_sm_8_0; +alias any_target = hlsl | metal | glsl | c | cpp | cuda | spirv; +alias any_textual_target = hlsl | metal | glsl | c | cpp | cuda; +alias any_gfx_target = hlsl | metal | glsl | spirv; +alias any_cpp_target = cpp | cuda; + +alias cpp_cuda = cpp | cuda; +alias cpp_cuda_glsl_spirv = cpp | cuda | glsl | spirv; +alias cpp_cuda_glsl_hlsl = cpp | cuda | glsl | hlsl; +alias cpp_cuda_glsl_hlsl_spirv = cpp | cuda | glsl | hlsl | spirv; +alias cpp_cuda_glsl_hlsl_metal_spirv = cpp | cuda | glsl | hlsl | metal | spirv; +alias cpp_cuda_hlsl = cpp | cuda | hlsl; +alias cpp_cuda_hlsl_spirv = cpp | cuda | hlsl | spirv; +alias cpp_cuda_hlsl_metal_spirv = cpp | cuda | hlsl | metal | spirv; +alias cpp_glsl = cpp | glsl; +alias cpp_glsl_hlsl_spirv = cpp | glsl | hlsl | spirv; +alias cpp_glsl_hlsl_metal_spirv = cpp | glsl | hlsl | metal | spirv; +alias cpp_hlsl = cpp | hlsl; +alias 
cuda_glsl_hlsl = cuda | glsl | hlsl; +alias cuda_glsl_hlsl_spirv = cuda | glsl | hlsl | spirv; +alias cuda_glsl_hlsl_metal_spirv = cuda | glsl | hlsl | metal | spirv; +alias cuda_glsl_spirv = cuda | glsl | spirv; +alias cuda_hlsl = cuda | hlsl; +alias cuda_hlsl_spirv = cuda | hlsl | spirv; +alias glsl_hlsl_spirv = glsl | hlsl | spirv; +alias glsl_hlsl_metal_spirv = glsl | hlsl | metal | spirv; +alias glsl_metal_spirv = glsl | metal | spirv; +alias glsl_spirv = glsl | spirv; +alias hlsl_spirv = hlsl | spirv; + abstract stage; def vertex : stage; def fragment : stage; @@ -205,58 +204,65 @@ alias raytracingstages_compute_fragment_geometry_vertex = raytracing_stages | co // SPIRV extensions. -def SPV_EXT_fragment_shader_interlock : spirv_1_0; -def SPV_KHR_fragment_shader_barycentric : spirv_1_0; -def SPV_EXT_fragment_fully_covered : spirv_1_0; -def SPV_EXT_descriptor_indexing : spirv_1_0; -def SPV_EXT_shader_atomic_float_add : spirv_1_0; +def SPV_EXT_fragment_shader_interlock : _spirv_1_0; +def SPV_EXT_physical_storage_buffer : _spirv_1_3; +def SPV_EXT_fragment_fully_covered : _spirv_1_0; +def SPV_EXT_descriptor_indexing : _spirv_1_0; +def SPV_EXT_shader_atomic_float_add : _spirv_1_0; def SPV_EXT_shader_atomic_float16_add : SPV_EXT_shader_atomic_float_add; -def SPV_EXT_shader_atomic_float_min_max : spirv_1_0; -def SPV_KHR_non_semantic_info : spirv_1_0; -def SPV_NV_shader_subgroup_partitioned : spirv_1_0; -def SPV_NV_ray_tracing_motion_blur : spirv_1_0; -def SPV_EXT_mesh_shader : spirv_1_4; -def SPV_KHR_ray_tracing : spirv_1_4; -def SPV_KHR_ray_query : spirv_1_0; -def SPV_KHR_ray_tracing_position_fetch : SPV_KHR_ray_tracing + SPV_KHR_ray_query; -def SPV_NV_shader_invocation_reorder : spirv_1_5 + SPV_KHR_ray_tracing; -def SPV_KHR_shader_clock : spirv_1_0; -def SPV_NV_shader_image_footprint : spirv_1_0; -def SPV_GOOGLE_user_type : spirv_1_0; -def SPV_NV_compute_shader_derivatives : spirv_1_0; -def SPV_EXT_demote_to_helper_invocation : spirv_1_4; +def 
SPV_EXT_shader_atomic_float_min_max : _spirv_1_0; +def SPV_EXT_mesh_shader : _spirv_1_4; +def SPV_EXT_demote_to_helper_invocation : _spirv_1_4; + +def SPV_KHR_fragment_shader_barycentric : _spirv_1_0; +def SPV_KHR_non_semantic_info : _spirv_1_0; +def SPV_KHR_ray_tracing : _spirv_1_4; +def SPV_KHR_ray_query : _spirv_1_0; +def SPV_KHR_ray_tracing_position_fetch : _spirv_1_0; // requires SPV_KHR_ray_query or SPV_KHR_ray_tracing +def SPV_KHR_shader_clock : _spirv_1_0; + +def SPV_NV_shader_subgroup_partitioned : _spirv_1_0; +def SPV_NV_ray_tracing_motion_blur : _spirv_1_0; +def SPV_NV_shader_invocation_reorder : _spirv_1_5 + SPV_KHR_ray_tracing; +def SPV_NV_shader_image_footprint : _spirv_1_0; +def SPV_NV_compute_shader_derivatives : _spirv_1_0; + +def SPV_GOOGLE_user_type : _spirv_1_0; // SPIRV Capabilities. def spvAtomicFloat32AddEXT : SPV_EXT_shader_atomic_float_add; def spvAtomicFloat16AddEXT : SPV_EXT_shader_atomic_float16_add; -def spvInt64Atomics : spirv_1_0; +def spvInt64Atomics : _spirv_1_0; def spvAtomicFloat32MinMaxEXT : SPV_EXT_shader_atomic_float_min_max; def spvAtomicFloat16MinMaxEXT : SPV_EXT_shader_atomic_float_min_max; -def spvDerivativeControl : spirv_1_0; -def spvImageQuery : spirv_1_0; -def spvImageGatherExtended : spirv_1_0; +def spvDerivativeControl : _spirv_1_0; +def spvImageQuery : _spirv_1_0; +def spvImageGatherExtended : _spirv_1_0; +def spvSparseResidency : _spirv_1_0; def spvImageFootprintNV : SPV_NV_shader_image_footprint; -def spvMinLod : spirv_1_0; +def spvMinLod : _spirv_1_0; def spvFragmentShaderPixelInterlockEXT : SPV_EXT_fragment_shader_interlock; def spvFragmentBarycentricKHR : SPV_KHR_fragment_shader_barycentric; def spvFragmentFullyCoveredEXT : SPV_EXT_fragment_fully_covered; -def spvGroupNonUniformBallot : spirv_1_3; -def spvGroupNonUniformShuffle : spirv_1_3; -def spvGroupNonUniformArithmetic : spirv_1_3; -def spvGroupNonUniformQuad : spirv_1_3; -def spvGroupNonUniformVote : spirv_1_3; -def spvGroupNonUniformPartitionedNV : 
spirv_1_3 + SPV_NV_shader_subgroup_partitioned; +def spvGroupNonUniformBallot : _spirv_1_3; +def spvGroupNonUniformShuffle : _spirv_1_3; +def spvGroupNonUniformArithmetic : _spirv_1_3; +def spvGroupNonUniformQuad : _spirv_1_3; +def spvGroupNonUniformVote : _spirv_1_3; +def spvGroupNonUniformPartitionedNV : _spirv_1_3 + SPV_NV_shader_subgroup_partitioned; def spvRayTracingMotionBlurNV : SPV_NV_ray_tracing_motion_blur; def spvMeshShadingEXT : SPV_EXT_mesh_shader; def spvRayTracingKHR : SPV_KHR_ray_tracing; -def spvRayTracingPositionFetchKHR : SPV_KHR_ray_tracing_position_fetch; +def spvRayTracingPositionFetchKHR : SPV_KHR_ray_tracing_position_fetch + spvRayTracingKHR; def spvRayQueryKHR : SPV_KHR_ray_query; -def spvRayQueryPositionFetchKHR : SPV_KHR_ray_tracing_position_fetch; +def spvRayQueryPositionFetchKHR : SPV_KHR_ray_tracing_position_fetch + spvRayQueryKHR; def spvShaderInvocationReorderNV : SPV_NV_shader_invocation_reorder; def spvShaderClockKHR : SPV_KHR_shader_clock; -def spvShaderNonUniform : spirv_1_5; +def spvShaderNonUniformEXT : SPV_EXT_descriptor_indexing; +def spvShaderNonUniform : spvShaderNonUniformEXT; def spvDemoteToHelperInvocationEXT : SPV_EXT_demote_to_helper_invocation; +def spvDemoteToHelperInvocation : spvDemoteToHelperInvocationEXT; // The following capabilities all pertain to how ray tracing shaders are translated // to GLSL, where there are two different extensions that can provide the core @@ -271,109 +277,125 @@ def spvDemoteToHelperInvocationEXT : SPV_EXT_demote_to_helper_invocation; // extensions, the `EXT` extension will be favored over the `NV` extension, if // all other factors are equal. // +// If a user enabled a GL_ARB/GL_NV/GL_KHR, the user will also be-able to enable any equal GL_EXT +// To describe this relationship, all GL_EXT which were promoted (or originally not an EXT) +// will be set as a derived atom. 
-def _GL_ARB_derivative_control : glsl; -def _GL_ARB_fragment_shader_interlock : glsl; -def _GL_ARB_gpu_shader5 : glsl; -def _GL_ARB_shader_image_size : glsl; -def _GL_ARB_shader_texture_image_samples : glsl; -def _GL_ARB_sparse_texture_clamp : glsl; -def _GL_EXT_texture_query_lod : glsl; -def _GL_ARB_texture_query_levels : glsl; -def _GL_ARB_texture_cube_map : glsl; -def _GL_ARB_texture_gather : glsl; -def _GL_EXT_buffer_reference : glsl; -def _GL_EXT_buffer_reference_uvec2 : glsl; +def _GL_EXT_buffer_reference : _GLSL_450; +def _GL_EXT_buffer_reference_uvec2 : _GLSL_450; def _GL_EXT_debug_printf : glsl; -def _GL_EXT_fragment_shader_barycentric : glsl; -def _GL_EXT_mesh_shader : glsl; +def _GL_EXT_demote_to_helper_invocation : _GLSL_140; +def _GL_EXT_fragment_shader_barycentric : _GLSL_450; +def _GL_EXT_mesh_shader : _GLSL_450; def _GL_EXT_nonuniform_qualifier : glsl; -def _GL_EXT_ray_query : glsl_spirv_1_4; -def _GL_EXT_ray_tracing : glsl_spirv_1_4; -def _GL_EXT_ray_tracing_position_fetch : glsl_spirv_1_4; -def _GL_EXT_samplerless_texture_functions : glsl; +def _GL_EXT_ray_query : _GLSL_460 + glsl_spirv_1_4; // spirv_1_4 is required due to glslang bug which enables `SPV_KHR_ray_tracing` regardless of context +def _GL_EXT_ray_tracing : _GLSL_460; +def _GL_EXT_ray_tracing_position_fetch : _GL_EXT_ray_query; // requires _GL_EXT_ray_tracing or _GL_EXT_ray_query +def _GL_EXT_samplerless_texture_functions : _GLSL_140; def _GL_EXT_shader_atomic_float : glsl; +def _GL_EXT_shader_atomic_float_min_max : glsl; def _GL_EXT_shader_atomic_float2 : glsl; def _GL_EXT_shader_atomic_int64 : glsl; -def _GL_EXT_shader_atomic_float_min_max : glsl; -def _GL_EXT_shader_explicit_arithmetic_types_int64 : glsl; -def _GL_EXT_shader_image_load_store : glsl; +def _GL_EXT_shader_explicit_arithmetic_types_int64 : _GLSL_140; +def _GL_EXT_shader_image_load_store : _GLSL_130; def _GL_EXT_shader_realtime_clock : glsl; -def _GL_EXT_texture_shadow_lod : glsl; -def _GL_KHR_memory_scope_semantics : 
glsl; -def _GL_KHR_shader_subgroup_arithmetic : glsl; -def _GL_KHR_shader_subgroup_basic : glsl; -def _GL_KHR_shader_subgroup_ballot : glsl; -def _GL_KHR_shader_subgroup_quad : glsl; -def _GL_KHR_shader_subgroup_shuffle : glsl; -def _GL_KHR_shader_subgroup_shuffle_relative : glsl; -def _GL_KHR_shader_subgroup_shuffle_clustered : glsl; -def _GL_KHR_shader_subgroup_vote : glsl; -def _GL_NV_compute_shader_derivatives : glsl; -def _GL_NV_shader_subgroup_partitioned : glsl; -def _GL_NV_ray_tracing_motion_blur : glsl_spirv_1_4; -def _GL_NV_shader_atomic_fp16_vector : glsl; -def _GL_NV_shader_invocation_reorder : glsl_spirv_1_4; -def _GL_NV_shader_texture_footprint : glsl; +def _GL_EXT_texture_query_lod : glsl; +def _GL_EXT_texture_shadow_lod : _GLSL_130; + +def _GL_ARB_derivative_control : _GLSL_400; +def _GL_ARB_fragment_shader_interlock : _GLSL_450; +def _GL_ARB_gpu_shader5 : _GLSL_150; +def _GL_ARB_shader_image_load_store : _GL_EXT_shader_image_load_store; +def _GL_ARB_shader_image_size : _GLSL_420; +def _GL_ARB_texture_multisample : _GLSL_140; +def _GL_ARB_shader_texture_image_samples : _GLSL_150; +def _GL_ARB_sparse_texture : glsl; +def _GL_ARB_sparse_texture2 : _GL_ARB_sparse_texture; +def _GL_ARB_sparse_texture_clamp : _GL_ARB_sparse_texture2; +def _GL_ARB_texture_gather : _GLSL_130; +def _GL_ARB_texture_query_levels : _GLSL_130; + +def _GL_KHR_memory_scope_semantics : _GLSL_420; +def _GL_KHR_shader_subgroup_arithmetic : _GLSL_140; +def _GL_KHR_shader_subgroup_ballot : _GLSL_140; +def _GL_KHR_shader_subgroup_basic : _GLSL_140; +def _GL_KHR_shader_subgroup_clustered : _GLSL_140; +def _GL_KHR_shader_subgroup_quad : _GLSL_140; +def _GL_KHR_shader_subgroup_shuffle : _GLSL_140; +def _GL_KHR_shader_subgroup_shuffle_relative : _GLSL_140; +def _GL_KHR_shader_subgroup_vote : _GLSL_140; + +def _GL_NV_compute_shader_derivatives : _GLSL_450; +def _GL_NV_fragment_shader_barycentric : _GL_EXT_fragment_shader_barycentric; def _GL_NV_gpu_shader5 : _GL_ARB_gpu_shader5; -alias 
_GL_NV_fragment_shader_barycentric = _GL_EXT_fragment_shader_barycentric; -alias _GL_NV_ray_tracing = _GL_EXT_ray_tracing; +def _GL_NV_ray_tracing : _GL_EXT_ray_tracing; +def _GL_NV_ray_tracing_motion_blur : _GLSL_460; +def _GL_NV_shader_atomic_fp16_vector : _GL_NV_gpu_shader5; +def _GL_NV_shader_invocation_reorder : _GLSL_460; +def _GL_NV_shader_subgroup_partitioned : _GLSL_140; +def _GL_NV_shader_texture_footprint : _GLSL_450; // GLSL extension and SPV extension associations. -alias GL_ARB_derivative_control = _GL_ARB_derivative_control | spvDerivativeControl; -alias GL_ARB_fragment_shader_interlock = _GL_ARB_fragment_shader_interlock | spvFragmentShaderPixelInterlockEXT; -alias GL_ARB_gpu_shader5 = _GL_ARB_gpu_shader5 | spirv_1_0; -alias GL_ARB_sparse_texture_clamp = _GL_ARB_fragment_shader_interlock | spirv_1_0; -alias GL_EXT_texture_query_lod = _GL_EXT_texture_query_lod | spvImageQuery | metal; -alias GL_ARB_texture_query_levels = _GL_ARB_texture_query_levels | spvImageQuery | metal; -alias GL_ARB_texture_cube_map = _GL_ARB_texture_cube_map | spirv_1_0; -alias GL_ARB_texture_gather = _GL_ARB_texture_gather | spirv_1_0 | metal; -alias GL_EXT_buffer_reference = _GL_ARB_fragment_shader_interlock | spirv_1_5; -alias GL_EXT_buffer_reference_uvec2 = _GL_EXT_buffer_reference_uvec2 | spirv_1_0; +alias GL_EXT_buffer_reference = _GL_EXT_buffer_reference | SPV_EXT_physical_storage_buffer; +alias GL_EXT_buffer_reference_uvec2 = _GL_EXT_buffer_reference_uvec2 | _spirv_1_0; alias GL_EXT_debug_printf = _GL_EXT_debug_printf | SPV_KHR_non_semantic_info; +alias GL_EXT_demote_to_helper_invocation = _GL_EXT_demote_to_helper_invocation | spvDemoteToHelperInvocationEXT; alias GL_EXT_fragment_shader_barycentric = _GL_EXT_fragment_shader_barycentric | spvFragmentBarycentricKHR; alias GL_EXT_mesh_shader = _GL_EXT_mesh_shader | spvMeshShadingEXT; -alias GL_EXT_nonuniform_qualifier = _GL_EXT_nonuniform_qualifier | spvShaderNonUniform; -alias GL_EXT_ray_query = _GL_EXT_ray_query | 
spvRayTracingKHR + spvRayQueryKHR; -alias GL_EXT_ray_tracing = _GL_EXT_ray_tracing | spvRayTracingKHR + spvRayQueryKHR; -alias GL_EXT_ray_tracing_position_fetch = _GL_EXT_ray_tracing_position_fetch | spvRayTracingPositionFetchKHR + spvRayQueryPositionFetchKHR; -alias GL_EXT_samplerless_texture_functions = _GL_EXT_samplerless_texture_functions | spirv_1_0; +alias GL_EXT_nonuniform_qualifier = _GL_EXT_nonuniform_qualifier | spvShaderNonUniformEXT; +alias GL_EXT_ray_query = _GL_EXT_ray_query | spvRayQueryKHR; +alias GL_EXT_ray_tracing = _GL_EXT_ray_tracing | spvRayTracingKHR; +alias GL_EXT_ray_tracing_position_fetch_ray_tracing = _GL_EXT_ray_tracing_position_fetch | spvRayTracingPositionFetchKHR; +alias GL_EXT_ray_tracing_position_fetch_ray_query = _GL_EXT_ray_tracing_position_fetch | spvRayQueryPositionFetchKHR; +alias GL_EXT_samplerless_texture_functions = _GL_EXT_samplerless_texture_functions | _spirv_1_0; alias GL_EXT_shader_atomic_float = _GL_EXT_shader_atomic_float | spvAtomicFloat32AddEXT + spvAtomicFloat32MinMaxEXT; +alias GL_EXT_shader_atomic_float_min_max = _GL_EXT_shader_atomic_float_min_max | spvAtomicFloat32MinMaxEXT + spvAtomicFloat16MinMaxEXT; alias GL_EXT_shader_atomic_float2 = _GL_EXT_shader_atomic_float2 | spvAtomicFloat32AddEXT + spvAtomicFloat32MinMaxEXT + spvAtomicFloat16AddEXT + spvAtomicFloat16MinMaxEXT; alias GL_EXT_shader_atomic_int64 = _GL_EXT_shader_atomic_int64 | spvInt64Atomics; -alias GL_EXT_shader_atomic_float_min_max = _GL_EXT_shader_atomic_float_min_max | spvAtomicFloat32MinMaxEXT + spvAtomicFloat16MinMaxEXT; -alias GL_EXT_shader_explicit_arithmetic_types_int64 = _GL_EXT_shader_explicit_arithmetic_types_int64 | spirv_1_0; -alias GL_EXT_shader_image_load_store = _GL_EXT_shader_image_load_store | spirv_1_0; +alias GL_EXT_shader_explicit_arithmetic_types_int64 = _GL_EXT_shader_explicit_arithmetic_types_int64 | _spirv_1_0; +alias GL_EXT_shader_image_load_store = _GL_EXT_shader_image_load_store | _spirv_1_0; alias 
GL_EXT_shader_realtime_clock = _GL_EXT_shader_realtime_clock | spvShaderClockKHR; -alias GL_EXT_texture_shadow_lod = _GL_EXT_texture_shadow_lod + _GLSL_400 | spirv_1_0; -alias GL_KHR_memory_scope_semantics = _GL_KHR_memory_scope_semantics | spirv_1_0; +alias GL_EXT_texture_query_lod = _GL_EXT_texture_query_lod | spvImageQuery | metal; +alias GL_EXT_texture_shadow_lod = _GL_EXT_texture_shadow_lod | _spirv_1_0; + +alias GL_ARB_derivative_control = _GL_ARB_derivative_control | spvDerivativeControl; +alias GL_ARB_fragment_shader_interlock = _GL_ARB_fragment_shader_interlock | spvFragmentShaderPixelInterlockEXT; +alias GL_ARB_gpu_shader5 = _GL_ARB_gpu_shader5 | _spirv_1_0; +alias GL_ARB_shader_image_load_store = GL_EXT_shader_image_load_store; +alias GL_ARB_shader_image_size = _GL_ARB_shader_image_size | spvImageQuery | metal; +alias GL_ARB_texture_multisample = _GL_ARB_texture_multisample | _spirv_1_0; +alias GL_ARB_shader_texture_image_samples = _GL_ARB_shader_texture_image_samples | spvImageQuery | metal; +alias GL_ARB_sparse_texture_clamp = _GL_ARB_sparse_texture_clamp | spvSparseResidency; +alias GL_ARB_texture_gather = _GL_ARB_texture_gather | spvImageGatherExtended | metal; +alias GL_ARB_texture_query_levels = _GL_ARB_texture_query_levels | spvImageQuery | metal; + +alias GL_KHR_memory_scope_semantics = _GL_KHR_memory_scope_semantics | _spirv_1_0; alias GL_KHR_shader_subgroup_arithmetic = _GL_KHR_shader_subgroup_arithmetic | spvGroupNonUniformArithmetic; -alias GL_KHR_shader_subgroup_basic = _GL_KHR_shader_subgroup_basic | spvGroupNonUniformBallot; alias GL_KHR_shader_subgroup_ballot = _GL_KHR_shader_subgroup_ballot | spvGroupNonUniformBallot; -alias GL_KHR_shader_subgroup_clustered = _GL_KHR_shader_subgroup_shuffle_clustered | spvGroupNonUniformShuffle; +alias GL_KHR_shader_subgroup_basic = _GL_KHR_shader_subgroup_basic | spvGroupNonUniformBallot; +alias GL_KHR_shader_subgroup_clustered = _GL_KHR_shader_subgroup_clustered | spvGroupNonUniformShuffle; +alias 
GL_KHR_shader_subgroup_quad = _GL_KHR_shader_subgroup_quad | spvGroupNonUniformQuad; alias GL_KHR_shader_subgroup_shuffle = _GL_KHR_shader_subgroup_shuffle | spvGroupNonUniformShuffle; alias GL_KHR_shader_subgroup_shuffle_relative = _GL_KHR_shader_subgroup_shuffle_relative | spvGroupNonUniformShuffle; alias GL_KHR_shader_subgroup_vote = _GL_KHR_shader_subgroup_vote | spvGroupNonUniformVote; -alias GL_KHR_shader_subgroup_quad = _GL_KHR_shader_subgroup_quad | spvGroupNonUniformQuad; + alias GL_NV_compute_shader_derivatives = _GL_NV_compute_shader_derivatives | SPV_NV_compute_shader_derivatives | _sm_6_6; -alias GL_ARB_shader_image_size = _GL_ARB_shader_image_size | spvImageQuery | metal; -alias GL_ARB_shader_texture_image_samples = _GL_ARB_shader_texture_image_samples | spvImageQuery | metal; -alias GL_NV_shader_atomic_fp16_vector = _GL_NV_shader_atomic_fp16_vector + _GL_NV_gpu_shader5 | spirv_1_0; -alias GL_NV_shader_subgroup_partitioned = _GL_NV_shader_subgroup_partitioned | spvGroupNonUniformPartitionedNV; +alias GL_NV_fragment_shader_barycentric = GL_EXT_fragment_shader_barycentric; +alias GL_NV_gpu_shader5 = GL_ARB_gpu_shader5; +alias GL_NV_ray_tracing = GL_EXT_ray_tracing; alias GL_NV_ray_tracing_motion_blur = _GL_NV_ray_tracing_motion_blur | spvRayTracingMotionBlurNV; +alias GL_NV_shader_atomic_fp16_vector = _GL_NV_shader_atomic_fp16_vector + _GL_NV_gpu_shader5 | _spirv_1_0; alias GL_NV_shader_invocation_reorder = _GL_NV_shader_invocation_reorder + _GL_EXT_buffer_reference_uvec2 | spvShaderInvocationReorderNV; +alias GL_NV_shader_subgroup_partitioned = _GL_NV_shader_subgroup_partitioned | spvGroupNonUniformPartitionedNV; alias GL_NV_shader_texture_footprint = _GL_NV_shader_texture_footprint | spvImageFootprintNV; -alias GL_NV_fragment_shader_barycentric = GL_EXT_fragment_shader_barycentric; -alias GL_NV_ray_tracing = GL_EXT_ray_tracing; - // Define feature names alias nvapi = hlsl_nvapi; -alias raytracing = GL_EXT_ray_tracing | _sm_6_5 | cuda; +alias 
raytracing = GL_EXT_ray_tracing | _sm_6_3 | cuda; alias ser = raytracing + GL_NV_shader_invocation_reorder | raytracing + hlsl_nvapi | cuda; -alias motionblur = GL_NV_ray_tracing_motion_blur | _sm_6_5 + hlsl_nvapi | cuda; -alias rayquery = GL_EXT_ray_query | _sm_6_5; +alias motionblur = GL_NV_ray_tracing_motion_blur | _sm_6_3 + hlsl_nvapi | cuda; +alias rayquery = GL_EXT_ray_query | _sm_6_3; alias raytracing_motionblur = raytracing + motionblur | cuda; alias ser_motion = ser + motionblur; alias shaderclock = GL_EXT_shader_realtime_clock | hlsl_nvapi | cpp | cuda; @@ -383,120 +405,187 @@ alias fragmentshaderinterlock = _GL_ARB_fragment_shader_interlock | hlsl_nvapi | alias atomic64 = GL_EXT_shader_atomic_int64 | _sm_6_6 | cpp | cuda; alias atomicfloat = GL_EXT_shader_atomic_float | _sm_6_0 + hlsl_nvapi | cpp | cuda; alias atomicfloat2 = GL_EXT_shader_atomic_float2 | _sm_6_6 + hlsl_nvapi | cpp | cuda; -alias groupnonuniform = GL_KHR_shader_subgroup_ballot + GL_KHR_shader_subgroup_shuffle - + GL_KHR_shader_subgroup_arithmetic + GL_KHR_shader_subgroup_quad + GL_KHR_shader_subgroup_vote - | _sm_6_0 | cuda; alias fragmentshaderbarycentric = GL_EXT_fragment_shader_barycentric | _sm_6_1; -alias shadermemorycontrol = glsl | spirv_1_0 | _sm_5_0; +alias shadermemorycontrol = glsl | _spirv_1_0 | _sm_5_0; alias shadermemorycontrol_compute = raytracingstages_compute + shadermemorycontrol; alias subpass = fragment + any_gfx_target; alias waveprefix = _sm_6_5 | _cuda_sm_7_0 | GL_KHR_shader_subgroup_arithmetic; alias bufferreference = GL_EXT_buffer_reference; alias bufferreference_int64 = bufferreference + GL_EXT_shader_explicit_arithmetic_types_int64; -// Define what each HLSL shader model means on different targets. 
- - -alias sm_4_0 = _sm_4_0 - | glsl_spirv_1_0 + _GL_ARB_sparse_texture_clamp + _GL_EXT_samplerless_texture_functions - | spirv_1_0 + spvImageQuery + spvImageGatherExtended + spvMinLod + SPV_GOOGLE_user_type +// Define what each shader model means on different targets. + +alias spirv_1_0 = _spirv_1_0; +alias spirv_1_1 = _spirv_1_1 + | spirv_1_0 + ; +alias spirv_1_2 = _spirv_1_2 + | spirv_1_1 + ; +alias spirv_1_3 = _spirv_1_3 + | spirv_1_2 + ; +alias spirv_1_4 = _spirv_1_4 + | spirv_1_3 + ; +alias spirv_1_5 = _spirv_1_5 + GL_EXT_nonuniform_qualifier + GL_EXT_buffer_reference + | spirv_1_4 + ; +alias spirv_1_6 = _spirv_1_6 + GL_EXT_debug_printf + GL_EXT_demote_to_helper_invocation + | spirv_1_5 + ; +alias spirv_latest = _spirv_1_6; + +alias sm_4_0_version = _sm_4_0 + | _GLSL_150 + | spirv_1_0 | _cuda_sm_2_0 | metal | cpp ; +alias sm_4_0 = sm_4_0_version + | SPV_GOOGLE_user_type + spvMinLod + | GL_ARB_sparse_texture_clamp + GL_EXT_samplerless_texture_functions + GL_EXT_texture_query_lod + GL_EXT_texture_shadow_lod + GL_EXT_debug_printf + ; -alias sm_4_1 = _sm_4_1 - | glsl_spirv_1_0 + _GLSL_150 + sm_4_0 - | spirv_1_0 + sm_4_0 +alias sm_4_1_version = _sm_4_1 + | _GLSL_150 + | spirv_1_0 | _cuda_sm_6_0 | metal | cpp ; +alias sm_4_1 = sm_4_1_version + // previous + | sm_4_0 + ; -alias sm_5_0 = _sm_5_0 - | glsl_spirv_1_0 + sm_4_1 + _GL_KHR_memory_scope_semantics - | spirv_1_0 + sm_4_1 + spvDerivativeControl + spvFragmentFullyCoveredEXT +alias sm_5_0_version = _sm_5_0 + | _GLSL_330 + | spirv_1_0 | _cuda_sm_9_0 | metal | cpp ; +alias sm_5_0 = sm_5_0_version + | GL_KHR_memory_scope_semantics + GL_ARB_gpu_shader5 + GL_ARB_derivative_control + | spvFragmentFullyCoveredEXT + // previous + | sm_4_1 + ; -alias sm_5_1 = _sm_5_1 - | glsl_spirv_1_0 + sm_5_0 + _GL_ARB_gpu_shader5 + _GL_EXT_nonuniform_qualifier - | spirv_1_0 + sm_5_0 + spvShaderNonUniform +alias sm_5_1_version = _sm_5_1 + | _GLSL_330 + | spirv_1_0 | _cuda_sm_9_0 | metal | cpp ; +alias sm_5_1 = sm_5_1_version + | 
GL_EXT_nonuniform_qualifier + GL_ARB_gpu_shader5 + // previous + | sm_5_0 + ; -alias sm_6_0 = _sm_6_0 - | glsl_spirv_1_3 + sm_5_1 - + groupnonuniform + atomicfloat - | spirv_1_3 + sm_5_1 - + groupnonuniform + atomicfloat +alias sm_6_0_version = _sm_6_0 + | _GLSL_450 + | spirv_1_3 | _cuda_sm_9_0 | metal | cpp ; +alias sm_6_0 = sm_6_0_version + | GL_KHR_shader_subgroup_ballot + GL_KHR_shader_subgroup_shuffle + GL_KHR_shader_subgroup_arithmetic + GL_KHR_shader_subgroup_quad + GL_KHR_shader_subgroup_vote + // previous + | sm_5_1 + ; -alias sm_6_1 = _sm_6_1 - | glsl_spirv_1_3 + sm_6_0 + fragmentshaderbarycentric - | spirv_1_3 + sm_6_0 + fragmentshaderbarycentric +alias sm_6_1_version = _sm_6_1 + | _GLSL_450 + | spirv_1_3 | _cuda_sm_9_0 | metal | cpp ; +alias sm_6_1 = sm_6_1_version + | GL_EXT_fragment_shader_barycentric + // previous + | sm_6_0 + ; -alias sm_6_2 = _sm_6_2 - | glsl_spirv_1_3 + sm_6_1 - | spirv_1_3 + sm_6_1 +alias sm_6_2_version = _sm_6_2 + | _GLSL_450 + | spirv_1_3 | _cuda_sm_9_0 | metal | cpp ; +alias sm_6_2 = sm_6_2_version + // previous + | sm_6_1 + ; -alias sm_6_3 = _sm_6_3 - | glsl_spirv_1_4 + sm_6_2 + _GL_EXT_ray_tracing - | spirv_1_4 + sm_6_2 + SPV_KHR_ray_tracing +alias sm_6_3_version = _sm_6_3 + | _GLSL_460 + | spirv_1_4 | _cuda_sm_9_0 | metal | cpp ; +alias sm_6_3 = sm_6_3_version + | GL_EXT_ray_tracing + GL_EXT_ray_tracing_position_fetch_ray_tracing + // previous + | sm_6_2 + ; -alias sm_6_4 = _sm_6_4 - | glsl_spirv_1_4 + sm_6_3 - | spirv_1_4 + sm_6_3 +alias sm_6_4_version = _sm_6_4 + | _GLSL_460 + | spirv_1_4 | _cuda_sm_9_0 | metal | cpp ; +alias sm_6_4 = sm_6_4_version + // previous + | sm_6_3 + ; -alias sm_6_5 = _sm_6_5 - | glsl_spirv_1_4 + sm_6_4 + raytracing + meshshading - | spirv_1_4 + sm_6_4 + raytracing + meshshading +alias sm_6_5_version = _sm_6_5 + | _GLSL_460 + | spirv_1_4 | _cuda_sm_9_0 | metal | cpp ; +alias sm_6_5 = sm_6_5_version + // also requires: GL_NV_shader_subgroup_partitioned + | GL_EXT_mesh_shader + GL_EXT_ray_query + 
GL_EXT_ray_tracing_position_fetch_ray_query + // previous + | sm_6_4 + ; -alias sm_6_6 = _sm_6_6 - | glsl_spirv_1_5 + sm_6_5 - + GL_EXT_shader_atomic_int64 + atomicfloat2 - | spirv_1_5 + sm_6_5 - + GL_EXT_shader_atomic_int64 + atomicfloat2 - + SPV_EXT_descriptor_indexing +alias sm_6_6_version = _sm_6_6 + | _GLSL_460 + | spirv_1_4 | _cuda_sm_9_0 | metal | cpp ; +alias sm_6_6 = sm_6_6_version + | GL_EXT_shader_atomic_int64 + GL_EXT_shader_atomic_float2 + GL_EXT_shader_atomic_float + _GL_EXT_shader_atomic_float_min_max + | sm_6_5 + ; -alias sm_6_7 = _sm_6_7 - | glsl_spirv_1_5 + sm_6_6 - | spirv_1_5 + sm_6_6 +alias sm_6_7_version = _sm_6_7 + | _GLSL_460 + | spirv_1_4 | _cuda_sm_9_0 | metal | cpp ; +alias sm_6_7 = sm_6_7_version + | sm_6_6 + ; // Profiles alias GLSL_130 = _GLSL_130 @@ -510,8 +599,10 @@ alias GLSL_140 = _GLSL_140 | _sm_4_1 | _cuda_sm_2_0 | spirv_1_0 - | metal + | metal | cpp + // previous + | GLSL_130 ; alias GLSL_150 = _GLSL_150 | _sm_4_1 @@ -519,6 +610,10 @@ alias GLSL_150 = _GLSL_150 | spirv_1_0 | metal | cpp + // extensions to propagate + | glsl_spirv + GL_ARB_texture_multisample + // previous + | GLSL_140 ; alias GLSL_330 = _GLSL_330 | _sm_5_0 @@ -526,6 +621,8 @@ alias GLSL_330 = _GLSL_330 | spirv_1_0 | metal | cpp + // previous + | GLSL_150 ; alias GLSL_400 = _GLSL_400 | _sm_5_1 @@ -533,6 +630,10 @@ alias GLSL_400 = _GLSL_400 | spirv_1_3 | metal | cpp + // extensions to propagate + | glsl_spirv + GL_ARB_gpu_shader5 + GL_ARB_texture_gather + // previous + | GLSL_330 ; alias GLSL_410 = _GLSL_410 | _sm_5_1 @@ -540,6 +641,8 @@ alias GLSL_410 = _GLSL_410 | spirv_1_3 | metal | cpp + // previous + | GLSL_400 ; alias GLSL_420 = _GLSL_420 | _sm_5_1 @@ -547,6 +650,10 @@ alias GLSL_420 = _GLSL_420 | spirv_1_3 | metal | cpp + // extensions to propagate + | glsl_spirv + GL_ARB_shader_image_load_store + // previous + | GLSL_410 ; alias GLSL_430 = _GLSL_430 | _sm_5_1 @@ -554,27 +661,39 @@ alias GLSL_430 = _GLSL_430 | spirv_1_3 | metal | cpp + // extensions to 
propagate + | glsl_spirv + GL_ARB_shader_image_size + GL_ARB_texture_query_levels + // previous + | GLSL_420 ; alias GLSL_440 = _GLSL_440 | _sm_6_0 | _cuda_sm_6_0 - | spirv_1_5 + | spirv_1_3 | metal | cpp + // previous + | GLSL_430 ; -alias GLSL_450 = _GLSL_450 +alias GLSL_450 = _GLSL_450 | _sm_6_0 | _cuda_sm_6_0 - | spirv_1_5 + | spirv_1_3 | metal | cpp + // extensions to propagate + | glsl_spirv + GL_ARB_derivative_control + GL_ARB_shader_texture_image_samples + // previous + | GLSL_440 ; alias GLSL_460 = _GLSL_460 | _sm_6_6 | _cuda_sm_6_0 - | spirv_1_5 + | spirv_1_3 | metal | cpp + // previous + | GLSL_450 ; alias GLSL_410_SPIRV_1_0 = _GLSL_410 | spirv_1_0; @@ -606,15 +725,13 @@ alias SPIRV_1_4 = spirv_1_4; alias SPIRV_1_5 = spirv_1_5; alias SPIRV_1_6 = spirv_1_6; -alias sm_2_0_GLSL_140 = _GLSL_140 + sm_4_0 | sm_4_0; -alias sm_2_0_GLSL_400 = _GLSL_400 + sm_4_0 | sm_4_0; -alias appendstructuredbuffer = sm_5_0 + raytracingstages_compute_fragment; +alias appendstructuredbuffer = sm_5_0_version; alias atomic_hlsl = _sm_4_0; alias atomic_hlsl_nvapi = _sm_4_0 + hlsl_nvapi; alias atomic_hlsl_sm_6_6 = _sm_6_6; -alias byteaddressbuffer = sm_4_0; -alias byteaddressbuffer_rw = sm_4_0 + raytracingstages_compute_fragment; -alias consumestructuredbuffer = sm_5_0 + raytracingstages_compute_fragment; +alias byteaddressbuffer = sm_4_0_version; +alias byteaddressbuffer_rw = sm_4_0_version; +alias consumestructuredbuffer = sm_5_0_version; alias fragmentprocessing = fragment + _sm_5_0 | fragment + glsl_spirv | raytracingstages_compute_amplification_mesh + GL_NV_compute_shader_derivatives @@ -624,44 +741,23 @@ alias fragmentprocessing_derivativecontrol = fragment + _sm_5_0 | raytracingstages_compute_amplification_mesh + GL_NV_compute_shader_derivatives ; alias getattributeatvertex = fragment + _sm_6_1 | fragment + GL_EXT_fragment_shader_barycentric; -alias memorybarrier_compute = raytracingstages_compute + sm_5_0; -alias glsl_barrier = hlsl + memorybarrier_compute - | glsl_spirv + 
compute_tesscontrol_tesseval - ; -alias structuredbuffer = sm_4_0; -alias structuredbuffer_rw = sm_4_0 + raytracingstages_compute_fragment; -alias texture_sm_4_1 = sm_4_1 +alias memorybarrier = sm_5_0_version; +alias structuredbuffer = sm_4_0_version; +alias structuredbuffer_rw = sm_4_0_version; +alias texture_sm_4_0 = sm_4_0_version | GL_ARB_sparse_texture_clamp + GL_EXT_texture_query_lod ; -alias texture_sm_4_1_samplerless = cpp + texture_sm_4_1 - | cuda + texture_sm_4_1 - | glsl + texture_sm_4_1 + GL_EXT_samplerless_texture_functions - | hlsl + texture_sm_4_1 + raytracingstages_compute_fragment - | spirv_1_0 + texture_sm_4_1 + GL_EXT_samplerless_texture_functions - | metal + texture_sm_4_1 - ; -alias texture_sm_4_1_compute_fragment = cpp + texture_sm_4_1 - | cuda + texture_sm_4_1 - | glsl + texture_sm_4_1 - | hlsl + texture_sm_4_1 + raytracingstages_compute_fragment - | spirv_1_0 + texture_sm_4_1 - | metal + texture_sm_4_1 - ; -// supposedly works on compute but docs say nothing, so for now keep as compute_fragment -alias texture_sm_4_1_fragment = cpp + texture_sm_4_1 - | cuda + texture_sm_4_1 - | glsl + texture_sm_4_1 - | hlsl + texture_sm_4_1 + raytracingstages_compute_fragment - | spirv_1_0 + texture_sm_4_1 - | metal + texture_sm_4_1 +alias texture_sm_4_1 = sm_4_1_version | GL_ARB_sparse_texture_clamp + GL_EXT_texture_query_lod + ; +alias texture_sm_4_1_samplerless = texture_sm_4_1 + // add samplerless to all targets that need an extension + | GL_EXT_samplerless_texture_functions ; -alias texture_sm_4_1_clamp_fragment = texture_sm_4_1_fragment + GL_ARB_sparse_texture_clamp; -alias texture_sm_4_1_vertex_fragment_geometry = cpp + texture_sm_4_1 - | cuda + texture_sm_4_1 - | glsl + texture_sm_4_1 - | hlsl + texture_sm_4_1 + raytracingstages_compute_fragment_geometry_vertex - | spirv_1_0 + texture_sm_4_1 - | metal + texture_sm_4_1 - ; + +// supposedly works on only limited stages, support all stages for now +alias texture_sm_4_1_compute_fragment = texture_sm_4_1; 
+alias texture_sm_4_0_fragment = texture_sm_4_0; +alias texture_sm_4_1_clamp_fragment = texture_sm_4_0_fragment + GL_ARB_sparse_texture_clamp; +alias texture_sm_4_1_vertex_fragment_geometry = texture_sm_4_1; alias texture_gather = texture_sm_4_1_vertex_fragment_geometry + GL_ARB_texture_gather; alias image_samples = texture_sm_4_1_compute_fragment + GL_ARB_shader_texture_image_samples; alias image_size = texture_sm_4_1_compute_fragment + GL_ARB_shader_image_size; @@ -670,9 +766,6 @@ alias texture_querylod = texture_sm_4_1 + GL_EXT_texture_query_lod; alias texture_querylevels = texture_sm_4_1 + GL_ARB_texture_query_levels; alias texture_shadowlod = texture_sm_4_1 + GL_EXT_texture_shadow_lod | texture_sm_4_1; -alias texture_shadowlod_cube = texture_shadowlod | texture_shadowlod + GL_ARB_texture_cube_map; -alias texture_cube = texture_sm_4_1 + GL_ARB_texture_cube_map | texture_sm_4_1; -alias texture_querylevels_cube = texture_querylevels + GL_ARB_texture_cube_map | texture_querylevels; alias atomic_glsl_float1 = GL_EXT_shader_atomic_float; alias atomic_glsl_float2 = GL_EXT_shader_atomic_float2; @@ -686,8 +779,8 @@ alias printf = GL_EXT_debug_printf | _sm_4_0 | _cuda_sm_2_0 | cpp; alias texturefootprint = GL_NV_shader_texture_footprint + GLSL_450 | hlsl_nvapi + _sm_4_0; alias texturefootprintclamp = texturefootprint + GL_ARB_sparse_texture_clamp; -alias shader5_sm_4_0 = GL_ARB_gpu_shader5 + _GLSL_140 + sm_4_0 | sm_4_0; -alias shader5_sm_5_0 = GL_ARB_gpu_shader5 + _GLSL_140 + sm_4_0 | sm_5_0; +alias shader5_sm_4_0 = GL_ARB_gpu_shader5 | sm_4_0_version; +alias shader5_sm_5_0 = GL_ARB_gpu_shader5 | sm_5_0_version; alias subgroup_basic = GL_KHR_shader_subgroup_basic | _sm_6_0 | _cuda_sm_7_0; alias subgroup_ballot = spirv_1_0 + GL_KHR_shader_subgroup_ballot @@ -714,17 +807,16 @@ alias subgroup_clustered = GL_KHR_shader_subgroup_clustered | _sm_6_0 | _cuda_sm alias subgroup_quad = GL_KHR_shader_subgroup_quad | _sm_6_0 | _cuda_sm_7_0; alias subgroup_partitioned = 
GL_NV_shader_subgroup_partitioned + subgroup_ballot_activemask | _sm_6_5; +alias atomic_glsl_hlsl_nvapi_cuda_float1 = atomic_glsl_float1 | hlsl_nvapi + _sm_4_0 | _cuda_sm_2_0; +alias atomic_glsl_hlsl_nvapi_cuda5_int64 = atomic_glsl_int64 | hlsl_nvapi + _sm_4_0 | _cuda_sm_6_0; +alias atomic_glsl_hlsl_nvapi_cuda6_int64 = atomic_glsl_int64 | hlsl_nvapi + _sm_4_0 | _cuda_sm_6_0; +alias atomic_glsl_hlsl_nvapi_cuda9_int64 = atomic_glsl_int64 | hlsl_nvapi + _sm_4_0 | _cuda_sm_9_0; + alias atomic_glsl_hlsl_cuda = atomic_glsl | _sm_5_0 | _cuda_sm_2_0; -alias atomic_glsl_hlsl_cuda_float1 = atomic_glsl_float1 | atomic_hlsl_nvapi | _cuda_sm_2_0; -alias atomic_glsl_hlsl_cuda_float2 = atomic_glsl_float2 | atomic_hlsl_nvapi | _cuda_sm_2_0; -alias atomic_glsl_hlsl_cuda2_int64 = atomic_glsl_int64 | atomic_hlsl_nvapi | _cuda_sm_2_0; -alias atomic_glsl_hlsl_cuda5_int64 = atomic_glsl_int64 | atomic_hlsl_nvapi | _cuda_sm_6_0; -alias atomic_glsl_hlsl_cuda6_int64 = atomic_glsl_int64 | atomic_hlsl_nvapi | _cuda_sm_6_0; -alias atomic_glsl_hlsl_cuda9_int64 = atomic_glsl_int64 | atomic_hlsl_nvapi | _cuda_sm_9_0; +alias atomic_glsl_hlsl_cuda9_int64 = atomic_glsl_int64 | _sm_6_6 | _cuda_sm_9_0; alias breakpoint = GL_EXT_debug_printf | hlsl | _cuda_sm_8_0 | cpp; -alias rayobject = raytracing | rayquery; alias raytracing_allstages = raytracing_stages + raytracing; alias raytracing_anyhit = anyhit + raytracing; alias raytracing_intersection = intersection + raytracing; @@ -733,10 +825,10 @@ alias raytracing_anyhit_closesthit_intersection = anyhit_closesthit_intersection alias raytracing_raygen_closesthit_miss = raygen_closesthit_miss + raytracing; alias raytracing_anyhit_closesthit_intersection_miss = anyhit_closesthit_intersection_miss + raytracing; alias raytracing_raygen_closesthit_miss_callable = raygen_closesthit_miss_callable + raytracing; -alias raytracing_position = raytracing + GL_EXT_ray_tracing_position_fetch + anyhit_closesthit; +alias raytracing_position = raytracing + 
GL_EXT_ray_tracing_position_fetch_ray_tracing + anyhit_closesthit; alias raytracing_motionblur_anyhit_closesthit_intersection_miss = anyhit_closesthit_intersection_miss + raytracing_motionblur; alias raytracing_motionblur_raygen_closesthit_miss = raygen_closesthit_miss + raytracing_motionblur; -alias rayquery_position = rayquery + GL_EXT_ray_tracing_position_fetch; +alias rayquery_position = rayquery + GL_EXT_ray_tracing_position_fetch_ray_query; alias ser_raygen = raygen + ser; alias ser_raygen_closesthit_miss = raygen_closesthit_miss + ser; alias ser_any_closesthit_intersection_miss = anyhit_closesthit_intersection_miss + ser; @@ -746,7 +838,7 @@ alias ser_motion_raygen_closesthit_miss = raygen_closesthit_miss + ser_motion; alias ser_motion_raygen = raygen + ser_motion; alias all = _sm_6_7 + hlsl_nvapi - | glsl_spirv_1_5 + sm_6_7 + | sm_6_7 + ser + shaderclock + texturefootprint + fragmentshaderinterlock + _GL_NV_shader_subgroup_partitioned + _GL_NV_ray_tracing_motion_blur + _GL_NV_shader_texture_footprint | spirv_1_5 + sm_6_7 diff --git a/source/slang/slang-capability.cpp b/source/slang/slang-capability.cpp index 21b2641b4..2c3069f61 100644 --- a/source/slang/slang-capability.cpp +++ b/source/slang/slang-capability.cpp @@ -100,16 +100,16 @@ bool isDirectChildOfAbstractAtom(CapabilityAtom name) return _getInfo(name).abstractBase != CapabilityName::Invalid; } -bool isTargetVersionAtom(CapabilityName name) +bool isTargetVersionAtom(CapabilityAtom name) { - if (name >= CapabilityName::spirv_1_0 && name <= getLatestSpirvAtom()) + if (name >= CapabilityAtom::_spirv_1_0 && name <= getLatestSpirvAtom()) return true; - if (name >= CapabilityName::metallib_2_3 && name <= getLatestMetalAtom()) + if (name >= CapabilityAtom::metallib_2_3 && name <= getLatestMetalAtom()) return true; return false; } -bool isSpirvExtensionAtom(CapabilityName name) +bool isSpirvExtensionAtom(CapabilityAtom name) { return UnownedStringSlice(_getInfo(name).name).startsWith("SPV_"); } @@ -124,26 
+124,26 @@ CapabilityName findCapabilityName(UnownedStringSlice const& name) return result; } -CapabilityName getLatestSpirvAtom() +inline CapabilityAtom getLatestSpirvAtom() { - static CapabilityName result = CapabilityName::Invalid; - if (result == CapabilityName::Invalid) + static CapabilityAtom result = CapabilityAtom::Invalid; + if (result == CapabilityAtom::Invalid) { - CapabilitySet latestSpirvCapSet = CapabilitySet(CapabilityName::spirv_latest); + CapabilitySet latestSpirvCapSet = CapabilitySet(CapabilityName::_spirv_latest); auto latestSpirvCapSetElements = latestSpirvCapSet.getAtomSets()->getElements<CapabilityAtom>(); - result = (CapabilityName)latestSpirvCapSetElements[latestSpirvCapSetElements.getCount() - 2]; //-1 gets shader stage + result = asAtom(latestSpirvCapSetElements[latestSpirvCapSetElements.getCount() - 2]); //-1 gets shader stage } return result; } -CapabilityName getLatestMetalAtom() +CapabilityAtom getLatestMetalAtom() { - static CapabilityName result = CapabilityName::Invalid; - if (result == CapabilityName::Invalid) + static CapabilityAtom result = CapabilityAtom::Invalid; + if (result == CapabilityAtom::Invalid) { CapabilitySet latestMetalCapSet = CapabilitySet(CapabilityName::metallib_latest); auto latestMetalCapSetElements = latestMetalCapSet.getAtomSets()->getElements<CapabilityAtom>(); - result = (CapabilityName)latestMetalCapSetElements[latestMetalCapSetElements.getCount() - 2]; //-1 gets shader stage + result = asAtom(latestMetalCapSetElements[latestMetalCapSetElements.getCount() - 2]); //-1 gets shader stage } return result; } @@ -175,7 +175,7 @@ CapabilityAtom getTargetAtomInSet(const CapabilityAtomSet& atomSet) auto iter = out.begin(); if (iter == out.end()) return CapabilityAtom::Invalid; - return (CapabilityAtom)*iter; + return asAtom(*iter); } CapabilityAtom getStageAtomInSet(const CapabilityAtomSet& atomSet) @@ -186,7 +186,7 @@ CapabilityAtom getStageAtomInSet(const CapabilityAtomSet& atomSet) auto iter = out.begin(); if 
(iter == out.end()) return CapabilityAtom::Invalid; - return (CapabilityAtom)*iter; + return asAtom(*iter); } template<CapabilityName keyholeAtomToPermuteWith> @@ -201,11 +201,11 @@ void CapabilitySet::addPermutationsOfConjunctionForEachInContainer(CapabilityAto if constexpr (keyholeAtomToPermuteWith == CapabilityName::target) { - addConjunction(conjunctionPermutation, (CapabilityAtom)atom, knownStageAtom); + addConjunction(conjunctionPermutation, asAtom(atom), knownStageAtom); } else if constexpr (keyholeAtomToPermuteWith == CapabilityName::stage) { - addConjunction(conjunctionPermutation, knownTargetAtom, (CapabilityAtom)atom); + addConjunction(conjunctionPermutation, knownTargetAtom, asAtom(atom)); } else { @@ -394,17 +394,25 @@ bool CapabilitySet::implies(CapabilityAtom atom) const return this->implies(tmpSet); } -bool CapabilitySet::implies(CapabilitySet const& other, const bool onlyRequireSingleImply) const +CapabilitySet::ImpliesReturnFlags CapabilitySet::_implies(CapabilitySet const& otherSet, ImpliesFlags flags) const { // x implies (c | d) only if (x implies c) and (x implies d). - for (const auto& otherTarget : other.m_targetSets) + bool onlyRequireSingleImply = ((int)flags & (int)ImpliesFlags::OnlyRequireASingleValidImply); + int flagsCollected = (int)CapabilitySet::ImpliesReturnFlags::NotImplied; + + if (otherSet.isEmpty()) + return CapabilitySet::ImpliesReturnFlags::Implied; + + for (const auto& otherTarget : otherSet.m_targetSets) { auto thisTarget = this->m_targetSets.tryGetValue(otherTarget.first); if (!thisTarget) { + if (onlyRequireSingleImply) + continue; // 'this' lacks a target 'other' has. 
- return false; + return CapabilitySet::ImpliesReturnFlags::NotImplied; } for (const auto& otherStage : otherTarget.second.shaderStageSets) @@ -412,31 +420,44 @@ bool CapabilitySet::implies(CapabilitySet const& other, const bool onlyRequireSi auto thisStage = thisTarget->shaderStageSets.tryGetValue(otherStage.first); if (!thisStage) { + if (onlyRequireSingleImply) + continue; // 'this' lacks a stage 'other' has. - return false; + return CapabilitySet::ImpliesReturnFlags::NotImplied; } // all stage sets that are in 'other' must be contained by 'this' - if(thisStage->atomSet) + if (thisStage->atomSet) { auto& thisStageSet = thisStage->atomSet.value(); - if(otherStage.second.atomSet) - { - if (!onlyRequireSingleImply) + if (otherStage.second.atomSet) + { + auto contained = thisStageSet.contains(otherStage.second.atomSet.value()); + if (!onlyRequireSingleImply && !contained) { - if (!thisStageSet.contains(otherStage.second.atomSet.value())) - return false; + return CapabilitySet::ImpliesReturnFlags::NotImplied; } - else + else if (onlyRequireSingleImply && contained) { - if (thisStageSet.contains(otherStage.second.atomSet.value())) - return true; + return CapabilitySet::ImpliesReturnFlags::Implied; } } } } } - return !onlyRequireSingleImply; + if (!onlyRequireSingleImply) + flagsCollected |= (int)CapabilitySet::ImpliesReturnFlags::Implied; + + return (CapabilitySet::ImpliesReturnFlags)flagsCollected; +} + +bool CapabilitySet::implies(CapabilitySet const& other) const +{ + return (int)_implies(other, ImpliesFlags::None) & (int)CapabilitySet::ImpliesReturnFlags::Implied; +} +CapabilitySet::ImpliesReturnFlags CapabilitySet::atLeastOneSetImpliedInOther(CapabilitySet const& other) const +{ + return _implies(other, ImpliesFlags::OnlyRequireASingleValidImply); } void CapabilityTargetSet::unionWith(const CapabilityTargetSet& other) @@ -827,6 +848,53 @@ bool CapabilitySet::checkCapabilityRequirement(CapabilitySet const& available, C return true; } +/// Converts spirv version 
atom to the glsl_spirv equivlent. If not possible, Invalid is returned +inline CapabilityName maybeConvertSpirvVersionToGlslSpirvVersion(CapabilityName& atom) +{ + if (atom >= CapabilityName::_spirv_1_0 && asAtom(atom) <= getLatestSpirvAtom()) + { + return (CapabilityName)((Int)CapabilityName::glsl_spirv_1_0 + ((Int)atom - (Int)CapabilityName::_spirv_1_0)); + } + return CapabilityName::Invalid; +} + +void CapabilitySet::addSpirvVersionFromOtherAsGlslSpirvVersion(CapabilitySet& other) +{ + if (auto* otherTargetSet = other.m_targetSets.tryGetValue(CapabilityAtom::spirv)) + { + auto* thisTargetSet = m_targetSets.tryGetValue(CapabilityAtom::glsl); + if (!thisTargetSet) + return; + + for (auto& otherStageSet : otherTargetSet->shaderStageSets) + { + if (!otherStageSet.second.atomSet) + continue; + + auto* thisStageSet = thisTargetSet->shaderStageSets.tryGetValue(otherStageSet.first); + if (!thisStageSet || !thisStageSet->atomSet) + continue; + + CapabilityAtomSet::Iterator otherAtom = otherStageSet.second.atomSet->begin(); + while (otherAtom != otherStageSet.second.atomSet->end()) + { + otherAtom++; + auto otherAtomName = (CapabilityName)*otherAtom; + if (otherAtomName > (CapabilityName)getLatestSpirvAtom()) + { + otherAtom = otherStageSet.second.atomSet->end(); + continue; + } + auto maybeConvertedSpirvVersionAtom = maybeConvertSpirvVersionToGlslSpirvVersion(otherAtomName); + if (maybeConvertedSpirvVersionAtom == CapabilityName::Invalid) + continue; + + thisStageSet->atomSet->add((UInt)maybeConvertedSpirvVersionAtom); + } + } + } +} + void printDiagnosticArg(StringBuilder& sb, const CapabilitySet& capSet) { bool isFirstSet = true; @@ -864,6 +932,20 @@ void printDiagnosticArg(StringBuilder& sb, CapabilityName name) sb << _getInfo(name).name; } +void printDiagnosticArg(StringBuilder& sb, List<CapabilityAtom>& list) +{ + sb << "{"; + auto count = list.getCount(); + for(Index i = 0; i < count; i++) + { + printDiagnosticArg(sb, list[i]); + if (i + 1 != count) + sb << ", "; + 
} + sb << "}"; +} + + #ifdef UNIT_TEST_CAPABILITIES #define CHECK_CAPS(inData) SLANG_ASSERT(inData>0) diff --git a/source/slang/slang-capability.h b/source/slang/slang-capability.h index 8fd9e2bd4..53164be7f 100644 --- a/source/slang/slang-capability.h +++ b/source/slang/slang-capability.h @@ -139,8 +139,15 @@ public: /// Is this capability set incompatible with the given `other` atomic capability. bool isIncompatibleWith(CapabilitySet const& other) const; + enum class ImpliesReturnFlags : int + { + NotImplied = 0, + Implied = 1 << 0, + }; /// Does this capability set imply all the capabilities in `other`? - bool implies(CapabilitySet const& other, const bool onlyRequireSingleImply = false) const; + bool implies(CapabilitySet const& other) const; + /// Does this capability set imply at least 1 set in other. + ImpliesReturnFlags atLeastOneSetImpliedInOther(CapabilitySet const& other) const; /// Does this capability set imply the atomic capability `other`? bool implies(CapabilityAtom other) const; @@ -289,6 +296,10 @@ public: /// Get access to the raw atomic capabilities that define this set. /// Get all bottom level UIntSets for each CapabilityTargetSet. CapabilitySet::AtomSets::Iterator getAtomSets() const; + + /// Add spirv version capabilities from 'spirv CapabilityTargetSet' as glsl_spirv version capability in 'glsl CapabilityTargetSet' + void addSpirvVersionFromOtherAsGlslSpirvVersion(CapabilitySet& other); + private: /// underlying data of CapabilitySet. 
CapabilityTargetSets m_targetSets{}; @@ -296,6 +307,13 @@ private: void addCapability(CapabilityName name); bool hasSameTargets(const CapabilitySet& other) const; + + enum class ImpliesFlags + { + None = 0, + OnlyRequireASingleValidImply = 1 << 0, + }; + ImpliesReturnFlags _implies(CapabilitySet const& other, ImpliesFlags flags) const; }; /// Returns true if atom is derived from base @@ -304,8 +322,16 @@ bool isCapabilityDerivedFrom(CapabilityAtom atom, CapabilityAtom base); /// Find a capability atom with the given `name`, or return CapabilityAtom::Invalid. CapabilityName findCapabilityName(UnownedStringSlice const& name); -CapabilityName getLatestSpirvAtom(); -CapabilityName getLatestMetalAtom(); +CapabilityAtom getLatestSpirvAtom(); +CapabilityAtom getLatestMetalAtom(); + +/// For debug purposes ensure a casted CapabilityAtom is valid +template<typename T> +inline CapabilityAtom asAtom(T name) +{ + SLANG_ASSERT((UInt)name < (UInt)CapabilityAtom::Count); + return CapabilityAtom(name); +} /// Gets the capability names. void getCapabilityNames(List<UnownedStringSlice>& ioNames); @@ -316,8 +342,8 @@ bool isDirectChildOfAbstractAtom(CapabilityAtom name); /// Return true if `name` represents an atom for a target version, e.g. spirv_1_5. -bool isTargetVersionAtom(CapabilityName name); -bool isSpirvExtensionAtom(CapabilityName name); +bool isTargetVersionAtom(CapabilityAtom name); +bool isSpirvExtensionAtom(CapabilityAtom name); void printDiagnosticArg(StringBuilder& sb, CapabilityAtom atom); void printDiagnosticArg(StringBuilder& sb, CapabilityName name); diff --git a/source/slang/slang-check-decl.cpp b/source/slang/slang-check-decl.cpp index f8f6d2dcb..74d08f8e5 100644 --- a/source/slang/slang-check-decl.cpp +++ b/source/slang/slang-check-decl.cpp @@ -10123,9 +10123,10 @@ namespace Slang // then the decl is using things that require conflicting set of capabilities, and we should diagnose an error. 
if (referencedDecl && decl) { - diagnoseCapabilityErrors( + maybeDiagnose( visitor->getSink(), visitor->getOptionSet(), + DiagnosticCategory::Capability, referenceLoc, Diagnostics::conflictingCapabilityDueToUseOfDecl, referencedDecl, @@ -10135,9 +10136,10 @@ namespace Slang } else if (decl) { - diagnoseCapabilityErrors( + maybeDiagnose( visitor->getSink(), visitor->getOptionSet(), + DiagnosticCategory::Capability, referenceLoc, Diagnostics::conflictingCapabilityDueToStatement, nodeCaps, @@ -10146,9 +10148,10 @@ namespace Slang } else { - diagnoseCapabilityErrors( + maybeDiagnose( visitor->getSink(), visitor->getOptionSet(), + DiagnosticCategory::Capability, referenceLoc, Diagnostics::conflictingCapabilityDueToStatementEnclosingFunc, nodeCaps, @@ -10251,7 +10254,7 @@ namespace Slang targetCap.join(bodyCap); if (targetCap.isInvalid()) { - diagnoseCapabilityErrors(Base::getSink(), outerContext.getOptionSet(), targetCase->body->loc, Diagnostics::conflictingCapabilityDueToStatement, bodyCap, "target_switch", oldCap); + maybeDiagnose(Base::getSink(), outerContext.getOptionSet(), DiagnosticCategory::Capability, targetCase->body->loc, Diagnostics::conflictingCapabilityDueToStatement, bodyCap, "target_switch", oldCap); } set.unionWith(targetCap); } @@ -10390,7 +10393,7 @@ namespace Slang auto stageCaps = CapabilitySet(Profile(entryPointAttr->stage).getCapabilityName()); if (declaredCaps.isIncompatibleWith(stageCaps)) { - diagnoseCapabilityErrors(getSink(), this->getOptionSet(), funcDecl->loc, Diagnostics::stageIsInCompatibleWithCapabilityDefinition, funcDecl, stageCaps, declaredCaps); + maybeDiagnose(getSink(), this->getOptionSet(), DiagnosticCategory::Capability, funcDecl->loc, Diagnostics::stageIsInCompatibleWithCapabilityDefinition, funcDecl, stageCaps, declaredCaps); } else { @@ -10604,7 +10607,7 @@ namespace Slang printedDecls.add(declToPrint); if (auto provenance = declToPrint->capabilityRequirementProvenance.tryGetValue(atomToFind)) { - diagnoseCapabilityErrors(sink, 
optionSet, provenance->referenceLoc, Diagnostics::seeUsingOf, provenance->referencedDecl); + maybeDiagnose(sink, optionSet, DiagnosticCategory::Capability, provenance->referenceLoc, Diagnostics::seeUsingOf, provenance->referencedDecl); declToPrint = provenance->referencedDecl; if (printedDecls.contains(declToPrint)) break; @@ -10625,7 +10628,7 @@ namespace Slang } if (declToPrint && !optionallyNeverPrintDecl) { - diagnoseCapabilityErrors(sink, optionSet, declToPrint->loc, Diagnostics::seeDefinitionOf, declToPrint); + maybeDiagnose(sink, optionSet, DiagnosticCategory::Capability, declToPrint->loc, Diagnostics::seeDefinitionOf, declToPrint); } } @@ -10654,7 +10657,7 @@ namespace Slang CapabilityAtom outFailedAtom{}; if (hasTargetAtom(failedAtomsInsideAvailableSet, outFailedAtom)) { - diagnoseCapabilityErrors(getSink(), this->getOptionSet(), decl->loc, Diagnostics::declHasDependenciesNotCompatibleOnTarget, decl, outFailedAtom); + maybeDiagnose(getSink(), this->getOptionSet(), DiagnosticCategory::Capability, decl->loc, Diagnostics::declHasDependenciesNotCompatibleOnTarget, decl, outFailedAtom); // Anything defined on a non-failed target atom may be the culprit to why we fail having a target capability. // Print out all possible culprits. @@ -10665,7 +10668,7 @@ namespace Slang for (auto atom : targetsNotUsedSet) { - CapabilityAtom formattedAtom = (CapabilityAtom)atom; + CapabilityAtom formattedAtom = asAtom(atom); diagnoseCapabilityProvenance(this->getOptionSet(), getSink(), decl, formattedAtom, true); } return; @@ -10688,8 +10691,8 @@ namespace Slang // can come from multiple referenced items in a function body. 
for (auto i : failedAtomsInsideAvailableSet) { - CapabilityAtom formattedAtom = (CapabilityAtom)i; - diagnoseCapabilityErrors(getSink(), this->getOptionSet(), decl->loc, diagnosticInfo, decl, formattedAtom); + CapabilityAtom formattedAtom = asAtom(i); + maybeDiagnose(getSink(), this->getOptionSet(), DiagnosticCategory::Capability, decl->loc, diagnosticInfo, decl, formattedAtom); // Print provenances. diagnoseCapabilityProvenance(this->getOptionSet(), getSink(), decl, formattedAtom); } diff --git a/source/slang/slang-check-shader.cpp b/source/slang/slang-check-shader.cpp index 2ebc9d3a4..67abb56b7 100644 --- a/source/slang/slang-check-shader.cpp +++ b/source/slang/slang-check-shader.cpp @@ -519,7 +519,7 @@ namespace Slang targetCaps.join(stageCapabilitySet); if (targetCaps.isIncompatibleWith(entryPointFuncDecl->inferredCapabilityRequirements)) { - diagnoseCapabilityErrors(sink, linkage->m_optionSet, entryPointFuncDecl, Diagnostics::entryPointUsesUnavailableCapability, entryPointFuncDecl, entryPointFuncDecl->inferredCapabilityRequirements, targetCaps); + maybeDiagnose(sink, linkage->m_optionSet, DiagnosticCategory::Capability, entryPointFuncDecl, Diagnostics::entryPointUsesUnavailableCapability, entryPointFuncDecl, entryPointFuncDecl->inferredCapabilityRequirements, targetCaps); // Find out what exactly is incompatible and print out a trace of provenance to // help user diagnose their code. 
@@ -532,7 +532,7 @@ namespace Slang { for (auto inferredAtom : *interredCapConjunctions.begin()) { - CapabilityAtom inferredAtomFormatted = (CapabilityAtom)inferredAtom; + CapabilityAtom inferredAtomFormatted = asAtom(inferredAtom); if (!compileCaps->contains((UInt)inferredAtom)) { diagnoseCapabilityProvenance(linkage->m_optionSet, sink, entryPointFuncDecl, inferredAtomFormatted); @@ -540,6 +540,38 @@ namespace Slang } } } + else + { + // Only attempt to error if a user adds to slangc either `-profile` or `-capability` + if ( + ( + target->getOptionSet().hasOption(CompilerOptionName::Capability) + || + target->getOptionSet().hasOption(CompilerOptionName::Profile) + ) + && targetCaps.atLeastOneSetImpliedInOther(entryPointFuncDecl->inferredCapabilityRequirements) == CapabilitySet::ImpliesReturnFlags::NotImplied + ) + { + CapabilitySet combinedSets = targetCaps; + combinedSets.join(entryPointFuncDecl->inferredCapabilityRequirements); + CapabilityAtomSet addedAtoms{}; + if (auto targetCapSet = targetCaps.getAtomSets()) + { + if (auto combinedSet = combinedSets.getAtomSets()) + { + CapabilityAtomSet::calcSubtract(addedAtoms, (*combinedSet), (*targetCapSet)); + } + } + maybeDiagnoseWarningOrError( + sink, + target->getOptionSet(), + DiagnosticCategory::Capability, + entryPointFuncDecl->loc, + Diagnostics::profileImplicitlyUpgraded, + Diagnostics::profileImplicitlyUpgradedRestrictive, + addedAtoms.getElements<CapabilityAtom>()); + } + } } } diff --git a/source/slang/slang-compiler.cpp b/source/slang/slang-compiler.cpp index ed208ca37..fce01b770 100644 --- a/source/slang/slang-compiler.cpp +++ b/source/slang/slang-compiler.cpp @@ -656,7 +656,7 @@ namespace Slang { for (auto atom : conjunctions) { - switch ((CapabilityAtom)atom) + switch (asAtom(atom)) { default: break; diff --git a/source/slang/slang-compiler.h b/source/slang/slang-compiler.h index 881f511d0..8f1860433 100755 --- a/source/slang/slang-compiler.h +++ b/source/slang/slang-compiler.h @@ -201,10 +201,16 @@ 
namespace Slang Name* getName() { return m_name; } /// Get the stage that the entry point is to be compiled for - Stage getStage() { return m_profile.getStage(); } + Stage getStage() + { + return m_profile.getStage(); + } /// Get the profile that the entry point is to be compiled for - Profile getProfile() { return m_profile; } + Profile getProfile() + { + return m_profile; + } /// Get the index to the translation unit int getTranslationUnitIndex() const { return m_translationUnitIndex; } @@ -3353,6 +3359,34 @@ struct CompileTimerRAII session->addTotalCompileTime(elapsedTime); } }; + +// helpers for error/warning reporting +enum class DiagnosticCategory +{ + None = 0, + Capability = 1 << 0, +}; +template<typename P, typename... Args> +bool maybeDiagnose(DiagnosticSink* sink, CompilerOptionSet& optionSet, DiagnosticCategory errorType, P const& pos, DiagnosticInfo const& info, Args const&... args) +{ + if ((int)errorType & (int)DiagnosticCategory::Capability && optionSet.getBoolOption(CompilerOptionName::IgnoreCapabilities)) + return false; + return sink->diagnose(pos, info, args...); +} + +template<typename P, typename... Args> +bool maybeDiagnoseWarningOrError(DiagnosticSink* sink, CompilerOptionSet& optionSet, DiagnosticCategory errorType, P const& pos, DiagnosticInfo const& warningInfo, DiagnosticInfo const& errorInfo, Args const&... 
args) +{ + if ((int)errorType & (int)DiagnosticCategory::Capability && optionSet.getBoolOption(CompilerOptionName::RestrictiveCapabilityCheck)) + { + return maybeDiagnose(sink, optionSet, errorType, pos, errorInfo, args...); + } + else + { + return maybeDiagnose(sink, optionSet, errorType, pos, warningInfo, args...); + } +} + } #endif diff --git a/source/slang/slang-diagnostic-defs.h b/source/slang/slang-diagnostic-defs.h index f4bad6664..e0d818c97 100644 --- a/source/slang/slang-diagnostic-defs.h +++ b/source/slang/slang-diagnostic-defs.h @@ -733,6 +733,8 @@ DIAGNOSTIC(41001, Error, recursiveType, "type '$0' contains cyclic reference to DIAGNOSTIC(41010, Warning, missingReturn, "control flow may reach end of non-'void' function") DIAGNOSTIC(41011, Error, profileIncompatibleWithTargetSwitch, "__target_switch has no compatable target with current profile '$0'") +DIAGNOSTIC(41012, Warning, profileImplicitlyUpgraded, "user set `profile` had an implicit upgrade applied to it, atoms added: '$0'") +DIAGNOSTIC(41012, Error, profileImplicitlyUpgradedRestrictive, "user set `profile` had an implicit upgrade applied to it, atoms added: '$0'") DIAGNOSTIC(41015, Error, usingUninitializedValue, "use of uninitialized value '$0'") DIAGNOSTIC(41016, Warning, returningWithUninitializedOut, "returning without initializing out parameter '$0'") DIAGNOSTIC(41017, Warning, returningWithPartiallyUninitializedOut, "returning without fully initializing out parameter '$0'") diff --git a/source/slang/slang-doc-markdown-writer.cpp b/source/slang/slang-doc-markdown-writer.cpp index ac3b9ca7e..f29574180 100644 --- a/source/slang/slang-doc-markdown-writer.cpp +++ b/source/slang/slang-doc-markdown-writer.cpp @@ -432,7 +432,7 @@ static DocMarkdownWriter::Requirement _getRequirementFromTargetToken(const Token return Requirement{CodeGenTarget::SPIRV, UnownedStringSlice("")}; } - const CapabilityAtom targetCap = (CapabilityAtom)findCapabilityName(targetName); + const CapabilityAtom targetCap = 
asAtom(findCapabilityName(targetName)); if (targetCap == CapabilityAtom::Invalid) { diff --git a/source/slang/slang-emit-glsl.cpp b/source/slang/slang-emit-glsl.cpp index 936dc15ff..c788d9cbf 100644 --- a/source/slang/slang-emit-glsl.cpp +++ b/source/slang/slang-emit-glsl.cpp @@ -75,6 +75,13 @@ void GLSLSourceEmitter::_requireRayTracing() m_glslExtensionTracker->requireVersion(ProfileVersion::GLSL_460); } +void GLSLSourceEmitter::_requireRayQuery() +{ + m_glslExtensionTracker->requireExtension(UnownedStringSlice::fromLiteral("GL_EXT_ray_query")); + m_glslExtensionTracker->requireSPIRVVersion(SemanticVersion(1, 4)); // required due to glslang bug which enables `SPV_KHR_ray_tracing` regardless of context + m_glslExtensionTracker->requireVersion(ProfileVersion::GLSL_460); +} + void GLSLSourceEmitter::_requireFragmentShaderBarycentric() { m_glslExtensionTracker->requireExtension(UnownedStringSlice::fromLiteral("GL_EXT_fragment_shader_barycentric")); @@ -2584,7 +2591,7 @@ void GLSLSourceEmitter::emitSimpleTypeImpl(IRType* type) { case kIROp_RaytracingAccelerationStructureType: { - _requireRayTracing(); + _requireRayQuery(); m_writer->emit("accelerationStructureEXT"); break; } diff --git a/source/slang/slang-emit-glsl.h b/source/slang/slang-emit-glsl.h index a30195d75..efd3ded75 100644 --- a/source/slang/slang-emit-glsl.h +++ b/source/slang/slang-emit-glsl.h @@ -126,6 +126,8 @@ protected: void _requireRayTracing(); + void _requireRayQuery(); + void _requireFragmentShaderBarycentric(); void _emitSpecialFloatImpl(IRType* type, const char* valueExpr); diff --git a/source/slang/slang-ir-glsl-legalize.cpp b/source/slang/slang-ir-glsl-legalize.cpp index bcf2d8a4f..ae30184c8 100644 --- a/source/slang/slang-ir-glsl-legalize.cpp +++ b/source/slang/slang-ir-glsl-legalize.cpp @@ -3560,20 +3560,20 @@ void legalizeEntryPointForGLSL( void decorateModuleWithSPIRVVersion(IRModule* module, SemanticVersion spirvVersion) { - CapabilityName atom = CapabilityName::spirv_1_0; + CapabilityName 
atom = CapabilityName::_spirv_1_0; switch (spirvVersion.m_major) { case 1: { switch (spirvVersion.m_minor) { - case 0: atom = CapabilityName::spirv_1_0; break; - case 1: atom = CapabilityName::spirv_1_1; break; - case 2: atom = CapabilityName::spirv_1_2; break; - case 3: atom = CapabilityName::spirv_1_3; break; - case 4: atom = CapabilityName::spirv_1_4; break; - case 5: atom = CapabilityName::spirv_1_5; break; - case 6: atom = CapabilityName::spirv_1_6; break; + case 0: atom = CapabilityName::_spirv_1_0; break; + case 1: atom = CapabilityName::_spirv_1_1; break; + case 2: atom = CapabilityName::_spirv_1_2; break; + case 3: atom = CapabilityName::_spirv_1_3; break; + case 4: atom = CapabilityName::_spirv_1_4; break; + case 5: atom = CapabilityName::_spirv_1_5; break; + case 6: atom = CapabilityName::_spirv_1_6; break; default: SLANG_UNEXPECTED("Unknown SPIRV version"); } break; diff --git a/source/slang/slang-ir-specialize-target-switch.cpp b/source/slang/slang-ir-specialize-target-switch.cpp index e3ef06e18..c501cdab5 100644 --- a/source/slang/slang-ir-specialize-target-switch.cpp +++ b/source/slang/slang-ir-specialize-target-switch.cpp @@ -28,6 +28,7 @@ namespace Slang bool isEqual; CapabilitySet bestCapSet = CapabilitySet::makeInvalid(); IRBlock* targetBlock = nullptr; + CapabilitySet::ImpliesReturnFlags impliesReturnType = CapabilitySet::ImpliesReturnFlags::NotImplied; for (UInt i = 0; i < targetSwitch->getCaseCount(); i++) { auto cap = (CapabilityName)getIntVal(targetSwitch->getCaseValue(i)); @@ -41,9 +42,8 @@ namespace Slang bool isBetterForTarget = capSet.isBetterForTarget(bestCapSet, target->getTargetCaps(), isEqual); if (isBetterForTarget) { - CapabilitySet joinedCapSet = capSet; - joinedCapSet.join(target->getTargetCaps()); - bool targetImpliesCapSet = target->getTargetCaps().implies(joinedCapSet, true); + impliesReturnType = target->getTargetCaps().atLeastOneSetImpliedInOther(capSet); + bool targetImpliesCapSet = ((int)impliesReturnType & 
(int)CapabilitySet::ImpliesReturnFlags::Implied || capSet.isEmpty()); if (targetImpliesCapSet) { // Now check if bestCapSet contains targetCaps. If it does not then this is an invalid target diff --git a/source/slang/slang-ir-spirv-legalize.cpp b/source/slang/slang-ir-spirv-legalize.cpp index 989790b13..d7b980bf8 100644 --- a/source/slang/slang-ir-spirv-legalize.cpp +++ b/source/slang/slang-ir-spirv-legalize.cpp @@ -2156,25 +2156,25 @@ struct SPIRVLegalizationContext : public SourceEmitterBase auto spirvAtom = ((CapabilityName)atom); switch (spirvAtom) { - case CapabilityName::spirv_1_0: + case CapabilityName::_spirv_1_0: m_sharedContext->requireSpirvVersion(0x10000); break; - case CapabilityName::spirv_1_1: + case CapabilityName::_spirv_1_1: m_sharedContext->requireSpirvVersion(0x10100); break; - case CapabilityName::spirv_1_2: + case CapabilityName::_spirv_1_2: m_sharedContext->requireSpirvVersion(0x10200); break; - case CapabilityName::spirv_1_3: + case CapabilityName::_spirv_1_3: m_sharedContext->requireSpirvVersion(0x10300); break; - case CapabilityName::spirv_1_4: + case CapabilityName::_spirv_1_4: m_sharedContext->requireSpirvVersion(0x10400); break; - case CapabilityName::spirv_1_5: + case CapabilityName::_spirv_1_5: m_sharedContext->requireSpirvVersion(0x10500); break; - case CapabilityName::spirv_1_6: + case CapabilityName::_spirv_1_6: m_sharedContext->requireSpirvVersion(0x10600); break; case CapabilityName::SPV_EXT_demote_to_helper_invocation: diff --git a/source/slang/slang-lower-to-ir.cpp b/source/slang/slang-lower-to-ir.cpp index 4a6701acc..f8faf7c07 100644 --- a/source/slang/slang-lower-to-ir.cpp +++ b/source/slang/slang-lower-to-ir.cpp @@ -11632,11 +11632,11 @@ RefPtr<IRModule> TargetProgram::createIRModuleForLayout(DiagnosticSink* sink) { for (auto atomVal : atomSet) { - auto atom = (CapabilityName)atomVal; - if (atom >= CapabilityName::spirv_1_0 && atom <= latestSpirvAtom || - atom >= CapabilityName::metallib_2_3 && atom <= latestMetalAtom) + 
auto atom = asAtom(atomVal); + if (atom >= CapabilityAtom::_spirv_1_0 && atom <= latestSpirvAtom || + atom >= CapabilityAtom::metallib_2_3 && atom <= latestMetalAtom) { - builder->addRequireCapabilityAtomDecoration(irFunc, atom); + builder->addRequireCapabilityAtomDecoration(irFunc, (CapabilityName)atom); } } } diff --git a/source/slang/slang-options.cpp b/source/slang/slang-options.cpp index ec37223bd..87c9b1192 100644 --- a/source/slang/slang-options.cpp +++ b/source/slang/slang-options.cpp @@ -289,6 +289,7 @@ void initCommandOptions(CommandOptions& options) { OptionKind::Language, "-lang", "-lang <language>", "Set the language for the following input files."}, { OptionKind::MatrixLayoutColumn, "-matrix-layout-column-major", nullptr, "Set the default matrix layout to column-major."}, { OptionKind::MatrixLayoutRow,"-matrix-layout-row-major", nullptr, "Set the default matrix layout to row-major."}, + { OptionKind::RestrictiveCapabilityCheck,"-restrictive-capability-check", nullptr, "Many capability warnings will become an error."}, { OptionKind::ZeroInitialize, "-zero-initialize", nullptr, "Initialize all variables to zero." "Structs will set all struct-fields without an init expression to 0." 
@@ -1699,6 +1700,7 @@ SlangResult OptionsParser::_parse( case OptionKind::VulkanEmitReflection: case OptionKind::ZeroInitialize: case OptionKind::IgnoreCapabilities: + case OptionKind::RestrictiveCapabilityCheck: case OptionKind::MinimumSlangOptimization: case OptionKind::DisableNonEssentialValidations: case OptionKind::DisableSourceMap: diff --git a/source/slang/slang-profile-defs.h b/source/slang/slang-profile-defs.h index 3b9ee27f5..25506a0a6 100644 --- a/source/slang/slang-profile-defs.h +++ b/source/slang/slang-profile-defs.h @@ -57,7 +57,9 @@ LANGUAGE_ALIAS(SPIRV, spirv_vk) // Pipeline stages to target PROFILE_STAGE(Vertex, vertex, SLANG_STAGE_VERTEX) PROFILE_STAGE(Hull, hull, SLANG_STAGE_HULL) +PROFILE_STAGE_ALIAS(TessControl, tesscontrol, Hull) PROFILE_STAGE(Domain, domain, SLANG_STAGE_DOMAIN) +PROFILE_STAGE_ALIAS(TessEval, tesseval, Domain) PROFILE_STAGE(Geometry, geometry, SLANG_STAGE_GEOMETRY) PROFILE_STAGE(Pixel, pixel, SLANG_STAGE_FRAGMENT) PROFILE_STAGE(Compute, compute, SLANG_STAGE_COMPUTE) @@ -190,14 +192,14 @@ PROFILE(DX_Vertex_4_0, vs_4_0, Vertex, DX_4_0) PROFILE(DX_Vertex_4_1, vs_4_1, Vertex, DX_4_1) PROFILE(DX_Vertex_5_0, vs_5_0, Vertex, DX_5_0) PROFILE(DX_Vertex_5_1, vs_5_1, Vertex, DX_5_1) -PROFILE(DX_Vertex_6_0, vs_6_0, Vertex, DX_6_0) -PROFILE(DX_Vertex_6_1, vs_6_1, Vertex, DX_6_1) -PROFILE(DX_Vertex_6_2, vs_6_2, Vertex, DX_6_2) -PROFILE(DX_Vertex_6_3, vs_6_3, Vertex, DX_6_3) -PROFILE(DX_Vertex_6_4, vs_6_4, Vertex, DX_6_4) -PROFILE(DX_Vertex_6_5, vs_6_5, Vertex, DX_6_5) -PROFILE(DX_Vertex_6_6, vs_6_6, Vertex, DX_6_6) -PROFILE(DX_Vertex_6_7, vs_6_7, Vertex, DX_6_7) +PROFILE(DX_Vertex_6_0, vs_6_0, Vertex, DX_6_0) +PROFILE(DX_Vertex_6_1, vs_6_1, Vertex, DX_6_1) +PROFILE(DX_Vertex_6_2, vs_6_2, Vertex, DX_6_2) +PROFILE(DX_Vertex_6_3, vs_6_3, Vertex, DX_6_3) +PROFILE(DX_Vertex_6_4, vs_6_4, Vertex, DX_6_4) +PROFILE(DX_Vertex_6_5, vs_6_5, Vertex, DX_6_5) +PROFILE(DX_Vertex_6_6, vs_6_6, Vertex, DX_6_6) +PROFILE(DX_Vertex_6_7, vs_6_7, Vertex, DX_6_7) 
PROFILE(DX_Mesh_6_5, ms_6_5, Mesh, DX_6_5) PROFILE(DX_Mesh_6_6, ms_6_6, Mesh, DX_6_6) diff --git a/source/slang/slang.cpp b/source/slang/slang.cpp index 10dc8f57c..e87aeb399 100644 --- a/source/slang/slang.cpp +++ b/source/slang/slang.cpp @@ -1724,7 +1724,7 @@ CapabilitySet TargetRequest::getTargetCaps() { for (auto atom : profileCapAtomSet) { - if (isTargetVersionAtom((CapabilityName)atom)) + if (isTargetVersionAtom(asAtom(atom))) { atoms.add((CapabilityName)atom); hasTargetVersionAtom = true; @@ -1742,7 +1742,7 @@ CapabilitySet TargetRequest::getTargetCaps() { for (auto atom : profileCapAtomSet) { - if (isSpirvExtensionAtom((CapabilityName)atom)) + if (isSpirvExtensionAtom(asAtom(atom))) { atoms.add((CapabilityName)atom); hasTargetVersionAtom = true; @@ -1754,6 +1754,7 @@ CapabilitySet TargetRequest::getTargetCaps() { isGLSLTarget = true; atoms.add(CapabilityName::glsl); + profileCaps.addSpirvVersionFromOtherAsGlslSpirvVersion(profileCaps); } break; @@ -1796,30 +1797,24 @@ CapabilitySet TargetRequest::getTargetCaps() CapabilitySet targetCap = CapabilitySet(atoms); - CapabilityName latestSpirvAtom = getLatestSpirvAtom(); - + if (profileCaps.atLeastOneSetImpliedInOther(targetCap) == CapabilitySet::ImpliesReturnFlags::Implied) + targetCap.join(profileCaps); + for (auto atomVal : optionSet.getArray(CompilerOptionName::Capability)) { - auto atom = (CapabilityName)atomVal.intValue; - if (isGLSLTarget) - { - // If we are emitting GLSL code, we need to - // translate all spirv_*_* capabilities to - // glsl_spirv_*_* instead. - // - if (atom >= CapabilityName::spirv_1_0 && atom <= latestSpirvAtom) - { - atom = (CapabilityName)((Int)CapabilityName::glsl_spirv_1_0 + ((Int)atom - (Int)CapabilityName::spirv_1_0)); - } - } - if (!targetCap.isIncompatibleWith(atom)) - { - // Only add atoms that are compatible with the current target. 
- atoms.add(atom); - } + auto toAdd = CapabilitySet((CapabilityName)atomVal.intValue); + + if(isGLSLTarget) + targetCap.addSpirvVersionFromOtherAsGlslSpirvVersion(toAdd); + + if (!targetCap.isIncompatibleWith(toAdd)) + targetCap.join(toAdd); } - cookedCapabilities = CapabilitySet(atoms); + cookedCapabilities = targetCap; + + SLANG_ASSERT(!cookedCapabilities.isInvalid()); + return cookedCapabilities; } |
