From 8813c610562b1c30222ec3ef0734ef601d43b617 Mon Sep 17 00:00:00 2001 From: ArielG-NV <159081215+ArielG-NV@users.noreply.github.com> Date: Wed, 12 Jun 2024 16:38:23 -0400 Subject: Capability System: Implicit capability upgrade warning/error (#4241) * capability upgrade warning/error adjusted implementation + tests to support a warning/error if capabilities are implicitly upgraded and test accordingly. * add glsl profile caps * add GLSL and HLSL capabilities to the associated capability * syntax error in capdef * only error if user explicitly enables capabilities 1. changed testing infrastructure to not set a `profile` explicitly, 2. Added tests to be sure this works as intended with user API and with slangc command line * Change capability atom definitions and how Slang manages them to fix errors 1. most `glsl_spirv` version atoms have been removed from `.capdef`, instead we will translate `spirv` version atoms into `glsl_spirv` since there is no point in writing the same code twice in `.capdef` files to define `spirv` versions. 2. add spirv version, and hlsl sm version (and equivalent) capability dependencies 3. removed some stage requirements which were set on objects, keep the wrapper capabilities. I am keeping the wrapper capabilities since I am unaware of whether there are stage limitations (spec says code in practice does not work). * check internal version instead of version profile (_spirv_1_5 vs. spirv_1_5) * remove unused OpCapability. adjust SPIRV versioning again for glsl_spirv * apply workaround for glslang bug with rayquery usage * ensure capabilities targeted by a profile and added together by a user are valid * remove additions to `spirv_1_*` wrapper * spirv_* -> glsl_spirv fix * fix bug where incompatible profiles would cause invalid target caps * try to avoid joining invalid capabilities * fix the warning/error & printing * run through tests to fix capability system and test mistakes many mistakes were mesh shaders doing `-profile glsl_450+spirv_1_4`. 
This is not allowed for a few reasons 1. the test tooling does not handle arguments the same as `slangc` 2. glsl_450 core profile does not support mesh shaders, nor does spirv_1_4. sm_6_5 does work in this scenario * set some sm_4_1 intrinsics to sm_4_0 * replace `GLSL_` defs with `glsl_` * swap the unsupported render-test syntax for working syntax * set d3d11/d3d12 profile defaults this is required since sm version changes compiled code & behavior * adjusted nvapi capabilities with atomics + d3d11 set to use sm_5_0 as per default * cleanup * address review * incorrect styling * change `bitscanForward` to work as intended on 32 bit targets --------- Co-authored-by: Yong He --- source/core/slang-uint-set.cpp | 6 +- source/core/slang-uint-set.h | 6 +- source/slang/glsl.meta.slang | 196 +++---- source/slang/hlsl.meta.slang | 542 +++++++++--------- source/slang/slang-ast-support-types.h | 2 +- source/slang/slang-capabilities.capdef | 614 ++++++++++++--------- source/slang/slang-capability.cpp | 144 +++-- source/slang/slang-capability.h | 36 +- source/slang/slang-check-decl.cpp | 25 +- source/slang/slang-check-shader.cpp | 36 +- source/slang/slang-compiler.cpp | 2 +- source/slang/slang-compiler.h | 38 +- source/slang/slang-diagnostic-defs.h | 2 + source/slang/slang-doc-markdown-writer.cpp | 2 +- source/slang/slang-emit-glsl.cpp | 9 +- source/slang/slang-emit-glsl.h | 2 + source/slang/slang-ir-glsl-legalize.cpp | 16 +- source/slang/slang-ir-specialize-target-switch.cpp | 6 +- source/slang/slang-ir-spirv-legalize.cpp | 14 +- source/slang/slang-lower-to-ir.cpp | 8 +- source/slang/slang-options.cpp | 2 + source/slang/slang-profile-defs.h | 18 +- source/slang/slang.cpp | 39 +- 23 files changed, 1022 insertions(+), 743 deletions(-) (limited to 'source') diff --git a/source/core/slang-uint-set.cpp b/source/core/slang-uint-set.cpp index b6871c192..ba71254e1 100644 --- a/source/core/slang-uint-set.cpp +++ b/source/core/slang-uint-set.cpp @@ -106,7 +106,7 @@ void 
UIntSet::subtractWith(const UIntSet& set) /* static */void UIntSet::calcUnion(UIntSet& outRs, const UIntSet& set1, const UIntSet& set2) { - outRs.m_buffer.setCount(Math::Max(set1.m_buffer.getCount(), set2.m_buffer.getCount())); + outRs.resizeBackingBufferDirectly(Math::Max(set1.m_buffer.getCount(), set2.m_buffer.getCount())); outRs.clear(); for (Index i = 0; i < set1.m_buffer.getCount(); i++) outRs.m_buffer[i] |= set1.m_buffer[i]; @@ -117,7 +117,7 @@ void UIntSet::subtractWith(const UIntSet& set) /* static */void UIntSet::calcIntersection(UIntSet& outRs, const UIntSet& set1, const UIntSet& set2) { const Index minCount = Math::Min(set1.m_buffer.getCount(), set2.m_buffer.getCount()); - outRs.m_buffer.setCount(minCount); + outRs.resizeBackingBufferDirectly(minCount); for (Index i = 0; i < minCount; i++) outRs.m_buffer[i] = set1.m_buffer[i] & set2.m_buffer[i]; @@ -125,7 +125,7 @@ void UIntSet::subtractWith(const UIntSet& set) /* static */void UIntSet::calcSubtract(UIntSet& outRs, const UIntSet& set1, const UIntSet& set2) { - outRs.m_buffer.setCount(set1.m_buffer.getCount()); + outRs.resizeBackingBufferDirectly(set1.m_buffer.getCount()); const Index minCount = Math::Min(set1.m_buffer.getCount(), set2.m_buffer.getCount()); for (Index i = 0; i < minCount; i++) diff --git a/source/core/slang-uint-set.h b/source/core/slang-uint-set.h index 077bc7981..4ba067871 100644 --- a/source/core/slang-uint-set.h +++ b/source/core/slang-uint-set.h @@ -32,10 +32,10 @@ static inline Index bitscanForward(uint64_t in) #else uint32_t out; // check for 0s in 0bit->31bit. 
If all 0's, check for 0s in 32bit->63bit - if (_BitScanForward((unsigned long*)&out, *(((uint32_t*)&in) + 1))) + if (_BitScanForward((unsigned long*)&out, *(((uint32_t*)&in)))) return Index(out); - _BitScanForward((unsigned long*)&out, *(((uint32_t*)&in))); - return Index(out); + _BitScanForward((unsigned long*)&out, *(((uint32_t*)&in)+1)); + return Index(out)+32; #endif// #ifdef _WIN64 #else diff --git a/source/slang/glsl.meta.slang b/source/slang/glsl.meta.slang index db5f9c2fa..d78ab3c0e 100644 --- a/source/slang/glsl.meta.slang +++ b/source/slang/glsl.meta.slang @@ -152,7 +152,7 @@ public in int gl_ViewportIndex : SV_ViewportArrayIndex; [OverloadRank(15)] [ForceInline] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_spirv, sm_4_0_version)] public matrix operator*(matrix m1, matrix m2) { return mul(m2, m1); @@ -160,7 +160,7 @@ public matrix operator*(matrix m1, matrix operator*(matrix m1, matrix m2) { return mul(m2, m1); @@ -168,7 +168,7 @@ public matrix operator*(matrix m1, matrix operator*(matrix m1, matrix m2) { return mul(m2, m1); @@ -176,7 +176,7 @@ public matrix operator*(matrix m1, matrix [ForceInline] [OverloadRank(15)] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_spirv, sm_4_0_version)] public matrix operator*(matrix m1, matrix m2) { return mul(m2, m1); @@ -184,7 +184,7 @@ public matrix operator* operator*(vector v, matrix m) { return mul(m, v); @@ -192,7 +192,7 @@ public vector operator* operator*(matrix m, vector v) { return mul(v, m); @@ -306,7 +306,7 @@ ${{{{ __generic [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_spirv, sm_4_0_version)] public T atan(T y, T x) { return atan2(y, x); @@ -315,7 +315,7 @@ public T atan(T y, T x) __generic [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_spirv, sm_4_0_version)] public vector atan(vector y, vector x) { return 
atan2(y, x); @@ -328,7 +328,7 @@ public vector atan(vector y, vector x) __generic [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_spirv, sm_4_0_version)] public T inversesqrt(T x) { return rsqrt(x); @@ -337,7 +337,7 @@ public T inversesqrt(T x) __generic [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_spirv, sm_4_0_version)] public vector inversesqrt(vector x) { return rsqrt(x); @@ -350,7 +350,7 @@ public vector inversesqrt(vector x) __generic [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] public T roundEven(T x) { return rint(x); @@ -359,7 +359,7 @@ public T roundEven(T x) __generic [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] public vector roundEven(vector x) { return rint(x); @@ -368,7 +368,7 @@ public vector roundEven(vector x) __generic [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_spirv, sm_4_0_version)] public T mod(T x, T y) { // SPIR-V doesn't have "modulus". 
@@ -385,7 +385,7 @@ public T mod(T x, T y) __generic [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_spirv, sm_4_0_version)] public vector mod(vector x, T y) { __target_switch @@ -399,7 +399,7 @@ public vector mod(vector x, T y) __generic [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_spirv, sm_4_0_version)] public vector mod(vector x, vector y) { __target_switch @@ -412,7 +412,7 @@ public vector mod(vector x, vector y) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_spirv, sm_4_0_version)] public vector min(vector x, T y) { __target_switch @@ -425,7 +425,7 @@ public vector min(vector x, T y) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_spirv, sm_4_0_version)] public vector max(vector x, T y) { __target_switch @@ -438,7 +438,7 @@ public vector max(vector x, T y) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_spirv, sm_4_0_version)] public vector clamp(vector x, T minBound, T maxBound) { __target_switch @@ -452,7 +452,7 @@ public vector clamp(vector x, T minBound, T maxBound) __generic [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_spirv, sm_4_0_version)] public T mix(T x, T y, T a) { return lerp(x, y, a); @@ -461,7 +461,7 @@ public T mix(T x, T y, T a) __generic [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_spirv, sm_4_0_version)] public vector mix(vector x, vector y, T a) { __target_switch @@ -475,7 +475,7 @@ public vector mix(vector x, vector y, T a) __generic [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_spirv, sm_4_0_version)] public vector mix(vector x, vector y, vector a) { 
return lerp(x, y, a); @@ -484,7 +484,7 @@ public vector mix(vector x, vector y, vector a) __generic [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_spirv, sm_4_0_version)] public T mix(T x, T y, bool a) { __target_switch @@ -501,7 +501,7 @@ public T mix(T x, T y, bool a) __generic [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_spirv, sm_4_0_version)] public vector mix(vector x, vector y, vector a) { __target_switch @@ -2121,7 +2121,7 @@ public int textureSamples(Sampler2DMSArray sampler) __generic [ForceInline] -[require(cpp_cuda_glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(cpp_cuda_glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vector texture(Sampler1D> sampler, float p) { return __vectorReshape<4>(sampler.Sample(p)); @@ -2129,7 +2129,7 @@ public vector texture(Sampler1D> sampler, float p) __generic [ForceInline] -[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vector texture(Sampler1D> sampler, float p, constexpr float bias) { return __vectorReshape<4>(sampler.SampleBias(p, bias)); @@ -2137,7 +2137,7 @@ public vector texture(Sampler1D> sampler, float p, constexpr fl __generic [ForceInline] -[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vector texture(__TextureImpl< vector, Shape, @@ -2155,7 +2155,7 @@ public vector texture(__TextureImpl< __generic [ForceInline] -[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vector texture(__TextureImpl< vector, Shape, @@ -2229,14 +2229,14 @@ public float texture(sampler2DShadow sampler, vec3 p, float bias) } } -[require(glsl_hlsl_spirv, texture_shadowlod_cube)] +[require(glsl_hlsl_spirv, texture_shadowlod)] [ForceInline] public float texture(samplerCubeShadow sampler, vec4 p) { 
return sampler.SampleCmp(p.xyz, p.w); } -[require(glsl_hlsl_spirv, texture_shadowlod_cube)] +[require(glsl_hlsl_spirv, texture_shadowlod)] [ForceInline] public float texture(samplerCubeShadow sampler, vec4 p, float bias) { @@ -2295,7 +2295,7 @@ public float texture(sampler2DArrayShadow sampler, vec4 p) } [ForceInline] -[require(glsl_hlsl_spirv, texture_shadowlod_cube)] +[require(glsl_hlsl_spirv, texture_shadowlod)] public float texture(samplerCubeArrayShadow sampler, vec4 p, float compare) { return sampler.SampleCmp(p, compare); @@ -2307,7 +2307,7 @@ public float texture(samplerCubeArrayShadow sampler, vec4 p, float compare) __generic [ForceInline] -[require(glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vector textureProj(Sampler1D> sampler, vec2 p) { __requireComputeDerivative(); @@ -2324,7 +2324,7 @@ public vector textureProj(Sampler1D> sampler, vec2 p) __generic [ForceInline] -[require(glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vector textureProj(Sampler1D> sampler, vec2 p, float bias) { __requireComputeDerivative(); @@ -2341,7 +2341,7 @@ public vector textureProj(Sampler1D> sampler, vec2 p, float bia __generic [ForceInline] -[require(glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vector textureProj(Sampler1D> sampler, vec4 p) { __requireComputeDerivative(); @@ -2358,7 +2358,7 @@ public vector textureProj(Sampler1D> sampler, vec4 p) __generic [ForceInline] -[require(glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vector textureProj(Sampler1D> sampler, vec4 p, float bias) { __requireComputeDerivative(); @@ -2375,7 +2375,7 @@ public vector textureProj(Sampler1D> sampler, vec4 p, float bia __generic [ForceInline] -[require(glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vector 
textureProj(Sampler2D> sampler, vec3 p) { __requireComputeDerivative(); @@ -2392,7 +2392,7 @@ public vector textureProj(Sampler2D> sampler, vec3 p) __generic [ForceInline] -[require(glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vector textureProj(Sampler2D> sampler, vec3 p, float bias) { __requireComputeDerivative(); @@ -2409,7 +2409,7 @@ public vector textureProj(Sampler2D> sampler, vec3 p, float bia __generic [ForceInline] -[require(glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vector textureProj(Sampler2D> sampler, vec4 p) { __requireComputeDerivative(); @@ -2426,7 +2426,7 @@ public vector textureProj(Sampler2D> sampler, vec4 p) __generic [ForceInline] -[require(glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vector textureProj(Sampler2D> sampler, vec4 p, float bias) { __requireComputeDerivative(); @@ -2443,7 +2443,7 @@ public vector textureProj(Sampler2D> sampler, vec4 p, float bia __generic [ForceInline] -[require(glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vector textureProj(Sampler3D> sampler, vec4 p) { __requireComputeDerivative(); @@ -2460,7 +2460,7 @@ public vector textureProj(Sampler3D> sampler, vec4 p) __generic [ForceInline] -[require(glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vector textureProj(Sampler3D> sampler, vec4 p, float bias) { __requireComputeDerivative(); @@ -2565,7 +2565,7 @@ public float textureProj(sampler2DShadow sampler, vec4 p, float bias) __generic [ForceInline] -[require(cpp_cuda_glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(cpp_cuda_glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vector textureLod(Sampler1D> sampler, float p, float lod) { return __vectorReshape<4>(sampler.SampleLevel(p, lod)); @@ -2573,7 +2573,7 @@ public vector 
textureLod(Sampler1D> sampler, float p, float lod __generic [ForceInline] -[require(cpp_cuda_glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(cpp_cuda_glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vector textureLod(__TextureImpl< vector, Shape, @@ -2652,7 +2652,7 @@ public float textureLod(sampler1DArrayShadow sampler, vec3 p, float lod) __generic [ForceInline] -[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vector textureOffset(Sampler1D> sampler, float p, constexpr int offset, float bias = 0.0) { return __vectorReshape<4>(sampler.SampleBias(p, bias, offset)); @@ -2660,7 +2660,7 @@ public vector textureOffset(Sampler1D> sampler, float p, conste __generic [ForceInline] -[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vector textureOffset(Sampler2D> sampler, vec2 p, constexpr ivec2 offset, float bias = 0.0) { return __vectorReshape<4>(sampler.SampleBias(p, bias, offset)); @@ -2668,7 +2668,7 @@ public vector textureOffset(Sampler2D> sampler, vec2 p, constex __generic [ForceInline] -[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vector textureOffset(Sampler3D> sampler, vec3 p, constexpr ivec3 offset, float bias = 0.0) { return __vectorReshape<4>(sampler.SampleBias(p, bias, offset)); @@ -2742,7 +2742,7 @@ public float textureOffset(sampler1DShadow sampler, vec3 p, constexpr int offset __generic [ForceInline] -[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vector textureOffset(Sampler1DArray> sampler, vec2 p, constexpr int offset, float bias = 0.0) { return __vectorReshape<4>(sampler.SampleBias(p, bias, offset)); @@ -2750,7 +2750,7 @@ public vector textureOffset(Sampler1DArray> sampler, vec2 p, co __generic [ForceInline] -[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] 
+[require(cpp_glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vector textureOffset(Sampler2DArray> sampler, vec3 p, constexpr ivec2 offset, float bias = 0.0) { return __vectorReshape<4>(sampler.SampleBias(p, bias, offset)); @@ -2921,7 +2921,7 @@ public vector texelFetchOffset(Sampler2DRect> sampler, ivec2 p, __generic [ForceInline] -[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vector textureProjOffset(Sampler1D> sampler, vec2 p, constexpr int offset) { __requireComputeDerivative(); @@ -2938,7 +2938,7 @@ public vector textureProjOffset(Sampler1D> sampler, vec2 p, con __generic [ForceInline] -[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vector textureProjOffset(Sampler1D> sampler, vec2 p, constexpr int offset, float bias) { __requireComputeDerivative(); @@ -2955,7 +2955,7 @@ public vector textureProjOffset(Sampler1D> sampler, vec2 p, con __generic [ForceInline] -[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vector textureProjOffset(Sampler1D> sampler, vec4 p, constexpr int offset) { __requireComputeDerivative(); @@ -2976,7 +2976,7 @@ public vector textureProjOffset(Sampler1D> sampler, vec4 p, con __generic [ForceInline] -[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vector textureProjOffset(Sampler1D> sampler, vec4 p, constexpr int offset, float bias) { __requireComputeDerivative(); @@ -2997,7 +2997,7 @@ public vector textureProjOffset(Sampler1D> sampler, vec4 p, con __generic [ForceInline] -[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vector textureProjOffset(Sampler2D> sampler, vec3 p, constexpr ivec2 offset) { __requireComputeDerivative(); @@ -3014,7 +3014,7 @@ public vector 
textureProjOffset(Sampler2D> sampler, vec3 p, con __generic [ForceInline] -[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vector textureProjOffset(Sampler2D> sampler, vec3 p, constexpr ivec2 offset, float bias) { __requireComputeDerivative(); @@ -3031,7 +3031,7 @@ public vector textureProjOffset(Sampler2D> sampler, vec3 p, con __generic [ForceInline] -[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vector textureProjOffset(Sampler2D> sampler, vec4 p, constexpr ivec2 offset) { __requireComputeDerivative(); @@ -3052,7 +3052,7 @@ public vector textureProjOffset(Sampler2D> sampler, vec4 p, con __generic [ForceInline] -[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vector textureProjOffset(Sampler2D> sampler, vec4 p, constexpr ivec2 offset, float bias) { __requireComputeDerivative(); @@ -3073,7 +3073,7 @@ public vector textureProjOffset(Sampler2D> sampler, vec4 p, con __generic [ForceInline] -[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vector textureProjOffset(Sampler3D> sampler, vec4 p, constexpr ivec3 offset) { __requireComputeDerivative(); @@ -3090,7 +3090,7 @@ public vector textureProjOffset(Sampler3D> sampler, vec4 p, con __generic [ForceInline] -[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vector textureProjOffset(Sampler3D> sampler, vec4 p, constexpr ivec3 offset, float bias) { __requireComputeDerivative(); @@ -3195,7 +3195,7 @@ public float textureProjOffset(sampler2DShadow sampler, vec4 p, constexpr ivec2 __generic [ForceInline] -[require(cpp_glsl_hlsl_spirv, texture_sm_4_1)] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_0)] public vector textureLodOffset(Sampler1D> sampler, float p, float lod, 
constexpr int offset) { return __vectorReshape<4>(sampler.SampleLevel(p, lod, offset)); @@ -3203,7 +3203,7 @@ public vector textureLodOffset(Sampler1D> sampler, float p, flo __generic [ForceInline] -[require(cpp_glsl_hlsl_spirv, texture_sm_4_1)] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_0)] public vector textureLodOffset(__TextureImpl< vector, Shape, @@ -3285,7 +3285,7 @@ public float textureLodOffset(sampler1DArrayShadow sampler, vec3 p, float lod, c __generic [ForceInline] -[require(cpp_cuda_glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(cpp_cuda_glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vector textureProjLod(Sampler1D> sampler, vec2 p, float lod) { __target_switch @@ -3301,7 +3301,7 @@ public vector textureProjLod(Sampler1D> sampler, vec2 p, float __generic [ForceInline] -[require(cpp_cuda_glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(cpp_cuda_glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vector textureProjLod(Sampler1D> sampler, vec4 p, float lod) { __target_switch @@ -3321,7 +3321,7 @@ public vector textureProjLod(Sampler1D> sampler, vec4 p, float __generic [ForceInline] -[require(cpp_cuda_glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(cpp_cuda_glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vector textureProjLod(Sampler2D> sampler, vec3 p, float lod) { __target_switch @@ -3337,7 +3337,7 @@ public vector textureProjLod(Sampler2D> sampler, vec3 p, float __generic [ForceInline] -[require(cpp_cuda_glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(cpp_cuda_glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vector textureProjLod(Sampler2D> sampler, vec4 p, float lod) { __target_switch @@ -3357,7 +3357,7 @@ public vector textureProjLod(Sampler2D> sampler, vec4 p, float __generic [ForceInline] -[require(cpp_cuda_glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(cpp_cuda_glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vector textureProjLod(Sampler3D> sampler, vec4 p, float lod) { __target_switch @@ -3417,7 +3417,7 @@ public 
float textureProjLod(sampler2DShadow sampler, vec4 p, float lod) __generic [ForceInline] -[require(glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vector textureProjLodOffset(Sampler1D> sampler, vec2 p, float lod, constexpr int offset) { __target_switch @@ -3433,7 +3433,7 @@ public vector textureProjLodOffset(Sampler1D> sampler, vec2 p, __generic [ForceInline] -[require(glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vector textureProjLodOffset(Sampler1D> sampler, vec4 p, float lod, constexpr int offset) { __target_switch @@ -3453,7 +3453,7 @@ public vector textureProjLodOffset(Sampler1D> sampler, vec4 p, __generic [ForceInline] -[require(glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vector textureProjLodOffset(Sampler2D> sampler, vec3 p, float lod, constexpr ivec2 offset) { __target_switch @@ -3469,7 +3469,7 @@ public vector textureProjLodOffset(Sampler2D> sampler, vec3 p, __generic [ForceInline] -[require(glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vector textureProjLodOffset(Sampler2D> sampler, vec4 p, float lod, constexpr ivec2 offset) { __target_switch @@ -3489,7 +3489,7 @@ public vector textureProjLodOffset(Sampler2D> sampler, vec4 p, __generic [ForceInline] -[require(glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vector textureProjLodOffset(Sampler3D> sampler, vec4 p, float lod, constexpr ivec3 offset) { __target_switch @@ -3626,7 +3626,7 @@ public float textureGrad(sampler2DShadow sampler, vec3 p, vec2 dPdx, vec2 dPdy) } [ForceInline] -[require(glsl_spirv, texture_shadowlod_cube)] +[require(glsl_spirv, texture_shadowlod)] public float textureGrad(samplerCubeShadow sampler, vec4 p, vec3 dPdx, vec3 dPdy) { __target_switch @@ -4171,163 +4171,163 @@ public vec4 
textureGatherOffsets(__TextureImpl< // error when we try to translate the GLSL to SPIR-V. // So we cannot use them. -[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vec4 texture1D(sampler1D sampler, float coord) { return texture(sampler, coord); } -[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vec4 texture1D(sampler1D sampler, float coord, float bias) { return texture(sampler, coord, bias); } -[require(glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vec4 texture1DProj(sampler1D sampler, vec2 coord) { return textureProj(sampler, coord); } -[require(glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vec4 texture1DProj(sampler1D sampler, vec2 coord, float bias) { return textureProj(sampler, coord, bias); } -[require(glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vec4 texture1DProj(sampler1D sampler, vec4 coord) { return textureProj(sampler, coord); } -[require(glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vec4 texture1DProj(sampler1D sampler, vec4 coord, float bias) { return textureProj(sampler, coord, bias); } -[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vec4 texture1DLod(sampler1D sampler, float coord, float lod) { return textureLod(sampler, coord, lod); } -[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vec4 texture1DProjLod(sampler1D sampler, vec2 coord, float lod) { return textureProjLod(sampler, coord, lod); } -[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vec4 
texture1DProjLod(sampler1D sampler, vec4 coord, float lod) { return textureProjLod(sampler, coord, lod); } -[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vec4 texture2D(sampler2D sampler, vec2 coord) { return texture(sampler, coord); } -[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vec4 texture2D(sampler2D sampler, vec2 coord, float bias) { return texture(sampler, coord, bias); } -[require(glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vec4 texture2DProj(sampler2D sampler, vec3 coord) { return textureProj(sampler, coord); } -[require(glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vec4 texture2DProj(sampler2D sampler, vec3 coord, float bias) { return textureProj(sampler, coord, bias); } -[require(glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vec4 texture2DProj(sampler2D sampler, vec4 coord) { return textureProj(sampler, coord); } -[require(glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vec4 texture2DProj(sampler2D sampler, vec4 coord, float bias) { return textureProj(sampler, coord, bias); } -[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vec4 texture2DLod(sampler2D sampler, vec2 coord, float lod) { return textureLod(sampler, coord, lod); } -[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vec4 texture2DProjLod(sampler2D sampler, vec3 coord, float lod) { return textureProjLod(sampler, coord, lod); } -[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vec4 texture2DProjLod(sampler2D sampler, 
vec4 coord, float lod) { return textureProjLod(sampler, coord, lod); } -[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vec4 texture3D(sampler3D sampler, vec3 coord) { return texture(sampler, coord); } -[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vec4 texture3D(sampler3D sampler, vec3 coord, float bias) { return texture(sampler, coord, bias); } -[require(glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vec4 texture3DProj(sampler3D sampler, vec4 coord) { return textureProj(sampler, coord); } -[require(glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vec4 texture3DProj(sampler3D sampler, vec4 coord, float bias) { return textureProj(sampler, coord, bias); } -[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vec4 texture3DLod(sampler3D sampler, vec3 coord, float lod) { return textureLod(sampler, coord, lod); } -[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vec4 texture3DProjLod(sampler3D sampler, vec4 coord, float lod) { return textureProjLod(sampler, coord, lod); } -[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vec4 textureCube(samplerCube sampler, vec3 coord) { return texture(sampler, coord); } -[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vec4 textureCube(samplerCube sampler, vec3 coord, float bias) { return texture(sampler, coord, bias); } -[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_0_fragment)] public vec4 textureCubeLod(samplerCube sampler, vec3 coord, float lod) { 
return textureLod(sampler, coord, lod); @@ -9390,7 +9390,7 @@ public vec4 noise4(vector x) // TODO: if called after a return, error. [ForceInline] -[require(glsl_hlsl_spirv, glsl_barrier)] +[require(glsl_hlsl_spirv, memorybarrier)] public void barrier() { __target_switch diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang index c03c47703..9a87604ae 100644 --- a/source/slang/hlsl.meta.slang +++ b/source/slang/hlsl.meta.slang @@ -37,12 +37,12 @@ struct GLSLShaderStorageBuffer {} __generic __intrinsic_op($(kIROp_StructuredBufferGetDimensions)) -[require(cpp_cuda_glsl_hlsl_metal_spirv, structuredbuffer_rw)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, appendstructuredbuffer)] uint2 __structuredBufferGetDimensions(AppendStructuredBuffer buffer); __generic __intrinsic_op($(kIROp_StructuredBufferGetDimensions)) -[require(cpp_cuda_glsl_hlsl_metal_spirv, structuredbuffer_rw)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, consumestructuredbuffer)] uint2 __structuredBufferGetDimensions(ConsumeStructuredBuffer buffer); __intrinsic_op($(kIROp_StructuredBufferGetDimensions)) @@ -578,7 +578,7 @@ extension __TextureImpl [__readNone] [ForceInline] - [require(cpp_cuda_glsl_hlsl_metal_spirv, texture_sm_4_1_fragment)] + [require(cpp_cuda_glsl_hlsl_metal_spirv, texture_sm_4_0_fragment)] T Sample(vector location) { __requireComputeDerivative(); @@ -634,7 +634,7 @@ extension __TextureImpl [__readNone] [ForceInline] - [require(cpp_glsl_hlsl_metal_spirv, texture_sm_4_1_fragment)] + [require(cpp_glsl_hlsl_metal_spirv, texture_sm_4_0_fragment)] T Sample(vector location, constexpr vector offset) { __requireComputeDerivative(); @@ -663,7 +663,7 @@ extension __TextureImpl [__readNone] [ForceInline] __glsl_extension(GL_ARB_sparse_texture_clamp) - [require(cpp_glsl_hlsl_metal_spirv, texture_sm_4_1_fragment)] + [require(cpp_glsl_hlsl_metal_spirv, texture_sm_4_0_fragment)] T Sample(vector location, vector offset, float clamp) { __requireComputeDerivative(); @@ -691,7 +691,7 @@ 
extension __TextureImpl [__readNone] [ForceInline] - [require(cpp_glsl_hlsl_metal_spirv, texture_sm_4_1_fragment)] + [require(cpp_glsl_hlsl_metal_spirv, texture_sm_4_0_fragment)] T Sample(vector location, vector offset, float clamp, out uint status) { __target_switch @@ -709,7 +709,7 @@ extension __TextureImpl [__readNone] [ForceInline] - [require(cpp_glsl_hlsl_metal_spirv, texture_sm_4_1_fragment)] + [require(cpp_glsl_hlsl_metal_spirv, texture_sm_4_0_fragment)] T SampleBias(vector location, float bias) { __requireComputeDerivative(); @@ -737,7 +737,7 @@ extension __TextureImpl [__readNone] [ForceInline] - [require(cpp_glsl_hlsl_metal_spirv, texture_sm_4_1_fragment)] + [require(cpp_glsl_hlsl_metal_spirv, texture_sm_4_0_fragment)] T SampleBias(vector location, float bias, constexpr vector offset) { __requireComputeDerivative(); @@ -896,7 +896,7 @@ extension __TextureImpl [__readNone] [ForceInline] - [require(cpp_glsl_hlsl_metal_spirv, texture_sm_4_1)] + [require(cpp_glsl_hlsl_metal_spirv, texture_sm_4_0)] T SampleGrad(vector location, vector gradX, vector gradY) { __target_switch @@ -923,7 +923,7 @@ extension __TextureImpl [__readNone] [ForceInline] - [require(cpp_glsl_hlsl_metal_spirv, texture_sm_4_1)] + [require(cpp_glsl_hlsl_metal_spirv, texture_sm_4_0)] T SampleGrad(vector location, vector gradX, vector gradY, constexpr vector offset) { __target_switch @@ -950,7 +950,7 @@ extension __TextureImpl [__readNone] [ForceInline] __glsl_extension(GL_ARB_sparse_texture_clamp) - [require(cpp_glsl_hlsl_metal_spirv, texture_sm_4_1)] + [require(cpp_glsl_hlsl_metal_spirv, texture_sm_4_0)] T SampleGrad(vector location, vector gradX, vector gradY, constexpr vector offset, float lodClamp) { __target_switch @@ -977,7 +977,7 @@ extension __TextureImpl [__readNone] [ForceInline] - [require(cpp_cuda_glsl_hlsl_metal_spirv, texture_sm_4_1)] + [require(cpp_cuda_glsl_hlsl_metal_spirv, texture_sm_4_0)] T SampleLevel(vector location, float level) { __target_switch @@ -1034,7 +1034,7 @@ 
extension __TextureImpl [__readNone] [ForceInline] - [require(cpp_glsl_hlsl_metal_spirv, texture_sm_4_1)] + [require(cpp_glsl_hlsl_metal_spirv, texture_sm_4_0)] T SampleLevel(vector location, float level, constexpr vector offset) { __target_switch @@ -1116,7 +1116,7 @@ extension __TextureImpl { [__readNone] [ForceInline] - [require(cpp_cuda_glsl_hlsl_metal_spirv, texture_sm_4_1_fragment)] + [require(cpp_cuda_glsl_hlsl_metal_spirv, texture_sm_4_0_fragment)] T Sample(SamplerState s, vector location) { __requireComputeDerivative(); @@ -1200,7 +1200,7 @@ extension __TextureImpl [__readNone] [ForceInline] - [require(cpp_glsl_hlsl_metal_spirv, texture_sm_4_1_fragment)] + [require(cpp_glsl_hlsl_metal_spirv, texture_sm_4_0_fragment)] T Sample(SamplerState s, vector location, constexpr vector offset) { __requireComputeDerivative(); @@ -1248,7 +1248,7 @@ extension __TextureImpl [__readNone] [ForceInline] __glsl_extension(GL_ARB_sparse_texture_clamp) - [require(cpp_glsl_hlsl_metal_spirv, texture_sm_4_1_fragment)] + [require(cpp_glsl_hlsl_metal_spirv, texture_sm_4_0_fragment)] T Sample(SamplerState s, vector location, constexpr vector offset, float clamp) { __requireComputeDerivative(); @@ -1296,7 +1296,7 @@ extension __TextureImpl [__readNone] [ForceInline] - [require(cpp_glsl_hlsl_metal_spirv, texture_sm_4_1_fragment)] + [require(cpp_glsl_hlsl_metal_spirv, texture_sm_4_0_fragment)] T Sample(SamplerState s, vector location, constexpr vector offset, float clamp, out uint status) { __target_switch @@ -1314,7 +1314,7 @@ extension __TextureImpl [__readNone] [ForceInline] - [require(cpp_glsl_hlsl_metal_spirv, texture_sm_4_1_fragment)] + [require(cpp_glsl_hlsl_metal_spirv, texture_sm_4_0_fragment)] T SampleBias(SamplerState s, vector location, float bias) { __requireComputeDerivative(); @@ -1364,7 +1364,7 @@ extension __TextureImpl [__readNone] [ForceInline] - [require(cpp_glsl_hlsl_metal_spirv, texture_sm_4_1_fragment)] + [require(cpp_glsl_hlsl_metal_spirv, 
texture_sm_4_0_fragment)] T SampleBias(SamplerState s, vector location, float bias, constexpr vector offset) { __requireComputeDerivative(); @@ -1619,7 +1619,7 @@ extension __TextureImpl [__readNone] [ForceInline] - [require(cpp_glsl_hlsl_metal_spirv, texture_sm_4_1)] + [require(cpp_glsl_hlsl_metal_spirv, texture_sm_4_0)] T SampleGrad(SamplerState s, vector location, vector gradX, vector gradY) { __target_switch @@ -1670,7 +1670,7 @@ extension __TextureImpl [__readNone] [ForceInline] - [require(cpp_glsl_hlsl_metal_spirv, texture_sm_4_1)] + [require(cpp_glsl_hlsl_metal_spirv, texture_sm_4_0)] T SampleGrad(SamplerState s, vector location, vector gradX, vector gradY, constexpr vector offset) { __target_switch @@ -1719,7 +1719,7 @@ extension __TextureImpl [__readNone] [ForceInline] __glsl_extension(GL_ARB_sparse_texture_clamp) - [require(cpp_glsl_hlsl_metal_spirv, texture_sm_4_1)] + [require(cpp_glsl_hlsl_metal_spirv, texture_sm_4_0)] T SampleGrad(SamplerState s, vector location, vector gradX, vector gradY, constexpr vector offset, float lodClamp) { __target_switch @@ -1767,7 +1767,7 @@ extension __TextureImpl [__readNone] [ForceInline] - [require(cpp_cuda_glsl_hlsl_metal_spirv, texture_sm_4_1)] + [require(cpp_cuda_glsl_hlsl_metal_spirv, texture_sm_4_0)] T SampleLevel(SamplerState s, vector location, float level) { __target_switch @@ -1847,7 +1847,8 @@ extension __TextureImpl [__readNone] [ForceInline] - [require(cpp_glsl_hlsl_metal_spirv, texture_sm_4_1)] + + [require(cpp_glsl_hlsl_metal_spirv, texture_sm_4_0)] T SampleLevel(SamplerState s, vector location, float level, constexpr vector offset) { __target_switch @@ -3228,8 +3229,6 @@ ${{{{ const char* textureTypeName = isCombined ? 
"Sampler" : "Texture"; StringBuilder requireStringBuilder; - if (shape == kStdlibShapeIndexCube) - requireStringBuilder << "[require(any_target, texture_cube)]"; auto requireString = requireStringBuilder.toString(); }}}} $(requireString) @@ -3710,13 +3709,13 @@ uint64_t __atomicExchange(__ref uint64_t ioValue, uint64_t value) // Conversion between uint64_t and uint2 -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_spirv, sm_4_0_version)] uint2 __asuint2(uint64_t i) { return uint2(uint(i), uint(uint64_t(i) >> 32)); } -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_spirv, sm_4_0_version)] uint64_t __asuint64(uint2 i) { return (uint64_t(i.y) << 32) | i.x; @@ -4072,7 +4071,7 @@ ${{{{ __cuda_sm_version(2.0) [__requiresNVAPI] [ForceInline] - [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda_float1)] + [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_nvapi_cuda_float1)] void InterlockedAddF32(uint byteAddress, float valueToAdd, out float originalValue) { __target_switch @@ -4143,7 +4142,7 @@ ${{{{ [__requiresNVAPI] [ForceInline] __cuda_sm_version(2.0) - [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda_float1)] + [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_nvapi_cuda_float1)] void InterlockedAddF32(uint byteAddress, float valueToAdd) { __target_switch @@ -4163,7 +4162,7 @@ ${{{{ // Int64 Add [ForceInline] __cuda_sm_version(6.0) - [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda6_int64)] + [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_nvapi_cuda6_int64)] void InterlockedAddI64(uint byteAddress, int64_t valueToAdd, out int64_t originalValue) { __target_switch @@ -4182,7 +4181,7 @@ ${{{{ // Without returning original value __cuda_sm_version(6.0) - [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda6_int64)] + [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_nvapi_cuda6_int64)] void InterlockedAddI64(uint byteAddress, int64_t valueToAdd) { __target_switch @@ -4199,7 +4198,7 @@ ${{{{ // Cas 
uint64_t - [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda9_int64)] + [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_nvapi_cuda9_int64)] void InterlockedCompareExchangeU64(uint byteAddress, uint64_t compareValue, uint64_t value, out uint64_t outOriginalValue) { __target_switch @@ -4217,7 +4216,7 @@ ${{{{ // Max __cuda_sm_version(5.0) - [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda5_int64)] + [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_nvapi_cuda5_int64)] uint64_t InterlockedMaxU64(uint byteAddress, uint64_t value) { __target_switch @@ -4275,7 +4274,7 @@ ${{{{ // Min __cuda_sm_version(5.0) - [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda5_int64)] + [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_nvapi_cuda5_int64)] uint64_t InterlockedMinU64(uint byteAddress, uint64_t value) { __target_switch @@ -4333,7 +4332,7 @@ ${{{{ // And __cuda_sm_version(5.0) - [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda5_int64)] + [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_nvapi_cuda5_int64)] uint64_t InterlockedAndU64(uint byteAddress, uint64_t value) { __target_switch @@ -4371,7 +4370,7 @@ ${{{{ // Or __cuda_sm_version(5.0) - [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda5_int64)] + [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_nvapi_cuda5_int64)] uint64_t InterlockedOrU64(uint byteAddress, uint64_t value) { __target_switch @@ -4409,7 +4408,7 @@ ${{{{ // Xor __cuda_sm_version(5.0) - [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda5_int64)] + [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_nvapi_cuda5_int64)] uint64_t InterlockedXorU64(uint byteAddress, uint64_t value) { __target_switch @@ -4446,7 +4445,7 @@ ${{{{ // Exchange - [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda9_int64)] + [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_nvapi_cuda9_int64)] uint64_t InterlockedExchangeU64(uint byteAddress, uint64_t value) { __target_switch @@ -4551,6 +4550,7 @@ ${{{{ } [ForceInline] + [require(glsl_hlsl_spirv, atomic_glsl_hlsl_cuda9_int64)] 
void InterlockedCompareExchange64(uint byteAddress, int64_t compareValue, int64_t value, out int64_t outOriginalValue) { __target_switch @@ -4567,6 +4567,7 @@ ${{{{ } [ForceInline] + [require(glsl_hlsl_spirv, atomic_glsl_hlsl_cuda9_int64)] void InterlockedCompareExchange64(uint byteAddress, uint64_t compareValue, uint64_t value, out uint64_t outOriginalValue) { __target_switch @@ -5211,7 +5212,7 @@ void abort(); __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T abs(T x) { __target_switch @@ -5232,7 +5233,7 @@ T abs(T x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector abs(vector x) { __target_switch @@ -5250,7 +5251,7 @@ vector abs(vector x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix abs(matrix x) { __target_switch @@ -5263,7 +5264,7 @@ matrix abs(matrix x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T abs(T x) { __target_switch @@ -5281,7 +5282,7 @@ T abs(T x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector abs(vector x) { __target_switch @@ -5299,7 +5300,7 @@ vector abs(vector x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix abs(matrix x) { __target_switch @@ -5313,7 +5314,7 @@ matrix abs(matrix x) __generic [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T fabs(T x) { __target_switch @@ -5327,7 +5328,7 @@ T fabs(T x) __generic [__readNone] [ForceInline] 
-[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector fabs(vector x) { __target_switch @@ -5343,7 +5344,7 @@ vector fabs(vector x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T acos(T x) { __target_switch @@ -5361,7 +5362,7 @@ T acos(T x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector acos(vector x) { __target_switch @@ -5379,7 +5380,7 @@ vector acos(vector x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix acos(matrix x) { __target_switch @@ -5395,7 +5396,7 @@ matrix acos(matrix x) __generic [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T acosh(T x) { __target_switch @@ -5415,7 +5416,7 @@ T acosh(T x) __generic [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector acosh(vector x) { __target_switch @@ -5527,7 +5528,7 @@ bool all(matrix x) // Barrier for writes to all memory spaces (HLSL SM 5.0) __glsl_extension(GL_KHR_memory_scope_semantics) -[require(cuda_glsl_hlsl_metal_spirv, memorybarrier_compute)] +[require(cuda_glsl_hlsl_metal_spirv, memorybarrier)] void AllMemoryBarrier() { __target_switch @@ -5545,7 +5546,7 @@ void AllMemoryBarrier() // Thread-group sync and barrier for writes to all memory spaces (HLSL SM 5.0) __glsl_extension(GL_KHR_memory_scope_semantics) -[require(cuda_glsl_hlsl_metal_spirv, memorybarrier_compute)] +[require(cuda_glsl_hlsl_metal_spirv, memorybarrier)] void AllMemoryBarrierWithGroupSync() { __target_switch @@ -5778,28 +5779,28 @@ matrix asfloat(matrix x) // No op 
[__unsafeForceInlineEarly] [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_spirv, sm_4_0_version)] float asfloat(float x) { return x; } __generic [__unsafeForceInlineEarly] [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_spirv, sm_4_0_version)] vector asfloat(vector x) { return x; } __generic [__unsafeForceInlineEarly] [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_spirv, sm_4_0_version)] matrix asfloat(matrix x) { return x; } // Inverse sine (HLSL SM 1.0) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T asin(T x) { __target_switch @@ -5817,7 +5818,7 @@ T asin(T x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector asin(vector x) { __target_switch @@ -5835,7 +5836,7 @@ vector asin(vector x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix asin(matrix x) { __target_switch @@ -5851,7 +5852,7 @@ matrix asin(matrix x) __generic [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T asinh(T x) { __target_switch @@ -5871,7 +5872,7 @@ T asinh(T x) __generic [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector asinh(vector x) { __target_switch @@ -6329,7 +6330,7 @@ matrix asfloat16(matrix va // Inverse tangent (HLSL SM 1.0) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T atan(T x) { __target_switch @@ -6347,7 +6348,7 @@ T atan(T x) __generic [__readNone] 
-[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector atan(vector x) { __target_switch @@ -6365,7 +6366,7 @@ vector atan(vector x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix atan(matrix x) { __target_switch @@ -6378,7 +6379,7 @@ matrix atan(matrix x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T atan2(T y, T x) { __target_switch @@ -6396,7 +6397,7 @@ T atan2(T y, T x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector atan2(vector y, vector x) { __target_switch @@ -6414,7 +6415,7 @@ vector atan2(vector y, vector x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix atan2(matrix y, matrix x) { __target_switch @@ -6430,7 +6431,7 @@ matrix atan2(matrix y, matrix x) __generic [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T atanh(T x) { __target_switch @@ -6450,7 +6451,7 @@ T atanh(T x) __generic [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector atanh(vector x) { __target_switch @@ -6468,7 +6469,7 @@ vector atanh(vector x) // Ceiling (HLSL SM 1.0) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T ceil(T x) { __target_switch @@ -6486,7 +6487,7 @@ T ceil(T x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector ceil(vector 
x) { __target_switch @@ -6504,7 +6505,7 @@ vector ceil(vector x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix ceil(matrix x) { __target_switch @@ -6603,7 +6604,7 @@ bool CheckAccessFullyMapped(uint status); // Clamp (HLSL SM 1.0) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T clamp(T x, T minBound, T maxBound) { __target_switch @@ -6627,7 +6628,7 @@ T clamp(T x, T minBound, T maxBound) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector clamp(vector x, vector minBound, vector maxBound) { __target_switch @@ -6651,7 +6652,7 @@ vector clamp(vector x, vector minBound, vector maxBound) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix clamp(matrix x, matrix minBound, matrix maxBound) { __target_switch @@ -6664,7 +6665,7 @@ matrix clamp(matrix x, matrix minBound, matrix maxBo __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T clamp(T x, T minBound, T maxBound) { __target_switch @@ -6682,7 +6683,7 @@ T clamp(T x, T minBound, T maxBound) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector clamp(vector x, vector minBound, vector maxBound) { __target_switch @@ -6700,7 +6701,7 @@ vector clamp(vector x, vector minBound, vector maxBound) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix clamp(matrix x, matrix minBound, matrix maxBound) { __target_switch @@ -6751,7 +6752,7 @@ void clip(matrix x) // Cosine 
__generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T cos(T x) { __target_switch @@ -6769,7 +6770,7 @@ T cos(T x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector cos(vector x) { __target_switch @@ -6787,7 +6788,7 @@ vector cos(vector x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix cos(matrix x) { __target_switch @@ -6801,7 +6802,7 @@ matrix cos(matrix x) // Hyperbolic cosine __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T cosh(T x) { __target_switch @@ -6819,7 +6820,7 @@ T cosh(T x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector cosh(vector x) { __target_switch @@ -6837,7 +6838,7 @@ vector cosh(vector x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix cosh(matrix x) { __target_switch @@ -6852,7 +6853,7 @@ matrix cosh(matrix x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T cospi(T x) { __target_switch @@ -6865,7 +6866,7 @@ T cospi(T x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector cospi(vector x) { __target_switch @@ -6902,7 +6903,7 @@ uint countbits(uint value) // TODO: SPIRV does not support integer vectors. 
__generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector cross(vector left, vector right) { __target_switch @@ -6923,7 +6924,7 @@ vector cross(vector left, vector right) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector cross(vector left, vector right) { __target_switch @@ -6943,7 +6944,7 @@ vector cross(vector left, vector right) // Convert encoded color [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] int4 D3DCOLORtoUBYTE4(float4 color) { __target_switch @@ -7182,7 +7183,7 @@ T determinant(matrix m) // Barrier for device memory __glsl_extension(GL_KHR_memory_scope_semantics) -[require(cuda_glsl_hlsl_metal_spirv, memorybarrier_compute)] +[require(cuda_glsl_hlsl_metal_spirv, memorybarrier)] void DeviceMemoryBarrier() { __target_switch @@ -7199,7 +7200,7 @@ void DeviceMemoryBarrier() } __glsl_extension(GL_KHR_memory_scope_semantics) -[require(cuda_glsl_hlsl_metal_spirv, memorybarrier_compute)] +[require(cuda_glsl_hlsl_metal_spirv, memorybarrier)] void DeviceMemoryBarrierWithGroupSync() { __target_switch @@ -7219,7 +7220,7 @@ void DeviceMemoryBarrierWithGroupSync() __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T distance(vector x, vector y) { __target_switch @@ -7237,7 +7238,7 @@ T distance(vector x, vector y) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T distance(T x, T y) { __target_switch @@ -7255,7 +7256,7 @@ T distance(T x, T y) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T fdim(T x, T y) { __target_switch 
@@ -7268,7 +7269,7 @@ T fdim(T x, T y) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector fdim(vector x, vector y) { __target_switch @@ -7313,7 +7314,7 @@ vector divide(vector x, vector y) __generic [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T dot(T x, T y) { __target_switch @@ -7327,7 +7328,7 @@ T dot(T x, T y) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T dot(vector x, vector y) { __target_switch @@ -7348,7 +7349,7 @@ T dot(vector x, vector y) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T dot(vector x, vector y) { __target_switch @@ -7538,7 +7539,7 @@ matrix EvaluateAttributeSnapped(matrix x, int2 offset) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T exp(T x) { __target_switch @@ -7556,7 +7557,7 @@ T exp(T x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector exp(vector x) { __target_switch @@ -7574,7 +7575,7 @@ vector exp(vector x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix exp(matrix x) { __target_switch @@ -7589,7 +7590,7 @@ matrix exp(matrix x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T exp2(T x) { __target_switch @@ -7621,7 +7622,7 @@ T exp2(T x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, 
sm_4_0_version)] vector exp2(vector x) { __target_switch @@ -7640,7 +7641,7 @@ vector exp2(vector x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix exp2(matrix x) { __target_switch @@ -7655,7 +7656,7 @@ matrix exp2(matrix x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T exp10(T x) { __target_switch @@ -7669,7 +7670,7 @@ T exp10(T x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector exp10(vector x) { __target_switch @@ -7853,7 +7854,7 @@ vector f32tof16_(vector value) // Flip surface normal to face forward, if needed __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_400)] +[require(cpp_cuda_glsl_hlsl_spirv, sm_4_0_version)] vector faceforward(vector n, vector i, vector ng) { __target_switch @@ -8006,7 +8007,7 @@ vector firstbitlow(vector value) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T floor(T x) { __target_switch @@ -8024,7 +8025,7 @@ T floor(T x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector floor(vector x) { __target_switch @@ -8042,7 +8043,7 @@ vector floor(vector x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix floor(matrix x) { __target_switch @@ -8113,7 +8114,7 @@ matrix fma(matrix a, matrix b, matrix c) __generic [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T fmod(T x, T y) { // In HLSL, `fmod` returns a remainder. 
@@ -8184,7 +8185,7 @@ T fmod(T x, T y) __generic [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector fmod(vector x, vector y) { __target_switch @@ -8201,7 +8202,7 @@ vector fmod(vector x, vector y) __generic [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix fmod(matrix x, matrix y) { __target_switch @@ -8215,7 +8216,7 @@ matrix fmod(matrix x, matrix y) // Fractional part __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T frac(T x) { __target_switch @@ -8233,7 +8234,7 @@ T frac(T x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector frac(vector x) { __target_switch @@ -8259,7 +8260,7 @@ matrix frac(matrix x) __generic [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T fract(T x) { return frac(x); @@ -8268,7 +8269,7 @@ T fract(T x) __generic [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector fract(vector x) { return frac(x); @@ -8278,7 +8279,7 @@ vector fract(vector x) // Split float into mantissa and exponent __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T frexp(T x, out int exp) { __target_switch @@ -8296,7 +8297,7 @@ T frexp(T x, out int exp) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector frexp(vector x, out vector exp) { __target_switch @@ -8314,7 +8315,7 @@ vector frexp(vector x, out 
vector exp) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix frexp(matrix x, out matrix exp) { __target_switch @@ -8512,7 +8513,7 @@ float2 GetRenderTargetSamplePosition(int Index) // Group memory barrier __glsl_extension(GL_KHR_memory_scope_semantics) -[require(cuda_glsl_hlsl_metal_spirv, memorybarrier_compute)] +[require(cuda_glsl_hlsl_metal_spirv, memorybarrier)] void GroupMemoryBarrier() { __target_switch @@ -8529,7 +8530,7 @@ void GroupMemoryBarrier() } } -[require(cuda_glsl_hlsl_metal_spirv, memorybarrier_compute)] +[require(cuda_glsl_hlsl_metal_spirv, memorybarrier)] void __subgroupBarrier() { __target_switch @@ -8547,7 +8548,7 @@ void __subgroupBarrier() } __glsl_extension(GL_KHR_memory_scope_semantics) -[require(cuda_glsl_hlsl_metal_spirv, memorybarrier_compute)] +[require(cuda_glsl_hlsl_metal_spirv, memorybarrier)] void GroupMemoryBarrierWithGroupSync() { __target_switch @@ -9449,7 +9450,7 @@ void InterlockedXor(__ref uint64_t dest, uint64_t value, out uint64_t origina __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] bool isfinite(T x) { __target_switch @@ -9467,7 +9468,7 @@ bool isfinite(T x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector isfinite(vector x) { __target_switch @@ -9485,7 +9486,7 @@ vector isfinite(vector x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix isfinite(matrix x) { __target_switch @@ -9499,7 +9500,7 @@ matrix isfinite(matrix x) // Is floating-point value infinite? 
__generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] bool isinf(T x) { __target_switch @@ -9518,7 +9519,7 @@ bool isinf(T x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector isinf(vector x) { __target_switch @@ -9536,7 +9537,7 @@ vector isinf(vector x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix isinf(matrix x) { __target_switch @@ -9550,7 +9551,7 @@ matrix isinf(matrix x) // Is floating-point value not-a-number? __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] bool isnan(T x) { __target_switch @@ -9569,7 +9570,7 @@ bool isnan(T x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector isnan(vector x) { __target_switch @@ -9587,7 +9588,7 @@ vector isnan(vector x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix isnan(matrix x) { __target_switch @@ -9602,7 +9603,7 @@ matrix isnan(matrix x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T ldexp(T x, T exp) { __target_switch @@ -9615,7 +9616,7 @@ T ldexp(T x, T exp) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector ldexp(vector x, vector exp) { __target_switch @@ -9628,7 +9629,7 @@ vector ldexp(vector x, vector exp) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] 
matrix ldexp(matrix x, matrix exp) { __target_switch @@ -9641,7 +9642,7 @@ matrix ldexp(matrix x, matrix exp) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T ldexp(T x, E exp) { __target_switch @@ -9659,7 +9660,7 @@ T ldexp(T x, E exp) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector ldexp(vector x, vector exp) { __target_switch @@ -9683,7 +9684,7 @@ vector ldexp(vector x, vector exp) // Vector length __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T length(vector x) { __target_switch @@ -9701,7 +9702,7 @@ T length(vector x) // Scalar float length __generic -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T length(T x) { __target_switch @@ -9718,7 +9719,7 @@ T length(T x) // Linear interpolation __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T lerp(T x, T y, T s) { __target_switch @@ -9736,7 +9737,7 @@ T lerp(T x, T y, T s) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector lerp(vector x, vector y, vector s) { __target_switch @@ -9754,7 +9755,7 @@ vector lerp(vector x, vector y, vector s) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix lerp(matrix x, matrix y, matrix s) { __target_switch @@ -9767,7 +9768,7 @@ matrix lerp(matrix x, matrix y, matrix s) // Legacy lighting function (obsolete) [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] 
float4 lit(float n_dot_l, float n_dot_h, float m) { __target_switch @@ -9784,7 +9785,7 @@ float4 lit(float n_dot_l, float n_dot_h, float m) // Base-e logarithm __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T log(T x) { __target_switch @@ -9802,7 +9803,7 @@ T log(T x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector log(vector x) { __target_switch @@ -9820,7 +9821,7 @@ vector log(vector x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix log(matrix x) { __target_switch @@ -9834,7 +9835,7 @@ matrix log(matrix x) // Base-10 logarithm __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T log10(T x) { __target_switch @@ -9857,7 +9858,7 @@ T log10(T x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector log10(vector x) { __target_switch @@ -9880,7 +9881,7 @@ vector log10(vector x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix log10(matrix x) { __target_switch @@ -9894,7 +9895,7 @@ matrix log10(matrix x) // Base-2 logarithm __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T log2(T x) { __target_switch @@ -9912,7 +9913,7 @@ T log2(T x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector log2(vector x) { __target_switch @@ -9930,7 +9931,7 @@ vector log2(vector x) __generic [__readNone] 
-[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix log2(matrix x) { __target_switch @@ -10043,7 +10044,7 @@ matrix mad(matrix mvalue, matrix avalue, matrix [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T max(T x, T y) { // Note: a stdlib implementation of `max` (or `min`) will require splitting @@ -10078,7 +10079,7 @@ T max(T x, T y) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector max(vector x, vector y) { __target_switch @@ -10108,7 +10109,7 @@ vector max(vector x, vector y) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix max(matrix x, matrix y) { __target_switch @@ -10121,7 +10122,7 @@ matrix max(matrix x, matrix y) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T max(T x, T y) { __target_switch @@ -10139,7 +10140,7 @@ T max(T x, T y) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector max(vector x, vector y) { __target_switch @@ -10157,7 +10158,7 @@ vector max(vector x, vector y) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix max(matrix x, matrix y) { __target_switch @@ -10170,7 +10171,7 @@ matrix max(matrix x, matrix y) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T max3(T x, T y, T z) { __target_switch @@ -10183,7 +10184,7 @@ T max3(T x, T y, T z) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, 
sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector max3(vector x, vector y, vector z) { __target_switch @@ -10196,7 +10197,7 @@ vector max3(vector x, vector y, vector z) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T fmax(T x, T y) { __target_switch @@ -10210,7 +10211,7 @@ T fmax(T x, T y) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector fmax(vector x, vector y) { __target_switch @@ -10223,7 +10224,7 @@ vector fmax(vector x, vector y) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T fmax3(T x, T y, T z) { __target_switch @@ -10257,7 +10258,7 @@ T fmax3(T x, T y, T z) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector fmax3(vector x, vector y, vector z) { __target_switch @@ -10272,7 +10273,7 @@ vector fmax3(vector x, vector y, vector z) // minimum __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T min(T x, T y) { __target_switch @@ -10300,7 +10301,7 @@ T min(T x, T y) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector min(vector x, vector y) { __target_switch @@ -10326,7 +10327,7 @@ vector min(vector x, vector y) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix min(matrix x, matrix y) { __target_switch @@ -10339,7 +10340,7 @@ matrix min(matrix x, matrix y) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] 
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T min(T x, T y) { __target_switch @@ -10357,7 +10358,7 @@ T min(T x, T y) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector min(vector x, vector y) { __target_switch @@ -10375,7 +10376,7 @@ vector min(vector x, vector y) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix min(matrix x, matrix y) { __target_switch @@ -10388,7 +10389,7 @@ matrix min(matrix x, matrix y) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T min3(T x, T y, T z) { __target_switch @@ -10401,7 +10402,7 @@ T min3(T x, T y, T z) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector min3(vector x, vector y, vector z) { __target_switch @@ -10414,7 +10415,7 @@ vector min3(vector x, vector y, vector z) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T fmin(T x, T y) { __target_switch @@ -10428,7 +10429,7 @@ T fmin(T x, T y) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector fmin(vector x, vector y) { __target_switch @@ -10442,7 +10443,7 @@ vector fmin(vector x, vector y) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T fmin3(T x, T y, T z) { __target_switch @@ -10476,7 +10477,7 @@ T fmin3(T x, T y, T z) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector fmin3(vector x, vector y, vector 
z) { __target_switch @@ -10491,7 +10492,7 @@ vector fmin3(vector x, vector y, vector z) // Median __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T median3(T x, T y, T z) { __target_switch @@ -10519,7 +10520,7 @@ T median3(T x, T y, T z) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector median3(vector x, vector y, vector z) { __target_switch @@ -10538,7 +10539,7 @@ vector median3(vector x, vector y, vector z) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T fmedian3(T x, T y, T z) { __target_switch @@ -10567,7 +10568,7 @@ T fmedian3(T x, T y, T z) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector fmedian3(vector x, vector y, vector z) { __target_switch @@ -10582,7 +10583,7 @@ vector fmedian3(vector x, vector y, vector z) // split into integer and fractional parts (both with same sign) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T modf(T x, out T ip) { __target_switch @@ -10600,7 +10601,7 @@ T modf(T x, out T ip) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector modf(vector x, out vector ip) { __target_switch @@ -10618,7 +10619,7 @@ vector modf(vector x, out vector ip) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix modf(matrix x, out matrix ip) { __target_switch @@ -10631,7 +10632,7 @@ matrix modf(matrix x, out matrix ip) // msad4 (whatever that is) [__readNone] 
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_spirv, sm_4_0_version)] uint4 msad4(uint reference, uint2 source, uint4 accum) { __target_switch @@ -10659,39 +10660,39 @@ uint4 msad4(uint reference, uint2 source, uint4 accum) __generic __intrinsic_op($(kIROp_Mul)) [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T mul(T x, T y); // scalar-vector and vector-scalar __generic __intrinsic_op($(kIROp_Mul)) [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector mul(vector x, T y); __generic __intrinsic_op($(kIROp_Mul)) [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector mul(T x, vector y); // scalar-matrix and matrix-scalar __generic __intrinsic_op($(kIROp_Mul)) [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix mul(matrix x, T y); __generic __intrinsic_op($(kIROp_Mul)) [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix mul(T x, matrix y); // vector-vector (dot product) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T mul(vector x, vector y) { __target_switch @@ -10705,7 +10706,7 @@ T mul(vector x, vector y) } __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T mul(vector x, vector y) { __target_switch @@ -10719,7 +10720,7 @@ T mul(vector x, vector y) // vector-matrix __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector mul(vector left, 
matrix right) { __target_switch @@ -10746,7 +10747,7 @@ vector mul(vector left, matrix right) } __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector mul(vector left, matrix right) { __target_switch @@ -10770,7 +10771,7 @@ vector mul(vector left, matrix right) } __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector mul(vector left, matrix right) { __target_switch @@ -10796,7 +10797,7 @@ vector mul(vector left, matrix right) // matrix-vector __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector mul(matrix left, vector right) { __target_switch @@ -10823,7 +10824,7 @@ vector mul(matrix left, vector right) } __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector mul(matrix left, vector right) { __target_switch @@ -10847,7 +10848,7 @@ vector mul(matrix left, vector right) } __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector mul(matrix left, vector right) { __target_switch @@ -10873,7 +10874,7 @@ vector mul(matrix left, vector right) // matrix-matrix __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix mul(matrix left, matrix right) { __target_switch @@ -10901,7 +10902,7 @@ matrix mul(matrix left, matrix right) } __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix mul(matrix left, matrix right) { __target_switch @@ -10926,7 +10927,7 @@ matrix mul(matrix left, matrix right) } __generic [__readNone] 
-[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix mul(matrix left, matrix right) { __target_switch @@ -11057,7 +11058,7 @@ T NonUniformResourceIndex(T value) { return value; } // Normalize a vector __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector normalize(vector x) { __target_switch @@ -11075,7 +11076,7 @@ vector normalize(vector x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T normalize(T x) { __target_switch @@ -11094,7 +11095,7 @@ T normalize(T x) // Raise to a power __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T pow(T x, T y) { __target_switch @@ -11112,7 +11113,7 @@ T pow(T x, T y) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector pow(vector x, vector y) { __target_switch @@ -11130,7 +11131,7 @@ vector pow(vector x, vector y) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix pow(matrix x, matrix y) { __target_switch @@ -11143,7 +11144,7 @@ matrix pow(matrix x, matrix y) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T powr(T x, T y) { __target_switch @@ -11156,7 +11157,7 @@ T powr(T x, T y) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector powr(vector x, vector y) { __target_switch @@ -11302,7 +11303,7 @@ void ProcessTriTessFactorsMin( // Degrees to radians __generic [__readNone] 
-[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T radians(T x) { __target_switch @@ -11319,7 +11320,7 @@ T radians(T x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector radians(vector x) { __target_switch @@ -11336,7 +11337,7 @@ vector radians(vector x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix radians(matrix x) { __target_switch @@ -11350,7 +11351,7 @@ matrix radians(matrix x) // Approximate reciprocal __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T rcp(T x) { __target_switch @@ -11363,7 +11364,7 @@ T rcp(T x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector rcp(vector x) { __target_switch @@ -11379,7 +11380,7 @@ vector rcp(vector x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix rcp(matrix x) { __target_switch @@ -11393,7 +11394,7 @@ matrix rcp(matrix x) // Reflect incident vector across plane with given normal __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T reflect(T i, T n) { __target_switch @@ -11411,7 +11412,7 @@ T reflect(T i, T n) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector reflect(vector i, vector n) { __target_switch @@ -11430,7 +11431,7 @@ vector reflect(vector i, vector n) // Refract incident vector given surface normal and index of refraction __generic [__readNone] 
-[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector refract(vector i, vector n, T eta) { __target_switch @@ -11451,7 +11452,7 @@ vector refract(vector i, vector n, T eta) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T refract(T i, T n, T eta) { __target_switch @@ -11513,7 +11514,7 @@ vector reversebits(vector value) __generic [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T rint(T x) { __target_switch @@ -11544,7 +11545,7 @@ T rint(T x) __generic [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector rint(vector x) { __target_switch @@ -11562,7 +11563,7 @@ vector rint(vector x) // Round-to-nearest __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T round(T x) { __target_switch @@ -11580,7 +11581,7 @@ T round(T x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector round(vector x) { __target_switch @@ -11598,7 +11599,7 @@ vector round(vector x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix round(matrix x) { __target_switch @@ -11612,7 +11613,7 @@ matrix round(matrix x) // Reciprocal of square root __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T rsqrt(T x) { __target_switch @@ -11632,7 +11633,7 @@ T rsqrt(T x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] 
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector rsqrt(vector x) { __target_switch @@ -11650,7 +11651,7 @@ vector rsqrt(vector x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix rsqrt(matrix x) { __target_switch @@ -11665,7 +11666,7 @@ matrix rsqrt(matrix x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T saturate(T x) { __target_switch @@ -11679,7 +11680,7 @@ T saturate(T x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector saturate(vector x) { __target_switch @@ -11695,7 +11696,7 @@ vector saturate(vector x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix saturate(matrix x) { __target_switch @@ -11764,7 +11765,7 @@ vector sign(vector x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_spirv, sm_4_0_version)] matrix sign(matrix x) { __target_switch @@ -11779,7 +11780,7 @@ matrix sign(matrix x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T sin(T x) { __target_switch @@ -11797,7 +11798,7 @@ T sin(T x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector sin(vector x) { __target_switch @@ -11815,7 +11816,7 @@ vector sin(vector x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix sin(matrix x) { __target_switch @@ -11852,7 +11853,7 @@ vector __sincos_metal(vector x, out vector c) __generic [__readNone] [ForceInline] 
-[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] void sincos(T x, out T s, out T c) { __target_switch @@ -11872,7 +11873,7 @@ void sincos(T x, out T s, out T c) __generic [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] void sincos(vector x, out vector s, out vector c) { __target_switch @@ -11891,7 +11892,7 @@ void sincos(vector x, out vector s, out vector c) __generic [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] void sincos(matrix x, out matrix s, out matrix c) { __target_switch @@ -11906,7 +11907,7 @@ void sincos(matrix x, out matrix s, out matrix c) // Hyperbolic Sine __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T sinh(T x) { __target_switch @@ -11924,7 +11925,7 @@ T sinh(T x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector sinh(vector x) { __target_switch @@ -11942,7 +11943,7 @@ vector sinh(vector x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix sinh(matrix x) { __target_switch @@ -11957,7 +11958,7 @@ matrix sinh(matrix x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T sinpi(T x) { __target_switch @@ -11970,7 +11971,7 @@ T sinpi(T x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector sinpi(vector x) { __target_switch @@ -11985,7 +11986,7 @@ vector sinpi(vector x) // Smooth step (Hermite interpolation) __generic 
[__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T smoothstep(T min, T max, T x) { __target_switch @@ -12004,7 +12005,7 @@ T smoothstep(T min, T max, T x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector smoothstep(vector min, vector max, vector x) { __target_switch @@ -12022,7 +12023,7 @@ vector smoothstep(vector min, vector max, vector x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix smoothstep(matrix min, matrix max, matrix x) { __target_switch @@ -12036,7 +12037,7 @@ matrix smoothstep(matrix min, matrix max, matrix [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T sqrt(T x) { __target_switch @@ -12054,7 +12055,7 @@ T sqrt(T x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector sqrt(vector x) { __target_switch @@ -12072,7 +12073,7 @@ vector sqrt(vector x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix sqrt(matrix x) { __target_switch @@ -12086,7 +12087,7 @@ matrix sqrt(matrix x) // Step function __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T step(T y, T x) { __target_switch @@ -12104,7 +12105,7 @@ T step(T y, T x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector step(vector y, vector x) { __target_switch @@ -12122,7 +12123,7 @@ vector step(vector y, vector x) __generic [__readNone] 
-[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix step(matrix y, matrix x) { __target_switch @@ -12136,7 +12137,7 @@ matrix step(matrix y, matrix x) // Tangent __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T tan(T x) { __target_switch @@ -12154,7 +12155,7 @@ T tan(T x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector tan(vector x) { __target_switch @@ -12172,7 +12173,7 @@ vector tan(vector x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix tan(matrix x) { __target_switch @@ -12186,7 +12187,7 @@ matrix tan(matrix x) // Hyperbolic tangent __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T tanh(T x) { __target_switch @@ -12204,7 +12205,7 @@ T tanh(T x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector tanh(vector x) { __target_switch @@ -12222,7 +12223,7 @@ vector tanh(vector x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix tanh(matrix x) { __target_switch @@ -12237,7 +12238,7 @@ matrix tanh(matrix x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T tanpi(T x) { __target_switch @@ -12250,7 +12251,7 @@ T tanpi(T x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector tanpi(vector x) { __target_switch @@ -12265,7 +12266,7 @@ 
vector tanpi(vector x) // Matrix transpose __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_spirv, sm_4_0_version)] [PreferRecompute] matrix transpose(matrix x) { @@ -12286,7 +12287,7 @@ matrix transpose(matrix x) } __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_spirv, sm_4_0_version)] [PreferRecompute] matrix transpose(matrix x) { @@ -12307,7 +12308,7 @@ matrix transpose(matrix x) } __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_spirv, sm_4_0_version)] [PreferRecompute] [OverloadRank(-1)] matrix transpose(matrix x) @@ -12331,7 +12332,7 @@ matrix transpose(matrix x) // Truncate to integer __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T trunc(T x) { __target_switch @@ -12349,7 +12350,7 @@ T trunc(T x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector trunc(vector x) { __target_switch @@ -12367,7 +12368,7 @@ vector trunc(vector x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix trunc(matrix x) { __target_switch @@ -12548,7 +12549,7 @@ uint WaveMaskCountBits(WaveMask mask, bool value) // It seems this can only mean the active threads are the "threads the program flow would lead to". This implies a lockstep // "straight SIMD" style interpretation. That being the case this op on HLSL is just a memory barrier without any Sync. 
-[require(cuda_glsl_hlsl_spirv, memorybarrier_compute)] +[require(cuda_glsl_hlsl_spirv, memorybarrier)] void AllMemoryBarrierWithWaveMaskSync(WaveMask mask) { __target_switch @@ -12580,7 +12581,7 @@ void AllMemoryBarrierWithWaveMaskSync(WaveMask mask) // aspect of HLSL seems to make everything in lock step - but that's not quite so, it only has to apparently be that way as far as the programmers // model appears - divergence could perhaps potentially still happen. -[require(cuda_glsl_hlsl_spirv, memorybarrier_compute)] +[require(cuda_glsl_hlsl_spirv, memorybarrier)] void GroupMemoryBarrierWithWaveMaskSync(WaveMask mask) { __target_switch @@ -12596,7 +12597,7 @@ void GroupMemoryBarrierWithWaveMaskSync(WaveMask mask) } } -[require(cuda_glsl_hlsl_spirv, memorybarrier_compute)] +[require(cuda_glsl_hlsl_spirv, memorybarrier)] void AllMemoryBarrierWithWaveSync() { __target_switch @@ -12612,7 +12613,7 @@ void AllMemoryBarrierWithWaveSync() } } -[require(cuda_glsl_hlsl_spirv, memorybarrier_compute)] +[require(cuda_glsl_hlsl_spirv, memorybarrier)] void GroupMemoryBarrierWithWaveSync() { __target_switch @@ -15095,7 +15096,7 @@ struct RaytracingAccelerationStructure {}; // 10.1.5 - Intersection Attributes Structure __target_intrinsic(hlsl, BuiltInTriangleIntersectionAttributes) -[require(cpp_cuda_glsl_hlsl_spirv, rayobject)] +[require(cpp_cuda_glsl_hlsl_spirv, raytracing)] struct BuiltInTriangleIntersectionAttributes { __target_intrinsic(hlsl, barycentrics) @@ -16888,7 +16889,6 @@ ${{{{ return spirv_asm { OpCapability RayQueryKHR; - OpCapability RayTracingPositionFetchKHR; OpCapability RayQueryPositionFetchKHR; OpExtension "SPV_KHR_ray_query"; OpExtension "SPV_KHR_ray_tracing_position_fetch"; @@ -19742,7 +19742,7 @@ extension __TextureImpl coord, float value, out float originalValue) { __target_switch diff --git a/source/slang/slang-ast-support-types.h b/source/slang/slang-ast-support-types.h index 648baa820..76c638693 100644 --- a/source/slang/slang-ast-support-types.h +++ 
b/source/slang/slang-ast-support-types.h @@ -67,7 +67,7 @@ namespace Slang void printDiagnosticArg(StringBuilder& sb, DeclRefBase* declRefBase); void printDiagnosticArg(StringBuilder& sb, ASTNodeType nodeType); void printDiagnosticArg(StringBuilder& sb, const CapabilitySet& set); - + void printDiagnosticArg(StringBuilder& sb, List& set); struct QualifiedDeclPath { diff --git a/source/slang/slang-capabilities.capdef b/source/slang/slang-capabilities.capdef index 6d4e5b3f4..53679be35 100644 --- a/source/slang/slang-capabilities.capdef +++ b/source/slang/slang-capabilities.capdef @@ -58,53 +58,10 @@ def c : target + textualTarget; def cpp : target + textualTarget; def cuda : target + textualTarget; def metal : target + textualTarget; -def spirv_1_0 : target; - -// We have multiple capabilities for the various SPIR-V versions, -// arranged so that they inherit from one another to represent which versions -// provide a super-set of the features of earlier ones (e.g., SPIR-V 1.4 is -// expressed as inheriting from SPIR-V 1.3). 
-// -def spirv_1_1 : spirv_1_0; -def spirv_1_2 : spirv_1_1; -def spirv_1_3 : spirv_1_2; -def spirv_1_4 : spirv_1_3; -def spirv_1_5 : spirv_1_4; -def spirv_1_6 : spirv_1_5; -alias spirv = spirv_1_0; -alias spirv_latest = spirv_1_6; - -alias any_target = hlsl | metal | glsl | c | cpp | cuda | spirv; -alias any_textual_target = hlsl | metal | glsl | c | cpp | cuda; -alias any_gfx_target = hlsl | metal | glsl | spirv; -alias any_cpp_target = cpp | cuda; - -alias cpp_cuda = cpp | cuda; -alias cpp_cuda_glsl_spirv = cpp | cuda | glsl | spirv; -alias cpp_cuda_glsl_hlsl = cpp | cuda | glsl | hlsl; -alias cpp_cuda_glsl_hlsl_spirv = cpp | cuda | glsl | hlsl | spirv_1_0; -alias cpp_cuda_glsl_hlsl_metal_spirv = cpp | cuda | glsl | hlsl | metal | spirv_1_0; -alias cpp_cuda_hlsl = cpp | cuda | hlsl; -alias cpp_cuda_hlsl_spirv = cpp | cuda | hlsl | spirv_1_0; -alias cpp_cuda_hlsl_metal_spirv = cpp | cuda | hlsl | metal | spirv_1_0; -alias cpp_glsl = cpp | glsl; -alias cpp_glsl_hlsl_spirv = cpp | glsl | hlsl | spirv_1_0; -alias cpp_glsl_hlsl_metal_spirv = cpp | glsl | hlsl | metal | spirv_1_0; -alias cpp_hlsl = cpp | hlsl; -alias cuda_glsl_hlsl = cuda | glsl | hlsl; -alias cuda_glsl_hlsl_spirv = cuda | glsl | hlsl | spirv_1_0; -alias cuda_glsl_hlsl_metal_spirv = cuda | glsl | hlsl | metal | spirv_1_0; -alias cuda_glsl_spirv = cuda | glsl | spirv; -alias cuda_hlsl = cuda | hlsl; -alias cuda_hlsl_spirv = cuda | hlsl | spirv; -alias glsl_hlsl_spirv = glsl | hlsl | spirv; -alias glsl_hlsl_metal_spirv = glsl | hlsl | metal | spirv; -alias glsl_metal_spirv = glsl | metal | spirv; -alias glsl_spirv = glsl | spirv; -alias hlsl_spirv = hlsl | spirv; +def spirv : target; // Capabilities that stand for target spirv version for GLSL backend. -// These are not compilation targets. +// These are not compilation targets. We will convert `_spirv_*`->`glsl_spirv_*` during a compile. 
def glsl_spirv_1_0 : glsl; def glsl_spirv_1_1 : glsl_spirv_1_0; def glsl_spirv_1_2 : glsl_spirv_1_1; @@ -113,6 +70,20 @@ def glsl_spirv_1_4 : glsl_spirv_1_3; def glsl_spirv_1_5 : glsl_spirv_1_4; def glsl_spirv_1_6 : glsl_spirv_1_5; +// We have multiple capabilities for the various SPIR-V versions, +// arranged so that they inherit from one another to represent which versions +// provide a super-set of the features of earlier ones (e.g., SPIR-V 1.4 is +// expressed as inheriting from SPIR-V 1.3). +// +def _spirv_1_0 : spirv; +def _spirv_1_1 : _spirv_1_0; +def _spirv_1_2 : _spirv_1_1; +def _spirv_1_3 : _spirv_1_2; +def _spirv_1_4 : _spirv_1_3; +def _spirv_1_5 : _spirv_1_4; +def _spirv_1_6 : _spirv_1_5; +alias _spirv_latest = _spirv_1_6; + def _GLSL_130 : glsl; def _GLSL_140 : _GLSL_130; def _GLSL_150 : _GLSL_140; @@ -125,7 +96,6 @@ def _GLSL_440 : _GLSL_430; def _GLSL_450 : _GLSL_440; def _GLSL_460 : _GLSL_450; - // metal versions def metallib_2_3 : metal; def metallib_2_4 : metallib_2_3; @@ -161,6 +131,35 @@ def _cuda_sm_7_0 : _cuda_sm_6_0; def _cuda_sm_8_0 : _cuda_sm_7_0; def _cuda_sm_9_0 : _cuda_sm_8_0; +alias any_target = hlsl | metal | glsl | c | cpp | cuda | spirv; +alias any_textual_target = hlsl | metal | glsl | c | cpp | cuda; +alias any_gfx_target = hlsl | metal | glsl | spirv; +alias any_cpp_target = cpp | cuda; + +alias cpp_cuda = cpp | cuda; +alias cpp_cuda_glsl_spirv = cpp | cuda | glsl | spirv; +alias cpp_cuda_glsl_hlsl = cpp | cuda | glsl | hlsl; +alias cpp_cuda_glsl_hlsl_spirv = cpp | cuda | glsl | hlsl | spirv; +alias cpp_cuda_glsl_hlsl_metal_spirv = cpp | cuda | glsl | hlsl | metal | spirv; +alias cpp_cuda_hlsl = cpp | cuda | hlsl; +alias cpp_cuda_hlsl_spirv = cpp | cuda | hlsl | spirv; +alias cpp_cuda_hlsl_metal_spirv = cpp | cuda | hlsl | metal | spirv; +alias cpp_glsl = cpp | glsl; +alias cpp_glsl_hlsl_spirv = cpp | glsl | hlsl | spirv; +alias cpp_glsl_hlsl_metal_spirv = cpp | glsl | hlsl | metal | spirv; +alias cpp_hlsl = cpp | hlsl; +alias 
cuda_glsl_hlsl = cuda | glsl | hlsl; +alias cuda_glsl_hlsl_spirv = cuda | glsl | hlsl | spirv; +alias cuda_glsl_hlsl_metal_spirv = cuda | glsl | hlsl | metal | spirv; +alias cuda_glsl_spirv = cuda | glsl | spirv; +alias cuda_hlsl = cuda | hlsl; +alias cuda_hlsl_spirv = cuda | hlsl | spirv; +alias glsl_hlsl_spirv = glsl | hlsl | spirv; +alias glsl_hlsl_metal_spirv = glsl | hlsl | metal | spirv; +alias glsl_metal_spirv = glsl | metal | spirv; +alias glsl_spirv = glsl | spirv; +alias hlsl_spirv = hlsl | spirv; + abstract stage; def vertex : stage; def fragment : stage; @@ -205,58 +204,65 @@ alias raytracingstages_compute_fragment_geometry_vertex = raytracing_stages | co // SPIRV extensions. -def SPV_EXT_fragment_shader_interlock : spirv_1_0; -def SPV_KHR_fragment_shader_barycentric : spirv_1_0; -def SPV_EXT_fragment_fully_covered : spirv_1_0; -def SPV_EXT_descriptor_indexing : spirv_1_0; -def SPV_EXT_shader_atomic_float_add : spirv_1_0; +def SPV_EXT_fragment_shader_interlock : _spirv_1_0; +def SPV_EXT_physical_storage_buffer : _spirv_1_3; +def SPV_EXT_fragment_fully_covered : _spirv_1_0; +def SPV_EXT_descriptor_indexing : _spirv_1_0; +def SPV_EXT_shader_atomic_float_add : _spirv_1_0; def SPV_EXT_shader_atomic_float16_add : SPV_EXT_shader_atomic_float_add; -def SPV_EXT_shader_atomic_float_min_max : spirv_1_0; -def SPV_KHR_non_semantic_info : spirv_1_0; -def SPV_NV_shader_subgroup_partitioned : spirv_1_0; -def SPV_NV_ray_tracing_motion_blur : spirv_1_0; -def SPV_EXT_mesh_shader : spirv_1_4; -def SPV_KHR_ray_tracing : spirv_1_4; -def SPV_KHR_ray_query : spirv_1_0; -def SPV_KHR_ray_tracing_position_fetch : SPV_KHR_ray_tracing + SPV_KHR_ray_query; -def SPV_NV_shader_invocation_reorder : spirv_1_5 + SPV_KHR_ray_tracing; -def SPV_KHR_shader_clock : spirv_1_0; -def SPV_NV_shader_image_footprint : spirv_1_0; -def SPV_GOOGLE_user_type : spirv_1_0; -def SPV_NV_compute_shader_derivatives : spirv_1_0; -def SPV_EXT_demote_to_helper_invocation : spirv_1_4; +def 
SPV_EXT_shader_atomic_float_min_max : _spirv_1_0; +def SPV_EXT_mesh_shader : _spirv_1_4; +def SPV_EXT_demote_to_helper_invocation : _spirv_1_4; + +def SPV_KHR_fragment_shader_barycentric : _spirv_1_0; +def SPV_KHR_non_semantic_info : _spirv_1_0; +def SPV_KHR_ray_tracing : _spirv_1_4; +def SPV_KHR_ray_query : _spirv_1_0; +def SPV_KHR_ray_tracing_position_fetch : _spirv_1_0; // requires SPV_KHR_ray_query or SPV_KHR_ray_tracing +def SPV_KHR_shader_clock : _spirv_1_0; + +def SPV_NV_shader_subgroup_partitioned : _spirv_1_0; +def SPV_NV_ray_tracing_motion_blur : _spirv_1_0; +def SPV_NV_shader_invocation_reorder : _spirv_1_5 + SPV_KHR_ray_tracing; +def SPV_NV_shader_image_footprint : _spirv_1_0; +def SPV_NV_compute_shader_derivatives : _spirv_1_0; + +def SPV_GOOGLE_user_type : _spirv_1_0; // SPIRV Capabilities. def spvAtomicFloat32AddEXT : SPV_EXT_shader_atomic_float_add; def spvAtomicFloat16AddEXT : SPV_EXT_shader_atomic_float16_add; -def spvInt64Atomics : spirv_1_0; +def spvInt64Atomics : _spirv_1_0; def spvAtomicFloat32MinMaxEXT : SPV_EXT_shader_atomic_float_min_max; def spvAtomicFloat16MinMaxEXT : SPV_EXT_shader_atomic_float_min_max; -def spvDerivativeControl : spirv_1_0; -def spvImageQuery : spirv_1_0; -def spvImageGatherExtended : spirv_1_0; +def spvDerivativeControl : _spirv_1_0; +def spvImageQuery : _spirv_1_0; +def spvImageGatherExtended : _spirv_1_0; +def spvSparseResidency : _spirv_1_0; def spvImageFootprintNV : SPV_NV_shader_image_footprint; -def spvMinLod : spirv_1_0; +def spvMinLod : _spirv_1_0; def spvFragmentShaderPixelInterlockEXT : SPV_EXT_fragment_shader_interlock; def spvFragmentBarycentricKHR : SPV_KHR_fragment_shader_barycentric; def spvFragmentFullyCoveredEXT : SPV_EXT_fragment_fully_covered; -def spvGroupNonUniformBallot : spirv_1_3; -def spvGroupNonUniformShuffle : spirv_1_3; -def spvGroupNonUniformArithmetic : spirv_1_3; -def spvGroupNonUniformQuad : spirv_1_3; -def spvGroupNonUniformVote : spirv_1_3; -def spvGroupNonUniformPartitionedNV : 
spirv_1_3 + SPV_NV_shader_subgroup_partitioned; +def spvGroupNonUniformBallot : _spirv_1_3; +def spvGroupNonUniformShuffle : _spirv_1_3; +def spvGroupNonUniformArithmetic : _spirv_1_3; +def spvGroupNonUniformQuad : _spirv_1_3; +def spvGroupNonUniformVote : _spirv_1_3; +def spvGroupNonUniformPartitionedNV : _spirv_1_3 + SPV_NV_shader_subgroup_partitioned; def spvRayTracingMotionBlurNV : SPV_NV_ray_tracing_motion_blur; def spvMeshShadingEXT : SPV_EXT_mesh_shader; def spvRayTracingKHR : SPV_KHR_ray_tracing; -def spvRayTracingPositionFetchKHR : SPV_KHR_ray_tracing_position_fetch; +def spvRayTracingPositionFetchKHR : SPV_KHR_ray_tracing_position_fetch + spvRayTracingKHR; def spvRayQueryKHR : SPV_KHR_ray_query; -def spvRayQueryPositionFetchKHR : SPV_KHR_ray_tracing_position_fetch; +def spvRayQueryPositionFetchKHR : SPV_KHR_ray_tracing_position_fetch + spvRayQueryKHR; def spvShaderInvocationReorderNV : SPV_NV_shader_invocation_reorder; def spvShaderClockKHR : SPV_KHR_shader_clock; -def spvShaderNonUniform : spirv_1_5; +def spvShaderNonUniformEXT : SPV_EXT_descriptor_indexing; +def spvShaderNonUniform : spvShaderNonUniformEXT; def spvDemoteToHelperInvocationEXT : SPV_EXT_demote_to_helper_invocation; +def spvDemoteToHelperInvocation : spvDemoteToHelperInvocationEXT; // The following capabilities all pertain to how ray tracing shaders are translated // to GLSL, where there are two different extensions that can provide the core @@ -271,109 +277,125 @@ def spvDemoteToHelperInvocationEXT : SPV_EXT_demote_to_helper_invocation; // extensions, the `EXT` extension will be favored over the `NV` extension, if // all other factors are equal. // +// If a user enabled a GL_ARB/GL_NV/GL_KHR, the user will also be-able to enable any equal GL_EXT +// To describe this relationship, all GL_EXT which were promoted (or originally not an EXT) +// will be set as a derived atom. 
-def _GL_ARB_derivative_control : glsl; -def _GL_ARB_fragment_shader_interlock : glsl; -def _GL_ARB_gpu_shader5 : glsl; -def _GL_ARB_shader_image_size : glsl; -def _GL_ARB_shader_texture_image_samples : glsl; -def _GL_ARB_sparse_texture_clamp : glsl; -def _GL_EXT_texture_query_lod : glsl; -def _GL_ARB_texture_query_levels : glsl; -def _GL_ARB_texture_cube_map : glsl; -def _GL_ARB_texture_gather : glsl; -def _GL_EXT_buffer_reference : glsl; -def _GL_EXT_buffer_reference_uvec2 : glsl; +def _GL_EXT_buffer_reference : _GLSL_450; +def _GL_EXT_buffer_reference_uvec2 : _GLSL_450; def _GL_EXT_debug_printf : glsl; -def _GL_EXT_fragment_shader_barycentric : glsl; -def _GL_EXT_mesh_shader : glsl; +def _GL_EXT_demote_to_helper_invocation : _GLSL_140; +def _GL_EXT_fragment_shader_barycentric : _GLSL_450; +def _GL_EXT_mesh_shader : _GLSL_450; def _GL_EXT_nonuniform_qualifier : glsl; -def _GL_EXT_ray_query : glsl_spirv_1_4; -def _GL_EXT_ray_tracing : glsl_spirv_1_4; -def _GL_EXT_ray_tracing_position_fetch : glsl_spirv_1_4; -def _GL_EXT_samplerless_texture_functions : glsl; +def _GL_EXT_ray_query : _GLSL_460 + glsl_spirv_1_4; // spirv_1_4 is required due to glslang bug which enables `SPV_KHR_ray_tracing` regardless of context +def _GL_EXT_ray_tracing : _GLSL_460; +def _GL_EXT_ray_tracing_position_fetch : _GL_EXT_ray_query; // requires _GL_EXT_ray_tracing or _GL_EXT_ray_query +def _GL_EXT_samplerless_texture_functions : _GLSL_140; def _GL_EXT_shader_atomic_float : glsl; +def _GL_EXT_shader_atomic_float_min_max : glsl; def _GL_EXT_shader_atomic_float2 : glsl; def _GL_EXT_shader_atomic_int64 : glsl; -def _GL_EXT_shader_atomic_float_min_max : glsl; -def _GL_EXT_shader_explicit_arithmetic_types_int64 : glsl; -def _GL_EXT_shader_image_load_store : glsl; +def _GL_EXT_shader_explicit_arithmetic_types_int64 : _GLSL_140; +def _GL_EXT_shader_image_load_store : _GLSL_130; def _GL_EXT_shader_realtime_clock : glsl; -def _GL_EXT_texture_shadow_lod : glsl; -def _GL_KHR_memory_scope_semantics : 
glsl; -def _GL_KHR_shader_subgroup_arithmetic : glsl; -def _GL_KHR_shader_subgroup_basic : glsl; -def _GL_KHR_shader_subgroup_ballot : glsl; -def _GL_KHR_shader_subgroup_quad : glsl; -def _GL_KHR_shader_subgroup_shuffle : glsl; -def _GL_KHR_shader_subgroup_shuffle_relative : glsl; -def _GL_KHR_shader_subgroup_shuffle_clustered : glsl; -def _GL_KHR_shader_subgroup_vote : glsl; -def _GL_NV_compute_shader_derivatives : glsl; -def _GL_NV_shader_subgroup_partitioned : glsl; -def _GL_NV_ray_tracing_motion_blur : glsl_spirv_1_4; -def _GL_NV_shader_atomic_fp16_vector : glsl; -def _GL_NV_shader_invocation_reorder : glsl_spirv_1_4; -def _GL_NV_shader_texture_footprint : glsl; +def _GL_EXT_texture_query_lod : glsl; +def _GL_EXT_texture_shadow_lod : _GLSL_130; + +def _GL_ARB_derivative_control : _GLSL_400; +def _GL_ARB_fragment_shader_interlock : _GLSL_450; +def _GL_ARB_gpu_shader5 : _GLSL_150; +def _GL_ARB_shader_image_load_store : _GL_EXT_shader_image_load_store; +def _GL_ARB_shader_image_size : _GLSL_420; +def _GL_ARB_texture_multisample : _GLSL_140; +def _GL_ARB_shader_texture_image_samples : _GLSL_150; +def _GL_ARB_sparse_texture : glsl; +def _GL_ARB_sparse_texture2 : _GL_ARB_sparse_texture; +def _GL_ARB_sparse_texture_clamp : _GL_ARB_sparse_texture2; +def _GL_ARB_texture_gather : _GLSL_130; +def _GL_ARB_texture_query_levels : _GLSL_130; + +def _GL_KHR_memory_scope_semantics : _GLSL_420; +def _GL_KHR_shader_subgroup_arithmetic : _GLSL_140; +def _GL_KHR_shader_subgroup_ballot : _GLSL_140; +def _GL_KHR_shader_subgroup_basic : _GLSL_140; +def _GL_KHR_shader_subgroup_clustered : _GLSL_140; +def _GL_KHR_shader_subgroup_quad : _GLSL_140; +def _GL_KHR_shader_subgroup_shuffle : _GLSL_140; +def _GL_KHR_shader_subgroup_shuffle_relative : _GLSL_140; +def _GL_KHR_shader_subgroup_vote : _GLSL_140; + +def _GL_NV_compute_shader_derivatives : _GLSL_450; +def _GL_NV_fragment_shader_barycentric : _GL_EXT_fragment_shader_barycentric; def _GL_NV_gpu_shader5 : _GL_ARB_gpu_shader5; -alias 
_GL_NV_fragment_shader_barycentric = _GL_EXT_fragment_shader_barycentric; -alias _GL_NV_ray_tracing = _GL_EXT_ray_tracing; +def _GL_NV_ray_tracing : _GL_EXT_ray_tracing; +def _GL_NV_ray_tracing_motion_blur : _GLSL_460; +def _GL_NV_shader_atomic_fp16_vector : _GL_NV_gpu_shader5; +def _GL_NV_shader_invocation_reorder : _GLSL_460; +def _GL_NV_shader_subgroup_partitioned : _GLSL_140; +def _GL_NV_shader_texture_footprint : _GLSL_450; // GLSL extension and SPV extension associations. -alias GL_ARB_derivative_control = _GL_ARB_derivative_control | spvDerivativeControl; -alias GL_ARB_fragment_shader_interlock = _GL_ARB_fragment_shader_interlock | spvFragmentShaderPixelInterlockEXT; -alias GL_ARB_gpu_shader5 = _GL_ARB_gpu_shader5 | spirv_1_0; -alias GL_ARB_sparse_texture_clamp = _GL_ARB_fragment_shader_interlock | spirv_1_0; -alias GL_EXT_texture_query_lod = _GL_EXT_texture_query_lod | spvImageQuery | metal; -alias GL_ARB_texture_query_levels = _GL_ARB_texture_query_levels | spvImageQuery | metal; -alias GL_ARB_texture_cube_map = _GL_ARB_texture_cube_map | spirv_1_0; -alias GL_ARB_texture_gather = _GL_ARB_texture_gather | spirv_1_0 | metal; -alias GL_EXT_buffer_reference = _GL_ARB_fragment_shader_interlock | spirv_1_5; -alias GL_EXT_buffer_reference_uvec2 = _GL_EXT_buffer_reference_uvec2 | spirv_1_0; +alias GL_EXT_buffer_reference = _GL_EXT_buffer_reference | SPV_EXT_physical_storage_buffer; +alias GL_EXT_buffer_reference_uvec2 = _GL_EXT_buffer_reference_uvec2 | _spirv_1_0; alias GL_EXT_debug_printf = _GL_EXT_debug_printf | SPV_KHR_non_semantic_info; +alias GL_EXT_demote_to_helper_invocation = _GL_EXT_demote_to_helper_invocation | spvDemoteToHelperInvocationEXT; alias GL_EXT_fragment_shader_barycentric = _GL_EXT_fragment_shader_barycentric | spvFragmentBarycentricKHR; alias GL_EXT_mesh_shader = _GL_EXT_mesh_shader | spvMeshShadingEXT; -alias GL_EXT_nonuniform_qualifier = _GL_EXT_nonuniform_qualifier | spvShaderNonUniform; -alias GL_EXT_ray_query = _GL_EXT_ray_query | 
spvRayTracingKHR + spvRayQueryKHR; -alias GL_EXT_ray_tracing = _GL_EXT_ray_tracing | spvRayTracingKHR + spvRayQueryKHR; -alias GL_EXT_ray_tracing_position_fetch = _GL_EXT_ray_tracing_position_fetch | spvRayTracingPositionFetchKHR + spvRayQueryPositionFetchKHR; -alias GL_EXT_samplerless_texture_functions = _GL_EXT_samplerless_texture_functions | spirv_1_0; +alias GL_EXT_nonuniform_qualifier = _GL_EXT_nonuniform_qualifier | spvShaderNonUniformEXT; +alias GL_EXT_ray_query = _GL_EXT_ray_query | spvRayQueryKHR; +alias GL_EXT_ray_tracing = _GL_EXT_ray_tracing | spvRayTracingKHR; +alias GL_EXT_ray_tracing_position_fetch_ray_tracing = _GL_EXT_ray_tracing_position_fetch | spvRayTracingPositionFetchKHR; +alias GL_EXT_ray_tracing_position_fetch_ray_query = _GL_EXT_ray_tracing_position_fetch | spvRayQueryPositionFetchKHR; +alias GL_EXT_samplerless_texture_functions = _GL_EXT_samplerless_texture_functions | _spirv_1_0; alias GL_EXT_shader_atomic_float = _GL_EXT_shader_atomic_float | spvAtomicFloat32AddEXT + spvAtomicFloat32MinMaxEXT; +alias GL_EXT_shader_atomic_float_min_max = _GL_EXT_shader_atomic_float_min_max | spvAtomicFloat32MinMaxEXT + spvAtomicFloat16MinMaxEXT; alias GL_EXT_shader_atomic_float2 = _GL_EXT_shader_atomic_float2 | spvAtomicFloat32AddEXT + spvAtomicFloat32MinMaxEXT + spvAtomicFloat16AddEXT + spvAtomicFloat16MinMaxEXT; alias GL_EXT_shader_atomic_int64 = _GL_EXT_shader_atomic_int64 | spvInt64Atomics; -alias GL_EXT_shader_atomic_float_min_max = _GL_EXT_shader_atomic_float_min_max | spvAtomicFloat32MinMaxEXT + spvAtomicFloat16MinMaxEXT; -alias GL_EXT_shader_explicit_arithmetic_types_int64 = _GL_EXT_shader_explicit_arithmetic_types_int64 | spirv_1_0; -alias GL_EXT_shader_image_load_store = _GL_EXT_shader_image_load_store | spirv_1_0; +alias GL_EXT_shader_explicit_arithmetic_types_int64 = _GL_EXT_shader_explicit_arithmetic_types_int64 | _spirv_1_0; +alias GL_EXT_shader_image_load_store = _GL_EXT_shader_image_load_store | _spirv_1_0; alias 
GL_EXT_shader_realtime_clock = _GL_EXT_shader_realtime_clock | spvShaderClockKHR; -alias GL_EXT_texture_shadow_lod = _GL_EXT_texture_shadow_lod + _GLSL_400 | spirv_1_0; -alias GL_KHR_memory_scope_semantics = _GL_KHR_memory_scope_semantics | spirv_1_0; +alias GL_EXT_texture_query_lod = _GL_EXT_texture_query_lod | spvImageQuery | metal; +alias GL_EXT_texture_shadow_lod = _GL_EXT_texture_shadow_lod | _spirv_1_0; + +alias GL_ARB_derivative_control = _GL_ARB_derivative_control | spvDerivativeControl; +alias GL_ARB_fragment_shader_interlock = _GL_ARB_fragment_shader_interlock | spvFragmentShaderPixelInterlockEXT; +alias GL_ARB_gpu_shader5 = _GL_ARB_gpu_shader5 | _spirv_1_0; +alias GL_ARB_shader_image_load_store = GL_EXT_shader_image_load_store; +alias GL_ARB_shader_image_size = _GL_ARB_shader_image_size | spvImageQuery | metal; +alias GL_ARB_texture_multisample = _GL_ARB_texture_multisample | _spirv_1_0; +alias GL_ARB_shader_texture_image_samples = _GL_ARB_shader_texture_image_samples | spvImageQuery | metal; +alias GL_ARB_sparse_texture_clamp = _GL_ARB_sparse_texture_clamp | spvSparseResidency; +alias GL_ARB_texture_gather = _GL_ARB_texture_gather | spvImageGatherExtended | metal; +alias GL_ARB_texture_query_levels = _GL_ARB_texture_query_levels | spvImageQuery | metal; + +alias GL_KHR_memory_scope_semantics = _GL_KHR_memory_scope_semantics | _spirv_1_0; alias GL_KHR_shader_subgroup_arithmetic = _GL_KHR_shader_subgroup_arithmetic | spvGroupNonUniformArithmetic; -alias GL_KHR_shader_subgroup_basic = _GL_KHR_shader_subgroup_basic | spvGroupNonUniformBallot; alias GL_KHR_shader_subgroup_ballot = _GL_KHR_shader_subgroup_ballot | spvGroupNonUniformBallot; -alias GL_KHR_shader_subgroup_clustered = _GL_KHR_shader_subgroup_shuffle_clustered | spvGroupNonUniformShuffle; +alias GL_KHR_shader_subgroup_basic = _GL_KHR_shader_subgroup_basic | spvGroupNonUniformBallot; +alias GL_KHR_shader_subgroup_clustered = _GL_KHR_shader_subgroup_clustered | spvGroupNonUniformShuffle; +alias 
GL_KHR_shader_subgroup_quad = _GL_KHR_shader_subgroup_quad | spvGroupNonUniformQuad; alias GL_KHR_shader_subgroup_shuffle = _GL_KHR_shader_subgroup_shuffle | spvGroupNonUniformShuffle; alias GL_KHR_shader_subgroup_shuffle_relative = _GL_KHR_shader_subgroup_shuffle_relative | spvGroupNonUniformShuffle; alias GL_KHR_shader_subgroup_vote = _GL_KHR_shader_subgroup_vote | spvGroupNonUniformVote; -alias GL_KHR_shader_subgroup_quad = _GL_KHR_shader_subgroup_quad | spvGroupNonUniformQuad; + alias GL_NV_compute_shader_derivatives = _GL_NV_compute_shader_derivatives | SPV_NV_compute_shader_derivatives | _sm_6_6; -alias GL_ARB_shader_image_size = _GL_ARB_shader_image_size | spvImageQuery | metal; -alias GL_ARB_shader_texture_image_samples = _GL_ARB_shader_texture_image_samples | spvImageQuery | metal; -alias GL_NV_shader_atomic_fp16_vector = _GL_NV_shader_atomic_fp16_vector + _GL_NV_gpu_shader5 | spirv_1_0; -alias GL_NV_shader_subgroup_partitioned = _GL_NV_shader_subgroup_partitioned | spvGroupNonUniformPartitionedNV; +alias GL_NV_fragment_shader_barycentric = GL_EXT_fragment_shader_barycentric; +alias GL_NV_gpu_shader5 = GL_ARB_gpu_shader5; +alias GL_NV_ray_tracing = GL_EXT_ray_tracing; alias GL_NV_ray_tracing_motion_blur = _GL_NV_ray_tracing_motion_blur | spvRayTracingMotionBlurNV; +alias GL_NV_shader_atomic_fp16_vector = _GL_NV_shader_atomic_fp16_vector + _GL_NV_gpu_shader5 | _spirv_1_0; alias GL_NV_shader_invocation_reorder = _GL_NV_shader_invocation_reorder + _GL_EXT_buffer_reference_uvec2 | spvShaderInvocationReorderNV; +alias GL_NV_shader_subgroup_partitioned = _GL_NV_shader_subgroup_partitioned | spvGroupNonUniformPartitionedNV; alias GL_NV_shader_texture_footprint = _GL_NV_shader_texture_footprint | spvImageFootprintNV; -alias GL_NV_fragment_shader_barycentric = GL_EXT_fragment_shader_barycentric; -alias GL_NV_ray_tracing = GL_EXT_ray_tracing; - // Define feature names alias nvapi = hlsl_nvapi; -alias raytracing = GL_EXT_ray_tracing | _sm_6_5 | cuda; +alias 
raytracing = GL_EXT_ray_tracing | _sm_6_3 | cuda; alias ser = raytracing + GL_NV_shader_invocation_reorder | raytracing + hlsl_nvapi | cuda; -alias motionblur = GL_NV_ray_tracing_motion_blur | _sm_6_5 + hlsl_nvapi | cuda; -alias rayquery = GL_EXT_ray_query | _sm_6_5; +alias motionblur = GL_NV_ray_tracing_motion_blur | _sm_6_3 + hlsl_nvapi | cuda; +alias rayquery = GL_EXT_ray_query | _sm_6_3; alias raytracing_motionblur = raytracing + motionblur | cuda; alias ser_motion = ser + motionblur; alias shaderclock = GL_EXT_shader_realtime_clock | hlsl_nvapi | cpp | cuda; @@ -383,120 +405,187 @@ alias fragmentshaderinterlock = _GL_ARB_fragment_shader_interlock | hlsl_nvapi | alias atomic64 = GL_EXT_shader_atomic_int64 | _sm_6_6 | cpp | cuda; alias atomicfloat = GL_EXT_shader_atomic_float | _sm_6_0 + hlsl_nvapi | cpp | cuda; alias atomicfloat2 = GL_EXT_shader_atomic_float2 | _sm_6_6 + hlsl_nvapi | cpp | cuda; -alias groupnonuniform = GL_KHR_shader_subgroup_ballot + GL_KHR_shader_subgroup_shuffle - + GL_KHR_shader_subgroup_arithmetic + GL_KHR_shader_subgroup_quad + GL_KHR_shader_subgroup_vote - | _sm_6_0 | cuda; alias fragmentshaderbarycentric = GL_EXT_fragment_shader_barycentric | _sm_6_1; -alias shadermemorycontrol = glsl | spirv_1_0 | _sm_5_0; +alias shadermemorycontrol = glsl | _spirv_1_0 | _sm_5_0; alias shadermemorycontrol_compute = raytracingstages_compute + shadermemorycontrol; alias subpass = fragment + any_gfx_target; alias waveprefix = _sm_6_5 | _cuda_sm_7_0 | GL_KHR_shader_subgroup_arithmetic; alias bufferreference = GL_EXT_buffer_reference; alias bufferreference_int64 = bufferreference + GL_EXT_shader_explicit_arithmetic_types_int64; -// Define what each HLSL shader model means on different targets. 
- - -alias sm_4_0 = _sm_4_0 - | glsl_spirv_1_0 + _GL_ARB_sparse_texture_clamp + _GL_EXT_samplerless_texture_functions - | spirv_1_0 + spvImageQuery + spvImageGatherExtended + spvMinLod + SPV_GOOGLE_user_type +// Define what each shader model means on different targets. + +alias spirv_1_0 = _spirv_1_0; +alias spirv_1_1 = _spirv_1_1 + | spirv_1_0 + ; +alias spirv_1_2 = _spirv_1_2 + | spirv_1_1 + ; +alias spirv_1_3 = _spirv_1_3 + | spirv_1_2 + ; +alias spirv_1_4 = _spirv_1_4 + | spirv_1_3 + ; +alias spirv_1_5 = _spirv_1_5 + GL_EXT_nonuniform_qualifier + GL_EXT_buffer_reference + | spirv_1_4 + ; +alias spirv_1_6 = _spirv_1_6 + GL_EXT_debug_printf + GL_EXT_demote_to_helper_invocation + | spirv_1_5 + ; +alias spirv_latest = _spirv_1_6; + +alias sm_4_0_version = _sm_4_0 + | _GLSL_150 + | spirv_1_0 | _cuda_sm_2_0 | metal | cpp ; +alias sm_4_0 = sm_4_0_version + | SPV_GOOGLE_user_type + spvMinLod + | GL_ARB_sparse_texture_clamp + GL_EXT_samplerless_texture_functions + GL_EXT_texture_query_lod + GL_EXT_texture_shadow_lod + GL_EXT_debug_printf + ; -alias sm_4_1 = _sm_4_1 - | glsl_spirv_1_0 + _GLSL_150 + sm_4_0 - | spirv_1_0 + sm_4_0 +alias sm_4_1_version = _sm_4_1 + | _GLSL_150 + | spirv_1_0 | _cuda_sm_6_0 | metal | cpp ; +alias sm_4_1 = sm_4_1_version + // previous + | sm_4_0 + ; -alias sm_5_0 = _sm_5_0 - | glsl_spirv_1_0 + sm_4_1 + _GL_KHR_memory_scope_semantics - | spirv_1_0 + sm_4_1 + spvDerivativeControl + spvFragmentFullyCoveredEXT +alias sm_5_0_version = _sm_5_0 + | _GLSL_330 + | spirv_1_0 | _cuda_sm_9_0 | metal | cpp ; +alias sm_5_0 = sm_5_0_version + | GL_KHR_memory_scope_semantics + GL_ARB_gpu_shader5 + GL_ARB_derivative_control + | spvFragmentFullyCoveredEXT + // previous + | sm_4_1 + ; -alias sm_5_1 = _sm_5_1 - | glsl_spirv_1_0 + sm_5_0 + _GL_ARB_gpu_shader5 + _GL_EXT_nonuniform_qualifier - | spirv_1_0 + sm_5_0 + spvShaderNonUniform +alias sm_5_1_version = _sm_5_1 + | _GLSL_330 + | spirv_1_0 | _cuda_sm_9_0 | metal | cpp ; +alias sm_5_1 = sm_5_1_version + | 
GL_EXT_nonuniform_qualifier + GL_ARB_gpu_shader5 + // previous + | sm_5_0 + ; -alias sm_6_0 = _sm_6_0 - | glsl_spirv_1_3 + sm_5_1 - + groupnonuniform + atomicfloat - | spirv_1_3 + sm_5_1 - + groupnonuniform + atomicfloat +alias sm_6_0_version = _sm_6_0 + | _GLSL_450 + | spirv_1_3 | _cuda_sm_9_0 | metal | cpp ; +alias sm_6_0 = sm_6_0_version + | GL_KHR_shader_subgroup_ballot + GL_KHR_shader_subgroup_shuffle + GL_KHR_shader_subgroup_arithmetic + GL_KHR_shader_subgroup_quad + GL_KHR_shader_subgroup_vote + // previous + | sm_5_1 + ; -alias sm_6_1 = _sm_6_1 - | glsl_spirv_1_3 + sm_6_0 + fragmentshaderbarycentric - | spirv_1_3 + sm_6_0 + fragmentshaderbarycentric +alias sm_6_1_version = _sm_6_1 + | _GLSL_450 + | spirv_1_3 | _cuda_sm_9_0 | metal | cpp ; +alias sm_6_1 = sm_6_1_version + | GL_EXT_fragment_shader_barycentric + // previous + | sm_6_0 + ; -alias sm_6_2 = _sm_6_2 - | glsl_spirv_1_3 + sm_6_1 - | spirv_1_3 + sm_6_1 +alias sm_6_2_version = _sm_6_2 + | _GLSL_450 + | spirv_1_3 | _cuda_sm_9_0 | metal | cpp ; +alias sm_6_2 = sm_6_2_version + // previous + | sm_6_1 + ; -alias sm_6_3 = _sm_6_3 - | glsl_spirv_1_4 + sm_6_2 + _GL_EXT_ray_tracing - | spirv_1_4 + sm_6_2 + SPV_KHR_ray_tracing +alias sm_6_3_version = _sm_6_3 + | _GLSL_460 + | spirv_1_4 | _cuda_sm_9_0 | metal | cpp ; +alias sm_6_3 = sm_6_3_version + | GL_EXT_ray_tracing + GL_EXT_ray_tracing_position_fetch_ray_tracing + // previous + | sm_6_2 + ; -alias sm_6_4 = _sm_6_4 - | glsl_spirv_1_4 + sm_6_3 - | spirv_1_4 + sm_6_3 +alias sm_6_4_version = _sm_6_4 + | _GLSL_460 + | spirv_1_4 | _cuda_sm_9_0 | metal | cpp ; +alias sm_6_4 = sm_6_4_version + // previous + | sm_6_3 + ; -alias sm_6_5 = _sm_6_5 - | glsl_spirv_1_4 + sm_6_4 + raytracing + meshshading - | spirv_1_4 + sm_6_4 + raytracing + meshshading +alias sm_6_5_version = _sm_6_5 + | _GLSL_460 + | spirv_1_4 | _cuda_sm_9_0 | metal | cpp ; +alias sm_6_5 = sm_6_5_version + // also requires: GL_NV_shader_subgroup_partitioned + | GL_EXT_mesh_shader + GL_EXT_ray_query + 
GL_EXT_ray_tracing_position_fetch_ray_query + // previous + | sm_6_4 + ; -alias sm_6_6 = _sm_6_6 - | glsl_spirv_1_5 + sm_6_5 - + GL_EXT_shader_atomic_int64 + atomicfloat2 - | spirv_1_5 + sm_6_5 - + GL_EXT_shader_atomic_int64 + atomicfloat2 - + SPV_EXT_descriptor_indexing +alias sm_6_6_version = _sm_6_6 + | _GLSL_460 + | spirv_1_4 | _cuda_sm_9_0 | metal | cpp ; +alias sm_6_6 = sm_6_6_version + | GL_EXT_shader_atomic_int64 + GL_EXT_shader_atomic_float2 + GL_EXT_shader_atomic_float + _GL_EXT_shader_atomic_float_min_max + | sm_6_5 + ; -alias sm_6_7 = _sm_6_7 - | glsl_spirv_1_5 + sm_6_6 - | spirv_1_5 + sm_6_6 +alias sm_6_7_version = _sm_6_7 + | _GLSL_460 + | spirv_1_4 | _cuda_sm_9_0 | metal | cpp ; +alias sm_6_7 = sm_6_7_version + | sm_6_6 + ; // Profiles alias GLSL_130 = _GLSL_130 @@ -510,8 +599,10 @@ alias GLSL_140 = _GLSL_140 | _sm_4_1 | _cuda_sm_2_0 | spirv_1_0 - | metal + | metal | cpp + // previous + | GLSL_130 ; alias GLSL_150 = _GLSL_150 | _sm_4_1 @@ -519,6 +610,10 @@ alias GLSL_150 = _GLSL_150 | spirv_1_0 | metal | cpp + // extensions to propagate + | glsl_spirv + GL_ARB_texture_multisample + // previous + | GLSL_140 ; alias GLSL_330 = _GLSL_330 | _sm_5_0 @@ -526,6 +621,8 @@ alias GLSL_330 = _GLSL_330 | spirv_1_0 | metal | cpp + // previous + | GLSL_150 ; alias GLSL_400 = _GLSL_400 | _sm_5_1 @@ -533,6 +630,10 @@ alias GLSL_400 = _GLSL_400 | spirv_1_3 | metal | cpp + // extensions to propagate + | glsl_spirv + GL_ARB_gpu_shader5 + GL_ARB_texture_gather + // previous + | GLSL_330 ; alias GLSL_410 = _GLSL_410 | _sm_5_1 @@ -540,6 +641,8 @@ alias GLSL_410 = _GLSL_410 | spirv_1_3 | metal | cpp + // previous + | GLSL_400 ; alias GLSL_420 = _GLSL_420 | _sm_5_1 @@ -547,6 +650,10 @@ alias GLSL_420 = _GLSL_420 | spirv_1_3 | metal | cpp + // extensions to propagate + | glsl_spirv + GL_ARB_shader_image_load_store + // previous + | GLSL_410 ; alias GLSL_430 = _GLSL_430 | _sm_5_1 @@ -554,27 +661,39 @@ alias GLSL_430 = _GLSL_430 | spirv_1_3 | metal | cpp + // extensions to 
propagate + | glsl_spirv + GL_ARB_shader_image_size + GL_ARB_texture_query_levels + // previous + | GLSL_420 ; alias GLSL_440 = _GLSL_440 | _sm_6_0 | _cuda_sm_6_0 - | spirv_1_5 + | spirv_1_3 | metal | cpp + // previous + | GLSL_430 ; -alias GLSL_450 = _GLSL_450 +alias GLSL_450 = _GLSL_450 | _sm_6_0 | _cuda_sm_6_0 - | spirv_1_5 + | spirv_1_3 | metal | cpp + // extensions to propagate + | glsl_spirv + GL_ARB_derivative_control + GL_ARB_shader_texture_image_samples + // previous + | GLSL_440 ; alias GLSL_460 = _GLSL_460 | _sm_6_6 | _cuda_sm_6_0 - | spirv_1_5 + | spirv_1_3 | metal | cpp + // previous + | GLSL_450 ; alias GLSL_410_SPIRV_1_0 = _GLSL_410 | spirv_1_0; @@ -606,15 +725,13 @@ alias SPIRV_1_4 = spirv_1_4; alias SPIRV_1_5 = spirv_1_5; alias SPIRV_1_6 = spirv_1_6; -alias sm_2_0_GLSL_140 = _GLSL_140 + sm_4_0 | sm_4_0; -alias sm_2_0_GLSL_400 = _GLSL_400 + sm_4_0 | sm_4_0; -alias appendstructuredbuffer = sm_5_0 + raytracingstages_compute_fragment; +alias appendstructuredbuffer = sm_5_0_version; alias atomic_hlsl = _sm_4_0; alias atomic_hlsl_nvapi = _sm_4_0 + hlsl_nvapi; alias atomic_hlsl_sm_6_6 = _sm_6_6; -alias byteaddressbuffer = sm_4_0; -alias byteaddressbuffer_rw = sm_4_0 + raytracingstages_compute_fragment; -alias consumestructuredbuffer = sm_5_0 + raytracingstages_compute_fragment; +alias byteaddressbuffer = sm_4_0_version; +alias byteaddressbuffer_rw = sm_4_0_version; +alias consumestructuredbuffer = sm_5_0_version; alias fragmentprocessing = fragment + _sm_5_0 | fragment + glsl_spirv | raytracingstages_compute_amplification_mesh + GL_NV_compute_shader_derivatives @@ -624,44 +741,23 @@ alias fragmentprocessing_derivativecontrol = fragment + _sm_5_0 | raytracingstages_compute_amplification_mesh + GL_NV_compute_shader_derivatives ; alias getattributeatvertex = fragment + _sm_6_1 | fragment + GL_EXT_fragment_shader_barycentric; -alias memorybarrier_compute = raytracingstages_compute + sm_5_0; -alias glsl_barrier = hlsl + memorybarrier_compute - | glsl_spirv + 
compute_tesscontrol_tesseval - ; -alias structuredbuffer = sm_4_0; -alias structuredbuffer_rw = sm_4_0 + raytracingstages_compute_fragment; -alias texture_sm_4_1 = sm_4_1 +alias memorybarrier = sm_5_0_version; +alias structuredbuffer = sm_4_0_version; +alias structuredbuffer_rw = sm_4_0_version; +alias texture_sm_4_0 = sm_4_0_version | GL_ARB_sparse_texture_clamp + GL_EXT_texture_query_lod ; -alias texture_sm_4_1_samplerless = cpp + texture_sm_4_1 - | cuda + texture_sm_4_1 - | glsl + texture_sm_4_1 + GL_EXT_samplerless_texture_functions - | hlsl + texture_sm_4_1 + raytracingstages_compute_fragment - | spirv_1_0 + texture_sm_4_1 + GL_EXT_samplerless_texture_functions - | metal + texture_sm_4_1 - ; -alias texture_sm_4_1_compute_fragment = cpp + texture_sm_4_1 - | cuda + texture_sm_4_1 - | glsl + texture_sm_4_1 - | hlsl + texture_sm_4_1 + raytracingstages_compute_fragment - | spirv_1_0 + texture_sm_4_1 - | metal + texture_sm_4_1 - ; -// supposedly works on compute but docs say nothing, so for now keep as compute_fragment -alias texture_sm_4_1_fragment = cpp + texture_sm_4_1 - | cuda + texture_sm_4_1 - | glsl + texture_sm_4_1 - | hlsl + texture_sm_4_1 + raytracingstages_compute_fragment - | spirv_1_0 + texture_sm_4_1 - | metal + texture_sm_4_1 +alias texture_sm_4_1 = sm_4_1_version | GL_ARB_sparse_texture_clamp + GL_EXT_texture_query_lod + ; +alias texture_sm_4_1_samplerless = texture_sm_4_1 + // add samplerless to all targets that need an extension + | GL_EXT_samplerless_texture_functions ; -alias texture_sm_4_1_clamp_fragment = texture_sm_4_1_fragment + GL_ARB_sparse_texture_clamp; -alias texture_sm_4_1_vertex_fragment_geometry = cpp + texture_sm_4_1 - | cuda + texture_sm_4_1 - | glsl + texture_sm_4_1 - | hlsl + texture_sm_4_1 + raytracingstages_compute_fragment_geometry_vertex - | spirv_1_0 + texture_sm_4_1 - | metal + texture_sm_4_1 - ; + +// supposedly works on only limited stages, support all stages for now +alias texture_sm_4_1_compute_fragment = texture_sm_4_1; 
+alias texture_sm_4_0_fragment = texture_sm_4_0; +alias texture_sm_4_1_clamp_fragment = texture_sm_4_0_fragment + GL_ARB_sparse_texture_clamp; +alias texture_sm_4_1_vertex_fragment_geometry = texture_sm_4_1; alias texture_gather = texture_sm_4_1_vertex_fragment_geometry + GL_ARB_texture_gather; alias image_samples = texture_sm_4_1_compute_fragment + GL_ARB_shader_texture_image_samples; alias image_size = texture_sm_4_1_compute_fragment + GL_ARB_shader_image_size; @@ -670,9 +766,6 @@ alias texture_querylod = texture_sm_4_1 + GL_EXT_texture_query_lod; alias texture_querylevels = texture_sm_4_1 + GL_ARB_texture_query_levels; alias texture_shadowlod = texture_sm_4_1 + GL_EXT_texture_shadow_lod | texture_sm_4_1; -alias texture_shadowlod_cube = texture_shadowlod | texture_shadowlod + GL_ARB_texture_cube_map; -alias texture_cube = texture_sm_4_1 + GL_ARB_texture_cube_map | texture_sm_4_1; -alias texture_querylevels_cube = texture_querylevels + GL_ARB_texture_cube_map | texture_querylevels; alias atomic_glsl_float1 = GL_EXT_shader_atomic_float; alias atomic_glsl_float2 = GL_EXT_shader_atomic_float2; @@ -686,8 +779,8 @@ alias printf = GL_EXT_debug_printf | _sm_4_0 | _cuda_sm_2_0 | cpp; alias texturefootprint = GL_NV_shader_texture_footprint + GLSL_450 | hlsl_nvapi + _sm_4_0; alias texturefootprintclamp = texturefootprint + GL_ARB_sparse_texture_clamp; -alias shader5_sm_4_0 = GL_ARB_gpu_shader5 + _GLSL_140 + sm_4_0 | sm_4_0; -alias shader5_sm_5_0 = GL_ARB_gpu_shader5 + _GLSL_140 + sm_4_0 | sm_5_0; +alias shader5_sm_4_0 = GL_ARB_gpu_shader5 | sm_4_0_version; +alias shader5_sm_5_0 = GL_ARB_gpu_shader5 | sm_5_0_version; alias subgroup_basic = GL_KHR_shader_subgroup_basic | _sm_6_0 | _cuda_sm_7_0; alias subgroup_ballot = spirv_1_0 + GL_KHR_shader_subgroup_ballot @@ -714,17 +807,16 @@ alias subgroup_clustered = GL_KHR_shader_subgroup_clustered | _sm_6_0 | _cuda_sm alias subgroup_quad = GL_KHR_shader_subgroup_quad | _sm_6_0 | _cuda_sm_7_0; alias subgroup_partitioned = 
GL_NV_shader_subgroup_partitioned + subgroup_ballot_activemask | _sm_6_5; +alias atomic_glsl_hlsl_nvapi_cuda_float1 = atomic_glsl_float1 | hlsl_nvapi + _sm_4_0 | _cuda_sm_2_0; +alias atomic_glsl_hlsl_nvapi_cuda5_int64 = atomic_glsl_int64 | hlsl_nvapi + _sm_4_0 | _cuda_sm_6_0; +alias atomic_glsl_hlsl_nvapi_cuda6_int64 = atomic_glsl_int64 | hlsl_nvapi + _sm_4_0 | _cuda_sm_6_0; +alias atomic_glsl_hlsl_nvapi_cuda9_int64 = atomic_glsl_int64 | hlsl_nvapi + _sm_4_0 | _cuda_sm_9_0; + alias atomic_glsl_hlsl_cuda = atomic_glsl | _sm_5_0 | _cuda_sm_2_0; -alias atomic_glsl_hlsl_cuda_float1 = atomic_glsl_float1 | atomic_hlsl_nvapi | _cuda_sm_2_0; -alias atomic_glsl_hlsl_cuda_float2 = atomic_glsl_float2 | atomic_hlsl_nvapi | _cuda_sm_2_0; -alias atomic_glsl_hlsl_cuda2_int64 = atomic_glsl_int64 | atomic_hlsl_nvapi | _cuda_sm_2_0; -alias atomic_glsl_hlsl_cuda5_int64 = atomic_glsl_int64 | atomic_hlsl_nvapi | _cuda_sm_6_0; -alias atomic_glsl_hlsl_cuda6_int64 = atomic_glsl_int64 | atomic_hlsl_nvapi | _cuda_sm_6_0; -alias atomic_glsl_hlsl_cuda9_int64 = atomic_glsl_int64 | atomic_hlsl_nvapi | _cuda_sm_9_0; +alias atomic_glsl_hlsl_cuda9_int64 = atomic_glsl_int64 | _sm_6_6 | _cuda_sm_9_0; alias breakpoint = GL_EXT_debug_printf | hlsl | _cuda_sm_8_0 | cpp; -alias rayobject = raytracing | rayquery; alias raytracing_allstages = raytracing_stages + raytracing; alias raytracing_anyhit = anyhit + raytracing; alias raytracing_intersection = intersection + raytracing; @@ -733,10 +825,10 @@ alias raytracing_anyhit_closesthit_intersection = anyhit_closesthit_intersection alias raytracing_raygen_closesthit_miss = raygen_closesthit_miss + raytracing; alias raytracing_anyhit_closesthit_intersection_miss = anyhit_closesthit_intersection_miss + raytracing; alias raytracing_raygen_closesthit_miss_callable = raygen_closesthit_miss_callable + raytracing; -alias raytracing_position = raytracing + GL_EXT_ray_tracing_position_fetch + anyhit_closesthit; +alias raytracing_position = raytracing + 
GL_EXT_ray_tracing_position_fetch_ray_tracing + anyhit_closesthit; alias raytracing_motionblur_anyhit_closesthit_intersection_miss = anyhit_closesthit_intersection_miss + raytracing_motionblur; alias raytracing_motionblur_raygen_closesthit_miss = raygen_closesthit_miss + raytracing_motionblur; -alias rayquery_position = rayquery + GL_EXT_ray_tracing_position_fetch; +alias rayquery_position = rayquery + GL_EXT_ray_tracing_position_fetch_ray_query; alias ser_raygen = raygen + ser; alias ser_raygen_closesthit_miss = raygen_closesthit_miss + ser; alias ser_any_closesthit_intersection_miss = anyhit_closesthit_intersection_miss + ser; @@ -746,7 +838,7 @@ alias ser_motion_raygen_closesthit_miss = raygen_closesthit_miss + ser_motion; alias ser_motion_raygen = raygen + ser_motion; alias all = _sm_6_7 + hlsl_nvapi - | glsl_spirv_1_5 + sm_6_7 + | sm_6_7 + ser + shaderclock + texturefootprint + fragmentshaderinterlock + _GL_NV_shader_subgroup_partitioned + _GL_NV_ray_tracing_motion_blur + _GL_NV_shader_texture_footprint | spirv_1_5 + sm_6_7 diff --git a/source/slang/slang-capability.cpp b/source/slang/slang-capability.cpp index 21b2641b4..2c3069f61 100644 --- a/source/slang/slang-capability.cpp +++ b/source/slang/slang-capability.cpp @@ -100,16 +100,16 @@ bool isDirectChildOfAbstractAtom(CapabilityAtom name) return _getInfo(name).abstractBase != CapabilityName::Invalid; } -bool isTargetVersionAtom(CapabilityName name) +bool isTargetVersionAtom(CapabilityAtom name) { - if (name >= CapabilityName::spirv_1_0 && name <= getLatestSpirvAtom()) + if (name >= CapabilityAtom::_spirv_1_0 && name <= getLatestSpirvAtom()) return true; - if (name >= CapabilityName::metallib_2_3 && name <= getLatestMetalAtom()) + if (name >= CapabilityAtom::metallib_2_3 && name <= getLatestMetalAtom()) return true; return false; } -bool isSpirvExtensionAtom(CapabilityName name) +bool isSpirvExtensionAtom(CapabilityAtom name) { return UnownedStringSlice(_getInfo(name).name).startsWith("SPV_"); } @@ -124,26 
+124,26 @@ CapabilityName findCapabilityName(UnownedStringSlice const& name) return result; } -CapabilityName getLatestSpirvAtom() +inline CapabilityAtom getLatestSpirvAtom() { - static CapabilityName result = CapabilityName::Invalid; - if (result == CapabilityName::Invalid) + static CapabilityAtom result = CapabilityAtom::Invalid; + if (result == CapabilityAtom::Invalid) { - CapabilitySet latestSpirvCapSet = CapabilitySet(CapabilityName::spirv_latest); + CapabilitySet latestSpirvCapSet = CapabilitySet(CapabilityName::_spirv_latest); auto latestSpirvCapSetElements = latestSpirvCapSet.getAtomSets()->getElements(); - result = (CapabilityName)latestSpirvCapSetElements[latestSpirvCapSetElements.getCount() - 2]; //-1 gets shader stage + result = asAtom(latestSpirvCapSetElements[latestSpirvCapSetElements.getCount() - 2]); //-1 gets shader stage } return result; } -CapabilityName getLatestMetalAtom() +CapabilityAtom getLatestMetalAtom() { - static CapabilityName result = CapabilityName::Invalid; - if (result == CapabilityName::Invalid) + static CapabilityAtom result = CapabilityAtom::Invalid; + if (result == CapabilityAtom::Invalid) { CapabilitySet latestMetalCapSet = CapabilitySet(CapabilityName::metallib_latest); auto latestMetalCapSetElements = latestMetalCapSet.getAtomSets()->getElements(); - result = (CapabilityName)latestMetalCapSetElements[latestMetalCapSetElements.getCount() - 2]; //-1 gets shader stage + result = asAtom(latestMetalCapSetElements[latestMetalCapSetElements.getCount() - 2]); //-1 gets shader stage } return result; } @@ -175,7 +175,7 @@ CapabilityAtom getTargetAtomInSet(const CapabilityAtomSet& atomSet) auto iter = out.begin(); if (iter == out.end()) return CapabilityAtom::Invalid; - return (CapabilityAtom)*iter; + return asAtom(*iter); } CapabilityAtom getStageAtomInSet(const CapabilityAtomSet& atomSet) @@ -186,7 +186,7 @@ CapabilityAtom getStageAtomInSet(const CapabilityAtomSet& atomSet) auto iter = out.begin(); if (iter == out.end()) return 
CapabilityAtom::Invalid; - return (CapabilityAtom)*iter; + return asAtom(*iter); } template @@ -201,11 +201,11 @@ void CapabilitySet::addPermutationsOfConjunctionForEachInContainer(CapabilityAto if constexpr (keyholeAtomToPermuteWith == CapabilityName::target) { - addConjunction(conjunctionPermutation, (CapabilityAtom)atom, knownStageAtom); + addConjunction(conjunctionPermutation, asAtom(atom), knownStageAtom); } else if constexpr (keyholeAtomToPermuteWith == CapabilityName::stage) { - addConjunction(conjunctionPermutation, knownTargetAtom, (CapabilityAtom)atom); + addConjunction(conjunctionPermutation, knownTargetAtom, asAtom(atom)); } else { @@ -394,17 +394,25 @@ bool CapabilitySet::implies(CapabilityAtom atom) const return this->implies(tmpSet); } -bool CapabilitySet::implies(CapabilitySet const& other, const bool onlyRequireSingleImply) const +CapabilitySet::ImpliesReturnFlags CapabilitySet::_implies(CapabilitySet const& otherSet, ImpliesFlags flags) const { // x implies (c | d) only if (x implies c) and (x implies d). - for (const auto& otherTarget : other.m_targetSets) + bool onlyRequireSingleImply = ((int)flags & (int)ImpliesFlags::OnlyRequireASingleValidImply); + int flagsCollected = (int)CapabilitySet::ImpliesReturnFlags::NotImplied; + + if (otherSet.isEmpty()) + return CapabilitySet::ImpliesReturnFlags::Implied; + + for (const auto& otherTarget : otherSet.m_targetSets) { auto thisTarget = this->m_targetSets.tryGetValue(otherTarget.first); if (!thisTarget) { + if (onlyRequireSingleImply) + continue; // 'this' lacks a target 'other' has. - return false; + return CapabilitySet::ImpliesReturnFlags::NotImplied; } for (const auto& otherStage : otherTarget.second.shaderStageSets) @@ -412,31 +420,44 @@ bool CapabilitySet::implies(CapabilitySet const& other, const bool onlyRequireSi auto thisStage = thisTarget->shaderStageSets.tryGetValue(otherStage.first); if (!thisStage) { + if (onlyRequireSingleImply) + continue; // 'this' lacks a stage 'other' has. 
- return false; + return CapabilitySet::ImpliesReturnFlags::NotImplied; } // all stage sets that are in 'other' must be contained by 'this' - if(thisStage->atomSet) + if (thisStage->atomSet) { auto& thisStageSet = thisStage->atomSet.value(); - if(otherStage.second.atomSet) - { - if (!onlyRequireSingleImply) + if (otherStage.second.atomSet) + { + auto contained = thisStageSet.contains(otherStage.second.atomSet.value()); + if (!onlyRequireSingleImply && !contained) { - if (!thisStageSet.contains(otherStage.second.atomSet.value())) - return false; + return CapabilitySet::ImpliesReturnFlags::NotImplied; } - else + else if (onlyRequireSingleImply && contained) { - if (thisStageSet.contains(otherStage.second.atomSet.value())) - return true; + return CapabilitySet::ImpliesReturnFlags::Implied; } } } } } - return !onlyRequireSingleImply; + if (!onlyRequireSingleImply) + flagsCollected |= (int)CapabilitySet::ImpliesReturnFlags::Implied; + + return (CapabilitySet::ImpliesReturnFlags)flagsCollected; +} + +bool CapabilitySet::implies(CapabilitySet const& other) const +{ + return (int)_implies(other, ImpliesFlags::None) & (int)CapabilitySet::ImpliesReturnFlags::Implied; +} +CapabilitySet::ImpliesReturnFlags CapabilitySet::atLeastOneSetImpliedInOther(CapabilitySet const& other) const +{ + return _implies(other, ImpliesFlags::OnlyRequireASingleValidImply); } void CapabilityTargetSet::unionWith(const CapabilityTargetSet& other) @@ -827,6 +848,53 @@ bool CapabilitySet::checkCapabilityRequirement(CapabilitySet const& available, C return true; } +/// Converts spirv version atom to the glsl_spirv equivalent.
If not possible, Invalid is returned +inline CapabilityName maybeConvertSpirvVersionToGlslSpirvVersion(CapabilityName& atom) +{ + if (atom >= CapabilityName::_spirv_1_0 && asAtom(atom) <= getLatestSpirvAtom()) + { + return (CapabilityName)((Int)CapabilityName::glsl_spirv_1_0 + ((Int)atom - (Int)CapabilityName::_spirv_1_0)); + } + return CapabilityName::Invalid; +} + +void CapabilitySet::addSpirvVersionFromOtherAsGlslSpirvVersion(CapabilitySet& other) +{ + if (auto* otherTargetSet = other.m_targetSets.tryGetValue(CapabilityAtom::spirv)) + { + auto* thisTargetSet = m_targetSets.tryGetValue(CapabilityAtom::glsl); + if (!thisTargetSet) + return; + + for (auto& otherStageSet : otherTargetSet->shaderStageSets) + { + if (!otherStageSet.second.atomSet) + continue; + + auto* thisStageSet = thisTargetSet->shaderStageSets.tryGetValue(otherStageSet.first); + if (!thisStageSet || !thisStageSet->atomSet) + continue; + + CapabilityAtomSet::Iterator otherAtom = otherStageSet.second.atomSet->begin(); + while (otherAtom != otherStageSet.second.atomSet->end()) + { + auto otherAtomName = (CapabilityName)*otherAtom; /* read the current atom before advancing: begin() already refers to the first element, and end() must never be dereferenced */ + otherAtom++; /* advance here, ahead of the `continue` statements below, so every path makes progress */ + if (otherAtomName > (CapabilityName)getLatestSpirvAtom()) + { + otherAtom = otherStageSet.second.atomSet->end(); + continue; + } + auto maybeConvertedSpirvVersionAtom = maybeConvertSpirvVersionToGlslSpirvVersion(otherAtomName); + if (maybeConvertedSpirvVersionAtom == CapabilityName::Invalid) + continue; + + thisStageSet->atomSet->add((UInt)maybeConvertedSpirvVersionAtom); + } + } + } +} + void printDiagnosticArg(StringBuilder& sb, const CapabilitySet& capSet) { bool isFirstSet = true; @@ -864,6 +932,20 @@ void printDiagnosticArg(StringBuilder& sb, CapabilityName name) sb << _getInfo(name).name; } +void printDiagnosticArg(StringBuilder& sb, List& list) +{ + sb << "{"; + auto count = list.getCount(); + for(Index i = 0; i < count; i++) + { + printDiagnosticArg(sb, list[i]); + if (i + 1 != count) + sb << ", "; + } + sb << "}"; +} + + #ifdef
UNIT_TEST_CAPABILITIES #define CHECK_CAPS(inData) SLANG_ASSERT(inData>0) diff --git a/source/slang/slang-capability.h b/source/slang/slang-capability.h index 8fd9e2bd4..53164be7f 100644 --- a/source/slang/slang-capability.h +++ b/source/slang/slang-capability.h @@ -139,8 +139,15 @@ public: /// Is this capability set incompatible with the given `other` atomic capability. bool isIncompatibleWith(CapabilitySet const& other) const; + enum class ImpliesReturnFlags : int + { + NotImplied = 0, + Implied = 1 << 0, + }; /// Does this capability set imply all the capabilities in `other`? - bool implies(CapabilitySet const& other, const bool onlyRequireSingleImply = false) const; + bool implies(CapabilitySet const& other) const; + /// Does this capability set imply at least 1 set in other. + ImpliesReturnFlags atLeastOneSetImpliedInOther(CapabilitySet const& other) const; /// Does this capability set imply the atomic capability `other`? bool implies(CapabilityAtom other) const; @@ -289,6 +296,10 @@ public: /// Get access to the raw atomic capabilities that define this set. /// Get all bottom level UIntSets for each CapabilityTargetSet. CapabilitySet::AtomSets::Iterator getAtomSets() const; + + /// Add spirv version capabilities from 'spirv CapabilityTargetSet' as glsl_spirv version capability in 'glsl CapabilityTargetSet' + void addSpirvVersionFromOtherAsGlslSpirvVersion(CapabilitySet& other); + private: /// underlying data of CapabilitySet. 
CapabilityTargetSets m_targetSets{}; @@ -296,6 +307,13 @@ private: void addCapability(CapabilityName name); bool hasSameTargets(const CapabilitySet& other) const; + + enum class ImpliesFlags + { + None = 0, + OnlyRequireASingleValidImply = 1 << 0, + }; + ImpliesReturnFlags _implies(CapabilitySet const& other, ImpliesFlags flags) const; }; /// Returns true if atom is derived from base @@ -304,8 +322,16 @@ bool isCapabilityDerivedFrom(CapabilityAtom atom, CapabilityAtom base); /// Find a capability atom with the given `name`, or return CapabilityAtom::Invalid. CapabilityName findCapabilityName(UnownedStringSlice const& name); -CapabilityName getLatestSpirvAtom(); -CapabilityName getLatestMetalAtom(); +CapabilityAtom getLatestSpirvAtom(); +CapabilityAtom getLatestMetalAtom(); + +/// For debug purposes ensure a casted CapabilityAtom is valid +template +inline CapabilityAtom asAtom(T name) +{ + SLANG_ASSERT((UInt)name < (UInt)CapabilityAtom::Count); + return CapabilityAtom(name); +} /// Gets the capability names. void getCapabilityNames(List& ioNames); @@ -316,8 +342,8 @@ bool isDirectChildOfAbstractAtom(CapabilityAtom name); /// Return true if `name` represents an atom for a target version, e.g. spirv_1_5. -bool isTargetVersionAtom(CapabilityName name); -bool isSpirvExtensionAtom(CapabilityName name); +bool isTargetVersionAtom(CapabilityAtom name); +bool isSpirvExtensionAtom(CapabilityAtom name); void printDiagnosticArg(StringBuilder& sb, CapabilityAtom atom); void printDiagnosticArg(StringBuilder& sb, CapabilityName name); diff --git a/source/slang/slang-check-decl.cpp b/source/slang/slang-check-decl.cpp index f8f6d2dcb..74d08f8e5 100644 --- a/source/slang/slang-check-decl.cpp +++ b/source/slang/slang-check-decl.cpp @@ -10123,9 +10123,10 @@ namespace Slang // then the decl is using things that require conflicting set of capabilities, and we should diagnose an error. 
if (referencedDecl && decl) { - diagnoseCapabilityErrors( + maybeDiagnose( visitor->getSink(), visitor->getOptionSet(), + DiagnosticCategory::Capability, referenceLoc, Diagnostics::conflictingCapabilityDueToUseOfDecl, referencedDecl, @@ -10135,9 +10136,10 @@ namespace Slang } else if (decl) { - diagnoseCapabilityErrors( + maybeDiagnose( visitor->getSink(), visitor->getOptionSet(), + DiagnosticCategory::Capability, referenceLoc, Diagnostics::conflictingCapabilityDueToStatement, nodeCaps, @@ -10146,9 +10148,10 @@ namespace Slang } else { - diagnoseCapabilityErrors( + maybeDiagnose( visitor->getSink(), visitor->getOptionSet(), + DiagnosticCategory::Capability, referenceLoc, Diagnostics::conflictingCapabilityDueToStatementEnclosingFunc, nodeCaps, @@ -10251,7 +10254,7 @@ namespace Slang targetCap.join(bodyCap); if (targetCap.isInvalid()) { - diagnoseCapabilityErrors(Base::getSink(), outerContext.getOptionSet(), targetCase->body->loc, Diagnostics::conflictingCapabilityDueToStatement, bodyCap, "target_switch", oldCap); + maybeDiagnose(Base::getSink(), outerContext.getOptionSet(), DiagnosticCategory::Capability, targetCase->body->loc, Diagnostics::conflictingCapabilityDueToStatement, bodyCap, "target_switch", oldCap); } set.unionWith(targetCap); } @@ -10390,7 +10393,7 @@ namespace Slang auto stageCaps = CapabilitySet(Profile(entryPointAttr->stage).getCapabilityName()); if (declaredCaps.isIncompatibleWith(stageCaps)) { - diagnoseCapabilityErrors(getSink(), this->getOptionSet(), funcDecl->loc, Diagnostics::stageIsInCompatibleWithCapabilityDefinition, funcDecl, stageCaps, declaredCaps); + maybeDiagnose(getSink(), this->getOptionSet(), DiagnosticCategory::Capability, funcDecl->loc, Diagnostics::stageIsInCompatibleWithCapabilityDefinition, funcDecl, stageCaps, declaredCaps); } else { @@ -10604,7 +10607,7 @@ namespace Slang printedDecls.add(declToPrint); if (auto provenance = declToPrint->capabilityRequirementProvenance.tryGetValue(atomToFind)) { - diagnoseCapabilityErrors(sink, 
optionSet, provenance->referenceLoc, Diagnostics::seeUsingOf, provenance->referencedDecl); + maybeDiagnose(sink, optionSet, DiagnosticCategory::Capability, provenance->referenceLoc, Diagnostics::seeUsingOf, provenance->referencedDecl); declToPrint = provenance->referencedDecl; if (printedDecls.contains(declToPrint)) break; @@ -10625,7 +10628,7 @@ namespace Slang } if (declToPrint && !optionallyNeverPrintDecl) { - diagnoseCapabilityErrors(sink, optionSet, declToPrint->loc, Diagnostics::seeDefinitionOf, declToPrint); + maybeDiagnose(sink, optionSet, DiagnosticCategory::Capability, declToPrint->loc, Diagnostics::seeDefinitionOf, declToPrint); } } @@ -10654,7 +10657,7 @@ namespace Slang CapabilityAtom outFailedAtom{}; if (hasTargetAtom(failedAtomsInsideAvailableSet, outFailedAtom)) { - diagnoseCapabilityErrors(getSink(), this->getOptionSet(), decl->loc, Diagnostics::declHasDependenciesNotCompatibleOnTarget, decl, outFailedAtom); + maybeDiagnose(getSink(), this->getOptionSet(), DiagnosticCategory::Capability, decl->loc, Diagnostics::declHasDependenciesNotCompatibleOnTarget, decl, outFailedAtom); // Anything defined on a non-failed target atom may be the culprit to why we fail having a target capability. // Print out all possible culprits. @@ -10665,7 +10668,7 @@ namespace Slang for (auto atom : targetsNotUsedSet) { - CapabilityAtom formattedAtom = (CapabilityAtom)atom; + CapabilityAtom formattedAtom = asAtom(atom); diagnoseCapabilityProvenance(this->getOptionSet(), getSink(), decl, formattedAtom, true); } return; @@ -10688,8 +10691,8 @@ namespace Slang // can come from multiple referenced items in a function body. 
for (auto i : failedAtomsInsideAvailableSet) { - CapabilityAtom formattedAtom = (CapabilityAtom)i; - diagnoseCapabilityErrors(getSink(), this->getOptionSet(), decl->loc, diagnosticInfo, decl, formattedAtom); + CapabilityAtom formattedAtom = asAtom(i); + maybeDiagnose(getSink(), this->getOptionSet(), DiagnosticCategory::Capability, decl->loc, diagnosticInfo, decl, formattedAtom); // Print provenances. diagnoseCapabilityProvenance(this->getOptionSet(), getSink(), decl, formattedAtom); } diff --git a/source/slang/slang-check-shader.cpp b/source/slang/slang-check-shader.cpp index 2ebc9d3a4..67abb56b7 100644 --- a/source/slang/slang-check-shader.cpp +++ b/source/slang/slang-check-shader.cpp @@ -519,7 +519,7 @@ namespace Slang targetCaps.join(stageCapabilitySet); if (targetCaps.isIncompatibleWith(entryPointFuncDecl->inferredCapabilityRequirements)) { - diagnoseCapabilityErrors(sink, linkage->m_optionSet, entryPointFuncDecl, Diagnostics::entryPointUsesUnavailableCapability, entryPointFuncDecl, entryPointFuncDecl->inferredCapabilityRequirements, targetCaps); + maybeDiagnose(sink, linkage->m_optionSet, DiagnosticCategory::Capability, entryPointFuncDecl, Diagnostics::entryPointUsesUnavailableCapability, entryPointFuncDecl, entryPointFuncDecl->inferredCapabilityRequirements, targetCaps); // Find out what exactly is incompatible and print out a trace of provenance to // help user diagnose their code. 
@@ -532,7 +532,7 @@ namespace Slang { for (auto inferredAtom : *interredCapConjunctions.begin()) { - CapabilityAtom inferredAtomFormatted = (CapabilityAtom)inferredAtom; + CapabilityAtom inferredAtomFormatted = asAtom(inferredAtom); if (!compileCaps->contains((UInt)inferredAtom)) { diagnoseCapabilityProvenance(linkage->m_optionSet, sink, entryPointFuncDecl, inferredAtomFormatted); @@ -540,6 +540,38 @@ namespace Slang } } } + else + { + // Only attempt to error if a user adds to slangc either `-profile` or `-capability` + if ( + ( + target->getOptionSet().hasOption(CompilerOptionName::Capability) + || + target->getOptionSet().hasOption(CompilerOptionName::Profile) + ) + && targetCaps.atLeastOneSetImpliedInOther(entryPointFuncDecl->inferredCapabilityRequirements) == CapabilitySet::ImpliesReturnFlags::NotImplied + ) + { + CapabilitySet combinedSets = targetCaps; + combinedSets.join(entryPointFuncDecl->inferredCapabilityRequirements); + CapabilityAtomSet addedAtoms{}; + if (auto targetCapSet = targetCaps.getAtomSets()) + { + if (auto combinedSet = combinedSets.getAtomSets()) + { + CapabilityAtomSet::calcSubtract(addedAtoms, (*combinedSet), (*targetCapSet)); + } + } + maybeDiagnoseWarningOrError( + sink, + target->getOptionSet(), + DiagnosticCategory::Capability, + entryPointFuncDecl->loc, + Diagnostics::profileImplicitlyUpgraded, + Diagnostics::profileImplicitlyUpgradedRestrictive, + addedAtoms.getElements()); + } + } } } diff --git a/source/slang/slang-compiler.cpp b/source/slang/slang-compiler.cpp index ed208ca37..fce01b770 100644 --- a/source/slang/slang-compiler.cpp +++ b/source/slang/slang-compiler.cpp @@ -656,7 +656,7 @@ namespace Slang { for (auto atom : conjunctions) { - switch ((CapabilityAtom)atom) + switch (asAtom(atom)) { default: break; diff --git a/source/slang/slang-compiler.h b/source/slang/slang-compiler.h index 881f511d0..8f1860433 100755 --- a/source/slang/slang-compiler.h +++ b/source/slang/slang-compiler.h @@ -201,10 +201,16 @@ namespace Slang 
Name* getName() { return m_name; } /// Get the stage that the entry point is to be compiled for - Stage getStage() { return m_profile.getStage(); } + Stage getStage() + { + return m_profile.getStage(); + } /// Get the profile that the entry point is to be compiled for - Profile getProfile() { return m_profile; } + Profile getProfile() + { + return m_profile; + } /// Get the index to the translation unit int getTranslationUnitIndex() const { return m_translationUnitIndex; } @@ -3353,6 +3359,34 @@ struct CompileTimerRAII session->addTotalCompileTime(elapsedTime); } }; + +// helpers for error/warning reporting +enum class DiagnosticCategory +{ + None = 0, + Capability = 1 << 0, +}; +template +bool maybeDiagnose(DiagnosticSink* sink, CompilerOptionSet& optionSet, DiagnosticCategory errorType, P const& pos, DiagnosticInfo const& info, Args const&... args) +{ + if ((int)errorType & (int)DiagnosticCategory::Capability && optionSet.getBoolOption(CompilerOptionName::IgnoreCapabilities)) + return false; + return sink->diagnose(pos, info, args...); +} + +template +bool maybeDiagnoseWarningOrError(DiagnosticSink* sink, CompilerOptionSet& optionSet, DiagnosticCategory errorType, P const& pos, DiagnosticInfo const& warningInfo, DiagnosticInfo const& errorInfo, Args const&... 
args) +{ + if ((int)errorType & (int)DiagnosticCategory::Capability && optionSet.getBoolOption(CompilerOptionName::RestrictiveCapabilityCheck)) + { + return maybeDiagnose(sink, optionSet, errorType, pos, errorInfo, args...); + } + else + { + return maybeDiagnose(sink, optionSet, errorType, pos, warningInfo, args...); + } +} + } #endif diff --git a/source/slang/slang-diagnostic-defs.h b/source/slang/slang-diagnostic-defs.h index f4bad6664..e0d818c97 100644 --- a/source/slang/slang-diagnostic-defs.h +++ b/source/slang/slang-diagnostic-defs.h @@ -733,6 +733,8 @@ DIAGNOSTIC(41001, Error, recursiveType, "type '$0' contains cyclic reference to DIAGNOSTIC(41010, Warning, missingReturn, "control flow may reach end of non-'void' function") DIAGNOSTIC(41011, Error, profileIncompatibleWithTargetSwitch, "__target_switch has no compatable target with current profile '$0'") +DIAGNOSTIC(41012, Warning, profileImplicitlyUpgraded, "user set `profile` had an implicit upgrade applied to it, atoms added: '$0'") +DIAGNOSTIC(41012, Error, profileImplicitlyUpgradedRestrictive, "user set `profile` had an implicit upgrade applied to it, atoms added: '$0'") DIAGNOSTIC(41015, Error, usingUninitializedValue, "use of uninitialized value '$0'") DIAGNOSTIC(41016, Warning, returningWithUninitializedOut, "returning without initializing out parameter '$0'") DIAGNOSTIC(41017, Warning, returningWithPartiallyUninitializedOut, "returning without fully initializing out parameter '$0'") diff --git a/source/slang/slang-doc-markdown-writer.cpp b/source/slang/slang-doc-markdown-writer.cpp index ac3b9ca7e..f29574180 100644 --- a/source/slang/slang-doc-markdown-writer.cpp +++ b/source/slang/slang-doc-markdown-writer.cpp @@ -432,7 +432,7 @@ static DocMarkdownWriter::Requirement _getRequirementFromTargetToken(const Token return Requirement{CodeGenTarget::SPIRV, UnownedStringSlice("")}; } - const CapabilityAtom targetCap = (CapabilityAtom)findCapabilityName(targetName); + const CapabilityAtom targetCap = 
asAtom(findCapabilityName(targetName)); if (targetCap == CapabilityAtom::Invalid) { diff --git a/source/slang/slang-emit-glsl.cpp b/source/slang/slang-emit-glsl.cpp index 936dc15ff..c788d9cbf 100644 --- a/source/slang/slang-emit-glsl.cpp +++ b/source/slang/slang-emit-glsl.cpp @@ -75,6 +75,13 @@ void GLSLSourceEmitter::_requireRayTracing() m_glslExtensionTracker->requireVersion(ProfileVersion::GLSL_460); } +void GLSLSourceEmitter::_requireRayQuery() +{ + m_glslExtensionTracker->requireExtension(UnownedStringSlice::fromLiteral("GL_EXT_ray_query")); + m_glslExtensionTracker->requireSPIRVVersion(SemanticVersion(1, 4)); // required due to glslang bug which enables `SPV_KHR_ray_tracing` regardless of context + m_glslExtensionTracker->requireVersion(ProfileVersion::GLSL_460); +} + void GLSLSourceEmitter::_requireFragmentShaderBarycentric() { m_glslExtensionTracker->requireExtension(UnownedStringSlice::fromLiteral("GL_EXT_fragment_shader_barycentric")); @@ -2584,7 +2591,7 @@ void GLSLSourceEmitter::emitSimpleTypeImpl(IRType* type) { case kIROp_RaytracingAccelerationStructureType: { - _requireRayTracing(); + _requireRayQuery(); m_writer->emit("accelerationStructureEXT"); break; } diff --git a/source/slang/slang-emit-glsl.h b/source/slang/slang-emit-glsl.h index a30195d75..efd3ded75 100644 --- a/source/slang/slang-emit-glsl.h +++ b/source/slang/slang-emit-glsl.h @@ -126,6 +126,8 @@ protected: void _requireRayTracing(); + void _requireRayQuery(); + void _requireFragmentShaderBarycentric(); void _emitSpecialFloatImpl(IRType* type, const char* valueExpr); diff --git a/source/slang/slang-ir-glsl-legalize.cpp b/source/slang/slang-ir-glsl-legalize.cpp index bcf2d8a4f..ae30184c8 100644 --- a/source/slang/slang-ir-glsl-legalize.cpp +++ b/source/slang/slang-ir-glsl-legalize.cpp @@ -3560,20 +3560,20 @@ void legalizeEntryPointForGLSL( void decorateModuleWithSPIRVVersion(IRModule* module, SemanticVersion spirvVersion) { - CapabilityName atom = CapabilityName::spirv_1_0; + CapabilityName 
atom = CapabilityName::_spirv_1_0; switch (spirvVersion.m_major) { case 1: { switch (spirvVersion.m_minor) { - case 0: atom = CapabilityName::spirv_1_0; break; - case 1: atom = CapabilityName::spirv_1_1; break; - case 2: atom = CapabilityName::spirv_1_2; break; - case 3: atom = CapabilityName::spirv_1_3; break; - case 4: atom = CapabilityName::spirv_1_4; break; - case 5: atom = CapabilityName::spirv_1_5; break; - case 6: atom = CapabilityName::spirv_1_6; break; + case 0: atom = CapabilityName::_spirv_1_0; break; + case 1: atom = CapabilityName::_spirv_1_1; break; + case 2: atom = CapabilityName::_spirv_1_2; break; + case 3: atom = CapabilityName::_spirv_1_3; break; + case 4: atom = CapabilityName::_spirv_1_4; break; + case 5: atom = CapabilityName::_spirv_1_5; break; + case 6: atom = CapabilityName::_spirv_1_6; break; default: SLANG_UNEXPECTED("Unknown SPIRV version"); } break; diff --git a/source/slang/slang-ir-specialize-target-switch.cpp b/source/slang/slang-ir-specialize-target-switch.cpp index e3ef06e18..c501cdab5 100644 --- a/source/slang/slang-ir-specialize-target-switch.cpp +++ b/source/slang/slang-ir-specialize-target-switch.cpp @@ -28,6 +28,7 @@ namespace Slang bool isEqual; CapabilitySet bestCapSet = CapabilitySet::makeInvalid(); IRBlock* targetBlock = nullptr; + CapabilitySet::ImpliesReturnFlags impliesReturnType = CapabilitySet::ImpliesReturnFlags::NotImplied; for (UInt i = 0; i < targetSwitch->getCaseCount(); i++) { auto cap = (CapabilityName)getIntVal(targetSwitch->getCaseValue(i)); @@ -41,9 +42,8 @@ namespace Slang bool isBetterForTarget = capSet.isBetterForTarget(bestCapSet, target->getTargetCaps(), isEqual); if (isBetterForTarget) { - CapabilitySet joinedCapSet = capSet; - joinedCapSet.join(target->getTargetCaps()); - bool targetImpliesCapSet = target->getTargetCaps().implies(joinedCapSet, true); + impliesReturnType = target->getTargetCaps().atLeastOneSetImpliedInOther(capSet); + bool targetImpliesCapSet = ((int)impliesReturnType & 
(int)CapabilitySet::ImpliesReturnFlags::Implied || capSet.isEmpty()); if (targetImpliesCapSet) { // Now check if bestCapSet contains targetCaps. If it does not then this is an invalid target diff --git a/source/slang/slang-ir-spirv-legalize.cpp b/source/slang/slang-ir-spirv-legalize.cpp index 989790b13..d7b980bf8 100644 --- a/source/slang/slang-ir-spirv-legalize.cpp +++ b/source/slang/slang-ir-spirv-legalize.cpp @@ -2156,25 +2156,25 @@ struct SPIRVLegalizationContext : public SourceEmitterBase auto spirvAtom = ((CapabilityName)atom); switch (spirvAtom) { - case CapabilityName::spirv_1_0: + case CapabilityName::_spirv_1_0: m_sharedContext->requireSpirvVersion(0x10000); break; - case CapabilityName::spirv_1_1: + case CapabilityName::_spirv_1_1: m_sharedContext->requireSpirvVersion(0x10100); break; - case CapabilityName::spirv_1_2: + case CapabilityName::_spirv_1_2: m_sharedContext->requireSpirvVersion(0x10200); break; - case CapabilityName::spirv_1_3: + case CapabilityName::_spirv_1_3: m_sharedContext->requireSpirvVersion(0x10300); break; - case CapabilityName::spirv_1_4: + case CapabilityName::_spirv_1_4: m_sharedContext->requireSpirvVersion(0x10400); break; - case CapabilityName::spirv_1_5: + case CapabilityName::_spirv_1_5: m_sharedContext->requireSpirvVersion(0x10500); break; - case CapabilityName::spirv_1_6: + case CapabilityName::_spirv_1_6: m_sharedContext->requireSpirvVersion(0x10600); break; case CapabilityName::SPV_EXT_demote_to_helper_invocation: diff --git a/source/slang/slang-lower-to-ir.cpp b/source/slang/slang-lower-to-ir.cpp index 4a6701acc..f8faf7c07 100644 --- a/source/slang/slang-lower-to-ir.cpp +++ b/source/slang/slang-lower-to-ir.cpp @@ -11632,11 +11632,11 @@ RefPtr TargetProgram::createIRModuleForLayout(DiagnosticSink* sink) { for (auto atomVal : atomSet) { - auto atom = (CapabilityName)atomVal; - if (atom >= CapabilityName::spirv_1_0 && atom <= latestSpirvAtom || - atom >= CapabilityName::metallib_2_3 && atom <= latestMetalAtom) + auto atom = 
asAtom(atomVal); + if (atom >= CapabilityAtom::_spirv_1_0 && atom <= latestSpirvAtom || + atom >= CapabilityAtom::metallib_2_3 && atom <= latestMetalAtom) { - builder->addRequireCapabilityAtomDecoration(irFunc, atom); + builder->addRequireCapabilityAtomDecoration(irFunc, (CapabilityName)atom); } } } diff --git a/source/slang/slang-options.cpp b/source/slang/slang-options.cpp index ec37223bd..87c9b1192 100644 --- a/source/slang/slang-options.cpp +++ b/source/slang/slang-options.cpp @@ -289,6 +289,7 @@ void initCommandOptions(CommandOptions& options) { OptionKind::Language, "-lang", "-lang ", "Set the language for the following input files."}, { OptionKind::MatrixLayoutColumn, "-matrix-layout-column-major", nullptr, "Set the default matrix layout to column-major."}, { OptionKind::MatrixLayoutRow,"-matrix-layout-row-major", nullptr, "Set the default matrix layout to row-major."}, + { OptionKind::RestrictiveCapabilityCheck,"-restrictive-capability-check", nullptr, "Many capability warnings will become an error."}, { OptionKind::ZeroInitialize, "-zero-initialize", nullptr, "Initialize all variables to zero." "Structs will set all struct-fields without an init expression to 0." 
@@ -1699,6 +1700,7 @@ SlangResult OptionsParser::_parse( case OptionKind::VulkanEmitReflection: case OptionKind::ZeroInitialize: case OptionKind::IgnoreCapabilities: + case OptionKind::RestrictiveCapabilityCheck: case OptionKind::MinimumSlangOptimization: case OptionKind::DisableNonEssentialValidations: case OptionKind::DisableSourceMap: diff --git a/source/slang/slang-profile-defs.h b/source/slang/slang-profile-defs.h index 3b9ee27f5..25506a0a6 100644 --- a/source/slang/slang-profile-defs.h +++ b/source/slang/slang-profile-defs.h @@ -57,7 +57,9 @@ LANGUAGE_ALIAS(SPIRV, spirv_vk) // Pipeline stages to target PROFILE_STAGE(Vertex, vertex, SLANG_STAGE_VERTEX) PROFILE_STAGE(Hull, hull, SLANG_STAGE_HULL) +PROFILE_STAGE_ALIAS(TessControl, tesscontrol, Hull) PROFILE_STAGE(Domain, domain, SLANG_STAGE_DOMAIN) +PROFILE_STAGE_ALIAS(TessEval, tesseval, Domain) PROFILE_STAGE(Geometry, geometry, SLANG_STAGE_GEOMETRY) PROFILE_STAGE(Pixel, pixel, SLANG_STAGE_FRAGMENT) PROFILE_STAGE(Compute, compute, SLANG_STAGE_COMPUTE) @@ -190,14 +192,14 @@ PROFILE(DX_Vertex_4_0, vs_4_0, Vertex, DX_4_0) PROFILE(DX_Vertex_4_1, vs_4_1, Vertex, DX_4_1) PROFILE(DX_Vertex_5_0, vs_5_0, Vertex, DX_5_0) PROFILE(DX_Vertex_5_1, vs_5_1, Vertex, DX_5_1) -PROFILE(DX_Vertex_6_0, vs_6_0, Vertex, DX_6_0) -PROFILE(DX_Vertex_6_1, vs_6_1, Vertex, DX_6_1) -PROFILE(DX_Vertex_6_2, vs_6_2, Vertex, DX_6_2) -PROFILE(DX_Vertex_6_3, vs_6_3, Vertex, DX_6_3) -PROFILE(DX_Vertex_6_4, vs_6_4, Vertex, DX_6_4) -PROFILE(DX_Vertex_6_5, vs_6_5, Vertex, DX_6_5) -PROFILE(DX_Vertex_6_6, vs_6_6, Vertex, DX_6_6) -PROFILE(DX_Vertex_6_7, vs_6_7, Vertex, DX_6_7) +PROFILE(DX_Vertex_6_0, vs_6_0, Vertex, DX_6_0) +PROFILE(DX_Vertex_6_1, vs_6_1, Vertex, DX_6_1) +PROFILE(DX_Vertex_6_2, vs_6_2, Vertex, DX_6_2) +PROFILE(DX_Vertex_6_3, vs_6_3, Vertex, DX_6_3) +PROFILE(DX_Vertex_6_4, vs_6_4, Vertex, DX_6_4) +PROFILE(DX_Vertex_6_5, vs_6_5, Vertex, DX_6_5) +PROFILE(DX_Vertex_6_6, vs_6_6, Vertex, DX_6_6) +PROFILE(DX_Vertex_6_7, vs_6_7, Vertex, DX_6_7) 
PROFILE(DX_Mesh_6_5, ms_6_5, Mesh, DX_6_5) PROFILE(DX_Mesh_6_6, ms_6_6, Mesh, DX_6_6) diff --git a/source/slang/slang.cpp b/source/slang/slang.cpp index 10dc8f57c..e87aeb399 100644 --- a/source/slang/slang.cpp +++ b/source/slang/slang.cpp @@ -1724,7 +1724,7 @@ CapabilitySet TargetRequest::getTargetCaps() { for (auto atom : profileCapAtomSet) { - if (isTargetVersionAtom((CapabilityName)atom)) + if (isTargetVersionAtom(asAtom(atom))) { atoms.add((CapabilityName)atom); hasTargetVersionAtom = true; @@ -1742,7 +1742,7 @@ CapabilitySet TargetRequest::getTargetCaps() { for (auto atom : profileCapAtomSet) { - if (isSpirvExtensionAtom((CapabilityName)atom)) + if (isSpirvExtensionAtom(asAtom(atom))) { atoms.add((CapabilityName)atom); hasTargetVersionAtom = true; @@ -1754,6 +1754,7 @@ CapabilitySet TargetRequest::getTargetCaps() { isGLSLTarget = true; atoms.add(CapabilityName::glsl); + profileCaps.addSpirvVersionFromOtherAsGlslSpirvVersion(profileCaps); } break; @@ -1796,30 +1797,24 @@ CapabilitySet TargetRequest::getTargetCaps() CapabilitySet targetCap = CapabilitySet(atoms); - CapabilityName latestSpirvAtom = getLatestSpirvAtom(); - + if (profileCaps.atLeastOneSetImpliedInOther(targetCap) == CapabilitySet::ImpliesReturnFlags::Implied) + targetCap.join(profileCaps); + for (auto atomVal : optionSet.getArray(CompilerOptionName::Capability)) { - auto atom = (CapabilityName)atomVal.intValue; - if (isGLSLTarget) - { - // If we are emitting GLSL code, we need to - // translate all spirv_*_* capabilities to - // glsl_spirv_*_* instead. - // - if (atom >= CapabilityName::spirv_1_0 && atom <= latestSpirvAtom) - { - atom = (CapabilityName)((Int)CapabilityName::glsl_spirv_1_0 + ((Int)atom - (Int)CapabilityName::spirv_1_0)); - } - } - if (!targetCap.isIncompatibleWith(atom)) - { - // Only add atoms that are compatible with the current target. 
- atoms.add(atom); - } + auto toAdd = CapabilitySet((CapabilityName)atomVal.intValue); + + if(isGLSLTarget) + targetCap.addSpirvVersionFromOtherAsGlslSpirvVersion(toAdd); + + if (!targetCap.isIncompatibleWith(toAdd)) + targetCap.join(toAdd); } - cookedCapabilities = CapabilitySet(atoms); + cookedCapabilities = targetCap; + + SLANG_ASSERT(!cookedCapabilities.isInvalid()); + return cookedCapabilities; } -- cgit v1.2.3