From f9bcad35562c1f08638e6d3eb397d370d7d2f8f8 Mon Sep 17 00:00:00 2001 From: ArielG-NV <159081215+ArielG-NV@users.noreply.github.com> Date: Fri, 19 Apr 2024 23:18:40 -0400 Subject: Initial pass to add capability declarations to stdlib intrinsics. (#3912) --- source/slang/glsl.meta.slang | 1675 ++++++++++++++++++++------------ source/slang/hlsl.meta.slang | 1435 ++++++++++++++++++--------- source/slang/slang-ast-dump.cpp | 5 +- source/slang/slang-capabilities.capdef | 428 ++++++-- source/slang/slang-capability.cpp | 116 +++ source/slang/slang-capability.h | 6 + source/slang/slang-check-decl.cpp | 81 +- source/slang/slang-check-modifier.cpp | 33 + source/slang/slang-stdlib-textures.cpp | 1 + 9 files changed, 2605 insertions(+), 1175 deletions(-) (limited to 'source') diff --git a/source/slang/glsl.meta.slang b/source/slang/glsl.meta.slang index e2c1d25bd..d04dfe8b9 100644 --- a/source/slang/glsl.meta.slang +++ b/source/slang/glsl.meta.slang @@ -9,8 +9,6 @@ #define VECTOR_MAP_TRINARY(TYPE, COUNT, FUNC, A, B, C) \ vector result; for(int i = 0; i < COUNT; ++i) { result[i] = FUNC(A[i], B[i], C[i]); } return result -#define REQUIRE_KHRONOS [require(glsl)] [require(spirv)] - // // OpenGL 4.60 spec // @@ -107,11 +105,11 @@ public in uvec3 gl_WorkGroupID : SV_GroupID; public in uvec3 gl_LocalInvocationIndex : SV_GroupIndex; public in uvec3 gl_LocalInvocationID : SV_GroupThreadID; -[require(glsl)] -[require(spirv)] public property uint3 gl_NumWorkGroups { - get { + [require(glsl_spirv, GLSL_430_SPIRV_1_0_compute)] + get + { __target_switch { case glsl: @@ -128,6 +126,7 @@ public property uint3 gl_NumWorkGroups { public property uint3 gl_WorkGroupSize { [__unsafeForceInlineEarly] + [require(glsl_spirv, GLSL_430_SPIRV_1_0_compute)] get { return WorkgroupSize(); @@ -153,6 +152,7 @@ public in int gl_ViewportIndex : SV_ViewportArrayIndex; [OverloadRank(15)] [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] public matrix operator*(matrix m1, matrix m2) { return mul(m2, m1); @@ -160,6 +160,7 @@ public matrix operator*(matrix m1, matrix operator*(matrix m1, matrix m2) { return mul(m2, m1); @@ -167,6 +168,7 @@ public matrix operator*(matrix m1, matrix operator*(matrix m1, matrix m2) { return mul(m2, m1); @@ -174,6 +176,7 @@ public matrix operator*(matrix m1, matrix [ForceInline] [OverloadRank(15)] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] public matrix operator*(matrix m1, matrix m2) { return mul(m2, m1); @@ -181,6 +184,7 @@ public matrix operator* operator*(vector v, matrix m) { return mul(m, v); @@ -188,6 +192,7 @@ public vector operator* operator*(matrix m, vector v) { return mul(v, m); @@ -224,6 +229,7 @@ public extension vector [ForceInline] [OverloadRank(15)] +[require(cpp_cuda_glsl_hlsl_spirv)] public bool operator==(vector left, vector right) { return all(equal(left, right)); @@ -231,6 +237,7 @@ public bool operator==(vector left, [ForceInline] [OverloadRank(15)] +[require(cpp_cuda_glsl_hlsl_spirv)] public bool operator!=(vector left, vector right) { return any(notEqual(left, right)); @@ -238,6 +245,7 @@ public bool operator!=(vector left, [ForceInline] [OverloadRank(14)] +[require(cpp_cuda_glsl_hlsl_spirv)] public bool operator==(vector left, vector right) { return all(equal(left, right)); @@ -245,6 +253,7 @@ public bool operator==(vector lef [ForceInline] [OverloadRank(14)] +[require(cpp_cuda_glsl_hlsl_spirv)] public bool operator!=(vector left, vector right) { return any(notEqual(left, right)); @@ -252,6 +261,7 @@ public bool operator!=(vector lef [ForceInline] [OverloadRank(14)] +[require(cpp_cuda_glsl_hlsl_spirv)] public bool operator==(vector left, vector right) { return all(equal(left, right)); @@ -259,6 +269,7 @@ public bool operator==(vector left, vec [ForceInline] [OverloadRank(14)] +[require(cpp_cuda_glsl_hlsl_spirv)] public bool operator!=(vector left, vector right) { return any(notEqual(left, right)); @@ -271,6 +282,7 @@ for (auto type : kBaseTypes) { }}}} [ForceInline] [OverloadRank(15)] +[require(cpp_cuda_glsl_hlsl_spirv)] public bool operator==(vector<$(typeName), N> left, vector<$(typeName), N> right) { return all(equal(left, right)); @@ -278,6 +290,7 @@ public bool operator==(vector<$(typeName), N> left, vector<$(typeName [ForceInline] [OverloadRank(15)] +[require(cpp_cuda_glsl_hlsl_spirv)] public bool operator!=(vector<$(typeName), N> left, vector<$(typeName), N> right) { return any(notEqual(left, right)); @@ -293,6 +306,7 @@ ${{{{ __generic [__readNone] [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] public T atan(T y, T x) { return atan2(y, x); @@ -301,6 +315,7 @@ public T atan(T y, T x) __generic [__readNone] [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] public vector atan(vector y, vector x) { return atan2(y, x); @@ -311,6 +326,7 @@ __target_intrinsic(cuda, "$P_asinh($0)") __target_intrinsic(cpp, "$P_asinh($0)") [__readNone] [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, GLSL_130)] public T asinh(T x) { return log(x + sqrt(x * x + T(1))); @@ -319,6 +335,7 @@ public T asinh(T x) __generic [__readNone] [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, GLSL_130)] public vector asinh(vector x) { VECTOR_MAP_UNARY(T, N, asinh, x); @@ -329,6 +346,7 @@ __target_intrinsic(cuda, "$P_acosh($0)") __target_intrinsic(cpp, "$P_acosh($0)") [__readNone] [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, GLSL_130)] public T acosh(T x) { return log(x + sqrt( x * x - T(1))); @@ -337,6 +355,7 @@ public T acosh(T x) __generic [__readNone] [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, GLSL_130)] public vector acosh(vector x) { VECTOR_MAP_UNARY(T, N, acosh, x); @@ -347,6 +366,7 @@ __target_intrinsic(cuda, "$P_atanh($0)") __target_intrinsic(cpp, "$P_atanh($0)") [__readNone] [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, GLSL_130)] public T atanh(T x) { return T(0.5) * log((T(1) + x) / (T(1) - x)); @@ -355,6 +375,7 @@ public T atanh(T x) __generic [__readNone] [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] public vector atanh(vector x) { VECTOR_MAP_UNARY(T, N, atanh, x); @@ -367,6 +388,7 @@ public vector atanh(vector x) __generic [__readNone] [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, GLSL_130)] public T inversesqrt(T x) { return rsqrt(x); @@ -375,6 +397,7 @@ public T inversesqrt(T x) __generic [__readNone] [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, GLSL_130)] public vector inversesqrt(vector x) { return rsqrt(x); @@ -387,6 +410,7 @@ public vector inversesqrt(vector x) __generic [__readNone] [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, GLSL_130)] public T roundEven(T x) { T i; @@ -404,6 +428,7 @@ public T roundEven(T x) __generic [__readNone] [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, GLSL_130)] public vector roundEven(vector x) { VECTOR_MAP_UNARY(T, N, roundEven, x); @@ -412,6 +437,7 @@ public vector roundEven(vector x) __generic [__readNone] [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] public T fract(T x) { return frac(x); @@ -420,6 +446,7 @@ public T fract(T x) __generic [__readNone] [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] public vector fract(vector x) { return frac(x); @@ -428,6 +455,7 @@ public vector fract(vector x) __generic [__readNone] [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] public T mod(T x, T y) { return fmod(x, y); @@ -436,6 +464,7 @@ public T mod(T x, T y) __generic [__readNone] [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] public vector mod(vector x, T y) { return fmod(x, vector(y)); @@ -444,6 +473,7 @@ public vector mod(vector x, T y) __generic [__readNone] [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] public vector mod(vector x, vector y) { return fmod(x, y); @@ -452,6 +482,7 @@ public vector mod(vector x, vector y) __generic [__readNone] [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] public T mix(T x, T y, T a) { return lerp(x, y, a); @@ -460,6 +491,7 @@ public T mix(T x, T y, T a) __generic [__readNone] [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] public vector mix(vector x, vector y, T a) { return lerp(x, y, vector(a)); @@ -468,6 +500,7 @@ public vector mix(vector x, vector y, T a) __generic [__readNone] [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] public vector mix(vector x, vector y, vector a) { return lerp(x, y, a); @@ -476,6 +509,7 @@ public vector mix(vector x, vector y, vector a) __generic [__readNone] [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] public T mix(T x, T y, bool a) { return (a ? y : x); @@ -484,6 +518,7 @@ public T mix(T x, T y, bool a) __generic [__readNone] [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] public vector mix(vector x, vector y, vector a) { vector result; @@ -496,6 +531,7 @@ public vector mix(vector x, vector y, vector a) [__readNone] [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)] public int floatBitsToInt(highp float x) { return asint(x); @@ -504,6 +540,7 @@ public int floatBitsToInt(highp float x) __generic [__readNone] [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)] public vector floatBitsToInt(highp vector x) { return asint(x); @@ -511,6 +548,7 @@ public vector floatBitsToInt(highp vector x) [__readNone] [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)] public uint floatBitsToUint(highp float x) { return asuint(x); @@ -519,6 +557,7 @@ public uint floatBitsToUint(highp float x) __generic [__readNone] [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)] public vector floatBitsToUint(highp vector x) { return asuint(x); @@ -526,6 +565,7 @@ public vector floatBitsToUint(highp vector x) [__readNone] [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)] public float intBitsToFloat(highp int x) { return asfloat(x); @@ -534,6 +574,7 @@ public float intBitsToFloat(highp int x) __generic [__readNone] [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)] public vector intBitsToFloat(highp vector x) { return asfloat(x); @@ -541,6 +582,7 @@ public vector intBitsToFloat(highp vector x) [__readNone] [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)] public float uintBitsToFloat(highp uint x) { return asfloat(x); @@ -549,6 +591,7 @@ public float uintBitsToFloat(highp uint x) __generic [__readNone] [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)] public vector uintBitsToFloat(highp vector x) { return asfloat(x); @@ -616,6 +659,7 @@ float unpackSnorm1x8(uint p) [__readNone] [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)] uint float2half(float f) { uint u = floatBitsToUint(f); @@ -632,6 +676,7 @@ uint float2half(float f) __target_intrinsic(glsl) [__readNone] [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)] public uint packUnorm2x16(vec2 v) { return packUnorm1x16(v.x) | (packUnorm1x16(v.y) << uint(16)); @@ -640,6 +685,7 @@ public uint packUnorm2x16(vec2 v) __target_intrinsic(glsl) [__readNone] [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)] public uint packSnorm2x16(vec2 v) { return packSnorm1x16(v.x) | (packSnorm1x16(v.y) << uint(16)); @@ -648,6 +694,7 @@ public uint packSnorm2x16(vec2 v) __target_intrinsic(glsl) [__readNone] [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)] public uint packUnorm4x8(vec4 v) { return packUnorm1x8(v.x) | (packUnorm1x8(v.y) << uint(8)) | (packUnorm1x8(v.z) << uint(16)) | (packUnorm1x8(v.w) << uint(24)); @@ -656,6 +703,7 @@ public uint packUnorm4x8(vec4 v) __target_intrinsic(glsl) [__readNone] [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)] public uint packSnorm4x8(vec4 v) { return packSnorm1x8(v.x) | (packSnorm1x8(v.y) << uint(8)) | (packSnorm1x8(v.z) << uint(16)) | (packSnorm1x8(v.w) << uint(24)); @@ -664,6 +712,7 @@ public uint packSnorm4x8(vec4 v) __target_intrinsic(glsl) [__readNone] [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)] public vec2 unpackUnorm2x16(uint p) { return vec2(unpackUnorm1x16(p & uint(0xffff)), unpackUnorm1x16(p >> uint(16))); @@ -672,6 +721,7 @@ public vec2 unpackUnorm2x16(uint p) __target_intrinsic(glsl) [__readNone] [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)] public vec2 unpackSnorm2x16(uint p) { return vec2(unpackSnorm1x16(p & uint(0xffff)), unpackSnorm1x16(p >> uint(16))); @@ -680,6 +730,7 @@ public vec2 unpackSnorm2x16(uint p) __target_intrinsic(glsl) [__readNone] [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)] public vec4 unpackUnorm4x8(highp uint p) { return vec4(unpackUnorm1x8(p & uint(0xffff)), unpackUnorm1x8(p >> uint(8)), unpackUnorm1x8(p >> uint(16)), unpackUnorm1x8(p >> uint(24))); @@ -688,6 +739,7 @@ public vec4 unpackUnorm4x8(highp uint p) __target_intrinsic(glsl) [__readNone] [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)] public vec4 unpackSnorm4x8(highp uint p) { return vec4(unpackSnorm1x8(p & uint(0xffff)), unpackSnorm1x8(p >> uint(8)), unpackSnorm1x8(p >> uint(16)), unpackSnorm1x8(p >> uint(24))); @@ -696,6 +748,7 @@ public vec4 unpackSnorm4x8(highp uint p) __target_intrinsic(glsl) [__readNone] [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)] public uint packHalf2x16(vec2 v) { return float2half(v.x) | (float2half(v.y) << uint(16)); @@ -704,6 +757,7 @@ public uint packHalf2x16(vec2 v) __target_intrinsic(glsl) [__readNone] [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)] public float half2float(uint h) { uint s = ((h & uint(0x8000)) << uint(16)); @@ -719,6 +773,7 @@ public float half2float(uint h) __target_intrinsic(glsl) [__readNone] [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)] public vec2 unpackHalf2x16(uint p) { return vec2(half2float(p & uint(0xffff)), half2float(p >> uint(16))); @@ -727,6 +782,7 @@ public vec2 unpackHalf2x16(uint p) __target_intrinsic(glsl) [__readNone] [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)] public double packDouble2x32(uvec2 v) { // TODO: there is no "asdouble()" @@ -737,6 +793,7 @@ public double packDouble2x32(uvec2 v) __target_intrinsic(glsl) [__readNone] [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)] public uvec2 unpackDouble2x32(double v) { // TODO: there is no "asuint64()" @@ -751,6 +808,7 @@ public uvec2 unpackDouble2x32(double v) __generic [__readNone] [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, GLSL_400)] public T faceforward(T n, T i, T ng) { return dot(ng, i) < T(0.0f) ? n : -n; @@ -782,6 +840,7 @@ public matrix outerProduct(vector c, vector r) __generic __target_intrinsic(hlsl) __target_intrinsic(glsl) +[require(cpp_cuda_glsl_hlsl_spirv, GLSL_400)] matrix inverse(matrix m); // @@ -987,27 +1046,35 @@ public vector bitfieldReverse(highp vector value) VECTOR_MAP_UNARY(int, N, bitfieldReverse, value); } -[__readNone] [ForceInline] REQUIRE_KHRONOS +[__readNone] +[ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)] public uint bitCount(uint value) { return countbits(value); } __generic -[__readNone] [ForceInline] REQUIRE_KHRONOS +[__readNone] +[ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)] public vector bitCount(vector value) { VECTOR_MAP_UNARY(uint, N, countbits, value); } -[__readNone] [ForceInline] REQUIRE_KHRONOS +[__readNone] +[ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)] public int bitCount(int value) { return countbits(uint(value)); } __generic -[__readNone] [ForceInline] REQUIRE_KHRONOS +[__readNone] +[ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)] public vector bitCount(vector value) { VECTOR_MAP_UNARY(int, N, countbits, value); @@ -1015,6 +1082,7 @@ public vector bitCount(vector value) [__readNone] [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)] public int findLSB(int v) { return firstbitlow(v); @@ -1023,6 +1091,7 @@ public int findLSB(int v) __generic [__readNone] [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)] public vector findLSB(vector value) { return firstbitlow(value); @@ -1030,6 +1099,7 @@ public vector findLSB(vector value) [__readNone] [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)] public uint findLSB(uint v) { return firstbitlow(v); @@ -1038,6 +1108,7 @@ public uint findLSB(uint v) __generic [__readNone] [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)] public vector findLSB(vector value) { return firstbitlow(value); @@ -1045,6 +1116,7 @@ public vector findLSB(vector value) [__readNone] [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)] public int findMSB(int value) { return firstbithigh(value); @@ -1053,6 +1125,7 @@ public int findMSB(int value) __generic [__readNone] [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)] public vector findMSB(vector value) { return firstbithigh(value); @@ -1060,6 +1133,7 @@ public vector findMSB(vector value) [__readNone] [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)] public uint findMSB(uint value) { return firstbithigh(value); @@ -1068,6 +1142,7 @@ public uint findMSB(uint value) __generic [__readNone] [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)] public vector findMSB(vector value) { return firstbithigh(value); @@ -1243,6 +1318,7 @@ public typealias usamplerBuffer = SamplerBuffer; __generic [ForceInline] +[require(glsl_hlsl_spirv, texture_size)] public int textureSize(Sampler1D> sampler, int lod) { int result; @@ -1253,6 +1329,7 @@ public int textureSize(Sampler1D> sampler, int lod) __generic [ForceInline] +[require(glsl_hlsl_spirv, texture_size)] public ivec2 textureSize(Sampler2D> sampler, int lod) { vector result; @@ -1263,6 +1340,7 @@ public ivec2 textureSize(Sampler2D> sampler, int lod) __generic [ForceInline] +[require(glsl_hlsl_spirv, texture_size)] public ivec3 textureSize(Sampler3D> sampler, int lod) { vector result; @@ -1273,6 +1351,7 @@ public ivec3 textureSize(Sampler3D> sampler, int lod) __generic [ForceInline] +[require(glsl_hlsl_spirv, texture_size)] public ivec2 textureSize(SamplerCube> sampler, int lod) { vector result; @@ -1282,6 +1361,7 @@ public ivec2 textureSize(SamplerCube> sampler, int lod) } [ForceInline] +[require(glsl_hlsl_spirv, texture_size)] public int textureSize(sampler1DShadow sampler, int lod) { int result; @@ -1291,6 +1371,7 @@ public int textureSize(sampler1DShadow sampler, int lod) } [ForceInline] +[require(glsl_hlsl_spirv, texture_size)] public ivec2 textureSize(sampler2DShadow sampler, int lod) { vector result; @@ -1300,6 +1381,7 @@ public ivec2 textureSize(sampler2DShadow sampler, int lod) } [ForceInline] +[require(glsl_hlsl_spirv, texture_size)] public ivec2 textureSize(samplerCubeShadow sampler, int lod) { vector result; @@ -1308,6 +1390,7 @@ public ivec2 textureSize(samplerCubeShadow sampler, int lod) return result; } +[require(glsl_hlsl_spirv, texture_size)] __generic [ForceInline] public ivec3 textureSize(SamplerCubeArray> sampler, int lod) @@ -1319,6 +1402,7 @@ public ivec3 textureSize(SamplerCubeArray> sampler, int lod) } [ForceInline] +[require(glsl_hlsl_spirv, texture_size)] public ivec3 textureSize(samplerCubeArrayShadow sampler, int lod) { vector result; @@ -1327,6 +1411,7 @@ public ivec3 textureSize(samplerCubeArrayShadow sampler, int lod) return result; } +[require(glsl_hlsl_spirv, texture_size)] __generic [ForceInline] public ivec2 textureSize(Sampler2DRect> sampler) @@ -1338,6 +1423,7 @@ public ivec2 textureSize(Sampler2DRect> sampler) } [ForceInline] +[require(glsl_hlsl_spirv, texture_size)] public ivec2 textureSize(sampler2DRectShadow sampler) { vector result; @@ -1346,6 +1432,7 @@ public ivec2 textureSize(sampler2DRectShadow sampler) return result; } +[require(glsl_hlsl_spirv, texture_size)] __generic [ForceInline] public ivec2 textureSize(Sampler1DArray> sampler, int lod) @@ -1357,6 +1444,7 @@ public ivec2 textureSize(Sampler1DArray> sampler, int lod) } [ForceInline] +[require(glsl_hlsl_spirv, texture_size)] public ivec2 textureSize(sampler1DArrayShadow sampler, int lod) { vector result; @@ -1365,6 +1453,7 @@ public ivec2 textureSize(sampler1DArrayShadow sampler, int lod) return result; } +[require(glsl_hlsl_spirv, texture_size)] __generic [ForceInline] public ivec3 textureSize(Sampler2DArray> sampler, int lod) @@ -1376,6 +1465,7 @@ public ivec3 textureSize(Sampler2DArray> sampler, int lod) } [ForceInline] +[require(glsl_hlsl_spirv, texture_size)] public ivec3 textureSize(sampler2DArrayShadow sampler, int lod) { vector result; @@ -1384,6 +1474,7 @@ public ivec3 textureSize(sampler2DArrayShadow sampler, int lod) return result; } +[require(glsl_hlsl_spirv, texture_size)] __generic [ForceInline] public int textureSize(SamplerBuffer,format> sampler) @@ -1393,6 +1484,7 @@ public int textureSize(SamplerBuffer,format> sampler) return int(result); } +[require(glsl_hlsl_spirv, texture_size)] __generic [ForceInline] public ivec2 textureSize(Sampler2DMS,sampleCount> sampler) @@ -1404,6 +1496,7 @@ public ivec2 textureSize(Sampler2DMS,sampleCount> sampler) return result; } +[require(glsl_hlsl_spirv, texture_size)] __generic [ForceInline] public ivec3 textureSize(Sampler2DMSArray,sampleCount> sampler) @@ -1421,6 +1514,7 @@ public ivec3 textureSize(Sampler2DMSArray,sampleCount> sampler) __generic [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, texture_querylod)] public vec2 textureQueryLod(__TextureImpl< T, __Shape1D, @@ -1441,6 +1535,7 @@ public vec2 textureQueryLod(__TextureImpl< __generic [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, texture_querylod)] public vec2 textureQueryLod(__TextureImpl [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, texture_querylevels)] public int textureQueryLevels(Sampler1D> sampler) { int width; @@ -1474,6 +1570,7 @@ public int textureQueryLevels(Sampler1D> sampler) __generic [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, texture_querylevels)] public int textureQueryLevels(Sampler2D> sampler) { vector dim; @@ -1484,6 +1581,7 @@ public int textureQueryLevels(Sampler2D> sampler) __generic [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, texture_querylevels)] public int textureQueryLevels(Sampler3D> sampler) { vector dim; @@ -1494,6 +1592,7 @@ public int textureQueryLevels(Sampler3D> sampler) __generic [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, texture_querylevels)] public int textureQueryLevels(SamplerCube> sampler) { vector dim; @@ -1504,6 +1603,7 @@ public int textureQueryLevels(SamplerCube> sampler) __generic [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, texture_querylevels)] public int textureQueryLevels(Sampler1DArray> sampler) { vector dim; @@ -1514,6 +1614,7 @@ public int textureQueryLevels(Sampler1DArray> sampler) __generic [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, texture_querylevels)] public int textureQueryLevels(Sampler2DArray> sampler) { vector dim; @@ -1524,6 +1625,7 @@ public int textureQueryLevels(Sampler2DArray> sampler) __generic [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, texture_querylevels)] public int textureQueryLevels(SamplerCubeArray> sampler) { vector dim; @@ -1533,6 +1635,7 @@ public int textureQueryLevels(SamplerCubeArray> sampler) } [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, texture_querylevels)] public int textureQueryLevels(sampler1DShadow sampler) { int dim; @@ -1542,6 +1645,7 @@ public int textureQueryLevels(sampler1DShadow sampler) } [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, texture_querylevels)] public int textureQueryLevels(sampler2DShadow sampler) { vector dim; @@ -1551,6 +1655,7 @@ public int textureQueryLevels(sampler2DShadow sampler) } [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, texture_querylevels)] public int textureQueryLevels(samplerCubeShadow sampler) { vector dim; @@ -1560,6 +1665,7 @@ public int textureQueryLevels(samplerCubeShadow sampler) } [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, texture_querylevels)] public int textureQueryLevels(sampler1DArrayShadow sampler) { vector dim; @@ -1569,6 +1675,7 @@ public int textureQueryLevels(sampler1DArrayShadow sampler) } [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, texture_querylevels)] public int textureQueryLevels(sampler2DArrayShadow sampler) { vector dim; @@ -1578,6 +1685,7 @@ public int textureQueryLevels(sampler2DArrayShadow sampler) } [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, texture_querylevels)] public int textureQueryLevels(samplerCubeArrayShadow sampler) { vector dim; @@ -1592,6 +1700,7 @@ public int textureQueryLevels(samplerCubeArrayShadow sampler) __generic [ForceInline] +[require(glsl_hlsl_spirv, image_samples)] public int textureSamples(Sampler2DMS,sampleCount> sampler) { vector dim; @@ -1603,6 +1712,7 @@ public int textureSamples(Sampler2DMS,sampleCount> sampler) __generic [ForceInline] +[require(glsl_hlsl_spirv, image_samples)] public int textureSamples(Sampler2DMSArray,sampleCount> sampler) { vector dim; @@ -1622,6 +1732,7 @@ public int textureSamples(Sampler2DMSArray,sampleCount> sampler) __generic [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, texture_sm_4_1_fragment)] public vector texture(Sampler1D> sampler, float p) { return __vectorReshape<4>(sampler.Sample(p)); @@ -1629,6 +1740,7 @@ public vector texture(Sampler1D> sampler, float p) __generic [ForceInline] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] public vector texture(Sampler1D> sampler, float p, constexpr float bias) { return __vectorReshape<4>(sampler.SampleBias(p, bias)); @@ -1636,6 +1748,7 @@ public vector texture(Sampler1D> sampler, float p, constexpr fl __generic [ForceInline] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] public vector texture(__TextureImpl< vector, Shape, @@ -1653,6 +1766,7 @@ public vector texture(__TextureImpl< __generic [ForceInline] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] public vector texture(__TextureImpl< vector, Shape, @@ -1669,12 +1783,14 @@ public vector texture(__TextureImpl< } [ForceInline] +[require(glsl_hlsl_spirv, texture_shadowlod)] public float texture(sampler1DShadow sampler, vec3 p) { return sampler.SampleCmp(p.x, p.z); } [ForceInline] +[require(glsl_hlsl_spirv, texture_shadowlod)] public float texture(sampler1DShadow sampler, vec3 p, float bias) { // TODO: Need to apply bias @@ -1682,24 +1798,28 @@ public float texture(sampler1DShadow sampler, vec3 p, float bias) } [ForceInline] +[require(glsl_hlsl_spirv, texture_shadowlod)] public float texture(sampler2DShadow sampler, vec3 p) { return sampler.SampleCmp(p.xy, p.z); } [ForceInline] +[require(glsl_hlsl_spirv, texture_shadowlod)] public float texture(sampler2DShadow sampler, vec3 p, float bias) { // TODO: Need to apply bias return sampler.SampleCmp(p.xy, p.z); } +[require(glsl_hlsl_spirv, texture_shadowlod_cube)] [ForceInline] public float texture(samplerCubeShadow sampler, vec4 p) { return sampler.SampleCmp(p.xyz, p.w); } +[require(glsl_hlsl_spirv, texture_shadowlod_cube)] [ForceInline] public float texture(samplerCubeShadow sampler, vec4 p, float bias) { @@ -1708,12 +1828,14 @@ public float texture(samplerCubeShadow sampler, vec4 p, float bias) } [ForceInline] +[require(glsl_hlsl_spirv, texture_shadowlod)] public float texture(sampler1DArrayShadow sampler, vec3 p) { return sampler.SampleCmp(p.xy, p.z); } [ForceInline] +[require(glsl_hlsl_spirv, texture_shadowlod)] public float texture(sampler1DArrayShadow sampler, vec3 p, float bias) { // TODO: Need to apply bias @@ -1721,12 +1843,14 @@ public float texture(sampler1DArrayShadow sampler, vec3 p, float bias) } [ForceInline] +[require(glsl_hlsl_spirv, texture_shadowlod)] public float texture(sampler2DArrayShadow sampler, vec4 p) { return sampler.SampleCmp(p.xyz, p.w); } [ForceInline] +[require(glsl_hlsl_spirv, texture_shadowlod_cube)] public float texture(samplerCubeArrayShadow sampler, vec4 p, float compare) { return sampler.SampleCmp(p, compare); @@ -1738,6 +1862,7 @@ public float texture(samplerCubeArrayShadow sampler, vec4 p, float compare) __generic [ForceInline] +[require(glsl_hlsl_spirv, texture_sm_4_1_fragment)] public vector textureProj(Sampler1D> sampler, vec2 p) { return texture(sampler, p.x / p.y); @@ -1745,6 +1870,7 @@ public vector textureProj(Sampler1D> sampler, vec2 p) __generic [ForceInline] +[require(glsl_hlsl_spirv, texture_sm_4_1_fragment)] public vector textureProj(Sampler1D> sampler, vec2 p, float bias) { return texture(sampler, p.x / p.y, bias); @@ -1752,6 +1878,7 @@ public vector textureProj(Sampler1D> sampler, vec2 p, float bia __generic [ForceInline] +[require(glsl_hlsl_spirv, texture_sm_4_1_fragment)] public vector textureProj(Sampler1D> sampler, vec4 p) { return texture(sampler, p.x / p.w); @@ -1759,6 +1886,7 @@ public vector textureProj(Sampler1D> sampler, vec4 p) __generic [ForceInline] +[require(glsl_hlsl_spirv, texture_sm_4_1_fragment)] public vector textureProj(Sampler1D> sampler, vec4 p, float bias) { return texture(sampler, p.x / p.w, bias); @@ -1766,6 +1894,7 @@ public vector textureProj(Sampler1D> sampler, vec4 p, float bia __generic [ForceInline] +[require(glsl_hlsl_spirv, texture_sm_4_1_fragment)] public vector textureProj(Sampler2D> sampler, vec3 p) { return texture(sampler, p.xy / p.z); @@ -1773,6 +1902,7 @@ public vector textureProj(Sampler2D> sampler, vec3 p) __generic [ForceInline] +[require(glsl_hlsl_spirv, texture_sm_4_1_fragment)] public vector textureProj(Sampler2D> sampler, vec3 p, float bias) { return texture(sampler, p.xy / p.z, bias); @@ -1780,6 +1910,7 @@ public vector textureProj(Sampler2D> sampler, vec3 p, float bia __generic [ForceInline] +[require(glsl_hlsl_spirv, texture_sm_4_1_fragment)] public vector textureProj(Sampler2D> sampler, vec4 p) { return texture(sampler, p.xy / p.w); @@ -1787,6 +1918,7 @@ public vector textureProj(Sampler2D> sampler, vec4 p) __generic [ForceInline] +[require(glsl_hlsl_spirv, texture_sm_4_1_fragment)] public vector textureProj(Sampler2D> sampler, vec4 p, float bias) { return texture(sampler, p.xy / p.w, bias); @@ -1794,6 +1926,7 @@ public vector textureProj(Sampler2D> sampler, vec4 p, float bia __generic [ForceInline] +[require(glsl_hlsl_spirv, texture_sm_4_1_fragment)] public vector textureProj(Sampler3D> sampler, vec4 p) { return texture(sampler, p.xyz / p.w); @@ -1801,30 +1934,35 @@ public vector textureProj(Sampler3D> sampler, vec4 p) __generic [ForceInline] +[require(glsl_hlsl_spirv, texture_sm_4_1_fragment)] public vector textureProj(Sampler3D> sampler, vec4 p, float bias) { return texture(sampler, p.xyz / p.w, bias); } [ForceInline] +[require(glsl_hlsl_spirv, texture_shadowlod)] public float textureProj(sampler1DShadow sampler, vec4 p) { return texture(sampler, p.xyz / p.w); } [ForceInline] +[require(glsl_hlsl_spirv, texture_shadowlod)] public float textureProj(sampler1DShadow sampler, vec4 p, float bias) { return texture(sampler, p.xyz / p.w, bias); } [ForceInline] +[require(glsl_hlsl_spirv, texture_shadowlod)] public float textureProj(sampler2DShadow sampler, vec4 p) { return texture(sampler, p.xyz / p.w); } [ForceInline] +[require(glsl_hlsl_spirv, texture_shadowlod)] public float textureProj(sampler2DShadow sampler, vec4 p, float bias) { return texture(sampler, p.xyz / p.w, bias); @@ -1836,6 +1974,7 @@ public float textureProj(sampler2DShadow sampler, vec4 p, float bias) __generic [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, texture_sm_4_1_fragment)] public vector textureLod(Sampler1D> sampler, float p, float lod) { return __vectorReshape<4>(sampler.SampleLevel(p, lod)); @@ -1843,6 +1982,7 @@ public vector textureLod(Sampler1D> sampler, float p, float lod __generic [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, texture_sm_4_1_fragment)] public vector textureLod(__TextureImpl< vector, Shape, @@ -1859,6 +1999,7 @@ public vector textureLod(__TextureImpl< } [ForceInline] +[require(glsl_hlsl_spirv, texture_shadowlod)] public float textureLod(sampler2DShadow sampler, vec3 p, float lod) { // TODO: Need to apply lod @@ -1866,6 +2007,7 @@ public float textureLod(sampler2DShadow sampler, vec3 p, float lod) } [ForceInline] +[require(glsl_hlsl_spirv, texture_shadowlod)] public float textureLod(sampler1DShadow sampler, vec3 p, float lod) { // TODO: Need to apply lod @@ -1873,6 +2015,7 @@ public float textureLod(sampler1DShadow sampler, vec3 p, float lod) } [ForceInline] +[require(glsl_hlsl_spirv, texture_shadowlod)] public float textureLod(sampler1DArrayShadow sampler, vec3 p, float lod) { // TODO: Need to apply lod @@ -1885,6 +2028,7 @@ public float textureLod(sampler1DArrayShadow sampler, vec3 p, float lod) __generic [ForceInline] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] public vector textureOffset(Sampler1D> sampler, float p, constexpr int offset, float bias = 0.0) { return __vectorReshape<4>(sampler.SampleBias(p, bias, offset)); @@ -1892,6 +2036,7 @@ public vector textureOffset(Sampler1D> sampler, float p, conste __generic [ForceInline] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] public vector textureOffset(Sampler2D> sampler, vec2 p, constexpr ivec2 offset, float bias = 0.0) { return __vectorReshape<4>(sampler.SampleBias(p, bias, offset)); @@ -1899,12 +2044,14 @@ public vector textureOffset(Sampler2D> sampler, vec2 p, constex __generic [ForceInline] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] public vector textureOffset(Sampler3D> sampler, vec3 p, constexpr ivec3 offset, float bias = 0.0) { return __vectorReshape<4>(sampler.SampleBias(p, bias, offset)); } [ForceInline] +[require(glsl_hlsl_spirv, texture_shadowlod)] public float textureOffset(sampler2DShadow sampler, vec3 p, constexpr ivec2 offset, float bias = 0.0) { // TODO: Need to apply bias @@ -1912,6 +2059,7 @@ public float textureOffset(sampler2DShadow sampler, vec3 p, constexpr ivec2 offs } [ForceInline] +[require(glsl_hlsl_spirv, texture_shadowlod)] public float textureOffset(sampler1DShadow sampler, vec3 p, constexpr int offset, float bias = 0.0) { // TODO: Need to apply bias @@ -1920,6 +2068,7 @@ public float textureOffset(sampler1DShadow sampler, vec3 p, constexpr int offset __generic [ForceInline] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] public vector textureOffset(Sampler1DArray> sampler, vec2 p, constexpr int offset, float bias = 0.0) { return __vectorReshape<4>(sampler.SampleBias(p, bias, offset)); @@ -1927,12 +2076,14 @@ public vector textureOffset(Sampler1DArray> sampler, vec2 p, co __generic [ForceInline] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] public vector textureOffset(Sampler2DArray> sampler, vec3 p, constexpr ivec2 offset, float bias = 0.0) { return __vectorReshape<4>(sampler.SampleBias(p, bias, offset)); } [ForceInline] +[require(glsl_hlsl_spirv, texture_shadowlod)] public float textureOffset(sampler1DArrayShadow sampler, vec3 p, constexpr int offset, float bias = 0.0) { // TODO: Need to apply bias @@ -1940,6 +2091,7 @@ public float textureOffset(sampler1DArrayShadow sampler, vec3 p, constexpr int o } [ForceInline] +[require(glsl_hlsl_spirv, texture_shadowlod)] public float textureOffset(sampler2DArrayShadow sampler, vec4 p, constexpr ivec2 offset) { return sampler.SampleCmp(p.xyz, p.w, offset); @@ -1951,6 +2103,7 @@ public float textureOffset(sampler2DArrayShadow sampler, vec4 p, constexpr ivec2 __generic [ForceInline] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_samplerless)] public vector texelFetch(Sampler1D> sampler, int p, int lod) { return __vectorReshape<4>(sampler.Load(int2(p, lod))); @@ -1958,6 +2111,7 @@ public vector texelFetch(Sampler1D> sampler, int p, int lod) __generic [ForceInline] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_samplerless)] public vector texelFetch(__TextureImpl< vector, Shape, @@ -1975,6 +2129,7 @@ public vector texelFetch(__TextureImpl< __generic [ForceInline] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_samplerless)] public vector texelFetch(Sampler2DRect> sampler, ivec2 p) { return __vectorReshape<4>(sampler.Load(int3(p.xy,0))); @@ -1982,6 +2137,7 @@ public vector texelFetch(Sampler2DRect> sampler, ivec2 p) __generic [ForceInline] +[require(glsl_hlsl_spirv, texture_sm_4_1_samplerless)] public vector texelFetch(SamplerBuffer,format> sampler, int p) { return __vectorReshape<4>(sampler.Load(p)); @@ -1989,6 +2145,7 @@ public vector texelFetch(SamplerBuffer,format> sampler, int p) __generic [ForceInline] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_samplerless)] public vector texelFetch(__TextureImpl< vector, __Shape2D, @@ -2010,6 +2167,7 @@ public vector texelFetch(__TextureImpl< __generic [ForceInline] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_samplerless)] public vector texelFetchOffset(Sampler1D> sampler, int p, int lod, constexpr int offset) { return texelFetch(sampler, p + offset, lod); @@ -2017,6 +2175,7 @@ public vector texelFetchOffset(Sampler1D> sampler, int p, int l __generic [ForceInline] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_samplerless)] public vector texelFetchOffset(__TextureImpl< vector, Shape, @@ -2034,6 +2193,7 @@ public vector texelFetchOffset(__TextureImpl< __generic [ForceInline] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_samplerless)] public vector texelFetchOffset(__TextureImpl< vector, Shape, @@ -2051,6 +2211,7 @@ public vector texelFetchOffset(__TextureImpl< __generic [ForceInline] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_samplerless)] public vector texelFetchOffset(Sampler2DRect> sampler, ivec2 p, constexpr ivec2 offset) { return texelFetch(sampler, p + offset); @@ -2062,6 +2223,7 @@ public vector texelFetchOffset(Sampler2DRect> sampler, ivec2 p, __generic [ForceInline] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] public vector textureProjOffset(Sampler1D> sampler, vec2 p, constexpr int offset, float bias = 0.0) { return textureOffset(sampler, p.x / p.y, offset, bias); @@ -2069,6 +2231,7 @@ public vector textureProjOffset(Sampler1D> sampler, vec2 p, con __generic [ForceInline] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] public vector textureProjOffset(Sampler1D> sampler, vec4 p, constexpr int offset, float bias = 0.0) { return textureOffset(sampler, p.x / p.w, offset, bias); @@ -2076,6 +2239,7 @@ public vector textureProjOffset(Sampler1D> sampler, vec4 p, con __generic [ForceInline] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] public vector textureProjOffset(Sampler2D> sampler, vec3 p, constexpr ivec2 offset, float bias = 0.0) { return textureOffset(sampler, p.xy / p.z, offset, bias); @@ -2083,6 +2247,7 @@ public vector textureProjOffset(Sampler2D> sampler, vec3 p, con __generic [ForceInline] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] public vector textureProjOffset(Sampler2D> sampler, vec4 p, constexpr ivec2 offset, float bias = 0.0) { return textureOffset(sampler, p.xy / p.w, offset, bias); @@ -2090,18 +2255,21 @@ public vector textureProjOffset(Sampler2D> sampler, vec4 p, con __generic [ForceInline] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] public vector textureProjOffset(Sampler3D> sampler, vec4 p, constexpr ivec3 offset, float bias = 0.0) { return textureOffset(sampler, p.xyz / p.w, offset, bias); } [ForceInline] +[require(glsl_hlsl_spirv, texture_shadowlod)] public float textureProjOffset(sampler1DShadow sampler, vec4 p, constexpr int offset, float bias = 0.0) { return textureOffset(sampler, p.xyz / p.w, offset, bias); } [ForceInline] +[require(glsl_hlsl_spirv, texture_shadowlod)] public float textureProjOffset(sampler2DShadow sampler, vec4 p, constexpr ivec2 offset, float bias = 0.0) { return textureOffset(sampler, p.xyz / p.w, offset, bias); @@ -2113,6 +2281,7 @@ public float textureProjOffset(sampler2DShadow sampler, vec4 p, constexpr ivec2 __generic [ForceInline] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_1)] public vector textureLodOffset(Sampler1D> sampler, float p, float lod, constexpr int offset) { return __vectorReshape<4>(sampler.SampleLevel(p, lod, offset)); @@ -2120,6 +2289,7 @@ public vector textureLodOffset(Sampler1D> sampler, float p, flo __generic [ForceInline] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_1)] public vector textureLodOffset(__TextureImpl< vector, Shape, @@ -2136,6 +2306,7 @@ public vector textureLodOffset(__TextureImpl< } [ForceInline] +[require(glsl_hlsl_spirv, texture_shadowlod)] public float textureLodOffset(sampler1DShadow sampler, vec3 p, float lod, constexpr int offset) { // TODO: Need to apply lod @@ -2143,6 +2314,7 @@ public float textureLodOffset(sampler1DShadow sampler, vec3 p, float lod, conste } [ForceInline] +[require(glsl_hlsl_spirv, texture_shadowlod)] public float textureLodOffset(sampler2DShadow sampler, vec3 p, float lod, constexpr ivec2 offset) { // TODO: Need to apply lod @@ -2150,6 +2322,7 @@ public float textureLodOffset(sampler2DShadow sampler, vec3 p, float lod, conste } [ForceInline] +[require(glsl_hlsl_spirv, texture_shadowlod)] public float textureLodOffset(sampler1DArrayShadow sampler, vec3 p, float lod, constexpr int offset) { // TODO: Need to apply lod @@ -2162,6 +2335,7 @@ public float textureLodOffset(sampler1DArrayShadow sampler, vec3 p, float lod, c __generic [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, texture_sm_4_1_fragment)] public vector textureProjLod(Sampler1D> sampler, vec2 p, float lod) { return textureLod(sampler, p.x / p.y, lod); @@ -2169,6 +2343,7 @@ public vector textureProjLod(Sampler1D> sampler, vec2 p, float __generic [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, texture_sm_4_1_fragment)] public vector textureProjLod(Sampler1D> sampler, vec4 p, float lod) { return textureLod(sampler, p.x / p.w, lod); @@ -2176,6 +2351,7 @@ public vector textureProjLod(Sampler1D> sampler, vec4 p, float __generic [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, texture_sm_4_1_fragment)] public vector textureProjLod(Sampler2D> sampler, vec3 p, float lod) { return textureLod(sampler, p.xy / p.z, lod); @@ -2183,6 +2359,7 @@ public vector textureProjLod(Sampler2D> sampler, vec3 p, float __generic [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, texture_sm_4_1_fragment)] public vector textureProjLod(Sampler2D> sampler, vec4 p, float lod) { return textureLod(sampler, p.xy / p.w, lod); @@ -2190,18 +2367,21 @@ public vector textureProjLod(Sampler2D> sampler, vec4 p, float __generic [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, texture_sm_4_1_fragment)] public vector textureProjLod(Sampler3D> sampler, vec4 p, float lod) { return textureLod(sampler, p.xyz / p.w, lod); } [ForceInline] +[require(glsl_hlsl_spirv, texture_shadowlod)] public float textureProjLod(sampler1DShadow sampler, vec4 p, float lod) { return textureLod(sampler, p.xyz / p.w, lod); } [ForceInline] +[require(glsl_hlsl_spirv, texture_shadowlod)] public float textureProjLod(sampler2DShadow sampler, vec4 p, float lod) { return textureLod(sampler, p.xyz / p.w, lod); @@ -2213,6 +2393,7 @@ public float textureProjLod(sampler2DShadow sampler, vec4 p, float lod) __generic [ForceInline] +[require(glsl_hlsl_spirv, texture_sm_4_1_fragment)] public vector textureProjLodOffset(Sampler1D> sampler, vec2 p, float lod, constexpr int offset) { return textureLodOffset(sampler, p.x / p.y, lod, offset); @@ -2220,6 +2401,7 @@ public vector textureProjLodOffset(Sampler1D> sampler, vec2 p, __generic [ForceInline] +[require(glsl_hlsl_spirv, texture_sm_4_1_fragment)] public vector textureProjLodOffset(Sampler1D> sampler, vec4 p, float lod, constexpr int offset) { return textureLodOffset(sampler, p.x / p.w, lod, offset); @@ -2227,6 +2409,7 @@ public vector textureProjLodOffset(Sampler1D> sampler, vec4 p, __generic [ForceInline] +[require(glsl_hlsl_spirv, texture_sm_4_1_fragment)] public vector textureProjLodOffset(Sampler2D> sampler, vec3 p, float lod, constexpr ivec2 offset) { return textureLodOffset(sampler, p.xy / p.z, lod, offset); @@ -2234,6 +2417,7 @@ public vector textureProjLodOffset(Sampler2D> sampler, vec3 p, __generic [ForceInline] +[require(glsl_hlsl_spirv, texture_sm_4_1_fragment)] public vector textureProjLodOffset(Sampler2D> sampler, vec4 p, float lod, constexpr ivec2 offset) { return textureLodOffset(sampler, p.xy / p.w, lod, offset); @@ -2241,18 +2425,21 @@ public vector textureProjLodOffset(Sampler2D> sampler, vec4 p, __generic [ForceInline] +[require(glsl_hlsl_spirv, texture_sm_4_1_fragment)] public vector textureProjLodOffset(Sampler3D> sampler, vec4 p, float lod, constexpr ivec3 offset) { return textureLodOffset(sampler, p.xyz / p.w, lod, offset); } [ForceInline] +[require(glsl_hlsl_spirv, texture_shadowlod)] public float textureProjLodOffset(sampler1DShadow sampler, vec4 p, float lod, constexpr int offset) { return textureLodOffset(sampler, p.xyz / p.w, lod, offset); } [ForceInline] +[require(glsl_hlsl_spirv, texture_shadowlod)] public float textureProjLodOffset(sampler2DShadow sampler, vec4 p, float lod, constexpr ivec2 offset) { return textureLodOffset(sampler, p.xyz / p.w, lod, offset); @@ -2262,8 +2449,10 @@ public float textureProjLodOffset(sampler2DShadow sampler, vec4 p, float lod, co // textureGrad // ------------------- + __generic [ForceInline] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_1)] public vector textureGrad(Sampler1D> sampler, float p, float dPdx, float dPdy) { return __vectorReshape<4>(sampler.SampleGrad(p, dPdx, dPdy)); @@ -2271,6 +2460,7 @@ public vector textureGrad(Sampler1D> sampler, float p, float dP __generic [ForceInline] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_1)] public vector textureGrad(__TextureImpl< vector, Shape, @@ -2287,6 +2477,7 @@ public vector textureGrad(__TextureImpl< } [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, texture_shadowlod)] public float textureGrad(sampler1DShadow sampler, vec3 p, float dPdx, float dPdy) { // TODO: Not implemented @@ -2294,6 +2485,7 @@ public float textureGrad(sampler1DShadow sampler, vec3 p, float dPdx, float dPdy } [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, texture_shadowlod)] public float textureGrad(sampler1DArrayShadow sampler, vec3 p, float dPdx, float dPdy) { // TODO: Not implemented @@ -2301,6 +2493,7 @@ public float textureGrad(sampler1DArrayShadow sampler, vec3 p, float dPdx, float } [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, texture_shadowlod)] public float textureGrad(sampler2DShadow sampler, vec3 p, vec2 dPdx, vec2 dPdy) { // TODO: Not implemented on HLSL side yet. @@ -2308,6 +2501,7 @@ public float textureGrad(sampler2DShadow sampler, vec3 p, vec2 dPdx, vec2 dPdy) } [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, texture_shadowlod_cube)] public float textureGrad(samplerCubeShadow sampler, vec4 p, vec3 dPdx, vec3 dPdy) { // TODO: Not implemented on HLSL side yet. @@ -2315,6 +2509,7 @@ public float textureGrad(samplerCubeShadow sampler, vec4 p, vec3 dPdx, vec3 dPdy } [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, texture_shadowlod)] public float textureGrad(sampler2DArrayShadow sampler, vec4 p, vec2 dPdx, vec2 dPdy) { // TODO: Not implemented on HLSL side yet. @@ -2327,12 +2522,14 @@ public float textureGrad(sampler2DArrayShadow sampler, vec4 p, vec2 dPdx, vec2 d __generic [ForceInline] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_1)] public vector textureGradOffset(Sampler1D> sampler, float p, float dPdx, float dPdy, constexpr int offset) { return __vectorReshape<4>(sampler.SampleGrad(p, dPdx, dPdy, offset)); } __generic +[require(cpp_glsl_hlsl_spirv, texture_sm_4_1)] [ForceInline] public vector textureGradOffset(__TextureImpl< vector, @@ -2350,6 +2547,7 @@ public vector textureGradOffset(__TextureImpl< } [ForceInline] +[require(cpp_glsl_hlsl_spirv, texture_shadowlod)] public float textureGradOffset(sampler1DShadow sampler, vec3 p, float dPdx, float dPdy, constexpr int offset) { // TODO: Not implemented on HLSL side yet. @@ -2357,6 +2555,7 @@ public float textureGradOffset(sampler1DShadow sampler, vec3 p, float dPdx, floa } [ForceInline] +[require(cpp_glsl_hlsl_spirv, texture_shadowlod)] public float textureGradOffset(sampler2DShadow sampler, vec3 p, vec2 dPdx, vec2 dPdy, constexpr ivec2 offset) { // TODO: Not implemented on HLSL side yet. @@ -2364,6 +2563,7 @@ public float textureGradOffset(sampler2DShadow sampler, vec3 p, vec2 dPdx, vec2 } [ForceInline] +[require(cpp_glsl_hlsl_spirv, texture_shadowlod)] public float textureGradOffset(sampler1DArrayShadow sampler, vec3 p, float dPdx, float dPdy, constexpr int offset) { // TODO: Not implemented on HLSL side yet. @@ -2371,6 +2571,7 @@ public float textureGradOffset(sampler1DArrayShadow sampler, vec3 p, float dPdx, } [ForceInline] +[require(cpp_glsl_hlsl_spirv, texture_shadowlod)] public float textureGradOffset(sampler2DArrayShadow sampler, vec4 p, vec2 dPdx, vec2 dPdy, constexpr ivec2 offset) { // TODO: Not implemented on HLSL side yet. @@ -2383,6 +2584,7 @@ public float textureGradOffset(sampler2DArrayShadow sampler, vec4 p, vec2 dPdx, __generic [ForceInline] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_1)] public vector textureProjGrad(Sampler1D> sampler, vec2 p, float dPdx, float dPdy) { return textureGrad(sampler, p.x / p.y, dPdx, dPdy); @@ -2390,6 +2592,7 @@ public vector textureProjGrad(Sampler1D> sampler, vec2 p, float __generic [ForceInline] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_1)] public vector textureProjGrad(Sampler1D> sampler, vec4 p, float dPdx, float dPdy) { return textureGrad(sampler, p.x / p.w, dPdx, dPdy); @@ -2397,6 +2600,7 @@ public vector textureProjGrad(Sampler1D> sampler, vec4 p, float __generic [ForceInline] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_1)] public vector textureProjGrad(Sampler2D> sampler, vec3 p, vec2 dPdx, vec2 dPdy) { return textureGrad(sampler, p.xy / p.z, dPdx, dPdy); @@ -2404,6 +2608,7 @@ public vector textureProjGrad(Sampler2D> sampler, vec3 p, vec2 __generic [ForceInline] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_1)] public vector textureProjGrad(Sampler2D> sampler, vec4 p, vec2 dPdx, vec2 dPdy) { return textureGrad(sampler, p.xy / p.w, dPdx, dPdy); @@ -2411,18 +2616,21 @@ public vector textureProjGrad(Sampler2D> sampler, vec4 p, vec2 __generic [ForceInline] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_1)] public vector textureProjGrad(Sampler3D> sampler, vec4 p, vec3 dPdx, vec3 dPdy) { return textureGrad(sampler, p.xyz / p.w, dPdx, dPdy); } [ForceInline] +[require(cpp_glsl_hlsl_spirv, texture_shadowlod)] public float textureProjGrad(sampler1DShadow sampler, vec4 p, float dPdx, float dPdy) { return textureGrad(sampler, p.xyz / p.w, dPdx, dPdy); } [ForceInline] +[require(cpp_glsl_hlsl_spirv, texture_shadowlod)] public float textureProjGrad(sampler2DShadow sampler, vec4 p, vec2 dPdx, vec2 dPdy) { return textureGrad(sampler, p.xyz / p.w, dPdx, dPdy); @@ -2434,6 +2642,7 @@ public float textureProjGrad(sampler2DShadow sampler, vec4 p, vec2 dPdx, vec2 dP __generic [ForceInline] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_1)] public vector textureProjGradOffset(Sampler1D> sampler, vec2 p, float dPdx, float dPdy, constexpr int offset) { return textureGradOffset(sampler, p.x / p.y, dPdx, dPdy, offset); @@ -2441,6 +2650,7 @@ public vector textureProjGradOffset(Sampler1D> sampler, vec2 p, __generic [ForceInline] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_1)] public vector textureProjGradOffset(Sampler1D> sampler, vec4 p, float dPdx, float dPdy, constexpr int offset) { return textureGradOffset(sampler, p.x / p.w, dPdx, dPdy, offset); @@ -2448,6 +2658,7 @@ public vector textureProjGradOffset(Sampler1D> sampler, vec4 p, __generic [ForceInline] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_1)] public vector textureProjGradOffset(Sampler2D> sampler, vec3 p, vec2 dPdx, vec2 dPdy, constexpr ivec2 offset) { return textureGradOffset(sampler, p.xy / p.z, dPdx, dPdy, offset); @@ -2455,6 +2666,7 @@ public vector textureProjGradOffset(Sampler2D> sampler, vec3 p, __generic [ForceInline] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_1)] public vector textureProjGradOffset(Sampler2D> sampler, vec4 p, vec2 dPdx, vec2 dPdy, constexpr ivec2 offset) { return textureGradOffset(sampler, p.xy / p.w, dPdx, dPdy, offset); @@ -2462,18 +2674,21 @@ public vector textureProjGradOffset(Sampler2D> sampler, vec4 p, __generic [ForceInline] +[require(cpp_glsl_hlsl_spirv, texture_sm_4_1)] public vector textureProjGradOffset(Sampler3D> sampler, vec4 p, vec3 dPdx, vec3 dPdy, constexpr ivec3 offset) { return textureGradOffset(sampler, p.xyz / p.w, dPdx, dPdy, offset); } [ForceInline] +[require(cpp_glsl_hlsl_spirv, texture_shadowlod)] public float textureProjGradOffset(sampler1DShadow sampler, vec4 p, float dPdx, float dPdy, constexpr int offset) { return textureGradOffset(sampler, p.xyz / p.w, dPdx, dPdy, offset); } [ForceInline] +[require(cpp_glsl_hlsl_spirv, texture_shadowlod)] public float textureProjGradOffset(sampler2DShadow sampler, vec4 p, vec2 dPdx, vec2 dPdy, constexpr ivec2 offset) { return textureGradOffset(sampler, p.xyz / p.w, dPdx, dPdy, offset); @@ -2489,6 +2704,7 @@ public float textureProjGradOffset(sampler2DShadow sampler, vec4 p, vec2 dPdx, v __generic [ForceInline] +[require(glsl_hlsl_spirv, texture_gather)] public vector textureGather(__TextureImpl< vector, Shape, @@ -2512,6 +2728,7 @@ public vector textureGather(__TextureImpl< __generic [ForceInline] +[require(glsl_hlsl_spirv, texture_gather)] public vec4 textureGather(__TextureImpl< float, Shape, @@ -2533,6 +2750,7 @@ public vec4 textureGather(__TextureImpl< __generic [ForceInline] +[require(glsl_hlsl_spirv, texture_gather)] public vector textureGatherOffset(__TextureImpl< vector, __Shape2D, @@ -2556,6 +2774,7 @@ public vector textureGatherOffset(__TextureImpl< __generic [ForceInline] +[require(glsl_hlsl_spirv, texture_gather)] public vec4 textureGatherOffset(__TextureImpl< float, __Shape2D, @@ -2577,6 +2796,7 @@ public vec4 textureGatherOffset(__TextureImpl< __generic [ForceInline] +[require(glsl_hlsl_spirv, texture_gather)] public vector textureGatherOffsets(__TextureImpl< vector, __Shape2D, @@ -2600,6 +2820,7 @@ public vector textureGatherOffsets(__TextureImpl< __generic [ForceInline] +[require(glsl_hlsl_spirv, texture_gather)] public vec4 textureGatherOffsets(__TextureImpl< float, __Shape2D, @@ -2619,196 +2840,235 @@ public vec4 textureGatherOffsets(__TextureImpl< // Section 8.9.5. Compatibility Profile Texture Functions // +[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] public vec4 texture1D(sampler1D sampler, float coord) { return texture(sampler, coord); } +[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] public vec4 texture1D(sampler1D sampler, float coord, float bias) { return texture(sampler, coord, bias); } +[require(glsl_hlsl_spirv, texture_sm_4_1_fragment)] public vec4 texture1DProj(sampler1D sampler, vec2 coord) { return textureProj(sampler, coord); } +[require(glsl_hlsl_spirv, texture_sm_4_1_fragment)] public vec4 texture1DProj(sampler1D sampler, vec2 coord, float bias) { return textureProj(sampler, coord, bias); } +[require(glsl_hlsl_spirv, texture_sm_4_1_fragment)] public vec4 texture1DProj(sampler1D sampler, vec4 coord) { return textureProj(sampler, coord); } +[require(glsl_hlsl_spirv, texture_sm_4_1_fragment)] public vec4 texture1DProj(sampler1D sampler, vec4 coord, float bias) { return textureProj(sampler, coord, bias); } +[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] public vec4 texture1DLod(sampler1D sampler, float coord, float lod) { return textureLod(sampler, coord, lod); } +[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] public vec4 texture1DProjLod(sampler1D sampler, vec2 coord, float lod) { return textureProjLod(sampler, coord, lod); } +[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] public vec4 texture1DProjLod(sampler1D sampler, vec4 coord, float lod) { return textureProjLod(sampler, coord, lod); } +[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] public vec4 texture2D(sampler2D sampler, vec2 coord) { return texture(sampler, coord); } +[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] public vec4 texture2D(sampler2D sampler, vec2 coord, float bias) { return texture(sampler, coord, bias); } +[require(glsl_hlsl_spirv, texture_sm_4_1_fragment)] public vec4 texture2DProj(sampler2D sampler, vec3 coord) { return textureProj(sampler, coord); } +[require(glsl_hlsl_spirv, texture_sm_4_1_fragment)] public vec4 texture2DProj(sampler2D sampler, vec3 coord, float bias) { return textureProj(sampler, coord, bias); } +[require(glsl_hlsl_spirv, texture_sm_4_1_fragment)] public vec4 texture2DProj(sampler2D sampler, vec4 coord) { return textureProj(sampler, coord); } +[require(glsl_hlsl_spirv, texture_sm_4_1_fragment)] public vec4 texture2DProj(sampler2D sampler, vec4 coord, float bias) { return textureProj(sampler, coord, bias); } +[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] public vec4 texture2DLod(sampler2D sampler, vec2 coord, float lod) { return textureLod(sampler, coord, lod); } +[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] public vec4 texture2DProjLod(sampler2D sampler, vec3 coord, float lod) { return textureProjLod(sampler, coord, lod); } +[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] public vec4 texture2DProjLod(sampler2D sampler, vec4 coord, float lod) { return textureProjLod(sampler, coord, lod); } +[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] public vec4 texture3D(sampler3D sampler, vec3 coord) { return texture(sampler, coord); } +[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] public vec4 texture3D(sampler3D sampler, vec3 coord, float bias) { return texture(sampler, coord, bias); } +[require(glsl_hlsl_spirv, texture_sm_4_1_fragment)] public vec4 texture3DProj(sampler3D sampler, vec4 coord) { return textureProj(sampler, coord); } +[require(glsl_hlsl_spirv, texture_sm_4_1_fragment)] public vec4 texture3DProj(sampler3D sampler, vec4 coord, float bias) { return textureProj(sampler, coord, bias); } +[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] public vec4 texture3DLod(sampler3D sampler, vec3 coord, float lod) { return textureLod(sampler, coord, lod); } +[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] public vec4 texture3DProjLod(sampler3D sampler, vec4 coord, float lod) { return textureProjLod(sampler, coord, lod); } +[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] public vec4 textureCube(samplerCube sampler, vec3 coord) { return texture(sampler, coord); } +[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] public vec4 textureCube(samplerCube sampler, vec3 coord, float bias) { return texture(sampler, coord, bias); } +[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] public vec4 textureCubeLod(samplerCube sampler, vec3 coord, float lod) { return textureLod(sampler, coord, lod); } +[require(glsl_spirv, texture_shadowlod)] public vec4 shadow1D(sampler1DShadow sampler, vec3 coord) { return texture(sampler, coord); } +[require(glsl_spirv, texture_shadowlod)] public vec4 shadow1D(sampler1DShadow sampler, vec3 coord, float bias) { return texture(sampler, coord, bias); } +[require(glsl_spirv, texture_shadowlod)] public vec4 shadow2D(sampler2DShadow sampler, vec3 coord) { return texture(sampler, coord); } +[require(glsl_spirv, texture_shadowlod)] public vec4 shadow2D(sampler2DShadow sampler, vec3 coord, float bias) { return texture(sampler, coord, bias); } +[require(glsl_spirv, texture_shadowlod)] public vec4 shadow1DProj(sampler1DShadow sampler, vec4 coord) { return textureProj(sampler, coord); } +[require(glsl_spirv, texture_shadowlod)] public vec4 shadow1DProj(sampler1DShadow sampler, vec4 coord, float bias) { return textureProj(sampler, coord, bias); } +[require(glsl_spirv, texture_shadowlod)] public vec4 shadow2DProj(sampler2DShadow sampler, vec4 coord) { return textureProj(sampler, coord); } +[require(glsl_spirv, texture_shadowlod)] public vec4 shadow2DProj(sampler2DShadow sampler, vec4 coord, float bias) { return textureProj(sampler, coord, bias); } +[require(glsl_spirv, texture_shadowlod)] public vec4 shadow1DLod(sampler1DShadow sampler, vec3 coord, float lod) { return textureLod(sampler, coord, lod); } +[require(glsl_spirv, texture_shadowlod)] public vec4 shadow2DLod(sampler2DShadow sampler, vec3 coord, float lod) { return textureLod(sampler, coord, lod); } +[require(glsl_spirv, texture_shadowlod)] public vec4 shadow1DProjLod(sampler1DShadow sampler, vec4 coord, float lod) { return textureProjLod(sampler, coord, lod); } +[require(glsl_spirv, texture_shadowlod)] public vec4 shadow2DProjLod(sampler2DShadow sampler, vec4 coord, float lod) { return textureProjLod(sampler, coord, lod); @@ -3010,10 +3270,10 @@ ${{{{ }}}} // readonly writeonly in GLSL means an object only allows information queries. - [require(spirv)] - [require(glsl)] [__readNone] - [ForceInline] public $(imageSizeIntOrIVec) imageSize(readonly writeonly $(fullTypeName) image) + [ForceInline] + [require(glsl_spirv, image_size)] + public $(imageSizeIntOrIVec) imageSize(readonly writeonly $(fullTypeName) image) { __target_switch { @@ -3026,10 +3286,10 @@ ${{{{ }; } } - [require(spirv)] - [require(glsl)] [__NoSideEffect] - [ForceInline] public $(targetType.prefix)vec4 imageLoad(readonly $(IMAGE_PARAMS)) + [ForceInline] + [require(glsl_spirv, image_loadstore)] + public $(targetType.prefix)vec4 imageLoad(readonly $(IMAGE_PARAMS)) { __target_switch { @@ -3044,9 +3304,9 @@ ${{{{ } } } - [require(spirv)] - [require(glsl)] - [ForceInline] public void imageStore(writeonly $(IMAGE_PARAMS), $(targetType.prefix)vec4 data) + [ForceInline] + [require(glsl_spirv, image_loadstore)] + public void imageStore(writeonly $(IMAGE_PARAMS), $(targetType.prefix)vec4 data) { __target_switch { @@ -3067,10 +3327,10 @@ ${{{{ { }}}} // readonly writeonly in GLSL means an object only allows information queries. - [require(spirv)] - [require(glsl)] [__readNone] - [ForceInline] public int imageSamples(readonly writeonly $(fullTypeName) image) + [ForceInline] + [require(glsl_spirv, image_samples)] + public int imageSamples(readonly writeonly $(fullTypeName) image) { __target_switch { @@ -3086,9 +3346,9 @@ ${{{{ ${{{{ } }}}} - [require(spirv)] - [require(glsl)] - [ForceInline] public $(targetType.type) imageAtomicAdd($(IMAGE_PARAMS), $(targetType.type) data) + [ForceInline] + [require(glsl_spirv, image_loadstore)] + public $(targetType.type) imageAtomicAdd($(IMAGE_PARAMS), $(targetType.type) data) { typeRequireChecks_image_atomic_tier1<$(targetType.type)>(); __target_switch @@ -3109,9 +3369,9 @@ ${{{{ } } } - [require(spirv)] - [require(glsl)] - [ForceInline] public $(targetType.type) imageAtomicExchange($(IMAGE_PARAMS), $(targetType.type) data) + [ForceInline] + [require(glsl_spirv, image_loadstore)] + public $(targetType.type) imageAtomicExchange($(IMAGE_PARAMS), $(targetType.type) data) { typeRequireChecks_image_atomic_tier1<$(targetType.type)>(); __target_switch @@ -3132,9 +3392,9 @@ ${{{{ } } } - [require(spirv)] - [require(glsl)] - [ForceInline] public $(targetType.type) imageAtomicMin($(IMAGE_PARAMS), $(targetType.type) data) + [ForceInline] + [require(glsl_spirv, image_loadstore)] + public $(targetType.type) imageAtomicMin($(IMAGE_PARAMS), $(targetType.type) data) { typeRequireChecks_image_atomic_tier2<$(targetType.type)>(); __target_switch @@ -3155,9 +3415,9 @@ ${{{{ } } } - [require(spirv)] - [require(glsl)] - [ForceInline] public $(targetType.type) imageAtomicMax($(IMAGE_PARAMS), $(targetType.type) data) + [ForceInline] + [require(glsl_spirv, image_loadstore)] + public $(targetType.type) imageAtomicMax($(IMAGE_PARAMS), $(targetType.type) data) { typeRequireChecks_image_atomic_tier2<$(targetType.type)>(); __target_switch @@ -3182,9 +3442,9 @@ ${{{{ if (!targetType.isInt) continue; }}}} - [require(spirv)] - [require(glsl)] - [ForceInline] public $(targetType.type) imageAtomicAnd($(IMAGE_PARAMS), $(targetType.type) data) + [ForceInline] + [require(glsl_spirv, image_loadstore)] + public $(targetType.type) imageAtomicAnd($(IMAGE_PARAMS), $(targetType.type) data) { __target_switch { @@ -3204,9 +3464,9 @@ ${{{{ } } } - [require(spirv)] - [require(glsl)] - [ForceInline] public $(targetType.type) imageAtomicOr($(IMAGE_PARAMS), $(targetType.type) data) + [ForceInline] + [require(glsl_spirv, image_loadstore)] + public $(targetType.type) imageAtomicOr($(IMAGE_PARAMS), $(targetType.type) data) { __target_switch { @@ -3226,9 +3486,10 @@ ${{{{ } } } - [require(spirv)] - [require(glsl)] - [ForceInline] public $(targetType.type) imageAtomicXor($(IMAGE_PARAMS), $(targetType.type) data) + + [ForceInline] + [require(glsl_spirv, image_loadstore)] + public $(targetType.type) imageAtomicXor($(IMAGE_PARAMS), $(targetType.type) data) { __target_switch { @@ -3248,9 +3509,9 @@ ${{{{ } } } - [require(spirv)] - [require(glsl)] - [ForceInline] public $(targetType.type) imageAtomicCompSwap($(IMAGE_PARAMS), $(targetType.type) compare, $(targetType.type) data) + [ForceInline] + [require(glsl_spirv, image_loadstore)] + public $(targetType.type) imageAtomicCompSwap($(IMAGE_PARAMS), $(targetType.type) compare, $(targetType.type) data) { __target_switch { @@ -3287,6 +3548,7 @@ public typealias hitObjectNV = HitObject; //GL_EXT_ray_tracing BuiltIn's +[require(any_target, raytracing_allstages)] void requireGLSLExtForRayTracingBuiltin() { __target_switch @@ -3294,25 +3556,27 @@ void requireGLSLExtForRayTracingBuiltin() case glsl: __requireGLSLExtension("GL_EXT_ray_tracing"); __intrinsic_asm ""; + default: + return; } } __spirv_version(1.4) +[require(any_target, raytracing_allstages)] void setupExtForRayTracingBuiltIn() { __target_switch { case glsl: requireGLSLExtForRayTracingBuiltin(); - case spirv: + default: return; } } -[require(glsl, raytracing)] -[require(spirv, raytracing)] public property uint3 gl_LaunchIDNV { + [require(glsl_spirv, raytracing_allstages)] get { setupExtForRayTracingBuiltIn(); @@ -3333,10 +3597,9 @@ public property uint3 gl_LaunchIDNV } } -[require(glsl, raytracing)] -[require(spirv, raytracing)] public property uint3 gl_LaunchIDEXT { + [require(cuda_glsl_hlsl_spirv, raytracing_allstages)] get { setupExtForRayTracingBuiltIn(); @@ -3344,10 +3607,9 @@ public property uint3 gl_LaunchIDEXT } } -[require(glsl, raytracing)] -[require(spirv, raytracing)] public property uint3 gl_LaunchSizeNV { + [require(glsl_spirv, raytracing_allstages)] get { setupExtForRayTracingBuiltIn(); @@ -3368,10 +3630,9 @@ public property uint3 gl_LaunchSizeNV } } -[require(glsl, raytracing)] -[require(spirv, raytracing)] public property uint3 gl_LaunchSizeEXT { + [require(cuda_glsl_hlsl_spirv, raytracing_allstages)] get { setupExtForRayTracingBuiltIn(); @@ -3379,10 +3640,10 @@ public property uint3 gl_LaunchSizeEXT } } -[require(glsl, raytracing)] -[require(spirv, raytracing)] + public property int gl_PrimitiveID { + [require(cuda_glsl_hlsl_spirv, raytracing_anyhit_closesthit_intersection)] get { setupExtForRayTracingBuiltIn(); @@ -3390,10 +3651,9 @@ public property int gl_PrimitiveID } } -[require(glsl, raytracing)] -[require(spirv, raytracing)] public property int gl_InstanceID { + [require(cuda_glsl_hlsl_spirv, raytracing_anyhit_closesthit_intersection)] get { setupExtForRayTracingBuiltIn(); @@ -3401,10 +3661,9 @@ public property int gl_InstanceID } } -[require(glsl, raytracing)] -[require(spirv, raytracing)] public property int gl_InstanceCustomIndexEXT { + [require(cuda_glsl_hlsl_spirv, raytracing_anyhit_closesthit_intersection)] get { setupExtForRayTracingBuiltIn(); @@ -3412,10 +3671,9 @@ public property int gl_InstanceCustomIndexEXT } } -[require(glsl, raytracing)] -[require(spirv, raytracing)] public property int gl_GeometryIndexEXT { + [require(glsl_hlsl_spirv, raytracing_anyhit_closesthit_intersection)] get { setupExtForRayTracingBuiltIn(); @@ -3423,10 +3681,9 @@ public property int gl_GeometryIndexEXT } } -[require(glsl, raytracing)] -[require(spirv, raytracing)] public property vec3 gl_WorldRayOriginEXT { + [require(glsl_hlsl_spirv, raytracing_anyhit_closesthit_intersection_miss)] get { setupExtForRayTracingBuiltIn(); @@ -3434,10 +3691,9 @@ public property vec3 gl_WorldRayOriginEXT } } -[require(glsl, raytracing)] -[require(spirv, raytracing)] public property vec3 gl_WorldRayDirectionEXT { + [require(glsl_hlsl_spirv, raytracing_anyhit_closesthit_intersection_miss)] get { setupExtForRayTracingBuiltIn(); @@ -3445,10 +3701,9 @@ public property vec3 gl_WorldRayDirectionEXT } } -[require(glsl, raytracing)] -[require(spirv, raytracing)] public property vec3 gl_ObjectRayOriginEXT { + [require(glsl_hlsl_spirv, raytracing_anyhit_closesthit_intersection)] get { setupExtForRayTracingBuiltIn(); @@ -3456,10 +3711,9 @@ public property vec3 gl_ObjectRayOriginEXT } } -[require(glsl, raytracing)] -[require(spirv, raytracing)] public property vec3 gl_ObjectRayDirectionEXT { + [require(glsl_hlsl_spirv, raytracing_anyhit_closesthit_intersection)] get { setupExtForRayTracingBuiltIn(); @@ -3467,10 +3721,9 @@ public property vec3 gl_ObjectRayDirectionEXT } } -[require(glsl, raytracing)] -[require(spirv, raytracing)] public property float gl_RayTminEXT { + [require(glsl_hlsl_spirv, raytracing_anyhit_closesthit_intersection_miss)] get { setupExtForRayTracingBuiltIn(); @@ -3478,10 +3731,9 @@ public property float gl_RayTminEXT } } -[require(glsl, raytracing)] -[require(spirv, raytracing)] public property float gl_RayTmaxEXT { + [require(glsl_hlsl_spirv, raytracing_anyhit_closesthit_intersection_miss)] get { setupExtForRayTracingBuiltIn(); @@ -3489,10 +3741,9 @@ public property float gl_RayTmaxEXT } } -[require(glsl, raytracing)] -[require(spirv, raytracing)] public property uint gl_IncomingRayFlagsEXT { + [require(glsl_hlsl_spirv, raytracing_anyhit_closesthit_intersection_miss)] get { setupExtForRayTracingBuiltIn(); @@ -3500,10 +3751,9 @@ public property uint gl_IncomingRayFlagsEXT } } -[require(glsl, raytracing)] -[require(spirv, raytracing)] public property float gl_HitTEXT { + [require(glsl_spirv, raytracing_anyhit_closesthit)] get { setupExtForRayTracingBuiltIn(); @@ -3524,10 +3774,10 @@ public property float gl_HitTEXT } } -[require(glsl, raytracing)] -[require(spirv, raytracing)] public property uint gl_HitKindEXT { + + [require(glsl_hlsl_spirv, raytracing_anyhit_closesthit)] get { setupExtForRayTracingBuiltIn(); @@ -3535,10 +3785,9 @@ public property uint gl_HitKindEXT } } -[require(glsl, raytracing)] -[require(spirv, raytracing)] public property mat4x3 gl_ObjectToWorldEXT { + [require(glsl_spirv, raytracing_anyhit_closesthit_intersection)] get { setupExtForRayTracingBuiltIn(); @@ -3560,10 +3809,9 @@ public property mat4x3 gl_ObjectToWorldEXT } } -[require(glsl, raytracing)] -[require(spirv, raytracing)] public property mat3x4 gl_ObjectToWorld3x4EXT { + [require(glsl_spirv, raytracing_anyhit_closesthit_intersection)] get { setupExtForRayTracingBuiltIn(); @@ -3584,10 +3832,9 @@ public property mat3x4 gl_ObjectToWorld3x4EXT } } -[require(glsl, raytracing)] -[require(spirv, raytracing)] public property mat4x3 gl_WorldToObjectEXT { + [require(glsl_spirv, raytracing_anyhit_closesthit_intersection)] get { setupExtForRayTracingBuiltIn(); @@ -3609,10 +3856,9 @@ public property mat4x3 gl_WorldToObjectEXT } } -[require(glsl, raytracing)] -[require(spirv, raytracing)] public property mat3x4 gl_WorldToObject3x4EXT { + [require(glsl_spirv, raytracing_anyhit_closesthit_intersection)] get { setupExtForRayTracingBuiltIn(); @@ -3635,9 +3881,8 @@ public property mat3x4 gl_WorldToObject3x4EXT // GL_EXT_ray_tracing functions -__glsl_extension(GL_EXT_ray_query) -[require(glsl, raytracing)] -[require(spirv, raytracing)] +__glsl_extension(GL_EXT_ray_tracing) +[require(glsl_spirv, raytracing_raygen_closesthit_miss)] public void traceRayEXT( accelerationStructureEXT topLevel, uint rayFlags, @@ -3689,17 +3934,15 @@ public void traceRayEXT( } } -__glsl_extension(GL_EXT_ray_query) -[require(glsl, raytracing)] -[require(spirv, raytracing)] +__glsl_extension(GL_EXT_ray_tracing) +[require(glsl_spirv, raytracing_intersection)] public bool reportIntersectionEXT(float hitT, uint hitKind) { return __reportIntersection(hitT, hitKind); } -__glsl_extension(GL_EXT_ray_query) -[require(glsl, raytracing)] -[require(spirv, raytracing)] +__glsl_extension(GL_EXT_ray_tracing) +[require(glsl_spirv, raytracing_raygen_closesthit_miss_callable)] public void executeCallableEXT( uint sbtRecordIndex, int callable /*callableDataEXT and callableDataInEXT*/) @@ -3748,9 +3991,8 @@ public static const uint gl_RayQueryCandidateIntersectionAABBEXT = 1U; // GL_EXT_ray_query functions __glsl_extension(GL_EXT_ray_query) -[require(glsl, rayquery)] -[require(spirv, rayquery)] [ForceInline] +[require(glsl_spirv, rayquery)] public void rayQueryInitializeEXT( inout rayQueryEXT q, accelerationStructureEXT topLevel, @@ -3765,17 +4007,15 @@ public void rayQueryInitializeEXT( } __glsl_extension(GL_EXT_ray_query) -[require(glsl, rayquery)] -[require(spirv, rayquery)] [ForceInline] +[require(glsl_spirv, rayquery)] public bool rayQueryProceedEXT(inout rayQueryEXT q) { return q.Proceed(); } __glsl_extension(GL_EXT_ray_query) -[require(glsl, rayquery)] -[require(spirv, rayquery)] +[require(glsl_spirv, rayquery)] [mutating] [ForceInline] public void rayQueryTerminateEXT(inout rayQueryEXT q) @@ -3784,28 +4024,25 @@ public void rayQueryTerminateEXT(inout rayQueryEXT q) } __glsl_extension(GL_EXT_ray_query) -[require(glsl, rayquery)] -[require(spirv, rayquery)] [ForceInline] +[require(glsl_spirv, rayquery)] public void rayQueryGenerateIntersectionEXT(inout rayQueryEXT q, float tHit) { q.CommitProceduralPrimitiveHit(tHit); } __glsl_extension(GL_EXT_ray_query) -[require(glsl, rayquery)] -[require(spirv, rayquery)] [ForceInline] +[require(glsl_spirv, rayquery)] public void rayQueryConfirmIntersectionEXT(inout rayQueryEXT q) { q.CommitNonOpaqueTriangleHit(); } __glsl_extension(GL_EXT_ray_query) -[require(glsl, rayquery)] -[require(spirv, rayquery)] [__NoSideEffect] [ForceInline] +[require(glsl_spirv, rayquery)] public uint rayQueryGetIntersectionTypeEXT(rayQueryEXT q, bool committed) { if (committed) @@ -3819,45 +4056,40 @@ public uint rayQueryGetIntersectionTypeEXT(rayQueryEXT q, bool committed) } __glsl_extension(GL_EXT_ray_query) -[require(glsl, rayquery)] -[require(spirv, rayquery)] [ForceInline] +[require(glsl_spirv, rayquery)] public float rayQueryGetRayTMinEXT(rayQueryEXT q) { return q.RayTMin(); } __glsl_extension(GL_EXT_ray_query) -[require(glsl, rayquery)] -[require(spirv, rayquery)] [ForceInline] +[require(glsl_spirv, rayquery)] public uint rayQueryGetRayFlagsEXT(rayQueryEXT q) { return q.RayFlags(); } __glsl_extension(GL_EXT_ray_query) -[require(glsl, rayquery)] -[require(spirv, rayquery)] [ForceInline] +[require(glsl_spirv, rayquery)] public vec3 rayQueryGetWorldRayOriginEXT(rayQueryEXT q) { return q.WorldRayOrigin(); } __glsl_extension(GL_EXT_ray_query) -[require(glsl, rayquery)] -[require(spirv, rayquery)] [ForceInline] +[require(glsl_spirv, rayquery)] public vec3 rayQueryGetWorldRayDirectionEXT(rayQueryEXT q) { return q.WorldRayDirection(); } __glsl_extension(GL_EXT_ray_query) -[require(glsl, rayquery)] -[require(spirv, rayquery)] [ForceInline] +[require(glsl_spirv, rayquery)] public float rayQueryGetIntersectionTEXT(rayQueryEXT q, bool committed) { if (committed) @@ -3871,9 +4103,8 @@ public float rayQueryGetIntersectionTEXT(rayQueryEXT q, bool committed) } __glsl_extension(GL_EXT_ray_query) -[require(glsl, rayquery)] -[require(spirv, rayquery)] [ForceInline] +[require(glsl_spirv, rayquery)] public int rayQueryGetIntersectionInstanceCustomIndexEXT(rayQueryEXT q, bool committed) { if (committed) @@ -3887,9 +4118,8 @@ public int rayQueryGetIntersectionInstanceCustomIndexEXT(rayQueryEXT q, bool com } __glsl_extension(GL_EXT_ray_query) -[require(glsl, rayquery)] -[require(spirv, rayquery)] [ForceInline] +[require(glsl_spirv, rayquery)] public int rayQueryGetIntersectionInstanceIdEXT(rayQueryEXT q, bool committed) { if (committed) @@ -3903,9 +4133,8 @@ public int rayQueryGetIntersectionInstanceIdEXT(rayQueryEXT q, bool committed) } __glsl_extension(GL_EXT_ray_query) -[require(glsl, rayquery)] -[require(spirv, rayquery)] [ForceInline] +[require(glsl_spirv, rayquery)] public uint rayQueryGetIntersectionInstanceShaderBindingTableRecordOffsetEXT(rayQueryEXT q, bool committed) { if (committed) @@ -3919,9 +4148,8 @@ public uint rayQueryGetIntersectionInstanceShaderBindingTableRecordOffsetEXT(ray } __glsl_extension(GL_EXT_ray_query) -[require(glsl, rayquery)] -[require(spirv, rayquery)] [ForceInline] +[require(glsl_spirv, rayquery)] public int rayQueryGetIntersectionGeometryIndexEXT(rayQueryEXT q, bool committed) { if (committed) @@ -3935,9 +4163,8 @@ public int rayQueryGetIntersectionGeometryIndexEXT(rayQueryEXT q, bool committed } __glsl_extension(GL_EXT_ray_query) -[require(glsl, rayquery)] -[require(spirv, rayquery)] [ForceInline] +[require(glsl_spirv, rayquery)] public int rayQueryGetIntersectionPrimitiveIndexEXT(rayQueryEXT q, bool committed) { if (committed) @@ -3951,9 +4178,8 @@ public int rayQueryGetIntersectionPrimitiveIndexEXT(rayQueryEXT q, bool committe } __glsl_extension(GL_EXT_ray_query) -[require(glsl, rayquery)] -[require(spirv, rayquery)] [ForceInline] +[require(glsl_spirv, rayquery)] public vec2 rayQueryGetIntersectionBarycentricsEXT(rayQueryEXT q, bool committed) { if (committed) @@ -3967,9 +4193,8 @@ public vec2 rayQueryGetIntersectionBarycentricsEXT(rayQueryEXT q, bool committed } __glsl_extension(GL_EXT_ray_query) -[require(glsl, rayquery)] -[require(spirv, rayquery)] [ForceInline] +[require(glsl_spirv, rayquery)] public bool rayQueryGetIntersectionFrontFaceEXT(rayQueryEXT q, bool committed) { if (committed) @@ -3983,17 +4208,15 @@ public bool rayQueryGetIntersectionFrontFaceEXT(rayQueryEXT q, bool committed) } __glsl_extension(GL_EXT_ray_query) -[require(glsl, rayquery)] -[require(spirv, rayquery)] [ForceInline] +[require(glsl_spirv, rayquery)] public bool rayQueryGetIntersectionCandidateAABBOpaqueEXT(rayQueryEXT q) { return q.CandidateProceduralPrimitiveNonOpaque(); } -[require(glsl, rayquery)] -[require(spirv, rayquery)] [ForceInline] +[require(glsl_spirv, rayquery)] public vec3 rayQueryGetIntersectionObjectRayDirectionEXT(rayQueryEXT q, bool committed) { if (committed) @@ -4007,9 +4230,8 @@ public vec3 rayQueryGetIntersectionObjectRayDirectionEXT(rayQueryEXT q, bool com } __glsl_extension(GL_EXT_ray_query) -[require(glsl, rayquery)] -[require(spirv, rayquery)] [ForceInline] +[require(glsl_spirv, rayquery)] public vec3 rayQueryGetIntersectionObjectRayOriginEXT(rayQueryEXT q, bool committed) { if (committed) @@ -4023,9 +4245,8 @@ public vec3 rayQueryGetIntersectionObjectRayOriginEXT(rayQueryEXT q, bool commit } __glsl_extension(GL_EXT_ray_query) -[require(glsl, rayquery)] -[require(spirv, rayquery)] [ForceInline] +[require(glsl_spirv, rayquery)] public mat4x3 rayQueryGetIntersectionObjectToWorldEXT(rayQueryEXT q, bool committed) { if (committed) @@ -4039,9 +4260,8 @@ public mat4x3 rayQueryGetIntersectionObjectToWorldEXT(rayQueryEXT q, bool commit } __glsl_extension(GL_EXT_ray_query) -[require(glsl, rayquery)] -[require(spirv, rayquery)] [ForceInline] +[require(glsl_spirv, rayquery)] public mat4x3 rayQueryGetIntersectionWorldToObjectEXT(rayQueryEXT q, bool committed) { if (committed) @@ -4059,9 +4279,8 @@ public mat4x3 rayQueryGetIntersectionWorldToObjectEXT(rayQueryEXT q, bool commit __glsl_extension(GL_EXT_ray_tracing) __glsl_extension(GL_NV_shader_invocation_reorder) __glsl_extension(GLSL_EXT_buffer_reference_uvec2) -[require(glsl, ser)] -[require(spirv, ser)] [ForceInline] +[require(glsl_spirv, ser_raygen_closesthit_miss)] public void hitObjectTraceRayNV( inout hitObjectNV hitObject, accelerationStructureEXT topLevel, @@ -4120,9 +4339,8 @@ __glsl_extension(GL_EXT_ray_tracing) __glsl_extension(GL_NV_shader_invocation_reorder) __glsl_extension(GLSL_EXT_buffer_reference_uvec2) __glsl_extension(GL_NV_ray_tracing_motion_blur) -[require(glsl, ser_motion)] -[require(spirv, ser_motion)] [ForceInline] +[require(glsl_spirv, ser_motion_raygen_closesthit_miss)] public void hitObjectTraceRayMotionNV( inout hitObjectNV hitObject, accelerationStructureEXT topLevel, @@ -4185,9 +4403,8 @@ public void hitObjectTraceRayMotionNV( __glsl_extension(GL_EXT_ray_tracing) __glsl_extension(GL_NV_shader_invocation_reorder) __glsl_extension(GLSL_EXT_buffer_reference_uvec2) -[require(glsl, ser)] -[require(spirv, ser)] [ForceInline] +[require(glsl_spirv, ser_raygen_closesthit_miss)] public void hitObjectRecordHitNV( inout hitObjectNV hitObject, accelerationStructureEXT topLevel, @@ -4249,9 +4466,8 @@ __glsl_extension(GL_EXT_ray_tracing) __glsl_extension(GL_NV_shader_invocation_reorder) __glsl_extension(GLSL_EXT_buffer_reference_uvec2) __glsl_extension(GL_NV_ray_tracing_motion_blur) -[require(glsl, ser_motion)] -[require(spirv, ser_motion)] [ForceInline] +[require(glsl_spirv, ser_motion_raygen_closesthit_miss)] public void hitObjectRecordHitMotionNV( inout hitObjectNV hitObject, accelerationStructureEXT topLevel, @@ -4315,9 +4531,8 @@ public void hitObjectRecordHitMotionNV( __glsl_extension(GL_EXT_ray_tracing) __glsl_extension(GL_NV_shader_invocation_reorder) __glsl_extension(GLSL_EXT_buffer_reference_uvec2) -[require(glsl, ser)] -[require(spirv, ser)] [ForceInline] +[require(glsl_spirv, ser_raygen_closesthit_miss)] public void hitObjectRecordHitWithIndexNV( inout hitObjectNV hitObject, accelerationStructureEXT topLevel, @@ -4376,9 +4591,8 @@ __glsl_extension(GL_EXT_ray_tracing) __glsl_extension(GL_NV_shader_invocation_reorder) __glsl_extension(GLSL_EXT_buffer_reference_uvec2) __glsl_extension(GL_NV_ray_tracing_motion_blur) -[require(glsl, ser_motion)] -[require(spirv, ser_motion)] [ForceInline] +[require(glsl_spirv, ser_motion_raygen_closesthit_miss)] public void hitObjectRecordHitWithIndexMotionNV( inout hitObjectNV hitObject, accelerationStructureEXT topLevel, @@ -4441,9 +4655,8 @@ public void hitObjectRecordHitWithIndexMotionNV( __glsl_extension(GL_EXT_ray_tracing) __glsl_extension(GL_NV_shader_invocation_reorder) __glsl_extension(GLSL_EXT_buffer_reference_uvec2) -[require(glsl, ser)] -[require(spirv, ser)] [ForceInline] +[require(glsl_spirv, ser_raygen_closesthit_miss)] public void hitObjectRecordMissNV( inout hitObjectNV hitObject, uint sbtRecordIndex, @@ -4462,9 +4675,8 @@ __glsl_extension(GL_EXT_ray_tracing) __glsl_extension(GL_NV_shader_invocation_reorder) __glsl_extension(GLSL_EXT_buffer_reference_uvec2) __glsl_extension(GL_NV_ray_tracing_motion_blur) -[require(glsl, ser_motion)] -[require(spirv, ser_motion)] [ForceInline] +[require(glsl_spirv, ser_motion_raygen_closesthit_miss)] public void hitObjectRecordMissMotionNV( inout hitObjectNV hitObject, uint sbtRecordIndex, @@ -4484,9 +4696,8 @@ public void hitObjectRecordMissMotionNV( __glsl_extension(GL_EXT_ray_tracing) __glsl_extension(GL_NV_shader_invocation_reorder) __glsl_extension(GLSL_EXT_buffer_reference_uvec2) -[require(glsl, ser)] -[require(spirv, ser)] [ForceInline] +[require(glsl_spirv, ser_raygen_closesthit_miss)] public void hitObjectRecordEmptyNV(hitObjectNV hitObject) { hitObject = HitObject::MakeNop(); @@ -4495,9 +4706,8 @@ public void hitObjectRecordEmptyNV(hitObjectNV hitObject) __glsl_extension(GL_EXT_ray_tracing) __glsl_extension(GL_NV_shader_invocation_reorder) __glsl_extension(GLSL_EXT_buffer_reference_uvec2) -[require(glsl, ser)] -[require(spirv, ser)] [ForceInline] +[require(glsl_spirv, ser_raygen_closesthit_miss)] public void hitObjectExecuteShaderNV( inout hitObjectNV hitObject, constexpr int payload) @@ -4521,9 +4731,8 @@ public void hitObjectExecuteShaderNV( __glsl_extension(GL_EXT_ray_tracing) __glsl_extension(GL_NV_shader_invocation_reorder) __glsl_extension(GLSL_EXT_buffer_reference_uvec2) -[require(glsl, ser)] -[require(spirv, ser)] [ForceInline] +[require(glsl_spirv, ser_raygen_closesthit_miss)] public bool hitObjectIsEmptyNV(hitObjectNV hitObject) { return hitObject.IsNop(); @@ -4532,9 +4741,8 @@ public bool hitObjectIsEmptyNV(hitObjectNV hitObject) __glsl_extension(GL_EXT_ray_tracing) __glsl_extension(GL_NV_shader_invocation_reorder) __glsl_extension(GLSL_EXT_buffer_reference_uvec2) -[require(glsl, ser)] -[require(spirv, ser)] [ForceInline] +[require(glsl_spirv, ser_raygen_closesthit_miss)] public bool hitObjectIsMissNV(hitObjectNV hitObject) { return hitObject.IsMiss(); @@ -4543,9 +4751,8 @@ public bool hitObjectIsMissNV(hitObjectNV hitObject) __glsl_extension(GL_EXT_ray_tracing) __glsl_extension(GL_NV_shader_invocation_reorder) __glsl_extension(GLSL_EXT_buffer_reference_uvec2) -[require(glsl, ser)] -[require(spirv, ser)] [ForceInline] +[require(glsl_spirv, ser_raygen_closesthit_miss)] public bool hitObjectIsHitNV(hitObjectNV hitObject) { return hitObject.IsHit(); @@ -4554,9 +4761,8 @@ public bool hitObjectIsHitNV(hitObjectNV hitObject) __glsl_extension(GL_EXT_ray_tracing) __glsl_extension(GL_NV_shader_invocation_reorder) __glsl_extension(GLSL_EXT_buffer_reference_uvec2) -[require(glsl, ser)] -[require(spirv, ser)] [ForceInline] +[require(glsl_spirv, ser_raygen_closesthit_miss)] public float hitObjectGetRayTMinNV(hitObjectNV hitObject) { return hitObject.GetRayDesc().TMin; @@ -4565,9 +4771,8 @@ public float hitObjectGetRayTMinNV(hitObjectNV hitObject) __glsl_extension(GL_EXT_ray_tracing) __glsl_extension(GL_NV_shader_invocation_reorder) __glsl_extension(GLSL_EXT_buffer_reference_uvec2) -[require(glsl, ser)] -[require(spirv, ser)] [ForceInline] +[require(glsl_spirv, ser_raygen_closesthit_miss)] public float hitObjectGetRayTMaxNV(hitObjectNV hitObject) { return hitObject.GetRayDesc().TMax; @@ -4576,9 +4781,8 @@ public float hitObjectGetRayTMaxNV(hitObjectNV hitObject) __glsl_extension(GL_EXT_ray_tracing) __glsl_extension(GL_NV_shader_invocation_reorder) __glsl_extension(GLSL_EXT_buffer_reference_uvec2) -[require(glsl, ser)] -[require(spirv, ser)] [ForceInline] +[require(glsl_spirv, ser_raygen_closesthit_miss)] public vec3 hitObjectGetWorldRayOriginNV(hitObjectNV hitObject) { return hitObject.GetRayDesc().Origin; @@ -4587,9 +4791,8 @@ public vec3 hitObjectGetWorldRayOriginNV(hitObjectNV hitObject) __glsl_extension(GL_EXT_ray_tracing) __glsl_extension(GL_NV_shader_invocation_reorder) __glsl_extension(GLSL_EXT_buffer_reference_uvec2) -[require(glsl, ser)] -[require(spirv, ser)] [ForceInline] +[require(glsl_spirv, ser_raygen_closesthit_miss)] public vec3 hitObjectGetWorldRayDirectionNV(hitObjectNV hitObject) { return hitObject.GetRayDesc().Direction; @@ -4598,9 +4801,8 @@ public vec3 hitObjectGetWorldRayDirectionNV(hitObjectNV hitObject) __glsl_extension(GL_EXT_ray_tracing) __glsl_extension(GL_NV_shader_invocation_reorder) __glsl_extension(GLSL_EXT_buffer_reference_uvec2) -[require(glsl, ser)] -[require(spirv, ser)] [ForceInline] +[require(glsl_spirv, ser_raygen_closesthit_miss)] public vec3 hitObjectGetObjectRayOriginNV(hitObjectNV hitObject) { return hitObject.GetObjectRayOrigin(); @@ -4609,9 +4811,8 @@ public vec3 hitObjectGetObjectRayOriginNV(hitObjectNV hitObject) __glsl_extension(GL_EXT_ray_tracing) __glsl_extension(GL_NV_shader_invocation_reorder) __glsl_extension(GLSL_EXT_buffer_reference_uvec2) -[require(glsl, ser)] -[require(spirv, ser)] [ForceInline] +[require(glsl_spirv, ser_raygen_closesthit_miss)] public vec3 hitObjectGetObjectRayDirectionNV(hitObjectNV hitObject) { return hitObject.GetObjectRayDirection(); @@ -4620,9 +4821,8 @@ public vec3 hitObjectGetObjectRayDirectionNV(hitObjectNV hitObject) __glsl_extension(GL_EXT_ray_tracing) __glsl_extension(GL_NV_shader_invocation_reorder) __glsl_extension(GLSL_EXT_buffer_reference_uvec2) -[require(glsl, ser)] -[require(spirv, ser)] [ForceInline] +[require(glsl_spirv, ser_raygen_closesthit_miss)] public mat4x3 hitObjectGetObjectToWorldNV(hitObjectNV hitObject) { return transpose(hitObject.GetObjectToWorld()); @@ -4631,9 +4831,8 @@ public mat4x3 hitObjectGetObjectToWorldNV(hitObjectNV hitObject) __glsl_extension(GL_EXT_ray_tracing) __glsl_extension(GL_NV_shader_invocation_reorder) __glsl_extension(GLSL_EXT_buffer_reference_uvec2) -[require(glsl, ser)] -[require(spirv, ser)] [ForceInline] +[require(glsl_spirv, ser_raygen_closesthit_miss)] public mat4x3 hitObjectGetWorldToObjectNV(hitObjectNV hitObject) { return transpose(hitObject.GetWorldToObject()); @@ -4642,9 +4841,8 @@ public mat4x3 hitObjectGetWorldToObjectNV(hitObjectNV hitObject) __glsl_extension(GL_EXT_ray_tracing) __glsl_extension(GL_NV_shader_invocation_reorder) __glsl_extension(GLSL_EXT_buffer_reference_uvec2) -[require(glsl, ser)] -[require(spirv, ser)] [ForceInline] +[require(glsl_spirv, ser_raygen_closesthit_miss)] public int hitObjectGetInstanceCustomIndexNV(hitObjectNV hitObject) { return hitObject.GetInstanceID(); @@ -4653,17 +4851,18 @@ public int hitObjectGetInstanceCustomIndexNV(hitObjectNV hitObject) __glsl_extension(GL_EXT_ray_tracing) __glsl_extension(GL_NV_shader_invocation_reorder) __glsl_extension(GLSL_EXT_buffer_reference_uvec2) -[require(glsl, ser)] -[require(spirv, ser)] [ForceInline] +[require(glsl_spirv, ser_raygen_closesthit_miss)] public int hitObjectGetInstanceIdNV(hitObjectNV hitObject) { return hitObject.GetInstanceIndex(); } -[require(glsl, ser)] -[require(spirv, ser)] +__glsl_extension(GL_EXT_ray_tracing) +__glsl_extension(GL_NV_shader_invocation_reorder) +__glsl_extension(GLSL_EXT_buffer_reference_uvec2) [ForceInline] +[require(glsl_spirv, ser_raygen_closesthit_miss)] public int hitObjectGetGeometryIndexNV(hitObjectNV hitObject) { return hitObject.GetGeometryIndex(); @@ -4672,9 +4871,8 @@ public int hitObjectGetGeometryIndexNV(hitObjectNV hitObject) __glsl_extension(GL_EXT_ray_tracing) __glsl_extension(GL_NV_shader_invocation_reorder) __glsl_extension(GLSL_EXT_buffer_reference_uvec2) -[require(glsl, ser)] -[require(spirv, ser)] [ForceInline] +[require(glsl_spirv, ser_raygen_closesthit_miss)] public int hitObjectGetPrimitiveIndexNV(hitObjectNV hitObject) { return hitObject.GetPrimitiveIndex(); @@ -4683,9 +4881,8 @@ public int hitObjectGetPrimitiveIndexNV(hitObjectNV hitObject) __glsl_extension(GL_EXT_ray_tracing) __glsl_extension(GL_NV_shader_invocation_reorder) __glsl_extension(GLSL_EXT_buffer_reference_uvec2) -[require(glsl, ser)] -[require(spirv, ser)] [ForceInline] +[require(glsl_spirv, ser_raygen_closesthit_miss)] public uint hitObjectGetHitKindNV(hitObjectNV hitObject) { return hitObject.GetHitKind(); @@ -4694,9 +4891,8 @@ public uint hitObjectGetHitKindNV(hitObjectNV hitObject) __glsl_extension(GL_EXT_ray_tracing) __glsl_extension(GL_NV_shader_invocation_reorder) __glsl_extension(GLSL_EXT_buffer_reference_uvec2) -[require(glsl, ser)] -[require(spirv, ser)] [ForceInline] +[require(glsl_spirv, ser_raygen_closesthit_miss)] public void hitObjectGetAttributesNV( inout hitObjectNV hitObject, constexpr int attributeLocation) @@ -4721,9 +4917,8 @@ public void hitObjectGetAttributesNV( __glsl_extension(GL_EXT_ray_tracing) __glsl_extension(GL_NV_shader_invocation_reorder) __glsl_extension(GLSL_EXT_buffer_reference_uvec2) -[require(glsl, ser)] -[require(spirv, ser)] [ForceInline] +[require(glsl_spirv, ser_raygen_closesthit_miss)] public uvec2 hitObjectGetShaderRecordBufferHandleNV(hitObjectNV hitObject) { return hitObject.GetShaderRecordBufferHandle(); @@ -4732,9 +4927,8 @@ public uvec2 hitObjectGetShaderRecordBufferHandleNV(hitObjectNV hitObject) __glsl_extension(GL_EXT_ray_tracing) __glsl_extension(GL_NV_shader_invocation_reorder) __glsl_extension(GLSL_EXT_buffer_reference_uvec2) -[require(glsl, ser)] -[require(spirv, ser)] [ForceInline] +[require(glsl_spirv, ser_raygen_closesthit_miss)] public uint hitObjectGetShaderBindingTableRecordIndexNV(hitObjectNV hitObject) { return hitObject.GetShaderTableIndex(); @@ -4743,9 +4937,8 @@ public uint hitObjectGetShaderBindingTableRecordIndexNV(hitObjectNV hitObject) __glsl_extension(GL_EXT_ray_tracing) __glsl_extension(GL_NV_shader_invocation_reorder) __glsl_extension(GLSL_EXT_buffer_reference_uvec2) -[require(glsl, ser)] -[require(spirv, ser)] [ForceInline] +[require(glsl_spirv, ser_raygen_closesthit_miss)] public float hitObjectGetCurrentTimeNV(hitObjectNV hitObject) { return hitObject.GetCurrentTime(); @@ -4754,9 +4947,8 @@ public float hitObjectGetCurrentTimeNV(hitObjectNV hitObject) __glsl_extension(GL_EXT_ray_tracing) __glsl_extension(GL_NV_shader_invocation_reorder) __glsl_extension(GLSL_EXT_buffer_reference_uvec2) -[require(glsl, ser)] -[require(spirv, ser)] [ForceInline] +[require(glsl_spirv, ser_raygen)] public void reorderThreadNV(uint hint, uint bits) { ReorderThread(hint, bits); @@ -4765,9 +4957,8 @@ public void reorderThreadNV(uint hint, uint bits) __glsl_extension(GL_EXT_ray_tracing) __glsl_extension(GL_NV_shader_invocation_reorder) __glsl_extension(GLSL_EXT_buffer_reference_uvec2) -[require(glsl, ser)] -[require(spirv, ser)] [ForceInline] +[require(glsl_spirv, ser_raygen)] public void reorderThreadNV(hitObjectNV hitObject) { ReorderThread(hitObject); @@ -4776,9 +4967,8 @@ public void reorderThreadNV(hitObjectNV hitObject) __glsl_extension(GL_EXT_ray_tracing) __glsl_extension(GL_NV_shader_invocation_reorder) __glsl_extension(GLSL_EXT_buffer_reference_uvec2) -[require(glsl, ser)] -[require(spirv, ser)] [ForceInline] +[require(glsl_spirv, ser_raygen)] public void reorderThreadNV(hitObjectNV hitObject, uint hint, uint bits) { ReorderThread(hitObject, hint, bits); @@ -4786,10 +4976,9 @@ public void reorderThreadNV(hitObjectNV hitObject, uint hint, uint bits) /// GL_NV_ray_tracing_motion_blur -[require(glsl, raytracing_motionblur)] -[require(spirv, raytracing_motionblur)] public property float gl_CurrentRayTimeNV { + [require(glsl_spirv, raytracing_motionblur_anyhit_closesthit_intersection_miss)] get { __target_switch @@ -4809,9 +4998,8 @@ public property float gl_CurrentRayTimeNV __glsl_extension(GL_EXT_ray_tracing) __glsl_extension(GL_NV_ray_tracing_motion_blur) -[require(glsl, raytracing_motionblur)] -[require(spirv, raytracing_motionblur)] [ForceInline] +[require(glsl_spirv, raytracing_motionblur_raygen_closesthit_miss)] public void traceRayMotionNV( accelerationStructureEXT topLevel, uint rayFlags, @@ -4901,7 +5089,7 @@ void shader_subgroup_preamble() { { case glsl: typeRequireChecks_shader_subgroup_GLSL(); - case spirv: + default: return; } @@ -4909,50 +5097,57 @@ void shader_subgroup_preamble() { // GL_KHR_shader_subgroup_basic Built-in Variables +[require(cpp_cuda_glsl_hlsl_spirv, subgroup_basic)] void requireGLSLExtForSubgroupBasicBuiltin() { __target_switch { case glsl: __requireGLSLExtension("GL_KHR_shader_subgroup_basic"); __intrinsic_asm ""; + default: + return; } } -__spirv_version(1.3) +[require(cpp_cuda_glsl_hlsl_spirv, subgroup_basic)] void setupExtForSubgroupBasicBuiltIn() { __target_switch { case glsl: requireGLSLExtForSubgroupBasicBuiltin(); - case spirv: + default: return; } } +__spirv_version(1.3) +[require(cpp_cuda_glsl_hlsl_spirv, subgroup_ballot)] void requireGLSLExtForSubgroupBallotBuiltin() { __target_switch { case glsl: __requireGLSLExtension("GL_KHR_shader_subgroup_ballot"); __intrinsic_asm ""; + default: + return; } } -__spirv_version(1.3) +__spirv_version(1.3) +[require(cpp_cuda_glsl_hlsl_spirv, subgroup_ballot)] void setupExtForSubgroupBallotBuiltIn() { __target_switch { case glsl: requireGLSLExtForSubgroupBallotBuiltin(); - case spirv: + default: return; } } -[require(glsl)] -[require(spirv)] public property uint gl_NumSubgroups { + [require(glsl_spirv, subgroup_basic)] get { setupExtForSubgroupBasicBuiltIn(); __target_switch @@ -4969,10 +5164,9 @@ public property uint gl_NumSubgroups { } } -[require(glsl)] -[require(spirv)] public property uint gl_SubgroupID { + [require(glsl_spirv, subgroup_basic)] get { setupExtForSubgroupBasicBuiltIn(); __target_switch @@ -4988,30 +5182,27 @@ public property uint gl_SubgroupID } } -[require(glsl)] -[require(spirv)] public property uint gl_SubgroupSize { + [require(cpp_cuda_glsl_hlsl_spirv, subgroup_basic)] get { setupExtForSubgroupBasicBuiltIn(); return WaveGetLaneCount(); } } -[require(glsl)] -[require(spirv)] public property uint gl_SubgroupInvocationID { + [require(cpp_cuda_glsl_hlsl_spirv, subgroup_basic)] get { setupExtForSubgroupBasicBuiltIn(); return WaveGetLaneIndex(); } } -[require(glsl)] -[require(spirv)] public property uvec4 gl_SubgroupEqMask { + [require(glsl_spirv, subgroup_basic_ballot)] get { setupExtForSubgroupBasicBuiltIn(); setupExtForSubgroupBallotBuiltIn(); @@ -5028,10 +5219,9 @@ public property uvec4 gl_SubgroupEqMask } } -[require(glsl)] -[require(spirv)] public property uvec4 gl_SubgroupGeMask { + [require(glsl_spirv, subgroup_basic_ballot)] get { setupExtForSubgroupBasicBuiltIn(); setupExtForSubgroupBallotBuiltIn(); @@ -5048,10 +5238,9 @@ public property uvec4 gl_SubgroupGeMask } } -[require(glsl)] -[require(spirv)] public property uvec4 gl_SubgroupGtMask { + [require(glsl_spirv, subgroup_basic_ballot)] get { setupExtForSubgroupBasicBuiltIn(); setupExtForSubgroupBallotBuiltIn(); @@ -5068,10 +5257,9 @@ public property uvec4 gl_SubgroupGtMask } } -[require(glsl)] -[require(spirv)] public property uvec4 gl_SubgroupLeMask { + [require(glsl_spirv, subgroup_basic_ballot)] get { setupExtForSubgroupBasicBuiltIn(); setupExtForSubgroupBallotBuiltIn(); @@ -5088,10 +5276,9 @@ public property uvec4 gl_SubgroupLeMask } } -[require(glsl)] -[require(spirv)] public property uvec4 gl_SubgroupLtMask { + [require(glsl_spirv, subgroup_basic_ballot)] get { setupExtForSubgroupBasicBuiltIn(); setupExtForSubgroupBallotBuiltIn(); @@ -5110,9 +5297,11 @@ public property uvec4 gl_SubgroupLtMask // GL_KHR_shader_subgroup_basic -__glsl_extension(GL_KHR_shader_subgroup_basic) [require(glsl)] -__spirv_version(1.3) [require(spirv)] -[ForceInline] public void subgroupBarrier() +__spirv_version(1.3) +__glsl_extension(GL_KHR_shader_subgroup_basic) +[ForceInline] +[require(cuda_glsl_hlsl_spirv, subgroup_basic)] +public void subgroupBarrier() { __target_switch { @@ -5131,9 +5320,11 @@ __spirv_version(1.3) [require(spirv)] } } -__glsl_extension(GL_KHR_shader_subgroup_basic) [require(glsl)] -__spirv_version(1.3) [require(spirv)] -[ForceInline] public void subgroupMemoryBarrier() +__spirv_version(1.3) +__glsl_extension(GL_KHR_shader_subgroup_basic) +[ForceInline] +[require(cuda_glsl_hlsl_spirv, subgroup_basic)] +public void subgroupMemoryBarrier() { __target_switch { @@ -5152,9 +5343,11 @@ __spirv_version(1.3) [require(spirv)] } } -__glsl_extension(GL_KHR_shader_subgroup_basic) [require(glsl)] -__spirv_version(1.3) [require(spirv)] -[ForceInline] public void subgroupMemoryBarrierBuffer() +__spirv_version(1.3) +__glsl_extension(GL_KHR_shader_subgroup_basic) +[ForceInline] +[require(cuda_glsl_hlsl_spirv, subgroup_basic)] +public void subgroupMemoryBarrierBuffer() { // the following implementation is NOT the same as DeviceMemoryBarrier // HLSL lacks the same granularity of blocking on subgroup memory within a subgroup @@ -5175,9 +5368,11 @@ __spirv_version(1.3) [require(spirv)] } } -__glsl_extension(GL_KHR_shader_subgroup_basic) [require(glsl)] -__spirv_version(1.3) [require(spirv)] -[ForceInline] public void subgroupMemoryBarrierImage() +__spirv_version(1.3) +__glsl_extension(GL_KHR_shader_subgroup_basic) +[ForceInline] +[require(cuda_glsl_hlsl_spirv, subgroup_basic)] +public void subgroupMemoryBarrierImage() { __target_switch { @@ -5195,9 +5390,11 @@ __spirv_version(1.3) [require(spirv)] } } -__glsl_extension(GL_KHR_shader_subgroup_basic) [require(glsl)] -__spirv_version(1.3) [require(spirv)] -[ForceInline] public void subgroupMemoryBarrierShared() +__spirv_version(1.3) +__glsl_extension(GL_KHR_shader_subgroup_basic) +[ForceInline] +[require(cuda_glsl_hlsl_spirv, subgroup_basic)] +public void subgroupMemoryBarrierShared() { __target_switch { @@ -5217,9 +5414,11 @@ __spirv_version(1.3) [require(spirv)] } } -__glsl_extension(GL_KHR_shader_subgroup_basic) [require(glsl)] -__spirv_version(1.3) [require(spirv)] -[ForceInline] public bool subgroupElect() +__spirv_version(1.3) +__glsl_extension(GL_KHR_shader_subgroup_basic) +[ForceInline] +[require(cuda_glsl_hlsl_spirv, subgroup_basic)] +public bool subgroupElect() { __target_switch { @@ -5235,36 +5434,41 @@ __spirv_version(1.3) [require(spirv)] // GL_KHR_shader_subgroup_vote -__glsl_extension(GL_KHR_shader_subgroup_vote) [require(glsl)] -__spirv_version(1.3) [require(spirv)] -[ForceInline] public bool subgroupAll(bool value) +__spirv_version(1.3) +__glsl_extension(GL_KHR_shader_subgroup_vote) +[ForceInline] +[require(cuda_glsl_hlsl_spirv, subgroup_vote)] +public bool subgroupAll(bool value) { - return WaveActiveAllTrue(value); - } -__glsl_extension(GL_KHR_shader_subgroup_vote) [require(glsl)] -__spirv_version(1.3) [require(spirv)] -[ForceInline] public bool subgroupAny(bool value) +__spirv_version(1.3) +__glsl_extension(GL_KHR_shader_subgroup_vote) +[ForceInline] +[require(cuda_glsl_hlsl_spirv, subgroup_vote)] +public bool subgroupAny(bool value) { return WaveActiveAnyTrue(value); - } __generic -__glsl_extension(GL_KHR_shader_subgroup_vote) [require(glsl)] -__spirv_version(1.3) [require(spirv)] -[ForceInline] public bool subgroupAllEqual(T value) +__spirv_version(1.3) +__glsl_extension(GL_KHR_shader_subgroup_vote) +[ForceInline] +[require(cuda_glsl_hlsl_spirv, subgroup_vote)] +public bool subgroupAllEqual(T value) { shader_subgroup_preamble(); return WaveActiveAllEqual(value); } __generic -__glsl_extension(GL_KHR_shader_subgroup_vote) [require(glsl)] -__spirv_version(1.3) [require(spirv)] -[ForceInline] public bool subgroupAllEqual(vector value) +__spirv_version(1.3) +__glsl_extension(GL_KHR_shader_subgroup_vote) +[ForceInline] +[require(cuda_glsl_hlsl_spirv, subgroup_vote)] +public bool subgroupAllEqual(vector value) { shader_subgroup_preamble(); return WaveActiveAllEqual(value); @@ -5273,45 +5477,55 @@ __spirv_version(1.3) [require(spirv)] // GL_KHR_shader_subgroup_arithmetic __generic -__glsl_extension(GL_KHR_shader_subgroup_arithmetic) [require(glsl)] -__spirv_version(1.3) [require(spirv)] -[ForceInline] public T subgroupAdd(T value) +__spirv_version(1.3) +__glsl_extension(GL_KHR_shader_subgroup_arithmetic) +[ForceInline] +[require(cuda_glsl_hlsl_spirv, subgroup_arithmetic)] +public T subgroupAdd(T value) { shader_subgroup_preamble(); return WaveActiveSum(value); } __generic -__glsl_extension(GL_KHR_shader_subgroup_arithmetic) [require(glsl)] -__spirv_version(1.3) [require(spirv)] -[ForceInline] public T subgroupMul(T value) +__spirv_version(1.3) +__glsl_extension(GL_KHR_shader_subgroup_arithmetic) +[ForceInline] +[require(cuda_glsl_hlsl_spirv, subgroup_arithmetic)] +public T subgroupMul(T value) { shader_subgroup_preamble(); return WaveActiveProduct(value); } __generic -__glsl_extension(GL_KHR_shader_subgroup_arithmetic) [require(glsl)] -__spirv_version(1.3) [require(spirv)] -[ForceInline] public T subgroupMin(T value) +__spirv_version(1.3) +__glsl_extension(GL_KHR_shader_subgroup_arithmetic) +[ForceInline] +[require(cuda_glsl_hlsl_spirv, subgroup_arithmetic)] +public T subgroupMin(T value) { shader_subgroup_preamble(); return WaveActiveMin(value); } __generic -__glsl_extension(GL_KHR_shader_subgroup_arithmetic) [require(glsl)] -__spirv_version(1.3) [require(spirv)] -[ForceInline] public T subgroupMax(T value) +__spirv_version(1.3) +__glsl_extension(GL_KHR_shader_subgroup_arithmetic) +[ForceInline] +[require(cuda_glsl_hlsl_spirv, subgroup_arithmetic)] +public T subgroupMax(T value) { shader_subgroup_preamble(); return WaveActiveMax(value); } __generic -__glsl_extension(GL_KHR_shader_subgroup_arithmetic) [require(glsl)] -__spirv_version(1.3) [require(spirv)] -[ForceInline] public T subgroupAnd(T value) +__spirv_version(1.3) +__glsl_extension(GL_KHR_shader_subgroup_arithmetic) +[ForceInline] +[require(glsl_spirv, subgroup_arithmetic)] +public T subgroupAnd(T value) { shader_subgroup_preamble(); __target_switch @@ -5334,9 +5548,11 @@ __spirv_version(1.3) [require(spirv)] } __generic -__glsl_extension(GL_KHR_shader_subgroup_arithmetic) [require(glsl)] -__spirv_version(1.3) [require(spirv)] -[ForceInline] public T subgroupOr(T value) +__spirv_version(1.3) +__glsl_extension(GL_KHR_shader_subgroup_arithmetic) +[ForceInline] +[require(glsl_spirv, subgroup_arithmetic)] +public T subgroupOr(T value) { shader_subgroup_preamble(); __target_switch @@ -5359,9 +5575,11 @@ __spirv_version(1.3) [require(spirv)] } __generic -__glsl_extension(GL_KHR_shader_subgroup_arithmetic) [require(glsl)] -__spirv_version(1.3) [require(spirv)] -[ForceInline] public T subgroupXor(T value) +__spirv_version(1.3) +__glsl_extension(GL_KHR_shader_subgroup_arithmetic) +[ForceInline] +[require(glsl_spirv, subgroup_arithmetic)] +public T subgroupXor(T value) { shader_subgroup_preamble(); __target_switch @@ -5384,9 +5602,11 @@ __spirv_version(1.3) [require(spirv)] } __generic -__glsl_extension(GL_KHR_shader_subgroup_arithmetic) [require(glsl)] -__spirv_version(1.3) [require(spirv)] -[ForceInline] public T subgroupInclusiveAdd(T value) +__spirv_version(1.3) +__glsl_extension(GL_KHR_shader_subgroup_arithmetic) +[ForceInline] +[require(glsl_spirv, subgroup_arithmetic)] +public T subgroupInclusiveAdd(T value) { shader_subgroup_preamble(); __target_switch @@ -5403,9 +5623,11 @@ __spirv_version(1.3) [require(spirv)] } __generic -__glsl_extension(GL_KHR_shader_subgroup_arithmetic) [require(glsl)] -__spirv_version(1.3) [require(spirv)] -[ForceInline] public T subgroupInclusiveMul(T value) +__spirv_version(1.3) +__glsl_extension(GL_KHR_shader_subgroup_arithmetic) +[ForceInline] +[require(glsl_spirv, subgroup_arithmetic)] +public T subgroupInclusiveMul(T value) { shader_subgroup_preamble(); __target_switch @@ -5422,9 +5644,11 @@ __spirv_version(1.3) [require(spirv)] } __generic -__glsl_extension(GL_KHR_shader_subgroup_arithmetic) [require(glsl)] -__spirv_version(1.3) [require(spirv)] -[ForceInline] public T subgroupInclusiveMin(T value) +__spirv_version(1.3) +__glsl_extension(GL_KHR_shader_subgroup_arithmetic) +[ForceInline] +[require(glsl_spirv, subgroup_arithmetic)] +public T subgroupInclusiveMin(T value) { shader_subgroup_preamble(); __target_switch @@ -5443,9 +5667,11 @@ __spirv_version(1.3) [require(spirv)] } __generic -__glsl_extension(GL_KHR_shader_subgroup_arithmetic) [require(glsl)] -__spirv_version(1.3) [require(spirv)] -[ForceInline] public T subgroupInclusiveMax(T value) +__spirv_version(1.3) +__glsl_extension(GL_KHR_shader_subgroup_arithmetic) +[ForceInline] +[require(glsl_spirv, subgroup_arithmetic)] +public T subgroupInclusiveMax(T value) { shader_subgroup_preamble(); __target_switch @@ -5464,9 +5690,11 @@ __spirv_version(1.3) [require(spirv)] } __generic -__glsl_extension(GL_KHR_shader_subgroup_arithmetic) [require(glsl)] -__spirv_version(1.3) [require(spirv)] -[ForceInline] public T subgroupInclusiveAnd(T value) +__spirv_version(1.3) +__glsl_extension(GL_KHR_shader_subgroup_arithmetic) +[ForceInline] +[require(glsl_spirv, subgroup_arithmetic)] +public T subgroupInclusiveAnd(T value) { shader_subgroup_preamble(); __target_switch @@ -5489,9 +5717,11 @@ __spirv_version(1.3) [require(spirv)] } __generic -__glsl_extension(GL_KHR_shader_subgroup_arithmetic) [require(glsl)] -__spirv_version(1.3) [require(spirv)] -[ForceInline] public T subgroupInclusiveOr(T value) +__spirv_version(1.3) +__glsl_extension(GL_KHR_shader_subgroup_arithmetic) +[ForceInline] +[require(glsl_spirv, subgroup_arithmetic)] +public T subgroupInclusiveOr(T value) { shader_subgroup_preamble(); __target_switch @@ -5514,9 +5744,11 @@ __spirv_version(1.3) [require(spirv)] } __generic -__glsl_extension(GL_KHR_shader_subgroup_arithmetic) [require(glsl)] -__spirv_version(1.3) [require(spirv)] -[ForceInline] public T subgroupInclusiveXor(T value) +__spirv_version(1.3) +__glsl_extension(GL_KHR_shader_subgroup_arithmetic) +[ForceInline] +[require(glsl_spirv, subgroup_arithmetic)] +public T subgroupInclusiveXor(T value) { shader_subgroup_preamble(); __target_switch @@ -5531,9 +5763,11 @@ __spirv_version(1.3) [require(spirv)] } __generic -__glsl_extension(GL_KHR_shader_subgroup_arithmetic) [require(glsl)] -__spirv_version(1.3) [require(spirv)] -[ForceInline] public T subgroupExclusiveAdd(T value) +__spirv_version(1.3) +__glsl_extension(GL_KHR_shader_subgroup_arithmetic) +[ForceInline] +[require(cuda_glsl_hlsl_spirv, subgroup_arithmetic)] +public T subgroupExclusiveAdd(T value) { shader_subgroup_preamble(); return WavePrefixSum(value); @@ -5541,18 +5775,22 @@ __spirv_version(1.3) [require(spirv)] __generic -__glsl_extension(GL_KHR_shader_subgroup_arithmetic) [require(glsl)] -__spirv_version(1.3) [require(spirv)] -[ForceInline] public T subgroupExclusiveMul(T value) +__spirv_version(1.3) +__glsl_extension(GL_KHR_shader_subgroup_arithmetic) +[ForceInline] +[require(cuda_glsl_hlsl_spirv, subgroup_arithmetic)] +public T subgroupExclusiveMul(T value) { shader_subgroup_preamble(); return WavePrefixProduct(value); } __generic -__glsl_extension(GL_KHR_shader_subgroup_arithmetic) [require(glsl)] -__spirv_version(1.3) [require(spirv)] -[ForceInline] public T subgroupExclusiveMin(T value) +__spirv_version(1.3) +__glsl_extension(GL_KHR_shader_subgroup_arithmetic) +[ForceInline] +[require(glsl_spirv, subgroup_arithmetic)] +public T subgroupExclusiveMin(T value) { shader_subgroup_preamble(); __target_switch @@ -5571,9 +5809,11 @@ __spirv_version(1.3) [require(spirv)] } __generic -__glsl_extension(GL_KHR_shader_subgroup_arithmetic) [require(glsl)] -__spirv_version(1.3) [require(spirv)] -[ForceInline] public T subgroupExclusiveMax(T value) +__spirv_version(1.3) +__glsl_extension(GL_KHR_shader_subgroup_arithmetic) +[ForceInline] +[require(glsl_spirv, subgroup_arithmetic)] +public T subgroupExclusiveMax(T value) { shader_subgroup_preamble(); __target_switch @@ -5592,9 +5832,11 @@ __spirv_version(1.3) [require(spirv)] } __generic -__glsl_extension(GL_KHR_shader_subgroup_arithmetic) [require(glsl)] -__spirv_version(1.3) [require(spirv)] -[ForceInline] public T subgroupExclusiveAnd(T value) +__spirv_version(1.3) +__glsl_extension(GL_KHR_shader_subgroup_arithmetic) +[ForceInline] +[require(glsl_spirv, subgroup_arithmetic)] +public T subgroupExclusiveAnd(T value) { shader_subgroup_preamble(); __target_switch @@ -5607,9 +5849,11 @@ __spirv_version(1.3) [require(spirv)] } __generic -__glsl_extension(GL_KHR_shader_subgroup_arithmetic) [require(glsl)] -__spirv_version(1.3) [require(spirv)] -[ForceInline] public T subgroupExclusiveOr(T value) +__spirv_version(1.3) +__glsl_extension(GL_KHR_shader_subgroup_arithmetic) +[ForceInline] +[require(glsl_spirv, subgroup_arithmetic)] +public T subgroupExclusiveOr(T value) { shader_subgroup_preamble(); __target_switch @@ -5622,9 +5866,11 @@ __spirv_version(1.3) [require(spirv)] } __generic -__glsl_extension(GL_KHR_shader_subgroup_arithmetic) [require(glsl)] -__spirv_version(1.3) [require(spirv)] -[ForceInline] public T subgroupExclusiveXor(T value) +__spirv_version(1.3) +__glsl_extension(GL_KHR_shader_subgroup_arithmetic) +[ForceInline] +[require(glsl_spirv, subgroup_arithmetic)] +public T subgroupExclusiveXor(T value) { shader_subgroup_preamble(); __target_switch @@ -5640,45 +5886,55 @@ __spirv_version(1.3) [require(spirv)] //note: this is a seperate section because it is so huge that the only reasonable way to implement this is to just regex replace code __generic -__glsl_extension(GL_KHR_shader_subgroup_arithmetic) [require(glsl)] -__spirv_version(1.3) [require(spirv)] -[ForceInline] public vector subgroupAdd(vector value) +__spirv_version(1.3) +__glsl_extension(GL_KHR_shader_subgroup_arithmetic) +[ForceInline] +[require(cuda_glsl_hlsl_spirv, subgroup_arithmetic)] +public vector subgroupAdd(vector value) { shader_subgroup_preamble(); return WaveActiveSum(value); } __generic -__glsl_extension(GL_KHR_shader_subgroup_arithmetic) [require(glsl)] -__spirv_version(1.3) [require(spirv)] -[ForceInline] public vector subgroupMul(vector value) +__spirv_version(1.3) +__glsl_extension(GL_KHR_shader_subgroup_arithmetic) +[ForceInline] +[require(cuda_glsl_hlsl_spirv, subgroup_arithmetic)] +public vector subgroupMul(vector value) { shader_subgroup_preamble(); return WaveActiveProduct(value); } __generic -__glsl_extension(GL_KHR_shader_subgroup_arithmetic) [require(glsl)] -__spirv_version(1.3) [require(spirv)] -[ForceInline] public vector subgroupMin(vector value) +__spirv_version(1.3) +__glsl_extension(GL_KHR_shader_subgroup_arithmetic) +[ForceInline] +[require(cuda_glsl_hlsl_spirv, subgroup_arithmetic)] +public vector subgroupMin(vector value) { shader_subgroup_preamble(); return WaveActiveMin(value); } __generic -__glsl_extension(GL_KHR_shader_subgroup_arithmetic) [require(glsl)] -__spirv_version(1.3) [require(spirv)] -[ForceInline] public vector subgroupMax(vector value) +__spirv_version(1.3) +__glsl_extension(GL_KHR_shader_subgroup_arithmetic) +[ForceInline] +[require(cuda_glsl_hlsl_spirv, subgroup_arithmetic)] +public vector subgroupMax(vector value) { shader_subgroup_preamble(); return WaveActiveMax(value); } __generic -__glsl_extension(GL_KHR_shader_subgroup_arithmetic) [require(glsl)] -__spirv_version(1.3) [require(spirv)] -[ForceInline] public vector subgroupAnd(vector value) +__spirv_version(1.3) +__glsl_extension(GL_KHR_shader_subgroup_arithmetic) +[ForceInline] +[require(glsl_spirv, subgroup_arithmetic)] +public vector subgroupAnd(vector value) { shader_subgroup_preamble(); __target_switch @@ -5702,9 +5958,11 @@ __spirv_version(1.3) [require(spirv)] } __generic -__glsl_extension(GL_KHR_shader_subgroup_arithmetic) [require(glsl)] -__spirv_version(1.3) [require(spirv)] -[ForceInline] public vector subgroupOr(vector value) +__spirv_version(1.3) +__glsl_extension(GL_KHR_shader_subgroup_arithmetic) +[ForceInline] +[require(glsl_spirv, subgroup_arithmetic)] +public vector subgroupOr(vector value) { shader_subgroup_preamble(); __target_switch @@ -5728,9 +5986,11 @@ __spirv_version(1.3) [require(spirv)] } __generic -__glsl_extension(GL_KHR_shader_subgroup_arithmetic) [require(glsl)] -__spirv_version(1.3) [require(spirv)] -[ForceInline] public vector subgroupXor(vector value) +__spirv_version(1.3) +__glsl_extension(GL_KHR_shader_subgroup_arithmetic) +[ForceInline] +[require(glsl_spirv, subgroup_arithmetic)] +public vector subgroupXor(vector value) { shader_subgroup_preamble(); __target_switch @@ -5753,9 +6013,11 @@ __spirv_version(1.3) [require(spirv)] } __generic -__glsl_extension(GL_KHR_shader_subgroup_arithmetic) [require(glsl)] -__spirv_version(1.3) [require(spirv)] -[ForceInline] public vector subgroupInclusiveAdd(vector value) +__spirv_version(1.3) +__glsl_extension(GL_KHR_shader_subgroup_arithmetic) +[ForceInline] +[require(glsl_spirv, subgroup_arithmetic)] +public vector subgroupInclusiveAdd(vector value) { shader_subgroup_preamble(); __target_switch @@ -5772,9 +6034,11 @@ __spirv_version(1.3) [require(spirv)] } __generic -__glsl_extension(GL_KHR_shader_subgroup_arithmetic) [require(glsl)] -__spirv_version(1.3) [require(spirv)] -[ForceInline] public vector subgroupInclusiveMul(vector value) +__spirv_version(1.3) +__glsl_extension(GL_KHR_shader_subgroup_arithmetic) +[ForceInline] +[require(glsl_spirv, subgroup_arithmetic)] +public vector subgroupInclusiveMul(vector value) { shader_subgroup_preamble(); __target_switch @@ -5791,9 +6055,11 @@ __spirv_version(1.3) [require(spirv)] } __generic -__glsl_extension(GL_KHR_shader_subgroup_arithmetic) [require(glsl)] -__spirv_version(1.3) [require(spirv)] -[ForceInline] public vector subgroupInclusiveMin(vector value) +__spirv_version(1.3) +__glsl_extension(GL_KHR_shader_subgroup_arithmetic) +[ForceInline] +[require(glsl_spirv, subgroup_arithmetic)] +public vector subgroupInclusiveMin(vector value) { shader_subgroup_preamble(); __target_switch @@ -5812,9 +6078,11 @@ __spirv_version(1.3) [require(spirv)] } __generic -__glsl_extension(GL_KHR_shader_subgroup_arithmetic) [require(glsl)] -__spirv_version(1.3) [require(spirv)] -[ForceInline] public vector subgroupInclusiveMax(vector value) +__spirv_version(1.3) +__glsl_extension(GL_KHR_shader_subgroup_arithmetic) +[ForceInline] +[require(glsl_spirv, subgroup_arithmetic)] +public vector subgroupInclusiveMax(vector value) { shader_subgroup_preamble(); __target_switch @@ -5833,9 +6101,11 @@ __spirv_version(1.3) [require(spirv)] } __generic -__glsl_extension(GL_KHR_shader_subgroup_arithmetic) [require(glsl)] -__spirv_version(1.3) [require(spirv)] -[ForceInline] public vector subgroupInclusiveAnd(vector value) +__spirv_version(1.3) +__glsl_extension(GL_KHR_shader_subgroup_arithmetic) +[ForceInline] +[require(glsl_spirv, subgroup_arithmetic)] +public vector subgroupInclusiveAnd(vector value) { shader_subgroup_preamble(); __target_switch @@ -5849,9 +6119,11 @@ __spirv_version(1.3) [require(spirv)] } __generic -__glsl_extension(GL_KHR_shader_subgroup_arithmetic) [require(glsl)] -__spirv_version(1.3) [require(spirv)] -[ForceInline] public vector subgroupInclusiveOr(vector value) +__spirv_version(1.3) +__glsl_extension(GL_KHR_shader_subgroup_arithmetic) +[ForceInline] +[require(glsl_spirv, subgroup_arithmetic)] +public vector subgroupInclusiveOr(vector value) { shader_subgroup_preamble(); __target_switch @@ -5865,9 +6137,11 @@ __spirv_version(1.3) [require(spirv)] } __generic -__glsl_extension(GL_KHR_shader_subgroup_arithmetic) [require(glsl)] -__spirv_version(1.3) [require(spirv)] -[ForceInline] public vector subgroupInclusiveXor(vector value) +__spirv_version(1.3) +__glsl_extension(GL_KHR_shader_subgroup_arithmetic) +[ForceInline] +[require(glsl_spirv, subgroup_arithmetic)] +public vector subgroupInclusiveXor(vector value) { shader_subgroup_preamble(); __target_switch @@ -5881,9 +6155,11 @@ __spirv_version(1.3) [require(spirv)] } __generic -__glsl_extension(GL_KHR_shader_subgroup_arithmetic) [require(glsl)] -__spirv_version(1.3) [require(spirv)] -[ForceInline] public vector subgroupExclusiveAdd(vector value) +__spirv_version(1.3) +__glsl_extension(GL_KHR_shader_subgroup_arithmetic) +[ForceInline] +[require(cuda_glsl_hlsl_spirv, subgroup_arithmetic)] +public vector subgroupExclusiveAdd(vector value) { shader_subgroup_preamble(); return WavePrefixSum(value); @@ -5891,18 +6167,22 @@ __spirv_version(1.3) [require(spirv)] __generic -__glsl_extension(GL_KHR_shader_subgroup_arithmetic) [require(glsl)] -__spirv_version(1.3) [require(spirv)] -[ForceInline] public vector subgroupExclusiveMul(vector value) +__spirv_version(1.3) +__glsl_extension(GL_KHR_shader_subgroup_arithmetic) +[ForceInline] +[require(cuda_glsl_hlsl_spirv, subgroup_arithmetic)] +public vector subgroupExclusiveMul(vector value) { shader_subgroup_preamble(); return WavePrefixProduct(value); } __generic -__glsl_extension(GL_KHR_shader_subgroup_arithmetic) [require(glsl)] -__spirv_version(1.3) [require(spirv)] -[ForceInline] public vector subgroupExclusiveMin(vector value) +__spirv_version(1.3) +__glsl_extension(GL_KHR_shader_subgroup_arithmetic) +[ForceInline] +[require(glsl_spirv, subgroup_arithmetic)] +public vector subgroupExclusiveMin(vector value) { shader_subgroup_preamble(); __target_switch @@ -5921,9 +6201,11 @@ __spirv_version(1.3) [require(spirv)] } __generic -__glsl_extension(GL_KHR_shader_subgroup_arithmetic) [require(glsl)] -__spirv_version(1.3) [require(spirv)] -[ForceInline] public vector subgroupExclusiveMax(vector value) +__spirv_version(1.3) +__glsl_extension(GL_KHR_shader_subgroup_arithmetic) +[ForceInline] +[require(glsl_spirv, subgroup_arithmetic)] +public vector subgroupExclusiveMax(vector value) { shader_subgroup_preamble(); __target_switch @@ -5942,9 +6224,11 @@ __spirv_version(1.3) [require(spirv)] } __generic -__glsl_extension(GL_KHR_shader_subgroup_arithmetic) [require(glsl)] -__spirv_version(1.3) [require(spirv)] -[ForceInline] public vector subgroupExclusiveAnd(vector value) +__spirv_version(1.3) +__glsl_extension(GL_KHR_shader_subgroup_arithmetic) +[ForceInline] +[require(glsl_spirv, subgroup_arithmetic)] +public vector subgroupExclusiveAnd(vector value) { shader_subgroup_preamble(); __target_switch @@ -5957,9 +6241,11 @@ __spirv_version(1.3) [require(spirv)] } __generic -__glsl_extension(GL_KHR_shader_subgroup_arithmetic) [require(glsl)] -__spirv_version(1.3) [require(spirv)] -[ForceInline] public vector subgroupExclusiveOr(vector value) +__spirv_version(1.3) +__glsl_extension(GL_KHR_shader_subgroup_arithmetic) +[ForceInline] +[require(glsl_spirv, subgroup_arithmetic)] +public vector subgroupExclusiveOr(vector value) { shader_subgroup_preamble(); __target_switch @@ -5972,9 +6258,11 @@ __spirv_version(1.3) [require(spirv)] } __generic -__glsl_extension(GL_KHR_shader_subgroup_arithmetic) [require(glsl)] -__spirv_version(1.3) [require(spirv)] -[ForceInline] public vector subgroupExclusiveXor(vector value) +__spirv_version(1.3) +__glsl_extension(GL_KHR_shader_subgroup_arithmetic) +[ForceInline] +[require(glsl_spirv, subgroup_arithmetic)] +public vector subgroupExclusiveXor(vector value) { shader_subgroup_preamble(); __target_switch @@ -5989,45 +6277,55 @@ __spirv_version(1.3) [require(spirv)] // GL_KHR_shader_subgroup_ballot __generic -__glsl_extension(GL_KHR_shader_subgroup_ballot) [require(glsl)] -__spirv_version(1.3) [require(spirv)] -[ForceInline] public T subgroupBroadcast(T value, uint id) +__spirv_version(1.3) +__glsl_extension(GL_KHR_shader_subgroup_ballot) +[ForceInline] +[require(cuda_glsl_hlsl_spirv, subgroup_ballot)] +public T subgroupBroadcast(T value, uint id) { shader_subgroup_preamble(); return WaveMaskBroadcastLaneAt(WaveGetActiveMask(), value, id); } __generic -__glsl_extension(GL_KHR_shader_subgroup_ballot) [require(glsl)] -__spirv_version(1.3) [require(spirv)] -[ForceInline] public vector subgroupBroadcast(vector value, uint id) +__spirv_version(1.3) +__glsl_extension(GL_KHR_shader_subgroup_ballot) +[ForceInline] +[require(cuda_glsl_hlsl_spirv, subgroup_ballot)] +public vector subgroupBroadcast(vector value, uint id) { shader_subgroup_preamble(); return WaveMaskBroadcastLaneAt(WaveGetActiveMask(), value, id); } __generic -__glsl_extension(GL_KHR_shader_subgroup_ballot) [require(glsl)] -__spirv_version(1.3) [require(spirv)] -[ForceInline] public T subgroupBroadcastFirst(T value) +__spirv_version(1.3) +__glsl_extension(GL_KHR_shader_subgroup_ballot) +[ForceInline] +[require(cuda_glsl_hlsl_spirv, subgroup_ballot)] +public T subgroupBroadcastFirst(T value) { shader_subgroup_preamble(); return WaveMaskReadLaneFirst(WaveGetActiveMask(), value); } __generic -__glsl_extension(GL_KHR_shader_subgroup_ballot) [require(glsl)] -__spirv_version(1.3) [require(spirv)] -[ForceInline] public vector subgroupBroadcastFirst(vector value) +__spirv_version(1.3) +__glsl_extension(GL_KHR_shader_subgroup_ballot) +[ForceInline] +[require(cuda_glsl_hlsl_spirv, subgroup_ballot)] +public vector subgroupBroadcastFirst(vector value) { shader_subgroup_preamble(); return WaveMaskReadLaneFirst(WaveGetActiveMask(), value); } // WaveMaskBallot is not the same; it force trunc's -__glsl_extension(GL_KHR_shader_subgroup_ballot) [require(glsl)] -__spirv_version(1.3) [require(spirv)] -[ForceInline] public uvec4 subgroupBallot(bool value) +__spirv_version(1.3) +__glsl_extension(GL_KHR_shader_subgroup_ballot) +[ForceInline] +[require(cuda_glsl_hlsl_spirv, subgroup_ballot)] +public uvec4 subgroupBallot(bool value) { return WaveActiveBallot(value); } @@ -6042,9 +6340,11 @@ __spirv_version(1.3) [require(spirv)] // note 2: we have a waveLaneCount check because based on wave lane count we can determine if we can do a // fast path or slow path (know index is 0 or non 0) // } -__glsl_extension(GL_KHR_shader_subgroup_ballot) [require(glsl)] -__spirv_version(1.3) [require(spirv)] -[ForceInline] public bool subgroupInverseBallot(uvec4 value) +__spirv_version(1.3) +__glsl_extension(GL_KHR_shader_subgroup_ballot) +[ForceInline] +[require(cuda_glsl_hlsl_spirv, subgroup_ballot)] +public bool subgroupInverseBallot(uvec4 value) { __target_switch { @@ -6075,9 +6375,11 @@ __spirv_version(1.3) [require(spirv)] } // same logic as subgroupInverseBallot -__glsl_extension(GL_KHR_shader_subgroup_ballot) [require(glsl)] -__spirv_version(1.3) [require(spirv)] -[ForceInline] public bool subgroupBallotBitExtract(uvec4 value, uint index) +__spirv_version(1.3) +__glsl_extension(GL_KHR_shader_subgroup_ballot) +[ForceInline] +[require(cuda_glsl_hlsl_spirv, subgroup_ballot)] +public bool subgroupBallotBitExtract(uvec4 value, uint index) { __target_switch { @@ -6107,9 +6409,11 @@ __spirv_version(1.3) [require(spirv)] // the count is only supposed to use uvec4 values within bottom bits of subgroup launched, not a simple countbits -__glsl_extension(GL_KHR_shader_subgroup_ballot) [require(glsl)] -__spirv_version(1.3) [require(spirv)] -[ForceInline] public uint subgroupBallotBitCount(uvec4 value) +__spirv_version(1.3) +__glsl_extension(GL_KHR_shader_subgroup_ballot) +[ForceInline] +[require(glsl_spirv, subgroup_ballot)] +public uint subgroupBallotBitCount(uvec4 value) { __target_switch { @@ -6123,9 +6427,11 @@ __spirv_version(1.3) [require(spirv)] } } -__glsl_extension(GL_KHR_shader_subgroup_ballot) [require(glsl)] -__spirv_version(1.3) [require(spirv)] -[ForceInline] public uint subgroupBallotInclusiveBitCount(uvec4 value) +__spirv_version(1.3) +__glsl_extension(GL_KHR_shader_subgroup_ballot) +[ForceInline] +[require(glsl_spirv, subgroup_ballot)] +public uint subgroupBallotInclusiveBitCount(uvec4 value) { __target_switch { @@ -6139,9 +6445,11 @@ __spirv_version(1.3) [require(spirv)] } } -__glsl_extension(GL_KHR_shader_subgroup_ballot) [require(glsl)] -__spirv_version(1.3) [require(spirv)] -[ForceInline] public uint subgroupBallotExclusiveBitCount(uvec4 value) +__spirv_version(1.3) +__glsl_extension(GL_KHR_shader_subgroup_ballot) +[ForceInline] +[require(glsl_spirv, subgroup_ballot)] +public uint subgroupBallotExclusiveBitCount(uvec4 value) { __target_switch { @@ -6155,9 +6463,11 @@ __spirv_version(1.3) [require(spirv)] } } -__glsl_extension(GL_KHR_shader_subgroup_ballot) [require(glsl)] -__spirv_version(1.3) [require(spirv)] -[ForceInline] public uint subgroupBallotFindLSB(uvec4 value) +__spirv_version(1.3) +__glsl_extension(GL_KHR_shader_subgroup_ballot) +[ForceInline] +[require(glsl_spirv, subgroup_ballot)] +public uint subgroupBallotFindLSB(uvec4 value) { __target_switch { @@ -6171,9 +6481,11 @@ __spirv_version(1.3) [require(spirv)] } } -__glsl_extension(GL_KHR_shader_subgroup_ballot) [require(glsl)] -__spirv_version(1.3) [require(spirv)] -[ForceInline] public uint subgroupBallotFindMSB(uvec4 value) +__spirv_version(1.3) +__glsl_extension(GL_KHR_shader_subgroup_ballot) +[ForceInline] +[require(glsl_spirv, subgroup_ballot)] +public uint subgroupBallotFindMSB(uvec4 value) { __target_switch { @@ -6190,17 +6502,20 @@ __spirv_version(1.3) [require(spirv)] // GL_KHR_shader_subgroup_shuffle __generic -__glsl_extension(GL_KHR_shader_subgroup_shuffle) [require(glsl)] -__spirv_version(1.3) [require(spirv)] -[ForceInline] public T subgroupShuffle(T value, uint index) +__spirv_version(1.3) +__glsl_extension(GL_KHR_shader_subgroup_shuffle) +[ForceInline] +[require(cuda_glsl_hlsl_spirv, subgroup_shuffle)] +public T subgroupShuffle(T value, uint index) { shader_subgroup_preamble(); return WaveShuffle(value, index); } __generic -__glsl_extension(GL_KHR_shader_subgroup_shuffle) [require(glsl)] -__spirv_version(1.3) [require(spirv)] +__spirv_version(1.3) +__glsl_extension(GL_KHR_shader_subgroup_shuffle) +[require(glsl_spirv, subgroup_shuffle)] [ForceInline] public T subgroupShuffleXor(T value, uint mask) { shader_subgroup_preamble(); @@ -6217,18 +6532,22 @@ __spirv_version(1.3) [require(spirv)] } __generic -__glsl_extension(GL_KHR_shader_subgroup_shuffle) [require(glsl)] -__spirv_version(1.3) [require(spirv)] -[ForceInline] public vector subgroupShuffle(vector value, uint index) +__spirv_version(1.3) +__glsl_extension(GL_KHR_shader_subgroup_shuffle) +[ForceInline] +[require(cuda_glsl_hlsl_spirv, subgroup_shuffle)] +public vector subgroupShuffle(vector value, uint index) { shader_subgroup_preamble(); return WaveShuffle(value, index); } __generic -__glsl_extension(GL_KHR_shader_subgroup_shuffle) [require(glsl)] -__spirv_version(1.3) [require(spirv)] -[ForceInline] public vector subgroupShuffleXor(vector value, uint mask) +__spirv_version(1.3) +__glsl_extension(GL_KHR_shader_subgroup_shuffle) +[ForceInline] +[require(glsl_spirv, subgroup_shuffle)] +public vector subgroupShuffleXor(vector value, uint mask) { shader_subgroup_preamble(); __target_switch @@ -6247,9 +6566,11 @@ __spirv_version(1.3) [require(spirv)] // GL_KHR_shader_subgroup_shuffle_relative __generic -__glsl_extension(GL_KHR_shader_subgroup_shuffle_relative) [require(glsl)] -__spirv_version(1.3) [require(spirv)] -[ForceInline] public T subgroupShuffleUp(T value, uint delta) +__spirv_version(1.3) +__glsl_extension(GL_KHR_shader_subgroup_shuffle_relative) +[ForceInline] +[require(glsl_spirv, subgroup_shufflerelative)] +public T subgroupShuffleUp(T value, uint delta) { shader_subgroup_preamble(); __target_switch @@ -6265,9 +6586,11 @@ __spirv_version(1.3) [require(spirv)] } __generic -__glsl_extension(GL_KHR_shader_subgroup_shuffle_relative) [require(glsl)] -__spirv_version(1.3) [require(spirv)] -[ForceInline] public T subgroupShuffleDown(T value, uint delta) +__spirv_version(1.3) +__glsl_extension(GL_KHR_shader_subgroup_shuffle_relative) +[ForceInline] +[require(glsl_spirv, subgroup_shufflerelative)] +public T subgroupShuffleDown(T value, uint delta) { shader_subgroup_preamble(); __target_switch @@ -6284,9 +6607,11 @@ __spirv_version(1.3) [require(spirv)] __generic -__glsl_extension(GL_KHR_shader_subgroup_shuffle_relative) [require(glsl)] -__spirv_version(1.3) [require(spirv)] -[ForceInline] public vector subgroupShuffleUp(vector value, uint delta) +__spirv_version(1.3) +__glsl_extension(GL_KHR_shader_subgroup_shuffle_relative) +[ForceInline] +[require(glsl_spirv, subgroup_shufflerelative)] +public vector subgroupShuffleUp(vector value, uint delta) { shader_subgroup_preamble(); __target_switch @@ -6302,9 +6627,11 @@ __spirv_version(1.3) [require(spirv)] } __generic -__glsl_extension(GL_KHR_shader_subgroup_shuffle_relative) [require(glsl)] -__spirv_version(1.3) [require(spirv)] -[ForceInline] public vector subgroupShuffleDown(vector value, uint delta) +__spirv_version(1.3) +__glsl_extension(GL_KHR_shader_subgroup_shuffle_relative) +[ForceInline] +[require(glsl_spirv, subgroup_shufflerelative)] +public vector subgroupShuffleDown(vector value, uint delta) { shader_subgroup_preamble(); __target_switch @@ -6321,9 +6648,11 @@ __spirv_version(1.3) [require(spirv)] // GL_KHR_shader_subgroup_clustered __generic -__glsl_extension(GL_KHR_shader_subgroup_clustered) [require(glsl)] -__spirv_version(1.3) [require(spirv)] -[ForceInline] public T subgroupClusteredAdd(T value, uint clusterSize) +__spirv_version(1.3) +__glsl_extension(GL_KHR_shader_subgroup_clustered) +[ForceInline] +[require(glsl_spirv, subgroup_clustered)] +public T subgroupClusteredAdd(T value, uint clusterSize) { shader_subgroup_preamble(); __target_switch @@ -6340,9 +6669,11 @@ __spirv_version(1.3) [require(spirv)] } __generic -__glsl_extension(GL_KHR_shader_subgroup_clustered) [require(glsl)] -__spirv_version(1.3) [require(spirv)] -[ForceInline] public T subgroupClusteredMul(T value, uint clusterSize) +__spirv_version(1.3) +__glsl_extension(GL_KHR_shader_subgroup_clustered) +[ForceInline] +[require(glsl_spirv, subgroup_clustered)] +public T subgroupClusteredMul(T value, uint clusterSize) { shader_subgroup_preamble(); __target_switch @@ -6359,9 +6690,11 @@ __spirv_version(1.3) [require(spirv)] } __generic -__glsl_extension(GL_KHR_shader_subgroup_clustered) [require(glsl)] -__spirv_version(1.3) [require(spirv)] -[ForceInline] public T subgroupClusteredMin(T value, uint clusterSize) +__spirv_version(1.3) +__glsl_extension(GL_KHR_shader_subgroup_clustered) +[ForceInline] +[require(glsl_spirv, subgroup_clustered)] +public T subgroupClusteredMin(T value, uint clusterSize) { shader_subgroup_preamble(); __target_switch @@ -6380,9 +6713,11 @@ __spirv_version(1.3) [require(spirv)] } __generic -__glsl_extension(GL_KHR_shader_subgroup_clustered) [require(glsl)] -__spirv_version(1.3) [require(spirv)] -[ForceInline] public T subgroupClusteredMax(T value, uint clusterSize) +__spirv_version(1.3) +__glsl_extension(GL_KHR_shader_subgroup_clustered) +[ForceInline] +[require(glsl_spirv, subgroup_clustered)] +public T subgroupClusteredMax(T value, uint clusterSize) { shader_subgroup_preamble(); __target_switch @@ -6401,9 +6736,11 @@ __spirv_version(1.3) [require(spirv)] } __generic -__glsl_extension(GL_KHR_shader_subgroup_clustered) [require(glsl)] -__spirv_version(1.3) [require(spirv)] -[ForceInline] public T subgroupClusteredAnd(T value, uint clusterSize) +__spirv_version(1.3) +__glsl_extension(GL_KHR_shader_subgroup_clustered) +[ForceInline] +[require(glsl_spirv, subgroup_clustered)] +public T subgroupClusteredAnd(T value, uint clusterSize) { shader_subgroup_preamble(); __target_switch @@ -6417,9 +6754,11 @@ __spirv_version(1.3) [require(spirv)] } __generic -__glsl_extension(GL_KHR_shader_subgroup_clustered) [require(glsl)] -__spirv_version(1.3) [require(spirv)] -[ForceInline] public T subgroupClusteredOr(T value, uint clusterSize) +__spirv_version(1.3) +__glsl_extension(GL_KHR_shader_subgroup_clustered) +[ForceInline] +[require(glsl_spirv, subgroup_clustered)] +public T subgroupClusteredOr(T value, uint clusterSize) { shader_subgroup_preamble(); __target_switch @@ -6435,9 +6774,11 @@ __spirv_version(1.3) [require(spirv)] __generic -__glsl_extension(GL_KHR_shader_subgroup_clustered) [require(glsl)] -__spirv_version(1.3) [require(spirv)] -[ForceInline] public T subgroupClusteredXor(T value, uint clusterSize) +__spirv_version(1.3) +__glsl_extension(GL_KHR_shader_subgroup_clustered) +[ForceInline] +[require(glsl_spirv, subgroup_clustered)] +public T subgroupClusteredXor(T value, uint clusterSize) { shader_subgroup_preamble(); __target_switch @@ -6453,9 +6794,11 @@ __spirv_version(1.3) [require(spirv)] __generic -__glsl_extension(GL_KHR_shader_subgroup_clustered) [require(glsl)] -__spirv_version(1.3) [require(spirv)] -[ForceInline] public vector subgroupClusteredAdd(vector value, uint clusterSize) +__spirv_version(1.3) +__glsl_extension(GL_KHR_shader_subgroup_clustered) +[ForceInline] +[require(glsl_spirv, subgroup_clustered)] +public vector subgroupClusteredAdd(vector value, uint clusterSize) { shader_subgroup_preamble(); __target_switch @@ -6473,9 +6816,11 @@ __spirv_version(1.3) [require(spirv)] } __generic -__glsl_extension(GL_KHR_shader_subgroup_clustered) [require(glsl)] -__spirv_version(1.3) [require(spirv)] -[ForceInline] public vector subgroupClusteredMul(vector value, uint clusterSize) +__spirv_version(1.3) +__glsl_extension(GL_KHR_shader_subgroup_clustered) +[ForceInline] +[require(glsl_spirv, subgroup_clustered)] +public vector subgroupClusteredMul(vector value, uint clusterSize) { shader_subgroup_preamble(); __target_switch @@ -6492,9 +6837,11 @@ __spirv_version(1.3) [require(spirv)] } __generic -__glsl_extension(GL_KHR_shader_subgroup_clustered) [require(glsl)] -__spirv_version(1.3) [require(spirv)] -[ForceInline] public vector subgroupClusteredMin(vector value, uint clusterSize) +__spirv_version(1.3) +__glsl_extension(GL_KHR_shader_subgroup_clustered) +[ForceInline] +[require(glsl_spirv, subgroup_clustered)] +public vector subgroupClusteredMin(vector value, uint clusterSize) { shader_subgroup_preamble(); __target_switch @@ -6513,9 +6860,11 @@ __spirv_version(1.3) [require(spirv)] } __generic -__glsl_extension(GL_KHR_shader_subgroup_clustered) [require(glsl)] -__spirv_version(1.3) [require(spirv)] -[ForceInline] public vector subgroupClusteredMax(vector value, uint clusterSize) +__spirv_version(1.3) +__glsl_extension(GL_KHR_shader_subgroup_clustered) +[ForceInline] +[require(glsl_spirv, subgroup_clustered)] +public vector subgroupClusteredMax(vector value, uint clusterSize) { shader_subgroup_preamble(); __target_switch @@ -6534,9 +6883,11 @@ __spirv_version(1.3) [require(spirv)] } __generic -__glsl_extension(GL_KHR_shader_subgroup_clustered) [require(glsl)] -__spirv_version(1.3) [require(spirv)] -[ForceInline] public vector subgroupClusteredAnd(vector value, uint clusterSize) +__spirv_version(1.3) +__glsl_extension(GL_KHR_shader_subgroup_clustered) +[ForceInline] +[require(glsl_spirv, subgroup_clustered)] +public vector subgroupClusteredAnd(vector value, uint clusterSize) { shader_subgroup_preamble(); __target_switch @@ -6550,9 +6901,11 @@ __spirv_version(1.3) [require(spirv)] } __generic -__glsl_extension(GL_KHR_shader_subgroup_clustered) [require(glsl)] -__spirv_version(1.3) [require(spirv)] -[ForceInline] public vector subgroupClusteredOr(vector value, uint clusterSize) +__spirv_version(1.3) +__glsl_extension(GL_KHR_shader_subgroup_clustered) +[ForceInline] +[require(glsl_spirv, subgroup_clustered)] +public vector subgroupClusteredOr(vector value, uint clusterSize) { shader_subgroup_preamble(); __target_switch @@ -6566,9 +6919,11 @@ __spirv_version(1.3) [require(spirv)] } __generic -__glsl_extension(GL_KHR_shader_subgroup_clustered) [require(glsl)] -__spirv_version(1.3) [require(spirv)] -[ForceInline] public vector subgroupClusteredXor(vector value, uint clusterSize) +__spirv_version(1.3) +__glsl_extension(GL_KHR_shader_subgroup_clustered) +[ForceInline] +[require(glsl_spirv, subgroup_clustered)] +public vector subgroupClusteredXor(vector value, uint clusterSize) { shader_subgroup_preamble(); __target_switch @@ -6584,36 +6939,44 @@ __spirv_version(1.3) [require(spirv)] // GL_KHR_shader_subgroup_quad __generic -__glsl_extension(GL_KHR_shader_subgroup_quad) [require(glsl)] -__spirv_version(1.3) [require(spirv)] -[ForceInline] public T subgroupQuadBroadcast(T value, uint id) +__spirv_version(1.3) +__glsl_extension(GL_KHR_shader_subgroup_quad) +[ForceInline] +[require(glsl_hlsl_spirv, subgroup_quad)] +public T subgroupQuadBroadcast(T value, uint id) { shader_subgroup_preamble(); return QuadReadLaneAt(value, id); } __generic -__glsl_extension(GL_KHR_shader_subgroup_quad) [require(glsl)] -__spirv_version(1.3) [require(spirv)] -[ForceInline] public T subgroupQuadSwapHorizontal(T value) +__spirv_version(1.3) +__glsl_extension(GL_KHR_shader_subgroup_quad) +[ForceInline] +[require(glsl_hlsl_spirv, subgroup_quad)] +public T subgroupQuadSwapHorizontal(T value) { shader_subgroup_preamble(); return QuadReadAcrossX(value); } __generic -__glsl_extension(GL_KHR_shader_subgroup_quad) [require(glsl)] -__spirv_version(1.3) [require(spirv)] -[ForceInline] public T subgroupQuadSwapVertical(T value) +__spirv_version(1.3) +__glsl_extension(GL_KHR_shader_subgroup_quad) +[ForceInline] +[require(glsl_hlsl_spirv, subgroup_quad)] +public T subgroupQuadSwapVertical(T value) { shader_subgroup_preamble(); return QuadReadAcrossY(value); } __generic -__glsl_extension(GL_KHR_shader_subgroup_quad) [require(glsl)] -__spirv_version(1.3) [require(spirv)] -[ForceInline] public T subgroupQuadSwapDiagonal(T value) +__spirv_version(1.3) +__glsl_extension(GL_KHR_shader_subgroup_quad) +[ForceInline] +[require(glsl_hlsl_spirv, subgroup_quad)] +public T subgroupQuadSwapDiagonal(T value) { shader_subgroup_preamble(); return QuadReadAcrossDiagonal(value); @@ -6621,36 +6984,44 @@ __spirv_version(1.3) [require(spirv)] __generic -__glsl_extension(GL_KHR_shader_subgroup_quad) [require(glsl)] -__spirv_version(1.3) [require(spirv)] -[ForceInline] public vector subgroupQuadBroadcast(vector value, uint id) +__spirv_version(1.3) +__glsl_extension(GL_KHR_shader_subgroup_quad) +[ForceInline] +[require(glsl_hlsl_spirv, subgroup_quad)] +public vector subgroupQuadBroadcast(vector value, uint id) { shader_subgroup_preamble(); return QuadReadLaneAt(value, id); } __generic -__glsl_extension(GL_KHR_shader_subgroup_quad) [require(glsl)] -__spirv_version(1.3) [require(spirv)] -[ForceInline] public vector subgroupQuadSwapHorizontal(vector value) +__spirv_version(1.3) +__glsl_extension(GL_KHR_shader_subgroup_quad) +[ForceInline] +[require(glsl_hlsl_spirv, subgroup_quad)] +public vector subgroupQuadSwapHorizontal(vector value) { shader_subgroup_preamble(); return QuadReadAcrossX(value); } __generic -__glsl_extension(GL_KHR_shader_subgroup_quad) [require(glsl)] -__spirv_version(1.3) [require(spirv)] -[ForceInline] public vector subgroupQuadSwapVertical(vector value) +__spirv_version(1.3) +__glsl_extension(GL_KHR_shader_subgroup_quad) +[ForceInline] +[require(glsl_hlsl_spirv, subgroup_quad)] +public vector subgroupQuadSwapVertical(vector value) { shader_subgroup_preamble(); return QuadReadAcrossY(value); } __generic -__glsl_extension(GL_KHR_shader_subgroup_quad) [require(glsl)] -__spirv_version(1.3) [require(spirv)] -[ForceInline] public vector subgroupQuadSwapDiagonal(vector value) +__spirv_version(1.3) +__glsl_extension(GL_KHR_shader_subgroup_quad) +[ForceInline] +[require(glsl_hlsl_spirv, subgroup_quad)] +public vector subgroupQuadSwapDiagonal(vector value) { shader_subgroup_preamble(); return QuadReadAcrossDiagonal(value); @@ -6671,7 +7042,9 @@ public struct atomic_uint // types and operations to enable the according ext needed for these operations __generic -[ForceInline] void typeRequireChecks_atomic_using_float0_tier() +[ForceInline] +[require(glsl_spirv, atomic_glsl)] +void typeRequireChecks_atomic_using_float0_tier() { __target_switch { @@ -6685,7 +7058,9 @@ __generic } } __generic -[ForceInline] void typeRequireChecks_atomic_using_float1_tier() +[ForceInline] +[require(glsl_spirv, atomic_glsl)] +void typeRequireChecks_atomic_using_float1_tier() { __target_switch { @@ -6708,7 +7083,9 @@ __generic } } __generic -[ForceInline] void typeRequireChecks_atomic_using_float2_tier() +[ForceInline] +[require(glsl_spirv, atomic_glsl)] +void typeRequireChecks_atomic_using_float2_tier() { __target_switch { @@ -6732,6 +7109,7 @@ __generic } __generic +[require(glsl_spirv, atomic_glsl)] void typeRequireChecks_atomic_using_add() { __target_switch @@ -6777,6 +7155,7 @@ void typeRequireChecks_atomic_using_add() } } __generic +[require(glsl_spirv, atomic_glsl)] void typeRequireChecks_atomic_using_MinMax() { __target_switch @@ -6822,7 +7201,9 @@ void typeRequireChecks_atomic_using_MinMax() } } __generic -[ForceInline] void typeRequireChecks_atomic_using_Logical_CAS() +[ForceInline] +[require(glsl_spirv, atomic_glsl)] +void typeRequireChecks_atomic_using_Logical_CAS() { __target_switch { @@ -6877,9 +7258,11 @@ for (const auto& item : atomics) { }}}} -__spirv_version(1.0)[require(spirv)] -__glsl_version(430) [require(glsl)] -[ForceInline] public $(item.name) atomicAdd(inout $(item.name) mem, $(item.name) data) + +__glsl_version(430) +[ForceInline] +[require(glsl_spirv, atomic_glsl)] +public $(item.name) atomicAdd(inout $(item.name) mem, $(item.name) data) { typeRequireChecks_atomic_using_float1_tier<$(item.name)>(); typeRequireChecks_atomic_using_add<$(item.name)>(); @@ -6894,9 +7277,11 @@ __glsl_version(430) [require(glsl)] } } -__spirv_version(1.0)[require(spirv)] -__glsl_version(430) [require(glsl)] -[ForceInline] public $(item.name) atomicMin(inout $(item.name) mem, $(item.name) data) + +__glsl_version(430) +[ForceInline] +[require(glsl_spirv, atomic_glsl)] +public $(item.name) atomicMin(inout $(item.name) mem, $(item.name) data) { typeRequireChecks_atomic_using_float2_tier<$(item.name)>(); typeRequireChecks_atomic_using_MinMax<$(item.name)>(); @@ -6911,9 +7296,11 @@ __glsl_version(430) [require(glsl)] } } -__spirv_version(1.0)[require(spirv)] -__glsl_version(430) [require(glsl)] -[ForceInline] public $(item.name) atomicMax(inout $(item.name) mem, $(item.name) data) + +__glsl_version(430) +[ForceInline] +[require(glsl_spirv, atomic_glsl)] +public $(item.name) atomicMax(inout $(item.name) mem, $(item.name) data) { typeRequireChecks_atomic_using_float2_tier<$(item.name)>(); typeRequireChecks_atomic_using_MinMax<$(item.name)>(); @@ -6928,9 +7315,11 @@ __glsl_version(430) [require(glsl)] } } -__spirv_version(1.0)[require(spirv)] -__glsl_version(430) [require(glsl)] -[ForceInline] public $(item.name) atomicExchange(inout $(item.name) mem, $(item.name) data) + +__glsl_version(430) +[ForceInline] +[require(glsl_spirv, atomic_glsl)] +public $(item.name) atomicExchange(inout $(item.name) mem, $(item.name) data) { typeRequireChecks_atomic_using_float1_tier<$(item.name)>(); __target_switch @@ -6949,9 +7338,11 @@ if(item.isFloat) continue; }}}} -__spirv_version(1.0)[require(spirv)] -__glsl_version(430) [require(glsl)] -[ForceInline] public $(item.name) atomicAnd(inout $(item.name) mem, $(item.name) data) + +__glsl_version(430) +[ForceInline] +[require(glsl_spirv, atomic_glsl)] +public $(item.name) atomicAnd(inout $(item.name) mem, $(item.name) data) { typeRequireChecks_atomic_using_float0_tier<$(item.name)>(); typeRequireChecks_atomic_using_Logical_CAS<$(item.name)>(); @@ -6969,9 +7360,11 @@ __glsl_version(430) [require(glsl)] } } -__spirv_version(1.0)[require(spirv)] -__glsl_version(430) [require(glsl)] -[ForceInline] public $(item.name) atomicOr(inout $(item.name) mem, $(item.name) data) + +__glsl_version(430) +[ForceInline] +[require(glsl_spirv, atomic_glsl)] +public $(item.name) atomicOr(inout $(item.name) mem, $(item.name) data) { typeRequireChecks_atomic_using_float0_tier<$(item.name)>(); typeRequireChecks_atomic_using_Logical_CAS<$(item.name)>(); @@ -6986,9 +7379,11 @@ __glsl_version(430) [require(glsl)] } } -__spirv_version(1.0)[require(spirv)] -__glsl_version(430) [require(glsl)] -[ForceInline] public $(item.name) atomicXor(inout $(item.name) mem, $(item.name) data) + +__glsl_version(430) +[ForceInline] +[require(glsl_spirv, atomic_glsl)] +public $(item.name) atomicXor(inout $(item.name) mem, $(item.name) data) { typeRequireChecks_atomic_using_float0_tier<$(item.name)>(); typeRequireChecks_atomic_using_Logical_CAS<$(item.name)>(); @@ -7003,9 +7398,11 @@ __glsl_version(430) [require(glsl)] } } -__spirv_version(1.0)[require(spirv)] -__glsl_version(430) [require(glsl)] -[ForceInline] public $(item.name) atomicCompSwap(inout $(item.name) mem, $(item.name) compare, $(item.name) data) + +__glsl_version(430) +[ForceInline] +[require(glsl_spirv, atomic_glsl)] +public $(item.name) atomicCompSwap(inout $(item.name) mem, $(item.name) compare, $(item.name) data) { typeRequireChecks_atomic_using_float0_tier<$(item.name)>(); typeRequireChecks_atomic_using_Logical_CAS<$(item.name)>(); @@ -7027,9 +7424,11 @@ ${{{{ // all atomic_uint functions are mangled at compile time, // all types are converted into a field address of a 'uint' // relative to the layout(offset) of the atomic_uint -__spirv_version(1.0)[require(spirv)] -__glsl_version(430) [require(glsl)] -[ForceInline] public uint atomicCounterIncrement(atomic_uint c) + +__glsl_version(430) +[ForceInline] +[require(glsl_spirv, atomic_glsl)] +public uint atomicCounterIncrement(atomic_uint c) { __target_switch @@ -7045,7 +7444,8 @@ __glsl_version(430) [require(glsl)] } } -__glsl_version(430) [require(glsl)] +__glsl_version(430) +[require(glsl, atomic_glsl)] [ForceInline] public uint atomicCounterDecrement_GLSL_helper(atomic_uint c) { __target_switch @@ -7057,9 +7457,10 @@ __glsl_version(430) [require(glsl)] } } -__spirv_version(1.0)[require(spirv)] -__glsl_version(430) [require(glsl)] -[ForceInline] public uint atomicCounter(atomic_uint c) +__glsl_version(430) +[ForceInline] +[require(glsl_spirv, atomic_glsl)] +public uint atomicCounter(atomic_uint c) { __target_switch { @@ -7077,9 +7478,11 @@ __glsl_version(430) [require(glsl)] } } -__spirv_version(1.0)[require(spirv)] -__glsl_version(430) [require(glsl)] -[ForceInline] public uint atomicCounterDecrement(atomic_uint c) + +__glsl_version(430) +[ForceInline] +[require(glsl_spirv, atomic_glsl)] +public uint atomicCounterDecrement(atomic_uint c) { __target_switch { @@ -7101,9 +7504,11 @@ __glsl_version(430) [require(glsl)] } } -__spirv_version(1.0)[require(spirv)] -__glsl_version(430) [require(glsl)] -[ForceInline] public uint atomicCounterAdd(atomic_uint c, uint data) + +__glsl_version(430) +[ForceInline] +[require(glsl_spirv, atomic_glsl)] +public uint atomicCounterAdd(atomic_uint c, uint data) { __target_switch { @@ -7118,9 +7523,11 @@ __glsl_version(430) [require(glsl)] } } -__spirv_version(1.0)[require(spirv)] -__glsl_version(430) [require(glsl)] -[ForceInline] public uint atomicCounterSubtract(atomic_uint c, uint data) + +__glsl_version(430) +[ForceInline] +[require(glsl_spirv, atomic_glsl)] +public uint atomicCounterSubtract(atomic_uint c, uint data) { __target_switch { @@ -7138,9 +7545,11 @@ __glsl_version(430) [require(glsl)] } } -__spirv_version(1.0)[require(spirv)] -__glsl_version(430) [require(glsl)] -[ForceInline] public uint atomicCounterMin(atomic_uint c, uint data) + +__glsl_version(430) +[ForceInline] +[require(glsl_spirv, atomic_glsl)] +public uint atomicCounterMin(atomic_uint c, uint data) { __target_switch { @@ -7155,9 +7564,11 @@ __glsl_version(430) [require(glsl)] } } -__spirv_version(1.0)[require(spirv)] -__glsl_version(430) [require(glsl)] -[ForceInline] public uint atomicCounterMax(atomic_uint c, uint data) + +__glsl_version(430) +[ForceInline] +[require(glsl_spirv, atomic_glsl)] +public uint atomicCounterMax(atomic_uint c, uint data) { __target_switch { @@ -7172,9 +7583,11 @@ __glsl_version(430) [require(glsl)] } } -__spirv_version(1.0)[require(spirv)] -__glsl_version(430) [require(glsl)] -[ForceInline] public uint atomicCounterAnd(atomic_uint c, uint data) + +__glsl_version(430) +[ForceInline] +[require(glsl_spirv, atomic_glsl)] +public uint atomicCounterAnd(atomic_uint c, uint data) { __target_switch { @@ -7189,9 +7602,11 @@ __glsl_version(430) [require(glsl)] } } -__spirv_version(1.0)[require(spirv)] -__glsl_version(430) [require(glsl)] -[ForceInline] public uint atomicCounterOr(atomic_uint c, uint data) + +__glsl_version(430) +[ForceInline] +[require(glsl_spirv, atomic_glsl)] +public uint atomicCounterOr(atomic_uint c, uint data) { __target_switch { @@ -7206,9 +7621,11 @@ __glsl_version(430) [require(glsl)] } } -__spirv_version(1.0)[require(spirv)] -__glsl_version(430) [require(glsl)] -[ForceInline] public uint atomicCounterXor(atomic_uint c, uint data) + +__glsl_version(430) +[ForceInline] +[require(glsl_spirv, atomic_glsl)] +public uint atomicCounterXor(atomic_uint c, uint data) { __target_switch { @@ -7223,9 +7640,11 @@ __glsl_version(430) [require(glsl)] } } -__spirv_version(1.0)[require(spirv)] -__glsl_version(430) [require(glsl)] -[ForceInline] public uint atomicCounterExchange(atomic_uint c, uint data) + +__glsl_version(430) +[ForceInline] +[require(glsl_spirv, atomic_glsl)] +public uint atomicCounterExchange(atomic_uint c, uint data) { __target_switch { @@ -7240,9 +7659,11 @@ __glsl_version(430) [require(glsl)] } } -__spirv_version(1.0)[require(spirv)] -__glsl_version(430) [require(glsl)] -[ForceInline] public uint atomicCounterCompSwap(atomic_uint c, uint compare, uint data) + +__glsl_version(430) +[ForceInline] +[require(glsl_spirv, atomic_glsl)] +public uint atomicCounterCompSwap(atomic_uint c, uint compare, uint data) { __target_switch { @@ -7259,105 +7680,105 @@ __glsl_version(430) [require(glsl)] /// Section 8.14. Fragment Processing Functions -[require(any_gfx_target, fragmentprocessing)] [__NoSideEffect] [ForceInline] +[require(glsl_hlsl_spirv, fragmentprocessing)] public float dFdx(float p) { return ddx(p); } -[require(any_gfx_target, fragmentprocessing)] __generic [__NoSideEffect] [ForceInline] +[require(glsl_hlsl_spirv, fragmentprocessing)] public vector dFdx(vector p) { return ddx(p); } -[require(any_gfx_target, fragmentprocessing)] [__NoSideEffect] [ForceInline] +[require(glsl_hlsl_spirv, fragmentprocessing)] public float dFdy(float p) { return ddy(p); } -[require(any_gfx_target, fragmentprocessing)] __generic [__NoSideEffect] [ForceInline] +[require(glsl_hlsl_spirv, fragmentprocessing)] public vector dFdy(vector p) { return ddy(p); } -[require(any_gfx_target, fragmentprocessing_derivativecontrol)] [__NoSideEffect] [ForceInline] +[require(glsl_hlsl_spirv, fragmentprocessing_derivativecontrol)] public float dFdxFine(float p) { return ddx_fine(p); } -[require(any_gfx_target, fragmentprocessing_derivativecontrol)] __generic [__NoSideEffect] [ForceInline] +[require(glsl_hlsl_spirv, fragmentprocessing_derivativecontrol)] public vector dFdxFine(vector p) { return ddx_fine(p); } -[require(any_gfx_target, fragmentprocessing_derivativecontrol)] [__NoSideEffect] [ForceInline] +[require(glsl_hlsl_spirv, fragmentprocessing_derivativecontrol)] public float dFdyFine(float p) { return ddy_fine(p); } -[require(any_gfx_target, fragmentprocessing_derivativecontrol)] __generic [__NoSideEffect] [ForceInline] +[require(glsl_hlsl_spirv, fragmentprocessing_derivativecontrol)] public vector dFdyFine(vector p) { return ddy_fine(p); } -[require(any_gfx_target, fragmentprocessing_derivativecontrol)] [__NoSideEffect] [ForceInline] +[require(glsl_hlsl_spirv, fragmentprocessing_derivativecontrol)] public float dFdxCoarse(float p) { return ddx_coarse(p); } -[require(any_gfx_target, fragmentprocessing_derivativecontrol)] __generic [__NoSideEffect] [ForceInline] +[require(glsl_hlsl_spirv, fragmentprocessing_derivativecontrol)] public vector dFdxCoarse(vector p) { return ddx_coarse(p); } -[require(any_gfx_target, fragmentprocessing_derivativecontrol)] [__NoSideEffect] [ForceInline] +[require(glsl_hlsl_spirv, fragmentprocessing_derivativecontrol)] public float dFdyCoarse(float p) { return ddy_coarse(p); } -[require(any_gfx_target, fragmentprocessing_derivativecontrol)] __generic [__NoSideEffect] [ForceInline] +[require(glsl_hlsl_spirv, fragmentprocessing_derivativecontrol)] public vector dFdyCoarse(vector p) { return ddy_coarse(p); } -[require(any_gfx_target, fragmentprocessing_derivativecontrol)] [__NoSideEffect] [ForceInline] +[require(glsl_hlsl_spirv, fragmentprocessing_derivativecontrol)] public float fwidthFine(float p) { __target_switch @@ -7377,10 +7798,10 @@ public float fwidthFine(float p) } } } -[require(any_gfx_target, fragmentprocessing_derivativecontrol)] __generic [__NoSideEffect] [ForceInline] +[require(glsl_hlsl_spirv, fragmentprocessing_derivativecontrol)] public vector fwidthFine(vector p) { __target_switch @@ -7401,9 +7822,9 @@ public vector fwidthFine(vector p) } } -[require(any_gfx_target, fragmentprocessing_derivativecontrol)] [__NoSideEffect] [ForceInline] +[require(glsl_hlsl_spirv, fragmentprocessing_derivativecontrol)] public float fwidthCoarse(float p) { __target_switch @@ -7423,10 +7844,10 @@ public float fwidthCoarse(float p) } } } -[require(any_gfx_target, fragmentprocessing_derivativecontrol)] __generic [__NoSideEffect] [ForceInline] +[require(glsl_hlsl_spirv, fragmentprocessing_derivativecontrol)] public vector fwidthCoarse(vector p) { __target_switch @@ -7447,17 +7868,13 @@ public vector fwidthCoarse(vector p) } } -[require(any_gfx_target, fragmentprocessing)] [__NoSideEffect] [__GLSLRequireShaderInputParameter(0)] +[require(glsl_spirv, fragmentprocessing)] public float interpolateAtCentroid(__ref float interpolant) { __target_switch { - case hlsl: - { - return EvaluateAttributeAtCentroid(interpolant); - } case glsl: __intrinsic_asm "interpolateAtCentroid($0)"; case spirv: { @@ -7469,18 +7886,14 @@ public float interpolateAtCentroid(__ref float interpolant) } } } -[require(any_gfx_target, fragmentprocessing)] __generic [__NoSideEffect] [__GLSLRequireShaderInputParameter(0)] +[require(glsl_spirv, fragmentprocessing)] public vector interpolateAtCentroid(__ref vector interpolant) { __target_switch { - case hlsl: - { - return EvaluateAttributeAtCentroid(interpolant); - } case glsl: __intrinsic_asm "interpolateAtCentroid($0)"; case spirv: { @@ -7493,17 +7906,13 @@ public vector interpolateAtCentroid(__ref vector interpolant } } -[require(any_gfx_target, fragmentprocessing)] [__NoSideEffect] [__GLSLRequireShaderInputParameter(0)] +[require(glsl_spirv, fragmentprocessing)] public float interpolateAtSample(__ref float interpolant, int sample) { __target_switch { - case hlsl: - { - return EvaluateAttributeAtSample(interpolant, sample); - } case glsl: __intrinsic_asm "interpolateAtSample($0,$1)"; case spirv: { @@ -7515,18 +7924,14 @@ public float interpolateAtSample(__ref float interpolant, int sample) } } } -[require(any_gfx_target, fragmentprocessing)] __generic [__NoSideEffect] [__GLSLRequireShaderInputParameter(0)] +[require(glsl_spirv, fragmentprocessing)] public vector interpolateAtSample(__ref vector interpolant, int sample) { __target_switch { - case hlsl: - { - return EvaluateAttributeAtSample(interpolant, sample); - } case glsl: __intrinsic_asm "interpolateAtSample($0,$1)"; case spirv: { @@ -7539,9 +7944,9 @@ public vector interpolateAtSample(__ref vector interpolant, } } -[require(glsl_spirv, fragmentprocessing)] [__NoSideEffect] [__GLSLRequireShaderInputParameter(0)] +[require(glsl_spirv, fragmentprocessing)] public float interpolateAtOffset(__ref float interpolant, vec2 offset) { __target_switch @@ -7557,10 +7962,10 @@ public float interpolateAtOffset(__ref float interpolant, vec2 offset) } } } -[require(glsl_spirv, fragmentprocessing)] __generic [__NoSideEffect] [__GLSLRequireShaderInputParameter(0)] +[require(glsl_spirv, fragmentprocessing)] public vector interpolateAtOffset(__ref vector interpolant, vec2 offset) { __target_switch @@ -7650,8 +8055,8 @@ public vec4 noise4(vector x) // control flow. // TODO: if called after a return, error. -[require(any_gfx_target, compute_tess_gfx)] [ForceInline] +[require(glsl_hlsl_spirv, shader_stages_compute_tesscontrol_tesseval)] public void barrier() { __target_switch @@ -7673,8 +8078,8 @@ public void barrier() /// Section 8.17. Shader Memory Control Functions -[require(shadermemorycontrol)] [ForceInline] +[require(glsl_hlsl_spirv, shadermemorycontrol)] public void memoryBarrier() { __target_switch @@ -7699,8 +8104,8 @@ public void memoryBarrier() // glslang will compile with `AtomicCounterMemory` since it does not use // the `AtomicStorage` OpCapability which is required for `AtomicCounterMemory`. // this is invalid/undefined spir-v for vulkan targets and should not be followed -[require(any_gfx_target, shadermemorycontrol_compute)] [ForceInline] +[require(glsl_hlsl_spirv, shadermemorycontrol_compute)] public void memoryBarrierAtomicCounter() { __target_switch @@ -7717,8 +8122,8 @@ public void memoryBarrierAtomicCounter() } } } -[require(shadermemorycontrol)] [ForceInline] +[require(glsl_hlsl_spirv, shadermemorycontrol)] public void memoryBarrierBuffer() { __target_switch @@ -7736,8 +8141,8 @@ public void memoryBarrierBuffer() } } -[require(any_gfx_target, shadermemorycontrol_compute)] [ForceInline] +[require(glsl_hlsl_spirv, shadermemorycontrol_compute)] public void memoryBarrierShared() { __target_switch @@ -7754,8 +8159,8 @@ public void memoryBarrierShared() } } } -[require(shadermemorycontrol)] [ForceInline] +[require(glsl_hlsl_spirv, shadermemorycontrol)] public void memoryBarrierImage() { __target_switch @@ -7771,8 +8176,8 @@ public void memoryBarrierImage() } } } -[require(any_gfx_target, shadermemorycontrol_compute)] [ForceInline] +[require(glsl_hlsl_spirv, shadermemorycontrol_compute)] public void groupMemoryBarrier() { __target_switch @@ -7822,9 +8227,9 @@ ${{{{ }}}} __generic - [require(glsl_spirv, subpass)] [__NoSideEffect] [ForceInline] + [require(glsl_hlsl_spirv, subpass)] public T subpassLoad(__SubpassImpl subpass) { __target_switch @@ -7843,9 +8248,9 @@ ${{{{ } } __generic - [require(glsl_spirv, subpass)] [__NoSideEffect] [ForceInline] + [require(glsl_hlsl_spirv, subpass)] public T subpassLoad(__SubpassImpl subpass, int sample) { __target_switch @@ -7871,27 +8276,27 @@ ${{{{ // alternatives __glsl_version(460) __spirv_version(1.3) -[require(shaderinvocationgroup)] [NonUniformReturn] [ForceInline] +[require(glsl_hlsl_spirv, shaderinvocationgroup)] public bool anyInvocation(bool value) { return WaveActiveAnyTrue(value); } __glsl_version(460) __spirv_version(1.3) -[require(shaderinvocationgroup)] [NonUniformReturn] [ForceInline] +[require(glsl_hlsl_spirv, shaderinvocationgroup)] public bool allInvocations(bool value) { return WaveActiveAllTrue(value); } __glsl_version(460) __spirv_version(1.3) -[require(shaderinvocationgroup)] [NonUniformReturn] [ForceInline] +[require(glsl_hlsl_spirv, shaderinvocationgroup)] public bool allInvocationsEqual(bool value) { return WaveActiveAllEqual(value); diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang index cdd08b5d7..cb01e9e68 100644 --- a/source/slang/hlsl.meta.slang +++ b/source/slang/hlsl.meta.slang @@ -2,6 +2,9 @@ typedef uint UINT; +__intrinsic_op($(kIROp_RequireGLSLExtension)) +void __requireGLSLExtension(String extensionName); + __intrinsic_op($(kIROp_FloatCast)) T __floatCast(U v); @@ -37,24 +40,30 @@ struct GLSLShaderStorageBuffer {} __generic __intrinsic_op($(kIROp_StructuredBufferGetDimensions)) +[require(cpp_cuda_glsl_hlsl_metal_spirv, structuredbuffer_rw)] uint2 __structuredBufferGetDimensions(AppendStructuredBuffer buffer); __generic __intrinsic_op($(kIROp_StructuredBufferGetDimensions)) +[require(cpp_cuda_glsl_hlsl_metal_spirv, structuredbuffer_rw)] uint2 __structuredBufferGetDimensions(ConsumeStructuredBuffer buffer); __intrinsic_op($(kIROp_StructuredBufferGetDimensions)) +[require(cpp_cuda_glsl_hlsl_metal_spirv, structuredbuffer)] uint2 __structuredBufferGetDimensions(StructuredBuffer buffer); __intrinsic_op($(kIROp_StructuredBufferGetDimensions)) +[require(cpp_cuda_glsl_hlsl_metal_spirv, structuredbuffer_rw)] uint2 __structuredBufferGetDimensions(RWStructuredBuffer buffer); __intrinsic_op($(kIROp_StructuredBufferGetDimensions)) +[require(cpp_cuda_glsl_hlsl_metal_spirv, structuredbuffer_rw)] uint2 __structuredBufferGetDimensions(RasterizerOrderedStructuredBuffer buffer); __generic __magic_type(HLSLAppendStructuredBufferType) __intrinsic_type($(kIROp_HLSLAppendStructuredBufferType)) +[require(cpp_cuda_glsl_hlsl_spirv, appendstructuredbuffer)] struct AppendStructuredBuffer { __intrinsic_op($(kIROp_StructuredBufferAppend)) @@ -73,6 +82,7 @@ struct AppendStructuredBuffer __magic_type(HLSLByteAddressBufferType) __intrinsic_type($(kIROp_HLSLByteAddressBufferType)) +[require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer)] struct ByteAddressBuffer { [__readNone] @@ -213,69 +223,89 @@ struct __TextureImpl(TSampler s, TCoord value); +__glsl_extension(GL_EXT_texture_shadow_lod) __target_intrinsic(glsl, "texture($0, $1)") +[require(glsl, texture_shadowlod)] float __glsl_texture_1d_shadow(TSampler s, TCoord value); +__glsl_extension(GL_EXT_texture_shadow_lod) __target_intrinsic(glsl, "texture($0, $1, $2)") +[require(glsl, texture_shadowlod)] float __glsl_texture_3d_array_shadow(TSampler s, TCoord value, float compare); __glsl_extension(GL_EXT_texture_shadow_lod) - __target_intrinsic(glsl, "textureOffset($0, $1, $2)") +__target_intrinsic(glsl, "textureOffset($0, $1, $2)") +[require(glsl, texture_sm_4_1)] float __glsl_texture_offset( TSampler s, TCoord value, constexpr TOffset offset); __glsl_extension(GL_EXT_texture_shadow_lod) __target_intrinsic(glsl, "textureOffset($0, $1, $2)") +[require(glsl, texture_shadowlod)] float __glsl_texture_offset_1d_shadow(TSampler s, TCoord value, constexpr TOffset offset); __glsl_extension(GL_EXT_texture_shadow_lod) __target_intrinsic(glsl, "textureLod($0, $1, 0)") +[require(glsl, texture_sm_4_1)] float __glsl_texture_level_zero(TSampler s, TCoord value); __glsl_extension(GL_EXT_texture_shadow_lod) __target_intrinsic(glsl, "textureLod($0, $1, 0)") +[require(glsl, texture_shadowlod)] float __glsl_texture_level_zero_1d_shadow(TSampler s, TCoord value); __glsl_extension(GL_EXT_texture_shadow_lod) __target_intrinsic(glsl, "textureLodOffset($0, $1, 0, $2)") +[require(glsl, texture_shadowlod)] float __glsl_texture_offset_level_zero(TSampler s, TCoord value, constexpr TOffset offset); __glsl_extension(GL_EXT_texture_shadow_lod) __target_intrinsic(glsl, "textureLodOffset($0, $1, 0, $2)") +[require(glsl, texture_shadowlod)] float __glsl_texture_offset_level_zero_1d_shadow(TSampler s, TCoord value, constexpr TOffset offset); __target_intrinsic(glsl, "texture($p, $2)") +[require(glsl, texture_sm_4_1)] float __glsl_texture(TTexture t, SamplerComparisonState s, TCoord value); +__glsl_extension(GL_EXT_texture_shadow_lod) __target_intrinsic(glsl, "texture($p, $2)") +[require(glsl, texture_shadowlod)] float __glsl_texture_1d_shadow(TTexture t, SamplerComparisonState s, TCoord value); +__glsl_extension(GL_EXT_texture_shadow_lod) __target_intrinsic(glsl, "texture($p, $2, $3)") +[require(glsl, texture_shadowlod)] float __glsl_texture_3d_array_shadow(TTexture t, SamplerComparisonState s, TCoord value, float compare); -__glsl_extension(GL_EXT_texture_shadow_lod) __target_intrinsic(glsl, "textureOffset($p, $2, $3)") +[require(glsl, texture_sm_4_1)] float __glsl_texture_offset(TTexture t,SamplerComparisonState s, TCoord value, constexpr TOffset offset); __glsl_extension(GL_EXT_texture_shadow_lod) __target_intrinsic(glsl, "textureOffset($p, $2, $3)") +[require(glsl, texture_shadowlod)] float __glsl_texture_offset_1d_shadow(TTexture t,SamplerComparisonState s, TCoord value, constexpr TOffset offset); -__glsl_extension(GL_EXT_texture_shadow_lod) __target_intrinsic(glsl, "textureLod($p, $2, 0)") +[require(glsl, texture_sm_4_1)] float __glsl_texture_level_zero(TTexture t,SamplerComparisonState s, TCoord value); __glsl_extension(GL_EXT_texture_shadow_lod) __target_intrinsic(glsl, "textureLod($p, $2, 0)") +[require(glsl, texture_shadowlod)] float __glsl_texture_level_zero_1d_shadow(TTexture t,SamplerComparisonState s, TCoord value); __glsl_extension(GL_EXT_texture_shadow_lod) __target_intrinsic(glsl, "textureLodOffset($p, $2, 0, $3)") +[require(glsl, texture_shadowlod)] float __glsl_texture_offset_level_zero(TTexture t,SamplerComparisonState s, TCoord value, constexpr TOffset offset); __glsl_extension(GL_EXT_texture_shadow_lod) __target_intrinsic(glsl, "textureLodOffset($p, $2, 0, $3)") +[require(glsl, texture_shadowlod)] float __glsl_texture_offset_level_zero_1d_shadow(TTexture t,SamplerComparisonState s, TCoord value, constexpr TOffset offset); @@ -297,6 +327,7 @@ extension __TextureImpl [ForceInline] [__readNone] + [require(glsl_hlsl_spirv, texture_querylod)] float CalculateLevelOfDetail(TextureCoord location) { __target_switch @@ -317,6 +348,7 @@ extension __TextureImpl [ForceInline] [__readNone] + [require(glsl_hlsl_spirv, texture_querylod)] float CalculateLevelOfDetailUnclamped(TextureCoord location) { __target_switch @@ -336,6 +368,7 @@ extension __TextureImpl } [__readNone] + [require(cpp_cuda_glsl_hlsl_spirv, texture_sm_4_1_fragment)] T Sample(vector location) { __target_switch @@ -385,6 +418,7 @@ extension __TextureImpl [__readNone] __glsl_extension(GL_ARB_sparse_texture_clamp) + [require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] T Sample(vector location, vector offset, float clamp) { __target_switch @@ -406,6 +440,7 @@ extension __TextureImpl [__readNone] __target_intrinsic(hlsl) + [require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] T Sample(vector location, vector offset, float clamp, out uint status) { status = 0; @@ -413,6 +448,7 @@ extension __TextureImpl } [__readNone] + [require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] T SampleBias(vector location, float bias) { __target_switch @@ -433,6 +469,7 @@ extension __TextureImpl } [__readNone] + [require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] T SampleBias(vector location, float bias, constexpr vector offset) { __target_switch @@ -453,6 +490,7 @@ extension __TextureImpl [__readNone] [ForceInline] + [require(glsl_hlsl_spirv, texture_shadowlod)] float SampleCmp(vector location, float compareValue) { __target_switch @@ -482,6 +520,7 @@ extension __TextureImpl [__readNone] [ForceInline] + [require(glsl_hlsl_spirv, texture_shadowlod)] float SampleCmpLevelZero(vector location, float compareValue) { __target_switch @@ -508,6 +547,7 @@ extension __TextureImpl [__readNone] [ForceInline] + [require(glsl_hlsl_spirv, texture_shadowlod)] float SampleCmp(vector location, float compareValue, constexpr vector offset) { __target_switch @@ -533,6 +573,7 @@ extension __TextureImpl [__readNone] [ForceInline] + [require(glsl_hlsl_spirv, texture_shadowlod)] float SampleCmpLevelZero(vector location, float compareValue, constexpr vector offset) { __target_switch @@ -558,6 +599,7 @@ extension __TextureImpl } [__readNone] + [require(cpp_glsl_hlsl_spirv, texture_sm_4_1)] T SampleGrad(vector location, vector gradX, vector gradY) { __target_switch @@ -578,6 +620,7 @@ extension __TextureImpl } [__readNone] + [require(cpp_glsl_hlsl_spirv, texture_sm_4_1)] T SampleGrad(vector location, vector gradX, vector gradY, constexpr vector offset) { __target_switch @@ -598,6 +641,7 @@ extension __TextureImpl __glsl_extension(GL_ARB_sparse_texture_clamp) [__readNone] + [require(cpp_glsl_hlsl_spirv, texture_sm_4_1)] T SampleGrad(vector location, vector gradX, vector gradY, constexpr vector offset, float lodClamp) { __target_switch @@ -619,6 +663,7 @@ extension __TextureImpl [__readNone] [ForceInline] + [require(cpp_cuda_glsl_hlsl_spirv, texture_sm_4_1)] T SampleLevel(vector location, float level) { __target_switch @@ -670,6 +715,7 @@ extension __TextureImpl [__readNone] [ForceInline] + [require(cpp_glsl_hlsl_spirv, texture_sm_4_1)] T SampleLevel(vector location, float level, constexpr vector offset) { __target_switch @@ -698,6 +744,7 @@ extension __TextureImpl { [__readNone] + [require(cpp_cuda_glsl_hlsl_spirv, texture_sm_4_1_fragment)] T Sample(SamplerState s, vector location) { __target_switch @@ -788,6 +837,7 @@ extension __TextureImpl } [__readNone] + [require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] T Sample(SamplerState s, vector location, constexpr vector offset) { __target_switch @@ -809,6 +859,7 @@ extension __TextureImpl [__readNone] __glsl_extension(GL_ARB_sparse_texture_clamp) + [require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] T Sample(SamplerState s, vector location, constexpr vector offset, float clamp) { __target_switch @@ -838,6 +889,7 @@ extension __TextureImpl } [__readNone] + [require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] T SampleBias(SamplerState s, vector location, float bias) { __target_switch @@ -858,6 +910,7 @@ extension __TextureImpl } [__readNone] + [require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] T SampleBias(SamplerState s, vector location, float bias, constexpr vector offset) { __target_switch @@ -878,6 +931,7 @@ extension __TextureImpl } [__readNone] [ForceInline] + [require(glsl_hlsl_spirv, texture_shadowlod)] float SampleCmp(SamplerComparisonState s, vector location, float compareValue) { __target_switch @@ -907,6 +961,7 @@ extension __TextureImpl } [__readNone] [ForceInline] + [require(glsl_hlsl_spirv, texture_shadowlod)] float SampleCmpLevelZero(SamplerComparisonState s, vector location, float compareValue) { __target_switch @@ -933,6 +988,7 @@ extension __TextureImpl } [__readNone] [ForceInline] + [require(glsl_hlsl_spirv, texture_shadowlod)] float SampleCmp(SamplerComparisonState s, vector location, float compareValue, constexpr vector offset) { __target_switch @@ -958,6 +1014,7 @@ extension __TextureImpl } [__readNone] [ForceInline] + [require(glsl_hlsl_spirv, texture_shadowlod)] float SampleCmpLevelZero(SamplerComparisonState s, vector location, float compareValue, constexpr vector offset) { __target_switch @@ -984,6 +1041,7 @@ extension __TextureImpl } [__readNone] + [require(cpp_glsl_hlsl_spirv, texture_sm_4_1)] T SampleGrad(SamplerState s, vector location, vector gradX, vector gradY) { __target_switch @@ -1004,6 +1062,7 @@ extension __TextureImpl } [__readNone] + [require(cpp_glsl_hlsl_spirv, texture_sm_4_1)] T SampleGrad(SamplerState s, vector location, vector gradX, vector gradY, constexpr vector offset) { __target_switch @@ -1026,6 +1085,7 @@ extension __TextureImpl __glsl_extension(GL_ARB_sparse_texture_clamp) [__readNone] + [require(cpp_glsl_hlsl_spirv, texture_sm_4_1)] T SampleGrad(SamplerState s, vector location, vector gradX, vector gradY, constexpr vector offset, float lodClamp) { __target_switch @@ -1048,6 +1108,7 @@ extension __TextureImpl [__readNone] [ForceInline] + [require(cpp_cuda_glsl_hlsl_spirv, texture_sm_4_1)] T SampleLevel(SamplerState s, vector location, float level) { __target_switch @@ -1100,6 +1161,7 @@ extension __TextureImpl [__readNone] [ForceInline] + [require(cpp_glsl_hlsl_spirv, texture_sm_4_1)] T SampleLevel(SamplerState s, vector location, float level, constexpr vector offset) { __target_switch @@ -1157,6 +1219,7 @@ ${{{{ __generic extension __TextureImpl { + [require(cpp_cuda_glsl_hlsl_spirv, texture_sm_4_1_vertex_fragment_geometry)] float2 GetSamplePosition(int s); } @@ -1166,6 +1229,7 @@ Array __makeArray(T v0, T v1, T v2, T v3); // Gather for scalar textures. __generic [ForceInline] +[require(glsl_spirv, GLSL_400)] vector __glsl_gather(__TextureImpl texture, SamplerState s, vector location, int component) { __target_switch @@ -1181,6 +1245,7 @@ vector __glsl_gather(__TextureImpl [ForceInline] +[require(glsl_spirv, GLSL_400)] vector __glsl_gather(__TextureImpl sampler, vector location, int component) { __target_switch @@ -1195,6 +1260,7 @@ vector __glsl_gather(__TextureImpl [ForceInline] +[require(glsl_spirv, GLSL_400)] vector __glsl_gather_offset(__TextureImpl texture, SamplerState s, constexpr vector location, constexpr vector offset, int component) { __target_switch @@ -1210,6 +1276,7 @@ vector __glsl_gather_offset(__TextureImpl [ForceInline] +[require(glsl_spirv, GLSL_400)] vector __glsl_gather_offset(__TextureImpl sampler, vector location, constexpr vector offset, int component) { __target_switch @@ -1224,6 +1291,7 @@ vector __glsl_gather_offset(__TextureImpl [ForceInline] +[require(glsl_spirv, GLSL_400)] vector __glsl_gather_offsets(__TextureImpl texture, SamplerState s, vector location, constexpr vector offset1, constexpr vector offset2, @@ -1246,6 +1314,7 @@ vector __glsl_gather_offsets(__TextureImpl [ForceInline] +[require(glsl_spirv, GLSL_400)] vector __glsl_gather_offsets(__TextureImpl sampler, vector location, constexpr vector offset1, constexpr vector offset2, @@ -1267,6 +1336,7 @@ vector __glsl_gather_offsets(__TextureImpl [ForceInline] +[require(glsl_spirv, GLSL_400)] vector __glsl_gatherCmp(__TextureImpl texture, SamplerComparisonState s, vector location, TElement compareValue) { __target_switch @@ -1282,6 +1352,7 @@ vector __glsl_gatherCmp(__TextureImpl [ForceInline] +[require(glsl_spirv, GLSL_400)] vector __glsl_gatherCmp(__TextureImpl sampler, vector location, TElement compareValue) { __target_switch @@ -1296,6 +1367,7 @@ vector __glsl_gatherCmp(__TextureImpl [ForceInline] +[require(glsl_spirv, GLSL_400)] vector __glsl_gatherCmp_offset(__TextureImpl texture, SamplerComparisonState s, vector location, TElement compareValue, constexpr vector offset) { __target_switch @@ -1311,6 +1383,7 @@ vector __glsl_gatherCmp_offset(__TextureImpl [ForceInline] +[require(glsl_spirv, GLSL_400)] vector __glsl_gatherCmp_offset(__TextureImpl sampler, vector location, TElement compareValue, constexpr vector offset) { __target_switch @@ -1325,6 +1398,7 @@ vector __glsl_gatherCmp_offset(__TextureImpl [ForceInline] +[require(glsl_spirv, GLSL_400)] vector __glsl_gatherCmp_offsets(__TextureImpl texture, SamplerComparisonState s, vector location, TElement compareValue, vector offset1, vector offset2, @@ -1346,6 +1420,7 @@ vector __glsl_gatherCmp_offsets(__TextureImpl [ForceInline] +[require(glsl_spirv, GLSL_400)] vector __glsl_gatherCmp_offsets(__TextureImpl sampler, vector location, TElement compareValue, vector offset1, vector offset2, @@ -1396,6 +1471,7 @@ ${{{{ auto glslComponent = (isCmp ? "" :glslComponentNames[componentId == 0 ? 0 : componentId - 1]); }}}} [ForceInline] + [require(glsl_hlsl_spirv, texture_gather)] vector Gather$(cmp)$(componentName)($(samplerStateType)$(samplerStateParam) vector location $(cmpParam)) { __target_switch @@ -1407,6 +1483,7 @@ ${{{{ } } [ForceInline] + [require(glsl_hlsl_spirv, texture_gather)] vector Gather$(cmp)$(componentName)($(samplerStateType)$(samplerStateParam) vector location $(cmpParam), constexpr vector offset) { __target_switch @@ -1418,6 +1495,7 @@ ${{{{ } } [ForceInline] + [require(glsl_hlsl_spirv, texture_gather)] vector Gather$(cmp)$(componentName)($(samplerStateType)$(samplerStateParam) vector location $(cmpParam), constexpr vector offset1, constexpr vector offset2, @@ -1453,6 +1531,7 @@ extension __TextureImpl location) { __intrinsic_asm "$ctexelFetch($0, ($1), 0)$z"; @@ -1461,6 +1540,7 @@ extension __TextureImpl location) { __target_switch @@ -1497,6 +1577,7 @@ extension __TextureImpl location, constexpr vector offset) { __target_switch @@ -1544,6 +1625,7 @@ extension __TextureImpl location, int sampleIndex) { __target_switch @@ -1619,6 +1702,7 @@ extension __TextureImpl locationAndSampleIndex) { return Load(__vectorReshape(locationAndSampleIndex), locationAndSampleIndex[Shape.dimensions + isArray]); @@ -1627,6 +1711,7 @@ extension __TextureImpl location, int sampleIndex, constexpr vector offset) { __target_switch @@ -1671,6 +1756,7 @@ extension __TextureImpl location) { __target_switch @@ -1749,6 +1835,7 @@ extension __TextureImpl location, vector offset) { __target_switch @@ -1769,6 +1856,7 @@ extension __TextureImpl location, vector offset, out uint status) { __target_switch @@ -1782,6 +1870,7 @@ extension __TextureImpl location, T value) { __intrinsic_asm "imageStore($0, $1, $V2)"; @@ -1791,6 +1880,7 @@ extension __TextureImpl location, int sampleIndex) { __target_switch @@ -1886,6 +1978,7 @@ extension __TextureImpl location, int sampleIndex, vector offset) { __target_switch @@ -1919,6 +2012,7 @@ extension __TextureImpl location, int sampleIndex, T value) { __intrinsic_asm "imageStore($0, $1, $2, $V3)"; @@ -1928,6 +2022,7 @@ extension __TextureImpl = __TextureImpl; ${{{{ } @@ -2021,6 +2122,8 @@ ${{{{ __glsl_version(430) __glsl_extension(GL_EXT_shader_atomic_float) +[ForceInline] +[require(glsl_spirv, atomic_glsl_float1)] float __atomicAdd(__ref float value, float amount) { __target_switch @@ -2038,6 +2141,8 @@ float __atomicAdd(__ref float value, float amount) __glsl_version(430) __glsl_extension(GL_NV_shader_atomic_fp16_vector) +[ForceInline] +[require(glsl_spirv, atomic_glsl_halfvec)] half2 __atomicAdd(__ref half2 value, half2 amount) { __target_switch @@ -2056,18 +2161,23 @@ half2 __atomicAdd(__ref half2 value, half2 amount) // Helper for hlsl, using NVAPI __target_intrinsic(hlsl, "NvInterlockedAddUint64($0, $1, $2)") [__requiresNVAPI] +[require(hlsl, atomic_hlsl_nvapi)] uint2 __atomicAdd(RWByteAddressBuffer buf, uint offset, uint2); // atomic add for hlsl using SM6.6 __target_intrinsic(hlsl, "$0.InterlockedAdd64($1, $2, $3)") +[require(hlsl, atomic_hlsl_sm_6_6)] void __atomicAdd(RWByteAddressBuffer buf, uint offset, int64_t value, out int64_t originalValue); __target_intrinsic(hlsl, "$0.InterlockedAdd64($1, $2, $3)") +[require(hlsl, atomic_hlsl_sm_6_6)] void __atomicAdd(RWByteAddressBuffer buf, uint offset, uint64_t value, out uint64_t originalValue); // Int versions require glsl 4.30 // https://www.khronos.org/registry/OpenGL-Refpages/gl4/html/atomicAdd.xhtml __glsl_version(430) +[ForceInline] +[require(glsl_spirv, atomic_glsl)] int __atomicAdd(__ref int value, int amount) { __target_switch @@ -2082,6 +2192,8 @@ int __atomicAdd(__ref int value, int amount) } __glsl_version(430) +[ForceInline] +[require(glsl_spirv, atomic_glsl)] uint __atomicAdd(__ref uint value, uint amount) { __target_switch @@ -2097,6 +2209,8 @@ uint __atomicAdd(__ref uint value, uint amount) __glsl_version(430) __glsl_extension(GL_EXT_shader_atomic_int64) +[ForceInline] +[require(glsl_spirv, atomic_glsl_int64)] int64_t __atomicAdd(__ref int64_t value, int64_t amount) { __target_switch @@ -2114,6 +2228,8 @@ int64_t __atomicAdd(__ref int64_t value, int64_t amount) __target_intrinsic(glsl, "atomicAdd($0, $1)") __glsl_version(430) __glsl_extension(GL_EXT_shader_atomic_int64) +[ForceInline] +[require(glsl_spirv, atomic_glsl_int64)] uint64_t __atomicAdd(__ref uint64_t value, uint64_t amount) { __target_switch @@ -2134,16 +2250,21 @@ uint64_t __atomicAdd(__ref uint64_t value, uint64_t amount) __target_intrinsic(hlsl, "NvInterlockedCompareExchangeUint64($0, $1, $2, $3)") [__requiresNVAPI] +[require(hlsl, atomic_hlsl_nvapi)] uint2 __cas(RWByteAddressBuffer buf, uint offset, uint2 compareValue, uint2 value); // CAS using SM6.6 __target_intrinsic(hlsl, "$0.InterlockedCompareExchange64($1, $2, $3, $4)") +[require(hlsl, atomic_hlsl_sm_6_6)] void __cas(RWByteAddressBuffer buf, uint offset, in int64_t compare_value, in int64_t value, out int64_t original_value); __target_intrinsic(hlsl, "$0.InterlockedCompareExchange64($1, $2, $3, $4)") +[require(hlsl, atomic_hlsl_sm_6_6)] void __cas(RWByteAddressBuffer buf, uint offset, in uint64_t compare_value, in uint64_t value, out uint64_t original_value); __glsl_version(430) __glsl_extension(GL_EXT_shader_atomic_int64) +[ForceInline] +[require(glsl_spirv, atomic_glsl_int64)] int64_t __cas(__ref int64_t ioValue, int64_t compareValue, int64_t newValue) { __target_switch @@ -2160,6 +2281,8 @@ int64_t __cas(__ref int64_t ioValue, int64_t compareValue, int64_t newValue) __glsl_version(430) __glsl_extension(GL_EXT_shader_atomic_int64) +[ForceInline] +[require(glsl_spirv, atomic_glsl_int64)] uint64_t __cas(__ref uint64_t ioValue, uint64_t compareValue, uint64_t newValue) { __target_switch @@ -2178,10 +2301,13 @@ uint64_t __cas(__ref uint64_t ioValue, uint64_t compareValue, uint64_t newValue) __target_intrinsic(hlsl, "NvInterlockedMaxUint64($0, $1, $2)") [__requiresNVAPI] +[require(hlsl, atomic_hlsl_nvapi)] uint2 __atomicMax(RWByteAddressBuffer buf, uint offset, uint2 value); __glsl_version(430) __glsl_extension(GL_EXT_shader_atomic_int64) +[ForceInline] +[require(glsl_spirv, atomic_glsl_int64)] uint64_t __atomicMax(__ref uint64_t ioValue, uint64_t value) { __target_switch @@ -2197,7 +2323,9 @@ uint64_t __atomicMax(__ref uint64_t ioValue, uint64_t value) } __glsl_version(430) -__glsl_extension(GL_EXT_shader_atomic_float_min_max) +__glsl_extension(GL_EXT_shader_atomic_float2) +[ForceInline] +[require(glsl_spirv, atomic_glsl_float2)] float __atomicMax(__ref float ioValue, float value) { __target_switch @@ -2214,7 +2342,9 @@ float __atomicMax(__ref float ioValue, float value) } __glsl_version(430) -__glsl_extension(GL_EXT_shader_atomic_float_min_max) +__glsl_extension(GL_EXT_shader_atomic_float2) +[ForceInline] +[require(glsl_spirv, atomic_glsl_float2)] half __atomicMax(__ref half ioValue, half value) { __target_switch @@ -2234,10 +2364,13 @@ half __atomicMax(__ref half ioValue, half value) __target_intrinsic(hlsl, "NvInterlockedMinUint64($0, $1, $2)") [__requiresNVAPI] +[require(hlsl, atomic_hlsl_nvapi)] uint2 __atomicMin(RWByteAddressBuffer buf, uint offset, uint2 value); __glsl_version(430) __glsl_extension(GL_EXT_shader_atomic_int64) +[ForceInline] +[require(glsl_spirv, atomic_glsl_int64)] uint64_t __atomicMin(__ref uint64_t ioValue, uint64_t value) { __target_switch @@ -2253,7 +2386,9 @@ uint64_t __atomicMin(__ref uint64_t ioValue, uint64_t value) } __glsl_version(430) -__glsl_extension(GL_EXT_shader_atomic_float_min_max) +__glsl_extension(GL_EXT_shader_atomic_float2) +[ForceInline] +[require(glsl_spirv, atomic_glsl_float2)] float __atomicMin(__ref float ioValue, float value) { __target_switch @@ -2270,7 +2405,9 @@ float __atomicMin(__ref float ioValue, float value) } __glsl_version(430) -__glsl_extension(GL_EXT_shader_atomic_float_min_max) +__glsl_extension(GL_EXT_shader_atomic_float2) +[ForceInline] +[require(glsl_spirv, atomic_glsl_float2)] half __atomicMin(__ref half ioValue, half value) { __target_switch @@ -2290,10 +2427,13 @@ half __atomicMin(__ref half ioValue, half value) __target_intrinsic(hlsl, "NvInterlockedAndUint64($0, $1, $2)") [__requiresNVAPI] +[require(hlsl, atomic_hlsl_nvapi)] uint2 __atomicAnd(RWByteAddressBuffer buf, uint offset, uint2 value); __glsl_version(430) __glsl_extension(GL_EXT_shader_atomic_int64) +[ForceInline] +[require(glsl_spirv, atomic_glsl_int64)] uint64_t __atomicAnd(__ref uint64_t ioValue, uint64_t value) { __target_switch @@ -2312,10 +2452,13 @@ uint64_t __atomicAnd(__ref uint64_t ioValue, uint64_t value) __target_intrinsic(hlsl, "NvInterlockedOrUint64($0, $1, $2)") [__requiresNVAPI] +[require(hlsl, atomic_hlsl_nvapi)] uint2 __atomicOr(RWByteAddressBuffer buf, uint offset, uint2 value); __glsl_version(430) __glsl_extension(GL_EXT_shader_atomic_int64) +[ForceInline] +[require(glsl_spirv, atomic_glsl_int64)] uint64_t __atomicOr(__ref uint64_t ioValue, uint64_t value) { __target_switch @@ -2334,10 +2477,13 @@ uint64_t __atomicOr(__ref uint64_t ioValue, uint64_t value) __target_intrinsic(hlsl, "NvInterlockedXorUint64($0, $1, $2)") [__requiresNVAPI] +[require(hlsl, atomic_hlsl_nvapi)] uint2 __atomicXor(RWByteAddressBuffer buf, uint offset, uint2 value); __glsl_version(430) __glsl_extension(GL_EXT_shader_atomic_int64) +[ForceInline] +[require(glsl_spirv, atomic_glsl_int64)] uint64_t __atomicXor(__ref uint64_t ioValue, uint64_t value) { __target_switch @@ -2356,10 +2502,13 @@ uint64_t __atomicXor(__ref uint64_t ioValue, uint64_t value) __target_intrinsic(hlsl, "NvInterlockedExchangeUint64($0, $1, $2)") [__requiresNVAPI] +[require(hlsl, atomic_hlsl_nvapi)] uint2 __atomicExchange(RWByteAddressBuffer buf, uint offset, uint2 value); __glsl_version(430) __glsl_extension(GL_EXT_shader_atomic_int64) +[ForceInline] +[require(glsl_spirv, atomic_glsl_int64)] uint64_t __atomicExchange(__ref uint64_t ioValue, uint64_t value) { __target_switch @@ -2376,11 +2525,13 @@ uint64_t __atomicExchange(__ref uint64_t ioValue, uint64_t value) // Conversion between uint64_t and uint2 +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] uint2 __asuint2(uint64_t i) { return uint2(uint(i), uint(uint64_t(i) >> 32)); } +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] uint64_t __asuint64(uint2 i) { return (uint64_t(i.y) << 32) | i.x; @@ -2389,18 +2540,23 @@ uint64_t __asuint64(uint2 i) // __intrinsic_op($(kIROp_ByteAddressBufferLoad)) +[require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer)] T __byteAddressBufferLoad(ByteAddressBuffer buffer, int offset); __intrinsic_op($(kIROp_ByteAddressBufferLoad)) +[require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)] T __byteAddressBufferLoad(RWByteAddressBuffer buffer, int offset); __intrinsic_op($(kIROp_ByteAddressBufferLoad)) +[require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)] T __byteAddressBufferLoad(RasterizerOrderedByteAddressBuffer buffer, int offset); __intrinsic_op($(kIROp_ByteAddressBufferStore)) +[require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)] void __byteAddressBufferStore(RWByteAddressBuffer buffer, int offset, T value); __intrinsic_op($(kIROp_ByteAddressBufferStore)) +[require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)] void __byteAddressBufferStore(RasterizerOrderedByteAddressBuffer buffer, int offset, T value); __generic @@ -2423,15 +2579,18 @@ struct StructuredBuffer __target_intrinsic(glsl, "$0._data[$1]") __target_intrinsic(spirv, "%addr = OpAccessChain resultType*StorageBuffer resultId _0 const(int, 0) _1; OpLoad resultType resultId %addr;") [__readNone] + [require(cpp_cuda_glsl_hlsl_spirv, structuredbuffer)] T Load(int location); __intrinsic_op($(kIROp_StructuredBufferLoadStatus)) + [require(cpp_cuda_glsl_hlsl_spirv, structuredbuffer)] T Load(int location, out uint status); __subscript(uint index) -> T { [__readNone] __intrinsic_op($(kIROp_StructuredBufferLoad)) + [require(cpp_cuda_glsl_hlsl_spirv, structuredbuffer)] get; }; }; @@ -2439,6 +2598,7 @@ struct StructuredBuffer __generic __magic_type(HLSLConsumeStructuredBufferType) __intrinsic_type($(kIROp_HLSLConsumeStructuredBufferType)) +[require(cpp_cuda_glsl_hlsl_spirv, consumestructuredbuffer)] struct ConsumeStructuredBuffer { __intrinsic_op($(kIROp_StructuredBufferConsume)) @@ -2456,6 +2616,7 @@ struct ConsumeStructuredBuffer }; __generic +[require(glsl_hlsl_spirv, hull)] __magic_type(HLSLInputPatchType) __intrinsic_type($(kIROp_HLSLInputPatchType)) struct InputPatch @@ -2464,6 +2625,7 @@ struct InputPatch }; __generic +[require(glsl_hlsl_spirv, shader_stages_domain_hull)] __magic_type(HLSLOutputPatchType) __intrinsic_type($(kIROp_HLSLOutputPatchType)) struct OutputPatch @@ -2494,11 +2656,13 @@ struct $(item.name) __target_intrinsic(cpp) __target_intrinsic(cuda) [__unsafeForceInlineEarly] + [require(cpp_cuda_glsl_hlsl_spirv, structuredbuffer_rw)] void GetDimensions(out uint dim); [__unsafeForceInlineEarly] __specialized_for_target(spirv) __specialized_for_target(glsl) + [require(cpp_cuda_glsl_hlsl_spirv, structuredbuffer_rw)] void GetDimensions(out uint dim) { dim = __structuredBufferGetDimensions(__getEquivalentStructuredBuffer(this)).x*4; @@ -2506,6 +2670,7 @@ struct $(item.name) __target_intrinsic(hlsl) [__NoSideEffect] + [require(cpp_cuda_glsl_hlsl_spirv, byteaddressbuffer_rw)] uint Load(int location) { return __byteAddressBufferLoad(this, location); @@ -2516,6 +2681,7 @@ struct $(item.name) __target_intrinsic(hlsl) [__NoSideEffect] + [require(cpp_cuda_glsl_hlsl_spirv, byteaddressbuffer_rw)] uint2 Load2(int location) { return __byteAddressBufferLoad(this, location); @@ -2526,6 +2692,7 @@ struct $(item.name) __target_intrinsic(hlsl) [__NoSideEffect] + [require(cpp_cuda_glsl_hlsl_spirv, byteaddressbuffer_rw)] uint3 Load3(int location) { return __byteAddressBufferLoad(this, location); @@ -2536,6 +2703,7 @@ struct $(item.name) __target_intrinsic(hlsl) [__NoSideEffect] + [require(cpp_cuda_glsl_hlsl_spirv, byteaddressbuffer_rw)] uint4 Load4(int location) { return __byteAddressBufferLoad(this, location); @@ -2545,6 +2713,7 @@ struct $(item.name) uint4 Load4(int location, out uint status); [__NoSideEffect] + [require(cpp_cuda_glsl_hlsl_spirv, byteaddressbuffer_rw)] T Load(int location) { return __byteAddressBufferLoad(this, location); @@ -2573,6 +2742,7 @@ ${{{{ __cuda_sm_version(2.0) [__requiresNVAPI] [ForceInline] + [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda_float1)] void InterlockedAddF32(uint byteAddress, float valueToAdd, out float originalValue) { __target_switch @@ -2592,6 +2762,7 @@ ${{{{ // FP16x2 [__requiresNVAPI] [ForceInline] + [require(hlsl, atomic_hlsl_nvapi)] uint _NvInterlockedAddFp16x2(uint byteAddress, uint fp16x2Value) { __target_switch @@ -2642,6 +2813,8 @@ ${{{{ [__requiresNVAPI] [ForceInline] __cuda_sm_version(2.0) + [ForceInline] + [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda_float1)] void InterlockedAddF32(uint byteAddress, float valueToAdd) { __target_switch @@ -2661,6 +2834,8 @@ ${{{{ // Int64 Add [ForceInline] __cuda_sm_version(6.0) + [ForceInline] + [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda6_int64)] void InterlockedAddI64(uint byteAddress, int64_t valueToAdd, out int64_t originalValue) { __target_switch @@ -2680,10 +2855,12 @@ ${{{{ // Without returning original value __cuda_sm_version(6.0) __target_intrinsic(cuda, "atomicAdd($0._getPtrAt($1), $2)") + [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda6_int64)] void InterlockedAddI64(uint byteAddress, int64_t valueToAdd); [ForceInline] __specialized_for_target(hlsl) + [ForceInline] void InterlockedAddI64(uint byteAddress, int64_t valueToAdd) { __atomicAdd(this, byteAddress, __asuint2(valueToAdd)); @@ -2692,6 +2869,7 @@ ${{{{ [ForceInline] __specialized_for_target(glsl) __specialized_for_target(spirv) + [ForceInline] void InterlockedAddI64(uint byteAddress, int64_t valueToAdd) { let buf = __getEquivalentStructuredBuffer(this); @@ -2701,9 +2879,11 @@ ${{{{ // Cas uint64_t __target_intrinsic(cuda, "(*$4 = atomicCAS($0._getPtrAt($1), $2, $3))") + [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda9_int64)] void InterlockedCompareExchangeU64(uint byteAddress, uint64_t compareValue, uint64_t value, out uint64_t outOriginalValue); __specialized_for_target(hlsl) + [ForceInline] void InterlockedCompareExchangeU64(uint byteAddress, uint64_t compareValue, uint64_t value, out uint64_t outOriginalValue) { outOriginalValue = __asuint64(__cas(this, byteAddress, __asuint2(compareValue), __asuint2(value))); @@ -2711,6 +2891,7 @@ ${{{{ __specialized_for_target(glsl) __specialized_for_target(spirv) + [ForceInline] void InterlockedCompareExchangeU64(uint byteAddress, uint64_t compareValue, uint64_t value, out uint64_t outOriginalValue) { let buf = __getEquivalentStructuredBuffer(this); @@ -2719,8 +2900,9 @@ ${{{{ // Max - __cuda_sm_version(3.5) + __cuda_sm_version(5.0) __target_intrinsic(cuda, "atomicMax($0._getPtrAt($1), $2)") + [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda5_int64)] uint64_t InterlockedMaxU64(uint byteAddress, uint64_t value); __specialized_for_target(hlsl) @@ -2728,14 +2910,15 @@ ${{{{ __specialized_for_target(glsl) __specialized_for_target(spirv) + [ForceInline] uint64_t InterlockedMaxU64(uint byteAddress, uint64_t value) { let buf = __getEquivalentStructuredBuffer(this); return __atomicMax(buf[byteAddress / 8], value); } - [require(hlsl)] [ForceInline] + [require(hlsl, atomic_hlsl_sm_6_6)] void InterlockedMax64(uint byteAddress, int64_t value) { __target_switch @@ -2744,8 +2927,8 @@ ${{{{ } } - [require(hlsl)] [ForceInline] + [require(hlsl, atomic_hlsl_sm_6_6)] void InterlockedMax64(uint byteAddress, int64_t value, out int64_t outOriginalValue) { __target_switch @@ -2754,8 +2937,8 @@ ${{{{ } } - [require(hlsl)] [ForceInline] + [require(hlsl, atomic_hlsl_sm_6_6)] void InterlockedMax64(uint byteAddress, uint64_t value) { __target_switch @@ -2764,8 +2947,8 @@ ${{{{ } } - [require(hlsl)] [ForceInline] + [require(hlsl, atomic_hlsl_sm_6_6)] void InterlockedMax64(uint byteAddress, uint64_t value, out uint64_t outOriginalValue) { __target_switch @@ -2776,8 +2959,9 @@ ${{{{ // Min - __cuda_sm_version(3.5) + __cuda_sm_version(5.0) __target_intrinsic(cuda, "atomicMin($0._getPtrAt($1), $2)") + [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda5_int64)] uint64_t InterlockedMinU64(uint byteAddress, uint64_t value); __specialized_for_target(hlsl) @@ -2785,14 +2969,15 @@ ${{{{ __specialized_for_target(glsl) __specialized_for_target(spirv) + [ForceInline] uint64_t InterlockedMinU64(uint byteAddress, uint64_t value) { let buf = __getEquivalentStructuredBuffer(this); return __atomicMin(buf[byteAddress / 8], value); } - [require(hlsl)] [ForceInline] + [require(hlsl, atomic_hlsl_sm_6_6)] void InterlockedMin64(uint byteAddress, int64_t value) { __target_switch @@ -2801,8 +2986,8 @@ ${{{{ } } - [require(hlsl)] [ForceInline] + [require(hlsl, atomic_hlsl_sm_6_6)] void InterlockedMin64(uint byteAddress, int64_t value, out int64_t outOriginalValue) { __target_switch @@ -2811,8 +2996,8 @@ ${{{{ } } - [require(hlsl)] [ForceInline] + [require(hlsl, atomic_hlsl_sm_6_6)] void InterlockedMin64(uint byteAddress, uint64_t value) { __target_switch @@ -2821,8 +3006,8 @@ ${{{{ } } - [require(hlsl)] [ForceInline] + [require(hlsl, atomic_hlsl_sm_6_6)] void InterlockedMin64(uint byteAddress, uint64_t value, out uint64_t outOriginalValue) { __target_switch @@ -2833,7 +3018,9 @@ ${{{{ // And + __cuda_sm_version(5.0) __target_intrinsic(cuda, "atomicAnd($0._getPtrAt($1), $2)") + [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda5_int64)] uint64_t InterlockedAndU64(uint byteAddress, uint64_t value); __specialized_for_target(hlsl) @@ -2841,14 +3028,15 @@ ${{{{ __specialized_for_target(glsl) __specialized_for_target(spirv) + [ForceInline] uint64_t InterlockedAndU64(uint byteAddress, uint64_t value) { let buf = __getEquivalentStructuredBuffer(this); return __atomicAnd(buf[byteAddress / 8], value); } - [require(hlsl)] [ForceInline] + [require(hlsl, atomic_hlsl_sm_6_6)] void InterlockedAnd64(uint byteAddress, uint64_t value) { __target_switch @@ -2857,8 +3045,8 @@ ${{{{ } } - [require(hlsl)] [ForceInline] + [require(hlsl, atomic_hlsl_sm_6_6)] void InterlockedAnd64(uint byteAddress, uint64_t value, out uint64_t outOriginalValue) { __target_switch @@ -2869,7 +3057,9 @@ ${{{{ // Or + __cuda_sm_version(5.0) __target_intrinsic(cuda, "atomicOr($0._getPtrAt($1), $2)") + [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda5_int64)] uint64_t InterlockedOrU64(uint byteAddress, uint64_t value); __specialized_for_target(hlsl) @@ -2877,14 +3067,15 @@ ${{{{ __specialized_for_target(glsl) __specialized_for_target(spirv) + [ForceInline] uint64_t InterlockedOrU64(uint byteAddress, uint64_t value) { let buf = __getEquivalentStructuredBuffer(this); return __atomicOr(buf[byteAddress / 8], value); } - [require(hlsl)] [ForceInline] + [require(hlsl, atomic_hlsl_sm_6_6)] void InterlockedOr64(uint byteAddress, uint64_t value) { __target_switch @@ -2893,8 +3084,8 @@ ${{{{ } } - [require(hlsl)] [ForceInline] + [require(hlsl, atomic_hlsl_sm_6_6)] void InterlockedOr64(uint byteAddress, uint64_t value, out uint64_t outOriginalValue) { __target_switch @@ -2905,7 +3096,9 @@ ${{{{ // Xor + __cuda_sm_version(5.0) __target_intrinsic(cuda, "atomicXor($0._getPtrAt($1), $2)") + [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda5_int64)] uint64_t InterlockedXorU64(uint byteAddress, uint64_t value); __specialized_for_target(hlsl) @@ -2913,14 +3106,15 @@ ${{{{ __specialized_for_target(glsl) __specialized_for_target(spirv) + [ForceInline] uint64_t InterlockedXorU64(uint byteAddress, uint64_t value) { let buf = __getEquivalentStructuredBuffer(this); return __atomicXor(buf[byteAddress / 8], value); } - [require(hlsl)] [ForceInline] + [require(hlsl, atomic_hlsl_sm_6_6)] void InterlockedXor64(uint byteAddress, uint64_t value) { __target_switch @@ -2929,8 +3123,8 @@ ${{{{ } } - [require(hlsl)] [ForceInline] + [require(hlsl, atomic_hlsl_sm_6_6)] void InterlockedXor64(uint byteAddress, uint64_t value, out uint64_t outOriginalValue) { __target_switch @@ -2942,6 +3136,7 @@ ${{{{ // Exchange __target_intrinsic(cuda, "atomicExch($0._getPtrAt($1), $2)") + [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda9_int64)] uint64_t InterlockedExchangeU64(uint byteAddress, uint64_t value); __specialized_for_target(hlsl) @@ -2949,14 +3144,15 @@ ${{{{ __specialized_for_target(glsl) __specialized_for_target(spirv) + [ForceInline] uint64_t InterlockedExchangeU64(uint byteAddress, uint64_t value) { let buf = __getEquivalentStructuredBuffer(this); return __atomicExchange(buf[byteAddress / 8], value); } - [require(hlsl)] [ForceInline] + [require(hlsl, atomic_hlsl_sm_6_6)] void InterlockedExchangeFloat(uint byteAddress, float value, out float outOriginalValue) { __target_switch @@ -2965,8 +3161,8 @@ ${{{{ } } - [require(hlsl)] [ForceInline] + [require(hlsl, atomic_hlsl_sm_6_6)] void InterlockedExchange64(uint byteAddress, int64_t value, out int64_t outOriginalValue) { __target_switch @@ -2975,8 +3171,8 @@ ${{{{ } } - [require(hlsl)] [ForceInline] + [require(hlsl, atomic_hlsl_sm_6_6)] void InterlockedExchange64(uint byteAddress, uint64_t value, out uint64_t outOriginalValue) { __target_switch @@ -2987,6 +3183,7 @@ ${{{{ // SM6.6 6 64bit atomics. [ForceInline] + [require(glsl_hlsl_spirv, atomic_glsl_hlsl_cuda9_int64)] void InterlockedAdd64(uint byteAddress, int64_t valueToAdd) { __target_switch @@ -3000,6 +3197,7 @@ ${{{{ } [ForceInline] + [require(glsl_hlsl_spirv, atomic_glsl_hlsl_cuda9_int64)] void InterlockedAdd64(uint byteAddress, int64_t valueToAdd, out int64_t outOriginalValue) { __target_switch @@ -3009,10 +3207,12 @@ ${{{{ case spirv: let buf = __getEquivalentStructuredBuffer(this); outOriginalValue = __atomicAdd(buf[byteAddress / 8], valueToAdd); + return; } } [ForceInline] + [require(glsl_hlsl_spirv, atomic_glsl_hlsl_cuda9_int64)] void InterlockedAdd64(uint byteAddress, uint64_t valueToAdd) { __target_switch @@ -3026,6 +3226,7 @@ ${{{{ } [ForceInline] + [require(glsl_hlsl_spirv, atomic_glsl_hlsl_cuda9_int64)] void InterlockedAdd64(uint byteAddress, uint64_t valueToAdd, out uint64_t outOriginalValue) { __target_switch @@ -3035,76 +3236,64 @@ ${{{{ case spirv: let buf = __getEquivalentStructuredBuffer(this); outOriginalValue = __atomicAdd(buf[byteAddress / 8], valueToAdd); + return; } } - __specialized_for_target(hlsl) - void InterlockedCompareExchange64(uint byteAddress, int64_t compareValue, int64_t value, out int64_t outOriginalValue) - { - __cas(this, byteAddress, compareValue, value, outOriginalValue); - } - __specialized_for_target(glsl) - __specialized_for_target(spirv) - void InterlockedCompareExchange64(uint byteAddress, int64_t compareValue, int64_t value, out int64_t outOriginalValue) - { - let buf = __getEquivalentStructuredBuffer(this); - outOriginalValue = __cas(buf[byteAddress / 8], compareValue, value); - } - __specialized_for_target(hlsl) - void InterlockedCompareExchange64(uint byteAddress, uint64_t compareValue, uint64_t value, out uint64_t outOriginalValue) - { - __cas(this, byteAddress, compareValue, value, outOriginalValue); - } - __specialized_for_target(glsl) - __specialized_for_target(spirv) - void InterlockedCompareExchange64(uint byteAddress, uint64_t compareValue, uint64_t value, out uint64_t outOriginalValue) - { - let buf = __getEquivalentStructuredBuffer(this); - outOriginalValue = __cas(buf[byteAddress / 8], compareValue, value); - } - [require(hlsl)] [ForceInline] - void InterlockedCompareStoreFloatBitwise(uint byteAddress, float compareValue, float value) + void InterlockedCompareExchange64(uint byteAddress, int64_t compareValue, int64_t value, out int64_t outOriginalValue) { __target_switch { - case hlsl: __intrinsic_asm ".InterlockedCompareStoreFloatBitwise"; + case hlsl: + __cas(this, byteAddress, compareValue, value, outOriginalValue); + return; + case glsl: + case spirv: + let buf = __getEquivalentStructuredBuffer(this); + outOriginalValue = __cas(buf[byteAddress / 8], compareValue, value); + return; } } - - [require(hlsl)] [ForceInline] - void InterlockedCompareExchangeFloatBitwise(uint byteAddress, float compareValue, float value, out float outOriginalValue) + void InterlockedCompareExchange64(uint byteAddress, uint64_t compareValue, uint64_t value, out uint64_t outOriginalValue) { __target_switch { - case hlsl: __intrinsic_asm ".InterlockedCompareExchangeFloatBitwise"; + case hlsl: + __cas(this, byteAddress, compareValue, value, outOriginalValue); + return; + case glsl: + case spirv: + let buf = __getEquivalentStructuredBuffer(this); + outOriginalValue = __cas(buf[byteAddress / 8], compareValue, value); + return; } } - [require(hlsl)] [ForceInline] - void InterlockedCompareStore64(uint byteAddress, int64_t compareValue, int64_t value) + [require(hlsl, atomic_hlsl_sm_6_6)] + void InterlockedCompareStoreFloatBitwise(uint byteAddress, float compareValue, float value) { __target_switch { - case hlsl: __intrinsic_asm ".InterlockedCompareStore64"; + case hlsl: __intrinsic_asm ".InterlockedCompareStoreFloatBitwise"; } } - [require(hlsl)] [ForceInline] - void InterlockedCompareExchange64(uint byteAddress, int64_t compareValue, int64_t value, out int64_t outOriginalValue) + [require(hlsl, atomic_hlsl_sm_6_6)] + void InterlockedCompareExchangeFloatBitwise(uint byteAddress, float compareValue, float value, out float outOriginalValue) { __target_switch { - case hlsl: __intrinsic_asm ".InterlockedCompareExchange64"; + case hlsl: __intrinsic_asm ".InterlockedCompareExchangeFloatBitwise"; } } - [require(hlsl)] [ForceInline] - void InterlockedCompareStore64(uint byteAddress, uint64_t compareValue, uint64_t value) + [require(hlsl, atomic_hlsl_sm_6_6)] + void InterlockedCompareStore64(uint byteAddress, int64_t compareValue, int64_t value) { __target_switch { @@ -3112,13 +3301,13 @@ ${{{{ } } - [require(hlsl)] [ForceInline] - void InterlockedCompareExchange64(uint byteAddress, uint64_t compareValue, uint64_t value, out uint64_t outOriginalValue) + [require(hlsl, atomic_hlsl_sm_6_6)] + void InterlockedCompareStore64(uint byteAddress, uint64_t compareValue, uint64_t value) { __target_switch { - case hlsl: __intrinsic_asm ".InterlockedCompareExchange64"; + case hlsl: __intrinsic_asm ".InterlockedCompareStore64"; } } @@ -3128,6 +3317,7 @@ ${{{{ // Added operations: [ForceInline] + [require(cuda_glsl_hlsl_spirv, byteaddressbuffer_rw)] void InterlockedAdd( UINT dest, UINT value, @@ -3145,6 +3335,7 @@ ${{{{ } [ForceInline] + [require(cuda_glsl_hlsl_spirv, byteaddressbuffer_rw)] void InterlockedAdd( UINT dest, UINT value) @@ -3160,6 +3351,8 @@ ${{{{ } } + [ForceInline] + [require(cuda_glsl_hlsl_spirv, byteaddressbuffer_rw)] void InterlockedAnd( UINT dest, UINT value, @@ -3176,6 +3369,8 @@ ${{{{ } } + [ForceInline] + [require(cuda_glsl_hlsl_spirv, byteaddressbuffer_rw)] void InterlockedAnd( UINT dest, UINT value) @@ -3191,6 +3386,8 @@ ${{{{ } } + [ForceInline] + [require(cuda_glsl_hlsl_spirv, byteaddressbuffer_rw)] void InterlockedCompareExchange( UINT dest, UINT compare_value, @@ -3208,6 +3405,8 @@ ${{{{ } } + [ForceInline] + [require(cuda_glsl_hlsl_spirv, byteaddressbuffer_rw)] void InterlockedCompareStore( UINT dest, UINT compare_value, @@ -3224,6 +3423,8 @@ ${{{{ } } + [ForceInline] + [require(cuda_glsl_hlsl_spirv, byteaddressbuffer_rw)] void InterlockedExchange( UINT dest, UINT value, @@ -3240,6 +3441,8 @@ ${{{{ } } + [ForceInline] + [require(cuda_glsl_hlsl_spirv, byteaddressbuffer_rw)] void InterlockedMax( UINT dest, UINT value, @@ -3256,6 +3459,8 @@ ${{{{ } } + [ForceInline] + [require(cuda_glsl_hlsl_spirv, byteaddressbuffer_rw)] void InterlockedMax( UINT dest, UINT value) @@ -3271,6 +3476,8 @@ ${{{{ } } + [ForceInline] + [require(cuda_glsl_hlsl_spirv, byteaddressbuffer_rw)] void InterlockedMin( UINT dest, UINT value, @@ -3287,6 +3494,8 @@ ${{{{ } } + [ForceInline] + [require(cuda_glsl_hlsl_spirv, byteaddressbuffer_rw)] void InterlockedMin( UINT dest, UINT value) @@ -3302,6 +3511,8 @@ ${{{{ } } + [ForceInline] + [require(cuda_glsl_hlsl_spirv, byteaddressbuffer_rw)] void InterlockedOr( UINT dest, UINT value, @@ -3318,6 +3529,8 @@ ${{{{ } } + [ForceInline] + [require(cuda_glsl_hlsl_spirv, byteaddressbuffer_rw)] void InterlockedOr( UINT dest, UINT value) @@ -3333,6 +3546,8 @@ ${{{{ } } + [ForceInline] + [require(cuda_glsl_hlsl_spirv, byteaddressbuffer_rw)] void InterlockedXor( UINT dest, UINT value, @@ -3349,6 +3564,8 @@ ${{{{ } } + [ForceInline] + [require(cuda_glsl_hlsl_spirv, byteaddressbuffer_rw)] void InterlockedXor( UINT dest, UINT value) @@ -3423,6 +3640,7 @@ for(auto item : kMutableStructuredBufferCases) { __generic __magic_type(HLSL$(item.name)Type) __intrinsic_type($(item.op)) +[require(cpp_cuda_glsl_hlsl_metal_spirv, structuredbuffer_rw)] struct $(item.name) { uint DecrementCounter(); @@ -3462,6 +3680,7 @@ ${{{{ }}}} __generic +[require(glsl_hlsl_spirv, geometry)] __magic_type(HLSLPointStreamType) __intrinsic_type($(kIROp_HLSLPointStreamType)) struct PointStream @@ -3490,6 +3709,7 @@ struct PointStream }; __generic +[require(glsl_hlsl_spirv, geometry)] __magic_type(HLSLLineStreamType) __intrinsic_type($(kIROp_HLSLLineStreamType)) struct LineStream @@ -3518,6 +3738,7 @@ struct LineStream }; __generic +[require(glsl_hlsl_spirv, geometry)] __magic_type(HLSLTriangleStreamType) __intrinsic_type($(kIROp_HLSLTriangleStreamType)) struct TriangleStream @@ -3575,6 +3796,7 @@ __target_intrinsic(cuda, "$P_abs($0)") __target_intrinsic(cpp, "$P_abs($0)") __target_intrinsic(spirv, "OpExtInst resultType resultId glsl450 fi(FAbs, SAbs) _0") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] T abs(T x); /*{ // Note: this simple definition may not be appropriate for floating-point inputs @@ -3586,6 +3808,7 @@ __target_intrinsic(hlsl) __target_intrinsic(glsl) __target_intrinsic(spirv, "OpExtInst resultType resultId glsl450 fi(FAbs, SAbs) _0") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] vector abs(vector x) { VECTOR_MAP_UNARY(T, N, abs, x); @@ -3594,6 +3817,7 @@ vector abs(vector x) __generic __target_intrinsic(hlsl) [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] matrix abs(matrix x) { MATRIX_MAP_UNARY(T, N, M, abs, x); @@ -3606,6 +3830,7 @@ __target_intrinsic(cuda, "$P_abs($0)") __target_intrinsic(cpp, "$P_abs($0)") __target_intrinsic(spirv, "OpExtInst resultType resultId glsl450 fi(FAbs, SAbs) _0") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] T abs(T x); __generic @@ -3613,6 +3838,7 @@ __target_intrinsic(hlsl) __target_intrinsic(glsl) __target_intrinsic(spirv, "OpExtInst resultType resultId glsl450 fi(FAbs, SAbs) _0") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] vector abs(vector x) { VECTOR_MAP_UNARY(T, N, abs, x); @@ -3621,6 +3847,7 @@ vector abs(vector x) __generic __target_intrinsic(hlsl) [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] matrix abs(matrix x) { MATRIX_MAP_UNARY(T, N, M, abs, x); @@ -3635,6 +3862,7 @@ __target_intrinsic(cuda, "$P_acos($0)") __target_intrinsic(cpp, "$P_acos($0)") __target_intrinsic(spirv, "OpExtInst resultType resultId glsl450 Acos _0") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] T acos(T x); __generic @@ -3642,6 +3870,7 @@ __target_intrinsic(hlsl) __target_intrinsic(glsl) __target_intrinsic(spirv, "OpExtInst resultType resultId glsl450 Acos _0") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] vector acos(vector x) { VECTOR_MAP_UNARY(T, N, acos, x); @@ -3650,6 +3879,7 @@ vector acos(vector x) __generic __target_intrinsic(hlsl) [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] matrix acos(matrix x) { MATRIX_MAP_UNARY(T, N, M, acos, x); @@ -3740,6 +3970,7 @@ bool all(matrix x) // Barrier for writes to all memory spaces (HLSL SM 5.0) __glsl_extension(GL_KHR_memory_scope_semantics) +[require(cuda_glsl_hlsl_spirv, memorybarrier_compute)] void AllMemoryBarrier() { __target_switch @@ -3756,6 +3987,7 @@ void AllMemoryBarrier() // Thread-group sync and barrier for writes to all memory spaces (HLSL SM 5.0) __glsl_extension(GL_KHR_memory_scope_semantics) +[require(cuda_glsl_hlsl_spirv, memorybarrier_compute)] void AllMemoryBarrierWithGroupSync() { __target_switch @@ -3868,6 +4100,7 @@ __target_intrinsic(cuda, "$P_asdouble($0, $1)") __target_intrinsic(spirv, "%v = OpCompositeConstruct _type(uint2) resultId _0 _1; OpExtInst resultType resultId glsl450 59 %v") __glsl_extension(GL_ARB_gpu_shader5) [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)] double asdouble(uint lowbits, uint highbits); // Reinterpret bits as a float (HLSL SM 4.0) @@ -3878,6 +4111,7 @@ __target_intrinsic(cpp, "$P_asfloat($0)") __target_intrinsic(cuda, "$P_asfloat($0)") __target_intrinsic(spirv, "OpBitcast resultType resultId _0") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)] float asfloat(int x); __target_intrinsic(hlsl) @@ -3886,6 +4120,7 @@ __target_intrinsic(cpp, "$P_asfloat($0)") __target_intrinsic(cuda, "$P_asfloat($0)") __target_intrinsic(spirv, "OpBitcast resultType resultId _0") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)] float asfloat(uint x); __generic @@ -3893,6 +4128,7 @@ __target_intrinsic(hlsl) __target_intrinsic(glsl, "intBitsToFloat") __target_intrinsic(spirv, "OpBitcast resultType resultId _0") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)] vector asfloat(vector< int, N> x) { VECTOR_MAP_UNARY(float, N, asfloat, x); @@ -3903,6 +4139,7 @@ __target_intrinsic(hlsl) __target_intrinsic(glsl, "uintBitsToFloat") __target_intrinsic(spirv, "OpBitcast resultType resultId _0") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)] vector asfloat(vector x) { VECTOR_MAP_UNARY(float, N, asfloat, x); @@ -3911,6 +4148,7 @@ vector asfloat(vector x) __generic __target_intrinsic(hlsl) [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)] matrix asfloat(matrix< int,N,M> x) { MATRIX_MAP_UNARY(float, N, M, asfloat, x); @@ -3919,6 +4157,7 @@ matrix asfloat(matrix< int,N,M> x) __generic __target_intrinsic(hlsl) [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)] matrix asfloat(matrix x) { MATRIX_MAP_UNARY(float, N, M, asfloat, x); @@ -3927,18 +4166,21 @@ matrix asfloat(matrix x) // No op [__unsafeForceInlineEarly] [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] float asfloat(float x) { return x; } __generic [__unsafeForceInlineEarly] [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] vector asfloat(vector x) { return x; } __generic [__unsafeForceInlineEarly] [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] matrix asfloat(matrix x) { return x; } @@ -3950,6 +4192,7 @@ __target_intrinsic(cuda, "$P_asin($0)") __target_intrinsic(cpp, "$P_asin($0)") __target_intrinsic(spirv, "OpExtInst resultType resultId glsl450 Asin _0") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] T asin(T x); __generic @@ -3957,6 +4200,7 @@ __target_intrinsic(hlsl) __target_intrinsic(glsl) __target_intrinsic(spirv, "OpExtInst resultType resultId glsl450 Asin _0") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] vector asin(vector x) { VECTOR_MAP_UNARY(T,N,asin,x); @@ -3965,6 +4209,7 @@ vector asin(vector x) __generic __target_intrinsic(hlsl) [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] matrix asin(matrix x) { MATRIX_MAP_UNARY(T,N,M,asin,x); @@ -3978,6 +4223,7 @@ __target_intrinsic(cpp, "$P_asint($0)") __target_intrinsic(cuda, "$P_asint($0)") __target_intrinsic(spirv, "OpBitcast resultType resultId _0") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)] int asint(float x); __target_intrinsic(hlsl) @@ -3986,6 +4232,7 @@ __target_intrinsic(cpp, "$P_asint($0)") __target_intrinsic(cuda, "$P_asint($0)") __target_intrinsic(spirv, "OpBitcast resultType resultId _0") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)] int asint(uint x); __generic @@ -3993,6 +4240,7 @@ __target_intrinsic(hlsl) __target_intrinsic(glsl, "floatBitsToInt") __target_intrinsic(spirv, "OpBitcast resultType resultId _0") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)] vector asint(vector x) { VECTOR_MAP_UNARY(int, N, asint, x); @@ -4003,6 +4251,7 @@ __target_intrinsic(hlsl) __target_intrinsic(glsl, "ivec$N0($0)") __target_intrinsic(spirv, "OpBitcast resultType resultId _0") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)] vector asint(vector x) { VECTOR_MAP_UNARY(int, N, asint, x); @@ -4011,6 +4260,7 @@ vector asint(vector x) __generic __target_intrinsic(hlsl) [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)] matrix asint(matrix x) { MATRIX_MAP_UNARY(int, N, M, asint, x); @@ -4019,6 +4269,7 @@ matrix asint(matrix x) __generic __target_intrinsic(hlsl) [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)] matrix asint(matrix x) { MATRIX_MAP_UNARY(int, N, M, asint, x); @@ -4027,18 +4278,21 @@ matrix asint(matrix x) // No op [__unsafeForceInlineEarly] [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)] int asint(int x) { return x; } __generic [__unsafeForceInlineEarly] [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)] vector asint(vector x) { return x; } __generic [__unsafeForceInlineEarly] [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)] matrix asint(matrix x) { return x; } @@ -4046,6 +4300,7 @@ matrix asint(matrix x) __glsl_extension(GL_ARB_gpu_shader5) [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)] void asuint(double value, out uint lowbits, out uint highbits) { __target_switch @@ -4074,6 +4329,7 @@ __target_intrinsic(spirv, "OpBitcast resultType resultId _0") __target_intrinsic(cpp, "$P_asuint($0)") __target_intrinsic(cuda, "$P_asuint($0)") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)] uint asuint(float x); __target_intrinsic(hlsl) @@ -4082,6 +4338,7 @@ __target_intrinsic(spirv, "OpBitcast resultType resultId _0") __target_intrinsic(cpp, "$P_asuint($0)") __target_intrinsic(cuda, "$P_asuint($0)") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)] uint asuint(int x); __generic @@ -4089,6 +4346,7 @@ __target_intrinsic(hlsl) __target_intrinsic(glsl, "floatBitsToUint") __target_intrinsic(spirv, "OpBitcast resultType resultId _0") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)] vector asuint(vector x) { VECTOR_MAP_UNARY(uint, N, asuint, x); @@ -4099,6 +4357,7 @@ __target_intrinsic(hlsl) __target_intrinsic(glsl, "uvec$N0($0)") __target_intrinsic(spirv, "OpBitcast resultType resultId _0") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)] vector asuint(vector x) { VECTOR_MAP_UNARY(uint, N, asuint, x); @@ -4107,6 +4366,7 @@ vector asuint(vector x) __generic __target_intrinsic(hlsl) [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)] matrix asuint(matrix x) { MATRIX_MAP_UNARY(uint, N, M, asuint, x); @@ -4115,6 +4375,7 @@ matrix asuint(matrix x) __generic __target_intrinsic(hlsl) [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)] matrix asuint(matrix x) { MATRIX_MAP_UNARY(uint, N, M, asuint, x); @@ -4175,13 +4436,16 @@ __target_intrinsic(glsl, "uint16_t(packHalf2x16(vec2($0, 0.0)))") __target_intrinsic(cuda, "__half_as_ushort") __target_intrinsic(spirv, "OpBitcast resultType resultId _0") [__readNone] +[require(cuda_glsl_hlsl_spirv, shader5_sm_5_0)] uint16_t asuint16(float16_t value); [__readNone] +[require(cuda_glsl_hlsl_spirv, shader5_sm_5_0)] vector asuint16(vector value) { VECTOR_MAP_UNARY(uint16_t, N, asuint16, value); } [__readNone] +[require(cuda_glsl_hlsl_spirv, shader5_sm_5_0)] matrix asuint16(matrix value) { MATRIX_MAP_UNARY(uint16_t, R, C, asuint16, value); } @@ -4192,6 +4456,7 @@ __target_intrinsic(glsl, "float16_t(unpackHalf2x16($0).x)") __target_intrinsic(cuda, "__ushort_as_half") __target_intrinsic(spirv, "OpBitcast resultType resultId _0") [__readNone] +[require(cuda_glsl_hlsl_spirv, shader5_sm_5_0)] float16_t asfloat16(uint16_t value); [__readNone] @@ -4207,18 +4472,41 @@ matrix asfloat16(matrix v __target_intrinsic(hlsl) __target_intrinsic(cuda, "__half_as_short") __target_intrinsic(spirv, "OpBitcast resultType resultId _0") -[__unsafeForceInlineEarly][__readNone] int16_t asint16(float16_t value) { return asuint16(value); } -__target_intrinsic(hlsl) [__unsafeForceInlineEarly][__readNone] vector asint16(vector value) { return asuint16(value); } -__target_intrinsic(hlsl) [__unsafeForceInlineEarly][__readNone] matrix asint16(matrix value) { return asuint16(value); } +[__unsafeForceInlineEarly] +[__readNone] +[require(cuda_hlsl_spirv, shader5_sm_5_0)] +int16_t asint16(float16_t value) { return asuint16(value); } + +__target_intrinsic(hlsl) +[__unsafeForceInlineEarly] +[__readNone] +[require(cuda_hlsl_spirv, shader5_sm_5_0)] +vector asint16(vector value) { return asuint16(value); } + +__target_intrinsic(hlsl) +[__unsafeForceInlineEarly] +[__readNone] +[require(cuda_hlsl_spirv, shader5_sm_5_0)] +matrix asint16(matrix value) { return asuint16(value); } __target_intrinsic(hlsl) __target_intrinsic(cuda, "__short_as_half") __target_intrinsic(spirv, "OpBitcast resultType resultId _0") [__readNone] -[__unsafeForceInlineEarly] float16_t asfloat16(int16_t value) { return asfloat16(asuint16(value)); } +[__unsafeForceInlineEarly] +[require(cuda_hlsl_spirv, shader5_sm_5_0)] +float16_t asfloat16(int16_t value) { return asfloat16(asuint16(value)); } + +__target_intrinsic(hlsl) +[__unsafeForceInlineEarly] +[__readNone] +vector asfloat16(vector value) { return asfloat16(asuint16(value)); } -__target_intrinsic(hlsl) [__unsafeForceInlineEarly][__readNone] vector asfloat16(vector value) { return asfloat16(asuint16(value)); } -__target_intrinsic(hlsl) [__unsafeForceInlineEarly][__readNone] matrix asfloat16(matrix value) { return asfloat16(asuint16(value)); } +__target_intrinsic(hlsl) +[__unsafeForceInlineEarly] +[__readNone] +[require(cuda_hlsl_spirv, shader5_sm_5_0)] +matrix asfloat16(matrix value) { return asfloat16(asuint16(value)); } // Inverse tangent (HLSL SM 1.0) __generic @@ -4228,6 +4516,7 @@ __target_intrinsic(cuda, "$P_atan($0)") __target_intrinsic(cpp, "$P_atan($0)") __target_intrinsic(spirv, "OpExtInst resultType resultId glsl450 Atan _0") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] T atan(T x); __generic @@ -4235,6 +4524,7 @@ __target_intrinsic(hlsl) __target_intrinsic(glsl) __target_intrinsic(spirv, "OpExtInst resultType resultId glsl450 Atan _0") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] vector atan(vector x) { VECTOR_MAP_UNARY(T, N, atan, x); @@ -4243,6 +4533,7 @@ vector atan(vector x) __generic __target_intrinsic(hlsl) [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] matrix atan(matrix x) { MATRIX_MAP_UNARY(T, N, M, atan, x); @@ -4255,6 +4546,7 @@ __target_intrinsic(cuda, "$P_atan2($0, $1)") __target_intrinsic(cpp, "$P_atan2($0, $1)") __target_intrinsic(spirv, "OpExtInst resultType resultId glsl450 Atan2 _0 _1") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] T atan2(T y, T x); __generic @@ -4262,6 +4554,7 @@ __target_intrinsic(hlsl) __target_intrinsic(glsl,"atan($0,$1)") __target_intrinsic(spirv, "OpExtInst resultType resultId glsl450 Atan2 _0 _1") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] vector atan2(vector y, vector x) { VECTOR_MAP_BINARY(T, N, atan2, y, x); @@ -4270,6 +4563,7 @@ vector atan2(vector y, vector x) __generic __target_intrinsic(hlsl) [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] matrix atan2(matrix y, matrix x) { MATRIX_MAP_BINARY(T, N, M, atan2, y, x); @@ -4283,6 +4577,7 @@ __target_intrinsic(cuda, "$P_ceil($0)") __target_intrinsic(cpp, "$P_ceil($0)") __target_intrinsic(spirv, "OpExtInst resultType resultId glsl450 Ceil _0") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] T ceil(T x); __generic @@ -4290,6 +4585,7 @@ __target_intrinsic(hlsl) __target_intrinsic(glsl) __target_intrinsic(spirv, "OpExtInst resultType resultId glsl450 Ceil _0") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] vector ceil(vector x) { VECTOR_MAP_UNARY(T, N, ceil, x); @@ -4298,6 +4594,7 @@ vector ceil(vector x) __generic __target_intrinsic(hlsl) [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] matrix ceil(matrix x) { MATRIX_MAP_UNARY(T, N, M, ceil, x); @@ -4313,6 +4610,7 @@ __target_intrinsic(hlsl) __target_intrinsic(glsl) __target_intrinsic(spirv, "OpExtInst resultType resultId glsl450 fus(FClamp, UClamp, SClamp) _0 _1 _2") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] T clamp(T x, T minBound, T maxBound) { return min(max(x, minBound), maxBound); @@ -4323,6 +4621,7 @@ __target_intrinsic(hlsl) __target_intrinsic(glsl) __target_intrinsic(spirv, "OpExtInst resultType resultId glsl450 fus(FClamp, UClamp, SClamp) _0 _1 _2") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] vector clamp(vector x, vector minBound, vector maxBound) { return min(max(x, minBound), maxBound); @@ -4331,6 +4630,7 @@ vector clamp(vector x, vector minBound, vector maxBound) __generic __target_intrinsic(hlsl) [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] matrix clamp(matrix x, matrix minBound, matrix maxBound) { return min(max(x, minBound), maxBound); @@ -4341,6 +4641,7 @@ __target_intrinsic(hlsl) __target_intrinsic(glsl) __target_intrinsic(spirv, "OpExtInst resultType resultId glsl450 fus(FClamp, UClamp, SClamp) _0 _1 _2") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] T clamp(T x, T minBound, T maxBound) { return min(max(x, minBound), maxBound); @@ -4351,6 +4652,7 @@ __target_intrinsic(hlsl) __target_intrinsic(glsl) __target_intrinsic(spirv, "OpExtInst resultType resultId glsl450 fus(FClamp, UClamp, SClamp) _0 _1 _2") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] vector clamp(vector x, vector minBound, vector maxBound) { return min(max(x, minBound), maxBound); @@ -4359,6 +4661,7 @@ vector clamp(vector x, vector minBound, vector maxBound) __generic __target_intrinsic(hlsl) [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] matrix clamp(matrix x, matrix minBound, matrix maxBound) { return min(max(x, minBound), maxBound); @@ -4367,6 +4670,7 @@ matrix clamp(matrix x, matrix minBound, matrix maxBo // Clip (discard) fragment conditionally __generic __target_intrinsic(hlsl) +[require(cpp_cuda_glsl_hlsl_spirv, fragment)] void clip(T x) { if(x < T(0)) discard; @@ -4374,6 +4678,7 @@ void clip(T x) __generic __target_intrinsic(hlsl) +[require(cpp_cuda_glsl_hlsl_spirv, fragment)] void clip(vector x) { if(any(x < T(0))) discard; @@ -4381,6 +4686,7 @@ void clip(vector x) __generic __target_intrinsic(hlsl) +[require(cpp_cuda_glsl_hlsl_spirv, fragment)] void clip(matrix x) { if(any(x < T(0))) discard; @@ -4394,6 +4700,7 @@ __target_intrinsic(cuda, "$P_cos($0)") __target_intrinsic(cpp, "$P_cos($0)") __target_intrinsic(spirv, "OpExtInst resultType resultId glsl450 Cos _0") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] T cos(T x); __generic @@ -4444,6 +4751,7 @@ matrix cosh(matrix x) // Population count [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)] uint countbits(uint value) { __target_switch @@ -4467,6 +4775,7 @@ __target_intrinsic(hlsl) __target_intrinsic(glsl) __target_intrinsic(spirv, "OpExtInst resultType resultId glsl450 Cross _0 _1") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] vector cross(vector left, vector right) { return vector( @@ -4480,6 +4789,7 @@ __target_intrinsic(hlsl) __target_intrinsic(glsl) __target_intrinsic(spirv, "OpExtInst resultType resultId glsl450 Cross _0 _1") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] vector cross(vector left, vector right) { return vector( @@ -4491,6 +4801,7 @@ vector cross(vector left, vector right) // Convert encoded color __target_intrinsic(hlsl) [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] int4 D3DCOLORtoUBYTE4(float4 color) { let scaled = color.zyxw * 255.001999f; @@ -4504,6 +4815,7 @@ for (auto xOrY : diffDimensions) { }}}} __generic [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, fragmentprocessing)] T dd$(xOrY)(T x) { __target_switch @@ -4521,6 +4833,7 @@ T dd$(xOrY)(T x) __generic [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, fragmentprocessing)] vector dd$(xOrY)(vector x) { __target_switch @@ -4539,6 +4852,7 @@ vector dd$(xOrY)(vector x) __generic __target_intrinsic(hlsl) [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, fragmentprocessing)] matrix dd$(xOrY)(matrix x) { MATRIX_MAP_UNARY(T, N, M, dd$(xOrY), x); @@ -4547,6 +4861,7 @@ matrix dd$(xOrY)(matrix x) __generic __glsl_extension(GL_ARB_derivative_control) [__readNone] +[require(glsl_hlsl_spirv, fragmentprocessing_derivativecontrol)] T dd$(xOrY)_coarse(T x) { __target_switch @@ -4560,6 +4875,7 @@ T dd$(xOrY)_coarse(T x) __generic __glsl_extension(GL_ARB_derivative_control) [__readNone] +[require(glsl_hlsl_spirv, fragmentprocessing_derivativecontrol)] vector dd$(xOrY)_coarse(vector x) { __target_switch @@ -4581,6 +4897,7 @@ matrix dd$(xOrY)_coarse(matrix x) __generic __glsl_extension(GL_ARB_derivative_control) [__readNone] +[require(glsl_hlsl_spirv, fragmentprocessing_derivativecontrol)] T dd$(xOrY)_fine(T x) { __target_switch @@ -4594,6 +4911,7 @@ T dd$(xOrY)_fine(T x) __generic __glsl_extension(GL_ARB_derivative_control) [__readNone] +[require(glsl_hlsl_spirv, fragmentprocessing_derivativecontrol)] vector dd$(xOrY)_fine(vector x) { __target_switch @@ -4659,6 +4977,7 @@ T determinant(matrix m); // Barrier for device memory __glsl_extension(GL_KHR_memory_scope_semantics) +[require(cuda_glsl_hlsl_spirv, memorybarrier_compute)] void DeviceMemoryBarrier() { __target_switch @@ -4674,6 +4993,7 @@ void DeviceMemoryBarrier() } __glsl_extension(GL_KHR_memory_scope_semantics) +[require(cuda_glsl_hlsl_spirv, memorybarrier_compute)] void DeviceMemoryBarrierWithGroupSync() { __target_switch @@ -4695,6 +5015,7 @@ __target_intrinsic(hlsl) __target_intrinsic(glsl) __target_intrinsic(spirv, "OpExtInst resultType resultId glsl450 Distance _0 _1") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] T distance(vector x, vector y) { return length(x - y); @@ -4702,6 +5023,7 @@ T distance(vector x, vector y) __generic [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] T distance(T x, T y) { return length(x - y); @@ -4714,6 +5036,7 @@ __target_intrinsic(hlsl) __target_intrinsic(glsl) [__readNone] [ForceInline] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] T dot(T x, T y) { return x * y; @@ -4724,6 +5047,7 @@ __target_intrinsic(hlsl) __target_intrinsic(glsl) __target_intrinsic(spirv, "OpDot resultType resultId _0 _1") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] T dot(vector x, vector y) { T result = T(0); @@ -4735,6 +5059,7 @@ T dot(vector x, vector y) __generic __target_intrinsic(hlsl) [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] T dot(vector x, vector y) { T result = T(0); @@ -4774,12 +5099,14 @@ __generic __target_intrinsic(glsl, interpolateAtCentroid) __target_intrinsic(spirv, "OpExtInst resultType resultId glsl450 InterpolateAtCentroid _0") [__readNone] +[require(glsl_spirv, fragmentprocessing)] T EvaluateAttributeAtCentroid(T x); __generic __target_intrinsic(glsl, interpolateAtCentroid) __target_intrinsic(spirv, "OpExtInst resultType resultId glsl450 InterpolateAtCentroid _0") [__readNone] +[require(glsl_spirv, fragmentprocessing)] vector EvaluateAttributeAtCentroid(vector x); __generic @@ -4794,17 +5121,20 @@ __generic __target_intrinsic(glsl, "interpolateAtSample($0, int($1))") __target_intrinsic(spirv, "OpExtInst resultType resultId glsl450 InterpolateAtSample _0 _1") [__readNone] +[require(glsl_spirv, fragmentprocessing)] T EvaluateAttributeAtSample(T x, uint sampleindex); __generic __target_intrinsic(glsl, "interpolateAtSample($0, int($1))") __target_intrinsic(spirv, "OpExtInst resultType resultId glsl450 InterpolateAtSample _0 _1") [__readNone] +[require(glsl_spirv, fragmentprocessing)] vector EvaluateAttributeAtSample(vector x, uint sampleindex); __generic __target_intrinsic(glsl, "interpolateAtSample($0, int($1))") [__readNone] +[require(glsl_spirv, fragmentprocessing)] matrix EvaluateAttributeAtSample(matrix x, uint sampleindex) { matrix result; @@ -4819,12 +5149,14 @@ __generic __target_intrinsic(glsl, "interpolateAtOffset($0, vec2($1) / 16.0f)") __target_intrinsic(spirv, "%foffset = OpConvertSToF _type(float2) resultId _1; %offsetdiv16 = 136 _type(float2) resultId %foffset const(float2, 16.0, 16.0); OpExtInst resultType resultId glsl450 78 _0 %offsetdiv16") [__readNone] +[require(glsl_spirv, fragmentprocessing)] T EvaluateAttributeSnapped(T x, int2 offset); __generic __target_intrinsic(glsl, "interpolateAtOffset($0, vec2($1) / 16.0f)") __target_intrinsic(spirv, "%foffset = OpConvertSToF _type(float2) resultId _1; %offsetdiv16 = 136 _type(float2) resultId %foffset const(float2, 16.0, 16.0); OpExtInst resultType resultId glsl450 78 _0 %offsetdiv16") [__readNone] +[require(glsl_spirv, fragmentprocessing)] vector EvaluateAttributeSnapped(vector x, int2 offset); __generic @@ -4849,6 +5181,7 @@ __target_intrinsic(cuda, "$P_exp($0)") __target_intrinsic(cpp, "$P_exp($0)") __target_intrinsic(spirv, "OpExtInst resultType resultId glsl450 Exp _0") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] T exp(T x); __generic @@ -4872,10 +5205,8 @@ matrix exp(matrix x) // Base-2 exponent __generic -__target_intrinsic(hlsl) -__target_intrinsic(cuda, "$P_exp2($0)") -__target_intrinsic(cpp, "$P_exp2($0)") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] T exp2(T x) { __target_switch @@ -4896,7 +5227,14 @@ T exp2(T x) result:$$float = OpExtInst glsl450 Exp2 $xf }); } + case hlsl: + __intrinsic_asm "exp2($0)"; + case cpp: + __intrinsic_asm "$P_exp2($0)"; + case cuda: + __intrinsic_asm "$P_exp2($0)"; } + } __generic @@ -4918,17 +5256,28 @@ matrix exp2(matrix x) // Convert 16-bit float stored in low bits of integer -__target_intrinsic(glsl, "unpackHalf2x16($0).x") __glsl_version(420) -__target_intrinsic(hlsl) __cuda_sm_version(6.0) -__target_intrinsic(cuda, "__half2float(__ushort_as_half($0))") -__target_intrinsic(spirv, R"( - %lowBits = OpUConvert _type(uint16_t) resultId _0; - %half = OpBitcast _type(half) resultId %lowBits; - OpFConvert resultType resultId %half)") [__readNone] -float f16tof32(uint value); +[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)] +float f16tof32(uint value) +{ + __target_switch + { + case glsl: __intrinsic_asm "unpackHalf2x16($0).x"; + case hlsl: __intrinsic_asm "f16tof32($0)"; + case cuda: __intrinsic_asm "__half2float(__ushort_as_half($0))"; + case cpp: __intrinsic_asm "f16tof32($0)"; + case spirv: + { + return spirv_asm { + %lowBits = OpUConvert $$uint16_t $value; + %half = OpBitcast $$half %lowBits; + result:$$float = OpFConvert %half + }; + } + } +} __generic __target_intrinsic(hlsl) @@ -4941,17 +5290,28 @@ vector f16tof32(vector value) // Convert to 16-bit float stored in low bits of integer -__target_intrinsic(glsl, "packHalf2x16(vec2($0,0.0))") __glsl_version(420) -__target_intrinsic(hlsl) __cuda_sm_version(6.0) -__target_intrinsic(cuda, "__half_as_ushort(__float2half($0))") -__target_intrinsic(spirv, R"( - %half = OpFConvert _type(half) resultId _0; - %lowBits = OpBitcast _type(uint16_t) resultId %half; - OpUConvert resultType resultId %lowBits)") [__readNone] -uint f32tof16(float value); +[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)] +uint f32tof16(float value) +{ + __target_switch + { + case glsl: __intrinsic_asm "packHalf2x16(vec2($0,0.0))"; + case hlsl: __intrinsic_asm "f32tof16($0)"; + case cuda: __intrinsic_asm "__half_as_ushort(__float2half($0))"; + case cpp: __intrinsic_asm "f32tof16($0)"; + case spirv: + { + return spirv_asm { + %half = OpFConvert $$half $value; + %lowBits = OpBitcast $$uint16_t %half; + result:$$uint = OpUConvert %lowBits + }; + } + } +} __generic __target_intrinsic(hlsl) @@ -4966,12 +5326,25 @@ vector f32tof16(vector value) // It's not clear what happens with float16 time in HLSL -> can the float16 coerce to uint for example? If so that would // give the wrong result -__target_intrinsic(glsl, "unpackHalf2x16($0).x") -__target_intrinsic(cuda, "__half2float") -__target_intrinsic(spirv, "OpFConvert resultType resultId _0") __glsl_version(420) [__readNone] -float f16tof32(float16_t value); +[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)] +float f16tof32(float16_t value) +{ + __target_switch + { + case glsl: __intrinsic_asm "unpackHalf2x16($0).x"; + case hlsl: __intrinsic_asm "f16tof32($0)"; + case cuda: __intrinsic_asm "__half2float($0)"; + case cpp: __intrinsic_asm "f16tof32($0)"; + case spirv: + { + return spirv_asm { + result:$$float = OpFConvert $value + }; + } + } +} __generic __target_intrinsic(hlsl) @@ -4989,6 +5362,7 @@ __glsl_version(420) __target_intrinsic(cuda, "__float2half") __target_intrinsic(spirv, "OpFConvert resultType resultId _0") [__readNone] +[require(cuda_glsl_spirv, shader5_sm_5_0)] float16_t f32tof16_(float value); __generic @@ -5008,6 +5382,7 @@ __target_intrinsic(hlsl) __target_intrinsic(glsl) __target_intrinsic(spirv, "OpExtInst resultType resultId glsl450 FaceForward _0 _1 _2") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_400)] vector faceforward(vector n, vector i, vector ng) { return dot(ng, i) < T(0.0f) ? n : -n; @@ -5020,6 +5395,7 @@ __target_intrinsic(cuda, "$P_firstbithigh($0)") __target_intrinsic(cpp, "$P_firstbithigh($0)") __target_intrinsic(spirv, "OpExtInst resultType resultId glsl450 FindSMsb _0") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)] int firstbithigh(int value); __target_intrinsic(hlsl) @@ -5038,6 +5414,7 @@ __target_intrinsic(cuda, "$P_firstbithigh($0)") __target_intrinsic(cpp, "$P_firstbithigh($0)") __target_intrinsic(spirv, "OpExtInst resultType resultId glsl450 FindUMsb _0") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)] uint firstbithigh(uint value); __target_intrinsic(hlsl) @@ -5057,6 +5434,7 @@ __target_intrinsic(cuda, "$P_firstbitlow($0)") __target_intrinsic(cpp, "$P_firstbitlow($0)") __target_intrinsic(spirv, "OpExtInst resultType resultId glsl450 FindILsb _0") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)] int firstbitlow(int value); __target_intrinsic(hlsl) @@ -5075,6 +5453,7 @@ __target_intrinsic(cuda, "$P_firstbitlow($0)") __target_intrinsic(cpp, "$P_firstbitlow($0)") __target_intrinsic(spirv, "OpExtInst resultType resultId glsl450 FindILsb _0") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)] uint firstbitlow(uint value); __target_intrinsic(hlsl) @@ -5096,6 +5475,7 @@ __target_intrinsic(cuda, "$P_floor($0)") __target_intrinsic(cpp, "$P_floor($0)") __target_intrinsic(spirv, "OpExtInst resultType resultId glsl450 Floor _0") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] T floor(T x); __generic @@ -5123,6 +5503,7 @@ __target_intrinsic(cuda, "$P_fma($0, $1, $2)") __target_intrinsic(cpp, "$P_fma($0, $1, $2)") __target_intrinsic(spirv, "OpExtInst resultType resultId glsl450 Fma _0 _1 _2") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)] T fma(T a, T b, T c) { __target_switch @@ -5132,6 +5513,8 @@ T fma(T a, T b, T c) return mad(a, b, c); else __intrinsic_asm "fma($0, $1, $2)"; + default: + return a*b + c; } } @@ -5140,6 +5523,7 @@ __target_intrinsic(hlsl) __target_intrinsic(glsl) __target_intrinsic(spirv, "OpExtInst resultType resultId glsl450 Fma _0 _1 _2") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)] vector fma(vector a, vector b, vector c) { VECTOR_MAP_TRINARY(T, N, fma, a, b, c); @@ -5148,6 +5532,7 @@ vector fma(vector a, vector b, vector c) __generic __target_intrinsic(hlsl) [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)] matrix fma(matrix a, matrix b, matrix c) { MATRIX_MAP_TRINARY(T, N, M, fma, a, b, c); @@ -5159,6 +5544,7 @@ __target_intrinsic(hlsl) __target_intrinsic(cuda, "$P_fmod($0, $1)") __target_intrinsic(cpp, "$P_fmod($0, $1)") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] T fmod(T x, T y) { return x - y * trunc(x/y); @@ -5188,6 +5574,7 @@ __target_intrinsic(cuda, "$P_frac($0)") __target_intrinsic(cpp, "$P_frac($0)") __target_intrinsic(spirv, "OpExtInst resultType resultId glsl450 Fract _0") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] T frac(T x); __generic @@ -5215,6 +5602,7 @@ __target_intrinsic(hlsl) __target_intrinsic(glsl) __target_intrinsic(spirv, "OpExtInst resultType resultId glsl450 Frexp _0 _1") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] T frexp(T x, out int exp); __generic @@ -5241,6 +5629,7 @@ __generic __target_intrinsic(hlsl) __target_intrinsic(glsl) __target_intrinsic(spirv, "OpFwidth resultType resultId _0") +[require(glsl_hlsl_spirv, fragmentprocessing)] T fwidth(T x); __generic @@ -5276,6 +5665,7 @@ matrix fwidth(matrix x) __generic [__readNone] __glsl_version(450) +[require(glsl_hlsl_spirv, getattributeatvertex)] T GetAttributeAtVertex(T attribute, uint vertexIndex) { __target_switch @@ -5308,6 +5698,7 @@ T GetAttributeAtVertex(T attribute, uint vertexIndex) __generic [__readNone] __glsl_version(450) +[require(glsl_hlsl_spirv, getattributeatvertex)] vector GetAttributeAtVertex(vector attribute, uint vertexIndex) { __target_switch @@ -5340,6 +5731,7 @@ vector GetAttributeAtVertex(vector attribute, uint vertexIndex) __generic [__readNone] __glsl_version(450) +[require(glsl_hlsl_spirv, getattributeatvertex)] matrix GetAttributeAtVertex(matrix attribute, uint vertexIndex) { __target_switch @@ -5359,14 +5751,17 @@ matrix GetAttributeAtVertex(matrix attribute, uint vertexIndex) // Get number of samples in render target [__readNone] +[require(sm_4_0)] uint GetRenderTargetSampleCount(); // Get position of given sample [__readNone] +[require(sm_4_0)] float2 GetRenderTargetSamplePosition(int Index); // Group memory barrier __glsl_extension(GL_KHR_memory_scope_semantics) +[require(cuda_glsl_hlsl_spirv, memorybarrier_compute)] void GroupMemoryBarrier() { __target_switch @@ -5382,6 +5777,7 @@ void GroupMemoryBarrier() } } +[require(cuda_glsl_hlsl_spirv, memorybarrier_compute)] void __subgroupBarrier() { __target_switch @@ -5398,6 +5794,7 @@ void __subgroupBarrier() } __glsl_extension(GL_KHR_memory_scope_semantics) +[require(cuda_glsl_hlsl_spirv, memorybarrier_compute)] void GroupMemoryBarrierWithGroupSync() { __target_switch @@ -5417,6 +5814,7 @@ void GroupMemoryBarrierWithGroupSync() [ForceInline] __glsl_version(430) +[require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda)] void InterlockedAdd(__ref int dest, int value) { __target_switch @@ -5434,6 +5832,7 @@ void InterlockedAdd(__ref int dest, int value) [ForceInline] __glsl_version(430) +[require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda)] void InterlockedAdd(__ref uint dest, uint value) { __target_switch @@ -5457,6 +5856,7 @@ void InterlockedAdd(__ref uint dest, int value) [ForceInline] __glsl_version(430) +[require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda)] void InterlockedAdd(__ref int dest, int value, out int original_value) { __target_switch @@ -5475,6 +5875,7 @@ void InterlockedAdd(__ref int dest, int value, out int original_value) [ForceInline] __glsl_version(430) +[require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda)] void InterlockedAdd(__ref uint dest, uint value, out uint original_value) { __target_switch @@ -5528,6 +5929,7 @@ void InterlockedAdd(__ref uint64_t dest, uint64_t value, out uint64_t origina } __glsl_version(430) +[require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda)] void InterlockedAnd(__ref int dest, int value) { __target_switch @@ -5544,6 +5946,7 @@ void InterlockedAnd(__ref int dest, int value) } __glsl_version(430) +[require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda)] void InterlockedAnd(__ref uint dest, uint value) { __target_switch @@ -5560,6 +5963,7 @@ void InterlockedAnd(__ref uint dest, uint value) } __glsl_version(430) +[require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda)] void InterlockedAnd(__ref int dest, int value, out int original_value) { __target_switch @@ -5577,6 +5981,7 @@ void InterlockedAnd(__ref int dest, int value, out int original_value) } __glsl_version(430) +[require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda)] void InterlockedAnd(__ref uint dest, uint value, out uint original_value) { __target_switch @@ -5612,6 +6017,7 @@ void InterlockedAnd(__ref uint64_t dest, uint64_t value, out uint64_t origina } __glsl_version(430) +[require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda)] void InterlockedCompareExchange(__ref int dest, int compare_value, int value, out int original_value) { __target_switch @@ -5629,6 +6035,7 @@ void InterlockedCompareExchange(__ref int dest, int compare_value, int value, } __glsl_version(430) +[require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda)] void InterlockedCompareExchange(__ref uint dest, uint compare_value, uint value, out uint original_value) { __target_switch @@ -5700,6 +6107,7 @@ void InterlockedCompareExchange(__ref uint64_t dest, uint64_t compare_value, } __glsl_version(430) +[require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda)] void InterlockedCompareStore(__ref int dest, int compare_value, int value) { __target_switch @@ -5716,6 +6124,7 @@ void InterlockedCompareStore(__ref int dest, int compare_value, int value) } __glsl_version(430) +[require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda)] void InterlockedCompareStore(__ref uint dest, uint compare_value, uint value) { __target_switch @@ -5759,6 +6168,7 @@ void InterlockedCompareStore(__ref uint64_t dest, uint64_t compare_value, uint64 } __glsl_version(430) +[require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda)] void InterlockedExchange(__ref int dest, int value, out int original_value) { __target_switch @@ -5776,6 +6186,7 @@ void InterlockedExchange(__ref int dest, int value, out int original_value) } __glsl_version(430) +[require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda)] void InterlockedExchange(__ref uint dest, uint value, out uint original_value) { __target_switch @@ -5847,6 +6258,7 @@ void InterlockedExchange(__ref uint64_t dest, uint64_t value, out uint64_t or } __glsl_version(430) +[require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda)] void InterlockedMax(__ref int dest, int value) { __target_switch @@ -5863,6 +6275,7 @@ void InterlockedMax(__ref int dest, int value) } __glsl_version(430) +[require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda)] void InterlockedMax(__ref uint dest, uint value) { __target_switch @@ -5879,6 +6292,7 @@ void InterlockedMax(__ref uint dest, uint value) } __glsl_version(430) +[require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda)] void InterlockedMax(__ref int dest, int value, out int original_value) { __target_switch @@ -5896,6 +6310,7 @@ void InterlockedMax(__ref int dest, int value, out int original_value) } __glsl_version(430) +[require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda)] void InterlockedMax(__ref uint dest, uint value, out uint original_value) { __target_switch @@ -5949,6 +6364,7 @@ void InterlockedMax(__ref uint64_t dest, uint64_t value, out uint64_t origina } __glsl_version(430) +[require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda)] void InterlockedMin(__ref int dest, int value) { __target_switch @@ -5965,6 +6381,7 @@ void InterlockedMin(__ref int dest, int value) } __glsl_version(430) +[require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda)] void InterlockedMin(__ref uint dest, uint value) { __target_switch @@ -5981,6 +6398,7 @@ void InterlockedMin(__ref uint dest, uint value) } __glsl_version(430) +[require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda)] void InterlockedMin(__ref int dest, int value, out int original_value) { __target_switch @@ -5998,6 +6416,7 @@ void InterlockedMin(__ref int dest, int value, out int original_value) } __glsl_version(430) +[require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda)] void InterlockedMin(__ref uint dest, uint value, out uint original_value) { __target_switch @@ -6051,6 +6470,7 @@ void InterlockedMin(__ref uint64_t dest, uint64_t value, out uint64_t origina } __glsl_version(430) +[require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda)] void InterlockedOr(__ref int dest, int value) { __target_switch @@ -6067,6 +6487,7 @@ void InterlockedOr(__ref int dest, int value) } __glsl_version(430) +[require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda)] void InterlockedOr(__ref uint dest, uint value) { __target_switch @@ -6083,6 +6504,7 @@ void InterlockedOr(__ref uint dest, uint value) } __glsl_version(430) +[require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda)] void InterlockedOr(__ref int dest, int value, out int original_value) { __target_switch @@ -6100,6 +6522,7 @@ void InterlockedOr(__ref int dest, int value, out int original_value) } __glsl_version(430) +[require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda)] void InterlockedOr(__ref uint dest, uint value, out uint original_value) { __target_switch @@ -6135,6 +6558,7 @@ void InterlockedOr(__ref uint64_t dest, uint64_t value, out uint64_t original } __glsl_version(430) +[require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda)] void InterlockedXor(__ref int dest, int value) { __target_switch @@ -6151,6 +6575,7 @@ void InterlockedXor(__ref int dest, int value) } __glsl_version(430) +[require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda)] void InterlockedXor(__ref uint dest, uint value) { __target_switch @@ -6167,6 +6592,7 @@ void InterlockedXor(__ref uint dest, uint value) } __glsl_version(430) +[require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda)] void InterlockedXor(__ref int dest, int value, out int original_value) { __target_switch @@ -6184,6 +6610,7 @@ void InterlockedXor(__ref int dest, int value, out int original_value) } __glsl_version(430) +[require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda)] void InterlockedXor(__ref uint dest, uint value, out uint original_value) { __target_switch @@ -6224,6 +6651,7 @@ void InterlockedXor(__ref uint64_t dest, uint64_t value, out uint64_t origina __generic __target_intrinsic(hlsl) [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] bool isfinite(T x) { __target_switch @@ -6239,6 +6667,7 @@ bool isfinite(T x) __generic [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] vector isfinite(vector x) { __target_switch @@ -6252,6 +6681,7 @@ vector isfinite(vector x) __generic __target_intrinsic(hlsl) [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] matrix isfinite(matrix x) { MATRIX_MAP_UNARY(bool, N, M, isfinite, x); @@ -6260,6 +6690,7 @@ matrix isfinite(matrix x) // Is floating-point value infinite? __generic [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] bool isinf(T x) { __target_switch @@ -6277,6 +6708,7 @@ bool isinf(T x) __generic [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] vector isinf(vector x) { __target_switch @@ -6294,6 +6726,7 @@ vector isinf(vector x) __generic __target_intrinsic(hlsl) [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] matrix isinf(matrix x) { MATRIX_MAP_UNARY(bool, N, M, isinf, x); @@ -6302,6 +6735,7 @@ matrix isinf(matrix x) // Is floating-point value not-a-number? __generic [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] bool isnan(T x) { __target_switch @@ -6319,6 +6753,7 @@ bool isnan(T x) __generic [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] vector isnan(vector x) { __target_switch @@ -6336,6 +6771,7 @@ vector isnan(vector x) __generic __target_intrinsic(hlsl) [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] matrix isnan(matrix x) { MATRIX_MAP_UNARY(bool, N, M, isnan, x); @@ -6346,6 +6782,7 @@ matrix isnan(matrix x) __generic __target_intrinsic(hlsl) [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] T ldexp(T x, T exp) { return x * exp2(exp); @@ -6354,6 +6791,7 @@ T ldexp(T x, T exp) __generic __target_intrinsic(hlsl) [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] vector ldexp(vector x, vector exp) { return x * exp2(exp); @@ -6362,6 +6800,7 @@ vector ldexp(vector x, vector exp) __generic __target_intrinsic(hlsl) [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] matrix ldexp(matrix x, matrix exp) { MATRIX_MAP_BINARY(T, N, M, ldexp, x, exp); @@ -6373,6 +6812,7 @@ __target_intrinsic(hlsl) __target_intrinsic(glsl) __target_intrinsic(spirv, "OpExtInst resultType resultId glsl450 Length _0") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] T length(vector x) { return sqrt(dot(x, x)); @@ -6380,6 +6820,7 @@ T length(vector x) // Scalar float length __generic +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] T length(T x) { return abs(x); @@ -6391,6 +6832,7 @@ __target_intrinsic(hlsl) __target_intrinsic(glsl, mix) __target_intrinsic(spirv, "OpExtInst resultType resultId glsl450 FMix _0 _1 _2") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] T lerp(T x, T y, T s) { return x * (T(1.0f) - s) + y * s; @@ -6401,6 +6843,7 @@ __target_intrinsic(hlsl) __target_intrinsic(glsl, mix) __target_intrinsic(spirv, "OpExtInst resultType resultId glsl450 FMix _0 _1 _2") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] vector lerp(vector x, vector y, vector s) { return x * (T(1.0f) - s) + y * s; @@ -6409,6 +6852,7 @@ vector lerp(vector x, vector y, vector s) __generic __target_intrinsic(hlsl) [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] matrix lerp(matrix x, matrix y, matrix s) { MATRIX_MAP_TRINARY(T, N, M, lerp, x, y, s); @@ -6417,6 +6861,7 @@ matrix lerp(matrix x, matrix y, matrix s) // Legacy lighting function (obsolete) __target_intrinsic(hlsl) [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] float4 lit(float n_dot_l, float n_dot_h, float m) { let ambient = 1.0f; @@ -6433,6 +6878,7 @@ __target_intrinsic(cuda, "$P_log($0)") __target_intrinsic(cpp, "$P_log($0)") __target_intrinsic(spirv, "OpExtInst resultType resultId glsl450 Log _0") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] T log(T x); __generic @@ -6440,6 +6886,7 @@ __target_intrinsic(hlsl) __target_intrinsic(glsl) __target_intrinsic(spirv, "OpExtInst resultType resultId glsl450 Log _0") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] vector log(vector x) { VECTOR_MAP_UNARY(T, N, log, x); @@ -6448,6 +6895,7 @@ vector log(vector x) __generic __target_intrinsic(hlsl) [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] matrix log(matrix x) { MATRIX_MAP_UNARY(T, N, M, log, x); @@ -6461,6 +6909,7 @@ __target_intrinsic(cuda, "$P_log10($0)") __target_intrinsic(cpp, "$P_log10($0)") __target_intrinsic(spirv, "%baseElog = OpExtInst resultType resultId glsl450 Log _0; OpFMul resultType resultId %baseElog const(_p,0.43429448190325182765112891891661)") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] T log10(T x); __generic @@ -6468,6 +6917,7 @@ __target_intrinsic(hlsl) __target_intrinsic(glsl, "(log( $0 ) * $S0(0.43429448190325182765112891891661) )" ) __target_intrinsic(spirv, "%baseElog = OpExtInst resultType resultId glsl450 Log _0; OpVectorTimesScalar resultType resultId %baseElog const(_p,0.43429448190325182765112891891661)") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] vector log10(vector x) { VECTOR_MAP_UNARY(T, N, log10, x); @@ -6476,6 +6926,7 @@ vector log10(vector x) __generic __target_intrinsic(hlsl) [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] matrix log10(matrix x) { MATRIX_MAP_UNARY(T, N, M, log10, x); @@ -6489,6 +6940,7 @@ __target_intrinsic(cuda, "$P_log2($0)") __target_intrinsic(cpp, "$P_log2($0)") __target_intrinsic(spirv, "OpExtInst resultType resultId glsl450 Log2 _0") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] T log2(T x); __generic @@ -6496,6 +6948,7 @@ __target_intrinsic(hlsl) __target_intrinsic(glsl) __target_intrinsic(spirv, "OpExtInst resultType resultId glsl450 Log2 _0") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] vector log2(vector x) { VECTOR_MAP_UNARY(T, N, log2, x); @@ -6504,6 +6957,7 @@ vector log2(vector x) __generic __target_intrinsic(hlsl) [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] matrix log2(matrix x) { MATRIX_MAP_UNARY(T, N, M, log2, x); @@ -6518,6 +6972,7 @@ __target_intrinsic(cuda, "$P_fma($0, $1, $2)") __target_intrinsic(cpp, "$P_fma($0, $1, $2)") __target_intrinsic(spirv, "OpExtInst resultType resultId glsl450 Fma _0 _1 _2") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)] T mad(T mvalue, T avalue, T bvalue); __generic @@ -6525,6 +6980,7 @@ __target_intrinsic(hlsl) __target_intrinsic(glsl, fma) __target_intrinsic(spirv, "OpExtInst resultType resultId glsl450 Fma _0 _1 _2") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)] vector mad(vector mvalue, vector avalue, vector bvalue) { VECTOR_MAP_TRINARY(T, N, mad, mvalue, avalue, bvalue); @@ -6533,6 +6989,7 @@ vector mad(vector mvalue, vector avalue, vector bvalue) __generic __target_intrinsic(hlsl) [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)] matrix mad(matrix mvalue, matrix avalue, matrix bvalue) { MATRIX_MAP_TRINARY(T, N, M, mad, mvalue, avalue, bvalue); @@ -6545,6 +7002,7 @@ __target_intrinsic(cuda, "$P_fma($0, $1, $2)") __target_intrinsic(cpp, "$P_fma($0, $1, $2)") __target_intrinsic(spirv, "OpExtInst resultType resultId glsl450 Fma _0 _1 _2") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)] T mad(T mvalue, T avalue, T bvalue); __generic @@ -6552,6 +7010,7 @@ __target_intrinsic(hlsl) __target_intrinsic(glsl, fma) __target_intrinsic(spirv, "OpExtInst resultType resultId glsl450 Fma _0 _1 _2") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)] vector mad(vector mvalue, vector avalue, vector bvalue) { VECTOR_MAP_TRINARY(T, N, mad, mvalue, avalue, bvalue); @@ -6560,6 +7019,7 @@ vector mad(vector mvalue, vector avalue, vector bvalue) __generic __target_intrinsic(hlsl) [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)] matrix mad(matrix mvalue, matrix avalue, matrix bvalue) { MATRIX_MAP_TRINARY(T, N, M, mad, mvalue, avalue, bvalue); @@ -6574,6 +7034,7 @@ __target_intrinsic(cuda, "$P_max($0, $1)") __target_intrinsic(cpp, "$P_max($0, $1)") __target_intrinsic(spirv, "OpExtInst resultType resultId glsl450 fus(FMax, UMax, SMax) _0 _1") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] T max(T x, T y); // Note: a stdlib implementation of `max` (or `min`) will require splitting // floating-point and integer cases apart, because the floating-point @@ -6585,6 +7046,7 @@ __target_intrinsic(hlsl) __target_intrinsic(glsl) __target_intrinsic(spirv, "OpExtInst resultType resultId glsl450 fus(FMax, UMax, SMax) _0 _1") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] vector max(vector x, vector y) { VECTOR_MAP_BINARY(T, N, max, x, y); @@ -6593,6 +7055,7 @@ vector max(vector x, vector y) __generic __target_intrinsic(hlsl) [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] matrix max(matrix x, matrix y) { MATRIX_MAP_BINARY(T, N, M, max, x, y); @@ -6605,6 +7068,7 @@ __target_intrinsic(cuda, "$P_max($0, $1)") __target_intrinsic(cpp, "$P_max($0, $1)") __target_intrinsic(spirv, "OpExtInst resultType resultId glsl450 fus(FMax, UMax, SMax) _0 _1") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] T max(T x, T y); __generic @@ -6612,6 +7076,7 @@ __target_intrinsic(hlsl) __target_intrinsic(glsl) __target_intrinsic(spirv, "OpExtInst resultType resultId glsl450 fus(FMax, UMax, SMax) _0 _1") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] vector max(vector x, vector y) { VECTOR_MAP_BINARY(T, N, max, x, y); @@ -6620,6 +7085,7 @@ vector max(vector x, vector y) __generic __target_intrinsic(hlsl) [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] matrix max(matrix x, matrix y) { MATRIX_MAP_BINARY(T, N, M, max, x, y); @@ -6633,6 +7099,7 @@ __target_intrinsic(cuda, "$P_min($0, $1)") __target_intrinsic(cpp, "$P_min($0, $1)") __target_intrinsic(spirv, "OpExtInst resultType resultId glsl450 fus(FMin, UMin, SMin) _0 _1") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] T min(T x, T y); __generic @@ -6640,6 +7107,7 @@ __target_intrinsic(hlsl) __target_intrinsic(glsl) __target_intrinsic(spirv, "OpExtInst resultType resultId glsl450 fus(FMin, UMin, SMin) _0 _1") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] vector min(vector x, vector y) { VECTOR_MAP_BINARY(T, N, min, x, y); @@ -6648,6 +7116,7 @@ vector min(vector x, vector y) __generic __target_intrinsic(hlsl) [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] matrix min(matrix x, matrix y) { MATRIX_MAP_BINARY(T, N, M, min, x, y); @@ -6660,6 +7129,7 @@ __target_intrinsic(cuda, "$P_min($0, $1)") __target_intrinsic(cpp, "$P_min($0, $1)") __target_intrinsic(spirv, "OpExtInst resultType resultId glsl450 fus(FMin, UMin, SMin) _0 _1") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] T min(T x, T y); __generic @@ -6667,6 +7137,7 @@ __target_intrinsic(hlsl) __target_intrinsic(glsl) __target_intrinsic(spirv, "OpExtInst resultType resultId glsl450 fus(FMin, UMin, SMin) _0 _1") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] vector min(vector x, vector y) { VECTOR_MAP_BINARY(T, N, min, x, y); @@ -6675,6 +7146,7 @@ vector min(vector x, vector y) __generic __target_intrinsic(hlsl) [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] matrix min(matrix x, matrix y) { MATRIX_MAP_BINARY(T, N, M, min, x, y); @@ -6686,12 +7158,14 @@ __target_intrinsic(hlsl) __target_intrinsic(glsl) __target_intrinsic(spirv, "OpExtInst resultType resultId glsl450 Modf _0 _1") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] T modf(T x, out T ip); __generic __target_intrinsic(hlsl) __target_intrinsic(glsl) [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] vector modf(vector x, out vector ip) { VECTOR_MAP_BINARY(T, N, modf, x, ip); @@ -6700,6 +7174,7 @@ vector modf(vector x, out vector ip) __generic __target_intrinsic(hlsl) [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] matrix modf(matrix x, out matrix ip) { MATRIX_MAP_BINARY(T, N, M, modf, x, ip); @@ -6708,6 +7183,7 @@ matrix modf(matrix x, out matrix ip) // msad4 (whatever that is) __target_intrinsic(hlsl) [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] uint4 msad4(uint reference, uint2 source, uint4 accum) { int4 bytesRef = (reference >> uint4(24, 16, 8, 0)) & 0xFF; @@ -6730,28 +7206,33 @@ uint4 msad4(uint reference, uint2 source, uint4 accum) __generic __intrinsic_op($(kIROp_Mul)) [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] T mul(T x, T y); // scalar-vector and vector-scalar __generic __intrinsic_op($(kIROp_Mul)) [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] vector mul(vector x, T y); __generic __intrinsic_op($(kIROp_Mul)) [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] vector mul(T x, vector y); // scalar-matrix and matrix-scalar __generic __intrinsic_op($(kIROp_Mul)) [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] matrix mul(matrix x, T y); __generic __intrinsic_op($(kIROp_Mul)) [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] matrix mul(T x, matrix y); // vector-vector (dot product) @@ -6759,6 +7240,7 @@ __generic __target_intrinsic(hlsl) __target_intrinsic(glsl, "dot") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] T mul(vector x, vector y) { return dot(x, y); @@ -6766,6 +7248,7 @@ T mul(vector x, vector y) __generic __target_intrinsic(hlsl) [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] T mul(vector x, vector y) { return dot(x, y); @@ -6777,6 +7260,7 @@ __target_intrinsic(hlsl) __target_intrinsic(glsl, "($1 * $0)") __target_intrinsic(spirv, "OpMatrixTimesVector resultType resultId _1 _0") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] vector mul(vector left, matrix right) { vector result; @@ -6795,6 +7279,7 @@ __generic __target_intrinsic(hlsl) __target_intrinsic(glsl, "($1 * $0)") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] vector mul(vector left, matrix right) { vector result; @@ -6813,6 +7298,7 @@ __generic __target_intrinsic(hlsl) __target_intrinsic(glsl, "($1 * $0)") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] vector mul(vector left, matrix right) { vector result; @@ -6834,6 +7320,7 @@ __target_intrinsic(hlsl) __target_intrinsic(glsl, "($1 * $0)") __target_intrinsic(spirv, "OpVectorTimesMatrix resultType resultId _1 _0") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] vector mul(matrix left, vector right) { vector result; @@ -6852,6 +7339,7 @@ __generic __target_intrinsic(hlsl) __target_intrinsic(glsl, "($1 * $0)") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] vector mul(matrix left, vector right) { vector result; @@ -6870,6 +7358,7 @@ __generic __target_intrinsic(hlsl) __target_intrinsic(glsl, "($1 * $0)") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] vector mul(matrix left, vector right) { vector result; @@ -6891,6 +7380,7 @@ __target_intrinsic(hlsl) __target_intrinsic(glsl, "($1 * $0)") __target_intrinsic(spirv, "OpMatrixTimesMatrix resultType resultId _1 _0") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] matrix mul(matrix left, matrix right) { matrix result; @@ -6910,6 +7400,7 @@ __generic __target_intrinsic(hlsl) __target_intrinsic(glsl, "($1 * $0)") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] matrix mul(matrix left, matrix right) { matrix result; @@ -6929,6 +7420,7 @@ __generic __target_intrinsic(hlsl) __target_intrinsic(glsl, "($1 * $0)") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] matrix mul(matrix left, matrix right) { matrix result; @@ -6980,6 +7472,7 @@ __generic float noise(vector x) /// the user's responsibility, so that the default behavior of the language /// is more semantically "correct." [ForceInline] +[require(spirv)] T __copyObject(T v) { __target_switch { @@ -6992,9 +7485,9 @@ T __copyObject(T v) /// `NonUniformResourceIndex` function is used to indicate if the resource index is /// divergent, and ensure scalarization happens correctly for each divergent lane. -[__readNone] __generic __intrinsic_op($(kIROp_NonUniformResourceIndex)) +[require(cpp_cuda_glsl_hlsl_spirv, nonuniformqualifier)] T NonUniformResourceIndex(T index); /// HLSL allows NonUniformResourceIndex around non int/uint types. @@ -7010,6 +7503,7 @@ __target_intrinsic(hlsl) __target_intrinsic(glsl) __target_intrinsic(spirv, "OpExtInst resultType resultId glsl450 Normalize _0") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] vector normalize(vector x) { return x / length(x); @@ -7020,6 +7514,7 @@ __target_intrinsic(hlsl) __target_intrinsic(glsl) __target_intrinsic(spirv, "OpExtInst resultType resultId glsl450 Normalize _0") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] T normalize(T x) { return x / length(x); @@ -7033,6 +7528,7 @@ __target_intrinsic(cuda, "$P_pow($0, $1)") __target_intrinsic(cpp, "$P_pow($0, $1)") __target_intrinsic(spirv, "OpExtInst resultType resultId glsl450 Pow _0 _1") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] T pow(T x, T y); __generic @@ -7040,6 +7536,7 @@ __target_intrinsic(hlsl) __target_intrinsic(glsl) __target_intrinsic(spirv, "OpExtInst resultType resultId glsl450 Pow _0 _1") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] vector pow(vector x, vector y) { VECTOR_MAP_BINARY(T, N, pow, x, y); @@ -7048,6 +7545,7 @@ vector pow(vector x, vector y) __generic __target_intrinsic(hlsl) [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] matrix pow(matrix x, matrix y) { MATRIX_MAP_BINARY(T, N, M, pow, x, y); @@ -7082,6 +7580,7 @@ for (int argCount = 0; argCount < 12; argCount++) auto spirvArgs = spirvArgList.toString(); }}}} __glsl_extension(GL_EXT_debug_printf) +[require(cpp_cuda_glsl_hlsl_spirv, printf)] void printf$(genericParamList.toString())(NativeString format $(paramList)) { __target_switch @@ -7105,6 +7604,7 @@ ${{{{ // Tessellation factor fixup routines +[require(hlsl, sm_5_0)] void Process2DQuadTessFactorsAvg( in float4 RawEdgeFactors, in float2 InsideScale, @@ -7112,6 +7612,7 @@ void Process2DQuadTessFactorsAvg( out float2 RoundedInsideTessFactors, out float2 UnroundedInsideTessFactors); +[require(hlsl, sm_5_0)] void Process2DQuadTessFactorsMax( in float4 RawEdgeFactors, in float2 InsideScale, @@ -7119,6 +7620,7 @@ void Process2DQuadTessFactorsMax( out float2 RoundedInsideTessFactors, out float2 UnroundedInsideTessFactors); +[require(hlsl, sm_5_0)] void Process2DQuadTessFactorsMin( in float4 RawEdgeFactors, in float2 InsideScale, @@ -7126,12 +7628,14 @@ void Process2DQuadTessFactorsMin( out float2 RoundedInsideTessFactors, out float2 UnroundedInsideTessFactors); +[require(hlsl, sm_5_0)] void ProcessIsolineTessFactors( in float RawDetailFactor, in float RawDensityFactor, out float RoundedDetailFactor, out float RoundedDensityFactor); +[require(hlsl, sm_5_0)] void ProcessQuadTessFactorsAvg( in float4 RawEdgeFactors, in float InsideScale, @@ -7139,6 +7643,7 @@ void ProcessQuadTessFactorsAvg( out float2 RoundedInsideTessFactors, out float2 UnroundedInsideTessFactors); +[require(hlsl, sm_5_0)] void ProcessQuadTessFactorsMax( in float4 RawEdgeFactors, in float InsideScale, @@ -7146,6 +7651,7 @@ void ProcessQuadTessFactorsMax( out float2 RoundedInsideTessFactors, out float2 UnroundedInsideTessFactors); +[require(hlsl, sm_5_0)] void ProcessQuadTessFactorsMin( in float4 RawEdgeFactors, in float InsideScale, @@ -7153,6 +7659,7 @@ void ProcessQuadTessFactorsMin( out float2 RoundedInsideTessFactors, out float2 UnroundedInsideTessFactors); +[require(hlsl, sm_5_0)] void ProcessTriTessFactorsAvg( in float3 RawEdgeFactors, in float InsideScale, @@ -7160,6 +7667,7 @@ void ProcessTriTessFactorsAvg( out float RoundedInsideTessFactor, out float UnroundedInsideTessFactor); +[require(hlsl, sm_5_0)] void ProcessTriTessFactorsMax( in float3 RawEdgeFactors, in float InsideScale, @@ -7167,6 +7675,7 @@ void ProcessTriTessFactorsMax( out float RoundedInsideTessFactor, out float UnroundedInsideTessFactor); +[require(hlsl, sm_5_0)] void ProcessTriTessFactorsMin( in float3 RawEdgeFactors, in float InsideScale, @@ -7180,6 +7689,7 @@ __target_intrinsic(hlsl) __target_intrinsic(glsl) __target_intrinsic(spirv, "OpExtInst resultType resultId glsl450 Radians _0") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] T radians(T x) { return x * (T.getPi() / T(180.0f)); @@ -7190,6 +7700,7 @@ __target_intrinsic(hlsl) __target_intrinsic(glsl) __target_intrinsic(spirv, "OpExtInst resultType resultId glsl450 Radians _0") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] vector radians(vector x) { return x * (T.getPi() / T(180.0f)); @@ -7198,6 +7709,7 @@ vector radians(vector x) __generic __target_intrinsic(hlsl) [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] matrix radians(matrix x) { return x * (T.getPi() / T(180.0f)); @@ -7207,6 +7719,7 @@ matrix radians(matrix x) __generic __target_intrinsic(hlsl) [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] T rcp(T x) { return T(1.0) / x; @@ -7215,6 +7728,7 @@ T rcp(T x) __generic __target_intrinsic(hlsl) [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] vector rcp(vector x) { VECTOR_MAP_UNARY(T, N, rcp, x); @@ -7223,6 +7737,7 @@ vector rcp(vector x) __generic __target_intrinsic(hlsl) [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] matrix rcp(matrix x) { MATRIX_MAP_UNARY(T, N, M, rcp, x); @@ -7234,6 +7749,7 @@ __target_intrinsic(hlsl) __target_intrinsic(glsl) __target_intrinsic(spirv, "OpExtInst resultType resultId glsl450 Reflect _0 _1") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] T reflect(T i, T n) { return i - T(2) * dot(n,i) * n; @@ -7244,6 +7760,7 @@ __target_intrinsic(hlsl) __target_intrinsic(glsl) __target_intrinsic(spirv, "OpExtInst resultType resultId glsl450 Reflect _0 _1") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] vector reflect(vector i, vector n) { return i - T(2) * dot(n,i) * n; @@ -7255,6 +7772,7 @@ __target_intrinsic(hlsl) __target_intrinsic(glsl) __target_intrinsic(spirv, "OpExtInst resultType resultId glsl450 Refract _0 _1 _2") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] vector refract(vector i, vector n, T eta) { let dotNI = dot(n,i); @@ -7268,6 +7786,7 @@ __target_intrinsic(hlsl) __target_intrinsic(glsl) __target_intrinsic(spirv, "OpExtInst resultType resultId glsl450 Refract _0 _1 _2") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] T refract(T i, T n, T eta) { let dotNI = dot(n,i); @@ -7278,6 +7797,7 @@ T refract(T i, T n, T eta) // Reverse order of bits [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)] uint reversebits(uint value) { __target_switch @@ -7297,6 +7817,7 @@ uint reversebits(uint value) __target_intrinsic(glsl, "bitfieldReverse") __generic [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)] vector reversebits(vector value) { __target_switch @@ -7318,6 +7839,7 @@ __target_intrinsic(cuda, "$P_round($0)") __target_intrinsic(cpp, "$P_round($0)") __target_intrinsic(spirv, "OpExtInst resultType resultId glsl450 Round _0") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] T round(T x); __generic @@ -7325,6 +7847,7 @@ __target_intrinsic(hlsl) __target_intrinsic(glsl) __target_intrinsic(spirv, "OpExtInst resultType resultId glsl450 Round _0") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] vector round(vector x) { VECTOR_MAP_UNARY(T, N, round, x); @@ -7333,6 +7856,7 @@ vector round(vector x) __generic __target_intrinsic(hlsl) [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] matrix round(matrix x) { MATRIX_MAP_UNARY(T, N, M, round, x); @@ -7346,6 +7870,7 @@ __target_intrinsic(cuda, "$P_rsqrt($0)") __target_intrinsic(cpp, "$P_rsqrt($0)") __target_intrinsic(spirv, "OpExtInst resultType resultId glsl450 InverseSqrt _0") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] T rsqrt(T x) { return T(1.0) / sqrt(x); @@ -7356,6 +7881,7 @@ __target_intrinsic(hlsl) __target_intrinsic(glsl, "inversesqrt($0)") __target_intrinsic(spirv, "OpExtInst resultType resultId glsl450 InverseSqrt _0") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] vector rsqrt(vector x) { VECTOR_MAP_UNARY(T, N, rsqrt, x); @@ -7364,6 +7890,7 @@ vector rsqrt(vector x) __generic __target_intrinsic(hlsl) [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] matrix rsqrt(matrix x) { MATRIX_MAP_UNARY(T, N, M, rsqrt, x); @@ -7374,6 +7901,7 @@ matrix rsqrt(matrix x) __generic __target_intrinsic(hlsl) [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] T saturate(T x) { return clamp(x, T(0), T(1)); @@ -7382,6 +7910,7 @@ T saturate(T x) __generic __target_intrinsic(hlsl) [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] vector saturate(vector x) { return clamp(x, @@ -7392,6 +7921,7 @@ vector saturate(vector x) __generic __target_intrinsic(hlsl) [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] matrix saturate(matrix x) { MATRIX_MAP_UNARY(T, N, M, saturate, x); @@ -7469,6 +7999,7 @@ __target_intrinsic(cuda, "$P_sin($0)") __target_intrinsic(cpp, "$P_sin($0)") __target_intrinsic(spirv, "OpExtInst resultType resultId glsl450 Sin _0") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] T sin(T x); __generic @@ -7476,6 +8007,7 @@ __target_intrinsic(hlsl) __target_intrinsic(glsl) __target_intrinsic(spirv, "OpExtInst resultType resultId glsl450 Sin _0") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] vector sin(vector x) { VECTOR_MAP_UNARY(T, N, sin, x); @@ -7484,6 +8016,7 @@ vector sin(vector x) __generic __target_intrinsic(hlsl) [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] matrix sin(matrix x) { MATRIX_MAP_UNARY(T, N, M, sin, x); @@ -7494,6 +8027,7 @@ __generic __target_intrinsic(hlsl) __target_intrinsic(cuda, "$P_sincos($0, $1, $2)") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] void sincos(T x, out T s, out T c) { s = sin(x); @@ -7503,6 +8037,7 @@ void sincos(T x, out T s, out T c) __generic __target_intrinsic(hlsl) [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] void sincos(vector x, out vector s, out vector c) { s = sin(x); @@ -7512,6 +8047,7 @@ void sincos(vector x, out vector s, out vector c) __generic __target_intrinsic(hlsl) [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] void sincos(matrix x, out matrix s, out matrix c) { s = sin(x); @@ -7526,6 +8062,7 @@ __target_intrinsic(cuda, "$P_sinh($0)") __target_intrinsic(cpp, "$P_sinh($0)") __target_intrinsic(spirv, "OpExtInst resultType resultId glsl450 Sinh _0") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] T sinh(T x); __generic @@ -7533,6 +8070,7 @@ __target_intrinsic(hlsl) __target_intrinsic(glsl) __target_intrinsic(spirv, "OpExtInst resultType resultId glsl450 Sinh _0") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] vector sinh(vector x) { VECTOR_MAP_UNARY(T, N, sinh, x); @@ -7541,6 +8079,7 @@ vector sinh(vector x) __generic __target_intrinsic(hlsl) [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] matrix sinh(matrix x) { MATRIX_MAP_UNARY(T, N, M, sinh, x); @@ -7552,6 +8091,7 @@ __target_intrinsic(hlsl) __target_intrinsic(glsl) __target_intrinsic(spirv, "OpExtInst resultType resultId glsl450 SmoothStep _0 _1 _2") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] T smoothstep(T min, T max, T x) { let t = saturate((x - min) / (max - min)); @@ -7563,6 +8103,7 @@ __target_intrinsic(hlsl) __target_intrinsic(glsl) __target_intrinsic(spirv, "OpExtInst resultType resultId glsl450 SmoothStep _0 _1 _2") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] vector smoothstep(vector min, vector max, vector x) { VECTOR_MAP_TRINARY(T, N, smoothstep, min, max, x); @@ -7571,6 +8112,7 @@ vector smoothstep(vector min, vector max, vector x) __generic __target_intrinsic(hlsl) [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] matrix smoothstep(matrix min, matrix max, matrix x) { MATRIX_MAP_TRINARY(T, N, M, smoothstep, min, max, x); @@ -7584,6 +8126,7 @@ __target_intrinsic(cuda, "$P_sqrt($0)") __target_intrinsic(cpp, "$P_sqrt($0)") __target_intrinsic(spirv, "OpExtInst resultType resultId glsl450 Sqrt _0") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] T sqrt(T x); __generic @@ -7591,6 +8134,7 @@ __target_intrinsic(hlsl) __target_intrinsic(glsl) __target_intrinsic(spirv, "OpExtInst resultType resultId glsl450 Sqrt _0") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] vector sqrt(vector x) { VECTOR_MAP_UNARY(T, N, sqrt, x); @@ -7599,6 +8143,7 @@ vector sqrt(vector x) __generic __target_intrinsic(hlsl) [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] matrix sqrt(matrix x) { MATRIX_MAP_UNARY(T, N, M, sqrt, x); @@ -7610,6 +8155,7 @@ __target_intrinsic(hlsl) __target_intrinsic(glsl) __target_intrinsic(spirv, "OpExtInst resultType resultId glsl450 Step _0 _1") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] T step(T y, T x) { return x < y ? T(0.0f) : T(1.0f); @@ -7620,6 +8166,7 @@ __target_intrinsic(hlsl) __target_intrinsic(glsl) __target_intrinsic(spirv, "OpExtInst resultType resultId glsl450 Step _0 _1") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] vector step(vector y, vector x) { VECTOR_MAP_BINARY(T, N, step, y, x); @@ -7628,6 +8175,7 @@ vector step(vector y, vector x) __generic __target_intrinsic(hlsl) [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] matrix step(matrix y, matrix x) { MATRIX_MAP_BINARY(T, N, M, step, y, x); @@ -7641,6 +8189,7 @@ __target_intrinsic(cuda, "$P_tan($0)") __target_intrinsic(cpp, "$P_tan($0)") __target_intrinsic(spirv, "OpExtInst resultType resultId glsl450 Tan _0") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] T tan(T x); __generic @@ -7648,6 +8197,7 @@ __target_intrinsic(hlsl) __target_intrinsic(glsl) __target_intrinsic(spirv, "OpExtInst resultType resultId glsl450 Tan _0") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] vector tan(vector x) { VECTOR_MAP_UNARY(T, N, tan, x); @@ -7656,6 +8206,7 @@ vector tan(vector x) __generic __target_intrinsic(hlsl) [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] matrix tan(matrix x) { MATRIX_MAP_UNARY(T, N, M, tan, x); @@ -7669,6 +8220,7 @@ __target_intrinsic(cuda, "$P_tanh($0)") __target_intrinsic(cpp, "$P_tanh($0)") __target_intrinsic(spirv, "OpExtInst resultType resultId glsl450 Tanh _0") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] T tanh(T x); __generic @@ -7676,6 +8228,7 @@ __target_intrinsic(hlsl) __target_intrinsic(glsl) __target_intrinsic(spirv, "OpExtInst resultType resultId glsl450 Tanh _0") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] vector tanh(vector x) { VECTOR_MAP_UNARY(T, N, tanh, x); @@ -7684,6 +8237,7 @@ vector tanh(vector x) __generic __target_intrinsic(hlsl) [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] matrix tanh(matrix x) { MATRIX_MAP_UNARY(T, N, M, tanh, x); @@ -7695,6 +8249,7 @@ __target_intrinsic(hlsl) __target_intrinsic(glsl) __target_intrinsic(spirv, "OpTranspose resultType resultId _0") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] [PreferRecompute] matrix transpose(matrix x) { @@ -7709,6 +8264,7 @@ __target_intrinsic(hlsl) __target_intrinsic(glsl) __target_intrinsic(spirv, "OpTranspose resultType resultId _0") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] [PreferRecompute] matrix transpose(matrix x) { @@ -7723,6 +8279,7 @@ __target_intrinsic(hlsl) __target_intrinsic(glsl) __target_intrinsic(spirv, "OpTranspose resultType resultId _0") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] [PreferRecompute] [OverloadRank(-1)] matrix transpose(matrix x) @@ -7742,6 +8299,7 @@ __target_intrinsic(cuda, "$P_trunc($0)") __target_intrinsic(cpp, "$P_trunc($0)") __target_intrinsic(spirv, "OpExtInst resultType resultId glsl450 Trunc _0") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] T trunc(T x); __generic @@ -7749,6 +8307,7 @@ __target_intrinsic(hlsl) __target_intrinsic(glsl) __target_intrinsic(spirv, "OpExtInst resultType resultId glsl450 Trunc _0") [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] vector trunc(vector x) { VECTOR_MAP_UNARY(T, N, trunc, x); @@ -7757,6 +8316,7 @@ vector trunc(vector x) __generic __target_intrinsic(hlsl) [__readNone] +[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] matrix trunc(matrix x) { MATRIX_MAP_UNARY(T, N, M, trunc, x); @@ -7768,6 +8328,7 @@ typedef uint WaveMask; __glsl_extension(GL_KHR_shader_subgroup_ballot) __spirv_version(1.3) +[require(cuda_glsl_hlsl_spirv, subgroup_ballot)] WaveMask WaveGetConvergedMask() { __target_switch @@ -7793,6 +8354,7 @@ WaveMask __WaveGetActiveMask(); __glsl_extension(GL_KHR_shader_subgroup_ballot) __spirv_version(1.3) +[require(cuda_glsl_hlsl_spirv, subgroup_ballot)] WaveMask WaveGetActiveMask() { __target_switch @@ -7808,13 +8370,14 @@ WaveMask WaveGetActiveMask() OpCapability GroupNonUniformBallot; OpGroupNonUniformBallot $$uint4 result Subgroup $_true }).x; - default: + case cuda: return __WaveGetActiveMask(); } } __glsl_extension(GL_KHR_shader_subgroup_basic) __spirv_version(1.3) +[require(cuda_glsl_hlsl_spirv, subgroup_basic)] bool WaveMaskIsFirstLane(WaveMask mask) { __target_switch @@ -7831,13 +8394,12 @@ bool WaveMaskIsFirstLane(WaveMask mask) OpCapability GroupNonUniformBallot; OpGroupNonUniformElect $$bool result Subgroup }; - default: - return false; } } __glsl_extension(GL_KHR_shader_subgroup_vote) __spirv_version(1.3) +[require(cuda_glsl_hlsl_spirv, subgroup_vote)] bool WaveMaskAllTrue(WaveMask mask, bool condition) { __target_switch @@ -7854,13 +8416,12 @@ bool WaveMaskAllTrue(WaveMask mask, bool condition) OpCapability GroupNonUniformBallot; OpGroupNonUniformAll $$bool result Subgroup $condition }; - default: - return false; } } __glsl_extension(GL_KHR_shader_subgroup_vote) __spirv_version(1.3) +[require(cuda_glsl_hlsl_spirv, subgroup_vote)] bool WaveMaskAnyTrue(WaveMask mask, bool condition) { __target_switch @@ -7877,13 +8438,12 @@ bool WaveMaskAnyTrue(WaveMask mask, bool condition) OpCapability GroupNonUniformBallot; OpGroupNonUniformAny $$bool result Subgroup $condition }; - default: - return false; } } __glsl_extension(GL_KHR_shader_subgroup_ballot) __spirv_version(1.3) +[require(cuda_glsl_hlsl_spirv, subgroup_ballot)] WaveMask WaveMaskBallot(WaveMask mask, bool condition) { __target_switch @@ -7900,11 +8460,10 @@ WaveMask WaveMaskBallot(WaveMask mask, bool condition) OpCapability GroupNonUniformBallot; OpGroupNonUniformBallot $$uint4 result Subgroup $condition }).x; - default: - return 0; } } +[require(cuda_glsl_hlsl_spirv, subgroup_ballot)] uint WaveMaskCountBits(WaveMask mask, bool value) { __target_switch @@ -7933,6 +8492,7 @@ uint WaveMaskCountBits(WaveMask mask, bool value) // It seems this can only mean the active threads are the "threads the program flow would lead to". This implies a lockstep // "straight SIMD" style interpretation. That being the case this op on HLSL is just a memory barrier without any Sync. +[require(cuda_glsl_hlsl_spirv, memorybarrier_compute)] void AllMemoryBarrierWithWaveMaskSync(WaveMask mask) { __target_switch @@ -7964,6 +8524,7 @@ void AllMemoryBarrierWithWaveMaskSync(WaveMask mask) // aspect of HLSL seems to make everything in lock step - but that's not quite so, it only has to apparently be that way as far as the programmers // model appears - divergence could perhaps potentially still happen. +[require(cuda_glsl_hlsl_spirv, memorybarrier_compute)] void GroupMemoryBarrierWithWaveMaskSync(WaveMask mask) { __target_switch @@ -7979,6 +8540,7 @@ void GroupMemoryBarrierWithWaveMaskSync(WaveMask mask) } } +[require(cuda_glsl_hlsl_spirv, memorybarrier_compute)] void AllMemoryBarrierWithWaveSync() { __target_switch @@ -7994,6 +8556,7 @@ void AllMemoryBarrierWithWaveSync() } } +[require(cuda_glsl_hlsl_spirv, memorybarrier_compute)] void GroupMemoryBarrierWithWaveSync() { __target_switch @@ -8019,6 +8582,7 @@ void GroupMemoryBarrierWithWaveSync() __generic __glsl_extension(GL_KHR_shader_subgroup_ballot) __spirv_version(1.3) +[require(cuda_glsl_hlsl_spirv, subgroup_ballot)] T WaveMaskBroadcastLaneAt(WaveMask mask, T value, constexpr int lane) { __target_switch @@ -8038,6 +8602,7 @@ T WaveMaskBroadcastLaneAt(WaveMask mask, T value, constexpr int lane) __generic __glsl_extension(GL_KHR_shader_subgroup_ballot) __spirv_version(1.3) +[require(cuda_glsl_hlsl_spirv, subgroup_ballot)] vector WaveMaskBroadcastLaneAt(WaveMask mask, vector value, constexpr int lane) { __target_switch @@ -8063,6 +8628,7 @@ matrix WaveMaskBroadcastLaneAt(WaveMask mask, matrix value, conste __generic __glsl_extension(GL_KHR_shader_subgroup_shuffle) __spirv_version(1.3) +[require(cuda_glsl_hlsl_spirv, subgroup_shuffle)] T WaveMaskReadLaneAt(WaveMask mask, T value, int lane) { __target_switch @@ -8079,8 +8645,9 @@ T WaveMaskReadLaneAt(WaveMask mask, T value, int lane) } } __generic -__spirv_version(1.3)__glsl_extension(GL_KHR_shader_subgroup_shuffle) +__glsl_extension(GL_KHR_shader_subgroup_shuffle) __spirv_version(1.3) +[require(cuda_glsl_hlsl_spirv, subgroup_shuffle)] vector WaveMaskReadLaneAt(WaveMask mask, vector value, int lane) { __target_switch @@ -8125,6 +8692,7 @@ matrix WaveMaskShuffle(WaveMask mask, matrix value, int lane) __glsl_extension(GL_KHR_shader_subgroup_ballot) __spirv_version(1.3) +[require(cuda_glsl_hlsl_spirv, subgroup_ballot)] uint WaveMaskPrefixCountBits(WaveMask mask, bool value) { __target_switch @@ -8147,6 +8715,7 @@ uint WaveMaskPrefixCountBits(WaveMask mask, bool value) __generic __glsl_extension(GL_KHR_shader_subgroup_arithmetic) __spirv_version(1.3) +[require(cuda_glsl_hlsl_spirv, subgroup_arithmetic)] T WaveMaskBitAnd(WaveMask mask, T expr) { __target_switch @@ -8165,6 +8734,7 @@ T WaveMaskBitAnd(WaveMask mask, T expr) __generic __glsl_extension(GL_KHR_shader_subgroup_arithmetic) __spirv_version(1.3) +[require(cuda_glsl_hlsl_spirv, subgroup_arithmetic)] vector WaveMaskBitAnd(WaveMask mask, vector expr) { __target_switch @@ -8187,6 +8757,7 @@ matrix WaveMaskBitAnd(WaveMask mask, matrix expr); __generic __glsl_extension(GL_KHR_shader_subgroup_arithmetic) __spirv_version(1.3) +[require(cuda_glsl_hlsl_spirv, subgroup_arithmetic)] T WaveMaskBitOr(WaveMask mask, T expr) { __target_switch @@ -8204,6 +8775,7 @@ T WaveMaskBitOr(WaveMask mask, T expr) __generic __glsl_extension(GL_KHR_shader_subgroup_arithmetic) __spirv_version(1.3) +[require(cuda_glsl_hlsl_spirv, subgroup_arithmetic)] vector WaveMaskBitOr(WaveMask mask, vector expr) { __target_switch @@ -8226,6 +8798,7 @@ matrix WaveMaskBitOr(WaveMask mask, matrix expr); __generic __glsl_extension(GL_KHR_shader_subgroup_arithmetic) __spirv_version(1.3) +[require(cuda_glsl_hlsl_spirv, subgroup_arithmetic)] T WaveMaskBitXor(WaveMask mask, T expr) { __target_switch @@ -8243,6 +8816,7 @@ T WaveMaskBitXor(WaveMask mask, T expr) __generic __glsl_extension(GL_KHR_shader_subgroup_arithmetic) __spirv_version(1.3) +[require(cuda_glsl_hlsl_spirv, subgroup_arithmetic)] vector WaveMaskBitXor(WaveMask mask, vector expr) { __target_switch @@ -8265,6 +8839,7 @@ matrix WaveMaskBitXor(WaveMask mask, matrix expr); __generic __glsl_extension(GL_KHR_shader_subgroup_arithmetic) __spirv_version(1.3) +[require(cuda_glsl_hlsl_spirv, subgroup_arithmetic)] T WaveMaskMax(WaveMask mask, T expr) { __target_switch @@ -8285,6 +8860,7 @@ T WaveMaskMax(WaveMask mask, T expr) __generic __glsl_extension(GL_KHR_shader_subgroup_arithmetic) __spirv_version(1.3) +[require(cuda_glsl_hlsl_spirv, subgroup_arithmetic)] vector WaveMaskMax(WaveMask mask, vector expr) { __target_switch @@ -8311,6 +8887,7 @@ matrix WaveMaskMax(WaveMask mask, matrix expr); __generic __glsl_extension(GL_KHR_shader_subgroup_arithmetic) __spirv_version(1.3) +[require(cuda_glsl_hlsl_spirv, subgroup_arithmetic)] T WaveMaskMin(WaveMask mask, T expr) { __target_switch @@ -8332,6 +8909,7 @@ T WaveMaskMin(WaveMask mask, T expr) __generic __glsl_extension(GL_KHR_shader_subgroup_arithmetic) __spirv_version(1.3) +[require(cuda_glsl_hlsl_spirv, subgroup_arithmetic)] vector WaveMaskMin(WaveMask mask, vector expr) { __target_switch @@ -8358,6 +8936,7 @@ matrix WaveMaskMin(WaveMask mask, matrix expr); __generic __glsl_extension(GL_KHR_shader_subgroup_arithmetic) __spirv_version(1.3) +[require(cuda_glsl_hlsl_spirv, subgroup_arithmetic)] T WaveMaskProduct(WaveMask mask, T expr) { __target_switch @@ -8383,6 +8962,7 @@ T WaveMaskProduct(WaveMask mask, T expr) __generic __glsl_extension(GL_KHR_shader_subgroup_arithmetic) __spirv_version(1.3) +[require(cuda_glsl_hlsl_spirv, subgroup_arithmetic)] vector WaveMaskProduct(WaveMask mask, vector expr) { __target_switch @@ -8410,12 +8990,10 @@ __target_intrinsic(cuda, "_waveProductMultiple($0, $1)") __target_intrinsic(hlsl, "WaveActiveProduct($1)") matrix WaveMaskProduct(WaveMask mask, matrix expr); -__intrinsic_op($(kIROp_RequireGLSLExtension)) -void __requireGLSLExtension(String extensionName); - __generic __glsl_extension(GL_KHR_shader_subgroup_arithmetic) __spirv_version(1.3) +[require(cuda_glsl_hlsl_spirv, subgroup_arithmetic)] T WaveMaskSum(WaveMask mask, T expr) { __target_switch @@ -8442,6 +9020,7 @@ T WaveMaskSum(WaveMask mask, T expr) __generic __glsl_extension(GL_KHR_shader_subgroup_arithmetic) __spirv_version(1.3) +[require(cuda_glsl_hlsl_spirv, subgroup_arithmetic)] vector WaveMaskSum(WaveMask mask, vector expr) { __target_switch @@ -8474,6 +9053,7 @@ __generic __glsl_extension(GL_KHR_shader_subgroup_vote) __spirv_version(1.3) __cuda_sm_version(7.0) +[require(cuda_glsl_hlsl_spirv, subgroup_vote)] bool WaveMaskAllEqual(WaveMask mask, T value) { __target_switch @@ -8491,14 +9071,13 @@ bool WaveMaskAllEqual(WaveMask mask, T value) OpCapability GroupNonUniformVote; OpGroupNonUniformAllEqual $$bool result Subgroup $value }; - default: - return false; } } __generic __glsl_extension(GL_KHR_shader_subgroup_vote) __spirv_version(1.3) __cuda_sm_version(7.0) +[require(cuda_glsl_hlsl_spirv, subgroup_vote)] bool WaveMaskAllEqual(WaveMask mask, vector value) { __target_switch @@ -8516,8 +9095,6 @@ bool WaveMaskAllEqual(WaveMask mask, vector value) OpCapability GroupNonUniformVote; OpGroupNonUniformAllEqual $$bool result Subgroup $value }; - default: - return false; } } __generic @@ -8531,6 +9108,7 @@ bool WaveMaskAllEqual(WaveMask mask, matrix value); __generic __glsl_extension(GL_KHR_shader_subgroup_arithmetic) __spirv_version(1.3) +[require(cuda_glsl_hlsl_spirv, subgroup_arithmetic)] T WaveMaskPrefixProduct(WaveMask mask, T expr) { __target_switch @@ -8557,6 +9135,7 @@ T WaveMaskPrefixProduct(WaveMask mask, T expr) __generic __glsl_extension(GL_KHR_shader_subgroup_arithmetic) __spirv_version(1.3) +[require(cuda_glsl_hlsl_spirv, subgroup_arithmetic)] vector WaveMaskPrefixProduct(WaveMask mask, vector expr) { __target_switch @@ -8588,6 +9167,7 @@ matrix WaveMaskPrefixProduct(WaveMask mask, matrix expr); __generic __glsl_extension(GL_KHR_shader_subgroup_arithmetic) __spirv_version(1.3) +[require(cuda_glsl_hlsl_spirv, subgroup_arithmetic)] T WaveMaskPrefixSum(WaveMask mask, T expr) { __target_switch @@ -8615,6 +9195,7 @@ T WaveMaskPrefixSum(WaveMask mask, T expr) __generic __glsl_extension(GL_KHR_shader_subgroup_arithmetic) __spirv_version(1.3) +[require(cuda_glsl_hlsl_spirv, subgroup_arithmetic)] vector WaveMaskPrefixSum(WaveMask mask, vector expr) { __target_switch @@ -8646,6 +9227,7 @@ matrix WaveMaskPrefixSum(WaveMask mask, matrix expr); __generic __glsl_extension(GL_KHR_shader_subgroup_ballot) __spirv_version(1.3) +[require(cuda_glsl_hlsl_spirv, subgroup_ballot)] T WaveMaskReadLaneFirst(WaveMask mask, T expr) { __target_switch @@ -8660,6 +9242,7 @@ T WaveMaskReadLaneFirst(WaveMask mask, T expr) __generic __glsl_extension(GL_KHR_shader_subgroup_ballot) __spirv_version(1.3) +[require(cuda_glsl_hlsl_spirv, subgroup_ballot)] vector WaveMaskReadLaneFirst(WaveMask mask, vector expr) { __target_switch @@ -8684,6 +9267,7 @@ __generic __glsl_extension(GL_NV_shader_subgroup_partitioned) __spirv_version(1.1) __cuda_sm_version(7.0) +[require(cuda_glsl_hlsl_spirv, subgroup_partitioned)] WaveMask WaveMaskMatch(WaveMask mask, T value) { __target_switch @@ -8704,6 +9288,7 @@ __generic __glsl_extension(GL_NV_shader_subgroup_partitioned) __spirv_version(1.1) __cuda_sm_version(7.0) +[require(cuda_glsl_hlsl_spirv, subgroup_partitioned)] WaveMask WaveMaskMatch(WaveMask mask, vector value) { __target_switch @@ -8733,6 +9318,7 @@ WaveMask WaveMaskMatch(WaveMask mask, matrix value); __generic __glsl_extension(GL_KHR_shader_subgroup_arithmetic) __spirv_version(1.3) +[require(cuda_glsl_hlsl_spirv, subgroup_arithmetic)] T WaveMaskPrefixBitAnd(WaveMask mask, T expr) { __target_switch @@ -8748,6 +9334,7 @@ T WaveMaskPrefixBitAnd(WaveMask mask, T expr) __generic __glsl_extension(GL_KHR_shader_subgroup_arithmetic) __spirv_version(1.3) +[require(cuda_glsl_hlsl_spirv, subgroup_arithmetic)] vector WaveMaskPrefixBitAnd(WaveMask mask, vector expr) { __target_switch @@ -8768,6 +9355,7 @@ matrix WaveMaskPrefixBitAnd(WaveMask mask, matrix expr); __generic __glsl_extension(GL_KHR_shader_subgroup_arithmetic) __spirv_version(1.3) +[require(cuda_glsl_hlsl_spirv, subgroup_arithmetic)] T WaveMaskPrefixBitOr(WaveMask mask, T expr) { __target_switch @@ -8783,6 +9371,7 @@ T WaveMaskPrefixBitOr(WaveMask mask, T expr) __generic __glsl_extension(GL_KHR_shader_subgroup_arithmetic) __spirv_version(1.3) +[require(cuda_glsl_hlsl_spirv, subgroup_arithmetic)] vector WaveMaskPrefixBitOr(WaveMask mask, vector expr) { __target_switch @@ -8803,6 +9392,7 @@ matrix WaveMaskPrefixBitOr(WaveMask mask, matrix expr); __generic __glsl_extension(GL_KHR_shader_subgroup_arithmetic) __spirv_version(1.3) +[require(cuda_glsl_hlsl_spirv, subgroup_arithmetic)] T WaveMaskPrefixBitXor(WaveMask mask, T expr) { __target_switch @@ -8818,6 +9408,7 @@ T WaveMaskPrefixBitXor(WaveMask mask, T expr) __generic __glsl_extension(GL_KHR_shader_subgroup_arithmetic) __spirv_version(1.3) +[require(cuda_glsl_hlsl_spirv, subgroup_arithmetic)] vector WaveMaskPrefixBitXor(WaveMask mask, vector expr) { __target_switch @@ -8843,6 +9434,7 @@ matrix WaveMaskPrefixBitXor(WaveMask mask, matrix expr); __generic __glsl_extension(GL_KHR_shader_subgroup_quad) __spirv_version(1.3) +[require(glsl_hlsl_spirv, subgroup_quad)] T QuadReadLaneAt(T sourceValue, uint quadLaneID) { __target_switch @@ -8861,6 +9453,7 @@ T QuadReadLaneAt(T sourceValue, uint quadLaneID) __generic __glsl_extension(GL_KHR_shader_subgroup_quad) __spirv_version(1.3) +[require(glsl_hlsl_spirv, subgroup_quad)] vector QuadReadLaneAt(vector sourceValue, uint quadLaneID) { __target_switch @@ -8882,6 +9475,7 @@ __generic matrix QuadReadLan __generic __glsl_extension(GL_KHR_shader_subgroup_quad) __spirv_version(1.3) +[require(glsl_hlsl_spirv, subgroup_quad)] T QuadReadAcrossX(T localValue) { __target_switch @@ -8902,6 +9496,7 @@ T QuadReadAcrossX(T localValue) __generic __glsl_extension(GL_KHR_shader_subgroup_quad) __spirv_version(1.3) +[require(glsl_hlsl_spirv, subgroup_quad)] vector QuadReadAcrossX(vector localValue) { __target_switch @@ -8923,6 +9518,7 @@ __generic matrix QuadReadAcr __generic __glsl_extension(GL_KHR_shader_subgroup_quad) __spirv_version(1.3) +[require(glsl_hlsl_spirv, subgroup_quad)] T QuadReadAcrossY(T localValue) { __target_switch @@ -8942,6 +9538,7 @@ T QuadReadAcrossY(T localValue) __generic __glsl_extension(GL_KHR_shader_subgroup_quad) __spirv_version(1.3) +[require(glsl_hlsl_spirv, subgroup_quad)] vector QuadReadAcrossY(vector localValue) { __target_switch @@ -8964,6 +9561,7 @@ __generic matrix QuadReadAcr __generic __glsl_extension(GL_KHR_shader_subgroup_quad) __spirv_version(1.3) +[require(glsl_hlsl_spirv, subgroup_quad)] T QuadReadAcrossDiagonal(T localValue) { __target_switch @@ -8983,6 +9581,7 @@ T QuadReadAcrossDiagonal(T localValue) __generic __glsl_extension(GL_KHR_shader_subgroup_quad) __spirv_version(1.3) +[require(glsl_hlsl_spirv, subgroup_quad)] vector QuadReadAcrossDiagonal(vector localValue) { __target_switch @@ -9011,6 +9610,7 @@ for (auto opName : kWaveActiveBitOpEntries) { __generic __glsl_extension(GL_KHR_shader_subgroup_arithmetic) __spirv_version(1.3) +[require(cuda_glsl_hlsl_spirv, subgroup_arithmetic)] T WaveActive$(opName.hlslName)(T expr) { __target_switch @@ -9027,6 +9627,7 @@ T WaveActive$(opName.hlslName)(T expr) __generic __glsl_extension(GL_KHR_shader_subgroup_arithmetic) __spirv_version(1.3) +[require(cuda_glsl_hlsl_spirv, subgroup_arithmetic)] vector WaveActive$(opName.hlslName)(vector expr) { __target_switch @@ -9059,6 +9660,7 @@ for (const char* opName : kWaveActiveMinMaxNames) { __generic __glsl_extension(GL_KHR_shader_subgroup_arithmetic) __spirv_version(1.3) +[require(cuda_glsl_hlsl_spirv, subgroup_arithmetic)] T WaveActive$(opName)(T expr) { __target_switch @@ -9080,6 +9682,7 @@ T WaveActive$(opName)(T expr) __generic __glsl_extension(GL_KHR_shader_subgroup_arithmetic) __spirv_version(1.3) +[require(cuda_glsl_hlsl_spirv, subgroup_arithmetic)] vector WaveActive$(opName)(vector expr) { __target_switch @@ -9119,6 +9722,7 @@ for (auto opName : kWaveActivProductSumNames) { __generic __glsl_extension(GL_KHR_shader_subgroup_arithmetic) __spirv_version(1.3) +[require(cuda_glsl_hlsl_spirv, subgroup_arithmetic)] T WaveActive$(opName.hlslName)(T expr) { __target_switch @@ -9151,6 +9755,7 @@ __generic __glsl_extension(GL_KHR_shader_subgroup_arithmetic) __spirv_version(1.3) __target_intrinsic(hlsl) +[require(cuda_glsl_hlsl_spirv, subgroup_arithmetic)] vector WaveActive$(opName.hlslName)(vector expr) { __target_switch @@ -9192,6 +9797,7 @@ ${{{{ __generic __glsl_extension(GL_KHR_shader_subgroup_vote) __spirv_version(1.3) +[require(cuda_glsl_hlsl_spirv, subgroup_vote)] bool WaveActiveAllEqual(T value) { __target_switch @@ -9214,6 +9820,7 @@ bool WaveActiveAllEqual(T value) __generic __glsl_extension(GL_KHR_shader_subgroup_vote) __spirv_version(1.3) +[require(cuda_glsl_hlsl_spirv, subgroup_vote)] bool WaveActiveAllEqual(vector value) { __target_switch @@ -9242,6 +9849,7 @@ bool WaveActiveAllEqual(matrix value) __glsl_extension(GL_KHR_shader_subgroup_vote) __spirv_version(1.3) +[require(cuda_glsl_hlsl_spirv, subgroup_vote)] bool WaveActiveAllTrue(bool condition) { __target_switch @@ -9263,6 +9871,7 @@ bool WaveActiveAllTrue(bool condition) __glsl_extension(GL_KHR_shader_subgroup_vote) __spirv_version(1.3) +[require(cuda_glsl_hlsl_spirv, subgroup_vote)] bool WaveActiveAnyTrue(bool condition) { __target_switch @@ -9285,6 +9894,7 @@ bool WaveActiveAnyTrue(bool condition) __glsl_extension(GL_KHR_shader_subgroup_ballot) __spirv_version(1.3) [NonUniformReturn] +[require(cuda_glsl_hlsl_spirv, subgroup_ballot)] uint4 WaveActiveBallot(bool condition) { __target_switch @@ -9313,6 +9923,7 @@ uint WaveActiveCountBits(bool value) __glsl_extension(GL_KHR_shader_subgroup_basic) __spirv_version(1.3) [NonUniformReturn] +[require(cuda_glsl_hlsl_spirv, subgroup_basic)] uint WaveGetLaneCount() { __target_switch @@ -9332,6 +9943,7 @@ uint WaveGetLaneCount() __glsl_extension(GL_KHR_shader_subgroup_basic) __spirv_version(1.3) [NonUniformReturn] +[require(cuda_glsl_hlsl_spirv, subgroup_basic)] uint WaveGetLaneIndex() { __target_switch @@ -9351,6 +9963,7 @@ uint WaveGetLaneIndex() __glsl_extension(GL_KHR_shader_subgroup_basic) __spirv_version(1.3) [NonUniformReturn] +[require(cuda_glsl_hlsl_spirv, subgroup_basic)] bool WaveIsFirstLane() { __target_switch @@ -9372,6 +9985,8 @@ bool WaveIsFirstLane() // It's useful to have a wave uint4 version of countbits, because some wave functions return uint4. // This implementation tries to limit the amount of work required by the actual lane count. +__spirv_version(1.3) +[require(cpp_cuda_glsl_hlsl_spirv, subgroup_ballot)] uint _WaveCountBits(uint4 value) { __target_switch @@ -9402,6 +10017,7 @@ uint _WaveCountBits(uint4 value) __generic __glsl_extension(GL_KHR_shader_subgroup_arithmetic) __spirv_version(1.3) +[require(cuda_glsl_hlsl_spirv, subgroup_arithmetic)] T WavePrefixProduct(T expr) { __target_switch @@ -9434,6 +10050,7 @@ T WavePrefixProduct(T expr) __generic __glsl_extension(GL_KHR_shader_subgroup_arithmetic) __spirv_version(1.3) +[require(cuda_glsl_hlsl_spirv, subgroup_arithmetic)] vector WavePrefixProduct(vector expr) { __target_switch @@ -9461,6 +10078,7 @@ vector WavePrefixProduct(vector expr) __generic __target_intrinsic(hlsl) +[require(cuda_glsl_hlsl_spirv, subgroup_arithmetic)] matrix WavePrefixProduct(matrix expr) { return WaveMaskPrefixProduct(WaveGetActiveMask(), expr); @@ -9469,6 +10087,7 @@ matrix WavePrefixProduct(matrix expr) __generic __glsl_extension(GL_KHR_shader_subgroup_arithmetic) __spirv_version(1.3) +[require(cuda_glsl_hlsl_spirv, subgroup_arithmetic)] T WavePrefixSum(T expr) { __target_switch @@ -9497,6 +10116,7 @@ T WavePrefixSum(T expr) __generic __glsl_extension(GL_KHR_shader_subgroup_arithmetic) __spirv_version(1.3) +[require(cuda_glsl_hlsl_spirv, subgroup_arithmetic)] vector WavePrefixSum(vector expr) { __target_switch @@ -9532,6 +10152,7 @@ matrix WavePrefixSum(matrix expr) __generic __glsl_extension(GL_KHR_shader_subgroup_ballot) __spirv_version(1.3) +[require(cuda_glsl_hlsl_spirv, subgroup_ballot)] T WaveReadLaneFirst(T expr) { __target_switch @@ -9550,6 +10171,7 @@ T WaveReadLaneFirst(T expr) __generic __glsl_extension(GL_KHR_shader_subgroup_ballot) __spirv_version(1.3) +[require(cuda_glsl_hlsl_spirv, subgroup_ballot)] vector WaveReadLaneFirst(vector expr) { __target_switch @@ -9581,6 +10203,7 @@ matrix WaveReadLaneFirst(matrix expr) __generic __glsl_extension(GL_KHR_shader_subgroup_ballot) __spirv_version(1.3) +[require(cuda_glsl_hlsl_spirv, subgroup_ballot)] T WaveBroadcastLaneAt(T value, constexpr int lane) { __target_switch @@ -9600,6 +10223,7 @@ T WaveBroadcastLaneAt(T value, constexpr int lane) __generic __glsl_extension(GL_KHR_shader_subgroup_ballot) __spirv_version(1.3) +[require(cuda_glsl_hlsl_spirv, subgroup_ballot)] vector WaveBroadcastLaneAt(vector value, constexpr int lane) { __target_switch @@ -9629,6 +10253,7 @@ matrix WaveBroadcastLaneAt(matrix value, constexpr int lane) __generic __glsl_extension(GL_KHR_shader_subgroup_shuffle) __spirv_version(1.3) +[require(cuda_glsl_hlsl_spirv, subgroup_shuffle)] T WaveReadLaneAt(T value, int lane) { __target_switch @@ -9648,6 +10273,7 @@ T WaveReadLaneAt(T value, int lane) __generic __spirv_version(1.3) __glsl_extension(GL_KHR_shader_subgroup_shuffle) +[require(cuda_glsl_hlsl_spirv, subgroup_shuffle)] vector WaveReadLaneAt(vector value, int lane) { __target_switch @@ -9678,6 +10304,7 @@ matrix WaveReadLaneAt(matrix value, int lane) __generic __glsl_extension(GL_KHR_shader_subgroup_shuffle) __spirv_version(1.3) +[require(cuda_glsl_hlsl_spirv, subgroup_shuffle)] T WaveShuffle(T value, int lane) { __target_switch @@ -9697,6 +10324,7 @@ T WaveShuffle(T value, int lane) __generic __glsl_extension(GL_KHR_shader_subgroup_shuffle) __spirv_version(1.3) +[require(cuda_glsl_hlsl_spirv, subgroup_shuffle)] vector WaveShuffle(vector value, int lane) { __target_switch @@ -9722,6 +10350,7 @@ matrix WaveShuffle(matrix value, int lane) __glsl_extension(GL_KHR_shader_subgroup_ballot) __spirv_version(1.3) +[require(cuda_glsl_hlsl_spirv, subgroup_ballot)] uint WavePrefixCountBits(bool value) { __target_switch @@ -9743,6 +10372,7 @@ uint WavePrefixCountBits(bool value) __glsl_extension(GL_KHR_shader_subgroup_ballot) __spirv_version(1.3) +[require(cuda_glsl_hlsl_spirv, subgroup_ballot)] uint4 WaveGetConvergedMulti() { __target_switch @@ -9793,6 +10423,7 @@ uint4 WaveMatch(matrix value) __target_intrinsic(hlsl) __target_intrinsic(cuda, "_popc(__ballot_sync(($1).x, $0) & _getLaneLtMask())") +[require(cuda_hlsl, waveprefix)] uint WaveMultiPrefixCountBits(bool value, uint4 mask); __generic @@ -9801,6 +10432,7 @@ __glsl_extension(GL_KHR_shader_subgroup_arithmetic) __spirv_version(1.3) __target_intrinsic(glsl, "subgroupExclusiveAnd($0)") __target_intrinsic(cuda, "_wavePrefixAnd(_getMultiPrefixMask(($1).x), $0)") +[require(cuda_glsl_hlsl, waveprefix)] T WaveMultiPrefixBitAnd(T expr, uint4 mask); __target_intrinsic(hlsl) @@ -9809,32 +10441,37 @@ __spirv_version(1.3) __target_intrinsic(glsl, "subgroupExclusiveAnd($0)") __target_intrinsic(cuda, "_wavePrefixAndMultiple(_getMultiPrefixMask(($1).x), $0)") __generic +[require(cuda_glsl_hlsl, waveprefix)] vector WaveMultiPrefixBitAnd(vector expr, uint4 mask); __generic __target_intrinsic(hlsl) __target_intrinsic(cuda, "_wavePrefixAndMultiple(_getMultiPrefixMask(($1).x), $0)") +[require(cuda_hlsl, waveprefix)] matrix WaveMultiPrefixBitAnd(matrix expr, uint4 mask); __generic __target_intrinsic(hlsl) __glsl_extension(GL_KHR_shader_subgroup_arithmetic) __spirv_version(1.3) -//__target_intrinsic(glsl, "subgroupExclusiveOr($0)") +__target_intrinsic(glsl, "subgroupExclusiveOr($0)") __target_intrinsic(cuda, "_wavePrefixOr(, _getMultiPrefixMask(($1).x), $0)") +[require(cuda_glsl_hlsl, waveprefix)] T WaveMultiPrefixBitOr(T expr, uint4 mask); __generic __target_intrinsic(hlsl) __glsl_extension(GL_KHR_shader_subgroup_arithmetic) __spirv_version(1.3) -//__target_intrinsic(glsl, "subgroupExclusiveOr($0)") +__target_intrinsic(glsl, "subgroupExclusiveOr($0)") __target_intrinsic(cuda, "_wavePrefixOrMultiple(_getMultiPrefixMask(($1).x), $0)") +[require(cuda_glsl_hlsl, waveprefix)] vector WaveMultiPrefixBitOr(vector expr, uint4 mask); __generic __target_intrinsic(hlsl) __target_intrinsic(cuda, "_wavePrefixOrMultiple(_getMultiPrefixMask(($1).x), $0)") +[require(cuda_hlsl, waveprefix)] matrix WaveMultiPrefixBitOr(matrix expr, uint4 mask); __generic @@ -9843,6 +10480,7 @@ __glsl_extension(GL_KHR_shader_subgroup_arithmetic) __spirv_version(1.3) __target_intrinsic(glsl, "subgroupExclusiveXor($0)") __target_intrinsic(cuda, "_wavePrefixXor(_getMultiPrefixMask(($1).x), $0)") +[require(cuda_glsl_hlsl, waveprefix)] T WaveMultiPrefixBitXor(T expr, uint4 mask); __generic @@ -9851,41 +10489,49 @@ __glsl_extension(GL_KHR_shader_subgroup_arithmetic) __spirv_version(1.3) __target_intrinsic(glsl, "subgroupExclusiveXor($0)") __target_intrinsic(cuda, "_wavePrefixXorMultiple(_getMultiPrefixMask(($1).x), $0)") +[require(cuda_glsl_hlsl, waveprefix)] vector WaveMultiPrefixBitXor(vector expr, uint4 mask); __generic __target_intrinsic(hlsl) __target_intrinsic(cuda, "_wavePrefixXorMultiple(_getMultiPrefixMask(($1).x), $0)") +[require(cuda_hlsl, waveprefix)] matrix WaveMultiPrefixBitXor(matrix expr, uint4 mask); __generic __target_intrinsic(hlsl) __target_intrinsic(cuda, "_wavePrefixProduct(_getMultiPrefixMask(($1).x), $0)") +[require(cuda_hlsl, waveprefix)] T WaveMultiPrefixProduct(T value, uint4 mask); __generic __target_intrinsic(hlsl) __target_intrinsic(cuda, "_wavePrefixProductMultiple(_getMultiPrefixMask(($1).x), $0)") +[require(cuda_hlsl, waveprefix)] vector WaveMultiPrefixProduct(vector value, uint4 mask); __generic __target_intrinsic(hlsl) __target_intrinsic(cuda, "_wavePrefixProductMultiple(_getMultiPrefixMask(($1).x), $0)") +[require(cuda_hlsl, waveprefix)] matrix WaveMultiPrefixProduct(matrix value, uint4 mask); __generic __target_intrinsic(hlsl) __target_intrinsic(cuda, "_wavePrefixSum(_getMultiPrefixMask(($1).x), $0)") +[require(cuda_hlsl, waveprefix)] T WaveMultiPrefixSum(T value, uint4 mask); __generic __target_intrinsic(hlsl) __target_intrinsic(cuda, "_wavePrefixSumMultiple(_getMultiPrefixMask(($1).x), $0 )") +[require(cuda_hlsl, waveprefix)] vector WaveMultiPrefixSum(vector value, uint4 mask); __generic __target_intrinsic(hlsl) __target_intrinsic(cuda, "_wavePrefixSumMultiple(_getMultiPrefixMask(($1).x), $0)") +[require(cuda_hlsl, waveprefix)] matrix WaveMultiPrefixSum(matrix value, uint4 mask); // `typedef`s to help with the fact that HLSL has been sorta-kinda case insensitive at various points @@ -9918,12 +10564,15 @@ for (int aa = 0; aa < kBaseBufferAccessLevelCount; ++aa) char const* glslTextureSizeFunc = (isReadOnly) ? "textureSize" : "imageSize"; char const* glslLoadFuncName = (isReadOnly) ? "texelFetch" : "imageLoad"; char const* spvLoadInstName = (isReadOnly) ? "OpImageFetch" : "OpImageRead"; + char const* requireToSetQuery = (isReadOnly) ? "[require(glsl_hlsl_spirv, texture_size)]" : "[require(glsl_hlsl_spirv, image_size)]"; + char const* requireToSet = (isReadOnly) ? "[require(glsl_hlsl_spirv, texture_sm_4_1)]" : "[require(glsl_hlsl_spirv, texture_sm_4_1_compute_fragment)]"; }}}} __generic extension __TextureImpl { [__readNone] + $(requireToSetQuery) void GetDimensions(out uint dim) { __target_switch @@ -9940,6 +10589,7 @@ extension __TextureImpl __glsl_extension(GL_EXT_samplerless_texture_functions) $(isReadOnly?"[__readNone] ":"") + $(requireToSet) T Load(int location) { __target_switch @@ -9954,17 +10604,21 @@ extension __TextureImpl } $(isReadOnly?"[__readNone] ":"") + $(requireToSet) T Load(int location, out uint status); __subscript(uint index) -> T { $(isReadOnly?"[__readNone] ":"") [ForceInline] + $(requireToSet) get { return Load((int)index); } ${{{{ if (access != SLANG_RESOURCE_ACCESS_READ) { }}}} - [nonmutating] set + [nonmutating] + $(requireToSet) + set { __target_switch { @@ -10054,6 +10708,7 @@ struct RaytracingAccelerationStructure {}; // 10.1.5 - Intersection Attributes Structure __target_intrinsic(hlsl, BuiltInTriangleIntersectionAttributes) +[require(cpp_cuda_glsl_hlsl_spirv, rayobject)] struct BuiltInTriangleIntersectionAttributes { __target_intrinsic(hlsl, barycentrics) @@ -10072,8 +10727,8 @@ struct BuiltInTriangleIntersectionAttributes // `executeCallableNV` is the GLSL intrinsic that will be used to implement // `CallShader()` for GLSL-based targets. // -__target_intrinsic(_GL_EXT_ray_tracing, "executeCallableEXT") -[require(glsl, raytracing)] +__target_intrinsic(glsl, "executeCallableEXT") +[require(glsl, raytracing_raygen_closesthit_miss_callable)] void __executeCallable(uint shaderIndex, int payloadLocation); // Next is the custom intrinsic that will compute the payload location @@ -10089,9 +10744,7 @@ int __callablePayloadLocation(__ref Payload payload); // GLSL equivalent. // __generic -[require(glsl, raytracing)] -[require(spirv, raytracing)] -[require(hlsl, raytracing)] +[require(glsl_hlsl_spirv, raytracing_raygen_closesthit_miss_callable)] void CallShader(uint shaderIndex, inout Payload payload) { __target_switch @@ -10132,8 +10785,8 @@ __intrinsic_op($(kIROp_ForceVarIntoStructTemporarily)) Ref __forceVarIntoStructTemporarily(inout T maybeStruct); __target_intrinsic(hlsl, "TraceRay") -[require(hlsl, raytracing)] __generic +[require(hlsl, raytracing)] void __traceRayHLSL( RaytracingAccelerationStructure AccelerationStructure, uint RayFlags, @@ -10144,8 +10797,8 @@ void __traceRayHLSL( RayDesc Ray, inout payload_t Payload); -__target_intrinsic(_GL_EXT_ray_tracing, "traceRayEXT") -[require(glsl, raytracing)] +__target_intrinsic(glsl, "traceRayEXT") +[require(glsl, raytracing_raygen_closesthit_miss)] void __traceRay( RaytracingAccelerationStructure AccelerationStructure, uint RayFlags, @@ -10171,10 +10824,8 @@ __intrinsic_op($(kIROp_GetVulkanRayTracingPayloadLocation)) int __rayPayloadLocation(__ref Payload payload); [ForceInline] -[require(glsl, raytracing)] -[require(spirv, raytracing)] -[require(hlsl, raytracing)] __generic +[require(cuda_glsl_hlsl_spirv, raytracing_raygen_closesthit_miss)] void TraceRay( RaytracingAccelerationStructure AccelerationStructure, uint RayFlags, @@ -10256,8 +10907,8 @@ void TraceRay( // https://github.com/KhronosGroup/GLSL/blob/master/extensions/nv/GLSL_NV_ray_tracing_motion_blur.txt __target_intrinsic(hlsl, "TraceMotionRay") -[require(hlsl, raytracing_motionblur)] __generic +[require(hlsl, raytracing_motionblur)] void __traceMotionRayHLSL( RaytracingAccelerationStructure AccelerationStructure, uint RayFlags, @@ -10271,7 +10922,7 @@ void __traceMotionRayHLSL( __glsl_extension(GL_NV_ray_tracing_motion_blur) __target_intrinsic(glsl, "traceRayMotionNV") -[require(glsl, raytracing_motionblur)] +[require(glsl, raytracing_motionblur_raygen_closesthit_miss)] void __traceMotionRay( RaytracingAccelerationStructure AccelerationStructure, uint RayFlags, @@ -10287,9 +10938,7 @@ void __traceMotionRay( int PayloadLocation); [ForceInline] -[require(glsl, raytracing_motionblur)] -[require(spirv, raytracing_motionblur)] -[require(hlsl, raytracing_motionblur)] +[require(glsl_hlsl_spirv, raytracing_motionblur_raygen_closesthit_miss)] __generic void TraceMotionRay( RaytracingAccelerationStructure AccelerationStructure, @@ -10373,17 +11022,13 @@ void TraceMotionRay( } // 10.3.3 -__target_intrinsic(hlsl) -[require(hlsl, raytracing)] -bool ReportHit(float tHit, uint hitKind, A attributes); -[require(spirv, raytracing)] -[require(glsl, raytracing)] +[require(glsl_spirv, raytracing_intersection)] bool __reportIntersection(float tHit, uint hitKind) { __target_switch { - case _GL_EXT_ray_tracing: __intrinsic_asm "reportIntersectionEXT"; + case glsl: __intrinsic_asm "reportIntersectionEXT"; case spirv: return spirv_asm { @@ -10393,27 +11038,31 @@ bool __reportIntersection(float tHit, uint hitKind) } __generic -__specialized_for_target(glsl) -__specialized_for_target(spirv) +[ForceInline] +[require(glsl_hlsl_spirv, raytracing_intersection)] bool ReportHit(float tHit, uint hitKind, A attributes) { - [__vulkanHitAttributes] - static A a; - - a = attributes; - return __reportIntersection(tHit, hitKind); + __target_switch + { + case hlsl: + __intrinsic_asm "ReportHit($0, $1, $2)"; + case glsl: + case spirv: + [__vulkanHitAttributes] + static A a; + a = attributes; + return __reportIntersection(tHit, hitKind); + } } // 10.3.4 -[require(hlsl, raytracing)] -[require(glsl, raytracing)] -[require(spirv, raytracing)] +[require(cuda_glsl_hlsl_spirv, raytracing_anyhit)] void IgnoreHit() { __target_switch { case hlsl: __intrinsic_asm "IgnoreHit"; - case _GL_EXT_ray_tracing: __intrinsic_asm "ignoreIntersectionEXT;"; + case glsl: __intrinsic_asm "ignoreIntersectionEXT;"; case cuda: __intrinsic_asm "optixIgnoreIntersection"; case spirv: spirv_asm @@ -10424,15 +11073,13 @@ void IgnoreHit() } // 10.3.5 -[require(hlsl, raytracing)] -[require(glsl, raytracing)] -[require(spirv, raytracing)] +[require(cuda_glsl_hlsl_spirv, raytracing_anyhit)] void AcceptHitAndEndSearch() { __target_switch { case hlsl: __intrinsic_asm "AcceptHitAndEndSearch"; - case _GL_EXT_ray_tracing: __intrinsic_asm "terminateRayEXT;"; + case glsl: __intrinsic_asm "terminateRayEXT;"; case cuda: __intrinsic_asm "optixTerminateRay"; case spirv: spirv_asm @@ -10450,15 +11097,13 @@ void AcceptHitAndEndSearch() // 10.4.1 - Ray Dispatch System Values [NonUniformReturn] -[require(hlsl, raytracing)] -[require(glsl, raytracing)] -[require(spirv, raytracing)] +[require(cuda_glsl_hlsl_spirv, raytracing_allstages)] uint3 DispatchRaysIndex() { __target_switch { case hlsl: __intrinsic_asm "DispatchRaysIndex"; - case _GL_EXT_ray_tracing: __intrinsic_asm "(gl_LaunchIDEXT)"; + case glsl: __intrinsic_asm "(gl_LaunchIDEXT)"; case cuda: __intrinsic_asm "optixGetLaunchIndex"; case spirv: return spirv_asm @@ -10468,15 +11113,13 @@ uint3 DispatchRaysIndex() } } -[require(hlsl, raytracing)] -[require(glsl, raytracing)] -[require(spirv, raytracing)] +[require(cuda_glsl_hlsl_spirv, raytracing_allstages)] uint3 DispatchRaysDimensions() { __target_switch { case hlsl: __intrinsic_asm "DispatchRaysDimensions"; - case _GL_EXT_ray_tracing: __intrinsic_asm "(gl_LaunchSizeEXT)"; + case glsl: __intrinsic_asm "(gl_LaunchSizeEXT)"; case cuda: __intrinsic_asm "optixGetLaunchDimensions"; case spirv: return spirv_asm @@ -10489,15 +11132,13 @@ uint3 DispatchRaysDimensions() // 10.4.2 - Ray System Values [NonUniformReturn] -[require(hlsl, raytracing)] -[require(glsl, raytracing)] -[require(spirv, raytracing)] +[require(cuda_glsl_hlsl_spirv, raytracing_anyhit_closesthit_intersection_miss)] float3 WorldRayOrigin() { __target_switch { case hlsl: __intrinsic_asm "WorldRayOrigin"; - case _GL_EXT_ray_tracing: __intrinsic_asm "(gl_WorldRayOriginEXT)"; + case glsl: __intrinsic_asm "(gl_WorldRayOriginEXT)"; case cuda: __intrinsic_asm "optixGetWorldRayOrigin"; case spirv: return spirv_asm @@ -10508,15 +11149,13 @@ float3 WorldRayOrigin() } [NonUniformReturn] -[require(hlsl, raytracing)] -[require(glsl, raytracing)] -[require(spirv, raytracing)] +[require(cuda_glsl_hlsl_spirv, raytracing_anyhit_closesthit_intersection_miss)] float3 WorldRayDirection() { __target_switch { case hlsl: __intrinsic_asm "WorldRayDirection"; - case _GL_EXT_ray_tracing: __intrinsic_asm "(gl_WorldRayDirectionEXT)"; + case glsl: __intrinsic_asm "(gl_WorldRayDirectionEXT)"; case cuda: __intrinsic_asm "optixGetWorldRayDirection"; case spirv: return spirv_asm @@ -10527,15 +11166,13 @@ float3 WorldRayDirection() } [NonUniformReturn] -[require(hlsl, raytracing)] -[require(glsl, raytracing)] -[require(spirv, raytracing)] +[require(cuda_glsl_hlsl_spirv, raytracing_anyhit_closesthit_intersection_miss)] float RayTMin() { __target_switch { case hlsl: __intrinsic_asm "RayTMin"; - case _GL_EXT_ray_tracing: __intrinsic_asm "(gl_RayTminEXT)"; + case glsl: __intrinsic_asm "(gl_RayTminEXT)"; case cuda: __intrinsic_asm "optixGetRayTmin"; case spirv: return spirv_asm @@ -10556,15 +11193,13 @@ float RayTMin() // to the appropriate Vulkan stages. // [NonUniformReturn] -[require(hlsl, raytracing)] -[require(glsl, raytracing)] -[require(spirv, raytracing)] +[require(cuda_glsl_hlsl_spirv, raytracing_anyhit_closesthit_intersection_miss)] float RayTCurrent() { __target_switch { case hlsl: __intrinsic_asm "RayTCurrent"; - case _GL_EXT_ray_tracing: __intrinsic_asm "(gl_RayTmaxEXT)"; + case glsl: __intrinsic_asm "(gl_RayTmaxEXT)"; case cuda: __intrinsic_asm "optixGetRayTmax"; case spirv: return spirv_asm @@ -10574,15 +11209,13 @@ float RayTCurrent() } } -[require(hlsl, raytracing)] -[require(glsl, raytracing)] -[require(spirv, raytracing)] +[require(cuda_glsl_hlsl_spirv, raytracing_anyhit_closesthit_intersection_miss)] uint RayFlags() { __target_switch { case hlsl: __intrinsic_asm "RayFlags"; - case _GL_EXT_ray_tracing: __intrinsic_asm "(gl_IncomingRayFlagsEXT)"; + case glsl: __intrinsic_asm "(gl_IncomingRayFlagsEXT)"; case cuda: __intrinsic_asm "optixGetRayFlags"; case spirv: return spirv_asm @@ -10595,15 +11228,13 @@ uint RayFlags() // 10.4.3 - Primitive/Object Space System Values [NonUniformReturn] -[require(hlsl, raytracing)] -[require(glsl, raytracing)] -[require(spirv, raytracing)] +[require(cuda_glsl_hlsl_spirv, raytracing_anyhit_closesthit_intersection)] uint InstanceIndex() { __target_switch { case hlsl: __intrinsic_asm "InstanceIndex"; - case _GL_EXT_ray_tracing: __intrinsic_asm "(gl_InstanceID)"; + case glsl: __intrinsic_asm "(gl_InstanceID)"; case cuda: __intrinsic_asm "optixGetInstanceIndex"; case spirv: return spirv_asm @@ -10614,15 +11245,13 @@ uint InstanceIndex() } [NonUniformReturn] -[require(hlsl, raytracing)] -[require(glsl, raytracing)] -[require(spirv, raytracing)] +[require(cuda_glsl_hlsl_spirv, raytracing_anyhit_closesthit_intersection)] uint InstanceID() { __target_switch { case hlsl: __intrinsic_asm "InstanceID"; - case _GL_EXT_ray_tracing: __intrinsic_asm "(gl_InstanceCustomIndexEXT)"; + case glsl: __intrinsic_asm "(gl_InstanceCustomIndexEXT)"; case cuda: __intrinsic_asm "optixGetInstanceId"; case spirv: return spirv_asm @@ -10633,15 +11262,13 @@ uint InstanceID() } [NonUniformReturn] -[require(hlsl, raytracing)] -[require(glsl, raytracing)] -[require(spirv, raytracing)] +[require(cuda_glsl_hlsl_spirv, raytracing_anyhit_closesthit_intersection)] uint PrimitiveIndex() { __target_switch { case hlsl: __intrinsic_asm "PrimitiveIndex"; - case _GL_EXT_ray_tracing: __intrinsic_asm "(gl_PrimitiveID)"; + case glsl: __intrinsic_asm "(gl_PrimitiveID)"; case cuda: __intrinsic_asm "optixGetPrimitiveIndex"; case spirv: return spirv_asm @@ -10652,15 +11279,13 @@ uint PrimitiveIndex() } [NonUniformReturn] -[require(hlsl, raytracing)] -[require(glsl, raytracing)] -[require(spirv, raytracing)] +[require(cuda_glsl_hlsl_spirv, raytracing_anyhit_closesthit_intersection)] float3 ObjectRayOrigin() { __target_switch { case hlsl: __intrinsic_asm "ObjectRayOrigin"; - case _GL_EXT_ray_tracing: __intrinsic_asm "(gl_ObjectRayOriginEXT)"; + case glsl: __intrinsic_asm "(gl_ObjectRayOriginEXT)"; case cuda: __intrinsic_asm "optixGetObjectRayOrigin"; case spirv: return spirv_asm @@ -10671,15 +11296,13 @@ float3 ObjectRayOrigin() } [NonUniformReturn] -[require(hlsl, raytracing)] -[require(glsl, raytracing)] -[require(spirv, raytracing)] +[require(cuda_glsl_hlsl_spirv, raytracing_anyhit_closesthit_intersection)] float3 ObjectRayDirection() { __target_switch { case hlsl: __intrinsic_asm "ObjectRayDirection"; - case _GL_EXT_ray_tracing: __intrinsic_asm "(gl_ObjectRayDirectionEXT)"; + case glsl: __intrinsic_asm "(gl_ObjectRayDirectionEXT)"; case cuda: __intrinsic_asm "optixGetObjectRayDirection"; case spirv: return spirv_asm @@ -10692,15 +11315,13 @@ float3 ObjectRayDirection() // TODO: optix has an optixGetObjectToWorldTransformMatrix function that returns 12 // floats by reference. [NonUniformReturn] -[require(hlsl, raytracing)] -[require(glsl, raytracing)] -[require(spirv, raytracing)] +[require(glsl_hlsl_spirv, raytracing_anyhit_closesthit_intersection)] float3x4 ObjectToWorld3x4() { __target_switch { case hlsl: __intrinsic_asm "ObjectToWorld3x4"; - case _GL_EXT_ray_tracing: __intrinsic_asm "transpose(gl_ObjectToWorldEXT)"; + case glsl: __intrinsic_asm "transpose(gl_ObjectToWorldEXT)"; case spirv: return spirv_asm { @@ -10711,15 +11332,13 @@ float3x4 ObjectToWorld3x4() } [NonUniformReturn] -[require(hlsl, raytracing)] -[require(glsl, raytracing)] -[require(spirv, raytracing)] +[require(glsl_hlsl_spirv, raytracing_anyhit_closesthit_intersection)] float3x4 WorldToObject3x4() { __target_switch { case hlsl: __intrinsic_asm "WorldToObject3x4"; - case _GL_EXT_ray_tracing: __intrinsic_asm "transpose(gl_WorldToObjectEXT)"; + case glsl: __intrinsic_asm "transpose(gl_WorldToObjectEXT)"; case spirv: return spirv_asm { @@ -10730,15 +11349,13 @@ float3x4 WorldToObject3x4() } [NonUniformReturn] -[require(hlsl, raytracing)] -[require(glsl, raytracing)] -[require(spirv, raytracing)] +[require(glsl_hlsl_spirv, raytracing_anyhit_closesthit_intersection)] float4x3 ObjectToWorld4x3() { __target_switch { case hlsl: __intrinsic_asm "ObjectToWorld4x3"; - case _GL_EXT_ray_tracing: __intrinsic_asm "(gl_ObjectToWorldEXT)"; + case glsl: __intrinsic_asm "(gl_ObjectToWorldEXT)"; case spirv: return spirv_asm { @@ -10748,15 +11365,13 @@ float4x3 ObjectToWorld4x3() } [NonUniformReturn] -[require(hlsl, raytracing)] -[require(glsl, raytracing)] -[require(spirv, raytracing)] +[require(glsl_hlsl_spirv, raytracing_anyhit_closesthit_intersection)] float4x3 WorldToObject4x3() { __target_switch { case hlsl: __intrinsic_asm "WorldToObject4x3"; - case _GL_EXT_ray_tracing: __intrinsic_asm "(gl_WorldToObjectEXT)"; + case glsl: __intrinsic_asm "(gl_WorldToObjectEXT)"; case spirv: return spirv_asm { @@ -10772,9 +11387,7 @@ float4x3 WorldToObject4x3() __glsl_extension(GL_NV_ray_tracing_motion_blur) __glsl_extension(GL_EXT_ray_tracing) [NonUniformReturn] -[require(hlsl, raytracing_motionblur)] -[require(glsl, raytracing_motionblur)] -[require(spirv, raytracing_motionblur)] +[require(glsl_hlsl_spirv, raytracing_motionblur_anyhit_closesthit_intersection_miss)] float RayCurrentTime() { __target_switch @@ -10803,15 +11416,13 @@ float RayCurrentTime() // 10.4.4 - Hit Specific System values [NonUniformReturn] -[require(hlsl, raytracing)] -[require(glsl, raytracing)] -[require(spirv, raytracing)] +[require(cuda_glsl_hlsl_spirv, raytracing_anyhit_closesthit)] uint HitKind() { __target_switch { case hlsl: __intrinsic_asm "HitKind"; - case _GL_EXT_ray_tracing: __intrinsic_asm "(gl_HitKindEXT)"; + case glsl: __intrinsic_asm "(gl_HitKindEXT)"; case cuda: __intrinsic_asm "optixGetHitKind"; case spirv: return spirv_asm @@ -10851,6 +11462,7 @@ float dot2add(float2 left, float2 right, float acc); // Set the number of output vertices and primitives for a mesh shader invocation. __glsl_extension(GL_EXT_mesh_shader) __glsl_version(450) +[require(glsl_hlsl_spirv, meshshading)] void SetMeshOutputCounts(uint vertexCount, uint primitiveCount) { __target_switch @@ -10875,6 +11487,7 @@ void SetMeshOutputCounts(uint vertexCount, uint primitiveCount) // This function doesn't return. // [KnownBuiltin("DispatchMesh")] +[require(glsl_hlsl_spirv, meshshading)] void DispatchMesh

(uint threadGroupCountX, uint threadGroupCountY, uint threadGroupCountZ, __ref P meshPayload) { __target_switch @@ -11004,10 +11617,8 @@ extension __TextureImpl __target_intrinsic(glsl, "rayQueryInitializeEXT($0, $1, $2, $3, $4, $5, $6, $7)") __glsl_extension(GL_EXT_ray_query) - [require(glsl, rayquery)] - [require(spirv, rayquery)] + [require(glsl_spirv, rayquery)] [mutating] void __rayQueryInitializeEXT( RaytracingAccelerationStructure accelerationStructure, @@ -11158,11 +11767,9 @@ struct RayQuery // `RayQuery` to get the effective ray flags, which // must obey any API-imposed restrictions. // - [require(glsl, rayquery)] - [require(spirv, rayquery)] - [require(hlsl, rayquery)] [__unsafeForceInlineEarly] [mutating] + [require(glsl_hlsl_spirv, rayquery)] void TraceRayInline( RaytracingAccelerationStructure accelerationStructure, RAY_FLAG rayFlags, @@ -11200,10 +11807,8 @@ struct RayQuery // that was found. // __glsl_extension(GL_EXT_ray_query) - [require(glsl, rayquery)] - [require(spirv, rayquery)] - [require(hlsl, rayquery)] [mutating] + [require(glsl_hlsl_spirv, rayquery)] bool Proceed() { __target_switch @@ -11224,10 +11829,8 @@ struct RayQuery // `Proceed()` calls will return `false`. // __glsl_extension(GL_EXT_ray_query) - [require(glsl, rayquery)] - [require(spirv, rayquery)] - [require(hlsl, rayquery)] [mutating] + [require(glsl_hlsl_spirv, rayquery)] void Abort() { __target_switch @@ -11240,11 +11843,9 @@ struct RayQuery // Commit the current non-opaque triangle hit. __glsl_extension(GL_EXT_ray_query) - [require(glsl, rayquery)] - [require(spirv, rayquery)] - [require(hlsl, rayquery)] [__NoSideEffect] [mutating] + [require(glsl_hlsl_spirv, rayquery)] void CommitNonOpaqueTriangleHit() { __target_switch @@ -11257,11 +11858,9 @@ struct RayQuery // Commit the current procedural primitive hit, with hit time `t`. __glsl_extension(GL_EXT_ray_query) - [require(glsl, rayquery)] - [require(spirv, rayquery)] - [require(hlsl, rayquery)] [__NoSideEffect] [mutating] + [require(glsl_hlsl_spirv, rayquery)] void CommitProceduralPrimitiveHit(float t) { __target_switch @@ -11282,11 +11881,9 @@ struct RayQuery // user code. // __glsl_extension(GL_EXT_ray_query) - [require(glsl, rayquery)] - [require(spirv, rayquery)] - [require(hlsl, rayquery)] [__NoSideEffect] [NonUniformReturn] + [require(glsl_hlsl_spirv, rayquery)] CANDIDATE_TYPE CandidateType() { __target_switch @@ -11303,11 +11900,9 @@ struct RayQuery // Get the status of the committed (closest) hit, if any. __glsl_extension(GL_EXT_ray_query) - [require(glsl, rayquery)] - [require(spirv, rayquery)] - [require(hlsl, rayquery)] [__NoSideEffect] [NonUniformReturn] + [require(glsl_hlsl_spirv, rayquery)] COMMITTED_STATUS CommittedStatus() { __target_switch @@ -11324,11 +11919,9 @@ struct RayQuery } __glsl_extension(GL_EXT_ray_query) - [require(glsl, rayquery)] - [require(spirv, rayquery)] - [require(hlsl, rayquery)] [__NoSideEffect] [NonUniformReturn] + [require(glsl_hlsl_spirv, rayquery)] bool CandidateProceduralPrimitiveNonOpaque() { __target_switch @@ -11346,11 +11939,9 @@ struct RayQuery } __glsl_extension(GL_EXT_ray_query) - [require(glsl, rayquery)] - [require(spirv, rayquery)] - [require(hlsl, rayquery)] [__NoSideEffect] [NonUniformReturn] + [require(glsl_hlsl_spirv, rayquery)] float CandidateTriangleRayT() { __target_switch @@ -11366,11 +11957,9 @@ struct RayQuery } } __glsl_extension(GL_EXT_ray_query) - [require(glsl, rayquery)] - [require(spirv, rayquery)] - [require(hlsl, rayquery)] [__NoSideEffect] [NonUniformReturn] + [require(glsl_hlsl_spirv, rayquery)] float CommittedRayT() { __target_switch @@ -11388,9 +11977,8 @@ struct RayQuery ///missing hlsl equivlent; only implemented for glsl & spirv __glsl_extension(GL_EXT_ray_query) - [require(glsl, rayquery)] - [require(spirv, rayquery)] [__NoSideEffect] + [require(glsl_spirv, rayquery)] int CandidateRayInstanceCustomIndex() { __target_switch @@ -11406,9 +11994,8 @@ struct RayQuery } __glsl_extension(GL_EXT_ray_query) - [require(glsl, rayquery)] - [require(spirv, rayquery)] [__NoSideEffect] + [require(glsl_spirv, rayquery)] int CommittedRayInstanceCustomIndex() { __target_switch @@ -11424,9 +12011,8 @@ struct RayQuery } __glsl_extension(GL_EXT_ray_query) - [require(glsl, rayquery)] - [require(spirv, rayquery)] [__NoSideEffect] + [require(glsl_spirv, rayquery)] int CandidateRayInstanceId() { __target_switch @@ -11442,9 +12028,8 @@ struct RayQuery } __glsl_extension(GL_EXT_ray_query) - [require(glsl, rayquery)] - [require(spirv, rayquery)] [__NoSideEffect] + [require(glsl_spirv, rayquery)] int CommittedRayInstanceId() { __target_switch @@ -11460,9 +12045,8 @@ struct RayQuery } __glsl_extension(GL_EXT_ray_query) - [require(glsl, rayquery)] - [require(spirv, rayquery)] [__NoSideEffect] + [require(glsl_spirv, rayquery)] uint CandidateRayInstanceShaderBindingTableRecordOffset() { __target_switch @@ -11478,9 +12062,8 @@ struct RayQuery } __glsl_extension(GL_EXT_ray_query) - [require(glsl, rayquery)] - [require(spirv, rayquery)] [__NoSideEffect] + [require(glsl_spirv, rayquery)] uint CommittedRayInstanceShaderBindingTableRecordOffset() { __target_switch @@ -11496,9 +12079,8 @@ struct RayQuery } __glsl_extension(GL_EXT_ray_query) - [require(glsl, rayquery)] - [require(spirv, rayquery)] [__NoSideEffect] + [require(glsl_spirv, rayquery)] int CandidateRayGeometryIndex() { __target_switch @@ -11514,9 +12096,8 @@ struct RayQuery } __glsl_extension(GL_EXT_ray_query) - [require(glsl, rayquery)] - [require(spirv, rayquery)] [__NoSideEffect] + [require(glsl_spirv, rayquery)] int CommittedRayGeometryIndex() { __target_switch @@ -11532,9 +12113,8 @@ struct RayQuery } __glsl_extension(GL_EXT_ray_query) - [require(glsl, rayquery)] - [require(spirv, rayquery)] [__NoSideEffect] + [require(glsl_spirv, rayquery)] int CandidateRayPrimitiveIndex() { __target_switch @@ -11550,9 +12130,8 @@ struct RayQuery } __glsl_extension(GL_EXT_ray_query) - [require(glsl, rayquery)] - [require(spirv, rayquery)] [__NoSideEffect] + [require(glsl_spirv, rayquery)] int CommittedRayPrimitiveIndex() { __target_switch @@ -11568,9 +12147,8 @@ struct RayQuery } __glsl_extension(GL_EXT_ray_query) - [require(glsl, rayquery)] - [require(spirv, rayquery)] [__NoSideEffect] + [require(glsl_spirv, rayquery)] float2 CandidateRayBarycentrics() { __target_switch @@ -11586,9 +12164,8 @@ struct RayQuery } __glsl_extension(GL_EXT_ray_query) - [require(glsl, rayquery)] - [require(spirv, rayquery)] [__NoSideEffect] + [require(glsl_spirv, rayquery)] float2 CommittedRayBarycentrics() { __target_switch @@ -11604,9 +12181,8 @@ struct RayQuery } __glsl_extension(GL_EXT_ray_query) - [require(glsl, rayquery)] - [require(spirv, rayquery)] [__NoSideEffect] + [require(glsl_spirv, rayquery)] bool CandidateRayFrontFace() { __target_switch @@ -11622,9 +12198,8 @@ struct RayQuery } __glsl_extension(GL_EXT_ray_query) - [require(glsl, rayquery)] - [require(spirv, rayquery)] [__NoSideEffect] + [require(glsl_spirv, rayquery)] bool CommittedRayFrontFace() { __target_switch @@ -11640,9 +12215,8 @@ struct RayQuery } __glsl_extension(GL_EXT_ray_query) - [require(glsl, rayquery)] - [require(spirv, rayquery)] [__NoSideEffect] + [require(glsl_spirv, rayquery)] float3 CandidateRayObjectRayDirection() { __target_switch @@ -11658,9 +12232,8 @@ struct RayQuery } __glsl_extension(GL_EXT_ray_query) - [require(glsl, rayquery)] - [require(spirv, rayquery)] [__NoSideEffect] + [require(glsl_spirv, rayquery)] float3 CommittedRayObjectRayDirection() { __target_switch @@ -11676,9 +12249,8 @@ struct RayQuery } __glsl_extension(GL_EXT_ray_query) - [require(glsl, rayquery)] - [require(spirv, rayquery)] [__NoSideEffect] + [require(glsl_spirv, rayquery)] float3 CandidateRayObjectRayOrigin() { __target_switch @@ -11694,9 +12266,8 @@ struct RayQuery } __glsl_extension(GL_EXT_ray_query) - [require(glsl, rayquery)] - [require(spirv, rayquery)] [__NoSideEffect] + [require(glsl_spirv, rayquery)] float3 CommittedRayObjectRayOrigin() { __target_switch @@ -11712,9 +12283,8 @@ struct RayQuery } __glsl_extension(GL_EXT_ray_query) - [require(glsl, rayquery)] - [require(spirv, rayquery)] [__NoSideEffect] + [require(glsl_spirv, rayquery)] float4x3 CandidateRayObjectToWorld() { __target_switch @@ -11730,9 +12300,8 @@ struct RayQuery } __glsl_extension(GL_EXT_ray_query) - [require(glsl, rayquery)] - [require(spirv, rayquery)] [__NoSideEffect] + [require(glsl_spirv, rayquery)] float4x3 CommittedRayObjectToWorld() { __target_switch @@ -11748,9 +12317,8 @@ struct RayQuery } __glsl_extension(GL_EXT_ray_query) - [require(glsl, rayquery)] - [require(spirv, rayquery)] [__NoSideEffect] + [require(glsl_spirv, rayquery)] float4x3 CandidateRayWorldToObject() { __target_switch @@ -11766,9 +12334,8 @@ struct RayQuery } __glsl_extension(GL_EXT_ray_query) - [require(glsl, rayquery)] - [require(spirv, rayquery)] [__NoSideEffect] + [require(glsl_spirv, rayquery)] float4x3 CommittedRayWorldToObject() { __target_switch @@ -11796,7 +12363,7 @@ ${{{{ __glsl_extension(GL_EXT_ray_query) __glsl_extension(GL_EXT_ray_tracing_position_fetch) - [require(glsl, rayquery_pos)] + [require(glsl, rayquery_position)] [__NoSideEffect] void __glslGetIntersectionTriangleVertexPositions$(ccName)(out float3 arr[3]) { @@ -11804,8 +12371,8 @@ ${{{{ }; __glsl_extension(GL_EXT_ray_query) - [require(glsl, rayquery_pos)] - [require(spirv, rayquery_pos)] + [require(glsl, rayquery_position)] + [require(spirv, rayquery_position)] [__NoSideEffect] float3[3] $(ccName)GetIntersectionTriangleVertexPositions() { @@ -11838,11 +12405,9 @@ ${{{{ }}}} __glsl_extension(GL_EXT_ray_query) - [require(glsl, rayquery)] - [require(spirv, rayquery)] - [require(hlsl, rayquery)] [__NoSideEffect] [NonUniformReturn] + [require(glsl_hlsl_spirv, rayquery)] float3x4 $(ccName)$(matName)3x4() { __target_switch @@ -11860,11 +12425,9 @@ ${{{{ } __glsl_extension(GL_EXT_ray_query) - [require(glsl, rayquery)] - [require(spirv, rayquery)] - [require(hlsl, rayquery)] [__readNone] [NonUniformReturn] + [require(glsl_hlsl_spirv, rayquery)] float4x3 $(ccName)$(matName)4x3() { __target_switch @@ -11905,11 +12468,9 @@ ${{{{ }}}} __glsl_extension(GL_EXT_ray_query) - [require(glsl, rayquery)] - [require(spirv, rayquery)] - [require(hlsl, rayquery)] [__NoSideEffect] [NonUniformReturn] + [require(glsl_hlsl_spirv, rayquery)] $(method.type) $(ccName)$(method.hlslName)() { __target_switch @@ -11932,11 +12493,9 @@ ${{{{ // Access properties of the ray being traced. __glsl_extension(GL_EXT_ray_query) - [require(glsl, rayquery)] - [require(spirv, rayquery)] - [require(hlsl, rayquery)] [__NoSideEffect] [NonUniformReturn] + [require(glsl_hlsl_spirv, rayquery)] uint RayFlags() { __target_switch @@ -11952,11 +12511,9 @@ ${{{{ } __glsl_extension(GL_EXT_ray_query) - [require(glsl, rayquery)] - [require(spirv, rayquery)] - [require(hlsl, rayquery)] [__NoSideEffect] [NonUniformReturn] + [require(glsl_hlsl_spirv, rayquery)] float3 WorldRayOrigin() { __target_switch @@ -11972,11 +12529,9 @@ ${{{{ } __glsl_extension(GL_EXT_ray_query) - [require(glsl, rayquery)] - [require(spirv, rayquery)] - [require(hlsl, rayquery)] [__NoSideEffect] [NonUniformReturn] + [require(glsl_hlsl_spirv, rayquery)] float3 WorldRayDirection() { __target_switch @@ -11992,11 +12547,9 @@ ${{{{ } __glsl_extension(GL_EXT_ray_query) - [require(glsl, rayquery)] - [require(spirv, rayquery)] - [require(hlsl, rayquery)] [__NoSideEffect] [NonUniformReturn] + [require(glsl_hlsl_spirv, rayquery)] float RayTMin() { __target_switch @@ -12018,6 +12571,7 @@ ${{{{ __magic_type(SubpassInputType) __intrinsic_type($(kIROp_SubpassInputType)) +[require(glsl_hlsl_spirv, subpass)] struct __SubpassImpl { } @@ -12029,6 +12583,7 @@ __generic extension __SubpassImpl { [ForceInline] + [require(hlsl_spirv, subpass)] T SubpassLoad() { __target_switch @@ -12054,6 +12609,7 @@ __generic extension __SubpassImpl { [ForceInline] + [require(hlsl_spirv, subpass)] T SubpassLoad(int sample) { __target_switch @@ -12136,10 +12692,8 @@ struct HitObject /// Executes ray traversal (including anyhit and intersection shaders) like TraceRay, but returns the /// resulting hit information as a HitObject and does not trigger closesthit or miss shaders. - [require(glsl, ser)] - [require(spirv, ser)] - [require(hlsl, ser)] [ForceInline] + [require(glsl_hlsl_spirv, ser_raygen_closesthit_miss)] static HitObject TraceRay( RaytracingAccelerationStructure AccelerationStructure, uint RayFlags, @@ -12232,9 +12786,7 @@ struct HitObject /// Executes motion ray traversal (including anyhit and intersection shaders) like TraceRay, but returns the /// resulting hit information as a HitObject and does not trigger closesthit or miss shaders. [ForceInline] - [require(glsl, ser_motion)] - [require(spirv, ser_motion)] - [require(hlsl, ser_motion)] + [require(glsl_hlsl_spirv, ser_motion_raygen_closesthit_miss)] static HitObject TraceMotionRay( RaytracingAccelerationStructure AccelerationStructure, uint RayFlags, @@ -12332,10 +12884,8 @@ struct HitObject /// TraceRay. The computed index must reference a valid hit group record in the shader table. The /// Attributes parameter must either be an attribute struct, such as /// BuiltInTriangleIntersectionAttributes, or another HitObject to copy the attributes from. - [require(glsl, ser)] - [require(spirv, ser)] - [require(hlsl, ser)] [ForceInline] + [require(glsl_hlsl_spirv, ser_raygen_closesthit_miss)] static HitObject MakeHit( RaytracingAccelerationStructure AccelerationStructure, uint InstanceIndex, @@ -12419,10 +12969,8 @@ struct HitObject /// See MakeHit but handles Motion /// Currently only supported on VK - [require(glsl, ser_motion)] - [require(spirv, ser_motion)] - [require(hlsl, ser_motion)] - [ForceInline] + [ForceInline] + [require(glsl_hlsl_spirv, ser_motion_raygen_closesthit_miss)] static HitObject MakeMotionHit( RaytracingAccelerationStructure AccelerationStructure, uint InstanceIndex, @@ -12503,10 +13051,8 @@ struct HitObject /// reference a valid hit group record in the shader table. The Attributes parameter must either be an /// attribute struct, such as BuiltInTriangleIntersectionAttributes, or another HitObject to copy the /// attributes from. - [require(glsl, ser)] - [require(spirv, ser)] - [require(hlsl, ser)] [ForceInline] + [require(glsl_hlsl_spirv, ser_raygen_closesthit_miss)] static HitObject MakeHit( uint HitGroupRecordIndex, RaytracingAccelerationStructure AccelerationStructure, @@ -12583,9 +13129,8 @@ struct HitObject } /// See MakeHit but handles Motion /// Currently only supported on VK - [require(glsl, ser_motion)] - [require(spirv, ser_motion)] [ForceInline] + [require(glsl_spirv, ser_motion_raygen_closesthit_miss)] static HitObject MakeMotionHit( uint HitGroupRecordIndex, RaytracingAccelerationStructure AccelerationStructure, @@ -12655,10 +13200,8 @@ struct HitObject /// tracing a ray. The provided shader table index must reference a valid miss record in the shader /// table. [__requiresNVAPI] - [require(glsl, ser)] - [require(spirv, ser)] - [require(hlsl, ser)] [ForceInline] + [require(glsl_hlsl_spirv, ser_raygen_closesthit_miss)] static HitObject MakeMiss( uint MissShaderIndex, RayDesc Ray) @@ -12692,10 +13235,8 @@ struct HitObject /// See MakeMiss but handles Motion /// Currently only supported on VK - [require(glsl, ser_motion)] - [require(spirv, ser_motion)] - [require(hlsl, ser_motion)] [ForceInline] + [require(glsl_hlsl_spirv, ser_motion_raygen_closesthit_miss)] static HitObject MakeMotionMiss( uint MissShaderIndex, RayDesc Ray, @@ -12737,10 +13278,8 @@ struct HitObject /// scenarios where future control flow for some threads is known to process neither a hit nor a /// miss. [__requiresNVAPI] - [require(glsl, ser)] - [require(spirv, ser)] - [require(hlsl, ser)] [ForceInline] + [require(glsl_hlsl_spirv, ser_raygen_closesthit_miss)] static HitObject MakeNop() { __target_switch @@ -12771,10 +13310,8 @@ struct HitObject /// Invokes closesthit or miss shading for the specified hit object. In case of a NOP HitObject, no /// shader is invoked. [__requiresNVAPI] - [require(glsl, ser)] - [require(spirv, ser)] - [require(hlsl, ser)] [ForceInline] + [require(glsl_hlsl_spirv, ser_raygen_closesthit_miss)] static void Invoke( RaytracingAccelerationStructure AccelerationStructure, HitObject HitOrMiss, @@ -12825,10 +13362,8 @@ struct HitObject /// Returns true if the HitObject encodes a miss, otherwise returns false. [__requiresNVAPI] - [require(glsl, ser)] - [require(spirv, ser)] - [require(hlsl, ser)] [ForceInline] + [require(glsl_hlsl_spirv, ser_raygen_closesthit_miss)] bool IsMiss() { __target_switch @@ -12847,10 +13382,8 @@ struct HitObject /// Returns true if the HitObject encodes a hit, otherwise returns false. [__requiresNVAPI] - [require(glsl, ser)] - [require(spirv, ser)] - [require(hlsl, ser)] [ForceInline] + [require(glsl_hlsl_spirv, ser_raygen_closesthit_miss)] bool IsHit() { __target_switch @@ -12869,10 +13402,8 @@ struct HitObject /// Returns true if the HitObject encodes a nop, otherwise returns false. [__requiresNVAPI] - [require(glsl, ser)] - [require(spirv, ser)] - [require(hlsl, ser)] [ForceInline] + [require(glsl_hlsl_spirv, ser_raygen_closesthit_miss)] bool IsNop() { __target_switch @@ -12891,10 +13422,8 @@ struct HitObject /// Queries ray properties from HitObject. Valid if the hit object represents a hit or a miss. [__requiresNVAPI] - [require(glsl, ser)] - [require(spirv, ser)] - [require(hlsl, ser)] [ForceInline] + [require(glsl_hlsl_spirv, ser_raygen_closesthit_miss)] RayDesc GetRayDesc() { __target_switch @@ -12923,10 +13452,8 @@ struct HitObject /// Queries shader table index from HitObject. Valid if the hit object represents a hit or a miss. [__requiresNVAPI] __glsl_extension(GL_EXT_ray_tracing) - [require(glsl, ser)] - [require(spirv, ser)] - [require(hlsl, ser)] [ForceInline] + [require(glsl_hlsl_spirv, ser_raygen_closesthit_miss)] uint GetShaderTableIndex() { __target_switch @@ -12946,10 +13473,8 @@ struct HitObject /// Returns the instance index of a hit. Valid if the hit object represents a hit. [__requiresNVAPI] __glsl_extension(GL_EXT_ray_tracing) - [require(glsl, ser)] - [require(spirv, ser)] - [require(hlsl, ser)] [ForceInline] + [require(glsl_hlsl_spirv, ser_raygen_closesthit_miss)] uint GetInstanceIndex() { __target_switch @@ -12969,10 +13494,8 @@ struct HitObject /// Returns the instance ID of a hit. Valid if the hit object represents a hit. [__requiresNVAPI] __glsl_extension(GL_EXT_ray_tracing) - [require(glsl, ser)] - [require(spirv, ser)] - [require(hlsl, ser)] [ForceInline] + [require(glsl_hlsl_spirv, ser_raygen_closesthit_miss)] uint GetInstanceID() { __target_switch @@ -12992,10 +13515,8 @@ struct HitObject /// Returns the geometry index of a hit. Valid if the hit object represents a hit. [__requiresNVAPI] __glsl_extension(GL_EXT_ray_tracing) - [require(glsl, ser)] - [require(spirv, ser)] - [require(hlsl, ser)] [ForceInline] + [require(glsl_hlsl_spirv, ser_raygen_closesthit_miss)] uint GetGeometryIndex() { __target_switch @@ -13015,10 +13536,8 @@ struct HitObject /// Returns the primitive index of a hit. Valid if the hit object represents a hit. [__requiresNVAPI] __glsl_extension(GL_EXT_ray_tracing) - [require(glsl, ser)] - [require(spirv, ser)] - [require(hlsl, ser)] [ForceInline] + [require(glsl_hlsl_spirv, ser_raygen_closesthit_miss)] uint GetPrimitiveIndex() { __target_switch @@ -13038,10 +13557,8 @@ struct HitObject /// Returns the hit kind. Valid if the hit object represents a hit. [__requiresNVAPI] __glsl_extension(GL_EXT_ray_tracing) - [require(glsl, ser)] - [require(spirv, ser)] - [require(hlsl, ser)] [ForceInline] + [require(glsl_hlsl_spirv, ser_raygen_closesthit_miss)] uint GetHitKind() { __target_switch @@ -13060,10 +13577,8 @@ struct HitObject [__requiresNVAPI] __glsl_extension(GL_EXT_ray_tracing) - [require(glsl, ser)] - [require(spirv, ser)] - [require(hlsl, ser)] [ForceInline] + [require(glsl_hlsl_spirv, ser_raygen_closesthit_miss)] float4x3 GetWorldToObject() { __target_switch @@ -13082,10 +13597,8 @@ struct HitObject [__requiresNVAPI] __glsl_extension(GL_EXT_ray_tracing) - [require(glsl, ser)] - [require(spirv, ser)] - [require(hlsl, ser)] [ForceInline] + [require(glsl_hlsl_spirv, ser_raygen_closesthit_miss)] float4x3 GetObjectToWorld() { __target_switch @@ -13102,9 +13615,8 @@ struct HitObject } } - [require(glsl, ser)] - [require(spirv, ser)] [ForceInline] + [require(glsl_spirv, ser_raygen_closesthit_miss)] float GetCurrentTime() { __target_switch { @@ -13120,9 +13632,8 @@ struct HitObject } } - [require(glsl, ser)] - [require(spirv, ser)] [ForceInline] + [require(glsl_spirv, ser_raygen_closesthit_miss)] float3 GetObjectRayOrigin() { __target_switch { @@ -13138,9 +13649,8 @@ struct HitObject } } - [require(glsl, ser)] - [require(spirv, ser)] [ForceInline] + [require(glsl_spirv, ser_raygen_closesthit_miss)] float3 GetObjectRayDirection() { __target_switch { @@ -13156,9 +13666,8 @@ struct HitObject } } - [require(glsl, ser)] - [require(spirv, ser)] [ForceInline] + [require(glsl_spirv, ser_raygen_closesthit_miss)] uint2 GetShaderRecordBufferHandle() { __target_switch { @@ -13175,10 +13684,8 @@ struct HitObject } /// Returns the attributes of a hit. Valid if the hit object represents a hit or a miss. - [require(glsl, ser)] - [require(spirv, ser)] - [require(hlsl, ser)] [ForceInline] + [require(glsl_spirv, ser_raygen_closesthit_miss)] attr_t GetAttributes() { __target_switch @@ -13226,12 +13733,12 @@ struct HitObject __target_intrinsic(hlsl, "NvGetAttributesFromHitObject($0, $1)") [__requiresNVAPI] - [require(hlsl, ser)] + [require(hlsl, ser_raygen_closesthit_miss)] void __hlslGetAttributesFromHitObject(out T t); __target_intrinsic(hlsl, "NvMakeHitWithRecordIndex") [__requiresNVAPI] - [require(hlsl, ser)] + [require(hlsl, ser_raygen_closesthit_miss)] static void __hlslMakeHitWithRecordIndex( uint HitGroupRecordIndex, RaytracingAccelerationStructure AccelerationStructure, @@ -13245,7 +13752,7 @@ struct HitObject __target_intrinsic(hlsl, "NvMakeHit") [__requiresNVAPI] - [require(hlsl, ser)] + [require(hlsl, ser_raygen_closesthit_miss)] static void __hlslMakeHit(RaytracingAccelerationStructure AccelerationStructure, uint InstanceIndex, uint GeometryIndex, @@ -13259,7 +13766,7 @@ struct HitObject __target_intrinsic(hlsl, "NvTraceRayHitObject") [__requiresNVAPI] - [require(hlsl, ser)] + [require(hlsl, ser_raygen_closesthit_miss)] static void __hlslTraceRay( RaytracingAccelerationStructure AccelerationStructure, uint RayFlags, @@ -13277,8 +13784,8 @@ struct HitObject __glsl_extension(GL_NV_shader_invocation_reorder) __glsl_extension(GL_EXT_ray_tracing) - [require(glsl, ser)] __target_intrinsic(glsl, "hitObjectRecordMissNV") + [require(glsl, ser_raygen_closesthit_miss)] static void __glslMakeMiss( out HitObject hitObj, uint MissShaderIndex, @@ -13291,8 +13798,8 @@ struct HitObject __glsl_extension(GL_NV_shader_invocation_reorder) __glsl_extension(GL_EXT_ray_tracing) __glsl_extension(GL_NV_ray_tracing_motion_blur) - [require(glsl, ser_motion)] __target_intrinsic(glsl, "hitObjectRecordMissMotionNV") + [require(glsl, ser_motion_raygen_closesthit_miss)] static void __glslMakeMotionMiss( out HitObject hitObj, uint MissShaderIndex, @@ -13304,45 +13811,45 @@ struct HitObject __glsl_extension(GL_EXT_ray_tracing) __glsl_extension(GL_NV_shader_invocation_reorder) - [require(glsl, ser)] __target_intrinsic(glsl, "hitObjectRecordEmptyNV") + [require(glsl, ser_raygen_closesthit_miss)] static void __glslMakeNop(out HitObject hitObj); __glsl_extension(GL_EXT_ray_tracing) __glsl_extension(GL_NV_shader_invocation_reorder) - [require(glsl, ser)] __target_intrinsic(glsl, "hitObjectGetObjectRayDirectionNV($0)") + [require(glsl, ser_raygen_closesthit_miss)] float3 __glslGetRayDirection(); __glsl_extension(GL_EXT_ray_tracing) __glsl_extension(GL_NV_shader_invocation_reorder) - [require(glsl, ser)] __target_intrinsic(glsl, "hitObjectGetWorldRayDirectionNV($0)") + [require(glsl, ser_raygen_closesthit_miss)] float3 __glslGetRayWorldDirection(); __glsl_extension(GL_EXT_ray_tracing) __glsl_extension(GL_NV_shader_invocation_reorder) - [require(glsl, ser)] __target_intrinsic(glsl, "hitObjectGetWorldRayOriginNV($0)") + [require(glsl, ser_raygen_closesthit_miss)] float3 __glslGetRayWorldOrigin(); __glsl_extension(GL_EXT_ray_tracing) __glsl_extension(GL_NV_shader_invocation_reorder) - [require(glsl, ser)] __target_intrinsic(glsl, "hitObjectGetRayTMaxNV($0)") + [require(glsl, ser_raygen_closesthit_miss)] float __glslGetTMax(); __glsl_extension(GL_EXT_ray_tracing) __glsl_extension(GL_NV_shader_invocation_reorder) - [require(glsl, ser)] __target_intrinsic(glsl, "hitObjectGetRayTMinNV($0)") + [require(glsl, ser_raygen_closesthit_miss)] float __glslGetTMin(); // "void hitObjectRecordHitWithIndexNV(hitObjectNV, accelerationStructureEXT,int,int,int,uint,uint,vec3,float,vec3,float,int);" __glsl_extension(GL_EXT_ray_tracing) __glsl_extension(GL_NV_shader_invocation_reorder) - [require(glsl, ser)] __target_intrinsic(glsl, "hitObjectRecordHitWithIndexNV") + [require(glsl, ser_raygen_closesthit_miss)] static void __glslMakeHitWithIndex( out HitObject hitObj, RaytracingAccelerationStructure accelerationStructure, @@ -13361,8 +13868,8 @@ struct HitObject __glsl_extension(GL_EXT_ray_tracing) __glsl_extension(GL_NV_shader_invocation_reorder) __glsl_extension(GL_NV_ray_tracing_motion_blur) - [require(glsl, ser_motion)] __target_intrinsic(glsl, "hitObjectRecordHitWithIndexMotionNV") + [require(glsl, ser_motion_raygen_closesthit_miss)] static void __glslMakeMotionHitWithIndex( out HitObject hitObj, RaytracingAccelerationStructure accelerationStructure, @@ -13381,8 +13888,8 @@ struct HitObject // "void hitObjectRecordHitNV(hitObjectNV,accelerationStructureEXT,int,int,int,uint,uint,uint,vec3,float,vec3,float,int);" __glsl_extension(GL_EXT_ray_tracing) __glsl_extension(GL_NV_shader_invocation_reorder) - [require(glsl, ser)] __target_intrinsic(glsl, "hitObjectRecordHitNV") + [require(glsl, ser_raygen_closesthit_miss)] static void __glslMakeHit( out HitObject hitObj, RaytracingAccelerationStructure accelerationStructure, @@ -13402,8 +13909,8 @@ struct HitObject __glsl_extension(GL_EXT_ray_tracing) __glsl_extension(GL_NV_shader_invocation_reorder) __glsl_extension(GL_NV_ray_tracing_motion_blur) - [require(glsl, ser_motion)] __target_intrinsic(glsl, "hitObjectRecordHitMotionNV") + [require(glsl, ser_motion_raygen_closesthit_miss)] static void __glslMakeMotionHit( out HitObject hitObj, RaytracingAccelerationStructure accelerationStructure, @@ -13423,14 +13930,14 @@ struct HitObject __glsl_extension(GL_EXT_ray_tracing) __glsl_extension(GL_NV_shader_invocation_reorder) - [require(glsl, ser)] __target_intrinsic(glsl, "hitObjectGetAttributesNV($0, $1)") + [require(glsl, ser_raygen_closesthit_miss)] void __glslGetAttributes(int attributeLocation); __glsl_extension(GL_EXT_ray_tracing) __glsl_extension(GL_NV_shader_invocation_reorder) - [require(glsl, ser)] __target_intrinsic(glsl, "hitObjectTraceRayNV") + [require(glsl, ser_raygen_closesthit_miss)] static void __glslTraceRay( out HitObject hitObject, RaytracingAccelerationStructure accelerationStructure, @@ -13448,8 +13955,8 @@ struct HitObject __glsl_extension(GL_EXT_ray_tracing) __glsl_extension(GL_NV_shader_invocation_reorder) __glsl_extension(GL_NV_ray_tracing_motion_blur) - [require(glsl, ser_motion)] __target_intrinsic(glsl, "hitObjectTraceRayMotionNV") + [require(glsl, ser_motion_raygen_closesthit_miss)] static void __glslTraceMotionRay( out HitObject hitObject, RaytracingAccelerationStructure accelerationStructure, @@ -13467,8 +13974,8 @@ struct HitObject __glsl_extension(GL_EXT_ray_tracing) __glsl_extension(GL_NV_shader_invocation_reorder) - [require(glsl, ser)] __target_intrinsic(glsl, "hitObjectExecuteShaderNV") + [require(glsl, ser_raygen_closesthit_miss)] static void __glslInvoke( HitObject hitObj, int payload); @@ -13484,9 +13991,8 @@ struct HitObject [__requiresNVAPI] __glsl_extension(GL_EXT_ray_tracing) __glsl_extension(GL_NV_shader_invocation_reorder) -[require(glsl, ser)] -[require(spirv, ser)] -[require(hlsl, ser)] +[ForceInline] +[require(glsl_hlsl_spirv, ser_raygen)] void ReorderThread( uint CoherenceHint, uint NumCoherenceHintBitsFromLSB ) { __target_switch @@ -13522,9 +14028,8 @@ void ReorderThread( uint CoherenceHint, uint NumCoherenceHintBitsFromLSB ) [__requiresNVAPI] __glsl_extension(GL_EXT_ray_tracing) __glsl_extension(GL_NV_shader_invocation_reorder) -[require(glsl, ser)] -[require(spirv, ser)] -[require(hlsl, ser)] +[ForceInline] +[require(glsl_hlsl_spirv, ser_raygen)] void ReorderThread( HitObject HitOrMiss, uint CoherenceHint, uint NumCoherenceHintBitsFromLSB ) { __target_switch @@ -13550,9 +14055,8 @@ void ReorderThread( HitObject HitOrMiss, uint CoherenceHint, uint NumCoherenceHi [__requiresNVAPI] __glsl_extension(GL_EXT_ray_tracing) __glsl_extension(GL_NV_shader_invocation_reorder) -[require(glsl, ser)] -[require(spirv, ser)] -[require(hlsl, ser)] +[ForceInline] +[require(glsl_hlsl_spirv, ser_raygen)] void ReorderThread( HitObject HitOrMiss ) { __target_switch @@ -13574,14 +14078,26 @@ void ReorderThread( HitObject HitOrMiss ) /// /// There doesn't appear to be an equivalent for debugBreak for HLSL -__target_intrinsic(hlsl, "/* debugBreak() not currently supported for HLSL */") -__target_intrinsic(cuda,"__brkpt()") -__target_intrinsic(cpp, "SLANG_BREAKPOINT(0)") -void debugBreak(); __specialized_for_target(glsl) [[vk::spirv_instruction(1, "NonSemantic.DebugBreak")]] -void debugBreak(); +void __glslDebugBreak(); + +[ForceInline] +[require(cpp_cuda_glsl_hlsl, breakpoint)] +void debugBreak() +{ + __target_switch + { + case hlsl: __intrinsic_asm "/* debugBreak() not currently supported for HLSL */"; + case cuda: __intrinsic_asm "__brkpt()"; + case cpp: __intrinsic_asm "SLANG_BREAKPOINT(0)"; + case glsl: + __glslDebugBreak(); + return; + } +} + // // Realtime Clock support @@ -13591,8 +14107,8 @@ void debugBreak(); [__requiresNVAPI] __glsl_extension(GL_EXT_shader_realtime_clock) -[require(shaderclock)] [NonUniformReturn] +[require(cpp_cuda_glsl_hlsl_spirv, shaderclock)] uint getRealtimeClockLow() { __target_switch @@ -13611,14 +14127,15 @@ uint getRealtimeClockLow() } __target_intrinsic(cpp, "std::chrono::high_resolution_clock::now().time_since_epoch().count()") - __target_intrinsic(cuda, "clock64") +__target_intrinsic(cuda, "clock64") [NonUniformReturn] +[require(cpp_cuda, shaderclock)] int64_t __cudaCppGetRealtimeClock(); [__requiresNVAPI] __glsl_extension(GL_EXT_shader_realtime_clock) -[require(shaderclock)] [NonUniformReturn] +[require(cpp_cuda_glsl_hlsl_spirv, shaderclock)] uint2 getRealtimeClock() { __target_switch @@ -13648,16 +14165,19 @@ uint2 getRealtimeClock() __target_intrinsic(cuda, "(threadIdx)") [__readNone] [NonUniformReturn] +[require(cuda)] uint3 cudaThreadIdx(); __target_intrinsic(cuda, "(blockIdx)") [__readNone] [NonUniformReturn] +[require(cuda)] uint3 cudaBlockIdx(); __target_intrinsic(cuda, "(blockDim)") [__readNone] [NonUniformReturn] +[require(cuda)] uint3 cudaBlockDim(); // @@ -13832,6 +14352,7 @@ ${{{{ [__NoSideEffect] [__requiresNVAPI] +[require(glsl_hlsl_spirv, texturefootprint)] vector __textureFootprintGetAnchor(__TextureFootprintData data, int nd) { __target_switch @@ -13849,6 +14370,7 @@ vector __textureFootprintGetAnchor(__TextureFootprintData< [__NoSideEffect] [__requiresNVAPI] +[require(glsl_hlsl_spirv, texturefootprint)] vector __textureFootprintGetOffset(__TextureFootprintData data, int nd) { __target_switch @@ -13865,6 +14387,7 @@ vector __textureFootprintGetOffset(__TextureFootprintData< } __intrinsic_type($(kIROp_TextureFootprintType)) +[require(glsl_hlsl_spirv, texturefootprint)] struct __TextureFootprintData { typealias Anchor = vector; @@ -14011,6 +14534,7 @@ ${ [__NoSideEffect] __glsl_version(450) __glsl_extension(GL_NV_shader_texture_footprint) + [require(glsl_spirv, texturefootprint)] bool __queryFootprintGLSL( SamplerState sampler, Coords coords, @@ -14037,6 +14561,7 @@ ${ [__NoSideEffect] __glsl_version(450) __glsl_extension(GL_NV_shader_texture_footprint) + [require(glsl_spirv, texturefootprint)] bool __queryFootprintGLSL( SamplerState sampler, Coords coords, @@ -14065,8 +14590,7 @@ ${ __glsl_version(450) __glsl_extension(GL_NV_shader_texture_footprint) __glsl_extension(GL_ARB_sparse_texture_clamp) - __target_intrinsic(glsl, - "textureFootprintClampNV($p, $*2)") + [require(glsl_spirv, texturefootprintclamp)] bool __queryFootprintClampGLSL( SamplerState sampler, Coords coords, @@ -14096,6 +14620,7 @@ ${ __glsl_version(450) __glsl_extension(GL_NV_shader_texture_footprint) __glsl_extension(GL_ARB_sparse_texture_clamp) + [require(glsl_spirv, texturefootprintclamp)] bool __queryFootprintClampGLSL( SamplerState sampler, Coords coords, @@ -14126,6 +14651,7 @@ ${ __glsl_version(450) __glsl_extension(GL_NV_shader_texture_footprint) [__requiresNVAPI] + [require(glsl_spirv, texturefootprint)] bool __queryFootprintLodGLSL( SamplerState sampler, Coords coords, @@ -14158,6 +14684,7 @@ ${{{ __glsl_version(450) __glsl_extension(GL_NV_shader_texture_footprint) [__requiresNVAPI] + [require(glsl_spirv, texturefootprint)] bool __queryFootprintGradGLSL( SamplerState sampler, Coords coords, @@ -14187,6 +14714,7 @@ ${{{ __glsl_version(450) __glsl_extension(GL_NV_shader_texture_footprint) __glsl_extension(GL_ARB_sparse_texture_clamp) + [require(glsl_spirv, texturefootprintclamp)] bool __queryFootprintGradClampGLSL( SamplerState sampler, Coords coords, @@ -14256,6 +14784,7 @@ for(auto levelChoice : kLevelChoices) [__requiresNVAPI] __target_intrinsic(hlsl, "NvFootprint$(CoarseOrFine)($1, $2, $3, $4, NV_EXTN_TEXTURE_$!0D, $*5)") + [require(hlsl, texturefootprint)] static __FootprintData __queryFootprint$(CoarseOrFine)NVAPI( int nd, uint textureSpace, @@ -14270,6 +14799,7 @@ for(auto levelChoice : kLevelChoices) [__requiresNVAPI] __target_intrinsic(hlsl, "NvFootprint$(CoarseOrFine)Bias($1, $2, $3, $4, NV_EXTN_TEXTURE_$!0D, $*5)") + [require(hlsl, texturefootprint)] static __FootprintData __queryFootprint$(CoarseOrFine)BiasNVAPI( int nd, uint textureSpace, @@ -14285,6 +14815,7 @@ for(auto levelChoice : kLevelChoices) [__requiresNVAPI] __target_intrinsic(hlsl, "NvFootprint$(CoarseOrFine)Level($1, $2, $3, $4, NV_EXTN_TEXTURE_$!0D, $*5)") + [require(hlsl, texturefootprint)] static __FootprintData __queryFootprint$(CoarseOrFine)LevelNVAPI( int nd, uint textureSpace, @@ -14300,6 +14831,7 @@ for(auto levelChoice : kLevelChoices) [__requiresNVAPI] __target_intrinsic(hlsl, "NvFootprint$(CoarseOrFine)Grad($1, $2, $3, $4, NV_EXTN_TEXTURE_$!0D, $*5)") + [require(hlsl, texturefootprint)] static __FootprintData __queryFootprint$(CoarseOrFine)GradNVAPI( int nd, uint textureSpace, @@ -14564,6 +15096,7 @@ extension __TextureImpl coord, float value, out float originalValue) { __target_switch @@ -14628,6 +15161,7 @@ __generic __intrinsic_type($(kIROp_HLSLConstBufferPointerType)) __glsl_extension(GL_EXT_buffer_reference) __magic_type(ConstBufferPointerType) +[require(glsl_spirv, bufferreference)] struct ConstBufferPointer { __glsl_version(450) @@ -14657,6 +15191,7 @@ struct ConstBufferPointer __glsl_version(450) __glsl_extension(GL_EXT_shader_explicit_arithmetic_types_int64) __glsl_extension(GL_EXT_buffer_reference) + [require(glsl_spirv, bufferreference_int64)] static ConstBufferPointer fromUInt(uint64_t val) { __target_switch @@ -14673,6 +15208,7 @@ struct ConstBufferPointer __glsl_version(450) __glsl_extension(GL_EXT_shader_explicit_arithmetic_types_int64) __glsl_extension(GL_EXT_buffer_reference) + [require(glsl_spirv, bufferreference_int64)] uint64_t toUInt() { __target_switch @@ -14691,6 +15227,7 @@ struct ConstBufferPointer __glsl_extension(GL_EXT_buffer_reference) [__NoSideEffect] [ForceInline] + [require(glsl_spirv, bufferreference_int64)] bool isValid() { __target_switch diff --git a/source/slang/slang-ast-dump.cpp b/source/slang/slang-ast-dump.cpp index 5576d9401..0019b2130 100644 --- a/source/slang/slang-ast-dump.cpp +++ b/source/slang/slang-ast-dump.cpp @@ -298,7 +298,10 @@ struct ASTDumpContext { m_writer->emit(v); } - + void dump(CapabilityName v) + { + m_writer->emit(capabilityNameToString(v)); + } void dump(const SemanticVersion& version) { diff --git a/source/slang/slang-capabilities.capdef b/source/slang/slang-capabilities.capdef index eb546ae6b..4e31948a2 100644 --- a/source/slang/slang-capabilities.capdef +++ b/source/slang/slang-capabilities.capdef @@ -66,7 +66,25 @@ alias any_target = hlsl | glsl | c | cpp | cuda | spirv; alias any_textual_target = hlsl | glsl | c | cpp | cuda; alias any_gfx_target = hlsl | glsl | spirv; alias any_cpp_target = cpp | cuda; + +alias cpp_cuda = cpp | cuda; +alias cpp_cuda_glsl_spirv = cpp | cuda | glsl | spirv; +alias cpp_cuda_glsl_hlsl = cpp | cuda | glsl | hlsl; +alias cpp_cuda_glsl_hlsl_spirv = cpp | cuda | glsl | hlsl | spirv_1_0; +alias cpp_cuda_glsl_hlsl_metal_spirv = cpp | cuda | glsl | hlsl | metal | spirv_1_0; +alias cpp_cuda_hlsl = cpp | cuda | hlsl; +alias cpp_glsl = cpp | glsl; +alias cpp_glsl_hlsl_spirv = cpp | glsl | hlsl | spirv_1_0; +alias cpp_hlsl = cpp | hlsl; +alias cuda_glsl_hlsl = cuda | glsl | hlsl; +alias cuda_glsl_hlsl_spirv = cuda | glsl | hlsl | spirv_1_0; +alias cuda_glsl_spirv = cuda | glsl | spirv; +alias cuda_hlsl = cuda | hlsl; +alias cuda_hlsl_spirv = cuda | hlsl | spirv; +alias glsl_hlsl_spirv = glsl | hlsl | spirv; alias glsl_spirv = glsl | spirv; +alias hlsl_spirv = hlsl | spirv; + // Capabilities that stand for target spirv version for GLSL backend. // These are not compilation targets. def glsl_spirv_1_0 : glsl; @@ -80,13 +98,11 @@ def glsl_spirv_1_6 : glsl_spirv_1_5; abstract stage; def vertex : stage; def fragment : stage; -alias pixel = fragment; def compute : stage; def hull : stage; def domain : stage; def geometry : stage; def raygen : stage; -alias raygeneration = raygen; def intersection : stage; def anyhit : stage; def closesthit: stage; @@ -95,6 +111,40 @@ def mesh : stage; def amplification : stage; def callable : stage; +// shader stage alias's +alias pixel = fragment; +alias raygeneration = raygen; +alias tesscontrol = hull; +alias tesseval = domain; +alias raytracing_stages = raygen | intersection | anyhit | closesthit | miss | callable; +alias raytracing_stages_intersection = intersection; +alias raytracing_stages_raygen = raygen; +alias raytracing_stages_anyhit_closesthit = anyhit | closesthit; +alias raytracing_stages_raygen_closesthit_miss = raygen | closesthit | miss; +alias raytracing_stages_anyhit_closesthit_intersection = anyhit | closesthit | intersection; +alias raytracing_stages_anyhit_closesthit_intersection_miss = anyhit | closesthit | intersection | miss; +alias raytracing_stages_raygen_closesthit_miss_callable = raygen | closesthit | miss | callable; +alias shader_stages_compute_tesscontrol_tesseval = compute | tesscontrol | tesseval; +alias shader_stages_compute_fragment = compute | fragment; +alias shader_stages_compute_fragment_geometry_vertex = compute | fragment | geometry | vertex; +alias shader_stages_domain_hull = domain | hull; +alias raytracing_stages_fragment = raytracing_stages | fragment; +alias raytracing_stages_compute = raytracing_stages | compute; +alias raytracing_stages_compute_fragment = raytracing_stages | shader_stages_compute_fragment; +alias raytracing_stages_compute_fragment_geometry_vertex = raytracing_stages | shader_stages_compute_fragment_geometry_vertex; + +def _GLSL_130 : glsl; +def _GLSL_140 : _GLSL_130; +def _GLSL_150 : _GLSL_140; +def _GLSL_330 : _GLSL_150; +def _GLSL_400 : _GLSL_330; +def _GLSL_410 : _GLSL_400; +def _GLSL_420 : _GLSL_410; +def _GLSL_430 : _GLSL_420; +def _GLSL_440 : _GLSL_430; +def _GLSL_450 : _GLSL_440; +def _GLSL_460 : _GLSL_450; + def _sm_4_0 : hlsl; def _sm_4_1 : _sm_4_0; def _sm_5_0 : _sm_4_1; @@ -110,12 +160,21 @@ def _sm_6_7 : _sm_6_6; def hlsl_nvapi : hlsl; -// stage alias -alias tess_control = hull; -alias tess_eval = domain; +def _cuda_sm_1_0 : cuda; +def _cuda_sm_2_0 : _cuda_sm_1_0; +def _cuda_sm_3_0 : _cuda_sm_2_0; +def _cuda_sm_3_5 : _cuda_sm_3_0; +def _cuda_sm_4_0 : _cuda_sm_3_5; +def _cuda_sm_5_0 : _cuda_sm_4_0; +def _cuda_sm_6_0 : _cuda_sm_5_0; +def _cuda_sm_7_0 : _cuda_sm_6_0; +def _cuda_sm_8_0 : _cuda_sm_7_0; +def _cuda_sm_9_0 : _cuda_sm_8_0; // SPIRV extensions. +def SOURCE_EXT_GL_NV_compute_shader_derivatives : spirv_1_0; + def SPV_EXT_fragment_shader_interlock : spirv_1_0; def SPV_KHR_fragment_shader_barycentric : spirv_1_0; def SPV_EXT_fragment_fully_covered : spirv_1_0; @@ -183,7 +242,13 @@ def spvShaderNonUniform : spirv_1_5; def _GL_ARB_derivative_control : glsl; def _GL_ARB_fragment_shader_interlock : glsl; def _GL_ARB_gpu_shader5 : glsl; +def _GL_ARB_shader_image_size : glsl; +def _GL_ARB_shader_texture_image_samples : glsl; def _GL_ARB_sparse_texture_clamp : glsl; +def _GL_EXT_texture_query_lod : glsl; +def _GL_ARB_texture_query_levels : glsl; +def _GL_ARB_texture_cube_map : glsl; +def _GL_ARB_texture_gather : glsl; def _GL_EXT_buffer_reference : glsl; def _GL_EXT_buffer_reference_uvec2 : glsl; def _GL_EXT_debug_printf : glsl; @@ -199,6 +264,7 @@ def _GL_EXT_shader_atomic_float2 : glsl; def _GL_EXT_shader_atomic_int64 : glsl; def _GL_EXT_shader_atomic_float_min_max : glsl; def _GL_EXT_shader_explicit_arithmetic_types_int64 : glsl; +def _GL_EXT_shader_image_load_store : glsl; def _GL_EXT_shader_realtime_clock : glsl; def _GL_EXT_texture_shadow_lod : glsl; def _GL_KHR_memory_scope_semantics : glsl; @@ -207,19 +273,28 @@ def _GL_KHR_shader_subgroup_basic : glsl; def _GL_KHR_shader_subgroup_ballot : glsl; def _GL_KHR_shader_subgroup_quad : glsl; def _GL_KHR_shader_subgroup_shuffle : glsl; +def _GL_KHR_shader_subgroup_shuffle_relative : glsl; +def _GL_KHR_shader_subgroup_shuffle_clustered : glsl; def _GL_KHR_shader_subgroup_vote : glsl; +def _GL_NV_compute_shader_derivatives : glsl; def _GL_NV_shader_subgroup_partitioned : glsl; def _GL_NV_ray_tracing_motion_blur : glsl_spirv_1_4; +def _GL_NV_shader_atomic_fp16_vector : glsl; def _GL_NV_shader_invocation_reorder : glsl_spirv_1_4; def _GL_NV_shader_texture_footprint : glsl; +def _GL_NV_gpu_shader5 : _GL_ARB_gpu_shader5; alias _GL_NV_fragment_shader_barycentric = _GL_EXT_fragment_shader_barycentric; alias _GL_NV_ray_tracing = _GL_EXT_ray_tracing; // GLSL extension and SPV extension associations. alias GL_ARB_derivative_control = _GL_ARB_derivative_control | spvDerivativeControl; alias GL_ARB_fragment_shader_interlock = _GL_ARB_fragment_shader_interlock | spvFragmentShaderPixelInterlockEXT; -alias GL_ARB_gpu_shader5 = _GL_ARB_fragment_shader_interlock | spirv_1_0; +alias GL_ARB_gpu_shader5 = _GL_ARB_gpu_shader5 | spirv_1_0; alias GL_ARB_sparse_texture_clamp = _GL_ARB_fragment_shader_interlock | spirv_1_0; +alias GL_EXT_texture_query_lod = _GL_EXT_texture_query_lod | spvImageQuery; +alias GL_ARB_texture_query_levels = _GL_ARB_texture_query_levels |spvImageQuery; +alias GL_ARB_texture_cube_map = _GL_ARB_texture_cube_map | spirv_1_0; +alias GL_ARB_texture_gather = _GL_ARB_texture_gather | spirv_1_0; alias GL_EXT_buffer_reference = _GL_ARB_fragment_shader_interlock | spirv_1_5; alias GL_EXT_buffer_reference_uvec2 = _GL_EXT_buffer_reference_uvec2 | spirv_1_0; alias GL_EXT_debug_printf = _GL_EXT_debug_printf | SPV_KHR_non_semantic_info; @@ -235,18 +310,25 @@ alias GL_EXT_shader_atomic_float2 = _GL_EXT_shader_atomic_float2 | spvAtomicFloa alias GL_EXT_shader_atomic_int64 = _GL_EXT_shader_atomic_int64 | spvInt64Atomics; alias GL_EXT_shader_atomic_float_min_max = _GL_EXT_shader_atomic_float_min_max | spvAtomicFloat32MinMaxEXT + spvAtomicFloat16MinMaxEXT; alias GL_EXT_shader_explicit_arithmetic_types_int64 = _GL_EXT_shader_explicit_arithmetic_types_int64 | spirv_1_0; +alias GL_EXT_shader_image_load_store = _GL_EXT_shader_image_load_store | spirv_1_0; alias GL_EXT_shader_realtime_clock = _GL_EXT_shader_realtime_clock | spvShaderClockKHR; alias GL_EXT_texture_shadow_lod = _GL_EXT_texture_shadow_lod | spirv_1_0; alias GL_KHR_memory_scope_semantics = _GL_KHR_memory_scope_semantics | spirv_1_0; alias GL_KHR_shader_subgroup_arithmetic = _GL_KHR_shader_subgroup_arithmetic | spvGroupNonUniformArithmetic; alias GL_KHR_shader_subgroup_basic = _GL_KHR_shader_subgroup_basic | spvGroupNonUniformBallot; alias GL_KHR_shader_subgroup_ballot = _GL_KHR_shader_subgroup_ballot | spvGroupNonUniformBallot; -alias GL_KHR_shader_subgroup_quad = _GL_KHR_shader_subgroup_quad | spvGroupNonUniformQuad; +alias GL_KHR_shader_subgroup_clustered = _GL_KHR_shader_subgroup_shuffle_clustered | spvGroupNonUniformShuffle; alias GL_KHR_shader_subgroup_shuffle = _GL_KHR_shader_subgroup_shuffle | spvGroupNonUniformShuffle; +alias GL_KHR_shader_subgroup_shuffle_relative = _GL_KHR_shader_subgroup_shuffle_relative | spvGroupNonUniformShuffle; alias GL_KHR_shader_subgroup_vote = _GL_KHR_shader_subgroup_vote | spvGroupNonUniformVote; +alias GL_KHR_shader_subgroup_quad = _GL_KHR_shader_subgroup_quad | spvGroupNonUniformQuad; +alias GL_NV_compute_shader_derivatives = _GL_NV_compute_shader_derivatives | SOURCE_EXT_GL_NV_compute_shader_derivatives; +alias GL_ARB_shader_image_size = _GL_ARB_shader_image_size | spvImageQuery; +alias GL_ARB_shader_texture_image_samples = _GL_ARB_shader_texture_image_samples | spvImageQuery; +alias GL_NV_shader_atomic_fp16_vector = _GL_NV_shader_atomic_fp16_vector + _GL_NV_gpu_shader5 | spirv_1_0; alias GL_NV_shader_subgroup_partitioned = _GL_NV_shader_subgroup_partitioned | spvGroupNonUniformPartitionedNV; alias GL_NV_ray_tracing_motion_blur = _GL_NV_ray_tracing_motion_blur | spvRayTracingMotionBlurNV; -alias GL_NV_shader_invocation_reorder = _GL_NV_shader_invocation_reorder | spvShaderInvocationReorderNV; +alias GL_NV_shader_invocation_reorder = _GL_NV_shader_invocation_reorder + _GL_EXT_buffer_reference_uvec2 | spvShaderInvocationReorderNV; alias GL_NV_shader_texture_footprint = _GL_NV_shader_texture_footprint | spvImageFootprintNV; alias GL_NV_fragment_shader_barycentric = GL_EXT_fragment_shader_barycentric; @@ -254,24 +336,16 @@ alias GL_NV_ray_tracing = GL_EXT_ray_tracing; // Define feature names -alias tess_control_gfx = tess_control + any_gfx_target; -alias tess_eval_gfx = tess_control + any_gfx_target; -alias fragment_gfx = fragment + any_gfx_target; -alias compute_gfx = compute + any_gfx_target; -alias compute_tess_gfx = compute_gfx + tess_control_gfx + tess_eval_gfx; - alias nvapi = hlsl_nvapi; alias raytracing = GL_EXT_ray_tracing | _sm_6_5 | cuda; -alias raytracing_pos = GL_EXT_ray_tracing + GL_EXT_ray_tracing_position_fetch | _sm_6_5 | cuda; -alias rayquery_pos = GL_EXT_ray_query + GL_EXT_ray_tracing_position_fetch | _sm_6_5 | cuda; -alias rayquery = GL_EXT_ray_query | _sm_6_5 | cuda; -alias ser = GL_EXT_buffer_reference_uvec2 + GL_NV_shader_invocation_reorder + GL_EXT_ray_tracing | _sm_6_6 + hlsl_nvapi; -alias ser_motion = GL_EXT_buffer_reference_uvec2 + GL_NV_shader_invocation_reorder + GL_EXT_ray_tracing + GL_NV_ray_tracing_motion_blur | _sm_6_6 + hlsl_nvapi; -alias shaderclock = spvShaderClockKHR | hlsl_nvapi | _GL_EXT_shader_realtime_clock | cpp | cuda; -alias meshshading = spvMeshShadingEXT | _sm_6_5 | _GL_EXT_mesh_shader; -alias raytracing_motionblur = GL_EXT_ray_tracing + GL_NV_ray_tracing_motion_blur | hlsl_nvapi + _sm_6_5; -alias motionblur = GL_NV_ray_tracing_motion_blur | hlsl_nvapi; -alias texturefootprint = GL_NV_shader_texture_footprint | hlsl_nvapi; +alias ser = raytracing + GL_NV_shader_invocation_reorder | raytracing + hlsl_nvapi | cuda; +alias motionblur = GL_NV_ray_tracing_motion_blur | _sm_6_5 + hlsl_nvapi | cuda; +alias rayquery = GL_EXT_ray_query | _sm_6_5; +alias raytracing_motionblur = raytracing + motionblur | cuda; +alias ser_motion = ser + motionblur; +alias shaderclock = GL_EXT_shader_realtime_clock | hlsl_nvapi | cpp | cuda; +alias meshshading_internal = spvMeshShadingEXT + _sm_6_5 + _GL_EXT_mesh_shader; +alias meshshading = amplification + meshshading_internal | mesh + meshshading_internal; alias fragmentshaderinterlock = _GL_ARB_fragment_shader_interlock | hlsl_nvapi | spvFragmentShaderPixelInterlockEXT; alias atomic64 = GL_EXT_shader_atomic_int64 | _sm_6_6 | cpp | cuda; alias atomicfloat = GL_EXT_shader_atomic_float | _sm_6_0 + hlsl_nvapi | cpp | cuda; @@ -280,76 +354,108 @@ alias groupnonuniform = GL_KHR_shader_subgroup_ballot + GL_KHR_shader_subgroup_s + GL_KHR_shader_subgroup_arithmetic + GL_KHR_shader_subgroup_quad + GL_KHR_shader_subgroup_vote | _sm_6_0 | cuda; alias fragmentshaderbarycentric = GL_EXT_fragment_shader_barycentric | _sm_6_1; -alias fragmentprocessing = fragment_gfx + glsl_spirv | fragment + _sm_5_0; -alias fragmentprocessing_derivativecontrol = fragmentprocessing + GL_ARB_derivative_control; alias shadermemorycontrol = glsl | spirv_1_0 | _sm_5_0; -alias shadermemorycontrol_compute = compute_gfx + shadermemorycontrol; -alias subpass = fragment_gfx; -alias shaderinvocationgroup = GL_KHR_shader_subgroup_vote + glsl | spirv_1_3 | _sm_6_0; +alias shadermemorycontrol_compute = raytracing_stages_compute + shadermemorycontrol; +alias subpass = fragment + any_gfx_target; +alias subgroup_basic = GL_KHR_shader_subgroup_basic | GL_KHR_shader_subgroup_basic + spirv_1_0 | _sm_6_0 | _cuda_sm_7_0; +alias subgroup_basic_ballot = GL_KHR_shader_subgroup_basic + GL_KHR_shader_subgroup_ballot | _sm_6_0 | _cuda_sm_7_0; +alias subgroup_vote = GL_KHR_shader_subgroup_vote | _sm_6_0 | _cuda_sm_7_0; +alias subgroup_arithmetic = GL_KHR_shader_subgroup_arithmetic | _sm_6_0 | _cuda_sm_7_0; +alias subgroup_ballot = GL_KHR_shader_subgroup_ballot | _sm_6_0 | _cuda_sm_7_0; +alias subgroup_shuffle = GL_KHR_shader_subgroup_shuffle | _sm_6_0 | _cuda_sm_7_0; +alias subgroup_shufflerelative = GL_KHR_shader_subgroup_shuffle_relative | _sm_6_0 | _cuda_sm_7_0; +alias subgroup_clustered = GL_KHR_shader_subgroup_clustered | _sm_6_0 | _cuda_sm_7_0; +alias subgroup_quad = GL_KHR_shader_subgroup_quad | _sm_6_0 | _cuda_sm_7_0; +alias subgroup_partitioned = GL_NV_shader_subgroup_partitioned | _sm_6_5; +alias shaderinvocationgroup = subgroup_vote; +alias waveprefix = _sm_6_5 | _cuda_sm_7_0 | GL_KHR_shader_subgroup_arithmetic; +alias bufferreference = GL_EXT_buffer_reference; +alias bufferreference_int64 = bufferreference + GL_EXT_shader_explicit_arithmetic_types_int64; + // Define what each HLSL shader model means on different targets. alias sm_4_0 = _sm_4_0 - | glsl_spirv_1_0 + | glsl_spirv_1_0 + _GL_ARB_sparse_texture_clamp + _GL_EXT_samplerless_texture_functions | spirv_1_0 + spvImageQuery + spvImageGatherExtended + spvMinLod + SPV_GOOGLE_user_type - | cuda - | cpp; + | _cuda_sm_2_0 + | metal + | cpp + ; alias sm_4_1 = _sm_4_1 | glsl_spirv_1_0 + sm_4_0 | spirv_1_0 + sm_4_0 - | cuda - | cpp; + | _cuda_sm_6_0 + | metal + | cpp + ; alias sm_5_0 = _sm_5_0 | glsl_spirv_1_0 + sm_4_1 + _GL_KHR_memory_scope_semantics | spirv_1_0 + sm_4_1 + spvDerivativeControl + spvFragmentFullyCoveredEXT - | cuda - | cpp; + | _cuda_sm_9_0 + | metal + | cpp + ; alias sm_5_1 = _sm_5_1 - | glsl_spirv_1_0 + sm_5_0 + _GL_ARB_gpu_shader5 + _GL_ARB_sparse_texture_clamp + _GL_EXT_nonuniform_qualifier + | glsl_spirv_1_0 + sm_5_0 + _GL_ARB_gpu_shader5 + _GL_EXT_nonuniform_qualifier | spirv_1_0 + sm_5_0 + spvShaderNonUniform - | cuda - | cpp; + | _cuda_sm_9_0 + | metal + | cpp + ; alias sm_6_0 = _sm_6_0 | glsl_spirv_1_3 + sm_5_1 + groupnonuniform + atomicfloat | spirv_1_3 + sm_5_1 + groupnonuniform + atomicfloat - | cuda - | cpp; + | _cuda_sm_9_0 + | metal + | cpp + ; alias sm_6_1 = _sm_6_1 | glsl_spirv_1_3 + sm_6_0 + fragmentshaderbarycentric | spirv_1_3 + sm_6_0 + fragmentshaderbarycentric - | cuda - | cpp; + | _cuda_sm_9_0 + | metal + | cpp + ; alias sm_6_2 = _sm_6_2 | glsl_spirv_1_3 + sm_6_1 | spirv_1_3 + sm_6_1 - | cuda - | cpp; + | _cuda_sm_9_0 + | metal + | cpp + ; alias sm_6_3 = _sm_6_3 | glsl_spirv_1_4 + sm_6_2 + _GL_EXT_ray_tracing | spirv_1_4 + sm_6_2 + SPV_KHR_ray_tracing - | cuda - | cpp; + | _cuda_sm_9_0 + | metal + | cpp + ; alias sm_6_4 = _sm_6_4 | glsl_spirv_1_4 + sm_6_3 | spirv_1_4 + sm_6_3 - | cuda - | cpp; + | _cuda_sm_9_0 + | metal + | cpp + ; alias sm_6_5 = _sm_6_5 | glsl_spirv_1_4 + sm_6_4 + raytracing + meshshading | spirv_1_4 + sm_6_4 + raytracing + meshshading - | cuda - | cpp; + | _cuda_sm_9_0 + | metal + | cpp + ; alias sm_6_6 = _sm_6_6 | glsl_spirv_1_5 + sm_6_5 @@ -357,34 +463,101 @@ alias sm_6_6 = _sm_6_6 | spirv_1_5 + sm_6_5 + GL_EXT_shader_atomic_int64 + atomicfloat2 + SPV_EXT_descriptor_indexing - | cuda - | cpp; + | _cuda_sm_9_0 + | metal + | cpp + ; alias sm_6_7 = _sm_6_7 | glsl_spirv_1_5 + sm_6_6 | spirv_1_5 + sm_6_6 - | cuda - | cpp; - -alias all = _sm_6_7 + hlsl_nvapi - | glsl_spirv_1_5 + sm_6_7 - + ser + shaderclock + texturefootprint + fragmentshaderinterlock + _GL_NV_shader_subgroup_partitioned - + _GL_NV_ray_tracing_motion_blur + _GL_NV_shader_texture_footprint - | spirv_1_5 + sm_6_7 - + ser + shaderclock + texturefootprint + fragmentshaderinterlock + spvGroupNonUniformPartitionedNV - + spvRayTracingMotionBlurNV + spvRayTracingMotionBlurNV; - + | _cuda_sm_9_0 + | metal + | cpp + ; // Profiles -alias GLSL_150 = glsl + sm_5_1 | spirv_1_0; -alias GLSL_330 = GLSL_150 | spirv_1_0 + sm_5_1; -alias GLSL_400 = GLSL_150 | spirv_1_0 + sm_5_1; -alias GLSL_410 = glsl + sm_5_1 | spirv_1_5 + sm_5_1; -alias GLSL_420 = glsl + sm_5_1 | spirv_1_5 + sm_5_1; -alias GLSL_430 = glsl + sm_5_1 | spirv_1_5 + sm_5_1; -alias GLSL_440 = glsl + sm_6_0 | spirv_1_5 + sm_6_0; -alias GLSL_450 = glsl + sm_6_3 | spirv_1_5 + sm_6_3; -alias GLSL_460 = glsl_spirv_1_5 + all | spirv_1_5 + all; +alias GLSL_130 = _GLSL_130 + | _sm_4_0 + | _cuda_sm_2_0 + | spirv_1_0 + | metal + | cpp + ; +alias GLSL_140 = _GLSL_140 + | _sm_4_1 + | _cuda_sm_2_0 + | spirv_1_0 + | metal + | cpp + ; +alias GLSL_150 = _GLSL_150 + | _sm_4_1 + | _cuda_sm_2_0 + | spirv_1_0 + | metal + | cpp + ; +alias GLSL_330 = _GLSL_330 + | _sm_5_0 + | _cuda_sm_6_0 + | spirv_1_0 + | metal + | cpp + ; +alias GLSL_400 = _GLSL_400 + | _sm_5_1 + | _cuda_sm_6_0 + | spirv_1_3 + | metal + | cpp + ; +alias GLSL_410 = _GLSL_410 + | _sm_5_1 + | _cuda_sm_6_0 + | spirv_1_3 + | metal + | cpp + ; +alias GLSL_420 = _GLSL_420 + | _sm_5_1 + | _cuda_sm_6_0 + | spirv_1_3 + | metal + | cpp + ; +alias GLSL_430 = _GLSL_430 + | _sm_5_1 + | _cuda_sm_6_0 + | spirv_1_3 + | metal + | cpp + ; +alias GLSL_440 = _GLSL_440 + | _sm_6_0 + | _cuda_sm_6_0 + | spirv_1_5 + | metal + | cpp + ; +alias GLSL_450 = _GLSL_450 + | _sm_6_0 + | _cuda_sm_6_0 + | spirv_1_5 + | metal + | cpp + ; +alias GLSL_460 = _GLSL_460 + | _sm_6_6 + | _cuda_sm_6_0 + | spirv_1_5 + | metal + | cpp + ; + +alias GLSL_410_SPIRV_1_0 = _GLSL_410 + GLSL_400 | GLSL_400; +alias GLSL_420_SPIRV_1_0 = _GLSL_420 + GLSL_410_SPIRV_1_0 | GLSL_410_SPIRV_1_0; +alias GLSL_430_SPIRV_1_0 = _GLSL_430 + GLSL_420_SPIRV_1_0 | GLSL_420_SPIRV_1_0; alias DX_4_0 = sm_4_0; alias DX_4_1 = sm_4_1; @@ -398,3 +571,112 @@ alias DX_6_4 = sm_6_4; alias DX_6_5 = sm_6_5; alias DX_6_6 = sm_6_6; alias DX_6_7 = sm_6_7; + +alias sm_2_0_GLSL_140 = sm_4_0 | glsl | spirv_1_0 | cuda | cpp; +alias sm_2_0_GLSL_400 = sm_4_0 | glsl | spirv_1_0 | cuda | cpp; +alias appendstructuredbuffer = sm_5_0 + raytracing_stages_compute_fragment; +alias atomic_hlsl = _sm_4_0; +alias atomic_hlsl_nvapi = _sm_4_0 + hlsl_nvapi; +alias atomic_hlsl_sm_6_6 = _sm_6_6; +alias byteaddressbuffer = sm_4_0; +alias byteaddressbuffer_rw = sm_4_0 + raytracing_stages_compute_fragment; +alias consumestructuredbuffer = sm_5_0 + raytracing_stages_compute_fragment; +alias fragmentprocessing = raytracing_stages_compute_fragment + _sm_5_0 + | fragment + glsl_spirv + | raytracing_stages_compute + GL_NV_compute_shader_derivatives + | raytracing_stages_compute_fragment + GLSL_460 + ; +alias fragmentprocessing_derivativecontrol = raytracing_stages_compute_fragment + _sm_5_0 + | fragment + GL_ARB_derivative_control + | compute + GL_NV_compute_shader_derivatives + | raytracing_stages_compute_fragment + GLSL_460 + ; +alias getattributeatvertex = fragment + _sm_6_1 | fragment + GL_EXT_fragment_shader_barycentric; +alias memorybarrier_compute = raytracing_stages_compute + sm_5_0; +alias structuredbuffer = sm_4_0; +alias structuredbuffer_rw = sm_4_0 + raytracing_stages_compute_fragment; +alias texture_sm_4_1 = sm_4_1 + _GLSL_150; +alias texture_sm_4_1_samplerless = texture_sm_4_1 + GL_EXT_samplerless_texture_functions; +alias texture_sm_4_1_compute_fragment = cpp + texture_sm_4_1 + | cuda + texture_sm_4_1 + | glsl + texture_sm_4_1 + | hlsl + texture_sm_4_1 + raytracing_stages_compute_fragment + | spirv_1_0 + texture_sm_4_1 + ; +// supposedly works on compute but docs say nothing, so for now keep as compute_fragment +alias texture_sm_4_1_fragment = cpp + texture_sm_4_1 + | cuda + texture_sm_4_1 + | glsl + texture_sm_4_1 + | hlsl + texture_sm_4_1 + raytracing_stages_compute_fragment + | spirv_1_0 + texture_sm_4_1 + ; +alias texture_sm_4_1_clamp_fragment = texture_sm_4_1_fragment + GL_ARB_sparse_texture_clamp; +alias texture_sm_4_1_vertex_fragment_geometry = cpp + texture_sm_4_1 + | cuda + texture_sm_4_1 + | glsl + texture_sm_4_1 + | hlsl + texture_sm_4_1 + raytracing_stages_compute_fragment_geometry_vertex + | spirv_1_0 + texture_sm_4_1 + ; +alias texture_gather = texture_sm_4_1_vertex_fragment_geometry + GL_ARB_texture_gather; +alias image_samples = texture_sm_4_1_compute_fragment + GL_ARB_shader_texture_image_samples; +alias image_size = texture_sm_4_1_compute_fragment + GL_ARB_shader_image_size; +alias texture_size = texture_sm_4_1 + GL_ARB_shader_image_size; +alias texture_querylod = texture_sm_4_1 + GL_EXT_texture_query_lod; +alias texture_querylevels = texture_sm_4_1 + GL_ARB_texture_query_levels; +alias texture_shadowlod = texture_sm_4_1 + GL_EXT_texture_shadow_lod + _GLSL_400 + | texture_sm_4_1 + GL_EXT_texture_shadow_lod; +alias texture_shadowlod_cube = texture_shadowlod + GL_ARB_texture_cube_map; + +alias atomic_glsl_float1 = GL_EXT_shader_atomic_float; +alias atomic_glsl_float2 = GL_EXT_shader_atomic_float2; +alias atomic_glsl_halfvec = GL_NV_shader_atomic_fp16_vector; +alias atomic_glsl = GLSL_430_SPIRV_1_0; +alias atomic_glsl_int64 = atomic_glsl + GL_EXT_shader_atomic_int64; +alias GLSL_430_SPIRV_1_0_compute = GLSL_430_SPIRV_1_0 + compute; +alias image_loadstore = GL_EXT_shader_image_load_store + GLSL_420; +alias nonuniformqualifier = sm_5_1; +alias printf = GL_EXT_debug_printf | _sm_4_0 | _cuda_sm_2_0 | cpp; +alias texturefootprint = GL_NV_shader_texture_footprint + GLSL_450 | hlsl_nvapi + _sm_4_0; +alias texturefootprintclamp = texturefootprint + GL_ARB_sparse_texture_clamp; +alias texture_cube = GL_ARB_texture_cube_map; +alias shader5_sm_4_0 = GL_ARB_gpu_shader5 | sm_4_0; +alias shader5_sm_5_0 = GL_ARB_gpu_shader5 | sm_5_0; + +alias atomic_glsl_hlsl_cuda = atomic_glsl | _sm_5_0 | _cuda_sm_2_0; +alias atomic_glsl_hlsl_cuda_float1 = atomic_glsl_float1 | atomic_hlsl_nvapi | _cuda_sm_2_0; +alias atomic_glsl_hlsl_cuda_float2 = atomic_glsl_float2 | atomic_hlsl_nvapi | _cuda_sm_2_0; +alias atomic_glsl_hlsl_cuda2_int64 = atomic_glsl_int64 | atomic_hlsl_nvapi | _cuda_sm_2_0; +alias atomic_glsl_hlsl_cuda5_int64 = atomic_glsl_int64 | atomic_hlsl_nvapi | _cuda_sm_6_0; +alias atomic_glsl_hlsl_cuda6_int64 = atomic_glsl_int64 | atomic_hlsl_nvapi | _cuda_sm_6_0; +alias atomic_glsl_hlsl_cuda9_int64 = atomic_glsl_int64 | atomic_hlsl_nvapi | _cuda_sm_9_0; + +alias breakpoint = GL_EXT_debug_printf | hlsl | _cuda_sm_8_0 | cpp; + +alias rayobject = raytracing | rayquery; +alias raytracing_allstages = raytracing_stages + raytracing; +alias raytracing_anyhit = anyhit + raytracing; +alias raytracing_intersection = raytracing_stages_intersection + raytracing; +alias raytracing_anyhit_closesthit = raytracing_stages_anyhit_closesthit + raytracing; +alias raytracing_anyhit_closesthit_intersection = raytracing_stages_anyhit_closesthit_intersection + raytracing; +alias raytracing_raygen_closesthit_miss = raytracing_stages_raygen_closesthit_miss + raytracing; +alias raytracing_anyhit_closesthit_intersection_miss = raytracing_stages_anyhit_closesthit_intersection_miss + raytracing; +alias raytracing_raygen_closesthit_miss_callable = raytracing_stages_raygen_closesthit_miss_callable + raytracing; +alias raytracing_position = raytracing + GL_EXT_ray_tracing_position_fetch + anyhit + closesthit; +alias raytracing_motionblur_anyhit_closesthit_intersection_miss = raytracing_stages_anyhit_closesthit_intersection_miss + raytracing_motionblur; +alias raytracing_motionblur_raygen_closesthit_miss = raytracing_stages_raygen_closesthit_miss + raytracing_motionblur; +alias rayquery_position = rayquery + GL_EXT_ray_tracing_position_fetch; +alias ser_raygen = raytracing_stages_raygen + ser; +alias ser_raygen_closesthit_miss = raytracing_stages_raygen_closesthit_miss + ser; +alias ser_any_closesthit_intersection_miss = raytracing_stages_anyhit_closesthit_intersection_miss + ser; +alias ser_anyhit_closesthit_intersection = raytracing_stages_anyhit_closesthit_intersection + ser; +alias ser_anyhit_closesthit = raytracing_stages_anyhit_closesthit + ser; +alias ser_motion_raygen_closesthit_miss = raytracing_stages_raygen_closesthit_miss + ser_motion; +alias ser_motion_raygen = raytracing_stages_raygen + ser_motion; + +alias all = _sm_6_7 + hlsl_nvapi + | glsl_spirv_1_5 + sm_6_7 + + ser + shaderclock + texturefootprint + fragmentshaderinterlock + _GL_NV_shader_subgroup_partitioned + + _GL_NV_ray_tracing_motion_blur + _GL_NV_shader_texture_footprint + | spirv_1_5 + sm_6_7 + + ser + shaderclock + texturefootprint + fragmentshaderinterlock + spvGroupNonUniformPartitionedNV + + spvRayTracingMotionBlurNV + spvRayTracingMotionBlurNV; \ No newline at end of file diff --git a/source/slang/slang-capability.cpp b/source/slang/slang-capability.cpp index fbe37892a..0daf83dac 100644 --- a/source/slang/slang-capability.cpp +++ b/source/slang/slang-capability.cpp @@ -988,6 +988,104 @@ void CapabilitySet::canonicalize() m_conjunctions.sort(); } +CapabilitySet CapabilitySet::getTargetsThisIsMissingFromOther(const CapabilitySet& other) +{ + CapabilitySet conflicts{}; + List textualTargetsNotHandled; + for (auto conjunction : this->m_conjunctions) + { + textualTargetsNotHandled.add({}); + auto& currentList = textualTargetsNotHandled.getLast(); + for (auto thatNode : conjunction.getExpandedAtoms()) + { + // To make this faster we can make an assumption that the nodes are: + // {textualTarget, targetAbstract(), targetAbstract(), nonTarget} + // this assumption is not being used since it relies on ordering of .capdef file + if (_getInfo(thatNode).abstractBase == CapabilityName::target) + currentList.getExpandedAtoms().add(thatNode); + } + } + for (auto& thatConjunction : other.m_conjunctions) + { + // Worth the check to early leave due to ~5*5 elements to loop around + if (textualTargetsNotHandled.getCount() == 0) + break; + + for (int i = 0 ; i < textualTargetsNotHandled.getCount(); i++) + { + auto& textualTargets = textualTargetsNotHandled[i]; + + if (textualTargets.countIntersectionWith(thatConjunction) != textualTargets.getExpandedAtoms().getCount()) + continue; + + textualTargetsNotHandled[i] = textualTargets.makeEmpty(); + } + } + CapabilitySet set; + for (auto& i : textualTargetsNotHandled) + { + if (i.isEmpty()) + continue; + set.unionWith(i); + } + return set; +} + +// We only run 'join' logic on "this" conjunctions which are compatiable with "other" conjunctions. +// We only add specific nodes which satisfy the abstractMask. +// Any non-compatible conjunctions with "other"s cconjunctions will be preserved and unmodified. +void CapabilitySet::simpleJoinWithSetMask(const CapabilitySet& other, CapabilityName abstractMask) +{ + CapabilitySet resultSet; + HashSet setUsed; + // get used abstract mask nodes per conjunction so we can trivially check + // if we need to add the abstract mask node to avoid duplicates + List> abstractMaskNodeInUse; + abstractMaskNodeInUse.growToCount(m_conjunctions.getCount()); + for (int i = 0; i < m_conjunctions.getCount(); i++) + { + auto& thisConjunction = m_conjunctions[i]; + auto& setOfInUseNode = abstractMaskNodeInUse[i]; + + for (auto& atom : thisConjunction.getExpandedAtoms()) + { + if (_getInfo(atom).abstractBase != abstractMask) + continue; + setOfInUseNode.add(atom); + } + } + + for (auto& thatConjunction : other.m_conjunctions) + { + for (int i = 0; i < m_conjunctions.getCount(); i++) + { + auto& thisConjunction = m_conjunctions[i]; + auto& setOfInUseNode = abstractMaskNodeInUse[i]; + CapabilityConjunctionSet conjunctionToAddToResultSet; + + if (thisConjunction.isIncompatibleWith(thatConjunction)) + continue; + conjunctionToAddToResultSet = thisConjunction; + setUsed.add(&thisConjunction); + for (auto atom : thatConjunction.getExpandedAtoms()) + { + if (_getInfo(atom).abstractBase != abstractMask + || setOfInUseNode.contains(atom)) + continue; + conjunctionToAddToResultSet.getExpandedAtoms().add(atom); + } + conjunctionToAddToResultSet.getExpandedAtoms().sort(); + resultSet.unionWith(conjunctionToAddToResultSet); + } + } + for (auto& c : m_conjunctions) + { + if (!setUsed.contains(&c)) + resultSet.m_conjunctions.add(c); + } + m_conjunctions = resultSet.m_conjunctions; +} + void CapabilitySet::join(const CapabilitySet& other) { if (isEmpty() || other.isInvalid()) @@ -1176,6 +1274,24 @@ bool CapabilitySet::checkCapabilityRequirement(CapabilitySet const& available, C return true; } +bool CapabilitySet::isExactSubset(CapabilitySet const& maybeSuperSet) +{ + // This should only be used when absolutely required due to the + // cost for complex sets. Simple sets are fine (glsl|spirv...) + for (auto& thisCon : m_conjunctions) + { + bool foundEqualCon = false; + for (auto& thatCon : maybeSuperSet.m_conjunctions) + { + if (thisCon == thatCon) + foundEqualCon = true; + } + if (foundEqualCon == false) + return false; + } + return true; +} + void printDiagnosticArg(StringBuilder& sb, const CapabilitySet& capSet) { bool isFirstSet = true; diff --git a/source/slang/slang-capability.h b/source/slang/slang-capability.h index b0ca9231a..feac03337 100644 --- a/source/slang/slang-capability.h +++ b/source/slang/slang-capability.h @@ -209,6 +209,10 @@ public: void unionWith(const CapabilityConjunctionSet& other); + void simpleJoinWithSetMask(const CapabilitySet& other, CapabilityName abstractMask); + + CapabilitySet getTargetsThisIsMissingFromOther(const CapabilitySet& other); + void canonicalize(); /// Are these two capability sets equal? @@ -226,6 +230,8 @@ public: static bool checkCapabilityRequirement(CapabilitySet const& available, CapabilitySet const& required, const CapabilityConjunctionSet*& outFailedAvailableSet); + bool isExactSubset(CapabilitySet const& maybeSuperSet); + private: // The underlying representation we use is a list of conjunctions. // diff --git a/source/slang/slang-check-decl.cpp b/source/slang/slang-check-decl.cpp index d819121bc..e8cc01ef2 100644 --- a/source/slang/slang-check-decl.cpp +++ b/source/slang/slang-check-decl.cpp @@ -354,6 +354,8 @@ namespace Slang virtual void processReferencedDecl(Decl* decl) = 0; + virtual void processDeclModifiers(Decl* decl) = 0; + void dispatchIfNotNull(Stmt* stmt) { if (!stmt) @@ -462,6 +464,7 @@ namespace Slang { dispatchIfNotNull(expr->type.type); dispatchIfNotNull(expr->declRef.declRefBase); + processDeclModifiers(expr->declRef.getDecl()); } void visitStaticMemberExpr(StaticMemberExpr* expr) { @@ -9813,10 +9816,11 @@ namespace Slang typedef SemanticsDeclReferenceVisitor> Base; const ProcessFunc& handleReferenceFunc; - + RequireCapabilityAttribute* maybeRequireCapability; SemanticsContext& outerContext; - CapabilityDeclReferenceVisitor(const ProcessFunc& processFunc, SemanticsContext& outer) + CapabilityDeclReferenceVisitor(const ProcessFunc& processFunc, RequireCapabilityAttribute* maybeRequireCapability, SemanticsContext& outer) : handleReferenceFunc(processFunc) + , maybeRequireCapability(maybeRequireCapability) , outerContext(outer) , SemanticsDeclReferenceVisitor>(outer) { @@ -9828,6 +9832,11 @@ namespace Slang loc = Base::sourceLocStack.getLast(); handleReferenceFunc(decl, decl->inferredCapabilityRequirements, loc); } + virtual void processDeclModifiers(Decl* decl) + { + if (decl) + handleReferenceFunc(decl, decl->inferredCapabilityRequirements, decl->loc); + } void visitDiscardStmt(DiscardStmt* stmt) { handleReferenceFunc(stmt, CapabilitySet(CapabilityName::fragment), stmt->loc); @@ -9835,9 +9844,42 @@ namespace Slang void visitTargetSwitchStmt(TargetSwitchStmt* stmt) { CapabilitySet set; - for (auto targetCase : stmt->targetCases) + auto targetCaseCount = stmt->targetCases.getCount(); + for (Index targetCaseIndex = 0; targetCaseIndex < targetCaseCount; targetCaseIndex++) { - auto targetCap = CapabilitySet(CapabilityName(targetCase->capability)); + // We may recieve a `default:` case for a `__target_switch`. If this is the case, + // we must resolve the target capability for a non empty set of `calling_functions_targets`: + // ``` default_target = calling_functions_targets-{other_case_targets} ``` + // + // * `calling_functions_capability` = `requirement attribute` of the calling function; if missing + // we can assume it is `any_target` + // + // * `{other_case_targets}` = set of all capabilities all `case` statments target inside the `__target_switch` + + // If we do not handle `default:`, the codegen will fail when trying to find a specific + // codegen target not handled explicitly by a `case` statment. + // We must also ensure the `default` case is last so we have priority to hit `case` statments and can preprocess + // `case` statments before the `default` case. + CapabilitySet targetCap; + if (CapabilityName(stmt->targetCases[targetCaseIndex]->capability) == CapabilityName::Invalid) + { + if (targetCaseCount - 1 != targetCaseIndex) + { + for (Index i = targetCaseIndex; i < targetCaseCount - 1; i++) + std::swap(stmt->targetCases[i], stmt->targetCases[i + 1]); + continue; + } + + if (!maybeRequireCapability) + targetCap = (CapabilitySet(CapabilityName::any_target).getTargetsThisIsMissingFromOther(set)); + else + targetCap = (maybeRequireCapability->capabilitySet.getTargetsThisIsMissingFromOther(set)); + } + else + { + targetCap = CapabilitySet(CapabilityName(stmt->targetCases[targetCaseIndex]->capability)); + } + auto targetCase = stmt->targetCases[targetCaseIndex]; auto oldCap = targetCap; auto bodyCap = getStatementCapabilityUsage(this, targetCase->body); targetCap.join(bodyCap); @@ -9851,6 +9893,7 @@ namespace Slang set.canonicalize(); handleReferenceFunc(stmt, set, stmt->loc); } + void visitRequireCapabilityDecl(RequireCapabilityDecl* decl) { handleReferenceFunc(decl, decl->inferredCapabilityRequirements, decl->loc); @@ -9858,9 +9901,9 @@ namespace Slang }; template - void visitReferencedDecls(SemanticsContext& context, NodeBase* node, SourceLoc initialLoc, const ProcessFunc& func) + void visitReferencedDecls(SemanticsContext& context, NodeBase* node, SourceLoc initialLoc, RequireCapabilityAttribute* maybeRequireCapability, const ProcessFunc& func) { - CapabilityDeclReferenceVisitor visitor(func, context); + CapabilityDeclReferenceVisitor visitor(func, maybeRequireCapability, context); visitor.sourceLocStack.add(initialLoc); if (auto val = as(node)) @@ -9879,7 +9922,7 @@ namespace Slang return CapabilitySet(); CapabilitySet inferredRequirements; - visitReferencedDecls(*visitor, stmt, stmt->loc, [&](SyntaxNode* node, const CapabilitySet& nodeCaps, SourceLoc refLoc) + visitReferencedDecls(*visitor, stmt, stmt->loc, nullptr, [&](SyntaxNode* node, const CapabilitySet& nodeCaps, SourceLoc refLoc) { _propagateRequirement(visitor, inferredRequirements, stmt, node, nodeCaps, refLoc); }); @@ -9888,11 +9931,7 @@ namespace Slang void SemanticsDeclCapabilityVisitor::checkVarDeclCommon(VarDeclBase* varDecl) { - visitReferencedDecls(*this, varDecl->type.type, varDecl->loc, [this, varDecl](SyntaxNode* node, const CapabilitySet& nodeCaps, SourceLoc refLoc) - { - _propagateRequirement(this, varDecl->inferredCapabilityRequirements, varDecl, node, nodeCaps, refLoc); - }); - visitReferencedDecls(*this, varDecl->initExpr, varDecl->loc, [this, varDecl](SyntaxNode* node, const CapabilitySet& nodeCaps, SourceLoc refLoc) + visitReferencedDecls(*this, varDecl->type.type, varDecl->loc, varDecl->findModifier(), [this, varDecl](SyntaxNode* node, const CapabilitySet& nodeCaps, SourceLoc refLoc) { _propagateRequirement(this, varDecl->inferredCapabilityRequirements, varDecl, node, nodeCaps, refLoc); }); @@ -9958,7 +9997,7 @@ namespace Slang ensureDecl(member, DeclCheckState::CapabilityChecked); _propagateRequirement(this, funcDecl->inferredCapabilityRequirements, funcDecl, member, member->inferredCapabilityRequirements, member->loc); } - visitReferencedDecls(*this, funcDecl->body, funcDecl->loc, [this, funcDecl](SyntaxNode* node, const CapabilitySet& nodeCaps, SourceLoc refLoc) + visitReferencedDecls(*this, funcDecl->body, funcDecl->loc, funcDecl->findModifier(), [this, funcDecl](SyntaxNode* node, const CapabilitySet& nodeCaps, SourceLoc refLoc) { _propagateRequirement(this, funcDecl->inferredCapabilityRequirements, funcDecl, node, nodeCaps, refLoc); }); @@ -9972,7 +10011,7 @@ namespace Slang _propagateRequirement(this, funcDecl->inferredCapabilityRequirements, funcDecl, parentAggTypeDecl, parentAggTypeDecl->inferredCapabilityRequirements, funcDecl->loc); } } - + auto declaredCaps = getDeclaredCapabilitySet(funcDecl); if (!declaredCaps.isEmpty()) @@ -9996,12 +10035,13 @@ namespace Slang if (declaredCaps.isEmpty()) { // If the user has not declared any capabilities, - // we should diagnose an error if this is a public symbol. + // we should diagnose a warning if any_target is not + // a super-set by exact atoms. if (vis == DeclVisibility::Public && !funcDecl->inferredCapabilityRequirements.isEmpty()) { if (!getModuleDecl(funcDecl)->isInLegacyLanguage) { - if (funcDecl->inferredCapabilityRequirements != getAnyPlatformCapabilitySet()) + if (!funcDecl->inferredCapabilityRequirements.isExactSubset(getAnyPlatformCapabilitySet())) { diagnoseCapabilityErrors( getSink(), @@ -10019,6 +10059,9 @@ namespace Slang { // For public decls, we need to enforce that the function // only uses capabilities that it declares. + // At a minimum we will propagate shader requirements to our + // function from calling children in all cases so the parent + // can enforce shader targets correctly and propagate to `main` const CapabilityConjunctionSet* failedAvailableCapabilityConjunction = nullptr; if (!CapabilitySet::checkCapabilityRequirement( declaredCaps, @@ -10028,6 +10071,8 @@ namespace Slang diagnoseUndeclaredCapability(funcDecl, Diagnostics::useOfUndeclaredCapability, failedAvailableCapabilityConjunction); funcDecl->inferredCapabilityRequirements = declaredCaps; } + else + funcDecl->inferredCapabilityRequirements.simpleJoinWithSetMask(declaredCaps, CapabilityName::stage); } else { @@ -10165,7 +10210,7 @@ namespace Slang while (traceLevels > 0) { refDecl = nullptr; - visitReferencedDecls(*visitor, decl, decl->loc, [&](SyntaxNode* node, const CapabilitySet& nodeCaps, SourceLoc refLoc) + visitReferencedDecls(*visitor, decl, decl->loc, decl->findModifier(), [&](SyntaxNode* node, const CapabilitySet& nodeCaps, SourceLoc refLoc) { if (nodeCaps.isIncompatibleWith(incompatibleAtom)) { @@ -10197,6 +10242,8 @@ namespace Slang { if (decl->inferredCapabilityRequirements.getExpandedAtoms().getCount() == 0) return; + if(!failedAvailableSet) + return; // There are two causes for why type checking failed on failedAvailableSet. // The first scenario is that failedAvailableSet defines a set of capabilities on a diff --git a/source/slang/slang-check-modifier.cpp b/source/slang/slang-check-modifier.cpp index 6d39f977c..13ad8be3a 100644 --- a/source/slang/slang-check-modifier.cpp +++ b/source/slang/slang-check-modifier.cpp @@ -1613,6 +1613,37 @@ namespace Slang } } + void postProcessingOnModifiers(Modifiers& modifiers) + { + // compress all `require` nodes into 1 `require` modifier + RequireCapabilityAttribute* firstRequire = nullptr; + Modifier* previous = nullptr; + Modifier* next = nullptr; + for (auto m = modifiers.first; m != nullptr; m = next) + { + next = m->next; + // + + if (auto req = as(m)) + { + if (!firstRequire) + { + firstRequire = req; + previous = m; + continue; + } + for(auto& con : req->capabilitySet.getExpandedAtoms()) + firstRequire->capabilitySet.unionWith(con); + if(previous) + previous->next = next; + continue; + } + + // + previous = m; + } + } + void SemanticsVisitor::checkModifiers(ModifiableSyntaxNode* syntaxNode) { // TODO(tfoley): need to make sure this only @@ -1685,6 +1716,8 @@ namespace Slang // Whether we actually re-wrote anything or note, lets // install the new list of modifiers on the declaration syntaxNode->modifiers.first = resultModifiers; + + postProcessingOnModifiers(syntaxNode->modifiers); } diff --git a/source/slang/slang-stdlib-textures.cpp b/source/slang/slang-stdlib-textures.cpp index 85282da92..f874c5da9 100644 --- a/source/slang/slang-stdlib-textures.cpp +++ b/source/slang/slang-stdlib-textures.cpp @@ -407,6 +407,7 @@ void TextureTypeInfo::writeGetDimensionFunctions() sb << " __glsl_version(450)\n"; sb << " __glsl_extension(GL_EXT_samplerless_texture_functions)\n"; + sb << " [require(glsl_spirv, texture_sm_4_1)]\n"; writeFunc( "void", "GetDimensions", -- cgit v1.2.3