summary | refs | log | tree | commit | diff | stats
path: root/source
diff options
context:
space:
mode:
author Jay Kwak <82421531+jkwak-work@users.noreply.github.com> 2024-05-02 09:59:45 -0700
committer GitHub <noreply@github.com> 2024-05-02 09:59:45 -0700
commit 679a457940027420817a85070b3fdb9bfc0cca2e (patch)
tree 51463c1f19035108e4e1b3a76354b27beae07669 /source
parent d53d793db6f4d82358ada700e1bd98b497384cdc (diff)
Implement SPIR-V target for GLSL functions (#4083)
Fixes #4051. This commit implements the SPIR-V target for GLSL functions. It also fixes a few problems in the GLSL target implementation.
Diffstat (limited to 'source')
-rw-r--r-- source/slang/glsl.meta.slang 598
-rw-r--r-- source/slang/hlsl.meta.slang 26
2 files changed, 538 insertions, 86 deletions
diff --git a/source/slang/glsl.meta.slang b/source/slang/glsl.meta.slang
index 98770293c..0ba6c17aa 100644
--- a/source/slang/glsl.meta.slang
+++ b/source/slang/glsl.meta.slang
@@ -4,10 +4,10 @@
#define lowp
#define VECTOR_MAP_UNARY(TYPE, COUNT, FUNC, VALUE) \
- vector<TYPE,COUNT> result; for(int i = 0; i < COUNT; ++i) { result[i] = FUNC(VALUE[i]); } return result
+ vector<TYPE,COUNT> result; [ForceUnroll] for(int i = 0; i < COUNT; ++i) { result[i] = FUNC(VALUE[i]); } return result
#define VECTOR_MAP_TRINARY(TYPE, COUNT, FUNC, A, B, C) \
- vector<TYPE,COUNT> result; for(int i = 0; i < COUNT; ++i) { result[i] = FUNC(A[i], B[i], C[i]); } return result
+ vector<TYPE,COUNT> result; [ForceUnroll] for(int i = 0; i < COUNT; ++i) { result[i] = FUNC(A[i], B[i], C[i]); } return result
//
// OpenGL 4.60 spec
@@ -331,6 +331,10 @@ public T asinh(T x)
{
case cpp: __intrinsic_asm "$P_asinh($0)";
case cuda: __intrinsic_asm "$P_asinh($0)";
+ case glsl: __intrinsic_asm "asinh";
+ case spirv: return spirv_asm {
+ OpExtInst $$T result glsl450 Asinh $x
+ };
default:
return log(x + sqrt(x * x + T(1)));
}
@@ -342,7 +346,15 @@ __generic<T : __BuiltinFloatingPointType, let N:int>
[require(cpp_cuda_glsl_hlsl_spirv, GLSL_130)]
public vector<T,N> asinh(vector<T,N> x)
{
- VECTOR_MAP_UNARY(T, N, asinh, x);
+ __target_switch
+ {
+ case glsl: __intrinsic_asm "asinh";
+ case spirv: return spirv_asm {
+ OpExtInst $$vector<T,N> result glsl450 Asinh $x
+ };
+ default:
+ VECTOR_MAP_UNARY(T, N, asinh, x);
+ }
}
__generic<T : __BuiltinFloatingPointType>
@@ -355,6 +367,10 @@ public T acosh(T x)
{
case cpp: __intrinsic_asm "$P_acosh($0)";
case cuda: __intrinsic_asm "$P_acosh($0)";
+ case glsl: __intrinsic_asm "acosh";
+ case spirv: return spirv_asm {
+ OpExtInst $$T result glsl450 Acosh $x
+ };
default:
return log(x + sqrt( x * x - T(1)));
}
@@ -366,7 +382,15 @@ __generic<T : __BuiltinFloatingPointType, let N:int>
[require(cpp_cuda_glsl_hlsl_spirv, GLSL_130)]
public vector<T,N> acosh(vector<T,N> x)
{
- VECTOR_MAP_UNARY(T, N, acosh, x);
+ __target_switch
+ {
+ case glsl: __intrinsic_asm "acosh";
+ case spirv: return spirv_asm {
+ OpExtInst $$vector<T,N> result glsl450 Acosh $x
+ };
+ default:
+ VECTOR_MAP_UNARY(T, N, acosh, x);
+ }
}
__generic<T : __BuiltinFloatingPointType>
@@ -379,6 +403,10 @@ public T atanh(T x)
{
case cpp: __intrinsic_asm "$P_atanh($0)";
case cuda: __intrinsic_asm "$P_atanh($0)";
+ case glsl: __intrinsic_asm "atanh";
+ case spirv: return spirv_asm {
+ OpExtInst $$T result glsl450 Atanh $x
+ };
default:
return T(0.5) * log((T(1) + x) / (T(1) - x));
}
@@ -390,7 +418,15 @@ __generic<T : __BuiltinFloatingPointType, let N:int>
[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
public vector<T,N> atanh(vector<T,N> x)
{
- VECTOR_MAP_UNARY(T, N, atanh, x);
+ __target_switch
+ {
+ case glsl: __intrinsic_asm "atanh";
+ case spirv: return spirv_asm {
+ OpExtInst $$vector<T,N> result glsl450 Atanh $x
+ };
+ default:
+ VECTOR_MAP_UNARY(T, N, atanh, x);
+ }
}
//
@@ -491,6 +527,45 @@ public vector<T, N> mod(vector<T, N> x, vector<T, N> y)
return fmod(x, y);
}
+__generic<T : __BuiltinFloatingPointType, let N : int>
+[__readNone]
+[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+public vector<T,N> min(vector<T,N> x, T y)
+{
+ __target_switch
+ {
+ case glsl: __intrinsic_asm "min";
+ default:
+ return min(x, vector<T,N>(y));
+ }
+}
+
+__generic<T : __BuiltinFloatingPointType, let N : int>
+[__readNone]
+[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+public vector<T,N> max(vector<T,N> x, T y)
+{
+ __target_switch
+ {
+ case glsl: __intrinsic_asm "max";
+ default:
+ return max(x, vector<T,N>(y));
+ }
+}
+
+__generic<T : __BuiltinFloatingPointType, let N : int>
+[__readNone]
+[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+public vector<T,N> clamp(vector<T,N> x, T minBound, T maxBound)
+{
+ __target_switch
+ {
+ case glsl: __intrinsic_asm "clamp";
+ default:
+ return clamp(x, vector<T,N>(minBound), vector<T,N>(maxBound));
+ }
+}
+
__generic<T : __BuiltinFloatingPointType>
[__readNone]
[ForceInline]
@@ -506,7 +581,12 @@ __generic<T : __BuiltinFloatingPointType, let N:int>
[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
public vector<T, N> mix(vector<T, N> x, vector<T, N> y, T a)
{
- return lerp(x, y, vector<T, N>(a));
+ __target_switch
+ {
+ case glsl: __intrinsic_asm "mix";
+ default:
+ return mix(x, y, vector<T, N>(a));
+ }
}
__generic<T : __BuiltinFloatingPointType, let N:int>
@@ -524,7 +604,15 @@ __generic<T>
[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
public T mix(T x, T y, bool a)
{
- return (a ? y : x);
+ __target_switch
+ {
+ case glsl: __intrinsic_asm "mix";
+ case spirv: return spirv_asm {
+ result:$$T = OpSelect $a $x $y
+ };
+ default:
+ return (a ? y : x);
+ }
}
__generic<T, let N:int>
@@ -533,12 +621,21 @@ __generic<T, let N:int>
[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
public vector<T, N> mix(vector<T, N> x, vector<T, N> y, vector<bool, N> a)
{
- vector<T, N> result;
- for (int i = 0; i < N; i++)
+ __target_switch
{
- result[i] = (a[i] ? y[i] : x[i]);
+ case glsl: __intrinsic_asm "mix";
+ case spirv: return spirv_asm {
+ result:$$vector<T,N> = OpSelect $a $x $y
+ };
+ default:
+ vector<T, N> result;
+ [ForceUnroll]
+ for (int i = 0; i < N; i++)
+ {
+ result[i] = (a[i] ? y[i] : x[i]);
+ }
+ return result;
}
- return result;
}
[__readNone]
@@ -617,28 +714,28 @@ public vector<float, N> uintBitsToFloat(highp vector<uint, N> x)
[ForceInline]
uint packUnorm1x16(float c)
{
- return uint(clamp(c, 0.0, 1.0) * 65535.0 + 0.5);
+ return uint(round(clamp(c, 0.0, 1.0) * 65535.0));
}
[__readNone]
[ForceInline]
uint packSnorm1x16(float v)
{
- return uint(clamp(v ,-1.0, 1.0) * 32767.0 + 32767.5);
+ return uint(round(clamp(v ,-1.0, 1.0) * 32767.0));
}
[__readNone]
[ForceInline]
uint packUnorm1x8(float c)
{
- return uint(clamp(c, 0.0, 1.0) * 255.0 + 0.5);
+ return uint(round(clamp(c, 0.0, 1.0) * 255.0));
}
[__readNone]
[ForceInline]
uint packSnorm1x8(float c)
{
- return uint(clamp(c, -1.0, 1.0) * 127.0 + 127.5);
+ return uint(round(clamp(c, -1.0, 1.0) * 127.0));
}
[__readNone]
@@ -654,7 +751,7 @@ float unpackUnorm1x16(uint p)
float unpackSnorm1x16(uint p)
{
const uint wordMask = 0xffff;
- return clamp((float(p & wordMask) - 32767.0) / 32767.0, -1.0, 1.0);
+ return clamp(float(p & wordMask) / 32767.0, -1.0, 1.0);
}
[__readNone]
@@ -670,7 +767,7 @@ float unpackUnorm1x8(uint p)
float unpackSnorm1x8(uint p)
{
const uint byteMask = 0xff;
- return clamp((float(p & byteMask) - 127.0) / 127.0, -1.0, 1.0);
+ return clamp(float(p & byteMask) / 127.0, -1.0, 1.0);
}
[__readNone]
@@ -689,6 +786,44 @@ uint float2half(float f)
return (s | e | m);
}
+__generic<T : __BuiltinFloatingPointType, E : __BuiltinIntegerType>
+[__readNone]
+[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+public T ldexp(T x, E exp)
+{
+ __target_switch
+ {
+ case hlsl: __intrinsic_asm "ldexp";
+ case glsl: __intrinsic_asm "ldexp";
+ case spirv: return spirv_asm {
+ OpExtInst $$T result glsl450 Ldexp $x $exp
+ };
+ default:
+ return ldexp(x, __floatCast<T>(exp));
+ }
+}
+
+__generic<T : __BuiltinFloatingPointType, E : __BuiltinIntegerType, let N : int>
+[__readNone]
+[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+public vector<T, N> ldexp(vector<T, N> x, vector<E, N> exp)
+{
+ __target_switch
+ {
+ case hlsl: __intrinsic_asm "ldexp";
+ case glsl: __intrinsic_asm "ldexp";
+ case spirv: return spirv_asm {
+ OpExtInst $$vector<T,N> result glsl450 Ldexp $x $exp
+ };
+ default:
+ vector<T,N> temp;
+ [ForceUnroll]
+ for (int i = 0; i < N; ++i)
+ temp[i] = __floatCast<T>(exp[i]);
+ return ldexp(x, temp);
+ }
+}
+
[__readNone]
[ForceInline]
[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)]
@@ -697,6 +832,9 @@ public uint packUnorm2x16(vec2 v)
__target_switch
{
case glsl: __intrinsic_asm "packUnorm2x16";
+ case spirv: return spirv_asm {
+ result:$$uint = OpExtInst glsl450 PackUnorm2x16 $v
+ };
default:
return packUnorm1x16(v.x) | (packUnorm1x16(v.y) << uint(16));
}
@@ -710,6 +848,9 @@ public uint packSnorm2x16(vec2 v)
__target_switch
{
case glsl: __intrinsic_asm "packSnorm2x16";
+ case spirv: return spirv_asm {
+ result:$$uint = OpExtInst glsl450 PackSnorm2x16 $v
+ };
default:
return packSnorm1x16(v.x) | (packSnorm1x16(v.y) << uint(16));
}
@@ -723,6 +864,9 @@ public uint packUnorm4x8(vec4 v)
__target_switch
{
case glsl: __intrinsic_asm "packUnorm4x8";
+ case spirv: return spirv_asm {
+ result:$$uint = OpExtInst glsl450 PackUnorm4x8 $v
+ };
default:
return packUnorm1x8(v.x) | (packUnorm1x8(v.y) << uint(8)) | (packUnorm1x8(v.z) << uint(16)) | (packUnorm1x8(v.w) << uint(24));
}
@@ -736,6 +880,9 @@ public uint packSnorm4x8(vec4 v)
__target_switch
{
case glsl: __intrinsic_asm "packSnorm4x8";
+ case spirv: return spirv_asm {
+ result:$$uint = OpExtInst glsl450 PackSnorm4x8 $v
+ };
default:
return packSnorm1x8(v.x) | (packSnorm1x8(v.y) << uint(8)) | (packSnorm1x8(v.z) << uint(16)) | (packSnorm1x8(v.w) << uint(24));
}
@@ -749,6 +896,9 @@ public vec2 unpackUnorm2x16(uint p)
__target_switch
{
case glsl: __intrinsic_asm "unpackUnorm2x16";
+ case spirv: return spirv_asm {
+ result:$$vec2 = OpExtInst glsl450 UnpackUnorm2x16 $p
+ };
default:
return vec2(unpackUnorm1x16(p & uint(0xffff)), unpackUnorm1x16(p >> uint(16)));
}
@@ -762,6 +912,9 @@ public vec2 unpackSnorm2x16(uint p)
__target_switch
{
case glsl: __intrinsic_asm "unpackSnorm2x16";
+ case spirv: return spirv_asm {
+ result:$$vec2 = OpExtInst glsl450 UnpackSnorm2x16 $p
+ };
default:
return vec2(unpackSnorm1x16(p & uint(0xffff)), unpackSnorm1x16(p >> uint(16)));
}
@@ -775,6 +928,9 @@ public vec4 unpackUnorm4x8(highp uint p)
__target_switch
{
case glsl: __intrinsic_asm "unpackUnorm4x8";
+ case spirv: return spirv_asm {
+ result:$$vec4 = OpExtInst glsl450 UnpackUnorm4x8 $p
+ };
default:
return vec4(
unpackUnorm1x8(p),
@@ -792,6 +948,9 @@ public vec4 unpackSnorm4x8(highp uint p)
__target_switch
{
case glsl: __intrinsic_asm "unpackSnorm4x8";
+ case spirv: return spirv_asm {
+ result:$$vec4 = OpExtInst glsl450 UnpackSnorm4x8 $p
+ };
default:
return vec4(
unpackSnorm1x8(p),
@@ -809,6 +968,9 @@ public uint packHalf2x16(vec2 v)
__target_switch
{
case glsl: __intrinsic_asm "packHalf2x16";
+ case spirv: return spirv_asm {
+ result:$$uint = OpExtInst glsl450 PackHalf2x16 $v
+ };
default:
return float2half(v.x) | (float2half(v.y) << uint(16));
}
@@ -842,6 +1004,9 @@ public vec2 unpackHalf2x16(uint p)
__target_switch
{
case glsl: __intrinsic_asm "unpackHalf2x16";
+ case spirv: return spirv_asm {
+ result:$$vec2 = OpExtInst glsl450 UnpackHalf2x16 $p
+ };
default:
return vec2(half2float(p & uint(0xffff)), half2float(p >> uint(16)));
}
@@ -855,6 +1020,9 @@ public double packDouble2x32(uvec2 v)
__target_switch
{
case glsl: __intrinsic_asm "packDouble2x32";
+ case spirv: return spirv_asm {
+ result:$$double = OpExtInst glsl450 PackDouble2x32 $v
+ };
default:
// TODO: there is no "asdouble()"
//return asdouble(uint64_t(v.x) | (uint64_t(v.y) << 32));
@@ -870,6 +1038,9 @@ public uvec2 unpackDouble2x32(double v)
__target_switch
{
case glsl: __intrinsic_asm "unpackDouble2x32";
+ case spirv: return spirv_asm {
+ result:$$uvec2 = OpExtInst glsl450 UnpackDouble2x32 $v
+ };
default:
// TODO: there is no "asuint64()"
uint64_t u = 0; // asuint64(v);
@@ -887,7 +1058,15 @@ __generic<T : __BuiltinFloatingPointType>
[require(cpp_cuda_glsl_hlsl_spirv, GLSL_400)]
public T faceforward(T n, T i, T ng)
{
- return dot(ng, i) < T(0.0f) ? n : -n;
+ __target_switch
+ {
+ case glsl: __intrinsic_asm "faceforward";
+ case spirv: return spirv_asm {
+ OpExtInst $$T result glsl450 FaceForward $n $i $ng
+ };
+ default:
+ return dot(ng, i) < T(0.0f) ? n : -n;
+ }
}
//
@@ -904,6 +1083,15 @@ public matrix<T, R, C> outerProduct(vector<T, C> c, vector<T, R> r)
__target_switch
{
case glsl: __intrinsic_asm "outerProduct";
+
+ // Note: SPIR-V takes the input arguments in an opposite order
+ // compared to GLSL. SPIR-V spec document says,
+ // "Its (second argument) number of components must equal the
+ // number of columns in Result Type."
+ //
+ case spirv: return spirv_asm {
+ result:$$matrix<T,R,C> = OpOuterProduct $c $r
+ };
default:
matrix<T, R, C> result;
for (int j = 0; j < R; ++j)
@@ -918,13 +1106,15 @@ public matrix<T, R, C> outerProduct(vector<T, C> c, vector<T, R> r)
}
__generic<T : __BuiltinFloatingPointType, let N : int>
-[require(cpp_cuda_glsl_hlsl_spirv, GLSL_400)]
-matrix<T,N,N> inverse(matrix<T,N,N> m)
+[require(glsl_spirv, GLSL_400)]
+public matrix<T,N,N> inverse(matrix<T,N,N> m)
{
__target_switch
{
case glsl: __intrinsic_asm "inverse";
- case hlsl: __intrinsic_asm "inverse";
+ case spirv: return spirv_asm {
+ OpExtInst $$matrix<T,N,N> result glsl450 MatrixInverse $m
+ };
}
}
@@ -934,201 +1124,423 @@ matrix<T,N,N> inverse(matrix<T,N,N> m)
[__readNone]
[ForceInline]
+[require(cpp_cuda_glsl_hlsl_spirv, GLSL_400)]
public uint uaddCarry(highp uint x, highp uint y, out lowp uint carry)
{
- let result = x * y;
- carry = ((result < x || result < y) ? 1 : 0);
- return result;
+ __target_switch
+ {
+ case glsl: __intrinsic_asm "uaddCarry";
+ case spirv: return spirv_asm {
+ %ResType = OpTypeStruct $$uint $$uint;
+ %temp:%ResType = OpIAddCarry $x $y;
+ %carry:$$uint = OpCompositeExtract %temp 1;
+ OpStore &carry %carry;
+ result:$$uint = OpCompositeExtract %temp 0
+ };
+ default:
+ let result = x * y;
+ carry = ((result < x || result < y) ? 1 : 0);
+ return result;
+ }
}
__generic<let N:int>
[__readNone]
[ForceInline]
+[require(cpp_cuda_glsl_hlsl_spirv, GLSL_400)]
public vector<uint,N> uaddCarry(highp vector<uint,N> x, highp vector<uint,N> y, out lowp vector<uint,N> carry)
{
- VECTOR_MAP_TRINARY(uint, N, uaddCarry, x, y, carry);
+ __target_switch
+ {
+ case glsl: __intrinsic_asm "uaddCarry";
+ case spirv: return spirv_asm {
+ %ResType = OpTypeStruct $$vector<uint,N> $$vector<uint,N>;
+ %temp:%ResType = OpIAddCarry $x $y;
+ %carry:$$vector<uint,N> = OpCompositeExtract %temp 1;
+ OpStore &carry %carry;
+ result:$$vector<uint,N> = OpCompositeExtract %temp 0
+ };
+ default:
+ VECTOR_MAP_TRINARY(uint, N, uaddCarry, x, y, carry);
+ }
}
[__readNone]
[ForceInline]
+[require(cpp_cuda_glsl_hlsl_spirv, GLSL_400)]
public uint usubBorrow(highp uint x, highp uint y, out lowp uint borrow)
{
- borrow = (y > x) ? 1 : 0;
- return x - y;
+ __target_switch
+ {
+ case glsl: __intrinsic_asm "usubBorrow";
+ case spirv: return spirv_asm {
+ %ResType = OpTypeStruct $$uint $$uint;
+ %temp:%ResType = OpISubBorrow $x $y;
+ %borrow:$$uint = OpCompositeExtract %temp 1;
+ OpStore &borrow %borrow;
+ result:$$uint = OpCompositeExtract %temp 0
+ };
+ default:
+ borrow = (y > x) ? 1 : 0;
+ return x - y;
+ }
}
__generic<let N:int>
[__readNone]
[ForceInline]
+[require(cpp_cuda_glsl_hlsl_spirv, GLSL_400)]
public vector<uint,N> usubBorrow(highp vector<uint,N> x, highp vector<uint,N> y, out lowp vector<uint,N> borrow)
{
- VECTOR_MAP_TRINARY(uint, N, usubBorrow, x, y, borrow);
+ __target_switch
+ {
+ case glsl: __intrinsic_asm "usubBorrow";
+ case spirv: return spirv_asm {
+ %ResType = OpTypeStruct $$vector<uint,N> $$vector<uint,N>;
+ %temp:%ResType = OpISubBorrow $x $y;
+ %borrow:$$vector<uint,N> = OpCompositeExtract %temp 1;
+ OpStore &borrow %borrow;
+ result:$$vector<uint,N> = OpCompositeExtract %temp 0
+ };
+ default:
+ VECTOR_MAP_TRINARY(uint, N, usubBorrow, x, y, borrow);
+ }
}
[__readNone]
[ForceInline]
+[require(cpp_cuda_glsl_hlsl_spirv, GLSL_400)]
public void umulExtended(highp uint x, highp uint y, out highp uint msb, out highp uint lsb)
{
- uint64_t result = x * y;
- msb = uint(result >> 32);
- lsb = uint(result);
+ __target_switch
+ {
+ case glsl: __intrinsic_asm "umulExtended";
+ case spirv: spirv_asm {
+ %ResType = OpTypeStruct $$uint $$uint;
+ %temp:%ResType = OpUMulExtended $x $y;
+ %lsb:$$uint = OpCompositeExtract %temp 0;
+ %msb:$$uint = OpCompositeExtract %temp 1;
+ OpStore &lsb %lsb;
+ OpStore &msb %msb;
+ };
+ default:
+ uint64_t result = x * y;
+ msb = uint(result >> 32);
+ lsb = uint(result);
+ }
}
__generic<let N:int>
[__readNone]
[ForceInline]
+[require(cpp_cuda_glsl_hlsl_spirv, GLSL_400)]
public void umulExtended(highp vector<uint,N> x, highp vector<uint,N> y, out highp vector<uint,N> msb, out highp vector<uint,N> lsb)
{
- for(int i = 0; i < N; ++i)
+ __target_switch
{
- umulExtended(x[i], y[i], msb[i], lsb[i]);
+ case glsl: __intrinsic_asm "umulExtended";
+ case spirv: spirv_asm {
+ %ResType = OpTypeStruct $$vector<uint,N> $$vector<uint,N>;
+ %temp:%ResType = OpUMulExtended $x $y;
+ %lsb:$$vector<uint,N> = OpCompositeExtract %temp 0;
+ %msb:$$vector<uint,N> = OpCompositeExtract %temp 1;
+ OpStore &lsb %lsb;
+ OpStore &msb %msb;
+ };
+ default:
+ [ForceUnroll]
+ for(int i = 0; i < N; ++i)
+ {
+ umulExtended(x[i], y[i], msb[i], lsb[i]);
+ }
}
}
[__readNone]
[ForceInline]
+[require(cpp_cuda_glsl_hlsl_spirv, GLSL_400)]
public void imulExtended(highp int x, highp int y, out highp int msb, out highp int lsb)
{
- int64_t result = x * y;
- msb = int(result >> 32);
- lsb = int(result);
+ __target_switch
+ {
+ case glsl: __intrinsic_asm "imulExtended";
+ case spirv: spirv_asm {
+ %ResType = OpTypeStruct $$int $$int;
+ %temp:%ResType = OpSMulExtended $x $y;
+ %lsb:$$int = OpCompositeExtract %temp 0;
+ %msb:$$int = OpCompositeExtract %temp 1;
+ OpStore &lsb %lsb;
+ OpStore &msb %msb;
+ };
+ default:
+ int64_t result = x * y;
+ msb = int(result >> 32);
+ lsb = int(result);
+ }
}
__generic<let N:int>
[__readNone]
[ForceInline]
+[require(cpp_cuda_glsl_hlsl_spirv, GLSL_400)]
public void imulExtended(highp vector<int,N> x, highp vector<int,N> y, out highp vector<int,N> msb, out highp vector<int,N> lsb)
{
- for(int i = 0; i < N; ++i)
+ __target_switch
{
- imulExtended(x[i], y[i], msb[i], lsb[i]);
+ case glsl: __intrinsic_asm "imulExtended";
+ case spirv: spirv_asm {
+ %ResType = OpTypeStruct $$vector<int,N> $$vector<int,N>;
+ %temp:%ResType = OpSMulExtended $x $y;
+ %lsb:$$vector<int,N> = OpCompositeExtract %temp 0;
+ %msb:$$vector<int,N> = OpCompositeExtract %temp 1;
+ OpStore &lsb %lsb;
+ OpStore &msb %msb;
+ };
+ default:
+ [ForceUnroll]
+ for(int i = 0; i < N; ++i)
+ {
+ imulExtended(x[i], y[i], msb[i], lsb[i]);
+ }
}
}
[__readNone]
[ForceInline]
+[require(cpp_cuda_glsl_hlsl_spirv, GLSL_400)]
public int bitfieldExtract(int value, int offset, int bits)
{
- return int(uint(value >> offset) & ((1u << bits) - 1));
+ __target_switch
+ {
+ case glsl: __intrinsic_asm "bitfieldExtract";
+ case spirv: return spirv_asm {
+ result:$$int = OpBitFieldSExtract $value $offset $bits
+ };
+ default:
+ return int(uint(value >> offset) & ((1u << bits) - 1));
+ }
}
__generic<let N:int>
[__readNone]
[ForceInline]
+[require(cpp_cuda_glsl_hlsl_spirv, GLSL_400)]
public vector<int,N> bitfieldExtract(vector<int,N> value, int offset, int bits)
{
- vector<int,N> result;
- for (int i = 0; i < N; ++i)
+ __target_switch
{
- result[i] = bitfieldExtract(value[i], offset, bits);
+ case glsl: __intrinsic_asm "bitfieldExtract";
+ case spirv: return spirv_asm {
+ result:$$vector<int,N> = OpBitFieldSExtract $value $offset $bits
+ };
+ default:
+ vector<int,N> result;
+ [ForceUnroll]
+ for (int i = 0; i < N; ++i)
+ {
+ result[i] = bitfieldExtract(value[i], offset, bits);
+ }
+ return result;
}
- return result;
}
[__readNone]
[ForceInline]
+[require(cpp_cuda_glsl_hlsl_spirv, GLSL_400)]
public uint bitfieldExtract(uint value, int offset, int bits)
{
- return (value >> offset) & ((1u << bits) - 1);
+ __target_switch
+ {
+ case glsl: __intrinsic_asm "bitfieldExtract";
+ case spirv: return spirv_asm {
+ result:$$uint = OpBitFieldUExtract $value $offset $bits
+ };
+ default:
+ return (value >> offset) & ((1u << bits) - 1);
+ }
}
__generic<let N:int>
[__readNone]
[ForceInline]
+[require(cpp_cuda_glsl_hlsl_spirv, GLSL_400)]
public vector<uint,N> bitfieldExtract(vector<uint,N> value, int offset, int bits)
{
- vector<uint,N> result;
- for (int i = 0; i < N; ++i)
+ __target_switch
{
- result[i] = bitfieldExtract(value[i], offset, bits);
+ case glsl: __intrinsic_asm "bitfieldExtract";
+ case spirv: return spirv_asm {
+ result:$$vector<uint,N> = OpBitFieldUExtract $value $offset $bits
+ };
+ default:
+ vector<uint,N> result;
+ [ForceUnroll]
+ for (int i = 0; i < N; ++i)
+ {
+ result[i] = bitfieldExtract(value[i], offset, bits);
+ }
+ return result;
}
- return result;
}
[__readNone]
[ForceInline]
+[require(cpp_cuda_glsl_hlsl_spirv, GLSL_400)]
public uint bitfieldInsert(uint base, uint insert, int offset, int bits)
{
- uint clearMask = ~(((1u << bits) - 1u) << offset);
- uint clearedBase = base & clearMask;
- uint maskedInsert = (insert & ((1u << bits) - 1u)) << offset;
- return clearedBase | maskedInsert;
+ __target_switch
+ {
+ case glsl: __intrinsic_asm "bitfieldInsert";
+ case spirv: return spirv_asm {
+ result:$$uint = OpBitFieldInsert $base $insert $offset $bits
+ };
+ default:
+ uint clearMask = ~(((1u << bits) - 1u) << offset);
+ uint clearedBase = base & clearMask;
+ uint maskedInsert = (insert & ((1u << bits) - 1u)) << offset;
+ return clearedBase | maskedInsert;
+ }
}
__generic<let N:int>
[__readNone]
[ForceInline]
+[require(cpp_cuda_glsl_hlsl_spirv, GLSL_400)]
public vector<uint,N> bitfieldInsert(vector<uint,N> base, vector<uint,N> insert, int offset, int bits)
{
- vector<uint,N> result;
- for (int i = 0; i < N; ++i)
+ __target_switch
{
- result[i] = bitfieldInsert(base[i], insert[i], offset, bits);
+ case glsl: __intrinsic_asm "bitfieldInsert";
+ case spirv: return spirv_asm {
+ result:$$vector<uint,N> = OpBitFieldInsert $base $insert $offset $bits
+ };
+ default:
+ vector<uint,N> result;
+ [ForceUnroll]
+ for (int i = 0; i < N; ++i)
+ {
+ result[i] = bitfieldInsert(base[i], insert[i], offset, bits);
+ }
+ return result;
}
- return result;
}
[__readNone]
[ForceInline]
+[require(cpp_cuda_glsl_hlsl_spirv, GLSL_400)]
public int bitfieldInsert(int base, int insert, int offset, int bits)
{
- uint clearMask = ~(((1u << bits) - 1u) << offset);
- uint clearedBase = base & clearMask;
- uint maskedInsert = (insert & ((1u << bits) - 1u)) << offset;
- return clearedBase | maskedInsert;
+ __target_switch
+ {
+ case glsl: __intrinsic_asm "bitfieldInsert";
+ case spirv: return spirv_asm {
+ result:$$int = OpBitFieldInsert $base $insert $offset $bits
+ };
+ default:
+ uint clearMask = ~(((1u << bits) - 1u) << offset);
+ uint clearedBase = base & clearMask;
+ uint maskedInsert = (insert & ((1u << bits) - 1u)) << offset;
+ return clearedBase | maskedInsert;
+ }
}
__generic<let N:int>
[__readNone]
[ForceInline]
+[require(cpp_cuda_glsl_hlsl_spirv, GLSL_400)]
public vector<int,N> bitfieldInsert(vector<int,N> base, vector<int,N> insert, int offset, int bits)
{
- vector<int,N> result;
- for (int i = 0; i < N; ++i)
+ __target_switch
{
- result[i] = bitfieldInsert(base[i], insert[i], offset, bits);
+ case glsl: __intrinsic_asm "bitfieldInsert";
+ case spirv: return spirv_asm {
+ result:$$vector<int,N> = OpBitFieldInsert $base $insert $offset $bits
+ };
+ default:
+ vector<int,N> result;
+ [ForceUnroll]
+ for (int i = 0; i < N; ++i)
+ {
+ result[i] = bitfieldInsert(base[i], insert[i], offset, bits);
+ }
+ return result;
}
- return result;
}
[__readNone]
[ForceInline]
+[require(cpp_cuda_glsl_hlsl_spirv, GLSL_400)]
public int bitfieldReverse(highp int value)
{
- value = ((value & 0xAAAAAAAA) >> 1) | ((value & 0x55555555) << 1);
- value = ((value & 0xCCCCCCCC) >> 2) | ((value & 0x33333333) << 2);
- value = ((value & 0xF0F0F0F0) >> 4) | ((value & 0x0F0F0F0F) << 4);
- value = ((value & 0xFF00FF00) >> 8) | ((value & 0x00FF00FF) << 8);
- value = ((value & 0xFFFF0000) >> 16) | ((value & 0x0000FFFF) << 16);
- return value;
+ __target_switch
+ {
+ case glsl: __intrinsic_asm "bitfieldReverse";
+ case spirv: return spirv_asm {
+ result:$$int = OpBitReverse $value
+ };
+ default:
+ value = ((value & 0xAAAAAAAA) >> 1) | ((value & 0x55555555) << 1);
+ value = ((value & 0xCCCCCCCC) >> 2) | ((value & 0x33333333) << 2);
+ value = ((value & 0xF0F0F0F0) >> 4) | ((value & 0x0F0F0F0F) << 4);
+ value = ((value & 0xFF00FF00) >> 8) | ((value & 0x00FF00FF) << 8);
+ value = ((value & 0xFFFF0000) >> 16) | ((value & 0x0000FFFF) << 16);
+ return value;
+ }
}
__generic<let N:int>
[__readNone]
[ForceInline]
+[require(cpp_cuda_glsl_hlsl_spirv, GLSL_400)]
public vector<int,N> bitfieldReverse(highp vector<int,N> value)
{
- VECTOR_MAP_UNARY(int, N, bitfieldReverse, value);
+ __target_switch
+ {
+ case glsl: __intrinsic_asm "bitfieldReverse";
+ case spirv: return spirv_asm {
+ result:$$vector<int,N> = OpBitReverse $value
+ };
+ default:
+ VECTOR_MAP_UNARY(int, N, bitfieldReverse, value);
+ }
}
[__readNone]
[ForceInline]
+[require(cpp_cuda_glsl_hlsl_spirv, GLSL_400)]
public uint bitfieldReverse(highp uint value)
{
- value = ((value & 0xAAAAAAAA) >> 1) | ((value & 0x55555555) << 1);
- value = ((value & 0xCCCCCCCC) >> 2) | ((value & 0x33333333) << 2);
- value = ((value & 0xF0F0F0F0) >> 4) | ((value & 0x0F0F0F0F) << 4);
- value = ((value & 0xFF00FF00) >> 8) | ((value & 0x00FF00FF) << 8);
- value = ((value & 0xFFFF0000) >> 16) | ((value & 0x0000FFFF) << 16);
- return value;
+ __target_switch
+ {
+ case glsl: __intrinsic_asm "bitfieldReverse";
+ case spirv: return spirv_asm {
+ result:$$uint = OpBitReverse $value
+ };
+ default:
+ value = ((value & 0xAAAAAAAA) >> 1) | ((value & 0x55555555) << 1);
+ value = ((value & 0xCCCCCCCC) >> 2) | ((value & 0x33333333) << 2);
+ value = ((value & 0xF0F0F0F0) >> 4) | ((value & 0x0F0F0F0F) << 4);
+ value = ((value & 0xFF00FF00) >> 8) | ((value & 0x00FF00FF) << 8);
+ value = ((value & 0xFFFF0000) >> 16) | ((value & 0x0000FFFF) << 16);
+ return value;
+ }
}
__generic<let N:int>
[__readNone]
[ForceInline]
+[require(cpp_cuda_glsl_hlsl_spirv, GLSL_400)]
public vector<uint,N> bitfieldReverse(highp vector<uint,N> value)
{
- VECTOR_MAP_UNARY(int, N, bitfieldReverse, value);
+ __target_switch
+ {
+ case glsl: __intrinsic_asm "bitfieldReverse";
+ case spirv: return spirv_asm {
+ result:$$vector<uint,N> = OpBitReverse $value
+ };
+ default:
+ VECTOR_MAP_UNARY(int, N, bitfieldReverse, value);
+ }
}
[__readNone]
@@ -1145,7 +1557,15 @@ __generic<let N:int>
[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)]
public vector<uint,N> bitCount(vector<uint,N> value)
{
- VECTOR_MAP_UNARY(uint, N, countbits, value);
+ __target_switch
+ {
+ case glsl: __intrinsic_asm "bitCount";
+ case spirv: return spirv_asm {
+ result:$$vector<uint,N> = OpBitCount $value
+ };
+ default:
+ VECTOR_MAP_UNARY(uint, N, countbits, value);
+ }
}
[__readNone]
@@ -1153,7 +1573,15 @@ public vector<uint,N> bitCount(vector<uint,N> value)
[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)]
public int bitCount(int value)
{
- return countbits(uint(value));
+ __target_switch
+ {
+ case glsl: __intrinsic_asm "bitCount";
+ case spirv: return spirv_asm {
+ result:$$int = OpBitCount $value
+ };
+ default:
+ return countbits(uint(value));
+ }
}
__generic<let N:int>
@@ -1162,7 +1590,15 @@ __generic<let N:int>
[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)]
public vector<int,N> bitCount(vector<int,N> value)
{
- VECTOR_MAP_UNARY(int, N, countbits, value);
+ __target_switch
+ {
+ case glsl: __intrinsic_asm "bitCount";
+ case spirv: return spirv_asm {
+ result:$$vector<int,N> = OpBitCount $value
+ };
+ default:
+ VECTOR_MAP_UNARY(int, N, countbits, value);
+ }
}
[__readNone]
diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang
index 2250ed6d4..7cafe764f 100644
--- a/source/slang/hlsl.meta.slang
+++ b/source/slang/hlsl.meta.slang
@@ -5741,7 +5741,15 @@ __generic<T : __BuiltinFloatingPointType>
[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
T distance(T x, T y)
{
- return length(x - y);
+ __target_switch
+ {
+ case glsl: __intrinsic_asm "distance";
+ case spirv: return spirv_asm {
+ OpExtInst $$T result glsl450 Distance $x $y
+ };
+ default:
+ return length(x - y);
+ }
}
// Vector dot product
@@ -6028,9 +6036,7 @@ T exp2(T x)
__target_switch
{
case glsl:
- if (__isHalf<T>())
- __intrinsic_asm "exp2($0)";
- __intrinsic_asm "exp2(float($0))";
+ __intrinsic_asm "exp2($0)";
case spirv:
if (__isHalf<T>())
{
@@ -6060,6 +6066,8 @@ vector<T,N> exp2(vector<T,N> x)
{
__target_switch
{
+ case glsl:
+ __intrinsic_asm "exp2($0)";
case hlsl: __intrinsic_asm "exp2";
case spirv: return spirv_asm {
OpExtInst $$vector<T,N> result glsl450 Exp2 $x
@@ -7878,7 +7886,15 @@ __generic<T : __BuiltinFloatingPointType>
[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
T length(T x)
{
- return abs(x);
+ __target_switch
+ {
+ case glsl: __intrinsic_asm "length";
+ case spirv: return spirv_asm {
+ OpExtInst $$T result glsl450 Length $x
+ };
+ default:
+ return abs(x);
+ }
}
// Linear interpolation