3 files changed, 1478 insertions, 472 deletions
diff --git a/source/slang/glsl.meta.slang b/source/slang/glsl.meta.slang
index 98770293c..0ba6c17aa 100644
--- a/source/slang/glsl.meta.slang
+++ b/source/slang/glsl.meta.slang
@@ -4,10 +4,10 @@
 #define lowp
 
 #define VECTOR_MAP_UNARY(TYPE, COUNT, FUNC, VALUE) \
-    vector<TYPE,COUNT> result; for(int i = 0; i < COUNT; ++i) { result[i] = FUNC(VALUE[i]); } return result
+    vector<TYPE,COUNT> result; [ForceUnroll] for(int i = 0; i < COUNT; ++i) { result[i] = FUNC(VALUE[i]); } return result
 
 #define VECTOR_MAP_TRINARY(TYPE, COUNT, FUNC, A, B, C) \
-    vector<TYPE,COUNT> result; for(int i = 0; i < COUNT; ++i) { result[i] = FUNC(A[i], B[i], C[i]); } return result
+    vector<TYPE,COUNT> result; [ForceUnroll] for(int i = 0; i < COUNT; ++i) { result[i] = FUNC(A[i], B[i], C[i]); } return result
 
 //
 // OpenGL 4.60 spec
@@ -331,6 +331,10 @@ public T asinh(T x)
     {
     case cpp: __intrinsic_asm "$P_asinh($0)";
     case cuda: __intrinsic_asm "$P_asinh($0)";
+    case glsl: __intrinsic_asm "asinh";
+    case spirv: return spirv_asm {
+        OpExtInst $$T result glsl450 Asinh $x
+    };
     default:
         return log(x + sqrt(x * x + T(1)));
     }
@@ -342,7 +346,15 @@ __generic<T : __BuiltinFloatingPointType, let N:int>
 [require(cpp_cuda_glsl_hlsl_spirv, GLSL_130)]
 public vector<T,N> asinh(vector<T,N> x)
 {
-    VECTOR_MAP_UNARY(T, N, asinh, x);
+    __target_switch
+    {
+    case glsl: __intrinsic_asm "asinh";
+    case spirv: return spirv_asm {
+        OpExtInst $$vector<T,N> result glsl450 Asinh $x
+    };
+    default:
+        VECTOR_MAP_UNARY(T, N, asinh, x);
+    }
 }
 
 __generic<T : __BuiltinFloatingPointType>
@@ -355,6 +367,10 @@ public T acosh(T x)
     {
     case cpp: __intrinsic_asm "$P_acosh($0)";
     case cuda: __intrinsic_asm "$P_acosh($0)";
+    case glsl: __intrinsic_asm "acosh";
+    case spirv: return spirv_asm {
+        OpExtInst $$T result glsl450 Acosh $x
+    };
     default:
         return log(x + sqrt( x * x - T(1)));
     }
@@ -366,7 +382,15 @@ __generic<T : __BuiltinFloatingPointType, let N:int>
 [require(cpp_cuda_glsl_hlsl_spirv, GLSL_130)]
 public vector<T,N> acosh(vector<T,N> x)
 {
-    VECTOR_MAP_UNARY(T, N, acosh, x);
+    __target_switch
+    {
+    case glsl: __intrinsic_asm "acosh";
+    case spirv: return spirv_asm {
+        OpExtInst $$vector<T,N> result glsl450 Acosh $x
+    };
+    default:
+        VECTOR_MAP_UNARY(T, N, acosh, x);
+    }
 }
 
 __generic<T : __BuiltinFloatingPointType>
@@ -379,6 +403,10 @@ public T atanh(T x)
     {
     case cpp: __intrinsic_asm "$P_atanh($0)";
     case cuda: __intrinsic_asm "$P_atanh($0)";
+    case glsl: __intrinsic_asm "atanh";
+    case spirv: return spirv_asm {
+        OpExtInst $$T result glsl450 Atanh $x
+    };
     default:
         return T(0.5) * log((T(1) + x) / (T(1) - x));
     }
@@ -390,7 +418,15 @@ __generic<T : __BuiltinFloatingPointType, let N:int>
 [require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
 public vector<T,N> atanh(vector<T,N> x)
 {
-    VECTOR_MAP_UNARY(T, N, atanh, x);
+    __target_switch
+    {
+    case glsl: __intrinsic_asm "atanh";
+    case spirv: return spirv_asm {
+        OpExtInst $$vector<T,N> result glsl450 Atanh $x
+    };
+    default:
+        VECTOR_MAP_UNARY(T, N, atanh, x);
+    }
 }
 
 //
@@ -491,6 +527,45 @@ public vector<T, N> mod(vector<T, N> x, vector<T, N> y)
     return fmod(x, y);
 }
 
+__generic<T : __BuiltinFloatingPointType, let N : int>
+[__readNone]
+[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+public vector<T,N> min(vector<T,N> x, T y) // Overload of min taking a vector `x` and a scalar `y`.
+{
+    __target_switch
+    {
+    case glsl: __intrinsic_asm "min"; // GLSL target: emit a call to `min` with the arguments as given.
+    default:
+        return min(x, vector<T,N>(y)); // Other targets: convert `y` to vector<T,N> and call min on two vectors.
+    }
+}
+
+__generic<T : __BuiltinFloatingPointType, let N : int>
+[__readNone]
+[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+public vector<T,N> max(vector<T,N> x, T y) // Overload of max taking a vector `x` and a scalar `y`.
+{
+    __target_switch
+    {
+    case glsl: __intrinsic_asm "max"; // GLSL target: emit a call to `max` with the arguments as given.
+    default:
+        return max(x, vector<T,N>(y)); // Other targets: convert `y` to vector<T,N> and call max on two vectors.
+    }
+}
+
+__generic<T : __BuiltinFloatingPointType, let N : int>
+[__readNone]
+[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+public vector<T,N> clamp(vector<T,N> x, T minBound, T maxBound) // Overload of clamp taking a vector `x` and scalar bounds.
+{
+    __target_switch
+    {
+    case glsl: __intrinsic_asm "clamp"; // GLSL target: emit a call to `clamp` with the arguments as given.
+    default:
+        return clamp(x, vector<T,N>(minBound), vector<T,N>(maxBound)); // Other targets: convert both bounds to vector<T,N> and call clamp on three vectors.
+    }
+}
+
 __generic<T : __BuiltinFloatingPointType>
 [__readNone]
 [ForceInline]
@@ -506,7 +581,12 @@ __generic<T : __BuiltinFloatingPointType, let N:int>
 [require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
 public vector<T, N> mix(vector<T, N> x, vector<T, N> y, T a)
 {
-    return lerp(x, y, vector<T, N>(a));
+    __target_switch
+    {
+    case glsl: __intrinsic_asm "mix";
+    default:
+        return mix(x, y, vector<T, N>(a));
+    }
 }
 
 __generic<T : __BuiltinFloatingPointType, let N:int>
@@ -524,7 +604,15 @@ __generic<T>
 [require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
 public T mix(T x, T y, bool a)
 {
-    return (a ? y : x);
+    __target_switch // Boolean-selector mix: returns `y` when `a` is true, `x` when `a` is false.
+    {
+    case glsl: __intrinsic_asm "mix";
+    case spirv: return spirv_asm {
+        result:$$T = OpSelect $a $y $x
+    };
+    default: // Reference behavior; the SPIR-V path passes `y` as OpSelect's true operand so that it matches this.
+        return (a ? y : x);
+    }
 }
 
 __generic<T, let N:int>
@@ -533,12 +621,21 @@ __generic<T, let N:int>
 [require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
 public vector<T, N> mix(vector<T, N> x, vector<T, N> y, vector<bool, N> a)
 {
-    vector<T, N> result;
-    for (int i = 0; i < N; i++)
+    __target_switch // Component-wise boolean select: result[i] is y[i] when a[i] is true, x[i] otherwise.
     {
-        result[i] = (a[i] ? y[i] : x[i]);
+    case glsl: __intrinsic_asm "mix";
+    case spirv: return spirv_asm {
+        result:$$vector<T,N> = OpSelect $a $y $x
+    };
+    default: // Reference behavior; the SPIR-V path passes `y` as OpSelect's true operand so that it matches this loop.
+        vector<T, N> result;
+        [ForceUnroll]
+        for (int i = 0; i < N; i++)
+        {
+            result[i] = (a[i] ? y[i] : x[i]);
+        }
+        return result;
     }
-    return result;
 }
 
 [__readNone]
@@ -617,28 +714,28 @@ public vector<float, N> uintBitsToFloat(highp vector<uint, N> x)
 [ForceInline]
 uint packUnorm1x16(float c)
 {
-    return uint(clamp(c, 0.0, 1.0) * 65535.0 + 0.5);
+    return uint(round(clamp(c, 0.0, 1.0) * 65535.0));
 }
 
 [__readNone]
 [ForceInline]
 uint packSnorm1x16(float v)
 {
-    return uint(clamp(v ,-1.0, 1.0) * 32767.0 + 32767.5);
+    return uint(int(round(clamp(v ,-1.0, 1.0) * 32767.0))) & 0xffffu; // Go through int so negative values keep their two's-complement bits, then keep only the low 16 bits so the caller's OR with the other lane is not corrupted.
 }
 
 [__readNone]
 [ForceInline]
 uint packUnorm1x8(float c)
 {
-    return uint(clamp(c, 0.0, 1.0) * 255.0 + 0.5);
+    return uint(round(clamp(c, 0.0, 1.0) * 255.0));
 }
 
 [__readNone]
 [ForceInline]
 uint packSnorm1x8(float c)
 {
-    return uint(clamp(c, -1.0, 1.0) * 127.0 + 127.5);
+    return uint(int(round(clamp(c, -1.0, 1.0) * 127.0))) & 0xffu; // Go through int so negative values keep their two's-complement bits, then keep only the low 8 bits so the caller's OR with the other lanes is not corrupted.
 }
 
 [__readNone]
@@ -654,7 +751,7 @@ float unpackUnorm1x16(uint p)
 float unpackSnorm1x16(uint p)
 {
     const uint wordMask = 0xffff;
-    return clamp((float(p & wordMask) - 32767.0) / 32767.0, -1.0, 1.0);
+    return clamp(float(int((p & wordMask) << 16) >> 16) / 32767.0, -1.0, 1.0); // Shift the 16-bit field to the top and back down as int to sign-extend it; the clamp maps -32768 to -1.0.
 }
 
 [__readNone]
@@ -670,7 +767,7 @@ float unpackUnorm1x8(uint p)
 float unpackSnorm1x8(uint p)
 {
     const uint byteMask = 0xff;
-    return clamp((float(p & byteMask) - 127.0) / 127.0, -1.0, 1.0);
+    return clamp(float(int((p & byteMask) << 24) >> 24) / 127.0, -1.0, 1.0); // Shift the 8-bit field to the top and back down as int to sign-extend it; the clamp maps -128 to -1.0.
 }
 
 [__readNone]
@@ -689,6 +786,44 @@ uint float2half(float f)
     return (s | e | m);
 }
 
+__generic<T : __BuiltinFloatingPointType, E : __BuiltinIntegerType>
+[__readNone]
+[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+public T ldexp(T x, E exp) // Overload of ldexp whose exponent `exp` is an integer type.
+{
+    __target_switch
+    {
+    case hlsl: __intrinsic_asm "ldexp"; // HLSL and GLSL: emit a call to the target's `ldexp` with the arguments as given.
+    case glsl: __intrinsic_asm "ldexp";
+    case spirv: return spirv_asm {
+        OpExtInst $$T result glsl450 Ldexp $x $exp
+    };
+    default: // Other targets: cast the exponent to T and call ldexp with two arguments of type T.
+        return ldexp(x, __floatCast<T>(exp));
+    }
+}
+
+__generic<T : __BuiltinFloatingPointType, E : __BuiltinIntegerType, let N : int>
+[__readNone]
+[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
+public vector<T, N> ldexp(vector<T, N> x, vector<E, N> exp) // Vector overload of ldexp whose exponent vector has an integer element type.
+{
+    __target_switch
+    {
+    case hlsl: __intrinsic_asm "ldexp"; // HLSL and GLSL: emit a call to the target's `ldexp` with the arguments as given.
+    case glsl: __intrinsic_asm "ldexp";
+    case spirv: return spirv_asm {
+        OpExtInst $$vector<T,N> result glsl450 Ldexp $x $exp
+    };
+    default: // Other targets: cast each exponent component to T, then call ldexp on two vector<T,N> arguments.
+        vector<T,N> temp;
+        [ForceUnroll]
+        for (int i = 0; i < N; ++i)
+            temp[i] = __floatCast<T>(exp[i]);
+        return ldexp(x, temp);
+    }
+}
+
 [__readNone]
 [ForceInline]
 [require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)]
@@ -697,6 +832,9 @@ public uint packUnorm2x16(vec2 v)
     __target_switch
     {
     case glsl: __intrinsic_asm "packUnorm2x16";
+    case spirv: return spirv_asm {
+        result:$$uint = OpExtInst glsl450 PackUnorm2x16 $v
+    };
     default:
         return packUnorm1x16(v.x) | (packUnorm1x16(v.y) << uint(16));
     }
@@ -710,6 +848,9 @@ public uint packSnorm2x16(vec2 v)
     __target_switch
     {
     case glsl: __intrinsic_asm "packSnorm2x16";
+    case spirv: return spirv_asm {
+        result:$$uint = OpExtInst glsl450 PackSnorm2x16 $v
+    };
     default:
         return packSnorm1x16(v.x) | (packSnorm1x16(v.y) << uint(16));
     }
@@ -723,6 +864,9 @@ public uint packUnorm4x8(vec4 v)
     __target_switch
     {
     case glsl: __intrinsic_asm "packUnorm4x8";
+    case spirv: return spirv_asm {
+        result:$$uint = OpExtInst glsl450 PackUnorm4x8 $v
+    };
     default:
         return packUnorm1x8(v.x) | (packUnorm1x8(v.y) << uint(8)) | (packUnorm1x8(v.z) << uint(16)) | (packUnorm1x8(v.w) << uint(24));
     }
@@ -736,6 +880,9 @@ public uint packSnorm4x8(vec4 v)
     __target_switch
     {
     case glsl: __intrinsic_asm "packSnorm4x8";
+    case spirv: return spirv_asm {
+        result:$$uint = OpExtInst glsl450 PackSnorm4x8 $v
+    };
     default:
         return packSnorm1x8(v.x) | (packSnorm1x8(v.y) << uint(8)) | (packSnorm1x8(v.z) << uint(16)) | (packSnorm1x8(v.w) << uint(24));
     }
@@ -749,6 +896,9 @@ public vec2 unpackUnorm2x16(uint p)
     __target_switch
     {
     case glsl: __intrinsic_asm "unpackUnorm2x16";
+    case spirv: return spirv_asm {
+        result:$$vec2 = OpExtInst glsl450 UnpackUnorm2x16 $p
+    };
     default:
         return vec2(unpackUnorm1x16(p & uint(0xffff)), unpackUnorm1x16(p >> uint(16)));
     }
@@ -762,6 +912,9 @@ public vec2 unpackSnorm2x16(uint p)
     __target_switch
     {
     case glsl: __intrinsic_asm "unpackSnorm2x16";
+    case spirv: return spirv_asm {
+        result:$$vec2 = OpExtInst glsl450 UnpackSnorm2x16 $p
+    };
     default:
         return vec2(unpackSnorm1x16(p & uint(0xffff)), unpackSnorm1x16(p >> uint(16)));
     }
@@ -775,6 +928,9 @@ public vec4 unpackUnorm4x8(highp uint p)
     __target_switch
     {
     case glsl: __intrinsic_asm "unpackUnorm4x8";
+    case spirv: return spirv_asm {
+        result:$$vec4 = OpExtInst glsl450 UnpackUnorm4x8 $p
+    };
     default:
         return vec4(
             unpackUnorm1x8(p),
@@ -792,6 +948,9 @@ public vec4 unpackSnorm4x8(highp uint p)
     __target_switch
     {
     case glsl: __intrinsic_asm "unpackSnorm4x8";
+    case spirv: return spirv_asm {
+        result:$$vec4 = OpExtInst glsl450 UnpackSnorm4x8 $p
+    };
     default:
         return vec4(
             unpackSnorm1x8(p),
@@ -809,6 +968,9 @@ public uint packHalf2x16(vec2 v)
     __target_switch
     {
     case glsl: __intrinsic_asm "packHalf2x16";
+    case spirv: return spirv_asm {
+        result:$$uint = OpExtInst glsl450 PackHalf2x16 $v
+    };
     default:
         return float2half(v.x) | (float2half(v.y) << uint(16));
     }
@@ -842,6 +1004,9 @@ public vec2 unpackHalf2x16(uint p)
     __target_switch
     {
     case glsl: __intrinsic_asm "unpackHalf2x16";
+    case spirv: return spirv_asm {
+        result:$$vec2 = OpExtInst glsl450 UnpackHalf2x16 $p
+    };
     default:
         return vec2(half2float(p & uint(0xffff)), half2float(p >> uint(16)));
     }
@@ -855,6 +1020,9 @@ public double packDouble2x32(uvec2 v)
     __target_switch
     {
     case glsl: __intrinsic_asm "packDouble2x32";
+    case spirv: return spirv_asm {
+        result:$$double = OpExtInst glsl450 PackDouble2x32 $v
+    };
     default:
         // TODO: there is no "asdouble()"
         //return asdouble(uint64_t(v.x) | (uint64_t(v.y) << 32));
@@ -870,6 +1038,9 @@ public uvec2 unpackDouble2x32(double v)
     __target_switch
     {
     case glsl: __intrinsic_asm "unpackDouble2x32";
+    case spirv: return spirv_asm {
+        result:$$uvec2 = OpExtInst glsl450 UnpackDouble2x32 $v
+    };
     default:
         // TODO: there is no "asuint64()"
         uint64_t u = 0; // asuint64(v);
@@ -887,7 +1058,15 @@ __generic<T : __BuiltinFloatingPointType>
 [require(cpp_cuda_glsl_hlsl_spirv, GLSL_400)]
 public T faceforward(T n, T i, T ng)
 {
-    return dot(ng, i) < T(0.0f) ? n : -n;
+    __target_switch
+    {
+    case glsl: __intrinsic_asm "faceforward";
+    case spirv: return spirv_asm {
+        OpExtInst $$T result glsl450 FaceForward $n $i $ng
+    };
+    default:
+        return dot(ng, i) < T(0.0f) ? n : -n;
+    }
 }
 
 //
@@ -904,6 +1083,15 @@ public matrix<T, R, C> outerProduct(vector<T, C> c, vector<T, R> r)
     __target_switch
     {
     case glsl: __intrinsic_asm "outerProduct";
+
+    // Note: SPIR-V takes the input arguments in the opposite order
+    // compared to GLSL. The SPIR-V specification says,
+    // "Its (second argument) number of components must equal the
+    //  number of columns in Result Type."
+    //
+    case spirv: return spirv_asm {
+        result:$$matrix<T,R,C> = OpOuterProduct $c $r
+    };
     default:
         matrix<T, R, C> result;
         for (int j = 0; j < R; ++j)
@@ -918,13 +1106,15 @@ public matrix<T, R, C> outerProduct(vector<T, C> c, vector<T, R> r)
 }
 
 __generic<T : __BuiltinFloatingPointType, let N : int>
-[require(cpp_cuda_glsl_hlsl_spirv, GLSL_400)]
-matrix<T,N,N> inverse(matrix<T,N,N> m)
+[require(glsl_spirv, GLSL_400)]
+public matrix<T,N,N> inverse(matrix<T,N,N> m)
 {
     __target_switch
     {
     case glsl: __intrinsic_asm "inverse";
-    case hlsl: __intrinsic_asm "inverse";
+    case spirv: return spirv_asm {
+        OpExtInst $$matrix<T,N,N> result glsl450 MatrixInverse $m
+    };
     }
 }
 
@@ -934,201 +1124,423 @@ matrix<T,N,N> inverse(matrix<T,N,N> m)
 
 [__readNone]
 [ForceInline]
+[require(cpp_cuda_glsl_hlsl_spirv, GLSL_400)]
 public uint uaddCarry(highp uint x, highp uint y, out lowp uint carry)
 {
-    let result = x * y;
-    carry = ((result < x || result < y) ? 1 : 0);
-    return result;
+    __target_switch // Returns the 32-bit sum x + y and writes 1 to `carry` when the sum wrapped, 0 otherwise.
+    {
+    case glsl: __intrinsic_asm "uaddCarry";
+    case spirv: return spirv_asm {
+        %ResType = OpTypeStruct $$uint $$uint;
+        %temp:%ResType = OpIAddCarry $x $y;
+        %carry:$$uint = OpCompositeExtract %temp 1;
+        OpStore &carry %carry;
+        result:$$uint = OpCompositeExtract %temp 0
+    };
+    default:
+        let result = x + y; // Add, not multiply: this is the wrapped 32-bit sum.
+        carry = ((result < x || result < y) ? 1 : 0); // The sum wrapped if it is smaller than either operand.
+        return result;
+    }
 }
 
 __generic<let N:int>
 [__readNone]
 [ForceInline]
+[require(cpp_cuda_glsl_hlsl_spirv, GLSL_400)]
 public vector<uint,N> uaddCarry(highp vector<uint,N> x, highp vector<uint,N> y, out lowp vector<uint,N> carry)
 {
-    VECTOR_MAP_TRINARY(uint, N, uaddCarry, x, y, carry);
+    __target_switch
+    {
+    case glsl: __intrinsic_asm "uaddCarry";
+    case spirv: return spirv_asm {
+        %ResType = OpTypeStruct $$vector<uint,N> $$vector<uint,N>;
+        %temp:%ResType = OpIAddCarry $x $y;
+        %carry:$$vector<uint,N> = OpCompositeExtract %temp 1;
+        OpStore &carry %carry;
+        result:$$vector<uint,N> = OpCompositeExtract %temp 0
+    };
+    default:
+        VECTOR_MAP_TRINARY(uint, N, uaddCarry, x, y, carry);
+    }
 }
 
 [__readNone]
 [ForceInline]
+[require(cpp_cuda_glsl_hlsl_spirv, GLSL_400)]
 public uint usubBorrow(highp uint x, highp uint y, out lowp uint borrow)
 {
-    borrow = (y > x) ? 1 : 0;
-    return x - y;
+    __target_switch
+    {
+    case glsl: __intrinsic_asm "usubBorrow";
+    case spirv: return spirv_asm {
+        %ResType = OpTypeStruct $$uint $$uint;
+        %temp:%ResType = OpISubBorrow $x $y;
+        %borrow:$$uint = OpCompositeExtract %temp 1;
+        OpStore &borrow %borrow;
+        result:$$uint = OpCompositeExtract %temp 0
+    };
+    default:
+        borrow = (y > x) ? 1 : 0;
+        return x - y;
+    }
 }
 
 __generic<let N:int>
 [__readNone]
 [ForceInline]
+[require(cpp_cuda_glsl_hlsl_spirv, GLSL_400)]
 public vector<uint,N> usubBorrow(highp vector<uint,N> x, highp vector<uint,N> y, out lowp vector<uint,N> borrow)
 {
-    VECTOR_MAP_TRINARY(uint, N, usubBorrow, x, y, borrow);
+    __target_switch
+    {
+    case glsl: __intrinsic_asm "usubBorrow";
+    case spirv: return spirv_asm {
+        %ResType = OpTypeStruct $$vector<uint,N> $$vector<uint,N>;
+        %temp:%ResType = OpISubBorrow $x $y;
+        %borrow:$$vector<uint,N> = OpCompositeExtract %temp 1;
+        OpStore &borrow %borrow;
+        result:$$vector<uint,N> = OpCompositeExtract %temp 0
+    };
+    default:
+        VECTOR_MAP_TRINARY(uint, N, usubBorrow, x, y, borrow);
+    }
 }
 
 [__readNone]
 [ForceInline]
+[require(cpp_cuda_glsl_hlsl_spirv, GLSL_400)]
 public void umulExtended(highp uint x, highp uint y, out highp uint msb, out highp uint lsb)
 {
-    uint64_t result = x * y;
-    msb = uint(result >> 32);
-    lsb = uint(result);
+    __target_switch // Computes the 64-bit product of x and y; `msb` receives the high 32 bits and `lsb` the low 32 bits.
+    {
+    case glsl: __intrinsic_asm "umulExtended";
+    case spirv: spirv_asm {
+        %ResType = OpTypeStruct $$uint $$uint;
+        %temp:%ResType = OpUMulExtended $x $y;
+        %lsb:$$uint = OpCompositeExtract %temp 0;
+        %msb:$$uint = OpCompositeExtract %temp 1;
+        OpStore &lsb %lsb;
+        OpStore &msb %msb;
+    };
+    default:
+        uint64_t result = uint64_t(x) * uint64_t(y); // Widen before multiplying; a 32-bit multiply would drop the high half.
+        msb = uint(result >> 32);
+        lsb = uint(result);
+    }
 }
 
 __generic<let N:int>
 [__readNone]
 [ForceInline]
+[require(cpp_cuda_glsl_hlsl_spirv, GLSL_400)]
 public void umulExtended(highp vector<uint,N> x, highp vector<uint,N> y, out highp vector<uint,N> msb, out highp vector<uint,N> lsb)
 {
-    for(int i = 0; i < N; ++i)
+    __target_switch
     {
-       umulExtended(x[i], y[i], msb[i], lsb[i]);
+    case glsl: __intrinsic_asm "umulExtended";
+    case spirv: spirv_asm {
+        %ResType = OpTypeStruct $$vector<uint,N> $$vector<uint,N>;
+        %temp:%ResType = OpUMulExtended $x $y;
+        %lsb:$$vector<uint,N> = OpCompositeExtract %temp 0;
+        %msb:$$vector<uint,N> = OpCompositeExtract %temp 1;
+        OpStore &lsb %lsb;
+        OpStore &msb %msb;
+    };
+    default:
+        [ForceUnroll]
+        for(int i = 0; i < N; ++i)
+        {
+            umulExtended(x[i], y[i], msb[i], lsb[i]);
+        }
     }
 }
 
 [__readNone]
 [ForceInline]
+[require(cpp_cuda_glsl_hlsl_spirv, GLSL_400)]
 public void imulExtended(highp int x, highp int y, out highp int msb, out highp int lsb)
 {
-    int64_t result = x * y;
-    msb = int(result >> 32);
-    lsb = int(result);
+    __target_switch // Computes the signed 64-bit product of x and y; `msb` receives the high 32 bits and `lsb` the low 32 bits.
+    {
+    case glsl: __intrinsic_asm "imulExtended";
+    case spirv: spirv_asm {
+        %ResType = OpTypeStruct $$int $$int;
+        %temp:%ResType = OpSMulExtended $x $y;
+        %lsb:$$int = OpCompositeExtract %temp 0;
+        %msb:$$int = OpCompositeExtract %temp 1;
+        OpStore &lsb %lsb;
+        OpStore &msb %msb;
+    };
+    default:
+        int64_t result = int64_t(x) * int64_t(y); // Widen before multiplying; a 32-bit multiply would drop the high half.
+        msb = int(result >> 32);
+        lsb = int(result);
+    }
 }
 
 __generic<let N:int>
 [__readNone]
 [ForceInline]
+[require(cpp_cuda_glsl_hlsl_spirv, GLSL_400)]
 public void imulExtended(highp vector<int,N> x, highp vector<int,N> y, out highp vector<int,N> msb, out highp vector<int,N> lsb)
 {
-    for(int i = 0; i < N; ++i)
+    __target_switch
     {
-       imulExtended(x[i], y[i], msb[i], lsb[i]);
+    case glsl: __intrinsic_asm "imulExtended";
+    case spirv: spirv_asm {
+        %ResType = OpTypeStruct $$vector<int,N> $$vector<int,N>;
+        %temp:%ResType = OpSMulExtended $x $y;
+        %lsb:$$vector<int,N> = OpCompositeExtract %temp 0;
+        %msb:$$vector<int,N> = OpCompositeExtract %temp 1;
+        OpStore &lsb %lsb;
+        OpStore &msb %msb;
+    };
+    default:
+        [ForceUnroll]
+        for(int i = 0; i < N; ++i)
+        {
+           imulExtended(x[i], y[i], msb[i], lsb[i]);
+        }
     }
 }
 
 [__readNone]
 [ForceInline]
+[require(cpp_cuda_glsl_hlsl_spirv, GLSL_400)]
 public int bitfieldExtract(int value, int offset, int bits)
 {
-    return int(uint(value >> offset) & ((1u << bits) - 1));
+    __target_switch // Extracts `bits` bits of `value` starting at bit `offset`, sign-extended from the field's top bit.
+    {
+    case glsl: __intrinsic_asm "bitfieldExtract";
+    case spirv: return spirv_asm {
+        result:$$int = OpBitFieldSExtract $value $offset $bits
+    };
+    default: // Assumes offset + bits <= 32, as the GLSL built-in requires.
+        return (bits == 0) ? 0 : ((value << (32 - bits - offset)) >> (32 - bits)); // Move the field to the top, then shift right on a signed int to sign-extend; also avoids `1u << 32` when bits == 32.
+    }
 }
 
 __generic<let N:int>
 [__readNone]
 [ForceInline]
+[require(cpp_cuda_glsl_hlsl_spirv, GLSL_400)]
 public vector<int,N> bitfieldExtract(vector<int,N> value, int offset, int bits)
 {
-    vector<int,N> result;
-    for (int i = 0; i < N; ++i)
+    __target_switch
     {
-        result[i] = bitfieldExtract(value[i], offset, bits);
+    case glsl: __intrinsic_asm "bitfieldExtract";
+    case spirv: return spirv_asm {
+        result:$$vector<int,N> = OpBitFieldSExtract $value $offset $bits
+    };
+    default:
+        vector<int,N> result;
+        [ForceUnroll]
+        for (int i = 0; i < N; ++i)
+        {
+            result[i] = bitfieldExtract(value[i], offset, bits);
+        }
+        return result;
     }
-    return result;
 }
 
 [__readNone]
 [ForceInline]
+[require(cpp_cuda_glsl_hlsl_spirv, GLSL_400)]
 public uint bitfieldExtract(uint value, int offset, int bits)
 {
-    return (value >> offset) & ((1u << bits) - 1);
+    __target_switch // Extracts `bits` bits of `value` starting at bit `offset`, zero-extended.
+    {
+    case glsl: __intrinsic_asm "bitfieldExtract";
+    case spirv: return spirv_asm {
+        result:$$uint = OpBitFieldUExtract $value $offset $bits
+    };
+    default: // Assumes offset + bits <= 32, as the GLSL built-in requires.
+        return (bits == 0) ? 0u : ((value << (32 - bits - offset)) >> (32 - bits)); // Move the field to the top, then shift right on an unsigned value to zero-extend; avoids `1u << 32` when bits == 32.
+    }
 }
 
 __generic<let N:int>
 [__readNone]
 [ForceInline]
+[require(cpp_cuda_glsl_hlsl_spirv, GLSL_400)]
 public vector<uint,N> bitfieldExtract(vector<uint,N> value, int offset, int bits)
 {
-    vector<uint,N> result;
-    for (int i = 0; i < N; ++i)
+    __target_switch
     {
-        result[i] = bitfieldExtract(value[i], offset, bits);
+    case glsl: __intrinsic_asm "bitfieldExtract";
+    case spirv: return spirv_asm {
+        result:$$vector<uint,N> = OpBitFieldUExtract $value $offset $bits
+    };
+    default:
+        vector<uint,N> result;
+        [ForceUnroll]
+        for (int i = 0; i < N; ++i)
+        {
+            result[i] = bitfieldExtract(value[i], offset, bits);
+        }
+        return result;
     }
-    return result;
 }
 
 [__readNone]
 [ForceInline]
+[require(cpp_cuda_glsl_hlsl_spirv, GLSL_400)]
 public uint bitfieldInsert(uint base, uint insert, int offset, int bits)
 {
-    uint clearMask = ~(((1u << bits) - 1u) << offset);
-    uint clearedBase = base & clearMask;
-    uint maskedInsert = (insert & ((1u << bits) - 1u)) << offset;
-    return clearedBase | maskedInsert;
+    __target_switch
+    {
+    case glsl: __intrinsic_asm "bitfieldInsert";
+    case spirv: return spirv_asm {
+        result:$$uint = OpBitFieldInsert $base $insert $offset $bits
+    };
+    default:
+        uint clearMask = ~(((1u << bits) - 1u) << offset);
+        uint clearedBase = base & clearMask;
+        uint maskedInsert = (insert & ((1u << bits) - 1u)) << offset;
+        return clearedBase | maskedInsert;
+    }
 }
 
 __generic<let N:int>
 [__readNone]
 [ForceInline]
+[require(cpp_cuda_glsl_hlsl_spirv, GLSL_400)]
 public vector<uint,N> bitfieldInsert(vector<uint,N> base, vector<uint,N> insert, int offset, int bits)
 {
-    vector<uint,N> result;
-    for (int i = 0; i < N; ++i)
+    __target_switch
     {
-        result[i] = bitfieldInsert(base[i], insert[i], offset, bits);
+    case glsl: __intrinsic_asm "bitfieldInsert";
+    case spirv: return spirv_asm {
+        result:$$vector<uint,N> = OpBitFieldInsert $base $insert $offset $bits
+    };
+    default:
+        vector<uint,N> result;
+        [ForceUnroll]
+        for (int i = 0; i < N; ++i)
+        {
+            result[i] = bitfieldInsert(base[i], insert[i], offset, bits);
+        }
+        return result;
     }
-    return result;
 }
 
 [__readNone]
 [ForceInline]
+[require(cpp_cuda_glsl_hlsl_spirv, GLSL_400)]
 public int bitfieldInsert(int base, int insert, int offset, int bits)
 {
-    uint clearMask = ~(((1u << bits) - 1u) << offset);
-    uint clearedBase = base & clearMask;
-    uint maskedInsert = (insert & ((1u << bits) - 1u)) << offset;
-    return clearedBase | maskedInsert;
+    __target_switch
+    {
+    case glsl: __intrinsic_asm "bitfieldInsert";
+    case spirv: return spirv_asm {
+        result:$$int = OpBitFieldInsert $base $insert $offset $bits
+    };
+    default:
+        uint clearMask = ~(((1u << bits) - 1u) << offset);
+        uint clearedBase = base & clearMask;
+        uint maskedInsert = (insert & ((1u << bits) - 1u)) << offset;
+        return clearedBase | maskedInsert;
+    }
 }
 
 __generic<let N:int>
 [__readNone]
 [ForceInline]
+[require(cpp_cuda_glsl_hlsl_spirv, GLSL_400)]
 public vector<int,N> bitfieldInsert(vector<int,N> base, vector<int,N> insert, int offset, int bits)
 {
-    vector<int,N> result;
-    for (int i = 0; i < N; ++i)
+    __target_switch
     {
-        result[i] = bitfieldInsert(base[i], insert[i], offset, bits);
+    case glsl: __intrinsic_asm "bitfieldInsert";
+    case spirv: return spirv_asm {
+        result:$$vector<int,N> = OpBitFieldInsert $base $insert $offset $bits
+    };
+    default:
+        vector<int,N> result;
+        [ForceUnroll]
+        for (int i = 0; i < N; ++i)
+        {
+            result[i] = bitfieldInsert(base[i], insert[i], offset, bits);
+        }
+        return result;
     }
-    return result;
 }
 
 [__readNone]
 [ForceInline]
+[require(cpp_cuda_glsl_hlsl_spirv, GLSL_400)]
 public int bitfieldReverse(highp int value)
 {
-    value = ((value & 0xAAAAAAAA) >> 1) | ((value & 0x55555555) << 1);
-    value = ((value & 0xCCCCCCCC) >> 2) | ((value & 0x33333333) << 2);
-    value = ((value & 0xF0F0F0F0) >> 4) | ((value & 0x0F0F0F0F) << 4);
-    value = ((value & 0xFF00FF00) >> 8) | ((value & 0x00FF00FF) << 8);
-    value = ((value & 0xFFFF0000) >> 16) | ((value & 0x0000FFFF) << 16);
-    return value;
+    __target_switch
+    {
+    case glsl: __intrinsic_asm "bitfieldReverse";
+    case spirv: return spirv_asm {
+        result:$$int = OpBitReverse $value
+    };
+    default:
+        value = ((value & 0xAAAAAAAA) >> 1) | ((value & 0x55555555) << 1);
+        value = ((value & 0xCCCCCCCC) >> 2) | ((value & 0x33333333) << 2);
+        value = ((value & 0xF0F0F0F0) >> 4) | ((value & 0x0F0F0F0F) << 4);
+        value = ((value & 0xFF00FF00) >> 8) | ((value & 0x00FF00FF) << 8);
+        value = ((value & 0xFFFF0000) >> 16) | ((value & 0x0000FFFF) << 16);
+        return value;
+    }
 }
 
 __generic<let N:int>
 [__readNone]
 [ForceInline]
+[require(cpp_cuda_glsl_hlsl_spirv, GLSL_400)]
 public vector<int,N> bitfieldReverse(highp vector<int,N> value)
 {
-    VECTOR_MAP_UNARY(int, N, bitfieldReverse, value);
+    __target_switch
+    {
+    case glsl: __intrinsic_asm "bitfieldReverse";
+    case spirv: return spirv_asm {
+        result:$$vector<int,N> = OpBitReverse $value
+    };
+    default:
+        VECTOR_MAP_UNARY(int, N, bitfieldReverse, value);
+    }
 }
 
 [__readNone]
 [ForceInline]
+[require(cpp_cuda_glsl_hlsl_spirv, GLSL_400)]
 public uint bitfieldReverse(highp uint value)
 {
-    value = ((value & 0xAAAAAAAA) >> 1) | ((value & 0x55555555) << 1);
-    value = ((value & 0xCCCCCCCC) >> 2) | ((value & 0x33333333) << 2);
-    value = ((value & 0xF0F0F0F0) >> 4) | ((value & 0x0F0F0F0F) << 4);
-    value = ((value & 0xFF00FF00) >> 8) | ((value & 0x00FF00FF) << 8);
-    value = ((value & 0xFFFF0000) >> 16) | ((value & 0x0000FFFF) << 16);
-    return value;
+    __target_switch
+    {
+    case glsl: __intrinsic_asm "bitfieldReverse";
+    case spirv: return spirv_asm {
+        result:$$uint = OpBitReverse $value
+    };
+    default:
+        value = ((value & 0xAAAAAAAA) >> 1) | ((value & 0x55555555) << 1);
+        value = ((value & 0xCCCCCCCC) >> 2) | ((value & 0x33333333) << 2);
+        value = ((value & 0xF0F0F0F0) >> 4) | ((value & 0x0F0F0F0F) << 4);
+        value = ((value & 0xFF00FF00) >> 8) | ((value & 0x00FF00FF) << 8);
+        value = ((value & 0xFFFF0000) >> 16) | ((value & 0x0000FFFF) << 16);
+        return value;
+    }
 }
 
 __generic<let N:int>
 [__readNone]
 [ForceInline]
+[require(cpp_cuda_glsl_hlsl_spirv, GLSL_400)]
 public vector<uint,N> bitfieldReverse(highp vector<uint,N> value)
 {
-    VECTOR_MAP_UNARY(int, N, bitfieldReverse, value);
+    __target_switch // Reverses the bit order of each component of `value`.
+    {
+    case glsl: __intrinsic_asm "bitfieldReverse";
+    case spirv: return spirv_asm {
+        result:$$vector<uint,N> = OpBitReverse $value
+    };
+    default: // Apply the scalar uint overload per component; the temporary uses uint to match the return type.
+        VECTOR_MAP_UNARY(uint, N, bitfieldReverse, value);
+    }
 }
 
 [__readNone] 
@@ -1145,7 +1557,15 @@ __generic<let N:int>
 [require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)]
 public vector<uint,N> bitCount(vector<uint,N> value)
 {
-    VECTOR_MAP_UNARY(uint, N, countbits, value);
+    __target_switch
+    {
+    case glsl: __intrinsic_asm "bitCount";
+    case spirv: return spirv_asm {
+        result:$$vector<uint,N> = OpBitCount $value
+    };
+    default:
+        VECTOR_MAP_UNARY(uint, N, countbits, value);
+    }
 }
 
 [__readNone] 
@@ -1153,7 +1573,15 @@ public vector<uint,N> bitCount(vector<uint,N> value)
 [require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)]
 public int bitCount(int value)
 {
-    return countbits(uint(value));
+    __target_switch
+    {
+    case glsl: __intrinsic_asm "bitCount";
+    case spirv: return spirv_asm {
+        result:$$int = OpBitCount $value
+    };
+    default:
+        return countbits(uint(value));
+    }
 }
     
 __generic<let N:int>
@@ -1162,7 +1590,15 @@ __generic<let N:int>
 [require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)]
 public vector<int,N> bitCount(vector<int,N> value)
 {
-    VECTOR_MAP_UNARY(int, N, countbits, value);
+    __target_switch
+    {
+    case glsl: __intrinsic_asm "bitCount";
+    case spirv: return spirv_asm {
+        result:$$vector<int,N> = OpBitCount $value
+    };
+    default:
+        VECTOR_MAP_UNARY(int, N, countbits, value);
+    }
 }
 
 [__readNone]
diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang
index 2250ed6d4..7cafe764f 100644
--- a/source/slang/hlsl.meta.slang
+++ b/source/slang/hlsl.meta.slang
@@ -5741,7 +5741,15 @@ __generic<T : __BuiltinFloatingPointType>
 [require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
 T distance(T x, T y)
 {
-    return length(x - y);
+    __target_switch
+    {
+    case glsl: __intrinsic_asm "distance";
+    case spirv: return spirv_asm {
+        OpExtInst $$T result glsl450 Distance $x $y
+    };
+    default:
+        return length(x - y);
+    }
 }
 
 // Vector dot product
@@ -6028,9 +6036,7 @@ T exp2(T x)
     __target_switch
     {
     case glsl:
-        if (__isHalf<T>())
-            __intrinsic_asm "exp2($0)";
-        __intrinsic_asm "exp2(float($0))";
+        __intrinsic_asm "exp2($0)";
     case spirv:
         if (__isHalf<T>())
         {
@@ -6060,6 +6066,8 @@ vector<T,N> exp2(vector<T,N> x)
 {
     __target_switch
     {
+    case glsl:
+        __intrinsic_asm "exp2($0)";
     case hlsl: __intrinsic_asm "exp2";
     case spirv: return spirv_asm {
         OpExtInst $$vector<T,N> result glsl450 Exp2 $x
@@ -7878,7 +7886,15 @@ __generic<T : __BuiltinFloatingPointType>
 [require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)]
 T length(T x)
 {
-    return abs(x);
+    __target_switch
+    {
+    case glsl: __intrinsic_asm "length";
+    case spirv: return spirv_asm {
+        OpExtInst $$T result glsl450 Length $x
+    };
+    default:
+        return abs(x);
+    }
 }
 
 // Linear interpolation
diff --git a/tests/glsl-intrinsic/intrinsic-basic.slang b/tests/glsl-intrinsic/intrinsic-basic.slang
index 1769aa5fe..42d416c1e 100644
--- a/tests/glsl-intrinsic/intrinsic-basic.slang
+++ b/tests/glsl-intrinsic/intrinsic-basic.slang
@@ -1,29 +1,23 @@
-// TODO: Use debug symbols will trigger a downstream error during validation layer instrumentation.
-// Need to file a bug for the downstream tools.
-// TODO: enable the test with direct-to-spirv backend when the pack/unpack functions are fixed.
-//       (delete) -emit-spirv-via-glsl flag here.
-//TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=BUF):-vk -compute -entry computeMain -allow-glsl -output-using-type -g0 -emit-spirv-via-glsl
+//TEST:SIMPLE(filecheck=CHECK_GLSL): -allow-glsl -stage compute -entry computeMain -target glsl
+//TEST:SIMPLE(filecheck=CHECK_GLSL_SPIRV): -allow-glsl -stage compute -entry computeMain -target spirv
+//TEST:SIMPLE(filecheck=CHECK_SPIR): -allow-glsl -stage compute -entry computeMain -target spirv -emit-spirv-directly
+//TEST:SIMPLE(filecheck=CHECK_HLSL): -allow-glsl -stage compute -entry computeMain -target hlsl
+//TEST:SIMPLE(filecheck=CHECK_CUDA): -allow-glsl -stage compute -entry computeMain -target cuda -DTARGET_CUDA
+//TEST:SIMPLE(filecheck=CHECK_CPP):  -allow-glsl -stage compute -entry computeMain -target cpp
 
-//TEST:SIMPLE(filecheck=CHECK_GLSL): -allow-glsl -stage compute -entry computeMain -target glsl -g2
-//TEST:SIMPLE(filecheck=CHECK_SPV):  -allow-glsl -stage compute -entry computeMain -target spirv  -g2
-//TEST:SIMPLE(filecheck=CHECK_HLSL): -allow-glsl -stage compute -entry computeMain -target hlsl  -g2
-//TEST:SIMPLE(filecheck=CHECK_CUDA): -allow-glsl -stage compute -entry computeMain -target cuda -DTARGET_CUDA  -g2
-//TEST:SIMPLE(filecheck=CHECK_CPP):  -allow-glsl -stage compute -entry computeMain -target cpp  -g2
+//TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=BUF):-vk -compute -entry computeMain -allow-glsl -output-using-type -emit-spirv-via-glsl
+//TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=BUF):-vk -compute -entry computeMain -allow-glsl -output-using-type -emit-spirv-directly
 
+//TEST_INPUT:ubuffer(data=[0 1 -1], stride=4):name=inputBuffer
+buffer MyBlockName1
+{
+    int data[10];
+} inputBuffer;
 
-// "inverse()" function is not implemented yet.
-//#defined TEST_when_inverse_works
-
-// "ftransform()" function is not implemented yet.
-//#defined TEST_when_fransform_works
-
-// "exp2" for double type is causing an issue with SPIRV
-//#define TEST_when_exp2_double_type_works
-
-//TEST_INPUT:ubuffer(data=[0 0 0 0], stride=4):out,name=outputBuffer
+//TEST_INPUT:ubuffer(data=[0], stride=4):out,name=outputBuffer
 buffer MyBlockName2
 {
-    int4 result;
+    int result;
 } outputBuffer;
 
 
@@ -43,223 +37,497 @@ bool equals(matrix<T, N, M> lhs, matrix<T, N, M> rhs)
     return true;
 }
 
-bool dim1TypeFuncs()
+bool Test_ScalarType()
 {
+    // CHECK_GLSL-LABEL: Test_ScalarType
+
     typealias genFType = float;
     typealias genDType = double;
     typealias genIType = int;
     typealias genUType = uint;
     typealias genBType = bool;
 
+    // Temporary variables
     genFType outGenFType;
     genDType outGenDType;
     genIType outGenIType, outGenIType2;
     genUType outGenUType, outGenUType2;
 
-    constexpr float epsilon = 0.000001;
+    constexpr const float epsilon = 0.000001;
+    const mat2 identity2x2 = mat2(vec2(1,0),vec2(0,1));
+    const mat3 identity3x3 = mat3(vec3(1,0,0),vec3(0,1,0),vec3(0,0,1));
+    const mat4 identity4x4 = mat4(vec4(1,0,0,0),vec4(0,1,0,0),vec4(0,0,1,0),vec4(0,0,0,1));
+
+    // CHECK_SPIR-LABEL: OpAccessChain {{.*}} %inputBuffer
+    const int zero = inputBuffer.data[0];
+    const int one = inputBuffer.data[1];
+    const int negaOne = inputBuffer.data[2];
 
     bool voidResults = true;
 
     // 8.8. Integer Functions
-    umulExtended(genUType(0), genUType(0), outGenUType, outGenUType2);
+
+    // CHECK_GLSL: umulExtended(
+    // CHECK_SPIR: OpUMulExtended{{ }}
+    umulExtended(genUType(zero), genUType(zero), outGenUType, outGenUType2);
     voidResults = voidResults && genUType(0) == outGenUType;
     voidResults = voidResults && genUType(0) == outGenUType2;
 
-    imulExtended(genIType(0), genIType(0), outGenIType, outGenIType2);
+    // CHECK_GLSL: imulExtended(
+    // CHECK_SPIR: OpSMulExtended{{ }}
+    imulExtended(genIType(zero), genIType(zero), outGenIType, outGenIType2);
     voidResults = voidResults && genIType(0) == outGenIType;
     voidResults = voidResults && genIType(0) == outGenIType2;
 
     return voidResults
         // 8.1. Angle and Trigonometry Functions
-        && genFType(0) == radians(genFType(0))
-        && genFType(0) == degrees(genFType(0))
-        && genFType(0) == sin(genFType(0))
-        && genFType(1) == cos(genFType(0))
-        && genFType(0) == tan(genFType(0))
-        && genFType(0) == asin(genFType(0))
-        && genFType(0) == acos(genFType(1))
-        && genFType(0) == atan(genFType(0), genFType(0))
-        && genFType(0) == atan(genFType(0))
-        && genFType(0) == sinh(genFType(0))
-        && genFType(1) == cosh(genFType(0))
-        && genFType(0) == tanh(genFType(0))
-        && genFType(0) == asinh(genFType(0))
-        && genFType(0) == acosh(genFType(1))
-        && genFType(0) == atanh(genFType(0))
+
+        // CHECK_GLSL: radians(
+        // CHECK_SPIR: Radians{{ }}
+        && genFType(0) == radians(genFType(zero))
+
+        // CHECK_GLSL: degrees(
+        // CHECK_SPIR: Degrees{{ }}
+        && genFType(0) == degrees(genFType(zero))
+
+        // CHECK_GLSL: sin(
+        // CHECK_SPIR: Sin{{ }}
+        && genFType(0) == sin(genFType(zero))
+
+        // CHECK_GLSL: cos(
+        // CHECK_SPIR: Cos{{ }}
+        && genFType(1) == cos(genFType(zero))
+
+        // CHECK_GLSL: tan(
+        // CHECK_SPIR: Tan{{ }}
+        && genFType(0) == tan(genFType(zero))
+
+        // CHECK_GLSL: asin(
+        // CHECK_SPIR: Asin{{ }}
+        && genFType(0) == asin(genFType(zero))
+
+        // CHECK_GLSL: acos(
+        // CHECK_SPIR: Acos{{ }}
+        && genFType(0) == acos(genFType(one))
+
+        // CHECK_GLSL-COUNT-2: atan(
+        // CHECK_SPIR: Atan2{{ }}
+        // CHECK_SPIR: Atan{{ }}
+        && genFType(0) == atan(genFType(zero), genFType(zero))
+        && genFType(0) == atan(genFType(zero))
+
+        // CHECK_GLSL: sinh(
+        // CHECK_SPIR: Sinh{{ }}
+        && genFType(0) == sinh(genFType(zero))
+
+        // CHECK_GLSL: cosh(
+        // CHECK_SPIR: Cosh{{ }}
+        && genFType(1) == cosh(genFType(zero))
+
+        // CHECK_GLSL: tanh(
+        // CHECK_SPIR: Tanh{{ }}
+        && genFType(0) == tanh(genFType(zero))
+
+        // CHECK_GLSL: asinh(
+        // CHECK_SPIR: Asinh{{ }}
+        && genFType(0) == asinh(genFType(zero))
+
+        // CHECK_GLSL: acosh(
+        // CHECK_SPIR: Acosh{{ }}
+        && genFType(0) == acosh(genFType(one))
+
+        // CHECK_GLSL: atanh(
+        // CHECK_SPIR: Atanh{{ }}
+        && genFType(0) == atanh(genFType(zero))
 
         // 8.2. Exponential Functions
-        && genFType(1) == pow(genFType(1), genFType(0))
-        && genFType(1) == exp(genFType(0))
-        && genFType(0) == log(genFType(1))
-        && genFType(1) == exp2(genFType(0))
-#if defined(TEST_when_exp2_double_type_works)
-        && genDType(1) == exp2(genDType(0))
-#endif // #if defined(TEST_when_exp2_double_type_works)
-        && genFType(0) == log2(genFType(1))
-        && genFType(0) == sqrt(genFType(0))
-        && genDType(0) == sqrt(genDType(0))
-        && genFType(1) == inversesqrt(genFType(1))
-        && genDType(1) == inversesqrt(genDType(1))
+
+        // CHECK_GLSL: pow(
+        // CHECK_SPIR: Pow{{ }}
+        && genFType(1) == pow(genFType(one), genFType(zero))
+
+        // CHECK_GLSL: exp(
+        // CHECK_SPIR: Exp{{ }}
+        && genFType(1) == exp(genFType(zero))
+
+        // CHECK_GLSL: log(
+        // CHECK_SPIR: Log{{ }}
+        && genFType(0) == log(genFType(one))
+
+        // CHECK_GLSL: exp2(
+        // CHECK_SPIR: Exp2{{ }}
+        && genFType(1) == exp2(genFType(zero))
+
+        // CHECK_GLSL: log2(
+        // CHECK_SPIR: Log2{{ }}
+        && genFType(0) == log2(genFType(one))
+
+        // CHECK_GLSL-COUNT-2: sqrt(
+        // CHECK_SPIR-COUNT-2: Sqrt{{ }}
+        && genFType(0) == sqrt(genFType(zero))
+        && genDType(0) == sqrt(genDType(zero))
+
+        // CHECK_GLSL-COUNT-2: inversesqrt(
+        // CHECK_SPIR-COUNT-2: InverseSqrt{{ }}
+        && genFType(1) == inversesqrt(genFType(one))
+        && genDType(1) == inversesqrt(genDType(one))
 
         // 8.3. Common Functions
-        && genFType(0) == abs(genFType(0))
-        && genIType(0) == abs(genIType(0))
-        && genDType(0) == abs(genDType(0))
-        && genFType(0) == sign(genFType(0))
+
+        // CHECK_GLSL-COUNT-3: abs(
+        // CHECK_SPIR: FAbs{{ }}
+        // CHECK_SPIR: SAbs{{ }}
+        // CHECK_SPIR: FAbs{{ }}
+        && genFType(0) == abs(genFType(zero))
+        && genIType(0) == abs(genIType(zero))
+        && genDType(0) == abs(genDType(zero))
+
+        // CHECK_GLSL-COUNT-3: sign(
+        // CHECK_SPIR: FSign{{ }}
+        // CHECK_SPIR: SSign{{ }}
+        // CHECK_SPIR: FSign{{ }}
+        && genFType(0) == sign(genFType(zero))
 #if !defined(TARGET_CUDA)
-        && genIType(0) == sign(genIType(0))
+        && genIType(0) == sign(genIType(zero))
 #endif // #if !defined(TARGET_CUDA)
-        && genDType(0) == sign(genDType(0))
-        && genFType(0) == floor(genFType(0))
-        && genDType(0) == floor(genDType(0))
-        && genFType(0) == trunc(genFType(0))
-        && genDType(0) == trunc(genDType(0))
-        && genFType(0) == round(genFType(0))
-        && genDType(0) == round(genDType(0))
-        && genFType(0) == roundEven(genFType(0))
-        && genDType(0) == roundEven(genDType(0))
-        && genFType(0) == ceil(genFType(0))
-        && genDType(0) == ceil(genDType(0))
-        && genFType(0) == fract(genFType(0))
-        && genDType(0) == fract(genDType(0))
-        && genFType(0) == mod(genFType(0), float(1))
-        && genFType(0) == mod(genFType(0), genFType(1))
-        && genDType(0) == mod(genDType(0), double(1))
-        && genDType(0) == mod(genDType(0), genDType(0))
-        && genFType(0) == modf(genFType(0), outGenFType) && 0 == outGenFType
-        && genDType(0) == modf(genDType(0), outGenDType) && 0 == outGenDType
-        && genFType(0) == min(genFType(0), genFType(0))
-        && genFType(0) == min(genFType(0), float(0))
-        && genDType(0) == min(genDType(0), genDType(0))
-        && genDType(0) == min(genDType(0), double(0))
-        && genIType(0) == min(genIType(0), genIType(0))
-        && genIType(0) == min(genIType(0), int(0))
-        && genUType(0) == min(genUType(0), genUType(0))
-        && genUType(0) == min(genUType(0), uint(0))
-        && genFType(0) == max(genFType(0), genFType(0))
-        && genFType(0) == max(genFType(0), float(0))
-        && genDType(0) == max(genDType(0), genDType(0))
-        && genDType(0) == max(genDType(0), double(0))
-        && genIType(0) == max(genIType(0), genIType(0))
-        && genIType(0) == max(genIType(0), int(0))
-        && genUType(0) == max(genUType(0), genUType(0))
-        && genUType(0) == max(genUType(0), uint(0))
-        && genFType(0) == clamp(genFType(0), genFType(0), genFType(0))
-        && genFType(0) == clamp(genFType(0), float(0), float(0))
-        && genDType(0) == clamp(genDType(0), genDType(0), genDType(0))
-        && genDType(0) == clamp(genDType(0), double(0), double(0))
-        && genIType(0) == clamp(genIType(0), genIType(0), genIType(0))
-        && genIType(0) == clamp(genIType(0), int(0), int(0))
-        && genUType(0) == clamp(genUType(0), genUType(0), genUType(0))
-        && genUType(0) == clamp(genUType(0), uint(0), uint(0))
-        && genFType(0) == mix(genFType(0), genFType(0), genFType(0))
-        && genFType(0) == mix(genFType(0), genFType(0), float(0))
-        && genDType(0) == mix(genDType(0), genDType(0), genDType(0))
-        && genDType(0) == mix(genDType(0), genDType(0), double(0))
-        && genFType(0) == mix(genFType(0), genFType(0), genBType(0))
-        && genDType(0) == mix(genDType(0), genDType(0), genBType(0))
-        && genIType(0) == mix(genIType(0), genIType(0), genBType(0))
-        && genUType(0) == mix(genUType(0), genUType(0), genBType(0))
-        && genBType(0) == mix(genBType(0), genBType(0), genBType(0))
-        && genFType(0) == step(genFType(1), genFType(0))
-        && genFType(0) == step(float(1), genFType(0))
-        && genDType(0) == step(genDType(1), genDType(0))
-        && genDType(0) == step(double(1), genDType(0))
-        && genFType(0) == smoothstep(genFType(0), genFType(1), genFType(0))
-        && genFType(0) == smoothstep(float(0), float(1), genFType(0))
-        && genDType(0) == smoothstep(genDType(0), genDType(1), genDType(0))
-        && genDType(0) == smoothstep(double(0), double(1), genDType(0))
-        && genBType(0) == isnan(genFType(0))
-        && genBType(0) == isnan(genDType(0))
-        && genBType(0) == isinf(genFType(0))
-        && genBType(0) == isinf(genDType(0))
-        && genIType(0) == floatBitsToInt(genFType(0))
-        && genUType(0) == floatBitsToUint(genFType(0))
-        && genFType(0) == intBitsToFloat(genIType(0))
-        && genFType(0) == uintBitsToFloat(genUType(0))
-        && genFType(0) == fma(genFType(0), genFType(0), genFType(0))
-        && genDType(0) == fma(genDType(0), genDType(0), genDType(0))
-        && genFType(0) == frexp(genFType(0), outGenIType) && genIType(0) == outGenIType
-        && genDType(0) == frexp(genDType(0), outGenIType) && genIType(0) == outGenIType
-        && genFType(0) == ldexp(genFType(0), genIType(0))
-#if defined(TEST_when_exp2_double_type_works)
-        && genDType(0) == ldexp(genDType(0), genIType(0))
-#endif // #if defined(TEST_when_exp2_double_type_works)
+        && genDType(0) == sign(genDType(zero))
+
+        // CHECK_GLSL-COUNT-2: floor(
+        // CHECK_SPIR-COUNT-2: Floor{{ }}
+        && genFType(0) == floor(genFType(zero))
+        && genDType(0) == floor(genDType(zero))
+
+        // CHECK_GLSL-COUNT-2: trunc(
+        // CHECK_SPIR-COUNT-2: Trunc{{ }}
+        && genFType(0) == trunc(genFType(zero))
+        && genDType(0) == trunc(genDType(zero))
+
+        // CHECK_GLSL-COUNT-2: round(
+        // CHECK_SPIR-COUNT-2: Round{{ }}
+        && genFType(0) == round(genFType(zero))
+        && genDType(0) == round(genDType(zero))
+
+#if 0
+        // C-HECK_GLSL-COUNT-2: roundEven(
+        // C-HECK_SPIR-COUNT-2: RoundEven{{ }}
+        && genFType(0) == roundEven(genFType(zero))
+        && genDType(0) == roundEven(genDType(zero))
+#endif
+
+        // CHECK_GLSL-COUNT-2: ceil(
+        // CHECK_SPIR-COUNT-2: Ceil{{ }}
+        && genFType(0) == ceil(genFType(zero))
+        && genDType(0) == ceil(genDType(zero))
+
+        // CHECK_GLSL-COUNT-2: fract(
+        // CHECK_SPIR-COUNT-2: Fract{{ }}
+        && genFType(0) == fract(genFType(zero))
+        && genDType(0) == fract(genDType(zero))
+
+#if 0
+        // C-HECK_GLSL-COUNT-2: mod(
+        && genFType(0) == mod(genFType(zero), genFType(one))
+        && genDType(0) == mod(genDType(zero), genDType(one))
+#endif
+
+        // CHECK_GLSL-COUNT-2: modf(
+        // CHECK_SPIR-COUNT-2: Modf{{ }}
+        && genFType(0) == modf(genFType(zero), outGenFType) && 0 == outGenFType
+        && genDType(0) == modf(genDType(zero), outGenDType) && 0 == outGenDType
+
+        // CHECK_GLSL-COUNT-4: min(
+        // CHECK_SPIR-COUNT-2: FMin{{ }}
+        // CHECK_SPIR: SMin{{ }}
+        // CHECK_SPIR: UMin{{ }}
+        && genFType(0) == min(genFType(zero), genFType(zero))
+        && genDType(0) == min(genDType(zero), genDType(zero))
+        && genIType(0) == min(genIType(zero), genIType(zero))
+        && genUType(0) == min(genUType(zero), genUType(zero))
+
+        // CHECK_GLSL-COUNT-4: max(
+        // CHECK_SPIR-COUNT-2: FMax{{ }}
+        // CHECK_SPIR: SMax{{ }}
+        // CHECK_SPIR: UMax{{ }}
+        && genFType(0) == max(genFType(zero), genFType(zero))
+        && genDType(0) == max(genDType(zero), genDType(zero))
+        && genIType(0) == max(genIType(zero), genIType(zero))
+        && genUType(0) == max(genUType(zero), genUType(zero))
+
+        // CHECK_GLSL-COUNT-4: clamp(
+        // CHECK_SPIR-COUNT-2: FClamp{{ }}
+        // CHECK_SPIR: SClamp{{ }}
+        // CHECK_SPIR: UClamp{{ }}
+        && genFType(0) == clamp(genFType(zero), genFType(zero), genFType(zero))
+        && genDType(0) == clamp(genDType(zero), genDType(zero), genDType(zero))
+        && genIType(0) == clamp(genIType(zero), genIType(zero), genIType(zero))
+        && genUType(0) == clamp(genUType(zero), genUType(zero), genUType(zero))
+
+        // CHECK_GLSL-COUNT-2: mix(
+        // CHECK_SPIR-COUNT-2: FMix{{ }}
+        && genFType(0) == mix(genFType(zero), genFType(zero), genFType(zero))
+        && genDType(0) == mix(genDType(zero), genDType(zero), genDType(zero))
+
+        // CHECK_GLSL-COUNT-5: mix(
+        // C-HECK_SPIR-COUNT-5: OpSelect{{ }}
+        && genFType(0) == mix(genFType(zero), genFType(zero), genBType(zero))
+        && genDType(0) == mix(genDType(zero), genDType(zero), genBType(zero))
+        && genIType(0) == mix(genIType(zero), genIType(zero), genBType(zero))
+        && genUType(0) == mix(genUType(zero), genUType(zero), genBType(zero))
+        && genBType(0) == mix(genBType(zero), genBType(zero), genBType(zero))
+
+        // CHECK_GLSL-COUNT-4: step(
+        // CHECK_SPIR-COUNT-4: Step{{ }}
+        && genFType(0) == step(genFType(one), genFType(zero))
+        && genFType(0) == step(float(one), genFType(zero))
+        && genDType(0) == step(genDType(one), genDType(zero))
+        && genDType(0) == step(double(one), genDType(zero))
+
+        // CHECK_GLSL-COUNT-4: smoothstep(
+        // CHECK_SPIR-COUNT-4: SmoothStep{{ }}
+        && genFType(0) == smoothstep(genFType(zero), genFType(one), genFType(zero))
+        && genFType(0) == smoothstep(float(zero), float(one), genFType(zero))
+        && genDType(0) == smoothstep(genDType(zero), genDType(one), genDType(zero))
+        && genDType(0) == smoothstep(double(zero), double(one), genDType(zero))
+
+        // CHECK_GLSL-COUNT-2: isnan(
+        // CHECK_SPIR-COUNT-2: OpIsNan{{ }}
+        && genBType(0) == isnan(genFType(zero))
+        && genBType(0) == isnan(genDType(zero))
+
+        // CHECK_GLSL-COUNT-2: isinf(
+        // CHECK_SPIR-COUNT-2: OpIsInf{{ }}
+        && genBType(0) == isinf(genFType(zero))
+        && genBType(0) == isinf(genDType(zero))
+
+        // CHECK_GLSL: floatBitsToInt(
+        // CHECK_SPIR: OpBitcast{{ }}
+        && genIType(0) == floatBitsToInt(genFType(zero))
+
+        // CHECK_GLSL: floatBitsToUint(
+        // CHECK_SPIR: OpBitcast{{ }}
+        && genUType(0) == floatBitsToUint(genFType(zero))
+
+        // CHECK_GLSL: intBitsToFloat(
+        // CHECK_SPIR: OpBitcast{{ }}
+        && genFType(0) == intBitsToFloat(genIType(zero))
+
+        // CHECK_GLSL: uintBitsToFloat(
+        // CHECK_SPIR: OpBitcast{{ }}
+        && genFType(0) == uintBitsToFloat(genUType(zero))
+
+        // CHECK_GLSL-COUNT-2: fma(
+        // CHECK_SPIR-COUNT-2: Fma{{ }}
+        && genFType(0) == fma(genFType(zero), genFType(zero), genFType(zero))
+        && genDType(0) == fma(genDType(zero), genDType(zero), genDType(zero))
+
+        // CHECK_GLSL-COUNT-2: frexp(
+        // CHECK_SPIR-COUNT-2: Frexp{{ }}
+        && genFType(0) == frexp(genFType(zero), outGenIType) && genIType(0) == outGenIType
+        && genDType(0) == frexp(genDType(zero), outGenIType) && genIType(0) == outGenIType
+
+        // CHECK_GLSL-COUNT-2: ldexp(
+        // CHECK_SPIR-COUNT-2: Ldexp{{ }}
+        && genFType(0) == ldexp(genFType(zero), genIType(zero))
+        && genDType(0) == ldexp(genDType(zero), genIType(zero))
+
+        // 8.4. Floating-Point Pack and Unpack Functions
+
+        // CHECK_GLSL: packUnorm2x16(
+        // CHECK_SPIR: PackUnorm2x16{{ }}
+        && uint(0)  == packUnorm2x16(vec2(zero))
+
+        // CHECK_GLSL: packSnorm2x16(
+        // CHECK_SPIR: PackSnorm2x16{{ }}
+        && uint(0)  == packSnorm2x16(vec2(zero))
+
+        // CHECK_GLSL: packUnorm4x8(
+        // CHECK_SPIR: PackUnorm4x8{{ }}
+        && uint(0)  == packUnorm4x8(vec4(zero))
+
+        // CHECK_GLSL: packSnorm4x8(
+        // CHECK_SPIR: PackSnorm4x8{{ }}
+        && uint(0)  == packSnorm4x8(vec4(zero))
+
+        // CHECK_GLSL: unpackUnorm2x16(
+        // CHECK_SPIR: UnpackUnorm2x16{{ }}
+        && vec2(0)  == unpackUnorm2x16(uint(zero))
+
+        // CHECK_GLSL: unpackSnorm2x16(
+        // CHECK_SPIR: UnpackSnorm2x16{{ }}
+        && vec2(0)  == unpackSnorm2x16(uint(zero))
+
+        // CHECK_GLSL: unpackUnorm4x8(
+        // CHECK_SPIR: UnpackUnorm4x8{{ }}
+        && vec4(0)  == unpackUnorm4x8(uint(zero))
+
+        // CHECK_GLSL: unpackSnorm4x8(
+        // CHECK_SPIR: UnpackSnorm4x8{{ }}
+        && vec4(0)  == unpackSnorm4x8(uint(zero))
+
+        // CHECK_GLSL: packHalf2x16(
+        // CHECK_SPIR: PackHalf2x16{{ }}
+        && uint(0)  == packHalf2x16(vec2(zero))
+
+        // CHECK_GLSL: unpackHalf2x16(
+        // CHECK_SPIR: UnpackHalf2x16{{ }}
+        && vec2(0)  == unpackHalf2x16(uint(zero))
+
+        // CHECK_GLSL: packDouble2x32(
+        // CHECK_SPIR: PackDouble2x32{{ }}
+        && double(0) == packDouble2x32(uvec2(zero))
+
+        // CHECK_GLSL: unpackDouble2x32(
+        // CHECK_SPIR: UnpackDouble2x32{{ }}
+        && uvec2(0) == unpackDouble2x32(packDouble2x32(uvec2(zero)))
 
         // 8.5. Geometric Functions
-        && float(0) == length(genFType(0))
-        && double(0) == length(genDType(0))
-        && float(0) == distance(genFType(0), genFType(0))
-        && double(0) == distance(genDType(0), genDType(0))
-        && float(0) == dot(genFType(0), genFType(0))
-        && double(0) == dot(genDType(0), genDType(0))
-        && (abs(float(1) - length(normalize(genFType(1)))) < epsilon)
-        && (abs(double(1) - length(normalize(genDType(1)))) < double(epsilon))
-        && genFType(1) == faceforward(genFType(1), genFType(1), genFType(-1))
-        && genDType(1) == faceforward(genDType(1), genDType(1), genDType(-1))
-        && genFType(0) == reflect(genFType(0), genFType(0))
-        && genDType(0) == reflect(genDType(0), genDType(0))
-        && genFType(0) == refract(genFType(0), genFType(0), float(0))
-        && genDType(0) == refract(genDType(0), genDType(0), double(0))
+
+        // CHECK_GLSL-COUNT-2: length(
+        // CHECK_SPIR-COUNT-2: Length{{ }}
+        && float(0)  == length(genFType(zero))
+        && double(0) == length(genDType(zero))
+
+        // CHECK_GLSL-COUNT-2: distance(
+        // CHECK_SPIR-COUNT-2: Distance{{ }}
+        && float(0)  == distance(genFType(zero), genFType(zero))
+        && double(0) == distance(genDType(zero), genDType(zero))
+
+        // CHECK_GLSL-COUNT-2: dot(
+        // SPIR-V doesn't have OpDot for scalar types
+        && float(0)  == dot(genFType(zero), genFType(zero))
+        && double(0) == dot(genDType(zero), genDType(zero))
+
+        // CHECK_GLSL-COUNT-2: cross(
+        // CHECK_SPIR-COUNT-2: Cross{{ }}
+        && vec3(0)  == cross(vec3(0), vec3(zero))
+        && dvec3(0) == cross(dvec3(0), dvec3(zero))
+
+        // CHECK_GLSL-COUNT-2: normalize(
+        // CHECK_SPIR-COUNT-2: Normalize{{ }}
+        && (abs(float(1)  - length(normalize(genFType(one)))) < epsilon)
+        && (abs(double(1) - length(normalize(genDType(one)))) < double(epsilon))
+
+        // CHECK_GLSL-COUNT-2: faceforward(
+        // CHECK_SPIR-COUNT-2: FaceForward{{ }}
+        && genFType(1) == faceforward(genFType(one), genFType(one), genFType(negaOne))
+        && genDType(1) == faceforward(genDType(one), genDType(one), genDType(negaOne))
+
+        // CHECK_GLSL-COUNT-2: reflect(
+        // CHECK_SPIR-COUNT-2: Reflect{{ }}
+        && genFType(0) == reflect(genFType(zero), genFType(zero))
+        && genDType(0) == reflect(genDType(zero), genDType(zero))
+
+        // CHECK_GLSL-COUNT-2: refract(
+        // CHECK_SPIR-COUNT-2: Refract{{ }}
+        && genFType(0) == refract(genFType(zero), genFType(zero), float(zero))
+        && genDType(0) == refract(genDType(zero), genDType(zero), double(zero))
 
         // 8.6. Matrix Functions
-        && equals(mat2x2(0), matrixCompMult(mat2x2(0), mat2x2(0)))
-        && equals(mat2x3(0), matrixCompMult(mat2x3(0), mat2x3(0)))
-        && equals(mat2x4(0), matrixCompMult(mat2x4(0), mat2x4(0)))
-        && equals(mat3x2(0), matrixCompMult(mat3x2(0), mat3x2(0)))
-        && equals(mat3x3(0), matrixCompMult(mat3x3(0), mat3x3(0)))
-        && equals(mat3x4(0), matrixCompMult(mat3x4(0), mat3x4(0)))
-        && equals(mat4x2(0), matrixCompMult(mat4x2(0), mat4x2(0)))
-        && equals(mat4x3(0), matrixCompMult(mat4x3(0), mat4x3(0)))
-        && equals(mat4x4(0), matrixCompMult(mat4x4(0), mat4x4(0)))
-        && equals(mat2(0), outerProduct(vec2(0), vec2(0)))
-        && equals(mat3(0), outerProduct(vec3(0), vec3(0)))
-        && equals(mat4(0), outerProduct(vec4(0), vec4(0)))
-        && equals(mat2x3(0), outerProduct(vec3(0), vec2(0)))
-        && equals(mat3x2(0), outerProduct(vec2(0), vec3(0)))
-        && equals(mat2x4(0), outerProduct(vec4(0), vec2(0)))
-        && equals(mat4x2(0), outerProduct(vec2(0), vec4(0)))
-        && equals(mat3x4(0), outerProduct(vec4(0), vec3(0)))
-        && equals(mat4x3(0), outerProduct(vec3(0), vec4(0)))
-        && equals(mat2(0), transpose(mat2(0)))
-        && equals(mat3(0), transpose(mat3(0)))
-        && equals(mat4(0), transpose(mat4(0)))
-        && equals(mat2x3(0), transpose(mat3x2(0)))
-        && equals(mat3x2(0), transpose(mat2x3(0)))
-        && equals(mat2x4(0), transpose(mat4x2(0)))
-        && equals(mat4x2(0), transpose(mat2x4(0)))
-        && equals(mat3x4(0), transpose(mat4x3(0)))
-        && equals(mat4x3(0), transpose(mat3x4(0)))
-        && float(0) == determinant(mat2(0))
-        && float(0) == determinant(mat3(0))
-        && float(0) == determinant(mat4(0))
-#if defined(TEST_when_inverse_works)
-        && equals(mat2(0), inverse(mat2(0)))
-        && equals(mat3(0), inverse(mat3(0)))
-        && equals(mat4(0), inverse(mat4(0)))
-#endif // #if defined(TEST_when_inverse_works)
-
-//         // 8.8. Integer Functions
-        && genUType(0) == uaddCarry(genUType(0), genUType(0), outGenUType) && genUType(0) == outGenUType
-        && genUType(0) == usubBorrow(genUType(0), genUType(0), outGenUType) && genUType(0) == outGenUType
-        && genIType(0) == bitfieldExtract(genIType(0), int(0), int(0))
-        && genUType(0) == bitfieldExtract(genUType(0), int(0), int(0))
-        && genIType(0) == bitfieldInsert(genIType(0), genIType(0), int(0), int(0))
-        && genUType(0) == bitfieldInsert(genUType(0), genUType(0), int(0), int(0))
-        && genIType(0) == bitfieldReverse(genIType(0))
-        && genUType(0) == bitfieldReverse(genUType(0))
-        && genIType(0) == bitCount(genIType(0))
-        && genIType(0) == bitCount(genUType(0))
-        && genIType(-1) == findLSB(genIType(0))
-        && genIType(-1) == findLSB(genUType(0))
-        && genIType(-1) == findMSB(genIType(0))
-        && genIType(-1) == findMSB(genUType(0))
+
+        // CHECK_GLSL-COUNT-9: matrixCompMult(
+        && equals(mat2x2(0), matrixCompMult(mat2x2(0), mat2x2(zero)))
+        && equals(mat2x3(0), matrixCompMult(mat2x3(0), mat2x3(zero)))
+        && equals(mat2x4(0), matrixCompMult(mat2x4(0), mat2x4(zero)))
+        && equals(mat3x2(0), matrixCompMult(mat3x2(0), mat3x2(zero)))
+        && equals(mat3x3(0), matrixCompMult(mat3x3(0), mat3x3(zero)))
+        && equals(mat3x4(0), matrixCompMult(mat3x4(0), mat3x4(zero)))
+        && equals(mat4x2(0), matrixCompMult(mat4x2(0), mat4x2(zero)))
+        && equals(mat4x3(0), matrixCompMult(mat4x3(0), mat4x3(zero)))
+        && equals(mat4x4(0), matrixCompMult(mat4x4(0), mat4x4(zero)))
+
+        // CHECK_GLSL-COUNT-4: outerProduct(
+        // CHECK_SPIR-COUNT-4: OpOuterProduct{{ }}
+        && equals(mat2(0), outerProduct(vec2(0), vec2(zero)))
+        && equals(mat3(0), outerProduct(vec3(0), vec3(zero)))
+        && equals(mat4(0), outerProduct(vec4(0), vec4(zero)))
+        && equals(mat2x3(0), outerProduct(vec3(0), vec2(zero)))
+        && equals(mat3x2(0), outerProduct(vec2(0), vec3(zero)))
+        && equals(mat2x4(0), outerProduct(vec4(0), vec2(zero)))
+        && equals(mat4x2(0), outerProduct(vec2(0), vec4(zero)))
+        && equals(mat3x4(0), outerProduct(vec4(0), vec3(zero)))
+        && equals(mat4x3(0), outerProduct(vec3(0), vec4(zero)))
+
+        // CHECK_GLSL-COUNT-9: transpose(
+        // CHECK_SPIR-COUNT-9: OpTranspose{{ }}
+        && equals(mat2(0), transpose(mat2(zero)))
+        && equals(mat3(0), transpose(mat3(zero)))
+        && equals(mat4(0), transpose(mat4(zero)))
+        && equals(mat2x3(0), transpose(mat3x2(zero)))
+        && equals(mat3x2(0), transpose(mat2x3(zero)))
+        && equals(mat2x4(0), transpose(mat4x2(zero)))
+        && equals(mat4x2(0), transpose(mat2x4(zero)))
+        && equals(mat3x4(0), transpose(mat4x3(zero)))
+        && equals(mat4x3(0), transpose(mat3x4(zero)))
+
+        // CHECK_GLSL-COUNT-3: determinant(
+        // CHECK_SPIR-COUNT-3: Determinant{{ }}
+        && float(0) == determinant(mat2(zero))
+        && float(0) == determinant(mat3(zero))
+        && float(0) == determinant(mat4(zero))
+
+        // CHECK_GLSL-COUNT-3: inverse(
+        // CHECK_SPIR-COUNT-3: MatrixInverse{{ }}
+        && equals(identity2x2, inverse(identity2x2))
+        && equals(identity3x3, inverse(identity3x3))
+        && equals(identity4x4, inverse(identity4x4))
+
+        // 8.8. Integer Functions
+
+        // CHECK_GLSL: uaddCarry(
+        // CHECK_SPIR: OpIAddCarry{{ }}
+        && genUType(0) == uaddCarry(genUType(zero), genUType(zero), outGenUType) && genUType(0) == outGenUType
+
+        // CHECK_GLSL: usubBorrow(
+        // CHECK_SPIR: OpISubBorrow{{ }}
+        && genUType(0) == usubBorrow(genUType(zero), genUType(zero), outGenUType) && genUType(0) == outGenUType
+
+        // CHECK_GLSL-COUNT-2: bitfieldExtract(
+        // CHECK_SPIR: OpBitFieldSExtract{{ }}
+        // CHECK_SPIR: OpBitFieldUExtract{{ }}
+        && genIType(0) == bitfieldExtract(genIType(zero), int(zero), int(zero))
+        && genUType(0) == bitfieldExtract(genUType(zero), int(zero), int(zero))
+
+        // CHECK_GLSL-COUNT-2: bitfieldInsert(
+        // CHECK_SPIR-COUNT-2: OpBitFieldInsert{{ }}
+        && genIType(0) == bitfieldInsert(genIType(zero), genIType(zero), int(zero), int(zero))
+        && genUType(0) == bitfieldInsert(genUType(zero), genUType(zero), int(zero), int(zero))
+
+        // CHECK_GLSL-COUNT-2: bitfieldReverse(
+        // CHECK_SPIR-COUNT-2: OpBitReverse{{ }}
+        && genIType(0) == bitfieldReverse(genIType(zero))
+        && genUType(0) == bitfieldReverse(genUType(zero))
+
+        // CHECK_GLSL-COUNT-2: bitCount(
+        // CHECK_SPIR-COUNT-2: OpBitCount{{ }}
+        && genIType(0) == bitCount(genIType(zero))
+        && genIType(0) == bitCount(genUType(zero))
+
+        // CHECK_GLSL-COUNT-2: findLSB(
+        // CHECK_SPIR-COUNT-2: FindILsb{{ }}
+        && genIType(-1) == findLSB(genIType(zero))
+        && genIType(-1) == findLSB(genUType(zero))
+
+        // CHECK_GLSL-COUNT-2: findMSB(
+        // CHECK_SPIR: FindSMsb{{ }}
+        // CHECK_SPIR: FindUMsb{{ }}
+        && genIType(-1) == findMSB(genIType(zero))
+        && genIType(-1) == findMSB(genUType(zero))
         ;
 }
 
 __generic<let N : int>
-bool dimNTypeFuncs()
+bool Test_VectorType()
 {
+    // CHECK_GLSL-LABEL: bool Test_VectorType_0
+    // CHECK_GLSL-NOT: for{{ *}}(
+    // CHECK_GLSL-LABEL: bool Test_VectorType_1
+
+    // The following type names are what the OpenGL documentation
+    // uses to refer to variants of the vector types.
+    // The difference between `genFType` and `vec` is that
+    // `genFType` also includes the scalar type, which is just
+    // `float`, whereas `vec` does not include it.
+    //
     typealias genFType = vector<float, N>;
     typealias genDType = vector<double, N>;
     typealias genIType = vector<int, N>;
@@ -270,244 +538,530 @@ bool dimNTypeFuncs()
     typealias uvec     = vector<uint, N>;
     typealias bvec     = vector<bool, N>;
 
+    // Temporary variables
     genFType outGenFType;
     genDType outGenDType;
     genIType outGenIType, outGenIType2;
     genUType outGenUType, outGenUType2;
 
-    constexpr float epsilon = 0.000001;
+    constexpr const float epsilon = 0.000001;
+
+    // CHECK_SPIR-LABEL: OpAccessChain {{.*}} %inputBuffer
+    const int zero = inputBuffer.data[0];
+    const int one = inputBuffer.data[1];
+    const int negaOne = inputBuffer.data[2];
 
     bool voidResults = true;
 
+    // Note: "CHECK_SPIR-NOT:" testing is to detect cases where a scalar
+    // version of the function is called when it should use a vector version.
+
     // 8.8. Integer Functions
-    umulExtended(genUType(0), genUType(0), outGenUType, outGenUType2);
+
+    // CHECK_GLSL: umulExtended(
+    // CHECK_SPIR: OpUMulExtended{{ }}
+    // CHECK_SPIR-NOT: OpUMulExtended{{ }}
+    umulExtended(genUType(zero), genUType(zero), outGenUType, outGenUType2);
     voidResults = voidResults && genUType(0) == outGenUType;
     voidResults = voidResults && genUType(0) == outGenUType2;
 
-    imulExtended(genIType(0), genIType(0), outGenIType, outGenIType2);
+    // CHECK_GLSL: imulExtended(
+    // CHECK_SPIR: OpSMulExtended{{ }}
+    // CHECK_SPIR-NOT: OpSMulExtended{{ }}
+    imulExtended(genIType(zero), genIType(zero), outGenIType, outGenIType2);
     voidResults = voidResults && genIType(0) == outGenIType;
     voidResults = voidResults && genIType(0) == outGenIType2;
 
     return voidResults
         // 8.1. Angle and Trigonometry Functions
-        && genFType(0) == radians(genFType(0))
-        && genFType(0) == degrees(genFType(0))
-        && genFType(0) == sin(genFType(0))
-        && genFType(1) == cos(genFType(0))
-        && genFType(0) == tan(genFType(0))
-        && genFType(0) == asin(genFType(0))
-        && genFType(0) == acos(genFType(1))
-        && genFType(0) == atan(genFType(0), genFType(0))
-        && genFType(0) == atan(genFType(0))
-        && genFType(0) == sinh(genFType(0))
-        && genFType(1) == cosh(genFType(0))
-        && genFType(0) == tanh(genFType(0))
+
+        // CHECK_GLSL: radians(
+        // CHECK_SPIR: Radians{{ }}
+        // CHECK_SPIR-NOT: Radians{{ }}
+        && genFType(0) == radians(genFType(zero))
+
+        // CHECK_GLSL: degrees(
+        // CHECK_SPIR: Degrees{{ }}
+        // CHECK_SPIR-NOT: Degrees{{ }}
+        && genFType(0) == degrees(genFType(zero))
+
+        // CHECK_GLSL: sin(
+        // CHECK_SPIR: Sin{{ }}
+        // CHECK_SPIR-NOT: Sin{{ }}
+        && genFType(0) == sin(genFType(zero))
+
+        // CHECK_GLSL: cos(
+        // CHECK_SPIR: Cos{{ }}
+        // CHECK_SPIR-NOT: Cos{{ }}
+        && genFType(1) == cos(genFType(zero))
+
+        // CHECK_GLSL: tan(
+        // CHECK_SPIR: Tan{{ }}
+        // CHECK_SPIR-NOT: Tan{{ }}
+        && genFType(0) == tan(genFType(zero))
+
+        // CHECK_GLSL: asin(
+        // CHECK_SPIR: Asin{{ }}
+        // CHECK_SPIR-NOT: Asin{{ }}
+        && genFType(0) == asin(genFType(zero))
+
+        // CHECK_GLSL: acos(
+        // CHECK_SPIR: Acos{{ }}
+        // CHECK_SPIR-NOT: Acos{{ }}
+        && genFType(0) == acos(genFType(one))
+
+        // CHECK_GLSL-COUNT-2: atan(
+        // CHECK_SPIR: Atan2{{ }}
+        // CHECK_SPIR-NOT: Atan2{{ }}
+        // CHECK_SPIR: Atan{{ }}
+        // CHECK_SPIR-NOT: Atan{{ }}
+        && genFType(0) == atan(genFType(zero), genFType(zero))
+        && genFType(0) == atan(genFType(zero))
+
+        // CHECK_GLSL: sinh(
+        // CHECK_SPIR: Sinh{{ }}
+        // CHECK_SPIR-NOT: Sinh{{ }}
+        && genFType(0) == sinh(genFType(zero))
+
+        // CHECK_GLSL: cosh(
+        // CHECK_SPIR: Cosh{{ }}
+        // CHECK_SPIR-NOT: Cosh{{ }}
+        && genFType(1) == cosh(genFType(zero))
+
+        // CHECK_GLSL: tanh(
+        // CHECK_SPIR: Tanh{{ }}
+        // CHECK_SPIR-NOT: Tanh{{ }}
+        && genFType(0) == tanh(genFType(zero))
+
 #if !defined(TARGET_CUDA)
-        && genFType(0) == asinh(genFType(0))
-        && genFType(0) == acosh(genFType(1))
-        && genFType(0) == atanh(genFType(0))
+        // CHECK_GLSL: asinh(
+        // CHECK_SPIR: Asinh{{ }}
+        // CHECK_SPIR-NOT: Asinh{{ }}
+        && genFType(0) == asinh(genFType(zero))
+
+        // CHECK_GLSL: acosh(
+        // CHECK_SPIR: Acosh{{ }}
+        // CHECK_SPIR-NOT: Acosh{{ }}
+        && genFType(0) == acosh(genFType(one))
+
+        // CHECK_GLSL: atanh(
+        // CHECK_SPIR: Atanh{{ }}
+        // CHECK_SPIR-NOT: Atanh{{ }}
+        && genFType(0) == atanh(genFType(zero))
 #endif // #if !defined(TARGET_CUDA)
 
         // 8.2. Exponential Functions
-        && genFType(1) == pow(genFType(1), genFType(0))
-        && genFType(1) == exp(genFType(0))
-        && genFType(0) == log(genFType(1))
-        && genFType(1) == exp2(genFType(0))
-#if defined(TEST_when_exp2_double_type_works)
-        && genDType(1) == exp2(genDType(0))
-#endif // #if defined(TEST_when_exp2_double_type_works)
-        && genFType(0) == log2(genFType(1))
-        && genFType(0) == sqrt(genFType(0))
-        && genDType(0) == sqrt(genDType(0))
-        && genFType(1) == inversesqrt(genFType(1))
-        && genDType(1) == inversesqrt(genDType(1))
+
+        // CHECK_GLSL: pow(
+        // CHECK_SPIR: Pow{{ }}
+        // CHECK_SPIR-NOT: Pow{{ }}
+        && genFType(1) == pow(genFType(one), genFType(zero))
+
+        // CHECK_GLSL: exp(
+        // CHECK_SPIR: Exp{{ }}
+        // CHECK_SPIR-NOT: Exp{{ }}
+        && genFType(1) == exp(genFType(zero))
+
+        // CHECK_GLSL: log(
+        // CHECK_SPIR: Log{{ }}
+        // CHECK_SPIR-NOT: Log{{ }}
+        && genFType(0) == log(genFType(one))
+
+        // CHECK_GLSL: exp2(
+        // CHECK_SPIR: Exp2{{ }}
+        // CHECK_SPIR-NOT: Exp2{{ }}
+        && genFType(1) == exp2(genFType(zero))
+
+        // CHECK_GLSL: log2(
+        // CHECK_SPIR: Log2{{ }}
+        // CHECK_SPIR-NOT: Log2{{ }}
+        && genFType(0) == log2(genFType(one))
+
+        // CHECK_GLSL-COUNT-2: sqrt(
+        // CHECK_SPIR-COUNT-2: Sqrt{{ }}
+        // CHECK_SPIR-NOT: Sqrt{{ }}
+        && genFType(0) == sqrt(genFType(zero))
+        && genDType(0) == sqrt(genDType(zero))
+
+        // CHECK_GLSL-COUNT-2: inversesqrt(
+        // CHECK_SPIR-COUNT-2: InverseSqrt{{ }}
+        // CHECK_SPIR-NOT: InverseSqrt{{ }}
+        && genFType(1) == inversesqrt(genFType(one))
+        && genDType(1) == inversesqrt(genDType(one))
 
         // 8.3. Common Functions
-        && genFType(0) == abs(genFType(0))
-        && genIType(0) == abs(genIType(0))
-        && genDType(0) == abs(genDType(0))
-        && genFType(0) == sign(genFType(0))
+
+        // CHECK_GLSL-COUNT-3: abs(
+        // CHECK_SPIR: FAbs{{ }}
+        // CHECK_SPIR-NOT: FAbs{{ }}
+        // CHECK_SPIR: SAbs{{ }}
+        // CHECK_SPIR-NOT: SAbs{{ }}
+        // CHECK_SPIR: FAbs{{ }}
+        // CHECK_SPIR-NOT: FAbs{{ }}
+        && genFType(0) == abs(genFType(zero))
+        && genIType(0) == abs(genIType(zero))
+        && genDType(0) == abs(genDType(zero))
+
+        // CHECK_GLSL-COUNT-3: sign(
+        // CHECK_SPIR: FSign{{ }}
+        // CHECK_SPIR-NOT: FSign{{ }}
+        // CHECK_SPIR: SSign{{ }}
+        // CHECK_SPIR-NOT: SSign{{ }}
+        // CHECK_SPIR: FSign{{ }}
+        // CHECK_SPIR-NOT: FSign{{ }}
+        && genFType(0) == sign(genFType(zero))
 #if !defined(TARGET_CUDA)
-        && genIType(0) == sign(genIType(0))
+        && genIType(0) == sign(genIType(zero))
 #endif // #if !defined(TARGET_CUDA)
-        && genDType(0) == sign(genDType(0))
-        && genFType(0) == floor(genFType(0))
-        && genDType(0) == floor(genDType(0))
-        && genFType(0) == trunc(genFType(0))
-        && genDType(0) == trunc(genDType(0))
-        && genFType(0) == round(genFType(0))
-        && genDType(0) == round(genDType(0))
-        && genFType(0) == roundEven(genFType(0))
-        && genDType(0) == roundEven(genDType(0))
-        && genFType(0) == ceil(genFType(0))
-        && genDType(0) == ceil(genDType(0))
-        && genFType(0) == fract(genFType(0))
-        && genDType(0) == fract(genDType(0))
-        && genFType(0) == mod(genFType(0), float(1))
-        && genFType(0) == mod(genFType(0), genFType(1))
-        && genDType(0) == mod(genDType(0), double(1))
-        && genDType(0) == mod(genDType(0), genDType(0))
-        && genFType(0) == modf(genFType(0), outGenFType) && genFType(0) == outGenFType
-        && genDType(0) == modf(genDType(0), outGenDType) && genDType(0) == outGenDType
-        && genFType(0) == min(genFType(0), genFType(0))
-        && genFType(0) == min(genFType(0), float(0))
-        && genDType(0) == min(genDType(0), genDType(0))
-        && genDType(0) == min(genDType(0), double(0))
-        && genIType(0) == min(genIType(0), genIType(0))
-        && genIType(0) == min(genIType(0), int(0))
-        && genUType(0) == min(genUType(0), genUType(0))
-        && genUType(0) == min(genUType(0), uint(0))
-        && genFType(0) == max(genFType(0), genFType(0))
-        && genFType(0) == max(genFType(0), float(0))
-        && genDType(0) == max(genDType(0), genDType(0))
-        && genDType(0) == max(genDType(0), double(0))
-        && genIType(0) == max(genIType(0), genIType(0))
-        && genIType(0) == max(genIType(0), int(0))
-        && genUType(0) == max(genUType(0), genUType(0))
-        && genUType(0) == max(genUType(0), uint(0))
-        && genFType(0) == clamp(genFType(0), genFType(0), genFType(0))
-        && genFType(0) == clamp(genFType(0), float(0), float(0))
-        && genDType(0) == clamp(genDType(0), genDType(0), genDType(0))
-        && genDType(0) == clamp(genDType(0), double(0), double(0))
-        && genIType(0) == clamp(genIType(0), genIType(0), genIType(0))
-        && genIType(0) == clamp(genIType(0), int(0), int(0))
-        && genUType(0) == clamp(genUType(0), genUType(0), genUType(0))
-        && genUType(0) == clamp(genUType(0), uint(0), uint(0))
-        && genFType(0) == mix(genFType(0), genFType(0), genFType(0))
-        && genFType(0) == mix(genFType(0), genFType(0), float(0))
-        && genDType(0) == mix(genDType(0), genDType(0), genDType(0))
-        && genDType(0) == mix(genDType(0), genDType(0), double(0))
+        && genDType(0) == sign(genDType(zero))
+
+        // CHECK_GLSL-COUNT-2: floor(
+        // CHECK_SPIR-COUNT-2: Floor{{ }}
+        // CHECK_SPIR-NOT: Floor{{ }}
+        && genFType(0) == floor(genFType(zero))
+        && genDType(0) == floor(genDType(zero))
+
+        // CHECK_GLSL-COUNT-2: trunc(
+        // CHECK_SPIR-COUNT-2: Trunc{{ }}
+        // CHECK_SPIR-NOT: Trunc{{ }}
+        && genFType(0) == trunc(genFType(zero))
+        && genDType(0) == trunc(genDType(zero))
+
+        // CHECK_GLSL-COUNT-2: round(
+        // CHECK_SPIR-COUNT-2: Round{{ }}
+        // CHECK_SPIR-NOT: Round{{ }}
+        && genFType(0) == round(genFType(zero))
+        && genDType(0) == round(genDType(zero))
+
+#if 0
+        // C-HECK_GLSL-COUNT-2: roundEven(
+        // C-HECK_SPIR-COUNT-2: RoundEven{{ }}
+        // C-HECK_SPIR-NOT: RoundEven{{ }}
+        && genFType(0) == roundEven(genFType(zero))
+        && genDType(0) == roundEven(genDType(zero))
+#endif
+
+        // CHECK_GLSL-COUNT-2: ceil(
+        // CHECK_SPIR-COUNT-2: Ceil{{ }}
+        // CHECK_SPIR-NOT: Ceil{{ }}
+        && genFType(0) == ceil(genFType(zero))
+        && genDType(0) == ceil(genDType(zero))
+
+        // CHECK_GLSL-COUNT-2: fract(
+        // CHECK_SPIR-COUNT-2: Fract{{ }}
+        // CHECK_SPIR-NOT: Fract{{ }}
+        && genFType(0) == fract(genFType(zero))
+        && genDType(0) == fract(genDType(zero))
+
+#if 0
+        // C-HECK_GLSL-COUNT-4: mod(
+        && genFType(0) == mod(genFType(zero), float(one))
+        && genFType(0) == mod(genFType(zero), genFType(one))
+        && genDType(0) == mod(genDType(zero), double(one))
+        && genDType(0) == mod(genDType(zero), genDType(one))
+#endif
+
+        // CHECK_GLSL-COUNT-2: modf(
+        // CHECK_SPIR-COUNT-2: Modf{{ }}
+        // CHECK_SPIR-NOT: Modf{{ }}
+        && genFType(0) == modf(genFType(zero), outGenFType) && genFType(0) == outGenFType
+        && genDType(0) == modf(genDType(zero), outGenDType) && genDType(0) == outGenDType
+
+        // CHECK_GLSL-COUNT-8: min(
+        // CHECK_SPIR-COUNT-4: FMin{{ }}
+        // CHECK_SPIR-NOT: FMin{{ }}
+        // CHECK_SPIR-COUNT-2: SMin{{ }}
+        // CHECK_SPIR-NOT: SMin{{ }}
+        // CHECK_SPIR-COUNT-2: UMin{{ }}
+        // CHECK_SPIR-NOT: UMin{{ }}
+        && genFType(0) == min(genFType(zero), genFType(zero))
+        && genFType(0) == min(genFType(zero), float(zero))
+        && genDType(0) == min(genDType(zero), genDType(zero))
+        && genDType(0) == min(genDType(zero), double(zero))
+        && genIType(0) == min(genIType(zero), genIType(zero))
+        && genIType(0) == min(genIType(zero), int(zero))
+        && genUType(0) == min(genUType(zero), genUType(zero))
+        && genUType(0) == min(genUType(zero), uint(zero))
+
+        // CHECK_GLSL-COUNT-8: max(
+        // CHECK_SPIR-COUNT-4: FMax{{ }}
+        // CHECK_SPIR-NOT: FMax{{ }}
+        // CHECK_SPIR-COUNT-2: SMax{{ }}
+        // CHECK_SPIR-NOT: SMax{{ }}
+        // CHECK_SPIR-COUNT-2: UMax{{ }}
+        // CHECK_SPIR-NOT: UMax{{ }}
+        && genFType(0) == max(genFType(zero), genFType(zero))
+        && genFType(0) == max(genFType(zero), float(zero))
+        && genDType(0) == max(genDType(zero), genDType(zero))
+        && genDType(0) == max(genDType(zero), double(zero))
+        && genIType(0) == max(genIType(zero), genIType(zero))
+        && genIType(0) == max(genIType(zero), int(zero))
+        && genUType(0) == max(genUType(zero), genUType(zero))
+        && genUType(0) == max(genUType(zero), uint(zero))
+
+        // CHECK_GLSL-COUNT-8: clamp(
+        // CHECK_SPIR-COUNT-4: FClamp{{ }}
+        // CHECK_SPIR-NOT: FClamp{{ }}
+        // CHECK_SPIR-COUNT-2: SClamp{{ }}
+        // CHECK_SPIR-NOT: SClamp{{ }}
+        // CHECK_SPIR-COUNT-2: UClamp{{ }}
+        // CHECK_SPIR-NOT: UClamp{{ }}
+        && genFType(0) == clamp(genFType(zero), genFType(zero), genFType(zero))
+        && genFType(0) == clamp(genFType(zero), float(zero), float(zero))
+        && genDType(0) == clamp(genDType(zero), genDType(zero), genDType(zero))
+        && genDType(0) == clamp(genDType(zero), double(zero), double(zero))
+        && genIType(0) == clamp(genIType(zero), genIType(zero), genIType(zero))
+        && genIType(0) == clamp(genIType(zero), int(zero), int(zero))
+        && genUType(0) == clamp(genUType(zero), genUType(zero), genUType(zero))
+        && genUType(0) == clamp(genUType(zero), uint(zero), uint(zero))
+
+        // CHECK_GLSL-COUNT-4: mix(
+        // CHECK_SPIR-COUNT-4: FMix{{ }}
+        // CHECK_SPIR-NOT: FMix{{ }}
+        && genFType(0) == mix(genFType(zero), genFType(zero), genFType(zero))
+        && genFType(0) == mix(genFType(zero), genFType(zero), float(one))
+        && genDType(0) == mix(genDType(zero), genDType(zero), genDType(zero))
+        && genDType(0) == mix(genDType(zero), genDType(zero), double(one))
+
 #if !defined(TARGET_CUDA)
-        && genFType(0) == mix(genFType(0), genFType(0), genBType(0))
-        && genDType(0) == mix(genDType(0), genDType(0), genBType(0))
-        && genIType(0) == mix(genIType(0), genIType(0), genBType(0))
-        && genUType(0) == mix(genUType(0), genUType(0), genBType(0))
-        && genBType(0) == mix(genBType(0), genBType(0), genBType(0))
+        // CHECK_GLSL-COUNT-5: mix(
+        // C-HECK_SPIR-COUNT-5: OpSelect{{ }}
+        && genFType(0) == mix(genFType(zero), genFType(zero), genBType(zero))
+        && genDType(0) == mix(genDType(zero), genDType(zero), genBType(zero))
+        && genIType(0) == mix(genIType(zero), genIType(zero), genBType(zero))
+        && genUType(0) == mix(genUType(zero), genUType(zero), genBType(zero))
+        && genBType(0) == mix(genBType(zero), genBType(zero), genBType(zero))
 #endif // #if !defined(TARGET_CUDA)
-        && genFType(0) == step(genFType(1), genFType(0))
-        && genFType(0) == step(float(1), genFType(0))
-        && genDType(0) == step(genDType(1), genDType(0))
-        && genDType(0) == step(double(1), genDType(0))
-        && genFType(0) == smoothstep(genFType(0), genFType(1), genFType(0))
-        && genFType(0) == smoothstep(float(0), float(1), genFType(0))
-        && genDType(0) == smoothstep(genDType(0), genDType(1), genDType(0))
-        && genDType(0) == smoothstep(double(0), double(1), genDType(0))
+
+        // CHECK_GLSL-COUNT-4: step(
+        // CHECK_SPIR-COUNT-4: Step{{ }}
+        // CHECK_SPIR-NOT: Step{{ }}
+        && genFType(0) == step(genFType(one), genFType(zero))
+        && genFType(0) == step(float(one), genFType(zero))
+        && genDType(0) == step(genDType(one), genDType(zero))
+        && genDType(0) == step(double(one), genDType(zero))
+
+        // CHECK_GLSL-COUNT-4: smoothstep(
+        // CHECK_SPIR-COUNT-4: SmoothStep{{ }}
+        // CHECK_SPIR-NOT: SmoothStep{{ }}
+        && genFType(0) == smoothstep(genFType(zero), genFType(one), genFType(zero))
+        && genFType(0) == smoothstep(float(zero), float(one), genFType(zero))
+        && genDType(0) == smoothstep(genDType(zero), genDType(one), genDType(zero))
+        && genDType(0) == smoothstep(double(zero), double(one), genDType(zero))
+
 #if !defined(TARGET_CUDA)
-        && genBType(0) == isnan(genFType(0))
-        && genBType(0) == isnan(genDType(0))
-        && genBType(0) == isinf(genFType(0))
-        && genBType(0) == isinf(genDType(0))
+        // CHECK_GLSL-COUNT-2: isnan(
+        // CHECK_SPIR-COUNT-2: OpIsNan{{ }}
+        // CHECK_SPIR-NOT: OpIsNan{{ }}
+        && genBType(0) == isnan(genFType(zero))
+        && genBType(0) == isnan(genDType(zero))
+
+        // CHECK_GLSL-COUNT-2: isinf(
+        // CHECK_SPIR-COUNT-2: OpIsInf{{ }}
+        // CHECK_SPIR-NOT: OpIsInf{{ }}
+        && genBType(0) == isinf(genFType(zero))
+        && genBType(0) == isinf(genDType(zero))
 #endif // #if !defined(TARGET_CUDA)
-        && genIType(0) == floatBitsToInt(genFType(0))
-        && genUType(0) == floatBitsToUint(genFType(0))
-        && genFType(0) == intBitsToFloat(genIType(0))
-        && genFType(0) == uintBitsToFloat(genUType(0))
-        && genFType(0) == fma(genFType(0), genFType(0), genFType(0))
-        && genDType(0) == fma(genDType(0), genDType(0), genDType(0))
-        && genFType(0) == frexp(genFType(0), outGenIType) && genIType(0) == outGenIType
-        && genDType(0) == frexp(genDType(0), outGenIType) && genIType(0) == outGenIType
-        && genFType(0) == ldexp(genFType(0), genIType(0))
-#if defined(TEST_when_exp2_double_type_works)
-        && genDType(0) == ldexp(genDType(0), genIType(0))
-#endif // #if defined(TEST_when_exp2_double_type_works)
-
-//         // 8.4. Floating-Point Pack and Unpack Functions
-        // && uint(0) == packUnorm2x16(vec2(0))
-        // && uint(0) == packSnorm2x16(vec2(0))
-        // && uint(0) == packUnorm4x8(vec4(0))
-        // && uint(0) == packSnorm4x8(vec4(0))
-        // && vec2(0) == unpackUnorm2x16(uint(0))
-        // && vec2(0) == unpackSnorm2x16(uint(0))
-        // && vec4(0) == unpackUnorm4x8(uint(0))
-        // && vec4(0) == unpackSnorm4x8(uint(0))
-        // && uint(0) == packHalf2x16(vec2(0))
-        // && vec2(0) == unpackHalf2x16(uint(0))
-        // && double(0) == packDouble2x32(uvec2(0))
-        // && uvec2(0) == unpackDouble2x32(double(0))
+
+        // CHECK_GLSL: floatBitsToInt(
+        // CHECK_SPIR: OpBitcast{{ }}
+        // CHECK_SPIR-NOT: OpBitcast{{ }}
+        && genIType(0) == floatBitsToInt(genFType(zero))
+
+        // CHECK_GLSL: floatBitsToUint(
+        // CHECK_SPIR: OpBitcast{{ }}
+        // CHECK_SPIR-NOT: OpBitcast{{ }}
+        && genUType(0) == floatBitsToUint(genFType(zero))
+
+        // CHECK_GLSL: intBitsToFloat(
+        // CHECK_SPIR: OpBitcast{{ }}
+        // CHECK_SPIR-NOT: OpBitcast{{ }}
+        && genFType(0) == intBitsToFloat(genIType(zero))
+
+        // CHECK_GLSL: uintBitsToFloat(
+        // CHECK_SPIR: OpBitcast{{ }}
+        // CHECK_SPIR-NOT: OpBitcast{{ }}
+        && genFType(0) == uintBitsToFloat(genUType(zero))
+
+        // CHECK_GLSL-COUNT-2: fma(
+        // CHECK_SPIR-COUNT-2: Fma{{ }}
+        // CHECK_SPIR-NOT: Fma{{ }}
+        && genFType(0) == fma(genFType(zero), genFType(zero), genFType(zero))
+        && genDType(0) == fma(genDType(zero), genDType(zero), genDType(zero))
+
+        // CHECK_GLSL-COUNT-2: frexp(
+        // CHECK_SPIR-COUNT-2: Frexp{{ }}
+        // CHECK_SPIR-NOT: Frexp{{ }}
+        && genFType(0) == frexp(genFType(zero), outGenIType) && genIType(0) == outGenIType
+        && genDType(0) == frexp(genDType(zero), outGenIType) && genIType(0) == outGenIType
+
+        // CHECK_GLSL-COUNT-2: ldexp(
+        // CHECK_SPIR-COUNT-2: Ldexp{{ }}
+        // CHECK_SPIR-NOT: Ldexp{{ }}
+        && genFType(0) == ldexp(genFType(zero), genIType(zero))
+        && genDType(0) == ldexp(genDType(zero), genIType(zero))
 
         // 8.5. Geometric Functions
-        && float(0) == length(genFType(0))
-        && double(0) == length(genDType(0))
-        && float(0) == distance(genFType(0), genFType(0))
-        && double(0) == distance(genDType(0), genDType(0))
-        && float(0) == dot(genFType(0), genFType(0))
-        && double(0) == dot(genDType(0), genDType(0))
-        && vec3(0) == cross(vec3(0), vec3(0))
-        && dvec3(0) == cross(dvec3(0), dvec3(0))
-        && (abs(float(1) - length(normalize(genFType(1)))) < epsilon)
-        && (abs(double(1) - length(normalize(genDType(1)))) < double(epsilon))
-#if defined(TEST_when_fransform_works)
-        && vec4(0) == ftransform()
-#endif // #if defined(TEST_when_fransform_works)
-        && genFType(1) == faceforward(genFType(1), genFType(1), genFType(-1))
-        && genDType(1) == faceforward(genDType(1), genDType(1), genDType(-1))
-        && genFType(0) == reflect(genFType(0), genFType(0))
-        && genDType(0) == reflect(genDType(0), genDType(0))
-        && genFType(0) == refract(genFType(0), genFType(0), float(0))
-        && genDType(0) == refract(genDType(0), genDType(0), double(0))
+
+        // CHECK_GLSL-COUNT-2: length(
+        // CHECK_SPIR-COUNT-2: Length{{ }}
+        // CHECK_SPIR-NOT: Length{{ }}
+        && float(0) == length(genFType(zero))
+        && double(0) == length(genDType(zero))
+
+        // CHECK_GLSL-COUNT-2: distance(
+        // CHECK_SPIR-COUNT-2: Distance{{ }}
+        // CHECK_SPIR-NOT: Distance{{ }}
+        && float(0) == distance(genFType(zero), genFType(zero))
+        && double(0) == distance(genDType(zero), genDType(zero))
+
+        // CHECK_GLSL-COUNT-2: dot(
+        && float(0) == dot(genFType(zero), genFType(zero))
+        && double(0) == dot(genDType(zero), genDType(zero))
+
+        // CHECK_GLSL-COUNT-2: normalize(
+        // CHECK_SPIR-COUNT-2: Normalize{{ }}
+        // CHECK_SPIR-NOT: Normalize{{ }}
+        && (abs(float(1) - length(normalize(genFType(one)))) < epsilon)
+        && (abs(double(1) - length(normalize(genDType(one)))) < double(epsilon))
+
+        // CHECK_GLSL-COUNT-2: faceforward(
+        // CHECK_SPIR-COUNT-2: FaceForward{{ }}
+        // CHECK_SPIR-NOT: FaceForward{{ }}
+        && genFType(1) == faceforward(genFType(one), genFType(one), genFType(negaOne))
+        && genDType(1) == faceforward(genDType(one), genDType(one), genDType(negaOne))
+
+        // CHECK_GLSL-COUNT-2: reflect(
+        // CHECK_SPIR-COUNT-2: Reflect{{ }}
+        // CHECK_SPIR-NOT: Reflect{{ }}
+        && genFType(0) == reflect(genFType(zero), genFType(zero))
+        && genDType(0) == reflect(genDType(zero), genDType(zero))
+
+        // CHECK_GLSL-COUNT-2: refract(
+        // CHECK_SPIR-COUNT-2: Refract{{ }}
+        // CHECK_SPIR-NOT: Refract{{ }}
+        && genFType(0) == refract(genFType(zero), genFType(zero), float(zero))
+        && genDType(0) == refract(genDType(zero), genDType(zero), double(zero))
 
         // 8.7. Vector Relational Functions
+
 #if !defined(TARGET_CUDA)
-        && bvec(1) == lessThan(vec(0), vec(1))
-        && bvec(1) == lessThan(ivec(0), ivec(1))
-        && bvec(1) == lessThan(uvec(0), uvec(1))
-        && bvec(1) == lessThanEqual(vec(0), vec(1))
-        && bvec(1) == lessThanEqual(ivec(0), ivec(1))
-        && bvec(1) == lessThanEqual(uvec(0), uvec(1))
-        && bvec(0) == greaterThan(vec(0), vec(1))
-        && bvec(0) == greaterThan(ivec(0), ivec(1))
-        && bvec(0) == greaterThan(uvec(0), uvec(1))
-        && bvec(0) == greaterThanEqual(vec(0), vec(1))
-        && bvec(0) == greaterThanEqual(ivec(0), ivec(1))
-        && bvec(0) == greaterThanEqual(uvec(0), uvec(1))
-        && bvec(1) == equal(vec(0), vec(0))
-        && bvec(1) == equal(ivec(0), ivec(0))
-        && bvec(1) == equal(uvec(0), uvec(0))
-        && bvec(1) == equal(bvec(0), bvec(0))
-        && bvec(0) == notEqual(vec(0), vec(0))
-        && bvec(0) == notEqual(ivec(0), ivec(0))
-        && bvec(0) == notEqual(uvec(0), uvec(0))
-        && bvec(0) == notEqual(bvec(0), bvec(0))
-        && bool(0) == any(bvec(0))
-        && bool(0) == all(bvec(0))
-        && bvec(1) == not(bvec(0))
+        // CHECK_GLSL-COUNT-3: lessThan(
+        && bvec(1) == lessThan(vec(zero), vec(one))
+        && bvec(1) == lessThan(ivec(zero), ivec(one))
+        && bvec(1) == lessThan(uvec(zero), uvec(one))
+
+        // CHECK_GLSL-COUNT-3: lessThanEqual(
+        && bvec(1) == lessThanEqual(vec(zero), vec(one))
+        && bvec(1) == lessThanEqual(ivec(zero), ivec(one))
+        && bvec(1) == lessThanEqual(uvec(zero), uvec(one))
+
+        // CHECK_GLSL-COUNT-3: greaterThan(
+        && bvec(0) == greaterThan(vec(zero), vec(one))
+        && bvec(0) == greaterThan(ivec(zero), ivec(one))
+        && bvec(0) == greaterThan(uvec(zero), uvec(one))
+
+        // CHECK_GLSL-COUNT-3: greaterThanEqual(
+        && bvec(0) == greaterThanEqual(vec(zero), vec(one))
+        && bvec(0) == greaterThanEqual(ivec(zero), ivec(one))
+        && bvec(0) == greaterThanEqual(uvec(zero), uvec(one))
+
+        // CHECK_GLSL-COUNT-4: equal(
+        && bvec(1) == equal(vec(zero), vec(zero))
+        && bvec(1) == equal(ivec(zero), ivec(zero))
+        && bvec(1) == equal(uvec(zero), uvec(zero))
+        && bvec(1) == equal(bvec(zero), bvec(zero))
+
+        // CHECK_GLSL-COUNT-4: notEqual(
+        && bvec(0) == notEqual(vec(zero), vec(zero))
+        && bvec(0) == notEqual(ivec(zero), ivec(zero))
+        && bvec(0) == notEqual(uvec(zero), uvec(zero))
+        && bvec(0) == notEqual(bvec(zero), bvec(zero))
+
+        // CHECK_GLSL: any(
+        && bool(0) == any(bvec(zero))
+
+        // CHECK_GLSL: all(
+        && bool(0) == all(bvec(zero))
+
+        // CHECK_GLSL: not(
+        && bvec(1) == not(bvec(zero))
 #endif // #if !defined(TARGET_CUDA)
 
         // 8.8. Integer Functions
-        && genUType(0) == uaddCarry(genUType(0), genUType(0), outGenUType) && genUType(0) == outGenUType
-        && genUType(0) == usubBorrow(genUType(0), genUType(0), outGenUType) && genUType(0) == outGenUType
-        && genIType(0) == bitfieldExtract(genIType(0), int(0), int(0))
-        && genUType(0) == bitfieldExtract(genUType(0), int(0), int(0))
-        && genIType(0) == bitfieldInsert(genIType(0), genIType(0), int(0), int(0))
-        && genUType(0) == bitfieldInsert(genUType(0), genUType(0), int(0), int(0))
-        && genIType(0) == bitfieldReverse(genIType(0))
-        && genUType(0) == bitfieldReverse(genUType(0))
-        && genIType(0) == bitCount(genIType(0))
-        && genIType(0) == bitCount(genUType(0))
+
+        // CHECK_GLSL: uaddCarry(
+        // CHECK_SPIR: OpIAddCarry{{ }}
+        // CHECK_SPIR-NOT: OpIAddCarry{{ }}
+        && genUType(0) == uaddCarry(genUType(zero), genUType(zero), outGenUType) && genUType(0) == outGenUType
+
+        // CHECK_GLSL: usubBorrow(
+        // CHECK_SPIR: OpISubBorrow{{ }}
+        // CHECK_SPIR-NOT: OpISubBorrow{{ }}
+        && genUType(0) == usubBorrow(genUType(zero), genUType(zero), outGenUType) && genUType(0) == outGenUType
+
+        // CHECK_GLSL-COUNT-2: bitfieldExtract(
+        // CHECK_SPIR: OpBitFieldSExtract{{ }}
+        // CHECK_SPIR-NOT: OpBitFieldSExtract{{ }}
+        // CHECK_SPIR: OpBitFieldUExtract{{ }}
+        // CHECK_SPIR-NOT: OpBitFieldUExtract{{ }}
+        && genIType(0) == bitfieldExtract(genIType(zero), int(zero), int(zero))
+        && genUType(0) == bitfieldExtract(genUType(zero), int(zero), int(zero))
+
+        // CHECK_GLSL-COUNT-2: bitfieldInsert(
+        // CHECK_SPIR-COUNT-2: OpBitFieldInsert{{ }}
+        // CHECK_SPIR-NOT: OpBitFieldInsert{{ }}
+        && genIType(0) == bitfieldInsert(genIType(zero), genIType(zero), int(zero), int(zero))
+        && genUType(0) == bitfieldInsert(genUType(zero), genUType(zero), int(zero), int(zero))
+
+        // CHECK_GLSL-COUNT-2: bitfieldReverse(
+        // CHECK_SPIR-COUNT-2: OpBitReverse{{ }}
+        // CHECK_SPIR-NOT: OpBitReverse{{ }}
+        && genIType(0) == bitfieldReverse(genIType(zero))
+        && genUType(0) == bitfieldReverse(genUType(zero))
+
+        // CHECK_GLSL-COUNT-2: bitCount(
+        // CHECK_SPIR-COUNT-2: OpBitCount{{ }}
+        // CHECK_SPIR-NOT: OpBitCount{{ }}
+        && genIType(0) == bitCount(genIType(zero))
+        && genIType(0) == bitCount(genUType(zero))
+
 #if !defined(TARGET_CUDA)
-        && genIType(-1) == findLSB(genIType(0))
-        && genIType(-1) == findLSB(genUType(0))
-        && genIType(-1) == findMSB(genIType(0))
-        && genIType(-1) == findMSB(genUType(0))
+        // CHECK_GLSL-COUNT-2: findLSB(
+        // CHECK_SPIR-COUNT-2: FindILsb{{ }}
+        // CHECK_SPIR-NOT: FindILsb{{ }}
+        && genIType(-1) == findLSB(genIType(zero))
+        && genIType(-1) == findLSB(genUType(zero))
+
+        // CHECK_GLSL-COUNT-2: findMSB(
+        // CHECK_SPIR: FindSMsb{{ }}
+        // CHECK_SPIR-NOT: FindSMsb{{ }}
+        // CHECK_SPIR: FindUMsb{{ }}
+        // CHECK_SPIR-NOT: FindUMsb{{ }}
+        && genIType(-1) == findMSB(genIType(zero))
+        && genIType(-1) == findMSB(genUType(zero))
 #endif // #if !defined(TARGET_CUDA)
         ;
+
+    // CHECK_SPIR-LABEL: OpAccessChain {{.*}} %inputBuffer
 }
 
 [numthreads(4, 1, 1)]
 void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID)
 {
     // CHECK_GLSL: void main(
-    // CHECK_SPV: OpEntryPoint
+    // CHECK_GLSL_SPIRV: OpEntryPoint
     // CHECK_HLSL: void computeMain(
     // CHECK_CUDA: void computeMain(
     // CHECK_CPP: void _computeMain(
     // BUF: 1
 
     bool r = true
-        && dim1TypeFuncs()
-        && dimNTypeFuncs<2>()
-        && dimNTypeFuncs<3>()
-        && dimNTypeFuncs<4>();
+        && Test_ScalarType()
+        && Test_VectorType<2>()
+        && Test_VectorType<3>()
+        && Test_VectorType<4>()
+        ;
 
-    outputBuffer.result[0] = int(r);
+    outputBuffer.result = int(r);
 }