Clean-ups related to expanded standard library coverage (#1269)

This change continues the work already started in moving the definitions of many built-in functions to the standard library. The main focus in this change was reducing the number of operations that had to be special-cased on the CPU and CUDA targets by making sure that the scalar cases of built-in functions map to the proper names in the prelude (e.g., `F32_sin()`) via the ordinary `__target_intrinsic` mechanism. In some cases this cleanup meant that special-case logic that was constructing definitions for those functions using C++ code could be scrapped. Additional changes made along the way: * A few scalar functions that were missing in the CPU/CUDA preludes got added: `round`, hyperbolic trigonometric functions, `frexp`, `modf`, and `fma` * The floating-point `min()` and `max()` definitions in the preludes were changed to use intrinsic operations on the target (which are likely to follow IEEE semantics, while our definitions did not) * For the CUDA target, many of the functions had their names translated during code emit from, e.g., `sin` to `sinf`. This change makes the CUDA target more closely match the C++/CPU target in using names like `F32_sin` consistently. * For the CUDA target, a few additional functions have intrinsics that don't exist (portably) on CPU: `sincos()` and `rsqrt()`. * For the Slang stdlib definitions to work, a new `$P` replacement was defined for `__target_intrinsic` that expands to a type based on the first operand of the function (e.g., `F32` for `float`). * I removed the dedicated opcodes for matrix-matrix, matrix-vector, and vector-matrix multiplication, and instead turned them into ordinary functions with definitions and `__target_intrinsic` modifiers to map them appropriately for HLSL and GLSL. This is realistically how we would have implemented these if we'd had `__target_intrinsic` from the start. 
Notes about possible follow-on work: * The `ldexp` function is still left in the Slang stdlib because it has to account for a floating-point exponent and the `math.h` version only handles integers for the exponent. It is possible that we can/should define another overload for `ldexp` (and `frexp`) that uses an integer for exponent, and then have that one be a built-in on CPU/CUDA, with the HLSL `frexp` being defined in the stdlib to delegate to the correct `frexp` for those targets. * The `firstbithigh` and related functions are missing for our CPU and CUDA targets, and will need to be added. It is worth noting that `firstbithigh` apparently has some very odd functionality around signed integer arguments (which are supported, despite MSDN being unclear on that point). General cleanup will be required for those functions. * Making the various matrix and vector products no longer be intrinsic ops might affect how we emit code for them as sub-expressions (both whether we fold them into use sites and how we parenthesize them). This doesn't seem to affect any of our existing tests, but we could consider marking these functions with `[__readNone]` to ensure they can be folded, and then also adding whatever modifier(s) we might invent to control precedence and parentheses insertion during emit.
author: Tim Foley <tfoleyNV@users.noreply.github.com> 2020-03-11 08:50:38 -0700
committer: GitHub <noreply@github.com> 2020-03-11 08:50:38 -0700
commit: 935768c6a00c258bf5122a2d04b84064a1eee67d (patch)
tree: 68dac944da274a21acb8c8bf651401c26e289f4c /source
parent: b380b1af6ba6f5f58e3841c2a5b14db7ee8c372d (diff)
10 files changed, 227 insertions, 391 deletions
diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang
index 20158c1b1..03496ccc8 100644
--- a/source/slang/hlsl.meta.slang
+++ b/source/slang/hlsl.meta.slang
@@ -349,8 +349,13 @@ void abort();
 // Absolute value (HLSL SM 1.0)
 
 __generic<T : __BuiltinSignedArithmeticType>
+__target_intrinsic(hlsl)
+__target_intrinsic(glsl)
+__target_intrinsic(cuda, "$P_abs($0)")
+__target_intrinsic(cpp, "$P_abs($0)")
 T abs(T x);
 /*{
+    // Note: this simple definition may not be appropriate for floating-point inputs
     return x < 0 ? -x : x;
 }*/
 
@@ -372,6 +377,10 @@ matrix<T,N,M> abs(matrix<T,N,M> x)
 // Inverse cosine (HLSL SM 1.0)
 
 __generic<T : __BuiltinFloatingPointType>
+__target_intrinsic(hlsl)
+__target_intrinsic(glsl)
+__target_intrinsic(cuda, "$P_acos($0)")
+__target_intrinsic(cpp, "$P_acos($0)")
 T acos(T x);
 
 __generic<T : __BuiltinFloatingPointType, let N : int>
@@ -530,6 +539,8 @@ matrix<float,N,M> asfloat(matrix<float,N,M> x);
 __generic<T : __BuiltinFloatingPointType>
 __target_intrinsic(hlsl)
 __target_intrinsic(glsl)
+__target_intrinsic(cuda, "$P_asin($0)")
+__target_intrinsic(cpp, "$P_asin($0)")
 T asin(T x);
 
 __generic<T : __BuiltinFloatingPointType, let N : int>
@@ -659,6 +670,10 @@ matrix<uint,N,M> asuint(matrix<uint,N,M> x);
 
 // Inverse tangent (HLSL SM 1.0)
 __generic<T : __BuiltinFloatingPointType>
+__target_intrinsic(hlsl)
+__target_intrinsic(glsl)
+__target_intrinsic(cuda, "$P_atan($0)")
+__target_intrinsic(cpp, "$P_atan($0)")
 T atan(T x);
 
 __generic<T : __BuiltinFloatingPointType, let N : int>
@@ -679,6 +694,8 @@ matrix<T, N, M> atan(matrix<T, N, M> x)
 __generic<T : __BuiltinFloatingPointType>
 __target_intrinsic(hlsl)
 __target_intrinsic(glsl,"atan($0,$1)")
+__target_intrinsic(cuda, "$P_atan2($0, $1)")
+__target_intrinsic(cpp, "$P_atan2($0, $1)")
 T atan2(T y, T x);
 
 __generic<T : __BuiltinFloatingPointType, let N : int>
@@ -698,6 +715,10 @@ matrix<T,N,M> atan2(matrix<T,N,M> y, matrix<T,N,M> x)
 
 // Ceiling (HLSL SM 1.0)
 __generic<T : __BuiltinFloatingPointType>
+__target_intrinsic(hlsl)
+__target_intrinsic(glsl)
+__target_intrinsic(cuda, "$P_ceil($0)")
+__target_intrinsic(cpp, "$P_ceil($0)")
 T ceil(T x);
 
 __generic<T : __BuiltinFloatingPointType, let N : int>
@@ -767,6 +788,10 @@ void clip(matrix<T,N,M> x)
 
 // Cosine
 __generic<T : __BuiltinFloatingPointType>
+__target_intrinsic(hlsl)
+__target_intrinsic(glsl)
+__target_intrinsic(cuda, "$P_cos($0)")
+__target_intrinsic(cpp, "$P_cos($0)")
 T cos(T x);
 
 __generic<T : __BuiltinFloatingPointType, let N : int>
@@ -786,6 +811,10 @@ matrix<T, N, M> cos(matrix<T, N, M> x)
 
 // Hyperbolic cosine
 __generic<T : __BuiltinFloatingPointType>
+__target_intrinsic(hlsl)
+__target_intrinsic(glsl)
+__target_intrinsic(cuda, "$P_cosh($0)")
+__target_intrinsic(cpp, "$P_cosh($0)")
 T cosh(T x);
 
 __generic<T : __BuiltinFloatingPointType, let N : int>
@@ -804,7 +833,10 @@ matrix<T, N, M> cosh(matrix<T, N, M> x)
 }
 
 // Population count
+__target_intrinsic(hlsl)
 __target_intrinsic(glsl, "bitCount")
+__target_intrinsic(cuda, "$P_countbits($0)")
+__target_intrinsic(cpp, "$P_countbits($0)")
 uint countbits(uint value);
 
 // Cross product
@@ -1070,6 +1102,10 @@ matrix<T,N,M> EvaluateAttributeSnapped(matrix<T,N,M> x, int2 offset);
 // Base-e exponent
 
 __generic<T : __BuiltinFloatingPointType>
+__target_intrinsic(hlsl)
+__target_intrinsic(glsl)
+__target_intrinsic(cuda, "$P_exp($0)")
+__target_intrinsic(cpp, "$P_exp($0)")
 T exp(T x);
 
 __generic<T : __BuiltinFloatingPointType, let N : int>
@@ -1090,6 +1126,10 @@ matrix<T, N, M> exp(matrix<T, N, M> x)
 // Base-2 exponent
 
 __generic<T : __BuiltinFloatingPointType>
+__target_intrinsic(hlsl)
+__target_intrinsic(glsl)
+__target_intrinsic(cuda, "$P_exp2($0)")
+__target_intrinsic(cpp, "$P_exp2($0)")
 T exp2(T x);
 
 __generic<T : __BuiltinFloatingPointType, let N : int>
@@ -1133,7 +1173,10 @@ vector<T,N> faceforward(vector<T,N> n, vector<T,N> i, vector<T,N> ng)
 }
 
 // Find first set bit starting at high bit and working down
+__target_intrinsic(hlsl)
 __target_intrinsic(glsl,"findMSB")
+__target_intrinsic(cuda, "$P_firstbithigh($0)")
+__target_intrinsic(cpp, "$P_firstbithigh($0)")
 int firstbithigh(int value);
 
 __target_intrinsic(hlsl)
@@ -1144,7 +1187,10 @@ vector<int, N> firstbithigh(vector<int, N> value)
     VECTOR_MAP_UNARY(int, N, firstbithigh, value);
 }
 
+__target_intrinsic(hlsl)
 __target_intrinsic(glsl,"findMSB")
+__target_intrinsic(cuda, "$P_firstbithigh($0)")
+__target_intrinsic(cpp, "$P_firstbithigh($0)")
 uint firstbithigh(uint value);
 
 __target_intrinsic(hlsl)
@@ -1156,7 +1202,10 @@ vector<uint,N> firstbithigh(vector<uint,N> value)
 }
 
 // Find first set bit starting at low bit and working up
+__target_intrinsic(hlsl)
 __target_intrinsic(glsl,"findLSB")
+__target_intrinsic(cuda, "$P_firstbitlow($0)")
+__target_intrinsic(cpp, "$P_firstbitlow($0)")
 int firstbitlow(int value);
 
 __target_intrinsic(hlsl)
@@ -1167,7 +1216,10 @@ vector<int,N> firstbitlow(vector<int,N> value)
     VECTOR_MAP_UNARY(int, N, firstbitlow, value);
 }
 
+__target_intrinsic(hlsl)
 __target_intrinsic(glsl,"findLSB")
+__target_intrinsic(cuda, "$P_firstbitlow($0)")
+__target_intrinsic(cpp, "$P_firstbitlow($0)")
 uint firstbitlow(uint value);
 
 __target_intrinsic(hlsl)
@@ -1181,6 +1233,10 @@ vector<uint,N> firstbitlow(vector<uint,N> value)
 // Floor (HLSL SM 1.0)
 
 __generic<T : __BuiltinFloatingPointType>
+__target_intrinsic(hlsl)
+__target_intrinsic(glsl)
+__target_intrinsic(cuda, "$P_floor($0)")
+__target_intrinsic(cpp, "$P_floor($0)")
 T floor(T x);
 
 __generic<T : __BuiltinFloatingPointType, let N : int>
@@ -1201,6 +1257,8 @@ matrix<T, N, M> floor(matrix<T, N, M> x)
 // Fused multiply-add for doubles
 __target_intrinsic(hlsl)
 __target_intrinsic(glsl)
+__target_intrinsic(cuda, "$P_fma($0, $1, $2)")
+__target_intrinsic(cpp, "$P_fma($0, $1, $2)")
 double fma(double a, double b, double c);
 
 __generic<let N : int>
@@ -1220,6 +1278,10 @@ matrix<double, N, M> fma(matrix<double, N, M> a, matrix<double, N, M> b, matrix<
 
 // Floating point remainder of x/y
 __generic<T : __BuiltinFloatingPointType>
+__target_intrinsic(hlsl)
+__target_intrinsic(glsl)
+__target_intrinsic(cuda, "$P_fmod($0, $1)")
+__target_intrinsic(cpp, "$P_fmod($0, $1)")
 T fmod(T x, T y);
 
 __generic<T : __BuiltinFloatingPointType, let N : int>
@@ -1239,7 +1301,10 @@ matrix<T, N, M> fmod(matrix<T, N, M> x, matrix<T, N, M> y)
 
 // Fractional part
 __generic<T : __BuiltinFloatingPointType>
+__target_intrinsic(hlsl)
 __target_intrinsic(glsl, fract)
+__target_intrinsic(cuda, "$P_frac($0)")
+__target_intrinsic(cpp, "$P_frac($0)")
 T frac(T x);
 
 __generic<T : __BuiltinFloatingPointType, let N : int>
@@ -1438,8 +1503,8 @@ void InterlockedXor(__ref uint dest, uint value, out uint original_value);
 
 __generic<T : __BuiltinFloatingPointType>
 __target_intrinsic(hlsl)
-__target_intrinsic(cpu)
-__target_intrinsic(cuda)
+__target_intrinsic(cuda, "$P_isfinite($0)")
+__target_intrinsic(cpp, "$P_isfinite($0)")
 bool isfinite(T x)
 {
     return !(isinf(x) || isnan(x));
@@ -1461,6 +1526,10 @@ matrix<bool, N, M> isfinite(matrix<T, N, M> x)
 
 // Is floating-point value infinite?
 __generic<T : __BuiltinFloatingPointType>
+__target_intrinsic(hlsl)
+__target_intrinsic(glsl)
+__target_intrinsic(cuda, "$P_isinf($0)")
+__target_intrinsic(cpp, "$P_isinf($0)")
 bool isinf(T x);
 
 __generic<T : __BuiltinFloatingPointType, let N : int>
@@ -1480,6 +1549,10 @@ matrix<bool, N, M> isinf(matrix<T, N, M> x)
 
 // Is floating-point value not-a-number?
 __generic<T : __BuiltinFloatingPointType>
+__target_intrinsic(hlsl)
+__target_intrinsic(glsl)
+__target_intrinsic(cuda, "$P_isnan($0)")
+__target_intrinsic(cpp, "$P_isnan($0)")
 bool isnan(T x);
 
 __generic<T : __BuiltinFloatingPointType, let N : int>
@@ -1565,6 +1638,10 @@ float4 lit(float n_dot_l, float n_dot_h, float m)
 
 // Base-e logarithm
 __generic<T : __BuiltinFloatingPointType>
+__target_intrinsic(hlsl)
+__target_intrinsic(glsl)
+__target_intrinsic(cuda, "$P_log($0)")
+__target_intrinsic(cpp, "$P_log($0)")
 T log(T x);
 
 __generic<T : __BuiltinFloatingPointType, let N : int>
@@ -1586,6 +1663,8 @@ matrix<T, N, M> log(matrix<T, N, M> x)
 __generic<T : __BuiltinFloatingPointType> 
 __target_intrinsic(hlsl)
 __target_intrinsic(glsl, "(log( $0 ) * $S0( 0.43429448190325182765112891891661) )" )
+__target_intrinsic(cuda, "$P_log10($0)")
+__target_intrinsic(cpp, "$P_log10($0)")
 T log10(T x);
 
 __generic<T : __BuiltinFloatingPointType, let N : int>
@@ -1605,6 +1684,10 @@ matrix<T,N,M> log10(matrix<T,N,M> x)
 
 // Base-2 logarithm
 __generic<T : __BuiltinFloatingPointType>
+__target_intrinsic(hlsl)
+__target_intrinsic(glsl)
+__target_intrinsic(cuda, "$P_log2($0)")
+__target_intrinsic(cpp, "$P_log2($0)")
 T log2(T x);
 
 __generic<T : __BuiltinFloatingPointType, let N : int>
@@ -1627,6 +1710,8 @@ matrix<T,N,M> log2(matrix<T,N,M> x)
 __generic<T : __BuiltinArithmeticType>
 __target_intrinsic(hlsl)
 __target_intrinsic(glsl, fma)
+__target_intrinsic(cuda, "$P_fma($0, $1, $2)")
+__target_intrinsic(cpp, "$P_fma($0, $1, $2)")
 T mad(T mvalue, T avalue, T bvalue);
 
 __generic<T : __BuiltinArithmeticType, let N : int>
@@ -1646,6 +1731,10 @@ matrix<T, N, M> mad(matrix<T, N, M> mvalue, matrix<T, N, M> avalue, matrix<T, N,
 
 // maximum
 __generic<T : __BuiltinArithmeticType>
+__target_intrinsic(hlsl)
+__target_intrinsic(glsl)
+__target_intrinsic(cuda, "$P_max($0, $1)")
+__target_intrinsic(cpp, "$P_max($0, $1)")
 T max(T x, T y);
 // Note: a stdlib implementation of `max` (or `min`) will require splitting
 // floating-point and integer cases apart, because the floating-point
@@ -1669,6 +1758,10 @@ matrix<T, N, M> max(matrix<T, N, M> x, matrix<T, N, M> y)
 
 // minimum
 __generic<T : __BuiltinArithmeticType>
+__target_intrinsic(hlsl)
+__target_intrinsic(glsl)
+__target_intrinsic(cuda, "$P_min($0, $1)")
+__target_intrinsic(cpp, "$P_min($0, $1)")
 T min(T x, T y);
 
 __generic<T : __BuiltinArithmeticType, let N : int>
@@ -1757,28 +1850,64 @@ T mul(vector<T, N> x, vector<T, N> y)
     return dot(x, y);
 }
 
-${{{{
-// TODO: The following functions could conceivably be defined
-// in the stdlib for the benefit of targets without direct
-// support for matrices, but the use of `__intrinsic_op` to
-// map them to a dedicated IR instruction interferes with
-// that choice.
-}}}}
-
 // vector-matrix
 __generic<T : __BuiltinArithmeticType, let N : int, let M : int>
-__intrinsic_op(mulVectorMatrix)
-vector<T,M> mul(vector<T,N> x, matrix<T,N,M> y);
+__target_intrinsic(hlsl)
+__target_intrinsic(glsl, "($1 * $0)") // GLSL expansion emits the operands in swapped order
+vector<T, M> mul(vector<T, N> left, matrix<T, N, M> right)
+{
+    vector<T,M> result;
+    for( int j = 0; j < M; ++j ) // one result component per column j of `right`
+    {
+        T sum = T(0);
+        for( int i = 0; i < N; ++i )
+        {
+            sum += left[i] * right[i][j]; // `left` dotted with column j of `right`
+        }
+        result[j] = sum;
+    }
+    return result;
+}
 
 // matrix-vector
 __generic<T : __BuiltinArithmeticType, let N : int, let M : int>
-__intrinsic_op(mulMatrixVector)
-vector<T,N> mul(matrix<T,N,M> x, vector<T,M> y);
+__target_intrinsic(hlsl)
+__target_intrinsic(glsl, "($1 * $0)") // GLSL expansion emits the operands in swapped order
+vector<T,N> mul(matrix<T,N,M> left, vector<T,M> right)
+{
+    vector<T,N> result;
+    for( int i = 0; i < N; ++i ) // one result component per row i of `left`
+    {
+        T sum = T(0);
+        for( int j = 0; j < M; ++j )
+        {
+            sum += left[i][j] * right[j]; // row i of `left` dotted with `right`
+        }
+        result[i] = sum;
+    }
+    return result;
+}
+
 
 // matrix-matrix
 __generic<T : __BuiltinArithmeticType, let R : int, let N : int, let C : int>
-__intrinsic_op(mulMatrixMatrix)
-matrix<T,R,C> mul(matrix<T,R,N> x, matrix<T,N,C> y);
+__target_intrinsic(hlsl)
+__target_intrinsic(glsl, "($1 * $0)") // GLSL expansion emits the operands in swapped order
+matrix<T,R,C> mul(matrix<T,R,N> left, matrix<T,N,C> right) // first operand (R x N) is `left`, second (N x C) is `right`
+{
+    matrix<T,R,C> result;
+    for( int r = 0; r < R; ++r)
+    for( int c = 0; c < C; ++c)
+    {
+        T sum = T(0); // accumulates row r of `left` dotted with column c of `right`
+        for( int i = 0; i < N; ++i )
+        {
+            sum += left[r][i] * right[i][c]; // indices stay in range: left is R x N, right is N x C
+        }
+        result[r][c] = sum;
+    }
+    return result;
+}
 
 // noise (deprecated)
 
@@ -1839,6 +1968,10 @@ vector<T,N> normalize(vector<T,N> x)
 
 // Raise to a power
 __generic<T : __BuiltinFloatingPointType>
+__target_intrinsic(hlsl)
+__target_intrinsic(glsl)
+__target_intrinsic(cuda, "$P_pow($0, $1)")
+__target_intrinsic(cpp, "$P_pow($0, $1)")
 T pow(T x, T y);
 
 __generic<T : __BuiltinFloatingPointType, let N : int>
@@ -1999,7 +2132,10 @@ vector<T,N> refract(vector<T,N> i, vector<T,N> n, T eta)
 }
 
 // Reverse order of bits
+__target_intrinsic(hlsl)
 __target_intrinsic(glsl, "bitfieldReverse")
+__target_intrinsic(cuda, "$P_reversebits($0)")
+__target_intrinsic(cpp, "$P_reversebits($0)")
 uint reversebits(uint value);
 
 __target_intrinsic(glsl, "bitfieldReverse")
@@ -2011,6 +2147,10 @@ vector<uint, N> reversebits(vector<uint, N> value)
 
 // Round-to-nearest
 __generic<T : __BuiltinFloatingPointType>
+__target_intrinsic(hlsl)
+__target_intrinsic(glsl)
+__target_intrinsic(cuda, "$P_round($0)")
+__target_intrinsic(cpp, "$P_round($0)")
 T round(T x);
 
 __generic<T : __BuiltinFloatingPointType, let N : int>
@@ -2032,7 +2172,12 @@ matrix<T,N,M> round(matrix<T,N,M> x)
 __generic<T : __BuiltinFloatingPointType>
 __target_intrinsic(hlsl)
 __target_intrinsic(glsl, "inversesqrt($0)")
-T rsqrt(T x);
+__target_intrinsic(cuda, "$P_rsqrt($0)")
+__target_intrinsic(cpp, "$P_rsqrt($0)") // NOTE(review): commit message says rsqrt has no portable CPU intrinsic - confirm the C++ prelude defines this name
+T rsqrt(T x)
+{
+    return T(1.0) / sqrt(x); // reciprocal of the square root, written in terms of `sqrt`
+}
 
 __generic<T : __BuiltinFloatingPointType, let N : int>
 __target_intrinsic(hlsl)
@@ -2076,7 +2221,10 @@ matrix<T,N,M> saturate(matrix<T,N,M> x)
 
 // Extract sign of value
 __generic<T : __BuiltinSignedArithmeticType>
+__target_intrinsic(hlsl)
 __target_intrinsic(glsl, "int(sign($0))")
+__target_intrinsic(cuda, "$P_sign($0)")
+__target_intrinsic(cpp, "$P_sign($0)")
 int sign(T x);
 
 __generic<T : __BuiltinSignedArithmeticType, let N : int>
@@ -2098,6 +2246,10 @@ matrix<int, N, M> sign(matrix<T, N, M> x)
 // Sine
 
 __generic<T : __BuiltinFloatingPointType>
+__target_intrinsic(hlsl)
+__target_intrinsic(glsl)
+__target_intrinsic(cuda, "$P_sin($0)")
+__target_intrinsic(cpp, "$P_sin($0)")
 T sin(T x);
 
 __generic<T : __BuiltinFloatingPointType, let N : int>
@@ -2118,6 +2270,7 @@ matrix<T, N, M> sin(matrix<T, N, M> x)
 // Sine and cosine
 __generic<T : __BuiltinFloatingPointType>
 __target_intrinsic(hlsl)
+__target_intrinsic(cuda, "$P_sincos($0, $1, $2)")
 void sincos(T x, out T s, out T c)
 {
     s = sin(x);
@@ -2142,6 +2295,10 @@ void sincos(matrix<T,N,M> x, out matrix<T,N,M> s, out matrix<T,N,M> c)
 
 // Hyperbolic Sine
 __generic<T : __BuiltinFloatingPointType>
+__target_intrinsic(hlsl)
+__target_intrinsic(glsl)
+__target_intrinsic(cuda, "$P_sinh($0)")
+__target_intrinsic(cpp, "$P_sinh($0)")
 T sinh(T x);
 
 __generic<T : __BuiltinFloatingPointType, let N : int>
@@ -2186,6 +2343,10 @@ matrix<T, N, M> smoothstep(matrix<T, N, M> min, matrix<T, N, M> max, matrix<T, N
 
 // Square root
 __generic<T : __BuiltinFloatingPointType>
+__target_intrinsic(hlsl)
+__target_intrinsic(glsl)
+__target_intrinsic(cuda, "$P_sqrt($0)")
+__target_intrinsic(cpp, "$P_sqrt($0)")
 T sqrt(T x);
 
 __generic<T : __BuiltinFloatingPointType, let N : int>
@@ -2229,6 +2390,10 @@ matrix<T, N, M> step(matrix<T, N, M> y, matrix<T, N, M> x)
 
 // Tangent
 __generic<T : __BuiltinFloatingPointType>
+__target_intrinsic(hlsl)
+__target_intrinsic(glsl)
+__target_intrinsic(cuda, "$P_tan($0)")
+__target_intrinsic(cpp, "$P_tan($0)")
 T tan(T x);
 
 __generic<T : __BuiltinFloatingPointType, let N : int>
@@ -2248,6 +2413,10 @@ matrix<T, N, M> tan(matrix<T, N, M> x)
 
 // Hyperbolic tangent
 __generic<T : __BuiltinFloatingPointType>
+__target_intrinsic(hlsl)
+__target_intrinsic(glsl)
+__target_intrinsic(cuda, "$P_tanh($0)")
+__target_intrinsic(cpp, "$P_tanh($0)")
 T tanh(T x);
 
 __generic<T : __BuiltinFloatingPointType, let N : int>
@@ -2280,6 +2449,10 @@ matrix<T, M, N> transpose(matrix<T, N, M> x)
 
 // Truncate to integer
 __generic<T : __BuiltinFloatingPointType>
+__target_intrinsic(hlsl)
+__target_intrinsic(glsl)
+__target_intrinsic(cuda, "$P_trunc($0)")
+__target_intrinsic(cpp, "$P_trunc($0)")
 T trunc(T x);
 
 __generic<T : __BuiltinFloatingPointType, let N : int>
diff --git a/source/slang/slang-emit-c-like.cpp b/source/slang/slang-emit-c-like.cpp
index 55f251565..3631040b8 100644
--- a/source/slang/slang-emit-c-like.cpp
+++ b/source/slang/slang-emit-c-like.cpp
@@ -1777,6 +1777,42 @@ void CLikeSourceEmitter::emitIntrinsicCallExprImpl(
                 }
                 break;
 
+            case 'P':
+                // Type-based prefix as used for CUDA and C++ targets
+                {
+                    Index argIndex = 0;
+                    SLANG_RELEASE_ASSERT(argCount > argIndex);
+                    auto arg = args[argIndex].get();
+                    auto argType = arg->getDataType();
+
+                    const char* str = "";
+                    switch(argType->op)
+                    {
+                    #define CASE(OP, STR) \
+                    case kIROp_##OP: str = #STR; break
+
+                    CASE(Int8Type,      I8);
+                    CASE(Int16Type,     I16);
+                    CASE(IntType,       I32);
+                    CASE(Int64Type,     I64);
+                    CASE(UInt8Type,     U8);
+                    CASE(UInt16Type,    U16);
+                    CASE(UIntType,      U32);
+                    CASE(UInt64Type,    U64);
+                    CASE(HalfType,      F16);
+                    CASE(FloatType,     F32);
+                    CASE(DoubleType,    F64);
+
+                    #undef CASE
+
+                    default:
+                        SLANG_UNEXPECTED("unexpected type in intrinsic definition");
+                        break;
+                    }
+                    m_writer->emit(str);
+                }
+                break;
+
             default:
                 SLANG_UNEXPECTED("bad format in intrinsic definition");
                 break;
@@ -2059,17 +2095,6 @@ void CLikeSourceEmitter::defaultEmitInstExpr(IRInst* inst, const EmitOpInfo& inO
         }
         break;
 
-    case kIROp_Mul_Vector_Matrix:
-    case kIROp_Mul_Matrix_Vector:
-    case kIROp_Mul_Matrix_Matrix:
-        // Default impl
-        m_writer->emit("mul(");
-        emitOperand(inst->getOperand(0), getInfo(EmitOp::General));
-        m_writer->emit(", ");
-        emitOperand(inst->getOperand(1), getInfo(EmitOp::General));
-        m_writer->emit(")");
-        break;
-
     case kIROp_swizzle:
         {
             auto prec = getInfo(EmitOp::Postfix);
diff --git a/source/slang/slang-emit-cpp.cpp b/source/slang/slang-emit-cpp.cpp
index 7fb04c33b..bece6c2d0 100644
--- a/source/slang/slang-emit-cpp.cpp
+++ b/source/slang/slang-emit-cpp.cpp
@@ -901,56 +901,6 @@ void CPPSourceEmitter::_emitSignature(const UnownedStringSlice& funcName, const
     writer->emit(")");
 }
 
-void CPPSourceEmitter::_emitVecMatMulDefinition(const UnownedStringSlice& funcName, const HLSLIntrinsic* specOp)
-{
-    IRFuncType* funcType = specOp->signatureType;
-    SLANG_ASSERT(funcType->getParamCount() == 2);
-    IRType* paramType0 = funcType->getParamType(0);
-    IRType* paramType1 = funcType->getParamType(1);
-    IRType* retType = specOp->returnType;
-
-    SourceWriter* writer = getSourceWriter();
-
-    _emitSignature(funcName, specOp);
-
-    writer->emit("\n{\n");
-    writer->indent();
-
-    emitType(retType);
-    writer->emit(" r;\n");
-
-    TypeDimension dimA = _getTypeDimension(paramType0, false);
-    TypeDimension dimB = _getTypeDimension(paramType1, true);
-    TypeDimension resultDim = _getTypeDimension(retType, paramType1->op == kIROp_VectorType);
-
-    for (int i = 0; i < resultDim.rowCount; ++i)
-    {
-        for (int j = 0; j < resultDim.colCount; ++j)
-        {
-            _emitAccess(UnownedStringSlice::fromLiteral("r"), resultDim, i, j, writer);
-            writer->emit(" = ");
-
-            for (int k = 0; k < dimA.colCount; k++)
-            {
-                if (k > 0)
-                {
-                    writer->emit(" + ");
-                }
-                _emitAccess(UnownedStringSlice::fromLiteral("a"), dimA, i, k, writer);
-                writer->emit(" * ");
-                _emitAccess(UnownedStringSlice::fromLiteral("b"), dimB, k, j, writer);
-            }
-
-            writer->emit(";\n");
-        }
-    }
-
-    writer->emit("return r;\n");
-
-    writer->dedent();
-    writer->emit("}\n\n");
-}
-
 UnownedStringSlice CPPSourceEmitter::_getAndEmitSpecializedOperationDefinition(HLSLIntrinsic::Op op, IRType*const* argTypes, Int argCount, IRType* retType)
 {
     HLSLIntrinsic intrinsic;
@@ -960,38 +910,6 @@ UnownedStringSlice CPPSourceEmitter::_getAndEmitSpecializedOperationDefinition(H
     return  _getFuncName(specOp);
 }
 
-void CPPSourceEmitter::_emitLengthDefinition(const UnownedStringSlice& funcName, const HLSLIntrinsic* specOp)
-{
-    SourceWriter* writer = getSourceWriter();
-
-    IRFuncType* funcType = specOp->signatureType;
-    SLANG_ASSERT(funcType->getParamCount() == 1);
-    IRType* paramType0 = funcType->getParamType(0);
-
-    SLANG_ASSERT(paramType0->op == kIROp_VectorType);
-
-    IRBasicType* elementType = as<IRBasicType>(static_cast<IRVectorType*>(paramType0)->getElementType());
-
-    IRType* dotArgs[] = { paramType0, paramType0 };
-    UnownedStringSlice dotFuncName = _getAndEmitSpecializedOperationDefinition(HLSLIntrinsic::Op::Dot, dotArgs, SLANG_COUNT_OF(dotArgs), elementType);
-
-    UnownedStringSlice sqrtName = _getScalarFuncName(HLSLIntrinsic::Op::Sqrt, elementType);
-
-    _emitSignature(funcName, specOp);
-
-    writer->emit("\n{\n");
-    writer->indent();
-
-    writer->emit("return ");
-    writer->emit(sqrtName);
-    writer->emit("(");
-    writer->emit(dotFuncName);
-    writer->emit("(a, a));\n");
-   
-    writer->dedent();
-    writer->emit("}\n\n");
-}
-
 void CPPSourceEmitter::_emitGetAtDefinition(const UnownedStringSlice& funcName, const HLSLIntrinsic* specOp)
 {
     SourceWriter* writer = getSourceWriter();
@@ -1049,47 +967,6 @@ void CPPSourceEmitter::_emitGetAtDefinition(const UnownedStringSlice& funcName,
     }
 }
 
-void CPPSourceEmitter::_emitNormalizeDefinition(const UnownedStringSlice& funcName, const HLSLIntrinsic* specOp)
-{    
-    SourceWriter* writer = getSourceWriter();
-
-    IRFuncType* funcType = specOp->signatureType;
-    SLANG_ASSERT(funcType->getParamCount() == 1);
-    IRType* paramType0 = funcType->getParamType(0);
-
-    SLANG_ASSERT(paramType0->op == kIROp_VectorType);
-
-    IRBasicType* elementType = as<IRBasicType>(static_cast<IRVectorType*>(paramType0)->getElementType());
-
-    IRType* dotArgs[] = { paramType0, paramType0 };
-    UnownedStringSlice dotFuncName = _getAndEmitSpecializedOperationDefinition(HLSLIntrinsic::Op::Dot, dotArgs, SLANG_COUNT_OF(dotArgs), elementType);
-    UnownedStringSlice rsqrtName = _getScalarFuncName(HLSLIntrinsic::Op::RecipSqrt, elementType);
-    IRType* vecMulScalarArgs[] = { paramType0, elementType };
-    UnownedStringSlice vecMulScalarName = _getAndEmitSpecializedOperationDefinition(HLSLIntrinsic::Op::Mul, vecMulScalarArgs, SLANG_COUNT_OF(vecMulScalarArgs), paramType0);
-
-    TypeDimension dimA = _getTypeDimension(paramType0, false);
-
-    // Assumes C++
-
-    _emitSignature(funcName, specOp);
-
-    writer->emit("\n{\n");
-    writer->indent();
-
-    writer->emit("return ");
-
-    // Assumes C++ here
-    writer->emit("a * ");
-    writer->emit(rsqrtName);
-    writer->emit("(");
-    writer->emit(dotFuncName);
-    writer->emit("(a, a));\n");
-
-    writer->dedent();
-    writer->emit("}\n\n");
-}
-
-
 void CPPSourceEmitter::_emitConstructConvertDefinition(const UnownedStringSlice& funcName, const HLSLIntrinsic* specOp)
 {
     SourceWriter* writer = getSourceWriter();
@@ -1329,42 +1206,6 @@ void CPPSourceEmitter::_emitConstructFromScalarDefinition(const UnownedStringSli
     writer->emit("}\n\n");
 }
 
-void CPPSourceEmitter::_emitReflectDefinition(const UnownedStringSlice& funcName, const HLSLIntrinsic* specOp)
-{
-    SourceWriter* writer = getSourceWriter();
-
-    IRFuncType* funcType = specOp->signatureType;
-    SLANG_ASSERT(funcType->getParamCount() == 2);
-    IRType* paramType0 = funcType->getParamType(0);
-
-    SLANG_ASSERT(paramType0->op == kIROp_VectorType);
-
-    IRBasicType* elementType = as<IRBasicType>(static_cast<IRVectorType*>(paramType0)->getElementType());
-
-    // Make sure we have all these functions defined before emitting 
-    IRType* dotArgs[] = { paramType0, paramType0 };
-    UnownedStringSlice dotFuncName = _getAndEmitSpecializedOperationDefinition(HLSLIntrinsic::Op::Dot, dotArgs, SLANG_COUNT_OF(dotArgs), elementType);
-
-    IRType* subArgs[] = { paramType0, paramType0};
-    UnownedStringSlice subFuncName = _getAndEmitSpecializedOperationDefinition(HLSLIntrinsic::Op::Sub, subArgs, SLANG_COUNT_OF(subArgs), paramType0);
-
-    IRType* vecMulScalarArgs[] = { paramType0, elementType };
-    UnownedStringSlice vecMulScalarFuncName = _getAndEmitSpecializedOperationDefinition(HLSLIntrinsic::Op::Mul, vecMulScalarArgs, SLANG_COUNT_OF(vecMulScalarArgs), paramType0);
-
-    // Assumes C++
-
-    _emitSignature(funcName, specOp);
-    writer->emit("\n{\n");
-    writer->indent();
-
-    writer->emit("return a - b * 2.0 * ");
-    writer->emit(dotFuncName);
-    writer->emit("(a, b);\n");
-
-    writer->dedent();
-    writer->emit("}\n\n");
-}
-
 void CPPSourceEmitter::_maybeEmitSpecializedOperationDefinition(const HLSLIntrinsic* specOp)
 {
     // Check if it's been emitted already, if not add it.
@@ -1385,28 +1226,11 @@ void CPPSourceEmitter::emitSpecializedOperationDefinition(const HLSLIntrinsic* s
         {
             return _emitInitDefinition(_getFuncName(specOp), specOp);
         }
-        case Op::VecMatMul:
-        case Op::Dot:
-        {
-            return _emitVecMatMulDefinition(_getFuncName(specOp), specOp);
-        }
         case Op::Any:
         case Op::All:
         {
             return _emitAnyAllDefinition(_getFuncName(specOp), specOp);
         }
-        case Op::Normalize:
-        {
-            return _emitNormalizeDefinition(_getFuncName(specOp), specOp);
-        }
-        case Op::Length:
-        {
-            return _emitLengthDefinition(_getFuncName(specOp), specOp);
-        }
-        case Op::Reflect:
-        {
-            return _emitReflectDefinition(_getFuncName(specOp), specOp);
-        }
         case Op::ConstructConvert:
         {
             return _emitConstructConvertDefinition(_getFuncName(specOp), specOp);
diff --git a/source/slang/slang-emit-cpp.h b/source/slang/slang-emit-cpp.h
index 7f9046643..99f180850 100644
--- a/source/slang/slang-emit-cpp.h
+++ b/source/slang/slang-emit-cpp.h
@@ -91,15 +91,10 @@ protected:
     void _calcGlobalParams(const List<EmitAction>& actions, List<GlobalParamInfo>& outParams, IRGlobalParam** outEntryPointGlobalParams);
     void _emitUniformStateMembers(const List<EmitAction>& actions, IRGlobalParam** outEntryPointGlobalParams);
 
-    void _emitVecMatMulDefinition(const UnownedStringSlice& funcName, const HLSLIntrinsic* specOp);
-
     void _emitAryDefinition(const HLSLIntrinsic* specOp);
 
     // Really we don't want any of these defined like they are here, they should be defined in slang stdlib 
     void _emitAnyAllDefinition(const UnownedStringSlice& funcName, const HLSLIntrinsic* specOp);
-    void _emitLengthDefinition(const UnownedStringSlice& funcName, const HLSLIntrinsic* specOp);
-    void _emitNormalizeDefinition(const UnownedStringSlice& funcName, const HLSLIntrinsic* specOp);
-    void _emitReflectDefinition(const UnownedStringSlice& funcName, const HLSLIntrinsic* specOp);
     void _emitConstructConvertDefinition(const UnownedStringSlice& funcName, const HLSLIntrinsic* specOp);
     void _emitConstructFromScalarDefinition(const UnownedStringSlice& funcName, const HLSLIntrinsic* specOp);
     void _emitGetAtDefinition(const UnownedStringSlice& funcName, const HLSLIntrinsic* specOp);
diff --git a/source/slang/slang-emit-cuda.cpp b/source/slang/slang-emit-cuda.cpp
index 3531d55db..91439d5d3 100644
--- a/source/slang/slang-emit-cuda.cpp
+++ b/source/slang/slang-emit-cuda.cpp
@@ -112,26 +112,7 @@ SlangResult CUDASourceEmitter::calcScalarFuncName(HLSLIntrinsic::Op op, IRBasicT
     
     switch (op)
     {
-        case Op::Sin:
-        case Op::Cos:
-        case Op::Tan:
-        case Op::ArcSin:
-        case Op::ArcCos:
-        case Op::ArcTan:
-        case Op::ArcTan2:
-        case Op::Floor:
-        case Op::Ceil:
-        case Op::FMod:
-        case Op::Exp2:
-        case Op::Exp:
-        case Op::Log:
-        case Op::Log2:
-        case Op::Log10:
         case Op::FRem:
-        case Op::Sqrt:
-        case Op::RecipSqrt:
-        case Op::Pow:
-        case Op::Trunc:
         {
             if (type->op == kIROp_FloatType || type->op == kIROp_DoubleType)
             {
@@ -139,25 +120,6 @@ SlangResult CUDASourceEmitter::calcScalarFuncName(HLSLIntrinsic::Op op, IRBasicT
             }
             break;
         }
-        case Op::Max:
-        case Op::Min:
-        case Op::Abs:
-        {
-            // There are only floating point built in versions of these, prefixed with f
-            if (type->op == kIROp_FloatType || type->op == kIROp_DoubleType)
-            {
-                outBuilder << "f";
-                outBuilder << HLSLIntrinsic::getInfo(op).funcName;
-
-                if (type->op == kIROp_FloatType)
-                {
-                    outBuilder << "f";
-                }
-                return SLANG_OK;
-            }
-            break;
-        }
-
         default: break;
     }
 
@@ -171,23 +133,6 @@ SlangResult CUDASourceEmitter::calcScalarFuncName(HLSLIntrinsic::Op op, IRBasicT
         return SLANG_OK;
     }
 
-    // Missing ones:
-    // 
-    // sincos - the built in uses pointer, so we'll just define in prelude
-    // rcp
-    // sign
-    // saturate
-    // frac
-    // smoothstep
-    // lerp
-    // clamp
-    // step
-    // 
-    // For integer types
-    // abs
-    // min
-    // max
-
     // Defer to the supers impl
     return Super::calcScalarFuncName(op, type, outBuilder);
 }
diff --git a/source/slang/slang-emit-glsl.cpp b/source/slang/slang-emit-glsl.cpp
index 155b86a9c..b433b4d94 100644
--- a/source/slang/slang-emit-glsl.cpp
+++ b/source/slang/slang-emit-glsl.cpp
@@ -1096,31 +1096,6 @@ bool GLSLSourceEmitter::tryEmitInstExprImpl(IRInst* inst, const EmitOpInfo& inOu
             }
             break;
         }
-        case kIROp_Mul_Vector_Matrix:
-        case kIROp_Mul_Matrix_Vector:
-        case kIROp_Mul_Matrix_Matrix:
-        {
-            EmitOpInfo outerPrec = inOuterPrec;
-            bool needClose = false;
-
-            // GLSL expresses inner-product multiplications
-            // with the ordinary infix `*` operator.
-            //
-            // Note that the order of the operands is reversed
-            // compared to HLSL (and Slang's internal representation)
-            // because the notion of what is a "row" vs. a "column"
-            // is reversed between HLSL/Slang and GLSL.
-            //
-            auto prec = getInfo(EmitOp::Mul);
-            needClose = maybeEmitParens(outerPrec, prec);
-
-            emitOperand(inst->getOperand(1), leftSide(outerPrec, prec));
-            m_writer->emit(" * ");
-            emitOperand(inst->getOperand(0), rightSide(prec, outerPrec));
-
-            maybeCloseParens(needClose);
-            return true;
-        }
         case kIROp_Select:
         {
             if (inst->getOperand(0)->getDataType()->op != kIROp_BoolType)
diff --git a/source/slang/slang-hlsl-intrinsic-set.cpp b/source/slang/slang-hlsl-intrinsic-set.cpp
index 82a8851e0..27871141d 100644
--- a/source/slang/slang-hlsl-intrinsic-set.cpp
+++ b/source/slang/slang-hlsl-intrinsic-set.cpp
@@ -220,42 +220,9 @@ SlangResult HLSLIntrinsicSet::makeIntrinsic(IRInst* inst, HLSLIntrinsic& out)
         {
         default: break;
 
-        case Op::Sin:
-        case Op::Cos:
-        case Op::Tan:
-        case Op::ArcSin:
-        case Op::ArcCos:
-        case Op::ArcTan:
-        case Op::ArcTan2:
-        case Op::Rcp:
-        case Op::Sign:
-        case Op::Frac:
-        case Op::Ceil:
-        case Op::Floor:
-        case Op::Trunc:
-        case Op::Sqrt:
-        case Op::RecipSqrt:
-        case Op::Exp2:
-        case Op::Exp:
-        case Op::Log:
-        case Op::Log2:
-        case Op::Log10:
-        case Op::Abs:
-        case Op::Min:
-        case Op::Max:
-        case Op::Pow:
-        case Op::FMod:
-        case Op::SmoothStep:
-        case Op::Lerp:
-        case Op::Clamp:
-        case Op::Step:
         case Op::AsFloat:
         case Op::AsInt:
         case Op::AsUInt:
-        case Op::IsInfinite:
-        case Op::IsFinite:
-        case Op::IsNan:
-        case Op::LdExp:
             // Note: the `any()`/`all()` case can't be handled via a stdlib definition
             // right now because `bool` vectors map to `int` vectors on the CUDA
             // path, so that the generated `geAt` operation is incorrect.
@@ -605,14 +572,6 @@ HLSLIntrinsic::Op HLSLIntrinsicOpLookup::getOpForIROp(IRInst* inst)
 
         case kIROp_constructVectorFromScalar: return Op::ConstructFromScalar;
 
-        case kIROp_Mul_Matrix_Matrix:
-        case kIROp_Mul_Matrix_Vector:
-        case kIROp_Mul_Vector_Matrix:
-        {
-            return Op::VecMatMul;
-        }
-        case kIROp_Dot:     return Op::Dot;
-
         default:            return Op::Invalid;
     }
 }
diff --git a/source/slang/slang-hlsl-intrinsic-set.h b/source/slang/slang-hlsl-intrinsic-set.h
index 6ab5480b3..ca3fced50 100644
--- a/source/slang/slang-hlsl-intrinsic-set.h
+++ b/source/slang/slang-hlsl-intrinsic-set.h
@@ -64,53 +64,6 @@ just constructXXXFromScalar. Would be good if there was a suitable name to encom
         \
         x(Swizzle, "", -1) \
         \
-        x(Dot, "dot", 2) \
-        x(VecMatMul, "mul", 2) \
-        \
-        x(Normalize, "normalize", 1) \
-        x(Length, "length", 1) \
-        \
-        x(Sin, "sin", 1) \
-        x(Cos, "cos", 1) \
-        x(Tan, "tan", 1) \
-        \
-        x(ArcSin, "asin", 1) \
-        x(ArcCos, "acos", 1) \
-        x(ArcTan, "atan", 1) \
-        \
-        x(ArcTan2, "atan2", 2) \
-        \
-        x(Rcp, "rcp", 1) \
-        x(Sign, "sign", 1) \
-        x(Frac, "frac", 1) \
-        \
-        x(Ceil, "ceil", 1) \
-        x(Floor, "floor", 1) \
-        x(Trunc, "trunc", 1) \
-        \
-        x(Sqrt, "sqrt", 1) \
-        x(RecipSqrt, "rsqrt", 1) \
-        \
-        x(Exp2, "exp2", 1) \
-        x(Exp, "exp", 1) \
-        \
-        x(Log, "log", 1) \
-        x(Log2, "log2", 1) \
-        x(Log10, "log10", 1) \
-        \
-        x(Abs, "abs", 1) \
-        \
-        x(Min, "min", 2) \
-        x(Max, "max", 2) \
-        x(Pow, "pow", 2) \
-        x(FMod, "fmod", 2) \
-        x(Reflect, "reflect", 2) \
-        \
-        x(SmoothStep, "smoothstep", 3) \
-        x(Lerp, "lerp", 3) \
-        x(Clamp, "clamp", 3) \
-        x(Step, "step", 2) \
-        \
         x(AsFloat, "asfloat", 1) \
         x(AsInt, "asint", -1) \
         x(AsUInt, "asuint", -1) \
@@ -120,13 +73,7 @@ just constructXXXFromScalar. Would be good if there was a suitable name to encom
         x(ConstructFromScalar, "", 1) \
         \
         x(GetAt, "", 2) \
-        \
-        x(CountBits, "countbits", 1) \
-        \
-        x(IsInfinite, "isinf", 1) \
-        x(IsFinite, "isfinite", 1) \
-        x(IsNan, "isnan", 1) \
-        x(LdExp, "ldexp", 2)
+        /* end */
 
 struct HLSLIntrinsic
 {
diff --git a/source/slang/slang-ir-inst-defs.h b/source/slang/slang-ir-inst-defs.h
index 89fec618c..3fdf9f113 100644
--- a/source/slang/slang-ir-inst-defs.h
+++ b/source/slang/slang-ir-inst-defs.h
@@ -373,10 +373,6 @@ INST(Dot, dot, 2, 0)
 
 INST(GetStringHash, getStringHash, 1, 0)
 
-INST(Mul_Vector_Matrix, mulVectorMatrix, 2, 0)
-INST(Mul_Matrix_Vector, mulMatrixVector, 2, 0)
-INST(Mul_Matrix_Matrix, mulMatrixMatrix, 2, 0)
-
 // Texture sampling operation of the form `t.Sample(s,u)`
 INST(Sample, sample, 3, 0)
 
diff --git a/source/slang/slang-ir.cpp b/source/slang/slang-ir.cpp
index 6e1b6fe83..f84300327 100644
--- a/source/slang/slang-ir.cpp
+++ b/source/slang/slang-ir.cpp
@@ -4951,9 +4951,6 @@ namespace Slang
         case kIROp_BitNot:
         case kIROp_Select:
         case kIROp_Dot:
-        case kIROp_Mul_Vector_Matrix:
-        case kIROp_Mul_Matrix_Vector:
-        case kIROp_Mul_Matrix_Matrix:
         case kIROp_MakeExistential:
         case kIROp_ExtractExistentialType:
         case kIROp_ExtractExistentialValue:
author	Tim Foley <tfoleyNV@users.noreply.github.com>	2020-03-11 08:50:38 -0700
committer	GitHub <noreply@github.com>	2020-03-11 08:50:38 -0700
commit	935768c6a00c258bf5122a2d04b84064a1eee67d (patch)
tree	68dac944da274a21acb8c8bf651401c26e289f4c /source
parent	b380b1af6ba6f5f58e3841c2a5b14db7ee8c372d (diff)