Merge pull request #178 from tfoleyNV/boilerplate-generator

Initial work on boilerplate code generator
author: Tim Foley <tfoleyNV@users.noreply.github.com> 2017-09-11 10:27:41 -0700
committer: GitHub <noreply@github.com> 2017-09-11 10:27:41 -0700
commit: 80fb7b05b851e645d821331fdbbcea1add686c9a (patch)
tree: 5f9b010837de0c78f2f96e59388bf76e4cbd8575 /source
parent: 0e566a63f0bafb7def65521315e9f19a2bc79e34 (diff)
parent: 14137cbd2ddd7deebcdf8cc85c30d534bec8e40b (diff)
9 files changed, 4426 insertions, 2194 deletions
diff --git a/source/slang/core.meta.slang b/source/slang/core.meta.slang
new file mode 100644
index 000000000..fc2d27b08
--- /dev/null
+++ b/source/slang/core.meta.slang
@@ -0,0 +1,907 @@
+// Slang `core` library
+
+// A type that can be used as an operand for builtins
+interface __BuiltinType {}
+
+// A type that can be used for arithmetic operations
+interface __BuiltinArithmeticType : __BuiltinType {}
+
+// A type that logically has a sign (positive/negative/zero)
+interface __BuiltinSignedArithmeticType : __BuiltinArithmeticType {}
+
+// A type that can represent integers
+interface __BuiltinIntegerType : __BuiltinArithmeticType {}
+
+// A type that can represent non-integers
+interface __BuiltinRealType : __BuiltinArithmeticType {}
+
+// A type that uses a floating-point representation
+interface __BuiltinFloatingPointType : __BuiltinRealType, __BuiltinSignedArithmeticType {}
+
+__generic<T,U> __intrinsic_op(Sequence) U operator,(T left, U right);
+
+__generic<T> __intrinsic_op(select) T operator?:(bool condition, T ifTrue, T ifFalse);
+__generic<T, let N : int> __intrinsic_op(select) vector<T,N> operator?:(vector<bool,N> condition, vector<T,N> ifTrue, vector<T,N> ifFalse);
+
+${{{{
+// We are going to use code generation to produce the
+// declarations for all of our base types.
+
+static const int kBaseTypeCount = sizeof(kBaseTypes) / sizeof(kBaseTypes[0]);
+for (int tt = 0; tt < kBaseTypeCount; ++tt)
+{
+    EMIT_LINE_DIRECTIVE();
+    sb << "__builtin_type(" << int(kBaseTypes[tt].tag) << ") struct " << kBaseTypes[tt].name;
+
+    // Declare interface conformances for this type
+
+    sb << "\n    : __BuiltinType\n";
+
+    switch (kBaseTypes[tt].tag)
+    {
+    case BaseType::Float:
+        sb << "\n    , __BuiltinFloatingPointType\n";
+        sb << "\n    ,  __BuiltinRealType\n";
+        // fall through to:
+    case BaseType::Int:
+        sb << "\n    ,  __BuiltinSignedArithmeticType\n";
+        // fall through to:
+    case BaseType::UInt:
+    case BaseType::UInt64:
+        sb << "\n    ,  __BuiltinArithmeticType\n";
+        // fall through to:
+    case BaseType::Bool:
+        sb << "\n    ,  __BuiltinType\n";
+        break;
+
+    default:
+        break;
+    }
+
+    sb << "\n{\n";
+
+
+    // Declare initializers to convert from various other types
+    for (int ss = 0; ss < kBaseTypeCount; ++ss)
+    {
+        // Don't allow conversion from `void`
+        if (kBaseTypes[ss].tag == BaseType::Void)
+            continue;
+
+        // We need to emit a modifier so that the semantic-checking
+        // layer will know it can use these operations for implicit
+        // conversion.
+        ConversionCost conversionCost = getBaseTypeConversionCost(
+            kBaseTypes[tt],
+            kBaseTypes[ss]);
+
+        EMIT_LINE_DIRECTIVE();
+        sb << "__implicit_conversion(" << conversionCost << ")\n";
+
+        EMIT_LINE_DIRECTIVE();
+        sb << "__init(" << kBaseTypes[ss].name << " value);\n";
+    }
+
+    sb << "};\n";
+}
+
+
+
+// Declare the generic `vector` type, with default element type
+// `float` and default size 4.
+
+sb << "__generic<T = float, let N : int = 4> __magic_type(Vector) struct vector\n{\n"
+   << "    typedef T Element;\n";
+
+// Declare an initializer taking a single scalar of the element type,
+// tagged as an implicit conversion with the scalar-to-vector cost.
+sb << "    __implicit_conversion(" << kConversionCost_ScalarToVector << ")\n"
+   << "    __init(T value);\n";
+
+sb << "};\n";
+
+// TODO: Probably need to do similar
+}}}}
+
+__generic<T = float, let R : int = 4, let C : int = 4>
+__magic_type(Matrix)
+struct matrix {};
+
+${{{{
+
+
+
+static const struct {
+    char const* name;
+    char const* glslPrefix;
+} kTypes[] =
+{
+    {"float", ""},
+    {"int", "i"},
+    {"uint", "u"},
+    {"bool", "b"},
+};
+static const int kTypeCount = sizeof(kTypes) / sizeof(kTypes[0]);
+
+for (int tt = 0; tt < kTypeCount; ++tt)
+{
+    // Declare HLSL vector types
+    for (int ii = 1; ii <= 4; ++ii)
+    {
+        sb << "typedef vector<" << kTypes[tt].name << "," << ii << "> " << kTypes[tt].name << ii << ";\n";
+    }
+
+    // Declare HLSL matrix types
+    for (int rr = 2; rr <= 4; ++rr)
+    for (int cc = 2; cc <= 4; ++cc)
+    {
+        sb << "typedef matrix<" << kTypes[tt].name << "," << rr << "," << cc << "> " << kTypes[tt].name << rr << "x" << cc << ";\n";
+    }
+}
+
+// Declare additional built-in generic types
+//        EMIT_LINE_DIRECTIVE();
+
+
+// `ConstantBuffer<T>`: generic in the element type, tagged with its IR opcode.
+sb << "__generic<T>\n"
+   << "__intrinsic_type(" << kIROp_ConstantBufferType << ")\n"
+   << "__magic_type(ConstantBuffer) struct ConstantBuffer {};\n";
+
+// `TextureBuffer<T>`: declared the same way, with its own IR opcode.
+sb << "__generic<T>\n"
+   << "__intrinsic_type(" << kIROp_TextureBufferType << ")\n"
+   << "__magic_type(TextureBuffer) struct TextureBuffer {};\n";
+
+
+// Names used for generated parameters: `kComponentNames[i]` names the
+// i-th scalar component, and `kVectorNames[M]` names a leading M-vector.
+static const char* kComponentNames[]{ "x", "y", "z", "w" };
+static const char* kVectorNames[]{ "", "x", "xy", "xyz", "xyzw" };
+
+// Add constructors to the `vector` type. Each extension is generic in
+// the element type `T` but explicit in the size `N` (2 through 4).
+for (int N = 2; N <= 4; ++N)
+{
+    sb << "__generic<T> __extension vector<T, " << N << ">\n{\n";
+
+    // initialize from N scalars
+    sb << "__init(";
+    for (int ii = 0; ii < N; ++ii)
+    {
+        if (ii != 0) sb << ", ";
+        sb << "T " << kComponentNames[ii];
+    }
+    sb << ");\n";
+
+    // Initialize from an M-vector and then scalars
+    for (int M = 2; M < N; ++M)
+    {
+        sb << "__init(vector<T," << M << "> " << kVectorNames[M];
+        for (int ii = M; ii < N; ++ii)
+        {
+            sb << ", T " << kComponentNames[ii];
+        }
+        sb << ");\n";
+    }
+
+    // initialize from another vector of the same size
+    //
+    // TODO(tfoley): this overlaps with implicit conversions.
+    // We should look for a way that we can define implicit
+    // conversions directly in the stdlib instead...
+    sb << "__generic<U> __init(vector<U," << N << ">);\n";
+
+    // Initialize from two vectors, of size M and N-M
+    for(int M = 2; M <= (N-2); ++M)
+    {
+        int K = N - M;
+        SLANG_ASSERT(K >= 2);
+
+        sb << "__init(vector<T," << M << "> " << kVectorNames[M];
+        sb << ", vector<T," << K << "> ";
+
+        // Name the second parameter after the components that follow
+        // the first M (e.g. `xy` then `zw`), so that the two
+        // parameters never share a name. `M + K == N <= 4`, so the
+        // index stays inside `kComponentNames`.
+        for (int ii = M; ii < N; ++ii)
+        {
+            sb << kComponentNames[ii];
+        }
+        sb << ");\n";
+    }
+
+    sb << "}\n";
+}
+
+// The above extension was generic in the *type* of the vector,
+// but explicit in the *size*. We will now declare an extension
+// for each builtin type that is generic in the size.
+//
+for (int tt = 0; tt < kBaseTypeCount; ++tt)
+{
+    if(kBaseTypes[tt].tag == BaseType::Void) continue;
+
+    sb << "__generic<let N : int> __extension vector<"
+        << kBaseTypes[tt].name << ",N>\n{\n";
+
+    for (int ff = 0; ff < kBaseTypeCount; ++ff)
+    {
+        if(kBaseTypes[ff].tag == BaseType::Void) continue;
+
+        // We need a constructor to make a vector from a scalar
+        // of another type.
+
+        if( tt != ff )
+        {
+            auto cost = getBaseTypeConversionCost(
+                kBaseTypes[tt],
+                kBaseTypes[ff]);
+            cost += kConversionCost_ScalarToVector;
+
+            sb << "    __implicit_conversion(" << cost << ")\n";
+            sb << "    __init(" << kBaseTypes[ff].name << " value);\n";
+        }
+    }
+
+    sb << "}\n";
+}
+
+for( int R = 2; R <= 4; ++R )
+for( int C = 2; C <= 4; ++C )
+{
+    sb << "__generic<T> __extension matrix<T, " << R << "," << C << ">\n{\n";
+
+    // initialize from R*C scalars
+    sb << "__init(";
+    for( int ii = 0; ii < R; ++ii )
+    for( int jj = 0; jj < C; ++jj )
+    {
+        if ((ii+jj) != 0) sb << ", ";
+        sb << "T m" << ii << jj;
+    }
+    sb << ");\n";
+
+    // Initialize from R C-vectors
+    sb << "__init(";
+    for (int ii = 0; ii < R; ++ii)
+    {
+        if(ii != 0) sb << ", ";
+        sb << "vector<T," << C << "> row" << ii;
+    }
+    sb << ");\n";
+
+
+    // initialize from another matrix of the same size
+    //
+    // TODO(tfoley): See comment about how this overlaps
+    // with implicit conversion, in the `vector` case above
+    sb << "__generic<U> __init(matrix<U," << R << ", " << C << ">);\n";
+
+    // initialize from a matrix of larger size
+    for(int rr = R; rr <= 4; ++rr)
+    for( int cc = C; cc <= 4; ++cc )
+    {
+        if(rr == R && cc == C) continue;
+        sb << "__init(matrix<T," << rr << "," << cc << "> value);\n";
+    }
+
+    sb << "}\n";
+}
+
+// Declare built-in texture and sampler types
+
+
+
+// `SamplerState` and `SamplerComparisonState` are both declared with the
+// `SamplerState` magic type and the sampler IR opcode; they differ only
+// in the flavor value passed to each modifier.
+//
+// Each declaration ends with a newline so that the next generated
+// declaration starts on its own line.
+sb << "__magic_type(SamplerState," << int(SamplerStateType::Flavor::SamplerState) << ")\n";
+sb << "__intrinsic_type(" << kIROp_SamplerType << ", " << int(SamplerStateType::Flavor::SamplerState) << ")\n";
+sb << "struct SamplerState {};\n";
+
+sb << "__magic_type(SamplerState," << int(SamplerStateType::Flavor::SamplerComparisonState) << ")\n";
+sb << "__intrinsic_type(" << kIROp_SamplerType << ", " << int(SamplerStateType::Flavor::SamplerComparisonState) << ")\n";
+sb << "struct SamplerComparisonState {};\n";
+
+// Base texture shapes: the type name to emit, the shape it maps to, and
+// the number of coordinates used to address a single (non-array) texture
+// of that shape.
+//
+// NOTE(review): this spot used to carry a TODO asking for `RW*` variants
+// of the texture types; the access-level table below has an `RW` entry,
+// so that TODO looks addressed -- confirm before dropping it entirely.
+static const struct {
+    char const*			name;
+    TextureType::Shape	baseShape;
+    int					coordCount;
+} kBaseTextureTypes[] = {
+    { "Texture1D",		TextureType::Shape1D,	1 },
+    { "Texture2D",		TextureType::Shape2D,	2 },
+    { "Texture3D",		TextureType::Shape3D,	3 },
+    { "TextureCube",	TextureType::ShapeCube,	3 },
+};
+static const int kBaseTextureTypeCount = sizeof(kBaseTextureTypes) / sizeof(kBaseTextureTypes[0]);
+
+
+// Access levels: the prefix prepended to the texture type name, paired
+// with the access value that gets folded into the texture flavor.
+static const struct {
+    char const*         name;
+    SlangResourceAccess access;
+} kBaseTextureAccessLevels[] = {
+    { "",                   SLANG_RESOURCE_ACCESS_READ },
+    { "RW",                 SLANG_RESOURCE_ACCESS_READ_WRITE },
+    { "RasterizerOrdered",  SLANG_RESOURCE_ACCESS_RASTER_ORDERED },
+};
+static const int kBaseTextureAccessLevelCount = sizeof(kBaseTextureAccessLevels) / sizeof(kBaseTextureAccessLevels[0]);
+
+for (int tt = 0; tt < kBaseTextureTypeCount; ++tt)
+{
+    char const* name = kBaseTextureTypes[tt].name;
+    TextureType::Shape baseShape = kBaseTextureTypes[tt].baseShape;
+
+    for (int isArray = 0; isArray < 2; ++isArray)
+    {
+        // Arrays of 3D textures aren't allowed
+        if (isArray && baseShape == TextureType::Shape3D) continue;
+
+        for (int isMultisample = 0; isMultisample < 2; ++isMultisample)
+        for (int accessLevel = 0; accessLevel < kBaseTextureAccessLevelCount; ++accessLevel)
+        {
+            auto access = kBaseTextureAccessLevels[accessLevel].access;
+
+            // TODO: any constraints to enforce on what gets to be multisampled?
+
+            unsigned flavor = baseShape;
+            if (isArray)		flavor |= TextureType::ArrayFlag;
+            if (isMultisample)	flavor |= TextureType::MultisampleFlag;
+//                        if (isShadow)		flavor |= TextureType::ShadowFlag;
+
+            flavor |= (access << 8);
+
+            // emit a generic signature
+            // TODO: allow for multisample count to come in as well...
+            sb << "__generic<T = float4> ";
+
+            sb << "__magic_type(Texture," << int(flavor) << ")\n";
+            sb << "__intrinsic_type(" << kIROp_TextureType << ", " << flavor << ")\n";
+            sb << "struct ";
+            sb << kBaseTextureAccessLevels[accessLevel].name;
+            sb << name;
+            if (isMultisample) sb << "MS";
+            if (isArray) sb << "Array";
+//                        if (isShadow) sb << "Shadow";
+            sb << "\n{";
+
+            // Level-of-detail queries are only declared for
+            // non-multisample textures; both take a sampler and a
+            // location with one coordinate per base-shape dimension.
+            if( !isMultisample )
+            {
+                static const char* kLODQueryNames[] = {
+                    "CalculateLevelOfDetail",
+                    "CalculateLevelOfDetailUnclamped",
+                };
+
+                for (auto queryName : kLODQueryNames)
+                {
+                    sb << "float " << queryName << "(SamplerState s, ";
+                    sb << "float" << kBaseTextureTypes[tt].coordCount << " location);\n";
+                }
+            }
+
+            // `GetDimensions`
+
+            for(int isFloat = 0; isFloat < 2; ++isFloat)
+            for(int includeMipInfo = 0; includeMipInfo < 2; ++includeMipInfo)
+            {
+                {
+                    sb << "__glsl_version(450)\n";
+                    sb << "__target_intrinsic(glsl, \"(";
+
+                    int aa = 0;
+                    String lodStr = "0";
+                    if (includeMipInfo)
+                    {
+                        int mipLevelArg = aa++;
+                        lodStr = "int($";
+                        lodStr.append(mipLevelArg);
+                        lodStr.append(")");
+                    }
+
+                    int cc = 0;
+                    switch(baseShape)
+                    {
+                    case TextureType::Shape1D:
+                        sb << "($" << aa++ << " = textureSize($$P, " << lodStr << "))";
+                        cc = 1;
+                        break;
+
+                    case TextureType::Shape2D:
+                    case TextureType::ShapeCube:
+                        sb << "($" << aa++ << " = textureSize($$P, " << lodStr << ").x)";
+                        sb << ", ($" << aa++ << " = textureSize($$P, " << lodStr << ").y)";
+                        cc = 2;
+                        break;
+
+                    case TextureType::Shape3D:
+                        sb << "($" << aa++ << " = textureSize($$P, " << lodStr << ").x)";
+                        sb << ", ($" << aa++ << " = textureSize($$P, " << lodStr << ").y)";
+                        sb << ", ($" << aa++ << " = textureSize($$P, " << lodStr << ").z)";
+                        cc = 3;
+                        break;
+
+                    default:
+                        SLANG_UNEXPECTED("unhandled resource shape");
+                        break;
+                    }
+
+                    if(isArray)
+                    {
+                        sb << ", ($" << aa++ << " = textureSize($$P, " << lodStr << ")." << kComponentNames[cc] << ")";
+                    }
+
+                    if(isMultisample)
+                    {
+                        sb << ", ($" << aa++ << " = textureSamples($$P))";
+                    }
+
+                    if (includeMipInfo)
+                    {
+                        sb << ", ($" << aa++ << " = textureQueryLevels($$P))";
+                    }
+
+
+                    sb << ")\")\n";
+                    sb << "__intrinsic_op\n";
+
+                }
+
+                char const* t = isFloat ? "out float " : "out uint ";
+
+                sb << "void GetDimensions(";
+                if(includeMipInfo)
+                    sb << "uint mipLevel, ";
+
+                switch(baseShape)
+                {
+                case TextureType::Shape1D:
+                    sb << t << "width";
+                    break;
+
+                case TextureType::Shape2D:
+                case TextureType::ShapeCube:
+                    sb << t << "width,";
+                    sb << t << "height";
+                    break;
+
+                case TextureType::Shape3D:
+                    sb << t << "width,";
+                    sb << t << "height,";
+                    sb << t << "depth";
+                    break;
+
+                default:
+                    assert(!"unexpected");
+                    break;
+                }
+
+                if(isArray)
+                {
+                    sb << ", " << t << "elements";
+                }
+
+                if(isMultisample)
+                {
+                    sb << ", " << t << "sampleCount";
+                }
+
+                if(includeMipInfo)
+                    sb << ", " << t << "numberOfLevels";
+
+                sb << ");\n";
+            }
+
+            // `GetSamplePosition()`
+            if( isMultisample )
+            {
+                sb << "float2 GetSamplePosition(int s);\n";
+            }
+
+            // `Load()`
+            //
+            // Three overloads are declared: `Load(location)`,
+            // `Load(location, offset)` and `Load(location, offset, status)`.
+            // Multisample textures take an extra `sampleIndex` right after
+            // `location`. Nothing is declared when the shape coordinates
+            // plus the array index already add up to 4.
+
+            if( kBaseTextureTypes[tt].coordCount + isArray < 4 )
+            {
+                // For non-multisample textures the mip level is packed
+                // into `location` as one extra trailing component.
+                int loadCoordCount = kBaseTextureTypes[tt].coordCount + isArray + (isMultisample?0:1);
+
+                // When translating to GLSL, we need to break apart the `location` argument.
+                //
+                // TODO: this should really be handled by having this member actually get lowered!
+                static const char* kGLSLLoadCoordsSwizzle[] = { "", "", "x", "xy", "xyz", "xyzw" };
+                static const char* kGLSLLoadLODSwizzle[]    = { "", "", "y", "z", "w", "error" };
+
+                // `Load(location [, sampleIndex])`
+                if (isMultisample)
+                {
+                    sb << "__target_intrinsic(glsl, \"texelFetch($$P, $0, $1)\")\n";
+                }
+                else
+                {
+                    sb << "__target_intrinsic(glsl, \"texelFetch($$P, ($0)." << kGLSLLoadCoordsSwizzle[loadCoordCount] << ", ($0)." << kGLSLLoadLODSwizzle[loadCoordCount] << ")\")\n";
+                }
+                sb << "__intrinsic_op\n";
+                sb << "T Load(";
+                sb << "int" << loadCoordCount << " location";
+                if(isMultisample)
+                {
+                    sb << ", int sampleIndex";
+                }
+                sb << ");\n";
+
+                // `Load(location [, sampleIndex], offset)`
+                //
+                // The GLSL fetch that accepts a texel offset is
+                // `texelFetchOffset`; `texelFetch` has no overload with
+                // an offset argument.
+                if (isMultisample)
+                {
+                    sb << "__target_intrinsic(glsl, \"texelFetchOffset($$P, $0, $1, $2)\")\n";
+                }
+                else
+                {
+                    sb << "__target_intrinsic(glsl, \"texelFetchOffset($$P, ($0)." << kGLSLLoadCoordsSwizzle[loadCoordCount] << ", ($0)." << kGLSLLoadLODSwizzle[loadCoordCount] << ", $1)\")\n";
+                }
+                sb << "__intrinsic_op\n";
+                sb << "T Load(";
+                sb << "int" << loadCoordCount << " location";
+                if(isMultisample)
+                {
+                    sb << ", int sampleIndex";
+                }
+                // The offset only covers the base shape's coordinates
+                // (no array index, no mip level), matching the
+                // `status` overload below.
+                sb << ", int" << kBaseTextureTypes[tt].coordCount << " offset";
+                sb << ");\n";
+
+
+                // `Load(location [, sampleIndex], offset, status)`
+                // (no GLSL mapping is declared for this overload)
+                sb << "T Load(";
+                sb << "int" << loadCoordCount << " location";
+                if(isMultisample)
+                {
+                    sb << ", int sampleIndex";
+                }
+                sb << ", int" << kBaseTextureTypes[tt].coordCount << " offset";
+                sb << ", out uint status";
+                sb << ");\n";
+            }
+
+            if(baseShape != TextureType::ShapeCube)
+            {
+                // subscript operator
+                sb << "__intrinsic_op __subscript(uint" << kBaseTextureTypes[tt].coordCount + isArray << " location) -> T;\n";
+            }
+
+            if( !isMultisample )
+            {
+                // `Sample()`
+
+                sb << "__target_intrinsic(glsl, \"texture($$p, $1)\")\n";
+
+                // TODO: only enable if IR is being used?
+                sb << "__intrinsic_op(sample)\n";
+
+                sb << "__intrinsic_op\n";
+                sb << "T Sample(SamplerState s, ";
+                sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location);\n";
+
+                if( baseShape != TextureType::ShapeCube )
+                {
+                    sb << "__target_intrinsic(glsl, \"textureOffset($$p, $1, $2)\")\n";
+                    sb << "__intrinsic_op\n";
+                    sb << "T Sample(SamplerState s, ";
+                    sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, ";
+                    sb << "int" << kBaseTextureTypes[tt].coordCount << " offset);\n";
+                }
+
+                sb << "T Sample(SamplerState s, ";
+                sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, ";
+                if( baseShape != TextureType::ShapeCube )
+                {
+                    sb << "int" << kBaseTextureTypes[tt].coordCount << " offset, ";
+                }
+                sb << "float clamp);\n";
+
+                sb << "T Sample(SamplerState s, ";
+                sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, ";
+                if( baseShape != TextureType::ShapeCube )
+                {
+                    sb << "int" << kBaseTextureTypes[tt].coordCount << " offset, ";
+                }
+                sb << "float clamp, out uint status);\n";
+
+
+                // `SampleBias()`
+                sb << "__target_intrinsic(glsl, \"texture($$p, $1, $2)\")\n";
+                sb << "__intrinsic_op\n";
+                sb << "T SampleBias(SamplerState s, ";
+                sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, float bias);\n";
+
+                if( baseShape != TextureType::ShapeCube )
+                {
+                    sb << "__target_intrinsic(glsl, \"textureOffset($$p, $1, $2, $3)\")\n";
+                    sb << "__intrinsic_op\n";
+                    sb << "T SampleBias(SamplerState s, ";
+                    sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, float bias, ";
+                    sb << "int" << kBaseTextureTypes[tt].coordCount << " offset);\n";
+                }
+
+                // `SampleCmp()` and `SampleCmpLevelZero`
+                sb << "T SampleCmp(SamplerComparisonState s, ";
+                sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, ";
+                sb << "float compareValue";
+                sb << ");\n";
+
+                int baseCoordCount = kBaseTextureTypes[tt].coordCount;
+                int arrCoordCount = baseCoordCount + isArray;
+                if (arrCoordCount < 3)
+                {
+                    int extCoordCount = arrCoordCount + 1;
+
+                    if (extCoordCount < 3)
+                        extCoordCount = 3;
+
+                    sb << "__target_intrinsic(glsl, \"textureLod($$p, ";
+
+                    sb << "vec" << extCoordCount << "($1,";
+                    for (int ii = arrCoordCount; ii < extCoordCount - 1; ++ii)
+                    {
+                        sb << " 0.0,";
+                    }
+                    sb << "$2)";
+
+                    sb << ", 0.0)\")\n";
+                }
+                else if(arrCoordCount <= 3)
+                {
+                    int extCoordCount = arrCoordCount + 1;
+
+                    if (extCoordCount < 3)
+                        extCoordCount = 3;
+
+                    sb << "__target_intrinsic(glsl, \"textureGrad($$p, ";
+
+                    sb << "vec" << extCoordCount << "($1,";
+                    for (int ii = arrCoordCount; ii < extCoordCount - 1; ++ii)
+                    {
+                        sb << " 0.0,";
+                    }
+                    sb << "$2)";
+
+                    // Construct gradients
+                    sb << ", vec" << baseCoordCount << "(0.0)";
+                    sb << ", vec" << baseCoordCount << "(0.0)";
+                    sb << ")\")\n";
+                }
+                sb << "__intrinsic_op\n";
+                sb << "T SampleCmpLevelZero(SamplerComparisonState s, ";
+                sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, ";
+                sb << "float compareValue";
+                sb << ");\n";
+
+                // `SampleCmp()` / `SampleCmpLevelZero()` overloads that take
+                // a texel `offset`. These are comparison-sampling
+                // operations, so they take a `SamplerComparisonState`
+                // just like the overloads without an offset.
+                if( baseShape != TextureType::ShapeCube )
+                {
+                    // Note(tfoley): MSDN seems confused, and claims that the `offset`
+                    // parameter for `SampleCmp` is available for everything but 3D
+                    // textures, while `Sample` and `SampleBias` are consistent in
+                    // saying they only exclude `offset` for cube maps (which makes
+                    // sense). I'm going to assume the documentation for `SampleCmp`
+                    // is just wrong.
+
+                    sb << "T SampleCmp(SamplerComparisonState s, ";
+                    sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, ";
+                    sb << "float compareValue, ";
+                    sb << "int" << kBaseTextureTypes[tt].coordCount << " offset);\n";
+
+                    sb << "T SampleCmpLevelZero(SamplerComparisonState s, ";
+                    sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, ";
+                    sb << "float compareValue, ";
+                    sb << "int" << kBaseTextureTypes[tt].coordCount << " offset);\n";
+                }
+
+
+                sb << "__target_intrinsic(glsl, \"textureGrad($$p, $1, $2, $3)\")\n";
+                sb << "__intrinsic_op(sampleGrad)\n";
+                sb << "T SampleGrad(SamplerState s, ";
+                sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, ";
+                sb << "float" << kBaseTextureTypes[tt].coordCount << " gradX, ";
+                sb << "float" << kBaseTextureTypes[tt].coordCount << " gradY";
+                sb << ");\n";
+
+                if( baseShape != TextureType::ShapeCube )
+                {
+                    sb << "__target_intrinsic(glsl, \"textureGradOffset($$p, $1, $2, $3, $4)\")\n";
+                    sb << "__intrinsic_op(sampleGrad)\n";
+                    sb << "T SampleGrad(SamplerState s, ";
+                    sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, ";
+                    sb << "float" << kBaseTextureTypes[tt].coordCount << " gradX, ";
+                    sb << "float" << kBaseTextureTypes[tt].coordCount << " gradY, ";
+                    sb << "int" << kBaseTextureTypes[tt].coordCount << " offset);\n";
+                }
+
+                // `SampleLevel`
+
+                sb << "__target_intrinsic(glsl, \"textureLod($$p, $1, $2)\")\n";
+                sb << "__intrinsic_op\n";
+                sb << "T SampleLevel(SamplerState s, ";
+                sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, ";
+                sb << "float level);\n";
+
+                if( baseShape != TextureType::ShapeCube )
+                {
+                    sb << "__target_intrinsic(glsl, \"textureLodOffset($$p, $1, $2, $3)\")\n";
+                    sb << "__intrinsic_op\n";
+                    sb << "T SampleLevel(SamplerState s, ";
+                    sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, ";
+                    sb << "float level, ";
+                    sb << "int" << kBaseTextureTypes[tt].coordCount << " offset);\n";
+                }
+            }
+
+            sb << "\n};\n";
+
+            // `Gather*()` operations are handled via an `extension` declaration,
+            // because this lets us capture the element type of the texture.
+            //
+            // TODO: longer-term there should be something like a `TextureElementType`
+            // interface, that both scalars and vectors implement, that then exposes
+            // a `Scalar` associated type, and `Gather` can return `vector<T.Scalar, 4>`.
+            //
+            static const struct {
+                char const* genericPrefix;
+                char const* elementType;
+            } kGatherExtensionCases[] = {
+                { "__generic<T, let N : int>", "vector<T,N>" },
+
+                // TODO: need a case here for scalars `T`, but also
+                // need to ensure that case doesn't accidentally match
+                // for `T = vector<...>`, which requires actual checking
+                // of constraints on generic parameters.
+            };
+            for(auto cc : kGatherExtensionCases)
+            {
+                // TODO: this should really be an `if` around the entire `Gather` logic
+                if (isMultisample) break;
+
+                EMIT_LINE_DIRECTIVE();
+                sb << cc.genericPrefix << " __extension ";
+                sb << kBaseTextureAccessLevels[accessLevel].name;
+                sb << name;
+                if (isArray) sb << "Array";
+                sb << "<" << cc.elementType << " >";
+                sb << "\n{\n";
+
+
+                // `Gather`
+                // (tricky because it returns a 4-vector of the element type
+                // of the texture components...)
+                //
+                // TODO: is it actually correct to restrict these so that, e.g.,
+                // `GatherAlpha()` isn't allowed on `Texture2D<float3>` because
+                // it nominally doesn't have an alpha component?
+                static const struct {
+                    int componentIndex;
+                    char const* componentName;
+                } kGatherComponets[] = {
+                    { 0, "" },
+                    { 0, "Red" },
+                    { 1, "Green" },
+                    { 2, "Blue" },
+                    { 3, "Alpha" },
+                };
+
+                // Emit the five `Gather*` overloads for each component
+                // entry: plain, with one offset, with one offset plus
+                // status, with four offsets, and with four offsets plus
+                // status.
+                for(auto kk : kGatherComponets)
+                {
+                    auto componentIndex = kk.componentIndex;
+                    auto componentName = kk.componentName;
+
+                    // The sample location includes the array index when
+                    // the texture is an array (as for the `Sample*`
+                    // methods); texel offsets only ever cover the base
+                    // shape's coordinates.
+                    int locationCoordCount = kBaseTextureTypes[tt].coordCount + isArray;
+                    int offsetCoordCount = kBaseTextureTypes[tt].coordCount;
+
+                    EMIT_LINE_DIRECTIVE();
+
+                    sb << "__target_intrinsic(glsl, \"textureGather($$p, $1, " << componentIndex << ")\")\n";
+                    sb << "__intrinsic_op\n";
+                    sb << "vector<T, 4> Gather" << componentName << "(SamplerState s, ";
+                    sb << "float" << locationCoordCount << " location);\n";
+
+                    EMIT_LINE_DIRECTIVE();
+                    sb << "__target_intrinsic(glsl, \"textureGatherOffset($$p, $1, $2, " << componentIndex << ")\")\n";
+                    sb << "__intrinsic_op\n";
+                    sb << "vector<T, 4> Gather" << componentName << "(SamplerState s, ";
+                    sb << "float" << locationCoordCount << " location, ";
+                    sb << "int" << offsetCoordCount << " offset);\n";
+
+                    EMIT_LINE_DIRECTIVE();
+                    sb << "vector<T, 4> Gather" << componentName << "(SamplerState s, ";
+                    sb << "float" << locationCoordCount << " location, ";
+                    sb << "int" << offsetCoordCount << " offset, ";
+                    sb << "out uint status);\n";
+
+                    // NOTE(review): the array constructor in the GLSL
+                    // string below is spelled `int<N>[]`; GLSL's own
+                    // integer vector types are named `ivec<N>` -- confirm
+                    // whether the emitter rewrites this spelling.
+                    EMIT_LINE_DIRECTIVE();
+                    sb << "__target_intrinsic(glsl, \"textureGatherOffsets($$p, $1, int" << offsetCoordCount << "[]($2, $3, $4, $5), " << componentIndex << ")\")\n";
+                    sb << "__intrinsic_op\n";
+                    sb << "vector<T, 4> Gather" << componentName << "(SamplerState s, ";
+                    sb << "float" << locationCoordCount << " location, ";
+                    sb << "int" << offsetCoordCount << " offset1, ";
+                    sb << "int" << offsetCoordCount << " offset2, ";
+                    sb << "int" << offsetCoordCount << " offset3, ";
+                    sb << "int" << offsetCoordCount << " offset4);\n";
+
+                    EMIT_LINE_DIRECTIVE();
+                    sb << "vector<T, 4> Gather" << componentName << "(SamplerState s, ";
+                    sb << "float" << locationCoordCount << " location, ";
+                    sb << "int" << offsetCoordCount << " offset1, ";
+                    sb << "int" << offsetCoordCount << " offset2, ";
+                    sb << "int" << offsetCoordCount << " offset3, ";
+                    sb << "int" << offsetCoordCount << " offset4, ";
+                    sb << "out uint status);\n";
+                }
+
+                EMIT_LINE_DIRECTIVE();
+                sb << "\n}\n";
+            }
+        }
+    }
+}
+
+
+for (auto op : unaryOps) // Emit declarations of each unary operator in `unaryOps` for every compatible base type.
+{
+    for (auto type : kBaseTypes)
+    {
+        if ((type.flags & op.flags) == 0) // skip types that share no flag bits with this operator
+            continue;
+        // `fixity` picks the `__postfix`/`__prefix` modifier; `qual` makes the operand `in out` for ASSIGNMENT-flagged ops.
+        char const* fixity = (op.flags & POSTFIX) != 0 ? "__postfix " : "__prefix ";
+        char const* qual = (op.flags & ASSIGNMENT) != 0 ? "in out " : "";
+        // All three forms below share the same opcode and return the operand's own type.
+        // scalar version
+        sb << fixity;
+        sb << "__intrinsic_op(" << int(op.opCode) << ") " << type.name << " operator" << op.opName << "(" << qual << type.name << " value);\n";
+
+        // vector version (generic in the element count N)
+        sb << "__generic<let N : int> ";
+        sb << fixity;
+        sb << "__intrinsic_op(" << int(op.opCode) << ") vector<" << type.name << ",N> operator" << op.opName << "(" << qual << "vector<" << type.name << ",N> value);\n";
+
+        // matrix version (generic in both dimensions N and M)
+        sb << "__generic<let N : int, let M : int> ";
+        sb << fixity;
+        sb << "__intrinsic_op(" << int(op.opCode) << ") matrix<" << type.name << ",N,M> operator" << op.opName << "(" << qual << "matrix<" << type.name << ",N,M> value);\n";
+    }
+}
+
+for (auto op : binaryOps) // Emit overloads of each binary operator in `binaryOps` for every compatible base type.
+{
+    for (auto type : kBaseTypes)
+    {
+        if ((type.flags & op.flags) == 0) // skip types that share no flag bits with this operator
+            continue;
+        // Both operands use the base type's name; the result does too unless overridden just below.
+        char const* leftType = type.name;
+        char const* rightType = leftType;
+        char const* resultType = leftType;
+
+        if (op.flags & COMPARISON) resultType = "bool"; // also used as the element type of the vector/matrix results
+
+        char const* leftQual = "";
+        if(op.flags & ASSIGNMENT) leftQual = "in out "; // assignment forms take the left operand as `in out`
+
+        // TODO: handle `SHIFT`
+
+        // scalar version
+        sb << "__intrinsic_op(" << int(op.opCode) << ") " << resultType << " operator" << op.opName << "(" << leftQual << leftType << " left, " << rightType << " right);\n";
+
+        // vector version
+        sb << "__generic<let N : int> ";
+        sb << "__intrinsic_op(" << int(op.opCode) << ") vector<" << resultType << ",N> operator" << op.opName << "(" << leftQual << "vector<" << leftType << ",N> left, vector<" << rightType << ",N> right);\n";
+
+        // matrix version
+
+        // skip matrix-matrix multiply operations here, so that GLSL doesn't see them
+        switch (op.opCode)
+        {
+        case kIROp_Mul:
+        case kIRPseudoOp_MulAssign:
+            break;
+
+        default:
+            sb << "__generic<let N : int, let M : int> ";
+            sb << "__intrinsic_op(" << int(op.opCode) << ") matrix<" << resultType << ",N,M> operator" << op.opName << "(" << leftQual << "matrix<" << leftType << ",N,M> left, matrix<" << rightType << ",N,M> right);\n";
+            break;
+        }
+
+        // We are going to go ahead and explicitly define combined
+        // operations for the scalar-op-vector, etc. cases, rather
+        // than rely on promotion rules.
+
+        // scalar-vector and scalar-matrix (not emitted for ASSIGNMENT-flagged operators)
+        if (!(op.flags & ASSIGNMENT))
+        {
+            sb << "__generic<let N : int> ";
+            sb << "__intrinsic_op(" << int(op.opCode) << ") vector<" << resultType << ",N> operator" << op.opName << "(" << leftQual << leftType << " left, vector<" << rightType << ",N> right);\n";
+
+            sb << "__generic<let N : int, let M : int> ";
+            sb << "__intrinsic_op(" << int(op.opCode) << ") matrix<" << resultType << ",N,M> operator" << op.opName << "(" << leftQual << leftType << " left, matrix<" << rightType << ",N,M> right);\n";
+        }
+
+        // vector-scalar and matrix-scalar (emitted for every operator, including the multiplies skipped above)
+        sb << "__generic<let N : int> ";
+        sb << "__intrinsic_op(" << int(op.opCode) << ") vector<" << resultType << ",N> operator" << op.opName << "(" << leftQual << "vector<" << leftType << ",N> left, " << rightType << " right);\n";
+
+        sb << "__generic<let N : int, let M : int> ";
+        sb << "__intrinsic_op(" << int(op.opCode) << ") matrix<" << resultType << ",N,M> operator" << op.opName << "(" << leftQual << "matrix<" << leftType << ",N,M> left, " << rightType << " right);\n";
+    }
+}
+
+}}}}
diff --git a/source/slang/core.meta.slang.cpp b/source/slang/core.meta.slang.cpp
new file mode 100644
index 000000000..8395f11f5
--- /dev/null
+++ b/source/slang/core.meta.slang.cpp
@@ -0,0 +1,910 @@
+sb << "// Slang `core` library\n"; // Fixed prelude: each statement appends one literal line of Slang source to `sb`.
+sb << "\n";
+sb << "// A type that can be used as an operand for builtins\n";
+sb << "interface __BuiltinType {}\n";
+sb << "\n";
+sb << "// A type that can be used for arithmetic operations\n";
+sb << "interface __BuiltinArithmeticType : __BuiltinType {}\n";
+sb << "\n";
+sb << "// A type that logically has a sign (positive/negative/zero)\n";
+sb << "interface __BuiltinSignedArithmeticType : __BuiltinArithmeticType {}\n";
+sb << "\n";
+sb << "// A type that can represent integers\n";
+sb << "interface __BuiltinIntegerType : __BuiltinArithmeticType {}\n";
+sb << "\n";
+sb << "// A type that can represent non-integers\n";
+sb << "interface __BuiltinRealType : __BuiltinArithmeticType {}\n";
+sb << "\n";
+sb << "// A type that uses a floating-point representation\n";
+sb << "interface __BuiltinFloatingPointType : __BuiltinRealType, __BuiltinSignedArithmeticType {}\n";
+sb << "\n";
+sb << "__generic<T,U> __intrinsic_op(Sequence) U operator,(T left, U right);\n";
+sb << "\n";
+sb << "__generic<T> __intrinsic_op(select) T operator?:(bool condition, T ifTrue, T ifFalse);\n";
+sb << "__generic<T, let N : int> __intrinsic_op(select) vector<T,N> operator?:(vector<bool,N> condition, vector<T,N> ifTrue, vector<T,N> ifFalse);\n";
+sb << "\n";
+sb << ""; // appends nothing; presumably marks where the template's literal text ends and its C++ section begins -- confirm
+
+// We are going to use code generation to produce the
+// declarations for all of our base types.
+// Each entry of `kBaseTypes` becomes a `__builtin_type` struct with interface conformances and `__init` declarations.
+static const int kBaseTypeCount = sizeof(kBaseTypes) / sizeof(kBaseTypes[0]);
+for (int tt = 0; tt < kBaseTypeCount; ++tt)
+{
+    EMIT_LINE_DIRECTIVE();
+    sb << "__builtin_type(" << int(kBaseTypes[tt].tag) << ") struct " << kBaseTypes[tt].name;
+
+    // Declare interface conformances for this type
+    // NOTE(review): `__BuiltinType` is written here and again at the end of the switch for Float/Int/UInt/UInt64/Bool.
+    sb << "\n    : __BuiltinType\n";
+    // The cases fall through on purpose, so each tag also picks up every conformance listed below it.
+    switch (kBaseTypes[tt].tag)
+    {
+    case BaseType::Float:
+        sb << "\n    , __BuiltinFloatingPointType\n";
+        sb << "\n    ,  __BuiltinRealType\n";
+        // fall through to:
+    case BaseType::Int:
+        sb << "\n    ,  __BuiltinSignedArithmeticType\n";
+        // fall through to:
+    case BaseType::UInt:
+    case BaseType::UInt64:
+        sb << "\n    ,  __BuiltinArithmeticType\n";
+        // fall through to:
+    case BaseType::Bool:
+        sb << "\n    ,  __BuiltinType\n";
+        break;
+    // NOTE(review): no case emits `__BuiltinIntegerType` -- confirm whether that is intentional.
+    default:
+        break;
+    }
+
+    sb << "\n{\n";
+
+    // `ss` ranges over every base type, including `tt` itself; only `void` is skipped as a source.
+    // Declare initializers to convert from various other types
+    for (int ss = 0; ss < kBaseTypeCount; ++ss)
+    {
+        // Don't allow conversion from `void`
+        if (kBaseTypes[ss].tag == BaseType::Void)
+            continue;
+
+        // We need to emit a modifier so that the semantic-checking
+        // layer will know it can use these operations for implicit
+        // conversion.
+        ConversionCost conversionCost = getBaseTypeConversionCost(
+            kBaseTypes[tt],
+            kBaseTypes[ss]);
+        // The cost is passed the type being declared (`tt`) first and the source type (`ss`) second.
+        EMIT_LINE_DIRECTIVE();
+        sb << "__implicit_conversion(" << conversionCost << ")\n";
+
+        EMIT_LINE_DIRECTIVE();
+        sb << "__init(" << kBaseTypes[ss].name << " value);\n";
+    }
+
+    sb << "};\n";
+}
+
+
+
+// Declare vector and matrix types
+// `vector` defaults to T = float, N = 4 and exposes its element type as `Element`.
+sb << "__generic<T = float, let N : int = 4> __magic_type(Vector) struct vector\n{\n";
+sb << "    typedef T Element;\n";
+
+// Declare initializer taking a single scalar of the element type
+sb << "    __implicit_conversion(" << kConversionCost_ScalarToVector << ")\n";
+sb << "    __init(T value);\n";
+
+sb << "};\n";
+
+// TODO: Probably need to do similar (presumably for `matrix`, which is declared with an empty body below -- confirm)
+sb << "\n";
+sb << "\n";
+sb << "__generic<T = float, let R : int = 4, let C : int = 4>\n";
+sb << "__magic_type(Matrix)\n";
+sb << "struct matrix {};\n";
+sb << "\n";
+sb << "";
+
+
+
+
+static const struct { // Element types that receive HLSL-style vector/matrix shorthand typedefs.
+    char const* name;
+    char const* glslPrefix; // not read by the loop below
+} kTypes[] =
+{
+    {"float", ""},
+    {"int", "i"},
+    {"uint", "u"},
+    {"bool", "b"},
+};
+static const int kTypeCount = sizeof(kTypes) / sizeof(kTypes[0]);
+// For each element type: <name>1..<name>4 for vectors, and <name>RxC with R, C in 2..4 for matrices.
+for (int tt = 0; tt < kTypeCount; ++tt)
+{
+    // Declare HLSL vector types
+    for (int ii = 1; ii <= 4; ++ii)
+    {
+        sb << "typedef vector<" << kTypes[tt].name << "," << ii << "> " << kTypes[tt].name << ii << ";\n";
+    }
+
+    // Declare HLSL matrix types
+    for (int rr = 2; rr <= 4; ++rr)
+    for (int cc = 2; cc <= 4; ++cc)
+    {
+        sb << "typedef matrix<" << kTypes[tt].name << "," << rr << "," << cc << "> " << kTypes[tt].name << rr << "x" << cc << ";\n";
+    }
+}
+
+// Declare additional built-in generic types
+//        EMIT_LINE_DIRECTIVE();
+// Each is an empty generic struct over `T`, tagged with an IR opcode via `__intrinsic_type` and a `__magic_type` name.
+
+sb << "__generic<T>\n";
+sb << "__intrinsic_type(" << kIROp_ConstantBufferType << ")\n";
+sb << "__magic_type(ConstantBuffer) struct ConstantBuffer {};\n";
+
+sb << "__generic<T>\n";
+sb << "__intrinsic_type(" << kIROp_TextureBufferType << ")\n";
+sb << "__magic_type(TextureBuffer) struct TextureBuffer {};\n";
+
+
+static const char* kComponentNames[]{ "x", "y", "z", "w" };
+static const char* kVectorNames[]{ "", "x", "xy", "xyz", "xyzw" };
+// `kComponentNames[i]` names component i; `kVectorNames[M]` names a parameter covering the first M components.
+// Need to add constructors to the types above
+for (int N = 2; N <= 4; ++N)
+{
+    sb << "__generic<T> __extension vector<T, " << N << ">\n{\n";
+
+    // initialize from N scalars
+    sb << "__init(";
+    for (int ii = 0; ii < N; ++ii)
+    {
+        if (ii != 0) sb << ", ";
+        sb << "T " << kComponentNames[ii];
+    }
+    sb << ");\n";
+
+    // Initialize from an M-vector and then scalars
+    for (int M = 2; M < N; ++M)
+    {
+        sb << "__init(vector<T," << M << "> " << kVectorNames[M];
+        for (int ii = M; ii < N; ++ii)
+        {
+            sb << ", T " << kComponentNames[ii];
+        }
+        sb << ");\n";
+    }
+
+    // initialize from another vector of the same size
+    //
+    // TODO(tfoley): this overlaps with implicit conversions.
+    // We should look for a way that we can define implicit
+    // conversions directly in the stdlib instead...
+    sb << "__generic<U> __init(vector<U," << N << ">);\n";
+
+    // Initialize from two vectors, of size M and N-M
+    for(int M = 2; M <= (N-2); ++M)
+    {
+        int K = N - M;
+        SLANG_ASSERT(K >= 2);
+        // Name the second vector after the components it fills (M..N-1, e.g. `zw`) so it cannot repeat the first name.
+        sb << "__init(vector<T," << M << "> " << kVectorNames[M];
+        sb << ", vector<T," << K << "> ";
+        for (int ii = 0; ii < K; ++ii)
+        {
+            sb << kComponentNames[M + ii];
+        }
+        sb << ");\n";
+    }
+
+    sb << "}\n";
+}
+
+// The above extension was generic in the *type* of the vector,
+// but explicit in the *size*. We will now declare an extension
+// for each builtin type that is generic in the size.
+//
+for (int tt = 0; tt < kBaseTypeCount; ++tt)
+{
+    if(kBaseTypes[tt].tag == BaseType::Void) continue;
+    // Open an extension of vector<Type, N> for this non-void base type.
+    sb << "__generic<let N : int> __extension vector<"
+        << kBaseTypes[tt].name << ",N>\n{\n";
+
+    for (int ff = 0; ff < kBaseTypeCount; ++ff)
+    {
+        if(kBaseTypes[ff].tag == BaseType::Void) continue;
+
+        // We need a constructor to make a vector from a scalar
+        // of another type.
+        // The same-type case (tt == ff) is not emitted here.
+        if( tt != ff )
+        {
+            auto cost = getBaseTypeConversionCost(
+                kBaseTypes[tt],
+                kBaseTypes[ff]);
+            cost += kConversionCost_ScalarToVector;
+            // The advertised cost is the scalar conversion cost plus the scalar-to-vector cost.
+            sb << "    __implicit_conversion(" << cost << ")\n";
+            sb << "    __init(" << kBaseTypes[ff].name << " value);\n";
+        }
+    }
+
+    sb << "}\n";
+}
+
+for( int R = 2; R <= 4; ++R ) // Emit one `matrix<T, R, C>` extension per size from 2x2 to 4x4.
+for( int C = 2; C <= 4; ++C )
+{
+    sb << "__generic<T> __extension matrix<T, " << R << "," << C << ">\n{\n";
+
+    // initialize from R*C scalars
+    sb << "__init(";
+    for( int ii = 0; ii < R; ++ii )
+    for( int jj = 0; jj < C; ++jj )
+    {
+        if ((ii+jj) != 0) sb << ", "; // true for every element except the first (ii == jj == 0)
+        sb << "T m" << ii << jj; // parameter names are m<row><column>, e.g. m00, m01
+    }
+    sb << ");\n";
+
+    // Initialize from R C-vectors
+    sb << "__init(";
+    for (int ii = 0; ii < R; ++ii)
+    {
+        if(ii != 0) sb << ", ";
+        sb << "vector<T," << C << "> row" << ii;
+    }
+    sb << ");\n";
+
+
+    // initialize from another matrix of the same size
+    //
+    // TODO(tfoley): See comment about how this overlaps
+    // with implicit conversion, in the `vector` case above
+    sb << "__generic<U> __init(matrix<U," << R << ", " << C << ">);\n";
+    // The loop below covers every (rr, cc) with rr >= R and cc >= C up to 4x4, except R x C itself.
+    // initialize from a matrix of larger size
+    for(int rr = R; rr <= 4; ++rr)
+    for( int cc = C; cc <= 4; ++cc )
+    {
+        if(rr == R && cc == C) continue;
+        sb << "__init(matrix<T," << rr << "," << cc << "> value);\n";
+    }
+
+    sb << "}\n";
+}
+
+// Declare built-in texture and sampler types
+// Both sampler structs use the `SamplerState` magic type and `kIROp_SamplerType`; they differ only in the flavor value.
+// NOTE(review): neither `struct ... {};` string ends in "\n", so the text emitted next continues on the same output line.
+
+sb << "__magic_type(SamplerState," << int(SamplerStateType::Flavor::SamplerState) << ")\n";
+sb << "__intrinsic_type(" << kIROp_SamplerType << ", " << int(SamplerStateType::Flavor::SamplerState) << ")\n";
+sb << "struct SamplerState {};";
+        
+sb << "__magic_type(SamplerState," << int(SamplerStateType::Flavor::SamplerComparisonState) << ")\n";
+sb << "__intrinsic_type(" << kIROp_SamplerType << ", " << int(SamplerStateType::Flavor::SamplerComparisonState) << ")\n";
+sb << "struct SamplerComparisonState {};";
+
+// TODO(tfoley): Need to handle `RW*` variants of texture types as well...
+static const struct {
+    char const*			name; // struct name stem emitted for this shape
+    TextureType::Shape	baseShape;
+    int					coordCount; // used as the N in the `floatN`/`intN` coordinate types emitted for this shape
+} kBaseTextureTypes[] = {
+    { "Texture1D",		TextureType::Shape1D,	1 },
+    { "Texture2D",		TextureType::Shape2D,	2 },
+    { "Texture3D",		TextureType::Shape3D,	3 },
+    { "TextureCube",	TextureType::ShapeCube,	3 },
+};
+static const int kBaseTextureTypeCount = sizeof(kBaseTextureTypes) / sizeof(kBaseTextureTypes[0]);
+
+// NOTE(review): the "RW" prefix in the table below may make the TODO above stale -- confirm.
+static const struct {
+    char const*         name; // prefix placed in front of the texture type name
+    SlangResourceAccess access;
+} kBaseTextureAccessLevels[] = {
+    { "",                   SLANG_RESOURCE_ACCESS_READ },
+    { "RW",                 SLANG_RESOURCE_ACCESS_READ_WRITE },
+    { "RasterizerOrdered",  SLANG_RESOURCE_ACCESS_RASTER_ORDERED },
+};
+static const int kBaseTextureAccessLevelCount = sizeof(kBaseTextureAccessLevels) / sizeof(kBaseTextureAccessLevels[0]);
+
+for (int tt = 0; tt < kBaseTextureTypeCount; ++tt)
+{
+    char const* name = kBaseTextureTypes[tt].name;
+    TextureType::Shape baseShape = kBaseTextureTypes[tt].baseShape;
+
+    for (int isArray = 0; isArray < 2; ++isArray)
+    {
+        // Arrays of 3D textures aren't allowed
+        if (isArray && baseShape == TextureType::Shape3D) continue;
+
+        for (int isMultisample = 0; isMultisample < 2; ++isMultisample)
+        for (int accessLevel = 0; accessLevel < kBaseTextureAccessLevelCount; ++accessLevel)
+        {
+            auto access = kBaseTextureAccessLevels[accessLevel].access;
+
+            // TODO: any constraints to enforce on what gets to be multisampled?
+
+            unsigned flavor = baseShape;
+            if (isArray)		flavor |= TextureType::ArrayFlag;
+            if (isMultisample)	flavor |= TextureType::MultisampleFlag;
+//                        if (isShadow)		flavor |= TextureType::ShadowFlag;
+
+            flavor |= (access << 8);
+
+            // emit a generic signature
+            // TODO: allow for multisample count to come in as well...
+            sb << "__generic<T = float4> ";
+
+            sb << "__magic_type(Texture," << int(flavor) << ")\n";
+            sb << "__intrinsic_type(" << kIROp_TextureType << ", " << flavor << ")\n";
+            sb << "struct ";
+            sb << kBaseTextureAccessLevels[accessLevel].name;
+            sb << name;
+            if (isMultisample) sb << "MS";
+            if (isArray) sb << "Array";
+//                        if (isShadow) sb << "Shadow";
+            sb << "\n{";
+
+            if( !isMultisample )
+            {
+                sb << "float CalculateLevelOfDetail(SamplerState s, ";
+                sb << "float" << kBaseTextureTypes[tt].coordCount << " location);\n";
+
+                sb << "float CalculateLevelOfDetailUnclamped(SamplerState s, ";
+                sb << "float" << kBaseTextureTypes[tt].coordCount << " location);\n";
+            }
+
+            // `GetDimensions`
+
+            for(int isFloat = 0; isFloat < 2; ++isFloat)
+            for(int includeMipInfo = 0; includeMipInfo < 2; ++includeMipInfo)
+            {
+                {
+                    sb << "__glsl_version(450)\n";
+                    sb << "__target_intrinsic(glsl, \"(";
+
+                    int aa = 0;
+                    String lodStr = "0";
+                    if (includeMipInfo)
+                    {
+                        int mipLevelArg = aa++;
+                        lodStr = "int($";
+                        lodStr.append(mipLevelArg);
+                        lodStr.append(")");
+                    }
+
+                    int cc = 0;
+                    switch(baseShape)
+                    {
+                    case TextureType::Shape1D:
+                        sb << "($" << aa++ << " = textureSize($P, " << lodStr << "))";
+                        cc = 1;
+                        break;
+
+                    case TextureType::Shape2D:
+                    case TextureType::ShapeCube:
+                        sb << "($" << aa++ << " = textureSize($P, " << lodStr << ").x)";
+                        sb << ", ($" << aa++ << " = textureSize($P, " << lodStr << ").y)";
+                        cc = 2;
+                        break;
+
+                    case TextureType::Shape3D:
+                        sb << "($" << aa++ << " = textureSize($P, " << lodStr << ").x)";
+                        sb << ", ($" << aa++ << " = textureSize($P, " << lodStr << ").y)";
+                        sb << ", ($" << aa++ << " = textureSize($P, " << lodStr << ").z)";
+                        cc = 3;
+                        break;
+
+                    default:
+                        SLANG_UNEXPECTED("unhandled resource shape");
+                        break;
+                    }
+
+                    if(isArray)
+                    {
+                        sb << ", ($" << aa++ << " = textureSize($P, " << lodStr << ")." << kComponentNames[cc] << ")";
+                    }
+
+                    if(isMultisample)
+                    {
+                        sb << ", ($" << aa++ << " = textureSamples($P))";
+                    }
+
+                    if (includeMipInfo)
+                    {
+                        sb << ", ($" << aa++ << " = textureQueryLevels($P))";
+                    }
+
+
+                    sb << ")\")\n";
+                    sb << "__intrinsic_op\n";
+
+                }
+
+                char const* t = isFloat ? "out float " : "out uint ";
+
+                sb << "void GetDimensions(";
+                if(includeMipInfo)
+                    sb << "uint mipLevel, ";
+
+                switch(baseShape)
+                {
+                case TextureType::Shape1D:
+                    sb << t << "width";
+                    break;
+
+                case TextureType::Shape2D:
+                case TextureType::ShapeCube:
+                    sb << t << "width,";
+                    sb << t << "height";
+                    break;
+
+                case TextureType::Shape3D:
+                    sb << t << "width,";
+                    sb << t << "height,";
+                    sb << t << "depth";
+                    break;
+
+                default:
+                    assert(!"unexpected");
+                    break;
+                }
+
+                if(isArray)
+                {
+                    sb << ", " << t << "elements";
+                }
+
+                if(isMultisample)
+                {
+                    sb << ", " << t << "sampleCount";
+                }
+
+                if(includeMipInfo)
+                    sb << ", " << t << "numberOfLevels";
+
+                sb << ");\n";
+            }
+
+            // `GetSamplePosition()`
+            if( isMultisample )
+            {
+                sb << "float2 GetSamplePosition(int s);\n";
+            }
+
+            // `Load()`: texel fetch by integer coordinates, plus overloads taking a texel `offset` and an `out uint status`.
+            // Skipped once coordCount + isArray reaches 4 (with the shape table used here, only the cube-array case).
+            if( kBaseTextureTypes[tt].coordCount + isArray < 4 )
+            {
+                int loadCoordCount = kBaseTextureTypes[tt].coordCount + isArray + (isMultisample?0:1);
+                // Non-multisample loads carry the mip level as one extra trailing component of `location`.
+                // When translating to GLSL, we need to break apart the `location` argument.
+                //
+                // TODO: this should really be handled by having this member actually get lowered!
+                static const char* kGLSLLoadCoordsSwizzle[] = { "", "", "x", "xy", "xyz", "xyzw" };
+                static const char* kGLSLLoadLODSwizzle[]    = { "", "", "y", "z", "w", "error" };
+
+                if (isMultisample)
+                {
+                    sb << "__target_intrinsic(glsl, \"texelFetch($P, $0, $1)\")\n";
+                }
+                else
+                {
+                    sb << "__target_intrinsic(glsl, \"texelFetch($P, ($0)." << kGLSLLoadCoordsSwizzle[loadCoordCount] << ", ($0)." << kGLSLLoadLODSwizzle[loadCoordCount] << ")\")\n";
+                }
+                sb << "__intrinsic_op\n";
+                sb << "T Load(";
+                sb << "int" << loadCoordCount << " location";
+                if(isMultisample)
+                {
+                    sb << ", int sampleIndex";
+                }
+                sb << ");\n";
+                // Offset overload: GLSL has no 4-argument `texelFetch`; the form that takes a texel offset is `texelFetchOffset`.
+                if (isMultisample) // NOTE(review): GLSL defines no multisample `texelFetchOffset` overload -- confirm this mapping
+                {
+                    sb << "__target_intrinsic(glsl, \"texelFetchOffset($P, $0, $1, $2)\")\n";
+                }
+                else
+                {
+                    sb << "__target_intrinsic(glsl, \"texelFetchOffset($P, ($0)." << kGLSLLoadCoordsSwizzle[loadCoordCount] << ", ($0)." << kGLSLLoadLODSwizzle[loadCoordCount] << ", $1)\")\n";
+                }
+                sb << "__intrinsic_op\n";
+                sb << "T Load(";
+                sb << "int" << loadCoordCount << " location";
+                if(isMultisample)
+                {
+                    sb << ", int sampleIndex";
+                }
+                sb << ", int" << kBaseTextureTypes[tt].coordCount << " offset"; // spatial dimensions only, matching the `status` overload below
+                sb << ");\n";
+                // Variant that also reports `status`; it carries no `__target_intrinsic` mapping or `__intrinsic_op` modifier.
+                // NOTE(review): this file looks generated from core.meta.slang; mirror the offset fixes above in that template.
+                sb << "T Load(";
+                sb << "int" << loadCoordCount << " location";
+                if(isMultisample)
+                {
+                    sb << ", int sampleIndex";
+                }
+                sb << ", int" << kBaseTextureTypes[tt].coordCount << " offset";
+                sb << ", out uint status";
+                sb << ");\n";
+            }
+
+            if(baseShape != TextureType::ShapeCube)
+            {
+                // subscript operator
+                sb << "__intrinsic_op __subscript(uint" << kBaseTextureTypes[tt].coordCount + isArray << " location) -> T;\n";
+            }
+
+            if( !isMultisample )
+            {
+                // `Sample()`
+
+                sb << "__target_intrinsic(glsl, \"texture($p, $1)\")\n";
+
+                // TODO: only enable if IR is being used?
+                sb << "__intrinsic_op(sample)\n";
+
+                sb << "__intrinsic_op\n";
+                sb << "T Sample(SamplerState s, ";
+                sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location);\n";
+
+                if( baseShape != TextureType::ShapeCube )
+                {
+                    sb << "__target_intrinsic(glsl, \"textureOffset($p, $1, $2)\")\n";
+                    sb << "__intrinsic_op\n";
+                    sb << "T Sample(SamplerState s, ";
+                    sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, ";
+                    sb << "int" << kBaseTextureTypes[tt].coordCount << " offset);\n";
+                }
+
+                sb << "T Sample(SamplerState s, ";
+                sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, ";
+                if( baseShape != TextureType::ShapeCube )
+                {
+                    sb << "int" << kBaseTextureTypes[tt].coordCount << " offset, ";
+                }
+                sb << "float clamp);\n";
+
+                sb << "T Sample(SamplerState s, ";
+                sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, ";
+                if( baseShape != TextureType::ShapeCube )
+                {
+                    sb << "int" << kBaseTextureTypes[tt].coordCount << " offset, ";
+                }
+                sb << "float clamp, out uint status);\n";
+
+
+                // `SampleBias()`
+                sb << "__target_intrinsic(glsl, \"texture($p, $1, $2)\")\n";
+                sb << "__intrinsic_op\n";
+                sb << "T SampleBias(SamplerState s, ";
+                sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, float bias);\n";
+
+                if( baseShape != TextureType::ShapeCube )
+                {
+                    sb << "__target_intrinsic(glsl, \"textureOffset($p, $1, $2, $3)\")\n";
+                    sb << "__intrinsic_op\n";
+                    sb << "T SampleBias(SamplerState s, ";
+                    sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, float bias, ";
+                    sb << "int" << kBaseTextureTypes[tt].coordCount << " offset);\n";
+                }
+
+                // `SampleCmp()` and `SampleCmpLevelZero`
+                sb << "T SampleCmp(SamplerComparisonState s, ";
+                sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, ";
+                sb << "float compareValue";
+                sb << ");\n";
+
+                int baseCoordCount = kBaseTextureTypes[tt].coordCount;
+                int arrCoordCount = baseCoordCount + isArray;
+                if (arrCoordCount < 3)
+                {
+                    int extCoordCount = arrCoordCount + 1;
+
+                    if (extCoordCount < 3)
+                        extCoordCount = 3;
+
+                    sb << "__target_intrinsic(glsl, \"textureLod($p, ";
+
+                    sb << "vec" << extCoordCount << "($1,";
+                    for (int ii = arrCoordCount; ii < extCoordCount - 1; ++ii)
+                    {
+                        sb << " 0.0,";
+                    }
+                    sb << "$2)";
+
+                    sb << ", 0.0)\")\n";
+                }
+                else if(arrCoordCount <= 3)
+                {
+                    int extCoordCount = arrCoordCount + 1;
+
+                    if (extCoordCount < 3)
+                        extCoordCount = 3;
+
+                    sb << "__target_intrinsic(glsl, \"textureGrad($p, ";
+
+                    sb << "vec" << extCoordCount << "($1,";
+                    for (int ii = arrCoordCount; ii < extCoordCount - 1; ++ii)
+                    {
+                        sb << " 0.0,";
+                    }
+                    sb << "$2)";
+
+                    // Construct gradients
+                    sb << ", vec" << baseCoordCount << "(0.0)";
+                    sb << ", vec" << baseCoordCount << "(0.0)";
+                    sb << ")\")\n";
+                }
+                sb << "__intrinsic_op\n";
+                sb << "T SampleCmpLevelZero(SamplerComparisonState s, ";
+                sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, ";
+                sb << "float compareValue";
+                sb << ");\n";
+
+                if( baseShape != TextureType::ShapeCube ) // offset overloads are emitted for every shape except cube maps
+                {
+                    // Note(tfoley): MSDN seems confused, and claims that the `offset`
+                    // parameter for `SampleCmp` is available for everything but 3D
+                    // textures, while `Sample` and `SampleBias` are consistent in
+                    // saying they only exclude `offset` for cube maps (which makes
+                    // sense). I'm going to assume the documentation for `SampleCmp`
+                    // is just wrong.
+                    // Comparison sampling takes a `SamplerComparisonState`, matching the offset-less overloads emitted earlier.
+                    sb << "T SampleCmp(SamplerComparisonState s, ";
+                    sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, ";
+                    sb << "float compareValue, ";
+                    sb << "int" << kBaseTextureTypes[tt].coordCount << " offset);\n";
+                    // NOTE(review): this file looks generated from core.meta.slang; mirror the sampler-type fix in that template.
+                    sb << "T SampleCmpLevelZero(SamplerComparisonState s, ";
+                    sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, ";
+                    sb << "float compareValue, ";
+                    sb << "int" << kBaseTextureTypes[tt].coordCount << " offset);\n";
+                }
+
+
+                sb << "__target_intrinsic(glsl, \"textureGrad($p, $1, $2, $3)\")\n";
+                sb << "__intrinsic_op(sampleGrad)\n";
+                sb << "T SampleGrad(SamplerState s, ";
+                sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, ";
+                sb << "float" << kBaseTextureTypes[tt].coordCount << " gradX, ";
+                sb << "float" << kBaseTextureTypes[tt].coordCount << " gradY";
+                sb << ");\n";
+
+                if( baseShape != TextureType::ShapeCube )
+                {
+                    sb << "__target_intrinsic(glsl, \"textureGradOffset($p, $1, $2, $3, $4)\")\n";
+                    sb << "__intrinsic_op(sampleGrad)\n";
+                    sb << "T SampleGrad(SamplerState s, ";
+                    sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, ";
+                    sb << "float" << kBaseTextureTypes[tt].coordCount << " gradX, ";
+                    sb << "float" << kBaseTextureTypes[tt].coordCount << " gradY, ";
+                    sb << "int" << kBaseTextureTypes[tt].coordCount << " offset);\n";
+                }
+
+                // `SampleLevel`
+
+                sb << "__target_intrinsic(glsl, \"textureLod($p, $1, $2)\")\n";
+                sb << "__intrinsic_op\n";
+                sb << "T SampleLevel(SamplerState s, ";
+                sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, ";
+                sb << "float level);\n";
+
+                if( baseShape != TextureType::ShapeCube )
+                {
+                    sb << "__target_intrinsic(glsl, \"textureLodOffset($p, $1, $2, $3)\")\n";
+                    sb << "__intrinsic_op\n";
+                    sb << "T SampleLevel(SamplerState s, ";
+                    sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, ";
+                    sb << "float level, ";
+                    sb << "int" << kBaseTextureTypes[tt].coordCount << " offset);\n";
+                }
+            }
+
+            sb << "\n};\n";
+
+            // `Gather*()` operations are handled via an `extension` declaration,
+            // because this lets us capture the element type of the texture.
+            //
+            // TODO: longer-term there should be something like a `TextureElementType`
+            // interface, that both scalars and vectors implement, that then exposes
+            // a `Scalar` associated type, and `Gather` can return `vector<T.Scalar, 4>`.
+            //
+            // Each entry pairs the generic parameter list of the `__extension` with
+            // the texture element type that the extension is declared for.
+            static const struct {
+                char const* genericPrefix;
+                char const* elementType;
+            } kGatherExtensionCases[] = {
+                { "__generic<T, let N : int>", "vector<T,N>" },
+
+                // TODO: need a case here for scalars `T`, but also
+                // need to ensure that case doesn't accidentally match
+                // for `T = vector<...>`, which requires actual checking
+                // of constraints on generic parameters.
+            };
+
+            // Array textures take one extra `location` coordinate (the array slice),
+            // just as the `SampleLevel` declarations emitted earlier do; texel offsets
+            // never include the slice, so they keep the base coordinate count.
+            // NOTE(review): this file looks like generator output; if so, mirror this
+            // change in the `.meta.slang` source it was produced from.
+            int const gatherLocationCoordCount = kBaseTextureTypes[tt].coordCount + isArray;
+            int const gatherOffsetCoordCount = kBaseTextureTypes[tt].coordCount;
+
+            for(auto cc : kGatherExtensionCases)
+            {
+                // TODO: this should really be an `if` around the entire `Gather` logic
+                if (isMultisample) break;
+
+                EMIT_LINE_DIRECTIVE();
+                sb << cc.genericPrefix << " __extension ";
+                sb << kBaseTextureAccessLevels[accessLevel].name;
+                sb << name;
+                if (isArray) sb << "Array";
+                sb << "<" << cc.elementType << " >";
+                sb << "\n{\n";
+
+
+                // `Gather`
+                // (tricky because it returns a 4-vector of the element type
+                // of the texture components...)
+                //
+                // TODO: is it actually correct to restrict these so that, e.g.,
+                // `GatherAlpha()` isn't allowed on `Texture2D<float3>` because
+                // it nominally doesn't have an alpha component?
+                //
+                // The unsuffixed `Gather` shares component index 0 with `GatherRed`.
+                static const struct {
+                    int componentIndex;
+                    char const* componentName;
+                } kGatherComponents[] = {
+                    { 0, "" },
+                    { 0, "Red" },
+                    { 1, "Green" },
+                    { 2, "Blue" },
+                    { 3, "Alpha" },
+                };
+
+                for(auto kk : kGatherComponents)
+                {
+                    auto componentIndex = kk.componentIndex;
+                    auto componentName = kk.componentName;
+
+                    // (sampler, location)
+                    EMIT_LINE_DIRECTIVE();
+
+                    sb << "__target_intrinsic(glsl, \"textureGather($p, $1, " << componentIndex << ")\")\n";
+                    sb << "__intrinsic_op\n";
+                    sb << "vector<T, 4> Gather" << componentName << "(SamplerState s, ";
+                    sb << "float" << gatherLocationCoordCount << " location);\n";
+
+                    // (sampler, location, offset)
+                    EMIT_LINE_DIRECTIVE();
+                    sb << "__target_intrinsic(glsl, \"textureGatherOffset($p, $1, $2, " << componentIndex << ")\")\n";
+                    sb << "__intrinsic_op\n";
+                    sb << "vector<T, 4> Gather" << componentName << "(SamplerState s, ";
+                    sb << "float" << gatherLocationCoordCount << " location, ";
+                    sb << "int" << gatherOffsetCoordCount << " offset);\n";
+
+                    // (sampler, location, offset, out status): declared without a
+                    // GLSL target intrinsic, as in the original.
+                    EMIT_LINE_DIRECTIVE();
+                    sb << "vector<T, 4> Gather" << componentName << "(SamplerState s, ";
+                    sb << "float" << gatherLocationCoordCount << " location, ";
+                    sb << "int" << gatherOffsetCoordCount << " offset, ";
+                    sb << "out uint status);\n";
+
+                    // (sampler, location, four per-texel offsets)
+                    EMIT_LINE_DIRECTIVE();
+                    sb << "__target_intrinsic(glsl, \"textureGatherOffsets($p, $1, int" << gatherOffsetCoordCount << "[]($2, $3, $4, $5), " << componentIndex << ")\")\n";
+                    sb << "__intrinsic_op\n";
+                    sb << "vector<T, 4> Gather" << componentName << "(SamplerState s, ";
+                    sb << "float" << gatherLocationCoordCount << " location, ";
+                    sb << "int" << gatherOffsetCoordCount << " offset1, ";
+                    sb << "int" << gatherOffsetCoordCount << " offset2, ";
+                    sb << "int" << gatherOffsetCoordCount << " offset3, ";
+                    sb << "int" << gatherOffsetCoordCount << " offset4);\n";
+
+                    // (sampler, location, four per-texel offsets, out status)
+                    EMIT_LINE_DIRECTIVE();
+                    sb << "vector<T, 4> Gather" << componentName << "(SamplerState s, ";
+                    sb << "float" << gatherLocationCoordCount << " location, ";
+                    sb << "int" << gatherOffsetCoordCount << " offset1, ";
+                    sb << "int" << gatherOffsetCoordCount << " offset2, ";
+                    sb << "int" << gatherOffsetCoordCount << " offset3, ";
+                    sb << "int" << gatherOffsetCoordCount << " offset4, ";
+                    sb << "out uint status);\n";
+                }
+
+                EMIT_LINE_DIRECTIVE();
+                sb << "\n}\n";
+            }
+        }
+    }
+}
+
+
+// Declare every unary operator for each base type whose flags overlap the
+// operator's flags, in scalar, vector and matrix form.
+for (auto unaryOp : unaryOps)
+{
+    // These depend only on the operator, so work them out once per operator.
+    char const* fixityKeyword = "__prefix ";
+    if ((unaryOp.flags & POSTFIX) != 0)
+        fixityKeyword = "__postfix ";
+
+    // Operators flagged as assignments take their operand as `in out`.
+    char const* operandQualifier = "";
+    if ((unaryOp.flags & ASSIGNMENT) != 0)
+        operandQualifier = "in out ";
+
+    int const opCodeValue = int(unaryOp.opCode);
+
+    for (auto baseType : kBaseTypes)
+    {
+        if ((baseType.flags & unaryOp.flags) != 0)
+        {
+            // scalar form
+            sb << fixityKeyword;
+            sb << "__intrinsic_op(" << opCodeValue << ") ";
+            sb << baseType.name << " operator" << unaryOp.opName;
+            sb << "(" << operandQualifier << baseType.name << " value);\n";
+
+            // vector form
+            sb << "__generic<let N : int> ";
+            sb << fixityKeyword;
+            sb << "__intrinsic_op(" << opCodeValue << ") ";
+            sb << "vector<" << baseType.name << ",N> operator" << unaryOp.opName;
+            sb << "(" << operandQualifier << "vector<" << baseType.name << ",N> value);\n";
+
+            // matrix form
+            sb << "__generic<let N : int, let M : int> ";
+            sb << fixityKeyword;
+            sb << "__intrinsic_op(" << opCodeValue << ") ";
+            sb << "matrix<" << baseType.name << ",N,M> operator" << unaryOp.opName;
+            sb << "(" << operandQualifier << "matrix<" << baseType.name << ",N,M> value);\n";
+        }
+    }
+}
+
+// Declare every binary operator for each base type whose flags overlap the
+// operator's flags: scalar, vector and matrix forms, plus the mixed
+// scalar/vector and scalar/matrix forms.
+for (auto binaryOp : binaryOps)
+{
+    // Operator-wide properties, independent of the operand type.
+    int const opCodeValue = int(binaryOp.opCode);
+    bool const isComparison = (binaryOp.flags & COMPARISON) != 0;
+    bool const isAssignment = (binaryOp.flags & ASSIGNMENT) != 0;
+
+    // Assignment operators modify their left operand.
+    char const* leftQualifier = isAssignment ? "in out " : "";
+
+    // skip matrix-matrix multiply operations here, so that GLSL doesn't see them
+    bool emitMatrixMatrix = true;
+    switch (binaryOp.opCode)
+    {
+    case kIROp_Mul:
+    case kIRPseudoOp_MulAssign:
+        emitMatrixMatrix = false;
+        break;
+
+    default:
+        break;
+    }
+
+    // TODO: handle `SHIFT`
+
+    for (auto baseType : kBaseTypes)
+    {
+        if ((baseType.flags & binaryOp.flags) == 0)
+            continue;
+
+        // Both operands use the base type; comparisons yield `bool`.
+        char const* operandType = baseType.name;
+        char const* resultType = isComparison ? "bool" : operandType;
+
+        // scalar op scalar
+        sb << "__intrinsic_op(" << opCodeValue << ") " << resultType;
+        sb << " operator" << binaryOp.opName << "(";
+        sb << leftQualifier << operandType << " left, " << operandType << " right);\n";
+
+        // vector op vector
+        sb << "__generic<let N : int> ";
+        sb << "__intrinsic_op(" << opCodeValue << ") vector<" << resultType << ",N>";
+        sb << " operator" << binaryOp.opName << "(";
+        sb << leftQualifier << "vector<" << operandType << ",N> left, vector<" << operandType << ",N> right);\n";
+
+        // matrix op matrix
+        if (emitMatrixMatrix)
+        {
+            sb << "__generic<let N : int, let M : int> ";
+            sb << "__intrinsic_op(" << opCodeValue << ") matrix<" << resultType << ",N,M>";
+            sb << " operator" << binaryOp.opName << "(";
+            sb << leftQualifier << "matrix<" << operandType << ",N,M> left, matrix<" << operandType << ",N,M> right);\n";
+        }
+
+        // The mixed scalar/vector and scalar/matrix combinations are declared
+        // explicitly rather than left to promotion rules.
+
+        // scalar op vector and scalar op matrix: not emitted for assignment
+        // operators, so the left qualifier is always empty here.
+        if (!isAssignment)
+        {
+            sb << "__generic<let N : int> ";
+            sb << "__intrinsic_op(" << opCodeValue << ") vector<" << resultType << ",N>";
+            sb << " operator" << binaryOp.opName << "(";
+            sb << operandType << " left, vector<" << operandType << ",N> right);\n";
+
+            sb << "__generic<let N : int, let M : int> ";
+            sb << "__intrinsic_op(" << opCodeValue << ") matrix<" << resultType << ",N,M>";
+            sb << " operator" << binaryOp.opName << "(";
+            sb << operandType << " left, matrix<" << operandType << ",N,M> right);\n";
+        }
+
+        // vector op scalar
+        sb << "__generic<let N : int> ";
+        sb << "__intrinsic_op(" << opCodeValue << ") vector<" << resultType << ",N>";
+        sb << " operator" << binaryOp.opName << "(";
+        sb << leftQualifier << "vector<" << operandType << ",N> left, " << operandType << " right);\n";
+
+        // matrix op scalar
+        sb << "__generic<let N : int, let M : int> ";
+        sb << "__intrinsic_op(" << opCodeValue << ") matrix<" << resultType << ",N,M>";
+        sb << " operator" << binaryOp.opName << "(";
+        sb << leftQualifier << "matrix<" << operandType << ",N,M> left, " << operandType << " right);\n";
+    }
+}
+
+sb << "\n";
+sb << "";
diff --git a/source/slang/glsl.meta.slang b/source/slang/glsl.meta.slang
new file mode 100644
index 000000000..878cea188
--- /dev/null
+++ b/source/slang/glsl.meta.slang
@@ -0,0 +1,205 @@
+// Slang GLSL compatibility library
+
+${{{{
+
+static const struct {
+    char const* name;
+    char const* glslPrefix;
+} kTypes[] =
+{
+    {"float", ""},
+    {"int", "i"},
+    {"uint", "u"},
+    {"bool", "b"},
+};
+static const int kTypeCount = sizeof(kTypes) / sizeof(kTypes[0]);
+
+for( int tt = 0; tt < kTypeCount; ++tt )
+{
+    // Declare GLSL aliases for HLSL types
+    for (int vv = 2; vv <= 4; ++vv)
+    {
+        sb << "typedef vector<" << kTypes[tt].name << "," << vv << "> " << kTypes[tt].glslPrefix << "vec" << vv << ";\n";
+        sb << "typedef matrix<" << kTypes[tt].name << "," << vv << "," << vv << "> " << kTypes[tt].glslPrefix << "mat" << vv << ";\n";
+    }
+    for (int rr = 2; rr <= 4; ++rr)
+    for (int cc = 2; cc <= 4; ++cc)
+    {
+        sb << "typedef matrix<" << kTypes[tt].name << "," << rr << "," << cc << "> " << kTypes[tt].glslPrefix << "mat" << rr << "x" << cc << ";\n";
+    }
+}
+
+// `operator*` overloads for vector and matrix operands, each bound to a named
+// intrinsic op and generic over any `__BuiltinArithmeticType`.
+
+// vector-scalar and scalar-vector
+sb << "__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op(mul) vector<T,N> operator*(vector<T,N> x, T y);\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op(mul) vector<T,N> operator*(T x, vector<T,N> y);\n";
+
+// matrix-scalar and scalar-matrix
+sb << "__generic<T : __BuiltinArithmeticType, let N : int, let M :int> __intrinsic_op(mul) matrix<T,N,M> operator*(matrix<T,N,M> x, T y);\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int, let M :int> __intrinsic_op(mul) matrix<T,N,M> operator*(T x, matrix<T,N,M> y);\n";
+
+// vector-vector (dot product, returning a scalar `T`)
+// NOTE(review): GLSL defines `vector * vector` as a component-wise product,
+// not a dot product; confirm this overload is intended.
+sb << "__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op(dot) T operator*(vector<T,N> x, vector<T,N> y);\n";
+
+// vector-matrix: an N-vector times an NxM matrix gives an M-vector
+sb << "__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op(mulVectorMatrix) vector<T,M> operator*(vector<T,N> x, matrix<T,N,M> y);\n";
+
+// matrix-vector: an NxM matrix times an M-vector gives an N-vector
+sb << "__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op(mulMatrixVector) vector<T,N> operator*(matrix<T,N,M> x, vector<T,M> y);\n";
+
+// matrix-matrix: an RxN matrix times an NxC matrix gives an RxC matrix
+sb << "__generic<T : __BuiltinArithmeticType, let R : int, let N : int, let C : int> __intrinsic_op(mulMatrixMatrix) matrix<T,R,C> operator*(matrix<T,R,N> x, matrix<T,N,C> y);\n";
+
+
+
+//
+
+// TODO(tfoley): Need to handle `RW*` variants of texture types as well...
+static const struct {
+    char const*			name;
+    TextureType::Shape	baseShape;
+    int					coordCount;
+} kBaseTextureTypes[] = {
+    { "1D",		TextureType::Shape1D,	1 },
+    { "2D",		TextureType::Shape2D,	2 },
+    { "3D",		TextureType::Shape3D,	3 },
+    { "Cube",	TextureType::ShapeCube,	3 },
+    { "Buffer", TextureType::ShapeBuffer,   1 },
+};
+static const int kBaseTextureTypeCount = sizeof(kBaseTextureTypes) / sizeof(kBaseTextureTypes[0]);
+
+
+static const struct {
+    char const*         name;
+    SlangResourceAccess access;
+} kBaseTextureAccessLevels[] = {
+    { "",                   SLANG_RESOURCE_ACCESS_READ },
+    { "RW",                 SLANG_RESOURCE_ACCESS_READ_WRITE },
+    { "RasterizerOrdered",  SLANG_RESOURCE_ACCESS_RASTER_ORDERED },
+};
+static const int kBaseTextureAccessLevelCount = sizeof(kBaseTextureAccessLevels) / sizeof(kBaseTextureAccessLevels[0]);
+
+for (int tt = 0; tt < kBaseTextureTypeCount; ++tt)
+{
+    char const* shapeName = kBaseTextureTypes[tt].name;
+    TextureType::Shape baseShape = kBaseTextureTypes[tt].baseShape;
+
+    for (int isArray = 0; isArray < 2; ++isArray)
+    {
+        // Arrays of 3D textures aren't allowed
+        if (isArray && baseShape == TextureType::Shape3D) continue;
+
+        for (int isMultisample = 0; isMultisample < 2; ++isMultisample)
+        {
+            auto readAccess = SLANG_RESOURCE_ACCESS_READ;
+            auto readWriteAccess = SLANG_RESOURCE_ACCESS_READ_WRITE;
+
+            // TODO: any constraints to enforce on what gets to be multisampled?
+
+                        
+            unsigned flavor = baseShape;
+            if (isArray)		flavor |= TextureType::ArrayFlag;
+            if (isMultisample)	flavor |= TextureType::MultisampleFlag;
+//                        if (isShadow)		flavor |= TextureType::ShadowFlag;
+
+
+
+            unsigned readFlavor = flavor | (readAccess << 8);
+            unsigned readWriteFlavor = flavor | (readWriteAccess << 8);
+
+            StringBuilder nameBuilder;
+            nameBuilder << shapeName;
+            if (isMultisample) nameBuilder << "MS";
+            if (isArray) nameBuilder << "Array";
+            auto name = nameBuilder.ProduceString();
+
+            sb << "__generic<T> ";
+            sb << "__magic_type(TextureSampler," << int(readFlavor) << ") struct ";
+            sb << "__sampler" << name;
+            sb << " {};\n";
+
+            sb << "__generic<T> ";
+            sb << "__magic_type(Texture," << int(readFlavor) << ") struct ";
+            sb << "__texture" << name;
+            sb << " {};\n";
+
+            sb << "__generic<T> ";
+            sb << "__magic_type(GLSLImageType," << int(readWriteFlavor) << ") struct ";
+            sb << "__image" << name;
+            sb << " {};\n";
+
+            // TODO(tfoley): flesh this out for all the available prefixes
+            static const struct
+            {
+                char const* prefix;
+                char const* elementType;
+            } kTextureElementTypes[] = {
+                { "", "vec4" },
+                { "i", "ivec4" },
+                { "u", "uvec4" },
+                { nullptr, nullptr },
+            };
+            for( auto ee = kTextureElementTypes; ee->prefix; ++ee )
+            {
+                sb << "typedef __sampler" << name << "<" << ee->elementType << "> " << ee->prefix << "sampler" << name << ";\n";
+                sb << "typedef __texture" << name << "<" << ee->elementType << "> " << ee->prefix << "texture" << name << ";\n";
+                sb << "typedef __image" << name << "<" << ee->elementType << "> " << ee->prefix << "image" << name << ";\n";
+            }
+        }
+    }
+}
+
+// Magic wrapper types for GLSL parameter blocks and shader storage buffers.
+sb << "__generic<T> __magic_type(GLSLInputParameterBlockType) struct __GLSLInputParameterBlock {};\n";
+sb << "__generic<T> __magic_type(GLSLOutputParameterBlockType) struct __GLSLOutputParameterBlock {};\n";
+sb << "__generic<T> __magic_type(GLSLShaderStorageBufferType) struct __GLSLShaderStorageBuffer {};\n";
+
+// Each declaration ends with a newline, like every other emitted line, so the
+// next declaration does not run on from it.
+sb << "__magic_type(SamplerState," << int(SamplerStateType::Flavor::SamplerState) << ") struct sampler {};\n";
+
+sb << "__magic_type(GLSLInputAttachmentType) struct subpassInput {};\n";
+
+// Define additional keywords
+
+sb << "syntax buffer : GLSLBufferModifier;\n";
+
+// [GLSL 4.3] Storage Qualifiers
+
+// TODO: need to support `shared` here with its GLSL meaning
+
+sb << "syntax patch : GLSLPatchModifier;\n";
+// `centroid` and `sample` handled centrally
+
+// [GLSL 4.5] Interpolation Qualifiers
+// The GLSL keyword is spelled `noperspective`.
+sb << "syntax smooth : SimpleModifier;\n";
+sb << "syntax flat : SimpleModifier;\n";
+sb << "syntax noperspective : SimpleModifier;\n";
+
+
+// [GLSL 4.3.2] Constant Qualifier
+
+// We need to handle GLSL `const` separately from HLSL `const`,
+// since they mean such different things.
+
+// [GLSL 4.7.2] Precision Qualifiers
+sb << "syntax highp : SimpleModifier;\n";
+sb << "syntax mediump : SimpleModifier;\n";
+sb << "syntax lowp : SimpleModifier;\n";
+
+// [GLSL 4.8.1] The Invariant Qualifier
+
+sb << "syntax invariant : SimpleModifier;\n";
+
+// [GLSL 4.10] Memory Qualifiers
+
+sb << "syntax coherent : SimpleModifier;\n";
+sb << "syntax volatile : SimpleModifier;\n";
+sb << "syntax restrict : SimpleModifier;\n";
+sb << "syntax readonly : GLSLReadOnlyModifier;\n";
+sb << "syntax writeonly : GLSLWriteOnlyModifier;\n";
+
+// We will treat `subroutine` as a qualifier for now
+sb << "syntax subroutine : SimpleModifier;\n";
+
+
+
+}}}}
+\ No newline at end of file
diff --git a/source/slang/glsl.meta.slang.cpp b/source/slang/glsl.meta.slang.cpp
new file mode 100644
index 000000000..e43a51ea9
--- /dev/null
+++ b/source/slang/glsl.meta.slang.cpp
@@ -0,0 +1,206 @@
+sb << "// Slang GLSL compatibility library\n";
+sb << "\n";
+sb << "";
+
+
+static const struct {
+    char const* name;
+    char const* glslPrefix;
+} kTypes[] =
+{
+    {"float", ""},
+    {"int", "i"},
+    {"uint", "u"},
+    {"bool", "b"},
+};
+static const int kTypeCount = sizeof(kTypes) / sizeof(kTypes[0]);
+
+for( int tt = 0; tt < kTypeCount; ++tt )
+{
+    // Declare GLSL aliases for HLSL types
+    for (int vv = 2; vv <= 4; ++vv)
+    {
+        sb << "typedef vector<" << kTypes[tt].name << "," << vv << "> " << kTypes[tt].glslPrefix << "vec" << vv << ";\n";
+        sb << "typedef matrix<" << kTypes[tt].name << "," << vv << "," << vv << "> " << kTypes[tt].glslPrefix << "mat" << vv << ";\n";
+    }
+    for (int rr = 2; rr <= 4; ++rr)
+    for (int cc = 2; cc <= 4; ++cc)
+    {
+        sb << "typedef matrix<" << kTypes[tt].name << "," << rr << "," << cc << "> " << kTypes[tt].glslPrefix << "mat" << rr << "x" << cc << ";\n";
+    }
+}
+
+// `operator*` overloads for vector and matrix operands, each bound to a named
+// intrinsic op and generic over any `__BuiltinArithmeticType`.
+
+// vector-scalar and scalar-vector
+sb << "__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op(mul) vector<T,N> operator*(vector<T,N> x, T y);\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op(mul) vector<T,N> operator*(T x, vector<T,N> y);\n";
+
+// matrix-scalar and scalar-matrix
+sb << "__generic<T : __BuiltinArithmeticType, let N : int, let M :int> __intrinsic_op(mul) matrix<T,N,M> operator*(matrix<T,N,M> x, T y);\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int, let M :int> __intrinsic_op(mul) matrix<T,N,M> operator*(T x, matrix<T,N,M> y);\n";
+
+// vector-vector (dot product, returning a scalar `T`)
+// NOTE(review): GLSL defines `vector * vector` as a component-wise product,
+// not a dot product; confirm this overload is intended.
+sb << "__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op(dot) T operator*(vector<T,N> x, vector<T,N> y);\n";
+
+// vector-matrix: an N-vector times an NxM matrix gives an M-vector
+sb << "__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op(mulVectorMatrix) vector<T,M> operator*(vector<T,N> x, matrix<T,N,M> y);\n";
+
+// matrix-vector: an NxM matrix times an M-vector gives an N-vector
+sb << "__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op(mulMatrixVector) vector<T,N> operator*(matrix<T,N,M> x, vector<T,M> y);\n";
+
+// matrix-matrix: an RxN matrix times an NxC matrix gives an RxC matrix
+sb << "__generic<T : __BuiltinArithmeticType, let R : int, let N : int, let C : int> __intrinsic_op(mulMatrixMatrix) matrix<T,R,C> operator*(matrix<T,R,N> x, matrix<T,N,C> y);\n";
+
+
+
+//
+
+// TODO(tfoley): Need to handle `RW*` variants of texture types as well...
+static const struct {
+    char const*			name;
+    TextureType::Shape	baseShape;
+    int					coordCount;
+} kBaseTextureTypes[] = {
+    { "1D",		TextureType::Shape1D,	1 },
+    { "2D",		TextureType::Shape2D,	2 },
+    { "3D",		TextureType::Shape3D,	3 },
+    { "Cube",	TextureType::ShapeCube,	3 },
+    { "Buffer", TextureType::ShapeBuffer,   1 },
+};
+static const int kBaseTextureTypeCount = sizeof(kBaseTextureTypes) / sizeof(kBaseTextureTypes[0]);
+
+
+static const struct {
+    char const*         name;
+    SlangResourceAccess access;
+} kBaseTextureAccessLevels[] = {
+    { "",                   SLANG_RESOURCE_ACCESS_READ },
+    { "RW",                 SLANG_RESOURCE_ACCESS_READ_WRITE },
+    { "RasterizerOrdered",  SLANG_RESOURCE_ACCESS_RASTER_ORDERED },
+};
+static const int kBaseTextureAccessLevelCount = sizeof(kBaseTextureAccessLevels) / sizeof(kBaseTextureAccessLevels[0]);
+
+for (int tt = 0; tt < kBaseTextureTypeCount; ++tt)
+{
+    char const* shapeName = kBaseTextureTypes[tt].name;
+    TextureType::Shape baseShape = kBaseTextureTypes[tt].baseShape;
+
+    for (int isArray = 0; isArray < 2; ++isArray)
+    {
+        // Arrays of 3D textures aren't allowed
+        if (isArray && baseShape == TextureType::Shape3D) continue;
+
+        for (int isMultisample = 0; isMultisample < 2; ++isMultisample)
+        {
+            auto readAccess = SLANG_RESOURCE_ACCESS_READ;
+            auto readWriteAccess = SLANG_RESOURCE_ACCESS_READ_WRITE;
+
+            // TODO: any constraints to enforce on what gets to be multisampled?
+
+                        
+            unsigned flavor = baseShape;
+            if (isArray)		flavor |= TextureType::ArrayFlag;
+            if (isMultisample)	flavor |= TextureType::MultisampleFlag;
+//                        if (isShadow)		flavor |= TextureType::ShadowFlag;
+
+
+
+            unsigned readFlavor = flavor | (readAccess << 8);
+            unsigned readWriteFlavor = flavor | (readWriteAccess << 8);
+
+            StringBuilder nameBuilder;
+            nameBuilder << shapeName;
+            if (isMultisample) nameBuilder << "MS";
+            if (isArray) nameBuilder << "Array";
+            auto name = nameBuilder.ProduceString();
+
+            sb << "__generic<T> ";
+            sb << "__magic_type(TextureSampler," << int(readFlavor) << ") struct ";
+            sb << "__sampler" << name;
+            sb << " {};\n";
+
+            sb << "__generic<T> ";
+            sb << "__magic_type(Texture," << int(readFlavor) << ") struct ";
+            sb << "__texture" << name;
+            sb << " {};\n";
+
+            sb << "__generic<T> ";
+            sb << "__magic_type(GLSLImageType," << int(readWriteFlavor) << ") struct ";
+            sb << "__image" << name;
+            sb << " {};\n";
+
+            // TODO(tfoley): flesh this out for all the available prefixes
+            static const struct
+            {
+                char const* prefix;
+                char const* elementType;
+            } kTextureElementTypes[] = {
+                { "", "vec4" },
+                { "i", "ivec4" },
+                { "u", "uvec4" },
+                { nullptr, nullptr },
+            };
+            for( auto ee = kTextureElementTypes; ee->prefix; ++ee )
+            {
+                sb << "typedef __sampler" << name << "<" << ee->elementType << "> " << ee->prefix << "sampler" << name << ";\n";
+                sb << "typedef __texture" << name << "<" << ee->elementType << "> " << ee->prefix << "texture" << name << ";\n";
+                sb << "typedef __image" << name << "<" << ee->elementType << "> " << ee->prefix << "image" << name << ";\n";
+            }
+        }
+    }
+}
+
+// Magic wrapper types for GLSL parameter blocks and shader storage buffers.
+sb << "__generic<T> __magic_type(GLSLInputParameterBlockType) struct __GLSLInputParameterBlock {};\n";
+sb << "__generic<T> __magic_type(GLSLOutputParameterBlockType) struct __GLSLOutputParameterBlock {};\n";
+sb << "__generic<T> __magic_type(GLSLShaderStorageBufferType) struct __GLSLShaderStorageBuffer {};\n";
+
+// Each declaration ends with a newline, like every other emitted line, so the
+// next declaration does not run on from it.
+sb << "__magic_type(SamplerState," << int(SamplerStateType::Flavor::SamplerState) << ") struct sampler {};\n";
+
+sb << "__magic_type(GLSLInputAttachmentType) struct subpassInput {};\n";
+
+// Define additional keywords
+
+sb << "syntax buffer : GLSLBufferModifier;\n";
+
+// [GLSL 4.3] Storage Qualifiers
+
+// TODO: need to support `shared` here with its GLSL meaning
+
+sb << "syntax patch : GLSLPatchModifier;\n";
+// `centroid` and `sample` handled centrally
+
+// [GLSL 4.5] Interpolation Qualifiers
+// The GLSL keyword is spelled `noperspective`.
+sb << "syntax smooth : SimpleModifier;\n";
+sb << "syntax flat : SimpleModifier;\n";
+sb << "syntax noperspective : SimpleModifier;\n";
+
+
+// [GLSL 4.3.2] Constant Qualifier
+
+// We need to handle GLSL `const` separately from HLSL `const`,
+// since they mean such different things.
+
+// [GLSL 4.7.2] Precision Qualifiers
+sb << "syntax highp : SimpleModifier;\n";
+sb << "syntax mediump : SimpleModifier;\n";
+sb << "syntax lowp : SimpleModifier;\n";
+
+// [GLSL 4.8.1] The Invariant Qualifier
+
+sb << "syntax invariant : SimpleModifier;\n";
+
+// [GLSL 4.10] Memory Qualifiers
+
+sb << "syntax coherent : SimpleModifier;\n";
+sb << "syntax volatile : SimpleModifier;\n";
+sb << "syntax restrict : SimpleModifier;\n";
+sb << "syntax readonly : GLSLReadOnlyModifier;\n";
+sb << "syntax writeonly : GLSLWriteOnlyModifier;\n";
+
+// We will treat `subroutine` as a qualifier for now
+sb << "syntax subroutine : SimpleModifier;\n";
+
+
+
+sb << "";
diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang
new file mode 100644
index 000000000..3b4b85b91
--- /dev/null
+++ b/source/slang/hlsl.meta.slang
@@ -0,0 +1,1065 @@
+// Slang HLSL compatibility library
+
+typedef uint UINT;
+
+// `AppendStructuredBuffer<T>`, bound to the `HLSLAppendStructuredBufferType`
+// magic type. Both members are intrinsic operations with no Slang body.
+__generic<T> __magic_type(HLSLAppendStructuredBufferType) struct AppendStructuredBuffer
+{
+    // Takes one value of the element type `T`.
+    __intrinsic_op void Append(T value);
+
+    // Reports its results through the `out` parameters `numStructs` and `stride`.
+    __intrinsic_op void GetDimensions(
+        out uint numStructs,
+        out uint stride);
+};
+
+// `ByteAddressBuffer`, bound to the `HLSLByteAddressBufferType` magic type.
+// Every member is an intrinsic operation with no Slang body.
+__magic_type(HLSLByteAddressBufferType) struct ByteAddressBuffer
+{
+    // Reports its result through the `out` parameter `dim`.
+    __intrinsic_op void GetDimensions(
+        out uint dim);
+
+    // `Load`, `Load2`, `Load3` and `Load4` return one to four `uint` components
+    // for an `int location`; each has a second overload that additionally
+    // writes an `out uint status`.
+    __intrinsic_op uint Load(int location);
+    __intrinsic_op uint Load(int location, out uint status);
+
+    __intrinsic_op uint2 Load2(int location);
+    __intrinsic_op uint2 Load2(int location, out uint status);
+
+    __intrinsic_op uint3 Load3(int location);
+    __intrinsic_op uint3 Load3(int location, out uint status);
+
+    __intrinsic_op uint4 Load4(int location);
+    __intrinsic_op uint4 Load4(int location, out uint status);
+};
+
+// `StructuredBuffer<T>`, bound to the `HLSLStructuredBufferType` magic type.
+// Every member is an intrinsic operation with no Slang body.
+__generic<T> __magic_type(HLSLStructuredBufferType) struct StructuredBuffer
+{
+    // Reports its results through the `out` parameters `numStructs` and `stride`.
+    __intrinsic_op void GetDimensions(
+        out uint numStructs,
+        out uint stride);
+
+    // Returns an element of type `T` for an `int location`; the second
+    // overload additionally writes an `out uint status`.
+    __intrinsic_op T Load(int location);
+    __intrinsic_op T Load(int location, out uint status);
+
+    // Subscript by `uint` index yielding `T`; no accessors are listed here
+    // (compare `RWStructuredBuffer`, which lists `get; set;`).
+    __intrinsic_op __subscript(uint index) -> T;
+};
+
+// `ConsumeStructuredBuffer<T>`, bound to the `HLSLConsumeStructuredBufferType`
+// magic type. Both members are intrinsic operations with no Slang body.
+__generic<T> __magic_type(HLSLConsumeStructuredBufferType) struct ConsumeStructuredBuffer
+{
+    // Takes no arguments and returns one value of the element type `T`.
+    __intrinsic_op T Consume();
+
+    // Reports its results through the `out` parameters `numStructs` and `stride`.
+    __intrinsic_op void GetDimensions(
+        out uint numStructs,
+        out uint stride);
+};
+
+// `InputPatch<T, N>`, bound to the `HLSLInputPatchType` magic type.
+// The integer parameter `N` is not referenced by any member declared here.
+__generic<T, let N : int> __magic_type(HLSLInputPatchType) struct InputPatch
+{
+    // Subscript by `uint` index yielding `T`; no accessors are listed.
+    __intrinsic_op __subscript(uint index) -> T;
+};
+
+// `OutputPatch<T, N>`, bound to the `HLSLOutputPatchType` magic type.
+// The integer parameter `N` is not referenced by any member declared here.
+__generic<T, let N : int> __magic_type(HLSLOutputPatchType) struct OutputPatch
+{
+    // Subscript by `uint` index yielding `T`; only a `set` accessor is listed.
+    __intrinsic_op __subscript(uint index) -> T { set; }
+};
+
+// `RWByteAddressBuffer`, bound to the `HLSLRWByteAddressBufferType` magic type.
+// Every member is an intrinsic operation with no Slang body.
+__magic_type(HLSLRWByteAddressBufferType) struct RWByteAddressBuffer
+{
+    // Note(tfoley): supports all operations from `ByteAddressBuffer`
+    // TODO(tfoley): can this be made a sub-type?
+
+    // Reports its result through the `out` parameter `dim`.
+    __intrinsic_op void GetDimensions(
+        out uint dim);
+
+    // `Load`, `Load2`, `Load3` and `Load4` return one to four `uint` components
+    // for an `int location`; each has a second overload that additionally
+    // writes an `out uint status`.
+    __intrinsic_op uint Load(int location);
+    __intrinsic_op uint Load(int location, out uint status);
+
+    __intrinsic_op uint2 Load2(int location);
+    __intrinsic_op uint2 Load2(int location, out uint status);
+
+    __intrinsic_op uint3 Load3(int location);
+    __intrinsic_op uint3 Load3(int location, out uint status);
+
+    __intrinsic_op uint4 Load4(int location);
+    __intrinsic_op uint4 Load4(int location, out uint status);
+
+    // Added operations:
+
+    // Each `Interlocked*` operation takes `UINT dest` and `UINT value`, and is
+    // declared twice: with and without a trailing `out UINT original_value`.
+    // The compare variants take an extra `UINT compare_value`.
+
+    __intrinsic_op void InterlockedAdd(
+        UINT dest,
+        UINT value,
+        out UINT original_value);
+    __intrinsic_op void InterlockedAdd(
+        UINT dest,
+        UINT value);
+
+    __intrinsic_op void InterlockedAnd(
+        UINT dest,
+        UINT value,
+        out UINT original_value);
+    __intrinsic_op void InterlockedAnd(
+        UINT dest,
+        UINT value);
+
+    __intrinsic_op void InterlockedCompareExchange(
+        UINT dest,
+        UINT compare_value,
+        UINT value,
+        out UINT original_value);
+    __intrinsic_op void InterlockedCompareExchange(
+        UINT dest,
+        UINT compare_value,
+        UINT value);
+
+    __intrinsic_op void InterlockedCompareStore(
+        UINT dest,
+        UINT compare_value,
+        UINT value);
+    // NOTE(review): this overload has no `value` parameter to store; confirm
+    // it is intentional and not a by-product of the "drop the last parameter"
+    // pattern used by the other `Interlocked*` pairs.
+    __intrinsic_op void InterlockedCompareStore(
+        UINT dest,
+        UINT compare_value);
+
+    __intrinsic_op void InterlockedExchange(
+        UINT dest,
+        UINT value,
+        out UINT original_value);
+    __intrinsic_op void InterlockedExchange(
+        UINT dest,
+        UINT value);
+
+    __intrinsic_op void InterlockedMax(
+        UINT dest,
+        UINT value,
+        out UINT original_value);
+    __intrinsic_op void InterlockedMax(
+        UINT dest,
+        UINT value);
+
+    __intrinsic_op void InterlockedMin(
+        UINT dest,
+        UINT value,
+        out UINT original_value);
+    __intrinsic_op void InterlockedMin(
+        UINT dest,
+        UINT value);
+
+    __intrinsic_op void InterlockedOr(
+        UINT dest,
+        UINT value,
+        out UINT original_value);
+    __intrinsic_op void InterlockedOr(
+        UINT dest,
+        UINT value);
+
+    __intrinsic_op void InterlockedXor(
+        UINT dest,
+        UINT value,
+        out UINT original_value);
+    __intrinsic_op void InterlockedXor(
+        UINT dest,
+        UINT value);
+
+    // `Store`, `Store2`, `Store3` and `Store4` take a `uint address` and one to
+    // four `uint` components.
+    __intrinsic_op void Store(
+        uint address,
+        uint value);
+
+    __intrinsic_op void Store2(
+        uint address,
+        uint2 value);
+
+    __intrinsic_op void Store3(
+        uint address,
+        uint3 value);
+
+    __intrinsic_op void Store4(
+        uint address,
+        uint4 value);
+};
+
+// `RWStructuredBuffer<T>`, bound to the `HLSLRWStructuredBufferType` magic
+// type. Every member is an intrinsic operation with no Slang body.
+__generic<T> __magic_type(HLSLRWStructuredBufferType) struct RWStructuredBuffer
+{
+    // Returns a `uint` counter value.
+    __intrinsic_op uint DecrementCounter();
+
+    // Reports its results through the `out` parameters `numStructs` and `stride`.
+    __intrinsic_op void GetDimensions(
+        out uint numStructs,
+        out uint stride);
+
+    // Returns a `uint` counter value, like `DecrementCounter`; HLSL declares
+    // this method as returning `uint`, not `void`.
+    __intrinsic_op uint IncrementCounter();
+
+    // Returns an element of type `T` for an `int location`; the second
+    // overload additionally writes an `out uint status`.
+    __intrinsic_op T Load(int location);
+    __intrinsic_op T Load(int location, out uint status);
+
+    // Subscript by `uint` index yielding `T`, with both `get` and `set`.
+    __intrinsic_op __subscript(uint index) -> T { get; set; }
+};
+
+// `PointStream<T>`, bound to the `HLSLPointStreamType` magic type.
+// NOTE(review): unlike the buffer types earlier in this file, these members
+// are not marked `__intrinsic_op`; confirm that is intended.
+__generic<T> __magic_type(HLSLPointStreamType) struct PointStream
+{
+    void Append(T value);
+    void RestartStrip();
+};
+
+// `LineStream<T>`, bound to the `HLSLLineStreamType` magic type.
+// NOTE(review): unlike the buffer types earlier in this file, these members
+// are not marked `__intrinsic_op`; confirm that is intended.
+__generic<T> __magic_type(HLSLLineStreamType) struct LineStream
+{
+    void Append(T value);
+    void RestartStrip();
+};
+
+// `TriangleStream<T>`, bound to the `HLSLTriangleStreamType` magic type.
+// NOTE(review): unlike the buffer types earlier in this file, these members
+// are not marked `__intrinsic_op`; confirm that is intended.
+__generic<T> __magic_type(HLSLTriangleStreamType) struct TriangleStream
+{
+    void Append(T value);
+    void RestartStrip();
+};
+
+// Note(tfoley): Trying to systematically add all the HLSL builtins
+
+// Try to terminate the current draw or dispatch call (HLSL SM 4.0)
+__intrinsic_op void abort();
+
+// Absolute value (HLSL SM 1.0)
+__generic<T : __BuiltinSignedArithmeticType> __intrinsic_op T abs(T x);
+__generic<T : __BuiltinSignedArithmeticType, let N : int> __intrinsic_op vector<T,N> abs(vector<T,N> x);
+__generic<T : __BuiltinSignedArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> abs(matrix<T,N,M> x);
+
+// Inverse cosine (HLSL SM 1.0)
+__generic<T : __BuiltinFloatingPointType> __intrinsic_op T acos(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> acos(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> acos(matrix<T,N,M> x);
+
+// Test if all components are non-zero (HLSL SM 1.0)
+// The result is a single `bool` for scalar, vector and matrix arguments alike.
+__generic<T : __BuiltinType> __intrinsic_op bool all(T x);
+__generic<T : __BuiltinType, let N : int> __intrinsic_op bool all(vector<T,N> x);
+__generic<T : __BuiltinType, let N : int, let M : int> __intrinsic_op bool all(matrix<T,N,M> x);
+
+// Barrier for writes to all memory spaces (HLSL SM 5.0)
+__intrinsic_op void AllMemoryBarrier();
+
+// Thread-group sync and barrier for writes to all memory spaces (HLSL SM 5.0)
+__intrinsic_op void AllMemoryBarrierWithGroupSync();
+
+// Test if any component is non-zero (HLSL SM 1.0)
+// The result is a single `bool` for scalar, vector and matrix arguments alike.
+__generic<T : __BuiltinType> __intrinsic_op bool any(T x);
+__generic<T : __BuiltinType, let N : int> __intrinsic_op bool any(vector<T,N> x);
+__generic<T : __BuiltinType, let N : int, let M : int> __intrinsic_op bool any(matrix<T,N,M> x);
+
+
+// Reinterpret bits as a double (HLSL SM 5.0)
+__intrinsic_op double asdouble(uint lowbits, uint highbits);
+
+// Reinterpret bits as a float (HLSL SM 4.0)
+__intrinsic_op float asfloat( int x);
+__intrinsic_op float asfloat(uint x);
+__generic<let N : int> __intrinsic_op vector<float,N> asfloat(vector< int,N> x);
+__generic<let N : int> __intrinsic_op vector<float,N> asfloat(vector<uint,N> x);
+__generic<let N : int, let M : int> __intrinsic_op matrix<float,N,M> asfloat(matrix< int,N,M> x);
+__generic<let N : int, let M : int> __intrinsic_op matrix<float,N,M> asfloat(matrix<uint,N,M> x);
+
+
+// Inverse sine (HLSL SM 1.0)
+__generic<T : __BuiltinFloatingPointType> __intrinsic_op T asin(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> asin(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> asin(matrix<T,N,M> x);
+
+// Reinterpret bits as an int (HLSL SM 4.0)
+__intrinsic_op int asint(float x);
+__intrinsic_op int asint(uint x);
+__generic<let N : int> __intrinsic_op vector<int,N> asint(vector<float,N> x);
+__generic<let N : int> __intrinsic_op vector<int,N> asint(vector<uint,N> x);
+__generic<let N : int, let M : int> __intrinsic_op matrix<int,N,M> asint(matrix<float,N,M> x);
+__generic<let N : int, let M : int> __intrinsic_op matrix<int,N,M> asint(matrix<uint,N,M> x);
+
+// Reinterpret bits of double as a uint (HLSL SM 5.0)
+__intrinsic_op void asuint(double value, out uint lowbits, out uint highbits);
+
+// Reinterpret bits as a uint (HLSL SM 4.0)
+__intrinsic_op uint asuint(float x);
+__intrinsic_op uint asuint(int x);
+__generic<let N : int> __intrinsic_op vector<uint,N> asuint(vector<float,N> x);
+__generic<let N : int> __intrinsic_op vector<uint,N> asuint(vector<int,N> x);
+__generic<let N : int, let M : int> __intrinsic_op matrix<uint,N,M> asuint(matrix<float,N,M> x);
+__generic<let N : int, let M : int> __intrinsic_op matrix<uint,N,M> asuint(matrix<int,N,M> x);
+
+// Inverse tangent (HLSL SM 1.0)
+__generic<T : __BuiltinFloatingPointType> __intrinsic_op T atan(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> atan(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> atan(matrix<T,N,M> x);
+
+// Two-argument inverse tangent, taking `y` first and `x` second.
+// Scalar, vector and matrix overloads are declared; each one carries a
+// `glsl` target mapping that emits the call as `atan($0,$1)`
+// (presumably `$0`/`$1` stand for the first and second argument -- confirm
+// against the emitter).
+__generic<T : __BuiltinFloatingPointType>
+__target_intrinsic(glsl,"atan($0,$1)")
+__intrinsic_op
+T atan2(T y, T x);
+
+__generic<T : __BuiltinFloatingPointType, let N : int>
+__target_intrinsic(glsl,"atan($0,$1)")
+__intrinsic_op
+vector<T,N> atan2(vector<T,N> y, vector<T,N> x);
+
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
+__target_intrinsic(glsl,"atan($0,$1)")
+__intrinsic_op
+matrix<T,N,M> atan2(matrix<T,N,M> y, matrix<T,N,M> x);
+
+// Ceiling (HLSL SM 1.0)
+__generic<T : __BuiltinFloatingPointType> __intrinsic_op T ceil(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> ceil(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> ceil(matrix<T,N,M> x);
+
+
+// Check access status to tiled resource
+__intrinsic_op bool CheckAccessFullyMapped(uint status);
+
+// Clamp (HLSL SM 1.0)
+__generic<T : __BuiltinArithmeticType> __intrinsic_op T clamp(T x, T min, T max);
+__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> clamp(vector<T,N> x, vector<T,N> min, vector<T,N> max);
+__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> clamp(matrix<T,N,M> x, matrix<T,N,M> min, matrix<T,N,M> max);
+
+// Clip (discard) fragment conditionally
+__generic<T : __BuiltinFloatingPointType> __intrinsic_op void clip(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op void clip(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op void clip(matrix<T,N,M> x);
+
+// Cosine
+__generic<T : __BuiltinFloatingPointType> __intrinsic_op T cos(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> cos(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> cos(matrix<T,N,M> x);
+
+// Hyperbolic cosine
+__generic<T : __BuiltinFloatingPointType> __intrinsic_op T cosh(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> cosh(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> cosh(matrix<T,N,M> x);
+
+// Population count
+__intrinsic_op uint countbits(uint value);
+
+// Cross product
+__generic<T : __BuiltinArithmeticType> __intrinsic_op vector<T,3> cross(vector<T,3> x, vector<T,3> y);
+
+// Convert encoded color
+__intrinsic_op int4 D3DCOLORtoUBYTE4(float4 x);
+
+// Partial-difference derivatives
+__generic<T : __BuiltinFloatingPointType>
+__target_intrinsic(glsl, dFdx)
+__intrinsic_op
+T ddx(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int>
+__target_intrinsic(glsl, dFdx)
+__intrinsic_op
+vector<T,N> ddx(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
+__target_intrinsic(glsl, dFdx)
+__intrinsic_op
+matrix<T,N,M> ddx(matrix<T,N,M> x);
+
+__generic<T : __BuiltinFloatingPointType>
+__glsl_extension(GL_ARB_derivative_control)
+__target_intrinsic(glsl, dFdxCoarse)
+__intrinsic_op
+T ddx_coarse(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int>
+__glsl_extension(GL_ARB_derivative_control)
+__target_intrinsic(glsl, dFdxCoarse)
+__intrinsic_op
+vector<T,N> ddx_coarse(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
+__glsl_extension(GL_ARB_derivative_control)
+__target_intrinsic(glsl, dFdxCoarse)
+__intrinsic_op
+matrix<T,N,M> ddx_coarse(matrix<T,N,M> x);
+
+__generic<T : __BuiltinFloatingPointType>
+__glsl_extension(GL_ARB_derivative_control)
+__target_intrinsic(glsl, dFdxFine)
+__intrinsic_op
+T ddx_fine(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int>
+__glsl_extension(GL_ARB_derivative_control)
+__target_intrinsic(glsl, dFdxFine)
+__intrinsic_op
+vector<T,N> ddx_fine(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
+__glsl_extension(GL_ARB_derivative_control)
+__target_intrinsic(glsl, dFdxFine)
+__intrinsic_op
+matrix<T,N,M> ddx_fine(matrix<T,N,M> x);
+
+__generic<T : __BuiltinFloatingPointType>
+__target_intrinsic(glsl, dFdy)
+__intrinsic_op
+T ddy(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int>
+__target_intrinsic(glsl, dFdy)
+__intrinsic_op
+vector<T,N> ddy(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
+__target_intrinsic(glsl, dFdy)
+__intrinsic_op
+ matrix<T,N,M> ddy(matrix<T,N,M> x);
+
+__generic<T : __BuiltinFloatingPointType>
+__glsl_extension(GL_ARB_derivative_control)
+__target_intrinsic(glsl, dFdyCoarse)
+__intrinsic_op
+T ddy_coarse(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int>
+__glsl_extension(GL_ARB_derivative_control)
+__target_intrinsic(glsl, dFdyCoarse)
+__intrinsic_op
+vector<T,N> ddy_coarse(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
+__glsl_extension(GL_ARB_derivative_control)
+__target_intrinsic(glsl, dFdyCoarse)
+__intrinsic_op
+matrix<T,N,M> ddy_coarse(matrix<T,N,M> x);
+
+__generic<T : __BuiltinFloatingPointType>
+__glsl_extension(GL_ARB_derivative_control)
+__target_intrinsic(glsl, dFdyFine)
+__intrinsic_op
+T ddy_fine(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int>
+__glsl_extension(GL_ARB_derivative_control)
+__target_intrinsic(glsl, dFdyFine)
+__intrinsic_op
+vector<T,N> ddy_fine(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
+__glsl_extension(GL_ARB_derivative_control)
+__target_intrinsic(glsl, dFdyFine)
+__intrinsic_op
+matrix<T,N,M> ddy_fine(matrix<T,N,M> x);
+
+
+// Radians to degrees
+__generic<T : __BuiltinFloatingPointType> __intrinsic_op T degrees(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> degrees(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> degrees(matrix<T,N,M> x);
+
+// Matrix determinant
+
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op T determinant(matrix<T,N,N> m);
+
+// Barrier for device memory
+__intrinsic_op void DeviceMemoryBarrier();
+__intrinsic_op void DeviceMemoryBarrierWithGroupSync();
+
+// Vector distance
+
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op T distance(vector<T,N> x, vector<T,N> y);
+
+// Vector dot product
+
+__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op T dot(vector<T,N> x, vector<T,N> y);
+
+// Helper for computing distance terms for lighting (obsolete)
+
+__generic<T : __BuiltinFloatingPointType> __intrinsic_op vector<T,4> dst(vector<T,4> x, vector<T,4> y);
+
+// Error message
+
+// __intrinsic_op void errorf( string format, ... );
+
+// Attribute evaluation
+
+__generic<T : __BuiltinArithmeticType> __intrinsic_op T EvaluateAttributeAtCentroid(T x);
+__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> EvaluateAttributeAtCentroid(vector<T,N> x);
+__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> EvaluateAttributeAtCentroid(matrix<T,N,M> x);
+
+__generic<T : __BuiltinArithmeticType> __intrinsic_op T EvaluateAttributeAtSample(T x, uint sampleindex);
+__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> EvaluateAttributeAtSample(vector<T,N> x, uint sampleindex);
+__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> EvaluateAttributeAtSample(matrix<T,N,M> x, uint sampleindex);
+
+__generic<T : __BuiltinArithmeticType> __intrinsic_op T EvaluateAttributeSnapped(T x, int2 offset);
+__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> EvaluateAttributeSnapped(vector<T,N> x, int2 offset);
+__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> EvaluateAttributeSnapped(matrix<T,N,M> x, int2 offset);
+
+// Base-e exponent
+__generic<T : __BuiltinFloatingPointType> __intrinsic_op T exp(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> exp(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> exp(matrix<T,N,M> x);
+
+// Base-2 exponent
+__generic<T : __BuiltinFloatingPointType> __intrinsic_op T exp2(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> exp2(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> exp2(matrix<T,N,M> x);
+
+// Convert 16-bit float stored in low bits of integer
+__intrinsic_op float f16tof32(uint value);
+__generic<let N : int> __intrinsic_op vector<float,N> f16tof32(vector<uint,N> value);
+
+// Convert to 16-bit float stored in low bits of integer
+__intrinsic_op uint f32tof16(float value);
+__generic<let N : int> __intrinsic_op vector<uint,N> f32tof16(vector<float,N> value);
+
+// Flip surface normal to face forward, if needed
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> faceforward(vector<T,N> n, vector<T,N> i, vector<T,N> ng);
+
+// Find first set bit starting at high bit and working down
+__intrinsic_op int firstbithigh(int value);
+__generic<let N : int> __intrinsic_op vector<int,N> firstbithigh(vector<int,N> value);
+
+__intrinsic_op uint firstbithigh(uint value);
+__generic<let N : int> __intrinsic_op vector<uint,N> firstbithigh(vector<uint,N> value);
+
+// Find first set bit starting at low bit and working up
+__intrinsic_op int firstbitlow(int value);
+__generic<let N : int> __intrinsic_op vector<int,N> firstbitlow(vector<int,N> value);
+
+__intrinsic_op uint firstbitlow(uint value);
+__generic<let N : int> __intrinsic_op vector<uint,N> firstbitlow(vector<uint,N> value);
+
+// Floor (HLSL SM 1.0)
+__generic<T : __BuiltinFloatingPointType> __intrinsic_op T floor(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> floor(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> floor(matrix<T,N,M> x);
+
+// Fused multiply-add for doubles
+__intrinsic_op double fma(double a, double b, double c);
+__generic<let N : int> __intrinsic_op vector<double, N> fma(vector<double, N> a, vector<double, N> b, vector<double, N> c);
+__generic<let N : int, let M : int> __intrinsic_op matrix<double,N,M> fma(matrix<double,N,M> a, matrix<double,N,M> b, matrix<double,N,M> c);
+
+// Floating point remainder of x/y
+__generic<T : __BuiltinFloatingPointType> __intrinsic_op T fmod(T x, T y);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> fmod(vector<T,N> x, vector<T,N> y);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> fmod(matrix<T,N,M> x, matrix<T,N,M> y);
+
+// Fractional part
+__generic<T : __BuiltinFloatingPointType>
+__target_intrinsic(glsl, fract)
+__intrinsic_op
+T frac(T x);
+
+__generic<T : __BuiltinFloatingPointType, let N : int>
+__target_intrinsic(glsl, fract)
+__intrinsic_op
+vector<T,N> frac(vector<T,N> x);
+
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
+__target_intrinsic(glsl, fract)
+__intrinsic_op
+matrix<T,N,M> frac(matrix<T,N,M> x);
+
+// Split float into mantissa and exponent
+__generic<T : __BuiltinFloatingPointType> __intrinsic_op T frexp(T x, out T exp);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> frexp(vector<T,N> x, out vector<T,N> exp);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> frexp(matrix<T,N,M> x, out matrix<T,N,M> exp);
+
+// Filter width: sum of the absolute partial derivatives, abs(ddx(x)) + abs(ddy(x))
+__generic<T : __BuiltinFloatingPointType> __intrinsic_op T fwidth(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> fwidth(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> fwidth(matrix<T,N,M> x);
+
+// Get number of samples in render target
+__intrinsic_op uint GetRenderTargetSampleCount();
+
+// Get position of given sample
+__intrinsic_op float2 GetRenderTargetSamplePosition(int Index);
+
+// Group memory barrier
+__intrinsic_op void GroupMemoryBarrier();
+__intrinsic_op void GroupMemoryBarrierWithGroupSync();
+
+// Atomics
+__intrinsic_op void InterlockedAdd(in out  int dest,  int value, out  int original_value);
+__intrinsic_op void InterlockedAdd(in out uint dest, uint value, out uint original_value);
+
+__intrinsic_op void InterlockedAnd(in out  int dest,  int value, out  int original_value);
+__intrinsic_op void InterlockedAnd(in out uint dest, uint value, out uint original_value);
+
+__intrinsic_op void InterlockedCompareExchange(in out  int dest,  int compare_value,  int value, out  int original_value);
+__intrinsic_op void InterlockedCompareExchange(in out uint dest, uint compare_value, uint value, out uint original_value);
+
+__intrinsic_op void InterlockedCompareStore(in out  int dest,  int compare_value,  int value);
+__intrinsic_op void InterlockedCompareStore(in out uint dest, uint compare_value, uint value);
+
+__intrinsic_op void InterlockedExchange(in out  int dest,  int value, out  int original_value);
+__intrinsic_op void InterlockedExchange(in out uint dest, uint value, out uint original_value);
+
+__intrinsic_op void InterlockedMax(in out  int dest,  int value, out  int original_value);
+__intrinsic_op void InterlockedMax(in out uint dest, uint value, out uint original_value);
+
+__intrinsic_op void InterlockedMin(in out  int dest,  int value, out  int original_value);
+__intrinsic_op void InterlockedMin(in out uint dest, uint value, out uint original_value);
+
+__intrinsic_op void InterlockedOr(in out  int dest,  int value, out  int original_value);
+__intrinsic_op void InterlockedOr(in out uint dest, uint value, out uint original_value);
+
+__intrinsic_op void InterlockedXor(in out  int dest,  int value, out  int original_value);
+__intrinsic_op void InterlockedXor(in out uint dest, uint value, out uint original_value);
+
+// Is floating-point value finite?
+__generic<T : __BuiltinFloatingPointType> __intrinsic_op bool isfinite(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<bool,N> isfinite(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<bool,N,M> isfinite(matrix<T,N,M> x);
+
+// Is floating-point value infinite?
+__generic<T : __BuiltinFloatingPointType> __intrinsic_op bool isinf(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<bool,N> isinf(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<bool,N,M> isinf(matrix<T,N,M> x);
+
+// Is floating-point value not-a-number?
+__generic<T : __BuiltinFloatingPointType> __intrinsic_op bool isnan(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<bool,N> isnan(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<bool,N,M> isnan(matrix<T,N,M> x);
+
+// Construct float from mantissa and exponent
+__generic<T : __BuiltinFloatingPointType> __intrinsic_op T ldexp(T x, T exp);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> ldexp(vector<T,N> x, vector<T,N> exp);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> ldexp(matrix<T,N,M> x, matrix<T,N,M> exp);
+
+// Vector length
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op T length(vector<T,N> x);
+
+// Linear interpolation
+__generic<T : __BuiltinFloatingPointType>
+__target_intrinsic(glsl, mix)
+__intrinsic_op
+T lerp(T x, T y, T s);
+
+__generic<T : __BuiltinFloatingPointType, let N : int>
+__target_intrinsic(glsl, mix)
+__intrinsic_op
+vector<T,N> lerp(vector<T,N> x, vector<T,N> y, vector<T,N> s);
+
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
+__target_intrinsic(glsl, mix)
+__intrinsic_op
+matrix<T,N,M> lerp(matrix<T,N,M> x, matrix<T,N,M> y, matrix<T,N,M> s);
+
+// Legacy lighting function (obsolete)
+__intrinsic_op float4 lit(float n_dot_l, float n_dot_h, float m);
+
+// Base-e logarithm
+__generic<T : __BuiltinFloatingPointType> __intrinsic_op T log(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> log(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> log(matrix<T,N,M> x);
+
+// Base-10 logarithm
+__generic<T : __BuiltinFloatingPointType> __intrinsic_op T log10(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> log10(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> log10(matrix<T,N,M> x);
+
+// Base-2 logarithm
+__generic<T : __BuiltinFloatingPointType> __intrinsic_op T log2(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> log2(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> log2(matrix<T,N,M> x);
+
+// multiply-add
+__generic<T : __BuiltinArithmeticType> __intrinsic_op T mad(T mvalue, T avalue, T bvalue);
+__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> mad(vector<T,N> mvalue, vector<T,N> avalue, vector<T,N> bvalue);
+__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> mad(matrix<T,N,M> mvalue, matrix<T,N,M> avalue, matrix<T,N,M> bvalue);
+
+// maximum
+__generic<T : __BuiltinArithmeticType> __intrinsic_op T max(T x, T y);
+__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> max(vector<T,N> x, vector<T,N> y);
+__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> max(matrix<T,N,M> x, matrix<T,N,M> y);
+
+// minimum
+__generic<T : __BuiltinArithmeticType> __intrinsic_op T min(T x, T y);
+__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> min(vector<T,N> x, vector<T,N> y);
+__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> min(matrix<T,N,M> x, matrix<T,N,M> y);
+
+// split into integer and fractional parts (both with same sign)
+__generic<T : __BuiltinFloatingPointType> __intrinsic_op T modf(T x, out T ip);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> modf(vector<T,N> x, out vector<T,N> ip);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> modf(matrix<T,N,M> x, out matrix<T,N,M> ip);
+
+// Masked sum of absolute differences: compares a 4-byte reference against an 8-byte source and accumulates four sums
+__intrinsic_op uint4 msad4(uint reference, uint2 source, uint4 accum);
+
+// General inner products
+
+// scalar-scalar
+__generic<T : __BuiltinArithmeticType> __intrinsic_op T mul(T x, T y);
+
+// scalar-vector and vector-scalar
+__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> mul(vector<T,N> x, T y);
+__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> mul(T x, vector<T,N> y);
+
+// scalar-matrix and matrix-scalar
+__generic<T : __BuiltinArithmeticType, let N : int, let M :int> __intrinsic_op matrix<T,N,M> mul(matrix<T,N,M> x, T y);
+__generic<T : __BuiltinArithmeticType, let N : int, let M :int> __intrinsic_op matrix<T,N,M> mul(T x, matrix<T,N,M> y);
+
+// vector-vector (dot product)
+__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op(dot) T mul(vector<T,N> x, vector<T,N> y);
+
+// vector-matrix
+__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op(mulVectorMatrix) vector<T,M> mul(vector<T,N> x, matrix<T,N,M> y);
+
+// matrix-vector
+__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op(mulMatrixVector) vector<T,N> mul(matrix<T,N,M> x, vector<T,M> y);
+
+// matrix-matrix
+__generic<T : __BuiltinArithmeticType, let R : int, let N : int, let C : int> __intrinsic_op(mulMatrixMatrix) matrix<T,R,C> mul(matrix<T,R,N> x, matrix<T,N,C> y);
+
+// noise (deprecated)
+__intrinsic_op float noise(float x);
+__generic<let N : int> __intrinsic_op float noise(vector<float, N> x);
+
+// Normalize a vector
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> normalize(vector<T,N> x);
+
+// Raise to a power
+__generic<T : __BuiltinFloatingPointType> __intrinsic_op T pow(T x, T y);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> pow(vector<T,N> x, vector<T,N> y);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> pow(matrix<T,N,M> x, matrix<T,N,M> y);
+
+// Output message
+
+// __intrinsic_op void printf( string format, ... );
+
+// Tessellation factor fixup routines
+
+__intrinsic_op void Process2DQuadTessFactorsAvg(
+    in  float4 RawEdgeFactors,
+    in  float2 InsideScale,
+    out float4 RoundedEdgeTessFactors,
+    out float2 RoundedInsideTessFactors,
+    out float2 UnroundedInsideTessFactors);
+
+__intrinsic_op void Process2DQuadTessFactorsMax(
+    in  float4 RawEdgeFactors,
+    in  float2 InsideScale,
+    out float4 RoundedEdgeTessFactors,
+    out float2 RoundedInsideTessFactors,
+    out float2 UnroundedInsideTessFactors);
+
+__intrinsic_op void Process2DQuadTessFactorsMin(
+    in  float4 RawEdgeFactors,
+    in  float2 InsideScale,
+    out float4 RoundedEdgeTessFactors,
+    out float2 RoundedInsideTessFactors,
+    out float2 UnroundedInsideTessFactors);
+
+__intrinsic_op void ProcessIsolineTessFactors(
+    in  float RawDetailFactor,
+    in  float RawDensityFactor,
+    out float RoundedDetailFactor,
+    out float RoundedDensityFactor);
+
+__intrinsic_op void ProcessQuadTessFactorsAvg(
+    in  float4 RawEdgeFactors,
+    in  float InsideScale,
+    out float4 RoundedEdgeTessFactors,
+    out float2 RoundedInsideTessFactors,
+    out float2 UnroundedInsideTessFactors);
+
+__intrinsic_op void ProcessQuadTessFactorsMax(
+    in  float4 RawEdgeFactors,
+    in  float InsideScale,
+    out float4 RoundedEdgeTessFactors,
+    out float2 RoundedInsideTessFactors,
+    out float2 UnroundedInsideTessFactors);
+
+__intrinsic_op void ProcessQuadTessFactorsMin(
+    in  float4 RawEdgeFactors,
+    in  float InsideScale,
+    out float4 RoundedEdgeTessFactors,
+    out float2 RoundedInsideTessFactors,
+    out float2 UnroundedInsideTessFactors);
+
+__intrinsic_op void ProcessTriTessFactorsAvg(
+    in  float3 RawEdgeFactors,
+    in  float InsideScale,
+    out float3 RoundedEdgeTessFactors,
+    out float RoundedInsideTessFactor,
+    out float UnroundedInsideTessFactor);
+
+__intrinsic_op void ProcessTriTessFactorsMax(
+    in  float3 RawEdgeFactors,
+    in  float InsideScale,
+    out float3 RoundedEdgeTessFactors,
+    out float RoundedInsideTessFactor,
+    out float UnroundedInsideTessFactor);
+
+// "Min" variant of the triangle tessellation-factor fixup routines.
+// NOTE(review): the last two `out` parameters are spelled with a plural
+// "...Factors" here, whereas ProcessTriTessFactorsAvg/Max use the singular
+// "...Factor" for the same scalar `float` outputs -- looks like a copy/paste
+// slip; confirm before renaming.
+__intrinsic_op void ProcessTriTessFactorsMin(
+    in  float3 RawEdgeFactors,
+    in  float InsideScale,
+    out float3 RoundedEdgeTessFactors,
+    out float RoundedInsideTessFactors,
+    out float UnroundedInsideTessFactors);
+
+// Degrees to radians
+__generic<T : __BuiltinFloatingPointType> __intrinsic_op T radians(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> radians(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> radians(matrix<T,N,M> x);
+
+// Approximate reciprocal
+__generic<T : __BuiltinFloatingPointType> __intrinsic_op T rcp(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> rcp(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> rcp(matrix<T,N,M> x);
+
+// Reflect incident vector across plane with given normal
+__generic<T : __BuiltinFloatingPointType, let N : int>
+__intrinsic_op
+vector<T,N> reflect(vector<T,N> i, vector<T,N> n);
+
+// Refract incident vector given surface normal and index of refraction
+// NOTE(review): `eta` is declared as `float` rather than the element type `T`
+// used by `i` and `n`; confirm this is intended for non-`float` instantiations.
+__generic<T : __BuiltinFloatingPointType, let N : int>
+__intrinsic_op
+vector<T,N> refract(vector<T,N> i, vector<T,N> n, float eta);
+
+// Reverse order of bits
+__intrinsic_op uint reversebits(uint value);
+// The vector overload is marked `__intrinsic_op` as well, like every other body-less builtin declared here.
+__generic<let N : int> __intrinsic_op vector<uint,N> reversebits(vector<uint,N> value);
+
+// Round-to-nearest
+__generic<T : __BuiltinFloatingPointType> __intrinsic_op T round(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> round(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> round(matrix<T,N,M> x);
+
+// Reciprocal of square root
+__generic<T : __BuiltinFloatingPointType> __intrinsic_op T rsqrt(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> rsqrt(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> rsqrt(matrix<T,N,M> x);
+
+// Clamp value to [0,1] range
+__generic<T : __BuiltinFloatingPointType>
+__target_intrinsic(glsl, "clamp($0, 0, 1)") __intrinsic_op
+T saturate(T x);
+
+__generic<T : __BuiltinFloatingPointType, let N : int>
+__target_intrinsic(glsl, "clamp($0, 0, 1)") __intrinsic_op
+vector<T,N> saturate(vector<T,N> x);
+
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
+__target_intrinsic(glsl, "clamp($0, 0, 1)") __intrinsic_op
+matrix<T,N,M> saturate(matrix<T,N,M> x);
+
+
+// Extract sign of value
+__generic<T : __BuiltinSignedArithmeticType> __intrinsic_op int sign(T x);
+__generic<T : __BuiltinSignedArithmeticType, let N : int> __intrinsic_op vector<int,N> sign(vector<T,N> x);
+__generic<T : __BuiltinSignedArithmeticType, let N : int, let M : int> __intrinsic_op matrix<int,N,M> sign(matrix<T,N,M> x);
+
+
+// Sine
+__generic<T : __BuiltinFloatingPointType> __intrinsic_op T sin(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> sin(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> sin(matrix<T,N,M> x);
+
+// Sine and cosine
+// Nothing is returned directly; the two results are written to the `out` parameters `s` and `c`.
+// The scalar overload is generic over `T` only: a value parameter `N` that
+// appears nowhere in the signature would have no argument to determine it.
+__generic<T : __BuiltinFloatingPointType> __intrinsic_op void sincos(T x, out T s, out T c);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op void sincos(vector<T,N> x, out vector<T,N> s, out vector<T,N> c);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op void sincos(matrix<T,N,M> x, out matrix<T,N,M> s, out matrix<T,N,M> c);
+
+// Hyperbolic Sine
+__generic<T : __BuiltinFloatingPointType> __intrinsic_op T sinh(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> sinh(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> sinh(matrix<T,N,M> x);
+
+// Smooth step (Hermite interpolation)
+__generic<T : __BuiltinFloatingPointType> __intrinsic_op T smoothstep(T min, T max, T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> smoothstep(vector<T,N> min, vector<T,N> max, vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> smoothstep(matrix<T,N,M> min, matrix<T,N,M> max, matrix<T,N,M> x);
+
+// Square root
+__generic<T : __BuiltinFloatingPointType> __intrinsic_op T sqrt(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> sqrt(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> sqrt(matrix<T,N,M> x);
+
+// Step function
+__generic<T : __BuiltinFloatingPointType> __intrinsic_op T step(T y, T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> step(vector<T,N> y, vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> step(matrix<T,N,M> y, matrix<T,N,M> x);
+
+// Tangent
+__generic<T : __BuiltinFloatingPointType> __intrinsic_op T tan(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> tan(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> tan(matrix<T,N,M> x);
+
+// Hyperbolic tangent
+__generic<T : __BuiltinFloatingPointType> __intrinsic_op T tanh(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> tanh(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> tanh(matrix<T,N,M> x);
+
+// Legacy texture-fetch operations
+
+/*
+__intrinsic_op float4 tex1D(sampler1D s, float t);
+__intrinsic_op float4 tex1D(sampler1D s, float t, float ddx, float ddy);
+__intrinsic_op float4 tex1Dbias(sampler1D s, float4 t);
+__intrinsic_op float4 tex1Dgrad(sampler1D s, float t, float ddx, float ddy);
+__intrinsic_op float4 tex1Dlod(sampler1D s, float4 t);
+__intrinsic_op float4 tex1Dproj(sampler1D s, float4 t);
+
+__intrinsic_op float4 tex2D(sampler2D s, float2 t);
+__intrinsic_op float4 tex2D(sampler2D s, float2 t, float2 ddx, float2 ddy);
+__intrinsic_op float4 tex2Dbias(sampler2D s, float4 t);
+__intrinsic_op float4 tex2Dgrad(sampler2D s, float2 t, float2 ddx, float2 ddy);
+__intrinsic_op float4 tex2Dlod(sampler2D s, float4 t);
+__intrinsic_op float4 tex2Dproj(sampler2D s, float4 t);
+
+__intrinsic_op float4 tex3D(sampler3D s, float3 t);
+__intrinsic_op float4 tex3D(sampler3D s, float3 t, float3 ddx, float3 ddy);
+__intrinsic_op float4 tex3Dbias(sampler3D s, float4 t);
+__intrinsic_op float4 tex3Dgrad(sampler3D s, float3 t, float3 ddx, float3 ddy);
+__intrinsic_op float4 tex3Dlod(sampler3D s, float4 t);
+__intrinsic_op float4 tex3Dproj(sampler3D s, float4 t);
+
+__intrinsic_op float4 texCUBE(samplerCUBE s, float3 t);
+__intrinsic_op float4 texCUBE(samplerCUBE s, float3 t, float3 ddx, float3 ddy);
+__intrinsic_op float4 texCUBEbias(samplerCUBE s, float4 t);
+__intrinsic_op float4 texCUBEgrad(samplerCUBE s, float3 t, float3 ddx, float3 ddy);
+__intrinsic_op float4 texCUBElod(samplerCUBE s, float4 t);
+__intrinsic_op float4 texCUBEproj(samplerCUBE s, float4 t);
+*/
+
+// Matrix transpose
+__generic<T : __BuiltinType, let N : int, let M : int> __intrinsic_op matrix<T,M,N> transpose(matrix<T,N,M> x);
+
+// Truncate to integer
+__generic<T : __BuiltinFloatingPointType> __intrinsic_op T trunc(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> trunc(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> trunc(matrix<T,N,M> x);
+
+// Shader Model 6.0 intrinsics
+
+__intrinsic_op uint GlobalOrderedCountIncrement(uint countToAppendForThisLane);
+
+__generic<T : __BuiltinType> __intrinsic_op T QuadReadLaneAt(T sourceValue, int quadLaneID);
+__generic<T : __BuiltinType, let N : int> __intrinsic_op vector<T,N> QuadReadLaneAt(vector<T,N> sourceValue, int quadLaneID);
+__generic<T : __BuiltinType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> QuadReadLaneAt(matrix<T,N,M> sourceValue, int quadLaneID);
+
+__generic<T : __BuiltinType> __intrinsic_op T QuadSwapX(T localValue);
+__generic<T : __BuiltinType, let N : int> __intrinsic_op vector<T,N> QuadSwapX(vector<T,N> localValue);
+__generic<T : __BuiltinType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> QuadSwapX(matrix<T,N,M> localValue);
+
+__generic<T : __BuiltinType> __intrinsic_op T QuadSwapY(T localValue);
+__generic<T : __BuiltinType, let N : int> __intrinsic_op vector<T,N> QuadSwapY(vector<T,N> localValue);
+__generic<T : __BuiltinType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> QuadSwapY(matrix<T,N,M> localValue);
+
+__generic<T : __BuiltinIntegerType> __intrinsic_op T WaveAllBitAnd(T expr);
+__generic<T : __BuiltinIntegerType, let N : int> __intrinsic_op vector<T,N> WaveAllBitAnd(vector<T,N> expr);
+__generic<T : __BuiltinIntegerType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> WaveAllBitAnd(matrix<T,N,M> expr);
+
+__generic<T : __BuiltinIntegerType> __intrinsic_op T WaveAllBitOr(T expr);
+__generic<T : __BuiltinIntegerType, let N : int> __intrinsic_op vector<T,N> WaveAllBitOr(vector<T,N> expr);
+__generic<T : __BuiltinIntegerType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> WaveAllBitOr(matrix<T,N,M> expr);
+
+__generic<T : __BuiltinIntegerType> __intrinsic_op T WaveAllBitXor(T expr);
+__generic<T : __BuiltinIntegerType, let N : int> __intrinsic_op vector<T,N> WaveAllBitXor(vector<T,N> expr);
+__generic<T : __BuiltinIntegerType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> WaveAllBitXor(matrix<T,N,M> expr);
+
+__generic<T : __BuiltinArithmeticType> __intrinsic_op T WaveAllMax(T expr);
+__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> WaveAllMax(vector<T,N> expr);
+__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> WaveAllMax(matrix<T,N,M> expr);
+
+__generic<T : __BuiltinArithmeticType> __intrinsic_op T WaveAllMin(T expr);
+__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> WaveAllMin(vector<T,N> expr);
+__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> WaveAllMin(matrix<T,N,M> expr);
+
+__generic<T : __BuiltinArithmeticType> __intrinsic_op T WaveAllProduct(T expr);
+__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> WaveAllProduct(vector<T,N> expr);
+__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> WaveAllProduct(matrix<T,N,M> expr);
+
+__generic<T : __BuiltinArithmeticType> __intrinsic_op T WaveAllSum(T expr);
+__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> WaveAllSum(vector<T,N> expr);
+__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> WaveAllSum(matrix<T,N,M> expr);
+
+__intrinsic_op bool WaveAllEqual(bool expr);
+__intrinsic_op bool WaveAllTrue(bool expr);
+__intrinsic_op bool WaveAnyTrue(bool expr);
+
+uint64_t WaveBallot(bool expr);
+
+uint WaveGetLaneCount();
+uint WaveGetLaneIndex();
+uint WaveGetOrderedIndex();
+
+bool WaveIsHelperLane();
+
+bool WaveOnce();
+
+__generic<T : __BuiltinArithmeticType> __intrinsic_op T WavePrefixProduct(T expr);
+__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> WavePrefixProduct(vector<T,N> expr);
+__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> WavePrefixProduct(matrix<T,N,M> expr);
+
+__generic<T : __BuiltinArithmeticType> __intrinsic_op T WavePrefixSum(T expr);
+__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> WavePrefixSum(vector<T,N> expr);
+__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> WavePrefixSum(matrix<T,N,M> expr);
+
+__generic<T : __BuiltinType> __intrinsic_op T WaveReadFirstLane(T expr);
+__generic<T : __BuiltinType, let N : int> __intrinsic_op vector<T,N> WaveReadFirstLane(vector<T,N> expr);
+__generic<T : __BuiltinType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> WaveReadFirstLane(matrix<T,N,M> expr);
+
+__generic<T : __BuiltinType> __intrinsic_op T WaveReadLaneAt(T expr, int laneIndex);
+__generic<T : __BuiltinType, let N : int> __intrinsic_op vector<T,N> WaveReadLaneAt(vector<T,N> expr, int laneIndex);
+__generic<T : __BuiltinType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> WaveReadLaneAt(matrix<T,N,M> expr, int laneIndex);
+
+// `typedef`s for alternate capitalizations of type names, since HLSL has been partly case-insensitive at various points
+typedef Texture2D texture2D;
+
+${{{{
+
+// Component-wise multiplication ops
+for(auto op : binaryOps)
+{
+    // Only the multiply and multiply-assign opcodes are handled here.
+    bool const isMultiply =
+        op.opCode == kIROp_Mul ||
+        op.opCode == kIRPseudoOp_MulAssign;
+    if (!isMultiply)
+        continue;
+
+    // Assignment forms take their left operand as `in out`.
+    char const* const lhsQualifier = (op.flags & ASSIGNMENT) ? "in out " : "";
+
+    for (auto type : kBaseTypes)
+    {
+        // Skip base types that share no flag bits with the operator.
+        if ((type.flags & op.flags) == 0)
+            continue;
+
+        // Both operands and the result use the same element type.
+        char const* const elementType = type.name;
+
+        sb << "__generic<let N : int, let M : int> ";
+        sb << "__intrinsic_op(" << int(op.opCode) << ") ";
+        sb << "matrix<" << elementType << ",N,M> operator" << op.opName << "(";
+        sb << lhsQualifier << "matrix<" << elementType << ",N,M> left, ";
+        sb << "matrix<" << elementType << ",N,M> right);\n";
+    }
+}
+
+//
+
+// Buffer types
+
+static const struct {
+    char const*         name;   // prefix emitted in front of `Buffer`
+    SlangResourceAccess access; // access mode handed to `makeFlavor`
+} kBaseBufferAccessLevels[] = {
+    { "",                   SLANG_RESOURCE_ACCESS_READ },
+    { "RW",                 SLANG_RESOURCE_ACCESS_READ_WRITE },
+    { "RasterizerOrdered",  SLANG_RESOURCE_ACCESS_RASTER_ORDERED },
+};
+static const int kBaseBufferAccessLevelCount = sizeof(kBaseBufferAccessLevels) / sizeof(kBaseBufferAccessLevels[0]);
+
+for (int aa = 0; aa < kBaseBufferAccessLevelCount; ++aa)
+{
+    auto const& level = kBaseBufferAccessLevels[aa];
+
+    // Struct header, named `<prefix>Buffer`.
+    sb << "__generic<T> __magic_type(Texture, "
+       << ResourceType::makeFlavor(ResourceType::Shape::ShapeBuffer, level.access)
+       << ") struct " << level.name << "Buffer {\n";
+
+    sb << "__intrinsic_op void GetDimensions(out uint dim);\n";
+
+    sb << "__target_intrinsic(glsl, \"texelFetch($$P, $0)$$z\")\n"
+       << "__intrinsic_op T Load(int location);\n"
+       << "__intrinsic_op T Load(int location, out uint status);\n";
+
+    sb << "__target_intrinsic(glsl, \"texelFetch($$P, int($0))$$z\")\n"
+       << "__intrinsic_op __subscript(uint index) -> T";
+
+    // Only the read-only flavor omits the explicit accessor list.
+    if (level.access == SLANG_RESOURCE_ACCESS_READ)
+    {
+        sb << ";\n";
+    }
+    else
+    {
+        sb << " { get; set; }\n";
+    }
+
+    sb << "};\n";
+}
+
+}}}}
+\ No newline at end of file
diff --git a/source/slang/hlsl.meta.slang.cpp b/source/slang/hlsl.meta.slang.cpp
new file mode 100644
index 000000000..e9e2277e6
--- /dev/null
+++ b/source/slang/hlsl.meta.slang.cpp
@@ -0,0 +1,1066 @@
+sb << "// Slang HLSL compatibility library\n";
+sb << "\n";
+sb << "typedef uint UINT;\n";
+sb << "\n";
+sb << "__generic<T> __magic_type(HLSLAppendStructuredBufferType) struct AppendStructuredBuffer\n";
+sb << "{\n";
+sb << "    __intrinsic_op void Append(T value);\n";
+sb << "\n";
+sb << "    __intrinsic_op void GetDimensions(\n";
+sb << "        out uint numStructs,\n";
+sb << "        out uint stride);\n";
+sb << "};\n";
+sb << "\n";
+sb << "__magic_type(HLSLByteAddressBufferType) struct ByteAddressBuffer\n";
+sb << "{\n";
+sb << "    __intrinsic_op void GetDimensions(\n";
+sb << "        out uint dim);\n";
+sb << "\n";
+sb << "    __intrinsic_op uint Load(int location);\n";
+sb << "    __intrinsic_op uint Load(int location, out uint status);\n";
+sb << "\n";
+sb << "    __intrinsic_op uint2 Load2(int location);\n";
+sb << "    __intrinsic_op uint2 Load2(int location, out uint status);\n";
+sb << "\n";
+sb << "    __intrinsic_op uint3 Load3(int location);\n";
+sb << "    __intrinsic_op uint3 Load3(int location, out uint status);\n";
+sb << "\n";
+sb << "    __intrinsic_op uint4 Load4(int location);\n";
+sb << "    __intrinsic_op uint4 Load4(int location, out uint status);\n";
+sb << "};\n";
+sb << "\n";
+sb << "__generic<T> __magic_type(HLSLStructuredBufferType) struct StructuredBuffer\n";
+sb << "{\n";
+sb << "    __intrinsic_op void GetDimensions(\n";
+sb << "        out uint numStructs,\n";
+sb << "        out uint stride);\n";
+sb << "\n";
+sb << "    __intrinsic_op T Load(int location);\n";
+sb << "    __intrinsic_op T Load(int location, out uint status);\n";
+sb << "\n";
+sb << "    __intrinsic_op __subscript(uint index) -> T;\n";
+sb << "};\n";
+sb << "\n";
+sb << "__generic<T> __magic_type(HLSLConsumeStructuredBufferType) struct ConsumeStructuredBuffer\n";
+sb << "{\n";
+sb << "    __intrinsic_op T Consume();\n";
+sb << "\n";
+sb << "    __intrinsic_op void GetDimensions(\n";
+sb << "        out uint numStructs,\n";
+sb << "        out uint stride);\n";
+sb << "};\n";
+sb << "\n";
+sb << "__generic<T, let N : int> __magic_type(HLSLInputPatchType) struct InputPatch\n";
+sb << "{\n";
+sb << "    __intrinsic_op __subscript(uint index) -> T;\n";
+sb << "};\n";
+sb << "\n";
+sb << "__generic<T, let N : int> __magic_type(HLSLOutputPatchType) struct OutputPatch\n";
+sb << "{\n";
+sb << "    __intrinsic_op __subscript(uint index) -> T { set; }\n";
+sb << "};\n";
+sb << "\n";
+sb << "__magic_type(HLSLRWByteAddressBufferType) struct RWByteAddressBuffer\n";
+sb << "{\n";
+sb << "    // Note(tfoley): supports alll operations from `ByteAddressBuffer`\n";
+sb << "    // TODO(tfoley): can this be made a sub-type?\n";
+sb << "\n";
+sb << "    __intrinsic_op void GetDimensions(\n";
+sb << "        out uint dim);\n";
+sb << "\n";
+sb << "    __intrinsic_op uint Load(int location);\n";
+sb << "    __intrinsic_op uint Load(int location, out uint status);\n";
+sb << "\n";
+sb << "    __intrinsic_op uint2 Load2(int location);\n";
+sb << "    __intrinsic_op uint2 Load2(int location, out uint status);\n";
+sb << "\n";
+sb << "    __intrinsic_op uint3 Load3(int location);\n";
+sb << "    __intrinsic_op uint3 Load3(int location, out uint status);\n";
+sb << "\n";
+sb << "    __intrinsic_op uint4 Load4(int location);\n";
+sb << "    __intrinsic_op uint4 Load4(int location, out uint status);\n";
+sb << "\n";
+sb << "    // Added operations:\n";
+sb << "\n";
+sb << "    __intrinsic_op void InterlockedAdd(\n";
+sb << "        UINT dest,\n";
+sb << "        UINT value,\n";
+sb << "        out UINT original_value);\n";
+sb << "    __intrinsic_op void InterlockedAdd(\n";
+sb << "        UINT dest,\n";
+sb << "        UINT value);\n";
+sb << "\n";
+sb << "    __intrinsic_op void InterlockedAnd(\n";
+sb << "        UINT dest,\n";
+sb << "        UINT value,\n";
+sb << "        out UINT original_value);\n";
+sb << "    __intrinsic_op void InterlockedAnd(\n";
+sb << "        UINT dest,\n";
+sb << "        UINT value);\n";
+sb << "\n";
+sb << "    __intrinsic_op void InterlockedCompareExchange(\n";
+sb << "        UINT dest,\n";
+sb << "        UINT compare_value,\n";
+sb << "        UINT value,\n";
+sb << "        out UINT original_value);\n";
+sb << "    __intrinsic_op void InterlockedCompareExchange(\n";
+sb << "        UINT dest,\n";
+sb << "        UINT compare_value,\n";
+sb << "        UINT value);\n";
+sb << "\n";
+sb << "    __intrinsic_op void InterlockedCompareStore(\n";
+sb << "        UINT dest,\n";
+sb << "        UINT compare_value,\n";
+sb << "        UINT value);\n";
+sb << "    __intrinsic_op void InterlockedCompareStore(\n";
+sb << "        UINT dest,\n";
+sb << "        UINT compare_value);\n";
+sb << "\n";
+sb << "    __intrinsic_op void InterlockedExchange(\n";
+sb << "        UINT dest,\n";
+sb << "        UINT value,\n";
+sb << "        out UINT original_value);\n";
+sb << "    __intrinsic_op void InterlockedExchange(\n";
+sb << "        UINT dest,\n";
+sb << "        UINT value);\n";
+sb << "\n";
+sb << "    __intrinsic_op void InterlockedMax(\n";
+sb << "        UINT dest,\n";
+sb << "        UINT value,\n";
+sb << "        out UINT original_value);\n";
+sb << "    __intrinsic_op void InterlockedMax(\n";
+sb << "        UINT dest,\n";
+sb << "        UINT value);\n";
+sb << "\n";
+sb << "    __intrinsic_op void InterlockedMin(\n";
+sb << "        UINT dest,\n";
+sb << "        UINT value,\n";
+sb << "        out UINT original_value);\n";
+sb << "    __intrinsic_op void InterlockedMin(\n";
+sb << "        UINT dest,\n";
+sb << "        UINT value);\n";
+sb << "\n";
+sb << "    __intrinsic_op void InterlockedOr(\n";
+sb << "        UINT dest,\n";
+sb << "        UINT value,\n";
+sb << "        out UINT original_value);\n";
+sb << "    __intrinsic_op void InterlockedOr(\n";
+sb << "        UINT dest,\n";
+sb << "        UINT value);\n";
+sb << "\n";
+sb << "    __intrinsic_op void InterlockedXor(\n";
+sb << "        UINT dest,\n";
+sb << "        UINT value,\n";
+sb << "        out UINT original_value);\n";
+sb << "    __intrinsic_op void InterlockedXor(\n";
+sb << "        UINT dest,\n";
+sb << "        UINT value);\n";
+sb << "\n";
+sb << "    __intrinsic_op void Store(\n";
+sb << "        uint address,\n";
+sb << "        uint value);\n";
+sb << "\n";
+sb << "    __intrinsic_op void Store2(\n";
+sb << "        uint address,\n";
+sb << "        uint2 value);\n";
+sb << "\n";
+sb << "    __intrinsic_op void Store3(\n";
+sb << "        uint address,\n";
+sb << "        uint3 value);\n";
+sb << "\n";
+sb << "    __intrinsic_op void Store4(\n";
+sb << "        uint address,\n";
+sb << "        uint4 value);\n";
+sb << "};\n";
+sb << "\n";
+sb << "__generic<T> __magic_type(HLSLRWStructuredBufferType) struct RWStructuredBuffer\n";
+sb << "{\n";
+sb << "    __intrinsic_op uint DecrementCounter();\n";
+sb << "\n";
+sb << "    __intrinsic_op void GetDimensions(\n";
+sb << "        out uint numStructs,\n";
+sb << "        out uint stride);\n";
+sb << "\n";
+sb << "    __intrinsic_op uint IncrementCounter();\n"; // HLSL declares a uint result, matching DecrementCounter
+sb << "\n";
+sb << "    __intrinsic_op T Load(int location);\n";
+sb << "    __intrinsic_op T Load(int location, out uint status);\n";
+sb << "\n";
+sb << "    __intrinsic_op __subscript(uint index) -> T { get; set; }\n";
+sb << "};\n";
+sb << "\n";
+sb << "__generic<T> __magic_type(HLSLPointStreamType) struct PointStream\n";
+sb << "{\n";
+sb << "    void Append(T value);\n";
+sb << "    void RestartStrip();\n";
+sb << "};\n";
+sb << "\n";
+sb << "__generic<T> __magic_type(HLSLLineStreamType) struct LineStream\n";
+sb << "{\n";
+sb << "    void Append(T value);\n";
+sb << "    void RestartStrip();\n";
+sb << "};\n";
+sb << "\n";
+sb << "__generic<T> __magic_type(HLSLTriangleStreamType) struct TriangleStream\n";
+sb << "{\n";
+sb << "    void Append(T value);\n";
+sb << "    void RestartStrip();\n";
+sb << "};\n";
+sb << "\n";
+sb << "// Note(tfoley): Trying to systematically add all the HLSL builtins\n";
+sb << "\n";
+sb << "// Try to terminate the current draw or dispatch call (HLSL SM 4.0)\n";
+sb << "__intrinsic_op void abort();\n";
+sb << "\n";
+sb << "// Absolute value (HLSL SM 1.0)\n";
+sb << "__generic<T : __BuiltinSignedArithmeticType> __intrinsic_op T abs(T x);\n";
+sb << "__generic<T : __BuiltinSignedArithmeticType, let N : int> __intrinsic_op vector<T,N> abs(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinSignedArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> abs(matrix<T,N,M> x);\n";
+sb << "\n";
+sb << "// Inverse cosine (HLSL SM 1.0)\n";
+sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op T acos(T x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> acos(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> acos(matrix<T,N,M> x);\n";
+sb << "\n";
+sb << "// Test if all components are non-zero (HLSL SM 1.0)\n";
+sb << "__generic<T : __BuiltinType> __intrinsic_op bool all(T x);\n"; // HLSL: result is one bool for every operand shape
+sb << "__generic<T : __BuiltinType, let N : int> __intrinsic_op bool all(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinType, let N : int, let M : int> __intrinsic_op bool all(matrix<T,N,M> x);\n";
+sb << "\n";
+sb << "// Barrier for writes to all memory spaces (HLSL SM 5.0)\n";
+sb << "__intrinsic_op void AllMemoryBarrier();\n";
+sb << "\n";
+sb << "// Thread-group sync and barrier for writes to all memory spaces (HLSL SM 5.0)\n";
+sb << "__intrinsic_op void AllMemoryBarrierWithGroupSync();\n";
+sb << "\n";
+sb << "// Test if any components is non-zero (HLSL SM 1.0)\n";
+sb << "__generic<T : __BuiltinType> __intrinsic_op bool any(T x);\n"; // HLSL: result is one bool for every operand shape
+sb << "__generic<T : __BuiltinType, let N : int> __intrinsic_op bool any(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinType, let N : int, let M : int> __intrinsic_op bool any(matrix<T,N,M> x);\n";
+sb << "\n";
+sb << "\n";
+sb << "// Reinterpret bits as a double (HLSL SM 5.0)\n";
+sb << "__intrinsic_op double asdouble(uint lowbits, uint highbits);\n";
+sb << "\n";
+sb << "// Reinterpret bits as a float (HLSL SM 4.0)\n";
+sb << "__intrinsic_op float asfloat( int x);\n";
+sb << "__intrinsic_op float asfloat(uint x);\n";
+sb << "__generic<let N : int> __intrinsic_op vector<float,N> asfloat(vector< int,N> x);\n";
+sb << "__generic<let N : int> __intrinsic_op vector<float,N> asfloat(vector<uint,N> x);\n";
+sb << "__generic<let N : int, let M : int> __intrinsic_op matrix<float,N,M> asfloat(matrix< int,N,M> x);\n";
+sb << "__generic<let N : int, let M : int> __intrinsic_op matrix<float,N,M> asfloat(matrix<uint,N,M> x);\n";
+sb << "\n";
+sb << "\n";
+sb << "// Inverse sine (HLSL SM 1.0)\n";
+sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op T asin(T x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> asin(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> asin(matrix<T,N,M> x);\n";
+sb << "\n";
+sb << "// Reinterpret bits as an int (HLSL SM 4.0)\n";
+sb << "__intrinsic_op int asint(float x);\n";
+sb << "__intrinsic_op int asint(uint x);\n";
+sb << "__generic<let N : int> __intrinsic_op vector<int,N> asint(vector<float,N> x);\n";
+sb << "__generic<let N : int> __intrinsic_op vector<int,N> asint(vector<uint,N> x);\n";
+sb << "__generic<let N : int, let M : int> __intrinsic_op matrix<int,N,M> asint(matrix<float,N,M> x);\n";
+sb << "__generic<let N : int, let M : int> __intrinsic_op matrix<int,N,M> asint(matrix<uint,N,M> x);\n";
+sb << "\n";
+sb << "// Reinterpret bits of double as a uint (HLSL SM 5.0)\n";
+sb << "__intrinsic_op void asuint(double value, out uint lowbits, out uint highbits);\n";
+sb << "\n";
+sb << "// Reinterpret bits as a uint (HLSL SM 4.0)\n";
+sb << "__intrinsic_op uint asuint(float x);\n";
+sb << "__intrinsic_op uint asuint(int x);\n";
+sb << "__generic<let N : int> __intrinsic_op vector<uint,N> asuint(vector<float,N> x);\n";
+sb << "__generic<let N : int> __intrinsic_op vector<uint,N> asuint(vector<int,N> x);\n";
+sb << "__generic<let N : int, let M : int> __intrinsic_op matrix<uint,N,M> asuint(matrix<float,N,M> x);\n";
+sb << "__generic<let N : int, let M : int> __intrinsic_op matrix<uint,N,M> asuint(matrix<int,N,M> x);\n";
+sb << "\n";
+sb << "// Inverse tangent (HLSL SM 1.0)\n";
+sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op T atan(T x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> atan(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> atan(matrix<T,N,M> x);\n";
+sb << "\n";
+sb << "__generic<T : __BuiltinFloatingPointType>\n";
+sb << "__target_intrinsic(glsl,\"atan($0,$1)\")\n";
+sb << "__intrinsic_op\n";
+sb << "T atan2(T y, T x);\n";
+sb << "\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int>\n";
+sb << "__target_intrinsic(glsl,\"atan($0,$1)\")\n";
+sb << "__intrinsic_op\n";
+sb << "vector<T,N> atan2(vector<T,N> y, vector<T,N> x);\n";
+sb << "\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>\n";
+sb << "__target_intrinsic(glsl,\"atan($0,$1)\")\n";
+sb << "__intrinsic_op\n";
+sb << "matrix<T,N,M> atan2(matrix<T,N,M> y, matrix<T,N,M> x);\n";
+sb << "\n";
+sb << "// Ceiling (HLSL SM 1.0)\n";
+sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op T ceil(T x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> ceil(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> ceil(matrix<T,N,M> x);\n";
+sb << "\n";
+sb << "\n";
+sb << "// Check access status to tiled resource\n";
+sb << "__intrinsic_op bool CheckAccessFullyMapped(uint status);\n";
+sb << "\n";
+sb << "// Clamp (HLSL SM 1.0)\n";
+sb << "__generic<T : __BuiltinArithmeticType> __intrinsic_op T clamp(T x, T min, T max);\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> clamp(vector<T,N> x, vector<T,N> min, vector<T,N> max);\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> clamp(matrix<T,N,M> x, matrix<T,N,M> min, matrix<T,N,M> max);\n";
+sb << "\n";
+sb << "// Clip (discard) fragment conditionally\n";
+sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op void clip(T x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op void clip(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op void clip(matrix<T,N,M> x);\n";
+sb << "\n";
+sb << "// Cosine\n";
+sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op T cos(T x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> cos(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> cos(matrix<T,N,M> x);\n";
+sb << "\n";
+sb << "// Hyperbolic cosine\n";
+sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op T cosh(T x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> cosh(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> cosh(matrix<T,N,M> x);\n";
+sb << "\n";
+sb << "// Population count\n";
+sb << "__intrinsic_op uint countbits(uint value);\n";
+sb << "\n";
+sb << "// Cross product\n";
+sb << "__generic<T : __BuiltinArithmeticType> __intrinsic_op vector<T,3> cross(vector<T,3> x, vector<T,3> y);\n";
+sb << "\n";
+sb << "// Convert encoded color\n";
+sb << "__intrinsic_op int4 D3DCOLORtoUBYTE4(float4 x);\n";
+sb << "\n";
+sb << "// Partial-difference derivatives\n";
+sb << "__generic<T : __BuiltinFloatingPointType>\n";
+sb << "__target_intrinsic(glsl, dFdx)\n";
+sb << "__intrinsic_op\n";
+sb << "T ddx(T x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int>\n";
+sb << "__target_intrinsic(glsl, dFdx)\n";
+sb << "__intrinsic_op\n";
+sb << "vector<T,N> ddx(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>\n";
+sb << "__target_intrinsic(glsl, dFdx)\n";
+sb << "__intrinsic_op\n";
+sb << "matrix<T,N,M> ddx(matrix<T,N,M> x);\n";
+sb << "\n";
+sb << "__generic<T : __BuiltinFloatingPointType>\n";
+sb << "__glsl_extension(GL_ARB_derivative_control)\n";
+sb << "__target_intrinsic(glsl, dFdxCoarse)\n";
+sb << "__intrinsic_op\n";
+sb << "T ddx_coarse(T x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int>\n";
+sb << "__glsl_extension(GL_ARB_derivative_control)\n";
+sb << "__target_intrinsic(glsl, dFdxCoarse)\n";
+sb << "__intrinsic_op\n";
+sb << "vector<T,N> ddx_coarse(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>\n";
+sb << "__glsl_extension(GL_ARB_derivative_control)\n";
+sb << "__target_intrinsic(glsl, dFdxCoarse)\n";
+sb << "__intrinsic_op\n";
+sb << "matrix<T,N,M> ddx_coarse(matrix<T,N,M> x);\n";
+sb << "\n";
+sb << "__generic<T : __BuiltinFloatingPointType>\n";
+sb << "__glsl_extension(GL_ARB_derivative_control)\n";
+sb << "__target_intrinsic(glsl, dFdxFine)\n";
+sb << "__intrinsic_op\n";
+sb << "T ddx_fine(T x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int>\n";
+sb << "__glsl_extension(GL_ARB_derivative_control)\n";
+sb << "__target_intrinsic(glsl, dFdxFine)\n";
+sb << "__intrinsic_op\n";
+sb << "vector<T,N> ddx_fine(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>\n";
+sb << "__glsl_extension(GL_ARB_derivative_control)\n";
+sb << "__target_intrinsic(glsl, dFdxFine)\n";
+sb << "__intrinsic_op\n";
+sb << "matrix<T,N,M> ddx_fine(matrix<T,N,M> x);\n";
+sb << "\n";
+sb << "__generic<T : __BuiltinFloatingPointType>\n";
+sb << "__target_intrinsic(glsl, dFdy)\n";
+sb << "__intrinsic_op\n";
+sb << "T ddy(T x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int>\n";
+sb << "__target_intrinsic(glsl, dFdy)\n";
+sb << "__intrinsic_op\n";
+sb << "vector<T,N> ddy(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>\n";
+sb << "__target_intrinsic(glsl, dFdy)\n";
+sb << "__intrinsic_op\n";
+sb << " matrix<T,N,M> ddy(matrix<T,N,M> x);\n";
+sb << "\n";
+sb << "__generic<T : __BuiltinFloatingPointType>\n";
+sb << "__glsl_extension(GL_ARB_derivative_control)\n";
+sb << "__target_intrinsic(glsl, dFdyCoarse)\n";
+sb << "__intrinsic_op\n";
+sb << "T ddy_coarse(T x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int>\n";
+sb << "__glsl_extension(GL_ARB_derivative_control)\n";
+sb << "__target_intrinsic(glsl, dFdyCoarse)\n";
+sb << "__intrinsic_op\n";
+sb << "vector<T,N> ddy_coarse(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>\n";
+sb << "__glsl_extension(GL_ARB_derivative_control)\n";
+sb << "__target_intrinsic(glsl, dFdyCoarse)\n";
+sb << "__intrinsic_op\n";
+sb << "matrix<T,N,M> ddy_coarse(matrix<T,N,M> x);\n";
+sb << "\n";
+sb << "__generic<T : __BuiltinFloatingPointType>\n";
+sb << "__glsl_extension(GL_ARB_derivative_control)\n";
+sb << "__target_intrinsic(glsl, dFdyFine)\n";
+sb << "__intrinsic_op\n";
+sb << "T ddy_fine(T x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int>\n";
+sb << "__glsl_extension(GL_ARB_derivative_control)\n";
+sb << "__target_intrinsic(glsl, dFdyFine)\n";
+sb << "__intrinsic_op\n";
+sb << "vector<T,N> ddy_fine(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>\n";
+sb << "__glsl_extension(GL_ARB_derivative_control)\n";
+sb << "__target_intrinsic(glsl, dFdyFine)\n";
+sb << "__intrinsic_op\n";
+sb << "matrix<T,N,M> ddy_fine(matrix<T,N,M> x);\n";
+sb << "\n";
+sb << "\n";
+sb << "// Radians to degrees\n";
+sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op T degrees(T x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> degrees(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> degrees(matrix<T,N,M> x);\n";
+sb << "\n";
+sb << "// Matrix determinant\n";
+sb << "\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op T determinant(matrix<T,N,N> m);\n";
+sb << "\n";
+sb << "// Barrier for device memory\n";
+sb << "__intrinsic_op void DeviceMemoryBarrier();\n";
+sb << "__intrinsic_op void DeviceMemoryBarrierWithGroupSync();\n";
+sb << "\n";
+sb << "// Vector distance\n";
+sb << "\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op T distance(vector<T,N> x, vector<T,N> y);\n";
+sb << "\n";
+sb << "// Vector dot product\n";
+sb << "\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op T dot(vector<T,N> x, vector<T,N> y);\n";
+sb << "\n";
+sb << "// Helper for computing distance terms for lighting (obsolete)\n";
+sb << "\n";
+sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op vector<T,4> dst(vector<T,4> x, vector<T,4> y);\n";
+sb << "\n";
+sb << "// Error message\n";
+sb << "\n";
+sb << "// __intrinsic_op void errorf( string format, ... );\n";
+sb << "\n";
+sb << "// Attribute evaluation\n";
+sb << "\n";
+sb << "__generic<T : __BuiltinArithmeticType> __intrinsic_op T EvaluateAttributeAtCentroid(T x);\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> EvaluateAttributeAtCentroid(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> EvaluateAttributeAtCentroid(matrix<T,N,M> x);\n";
+sb << "\n";
+sb << "__generic<T : __BuiltinArithmeticType> __intrinsic_op T EvaluateAttributeAtSample(T x, uint sampleindex);\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> EvaluateAttributeAtSample(vector<T,N> x, uint sampleindex);\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> EvaluateAttributeAtSample(matrix<T,N,M> x, uint sampleindex);\n";
+sb << "\n";
+sb << "__generic<T : __BuiltinArithmeticType> __intrinsic_op T EvaluateAttributeSnapped(T x, int2 offset);\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> EvaluateAttributeSnapped(vector<T,N> x, int2 offset);\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> EvaluateAttributeSnapped(matrix<T,N,M> x, int2 offset);\n";
+sb << "\n";
+sb << "// Base-e exponent\n";
+sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op T exp(T x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> exp(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> exp(matrix<T,N,M> x);\n";
+sb << "\n";
+sb << "// Base-2 exponent\n";
+sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op T exp2(T x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> exp2(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> exp2(matrix<T,N,M> x);\n";
+sb << "\n";
+sb << "// Convert 16-bit float stored in low bits of integer\n";
+sb << "__intrinsic_op float f16tof32(uint value);\n";
+sb << "__generic<let N : int> __intrinsic_op vector<float,N> f16tof32(vector<uint,N> value);\n";
+sb << "\n";
+sb << "// Convert to 16-bit float stored in low bits of integer\n";
+sb << "__intrinsic_op uint f32tof16(float value);\n";
+sb << "__generic<let N : int> __intrinsic_op vector<uint,N> f32tof16(vector<float,N> value);\n";
+sb << "\n";
+sb << "// Flip surface normal to face forward, if needed\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> faceforward(vector<T,N> n, vector<T,N> i, vector<T,N> ng);\n";
+sb << "\n";
+sb << "// Find first set bit starting at high bit and working down\n";
+sb << "__intrinsic_op int firstbithigh(int value);\n";
+sb << "__generic<let N : int> __intrinsic_op vector<int,N> firstbithigh(vector<int,N> value);\n";
+sb << "\n";
+sb << "__intrinsic_op uint firstbithigh(uint value);\n";
+sb << "__generic<let N : int> __intrinsic_op vector<uint,N> firstbithigh(vector<uint,N> value);\n";
+sb << "\n";
+sb << "// Find first set bit starting at low bit and working up\n";
+sb << "__intrinsic_op int firstbitlow(int value);\n";
+sb << "__generic<let N : int> __intrinsic_op vector<int,N> firstbitlow(vector<int,N> value);\n";
+sb << "\n";
+sb << "__intrinsic_op uint firstbitlow(uint value);\n";
+sb << "__generic<let N : int> __intrinsic_op vector<uint,N> firstbitlow(vector<uint,N> value);\n";
+sb << "\n";
+sb << "// Floor (HLSL SM 1.0)\n";
+sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op T floor(T x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> floor(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> floor(matrix<T,N,M> x);\n";
+sb << "\n";
+sb << "// Fused multiply-add for doubles\n";
+sb << "__intrinsic_op double fma(double a, double b, double c);\n";
+sb << "__generic<let N : int> __intrinsic_op vector<double, N> fma(vector<double, N> a, vector<double, N> b, vector<double, N> c);\n";
+sb << "__generic<let N : int, let M : int> __intrinsic_op matrix<double,N,M> fma(matrix<double,N,M> a, matrix<double,N,M> b, matrix<double,N,M> c);\n";
+sb << "\n";
+sb << "// Floating point remainder of x/y\n";
+sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op T fmod(T x, T y);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> fmod(vector<T,N> x, vector<T,N> y);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> fmod(matrix<T,N,M> x, matrix<T,N,M> y);\n";
+sb << "\n";
+sb << "// Fractional part\n";
+sb << "__generic<T : __BuiltinFloatingPointType>\n";
+sb << "__target_intrinsic(glsl, fract)\n";
+sb << "__intrinsic_op\n";
+sb << "T frac(T x);\n";
+sb << "\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int>\n";
+sb << "__target_intrinsic(glsl, fract)\n";
+sb << "__intrinsic_op\n";
+sb << "vector<T,N> frac(vector<T,N> x);\n";
+sb << "\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>\n";
+sb << "__target_intrinsic(glsl, fract)\n";
+sb << "__intrinsic_op\n";
+sb << "matrix<T,N,M> frac(matrix<T,N,M> x);\n";
+sb << "\n";
+sb << "// Split float into mantissa and exponent\n";
+sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op T frexp(T x, out T exp);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> frexp(vector<T,N> x, out vector<T,N> exp);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> frexp(matrix<T,N,M> x, out matrix<T,N,M> exp);\n";
+sb << "\n";
+sb << "// Texture filter width\n";
+sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op T fwidth(T x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> fwidth(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> fwidth(matrix<T,N,M> x);\n";
+sb << "\n";
+sb << "// Get number of samples in render target\n";
+sb << "__intrinsic_op uint GetRenderTargetSampleCount();\n";
+sb << "\n";
+sb << "// Get position of given sample\n";
+sb << "__intrinsic_op float2 GetRenderTargetSamplePosition(int Index);\n";
+sb << "\n";
+sb << "// Group memory barrier\n";
+sb << "__intrinsic_op void GroupMemoryBarrier();\n";
+sb << "__intrinsic_op void GroupMemoryBarrierWithGroupSync();\n";
+sb << "\n";
+sb << "// Atomics\n";
+sb << "__intrinsic_op void InterlockedAdd(in out  int dest,  int value, out  int original_value);\n";
+sb << "__intrinsic_op void InterlockedAdd(in out uint dest, uint value, out uint original_value);\n";
+sb << "\n";
+sb << "__intrinsic_op void InterlockedAnd(in out  int dest,  int value, out  int original_value);\n";
+sb << "__intrinsic_op void InterlockedAnd(in out uint dest, uint value, out uint original_value);\n";
+sb << "\n";
+sb << "__intrinsic_op void InterlockedCompareExchange(in out  int dest,  int compare_value,  int value, out  int original_value);\n";
+sb << "__intrinsic_op void InterlockedCompareExchange(in out uint dest, uint compare_value, uint value, out uint original_value);\n";
+sb << "\n";
+sb << "__intrinsic_op void InterlockedCompareStore(in out  int dest,  int compare_value,  int value);\n";
+sb << "__intrinsic_op void InterlockedCompareStore(in out uint dest, uint compare_value, uint value);\n";
+sb << "\n";
+sb << "__intrinsic_op void InterlockedExchange(in out  int dest,  int value, out  int original_value);\n";
+sb << "__intrinsic_op void InterlockedExchange(in out uint dest, uint value, out uint original_value);\n";
+sb << "\n";
+sb << "__intrinsic_op void InterlockedMax(in out  int dest,  int value, out  int original_value);\n";
+sb << "__intrinsic_op void InterlockedMax(in out uint dest, uint value, out uint original_value);\n";
+sb << "\n";
+sb << "__intrinsic_op void InterlockedMin(in out  int dest,  int value, out  int original_value);\n";
+sb << "__intrinsic_op void InterlockedMin(in out uint dest, uint value, out uint original_value);\n";
+sb << "\n";
+sb << "__intrinsic_op void InterlockedOr(in out  int dest,  int value, out  int original_value);\n";
+sb << "__intrinsic_op void InterlockedOr(in out uint dest, uint value, out uint original_value);\n";
+sb << "\n";
+sb << "__intrinsic_op void InterlockedXor(in out  int dest,  int value, out  int original_value);\n";
+sb << "__intrinsic_op void InterlockedXor(in out uint dest, uint value, out uint original_value);\n";
+sb << "\n";
+sb << "// Is floating-point value finite?\n";
+sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op bool isfinite(T x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<bool,N> isfinite(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<bool,N,M> isfinite(matrix<T,N,M> x);\n";
+sb << "\n";
+sb << "// Is floating-point value infinite?\n";
+sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op bool isinf(T x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<bool,N> isinf(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<bool,N,M> isinf(matrix<T,N,M> x);\n";
+sb << "\n";
+sb << "// Is floating-point value not-a-number?\n";
+sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op bool isnan(T x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<bool,N> isnan(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<bool,N,M> isnan(matrix<T,N,M> x);\n";
+sb << "\n";
+sb << "// Construct float from mantissa and exponent\n";
+sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op T ldexp(T x, T exp);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> ldexp(vector<T,N> x, vector<T,N> exp);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> ldexp(matrix<T,N,M> x, matrix<T,N,M> exp);\n";
+sb << "\n";
+sb << "// Vector length\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op T length(vector<T,N> x);\n";
+sb << "\n";
+sb << "// Linear interpolation\n";
+sb << "__generic<T : __BuiltinFloatingPointType>\n";
+sb << "__target_intrinsic(glsl, mix)\n";
+sb << "__intrinsic_op\n";
+sb << "T lerp(T x, T y, T s);\n";
+sb << "\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int>\n";
+sb << "__target_intrinsic(glsl, mix)\n";
+sb << "__intrinsic_op\n";
+sb << "vector<T,N> lerp(vector<T,N> x, vector<T,N> y, vector<T,N> s);\n";
+sb << "\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>\n";
+sb << "__target_intrinsic(glsl, mix)\n";
+sb << "__intrinsic_op\n";
+sb << "matrix<T,N,M> lerp(matrix<T,N,M> x, matrix<T,N,M> y, matrix<T,N,M> s);\n";
+sb << "\n";
+sb << "// Legacy lighting function (obsolete)\n";
+sb << "__intrinsic_op float4 lit(float n_dot_l, float n_dot_h, float m);\n";
+sb << "\n";
+sb << "// Base-e logarithm\n";
+sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op T log(T x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> log(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> log(matrix<T,N,M> x);\n";
+sb << "\n";
+sb << "// Base-10 logarithm\n";
+sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op T log10(T x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> log10(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> log10(matrix<T,N,M> x);\n";
+sb << "\n";
+sb << "// Base-2 logarithm\n";
+sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op T log2(T x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> log2(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> log2(matrix<T,N,M> x);\n";
+sb << "\n";
+sb << "// multiply-add\n";
+sb << "__generic<T : __BuiltinArithmeticType> __intrinsic_op T mad(T mvalue, T avalue, T bvalue);\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> mad(vector<T,N> mvalue, vector<T,N> avalue, vector<T,N> bvalue);\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> mad(matrix<T,N,M> mvalue, matrix<T,N,M> avalue, matrix<T,N,M> bvalue);\n";
+sb << "\n";
+sb << "// maximum\n";
+sb << "__generic<T : __BuiltinArithmeticType> __intrinsic_op T max(T x, T y);\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> max(vector<T,N> x, vector<T,N> y);\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> max(matrix<T,N,M> x, matrix<T,N,M> y);\n";
+sb << "\n";
+sb << "// minimum\n";
+sb << "__generic<T : __BuiltinArithmeticType> __intrinsic_op T min(T x, T y);\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> min(vector<T,N> x, vector<T,N> y);\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> min(matrix<T,N,M> x, matrix<T,N,M> y);\n";
+sb << "\n";
+sb << "// split into integer and fractional parts (both with same sign)\n";
+sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op T modf(T x, out T ip);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> modf(vector<T,N> x, out vector<T,N> ip);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> modf(matrix<T,N,M> x, out matrix<T,N,M> ip);\n";
+sb << "\n";
+sb << "// msad4 (whatever that is)\n";
+sb << "__intrinsic_op uint4 msad4(uint reference, uint2 source, uint4 accum);\n";
+sb << "\n";
+sb << "// General inner products\n";
+sb << "\n";
+sb << "// scalar-scalar\n";
+sb << "__generic<T : __BuiltinArithmeticType> __intrinsic_op T mul(T x, T y);\n";
+sb << "\n";
+sb << "// scalar-vector and vector-scalar\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> mul(vector<T,N> x, T y);\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> mul(T x, vector<T,N> y);\n";
+sb << "\n";
+sb << "// scalar-matrix and matrix-scalar\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int, let M :int> __intrinsic_op matrix<T,N,M> mul(matrix<T,N,M> x, T y);\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int, let M :int> __intrinsic_op matrix<T,N,M> mul(T x, matrix<T,N,M> y);\n";
+sb << "\n";
+sb << "// vector-vector (dot product)\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op(dot) T mul(vector<T,N> x, vector<T,N> y);\n";
+sb << "\n";
+sb << "// vector-matrix\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op(mulVectorMatrix) vector<T,M> mul(vector<T,N> x, matrix<T,N,M> y);\n";
+sb << "\n";
+sb << "// matrix-vector\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op(mulMatrixVector) vector<T,N> mul(matrix<T,N,M> x, vector<T,M> y);\n";
+sb << "\n";
+sb << "// matrix-matrix\n";
+sb << "__generic<T : __BuiltinArithmeticType, let R : int, let N : int, let C : int> __intrinsic_op(mulMatrixMatrix) matrix<T,R,C> mul(matrix<T,R,N> x, matrix<T,N,C> y);\n";
+sb << "\n";
+sb << "// noise (deprecated)\n";
+sb << "__intrinsic_op float noise(float x);\n";
+sb << "__generic<let N : int> __intrinsic_op float noise(vector<float, N> x);\n";
+sb << "\n";
+sb << "// Normalize a vector\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> normalize(vector<T,N> x);\n";
+sb << "\n";
+sb << "// Raise to a power\n";
+sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op T pow(T x, T y);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> pow(vector<T,N> x, vector<T,N> y);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> pow(matrix<T,N,M> x, matrix<T,N,M> y);\n";
+sb << "\n";
+sb << "// Output message\n";
+sb << "\n";
+sb << "// __intrinsic_op void printf( string format, ... );\n";
+sb << "\n";
+sb << "// Tessellation factor fixup routines\n";
+sb << "\n";
+sb << "__intrinsic_op void Process2DQuadTessFactorsAvg(\n";
+sb << "    in  float4 RawEdgeFactors,\n";
+sb << "    in  float2 InsideScale,\n";
+sb << "    out float4 RoundedEdgeTessFactors,\n";
+sb << "    out float2 RoundedInsideTessFactors,\n";
+sb << "    out float2 UnroundedInsideTessFactors);\n";
+sb << "\n";
+sb << "__intrinsic_op void Process2DQuadTessFactorsMax(\n";
+sb << "    in  float4 RawEdgeFactors,\n";
+sb << "    in  float2 InsideScale,\n";
+sb << "    out float4 RoundedEdgeTessFactors,\n";
+sb << "    out float2 RoundedInsideTessFactors,\n";
+sb << "    out float2 UnroundedInsideTessFactors);\n";
+sb << "\n";
+sb << "__intrinsic_op void Process2DQuadTessFactorsMin(\n";
+sb << "    in  float4 RawEdgeFactors,\n";
+sb << "    in  float2 InsideScale,\n";
+sb << "    out float4 RoundedEdgeTessFactors,\n";
+sb << "    out float2 RoundedInsideTessFactors,\n";
+sb << "    out float2 UnroundedInsideTessFactors);\n";
+sb << "\n";
+sb << "__intrinsic_op void ProcessIsolineTessFactors(\n";
+sb << "    in  float RawDetailFactor,\n";
+sb << "    in  float RawDensityFactor,\n";
+sb << "    out float RoundedDetailFactor,\n";
+sb << "    out float RoundedDensityFactor);\n";
+sb << "\n";
+sb << "__intrinsic_op void ProcessQuadTessFactorsAvg(\n";
+sb << "    in  float4 RawEdgeFactors,\n";
+sb << "    in  float InsideScale,\n";
+sb << "    out float4 RoundedEdgeTessFactors,\n";
+sb << "    out float2 RoundedInsideTessFactors,\n";
+sb << "    out float2 UnroundedInsideTessFactors);\n";
+sb << "\n";
+sb << "__intrinsic_op void ProcessQuadTessFactorsMax(\n";
+sb << "    in  float4 RawEdgeFactors,\n";
+sb << "    in  float InsideScale,\n";
+sb << "    out float4 RoundedEdgeTessFactors,\n";
+sb << "    out float2 RoundedInsideTessFactors,\n";
+sb << "    out float2 UnroundedInsideTessFactors);\n";
+sb << "\n";
+sb << "__intrinsic_op void ProcessQuadTessFactorsMin(\n";
+sb << "    in  float4 RawEdgeFactors,\n";
+sb << "    in  float InsideScale,\n";
+sb << "    out float4 RoundedEdgeTessFactors,\n";
+sb << "    out float2 RoundedInsideTessFactors,\n";
+sb << "    out float2 UnroundedInsideTessFactors);\n";
+sb << "\n";
+sb << "__intrinsic_op void ProcessTriTessFactorsAvg(\n";
+sb << "    in  float3 RawEdgeFactors,\n";
+sb << "    in  float InsideScale,\n";
+sb << "    out float3 RoundedEdgeTessFactors,\n";
+sb << "    out float RoundedInsideTessFactor,\n";
+sb << "    out float UnroundedInsideTessFactor);\n";
+sb << "\n";
+sb << "__intrinsic_op void ProcessTriTessFactorsMax(\n";
+sb << "    in  float3 RawEdgeFactors,\n";
+sb << "    in  float InsideScale,\n";
+sb << "    out float3 RoundedEdgeTessFactors,\n";
+sb << "    out float RoundedInsideTessFactor,\n";
+sb << "    out float UnroundedInsideTessFactor);\n";
+sb << "\n";
+// Triangle tessellation has a single inside factor, so the two trailing
+// out-parameters use the singular names, matching the ProcessTriTessFactorsAvg
+// and ProcessTriTessFactorsMax declarations emitted just before this one.
+sb << "__intrinsic_op void ProcessTriTessFactorsMin(\n";
+sb << "    in  float3 RawEdgeFactors,\n";
+sb << "    in  float InsideScale,\n";
+sb << "    out float3 RoundedEdgeTessFactors,\n";
+sb << "    out float RoundedInsideTessFactor,\n";
+sb << "    out float UnroundedInsideTessFactor);\n";
+sb << "\n";
+sb << "// Degrees to radians\n";
+sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op T radians(T x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> radians(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> radians(matrix<T,N,M> x);\n";
+sb << "\n";
+sb << "// Approximate reciprocal\n";
+sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op T rcp(T x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> rcp(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> rcp(matrix<T,N,M> x);\n";
+sb << "\n";
+sb << "// Reflect incident vector across plane with given normal\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int>\n";
+sb << "__intrinsic_op\n";
+sb << "vector<T,N> reflect(vector<T,N> i, vector<T,N> n);\n";
+sb << "\n";
+sb << "// Refract incident vector given surface normal and index of refraction\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int>\n";
+sb << "__intrinsic_op\n";
+sb << "vector<T,N> refract(vector<T,N> i, vector<T,N> n, float eta);\n";
+sb << "\n";
+// Both the scalar and the vector overload are marked `__intrinsic_op`, in line
+// with the other built-in declarations emitted around them.
+sb << "// Reverse order of bits\n";
+sb << "__intrinsic_op uint reversebits(uint value);\n";
+sb << "__generic<let N : int> __intrinsic_op vector<uint,N> reversebits(vector<uint,N> value);\n";
+sb << "\n";
+sb << "// Round-to-nearest\n";
+sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op T round(T x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> round(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> round(matrix<T,N,M> x);\n";
+sb << "\n";
+sb << "// Reciprocal of square root\n";
+sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op T rsqrt(T x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> rsqrt(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> rsqrt(matrix<T,N,M> x);\n";
+sb << "\n";
+sb << "// Clamp value to [0,1] range\n";
+sb << "__generic<T : __BuiltinFloatingPointType>\n";
+sb << "__target_intrinsic(glsl, \"clamp($0, 0, 1)\") __intrinsic_op\n";
+sb << "T saturate(T x);\n";
+sb << "\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int>\n";
+sb << "__target_intrinsic(glsl, \"clamp($0, 0, 1)\") __intrinsic_op\n";
+sb << "vector<T,N> saturate(vector<T,N> x);\n";
+sb << "\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>\n";
+sb << "__target_intrinsic(glsl, \"clamp($0, 0, 1)\") __intrinsic_op\n";
+sb << "matrix<T,N,M> saturate(matrix<T,N,M> x);\n";
+sb << "\n";
+sb << "\n";
+sb << "// Extract sign of value\n";
+sb << "__generic<T : __BuiltinSignedArithmeticType> __intrinsic_op int sign(T x);\n";
+sb << "__generic<T : __BuiltinSignedArithmeticType, let N : int> __intrinsic_op vector<int,N> sign(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinSignedArithmeticType, let N : int, let M : int> __intrinsic_op matrix<int,N,M> sign(matrix<T,N,M> x);\n";
+sb << "\n";
+sb << "\n";
+sb << "// Sine\n";
+sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op T sin(T x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> sin(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> sin(matrix<T,N,M> x);\n";
+sb << "\n";
+// The scalar overload is generic over T only: a `let N : int` parameter would
+// appear in none of its parameter types and so could not be deduced from the
+// call arguments. The vector and matrix overloads keep their size parameters.
+sb << "// Sine and cosine\n";
+sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op void sincos(T x, out T s, out T c);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op void sincos(vector<T,N> x, out vector<T,N> s, out vector<T,N> c);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op void sincos(matrix<T,N,M> x, out matrix<T,N,M> s, out matrix<T,N,M> c);\n";
+sb << "\n";
+sb << "// Hyperbolic Sine\n";
+sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op T sinh(T x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> sinh(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> sinh(matrix<T,N,M> x);\n";
+sb << "\n";
+sb << "// Smooth step (Hermite interpolation)\n";
+sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op T smoothstep(T min, T max, T x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> smoothstep(vector<T,N> min, vector<T,N> max, vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> smoothstep(matrix<T,N,M> min, matrix<T,N,M> max, matrix<T,N,M> x);\n";
+sb << "\n";
+sb << "// Square root\n";
+sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op T sqrt(T x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> sqrt(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> sqrt(matrix<T,N,M> x);\n";
+sb << "\n";
+sb << "// Step function\n";
+sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op T step(T y, T x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> step(vector<T,N> y, vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> step(matrix<T,N,M> y, matrix<T,N,M> x);\n";
+sb << "\n";
+sb << "// Tangent\n";
+sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op T tan(T x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> tan(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> tan(matrix<T,N,M> x);\n";
+sb << "\n";
+sb << "// Hyperbolic tangent\n";
+sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op T tanh(T x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> tanh(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> tanh(matrix<T,N,M> x);\n";
+sb << "\n";
+sb << "// Legacy texture-fetch operations\n";
+sb << "\n";
+sb << "/*\n";
+sb << "__intrinsic_op float4 tex1D(sampler1D s, float t);\n";
+sb << "__intrinsic_op float4 tex1D(sampler1D s, float t, float ddx, float ddy);\n";
+sb << "__intrinsic_op float4 tex1Dbias(sampler1D s, float4 t);\n";
+sb << "__intrinsic_op float4 tex1Dgrad(sampler1D s, float t, float ddx, float ddy);\n";
+sb << "__intrinsic_op float4 tex1Dlod(sampler1D s, float4 t);\n";
+sb << "__intrinsic_op float4 tex1Dproj(sampler1D s, float4 t);\n";
+sb << "\n";
+sb << "__intrinsic_op float4 tex2D(sampler2D s, float2 t);\n";
+sb << "__intrinsic_op float4 tex2D(sampler2D s, float2 t, float2 ddx, float2 ddy);\n";
+sb << "__intrinsic_op float4 tex2Dbias(sampler2D s, float4 t);\n";
+sb << "__intrinsic_op float4 tex2Dgrad(sampler2D s, float2 t, float2 ddx, float2 ddy);\n";
+sb << "__intrinsic_op float4 tex2Dlod(sampler2D s, float4 t);\n";
+sb << "__intrinsic_op float4 tex2Dproj(sampler2D s, float4 t);\n";
+sb << "\n";
+sb << "__intrinsic_op float4 tex3D(sampler3D s, float3 t);\n";
+sb << "__intrinsic_op float4 tex3D(sampler3D s, float3 t, float3 ddx, float3 ddy);\n";
+sb << "__intrinsic_op float4 tex3Dbias(sampler3D s, float4 t);\n";
+sb << "__intrinsic_op float4 tex3Dgrad(sampler3D s, float3 t, float3 ddx, float3 ddy);\n";
+sb << "__intrinsic_op float4 tex3Dlod(sampler3D s, float4 t);\n";
+sb << "__intrinsic_op float4 tex3Dproj(sampler3D s, float4 t);\n";
+sb << "\n";
+sb << "__intrinsic_op float4 texCUBE(samplerCUBE s, float3 t);\n";
+sb << "__intrinsic_op float4 texCUBE(samplerCUBE s, float3 t, float3 ddx, float3 ddy);\n";
+sb << "__intrinsic_op float4 texCUBEbias(samplerCUBE s, float4 t);\n";
+sb << "__intrinsic_op float4 texCUBEgrad(samplerCUBE s, float3 t, float3 ddx, float3 ddy);\n";
+sb << "__intrinsic_op float4 texCUBElod(samplerCUBE s, float4 t);\n";
+sb << "__intrinsic_op float4 texCUBEproj(samplerCUBE s, float4 t);\n";
+sb << "*/\n";
+sb << "\n";
+sb << "// Matrix transpose\n";
+sb << "__generic<T : __BuiltinType, let N : int, let M : int> __intrinsic_op matrix<T,M,N> transpose(matrix<T,N,M> x);\n";
+sb << "\n";
+sb << "// Truncate to integer\n";
+sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op T trunc(T x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> trunc(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> trunc(matrix<T,N,M> x);\n";
+sb << "\n";
+sb << "// Shader model 6.0 stuff\n";
+sb << "\n";
+sb << "__intrinsic_op uint GlobalOrderedCountIncrement(uint countToAppendForThisLane);\n";
+sb << "\n";
+sb << "__generic<T : __BuiltinType> __intrinsic_op T QuadReadLaneAt(T sourceValue, int quadLaneID);\n";
+sb << "__generic<T : __BuiltinType, let N : int> __intrinsic_op vector<T,N> QuadReadLaneAt(vector<T,N> sourceValue, int quadLaneID);\n";
+sb << "__generic<T : __BuiltinType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> QuadReadLaneAt(matrix<T,N,M> sourceValue, int quadLaneID);\n";
+sb << "\n";
+sb << "__generic<T : __BuiltinType> __intrinsic_op T QuadSwapX(T localValue);\n";
+sb << "__generic<T : __BuiltinType, let N : int> __intrinsic_op vector<T,N> QuadSwapX(vector<T,N> localValue);\n";
+sb << "__generic<T : __BuiltinType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> QuadSwapX(matrix<T,N,M> localValue);\n";
+sb << "\n";
+sb << "__generic<T : __BuiltinType> __intrinsic_op T QuadSwapY(T localValue);\n";
+sb << "__generic<T : __BuiltinType, let N : int> __intrinsic_op vector<T,N> QuadSwapY(vector<T,N> localValue);\n";
+sb << "__generic<T : __BuiltinType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> QuadSwapY(matrix<T,N,M> localValue);\n";
+sb << "\n";
+sb << "__generic<T : __BuiltinIntegerType> __intrinsic_op T WaveAllBitAnd(T expr);\n";
+sb << "__generic<T : __BuiltinIntegerType, let N : int> __intrinsic_op vector<T,N> WaveAllBitAnd(vector<T,N> expr);\n";
+sb << "__generic<T : __BuiltinIntegerType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> WaveAllBitAnd(matrix<T,N,M> expr);\n";
+sb << "\n";
+sb << "__generic<T : __BuiltinIntegerType> __intrinsic_op T WaveAllBitOr(T expr);\n";
+sb << "__generic<T : __BuiltinIntegerType, let N : int> __intrinsic_op vector<T,N> WaveAllBitOr(vector<T,N> expr);\n";
+sb << "__generic<T : __BuiltinIntegerType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> WaveAllBitOr(matrix<T,N,M> expr);\n";
+sb << "\n";
+sb << "__generic<T : __BuiltinIntegerType> __intrinsic_op T WaveAllBitXor(T expr);\n";
+sb << "__generic<T : __BuiltinIntegerType, let N : int> __intrinsic_op vector<T,N> WaveAllBitXor(vector<T,N> expr);\n";
+sb << "__generic<T : __BuiltinIntegerType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> WaveAllBitXor(matrix<T,N,M> expr);\n";
+sb << "\n";
+sb << "__generic<T : __BuiltinArithmeticType> __intrinsic_op T WaveAllMax(T expr);\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> WaveAllMax(vector<T,N> expr);\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> WaveAllMax(matrix<T,N,M> expr);\n";
+sb << "\n";
+sb << "__generic<T : __BuiltinArithmeticType> __intrinsic_op T WaveAllMin(T expr);\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> WaveAllMin(vector<T,N> expr);\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> WaveAllMin(matrix<T,N,M> expr);\n";
+sb << "\n";
+sb << "__generic<T : __BuiltinArithmeticType> __intrinsic_op T WaveAllProduct(T expr);\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> WaveAllProduct(vector<T,N> expr);\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> WaveAllProduct(matrix<T,N,M> expr);\n";
+sb << "\n";
+sb << "__generic<T : __BuiltinArithmeticType> __intrinsic_op T WaveAllSum(T expr);\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> WaveAllSum(vector<T,N> expr);\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> WaveAllSum(matrix<T,N,M> expr);\n";
+sb << "\n";
+sb << "__intrinsic_op bool WaveAllEqual(bool expr);\n";
+sb << "__intrinsic_op bool WaveAllTrue(bool expr);\n";
+sb << "__intrinsic_op bool WaveAnyTrue(bool expr);\n";
+sb << "\n";
+// NOTE(review): these wave queries were previously declared without
+// `__intrinsic_op`, unlike every other built-in emitted nearby (including the
+// zero-argument GetRenderTargetSampleCount). They are marked here for
+// consistency — confirm this matches how the compiler treats them.
+sb << "__intrinsic_op uint64_t WaveBallot(bool expr);\n";
+sb << "\n";
+sb << "__intrinsic_op uint WaveGetLaneCount();\n";
+sb << "__intrinsic_op uint WaveGetLaneIndex();\n";
+sb << "__intrinsic_op uint WaveGetOrderedIndex();\n";
+sb << "\n";
+sb << "__intrinsic_op bool WaveIsHelperLane();\n";
+sb << "\n";
+sb << "__intrinsic_op bool WaveOnce();\n";
+sb << "\n";
+sb << "__generic<T : __BuiltinArithmeticType> __intrinsic_op T WavePrefixProduct(T expr);\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> WavePrefixProduct(vector<T,N> expr);\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> WavePrefixProduct(matrix<T,N,M> expr);\n";
+sb << "\n";
+sb << "__generic<T : __BuiltinArithmeticType> __intrinsic_op T WavePrefixSum(T expr);\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> WavePrefixSum(vector<T,N> expr);\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> WavePrefixSum(matrix<T,N,M> expr);\n";
+sb << "\n";
+sb << "__generic<T : __BuiltinType> __intrinsic_op T WaveReadFirstLane(T expr);\n";
+sb << "__generic<T : __BuiltinType, let N : int> __intrinsic_op vector<T,N> WaveReadFirstLane(vector<T,N> expr);\n";
+sb << "__generic<T : __BuiltinType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> WaveReadFirstLane(matrix<T,N,M> expr);\n";
+sb << "\n";
+sb << "__generic<T : __BuiltinType> __intrinsic_op T WaveReadLaneAt(T expr, int laneIndex);\n";
+sb << "__generic<T : __BuiltinType, let N : int> __intrinsic_op vector<T,N> WaveReadLaneAt(vector<T,N> expr, int laneIndex);\n";
+sb << "__generic<T : __BuiltinType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> WaveReadLaneAt(matrix<T,N,M> expr, int laneIndex);\n";
+sb << "\n";
+sb << "// `typedef`s to help with the fact that HLSL has been sorta-kinda case insensitive at various points\n";
+sb << "typedef Texture2D texture2D;\n";
+sb << "\n";
+sb << "";
+
+
+// Component-wise multiplication ops
+//
+// For the multiply operator and its assignment form, emit one matrix-by-matrix
+// operator declaration per base type whose flags overlap the operator's flags.
+for(auto binaryOp : binaryOps)
+{
+    // Every entry of `binaryOps` other than the two multiply op codes is skipped.
+    bool isMultiply = false;
+    switch (binaryOp.opCode)
+    {
+    case kIROp_Mul:
+    case kIRPseudoOp_MulAssign:
+        isMultiply = true;
+        break;
+
+    default:
+        break;
+    }
+    if (!isMultiply)
+        continue;
+
+    // Operators flagged ASSIGNMENT take their left operand as `in out`.
+    char const* leftQualifier = "";
+    if (binaryOp.flags & ASSIGNMENT)
+        leftQualifier = "in out ";
+
+    for (auto baseType : kBaseTypes)
+    {
+        // No flag bits in common: this operator is not emitted for the type.
+        if ((baseType.flags & binaryOp.flags) == 0)
+            continue;
+
+        // Both operands and the result share one element type.
+        char const* elementType = baseType.name;
+
+        sb << "__generic<let N : int, let M : int> __intrinsic_op(" << int(binaryOp.opCode) << ") ";
+        sb << "matrix<" << elementType << ",N,M> operator" << binaryOp.opName;
+        sb << "(" << leftQualifier << "matrix<" << elementType << ",N,M> left, ";
+        sb << "matrix<" << elementType << ",N,M> right);\n";
+    }
+}
+
+//
+
+// Buffer types
+
+static const struct {
+    char const*         name;
+    SlangResourceAccess access;
+} kBaseBufferAccessLevels[] = {
+    { "",                   SLANG_RESOURCE_ACCESS_READ },
+    { "RW",                 SLANG_RESOURCE_ACCESS_READ_WRITE },
+    { "RasterizerOrdered",  SLANG_RESOURCE_ACCESS_RASTER_ORDERED },
+};
+static const int kBaseBufferAccessLevelCount = sizeof(kBaseBufferAccessLevels) / sizeof(kBaseBufferAccessLevels[0]);
+
+for (int aa = 0; aa < kBaseBufferAccessLevelCount; ++aa)
+{
+
+    sb << "__generic<T> __magic_type(Texture, ";
+    sb << ResourceType::makeFlavor(ResourceType::Shape::ShapeBuffer, kBaseBufferAccessLevels[aa].access);
+    sb << ") struct ";
+    sb << kBaseBufferAccessLevels[aa].name;
+    sb << "Buffer {\n";
+
+    sb << "__intrinsic_op void GetDimensions(out uint dim);\n";
+
+    sb << "__target_intrinsic(glsl, \"texelFetch($P, $0)$z\")\n";
+    sb << "__intrinsic_op T Load(int location);\n";
+
+    sb << "__intrinsic_op T Load(int location, out uint status);\n";
+
+    sb << "__target_intrinsic(glsl, \"texelFetch($P, int($0))$z\")\n";
+    sb << "__intrinsic_op __subscript(uint index) -> T";
+
+    if (kBaseBufferAccessLevels[aa].access != SLANG_RESOURCE_ACCESS_READ)
+    {
+        sb << " { get; set; }\n";
+    }
+    else
+    {
+        sb << ";\n";
+    }
+
+    sb << "};\n";
+}
+
+sb << "";
diff --git a/source/slang/slang-stdlib.cpp b/source/slang/slang-stdlib.cpp
index 4d5fd6f87..cf45cbca8 100644
--- a/source/slang/slang-stdlib.cpp
+++ b/source/slang/slang-stdlib.cpp
@@ -8,1039 +8,6 @@
 #define STRINGIZE2(x) #x
 #define LINE_STRING STRINGIZE(__LINE__)
 
-enum { kCoreLibIncludeStringLine = __LINE__ + 1 };
-const char* kCoreLibIncludeStringChunks[] = { R"=(
-
-// A type that can be used as an operand for builtins
-interface __BuiltinType {}
-
-// A type that can be used for arithmetic operations
-interface __BuiltinArithmeticType : __BuiltinType {}
-
-// A type that logically has a sign (positive/negative/zero)
-interface __BuiltinSignedArithmeticType : __BuiltinArithmeticType {}
-
-// A type that can represent integers
-interface __BuiltinIntegerType : __BuiltinArithmeticType {}
-
-// A type that can represent non-integers
-interface __BuiltinRealType : __BuiltinArithmeticType {}
-
-// A type that uses a floating-point representation
-interface __BuiltinFloatingPointType : __BuiltinRealType, __BuiltinSignedArithmeticType {}
-
-__generic<T,U> __intrinsic_op(Sequence) U operator,(T left, U right);
-
-__generic<T> __intrinsic_op(select) T operator?:(bool condition, T ifTrue, T ifFalse);
-__generic<T, let N : int> __intrinsic_op(select) vector<T,N> operator?:(vector<bool,N> condition, vector<T,N> ifTrue, vector<T,N> ifFalse);
-
-)=" };
-
-
-enum { kHLSLLibIncludeStringLine = __LINE__+1 };
-const char * kHLSLLibIncludeStringChunks[] = { R"=(
-
-typedef uint UINT;
-
-__generic<T> __magic_type(HLSLAppendStructuredBufferType) struct AppendStructuredBuffer
-{
-    __intrinsic_op void Append(T value);
-
-    __intrinsic_op void GetDimensions(
-        out uint numStructs,
-        out uint stride);
-};
-
-__magic_type(HLSLByteAddressBufferType) struct ByteAddressBuffer
-{
-    __intrinsic_op void GetDimensions(
-        out uint dim);
-
-    __intrinsic_op uint Load(int location);
-    __intrinsic_op uint Load(int location, out uint status);
-
-    __intrinsic_op uint2 Load2(int location);
-    __intrinsic_op uint2 Load2(int location, out uint status);
-
-    __intrinsic_op uint3 Load3(int location);
-    __intrinsic_op uint3 Load3(int location, out uint status);
-
-    __intrinsic_op uint4 Load4(int location);
-    __intrinsic_op uint4 Load4(int location, out uint status);
-};
-
-__generic<T> __magic_type(HLSLStructuredBufferType) struct StructuredBuffer
-{
-    __intrinsic_op void GetDimensions(
-        out uint numStructs,
-        out uint stride);
-
-    __intrinsic_op T Load(int location);
-    __intrinsic_op T Load(int location, out uint status);
-
-    __intrinsic_op __subscript(uint index) -> T;
-};
-
-__generic<T> __magic_type(HLSLConsumeStructuredBufferType) struct ConsumeStructuredBuffer
-{
-    __intrinsic_op T Consume();
-
-    __intrinsic_op void GetDimensions(
-        out uint numStructs,
-        out uint stride);
-};
-
-__generic<T, let N : int> __magic_type(HLSLInputPatchType) struct InputPatch
-{
-    __intrinsic_op __subscript(uint index) -> T;
-};
-
-__generic<T, let N : int> __magic_type(HLSLOutputPatchType) struct OutputPatch
-{
-    __intrinsic_op __subscript(uint index) -> T { set; }
-};
-
-__magic_type(HLSLRWByteAddressBufferType) struct RWByteAddressBuffer
-{
-    // Note(tfoley): supports alll operations from `ByteAddressBuffer`
-    // TODO(tfoley): can this be made a sub-type?
-
-    __intrinsic_op void GetDimensions(
-        out uint dim);
-
-    __intrinsic_op uint Load(int location);
-    __intrinsic_op uint Load(int location, out uint status);
-
-    __intrinsic_op uint2 Load2(int location);
-    __intrinsic_op uint2 Load2(int location, out uint status);
-
-    __intrinsic_op uint3 Load3(int location);
-    __intrinsic_op uint3 Load3(int location, out uint status);
-
-    __intrinsic_op uint4 Load4(int location);
-    __intrinsic_op uint4 Load4(int location, out uint status);
-
-    // Added operations:
-
-    __intrinsic_op void InterlockedAdd(
-        UINT dest,
-        UINT value,
-        out UINT original_value);
-    __intrinsic_op void InterlockedAdd(
-        UINT dest,
-        UINT value);
-
-    __intrinsic_op void InterlockedAnd(
-        UINT dest,
-        UINT value,
-        out UINT original_value);
-    __intrinsic_op void InterlockedAnd(
-        UINT dest,
-        UINT value);
-
-    __intrinsic_op void InterlockedCompareExchange(
-        UINT dest,
-        UINT compare_value,
-        UINT value,
-        out UINT original_value);
-    __intrinsic_op void InterlockedCompareExchange(
-        UINT dest,
-        UINT compare_value,
-        UINT value);
-
-    __intrinsic_op void InterlockedCompareStore(
-        UINT dest,
-        UINT compare_value,
-        UINT value);
-    __intrinsic_op void InterlockedCompareStore(
-        UINT dest,
-        UINT compare_value);
-
-    __intrinsic_op void InterlockedExchange(
-        UINT dest,
-        UINT value,
-        out UINT original_value);
-    __intrinsic_op void InterlockedExchange(
-        UINT dest,
-        UINT value);
-
-    __intrinsic_op void InterlockedMax(
-        UINT dest,
-        UINT value,
-        out UINT original_value);
-    __intrinsic_op void InterlockedMax(
-        UINT dest,
-        UINT value);
-
-    __intrinsic_op void InterlockedMin(
-        UINT dest,
-        UINT value,
-        out UINT original_value);
-    __intrinsic_op void InterlockedMin(
-        UINT dest,
-        UINT value);
-
-    __intrinsic_op void InterlockedOr(
-        UINT dest,
-        UINT value,
-        out UINT original_value);
-    __intrinsic_op void InterlockedOr(
-        UINT dest,
-        UINT value);
-
-    __intrinsic_op void InterlockedXor(
-        UINT dest,
-        UINT value,
-        out UINT original_value);
-    __intrinsic_op void InterlockedXor(
-        UINT dest,
-        UINT value);
-
-    __intrinsic_op void Store(
-        uint address,
-        uint value);
-
-    __intrinsic_op void Store2(
-        uint address,
-        uint2 value);
-
-    __intrinsic_op void Store3(
-        uint address,
-        uint3 value);
-
-    __intrinsic_op void Store4(
-        uint address,
-        uint4 value);
-};
-
-__generic<T> __magic_type(HLSLRWStructuredBufferType) struct RWStructuredBuffer
-{
-    __intrinsic_op uint DecrementCounter();
-
-    __intrinsic_op void GetDimensions(
-        out uint numStructs,
-        out uint stride);
-
-    __intrinsic_op void IncrementCounter();
-
-    __intrinsic_op T Load(int location);
-    __intrinsic_op T Load(int location, out uint status);
-
-    __intrinsic_op __subscript(uint index) -> T { get; set; }
-};
-
-__generic<T> __magic_type(HLSLPointStreamType) struct PointStream
-{
-    void Append(T value);
-    void RestartStrip();
-};
-
-__generic<T> __magic_type(HLSLLineStreamType) struct LineStream
-{
-    void Append(T value);
-    void RestartStrip();
-};
-
-__generic<T> __magic_type(HLSLTriangleStreamType) struct TriangleStream
-{
-    void Append(T value);
-    void RestartStrip();
-};
-
-)=", R"=(
-
-// Note(tfoley): Trying to systematically add all the HLSL builtins
-
-// Try to terminate the current draw or dispatch call (HLSL SM 4.0)
-__intrinsic_op void abort();
-
-// Absolute value (HLSL SM 1.0)
-__generic<T : __BuiltinSignedArithmeticType> __intrinsic_op T abs(T x);
-__generic<T : __BuiltinSignedArithmeticType, let N : int> __intrinsic_op vector<T,N> abs(vector<T,N> x);
-__generic<T : __BuiltinSignedArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> abs(matrix<T,N,M> x);
-
-// Inverse cosine (HLSL SM 1.0)
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op T acos(T x);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> acos(vector<T,N> x);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> acos(matrix<T,N,M> x);
-
-// Test if all components are non-zero (HLSL SM 1.0)
-__generic<T : __BuiltinType> __intrinsic_op T all(T x);
-__generic<T : __BuiltinType, let N : int> __intrinsic_op vector<T,N> all(vector<T,N> x);
-__generic<T : __BuiltinType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> all(matrix<T,N,M> x);
-
-// Barrier for writes to all memory spaces (HLSL SM 5.0)
-__intrinsic_op void AllMemoryBarrier();
-
-// Thread-group sync and barrier for writes to all memory spaces (HLSL SM 5.0)
-__intrinsic_op void AllMemoryBarrierWithGroupSync();
-
-// Test if any components is non-zero (HLSL SM 1.0)
-__generic<T : __BuiltinType> __intrinsic_op T any(T x);
-__generic<T : __BuiltinType, let N : int> __intrinsic_op vector<T,N> any(vector<T,N> x);
-__generic<T : __BuiltinType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> any(matrix<T,N,M> x);
-
-
-// Reinterpret bits as a double (HLSL SM 5.0)
-__intrinsic_op double asdouble(uint lowbits, uint highbits);
-
-// Reinterpret bits as a float (HLSL SM 4.0)
-__intrinsic_op float asfloat( int x);
-__intrinsic_op float asfloat(uint x);
-__generic<let N : int> __intrinsic_op vector<float,N> asfloat(vector< int,N> x);
-__generic<let N : int> __intrinsic_op vector<float,N> asfloat(vector<uint,N> x);
-__generic<let N : int, let M : int> __intrinsic_op matrix<float,N,M> asfloat(matrix< int,N,M> x);
-__generic<let N : int, let M : int> __intrinsic_op matrix<float,N,M> asfloat(matrix<uint,N,M> x);
-
-
-// Inverse sine (HLSL SM 1.0)
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op T asin(T x);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> asin(vector<T,N> x);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> asin(matrix<T,N,M> x);
-
-// Reinterpret bits as an int (HLSL SM 4.0)
-__intrinsic_op int asint(float x);
-__intrinsic_op int asint(uint x);
-__generic<let N : int> __intrinsic_op vector<int,N> asint(vector<float,N> x);
-__generic<let N : int> __intrinsic_op vector<int,N> asint(vector<uint,N> x);
-__generic<let N : int, let M : int> __intrinsic_op matrix<int,N,M> asint(matrix<float,N,M> x);
-__generic<let N : int, let M : int> __intrinsic_op matrix<int,N,M> asint(matrix<uint,N,M> x);
-
-// Reinterpret bits of double as a uint (HLSL SM 5.0)
-__intrinsic_op void asuint(double value, out uint lowbits, out uint highbits);
-
-// Reinterpret bits as a uint (HLSL SM 4.0)
-__intrinsic_op uint asuint(float x);
-__intrinsic_op uint asuint(int x);
-__generic<let N : int> __intrinsic_op vector<uint,N> asuint(vector<float,N> x);
-__generic<let N : int> __intrinsic_op vector<uint,N> asuint(vector<int,N> x);
-__generic<let N : int, let M : int> __intrinsic_op matrix<uint,N,M> asuint(matrix<float,N,M> x);
-__generic<let N : int, let M : int> __intrinsic_op matrix<uint,N,M> asuint(matrix<int,N,M> x);
-
-// Inverse tangent (HLSL SM 1.0)
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op T atan(T x);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> atan(vector<T,N> x);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> atan(matrix<T,N,M> x);
-
-__generic<T : __BuiltinFloatingPointType>
-__target_intrinsic(glsl,"atan($0,$1)")
-__intrinsic_op
-T atan2(T y, T x);
-
-__generic<T : __BuiltinFloatingPointType, let N : int>
-__target_intrinsic(glsl,"atan($0,$1)")
-__intrinsic_op
-vector<T,N> atan2(vector<T,N> y, vector<T,N> x);
-
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
-__target_intrinsic(glsl,"atan($0,$1)")
-__intrinsic_op
-matrix<T,N,M> atan2(matrix<T,N,M> y, matrix<T,N,M> x);
-
-// Ceiling (HLSL SM 1.0)
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op T ceil(T x);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> ceil(vector<T,N> x);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> ceil(matrix<T,N,M> x);
-
-
-// Check access status to tiled resource
-__intrinsic_op bool CheckAccessFullyMapped(uint status);
-
-// Clamp (HLSL SM 1.0)
-__generic<T : __BuiltinArithmeticType> __intrinsic_op T clamp(T x, T min, T max);
-__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> clamp(vector<T,N> x, vector<T,N> min, vector<T,N> max);
-__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> clamp(matrix<T,N,M> x, matrix<T,N,M> min, matrix<T,N,M> max);
-
-// Clip (discard) fragment conditionally
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op void clip(T x);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op void clip(vector<T,N> x);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op void clip(matrix<T,N,M> x);
-
-// Cosine
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op T cos(T x);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> cos(vector<T,N> x);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> cos(matrix<T,N,M> x);
-
-// Hyperbolic cosine
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op T cosh(T x);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> cosh(vector<T,N> x);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> cosh(matrix<T,N,M> x);
-
-// Population count
-__intrinsic_op uint countbits(uint value);
-
-// Cross product
-__generic<T : __BuiltinArithmeticType> __intrinsic_op vector<T,3> cross(vector<T,3> x, vector<T,3> y);
-
-// Convert encoded color
-__intrinsic_op int4 D3DCOLORtoUBYTE4(float4 x);
-
-// Partial-difference derivatives
-__generic<T : __BuiltinFloatingPointType>
-__target_intrinsic(glsl, dFdx)
-__intrinsic_op
-T ddx(T x);
-__generic<T : __BuiltinFloatingPointType, let N : int>
-__target_intrinsic(glsl, dFdx)
-__intrinsic_op
-vector<T,N> ddx(vector<T,N> x);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
-__target_intrinsic(glsl, dFdx)
-__intrinsic_op
-matrix<T,N,M> ddx(matrix<T,N,M> x);
-
-__generic<T : __BuiltinFloatingPointType>
-__glsl_extension(GL_ARB_derivative_control)
-__target_intrinsic(glsl, dFdxCoarse)
-__intrinsic_op
-T ddx_coarse(T x);
-__generic<T : __BuiltinFloatingPointType, let N : int>
-__glsl_extension(GL_ARB_derivative_control)
-__target_intrinsic(glsl, dFdxCoarse)
-__intrinsic_op
-vector<T,N> ddx_coarse(vector<T,N> x);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
-__glsl_extension(GL_ARB_derivative_control)
-__target_intrinsic(glsl, dFdxCoarse)
-__intrinsic_op
-matrix<T,N,M> ddx_coarse(matrix<T,N,M> x);
-
-__generic<T : __BuiltinFloatingPointType>
-__glsl_extension(GL_ARB_derivative_control)
-__target_intrinsic(glsl, dFdxFine)
-__intrinsic_op
-T ddx_fine(T x);
-__generic<T : __BuiltinFloatingPointType, let N : int>
-__glsl_extension(GL_ARB_derivative_control)
-__target_intrinsic(glsl, dFdxFine)
-__intrinsic_op
-vector<T,N> ddx_fine(vector<T,N> x);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
-__glsl_extension(GL_ARB_derivative_control)
-__target_intrinsic(glsl, dFdxFine)
-__intrinsic_op
-matrix<T,N,M> ddx_fine(matrix<T,N,M> x);
-
-__generic<T : __BuiltinFloatingPointType>
-__target_intrinsic(glsl, dFdy)
-__intrinsic_op
-T ddy(T x);
-__generic<T : __BuiltinFloatingPointType, let N : int>
-__target_intrinsic(glsl, dFdy)
-__intrinsic_op
-vector<T,N> ddy(vector<T,N> x);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
-__target_intrinsic(glsl, dFdy)
-__intrinsic_op
- matrix<T,N,M> ddy(matrix<T,N,M> x);
-
-__generic<T : __BuiltinFloatingPointType>
-__glsl_extension(GL_ARB_derivative_control)
-__target_intrinsic(glsl, dFdyCoarse)
-__intrinsic_op
-T ddy_coarse(T x);
-__generic<T : __BuiltinFloatingPointType, let N : int>
-__glsl_extension(GL_ARB_derivative_control)
-__target_intrinsic(glsl, dFdyCoarse)
-__intrinsic_op
-vector<T,N> ddy_coarse(vector<T,N> x);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
-__glsl_extension(GL_ARB_derivative_control)
-__target_intrinsic(glsl, dFdyCoarse)
-__intrinsic_op
-matrix<T,N,M> ddy_coarse(matrix<T,N,M> x);
-
-__generic<T : __BuiltinFloatingPointType>
-__glsl_extension(GL_ARB_derivative_control)
-__target_intrinsic(glsl, dFdyFine)
-__intrinsic_op
-T ddy_fine(T x);
-__generic<T : __BuiltinFloatingPointType, let N : int>
-__glsl_extension(GL_ARB_derivative_control)
-__target_intrinsic(glsl, dFdyFine)
-__intrinsic_op
-vector<T,N> ddy_fine(vector<T,N> x);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
-__glsl_extension(GL_ARB_derivative_control)
-__target_intrinsic(glsl, dFdyFine)
-__intrinsic_op
-matrix<T,N,M> ddy_fine(matrix<T,N,M> x);
-
-
-// Radians to degrees
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op T degrees(T x);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> degrees(vector<T,N> x);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> degrees(matrix<T,N,M> x);
-
-// Matrix determinant
-
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op T determinant(matrix<T,N,N> m);
-
-// Barrier for device memory
-__intrinsic_op void DeviceMemoryBarrier();
-__intrinsic_op void DeviceMemoryBarrierWithGroupSync();
-
-// Vector distance
-
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op T distance(vector<T,N> x, vector<T,N> y);
-
-// Vector dot product
-
-__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op T dot(vector<T,N> x, vector<T,N> y);
-
-// Helper for computing distance terms for lighting (obsolete)
-
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op vector<T,4> dst(vector<T,4> x, vector<T,4> y);
-
-// Error message
-
-// __intrinsic_op void errorf( string format, ... );
-
-// Attribute evaluation
-
-__generic<T : __BuiltinArithmeticType> __intrinsic_op T EvaluateAttributeAtCentroid(T x);
-__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> EvaluateAttributeAtCentroid(vector<T,N> x);
-__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> EvaluateAttributeAtCentroid(matrix<T,N,M> x);
-
-__generic<T : __BuiltinArithmeticType> __intrinsic_op T EvaluateAttributeAtSample(T x, uint sampleindex);
-__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> EvaluateAttributeAtSample(vector<T,N> x, uint sampleindex);
-__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> EvaluateAttributeAtSample(matrix<T,N,M> x, uint sampleindex);
-
-__generic<T : __BuiltinArithmeticType> __intrinsic_op T EvaluateAttributeSnapped(T x, int2 offset);
-__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> EvaluateAttributeSnapped(vector<T,N> x, int2 offset);
-__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> EvaluateAttributeSnapped(matrix<T,N,M> x, int2 offset);
-
-// Base-e exponent
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op T exp(T x);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> exp(vector<T,N> x);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> exp(matrix<T,N,M> x);
-
-// Base-2 exponent
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op T exp2(T x);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> exp2(vector<T,N> x);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> exp2(matrix<T,N,M> x);
-
-// Convert 16-bit float stored in low bits of integer
-__intrinsic_op float f16tof32(uint value);
-__generic<let N : int> __intrinsic_op vector<float,N> f16tof32(vector<uint,N> value);
-
-// Convert to 16-bit float stored in low bits of integer
-__intrinsic_op uint f32tof16(float value);
-__generic<let N : int> __intrinsic_op vector<uint,N> f32tof16(vector<float,N> value);
-
-// Flip surface normal to face forward, if needed
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> faceforward(vector<T,N> n, vector<T,N> i, vector<T,N> ng);
-
-// Find first set bit starting at high bit and working down
-__intrinsic_op int firstbithigh(int value);
-__generic<let N : int> __intrinsic_op vector<int,N> firstbithigh(vector<int,N> value);
-
-__intrinsic_op uint firstbithigh(uint value);
-__generic<let N : int> __intrinsic_op vector<uint,N> firstbithigh(vector<uint,N> value);
-
-// Find first set bit starting at low bit and working up
-__intrinsic_op int firstbitlow(int value);
-__generic<let N : int> __intrinsic_op vector<int,N> firstbitlow(vector<int,N> value);
-
-__intrinsic_op uint firstbitlow(uint value);
-__generic<let N : int> __intrinsic_op vector<uint,N> firstbitlow(vector<uint,N> value);
-
-// Floor (HLSL SM 1.0)
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op T floor(T x);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> floor(vector<T,N> x);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> floor(matrix<T,N,M> x);
-
-// Fused multiply-add for doubles
-__intrinsic_op double fma(double a, double b, double c);
-__generic<let N : int> __intrinsic_op vector<double, N> fma(vector<double, N> a, vector<double, N> b, vector<double, N> c);
-__generic<let N : int, let M : int> __intrinsic_op matrix<double,N,M> fma(matrix<double,N,M> a, matrix<double,N,M> b, matrix<double,N,M> c);
-
-// Floating point remainder of x/y
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op T fmod(T x, T y);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> fmod(vector<T,N> x, vector<T,N> y);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> fmod(matrix<T,N,M> x, matrix<T,N,M> y);
-
-// Fractional part
-__generic<T : __BuiltinFloatingPointType>
-__target_intrinsic(glsl, fract)
-__intrinsic_op
-T frac(T x);
-
-__generic<T : __BuiltinFloatingPointType, let N : int>
-__target_intrinsic(glsl, fract)
-__intrinsic_op
-vector<T,N> frac(vector<T,N> x);
-
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
-__target_intrinsic(glsl, fract)
-__intrinsic_op
-matrix<T,N,M> frac(matrix<T,N,M> x);
-
-// Split float into mantissa and exponent
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op T frexp(T x, out T exp);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> frexp(vector<T,N> x, out vector<T,N> exp);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> frexp(matrix<T,N,M> x, out matrix<T,N,M> exp);
-
-// Texture filter width
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op T fwidth(T x);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> fwidth(vector<T,N> x);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> fwidth(matrix<T,N,M> x);
-
-)=", R"=(
-
-// Get number of samples in render target
-__intrinsic_op uint GetRenderTargetSampleCount();
-
-// Get position of given sample
-__intrinsic_op float2 GetRenderTargetSamplePosition(int Index);
-
-// Group memory barrier
-__intrinsic_op void GroupMemoryBarrier();
-__intrinsic_op void GroupMemoryBarrierWithGroupSync();
-
-// Atomics
-__intrinsic_op void InterlockedAdd(in out  int dest,  int value, out  int original_value);
-__intrinsic_op void InterlockedAdd(in out uint dest, uint value, out uint original_value);
-
-__intrinsic_op void InterlockedAnd(in out  int dest,  int value, out  int original_value);
-__intrinsic_op void InterlockedAnd(in out uint dest, uint value, out uint original_value);
-
-__intrinsic_op void InterlockedCompareExchange(in out  int dest,  int compare_value,  int value, out  int original_value);
-__intrinsic_op void InterlockedCompareExchange(in out uint dest, uint compare_value, uint value, out uint original_value);
-
-__intrinsic_op void InterlockedCompareStore(in out  int dest,  int compare_value,  int value);
-__intrinsic_op void InterlockedCompareStore(in out uint dest, uint compare_value, uint value);
-
-__intrinsic_op void InterlockedExchange(in out  int dest,  int value, out  int original_value);
-__intrinsic_op void InterlockedExchange(in out uint dest, uint value, out uint original_value);
-
-__intrinsic_op void InterlockedMax(in out  int dest,  int value, out  int original_value);
-__intrinsic_op void InterlockedMax(in out uint dest, uint value, out uint original_value);
-
-__intrinsic_op void InterlockedMin(in out  int dest,  int value, out  int original_value);
-__intrinsic_op void InterlockedMin(in out uint dest, uint value, out uint original_value);
-
-__intrinsic_op void InterlockedOr(in out  int dest,  int value, out  int original_value);
-__intrinsic_op void InterlockedOr(in out uint dest, uint value, out uint original_value);
-
-__intrinsic_op void InterlockedXor(in out  int dest,  int value, out  int original_value);
-__intrinsic_op void InterlockedXor(in out uint dest, uint value, out uint original_value);
-
-// Is floating-point value finite?
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op bool isfinite(T x);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<bool,N> isfinite(vector<T,N> x);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<bool,N,M> isfinite(matrix<T,N,M> x);
-
-// Is floating-point value infinite?
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op bool isinf(T x);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<bool,N> isinf(vector<T,N> x);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<bool,N,M> isinf(matrix<T,N,M> x);
-
-// Is floating-point value not-a-number?
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op bool isnan(T x);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<bool,N> isnan(vector<T,N> x);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<bool,N,M> isnan(matrix<T,N,M> x);
-
-// Construct float from mantissa and exponent
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op T ldexp(T x, T exp);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> ldexp(vector<T,N> x, vector<T,N> exp);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> ldexp(matrix<T,N,M> x, matrix<T,N,M> exp);
-
-// Vector length
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op T length(vector<T,N> x);
-
-// Linear interpolation
-__generic<T : __BuiltinFloatingPointType>
-__target_intrinsic(glsl, mix)
-__intrinsic_op
-T lerp(T x, T y, T s);
-
-__generic<T : __BuiltinFloatingPointType, let N : int>
-__target_intrinsic(glsl, mix)
-__intrinsic_op
-vector<T,N> lerp(vector<T,N> x, vector<T,N> y, vector<T,N> s);
-
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
-__target_intrinsic(glsl, mix)
-__intrinsic_op
-matrix<T,N,M> lerp(matrix<T,N,M> x, matrix<T,N,M> y, matrix<T,N,M> s);
-
-// Legacy lighting function (obsolete)
-__intrinsic_op float4 lit(float n_dot_l, float n_dot_h, float m);
-
-// Base-e logarithm
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op T log(T x);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> log(vector<T,N> x);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> log(matrix<T,N,M> x);
-
-// Base-10 logarithm
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op T log10(T x);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> log10(vector<T,N> x);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> log10(matrix<T,N,M> x);
-
-// Base-2 logarithm
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op T log2(T x);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> log2(vector<T,N> x);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> log2(matrix<T,N,M> x);
-
-// multiply-add
-__generic<T : __BuiltinArithmeticType> __intrinsic_op T mad(T mvalue, T avalue, T bvalue);
-__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> mad(vector<T,N> mvalue, vector<T,N> avalue, vector<T,N> bvalue);
-__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> mad(matrix<T,N,M> mvalue, matrix<T,N,M> avalue, matrix<T,N,M> bvalue);
-
-// maximum
-__generic<T : __BuiltinArithmeticType> __intrinsic_op T max(T x, T y);
-__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> max(vector<T,N> x, vector<T,N> y);
-__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> max(matrix<T,N,M> x, matrix<T,N,M> y);
-
-// minimum
-__generic<T : __BuiltinArithmeticType> __intrinsic_op T min(T x, T y);
-__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> min(vector<T,N> x, vector<T,N> y);
-__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> min(matrix<T,N,M> x, matrix<T,N,M> y);
-
-// split into integer and fractional parts (both with same sign)
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op T modf(T x, out T ip);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> modf(vector<T,N> x, out vector<T,N> ip);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> modf(matrix<T,N,M> x, out matrix<T,N,M> ip);
-
-// msad4 (whatever that is)
-__intrinsic_op uint4 msad4(uint reference, uint2 source, uint4 accum);
-
-// General inner products
-
-// scalar-scalar
-__generic<T : __BuiltinArithmeticType> __intrinsic_op T mul(T x, T y);
-
-// scalar-vector and vector-scalar
-__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> mul(vector<T,N> x, T y);
-__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> mul(T x, vector<T,N> y);
-
-// scalar-matrix and matrix-scalar
-__generic<T : __BuiltinArithmeticType, let N : int, let M :int> __intrinsic_op matrix<T,N,M> mul(matrix<T,N,M> x, T y);
-__generic<T : __BuiltinArithmeticType, let N : int, let M :int> __intrinsic_op matrix<T,N,M> mul(T x, matrix<T,N,M> y);
-
-// vector-vector (dot product)
-__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op(dot) T mul(vector<T,N> x, vector<T,N> y);
-
-// vector-matrix
-__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op(mulVectorMatrix) vector<T,M> mul(vector<T,N> x, matrix<T,N,M> y);
-
-// matrix-vector
-__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op(mulMatrixVector) vector<T,N> mul(matrix<T,N,M> x, vector<T,M> y);
-
-// matrix-matrix
-__generic<T : __BuiltinArithmeticType, let R : int, let N : int, let C : int> __intrinsic_op(mulMatrixMatrix) matrix<T,R,C> mul(matrix<T,R,N> x, matrix<T,N,C> y);
-
-// noise (deprecated)
-__intrinsic_op float noise(float x);
-__generic<let N : int> __intrinsic_op float noise(vector<float, N> x);
-
-// Normalize a vector
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> normalize(vector<T,N> x);
-
-// Raise to a power
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op T pow(T x, T y);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> pow(vector<T,N> x, vector<T,N> y);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> pow(matrix<T,N,M> x, matrix<T,N,M> y);
-
-// Output message
-
-// __intrinsic_op void printf( string format, ... );
-
-// Tessellation factor fixup routines
-
-__intrinsic_op void Process2DQuadTessFactorsAvg(
-    in  float4 RawEdgeFactors,
-    in  float2 InsideScale,
-    out float4 RoundedEdgeTessFactors,
-    out float2 RoundedInsideTessFactors,
-    out float2 UnroundedInsideTessFactors);
-
-__intrinsic_op void Process2DQuadTessFactorsMax(
-    in  float4 RawEdgeFactors,
-    in  float2 InsideScale,
-    out float4 RoundedEdgeTessFactors,
-    out float2 RoundedInsideTessFactors,
-    out float2 UnroundedInsideTessFactors);
-
-__intrinsic_op void Process2DQuadTessFactorsMin(
-    in  float4 RawEdgeFactors,
-    in  float2 InsideScale,
-    out float4 RoundedEdgeTessFactors,
-    out float2 RoundedInsideTessFactors,
-    out float2 UnroundedInsideTessFactors);
-
-__intrinsic_op void ProcessIsolineTessFactors(
-    in  float RawDetailFactor,
-    in  float RawDensityFactor,
-    out float RoundedDetailFactor,
-    out float RoundedDensityFactor);
-
-__intrinsic_op void ProcessQuadTessFactorsAvg(
-    in  float4 RawEdgeFactors,
-    in  float InsideScale,
-    out float4 RoundedEdgeTessFactors,
-    out float2 RoundedInsideTessFactors,
-    out float2 UnroundedInsideTessFactors);
-
-__intrinsic_op void ProcessQuadTessFactorsMax(
-    in  float4 RawEdgeFactors,
-    in  float InsideScale,
-    out float4 RoundedEdgeTessFactors,
-    out float2 RoundedInsideTessFactors,
-    out float2 UnroundedInsideTessFactors);
-
-__intrinsic_op void ProcessQuadTessFactorsMin(
-    in  float4 RawEdgeFactors,
-    in  float InsideScale,
-    out float4 RoundedEdgeTessFactors,
-    out float2 RoundedInsideTessFactors,
-    out float2 UnroundedInsideTessFactors);
-
-__intrinsic_op void ProcessTriTessFactorsAvg(
-    in  float3 RawEdgeFactors,
-    in  float InsideScale,
-    out float3 RoundedEdgeTessFactors,
-    out float RoundedInsideTessFactor,
-    out float UnroundedInsideTessFactor);
-
-__intrinsic_op void ProcessTriTessFactorsMax(
-    in  float3 RawEdgeFactors,
-    in  float InsideScale,
-    out float3 RoundedEdgeTessFactors,
-    out float RoundedInsideTessFactor,
-    out float UnroundedInsideTessFactor);
-
-__intrinsic_op void ProcessTriTessFactorsMin(
-    in  float3 RawEdgeFactors,
-    in  float InsideScale,
-    out float3 RoundedEdgeTessFactors,
-    out float RoundedInsideTessFactors,
-    out float UnroundedInsideTessFactors);
-
-// Degrees to radians
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op T radians(T x);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> radians(vector<T,N> x);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> radians(matrix<T,N,M> x);
-
-// Approximate reciprocal
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op T rcp(T x);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> rcp(vector<T,N> x);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> rcp(matrix<T,N,M> x);
-
-// Reflect incident vector across plane with given normal
-__generic<T : __BuiltinFloatingPointType, let N : int>
-__intrinsic_op
-vector<T,N> reflect(vector<T,N> i, vector<T,N> n);
-
-// Refract incident vector given surface normal and index of refraction
-__generic<T : __BuiltinFloatingPointType, let N : int>
-__intrinsic_op
-vector<T,N> refract(vector<T,N> i, vector<T,N> n, float eta);
-
-// Reverse order of bits
-__intrinsic_op uint reversebits(uint value);
-__generic<let N : int> vector<uint,N> reversebits(vector<uint,N> value);
-
-// Round-to-nearest
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op T round(T x);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> round(vector<T,N> x);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> round(matrix<T,N,M> x);
-
-// Reciprocal of square root
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op T rsqrt(T x);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> rsqrt(vector<T,N> x);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> rsqrt(matrix<T,N,M> x);
-
-// Clamp value to [0,1] range
-__generic<T : __BuiltinFloatingPointType>
-__target_intrinsic(glsl, "clamp($0, 0, 1)") __intrinsic_op
-T saturate(T x);
-
-__generic<T : __BuiltinFloatingPointType, let N : int>
-__target_intrinsic(glsl, "clamp($0, 0, 1)") __intrinsic_op
-vector<T,N> saturate(vector<T,N> x);
-
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
-__target_intrinsic(glsl, "clamp($0, 0, 1)") __intrinsic_op
-matrix<T,N,M> saturate(matrix<T,N,M> x);
-
-
-// Extract sign of value
-__generic<T : __BuiltinSignedArithmeticType> __intrinsic_op int sign(T x);
-__generic<T : __BuiltinSignedArithmeticType, let N : int> __intrinsic_op vector<int,N> sign(vector<T,N> x);
-__generic<T : __BuiltinSignedArithmeticType, let N : int, let M : int> __intrinsic_op matrix<int,N,M> sign(matrix<T,N,M> x);
-
-)=", R"=(
-
-
-// Sine
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op T sin(T x);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> sin(vector<T,N> x);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> sin(matrix<T,N,M> x);
-
-// Sine and cosine
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op void sincos(T x, out T s, out T c);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op void sincos(vector<T,N> x, out vector<T,N> s, out vector<T,N> c);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op void sincos(matrix<T,N,M> x, out matrix<T,N,M> s, out matrix<T,N,M> c);
-
-// Hyperbolic Sine
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op T sinh(T x);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> sinh(vector<T,N> x);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> sinh(matrix<T,N,M> x);
-
-// Smooth step (Hermite interpolation)
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op T smoothstep(T min, T max, T x);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> smoothstep(vector<T,N> min, vector<T,N> max, vector<T,N> x);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> smoothstep(matrix<T,N,M> min, matrix<T,N,M> max, matrix<T,N,M> x);
-
-// Square root
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op T sqrt(T x);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> sqrt(vector<T,N> x);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> sqrt(matrix<T,N,M> x);
-
-// Step function
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op T step(T y, T x);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> step(vector<T,N> y, vector<T,N> x);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> step(matrix<T,N,M> y, matrix<T,N,M> x);
-
-// Tangent
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op T tan(T x);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> tan(vector<T,N> x);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> tan(matrix<T,N,M> x);
-
-// Hyperbolic tangent
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op T tanh(T x);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> tanh(vector<T,N> x);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> tanh(matrix<T,N,M> x);
-
-// Legacy texture-fetch operations
-
-/*
-__intrinsic_op float4 tex1D(sampler1D s, float t);
-__intrinsic_op float4 tex1D(sampler1D s, float t, float ddx, float ddy);
-__intrinsic_op float4 tex1Dbias(sampler1D s, float4 t);
-__intrinsic_op float4 tex1Dgrad(sampler1D s, float t, float ddx, float ddy);
-__intrinsic_op float4 tex1Dlod(sampler1D s, float4 t);
-__intrinsic_op float4 tex1Dproj(sampler1D s, float4 t);
-
-__intrinsic_op float4 tex2D(sampler2D s, float2 t);
-__intrinsic_op float4 tex2D(sampler2D s, float2 t, float2 ddx, float2 ddy);
-__intrinsic_op float4 tex2Dbias(sampler2D s, float4 t);
-__intrinsic_op float4 tex2Dgrad(sampler2D s, float2 t, float2 ddx, float2 ddy);
-__intrinsic_op float4 tex2Dlod(sampler2D s, float4 t);
-__intrinsic_op float4 tex2Dproj(sampler2D s, float4 t);
-
-__intrinsic_op float4 tex3D(sampler3D s, float3 t);
-__intrinsic_op float4 tex3D(sampler3D s, float3 t, float3 ddx, float3 ddy);
-__intrinsic_op float4 tex3Dbias(sampler3D s, float4 t);
-__intrinsic_op float4 tex3Dgrad(sampler3D s, float3 t, float3 ddx, float3 ddy);
-__intrinsic_op float4 tex3Dlod(sampler3D s, float4 t);
-__intrinsic_op float4 tex3Dproj(sampler3D s, float4 t);
-
-__intrinsic_op float4 texCUBE(samplerCUBE s, float3 t);
-__intrinsic_op float4 texCUBE(samplerCUBE s, float3 t, float3 ddx, float3 ddy);
-__intrinsic_op float4 texCUBEbias(samplerCUBE s, float4 t);
-__intrinsic_op float4 texCUBEgrad(samplerCUBE s, float3 t, float3 ddx, float3 ddy);
-__intrinsic_op float4 texCUBElod(samplerCUBE s, float4 t);
-__intrinsic_op float4 texCUBEproj(samplerCUBE s, float4 t);
-*/
-
-// Matrix transpose
-__generic<T : __BuiltinType, let N : int, let M : int> __intrinsic_op matrix<T,M,N> transpose(matrix<T,N,M> x);
-
-// Truncate to integer
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op T trunc(T x);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> trunc(vector<T,N> x);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> trunc(matrix<T,N,M> x);
-
-
-)=", R"=(
-
-// Shader model 6.0 stuff
-
-__intrinsic_op uint GlobalOrderedCountIncrement(uint countToAppendForThisLane);
-
-__generic<T : __BuiltinType> __intrinsic_op T QuadReadLaneAt(T sourceValue, int quadLaneID);
-__generic<T : __BuiltinType, let N : int> __intrinsic_op vector<T,N> QuadReadLaneAt(vector<T,N> sourceValue, int quadLaneID);
-__generic<T : __BuiltinType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> QuadReadLaneAt(matrix<T,N,M> sourceValue, int quadLaneID);
-
-__generic<T : __BuiltinType> __intrinsic_op T QuadSwapX(T localValue);
-__generic<T : __BuiltinType, let N : int> __intrinsic_op vector<T,N> QuadSwapX(vector<T,N> localValue);
-__generic<T : __BuiltinType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> QuadSwapX(matrix<T,N,M> localValue);
-
-__generic<T : __BuiltinType> __intrinsic_op T QuadSwapY(T localValue);
-__generic<T : __BuiltinType, let N : int> __intrinsic_op vector<T,N> QuadSwapY(vector<T,N> localValue);
-__generic<T : __BuiltinType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> QuadSwapY(matrix<T,N,M> localValue);
-
-__generic<T : __BuiltinIntegerType> __intrinsic_op T WaveAllBitAnd(T expr);
-__generic<T : __BuiltinIntegerType, let N : int> __intrinsic_op vector<T,N> WaveAllBitAnd(vector<T,N> expr);
-__generic<T : __BuiltinIntegerType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> WaveAllBitAnd(matrix<T,N,M> expr);
-
-__generic<T : __BuiltinIntegerType> __intrinsic_op T WaveAllBitOr(T expr);
-__generic<T : __BuiltinIntegerType, let N : int> __intrinsic_op vector<T,N> WaveAllBitOr(vector<T,N> expr);
-__generic<T : __BuiltinIntegerType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> WaveAllBitOr(matrix<T,N,M> expr);
-
-__generic<T : __BuiltinIntegerType> __intrinsic_op T WaveAllBitXor(T expr);
-__generic<T : __BuiltinIntegerType, let N : int> __intrinsic_op vector<T,N> WaveAllBitXor(vector<T,N> expr);
-__generic<T : __BuiltinIntegerType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> WaveAllBitXor(matrix<T,N,M> expr);
-
-__generic<T : __BuiltinArithmeticType> __intrinsic_op T WaveAllMax(T expr);
-__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> WaveAllMax(vector<T,N> expr);
-__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> WaveAllMax(matrix<T,N,M> expr);
-
-__generic<T : __BuiltinArithmeticType> __intrinsic_op T WaveAllMin(T expr);
-__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> WaveAllMin(vector<T,N> expr);
-__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> WaveAllMin(matrix<T,N,M> expr);
-
-__generic<T : __BuiltinArithmeticType> __intrinsic_op T WaveAllProduct(T expr);
-__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> WaveAllProduct(vector<T,N> expr);
-__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> WaveAllProduct(matrix<T,N,M> expr);
-
-__generic<T : __BuiltinArithmeticType> __intrinsic_op T WaveAllSum(T expr);
-__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> WaveAllSum(vector<T,N> expr);
-__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> WaveAllSum(matrix<T,N,M> expr);
-
-__intrinsic_op bool WaveAllEqual(bool expr);
-__intrinsic_op bool WaveAllTrue(bool expr);
-__intrinsic_op bool WaveAnyTrue(bool expr);
-
-uint64_t WaveBallot(bool expr);
-
-uint WaveGetLaneCount();
-uint WaveGetLaneIndex();
-uint WaveGetOrderedIndex();
-
-bool WaveIsHelperLane();
-
-bool WaveOnce();
-
-__generic<T : __BuiltinArithmeticType> __intrinsic_op T WavePrefixProduct(T expr);
-__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> WavePrefixProduct(vector<T,N> expr);
-__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> WavePrefixProduct(matrix<T,N,M> expr);
-
-__generic<T : __BuiltinArithmeticType> __intrinsic_op T WavePrefixSum(T expr);
-__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> WavePrefixSum(vector<T,N> expr);
-__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> WavePrefixSum(matrix<T,N,M> expr);
-
-__generic<T : __BuiltinType> __intrinsic_op T WaveReadFirstLane(T expr);
-__generic<T : __BuiltinType, let N : int> __intrinsic_op vector<T,N> WaveReadFirstLane(vector<T,N> expr);
-__generic<T : __BuiltinType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> WaveReadFirstLane(matrix<T,N,M> expr);
-
-__generic<T : __BuiltinType> __intrinsic_op T WaveReadLaneAt(T expr, int laneIndex);
-__generic<T : __BuiltinType, let N : int> __intrinsic_op vector<T,N> WaveReadLaneAt(vector<T,N> expr, int laneIndex);
-__generic<T : __BuiltinType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> WaveReadLaneAt(matrix<T,N,M> expr, int laneIndex);
-
-
-)=", R"=(
-
-// `typedef`s to help with the fact that HLSL has been sorta-kinda case insensitive at various points
-typedef Texture2D texture2D;
-
-#line default
-)=" };
-
-
 namespace Slang
 {
     String Session::getStdlibPath()
@@ -1277,890 +244,11 @@ namespace Slang
 
         StringBuilder sb;
 
-        // generate operator overloads
-
-
         String path = getStdlibPath();
 
 #define EMIT_LINE_DIRECTIVE() sb << "#line " << (__LINE__+1) << " \"" << path << "\"\n"
 
-        // Generate declarations for all the base types
-
-        static const int kBaseTypeCount = sizeof(kBaseTypes) / sizeof(kBaseTypes[0]);
-        for (int tt = 0; tt < kBaseTypeCount; ++tt)
-        {
-            EMIT_LINE_DIRECTIVE();
-            sb << "__builtin_type(" << int(kBaseTypes[tt].tag) << ") struct " << kBaseTypes[tt].name;
-
-            // Declare interface conformances for this type
-
-            sb << "\n    : __BuiltinType\n";
-
-            switch (kBaseTypes[tt].tag)
-            {
-            case BaseType::Float:
-                sb << "\n    , __BuiltinFloatingPointType\n";
-                sb << "\n    ,  __BuiltinRealType\n";
-                // fall through to:
-            case BaseType::Int:
-                sb << "\n    ,  __BuiltinSignedArithmeticType\n";
-                // fall through to:
-            case BaseType::UInt:
-            case BaseType::UInt64:
-                sb << "\n    ,  __BuiltinArithmeticType\n";
-                // fall through to:
-            case BaseType::Bool:
-                sb << "\n    ,  __BuiltinType\n";
-                break;
-
-            default:
-                break;
-            }
-
-            sb << "\n{\n";
-
-
-            // Declare initializers to convert from various other types
-            for (int ss = 0; ss < kBaseTypeCount; ++ss)
-            {
-                // Don't allow conversion from `void`
-                if (kBaseTypes[ss].tag == BaseType::Void)
-                    continue;
-
-                // We need to emit a modifier so that the semantic-checking
-                // layer will know it can use these operations for implicit
-                // conversion.
-                ConversionCost conversionCost = getBaseTypeConversionCost(
-                    kBaseTypes[tt],
-                    kBaseTypes[ss]);
-
-                EMIT_LINE_DIRECTIVE();
-                sb << "__implicit_conversion(" << conversionCost << ")\n";
-
-                EMIT_LINE_DIRECTIVE();
-                sb << "__init(" << kBaseTypes[ss].name << " value);\n";
-            }
-
-            sb << "};\n";
-        }
-
-        // Declare vector and matrix types
-
-        sb << "__generic<T = float, let N : int = 4> __magic_type(Vector) struct vector\n{\n";
-        sb << "    typedef T Element;\n";
-
-        // Declare initializer taking a single scalar of the elemnt type
-        sb << "    __implicit_conversion(" << kConversionCost_ScalarToVector << ")\n";
-        sb << "    __init(T value);\n";
-
-        sb << "};\n";
-
-        // TODO: Probably need to do similar
-        sb << "__generic<T = float, let R : int = 4, let C : int = 4> __magic_type(Matrix) struct matrix {};\n";
-
-        static const struct {
-            char const* name;
-            char const* glslPrefix;
-        } kTypes[] =
-        {
-            {"float", ""},
-            {"int", "i"},
-            {"uint", "u"},
-            {"bool", "b"},
-        };
-        static const int kTypeCount = sizeof(kTypes) / sizeof(kTypes[0]);
-
-        for (int tt = 0; tt < kTypeCount; ++tt)
-        {
-            // Declare HLSL vector types
-            for (int ii = 1; ii <= 4; ++ii)
-            {
-                sb << "typedef vector<" << kTypes[tt].name << "," << ii << "> " << kTypes[tt].name << ii << ";\n";
-            }
-
-            // Declare HLSL matrix types
-            for (int rr = 2; rr <= 4; ++rr)
-            for (int cc = 2; cc <= 4; ++cc)
-            {
-                sb << "typedef matrix<" << kTypes[tt].name << "," << rr << "," << cc << "> " << kTypes[tt].name << rr << "x" << cc << ";\n";
-            }
-        }
-
-        // Declare additional built-in generic types
-//        EMIT_LINE_DIRECTIVE();
-
-
-        sb << "__generic<T>\n";
-        sb << "__intrinsic_type(" << kIROp_ConstantBufferType << ")\n";
-        sb << "__magic_type(ConstantBuffer) struct ConstantBuffer {};\n";
-
-        sb << "__generic<T>\n";
-        sb << "__intrinsic_type(" << kIROp_TextureBufferType << ")\n";
-        sb << "__magic_type(TextureBuffer) struct TextureBuffer {};\n";
-
-
-        static const char* kComponentNames[]{ "x", "y", "z", "w" };
-        static const char* kVectorNames[]{ "", "x", "xy", "xyz", "xyzw" };
-
-        // Need to add constructors to the types above
-        for (int N = 2; N <= 4; ++N)
-        {
-            sb << "__generic<T> __extension vector<T, " << N << ">\n{\n";
-
-            // initialize from N scalars
-            sb << "__init(";
-            for (int ii = 0; ii < N; ++ii)
-            {
-                if (ii != 0) sb << ", ";
-                sb << "T " << kComponentNames[ii];
-            }
-            sb << ");\n";
-
-            // Initialize from an M-vector and then scalars
-            for (int M = 2; M < N; ++M)
-            {
-                sb << "__init(vector<T," << M << "> " << kVectorNames[M];
-                for (int ii = M; ii < N; ++ii)
-                {
-                    sb << ", T " << kComponentNames[ii];
-                }
-                sb << ");\n";
-            }
-
-            // initialize from another vector of the same size
-            //
-            // TODO(tfoley): this overlaps with implicit conversions.
-            // We should look for a way that we can define implicit
-            // conversions directly in the stdlib instead...
-            sb << "__generic<U> __init(vector<U," << N << ">);\n";
-
-            // Initialize from two vectors, of size M and N-M
-            for(int M = 2; M <= (N-2); ++M)
-            {
-                int K = N - M;
-                SLANG_ASSERT(K >= 2);
-
-                sb << "__init(vector<T," << M << "> " << kVectorNames[M];
-                sb << ", vector<T," << K << "> ";
-                for (int ii = 0; ii < K; ++ii)
-                {
-                    sb << kComponentNames[ii];
-                }
-                sb << ");\n";
-            }
-
-            sb << "}\n";
-        }
-
-        // The above extension was generic in the *type* of the vector,
-        // but explicit in the *size*. We will now declare an extension
-        // for each builtin type that is generic in the size.
-        //
-        for (int tt = 0; tt < kBaseTypeCount; ++tt)
-        {
-            if(kBaseTypes[tt].tag == BaseType::Void) continue;
-
-            sb << "__generic<let N : int> __extension vector<"
-                << kBaseTypes[tt].name << ",N>\n{\n";
-
-            for (int ff = 0; ff < kBaseTypeCount; ++ff)
-            {
-                if(kBaseTypes[ff].tag == BaseType::Void) continue;
-
-                // We need a constructor to make a vector from a scalar
-                // of another type.
-
-                if( tt != ff )
-                {
-                    auto cost = getBaseTypeConversionCost(
-                        kBaseTypes[tt],
-                        kBaseTypes[ff]);
-                    cost += kConversionCost_ScalarToVector;
-
-                    sb << "    __implicit_conversion(" << cost << ")\n";
-                    sb << "    __init(" << kBaseTypes[ff].name << " value);\n";
-                }
-            }
-
-            sb << "}\n";
-        }
-
-        for( int R = 2; R <= 4; ++R )
-        for( int C = 2; C <= 4; ++C )
-        {
-            sb << "__generic<T> __extension matrix<T, " << R << "," << C << ">\n{\n";
-
-            // initialize from R*C scalars
-            sb << "__init(";
-            for( int ii = 0; ii < R; ++ii )
-            for( int jj = 0; jj < C; ++jj )
-            {
-                if ((ii+jj) != 0) sb << ", ";
-                sb << "T m" << ii << jj;
-            }
-            sb << ");\n";
-
-            // Initialize from R C-vectors
-            sb << "__init(";
-            for (int ii = 0; ii < R; ++ii)
-            {
-                if(ii != 0) sb << ", ";
-                sb << "vector<T," << C << "> row" << ii;
-            }
-            sb << ");\n";
-
-
-            // initialize from another matrix of the same size
-            //
-            // TODO(tfoley): See comment about how this overlaps
-            // with implicit conversion, in the `vector` case above
-            sb << "__generic<U> __init(matrix<U," << R << ", " << C << ">);\n";
-
-            // initialize from a matrix of larger size
-            for(int rr = R; rr <= 4; ++rr)
-            for( int cc = C; cc <= 4; ++cc )
-            {
-                if(rr == R && cc == C) continue;
-                sb << "__init(matrix<T," << rr << "," << cc << "> value);\n";
-            }
-
-            sb << "}\n";
-        }
-
-        // Declare built-in texture and sampler types
-
-
-
-        sb << "__magic_type(SamplerState," << int(SamplerStateType::Flavor::SamplerState) << ")\n";
-        sb << "__intrinsic_type(" << kIROp_SamplerType << ", " << int(SamplerStateType::Flavor::SamplerState) << ")\n";
-        sb << "struct SamplerState {};";
-        
-        sb << "__magic_type(SamplerState," << int(SamplerStateType::Flavor::SamplerComparisonState) << ")\n";
-        sb << "__intrinsic_type(" << kIROp_SamplerType << ", " << int(SamplerStateType::Flavor::SamplerComparisonState) << ")\n";
-        sb << "struct SamplerComparisonState {};";
-
-        // TODO(tfoley): Need to handle `RW*` variants of texture types as well...
-        static const struct {
-            char const*			name;
-            TextureType::Shape	baseShape;
-            int					coordCount;
-        } kBaseTextureTypes[] = {
-            { "Texture1D",		TextureType::Shape1D,	1 },
-            { "Texture2D",		TextureType::Shape2D,	2 },
-            { "Texture3D",		TextureType::Shape3D,	3 },
-            { "TextureCube",	TextureType::ShapeCube,	3 },
-        };
-        static const int kBaseTextureTypeCount = sizeof(kBaseTextureTypes) / sizeof(kBaseTextureTypes[0]);
-
-
-        static const struct {
-            char const*         name;
-            SlangResourceAccess access;
-        } kBaseTextureAccessLevels[] = {
-            { "",                   SLANG_RESOURCE_ACCESS_READ },
-            { "RW",                 SLANG_RESOURCE_ACCESS_READ_WRITE },
-            { "RasterizerOrdered",  SLANG_RESOURCE_ACCESS_RASTER_ORDERED },
-        };
-        static const int kBaseTextureAccessLevelCount = sizeof(kBaseTextureAccessLevels) / sizeof(kBaseTextureAccessLevels[0]);
-
-        for (int tt = 0; tt < kBaseTextureTypeCount; ++tt)
-        {
-            char const* name = kBaseTextureTypes[tt].name;
-            TextureType::Shape baseShape = kBaseTextureTypes[tt].baseShape;
-
-            for (int isArray = 0; isArray < 2; ++isArray)
-            {
-                // Arrays of 3D textures aren't allowed
-                if (isArray && baseShape == TextureType::Shape3D) continue;
-
-                for (int isMultisample = 0; isMultisample < 2; ++isMultisample)
-                for (int accessLevel = 0; accessLevel < kBaseTextureAccessLevelCount; ++accessLevel)
-                {
-                    auto access = kBaseTextureAccessLevels[accessLevel].access;
-
-                    // TODO: any constraints to enforce on what gets to be multisampled?
-
-                    unsigned flavor = baseShape;
-                    if (isArray)		flavor |= TextureType::ArrayFlag;
-                    if (isMultisample)	flavor |= TextureType::MultisampleFlag;
-//                        if (isShadow)		flavor |= TextureType::ShadowFlag;
-
-                    flavor |= (access << 8);
-
-                    // emit a generic signature
-                    // TODO: allow for multisample count to come in as well...
-                    sb << "__generic<T = float4> ";
-
-                    sb << "__magic_type(Texture," << int(flavor) << ")\n";
-                    sb << "__intrinsic_type(" << kIROp_TextureType << ", " << flavor << ")\n";
-                    sb << "struct ";
-                    sb << kBaseTextureAccessLevels[accessLevel].name;
-                    sb << name;
-                    if (isMultisample) sb << "MS";
-                    if (isArray) sb << "Array";
-//                        if (isShadow) sb << "Shadow";
-                    sb << "\n{";
-
-                    if( !isMultisample )
-                    {
-                        sb << "float CalculateLevelOfDetail(SamplerState s, ";
-                        sb << "float" << kBaseTextureTypes[tt].coordCount << " location);\n";
-
-                        sb << "float CalculateLevelOfDetailUnclamped(SamplerState s, ";
-                        sb << "float" << kBaseTextureTypes[tt].coordCount << " location);\n";
-                    }
-
-                    // `GetDimensions`
-
-                    for(int isFloat = 0; isFloat < 2; ++isFloat)
-                    for(int includeMipInfo = 0; includeMipInfo < 2; ++includeMipInfo)
-                    {
-                        {
-                            sb << "__glsl_version(450)\n";
-                            sb << "__target_intrinsic(glsl, \"(";
-
-                            int aa = 0;
-                            String lodStr = "0";
-                            if (includeMipInfo)
-                            {
-                                int mipLevelArg = aa++;
-                                lodStr = "int($";
-                                lodStr.append(mipLevelArg);
-                                lodStr.append(")");
-                            }
-
-                            int cc = 0;
-                            switch(baseShape)
-                            {
-                            case TextureType::Shape1D:
-                                sb << "($" << aa++ << " = textureSize($P, " << lodStr << "))";
-                                cc = 1;
-                                break;
-
-                            case TextureType::Shape2D:
-                            case TextureType::ShapeCube:
-                                sb << "($" << aa++ << " = textureSize($P, " << lodStr << ").x)";
-                                sb << ", ($" << aa++ << " = textureSize($P, " << lodStr << ").y)";
-                                cc = 2;
-                                break;
-
-                            case TextureType::Shape3D:
-                                sb << "($" << aa++ << " = textureSize($P, " << lodStr << ").x)";
-                                sb << ", ($" << aa++ << " = textureSize($P, " << lodStr << ").y)";
-                                sb << ", ($" << aa++ << " = textureSize($P, " << lodStr << ").z)";
-                                cc = 3;
-                                break;
-
-                            default:
-                                SLANG_UNEXPECTED("unhandled resource shape");
-                                break;
-                            }
-
-                            if(isArray)
-                            {
-                                sb << ", ($" << aa++ << " = textureSize($P, " << lodStr << ")." << kComponentNames[cc] << ")";
-                            }
-
-                            if(isMultisample)
-                            {
-                                sb << ", ($" << aa++ << " = textureSamples($P))";
-                            }
-
-                            if (includeMipInfo)
-                            {
-                                sb << ", ($" << aa++ << " = textureQueryLevels($P))";
-                            }
-
-
-                            sb << ")\")\n";
-                            sb << "__intrinsic_op\n";
-
-                        }
-
-                        char const* t = isFloat ? "out float " : "out uint ";
-
-                        sb << "void GetDimensions(";
-                        if(includeMipInfo)
-                            sb << "uint mipLevel, ";
-
-                        switch(baseShape)
-                        {
-                        case TextureType::Shape1D:
-                            sb << t << "width";
-                            break;
-
-                        case TextureType::Shape2D:
-                        case TextureType::ShapeCube:
-                            sb << t << "width,";
-                            sb << t << "height";
-                            break;
-
-                        case TextureType::Shape3D:
-                            sb << t << "width,";
-                            sb << t << "height,";
-                            sb << t << "depth";
-                            break;
-
-                        default:
-                            assert(!"unexpected");
-                            break;
-                        }
-
-                        if(isArray)
-                        {
-                            sb << ", " << t << "elements";
-                        }
-
-                        if(isMultisample)
-                        {
-                            sb << ", " << t << "sampleCount";
-                        }
-
-                        if(includeMipInfo)
-                            sb << ", " << t << "numberOfLevels";
-
-                        sb << ");\n";
-                    }
-
-                    // `GetSamplePosition()`
-                    if( isMultisample )
-                    {
-                        sb << "float2 GetSamplePosition(int s);\n";
-                    }
-
-                    // `Load()`
-
-                    if( kBaseTextureTypes[tt].coordCount + isArray < 4 )
-                    {
-                        int loadCoordCount = kBaseTextureTypes[tt].coordCount + isArray + (isMultisample?0:1);
-
-                        // When translating to GLSL, we need to break apart the `location` argument.
-                        //
-                        // TODO: this should realy be handled by having this member actually get lowered!
-                        static const char* kGLSLLoadCoordsSwizzle[] = { "", "", "x", "xy", "xyz", "xyzw" };
-                        static const char* kGLSLLoadLODSwizzle[]    = { "", "", "y", "z", "w", "error" };
-
-                        if (isMultisample)
-                        {
-                            sb << "__target_intrinsic(glsl, \"texelFetch($P, $0, $1)\")\n";
-                        }
-                        else
-                        {
-                            sb << "__target_intrinsic(glsl, \"texelFetch($P, ($0)." << kGLSLLoadCoordsSwizzle[loadCoordCount] << ", ($0)." << kGLSLLoadLODSwizzle[loadCoordCount] << ")\")\n";
-                        }
-                        sb << "__intrinsic_op\n";
-                        sb << "T Load(";
-                        sb << "int" << loadCoordCount << " location";
-                        if(isMultisample)
-                        {
-                            sb << ", int sampleIndex";
-                        }
-                        sb << ");\n";
-
-                        if (isMultisample)
-                        {
-                            sb << "__target_intrinsic(glsl, \"texelFetchOffset($P, $0, $1, $2)\")\n";
-                        }
-                        else
-                        {
-                            sb << "__target_intrinsic(glsl, \"texelFetch($P, ($0)." << kGLSLLoadCoordsSwizzle[loadCoordCount] << ", ($0)." << kGLSLLoadLODSwizzle[loadCoordCount] << ", $1)\")\n";
-                        }
-                        sb << "__intrinsic_op\n";
-                        sb << "T Load(";
-                        sb << "int" << loadCoordCount << " location";
-                        if(isMultisample)
-                        {
-                            sb << ", int sampleIndex";
-                        }
-                        sb << ", int" << loadCoordCount << " offset";
-                        sb << ");\n";
-
-
-                        sb << "T Load(";
-                        sb << "int" << loadCoordCount << " location";
-                        if(isMultisample)
-                        {
-                            sb << ", int sampleIndex";
-                        }
-                        sb << ", int" << kBaseTextureTypes[tt].coordCount << " offset";
-                        sb << ", out uint status";
-                        sb << ");\n";
-                    }
-
-                    if(baseShape != TextureType::ShapeCube)
-                    {
-                        // subscript operator
-                        sb << "__intrinsic_op __subscript(uint" << kBaseTextureTypes[tt].coordCount + isArray << " location) -> T;\n";
-                    }
-
-                    if( !isMultisample )
-                    {
-                        // `Sample()`
-
-                        sb << "__target_intrinsic(glsl, \"texture($p, $1)\")\n";
-
-                        // TODO: only enable if IR is being used?
-                        sb << "__intrinsic_op(sample)\n";
-
-                        sb << "__intrinsic_op\n";
-                        sb << "T Sample(SamplerState s, ";
-                        sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location);\n";
-
-                        if( baseShape != TextureType::ShapeCube )
-                        {
-                            sb << "__target_intrinsic(glsl, \"textureOffset($p, $1, $2)\")\n";
-                            sb << "__intrinsic_op\n";
-                            sb << "T Sample(SamplerState s, ";
-                            sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, ";
-                            sb << "int" << kBaseTextureTypes[tt].coordCount << " offset);\n";
-                        }
-
-                        sb << "T Sample(SamplerState s, ";
-                        sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, ";
-                        if( baseShape != TextureType::ShapeCube )
-                        {
-                            sb << "int" << kBaseTextureTypes[tt].coordCount << " offset, ";
-                        }
-                        sb << "float clamp);\n";
-
-                        sb << "T Sample(SamplerState s, ";
-                        sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, ";
-                        if( baseShape != TextureType::ShapeCube )
-                        {
-                            sb << "int" << kBaseTextureTypes[tt].coordCount << " offset, ";
-                        }
-                        sb << "float clamp, out uint status);\n";
-
-
-                        // `SampleBias()`
-                        sb << "__target_intrinsic(glsl, \"texture($p, $1, $2)\")\n";
-                        sb << "__intrinsic_op\n";
-                        sb << "T SampleBias(SamplerState s, ";
-                        sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, float bias);\n";
-
-                        if( baseShape != TextureType::ShapeCube )
-                        {
-                            sb << "__target_intrinsic(glsl, \"textureOffset($p, $1, $2, $3)\")\n";
-                            sb << "__intrinsic_op\n";
-                            sb << "T SampleBias(SamplerState s, ";
-                            sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, float bias, ";
-                            sb << "int" << kBaseTextureTypes[tt].coordCount << " offset);\n";
-                        }
-
-                        // `SampleCmp()` and `SampleCmpLevelZero`
-                        sb << "T SampleCmp(SamplerComparisonState s, ";
-                        sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, ";
-                        sb << "float compareValue";
-                        sb << ");\n";
-
-                        int baseCoordCount = kBaseTextureTypes[tt].coordCount;
-                        int arrCoordCount = baseCoordCount + isArray;
-                        if (arrCoordCount < 3)
-                        {
-                            int extCoordCount = arrCoordCount + 1;
-
-                            if (extCoordCount < 3)
-                                extCoordCount = 3;
-
-                            sb << "__target_intrinsic(glsl, \"textureLod($p, ";
-
-                            sb << "vec" << extCoordCount << "($1,";
-                            for (int ii = arrCoordCount; ii < extCoordCount - 1; ++ii)
-                            {
-                                sb << " 0.0,";
-                            }
-                            sb << "$2)";
-
-                            sb << ", 0.0)\")\n";
-                        }
-                        else if(arrCoordCount <= 3)
-                        {
-                            int extCoordCount = arrCoordCount + 1;
-
-                            if (extCoordCount < 3)
-                                extCoordCount = 3;
-
-                            sb << "__target_intrinsic(glsl, \"textureGrad($p, ";
-
-                            sb << "vec" << extCoordCount << "($1,";
-                            for (int ii = arrCoordCount; ii < extCoordCount - 1; ++ii)
-                            {
-                                sb << " 0.0,";
-                            }
-                            sb << "$2)";
-
-                            // Construct gradients
-                            sb << ", vec" << baseCoordCount << "(0.0)";
-                            sb << ", vec" << baseCoordCount << "(0.0)";
-                            sb << ")\")\n";
-                        }
-                        sb << "__intrinsic_op\n";
-                        sb << "T SampleCmpLevelZero(SamplerComparisonState s, ";
-                        sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, ";
-                        sb << "float compareValue";
-                        sb << ");\n";
-
-                        if( baseShape != TextureType::ShapeCube )
-                        {
-                            // Note(tfoley): MSDN seems confused, and claims that the `offset`
-                            // parameter for `SampleCmp` is available for everything but 3D
-                            // textures, while `Sample` and `SampleBias` are consistent in
-                            // saying they only exclude `offset` for cube maps (which makes
-                            // sense). I'm going to assume the documentation for `SampleCmp`
-                            // is just wrong.
-
-                            sb << "T SampleCmp(SamplerState s, ";
-                            sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, ";
-                            sb << "float compareValue, ";
-                            sb << "int" << kBaseTextureTypes[tt].coordCount << " offset);\n";
-
-                            sb << "T SampleCmpLevelZero(SamplerState s, ";
-                            sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, ";
-                            sb << "float compareValue, ";
-                            sb << "int" << kBaseTextureTypes[tt].coordCount << " offset);\n";
-                        }
-
-
-                        sb << "__target_intrinsic(glsl, \"textureGrad($p, $1, $2, $3)\")\n";
-                        sb << "__intrinsic_op(sampleGrad)\n";
-                        sb << "T SampleGrad(SamplerState s, ";
-                        sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, ";
-                        sb << "float" << kBaseTextureTypes[tt].coordCount << " gradX, ";
-                        sb << "float" << kBaseTextureTypes[tt].coordCount << " gradY";
-                        sb << ");\n";
-
-                        if( baseShape != TextureType::ShapeCube )
-                        {
-                            sb << "__target_intrinsic(glsl, \"textureGradOffset($p, $1, $2, $3, $4)\")\n";
-                            sb << "__intrinsic_op(sampleGrad)\n";
-                            sb << "T SampleGrad(SamplerState s, ";
-                            sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, ";
-                            sb << "float" << kBaseTextureTypes[tt].coordCount << " gradX, ";
-                            sb << "float" << kBaseTextureTypes[tt].coordCount << " gradY, ";
-                            sb << "int" << kBaseTextureTypes[tt].coordCount << " offset);\n";
-                        }
-
-                        // `SampleLevel`
-
-                        sb << "__target_intrinsic(glsl, \"textureLod($p, $1, $2)\")\n";
-                        sb << "__intrinsic_op\n";
-                        sb << "T SampleLevel(SamplerState s, ";
-                        sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, ";
-                        sb << "float level);\n";
-
-                        if( baseShape != TextureType::ShapeCube )
-                        {
-                            sb << "__target_intrinsic(glsl, \"textureLodOffset($p, $1, $2, $3)\")\n";
-                            sb << "__intrinsic_op\n";
-                            sb << "T SampleLevel(SamplerState s, ";
-                            sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, ";
-                            sb << "float level, ";
-                            sb << "int" << kBaseTextureTypes[tt].coordCount << " offset);\n";
-                        }
-                    }
-
-                    sb << "\n};\n";
-
-                    // `Gather*()` operations are handled via an `extension` declaration,
-                    // because this lets us capture the element type of the texture.
-                    //
-                    // TODO: longer-term there should be something like a `TextureElementType`
-                    // interface, that both scalars and vectors implement, that then exposes
-                    // a `Scalar` associated type, and `Gather` can return `vector<T.Scalar, 4>`.
-                    //
-                    static const struct {
-                        char const* genericPrefix;
-                        char const* elementType;
-                    } kGatherExtensionCases[] = {
-                        { "__generic<T, let N : int>", "vector<T,N>" },
-
-                        // TODO: need a case here for scalars `T`, but also
-                        // need to ensure that case doesn't accidentally match
-                        // for `T = vector<...>`, which requires actual checking
-                        // of constraints on generic parameters.
-                    };
-                    for(auto cc : kGatherExtensionCases)
-                    {
-                        // TODO: this should really be an `if` around the entire `Gather` logic
-                        if (isMultisample) break;
-
-                        EMIT_LINE_DIRECTIVE();
-                        sb << cc.genericPrefix << " __extension ";
-                        sb << kBaseTextureAccessLevels[accessLevel].name;
-                        sb << name;
-                        if (isArray) sb << "Array";
-                        sb << "<" << cc.elementType << " >";
-                        sb << "\n{\n";
-
-
-                        // `Gather`
-                        // (tricky because it returns a 4-vector of the element type
-                        // of the texture components...)
-                        //
-                        // TODO: is it actually correct to restrict these so that, e.g.,
-                        // `GatherAlpha()` isn't allowed on `Texture2D<float3>` because
-                        // it nominally doesn't have an alpha component?
-                        static const struct {
-                            int componentIndex;
-                            char const* componentName;
-                        } kGatherComponets[] = {
-                            { 0, "" },
-                            { 0, "Red" },
-                            { 1, "Green" },
-                            { 2, "Blue" },
-                            { 3, "Alpha" },
-                        };
-
-                        for(auto kk : kGatherComponets)
-                        {
-                            auto componentIndex = kk.componentIndex;
-                            auto componentName = kk.componentName;
-
-                            EMIT_LINE_DIRECTIVE();
-                            
-                            sb << "__target_intrinsic(glsl, \"textureGather($p, $1, " << componentIndex << ")\")\n";
-                            sb << "__intrinsic_op\n";
-                            sb << "vector<T, 4> Gather" << componentName << "(SamplerState s, ";
-                            sb << "float" << kBaseTextureTypes[tt].coordCount << " location);\n";
-
-                            EMIT_LINE_DIRECTIVE();
-                            sb << "__target_intrinsic(glsl, \"textureGatherOffset($p, $1, $2, " << componentIndex << ")\")\n";
-                            sb << "__intrinsic_op\n";
-                            sb << "vector<T, 4> Gather" << componentName << "(SamplerState s, ";
-                            sb << "float" << kBaseTextureTypes[tt].coordCount << " location, ";
-                            sb << "int" << kBaseTextureTypes[tt].coordCount << " offset);\n";
-
-                            EMIT_LINE_DIRECTIVE();
-                            sb << "vector<T, 4> Gather" << componentName << "(SamplerState s, ";
-                            sb << "float" << kBaseTextureTypes[tt].coordCount << " location, ";
-                            sb << "int" << kBaseTextureTypes[tt].coordCount << " offset, ";
-                            sb << "out uint status);\n";
-
-                            EMIT_LINE_DIRECTIVE();
-                            sb << "__target_intrinsic(glsl, \"textureGatherOffsets($p, $1, int" << kBaseTextureTypes[tt].coordCount << "[]($2, $3, $4, $5), " << componentIndex << ")\")\n";
-                            sb << "__intrinsic_op\n";
-                            sb << "vector<T, 4> Gather" << componentName << "(SamplerState s, ";
-                            sb << "float" << kBaseTextureTypes[tt].coordCount << " location, ";
-                            sb << "int" << kBaseTextureTypes[tt].coordCount << " offset1, ";
-                            sb << "int" << kBaseTextureTypes[tt].coordCount << " offset2, ";
-                            sb << "int" << kBaseTextureTypes[tt].coordCount << " offset3, ";
-                            sb << "int" << kBaseTextureTypes[tt].coordCount << " offset4);\n";
-
-                            EMIT_LINE_DIRECTIVE();
-                            sb << "vector<T, 4> Gather" << componentName << "(SamplerState s, ";
-                            sb << "float" << kBaseTextureTypes[tt].coordCount << " location, ";
-                            sb << "int" << kBaseTextureTypes[tt].coordCount << " offset1, ";
-                            sb << "int" << kBaseTextureTypes[tt].coordCount << " offset2, ";
-                            sb << "int" << kBaseTextureTypes[tt].coordCount << " offset3, ";
-                            sb << "int" << kBaseTextureTypes[tt].coordCount << " offset4, ";
-                            sb << "out uint status);\n";
-                        }
-
-                        EMIT_LINE_DIRECTIVE();
-                        sb << "\n}\n";
-                    }
-                }
-            }
-        }
-
-
-        for (auto op : unaryOps)
-        {
-            for (auto type : kBaseTypes)
-            {
-                if ((type.flags & op.flags) == 0)
-                    continue;
-
-                char const* fixity = (op.flags & POSTFIX) != 0 ? "__postfix " : "__prefix ";
-                char const* qual = (op.flags & ASSIGNMENT) != 0 ? "in out " : "";
-
-                // scalar version
-                sb << fixity;
-                sb << "__intrinsic_op(" << int(op.opCode) << ") " << type.name << " operator" << op.opName << "(" << qual << type.name << " value);\n";
-
-                // vector version
-                sb << "__generic<let N : int> ";
-                sb << fixity;
-                sb << "__intrinsic_op(" << int(op.opCode) << ") vector<" << type.name << ",N> operator" << op.opName << "(" << qual << "vector<" << type.name << ",N> value);\n";
-
-                // matrix version
-                sb << "__generic<let N : int, let M : int> ";
-                sb << fixity;
-                sb << "__intrinsic_op(" << int(op.opCode) << ") matrix<" << type.name << ",N,M> operator" << op.opName << "(" << qual << "matrix<" << type.name << ",N,M> value);\n";
-            }
-        }
-
-        for (auto op : binaryOps)
-        {
-            for (auto type : kBaseTypes)
-            {
-                if ((type.flags & op.flags) == 0)
-                    continue;
-
-                char const* leftType = type.name;
-                char const* rightType = leftType;
-                char const* resultType = leftType;
-
-                if (op.flags & COMPARISON) resultType = "bool";
-
-                char const* leftQual = "";
-                if(op.flags & ASSIGNMENT) leftQual = "in out ";
-
-                // TODO: handle `SHIFT`
-
-                // scalar version
-                sb << "__intrinsic_op(" << int(op.opCode) << ") " << resultType << " operator" << op.opName << "(" << leftQual << leftType << " left, " << rightType << " right);\n";
-
-                // vector version
-                sb << "__generic<let N : int> ";
-                sb << "__intrinsic_op(" << int(op.opCode) << ") vector<" << resultType << ",N> operator" << op.opName << "(" << leftQual << "vector<" << leftType << ",N> left, vector<" << rightType << ",N> right);\n";
-
-                // matrix version
-
-                // skip matrix-matrix multiply operations here, so that GLSL doesn't see them
-                switch (op.opCode)
-                {
-                case kIROp_Mul:
-                case kIRPseudoOp_MulAssign:
-                    break;
-
-                default:
-                    sb << "__generic<let N : int, let M : int> ";
-                    sb << "__intrinsic_op(" << int(op.opCode) << ") matrix<" << resultType << ",N,M> operator" << op.opName << "(" << leftQual << "matrix<" << leftType << ",N,M> left, matrix<" << rightType << ",N,M> right);\n";
-                    break;
-                }
-
-                // We are going to go ahead and explicitly define combined
-                // operations for the scalar-op-vector, etc. cases, rather
-                // than rely on promotion rules.
-
-                // scalar-vector and scalar-matrix
-                if (!(op.flags & ASSIGNMENT))
-                {
-                    sb << "__generic<let N : int> ";
-                    sb << "__intrinsic_op(" << int(op.opCode) << ") vector<" << resultType << ",N> operator" << op.opName << "(" << leftQual << leftType << " left, vector<" << rightType << ",N> right);\n";
-
-                    sb << "__generic<let N : int, let M : int> ";
-                    sb << "__intrinsic_op(" << int(op.opCode) << ") matrix<" << resultType << ",N,M> operator" << op.opName << "(" << leftQual << leftType << " left, matrix<" << rightType << ",N,M> right);\n";
-                }
-
-                // vector-scalar and matrix-scalar
-                sb << "__generic<let N : int> ";
-                sb << "__intrinsic_op(" << int(op.opCode) << ") vector<" << resultType << ",N> operator" << op.opName << "(" << leftQual << "vector<" << leftType << ",N> left, " << rightType << " right);\n";
-
-                sb << "__generic<let N : int, let M : int> ";
-                sb << "__intrinsic_op(" << int(op.opCode) << ") matrix<" << resultType << ",N,M> operator" << op.opName << "(" << leftQual << "matrix<" << leftType << ",N,M> left, " << rightType << " right);\n";
-            }
-        }
-
-        // Output a suitable `#line` directive to point at our raw stdlib code above
-        sb << "\n#line " << kCoreLibIncludeStringLine << " \"" << path << "\"\n";
-
-        int chunkCount = sizeof(kCoreLibIncludeStringChunks) / sizeof(kCoreLibIncludeStringChunks[0]);
-        for (int cc = 0; cc < chunkCount; ++cc)
-        {
-            sb << kCoreLibIncludeStringChunks[cc];
-        }
+        #include "core.meta.slang.cpp"
 
         coreLibraryCode = sb.ProduceString();
         return coreLibraryCode;
@@ -2173,90 +261,7 @@ namespace Slang
 
         StringBuilder sb;
 
-
-        // Component-wise multiplication ops
-        for(auto op : binaryOps)
-        {
-            switch (op.opCode)
-            {
-            default:
-                continue;
-
-            case kIROp_Mul:
-            case kIRPseudoOp_MulAssign:
-                break;
-            }
-
-            for (auto type : kBaseTypes)
-            {
-                if ((type.flags & op.flags) == 0)
-                    continue;
-
-                char const* leftType = type.name;
-                char const* rightType = leftType;
-                char const* resultType = leftType;
-
-                char const* leftQual = "";
-                if(op.flags & ASSIGNMENT) leftQual = "in out ";
-
-                sb << "__generic<let N : int, let M : int> ";
-                sb << "__intrinsic_op(" << int(op.opCode) << ") matrix<" << resultType << ",N,M> operator" << op.opName << "(" << leftQual << "matrix<" << leftType << ",N,M> left, matrix<" << rightType << ",N,M> right);\n";
-            }
-        }
-
-        //
-
-        // Buffer types
-
-        static const struct {
-            char const*         name;
-            SlangResourceAccess access;
-        } kBaseBufferAccessLevels[] = {
-            { "",                   SLANG_RESOURCE_ACCESS_READ },
-            { "RW",                 SLANG_RESOURCE_ACCESS_READ_WRITE },
-            { "RasterizerOrdered",  SLANG_RESOURCE_ACCESS_RASTER_ORDERED },
-        };
-        static const int kBaseBufferAccessLevelCount = sizeof(kBaseBufferAccessLevels) / sizeof(kBaseBufferAccessLevels[0]);
-
-        for (int aa = 0; aa < kBaseBufferAccessLevelCount; ++aa)
-        {
-
-            sb << "__generic<T> __magic_type(Texture, ";
-            sb << ResourceType::makeFlavor(ResourceType::Shape::ShapeBuffer, kBaseBufferAccessLevels[aa].access);
-            sb << ") struct ";
-            sb << kBaseBufferAccessLevels[aa].name;
-            sb << "Buffer {\n";
-
-            sb << "__intrinsic_op void GetDimensions(out uint dim);\n";
-
-            sb << "__target_intrinsic(glsl, \"texelFetch($P, $0)$z\")\n";
-            sb << "__intrinsic_op T Load(int location);\n";
-
-            sb << "__intrinsic_op T Load(int location, out uint status);\n";
-
-            sb << "__target_intrinsic(glsl, \"texelFetch($P, int($0))$z\")\n";
-            sb << "__intrinsic_op __subscript(uint index) -> T";
-
-            if (kBaseBufferAccessLevels[aa].access != SLANG_RESOURCE_ACCESS_READ)
-            {
-                sb << " { get; set; }\n";
-            }
-            else
-            {
-                sb << ";\n";
-            }
-
-            sb << "};\n";
-        }
-
-        // Output a suitable `#line` directive to point at our raw stdlib code above
-        sb << "\n#line " << kHLSLLibIncludeStringLine << " \"" << getStdlibPath() << "\"\n";
-
-        int chunkCount = sizeof(kHLSLLibIncludeStringChunks) / sizeof(kHLSLLibIncludeStringChunks[0]);
-        for (int cc = 0; cc < chunkCount; ++cc)
-        {
-            sb << kHLSLLibIncludeStringChunks[cc];
-        }
+        #include "hlsl.meta.slang.cpp"
 
         hlslLibraryCode = sb.ProduceString();
         return hlslLibraryCode;
@@ -2274,203 +279,7 @@ namespace Slang
 
         StringBuilder sb;
 
-        static const struct {
-            char const* name;
-            char const* glslPrefix;
-        } kTypes[] =
-        {
-            {"float", ""},
-            {"int", "i"},
-            {"uint", "u"},
-            {"bool", "b"},
-        };
-        static const int kTypeCount = sizeof(kTypes) / sizeof(kTypes[0]);
-
-        for( int tt = 0; tt < kTypeCount; ++tt )
-        {
-            // Declare GLSL aliases for HLSL types
-            for (int vv = 2; vv <= 4; ++vv)
-            {
-                sb << "typedef vector<" << kTypes[tt].name << "," << vv << "> " << kTypes[tt].glslPrefix << "vec" << vv << ";\n";
-                sb << "typedef matrix<" << kTypes[tt].name << "," << vv << "," << vv << "> " << kTypes[tt].glslPrefix << "mat" << vv << ";\n";
-            }
-            for (int rr = 2; rr <= 4; ++rr)
-            for (int cc = 2; cc <= 4; ++cc)
-            {
-                sb << "typedef matrix<" << kTypes[tt].name << "," << rr << "," << cc << "> " << kTypes[tt].glslPrefix << "mat" << rr << "x" << cc << ";\n";
-            }
-        }
-
-        // Multiplication operations for vectors + matrices
-
-        // scalar-vector and vector-scalar
-        sb << "__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op(mul) vector<T,N> operator*(vector<T,N> x, T y);\n";
-        sb << "__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op(mul) vector<T,N> operator*(T x, vector<T,N> y);\n";
-
-        // scalar-matrix and matrix-scalar
-        sb << "__generic<T : __BuiltinArithmeticType, let N : int, let M :int> __intrinsic_op(mul) matrix<T,N,M> operator*(matrix<T,N,M> x, T y);\n";
-        sb << "__generic<T : __BuiltinArithmeticType, let N : int, let M :int> __intrinsic_op(mul) matrix<T,N,M> operator*(T x, matrix<T,N,M> y);\n";
-
-        // vector-vector (dot product)
-        sb << "__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op(dot) T operator*(vector<T,N> x, vector<T,N> y);\n";
-
-        // vector-matrix
-        sb << "__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op(mulVectorMatrix) vector<T,M> operator*(vector<T,N> x, matrix<T,N,M> y);\n";
-
-        // matrix-vector
-        sb << "__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op(mulMatrixVector) vector<T,N> operator*(matrix<T,N,M> x, vector<T,M> y);\n";
-
-        // matrix-matrix
-        sb << "__generic<T : __BuiltinArithmeticType, let R : int, let N : int, let C : int> __intrinsic_op(mulMatrixMatrix) matrix<T,R,C> operator*(matrix<T,R,N> x, matrix<T,N,C> y);\n";
-
-
-
-        //
-
-        // TODO(tfoley): Need to handle `RW*` variants of texture types as well...
-        static const struct {
-            char const*			name;
-            TextureType::Shape	baseShape;
-            int					coordCount;
-        } kBaseTextureTypes[] = {
-            { "1D",		TextureType::Shape1D,	1 },
-            { "2D",		TextureType::Shape2D,	2 },
-            { "3D",		TextureType::Shape3D,	3 },
-            { "Cube",	TextureType::ShapeCube,	3 },
-            { "Buffer", TextureType::ShapeBuffer,   1 },
-        };
-        static const int kBaseTextureTypeCount = sizeof(kBaseTextureTypes) / sizeof(kBaseTextureTypes[0]);
-
-
-        static const struct {
-            char const*         name;
-            SlangResourceAccess access;
-        } kBaseTextureAccessLevels[] = {
-            { "",                   SLANG_RESOURCE_ACCESS_READ },
-            { "RW",                 SLANG_RESOURCE_ACCESS_READ_WRITE },
-            { "RasterizerOrdered",  SLANG_RESOURCE_ACCESS_RASTER_ORDERED },
-        };
-        static const int kBaseTextureAccessLevelCount = sizeof(kBaseTextureAccessLevels) / sizeof(kBaseTextureAccessLevels[0]);
-
-        for (int tt = 0; tt < kBaseTextureTypeCount; ++tt)
-        {
-            char const* shapeName = kBaseTextureTypes[tt].name;
-            TextureType::Shape baseShape = kBaseTextureTypes[tt].baseShape;
-
-            for (int isArray = 0; isArray < 2; ++isArray)
-            {
-                // Arrays of 3D textures aren't allowed
-                if (isArray && baseShape == TextureType::Shape3D) continue;
-
-                for (int isMultisample = 0; isMultisample < 2; ++isMultisample)
-                {
-                    auto readAccess = SLANG_RESOURCE_ACCESS_READ;
-                    auto readWriteAccess = SLANG_RESOURCE_ACCESS_READ_WRITE;
-
-                    // TODO: any constraints to enforce on what gets to be multisampled?
-
-                        
-                    unsigned flavor = baseShape;
-                    if (isArray)		flavor |= TextureType::ArrayFlag;
-                    if (isMultisample)	flavor |= TextureType::MultisampleFlag;
-//                        if (isShadow)		flavor |= TextureType::ShadowFlag;
-
-
-
-                    unsigned readFlavor = flavor | (readAccess << 8);
-                    unsigned readWriteFlavor = flavor | (readWriteAccess << 8);
-
-                    StringBuilder nameBuilder;
-                    nameBuilder << shapeName;
-                    if (isMultisample) nameBuilder << "MS";
-                    if (isArray) nameBuilder << "Array";
-                    auto name = nameBuilder.ProduceString();
-
-                    sb << "__generic<T> ";
-                    sb << "__magic_type(TextureSampler," << int(readFlavor) << ") struct ";
-                    sb << "__sampler" << name;
-                    sb << " {};\n";
-
-                    sb << "__generic<T> ";
-                    sb << "__magic_type(Texture," << int(readFlavor) << ") struct ";
-                    sb << "__texture" << name;
-                    sb << " {};\n";
-
-                    sb << "__generic<T> ";
-                    sb << "__magic_type(GLSLImageType," << int(readWriteFlavor) << ") struct ";
-                    sb << "__image" << name;
-                    sb << " {};\n";
-
-                    // TODO(tfoley): flesh this out for all the available prefixes
-                    static const struct
-                    {
-                        char const* prefix;
-                        char const* elementType;
-                    } kTextureElementTypes[] = {
-                        { "", "vec4" },
-                        { "i", "ivec4" },
-                        { "u", "uvec4" },
-                        { nullptr, nullptr },
-                    };
-                    for( auto ee = kTextureElementTypes; ee->prefix; ++ee )
-                    {
-                        sb << "typedef __sampler" << name << "<" << ee->elementType << "> " << ee->prefix << "sampler" << name << ";\n";
-                        sb << "typedef __texture" << name << "<" << ee->elementType << "> " << ee->prefix << "texture" << name << ";\n";
-                        sb << "typedef __image" << name << "<" << ee->elementType << "> " << ee->prefix << "image" << name << ";\n";
-                    }
-                }
-            }
-        }
-
-        sb << "__generic<T> __magic_type(GLSLInputParameterBlockType) struct __GLSLInputParameterBlock {};\n";
-        sb << "__generic<T> __magic_type(GLSLOutputParameterBlockType) struct __GLSLOutputParameterBlock {};\n";
-        sb << "__generic<T> __magic_type(GLSLShaderStorageBufferType) struct __GLSLShaderStorageBuffer {};\n";
-
-        sb << "__magic_type(SamplerState," << int(SamplerStateType::Flavor::SamplerState) << ") struct sampler {};";
-
-        sb << "__magic_type(GLSLInputAttachmentType) struct subpassInput {};";
-
-        // Define additional keywords
-
-        sb << "syntax buffer : GLSLBufferModifier;\n";
-
-        // [GLSL 4.3] Storage Qualifiers
-
-        // TODO: need to support `shared` here with its GLSL meaning
-
-        sb << "syntax patch : GLSLPatchModifier;\n";
-        // `centroid` and `sample` handled centrally
-
-        // [GLSL 4.5] Interpolation Qualifiers
-        sb << "syntax smooth : SimpleModifier;\n";
-        sb << "syntax flat : SimpleModifier;\n";
-        sb << "syntax noperspectie : SimpleModifier;\n";
-
-
-        // [GLSL 4.3.2] Constant Qualifier
-
-        // We need to handle GLSL `const` separately from HLSL `const`,
-        // since they mean such different things.
-
-        // [GLSL 4.7.2] Precision Qualifiers
-        sb << "syntax highp : SimpleModifier;\n";
-        sb << "syntax mediump : SimpleModifier;\n";
-        sb << "syntax lowp : SimpleModifier;\n";
-
-        // [GLSL 4.8.1] The Invariant Qualifier
-
-        sb << "syntax invariant : SimpleModifier;\n";
-
-        // [GLSL 4.10] Memory Qualifiers
-
-        sb << "syntax coherent : SimpleModifier;\n";
-        sb << "syntax volatile : SimpleModifier;\n";
-        sb << "syntax restrict : SimpleModifier;\n";
-        sb << "syntax readonly : GLSLReadOnlyModifier;\n";
-        sb << "syntax writeonly : GLSLWriteOnlyModifier;\n";
-
-        // We will treat `subroutine` as a qualifier for now
-        sb << "syntax subroutine : SimpleModifier;\n";
+        #include "glsl.meta.slang.cpp"
 
         glslLibraryCode = sb.ProduceString();
         return glslLibraryCode;
diff --git a/source/slang/slang.vcxproj b/source/slang/slang.vcxproj
index bc41ddb71..1f55138e4 100644
--- a/source/slang/slang.vcxproj
+++ b/source/slang/slang.vcxproj
@@ -229,6 +229,65 @@
       <Project>{f9be7957-8399-899e-0c49-e714fddd4b65}</Project>
     </ProjectReference>
   </ItemGroup>
+  <ItemGroup>
+    <CustomBuild Include="core.meta.slang">
+      <FileType>Document</FileType>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(OutDir)slang-generate.exe %(Identity)</Command>
+      <Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">slang-generate %(Identity)</Message>
+      <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%(Identity).cpp</Outputs>
+      <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(OutDir)slang-generate.exe</AdditionalInputs>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(OutDir)slang-generate.exe %(Identity)</Command>
+      <Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">slang-generate %(Identity)</Message>
+      <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">%(Identity).cpp</Outputs>
+      <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(OutDir)slang-generate.exe</AdditionalInputs>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(OutDir)slang-generate.exe %(Identity)</Command>
+      <Message Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">slang-generate %(Identity)</Message>
+      <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%(Identity).cpp</Outputs>
+      <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(OutDir)slang-generate.exe</AdditionalInputs>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(OutDir)slang-generate.exe %(Identity)</Command>
+      <Message Condition="'$(Configuration)|$(Platform)'=='Release|x64'">slang-generate %(Identity)</Message>
+      <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">%(Identity).cpp</Outputs>
+      <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(OutDir)slang-generate.exe</AdditionalInputs>
+    </CustomBuild>
+    <CustomBuild Include="glsl.meta.slang">
+      <FileType>Document</FileType>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(OutDir)slang-generate.exe %(Identity)</Command>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(OutDir)slang-generate.exe %(Identity)</Command>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(OutDir)slang-generate.exe %(Identity)</Command>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(OutDir)slang-generate.exe %(Identity)</Command>
+      <Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">slang-generate %(Identity)</Message>
+      <Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">slang-generate %(Identity)</Message>
+      <Message Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">slang-generate %(Identity)</Message>
+      <Message Condition="'$(Configuration)|$(Platform)'=='Release|x64'">slang-generate %(Identity)</Message>
+      <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%(Identity).cpp</Outputs>
+      <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">%(Identity).cpp</Outputs>
+      <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%(Identity).cpp</Outputs>
+      <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">%(Identity).cpp</Outputs>
+      <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(OutDir)slang-generate.exe</AdditionalInputs>
+      <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(OutDir)slang-generate.exe</AdditionalInputs>
+      <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(OutDir)slang-generate.exe</AdditionalInputs>
+      <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(OutDir)slang-generate.exe</AdditionalInputs>
+    </CustomBuild>
+    <CustomBuild Include="hlsl.meta.slang">
+      <FileType>Document</FileType>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(OutDir)slang-generate.exe %(Identity)</Command>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(OutDir)slang-generate.exe %(Identity)</Command>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(OutDir)slang-generate.exe %(Identity)</Command>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(OutDir)slang-generate.exe %(Identity)</Command>
+      <Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">slang-generate %(Identity)</Message>
+      <Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">slang-generate %(Identity)</Message>
+      <Message Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">slang-generate %(Identity)</Message>
+      <Message Condition="'$(Configuration)|$(Platform)'=='Release|x64'">slang-generate %(Identity)</Message>
+      <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%(Identity).cpp</Outputs>
+      <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">%(Identity).cpp</Outputs>
+      <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%(Identity).cpp</Outputs>
+      <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">%(Identity).cpp</Outputs>
+      <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(OutDir)slang-generate.exe</AdditionalInputs>
+      <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(OutDir)slang-generate.exe</AdditionalInputs>
+      <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(OutDir)slang-generate.exe</AdditionalInputs>
+      <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(OutDir)slang-generate.exe</AdditionalInputs>
+    </CustomBuild>
+  </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
   </ImportGroup>
diff --git a/source/slang/slang.vcxproj.filters b/source/slang/slang.vcxproj.filters
index 4bd7ff9a0..9a85ce966 100644
--- a/source/slang/slang.vcxproj.filters
+++ b/source/slang/slang.vcxproj.filters
@@ -64,4 +64,9 @@
     <ClCompile Include="ir.cpp" />
     <ClCompile Include="lower-to-ir.cpp" />
   </ItemGroup>
+  <ItemGroup>
+    <None Include="core.meta.slang" />
+    <None Include="glsl.meta.slang" />
+    <None Include="hlsl.meta.slang" />
+  </ItemGroup>
 </Project>
 \ No newline at end of file
author	Tim Foley <tfoleyNV@users.noreply.github.com>	2017-09-11 10:27:41 -0700
committer	GitHub <noreply@github.com>	2017-09-11 10:27:41 -0700
commit	80fb7b05b851e645d821331fdbbcea1add686c9a (patch)
tree	5f9b010837de0c78f2f96e59388bf76e4cbd8575 /source
parent	0e566a63f0bafb7def65521315e9f19a2bc79e34 (diff)
parent	14137cbd2ddd7deebcdf8cc85c30d534bec8e40b (diff)