Initial work on boilerplate code generator

The goal here is to get the Slang "standard library" code out of string literals and into something a bit more like an actual code file. This is handled by having a `slang-generate` tool that can translate a "template" file that mixes raw Slang code (or any language we want to generate...) with generation logic that is implemented in C++ (currently). This work isn't final by any stretch of the imagination, but it moves a lot of code and not merging it ASAP will complicate other changes. My expectation is that the generator tool will be beefed up on an as-needed basis, to get our stdlib code working. Similarly, the stdlib code does not really take advantage of the new approach as much as it could. That is something we can clean up along the way as we do modifications of the stdlib.
author: Tim Foley <tfoley@nvidia.com> 2017-09-07 14:35:07 -0700
committer: Tim Foley <tfoley@nvidia.com> 2017-09-11 09:50:56 -0700
commit: 14137cbd2ddd7deebcdf8cc85c30d534bec8e40b (patch)
tree: 5f9b010837de0c78f2f96e59388bf76e4cbd8575 /source
parent: 0e566a63f0bafb7def65521315e9f19a2bc79e34 (diff)
9 files changed, 4426 insertions, 2194 deletions
diff --git a/source/slang/core.meta.slang b/source/slang/core.meta.slang
new file mode 100644
index 000000000..fc2d27b08
--- /dev/null
+++ b/source/slang/core.meta.slang
@@ -0,0 +1,907 @@
+// Slang `core` library
+
+// A type that can be used as an operand for builtins
+interface __BuiltinType {}
+
+// A type that can be used for arithmetic operations
+interface __BuiltinArithmeticType : __BuiltinType {}
+
+// A type that logically has a sign (positive/negative/zero)
+interface __BuiltinSignedArithmeticType : __BuiltinArithmeticType {}
+
+// A type that can represent integers
+interface __BuiltinIntegerType : __BuiltinArithmeticType {}
+
+// A type that can represent non-integers
+interface __BuiltinRealType : __BuiltinArithmeticType {}
+
+// A type that uses a floating-point representation
+interface __BuiltinFloatingPointType : __BuiltinRealType, __BuiltinSignedArithmeticType {}
+
+__generic<T,U> __intrinsic_op(Sequence) U operator,(T left, U right);
+
+__generic<T> __intrinsic_op(select) T operator?:(bool condition, T ifTrue, T ifFalse);
+__generic<T, let N : int> __intrinsic_op(select) vector<T,N> operator?:(vector<bool,N> condition, vector<T,N> ifTrue, vector<T,N> ifFalse);
+
+${{{{
+// We are going to use code generation to produce the
+// declarations for all of our base types.
+
+static const int kBaseTypeCount = sizeof(kBaseTypes) / sizeof(kBaseTypes[0]);
+for (int tt = 0; tt < kBaseTypeCount; ++tt)
+{
+    EMIT_LINE_DIRECTIVE();
+    sb << "__builtin_type(" << int(kBaseTypes[tt].tag) << ") struct " << kBaseTypes[tt].name;
+
+    // Declare interface conformances for this type
+
+    sb << "\n    : __BuiltinType\n";
+
+    switch (kBaseTypes[tt].tag)
+    {
+    case BaseType::Float:
+        sb << "\n    , __BuiltinFloatingPointType\n";
+        sb << "\n    ,  __BuiltinRealType\n";
+        // fall through to:
+    case BaseType::Int:
+        sb << "\n    ,  __BuiltinSignedArithmeticType\n";
+        // fall through to:
+    case BaseType::UInt:
+    case BaseType::UInt64:
+        sb << "\n    ,  __BuiltinArithmeticType\n";
+        // fall through to:
+    case BaseType::Bool:
+        sb << "\n    ,  __BuiltinType\n";
+        break;
+
+    default:
+        break;
+    }
+
+    sb << "\n{\n";
+
+
+    // Declare initializers to convert from various other types
+    for (int ss = 0; ss < kBaseTypeCount; ++ss)
+    {
+        // Don't allow conversion from `void`
+        if (kBaseTypes[ss].tag == BaseType::Void)
+            continue;
+
+        // We need to emit a modifier so that the semantic-checking
+        // layer will know it can use these operations for implicit
+        // conversion.
+        ConversionCost conversionCost = getBaseTypeConversionCost(
+            kBaseTypes[tt],
+            kBaseTypes[ss]);
+
+        EMIT_LINE_DIRECTIVE();
+        sb << "__implicit_conversion(" << conversionCost << ")\n";
+
+        EMIT_LINE_DIRECTIVE();
+        sb << "__init(" << kBaseTypes[ss].name << " value);\n";
+    }
+
+    sb << "};\n";
+}
+
+
+
+// Declare vector and matrix types
+
+sb << "__generic<T = float, let N : int = 4> __magic_type(Vector) struct vector\n{\n";
+sb << "    typedef T Element;\n";
+
+// Declare initializer taking a single scalar of the element type
+sb << "    __implicit_conversion(" << kConversionCost_ScalarToVector << ")\n";
+sb << "    __init(T value);\n";
+
+sb << "};\n";
+
+// TODO: Probably need to do similar
+}}}}
+
+__generic<T = float, let R : int = 4, let C : int = 4>
+__magic_type(Matrix)
+struct matrix {};
+
+${{{{
+
+
+
+static const struct {
+    char const* name;
+    char const* glslPrefix;
+} kTypes[] =
+{
+    {"float", ""},
+    {"int", "i"},
+    {"uint", "u"},
+    {"bool", "b"},
+};
+static const int kTypeCount = sizeof(kTypes) / sizeof(kTypes[0]);
+
+for (int tt = 0; tt < kTypeCount; ++tt)
+{
+    // Declare HLSL vector types
+    for (int ii = 1; ii <= 4; ++ii)
+    {
+        sb << "typedef vector<" << kTypes[tt].name << "," << ii << "> " << kTypes[tt].name << ii << ";\n";
+    }
+
+    // Declare HLSL matrix types
+    for (int rr = 2; rr <= 4; ++rr)
+    for (int cc = 2; cc <= 4; ++cc)
+    {
+        sb << "typedef matrix<" << kTypes[tt].name << "," << rr << "," << cc << "> " << kTypes[tt].name << rr << "x" << cc << ";\n";
+    }
+}
+
+// Declare additional built-in generic types
+//        EMIT_LINE_DIRECTIVE();
+
+
+sb << "__generic<T>\n";
+sb << "__intrinsic_type(" << kIROp_ConstantBufferType << ")\n";
+sb << "__magic_type(ConstantBuffer) struct ConstantBuffer {};\n";
+
+sb << "__generic<T>\n";
+sb << "__intrinsic_type(" << kIROp_TextureBufferType << ")\n";
+sb << "__magic_type(TextureBuffer) struct TextureBuffer {};\n";
+
+
+static const char* kComponentNames[]{ "x", "y", "z", "w" };
+static const char* kVectorNames[]{ "", "x", "xy", "xyz", "xyzw" };
+
+// Need to add constructors to the types above
+for (int N = 2; N <= 4; ++N)
+{
+    sb << "__generic<T> __extension vector<T, " << N << ">\n{\n";
+
+    // initialize from N scalars
+    sb << "__init(";
+    for (int ii = 0; ii < N; ++ii)
+    {
+        if (ii != 0) sb << ", ";
+        sb << "T " << kComponentNames[ii];
+    }
+    sb << ");\n";
+
+    // Initialize from an M-vector and then scalars
+    for (int M = 2; M < N; ++M)
+    {
+        sb << "__init(vector<T," << M << "> " << kVectorNames[M];
+        for (int ii = M; ii < N; ++ii)
+        {
+            sb << ", T " << kComponentNames[ii];
+        }
+        sb << ");\n";
+    }
+
+    // initialize from another vector of the same size
+    //
+    // TODO(tfoley): this overlaps with implicit conversions.
+    // We should look for a way that we can define implicit
+    // conversions directly in the stdlib instead...
+    sb << "__generic<U> __init(vector<U," << N << ">);\n";
+
+    // Initialize from two vectors, of size M and N-M
+    //
+    // The first parameter is named after the leading M components
+    // (e.g. `xy`) and the second after the K components that follow
+    // them (e.g. `zw`), so the two parameters of the generated
+    // `__init` never end up with the same name.
+    for(int M = 2; M <= (N-2); ++M)
+    {
+        int K = N - M;
+        SLANG_ASSERT(K >= 2);
+
+        sb << "__init(vector<T," << M << "> " << kVectorNames[M];
+        sb << ", vector<T," << K << "> ";
+        // Components M..N-1 are the ones covered by the second vector.
+        for (int ii = M; ii < N; ++ii)
+        {
+            sb << kComponentNames[ii];
+        }
+        sb << ");\n";
+    }
+
+    sb << "}\n";
+}
+
+// The above extension was generic in the *type* of the vector,
+// but explicit in the *size*. We will now declare an extension
+// for each builtin type that is generic in the size.
+//
+for (int tt = 0; tt < kBaseTypeCount; ++tt)
+{
+    if(kBaseTypes[tt].tag == BaseType::Void) continue;
+
+    sb << "__generic<let N : int> __extension vector<"
+        << kBaseTypes[tt].name << ",N>\n{\n";
+
+    for (int ff = 0; ff < kBaseTypeCount; ++ff)
+    {
+        if(kBaseTypes[ff].tag == BaseType::Void) continue;
+
+        // We need a constructor to make a vector from a scalar
+        // of another type.
+
+        if( tt != ff )
+        {
+            auto cost = getBaseTypeConversionCost(
+                kBaseTypes[tt],
+                kBaseTypes[ff]);
+            cost += kConversionCost_ScalarToVector;
+
+            sb << "    __implicit_conversion(" << cost << ")\n";
+            sb << "    __init(" << kBaseTypes[ff].name << " value);\n";
+        }
+    }
+
+    sb << "}\n";
+}
+
+for( int R = 2; R <= 4; ++R )
+for( int C = 2; C <= 4; ++C )
+{
+    sb << "__generic<T> __extension matrix<T, " << R << "," << C << ">\n{\n";
+
+    // initialize from R*C scalars
+    sb << "__init(";
+    for( int ii = 0; ii < R; ++ii )
+    for( int jj = 0; jj < C; ++jj )
+    {
+        if ((ii+jj) != 0) sb << ", ";
+        sb << "T m" << ii << jj;
+    }
+    sb << ");\n";
+
+    // Initialize from R C-vectors
+    sb << "__init(";
+    for (int ii = 0; ii < R; ++ii)
+    {
+        if(ii != 0) sb << ", ";
+        sb << "vector<T," << C << "> row" << ii;
+    }
+    sb << ");\n";
+
+
+    // initialize from another matrix of the same size
+    //
+    // TODO(tfoley): See comment about how this overlaps
+    // with implicit conversion, in the `vector` case above
+    sb << "__generic<U> __init(matrix<U," << R << ", " << C << ">);\n";
+
+    // initialize from a matrix of larger size
+    for(int rr = R; rr <= 4; ++rr)
+    for( int cc = C; cc <= 4; ++cc )
+    {
+        if(rr == R && cc == C) continue;
+        sb << "__init(matrix<T," << rr << "," << cc << "> value);\n";
+    }
+
+    sb << "}\n";
+}
+
+// Declare built-in texture and sampler types
+
+
+
+sb << "__magic_type(SamplerState," << int(SamplerStateType::Flavor::SamplerState) << ")\n";
+sb << "__intrinsic_type(" << kIROp_SamplerType << ", " << int(SamplerStateType::Flavor::SamplerState) << ")\n";
+sb << "struct SamplerState {};";
+        
+sb << "__magic_type(SamplerState," << int(SamplerStateType::Flavor::SamplerComparisonState) << ")\n";
+sb << "__intrinsic_type(" << kIROp_SamplerType << ", " << int(SamplerStateType::Flavor::SamplerComparisonState) << ")\n";
+sb << "struct SamplerComparisonState {};";
+
+// TODO(tfoley): Need to handle `RW*` variants of texture types as well...
+static const struct {
+    char const*			name;
+    TextureType::Shape	baseShape;
+    int					coordCount;
+} kBaseTextureTypes[] = {
+    { "Texture1D",		TextureType::Shape1D,	1 },
+    { "Texture2D",		TextureType::Shape2D,	2 },
+    { "Texture3D",		TextureType::Shape3D,	3 },
+    { "TextureCube",	TextureType::ShapeCube,	3 },
+};
+static const int kBaseTextureTypeCount = sizeof(kBaseTextureTypes) / sizeof(kBaseTextureTypes[0]);
+
+
+static const struct {
+    char const*         name;
+    SlangResourceAccess access;
+} kBaseTextureAccessLevels[] = {
+    { "",                   SLANG_RESOURCE_ACCESS_READ },
+    { "RW",                 SLANG_RESOURCE_ACCESS_READ_WRITE },
+    { "RasterizerOrdered",  SLANG_RESOURCE_ACCESS_RASTER_ORDERED },
+};
+static const int kBaseTextureAccessLevelCount = sizeof(kBaseTextureAccessLevels) / sizeof(kBaseTextureAccessLevels[0]);
+
+for (int tt = 0; tt < kBaseTextureTypeCount; ++tt)
+{
+    char const* name = kBaseTextureTypes[tt].name;
+    TextureType::Shape baseShape = kBaseTextureTypes[tt].baseShape;
+
+    for (int isArray = 0; isArray < 2; ++isArray)
+    {
+        // Arrays of 3D textures aren't allowed
+        if (isArray && baseShape == TextureType::Shape3D) continue;
+
+        for (int isMultisample = 0; isMultisample < 2; ++isMultisample)
+        for (int accessLevel = 0; accessLevel < kBaseTextureAccessLevelCount; ++accessLevel)
+        {
+            auto access = kBaseTextureAccessLevels[accessLevel].access;
+
+            // TODO: any constraints to enforce on what gets to be multisampled?
+
+            unsigned flavor = baseShape;
+            if (isArray)		flavor |= TextureType::ArrayFlag;
+            if (isMultisample)	flavor |= TextureType::MultisampleFlag;
+//                        if (isShadow)		flavor |= TextureType::ShadowFlag;
+
+            flavor |= (access << 8);
+
+            // emit a generic signature
+            // TODO: allow for multisample count to come in as well...
+            sb << "__generic<T = float4> ";
+
+            sb << "__magic_type(Texture," << int(flavor) << ")\n";
+            sb << "__intrinsic_type(" << kIROp_TextureType << ", " << flavor << ")\n";
+            sb << "struct ";
+            sb << kBaseTextureAccessLevels[accessLevel].name;
+            sb << name;
+            if (isMultisample) sb << "MS";
+            if (isArray) sb << "Array";
+//                        if (isShadow) sb << "Shadow";
+            sb << "\n{";
+
+            if( !isMultisample )
+            {
+                sb << "float CalculateLevelOfDetail(SamplerState s, ";
+                sb << "float" << kBaseTextureTypes[tt].coordCount << " location);\n";
+
+                sb << "float CalculateLevelOfDetailUnclamped(SamplerState s, ";
+                sb << "float" << kBaseTextureTypes[tt].coordCount << " location);\n";
+            }
+
+            // `GetDimensions`
+
+            for(int isFloat = 0; isFloat < 2; ++isFloat)
+            for(int includeMipInfo = 0; includeMipInfo < 2; ++includeMipInfo)
+            {
+                {
+                    sb << "__glsl_version(450)\n";
+                    sb << "__target_intrinsic(glsl, \"(";
+
+                    int aa = 0;
+                    String lodStr = "0";
+                    if (includeMipInfo)
+                    {
+                        int mipLevelArg = aa++;
+                        lodStr = "int($";
+                        lodStr.append(mipLevelArg);
+                        lodStr.append(")");
+                    }
+
+                    int cc = 0;
+                    switch(baseShape)
+                    {
+                    case TextureType::Shape1D:
+                        sb << "($" << aa++ << " = textureSize($$P, " << lodStr << "))";
+                        cc = 1;
+                        break;
+
+                    case TextureType::Shape2D:
+                    case TextureType::ShapeCube:
+                        sb << "($" << aa++ << " = textureSize($$P, " << lodStr << ").x)";
+                        sb << ", ($" << aa++ << " = textureSize($$P, " << lodStr << ").y)";
+                        cc = 2;
+                        break;
+
+                    case TextureType::Shape3D:
+                        sb << "($" << aa++ << " = textureSize($$P, " << lodStr << ").x)";
+                        sb << ", ($" << aa++ << " = textureSize($$P, " << lodStr << ").y)";
+                        sb << ", ($" << aa++ << " = textureSize($$P, " << lodStr << ").z)";
+                        cc = 3;
+                        break;
+
+                    default:
+                        SLANG_UNEXPECTED("unhandled resource shape");
+                        break;
+                    }
+
+                    if(isArray)
+                    {
+                        sb << ", ($" << aa++ << " = textureSize($$P, " << lodStr << ")." << kComponentNames[cc] << ")";
+                    }
+
+                    if(isMultisample)
+                    {
+                        sb << ", ($" << aa++ << " = textureSamples($$P))";
+                    }
+
+                    if (includeMipInfo)
+                    {
+                        sb << ", ($" << aa++ << " = textureQueryLevels($$P))";
+                    }
+
+
+                    sb << ")\")\n";
+                    sb << "__intrinsic_op\n";
+
+                }
+
+                char const* t = isFloat ? "out float " : "out uint ";
+
+                sb << "void GetDimensions(";
+                if(includeMipInfo)
+                    sb << "uint mipLevel, ";
+
+                switch(baseShape)
+                {
+                case TextureType::Shape1D:
+                    sb << t << "width";
+                    break;
+
+                case TextureType::Shape2D:
+                case TextureType::ShapeCube:
+                    sb << t << "width,";
+                    sb << t << "height";
+                    break;
+
+                case TextureType::Shape3D:
+                    sb << t << "width,";
+                    sb << t << "height,";
+                    sb << t << "depth";
+                    break;
+
+                default:
+                    assert(!"unexpected");
+                    break;
+                }
+
+                if(isArray)
+                {
+                    sb << ", " << t << "elements";
+                }
+
+                if(isMultisample)
+                {
+                    sb << ", " << t << "sampleCount";
+                }
+
+                if(includeMipInfo)
+                    sb << ", " << t << "numberOfLevels";
+
+                sb << ");\n";
+            }
+
+            // `GetSamplePosition()`
+            if( isMultisample )
+            {
+                sb << "float2 GetSamplePosition(int s);\n";
+            }
+
+            // `Load()`
+            //
+            // Three overloads are declared: a plain load, a load with a
+            // texel `offset`, and a load with `offset` plus an `out` status.
+            // Shapes whose coordinate count (including the array slice)
+            // reaches 4 are skipped.
+
+            if( kBaseTextureTypes[tt].coordCount + isArray < 4 )
+            {
+                // Non-multisample loads carry one extra trailing component
+                // in `location`; the GLSL mapping below passes it as the LOD.
+                int loadCoordCount = kBaseTextureTypes[tt].coordCount + isArray + (isMultisample?0:1);
+
+                // The texel offset has one component per base texture
+                // coordinate (no array slice, no mip level).
+                int offsetCoordCount = kBaseTextureTypes[tt].coordCount;
+
+                // When translating to GLSL, we need to break apart the `location` argument.
+                //
+                // TODO: this should really be handled by having this member actually get lowered!
+                static const char* kGLSLLoadCoordsSwizzle[] = { "", "", "x", "xy", "xyz", "xyzw" };
+                static const char* kGLSLLoadLODSwizzle[]    = { "", "", "y", "z", "w", "error" };
+
+                // Load(location [, sampleIndex])
+                if (isMultisample)
+                {
+                    sb << "__target_intrinsic(glsl, \"texelFetch($$P, $0, $1)\")\n";
+                }
+                else
+                {
+                    sb << "__target_intrinsic(glsl, \"texelFetch($$P, ($0)." << kGLSLLoadCoordsSwizzle[loadCoordCount] << ", ($0)." << kGLSLLoadLODSwizzle[loadCoordCount] << ")\")\n";
+                }
+                sb << "__intrinsic_op\n";
+                sb << "T Load(";
+                sb << "int" << loadCoordCount << " location";
+                if(isMultisample)
+                {
+                    sb << ", int sampleIndex";
+                }
+                sb << ");\n";
+
+                // Load(location [, sampleIndex], offset)
+                //
+                // GLSL `texelFetch` does not accept an offset argument, so
+                // both variants must map to `texelFetchOffset`.
+                if (isMultisample)
+                {
+                    // NOTE(review): GLSL does not appear to define
+                    // `texelFetchOffset` for multisample samplers - confirm.
+                    sb << "__target_intrinsic(glsl, \"texelFetchOffset($$P, $0, $1, $2)\")\n";
+                }
+                else
+                {
+                    sb << "__target_intrinsic(glsl, \"texelFetchOffset($$P, ($0)." << kGLSLLoadCoordsSwizzle[loadCoordCount] << ", ($0)." << kGLSLLoadLODSwizzle[loadCoordCount] << ", $1)\")\n";
+                }
+                sb << "__intrinsic_op\n";
+                sb << "T Load(";
+                sb << "int" << loadCoordCount << " location";
+                if(isMultisample)
+                {
+                    sb << ", int sampleIndex";
+                }
+                sb << ", int" << offsetCoordCount << " offset";
+                sb << ");\n";
+
+
+                // Load(location [, sampleIndex], offset, out status)
+                // No GLSL mapping is declared for this overload.
+                sb << "T Load(";
+                sb << "int" << loadCoordCount << " location";
+                if(isMultisample)
+                {
+                    sb << ", int sampleIndex";
+                }
+                sb << ", int" << offsetCoordCount << " offset";
+                sb << ", out uint status";
+                sb << ");\n";
+            }
+
+            if(baseShape != TextureType::ShapeCube)
+            {
+                // subscript operator
+                sb << "__intrinsic_op __subscript(uint" << kBaseTextureTypes[tt].coordCount + isArray << " location) -> T;\n";
+            }
+
+            if( !isMultisample )
+            {
+                // `Sample()`
+
+                sb << "__target_intrinsic(glsl, \"texture($$p, $1)\")\n";
+
+                // TODO: only enable if IR is being used?
+                sb << "__intrinsic_op(sample)\n";
+
+                sb << "__intrinsic_op\n";
+                sb << "T Sample(SamplerState s, ";
+                sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location);\n";
+
+                if( baseShape != TextureType::ShapeCube )
+                {
+                    sb << "__target_intrinsic(glsl, \"textureOffset($$p, $1, $2)\")\n";
+                    sb << "__intrinsic_op\n";
+                    sb << "T Sample(SamplerState s, ";
+                    sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, ";
+                    sb << "int" << kBaseTextureTypes[tt].coordCount << " offset);\n";
+                }
+
+                sb << "T Sample(SamplerState s, ";
+                sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, ";
+                if( baseShape != TextureType::ShapeCube )
+                {
+                    sb << "int" << kBaseTextureTypes[tt].coordCount << " offset, ";
+                }
+                sb << "float clamp);\n";
+
+                sb << "T Sample(SamplerState s, ";
+                sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, ";
+                if( baseShape != TextureType::ShapeCube )
+                {
+                    sb << "int" << kBaseTextureTypes[tt].coordCount << " offset, ";
+                }
+                sb << "float clamp, out uint status);\n";
+
+
+                // `SampleBias()`
+                sb << "__target_intrinsic(glsl, \"texture($$p, $1, $2)\")\n";
+                sb << "__intrinsic_op\n";
+                sb << "T SampleBias(SamplerState s, ";
+                sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, float bias);\n";
+
+                if( baseShape != TextureType::ShapeCube )
+                {
+                    sb << "__target_intrinsic(glsl, \"textureOffset($$p, $1, $2, $3)\")\n";
+                    sb << "__intrinsic_op\n";
+                    sb << "T SampleBias(SamplerState s, ";
+                    sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, float bias, ";
+                    sb << "int" << kBaseTextureTypes[tt].coordCount << " offset);\n";
+                }
+
+                // `SampleCmp()` and `SampleCmpLevelZero`
+                sb << "T SampleCmp(SamplerComparisonState s, ";
+                sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, ";
+                sb << "float compareValue";
+                sb << ");\n";
+
+                int baseCoordCount = kBaseTextureTypes[tt].coordCount;
+                int arrCoordCount = baseCoordCount + isArray;
+                if (arrCoordCount < 3)
+                {
+                    int extCoordCount = arrCoordCount + 1;
+
+                    if (extCoordCount < 3)
+                        extCoordCount = 3;
+
+                    sb << "__target_intrinsic(glsl, \"textureLod($$p, ";
+
+                    sb << "vec" << extCoordCount << "($1,";
+                    for (int ii = arrCoordCount; ii < extCoordCount - 1; ++ii)
+                    {
+                        sb << " 0.0,";
+                    }
+                    sb << "$2)";
+
+                    sb << ", 0.0)\")\n";
+                }
+                else if(arrCoordCount <= 3)
+                {
+                    int extCoordCount = arrCoordCount + 1;
+
+                    if (extCoordCount < 3)
+                        extCoordCount = 3;
+
+                    sb << "__target_intrinsic(glsl, \"textureGrad($$p, ";
+
+                    sb << "vec" << extCoordCount << "($1,";
+                    for (int ii = arrCoordCount; ii < extCoordCount - 1; ++ii)
+                    {
+                        sb << " 0.0,";
+                    }
+                    sb << "$2)";
+
+                    // Construct gradients
+                    sb << ", vec" << baseCoordCount << "(0.0)";
+                    sb << ", vec" << baseCoordCount << "(0.0)";
+                    sb << ")\")\n";
+                }
+                sb << "__intrinsic_op\n";
+                sb << "T SampleCmpLevelZero(SamplerComparisonState s, ";
+                sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, ";
+                sb << "float compareValue";
+                sb << ");\n";
+
+                // Offset-taking overloads of `SampleCmp()` and
+                // `SampleCmpLevelZero()`; not declared for cube maps.
+                if( baseShape != TextureType::ShapeCube )
+                {
+                    // Note(tfoley): MSDN seems confused, and claims that the `offset`
+                    // parameter for `SampleCmp` is available for everything but 3D
+                    // textures, while `Sample` and `SampleBias` are consistent in
+                    // saying they only exclude `offset` for cube maps (which makes
+                    // sense). I'm going to assume the documentation for `SampleCmp`
+                    // is just wrong.
+
+                    // Comparison sampling takes a `SamplerComparisonState`,
+                    // matching the overloads without `offset`.
+                    sb << "T SampleCmp(SamplerComparisonState s, ";
+                    sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, ";
+                    sb << "float compareValue, ";
+                    sb << "int" << kBaseTextureTypes[tt].coordCount << " offset);\n";
+
+                    sb << "T SampleCmpLevelZero(SamplerComparisonState s, ";
+                    sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, ";
+                    sb << "float compareValue, ";
+                    sb << "int" << kBaseTextureTypes[tt].coordCount << " offset);\n";
+                }
+
+
+                sb << "__target_intrinsic(glsl, \"textureGrad($$p, $1, $2, $3)\")\n";
+                sb << "__intrinsic_op(sampleGrad)\n";
+                sb << "T SampleGrad(SamplerState s, ";
+                sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, ";
+                sb << "float" << kBaseTextureTypes[tt].coordCount << " gradX, ";
+                sb << "float" << kBaseTextureTypes[tt].coordCount << " gradY";
+                sb << ");\n";
+
+                if( baseShape != TextureType::ShapeCube )
+                {
+                    sb << "__target_intrinsic(glsl, \"textureGradOffset($$p, $1, $2, $3, $4)\")\n";
+                    sb << "__intrinsic_op(sampleGrad)\n";
+                    sb << "T SampleGrad(SamplerState s, ";
+                    sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, ";
+                    sb << "float" << kBaseTextureTypes[tt].coordCount << " gradX, ";
+                    sb << "float" << kBaseTextureTypes[tt].coordCount << " gradY, ";
+                    sb << "int" << kBaseTextureTypes[tt].coordCount << " offset);\n";
+                }
+
+                // `SampleLevel`
+
+                sb << "__target_intrinsic(glsl, \"textureLod($$p, $1, $2)\")\n";
+                sb << "__intrinsic_op\n";
+                sb << "T SampleLevel(SamplerState s, ";
+                sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, ";
+                sb << "float level);\n";
+
+                if( baseShape != TextureType::ShapeCube )
+                {
+                    sb << "__target_intrinsic(glsl, \"textureLodOffset($$p, $1, $2, $3)\")\n";
+                    sb << "__intrinsic_op\n";
+                    sb << "T SampleLevel(SamplerState s, ";
+                    sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, ";
+                    sb << "float level, ";
+                    sb << "int" << kBaseTextureTypes[tt].coordCount << " offset);\n";
+                }
+            }
+
+            sb << "\n};\n";
+
+            // `Gather*()` operations are handled via an `extension` declaration,
+            // because this lets us capture the element type of the texture.
+            //
+            // TODO: longer-term there should be something like a `TextureElementType`
+            // interface, that both scalars and vectors implement, that then exposes
+            // a `Scalar` associated type, and `Gather` can return `vector<T.Scalar, 4>`.
+            //
+            static const struct {
+                char const* genericPrefix;
+                char const* elementType;
+            } kGatherExtensionCases[] = {
+                { "__generic<T, let N : int>", "vector<T,N>" },
+
+                // TODO: need a case here for scalars `T`, but also
+                // need to ensure that case doesn't accidentally match
+                // for `T = vector<...>`, which requires actual checking
+                // of constraints on generic parameters.
+            };
+            for(auto cc : kGatherExtensionCases)
+            {
+                // TODO: this should really be an `if` around the entire `Gather` logic
+                if (isMultisample) break;
+
+                EMIT_LINE_DIRECTIVE();
+                sb << cc.genericPrefix << " __extension ";
+                sb << kBaseTextureAccessLevels[accessLevel].name;
+                sb << name;
+                if (isArray) sb << "Array";
+                sb << "<" << cc.elementType << " >";
+                sb << "\n{\n";
+
+
+                // `Gather`
+                // (tricky because it returns a 4-vector of the element type
+                // of the texture components...)
+                //
+                // TODO: is it actually correct to restrict these so that, e.g.,
+                // `GatherAlpha()` isn't allowed on `Texture2D<float3>` because
+                // it nominally doesn't have an alpha component?
+                static const struct {
+                    int componentIndex;
+                    char const* componentName;
+                } kGatherComponets[] = {
+                    { 0, "" },
+                    { 0, "Red" },
+                    { 1, "Green" },
+                    { 2, "Blue" },
+                    { 3, "Alpha" },
+                };
+
+                // For each entry of `kGatherComponets`, declare the five
+                // `Gather<Component>()` overloads: plain, single offset,
+                // single offset + status, four offsets, four offsets + status.
+                for(auto kk : kGatherComponets)
+                {
+                    auto componentIndex = kk.componentIndex;
+                    auto componentName = kk.componentName;
+
+                    // `location` needs one extra coordinate for array textures
+                    // (the array slice), as for the other sampling methods;
+                    // the offsets only cover the base texture coordinates.
+                    int locationCoordCount = kBaseTextureTypes[tt].coordCount + isArray;
+                    int offsetCoordCount = kBaseTextureTypes[tt].coordCount;
+
+                    EMIT_LINE_DIRECTIVE();
+
+                    sb << "__target_intrinsic(glsl, \"textureGather($$p, $1, " << componentIndex << ")\")\n";
+                    sb << "__intrinsic_op\n";
+                    sb << "vector<T, 4> Gather" << componentName << "(SamplerState s, ";
+                    sb << "float" << locationCoordCount << " location);\n";
+
+                    EMIT_LINE_DIRECTIVE();
+                    sb << "__target_intrinsic(glsl, \"textureGatherOffset($$p, $1, $2, " << componentIndex << ")\")\n";
+                    sb << "__intrinsic_op\n";
+                    sb << "vector<T, 4> Gather" << componentName << "(SamplerState s, ";
+                    sb << "float" << locationCoordCount << " location, ";
+                    sb << "int" << offsetCoordCount << " offset);\n";
+
+                    EMIT_LINE_DIRECTIVE();
+                    sb << "vector<T, 4> Gather" << componentName << "(SamplerState s, ";
+                    sb << "float" << locationCoordCount << " location, ";
+                    sb << "int" << offsetCoordCount << " offset, ";
+                    sb << "out uint status);\n";
+
+                    EMIT_LINE_DIRECTIVE();
+                    sb << "__target_intrinsic(glsl, \"textureGatherOffsets($$p, $1, int" << offsetCoordCount << "[]($2, $3, $4, $5), " << componentIndex << ")\")\n";
+                    sb << "__intrinsic_op\n";
+                    sb << "vector<T, 4> Gather" << componentName << "(SamplerState s, ";
+                    sb << "float" << locationCoordCount << " location, ";
+                    sb << "int" << offsetCoordCount << " offset1, ";
+                    sb << "int" << offsetCoordCount << " offset2, ";
+                    sb << "int" << offsetCoordCount << " offset3, ";
+                    sb << "int" << offsetCoordCount << " offset4);\n";
+
+                    EMIT_LINE_DIRECTIVE();
+                    sb << "vector<T, 4> Gather" << componentName << "(SamplerState s, ";
+                    sb << "float" << locationCoordCount << " location, ";
+                    sb << "int" << offsetCoordCount << " offset1, ";
+                    sb << "int" << offsetCoordCount << " offset2, ";
+                    sb << "int" << offsetCoordCount << " offset3, ";
+                    sb << "int" << offsetCoordCount << " offset4, ";
+                    sb << "out uint status);\n";
+                }
+
+                EMIT_LINE_DIRECTIVE();
+                sb << "\n}\n";
+            }
+        }
+    }
+}
+
+
+// Declare every unary operator for each base type whose flags overlap the
+// operator's flags, in scalar, vector, and matrix forms.
+for (auto op : unaryOps)
+{
+    // These pieces depend only on the operator, not on the operand type.
+    bool const isPostfix = (op.flags & POSTFIX) != 0;
+    bool const isAssignment = (op.flags & ASSIGNMENT) != 0;
+    char const* fixityKeyword = isPostfix ? "__postfix " : "__prefix ";
+    char const* operandQualifier = isAssignment ? "in out " : "";
+    int const opCodeValue = int(op.opCode);
+
+    for (auto type : kBaseTypes)
+    {
+        // Only emit declarations when the type and the operator share a flag.
+        if ((type.flags & op.flags) != 0)
+        {
+            char const* elementName = type.name;
+
+            // scalar operand
+            sb << fixityKeyword
+               << "__intrinsic_op(" << opCodeValue << ") "
+               << elementName << " operator" << op.opName
+               << "(" << operandQualifier << elementName << " value);\n";
+
+            // vector operand, generic in the element count
+            sb << "__generic<let N : int> "
+               << fixityKeyword
+               << "__intrinsic_op(" << opCodeValue << ") "
+               << "vector<" << elementName << ",N> operator" << op.opName
+               << "(" << operandQualifier << "vector<" << elementName << ",N> value);\n";
+
+            // matrix operand, generic in both dimensions
+            sb << "__generic<let N : int, let M : int> "
+               << fixityKeyword
+               << "__intrinsic_op(" << opCodeValue << ") "
+               << "matrix<" << elementName << ",N,M> operator" << op.opName
+               << "(" << operandQualifier << "matrix<" << elementName << ",N,M> value);\n";
+        }
+    }
+}
+
+// Declare every binary operator for each base type whose flags overlap the
+// operator's flags. All operand pairings (scalar/vector/matrix) are declared
+// explicitly rather than relying on promotion rules.
+for (auto op : binaryOps)
+{
+    int const opCodeValue = int(op.opCode);
+
+    // Assignment forms take their left operand as `in out`.
+    char const* lhsQualifier = (op.flags & ASSIGNMENT) ? "in out " : "";
+
+    // skip matrix-matrix multiply operations, so that GLSL doesn't see them
+    bool emitMatrixMatrix = true;
+    switch (op.opCode)
+    {
+    case kIROp_Mul:
+    case kIRPseudoOp_MulAssign:
+        emitMatrixMatrix = false;
+        break;
+
+    default:
+        break;
+    }
+
+    for (auto type : kBaseTypes)
+    {
+        if ((type.flags & op.flags) == 0)
+            continue;
+
+        // Both operands use the base type; comparison operators produce `bool`.
+        char const* operandType = type.name;
+        char const* resultType = (op.flags & COMPARISON) ? "bool" : operandType;
+
+        // TODO: handle `SHIFT`
+
+        // scalar-scalar
+        sb << "__intrinsic_op(" << opCodeValue << ") " << resultType
+           << " operator" << op.opName
+           << "(" << lhsQualifier << operandType << " left, "
+           << operandType << " right);\n";
+
+        // vector-vector
+        sb << "__generic<let N : int> "
+           << "__intrinsic_op(" << opCodeValue << ") vector<" << resultType << ",N>"
+           << " operator" << op.opName
+           << "(" << lhsQualifier << "vector<" << operandType << ",N> left, "
+           << "vector<" << operandType << ",N> right);\n";
+
+        // matrix-matrix (unless excluded above)
+        if (emitMatrixMatrix)
+        {
+            sb << "__generic<let N : int, let M : int> "
+               << "__intrinsic_op(" << opCodeValue << ") matrix<" << resultType << ",N,M>"
+               << " operator" << op.opName
+               << "(" << lhsQualifier << "matrix<" << operandType << ",N,M> left, "
+               << "matrix<" << operandType << ",N,M> right);\n";
+        }
+
+        // scalar-vector and scalar-matrix; not declared for assignment forms
+        if (!(op.flags & ASSIGNMENT))
+        {
+            sb << "__generic<let N : int> "
+               << "__intrinsic_op(" << opCodeValue << ") vector<" << resultType << ",N>"
+               << " operator" << op.opName
+               << "(" << lhsQualifier << operandType << " left, "
+               << "vector<" << operandType << ",N> right);\n";
+
+            sb << "__generic<let N : int, let M : int> "
+               << "__intrinsic_op(" << opCodeValue << ") matrix<" << resultType << ",N,M>"
+               << " operator" << op.opName
+               << "(" << lhsQualifier << operandType << " left, "
+               << "matrix<" << operandType << ",N,M> right);\n";
+        }
+
+        // vector-scalar and matrix-scalar
+        sb << "__generic<let N : int> "
+           << "__intrinsic_op(" << opCodeValue << ") vector<" << resultType << ",N>"
+           << " operator" << op.opName
+           << "(" << lhsQualifier << "vector<" << operandType << ",N> left, "
+           << operandType << " right);\n";
+
+        sb << "__generic<let N : int, let M : int> "
+           << "__intrinsic_op(" << opCodeValue << ") matrix<" << resultType << ",N,M>"
+           << " operator" << op.opName
+           << "(" << lhsQualifier << "matrix<" << operandType << ",N,M> left, "
+           << operandType << " right);\n";
+    }
+}
+
+}}}}
diff --git a/source/slang/core.meta.slang.cpp b/source/slang/core.meta.slang.cpp
new file mode 100644
index 000000000..8395f11f5
--- /dev/null
+++ b/source/slang/core.meta.slang.cpp
@@ -0,0 +1,910 @@
+// Fixed preamble of the `core` library: the builtin-type interface
+// hierarchy, emitted verbatim.
+sb << "// Slang `core` library\n"
+      "\n"
+      "// A type that can be used as an operand for builtins\n"
+      "interface __BuiltinType {}\n"
+      "\n"
+      "// A type that can be used for arithmetic operations\n"
+      "interface __BuiltinArithmeticType : __BuiltinType {}\n"
+      "\n"
+      "// A type that logically has a sign (positive/negative/zero)\n"
+      "interface __BuiltinSignedArithmeticType : __BuiltinArithmeticType {}\n"
+      "\n"
+      "// A type that can represent integers\n"
+      "interface __BuiltinIntegerType : __BuiltinArithmeticType {}\n"
+      "\n"
+      "// A type that can represent non-integers\n"
+      "interface __BuiltinRealType : __BuiltinArithmeticType {}\n"
+      "\n"
+      "// A type that uses a floating-point representation\n"
+      "interface __BuiltinFloatingPointType : __BuiltinRealType, __BuiltinSignedArithmeticType {}\n"
+      "\n";
+
+// The comma (sequence) operator.
+sb << "__generic<T,U> __intrinsic_op(Sequence) U operator,(T left, U right);\n"
+      "\n";
+
+// The conditional (select) operator, for scalar and per-component vector conditions.
+sb << "__generic<T> __intrinsic_op(select) T operator?:(bool condition, T ifTrue, T ifFalse);\n"
+      "__generic<T, let N : int> __intrinsic_op(select) vector<T,N> operator?:(vector<bool,N> condition, vector<T,N> ifTrue, vector<T,N> ifFalse);\n"
+      "\n";
+
+// We are going to use code generation to produce the
+// declarations for all of our base types.
+//
+// For each entry of `kBaseTypes` this emits a
+// `__builtin_type(<tag>) struct <name>` declaration containing:
+//   - a list of interface conformances selected by the type's tag, and
+//   - one `__init` per non-`void` base type, each preceded by an
+//     `__implicit_conversion(<cost>)` modifier.
+
+static const int kBaseTypeCount = sizeof(kBaseTypes) / sizeof(kBaseTypes[0]);
+for (int tt = 0; tt < kBaseTypeCount; ++tt)
+{
+    EMIT_LINE_DIRECTIVE();
+    sb << "__builtin_type(" << int(kBaseTypes[tt].tag) << ") struct " << kBaseTypes[tt].name;
+
+    // Declare interface conformances for this type
+
+    // Every base type conforms to `__BuiltinType`; this also opens the
+    // conformance list (`:`), so all later entries are prefixed with `,`.
+    sb << "\n    : __BuiltinType\n";
+
+    // The cases below deliberately fall through, so a tag picks up its own
+    // conformances plus those of every case listed after it.
+    switch (kBaseTypes[tt].tag)
+    {
+    case BaseType::Float:
+        sb << "\n    , __BuiltinFloatingPointType\n";
+        sb << "\n    ,  __BuiltinRealType\n";
+        // fall through to:
+    case BaseType::Int:
+        sb << "\n    ,  __BuiltinSignedArithmeticType\n";
+        // fall through to:
+    case BaseType::UInt:
+    case BaseType::UInt64:
+        sb << "\n    ,  __BuiltinArithmeticType\n";
+        // fall through to:
+    case BaseType::Bool:
+        // NOTE(review): `__BuiltinType` was already emitted before the
+        // switch, so every tag that reaches this case lists it twice.
+        // Confirm the duplicate conformance is intended/harmless.
+        sb << "\n    ,  __BuiltinType\n";
+        break;
+
+    default:
+        // Any other tag gets only the `__BuiltinType` conformance above.
+        break;
+    }
+
+    sb << "\n{\n";
+
+
+    // Declare initializers to convert from various other types
+    // (the loop does not skip `ss == tt`, so the type's own name is included).
+    for (int ss = 0; ss < kBaseTypeCount; ++ss)
+    {
+        // Don't allow conversion from `void`
+        if (kBaseTypes[ss].tag == BaseType::Void)
+            continue;
+
+        // We need to emit a modifier so that the semantic-checking
+        // layer will know it can use these operations for implicit
+        // conversion.
+        // Arguments are passed as (type being declared, type converted from).
+        ConversionCost conversionCost = getBaseTypeConversionCost(
+            kBaseTypes[tt],
+            kBaseTypes[ss]);
+
+        EMIT_LINE_DIRECTIVE();
+        sb << "__implicit_conversion(" << conversionCost << ")\n";
+
+        EMIT_LINE_DIRECTIVE();
+        sb << "__init(" << kBaseTypes[ss].name << " value);\n";
+    }
+
+    sb << "};\n";
+}
+
+
+
+// Declare the generic `vector` and `matrix` types
+
+sb << "__generic<T = float, let N : int = 4> __magic_type(Vector) struct vector\n{\n"
+      "    typedef T Element;\n";
+
+// Declare an initializer taking a single scalar of the element type,
+// usable as an implicit conversion with cost `kConversionCost_ScalarToVector`.
+sb << "    __implicit_conversion(" << kConversionCost_ScalarToVector << ")\n"
+   << "    __init(T value);\n"
+   << "};\n";
+
+// TODO: Probably need to do similar
+sb << "\n"
+      "\n"
+      "__generic<T = float, let R : int = 4, let C : int = 4>\n"
+      "__magic_type(Matrix)\n"
+      "struct matrix {};\n"
+      "\n";
+
+
+
+
+// Scalar element types that get HLSL shorthand vector/matrix type names.
+// (`glslPrefix` is not read by the loop below.)
+static const struct {
+    char const* name;
+    char const* glslPrefix;
+} kTypes[] =
+{
+    {"float", ""},
+    {"int", "i"},
+    {"uint", "u"},
+    {"bool", "b"},
+};
+static const int kTypeCount = sizeof(kTypes) / sizeof(kTypes[0]);
+
+for (int typeIndex = 0; typeIndex < kTypeCount; ++typeIndex)
+{
+    char const* elementName = kTypes[typeIndex].name;
+
+    // HLSL vector shorthands: `<name>1` through `<name>4`
+    for (int size = 1; size <= 4; ++size)
+    {
+        sb << "typedef vector<" << elementName << "," << size << "> "
+           << elementName << size << ";\n";
+    }
+
+    // HLSL matrix shorthands: `<name>2x2` through `<name>4x4`
+    for (int rows = 2; rows <= 4; ++rows)
+    {
+        for (int cols = 2; cols <= 4; ++cols)
+        {
+            sb << "typedef matrix<" << elementName << "," << rows << "," << cols << "> "
+               << elementName << rows << "x" << cols << ";\n";
+        }
+    }
+}
+
+// Declare additional built-in generic types: `ConstantBuffer<T>` and
+// `TextureBuffer<T>`, each tied to its IR op code and magic type.
+//        EMIT_LINE_DIRECTIVE();
+
+sb << "__generic<T>\n"
+   << "__intrinsic_type(" << kIROp_ConstantBufferType << ")\n"
+   << "__magic_type(ConstantBuffer) struct ConstantBuffer {};\n";
+
+sb << "__generic<T>\n"
+   << "__intrinsic_type(" << kIROp_TextureBufferType << ")\n"
+   << "__magic_type(TextureBuffer) struct TextureBuffer {};\n";
+
+
+// Names used when synthesizing parameter lists: `kComponentNames[i]` names
+// component `i`, and `kVectorNames[M]` names a parameter that supplies the
+// first `M` components.
+static const char* kComponentNames[]{ "x", "y", "z", "w" };
+static const char* kVectorNames[]{ "", "x", "xy", "xyz", "xyzw" };
+
+// Need to add constructors to the types above
+//
+// For each size N in 2..4 this emits an extension of `vector<T, N>`
+// (generic in the element type) that declares its initializers.
+for (int N = 2; N <= 4; ++N)
+{
+    sb << "__generic<T> __extension vector<T, " << N << ">\n{\n";
+
+    // initialize from N scalars
+    sb << "__init(";
+    for (int ii = 0; ii < N; ++ii)
+    {
+        if (ii != 0) sb << ", ";
+        sb << "T " << kComponentNames[ii];
+    }
+    sb << ");\n";
+
+    // Initialize from an M-vector and then scalars
+    // (the scalars supply the remaining components M .. N-1)
+    for (int M = 2; M < N; ++M)
+    {
+        sb << "__init(vector<T," << M << "> " << kVectorNames[M];
+        for (int ii = M; ii < N; ++ii)
+        {
+            sb << ", T " << kComponentNames[ii];
+        }
+        sb << ");\n";
+    }
+
+    // initialize from another vector of the same size
+    //
+    // TODO(tfoley): this overlaps with implicit conversions.
+    // We should look for a way that we can define implicit
+    // conversions directly in the stdlib instead...
+    sb << "__generic<U> __init(vector<U," << N << ">);\n";
+
+    // Initialize from two vectors, of size M and N-M
+    for(int M = 2; M <= (N-2); ++M)
+    {
+        int K = N - M;
+        SLANG_ASSERT(K >= 2);
+
+        sb << "__init(vector<T," << M << "> " << kVectorNames[M];
+        sb << ", vector<T," << K << "> ";
+
+        // Name the second parameter after the components it supplies
+        // (M .. N-1), e.g. `zw`. Starting from component 0 would give it
+        // the same name as the first parameter (`xy, xy`).
+        for (int ii = 0; ii < K; ++ii)
+        {
+            sb << kComponentNames[M + ii];
+        }
+        sb << ");\n";
+    }
+
+    sb << "}\n";
+}
+
+// The extension above is generic in the element *type* but explicit in
+// the *size*. Here we go the other way and declare, for each builtin
+// type, an extension that is generic in the size.
+//
+for (int toIndex = 0; toIndex < kBaseTypeCount; ++toIndex)
+{
+    // There are no vectors of `void`.
+    if (kBaseTypes[toIndex].tag == BaseType::Void)
+        continue;
+
+    sb << "__generic<let N : int> __extension vector<" << kBaseTypes[toIndex].name << ",N>\n{\n";
+
+    for (int fromIndex = 0; fromIndex < kBaseTypeCount; ++fromIndex)
+    {
+        // Skip `void`, and skip the element type itself: only scalars of
+        // *another* type get a constructor here.
+        if (kBaseTypes[fromIndex].tag == BaseType::Void || fromIndex == toIndex)
+            continue;
+
+        // The cost of the scalar conversion plus the scalar-to-vector step.
+        auto cost = getBaseTypeConversionCost(kBaseTypes[toIndex], kBaseTypes[fromIndex]);
+        cost += kConversionCost_ScalarToVector;
+
+        sb << "    __implicit_conversion(" << cost << ")\n"
+           << "    __init(" << kBaseTypes[fromIndex].name << " value);\n";
+    }
+
+    sb << "}\n";
+}
+
+// For every R x C shape in 2..4 x 2..4, declare an extension of
+// `matrix<T, R, C>` (generic in the element type) with its initializers.
+for (int R = 2; R <= 4; ++R)
+{
+    for (int C = 2; C <= 4; ++C)
+    {
+        sb << "__generic<T> __extension matrix<T, " << R << "," << C << ">\n{\n";
+
+        // initialize from R*C scalars, named `m<row><col>`
+        {
+            char const* separator = "";
+            sb << "__init(";
+            for (int row = 0; row < R; ++row)
+            {
+                for (int col = 0; col < C; ++col)
+                {
+                    sb << separator << "T m" << row << col;
+                    separator = ", ";
+                }
+            }
+            sb << ");\n";
+        }
+
+        // Initialize from R C-vectors, one per row
+        {
+            char const* separator = "";
+            sb << "__init(";
+            for (int row = 0; row < R; ++row)
+            {
+                sb << separator << "vector<T," << C << "> row" << row;
+                separator = ", ";
+            }
+            sb << ");\n";
+        }
+
+        // initialize from another matrix of the same size
+        //
+        // TODO(tfoley): See comment about how this overlaps
+        // with implicit conversion, in the `vector` case above
+        sb << "__generic<U> __init(matrix<U," << R << ", " << C << ">);\n";
+
+        // initialize from a matrix of larger size
+        // (at least as many rows and columns, excluding the identical shape)
+        for (int srcRows = R; srcRows <= 4; ++srcRows)
+        {
+            for (int srcCols = C; srcCols <= 4; ++srcCols)
+            {
+                bool const sameShape = (srcRows == R) && (srcCols == C);
+                if (!sameShape)
+                {
+                    sb << "__init(matrix<T," << srcRows << "," << srcCols << "> value);\n";
+                }
+            }
+        }
+
+        sb << "}\n";
+    }
+}
+
+// Declare built-in texture and sampler types
+
+
+
+// Both sampler types share the `SamplerState` magic type and the
+// `kIROp_SamplerType` op code; only the flavor value differs.
+{
+    int const samplerFlavor = int(SamplerStateType::Flavor::SamplerState);
+    int const comparisonFlavor = int(SamplerStateType::Flavor::SamplerComparisonState);
+
+    sb << "__magic_type(SamplerState," << samplerFlavor << ")\n"
+       << "__intrinsic_type(" << kIROp_SamplerType << ", " << samplerFlavor << ")\n"
+       << "struct SamplerState {};";
+
+    sb << "__magic_type(SamplerState," << comparisonFlavor << ")\n"
+       << "__intrinsic_type(" << kIROp_SamplerType << ", " << comparisonFlavor << ")\n"
+       << "struct SamplerComparisonState {};";
+}
+
+// TODO(tfoley): Need to handle `RW*` variants of texture types as well...
+
+// Base texture shapes. The loop that follows reads:
+//   name       - base of the emitted struct name
+//   baseShape  - starting value of the texture flavor bits
+//   coordCount - size suffix for the `float<n>` / `int<n>` coordinate and
+//                offset parameters in the emitted member declarations
+static const struct {
+    char const*			name;
+    TextureType::Shape	baseShape;
+    int					coordCount;
+} kBaseTextureTypes[] = {
+    { "Texture1D",		TextureType::Shape1D,	1 },
+    { "Texture2D",		TextureType::Shape2D,	2 },
+    { "Texture3D",		TextureType::Shape3D,	3 },
+    { "TextureCube",	TextureType::ShapeCube,	3 },
+};
+static const int kBaseTextureTypeCount = sizeof(kBaseTextureTypes) / sizeof(kBaseTextureTypes[0]);
+
+
+// Resource access levels. `name` is emitted as a prefix of the struct name,
+// and `access` is shifted left by 8 and OR'ed into the flavor value.
+static const struct {
+    char const*         name;
+    SlangResourceAccess access;
+} kBaseTextureAccessLevels[] = {
+    { "",                   SLANG_RESOURCE_ACCESS_READ },
+    { "RW",                 SLANG_RESOURCE_ACCESS_READ_WRITE },
+    { "RasterizerOrdered",  SLANG_RESOURCE_ACCESS_RASTER_ORDERED },
+};
+static const int kBaseTextureAccessLevelCount = sizeof(kBaseTextureAccessLevels) / sizeof(kBaseTextureAccessLevels[0]);
+
+for (int tt = 0; tt < kBaseTextureTypeCount; ++tt)
+{
+    char const* name = kBaseTextureTypes[tt].name;
+    TextureType::Shape baseShape = kBaseTextureTypes[tt].baseShape;
+
+    for (int isArray = 0; isArray < 2; ++isArray)
+    {
+        // Arrays of 3D textures aren't allowed
+        if (isArray && baseShape == TextureType::Shape3D) continue;
+
+        for (int isMultisample = 0; isMultisample < 2; ++isMultisample)
+        for (int accessLevel = 0; accessLevel < kBaseTextureAccessLevelCount; ++accessLevel)
+        {
+            auto access = kBaseTextureAccessLevels[accessLevel].access;
+
+            // TODO: any constraints to enforce on what gets to be multisampled?
+
+            unsigned flavor = baseShape;
+            if (isArray)		flavor |= TextureType::ArrayFlag;
+            if (isMultisample)	flavor |= TextureType::MultisampleFlag;
+//                        if (isShadow)		flavor |= TextureType::ShadowFlag;
+
+            flavor |= (access << 8);
+
+            // emit a generic signature
+            // TODO: allow for multisample count to come in as well...
+            sb << "__generic<T = float4> ";
+
+            sb << "__magic_type(Texture," << int(flavor) << ")\n";
+            sb << "__intrinsic_type(" << kIROp_TextureType << ", " << flavor << ")\n";
+            sb << "struct ";
+            sb << kBaseTextureAccessLevels[accessLevel].name;
+            sb << name;
+            if (isMultisample) sb << "MS";
+            if (isArray) sb << "Array";
+//                        if (isShadow) sb << "Shadow";
+            sb << "\n{";
+
+            if( !isMultisample )
+            {
+                sb << "float CalculateLevelOfDetail(SamplerState s, ";
+                sb << "float" << kBaseTextureTypes[tt].coordCount << " location);\n";
+
+                sb << "float CalculateLevelOfDetailUnclamped(SamplerState s, ";
+                sb << "float" << kBaseTextureTypes[tt].coordCount << " location);\n";
+            }
+
+            // `GetDimensions`
+
+            for(int isFloat = 0; isFloat < 2; ++isFloat)
+            for(int includeMipInfo = 0; includeMipInfo < 2; ++includeMipInfo)
+            {
+                {
+                    sb << "__glsl_version(450)\n";
+                    sb << "__target_intrinsic(glsl, \"(";
+
+                    int aa = 0;
+                    String lodStr = "0";
+                    if (includeMipInfo)
+                    {
+                        int mipLevelArg = aa++;
+                        lodStr = "int($";
+                        lodStr.append(mipLevelArg);
+                        lodStr.append(")");
+                    }
+
+                    int cc = 0;
+                    switch(baseShape)
+                    {
+                    case TextureType::Shape1D:
+                        sb << "($" << aa++ << " = textureSize($P, " << lodStr << "))";
+                        cc = 1;
+                        break;
+
+                    case TextureType::Shape2D:
+                    case TextureType::ShapeCube:
+                        sb << "($" << aa++ << " = textureSize($P, " << lodStr << ").x)";
+                        sb << ", ($" << aa++ << " = textureSize($P, " << lodStr << ").y)";
+                        cc = 2;
+                        break;
+
+                    case TextureType::Shape3D:
+                        sb << "($" << aa++ << " = textureSize($P, " << lodStr << ").x)";
+                        sb << ", ($" << aa++ << " = textureSize($P, " << lodStr << ").y)";
+                        sb << ", ($" << aa++ << " = textureSize($P, " << lodStr << ").z)";
+                        cc = 3;
+                        break;
+
+                    default:
+                        SLANG_UNEXPECTED("unhandled resource shape");
+                        break;
+                    }
+
+                    if(isArray)
+                    {
+                        sb << ", ($" << aa++ << " = textureSize($P, " << lodStr << ")." << kComponentNames[cc] << ")";
+                    }
+
+                    if(isMultisample)
+                    {
+                        sb << ", ($" << aa++ << " = textureSamples($P))";
+                    }
+
+                    if (includeMipInfo)
+                    {
+                        sb << ", ($" << aa++ << " = textureQueryLevels($P))";
+                    }
+
+
+                    sb << ")\")\n";
+                    sb << "__intrinsic_op\n";
+
+                }
+
+                char const* t = isFloat ? "out float " : "out uint ";
+
+                sb << "void GetDimensions(";
+                if(includeMipInfo)
+                    sb << "uint mipLevel, ";
+
+                switch(baseShape)
+                {
+                case TextureType::Shape1D:
+                    sb << t << "width";
+                    break;
+
+                case TextureType::Shape2D:
+                case TextureType::ShapeCube:
+                    sb << t << "width,";
+                    sb << t << "height";
+                    break;
+
+                case TextureType::Shape3D:
+                    sb << t << "width,";
+                    sb << t << "height,";
+                    sb << t << "depth";
+                    break;
+
+                default:
+                    assert(!"unexpected");
+                    break;
+                }
+
+                if(isArray)
+                {
+                    sb << ", " << t << "elements";
+                }
+
+                if(isMultisample)
+                {
+                    sb << ", " << t << "sampleCount";
+                }
+
+                if(includeMipInfo)
+                    sb << ", " << t << "numberOfLevels";
+
+                sb << ");\n";
+            }
+
+            // `GetSamplePosition()`
+            if( isMultisample )
+            {
+                sb << "float2 GetSamplePosition(int s);\n";
+            }
+
+            // `Load()`
+
+            if( kBaseTextureTypes[tt].coordCount + isArray < 4 )
+            {
+                int loadCoordCount = kBaseTextureTypes[tt].coordCount + isArray + (isMultisample?0:1);
+
+                // When translating to GLSL, we need to break apart the `location` argument.
+                //
+                // TODO: this should really be handled by having this member actually get lowered!
+                static const char* kGLSLLoadCoordsSwizzle[] = { "", "", "x", "xy", "xyz", "xyzw" };
+                static const char* kGLSLLoadLODSwizzle[]    = { "", "", "y", "z", "w", "error" };
+
+                if (isMultisample)
+                {
+                    sb << "__target_intrinsic(glsl, \"texelFetch($P, $0, $1)\")\n";
+                }
+                else
+                {
+                    sb << "__target_intrinsic(glsl, \"texelFetch($P, ($0)." << kGLSLLoadCoordsSwizzle[loadCoordCount] << ", ($0)." << kGLSLLoadLODSwizzle[loadCoordCount] << ")\")\n";
+                }
+                sb << "__intrinsic_op\n";
+                sb << "T Load(";
+                sb << "int" << loadCoordCount << " location";
+                if(isMultisample)
+                {
+                    sb << ", int sampleIndex";
+                }
+                sb << ");\n";
+
+                if (isMultisample)
+                {
+                    sb << "__target_intrinsic(glsl, \"texelFetchOffset($P, $0, $1, $2)\")\n";
+                }
+                else
+                {
+                    sb << "__target_intrinsic(glsl, \"texelFetch($P, ($0)." << kGLSLLoadCoordsSwizzle[loadCoordCount] << ", ($0)." << kGLSLLoadLODSwizzle[loadCoordCount] << ", $1)\")\n";
+                }
+                sb << "__intrinsic_op\n";
+                sb << "T Load(";
+                sb << "int" << loadCoordCount << " location";
+                if(isMultisample)
+                {
+                    sb << ", int sampleIndex";
+                }
+                sb << ", int" << loadCoordCount << " offset";
+                sb << ");\n";
+
+
+                sb << "T Load(";
+                sb << "int" << loadCoordCount << " location";
+                if(isMultisample)
+                {
+                    sb << ", int sampleIndex";
+                }
+                sb << ", int" << kBaseTextureTypes[tt].coordCount << " offset";
+                sb << ", out uint status";
+                sb << ");\n";
+            }
+
+            if(baseShape != TextureType::ShapeCube)
+            {
+                // subscript operator
+                sb << "__intrinsic_op __subscript(uint" << kBaseTextureTypes[tt].coordCount + isArray << " location) -> T;\n";
+            }
+
+            if( !isMultisample )
+            {
+                // `Sample()`
+
+                sb << "__target_intrinsic(glsl, \"texture($p, $1)\")\n";
+
+                // TODO: only enable if IR is being used?
+                sb << "__intrinsic_op(sample)\n";
+
+                sb << "__intrinsic_op\n";
+                sb << "T Sample(SamplerState s, ";
+                sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location);\n";
+
+                if( baseShape != TextureType::ShapeCube )
+                {
+                    sb << "__target_intrinsic(glsl, \"textureOffset($p, $1, $2)\")\n";
+                    sb << "__intrinsic_op\n";
+                    sb << "T Sample(SamplerState s, ";
+                    sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, ";
+                    sb << "int" << kBaseTextureTypes[tt].coordCount << " offset);\n";
+                }
+
+                sb << "T Sample(SamplerState s, ";
+                sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, ";
+                if( baseShape != TextureType::ShapeCube )
+                {
+                    sb << "int" << kBaseTextureTypes[tt].coordCount << " offset, ";
+                }
+                sb << "float clamp);\n";
+
+                sb << "T Sample(SamplerState s, ";
+                sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, ";
+                if( baseShape != TextureType::ShapeCube )
+                {
+                    sb << "int" << kBaseTextureTypes[tt].coordCount << " offset, ";
+                }
+                sb << "float clamp, out uint status);\n";
+
+
+                // `SampleBias()`
+                sb << "__target_intrinsic(glsl, \"texture($p, $1, $2)\")\n";
+                sb << "__intrinsic_op\n";
+                sb << "T SampleBias(SamplerState s, ";
+                sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, float bias);\n";
+
+                if( baseShape != TextureType::ShapeCube )
+                {
+                    sb << "__target_intrinsic(glsl, \"textureOffset($p, $1, $2, $3)\")\n";
+                    sb << "__intrinsic_op\n";
+                    sb << "T SampleBias(SamplerState s, ";
+                    sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, float bias, ";
+                    sb << "int" << kBaseTextureTypes[tt].coordCount << " offset);\n";
+                }
+
+                // `SampleCmp()` and `SampleCmpLevelZero`
+                sb << "T SampleCmp(SamplerComparisonState s, ";
+                sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, ";
+                sb << "float compareValue";
+                sb << ");\n";
+
+                int baseCoordCount = kBaseTextureTypes[tt].coordCount;
+                int arrCoordCount = baseCoordCount + isArray;
+                if (arrCoordCount < 3)
+                {
+                    int extCoordCount = arrCoordCount + 1;
+
+                    if (extCoordCount < 3)
+                        extCoordCount = 3;
+
+                    sb << "__target_intrinsic(glsl, \"textureLod($p, ";
+
+                    sb << "vec" << extCoordCount << "($1,";
+                    for (int ii = arrCoordCount; ii < extCoordCount - 1; ++ii)
+                    {
+                        sb << " 0.0,";
+                    }
+                    sb << "$2)";
+
+                    sb << ", 0.0)\")\n";
+                }
+                else if(arrCoordCount <= 3)
+                {
+                    int extCoordCount = arrCoordCount + 1;
+
+                    if (extCoordCount < 3)
+                        extCoordCount = 3;
+
+                    sb << "__target_intrinsic(glsl, \"textureGrad($p, ";
+
+                    sb << "vec" << extCoordCount << "($1,";
+                    for (int ii = arrCoordCount; ii < extCoordCount - 1; ++ii)
+                    {
+                        sb << " 0.0,";
+                    }
+                    sb << "$2)";
+
+                    // Construct gradients
+                    sb << ", vec" << baseCoordCount << "(0.0)";
+                    sb << ", vec" << baseCoordCount << "(0.0)";
+                    sb << ")\")\n";
+                }
+                sb << "__intrinsic_op\n";
+                sb << "T SampleCmpLevelZero(SamplerComparisonState s, ";
+                sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, ";
+                sb << "float compareValue";
+                sb << ");\n";
+
+                if( baseShape != TextureType::ShapeCube )
+                {
+                    // Note(tfoley): MSDN seems confused, and claims that the `offset`
+                    // parameter for `SampleCmp` is available for everything but 3D
+                    // textures, while `Sample` and `SampleBias` are consistent in
+                    // saying they only exclude `offset` for cube maps (which makes
+                    // sense). I'm going to assume the documentation for `SampleCmp`
+                    // is just wrong.
+
+                    sb << "T SampleCmp(SamplerState s, ";
+                    sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, ";
+                    sb << "float compareValue, ";
+                    sb << "int" << kBaseTextureTypes[tt].coordCount << " offset);\n";
+
+                    sb << "T SampleCmpLevelZero(SamplerState s, ";
+                    sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, ";
+                    sb << "float compareValue, ";
+                    sb << "int" << kBaseTextureTypes[tt].coordCount << " offset);\n";
+                }
+
+
+                sb << "__target_intrinsic(glsl, \"textureGrad($p, $1, $2, $3)\")\n";
+                sb << "__intrinsic_op(sampleGrad)\n";
+                sb << "T SampleGrad(SamplerState s, ";
+                sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, ";
+                sb << "float" << kBaseTextureTypes[tt].coordCount << " gradX, ";
+                sb << "float" << kBaseTextureTypes[tt].coordCount << " gradY";
+                sb << ");\n";
+
+                if( baseShape != TextureType::ShapeCube )
+                {
+                    sb << "__target_intrinsic(glsl, \"textureGradOffset($p, $1, $2, $3, $4)\")\n";
+                    sb << "__intrinsic_op(sampleGrad)\n";
+                    sb << "T SampleGrad(SamplerState s, ";
+                    sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, ";
+                    sb << "float" << kBaseTextureTypes[tt].coordCount << " gradX, ";
+                    sb << "float" << kBaseTextureTypes[tt].coordCount << " gradY, ";
+                    sb << "int" << kBaseTextureTypes[tt].coordCount << " offset);\n";
+                }
+
+                // `SampleLevel`
+
+                sb << "__target_intrinsic(glsl, \"textureLod($p, $1, $2)\")\n";
+                sb << "__intrinsic_op\n";
+                sb << "T SampleLevel(SamplerState s, ";
+                sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, ";
+                sb << "float level);\n";
+
+                if( baseShape != TextureType::ShapeCube )
+                {
+                    sb << "__target_intrinsic(glsl, \"textureLodOffset($p, $1, $2, $3)\")\n";
+                    sb << "__intrinsic_op\n";
+                    sb << "T SampleLevel(SamplerState s, ";
+                    sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, ";
+                    sb << "float level, ";
+                    sb << "int" << kBaseTextureTypes[tt].coordCount << " offset);\n";
+                }
+            }
+
+            sb << "\n};\n";
+
+            // `Gather*()` operations are handled via an `extension` declaration,
+            // because this lets us capture the element type of the texture.
+            //
+            // TODO: longer-term there should be something like a `TextureElementType`
+            // interface, that both scalars and vectors implement, that then exposes
+            // a `Scalar` associated type, and `Gather` can return `vector<T.Scalar, 4>`.
+            //
+            static const struct {
+                char const* genericPrefix;
+                char const* elementType;
+            } kGatherExtensionCases[] = {
+                { "__generic<T, let N : int>", "vector<T,N>" },
+
+                // TODO: need a case here for scalars `T`, but also
+                // need to ensure that case doesn't accidentally match
+                // for `T = vector<...>`, which requires actual checking
+                // of constraints on generic parameters.
+            };
+            for(auto cc : kGatherExtensionCases)
+            {
+                // TODO: this should really be an `if` around the entire `Gather` logic
+                if (isMultisample) break;
+
+                EMIT_LINE_DIRECTIVE();
+                sb << cc.genericPrefix << " __extension ";
+                sb << kBaseTextureAccessLevels[accessLevel].name;
+                sb << name;
+                if (isArray) sb << "Array";
+                sb << "<" << cc.elementType << " >";
+                sb << "\n{\n";
+
+
+                // `Gather`
+                // (tricky because it returns a 4-vector of the element type
+                // of the texture components...)
+                //
+                // TODO: is it actually correct to restrict these so that, e.g.,
+                // `GatherAlpha()` isn't allowed on `Texture2D<float3>` because
+                // it nominally doesn't have an alpha component?
+                static const struct {
+                    int componentIndex;
+                    char const* componentName;
+                } kGatherComponets[] = {
+                    { 0, "" },
+                    { 0, "Red" },
+                    { 1, "Green" },
+                    { 2, "Blue" },
+                    { 3, "Alpha" },
+                };
+
+                for(auto kk : kGatherComponets)
+                {
+                    auto componentIndex = kk.componentIndex;
+                    auto componentName = kk.componentName;
+
+                    EMIT_LINE_DIRECTIVE();
+                    // Plain form. As with `Sample*`, `location` gets one extra coordinate (the slice) on array textures.
+                    sb << "__target_intrinsic(glsl, \"textureGather($p, $1, " << componentIndex << ")\")\n";
+                    sb << "__intrinsic_op\n";
+                    sb << "vector<T, 4> Gather" << componentName << "(SamplerState s, ";
+                    sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location);\n";
+                    // Single-offset form; offsets stay at `coordCount` components.
+                    EMIT_LINE_DIRECTIVE();
+                    sb << "__target_intrinsic(glsl, \"textureGatherOffset($p, $1, $2, " << componentIndex << ")\")\n";
+                    sb << "__intrinsic_op\n";
+                    sb << "vector<T, 4> Gather" << componentName << "(SamplerState s, ";
+                    sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, ";
+                    sb << "int" << kBaseTextureTypes[tt].coordCount << " offset);\n";
+                    // Single-offset form with a `status` output (no GLSL mapping is attached).
+                    EMIT_LINE_DIRECTIVE();
+                    sb << "vector<T, 4> Gather" << componentName << "(SamplerState s, ";
+                    sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, ";
+                    sb << "int" << kBaseTextureTypes[tt].coordCount << " offset, ";
+                    sb << "out uint status);\n";
+                    // Four-offset form, mapped to `textureGatherOffsets` with the offsets packed into an array.
+                    EMIT_LINE_DIRECTIVE();
+                    sb << "__target_intrinsic(glsl, \"textureGatherOffsets($p, $1, int" << kBaseTextureTypes[tt].coordCount << "[]($2, $3, $4, $5), " << componentIndex << ")\")\n";
+                    sb << "__intrinsic_op\n";
+                    sb << "vector<T, 4> Gather" << componentName << "(SamplerState s, ";
+                    sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, ";
+                    sb << "int" << kBaseTextureTypes[tt].coordCount << " offset1, ";
+                    sb << "int" << kBaseTextureTypes[tt].coordCount << " offset2, ";
+                    sb << "int" << kBaseTextureTypes[tt].coordCount << " offset3, ";
+                    sb << "int" << kBaseTextureTypes[tt].coordCount << " offset4);\n";
+                    // Four-offset form with a `status` output (no GLSL mapping is attached).
+                    EMIT_LINE_DIRECTIVE();
+                    sb << "vector<T, 4> Gather" << componentName << "(SamplerState s, ";
+                    sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, ";
+                    sb << "int" << kBaseTextureTypes[tt].coordCount << " offset1, ";
+                    sb << "int" << kBaseTextureTypes[tt].coordCount << " offset2, ";
+                    sb << "int" << kBaseTextureTypes[tt].coordCount << " offset3, ";
+                    sb << "int" << kBaseTextureTypes[tt].coordCount << " offset4, ";
+                    sb << "out uint status);\n";
+                }
+
+                EMIT_LINE_DIRECTIVE();
+                sb << "\n}\n";
+            }
+        }
+    }
+}
+
+// Declare every unary operator for each base type it applies to, in scalar, vector and matrix form.
+for (auto op : unaryOps)
+{
+    for (auto type : kBaseTypes)
+    {
+        if ((type.flags & op.flags) == 0)
+            continue;
+        // No flag bit in common: this operator is not declared for this type.
+        char const* fixity = (op.flags & POSTFIX) != 0 ? "__postfix " : "__prefix ";
+        char const* qual = (op.flags & ASSIGNMENT) != 0 ? "in out " : "";
+
+        // scalar version (the operand is `in out` when the operator carries the ASSIGNMENT flag)
+        sb << fixity;
+        sb << "__intrinsic_op(" << int(op.opCode) << ") " << type.name << " operator" << op.opName << "(" << qual << type.name << " value);\n";
+
+        // vector version, generic over the element count N
+        sb << "__generic<let N : int> ";
+        sb << fixity;
+        sb << "__intrinsic_op(" << int(op.opCode) << ") vector<" << type.name << ",N> operator" << op.opName << "(" << qual << "vector<" << type.name << ",N> value);\n";
+
+        // matrix version, generic over both dimensions N and M
+        sb << "__generic<let N : int, let M : int> ";
+        sb << fixity;
+        sb << "__intrinsic_op(" << int(op.opCode) << ") matrix<" << type.name << ",N,M> operator" << op.opName << "(" << qual << "matrix<" << type.name << ",N,M> value);\n";
+    }
+}
+// Declare every binary operator for each base type it applies to, including mixed scalar/vector/matrix forms.
+for (auto op : binaryOps)
+{
+    for (auto type : kBaseTypes)
+    {
+        if ((type.flags & op.flags) == 0)
+            continue;
+        // Both operands and the result default to the same base type.
+        char const* leftType = type.name;
+        char const* rightType = leftType;
+        char const* resultType = leftType;
+        // Comparison operators produce `bool` (or a vector/matrix of `bool`) instead.
+        if (op.flags & COMPARISON) resultType = "bool";
+        // Assignment operators take their left operand as `in out`.
+        char const* leftQual = "";
+        if(op.flags & ASSIGNMENT) leftQual = "in out ";
+
+        // TODO: handle `SHIFT`
+
+        // scalar version
+        sb << "__intrinsic_op(" << int(op.opCode) << ") " << resultType << " operator" << op.opName << "(" << leftQual << leftType << " left, " << rightType << " right);\n";
+
+        // vector version
+        sb << "__generic<let N : int> ";
+        sb << "__intrinsic_op(" << int(op.opCode) << ") vector<" << resultType << ",N> operator" << op.opName << "(" << leftQual << "vector<" << leftType << ",N> left, vector<" << rightType << ",N> right);\n";
+
+        // matrix version
+
+        // skip matrix-matrix multiply operations here, so that GLSL doesn't see them
+        switch (op.opCode)
+        {
+        case kIROp_Mul:
+        case kIRPseudoOp_MulAssign:
+            break;
+
+        default:
+            sb << "__generic<let N : int, let M : int> ";
+            sb << "__intrinsic_op(" << int(op.opCode) << ") matrix<" << resultType << ",N,M> operator" << op.opName << "(" << leftQual << "matrix<" << leftType << ",N,M> left, matrix<" << rightType << ",N,M> right);\n";
+            break;
+        }
+
+        // We are going to go ahead and explicitly define combined
+        // operations for the scalar-op-vector, etc. cases, rather
+        // than rely on promotion rules.
+
+        // scalar-vector and scalar-matrix (skipped for assignment operators; `leftQual` is always empty here)
+        if (!(op.flags & ASSIGNMENT))
+        {
+            sb << "__generic<let N : int> ";
+            sb << "__intrinsic_op(" << int(op.opCode) << ") vector<" << resultType << ",N> operator" << op.opName << "(" << leftQual << leftType << " left, vector<" << rightType << ",N> right);\n";
+
+            sb << "__generic<let N : int, let M : int> ";
+            sb << "__intrinsic_op(" << int(op.opCode) << ") matrix<" << resultType << ",N,M> operator" << op.opName << "(" << leftQual << leftType << " left, matrix<" << rightType << ",N,M> right);\n";
+        }
+
+        // vector-scalar and matrix-scalar (emitted for assignment operators too)
+        sb << "__generic<let N : int> ";
+        sb << "__intrinsic_op(" << int(op.opCode) << ") vector<" << resultType << ",N> operator" << op.opName << "(" << leftQual << "vector<" << leftType << ",N> left, " << rightType << " right);\n";
+
+        sb << "__generic<let N : int, let M : int> ";
+        sb << "__intrinsic_op(" << int(op.opCode) << ") matrix<" << resultType << ",N,M> operator" << op.opName << "(" << leftQual << "matrix<" << leftType << ",N,M> left, " << rightType << " right);\n";
+    }
+}
+// Finish the emitted text with a blank line.
+sb << "\n";
+sb << "";
diff --git a/source/slang/glsl.meta.slang b/source/slang/glsl.meta.slang
new file mode 100644
index 000000000..878cea188
--- /dev/null
+++ b/source/slang/glsl.meta.slang
@@ -0,0 +1,205 @@
+// Slang GLSL compatibility library
+
+${{{{
+// Scalar element types, each paired with the prefix used to build its GLSL alias names.
+static const struct {
+    char const* name;
+    char const* glslPrefix;
+} kTypes[] =
+{
+    {"float", ""},
+    {"int", "i"},
+    {"uint", "u"},
+    {"bool", "b"},
+};
+static const int kTypeCount = sizeof(kTypes) / sizeof(kTypes[0]);
+// Emit the `<prefix>vecN`, `<prefix>matN` and `<prefix>matRxC` typedefs for every entry of `kTypes`.
+for( int tt = 0; tt < kTypeCount; ++tt )
+{
+    // Declare GLSL aliases for HLSL types (vectors and square matrices of size 2 to 4)
+    for (int vv = 2; vv <= 4; ++vv)
+    {
+        sb << "typedef vector<" << kTypes[tt].name << "," << vv << "> " << kTypes[tt].glslPrefix << "vec" << vv << ";\n";
+        sb << "typedef matrix<" << kTypes[tt].name << "," << vv << "," << vv << "> " << kTypes[tt].glslPrefix << "mat" << vv << ";\n";
+    }
+    for (int rr = 2; rr <= 4; ++rr) // every row/column pair, so e.g. `mat2x2` is emitted alongside `mat2`
+    for (int cc = 2; cc <= 4; ++cc)
+    {
+        sb << "typedef matrix<" << kTypes[tt].name << "," << rr << "," << cc << "> " << kTypes[tt].glslPrefix << "mat" << rr << "x" << cc << ";\n";
+    }
+}
+
+// Multiplication operations for vectors + matrices, all spelled as overloads of `operator*`
+
+// scalar-vector and vector-scalar
+sb << "__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op(mul) vector<T,N> operator*(vector<T,N> x, T y);\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op(mul) vector<T,N> operator*(T x, vector<T,N> y);\n";
+
+// scalar-matrix and matrix-scalar
+sb << "__generic<T : __BuiltinArithmeticType, let N : int, let M :int> __intrinsic_op(mul) matrix<T,N,M> operator*(matrix<T,N,M> x, T y);\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int, let M :int> __intrinsic_op(mul) matrix<T,N,M> operator*(T x, matrix<T,N,M> y);\n";
+
+// vector-vector (dot product)
+sb << "__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op(dot) T operator*(vector<T,N> x, vector<T,N> y);\n";
+// NOTE(review): GLSL defines `vecN * vecN` as a component-wise product; confirm that mapping it to `dot` is intended.
+// vector-matrix (an N-vector times an NxM matrix gives an M-vector)
+sb << "__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op(mulVectorMatrix) vector<T,M> operator*(vector<T,N> x, matrix<T,N,M> y);\n";
+
+// matrix-vector (an NxM matrix times an M-vector gives an N-vector)
+sb << "__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op(mulMatrixVector) vector<T,N> operator*(matrix<T,N,M> x, vector<T,M> y);\n";
+
+// matrix-matrix (RxN times NxC gives RxC)
+sb << "__generic<T : __BuiltinArithmeticType, let R : int, let N : int, let C : int> __intrinsic_op(mulMatrixMatrix) matrix<T,R,C> operator*(matrix<T,R,N> x, matrix<T,N,C> y);\n";
+
+
+
+//
+
+// TODO(tfoley): Need to handle `RW*` variants of texture types as well...
+static const struct {
+    char const*			name;
+    TextureType::Shape	baseShape;
+    int					coordCount;
+} kBaseTextureTypes[] = {
+    { "1D",		TextureType::Shape1D,	1 },
+    { "2D",		TextureType::Shape2D,	2 },
+    { "3D",		TextureType::Shape3D,	3 },
+    { "Cube",	TextureType::ShapeCube,	3 },
+    { "Buffer", TextureType::ShapeBuffer,   1 },
+};
+static const int kBaseTextureTypeCount = sizeof(kBaseTextureTypes) / sizeof(kBaseTextureTypes[0]);
+
+// Access-level name prefixes and their access codes; the texture loop that follows in this file does not reference this table.
+static const struct {
+    char const*         name;
+    SlangResourceAccess access;
+} kBaseTextureAccessLevels[] = {
+    { "",                   SLANG_RESOURCE_ACCESS_READ },
+    { "RW",                 SLANG_RESOURCE_ACCESS_READ_WRITE },
+    { "RasterizerOrdered",  SLANG_RESOURCE_ACCESS_RASTER_ORDERED },
+};
+static const int kBaseTextureAccessLevelCount = sizeof(kBaseTextureAccessLevels) / sizeof(kBaseTextureAccessLevels[0]);
+// Declare the generic sampler/texture/image structs and their prefixed aliases for every shape, array and multisample combination.
+for (int tt = 0; tt < kBaseTextureTypeCount; ++tt)
+{
+    char const* shapeName = kBaseTextureTypes[tt].name;
+    TextureType::Shape baseShape = kBaseTextureTypes[tt].baseShape;
+
+    for (int isArray = 0; isArray < 2; ++isArray)
+    {
+        // Arrays of 3D textures aren't allowed
+        if (isArray && baseShape == TextureType::Shape3D) continue;
+
+        for (int isMultisample = 0; isMultisample < 2; ++isMultisample)
+        {
+            auto readAccess = SLANG_RESOURCE_ACCESS_READ;
+            auto readWriteAccess = SLANG_RESOURCE_ACCESS_READ_WRITE;
+
+            // TODO: any constraints to enforce on what gets to be multisampled?
+
+            // Start from the shape and OR in the array/multisample flag bits.
+            unsigned flavor = baseShape;
+            if (isArray)		flavor |= TextureType::ArrayFlag;
+            if (isMultisample)	flavor |= TextureType::MultisampleFlag;
+//                        if (isShadow)		flavor |= TextureType::ShadowFlag;
+
+
+            // The access code is combined in after being shifted left by 8 bits.
+            unsigned readFlavor = flavor | (readAccess << 8);
+            unsigned readWriteFlavor = flavor | (readWriteAccess << 8);
+            // Type-name suffix: shape name, then `MS`, then `Array` (e.g. `2DMSArray`).
+            StringBuilder nameBuilder;
+            nameBuilder << shapeName;
+            if (isMultisample) nameBuilder << "MS";
+            if (isArray) nameBuilder << "Array";
+            auto name = nameBuilder.ProduceString();
+            // `__sampler*` and `__texture*` use the read flavor; `__image*` uses the read-write flavor.
+            sb << "__generic<T> ";
+            sb << "__magic_type(TextureSampler," << int(readFlavor) << ") struct ";
+            sb << "__sampler" << name;
+            sb << " {};\n";
+
+            sb << "__generic<T> ";
+            sb << "__magic_type(Texture," << int(readFlavor) << ") struct ";
+            sb << "__texture" << name;
+            sb << " {};\n";
+
+            sb << "__generic<T> ";
+            sb << "__magic_type(GLSLImageType," << int(readWriteFlavor) << ") struct ";
+            sb << "__image" << name;
+            sb << " {};\n";
+
+            // TODO(tfoley): flesh this out for all the available prefixes
+            static const struct
+            {
+                char const* prefix;
+                char const* elementType;
+            } kTextureElementTypes[] = {
+                { "", "vec4" },
+                { "i", "ivec4" },
+                { "u", "uvec4" },
+                { nullptr, nullptr }, // sentinel: the loop below stops at the null prefix
+            };
+            for( auto ee = kTextureElementTypes; ee->prefix; ++ee )
+            {
+                sb << "typedef __sampler" << name << "<" << ee->elementType << "> " << ee->prefix << "sampler" << name << ";\n";
+                sb << "typedef __texture" << name << "<" << ee->elementType << "> " << ee->prefix << "texture" << name << ";\n";
+                sb << "typedef __image" << name << "<" << ee->elementType << "> " << ee->prefix << "image" << name << ";\n";
+            }
+        }
+    }
+}
+// Remaining builtin GLSL types, followed by the GLSL-only modifier keywords.
+sb << "__generic<T> __magic_type(GLSLInputParameterBlockType) struct __GLSLInputParameterBlock {};\n";
+sb << "__generic<T> __magic_type(GLSLOutputParameterBlockType) struct __GLSLOutputParameterBlock {};\n";
+sb << "__generic<T> __magic_type(GLSLShaderStorageBufferType) struct __GLSLShaderStorageBuffer {};\n";
+
+sb << "__magic_type(SamplerState," << int(SamplerStateType::Flavor::SamplerState) << ") struct sampler {};\n";
+
+sb << "__magic_type(GLSLInputAttachmentType) struct subpassInput {};\n";
+
+// Define additional keywords
+
+sb << "syntax buffer : GLSLBufferModifier;\n";
+
+// [GLSL 4.3] Storage Qualifiers
+
+// TODO: need to support `shared` here with its GLSL meaning
+
+sb << "syntax patch : GLSLPatchModifier;\n";
+// `centroid` and `sample` handled centrally
+
+// [GLSL 4.5] Interpolation Qualifiers
+sb << "syntax smooth : SimpleModifier;\n";
+sb << "syntax flat : SimpleModifier;\n";
+sb << "syntax noperspective : SimpleModifier;\n";
+
+
+// [GLSL 4.3.2] Constant Qualifier
+
+// We need to handle GLSL `const` separately from HLSL `const`,
+// since they mean such different things.
+
+// [GLSL 4.7.2] Precision Qualifiers
+sb << "syntax highp : SimpleModifier;\n";
+sb << "syntax mediump : SimpleModifier;\n";
+sb << "syntax lowp : SimpleModifier;\n";
+
+// [GLSL 4.8.1] The Invariant Qualifier
+
+sb << "syntax invariant : SimpleModifier;\n";
+
+// [GLSL 4.10] Memory Qualifiers
+
+sb << "syntax coherent : SimpleModifier;\n";
+sb << "syntax volatile : SimpleModifier;\n";
+sb << "syntax restrict : SimpleModifier;\n";
+sb << "syntax readonly : GLSLReadOnlyModifier;\n";
+sb << "syntax writeonly : GLSLWriteOnlyModifier;\n";
+
+// We will treat `subroutine` as a qualifier for now
+sb << "syntax subroutine : SimpleModifier;\n";
+
+
+
+}}}}
+\ No newline at end of file
diff --git a/source/slang/glsl.meta.slang.cpp b/source/slang/glsl.meta.slang.cpp
new file mode 100644
index 000000000..e43a51ea9
--- /dev/null
+++ b/source/slang/glsl.meta.slang.cpp
@@ -0,0 +1,206 @@
+sb << "// Slang GLSL compatibility library\n";
+sb << "\n";
+sb << "";
+
+// Scalar element types, each paired with the prefix used to build its GLSL alias names.
+static const struct {
+    char const* name;
+    char const* glslPrefix;
+} kTypes[] =
+{
+    {"float", ""},
+    {"int", "i"},
+    {"uint", "u"},
+    {"bool", "b"},
+};
+static const int kTypeCount = sizeof(kTypes) / sizeof(kTypes[0]);
+// Emit the `<prefix>vecN`, `<prefix>matN` and `<prefix>matRxC` typedefs for every entry of `kTypes`.
+for( int tt = 0; tt < kTypeCount; ++tt )
+{
+    // Declare GLSL aliases for HLSL types (vectors and square matrices of size 2 to 4)
+    for (int vv = 2; vv <= 4; ++vv)
+    {
+        sb << "typedef vector<" << kTypes[tt].name << "," << vv << "> " << kTypes[tt].glslPrefix << "vec" << vv << ";\n";
+        sb << "typedef matrix<" << kTypes[tt].name << "," << vv << "," << vv << "> " << kTypes[tt].glslPrefix << "mat" << vv << ";\n";
+    }
+    for (int rr = 2; rr <= 4; ++rr) // every row/column pair, so e.g. `mat2x2` is emitted alongside `mat2`
+    for (int cc = 2; cc <= 4; ++cc)
+    {
+        sb << "typedef matrix<" << kTypes[tt].name << "," << rr << "," << cc << "> " << kTypes[tt].glslPrefix << "mat" << rr << "x" << cc << ";\n";
+    }
+}
+
+// Multiplication operations for vectors + matrices, all spelled as overloads of `operator*`
+
+// scalar-vector and vector-scalar
+sb << "__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op(mul) vector<T,N> operator*(vector<T,N> x, T y);\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op(mul) vector<T,N> operator*(T x, vector<T,N> y);\n";
+
+// scalar-matrix and matrix-scalar
+sb << "__generic<T : __BuiltinArithmeticType, let N : int, let M :int> __intrinsic_op(mul) matrix<T,N,M> operator*(matrix<T,N,M> x, T y);\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int, let M :int> __intrinsic_op(mul) matrix<T,N,M> operator*(T x, matrix<T,N,M> y);\n";
+
+// vector-vector (dot product)
+sb << "__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op(dot) T operator*(vector<T,N> x, vector<T,N> y);\n";
+// NOTE(review): GLSL defines `vecN * vecN` as a component-wise product; confirm that mapping it to `dot` is intended.
+// vector-matrix (an N-vector times an NxM matrix gives an M-vector)
+sb << "__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op(mulVectorMatrix) vector<T,M> operator*(vector<T,N> x, matrix<T,N,M> y);\n";
+
+// matrix-vector (an NxM matrix times an M-vector gives an N-vector)
+sb << "__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op(mulMatrixVector) vector<T,N> operator*(matrix<T,N,M> x, vector<T,M> y);\n";
+
+// matrix-matrix (RxN times NxC gives RxC)
+sb << "__generic<T : __BuiltinArithmeticType, let R : int, let N : int, let C : int> __intrinsic_op(mulMatrixMatrix) matrix<T,R,C> operator*(matrix<T,R,N> x, matrix<T,N,C> y);\n";
+
+
+
+//
+
+// TODO(tfoley): Need to handle `RW*` variants of texture types as well...
+static const struct {
+    char const*			name;
+    TextureType::Shape	baseShape;
+    int					coordCount;
+} kBaseTextureTypes[] = {
+    { "1D",		TextureType::Shape1D,	1 },
+    { "2D",		TextureType::Shape2D,	2 },
+    { "3D",		TextureType::Shape3D,	3 },
+    { "Cube",	TextureType::ShapeCube,	3 },
+    { "Buffer", TextureType::ShapeBuffer,   1 },
+};
+static const int kBaseTextureTypeCount = sizeof(kBaseTextureTypes) / sizeof(kBaseTextureTypes[0]);
+
+// Access-level name prefixes and their access codes; the texture loop that follows in this file does not reference this table.
+static const struct {
+    char const*         name;
+    SlangResourceAccess access;
+} kBaseTextureAccessLevels[] = {
+    { "",                   SLANG_RESOURCE_ACCESS_READ },
+    { "RW",                 SLANG_RESOURCE_ACCESS_READ_WRITE },
+    { "RasterizerOrdered",  SLANG_RESOURCE_ACCESS_RASTER_ORDERED },
+};
+static const int kBaseTextureAccessLevelCount = sizeof(kBaseTextureAccessLevels) / sizeof(kBaseTextureAccessLevels[0]);
+// Declare the generic sampler/texture/image structs and their prefixed aliases for every shape, array and multisample combination.
+for (int tt = 0; tt < kBaseTextureTypeCount; ++tt)
+{
+    char const* shapeName = kBaseTextureTypes[tt].name;
+    TextureType::Shape baseShape = kBaseTextureTypes[tt].baseShape;
+
+    for (int isArray = 0; isArray < 2; ++isArray)
+    {
+        // Arrays of 3D textures aren't allowed
+        if (isArray && baseShape == TextureType::Shape3D) continue;
+
+        for (int isMultisample = 0; isMultisample < 2; ++isMultisample)
+        {
+            auto readAccess = SLANG_RESOURCE_ACCESS_READ;
+            auto readWriteAccess = SLANG_RESOURCE_ACCESS_READ_WRITE;
+
+            // TODO: any constraints to enforce on what gets to be multisampled?
+
+            // Start from the shape and OR in the array/multisample flag bits.
+            unsigned flavor = baseShape;
+            if (isArray)		flavor |= TextureType::ArrayFlag;
+            if (isMultisample)	flavor |= TextureType::MultisampleFlag;
+//                        if (isShadow)		flavor |= TextureType::ShadowFlag;
+
+
+            // The access code is combined in after being shifted left by 8 bits.
+            unsigned readFlavor = flavor | (readAccess << 8);
+            unsigned readWriteFlavor = flavor | (readWriteAccess << 8);
+            // Type-name suffix: shape name, then `MS`, then `Array` (e.g. `2DMSArray`).
+            StringBuilder nameBuilder;
+            nameBuilder << shapeName;
+            if (isMultisample) nameBuilder << "MS";
+            if (isArray) nameBuilder << "Array";
+            auto name = nameBuilder.ProduceString();
+            // `__sampler*` and `__texture*` use the read flavor; `__image*` uses the read-write flavor.
+            sb << "__generic<T> ";
+            sb << "__magic_type(TextureSampler," << int(readFlavor) << ") struct ";
+            sb << "__sampler" << name;
+            sb << " {};\n";
+
+            sb << "__generic<T> ";
+            sb << "__magic_type(Texture," << int(readFlavor) << ") struct ";
+            sb << "__texture" << name;
+            sb << " {};\n";
+
+            sb << "__generic<T> ";
+            sb << "__magic_type(GLSLImageType," << int(readWriteFlavor) << ") struct ";
+            sb << "__image" << name;
+            sb << " {};\n";
+
+            // TODO(tfoley): flesh this out for all the available prefixes
+            static const struct
+            {
+                char const* prefix;
+                char const* elementType;
+            } kTextureElementTypes[] = {
+                { "", "vec4" },
+                { "i", "ivec4" },
+                { "u", "uvec4" },
+                { nullptr, nullptr }, // sentinel: the loop below stops at the null prefix
+            };
+            for( auto ee = kTextureElementTypes; ee->prefix; ++ee )
+            {
+                sb << "typedef __sampler" << name << "<" << ee->elementType << "> " << ee->prefix << "sampler" << name << ";\n";
+                sb << "typedef __texture" << name << "<" << ee->elementType << "> " << ee->prefix << "texture" << name << ";\n";
+                sb << "typedef __image" << name << "<" << ee->elementType << "> " << ee->prefix << "image" << name << ";\n";
+            }
+        }
+    }
+}
+// Remaining builtin GLSL types, followed by the GLSL-only modifier keywords.
+sb << "__generic<T> __magic_type(GLSLInputParameterBlockType) struct __GLSLInputParameterBlock {};\n";
+sb << "__generic<T> __magic_type(GLSLOutputParameterBlockType) struct __GLSLOutputParameterBlock {};\n";
+sb << "__generic<T> __magic_type(GLSLShaderStorageBufferType) struct __GLSLShaderStorageBuffer {};\n";
+
+sb << "__magic_type(SamplerState," << int(SamplerStateType::Flavor::SamplerState) << ") struct sampler {};\n";
+
+sb << "__magic_type(GLSLInputAttachmentType) struct subpassInput {};\n";
+
+// Define additional keywords
+
+sb << "syntax buffer : GLSLBufferModifier;\n";
+
+// [GLSL 4.3] Storage Qualifiers
+
+// TODO: need to support `shared` here with its GLSL meaning
+
+sb << "syntax patch : GLSLPatchModifier;\n";
+// `centroid` and `sample` handled centrally
+
+// [GLSL 4.5] Interpolation Qualifiers
+sb << "syntax smooth : SimpleModifier;\n";
+sb << "syntax flat : SimpleModifier;\n";
+sb << "syntax noperspective : SimpleModifier;\n";
+
+
+// [GLSL 4.3.2] Constant Qualifier
+
+// We need to handle GLSL `const` separately from HLSL `const`,
+// since they mean such different things.
+
+// [GLSL 4.7.2] Precision Qualifiers
+sb << "syntax highp : SimpleModifier;\n";
+sb << "syntax mediump : SimpleModifier;\n";
+sb << "syntax lowp : SimpleModifier;\n";
+
+// [GLSL 4.8.1] The Invariant Qualifier
+
+sb << "syntax invariant : SimpleModifier;\n";
+
+// [GLSL 4.10] Memory Qualifiers
+
+sb << "syntax coherent : SimpleModifier;\n";
+sb << "syntax volatile : SimpleModifier;\n";
+sb << "syntax restrict : SimpleModifier;\n";
+sb << "syntax readonly : GLSLReadOnlyModifier;\n";
+sb << "syntax writeonly : GLSLWriteOnlyModifier;\n";
+
+// We will treat `subroutine` as a qualifier for now
+sb << "syntax subroutine : SimpleModifier;\n";
+
+
+
+sb << "";
diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang
new file mode 100644
index 000000000..3b4b85b91
--- /dev/null
+++ b/source/slang/hlsl.meta.slang
@@ -0,0 +1,1065 @@
+// Slang HLSL compatibility library
+
+typedef uint UINT;
+// Builtin `AppendStructuredBuffer<T>`: exposes `Append` and `GetDimensions`.
+__generic<T> __magic_type(HLSLAppendStructuredBufferType) struct AppendStructuredBuffer
+{
+    __intrinsic_op void Append(T value);
+
+    __intrinsic_op void GetDimensions(
+        out uint numStructs,
+        out uint stride);
+};
+// Builtin `ByteAddressBuffer`: `GetDimensions` plus `Load`..`Load4`, each with and without a `status` output.
+__magic_type(HLSLByteAddressBufferType) struct ByteAddressBuffer
+{
+    __intrinsic_op void GetDimensions(
+        out uint dim);
+
+    __intrinsic_op uint Load(int location);
+    __intrinsic_op uint Load(int location, out uint status);
+
+    __intrinsic_op uint2 Load2(int location);
+    __intrinsic_op uint2 Load2(int location, out uint status);
+
+    __intrinsic_op uint3 Load3(int location);
+    __intrinsic_op uint3 Load3(int location, out uint status);
+
+    __intrinsic_op uint4 Load4(int location);
+    __intrinsic_op uint4 Load4(int location, out uint status);
+};
+// Builtin `StructuredBuffer<T>`: `GetDimensions`, `Load`, and a subscript declared without accessors.
+__generic<T> __magic_type(HLSLStructuredBufferType) struct StructuredBuffer
+{
+    __intrinsic_op void GetDimensions(
+        out uint numStructs,
+        out uint stride);
+
+    __intrinsic_op T Load(int location);
+    __intrinsic_op T Load(int location, out uint status);
+
+    __intrinsic_op __subscript(uint index) -> T;
+};
+// Builtin `ConsumeStructuredBuffer<T>`: exposes `Consume` and `GetDimensions`.
+__generic<T> __magic_type(HLSLConsumeStructuredBufferType) struct ConsumeStructuredBuffer
+{
+    __intrinsic_op T Consume();
+
+    __intrinsic_op void GetDimensions(
+        out uint numStructs,
+        out uint stride);
+};
+// Builtin `InputPatch<T, N>`: elements are reached through a subscript declared without accessors.
+__generic<T, let N : int> __magic_type(HLSLInputPatchType) struct InputPatch
+{
+    __intrinsic_op __subscript(uint index) -> T;
+};
+// Builtin `OutputPatch<T, N>`: the subscript declares only a `set` accessor.
+__generic<T, let N : int> __magic_type(HLSLOutputPatchType) struct OutputPatch
+{
+    __intrinsic_op __subscript(uint index) -> T { set; }
+};
+
+__magic_type(HLSLRWByteAddressBufferType) struct RWByteAddressBuffer
+{
+    // Note(tfoley): supports all operations from `ByteAddressBuffer`
+    // TODO(tfoley): can this be made a sub-type?
+
+    __intrinsic_op void GetDimensions(
+        out uint dim);
+
+    __intrinsic_op uint Load(int location);
+    __intrinsic_op uint Load(int location, out uint status);
+
+    __intrinsic_op uint2 Load2(int location);
+    __intrinsic_op uint2 Load2(int location, out uint status);
+
+    __intrinsic_op uint3 Load3(int location);
+    __intrinsic_op uint3 Load3(int location, out uint status);
+
+    __intrinsic_op uint4 Load4(int location);
+    __intrinsic_op uint4 Load4(int location, out uint status);
+
+    // Added operations: the `Interlocked*` family (most come with and without `original_value`) and `Store`..`Store4`
+
+    __intrinsic_op void InterlockedAdd(
+        UINT dest,
+        UINT value,
+        out UINT original_value);
+    __intrinsic_op void InterlockedAdd(
+        UINT dest,
+        UINT value);
+
+    __intrinsic_op void InterlockedAnd(
+        UINT dest,
+        UINT value,
+        out UINT original_value);
+    __intrinsic_op void InterlockedAnd(
+        UINT dest,
+        UINT value);
+
+    __intrinsic_op void InterlockedCompareExchange(
+        UINT dest,
+        UINT compare_value,
+        UINT value,
+        out UINT original_value);
+    __intrinsic_op void InterlockedCompareExchange(
+        UINT dest,
+        UINT compare_value,
+        UINT value);
+
+    __intrinsic_op void InterlockedCompareStore(
+        UINT dest,
+        UINT compare_value,
+        UINT value);
+    __intrinsic_op void InterlockedCompareStore( // NOTE(review): HLSL documents only the three-argument form; confirm this overload is intended
+        UINT dest,
+        UINT compare_value);
+
+    __intrinsic_op void InterlockedExchange(
+        UINT dest,
+        UINT value,
+        out UINT original_value);
+    __intrinsic_op void InterlockedExchange(
+        UINT dest,
+        UINT value);
+
+    __intrinsic_op void InterlockedMax(
+        UINT dest,
+        UINT value,
+        out UINT original_value);
+    __intrinsic_op void InterlockedMax(
+        UINT dest,
+        UINT value);
+
+    __intrinsic_op void InterlockedMin(
+        UINT dest,
+        UINT value,
+        out UINT original_value);
+    __intrinsic_op void InterlockedMin(
+        UINT dest,
+        UINT value);
+
+    __intrinsic_op void InterlockedOr(
+        UINT dest,
+        UINT value,
+        out UINT original_value);
+    __intrinsic_op void InterlockedOr(
+        UINT dest,
+        UINT value);
+
+    __intrinsic_op void InterlockedXor(
+        UINT dest,
+        UINT value,
+        out UINT original_value);
+    __intrinsic_op void InterlockedXor(
+        UINT dest,
+        UINT value);
+
+    __intrinsic_op void Store(
+        uint address,
+        uint value);
+
+    __intrinsic_op void Store2(
+        uint address,
+        uint2 value);
+
+    __intrinsic_op void Store3(
+        uint address,
+        uint3 value);
+
+    __intrinsic_op void Store4(
+        uint address,
+        uint4 value);
+};
+// Builtin `RWStructuredBuffer<T>`: counter operations, `GetDimensions`, `Load`, and a get/set subscript.
+__generic<T> __magic_type(HLSLRWStructuredBufferType) struct RWStructuredBuffer
+{
+    __intrinsic_op uint DecrementCounter();
+
+    __intrinsic_op void GetDimensions(
+        out uint numStructs,
+        out uint stride);
+    // Like `DecrementCounter`, this returns a counter value (HLSL: the pre-incremented one), so it is `uint`, not `void`.
+    __intrinsic_op uint IncrementCounter();
+
+    __intrinsic_op T Load(int location);
+    __intrinsic_op T Load(int location, out uint status);
+
+    __intrinsic_op __subscript(uint index) -> T { get; set; }
+};
+// Stream types: each exposes `Append` and `RestartStrip`. NOTE(review): these members carry no `__intrinsic_op`, unlike the buffer types earlier in this file; confirm that is intended.
+__generic<T> __magic_type(HLSLPointStreamType) struct PointStream
+{
+    void Append(T value);
+    void RestartStrip();
+};
+
+__generic<T> __magic_type(HLSLLineStreamType) struct LineStream
+{
+    void Append(T value);
+    void RestartStrip();
+};
+
+__generic<T> __magic_type(HLSLTriangleStreamType) struct TriangleStream
+{
+    void Append(T value);
+    void RestartStrip();
+};
+
+// Note(tfoley): Trying to systematically add all the HLSL builtins
+
+// Try to terminate the current draw or dispatch call (HLSL SM 4.0)
+__intrinsic_op void abort();
+
+// Absolute value (HLSL SM 1.0); scalar, vector and matrix overloads over signed arithmetic types
+__generic<T : __BuiltinSignedArithmeticType> __intrinsic_op T abs(T x);
+__generic<T : __BuiltinSignedArithmeticType, let N : int> __intrinsic_op vector<T,N> abs(vector<T,N> x);
+__generic<T : __BuiltinSignedArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> abs(matrix<T,N,M> x);
+
+// Inverse cosine (HLSL SM 1.0); scalar, vector and matrix overloads over floating-point types
+__generic<T : __BuiltinFloatingPointType> __intrinsic_op T acos(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> acos(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> acos(matrix<T,N,M> x);
+
+// Test if all components are non-zero (HLSL SM 1.0); the result is a single `bool` for every operand shape
+__generic<T : __BuiltinType> __intrinsic_op bool all(T x);
+__generic<T : __BuiltinType, let N : int> __intrinsic_op bool all(vector<T,N> x);
+__generic<T : __BuiltinType, let N : int, let M : int> __intrinsic_op bool all(matrix<T,N,M> x);
+
+// Barrier for writes to all memory spaces (HLSL SM 5.0)
+__intrinsic_op void AllMemoryBarrier();
+
+// Thread-group sync and barrier for writes to all memory spaces (HLSL SM 5.0)
+__intrinsic_op void AllMemoryBarrierWithGroupSync();
+
+// Test if any component is non-zero (HLSL SM 1.0)
+// The result is a single `bool`, whatever the shape of the operand.
+__generic<T : __BuiltinType> __intrinsic_op bool any(T x);
+__generic<T : __BuiltinType, let N : int> __intrinsic_op bool any(vector<T,N> x);
+__generic<T : __BuiltinType, let N : int, let M : int> __intrinsic_op bool any(matrix<T,N,M> x);
+
+
+// Reinterpret bits as a double (HLSL SM 5.0)
+__intrinsic_op double asdouble(uint lowbits, uint highbits);
+
+// Reinterpret bits as a float (HLSL SM 4.0)
+__intrinsic_op float asfloat( int x);
+__intrinsic_op float asfloat(uint x);
+__generic<let N : int> __intrinsic_op vector<float,N> asfloat(vector< int,N> x);
+__generic<let N : int> __intrinsic_op vector<float,N> asfloat(vector<uint,N> x);
+__generic<let N : int, let M : int> __intrinsic_op matrix<float,N,M> asfloat(matrix< int,N,M> x);
+__generic<let N : int, let M : int> __intrinsic_op matrix<float,N,M> asfloat(matrix<uint,N,M> x);
+
+
+// Inverse sine (HLSL SM 1.0)
+__generic<T : __BuiltinFloatingPointType> __intrinsic_op T asin(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> asin(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> asin(matrix<T,N,M> x);
+
+// Reinterpret bits as an int (HLSL SM 4.0)
+__intrinsic_op int asint(float x);
+__intrinsic_op int asint(uint x);
+__generic<let N : int> __intrinsic_op vector<int,N> asint(vector<float,N> x);
+__generic<let N : int> __intrinsic_op vector<int,N> asint(vector<uint,N> x);
+__generic<let N : int, let M : int> __intrinsic_op matrix<int,N,M> asint(matrix<float,N,M> x);
+__generic<let N : int, let M : int> __intrinsic_op matrix<int,N,M> asint(matrix<uint,N,M> x);
+
+// Reinterpret bits of a double as two uints: low and high 32 bits (HLSL SM 5.0)
+__intrinsic_op void asuint(double value, out uint lowbits, out uint highbits);
+
+// Reinterpret bits as a uint (HLSL SM 4.0)
+__intrinsic_op uint asuint(float x);
+__intrinsic_op uint asuint(int x);
+__generic<let N : int> __intrinsic_op vector<uint,N> asuint(vector<float,N> x);
+__generic<let N : int> __intrinsic_op vector<uint,N> asuint(vector<int,N> x);
+__generic<let N : int, let M : int> __intrinsic_op matrix<uint,N,M> asuint(matrix<float,N,M> x);
+__generic<let N : int, let M : int> __intrinsic_op matrix<uint,N,M> asuint(matrix<int,N,M> x);
+
+// Inverse tangent (HLSL SM 1.0)
+__generic<T : __BuiltinFloatingPointType> __intrinsic_op T atan(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> atan(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> atan(matrix<T,N,M> x);
+
+__generic<T : __BuiltinFloatingPointType>
+__target_intrinsic(glsl,"atan($0,$1)")
+__intrinsic_op
+T atan2(T y, T x);
+
+__generic<T : __BuiltinFloatingPointType, let N : int>
+__target_intrinsic(glsl,"atan($0,$1)")
+__intrinsic_op
+vector<T,N> atan2(vector<T,N> y, vector<T,N> x);
+
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
+__target_intrinsic(glsl,"atan($0,$1)")
+__intrinsic_op
+matrix<T,N,M> atan2(matrix<T,N,M> y, matrix<T,N,M> x);
+
+// Ceiling (HLSL SM 1.0)
+__generic<T : __BuiltinFloatingPointType> __intrinsic_op T ceil(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> ceil(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> ceil(matrix<T,N,M> x);
+
+
+// Check access status to tiled resource
+__intrinsic_op bool CheckAccessFullyMapped(uint status);
+
+// Clamp (HLSL SM 1.0)
+__generic<T : __BuiltinArithmeticType> __intrinsic_op T clamp(T x, T min, T max);
+__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> clamp(vector<T,N> x, vector<T,N> min, vector<T,N> max);
+__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> clamp(matrix<T,N,M> x, matrix<T,N,M> min, matrix<T,N,M> max);
+
+// Clip (discard) fragment conditionally
+__generic<T : __BuiltinFloatingPointType> __intrinsic_op void clip(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op void clip(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op void clip(matrix<T,N,M> x);
+
+// Cosine
+__generic<T : __BuiltinFloatingPointType> __intrinsic_op T cos(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> cos(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> cos(matrix<T,N,M> x);
+
+// Hyperbolic cosine
+__generic<T : __BuiltinFloatingPointType> __intrinsic_op T cosh(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> cosh(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> cosh(matrix<T,N,M> x);
+
+// Population count (number of set bits, computed per component)
+__intrinsic_op uint countbits(uint value);
+__generic<let N : int> __intrinsic_op vector<uint,N> countbits(vector<uint,N> value);
+
+// Cross product
+__generic<T : __BuiltinArithmeticType> __intrinsic_op vector<T,3> cross(vector<T,3> x, vector<T,3> y);
+
+// Convert encoded color
+__intrinsic_op int4 D3DCOLORtoUBYTE4(float4 x);
+
+// Partial-difference derivatives
+__generic<T : __BuiltinFloatingPointType>
+__target_intrinsic(glsl, dFdx)
+__intrinsic_op
+T ddx(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int>
+__target_intrinsic(glsl, dFdx)
+__intrinsic_op
+vector<T,N> ddx(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
+__target_intrinsic(glsl, dFdx)
+__intrinsic_op
+matrix<T,N,M> ddx(matrix<T,N,M> x);
+
+__generic<T : __BuiltinFloatingPointType>
+__glsl_extension(GL_ARB_derivative_control)
+__target_intrinsic(glsl, dFdxCoarse)
+__intrinsic_op
+T ddx_coarse(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int>
+__glsl_extension(GL_ARB_derivative_control)
+__target_intrinsic(glsl, dFdxCoarse)
+__intrinsic_op
+vector<T,N> ddx_coarse(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
+__glsl_extension(GL_ARB_derivative_control)
+__target_intrinsic(glsl, dFdxCoarse)
+__intrinsic_op
+matrix<T,N,M> ddx_coarse(matrix<T,N,M> x);
+
+__generic<T : __BuiltinFloatingPointType>
+__glsl_extension(GL_ARB_derivative_control)
+__target_intrinsic(glsl, dFdxFine)
+__intrinsic_op
+T ddx_fine(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int>
+__glsl_extension(GL_ARB_derivative_control)
+__target_intrinsic(glsl, dFdxFine)
+__intrinsic_op
+vector<T,N> ddx_fine(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
+__glsl_extension(GL_ARB_derivative_control)
+__target_intrinsic(glsl, dFdxFine)
+__intrinsic_op
+matrix<T,N,M> ddx_fine(matrix<T,N,M> x);
+
+// Partial derivative in y; emitted as `dFdy` when targeting GLSL
+__generic<T : __BuiltinFloatingPointType>
+__target_intrinsic(glsl, dFdy) __intrinsic_op
+T ddy(T x);
+
+__generic<T : __BuiltinFloatingPointType, let N : int>
+__target_intrinsic(glsl, dFdy) __intrinsic_op
+vector<T,N> ddy(vector<T,N> x);
+
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
+__target_intrinsic(glsl, dFdy) __intrinsic_op
+matrix<T,N,M> ddy(matrix<T,N,M> x);
+
+__generic<T : __BuiltinFloatingPointType>
+__glsl_extension(GL_ARB_derivative_control)
+__target_intrinsic(glsl, dFdyCoarse)
+__intrinsic_op
+T ddy_coarse(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int>
+__glsl_extension(GL_ARB_derivative_control)
+__target_intrinsic(glsl, dFdyCoarse)
+__intrinsic_op
+vector<T,N> ddy_coarse(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
+__glsl_extension(GL_ARB_derivative_control)
+__target_intrinsic(glsl, dFdyCoarse)
+__intrinsic_op
+matrix<T,N,M> ddy_coarse(matrix<T,N,M> x);
+
+__generic<T : __BuiltinFloatingPointType>
+__glsl_extension(GL_ARB_derivative_control)
+__target_intrinsic(glsl, dFdyFine)
+__intrinsic_op
+T ddy_fine(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int>
+__glsl_extension(GL_ARB_derivative_control)
+__target_intrinsic(glsl, dFdyFine)
+__intrinsic_op
+vector<T,N> ddy_fine(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
+__glsl_extension(GL_ARB_derivative_control)
+__target_intrinsic(glsl, dFdyFine)
+__intrinsic_op
+matrix<T,N,M> ddy_fine(matrix<T,N,M> x);
+
+
+// Radians to degrees
+__generic<T : __BuiltinFloatingPointType> __intrinsic_op T degrees(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> degrees(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> degrees(matrix<T,N,M> x);
+
+// Matrix determinant
+
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op T determinant(matrix<T,N,N> m);
+
+// Barrier for device memory
+__intrinsic_op void DeviceMemoryBarrier();
+__intrinsic_op void DeviceMemoryBarrierWithGroupSync();
+
+// Vector distance
+
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op T distance(vector<T,N> x, vector<T,N> y);
+
+// Vector dot product
+
+__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op T dot(vector<T,N> x, vector<T,N> y);
+
+// Helper for computing distance terms for lighting (obsolete)
+
+__generic<T : __BuiltinFloatingPointType> __intrinsic_op vector<T,4> dst(vector<T,4> x, vector<T,4> y);
+
+// Error message
+
+// __intrinsic_op void errorf( string format, ... );
+
+// Attribute evaluation
+
+__generic<T : __BuiltinArithmeticType> __intrinsic_op T EvaluateAttributeAtCentroid(T x);
+__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> EvaluateAttributeAtCentroid(vector<T,N> x);
+__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> EvaluateAttributeAtCentroid(matrix<T,N,M> x);
+
+__generic<T : __BuiltinArithmeticType> __intrinsic_op T EvaluateAttributeAtSample(T x, uint sampleindex);
+__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> EvaluateAttributeAtSample(vector<T,N> x, uint sampleindex);
+__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> EvaluateAttributeAtSample(matrix<T,N,M> x, uint sampleindex);
+
+__generic<T : __BuiltinArithmeticType> __intrinsic_op T EvaluateAttributeSnapped(T x, int2 offset);
+__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> EvaluateAttributeSnapped(vector<T,N> x, int2 offset);
+__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> EvaluateAttributeSnapped(matrix<T,N,M> x, int2 offset);
+
+// Base-e exponent
+__generic<T : __BuiltinFloatingPointType> __intrinsic_op T exp(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> exp(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> exp(matrix<T,N,M> x);
+
+// Base-2 exponent
+__generic<T : __BuiltinFloatingPointType> __intrinsic_op T exp2(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> exp2(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> exp2(matrix<T,N,M> x);
+
+// Convert 16-bit float stored in low bits of integer
+__intrinsic_op float f16tof32(uint value);
+__generic<let N : int> __intrinsic_op vector<float,N> f16tof32(vector<uint,N> value);
+
+// Convert to 16-bit float stored in low bits of integer
+__intrinsic_op uint f32tof16(float value);
+__generic<let N : int> __intrinsic_op vector<uint,N> f32tof16(vector<float,N> value);
+
+// Flip surface normal to face forward, if needed
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> faceforward(vector<T,N> n, vector<T,N> i, vector<T,N> ng);
+
+// Find first set bit starting at high bit and working down
+__intrinsic_op int firstbithigh(int value);
+__generic<let N : int> __intrinsic_op vector<int,N> firstbithigh(vector<int,N> value);
+
+__intrinsic_op uint firstbithigh(uint value);
+__generic<let N : int> __intrinsic_op vector<uint,N> firstbithigh(vector<uint,N> value);
+
+// Find first set bit starting at low bit and working up
+__intrinsic_op int firstbitlow(int value);
+__generic<let N : int> __intrinsic_op vector<int,N> firstbitlow(vector<int,N> value);
+
+__intrinsic_op uint firstbitlow(uint value);
+__generic<let N : int> __intrinsic_op vector<uint,N> firstbitlow(vector<uint,N> value);
+
+// Floor (HLSL SM 1.0)
+__generic<T : __BuiltinFloatingPointType> __intrinsic_op T floor(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> floor(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> floor(matrix<T,N,M> x);
+
+// Fused multiply-add for doubles
+__intrinsic_op double fma(double a, double b, double c);
+__generic<let N : int> __intrinsic_op vector<double, N> fma(vector<double, N> a, vector<double, N> b, vector<double, N> c);
+__generic<let N : int, let M : int> __intrinsic_op matrix<double,N,M> fma(matrix<double,N,M> a, matrix<double,N,M> b, matrix<double,N,M> c);
+
+// Floating point remainder of x/y
+__generic<T : __BuiltinFloatingPointType> __intrinsic_op T fmod(T x, T y);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> fmod(vector<T,N> x, vector<T,N> y);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> fmod(matrix<T,N,M> x, matrix<T,N,M> y);
+
+// Fractional part
+__generic<T : __BuiltinFloatingPointType>
+__target_intrinsic(glsl, fract)
+__intrinsic_op
+T frac(T x);
+
+__generic<T : __BuiltinFloatingPointType, let N : int>
+__target_intrinsic(glsl, fract)
+__intrinsic_op
+vector<T,N> frac(vector<T,N> x);
+
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
+__target_intrinsic(glsl, fract)
+__intrinsic_op
+matrix<T,N,M> frac(matrix<T,N,M> x);
+
+// Split float into mantissa and exponent
+__generic<T : __BuiltinFloatingPointType> __intrinsic_op T frexp(T x, out T exp);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> frexp(vector<T,N> x, out vector<T,N> exp);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> frexp(matrix<T,N,M> x, out matrix<T,N,M> exp);
+
+// Texture filter width
+__generic<T : __BuiltinFloatingPointType> __intrinsic_op T fwidth(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> fwidth(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> fwidth(matrix<T,N,M> x);
+
+// Get number of samples in render target
+__intrinsic_op uint GetRenderTargetSampleCount();
+
+// Get position of given sample
+__intrinsic_op float2 GetRenderTargetSamplePosition(int Index);
+
+// Group memory barrier
+__intrinsic_op void GroupMemoryBarrier();
+__intrinsic_op void GroupMemoryBarrierWithGroupSync();
+
+// Atomics
+__intrinsic_op void InterlockedAdd(in out  int dest,  int value, out  int original_value);
+__intrinsic_op void InterlockedAdd(in out uint dest, uint value, out uint original_value);
+
+__intrinsic_op void InterlockedAnd(in out  int dest,  int value, out  int original_value);
+__intrinsic_op void InterlockedAnd(in out uint dest, uint value, out uint original_value);
+
+__intrinsic_op void InterlockedCompareExchange(in out  int dest,  int compare_value,  int value, out  int original_value);
+__intrinsic_op void InterlockedCompareExchange(in out uint dest, uint compare_value, uint value, out uint original_value);
+
+__intrinsic_op void InterlockedCompareStore(in out  int dest,  int compare_value,  int value);
+__intrinsic_op void InterlockedCompareStore(in out uint dest, uint compare_value, uint value);
+
+__intrinsic_op void InterlockedExchange(in out  int dest,  int value, out  int original_value);
+__intrinsic_op void InterlockedExchange(in out uint dest, uint value, out uint original_value);
+
+__intrinsic_op void InterlockedMax(in out  int dest,  int value, out  int original_value);
+__intrinsic_op void InterlockedMax(in out uint dest, uint value, out uint original_value);
+
+__intrinsic_op void InterlockedMin(in out  int dest,  int value, out  int original_value);
+__intrinsic_op void InterlockedMin(in out uint dest, uint value, out uint original_value);
+
+__intrinsic_op void InterlockedOr(in out  int dest,  int value, out  int original_value);
+__intrinsic_op void InterlockedOr(in out uint dest, uint value, out uint original_value);
+
+__intrinsic_op void InterlockedXor(in out  int dest,  int value, out  int original_value);
+__intrinsic_op void InterlockedXor(in out uint dest, uint value, out uint original_value);
+
+// Is floating-point value finite?
+__generic<T : __BuiltinFloatingPointType> __intrinsic_op bool isfinite(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<bool,N> isfinite(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<bool,N,M> isfinite(matrix<T,N,M> x);
+
+// Is floating-point value infinite?
+__generic<T : __BuiltinFloatingPointType> __intrinsic_op bool isinf(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<bool,N> isinf(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<bool,N,M> isinf(matrix<T,N,M> x);
+
+// Is floating-point value not-a-number?
+__generic<T : __BuiltinFloatingPointType> __intrinsic_op bool isnan(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<bool,N> isnan(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<bool,N,M> isnan(matrix<T,N,M> x);
+
+// Construct float from mantissa and exponent
+__generic<T : __BuiltinFloatingPointType> __intrinsic_op T ldexp(T x, T exp);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> ldexp(vector<T,N> x, vector<T,N> exp);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> ldexp(matrix<T,N,M> x, matrix<T,N,M> exp);
+
+// Vector length
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op T length(vector<T,N> x);
+
+// Linear interpolation
+__generic<T : __BuiltinFloatingPointType>
+__target_intrinsic(glsl, mix)
+__intrinsic_op
+T lerp(T x, T y, T s);
+
+__generic<T : __BuiltinFloatingPointType, let N : int>
+__target_intrinsic(glsl, mix)
+__intrinsic_op
+vector<T,N> lerp(vector<T,N> x, vector<T,N> y, vector<T,N> s);
+
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
+__target_intrinsic(glsl, mix)
+__intrinsic_op
+matrix<T,N,M> lerp(matrix<T,N,M> x, matrix<T,N,M> y, matrix<T,N,M> s);
+
+// Legacy lighting function (obsolete)
+__intrinsic_op float4 lit(float n_dot_l, float n_dot_h, float m);
+
+// Base-e logarithm
+__generic<T : __BuiltinFloatingPointType> __intrinsic_op T log(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> log(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> log(matrix<T,N,M> x);
+
+// Base-10 logarithm
+__generic<T : __BuiltinFloatingPointType> __intrinsic_op T log10(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> log10(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> log10(matrix<T,N,M> x);
+
+// Base-2 logarithm
+__generic<T : __BuiltinFloatingPointType> __intrinsic_op T log2(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> log2(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> log2(matrix<T,N,M> x);
+
+// multiply-add
+__generic<T : __BuiltinArithmeticType> __intrinsic_op T mad(T mvalue, T avalue, T bvalue);
+__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> mad(vector<T,N> mvalue, vector<T,N> avalue, vector<T,N> bvalue);
+__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> mad(matrix<T,N,M> mvalue, matrix<T,N,M> avalue, matrix<T,N,M> bvalue);
+
+// maximum
+__generic<T : __BuiltinArithmeticType> __intrinsic_op T max(T x, T y);
+__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> max(vector<T,N> x, vector<T,N> y);
+__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> max(matrix<T,N,M> x, matrix<T,N,M> y);
+
+// minimum
+__generic<T : __BuiltinArithmeticType> __intrinsic_op T min(T x, T y);
+__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> min(vector<T,N> x, vector<T,N> y);
+__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> min(matrix<T,N,M> x, matrix<T,N,M> y);
+
+// split into integer and fractional parts (both with same sign)
+__generic<T : __BuiltinFloatingPointType> __intrinsic_op T modf(T x, out T ip);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> modf(vector<T,N> x, out vector<T,N> ip);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> modf(matrix<T,N,M> x, out matrix<T,N,M> ip);
+
+// Masked sum of absolute differences over 4 byte alignments (HLSL SM 5.0)
+__intrinsic_op uint4 msad4(uint reference, uint2 source, uint4 accum);
+
+// General inner products: every `mul` overload, grouped by operand shape
+
+// scalar * scalar
+__generic<T : __BuiltinArithmeticType>
+__intrinsic_op
+T mul(T x, T y);
+
+// vector * scalar, then scalar * vector
+__generic<T : __BuiltinArithmeticType, let N : int>
+__intrinsic_op
+vector<T,N> mul(vector<T,N> x, T y);
+
+__generic<T : __BuiltinArithmeticType, let N : int>
+__intrinsic_op
+vector<T,N> mul(T x, vector<T,N> y);
+
+// matrix * scalar, then scalar * matrix
+__generic<T : __BuiltinArithmeticType, let N : int, let M : int>
+__intrinsic_op
+matrix<T,N,M> mul(matrix<T,N,M> x, T y);
+
+__generic<T : __BuiltinArithmeticType, let N : int, let M : int>
+__intrinsic_op
+matrix<T,N,M> mul(T x, matrix<T,N,M> y);
+
+// vector * vector (dot product)
+__generic<T : __BuiltinArithmeticType, let N : int>
+__intrinsic_op(dot)
+T mul(vector<T,N> x, vector<T,N> y);
+
+// vector * matrix
+__generic<T : __BuiltinArithmeticType, let N : int, let M : int>
+__intrinsic_op(mulVectorMatrix)
+vector<T,M> mul(vector<T,N> x, matrix<T,N,M> y);
+
+// matrix * vector
+__generic<T : __BuiltinArithmeticType, let N : int, let M : int>
+__intrinsic_op(mulMatrixVector)
+vector<T,N> mul(matrix<T,N,M> x, vector<T,M> y);
+
+// matrix * matrix: (R x N) times (N x C) gives (R x C)
+__generic<T : __BuiltinArithmeticType, let R : int, let N : int, let C : int>
+__intrinsic_op(mulMatrixMatrix)
+matrix<T,R,C> mul(matrix<T,R,N> x, matrix<T,N,C> y);
+
+// noise (deprecated)
+__intrinsic_op float noise(float x);
+__generic<let N : int> __intrinsic_op float noise(vector<float, N> x);
+
+// Normalize a vector
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> normalize(vector<T,N> x);
+
+// Raise to a power
+__generic<T : __BuiltinFloatingPointType> __intrinsic_op T pow(T x, T y);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> pow(vector<T,N> x, vector<T,N> y);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> pow(matrix<T,N,M> x, matrix<T,N,M> y);
+
+// Output message
+
+// __intrinsic_op void printf( string format, ... );
+
+// Tessellation factor fixup routines
+
+__intrinsic_op void Process2DQuadTessFactorsAvg(
+    in  float4 RawEdgeFactors,
+    in  float2 InsideScale,
+    out float4 RoundedEdgeTessFactors,
+    out float2 RoundedInsideTessFactors,
+    out float2 UnroundedInsideTessFactors);
+
+__intrinsic_op void Process2DQuadTessFactorsMax(
+    in  float4 RawEdgeFactors,
+    in  float2 InsideScale,
+    out float4 RoundedEdgeTessFactors,
+    out float2 RoundedInsideTessFactors,
+    out float2 UnroundedInsideTessFactors);
+
+__intrinsic_op void Process2DQuadTessFactorsMin(
+    in  float4 RawEdgeFactors,
+    in  float2 InsideScale,
+    out float4 RoundedEdgeTessFactors,
+    out float2 RoundedInsideTessFactors,
+    out float2 UnroundedInsideTessFactors);
+
+__intrinsic_op void ProcessIsolineTessFactors(
+    in  float RawDetailFactor,
+    in  float RawDensityFactor,
+    out float RoundedDetailFactor,
+    out float RoundedDensityFactor);
+
+__intrinsic_op void ProcessQuadTessFactorsAvg(
+    in  float4 RawEdgeFactors,
+    in  float InsideScale,
+    out float4 RoundedEdgeTessFactors,
+    out float2 RoundedInsideTessFactors,
+    out float2 UnroundedInsideTessFactors);
+
+__intrinsic_op void ProcessQuadTessFactorsMax(
+    in  float4 RawEdgeFactors,
+    in  float InsideScale,
+    out float4 RoundedEdgeTessFactors,
+    out float2 RoundedInsideTessFactors,
+    out float2 UnroundedInsideTessFactors);
+
+__intrinsic_op void ProcessQuadTessFactorsMin(
+    in  float4 RawEdgeFactors,
+    in  float InsideScale,
+    out float4 RoundedEdgeTessFactors,
+    out float2 RoundedInsideTessFactors,
+    out float2 UnroundedInsideTessFactors);
+
+__intrinsic_op void ProcessTriTessFactorsAvg(
+    in  float3 RawEdgeFactors,
+    in  float InsideScale,
+    out float3 RoundedEdgeTessFactors,
+    out float RoundedInsideTessFactor,
+    out float UnroundedInsideTessFactor);
+
+__intrinsic_op void ProcessTriTessFactorsMax(
+    in  float3 RawEdgeFactors,
+    in  float InsideScale,
+    out float3 RoundedEdgeTessFactors,
+    out float RoundedInsideTessFactor,
+    out float UnroundedInsideTessFactor);
+
+// Triangle-patch tessellation factor fixup. Both inside-factor outputs are
+// scalar `float`s, so they use the same singular names as the
+// ProcessTriTessFactorsAvg/Max declarations.
+__intrinsic_op void ProcessTriTessFactorsMin(
+    in  float3 RawEdgeFactors,
+    in  float InsideScale,
+    out float3 RoundedEdgeTessFactors,
+    out float RoundedInsideTessFactor,
+    out float UnroundedInsideTessFactor);
+
+// Degrees to radians
+__generic<T : __BuiltinFloatingPointType> __intrinsic_op T radians(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> radians(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> radians(matrix<T,N,M> x);
+
+// Approximate reciprocal
+__generic<T : __BuiltinFloatingPointType> __intrinsic_op T rcp(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> rcp(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> rcp(matrix<T,N,M> x);
+
+// Reflect incident vector across plane with given normal
+__generic<T : __BuiltinFloatingPointType, let N : int>
+__intrinsic_op
+vector<T,N> reflect(vector<T,N> i, vector<T,N> n);
+
+// Refract incident vector given surface normal and index of refraction
+__generic<T : __BuiltinFloatingPointType, let N : int>
+__intrinsic_op
+vector<T,N> refract(vector<T,N> i, vector<T,N> n, float eta);
+
+// Reverse order of bits
+// Both overloads carry `__intrinsic_op`, matching the other bit-manipulation
+// builtins in this file (`firstbithigh`, `firstbitlow`).
+__intrinsic_op uint reversebits(uint value);
+__generic<let N : int> __intrinsic_op vector<uint,N> reversebits(vector<uint,N> value);
+
+// Round-to-nearest
+__generic<T : __BuiltinFloatingPointType> __intrinsic_op T round(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> round(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> round(matrix<T,N,M> x);
+
+// Reciprocal of square root
+__generic<T : __BuiltinFloatingPointType> __intrinsic_op T rsqrt(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> rsqrt(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> rsqrt(matrix<T,N,M> x);
+
+// Clamp value to [0,1] range
+__generic<T : __BuiltinFloatingPointType>
+__target_intrinsic(glsl, "clamp($0, 0, 1)") __intrinsic_op
+T saturate(T x);
+
+__generic<T : __BuiltinFloatingPointType, let N : int>
+__target_intrinsic(glsl, "clamp($0, 0, 1)") __intrinsic_op
+vector<T,N> saturate(vector<T,N> x);
+
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
+__target_intrinsic(glsl, "clamp($0, 0, 1)") __intrinsic_op
+matrix<T,N,M> saturate(matrix<T,N,M> x);
+
+
+// Extract sign of value
+__generic<T : __BuiltinSignedArithmeticType> __intrinsic_op int sign(T x);
+__generic<T : __BuiltinSignedArithmeticType, let N : int> __intrinsic_op vector<int,N> sign(vector<T,N> x);
+__generic<T : __BuiltinSignedArithmeticType, let N : int, let M : int> __intrinsic_op matrix<int,N,M> sign(matrix<T,N,M> x);
+
+
+// Sine
+__generic<T : __BuiltinFloatingPointType> __intrinsic_op T sin(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> sin(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> sin(matrix<T,N,M> x);
+
+// Sine and cosine
+// Produces both results in one call through the `s` and `c` out-parameters.
+// The scalar overload declares no size parameter: an unused `let N : int`
+// could never be inferred from the arguments of a call.
+__generic<T : __BuiltinFloatingPointType> __intrinsic_op void sincos(T x, out T s, out T c);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op void sincos(vector<T,N> x, out vector<T,N> s, out vector<T,N> c);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op void sincos(matrix<T,N,M> x, out matrix<T,N,M> s, out matrix<T,N,M> c);
+
+// Hyperbolic Sine
+__generic<T : __BuiltinFloatingPointType> __intrinsic_op T sinh(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> sinh(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> sinh(matrix<T,N,M> x);
+
+// Smooth step (Hermite interpolation)
+__generic<T : __BuiltinFloatingPointType> __intrinsic_op T smoothstep(T min, T max, T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> smoothstep(vector<T,N> min, vector<T,N> max, vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> smoothstep(matrix<T,N,M> min, matrix<T,N,M> max, matrix<T,N,M> x);
+
+// Square root
+__generic<T : __BuiltinFloatingPointType> __intrinsic_op T sqrt(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> sqrt(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> sqrt(matrix<T,N,M> x);
+
+// Step function
+__generic<T : __BuiltinFloatingPointType> __intrinsic_op T step(T y, T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> step(vector<T,N> y, vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> step(matrix<T,N,M> y, matrix<T,N,M> x);
+
+// Tangent
+__generic<T : __BuiltinFloatingPointType> __intrinsic_op T tan(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> tan(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> tan(matrix<T,N,M> x);
+
+// Hyperbolic tangent
+__generic<T : __BuiltinFloatingPointType> __intrinsic_op T tanh(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> tanh(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> tanh(matrix<T,N,M> x);
+
+// Legacy texture-fetch operations
+
+/*
+__intrinsic_op float4 tex1D(sampler1D s, float t);
+__intrinsic_op float4 tex1D(sampler1D s, float t, float ddx, float ddy);
+__intrinsic_op float4 tex1Dbias(sampler1D s, float4 t);
+__intrinsic_op float4 tex1Dgrad(sampler1D s, float t, float ddx, float ddy);
+__intrinsic_op float4 tex1Dlod(sampler1D s, float4 t);
+__intrinsic_op float4 tex1Dproj(sampler1D s, float4 t);
+
+__intrinsic_op float4 tex2D(sampler2D s, float2 t);
+__intrinsic_op float4 tex2D(sampler2D s, float2 t, float2 ddx, float2 ddy);
+__intrinsic_op float4 tex2Dbias(sampler2D s, float4 t);
+__intrinsic_op float4 tex2Dgrad(sampler2D s, float2 t, float2 ddx, float2 ddy);
+__intrinsic_op float4 tex2Dlod(sampler2D s, float4 t);
+__intrinsic_op float4 tex2Dproj(sampler2D s, float4 t);
+
+__intrinsic_op float4 tex3D(sampler3D s, float3 t);
+__intrinsic_op float4 tex3D(sampler3D s, float3 t, float3 ddx, float3 ddy);
+__intrinsic_op float4 tex3Dbias(sampler3D s, float4 t);
+__intrinsic_op float4 tex3Dgrad(sampler3D s, float3 t, float3 ddx, float3 ddy);
+__intrinsic_op float4 tex3Dlod(sampler3D s, float4 t);
+__intrinsic_op float4 tex3Dproj(sampler3D s, float4 t);
+
+__intrinsic_op float4 texCUBE(samplerCUBE s, float3 t);
+__intrinsic_op float4 texCUBE(samplerCUBE s, float3 t, float3 ddx, float3 ddy);
+__intrinsic_op float4 texCUBEbias(samplerCUBE s, float4 t);
+__intrinsic_op float4 texCUBEgrad(samplerCUBE s, float3 t, float3 ddx, float3 ddy);
+__intrinsic_op float4 texCUBElod(samplerCUBE s, float4 t);
+__intrinsic_op float4 texCUBEproj(samplerCUBE s, float4 t);
+*/
+
+// Matrix transpose: takes a matrix<T,N,M> and yields a matrix<T,M,N> (dimensions swapped, same element type)
+__generic<T : __BuiltinType, let N : int, let M : int> __intrinsic_op matrix<T,M,N> transpose(matrix<T,N,M> x);
+
+// Truncate to integer
+__generic<T : __BuiltinFloatingPointType> __intrinsic_op T trunc(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> trunc(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> trunc(matrix<T,N,M> x);
+
+// Shader model 6.0 stuff
+
+__intrinsic_op uint GlobalOrderedCountIncrement(uint countToAppendForThisLane);
+
+__generic<T : __BuiltinType> __intrinsic_op T QuadReadLaneAt(T sourceValue, int quadLaneID);
+__generic<T : __BuiltinType, let N : int> __intrinsic_op vector<T,N> QuadReadLaneAt(vector<T,N> sourceValue, int quadLaneID);
+__generic<T : __BuiltinType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> QuadReadLaneAt(matrix<T,N,M> sourceValue, int quadLaneID);
+
+__generic<T : __BuiltinType> __intrinsic_op T QuadSwapX(T localValue);
+__generic<T : __BuiltinType, let N : int> __intrinsic_op vector<T,N> QuadSwapX(vector<T,N> localValue);
+__generic<T : __BuiltinType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> QuadSwapX(matrix<T,N,M> localValue);
+
+__generic<T : __BuiltinType> __intrinsic_op T QuadSwapY(T localValue);
+__generic<T : __BuiltinType, let N : int> __intrinsic_op vector<T,N> QuadSwapY(vector<T,N> localValue);
+__generic<T : __BuiltinType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> QuadSwapY(matrix<T,N,M> localValue);
+
+__generic<T : __BuiltinIntegerType> __intrinsic_op T WaveAllBitAnd(T expr);
+__generic<T : __BuiltinIntegerType, let N : int> __intrinsic_op vector<T,N> WaveAllBitAnd(vector<T,N> expr);
+__generic<T : __BuiltinIntegerType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> WaveAllBitAnd(matrix<T,N,M> expr);
+
+__generic<T : __BuiltinIntegerType> __intrinsic_op T WaveAllBitOr(T expr);
+__generic<T : __BuiltinIntegerType, let N : int> __intrinsic_op vector<T,N> WaveAllBitOr(vector<T,N> expr);
+__generic<T : __BuiltinIntegerType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> WaveAllBitOr(matrix<T,N,M> expr);
+
+__generic<T : __BuiltinIntegerType> __intrinsic_op T WaveAllBitXor(T expr);
+__generic<T : __BuiltinIntegerType, let N : int> __intrinsic_op vector<T,N> WaveAllBitXor(vector<T,N> expr);
+__generic<T : __BuiltinIntegerType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> WaveAllBitXor(matrix<T,N,M> expr);
+
+__generic<T : __BuiltinArithmeticType> __intrinsic_op T WaveAllMax(T expr);
+__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> WaveAllMax(vector<T,N> expr);
+__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> WaveAllMax(matrix<T,N,M> expr);
+
+__generic<T : __BuiltinArithmeticType> __intrinsic_op T WaveAllMin(T expr);
+__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> WaveAllMin(vector<T,N> expr);
+__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> WaveAllMin(matrix<T,N,M> expr);
+
+__generic<T : __BuiltinArithmeticType> __intrinsic_op T WaveAllProduct(T expr);
+__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> WaveAllProduct(vector<T,N> expr);
+__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> WaveAllProduct(matrix<T,N,M> expr);
+
+__generic<T : __BuiltinArithmeticType> __intrinsic_op T WaveAllSum(T expr);
+__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> WaveAllSum(vector<T,N> expr);
+__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> WaveAllSum(matrix<T,N,M> expr);
+
+__intrinsic_op bool WaveAllEqual(bool expr);
+__intrinsic_op bool WaveAllTrue(bool expr);
+__intrinsic_op bool WaveAnyTrue(bool expr);
+// NOTE(review): WaveBallot through WaveOnce below omit `__intrinsic_op`, unlike the other declarations in this section -- confirm that is intentional.
+uint64_t WaveBallot(bool expr);
+
+uint WaveGetLaneCount();
+uint WaveGetLaneIndex();
+uint WaveGetOrderedIndex();
+
+bool WaveIsHelperLane();
+
+bool WaveOnce();
+
+__generic<T : __BuiltinArithmeticType> __intrinsic_op T WavePrefixProduct(T expr);
+__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> WavePrefixProduct(vector<T,N> expr);
+__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> WavePrefixProduct(matrix<T,N,M> expr);
+
+__generic<T : __BuiltinArithmeticType> __intrinsic_op T WavePrefixSum(T expr);
+__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> WavePrefixSum(vector<T,N> expr);
+__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> WavePrefixSum(matrix<T,N,M> expr);
+
+__generic<T : __BuiltinType> __intrinsic_op T WaveReadFirstLane(T expr);
+__generic<T : __BuiltinType, let N : int> __intrinsic_op vector<T,N> WaveReadFirstLane(vector<T,N> expr);
+__generic<T : __BuiltinType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> WaveReadFirstLane(matrix<T,N,M> expr);
+
+__generic<T : __BuiltinType> __intrinsic_op T WaveReadLaneAt(T expr, int laneIndex);
+__generic<T : __BuiltinType, let N : int> __intrinsic_op vector<T,N> WaveReadLaneAt(vector<T,N> expr, int laneIndex);
+__generic<T : __BuiltinType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> WaveReadLaneAt(matrix<T,N,M> expr, int laneIndex);
+
+// `typedef`s to help with the fact that HLSL has been sorta-kinda case insensitive at various points
+typedef Texture2D texture2D;
+
+${{{{
+
+// Component-wise multiplication ops: emit a matrix-by-matrix overload for each multiply operator.
+for(auto op : binaryOps)
+{
+    switch (op.opCode)
+    {
+    default:
+        continue;
+    // Only the two multiply op codes below fall through to code generation; `continue` above moves on to the next entry of `binaryOps`.
+    case kIROp_Mul:
+    case kIRPseudoOp_MulAssign:
+        break;
+    }
+    // Emit one overload per base type whose flags share at least one bit with the op's flags.
+    for (auto type : kBaseTypes)
+    {
+        if ((type.flags & op.flags) == 0)
+            continue;
+        // Both operands and the result use the same element type name.
+        char const* leftType = type.name;
+        char const* rightType = leftType;
+        char const* resultType = leftType;
+        // Ops flagged ASSIGNMENT declare their left operand as `in out`; all others leave it unqualified.
+        char const* leftQual = "";
+        if(op.flags & ASSIGNMENT) leftQual = "in out ";
+        // The declaration is generic over the matrix dimensions N and M; the op code is written into `__intrinsic_op(...)` as an integer.
+        sb << "__generic<let N : int, let M : int> ";
+        sb << "__intrinsic_op(" << int(op.opCode) << ") matrix<" << resultType << ",N,M> operator" << op.opName << "(" << leftQual << "matrix<" << leftType << ",N,M> left, matrix<" << rightType << ",N,M> right);\n";
+    }
+}
+
+//
+
+// Buffer types: generate one generic `<prefix>Buffer` struct declaration per access level.
+// Each table entry pairs the type-name prefix with the resource access value used to build the type's flavor.
+static const struct {
+    char const*         name;
+    SlangResourceAccess access;
+} kBaseBufferAccessLevels[] = {
+    { "",                   SLANG_RESOURCE_ACCESS_READ },
+    { "RW",                 SLANG_RESOURCE_ACCESS_READ_WRITE },
+    { "RasterizerOrdered",  SLANG_RESOURCE_ACCESS_RASTER_ORDERED },
+};
+static const int kBaseBufferAccessLevelCount = sizeof(kBaseBufferAccessLevels) / sizeof(kBaseBufferAccessLevels[0]);
+// Emit the struct for every entry of the table above.
+for (int aa = 0; aa < kBaseBufferAccessLevelCount; ++aa)
+{
+    // Header: a generic struct tagged `__magic_type(Texture, <flavor>)`, where the flavor combines the buffer shape with this entry's access.
+    sb << "__generic<T> __magic_type(Texture, ";
+    sb << ResourceType::makeFlavor(ResourceType::Shape::ShapeBuffer, kBaseBufferAccessLevels[aa].access);
+    sb << ") struct ";
+    sb << kBaseBufferAccessLevels[aa].name;
+    sb << "Buffer {\n";
+
+    sb << "__intrinsic_op void GetDimensions(out uint dim);\n";
+    // The single-argument Load carries a GLSL target intrinsic that maps it to texelFetch.
+    sb << "__target_intrinsic(glsl, \"texelFetch($$P, $0)$$z\")\n";
+    sb << "__intrinsic_op T Load(int location);\n";
+    // The status-returning Load is emitted without a GLSL target intrinsic.
+    sb << "__intrinsic_op T Load(int location, out uint status);\n";
+    // The subscript's GLSL mapping wraps the uint index in int(...) before calling texelFetch.
+    sb << "__target_intrinsic(glsl, \"texelFetch($$P, int($0))$$z\")\n";
+    sb << "__intrinsic_op __subscript(uint index) -> T";
+    // Any access other than READ gets explicit get/set accessors; READ just terminates the declaration.
+    if (kBaseBufferAccessLevels[aa].access != SLANG_RESOURCE_ACCESS_READ)
+    {
+        sb << " { get; set; }\n";
+    }
+    else
+    {
+        sb << ";\n";
+    }
+
+    sb << "};\n";
+}
+
+}}}}
+\ No newline at end of file
diff --git a/source/slang/hlsl.meta.slang.cpp b/source/slang/hlsl.meta.slang.cpp
new file mode 100644
index 000000000..e9e2277e6
--- /dev/null
+++ b/source/slang/hlsl.meta.slang.cpp
@@ -0,0 +1,1066 @@
+sb << "// Slang HLSL compatibility library\n";
+sb << "\n";
+sb << "typedef uint UINT;\n";
+sb << "\n";
+sb << "__generic<T> __magic_type(HLSLAppendStructuredBufferType) struct AppendStructuredBuffer\n";
+sb << "{\n";
+sb << "    __intrinsic_op void Append(T value);\n";
+sb << "\n";
+sb << "    __intrinsic_op void GetDimensions(\n";
+sb << "        out uint numStructs,\n";
+sb << "        out uint stride);\n";
+sb << "};\n";
+sb << "\n";
+sb << "__magic_type(HLSLByteAddressBufferType) struct ByteAddressBuffer\n";
+sb << "{\n";
+sb << "    __intrinsic_op void GetDimensions(\n";
+sb << "        out uint dim);\n";
+sb << "\n";
+sb << "    __intrinsic_op uint Load(int location);\n";
+sb << "    __intrinsic_op uint Load(int location, out uint status);\n";
+sb << "\n";
+sb << "    __intrinsic_op uint2 Load2(int location);\n";
+sb << "    __intrinsic_op uint2 Load2(int location, out uint status);\n";
+sb << "\n";
+sb << "    __intrinsic_op uint3 Load3(int location);\n";
+sb << "    __intrinsic_op uint3 Load3(int location, out uint status);\n";
+sb << "\n";
+sb << "    __intrinsic_op uint4 Load4(int location);\n";
+sb << "    __intrinsic_op uint4 Load4(int location, out uint status);\n";
+sb << "};\n";
+sb << "\n";
+sb << "__generic<T> __magic_type(HLSLStructuredBufferType) struct StructuredBuffer\n";
+sb << "{\n";
+sb << "    __intrinsic_op void GetDimensions(\n";
+sb << "        out uint numStructs,\n";
+sb << "        out uint stride);\n";
+sb << "\n";
+sb << "    __intrinsic_op T Load(int location);\n";
+sb << "    __intrinsic_op T Load(int location, out uint status);\n";
+sb << "\n";
+sb << "    __intrinsic_op __subscript(uint index) -> T;\n";
+sb << "};\n";
+sb << "\n";
+sb << "__generic<T> __magic_type(HLSLConsumeStructuredBufferType) struct ConsumeStructuredBuffer\n";
+sb << "{\n";
+sb << "    __intrinsic_op T Consume();\n";
+sb << "\n";
+sb << "    __intrinsic_op void GetDimensions(\n";
+sb << "        out uint numStructs,\n";
+sb << "        out uint stride);\n";
+sb << "};\n";
+sb << "\n";
+sb << "__generic<T, let N : int> __magic_type(HLSLInputPatchType) struct InputPatch\n";
+sb << "{\n";
+sb << "    __intrinsic_op __subscript(uint index) -> T;\n";
+sb << "};\n";
+sb << "\n";
+sb << "__generic<T, let N : int> __magic_type(HLSLOutputPatchType) struct OutputPatch\n";
+sb << "{\n";
+sb << "    __intrinsic_op __subscript(uint index) -> T { set; }\n";
+sb << "};\n";
+sb << "\n";
+sb << "__magic_type(HLSLRWByteAddressBufferType) struct RWByteAddressBuffer\n";
+sb << "{\n";
+sb << "    // Note(tfoley): supports alll operations from `ByteAddressBuffer`\n";
+sb << "    // TODO(tfoley): can this be made a sub-type?\n";
+sb << "\n";
+sb << "    __intrinsic_op void GetDimensions(\n";
+sb << "        out uint dim);\n";
+sb << "\n";
+sb << "    __intrinsic_op uint Load(int location);\n";
+sb << "    __intrinsic_op uint Load(int location, out uint status);\n";
+sb << "\n";
+sb << "    __intrinsic_op uint2 Load2(int location);\n";
+sb << "    __intrinsic_op uint2 Load2(int location, out uint status);\n";
+sb << "\n";
+sb << "    __intrinsic_op uint3 Load3(int location);\n";
+sb << "    __intrinsic_op uint3 Load3(int location, out uint status);\n";
+sb << "\n";
+sb << "    __intrinsic_op uint4 Load4(int location);\n";
+sb << "    __intrinsic_op uint4 Load4(int location, out uint status);\n";
+sb << "\n";
+sb << "    // Added operations:\n";
+sb << "\n";
+sb << "    __intrinsic_op void InterlockedAdd(\n";
+sb << "        UINT dest,\n";
+sb << "        UINT value,\n";
+sb << "        out UINT original_value);\n";
+sb << "    __intrinsic_op void InterlockedAdd(\n";
+sb << "        UINT dest,\n";
+sb << "        UINT value);\n";
+sb << "\n";
+sb << "    __intrinsic_op void InterlockedAnd(\n";
+sb << "        UINT dest,\n";
+sb << "        UINT value,\n";
+sb << "        out UINT original_value);\n";
+sb << "    __intrinsic_op void InterlockedAnd(\n";
+sb << "        UINT dest,\n";
+sb << "        UINT value);\n";
+sb << "\n";
+sb << "    __intrinsic_op void InterlockedCompareExchange(\n";
+sb << "        UINT dest,\n";
+sb << "        UINT compare_value,\n";
+sb << "        UINT value,\n";
+sb << "        out UINT original_value);\n";
+sb << "    __intrinsic_op void InterlockedCompareExchange(\n";
+sb << "        UINT dest,\n";
+sb << "        UINT compare_value,\n";
+sb << "        UINT value);\n";
+sb << "\n";
+sb << "    __intrinsic_op void InterlockedCompareStore(\n";
+sb << "        UINT dest,\n";
+sb << "        UINT compare_value,\n";
+sb << "        UINT value);\n";
+sb << "    __intrinsic_op void InterlockedCompareStore(\n";
+sb << "        UINT dest,\n";
+sb << "        UINT compare_value);\n";
+sb << "\n";
+sb << "    __intrinsic_op void InterlockedExchange(\n";
+sb << "        UINT dest,\n";
+sb << "        UINT value,\n";
+sb << "        out UINT original_value);\n";
+sb << "    __intrinsic_op void InterlockedExchange(\n";
+sb << "        UINT dest,\n";
+sb << "        UINT value);\n";
+sb << "\n";
+sb << "    __intrinsic_op void InterlockedMax(\n";
+sb << "        UINT dest,\n";
+sb << "        UINT value,\n";
+sb << "        out UINT original_value);\n";
+sb << "    __intrinsic_op void InterlockedMax(\n";
+sb << "        UINT dest,\n";
+sb << "        UINT value);\n";
+sb << "\n";
+sb << "    __intrinsic_op void InterlockedMin(\n";
+sb << "        UINT dest,\n";
+sb << "        UINT value,\n";
+sb << "        out UINT original_value);\n";
+sb << "    __intrinsic_op void InterlockedMin(\n";
+sb << "        UINT dest,\n";
+sb << "        UINT value);\n";
+sb << "\n";
+sb << "    __intrinsic_op void InterlockedOr(\n";
+sb << "        UINT dest,\n";
+sb << "        UINT value,\n";
+sb << "        out UINT original_value);\n";
+sb << "    __intrinsic_op void InterlockedOr(\n";
+sb << "        UINT dest,\n";
+sb << "        UINT value);\n";
+sb << "\n";
+sb << "    __intrinsic_op void InterlockedXor(\n";
+sb << "        UINT dest,\n";
+sb << "        UINT value,\n";
+sb << "        out UINT original_value);\n";
+sb << "    __intrinsic_op void InterlockedXor(\n";
+sb << "        UINT dest,\n";
+sb << "        UINT value);\n";
+sb << "\n";
+sb << "    __intrinsic_op void Store(\n";
+sb << "        uint address,\n";
+sb << "        uint value);\n";
+sb << "\n";
+sb << "    __intrinsic_op void Store2(\n";
+sb << "        uint address,\n";
+sb << "        uint2 value);\n";
+sb << "\n";
+sb << "    __intrinsic_op void Store3(\n";
+sb << "        uint address,\n";
+sb << "        uint3 value);\n";
+sb << "\n";
+sb << "    __intrinsic_op void Store4(\n";
+sb << "        uint address,\n";
+sb << "        uint4 value);\n";
+sb << "};\n";
+sb << "\n";
+sb << "__generic<T> __magic_type(HLSLRWStructuredBufferType) struct RWStructuredBuffer\n"; // emitted Slang declaration of the HLSL RWStructuredBuffer<T> type
+sb << "{\n";
+sb << "    __intrinsic_op uint DecrementCounter();\n";
+sb << "\n";
+sb << "    __intrinsic_op void GetDimensions(\n";
+sb << "        out uint numStructs,\n";
+sb << "        out uint stride);\n";
+sb << "\n";
+sb << "    __intrinsic_op uint IncrementCounter();\n"; // HLSL's IncrementCounter returns the counter value (uint), like DecrementCounter above; it was declared void
+sb << "\n";
+sb << "    __intrinsic_op T Load(int location);\n";
+sb << "    __intrinsic_op T Load(int location, out uint status);\n";
+sb << "\n";
+sb << "    __intrinsic_op __subscript(uint index) -> T { get; set; }\n";
+sb << "};\n";
+sb << "\n";
+sb << "__generic<T> __magic_type(HLSLPointStreamType) struct PointStream\n";
+sb << "{\n";
+sb << "    void Append(T value);\n";
+sb << "    void RestartStrip();\n";
+sb << "};\n";
+sb << "\n";
+sb << "__generic<T> __magic_type(HLSLLineStreamType) struct LineStream\n";
+sb << "{\n";
+sb << "    void Append(T value);\n";
+sb << "    void RestartStrip();\n";
+sb << "};\n";
+sb << "\n";
+sb << "__generic<T> __magic_type(HLSLTriangleStreamType) struct TriangleStream\n";
+sb << "{\n";
+sb << "    void Append(T value);\n";
+sb << "    void RestartStrip();\n";
+sb << "};\n";
+sb << "\n";
+sb << "// Note(tfoley): Trying to systematically add all the HLSL builtins\n";
+sb << "\n";
+sb << "// Try to terminate the current draw or dispatch call (HLSL SM 4.0)\n";
+sb << "__intrinsic_op void abort();\n";
+sb << "\n";
+sb << "// Absolute value (HLSL SM 1.0)\n";
+sb << "__generic<T : __BuiltinSignedArithmeticType> __intrinsic_op T abs(T x);\n";
+sb << "__generic<T : __BuiltinSignedArithmeticType, let N : int> __intrinsic_op vector<T,N> abs(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinSignedArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> abs(matrix<T,N,M> x);\n";
+sb << "\n";
+sb << "// Inverse cosine (HLSL SM 1.0)\n";
+sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op T acos(T x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> acos(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> acos(matrix<T,N,M> x);\n";
+sb << "\n";
+sb << "// Test if all components are non-zero (HLSL SM 1.0)\n"; // HLSL `all` reduces its argument to one bool, so every overload below returns bool
+sb << "__generic<T : __BuiltinType> __intrinsic_op bool all(T x);\n";
+sb << "__generic<T : __BuiltinType, let N : int> __intrinsic_op bool all(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinType, let N : int, let M : int> __intrinsic_op bool all(matrix<T,N,M> x);\n";
+sb << "\n";
+sb << "// Barrier for writes to all memory spaces (HLSL SM 5.0)\n";
+sb << "__intrinsic_op void AllMemoryBarrier();\n";
+sb << "\n";
+sb << "// Thread-group sync and barrier for writes to all memory spaces (HLSL SM 5.0)\n";
+sb << "__intrinsic_op void AllMemoryBarrierWithGroupSync();\n";
+sb << "\n";
+sb << "// Test if any components is non-zero (HLSL SM 1.0)\n"; // HLSL `any` reduces its argument to one bool, so every overload below returns bool
+sb << "__generic<T : __BuiltinType> __intrinsic_op bool any(T x);\n";
+sb << "__generic<T : __BuiltinType, let N : int> __intrinsic_op bool any(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinType, let N : int, let M : int> __intrinsic_op bool any(matrix<T,N,M> x);\n";
+sb << "\n";
+sb << "\n";
+sb << "// Reinterpret bits as a double (HLSL SM 5.0)\n";
+sb << "__intrinsic_op double asdouble(uint lowbits, uint highbits);\n";
+sb << "\n";
+sb << "// Reinterpret bits as a float (HLSL SM 4.0)\n";
+sb << "__intrinsic_op float asfloat( int x);\n";
+sb << "__intrinsic_op float asfloat(uint x);\n";
+sb << "__generic<let N : int> __intrinsic_op vector<float,N> asfloat(vector< int,N> x);\n";
+sb << "__generic<let N : int> __intrinsic_op vector<float,N> asfloat(vector<uint,N> x);\n";
+sb << "__generic<let N : int, let M : int> __intrinsic_op matrix<float,N,M> asfloat(matrix< int,N,M> x);\n";
+sb << "__generic<let N : int, let M : int> __intrinsic_op matrix<float,N,M> asfloat(matrix<uint,N,M> x);\n";
+sb << "\n";
+sb << "\n";
+sb << "// Inverse sine (HLSL SM 1.0)\n";
+sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op T asin(T x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> asin(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> asin(matrix<T,N,M> x);\n";
+sb << "\n";
+sb << "// Reinterpret bits as an int (HLSL SM 4.0)\n";
+sb << "__intrinsic_op int asint(float x);\n";
+sb << "__intrinsic_op int asint(uint x);\n";
+sb << "__generic<let N : int> __intrinsic_op vector<int,N> asint(vector<float,N> x);\n";
+sb << "__generic<let N : int> __intrinsic_op vector<int,N> asint(vector<uint,N> x);\n";
+sb << "__generic<let N : int, let M : int> __intrinsic_op matrix<int,N,M> asint(matrix<float,N,M> x);\n";
+sb << "__generic<let N : int, let M : int> __intrinsic_op matrix<int,N,M> asint(matrix<uint,N,M> x);\n";
+sb << "\n";
+sb << "// Reinterpret bits of double as a uint (HLSL SM 5.0)\n";
+sb << "__intrinsic_op void asuint(double value, out uint lowbits, out uint highbits);\n";
+sb << "\n";
+sb << "// Reinterpret bits as a uint (HLSL SM 4.0)\n";
+sb << "__intrinsic_op uint asuint(float x);\n";
+sb << "__intrinsic_op uint asuint(int x);\n";
+sb << "__generic<let N : int> __intrinsic_op vector<uint,N> asuint(vector<float,N> x);\n";
+sb << "__generic<let N : int> __intrinsic_op vector<uint,N> asuint(vector<int,N> x);\n";
+sb << "__generic<let N : int, let M : int> __intrinsic_op matrix<uint,N,M> asuint(matrix<float,N,M> x);\n";
+sb << "__generic<let N : int, let M : int> __intrinsic_op matrix<uint,N,M> asuint(matrix<int,N,M> x);\n";
+sb << "\n";
+sb << "// Inverse tangent (HLSL SM 1.0)\n";
+sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op T atan(T x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> atan(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> atan(matrix<T,N,M> x);\n";
+sb << "\n";
+sb << "__generic<T : __BuiltinFloatingPointType>\n";
+sb << "__target_intrinsic(glsl,\"atan($0,$1)\")\n";
+sb << "__intrinsic_op\n";
+sb << "T atan2(T y, T x);\n";
+sb << "\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int>\n";
+sb << "__target_intrinsic(glsl,\"atan($0,$1)\")\n";
+sb << "__intrinsic_op\n";
+sb << "vector<T,N> atan2(vector<T,N> y, vector<T,N> x);\n";
+sb << "\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>\n";
+sb << "__target_intrinsic(glsl,\"atan($0,$1)\")\n";
+sb << "__intrinsic_op\n";
+sb << "matrix<T,N,M> atan2(matrix<T,N,M> y, matrix<T,N,M> x);\n";
+sb << "\n";
+sb << "// Ceiling (HLSL SM 1.0)\n";
+sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op T ceil(T x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> ceil(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> ceil(matrix<T,N,M> x);\n";
+sb << "\n";
+sb << "\n";
+sb << "// Check access status to tiled resource\n";
+sb << "__intrinsic_op bool CheckAccessFullyMapped(uint status);\n";
+sb << "\n";
+sb << "// Clamp (HLSL SM 1.0)\n";
+sb << "__generic<T : __BuiltinArithmeticType> __intrinsic_op T clamp(T x, T min, T max);\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> clamp(vector<T,N> x, vector<T,N> min, vector<T,N> max);\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> clamp(matrix<T,N,M> x, matrix<T,N,M> min, matrix<T,N,M> max);\n";
+sb << "\n";
+sb << "// Clip (discard) fragment conditionally\n";
+sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op void clip(T x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op void clip(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op void clip(matrix<T,N,M> x);\n";
+sb << "\n";
+sb << "// Cosine\n";
+sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op T cos(T x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> cos(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> cos(matrix<T,N,M> x);\n";
+sb << "\n";
+sb << "// Hyperbolic cosine\n";
+sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op T cosh(T x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> cosh(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> cosh(matrix<T,N,M> x);\n";
+sb << "\n";
+sb << "// Population count\n";
+sb << "__intrinsic_op uint countbits(uint value);\n";
+sb << "\n";
+sb << "// Cross product\n";
+sb << "__generic<T : __BuiltinArithmeticType> __intrinsic_op vector<T,3> cross(vector<T,3> x, vector<T,3> y);\n";
+sb << "\n";
+sb << "// Convert encoded color\n";
+sb << "__intrinsic_op int4 D3DCOLORtoUBYTE4(float4 x);\n";
+sb << "\n";
+sb << "// Partial-difference derivatives\n";
+sb << "__generic<T : __BuiltinFloatingPointType>\n";
+sb << "__target_intrinsic(glsl, dFdx)\n";
+sb << "__intrinsic_op\n";
+sb << "T ddx(T x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int>\n";
+sb << "__target_intrinsic(glsl, dFdx)\n";
+sb << "__intrinsic_op\n";
+sb << "vector<T,N> ddx(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>\n";
+sb << "__target_intrinsic(glsl, dFdx)\n";
+sb << "__intrinsic_op\n";
+sb << "matrix<T,N,M> ddx(matrix<T,N,M> x);\n";
+sb << "\n";
+sb << "__generic<T : __BuiltinFloatingPointType>\n";
+sb << "__glsl_extension(GL_ARB_derivative_control)\n";
+sb << "__target_intrinsic(glsl, dFdxCoarse)\n";
+sb << "__intrinsic_op\n";
+sb << "T ddx_coarse(T x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int>\n";
+sb << "__glsl_extension(GL_ARB_derivative_control)\n";
+sb << "__target_intrinsic(glsl, dFdxCoarse)\n";
+sb << "__intrinsic_op\n";
+sb << "vector<T,N> ddx_coarse(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>\n";
+sb << "__glsl_extension(GL_ARB_derivative_control)\n";
+sb << "__target_intrinsic(glsl, dFdxCoarse)\n";
+sb << "__intrinsic_op\n";
+sb << "matrix<T,N,M> ddx_coarse(matrix<T,N,M> x);\n";
+sb << "\n";
+sb << "__generic<T : __BuiltinFloatingPointType>\n";
+sb << "__glsl_extension(GL_ARB_derivative_control)\n";
+sb << "__target_intrinsic(glsl, dFdxFine)\n";
+sb << "__intrinsic_op\n";
+sb << "T ddx_fine(T x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int>\n";
+sb << "__glsl_extension(GL_ARB_derivative_control)\n";
+sb << "__target_intrinsic(glsl, dFdxFine)\n";
+sb << "__intrinsic_op\n";
+sb << "vector<T,N> ddx_fine(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>\n";
+sb << "__glsl_extension(GL_ARB_derivative_control)\n";
+sb << "__target_intrinsic(glsl, dFdxFine)\n";
+sb << "__intrinsic_op\n";
+sb << "matrix<T,N,M> ddx_fine(matrix<T,N,M> x);\n";
+sb << "\n";
+sb << "__generic<T : __BuiltinFloatingPointType>\n";
+sb << "__target_intrinsic(glsl, dFdy)\n";
+sb << "__intrinsic_op\n";
+sb << "T ddy(T x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int>\n";
+sb << "__target_intrinsic(glsl, dFdy)\n";
+sb << "__intrinsic_op\n";
+sb << "vector<T,N> ddy(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>\n";
+sb << "__target_intrinsic(glsl, dFdy)\n";
+sb << "__intrinsic_op\n";
+sb << " matrix<T,N,M> ddy(matrix<T,N,M> x);\n";
+sb << "\n";
+sb << "__generic<T : __BuiltinFloatingPointType>\n";
+sb << "__glsl_extension(GL_ARB_derivative_control)\n";
+sb << "__target_intrinsic(glsl, dFdyCoarse)\n";
+sb << "__intrinsic_op\n";
+sb << "T ddy_coarse(T x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int>\n";
+sb << "__glsl_extension(GL_ARB_derivative_control)\n";
+sb << "__target_intrinsic(glsl, dFdyCoarse)\n";
+sb << "__intrinsic_op\n";
+sb << "vector<T,N> ddy_coarse(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>\n";
+sb << "__glsl_extension(GL_ARB_derivative_control)\n";
+sb << "__target_intrinsic(glsl, dFdyCoarse)\n";
+sb << "__intrinsic_op\n";
+sb << "matrix<T,N,M> ddy_coarse(matrix<T,N,M> x);\n";
+sb << "\n";
+sb << "__generic<T : __BuiltinFloatingPointType>\n";
+sb << "__glsl_extension(GL_ARB_derivative_control)\n";
+sb << "__target_intrinsic(glsl, dFdyFine)\n";
+sb << "__intrinsic_op\n";
+sb << "T ddy_fine(T x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int>\n";
+sb << "__glsl_extension(GL_ARB_derivative_control)\n";
+sb << "__target_intrinsic(glsl, dFdyFine)\n";
+sb << "__intrinsic_op\n";
+sb << "vector<T,N> ddy_fine(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>\n";
+sb << "__glsl_extension(GL_ARB_derivative_control)\n";
+sb << "__target_intrinsic(glsl, dFdyFine)\n";
+sb << "__intrinsic_op\n";
+sb << "matrix<T,N,M> ddy_fine(matrix<T,N,M> x);\n";
+sb << "\n";
+sb << "\n";
+sb << "// Radians to degrees\n";
+sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op T degrees(T x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> degrees(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> degrees(matrix<T,N,M> x);\n";
+sb << "\n";
+sb << "// Matrix determinant\n";
+sb << "\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op T determinant(matrix<T,N,N> m);\n";
+sb << "\n";
+sb << "// Barrier for device memory\n";
+sb << "__intrinsic_op void DeviceMemoryBarrier();\n";
+sb << "__intrinsic_op void DeviceMemoryBarrierWithGroupSync();\n";
+sb << "\n";
+sb << "// Vector distance\n";
+sb << "\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op T distance(vector<T,N> x, vector<T,N> y);\n";
+sb << "\n";
+sb << "// Vector dot product\n";
+sb << "\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op T dot(vector<T,N> x, vector<T,N> y);\n";
+sb << "\n";
+sb << "// Helper for computing distance terms for lighting (obsolete)\n";
+sb << "\n";
+sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op vector<T,4> dst(vector<T,4> x, vector<T,4> y);\n";
+sb << "\n";
+sb << "// Error message\n";
+sb << "\n";
+sb << "// __intrinsic_op void errorf( string format, ... );\n";
+sb << "\n";
+sb << "// Attribute evaluation\n";
+sb << "\n";
+sb << "__generic<T : __BuiltinArithmeticType> __intrinsic_op T EvaluateAttributeAtCentroid(T x);\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> EvaluateAttributeAtCentroid(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> EvaluateAttributeAtCentroid(matrix<T,N,M> x);\n";
+sb << "\n";
+sb << "__generic<T : __BuiltinArithmeticType> __intrinsic_op T EvaluateAttributeAtSample(T x, uint sampleindex);\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> EvaluateAttributeAtSample(vector<T,N> x, uint sampleindex);\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> EvaluateAttributeAtSample(matrix<T,N,M> x, uint sampleindex);\n";
+sb << "\n";
+sb << "__generic<T : __BuiltinArithmeticType> __intrinsic_op T EvaluateAttributeSnapped(T x, int2 offset);\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> EvaluateAttributeSnapped(vector<T,N> x, int2 offset);\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> EvaluateAttributeSnapped(matrix<T,N,M> x, int2 offset);\n";
+sb << "\n";
+sb << "// Base-e exponent\n";
+sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op T exp(T x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> exp(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> exp(matrix<T,N,M> x);\n";
+sb << "\n";
+sb << "// Base-2 exponent\n";
+sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op T exp2(T x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> exp2(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> exp2(matrix<T,N,M> x);\n";
+sb << "\n";
+sb << "// Convert 16-bit float stored in low bits of integer\n";
+sb << "__intrinsic_op float f16tof32(uint value);\n";
+sb << "__generic<let N : int> __intrinsic_op vector<float,N> f16tof32(vector<uint,N> value);\n";
+sb << "\n";
+sb << "// Convert to 16-bit float stored in low bits of integer\n";
+sb << "__intrinsic_op uint f32tof16(float value);\n";
+sb << "__generic<let N : int> __intrinsic_op vector<uint,N> f32tof16(vector<float,N> value);\n";
+sb << "\n";
+sb << "// Flip surface normal to face forward, if needed\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> faceforward(vector<T,N> n, vector<T,N> i, vector<T,N> ng);\n";
+sb << "\n";
+sb << "// Find first set bit starting at high bit and working down\n";
+sb << "__intrinsic_op int firstbithigh(int value);\n";
+sb << "__generic<let N : int> __intrinsic_op vector<int,N> firstbithigh(vector<int,N> value);\n";
+sb << "\n";
+sb << "__intrinsic_op uint firstbithigh(uint value);\n";
+sb << "__generic<let N : int> __intrinsic_op vector<uint,N> firstbithigh(vector<uint,N> value);\n";
+sb << "\n";
+sb << "// Find first set bit starting at low bit and working up\n";
+sb << "__intrinsic_op int firstbitlow(int value);\n";
+sb << "__generic<let N : int> __intrinsic_op vector<int,N> firstbitlow(vector<int,N> value);\n";
+sb << "\n";
+sb << "__intrinsic_op uint firstbitlow(uint value);\n";
+sb << "__generic<let N : int> __intrinsic_op vector<uint,N> firstbitlow(vector<uint,N> value);\n";
+sb << "\n";
+sb << "// Floor (HLSL SM 1.0)\n";
+sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op T floor(T x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> floor(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> floor(matrix<T,N,M> x);\n";
+sb << "\n";
+sb << "// Fused multiply-add for doubles\n";
+sb << "__intrinsic_op double fma(double a, double b, double c);\n";
+sb << "__generic<let N : int> __intrinsic_op vector<double, N> fma(vector<double, N> a, vector<double, N> b, vector<double, N> c);\n";
+sb << "__generic<let N : int, let M : int> __intrinsic_op matrix<double,N,M> fma(matrix<double,N,M> a, matrix<double,N,M> b, matrix<double,N,M> c);\n";
+sb << "\n";
+sb << "// Floating point remainder of x/y\n";
+sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op T fmod(T x, T y);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> fmod(vector<T,N> x, vector<T,N> y);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> fmod(matrix<T,N,M> x, matrix<T,N,M> y);\n";
+sb << "\n";
+sb << "// Fractional part\n";
+sb << "__generic<T : __BuiltinFloatingPointType>\n";
+sb << "__target_intrinsic(glsl, fract)\n";
+sb << "__intrinsic_op\n";
+sb << "T frac(T x);\n";
+sb << "\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int>\n";
+sb << "__target_intrinsic(glsl, fract)\n";
+sb << "__intrinsic_op\n";
+sb << "vector<T,N> frac(vector<T,N> x);\n";
+sb << "\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>\n";
+sb << "__target_intrinsic(glsl, fract)\n";
+sb << "__intrinsic_op\n";
+sb << "matrix<T,N,M> frac(matrix<T,N,M> x);\n";
+sb << "\n";
+sb << "// Split float into mantissa and exponent\n";
+sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op T frexp(T x, out T exp);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> frexp(vector<T,N> x, out vector<T,N> exp);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> frexp(matrix<T,N,M> x, out matrix<T,N,M> exp);\n";
+sb << "\n";
+sb << "// Texture filter width\n";
+sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op T fwidth(T x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> fwidth(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> fwidth(matrix<T,N,M> x);\n";
+sb << "\n";
+sb << "// Get number of samples in render target\n";
+sb << "__intrinsic_op uint GetRenderTargetSampleCount();\n";
+sb << "\n";
+sb << "// Get position of given sample\n";
+sb << "__intrinsic_op float2 GetRenderTargetSamplePosition(int Index);\n";
+sb << "\n";
+sb << "// Group memory barrier\n";
+sb << "__intrinsic_op void GroupMemoryBarrier();\n";
+sb << "__intrinsic_op void GroupMemoryBarrierWithGroupSync();\n";
+sb << "\n";
+sb << "// Atomics\n";
+sb << "__intrinsic_op void InterlockedAdd(in out  int dest,  int value, out  int original_value);\n";
+sb << "__intrinsic_op void InterlockedAdd(in out uint dest, uint value, out uint original_value);\n";
+sb << "\n";
+sb << "__intrinsic_op void InterlockedAnd(in out  int dest,  int value, out  int original_value);\n";
+sb << "__intrinsic_op void InterlockedAnd(in out uint dest, uint value, out uint original_value);\n";
+sb << "\n";
+sb << "__intrinsic_op void InterlockedCompareExchange(in out  int dest,  int compare_value,  int value, out  int original_value);\n";
+sb << "__intrinsic_op void InterlockedCompareExchange(in out uint dest, uint compare_value, uint value, out uint original_value);\n";
+sb << "\n";
+sb << "__intrinsic_op void InterlockedCompareStore(in out  int dest,  int compare_value,  int value);\n";
+sb << "__intrinsic_op void InterlockedCompareStore(in out uint dest, uint compare_value, uint value);\n";
+sb << "\n";
+sb << "__intrinsic_op void InterlockedExchange(in out  int dest,  int value, out  int original_value);\n";
+sb << "__intrinsic_op void InterlockedExchange(in out uint dest, uint value, out uint original_value);\n";
+sb << "\n";
+sb << "__intrinsic_op void InterlockedMax(in out  int dest,  int value, out  int original_value);\n";
+sb << "__intrinsic_op void InterlockedMax(in out uint dest, uint value, out uint original_value);\n";
+sb << "\n";
+sb << "__intrinsic_op void InterlockedMin(in out  int dest,  int value, out  int original_value);\n";
+sb << "__intrinsic_op void InterlockedMin(in out uint dest, uint value, out uint original_value);\n";
+sb << "\n";
+sb << "__intrinsic_op void InterlockedOr(in out  int dest,  int value, out  int original_value);\n";
+sb << "__intrinsic_op void InterlockedOr(in out uint dest, uint value, out uint original_value);\n";
+sb << "\n";
+sb << "__intrinsic_op void InterlockedXor(in out  int dest,  int value, out  int original_value);\n";
+sb << "__intrinsic_op void InterlockedXor(in out uint dest, uint value, out uint original_value);\n";
+sb << "\n";
+sb << "// Is floating-point value finite?\n";
+sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op bool isfinite(T x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<bool,N> isfinite(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<bool,N,M> isfinite(matrix<T,N,M> x);\n";
+sb << "\n";
+sb << "// Is floating-point value infinite?\n";
+sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op bool isinf(T x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<bool,N> isinf(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<bool,N,M> isinf(matrix<T,N,M> x);\n";
+sb << "\n";
+sb << "// Is floating-point value not-a-number?\n";
+sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op bool isnan(T x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<bool,N> isnan(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<bool,N,M> isnan(matrix<T,N,M> x);\n";
+sb << "\n";
+sb << "// Construct float from mantissa and exponent\n";
+sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op T ldexp(T x, T exp);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> ldexp(vector<T,N> x, vector<T,N> exp);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> ldexp(matrix<T,N,M> x, matrix<T,N,M> exp);\n";
+sb << "\n";
+sb << "// Vector length\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op T length(vector<T,N> x);\n";
+sb << "\n";
+sb << "// Linear interpolation\n";
+sb << "__generic<T : __BuiltinFloatingPointType>\n";
+sb << "__target_intrinsic(glsl, mix)\n";
+sb << "__intrinsic_op\n";
+sb << "T lerp(T x, T y, T s);\n";
+sb << "\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int>\n";
+sb << "__target_intrinsic(glsl, mix)\n";
+sb << "__intrinsic_op\n";
+sb << "vector<T,N> lerp(vector<T,N> x, vector<T,N> y, vector<T,N> s);\n";
+sb << "\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>\n";
+sb << "__target_intrinsic(glsl, mix)\n";
+sb << "__intrinsic_op\n";
+sb << "matrix<T,N,M> lerp(matrix<T,N,M> x, matrix<T,N,M> y, matrix<T,N,M> s);\n";
+sb << "\n";
+sb << "// Legacy lighting function (obsolete)\n";
+sb << "__intrinsic_op float4 lit(float n_dot_l, float n_dot_h, float m);\n";
+sb << "\n";
+sb << "// Base-e logarithm\n";
+sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op T log(T x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> log(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> log(matrix<T,N,M> x);\n";
+sb << "\n";
+sb << "// Base-10 logarithm\n";
+sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op T log10(T x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> log10(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> log10(matrix<T,N,M> x);\n";
+sb << "\n";
+sb << "// Base-2 logarithm\n";
+sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op T log2(T x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> log2(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> log2(matrix<T,N,M> x);\n";
+sb << "\n";
+sb << "// multiply-add\n";
+sb << "__generic<T : __BuiltinArithmeticType> __intrinsic_op T mad(T mvalue, T avalue, T bvalue);\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> mad(vector<T,N> mvalue, vector<T,N> avalue, vector<T,N> bvalue);\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> mad(matrix<T,N,M> mvalue, matrix<T,N,M> avalue, matrix<T,N,M> bvalue);\n";
+sb << "\n";
+sb << "// maximum\n";
+sb << "__generic<T : __BuiltinArithmeticType> __intrinsic_op T max(T x, T y);\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> max(vector<T,N> x, vector<T,N> y);\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> max(matrix<T,N,M> x, matrix<T,N,M> y);\n";
+sb << "\n";
+sb << "// minimum\n";
+sb << "__generic<T : __BuiltinArithmeticType> __intrinsic_op T min(T x, T y);\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> min(vector<T,N> x, vector<T,N> y);\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> min(matrix<T,N,M> x, matrix<T,N,M> y);\n";
+sb << "\n";
+sb << "// split into integer and fractional parts (both with same sign)\n";
+sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op T modf(T x, out T ip);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> modf(vector<T,N> x, out vector<T,N> ip);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> modf(matrix<T,N,M> x, out matrix<T,N,M> ip);\n";
+sb << "\n";
+sb << "// msad4 (whatever that is)\n";
+sb << "__intrinsic_op uint4 msad4(uint reference, uint2 source, uint4 accum);\n";
+sb << "\n";
+sb << "// General inner products\n";
+sb << "\n";
+sb << "// scalar-scalar\n";
+sb << "__generic<T : __BuiltinArithmeticType> __intrinsic_op T mul(T x, T y);\n";
+sb << "\n";
+sb << "// scalar-vector and vector-scalar\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> mul(vector<T,N> x, T y);\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> mul(T x, vector<T,N> y);\n";
+sb << "\n";
+sb << "// scalar-matrix and matrix-scalar\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int, let M :int> __intrinsic_op matrix<T,N,M> mul(matrix<T,N,M> x, T y);\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int, let M :int> __intrinsic_op matrix<T,N,M> mul(T x, matrix<T,N,M> y);\n";
+sb << "\n";
+sb << "// vector-vector (dot product)\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op(dot) T mul(vector<T,N> x, vector<T,N> y);\n";
+sb << "\n";
+sb << "// vector-matrix\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op(mulVectorMatrix) vector<T,M> mul(vector<T,N> x, matrix<T,N,M> y);\n";
+sb << "\n";
+sb << "// matrix-vector\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op(mulMatrixVector) vector<T,N> mul(matrix<T,N,M> x, vector<T,M> y);\n";
+sb << "\n";
+sb << "// matrix-matrix\n";
+sb << "__generic<T : __BuiltinArithmeticType, let R : int, let N : int, let C : int> __intrinsic_op(mulMatrixMatrix) matrix<T,R,C> mul(matrix<T,R,N> x, matrix<T,N,C> y);\n";
+sb << "\n";
+sb << "// noise (deprecated)\n";
+sb << "__intrinsic_op float noise(float x);\n";
+sb << "__generic<let N : int> __intrinsic_op float noise(vector<float, N> x);\n";
+sb << "\n";
+sb << "// Normalize a vector\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> normalize(vector<T,N> x);\n";
+sb << "\n";
+sb << "// Raise to a power\n";
+sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op T pow(T x, T y);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> pow(vector<T,N> x, vector<T,N> y);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> pow(matrix<T,N,M> x, matrix<T,N,M> y);\n";
+sb << "\n";
+sb << "// Output message\n";
+sb << "\n";
+sb << "// __intrinsic_op void printf( string format, ... );\n";
+sb << "\n";
+sb << "// Tessellation factor fixup routines\n";
+sb << "\n";
+sb << "__intrinsic_op void Process2DQuadTessFactorsAvg(\n";
+sb << "    in  float4 RawEdgeFactors,\n";
+sb << "    in  float2 InsideScale,\n";
+sb << "    out float4 RoundedEdgeTessFactors,\n";
+sb << "    out float2 RoundedInsideTessFactors,\n";
+sb << "    out float2 UnroundedInsideTessFactors);\n";
+sb << "\n";
+sb << "__intrinsic_op void Process2DQuadTessFactorsMax(\n";
+sb << "    in  float4 RawEdgeFactors,\n";
+sb << "    in  float2 InsideScale,\n";
+sb << "    out float4 RoundedEdgeTessFactors,\n";
+sb << "    out float2 RoundedInsideTessFactors,\n";
+sb << "    out float2 UnroundedInsideTessFactors);\n";
+sb << "\n";
+sb << "__intrinsic_op void Process2DQuadTessFactorsMin(\n";
+sb << "    in  float4 RawEdgeFactors,\n";
+sb << "    in  float2 InsideScale,\n";
+sb << "    out float4 RoundedEdgeTessFactors,\n";
+sb << "    out float2 RoundedInsideTessFactors,\n";
+sb << "    out float2 UnroundedInsideTessFactors);\n";
+sb << "\n";
+sb << "__intrinsic_op void ProcessIsolineTessFactors(\n";
+sb << "    in  float RawDetailFactor,\n";
+sb << "    in  float RawDensityFactor,\n";
+sb << "    out float RoundedDetailFactor,\n";
+sb << "    out float RoundedDensityFactor);\n";
+sb << "\n";
+sb << "__intrinsic_op void ProcessQuadTessFactorsAvg(\n";
+sb << "    in  float4 RawEdgeFactors,\n";
+sb << "    in  float InsideScale,\n";
+sb << "    out float4 RoundedEdgeTessFactors,\n";
+sb << "    out float2 RoundedInsideTessFactors,\n";
+sb << "    out float2 UnroundedInsideTessFactors);\n";
+sb << "\n";
+sb << "__intrinsic_op void ProcessQuadTessFactorsMax(\n";
+sb << "    in  float4 RawEdgeFactors,\n";
+sb << "    in  float InsideScale,\n";
+sb << "    out float4 RoundedEdgeTessFactors,\n";
+sb << "    out float2 RoundedInsideTessFactors,\n";
+sb << "    out float2 UnroundedInsideTessFactors);\n";
+sb << "\n";
+sb << "__intrinsic_op void ProcessQuadTessFactorsMin(\n";
+sb << "    in  float4 RawEdgeFactors,\n";
+sb << "    in  float InsideScale,\n";
+sb << "    out float4 RoundedEdgeTessFactors,\n";
+sb << "    out float2 RoundedInsideTessFactors,\n";
+sb << "    out float2 UnroundedInsideTessFactors);\n";
+sb << "\n";
+sb << "__intrinsic_op void ProcessTriTessFactorsAvg(\n";
+sb << "    in  float3 RawEdgeFactors,\n";
+sb << "    in  float InsideScale,\n";
+sb << "    out float3 RoundedEdgeTessFactors,\n";
+sb << "    out float RoundedInsideTessFactor,\n";
+sb << "    out float UnroundedInsideTessFactor);\n";
+sb << "\n";
+sb << "__intrinsic_op void ProcessTriTessFactorsMax(\n";
+sb << "    in  float3 RawEdgeFactors,\n";
+sb << "    in  float InsideScale,\n";
+sb << "    out float3 RoundedEdgeTessFactors,\n";
+sb << "    out float RoundedInsideTessFactor,\n";
+sb << "    out float UnroundedInsideTessFactor);\n";
+sb << "\n";
+sb << "__intrinsic_op void ProcessTriTessFactorsMin(\n"; // triangle tess-factor fixup, `Min` variant
+sb << "    in  float3 RawEdgeFactors,\n";
+sb << "    in  float InsideScale,\n";
+sb << "    out float3 RoundedEdgeTessFactors,\n";
+sb << "    out float RoundedInsideTessFactor,\n";       // scalar output: singular name, as in the Avg/Max variants
+sb << "    out float UnroundedInsideTessFactor);\n";    // scalar output: singular name, as in the Avg/Max variants
+sb << "\n";
+sb << "// Degrees to radians\n";
+sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op T radians(T x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> radians(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> radians(matrix<T,N,M> x);\n";
+sb << "\n";
+sb << "// Approximate reciprocal\n";
+sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op T rcp(T x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> rcp(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> rcp(matrix<T,N,M> x);\n";
+sb << "\n";
+sb << "// Reflect incident vector across plane with given normal\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int>\n";
+sb << "__intrinsic_op\n";
+sb << "vector<T,N> reflect(vector<T,N> i, vector<T,N> n);\n";
+sb << "\n";
+sb << "// Refract incident vector given surface normal and index of refraction\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int>\n";
+sb << "__intrinsic_op\n";
+sb << "vector<T,N> refract(vector<T,N> i, vector<T,N> n, float eta);\n";
+sb << "\n";
+sb << "// Reverse order of bits\n";
+sb << "__intrinsic_op uint reversebits(uint value);\n";
+sb << "__generic<let N : int> __intrinsic_op vector<uint,N> reversebits(vector<uint,N> value);\n"; // vector overload is a body-less builtin too, so it carries `__intrinsic_op` like the scalar form
+sb << "\n";
+sb << "// Round-to-nearest\n";
+sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op T round(T x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> round(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> round(matrix<T,N,M> x);\n";
+sb << "\n";
+sb << "// Reciprocal of square root\n";
+sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op T rsqrt(T x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> rsqrt(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> rsqrt(matrix<T,N,M> x);\n";
+sb << "\n";
+sb << "// Clamp value to [0,1] range\n";
+sb << "__generic<T : __BuiltinFloatingPointType>\n";
+sb << "__target_intrinsic(glsl, \"clamp($0, 0, 1)\") __intrinsic_op\n";
+sb << "T saturate(T x);\n";
+sb << "\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int>\n";
+sb << "__target_intrinsic(glsl, \"clamp($0, 0, 1)\") __intrinsic_op\n";
+sb << "vector<T,N> saturate(vector<T,N> x);\n";
+sb << "\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>\n";
+sb << "__target_intrinsic(glsl, \"clamp($0, 0, 1)\") __intrinsic_op\n";
+sb << "matrix<T,N,M> saturate(matrix<T,N,M> x);\n";
+sb << "\n";
+sb << "\n";
+sb << "// Extract sign of value\n";
+sb << "__generic<T : __BuiltinSignedArithmeticType> __intrinsic_op int sign(T x);\n";
+sb << "__generic<T : __BuiltinSignedArithmeticType, let N : int> __intrinsic_op vector<int,N> sign(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinSignedArithmeticType, let N : int, let M : int> __intrinsic_op matrix<int,N,M> sign(matrix<T,N,M> x);\n";
+sb << "\n";
+sb << "\n";
+sb << "// Sine\n";
+sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op T sin(T x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> sin(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> sin(matrix<T,N,M> x);\n";
+sb << "\n";
+sb << "// Sine and cosine\n";
+sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op void sincos(T x, out T s, out T c);\n"; // scalar form: generic over T only (no size parameter appears in the signature)
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op void sincos(vector<T,N> x, out vector<T,N> s, out vector<T,N> c);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op void sincos(matrix<T,N,M> x, out matrix<T,N,M> s, out matrix<T,N,M> c);\n";
+sb << "\n";
+sb << "// Hyperbolic Sine\n";
+sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op T sinh(T x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> sinh(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> sinh(matrix<T,N,M> x);\n";
+sb << "\n";
+sb << "// Smooth step (Hermite interpolation)\n";
+sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op T smoothstep(T min, T max, T x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> smoothstep(vector<T,N> min, vector<T,N> max, vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> smoothstep(matrix<T,N,M> min, matrix<T,N,M> max, matrix<T,N,M> x);\n";
+sb << "\n";
+sb << "// Square root\n";
+sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op T sqrt(T x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> sqrt(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> sqrt(matrix<T,N,M> x);\n";
+sb << "\n";
+sb << "// Step function\n";
+sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op T step(T y, T x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> step(vector<T,N> y, vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> step(matrix<T,N,M> y, matrix<T,N,M> x);\n";
+sb << "\n";
+sb << "// Tangent\n";
+sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op T tan(T x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> tan(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> tan(matrix<T,N,M> x);\n";
+sb << "\n";
+sb << "// Hyperbolic tangent\n";
+sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op T tanh(T x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> tanh(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> tanh(matrix<T,N,M> x);\n";
+sb << "\n";
+sb << "// Legacy texture-fetch operations\n";
+sb << "\n";
+sb << "/*\n";
+sb << "__intrinsic_op float4 tex1D(sampler1D s, float t);\n";
+sb << "__intrinsic_op float4 tex1D(sampler1D s, float t, float ddx, float ddy);\n";
+sb << "__intrinsic_op float4 tex1Dbias(sampler1D s, float4 t);\n";
+sb << "__intrinsic_op float4 tex1Dgrad(sampler1D s, float t, float ddx, float ddy);\n";
+sb << "__intrinsic_op float4 tex1Dlod(sampler1D s, float4 t);\n";
+sb << "__intrinsic_op float4 tex1Dproj(sampler1D s, float4 t);\n";
+sb << "\n";
+sb << "__intrinsic_op float4 tex2D(sampler2D s, float2 t);\n";
+sb << "__intrinsic_op float4 tex2D(sampler2D s, float2 t, float2 ddx, float2 ddy);\n";
+sb << "__intrinsic_op float4 tex2Dbias(sampler2D s, float4 t);\n";
+sb << "__intrinsic_op float4 tex2Dgrad(sampler2D s, float2 t, float2 ddx, float2 ddy);\n";
+sb << "__intrinsic_op float4 tex2Dlod(sampler2D s, float4 t);\n";
+sb << "__intrinsic_op float4 tex2Dproj(sampler2D s, float4 t);\n";
+sb << "\n";
+sb << "__intrinsic_op float4 tex3D(sampler3D s, float3 t);\n";
+sb << "__intrinsic_op float4 tex3D(sampler3D s, float3 t, float3 ddx, float3 ddy);\n";
+sb << "__intrinsic_op float4 tex3Dbias(sampler3D s, float4 t);\n";
+sb << "__intrinsic_op float4 tex3Dgrad(sampler3D s, float3 t, float3 ddx, float3 ddy);\n";
+sb << "__intrinsic_op float4 tex3Dlod(sampler3D s, float4 t);\n";
+sb << "__intrinsic_op float4 tex3Dproj(sampler3D s, float4 t);\n";
+sb << "\n";
+sb << "__intrinsic_op float4 texCUBE(samplerCUBE s, float3 t);\n";
+sb << "__intrinsic_op float4 texCUBE(samplerCUBE s, float3 t, float3 ddx, float3 ddy);\n";
+sb << "__intrinsic_op float4 texCUBEbias(samplerCUBE s, float4 t);\n";
+sb << "__intrinsic_op float4 texCUBEgrad(samplerCUBE s, float3 t, float3 ddx, float3 ddy);\n";
+sb << "__intrinsic_op float4 texCUBElod(samplerCUBE s, float4 t);\n";
+sb << "__intrinsic_op float4 texCUBEproj(samplerCUBE s, float4 t);\n";
+sb << "*/\n";
+sb << "\n";
+sb << "// Matrix transpose\n";
+sb << "__generic<T : __BuiltinType, let N : int, let M : int> __intrinsic_op matrix<T,M,N> transpose(matrix<T,N,M> x);\n";
+sb << "\n";
+sb << "// Truncate to integer\n";
+sb << "__generic<T : __BuiltinFloatingPointType> __intrinsic_op T trunc(T x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> trunc(vector<T,N> x);\n";
+sb << "__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> trunc(matrix<T,N,M> x);\n";
+sb << "\n";
+sb << "// Shader model 6.0 stuff\n";
+sb << "\n";
+sb << "__intrinsic_op uint GlobalOrderedCountIncrement(uint countToAppendForThisLane);\n";
+sb << "\n";
+sb << "__generic<T : __BuiltinType> __intrinsic_op T QuadReadLaneAt(T sourceValue, int quadLaneID);\n";
+sb << "__generic<T : __BuiltinType, let N : int> __intrinsic_op vector<T,N> QuadReadLaneAt(vector<T,N> sourceValue, int quadLaneID);\n";
+sb << "__generic<T : __BuiltinType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> QuadReadLaneAt(matrix<T,N,M> sourceValue, int quadLaneID);\n";
+sb << "\n";
+sb << "__generic<T : __BuiltinType> __intrinsic_op T QuadSwapX(T localValue);\n";
+sb << "__generic<T : __BuiltinType, let N : int> __intrinsic_op vector<T,N> QuadSwapX(vector<T,N> localValue);\n";
+sb << "__generic<T : __BuiltinType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> QuadSwapX(matrix<T,N,M> localValue);\n";
+sb << "\n";
+sb << "__generic<T : __BuiltinType> __intrinsic_op T QuadSwapY(T localValue);\n";
+sb << "__generic<T : __BuiltinType, let N : int> __intrinsic_op vector<T,N> QuadSwapY(vector<T,N> localValue);\n";
+sb << "__generic<T : __BuiltinType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> QuadSwapY(matrix<T,N,M> localValue);\n";
+sb << "\n";
+sb << "__generic<T : __BuiltinIntegerType> __intrinsic_op T WaveAllBitAnd(T expr);\n";
+sb << "__generic<T : __BuiltinIntegerType, let N : int> __intrinsic_op vector<T,N> WaveAllBitAnd(vector<T,N> expr);\n";
+sb << "__generic<T : __BuiltinIntegerType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> WaveAllBitAnd(matrix<T,N,M> expr);\n";
+sb << "\n";
+sb << "__generic<T : __BuiltinIntegerType> __intrinsic_op T WaveAllBitOr(T expr);\n";
+sb << "__generic<T : __BuiltinIntegerType, let N : int> __intrinsic_op vector<T,N> WaveAllBitOr(vector<T,N> expr);\n";
+sb << "__generic<T : __BuiltinIntegerType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> WaveAllBitOr(matrix<T,N,M> expr);\n";
+sb << "\n";
+sb << "__generic<T : __BuiltinIntegerType> __intrinsic_op T WaveAllBitXor(T expr);\n";
+sb << "__generic<T : __BuiltinIntegerType, let N : int> __intrinsic_op vector<T,N> WaveAllBitXor(vector<T,N> expr);\n";
+sb << "__generic<T : __BuiltinIntegerType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> WaveAllBitXor(matrix<T,N,M> expr);\n";
+sb << "\n";
+sb << "__generic<T : __BuiltinArithmeticType> __intrinsic_op T WaveAllMax(T expr);\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> WaveAllMax(vector<T,N> expr);\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> WaveAllMax(matrix<T,N,M> expr);\n";
+sb << "\n";
+sb << "__generic<T : __BuiltinArithmeticType> __intrinsic_op T WaveAllMin(T expr);\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> WaveAllMin(vector<T,N> expr);\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> WaveAllMin(matrix<T,N,M> expr);\n";
+sb << "\n";
+sb << "__generic<T : __BuiltinArithmeticType> __intrinsic_op T WaveAllProduct(T expr);\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> WaveAllProduct(vector<T,N> expr);\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> WaveAllProduct(matrix<T,N,M> expr);\n";
+sb << "\n";
+sb << "__generic<T : __BuiltinArithmeticType> __intrinsic_op T WaveAllSum(T expr);\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> WaveAllSum(vector<T,N> expr);\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> WaveAllSum(matrix<T,N,M> expr);\n";
+sb << "\n";
+sb << "__intrinsic_op bool WaveAllEqual(bool expr);\n";
+sb << "__intrinsic_op bool WaveAllTrue(bool expr);\n";
+sb << "__intrinsic_op bool WaveAnyTrue(bool expr);\n";
+sb << "\n";
+sb << "__intrinsic_op uint64_t WaveBallot(bool expr);\n"; // body-less builtin: marked `__intrinsic_op` like the other Wave* declarations
+sb << "\n";
+sb << "__intrinsic_op uint WaveGetLaneCount();\n";
+sb << "__intrinsic_op uint WaveGetLaneIndex();\n";
+sb << "__intrinsic_op uint WaveGetOrderedIndex();\n";
+sb << "\n";
+sb << "__intrinsic_op bool WaveIsHelperLane();\n";
+sb << "\n";
+sb << "__intrinsic_op bool WaveOnce();\n";
+sb << "\n";
+sb << "__generic<T : __BuiltinArithmeticType> __intrinsic_op T WavePrefixProduct(T expr);\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> WavePrefixProduct(vector<T,N> expr);\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> WavePrefixProduct(matrix<T,N,M> expr);\n";
+sb << "\n";
+sb << "__generic<T : __BuiltinArithmeticType> __intrinsic_op T WavePrefixSum(T expr);\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> WavePrefixSum(vector<T,N> expr);\n";
+sb << "__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> WavePrefixSum(matrix<T,N,M> expr);\n";
+sb << "\n";
+sb << "__generic<T : __BuiltinType> __intrinsic_op T WaveReadFirstLane(T expr);\n";
+sb << "__generic<T : __BuiltinType, let N : int> __intrinsic_op vector<T,N> WaveReadFirstLane(vector<T,N> expr);\n";
+sb << "__generic<T : __BuiltinType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> WaveReadFirstLane(matrix<T,N,M> expr);\n";
+sb << "\n";
+sb << "__generic<T : __BuiltinType> __intrinsic_op T WaveReadLaneAt(T expr, int laneIndex);\n";
+sb << "__generic<T : __BuiltinType, let N : int> __intrinsic_op vector<T,N> WaveReadLaneAt(vector<T,N> expr, int laneIndex);\n";
+sb << "__generic<T : __BuiltinType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> WaveReadLaneAt(matrix<T,N,M> expr, int laneIndex);\n";
+sb << "\n";
+sb << "// `typedef`s to help with the fact that HLSL has been sorta-kinda case insensitive at various points\n";
+sb << "typedef Texture2D texture2D;\n";
+sb << "\n";
+sb << "";
+
+
+// Component-wise multiplication ops
+// (matrix overloads of the multiply and multiply-assign operators)
+for(auto op : binaryOps)
+{
+    // Every operator other than multiply / multiply-assign is skipped.
+    bool const isMul       = int(op.opCode) == int(kIROp_Mul);
+    bool const isMulAssign = int(op.opCode) == int(kIRPseudoOp_MulAssign);
+    if (!isMul && !isMulAssign)
+        continue;
+
+    // Operators flagged ASSIGNMENT take their left operand as `in out`.
+    char const* lhsQualifier = (op.flags & ASSIGNMENT) ? "in out " : "";
+
+    for (auto type : kBaseTypes)
+    {
+        // Emit only for base types that share at least one flag bit with
+        // the operator.
+        if (type.flags & op.flags)
+        {
+            // Both operands and the result use the same element type.
+            char const* elementType = type.name;
+
+            // One declaration per (operator, element type) pair.
+            sb << "__generic<let N : int, let M : int> ";
+            sb << "__intrinsic_op(" << int(op.opCode) << ") matrix<" << elementType << ",N,M> operator" << op.opName;
+            sb << "(" << lhsQualifier << "matrix<" << elementType << ",N,M> left, matrix<" << elementType << ",N,M> right);\n";
+        }
+    }
+}
+
+//
+
+// Buffer types
+
+static const struct {
+    char const*         name;    // prefix written in front of "Buffer" in the emitted type name
+    SlangResourceAccess access;  // access level passed to `ResourceType::makeFlavor`
+} kBaseBufferAccessLevels[] = {
+    { "",                   SLANG_RESOURCE_ACCESS_READ },
+    { "RW",                 SLANG_RESOURCE_ACCESS_READ_WRITE },
+    { "RasterizerOrdered",  SLANG_RESOURCE_ACCESS_RASTER_ORDERED },
+};
+static const int kBaseBufferAccessLevelCount = sizeof(kBaseBufferAccessLevels) / sizeof(kBaseBufferAccessLevels[0]);
+
+// Emit one generic `<prefix>Buffer` struct declaration for every entry in the
+// table above.
+for (int levelIndex = 0; levelIndex != kBaseBufferAccessLevelCount; ++levelIndex)
+{
+    auto const& level = kBaseBufferAccessLevels[levelIndex];
+    bool const isReadOnly = (level.access == SLANG_RESOURCE_ACCESS_READ);
+
+    // Struct header: the `__magic_type` tag carries the flavor built from the
+    // buffer shape and this entry's access level.
+    sb << "__generic<T> __magic_type(Texture, ";
+    sb << ResourceType::makeFlavor(ResourceType::Shape::ShapeBuffer, level.access);
+    sb << ") struct " << level.name << "Buffer {\n";
+
+    // Size query.
+    sb << "__intrinsic_op void GetDimensions(out uint dim);\n";
+
+    // `Load` overloads; only the status-free form has a GLSL mapping.
+    sb << "__target_intrinsic(glsl, \"texelFetch($P, $0)$z\")\n";
+    sb << "__intrinsic_op T Load(int location);\n";
+    sb << "__intrinsic_op T Load(int location, out uint status);\n";
+
+    // Subscript: read-only buffers get a bare declaration, every other access
+    // level gets explicit `get`/`set` accessors.
+    sb << "__target_intrinsic(glsl, \"texelFetch($P, int($0))$z\")\n";
+    sb << "__intrinsic_op __subscript(uint index) -> T";
+    sb << (isReadOnly ? ";\n" : " { get; set; }\n");
+
+    // Close the struct declaration.
+    sb << "};\n";
+}
+
+sb << "";
diff --git a/source/slang/slang-stdlib.cpp b/source/slang/slang-stdlib.cpp
index 4d5fd6f87..cf45cbca8 100644
--- a/source/slang/slang-stdlib.cpp
+++ b/source/slang/slang-stdlib.cpp
@@ -8,1039 +8,6 @@
 #define STRINGIZE2(x) #x
 #define LINE_STRING STRINGIZE(__LINE__)
 
-enum { kCoreLibIncludeStringLine = __LINE__ + 1 };
-const char* kCoreLibIncludeStringChunks[] = { R"=(
-
-// A type that can be used as an operand for builtins
-interface __BuiltinType {}
-
-// A type that can be used for arithmetic operations
-interface __BuiltinArithmeticType : __BuiltinType {}
-
-// A type that logically has a sign (positive/negative/zero)
-interface __BuiltinSignedArithmeticType : __BuiltinArithmeticType {}
-
-// A type that can represent integers
-interface __BuiltinIntegerType : __BuiltinArithmeticType {}
-
-// A type that can represent non-integers
-interface __BuiltinRealType : __BuiltinArithmeticType {}
-
-// A type that uses a floating-point representation
-interface __BuiltinFloatingPointType : __BuiltinRealType, __BuiltinSignedArithmeticType {}
-
-__generic<T,U> __intrinsic_op(Sequence) U operator,(T left, U right);
-
-__generic<T> __intrinsic_op(select) T operator?:(bool condition, T ifTrue, T ifFalse);
-__generic<T, let N : int> __intrinsic_op(select) vector<T,N> operator?:(vector<bool,N> condition, vector<T,N> ifTrue, vector<T,N> ifFalse);
-
-)=" };
-
-
-enum { kHLSLLibIncludeStringLine = __LINE__+1 };
-const char * kHLSLLibIncludeStringChunks[] = { R"=(
-
-typedef uint UINT;
-
-__generic<T> __magic_type(HLSLAppendStructuredBufferType) struct AppendStructuredBuffer
-{
-    __intrinsic_op void Append(T value);
-
-    __intrinsic_op void GetDimensions(
-        out uint numStructs,
-        out uint stride);
-};
-
-__magic_type(HLSLByteAddressBufferType) struct ByteAddressBuffer
-{
-    __intrinsic_op void GetDimensions(
-        out uint dim);
-
-    __intrinsic_op uint Load(int location);
-    __intrinsic_op uint Load(int location, out uint status);
-
-    __intrinsic_op uint2 Load2(int location);
-    __intrinsic_op uint2 Load2(int location, out uint status);
-
-    __intrinsic_op uint3 Load3(int location);
-    __intrinsic_op uint3 Load3(int location, out uint status);
-
-    __intrinsic_op uint4 Load4(int location);
-    __intrinsic_op uint4 Load4(int location, out uint status);
-};
-
-__generic<T> __magic_type(HLSLStructuredBufferType) struct StructuredBuffer
-{
-    __intrinsic_op void GetDimensions(
-        out uint numStructs,
-        out uint stride);
-
-    __intrinsic_op T Load(int location);
-    __intrinsic_op T Load(int location, out uint status);
-
-    __intrinsic_op __subscript(uint index) -> T;
-};
-
-__generic<T> __magic_type(HLSLConsumeStructuredBufferType) struct ConsumeStructuredBuffer
-{
-    __intrinsic_op T Consume();
-
-    __intrinsic_op void GetDimensions(
-        out uint numStructs,
-        out uint stride);
-};
-
-__generic<T, let N : int> __magic_type(HLSLInputPatchType) struct InputPatch
-{
-    __intrinsic_op __subscript(uint index) -> T;
-};
-
-__generic<T, let N : int> __magic_type(HLSLOutputPatchType) struct OutputPatch
-{
-    __intrinsic_op __subscript(uint index) -> T { set; }
-};
-
-__magic_type(HLSLRWByteAddressBufferType) struct RWByteAddressBuffer
-{
-    // Note(tfoley): supports alll operations from `ByteAddressBuffer`
-    // TODO(tfoley): can this be made a sub-type?
-
-    __intrinsic_op void GetDimensions(
-        out uint dim);
-
-    __intrinsic_op uint Load(int location);
-    __intrinsic_op uint Load(int location, out uint status);
-
-    __intrinsic_op uint2 Load2(int location);
-    __intrinsic_op uint2 Load2(int location, out uint status);
-
-    __intrinsic_op uint3 Load3(int location);
-    __intrinsic_op uint3 Load3(int location, out uint status);
-
-    __intrinsic_op uint4 Load4(int location);
-    __intrinsic_op uint4 Load4(int location, out uint status);
-
-    // Added operations:
-
-    __intrinsic_op void InterlockedAdd(
-        UINT dest,
-        UINT value,
-        out UINT original_value);
-    __intrinsic_op void InterlockedAdd(
-        UINT dest,
-        UINT value);
-
-    __intrinsic_op void InterlockedAnd(
-        UINT dest,
-        UINT value,
-        out UINT original_value);
-    __intrinsic_op void InterlockedAnd(
-        UINT dest,
-        UINT value);
-
-    __intrinsic_op void InterlockedCompareExchange(
-        UINT dest,
-        UINT compare_value,
-        UINT value,
-        out UINT original_value);
-    __intrinsic_op void InterlockedCompareExchange(
-        UINT dest,
-        UINT compare_value,
-        UINT value);
-
-    __intrinsic_op void InterlockedCompareStore(
-        UINT dest,
-        UINT compare_value,
-        UINT value);
-    __intrinsic_op void InterlockedCompareStore(
-        UINT dest,
-        UINT compare_value);
-
-    __intrinsic_op void InterlockedExchange(
-        UINT dest,
-        UINT value,
-        out UINT original_value);
-    __intrinsic_op void InterlockedExchange(
-        UINT dest,
-        UINT value);
-
-    __intrinsic_op void InterlockedMax(
-        UINT dest,
-        UINT value,
-        out UINT original_value);
-    __intrinsic_op void InterlockedMax(
-        UINT dest,
-        UINT value);
-
-    __intrinsic_op void InterlockedMin(
-        UINT dest,
-        UINT value,
-        out UINT original_value);
-    __intrinsic_op void InterlockedMin(
-        UINT dest,
-        UINT value);
-
-    __intrinsic_op void InterlockedOr(
-        UINT dest,
-        UINT value,
-        out UINT original_value);
-    __intrinsic_op void InterlockedOr(
-        UINT dest,
-        UINT value);
-
-    __intrinsic_op void InterlockedXor(
-        UINT dest,
-        UINT value,
-        out UINT original_value);
-    __intrinsic_op void InterlockedXor(
-        UINT dest,
-        UINT value);
-
-    __intrinsic_op void Store(
-        uint address,
-        uint value);
-
-    __intrinsic_op void Store2(
-        uint address,
-        uint2 value);
-
-    __intrinsic_op void Store3(
-        uint address,
-        uint3 value);
-
-    __intrinsic_op void Store4(
-        uint address,
-        uint4 value);
-};
-
-__generic<T> __magic_type(HLSLRWStructuredBufferType) struct RWStructuredBuffer
-{
-    __intrinsic_op uint DecrementCounter();
-
-    __intrinsic_op void GetDimensions(
-        out uint numStructs,
-        out uint stride);
-
-    __intrinsic_op void IncrementCounter();
-
-    __intrinsic_op T Load(int location);
-    __intrinsic_op T Load(int location, out uint status);
-
-    __intrinsic_op __subscript(uint index) -> T { get; set; }
-};
-
-__generic<T> __magic_type(HLSLPointStreamType) struct PointStream
-{
-    void Append(T value);
-    void RestartStrip();
-};
-
-__generic<T> __magic_type(HLSLLineStreamType) struct LineStream
-{
-    void Append(T value);
-    void RestartStrip();
-};
-
-__generic<T> __magic_type(HLSLTriangleStreamType) struct TriangleStream
-{
-    void Append(T value);
-    void RestartStrip();
-};
-
-)=", R"=(
-
-// Note(tfoley): Trying to systematically add all the HLSL builtins
-
-// Try to terminate the current draw or dispatch call (HLSL SM 4.0)
-__intrinsic_op void abort();
-
-// Absolute value (HLSL SM 1.0)
-__generic<T : __BuiltinSignedArithmeticType> __intrinsic_op T abs(T x);
-__generic<T : __BuiltinSignedArithmeticType, let N : int> __intrinsic_op vector<T,N> abs(vector<T,N> x);
-__generic<T : __BuiltinSignedArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> abs(matrix<T,N,M> x);
-
-// Inverse cosine (HLSL SM 1.0)
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op T acos(T x);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> acos(vector<T,N> x);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> acos(matrix<T,N,M> x);
-
-// Test if all components are non-zero (HLSL SM 1.0)
-__generic<T : __BuiltinType> __intrinsic_op T all(T x);
-__generic<T : __BuiltinType, let N : int> __intrinsic_op vector<T,N> all(vector<T,N> x);
-__generic<T : __BuiltinType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> all(matrix<T,N,M> x);
-
-// Barrier for writes to all memory spaces (HLSL SM 5.0)
-__intrinsic_op void AllMemoryBarrier();
-
-// Thread-group sync and barrier for writes to all memory spaces (HLSL SM 5.0)
-__intrinsic_op void AllMemoryBarrierWithGroupSync();
-
-// Test if any components is non-zero (HLSL SM 1.0)
-__generic<T : __BuiltinType> __intrinsic_op T any(T x);
-__generic<T : __BuiltinType, let N : int> __intrinsic_op vector<T,N> any(vector<T,N> x);
-__generic<T : __BuiltinType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> any(matrix<T,N,M> x);
-
-
-// Reinterpret bits as a double (HLSL SM 5.0)
-__intrinsic_op double asdouble(uint lowbits, uint highbits);
-
-// Reinterpret bits as a float (HLSL SM 4.0)
-__intrinsic_op float asfloat( int x);
-__intrinsic_op float asfloat(uint x);
-__generic<let N : int> __intrinsic_op vector<float,N> asfloat(vector< int,N> x);
-__generic<let N : int> __intrinsic_op vector<float,N> asfloat(vector<uint,N> x);
-__generic<let N : int, let M : int> __intrinsic_op matrix<float,N,M> asfloat(matrix< int,N,M> x);
-__generic<let N : int, let M : int> __intrinsic_op matrix<float,N,M> asfloat(matrix<uint,N,M> x);
-
-
-// Inverse sine (HLSL SM 1.0)
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op T asin(T x);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> asin(vector<T,N> x);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> asin(matrix<T,N,M> x);
-
-// Reinterpret bits as an int (HLSL SM 4.0)
-__intrinsic_op int asint(float x);
-__intrinsic_op int asint(uint x);
-__generic<let N : int> __intrinsic_op vector<int,N> asint(vector<float,N> x);
-__generic<let N : int> __intrinsic_op vector<int,N> asint(vector<uint,N> x);
-__generic<let N : int, let M : int> __intrinsic_op matrix<int,N,M> asint(matrix<float,N,M> x);
-__generic<let N : int, let M : int> __intrinsic_op matrix<int,N,M> asint(matrix<uint,N,M> x);
-
-// Reinterpret bits of double as a uint (HLSL SM 5.0)
-__intrinsic_op void asuint(double value, out uint lowbits, out uint highbits);
-
-// Reinterpret bits as a uint (HLSL SM 4.0)
-__intrinsic_op uint asuint(float x);
-__intrinsic_op uint asuint(int x);
-__generic<let N : int> __intrinsic_op vector<uint,N> asuint(vector<float,N> x);
-__generic<let N : int> __intrinsic_op vector<uint,N> asuint(vector<int,N> x);
-__generic<let N : int, let M : int> __intrinsic_op matrix<uint,N,M> asuint(matrix<float,N,M> x);
-__generic<let N : int, let M : int> __intrinsic_op matrix<uint,N,M> asuint(matrix<int,N,M> x);
-
-// Inverse tangent (HLSL SM 1.0)
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op T atan(T x);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> atan(vector<T,N> x);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> atan(matrix<T,N,M> x);
-
-__generic<T : __BuiltinFloatingPointType>
-__target_intrinsic(glsl,"atan($0,$1)")
-__intrinsic_op
-T atan2(T y, T x);
-
-__generic<T : __BuiltinFloatingPointType, let N : int>
-__target_intrinsic(glsl,"atan($0,$1)")
-__intrinsic_op
-vector<T,N> atan2(vector<T,N> y, vector<T,N> x);
-
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
-__target_intrinsic(glsl,"atan($0,$1)")
-__intrinsic_op
-matrix<T,N,M> atan2(matrix<T,N,M> y, matrix<T,N,M> x);
-
-// Ceiling (HLSL SM 1.0)
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op T ceil(T x);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> ceil(vector<T,N> x);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> ceil(matrix<T,N,M> x);
-
-
-// Check access status to tiled resource
-__intrinsic_op bool CheckAccessFullyMapped(uint status);
-
-// Clamp (HLSL SM 1.0)
-__generic<T : __BuiltinArithmeticType> __intrinsic_op T clamp(T x, T min, T max);
-__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> clamp(vector<T,N> x, vector<T,N> min, vector<T,N> max);
-__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> clamp(matrix<T,N,M> x, matrix<T,N,M> min, matrix<T,N,M> max);
-
-// Clip (discard) fragment conditionally
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op void clip(T x);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op void clip(vector<T,N> x);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op void clip(matrix<T,N,M> x);
-
-// Cosine
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op T cos(T x);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> cos(vector<T,N> x);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> cos(matrix<T,N,M> x);
-
-// Hyperbolic cosine
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op T cosh(T x);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> cosh(vector<T,N> x);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> cosh(matrix<T,N,M> x);
-
-// Population count
-__intrinsic_op uint countbits(uint value);
-
-// Cross product
-__generic<T : __BuiltinArithmeticType> __intrinsic_op vector<T,3> cross(vector<T,3> x, vector<T,3> y);
-
-// Convert encoded color
-__intrinsic_op int4 D3DCOLORtoUBYTE4(float4 x);
-
-// Partial-difference derivatives
-__generic<T : __BuiltinFloatingPointType>
-__target_intrinsic(glsl, dFdx)
-__intrinsic_op
-T ddx(T x);
-__generic<T : __BuiltinFloatingPointType, let N : int>
-__target_intrinsic(glsl, dFdx)
-__intrinsic_op
-vector<T,N> ddx(vector<T,N> x);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
-__target_intrinsic(glsl, dFdx)
-__intrinsic_op
-matrix<T,N,M> ddx(matrix<T,N,M> x);
-
-__generic<T : __BuiltinFloatingPointType>
-__glsl_extension(GL_ARB_derivative_control)
-__target_intrinsic(glsl, dFdxCoarse)
-__intrinsic_op
-T ddx_coarse(T x);
-__generic<T : __BuiltinFloatingPointType, let N : int>
-__glsl_extension(GL_ARB_derivative_control)
-__target_intrinsic(glsl, dFdxCoarse)
-__intrinsic_op
-vector<T,N> ddx_coarse(vector<T,N> x);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
-__glsl_extension(GL_ARB_derivative_control)
-__target_intrinsic(glsl, dFdxCoarse)
-__intrinsic_op
-matrix<T,N,M> ddx_coarse(matrix<T,N,M> x);
-
-__generic<T : __BuiltinFloatingPointType>
-__glsl_extension(GL_ARB_derivative_control)
-__target_intrinsic(glsl, dFdxFine)
-__intrinsic_op
-T ddx_fine(T x);
-__generic<T : __BuiltinFloatingPointType, let N : int>
-__glsl_extension(GL_ARB_derivative_control)
-__target_intrinsic(glsl, dFdxFine)
-__intrinsic_op
-vector<T,N> ddx_fine(vector<T,N> x);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
-__glsl_extension(GL_ARB_derivative_control)
-__target_intrinsic(glsl, dFdxFine)
-__intrinsic_op
-matrix<T,N,M> ddx_fine(matrix<T,N,M> x);
-
-__generic<T : __BuiltinFloatingPointType>
-__target_intrinsic(glsl, dFdy)
-__intrinsic_op
-T ddy(T x);
-__generic<T : __BuiltinFloatingPointType, let N : int>
-__target_intrinsic(glsl, dFdy)
-__intrinsic_op
-vector<T,N> ddy(vector<T,N> x);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
-__target_intrinsic(glsl, dFdy)
-__intrinsic_op
- matrix<T,N,M> ddy(matrix<T,N,M> x);
-
-__generic<T : __BuiltinFloatingPointType>
-__glsl_extension(GL_ARB_derivative_control)
-__target_intrinsic(glsl, dFdyCoarse)
-__intrinsic_op
-T ddy_coarse(T x);
-__generic<T : __BuiltinFloatingPointType, let N : int>
-__glsl_extension(GL_ARB_derivative_control)
-__target_intrinsic(glsl, dFdyCoarse)
-__intrinsic_op
-vector<T,N> ddy_coarse(vector<T,N> x);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
-__glsl_extension(GL_ARB_derivative_control)
-__target_intrinsic(glsl, dFdyCoarse)
-__intrinsic_op
-matrix<T,N,M> ddy_coarse(matrix<T,N,M> x);
-
-__generic<T : __BuiltinFloatingPointType>
-__glsl_extension(GL_ARB_derivative_control)
-__target_intrinsic(glsl, dFdyFine)
-__intrinsic_op
-T ddy_fine(T x);
-__generic<T : __BuiltinFloatingPointType, let N : int>
-__glsl_extension(GL_ARB_derivative_control)
-__target_intrinsic(glsl, dFdyFine)
-__intrinsic_op
-vector<T,N> ddy_fine(vector<T,N> x);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
-__glsl_extension(GL_ARB_derivative_control)
-__target_intrinsic(glsl, dFdyFine)
-__intrinsic_op
-matrix<T,N,M> ddy_fine(matrix<T,N,M> x);
-
-
-// Radians to degrees
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op T degrees(T x);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> degrees(vector<T,N> x);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> degrees(matrix<T,N,M> x);
-
-// Matrix determinant
-
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op T determinant(matrix<T,N,N> m);
-
-// Barrier for device memory
-__intrinsic_op void DeviceMemoryBarrier();
-__intrinsic_op void DeviceMemoryBarrierWithGroupSync();
-
-// Vector distance
-
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op T distance(vector<T,N> x, vector<T,N> y);
-
-// Vector dot product
-
-__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op T dot(vector<T,N> x, vector<T,N> y);
-
-// Helper for computing distance terms for lighting (obsolete)
-
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op vector<T,4> dst(vector<T,4> x, vector<T,4> y);
-
-// Error message
-
-// __intrinsic_op void errorf( string format, ... );
-
-// Attribute evaluation
-
-__generic<T : __BuiltinArithmeticType> __intrinsic_op T EvaluateAttributeAtCentroid(T x);
-__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> EvaluateAttributeAtCentroid(vector<T,N> x);
-__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> EvaluateAttributeAtCentroid(matrix<T,N,M> x);
-
-__generic<T : __BuiltinArithmeticType> __intrinsic_op T EvaluateAttributeAtSample(T x, uint sampleindex);
-__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> EvaluateAttributeAtSample(vector<T,N> x, uint sampleindex);
-__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> EvaluateAttributeAtSample(matrix<T,N,M> x, uint sampleindex);
-
-__generic<T : __BuiltinArithmeticType> __intrinsic_op T EvaluateAttributeSnapped(T x, int2 offset);
-__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> EvaluateAttributeSnapped(vector<T,N> x, int2 offset);
-__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> EvaluateAttributeSnapped(matrix<T,N,M> x, int2 offset);
-
-// Base-e exponent
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op T exp(T x);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> exp(vector<T,N> x);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> exp(matrix<T,N,M> x);
-
-// Base-2 exponent
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op T exp2(T x);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> exp2(vector<T,N> x);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> exp2(matrix<T,N,M> x);
-
-// Convert 16-bit float stored in low bits of integer
-__intrinsic_op float f16tof32(uint value);
-__generic<let N : int> __intrinsic_op vector<float,N> f16tof32(vector<uint,N> value);
-
-// Convert to 16-bit float stored in low bits of integer
-__intrinsic_op uint f32tof16(float value);
-__generic<let N : int> __intrinsic_op vector<uint,N> f32tof16(vector<float,N> value);
-
-// Flip surface normal to face forward, if needed
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> faceforward(vector<T,N> n, vector<T,N> i, vector<T,N> ng);
-
-// Find first set bit starting at high bit and working down
-__intrinsic_op int firstbithigh(int value);
-__generic<let N : int> __intrinsic_op vector<int,N> firstbithigh(vector<int,N> value);
-
-__intrinsic_op uint firstbithigh(uint value);
-__generic<let N : int> __intrinsic_op vector<uint,N> firstbithigh(vector<uint,N> value);
-
-// Find first set bit starting at low bit and working up
-__intrinsic_op int firstbitlow(int value);
-__generic<let N : int> __intrinsic_op vector<int,N> firstbitlow(vector<int,N> value);
-
-__intrinsic_op uint firstbitlow(uint value);
-__generic<let N : int> __intrinsic_op vector<uint,N> firstbitlow(vector<uint,N> value);
-
-// Floor (HLSL SM 1.0)
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op T floor(T x);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> floor(vector<T,N> x);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> floor(matrix<T,N,M> x);
-
-// Fused multiply-add for doubles
-__intrinsic_op double fma(double a, double b, double c);
-__generic<let N : int> __intrinsic_op vector<double, N> fma(vector<double, N> a, vector<double, N> b, vector<double, N> c);
-__generic<let N : int, let M : int> __intrinsic_op matrix<double,N,M> fma(matrix<double,N,M> a, matrix<double,N,M> b, matrix<double,N,M> c);
-
-// Floating point remainder of x/y
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op T fmod(T x, T y);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> fmod(vector<T,N> x, vector<T,N> y);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> fmod(matrix<T,N,M> x, matrix<T,N,M> y);
-
-// Fractional part
-__generic<T : __BuiltinFloatingPointType>
-__target_intrinsic(glsl, fract)
-__intrinsic_op
-T frac(T x);
-
-__generic<T : __BuiltinFloatingPointType, let N : int>
-__target_intrinsic(glsl, fract)
-__intrinsic_op
-vector<T,N> frac(vector<T,N> x);
-
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
-__target_intrinsic(glsl, fract)
-__intrinsic_op
-matrix<T,N,M> frac(matrix<T,N,M> x);
-
-// Split float into mantissa and exponent
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op T frexp(T x, out T exp);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> frexp(vector<T,N> x, out vector<T,N> exp);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> frexp(matrix<T,N,M> x, out matrix<T,N,M> exp);
-
-// Texture filter width
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op T fwidth(T x);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> fwidth(vector<T,N> x);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> fwidth(matrix<T,N,M> x);
-
-)=", R"=(
-
-// Get number of samples in render target
-__intrinsic_op uint GetRenderTargetSampleCount();
-
-// Get position of given sample
-__intrinsic_op float2 GetRenderTargetSamplePosition(int Index);
-
-// Group memory barrier
-__intrinsic_op void GroupMemoryBarrier();
-__intrinsic_op void GroupMemoryBarrierWithGroupSync();
-
-// Atomics
-__intrinsic_op void InterlockedAdd(in out  int dest,  int value, out  int original_value);
-__intrinsic_op void InterlockedAdd(in out uint dest, uint value, out uint original_value);
-
-__intrinsic_op void InterlockedAnd(in out  int dest,  int value, out  int original_value);
-__intrinsic_op void InterlockedAnd(in out uint dest, uint value, out uint original_value);
-
-__intrinsic_op void InterlockedCompareExchange(in out  int dest,  int compare_value,  int value, out  int original_value);
-__intrinsic_op void InterlockedCompareExchange(in out uint dest, uint compare_value, uint value, out uint original_value);
-
-__intrinsic_op void InterlockedCompareStore(in out  int dest,  int compare_value,  int value);
-__intrinsic_op void InterlockedCompareStore(in out uint dest, uint compare_value, uint value);
-
-__intrinsic_op void InterlockedExchange(in out  int dest,  int value, out  int original_value);
-__intrinsic_op void InterlockedExchange(in out uint dest, uint value, out uint original_value);
-
-__intrinsic_op void InterlockedMax(in out  int dest,  int value, out  int original_value);
-__intrinsic_op void InterlockedMax(in out uint dest, uint value, out uint original_value);
-
-__intrinsic_op void InterlockedMin(in out  int dest,  int value, out  int original_value);
-__intrinsic_op void InterlockedMin(in out uint dest, uint value, out uint original_value);
-
-__intrinsic_op void InterlockedOr(in out  int dest,  int value, out  int original_value);
-__intrinsic_op void InterlockedOr(in out uint dest, uint value, out uint original_value);
-
-__intrinsic_op void InterlockedXor(in out  int dest,  int value, out  int original_value);
-__intrinsic_op void InterlockedXor(in out uint dest, uint value, out uint original_value);
-
-// Is floating-point value finite?
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op bool isfinite(T x);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<bool,N> isfinite(vector<T,N> x);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<bool,N,M> isfinite(matrix<T,N,M> x);
-
-// Is floating-point value infinite?
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op bool isinf(T x);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<bool,N> isinf(vector<T,N> x);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<bool,N,M> isinf(matrix<T,N,M> x);
-
-// Is floating-point value not-a-number?
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op bool isnan(T x);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<bool,N> isnan(vector<T,N> x);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<bool,N,M> isnan(matrix<T,N,M> x);
-
-// Construct float from mantissa and exponent
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op T ldexp(T x, T exp);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> ldexp(vector<T,N> x, vector<T,N> exp);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> ldexp(matrix<T,N,M> x, matrix<T,N,M> exp);
-
-// Vector length
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op T length(vector<T,N> x);
-
-// Linear interpolation
-__generic<T : __BuiltinFloatingPointType>
-__target_intrinsic(glsl, mix)
-__intrinsic_op
-T lerp(T x, T y, T s);
-
-__generic<T : __BuiltinFloatingPointType, let N : int>
-__target_intrinsic(glsl, mix)
-__intrinsic_op
-vector<T,N> lerp(vector<T,N> x, vector<T,N> y, vector<T,N> s);
-
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
-__target_intrinsic(glsl, mix)
-__intrinsic_op
-matrix<T,N,M> lerp(matrix<T,N,M> x, matrix<T,N,M> y, matrix<T,N,M> s);
-
-// Legacy lighting function (obsolete)
-__intrinsic_op float4 lit(float n_dot_l, float n_dot_h, float m);
-
-// Base-e logarithm
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op T log(T x);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> log(vector<T,N> x);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> log(matrix<T,N,M> x);
-
-// Base-10 logarithm
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op T log10(T x);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> log10(vector<T,N> x);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> log10(matrix<T,N,M> x);
-
-// Base-2 logarithm
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op T log2(T x);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> log2(vector<T,N> x);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> log2(matrix<T,N,M> x);
-
-// multiply-add
-__generic<T : __BuiltinArithmeticType> __intrinsic_op T mad(T mvalue, T avalue, T bvalue);
-__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> mad(vector<T,N> mvalue, vector<T,N> avalue, vector<T,N> bvalue);
-__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> mad(matrix<T,N,M> mvalue, matrix<T,N,M> avalue, matrix<T,N,M> bvalue);
-
-// maximum
-__generic<T : __BuiltinArithmeticType> __intrinsic_op T max(T x, T y);
-__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> max(vector<T,N> x, vector<T,N> y);
-__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> max(matrix<T,N,M> x, matrix<T,N,M> y);
-
-// minimum
-__generic<T : __BuiltinArithmeticType> __intrinsic_op T min(T x, T y);
-__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> min(vector<T,N> x, vector<T,N> y);
-__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> min(matrix<T,N,M> x, matrix<T,N,M> y);
-
-// split into integer and fractional parts (both with same sign)
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op T modf(T x, out T ip);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> modf(vector<T,N> x, out vector<T,N> ip);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> modf(matrix<T,N,M> x, out matrix<T,N,M> ip);
-
-// msad4 (whatever that is)
-__intrinsic_op uint4 msad4(uint reference, uint2 source, uint4 accum);
-
-// General inner products
-
-// scalar-scalar
-__generic<T : __BuiltinArithmeticType> __intrinsic_op T mul(T x, T y);
-
-// scalar-vector and vector-scalar
-__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> mul(vector<T,N> x, T y);
-__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> mul(T x, vector<T,N> y);
-
-// scalar-matrix and matrix-scalar
-__generic<T : __BuiltinArithmeticType, let N : int, let M :int> __intrinsic_op matrix<T,N,M> mul(matrix<T,N,M> x, T y);
-__generic<T : __BuiltinArithmeticType, let N : int, let M :int> __intrinsic_op matrix<T,N,M> mul(T x, matrix<T,N,M> y);
-
-// vector-vector (dot product)
-__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op(dot) T mul(vector<T,N> x, vector<T,N> y);
-
-// vector-matrix
-__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op(mulVectorMatrix) vector<T,M> mul(vector<T,N> x, matrix<T,N,M> y);
-
-// matrix-vector
-__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op(mulMatrixVector) vector<T,N> mul(matrix<T,N,M> x, vector<T,M> y);
-
-// matrix-matrix
-__generic<T : __BuiltinArithmeticType, let R : int, let N : int, let C : int> __intrinsic_op(mulMatrixMatrix) matrix<T,R,C> mul(matrix<T,R,N> x, matrix<T,N,C> y);
-
-// noise (deprecated)
-__intrinsic_op float noise(float x);
-__generic<let N : int> __intrinsic_op float noise(vector<float, N> x);
-
-// Normalize a vector
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> normalize(vector<T,N> x);
-
-// Raise to a power
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op T pow(T x, T y);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> pow(vector<T,N> x, vector<T,N> y);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> pow(matrix<T,N,M> x, matrix<T,N,M> y);
-
-// Output message
-
-// __intrinsic_op void printf( string format, ... );
-
-// Tessellation factor fixup routines
-
-__intrinsic_op void Process2DQuadTessFactorsAvg(
-    in  float4 RawEdgeFactors,
-    in  float2 InsideScale,
-    out float4 RoundedEdgeTessFactors,
-    out float2 RoundedInsideTessFactors,
-    out float2 UnroundedInsideTessFactors);
-
-__intrinsic_op void Process2DQuadTessFactorsMax(
-    in  float4 RawEdgeFactors,
-    in  float2 InsideScale,
-    out float4 RoundedEdgeTessFactors,
-    out float2 RoundedInsideTessFactors,
-    out float2 UnroundedInsideTessFactors);
-
-__intrinsic_op void Process2DQuadTessFactorsMin(
-    in  float4 RawEdgeFactors,
-    in  float2 InsideScale,
-    out float4 RoundedEdgeTessFactors,
-    out float2 RoundedInsideTessFactors,
-    out float2 UnroundedInsideTessFactors);
-
-__intrinsic_op void ProcessIsolineTessFactors(
-    in  float RawDetailFactor,
-    in  float RawDensityFactor,
-    out float RoundedDetailFactor,
-    out float RoundedDensityFactor);
-
-__intrinsic_op void ProcessQuadTessFactorsAvg(
-    in  float4 RawEdgeFactors,
-    in  float InsideScale,
-    out float4 RoundedEdgeTessFactors,
-    out float2 RoundedInsideTessFactors,
-    out float2 UnroundedInsideTessFactors);
-
-__intrinsic_op void ProcessQuadTessFactorsMax(
-    in  float4 RawEdgeFactors,
-    in  float InsideScale,
-    out float4 RoundedEdgeTessFactors,
-    out float2 RoundedInsideTessFactors,
-    out float2 UnroundedInsideTessFactors);
-
-__intrinsic_op void ProcessQuadTessFactorsMin(
-    in  float4 RawEdgeFactors,
-    in  float InsideScale,
-    out float4 RoundedEdgeTessFactors,
-    out float2 RoundedInsideTessFactors,
-    out float2 UnroundedInsideTessFactors);
-
-__intrinsic_op void ProcessTriTessFactorsAvg(
-    in  float3 RawEdgeFactors,
-    in  float InsideScale,
-    out float3 RoundedEdgeTessFactors,
-    out float RoundedInsideTessFactor,
-    out float UnroundedInsideTessFactor);
-
-__intrinsic_op void ProcessTriTessFactorsMax(
-    in  float3 RawEdgeFactors,
-    in  float InsideScale,
-    out float3 RoundedEdgeTessFactors,
-    out float RoundedInsideTessFactor,
-    out float UnroundedInsideTessFactor);
-
-__intrinsic_op void ProcessTriTessFactorsMin(
-    in  float3 RawEdgeFactors,
-    in  float InsideScale,
-    out float3 RoundedEdgeTessFactors,
-    out float RoundedInsideTessFactors,
-    out float UnroundedInsideTessFactors);
-
-// Degrees to radians
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op T radians(T x);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> radians(vector<T,N> x);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> radians(matrix<T,N,M> x);
-
-// Approximate reciprocal
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op T rcp(T x);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> rcp(vector<T,N> x);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> rcp(matrix<T,N,M> x);
-
-// Reflect incident vector across plane with given normal
-__generic<T : __BuiltinFloatingPointType, let N : int>
-__intrinsic_op
-vector<T,N> reflect(vector<T,N> i, vector<T,N> n);
-
-// Refract incident vector given surface normal and index of refraction
-__generic<T : __BuiltinFloatingPointType, let N : int>
-__intrinsic_op
-vector<T,N> refract(vector<T,N> i, vector<T,N> n, float eta);
-
-// Reverse the order of the bits of a `uint` (applied per component for vectors)
-__intrinsic_op uint reversebits(uint value);
-__generic<let N : int> __intrinsic_op vector<uint,N> reversebits(vector<uint,N> value);
-
-// Round-to-nearest
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op T round(T x);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> round(vector<T,N> x);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> round(matrix<T,N,M> x);
-
-// Reciprocal of square root
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op T rsqrt(T x);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> rsqrt(vector<T,N> x);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> rsqrt(matrix<T,N,M> x);
-
-// Clamp value to [0,1] range
-__generic<T : __BuiltinFloatingPointType>
-__target_intrinsic(glsl, "clamp($0, 0, 1)") __intrinsic_op
-T saturate(T x);
-
-__generic<T : __BuiltinFloatingPointType, let N : int>
-__target_intrinsic(glsl, "clamp($0, 0, 1)") __intrinsic_op
-vector<T,N> saturate(vector<T,N> x);
-
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
-__target_intrinsic(glsl, "clamp($0, 0, 1)") __intrinsic_op
-matrix<T,N,M> saturate(matrix<T,N,M> x);
-
-
-// Extract sign of value
-__generic<T : __BuiltinSignedArithmeticType> __intrinsic_op int sign(T x);
-__generic<T : __BuiltinSignedArithmeticType, let N : int> __intrinsic_op vector<int,N> sign(vector<T,N> x);
-__generic<T : __BuiltinSignedArithmeticType, let N : int, let M : int> __intrinsic_op matrix<int,N,M> sign(matrix<T,N,M> x);
-
-)=", R"=(
-
-
-// Sine
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op T sin(T x);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> sin(vector<T,N> x);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> sin(matrix<T,N,M> x);
-
-// Sine and cosine of `x` computed together; results are written to the `out` parameters `s` and `c`
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op void sincos(T x, out T s, out T c);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op void sincos(vector<T,N> x, out vector<T,N> s, out vector<T,N> c);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op void sincos(matrix<T,N,M> x, out matrix<T,N,M> s, out matrix<T,N,M> c);
-
-// Hyperbolic Sine
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op T sinh(T x);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> sinh(vector<T,N> x);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> sinh(matrix<T,N,M> x);
-
-// Smooth step (Hermite interpolation)
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op T smoothstep(T min, T max, T x);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> smoothstep(vector<T,N> min, vector<T,N> max, vector<T,N> x);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> smoothstep(matrix<T,N,M> min, matrix<T,N,M> max, matrix<T,N,M> x);
-
-// Square root
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op T sqrt(T x);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> sqrt(vector<T,N> x);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> sqrt(matrix<T,N,M> x);
-
-// Step function
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op T step(T y, T x);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> step(vector<T,N> y, vector<T,N> x);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> step(matrix<T,N,M> y, matrix<T,N,M> x);
-
-// Tangent
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op T tan(T x);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> tan(vector<T,N> x);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> tan(matrix<T,N,M> x);
-
-// Hyperbolic tangent
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op T tanh(T x);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> tanh(vector<T,N> x);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> tanh(matrix<T,N,M> x);
-
-// Legacy texture-fetch operations
-
-/*
-__intrinsic_op float4 tex1D(sampler1D s, float t);
-__intrinsic_op float4 tex1D(sampler1D s, float t, float ddx, float ddy);
-__intrinsic_op float4 tex1Dbias(sampler1D s, float4 t);
-__intrinsic_op float4 tex1Dgrad(sampler1D s, float t, float ddx, float ddy);
-__intrinsic_op float4 tex1Dlod(sampler1D s, float4 t);
-__intrinsic_op float4 tex1Dproj(sampler1D s, float4 t);
-
-__intrinsic_op float4 tex2D(sampler2D s, float2 t);
-__intrinsic_op float4 tex2D(sampler2D s, float2 t, float2 ddx, float2 ddy);
-__intrinsic_op float4 tex2Dbias(sampler2D s, float4 t);
-__intrinsic_op float4 tex2Dgrad(sampler2D s, float2 t, float2 ddx, float2 ddy);
-__intrinsic_op float4 tex2Dlod(sampler2D s, float4 t);
-__intrinsic_op float4 tex2Dproj(sampler2D s, float4 t);
-
-__intrinsic_op float4 tex3D(sampler3D s, float3 t);
-__intrinsic_op float4 tex3D(sampler3D s, float3 t, float3 ddx, float3 ddy);
-__intrinsic_op float4 tex3Dbias(sampler3D s, float4 t);
-__intrinsic_op float4 tex3Dgrad(sampler3D s, float3 t, float3 ddx, float3 ddy);
-__intrinsic_op float4 tex3Dlod(sampler3D s, float4 t);
-__intrinsic_op float4 tex3Dproj(sampler3D s, float4 t);
-
-__intrinsic_op float4 texCUBE(samplerCUBE s, float3 t);
-__intrinsic_op float4 texCUBE(samplerCUBE s, float3 t, float3 ddx, float3 ddy);
-__intrinsic_op float4 texCUBEbias(samplerCUBE s, float4 t);
-__intrinsic_op float4 texCUBEgrad(samplerCUBE s, float3 t, float3 ddx, float3 ddy);
-__intrinsic_op float4 texCUBElod(samplerCUBE s, float4 t);
-__intrinsic_op float4 texCUBEproj(samplerCUBE s, float4 t);
-*/
-
-// Matrix transpose
-__generic<T : __BuiltinType, let N : int, let M : int> __intrinsic_op matrix<T,M,N> transpose(matrix<T,N,M> x);
-
-// Truncate to integer
-__generic<T : __BuiltinFloatingPointType> __intrinsic_op T trunc(T x);
-__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic_op vector<T,N> trunc(vector<T,N> x);
-__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> trunc(matrix<T,N,M> x);
-
-
-)=", R"=(
-
-// Shader model 6.0 stuff
-
-__intrinsic_op uint GlobalOrderedCountIncrement(uint countToAppendForThisLane);
-
-__generic<T : __BuiltinType> __intrinsic_op T QuadReadLaneAt(T sourceValue, int quadLaneID);
-__generic<T : __BuiltinType, let N : int> __intrinsic_op vector<T,N> QuadReadLaneAt(vector<T,N> sourceValue, int quadLaneID);
-__generic<T : __BuiltinType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> QuadReadLaneAt(matrix<T,N,M> sourceValue, int quadLaneID);
-
-__generic<T : __BuiltinType> __intrinsic_op T QuadSwapX(T localValue);
-__generic<T : __BuiltinType, let N : int> __intrinsic_op vector<T,N> QuadSwapX(vector<T,N> localValue);
-__generic<T : __BuiltinType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> QuadSwapX(matrix<T,N,M> localValue);
-
-__generic<T : __BuiltinType> __intrinsic_op T QuadSwapY(T localValue);
-__generic<T : __BuiltinType, let N : int> __intrinsic_op vector<T,N> QuadSwapY(vector<T,N> localValue);
-__generic<T : __BuiltinType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> QuadSwapY(matrix<T,N,M> localValue);
-
-__generic<T : __BuiltinIntegerType> __intrinsic_op T WaveAllBitAnd(T expr);
-__generic<T : __BuiltinIntegerType, let N : int> __intrinsic_op vector<T,N> WaveAllBitAnd(vector<T,N> expr);
-__generic<T : __BuiltinIntegerType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> WaveAllBitAnd(matrix<T,N,M> expr);
-
-__generic<T : __BuiltinIntegerType> __intrinsic_op T WaveAllBitOr(T expr);
-__generic<T : __BuiltinIntegerType, let N : int> __intrinsic_op vector<T,N> WaveAllBitOr(vector<T,N> expr);
-__generic<T : __BuiltinIntegerType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> WaveAllBitOr(matrix<T,N,M> expr);
-
-__generic<T : __BuiltinIntegerType> __intrinsic_op T WaveAllBitXor(T expr);
-__generic<T : __BuiltinIntegerType, let N : int> __intrinsic_op vector<T,N> WaveAllBitXor(vector<T,N> expr);
-__generic<T : __BuiltinIntegerType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> WaveAllBitXor(matrix<T,N,M> expr);
-
-__generic<T : __BuiltinArithmeticType> __intrinsic_op T WaveAllMax(T expr);
-__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> WaveAllMax(vector<T,N> expr);
-__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> WaveAllMax(matrix<T,N,M> expr);
-
-__generic<T : __BuiltinArithmeticType> __intrinsic_op T WaveAllMin(T expr);
-__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> WaveAllMin(vector<T,N> expr);
-__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> WaveAllMin(matrix<T,N,M> expr);
-
-__generic<T : __BuiltinArithmeticType> __intrinsic_op T WaveAllProduct(T expr);
-__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> WaveAllProduct(vector<T,N> expr);
-__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> WaveAllProduct(matrix<T,N,M> expr);
-
-__generic<T : __BuiltinArithmeticType> __intrinsic_op T WaveAllSum(T expr);
-__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> WaveAllSum(vector<T,N> expr);
-__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> WaveAllSum(matrix<T,N,M> expr);
-
-__intrinsic_op bool WaveAllEqual(bool expr);
-__intrinsic_op bool WaveAllTrue(bool expr);
-__intrinsic_op bool WaveAnyTrue(bool expr);
-
-uint64_t WaveBallot(bool expr);
-
-uint WaveGetLaneCount();
-uint WaveGetLaneIndex();
-uint WaveGetOrderedIndex();
-
-bool WaveIsHelperLane();
-
-bool WaveOnce();
-
-__generic<T : __BuiltinArithmeticType> __intrinsic_op T WavePrefixProduct(T expr);
-__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> WavePrefixProduct(vector<T,N> expr);
-__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> WavePrefixProduct(matrix<T,N,M> expr);
-
-__generic<T : __BuiltinArithmeticType> __intrinsic_op T WavePrefixSum(T expr);
-__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op vector<T,N> WavePrefixSum(vector<T,N> expr);
-__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> WavePrefixSum(matrix<T,N,M> expr);
-
-__generic<T : __BuiltinType> __intrinsic_op T WaveReadFirstLane(T expr);
-__generic<T : __BuiltinType, let N : int> __intrinsic_op vector<T,N> WaveReadFirstLane(vector<T,N> expr);
-__generic<T : __BuiltinType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> WaveReadFirstLane(matrix<T,N,M> expr);
-
-__generic<T : __BuiltinType> __intrinsic_op T WaveReadLaneAt(T expr, int laneIndex);
-__generic<T : __BuiltinType, let N : int> __intrinsic_op vector<T,N> WaveReadLaneAt(vector<T,N> expr, int laneIndex);
-__generic<T : __BuiltinType, let N : int, let M : int> __intrinsic_op matrix<T,N,M> WaveReadLaneAt(matrix<T,N,M> expr, int laneIndex);
-
-
-)=", R"=(
-
-// `typedef`s to help with the fact that HLSL has been sorta-kinda case insensitive at various points
-typedef Texture2D texture2D;
-
-#line default
-)=" };
-
-
 namespace Slang
 {
     String Session::getStdlibPath()
@@ -1277,890 +244,11 @@ namespace Slang
 
         StringBuilder sb;
 
-        // generate operator overloads
-
-
         String path = getStdlibPath();
 
 #define EMIT_LINE_DIRECTIVE() sb << "#line " << (__LINE__+1) << " \"" << path << "\"\n"
 
-        // Generate declarations for all the base types
-
-        static const int kBaseTypeCount = sizeof(kBaseTypes) / sizeof(kBaseTypes[0]);
-        for (int tt = 0; tt < kBaseTypeCount; ++tt)
-        {
-            EMIT_LINE_DIRECTIVE();
-            sb << "__builtin_type(" << int(kBaseTypes[tt].tag) << ") struct " << kBaseTypes[tt].name;
-
-            // Declare interface conformances for this type
-
-            sb << "\n    : __BuiltinType\n";
-
-            switch (kBaseTypes[tt].tag)
-            {
-            case BaseType::Float:
-                sb << "\n    , __BuiltinFloatingPointType\n";
-                sb << "\n    ,  __BuiltinRealType\n";
-                // fall through to:
-            case BaseType::Int:
-                sb << "\n    ,  __BuiltinSignedArithmeticType\n";
-                // fall through to:
-            case BaseType::UInt:
-            case BaseType::UInt64:
-                sb << "\n    ,  __BuiltinArithmeticType\n";
-                // fall through to:
-            case BaseType::Bool:
-                sb << "\n    ,  __BuiltinType\n";
-                break;
-
-            default:
-                break;
-            }
-
-            sb << "\n{\n";
-
-
-            // Declare initializers to convert from various other types
-            for (int ss = 0; ss < kBaseTypeCount; ++ss)
-            {
-                // Don't allow conversion from `void`
-                if (kBaseTypes[ss].tag == BaseType::Void)
-                    continue;
-
-                // We need to emit a modifier so that the semantic-checking
-                // layer will know it can use these operations for implicit
-                // conversion.
-                ConversionCost conversionCost = getBaseTypeConversionCost(
-                    kBaseTypes[tt],
-                    kBaseTypes[ss]);
-
-                EMIT_LINE_DIRECTIVE();
-                sb << "__implicit_conversion(" << conversionCost << ")\n";
-
-                EMIT_LINE_DIRECTIVE();
-                sb << "__init(" << kBaseTypes[ss].name << " value);\n";
-            }
-
-            sb << "};\n";
-        }
-
-        // Declare vector and matrix types
-
-        sb << "__generic<T = float, let N : int = 4> __magic_type(Vector) struct vector\n{\n";
-        sb << "    typedef T Element;\n";
-
-        // Declare initializer taking a single scalar of the element type
-        sb << "    __implicit_conversion(" << kConversionCost_ScalarToVector << ")\n";
-        sb << "    __init(T value);\n";
-
-        sb << "};\n";
-
-        // TODO: Probably need to do similar
-        sb << "__generic<T = float, let R : int = 4, let C : int = 4> __magic_type(Matrix) struct matrix {};\n";
-
-        static const struct {
-            char const* name;
-            char const* glslPrefix;
-        } kTypes[] =
-        {
-            {"float", ""},
-            {"int", "i"},
-            {"uint", "u"},
-            {"bool", "b"},
-        };
-        static const int kTypeCount = sizeof(kTypes) / sizeof(kTypes[0]);
-
-        for (int tt = 0; tt < kTypeCount; ++tt)
-        {
-            // Declare HLSL vector types
-            for (int ii = 1; ii <= 4; ++ii)
-            {
-                sb << "typedef vector<" << kTypes[tt].name << "," << ii << "> " << kTypes[tt].name << ii << ";\n";
-            }
-
-            // Declare HLSL matrix types
-            for (int rr = 2; rr <= 4; ++rr)
-            for (int cc = 2; cc <= 4; ++cc)
-            {
-                sb << "typedef matrix<" << kTypes[tt].name << "," << rr << "," << cc << "> " << kTypes[tt].name << rr << "x" << cc << ";\n";
-            }
-        }
-
-        // Declare additional built-in generic types
-//        EMIT_LINE_DIRECTIVE();
-
-
-        sb << "__generic<T>\n";
-        sb << "__intrinsic_type(" << kIROp_ConstantBufferType << ")\n";
-        sb << "__magic_type(ConstantBuffer) struct ConstantBuffer {};\n";
-
-        sb << "__generic<T>\n";
-        sb << "__intrinsic_type(" << kIROp_TextureBufferType << ")\n";
-        sb << "__magic_type(TextureBuffer) struct TextureBuffer {};\n";
-
-
-        static const char* kComponentNames[]{ "x", "y", "z", "w" };
-        static const char* kVectorNames[]{ "", "x", "xy", "xyz", "xyzw" };
-
-        // Need to add constructors to the types above
-        for (int N = 2; N <= 4; ++N)
-        {
-            sb << "__generic<T> __extension vector<T, " << N << ">\n{\n";
-
-            // initialize from N scalars
-            sb << "__init(";
-            for (int ii = 0; ii < N; ++ii)
-            {
-                if (ii != 0) sb << ", ";
-                sb << "T " << kComponentNames[ii];
-            }
-            sb << ");\n";
-
-            // Initialize from an M-vector and then scalars
-            for (int M = 2; M < N; ++M)
-            {
-                sb << "__init(vector<T," << M << "> " << kVectorNames[M];
-                for (int ii = M; ii < N; ++ii)
-                {
-                    sb << ", T " << kComponentNames[ii];
-                }
-                sb << ");\n";
-            }
-
-            // initialize from another vector of the same size
-            //
-            // TODO(tfoley): this overlaps with implicit conversions.
-            // We should look for a way that we can define implicit
-            // conversions directly in the stdlib instead...
-            sb << "__generic<U> __init(vector<U," << N << ">);\n";
-
-            // Initialize from two vectors, of size M and N-M
-            for(int M = 2; M <= (N-2); ++M)
-            {
-                int K = N - M;
-                SLANG_ASSERT(K >= 2);
-
-                sb << "__init(vector<T," << M << "> " << kVectorNames[M];
-                sb << ", vector<T," << K << "> ";
-                for (int ii = 0; ii < K; ++ii)
-                {
-                    sb << kComponentNames[ii];
-                }
-                sb << ");\n";
-            }
-
-            sb << "}\n";
-        }
-
-        // The above extension was generic in the *type* of the vector,
-        // but explicit in the *size*. We will now declare an extension
-        // for each builtin type that is generic in the size.
-        //
-        for (int tt = 0; tt < kBaseTypeCount; ++tt)
-        {
-            if(kBaseTypes[tt].tag == BaseType::Void) continue;
-
-            sb << "__generic<let N : int> __extension vector<"
-                << kBaseTypes[tt].name << ",N>\n{\n";
-
-            for (int ff = 0; ff < kBaseTypeCount; ++ff)
-            {
-                if(kBaseTypes[ff].tag == BaseType::Void) continue;
-
-                // We need a constructor to make a vector from a scalar
-                // of another type.
-
-                if( tt != ff )
-                {
-                    auto cost = getBaseTypeConversionCost(
-                        kBaseTypes[tt],
-                        kBaseTypes[ff]);
-                    cost += kConversionCost_ScalarToVector;
-
-                    sb << "    __implicit_conversion(" << cost << ")\n";
-                    sb << "    __init(" << kBaseTypes[ff].name << " value);\n";
-                }
-            }
-
-            sb << "}\n";
-        }
-
-        for( int R = 2; R <= 4; ++R )
-        for( int C = 2; C <= 4; ++C )
-        {
-            sb << "__generic<T> __extension matrix<T, " << R << "," << C << ">\n{\n";
-
-            // initialize from R*C scalars
-            sb << "__init(";
-            for( int ii = 0; ii < R; ++ii )
-            for( int jj = 0; jj < C; ++jj )
-            {
-                if ((ii+jj) != 0) sb << ", ";
-                sb << "T m" << ii << jj;
-            }
-            sb << ");\n";
-
-            // Initialize from R C-vectors
-            sb << "__init(";
-            for (int ii = 0; ii < R; ++ii)
-            {
-                if(ii != 0) sb << ", ";
-                sb << "vector<T," << C << "> row" << ii;
-            }
-            sb << ");\n";
-
-
-            // initialize from another matrix of the same size
-            //
-            // TODO(tfoley): See comment about how this overlaps
-            // with implicit conversion, in the `vector` case above
-            sb << "__generic<U> __init(matrix<U," << R << ", " << C << ">);\n";
-
-            // initialize from a matrix of larger size
-            for(int rr = R; rr <= 4; ++rr)
-            for( int cc = C; cc <= 4; ++cc )
-            {
-                if(rr == R && cc == C) continue;
-                sb << "__init(matrix<T," << rr << "," << cc << "> value);\n";
-            }
-
-            sb << "}\n";
-        }
-
-        // Declare built-in texture and sampler types
-
-
-
-        sb << "__magic_type(SamplerState," << int(SamplerStateType::Flavor::SamplerState) << ")\n";
-        sb << "__intrinsic_type(" << kIROp_SamplerType << ", " << int(SamplerStateType::Flavor::SamplerState) << ")\n";
-        sb << "struct SamplerState {};";
-        
-        sb << "__magic_type(SamplerState," << int(SamplerStateType::Flavor::SamplerComparisonState) << ")\n";
-        sb << "__intrinsic_type(" << kIROp_SamplerType << ", " << int(SamplerStateType::Flavor::SamplerComparisonState) << ")\n";
-        sb << "struct SamplerComparisonState {};";
-
-        // TODO(tfoley): Need to handle `RW*` variants of texture types as well...
-        static const struct {
-            char const*			name;
-            TextureType::Shape	baseShape;
-            int					coordCount;
-        } kBaseTextureTypes[] = {
-            { "Texture1D",		TextureType::Shape1D,	1 },
-            { "Texture2D",		TextureType::Shape2D,	2 },
-            { "Texture3D",		TextureType::Shape3D,	3 },
-            { "TextureCube",	TextureType::ShapeCube,	3 },
-        };
-        static const int kBaseTextureTypeCount = sizeof(kBaseTextureTypes) / sizeof(kBaseTextureTypes[0]);
-
-
-        static const struct {
-            char const*         name;
-            SlangResourceAccess access;
-        } kBaseTextureAccessLevels[] = {
-            { "",                   SLANG_RESOURCE_ACCESS_READ },
-            { "RW",                 SLANG_RESOURCE_ACCESS_READ_WRITE },
-            { "RasterizerOrdered",  SLANG_RESOURCE_ACCESS_RASTER_ORDERED },
-        };
-        static const int kBaseTextureAccessLevelCount = sizeof(kBaseTextureAccessLevels) / sizeof(kBaseTextureAccessLevels[0]);
-
-        for (int tt = 0; tt < kBaseTextureTypeCount; ++tt)
-        {
-            char const* name = kBaseTextureTypes[tt].name;
-            TextureType::Shape baseShape = kBaseTextureTypes[tt].baseShape;
-
-            for (int isArray = 0; isArray < 2; ++isArray)
-            {
-                // Arrays of 3D textures aren't allowed
-                if (isArray && baseShape == TextureType::Shape3D) continue;
-
-                for (int isMultisample = 0; isMultisample < 2; ++isMultisample)
-                for (int accessLevel = 0; accessLevel < kBaseTextureAccessLevelCount; ++accessLevel)
-                {
-                    auto access = kBaseTextureAccessLevels[accessLevel].access;
-
-                    // TODO: any constraints to enforce on what gets to be multisampled?
-
-                    unsigned flavor = baseShape;
-                    if (isArray)		flavor |= TextureType::ArrayFlag;
-                    if (isMultisample)	flavor |= TextureType::MultisampleFlag;
-//                        if (isShadow)		flavor |= TextureType::ShadowFlag;
-
-                    flavor |= (access << 8);
-
-                    // emit a generic signature
-                    // TODO: allow for multisample count to come in as well...
-                    sb << "__generic<T = float4> ";
-
-                    sb << "__magic_type(Texture," << int(flavor) << ")\n";
-                    sb << "__intrinsic_type(" << kIROp_TextureType << ", " << flavor << ")\n";
-                    sb << "struct ";
-                    sb << kBaseTextureAccessLevels[accessLevel].name;
-                    sb << name;
-                    if (isMultisample) sb << "MS";
-                    if (isArray) sb << "Array";
-//                        if (isShadow) sb << "Shadow";
-                    sb << "\n{";
-
-                    if( !isMultisample )
-                    {
-                        sb << "float CalculateLevelOfDetail(SamplerState s, ";
-                        sb << "float" << kBaseTextureTypes[tt].coordCount << " location);\n";
-
-                        sb << "float CalculateLevelOfDetailUnclamped(SamplerState s, ";
-                        sb << "float" << kBaseTextureTypes[tt].coordCount << " location);\n";
-                    }
-
-                    // `GetDimensions`
-
-                    for(int isFloat = 0; isFloat < 2; ++isFloat)
-                    for(int includeMipInfo = 0; includeMipInfo < 2; ++includeMipInfo)
-                    {
-                        {
-                            sb << "__glsl_version(450)\n";
-                            sb << "__target_intrinsic(glsl, \"(";
-
-                            int aa = 0;
-                            String lodStr = "0";
-                            if (includeMipInfo)
-                            {
-                                int mipLevelArg = aa++;
-                                lodStr = "int($";
-                                lodStr.append(mipLevelArg);
-                                lodStr.append(")");
-                            }
-
-                            int cc = 0;
-                            switch(baseShape)
-                            {
-                            case TextureType::Shape1D:
-                                sb << "($" << aa++ << " = textureSize($P, " << lodStr << "))";
-                                cc = 1;
-                                break;
-
-                            case TextureType::Shape2D:
-                            case TextureType::ShapeCube:
-                                sb << "($" << aa++ << " = textureSize($P, " << lodStr << ").x)";
-                                sb << ", ($" << aa++ << " = textureSize($P, " << lodStr << ").y)";
-                                cc = 2;
-                                break;
-
-                            case TextureType::Shape3D:
-                                sb << "($" << aa++ << " = textureSize($P, " << lodStr << ").x)";
-                                sb << ", ($" << aa++ << " = textureSize($P, " << lodStr << ").y)";
-                                sb << ", ($" << aa++ << " = textureSize($P, " << lodStr << ").z)";
-                                cc = 3;
-                                break;
-
-                            default:
-                                SLANG_UNEXPECTED("unhandled resource shape");
-                                break;
-                            }
-
-                            if(isArray)
-                            {
-                                sb << ", ($" << aa++ << " = textureSize($P, " << lodStr << ")." << kComponentNames[cc] << ")";
-                            }
-
-                            if(isMultisample)
-                            {
-                                sb << ", ($" << aa++ << " = textureSamples($P))";
-                            }
-
-                            if (includeMipInfo)
-                            {
-                                sb << ", ($" << aa++ << " = textureQueryLevels($P))";
-                            }
-
-
-                            sb << ")\")\n";
-                            sb << "__intrinsic_op\n";
-
-                        }
-
-                        char const* t = isFloat ? "out float " : "out uint ";
-
-                        sb << "void GetDimensions(";
-                        if(includeMipInfo)
-                            sb << "uint mipLevel, ";
-
-                        switch(baseShape)
-                        {
-                        case TextureType::Shape1D:
-                            sb << t << "width";
-                            break;
-
-                        case TextureType::Shape2D:
-                        case TextureType::ShapeCube:
-                            sb << t << "width,";
-                            sb << t << "height";
-                            break;
-
-                        case TextureType::Shape3D:
-                            sb << t << "width,";
-                            sb << t << "height,";
-                            sb << t << "depth";
-                            break;
-
-                        default:
-                            assert(!"unexpected");
-                            break;
-                        }
-
-                        if(isArray)
-                        {
-                            sb << ", " << t << "elements";
-                        }
-
-                        if(isMultisample)
-                        {
-                            sb << ", " << t << "sampleCount";
-                        }
-
-                        if(includeMipInfo)
-                            sb << ", " << t << "numberOfLevels";
-
-                        sb << ");\n";
-                    }
-
-                    // `GetSamplePosition()`
-                    if( isMultisample )
-                    {
-                        sb << "float2 GetSamplePosition(int s);\n";
-                    }
-
-                    // `Load()`
-
-                    if( kBaseTextureTypes[tt].coordCount + isArray < 4 )
-                    {
-                        int loadCoordCount = kBaseTextureTypes[tt].coordCount + isArray + (isMultisample?0:1);
-
-                        // When translating to GLSL, we need to break apart the `location` argument.
-                        //
-                        // TODO: this should realy be handled by having this member actually get lowered!
-                        static const char* kGLSLLoadCoordsSwizzle[] = { "", "", "x", "xy", "xyz", "xyzw" };
-                        static const char* kGLSLLoadLODSwizzle[]    = { "", "", "y", "z", "w", "error" };
-
-                        if (isMultisample)
-                        {
-                            sb << "__target_intrinsic(glsl, \"texelFetch($P, $0, $1)\")\n";
-                        }
-                        else
-                        {
-                            sb << "__target_intrinsic(glsl, \"texelFetch($P, ($0)." << kGLSLLoadCoordsSwizzle[loadCoordCount] << ", ($0)." << kGLSLLoadLODSwizzle[loadCoordCount] << ")\")\n";
-                        }
-                        sb << "__intrinsic_op\n";
-                        sb << "T Load(";
-                        sb << "int" << loadCoordCount << " location";
-                        if(isMultisample)
-                        {
-                            sb << ", int sampleIndex";
-                        }
-                        sb << ");\n";
-
-                        if (isMultisample)
-                        {
-                            sb << "__target_intrinsic(glsl, \"texelFetchOffset($P, $0, $1, $2)\")\n";
-                        }
-                        else
-                        {
-                            sb << "__target_intrinsic(glsl, \"texelFetch($P, ($0)." << kGLSLLoadCoordsSwizzle[loadCoordCount] << ", ($0)." << kGLSLLoadLODSwizzle[loadCoordCount] << ", $1)\")\n";
-                        }
-                        sb << "__intrinsic_op\n";
-                        sb << "T Load(";
-                        sb << "int" << loadCoordCount << " location";
-                        if(isMultisample)
-                        {
-                            sb << ", int sampleIndex";
-                        }
-                        sb << ", int" << loadCoordCount << " offset";
-                        sb << ");\n";
-
-
-                        sb << "T Load(";
-                        sb << "int" << loadCoordCount << " location";
-                        if(isMultisample)
-                        {
-                            sb << ", int sampleIndex";
-                        }
-                        sb << ", int" << kBaseTextureTypes[tt].coordCount << " offset";
-                        sb << ", out uint status";
-                        sb << ");\n";
-                    }
-
-                    if(baseShape != TextureType::ShapeCube)
-                    {
-                        // subscript operator
-                        sb << "__intrinsic_op __subscript(uint" << kBaseTextureTypes[tt].coordCount + isArray << " location) -> T;\n";
-                    }
-
-                    if( !isMultisample )
-                    {
-                        // `Sample()`
-
-                        sb << "__target_intrinsic(glsl, \"texture($p, $1)\")\n";
-
-                        // TODO: only enable if IR is being used?
-                        sb << "__intrinsic_op(sample)\n";
-
-                        sb << "__intrinsic_op\n";
-                        sb << "T Sample(SamplerState s, ";
-                        sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location);\n";
-
-                        if( baseShape != TextureType::ShapeCube )
-                        {
-                            sb << "__target_intrinsic(glsl, \"textureOffset($p, $1, $2)\")\n";
-                            sb << "__intrinsic_op\n";
-                            sb << "T Sample(SamplerState s, ";
-                            sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, ";
-                            sb << "int" << kBaseTextureTypes[tt].coordCount << " offset);\n";
-                        }
-
-                        sb << "T Sample(SamplerState s, ";
-                        sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, ";
-                        if( baseShape != TextureType::ShapeCube )
-                        {
-                            sb << "int" << kBaseTextureTypes[tt].coordCount << " offset, ";
-                        }
-                        sb << "float clamp);\n";
-
-                        sb << "T Sample(SamplerState s, ";
-                        sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, ";
-                        if( baseShape != TextureType::ShapeCube )
-                        {
-                            sb << "int" << kBaseTextureTypes[tt].coordCount << " offset, ";
-                        }
-                        sb << "float clamp, out uint status);\n";
-
-
-                        // `SampleBias()`
-                        sb << "__target_intrinsic(glsl, \"texture($p, $1, $2)\")\n";
-                        sb << "__intrinsic_op\n";
-                        sb << "T SampleBias(SamplerState s, ";
-                        sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, float bias);\n";
-
-                        if( baseShape != TextureType::ShapeCube )
-                        {
-                            sb << "__target_intrinsic(glsl, \"textureOffset($p, $1, $2, $3)\")\n";
-                            sb << "__intrinsic_op\n";
-                            sb << "T SampleBias(SamplerState s, ";
-                            sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, float bias, ";
-                            sb << "int" << kBaseTextureTypes[tt].coordCount << " offset);\n";
-                        }
-
-                        // `SampleCmp()` and `SampleCmpLevelZero`
-                        sb << "T SampleCmp(SamplerComparisonState s, ";
-                        sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, ";
-                        sb << "float compareValue";
-                        sb << ");\n";
-
-                        int baseCoordCount = kBaseTextureTypes[tt].coordCount;
-                        int arrCoordCount = baseCoordCount + isArray;
-                        if (arrCoordCount < 3)
-                        {
-                            int extCoordCount = arrCoordCount + 1;
-
-                            if (extCoordCount < 3)
-                                extCoordCount = 3;
-
-                            sb << "__target_intrinsic(glsl, \"textureLod($p, ";
-
-                            sb << "vec" << extCoordCount << "($1,";
-                            for (int ii = arrCoordCount; ii < extCoordCount - 1; ++ii)
-                            {
-                                sb << " 0.0,";
-                            }
-                            sb << "$2)";
-
-                            sb << ", 0.0)\")\n";
-                        }
-                        else if(arrCoordCount <= 3)
-                        {
-                            int extCoordCount = arrCoordCount + 1;
-
-                            if (extCoordCount < 3)
-                                extCoordCount = 3;
-
-                            sb << "__target_intrinsic(glsl, \"textureGrad($p, ";
-
-                            sb << "vec" << extCoordCount << "($1,";
-                            for (int ii = arrCoordCount; ii < extCoordCount - 1; ++ii)
-                            {
-                                sb << " 0.0,";
-                            }
-                            sb << "$2)";
-
-                            // Construct gradients
-                            sb << ", vec" << baseCoordCount << "(0.0)";
-                            sb << ", vec" << baseCoordCount << "(0.0)";
-                            sb << ")\")\n";
-                        }
-                        sb << "__intrinsic_op\n";
-                        sb << "T SampleCmpLevelZero(SamplerComparisonState s, ";
-                        sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, ";
-                        sb << "float compareValue";
-                        sb << ");\n";
-
-                        if( baseShape != TextureType::ShapeCube )
-                        {
-                            // Note(tfoley): MSDN seems confused, and claims that the `offset`
-                            // parameter for `SampleCmp` is available for everything but 3D
-                            // textures, while `Sample` and `SampleBias` are consistent in
-                            // saying they only exclude `offset` for cube maps (which makes
-                            // sense). I'm going to assume the documentation for `SampleCmp`
-                            // is just wrong.
-
-                            sb << "T SampleCmp(SamplerState s, ";
-                            sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, ";
-                            sb << "float compareValue, ";
-                            sb << "int" << kBaseTextureTypes[tt].coordCount << " offset);\n";
-
-                            sb << "T SampleCmpLevelZero(SamplerState s, ";
-                            sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, ";
-                            sb << "float compareValue, ";
-                            sb << "int" << kBaseTextureTypes[tt].coordCount << " offset);\n";
-                        }
-
-
-                        sb << "__target_intrinsic(glsl, \"textureGrad($p, $1, $2, $3)\")\n";
-                        sb << "__intrinsic_op(sampleGrad)\n";
-                        sb << "T SampleGrad(SamplerState s, ";
-                        sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, ";
-                        sb << "float" << kBaseTextureTypes[tt].coordCount << " gradX, ";
-                        sb << "float" << kBaseTextureTypes[tt].coordCount << " gradY";
-                        sb << ");\n";
-
-                        if( baseShape != TextureType::ShapeCube )
-                        {
-                            sb << "__target_intrinsic(glsl, \"textureGradOffset($p, $1, $2, $3, $4)\")\n";
-                            sb << "__intrinsic_op(sampleGrad)\n";
-                            sb << "T SampleGrad(SamplerState s, ";
-                            sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, ";
-                            sb << "float" << kBaseTextureTypes[tt].coordCount << " gradX, ";
-                            sb << "float" << kBaseTextureTypes[tt].coordCount << " gradY, ";
-                            sb << "int" << kBaseTextureTypes[tt].coordCount << " offset);\n";
-                        }
-
-                        // `SampleLevel`
-
-                        sb << "__target_intrinsic(glsl, \"textureLod($p, $1, $2)\")\n";
-                        sb << "__intrinsic_op\n";
-                        sb << "T SampleLevel(SamplerState s, ";
-                        sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, ";
-                        sb << "float level);\n";
-
-                        if( baseShape != TextureType::ShapeCube )
-                        {
-                            sb << "__target_intrinsic(glsl, \"textureLodOffset($p, $1, $2, $3)\")\n";
-                            sb << "__intrinsic_op\n";
-                            sb << "T SampleLevel(SamplerState s, ";
-                            sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, ";
-                            sb << "float level, ";
-                            sb << "int" << kBaseTextureTypes[tt].coordCount << " offset);\n";
-                        }
-                    }
-
-                    sb << "\n};\n";
-
-                    // `Gather*()` operations are handled via an `extension` declaration,
-                    // because this lets us capture the element type of the texture.
-                    //
-                    // TODO: longer-term there should be something like a `TextureElementType`
-                    // interface, that both scalars and vectors implement, that then exposes
-                    // a `Scalar` associated type, and `Gather` can return `vector<T.Scalar, 4>`.
-                    //
-                    static const struct {
-                        char const* genericPrefix;
-                        char const* elementType;
-                    } kGatherExtensionCases[] = {
-                        { "__generic<T, let N : int>", "vector<T,N>" },
-
-                        // TODO: need a case here for scalars `T`, but also
-                        // need to ensure that case doesn't accidentally match
-                        // for `T = vector<...>`, which requires actual checking
-                        // of constraints on generic parameters.
-                    };
-                    for(auto cc : kGatherExtensionCases)
-                    {
-                        // TODO: this should really be an `if` around the entire `Gather` logic
-                        if (isMultisample) break;
-
-                        EMIT_LINE_DIRECTIVE();
-                        sb << cc.genericPrefix << " __extension ";
-                        sb << kBaseTextureAccessLevels[accessLevel].name;
-                        sb << name;
-                        if (isArray) sb << "Array";
-                        sb << "<" << cc.elementType << " >";
-                        sb << "\n{\n";
-
-
-                        // `Gather`
-                        // (tricky because it returns a 4-vector of the element type
-                        // of the texture components...)
-                        //
-                        // TODO: is it actually correct to restrict these so that, e.g.,
-                        // `GatherAlpha()` isn't allowed on `Texture2D<float3>` because
-                        // it nominally doesn't have an alpha component?
-                        static const struct {
-                            int componentIndex;
-                            char const* componentName;
-                        } kGatherComponets[] = {
-                            { 0, "" },
-                            { 0, "Red" },
-                            { 1, "Green" },
-                            { 2, "Blue" },
-                            { 3, "Alpha" },
-                        };
-
-                        for(auto kk : kGatherComponets)
-                        {
-                            auto componentIndex = kk.componentIndex;
-                            auto componentName = kk.componentName;
-
-                            EMIT_LINE_DIRECTIVE();
-                            
-                            sb << "__target_intrinsic(glsl, \"textureGather($p, $1, " << componentIndex << ")\")\n";
-                            sb << "__intrinsic_op\n";
-                            sb << "vector<T, 4> Gather" << componentName << "(SamplerState s, ";
-                            sb << "float" << kBaseTextureTypes[tt].coordCount << " location);\n";
-
-                            EMIT_LINE_DIRECTIVE();
-                            sb << "__target_intrinsic(glsl, \"textureGatherOffset($p, $1, $2, " << componentIndex << ")\")\n";
-                            sb << "__intrinsic_op\n";
-                            sb << "vector<T, 4> Gather" << componentName << "(SamplerState s, ";
-                            sb << "float" << kBaseTextureTypes[tt].coordCount << " location, ";
-                            sb << "int" << kBaseTextureTypes[tt].coordCount << " offset);\n";
-
-                            EMIT_LINE_DIRECTIVE();
-                            sb << "vector<T, 4> Gather" << componentName << "(SamplerState s, ";
-                            sb << "float" << kBaseTextureTypes[tt].coordCount << " location, ";
-                            sb << "int" << kBaseTextureTypes[tt].coordCount << " offset, ";
-                            sb << "out uint status);\n";
-
-                            EMIT_LINE_DIRECTIVE();
-                            sb << "__target_intrinsic(glsl, \"textureGatherOffsets($p, $1, int" << kBaseTextureTypes[tt].coordCount << "[]($2, $3, $4, $5), " << componentIndex << ")\")\n";
-                            sb << "__intrinsic_op\n";
-                            sb << "vector<T, 4> Gather" << componentName << "(SamplerState s, ";
-                            sb << "float" << kBaseTextureTypes[tt].coordCount << " location, ";
-                            sb << "int" << kBaseTextureTypes[tt].coordCount << " offset1, ";
-                            sb << "int" << kBaseTextureTypes[tt].coordCount << " offset2, ";
-                            sb << "int" << kBaseTextureTypes[tt].coordCount << " offset3, ";
-                            sb << "int" << kBaseTextureTypes[tt].coordCount << " offset4);\n";
-
-                            EMIT_LINE_DIRECTIVE();
-                            sb << "vector<T, 4> Gather" << componentName << "(SamplerState s, ";
-                            sb << "float" << kBaseTextureTypes[tt].coordCount << " location, ";
-                            sb << "int" << kBaseTextureTypes[tt].coordCount << " offset1, ";
-                            sb << "int" << kBaseTextureTypes[tt].coordCount << " offset2, ";
-                            sb << "int" << kBaseTextureTypes[tt].coordCount << " offset3, ";
-                            sb << "int" << kBaseTextureTypes[tt].coordCount << " offset4, ";
-                            sb << "out uint status);\n";
-                        }
-
-                        EMIT_LINE_DIRECTIVE();
-                        sb << "\n}\n";
-                    }
-                }
-            }
-        }
-
-
-        for (auto op : unaryOps)
-        {
-            for (auto type : kBaseTypes)
-            {
-                if ((type.flags & op.flags) == 0)
-                    continue;
-
-                char const* fixity = (op.flags & POSTFIX) != 0 ? "__postfix " : "__prefix ";
-                char const* qual = (op.flags & ASSIGNMENT) != 0 ? "in out " : "";
-
-                // scalar version
-                sb << fixity;
-                sb << "__intrinsic_op(" << int(op.opCode) << ") " << type.name << " operator" << op.opName << "(" << qual << type.name << " value);\n";
-
-                // vector version
-                sb << "__generic<let N : int> ";
-                sb << fixity;
-                sb << "__intrinsic_op(" << int(op.opCode) << ") vector<" << type.name << ",N> operator" << op.opName << "(" << qual << "vector<" << type.name << ",N> value);\n";
-
-                // matrix version
-                sb << "__generic<let N : int, let M : int> ";
-                sb << fixity;
-                sb << "__intrinsic_op(" << int(op.opCode) << ") matrix<" << type.name << ",N,M> operator" << op.opName << "(" << qual << "matrix<" << type.name << ",N,M> value);\n";
-            }
-        }
-
-        for (auto op : binaryOps)
-        {
-            for (auto type : kBaseTypes)
-            {
-                if ((type.flags & op.flags) == 0)
-                    continue;
-
-                char const* leftType = type.name;
-                char const* rightType = leftType;
-                char const* resultType = leftType;
-
-                if (op.flags & COMPARISON) resultType = "bool";
-
-                char const* leftQual = "";
-                if(op.flags & ASSIGNMENT) leftQual = "in out ";
-
-                // TODO: handle `SHIFT`
-
-                // scalar version
-                sb << "__intrinsic_op(" << int(op.opCode) << ") " << resultType << " operator" << op.opName << "(" << leftQual << leftType << " left, " << rightType << " right);\n";
-
-                // vector version
-                sb << "__generic<let N : int> ";
-                sb << "__intrinsic_op(" << int(op.opCode) << ") vector<" << resultType << ",N> operator" << op.opName << "(" << leftQual << "vector<" << leftType << ",N> left, vector<" << rightType << ",N> right);\n";
-
-                // matrix version
-
-                // skip matrix-matrix multiply operations here, so that GLSL doesn't see them
-                switch (op.opCode)
-                {
-                case kIROp_Mul:
-                case kIRPseudoOp_MulAssign:
-                    break;
-
-                default:
-                    sb << "__generic<let N : int, let M : int> ";
-                    sb << "__intrinsic_op(" << int(op.opCode) << ") matrix<" << resultType << ",N,M> operator" << op.opName << "(" << leftQual << "matrix<" << leftType << ",N,M> left, matrix<" << rightType << ",N,M> right);\n";
-                    break;
-                }
-
-                // We are going to go ahead and explicitly define combined
-                // operations for the scalar-op-vector, etc. cases, rather
-                // than rely on promotion rules.
-
-                // scalar-vector and scalar-matrix
-                if (!(op.flags & ASSIGNMENT))
-                {
-                    sb << "__generic<let N : int> ";
-                    sb << "__intrinsic_op(" << int(op.opCode) << ") vector<" << resultType << ",N> operator" << op.opName << "(" << leftQual << leftType << " left, vector<" << rightType << ",N> right);\n";
-
-                    sb << "__generic<let N : int, let M : int> ";
-                    sb << "__intrinsic_op(" << int(op.opCode) << ") matrix<" << resultType << ",N,M> operator" << op.opName << "(" << leftQual << leftType << " left, matrix<" << rightType << ",N,M> right);\n";
-                }
-
-                // vector-scalar and matrix-scalar
-                sb << "__generic<let N : int> ";
-                sb << "__intrinsic_op(" << int(op.opCode) << ") vector<" << resultType << ",N> operator" << op.opName << "(" << leftQual << "vector<" << leftType << ",N> left, " << rightType << " right);\n";
-
-                sb << "__generic<let N : int, let M : int> ";
-                sb << "__intrinsic_op(" << int(op.opCode) << ") matrix<" << resultType << ",N,M> operator" << op.opName << "(" << leftQual << "matrix<" << leftType << ",N,M> left, " << rightType << " right);\n";
-            }
-        }
-
-        // Output a suitable `#line` directive to point at our raw stdlib code above
-        sb << "\n#line " << kCoreLibIncludeStringLine << " \"" << path << "\"\n";
-
-        int chunkCount = sizeof(kCoreLibIncludeStringChunks) / sizeof(kCoreLibIncludeStringChunks[0]);
-        for (int cc = 0; cc < chunkCount; ++cc)
-        {
-            sb << kCoreLibIncludeStringChunks[cc];
-        }
+        #include "core.meta.slang.cpp"
 
         coreLibraryCode = sb.ProduceString();
         return coreLibraryCode;
@@ -2173,90 +261,7 @@ namespace Slang
 
         StringBuilder sb;
 
-
-        // Component-wise multiplication ops
-        for(auto op : binaryOps)
-        {
-            switch (op.opCode)
-            {
-            default:
-                continue;
-
-            case kIROp_Mul:
-            case kIRPseudoOp_MulAssign:
-                break;
-            }
-
-            for (auto type : kBaseTypes)
-            {
-                if ((type.flags & op.flags) == 0)
-                    continue;
-
-                char const* leftType = type.name;
-                char const* rightType = leftType;
-                char const* resultType = leftType;
-
-                char const* leftQual = "";
-                if(op.flags & ASSIGNMENT) leftQual = "in out ";
-
-                sb << "__generic<let N : int, let M : int> ";
-                sb << "__intrinsic_op(" << int(op.opCode) << ") matrix<" << resultType << ",N,M> operator" << op.opName << "(" << leftQual << "matrix<" << leftType << ",N,M> left, matrix<" << rightType << ",N,M> right);\n";
-            }
-        }
-
-        //
-
-        // Buffer types
-
-        static const struct {
-            char const*         name;
-            SlangResourceAccess access;
-        } kBaseBufferAccessLevels[] = {
-            { "",                   SLANG_RESOURCE_ACCESS_READ },
-            { "RW",                 SLANG_RESOURCE_ACCESS_READ_WRITE },
-            { "RasterizerOrdered",  SLANG_RESOURCE_ACCESS_RASTER_ORDERED },
-        };
-        static const int kBaseBufferAccessLevelCount = sizeof(kBaseBufferAccessLevels) / sizeof(kBaseBufferAccessLevels[0]);
-
-        for (int aa = 0; aa < kBaseBufferAccessLevelCount; ++aa)
-        {
-
-            sb << "__generic<T> __magic_type(Texture, ";
-            sb << ResourceType::makeFlavor(ResourceType::Shape::ShapeBuffer, kBaseBufferAccessLevels[aa].access);
-            sb << ") struct ";
-            sb << kBaseBufferAccessLevels[aa].name;
-            sb << "Buffer {\n";
-
-            sb << "__intrinsic_op void GetDimensions(out uint dim);\n";
-
-            sb << "__target_intrinsic(glsl, \"texelFetch($P, $0)$z\")\n";
-            sb << "__intrinsic_op T Load(int location);\n";
-
-            sb << "__intrinsic_op T Load(int location, out uint status);\n";
-
-            sb << "__target_intrinsic(glsl, \"texelFetch($P, int($0))$z\")\n";
-            sb << "__intrinsic_op __subscript(uint index) -> T";
-
-            if (kBaseBufferAccessLevels[aa].access != SLANG_RESOURCE_ACCESS_READ)
-            {
-                sb << " { get; set; }\n";
-            }
-            else
-            {
-                sb << ";\n";
-            }
-
-            sb << "};\n";
-        }
-
-        // Output a suitable `#line` directive to point at our raw stdlib code above
-        sb << "\n#line " << kHLSLLibIncludeStringLine << " \"" << getStdlibPath() << "\"\n";
-
-        int chunkCount = sizeof(kHLSLLibIncludeStringChunks) / sizeof(kHLSLLibIncludeStringChunks[0]);
-        for (int cc = 0; cc < chunkCount; ++cc)
-        {
-            sb << kHLSLLibIncludeStringChunks[cc];
-        }
+        #include "hlsl.meta.slang.cpp"
 
         hlslLibraryCode = sb.ProduceString();
         return hlslLibraryCode;
@@ -2274,203 +279,7 @@ namespace Slang
 
         StringBuilder sb;
 
-        static const struct {
-            char const* name;
-            char const* glslPrefix;
-        } kTypes[] =
-        {
-            {"float", ""},
-            {"int", "i"},
-            {"uint", "u"},
-            {"bool", "b"},
-        };
-        static const int kTypeCount = sizeof(kTypes) / sizeof(kTypes[0]);
-
-        for( int tt = 0; tt < kTypeCount; ++tt )
-        {
-            // Declare GLSL aliases for HLSL types
-            for (int vv = 2; vv <= 4; ++vv)
-            {
-                sb << "typedef vector<" << kTypes[tt].name << "," << vv << "> " << kTypes[tt].glslPrefix << "vec" << vv << ";\n";
-                sb << "typedef matrix<" << kTypes[tt].name << "," << vv << "," << vv << "> " << kTypes[tt].glslPrefix << "mat" << vv << ";\n";
-            }
-            for (int rr = 2; rr <= 4; ++rr)
-            for (int cc = 2; cc <= 4; ++cc)
-            {
-                sb << "typedef matrix<" << kTypes[tt].name << "," << rr << "," << cc << "> " << kTypes[tt].glslPrefix << "mat" << rr << "x" << cc << ";\n";
-            }
-        }
-
-        // Multiplication operations for vectors + matrices
-
-        // scalar-vector and vector-scalar
-        sb << "__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op(mul) vector<T,N> operator*(vector<T,N> x, T y);\n";
-        sb << "__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op(mul) vector<T,N> operator*(T x, vector<T,N> y);\n";
-
-        // scalar-matrix and matrix-scalar
-        sb << "__generic<T : __BuiltinArithmeticType, let N : int, let M :int> __intrinsic_op(mul) matrix<T,N,M> operator*(matrix<T,N,M> x, T y);\n";
-        sb << "__generic<T : __BuiltinArithmeticType, let N : int, let M :int> __intrinsic_op(mul) matrix<T,N,M> operator*(T x, matrix<T,N,M> y);\n";
-
-        // vector-vector (dot product)
-        sb << "__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op(dot) T operator*(vector<T,N> x, vector<T,N> y);\n";
-
-        // vector-matrix
-        sb << "__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op(mulVectorMatrix) vector<T,M> operator*(vector<T,N> x, matrix<T,N,M> y);\n";
-
-        // matrix-vector
-        sb << "__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op(mulMatrixVector) vector<T,N> operator*(matrix<T,N,M> x, vector<T,M> y);\n";
-
-        // matrix-matrix
-        sb << "__generic<T : __BuiltinArithmeticType, let R : int, let N : int, let C : int> __intrinsic_op(mulMatrixMatrix) matrix<T,R,C> operator*(matrix<T,R,N> x, matrix<T,N,C> y);\n";
-
-
-
-        //
-
-        // TODO(tfoley): Need to handle `RW*` variants of texture types as well...
-        static const struct {
-            char const*			name;
-            TextureType::Shape	baseShape;
-            int					coordCount;
-        } kBaseTextureTypes[] = {
-            { "1D",		TextureType::Shape1D,	1 },
-            { "2D",		TextureType::Shape2D,	2 },
-            { "3D",		TextureType::Shape3D,	3 },
-            { "Cube",	TextureType::ShapeCube,	3 },
-            { "Buffer", TextureType::ShapeBuffer,   1 },
-        };
-        static const int kBaseTextureTypeCount = sizeof(kBaseTextureTypes) / sizeof(kBaseTextureTypes[0]);
-
-
-        static const struct {
-            char const*         name;
-            SlangResourceAccess access;
-        } kBaseTextureAccessLevels[] = {
-            { "",                   SLANG_RESOURCE_ACCESS_READ },
-            { "RW",                 SLANG_RESOURCE_ACCESS_READ_WRITE },
-            { "RasterizerOrdered",  SLANG_RESOURCE_ACCESS_RASTER_ORDERED },
-        };
-        static const int kBaseTextureAccessLevelCount = sizeof(kBaseTextureAccessLevels) / sizeof(kBaseTextureAccessLevels[0]);
-
-        for (int tt = 0; tt < kBaseTextureTypeCount; ++tt)
-        {
-            char const* shapeName = kBaseTextureTypes[tt].name;
-            TextureType::Shape baseShape = kBaseTextureTypes[tt].baseShape;
-
-            for (int isArray = 0; isArray < 2; ++isArray)
-            {
-                // Arrays of 3D textures aren't allowed
-                if (isArray && baseShape == TextureType::Shape3D) continue;
-
-                for (int isMultisample = 0; isMultisample < 2; ++isMultisample)
-                {
-                    auto readAccess = SLANG_RESOURCE_ACCESS_READ;
-                    auto readWriteAccess = SLANG_RESOURCE_ACCESS_READ_WRITE;
-
-                    // TODO: any constraints to enforce on what gets to be multisampled?
-
-                        
-                    unsigned flavor = baseShape;
-                    if (isArray)		flavor |= TextureType::ArrayFlag;
-                    if (isMultisample)	flavor |= TextureType::MultisampleFlag;
-//                        if (isShadow)		flavor |= TextureType::ShadowFlag;
-
-
-
-                    unsigned readFlavor = flavor | (readAccess << 8);
-                    unsigned readWriteFlavor = flavor | (readWriteAccess << 8);
-
-                    StringBuilder nameBuilder;
-                    nameBuilder << shapeName;
-                    if (isMultisample) nameBuilder << "MS";
-                    if (isArray) nameBuilder << "Array";
-                    auto name = nameBuilder.ProduceString();
-
-                    sb << "__generic<T> ";
-                    sb << "__magic_type(TextureSampler," << int(readFlavor) << ") struct ";
-                    sb << "__sampler" << name;
-                    sb << " {};\n";
-
-                    sb << "__generic<T> ";
-                    sb << "__magic_type(Texture," << int(readFlavor) << ") struct ";
-                    sb << "__texture" << name;
-                    sb << " {};\n";
-
-                    sb << "__generic<T> ";
-                    sb << "__magic_type(GLSLImageType," << int(readWriteFlavor) << ") struct ";
-                    sb << "__image" << name;
-                    sb << " {};\n";
-
-                    // TODO(tfoley): flesh this out for all the available prefixes
-                    static const struct
-                    {
-                        char const* prefix;
-                        char const* elementType;
-                    } kTextureElementTypes[] = {
-                        { "", "vec4" },
-                        { "i", "ivec4" },
-                        { "u", "uvec4" },
-                        { nullptr, nullptr },
-                    };
-                    for( auto ee = kTextureElementTypes; ee->prefix; ++ee )
-                    {
-                        sb << "typedef __sampler" << name << "<" << ee->elementType << "> " << ee->prefix << "sampler" << name << ";\n";
-                        sb << "typedef __texture" << name << "<" << ee->elementType << "> " << ee->prefix << "texture" << name << ";\n";
-                        sb << "typedef __image" << name << "<" << ee->elementType << "> " << ee->prefix << "image" << name << ";\n";
-                    }
-                }
-            }
-        }
-
-        sb << "__generic<T> __magic_type(GLSLInputParameterBlockType) struct __GLSLInputParameterBlock {};\n";
-        sb << "__generic<T> __magic_type(GLSLOutputParameterBlockType) struct __GLSLOutputParameterBlock {};\n";
-        sb << "__generic<T> __magic_type(GLSLShaderStorageBufferType) struct __GLSLShaderStorageBuffer {};\n";
-
-        sb << "__magic_type(SamplerState," << int(SamplerStateType::Flavor::SamplerState) << ") struct sampler {};";
-
-        sb << "__magic_type(GLSLInputAttachmentType) struct subpassInput {};";
-
-        // Define additional keywords
-
-        sb << "syntax buffer : GLSLBufferModifier;\n";
-
-        // [GLSL 4.3] Storage Qualifiers
-
-        // TODO: need to support `shared` here with its GLSL meaning
-
-        sb << "syntax patch : GLSLPatchModifier;\n";
-        // `centroid` and `sample` handled centrally
-
-        // [GLSL 4.5] Interpolation Qualifiers
-        sb << "syntax smooth : SimpleModifier;\n";
-        sb << "syntax flat : SimpleModifier;\n";
-        sb << "syntax noperspectie : SimpleModifier;\n";
-
-
-        // [GLSL 4.3.2] Constant Qualifier
-
-        // We need to handle GLSL `const` separately from HLSL `const`,
-        // since they mean such different things.
-
-        // [GLSL 4.7.2] Precision Qualifiers
-        sb << "syntax highp : SimpleModifier;\n";
-        sb << "syntax mediump : SimpleModifier;\n";
-        sb << "syntax lowp : SimpleModifier;\n";
-
-        // [GLSL 4.8.1] The Invariant Qualifier
-
-        sb << "syntax invariant : SimpleModifier;\n";
-
-        // [GLSL 4.10] Memory Qualifiers
-
-        sb << "syntax coherent : SimpleModifier;\n";
-        sb << "syntax volatile : SimpleModifier;\n";
-        sb << "syntax restrict : SimpleModifier;\n";
-        sb << "syntax readonly : GLSLReadOnlyModifier;\n";
-        sb << "syntax writeonly : GLSLWriteOnlyModifier;\n";
-
-        // We will treat `subroutine` as a qualifier for now
-        sb << "syntax subroutine : SimpleModifier;\n";
+        #include "glsl.meta.slang.cpp"
 
         glslLibraryCode = sb.ProduceString();
         return glslLibraryCode;
diff --git a/source/slang/slang.vcxproj b/source/slang/slang.vcxproj
index bc41ddb71..1f55138e4 100644
--- a/source/slang/slang.vcxproj
+++ b/source/slang/slang.vcxproj
@@ -229,6 +229,65 @@
       <Project>{f9be7957-8399-899e-0c49-e714fddd4b65}</Project>
     </ProjectReference>
   </ItemGroup>
+  <ItemGroup>
+    <CustomBuild Include="core.meta.slang">
+      <FileType>Document</FileType>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(OutDir)slang-generate.exe %(Identity)</Command>
+      <Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">slang-generate %(Identity)</Message>
+      <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%(Identity).cpp</Outputs>
+      <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(OutDir)slang-generate.exe</AdditionalInputs>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(OutDir)slang-generate.exe %(Identity)</Command>
+      <Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">slang-generate %(Identity)</Message>
+      <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">%(Identity).cpp</Outputs>
+      <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(OutDir)slang-generate.exe</AdditionalInputs>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(OutDir)slang-generate.exe %(Identity)</Command>
+      <Message Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">slang-generate %(Identity)</Message>
+      <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%(Identity).cpp</Outputs>
+      <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(OutDir)slang-generate.exe</AdditionalInputs>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(OutDir)slang-generate.exe %(Identity)</Command>
+      <Message Condition="'$(Configuration)|$(Platform)'=='Release|x64'">slang-generate %(Identity)</Message>
+      <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">%(Identity).cpp</Outputs>
+      <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(OutDir)slang-generate.exe</AdditionalInputs>
+    </CustomBuild>
+    <CustomBuild Include="glsl.meta.slang">
+      <FileType>Document</FileType>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(OutDir)slang-generate.exe %(Identity)</Command>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(OutDir)slang-generate.exe %(Identity)</Command>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(OutDir)slang-generate.exe %(Identity)</Command>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(OutDir)slang-generate.exe %(Identity)</Command>
+      <Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">slang-generate %(Identity)</Message>
+      <Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">slang-generate %(Identity)</Message>
+      <Message Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">slang-generate %(Identity)</Message>
+      <Message Condition="'$(Configuration)|$(Platform)'=='Release|x64'">slang-generate %(Identity)</Message>
+      <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%(Identity).cpp</Outputs>
+      <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">%(Identity).cpp</Outputs>
+      <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%(Identity).cpp</Outputs>
+      <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">%(Identity).cpp</Outputs>
+      <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(OutDir)slang-generate.exe</AdditionalInputs>
+      <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(OutDir)slang-generate.exe</AdditionalInputs>
+      <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(OutDir)slang-generate.exe</AdditionalInputs>
+      <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(OutDir)slang-generate.exe</AdditionalInputs>
+    </CustomBuild>
+    <CustomBuild Include="hlsl.meta.slang">
+      <FileType>Document</FileType>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(OutDir)slang-generate.exe %(Identity)</Command>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(OutDir)slang-generate.exe %(Identity)</Command>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(OutDir)slang-generate.exe %(Identity)</Command>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(OutDir)slang-generate.exe %(Identity)</Command>
+      <Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">slang-generate %(Identity)</Message>
+      <Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">slang-generate %(Identity)</Message>
+      <Message Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">slang-generate %(Identity)</Message>
+      <Message Condition="'$(Configuration)|$(Platform)'=='Release|x64'">slang-generate %(Identity)</Message>
+      <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%(Identity).cpp</Outputs>
+      <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">%(Identity).cpp</Outputs>
+      <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%(Identity).cpp</Outputs>
+      <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">%(Identity).cpp</Outputs>
+      <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(OutDir)slang-generate.exe</AdditionalInputs>
+      <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(OutDir)slang-generate.exe</AdditionalInputs>
+      <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(OutDir)slang-generate.exe</AdditionalInputs>
+      <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(OutDir)slang-generate.exe</AdditionalInputs>
+    </CustomBuild>
+  </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
   </ImportGroup>
diff --git a/source/slang/slang.vcxproj.filters b/source/slang/slang.vcxproj.filters
index 4bd7ff9a0..9a85ce966 100644
--- a/source/slang/slang.vcxproj.filters
+++ b/source/slang/slang.vcxproj.filters
@@ -64,4 +64,9 @@
     <ClCompile Include="ir.cpp" />
     <ClCompile Include="lower-to-ir.cpp" />
   </ItemGroup>
+  <ItemGroup>
+    <None Include="core.meta.slang" />
+    <None Include="glsl.meta.slang" />
+    <None Include="hlsl.meta.slang" />
+  </ItemGroup>
 </Project>
 \ No newline at end of file
author	Tim Foley <tfoley@nvidia.com>	2017-09-07 14:35:07 -0700
committer	Tim Foley <tfoley@nvidia.com>	2017-09-11 09:50:56 -0700
commit	14137cbd2ddd7deebcdf8cc85c30d534bec8e40b (patch)
tree	5f9b010837de0c78f2f96e59388bf76e4cbd8575 /source
parent	0e566a63f0bafb7def65521315e9f19a2bc79e34 (diff)