35 files changed, 2399 insertions, 1474 deletions
diff --git a/build/visual-studio/slang/slang.vcxproj b/build/visual-studio/slang/slang.vcxproj
index a7359e001..f5d28d800 100644
--- a/build/visual-studio/slang/slang.vcxproj
+++ b/build/visual-studio/slang/slang.vcxproj
@@ -404,6 +404,7 @@ IF EXIST ..\..\..\external\slang-glslang\bin\windows-aarch64\release\slang-glsla
     <ClInclude Include="..\..\..\source\slang\slang-ir-legalize-mesh-outputs.h" />
     <ClInclude Include="..\..\..\source\slang\slang-ir-legalize-uniform-buffer-load.h" />
     <ClInclude Include="..\..\..\source\slang\slang-ir-legalize-varying-params.h" />
+    <ClInclude Include="..\..\..\source\slang\slang-ir-legalize-vector-types.h" />
     <ClInclude Include="..\..\..\source\slang\slang-ir-link.h" />
     <ClInclude Include="..\..\..\source\slang\slang-ir-liveness.h" />
     <ClInclude Include="..\..\..\source\slang\slang-ir-loop-inversion.h" />
@@ -505,6 +506,7 @@ IF EXIST ..\..\..\external\slang-glslang\bin\windows-aarch64\release\slang-glsla
     <ClInclude Include="..\..\..\source\slang\slang-serialize-value-type-info.h" />
     <ClInclude Include="..\..\..\source\slang\slang-serialize.h" />
     <ClInclude Include="..\..\..\source\slang\slang-spirv-val.h" />
+    <ClInclude Include="..\..\..\source\slang\slang-stdlib-textures.h" />
     <ClInclude Include="..\..\..\source\slang\slang-syntax.h" />
     <ClInclude Include="..\..\..\source\slang\slang-type-layout.h" />
     <ClInclude Include="..\..\..\source\slang\slang-type-system-shared.h" />
@@ -616,6 +618,7 @@ IF EXIST ..\..\..\external\slang-glslang\bin\windows-aarch64\release\slang-glsla
     <ClCompile Include="..\..\..\source\slang\slang-ir-legalize-types.cpp" />
     <ClCompile Include="..\..\..\source\slang\slang-ir-legalize-uniform-buffer-load.cpp" />
     <ClCompile Include="..\..\..\source\slang\slang-ir-legalize-varying-params.cpp" />
+    <ClCompile Include="..\..\..\source\slang\slang-ir-legalize-vector-types.cpp" />
     <ClCompile Include="..\..\..\source\slang\slang-ir-link.cpp" />
     <ClCompile Include="..\..\..\source\slang\slang-ir-liveness.cpp" />
     <ClCompile Include="..\..\..\source\slang\slang-ir-loop-inversion.cpp" />
@@ -715,6 +718,7 @@ IF EXIST ..\..\..\external\slang-glslang\bin\windows-aarch64\release\slang-glsla
     <ClCompile Include="..\..\..\source\slang\slang-spirv-core-grammar-embed.cpp" />
     <ClCompile Include="..\..\..\source\slang\slang-spirv-val.cpp" />
     <ClCompile Include="..\..\..\source\slang\slang-stdlib-api.cpp" />
+    <ClCompile Include="..\..\..\source\slang\slang-stdlib-textures.cpp" />
     <ClCompile Include="..\..\..\source\slang\slang-stdlib.cpp" />
     <ClCompile Include="..\..\..\source\slang\slang-syntax.cpp" />
     <ClCompile Include="..\..\..\source\slang\slang-type-layout.cpp" />
diff --git a/build/visual-studio/slang/slang.vcxproj.filters b/build/visual-studio/slang/slang.vcxproj.filters
index 3e89626be..193358c37 100644
--- a/build/visual-studio/slang/slang.vcxproj.filters
+++ b/build/visual-studio/slang/slang.vcxproj.filters
@@ -300,6 +300,9 @@
     <ClInclude Include="..\..\..\source\slang\slang-ir-legalize-varying-params.h">
       <Filter>Header Files</Filter>
     </ClInclude>
+    <ClInclude Include="..\..\..\source\slang\slang-ir-legalize-vector-types.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
     <ClInclude Include="..\..\..\source\slang\slang-ir-link.h">
       <Filter>Header Files</Filter>
     </ClInclude>
@@ -603,6 +606,9 @@
     <ClInclude Include="..\..\..\source\slang\slang-spirv-val.h">
       <Filter>Header Files</Filter>
     </ClInclude>
+    <ClInclude Include="..\..\..\source\slang\slang-stdlib-textures.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
     <ClInclude Include="..\..\..\source\slang\slang-syntax.h">
       <Filter>Header Files</Filter>
     </ClInclude>
@@ -932,6 +938,9 @@
     <ClCompile Include="..\..\..\source\slang\slang-ir-legalize-varying-params.cpp">
       <Filter>Source Files</Filter>
     </ClCompile>
+    <ClCompile Include="..\..\..\source\slang\slang-ir-legalize-vector-types.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
     <ClCompile Include="..\..\..\source\slang\slang-ir-link.cpp">
       <Filter>Source Files</Filter>
     </ClCompile>
@@ -1229,6 +1238,9 @@
     <ClCompile Include="..\..\..\source\slang\slang-stdlib-api.cpp">
       <Filter>Source Files</Filter>
     </ClCompile>
+    <ClCompile Include="..\..\..\source\slang\slang-stdlib-textures.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
     <ClCompile Include="..\..\..\source\slang\slang-stdlib.cpp">
       <Filter>Source Files</Filter>
     </ClCompile>
diff --git a/source/slang/core.meta.slang b/source/slang/core.meta.slang
index 96e6d284a..956a5b29a 100644
--- a/source/slang/core.meta.slang
+++ b/source/slang/core.meta.slang
@@ -1065,8 +1065,6 @@ __generic<T> __extension vector<T, 4>
 
 ${{{{
 
-static const char* kComponentNames[]{ "x", "y", "z", "w" };
-
 // The above extensions are generic in the *type* of the vector,
 // but explicit in the *size*. We will now declare an extension
 // for each builtin type that is generic in the size.
@@ -1256,1054 +1254,6 @@ struct SamplerComparisonState
 
 ${{{{
 
-static const struct BaseTextureShapeInfo {
-    char const*			    shapeName;
-    TextureFlavor::Shape	baseShape;
-    int					    coordCount;
-} kBaseTextureShapes[] = {
-    { "1D",		TextureFlavor::Shape::Shape1D,	1 },
-    { "2D",		TextureFlavor::Shape::Shape2D,	2 },
-    { "3D",		TextureFlavor::Shape::Shape3D,	3 },
-    { "Cube",	TextureFlavor::Shape::ShapeCube,3 },
-};
-
-static const struct BaseTextureAccessInfo {
-    char const*         name;
-    SlangResourceAccess access;
-} kBaseTextureAccessLevels[] = {
-    { "",                   SLANG_RESOURCE_ACCESS_READ },
-    { "RW",                 SLANG_RESOURCE_ACCESS_READ_WRITE },
-    { "RasterizerOrdered",  SLANG_RESOURCE_ACCESS_RASTER_ORDERED },
-};
-
-static const struct TextureTypePrefixInfo
-{
-    char const* name;
-    bool        combined;
-} kTexturePrefixes[] =
-{
-    { "Texture", false },
-    { "Sampler", true },
-};
-
-struct TextureTypeInfo
-{
-    TextureTypeInfo(
-        TextureTypePrefixInfo const& prefixInfo,
-        BaseTextureShapeInfo const& base,
-        bool isArray,
-        bool isMultisample,
-        BaseTextureAccessInfo const& accessInfo,
-        StringBuilder& inSB,
-        String const& inPath)
-        : prefixInfo(prefixInfo)
-        , base(base)
-        , isArray(isArray)
-        , isMultisample(isMultisample)
-        , accessInfo(accessInfo)
-        , sb(inSB)
-        , path(inPath)
-    {
-    }
-
-    TextureTypePrefixInfo const& prefixInfo;
-    BaseTextureShapeInfo const& base;
-    bool isArray;
-    bool isMultisample;
-    BaseTextureAccessInfo const& accessInfo;
-    StringBuilder& sb;
-    String path;
-
-    void emitTypeDecl()
-    {
-        char const* baseName = prefixInfo.name;
-        char const* baseShapeName = base.shapeName;
-        TextureFlavor::Shape baseShape = base.baseShape;
-
-        // Arrays of 3D textures aren't allowed
-        if (isArray && baseShape == TextureFlavor::Shape::Shape3D) return;
-
-        auto access = accessInfo.access;
-
-        // No such thing as RWTextureCube
-        if (access == SLANG_RESOURCE_ACCESS_READ_WRITE && baseShape == TextureFlavor::Shape::ShapeCube)
-        {
-            return;
-        }
-
-        bool isReadOnly = (access == SLANG_RESOURCE_ACCESS_READ);
-        // TODO: any constraints to enforce on what gets to be multisampled?
-
-        unsigned flavor = baseShape;
-        if (isArray)		flavor |= TextureFlavor::ArrayFlag;
-        if (isMultisample)	flavor |= TextureFlavor::MultisampleFlag;
-//                        if (isShadow)		flavor |= TextureFlavor::ShadowFlag;
-
-        flavor |= (access << 8);
-
-        // emit a generic signature
-        sb << "__generic<T = float4";
-        // Multi-sample rw texture types have an optional sampleCount parameter.
-        if (isMultisample)
-            sb << ", let sampleCount : int = 0";
-        sb << ">";
-            
-        if(prefixInfo.combined)
-        {
-            sb << "__magic_type(TextureSamplerType," << int(flavor) << ")\n";
-            sb << "__intrinsic_type(" << (kIROp_TextureSamplerType + (int(flavor) << kIROpMeta_OtherShift)) << ")\n";
-        }
-        else
-        {
-            sb << "__magic_type(TextureType," << int(flavor) << ")\n";
-            sb << "__intrinsic_type(" << (kIROp_TextureType + (int(flavor) << kIROpMeta_OtherShift)) << ")\n";
-        }
-        sb << "struct ";
-        sb << accessInfo.name;
-        sb << baseName;
-        sb << baseShapeName;
-        if (isMultisample) sb << "MS";
-        if (isArray) sb << "Array";
-//                        if (isShadow) sb << "Shadow";
-        sb << "\n{";
-
-        char const* samplerStateParam = prefixInfo.combined ? "" : "SamplerState s, ";
-
-        if( !isMultisample )
-        {
-            sb << "__target_intrinsic(glsl, \"textureQueryLod($p, $2).x\")";
-            sb << "float CalculateLevelOfDetail(" << samplerStateParam;
-            sb << "float" << base.coordCount << " location);\n";
-
-            sb << "__target_intrinsic(glsl, \"textureQueryLod($p, $2).y\")";
-            sb << "float CalculateLevelOfDetailUnclamped(" << samplerStateParam;
-            sb << "float" << base.coordCount << " location);\n";
-        }
-
-        // `GetDimensions`
-        const char* dimParamTypes[] = {"out float ", "out int ", "out uint "};
-        for(auto t : dimParamTypes)
-        for(int includeMipInfo = 0; includeMipInfo < 2; ++includeMipInfo)
-        {
-            {
-                sb << "__glsl_version(450)\n";
-                sb << "__glsl_extension(GL_EXT_samplerless_texture_functions)";
-                sb << "__target_intrinsic(glsl, \"(";
-
-                int aa = 1;
-                String lodStr = ", 0";
-                if (includeMipInfo)
-                {
-                    int mipLevelArg = aa++;
-                    lodStr = ", int($";
-                    lodStr.append(mipLevelArg);
-                    lodStr.append(")");
-                }
-
-                String opStr = " = textureSize($0" + lodStr;
-                switch( access )
-                {
-                case SLANG_RESOURCE_ACCESS_READ_WRITE:
-                case SLANG_RESOURCE_ACCESS_RASTER_ORDERED:
-                    opStr = " = imageSize($0";
-                    break;
-
-                default:
-                    break;
-                }
-
-
-                int cc = 0;
-                switch(baseShape)
-                {
-                case TextureFlavor::Shape::Shape1D:
-                    sb << "($" << aa++ << opStr << ")"; 
-                    if (isArray)
-                    {
-                        sb << ".x";
-                    }
-                    sb << ")";
-                    cc = 1;
-                    break;
-
-                case TextureFlavor::Shape::Shape2D:
-                case TextureFlavor::Shape::ShapeCube:
-                    sb << "($" << aa++ << opStr << ").x)";
-                    sb << ", ($" << aa++ << opStr << ").y)";
-                    cc = 2;
-                    break;
-
-                case TextureFlavor::Shape::Shape3D:
-                    sb << "($" << aa++ << opStr << ").x)";
-                    sb << ", ($" << aa++ << opStr << ").y)";
-                    sb << ", ($" << aa++ << opStr << ").z)";
-                    cc = 3;
-                    break;
-
-                default:
-                    SLANG_UNEXPECTED("unhandled resource shape");
-                    break;
-                }
-
-                if(isArray)
-                {
-                    sb << ", ($" << aa++ << opStr << ")." << kComponentNames[cc] << ")";
-                }
-
-                if(isMultisample)
-                {
-                    sb << ", ($" << aa++ << " = textureSamples($0))";
-                }
-
-                if (includeMipInfo)
-                {
-                    sb << ", ($" << aa++ << " = textureQueryLevels($0))";
-                }
-
-
-                sb << ")\")\n";
-            }
-
-            sb << "[__readNone]\n";
-            sb << "void GetDimensions(";
-            if(includeMipInfo)
-                sb << "uint mipLevel, ";
-
-            switch(baseShape)
-            {
-            case TextureFlavor::Shape::Shape1D:
-                sb << t << "width";
-                break;
-
-            case TextureFlavor::Shape::Shape2D:
-            case TextureFlavor::Shape::ShapeCube:
-                sb << t << "width,";
-                sb << t << "height";
-                break;
-
-            case TextureFlavor::Shape::Shape3D:
-                sb << t << "width,";
-                sb << t << "height,";
-                sb << t << "depth";
-                break;
-
-            default:
-                assert(!"unexpected");
-                break;
-            }
-
-            if(isArray)
-            {
-                sb << ", " << t << "elements";
-            }
-
-            if(isMultisample)
-            {
-                sb << ", " << t << "sampleCount";
-            }
-
-            if(includeMipInfo)
-                sb << ", " << t << "numberOfLevels";
-
-            sb << ");\n";
-        }
-
-        // `GetSamplePosition()`
-        if( isMultisample )
-        {
-            sb << "float2 GetSamplePosition(int s);\n";
-        }
-
-        // `Load()`
-
-        if( base.coordCount + isArray < 4 )
-        {
-            // The `Load()` operation on an ordinary `Texture2D` takes
-            // an `int3` for the location, where `.xy` holds the texel
-            // coordinates, and `.z` holds the mip level to use.
-            //
-            // The third coordinate for mip level is absent in
-            // `Texure2DMS.Load()` and `RWTexture2D.Load`. This pattern
-            // is repreated for all the other texture shapes.
-            //
-            bool needsMipLevel = !isMultisample && (access == SLANG_RESOURCE_ACCESS_READ);
-
-            int loadCoordCount = base.coordCount + isArray + (needsMipLevel?1:0);
-
-            char const* glslFuncName = (access == SLANG_RESOURCE_ACCESS_READ) ? "texelFetch" : "imageLoad";
-
-            // When translating to GLSL, we need to break apart the `location` argument.
-            //
-            // TODO: this should realy be handled by having this member actually get lowered!
-            static const char* kGLSLLoadCoordsSwizzle[] = { "", "", "x", "xy", "xyz", "xyzw" };
-            static const char* kGLSLLoadLODSwizzle[]    = { "", "", "y", "z", "w", "error" };
-
-            // TODO: The GLSL translations here only handle the read-only texture
-            // cases (stuff that lowers to `texture*` in GLSL) and not the stuff
-            // that lowers to `image*`.
-            //
-            // At some point it may make sense to separate the read-only and
-            // `RW`/`RasterizerOrdered` cases here rather than try to share code.
-
-            if (isMultisample)
-            {
-                sb << "__glsl_extension(GL_EXT_samplerless_texture_functions)";
-                sb << "__target_intrinsic(glsl, \"$c" << glslFuncName << "($0, $1, $2)$z\")\n";
-            }
-            else
-            {
-                sb << "__glsl_extension(GL_EXT_samplerless_texture_functions)";
-                sb << "__target_intrinsic(glsl, \"$c" << glslFuncName << "($0, ";
-                if( needsMipLevel )
-                {
-                    sb << "($1)." << kGLSLLoadCoordsSwizzle[loadCoordCount] << ", ($1)." << kGLSLLoadLODSwizzle[loadCoordCount];
-                }
-                else
-                {
-                    sb << "$1";
-                }
-                sb << ")$z\")\n";
-
-            }
-
-            // CUDA
-            if (isMultisample)
-            {
-            }
-            else
-            {
-                if (access == SLANG_RESOURCE_ACCESS_READ_WRITE)
-                {
-                    const int coordCount = base.coordCount;
-                    const int vecCount = coordCount + int(isArray);
-
-                    if( baseShape != TextureFlavor::Shape::ShapeCube )
-                    {
-                        sb << "__target_intrinsic(cuda, \"surf" << coordCount << "D";
-                        if (isArray)
-                        {
-                            sb << "Layered";
-                        }
-                        sb << "read";
-                        sb << "<$T0>($0";
-                        for (int i = 0; i < coordCount; ++i)
-                        {
-                            sb << ", ($1)";
-                            if (vecCount > 1)
-                            {
-                                sb << '.' << char(i + 'x');
-                            }
-
-                            // Surface access is *byte* addressed in x in CUDA
-                            if (i == 0) 
-                            {
-                                sb << " * $E";
-                            }
-                        }
-                        if (isArray)
-                        {
-                            sb << ", int(($1)." << char(coordCount + 'x') << ")";
-                        }
-                        sb << ", SLANG_CUDA_BOUNDARY_MODE)\")\n";
-                    }
-                    else
-                    {
-                        sb << "__target_intrinsic(cuda, \"surfCubemap";
-                        if (isArray)
-                        {
-                            sb << "Layered";
-                        }
-                        sb << "read";
-
-                        // Surface access is *byte* addressed in x in CUDA
-                        sb << "<$T0>($0, ($1).x * $E, ($1).y, ($1).z"; 
-                        if (isArray)
-                        {
-                            sb << ", int(($1).w)";
-                        }
-                        sb << ", SLANG_CUDA_BOUNDARY_MODE)\")\n";
-                    }
-                }
-                else if (access == SLANG_RESOURCE_ACCESS_READ)
-                {
-                    // We can allow this on Texture1D
-                    if( baseShape == TextureFlavor::Shape::Shape1D && isArray == false)
-                    {
-                        sb << "__target_intrinsic(cuda, \"tex1Dfetch<$T0>($0, ($1).x)\")\n";
-                    }
-                }
-            }
-
-            if (isReadOnly)
-                sb << "[__readNone]\n";
-            sb << "T Load(";
-            sb << "int" << loadCoordCount << " location";
-            if(isMultisample)
-            {
-                sb << ", int sampleIndex";
-            }
-            sb << ");\n";
-
-
-            // GLSL
-            glslFuncName = (access == SLANG_RESOURCE_ACCESS_READ) ? "texelFetchOffset" : "imageLoad";
-            if (isMultisample)
-            {
-                sb << "__glsl_extension(GL_EXT_samplerless_texture_functions)";
-                sb << "__target_intrinsic(glsl, \"$c" << glslFuncName << "($0, $0, $1, $2)$z\")\n";
-            }
-            else
-            {
-                sb << "__glsl_extension(GL_EXT_samplerless_texture_functions)";
-                sb << "__target_intrinsic(glsl, \"$c" << glslFuncName << "($0, ";
-                if( needsMipLevel )
-                {
-                    sb << "($1)." << kGLSLLoadCoordsSwizzle[loadCoordCount] << ", ($1)." << kGLSLLoadLODSwizzle[loadCoordCount];
-                    sb << ", $2)$z\")\n";
-                }
-                else
-                {
-                    sb << "$1, 0, $2)$z\")\n";
-                }
-            }
-
-            if (isReadOnly)
-                sb << "[__readNone]\n";
-            sb << "T Load(";
-            sb << "int" << loadCoordCount << " location";
-            if(isMultisample)
-            {
-                sb << ", int sampleIndex";
-            }
-            sb << ", constexpr int" << base.coordCount << " offset";
-            sb << ");\n";
-
-            if (isReadOnly)
-                sb << "[__readNone]\n";
-            sb << "T Load(";
-            sb << "int" << loadCoordCount << " location";
-            if(isMultisample)
-            {
-                sb << ", int sampleIndex";
-            }
-            sb << ", constexpr int" << base.coordCount << " offset";
-            sb << ", out uint status";
-            sb << ");\n";
-        }
-
-        if(baseShape != TextureFlavor::Shape::ShapeCube)
-        {
-            int N = base.coordCount + isArray;
-
-            char const* uintNs[] = { "", "uint", "uint2", "uint3", "uint4" };
-            char const* ivecNs[] = {  "", "int", "ivec2", "ivec3", "ivec4" };
-
-            auto uintN = uintNs[N];
-            auto ivecN = ivecNs[N];
-
-            // subscript operator
-            sb << "__subscript(" << uintN << " location) -> T {\n";
-
-            // !!!!!!!!!!!!!!!!!!!! get !!!!!!!!!!!!!!!!!!!!!!!
-
-            // GLSL/SPIR-V distinguished sampled vs. non-sampled images
-            {
-                switch( access )
-                {
-                case SLANG_RESOURCE_ACCESS_NONE:
-                case SLANG_RESOURCE_ACCESS_READ:
-                    sb << "__glsl_extension(GL_EXT_samplerless_texture_functions)";
-                    sb << "__target_intrinsic(glsl, \"$ctexelFetch($0, " << ivecN << "($1)";
-                    if( !isMultisample )
-                    {
-                        sb << ", 0";
-                    }
-                    else
-                    {
-                        // TODO: how to handle passing through sample index?
-                        sb << ", 0";
-                    }
-                    break;
-
-                default:
-                    sb << "__target_intrinsic(glsl, \"$cimageLoad($0, " << ivecN << "($1)";
-                    if( isMultisample )
-                    {
-                        // TODO: how to handle passing through sample index?
-                        sb << ", 0";
-                    }
-                    break;
-                }
-                sb << ")$z\")\n";
-            }
-
-            // CUDA
-            {
-                if (access == SLANG_RESOURCE_ACCESS_READ_WRITE)
-                {
-                    const int coordCount = base.coordCount;
-                    const int vecCount = coordCount + int(isArray);
-
-                    sb << "__target_intrinsic(cuda, \"surf";
-                    if( baseShape != TextureFlavor::Shape::ShapeCube )
-                    {
-                        sb << coordCount << "D";
-                    }
-                    else
-                    {
-                        sb << "Cubemap";
-                    }
-
-                    sb << (isArray ? "Layered" : "");
-                    sb << "read$C<$T0>($0";
-                                
-                    for (int i = 0; i < vecCount; ++i)
-                    {
-                        sb << ", ($1)";
-                        if (vecCount > 1)
-                        {
-                            sb << '.' << char(i + 'x');    
-                        }
-                        // Surface access is *byte* addressed in x in CUDA
-                        if (i == 0)
-                        {
-                            sb << " * $E";
-                        }
-                    }
-
-                    sb << ", SLANG_CUDA_BOUNDARY_MODE)\")\n";
-                }
-                else if (access == SLANG_RESOURCE_ACCESS_READ)
-                {
-                    // We can allow this on Texture1D
-                    if( baseShape == TextureFlavor::Shape::Shape1D && isArray == false)
-                    {
-                        sb << "__target_intrinsic(cuda, \"tex1Dfetch<$T0>($0, $1)\")\n";
-                    }
-                }
-            }
-
-            // Output that has get
-            if (isReadOnly)
-                sb << "[__readNone]\n";
-            sb << " get;\n";
-
-            // !!!!!!!!!!!!!!!!!!!! set !!!!!!!!!!!!!!!!!!!!!!!
-
-            if (!(access == SLANG_RESOURCE_ACCESS_NONE || access == SLANG_RESOURCE_ACCESS_READ))
-            {
-                // GLSL
-                sb << "__target_intrinsic(glsl, \"imageStore($0, " << ivecN << "($1), $V2)\")\n";
-
-                // CUDA
-                {
-                    const int coordCount = base.coordCount;
-                    const int vecCount = coordCount + int(isArray);
-
-                    sb << "__target_intrinsic(cuda, \"surf";
-                    if( baseShape != TextureFlavor::Shape::ShapeCube )
-                    {
-                        sb << coordCount << "D";
-                    }
-                    else
-                    {
-                        sb << "Cubemap";
-                    }
-
-                    sb << (isArray ? "Layered" : "");
-                    sb << "write$C<$T0>($2, $0";
-                    for (int i = 0; i < vecCount; ++i)
-                    {
-                        sb << ", ($1)";
-                        if (vecCount > 1)
-                        {
-                            sb << '.' << char(i + 'x');
-                        }
-
-                        // Surface access is *byte* addressed in x in CUDA
-                        if (i == 0)
-                        {
-                            sb << " * $E";
-                        }
-                    }
-
-                    sb << ", SLANG_CUDA_BOUNDARY_MODE)\")\n";
-                }
-
-                // Set
-                sb << " [nonmutating] set;\n";
-            }
-
-            // !!!!!!!!!!!!!!!!!! ref !!!!!!!!!!!!!!!!!!!!!!!!!
-                    
-            // Depending on the access level of the texture type,
-            // we either have just a getter (the default), or both
-            // a getter and setter.
-            switch( access )
-            {
-            case SLANG_RESOURCE_ACCESS_NONE:
-            case SLANG_RESOURCE_ACCESS_READ:
-                break;
-            default:
-                sb << "__intrinsic_op(" << int(kIROp_ImageSubscript) << ") ref;\n";
-                break;
-            }
-
-            sb << "}\n";
-        }
-
-        if( !isMultisample )
-        {
-            // `Sample()`
-
-            sb << "__target_intrinsic(glsl, \"$ctexture($p, $2)$z\")\n";
-
-            // CUDA
-            {
-                const int coordCount = base.coordCount;
-                const int vecCount = coordCount + int(isArray);
-
-                if( baseShape != TextureFlavor::Shape::ShapeCube )
-                {
-                    sb << "__target_intrinsic(cuda, \"tex" << coordCount << "D";
-                    if (isArray)
-                    {
-                        sb << "Layered";
-                    }
-                    sb << "<$T0>($0";
-                    for (int i = 0; i < coordCount; ++i)
-                    {
-                        sb << ", ($2)";
-                        if (vecCount > 1)
-                        {
-                            sb << '.' << char(i + 'x');
-                        }
-                    }
-                    if (isArray)
-                    {
-                        sb << ", int(($2)." << char(coordCount + 'x') << ")";
-                    }
-                    sb << ")\")\n";
-                }
-                else
-                {
-                    sb << "__target_intrinsic(cuda, \"texCubemap";
-                    if (isArray)
-                    {
-                        sb << "Layered";
-                    }
-                    sb << "<$T0>($0, ($2).x, ($2).y, ($2).z"; 
-                    if (isArray)
-                    {
-                        sb << ", int(($2).w)";
-                    }
-                    sb << ")\")\n";
-                }
-            }
-
-            if (isReadOnly)
-                sb << "[__readNone]\n";
-            sb << "T Sample(" << samplerStateParam;;
-            sb << "float" << base.coordCount + isArray << " location);\n";
-
-            if( baseShape != TextureFlavor::Shape::ShapeCube )
-            {
-                sb << "__target_intrinsic(glsl, \"$ctextureOffset($p, $2, $3)$z\")\n";
-                if (isReadOnly)
-                    sb << "[__readNone]\n";
-                sb << "T Sample(" << samplerStateParam;;
-                sb << "float" << base.coordCount + isArray << " location, ";
-                sb << "constexpr int" << base.coordCount << " offset);\n";
-            }
-
-            if (isReadOnly)
-                sb << "[__readNone]\n";
-            sb << "T Sample(" << samplerStateParam;
-            sb << "float" << base.coordCount + isArray << " location, ";
-            if( baseShape != TextureFlavor::Shape::ShapeCube )
-            {
-                sb << "constexpr int" << base.coordCount << " offset, ";
-            }
-            sb << "float clamp);\n";
-
-            if (isReadOnly)
-                sb << "[__readNone]\n";
-            sb << "T Sample(" << samplerStateParam;
-            sb << "float" << base.coordCount + isArray << " location, ";
-            if( baseShape != TextureFlavor::Shape::ShapeCube )
-            {
-                sb << "constexpr int" << base.coordCount << " offset, ";
-            }
-            sb << "float clamp, out uint status);\n";
-
-            // `SampleBias()`
-            sb << "__target_intrinsic(glsl, \"$ctexture($p, $2, $3)$z\")\n";
-            if (isReadOnly)
-                sb << "[__readNone]\n";
-            sb << "T SampleBias(" << samplerStateParam;
-            sb << "float" << base.coordCount + isArray << " location, float bias);\n";
-
-            if( baseShape != TextureFlavor::Shape::ShapeCube )
-            {
-                sb << "__target_intrinsic(glsl, \"$ctextureOffset($p, $2, $3, $4)$z\")\n";
-                if (isReadOnly)
-                    sb << "[__readNone]\n";
-                sb << "T SampleBias(" << samplerStateParam;
-                sb << "float" << base.coordCount + isArray << " location, float bias, ";
-                sb << "constexpr int" << base.coordCount << " offset);\n";
-            }
-            int baseCoordCount = base.coordCount;
-            int arrCoordCount = baseCoordCount + isArray;
-            if (arrCoordCount <= 3)
-            {
-                // `SampleCmp()` and `SampleCmpLevelZero`
-                sb << "__target_intrinsic(glsl, \"texture($p, vec" << arrCoordCount + 1 << "($2, $3))\")";
-                if (isReadOnly)
-                    sb << "[__readNone]\n";
-                sb << "float SampleCmp(SamplerComparisonState s, ";
-                sb << "float" << base.coordCount + isArray << " location, ";
-                sb << "float compareValue";
-                sb << ");\n";
-                sb << "__target_intrinsic(glsl, \"textureLod($p, vec" << arrCoordCount + 1 << "($2, $3), 0)\")";
-                if (isReadOnly)
-                    sb << "[__readNone]\n";
-                sb << "float SampleCmpLevelZero(SamplerComparisonState s, ";
-                sb << "float" << base.coordCount + isArray << " location, ";
-                sb << "float compareValue";
-                sb << ");\n";
-            }
-            if (arrCoordCount < 3)
-            {
-                int extCoordCount = arrCoordCount + 1;
-
-                if (extCoordCount < 3)
-                    extCoordCount = 3;
-
-                sb << "__target_intrinsic(glsl, \"$ctextureLod($p, ";
-
-                sb << "vec" << extCoordCount << "($2,";
-                for (int ii = arrCoordCount; ii < extCoordCount - 1; ++ii)
-                {
-                    sb << " 0.0,";
-                }
-                sb << "$3)";
-
-                sb << ", 0.0)$z\")\n";
-            }
-            else if(arrCoordCount <= 3)
-            {
-                int extCoordCount = arrCoordCount + 1;
-
-                if (extCoordCount < 3)
-                    extCoordCount = 3;
-
-                sb << "__target_intrinsic(glsl, \"$ctextureGrad($p, ";
-
-                sb << "vec" << extCoordCount << "($2,";
-                for (int ii = arrCoordCount; ii < extCoordCount - 1; ++ii)
-                {
-                    sb << " 0.0,";
-                }
-                sb << "$3)";
-
-                // Construct gradients
-                sb << ", vec" << baseCoordCount << "(0.0)";
-                sb << ", vec" << baseCoordCount << "(0.0)";
-                sb << ")$z\")\n";
-            }
-                    
-
-            if( baseShape != TextureFlavor::Shape::ShapeCube )
-            {
-                // Note(tfoley): MSDN seems confused, and claims that the `offset`
-                // parameter for `SampleCmp` is available for everything but 3D
-                // textures, while `Sample` and `SampleBias` are consistent in
-                // saying they only exclude `offset` for cube maps (which makes
-                // sense). I'm going to assume the documentation for `SampleCmp`
-                // is just wrong.
-                sb << "__target_intrinsic(glsl, \"textureOffset($p, vec" << arrCoordCount + 1 << "($2, $3), $4)\")";
-                if (isReadOnly)
-                    sb << "[__readNone]\n";
-                sb << "float SampleCmp(SamplerComparisonState s, ";
-                sb << "float" << base.coordCount + isArray << " location, ";
-                sb << "float compareValue, ";
-                sb << "constexpr int" << base.coordCount << " offset);\n";
-
-                sb << "__target_intrinsic(glsl, \"textureLodOffset($p, vec" << arrCoordCount + 1 << "($2, $3), 0, $4)\")";
-                if (isReadOnly)
-                    sb << "[__readNone]\n";
-                sb << "float SampleCmpLevelZero(SamplerComparisonState s, ";
-                sb << "float" << base.coordCount + isArray << " location, ";
-                sb << "float compareValue, ";
-                sb << "constexpr int" << base.coordCount << " offset);\n";
-            }
-
-            // TODO(JS): Not clear how to map this to CUDA, because in HLSL, the gradient is a vector based on
-            // the dimension. On CUDA there is texNDGrad, but it always just takes ddx, ddy.
-            // I could just assume 0 for elements not supplied, and ignore z. For now will just leave                  
-            sb << "__target_intrinsic(glsl, \"$ctextureGrad($p, $2, $3, $4)$z\")\n";
-            if (isReadOnly)
-                sb << "[__readNone]\n";
-            sb << "T SampleGrad(" << samplerStateParam;
-            sb << "float" << base.coordCount + isArray << " location, ";
-            sb << "float" << base.coordCount << " gradX, ";
-            sb << "float" << base.coordCount << " gradY";
-            sb << ");\n";
-
-            if( baseShape != TextureFlavor::Shape::ShapeCube )
-            {
-                sb << "__target_intrinsic(glsl, \"$ctextureGradOffset($p, $2, $3, $4, $5)$z\")\n";
-                if (isReadOnly)
-                    sb << "[__readNone]\n";
-                sb << "T SampleGrad(" << samplerStateParam;
-                sb << "float" << base.coordCount + isArray << " location, ";
-                sb << "float" << base.coordCount << " gradX, ";
-                sb << "float" << base.coordCount << " gradY, ";
-                sb << "constexpr int" << base.coordCount << " offset);\n";
-
-                sb << "__glsl_extension(GL_ARB_sparse_texture_clamp)";
-                sb << "__target_intrinsic(glsl, \"$ctextureGradOffsetClampARB($p, $2, $3, $4, $5, $6)$z\")\n";
-                if (isReadOnly)
-                    sb << "[__readNone]\n";
-                sb << "T SampleGrad(" << samplerStateParam;
-                sb << "float" << base.coordCount + isArray << " location, ";
-                sb << "float" << base.coordCount << " gradX, ";
-                sb << "float" << base.coordCount << " gradY, ";
-                sb << "constexpr int" << base.coordCount << " offset, ";
-                sb << "float lodClamp);\n";
-
-            }
-
-            // `SampleLevel`
-
-            sb << "__target_intrinsic(glsl, \"$ctextureLod($p, $2, $3)$z\")\n";
-
-            // SPIR-V
-            {
-                // TODO:
-                // Need to:
-                // - Construct sampled image type OpTypeSampledImage of image type
-                // - Construct OpSampledImage from image and sampler
-                // - Call OpImageSampleExplicitLod
-                // test ./tests/compute/texture-simpler.slang
-            }
-
-            // CUDA
-            {
-                const int coordCount = base.coordCount;
-                const int vecCount = coordCount + int(isArray);
-
-                if( baseShape != TextureFlavor::Shape::ShapeCube )
-                {
-                    sb << "__target_intrinsic(cuda, \"tex" << coordCount << "D";
-                    if (isArray)
-                    {
-                        sb << "Layered";
-                    }
-                    sb << "Lod<$T0>($0";
-                    for (int i = 0; i < coordCount; ++i)
-                    {
-                        sb << ", ($2)";
-                        if (vecCount > 1)
-                        {
-                            sb << '.' << char(i + 'x');
-                        }
-                    }
-                    if (isArray)
-                    {
-                        sb << ", int(($2)." << char(coordCount + 'x') << ")";
-                    }
-                    sb << ", $3)\")\n";
-                }
-                else
-                {
-                    sb << "__target_intrinsic(cuda, \"texCubemap";
-                    if (isArray)
-                    {
-                        sb << "Layered";
-                    }
-                    sb << "Lod<$T0>($0, ($2).x, ($2).y, ($2).z";
-                    if (isArray)
-                    {
-                        sb << ", int(($2).w)"; 
-                    }
-                    sb << ", $3)\")\n";
-                }
-            }
-            if (isReadOnly)
-                sb << "[__readNone]\n";
-            sb << "T SampleLevel(" << samplerStateParam;
-            sb << "float" << base.coordCount + isArray << " location, ";
-            sb << "float level);\n";
-
-            if( baseShape != TextureFlavor::Shape::ShapeCube )
-            {
-                sb << "__target_intrinsic(glsl, \"$ctextureLodOffset($p, $2, $3, $4)$z\")\n";
-                if (isReadOnly)
-                    sb << "[__readNone]\n";
-                sb << "T SampleLevel(" << samplerStateParam;
-                sb << "float" << base.coordCount + isArray << " location, ";
-                sb << "float level, ";
-                sb << "constexpr int" << base.coordCount << " offset);\n";
-            }
-        }
-
-        sb << "\n};\n";
-
-        // `Gather*()` operations are handled via an `extension` declaration,
-        // because this lets us capture the element type of the texture.
-        //
-        // TODO: longer-term there should be something like a `TextureElementType`
-        // interface, that both scalars and vectors implement, that then exposes
-        // a `Scalar` associated type, and `Gather` can return `vector<T.Scalar, 4>`.
-        //
-        static const struct {
-            char const* genericPrefix;
-            char const* elementType;
-            char const* outputType;
-        } kGatherExtensionCases[] = {
-            { "__generic<T, let N : int>", "vector<T,N>", "vector<T, 4>" },
-            { "", "float", "vector<float, 4>" },
-            { "", "int" , "vector<int, 4>"},
-            { "", "uint", "vector<uint, 4>"},
-
-            // TODO: need a case here for scalars `T`, but also
-            // need to ensure that case doesn't accidentally match
-            // for `T = vector<...>`, which requires actual checking
-            // of constraints on generic parameters.
-        };
-        for(auto cc : kGatherExtensionCases)
-        {
-            // TODO: this should really be an `if` around the entire `Gather` logic
-            if (isMultisample) break;
-
-            EMIT_LINE_DIRECTIVE();
-            sb << cc.genericPrefix << " __extension ";
-            sb << accessInfo.name;
-            sb << baseName;
-            sb << baseShapeName;
-            if (isArray) sb << "Array";
-            sb << "<" << cc.elementType << " >";
-            sb << "\n{\n";
-
-            // `Gather`
-            // (tricky because it returns a 4-vector of the element type
-            // of the texture components...)
-            //
-            // TODO: is it actually correct to restrict these so that, e.g.,
-            // `GatherAlpha()` isn't allowed on `Texture2D<float3>` because
-            // it nominally doesn't have an alpha component?
-            static const struct {
-                int componentIndex;
-                char const* componentName;
-            } kGatherComponets[] = {
-                { 0, "" },
-                { 0, "Red" },
-                { 1, "Green" },
-                { 2, "Blue" },
-                { 3, "Alpha" },
-            };
-            enum Cmp
-            { NotCmp,
-              Cmp
-            };
-
-            for(auto cmp : {NotCmp, Cmp})
-            for(auto kk : kGatherComponets)
-            {
-                auto samplerOrComparisonSampler = cmp == Cmp ? "SamplerComparisonState s, " : samplerStateParam;
-
-                auto componentIndex = kk.componentIndex;
-                auto componentName = kk.componentName;
-
-                auto outputType = cc.outputType;
-
-                const auto cmpName          = cmp == Cmp ? "Cmp" : "";
-                const auto cmpValueParam    = cmp == Cmp ? "float compareValue, " : "";
-                const auto cmpValueParamEnd = cmp == Cmp ? ", float compareValue" : "";
-                const auto supportsGLSL     = componentIndex == 0 || cmp == NotCmp;
-
-                EMIT_LINE_DIRECTIVE();
-
-                if(supportsGLSL)
-                {
-                    if(cmp == Cmp)
-                        sb << "__target_intrinsic(glsl, \"textureGather($p, $2, $3)\")\n";
-                    else
-                        sb << "__target_intrinsic(glsl, \"textureGather($p, $2, " << componentIndex << ")\")\n";
-                }
-                if (base.coordCount == 2 && cmp == NotCmp)
-                {
-                    // Gather only works on 2D in CUDA without comparison
-                    // "It is based on the base type of DataType except when readMode is equal to cudaReadModeNormalizedFloat (see Texture Reference API), in which case it is always float4."
-                    sb << "__target_intrinsic(cuda, \"tex2Dgather<$T0>($0, ($2).x, ($2).y, " << componentIndex << ")\")\n";
-                }
-                if (isReadOnly)
-                    sb << "[__readNone]\n";
-                sb << outputType << " Gather" << cmpName << componentName << "(" << samplerOrComparisonSampler;
-                sb << "float" << base.coordCount + isArray << " location" << cmpValueParamEnd << ");\n";
-
-                if (isReadOnly)
-                    sb << "[__readNone]\n";
-                EMIT_LINE_DIRECTIVE();
-                if(supportsGLSL)
-                {
-                    if(cmp == Cmp)
-                        sb << "__target_intrinsic(glsl, \"textureGatherOffset($p, $2, $3, $4)\")\n";
-                    else
-                        sb << "__target_intrinsic(glsl, \"textureGatherOffset($p, $2, $3, " << componentIndex << ")\")\n";
-                }
-                sb << outputType << " Gather" << cmpName << componentName << "(" << samplerOrComparisonSampler;
-                sb << "float" << base.coordCount + isArray << " location, ";
-                sb << cmpValueParam;
-                sb << "constexpr int" << base.coordCount << " offset);\n";
-
-                if (isReadOnly)
-                    sb << "[__readNone]\n";
-                EMIT_LINE_DIRECTIVE();
-                sb << outputType << " Gather" << cmpName << componentName << "(" << samplerOrComparisonSampler;
-                sb << "float" << base.coordCount + isArray << " location, ";
-                sb << cmpValueParam;
-                sb << "constexpr int" << base.coordCount << " offset, ";
-                sb << "out uint status);\n";
-
-                if (isReadOnly)
-                    sb << "[__readNone]\n";
-                EMIT_LINE_DIRECTIVE();
-                if(supportsGLSL)
-                {
-                    if(cmp == Cmp)
-                        sb << "__target_intrinsic(glsl, \"textureGatherOffsets($p, $2, $3, ivec" << base.coordCount << "[]($4, $5, $6, $7))\")\n";
-                    else
-                        sb << "__target_intrinsic(glsl, \"textureGatherOffsets($p, $2, ivec" << base.coordCount << "[]($3, $4, $5, $6), " << componentIndex << ")\")\n";
-                }
-                sb << outputType << " Gather" << cmpName << componentName << "(" << samplerOrComparisonSampler;
-                sb << "float" << base.coordCount + isArray << " location, ";
-                sb << cmpValueParam;
-                sb << "int" << base.coordCount << " offset1, ";
-                sb << "int" << base.coordCount << " offset2, ";
-                sb << "int" << base.coordCount << " offset3, ";
-                sb << "int" << base.coordCount << " offset4);\n";
-
-                if (isReadOnly)
-                    sb << "[__readNone]\n";
-                EMIT_LINE_DIRECTIVE();
-                sb << outputType << " Gather" << cmpName << componentName << "(" << samplerOrComparisonSampler;
-                sb << "float" << base.coordCount + isArray << " location, ";
-                sb << cmpValueParam;
-                sb << "int" << base.coordCount << " offset1, ";
-                sb << "int" << base.coordCount << " offset2, ";
-                sb << "int" << base.coordCount << " offset3, ";
-                sb << "int" << base.coordCount << " offset4, ";
-                sb << "out uint status);\n";
-            }
-
-            EMIT_LINE_DIRECTIVE();
-            sb << "\n}\n";
-        }
-    } // TextureTypeInfo::emitTypeDecl
-}; // struct TextureTypeInfo
-
 for(auto& prefixInfo : kTexturePrefixes)
 for(auto& shapeInfo : kBaseTextureShapes)
 for(int isArray = 0; isArray < 2; ++isArray)
@@ -3067,6 +2017,17 @@ bool __isSignedInt()
     return __isSignedInt_impl(__declVal<T>());
 }
 
+__generic<T>
+__intrinsic_op($(kIROp_IsVector)) // bound to the kIROp_IsVector intrinsic op; no body is supplied here
+bool __isVector_impl(T t); // NOTE(review): presumably true when T is a vector type -- confirm against the kIROp_IsVector handling
+
+__generic<T>
+[__unsafeForceInlineEarly]
+bool __isVector() // argument-free form of the query, same shape as __isSignedInt() just above
+{
+    return __isVector_impl(__declVal<T>()); // __declVal<T>() supplies the T-typed argument that __isVector_impl takes
+}
+
 // Provide implementations to public generic arithmetic interfaces for builtin types.
 
 ${{{{
diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang
index 23815d2e9..c195428e3 100644
--- a/source/slang/hlsl.meta.slang
+++ b/source/slang/hlsl.meta.slang
@@ -5073,10 +5073,9 @@ WaveMask WaveGetActiveMask()
         __intrinsic_asm "WaveActiveBallot(true).x";
     case spirv:
         let _true = true;
-        let _scope = 3; // subgroup
         return (spirv_asm
         {
-            OpGroupNonUniformBallot $$uint4 result $_scope $_true
+            OpGroupNonUniformBallot $$uint4 result Subgroup $_true
         }).x;
     default:
         return __WaveGetActiveMask();
@@ -5097,10 +5096,9 @@ bool WaveMaskIsFirstLane(WaveMask mask)
     case hlsl:
         __intrinsic_asm "WaveIsFirstLane()";
     case spirv:
-        let _scope = 3u; // subgroup
         return spirv_asm
         {
-            OpGroupNonUniformElect $$bool result $_scope
+            OpGroupNonUniformElect $$bool result Subgroup
         };
     default:
         return false;
@@ -5121,10 +5119,9 @@ bool WaveMaskAllTrue(WaveMask mask, bool condition)
     case hlsl:
         __intrinsic_asm "WaveActiveAllTrue($1)";
     case spirv:
-        let _scope = 3u; // subgroup
         return spirv_asm
         {
-            OpGroupNonUniformAll $$bool result $_scope $condition
+            OpGroupNonUniformAll $$bool result Subgroup $condition
         };
     default:
         return false;
@@ -5145,10 +5142,9 @@ bool WaveMaskAnyTrue(WaveMask mask, bool condition)
     case hlsl:
         __intrinsic_asm "WaveActiveAnyTrue($1)";
     case spirv:
-        let _scope = 3u; // subgroup
         return spirv_asm
         {
-            OpGroupNonUniformAny $$bool result $_scope $condition
+            OpGroupNonUniformAny $$bool result Subgroup $condition
         };
     default:
         return false;
@@ -5169,10 +5165,9 @@ WaveMask WaveMaskBallot(WaveMask mask, bool condition)
     case hlsl:
         __intrinsic_asm "WaveActiveBallot($1)";
     case spirv:
-        let _scope = 3u; // subgroup
         return (spirv_asm
         {
-            OpGroupNonUniformBallot $$uint4 result $_scope $condition
+            OpGroupNonUniformBallot $$uint4 result Subgroup $condition
         }).x;
     default:
         return 0;
@@ -5302,8 +5297,8 @@ T WaveMaskBroadcastLaneAt(WaveMask mask, T value, constexpr int lane)
     case cuda: __intrinsic_asm "__shfl_sync($0, $1, $2)";
     case hlsl: __intrinsic_asm "WaveReadLaneAt($1, $2)";
     case spirv:
-        let _scope = 3u; // subgroup
-        return spirv_asm {OpGroupNonUniformBroadcast $$T result $_scope $value $lane};
+        let ulane = uint(lane);
+        return spirv_asm {OpGroupNonUniformBroadcast $$T result Subgroup $value $ulane};
     }
 }
 
@@ -5319,8 +5314,8 @@ vector<T,N> WaveMaskBroadcastLaneAt(WaveMask mask, vector<T,N> value, constexpr
     case cuda: __intrinsic_asm "_waveShuffleMultiple($0, $1, $2)";
     case hlsl: __intrinsic_asm "WaveReadLaneAt($1, $2)";
     case spirv:
-        let _scope = 3u; // subgroup
-        return spirv_asm {OpGroupNonUniformBroadcast $$vector<T,N> result $_scope $value $lane};
+        let ulane = uint(lane);
+        return spirv_asm {OpGroupNonUniformBroadcast $$vector<T,N> result Subgroup $value $ulane};
     }
 }
 __generic<T : __BuiltinType, let N : int, let M : int>
@@ -5342,8 +5337,8 @@ T WaveMaskReadLaneAt(WaveMask mask, T value, int lane)
     case cuda: __intrinsic_asm "__shfl_sync($0, $1, $2)";
     case hlsl: __intrinsic_asm "WaveReadLaneAt($1, $2)";
     case spirv:
-        let _scope = 3u; // subgroup
-        return spirv_asm {OpGroupNonUniformShuffle $$T result $_scope $value $lane};
+        let ulane = uint(lane);
+        return spirv_asm {OpGroupNonUniformShuffle $$T result Subgroup $value $ulane};
     }
 }
 __generic<T : __BuiltinType, let N : int>
@@ -5358,8 +5353,8 @@ vector<T,N> WaveMaskReadLaneAt(WaveMask mask, vector<T,N> value, int lane)
     case cuda: __intrinsic_asm "_waveShuffleMultiple($0, $1, $2)";
     case hlsl: __intrinsic_asm "WaveReadLaneAt($1, $2)";
     case spirv:
-        let _scope = 3u; // subgroup
-        return spirv_asm {OpGroupNonUniformShuffle $$vector<T,N> result $_scope $value $lane};
+        let ulane = uint(lane);
+        return spirv_asm {OpGroupNonUniformShuffle $$vector<T,N> result Subgroup $value $ulane};
     }
 }
 __generic<T : __BuiltinType, let N : int, let M : int>
@@ -5422,8 +5417,7 @@ T WaveMaskBitAnd(WaveMask mask, T expr)
     case cuda: __intrinsic_asm "_waveAnd($0, $1)";
     case hlsl: __intrinsic_asm "WaveActiveBitAnd($1)";
     case spirv:
-        let _scope = 3u; // subgroup
-        return spirv_asm {OpGroupNonUniformBitwiseAnd $$T result $_scope 0 $expr};
+        return spirv_asm {OpGroupNonUniformBitwiseAnd $$T result Subgroup 0 $expr};
     }
 }
 
@@ -5439,8 +5433,7 @@ vector<T,N> WaveMaskBitAnd(WaveMask mask, vector<T,N> expr)
     case cuda: __intrinsic_asm "_waveAndMultiple($0, $1)";
     case hlsl: __intrinsic_asm "WaveActiveBitAnd($1)";
     case spirv:
-        let _scope = 3u; // subgroup
-        return spirv_asm {OpGroupNonUniformBitwiseAnd $$vector<T,N> result $_scope 0 $expr};
+        return spirv_asm {OpGroupNonUniformBitwiseAnd $$vector<T,N> result Subgroup 0 $expr};
     }
 }
 __generic<T : __BuiltinIntegerType, let N : int, let M : int>
@@ -5460,8 +5453,7 @@ T WaveMaskBitOr(WaveMask mask, T expr)
     case cuda: __intrinsic_asm "_waveOr($0, $1)";
     case hlsl: __intrinsic_asm "WaveActiveBitOr($1)";
     case spirv:
-        let _scope = 3u; // subgroup
-        return spirv_asm {OpGroupNonUniformBitwiseOr $$T result $_scope 0 $expr};
+        return spirv_asm {OpGroupNonUniformBitwiseOr $$T result Subgroup 0 $expr};
     }
 }
 __generic<T : __BuiltinIntegerType, let N : int>
@@ -5476,8 +5468,7 @@ vector<T,N> WaveMaskBitOr(WaveMask mask, vector<T,N> expr)
     case cuda: __intrinsic_asm "_waveOrMultiple($0, $1)";
     case hlsl: __intrinsic_asm "WaveActiveBitOr($1)";
     case spirv:
-        let _scope = 3u; // subgroup
-        return spirv_asm {OpGroupNonUniformBitwiseOr $$vector<T,N> result $_scope 0 $expr};
+        return spirv_asm {OpGroupNonUniformBitwiseOr $$vector<T,N> result Subgroup 0 $expr};
     }
 }
 __generic<T : __BuiltinIntegerType, let N : int, let M : int>
@@ -5497,8 +5488,7 @@ T WaveMaskBitXor(WaveMask mask, T expr)
     case cuda: __intrinsic_asm "_waveXor($0, $1)";
     case hlsl: __intrinsic_asm "WaveActiveBitXor($1)";
     case spirv:
-        let _scope = 3u; // subgroup
-        return spirv_asm {OpGroupNonUniformBitwiseXor $$T result $_scope 0 $expr};
+        return spirv_asm {OpGroupNonUniformBitwiseXor $$T result Subgroup 0 $expr};
     }
 }
 __generic<T : __BuiltinIntegerType, let N : int>
@@ -5513,8 +5503,7 @@ vector<T,N> WaveMaskBitXor(WaveMask mask, vector<T,N> expr)
     case cuda: __intrinsic_asm "_waveXorMultiple($0, $1)";
     case hlsl: __intrinsic_asm "WaveActiveBitXor($1)";
     case spirv:
-        let _scope = 3u; // subgroup
-        return spirv_asm {OpGroupNonUniformBitwiseXor $$vector<T,N> result $_scope 0 $expr};
+        return spirv_asm {OpGroupNonUniformBitwiseXor $$vector<T,N> result Subgroup 0 $expr};
     }
 }
 __generic<T : __BuiltinIntegerType, let N : int, let M : int>
@@ -5534,13 +5523,12 @@ T WaveMaskMax(WaveMask mask, T expr)
     case cuda: __intrinsic_asm "_waveMax($0, $1)";
     case hlsl: __intrinsic_asm "WaveActiveMax($1)";
     case spirv:
-        let _scope = 3u; // subgroup
         if (__isFloat<T>())
-            return spirv_asm {OpGroupNonUniformFMax $$T result $_scope 0 $expr};
+            return spirv_asm {OpGroupNonUniformFMax $$T result Subgroup 0 $expr};
         else if (__isSignedInt<T>())
-            return spirv_asm {OpGroupNonUniformSMax $$T result $_scope 0 $expr};
+            return spirv_asm {OpGroupNonUniformSMax $$T result Subgroup 0 $expr};
         else if (__isUnsignedInt<T>())
-            return spirv_asm {OpGroupNonUniformUMax $$T result $_scope 0 $expr};
+            return spirv_asm {OpGroupNonUniformUMax $$T result Subgroup 0 $expr};
     }
 }
 __generic<T : __BuiltinArithmeticType, let N : int>
@@ -5555,13 +5543,12 @@ vector<T,N> WaveMaskMax(WaveMask mask, vector<T,N> expr)
     case cuda: __intrinsic_asm "_waveMaxMultiple($0, $1)";
     case hlsl: __intrinsic_asm "WaveActiveMax($1)";
     case spirv:
-        let _scope = 3u; // subgroup
         if (__isFloat<T>())
-            return spirv_asm {OpGroupNonUniformFMax $$vector<T,N> result $_scope 0 $expr};
+            return spirv_asm {OpGroupNonUniformFMax $$vector<T,N> result Subgroup 0 $expr};
         else if (__isSignedInt<T>())
-            return spirv_asm {OpGroupNonUniformSMax $$vector<T,N> result $_scope 0 $expr};
+            return spirv_asm {OpGroupNonUniformSMax $$vector<T,N> result Subgroup 0 $expr};
         else if (__isUnsignedInt<T>())
-            return spirv_asm {OpGroupNonUniformUMax $$vector<T,N> result $_scope 0 $expr};
+            return spirv_asm {OpGroupNonUniformUMax $$vector<T,N> result Subgroup 0 $expr};
     }
 }
 
@@ -5582,13 +5569,12 @@ T WaveMaskMin(WaveMask mask, T expr)
     case cuda: __intrinsic_asm "_waveMin($0, $1)";
     case hlsl: __intrinsic_asm "WaveActiveMin($1)";
     case spirv:
-        let _scope = 3u; // subgroup
         if (__isFloat<T>())
-            return spirv_asm {OpGroupNonUniformFMin $$T result $_scope 0 $expr};
+            return spirv_asm {OpGroupNonUniformFMin $$T result Subgroup 0 $expr};
         else if (__isSignedInt<T>())
-            return spirv_asm {OpGroupNonUniformSMin $$T result $_scope 0 $expr};
+            return spirv_asm {OpGroupNonUniformSMin $$T result Subgroup 0 $expr};
         else if (__isUnsignedInt<T>())
-            return spirv_asm {OpGroupNonUniformUMin $$T result $_scope 0 $expr};
+            return spirv_asm {OpGroupNonUniformUMin $$T result Subgroup 0 $expr};
     }
 }
 
@@ -5604,13 +5590,12 @@ vector<T,N> WaveMaskMin(WaveMask mask, vector<T,N> expr)
     case cuda: __intrinsic_asm "_waveMinMultiple($0, $1)";
     case hlsl: __intrinsic_asm "WaveActiveMin($1)";
     case spirv:
-        let _scope = 3u; // subgroup
         if (__isFloat<T>())
-            return spirv_asm {OpGroupNonUniformFMin $$vector<T,N>  result $_scope 0 $expr};
+            return spirv_asm {OpGroupNonUniformFMin $$vector<T,N>  result Subgroup 0 $expr};
         else if (__isSignedInt<T>())
-            return spirv_asm {OpGroupNonUniformSMin $$vector<T,N>  result $_scope 0 $expr};
+            return spirv_asm {OpGroupNonUniformSMin $$vector<T,N>  result Subgroup 0 $expr};
         else if (__isUnsignedInt<T>())
-            return spirv_asm {OpGroupNonUniformUMin $$vector<T,N>  result $_scope 0 $expr};
+            return spirv_asm {OpGroupNonUniformUMin $$vector<T,N>  result Subgroup 0 $expr};
     }
 }
 
@@ -5631,21 +5616,20 @@ T WaveMaskProduct(WaveMask mask, T expr)
     case cuda: __intrinsic_asm "_waveProduct($0, $1)";
     case hlsl: __intrinsic_asm "WaveActiveProduct($1)";
     case spirv:
-        let _scope = 3u; // subgroup
         if (__isFloat<T>())
-            return spirv_asm {OpGroupNonUniformFMul $$T result $_scope 0 $expr};
+            return spirv_asm {OpGroupNonUniformFMul $$T result Subgroup 0 $expr};
         else if (__isSignedInt<T>())
         {
             return spirv_asm
             {
                 // TODO: use the correct integer width
                 OpBitcast $$uint %uvalue $expr;
-                OpGroupNonUniformIMul $$T %mulResult $_scope 0 %uvalue;
+                OpGroupNonUniformIMul $$uint %mulResult Subgroup 0 %uvalue; // result type must equal the type of %uvalue (uint), as in the vector overload; the bitcast below converts back to T
                 OpBitcast $$T result %mulResult
             };
         }
         else if (__isUnsignedInt<T>())
-            return spirv_asm {OpGroupNonUniformIMul $$T result $_scope 0 $expr};
+            return spirv_asm {OpGroupNonUniformIMul $$T result Subgroup 0 $expr};
     }
 }
 
@@ -5661,21 +5645,20 @@ vector<T,N> WaveMaskProduct(WaveMask mask, vector<T,N> expr)
     case cuda: __intrinsic_asm "_waveProductMultiple($0, $1)";
     case hlsl: __intrinsic_asm "WaveActiveProduct($1)";
     case spirv:
-        let _scope = 3u; // subgroup
         if (__isFloat<T>())
-            return spirv_asm {OpGroupNonUniformFMul $$vector<T,N> result $_scope 0 $expr};
+            return spirv_asm {OpGroupNonUniformFMul $$vector<T,N> result Subgroup 0 $expr};
         else if (__isSignedInt<T>())
         {
             return spirv_asm
             {
                 // TODO: use the correct integer width
                 OpBitcast $$vector<uint,N> %uvalue $expr;
-                OpGroupNonUniformIMul $$vector<uint,N> %mulResult $_scope 0 %uvalue;
+                OpGroupNonUniformIMul $$vector<uint,N> %mulResult Subgroup 0 %uvalue;
                 OpBitcast $$vector<T,N> result %mulResult
             };
         }
         else if (__isUnsignedInt<T>())
-            return spirv_asm {OpGroupNonUniformIMul $$vector<T,N> result $_scope 0 $expr};
+            return spirv_asm {OpGroupNonUniformIMul $$vector<T,N> result Subgroup 0 $expr};
     }
 }
 
@@ -5696,21 +5679,20 @@ T WaveMaskSum(WaveMask mask, T expr)
     case cuda: __intrinsic_asm "_waveSum($0, $1)";
     case hlsl: __intrinsic_asm "WaveActiveSum($1)";
     case spirv:
-        let _scope = 3u; // subgroup
         if (__isFloat<T>())
-            return spirv_asm {OpGroupNonUniformFAdd $$T result $_scope 0 $expr};
+            return spirv_asm {OpGroupNonUniformFAdd $$T result Subgroup 0 $expr};
         else if (__isSignedInt<T>())
         {
             return spirv_asm
             {
                 // TODO: use the correct integer width
                 OpBitcast $$uint %uvalue $expr;
-                OpGroupNonUniformIAdd $$T %mulResult $_scope 0 %uvalue;
+                OpGroupNonUniformIAdd $$uint %mulResult Subgroup 0 %uvalue; // result type must equal the type of %uvalue (uint), as in the vector overload; the bitcast below converts back to T
                 OpBitcast $$T result %mulResult
             };
         }
         else if (__isUnsignedInt<T>())
-            return spirv_asm {OpGroupNonUniformIAdd $$T result $_scope 0 $expr};
+            return spirv_asm {OpGroupNonUniformIAdd $$T result Subgroup 0 $expr};
     }
 }
 __generic<T : __BuiltinArithmeticType, let N : int>
@@ -5725,21 +5707,20 @@ vector<T,N> WaveMaskSum(WaveMask mask, vector<T,N> expr)
     case cuda: __intrinsic_asm "_waveSumMultiple($0, $1)";
     case hlsl: __intrinsic_asm "WaveActiveSum($1)";
     case spirv:
-        let _scope = 3u; // subgroup
         if (__isFloat<T>())
-            return spirv_asm {OpGroupNonUniformFAdd $$vector<T,N> result $_scope 0 $expr};
+            return spirv_asm {OpGroupNonUniformFAdd $$vector<T,N> result Subgroup 0 $expr};
         else if (__isSignedInt<T>())
         {
             return spirv_asm
             {
                 // TODO: use the correct integer width
                 OpBitcast $$vector<uint,N> %uvalue $expr;
-                OpGroupNonUniformIAdd $$vector<uint,N> %mulResult $_scope 0 %uvalue;
+                OpGroupNonUniformIAdd $$vector<uint,N> %mulResult Subgroup 0 %uvalue;
                 OpBitcast $$vector<T,N> result %mulResult
             };
         }
         else if (__isUnsignedInt<T>())
-            return spirv_asm {OpGroupNonUniformIAdd $$vector<T,N> result $_scope 0 $expr};
+            return spirv_asm {OpGroupNonUniformIAdd $$vector<T,N> result Subgroup 0 $expr};
     }
 }
 __generic<T : __BuiltinArithmeticType, let N : int, let M : int>
@@ -5763,10 +5744,9 @@ bool WaveMaskAllEqual(WaveMask mask, T value)
     case cuda:
         __intrinsic_asm "_waveAllEqual($0, $1)";
     case spirv:
-        let _scope = 3u; // subgroup
         return spirv_asm
         {
-            OpGroupNonUniformAllEqual $$bool result $_scope $value
+            OpGroupNonUniformAllEqual $$bool result Subgroup $value
         };
     default:
         return false;
@@ -5788,10 +5768,9 @@ bool WaveMaskAllEqual(WaveMask mask, vector<T,N> value)
     case cuda:
         __intrinsic_asm "_waveAllEqualMultiple($0, $1)";
     case spirv:
-        let _scope = 3u; // subgroup
         return spirv_asm
         {
-            OpGroupNonUniformAllEqual $$bool result $_scope $value
+            OpGroupNonUniformAllEqual $$bool result Subgroup $value
         };
     default:
         return false;
@@ -6526,10 +6505,9 @@ uint4 WaveActiveBallot(bool condition)
     case hlsl:
         __intrinsic_asm "WaveActiveBallot";
     case spirv:
-        let _scope = 3u; // Subgroup
         return spirv_asm
         {
-            OpGroupNonUniformBallot $$uint4 result $_scope $condition
+            OpGroupNonUniformBallot $$uint4 result Subgroup $condition
         };
     default:
         return WaveMaskBallot(WaveGetActiveMask(), condition);
@@ -6821,7 +6799,8 @@ T WaveBroadcastLaneAt(T value, constexpr int lane)
     case glsl: __intrinsic_asm "subgroupBroadcast($0, $1)";
     case hlsl: __intrinsic_asm "WaveReadLaneAt";
     case spirv:
-        return spirv_asm {OpGroupNonUniformBroadcast $$T result Subgroup $value $lane};
+        let ulane = uint(lane);
+        return spirv_asm {OpGroupNonUniformBroadcast $$T result Subgroup $value $ulane};
     default:
         return WaveMaskBroadcastLaneAt(WaveGetActiveMask(), value, lane);
     }
@@ -6838,7 +6817,8 @@ vector<T,N> WaveBroadcastLaneAt(vector<T,N> value, constexpr int lane)
     case glsl: __intrinsic_asm "subgroupBroadcast($0, $1)";
     case hlsl: __intrinsic_asm "WaveReadLaneAt";
     case spirv:
-        return spirv_asm {OpGroupNonUniformBroadcast $$vector<T,N> result Subgroup $value $lane};
+        let ulane = uint(lane);
+        return spirv_asm {OpGroupNonUniformBroadcast $$vector<T,N> result Subgroup $value $ulane};
     default:
         return WaveMaskBroadcastLaneAt(WaveGetActiveMask(), value, lane);
     }
@@ -6865,7 +6845,8 @@ T WaveReadLaneAt(T value, int lane)
     case glsl: __intrinsic_asm "subgroupShuffle($0, $1)";
     case hlsl: __intrinsic_asm "WaveReadLaneAt";
     case spirv:
-        return spirv_asm {OpGroupNonUniformShuffle $$T result Subgroup $value $lane};
+        let ulane = uint(lane);
+        return spirv_asm {OpGroupNonUniformShuffle $$T result Subgroup $value $ulane};
     default:
         return WaveMaskReadLaneAt(WaveGetActiveMask(), value, lane);
     }
@@ -6882,7 +6863,8 @@ vector<T,N> WaveReadLaneAt(vector<T,N> value, int lane)
     case glsl: __intrinsic_asm "subgroupShuffle($0, $1)";
     case hlsl: __intrinsic_asm "WaveReadLaneAt";
     case spirv:
-        return spirv_asm {OpGroupNonUniformShuffle $$vector<T,N> result Subgroup $value $lane};
+        let ulane = uint(lane);
+        return spirv_asm {OpGroupNonUniformShuffle $$vector<T,N> result Subgroup $value $ulane};
     default:
         return WaveMaskReadLaneAt(WaveGetActiveMask(), value, lane);
     }
@@ -6910,7 +6892,8 @@ T WaveShuffle(T value, int lane)
     case glsl: __intrinsic_asm "subgroupShuffle($0, $1)";
     case hlsl: __intrinsic_asm "WaveReadLaneAt";
     case spirv:
-        return spirv_asm {OpGroupNonUniformShuffle $$T result Subgroup $value $lane};
+        let ulane = uint(lane);
+        return spirv_asm {OpGroupNonUniformShuffle $$T result Subgroup $value $ulane};
     default:
         return WaveMaskShuffle(WaveGetActiveMask(), value, lane);
     }
@@ -6927,7 +6910,8 @@ vector<T,N> WaveShuffle(vector<T,N> value, int lane)
     case glsl: __intrinsic_asm "subgroupShuffle($0, $1)";
     case hlsl: __intrinsic_asm "WaveReadLaneAt";
     case spirv:
-        return spirv_asm {OpGroupNonUniformShuffle $$vector<T,N> result Subgroup $value $lane};
+        let ulane = uint(lane);
+        return spirv_asm {OpGroupNonUniformShuffle $$vector<T,N> result Subgroup $value $ulane};
     default:
         return WaveMaskShuffle(WaveGetActiveMask(), value, lane);
     }
diff --git a/source/slang/slang-ast-expr.h b/source/slang/slang-ast-expr.h
index 6699426d5..9c604f6a0 100644
--- a/source/slang/slang-ast-expr.h
+++ b/source/slang/slang-ast-expr.h
@@ -657,6 +657,8 @@ public:
         SlangValue,
         SlangValueAddr,
         SlangType,
+        SampledType, // __sampledType(T), this becomes a 4 vector of the component type of T
+        TruncateMarker, // __truncate, an invented instruction which coerces to the result type by truncating the element count
         BuiltinVar,
         GLSL450Set,
     };
diff --git a/source/slang/slang-check-expr.cpp b/source/slang/slang-check-expr.cpp
index 055364d5e..88d95f04e 100644
--- a/source/slang/slang-check-expr.cpp
+++ b/source/slang/slang-check-expr.cpp
@@ -3942,7 +3942,7 @@ namespace Slang
             // be able to deduce types for operands
             const auto opInfo = spirvInfo->opInfos.lookup(SpvOp(inst.opcode.knownValue));
 
-            if(opInfo->numOperandTypes == 0 && inst.operands.getCount())
+            if(opInfo && opInfo->numOperandTypes == 0 && inst.operands.getCount())
             {
                 failed = true;
                 getSink()->diagnose(inst.opcode.token, Diagnostics::spirvInstructionWithTooManyOperands, inst.opcode.token, 0);
@@ -3953,16 +3953,21 @@ namespace Slang
             for(Index operandIndex = 0; operandIndex < inst.operands.getCount(); ++operandIndex)
             {
                 // Clamp to the end of the type info array, because the last one will be any variable operands
+                const auto invalidOperandKind = SPIRVCoreGrammarInfo::OperandKind{0xff};
                 const auto operandType
-                    = opInfo->operandTypes[std::min(operandIndex, Index(opInfo->numOperandTypes)-1)];
+                    = opInfo.has_value()
+                    ? opInfo->operandTypes[std::min(operandIndex, Index(opInfo->numOperandTypes)-1)]
+                    : invalidOperandKind;
                 const auto baseOperandType
                     = spirvInfo->operandKindUnderneathIds.lookup(operandType).value_or(operandType);
                 const auto needsIdWrapper = baseOperandType != operandType;
 
                 const auto check = [&](const auto& go, auto& operand) -> void {
-                    if(operand.flavor == SPIRVAsmOperand::SlangType)
+                    if(operand.flavor == SPIRVAsmOperand::SlangType
+                        || operand.flavor == SPIRVAsmOperand::SampledType)
                     {
-                        // This is a $$type operand, fill in the TypeExp member of the operand
+                        // This is a $$type operand or __sampledType(T)
+                        // operand, fill in its TypeExp member.
                         TypeExp& typeExpr = operand.type;
                         typeExpr.exp = operand.expr;
                         typeExpr = CheckProperType(typeExpr);
diff --git a/source/slang/slang-diagnostic-defs.h b/source/slang/slang-diagnostic-defs.h
index 7c8bab1ad..76e96f2d5 100644
--- a/source/slang/slang-diagnostic-defs.h
+++ b/source/slang/slang-diagnostic-defs.h
@@ -255,6 +255,8 @@ DIAGNOSTIC(29109, Error, spirvOperandRange, "Literal ints must be in the range 0
 
 DIAGNOSTIC(29110, Error, unknownTargetName, "unknown target name '$0'")
 
+DIAGNOSTIC(29111, Error, spirvInvalidTruncate, "__truncate has been given a source smaller than its target")
+
 //
 // 3xxxx - Semantic analysis
 //
diff --git a/source/slang/slang-emit-spirv.cpp b/source/slang/slang-emit-spirv.cpp
index 846c4b5b4..07f1b2aee 100644
--- a/source/slang/slang-emit-spirv.cpp
+++ b/source/slang/slang-emit-spirv.cpp
@@ -1294,6 +1294,37 @@ struct SPIRVEmitContext
             }
         case kIROp_TextureType:
             {
+                // Some untyped constants from OpTypeImage
+                // https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#OpTypeImage
+
+                // indicates not a depth image
+                [[maybe_unused]]
+                const SpvWord notDepthImage = 0;
+                // indicates a depth image
+                [[maybe_unused]]
+                const SpvWord isDepthImage = 1;
+                // means no indication as to whether this is a depth or non-depth image
+                const SpvWord unknownDepthImage = 2;
+
+                // indicates non-arrayed content
+                const SpvWord notArrayed = 0;
+                // indicates arrayed content
+                const SpvWord isArrayed = 1;
+
+                // indicates single-sampled content
+                const SpvWord notMultisampled = 0;
+                // indicates multisampled content
+                const SpvWord isMultisampled = 1;
+
+                // indicates this is only known at run time, not at compile time
+                const SpvWord sampledUnknown = 0;
+                // indicates an image compatible with sampling operations
+                const SpvWord sampledImage = 1;
+                // indicates an image compatible with read/write operations (a storage or subpass data image).
+                const SpvWord readWriteImage = 2;
+
+                //
+
                 const auto texTypeInst = as<IRTextureType>(inst);
                 const auto sampledType = texTypeInst->getElementType();
                 SpvDim dim = SpvDim1D; // Silence uninitialized warnings from msvc...
@@ -1318,16 +1349,78 @@ struct SPIRVEmitContext
                         dim = SpvDimBuffer;
                         break;
                 }
-                bool arrayed = texTypeInst->isArray();
-                SpvWord depth = 2; // No knowledge of if this is a depth image
-                bool ms = texTypeInst->isMultisample();
-                // TODO: can we do better here?
-                SpvWord sampled = 0; // Only known at run time
-                // TODO: can we do better?
+                SpvWord arrayed = texTypeInst->isArray() ? isArrayed : notArrayed;
+
+                // Vulkan spec 16.1: "The “Depth” operand of OpTypeImage is ignored."
+                SpvWord depth = unknownDepthImage; // No knowledge of if this is a depth image
+                SpvWord ms = texTypeInst->isMultisample() ? isMultisampled : notMultisampled;
+
+                SpvWord sampled = sampledUnknown;
+                switch(texTypeInst->getAccess())
+                {
+                    case SlangResourceAccess::SLANG_RESOURCE_ACCESS_READ_WRITE:
+                    case SlangResourceAccess::SLANG_RESOURCE_ACCESS_RASTER_ORDERED:
+                        sampled = readWriteImage;
+                        break;
+                    case SlangResourceAccess::SLANG_RESOURCE_ACCESS_NONE:
+                    case SlangResourceAccess::SLANG_RESOURCE_ACCESS_READ:
+                        sampled = sampledImage;
+                        break;
+                }
+
+                // TODO: we need to do as _emitGLSLImageFormatModifier does,
+                // take a guess at the image format
                 SpvImageFormat format = SpvImageFormatUnknown;
+
+                //
+                // Capabilities, according to section 3.8
+                //
+                // SPIR-V requires that the sampled/rw info on the image isn't unknown
+                SLANG_ASSERT(sampled == sampledImage || sampled == readWriteImage);
+                switch(dim)
+                {
+                case SpvDim1D:
+                    requireSPIRVCapability(sampled == sampledImage ? SpvCapabilitySampled1D : SpvCapabilityImage1D);
+                    break;
+                case SpvDim2D:
+                    // Also requires Shader or Kernel, but these are a given (?)
+                    if(sampled == readWriteImage && ms == isMultisampled && arrayed == isArrayed)
+                        requireSPIRVCapability(SpvCapabilityImageMSArray);
+                    break;
+                case SpvDim3D:
+                    break;
+                case SpvDimCube:
+                    // Requires shader also
+                    if(sampled == readWriteImage && arrayed == isArrayed)
+                        requireSPIRVCapability(SpvCapabilityImageCubeArray);
+                    break;
+                case SpvDimRect:
+                    requireSPIRVCapability(sampled == sampledImage ? SpvCapabilitySampledRect : SpvCapabilityImageRect);
+                    break;
+                case SpvDimBuffer:
+                    requireSPIRVCapability(sampled == sampledImage ? SpvCapabilitySampledBuffer : SpvCapabilityImageBuffer);
+                    break;
+                case SpvDimSubpassData:
+                    requireSPIRVCapability(SpvCapabilityInputAttachment);
+                    break;
+                case SpvDimTileImageDataEXT:
+                    SLANG_UNIMPLEMENTED_X("OpTypeImage Capabilities for SpvDimTileImageDataEXT");
+                    break;
+                }
+                if(format == SpvImageFormatUnknown && sampled == readWriteImage)
+                {
+                    // TODO: It may not be necessary to have both of these
+                    // depending on if we read or write
+                    requireSPIRVCapability(SpvCapabilityStorageImageReadWithoutFormat);
+                    requireSPIRVCapability(SpvCapabilityStorageImageWriteWithoutFormat);
+                }
+
+                //
+                // The op itself
+                //
                 return emitOpTypeImage(
                     inst,
-                    sampledType,
+                    dropVector(sampledType),
                     dim,
                     SpvLiteralInteger::from32(depth),
                     SpvLiteralInteger::from32(arrayed),
@@ -1503,12 +1596,6 @@ struct SPIRVEmitContext
                     varInst,
                     SpvLiteralInteger::from32(int32_t(index))
                 );
-                emitOpDecorateIndex(
-                    getSection(SpvLogicalSectionID::Annotations),
-                    nullptr,
-                    varInst,
-                    SpvLiteralInteger::from32(int32_t(space))
-                );
                 break;
             case LayoutResourceKind::VaryingOutput:
                 emitOpDecorateLocation(
@@ -3829,7 +3916,6 @@ struct SPIRVEmitContext
         for(const auto spvInst : inst->getInsts())
         {
             const bool isLast = spvInst == inst->getLastChild();
-            const SpvOp opcode = SpvOp(spvInst->getOpcodeOperandWord());
 
             const auto parentForOpCode = [this](SpvOp opcode, SpvInstParent* defaultParent) -> SpvInstParent*{
                 const auto info = m_grammarInfo->opInfos.lookup(opcode);
@@ -3859,122 +3945,242 @@ struct SPIRVEmitContext
                 }
             };
 
-            switch (opcode)
-            {
-            case SpvOpCapability:
-                requireSPIRVCapability((SpvCapability)getIntVal(spvInst->getOperand(1)->getOperand(0)));
-                continue;
-            case SpvOpExtension:
-                ensureExtensionDeclaration(as<IRStringLit>(spvInst->getOperand(1)->getOperand(0))->getStringSlice());
-                continue;
-            default:
-                break;
-            }
+            const auto emitSpvAsmOperand = [&](IRSPIRVAsmOperand* operand){
+                switch(operand->getOp())
+                {
+                case kIROp_SPIRVAsmOperandEnum:
+                case kIROp_SPIRVAsmOperandLiteral:
+                {
+                    const auto v = as<IRConstant>(operand->getValue());
+                    SLANG_ASSERT(v);
+                    if(operand->getOperandCount() >= 2)
 
-            last = emitInstCustomOperandFunc(
-                parentForOpCode(opcode, parent),
-                // We want the "result instruction" to refer to the top level
-                // block which assumes its value, the others are free to refer
-                // to whatever, so just use the internal spv inst rep
-                // TODO: This is not correct, because the instruction which is
-                // assigned to result is not necessarily the last instruction
-                isLast ? as<IRInst>(inst) : spvInst,
-                opcode,
-                [&](){
-                    for(const auto operand : spvInst->getSPIRVOperands())
                     {
-                        switch(operand->getOp())
-                        {
-                        case kIROp_SPIRVAsmOperandEnum:
-                        case kIROp_SPIRVAsmOperandLiteral:
+                        const auto constantType = cast<IRType>(operand->getOperand(1));
+                        SpvInst* constant;
+                        switch(v->getOp())
                         {
-                            const auto v = as<IRConstant>(operand->getValue());
-                            SLANG_ASSERT(v);
-                            if(operand->getOperandCount() >= 2)
-                            {
-                                const auto constantType = cast<IRType>(operand->getOperand(1));
-                                SpvInst* constant;
-                                switch(v->getOp())
-                                {
-                                case kIROp_IntLit:
-                                {
-                                    // TODO: range checking
-                                    const auto i = cast<IRIntLit>(v)->getValue();
-                                    constant = emitIntConstant(i, constantType);
-                                    break;
-                                }
-                                case kIROp_StringLit:
-                                    SLANG_UNIMPLEMENTED_X("String constants in SPIR-V emit");
-                                default:
-                                    SLANG_UNREACHABLE("Unhandled case in emitSPIRVAsm");
-                                }
-                                emitOperand(constant);
-                            }
-                            else
-                            {
-                                switch(v->getOp())
-                                {
-                                case kIROp_StringLit:
-                                    emitOperand(SpvLiteralBits::fromUnownedStringSlice(v->getStringSlice()));
-                                    break;
-                                case kIROp_IntLit:
-                                {
-                                    // TODO: range checking
-                                    const auto i = cast<IRIntLit>(v)->getValue();
-                                    emitOperand(SpvLiteralInteger::from32(uint32_t(i)));
-                                    break;
-                                }
-                                default:
-                                    SLANG_UNREACHABLE("Unhandled case in emitSPIRVAsm");
-                                }
-                            }
-                            break;
-                        }
-                        case kIROp_SPIRVAsmOperandInst:
-                        {
-                            const auto i = operand->getValue();
-                            emitOperand(ensureInst(i));
-
-                            break;
-                        }
-                        case kIROp_SPIRVAsmOperandResult:
+                        case kIROp_IntLit:
                         {
-                            SLANG_ASSERT(isLast);
-                            emitOperand(kResultID);
+                            // TODO: range checking
+                            const auto i = cast<IRIntLit>(v)->getValue();
+                            constant = emitIntConstant(i, constantType);
                             break;
                         }
-                        case kIROp_SPIRVAsmOperandId:
-                        {
-                            const auto idName = cast<IRStringLit>(operand->getValue())->getStringSlice();
-                            SpvWord id;
-                            if(!idMap.tryGetValue(idName, id))
-                            {
-                                id = freshID();
-                                idMap.set(idName, id);
-                            }
-                            emitOperand(id);
-                            break;
+                        case kIROp_StringLit:
+                            SLANG_UNIMPLEMENTED_X("String constants in SPIR-V emit");
+                        default:
+                            SLANG_UNREACHABLE("Unhandled case in emitSPIRVAsm");
                         }
-                        case kIROp_SPIRVAsmOperandBuiltinVar:
+                        emitOperand(constant);
+                    }
+                    else
+                    {
+                        switch(v->getOp())
                         {
-                            const auto kind = (SpvBuiltIn)(getIntVal(operand->getOperand(0)));
-                            IRBuilder builder(operand);
-                            builder.setInsertBefore(operand);
-                            auto varInst = getBuiltinGlobalVar(builder.getPtrType(kIROp_PtrType, operand->getDataType(), SpvStorageClassInput), kind);
-                            emitOperand(varInst);
+                        case kIROp_StringLit:
+                            emitOperand(SpvLiteralBits::fromUnownedStringSlice(v->getStringSlice()));
                             break;
-                        }
-                        case kIROp_SPIRVAsmOperandGLSL450Set:
+                        case kIROp_IntLit:
                         {
-                            emitOperand(getGLSL450ExtInst());
+                            // TODO: range checking
+                            const auto i = cast<IRIntLit>(v)->getValue();
+                            emitOperand(SpvLiteralInteger::from32(uint32_t(i)));
                             break;
                         }
                         default:
                             SLANG_UNREACHABLE("Unhandled case in emitSPIRVAsm");
                         }
                     }
+                    break;
                 }
-            );
+                case kIROp_SPIRVAsmOperandInst:
+                {
+                    const auto i = operand->getValue();
+                    emitOperand(ensureInst(i));
+
+                    break;
+                }
+                case kIROp_SPIRVAsmOperandResult:
+                {
+                    SLANG_ASSERT(isLast);
+                    emitOperand(kResultID);
+                    break;
+                }
+                case kIROp_SPIRVAsmOperandId:
+                {
+                    const auto idName = cast<IRStringLit>(operand->getValue())->getStringSlice();
+                    SpvWord id;
+                    if(!idMap.tryGetValue(idName, id))
+                    {
+                        id = freshID();
+                        idMap.set(idName, id);
+                    }
+                    emitOperand(id);
+                    break;
+                }
+                case kIROp_SPIRVAsmOperandSampledType:
+                {
+                    // Make a 4 vector of the component type
+                    IRBuilder builder(m_irModule);
+                    const auto elementType = cast<IRType>(operand->getValue());
+                    const auto sampledType = builder.getVectorType(dropVector(elementType), 4);
+                    emitOperand(ensureInst(sampledType));
+                    break;
+                }
+                case kIROp_SPIRVAsmOperandBuiltinVar:
+                {
+                    const auto kind = (SpvBuiltIn)(getIntVal(operand->getOperand(0)));
+                    IRBuilder builder(operand);
+                    builder.setInsertBefore(operand);
+                    auto varInst = getBuiltinGlobalVar(builder.getPtrType(kIROp_PtrType, operand->getDataType(), SpvStorageClassInput), kind);
+                    emitOperand(varInst);
+                    break;
+                }
+                case kIROp_SPIRVAsmOperandGLSL450Set:
+                {
+                    emitOperand(getGLSL450ExtInst());
+                    break;
+                }
+                default:
+                    SLANG_UNREACHABLE("Unhandled case in emitSPIRVAsm");
+                }
+            };
+
+            if(spvInst->getOpcodeOperand()->getOp() == kIROp_SPIRVAsmOperandTruncate)
+            {
+                const auto getSlangType = [&](IRSPIRVAsmOperand* operand) -> IRType*{
+                    switch(operand->getOp())
+                    {
+                    case kIROp_SPIRVAsmOperandInst:
+                        return cast<IRType>(operand->getValue());
+                    case kIROp_SPIRVAsmOperandSampledType:
+                        {
+                            // Make a 4 vector of the component type
+                            IRBuilder builder(m_irModule);
+                            const auto elementType = cast<IRType>(operand->getValue());
+                            return builder.getVectorType(dropVector(elementType), 4);
+                        }
+                    case kIROp_SPIRVAsmOperandEnum:
+                    case kIROp_SPIRVAsmOperandLiteral:
+                    case kIROp_SPIRVAsmOperandResult:
+                    case kIROp_SPIRVAsmOperandId:
+                        SLANG_UNEXPECTED("truncate should have been given slang types");
+                    default:
+                        SLANG_UNREACHABLE("Unhandled case in emitSPIRVAsm");
+                    }
+                };
+
+                SLANG_ASSERT(spvInst->getSPIRVOperands().getCount() == 4);
+                const auto toType = getSlangType(spvInst->getSPIRVOperands()[0]);
+                const auto toIdOperand = spvInst->getSPIRVOperands()[1];
+                const auto fromType = getSlangType(spvInst->getSPIRVOperands()[2]);
+                const auto fromIdOperand = spvInst->getSPIRVOperands()[3];
+
+                // The component types must be the same
+                SLANG_ASSERT(isTypeEqual(dropVector(toType), dropVector(fromType)));
+
+                // If we don't need truncation, but a different result ID is
+                // expected, then just copy the value over with OpCopyObject
+                if(isTypeEqual(toType, fromType))
+                {
+                    // TODO: if this is the last inst, we should just remove it
+                    // and rewrite the penultimate one
+                    last = emitInstCustomOperandFunc(
+                        parent,
+                        isLast ? as<IRInst>(inst) : spvInst,
+                        SpvOpCopyObject,
+                        [&](){
+                            emitOperand(toType);
+                            emitSpvAsmOperand(toIdOperand);
+                            emitSpvAsmOperand(fromIdOperand);
+                        }
+                    );
+                }
+                // Otherwise, if we are truncating to a scalar, extract the first element
+                else if(!as<IRVectorType>(toType))
+                {
+                    last = emitInstCustomOperandFunc(
+                        parent,
+                        isLast ? as<IRInst>(inst) : spvInst,
+                        SpvOpCompositeExtract,
+                        [&](){
+                            emitOperand(toType);
+                            emitSpvAsmOperand(toIdOperand);
+                            emitSpvAsmOperand(fromIdOperand);
+                            emitOperand(SpvLiteralInteger::from32(0));
+                        }
+                    );
+                }
+                // Otherwise, if we are wrapping a scalar into a 1-vector
+                else if(as<IRVectorType>(toType) && !as<IRVectorType>(fromType))
+                {
+                    last = emitInstCustomOperandFunc(
+                        parent,
+                        isLast ? as<IRInst>(inst) : spvInst,
+                        SpvOpCompositeConstruct,
+                        [&](){
+                            emitOperand(toType);
+                            emitSpvAsmOperand(toIdOperand);
+                            emitSpvAsmOperand(fromIdOperand);
+                        }
+                    );
+                }
+                // Otherwise, we are truncating a vector to a smaller vector
+                else
+                {
+                    const auto toVector = cast<IRVectorType>(toType);
+                    const auto toVectorSize = getIntVal(toVector->getElementCount());
+                    const auto fromVector = cast<IRVectorType>(fromType);
+                    const auto fromVectorSize = getIntVal(fromVector->getElementCount());
+                    if(toVectorSize > fromVectorSize)
+                        m_sink->diagnose(inst, Diagnostics::spirvInvalidTruncate);
+                    last = emitInstCustomOperandFunc(
+                        parent,
+                        isLast ? as<IRInst>(inst) : spvInst,
+                        SpvOpVectorShuffle,
+                        [&](){
+                            emitOperand(toType);
+                            emitSpvAsmOperand(toIdOperand);
+                            emitSpvAsmOperand(fromIdOperand);
+                            emitOperand(emitOpUndef(parent, nullptr, fromVector));
+                            for(Int32 i = 0; i < toVectorSize; ++i)
+                                emitOperand(SpvLiteralInteger::from32(i));
+                        }
+                    );
+                }
+            }
+            else
+            {
+                const SpvOp opcode = SpvOp(spvInst->getOpcodeOperandWord());
+
+                switch (opcode)
+                {
+                case SpvOpCapability:
+                    requireSPIRVCapability((SpvCapability)getIntVal(spvInst->getOperand(1)->getOperand(0)));
+                    continue;
+                case SpvOpExtension:
+                    ensureExtensionDeclaration(as<IRStringLit>(spvInst->getOperand(1)->getOperand(0))->getStringSlice());
+                    continue;
+                default:
+                    break;
+                }
+
+                last = emitInstCustomOperandFunc(
+                    parentForOpCode(opcode, parent),
+                    // We want the "result instruction" to refer to the top level
+                    // block which assumes its value, the others are free to refer
+                    // to whatever, so just use the internal spv inst rep
+                    // TODO: This is not correct, because the instruction which is
+                    // assigned to result is not necessarily the last instruction
+                    isLast ? as<IRInst>(inst) : spvInst,
+                    opcode,
+                    [&](){
+                        for(const auto operand : spvInst->getSPIRVOperands())
+                            emitSpvAsmOperand(operand);
+                    }
+                );
+            }
         }
 
         for(const auto& [name, id] : idMap)
diff --git a/source/slang/slang-emit.cpp b/source/slang/slang-emit.cpp
index 8dc0d2983..68fd81cee 100644
--- a/source/slang/slang-emit.cpp
+++ b/source/slang/slang-emit.cpp
@@ -42,6 +42,7 @@
 #include "slang-ir-lower-size-of.h"
 #include "slang-ir-lower-reinterpret.h"
 #include "slang-ir-loop-unroll.h"
+#include "slang-ir-legalize-vector-types.h"
 #include "slang-ir-metadata.h"
 #include "slang-ir-optix-entry-point-uniforms.h"
 #include "slang-ir-restructure.h"
@@ -570,6 +571,12 @@ Result linkAndOptimizeIR(
             sink);
     }
 
+    if(isKhronosTarget(targetRequest))
+    {
+        // SPIR-V doesn't support 1-vectors
+        legalizeVectorTypes(irModule, sink);
+    }
+
     // Once specialization and type legalization have been performed,
     // we should perform some of our basic optimization steps again,
     // to see if we can clean up any temporaries created by legalization.
diff --git a/source/slang/slang-ir-inst-defs.h b/source/slang/slang-ir-inst-defs.h
index b248012a0..f48801162 100644
--- a/source/slang/slang-ir-inst-defs.h
+++ b/source/slang/slang-ir-inst-defs.h
@@ -983,6 +983,7 @@ INST(IsBool, IsBool, 1, 0)
 INST(IsFloat, IsFloat, 1, 0)
 INST(IsUnsignedInt, IsUnsignedInt, 1, 0)
 INST(IsSignedInt, IsSignedInt, 1, 0)
+INST(IsVector, IsVector, 1, 0)
 
 INST(ForwardDifferentiate,                   ForwardDifferentiate,            1, 0)
 
@@ -1076,7 +1077,9 @@ INST(SPIRVAsmInst, SPIRVAsmInst, 1, 0)
     // A literal string or 32-bit integer to be passed as operands
     INST(SPIRVAsmOperandLiteral, SPIRVAsmOperandLiteral, 1, HOISTABLE)
     // A reference to a slang IRInst, either a value or a type
-    INST(SPIRVAsmOperandInst, SPIRVAsmOperandInst, 1, HOISTABLE)
+    // This isn't hoistable, as we sometimes need to change the used value and
+    // instructions around the specific asm block
+    INST(SPIRVAsmOperandInst, SPIRVAsmOperandInst, 1, 0)
     // A named enumerator, the value is stored as a constant operand
     // It may have a second operand, which if present is a type with which to
     // construct a constant id to pass, instead of a literal constant
@@ -1091,7 +1094,13 @@ INST(SPIRVAsmInst, SPIRVAsmInst, 1, 0)
     // A special instruction which marks the place to insert the generated
     // result operand
     INST(SPIRVAsmOperandResult, SPIRVAsmOperandResult, 0, HOISTABLE)
-INST_RANGE(SPIRVAsmOperand, SPIRVAsmOperandLiteral, SPIRVAsmOperandResult)
+    // A special instruction which represents a type directed truncation
+    // operation where extra components are dropped
+    INST(SPIRVAsmOperandTruncate, __truncate, 0, HOISTABLE)
+    // A type function which returns the result type of sampling an image of
+    // this component type
+    INST(SPIRVAsmOperandSampledType, __sampledType, 1, HOISTABLE)
+INST_RANGE(SPIRVAsmOperand, SPIRVAsmOperandLiteral, SPIRVAsmOperandSampledType)
 
 
 #undef PARENT
diff --git a/source/slang/slang-ir-insts.h b/source/slang/slang-ir-insts.h
index 9d2f44355..bfcca5b02 100644
--- a/source/slang/slang-ir-insts.h
+++ b/source/slang/slang-ir-insts.h
@@ -2373,6 +2373,8 @@ struct IRTryCall : IRTerminatorInst
 
 struct IRSwizzle : IRInst
 {
+    IR_LEAF_ISA(swizzle);
+
     IRUse base;
 
     IRInst* getBase() { return base.get(); }
@@ -2388,6 +2390,8 @@ struct IRSwizzle : IRInst
 
 struct IRSwizzleSet : IRInst
 {
+    IR_LEAF_ISA(swizzleSet);
+
     IRUse base;
     IRUse source;
 
@@ -2594,6 +2598,16 @@ struct IRGetTargetTupleElement : IRInst
     IRInst* getElementIndex() { return getOperand(1); }
 };
 
+struct IRMakeVector : IRInst
+{
+    IR_LEAF_ISA(MakeVector)
+};
+
+struct IRMakeVectorFromScalar : IRInst
+{
+    IR_LEAF_ISA(MakeVectorFromScalar)
+};
+
 // An Instruction that creates a differential pair value from a
 // primal and differential.
 
@@ -2893,6 +2907,8 @@ struct IRDebugLine : IRInst
     IRInst* getColEnd() { return getOperand(4); }
 };
 
+struct IRSPIRVAsm;
+
 struct IRSPIRVAsmOperand : IRInst
 {
     IR_PARENT_ISA(SPIRVAsmOperand);
@@ -2902,6 +2918,17 @@ struct IRSPIRVAsmOperand : IRInst
             return nullptr;
         return getOperand(0);
     }
+    IRSPIRVAsm* getAsmBlock()
+    {
+        const auto ret = as<IRSPIRVAsm>(getParent());
+        SLANG_ASSERT(ret);
+        return ret;
+    }
+};
+
+struct IRSPIRVAsmOperandInst : IRSPIRVAsmOperand
+{
+    IR_LEAF_ISA(SPIRVAsmOperandInst);
 };
 
 struct IRSPIRVAsmInst : IRInst
@@ -2911,15 +2938,23 @@ struct IRSPIRVAsmInst : IRInst
     IRSPIRVAsmOperand* getOpcodeOperand()
     {
         const auto opcodeOperand = cast<IRSPIRVAsmOperand>(getOperand(0));
-        SLANG_ASSERT(opcodeOperand->getOp() == kIROp_SPIRVAsmOperandEnum);
+        // This must be either:
+        // - An enum, such as 'OpNop'
+        // - The __truncate pseudo-instruction
+        // - A literal, like 107 (OpImageQuerySamples)
+        SLANG_ASSERT(opcodeOperand->getOp() == kIROp_SPIRVAsmOperandEnum
+            || opcodeOperand->getOp() == kIROp_SPIRVAsmOperandTruncate
+            || opcodeOperand->getOp() == kIROp_SPIRVAsmOperandLiteral);
         return opcodeOperand;
     }
 
     SpvWord getOpcodeOperandWord()
     {
         const auto o = getOpcodeOperand();
-        SLANG_ASSERT(o->getOp() != kIROp_SPIRVAsmOperandResult);
         const auto v = o->getValue();
+        // It's not valid to call this on an operand which doesn't have a value
+        // (such as __truncate)
+        SLANG_ASSERT(v);
         const auto i = cast<IRIntLit>(v);
         return SpvWord(i->getValue());
     }
@@ -3939,6 +3974,8 @@ public:
     IRSPIRVAsmOperand* emitSPIRVAsmOperandEnum(IRInst* inst, IRType* constantType);
     IRSPIRVAsmOperand* emitSPIRVAsmOperandBuiltinVar(IRInst* type, IRInst* builtinKind);
     IRSPIRVAsmOperand* emitSPIRVAsmOperandGLSL450Set();
+    IRSPIRVAsmOperand* emitSPIRVAsmOperandSampledType(IRType* elementType);
+    IRSPIRVAsmOperand* emitSPIRVAsmOperandTruncate();
     IRSPIRVAsmInst* emitSPIRVAsmInst(IRInst* opcode, List<IRInst*> operands);
     IRSPIRVAsm* emitSPIRVAsm(IRType* type);
     IRInst* emitGenericAsm(UnownedStringSlice asmText);
diff --git a/source/slang/slang-ir-legalize-vector-types.cpp b/source/slang/slang-ir-legalize-vector-types.cpp
new file mode 100644
index 000000000..9b99f2a98
--- /dev/null
+++ b/source/slang/slang-ir-legalize-vector-types.cpp
@@ -0,0 +1,193 @@
+#include "slang-ir-legalize-vector-types.h"
+#include "slang-ir.h"
+#include "slang-ir-insts.h"
+#include "slang-ir-util.h"
+
+namespace Slang
+{
+    struct VectorTypeLoweringContext
+    {
+        IRModule* module;
+        DiagnosticSink* sink;
+
+        InstWorkList workList;
+        InstHashSet workListSet;
+
+        Dictionary<IRInst*, IRInst*> replacements;
+
+        VectorTypeLoweringContext(IRModule* module)
+            :module(module), workList(module), workListSet(module)
+        {}
+
+        void addToWorkList(IRInst* inst)
+        {
+            for (auto ii = inst->getParent(); ii; ii = ii->getParent())
+            {
+                if (as<IRGeneric>(ii))
+                    return;
+            }
+
+            if (workListSet.contains(inst))
+                return;
+
+            workList.add(inst);
+            workListSet.add(inst);
+        }
+
+        bool is1Vector(IRType* t)
+        {
+            const auto lenLit = composeGetters<IRIntLit>(t, &IRVectorType::getElementCount);
+            return lenLit ? getIntVal(lenLit) == 1 : false;
+        };
+
+        bool has1VectorType(IRInst* i)
+        {
+            return is1Vector(i->getDataType());
+        }
+
+        bool has1VectorPtrType(IRInst* i)
+        {
+            const auto ptr = as<IRPtrTypeBase>(i->getDataType());
+            return ptr && is1Vector(ptr->getValueType());
+        }
+
+        // If necessary, this returns a new instruction which operates on the
+        // single component of a 1-vector.
+        // If no new instruction was created, the old one is returned
+        // unmodified; its type is only updated later, when the 1-vector type
+        // is replaced globally. You therefore shouldn't rely on this
+        // function returning an instruction with a non-1-vector type.
+        // (Even without the deferred replacement this would not hold, as it
+        // eliminates at most one level of 1-vectorness, and nested vectors
+        // exist.)
+        IRInst* getReplacement(IRInst* inst)
+        {
+            IRInst* replacement = nullptr;
+            if(replacements.tryGetValue(inst, replacement))
+                return replacement;
+
+            IRBuilder builder(module);
+            builder.setInsertBefore(inst);
+            replacement = instMatch<IRInst*>(inst, nullptr,
+                // The following match instructions which take a 1-vector as an
+                // operand and are sensitive to the fact that it's a vector.
+                // Likewise for pointers.
+                [&](IRGetElement* getElement){
+                    const auto base = getElement->getBase();
+                    return has1VectorType(base) ? getReplacement(base) : nullptr;
+                },
+                [&](IRSwizzle* swizzle) -> IRInst*{
+                    const auto swizzled = swizzle->getBase();
+
+                    // Is this a swizzle of a 1-vector
+                    if(has1VectorType(swizzled))
+                    {
+                        // If this is a unary swizzle, just return the element
+                        // inside
+                        const auto scalar = getReplacement(swizzled);
+                        if(swizzle->getElementCount() == 1)
+                            return scalar;
+                        // Otherwise, create a broadcast of this scalar
+                        else
+                            return builder.emitMakeVectorFromScalar(
+                                swizzle->getFullType(),
+                                scalar);
+                    }
+                    return nullptr;
+                },
+                [&](IRGetElementPtr* gep){
+                    const auto base = gep->getBase();
+                    return has1VectorPtrType(base) ? getReplacement(base) : nullptr;
+                },
+                [&](IRSwizzledStore* swizzledStore){
+                    const auto base = swizzledStore->getDest();
+                    return has1VectorPtrType(base)
+                        ? builder.emitStore(getReplacement(base), swizzledStore->getSource())
+                        : nullptr;
+                },
+                // The following should match any instruction which can
+                // specifically construct a 1-vector, for example 'MakeVector'.
+                //
+                // Other instructions, such as arithmetic, don't need to be
+                // handled here; they'll be fixed by the global 1-vector to
+                // scalar type replacement.
+                [&](IRMakeVectorFromScalar* makeVec){
+                    return has1VectorType(makeVec)
+                        ? getReplacement(makeVec->getOperand(0))
+                        : nullptr;
+                },
+                [&](IRMakeVector* makeVec){
+                    return has1VectorType(makeVec)
+                        ? getReplacement(makeVec->getOperand(0))
+                        : nullptr;
+                },
+                // Otherwise if this is a 1-vector type itself, replace it with
+                // the scalar version.
+                [&](IRVectorType* vecTy){
+                    return is1Vector(vecTy)
+                        ? getReplacement(vecTy->getElementType())
+                        : nullptr;
+                });
+
+            // Sadly it's not really possible to catch missing cases here, as
+            // there are heaps of instructions which don't do anything special
+            // with vectors, but can take or return vector types, for example
+            // arithmetic, IRGetElement, IRGetField etc...
+
+            // If we did get a replacement, add that to our mapping and return
+            // it, otherwise return the original (to maybe be updated later)
+            if(replacement)
+            {
+                replacements.set(inst, replacement);
+                addToWorkList(replacement);
+            }
+
+            return replacement ? replacement : inst;
+        }
+
+        void processModule()
+        {
+            addToWorkList(module->getModuleInst());
+
+            while (workList.getCount() != 0)
+            {
+                IRInst* inst = workList.getLast();
+
+                workList.removeLast();
+                workListSet.remove(inst);
+
+                // Run this inst through the replacer
+                getReplacement(inst);
+
+                for (auto child = inst->getLastChild(); child; child = child->getPrevInst())
+                {
+                    addToWorkList(child);
+                }
+            }
+
+            // Apply all replacements
+            //
+            // It's important to defer this as if we were updating things
+            // on-the-fly we would be losing information about what was
+            // actually a 1-vector or not. The alternative would be cloning
+            // every function with a 1-vector type as we process it, and
+            // cleaning up at the end. This involves less copying, but is
+            // necessarily a little less type-safe.
+            for (const auto& [old, replacement] : replacements)
+            {
+                if(old != replacement)
+                {
+                    old->replaceUsesWith(replacement);
+                    old->removeAndDeallocate();
+                }
+            }
+        }
+    };
+
+    void legalizeVectorTypes(IRModule* module, DiagnosticSink* sink)
+    {
+        VectorTypeLoweringContext context(module);
+        context.sink = sink; // the constructor only takes the module
+        context.processModule(); // collect the replacements, then apply them
+    }
+}
diff --git a/source/slang/slang-ir-legalize-vector-types.h b/source/slang/slang-ir-legalize-vector-types.h
new file mode 100644
index 000000000..4428a10b0
--- /dev/null
+++ b/source/slang/slang-ir-legalize-vector-types.h
@@ -0,0 +1,13 @@
+#pragma once
+
+namespace Slang
+{
+    struct IRModule;
+    class DiagnosticSink;
+
+    // - [ ] Lower 0 length vectors to unit
+    // - [x] Lower 1 length vectors to scalar
+    // - [ ] Lower too long vectors to tuples
+    void legalizeVectorTypes(IRModule* module, DiagnosticSink* sink);
+
+}
diff --git a/source/slang/slang-ir-peephole.cpp b/source/slang/slang-ir-peephole.cpp
index 34ccdf924..f6e8a3458 100644
--- a/source/slang/slang-ir-peephole.cpp
+++ b/source/slang/slang-ir-peephole.cpp
@@ -922,6 +922,7 @@ struct PeepholeContext : InstPassBase
         case kIROp_IsUnsignedInt:
         case kIROp_IsSignedInt:
         case kIROp_IsBool:
+        case kIROp_IsVector:
             {
                 auto type = inst->getOperand(0)->getDataType();
                 if (auto vectorType = as<IRVectorType>(type))
@@ -950,6 +951,9 @@ struct PeepholeContext : InstPassBase
                     case kIROp_IsSignedInt:
                         result = isIntegralType(type) && getIntTypeInfo(type).isSigned;
                         break;
+                    case kIROp_IsVector: // query the operand's own type, not the local 'type'
+                        result = as<IRVectorType>(inst->getOperand(0)->getDataType()) != nullptr;
+                        break;
                     }
                     inst->replaceUsesWith(builder.getBoolValue(result));
                     maybeRemoveOldInst(inst);
diff --git a/source/slang/slang-ir-spirv-legalize.cpp b/source/slang/slang-ir-spirv-legalize.cpp
index a4b33324b..63be9d19d 100644
--- a/source/slang/slang-ir-spirv-legalize.cpp
+++ b/source/slang/slang-ir-spirv-legalize.cpp
@@ -158,36 +158,42 @@ struct SPIRVLegalizationContext : public SourceEmitterBase
             auto user = use->getUser();
             IRBuilder builder(user);
             builder.setInsertBefore(user);
-            switch (user->getOp())
+            if(as<IRGetElement>(user) || as<IRFieldExtract>(user))
             {
-            case kIROp_GetElement:
-            case kIROp_FieldExtract:
-                {
-                    auto basePtrType = as<IRPtrTypeBase>(addr->getDataType());
-                    IRType* ptrType = nullptr;
-                    if (basePtrType->hasAddressSpace())
-                        ptrType = builder.getPtrType(kIROp_PtrType, user->getDataType(), basePtrType->getAddressSpace());
-                    else
-                        ptrType = builder.getPtrType(kIROp_PtrType, user->getDataType());
-                    IRInst* subAddr = nullptr;
-                    if (user->getOp() == kIROp_GetElement)
-                        subAddr = builder.emitElementAddress(ptrType, addr, as<IRGetElement>(user)->getIndex());
-                    else
-                        subAddr = builder.emitFieldAddress(ptrType, addr, as<IRFieldExtract>(user)->getField());
-
-                    for (auto u = user->firstUse; u; u = u->nextUse)
-                    {
-                        workList.add(WorkItem{ subAddr, u });
-                    }
-                    instsToRemove.add(user);
-                    break;
-                }
-            default:
+                auto basePtrType = as<IRPtrTypeBase>(addr->getDataType());
+                IRType* ptrType = nullptr;
+                if (basePtrType->hasAddressSpace())
+                    ptrType = builder.getPtrType(kIROp_PtrType, user->getDataType(), basePtrType->getAddressSpace());
+                else
+                    ptrType = builder.getPtrType(kIROp_PtrType, user->getDataType());
+                IRInst* subAddr = nullptr;
+                if (user->getOp() == kIROp_GetElement)
+                    subAddr = builder.emitElementAddress(ptrType, addr, as<IRGetElement>(user)->getIndex());
+                else
+                    subAddr = builder.emitFieldAddress(ptrType, addr, as<IRFieldExtract>(user)->getField());
+
+                for (auto u = user->firstUse; u; u = u->nextUse)
                 {
-                    auto val = builder.emitLoad(addr);
-                    builder.replaceOperand(use, val);
-                    break;
+                    workList.add(WorkItem{ subAddr, u });
                 }
+                instsToRemove.add(user);
+            }
+            else if(const auto spirvAsmOperand = as<IRSPIRVAsmOperandInst>(user))
+            {
+                // Used inside an asm block: load this work item's `addr` just before
+                // the block and swap in a fresh operand referring to the loaded value.
+                const auto asmBlock = spirvAsmOperand->getAsmBlock();
+                builder.setInsertBefore(asmBlock);
+                auto loadedValue = builder.emitLoad(addr);
+                builder.setInsertBefore(spirvAsmOperand);
+                auto loadedValueOperand = builder.emitSPIRVAsmOperandInst(loadedValue);
+                spirvAsmOperand->replaceUsesWith(loadedValueOperand);
+                spirvAsmOperand->removeAndDeallocate();
+            }
+            else
+            {
+                auto val = builder.emitLoad(addr);
+                builder.replaceOperand(use, val);
             }
         }
 
@@ -212,9 +218,6 @@ struct SPIRVLegalizationContext : public SourceEmitterBase
                 innerType = arrayType->getElementType();
             }
 
-            if (as<IRResourceTypeBase>(innerType))
-                return;
-
             SpvStorageClass storageClass = SpvStorageClassPrivate;
             // Figure out storage class based on var layout.
             if (auto layout = getVarLayout(inst))
@@ -231,6 +234,15 @@ struct SPIRVLegalizationContext : public SourceEmitterBase
                 }
             }
 
+            // Textures and samplers can't be in the Uniform storage class for
+            // Vulkan; if one ended up there, use UniformConstant instead.
+            if (storageClass == SpvStorageClassUniform
+                && (as<IRTextureTypeBase>(inst->getDataType())
+                    || as<IRSamplerStateTypeBase>(inst->getDataType())))
+            {
+                storageClass = SpvStorageClassUniformConstant;
+            }
+
             // Strip any HLSL wrappers
             IRBuilder builder(m_sharedContext->m_irModule);
             bool needLoad = true;
@@ -875,6 +887,19 @@ struct SPIRVLegalizationContext : public SourceEmitterBase
         }
     }
 
+    void processConstructor(IRInst* inst)
+    {
+        // A constructor whose operands are all children of the module inst
+        // (i.e. global) is moved to global scope as well. This is what lets
+        // vectors made of constant components be emitted as constant vectors
+        // (using OpConstantComposite).
+        const auto moduleInst = m_module->getModuleInst();
+        for (UIndex operandIndex = 0; operandIndex < inst->getOperandCount(); operandIndex++)
+            if(inst->getOperand(operandIndex)->getParent() != moduleInst)
+                return;
+        inst->insertAtEnd(moduleInst);
+    }
+
     void processModule()
     {
         // Process global params before anything else, so we don't generate inefficient
@@ -936,6 +961,25 @@ struct SPIRVLegalizationContext : public SourceEmitterBase
             case kIROp_Switch:
                 processSwitch(as<IRSwitch>(inst));
                 break;
+
+            case kIROp_MakeVectorFromScalar:
+            case kIROp_MakeUInt64:
+            case kIROp_MakeVector:
+            case kIROp_MakeMatrix:
+            case kIROp_MakeMatrixFromScalar:
+            case kIROp_MatrixReshape:
+            case kIROp_MakeArray:
+            case kIROp_MakeArrayFromElement:
+            case kIROp_MakeStruct:
+            case kIROp_MakeTuple:
+            case kIROp_MakeTargetTuple:
+            case kIROp_MakeResultValue:
+            case kIROp_MakeResultError:
+            case kIROp_MakeOptionalValue:
+            case kIROp_MakeOptionalNone:
+                processConstructor(inst);
+                break;
+
             default:
                 for (auto child = inst->getLastChild(); child; child = child->getPrevInst())
                 {
diff --git a/source/slang/slang-ir.cpp b/source/slang/slang-ir.cpp
index d3cfea6e9..6777c0b3a 100644
--- a/source/slang/slang-ir.cpp
+++ b/source/slang/slang-ir.cpp
@@ -5772,6 +5772,31 @@ namespace Slang
         return i;
     }
 
+    // Emit a `__sampledType(elementType)` operand at the current insert
+    // location, which must be inside a SPIR-V asm block.
+    IRSPIRVAsmOperand* IRBuilder::emitSPIRVAsmOperandSampledType(IRType* elementType)
+    {
+        SLANG_ASSERT(as<IRSPIRVAsm>(m_insertLoc.getParent()));
+        // The operand is itself typed as a type (TypeType) and carries the
+        // element type as its only operand.
+        const auto operand = createInst<IRSPIRVAsmOperand>(
+            this, kIROp_SPIRVAsmOperandSampledType, getTypeType(), elementType);
+        addInst(operand);
+        return operand;
+    }
+
+    // Emit the `__truncate` pseudo-operand at the current insert location,
+    // which must be inside a SPIR-V asm block.
+    IRSPIRVAsmOperand* IRBuilder::emitSPIRVAsmOperandTruncate()
+    {
+        SLANG_ASSERT(as<IRSPIRVAsm>(m_insertLoc.getParent()));
+        // It takes no operands and has void type.
+        const auto operand = createInst<IRSPIRVAsmOperand>(
+            this, kIROp_SPIRVAsmOperandTruncate, getVoidType());
+        addInst(operand);
+        return operand;
+    }
+
     IRSPIRVAsmInst* IRBuilder::emitSPIRVAsmInst(IRInst* opcode, List<IRInst*> operands)
     {
         SLANG_ASSERT(as<IRSPIRVAsm>(m_insertLoc.getParent()));
@@ -6631,6 +6656,14 @@ namespace Slang
         case kIROp_SPIRVAsmOperandResult:
             dump(context, "result");
             return;
+        case kIROp_SPIRVAsmOperandTruncate:
+            dump(context, "__truncate");
+            return;
+        case kIROp_SPIRVAsmOperandSampledType:
+            dump(context, "__sampledType(");
+            dumpInstExpr(context, inst->getOperand(0));
+            dump(context, ")");
+            return;
         }
 
         dump(context, opInfo.name);
diff --git a/source/slang/slang-ir.h b/source/slang/slang-ir.h
index 413410880..54ef87009 100644
--- a/source/slang/slang-ir.h
+++ b/source/slang/slang-ir.h
@@ -2483,7 +2483,6 @@ template<typename R, typename T>
 static T thisArg(R (T::*&&())());
 }
 
-#if __cplusplus >= 201703L
 // A tool to "pattern match" an instruction against multiple cases
 // Use like:
 //
@@ -2499,7 +2498,6 @@ static T thisArg(R (T::*&&())());
 template<typename R, typename F, typename... Fs>
 R instMatch(IRInst* i, R def, F f, Fs... fs)
 {
-    static_assert(__cplusplus >= 201703L, "Wait until we're on c++17 to use instMatch");
     // Recursive case
     using P = decltype(detail::argType(std::function{std::declval<F>()}));
     if(auto s = as<P>(i))
@@ -2531,7 +2529,6 @@ R instMatch(IRInst*, R def)
 template<typename F, typename... Fs>
 void instMatch_(IRInst* i, F f, Fs... fs)
 {
-    static_assert(__cplusplus >= 201703L, "Wait until we're on c++17 to use instMatch_");
     // Recursive case
     using P = decltype(detail::argType(std::function{std::declval<F>()}));
     if(auto s = as<P>(i))
@@ -2546,7 +2543,6 @@ void instMatch_(IRInst*)
 {
     // Base case with no eliminators
 }
-#endif
 
 // A tool to compose a bunch of downcasts and accessors
 // `composeGetters<R>(x, &MyStruct::getFoo, &MyOtherStruct::getBar)` translates to
diff --git a/source/slang/slang-lower-to-ir.cpp b/source/slang/slang-lower-to-ir.cpp
index ed1da3d25..4266b46f9 100644
--- a/source/slang/slang-lower-to-ir.cpp
+++ b/source/slang/slang-lower-to-ir.cpp
@@ -3862,6 +3862,20 @@ struct ExprLoweringVisitorBase : public ExprVisitor<Derived, LoweredValInfo>
                     }
                     return builder->emitSPIRVAsmOperandInst(i);
                 }
+            case SPIRVAsmOperand::SampledType:
+                {
+                    IRType* i;
+                    {
+                        IRBuilderInsertLocScope insertScope(builder);
+                        builder->setInsertBefore(spirvAsmInst);
+                        i = lowerType(context, operand.type.type);
+                    }
+                    return builder->emitSPIRVAsmOperandSampledType(i);
+                }
+            case SPIRVAsmOperand::TruncateMarker:
+                {
+                    return builder->emitSPIRVAsmOperandTruncate();
+                }
             }
             SLANG_UNREACHABLE("Unhandled case in visitSPIRVAsmExpr");
         };
diff --git a/source/slang/slang-parser.cpp b/source/slang/slang-parser.cpp
index c3eba8c58..306d2cbec 100644
--- a/source/slang/slang-parser.cpp
+++ b/source/slang/slang-parser.cpp
@@ -6310,6 +6310,19 @@ namespace Slang
         {
             return SPIRVAsmOperand{SPIRVAsmOperand::ResultMarker, parser->ReadToken()};
         }
+        // The handy __sampledType function
+        if(AdvanceIf(parser, "__sampledType"))
+        {
+            parser->ReadToken(TokenType::LParent);
+            const auto typeExpr = parser->ParseType();
+            parser->ReadMatchingToken(TokenType::RParent);
+            return SPIRVAsmOperand{SPIRVAsmOperand::SampledType, Token{}, typeExpr};
+        }
+        // The pseudo-operand for component truncation
+        else if(parser->LookAheadToken("__truncate"))
+        {
+            return SPIRVAsmOperand{SPIRVAsmOperand::TruncateMarker, parser->ReadToken()};
+        }
         else if (AdvanceIf(parser, "builtin"))
         {
             // reference to a builtin var.
@@ -6325,7 +6338,6 @@ namespace Slang
         {
             return SPIRVAsmOperand{ SPIRVAsmOperand::GLSL450Set, parser->ReadToken() };
         }
-
         // A regular identifier
         else if(parser->LookAheadToken(TokenType::Identifier))
         {
@@ -6362,7 +6374,8 @@ namespace Slang
         // A $foo variable
         else if(AdvanceIf(parser, TokenType::Dollar))
         {
-            return slangIdentOperand(SPIRVAsmOperand::SlangValue);
+            Expr* expr = parseAtomicExpr(parser);
+            return SPIRVAsmOperand{SPIRVAsmOperand::SlangValue, Token{}, expr};
         }
         // A $$foo type
         else if(AdvanceIf(parser, TokenType::DollarDollar))
@@ -6465,7 +6478,7 @@ namespace Slang
             || resultOperand)
         {
             // Insert the LHS result-type operand
-            if(ret.operands.getCount() == opInfo->resultTypeIndex && resultTypeOperand)
+            if(opInfo && ret.operands.getCount() == opInfo->resultTypeIndex && resultTypeOperand)
             {
                 ret.operands.add(*resultTypeOperand);
                 resultTypeOperand.reset();
@@ -6473,14 +6486,14 @@ namespace Slang
             }
 
             // Insert the LHS result operand
-            if(ret.operands.getCount() == opInfo->resultIdIndex && resultOperand)
+            if(opInfo && ret.operands.getCount() == opInfo->resultIdIndex && resultOperand)
             {
                 ret.operands.add(*resultOperand);
                 resultOperand.reset();
                 continue;
             }
 
-            if(ret.operands.getCount() == opInfo->maxOperandCount)
+            if(opInfo && ret.operands.getCount() == opInfo->maxOperandCount)
             {
                 parser->diagnose(
                     parser->tokenReader.peekLoc(),
diff --git a/source/slang/slang-stdlib-textures.cpp b/source/slang/slang-stdlib-textures.cpp
new file mode 100644
index 000000000..e5af1367f
--- /dev/null
+++ b/source/slang/slang-stdlib-textures.cpp
@@ -0,0 +1,1241 @@
+#include "slang-stdlib-textures.h"
+
+#define EMIT_LINE_DIRECTIVE() sb << "#line " << (__LINE__+1) << " \"slang-stdlib-textures.cpp\"\n"
+
+namespace Slang
+{
+
+// Build a String by streaming each argument, in order, into a StringBuilder
+template<typename... Parts>
+String cat(const Parts&... parts)
+{
+    return (StringBuilder{} << ... << parts);
+}
+
+//
+// Utilities
+//
+
+const auto indentWidth = 4;
+static const char spaces[] = "                    ";
+static_assert(SLANG_COUNT_OF(spaces) % indentWidth == 1);
+
+struct BraceScope
+{
+    BraceScope(const char*& i, StringBuilder& sb, const char* end = "\n")
+    :i(i), sb(sb), end(end)
+    {
+        // Emits "{" at the current indent, then deepens it. 'i' reaching the
+        // start of 'spaces' means that buffer is too short for this nesting.
+        SLANG_ASSERT(i != spaces);
+        sb << i << "{\n";
+        i -= indentWidth;
+    }
+    ~BraceScope()
+    {
+        // Undoes the indent and emits "}" followed by 'end'. 'i' sitting on
+        // the terminating NUL means we are closing more scopes than we opened.
+        SLANG_ASSERT(*i != '\0');
+        i += indentWidth;
+        sb << i << "}" << end;
+    }
+    const char*& i;
+    StringBuilder& sb;
+    const char* end;
+};
+
+TextureTypeInfo::TextureTypeInfo(
+    TextureTypePrefixInfo const& prefixInfo,
+    BaseTextureShapeInfo const& base,
+    bool isArray,
+    bool isMultisample,
+    BaseTextureAccessInfo const& accessInfo,
+    StringBuilder& inSB,
+    String const& inPath)
+    : prefixInfo(prefixInfo)
+    , base(base)
+    , isArray(isArray)
+    , isMultisample(isMultisample)
+    , accessInfo(accessInfo)
+    , sb(inSB)
+    , path(inPath)
+{
+    i = spaces + SLANG_COUNT_OF(spaces) - 1; // the terminating NUL: an empty indent to start with
+}
+
+void TextureTypeInfo::writeFuncBody(
+    const char* funcName,
+    const String& glsl,
+    const String& cuda,
+    const String& spirv)
+{
+    BraceScope funcScope{i, sb}; // braces of the function body itself
+    {
+        sb << i << "__target_switch\n";
+        BraceScope switchScope{i, sb};
+        sb << i << "case cpp:\n";
+        sb << i << "case hlsl:\n"; // cpp and hlsl share the ".funcName" intrinsic below
+        sb << i << "__intrinsic_asm \"." << funcName << "\";\n";
+        if(glsl.getLength()) // a case is only written for a target given a non-empty string
+        {
+            sb << i << "case glsl:\n";
+            sb << i << "__intrinsic_asm \"" << glsl << "\";\n";
+        }
+        if(cuda.getLength())
+        {
+            sb << i << "case cuda:\n";
+            sb << i << "__intrinsic_asm \"" << cuda << "\";\n";
+        }
+        if(spirv.getLength())
+        {
+            sb << i << "case spirv:\n";
+            sb << i << "return spirv_asm\n";
+            BraceScope spirvScope{i, sb, ";\n"}; // the asm block is a returned expression, so close with ';'
+            sb << spirv << "\n"; // written as given, without the current indent
+        }
+    }
+}
+
+void TextureTypeInfo::writeFuncDecorations(
+    const String& glsl,
+    const String& cuda)
+{
+    if(glsl.getLength()) // an empty string means: no intrinsic decoration for that target
+        sb << i << "__target_intrinsic(glsl, \"" << glsl << "\")\n";
+    if(cuda.getLength())
+        sb << i << "__target_intrinsic(cuda, \"" << cuda << "\")\n";
+}
+
+void TextureTypeInfo::writeFuncWithSig(
+    const char* funcName,
+    const String& sig,
+    const String& glsl,
+    const String& spirv,
+    const String& cuda,
+    const ReadNoneMode readNoneMode)
+{
+    // Decide whether the declaration gets a [__readNone] attribute.
+    const bool readOnlyAccess = accessInfo.access == SLANG_RESOURCE_ACCESS_READ;
+    const bool markReadNone =
+        (readNoneMode == ReadNoneMode::Always)
+        || (readNoneMode == ReadNoneMode::IfReadOnly && readOnlyAccess);
+    const bool hasSpirvBody = spirv.getLength() != 0;
+
+    // Without a SPIR-V body the function is a bare declaration whose target
+    // intrinsics are attached as decorations ahead of the signature.
+    if(!hasSpirvBody)
+        writeFuncDecorations(glsl, cuda);
+    if(markReadNone)
+        sb << i << "[__readNone]\n";
+    sb << i << sig << (hasSpirvBody ? "\n" : ";\n");
+
+    // With a SPIR-V body, the targets are dispatched from a __target_switch.
+    if(hasSpirvBody)
+        writeFuncBody(funcName, glsl, cuda, spirv);
+    sb << "\n";
+}
+
+void TextureTypeInfo::writeFunc(
+    const char* returnType,
+    const char* funcName,
+    const String& params,
+    const String& glsl,
+    const String& spirv,
+    const String& cuda,
+    const ReadNoneMode readNoneMode)
+{
+    // The signature text is "<returnType> <funcName>(<params>)"; everything
+    // else about the declaration is emitted by writeFuncWithSig.
+    const String signature = cat(returnType, " ", funcName, "(", params, ")");
+    writeFuncWithSig(
+        funcName,
+        signature,
+        glsl, spirv, cuda,
+        readNoneMode);
+}
+
+void TextureTypeInfo::emitTypeDecl()
+{
+    char const* baseName = prefixInfo.name;
+    char const* baseShapeName = base.shapeName;
+    TextureFlavor::Shape baseShape = base.baseShape;
+
+    // Arrays of 3D textures aren't allowed, so nothing is emitted for them
+    if (isArray && baseShape == TextureFlavor::Shape::Shape3D) return;
+
+    auto access = accessInfo.access;
+
+    // No such thing as RWTextureCube, so nothing is emitted for it either
+    if (access == SLANG_RESOURCE_ACCESS_READ_WRITE && baseShape == TextureFlavor::Shape::ShapeCube)
+    {
+        return;
+    }
+
+    // TODO: any constraints to enforce on what gets to be multisampled?
+
+    unsigned flavor = baseShape;
+    if (isArray)		flavor |= TextureFlavor::ArrayFlag;
+    if (isMultisample)	flavor |= TextureFlavor::MultisampleFlag;
+    // if (isShadow)		flavor |= TextureFlavor::ShadowFlag;
+
+    flavor |= (access << 8); // the access mode is stored from bit 8 upwards
+
+    // Emit the generic parameter list; the element type T defaults to float4
+    sb << "__generic<T = float4";
+    // Multisampled texture types also take an optional sampleCount parameter.
+    if (isMultisample)
+        sb << ", let sampleCount : int = 0";
+    sb << ">";
+
+    if(prefixInfo.combined)
+    {
+        sb << "__magic_type(TextureSamplerType," << int(flavor) << ")\n";
+        sb << "__intrinsic_type(" << (kIROp_TextureSamplerType + (int(flavor) << kIROpMeta_OtherShift)) << ")\n";
+    }
+    else
+    {
+        sb << "__magic_type(TextureType," << int(flavor) << ")\n";
+        sb << "__intrinsic_type(" << (kIROp_TextureType + (int(flavor) << kIROpMeta_OtherShift)) << ")\n";
+    }
+    sb << "struct "; // type name: <access><prefix><shape>[MS][Array]
+    sb << accessInfo.name;
+    sb << baseName;
+    sb << baseShapeName;
+    if (isMultisample) sb << "MS";
+    if (isArray) sb << "Array";
+    // if (isShadow) sb << "Shadow";
+    sb << "\n";
+
+    // The struct body; the scope closes it with "};" when it ends
+    {
+        BraceScope structBodyScope{i, sb, ";\n"};
+
+        writeQueryFunctions();
+
+        if(baseShape != TextureFlavor::Shape::ShapeCube) // no subscript functions for cube textures
+            writeSubscriptFunctions();
+
+        if( !isMultisample ) // no sample functions for multisampled textures
+            writeSampleFunctions();
+    }
+
+    writeGatherExtensions();
+} // TextureTypeInfo::emitTypeDecl
+
+void TextureTypeInfo::writeQueryFunctions()
+{
+    static const char* kComponentNames[]{ "x", "y", "z", "w" };
+    // Emits the query members: CalculateLevelOfDetail[Unclamped], GetDimensions, GetSamplePosition and Load.
+    TextureFlavor::Shape baseShape = base.baseShape;
+
+    char const* samplerStateParam = prefixInfo.combined ? "" : "SamplerState s, ";
+    auto access = accessInfo.access;
+
+    if( !isMultisample )
+    {
+        writeFunc(
+            "float",
+            "CalculateLevelOfDetail",
+            cat(samplerStateParam, "float", base.coordCount, " location"),
+            cat("textureQueryLod($p, $2).x"),
+            "",
+            "",
+            ReadNoneMode::Never
+        );
+
+        writeFunc(
+            "float",
+            "CalculateLevelOfDetailUnclamped",
+            cat(samplerStateParam, "float", base.coordCount, " location"),
+            cat("textureQueryLod($p, $2).y"),
+            "",
+            "",
+            ReadNoneMode::Never
+        );
+    }
+
+    // `GetDimensions`: one overload per out-parameter type, with and without mip info
+    const char* dimParamTypes[] = {"out float ", "out int ", "out uint "};
+    for(auto t : dimParamTypes)
+    for(int includeMipInfo = 0; includeMipInfo < 2; ++includeMipInfo)
+    {
+        StringBuilder glsl;
+        {
+            // The GLSL pattern is one parenthesized comma expression that assigns every out parameter.
+            glsl << "(";
+
+            int aa = 1;
+            String lodStr = ", 0";
+            if (includeMipInfo)
+            {
+                int mipLevelArg = aa++;
+                lodStr = ", int($";
+                lodStr.append(mipLevelArg);
+                lodStr.append(")");
+            }
+
+            String opStr = " = textureSize($0" + lodStr;
+            switch( access )
+            {
+            case SLANG_RESOURCE_ACCESS_READ_WRITE:
+            case SLANG_RESOURCE_ACCESS_RASTER_ORDERED:
+                opStr = " = imageSize($0";
+                break;
+
+            default:
+                break;
+            }
+            // `cc` counts the size components consumed by the base shape, so that
+            // the array element count can be read from the next component.
+            int cc = 0;
+            switch(baseShape)
+            {
+            case TextureFlavor::Shape::Shape1D:
+                glsl << "($" << aa++ << opStr << ")";
+                if (isArray)
+                {
+                    glsl << ".x";
+                }
+                glsl << ")";
+                cc = 1;
+                break;
+
+            case TextureFlavor::Shape::Shape2D:
+            case TextureFlavor::Shape::ShapeCube:
+                glsl << "($" << aa++ << opStr << ").x)";
+                glsl << ", ($" << aa++ << opStr << ").y)";
+                cc = 2;
+                break;
+
+            case TextureFlavor::Shape::Shape3D:
+                glsl << "($" << aa++ << opStr << ").x)";
+                glsl << ", ($" << aa++ << opStr << ").y)";
+                glsl << ", ($" << aa++ << opStr << ").z)";
+                cc = 3;
+                break;
+
+            default:
+                SLANG_UNEXPECTED("unhandled resource shape");
+                break;
+            }
+
+            if(isArray)
+            {
+                glsl << ", ($" << aa++ << opStr << ")." << kComponentNames[cc] << ")";
+            }
+
+            if(isMultisample)
+            {
+                glsl << ", ($" << aa++ << " = textureSamples($0))";
+            }
+
+            if (includeMipInfo)
+            {
+                glsl << ", ($" << aa++ << " = textureQueryLevels($0))";
+            }
+
+
+            glsl << ")";
+        }
+
+        StringBuilder params;
+        if(includeMipInfo)
+            params << "uint mipLevel, ";
+
+        switch(baseShape)
+        {
+        case TextureFlavor::Shape::Shape1D:
+            params << t << "width";
+            break;
+
+        case TextureFlavor::Shape::Shape2D:
+        case TextureFlavor::Shape::ShapeCube:
+            params << t << "width,";
+            params << t << "height";
+            break;
+
+        case TextureFlavor::Shape::Shape3D:
+            params << t << "width,";
+            params << t << "height,";
+            params << t << "depth";
+            break;
+
+        default:
+            SLANG_UNEXPECTED("unhandled resource shape");
+            break;
+        }
+
+        if(isArray)
+        {
+            params << ", " << t << "elements";
+        }
+
+        if(isMultisample)
+        {
+            params << ", " << t << "sampleCount";
+        }
+
+        if(includeMipInfo)
+            params << ", " << t << "numberOfLevels";
+
+        sb << "    __glsl_version(450)\n";
+        sb << "    __glsl_extension(GL_EXT_samplerless_texture_functions)\n";
+        writeFunc(
+            "void",
+            "GetDimensions",
+            params,
+            glsl,
+            "",
+            "",
+            ReadNoneMode::Always);
+    }
+
+    // `GetSamplePosition()`
+    if( isMultisample )
+    {
+        writeFunc("float2", "GetSamplePosition", "int s", "", "", "", ReadNoneMode::Never);
+    }
+
+    // `Load()`
+
+    if( base.coordCount + isArray < 4 )
+    {
+        // The `Load()` operation on an ordinary `Texture2D` takes
+        // an `int3` for the location, where `.xy` holds the texel
+        // coordinates, and `.z` holds the mip level to use.
+        //
+        // The third coordinate for mip level is absent in
+        // `Texture2DMS.Load()` and `RWTexture2D.Load`. This pattern
+        // is repeated for all the other texture shapes.
+        //
+        bool needsMipLevel = !isMultisample && (access == SLANG_RESOURCE_ACCESS_READ);
+
+        int loadCoordCount = base.coordCount + isArray + (needsMipLevel?1:0);
+
+        char const* glslFuncName = (access == SLANG_RESOURCE_ACCESS_READ) ? "texelFetch" : "imageLoad";
+
+        // When translating to GLSL, we need to break apart the `location` argument.
+        //
+        // TODO: this should really be handled by having this member actually get lowered!
+        static const char* kGLSLLoadCoordsSwizzle[] = { "", "", "x", "xy", "xyz", "xyzw" };
+        static const char* kGLSLLoadLODSwizzle[]    = { "", "", "y", "z", "w", "error" };
+
+        // TODO: The GLSL translations here only handle the read-only texture
+        // cases (stuff that lowers to `texture*` in GLSL) and not the stuff
+        // that lowers to `image*`.
+        //
+        // At some point it may make sense to separate the read-only and
+        // `RW`/`RasterizerOrdered` cases here rather than try to share code.
+
+        // CUDA
+        StringBuilder cudaBuilder;
+        if(!isMultisample)
+        {
+            if (access == SLANG_RESOURCE_ACCESS_READ_WRITE)
+            {
+                const int coordCount = base.coordCount;
+                const int vecCount = coordCount + int(isArray);
+
+                if( baseShape != TextureFlavor::Shape::ShapeCube )
+                {
+                    cudaBuilder << "surf" << coordCount << "D";
+                    if (isArray)
+                    {
+                        cudaBuilder << "Layered";
+                    }
+                    cudaBuilder << "read";
+                    cudaBuilder << "<$T0>($0";
+                    for (int j = 0; j < coordCount; ++j)
+                    {
+                        cudaBuilder << ", ($1)";
+                        if (vecCount > 1)
+                        {
+                            cudaBuilder << '.' << char(j + 'x');
+                        }
+
+                        // Surface access is *byte* addressed in x in CUDA
+                        if (j == 0)
+                        {
+                            cudaBuilder << " * $E";
+                        }
+                    }
+                    if (isArray)
+                    {
+                        cudaBuilder << ", int(($1)." << char(coordCount + 'x') << ")";
+                    }
+                    cudaBuilder << ", SLANG_CUDA_BOUNDARY_MODE)";
+                }
+                else
+                {
+                    cudaBuilder << "surfCubemap"; // bare call only: writeFuncDecorations adds the __target_intrinsic(cuda, "...") wrapper
+                    if (isArray)
+                    {
+                        cudaBuilder << "Layered";
+                    }
+                    cudaBuilder << "read";
+
+                    // Surface access is *byte* addressed in x in CUDA
+                    cudaBuilder << "<$T0>($0, ($1).x * $E, ($1).y, ($1).z";
+                    if (isArray)
+                    {
+                        cudaBuilder << ", int(($1).w)";
+                    }
+                    cudaBuilder << ", SLANG_CUDA_BOUNDARY_MODE)";
+                }
+            }
+            else if (access == SLANG_RESOURCE_ACCESS_READ)
+            {
+                // We can allow this on Texture1D
+                if( baseShape == TextureFlavor::Shape::Shape1D && isArray == false)
+                {
+                    cudaBuilder << "tex1Dfetch<$T0>($0, ($1).x)";
+                }
+            }
+        }
+
+        sb << i << "__glsl_extension(GL_EXT_samplerless_texture_functions)\n";
+        writeFunc(
+            "T",
+            "Load",
+            cat("int", loadCoordCount, " location", isMultisample ? ", int sampleIndex" : ""),
+            isMultisample ? cat("$c", glslFuncName, "($0, $1, $2)$z")
+            : needsMipLevel ? cat(
+                "$c",
+                glslFuncName,
+                "($0, ($1).",
+                kGLSLLoadCoordsSwizzle[loadCoordCount],
+                ", ($1).",
+                kGLSLLoadLODSwizzle[loadCoordCount],
+                ")$z")
+            : cat("$c", glslFuncName, "($0, $1)$z"),
+            "",
+            cudaBuilder
+        );
+
+        glslFuncName = (access == SLANG_RESOURCE_ACCESS_READ) ? "texelFetchOffset" : "imageLoad";
+        sb << i << "__glsl_extension(GL_EXT_samplerless_texture_functions)\n";
+        writeFunc(
+            "T",
+            "Load",
+            cat(
+                "int", loadCoordCount, " location",
+                isMultisample ? ", int sampleIndex" : "",
+                ", constexpr int", base.coordCount, " offset"
+            ),
+            isMultisample ? cat("$c", glslFuncName, "($0, $0, $1, $2)$z") // NOTE(review): `$0` is passed twice here - confirm this is intended
+                : needsMipLevel ? cat(
+                    "$c", glslFuncName, "($0, ($1).", kGLSLLoadCoordsSwizzle[loadCoordCount],
+                    ", ($1).", kGLSLLoadLODSwizzle[loadCoordCount],
+                    ", $2)$z")
+                : cat("$c", glslFuncName, "($0, $1, 0, $2)$z")
+        );
+
+        writeFunc(
+            "T",
+            "Load",
+            cat(
+                "int", loadCoordCount, " location",
+                isMultisample ? ", int sampleIndex" : "",
+                ", constexpr int", base.coordCount, " offset",
+                ", out uint status"
+            )
+        );
+    }
+}
+
+static String spirvReadIntrinsic()
+{
+    const char* const indent = "                    ";
+    StringBuilder body; // two lines: the OpImageRead, then the `__truncate` into `result`
+    body << indent << "%sampled : __sampledType(T) = OpImageRead $this $location;\n"
+         << indent << "__truncate $$T result __sampledType(T) %sampled;";
+    return body;
+}
+
+static String spirvWriteIntrinsic()
+{
+    const char* const indent = "                    ";
+    StringBuilder body; // a single indented OpImageWrite line
+    body << indent << "OpImageWrite $this $location $newValue;";
+    return body;
+}
+
+void TextureTypeInfo::writeSubscriptFunctions()
+{
+    TextureFlavor::Shape baseShape = base.baseShape;
+    auto access = accessInfo.access;
+    // Emits `__subscript(uintN location) -> T` with a `get` accessor, plus `set` and `ref` unless access is NONE or READ.
+    int N = base.coordCount + isArray;
+    // Entry 0 of both tables is an empty placeholder; N indexes them directly.
+    char const* uintNs[] = { "", "uint", "uint2", "uint3", "uint4" };
+    char const* ivecNs[] = {  "", "int", "ivec2", "ivec3", "ivec4" };
+
+    auto uintN = uintNs[N];
+    auto ivecN = ivecNs[N];
+
+    // subscript operator
+    sb << i << "__subscript(" << uintN << " location) -> T\n";
+    BraceScope subscriptScope{i, sb}; // lives until the end of this function, so every accessor below is written inside it
+
+    // !!!!!!!!!!!!!!!!!!!! get !!!!!!!!!!!!!!!!!!!!!!!
+
+    // GLSL/SPIR-V distinguishes sampled vs. non-sampled images
+    StringBuilder glslBuilder;
+    {
+        switch( access )
+        {
+        case SLANG_RESOURCE_ACCESS_NONE:
+        case SLANG_RESOURCE_ACCESS_READ:
+            sb << i << "__glsl_extension(GL_EXT_samplerless_texture_functions)\n";
+            glslBuilder << "$ctexelFetch($0, " << ivecN << "($1)";
+            if( !isMultisample )
+            {
+                glslBuilder << ", 0";
+            }
+            else
+            {
+                // TODO: how to handle passing through sample index?
+                glslBuilder << ", 0";
+            }
+            break;
+
+        default:
+            glslBuilder << "$cimageLoad($0, " << ivecN << "($1)";
+            if( isMultisample )
+            {
+                // TODO: how to handle passing through sample index?
+                glslBuilder << ", 0";
+            }
+            break;
+        }
+        glslBuilder << ")$z";
+    }
+
+    // CUDA
+    StringBuilder cudaBuilder;
+    {
+        if (access == SLANG_RESOURCE_ACCESS_READ_WRITE)
+        {
+            const int coordCount = base.coordCount;
+            const int vecCount = coordCount + int(isArray);
+
+            cudaBuilder << "surf";
+            if( baseShape != TextureFlavor::Shape::ShapeCube )
+            {
+                cudaBuilder << coordCount << "D";
+            }
+            else
+            {
+                cudaBuilder << "Cubemap";
+            }
+
+            cudaBuilder << (isArray ? "Layered" : "");
+            cudaBuilder << "read$C<$T0>($0";
+
+            for (int j = 0; j < vecCount; ++j)
+            {
+                cudaBuilder << ", ($1)";
+                if (vecCount > 1)
+                {
+                    cudaBuilder << '.' << char(j + 'x');
+                }
+                // Surface access is *byte* addressed in x in CUDA
+                if (j == 0)
+                {
+                    cudaBuilder << " * $E";
+                }
+            }
+
+            cudaBuilder << ", SLANG_CUDA_BOUNDARY_MODE)";
+        }
+        else if (access == SLANG_RESOURCE_ACCESS_READ)
+        {
+            // We can allow this on Texture1D
+            if( baseShape == TextureFlavor::Shape::Shape1D && isArray == false)
+            {
+                cudaBuilder << "tex1Dfetch<$T0>($0, $1)";
+            }
+        }
+    }
+
+    // Output that has get
+    writeFuncWithSig(
+        "operator[]",
+        "get",
+        glslBuilder,
+        spirvReadIntrinsic(),
+        cudaBuilder
+    );
+
+    // !!!!!!!!!!!!!!!!!!!! set !!!!!!!!!!!!!!!!!!!!!!!
+
+    if (!(access == SLANG_RESOURCE_ACCESS_NONE || access == SLANG_RESOURCE_ACCESS_READ))
+    {
+        // CUDA
+        cudaBuilder.clear(); // the builder is reused; the `get` mapping above has already been written
+        {
+            const int coordCount = base.coordCount;
+            const int vecCount = coordCount + int(isArray);
+
+            cudaBuilder << "surf";
+            if( baseShape != TextureFlavor::Shape::ShapeCube )
+            {
+                cudaBuilder << coordCount << "D";
+            }
+            else
+            {
+                cudaBuilder << "Cubemap";
+            }
+
+            cudaBuilder << (isArray ? "Layered" : "");
+            cudaBuilder << "write$C<$T0>($2, $0";
+            for (int j = 0; j < vecCount; ++j)
+            {
+                cudaBuilder << ", ($1)";
+                if (vecCount > 1)
+                {
+                    cudaBuilder << '.' << char(j + 'x');
+                }
+
+                // Surface access is *byte* addressed in x in CUDA
+                if (j == 0)
+                {
+                    cudaBuilder << " * $E";
+                }
+            }
+
+            cudaBuilder << ", SLANG_CUDA_BOUNDARY_MODE)";
+        }
+
+        // Set
+        sb << i << "[nonmutating]\n";
+        writeFuncWithSig(
+            "operator[]",
+            "set(T newValue)",
+            cat("imageStore($0, ", ivecN, "($1), $V2)"),
+            spirvWriteIntrinsic(),
+            cudaBuilder
+        );
+    }
+
+    // !!!!!!!!!!!!!!!!!! ref !!!!!!!!!!!!!!!!!!!!!!!!!
+
+    // Depending on the access level of the texture type,
+    // a `ref` accessor is emitted in addition to the
+    // accessors written above, or not at all.
+    switch( access )
+    {
+    case SLANG_RESOURCE_ACCESS_NONE:
+    case SLANG_RESOURCE_ACCESS_READ:
+        break;
+    default:
+        sb << i << "__intrinsic_op(" << int(kIROp_ImageSubscript) << ") ref;\n";
+        break;
+    }
+}
+
+static String cudaSampleIntrinsic(const bool isArray, const BaseTextureShapeInfo& base, bool sampleLevel)
+{
+    const bool isCube = (base.baseShape == TextureFlavor::Shape::ShapeCube);
+    const int dims = base.coordCount;
+    StringBuilder call;
+    // Function name: `tex<N>D` or `texCubemap`, followed by the optional
+    // `Layered` and `Lod` suffixes.
+    if (isCube)
+        call << "texCubemap";
+    else
+        call << "tex" << dims << "D";
+    if (isArray)
+        call << "Layered";
+    if (sampleLevel)
+        call << "Lod";
+
+    // Template argument and texture operand, then the coordinate components.
+    if (isCube)
+    {
+        call << "<$T0>($0, ($2).x, ($2).y, ($2).z";
+    }
+    else
+    {
+        call << "<$T0>($0";
+        for (int axis = 0; axis < dims; ++axis)
+            call << ", ($2)" << '.' << "xyzw"[axis];
+    }
+
+    // For arrays, one more component of `$2` is passed, converted to int.
+    if (isArray)
+    {
+        if (isCube)
+            call << ", int(($2).w)";
+        else
+            call << ", int(($2)." << char(dims + 'x') << ")";
+    }
+
+    // Optional explicit level argument, then the closing parenthesis.
+    if (sampleLevel)
+        call << ", $3";
+    call << ")";
+    return call;
+}
+
+const char* noBias = nullptr;        // null stand-in for spirvSampleIntrinsic's `bias` argument
+const char* noLodLevel = nullptr;    // null stand-in for its `lodLevel` argument
+const char* noGradX = nullptr;       // null stand-in for its `gradX` argument
+const char* noGradY = nullptr;       // null stand-in for its `gradY` argument
+const char* noConstOffset = nullptr; // presumably for `constOffset`; not referenced in the visible part of the file
+const char* noMinLod = nullptr;      // presumably for `minLod`; NOTE(review): all six are mutable globals with external linkage - confirm whether `static`/`const` was intended
+
+static String spirvSampleIntrinsic(
+    const TextureTypePrefixInfo& prefixInfo,
+    const char* bias = nullptr,
+    const char* lodLevel = nullptr,
+    const char* gradX = nullptr,
+    const char* gradY = nullptr,
+    const char* constOffset = nullptr,
+    const char* minLod = nullptr)
+{
+    // Gradients must be passed as a pair or not at all.
+    SLANG_ASSERT(!(!gradX ^ !gradY));
+    const char* const indent = "                ";
+    StringBuilder out;
+    if(minLod)
+        out << indent << "OpCapability MinLod;\n";
+
+    // A separate texture is first combined with the sampler `s`; a combined
+    // texture-sampler is sampled directly through `$this`.
+    const char* sampledImage = "$this";
+    if(!prefixInfo.combined)
+    {
+        sampledImage = "%sampledImage";
+        out << indent << "%sampledImageType" << " = OpTypeSampledImage $$This;\n";
+        out << indent << sampledImage << " : " << "%sampledImageType" << " = OpSampledImage $this $s;\n";
+    }
+
+    // An explicit level or explicit gradients select the explicit-LOD opcode.
+    const bool explicitLod = lodLevel || gradX;
+    out << indent << "%sampled : __sampledType(T) = "
+        << (explicitLod ? "OpImageSampleExplicitLod" : "OpImageSampleImplicitLod")
+        << " " << sampledImage << " $location";
+
+    // Optional image operands, in the order they are emitted: first all
+    // `|`-joined mask names after `None`, then the matching `$` arguments.
+    struct ImageOperand { const char* maskName; const char* arg; const char* pairedArg; bool isPair; };
+    const ImageOperand operands[] = {
+        {"|Bias", bias, nullptr, false},
+        {"|Lod", lodLevel, nullptr, false},
+        {"|Grad", gradX, gradY, true},
+        {"|ConstOffset", constOffset, nullptr, false},
+        {"|MinLod", minLod, nullptr, false},
+    };
+    out << " None";
+    for(const auto& operand : operands)
+        if(operand.arg)
+            out << operand.maskName;
+    for(const auto& operand : operands)
+    {
+        if(!operand.arg)
+            continue;
+        out << " $" << operand.arg;
+        if(operand.isPair)
+            out << " $" << operand.pairedArg;
+    }
+    out << ";\n";
+    out << indent << "__truncate $$T result __sampledType(T) %sampled;\n";
+    return out;
+}
+
+void TextureTypeInfo::writeSampleFunctions()
+{
+    TextureFlavor::Shape baseShape = base.baseShape;
+    char const* samplerStateParam = prefixInfo.combined ? "" : "SamplerState s, ";
+    // Emits the `Sample*` overloads; each call passes the signature parts, the GLSL pattern, then optional SPIR-V and CUDA strings.
+    // `Sample()`
+    // Overloads taking an `offset` are only written for non-cube shapes.
+    writeFunc(
+        "T",
+        "Sample",
+        cat(samplerStateParam, "float", base.coordCount + isArray, " location"),
+        "$ctexture($p, $2)$z",
+        spirvSampleIntrinsic(prefixInfo),
+        cudaSampleIntrinsic(isArray, base, false)
+    );
+
+    if( baseShape != TextureFlavor::Shape::ShapeCube )
+    {
+        writeFunc(
+            "T",
+            "Sample",
+            cat(samplerStateParam, "float", base.coordCount + isArray, " location, ", "constexpr int", base.coordCount, " offset"),
+            "$ctextureOffset($p, $2, $3)$z",
+            spirvSampleIntrinsic(prefixInfo, noBias, noLodLevel, noGradX, noGradY, "offset")
+        );
+    }
+
+    writeFunc(
+        "T",
+        "Sample",
+        cat(
+            samplerStateParam,
+            "float", base.coordCount + isArray, " location, ",
+            baseShape == TextureFlavor::Shape::ShapeCube ? "" : cat("constexpr int", base.coordCount, " offset, "),
+            "float clamp"
+        ),
+        "",
+        spirvSampleIntrinsic(
+            prefixInfo,
+            noBias,
+            noLodLevel,
+            noGradX,
+            noGradY,
+            baseShape == TextureFlavor::Shape::ShapeCube ? nullptr : "offset",
+            "clamp"
+        )
+    );
+
+    // SPIR-V todo, use OpImageSparseSampleImplicitLod
+    writeFunc(
+        "T",
+        "Sample",
+        cat(
+            samplerStateParam,
+            "float", base.coordCount + isArray, " location, ",
+            baseShape != TextureFlavor::Shape::ShapeCube ? cat("constexpr int", base.coordCount, " offset, ") : "",
+            "float clamp, out uint status"
+        )
+    );
+
+    writeFunc(
+        "T",
+        "SampleBias",
+        cat(
+            samplerStateParam,
+            "float", base.coordCount + isArray, " location, ",
+            "float bias"
+        ),
+        "$ctexture($p, $2, $3)$z",
+        spirvSampleIntrinsic(prefixInfo, "bias")
+    );
+
+    if( baseShape != TextureFlavor::Shape::ShapeCube )
+    {
+        writeFunc(
+            "T",
+            "SampleBias",
+            cat(
+                samplerStateParam,
+                "float", base.coordCount + isArray, " location, ",
+                "float bias, ",
+                "constexpr int", base.coordCount, " offset"
+            ),
+            "$ctextureOffset($p, $2, $3, $4)$z",
+            spirvSampleIntrinsic(prefixInfo, "bias", noLodLevel, noGradX, noGradY, "offset")
+        );
+    }
+    int baseCoordCount = base.coordCount;
+    int arrCoordCount = baseCoordCount + isArray;
+    if (arrCoordCount <= 3)
+    {
+        // `SampleCmp()` and `SampleCmpLevelZero`
+        // The GLSL patterns pack location and compare value into one `vec<arrCoordCount + 1>`.
+        writeFunc(
+            "float",
+            "SampleCmp",
+            cat(
+                "SamplerComparisonState s, ",
+                "float", base.coordCount + isArray, " location, ",
+                "float compareValue"
+            ),
+            cat("texture($p, vec", arrCoordCount + 1, "($2, $3))")
+        );
+
+        writeFunc(
+            "float",
+            "SampleCmpLevelZero",
+            cat(
+                "SamplerComparisonState s, ",
+                "float", base.coordCount + isArray, " location, ",
+                "float compareValue"
+            ),
+            cat("textureLod($p, vec", arrCoordCount + 1, "($2, $3), 0)")
+        );
+    }
+
+    if( baseShape != TextureFlavor::Shape::ShapeCube )
+    {
+        // Note(tfoley): MSDN seems confused, and claims that the `offset`
+        // parameter for `SampleCmp` is available for everything but 3D
+        // textures, while `Sample` and `SampleBias` are consistent in
+        // saying they only exclude `offset` for cube maps (which makes
+        // sense). I'm going to assume the documentation for `SampleCmp`
+        // is just wrong.
+        writeFunc(
+            "float",
+            "SampleCmp",
+            cat(
+                "SamplerComparisonState s, ",
+                "float", base.coordCount + isArray, " location, ",
+                "float compareValue, "
+                "constexpr int", base.coordCount, " offset"
+            ),
+            cat("textureOffset($p, vec", arrCoordCount + 1, "($2, $3), $4)")
+        );
+
+        writeFunc(
+            "float",
+            "SampleCmpLevelZero",
+            cat(
+                "SamplerComparisonState s, ",
+                "float", base.coordCount + isArray, " location, ",
+                "float compareValue, "
+                "constexpr int", base.coordCount, " offset"
+            ),
+            cat("textureLodOffset($p, vec", arrCoordCount + 1, "($2, $3), 0, $4)")
+        );
+    }
+
+    // TODO(JS): Not clear how to map this to CUDA, because in HLSL, the gradient is a vector based on
+    // the dimension. On CUDA there is texNDGrad, but it always just takes ddx, ddy.
+    // I could just assume 0 for elements not supplied, and ignore z. For now no CUDA mapping is passed.
+    writeFunc(
+        "T",
+        "SampleGrad",
+        cat(
+            samplerStateParam,
+            "float", base.coordCount + isArray, " location, ",
+            "float", base.coordCount, " gradX, ",
+            "float", base.coordCount, " gradY"
+        ),
+        "$ctextureGrad($p, $2, $3, $4)$z",
+        spirvSampleIntrinsic(prefixInfo, noBias, noLodLevel, "gradX", "gradY")
+    );
+
+    if( baseShape != TextureFlavor::Shape::ShapeCube )
+    {
+        writeFunc(
+            "T",
+            "SampleGrad",
+            cat(
+                samplerStateParam,
+                "float", base.coordCount + isArray, " location, ",
+                "float", base.coordCount, " gradX, ",
+                "float", base.coordCount, " gradY, ",
+                "constexpr int", base.coordCount, " offset"
+            ),
+            "$ctextureGradOffset($p, $2, $3, $4, $5)$z",
+            spirvSampleIntrinsic(prefixInfo, noBias, noLodLevel, "gradX", "gradY", "offset")
+        );
+
+        sb << i << "__glsl_extension(GL_ARB_sparse_texture_clamp)\n";
+        writeFunc(
+            "T",
+            "SampleGrad",
+            cat(
+                samplerStateParam,
+                "float", base.coordCount + isArray, " location, ",
+                "float", base.coordCount, " gradX, ",
+                "float", base.coordCount, " gradY, ",
+                "constexpr int", base.coordCount, " offset, ",
+                "float lodClamp"
+            ),
+            "$ctextureGradOffsetClampARB($p, $2, $3, $4, $5, $6)$z",
+            spirvSampleIntrinsic(prefixInfo, noBias, noLodLevel, "gradX", "gradY", "offset", "lodClamp")
+        );
+    }
+
+    // `SampleLevel`
+
+    writeFunc(
+        "T",
+        "SampleLevel",
+        cat(
+            samplerStateParam,
+            "float", base.coordCount + isArray, " location, ",
+            "float level"
+        ),
+        "$ctextureLod($p, $2, $3)$z",
+        spirvSampleIntrinsic(prefixInfo, noBias, "level"),
+        cudaSampleIntrinsic(isArray, base, true)
+    );
+
+    if( baseShape != TextureFlavor::Shape::ShapeCube )
+    {
+        writeFunc(
+            "T",
+            "SampleLevel",
+            cat(
+                samplerStateParam,
+                "float", base.coordCount + isArray, " location, ",
+                "float level, ",
+                "constexpr int", base.coordCount, " offset"
+            ),
+            "$ctextureLodOffset($p, $2, $3, $4)$z",
+            spirvSampleIntrinsic(prefixInfo, noBias, "level", noGradX, noGradY, "offset")
+        );
+    }
+}
+
+void TextureTypeInfo::writeGatherExtensions()
+{
+    char const* baseName = prefixInfo.name;
+    char const* baseShapeName = base.shapeName;
+
+    auto access = accessInfo.access;
+
+    bool isReadOnly = (access == SLANG_RESOURCE_ACCESS_READ);
+
+    char const* samplerStateParam = prefixInfo.combined ? "" : "SamplerState s, ";
+
+    // `Gather*()` operations are handled via an `extension` declaration,
+    // because this lets us capture the element type of the texture.
+    //
+    // TODO: longer-term there should be something like a `TextureElementType`
+    // interface, that both scalars and vectors implement, that then exposes
+    // a `Scalar` associated type, and `Gather` can return `vector<T.Scalar, 4>`.
+    //
+    static const struct {
+        char const* genericPrefix;
+        char const* elementType;
+        char const* outputType;
+    } kGatherExtensionCases[] = {
+        { "__generic<T, let N : int>", "vector<T,N>", "vector<T, 4>" },
+        { "", "float", "vector<float, 4>" },
+        { "", "int" , "vector<int, 4>"},
+        { "", "uint", "vector<uint, 4>"},
+
+        // TODO: need a case here for scalars `T`, but also
+        // need to ensure that case doesn't accidentally match
+        // for `T = vector<...>`, which requires actual checking
+        // of constraints on generic parameters.
+    };
+    for(auto cc : kGatherExtensionCases)
+    {
+        // TODO: this should really be an `if` around the entire `Gather` logic
+        if (isMultisample) break;
+
+        EMIT_LINE_DIRECTIVE();
+        sb << cc.genericPrefix << " __extension ";
+        sb << accessInfo.name;
+        sb << baseName;
+        sb << baseShapeName;
+        if (isArray) sb << "Array";
+        sb << "<" << cc.elementType << " >";
+        sb << "\n{\n";
+
+        // `Gather`
+        // (tricky because it returns a 4-vector of the element type
+        // of the texture components...)
+        //
+        // TODO: is it actually correct to restrict these so that, e.g.,
+        // `GatherAlpha()` isn't allowed on `Texture2D<float3>` because
+        // it nominally doesn't have an alpha component?
+        static const struct {
+            int componentIndex;
+            char const* componentName;
+        } kGatherComponets[] = {
+            { 0, "" },
+            { 0, "Red" },
+            { 1, "Green" },
+            { 2, "Blue" },
+            { 3, "Alpha" },
+        };
+        enum Cmp
+        { NotCmp,
+          Cmp
+        };
+
+        for(auto cmp : {NotCmp, Cmp})
+        for(auto kk : kGatherComponets)
+        {
+            auto samplerOrComparisonSampler = cmp == Cmp ? "SamplerComparisonState s, " : samplerStateParam;
+
+            auto componentIndex = kk.componentIndex;
+            auto componentName = kk.componentName;
+
+            auto outputType = cc.outputType;
+
+            const auto cmpName          = cmp == Cmp ? "Cmp" : "";
+            const auto cmpValueParam    = cmp == Cmp ? "float compareValue, " : "";
+            const auto cmpValueParamEnd = cmp == Cmp ? ", float compareValue" : "";
+            const auto supportsGLSL     = componentIndex == 0 || cmp == NotCmp;
+
+            EMIT_LINE_DIRECTIVE();
+
+            if(supportsGLSL)
+            {
+                if(cmp == Cmp)
+                    sb << "__target_intrinsic(glsl, \"textureGather($p, $2, $3)\")\n";
+                else
+                    sb << "__target_intrinsic(glsl, \"textureGather($p, $2, " << componentIndex << ")\")\n";
+            }
+            if (base.coordCount == 2 && cmp == NotCmp)
+            {
+                // Gather only works on 2D in CUDA without comparison
+                // "It is based on the base type of DataType except when readMode is equal to cudaReadModeNormalizedFloat (see Texture Reference API), in which case it is always float4."
+                sb << "__target_intrinsic(cuda, \"tex2Dgather<$T0>($0, ($2).x, ($2).y, " << componentIndex << ")\")\n";
+            }
+            if (isReadOnly)
+                sb << "[__readNone]\n";
+            sb << outputType << " Gather" << cmpName << componentName << "(" << samplerOrComparisonSampler;
+            sb << "float" << base.coordCount + isArray << " location" << cmpValueParamEnd << ");\n";
+
+            if (isReadOnly)
+                sb << "[__readNone]\n";
+            EMIT_LINE_DIRECTIVE();
+            if(supportsGLSL)
+            {
+                if(cmp == Cmp)
+                    sb << "__target_intrinsic(glsl, \"textureGatherOffset($p, $2, $3, $4)\")\n";
+                else
+                    sb << "__target_intrinsic(glsl, \"textureGatherOffset($p, $2, $3, " << componentIndex << ")\")\n";
+            }
+            sb << outputType << " Gather" << cmpName << componentName << "(" << samplerOrComparisonSampler;
+            sb << "float" << base.coordCount + isArray << " location, ";
+            sb << cmpValueParam;
+            sb << "constexpr int" << base.coordCount << " offset);\n";
+
+            if (isReadOnly)
+                sb << "[__readNone]\n";
+            EMIT_LINE_DIRECTIVE();
+            sb << outputType << " Gather" << cmpName << componentName << "(" << samplerOrComparisonSampler;
+            sb << "float" << base.coordCount + isArray << " location, ";
+            sb << cmpValueParam;
+            sb << "constexpr int" << base.coordCount << " offset, ";
+            sb << "out uint status);\n";
+
+            if (isReadOnly)
+                sb << "[__readNone]\n";
+            EMIT_LINE_DIRECTIVE();
+            if(supportsGLSL)
+            {
+                if(cmp == Cmp)
+                    sb << "__target_intrinsic(glsl, \"textureGatherOffsets($p, $2, $3, ivec" << base.coordCount << "[]($4, $5, $6, $7))\")\n";
+                else
+                    sb << "__target_intrinsic(glsl, \"textureGatherOffsets($p, $2, ivec" << base.coordCount << "[]($3, $4, $5, $6), " << componentIndex << ")\")\n";
+            }
+            sb << outputType << " Gather" << cmpName << componentName << "(" << samplerOrComparisonSampler;
+            sb << "float" << base.coordCount + isArray << " location, ";
+            sb << cmpValueParam;
+            sb << "int" << base.coordCount << " offset1, ";
+            sb << "int" << base.coordCount << " offset2, ";
+            sb << "int" << base.coordCount << " offset3, ";
+            sb << "int" << base.coordCount << " offset4);\n";
+
+            if (isReadOnly)
+                sb << "[__readNone]\n";
+            EMIT_LINE_DIRECTIVE();
+            sb << outputType << " Gather" << cmpName << componentName << "(" << samplerOrComparisonSampler;
+            sb << "float" << base.coordCount + isArray << " location, ";
+            sb << cmpValueParam;
+            sb << "int" << base.coordCount << " offset1, ";
+            sb << "int" << base.coordCount << " offset2, ";
+            sb << "int" << base.coordCount << " offset3, ";
+            sb << "int" << base.coordCount << " offset4, ";
+            sb << "out uint status);\n";
+        }
+
+        EMIT_LINE_DIRECTIVE();
+        sb << "\n}\n";
+    }
+}
+
+}
diff --git a/source/slang/slang-stdlib-textures.h b/source/slang/slang-stdlib-textures.h
new file mode 100644
index 000000000..19008a3f0
--- /dev/null
+++ b/source/slang/slang-stdlib-textures.h
@@ -0,0 +1,112 @@
+#pragma once
+
+#include "slang-ir.h"
+#include "slang-type-system-shared.h"
+#include "../core/slang-string.h"
+
+namespace Slang
+{
+
+static const struct BaseTextureShapeInfo { // One row per base texture shape; the rows are listed in kBaseTextureShapes.
+    char const*			    shapeName;  // Shape spelling: "1D", "2D", "3D" or "Cube"
+    TextureFlavor::Shape	baseShape;  // TextureFlavor::Shape enumerator paired with shapeName
+    int					    coordCount; // Coordinate count; the Gather emitter uses it as the width of its `offset` vectors, and of `location` (plus one for array textures)
+} kBaseTextureShapes[] = {
+    { "1D",		TextureFlavor::Shape::Shape1D,	1 },
+    { "2D",		TextureFlavor::Shape::Shape2D,	2 },
+    { "3D",		TextureFlavor::Shape::Shape3D,	3 },
+    { "Cube",	TextureFlavor::Shape::ShapeCube,3 }, // Cube shares coordCount 3 with 3D
+};
+
+static const struct BaseTextureAccessInfo { // Pairs a type-name prefix with a resource access mode; rows are in kBaseTextureAccessLevels.
+    char const*         name;   // Text emitted in front of the base type name ("" for plain read access)
+    SlangResourceAccess access; // SLANG_RESOURCE_ACCESS_* value paired with that prefix
+} kBaseTextureAccessLevels[] = {
+    { "",                   SLANG_RESOURCE_ACCESS_READ },
+    { "RW",                 SLANG_RESOURCE_ACCESS_READ_WRITE },
+    { "RasterizerOrdered",  SLANG_RESOURCE_ACCESS_RASTER_ORDERED },
+};
+
+static const struct TextureTypePrefixInfo // One row per type-name stem; the rows are listed in kTexturePrefixes.
+{
+    char const* name;     // Type-name stem: "Texture" or "Sampler"
+    bool        combined; // true only for the "Sampler" row; presumably marks combined texture-sampler types — TODO confirm
+} kTexturePrefixes[] =
+{
+    { "Texture", false },
+    { "Sampler", true },
+};
+
+struct TextureTypeInfo // Bundles the description of one texture type variant with the StringBuilder used for output.
+{
+    TextureTypeInfo( // Parameters line up one-to-one with the data members declared below (same order and types).
+        TextureTypePrefixInfo const& prefixInfo,
+        BaseTextureShapeInfo const& base,
+        bool isArray,
+        bool isMultisample,
+        BaseTextureAccessInfo const& accessInfo,
+        StringBuilder& inSB,
+        String const& inPath);
+
+    TextureTypePrefixInfo const& prefixInfo; // reference member: the referenced object must outlive this one
+    BaseTextureShapeInfo const& base;        // reference member, same lifetime requirement
+    bool isArray;
+    bool isMultisample;
+    BaseTextureAccessInfo const& accessInfo; // reference member, same lifetime requirement
+    StringBuilder& sb;                       // held by reference, so the builder must outlive this object
+    String path;                             // held by value; presumably initialized from inPath — TODO confirm in the .cpp
+
+    void emitTypeDecl(); // only public member function besides the constructor; NOTE(review): presumably drives the private writers — confirm in the .cpp
+
+private:
+    //
+    // Functions for writing specific parts of a definition
+    // (declared here only; their bodies are not part of this header)
+    void writeQueryFunctions();
+    void writeSubscriptFunctions();
+    void writeSampleFunctions();
+    void writeGatherExtensions();
+
+    //
+    // More general utilities
+    //
+    enum class ReadNoneMode // NOTE(review): presumably selects when a `[__readNone]` attribute is written — confirm in the .cpp
+    {
+        Never,
+        IfReadOnly, // default argument of writeFuncWithSig and writeFunc
+        Always
+    };
+
+    void writeFuncBody( // NOTE(review): target strings are ordered glsl, cuda, spirv here...
+        const char* funcName,
+        const String& glsl,
+        const String& cuda,
+        const String& spirv
+    );
+    void writeFuncDecorations( // takes only the glsl and cuda strings (no spirv parameter)
+        const String& glsl,
+        const String& cuda
+    );
+    void writeFuncWithSig( // ...but glsl, spirv, cuda here and in writeFunc; mind the order at call sites
+        const char* funcName,
+        const String& sig,
+        const String& glsl = String{}, // the three target strings default to empty
+        const String& spirv = String{},
+        const String& cuda = String{},
+        const ReadNoneMode readNoneMode = ReadNoneMode::IfReadOnly
+    );
+    void writeFunc( // same trailing parameters as writeFuncWithSig, with returnType/params in place of a single sig string
+        const char* returnType,
+        const char* funcName,
+        const String& params,
+        const String& glsl = String{},
+        const String& spirv = String{},
+        const String& cuda = String{},
+        const ReadNoneMode readNoneMode = ReadNoneMode::IfReadOnly
+    );
+
+    // A pointer to a string representing the current level of indentation
+    const char* i;
+};
+
+}
diff --git a/source/slang/slang-stdlib.cpp b/source/slang/slang-stdlib.cpp
index 65d5cf758..5fec05c80 100644
--- a/source/slang/slang-stdlib.cpp
+++ b/source/slang/slang-stdlib.cpp
@@ -4,6 +4,7 @@
 #include "slang-ir.h"
 #include "slang-syntax.h"
 #include "slang-ir-util.h"
+#include "slang-stdlib-textures.h"
 #include "../core/slang-string-util.h"
 
 #define STRINGIZE(x) STRINGIZE2(x)
diff --git a/tests/bindings/glsl-parameter-blocks.slang.glsl b/tests/bindings/glsl-parameter-blocks.slang.glsl
index fbeddb905..c00ed8fe3 100644
--- a/tests/bindings/glsl-parameter-blocks.slang.glsl
+++ b/tests/bindings/glsl-parameter-blocks.slang.glsl
@@ -1,50 +1,31 @@
 #version 450
 layout(row_major) uniform;
 layout(row_major) buffer;
-
-#line 3 "tests/bindings/glsl-parameter-blocks.slang"
 struct Test_0
 {
     vec4 a_0;
 };
 
-
-#line 7
 layout(binding = 0)
 layout(std140) uniform _S1
 {
     vec4 a_0;
 }gTest_0;
-
-#line 3
 layout(binding = 1)
 uniform texture2D gTest_t_0;
 
-
-#line 1237 "core.meta.slang"
 layout(binding = 2)
 uniform sampler gTest_s_0;
 
-
-#line 89 "core"
 layout(location = 0)
 out vec4 _S2;
 
-
-#line 902 "core.meta.slang"
 layout(location = 0)
 in vec2 _S3;
 
-
-#line 12 "tests/bindings/glsl-parameter-blocks.slang"
 void main()
 {
-    vec4 _S4 = (texture(sampler2D(gTest_t_0,gTest_s_0), (_S3)));
-
-#line 14
-    _S2 = gTest_0.a_0 + _S4;
-
-#line 14
+    _S2 = gTest_0.a_0 + (texture(sampler2D(gTest_t_0,gTest_s_0), (_S3)));
     return;
 }
 
diff --git a/tests/bugs/gh-941.slang.glsl b/tests/bugs/gh-941.slang.glsl
index 4330ece53..111cdb33c 100644
--- a/tests/bugs/gh-941.slang.glsl
+++ b/tests/bugs/gh-941.slang.glsl
@@ -1,9 +1,7 @@
-//TEST_IGNORE_FILE:
-
 #version 450
-
 #extension GL_EXT_nonuniform_qualifier : require
-
+layout(row_major) uniform;
+layout(row_major) buffer;
 struct SLANG_ParameterGroup_C_0
 {
     vec2 uv_0;
@@ -11,15 +9,13 @@ struct SLANG_ParameterGroup_C_0
 };
 
 layout(binding = 2)
-layout(std140)
-uniform _S1
+layout(std140) uniform _S1
 {
     vec2 uv_0;
     uint index_0;
-} C_0;
-
+}C_0;
 layout(binding = 0)
-uniform texture2D t_0[];
+uniform texture2D  t_0[];
 
 layout(binding = 1)
 uniform sampler s_0;
@@ -29,11 +25,7 @@ out vec4 _S2;
 
 void main()
 {
-    vec4 _S3 = texture(
-    	sampler2D(
-    		t_0[C_0.index_0],
-    		s_0),
-		C_0.uv_0);
-    _S2 = _S3;
+    _S2 = (texture(sampler2D(t_0[C_0.index_0],s_0), (C_0.uv_0)));
     return;
-}
-\ No newline at end of file
+}
+
diff --git a/tests/compute/texture-simpler.slang b/tests/compute/texture-simpler.slang
index ce4b76f77..18af1cd62 100644
--- a/tests/compute/texture-simpler.slang
+++ b/tests/compute/texture-simpler.slang
@@ -1,4 +1,9 @@
-//TEST(compute, vulkan):COMPARE_COMPUTE:-vk -compute -shaderobj -output-using-type
+//TEST(smoke,compute):COMPARE_COMPUTE_EX:-cpu -compute  -shaderobj -output-using-type
+//TEST(smoke,compute):COMPARE_COMPUTE_EX:-slang -compute -shaderobj -output-using-type
+//TEST(smoke,compute):COMPARE_COMPUTE_EX:-slang -compute -dx12  -shaderobj -output-using-type
+//TEST(smoke,compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 -profile cs_6_0 -use-dxil -shaderobj -output-using-type
+//TEST(smoke,compute, vulkan):COMPARE_COMPUTE_EX:-vk -compute -shaderobj -output-using-type -render-feature hardware-device
+//TEST(smoke,compute):COMPARE_COMPUTE_EX:-cuda -compute  -shaderobj -output-using-type
 
 //TEST_INPUT: Texture2D(size=4, content = one):name t2D
 Texture2D<float> t2D;
diff --git a/tests/cross-compile/non-uniform-indexing.slang.glsl b/tests/cross-compile/non-uniform-indexing.slang.glsl
index 96abe6bac..07e8f9e38 100644
--- a/tests/cross-compile/non-uniform-indexing.slang.glsl
+++ b/tests/cross-compile/non-uniform-indexing.slang.glsl
@@ -1,10 +1,9 @@
-//TEST_IGNORE_FILE
 #version 450
-
 #extension GL_EXT_nonuniform_qualifier : require
-
+layout(row_major) uniform;
+layout(row_major) buffer;
 layout(binding = 0)
-uniform texture2D t_0[10];
+uniform texture2D  t_0[10];
 
 layout(binding = 1)
 uniform sampler s_0;
@@ -17,12 +16,7 @@ in vec3 _S2;
 
 void main()
 {
-    vec4 _S3 = texture(
-    	sampler2D(
-    		t_0[nonuniformEXT(int(_S2.z))],
-    		s_0),
-		_S2.xy);
-
-    _S1 = _S3;
+    _S1 = (texture(sampler2D(t_0[nonuniformEXT(int(_S2.z))],s_0), (_S2.xy)));
     return;
 }
+
diff --git a/tests/cross-compile/vk-texture-indexing.slang.glsl b/tests/cross-compile/vk-texture-indexing.slang.glsl
index 73513c623..7fd768465 100644
--- a/tests/cross-compile/vk-texture-indexing.slang.glsl
+++ b/tests/cross-compile/vk-texture-indexing.slang.glsl
@@ -3,31 +3,23 @@
 #extension GL_EXT_nonuniform_qualifier : require
 layout(row_major) uniform;
 layout(row_major) buffer;
-
 layout(binding = 0)
 uniform texture2D  gParams_textures_0[10];
 
-
 float fetchData_0(uvec2 coords_0, uint index_0)
 {
-    float _S1 = (texelFetch((gParams_textures_0[nonuniformEXT(index_0)]), ivec2((coords_0)), 0).x);
-
-    return _S1;
+    return (texelFetch((gParams_textures_0[nonuniformEXT(index_0)]), ivec2((coords_0)), 0).x);
 }
 
 layout(location = 0)
-out vec4 _S2;
-
+out vec4 _S1;
 
 flat layout(location = 0)
-in uvec3 _S3;
-
+in uvec3 _S2;
 
 void main()
 {
-
-    _S2 = vec4(fetchData_0(_S3.xy, _S3.z));
-
+    _S1 = vec4(fetchData_0(_S2.xy, _S2.z));
     return;
 }
 
diff --git a/tests/expected-failure.txt b/tests/expected-failure.txt
index 3e158e2d1..df773a46b 100644
--- a/tests/expected-failure.txt
+++ b/tests/expected-failure.txt
@@ -2,13 +2,7 @@ tests/autodiff/global-param-hoisting.slang.1 (vk)
 tests/bugs/buffer-swizzle-store.slang.1 (vk)
 tests/bugs/gh-3075.slang.2 (vk)
 tests/bugs/ray-query-in-generic.slang.1 (vk)
-tests/compute/half-rw-texture-convert.slang.4 (vk)
-tests/compute/half-rw-texture-convert2.slang.4 (vk)
 tests/compute/ray-tracing-inline.slang.1 (vk)
-tests/compute/rw-texture-simple.slang.4 (vk)
-tests/compute/texture-sample-grad-offset-clamp.slang (vk)
-tests/compute/texture-simple.slang.4 (vk)
-tests/compute/texture-simpler.slang (vk)
 tests/language-feature/constants/constexpr-loop.slang.1 (vk)
 tests/optimization/func-resource-result/func-resource-result-complex.slang.1 (vk)
 tests/type/texture-sampler/texture-sampler-2d.slang (vk)
diff --git a/tests/language-feature/1-vector.slang b/tests/language-feature/1-vector.slang
new file mode 100644
index 000000000..bb8cedf3c
--- /dev/null
+++ b/tests/language-feature/1-vector.slang
@@ -0,0 +1,19 @@
+//TEST(compute):COMPARE_COMPUTE(filecheck-buffer=CHECK):-shaderobj
+//TEST(compute):COMPARE_COMPUTE(filecheck-buffer=CHECK):-dx12 -use-dxil -shaderobj
+//TEST(compute):COMPARE_COMPUTE(filecheck-buffer=CHECK):-cpu -shaderobj
+//TEST(compute):COMPARE_COMPUTE(filecheck-buffer=CHECK):-vk -shaderobj
+
+//TEST_INPUT:ubuffer(data=[1 2 3 4], stride=4):out,name=outputBuffer
+RWStructuredBuffer<int> outputBuffer;
+
+// CHECK:      0
+// CHECK-NEXT: 1
+// CHECK-NEXT: 2
+// CHECK-NEXT: 3
+
+[numthreads(4, 1, 1)] // four threads along x; outputBuffer is declared with four elements
+void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID)
+{
+    vector<int,1> i = vector<int,1>(dispatchThreadID.x); // build a 1-vector from the scalar thread index
+    outputBuffer[i.x] = i.x; // read the single component back through .x; each thread stores its own index
+}
diff --git a/tests/language-feature/spirv-asm/truncate.slang b/tests/language-feature/spirv-asm/truncate.slang
new file mode 100644
index 000000000..9837890f5
--- /dev/null
+++ b/tests/language-feature/spirv-asm/truncate.slang
@@ -0,0 +1,62 @@
+//TEST(compute):COMPARE_COMPUTE(filecheck-buffer=CHECK):-vk -shaderobj -emit-spirv-directly -output-using-type
+
+//TEST_INPUT:ubuffer(data=[1 2 3 4], stride=4):out,name=outputBuffer
+RWStructuredBuffer<int> outputBuffer;
+
+// CHECK:      8
+// CHECK-NEXT: 13
+// CHECK-NEXT: 18
+// CHECK-NEXT: 23
+
+//
+// This test tests the __truncate operator
+//
+[numthreads(4, 1, 1)]
+void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID)
+{
+    int i = dispatchThreadID.x;
+    int n = outputBuffer[i]; // the buffer is initialized with 1 2 3 4, so n = i + 1
+
+    int scalar = n;
+
+    // 1-vectors are not valid in SPIR-V
+    // vector<int, 1> vector1 = vector<int, 1>(n);
+
+    vector<int, 4> vector4 = n + vector<int, 4>(0,1,2,3); // (n, n+1, n+2, n+3)
+    // Expected result: 0 + n + n + (n + (n+1) + (n+2)) = 5n + 3, i.e. 8, 13, 18, 23 for n = 1..4
+
+    int r = 0; // read as $r by the first OpIAdd and overwritten by the final OpStore
+    spirv_asm
+    {
+        // scalar to scalar: %r1 = %a1 + r, and r is 0 at this point
+        __truncate $$int %a1 $$int $scalar;
+        %r1 : $$int = OpIAdd %a1 $r;
+
+        // scalar to 1-vector (disabled, see the 1-vector note above; %r2 just copies %r1)
+        // __truncate $$vector<int,1> %a2 $$int $scalar;
+        // %x1 : $$int = OpCompositeExtract %a2 0;
+        // %r2 : $$int = OpIAdd %x1 %r1;
+        %r2 : $$int = OpCopyObject %r1;
+
+        // 1-vector to scalar (disabled for the same reason; %r3 just copies %r2)
+        // __truncate $$int %a3 $$vector<int,1> $vector1;
+        // %r3 : $$int = OpIAdd %a3 %r2;
+        %r3 : $$int = OpCopyObject %r2;
+
+        // n-vector to scalar (the expected outputs are consistent with component 0 being kept)
+        __truncate $$int %a4 $$vector<int,4> $vector4;
+        %r4 : $$int = OpIAdd %a4 %r3;
+
+        // n-vector to m-vector (the expected outputs are consistent with the first 3 components being kept)
+        __truncate $$vector<int,3> %a5 $$vector<int,4> $vector4;
+        %x2 : $$int = OpCompositeExtract %a5 0;
+        %x3 : $$int = OpCompositeExtract %a5 1;
+        %x4 : $$int = OpCompositeExtract %a5 2;
+        %r5 : $$int = OpIAdd %x2 %r4;
+        %r6 : $$int = OpIAdd %x3 %r5;
+        %r7 : $$int = OpIAdd %x4 %r6;
+
+        OpStore &r %r7
+    };
+    outputBuffer[i] = r;
+}
diff --git a/tests/legalization/vec1.slang b/tests/legalization/vec1.slang
new file mode 100644
index 000000000..f3de085b0
--- /dev/null
+++ b/tests/legalization/vec1.slang
@@ -0,0 +1,93 @@
+//TEST(smoke,compute):COMPARE_COMPUTE(filecheck-buffer=CHECK):-vk -shaderobj -output-using-type
+//TEST(smoke,compute):COMPARE_COMPUTE(filecheck-buffer=CHECK):-vk -shaderobj -emit-spirv-directly -output-using-type
+
+// CHECK:      23
+// CHECK-NEXT: 23
+// CHECK-NEXT: 23
+// CHECK-NEXT: 23
+
+// This test tests that the 1-vector legalization works correctly.
+
+//TEST_INPUT:ubuffer(data=[0 0 0 0], stride=4):out,name=outputBuffer
+RWStructuredBuffer<float> outputBuffer;
+
+// This struct helps test that nested access through 1-vectors works.
+struct V // filled in by getV() and summed by sumV()
+{
+    // 1-vector of 1-vector
+    vector<vector<float, 1>, 1> oo; // holds one float in total
+
+    // 1-vector of n-vector
+    vector<vector<float, 4>, 1> on; // holds four floats in total
+
+    // n-vector of 1-vector
+    vector<vector<float, 1>, 4> no; // holds four floats in total
+};
+
+vector<int, 1> get1Vec(int x) // not referenced anywhere else in this test file
+{
+    return x; // returns a scalar where a 1-vector is declared, relying on implicit conversion
+}
+
+V getV() // Builds the value used by computeMain; every component of every member is assigned below.
+{
+    V v;
+
+    // Test swizzle store
+    v.oo.x.x = 1;
+
+    // Test assigning into subscript
+    v.on[0].wzyx = float4(4,3,2,1); // reversed swizzle: w=4, z=3, y=2, x=1, so on[0] is (1,2,3,4)
+
+    // Test assigning from vector
+    v.no.x = vector<float, 1>(1);
+
+    // Test assigning from scalar
+    v.no.y.x = 2;
+
+    // Test assigning from vector of vector
+    v.no.wz = vector<vector<float, 1>, 2>(3,4); // w=3, z=4, so no is (1,2,4,3)
+
+    return v;
+}
+
+float sumV(V v) // Adds up every float stored in v; for the value built by getV() that is 1 + 10 + 10 = 21.
+{
+    return v.oo[0][0] // subscript access through both 1-vector levels
+        + v.on.x.x
+        + v.on.x.y
+        + v.on.x.z
+        + v.on.x.w
+        // Test arithmetic
+        + (v.no.x + v.no.y + v.no.z + v.no.w).x; // the four operands are 1-vectors; .x extracts the scalar sum
+}
+
+float3 splat(vector<float, 1> v) // Returns a float3 whose three components all equal v's single component.
+{
+    // Test swizzle
+    return v.xxx; // widening swizzle: a 1-vector source yields a 3-vector result
+}
+
+// This function helps test that this legalization happens with generic length
+// vectors specialized to 1. It returns 1 + 2 + ... + N.
+float triangle<let N : int>()
+{
+    vector<float, N> v;
+    for(int i = 0; i < N; ++i)
+        v[i] = i+1; // element i holds i + 1 (dynamic subscript store)
+
+    float ret = 0;
+    for(int i = 0; i < N; ++i)
+        ret += v[i]; // dynamic subscript load
+    return ret; // computeMain calls triangle<1>(), which yields 1
+}
+
+[numthreads(4, 1, 1)] // all four threads compute the same value and store it at their own index
+void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID)
+{
+    const V v = getV();
+    outputBuffer[dispatchThreadID.x]
+        = sumV(v)           // 21 for the value built by getV()
+        + triangle<1>()     // 1
+        + splat(v.oo.x).z;  // 1, giving 23 in total
+}
diff --git a/tests/nv-extensions/nv-ray-tracing-motion-blur.slang.glsl b/tests/nv-extensions/nv-ray-tracing-motion-blur.slang.glsl
index 7f734bf75..744ee2f44 100644
--- a/tests/nv-extensions/nv-ray-tracing-motion-blur.slang.glsl
+++ b/tests/nv-extensions/nv-ray-tracing-motion-blur.slang.glsl
@@ -3,18 +3,12 @@
 #extension GL_NV_ray_tracing_motion_blur : require
 layout(row_major) uniform;
 layout(row_major) buffer;
-
-#line 5 "tests/nv-extensions/nv-ray-tracing-motion-blur.slang"
 layout(binding = 0)
 uniform texture2D samplerPosition_0;
 
-
-#line 7
 layout(binding = 2)
 uniform sampler sampler_0;
 
-
-#line 6
 layout(binding = 1)
 uniform texture2D samplerNormal_0;
 
@@ -24,8 +18,6 @@ struct Light_0
     vec4 color_0;
 };
 
-
-#line 14
 struct Uniforms_0
 {
     Light_0 light_0;
@@ -34,8 +26,6 @@ struct Uniforms_0
     mat4x4 model_0;
 };
 
-
-#line 21
 layout(binding = 3)
 layout(std140) uniform _S1
 {
@@ -44,45 +34,31 @@ layout(std140) uniform _S1
     mat4x4 view_0;
     mat4x4 model_0;
 }ubo_0;
-
-#line 26
 layout(binding = 5)
 uniform accelerationStructureEXT as_0;
 
-
-#line 24
 layout(rgba32f)
 layout(binding = 4)
 uniform image2D outputImage_0;
 
-
-#line 33
 struct ReflectionRay_0
 {
     float color_1;
 };
 
-
-#line 5218 "hlsl.meta.slang"
 layout(location = 0)
 rayPayloadEXT
 ReflectionRay_0 p_0;
 
-
-#line 28 "tests/nv-extensions/nv-ray-tracing-motion-blur.slang"
 struct ShadowRay_0
 {
     float hitDistance_0;
 };
 
-
-#line 5286 "hlsl.meta.slang"
 layout(location = 1)
 rayPayloadEXT
 ShadowRay_0 p_1;
 
-
-#line 5079
 struct RayDesc_0
 {
     vec3 Origin_0;
@@ -91,115 +67,60 @@ struct RayDesc_0
     float TMax_0;
 };
 
-
-#line 5243
 void TraceMotionRay_0(accelerationStructureEXT AccelerationStructure_0, uint RayFlags_0, uint InstanceInclusionMask_0, uint RayContributionToHitGroupIndex_0, uint MultiplierForGeometryContributionToHitGroupIndex_0, uint MissShaderIndex_0, RayDesc_0 Ray_0, float CurrentTime_0, inout ShadowRay_0 Payload_0)
 {
-
-#line 5288
     p_1 = Payload_0;
     traceRayMotionNV(AccelerationStructure_0, RayFlags_0, InstanceInclusionMask_0, RayContributionToHitGroupIndex_0, MultiplierForGeometryContributionToHitGroupIndex_0, MissShaderIndex_0, Ray_0.Origin_0, Ray_0.TMin_0, Ray_0.Direction_0, Ray_0.TMax_0, CurrentTime_0, (1));
-
-#line 5302
     Payload_0 = p_1;
     return;
 }
 
-
-#line 3527
 float saturate_0(float x_0)
 {
     return clamp(x_0, 0.0, 1.0);
 }
 
-
-#line 5168
 void TraceRay_0(accelerationStructureEXT AccelerationStructure_1, uint RayFlags_1, uint InstanceInclusionMask_1, uint RayContributionToHitGroupIndex_1, uint MultiplierForGeometryContributionToHitGroupIndex_1, uint MissShaderIndex_1, RayDesc_0 Ray_1, inout ReflectionRay_0 Payload_1)
 {
-
-#line 5220
     p_0 = Payload_1;
     traceRayEXT(AccelerationStructure_1, RayFlags_1, InstanceInclusionMask_1, RayContributionToHitGroupIndex_1, MultiplierForGeometryContributionToHitGroupIndex_1, MissShaderIndex_1, Ray_1.Origin_0, Ray_1.TMin_0, Ray_1.Direction_0, Ray_1.TMax_0, (0));
-
-#line 5233
     Payload_1 = p_0;
     return;
 }
 
-
-#line 38 "tests/nv-extensions/nv-ray-tracing-motion-blur.slang"
 void main()
 {
     uvec3 _S2 = ((gl_LaunchIDEXT));
-
-#line 40
     ivec2 launchID_0 = ivec2(_S2.xy);
     uvec3 _S3 = ((gl_LaunchSizeEXT));
-
-#line 41
     ivec2 launchSize_0 = ivec2(_S3.xy);
-
     vec2 inUV_0 = vec2((float(launchID_0.x) + 0.5) / float(launchSize_0.x), (float(launchID_0.y) + 0.5) / float(launchSize_0.y));
-
-#line 48
-    vec4 _S4 = (texture(sampler2D(samplerPosition_0,sampler_0), (inUV_0)));
-
-#line 48
-    vec3 P_0 = _S4.xyz;
-    vec4 _S5 = (texture(sampler2D(samplerNormal_0,sampler_0), (inUV_0)));
-
-#line 49
-    vec3 N_0 = _S5.xyz * 2.0 - 1.0;
-
-
+    vec3 P_0 = (texture(sampler2D(samplerPosition_0,sampler_0), (inUV_0))).xyz;
+    vec3 N_0 = (texture(sampler2D(samplerNormal_0,sampler_0), (inUV_0))).xyz * 2.0 - 1.0;
     vec3 lightDelta_0 = ubo_0.light_0.position_0.xyz - P_0;
     float lightDist_0 = length(lightDelta_0);
     vec3 L_0 = normalize(lightDelta_0);
-    float _S6 = 1.0 / (lightDist_0 * lightDist_0);
-
+    float _S4 = 1.0 / (lightDist_0 * lightDist_0);
     RayDesc_0 ray_0;
     ray_0.Origin_0 = P_0;
     ray_0.TMin_0 = 0.00000099999999747524;
     ray_0.Direction_0 = lightDelta_0;
     ray_0.TMax_0 = lightDist_0;
-
-
     ShadowRay_0 shadowRay_0;
     shadowRay_0.hitDistance_0 = 0.0;
-
-
-
     TraceMotionRay_0(as_0, 1U, 255U, 0U, 0U, 2U, ray_0, 1.0, shadowRay_0);
-
-#line 69
     float atten_0;
-
-#line 87
     if(shadowRay_0.hitDistance_0 < lightDist_0)
     {
-
-#line 87
         atten_0 = 0.0;
-
-#line 87
     }
     else
     {
-
-#line 87
-        atten_0 = _S6;
-
-#line 87
+        atten_0 = _S4;
     }
-
-#line 93
     vec3 color_2 = ubo_0.light_0.color_0.xyz * saturate_0(dot(N_0, L_0)) * atten_0;
-
-
     ReflectionRay_0 reflectionRay_0;
     TraceRay_0(as_0, 1U, 255U, 0U, 0U, 2U, ray_0, reflectionRay_0);
-
-#line 117
     imageStore((outputImage_0), ivec2((uvec2(launchID_0))), vec4(color_2 + reflectionRay_0.color_1, 1.0));
     return;
 }
diff --git a/tests/vkray/anyhit.slang.glsl b/tests/vkray/anyhit.slang.glsl
index 9d3584e1f..8255599b9 100644
--- a/tests/vkray/anyhit.slang.glsl
+++ b/tests/vkray/anyhit.slang.glsl
@@ -1,4 +1,3 @@
-// anyhit.slang.glsl
 #version 460
 #extension GL_EXT_ray_tracing : require
 layout(row_major) uniform;
@@ -12,8 +11,7 @@ layout(binding = 0)
 layout(std140) uniform _S1
 {
     int mode_0;
-} gParams_0;
-
+}gParams_0;
 layout(binding = 1)
 uniform texture2D gParams_alphaMap_0;
 
@@ -24,24 +22,21 @@ struct SphereHitAttributes_0
 {
     vec3 normal_0;
 };
+
 hitAttributeEXT SphereHitAttributes_0 _S2;
 
 struct ShadowRay_0
 {
     vec4 hitDistance_0;
 };
+
 rayPayloadInEXT ShadowRay_0 _S3;
 
 void main()
 {
     if(gParams_0.mode_0 != 0)
     {
-        float val_0 = textureLod(
-            sampler2D(gParams_alphaMap_0, gParams_sampler_0),
-            _S2.normal_0.xy,
-            (0.0)).x;
-
-        if(val_0 > 0.0)
+        if((textureLod(sampler2D(gParams_alphaMap_0,gParams_sampler_0), (_S2.normal_0.xy), (0.0)).x) > 0.0)
         {
             terminateRayEXT;;
         }
@@ -50,7 +45,6 @@ void main()
             ignoreIntersectionEXT;;
         }
     }
-
     return;
 }
 
diff --git a/tests/vkray/callable.slang.glsl b/tests/vkray/callable.slang.glsl
index dd99cc440..871ffcbb7 100644
--- a/tests/vkray/callable.slang.glsl
+++ b/tests/vkray/callable.slang.glsl
@@ -1,18 +1,12 @@
 #version 460
-
-#if USE_NV_RT
-#extension GL_NV_ray_tracing : require
-#define callableDataInEXT callableDataInNV
-#define hitAttributeEXT hitAttributeNV
-#define ignoreIntersectionEXT ignoreIntersectionNV
-#define rayPayloadInEXT rayPayloadInNV
-#define terminateRayEXT terminateRayNV
-#else
 #extension GL_EXT_ray_tracing : require
-#endif
+layout(row_major) uniform;
+layout(row_major) buffer;
+layout(binding = 0)
+uniform texture2D gAlbedoMap_0;
 
-layout(binding = 0) uniform texture2D gAlbedoMap_0;
-layout(binding = 1) uniform sampler gSampler_0;
+layout(binding = 1)
+uniform sampler gSampler_0;
 
 struct MaterialPayload_0
 {
@@ -24,12 +18,7 @@ callableDataInEXT MaterialPayload_0 _S1;
 
 void main()
 {
-    vec4 _S2 = textureLod(
-        sampler2D(gAlbedoMap_0,gSampler_0),
-        _S1.uv_0,
-        float(0));
-
-    _S1.albedo_0 = _S2;
-
+    _S1.albedo_0 = (textureLod(sampler2D(gAlbedoMap_0,gSampler_0), (_S1.uv_0), (0.0)));
     return;
 }
+
diff --git a/tests/vkray/raygen.slang.glsl b/tests/vkray/raygen.slang.glsl
index 69dc74c53..80a63d5ad 100644
--- a/tests/vkray/raygen.slang.glsl
+++ b/tests/vkray/raygen.slang.glsl
@@ -10,6 +10,7 @@ uniform sampler sampler_0;
 
 layout(binding = 1)
 uniform texture2D samplerNormal_0;
+
 struct Light_0
 {
     vec4 position_0;
@@ -95,23 +96,18 @@ void main()
     uvec3 _S6 = ((gl_LaunchIDEXT));
     float _S7 = float(_S6.y) + 0.5;
     uvec3 _S8 = ((gl_LaunchSizeEXT));
-
     vec2 inUV_0 = vec2(_S5, _S7 / float(_S8.y));
-    vec4 _S9 = (texture(sampler2D(samplerPosition_0,sampler_0), (inUV_0)));
-    vec3 P_0 = _S9.xyz;
-    vec4 _S10 = (texture(sampler2D(samplerNormal_0,sampler_0), (inUV_0)));
-    vec3 N_0 = _S10.xyz * 2.0 - 1.0;
-
+    vec3 P_0 = (texture(sampler2D(samplerPosition_0,sampler_0), (inUV_0))).xyz;
+    vec3 N_0 = (texture(sampler2D(samplerNormal_0,sampler_0), (inUV_0))).xyz * 2.0 - 1.0;
     vec3 lightDelta_0 = ubo_0.light_0.position_0.xyz - P_0;
     float lightDist_0 = length(lightDelta_0);
     vec3 L_0 = normalize(lightDelta_0);
-    float _S11 = 1.0 / (lightDist_0 * lightDist_0);
+    float _S9 = 1.0 / (lightDist_0 * lightDist_0);
     RayDesc_0 ray_0;
     ray_0.Origin_0 = P_0;
     ray_0.TMin_0 = 0.00000099999999747524;
     ray_0.Direction_0 = lightDelta_0;
     ray_0.TMax_0 = lightDist_0;
-
     ShadowRay_0 shadowRay_0;
     shadowRay_0.hitDistance_0 = 0.0;
     TraceRay_0(as_0, 1U, 255U, 0U, 0U, 2U, ray_0, shadowRay_0);
@@ -122,14 +118,14 @@ void main()
     }
     else
     {
-        atten_0 = _S11;
+        atten_0 = _S9;
     }
     vec3 color_2 = ubo_0.light_0.color_0.xyz * saturate_0(dot(N_0, L_0)) * atten_0;
-
     ReflectionRay_0 reflectionRay_0;
     TraceRay_1(as_0, 1U, 255U, 0U, 0U, 2U, ray_0, reflectionRay_0);
     vec3 color_3 = color_2 + reflectionRay_0.color_1;
-    uvec3 _S12 = ((gl_LaunchIDEXT));
-    imageStore((outputImage_0), ivec2((uvec2(ivec2(_S12.xy)))), vec4(color_3, 1.0));
+    uvec3 _S10 = ((gl_LaunchIDEXT));
+    imageStore((outputImage_0), ivec2((uvec2(ivec2(_S10.xy)))), vec4(color_3, 1.0));
     return;
 }
+