15 files changed, 1701 insertions, 5 deletions
diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang
index 67f44cdac..a84e88ca8 100644
--- a/source/slang/hlsl.meta.slang
+++ b/source/slang/hlsl.meta.slang
@@ -41,8 +41,28 @@ struct ByteAddressBuffer
     uint4 Load4(int location);
 
     uint4 Load4(int location, out uint status);
+
+    T Load<T>(int location)
+    {
+        return __byteAddressBufferLoad<T>(this, location);
+    }
 };
 
+__intrinsic_op($(kIROp_ByteAddressBufferLoad))
+T __byteAddressBufferLoad<T>(ByteAddressBuffer buffer, int offset);
+
+__intrinsic_op($(kIROp_ByteAddressBufferLoad))
+T __byteAddressBufferLoad<T>(RWByteAddressBuffer buffer, int offset);
+
+__intrinsic_op($(kIROp_ByteAddressBufferLoad))
+T __byteAddressBufferLoad<T>(RasterizerOrderedByteAddressBuffer buffer, int offset);
+
+__intrinsic_op($(kIROp_ByteAddressBufferStore))
+void __byteAddressBufferStore<T>(RWByteAddressBuffer buffer, int offset, T value);
+
+__intrinsic_op($(kIROp_ByteAddressBufferStore))
+void __byteAddressBufferStore<T>(RasterizerOrderedByteAddressBuffer buffer, int offset, T value);
+
 __generic<T>
 __magic_type(HLSLStructuredBufferType)
 __intrinsic_type($(kIROp_HLSLStructuredBufferType))
@@ -135,6 +155,11 @@ struct $(item.name)
 
     uint4 Load4(int location, out uint status);
 
+    T Load<T>(int location)
+    {
+        return __byteAddressBufferLoad<T>(this, location);
+    }
+
     // Added operations:
 
     __target_intrinsic(glsl, "($3 = atomicAdd($0._data[$1/4], $2))")
@@ -241,6 +266,11 @@ struct $(item.name)
     void Store4(
         uint address,
         uint4 value);
+
+    void Store<T>(int offset, T value)
+    {
+        __byteAddressBufferStore(this, offset, value);
+    }
 };
 
 ${{{{
diff --git a/source/slang/slang-emit-c-like.cpp b/source/slang/slang-emit-c-like.cpp
index 72ee1644d..9da7008b6 100644
--- a/source/slang/slang-emit-c-like.cpp
+++ b/source/slang/slang-emit-c-like.cpp
@@ -2199,6 +2199,30 @@ void CLikeSourceEmitter::defaultEmitInstExpr(IRInst* inst, const EmitOpInfo& inO
         emitOperand(inst->getOperand(0), outerPrec);
         break;
 
+    case kIROp_ByteAddressBufferLoad:
+        m_writer->emit("(");
+        emitOperand(inst->getOperand(0), getInfo(EmitOp::General));
+        m_writer->emit(").Load<");
+        emitType(inst->getDataType());
+        m_writer->emit(">(");
+        emitOperand(inst->getOperand(1), getInfo(EmitOp::General));
+        m_writer->emit(")");
+        break;
+
+    case kIROp_ByteAddressBufferStore:
+        {
+            auto prec = getInfo(EmitOp::Postfix);
+            needClose = maybeEmitParens(outerPrec, prec);
+
+            emitOperand(inst->getOperand(0), leftSide(outerPrec, prec));
+            m_writer->emit(".Store(");
+            emitOperand(inst->getOperand(1), getInfo(EmitOp::General));
+            m_writer->emit(",");
+            emitOperand(inst->getOperand(2), getInfo(EmitOp::General));
+            m_writer->emit(")");
+        }
+        break;
+
     default:
         diagnoseUnhandledInst(inst);
         break;
diff --git a/source/slang/slang-emit-glsl.cpp b/source/slang/slang-emit-glsl.cpp
index c05f14f25..251b164cd 100644
--- a/source/slang/slang-emit-glsl.cpp
+++ b/source/slang/slang-emit-glsl.cpp
@@ -1290,7 +1290,44 @@ bool GLSLSourceEmitter::tryEmitInstExprImpl(IRInst* inst, const EmitOpInfo& inOu
             emitOperand(inst->getOperand(0), outerPrec);
             return true;
         }
+        case kIROp_StructuredBufferLoad:
+        {
+            auto outerPrec = inOuterPrec;
+            auto prec = getInfo(EmitOp::Postfix);
+            bool needClose = maybeEmitParens(outerPrec, prec);
+
+            emitOperand(inst->getOperand(0), leftSide(outerPrec, prec));
+            m_writer->emit("._data[");
+            emitOperand(inst->getOperand(1), getInfo(EmitOp::General));
+            m_writer->emit("]");
+
+            maybeCloseParens(needClose);
+            return true;
+        }
+        case kIROp_StructuredBufferStore:
+        {
+            auto outerPrec = inOuterPrec;
+
+            auto assignPrec = getInfo(EmitOp::Assign);
+            bool assignNeedsClose = maybeEmitParens(outerPrec, assignPrec);
 
+            {
+                auto subscriptPrec = getInfo(EmitOp::Postfix);
+                bool subscriptNeedsClose = maybeEmitParens(assignPrec, subscriptPrec);
+
+                emitOperand(inst->getOperand(0), leftSide(assignPrec, subscriptPrec));
+                m_writer->emit("._data[");
+                emitOperand(inst->getOperand(1), getInfo(EmitOp::General));
+                m_writer->emit("]");
+
+                maybeCloseParens(subscriptNeedsClose);
+            }
+
+            m_writer->emit(" = ");
+            emitOperand(inst->getOperand(2), rightSide(assignPrec, outerPrec));
+            maybeCloseParens(assignNeedsClose);
+            return true;
+        }
         default: break;
     }
 
@@ -1458,6 +1495,7 @@ void GLSLSourceEmitter::emitSimpleTypeImpl(IRType* type)
         case kIROp_FloatType:   
         case kIROp_DoubleType:
         {
+            _requireBaseType(cast<IRBasicType>(type)->getBaseType());
             m_writer->emit(getDefaultBuiltinTypeName(type->op));
             return;
         }
diff --git a/source/slang/slang-emit-hlsl.cpp b/source/slang/slang-emit-hlsl.cpp
index f0238ce70..e48a166c5 100644
--- a/source/slang/slang-emit-hlsl.cpp
+++ b/source/slang/slang-emit-hlsl.cpp
@@ -435,27 +435,86 @@ bool HLSLSourceEmitter::tryEmitInstExprImpl(IRInst* inst, const EmitOpInfo& inOu
         }
         case kIROp_BitCast:
         {
+            // For simplicity, we will handle all bit-cast operations
+            // by first casting the "from" type to an intermediate
+            // integer type to hold the bits, and then convert *that*
+            // type over to the desired "to" type.
+            //
+            // A fundamental invariant that must be guaranteed
+            // by earlier steps is that a bit-cast instruction
+            // is only generated when the "from" and "to" types
+            // have the same size, and (in the case where they
+            // are vectors) number of elements.
+            //
+            // In textual order, the conversion to the "to" type
+            // comes first.
+            //
             auto toType = extractBaseType(inst->getDataType());
             switch (toType)
             {
                 default:
                     diagnoseUnhandledInst(inst);
                     break;
-                case BaseType::UInt:
-                    break;
+
+                case BaseType::Int8:
+                case BaseType::Int16:
                 case BaseType::Int:
+                case BaseType::Int64:
+                case BaseType::UInt8:
+                case BaseType::UInt16:
+                case BaseType::UInt:
+                case BaseType::UInt64:
+                    // Because the intermediate type will always
+                    // be an integer type, we can convert to
+                    // another integer type of the same size
+                    // via a cast.
                     m_writer->emit("(");
                     emitType(inst->getDataType());
                     m_writer->emit(")");
                     break;
+
                 case BaseType::Float:
+                    // Note: at present HLSL only supports
+                    // reinterpreting integer bits as a `float`.
+                    //
+                    // There is no current function (it seems)
+                    // for bit-casting an `int16_t` to a `half`.
+                    //
+                    // TODO: There is an `asdouble` function
+                    // for converting two 32-bit integer values into
+                    // one `double`. We could use that for
+                    // bit casts of 64-bit values with a bit of
+                    // extra work, but doing so might be best
+                    // handled in an IR pass that legalizes
+                    // bit-casts.
+                    //
                     m_writer->emit("asfloat");
                     break;
             }
-
             m_writer->emit("(");
+            int closeCount = 1;
+
+            auto fromType = extractBaseType(inst->getOperand(0)->getDataType());
+            switch( fromType )
+            {
+                default:
+                    diagnoseUnhandledInst(inst);
+                    break;
+
+                case BaseType::UInt:
+                case BaseType::Int:
+                    break;
+
+                case BaseType::Float:
+                    m_writer->emit("asuint(");
+                    closeCount++;
+                    break;
+            }
+
             emitOperand(inst->getOperand(0), getInfo(EmitOp::General));
-            m_writer->emit(")");
+
+            while(closeCount--)
+                m_writer->emit(")");
             return true;
         }
         case kIROp_StringLit:
@@ -474,6 +533,113 @@ bool HLSLSourceEmitter::tryEmitInstExprImpl(IRInst* inst, const EmitOpInfo& inOu
             emitOperand(inst->getOperand(0), outerPrec);
             return true;
         }
+        case kIROp_ByteAddressBufferLoad:
+        {
+            // HLSL byte-address buffers have two kinds of `Load` operations.
+            //
+            // First we have the `Load`, `Load2`, `Load3`, and `Load4` operations,
+            // which are *not* generic/templated, and always return a scalar
+            // or vector of `uint`. These are available on all profiles that
+            // support byte-address buffers.
+            //
+            // Second we have the `Load<T>` generic, which itself comes in
+            // two flavors. The basic version can only handle the case where `T`
+            // is a scalar or vector, but can handle more types than the
+            // non-generic operations. The more complex version can handle
+            // aggregate types as well, but we don't need to worry about
+            // that because we will have legalized such operations out
+            // already.
+            //
+            // Our task here is thus to pick between `Load`/`Load2`/`Load3`/`Load4`
+            // or `Load<T>`, always preferring the functions that are more
+            // universally available.
+            //
+            // We will thus inspect the type that is being loaded,
+            // and determine if it is a scalar or vector, and then
+            // if the element type of that scalar/vector is `uint`.
+            //
+            auto elementType = inst->getDataType();
+            IRIntegerValue elementCount = 1;
+            if( auto vecType = as<IRVectorType>(elementType) )
+            {
+                if( auto elementCountInst = as<IRIntLit>(vecType->getElementCount()) )
+                {
+                    elementType = vecType->getElementType();
+                    elementCount = elementCountInst->getValue();
+                }
+            }
+
+            if( elementType->op == kIROp_UIntType )
+            {
+                // If we are in the case that can use `Load`/`Load2`/`Load3`/`Load4`,
+                // then we will always prefer to use it.
+                //
+                auto outerPrec = inOuterPrec;
+                auto prec = getInfo(EmitOp::Postfix);
+                bool needClose = maybeEmitParens(outerPrec, prec);
+
+                emitOperand(inst->getOperand(0), leftSide(outerPrec, prec));
+                m_writer->emit(".Load");
+                if( elementCount != 1 )
+                {
+                    m_writer->emit(elementCount);
+                }
+                m_writer->emit("(");
+                emitOperand(inst->getOperand(1), getInfo(EmitOp::General));
+                m_writer->emit(")");
+
+                maybeCloseParens(needClose);
+                return true;
+            }
+
+            // Otherwise we fall back to the base case, which
+            // is already handled by the base `CLikeSourceEmitter`
+            return false;
+        }
+        case kIROp_ByteAddressBufferStore:
+        {
+            // Similar to the case for a load, we want to specialize the
+            // generated code for the case where we store a `uint` or a
+            // vector of `uint`. A store yields no value of its own, so the
+            // type to inspect is that of the value being stored (operand 2).
+            auto elementType = inst->getOperand(2)->getDataType();
+            IRIntegerValue elementCount = 1;
+            if( auto vecType = as<IRVectorType>(elementType) )
+            {
+                if( auto elementCountInst = as<IRIntLit>(vecType->getElementCount()) )
+                {
+                    elementType = vecType->getElementType();
+                    elementCount = elementCountInst->getValue();
+                }
+            }
+            if( elementType->op == kIROp_UIntType )
+            {
+                auto outerPrec = inOuterPrec;
+                auto prec = getInfo(EmitOp::Postfix);
+                bool needClose = maybeEmitParens(outerPrec, prec);
+
+                emitOperand(inst->getOperand(0), leftSide(outerPrec, prec));
+                m_writer->emit(".Store");
+                if( elementCount != 1 )
+                {
+                    m_writer->emit(elementCount);
+                }
+                m_writer->emit("(");
+                emitOperand(inst->getOperand(1), getInfo(EmitOp::General));
+                m_writer->emit(", ");
+                emitOperand(inst->getOperand(2), getInfo(EmitOp::General));
+                m_writer->emit(")");
+
+                maybeCloseParens(needClose);
+                return true;
+            }
+
+            // Otherwise we fall back to the base case, which
+            // is already handled by the base `CLikeSourceEmitter`
+            return false;
+        }
+        break;
+
         default: break;
     }
     // Not handled
diff --git a/source/slang/slang-emit.cpp b/source/slang/slang-emit.cpp
index 3caef0a9f..efa56c261 100644
--- a/source/slang/slang-emit.cpp
+++ b/source/slang/slang-emit.cpp
@@ -5,6 +5,7 @@
 #include "../core/slang-type-text-util.h"
 
 #include "slang-ir-bind-existentials.h"
+#include "slang-ir-byte-address-legalize.h"
 #include "slang-ir-dce.h"
 #include "slang-ir-entry-point-uniforms.h"
 #include "slang-ir-glsl-legalize.h"
@@ -411,6 +412,94 @@ Result linkAndOptimizeIR(
         break;
     }
 
+    // For all targets, we translate load/store operations
+    // of aggregate types from/to byte-address buffers into
+    // loads/stores of individual scalar or vector values.
+    //
+    {
+        ByteAddressBufferLegalizationOptions byteAddressBufferOptions;
+
+        // Depending on the target, we may decide to do
+        // more aggressive translation that reduces the
+        // load/store operations down to individual scalars
+        // (splitting up vector ops).
+        //
+        switch( target )
+        {
+        default:
+            break;
+
+        case CodeGenTarget::GLSL:
+            // For GLSL targets, we want to translate the vector load/store
+            // operations into scalar ops. This is in part as a simplification,
+            // but it also ensures that our generated code respects the lax
+            // alignment rules for D3D byte-address buffers (the base address
+            // of a buffer need not be more than 4-byte aligned, and loads
+            // of vectors need only be aligned based on their element type).
+            //
+            // TODO: We should consider having an extended variant of `Load<T>`
+            // on byte-address buffers which expresses a programmer's knowledge
+            // that the load will have greater alignment than required by D3D.
+            // That could either come as an explicit guaranteed-alignment
+            // operand, or instead as something like a `Load4Aligned<T>` operation
+            // that returns a `vector<T,4>` and assumes `4*sizeof(T)` alignment.
+            //
+            byteAddressBufferOptions.scalarizeVectorLoadStore = true;
+
+            // For GLSL targets, there really isn't a low-level concept
+            // of a byte-address buffer at all, and the standard "shader storage
+            // buffer" (SSBO) feature is a lot closer to an HLSL structured
+            // buffer for our purposes.
+            //
+            // In particular, each SSBO can only have a single element type,
+            // so that even with bitcasts we can't have a single buffer declaration
+            // (e.g., one with `uint` elements) service all load/store operations
+            // (e.g., a `half` value can't be stored atomically if there are
+            // `uint` elements, unless we use explicit atomics).
+            //
+            // In order to simplify things, we will translate byte-address buffer
+            // ops to equivalent structured-buffer ops for GLSL targets, where
+            // each unique type being loaded/stored yields a different global
+            // parameter declaration of the buffer.
+            //
+            byteAddressBufferOptions.translateToStructuredBufferOps = true;
+            break;
+        }
+
+        // We also need to decide whether to translate
+        // any "leaf" load/store operations over to
+        // use only unsigned-integer types and then
+        // bit-cast, or if we prefer to leave them
+        // as load/store of the original type.
+        //
+        switch( target )
+        {
+        case CodeGenTarget::HLSL:
+            {
+                auto profile = targetRequest->targetProfile;
+                if( profile.getFamily() == ProfileFamily::DX )
+                {
+                    if(profile.GetVersion() <= ProfileVersion::DX_5_0)
+                    {
+                        // Fxc and earlier dxc versions do not support
+                        // a templated `.Load<T>` operation on byte-address
+                        // buffers, and instead need us to emit separate
+                        // `uint` loads and then bit-cast over to
+                        // the correct type.
+                        //
+                        byteAddressBufferOptions.useBitCastFromUInt = true;
+                    }
+                }
+            }
+            break;
+
+        default:
+            break;
+        }
+
+        legalizeByteAddressBufferOps(session, irModule, byteAddressBufferOptions);
+    }
+
     // For GLSL only, we will need to perform "legalization" of
     // the entry point and any entry-point parameters.
     //
diff --git a/source/slang/slang-glsl-extension-tracker.cpp b/source/slang/slang-glsl-extension-tracker.cpp
index 30acd8936..53e51d633 100644
--- a/source/slang/slang-glsl-extension-tracker.cpp
+++ b/source/slang/slang-glsl-extension-tracker.cpp
@@ -41,6 +41,8 @@ void GLSLExtensionTracker::requireBaseTypeExtension(BaseType baseType)
     switch (baseType)
     {
         case BaseType::Half:
+        case BaseType::UInt16:
+        case BaseType::Int16:
         {
             // https://github.com/KhronosGroup/GLSL/blob/master/extensions/ext/GL_EXT_shader_16bit_storage.txt
             requireExtension(UnownedStringSlice::fromLiteral("GL_EXT_shader_16bit_storage"));
diff --git a/source/slang/slang-glsl-extension-tracker.h b/source/slang/slang-glsl-extension-tracker.h
index 79dcd720e..5127674a3 100644
--- a/source/slang/slang-glsl-extension-tracker.h
+++ b/source/slang/slang-glsl-extension-tracker.h
@@ -37,7 +37,7 @@ public:
 protected:
     static uint32_t _getFlag(BaseType baseType) { return uint32_t(1) << int(baseType); }
 
-    uint32_t m_hasBaseTypeFlags = 0xffffffff & ~(_getFlag(BaseType::UInt64) + _getFlag(BaseType::Int64) + _getFlag(BaseType::Half));
+    uint32_t m_hasBaseTypeFlags = _getFlag(BaseType::Float) | _getFlag(BaseType::Int) | _getFlag(BaseType::UInt) | _getFlag(BaseType::Void) | _getFlag(BaseType::Bool);
 
     ProfileVersion m_profileVersion = ProfileVersion::GLSL_110;
 
diff --git a/source/slang/slang-ir-byte-address-legalize.cpp b/source/slang/slang-ir-byte-address-legalize.cpp
new file mode 100644
index 000000000..e33408855
--- /dev/null
+++ b/source/slang/slang-ir-byte-address-legalize.cpp
@@ -0,0 +1,924 @@
+// slang-ir-byte-address-legalize.cpp
+#include "slang-ir-byte-address-legalize.h"
+
+// This file implements an IR pass that translates load/store operations
+// on byte-address buffers to be legal for a chosen target.
+//
+// Currently this pass only applies to the operations generated for
+// the generic `*ByteAddressBuffer.Load<T>` and `.Store<T>` operations,
+// and not the non-generic versions that traffic in `uint` (e.g.,
+// `Load2` or `Store3`).
+
+#include "slang-ir-insts.h"
+#include "slang-ir-layout.h"
+
+namespace Slang
+{
+
+// As is typical for IR passes in Slang, we will encapsulate the state
+// while we process the code in a context type.
+//
+struct ByteAddressBufferLegalizationContext
+{
+    // We need access to the original session, as well as the options
+    // that control what constructs we legalize, and how.
+    //
+    Session* m_session = nullptr;
+    ByteAddressBufferLegalizationOptions m_options;
+
+    // We will also use a central IR builder when generating new
+    // code as part of legalization (rather than create/destroy
+    // IR builders on the fly).
+    //
+    SharedIRBuilder m_sharedBuilder;
+    IRBuilder m_builder;
+
+    // Everything starts with a request to process a module,
+    // which delegates to the central recursive walk of the IR.
+    //
+    void processModule(IRModule* module)
+    {
+        m_sharedBuilder.session = m_session;
+        m_sharedBuilder.module = module;
+
+        m_builder.sharedBuilder = &m_sharedBuilder;
+
+        processInstRec(module->getModuleInst());
+    }
+
+    // Recursively walk the IR (except for decorations) and process
+    // each byte-address buffer load/store. Processing may deallocate
+    // the instruction, so we return right after it, and the loop
+    // fetches each child's successor before recursing into the child.
+    void processInstRec(IRInst* inst)
+    {
+        switch( inst->op )
+        {
+        case kIROp_ByteAddressBufferLoad:
+            processLoad(inst);
+            return;
+
+        case kIROp_ByteAddressBufferStore:
+            processStore(inst);
+            return;
+        }
+
+        IRInst* nextChild = nullptr;
+        for( IRInst* child = inst->getFirstChild(); child; child = nextChild )
+        {
+            nextChild = child->getNextInst();
+            processInstRec(child);
+        }
+    }
+
+    // The logic for both the load and store cases is similar,
+    // so we will present the entire load case first and then
+    // move on to stores.
+    //
+    void processLoad(IRInst* load)
+    {
+        // What we want to do with a load depends on the type
+        // being loaded.
+        //
+        auto type = load->getDataType();
+
+        // We start by looking at the type being loaded so
+        // that we can opt out if it is legal.
+        //
+        if( isTypeLegalForByteAddressLoadStore(type) )
+            return;
+
+        // If the type is one that requires legalization,
+        // then we will set up to insert new IR instructions
+        // to replace it.
+        //
+        m_builder.setInsertBefore(load);
+
+        // We then emit a "legal load" with the same buffer
+        // and byte offset from the original.
+        //
+        auto buffer = load->getOperand(0);
+        auto offset = load->getOperand(1);
+        auto legalLoad = emitLegalLoad(type, buffer, offset, 0);
+
+        // It is currently possible for the legalization
+        // to fail (perhaps because of something else that
+        // is invalid in the IR), so we will defensively
+        // leave the code alone in that case.
+        //
+        if(!legalLoad)
+            return;
+
+        // If we were able to generate a legal load operation,
+        // then the value it yields can be used to fully
+        // replace the previous illegal load.
+        //
+        load->replaceUsesWith(legalLoad);
+        load->removeAndDeallocate();
+    }
+
+    bool isTypeLegalForByteAddressLoadStore(IRType* type)
+    {
+        // Whether or not a type is legal to use for
+        // byte-address buffer load/store depends on
+        // properties of the target, which will have
+        // been passed into this pass via its options.
+        //
+        // If we are expected to translate all byte-address
+        // operations to equivalent structured-buffer
+        // operations, then that means *no* type is
+        // legal for byte-address load/store.
+        //
+        if(m_options.translateToStructuredBufferOps)
+            return false;
+
+        // Basic types are usually legal to load/store
+        // on all targets.
+        //
+        if( auto basicType = as<IRBasicType>(type) )
+        {
+            // On targets that require translation to
+            // make all load/store use `uint` values,
+            // any scalar type that isn't `uint` is
+            // illegal.
+            //
+            if( m_options.useBitCastFromUInt
+                && basicType->getBaseType() != BaseType::UInt )
+            {
+                return false;
+            }
+
+            // Otherwise, scalar types are assumed
+            // legal for load/store.
+            //
+            return true;
+        }
+
+        // Vector types also depend on the options.
+        //
+        if( as<IRVectorType>(type) )
+        {
+            // If we've been asked to scalarize all
+            // vector load/store, then we need to
+            // treat them as illegal.
+            //
+            if(m_options.scalarizeVectorLoadStore)
+                return false;
+            // Otherwise we fall through and report the vector as illegal below.
+        }
+
+        // All other types are treated as always illegal,
+        // so that we will legalize the load/store ops
+        // in all cases.
+        //
+        // Note: recent builds of dxc (perhaps coupled with
+        // recent shader models) support byte-address load/store
+        // of more complex types, but it is simpler for Slang
+        // to just legalize all the composite cases rather
+        // than rely on a downstream compiler.
+        //
+        return false;
+    }
+
+    // The core workhorse routine for the load case is `emitLegalLoad`,
+    // which tries to emit load operations that read a value of the
+    // given `type` from the given `buffer` at the required `baseOffset`
+    // plus the `immediateOffset` if any.
+    //
+    IRInst* emitLegalLoad(IRType* type, IRInst* buffer, IRInst* baseOffset, IRIntegerValue immediateOffset)
+    {
+        // The right way to load a value depends primarily
+        // on the type, and secondarily on the options
+        // that have been specified for this pass.
+        //
+        if( auto structType = as<IRStructType>(type) )
+        {
+            // When loading a value of `struct` type, we will
+            // load each field with its own operation.
+            //
+            // Note: A more "clever" implementation might try
+            // to emit a minimal number of loads of whatever
+            // is the largest supported type matching the
+            // alignment of `structType`, and then break those
+            // loaded values into fields with bit-level ops
+            // once they are in registers.
+            //
+            // Such an approach could conceivably allow more
+            // types to be loadable even on targets that
+            // don't directly support them (e.g., a structure
+            // with an `int` and two `int16_t` could be loadable
+            // even when targeting DXBC).
+            //
+            // The flip side to such an approach would be that
+            // it would complicate the generated code, and also
+            // make the rules about when a type is supported
+            // for byte-address load/store much more complicated.
+
+            // We collect the loaded per-field values into an
+            // array, which we will then use to construct the
+            // full value of the `struct` type.
+            //
+            List<IRInst*> fieldVals;
+            for( auto field : structType->getFields() )
+            {
+                auto fieldType = field->getFieldType();
+
+                // The relative offset of each field is calculated using
+                // the IR-based layout subsystem, which works with the
+                // "natural" in-memory layout of types.
+                //
+                // It is possible for layout computation to fail (e.g.,
+                // if the field type somehow wasn't one that can be
+                // laid out "naturally"). If the layout process fails,
+                // then we fail to legalize this load.
+                //
+                IRIntegerValue fieldOffset = 0;
+                SLANG_RETURN_NULL_ON_FAIL(getNaturalOffset(field, &fieldOffset));
+
+                // Otherwise, we load the field by recursively calling this function
+                // on the field type, with an adjusted immediate offset.
+                //
+                // If legalizing the field load fails, then we fail the load
+                // of the structure as well. Any loads that were generated
+                // for earlier fields will be left behind but can be eliminated
+                // as dead code.
+                //
+                auto fieldVal = emitLegalLoad(fieldType, buffer, baseOffset, immediateOffset + fieldOffset);
+                if(!fieldVal)
+                    return nullptr;
+
+                fieldVals.add(fieldVal);
+            }
+
+            // Once all the field values have been loaded, we can bind
+            // them together to make a single value of the `struct` type,
+            // representing the result of the legalized load.
+            //
+            return m_builder.emitMakeStruct(type, fieldVals);
+        }
+        else if( auto arrayType = as<IRArrayTypeBase>(type) )
+        {
+            // Loading a value of array type amounts to loading each
+            // of its elements. There is shared logic between the
+            // array, matrix, and vector cases, so we factor it into
+            // a subroutine that we will explain later.
+            //
+            // We need a known constant number of elements in an array
+            // to be able to emit per-element loads, so we skip
+            // legalization if the array type isn't in the right form
+            // for us to proceed.
+            //
+            auto elementCountInst = as<IRIntLit>(arrayType->getElementCount());
+            if( elementCountInst )
+            {
+                return emitLegalSequenceLoad(type, buffer, baseOffset, immediateOffset, kIROp_makeArray, arrayType->getElementType(), elementCountInst->getValue());
+            }
+        }
+        else if( auto matType = as<IRMatrixType>(type) )
+        {
+            // Handling a matrix is largely like an array, with the
+            // small detail that we need to construct the row type
+            // that we expect to load for each "element."
+            //
+            // TODO: The logic here assumes row-major layout, because
+            // the row-vs-column-major information has been dropped
+            // by this point in the IR.
+            //
+            // In order to allow both row- and column-major matrices
+            // to be loaded from byte-address buffers, we would need
+            // to make row-vs-column-major-ness be part of the IR
+            // type system so that IR layout can take it into account.
+            //
+            // For now we have to live with the "natural" layout of
+            // matrices always being row-major.
+            //
+            auto rowCountInst = as<IRIntLit>(matType->getRowCount());
+            if( rowCountInst )
+            {
+                auto rowType = m_builder.getVectorType(matType->getElementType(), matType->getColumnCount());
+                return emitLegalSequenceLoad(type, buffer, baseOffset, immediateOffset, kIROp_MakeMatrix, rowType, rowCountInst->getValue());
+            }
+        }
+        else if( auto vecType = as<IRVectorType>(type) )
+        {
+            // One of the options that can vary per-target is whether to
+            // scalarize vector load/store operations. When that option
+            // is turned on, we can treat a vector load just like an
+            // array load.
+            //
+            auto elementCountInst = as<IRIntLit>(vecType->getElementCount());
+            if( m_options.scalarizeVectorLoadStore && elementCountInst)
+            {
+                return emitLegalSequenceLoad(type, buffer, baseOffset, immediateOffset, kIROp_makeVector, vecType->getElementType(), elementCountInst->getValue());
+            }
+
+            // If we aren't scalarizing a vector load then we next need
+            // to consider the case where the target might only support
+            // byte-address load/store of unsigned integer data (e.g.,
+            // this is the case for D3D11/DXBC).
+            //
+            // We can still support loads of vectors with other element
+            // types by first loading the data as unsigned integers, and
+            // then bit-casting it to the correct type (e.g., load a
+            // `uint4` with `Load4()` and then bit-cast to `float4` using
+            // `asfloat()`).
+            //
+            if(m_options.useBitCastFromUInt)
+            {
+                // We will look at the element type of the vector (which must
+                // be a basic type for this to work).
+                //
+                if( auto elementType = as<IRBasicType>(vecType->getElementType()) )
+                {
+                    // If there is a distinct unsigned integer type of the
+                    // same size as the element type, then we can use that
+                    // for our load.
+                    //
+                    if( auto unsignedElementType = getSameSizeUIntType(elementType) )
+                    {
+                        // We form the appropriate unsigned-integer vector type,
+                        // and then emit a load for it.
+                        //
+                        auto unsignedVecType = m_builder.getVectorType(unsignedElementType, vecType->getElementCount());
+                        auto unsignedVecVal = emitSimpleLoad(unsignedVecType, buffer, baseOffset, immediateOffset);
+
+                        // Once we have loaded the bits into a temporary,
+                        // we can bit-cast it to the correct type and
+                        // we have our result.
+                        //
+                        return m_builder.emitBitCast(vecType, unsignedVecVal);
+                    }
+                }
+            }
+
+            // Any cases of vectors not handled above are allowed to fall through
+            // and be handled in the catch-all logic below.
+        }
+        else if( auto basicType = as<IRBasicType>(type) )
+        {
+            // Most basic scalar types can be handled directly on targets,
+            // but as described above for vectors, the D3D11/DXBC target
+            // only support loading `uint` values, so we need to emulate
+            // loads of other types (like `float`) by first loading a
+            // `uint` and then bit-casting.
+            //
+            if(m_options.useBitCastFromUInt)
+            {
+                if( auto unsignedType = getSameSizeUIntType(basicType) )
+                {
+                    auto unsignedVal = emitSimpleLoad(unsignedType, buffer, baseOffset, immediateOffset);
+                    return m_builder.emitBitCast(basicType, unsignedVal);
+                }
+            }
+        }
+
+        // If none of the many special cases above was triggered, then we
+        // are in the base case and assume we want to emit a single load
+        // for the type we were given.
+        //
+        return emitSimpleLoad(type, buffer, baseOffset, immediateOffset);
+    }
+
+    // Loading of sequences for arrays, matrices, and vectors is
+    // bottlenecked through a single function.
+    //
+    IRInst* emitLegalSequenceLoad(IRType* type, IRInst* buffer, IRInst* baseOffset, IRIntegerValue immediateOffset, IROp op, IRType* elementType, IRIntegerValue elementCount)
+    {
+        // Produces a value of `type` by loading `elementCount` values of
+        // `elementType` from `buffer`, starting at `baseOffset` plus
+        // `immediateOffset`, and then combining them with a single `op`
+        // instruction (e.g., one that makes an array, matrix, or vector).
+        //
+        // Returns null if the element layout cannot be computed or if
+        // any of the per-element loads fails.
+
+        // Consecutive elements are separated by the "natural" stride
+        // of the element type.
+        //
+        IRSizeAndAlignment perElementLayout;
+        SLANG_RETURN_NULL_ON_FAIL(getNaturalSizeAndAlignment(elementType, &perElementLayout));
+        const IRIntegerValue stride = perElementLayout.getStride();
+
+        // Each element goes through the full legalization logic again,
+        // since the element type might itself need to be broken down.
+        //
+        List<IRInst*> loadedElements;
+        for( IRIntegerValue index = 0; index < elementCount; ++index )
+        {
+            const IRIntegerValue elementOffset = immediateOffset + index*stride;
+
+            IRInst* loaded = emitLegalLoad(elementType, buffer, baseOffset, elementOffset);
+            if( loaded == nullptr )
+                return nullptr;
+
+            loadedElements.add(loaded);
+        }
+
+        // With every element in hand, the operands are passed to `op`
+        // to build the final sequence value.
+        //
+        return m_builder.emitIntrinsicInst(type, op, loadedElements.getCount(), loadedElements.getBuffer());
+    }
+
+    // All of the loading operations above eventually bottom out at `emitSimpleLoad`,
+    // which is meant to handle the base case where we do *not* want to
+    // recurse on the structure of `type`.
+    //
+    IRInst* emitSimpleLoad(IRType* type, IRInst* buffer, IRInst* baseOffset, IRIntegerValue immediateOffset)
+    {
+        // Emits exactly one load of `type`, without recursing into the
+        // structure of the type.
+        //
+        // Callers carry the offset as an instruction (`baseOffset`) plus
+        // a constant (`immediateOffset`) so that no `add` instructions
+        // are generated until a load is really emitted. This is the point
+        // where the two halves get merged into one value.
+        //
+        auto byteOffset = emitOffsetAddIfNeeded(baseOffset, immediateOffset);
+
+        // Some targets (currently just GLSL) have no built-in notion of
+        // a byte-address buffer. GLSL instead has "shader storage buffers"
+        // that are declared with a fixed element type, e.g.:
+        //
+        //      buffer MyBuffer { uint _data[]; } myBuffer;
+        //
+        // Such a declaration is fine for `uint` data, but is of little
+        // use for loading/storing a `half` or `double` efficiently.
+        // For those targets we treat the operation as a structured-buffer
+        // access on a buffer whose element type is `type`; the aliasing
+        // buffers are expected to share the same `binding`.
+        //
+        // Finding an "equivalent" structured buffer can fail, in which
+        // case we fall back to a plain byte-address load (which will
+        // then fail to generate valid GLSL code).
+        //
+        IRInst* structuredBuffer = nullptr;
+        if( m_options.translateToStructuredBufferOps )
+        {
+            structuredBuffer = getEquivalentStructuredBuffer(type, buffer);
+        }
+
+        if( !structuredBuffer )
+        {
+            // Base case: emit the byte-address buffer load directly and
+            // assume the target can handle it.
+            //
+            IRInst* byteLoadOperands[] = { buffer, byteOffset };
+            return m_builder.emitIntrinsicInst(type, kIROp_ByteAddressBufferLoad, 2, byteLoadOperands);
+        }
+
+        // A structured buffer is addressed by element *index* rather than
+        // by byte offset, so we divide the offset by the "natural" stride
+        // of `type`.
+        //
+        // The division is only meaningful when the offset is a multiple
+        // of the stride, but any other offset would have been undefined
+        // behavior anyway.
+        //
+        auto offsetType = byteOffset->getDataType();
+
+        IRSizeAndAlignment layout;
+        SLANG_RETURN_NULL_ON_FAIL(getNaturalSizeAndAlignment(type, &layout));
+        auto strideVal = layout.getStride();
+        auto strideInst = m_builder.getIntValue(offsetType, strideVal);
+
+        IRInst* divOperands[] = { byteOffset, strideInst };
+        auto elementIndex = m_builder.emitIntrinsicInst(offsetType, kIROp_Div, 2, divOperands);
+
+        IRInst* structuredLoadOperands[] = { structuredBuffer, elementIndex };
+        return m_builder.emitIntrinsicInst(type, kIROp_StructuredBufferLoad, 2, structuredLoadOperands);
+    }
+
+    IRInst* emitOffsetAddIfNeeded(IRInst* baseOffset, IRIntegerValue immediateOffset)
+    {
+        // Yields an instruction that represents `baseOffset` plus
+        // `immediateOffset`.
+        //
+        // Only a non-zero immediate requires new code: we materialize
+        // it as a constant of the same type as `baseOffset` and emit
+        // an `add` of the two.
+        //
+        if( immediateOffset != 0 )
+        {
+            auto offsetType = baseOffset->getDataType();
+            auto immediateInst = m_builder.getIntValue(offsetType, immediateOffset);
+
+            IRInst* addOperands[] = { baseOffset, immediateInst };
+            return m_builder.emitIntrinsicInst(offsetType, kIROp_Add, 2, addOperands);
+        }
+
+        // A zero immediate leaves the base offset unchanged, so it can
+        // be used as-is without emitting anything.
+        //
+        return baseOffset;
+    }
+
+    // At this point we have gone through the main logic of the load path,
+    // and before we turn our attention to the store path we can go
+    // ahead and define some of the utility functions that the code above
+    // requires.
+
+    // In order to handle interesting types on D3D11/DXBC, we need to
+    // be able to map a base type to another type of the same size.
+    //
+    BaseType getSameSizeUIntBaseType(IROp op)
+    {
+        // Maps the opcode of a basic type to the unsigned integer base
+        // type that should be used to load/store its bits, or to the
+        // sentinel `BaseType::Void` when a bit-cast should not be
+        // attempted (the caller must then use the original type, or fail).
+        //
+        // Only the 32-bit types (and `bool`) are handled for now, because
+        // the D3D11/DXBC target will not be able to handle 16- or 64-bit
+        // types anyway. This could be improved over time if needed.
+        //
+        // Note: `kIROp_UIntType` is deliberately left out, so that
+        // an access that already uses `uint` does not get a redundant
+        // bit-cast.
+        //
+        const bool isBitCastCandidate =
+            op == kIROp_IntType
+            || op == kIROp_FloatType
+            || op == kIROp_BoolType;
+
+        return isBitCastCandidate ? BaseType::UInt : BaseType::Void;
+    }
+    // Get the unsigned integer type to use when bit-casting values
+    // of `type` to/from `uint` data, or null if no bit-cast should
+    // be attempted for `type`.
+    //
+    // A null `type` is accepted (and yields null) so that callers
+    // can pass along the result of an `as<...>()` cast without
+    // having to check it first.
+    //
+    IRBasicType* getSameSizeUIntType(IRType* type)
+    {
+        if(!type)
+            return nullptr;
+
+        auto unsignedBaseType = getSameSizeUIntBaseType(type->op);
+        if(unsignedBaseType == BaseType::Void)
+            return nullptr;
+
+        return m_builder.getBasicType(unsignedBaseType);
+    }
+
+    // When replacing byte-address buffer load/store operations with
+    // structured buffer ones, we need to be able to map an IR instruction
+    // that represents a byte-address buffer to one that represents an
+    // "equivalent" structured buffer.
+    //
+    // An important/tricky detail here is that the byte-address buffer
+    // might have been passed in as a function parameter, or be indexed
+    // from an array, etc.
+    //
+    // The logic here assumes this pass has run after a full legalization
+    // pass on resource parameter usage, so that any references to
+    // buffers in an instruction are "grounded" in a known global shader
+    // parameter.
+
+    IRInst* getEquivalentStructuredBuffer(IRType* elementType, IRInst* byteAddressBuffer)
+    {
+        // Finds or creates a value that refers to a structured buffer
+        // with the given `elementType` that stands in for
+        // `byteAddressBuffer`. Returns null when the operand does not
+        // match one of the forms handled below.
+
+        // Form 1: the buffer is a global shader parameter, which has
+        // a dedicated routine.
+        //
+        if(auto globalParam = as<IRGlobalParam>(byteAddressBuffer))
+            return getEquivalentStructuredBufferParam(elementType, globalParam);
+
+        // Form 2: the buffer is fetched as an element of an array.
+        // Anything else is a pattern we cannot handle, so we bail out
+        // of the attempt to legalize.
+        //
+        // TODO: Should we make this case an error?
+        //
+        if( byteAddressBuffer->op != kIROp_getElement )
+            return nullptr;
+
+        auto sourceArray = byteAddressBuffer->getOperand(0);
+        auto elementIndex = byteAddressBuffer->getOperand(1);
+
+        // We recursively look for an "equivalent" array of structured
+        // buffers, and give up if there isn't one or if what we got
+        // back does not have an array type.
+        //
+        auto replacementArray = getEquivalentStructuredBuffer(elementType, sourceArray);
+        if( replacementArray == nullptr )
+            return nullptr;
+
+        auto replacementArrayType = as<IRArrayTypeBase>(replacementArray->getDataType());
+        if( replacementArrayType == nullptr )
+            return nullptr;
+
+        // Indexing the replacement array with the original index gives
+        // the structured buffer that corresponds to the original operand.
+        //
+        return m_builder.emitElementExtract(replacementArrayType->getElementType(), replacementArray, elementIndex);
+    }
+
+    // Our search for an "equivalent" structured buffer should bottom out when
+    // we find a global shader parameter of byte-address buffer type, or an
+    // array (of array of array of ...) byte-address buffer type.
+    //
+    // We then want to create an equivalent shader parameter of a matching
+    // structured buffer (or array...) type.
+    //
+    // To avoid creating too many buffers (e.g., one per load), we will cache and
+    // re-use the buffers we declare in this way. Note that we do *not* need
+    // to worry if the deduplication is perfect, because we are already assuming
+    // that the target will handle multiple buffers with the same `binding`
+    // correctly.
+    //
+    Dictionary<KeyValuePair<IRInst*, IRInst*>, IRGlobalParam*> m_cachedStructuredBuffers;
+    IRGlobalParam* getEquivalentStructuredBufferParam(IRType* elementType, IRGlobalParam* byteAddressBufferParam)
+    {
+        // The cache is keyed on the (element type, original parameter)
+        // pair, so each combination is created at most once.
+        //
+        KeyValuePair<IRInst*, IRInst*> cacheKey(elementType, byteAddressBufferParam);
+
+        IRGlobalParam* cachedParam = nullptr;
+        if( m_cachedStructuredBuffers.TryGetValue(cacheKey, cachedParam) )
+            return cachedParam;
+
+        // Nothing cached yet: create the parameter and remember the
+        // outcome (including a null result) for later requests.
+        //
+        IRGlobalParam* createdParam = createEquivalentStructuredBufferParam(elementType, byteAddressBufferParam);
+        m_cachedStructuredBuffers.Add(cacheKey, createdParam);
+        return createdParam;
+    }
+
+    IRGlobalParam* createEquivalentStructuredBufferParam(IRType* elementType, IRGlobalParam* byteAddressBufferParam)
+    {
+        // Creates a new global shader parameter of structured-buffer
+        // type (using `elementType` for load/store) to stand in for
+        // `byteAddressBufferParam`. Returns null if no equivalent type
+        // can be formed.
+
+        // Step one is working out the type of the new parameter from
+        // the type of the existing one.
+        //
+        auto replacementType = getEquivalentStructuredBufferParamType(elementType, byteAddressBufferParam->getDataType());
+        if( replacementType == nullptr )
+            return nullptr;
+
+        // Step two is creating the parameter itself, right before the
+        // original one.
+        //
+        // Note: a separate `IRBuilder` is used instead of `m_builder`
+        // because we may be in the middle of legalizing a load or store,
+        // and the insertion location of `m_builder` must not be disturbed.
+        //
+        IRBuilder globalBuilder;
+        globalBuilder.sharedBuilder = &m_sharedBuilder;
+        globalBuilder.setInsertBefore(byteAddressBufferParam);
+
+        auto replacementParam = globalBuilder.createGlobalParam(replacementType);
+
+        // Step three is copying over the layout of the original parameter
+        // (when it has one), so that the new parameter is given the same
+        // `binding` in the generated code.
+        //
+        auto sourceLayout = byteAddressBufferParam->findDecoration<IRLayoutDecoration>();
+        if( sourceLayout )
+        {
+            globalBuilder.addLayoutDecoration(replacementParam, sourceLayout->getLayout());
+        }
+
+        return replacementParam;
+    }
+
+    IRType* getEquivalentStructuredBufferParamType(IRType* elementType, IRType* byteAddressBufferType)
+    {
+        // Our task in this function is to compute the type for
+        // a structured buffer that is equivalent to `byteAddressBufferType`,
+        // but with the given `elementType`.
+        //
+        // The result is null if `byteAddressBufferType` is neither a
+        // byte-address buffer type nor an array (of arrays...) of them.
+
+        switch( byteAddressBufferType->op )
+        {
+            // The basic `*ByteAddressBuffer` types map directly to the `*StructuredBuffer<elementType>` cases.
+        case kIROp_HLSLByteAddressBufferType:                   return m_builder.getType(kIROp_HLSLStructuredBufferType, elementType);
+        case kIROp_HLSLRWByteAddressBufferType:                 return m_builder.getType(kIROp_HLSLRWStructuredBufferType, elementType);
+        case kIROp_HLSLRasterizerOrderedByteAddressBufferType:  return m_builder.getType(kIROp_HLSLRasterizerOrderedStructuredBufferType, elementType);
+
+        case kIROp_ArrayType:
+        case kIROp_UnsizedArrayType:
+            {
+                // Array types (both sized and unsized) need to translate
+                // their element type to an equivalent structured buffer
+                // and build a new array type with the same element count.
+                //
+                auto arrayType = cast<IRArrayTypeBase>(byteAddressBufferType);
+
+                // The element type of the array might not be something
+                // we can translate, in which case the recursive call
+                // yields null. We must propagate that failure rather
+                // than construct an array type with a null element type.
+                //
+                auto structuredElementType = getEquivalentStructuredBufferParamType(elementType, arrayType->getElementType());
+                if(!structuredElementType)
+                    return nullptr;
+
+                return m_builder.getArrayTypeBase(
+                    byteAddressBufferType->op,
+                    structuredElementType,
+                    arrayType->getElementCount());
+            }
+
+        default:
+            return nullptr;
+        }
+    }
+
+    // At this point we've covered all the logic for the load case down
+    // to the last detail.
+    //
+    // All that remains is to go over the equivalent logic for the case
+    // of byte-address buffer stores, which mostly parallels code we've
+    // already discussed.
+
+    void processStore(IRInst* store)
+    {
+        // As with loads, how a store is legalized is based on a type.
+        // For a store the relevant type is not that of the instruction
+        // itself, but that of the operand holding the value to be stored.
+        //
+        auto storedValue = store->getOperand(2);
+        auto storedType = storedValue->getDataType();
+
+        // A store of an already-legal type is left untouched.
+        //
+        if(isTypeLegalForByteAddressLoadStore(storedType))
+            return;
+
+        // Replacement code is inserted immediately before the original
+        // store instruction.
+        //
+        m_builder.setInsertBefore(store);
+
+        auto buffer = store->getOperand(0);
+        auto offset = store->getOperand(1);
+        auto result = emitLegalStore(storedType, buffer, offset, 0, storedValue);
+
+        // Emitting the replacement can fail (this should only happen
+        // for types that shouldn't actually be allowed on a target).
+        // In that case the original store is left in place alongside
+        // whatever was emitted; this is at worst a performance issue,
+        // but we should still consider making all unhandled cases
+        // hard errors. Only on success is the original store removed.
+        //
+        if( !SLANG_FAILED(result) )
+        {
+            store->removeAndDeallocate();
+        }
+    }
+
+    Result emitLegalStore(IRType* type, IRInst* buffer, IRInst* baseOffset, IRIntegerValue immediateOffset, IRInst* value)
+    {
+        // Emit code to store `value` (which has the given `type`) into
+        // `buffer` at `baseOffset` plus `immediateOffset`, using only
+        // store operations that are legal for the target.
+        //
+        // The flow for emitting a legal store is very similar to that for
+        // legal loads; we will recurse on the structure of `type` and
+        // emit stores for fields/elements as needed.
+        //
+        // A failure result is returned if layout information for a
+        // type or field that needs to be broken down is not available.
+
+        if( auto structType = as<IRStructType>(type) )
+        {
+            // To store a structure, we store each of its fields at
+            // the appropriate relative offset.
+            //
+            for( auto field : structType->getFields() )
+            {
+                auto fieldType = field->getFieldType();
+
+                IRIntegerValue fieldOffset = 0;
+                SLANG_RETURN_ON_FAIL(getNaturalOffset(field, &fieldOffset));
+
+                auto fieldVal = m_builder.emitFieldExtract(fieldType, value, field->getKey());
+                SLANG_RETURN_ON_FAIL(emitLegalStore(fieldType, buffer, baseOffset, immediateOffset + fieldOffset, fieldVal));
+            }
+            return SLANG_OK;
+        }
+        else if( auto arrayType = as<IRArrayTypeBase>(type) )
+        {
+            // Arrays and other sequences bottleneck through a helper
+            // function. We can only emit per-element stores when the
+            // element count is a known constant; otherwise we fall
+            // through to the catch-all case at the end.
+            //
+            auto elementCountInst = as<IRIntLit>(arrayType->getElementCount());
+            if( elementCountInst )
+            {
+                return emitLegalSequenceStore(buffer, baseOffset, immediateOffset, value, arrayType->getElementType(), elementCountInst->getValue());
+            }
+        }
+        else if( auto matType = as<IRMatrixType>(type) )
+        {
+            // Matrix stores get the same caveat as the load case:
+            // we are only supporting row-major layout for now, so
+            // a matrix is stored as a sequence of row vectors.
+            //
+            auto rowCountInst = as<IRIntLit>(matType->getRowCount());
+            if( rowCountInst )
+            {
+                auto rowType = m_builder.getVectorType(matType->getElementType(), matType->getColumnCount());
+                return emitLegalSequenceStore(buffer, baseOffset, immediateOffset, value, rowType, rowCountInst->getValue());
+            }
+        }
+        else if( auto vecType = as<IRVectorType>(type) )
+        {
+            // When the target wants vector load/store scalarized, a
+            // vector store is handled just like an array store.
+            //
+            auto elementCountInst = as<IRIntLit>(vecType->getElementCount());
+            if( m_options.scalarizeVectorLoadStore && elementCountInst)
+            {
+                return emitLegalSequenceStore(buffer, baseOffset, immediateOffset, value, vecType->getElementType(), elementCountInst->getValue());
+            }
+
+            if(m_options.useBitCastFromUInt)
+            {
+                // The bit-cast approach only works when the element type
+                // is a basic type. As in the load case, we must check the
+                // result of the cast before using it, and otherwise fall
+                // through to the catch-all case at the end.
+                //
+                if( auto elementType = as<IRBasicType>(vecType->getElementType()) )
+                {
+                    if( auto unsignedElementType = getSameSizeUIntType(elementType) )
+                    {
+                        // The bit-cast case for stores is similar to the case
+                        // for loads, except that we cast the value before
+                        // storing it (instead of casting a value after loading).
+                        //
+                        auto unsignedVecType = m_builder.getVectorType(unsignedElementType, vecType->getElementCount());
+                        auto unsignedVecVal = m_builder.emitBitCast(unsignedVecType, value);
+                        return emitSimpleStore(unsignedVecType, buffer, baseOffset, immediateOffset, unsignedVecVal);
+                    }
+                }
+            }
+        }
+        else if( auto basicType = as<IRBasicType>(type) )
+        {
+            // A scalar that has a distinct same-size unsigned integer
+            // type is bit-cast to that type and then stored, on targets
+            // that request it.
+            //
+            if(m_options.useBitCastFromUInt)
+            {
+                if( auto unsignedType = getSameSizeUIntType(basicType) )
+                {
+                    auto unsignedVal = m_builder.emitBitCast(unsignedType, value);
+                    return emitSimpleStore(unsignedType, buffer, baseOffset, immediateOffset, unsignedVal);
+                }
+            }
+        }
+
+        // If none of the special cases above was triggered, then we
+        // emit a single store for the type we were given.
+        //
+        return emitSimpleStore(type, buffer, baseOffset, immediateOffset, value);
+    }
+
+    // Emit a single store of `value` (which has the given `type`) to
+    // `buffer` at `baseOffset` plus `immediateOffset`, without recursing
+    // on the structure of `type`.
+    //
+    // Returns a failure result only if the layout of `type` is needed
+    // (for the structured-buffer path) and cannot be computed.
+    //
+    Result emitSimpleStore(IRType* type, IRInst* buffer, IRInst* baseOffset, IRIntegerValue immediateOffset, IRInst* value)
+    {
+        // Now that we are emitting a store "for real," the split
+        // base/immediate offset gets combined into a single value.
+        //
+        IRInst* offset = emitOffsetAddIfNeeded(baseOffset, immediateOffset);
+
+        if( m_options.translateToStructuredBufferOps )
+        {
+            if( auto structuredBuffer = getEquivalentStructuredBuffer(type, buffer) )
+            {
+                // Similar to the load case, if we are replacing byte-address
+                // buffers with structured buffers, then once we find the
+                // "equivalent" buffer to use, we emit a structured-buffer store,
+                // with an index computed by dividing the offset by the stride.
+                //
+                auto indexType = offset->getDataType();
+
+                IRSizeAndAlignment typeLayout;
+                SLANG_RETURN_ON_FAIL(getNaturalSizeAndAlignment(type, &typeLayout));
+
+                auto typeStride = m_builder.getIntValue(indexType, typeLayout.getStride());
+
+                IRInst* divArgs[] = { offset, typeStride };
+                auto index = m_builder.emitIntrinsicInst(indexType, kIROp_Div, 2, divArgs);
+
+                // A store does not produce a value, so the instruction
+                // is given `void` type, just like the byte-address
+                // buffer store emitted below.
+                //
+                IRInst* args[] = { structuredBuffer, index, value };
+                m_builder.emitIntrinsicInst(m_builder.getVoidType(), kIROp_StructuredBufferStore, 3, args);
+                return SLANG_OK;
+            }
+
+        }
+
+        // Otherwise we emit a byte-address buffer store directly,
+        // assuming it will work for the chosen target.
+        //
+        {
+            IRInst* storeArgs[] = { buffer, offset, value };
+            m_builder.emitIntrinsicInst(m_builder.getVoidType(), kIROp_ByteAddressBufferStore, 3, storeArgs);
+            return SLANG_OK;
+        }
+    }
+
+    Result emitLegalSequenceStore(IRInst* buffer, IRInst* baseOffset, IRIntegerValue immediateOffset, IRInst* value, IRType* elementType, IRIntegerValue elementCount)
+    {
+        // Stores the first `elementCount` elements of the sequence
+        // `value`, each of type `elementType`, at consecutive strides
+        // starting from `baseOffset` plus `immediateOffset`.
+        //
+        // This mirrors the sequence load: the step between elements
+        // is the "natural" stride of the element type.
+        //
+        IRSizeAndAlignment perElementLayout;
+        SLANG_RETURN_ON_FAIL(getNaturalSizeAndAlignment(elementType, &perElementLayout));
+        const IRIntegerValue stride = perElementLayout.getStride();
+
+        // Every element is first extracted from `value` (using an `int`
+        // index) and then handed to the general store logic, since the
+        // element type may itself need to be broken down.
+        //
+        auto intType = m_builder.getIntType();
+        IRIntegerValue index = 0;
+        while( index < elementCount )
+        {
+            auto indexInst = m_builder.getIntValue(intType, index);
+            auto element = m_builder.emitElementExtract(elementType, value, indexInst);
+
+            const IRIntegerValue elementOffset = immediateOffset + index*stride;
+            SLANG_RETURN_ON_FAIL(emitLegalStore(elementType, buffer, baseOffset, elementOffset, element));
+
+            ++index;
+        }
+
+        return SLANG_OK;
+    }
+};
+
+
+// Entry point for the pass: configures a legalization context with
+// the given session and options, and runs it over `module`.
+//
+void legalizeByteAddressBufferOps(
+    Session*                                    session,
+    IRModule*                                   module,
+    ByteAddressBufferLegalizationOptions const& options)
+{
+    ByteAddressBufferLegalizationContext legalizer;
+    legalizer.m_options = options;
+    legalizer.m_session = session;
+
+    legalizer.processModule(module);
+}
+
+}
+
diff --git a/source/slang/slang-ir-byte-address-legalize.h b/source/slang/slang-ir-byte-address-legalize.h
new file mode 100644
index 000000000..7b5c8ed3e
--- /dev/null
+++ b/source/slang/slang-ir-byte-address-legalize.h
@@ -0,0 +1,27 @@
+// slang-ir-byte-address-legalize.h
+#pragma once
+
+namespace Slang
+{
+class Session;
+struct IRModule;
+
+    /// Switches that select which byte-address buffer rewrites the legalization pass applies.
+struct ByteAddressBufferLegalizationOptions
+{
+    bool scalarizeVectorLoadStore{false};       ///< Emit vector loads/stores as per-element operations.
+    bool useBitCastFromUInt{false};             ///< Access `int`/`float`/`bool` data as `uint` and bit-cast.
+    bool translateToStructuredBufferOps{false}; ///< Emit structured-buffer operations where possible.
+};
+
+    /// Legalize byte-address buffer `Load()` and `Store()` operations in `module`.
+    ///
+    /// Load/store operations that involve aggregate types are translated
+    /// into primitive load-store operations on scalar or vector types;
+    /// `options` selects the additional rewrites to apply.
+    ///
+void legalizeByteAddressBufferOps(
+    Session*                                    session,
+    IRModule*                                   module,
+    const ByteAddressBufferLegalizationOptions& options);
+}
+
diff --git a/source/slang/slang-ir-inst-defs.h b/source/slang/slang-ir-inst-defs.h
index 6c01a700a..46ad566fd 100644
--- a/source/slang/slang-ir-inst-defs.h
+++ b/source/slang/slang-ir-inst-defs.h
@@ -233,6 +233,50 @@ INST(getElementPtr, getElementPtr, 2, 0)
 // "Subscript" an image at a pixel coordinate to get pointer
 INST(ImageSubscript, imageSubscript, 2, 0)
 
+// Load (almost) arbitrary-type data from a byte-address buffer
+//
+// %dst = byteAddressBufferLoad(%buffer, %offset)
+//
+// where
+// - `buffer` is a value of some `ByteAddressBufferTypeBase` type
+// - `offset` is an `int`
+// - `dst` is a value of some type containing only ordinary data
+//
+INST(ByteAddressBufferLoad, byteAddressBufferLoad, 2, 0)
+
+// Store (almost) arbitrary-type data to a byte-address buffer
+//
+// byteAddressBufferStore(%buffer, %offset, %src)
+//
+// where
+// - `buffer` is a value of some `ByteAddressBufferTypeBase` type
+// - `offset` is an `int`
+// - `src` is a value of some type containing only ordinary data
+//
+INST(ByteAddressBufferStore, byteAddressBufferStore, 3, 0)
+
+// Load data from a structured buffer
+//
+// %dst = structuredBufferLoad(%buffer, %index)
+//
+// where
+// - `buffer` is a value of some `StructuredBufferTypeBase` type with element type T
+// - `index` is an `int`
+// - `dst` is a value of type T
+//
+INST(StructuredBufferLoad, structuredBufferLoad, 2, 0)
+
+// Store data to a structured buffer
+//
+// structuredBufferStore(%buffer, %index, %src)
+//
+// where
+// - `buffer` is a value of some `StructuredBufferTypeBase` type with element type T
+// - `index` is an `int`
+// - `src` is a value of type T
+//
+INST(StructuredBufferStore, structuredBufferStore, 3, 0)
+
 // Construct a vector from a scalar
 //
 // %dst = constructVectorFromScalar %T %N %val
@@ -453,6 +497,12 @@ INST(HighLevelDeclDecoration,               highLevelDecl,          1, 0)
         /// An `[unsafeForceInlineEarly]` decoration specifies that calls to this function should be inline after initial codegen
     INST(UnsafeForceInlineEarlyDecoration, unsafeForceInlineEarly, 0, 0)
 
+        /// A `[naturalSizeAndAlignment(s,a)]` decoration is attached to a type to indicate that it has natural size `s` and alignment `a`
+    INST(NaturalSizeAndAlignmentDecoration, naturalSizeAndAlignment, 2, 0)
+
+        /// A `[naturalOffset(o)]` decoration is attached to a field to indicate that it has natural offset `o` in the parent type
+    INST(NaturalOffsetDecoration, naturalOffset, 1, 0)
+
     /* LinkageDecoration */
         INST(ImportDecoration, import, 1, 0)
         INST(ExportDecoration, export, 1, 0)
diff --git a/source/slang/slang-ir-insts.h b/source/slang/slang-ir-insts.h
index e307dc41e..957a53a0e 100644
--- a/source/slang/slang-ir-insts.h
+++ b/source/slang/slang-ir-insts.h
@@ -359,6 +359,27 @@ struct IRFormatDecoration : IRDecoration
 
 IR_SIMPLE_DECORATION(UnsafeForceInlineEarlyDecoration)
 
+    /// Decoration recording the natural size (operand 0) and alignment (operand 1) of the type it is attached to.
+struct IRNaturalSizeAndAlignmentDecoration : IRDecoration
+{
+    enum { kOp = kIROp_NaturalSizeAndAlignmentDecoration };
+    IR_LEAF_ISA(NaturalSizeAndAlignmentDecoration)
+    IRIntLit*       getSizeOperand() { return cast<IRIntLit>(getOperand(0)); }
+    IRIntegerValue  getSize()        { return getSizeOperand()->getValue(); }
+
+    IRIntLit*       getAlignmentOperand() { return cast<IRIntLit>(getOperand(1)); }
+    IRIntegerValue  getAlignment()        { return getAlignmentOperand()->getValue(); }
+};
+
+    /// Decoration recording the natural offset (operand 0) of the field it is attached to.
+struct IRNaturalOffsetDecoration : IRDecoration
+{
+    enum { kOp = kIROp_NaturalOffsetDecoration };
+    IR_LEAF_ISA(NaturalOffsetDecoration)
+
+    IRIntegerValue  getOffset()         { return getOffsetOperand()->getValue(); }
+    IRIntLit*       getOffsetOperand()  { return cast<IRIntLit>(getOperand(0)); }
+};
 
 // An instruction that specializes another IR value
 // (representing a generic) to a particular set of generic arguments 
diff --git a/source/slang/slang-ir-layout.cpp b/source/slang/slang-ir-layout.cpp
new file mode 100644
index 000000000..0003d279a
--- /dev/null
+++ b/source/slang/slang-ir-layout.cpp
@@ -0,0 +1,239 @@
+// slang-ir-layout.cpp
+#include "slang-ir-layout.h"
+
+#include "slang-ir-insts.h"
+
+// This file implements facilities for computing and caching layout
+// information on IR types.
+//
+// Unlike the AST-level layout system, this code currently only
+// handles the notion of "natural" layout for IR types, which is
+// the layout they use when stored in general-purpose memory
+// without additional constraints.
+//
+// In general, "natural" layout for all targets is assumed to follow
+// the same basic rules:
+//
+// * Scalars are all naturally aligned and have the "obvious" size
+//
+// * Arrays are laid out by separating elements by their "stride" (size rounded up to alignment)
+//
+// * Vectors are laid out as arrays of elements
+//
+// * Matrices are laid out as arrays of rows
+//
+// * Structures are laid out by packing fields in order, placing each field on the "next"
+//   suitably aligned offset. The alignment of a structure is the maximum alignment of
+//   its fields.
+//
+// Right now this file implements a one-size-fits-all version of natural
+// layout that might not be a perfect fit for all targets. In particular
+// this code currently assumes:
+//
+// * The `bool` type is laid out as 4 bytes (equivalent to an `int`)
+//
+// * The size of a structure or array type is *not* rounded up to a multiple
+//   of its alignment. This means that fields may be laid out in
+//   the "tail padding" of previous fields in the same structure. This is
+//   correct behavior for VK/D3D, but does not match the behavior of typical
+//   C/C++ compilers.
+//
+// * All matrices are laid out in row-major order, regardless of any
+//   settings in user code.
+//
+// TODO: Addressing the above issues would require extending this file to somehow
+// get target-specific layout information as an input. One option would be
+// to attach information about "natural" layout on the target to the `IRModuleInst`
+// as a decoration, similar to how an LLVM IR module stores a "layout string."
+
+namespace Slang
+{
+
+    /// Compute the natural layout of `type` itself; no cached decoration on `type` is consulted.
+    /// Supports scalar base types, `void`, structs (whose fields get `naturalOffset` decorations
+    /// when the type has a module), and literal-count arrays; any other op yields `SLANG_FAIL`.
+static Result _calcNaturalSizeAndAlignment(IRType* type, IRSizeAndAlignment* outSizeAndAlignment)
+{
+    switch( type->op )
+    {
+#define CASE(TYPE, SIZE, ALIGNMENT)                                 \
+    case kIROp_##TYPE##Type:                                        \
+        *outSizeAndAlignment = IRSizeAndAlignment(SIZE, ALIGNMENT); \
+        return SLANG_OK                                             \
+        /* end */
+    // Most base types are "naturally aligned" (meaning alignment and size are the same)
+#define BASE(TYPE, SIZE) CASE(TYPE, SIZE, SIZE)
+
+    BASE(Int8,      1);
+    BASE(UInt8,     1);
+
+    BASE(Int16,     2);
+    BASE(UInt16,    2);
+    BASE(Half,      2);
+
+    BASE(Int,       4);
+    BASE(UInt,      4);
+    BASE(Float,     4);
+
+    BASE(Int64,     8);
+    BASE(UInt64,    8);
+    BASE(Double,    8);
+
+    // We are currently handling `bool` following the HLSL
+    // precedent of storing it in 4 bytes.
+    //
+    // TODO: It would be good to try to make this follow
+    // per-platform conventions, or at least to be able
+    // to use a 1-byte encoding where available.
+    //
+    BASE(Bool,      4);
+
+    // The Slang `void` type is treated as a zero-byte
+    // type, so that it does not influence layout at all.
+    //
+    CASE(Void,      0,  1);
+
+#undef BASE
+#undef CASE
+
+    case kIROp_StructType:
+        {
+            auto structType = cast<IRStructType>(type);
+            IRSizeAndAlignment structLayout;
+            for( auto field : structType->getFields() )
+            {
+                IRSizeAndAlignment fieldTypeLayout;
+                SLANG_RETURN_ON_FAIL(getNaturalSizeAndAlignment(field->getFieldType(), &fieldTypeLayout));
+                // Pad the running size up to the field's alignment; the padded size is the field's offset.
+                structLayout.size = align(structLayout.size, fieldTypeLayout.alignment);
+                structLayout.alignment = std::max(structLayout.alignment, fieldTypeLayout.alignment);
+
+                IRIntegerValue fieldOffset = structLayout.size;
+                if( auto module = type->getModule() )
+                {
+                    // If we are in a situation where attaching new
+                    // decorations is possible, then we want to
+                    // cache the field offset on the IR field
+                    // instruction.
+                    //
+                    SharedIRBuilder sharedBuilder;
+                    sharedBuilder.module = module;
+                    sharedBuilder.session = module->getSession();
+
+                    IRBuilder builder;
+                    builder.sharedBuilder = &sharedBuilder;
+
+                    auto intType = builder.getIntType();
+                    builder.addDecoration(
+                        field,
+                        kIROp_NaturalOffsetDecoration,
+                        builder.getIntValue(intType, fieldOffset));
+                }
+                // Advance by the field's size (not its stride), so a later field may use its tail padding.
+                structLayout.size += fieldTypeLayout.size;
+            }
+            *outSizeAndAlignment = structLayout;
+            return SLANG_OK;
+        }
+        break;
+
+    case kIROp_ArrayType:
+        {
+            auto arrayType = cast<IRArrayType>(type);
+
+            auto elementCountLit = as<IRIntLit>(arrayType->getElementCount());
+            if(!elementCountLit)
+                return SLANG_FAIL;
+            auto elementCount = elementCountLit->getValue();
+            // An empty array takes no space; handling it here keeps `elementCount - 1` below non-negative.
+            if( elementCount == 0 )
+            {
+                *outSizeAndAlignment = IRSizeAndAlignment(0, 1);
+                return SLANG_OK;
+            }
+
+            auto elementType = arrayType->getElementType();
+            IRSizeAndAlignment elementTypeLayout;
+            SLANG_RETURN_ON_FAIL(getNaturalSizeAndAlignment(elementType, &elementTypeLayout));
+
+            auto elementStride = elementTypeLayout.getStride();
+            // No tail padding: the final element contributes its size rather than its stride.
+            *outSizeAndAlignment = IRSizeAndAlignment(
+                elementStride * (elementCount - 1) + elementTypeLayout.size,
+                elementTypeLayout.alignment);
+            return SLANG_OK;
+        }
+        break;
+
+    default:
+        return SLANG_FAIL;
+    }
+}
+
+Result getNaturalSizeAndAlignment(IRType* type, IRSizeAndAlignment* outSizeAndAlignment)
+{
+    // Fast path: an earlier query already cached the layout on the type.
+    auto cached = type->findDecoration<IRNaturalSizeAndAlignmentDecoration>();
+    if( cached )
+    {
+        IRSizeAndAlignment cachedLayout(cached->getSize(), (int)cached->getAlignment());
+        *outSizeAndAlignment = cachedLayout;
+        return SLANG_OK;
+    }
+
+    // Slow path: derive the layout; a failure leaves the output untouched.
+    IRSizeAndAlignment computed;
+    SLANG_RETURN_ON_FAIL(_calcNaturalSizeAndAlignment(type, &computed));
+    *outSizeAndAlignment = computed;
+
+    // Cache the result as a decoration when the type belongs to a module.
+    auto module = type->getModule();
+    if( !module )
+        return SLANG_OK;
+    SharedIRBuilder sharedBuilder;
+    sharedBuilder.module = module;
+    sharedBuilder.session = module->getSession();
+    IRBuilder builder;
+    builder.sharedBuilder = &sharedBuilder;
+    auto intType = builder.getIntType();
+    auto sizeVal = builder.getIntValue(intType, computed.size);
+    auto alignVal = builder.getIntValue(intType, computed.alignment);
+    builder.addDecoration(type, kIROp_NaturalSizeAndAlignmentDecoration, sizeVal, alignVal);
+    return SLANG_OK;
+}
+
+
+Result getNaturalOffset(IRStructField* field, IRIntegerValue* outOffset)
+{
+    // Try the cached decoration first; if it is missing, lay out the
+    // parent struct (which is what attaches offsets to fields) and
+    // then look one more time.
+    for( int attempt = 0; attempt < 2; ++attempt )
+    {
+        auto offsetDecor = field->findDecoration<IRNaturalOffsetDecoration>();
+        if( offsetDecor )
+        {
+            *outOffset = offsetDecor->getOffset();
+            return SLANG_OK;
+        }
+        if( attempt != 0 )
+            break;
+
+        // Only a field whose parent is a struct type can be
+        // handled; anything else is reported as a failure.
+        auto parentStruct = as<IRStructType>(field->getParent());
+        if( !parentStruct )
+            return SLANG_FAIL;
+
+        // The resulting layout is discarded; only the side effect of
+        // decorating the fields matters here.
+        IRSizeAndAlignment ignoredLayout;
+        SLANG_RETURN_ON_FAIL(getNaturalSizeAndAlignment(parentStruct, &ignoredLayout));
+    }
+
+    // Laying out the parent did not give the field an offset,
+    // so there is nothing sensible to report.
+    return SLANG_FAIL;
+}
+
+}
diff --git a/source/slang/slang-ir-layout.h b/source/slang/slang-ir-layout.h
new file mode 100644
index 000000000..64653b5f3
--- /dev/null
+++ b/source/slang/slang-ir-layout.h
@@ -0,0 +1,70 @@
+// slang-ir-layout.h
+#pragma once
+
+// This file provides utilities for computing and caching the *natural*
+// layout of types in the IR.
+//
+// The natural layout is the layout a target uses for a type when it is
+// stored in unconstrained general-purpose memory (to the extent that
+// the target supports unconstrained general-purpose memory).
+//
+// For targets like the CPU and CUDA which support a simple flat address
+// space, the natural layout is the only layout used for any type.
+//
+// For targets like D3D DXBC/DXIL and Vulkan SPIR-V, the natural layout
+// matches how a type is stored in a "structured buffer" or "shader
+// storage buffer."
+//
+
+#include "slang-ir.h"
+
+
+namespace Slang
+{
+
+    /// Round `value` up to the nearest multiple of `alignment`, which must be a power of two.
+inline IRIntegerValue align(IRIntegerValue value, int alignment)
+{
+    const IRIntegerValue lowBitsMask = alignment - 1;
+    return (value + lowBitsMask) & ~lowBitsMask;
+}
+
+    /// The natural size and alignment computed for an IR type.
+struct IRSizeAndAlignment
+{
+        /// Extent of the type; zero for a default-constructed value.
+    IRIntegerValue  size = 0;
+
+        /// Required alignment; a default-constructed value uses 1.
+    int             alignment = 1;
+
+        /// Produces the default layout: size 0, alignment 1.
+    IRSizeAndAlignment() {}
+
+        /// Produces a layout from an explicit size and alignment.
+    IRSizeAndAlignment(IRIntegerValue size, int alignment)
+        : size(size), alignment(alignment)
+    {}
+
+        /// The size rounded up to a multiple of the alignment.
+    inline IRIntegerValue getStride() { return align(size, alignment); }
+};
+
+    /// Compute (if necessary) and return the natural size and alignment of `type`.
+    ///
+    /// This operation may fail if `type` is not one that can be stored in
+    /// general-purpose memory for the current target. In that case the
+    /// type is considered to have no natural layout.
+    ///
+Result getNaturalSizeAndAlignment(IRType* type, IRSizeAndAlignment* outSizeAndAlignment);
+
+    /// Compute (if necessary) and return the natural offset of `field`
+    ///
+    /// This operation can fail if the parent type of `field` is not one
+    /// that can be stored in general-purpose memory. In that case, the
+    /// field is considered to have no natural offset.
+    ///
+Result getNaturalOffset(IRStructField* field, IRIntegerValue* outOffset);
+
+}
+
diff --git a/source/slang/slang.vcxproj b/source/slang/slang.vcxproj
index 766893da3..e97d38256 100644
--- a/source/slang/slang.vcxproj
+++ b/source/slang/slang.vcxproj
@@ -209,6 +209,7 @@
     <ClInclude Include="slang-hlsl-intrinsic-set.h" />
     <ClInclude Include="slang-image-format-defs.h" />
     <ClInclude Include="slang-ir-bind-existentials.h" />
+    <ClInclude Include="slang-ir-byte-address-legalize.h" />
     <ClInclude Include="slang-ir-clone.h" />
     <ClInclude Include="slang-ir-constexpr.h" />
     <ClInclude Include="slang-ir-dce.h" />
@@ -218,6 +219,7 @@
     <ClInclude Include="slang-ir-inline.h" />
     <ClInclude Include="slang-ir-inst-defs.h" />
     <ClInclude Include="slang-ir-insts.h" />
+    <ClInclude Include="slang-ir-layout.h" />
     <ClInclude Include="slang-ir-link.h" />
     <ClInclude Include="slang-ir-missing-return.h" />
     <ClInclude Include="slang-ir-restructure-scoping.h" />
@@ -295,6 +297,7 @@
     <ClCompile Include="slang-glsl-extension-tracker.cpp" />
     <ClCompile Include="slang-hlsl-intrinsic-set.cpp" />
     <ClCompile Include="slang-ir-bind-existentials.cpp" />
+    <ClCompile Include="slang-ir-byte-address-legalize.cpp" />
     <ClCompile Include="slang-ir-clone.cpp" />
     <ClCompile Include="slang-ir-constexpr.cpp" />
     <ClCompile Include="slang-ir-dce.cpp" />
@@ -302,6 +305,7 @@
     <ClCompile Include="slang-ir-entry-point-uniforms.cpp" />
     <ClCompile Include="slang-ir-glsl-legalize.cpp" />
     <ClCompile Include="slang-ir-inline.cpp" />
+    <ClCompile Include="slang-ir-layout.cpp" />
     <ClCompile Include="slang-ir-legalize-types.cpp" />
     <ClCompile Include="slang-ir-link.cpp" />
     <ClCompile Include="slang-ir-missing-return.cpp" />
diff --git a/source/slang/slang.vcxproj.filters b/source/slang/slang.vcxproj.filters
index 442f545c6..f46e77ebc 100644
--- a/source/slang/slang.vcxproj.filters
+++ b/source/slang/slang.vcxproj.filters
@@ -78,6 +78,9 @@
     <ClInclude Include="slang-ir-bind-existentials.h">
       <Filter>Header Files</Filter>
     </ClInclude>
+    <ClInclude Include="slang-ir-byte-address-legalize.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
     <ClInclude Include="slang-ir-clone.h">
       <Filter>Header Files</Filter>
     </ClInclude>
@@ -105,6 +108,9 @@
     <ClInclude Include="slang-ir-insts.h">
       <Filter>Header Files</Filter>
     </ClInclude>
+    <ClInclude Include="slang-ir-layout.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
     <ClInclude Include="slang-ir-link.h">
       <Filter>Header Files</Filter>
     </ClInclude>
@@ -332,6 +338,9 @@
     <ClCompile Include="slang-ir-bind-existentials.cpp">
       <Filter>Source Files</Filter>
     </ClCompile>
+    <ClCompile Include="slang-ir-byte-address-legalize.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
     <ClCompile Include="slang-ir-clone.cpp">
       <Filter>Source Files</Filter>
     </ClCompile>
@@ -353,6 +362,9 @@
     <ClCompile Include="slang-ir-inline.cpp">
       <Filter>Source Files</Filter>
     </ClCompile>
+    <ClCompile Include="slang-ir-layout.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
     <ClCompile Include="slang-ir-legalize-types.cpp">
       <Filter>Source Files</Filter>
     </ClCompile>