122 files changed, 836 insertions, 16167 deletions
diff --git a/source/slang/check.cpp b/source/slang/check.cpp
index e07bdf156..21e3b894b 100644
--- a/source/slang/check.cpp
+++ b/source/slang/check.cpp
@@ -7922,16 +7922,15 @@ namespace Slang
             RefPtr<Expr> expr = inExpr;
             for (;;)
             {
-                auto& type = expr->type;
-                if (auto pointerLikeType = type->As<PointerLikeType>())
+                auto baseType = expr->type;
+                if (auto pointerLikeType = baseType->As<PointerLikeType>())
                 {
-                    type = QualType(pointerLikeType->elementType);
+                    auto elementType = QualType(pointerLikeType->elementType);
+                    elementType.IsLeftValue = baseType.IsLeftValue;
 
                     auto derefExpr = new DerefExpr();
                     derefExpr->base = expr;
-                    derefExpr->type = QualType(pointerLikeType->elementType);
-
-                    // TODO(tfoley): deal with l-value-ness here
+                    derefExpr->type = elementType;
 
                     expr = derefExpr;
                     continue;
diff --git a/source/slang/emit.cpp b/source/slang/emit.cpp
index fb9968232..ba1b2177a 100644
--- a/source/slang/emit.cpp
+++ b/source/slang/emit.cpp
@@ -2383,7 +2383,6 @@ struct EmitVisitor
         case kIROp_FieldAddress:
         case kIROp_getElementPtr:
         case kIROp_Specialize:
-        case kIROp_BufferElementRef:
             return true;
         }
 
@@ -2536,25 +2535,6 @@ struct EmitVisitor
         return true;
     }
 
-    bool isDerefBaseImplicit(
-        EmitContext*    /*context*/,
-        IRInst*        inst)
-    {
-        auto type = inst->getDataType();
-
-        if(as<IRUniformParameterGroupType>(type) && !as<IRParameterBlockType>(type))
-        {
-            // TODO: we need to be careful here, because
-            // HLSL shader model 6 allows these as explicit
-            // types.
-            return true;
-        }
-
-        return false;
-    }
-
-
-
     void emitIROperand(
         EmitContext*    ctx,
         IRInst*         inst,
@@ -3645,13 +3625,16 @@ struct EmitVisitor
 
                 IRFieldExtract* fieldExtract = (IRFieldExtract*) inst;
 
-                if (!isDerefBaseImplicit(ctx, fieldExtract->getBase()))
-                {
-                    auto prec = kEOp_Postfix;
-                    needClose = maybeEmitParens(outerPrec, prec);
+                auto prec = kEOp_Postfix;
+                needClose = maybeEmitParens(outerPrec, prec);
 
-                    emitIROperand(ctx, fieldExtract->getBase(), mode, leftSide(outerPrec, prec));
-                    emit(".");
+                auto base = fieldExtract->getBase();
+                emitIROperand(ctx, base, mode, leftSide(outerPrec, prec));
+                emit(".");
+                if(getTarget(ctx) == CodeGenTarget::GLSL
+                    && as<IRUniformParameterGroupType>(base->getDataType()))
+                {
+                    emit("_data.");
                 }
                 emit(getIRName(fieldExtract->getField()));
             }
@@ -3663,15 +3646,17 @@ struct EmitVisitor
 
                 IRFieldAddress* ii = (IRFieldAddress*) inst;
 
-                if (!isDerefBaseImplicit(ctx, ii->getBase()))
-                {
-                    auto prec = kEOp_Postfix;
-                    needClose = maybeEmitParens(outerPrec, prec);
+                auto prec = kEOp_Postfix;
+                needClose = maybeEmitParens(outerPrec, prec);
 
-                    emitIROperand(ctx, ii->getBase(), mode, leftSide(outerPrec, prec));
-                    emit(".");
+                auto base = ii->getBase();
+                emitIROperand(ctx, base, mode, leftSide(outerPrec, prec));
+                emit(".");
+                if(getTarget(ctx) == CodeGenTarget::GLSL
+                    && as<IRUniformParameterGroupType>(base->getDataType()))
+                {
+                    emit("_data.");
                 }
-
                 emit(getIRName(ii->getField()));
             }
             break;
@@ -3774,7 +3759,15 @@ struct EmitVisitor
             break;
 
         case kIROp_Load:
-            emitIROperand(ctx, inst->getOperand(0), mode, outerPrec);
+            {
+                auto base = inst->getOperand(0);
+                emitIROperand(ctx, base, mode, outerPrec);
+                if(getTarget(ctx) == CodeGenTarget::GLSL
+                    && as<IRUniformParameterGroupType>(base->getDataType()))
+                {
+                    emit("._data");
+                }
+            }
             break;
 
         case kIROp_Store:
@@ -3794,39 +3787,6 @@ struct EmitVisitor
             }
             break;
 
-        case kIROp_BufferLoad:
-        case kIROp_BufferElementRef:
-            {
-                auto prec = kEOp_Postfix;
-                needClose = maybeEmitParens(outerPrec, prec);
-
-                emitIROperand(ctx, inst->getOperand(0), mode, leftSide(outerPrec, prec));
-                emit("[");
-                emitIROperand(ctx, inst->getOperand(1), mode, kEOp_General);
-                emit("]");
-            }
-            break;
-
-        case kIROp_BufferStore:
-            {
-                auto precAssign = kEOp_Assign;
-                needClose = maybeEmitParens(outerPrec, precAssign);
-
-                auto outerPrecSubscript = precAssign;
-                auto precSubscript = kEOp_Postfix;
-                bool needCloseSubscript = maybeEmitParens(outerPrecSubscript, precSubscript);
-
-                emitIROperand(ctx, inst->getOperand(0), mode, leftSide(outerPrecSubscript, precSubscript));
-                emit("[");
-                emitIROperand(ctx, inst->getOperand(1), mode, kEOp_General);
-                emit("]");
-                maybeCloseParens(needCloseSubscript);
-
-                emit(" = ");
-                emitIROperand(ctx, inst->getOperand(2), mode, rightSide(outerPrec, precAssign));
-            }
-            break;
-
         case kIROp_GroupMemoryBarrierWithGroupSync:
             emit("GroupMemoryBarrierWithGroupSync()");
             break;
@@ -5618,63 +5578,19 @@ struct EmitVisitor
         }
     }
 
-    void emitHLSLParameterBlock(
-        EmitContext*            ctx,
-        IRGlobalVar*            varDecl,
-        IRParameterBlockType*   type)
-    {
-        emit("cbuffer ");
-
-        // Generate a dummy name for the block
-        emit("_S");
-        Emit(ctx->shared->uniqueIDCounter++);
-
-        auto varLayout = getVarLayout(ctx, varDecl);
-        SLANG_RELEASE_ASSERT(varLayout);
-
-        EmitVarChain blockChain(varLayout);
-
-        EmitVarChain containerChain = blockChain;
-        EmitVarChain elementChain = blockChain;
-
-        auto typeLayout = varLayout->typeLayout;
-        if( auto parameterGroupTypeLayout = typeLayout.As<ParameterGroupTypeLayout>() )
-        {
-            containerChain = EmitVarChain(parameterGroupTypeLayout->containerVarLayout, &blockChain);
-            elementChain = EmitVarChain(parameterGroupTypeLayout->elementVarLayout, &blockChain);
-
-            typeLayout = parameterGroupTypeLayout->elementVarLayout->getTypeLayout();
-        }
-
-        emitHLSLRegisterSemantic(LayoutResourceKind::ConstantBuffer, &containerChain);
-
-        emit("\n{\n");
-        indent();
-
-        auto elementType = type->getElementType();
-
-
-        emitIRType(ctx, elementType, getIRName(varDecl));
-
-        emitHLSLParameterGroupFieldLayoutSemantics(&elementChain);
-        emit(";\n");
-
-        dedent();
-        emit("}\n");
-    }
-
     void emitHLSLParameterGroup(
         EmitContext*                    ctx,
         IRGlobalVar*                    varDecl,
         IRUniformParameterGroupType*    type)
     {
-        if(auto parameterBlockType = as<IRParameterBlockType>(type))
+        if(as<IRTextureBufferType>(type))
         {
-            emitHLSLParameterBlock(ctx, varDecl, parameterBlockType);
-            return;
+            emit("tbuffer ");
+        }
+        else
+        {
+            emit("cbuffer ");
         }
-
-        emit("cbuffer ");
         emit(getIRName(varDecl));
 
         auto varLayout = getVarLayout(ctx, varDecl);
@@ -5701,111 +5617,37 @@ struct EmitVisitor
 
         auto elementType = type->getElementType();
 
-        if(auto structType = as<IRStructType>(elementType))
-        {
-            auto structTypeLayout = typeLayout.As<StructTypeLayout>();
-            SLANG_RELEASE_ASSERT(structTypeLayout);
-
-            UInt fieldIndex = 0;
-            for(auto ff : structType->getFields())
-            {
-                // TODO: need a plan to deal with the case where the IR-level
-                // `struct` type might not match the high-level type, so that
-                // the numbering of fields is different.
-                //
-                // The right plan is probably to require that the lowering pass
-                // create a fresh layout for any type/variable that it splits
-                // in this fashion, so that the layout information it attaches
-                // can always be assumed to apply to the actual instruciton.
-                //
-
-                auto fieldLayout = structTypeLayout->fields[fieldIndex++];
-
-                auto fieldKey = ff->getKey();
-                auto fieldType = ff->getFieldType();
-
-                // Fields of `void` type aren't valid in HLSL/GLSL.
-                //
-                // TODO: legalization should get rid of any fields that have
-                // empty, or effectively empty types (e.g., emptry structs
-                // should be translated over to `void`).
-                if(as<IRVoidType>(fieldType))
-                    continue;
-
-                emitIRVarModifiers(ctx, fieldLayout, fieldKey, fieldType);
-
-                emitIRType(ctx, fieldType, getIRName(fieldKey));
-
-                emitHLSLParameterGroupFieldLayoutSemantics(fieldLayout, &elementChain);
-
-                emit(";\n");
-            }
-        }
-        else
-        {
-            // TODO: during legalization we should turn `ParameterGroup<X>` where `X`
-            // is not a `struct` type into `ParameterGroup<S>` where `S` is defined
-            // as something like `struct S { X _; };`
-            //
-            emit("/* unexpected */");
-        }
+        emitIRType(ctx, elementType, getIRName(varDecl));
+        emit(";\n");
 
         dedent();
         emit("}\n");
     }
 
-    void emitGLSLParameterBlock(
-        EmitContext*            ctx,
-        IRGlobalVar*            varDecl,
-        IRParameterBlockType*   type)
+    void emitArrayBrackets(
+        EmitContext*    ctx,
+        IRType*         type)
     {
-        auto varLayout = getVarLayout(ctx, varDecl);
-        SLANG_RELEASE_ASSERT(varLayout);
-
-        EmitVarChain blockChain(varLayout);
-
-        EmitVarChain containerChain = blockChain;
-        EmitVarChain elementChain = blockChain;
+        SLANG_UNUSED(ctx);
 
-        auto typeLayout = varLayout->typeLayout;
-        if( auto parameterGroupTypeLayout = typeLayout.As<ParameterGroupTypeLayout>() )
+        if(auto arrayType = as<IRArrayType>(type))
         {
-            containerChain = EmitVarChain(parameterGroupTypeLayout->containerVarLayout, &blockChain);
-            elementChain = EmitVarChain(parameterGroupTypeLayout->elementVarLayout, &blockChain);
-
-            typeLayout = parameterGroupTypeLayout->elementVarLayout->getTypeLayout();
+            emit("[");
+            EmitVal(arrayType->getElementCount(), kEOp_General);
+            emit("]");
+        }
+        else if(auto unsizedArrayType = as<IRUnsizedArrayType>(type))
+        {
+            emit("[]");
         }
-
-        emitGLSLLayoutQualifier(LayoutResourceKind::DescriptorTableSlot, &containerChain);
-        emit("layout(std140) uniform ");
-
-        // Generate a dummy name for the block
-        emit("_S");
-        Emit(ctx->shared->uniqueIDCounter++);
-
-        emit("\n{\n");
-        indent();
-
-        auto elementType = type->getElementType();
-
-        emitIRType(ctx, elementType, getIRName(varDecl));
-        emit(";\n");
-
-        dedent();
-        emit("};\n");
     }
 
+
     void emitGLSLParameterGroup(
         EmitContext*                    ctx,
         IRGlobalVar*                    varDecl,
         IRUniformParameterGroupType*    type)
     {
-        if(auto parameterBlockType = as<IRParameterBlockType>(type))
-        {
-            emitGLSLParameterBlock(ctx, varDecl, parameterBlockType);
-            return;
-        }
-
         auto varLayout = getVarLayout(ctx, varDecl);
         SLANG_RELEASE_ASSERT(varLayout);
 
@@ -5814,7 +5656,7 @@ struct EmitVisitor
         EmitVarChain containerChain = blockChain;
         EmitVarChain elementChain = blockChain;
 
-        auto typeLayout = varLayout->typeLayout;
+        auto typeLayout = varLayout->typeLayout->unwrapArray();
         if( auto parameterGroupTypeLayout = typeLayout.As<ParameterGroupTypeLayout>() )
         {
             containerChain = EmitVarChain(parameterGroupTypeLayout->containerVarLayout, &blockChain);
@@ -5841,71 +5683,28 @@ struct EmitVisitor
             emit("layout(std140) uniform ");
         }
 
-        emit(getIRName(varDecl));
+        // Generate a dummy name for the block
+        emit("_S");
+        Emit(ctx->shared->uniqueIDCounter++);
 
         emit("\n{\n");
         indent();
 
         auto elementType = type->getElementType();
 
-        if(auto structType = as<IRStructType>(elementType))
-        {
-            auto structTypeLayout = typeLayout.As<StructTypeLayout>();
-            SLANG_RELEASE_ASSERT(structTypeLayout);
-
-            UInt fieldIndex = 0;
-            for(auto ff : structType->getFields())
-            {
-                // TODO: need a plan to deal with the case where the IR-level
-                // `struct` type might not match the high-level type, so that
-                // the numbering of fields is different.
-                //
-                // The right plan is probably to require that the lowering pass
-                // create a fresh layout for any type/variable that it splits
-                // in this fashion, so that the layout information it attaches
-                // can always be assumed to apply to the actual instruciton.
-                //
-
-                auto fieldLayout = structTypeLayout->fields[fieldIndex++];
-
-                auto fieldKey = ff->getKey();
-                auto fieldType = ff->getFieldType();
-                if(as<IRVoidType>(fieldType))
-                    continue;
-
-                // Note: we will emit matrix-layout modifiers here, but
-                // we will refrain from emitting other modifiers that
-                // might not be appropriate to the context (e.g., we
-                // shouldn't go emitting `uniform` just because these
-                // things are uniform...).
-                //
-                // TODO: we need a more refined set of modifiers that
-                // we should allow on fields, because we might end
-                // up supporting layout that isn't the default for
-                // the given block type (e.g., something other than
-                // `std140` for a uniform block).
-                //
-                emitIRMatrixLayoutModifiers(ctx, fieldLayout);
-
-                emitIRType(ctx, fieldType, getIRName(fieldKey));
+        emitIRType(ctx, elementType, "_data");
+        emit(";\n");
 
-//                    emitHLSLParameterGroupFieldLayoutSemantics(layout, fieldLayout);
+        dedent();
+        emit("} ");
 
-                emit(";\n");
-            }
-        }
-        else
-        {
-            emit("/* unexpected */");
-        }
+        emit(getIRName(varDecl));
 
-        // TODO: we should consider always giving parameter blocks
-        // names when outputting GLSL, since that shouldn't affect
-        // the semantics of things, and will reduce the risk of
-        // collisions in the global namespace...
+        // If the underlying variable was an array, we need to emit its array
+        // brackets here (note: only the outermost dimension is handled so far).
+        emitArrayBrackets(ctx, varDecl->getDataType()->getValueType());
 
-        dedent();
-        emit("};\n");
+        emit(";\n");
     }
 
     void emitIRParameterGroup(
@@ -6025,19 +5824,14 @@ struct EmitVisitor
 
 
         auto elementType = structuredBufferType->getElementType();
-        emitIRType(ctx, elementType, getIRName(varDecl) + "[]");
+        emitIRType(ctx, elementType, "_data[]");
         emit(";\n");
 
         dedent();
-        emit("}");
+        emit("} ");
 
-        // TODO: we need to consider the case where the type of the variable is
-        // an *array* of structured buffers, in which case we need to declare
-        // the block as an array too.
-        //
-        // The main challenge here is that then the block will have a name,
-        // and also the field inside the block will have a name, so that when
-        // the user had written `a[i][j]` we now need to emit `a[i].someName[j]`.
+        emit(getIRName(varDecl));
+        emitArrayBrackets(ctx, varDecl->getDataType()->getValueType());
 
         emit(";\n");
     }
@@ -6084,20 +5878,13 @@ struct EmitVisitor
         emit("\n{\n");
         indent();
 
-        emit("uint ");
-        emit(getIRName(varDecl));
-        emit("[];\n");
+        emit("uint _data[];\n");
 
         dedent();
-        emit("}");
+        emit("} ");
 
-        // TODO: we need to consider the case where the type of the variable is
-        // an *array* of structured buffers, in which case we need to declare
-        // the block as an array too.
-        //
-        // The main challenge here is that then the block will have a name,
-        // and also the field inside the block will have a name, so that when
-        // the user had written `a[i][j]` we now need to emit `a[i].someName[j]`.
+        emit(getIRName(varDecl));
+        emitArrayBrackets(ctx, varDecl->getDataType()->getValueType());
 
         emit(";\n");
     }
@@ -6129,6 +5916,16 @@ struct EmitVisitor
             Emit("}\n");
         }
 
+        // When a global shader parameter represents a "parameter group"
+        // (either a constant buffer or a parameter block with non-resource
+        // data in it), we will prefer to emit it as an ordinary `cbuffer`
+        // declaration or `uniform` block, even when emitting HLSL for
+        // D3D profiles that support the explicit `ConstantBuffer<T>` type.
+        //
+        // Alternatively, we could make this choice based on profile, and
+        // prefer `ConstantBuffer<T>` on profiles that support it and/or when
+        // the input code used that syntax.
+        //
         if (auto paramBlockType = as<IRUniformParameterGroupType>(varType))
         {
             emitIRParameterGroup(
@@ -6140,8 +5937,31 @@ struct EmitVisitor
 
         if(getTarget(ctx) == CodeGenTarget::GLSL)
         {
-            // When outputting GLSL, we need to transform any declaration of
-            // a `*StructuredBuffer<T>` into an ordinary `buffer` declaration.
+            // There are a number of types that are (or can be)
+            // "first-class" in D3D HLSL, but are second-class in GLSL in
+            // that they require explicit global declarations for each value/object,
+            // and don't support declaration as ordinary variables.
+            //
+            // This includes constant buffers (`uniform` blocks) as well as
+            // structured and byte-address buffers (both mapping to `buffer` blocks).
+            //
+            // We intercept these types, and arrays thereof, to produce the required
+            // global declarations. This assumes that earlier "legalization" passes
+            // already performed the work of pulling fields with these types out of
+            // aggregates.
+            //
+            // Note: this also assumes that these types are not used as function
+            // parameters/results, local variables, etc. Additional legalization
+            // steps are required to guarantee these conditions.
+            //
+            if (auto paramBlockType = as<IRUniformParameterGroupType>(unwrapArray(varType)))
+            {
+                emitGLSLParameterGroup(
+                    ctx,
+                    varDecl,
+                    paramBlockType);
+                return;
+            }
             if( auto structuredBufferType = as<IRHLSLStructuredBufferTypeBase>(unwrapArray(varType)) )
             {
                 emitIRStructuredBuffer_GLSL(
@@ -6150,9 +5970,6 @@ struct EmitVisitor
                     structuredBufferType);
                 return;
             }
-
-            // When outputting GLSL, we need to transform any declaration of
-            // a `*ByteAddressBuffer<T>` into an ordinary `buffer` declaration.
             if( auto byteAddressBufferType = as<IRByteAddressBufferTypeBase>(unwrapArray(varType)) )
             {
                 emitIRByteAddressBuffer_GLSL(
@@ -6166,7 +5983,15 @@ struct EmitVisitor
             // when outputting GLSL (well, except in the case where they
             // actually *require* redeclaration...).
             //
-            // TODO: can we detect this more robustly?
+            // Note: these won't be variables the user declared explicitly
+            // in their code, but rather variables that we generated as
+            // part of legalizing the varying input/output signature of
+            // an entry point for GL/Vulkan.
+            //
+            // TODO: This could be handled more robustly by attaching an
+            // appropriate decoration to these variables to indicate their
+            // purpose.
+            //
             if(getText(varDecl->mangledName).StartsWith("gl_"))
             {
                 // The variable represents an OpenGL system value,
diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang
index 950931fc2..98b50e574 100644
--- a/source/slang/hlsl.meta.slang
+++ b/source/slang/hlsl.meta.slang
@@ -18,26 +18,26 @@ __magic_type(HLSLByteAddressBufferType)
 __intrinsic_type($(kIROp_HLSLByteAddressBufferType))
 struct ByteAddressBuffer
 {
-    __target_intrinsic(glsl, "$1 = $0.length()")
+    __target_intrinsic(glsl, "$1 = $0._data.length()")
     void GetDimensions(
         out uint dim);
 
-    __target_intrinsic(glsl, "$0[$1]")
+    __target_intrinsic(glsl, "$0._data[$1]")
     uint Load(int location);
 
     uint Load(int location, out uint status);
 
-    __target_intrinsic(glsl, "uvec2($0[$1], $0[$1+1])")
+    __target_intrinsic(glsl, "uvec2($0._data[$1], $0._data[$1+1])")
     uint2 Load2(int location);
 
     uint2 Load2(int location, out uint status);
 
-    __target_intrinsic(glsl, "uvec3($0[$1], $0[$1+1], $0[$1+2])")
+    __target_intrinsic(glsl, "uvec3($0._data[$1], $0._data[$1+1], $0._data[$1+2])")
     uint3 Load3(int location);
 
     uint3 Load3(int location, out uint status);
 
-    __target_intrinsic(glsl, "uvec4($0[$1], $0[$1+1], $0[$1+2], $0[$1+3])")
+    __target_intrinsic(glsl, "uvec4($0._data[$1], $0._data[$1+1], $0._data[$1+2], $0._data[$1+3])")
     uint4 Load4(int location);
 
     uint4 Load4(int location, out uint status);
@@ -55,7 +55,11 @@ struct StructuredBuffer
     T Load(int location);
     T Load(int location, out uint status);
 
-    __subscript(uint index) -> T { __intrinsic_op(bufferLoad) get; };
+    __subscript(uint index) -> T
+    {
+        __target_intrinsic(glsl, "$0._data[$1]")
+        get;
+    };
 };
 
 __generic<T>
@@ -105,133 +109,133 @@ struct $(item.name)
     // Note(tfoley): supports all operations from `ByteAddressBuffer`
     // TODO(tfoley): can this be made a sub-type?
 
-    __target_intrinsic(glsl, "$1 = $0.length()")
+    __target_intrinsic(glsl, "$1 = $0._data.length()")
     void GetDimensions(
         out uint dim);
 
-    __target_intrinsic(glsl, "$0[$1]")
+    __target_intrinsic(glsl, "$0._data[$1]")
     uint Load(int location);
 
     uint Load(int location, out uint status);
 
-    __target_intrinsic(glsl, "uvec2($0[$1], $0[$1+4])")
+    __target_intrinsic(glsl, "uvec2($0._data[$1], $0._data[$1+4])")
     uint2 Load2(int location);
 
     uint2 Load2(int location, out uint status);
 
-    __target_intrinsic(glsl, "uvec3($0[$1], $0[$1+4], $0[$1+8])")
+    __target_intrinsic(glsl, "uvec3($0._data[$1], $0._data[$1+4], $0._data[$1+8])")
     uint3 Load3(int location);
 
     uint3 Load3(int location, out uint status);
 
-    __target_intrinsic(glsl, "uvec4($0[$1], $0[$1+4], $0[$1+8], $0[$1+12])")
+    __target_intrinsic(glsl, "uvec4($0._data[$1], $0._data[$1+4], $0._data[$1+8], $0._data[$1+12])")
     uint4 Load4(int location);
 
     uint4 Load4(int location, out uint status);
 
     // Added operations:
 
-    __target_intrinsic(glsl, "($3 = atomicAdd($0[$1], $2))")
+    __target_intrinsic(glsl, "($3 = atomicAdd($0._data[$1], $2))")
     void InterlockedAdd(
         UINT dest,
         UINT value,
         out UINT original_value);
 
-    __target_intrinsic(glsl, "atomicAdd($0[$1], $2)")
+    __target_intrinsic(glsl, "atomicAdd($0._data[$1], $2)")
     void InterlockedAdd(
         UINT dest,
         UINT value);
 
-    __target_intrinsic(glsl, "($3 = atomicAnd($0[$1], $2))")
+    __target_intrinsic(glsl, "($3 = atomicAnd($0._data[$1], $2))")
     void InterlockedAnd(
         UINT dest,
         UINT value,
         out UINT original_value);
 
-    __target_intrinsic(glsl, "atomicAnd($0[$1], $2)")
+    __target_intrinsic(glsl, "atomicAnd($0._data[$1], $2)")
     void InterlockedAnd(
         UINT dest,
         UINT value);
 
-    __target_intrinsic(glsl, "($4 = atomicCompSwap($0[$1], $2, $3))")
+    __target_intrinsic(glsl, "($4 = atomicCompSwap($0._data[$1], $2, $3))")
     void InterlockedCompareExchange(
         UINT dest,
         UINT compare_value,
         UINT value,
         out UINT original_value);
 
-    __target_intrinsic(glsl, "atomicCompSwap($0[$1], $2, $3)")
+    __target_intrinsic(glsl, "atomicCompSwap($0._data[$1], $2, $3)")
     void InterlockedCompareStore(
         UINT dest,
         UINT compare_value,
         UINT value);
 
-    __target_intrinsic(glsl, "($3 = atomicExchange($0[$1], $2))")
+    __target_intrinsic(glsl, "($3 = atomicExchange($0._data[$1], $2))")
     void InterlockedExchange(
         UINT dest,
         UINT value,
         out UINT original_value);
 
-    __target_intrinsic(glsl, "($3 = atomicMax($0[$1], $2))")
+    __target_intrinsic(glsl, "($3 = atomicMax($0._data[$1], $2))")
     void InterlockedMax(
         UINT dest,
         UINT value,
         out UINT original_value);
 
-    __target_intrinsic(glsl, "atomicMax($0[$1], $2)")
+    __target_intrinsic(glsl, "atomicMax($0._data[$1], $2)")
     void InterlockedMax(
         UINT dest,
         UINT value);
 
-    __target_intrinsic(glsl, "($3 = atomicMin($0[$1], $2))")
+    __target_intrinsic(glsl, "($3 = atomicMin($0._data[$1], $2))")
     void InterlockedMin(
         UINT dest,
         UINT value,
         out UINT original_value);
 
-    __target_intrinsic(glsl, "atomicMin($0[$1], $2)")
+    __target_intrinsic(glsl, "atomicMin($0._data[$1], $2)")
     void InterlockedMin(
         UINT dest,
         UINT value);
 
-    __target_intrinsic(glsl, "($3 = atomicOr($0[$1], $2))")
+    __target_intrinsic(glsl, "($3 = atomicOr($0._data[$1], $2))")
     void InterlockedOr(
         UINT dest,
         UINT value,
         out UINT original_value);
 
-    __target_intrinsic(glsl, "atomicOr($0[$1], $2)")
+    __target_intrinsic(glsl, "atomicOr($0._data[$1], $2)")
     void InterlockedOr(
         UINT dest,
         UINT value);
 
-    __target_intrinsic(glsl, "($3 = atomicXor($0[$1], $2))")
+    __target_intrinsic(glsl, "($3 = atomicXor($0._data[$1], $2))")
     void InterlockedXor(
         UINT dest,
         UINT value,
         out UINT original_value);
 
-    __target_intrinsic(glsl, "atomicXor($0[$1], $2)")
+    __target_intrinsic(glsl, "atomicXor($0._data[$1], $2)")
     void InterlockedXor(
         UINT dest,
         UINT value);
 
-    __target_intrinsic(glsl, "$0[$1] = $2")
+    __target_intrinsic(glsl, "$0._data[$1] = $2")
     void Store(
         uint address,
         uint value);
 
-    __target_intrinsic(glsl, "$0[$1] = $2.x, $0[$1+4] = $2.y")
+    __target_intrinsic(glsl, "$0._data[$1] = $2.x, $0._data[$1+4] = $2.y")
     void Store2(
         uint address,
         uint2 value);
 
-    __target_intrinsic(glsl, "$0[$1] = $2.x, $0[$1+4] = $2.y, $0[$1+8] = $2.z")
+    __target_intrinsic(glsl, "$0._data[$1] = $2.x, $0._data[$1+4] = $2.y, $0._data[$1+8] = $2.z")
     void Store3(
         uint address,
         uint3 value);
 
-    __target_intrinsic(glsl, "$0[$1] = $2.x, $0[$1+4] = $2.y, $0[$1+8] = $2.z, $0[$1+12] = $2.w")
+    __target_intrinsic(glsl, "$0._data[$1] = $2.x, $0._data[$1+4] = $2.y, $0._data[$1+8] = $2.z, $0._data[$1+12] = $2.w")
     void Store4(
         uint address,
         uint4 value);
@@ -270,11 +274,11 @@ struct $(item.name)
     T Load(int location);
     T Load(int location, out uint status);
 
-	__subscript(uint index) -> T
-	{
-        __intrinsic_op(bufferElementRef)
+    __subscript(uint index) -> T
+    {
+        __target_intrinsic(glsl, "$0._data[$1]")
         ref;
-	}
+    }
 };
 
 ${{{{
diff --git a/source/slang/hlsl.meta.slang.h b/source/slang/hlsl.meta.slang.h
index 8d908c13d..ea21a0fde 100644
--- a/source/slang/hlsl.meta.slang.h
+++ b/source/slang/hlsl.meta.slang.h
@@ -24,26 +24,26 @@ SLANG_SPLICE(kIROp_HLSLByteAddressBufferType
 SLANG_RAW(")\n")
 SLANG_RAW("struct ByteAddressBuffer\n")
 SLANG_RAW("{\n")
-SLANG_RAW("    __target_intrinsic(glsl, \"$1 = $0.length()\")\n")
+SLANG_RAW("    __target_intrinsic(glsl, \"$1 = $0._data.length()\")\n")
 SLANG_RAW("    void GetDimensions(\n")
 SLANG_RAW("        out uint dim);\n")
 SLANG_RAW("\n")
-SLANG_RAW("    __target_intrinsic(glsl, \"$0[$1]\")\n")
+SLANG_RAW("    __target_intrinsic(glsl, \"$0._data[$1]\")\n")
 SLANG_RAW("    uint Load(int location);\n")
 SLANG_RAW("\n")
 SLANG_RAW("    uint Load(int location, out uint status);\n")
 SLANG_RAW("\n")
-SLANG_RAW("    __target_intrinsic(glsl, \"uvec2($0[$1], $0[$1+1])\")\n")
+SLANG_RAW("    __target_intrinsic(glsl, \"uvec2($0._data[$1], $0._data[$1+1])\")\n")
 SLANG_RAW("    uint2 Load2(int location);\n")
 SLANG_RAW("\n")
 SLANG_RAW("    uint2 Load2(int location, out uint status);\n")
 SLANG_RAW("\n")
-SLANG_RAW("    __target_intrinsic(glsl, \"uvec3($0[$1], $0[$1+1], $0[$1+2])\")\n")
+SLANG_RAW("    __target_intrinsic(glsl, \"uvec3($0._data[$1], $0._data[$1+1], $0._data[$1+2])\")\n")
 SLANG_RAW("    uint3 Load3(int location);\n")
 SLANG_RAW("\n")
 SLANG_RAW("    uint3 Load3(int location, out uint status);\n")
 SLANG_RAW("\n")
-SLANG_RAW("    __target_intrinsic(glsl, \"uvec4($0[$1], $0[$1+1], $0[$1+2], $0[$1+3])\")\n")
+SLANG_RAW("    __target_intrinsic(glsl, \"uvec4($0._data[$1], $0._data[$1+1], $0._data[$1+2], $0._data[$1+3])\")\n")
 SLANG_RAW("    uint4 Load4(int location);\n")
 SLANG_RAW("\n")
 SLANG_RAW("    uint4 Load4(int location, out uint status);\n")
@@ -64,7 +64,11 @@ SLANG_RAW("\n")
 SLANG_RAW("    T Load(int location);\n")
 SLANG_RAW("    T Load(int location, out uint status);\n")
 SLANG_RAW("\n")
-SLANG_RAW("    __subscript(uint index) -> T { __intrinsic_op(bufferLoad) get; };\n")
+SLANG_RAW("    __subscript(uint index) -> T\n")
+SLANG_RAW("    {\n")
+SLANG_RAW("        __target_intrinsic(glsl, \"$0._data[$1]\")\n")
+SLANG_RAW("        get;\n")
+SLANG_RAW("    };\n")
 SLANG_RAW("};\n")
 SLANG_RAW("\n")
 SLANG_RAW("__generic<T>\n")
@@ -132,133 +136,133 @@ SLANG_RAW("{\n")
 SLANG_RAW("    // Note(tfoley): supports all operations from `ByteAddressBuffer`\n")
 SLANG_RAW("    // TODO(tfoley): can this be made a sub-type?\n")
 SLANG_RAW("\n")
-SLANG_RAW("    __target_intrinsic(glsl, \"$1 = $0.length()\")\n")
+SLANG_RAW("    __target_intrinsic(glsl, \"$1 = $0._data.length()\")\n")
 SLANG_RAW("    void GetDimensions(\n")
 SLANG_RAW("        out uint dim);\n")
 SLANG_RAW("\n")
-SLANG_RAW("    __target_intrinsic(glsl, \"$0[$1]\")\n")
+SLANG_RAW("    __target_intrinsic(glsl, \"$0._data[$1]\")\n")
 SLANG_RAW("    uint Load(int location);\n")
 SLANG_RAW("\n")
 SLANG_RAW("    uint Load(int location, out uint status);\n")
 SLANG_RAW("\n")
-SLANG_RAW("    __target_intrinsic(glsl, \"uvec2($0[$1], $0[$1+4])\")\n")
+SLANG_RAW("    __target_intrinsic(glsl, \"uvec2($0._data[$1], $0._data[$1+4])\")\n")
 SLANG_RAW("    uint2 Load2(int location);\n")
 SLANG_RAW("\n")
 SLANG_RAW("    uint2 Load2(int location, out uint status);\n")
 SLANG_RAW("\n")
-SLANG_RAW("    __target_intrinsic(glsl, \"uvec3($0[$1], $0[$1+4], $0[$1+8])\")\n")
+SLANG_RAW("    __target_intrinsic(glsl, \"uvec3($0._data[$1], $0._data[$1+4], $0._data[$1+8])\")\n")
 SLANG_RAW("    uint3 Load3(int location);\n")
 SLANG_RAW("\n")
 SLANG_RAW("    uint3 Load3(int location, out uint status);\n")
 SLANG_RAW("\n")
-SLANG_RAW("    __target_intrinsic(glsl, \"uvec4($0[$1], $0[$1+4], $0[$1+8], $0[$1+12])\")\n")
+SLANG_RAW("    __target_intrinsic(glsl, \"uvec4($0._data[$1], $0._data[$1+4], $0._data[$1+8], $0._data[$1+12])\")\n")
 SLANG_RAW("    uint4 Load4(int location);\n")
 SLANG_RAW("\n")
 SLANG_RAW("    uint4 Load4(int location, out uint status);\n")
 SLANG_RAW("\n")
 SLANG_RAW("    // Added operations:\n")
 SLANG_RAW("\n")
-SLANG_RAW("    __target_intrinsic(glsl, \"($3 = atomicAdd($0[$1], $2))\")\n")
+SLANG_RAW("    __target_intrinsic(glsl, \"($3 = atomicAdd($0._data[$1], $2))\")\n")
 SLANG_RAW("    void InterlockedAdd(\n")
 SLANG_RAW("        UINT dest,\n")
 SLANG_RAW("        UINT value,\n")
 SLANG_RAW("        out UINT original_value);\n")
 SLANG_RAW("\n")
-SLANG_RAW("    __target_intrinsic(glsl, \"atomicAdd($0[$1], $2)\")\n")
+SLANG_RAW("    __target_intrinsic(glsl, \"atomicAdd($0._data[$1], $2)\")\n")
 SLANG_RAW("    void InterlockedAdd(\n")
 SLANG_RAW("        UINT dest,\n")
 SLANG_RAW("        UINT value);\n")
 SLANG_RAW("\n")
-SLANG_RAW("    __target_intrinsic(glsl, \"($3 = atomicAnd($0[$1], $2))\")\n")
+SLANG_RAW("    __target_intrinsic(glsl, \"($3 = atomicAnd($0._data[$1], $2))\")\n")
 SLANG_RAW("    void InterlockedAnd(\n")
 SLANG_RAW("        UINT dest,\n")
 SLANG_RAW("        UINT value,\n")
 SLANG_RAW("        out UINT original_value);\n")
 SLANG_RAW("\n")
-SLANG_RAW("    __target_intrinsic(glsl, \"atomicAnd($0[$1], $2)\")\n")
+SLANG_RAW("    __target_intrinsic(glsl, \"atomicAnd($0._data[$1], $2)\")\n")
 SLANG_RAW("    void InterlockedAnd(\n")
 SLANG_RAW("        UINT dest,\n")
 SLANG_RAW("        UINT value);\n")
 SLANG_RAW("\n")
-SLANG_RAW("    __target_intrinsic(glsl, \"($4 = atomicCompSwap($0[$1], $2, $3))\")\n")
+SLANG_RAW("    __target_intrinsic(glsl, \"($4 = atomicCompSwap($0._data[$1], $2, $3))\")\n")
 SLANG_RAW("    void InterlockedCompareExchange(\n")
 SLANG_RAW("        UINT dest,\n")
 SLANG_RAW("        UINT compare_value,\n")
 SLANG_RAW("        UINT value,\n")
 SLANG_RAW("        out UINT original_value);\n")
 SLANG_RAW("\n")
-SLANG_RAW("    __target_intrinsic(glsl, \"atomicCompSwap($0[$1], $2, $3)\")\n")
+SLANG_RAW("    __target_intrinsic(glsl, \"atomicCompSwap($0._data[$1], $2, $3)\")\n")
 SLANG_RAW("    void InterlockedCompareStore(\n")
 SLANG_RAW("        UINT dest,\n")
 SLANG_RAW("        UINT compare_value,\n")
 SLANG_RAW("        UINT value);\n")
 SLANG_RAW("\n")
-SLANG_RAW("    __target_intrinsic(glsl, \"($3 = atomicExchange($0[$1], $2))\")\n")
+SLANG_RAW("    __target_intrinsic(glsl, \"($3 = atomicExchange($0._data[$1], $2))\")\n")
 SLANG_RAW("    void InterlockedExchange(\n")
 SLANG_RAW("        UINT dest,\n")
 SLANG_RAW("        UINT value,\n")
 SLANG_RAW("        out UINT original_value);\n")
 SLANG_RAW("\n")
-SLANG_RAW("    __target_intrinsic(glsl, \"($3 = atomicMax($0[$1], $2))\")\n")
+SLANG_RAW("    __target_intrinsic(glsl, \"($3 = atomicMax($0._data[$1], $2))\")\n")
 SLANG_RAW("    void InterlockedMax(\n")
 SLANG_RAW("        UINT dest,\n")
 SLANG_RAW("        UINT value,\n")
 SLANG_RAW("        out UINT original_value);\n")
 SLANG_RAW("\n")
-SLANG_RAW("    __target_intrinsic(glsl, \"atomicMax($0[$1], $2)\")\n")
+SLANG_RAW("    __target_intrinsic(glsl, \"atomicMax($0._data[$1], $2)\")\n")
 SLANG_RAW("    void InterlockedMax(\n")
 SLANG_RAW("        UINT dest,\n")
 SLANG_RAW("        UINT value);\n")
 SLANG_RAW("\n")
-SLANG_RAW("    __target_intrinsic(glsl, \"($3 = atomicMin($0[$1], $2))\")\n")
+SLANG_RAW("    __target_intrinsic(glsl, \"($3 = atomicMin($0._data[$1], $2))\")\n")
 SLANG_RAW("    void InterlockedMin(\n")
 SLANG_RAW("        UINT dest,\n")
 SLANG_RAW("        UINT value,\n")
 SLANG_RAW("        out UINT original_value);\n")
 SLANG_RAW("\n")
-SLANG_RAW("    __target_intrinsic(glsl, \"atomicMin($0[$1], $2)\")\n")
+SLANG_RAW("    __target_intrinsic(glsl, \"atomicMin($0._data[$1], $2)\")\n")
 SLANG_RAW("    void InterlockedMin(\n")
 SLANG_RAW("        UINT dest,\n")
 SLANG_RAW("        UINT value);\n")
 SLANG_RAW("\n")
-SLANG_RAW("    __target_intrinsic(glsl, \"($3 = atomicOr($0[$1], $2))\")\n")
+SLANG_RAW("    __target_intrinsic(glsl, \"($3 = atomicOr($0._data[$1], $2))\")\n")
 SLANG_RAW("    void InterlockedOr(\n")
 SLANG_RAW("        UINT dest,\n")
 SLANG_RAW("        UINT value,\n")
 SLANG_RAW("        out UINT original_value);\n")
 SLANG_RAW("\n")
-SLANG_RAW("    __target_intrinsic(glsl, \"atomicOr($0[$1], $2)\")\n")
+SLANG_RAW("    __target_intrinsic(glsl, \"atomicOr($0._data[$1], $2)\")\n")
 SLANG_RAW("    void InterlockedOr(\n")
 SLANG_RAW("        UINT dest,\n")
 SLANG_RAW("        UINT value);\n")
 SLANG_RAW("\n")
-SLANG_RAW("    __target_intrinsic(glsl, \"($3 = atomicXor($0[$1], $2))\")\n")
+SLANG_RAW("    __target_intrinsic(glsl, \"($3 = atomicXor($0._data[$1], $2))\")\n")
 SLANG_RAW("    void InterlockedXor(\n")
 SLANG_RAW("        UINT dest,\n")
 SLANG_RAW("        UINT value,\n")
 SLANG_RAW("        out UINT original_value);\n")
 SLANG_RAW("\n")
-SLANG_RAW("    __target_intrinsic(glsl, \"atomicXor($0[$1], $2)\")\n")
+SLANG_RAW("    __target_intrinsic(glsl, \"atomicXor($0._data[$1], $2)\")\n")
 SLANG_RAW("    void InterlockedXor(\n")
 SLANG_RAW("        UINT dest,\n")
 SLANG_RAW("        UINT value);\n")
 SLANG_RAW("\n")
-SLANG_RAW("    __target_intrinsic(glsl, \"$0[$1] = $2\")\n")
+SLANG_RAW("    __target_intrinsic(glsl, \"$0._data[$1] = $2\")\n")
 SLANG_RAW("    void Store(\n")
 SLANG_RAW("        uint address,\n")
 SLANG_RAW("        uint value);\n")
 SLANG_RAW("\n")
-SLANG_RAW("    __target_intrinsic(glsl, \"$0[$1] = $2.x, $0[$1+4] = $2.y\")\n")
+SLANG_RAW("    __target_intrinsic(glsl, \"$0._data[$1] = $2.x, $0._data[$1+4] = $2.y\")\n")
 SLANG_RAW("    void Store2(\n")
 SLANG_RAW("        uint address,\n")
 SLANG_RAW("        uint2 value);\n")
 SLANG_RAW("\n")
-SLANG_RAW("    __target_intrinsic(glsl, \"$0[$1] = $2.x, $0[$1+4] = $2.y, $0[$1+8] = $2.z\")\n")
+SLANG_RAW("    __target_intrinsic(glsl, \"$0._data[$1] = $2.x, $0._data[$1+4] = $2.y, $0._data[$1+8] = $2.z\")\n")
 SLANG_RAW("    void Store3(\n")
 SLANG_RAW("        uint address,\n")
 SLANG_RAW("        uint3 value);\n")
 SLANG_RAW("\n")
-SLANG_RAW("    __target_intrinsic(glsl, \"$0[$1] = $2.x, $0[$1+4] = $2.y, $0[$1+8] = $2.z, $0[$1+12] = $2.w\")\n")
+SLANG_RAW("    __target_intrinsic(glsl, \"$0._data[$1] = $2.x, $0._data[$1+4] = $2.y, $0._data[$1+8] = $2.z, $0._data[$1+12] = $2.w\")\n")
 SLANG_RAW("    void Store4(\n")
 SLANG_RAW("        uint address,\n")
 SLANG_RAW("        uint4 value);\n")
@@ -306,11 +310,11 @@ SLANG_RAW("\n")
 SLANG_RAW("    T Load(int location);\n")
 SLANG_RAW("    T Load(int location, out uint status);\n")
 SLANG_RAW("\n")
-SLANG_RAW("\t__subscript(uint index) -> T\n")
-SLANG_RAW("\t{\n")
-SLANG_RAW("        __intrinsic_op(bufferElementRef)\n")
+SLANG_RAW("    __subscript(uint index) -> T\n")
+SLANG_RAW("    {\n")
+SLANG_RAW("        __target_intrinsic(glsl, \"$0._data[$1]\")\n")
 SLANG_RAW("        ref;\n")
-SLANG_RAW("\t}\n")
+SLANG_RAW("    }\n")
 SLANG_RAW("};\n")
 SLANG_RAW("\n")
 
diff --git a/source/slang/ir-inst-defs.h b/source/slang/ir-inst-defs.h
index 8f997cbe2..09c11ed16 100644
--- a/source/slang/ir-inst-defs.h
+++ b/source/slang/ir-inst-defs.h
@@ -217,10 +217,6 @@ INST(Var, var, 0, 0)
 INST(Load, load, 1, 0)
 INST(Store, store, 2, 0)
 
-INST(BufferLoad, bufferLoad, 2, 0)
-INST(BufferStore, bufferStore, 3, 0)
-INST(BufferElementRef, bufferElementRef, 2, 0)
-
 INST(FieldExtract, get_field, 2, 0)
 INST(FieldAddress, get_field_addr, 2, 0)
 
diff --git a/source/slang/ir.cpp b/source/slang/ir.cpp
index 599b02ea7..2f16f4ebc 100644
--- a/source/slang/ir.cpp
+++ b/source/slang/ir.cpp
@@ -3576,7 +3576,6 @@ namespace Slang
         case kIROp_makeArray:
         case kIROp_makeStruct:
         case kIROp_Load:    // We are ignoring the possibility of loads from bad addresses, or `volatile` loads
-        case kIROp_BufferLoad:
         case kIROp_FieldExtract:
         case kIROp_FieldAddress:
         case kIROp_getElement:
diff --git a/source/slang/type-layout.cpp b/source/slang/type-layout.cpp
index 8fc48fe4f..2d21d7aef 100644
--- a/source/slang/type-layout.cpp
+++ b/source/slang/type-layout.cpp
@@ -2276,6 +2276,17 @@ RefPtr<TypeLayout> CreateTypeLayout(
     return typeLayout;
 }
 
+RefPtr<TypeLayout> TypeLayout::unwrapArray() // Strips every enclosing array layer from this layout.
+{
+    TypeLayout* typeLayout = this; // Start from this layout and peel array layers off it.
+
+    while(auto arrayTypeLayout = dynamic_cast<ArrayTypeLayout*>(typeLayout)) // Cast fails (ending the loop) at the first non-array or null layout.
+        typeLayout = arrayTypeLayout->elementTypeLayout; // Step into the element layout, so nested arrays are unwrapped too.
+
+    return typeLayout; // The innermost non-array layout; `this` itself when this is not an array layout.
+}
+
+
 RefPtr<GlobalGenericParamDecl> GenericParamTypeLayout::getGlobalGenericParamDecl()
 {
     auto declRefType = type->AsDeclRefType();
diff --git a/source/slang/type-layout.h b/source/slang/type-layout.h
index fa874cb80..6f6dad055 100644
--- a/source/slang/type-layout.h
+++ b/source/slang/type-layout.h
@@ -354,6 +354,13 @@ public:
         info.count = count;
         addResourceUsage(info);
     }
+
+        /// "Unwrap" any layers of array-ness from this type layout.
+        ///
+        /// If this is an `ArrayTypeLayout`, returns the result of unwrapping the element type layout.
+        /// Otherwise, returns this type layout.
+        ///
+    RefPtr<TypeLayout> unwrapArray();
 };
 
 typedef unsigned int VarLayoutFlags;
diff --git a/source/slang/vm.cpp b/source/slang/vm.cpp
index fa59a741b..0f79c763b 100644
--- a/source/slang/vm.cpp
+++ b/source/slang/vm.cpp
@@ -846,72 +846,6 @@ void resumeThread(
             }
             break;
 
-        case kIROp_BufferLoad:
-            {
-                VMType type = decodeType(frame, &ip);
-                UInt argCount = decodeUInt(&ip);
-                void* argPtrs[16] = { 0 };
-                for( UInt aa = 0; aa < argCount; ++aa )
-                {
-                    void* argPtr = decodeOperandPtr<void>(frame, &ip);
-                    argPtrs[aa] = argPtr;
-                }
-
-                void* dest = decodeOperandPtr<void>(frame, &ip);
-
-                char* bufferData = *(char**)argPtrs[0];
-                uint32_t index = *(uint32_t*)argPtrs[1];
-
-                auto size = type.getSize();
-                char* elementData = bufferData + index*size;
-                memcpy(dest, elementData, size);
-            }
-            break;
-
-        case kIROp_BufferStore:
-            {
-                VMType resultType = decodeType(frame, &ip);
-                /*UInt argCount = */decodeUInt(&ip);
-
-                char* bufferData = decodeOperand<char*>(frame, &ip);
-                uint32_t index = decodeOperand<uint32_t>(frame, &ip);
-
-                auto srcPtrAndType = decodeOperandPtrAndType(frame, &ip);
-                void* srcPtr = srcPtrAndType.ptr;
-                VMType type = srcPtrAndType.type;
-
-                auto size = type.getSize();
-                char* elementData = bufferData + index*size;
-                memcpy(elementData, srcPtr, size);
-            }
-            break;
-
-        case kIROp_BufferElementRef:
-            {
-                VMType ptrType = decodeType(frame, &ip);
-                VMType type = ((VMPtrTypeImpl*)ptrType.getImpl())->base;
-
-                UInt argCount = decodeUInt(&ip);
-                void* argPtrs[16] = { 0 };
-                for( UInt aa = 0; aa < argCount; ++aa )
-                {
-                    void* argPtr = decodeOperandPtr<void>(frame, &ip);
-                    argPtrs[aa] = argPtr;
-                }
-
-                void* dest = decodeOperandPtr<void>(frame, &ip);
-
-                char* bufferData = *(char**)argPtrs[0];
-                uint32_t index = *(uint32_t*)argPtrs[1];
-
-                auto size = type.getSize();
-                char* elementData = bufferData + index*size;
-
-                *(void**)dest = elementData;
-            }
-            break;
-
-
         case kIROp_Call:
             {
                 VMType type = decodeType(frame, &ip);
diff --git a/tests/bindings/binding0.hlsl b/tests/bindings/binding0.hlsl
index 85f17e940..5516b0135 100644
--- a/tests/bindings/binding0.hlsl
+++ b/tests/bindings/binding0.hlsl
@@ -6,8 +6,15 @@
 
 #ifdef __SLANG__
 #define R(X) /**/
+#define BEGIN_CBUFFER(NAME) cbuffer NAME
+#define END_CBUFFER(NAME, REG) /**/
+#define CBUFFER_REF(NAME, FIELD) FIELD
+
 #else
 #define R(X) X
+#define BEGIN_CBUFFER(NAME) struct SLANG_ParameterGroup_##NAME
+#define END_CBUFFER(NAME, REG) ; cbuffer NAME : REG { SLANG_ParameterGroup_##NAME NAME; }
+#define CBUFFER_REF(NAME, FIELD) NAME.FIELD
 
 #define C C_0
 #define t t_0
@@ -22,12 +29,13 @@ float4 use(Texture2D t, SamplerState s) { return t.Sample(s, 0.0); }
 Texture2D 		t R(: register(t0));
 SamplerState 	s R(: register(s0));
 
-cbuffer C R(: register(b0))
+BEGIN_CBUFFER(C)
 {
 	float c;
 }
+END_CBUFFER(C, register(b0))
 
 float4 main() : SV_TARGET
 {
-	return use(t,s) + use(c);
+	return use(t,s) + use(CBUFFER_REF(C,c));
 }
 \ No newline at end of file
diff --git a/tests/bindings/binding1.hlsl b/tests/bindings/binding1.hlsl
index 8709c31c6..47ab22bb9 100644
--- a/tests/bindings/binding1.hlsl
+++ b/tests/bindings/binding1.hlsl
@@ -13,8 +13,14 @@
 
 #ifdef __SLANG__
 #define R(X) /**/
+#define BEGIN_CBUFFER(NAME) cbuffer NAME
+#define END_CBUFFER(NAME, REG) /**/
+#define CBUFFER_REF(NAME, FIELD) FIELD
 #else
 #define R(X) X
+#define BEGIN_CBUFFER(NAME) struct SLANG_ParameterGroup_##NAME
+#define END_CBUFFER(NAME, REG) ; cbuffer NAME : REG { SLANG_ParameterGroup_##NAME NAME; }
+#define CBUFFER_REF(NAME, FIELD) NAME.FIELD
 
 #define tB tB_0
 #define sB sB_0
@@ -32,17 +38,19 @@ Texture2D 		tB R(: register(t1));
 SamplerState 	sA R(: register(s0));
 SamplerState 	sB R(: register(s1));
 
-cbuffer C0 R(: register(b0))
+BEGIN_CBUFFER(C0)
 {
 	float c0;
 }
+END_CBUFFER(C0, register(b0))
 
-cbuffer C1 R(: register(b1))
+BEGIN_CBUFFER(C1)
 {
 	float c1;
 }
+END_CBUFFER(C1, register(b1))
 
 float4 main() : SV_TARGET
 {
-	return use(tB,sB) + use(c1);
+	return use(tB,sB) + use(CBUFFER_REF(C1,c1));
 }
 \ No newline at end of file
diff --git a/tests/bindings/explicit-binding.hlsl b/tests/bindings/explicit-binding.hlsl
index 9c38cdee0..420eafec1 100644
--- a/tests/bindings/explicit-binding.hlsl
+++ b/tests/bindings/explicit-binding.hlsl
@@ -5,8 +5,16 @@
 
 #ifdef __SLANG__
 #define R(X) /**/
+#define BEGIN_CBUFFER(NAME) cbuffer NAME
+#define BEGIN_CBUFFER_R(NAME, REG) cbuffer NAME : REG
+#define END_CBUFFER(NAME, REG) /**/
+#define CBUFFER_REF(NAME, FIELD) FIELD
 #else
 #define R(X) X
+#define BEGIN_CBUFFER(NAME) struct SLANG_ParameterGroup_##NAME
+#define BEGIN_CBUFFER_R(NAME, REG) BEGIN_CBUFFER(NAME)
+#define END_CBUFFER(NAME, REG) ; cbuffer NAME : REG { SLANG_ParameterGroup_##NAME NAME; }
+#define CBUFFER_REF(NAME, FIELD) NAME.FIELD
 
 #define CA CA_0
 #define ca ca_0
@@ -49,25 +57,29 @@ SamplerState 	sc : register(s1);
 // and even to make things non-contiguous. Here we bind
 // the third constnat buffer to register `b9`
 //
-cbuffer CA R(: register(b0))
+BEGIN_CBUFFER(CA)
 {
 	float ca;
 }
+END_CBUFFER(CA, register(b0))
+
 //
-cbuffer CB R(: register(b1))
+BEGIN_CBUFFER(CB)
 {
 	float cb;
 }
+END_CBUFFER(CB, register(b1))
 //
-cbuffer CC : register(b9)
+BEGIN_CBUFFER_R(CC, register(b9))
 {
 	float cc;
 }
+END_CBUFFER(CC, register(b9))
 
 float4 main() : SV_TARGET
 {
 	// Go ahead and use everything in this case:
-	return use(ta, sa) + use(ca)
-		+  use(tb, sb) + use(cb)
-		+  use(tc, sc) + use(cc);
+	return use(ta, sa) + use(CBUFFER_REF(CA,ca))
+		+  use(tb, sb) + use(CBUFFER_REF(CB,cb))
+		+  use(tc, sc) + use(CBUFFER_REF(CC,cc));
 }
 \ No newline at end of file
diff --git a/tests/bindings/glsl-parameter-blocks.slang.glsl b/tests/bindings/glsl-parameter-blocks.slang.glsl
index 3ade8bb6b..a27fbb3db 100644
--- a/tests/bindings/glsl-parameter-blocks.slang.glsl
+++ b/tests/bindings/glsl-parameter-blocks.slang.glsl
@@ -25,8 +25,8 @@ struct Test
 layout(binding = 0)
 uniform ParameterBlock_gTest
 {
-    Test gTest;
-};
+    Test _data;
+} gTest;
 
 layout(binding = 1)
 uniform texture2D gTest_t;
@@ -42,7 +42,7 @@ in vec2 uv;
 
 void main()
 {
-	vec4 temp_a = gTest.a;
+	vec4 temp_a = gTest._data.a;
 
     vec4 temp_sample = texture(sampler2D(gTest_t, gTest_s), uv);
 
diff --git a/tests/bindings/multi-file-extra.hlsl b/tests/bindings/multi-file-extra.hlsl
index 92227d54a..fe8766dcd 100644
--- a/tests/bindings/multi-file-extra.hlsl
+++ b/tests/bindings/multi-file-extra.hlsl
@@ -7,8 +7,14 @@
 
 #ifdef __SLANG__
 #define R(X) /**/
+#define BEGIN_CBUFFER(NAME) cbuffer NAME
+#define END_CBUFFER(NAME, REG) /**/
+#define CBUFFER_REF(NAME, FIELD) FIELD
 #else
 #define R(X) X
+#define BEGIN_CBUFFER(NAME) struct SLANG_ParameterGroup_##NAME
+#define END_CBUFFER(NAME, REG) ; cbuffer NAME : REG { SLANG_ParameterGroup_##NAME NAME; }
+#define CBUFFER_REF(NAME, FIELD) NAME.FIELD
 
 #define sharedC     sharedC_0
 #define sharedCA    sharedCA_0
@@ -50,13 +56,15 @@ float4 use(Texture2D t, SamplerState s) { return t.Sample(s, 0.0); }
 // Start with some parameters that will appear in both shaders
 Texture2D sharedT R(: register(t0));
 SamplerState sharedS R(: register(s0));
-cbuffer sharedC R(: register(b0))
+
+BEGIN_CBUFFER(sharedC)
 {
-	float3 sharedCA R(: packoffset(c0));
-	float  sharedCB R(: packoffset(c0.w));
-	float3 sharedCC R(: packoffset(c1));
-	float2 sharedCD R(: packoffset(c2));
+	float3 sharedCA;
+	float  sharedCB;
+	float3 sharedCC;
+	float2 sharedCD;
 }
+END_CBUFFER(sharedC, register(b0))
 
 // Then some parameters specific to this shader.
 // These will be placed *after* the ones from the main file,
@@ -65,13 +73,15 @@ cbuffer sharedC R(: register(b0))
 
 Texture2D fragmentT R(: register(t4));
 SamplerState fragmentS R(: register(s2));
-cbuffer fragmentC R(: register(b2))
+
+BEGIN_CBUFFER(fragmentC)
 {
-	float3 fragmentCA R(: packoffset(c0));
-	float  fragmentCB R(: packoffset(c0.w));
-	float3 fragmentCC R(: packoffset(c1));
-	float2 fragmentCD R(: packoffset(c2));
+	float3 fragmentCA;
+	float  fragmentCB;
+	float3 fragmentCC;
+	float2 fragmentCD;
 }
+END_CBUFFER(fragmentC, register(b2))
 
 // And end with some shared parameters again
 Texture2D sharedTV R(: register(t2));
@@ -82,9 +92,9 @@ float4 main() : SV_TARGET
 {
 	// Go ahead and use everything here, just to make sure things got placed correctly
 	return use(sharedT, sharedS)
-		+  use(sharedCD)
+		+  use(CBUFFER_REF(sharedC,sharedCD))
 		+  use(fragmentT, fragmentS)
-		+  use(fragmentCD)
+		+  use(CBUFFER_REF(fragmentC, fragmentCD))
 		+  use(sharedTF, sharedS)
 		;
 }
 \ No newline at end of file
diff --git a/tests/bindings/multi-file.hlsl b/tests/bindings/multi-file.hlsl
index 6269c703e..8c719bbcf 100644
--- a/tests/bindings/multi-file.hlsl
+++ b/tests/bindings/multi-file.hlsl
@@ -8,8 +8,14 @@
 
 #ifdef __SLANG__
 #define R(X) /**/
+#define BEGIN_CBUFFER(NAME) cbuffer NAME
+#define END_CBUFFER(NAME, REG) /**/
+#define CBUFFER_REF(NAME, FIELD) FIELD
 #else
 #define R(X) X
+#define BEGIN_CBUFFER(NAME) struct SLANG_ParameterGroup_##NAME
+#define END_CBUFFER(NAME, REG) ; cbuffer NAME : REG { SLANG_ParameterGroup_##NAME NAME; }
+#define CBUFFER_REF(NAME, FIELD) NAME.FIELD
 
 #define sharedC     sharedC_0
 #define sharedCA    sharedCA_0
@@ -55,13 +61,15 @@ float4 use(Texture2D t, SamplerState s)
 // Start with some parameters that will appear in both shaders
 Texture2D sharedT R(: register(t0));
 SamplerState sharedS R(: register(s0));
-cbuffer sharedC R(: register(b0))
+
+BEGIN_CBUFFER(sharedC)
 {
-    float3 sharedCA R(: packoffset(c0));
-    float  sharedCB R(: packoffset(c0.w));
-    float3 sharedCC R(: packoffset(c1));
-    float2 sharedCD R(: packoffset(c2));
+    float3 sharedCA;
+    float  sharedCB;
+    float3 sharedCC;
+    float2 sharedCD;
 }
+END_CBUFFER(sharedC, register(b0))
 
 // Then some parameters specific to this shader
 // (these will get placed before the ones in the `extra` file,
@@ -69,13 +77,15 @@ cbuffer sharedC R(: register(b0))
 
 Texture2D vertexT R(: register(t1));
 SamplerState vertexS R(: register(s1));
-cbuffer vertexC R(: register(b1))
+
+BEGIN_CBUFFER(vertexC)
 {
-    float3 vertexCA R(: packoffset(c0));
-    float  vertexCB R(: packoffset(c0.w));
-    float3 vertexCC R(: packoffset(c1));
-    float2 vertexCD R(: packoffset(c2));
+    float3 vertexCA;
+    float  vertexCB;
+    float3 vertexCC;
+    float2 vertexCD;
 }
+END_CBUFFER(vertexC, register(b1))
 
 // And end with some shared parameters again
 Texture2D sharedTV R(: register(t2));
@@ -86,9 +96,9 @@ float4 main() : SV_POSITION
 {
     // Go ahead and use everything here, just to make sure things got placed correctly
     return use(sharedT, sharedS)
-        +  use(sharedCD)
+        +  use(CBUFFER_REF(sharedC, sharedCD))
         +  use(vertexT, vertexS)
-        +  use(vertexCD)
+        +  use(CBUFFER_REF(vertexC, vertexCD))
         +  use(sharedTV, vertexS)
         ;
 }
 \ No newline at end of file
diff --git a/tests/bindings/packoffset.hlsl b/tests/bindings/packoffset.hlsl
deleted file mode 100644
index 81913d672..000000000
--- a/tests/bindings/packoffset.hlsl
+++ /dev/null
@@ -1,51 +0,0 @@
-//TEST:COMPARE_HLSL:-no-mangle -profile ps_4_0 -entry main
-
-// Let's make sure we generate correct output in cases
-// where there are non-trivial `packoffset`s needed
-
-#ifdef __SLANG__
-#define R(X) /**/
-#else
-#define R(X) X
-
-#define CA CA_0
-#define ca ca_0
-#define cb cb_0
-#define cc cc_0
-#define cd cd_0
-#define ce ce_0
-
-#define ta CA_ta_0
-#define sa CA_sa_0
-
-#endif
-
-float4 use(float  val) { return val; };
-float4 use(float2 val) { return float4(val,0.0,0.0); };
-float4 use(float3 val) { return float4(val,0.0); };
-float4 use(float4 val) { return val; };
-float4 use(Texture2D t, SamplerState s) { return t.Sample(s, 0.0); }
-
-cbuffer CA R(: register(b0))
-{
-	float4 ca R(: packoffset(c0));
-	float3 cb R(: packoffset(c1.x));
-	float  cc R(: packoffset(c1.w));
-	float2 cd R(: packoffset(c2.x));
-	float2 ce R(: packoffset(c2.z));
-
-	Texture2D ta R(: register(t0));
-	SamplerState sa R(: register(s0));
-}
-
-float4 main() : SV_TARGET
-{
-	// Go ahead and use everything in this case:
-	return use(ta, sa)
-		+  use(ca)
-		+  use(cb)
-		+  use(cc)
-		+  use(cd)
-		+  use(ce)
-		;
-}
-\ No newline at end of file
diff --git a/tests/bindings/resources-in-cbuffer.hlsl b/tests/bindings/resources-in-cbuffer.hlsl
index 4d3d381d9..71eaf40aa 100644
--- a/tests/bindings/resources-in-cbuffer.hlsl
+++ b/tests/bindings/resources-in-cbuffer.hlsl
@@ -2,12 +2,20 @@
 
 // Confirm that resources inside constant buffers get correct locations,
 // including the case where there are *multiple* constant buffers
-// with reosurces.
+// with resources.
 
 #ifdef __SLANG__
 #define R(X) /**/
+#define BEGIN_CBUFFER(NAME) cbuffer NAME {
+#define MID_CBUFFER(NAME) /**/
+#define END_CBUFFER(NAME, REG) /**/ }
+#define CBUFFER_REF(NAME, FIELD) FIELD
 #else
 #define R(X) X
+#define BEGIN_CBUFFER(NAME) struct SLANG_ParameterGroup_##NAME {
+#define MID_CBUFFER(NAME) };
+#define END_CBUFFER(NAME, REG) cbuffer NAME : REG { SLANG_ParameterGroup_##NAME NAME; }
+#define CBUFFER_REF(NAME, FIELD) NAME.FIELD
 
 #define CA CA_0
 #define caa caa_0
@@ -46,43 +54,52 @@ float4 use(float3 val) { return float4(val,0.0); };
 float4 use(float4 val) { return val; };
 float4 use(Texture2D t, SamplerState s) { return t.Sample(s, 0.0); }
 
-cbuffer CA R(: register(b0))
-{
-	float4 caa R(: packoffset(c0));
-	float3 cab R(: packoffset(c1.x));
-	float  cac R(: packoffset(c1.w));
-	float2 cad R(: packoffset(c2.x));
-	float2 cae R(: packoffset(c2.z));
+BEGIN_CBUFFER(CA)
+
+	float4 caa;
+	float3 cab;
+	float  cac;
+	float2 cad;
+	float2 cae;
+
+MID_CBUFFER(CA)
 
 	Texture2D ta R(: register(t0));
 	SamplerState sa R(: register(s0));
-}
 
-cbuffer CB R(: register(b1))
-{
-	float4 cba R(: packoffset(c0));
-	float3 cbb R(: packoffset(c1.x));
-	float  cbc R(: packoffset(c1.w));
-	float2 cbd R(: packoffset(c2.x));
-	float2 cbe R(: packoffset(c2.z));
+END_CBUFFER(CA, register(b0))
+
+BEGIN_CBUFFER(CB)
+
+	float4 cba;
+	float3 cbb;
+	float  cbc;
+	float2 cbd;
+	float2 cbe;
+
+MID_CBUFFER(CB)
 
 	Texture2D tbx R(: register(t1));
 	Texture2D tby R(: register(t2));
 	SamplerState sb R(: register(s1));
-}
 
-cbuffer CC R(: register(b2))
-{
-	float4 cca R(: packoffset(c0));
-	float3 ccb R(: packoffset(c1.x));
-	float  ccc R(: packoffset(c1.w));
-	float2 ccd R(: packoffset(c2.x));
-	float2 cce R(: packoffset(c2.z));
+END_CBUFFER(CB, register(b1))
+
+BEGIN_CBUFFER(CC)
+
+	float4 cca;
+	float3 ccb;
+	float  ccc;
+	float2 ccd;
+	float2 cce;
+
+MID_CBUFFER(CC)
 
 	Texture2D tc R(: register(t3));
 	SamplerState scx R(: register(s2));
 	SamplerState scy R(: register(s3));
-}
+
+END_CBUFFER(CC, register(b2))
 
 float4 main() : SV_TARGET
 {
@@ -91,8 +108,8 @@ float4 main() : SV_TARGET
 		+  use(tbx, sb)
 		+  use(tby, scx)
 		+  use(tc,  scy)
-		+  use(cae)
-		+  use(cbe)
-		+  use(cce)
+		+  use(CBUFFER_REF(CA, cae))
+		+  use(CBUFFER_REF(CB, cbe))
+		+  use(CBUFFER_REF(CC, cce))
 		;
 }
 \ No newline at end of file
diff --git a/tests/bugs/gh-103.slang b/tests/bugs/gh-103.slang
index 65e71837b..4bad20b20 100644
--- a/tests/bugs/gh-103.slang
+++ b/tests/bugs/gh-103.slang
@@ -2,7 +2,15 @@
 
 // Ensure that matrix-times-scalar works
 
-#ifndef __SLANG__
+#ifdef __SLANG__
+#define BEGIN_CBUFFER(NAME) cbuffer NAME
+#define END_CBUFFER(NAME, REG) /**/
+#define CBUFFER_REF(NAME, FIELD) FIELD
+#else
+#define BEGIN_CBUFFER(NAME) struct SLANG_ParameterGroup_##NAME
+#define END_CBUFFER(NAME, REG) ; cbuffer NAME : REG { SLANG_ParameterGroup_##NAME NAME; }
+#define CBUFFER_REF(NAME, FIELD) NAME.FIELD
+
 #define C C_0
 #define a a_0
 #define b b_0
@@ -10,16 +18,19 @@
 
 float4x4 doIt(float4x4 a, float b)
 {
-	return a * b;
+    return a * b;
 }
 
-cbuffer C
+BEGIN_CBUFFER(C)
 {
-	float4x4 a;
-	float b;	
-};
+    float4x4 a;
+    float b;    
+}
+END_CBUFFER(C,register(b0))
 
 float4 main() : SV_TARGET
 {
-	return doIt(a, b)[0];
+    return doIt(
+        CBUFFER_REF(C,a),
+        CBUFFER_REF(C,b))[0];
 }
diff --git a/tests/bugs/gh-172.slang b/tests/bugs/gh-172.slang
index dd5f4d47a..6d9e69503 100644
--- a/tests/bugs/gh-172.slang
+++ b/tests/bugs/gh-172.slang
@@ -8,22 +8,27 @@
 
 cbuffer C
 {
-	Texture2D t0, t1;
-	SamplerState s;
-	float2 uv;
+    Texture2D t0, t1;
+    SamplerState s;
+    float2 uv;
 };
 
 float4 main() : SV_Target
 {
-	return t0.Sample(s, uv)
+    return t0.Sample(s, uv)
          + t1.Sample(s, uv);
 }
 
 #else
 
+struct SLANG_ParameterGroup_C_0
+{
+    float2 uv_0;
+};
+
 cbuffer C_0 : register(b0)
 {
-	float2 uv_0;
+    SLANG_ParameterGroup_C_0 C_0;
 };
 
 Texture2D C_t0_0 : register(t0);
@@ -32,8 +37,8 @@ SamplerState C_s_0 : register(s0);
 
 float4 main() : SV_TARGET
 {
-	return C_t0_0.Sample(C_s_0, uv_0)
-	     + C_t1_0.Sample(C_s_0, uv_0);
+    return C_t0_0.Sample(C_s_0, C_0.uv_0)
+         + C_t1_0.Sample(C_s_0, C_0.uv_0);
 }
 
 #endif
diff --git a/tests/bugs/gh-333.slang b/tests/bugs/gh-333.slang
index 2a23f7751..a1e3ea20d 100644
--- a/tests/bugs/gh-333.slang
+++ b/tests/bugs/gh-333.slang
@@ -2,7 +2,15 @@
 
 // Ensure declaration order in output is correct
 
-#ifndef __SLANG__
+#ifdef __SLANG__
+#define BEGIN_CBUFFER(NAME) cbuffer NAME
+#define END_CBUFFER(NAME, REG) /**/
+#define CBUFFER_REF(NAME, FIELD) FIELD
+#else
+#define BEGIN_CBUFFER(NAME) struct SLANG_ParameterGroup_##NAME
+#define END_CBUFFER(NAME, REG) ; cbuffer NAME : REG { SLANG_ParameterGroup_##NAME NAME; }
+#define CBUFFER_REF(NAME, FIELD) NAME.FIELD
+
 #define A A_0
 #define x x_0
 #define B B_0
@@ -23,13 +31,14 @@ struct B
 	Texture2D t;
 };
 
-cbuffer C
+BEGIN_CBUFFER(C)
 {
 	A a;
 	B b;
-};
+}
+END_CBUFFER(C,register(b0))
 
 float4 main() : SV_TARGET
 {
-	return a.x;
+	return CBUFFER_REF(C,a).x;
 }
diff --git a/tests/bugs/split-nested-types.hlsl b/tests/bugs/split-nested-types.hlsl
index 577f64a75..2bfea49c2 100644
--- a/tests/bugs/split-nested-types.hlsl
+++ b/tests/bugs/split-nested-types.hlsl
@@ -1,9 +1,19 @@
 //TEST:COMPARE_HLSL:-no-mangle -profile ps_5_0
 
 #ifdef __SLANG__
+
+#define BEGIN_CBUFFER(NAME) cbuffer NAME
+#define END_CBUFFER(NAME, REG) /**/
+#define CBUFFER_REF(NAME, FIELD) FIELD
+
 import split_nested_types;
+
 #else
 
+#define BEGIN_CBUFFER(NAME) struct SLANG_ParameterGroup_##NAME
+#define END_CBUFFER(NAME, REG) ; cbuffer NAME : REG { SLANG_ParameterGroup_##NAME NAME; }
+#define CBUFFER_REF(NAME, FIELD) NAME.FIELD
+
 #define A A_0
 #define x x_0
 
@@ -31,12 +41,13 @@ struct M
 
 #endif
 
-cbuffer C
+BEGIN_CBUFFER(C)
 {
 	M m;
 }
+END_CBUFFER(C,register(b0))
 
 float4 main() : SV_TARGET
 {
-	return m.b.y;
+	return CBUFFER_REF(C,m).b.y;
 }
diff --git a/tests/bugs/vec-init-list.hlsl b/tests/bugs/vec-init-list.hlsl
index 2f82a96b0..fe8cba09f 100644
--- a/tests/bugs/vec-init-list.hlsl
+++ b/tests/bugs/vec-init-list.hlsl
@@ -2,7 +2,14 @@
 
 // Check handling of initializer list for vector
 
-#ifndef __SLANG__
+#ifdef __SLANG__
+#define BEGIN_CBUFFER(NAME) cbuffer NAME
+#define END_CBUFFER(NAME, REG) /**/
+#define CBUFFER_REF(NAME, FIELD) FIELD
+#else
+#define BEGIN_CBUFFER(NAME) struct SLANG_ParameterGroup_##NAME
+#define END_CBUFFER(NAME, REG) ; cbuffer NAME : REG { SLANG_ParameterGroup_##NAME NAME; }
+#define CBUFFER_REF(NAME, FIELD) NAME.FIELD
 
 #define C C_0
 #define a a_0
@@ -10,10 +17,11 @@
 
 #endif
 
-cbuffer C : register(b0)
+BEGIN_CBUFFER(C)
 {
-	float4 a;	
-};
+    float4 a;
+}
+END_CBUFFER(C, register(b0))
 
 float w0(float x) { return x; }
 float w1(float x) { return x; }
@@ -22,6 +30,10 @@ float w3(float x) { return x; }
 
 float4 main() : SV_Position
 {
-    float4 wx = { w0(a.x), w1(a.x), w2(a.x), w3(a.x), };
+    float4 wx = {
+        w0(CBUFFER_REF(C,a).x),
+        w1(CBUFFER_REF(C,a).x),
+        w2(CBUFFER_REF(C,a).x),
+        w3(CBUFFER_REF(C,a).x), };
     return wx;
 }
diff --git a/tests/bugs/vk-structured-buffer-binding.hlsl b/tests/bugs/vk-structured-buffer-binding.hlsl
index cdfeb19ab..3ef1bcc8c 100644
--- a/tests/bugs/vk-structured-buffer-binding.hlsl
+++ b/tests/bugs/vk-structured-buffer-binding.hlsl
@@ -1,5 +1,4 @@
-//TEST:SIMPLE: -profile ps_4_0 -entry main -target glsl
-///////TEST:REFLECTION:-profile ps_4_0 -target spirv
+//TEST:CROSS_COMPILE:-profile ps_4_0 -entry main -target spirv-assembly
 
 [[vk::binding(3, 4)]]
 RWStructuredBuffer<uint> gDoneGroups : register(u3);
diff --git a/tests/bugs/vk-structured-buffer-binding.hlsl.expected b/tests/bugs/vk-structured-buffer-binding.hlsl.expected
deleted file mode 100644
index fc1cb93ea..000000000
--- a/tests/bugs/vk-structured-buffer-binding.hlsl.expected
+++ /dev/null
@@ -1,31 +0,0 @@
-result code = 0
-standard error = {
-}
-standard output = {
-#version 450
-layout(row_major) uniform;
-layout(row_major) buffer;
-
-#line 5 0
-layout(std430, binding = 3, set = 4) buffer _S1 {
-    uint gDoneGroups_0[];
-};
-
-#line 7
-layout(location = 0)
-out vec4 _S2;
-
-
-#line 7
-layout(location = 0)
-in vec3 _S3;
-
-
-#line 7
-void main()
-{
-    _S2 = vec4(gDoneGroups_0[uint(int(_S3.z))]);
-    return;
-}
-
-}
diff --git a/tests/bugs/vk-structured-buffer-binding.hlsl.glsl b/tests/bugs/vk-structured-buffer-binding.hlsl.glsl
new file mode 100644
index 000000000..7298ea594
--- /dev/null
+++ b/tests/bugs/vk-structured-buffer-binding.hlsl.glsl
@@ -0,0 +1,26 @@
+// vk-structured-buffer-binding.hlsl.glsl
+//TEST_IGNORE_FILE:
+
+#version 450
+
+#define gDoneGroups gDoneGroups_0
+#define uv _S3
+#define SV_Target _S2
+
+layout(std430, binding = 3, set = 4)
+buffer _S1
+{
+    uint _data[];
+} gDoneGroups;
+
+layout(location = 0)
+out vec4 SV_Target;
+
+layout(location = 0)
+in vec3 uv;
+
+void main()
+{
+    SV_Target = vec4(gDoneGroups._data[uint(int(uv.z))]);
+    return;
+}
diff --git a/tests/compute/matrix-layout.hlsl b/tests/compute/matrix-layout.hlsl
index ad456d8be..034ac512c 100644
--- a/tests/compute/matrix-layout.hlsl
+++ b/tests/compute/matrix-layout.hlsl
@@ -32,7 +32,14 @@ cbuffer C0
 //TEST_INPUT:cbuffer(data=[1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24]):dxbinding(1),glbinding(1)
 cbuffer C1
 {
-    column_major
+
+// Note: support for the explicit `row_major` and `column_major` modifiers is being
+// disabled for now, since our current Vulkan output strategy cannot possibly match the
+// semantics of these modifiers in D3D. Once we do a more complete implementation of
+// matrix layout (see GitHub issue #695) we can add a directed test for all the
+// corner cases of explicit matrix layout.
+//
+//    column_major
     int3x4 cc;
     int    dd;
 };
@@ -47,7 +54,7 @@ int test(int val)
     int a = s.a[val / 4][val % 3];
     int b = s.b;
 
-    int c = cc[val / 4][val % 4];
+    int c = cc[val / 4][val % 3];
     int d = dd;
 
     return ((a*N + b) * N + c) * N + d;
diff --git a/tests/compute/matrix-layout.hlsl.expected.txt b/tests/compute/matrix-layout.hlsl.expected.txt
index cb8e2cae7..3b67fe0cb 100644
--- a/tests/compute/matrix-layout.hlsl.expected.txt
+++ b/tests/compute/matrix-layout.hlsl.expected.txt
@@ -1,12 +1,12 @@
-10D0111
-20D0511
-30D0911
-10D0D11
-60D0211
-70D0611
-50D0A11
-60D0E11
-B0D0311
-90D0711
-A0D0B11
-B0D0F11
+10D010D
+20D020D
+30D030D
+10D010D
+60D060D
+70D070D
+50D050D
+60D060D
+B0D0B0D
+90D090D
+A0D0A0D
+B0D0B0D
diff --git a/tests/cross-compile/array-of-buffers.slang b/tests/cross-compile/array-of-buffers.slang
new file mode 100644
index 000000000..de87e6d9d
--- /dev/null
+++ b/tests/cross-compile/array-of-buffers.slang
@@ -0,0 +1,32 @@
+// array-of-buffers.slang
+
+//TEST:CROSS_COMPILE:-target spirv-assembly -entry main -stage fragment
+//TEST:CROSS_COMPILE:-target dxil-assembly  -entry main -stage fragment -profile sm_6_0
+
+// This test ensures that we cross-compile arrays of structured/constant
+// buffers into appropriate GLSL, where these are not first-class types.
+//
+// Note that this test does *not* currently test the case of passing
+// a structured or constant buffer into a subroutine, which requires
+// further work.
+
+struct S { float4 f; };
+
+cbuffer C
+{
+    uint index;
+}
+
+ConstantBuffer<S>           cb [3];
+StructuredBuffer<S>         sb1[4];
+RWStructuredBuffer<float4>  sb2[5];
+ByteAddressBuffer           bb [6];
+
+float4 main() : SV_Target
+{
+    return cb [index]       .f
+         + sb1[index][index].f
+         + sb2[index][index]
+         + float4(bb[index].Load(index*4));
+}
+
diff --git a/tests/cross-compile/array-of-buffers.slang.glsl b/tests/cross-compile/array-of-buffers.slang.glsl
new file mode 100644
index 000000000..d3aa5082f
--- /dev/null
+++ b/tests/cross-compile/array-of-buffers.slang.glsl
@@ -0,0 +1,57 @@
+//TEST_IGNORE_FILE:
+#version 450
+
+struct SLANG_ParameterGroup_C_0
+{
+    uint index_0;
+};
+
+layout(binding = 0)
+layout(std140) uniform _S1
+{
+    SLANG_ParameterGroup_C_0 _data;
+} C_0;
+
+struct S_0
+{
+    vec4 f_0;
+};
+
+layout(binding = 1)
+layout(std140) uniform _S2
+{
+    S_0 _data;
+} cb_0[3];
+
+
+layout(std430, binding = 2)
+buffer _S3 {
+    S_0 _data[];
+} sb1_0[4];
+
+layout(std430, binding = 3)
+buffer _S4 {
+    vec4 _data[];
+} sb2_0[5];
+
+layout(std430, binding = 4)
+buffer _S5
+{
+    uint _data[];
+} bb_0[6];
+
+layout(location = 0)
+out vec4 _S6;
+
+void main()
+{
+    vec4 _S7 = cb_0[C_0._data.index_0]._data.f_0;
+
+    S_0 _S8 = sb1_0[C_0._data.index_0]._data[C_0._data.index_0];
+
+    vec4 _S9 = _S7 + _S8.f_0;
+    vec4 _S10 = _S9 + sb2_0[C_0._data.index_0]._data[C_0._data.index_0];
+    uint _S11 = bb_0[C_0._data.index_0]._data[int(C_0._data.index_0 * uint(4))];
+    _S6 = _S10 + vec4(_S11);
+    return;
+}
diff --git a/tests/cross-compile/array-of-buffers.slang.hlsl b/tests/cross-compile/array-of-buffers.slang.hlsl
new file mode 100644
index 000000000..129993edc
--- /dev/null
+++ b/tests/cross-compile/array-of-buffers.slang.hlsl
@@ -0,0 +1,35 @@
+//TEST_IGNORE_FILE:
+
+struct SLANG_ParameterGroup_C_0
+{
+    uint index_0;
+};
+
+cbuffer C_0 : register(b0)
+{
+    SLANG_ParameterGroup_C_0 C_0;
+}
+
+struct S_0
+{
+    float4 f_0;
+};
+
+ConstantBuffer<S_0>        cb_0 [3] : register(b1);
+StructuredBuffer<S_0>      sb1_0[4] : register(t0);
+RWStructuredBuffer<float4> sb2_0[5] : register(u1);
+ByteAddressBuffer          bb_0[6]  : register(t4);
+
+float4 main() : SV_TARGET
+{
+    float4 _S1 = cb_0[C_0.index_0].f_0;
+
+    S_0 _S2 = sb1_0[C_0.index_0][C_0.index_0];
+
+    float4 _S3 = _S1 + _S2.f_0;
+    float4 _S4 = _S3 + sb2_0[C_0.index_0][C_0.index_0];
+    uint _S5 = bb_0[C_0.index_0].Load(
+        (int) (C_0.index_0 * (uint) 4));
+
+    return _S4 + (float4) _S5;
+}
diff --git a/tests/hlsl/dxsdk/AdaptiveTessellationCS40/Render.hlsl b/tests/hlsl/dxsdk/AdaptiveTessellationCS40/Render.hlsl
deleted file mode 100644
index c6b4ac197..000000000
--- a/tests/hlsl/dxsdk/AdaptiveTessellationCS40/Render.hlsl
+++ /dev/null
@@ -1,65 +0,0 @@
-//TEST(smoke):COMPARE_HLSL:-no-mangle -profile sm_4_0 -entry RenderBaseVS -stage vertex -entry RenderPS -stage fragment
-
-#ifndef __SLANG__
-#define cbPerObject cbPerObject_0
-#define g_mWorldViewProjection g_mWorldViewProjection_0
-#endif
-
-
-//--------------------------------------------------------------------------------------
-// File: Render.hlsl
-//
-// The shaders for rendering tessellated mesh and base mesh
-// 
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-cbuffer cbPerObject : register( b0 )
-{
-    row_major matrix    g_mWorldViewProjection    ;//SLANG: : packoffset( c0 );
-}
-
-// The tessellated vertex structure
-struct TessedVertex
-{
-    uint BaseTriID;         // Which triangle of the base mesh this tessellated vertex belongs to?
-    float2 bc;              // Barycentric coordinates with regard to the base triangle
-};
-Buffer<float4>                  g_base_vb_buffer : register(t0);  // Base mesh vertex buffer
-StructuredBuffer<TessedVertex>  g_TessedVertices : register(t1);  // Tessellated mesh vertex buffer
-
-float4 bary_centric(float4 v1, float4 v2, float4 v3, float2 bc)
-{
-    return (1 - bc.x - bc.y) * v1 + bc.x * v2 + bc.y * v3;
-}
-
-float4 RenderVS( uint vertid : SV_VertexID ) : SV_POSITION
-{
-    TessedVertex input = g_TessedVertices[vertid];
-    
-    // Get the positions of the three vertices of the base triangle
-    float4 v[3];
-    [unroll]
-    for (int i = 0; i < 3; ++ i)
-    {
-        uint vert_id = input.BaseTriID * 3 + i;
-        v[i] = g_base_vb_buffer[vert_id];
-    }
-
-    // Calculate the position of this tessellated vertex from barycentric coordinates and then project it
-    return mul(bary_centric(v[0], v[1], v[2], input.bc), g_mWorldViewProjection);
-}
-
-struct BaseVertex
-{
-    float4 pos : POSITION;
-};
-
-float4 RenderBaseVS( BaseVertex input ) : SV_POSITION
-{
-    return mul( input.pos, g_mWorldViewProjection );
-}
-
-float4 RenderPS() : SV_TARGET
-{
-    return float4( 1.0f, 1.0f, 0.0f, 1.0f );
-}
-\ No newline at end of file
diff --git a/tests/hlsl/dxsdk/AdaptiveTessellationCS40/ScanCS.hlsl b/tests/hlsl/dxsdk/AdaptiveTessellationCS40/ScanCS.hlsl
deleted file mode 100644
index a4472179f..000000000
--- a/tests/hlsl/dxsdk/AdaptiveTessellationCS40/ScanCS.hlsl
+++ /dev/null
@@ -1,109 +0,0 @@
-//TEST_DISABLED:COMPARE_HLSL:-no-mangle -profile cs_4_0 -entry CSScanInBucket -entry CSScanBucketResult -entry CSScanAddBucketResult
-//--------------------------------------------------------------------------------------
-// File: ScanCS.hlsl
-//
-// A simple inclusive prefix sum(scan) implemented in CS4.0, 
-// using a typical up sweep and down sweep scheme
-//
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-StructuredBuffer<uint2> Input : register( t0 );     // Change uint2 here if scan other types, and
-RWStructuredBuffer<uint2> Result : register( u0 );  // also here
-
-#define groupthreads 128
-groupshared uint4 bucket[groupthreads];             // Change uint4 to the "type x2" if scan other types, e.g.
-                                                    // if scan uint2, then put uint4 here,
-                                                    // if scan float, then put float2 here
-
-void CSScan( uint3 DTid, uint GI, uint2 x )         // Change the type of x here if scan other types
-{
-    // since CS40 can only support one shared memory for one shader, we use .xy and .zw as ping-ponging buffers
-    // if scan a single element type like int, search and replace all .xy to .x and .zw to .y below
-    bucket[GI].xy = x; 
-    bucket[GI].zw = 0;
-
-    // Up sweep    
-    [unroll]
-    for ( uint stride = 2; stride <= groupthreads; stride <<= 1 )
-    {
-        GroupMemoryBarrierWithGroupSync();
-        
-        if ( (GI & (stride - 1)) == (stride - 1) )
-        {
-            bucket[GI].xy += bucket[GI - stride/2].xy;
-        }
-    }
-
-    if ( GI == (groupthreads - 1) ) 
-    {
-        bucket[GI].xy = 0;
-    }
-
-    // Down sweep
-    bool n = true;
-    [unroll]
-    for ( stride = groupthreads / 2; stride >= 1; stride >>= 1 )
-    {
-        GroupMemoryBarrierWithGroupSync();
-
-        uint a = stride - 1;
-        uint b = stride | a;
-
-        if ( n )        // ping-pong between passes
-        {
-            if ( ( GI & b) == b )
-            {
-                bucket[GI].zw = bucket[GI-stride].xy + bucket[GI].xy;
-            } else
-            if ( (GI & a) == a )
-            {
-                bucket[GI].zw = bucket[GI+stride].xy;
-            } else        
-            {
-                bucket[GI].zw = bucket[GI].xy;
-            }
-        } else
-        {
-            if ( ( GI & b) == b )
-            {
-                bucket[GI].xy = bucket[GI-stride].zw + bucket[GI].zw;
-            } else
-            if ( (GI & a) == a )
-            {
-                bucket[GI].xy = bucket[GI+stride].zw;
-            } else        
-            {
-                bucket[GI].xy = bucket[GI].zw;
-            }
-        }
-        
-        n = !n;
-    }    
-
-    Result[DTid.x] = bucket[GI].zw + x;
-}
-
-// scan in each bucket
-[numthreads( groupthreads, 1, 1 )]
-void CSScanInBucket( uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI: SV_GroupIndex )
-{
-    uint2 x = Input[DTid.x];                    // Change the type of x here if scan other types 
-    CSScan( DTid, GI, x );
-}
-
-// record and scan the sum of each bucket
-[numthreads( groupthreads, 1, 1 )]
-void CSScanBucketResult( uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI: SV_GroupIndex )
-{
-    uint2 x = Input[DTid.x*groupthreads - 1];   // Change the type of x here if scan other types
-    CSScan( DTid, GI, x );
-}
-
-StructuredBuffer<uint2> Input1 : register( t1 );
-
-// add the bucket scanned result to each bucket to get the final result
-[numthreads( groupthreads, 1, 1 )]
-void CSScanAddBucketResult( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI: SV_GroupIndex )
-{
-    Result[DTid.x] = Input[DTid.x] + Input1[Gid.x];
-}
diff --git a/tests/hlsl/dxsdk/AdaptiveTessellationCS40/TessellatorCS40_EdgeFactorCS.hlsl b/tests/hlsl/dxsdk/AdaptiveTessellationCS40/TessellatorCS40_EdgeFactorCS.hlsl
deleted file mode 100644
index 1bd204efc..000000000
--- a/tests/hlsl/dxsdk/AdaptiveTessellationCS40/TessellatorCS40_EdgeFactorCS.hlsl
+++ /dev/null
@@ -1,217 +0,0 @@
-//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues.
-//TEST:COMPARE_HLSL: -profile cs_4_0 -entry CSEdgeFactor
-//--------------------------------------------------------------------------------------
-// File: TessellatorCS40_EdgeFactorCS.hlsl
-//
-// The CS to compute edge tessellation factor acoording to current world, view, projection matrix
-//
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-// http://jgt.akpeters.com/papers/akeninemoller01/tribox.html
-bool planeBoxOverlap(float3 normal, float d, float3 maxbox)
-{
-    float3 vmin = maxbox, vmax = maxbox;
-    [unroll]
-    for (int q = 0;q <= 2; ++ q)
-    {
-        if (normal[q] > 0.0f)
-        {
-            vmin[q] *= -1;
-        }
-        else
-        {
-            vmax[q] *= -1;
-        }
-    }
-    if (dot(normal, vmin) + d > 0.0f)
-    {
-        return false;
-    }
-    if (dot(normal, vmax) + d >= 0.0f)
-    {
-        return true;
-    }
-
-    return false;
-}
-
-/*======================== X-tests ========================*/
-bool AXISTEST_X01(float3 v0, float3 v2, float3 boxhalfsize, float2 ab, float2 fab)
-{
-    float p0 = ab.x * v0.y - ab.y * v0.z;
-    float p2 = ab.x * v2.y - ab.y * v2.z;
-    float min_v = min(p0, p2);
-    float max_v = max(p0, p2);
-    float rad = dot(fab, boxhalfsize.yz);
-    return (min_v < rad) && (max_v > -rad);
-}
-
-bool AXISTEST_X2(float3 v0, float3 v1, float3 boxhalfsize, float2 ab, float2 fab)
-{
-    float p0 = ab.x * v0.y - ab.y * v0.z;
-    float p1 = ab.x * v1.y - ab.y * v1.z;
-    float min_v = min(p0, p1);
-    float max_v = max(p0, p1);
-    float rad = dot(fab, boxhalfsize.yz);
-    return (min_v < rad) && (max_v > -rad);
-}
-
-/*======================== Y-tests ========================*/
-bool AXISTEST_Y02(float3 v0, float3 v2, float3 boxhalfsize, float2 ab, float2 fab)
-{
-    float p0 = -ab.x * v0.x + ab.y * v0.z;
-    float p2 = -ab.x * v2.x + ab.y * v2.z;
-    float min_v = min(p0, p2);
-    float max_v = max(p0, p2);
-    float rad = dot(fab, boxhalfsize.xz);
-    return (min_v < rad) && (max_v > -rad);
-}
-
-bool AXISTEST_Y1(float3 v0, float3 v1, float3 boxhalfsize, float2 ab, float2 fab)
-{
-    float p0 = -ab.x * v0.x + ab.y * v0.z;
-    float p1 = -ab.x * v1.x + ab.y * v1.z;
-    float min_v = min(p0, p1);
-    float max_v = max(p0, p1);
-    float rad = dot(fab, boxhalfsize.xz);
-    return (min_v < rad) && (max_v > -rad);
-}
-
-/*======================== Z-tests ========================*/
-bool AXISTEST_Z12(float3 v1, float3 v2, float3 boxhalfsize, float2 ab, float2 fab)
-{
-    float p1 = ab.x * v1.x - ab.y * v1.y;
-    float p2 = ab.x * v2.x - ab.y * v2.y;
-    float min_v = min(p1, p2);
-    float max_v = max(p1, p2);
-    float rad = dot(fab, boxhalfsize.xy);
-    return (min_v < rad) && (max_v > -rad);
-}
-
-bool AXISTEST_Z0(float3 v0, float3 v1, float3 boxhalfsize, float2 ab, float2 fab)
-{
-    float p0 = ab.x * v0.x - ab.y * v0.y;
-    float p1 = ab.x * v1.x - ab.y * v1.y;
-    float min_v = min(p0, p1);
-    float max_v = max(p0, p1);
-    float rad = dot(fab, boxhalfsize.xy);
-    return (min_v < rad) && (max_v > -rad);
-}
-
-bool triBoxOverlap(float3 boxcenter,float3 boxhalfsize,float3 triverts0, float3 triverts1, float3 triverts2)
-{
-    /*    use separating axis theorem to test overlap between triangle and box */
-    /*    need to test for overlap in these directions: */
-    /*    1) the {x,y,z}-directions (actually, since we use the AABB of the triangle */
-    /*       we do not even need to test these) */
-    /*    2) normal of the triangle */
-    /*    3) crossproduct(edge from tri, {x,y,z}-directin) */
-    /*       this gives 3x3=9 more tests */
-
-    /* This is the fastest branch on Sun */
-    /* move everything so that the boxcenter is in (0,0,0) */
-    float3 v0 = triverts0 - boxcenter;
-    float3 v1 = triverts1 - boxcenter;
-    float3 v2 = triverts2 - boxcenter;
-
-    /* compute triangle edges */
-    float3 e0 = v1 - v0;      /* tri edge 0 */
-    float3 e1 = v2 - v1;      /* tri edge 1 */
-    float3 e2 = v0 - v2;      /* tri edge 2 */
-
-    /* Bullet 3:  */
-    /*  test the 9 tests first (this was faster) */
-    float3 fe = abs(e0);
-    if (!AXISTEST_X01(v0, v2, boxhalfsize, e0.zy, fe.zy)
-        || !AXISTEST_Y02(v0, v2, boxhalfsize, e0.zx, fe.zx)
-        || !AXISTEST_Z12(v1, v2, boxhalfsize, e0.yx, fe.yx))
-    {
-        return false;
-    }
-
-    fe = abs(e1);
-    if (!AXISTEST_X01(v0, v2, boxhalfsize, e1.zy, fe.zy)
-        || !AXISTEST_Y02(v0, v2, boxhalfsize, e1.zx, fe.zx)
-        || !AXISTEST_Z0(v0, v1, boxhalfsize, e1.yx, fe.yx))
-    {
-        return false;
-    }
-
-    fe = abs(e2);
-    if (!AXISTEST_X2(v0, v1, boxhalfsize, e2.zy, fe.zy)
-        || !AXISTEST_Y1(v0, v1, boxhalfsize, e2.zx, fe.zx)
-        || !AXISTEST_Z12(v1, v2, boxhalfsize, e2.yx, fe.yx))
-    {
-        return false;
-    }
-
-    /* Bullet 1: */
-    /*  first test overlap in the {x,y,z}-directions */
-    /*  find min, max of the triangle each direction, and test for overlap in */
-    /*  that direction -- this is equivalent to testing a minimal AABB around */
-    /*  the triangle against the AABB */
-
-    float3 min_v = min(min(v0, v1), v2);
-    float3 max_v = max(max(v0, v1), v2);
-    if ((min_v.x > boxhalfsize.x || max_v.x < -boxhalfsize.x)
-        || (min_v.y > boxhalfsize.y || max_v.y < -boxhalfsize.y)
-        || (min_v.z > boxhalfsize.z || max_v.z < -boxhalfsize.z))
-    {
-        return false;
-    }
-
-    /* Bullet 2: */
-    /*  test if the box intersects the plane of the triangle */
-    /*  compute plane equation of triangle: normal*x+d=0 */
-    float3 normal = cross(e0, e1);
-    float d = -dot(normal, v0);  /* plane eq: normal.x+d=0 */
-    if (!planeBoxOverlap(normal, d, boxhalfsize))
-    {
-        return false;
-    }
-
-    return true;   /* box and triangle overlaps */
-}
-
-
-Buffer<float4> InputVertices : register(t0);
-RWStructuredBuffer<float4> EdgeFactorBufOut : register(u0);
-
-cbuffer cb
-{
-    row_major matrix    g_matWVP;
-    float2              g_tess_edge_length_scale;
-    int                 num_triangles;
-    float               dummy;
-}
-
-[numthreads(128, 1, 1)]
-void CSEdgeFactor( uint3 DTid : SV_DispatchThreadID )
-{
-    if (DTid.x < num_triangles)
-    {
-        float4 p0 = mul(InputVertices[DTid.x*3+0], g_matWVP);
-        float4 p1 = mul(InputVertices[DTid.x*3+1], g_matWVP);
-        float4 p2 = mul(InputVertices[DTid.x*3+2], g_matWVP);
-        p0 = p0 / p0.w;
-        p1 = p1 / p1.w;
-        p2 = p2 / p2.w;
-
-        float4 factor;
-        // Only triangles which are completely inside or intersect with the view frustum are taken into account 
-        if ( triBoxOverlap( float3(0, 0, 0.5), float3(1.02, 1.02, 0.52), p0.xyz, p1.xyz, p2.xyz ) )
-        {
-            factor.x = length((p0.xy - p2.xy) * g_tess_edge_length_scale);
-            factor.y = length((p1.xy - p0.xy) * g_tess_edge_length_scale);
-            factor.z = length((p2.xy - p1.xy) * g_tess_edge_length_scale);
-            factor.w = min(min(factor.x, factor.y), factor.z);
-            factor = clamp(factor, 0, 9);
-        } else
-        {
-            factor = 0;
-        }
-
-        EdgeFactorBufOut[DTid.x] = factor;
-    }
-}
diff --git a/tests/hlsl/dxsdk/AdaptiveTessellationCS40/TessellatorCS40_NumVerticesIndicesCS.hlsl b/tests/hlsl/dxsdk/AdaptiveTessellationCS40/TessellatorCS40_NumVerticesIndicesCS.hlsl
deleted file mode 100644
index 672996589..000000000
--- a/tests/hlsl/dxsdk/AdaptiveTessellationCS40/TessellatorCS40_NumVerticesIndicesCS.hlsl
+++ /dev/null
@@ -1,56 +0,0 @@
-//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues.
-//TEST:COMPARE_HLSL: -profile cs_4_0 -entry CSNumVerticesIndices
-//--------------------------------------------------------------------------------------
-// File: TessellatorCS40_NumVerticesIndicesCS.hlsl
-//
-// The CS to compute number of vertices and triangles to be generated from edge tessellation factor
-//
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-#include "TessellatorCS40_common.hlsl"
-
-StructuredBuffer<float4> InputEdgeFactor : register(t0);
-RWStructuredBuffer<uint2> NumVerticesIndicesOut : register(u0);
-
-cbuffer cbCS : register(b1)
-{
-    uint4 g_param;
-}
-
-[numthreads(128, 1, 1)]
-void CSNumVerticesIndices( uint3 DTid : SV_DispatchThreadID )
-{
-    if (DTid.x < g_param.x)
-    {
-        float4 edge_factor = InputEdgeFactor[DTid.x];
-        
-        PROCESSED_TESS_FACTORS_TRI processedTessFactors;
-        int num_points = TriProcessTessFactors(edge_factor, processedTessFactors, g_partitioning);
-
-        int num_index;
-        if (0 == num_points)
-        {
-            num_index = 0;
-        }
-        else if (3 == num_points)
-        {
-            num_index = 4;
-        }
-        else
-        {
-            int numRings = ((processedTessFactors.numPointsForOutsideInside.w + 1) / 2); // +1 is so even tess includes the center point, which we want to now
-
-            int4 outsideInsideHalfTessFactor = int4(ceil(processedTessFactors.outsideInsideHalfTessFactor));
-            uint3 n = NumStitchTransition(outsideInsideHalfTessFactor, processedTessFactors.outsideInsideTessFactorParity);
-            num_index = n.x + n.y + n.z;
-            num_index += TotalNumStitchRegular(true, DIAGONALS_MIRRORED, processedTessFactors.numPointsForOutsideInside.w, numRings - 1) * 3;
-            if( processedTessFactors.outsideInsideTessFactorParity.w == TESSELLATOR_PARITY_ODD )
-            {
-                num_index += 4;
-            }
-        }
-
-        NumVerticesIndicesOut[DTid.x] = uint2(num_points, num_index);
-    }
-}
diff --git a/tests/hlsl/dxsdk/AdaptiveTessellationCS40/TessellatorCS40_ScatterIDCS.hlsl b/tests/hlsl/dxsdk/AdaptiveTessellationCS40/TessellatorCS40_ScatterIDCS.hlsl
deleted file mode 100644
index f6f9081da..000000000
--- a/tests/hlsl/dxsdk/AdaptiveTessellationCS40/TessellatorCS40_ScatterIDCS.hlsl
+++ /dev/null
@@ -1,45 +0,0 @@
-//TEST_DISABLED:COMPARE_HLSL:-no-mangle -profile cs_4_0 -entry CSScatterVertexTriIDIndexID -entry CSScatterIndexTriIDIndexID
-//--------------------------------------------------------------------------------------
-// File: TessellatorCS40_ScatterIDCS.hlsl
-//
-// The CS to scatter vertex ID and triangle ID
-//
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-StructuredBuffer<uint2> InputScanned : register(t0);
-RWStructuredBuffer<uint2> TriIDIndexIDOut : register(u0);
-
-cbuffer cbCS : register(b1)
-{
-    uint4 g_param;
-}
-
-[numthreads(128, 1, 1)]
-void CSScatterVertexTriIDIndexID( uint3 DTid : SV_DispatchThreadID )
-{
-    if (DTid.x < g_param.x)
-    {
-        uint start = InputScanned[DTid.x-1].x;
-        uint end = InputScanned[DTid.x].x;
-
-        for ( uint i = start; i < end; ++i ) 
-        {
-            TriIDIndexIDOut[i] = uint2(DTid.x, i - start);
-        }
-    }
-}
-
-[numthreads(128, 1, 1)]
-void CSScatterIndexTriIDIndexID( uint3 DTid : SV_DispatchThreadID )
-{
-    if (DTid.x < g_param.x)
-    {
-        uint start = InputScanned[DTid.x-1].y;
-        uint end = InputScanned[DTid.x].y;
-
-        for ( uint i = start; i < end; ++i ) 
-        {
-            TriIDIndexIDOut[i] = uint2(DTid.x, i - start);
-        }
-    }
-}
diff --git a/tests/hlsl/dxsdk/AdaptiveTessellationCS40/TessellatorCS40_TessellateIndicesCS.hlsl b/tests/hlsl/dxsdk/AdaptiveTessellationCS40/TessellatorCS40_TessellateIndicesCS.hlsl
deleted file mode 100644
index 8c0a5b63b..000000000
--- a/tests/hlsl/dxsdk/AdaptiveTessellationCS40/TessellatorCS40_TessellateIndicesCS.hlsl
+++ /dev/null
@@ -1,628 +0,0 @@
-//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues.
-//TEST:COMPARE_HLSL: -profile cs_4_0 -entry CSTessellationIndices
-//--------------------------------------------------------------------------------------
-// File: TessellatorCS40_TessellateIndicesCS.hlsl
-//
-// The CS to tessellate indices
-//
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-#include "TessellatorCS40_common.hlsl"
-
-StructuredBuffer<uint2> InputTriIDIndexID : register(t0);
-StructuredBuffer<float4> InputEdgeFactor : register(t1);
-StructuredBuffer<uint2> InputScanned : register(t2);
-
-RWByteAddressBuffer TessedIndicesOut : register(u0);
-
-cbuffer cbCS : register(b1)
-{
-    uint4 g_param;
-}
-
-
-int TransformIndex1(int index, int vertices_base)
-{
-    return vertices_base + index;
-}
-
-int TransformIndex2(int index, int vertices_base, INDEX_PATCH_CONTEXT IndexPatchContext)
-{
-    if( index >= IndexPatchContext.outsidePointIndexPatchBase ) // assumed remapped outide indices are > remapped inside vertices
-    {
-        if( index == IndexPatchContext.outsidePointIndexBadValue )
-        {
-            index = IndexPatchContext.outsidePointIndexReplacementValue;
-        }
-        else
-        {
-            index += IndexPatchContext.outsidePointIndexDeltaToRealValue;
-        }
-    }
-    else
-    {
-        if( index == IndexPatchContext.insidePointIndexBadValue )
-        {
-            index = IndexPatchContext.insidePointIndexReplacementValue;
-        }
-        else
-        {
-            index += IndexPatchContext.insidePointIndexDeltaToRealValue;
-        }
-    }
-
-    return vertices_base + index;
-}
-
-
-int AStitchRegular(bool bTrapezoid, int diagonals,
-                                 uint numInsideEdgePoints,
-                                 int2 outsideInsideEdgePointBaseOffset,
-                                 int i)
-{
-    if (bTrapezoid)
-    {
-        ++ outsideInsideEdgePointBaseOffset.x;
-    }
-
-    int pt;
-
-    if ((i < 4) && bTrapezoid)
-    {
-        if (i < 2)
-        {
-            pt = outsideInsideEdgePointBaseOffset.x - 1 + i; 
-        }
-        else if (i == 2)
-        {
-            pt = outsideInsideEdgePointBaseOffset.y;
-        }
-        else
-        {
-            pt = -1;
-        }
-    }
-
-    int index = i;
-    if (bTrapezoid)
-    {
-        index -= 4;
-    }
-
-    if (index >= 0)
-    {
-        uint uindex = (uint)index;
-        
-        switch( diagonals )
-        {
-        case DIAGONALS_INSIDE_TO_OUTSIDE:
-            if (uindex < 5 * numInsideEdgePoints - 5)
-            {
-                uint p = uindex / 5;
-                uint r = uindex - p * 5;
-                if (r < 2)
-                {
-                    pt = outsideInsideEdgePointBaseOffset.x + p + r;
-                }
-                else if (r < 4)
-                {
-                    pt = outsideInsideEdgePointBaseOffset.y + p + r;
-                }
-                else
-                {
-                    pt = -1;
-                }
-            }
-            else
-            {
-                int r = i - (4 + 5 * numInsideEdgePoints - 5);
-                if (r < 2)
-                {
-                    pt = outsideInsideEdgePointBaseOffset.x + numInsideEdgePoints - 1 + r;
-                }
-                else if (r == 2)
-                {
-                    pt = outsideInsideEdgePointBaseOffset.y + numInsideEdgePoints - 1;
-                }
-                else
-                {
-                    pt = -1;
-                }
-            }
-            break;
-
-        case DIAGONALS_INSIDE_TO_OUTSIDE_EXCEPT_MIDDLE: // Assumes ODD tessellation
-            if (uindex < (numInsideEdgePoints / 2 - 1) * 5)
-            {
-                // First half
-                uint p = uindex / 5;
-                uint r = uindex - p * 5;
-                if (r < 2)
-                {
-                    pt = outsideInsideEdgePointBaseOffset.x + p + r;
-                }
-                else if (r < 4)
-                {
-                    pt = outsideInsideEdgePointBaseOffset.y + p;
-                }
-                else
-                {
-                    pt = -1;
-                }
-            }
-            else if (uindex < (numInsideEdgePoints / 2 - 1) * 5 + 8)
-            {
-                // Middle
-                uint r = uindex - (numInsideEdgePoints / 2 - 1) * 5;
-                if (0 == r)
-                {
-                    pt = outsideInsideEdgePointBaseOffset.x + numInsideEdgePoints / 2 - 1;
-                }
-                else if (r < 3)
-                {
-                    pt = outsideInsideEdgePointBaseOffset.y + numInsideEdgePoints / 2 - 1 + (2 - r);
-                }
-                else if (r == 3)
-                {
-                    pt = -1;
-                }
-                else if (r < 6)
-                {
-                    pt = outsideInsideEdgePointBaseOffset.x + numInsideEdgePoints / 2 - 1 + (r - 4);
-                }
-                else if (r == 6)
-                {
-                    pt = outsideInsideEdgePointBaseOffset.y + numInsideEdgePoints / 2 - 1 + 1;
-                }
-                else if (r == 7)
-                {
-                    pt = -1;
-                }
-            }
-            //else if (uindex < (numInsideEdgePoints/2-1) * 5 + 8 + (numInsideEdgePoints - numInsideEdgePoints/2 - 1) * 5)
-            else if (uindex < numInsideEdgePoints * 5 - 2)
-            {
-                // Second half
-                uint p = (uindex - (numInsideEdgePoints / 2 - 1) * 5 + 8) / 5 + numInsideEdgePoints / 2 + 1;
-                uint r = uindex - (numInsideEdgePoints / 2 - 1) * 5 + 8 - (p - (numInsideEdgePoints / 2 + 1)) * 5;
-                if (r < 2)
-                {
-                    pt = outsideInsideEdgePointBaseOffset.x + p - 1 + r;
-                }
-                else if (r < 4)
-                {
-                    pt = outsideInsideEdgePointBaseOffset.y + p - 1 + r;
-                }
-                else
-                {
-                    pt = -1;
-                }
-            }
-            else
-            {
-                //int r = i - (4 + (numInsideEdgePoints/2-1) * 5 + 8 + (numInsideEdgePoints - numInsideEdgePoints/2 - 1) * 5);
-                int r = i - (numInsideEdgePoints * 5 + 2);
-                if (r < 2)
-                {
-                    pt = outsideInsideEdgePointBaseOffset.x + numInsideEdgePoints - 1 + r;
-                }
-                else if (r == 2)
-                {
-                    pt = outsideInsideEdgePointBaseOffset.y + numInsideEdgePoints - 1;
-                }
-                else
-                {
-                    pt = -1;
-                }
-            }
-            break;
-
-        case DIAGONALS_MIRRORED:
-            if (uindex < (numInsideEdgePoints / 2 + 1) * 2)
-            {
-                uint p = uindex / 2;
-                uint r = uindex - p * 2;
-                if (0 == r)
-                {
-                    pt = outsideInsideEdgePointBaseOffset.y + p;
-                }
-                else
-                {
-                    pt = outsideInsideEdgePointBaseOffset.x + p;
-                }
-            }
-            else if (uindex == (numInsideEdgePoints / 2 + 1) * 2)
-            {
-                pt = -1;
-            }
-            else if (uindex == (numInsideEdgePoints / 2 + 1) * 2 + 1)
-            {
-                pt = outsideInsideEdgePointBaseOffset.x + numInsideEdgePoints / 2;
-            }
-            //else if (uindex < (numInsideEdgePoints / 2 + 1) * 2 + 2 + (numInsideEdgePoints - numInsideEdgePoints / 2) * 2)
-            else if (uindex < numInsideEdgePoints * 2 + 4)
-            {
-                uint p = (uindex - ((numInsideEdgePoints / 2 + 1) * 2 + 2)) / 2 + numInsideEdgePoints / 2;
-                uint r = uindex - ((numInsideEdgePoints / 2 + 1) * 2 + 2) - (p - numInsideEdgePoints / 2) * 2;
-                if (0 == r)
-                {
-                    pt = outsideInsideEdgePointBaseOffset.x + p;
-                }
-                else
-                {
-                    pt = outsideInsideEdgePointBaseOffset.y + p;
-                }
-            }
-            //else if (uindex == (numInsideEdgePoints / 2 + 1) * 2 + 2 + (numInsideEdgePoints - numInsideEdgePoints / 2) * 2)
-            else if (uindex == numInsideEdgePoints * 2 + 4)
-            {
-                pt = -1;
-            }
-            else
-            {
-                //int r = i - (4 + (numInsideEdgePoints / 2 + 1) * 2 + 2 + (numInsideEdgePoints - numInsideEdgePoints / 2) * 2 + 1);
-                uint r = i - (numInsideEdgePoints * 2 + 9);
-                if (r < 2)
-                {
-                    pt = outsideInsideEdgePointBaseOffset.x + numInsideEdgePoints - 1 + r;
-                }
-                else if (r == 2)
-                {
-                    pt = outsideInsideEdgePointBaseOffset.y + numInsideEdgePoints - 1;
-                }
-                else
-                {
-                    pt = -1;
-                }
-            }
-            break;
-        }
-    }
-
-    return pt;
-}
-
-int AStitchTransition(int2 outsideInsideEdgePointBaseOffset, int2 outsideInsideNumHalfTessFactorPoints, 
-                                    int2 outsideInsideEdgeTessFactorParity,
-                                    uint i)
-{
-    outsideInsideNumHalfTessFactorPoints -= (TESSELLATOR_PARITY_ODD == outsideInsideEdgeTessFactorParity);
-    
-    uint2 out_in_first_half = uint2(outsidePointIndex[outsideInsideNumHalfTessFactorPoints.x][MAX_FACTOR / 2 + 1].y, insidePointIndex[outsideInsideNumHalfTessFactorPoints.y][MAX_FACTOR / 2 + 1].y) * 4;
-
-    uint3 out_in_middle = 0;
-    if ((outsideInsideEdgeTessFactorParity.y != outsideInsideEdgeTessFactorParity.x) || (outsideInsideEdgeTessFactorParity.y == TESSELLATOR_PARITY_ODD))
-    {
-        if (outsideInsideEdgeTessFactorParity.y == outsideInsideEdgeTessFactorParity.x)
-        {
-            // Quad in the middle
-            out_in_middle.z = 5;
-            out_in_middle.xy = 1;
-        }
-        else if (TESSELLATOR_PARITY_EVEN == outsideInsideEdgeTessFactorParity.y)
-        {
-            // Triangle pointing inside
-            out_in_middle.z = 4;
-            out_in_middle.x = 1;
-        }
-        else
-        {
-            // Triangle pointing outside
-            out_in_middle.z = 4;
-            out_in_middle.y = 1;
-        }
-    }
-
-
-    int pt = -1;
-
-    if (i < out_in_first_half.y)
-    {
-        // Advance inside
-
-        uint p = i / 4;
-        uint r = i - p * 4;
-        p = insidePointIndex[outsideInsideNumHalfTessFactorPoints.y][p].z;
-        if ((0 == r) || (2 == r))
-        {
-            pt = outsideInsideEdgePointBaseOffset.y + insidePointIndex[outsideInsideNumHalfTessFactorPoints.y][p].y + r / 2;
-        }
-        else if (1 == r)
-        {
-            pt = outsideInsideEdgePointBaseOffset.x + outsidePointIndex[outsideInsideNumHalfTessFactorPoints.x][p].y;
-        }
-    }
-    else
-    {
-        i -= out_in_first_half.y;
-        
-        if (i < out_in_first_half.x)
-        {
-            // Advance outside
-
-            uint p = i / 4;
-            uint r = i - p * 4;
-            p = outsidePointIndex[outsideInsideNumHalfTessFactorPoints.x][p].z;
-            if (r < 2)
-            {
-                pt = outsideInsideEdgePointBaseOffset.x + outsidePointIndex[outsideInsideNumHalfTessFactorPoints.x][p].y + r;
-            }
-            else if (r == 2)
-            {
-                pt = outsideInsideEdgePointBaseOffset.y + insidePointIndex[outsideInsideNumHalfTessFactorPoints.y][p].y;
-                if (insidePointIndex[outsideInsideNumHalfTessFactorPoints.y][p].x)
-                {
-                    ++ pt;
-                }
-            }
-        }
-        else
-        {
-            i -= out_in_first_half.x;
-            
-            if (i < out_in_middle.z)
-            {
-                uint r = i;
-                if (outsideInsideEdgeTessFactorParity.y == outsideInsideEdgeTessFactorParity.x)
-                {
-                    // Quad in the middle
-                    if ((0 == r) || (2 == r))
-                    {
-                        pt = outsideInsideEdgePointBaseOffset.y + out_in_first_half.y / 4 + (2 == r);//r / 2;
-                    }
-                    else if ((1 == r) || (3 == r))
-                    {
-                        pt = outsideInsideEdgePointBaseOffset.x + out_in_first_half.x / 4 + (3 == r);//(r - 1) / 2;
-                    }
-                }
-                else if (TESSELLATOR_PARITY_EVEN == outsideInsideEdgeTessFactorParity.y)
-                {
-                    // Triangle pointing inside
-                    if (r == 0)
-                    {
-                        pt = outsideInsideEdgePointBaseOffset.y + out_in_first_half.y / 4;
-                    }
-                    else if (r < 3)
-                    {
-                        pt = outsideInsideEdgePointBaseOffset.x + out_in_first_half.x / 4 + r - 1;
-                    }
-                }
-                else
-                {
-                    // Triangle pointing outside
-                    if ((0 == r) || (2 == r))
-                    {
-                        pt = outsideInsideEdgePointBaseOffset.y + out_in_first_half.y / 4 + (2 == r);//r / 2;
-                    }
-                    else if (1 == r)
-                    {
-                        pt = outsideInsideEdgePointBaseOffset.x + out_in_first_half.x / 4;
-                    }
-                }
-            }
-            else
-            {
-                i -= out_in_middle.z;
-                
-                if (i < out_in_first_half.x)
-                {
-                    // Advance outside
-
-                    uint p = i / 4;
-                    uint r = i - p * 4;
-                    p = outsidePointIndex[outsideInsideNumHalfTessFactorPoints.x][p].z;
-                    if (r < 2)
-                    {
-                        pt = outsideInsideEdgePointBaseOffset.x + out_in_first_half.x / 4 + out_in_middle.x + (outsidePointIndex[outsideInsideNumHalfTessFactorPoints.x][MAX_FACTOR / 2 + 1].y - outsidePointIndex[outsideInsideNumHalfTessFactorPoints.x][p + 1].y) + r;
-                    }
-                    else if (r == 2)
-                    {
-                        pt = outsideInsideEdgePointBaseOffset.y + out_in_first_half.y / 4 + out_in_middle.y + (insidePointIndex[outsideInsideNumHalfTessFactorPoints.y][MAX_FACTOR / 2 + 1].y - insidePointIndex[outsideInsideNumHalfTessFactorPoints.y][p + 1].y);
-                    }
-                }
-                else
-                {
-                    // Advance inside
-                    
-                    i -= out_in_first_half.x;
-
-                    uint p = i / 4;
-                    uint r = i - p * 4;
-                    p = insidePointIndex[outsideInsideNumHalfTessFactorPoints.y][p].w;
-                    if ((0 == r) || (2 == r))
-                    {
-                        pt = outsideInsideEdgePointBaseOffset.y + out_in_first_half.y / 4 + out_in_middle.y
-                            + (insidePointIndex[outsideInsideNumHalfTessFactorPoints.y][MAX_FACTOR / 2 + 1].y - insidePointIndex[outsideInsideNumHalfTessFactorPoints.y][p + 1].y) + (2 == r);//r / 2;
-                    }
-                    else if (1 == r)
-                    {
-                        pt = outsideInsideEdgePointBaseOffset.x + out_in_first_half.x / 4 + out_in_middle.x
-                            + (outsidePointIndex[outsideInsideNumHalfTessFactorPoints.x][MAX_FACTOR / 2 + 1].y - outsidePointIndex[outsideInsideNumHalfTessFactorPoints.x][p + 1].y);
-                        if (outsidePointIndex[outsideInsideNumHalfTessFactorPoints.x][p].x)
-                        {
-                            ++ pt;
-                        }
-                    }
-                }
-            }
-        }
-    }
-
-    return pt;
-}
-
-[numthreads(128, 1, 1)]
-void CSTessellationIndices( uint3 DTid : SV_DispatchThreadID, uint3 Gid : SV_GroupID, uint GI : SV_GroupIndex )
-{
-    uint id = DTid.x;
-    //uint id = Gid.x * 128 + GI; // Workaround for some CS4x preview drivers
-    
-    if ( id < g_param.x )
-    {
-        uint tri_id = InputTriIDIndexID[id].x;
-        uint index_id = InputTriIDIndexID[id].y;
-        uint base_vertex = InputScanned[tri_id-1].x;
-        
-        float4 outside_inside_factor = InputEdgeFactor[tri_id];
-        
-        PROCESSED_TESS_FACTORS_TRI processedTessFactors;
-        int num_points = TriProcessTessFactors(outside_inside_factor, processedTessFactors, g_partitioning);
-
-        uint tessed_indices;
-        if (3 == num_points)
-        {
-            if (index_id < 3)
-            {
-                tessed_indices = TransformIndex1(index_id, base_vertex);
-            }
-            else
-            {
-                tessed_indices = -1;
-            }
-        }
-        else
-        {
-            // Generate primitives for all the concentric rings, one side at a time for each ring
-            static const int startRing = 1;
-            int numRings = ((processedTessFactors.numPointsForOutsideInside.w + 1) / 2); // +1 is so even tess includes the center point, which we want to now
-
-            int4 outsideInsideHalfTessFactor = int4(ceil(processedTessFactors.outsideInsideHalfTessFactor));
-            uint3 num = NumStitchTransition(outsideInsideHalfTessFactor, processedTessFactors.outsideInsideTessFactorParity);
-            num.y += num.x;
-            num.z += num.y;
-            uint num_index = num.z;
-            num_index += TotalNumStitchRegular(true, DIAGONALS_MIRRORED, processedTessFactors.numPointsForOutsideInside.w, numRings - 1) * 3;
-            if( processedTessFactors.outsideInsideTessFactorParity.w == TESSELLATOR_PARITY_ODD )
-            {
-                num_index += 4;
-            }
-
-            int pt;
-
-            if (index_id < num.x)
-            {
-                int numPointsForInsideEdge = processedTessFactors.numPointsForOutsideInside.w - 2 * startRing;
-
-                pt = AStitchTransition(int2(0, processedTessFactors.insideEdgePointBaseOffset),
-                        outsideInsideHalfTessFactor.xw,
-                        processedTessFactors.outsideInsideTessFactorParity.xw,
-                        index_id);
-                if (pt != -1)
-                {
-                    pt = TransformIndex1(pt, base_vertex);
-                }
-            }
-            else if (index_id < num.y)
-            {
-                int numPointsForInsideEdge = processedTessFactors.numPointsForOutsideInside.w - 2 * startRing;
-
-                pt = AStitchTransition(
-                        int2(processedTessFactors.numPointsForOutsideInside.x - 1, processedTessFactors.insideEdgePointBaseOffset + numPointsForInsideEdge - 1),
-                        outsideInsideHalfTessFactor.yw,
-                        processedTessFactors.outsideInsideTessFactorParity.yw,
-                        index_id - num.x);
-                if (pt != -1)
-                {
-                    pt = TransformIndex1(pt, base_vertex);
-                }
-            }
-            else if (index_id < num.z)
-            {
-                int numPointsForInsideEdge = processedTessFactors.numPointsForOutsideInside.w - 2 * startRing;
-
-                INDEX_PATCH_CONTEXT IndexPatchContext;
-                IndexPatchContext.insidePointIndexDeltaToRealValue    = processedTessFactors.insideEdgePointBaseOffset + 2 * (numPointsForInsideEdge - 1);
-                IndexPatchContext.insidePointIndexBadValue            = numPointsForInsideEdge - 1;
-                IndexPatchContext.insidePointIndexReplacementValue    = processedTessFactors.insideEdgePointBaseOffset;
-                IndexPatchContext.outsidePointIndexPatchBase          = IndexPatchContext.insidePointIndexBadValue+1; // past inside patched index range
-                IndexPatchContext.outsidePointIndexDeltaToRealValue   = processedTessFactors.numPointsForOutsideInside.x + processedTessFactors.numPointsForOutsideInside.y - 2 
-                                                                                    - IndexPatchContext.outsidePointIndexPatchBase;
-                IndexPatchContext.outsidePointIndexBadValue           = IndexPatchContext.outsidePointIndexPatchBase
-                                                                                    + processedTessFactors.numPointsForOutsideInside.z - 1;
-                IndexPatchContext.outsidePointIndexReplacementValue   = 0;
-
-                pt = AStitchTransition(int2(numPointsForInsideEdge, 0),
-                            outsideInsideHalfTessFactor.zw,
-                            processedTessFactors.outsideInsideTessFactorParity.zw,
-                            index_id - num.y);
-                if (pt != -1)
-                {
-                    pt = TransformIndex2(pt, base_vertex, IndexPatchContext);
-                }
-            }
-            else
-            {
-                if ((processedTessFactors.outsideInsideTessFactorParity.w == TESSELLATOR_PARITY_ODD) && (index_id >= num_index - 4))
-                {
-                    int outsideEdgePointBaseOffset = processedTessFactors.insideEdgePointBaseOffset
-                        + ((processedTessFactors.numPointsForOutsideInside.w + 1) - (numRings + startRing)) * (numRings - startRing - 1) * 3;
-
-                    if (index_id - (num_index - 4) != 3)
-                    {
-                        pt = TransformIndex1(outsideEdgePointBaseOffset + index_id - (num_index - 4), base_vertex);
-                    }
-                    else
-                    {
-                        pt = -1;
-                    }
-                }
-                else
-                {
-                    int ring = GetRingFromIndexStitchRegular(true, DIAGONALS_MIRRORED, processedTessFactors.numPointsForOutsideInside.w, index_id - num.z);
-
-                    int tn = TotalNumStitchRegular(true, DIAGONALS_MIRRORED, processedTessFactors.numPointsForOutsideInside.w, ring - 1) * 3;
-                    int n = NumStitchRegular(true, DIAGONALS_MIRRORED, processedTessFactors.numPointsForOutsideInside.w - 2 * ring);
-
-                    int edge = (index_id - num.z - tn) / n;
-                    int index = (index_id - num.z - tn) - edge * n;
-
-                    int2 outsideInsideEdgePointBaseOffset = processedTessFactors.insideEdgePointBaseOffset
-                        + int2(0, 3 * (processedTessFactors.numPointsForOutsideInside.w - 3))
-                        + ((processedTessFactors.numPointsForOutsideInside.w - (ring + startRing)) + int2(1, -1)) * (ring - startRing - 1) * 3;
-
-                    int numPointsForInsideEdge = processedTessFactors.numPointsForOutsideInside.w - 2 * ring;
-                    int numLastPointsForInsideEdge = numPointsForInsideEdge + 2;
-
-                    if (edge < 2)
-                    {
-                        pt = AStitchRegular(true, DIAGONALS_MIRRORED,
-                                    numPointsForInsideEdge,
-                                    outsideInsideEdgePointBaseOffset + (int2(numLastPointsForInsideEdge, numPointsForInsideEdge) - 1) * edge,
-                                    index);
-                        if (pt != -1)
-                        {
-                            pt = TransformIndex1(pt, base_vertex);
-                        }
-                    }
-                    else
-                    {
-                        INDEX_PATCH_CONTEXT IndexPatchContext;
-                        IndexPatchContext.insidePointIndexDeltaToRealValue    = outsideInsideEdgePointBaseOffset.y + (numPointsForInsideEdge - 1) * 2;
-                        IndexPatchContext.insidePointIndexBadValue            = numPointsForInsideEdge - 1;
-                        IndexPatchContext.insidePointIndexReplacementValue    = outsideInsideEdgePointBaseOffset.y;
-                        IndexPatchContext.outsidePointIndexPatchBase          = IndexPatchContext.insidePointIndexBadValue+1; // past inside patched index range
-                        IndexPatchContext.outsidePointIndexDeltaToRealValue   = outsideInsideEdgePointBaseOffset.x + (numLastPointsForInsideEdge - 1) * 2 
-                                                                                    - IndexPatchContext.outsidePointIndexPatchBase;
-                        IndexPatchContext.outsidePointIndexBadValue           = IndexPatchContext.outsidePointIndexPatchBase
-                                                                                    + numLastPointsForInsideEdge - 1;
-                        IndexPatchContext.outsidePointIndexReplacementValue   = outsideInsideEdgePointBaseOffset.x;
-
-                        pt = AStitchRegular(true, DIAGONALS_MIRRORED,
-                                        numPointsForInsideEdge,
-                                        int2(numPointsForInsideEdge, 0),
-                                        index);
-                        if (pt != -1)
-                        {
-                            pt = TransformIndex2(pt, base_vertex, IndexPatchContext);
-                        }
-                    }
-                }
-            }
-
-            tessed_indices = pt;
-        }
-
-        TessedIndicesOut.Store(id*4, tessed_indices);
-    }       
-}
diff --git a/tests/hlsl/dxsdk/AdaptiveTessellationCS40/TessellatorCS40_TessellateVerticesCS.hlsl b/tests/hlsl/dxsdk/AdaptiveTessellationCS40/TessellatorCS40_TessellateVerticesCS.hlsl
deleted file mode 100644
index e1f6b9ec3..000000000
--- a/tests/hlsl/dxsdk/AdaptiveTessellationCS40/TessellatorCS40_TessellateVerticesCS.hlsl
+++ /dev/null
@@ -1,206 +0,0 @@
-//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues.
-//TEST:COMPARE_HLSL: -profile cs_4_0 -entry CSTessellationVertices
-//--------------------------------------------------------------------------------------
-// File: TessellatorCS40_TessellateVerticesCS.hlsl
-//
-// The CS to tessellate vertices
-//
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-#include "TessellatorCS40_common.hlsl"
-
-StructuredBuffer<uint2> InputTriIDIndexID : register(t0);
-StructuredBuffer<float4> InputEdgeFactor : register(t1);
-
-struct TessedVertex
-{
-    uint BaseTriID;
-    float2 bc;
-};
-RWStructuredBuffer<TessedVertex> TessedVerticesOut : register(u0);
-
-cbuffer cbCS : register(b1)
-{
-    uint4 g_param;
-}
-
-void PlacePointIn1D(PROCESSED_TESS_FACTORS_TRI processedTessFactors, int ctx_index, int pt, out float location, int parity)
-{
-    int numHalfTessFactorPoints = int(ceil(processedTessFactors.outsideInsideHalfTessFactor[ctx_index]));
-
-    bool bFlip;
-    if( pt >= numHalfTessFactorPoints )
-    {
-        pt = (numHalfTessFactorPoints << 1) - pt;
-        if( TESSELLATOR_PARITY_ODD == parity )
-        {
-            pt -= 1;
-        }
-        bFlip = true;
-    }
-    else
-    {
-        bFlip = false;
-    }
-
-    if( pt == numHalfTessFactorPoints ) 
-    {
-        location = 0.5f;
-    }    
-    else
-    {
-        unsigned int indexOnCeilHalfTessFactor = pt;
-        unsigned int indexOnFloorHalfTessFactor = indexOnCeilHalfTessFactor;
-        if( pt > processedTessFactors.outsideInsideSplitPointOnFloorHalfTessFactor[ctx_index] )
-        {
-            indexOnFloorHalfTessFactor -= 1;
-        }
-        float locationOnFloorHalfTessFactor = indexOnFloorHalfTessFactor * processedTessFactors.outsideInsideInvNumSegmentsOnFloorTessFactor[ctx_index];
-        float locationOnCeilHalfTessFactor = indexOnCeilHalfTessFactor * processedTessFactors.outsideInsideInvNumSegmentsOnCeilTessFactor[ctx_index];
-
-        location = lerp(locationOnFloorHalfTessFactor, locationOnCeilHalfTessFactor, frac(processedTessFactors.outsideInsideHalfTessFactor[ctx_index]));
-
-        if( bFlip )
-        {
-            location = 1.0f - location;
-        }
-    }
-}
-
-[numthreads(128, 1, 1)]
-void CSTessellationVertices( uint3 DTid : SV_DispatchThreadID, uint3 Gid : SV_GroupID, uint GI : SV_GroupIndex  )
-{
-    uint id = DTid.x;
-    //uint id = Gid.x * 128 + GI; // Workaround for some CS4x preview drivers
-    
-    if ( id < g_param.x )
-    {
-        uint tri_id = InputTriIDIndexID[id].x;
-        uint vert_id = InputTriIDIndexID[id].y;
-        
-        float4 outside_inside_factor = InputEdgeFactor[tri_id];
-
-        PROCESSED_TESS_FACTORS_TRI processedTessFactors;
-        int num_points = TriProcessTessFactors(outside_inside_factor, processedTessFactors, g_partitioning);
-
-        float2 uv;
-        if (3 == num_points)
-        {
-            if (0 == vert_id)
-            {
-                uv = float2(0, 1);
-            }
-            else if (1 == vert_id)
-            {
-                uv = float2(0, 0);
-            }
-            else
-            {
-                uv = float2(1, 0);
-            }
-        }
-        else
-        {
-            if (vert_id < processedTessFactors.insideEdgePointBaseOffset)
-            {
-                // Generate exterior ring edge points, clockwise starting from point V (VW, the U==0 edge)
-
-                int edge;
-                if (vert_id < processedTessFactors.numPointsForOutsideInside.x - 1)
-                {
-                    edge = 0;
-                }
-                else
-                {
-                    vert_id -= processedTessFactors.numPointsForOutsideInside.x - 1;
-                    if (vert_id < processedTessFactors.numPointsForOutsideInside.y - 1)
-                    {
-                        edge = 1;
-                    }
-                    else
-                    {
-                        vert_id -= processedTessFactors.numPointsForOutsideInside.y - 1;
-                        edge = 2;
-                    }
-                }
-                
-                int p = vert_id;
-                int endPoint = processedTessFactors.numPointsForOutsideInside[edge] - 1;
-                float param;
-                int q = (edge & 0x1) ? p : endPoint - p; // whether to reverse point order given we are defining V or U (W implicit):
-                                                     // edge0, VW, has V decreasing, so reverse 1D points below
-                                                     // edge1, WU, has U increasing, so don't reverse 1D points  below
-                                                     // edge2, UV, has U decreasing, so reverse 1D points below
-                PlacePointIn1D(processedTessFactors, edge,q,param, processedTessFactors.outsideInsideTessFactorParity[edge]);
-                if (0 == edge)
-                {
-                    uv = float2(0, param);
-                }
-                else if (1 == edge)
-                {
-                    uv = float2(param, 0);
-                }
-                else
-                {
-                    uv = float2(param, 1 - param);
-                }
-            }
-            else
-            {
-                // Generate interior ring points, clockwise spiralling in
-
-                uint index = vert_id - processedTessFactors.insideEdgePointBaseOffset;
-                uint ring = 1 + (((3 * processedTessFactors.numPointsForOutsideInside.w - 6) - sqrt(sqr(3 * processedTessFactors.numPointsForOutsideInside.w - 6) - 4 * 3 * index)) + 0.001f) / 6;
-                index -= 3 * (processedTessFactors.numPointsForOutsideInside.w - ring - 1) * (ring - 1);
-
-                uint startPoint = ring;
-                uint endPoint = processedTessFactors.numPointsForOutsideInside.w - 1 - startPoint;
-                if (index < 3 * (endPoint - startPoint))
-                {
-                    uint edge = index / (endPoint - startPoint);
-                    uint p = index - edge * (endPoint - startPoint) + startPoint;
-
-                    int perpendicularAxisPoint = startPoint;
-                    float perpParam;
-                    PlacePointIn1D(processedTessFactors, 3, perpendicularAxisPoint, perpParam, processedTessFactors.outsideInsideTessFactorParity.w);
-                    perpParam = perpParam * 2 / 3;
-                    
-                    float param;
-                    int q = (edge & 0x1) ? p : endPoint - (p - startPoint); // whether to reverse point given we are defining V or U (W implicit):
-                                                             // edge0, VW, has V decreasing, so reverse 1D points below
-                                                             // edge1, WU, has U increasing, so don't reverse 1D points  below
-                                                             // edge2, UV, has U decreasing, so reverse 1D points below
-                    PlacePointIn1D(processedTessFactors, 3, q,param, processedTessFactors.outsideInsideTessFactorParity.w);
-                    // edge0 VW, has perpendicular parameter U constant
-                    // edge1 WU, has perpendicular parameter V constant
-                    // edge2 UV, has perpendicular parameter W constant 
-                    const unsigned int deriv = 2; // reciprocal is the rate of change of edge-parallel parameters as they are pushed into the triangle
-                    if (0 == edge)
-                    {
-                        uv = float2(perpParam, param - perpParam / deriv);
-                    }
-                    else if (1 == edge)
-                    {
-                        uv = float2(param - perpParam / deriv, perpParam);
-                    }
-                    else
-                    {
-                        uv = float2(param - perpParam / deriv, 1 - (param - perpParam / deriv + perpParam));
-                    }
-                }
-                else
-                {
-                    if( processedTessFactors.outsideInsideTessFactorParity.w != TESSELLATOR_PARITY_ODD )
-                    {
-                        // Last point is the point at the center.
-                        uv = 1 / 3.0f;
-                    }
-                }
-            }
-        }
-        
-        TessedVerticesOut[id].BaseTriID = tri_id;
-        TessedVerticesOut[id].bc = uv;
-    }    
-}
diff --git a/tests/hlsl/dxsdk/AdaptiveTessellationCS40/TessellatorCS40_common.hlsl b/tests/hlsl/dxsdk/AdaptiveTessellationCS40/TessellatorCS40_common.hlsl
deleted file mode 100644
index 309044cdb..000000000
--- a/tests/hlsl/dxsdk/AdaptiveTessellationCS40/TessellatorCS40_common.hlsl
+++ /dev/null
@@ -1,411 +0,0 @@
-//TEST_IGNORE_FILE:
-//--------------------------------------------------------------------------------------
-// File: TessellatorCS40_common.hlsl
-//
-// The common utils included by other shaders in the sample
-// 
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-#include "TessellatorCS40_defines.h"
-
-cbuffer cbNeverChanges : register(b0)
-{
-    uint4 insidePointIndex[MAX_FACTOR / 2 + 1][MAX_FACTOR / 2 + 2];
-    uint4 outsidePointIndex[MAX_FACTOR / 2 + 1][MAX_FACTOR / 2 + 2];
-}
-
-#define D3D11_TESSELLATOR_MAX_EVEN_TESSELLATION_FACTOR    ( 64 )
-#define D3D11_TESSELLATOR_MAX_ODD_TESSELLATION_FACTOR     ( 63 )
-#define D3D11_TESSELLATOR_MIN_EVEN_TESSELLATION_FACTOR     ( 2 )
-#define D3D11_TESSELLATOR_MIN_ODD_TESSELLATION_FACTOR      ( 1 )
-
-#define D3D11_TESSELLATOR_PARTITIONING_INTEGER            ( 0 )
-#define D3D11_TESSELLATOR_PARTITIONING_POW2               ( 1 )
-#define D3D11_TESSELLATOR_PARTITIONING_FRACTIONAL_ODD     ( 2 )
-#define D3D11_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN    ( 3 )
-    
-#define TESSELLATOR_PARITY_EVEN                           ( 0 )
-#define TESSELLATOR_PARITY_ODD                            ( 1 )
-
-#define EPSILON 1e-6f
-#define MIN_ODD_TESSFACTOR_PLUS_HALF_EPSILON (D3D11_TESSELLATOR_MIN_ODD_TESSELLATION_FACTOR + EPSILON/2)
-
-#define DIAGONALS_INSIDE_TO_OUTSIDE                       ( 0 )
-#define DIAGONALS_INSIDE_TO_OUTSIDE_EXCEPT_MIDDLE         ( 1 )
-#define DIAGONALS_MIRRORED                                ( 2 )
-
-
-// This is moved to macro defines at shader compile time, so that the partitioning mode can be changed during runtime
-//#define g_partitioning (D3D11_TESSELLATOR_PARTITIONING_POW2)
-
-
-struct PROCESSED_TESS_FACTORS_TRI
-{
-    float4 outsideInsideTessFactor;
-    int4 outsideInsideTessFactorParity;
-
-    float4 outsideInsideInvNumSegmentsOnFloorTessFactor; 
-    float4 outsideInsideInvNumSegmentsOnCeilTessFactor;
-    float4 outsideInsideHalfTessFactor;
-    int4 outsideInsideSplitPointOnFloorHalfTessFactor; 
-
-    // Stuff below is specific to the traversal order 
-    uint4 numPointsForOutsideInside;
-    uint insideEdgePointBaseOffset;
-};
-
-struct INDEX_PATCH_CONTEXT
-{
-    int insidePointIndexDeltaToRealValue;
-    int insidePointIndexBadValue;
-    int insidePointIndexReplacementValue;
-    int outsidePointIndexPatchBase;
-    int outsidePointIndexDeltaToRealValue;
-    int outsidePointIndexBadValue;
-    int outsidePointIndexReplacementValue;
-};
-
-bool4 isEven(float4 input)
-{
-    return (((uint4)input) & 1) ? false : true;
-}
-
-uint RemoveMSB(uint val)
-{
-    int check;
-    if( val <= 0x0000ffff )
-    {
-        check = ( val <= 0x000000ff ) ? 0x00000080 : 0x00008000;
-    }
-    else
-    {
-        check = ( val <= 0x00ffffff ) ? 0x00800000 : 0x80000000;
-    }
-    for (int i = 0; i < 8; i++, check >>= 1)
-    {
-        if( val & check )
-        {
-            return (val & ~check);
-        }
-    }
-    return 0;
-}
-
-uint4 NumPointsForTessFactor(float4 tessFactor, int4 parity)
-{
-    return TESSELLATOR_PARITY_ODD == parity ? uint4(ceil(0.5f + tessFactor / 2)) * 2 : uint4(ceil(tessFactor / 2)) * 2 + 1;
-}
-
-void ComputeTessFactorContext(float4 tessFactor, int4 parity,
-    out float4 invNumSegmentsOnFloorTessFactor,
-    out float4 invNumSegmentsOnCeilTessFactor,
-    out float4 halfTessFactor,
-    out int4 splitPointOnFloorHalfTessFactor)
-{
-    halfTessFactor = tessFactor / 2;
-    
-    halfTessFactor += 0.5 * ((TESSELLATOR_PARITY_ODD == parity) | (0.5f == halfTessFactor));
-    
-    float4 floorHalfTessFactor = floor(halfTessFactor);
-    float4 ceilHalfTessFactor = ceil(halfTessFactor);
-    int4 numHalfTessFactorPoints = int4(ceilHalfTessFactor);
-    
-    for (int index = 0; index < 4; ++ index)
-    {
-        if( ceilHalfTessFactor[index] == floorHalfTessFactor[index] )
-        {
-            splitPointOnFloorHalfTessFactor[index] =  /*pick value to cause this to be ignored*/ numHalfTessFactorPoints[index]+1;
-        }
-        else if( TESSELLATOR_PARITY_ODD == parity[index] )
-        {
-            if( floorHalfTessFactor[index] == 1 )
-            {
-                splitPointOnFloorHalfTessFactor[index] = 0;
-            }
-            else
-            {
-                splitPointOnFloorHalfTessFactor[index] = (RemoveMSB(int(floorHalfTessFactor[index]) - 1) << 1) + 1;
-            }
-        }
-        else
-        {
-            splitPointOnFloorHalfTessFactor[index] = (RemoveMSB(int(floorHalfTessFactor[index])) << 1) + 1;
-        }
-    }
-    
-    int4 numFloorSegments = int4(floorHalfTessFactor * 2);
-    int4 numCeilSegments = int4(ceilHalfTessFactor * 2);
-    int4 s = (TESSELLATOR_PARITY_ODD == parity);
-    numFloorSegments -= s;
-    numCeilSegments -= s;
-    invNumSegmentsOnFloorTessFactor = 1.0f / numFloorSegments;
-    invNumSegmentsOnCeilTessFactor = 1.0f / numCeilSegments;
-}
-
-int TriProcessTessFactors( inout float4 tessFactor,
-                           out PROCESSED_TESS_FACTORS_TRI processedTessFactors,
-                           int partitioning )
-{
-    processedTessFactors = (PROCESSED_TESS_FACTORS_TRI)0;
-    
-    int parity = TESSELLATOR_PARITY_EVEN;
-    switch( partitioning )
-    {
-        case D3D11_TESSELLATOR_PARTITIONING_INTEGER:
-        default:
-            break;
-        case D3D11_TESSELLATOR_PARTITIONING_FRACTIONAL_ODD:
-            parity = TESSELLATOR_PARITY_ODD;
-            break;
-        case D3D11_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN:
-            parity = TESSELLATOR_PARITY_EVEN;
-            break;
-    }
-
-    // Is the patch culled?
-    if( !(tessFactor.x > 0) || // NaN will pass
-        !(tessFactor.y > 0) ||
-        !(tessFactor.z > 0) )
-    {
-        return 0;
-    }
-
-    // Clamp edge TessFactors
-    float lowerBound, upperBound;
-    switch(partitioning)
-    {
-        case D3D11_TESSELLATOR_PARTITIONING_INTEGER:
-        case D3D11_TESSELLATOR_PARTITIONING_POW2: // don't care about pow2 distinction for validation, just treat as integer
-        default:
-            lowerBound = D3D11_TESSELLATOR_MIN_ODD_TESSELLATION_FACTOR;
-            upperBound = D3D11_TESSELLATOR_MAX_EVEN_TESSELLATION_FACTOR;
-            break;
-         
-        case D3D11_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN:
-            lowerBound = D3D11_TESSELLATOR_MIN_EVEN_TESSELLATION_FACTOR;
-            upperBound = D3D11_TESSELLATOR_MAX_EVEN_TESSELLATION_FACTOR;
-            break;
-
-        case D3D11_TESSELLATOR_PARTITIONING_FRACTIONAL_ODD:
-            lowerBound = D3D11_TESSELLATOR_MIN_ODD_TESSELLATION_FACTOR;
-            upperBound = D3D11_TESSELLATOR_MAX_ODD_TESSELLATION_FACTOR;
-            break;
-    }
-
-    tessFactor.xyz = min( upperBound, max( lowerBound, tessFactor.xyz ) );
-
-    // Clamp inside TessFactors
-    if(D3D11_TESSELLATOR_PARTITIONING_FRACTIONAL_ODD == partitioning)
-    {
-        if( (tessFactor.x > MIN_ODD_TESSFACTOR_PLUS_HALF_EPSILON) ||
-            (tessFactor.y > MIN_ODD_TESSFACTOR_PLUS_HALF_EPSILON) ||
-            (tessFactor.z > MIN_ODD_TESSFACTOR_PLUS_HALF_EPSILON)) 
-            // Don't need the same check for insideTessFactor for tri patches, 
-            // since there is only one insideTessFactor, as opposed to quad 
-            // patches which have 2 insideTessFactors.
-        {
-            // Force picture frame
-            lowerBound = D3D11_TESSELLATOR_MIN_ODD_TESSELLATION_FACTOR + EPSILON;
-        }
-    }
-
-    tessFactor.w = min( upperBound, max( lowerBound, tessFactor.w ) );
-    // Note the above clamps map NaN to lowerBound
-
-    if (partitioning == D3D11_TESSELLATOR_PARTITIONING_INTEGER)
-    {
-        tessFactor = ceil(tessFactor);
-    }
-    else if (partitioning == D3D11_TESSELLATOR_PARTITIONING_POW2)
-    {
-        static const int exponentMask = 0x7f800000;
-        static const int mantissaMask = 0x007fffff;
-        static const int exponentLSB = 0x00800000;
-
-        int4 bits = asint(tessFactor);
-        tessFactor = bits & mantissaMask ? asfloat((bits & exponentMask) + exponentLSB) : tessFactor;
-    }
-
-    // Process tessFactors
-    if ((partitioning == D3D11_TESSELLATOR_PARTITIONING_INTEGER)|| (partitioning == D3D11_TESSELLATOR_PARTITIONING_POW2))
-    {
-        bool4 e = isEven(tessFactor);
-        processedTessFactors.outsideInsideTessFactorParity.xyz = e.xyz ? TESSELLATOR_PARITY_EVEN : TESSELLATOR_PARITY_ODD;
-        processedTessFactors.outsideInsideTessFactorParity.w = (e.w || (1 == tessFactor.w)) ? TESSELLATOR_PARITY_EVEN : TESSELLATOR_PARITY_ODD;
-    }
-    else
-    {
-        processedTessFactors.outsideInsideTessFactorParity = parity;
-    }
-    
-    processedTessFactors.outsideInsideTessFactor = tessFactor;
-
-    if (((partitioning == D3D11_TESSELLATOR_PARTITIONING_INTEGER)|| (partitioning == D3D11_TESSELLATOR_PARTITIONING_POW2)) || (parity == TESSELLATOR_PARITY_ODD))
-    {
-        // Special case if all TessFactors are 1 
-        if( (1 == processedTessFactors.outsideInsideTessFactor.x) &&
-            (1 == processedTessFactors.outsideInsideTessFactor.y) &&
-            (1 == processedTessFactors.outsideInsideTessFactor.z) &&
-            (1 == processedTessFactors.outsideInsideTessFactor.w) )
-        {
-            return 3;
-        }
-    }
-
-    // Compute per-TessFactor metadata
-    ComputeTessFactorContext(processedTessFactors.outsideInsideTessFactor, processedTessFactors.outsideInsideTessFactorParity,
-                             processedTessFactors.outsideInsideInvNumSegmentsOnFloorTessFactor,
-                             processedTessFactors.outsideInsideInvNumSegmentsOnCeilTessFactor,
-                             processedTessFactors.outsideInsideHalfTessFactor,
-                             processedTessFactors.outsideInsideSplitPointOnFloorHalfTessFactor);
-
-    // Compute some initial data.
-
-    // outside edge offsets and storage
-    processedTessFactors.numPointsForOutsideInside = NumPointsForTessFactor(processedTessFactors.outsideInsideTessFactor, processedTessFactors.outsideInsideTessFactorParity);
-    int NumPoints = processedTessFactors.numPointsForOutsideInside.x + processedTessFactors.numPointsForOutsideInside.y + processedTessFactors.numPointsForOutsideInside.z - 3;
-
-    // inside edge offsets
-    {
-        uint pointCountMin = (processedTessFactors.outsideInsideTessFactorParity.w == TESSELLATOR_PARITY_ODD) ? 4 : 3;
-        // max() allows degenerate transition regions when inside TessFactor == 1
-        processedTessFactors.numPointsForOutsideInside.w = max(pointCountMin, processedTessFactors.numPointsForOutsideInside.w);
-    }
-
-    processedTessFactors.insideEdgePointBaseOffset = NumPoints;
-
-    // inside storage, including interior edges above
-    {
-        int numInteriorRings = (processedTessFactors.numPointsForOutsideInside.w >> 1) - 1; 
-        int numInteriorPoints;
-        if( processedTessFactors.outsideInsideTessFactorParity.w == TESSELLATOR_PARITY_ODD )
-        {
-            numInteriorPoints = 3*(numInteriorRings*(numInteriorRings+1) - numInteriorRings);
-        }
-        else
-        {
-            numInteriorPoints = 3*(numInteriorRings*(numInteriorRings+1)) + 1;
-        }
-        NumPoints += numInteriorPoints;
-    }
-    
-    return NumPoints;
-}
-
-int NumStitchRegular(bool bTrapezoid, int diagonals, int numInsideEdgePoints)
-{
-    int num_index = 0;
-
-    if( bTrapezoid )
-    {
-        num_index += 8;
-    }
-    switch( diagonals )
-    {
-        case DIAGONALS_INSIDE_TO_OUTSIDE:
-            // Diagonals pointing from inside edge forward towards outside edge
-            num_index += 5 * numInsideEdgePoints - 5;
-            break;
-
-        case DIAGONALS_INSIDE_TO_OUTSIDE_EXCEPT_MIDDLE: // Assumes ODD tessellation
-            // Diagonals pointing from outside edge forward towards inside edge
-            num_index += 5 * numInsideEdgePoints - 2;
-            break;
-
-        case DIAGONALS_MIRRORED:
-            num_index += 2 * numInsideEdgePoints + 5;
-            break;
-    }
-
-    return num_index;
-}
-
-uint TotalNumStitchRegular(bool bTrapezoid, int diagonals,
-                                 int numPointsForInsideTessFactor, int ring)
-{
-    uint num_index = 0;
-
-    if( bTrapezoid )
-    {
-        num_index += 8 * (ring - 1);
-    }
-    switch( diagonals )
-    {
-        case DIAGONALS_INSIDE_TO_OUTSIDE:
-            // Diagonals pointing from inside edge forward towards outside edge
-            num_index += (5 * numPointsForInsideTessFactor - 35 - 5 * ring) * (ring - 1);
-            break;
-
-        case DIAGONALS_INSIDE_TO_OUTSIDE_EXCEPT_MIDDLE: // Assumes ODD tessellation
-            // Diagonals pointing from outside edge forward towards inside edge
-            num_index += (5 * numPointsForInsideTessFactor - 12 - 5 * ring) * (ring - 1);
-            break;
-
-        case DIAGONALS_MIRRORED:
-            num_index += (2 * numPointsForInsideTessFactor + 1 - 2 * ring) * (ring - 1);
-            break;
-    }
-
-    return num_index;
-}
-
-int sqr(int x)
-{
-    return x * x;
-}
-
-int GetRingFromIndexStitchRegular(bool bTrapezoid, int diagonals, int numPointsForInsideTessFactor, int index)
-{
-    int t = 0;
-    if (bTrapezoid)
-    {
-        t = 8;
-    }
-
-    switch( diagonals )
-    {
-        case DIAGONALS_INSIDE_TO_OUTSIDE:
-            t = (5 * numPointsForInsideTessFactor - (35 - t)) * 3;
-            return 1 + uint((t + 15) - sqrt(sqr(t + 15) - 4 * 15 * (t + index)) + 0.001f) / 30;
-
-        case DIAGONALS_INSIDE_TO_OUTSIDE_EXCEPT_MIDDLE:
-            t = (5 * numPointsForInsideTessFactor - (12 - t)) * 3;
-            return 1 + uint((t + 15) - sqrt(sqr(t + 15) - 4 * 15 * (t + index)) + 0.001f) / 30;
-
-        case DIAGONALS_MIRRORED:
-            t = ((t + 1) + 2 * numPointsForInsideTessFactor) * 3;
-            return 1 + uint((t + 6) - sqrt(sqr(t + 6) - 4 * 6 * (t + index)) + 0.001f) / 12;
-
-        default:
-            return -1;
-    }
-}
-
-uint3 NumStitchTransition(int4 outsideInsideNumHalfTessFactorPoints, 
-                                    int4 outsideInsideEdgeTessFactorParity)
-{
-    outsideInsideNumHalfTessFactorPoints -= (TESSELLATOR_PARITY_ODD == outsideInsideEdgeTessFactorParity);
-
-    uint3 num_index = insidePointIndex[outsideInsideNumHalfTessFactorPoints.w][MAX_FACTOR / 2 + 1].y * 8;
-    
-    [unroll]
-    for (int edge = 0; edge < 3; ++ edge)
-    {
-        num_index[edge] += outsidePointIndex[outsideInsideNumHalfTessFactorPoints[edge]][MAX_FACTOR / 2 + 1].y * 8;
-
-        if( (outsideInsideEdgeTessFactorParity.w != outsideInsideEdgeTessFactorParity[edge]) || (outsideInsideEdgeTessFactorParity.w == TESSELLATOR_PARITY_ODD))
-        {
-            if( outsideInsideEdgeTessFactorParity.w == outsideInsideEdgeTessFactorParity[edge] )
-            {
-                num_index[edge] += 5;
-            }
-            else
-            {
-                num_index[edge] += 4;
-            }
-        }
-    }
-
-    return num_index;
-}
diff --git a/tests/hlsl/dxsdk/AdaptiveTessellationCS40/TessellatorCS40_defines.h b/tests/hlsl/dxsdk/AdaptiveTessellationCS40/TessellatorCS40_defines.h
deleted file mode 100644
index 6b4382393..000000000
--- a/tests/hlsl/dxsdk/AdaptiveTessellationCS40/TessellatorCS40_defines.h
+++ /dev/null
@@ -1,9 +0,0 @@
-//--------------------------------------------------------------------------------------
-// File: TessellatorCS40_defines.h
-//
-// This file defines common constants which are included by both CPU code and shader code
-//
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-#define MAX_FACTOR 16
diff --git a/tests/hlsl/dxsdk/BC6HBC7EncoderCS/Shaders/BC6HEncode.hlsl b/tests/hlsl/dxsdk/BC6HBC7EncoderCS/Shaders/BC6HEncode.hlsl
deleted file mode 100644
index 1e40c80ef..000000000
--- a/tests/hlsl/dxsdk/BC6HBC7EncoderCS/Shaders/BC6HEncode.hlsl
+++ /dev/null
@@ -1,2567 +0,0 @@
-//TEST_IGNORE_FILE:
-//--------------------------------------------------------------------------------------
-// File: BC6HEncode.hlsl
-//
-// The Compute Shader for BC6H Encoder
-//
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-//#define REF_DEVICE
-
-#define UINTLENGTH            32
-#define NCHANNELS             3
-#define SIGNED_F16            96
-#define UNSIGNED_F16          95
-#define MAX_FLOAT             asfloat(0x7F7FFFFF)
-#define MIN_FLOAT             asfloat(0xFF7FFFFF)
-#define MAX_INT               asint(0x7FFFFFFF)
-#define MIN_INT               asint(0x80000000)
-
-cbuffer cbCS : register( b0 )
-{
-    uint g_tex_width;
-    uint g_num_block_x;
-    uint g_format;            //either SIGNED_F16 for DXGI_FORMAT_BC6H_SF16 or UNSIGNED_F16 for DXGI_FORMAT_BC6H_UF16
-    uint g_mode_id;
-    uint g_start_block_id;
-    uint g_num_total_blocks;
-};
-
-static const uint candidateModeMemory[14] = { 0x00, 0x01, 0x02, 0x06, 0x0A, 0x0E, 0x12, 0x16, 0x1A, 0x1E, 0x03, 0x07, 0x0B, 0x0F };
-static const uint candidateModeFlag[14] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14 };
-static const bool candidateModeTransformed[14] = { true, true, true, true, true, true, true, true, true, false, false, true, true, true };
-static const uint4 candidateModePrec[14] = { uint4(10,5,5,5), uint4(7,6,6,6),
-    uint4(11,5,4,4), uint4(11,4,5,4), uint4(11,4,4,5), uint4(9,5,5,5),
-    uint4(8,6,5,5), uint4(8,5,6,5), uint4(8,5,5,6), uint4(6,6,6,6),
-    uint4(10,10,10,10), uint4(11,9,9,9), uint4(12,8,8,8), uint4(16,4,4,4) };
-
-/*static const uint4x4 candidateSection[32] = 
-{
-    {0,0,1,1, 0,0,1,1, 0,0,1,1, 0,0,1,1}, {0,0,0,1, 0,0,0,1, 0,0,0,1, 0,0,0,1}, {0,1,1,1, 0,1,1,1, 0,1,1,1, 0,1,1,1}, {0,0,0,1, 0,0,1,1, 0,0,1,1, 0,1,1,1},
-    {0,0,0,0, 0,0,0,1, 0,0,0,1, 0,0,1,1}, {0,0,1,1, 0,1,1,1, 0,1,1,1, 1,1,1,1}, {0,0,0,1, 0,0,1,1, 0,1,1,1, 1,1,1,1}, {0,0,0,0, 0,0,0,1, 0,0,1,1, 0,1,1,1},
-    {0,0,0,0, 0,0,0,0, 0,0,0,1, 0,0,1,1}, {0,0,1,1, 0,1,1,1, 1,1,1,1, 1,1,1,1}, {0,0,0,0, 0,0,0,1, 0,1,1,1, 1,1,1,1}, {0,0,0,0, 0,0,0,0, 0,0,0,1, 0,1,1,1},
-    {0,0,0,1, 0,1,1,1, 1,1,1,1, 1,1,1,1}, {0,0,0,0, 0,0,0,0, 1,1,1,1, 1,1,1,1}, {0,0,0,0, 1,1,1,1, 1,1,1,1, 1,1,1,1}, {0,0,0,0, 0,0,0,0, 0,0,0,0, 1,1,1,1},
-    {0,0,0,0, 1,0,0,0, 1,1,1,0, 1,1,1,1}, {0,1,1,1, 0,0,0,1, 0,0,0,0, 0,0,0,0}, {0,0,0,0, 0,0,0,0, 1,0,0,0, 1,1,1,0}, {0,1,1,1, 0,0,1,1, 0,0,0,1, 0,0,0,0},
-    {0,0,1,1, 0,0,0,1, 0,0,0,0, 0,0,0,0}, {0,0,0,0, 1,0,0,0, 1,1,0,0, 1,1,1,0}, {0,0,0,0, 0,0,0,0, 1,0,0,0, 1,1,0,0}, {0,1,1,1, 0,0,1,1, 0,0,1,1, 0,0,0,1},
-    {0,0,1,1, 0,0,0,1, 0,0,0,1, 0,0,0,0}, {0,0,0,0, 1,0,0,0, 1,0,0,0, 1,1,0,0}, {0,1,1,0, 0,1,1,0, 0,1,1,0, 0,1,1,0}, {0,0,1,1, 0,1,1,0, 0,1,1,0, 1,1,0,0},
-    {0,0,0,1, 0,1,1,1, 1,1,1,0, 1,0,0,0}, {0,0,0,0, 1,1,1,1, 1,1,1,1, 0,0,0,0}, {0,1,1,1, 0,0,0,1, 1,0,0,0, 1,1,1,0}, {0,0,1,1, 1,0,0,1, 1,0,0,1, 1,1,0,0}
-};*/
-
-static const uint candidateSectionBit[32] = 
-{
-    0xCCCC, 0x8888, 0xEEEE, 0xECC8,
-    0xC880, 0xFEEC, 0xFEC8, 0xEC80,
-    0xC800, 0xFFEC, 0xFE80, 0xE800,
-    0xFFE8, 0xFF00, 0xFFF0, 0xF000,
-    0xF710, 0x008E, 0x7100, 0x08CE,
-    0x008C, 0x7310, 0x3100, 0x8CCE,
-    0x088C, 0x3110, 0x6666, 0x366C,
-    0x17E8, 0x0FF0, 0x718E, 0x399C
-};
-
-static const uint candidateFixUpIndex1D[32] = 
-{
-    15,15,15,15,
-    15,15,15,15,
-    15,15,15,15,
-    15,15,15,15,
-    15, 2, 8, 2,
-     2, 8, 8,15,
-     2, 8, 2, 2,
-     8, 8, 2, 2
-};
-
-//0, 9, 18, 27, 37, 46, 55, 64
-static const uint aStep1[64] = {0,0,0,0,0,1,1,1,
-                              1,1,1,1,1,1,2,2,
-                              2,2,2,2,2,2,2,3,
-                              3,3,3,3,3,3,3,3,
-                              3,4,4,4,4,4,4,4,
-                              4,4,5,5,5,5,5,5,
-                              5,5,5,6,6,6,6,6,
-                              6,6,6,6,7,7,7,7};
-                                  
-//0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64
-static const uint aStep2[64] = { 0, 0, 0, 1, 1, 1, 1, 2,
-                               2, 2, 2, 2, 3, 3, 3, 3,
-                               4, 4, 4, 4, 5, 5, 5, 5,
-                               6, 6, 6, 6, 6, 7, 7, 7,
-                               7, 8, 8, 8, 8, 9, 9, 9,
-                               9,10,10,10,10,10,11,11,
-                              11,11,12,12,12,12,13,13,
-                              13,13,14,14,14,14,15,15};
-
-static const float3 RGB2LUM = float3(0.2126f, 0.7152f, 0.0722f);
-
-#define THREAD_GROUP_SIZE    64
-#define BLOCK_SIZE_Y         4
-#define BLOCK_SIZE_X         4
-#define BLOCK_SIZE           (BLOCK_SIZE_Y * BLOCK_SIZE_X)
-
-
-//Forward declaration
-uint3 float2half( float3 pixel_f );
-int3 start_quantize( uint3 pixel_h );
-void quantize( inout int2x3 endPoint, uint prec );
-void finish_quantize_0( inout int bBadQuantize, inout int2x3 endPoint, uint4 prec, bool transformed );
-void finish_quantize_1( inout int bBadQuantize, inout int2x3 endPoint, uint4 prec, bool transformed );
-void finish_quantize( out bool bBadQuantize, inout int2x3 endPoint, uint4 prec, bool transformed );
-
-void start_unquantize( inout int2x3 endPoint[2], uint4 prec, bool transformed );
-void start_unquantize( inout int2x3 endPoint, uint4 prec, bool transformed );
-void unquantize( inout int2x3 color, uint prec );
-uint3 finish_unquantize( int3 color );
-void generate_palette_unquantized8( out uint3 palette, int3 low, int3 high, int i );
-void generate_palette_unquantized16( out uint3 palette, int3 low, int3 high, int i );
-float3 half2float(uint3 color_h );
-
-void block_package( inout uint4 block, int2x3 endPoint[2], uint mode_type, uint partition_index );
-void block_package( inout uint4 block, int2x3 endPoint, uint mode_type );
-
-void swap(inout int3 lhs, inout int3 rhs)
-{
-    int3 tmp = lhs;
-    lhs = rhs;
-    rhs = tmp;
-}
-
-Texture2D<float4> g_Input : register( t0 ); 
-StructuredBuffer<uint4> g_InBuff : register( t1 );
-
-RWStructuredBuffer<uint4> g_OutBuff : register( u0 );
-
-struct SharedData
-{
-    float3 pixel;
-    int3 pixel_ph;
-    float3 pixel_hr;
-    float pixel_lum;
-    float error;
-    uint best_mode;
-    uint best_partition;
-    int3 endPoint_low;
-    int3 endPoint_high;
-    float endPoint_lum_low;
-    float endPoint_lum_high;
-};
-
-groupshared SharedData shared_temp[THREAD_GROUP_SIZE];
-
-[numthreads( THREAD_GROUP_SIZE, 1, 1 )]
-void TryModeG10CS( uint GI : SV_GroupIndex, uint3 groupID : SV_GroupID )
-{
-    const uint MAX_USED_THREAD = 16;
-    uint BLOCK_IN_GROUP = THREAD_GROUP_SIZE / MAX_USED_THREAD;
-    uint blockInGroup = GI / MAX_USED_THREAD;
-    uint blockID = g_start_block_id + groupID.x * BLOCK_IN_GROUP + blockInGroup;
-    uint threadBase = blockInGroup * MAX_USED_THREAD;
-    uint threadInBlock = GI - threadBase;
-
-#ifndef REF_DEVICE
-    if (blockID >= g_num_total_blocks)
-    {
-        return;
-    }
-#endif
-    
-    uint block_y = blockID / g_num_block_x;
-    uint block_x = blockID - block_y * g_num_block_x;
-    uint base_x = block_x * BLOCK_SIZE_X;
-    uint base_y = block_y * BLOCK_SIZE_Y;
-    
-    if (threadInBlock < 16)
-    {
-        shared_temp[GI].pixel = g_Input.Load( uint3( base_x + threadInBlock % 4, base_y + threadInBlock / 4, 0 ) ).rgb;
-        uint3 pixel_h = float2half( shared_temp[GI].pixel );
-        shared_temp[GI].pixel_hr = half2float(pixel_h);
-        shared_temp[GI].pixel_lum = dot(shared_temp[GI].pixel_hr, RGB2LUM);
-        shared_temp[GI].pixel_ph = start_quantize( pixel_h );
-        
-        shared_temp[GI].endPoint_low = shared_temp[GI].pixel_ph;
-        shared_temp[GI].endPoint_high = shared_temp[GI].pixel_ph;
-        shared_temp[GI].endPoint_lum_low = shared_temp[GI].pixel_lum;
-        shared_temp[GI].endPoint_lum_high = shared_temp[GI].pixel_lum;
-    }
-#ifdef REF_DEVICE
-    GroupMemoryBarrierWithGroupSync();
-#endif
-    
-    if (threadInBlock < 8)
-    {
-        if (shared_temp[GI].endPoint_lum_low > shared_temp[GI + 8].endPoint_lum_low)
-        {
-            shared_temp[GI].endPoint_low = shared_temp[GI + 8].endPoint_low;
-            shared_temp[GI].endPoint_lum_low = shared_temp[GI + 8].endPoint_lum_low;
-        }
-        if (shared_temp[GI].endPoint_lum_high < shared_temp[GI + 8].endPoint_lum_high)
-        {
-            shared_temp[GI].endPoint_high = shared_temp[GI + 8].endPoint_high;
-            shared_temp[GI].endPoint_lum_high = shared_temp[GI + 8].endPoint_lum_high;
-        }
-    }
-#ifdef REF_DEVICE
-    GroupMemoryBarrierWithGroupSync();
-#endif
-    if (threadInBlock < 4)
-    {
-        if (shared_temp[GI].endPoint_lum_low > shared_temp[GI + 4].endPoint_lum_low)
-        {
-            shared_temp[GI].endPoint_low = shared_temp[GI + 4].endPoint_low;
-            shared_temp[GI].endPoint_lum_low = shared_temp[GI + 4].endPoint_lum_low;
-        }
-        if (shared_temp[GI].endPoint_lum_high < shared_temp[GI + 4].endPoint_lum_high)
-        {
-            shared_temp[GI].endPoint_high = shared_temp[GI + 4].endPoint_high;
-            shared_temp[GI].endPoint_lum_high = shared_temp[GI + 4].endPoint_lum_high;
-        }
-    }
-#ifdef REF_DEVICE
-    GroupMemoryBarrierWithGroupSync();
-#endif
-    if (threadInBlock < 2)
-    {
-        if (shared_temp[GI].endPoint_lum_low > shared_temp[GI + 2].endPoint_lum_low)
-        {
-            shared_temp[GI].endPoint_low = shared_temp[GI + 2].endPoint_low;
-            shared_temp[GI].endPoint_lum_low = shared_temp[GI + 2].endPoint_lum_low;
-        }
-        if (shared_temp[GI].endPoint_lum_high < shared_temp[GI + 2].endPoint_lum_high)
-        {
-            shared_temp[GI].endPoint_high = shared_temp[GI + 2].endPoint_high;
-            shared_temp[GI].endPoint_lum_high = shared_temp[GI + 2].endPoint_lum_high;
-        }
-    }
-#ifdef REF_DEVICE
-    GroupMemoryBarrierWithGroupSync();
-#endif
-    if (threadInBlock < 1)
-    {
-        if (shared_temp[GI].endPoint_lum_low > shared_temp[GI + 1].endPoint_lum_low)
-        {
-            shared_temp[GI].endPoint_low = shared_temp[GI + 1].endPoint_low;
-            shared_temp[GI].endPoint_lum_low = shared_temp[GI + 1].endPoint_lum_low;
-        }
-        if (shared_temp[GI].endPoint_lum_high < shared_temp[GI + 1].endPoint_lum_high)
-        {
-            shared_temp[GI].endPoint_high = shared_temp[GI + 1].endPoint_high;
-            shared_temp[GI].endPoint_lum_high = shared_temp[GI + 1].endPoint_lum_high;
-        }
-    }
-#ifdef REF_DEVICE
-    GroupMemoryBarrierWithGroupSync();
-#endif
-
-    //ergod mode_type 11:14
-    if ( threadInBlock == 0 )
-    {
-        int2x3 endPoint;
-        // find_axis
-        endPoint[0] = shared_temp[threadBase + 0].endPoint_low;
-        endPoint[1] = shared_temp[threadBase + 0].endPoint_high;
-        
-        //compute_index
-        float3 span = endPoint[1] - endPoint[0];// fixed a bug in v0.2
-        float span_norm_sqr = dot( span, span );// fixed a bug in v0.2
-        float dotProduct = dot( span, shared_temp[threadBase + 0].pixel_ph - endPoint[0] );// fixed a bug in v0.2
-        if ( span_norm_sqr > 0 && dotProduct >= 0 && uint( dotProduct * 63.49999 / span_norm_sqr ) > 32 )
-        {
-            swap(endPoint[0], endPoint[1]);
-
-            shared_temp[GI].endPoint_low = endPoint[0];
-            shared_temp[GI].endPoint_high = endPoint[1];
-        }
-    }
-#ifdef REF_DEVICE
-    GroupMemoryBarrierWithGroupSync();
-#endif
-
-    if (threadInBlock < 4)
-    {
-        int2x3 endPoint;
-        endPoint[0] = shared_temp[threadBase + 0].endPoint_low;
-        endPoint[1] = shared_temp[threadBase + 0].endPoint_high;
-        
-        float3 span = endPoint[1] - endPoint[0];
-        float span_norm_sqr = dot( span, span );
-            
-        uint4 prec = candidateModePrec[threadInBlock + 10];
-        int2x3 endPoint_q = endPoint;
-        quantize( endPoint_q, prec.x );
-
-        bool transformed = candidateModeTransformed[threadInBlock + 10];
-        if (transformed)
-        {
-            endPoint_q[1] -= endPoint_q[0];
-        }
-        
-        bool bBadQuantize;
-        finish_quantize( bBadQuantize, endPoint_q, prec, transformed );
-        
-        start_unquantize( endPoint_q, prec, transformed );
-        
-        unquantize( endPoint_q, prec.x );
-        
-        float error = 0;
-        [loop]for ( uint j = 0; j < 16; j ++ )
-        {
-            float dotProduct = dot( span, shared_temp[threadBase + j].pixel_ph - endPoint[0] );// fixed a bug in v0.2
-            uint index = ( span_norm_sqr <= 0 || dotProduct <= 0 ) ? 0
-                : ( ( dotProduct < span_norm_sqr ) ? aStep2[ uint( dotProduct * 63.49999 / span_norm_sqr ) ] : aStep2[63] );
-                
-            uint3 pixel_rh;
-            generate_palette_unquantized16( pixel_rh, endPoint_q[0], endPoint_q[1], index );
-            float3 pixel_r = half2float( pixel_rh );
-            pixel_r -= shared_temp[threadBase + j].pixel_hr;
-            error += dot(pixel_r, pixel_r);
-        }
-        if ( bBadQuantize )
-            error = 1e20f;
-
-        shared_temp[GI].error = error;
-        shared_temp[GI].best_mode = candidateModeFlag[threadInBlock + 10];
-    }
-#ifdef REF_DEVICE
-    GroupMemoryBarrierWithGroupSync();
-#endif
-    
-    if (threadInBlock < 2)
-    {
-        if ( shared_temp[GI].error > shared_temp[GI + 2].error )
-        {
-            shared_temp[GI].error = shared_temp[GI + 2].error;
-            shared_temp[GI].best_mode = shared_temp[GI + 2].best_mode;
-        }
-    }
-#ifdef REF_DEVICE
-    GroupMemoryBarrierWithGroupSync();
-#endif
-    if (threadInBlock < 1)
-    {
-        if ( shared_temp[GI].error > shared_temp[GI + 1].error )
-        {
-            shared_temp[GI].error = shared_temp[GI + 1].error;
-            shared_temp[GI].best_mode = shared_temp[GI + 1].best_mode;
-        }
-        
-        g_OutBuff[blockID] = uint4(asuint(shared_temp[GI].error), shared_temp[GI].best_mode, 0, 0);
-    }
-}
-
-[numthreads( THREAD_GROUP_SIZE, 1, 1 )]
-void TryModeLE10CS( uint GI : SV_GroupIndex, uint3 groupID : SV_GroupID )
-{
-    const uint MAX_USED_THREAD = 32;
-    uint BLOCK_IN_GROUP = THREAD_GROUP_SIZE / MAX_USED_THREAD;
-    uint blockInGroup = GI / MAX_USED_THREAD;
-    uint blockID = g_start_block_id + groupID.x * BLOCK_IN_GROUP + blockInGroup;
-    uint threadBase = blockInGroup * MAX_USED_THREAD;
-    uint threadInBlock = GI - threadBase;
-
-#ifndef REF_DEVICE
-    if (blockID >= g_num_total_blocks)
-    {
-        return;
-    }
-
-    if (asfloat(g_InBuff[blockID].x) < 1e-6f)
-    {
-        g_OutBuff[blockID] = g_InBuff[blockID];
-        return;
-    }
-#endif
-    
-    uint block_y = blockID / g_num_block_x;
-    uint block_x = blockID - block_y * g_num_block_x;
-    uint base_x = block_x * BLOCK_SIZE_X;
-    uint base_y = block_y * BLOCK_SIZE_Y;
-    
-    if (threadInBlock < 16)
-    {
-        shared_temp[GI].pixel = g_Input.Load( uint3( base_x + threadInBlock % 4, base_y + threadInBlock / 4, 0 ) ).rgb;
-        uint3 pixel_h = float2half( shared_temp[GI].pixel );
-        shared_temp[GI].pixel_hr = half2float(pixel_h);
-        shared_temp[GI].pixel_lum = dot(shared_temp[GI].pixel_hr, RGB2LUM);
-        shared_temp[GI].pixel_ph = start_quantize( pixel_h );
-    }
-#ifdef REF_DEVICE
-    GroupMemoryBarrierWithGroupSync();
-#endif
-    
-    //ergod mode_type 1:10
-    if (threadInBlock < 32)
-    {
-        // find_axis
-        int2x3 endPoint[2];
-        endPoint[0][0] = MAX_INT;
-        endPoint[0][1] = MIN_INT;
-        endPoint[1][0] = MAX_INT;
-        endPoint[1][1] = MIN_INT;
-        
-        float2 endPoint_lum[2];
-        endPoint_lum[0][0] = MAX_FLOAT;
-        endPoint_lum[0][1] = MIN_FLOAT;
-        endPoint_lum[1][0] = MAX_FLOAT;
-        endPoint_lum[1][1] = MIN_FLOAT;
-
-        uint bit = candidateSectionBit[threadInBlock];
-        for ( uint i = 0; i < 16; i ++ )
-        {
-            int3 pixel_ph = shared_temp[threadBase + i].pixel_ph;
-            float pixel_lum = shared_temp[threadBase + i].pixel_lum;
-            if ( (bit >> i) & 1 ) //It gets error when using "candidateSection" as "endPoint_ph" index
-            {
-                if (endPoint_lum[1][0] > pixel_lum)
-                {
-                    endPoint[1][0] = pixel_ph;
-                    endPoint_lum[1][0] = pixel_lum;
-                }
-                if (endPoint_lum[1][1] < pixel_lum)
-                {
-                    endPoint[1][1] = pixel_ph;
-                    endPoint_lum[1][1] = pixel_lum;
-                }
-            }
-            else
-            {
-                if (endPoint_lum[0][0] > pixel_lum)
-                {
-                    endPoint[0][0] = pixel_ph;
-                    endPoint_lum[0][0] = pixel_lum;
-                }
-                if (endPoint_lum[0][1] < pixel_lum)
-                {
-                    endPoint[0][1] = pixel_ph;
-                    endPoint_lum[0][1] = pixel_lum;
-                }
-            }
-        }
-        
-        //compute_index
-        float3 span[2];// fixed a bug in v0.2
-        float span_norm_sqr[2];// fixed a bug in v0.2
-        [unroll]
-        for (uint p = 0; p < 2; ++ p)
-        {
-            span[p] = endPoint[p][1] - endPoint[p][0];
-            span_norm_sqr[p] = dot( span[p], span[p] );
-
-            float dotProduct = dot( span[p], shared_temp[threadBase + (0 == p ? 0 : candidateFixUpIndex1D[threadInBlock])].pixel_ph - endPoint[p][0] );// fixed a bug in v0.2
-            if ( span_norm_sqr[p] > 0 && dotProduct >= 0 && uint( dotProduct * 63.49999 / span_norm_sqr[p] ) > 32 )
-            {
-                span[p] = -span[p];
-                swap(endPoint[p][0], endPoint[p][1]);
-            }
-        }
-
-        uint4 prec = candidateModePrec[g_mode_id];
-        int2x3 endPoint_q[2] = endPoint;
-        quantize( endPoint_q[0], prec.x );
-        quantize( endPoint_q[1], prec.x );
-
-        bool transformed = candidateModeTransformed[g_mode_id];
-        if (transformed)
-        {
-            endPoint_q[0][1] -= endPoint_q[0][0];
-            endPoint_q[1][0] -= endPoint_q[0][0];
-            endPoint_q[1][1] -= endPoint_q[0][0];
-        }
-
-        int bBadQuantize = 0;
-        finish_quantize_0( bBadQuantize, endPoint_q[0], prec, transformed );
-        finish_quantize_1( bBadQuantize, endPoint_q[1], prec, transformed );
-        
-        start_unquantize( endPoint_q, prec, transformed );
-        
-        unquantize( endPoint_q[0], prec.x );
-        unquantize( endPoint_q[1], prec.x );
-        
-        float error = 0;
-        for ( uint j = 0; j < 16; j ++ )
-        {
-            uint3 pixel_rh;
-            if ((bit >> j) & 1)
-            {
-                float dotProduct = dot( span[1], shared_temp[threadBase + j].pixel_ph - endPoint[1][0] );// fixed a bug in v0.2
-                uint index = ( span_norm_sqr[1] <= 0 || dotProduct <= 0 ) ? 0
-                        : ( ( dotProduct < span_norm_sqr[1] ) ? aStep1[ uint( dotProduct * 63.49999 / span_norm_sqr[1] ) ] : aStep1[63] );
-                generate_palette_unquantized8( pixel_rh, endPoint_q[1][0], endPoint_q[1][1], index );
-            }
-            else
-            {
-                float dotProduct = dot( span[0], shared_temp[threadBase + j].pixel_ph - endPoint[0][0] );// fixed a bug in v0.2
-                uint index = ( span_norm_sqr[0] <= 0 || dotProduct <= 0 ) ? 0
-                        : ( ( dotProduct < span_norm_sqr[0] ) ? aStep1[ uint( dotProduct * 63.49999 / span_norm_sqr[0] ) ] : aStep1[63] );
-                generate_palette_unquantized8( pixel_rh, endPoint_q[0][0], endPoint_q[0][1], index );
-            }
-
-            float3 pixel_r = half2float( pixel_rh );
-            pixel_r -= shared_temp[threadBase + j].pixel_hr;
-            error += dot(pixel_r, pixel_r);
-        }
-        if ( bBadQuantize )
-            error = 1e20f;
-
-        shared_temp[GI].error = error;
-        shared_temp[GI].best_mode = candidateModeFlag[g_mode_id];
-        shared_temp[GI].best_partition = threadInBlock;
-    }
-#ifdef REF_DEVICE
-    GroupMemoryBarrierWithGroupSync();
-#endif
-    
-    if (threadInBlock < 16)
-    {
-        if ( shared_temp[GI].error > shared_temp[GI + 16].error )
-        {
-            shared_temp[GI].error = shared_temp[GI + 16].error;
-            shared_temp[GI].best_mode = shared_temp[GI + 16].best_mode;
-            shared_temp[GI].best_partition = shared_temp[GI + 16].best_partition;
-        }
-    }
-#ifdef REF_DEVICE
-    GroupMemoryBarrierWithGroupSync();
-#endif
-    if (threadInBlock < 8)
-    {
-        if ( shared_temp[GI].error > shared_temp[GI + 8].error )
-        {
-            shared_temp[GI].error = shared_temp[GI + 8].error;
-            shared_temp[GI].best_mode = shared_temp[GI + 8].best_mode;
-            shared_temp[GI].best_partition = shared_temp[GI + 8].best_partition;
-        }
-    }
-#ifdef REF_DEVICE
-    GroupMemoryBarrierWithGroupSync();
-#endif
-    if (threadInBlock < 4)
-    {
-        if ( shared_temp[GI].error > shared_temp[GI + 4].error )
-        {
-            shared_temp[GI].error = shared_temp[GI + 4].error;
-            shared_temp[GI].best_mode = shared_temp[GI + 4].best_mode;
-            shared_temp[GI].best_partition = shared_temp[GI + 4].best_partition;
-        }
-    }
-#ifdef REF_DEVICE
-    GroupMemoryBarrierWithGroupSync();
-#endif
-    if (threadInBlock < 2)
-    {
-        if ( shared_temp[GI].error > shared_temp[GI + 2].error )
-        {
-            shared_temp[GI].error = shared_temp[GI + 2].error;
-            shared_temp[GI].best_mode = shared_temp[GI + 2].best_mode;
-            shared_temp[GI].best_partition = shared_temp[GI + 2].best_partition;
-        }
-    }
-#ifdef REF_DEVICE
-    GroupMemoryBarrierWithGroupSync();
-#endif
-    if (threadInBlock < 1)
-    {
-        if ( shared_temp[GI].error > shared_temp[GI + 1].error )
-        {
-            shared_temp[GI].error = shared_temp[GI + 1].error;
-            shared_temp[GI].best_mode = shared_temp[GI + 1].best_mode;
-            shared_temp[GI].best_partition = shared_temp[GI + 1].best_partition;
-        }
-        
-        if (asfloat(g_InBuff[blockID].x) > shared_temp[GI].error)
-        {
-            g_OutBuff[blockID] = uint4(asuint(shared_temp[GI].error), shared_temp[GI].best_mode, shared_temp[GI].best_partition, 0);
-        }
-        else
-        {
-            g_OutBuff[blockID] = g_InBuff[blockID];
-        }
-    }
-}
-
-[numthreads( THREAD_GROUP_SIZE, 1, 1 )]
-void EncodeBlockCS(uint GI : SV_GroupIndex, uint3 groupID : SV_GroupID)
-{
-    const uint MAX_USED_THREAD = 32;
-    uint BLOCK_IN_GROUP = THREAD_GROUP_SIZE / MAX_USED_THREAD;
-    uint blockInGroup = GI / MAX_USED_THREAD;
-    uint blockID = g_start_block_id + groupID.x * BLOCK_IN_GROUP + blockInGroup;
-    uint threadBase = blockInGroup * MAX_USED_THREAD;
-    uint threadInBlock = GI - threadBase;
-
-#ifndef REF_DEVICE
-    if (blockID >= g_num_total_blocks)
-    {
-        return;
-    }
-#endif
-
-    uint block_y = blockID / g_num_block_x;
-    uint block_x = blockID - block_y * g_num_block_x;
-    uint base_x = block_x * BLOCK_SIZE_X;
-    uint base_y = block_y * BLOCK_SIZE_Y;
-    
-    if (threadInBlock < 16)
-    {
-        shared_temp[GI].pixel = g_Input.Load( uint3( base_x + threadInBlock % 4, base_y + threadInBlock / 4, 0 ) ).rgb;
-        shared_temp[GI].pixel_lum = dot(shared_temp[GI].pixel, RGB2LUM);
-        uint3 pixel_h = float2half( shared_temp[GI].pixel );
-        shared_temp[GI].pixel_ph = start_quantize( pixel_h );
-    }
-#ifdef REF_DEVICE
-    GroupMemoryBarrierWithGroupSync();
-#endif
-    
-    uint best_mode = g_InBuff[blockID].y;
-    uint best_partition = g_InBuff[blockID].z;
-    
-    uint4 block = 0;
-
-    if (threadInBlock < 32)
-    {
-        int2x3 endPoint;
-        endPoint[0] = MAX_INT;
-        endPoint[1] = MIN_INT;
-
-        float2 endPoint_lum;
-        endPoint_lum[0] = MAX_FLOAT;
-        endPoint_lum[1] = MIN_FLOAT;
-        
-        int2 endPoint_lum_index;
-        endPoint_lum_index[0] = -1;
-        endPoint_lum_index[1] = -1;
-
-        int3 pixel_ph = shared_temp[threadBase + (threadInBlock & 0xF)].pixel_ph;
-        float pixel_lum = shared_temp[threadBase + (threadInBlock & 0xF)].pixel_lum;
-        if (threadInBlock < 16)
-        {
-            if (best_mode > 10)
-            {
-                endPoint[0] = endPoint[1] = pixel_ph;
-                endPoint_lum[0] = endPoint_lum[1] = pixel_lum;
-            }
-            else
-            {
-                uint bits = candidateSectionBit[best_partition];
-                if (0 == ((bits >> threadInBlock) & 1))
-                {
-                    endPoint[0] = endPoint[1] = pixel_ph;
-                    endPoint_lum[0] = endPoint_lum[1] = pixel_lum;
-                }
-            }
-        }
-        else
-        {
-            if (best_mode <= 10)
-            {
-                uint bits = candidateSectionBit[best_partition];
-                if (1 == ((bits >> (threadInBlock & 0xF)) & 1))
-                {
-                    endPoint[0] = endPoint[1] = pixel_ph;
-                    endPoint_lum[0] = endPoint_lum[1] = pixel_lum;
-                }
-            }
-        }
-
-        shared_temp[GI].endPoint_low = endPoint[0];
-        shared_temp[GI].endPoint_high = endPoint[1];
-        
-        shared_temp[GI].endPoint_lum_low = endPoint_lum[0];
-        shared_temp[GI].endPoint_lum_high = endPoint_lum[1];
-    }
-#ifdef REF_DEVICE
-    GroupMemoryBarrierWithGroupSync();
-#endif
-    if ((threadInBlock & 0xF) < 8)
-    {
-        if (shared_temp[GI].endPoint_lum_low > shared_temp[GI + 8].endPoint_lum_low)
-        {
-            shared_temp[GI].endPoint_low = shared_temp[GI + 8].endPoint_low;
-            shared_temp[GI].endPoint_lum_low = shared_temp[GI + 8].endPoint_lum_low;
-        }
-        if (shared_temp[GI].endPoint_lum_high < shared_temp[GI + 8].endPoint_lum_high)
-        {
-            shared_temp[GI].endPoint_high = shared_temp[GI + 8].endPoint_high;
-            shared_temp[GI].endPoint_lum_high = shared_temp[GI + 8].endPoint_lum_high;
-        }
-    }
-#ifdef REF_DEVICE
-    GroupMemoryBarrierWithGroupSync();
-#endif
-    if ((threadInBlock & 0xF) < 4)
-    {
-        if (shared_temp[GI].endPoint_lum_low > shared_temp[GI + 4].endPoint_lum_low)
-        {
-            shared_temp[GI].endPoint_low = shared_temp[GI + 4].endPoint_low;
-            shared_temp[GI].endPoint_lum_low = shared_temp[GI + 4].endPoint_lum_low;
-        }
-        if (shared_temp[GI].endPoint_lum_high < shared_temp[GI + 4].endPoint_lum_high)
-        {
-            shared_temp[GI].endPoint_high = shared_temp[GI + 4].endPoint_high;
-            shared_temp[GI].endPoint_lum_high = shared_temp[GI + 4].endPoint_lum_high;
-        }
-    }
-#ifdef REF_DEVICE
-    GroupMemoryBarrierWithGroupSync();
-#endif
-    if ((threadInBlock & 0xF) < 2)
-    {
-        if (shared_temp[GI].endPoint_lum_low > shared_temp[GI + 2].endPoint_lum_low)
-        {
-            shared_temp[GI].endPoint_low = shared_temp[GI + 2].endPoint_low;
-            shared_temp[GI].endPoint_lum_low = shared_temp[GI + 2].endPoint_lum_low;
-        }
-        if (shared_temp[GI].endPoint_lum_high < shared_temp[GI + 2].endPoint_lum_high)
-        {
-            shared_temp[GI].endPoint_high = shared_temp[GI + 2].endPoint_high;
-            shared_temp[GI].endPoint_lum_high = shared_temp[GI + 2].endPoint_lum_high;
-        }
-    }
-#ifdef REF_DEVICE
-    GroupMemoryBarrierWithGroupSync();
-#endif
-    if ((threadInBlock & 0xF) < 1)
-    {
-        if (shared_temp[GI].endPoint_lum_low > shared_temp[GI + 1].endPoint_lum_low)
-        {
-            shared_temp[GI].endPoint_low = shared_temp[GI + 1].endPoint_low;
-            shared_temp[GI].endPoint_lum_low = shared_temp[GI + 1].endPoint_lum_low;
-        }
-        if (shared_temp[GI].endPoint_lum_high < shared_temp[GI + 1].endPoint_lum_high)
-        {
-            shared_temp[GI].endPoint_high = shared_temp[GI + 1].endPoint_high;
-            shared_temp[GI].endPoint_lum_high = shared_temp[GI + 1].endPoint_lum_high;
-        }
-    }
-#ifdef REF_DEVICE
-    GroupMemoryBarrierWithGroupSync();
-#endif
-
-    if (threadInBlock < 2)
-    {
-        // find_axis
-        int2x3 endPoint;
-        endPoint[0] = shared_temp[threadBase + threadInBlock * 16].endPoint_low;
-        endPoint[1] = shared_temp[threadBase + threadInBlock * 16].endPoint_high;
-
-        uint fixup = 0;
-        if ((1 == threadInBlock) && (best_mode <= 10))
-        {
-            fixup = candidateFixUpIndex1D[best_partition];
-        }
-        
-        float3 span = endPoint[1] - endPoint[0];
-        float span_norm_sqr = dot( span, span );
-        float dotProduct = dot( span, shared_temp[threadBase + fixup].pixel_ph - endPoint[0] );
-        if ( span_norm_sqr > 0 && dotProduct >= 0 && uint( dotProduct * 63.49999 / span_norm_sqr ) > 32 )
-        {
-            swap(endPoint[0], endPoint[1]);
-        }
-
-        shared_temp[GI].endPoint_low = endPoint[0];
-        shared_temp[GI].endPoint_high = endPoint[1];
-    }
-#ifdef REF_DEVICE
-    GroupMemoryBarrierWithGroupSync();
-#endif
-    
-    if (threadInBlock < 16)
-    {
-        uint bits;
-        if (best_mode > 10)
-        {
-            bits = 0;
-        }
-        else
-        {
-            bits = candidateSectionBit[best_partition];
-        }
-
-        float3 span;
-        float dotProduct;
-        if ((bits >> threadInBlock) & 1)
-        {
-            span = shared_temp[threadBase + 1].endPoint_high - shared_temp[threadBase + 1].endPoint_low;
-            dotProduct = dot( span, shared_temp[threadBase + threadInBlock].pixel_ph - shared_temp[threadBase + 1].endPoint_low );
-        }
-        else
-        {
-            span = shared_temp[threadBase + 0].endPoint_high - shared_temp[threadBase + 0].endPoint_low;
-            dotProduct = dot( span, shared_temp[threadBase + threadInBlock].pixel_ph - shared_temp[threadBase + 0].endPoint_low );
-        }
-        float span_norm_sqr = dot( span, span );
-
-        if (best_mode > 10)
-        {
-            uint index = ( span_norm_sqr <= 0 || dotProduct <= 0 ) ? 0
-                    : ( ( dotProduct < span_norm_sqr ) ? aStep2[ uint( dotProduct * 63.49999 / span_norm_sqr ) ] : aStep2[63] );
-            if (threadInBlock == 0)
-            {
-                block.z |= index << 1;
-            }
-            else if (threadInBlock < 8)
-            {
-                block.z |= index << (threadInBlock * 4);
-            }
-            else
-            {
-                block.w |= index << ((threadInBlock - 8) * 4);
-            }
-        }
-        else
-        {
-            uint index = ( span_norm_sqr <= 0 || dotProduct <= 0 ) ? 0
-                    : ( ( dotProduct < span_norm_sqr ) ? aStep1[ uint( dotProduct * 63.49999 / span_norm_sqr ) ] : aStep1[63] );
-
-            uint fixup = candidateFixUpIndex1D[best_partition];
-            int2 offset = int2((fixup != 2), (fixup == 15));
-
-            if (threadInBlock == 0)
-            {
-                block.z |= index << 18;
-            }
-            else if (threadInBlock < 3)
-            {
-                block.z |= index << (20 + (threadInBlock - 1) * 3);
-            }
-            else if (threadInBlock < 5)
-            {
-                block.z |= index << (25 + (threadInBlock - 3) * 3 + offset.x);
-            }
-            else if (threadInBlock == 5)
-            {
-                block.w |= index >> !offset.x;
-                if (!offset.x)
-                {
-                    block.z |= index << 31;
-                }
-            }
-            else if (threadInBlock < 9)
-            {
-                block.w |= index << (2 + (threadInBlock - 6) * 3 + offset.x);
-            }
-            else
-            {
-                block.w |= index << (11 + (threadInBlock - 9) * 3 + offset.y);
-            }
-        }
-        
-        shared_temp[GI].pixel_hr.xy = asfloat(block.zw);
-    }
-#ifdef REF_DEVICE
-    GroupMemoryBarrierWithGroupSync();
-#endif
-    if (threadInBlock < 8)
-    {
-        shared_temp[GI].pixel_hr.xy = asfloat(asuint(shared_temp[GI].pixel_hr.xy) | asuint(shared_temp[GI + 8].pixel_hr.xy));
-    }
-#ifdef REF_DEVICE
-    GroupMemoryBarrierWithGroupSync();
-#endif
-    if (threadInBlock < 4)
-    {
-        shared_temp[GI].pixel_hr.xy = asfloat(asuint(shared_temp[GI].pixel_hr.xy) | asuint(shared_temp[GI + 4].pixel_hr.xy));
-    }
-#ifdef REF_DEVICE
-    GroupMemoryBarrierWithGroupSync();
-#endif
-    if (threadInBlock < 2)
-    {
-        shared_temp[GI].pixel_hr.xy = asfloat(asuint(shared_temp[GI].pixel_hr.xy) | asuint(shared_temp[GI + 2].pixel_hr.xy));
-    }
-#ifdef REF_DEVICE
-    GroupMemoryBarrierWithGroupSync();
-#endif
-    if (threadInBlock < 1)
-    {
-        shared_temp[GI].pixel_hr.xy = asfloat(asuint(shared_temp[GI].pixel_hr.xy) | asuint(shared_temp[GI + 1].pixel_hr.xy));
-        
-        block.zw = asuint(shared_temp[GI].pixel_hr.xy);
-    }
-#ifdef REF_DEVICE
-    GroupMemoryBarrierWithGroupSync();
-#endif
-
-    bool transformed = candidateModeTransformed[best_mode - 1];
-    uint4 prec = candidateModePrec[best_mode - 1];
-    if (threadInBlock == 2)
-    {
-        int2x3 endPoint_q;
-        endPoint_q[0] = shared_temp[threadBase + 0].endPoint_low;
-        endPoint_q[1] = shared_temp[threadBase + 0].endPoint_high;
-
-        quantize( endPoint_q, prec.x );
-        if (transformed)
-        {
-            endPoint_q[1] -= endPoint_q[0];
-        }
-
-        shared_temp[GI].endPoint_low = endPoint_q[0];
-        shared_temp[GI].endPoint_high = endPoint_q[1];
-    }
-#ifdef REF_DEVICE
-    GroupMemoryBarrierWithGroupSync();
-#endif
-    if (threadInBlock == 3)
-    {
-        int3 ep0 = shared_temp[threadBase + 2].endPoint_low;
-        int2x3 endPoint_q;
-        endPoint_q[0] = shared_temp[threadBase + 1].endPoint_low;
-        endPoint_q[1] = shared_temp[threadBase + 1].endPoint_high;
-
-        if (best_mode <= 10)
-        {
-            quantize( endPoint_q, prec.x );
-            if (transformed)
-            {
-                endPoint_q[0] -= ep0;
-                endPoint_q[1] -= ep0;
-            }
-
-            shared_temp[GI].endPoint_low = endPoint_q[0];
-            shared_temp[GI].endPoint_high = endPoint_q[1];
-        }
-    }
-#ifdef REF_DEVICE
-    GroupMemoryBarrierWithGroupSync();
-#endif
-
-    if (threadInBlock < 2)
-    {
-        int2x3 endPoint_q;
-        endPoint_q[0] = shared_temp[threadBase + threadInBlock + 2].endPoint_low;
-        endPoint_q[1] = shared_temp[threadBase + threadInBlock + 2].endPoint_high;
-
-        int bBadQuantize = 0;
-        if (threadInBlock == 0)
-        {
-            if (best_mode > 10)
-            {
-                finish_quantize( bBadQuantize, endPoint_q, prec, transformed );
-            }
-            else
-            {
-                finish_quantize_0( bBadQuantize, endPoint_q, prec, transformed );
-            }
-        }
-        else // if (threadInBlock == 1)
-        {
-            if (best_mode <= 10)
-            {
-                finish_quantize_1( bBadQuantize, endPoint_q, prec, transformed );
-            }
-        }
-
-        shared_temp[GI].endPoint_low = endPoint_q[0];
-        shared_temp[GI].endPoint_high = endPoint_q[1];
-    }
-#ifdef REF_DEVICE
-    GroupMemoryBarrierWithGroupSync();
-#endif
-    
-    if ( threadInBlock == 0 )
-    {
-        int2x3 endPoint_q[2];
-        endPoint_q[0][0] = shared_temp[threadBase + 0].endPoint_low;
-        endPoint_q[0][1] = shared_temp[threadBase + 0].endPoint_high;
-        endPoint_q[1][0] = shared_temp[threadBase + 1].endPoint_low;
-        endPoint_q[1][1] = shared_temp[threadBase + 1].endPoint_high;
-
-        if ( best_mode > 10 )
-        {
-            block_package( block, endPoint_q[0], best_mode );
-        }
-        else
-        {
-            block_package( block, endPoint_q, best_mode, best_partition );
-        }
-        
-        g_OutBuff[blockID] = block;
-    }
-}
-
-uint float2half1( float f )
-{
-    uint Result;
-
-    uint IValue = asuint(f);
-    uint Sign = (IValue & 0x80000000U) >> 16U;
-    IValue = IValue & 0x7FFFFFFFU;
-    
-    if (IValue > 0x47FFEFFFU)
-    {
-        // The number is too large to be represented as a half.  Saturate to infinity.
-        Result = 0x7FFFU;
-    }
-    else
-    {
-        if (IValue < 0x38800000U)
-        {
-            // The number is too small to be represented as a normalized half.
-            // Convert it to a denormalized value.
-            uint Shift = 113U - (IValue >> 23U);
-            IValue = (0x800000U | (IValue & 0x7FFFFFU)) >> Shift;
-        }
-        else
-        {
-            // Rebias the exponent to represent the value as a normalized half.
-            IValue += 0xC8000000U;
-        }
-
-        Result = ((IValue + 0x0FFFU + ((IValue >> 13U) & 1U)) >> 13U)&0x7FFFU; 
-    }
-    return (Result|Sign);
-}
-
-uint3 float2half( float3 endPoint_f )
-{
-    //uint3 sign = asuint(endPoint_f) & 0x80000000;
-    //uint3 expo = asuint(endPoint_f) & 0x7F800000;
-    //uint3 base = asuint(endPoint_f) & 0x007FFFFF;
-    //return ( expo < 0x33800000 ) ? 0 
-    //                    //0x33800000 indicating 2^-24, which is minimal denormalized number that half can present 
-    //    : ( ( expo < 0x38800000 ) ? ( sign >> 16 ) | ( ( base + 0x00800000 ) >> ( 23 - ( ( expo - 0x33800000 ) >> 23 ) ) )//fixed a bug in v0.2
-    //                    //0x38800000 indicating 2^-14, which is minimal normalized number that half can present, so need to use denormalized half presentation
-    //    : ( ( expo == 0x7F800000 || expo > 0x47000000 ) ? ( ( sign >> 16 ) | 0x7bff )
-    //                    // treat NaN as INF, treat INF (including NaN) as the maximum/minimum number that half can present
-    //                    // 0x47000000 indicating 2^15, which is maximum exponent that half can present, so cut to 0x7bff which is the maximum half number
-    //    : ( ( sign >> 16 ) | ( ( ( expo - 0x38000000 ) | base ) >> 13 ) ) ) );
-
-
-    return uint3( float2half1( endPoint_f.x ), float2half1( endPoint_f.y ), float2half1( endPoint_f.z ) );
-}
-int3 start_quantize( uint3 pixel_h )
-{
-    if ( g_format == UNSIGNED_F16 )
-    {
-        return asint( ( pixel_h << 6 ) / 31 );
-    }
-    else
-    {
-        return ( pixel_h < 0x8000 ) ? ( ( pixel_h == 0x7bff ) ? 0x7fff : asint( ( pixel_h << 5 ) / 31 ) )// fixed a bug in v0.2
-            : ( ( pixel_h == 0x7bff ) ? 0xffff8001 : -asint( ( ( 0x00007fff & pixel_h ) << 5 ) / 31 ) );// fixed a bug in v0.2
-    }
-}
-void quantize( inout int2x3 endPoint, uint prec )
-{
-    int iprec = asint( prec );
-    if ( g_format == UNSIGNED_F16 )
-    {
-        endPoint = ( ( iprec >= 15 ) | ( endPoint == 0 ) ) ? endPoint
-            : ( ( endPoint == asint(0xFFFF) ) ? ( ( 1 << iprec ) - 1 )
-            : ( ( ( endPoint << iprec ) + asint(0x0000) ) >> 16 ) );
-    }
-    else
-    {
-        endPoint = ( ( iprec >= 16 ) | ( endPoint == 0 ) ) ? endPoint
-            : ( ( endPoint >= 0 ) ? ( ( endPoint == asint(0x7FFF) ) ? ( ( 1 << ( iprec - 1 ) ) - 1 ) : ( ( ( endPoint << ( iprec - 1 ) ) + asint(0x0000) ) >> 15 ) ) 
-            : ( ( -endPoint == asint(0x7FFF) ) ? -( ( 1 << ( iprec - 1 ) ) - 1 ) : -( ( ( -endPoint << ( iprec - 1 ) ) + asint(0x0000) ) >> 15 ) ) );
-    }
-}
-void finish_quantize_0( inout int bBadQuantize, inout int2x3 endPoint, uint4 prec, bool transformed )
-{
-    if ( transformed )
-    {
-        bool3 bBadComponent = ( endPoint[1] >= 0 ) ? ( endPoint[1] >= ( 1 << ( prec.yzw - 1 ) ) )
-            : ( -endPoint[1] > ( 1 << ( prec.yzw - 1 ) ) );
-        bBadQuantize |= any(bBadComponent);
-
-        endPoint[0] = endPoint[0] & ( ( 1 << prec.x ) - 1 );
-        endPoint[1] = ( endPoint[1] >= 0 ) ? ( ( endPoint[1] >= ( 1 << ( prec.yzw - 1 ) ) ) ? ( ( 1 << ( prec.yzw - 1 ) ) - 1 ) : endPoint[1] )
-            : ( ( -endPoint[1] > ( 1 << ( prec.yzw - 1 ) ) ) ? ( 1 << ( prec.yzw - 1 ) ) : ( endPoint[1] & ( ( 1 << prec.yzw ) - 1 ) ) );
-    }
-    else
-    {
-        endPoint &= ( ( 1 << prec.x ) - 1 );
-    }
-}
-void finish_quantize_1( inout int bBadQuantize, inout int2x3 endPoint, uint4 prec, bool transformed )
-{
-    if ( transformed )
-    {
-        bool2x3 bBadComponent;
-        bBadComponent[0] = ( endPoint[0] >= 0 ) ? ( endPoint[0] >= ( 1 << ( prec.yzw - 1 ) ) )
-            : ( -endPoint[0] > ( 1 << ( prec.yzw - 1 ) ) );
-        bBadComponent[1] = ( endPoint[1] >= 0 ) ? ( endPoint[1] >= ( 1 << ( prec.yzw - 1 ) ) )
-            : ( -endPoint[1] > ( 1 << ( prec.yzw - 1 ) ) );
-        bBadQuantize |= any(bBadComponent);
-
-        endPoint[0] = ( endPoint[0] >= 0 ) ? ( ( endPoint[0] >= ( 1 << ( prec.yzw - 1 ) ) ) ? ( ( 1 << ( prec.yzw - 1 ) ) - 1 ) : endPoint[0] )
-            : ( ( -endPoint[0] > ( 1 << ( prec.yzw - 1 ) ) ) ? ( 1 << ( prec.yzw - 1 ) ) : ( endPoint[0] & ( ( 1 << prec.yzw ) - 1 ) ) );
-        endPoint[1] = ( endPoint[1] >= 0 ) ? ( ( endPoint[1] >= ( 1 << ( prec.yzw - 1 ) ) ) ? ( ( 1 << ( prec.yzw - 1 ) ) - 1 ) : endPoint[1] )
-            : ( ( -endPoint[1] > ( 1 << ( prec.yzw - 1 ) ) ) ? ( 1 << ( prec.yzw - 1 ) ) : ( endPoint[1] & ( ( 1 << prec.yzw ) - 1 ) ) );
-    }
-    else
-    {
-        endPoint &= ( ( 1 << prec.x ) - 1 );
-    }
-}
-void finish_quantize( out bool bBadQuantize, inout int2x3 endPoint, uint4 prec, bool transformed )
-{
-    if ( transformed )
-    {
-        bool3 bBadComponent;
-        bBadComponent = ( endPoint[1] >= 0 ) ? ( endPoint[1] >= ( 1 << ( prec.yzw - 1 ) ) )
-            : ( -endPoint[1] > ( 1 << ( prec.yzw - 1 ) ) );
-        bBadQuantize = any( bBadComponent );
-
-        endPoint[0] = endPoint[0] & ( ( 1 << prec.x ) - 1 );
-        endPoint[1] = ( endPoint[1] >= 0 ) ? ( ( endPoint[1] >= ( 1 << ( prec.yzw - 1 ) ) ) ? ( ( 1 << ( prec.yzw - 1 ) ) - 1 ) : endPoint[1] )
-            : ( ( -endPoint[1] > ( 1 << ( prec.yzw - 1 ) ) ) ? ( 1 << ( prec.yzw - 1 ) ) : ( endPoint[1] & ( ( 1 << prec.yzw ) - 1 ) ) );            
-    }
-    else
-    {
-        endPoint &= ( ( 1 << prec.x ) - 1 );
-        
-        bBadQuantize = 0;
-    }
-}
-
-void SIGN_EXTEND( uint3 prec, inout int3 color )
-{
-    uint3 p = 1 << (prec - 1);
-    color = (color & p) ? (color & (p - 1)) - p : color;
-}
-
-void sign_extend( bool transformed, uint4 prec, inout int2x3 endPoint )
-{
-    if ( g_format == SIGNED_F16 )
-        SIGN_EXTEND( prec.x, endPoint[0] );
-    if ( g_format == SIGNED_F16 || transformed )
-        SIGN_EXTEND( prec.yzw, endPoint[1] );
-}
-
-void sign_extend( bool transformed, uint4 prec, inout int2x3 endPoint[2] )
-{
-    if ( g_format == SIGNED_F16 )
-        SIGN_EXTEND( prec.x, endPoint[0][0] );
-    if ( g_format == SIGNED_F16 || transformed )
-    {
-        SIGN_EXTEND( prec.yzw, endPoint[0][1] );
-        SIGN_EXTEND( prec.yzw, endPoint[1][0] );
-        SIGN_EXTEND( prec.yzw, endPoint[1][1] );
-    }
-}
-void start_unquantize( inout int2x3 endPoint[2], uint4 prec, bool transformed )
-{
-    sign_extend( transformed, prec, endPoint );
-    if ( transformed )
-    {
-        endPoint[0][1] += endPoint[0][0];
-        endPoint[1][0] += endPoint[0][0];
-        endPoint[1][1] += endPoint[0][0];
-    }
-}
-void start_unquantize( inout int2x3 endPoint, uint4 prec, bool transformed )
-{
-    sign_extend( transformed, prec, endPoint );
-    if ( transformed )
-        endPoint[1] += endPoint[0];
-}
-void unquantize( inout int2x3 color, uint prec )
-{
-    int iprec = asint( prec );
-    if (g_format == UNSIGNED_F16 )
-    {
-        if (prec < 15)
-        {
-            color = (color != 0) ? (color == ((1 << iprec) - 1) ? 0xFFFF : (((color << 16) + 0x8000) >> iprec)) : color;
-        }
-    }
-    else
-    {
-        if (prec < 16)
-        {
-            uint2x3 s = color >= 0 ? 0 : 1;
-            color = abs(color);
-            color = (color != 0) ? (color >= ((1 << (iprec - 1)) - 1) ? 0x7FFF : (((color << 15) + 0x4000) >> (iprec - 1))) : color;
-            color = s > 0 ? -color : color;
-        }
-    }
-}
-uint3 finish_unquantize( int3 color )
-{
-    if ( g_format == UNSIGNED_F16 )
-        color = ( color * 31 ) >> 6;
-    else
-    {
-        color = ( color < 0 ) ? -( ( -color * 31 ) >> 5 ) : ( color * 31 ) >> 5;
-        color = ( color < 0 ) ? ( ( -color ) | 0x8000 ) : color;
-    }
-    return asuint(color);
-}
-void generate_palette_unquantized8( out uint3 palette, int3 low, int3 high, int i )
-{
-    static const int aWeight3[] = {0, 9, 18, 27, 37, 46, 55, 64};
-    
-    int3 tmp = ( low * ( 64 - aWeight3[i] ) + high * aWeight3[i] + 32 ) >> 6;
-    palette = finish_unquantize( tmp );
-}
-void generate_palette_unquantized16( out uint3 palette, int3 low, int3 high, int i )
-{
-    static const int aWeight4[] = {0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64};
-    
-    int3 tmp = ( low * ( 64 - aWeight4[i] ) + high * aWeight4[i] + 32 ) >> 6;
-    palette = finish_unquantize( tmp );
-}
-
-float half2float1( uint Value )
-{
-    uint Mantissa = (uint)(Value & 0x03FF);
-
-    uint Exponent;
-    if ((Value & 0x7C00) != 0)  // The value is normalized
-    {
-        Exponent = (uint)((Value >> 10) & 0x1F);
-    }
-    else if (Mantissa != 0)     // The value is denormalized
-    {
-        // Normalize the value in the resulting float
-        Exponent = 1;
-
-        do
-        {
-            Exponent--;
-            Mantissa <<= 1;
-        } while ((Mantissa & 0x0400) == 0);
-
-        Mantissa &= 0x03FF;
-    }
-    else                        // The value is zero
-    {
-        Exponent = (uint)(-112);
-    }
-
-    uint Result = ((Value & 0x8000) << 16) | // Sign
-                      ((Exponent + 112) << 23) | // Exponent
-                      (Mantissa << 13);          // Mantissa
-
-    return asfloat(Result);
-}
-
-float3 half2float(uint3 color_h )
-{
-    //uint3 sign = color_h & 0x8000;
-    //uint3 expo = color_h & 0x7C00;
-    //uint3 base = color_h & 0x03FF;
-    //return ( expo == 0 ) ? asfloat( ( sign << 16 ) | asuint( float3(base) / 16777216 ) ) //16777216 = 2^24
-    //    : asfloat( ( sign << 16 ) | ( ( ( expo + 0x1C000 ) | base ) << 13 ) ); //0x1C000 = 0x1FC00 - 0x3C00
-
-    return float3( half2float1( color_h.x ), half2float1( color_h.y ), half2float1( color_h.z ) );
-}
-
-void block_package( inout uint4 block, int2x3 endPoint[2], uint mode_type, uint partition_index ) // for mode 1 - 10
-{
-    block.xy = 0;
-    block.z &= 0xFFFC0000;
-    
-    //block.z |= (partition_index & 0x1f) << 13;
-    
-    if ( mode_type == candidateModeFlag[0])
-    {
-        /*block.x = candidateModeMemory[0];
-        block.x |= ( ( endPoint[0][0].r << 5 ) & 0x00007FE0 ) | ( ( endPoint[0][0].g << 15 ) & 0x01FF8000 ) | ( ( endPoint[0][0].b << 25 ) & 0xFE000000 );
-        block.x |= ( endPoint[1][0].g >> 2 ) & 0x00000004;
-        block.x |= ( endPoint[1][0].b >> 1 ) & 0x00000008;
-        block.x |= endPoint[1][1].b & 0x00000010;
-        block.y |= ( ( endPoint[0][0].b >> 7 ) & 0x00000007 );
-        block.y |= ( ( endPoint[0][1].r << 3 ) & 0x000000F8 ) | ( ( endPoint[0][1].g << 13 ) & 0x0003E000 ) | ( ( endPoint[0][1].b << 23 ) & 0x0F800000 );
-        block.yz |= ( endPoint[1][0].gr << uint2(9, 1) ) & uint2(0x00001E00, 0x0000003E);
-        block.y |= ( endPoint[1][0].b << 29 ) & 0xE0000000;
-        block.y |= ( ( endPoint[1][1].g << 4 ) & 0x00000100 );
-        block.yz |= ( endPoint[1][1].gr << uint2(19, 7) ) & uint2(0x00780000, 0x00000F80);
-        block.yz |= ( ( endPoint[1][1].b << uint2(27, 9) ) & uint2(0x10000000, 0x00001000) ) | ( ( endPoint[1][1].b << uint2(18, 4) ) & uint2(0x00040000, 0x00000040) );
-        block.z |= ( endPoint[1][0].b >> 3 ) & 0x00000001;*/
-
-        block.x |= ((candidateModeMemory[0] >> 0) & 1) << 0;
-        block.x |= ((candidateModeMemory[0] >> 1) & 1) << 1;
-        block.x |= ((endPoint[1][0].g >> 4) & 1) << 2;
-        block.x |= ((endPoint[1][0].b >> 4) & 1) << 3;
-        block.x |= ((endPoint[1][1].b >> 4) & 1) << 4;
-        block.x |= ((endPoint[0][0].r >> 0) & 1) << 5;
-        block.x |= ((endPoint[0][0].r >> 1) & 1) << 6;
-        block.x |= ((endPoint[0][0].r >> 2) & 1) << 7;
-        block.x |= ((endPoint[0][0].r >> 3) & 1) << 8;
-        block.x |= ((endPoint[0][0].r >> 4) & 1) << 9;
-        block.x |= ((endPoint[0][0].r >> 5) & 1) << 10;
-        block.x |= ((endPoint[0][0].r >> 6) & 1) << 11;
-        block.x |= ((endPoint[0][0].r >> 7) & 1) << 12;
-        block.x |= ((endPoint[0][0].r >> 8) & 1) << 13;
-        block.x |= ((endPoint[0][0].r >> 9) & 1) << 14;
-        block.x |= ((endPoint[0][0].g >> 0) & 1) << 15;
-        block.x |= ((endPoint[0][0].g >> 1) & 1) << 16;
-        block.x |= ((endPoint[0][0].g >> 2) & 1) << 17;
-        block.x |= ((endPoint[0][0].g >> 3) & 1) << 18;
-        block.x |= ((endPoint[0][0].g >> 4) & 1) << 19;
-        block.x |= ((endPoint[0][0].g >> 5) & 1) << 20;
-        block.x |= ((endPoint[0][0].g >> 6) & 1) << 21;
-        block.x |= ((endPoint[0][0].g >> 7) & 1) << 22;
-        block.x |= ((endPoint[0][0].g >> 8) & 1) << 23;
-        block.x |= ((endPoint[0][0].g >> 9) & 1) << 24;
-        block.x |= ((endPoint[0][0].b >> 0) & 1) << 25;
-        block.x |= ((endPoint[0][0].b >> 1) & 1) << 26;
-        block.x |= ((endPoint[0][0].b >> 2) & 1) << 27;
-        block.x |= ((endPoint[0][0].b >> 3) & 1) << 28;
-        block.x |= ((endPoint[0][0].b >> 4) & 1) << 29;
-        block.x |= ((endPoint[0][0].b >> 5) & 1) << 30;
-        block.x |= ((endPoint[0][0].b >> 6) & 1) << 31;
-        block.y |= ((endPoint[0][0].b >> 7) & 1) << 0;
-        block.y |= ((endPoint[0][0].b >> 8) & 1) << 1;
-        block.y |= ((endPoint[0][0].b >> 9) & 1) << 2;
-        block.y |= ((endPoint[0][1].r >> 0) & 1) << 3;
-        block.y |= ((endPoint[0][1].r >> 1) & 1) << 4;
-        block.y |= ((endPoint[0][1].r >> 2) & 1) << 5;
-        block.y |= ((endPoint[0][1].r >> 3) & 1) << 6;
-        block.y |= ((endPoint[0][1].r >> 4) & 1) << 7;
-        block.y |= ((endPoint[1][1].g >> 4) & 1) << 8;
-        block.y |= ((endPoint[1][0].g >> 0) & 1) << 9;
-        block.y |= ((endPoint[1][0].g >> 1) & 1) << 10;
-        block.y |= ((endPoint[1][0].g >> 2) & 1) << 11;
-        block.y |= ((endPoint[1][0].g >> 3) & 1) << 12;
-        block.y |= ((endPoint[0][1].g >> 0) & 1) << 13;
-        block.y |= ((endPoint[0][1].g >> 1) & 1) << 14;
-        block.y |= ((endPoint[0][1].g >> 2) & 1) << 15;
-        block.y |= ((endPoint[0][1].g >> 3) & 1) << 16;
-        block.y |= ((endPoint[0][1].g >> 4) & 1) << 17;
-        block.y |= ((endPoint[1][1].b >> 0) & 1) << 18;
-        block.y |= ((endPoint[1][1].g >> 0) & 1) << 19;
-        block.y |= ((endPoint[1][1].g >> 1) & 1) << 20;
-        block.y |= ((endPoint[1][1].g >> 2) & 1) << 21;
-        block.y |= ((endPoint[1][1].g >> 3) & 1) << 22;
-        block.y |= ((endPoint[0][1].b >> 0) & 1) << 23;
-        block.y |= ((endPoint[0][1].b >> 1) & 1) << 24;
-        block.y |= ((endPoint[0][1].b >> 2) & 1) << 25;
-        block.y |= ((endPoint[0][1].b >> 3) & 1) << 26;
-        block.y |= ((endPoint[0][1].b >> 4) & 1) << 27;
-        block.y |= ((endPoint[1][1].b >> 1) & 1) << 28;
-        block.y |= ((endPoint[1][0].b >> 0) & 1) << 29;
-        block.y |= ((endPoint[1][0].b >> 1) & 1) << 30;
-        block.y |= ((endPoint[1][0].b >> 2) & 1) << 31;
-        block.z |= ((endPoint[1][0].b >> 3) & 1) << 0;
-        block.z |= ((endPoint[1][0].r >> 0) & 1) << 1;
-        block.z |= ((endPoint[1][0].r >> 1) & 1) << 2;
-        block.z |= ((endPoint[1][0].r >> 2) & 1) << 3;
-        block.z |= ((endPoint[1][0].r >> 3) & 1) << 4;
-        block.z |= ((endPoint[1][0].r >> 4) & 1) << 5;
-        block.z |= ((endPoint[1][1].b >> 2) & 1) << 6;
-        block.z |= ((endPoint[1][1].r >> 0) & 1) << 7;
-        block.z |= ((endPoint[1][1].r >> 1) & 1) << 8;
-        block.z |= ((endPoint[1][1].r >> 2) & 1) << 9;
-        block.z |= ((endPoint[1][1].r >> 3) & 1) << 10;
-        block.z |= ((endPoint[1][1].r >> 4) & 1) << 11;
-        block.z |= ((endPoint[1][1].b >> 3) & 1) << 12;
-        block.z |= ((partition_index >> 0) & 1) << 13;
-        block.z |= ((partition_index >> 1) & 1) << 14;
-        block.z |= ((partition_index >> 2) & 1) << 15;
-        block.z |= ((partition_index >> 3) & 1) << 16;
-        block.z |= ((partition_index >> 4) & 1) << 17;
-    }
-    else if ( mode_type == candidateModeFlag[1])
-    {
-        /*block.x = candidateModeMemory[1];
-        block.x |= ( ( endPoint[0][0].r << 5 ) & 0x00000FE0 ) | ( ( endPoint[0][0].g << 15 ) & 0x003F8000 ) | ( ( endPoint[0][0].b << 25 ) & 0xFE000000 );
-        block.y |= ( ( endPoint[0][1].r << 3 ) & 0x000001F8 ) | ( ( endPoint[0][1].g << 13 ) & 0x0007E000 ) | ( ( endPoint[0][1].b << 23 ) & 0x1F800000 );
-        block.x |= ( ( endPoint[1][0].g >> 3 ) & 0x00000004 ) | ( ( endPoint[1][0].g << 20 ) & 0x01000000 );
-        block.x |= ( endPoint[1][1].g >> 1 ) & 0x00000018;
-        block.x |= ( ( endPoint[1][1].b << 21 ) & 0x00800000 ) | ( ( endPoint[1][1].b << 12 ) & 0x00003000 );
-        block.x |= ( ( endPoint[1][0].b << 17 ) & 0x00400000 ) | ( ( endPoint[1][0].b << 10 ) & 0x00004000 );
-        block.yz |= ( endPoint[1][0].gr << uint2(9, 1) ) & uint2(0x00001E00, 0x0000007E);
-        block.y |= ( endPoint[1][0].b << 29 ) & 0xE0000000;
-        block.yz |= ( endPoint[1][1].gr << uint2(19, 7) ) & uint2(0x00780000, 0x00001F80);
-        block.y |= ( ( endPoint[1][1].b >> 4 ) & 0x00000002 ) | ( ( endPoint[1][1].b >> 2 ) & 0x00000004 ) | ( ( endPoint[1][1].b >> 3 ) & 0x00000001 );
-        block.z |= ( endPoint[1][0].b >> 3 ) & 0x00000001;*/
-
-        block.x |= ((candidateModeMemory[1] >> 0) & 1) << 0;
-        block.x |= ((candidateModeMemory[1] >> 1) & 1) << 1;
-        block.x |= ((endPoint[1][0].g >> 5) & 1) << 2;
-        block.x |= ((endPoint[1][1].g >> 4) & 1) << 3;
-        block.x |= ((endPoint[1][1].g >> 5) & 1) << 4;
-        block.x |= ((endPoint[0][0].r >> 0) & 1) << 5;
-        block.x |= ((endPoint[0][0].r >> 1) & 1) << 6;
-        block.x |= ((endPoint[0][0].r >> 2) & 1) << 7;
-        block.x |= ((endPoint[0][0].r >> 3) & 1) << 8;
-        block.x |= ((endPoint[0][0].r >> 4) & 1) << 9;
-        block.x |= ((endPoint[0][0].r >> 5) & 1) << 10;
-        block.x |= ((endPoint[0][0].r >> 6) & 1) << 11;
-        block.x |= ((endPoint[1][1].b >> 0) & 1) << 12;
-        block.x |= ((endPoint[1][1].b >> 1) & 1) << 13;
-        block.x |= ((endPoint[1][0].b >> 4) & 1) << 14;
-        block.x |= ((endPoint[0][0].g >> 0) & 1) << 15;
-        block.x |= ((endPoint[0][0].g >> 1) & 1) << 16;
-        block.x |= ((endPoint[0][0].g >> 2) & 1) << 17;
-        block.x |= ((endPoint[0][0].g >> 3) & 1) << 18;
-        block.x |= ((endPoint[0][0].g >> 4) & 1) << 19;
-        block.x |= ((endPoint[0][0].g >> 5) & 1) << 20;
-        block.x |= ((endPoint[0][0].g >> 6) & 1) << 21;
-        block.x |= ((endPoint[1][0].b >> 5) & 1) << 22;
-        block.x |= ((endPoint[1][1].b >> 2) & 1) << 23;
-        block.x |= ((endPoint[1][0].g >> 4) & 1) << 24;
-        block.x |= ((endPoint[0][0].b >> 0) & 1) << 25;
-        block.x |= ((endPoint[0][0].b >> 1) & 1) << 26;
-        block.x |= ((endPoint[0][0].b >> 2) & 1) << 27;
-        block.x |= ((endPoint[0][0].b >> 3) & 1) << 28;
-        block.x |= ((endPoint[0][0].b >> 4) & 1) << 29;
-        block.x |= ((endPoint[0][0].b >> 5) & 1) << 30;
-        block.x |= ((endPoint[0][0].b >> 6) & 1) << 31;
-        block.y |= ((endPoint[1][1].b >> 3) & 1) << 0;
-        block.y |= ((endPoint[1][1].b >> 5) & 1) << 1;
-        block.y |= ((endPoint[1][1].b >> 4) & 1) << 2;
-        block.y |= ((endPoint[0][1].r >> 0) & 1) << 3;
-        block.y |= ((endPoint[0][1].r >> 1) & 1) << 4;
-        block.y |= ((endPoint[0][1].r >> 2) & 1) << 5;
-        block.y |= ((endPoint[0][1].r >> 3) & 1) << 6;
-        block.y |= ((endPoint[0][1].r >> 4) & 1) << 7;
-        block.y |= ((endPoint[0][1].r >> 5) & 1) << 8;
-        block.y |= ((endPoint[1][0].g >> 0) & 1) << 9;
-        block.y |= ((endPoint[1][0].g >> 1) & 1) << 10;
-        block.y |= ((endPoint[1][0].g >> 2) & 1) << 11;
-        block.y |= ((endPoint[1][0].g >> 3) & 1) << 12;
-        block.y |= ((endPoint[0][1].g >> 0) & 1) << 13;
-        block.y |= ((endPoint[0][1].g >> 1) & 1) << 14;
-        block.y |= ((endPoint[0][1].g >> 2) & 1) << 15;
-        block.y |= ((endPoint[0][1].g >> 3) & 1) << 16;
-        block.y |= ((endPoint[0][1].g >> 4) & 1) << 17;
-        block.y |= ((endPoint[0][1].g >> 5) & 1) << 18;
-        block.y |= ((endPoint[1][1].g >> 0) & 1) << 19;
-        block.y |= ((endPoint[1][1].g >> 1) & 1) << 20;
-        block.y |= ((endPoint[1][1].g >> 2) & 1) << 21;
-        block.y |= ((endPoint[1][1].g >> 3) & 1) << 22;
-        block.y |= ((endPoint[0][1].b >> 0) & 1) << 23;
-        block.y |= ((endPoint[0][1].b >> 1) & 1) << 24;
-        block.y |= ((endPoint[0][1].b >> 2) & 1) << 25;
-        block.y |= ((endPoint[0][1].b >> 3) & 1) << 26;
-        block.y |= ((endPoint[0][1].b >> 4) & 1) << 27;
-        block.y |= ((endPoint[0][1].b >> 5) & 1) << 28;
-        block.y |= ((endPoint[1][0].b >> 0) & 1) << 29;
-        block.y |= ((endPoint[1][0].b >> 1) & 1) << 30;
-        block.y |= ((endPoint[1][0].b >> 2) & 1) << 31;
-        block.z |= ((endPoint[1][0].b >> 3) & 1) << 0;
-        block.z |= ((endPoint[1][0].r >> 0) & 1) << 1;
-        block.z |= ((endPoint[1][0].r >> 1) & 1) << 2;
-        block.z |= ((endPoint[1][0].r >> 2) & 1) << 3;
-        block.z |= ((endPoint[1][0].r >> 3) & 1) << 4;
-        block.z |= ((endPoint[1][0].r >> 4) & 1) << 5;
-        block.z |= ((endPoint[1][0].r >> 5) & 1) << 6;
-        block.z |= ((endPoint[1][1].r >> 0) & 1) << 7;
-        block.z |= ((endPoint[1][1].r >> 1) & 1) << 8;
-        block.z |= ((endPoint[1][1].r >> 2) & 1) << 9;
-        block.z |= ((endPoint[1][1].r >> 3) & 1) << 10;
-        block.z |= ((endPoint[1][1].r >> 4) & 1) << 11;
-        block.z |= ((endPoint[1][1].r >> 5) & 1) << 12;
-        block.z |= ((partition_index >> 0) & 1) << 13;
-        block.z |= ((partition_index >> 1) & 1) << 14;
-        block.z |= ((partition_index >> 2) & 1) << 15;
-        block.z |= ((partition_index >> 3) & 1) << 16;
-        block.z |= ((partition_index >> 4) & 1) << 17;
-    }
-    else if ( mode_type == candidateModeFlag[2])
-    {
-        /*block.x = candidateModeMemory[2];
-        block.x |= ( ( endPoint[0][0].r << 5 ) & 0x00007FE0 ) | ( ( endPoint[0][0].g << 15 ) & 0x01FF8000 ) | ( ( endPoint[0][0].b << 25 ) & 0xFE000000 );
-        block.y |= ( endPoint[0][0].r >> 2 ) & 0x00000100;
-        block.y |= ( endPoint[0][0].g << 7 ) & 0x00020000;
-        block.y |= ( ( endPoint[0][0].b << 17 ) & 0x08000000 ) | ( ( endPoint[0][0].b >> 7 ) & 0x00000007 );
-        block.y |= ( ( endPoint[0][1].r << 3 ) & 0x000000F8 ) | ( ( endPoint[0][1].g << 13 ) & 0x0001E000 ) | ( ( endPoint[0][1].b << 23 ) & 0x07800000 );
-        block.yz |= ( endPoint[1][0].gr << uint2(9, 1) ) & uint2(0x00001E00, 0x0000003E);
-        block.y |= ( endPoint[1][0].b << 29 ) & 0xE0000000;
-        block.yz |= ( endPoint[1][1].gr << uint2(19, 7) ) & uint2(0x00780000, 0x00000F80);
-        block.yz |= ( ( endPoint[1][1].b << uint2(27, 9) ) & uint2(0x10000000, 0x00001000) ) | ( ( endPoint[1][1].b << uint2(18, 4) ) & uint2(0x00040000, 0x00000040) );
-        block.z |= ( endPoint[1][0].b >> 3 ) & 0x00000001;*/
-
-        block.x |= ((candidateModeMemory[2] >> 0) & 1) << 0;
-        block.x |= ((candidateModeMemory[2] >> 1) & 1) << 1;
-        block.x |= ((candidateModeMemory[2] >> 2) & 1) << 2;
-        block.x |= ((candidateModeMemory[2] >> 3) & 1) << 3;
-        block.x |= ((candidateModeMemory[2] >> 4) & 1) << 4;
-        block.x |= ((endPoint[0][0].r >> 0) & 1) << 5;
-        block.x |= ((endPoint[0][0].r >> 1) & 1) << 6;
-        block.x |= ((endPoint[0][0].r >> 2) & 1) << 7;
-        block.x |= ((endPoint[0][0].r >> 3) & 1) << 8;
-        block.x |= ((endPoint[0][0].r >> 4) & 1) << 9;
-        block.x |= ((endPoint[0][0].r >> 5) & 1) << 10;
-        block.x |= ((endPoint[0][0].r >> 6) & 1) << 11;
-        block.x |= ((endPoint[0][0].r >> 7) & 1) << 12;
-        block.x |= ((endPoint[0][0].r >> 8) & 1) << 13;
-        block.x |= ((endPoint[0][0].r >> 9) & 1) << 14;
-        block.x |= ((endPoint[0][0].g >> 0) & 1) << 15;
-        block.x |= ((endPoint[0][0].g >> 1) & 1) << 16;
-        block.x |= ((endPoint[0][0].g >> 2) & 1) << 17;
-        block.x |= ((endPoint[0][0].g >> 3) & 1) << 18;
-        block.x |= ((endPoint[0][0].g >> 4) & 1) << 19;
-        block.x |= ((endPoint[0][0].g >> 5) & 1) << 20;
-        block.x |= ((endPoint[0][0].g >> 6) & 1) << 21;
-        block.x |= ((endPoint[0][0].g >> 7) & 1) << 22;
-        block.x |= ((endPoint[0][0].g >> 8) & 1) << 23;
-        block.x |= ((endPoint[0][0].g >> 9) & 1) << 24;
-        block.x |= ((endPoint[0][0].b >> 0) & 1) << 25;
-        block.x |= ((endPoint[0][0].b >> 1) & 1) << 26;
-        block.x |= ((endPoint[0][0].b >> 2) & 1) << 27;
-        block.x |= ((endPoint[0][0].b >> 3) & 1) << 28;
-        block.x |= ((endPoint[0][0].b >> 4) & 1) << 29;
-        block.x |= ((endPoint[0][0].b >> 5) & 1) << 30;
-        block.x |= ((endPoint[0][0].b >> 6) & 1) << 31;
-        block.y |= ((endPoint[0][0].b >> 7) & 1) << 0;
-        block.y |= ((endPoint[0][0].b >> 8) & 1) << 1;
-        block.y |= ((endPoint[0][0].b >> 9) & 1) << 2;
-        block.y |= ((endPoint[0][1].r >> 0) & 1) << 3;
-        block.y |= ((endPoint[0][1].r >> 1) & 1) << 4;
-        block.y |= ((endPoint[0][1].r >> 2) & 1) << 5;
-        block.y |= ((endPoint[0][1].r >> 3) & 1) << 6;
-        block.y |= ((endPoint[0][1].r >> 4) & 1) << 7;
-        block.y |= ((endPoint[0][0].r >> 10) & 1) << 8;
-        block.y |= ((endPoint[1][0].g >> 0) & 1) << 9;
-        block.y |= ((endPoint[1][0].g >> 1) & 1) << 10;
-        block.y |= ((endPoint[1][0].g >> 2) & 1) << 11;
-        block.y |= ((endPoint[1][0].g >> 3) & 1) << 12;
-        block.y |= ((endPoint[0][1].g >> 0) & 1) << 13;
-        block.y |= ((endPoint[0][1].g >> 1) & 1) << 14;
-        block.y |= ((endPoint[0][1].g >> 2) & 1) << 15;
-        block.y |= ((endPoint[0][1].g >> 3) & 1) << 16;
-        block.y |= ((endPoint[0][0].g >> 10) & 1) << 17;
-        block.y |= ((endPoint[1][1].b >> 0) & 1) << 18;
-        block.y |= ((endPoint[1][1].g >> 0) & 1) << 19;
-        block.y |= ((endPoint[1][1].g >> 1) & 1) << 20;
-        block.y |= ((endPoint[1][1].g >> 2) & 1) << 21;
-        block.y |= ((endPoint[1][1].g >> 3) & 1) << 22;
-        block.y |= ((endPoint[0][1].b >> 0) & 1) << 23;
-        block.y |= ((endPoint[0][1].b >> 1) & 1) << 24;
-        block.y |= ((endPoint[0][1].b >> 2) & 1) << 25;
-        block.y |= ((endPoint[0][1].b >> 3) & 1) << 26;
-        block.y |= ((endPoint[0][0].b >> 10) & 1) << 27;
-        block.y |= ((endPoint[1][1].b >> 1) & 1) << 28;
-        block.y |= ((endPoint[1][0].b >> 0) & 1) << 29;
-        block.y |= ((endPoint[1][0].b >> 1) & 1) << 30;
-        block.y |= ((endPoint[1][0].b >> 2) & 1) << 31;
-        block.z |= ((endPoint[1][0].b >> 3) & 1) << 0;
-        block.z |= ((endPoint[1][0].r >> 0) & 1) << 1;
-        block.z |= ((endPoint[1][0].r >> 1) & 1) << 2;
-        block.z |= ((endPoint[1][0].r >> 2) & 1) << 3;
-        block.z |= ((endPoint[1][0].r >> 3) & 1) << 4;
-        block.z |= ((endPoint[1][0].r >> 4) & 1) << 5;
-        block.z |= ((endPoint[1][1].b >> 2) & 1) << 6;
-        block.z |= ((endPoint[1][1].r >> 0) & 1) << 7;
-        block.z |= ((endPoint[1][1].r >> 1) & 1) << 8;
-        block.z |= ((endPoint[1][1].r >> 2) & 1) << 9;
-        block.z |= ((endPoint[1][1].r >> 3) & 1) << 10;
-        block.z |= ((endPoint[1][1].r >> 4) & 1) << 11;
-        block.z |= ((endPoint[1][1].b >> 3) & 1) << 12;
-        block.z |= ((partition_index >> 0) & 1) << 13;
-        block.z |= ((partition_index >> 1) & 1) << 14;
-        block.z |= ((partition_index >> 2) & 1) << 15;
-        block.z |= ((partition_index >> 3) & 1) << 16;
-        block.z |= ((partition_index >> 4) & 1) << 17;
-    }
-    else if ( mode_type == candidateModeFlag[3])
-    {
-        /*block.x = candidateModeMemory[3];
-        block.x |= ( ( endPoint[0][0].r << 5 ) & 0x00007FE0 ) | ( ( endPoint[0][0].g << 15 ) & 0x01FF8000 ) | ( ( endPoint[0][0].b << 25 ) & 0xFE000000 );
-        block.y |= ( endPoint[0][0].r >> 3 ) & 0x00000080;
-        block.y |= ( endPoint[0][0].g << 8 ) & 0x00040000;
-        block.y |= ( ( endPoint[0][0].b << 17 ) & 0x08000000 ) | ( ( endPoint[0][0].b >> 7 ) & 0x00000007 );
-        block.y |= ( ( endPoint[0][1].r << 3 ) & 0x00000078 ) | ( ( endPoint[0][1].g << 13 ) & 0x0003E000 ) | ( ( endPoint[0][1].b << 23 ) & 0x07800000 );
-        block.yz |= ( endPoint[1][0].gr << uint2(9, 1) ) & uint2(0x00001E00, 0x0000001E);
-        block.y |= ( endPoint[1][0].b << 29 ) & 0xE0000000;
-        block.y |= ( ( endPoint[1][1].g << 4 ) & 0x00000100 );
-        block.yz |= ( endPoint[1][1].gr << uint2(19, 7) ) & uint2(0x00780000, 0x00000780);
-        block.yz |= ( endPoint[1][1].b << uint2(27, 9) ) & uint2(0x10000000, 0x00001000);
-        block.z |= ( ( endPoint[1][0].g << 7 ) & 0x00000800 );
-        block.z |= ( endPoint[1][0].b >> 3 ) & 0x00000001;
-        block.z |= ( endPoint[1][1].b << 4 ) & 0x00000040;
-        block.z |= ( endPoint[1][1].b << 5 ) & 0x00000020;*/
-
-        block.x |= ((candidateModeMemory[3] >> 0) & 1) << 0;
-        block.x |= ((candidateModeMemory[3] >> 1) & 1) << 1;
-        block.x |= ((candidateModeMemory[3] >> 2) & 1) << 2;
-        block.x |= ((candidateModeMemory[3] >> 3) & 1) << 3;
-        block.x |= ((candidateModeMemory[3] >> 4) & 1) << 4;
-        block.x |= ((endPoint[0][0].r >> 0) & 1) << 5;
-        block.x |= ((endPoint[0][0].r >> 1) & 1) << 6;
-        block.x |= ((endPoint[0][0].r >> 2) & 1) << 7;
-        block.x |= ((endPoint[0][0].r >> 3) & 1) << 8;
-        block.x |= ((endPoint[0][0].r >> 4) & 1) << 9;
-        block.x |= ((endPoint[0][0].r >> 5) & 1) << 10;
-        block.x |= ((endPoint[0][0].r >> 6) & 1) << 11;
-        block.x |= ((endPoint[0][0].r >> 7) & 1) << 12;
-        block.x |= ((endPoint[0][0].r >> 8) & 1) << 13;
-        block.x |= ((endPoint[0][0].r >> 9) & 1) << 14;
-        block.x |= ((endPoint[0][0].g >> 0) & 1) << 15;
-        block.x |= ((endPoint[0][0].g >> 1) & 1) << 16;
-        block.x |= ((endPoint[0][0].g >> 2) & 1) << 17;
-        block.x |= ((endPoint[0][0].g >> 3) & 1) << 18;
-        block.x |= ((endPoint[0][0].g >> 4) & 1) << 19;
-        block.x |= ((endPoint[0][0].g >> 5) & 1) << 20;
-        block.x |= ((endPoint[0][0].g >> 6) & 1) << 21;
-        block.x |= ((endPoint[0][0].g >> 7) & 1) << 22;
-        block.x |= ((endPoint[0][0].g >> 8) & 1) << 23;
-        block.x |= ((endPoint[0][0].g >> 9) & 1) << 24;
-        block.x |= ((endPoint[0][0].b >> 0) & 1) << 25;
-        block.x |= ((endPoint[0][0].b >> 1) & 1) << 26;
-        block.x |= ((endPoint[0][0].b >> 2) & 1) << 27;
-        block.x |= ((endPoint[0][0].b >> 3) & 1) << 28;
-        block.x |= ((endPoint[0][0].b >> 4) & 1) << 29;
-        block.x |= ((endPoint[0][0].b >> 5) & 1) << 30;
-        block.x |= ((endPoint[0][0].b >> 6) & 1) << 31;
-        block.y |= ((endPoint[0][0].b >> 7) & 1) << 0;
-        block.y |= ((endPoint[0][0].b >> 8) & 1) << 1;
-        block.y |= ((endPoint[0][0].b >> 9) & 1) << 2;
-        block.y |= ((endPoint[0][1].r >> 0) & 1) << 3;
-        block.y |= ((endPoint[0][1].r >> 1) & 1) << 4;
-        block.y |= ((endPoint[0][1].r >> 2) & 1) << 5;
-        block.y |= ((endPoint[0][1].r >> 3) & 1) << 6;
-        block.y |= ((endPoint[0][0].r >> 10) & 1) << 7;
-        block.y |= ((endPoint[1][1].g >> 4) & 1) << 8;
-        block.y |= ((endPoint[1][0].g >> 0) & 1) << 9;
-        block.y |= ((endPoint[1][0].g >> 1) & 1) << 10;
-        block.y |= ((endPoint[1][0].g >> 2) & 1) << 11;
-        block.y |= ((endPoint[1][0].g >> 3) & 1) << 12;
-        block.y |= ((endPoint[0][1].g >> 0) & 1) << 13;
-        block.y |= ((endPoint[0][1].g >> 1) & 1) << 14;
-        block.y |= ((endPoint[0][1].g >> 2) & 1) << 15;
-        block.y |= ((endPoint[0][1].g >> 3) & 1) << 16;
-        block.y |= ((endPoint[0][1].g >> 4) & 1) << 17;
-        block.y |= ((endPoint[0][0].g >> 10) & 1) << 18;
-        block.y |= ((endPoint[1][1].g >> 0) & 1) << 19;
-        block.y |= ((endPoint[1][1].g >> 1) & 1) << 20;
-        block.y |= ((endPoint[1][1].g >> 2) & 1) << 21;
-        block.y |= ((endPoint[1][1].g >> 3) & 1) << 22;
-        block.y |= ((endPoint[0][1].b >> 0) & 1) << 23;
-        block.y |= ((endPoint[0][1].b >> 1) & 1) << 24;
-        block.y |= ((endPoint[0][1].b >> 2) & 1) << 25;
-        block.y |= ((endPoint[0][1].b >> 3) & 1) << 26;
-        block.y |= ((endPoint[0][0].b >> 10) & 1) << 27;
-        block.y |= ((endPoint[1][1].b >> 1) & 1) << 28;
-        block.y |= ((endPoint[1][0].b >> 0) & 1) << 29;
-        block.y |= ((endPoint[1][0].b >> 1) & 1) << 30;
-        block.y |= ((endPoint[1][0].b >> 2) & 1) << 31;
-        block.z |= ((endPoint[1][0].b >> 3) & 1) << 0;
-        block.z |= ((endPoint[1][0].r >> 0) & 1) << 1;
-        block.z |= ((endPoint[1][0].r >> 1) & 1) << 2;
-        block.z |= ((endPoint[1][0].r >> 2) & 1) << 3;
-        block.z |= ((endPoint[1][0].r >> 3) & 1) << 4;
-        block.z |= ((endPoint[1][1].b >> 0) & 1) << 5;
-        block.z |= ((endPoint[1][1].b >> 2) & 1) << 6;
-        block.z |= ((endPoint[1][1].r >> 0) & 1) << 7;
-        block.z |= ((endPoint[1][1].r >> 1) & 1) << 8;
-        block.z |= ((endPoint[1][1].r >> 2) & 1) << 9;
-        block.z |= ((endPoint[1][1].r >> 3) & 1) << 10;
-        block.z |= ((endPoint[1][0].g >> 4) & 1) << 11;
-        block.z |= ((endPoint[1][1].b >> 3) & 1) << 12;
-        block.z |= ((partition_index >> 0) & 1) << 13;
-        block.z |= ((partition_index >> 1) & 1) << 14;
-        block.z |= ((partition_index >> 2) & 1) << 15;
-        block.z |= ((partition_index >> 3) & 1) << 16;
-        block.z |= ((partition_index >> 4) & 1) << 17;
-    }
-    else if ( mode_type == candidateModeFlag[4])
-    {
-        /*block.x = candidateModeMemory[4];
-        block.x |= ( ( endPoint[0][0].r << 5 ) & 0x00007FE0 ) | ( ( endPoint[0][0].g << 15 ) & 0x01FF8000 ) | ( ( endPoint[0][0].b << 25 ) & 0xFE000000 );
-        block.y |= ( endPoint[0][0].r >> 3 ) & 0x00000080;
-        block.y |= ( endPoint[0][0].g << 7 ) & 0x00020000;
-        block.y |= ( ( endPoint[0][0].b << 18 ) & 0x10000000 ) | ( ( endPoint[0][0].b >> 7 ) & 0x00000007 );
-        block.y |= ( ( endPoint[0][1].r << 3 ) & 0x00000078 ) | ( ( endPoint[0][1].g << 13 ) & 0x0001E000 ) | ( ( endPoint[0][1].b << 23 ) & 0x0F800000 );
-        block.y |= ( ( endPoint[1][0].g << 9 ) & 0x00001E00 ) | ( ( endPoint[1][0].b << 4 ) & 0x00000100 );
-        block.y |= ( endPoint[1][0].b << 29 ) & 0xE0000000;
-        block.yz |= ( endPoint[1][1].gr << uint2(19, 7) ) & uint2(0x00780000, 0x00000780);
-        block.yz |= ( endPoint[1][1].b << uint2(18, 4) ) & uint2(0x00040000, 0x00000060);
-        block.z |= ( endPoint[1][0].r << 1 ) & 0x0000001E;
-        block.z |= ( endPoint[1][0].b >> 3 ) & 0x00000001;
-        block.z |= ( ( endPoint[1][1].b << 7 ) & 0x00000800 ) | ( ( endPoint[1][1].b << 9 ) & 0x00001000 );*/
-
-        block.x |= ((candidateModeMemory[4] >> 0) & 1) << 0;
-        block.x |= ((candidateModeMemory[4] >> 1) & 1) << 1;
-        block.x |= ((candidateModeMemory[4] >> 2) & 1) << 2;
-        block.x |= ((candidateModeMemory[4] >> 3) & 1) << 3;
-        block.x |= ((candidateModeMemory[4] >> 4) & 1) << 4;
-        block.x |= ((endPoint[0][0].r >> 0) & 1) << 5;
-        block.x |= ((endPoint[0][0].r >> 1) & 1) << 6;
-        block.x |= ((endPoint[0][0].r >> 2) & 1) << 7;
-        block.x |= ((endPoint[0][0].r >> 3) & 1) << 8;
-        block.x |= ((endPoint[0][0].r >> 4) & 1) << 9;
-        block.x |= ((endPoint[0][0].r >> 5) & 1) << 10;
-        block.x |= ((endPoint[0][0].r >> 6) & 1) << 11;
-        block.x |= ((endPoint[0][0].r >> 7) & 1) << 12;
-        block.x |= ((endPoint[0][0].r >> 8) & 1) << 13;
-        block.x |= ((endPoint[0][0].r >> 9) & 1) << 14;
-        block.x |= ((endPoint[0][0].g >> 0) & 1) << 15;
-        block.x |= ((endPoint[0][0].g >> 1) & 1) << 16;
-        block.x |= ((endPoint[0][0].g >> 2) & 1) << 17;
-        block.x |= ((endPoint[0][0].g >> 3) & 1) << 18;
-        block.x |= ((endPoint[0][0].g >> 4) & 1) << 19;
-        block.x |= ((endPoint[0][0].g >> 5) & 1) << 20;
-        block.x |= ((endPoint[0][0].g >> 6) & 1) << 21;
-        block.x |= ((endPoint[0][0].g >> 7) & 1) << 22;
-        block.x |= ((endPoint[0][0].g >> 8) & 1) << 23;
-        block.x |= ((endPoint[0][0].g >> 9) & 1) << 24;
-        block.x |= ((endPoint[0][0].b >> 0) & 1) << 25;
-        block.x |= ((endPoint[0][0].b >> 1) & 1) << 26;
-        block.x |= ((endPoint[0][0].b >> 2) & 1) << 27;
-        block.x |= ((endPoint[0][0].b >> 3) & 1) << 28;
-        block.x |= ((endPoint[0][0].b >> 4) & 1) << 29;
-        block.x |= ((endPoint[0][0].b >> 5) & 1) << 30;
-        block.x |= ((endPoint[0][0].b >> 6) & 1) << 31;
-        block.y |= ((endPoint[0][0].b >> 7) & 1) << 0;
-        block.y |= ((endPoint[0][0].b >> 8) & 1) << 1;
-        block.y |= ((endPoint[0][0].b >> 9) & 1) << 2;
-        block.y |= ((endPoint[0][1].r >> 0) & 1) << 3;
-        block.y |= ((endPoint[0][1].r >> 1) & 1) << 4;
-        block.y |= ((endPoint[0][1].r >> 2) & 1) << 5;
-        block.y |= ((endPoint[0][1].r >> 3) & 1) << 6;
-        block.y |= ((endPoint[0][0].r >> 10) & 1) << 7;
-        block.y |= ((endPoint[1][0].b >> 4) & 1) << 8;
-        block.y |= ((endPoint[1][0].g >> 0) & 1) << 9;
-        block.y |= ((endPoint[1][0].g >> 1) & 1) << 10;
-        block.y |= ((endPoint[1][0].g >> 2) & 1) << 11;
-        block.y |= ((endPoint[1][0].g >> 3) & 1) << 12;
-        block.y |= ((endPoint[0][1].g >> 0) & 1) << 13;
-        block.y |= ((endPoint[0][1].g >> 1) & 1) << 14;
-        block.y |= ((endPoint[0][1].g >> 2) & 1) << 15;
-        block.y |= ((endPoint[0][1].g >> 3) & 1) << 16;
-        block.y |= ((endPoint[0][0].g >> 10) & 1) << 17;
-        block.y |= ((endPoint[1][1].b >> 0) & 1) << 18;
-        block.y |= ((endPoint[1][1].g >> 0) & 1) << 19;
-        block.y |= ((endPoint[1][1].g >> 1) & 1) << 20;
-        block.y |= ((endPoint[1][1].g >> 2) & 1) << 21;
-        block.y |= ((endPoint[1][1].g >> 3) & 1) << 22;
-        block.y |= ((endPoint[0][1].b >> 0) & 1) << 23;
-        block.y |= ((endPoint[0][1].b >> 1) & 1) << 24;
-        block.y |= ((endPoint[0][1].b >> 2) & 1) << 25;
-        block.y |= ((endPoint[0][1].b >> 3) & 1) << 26;
-        block.y |= ((endPoint[0][1].b >> 4) & 1) << 27;
-        block.y |= ((endPoint[0][0].b >> 10) & 1) << 28;
-        block.y |= ((endPoint[1][0].b >> 0) & 1) << 29;
-        block.y |= ((endPoint[1][0].b >> 1) & 1) << 30;
-        block.y |= ((endPoint[1][0].b >> 2) & 1) << 31;
-        block.z |= ((endPoint[1][0].b >> 3) & 1) << 0;
-        block.z |= ((endPoint[1][0].r >> 0) & 1) << 1;
-        block.z |= ((endPoint[1][0].r >> 1) & 1) << 2;
-        block.z |= ((endPoint[1][0].r >> 2) & 1) << 3;
-        block.z |= ((endPoint[1][0].r >> 3) & 1) << 4;
-        block.z |= ((endPoint[1][1].b >> 1) & 1) << 5;
-        block.z |= ((endPoint[1][1].b >> 2) & 1) << 6;
-        block.z |= ((endPoint[1][1].r >> 0) & 1) << 7;
-        block.z |= ((endPoint[1][1].r >> 1) & 1) << 8;
-        block.z |= ((endPoint[1][1].r >> 2) & 1) << 9;
-        block.z |= ((endPoint[1][1].r >> 3) & 1) << 10;
-        block.z |= ((endPoint[1][1].b >> 4) & 1) << 11;
-        block.z |= ((endPoint[1][1].b >> 3) & 1) << 12;
-        block.z |= ((partition_index >> 0) & 1) << 13;
-        block.z |= ((partition_index >> 1) & 1) << 14;
-        block.z |= ((partition_index >> 2) & 1) << 15;
-        block.z |= ((partition_index >> 3) & 1) << 16;
-        block.z |= ((partition_index >> 4) & 1) << 17;
-    }
-    else if ( mode_type == candidateModeFlag[5])
-    {
-        /*block.x = candidateModeMemory[5];
-        block.x |= ( ( endPoint[0][0].r << 5 ) & 0x00003FE0 ) | ( ( endPoint[0][0].g << 15 ) & 0x00FF8000 ) | ( ( endPoint[0][0].b << 25 ) & 0xFE000000);
-        block.y |= ( endPoint[0][0].b >> 7 ) & 0x00000003;
-        block.y |= ( ( endPoint[0][1].r << 3 ) & 0x000000F8 ) | ( ( endPoint[0][1].g << 13 ) & 0x0003E000 ) | ( ( endPoint[0][1].b << 23 ) & 0x0F800000 );
-        block.x |= ( ( endPoint[1][0].g << 20 ) & 0x01000000 ) | ( ( endPoint[1][0].b << 10 ) & 0x00004000 );
-        block.yz |= ( endPoint[1][0].gr << uint2(9, 1) ) & uint2(0x00001E00, 0x0000003E);
-        block.y |= ( endPoint[1][0].b << 29 ) & 0xE0000000;
-        block.y |= ( ( endPoint[1][1].g << 4 ) & 0x00000100 ) | ( ( endPoint[1][1].b >> 2 ) & 0x00000004 );
-        block.y |= ( ( endPoint[1][1].b << 27 ) & 0x10000000 );
-        block.yz |= ( endPoint[1][1].gr << uint2(19, 7) ) & uint2(0x00780000, 0x00000F80);
-        block.yz |= ( endPoint[1][1].b << uint2(18, 4) ) & uint2(0x00040000, 0x00000040);
-        block.z |= ( endPoint[1][0].b >> 3 ) & 0x00000001;
-        block.z |= ( ( endPoint[1][1].b << 9 ) & 0x00001000 );*/
-
-        block.x |= ((candidateModeMemory[5] >> 0) & 1) << 0;
-        block.x |= ((candidateModeMemory[5] >> 1) & 1) << 1;
-        block.x |= ((candidateModeMemory[5] >> 2) & 1) << 2;
-        block.x |= ((candidateModeMemory[5] >> 3) & 1) << 3;
-        block.x |= ((candidateModeMemory[5] >> 4) & 1) << 4;
-        block.x |= ((endPoint[0][0].r >> 0) & 1) << 5;
-        block.x |= ((endPoint[0][0].r >> 1) & 1) << 6;
-        block.x |= ((endPoint[0][0].r >> 2) & 1) << 7;
-        block.x |= ((endPoint[0][0].r >> 3) & 1) << 8;
-        block.x |= ((endPoint[0][0].r >> 4) & 1) << 9;
-        block.x |= ((endPoint[0][0].r >> 5) & 1) << 10;
-        block.x |= ((endPoint[0][0].r >> 6) & 1) << 11;
-        block.x |= ((endPoint[0][0].r >> 7) & 1) << 12;
-        block.x |= ((endPoint[0][0].r >> 8) & 1) << 13;
-        block.x |= ((endPoint[1][0].b >> 4) & 1) << 14;
-        block.x |= ((endPoint[0][0].g >> 0) & 1) << 15;
-        block.x |= ((endPoint[0][0].g >> 1) & 1) << 16;
-        block.x |= ((endPoint[0][0].g >> 2) & 1) << 17;
-        block.x |= ((endPoint[0][0].g >> 3) & 1) << 18;
-        block.x |= ((endPoint[0][0].g >> 4) & 1) << 19;
-        block.x |= ((endPoint[0][0].g >> 5) & 1) << 20;
-        block.x |= ((endPoint[0][0].g >> 6) & 1) << 21;
-        block.x |= ((endPoint[0][0].g >> 7) & 1) << 22;
-        block.x |= ((endPoint[0][0].g >> 8) & 1) << 23;
-        block.x |= ((endPoint[1][0].g >> 4) & 1) << 24;
-        block.x |= ((endPoint[0][0].b >> 0) & 1) << 25;
-        block.x |= ((endPoint[0][0].b >> 1) & 1) << 26;
-        block.x |= ((endPoint[0][0].b >> 2) & 1) << 27;
-        block.x |= ((endPoint[0][0].b >> 3) & 1) << 28;
-        block.x |= ((endPoint[0][0].b >> 4) & 1) << 29;
-        block.x |= ((endPoint[0][0].b >> 5) & 1) << 30;
-        block.x |= ((endPoint[0][0].b >> 6) & 1) << 31;
-        block.y |= ((endPoint[0][0].b >> 7) & 1) << 0;
-        block.y |= ((endPoint[0][0].b >> 8) & 1) << 1;
-        block.y |= ((endPoint[1][1].b >> 4) & 1) << 2;
-        block.y |= ((endPoint[0][1].r >> 0) & 1) << 3;
-        block.y |= ((endPoint[0][1].r >> 1) & 1) << 4;
-        block.y |= ((endPoint[0][1].r >> 2) & 1) << 5;
-        block.y |= ((endPoint[0][1].r >> 3) & 1) << 6;
-        block.y |= ((endPoint[0][1].r >> 4) & 1) << 7;
-        block.y |= ((endPoint[1][1].g >> 4) & 1) << 8;
-        block.y |= ((endPoint[1][0].g >> 0) & 1) << 9;
-        block.y |= ((endPoint[1][0].g >> 1) & 1) << 10;
-        block.y |= ((endPoint[1][0].g >> 2) & 1) << 11;
-        block.y |= ((endPoint[1][0].g >> 3) & 1) << 12;
-        block.y |= ((endPoint[0][1].g >> 0) & 1) << 13;
-        block.y |= ((endPoint[0][1].g >> 1) & 1) << 14;
-        block.y |= ((endPoint[0][1].g >> 2) & 1) << 15;
-        block.y |= ((endPoint[0][1].g >> 3) & 1) << 16;
-        block.y |= ((endPoint[0][1].g >> 4) & 1) << 17;
-        block.y |= ((endPoint[1][1].b >> 0) & 1) << 18;
-        block.y |= ((endPoint[1][1].g >> 0) & 1) << 19;
-        block.y |= ((endPoint[1][1].g >> 1) & 1) << 20;
-        block.y |= ((endPoint[1][1].g >> 2) & 1) << 21;
-        block.y |= ((endPoint[1][1].g >> 3) & 1) << 22;
-        block.y |= ((endPoint[0][1].b >> 0) & 1) << 23;
-        block.y |= ((endPoint[0][1].b >> 1) & 1) << 24;
-        block.y |= ((endPoint[0][1].b >> 2) & 1) << 25;
-        block.y |= ((endPoint[0][1].b >> 3) & 1) << 26;
-        block.y |= ((endPoint[0][1].b >> 4) & 1) << 27;
-        block.y |= ((endPoint[1][1].b >> 1) & 1) << 28;
-        block.y |= ((endPoint[1][0].b >> 0) & 1) << 29;
-        block.y |= ((endPoint[1][0].b >> 1) & 1) << 30;
-        block.y |= ((endPoint[1][0].b >> 2) & 1) << 31;
-        block.z |= ((endPoint[1][0].b >> 3) & 1) << 0;
-        block.z |= ((endPoint[1][0].r >> 0) & 1) << 1;
-        block.z |= ((endPoint[1][0].r >> 1) & 1) << 2;
-        block.z |= ((endPoint[1][0].r >> 2) & 1) << 3;
-        block.z |= ((endPoint[1][0].r >> 3) & 1) << 4;
-        block.z |= ((endPoint[1][0].r >> 4) & 1) << 5;
-        block.z |= ((endPoint[1][1].b >> 2) & 1) << 6;
-        block.z |= ((endPoint[1][1].r >> 0) & 1) << 7;
-        block.z |= ((endPoint[1][1].r >> 1) & 1) << 8;
-        block.z |= ((endPoint[1][1].r >> 2) & 1) << 9;
-        block.z |= ((endPoint[1][1].r >> 3) & 1) << 10;
-        block.z |= ((endPoint[1][1].r >> 4) & 1) << 11;
-        block.z |= ((endPoint[1][1].b >> 3) & 1) << 12;
-        block.z |= ((partition_index >> 0) & 1) << 13;
-        block.z |= ((partition_index >> 1) & 1) << 14;
-        block.z |= ((partition_index >> 2) & 1) << 15;
-        block.z |= ((partition_index >> 3) & 1) << 16;
-        block.z |= ((partition_index >> 4) & 1) << 17;
-    }
-    else if ( mode_type == candidateModeFlag[6])
-    {
-        /*block.x = candidateModeMemory[6];
-        block.x |= ( ( endPoint[0][0].r << 5 ) & 0x00001FE0 ) | ( ( endPoint[0][0].g << 15 ) & 0x007F8000 ) | ( ( endPoint[0][0].b << 25 ) & 0xFE000000 );
-        block.y |= ( endPoint[0][0].b >> 7 ) & 0x00000001;
-        block.y |= ( ( endPoint[0][1].r << 3 ) & 0x000001F8 ) | ( ( endPoint[0][1].g << 13 ) & 0x0003E000 ) | ( ( endPoint[0][1].b << 23 ) & 0x0F800000 );
-        block.x |= ( ( endPoint[1][0].g << 20 ) & 0x01000000 ) | ( ( endPoint[1][0].b << 10 ) & 0x00004000);
-        block.x |= ( ( endPoint[1][1].g << 9 ) & 0x00002000 ) | ( ( endPoint[1][1].b << 21 ) & 0x00800000);
-        block.yz |= ( endPoint[1][0].gr << uint2(9, 1) ) & uint2(0x00001E00, 0x0000007E);
-        block.y |= ( endPoint[1][0].b << 29 ) & 0xE0000000;
-        block.yz |= ( endPoint[1][1].gr << uint2(19, 7) ) & uint2(0x00780000, 0x00001F80);
-        block.y |= ( ( endPoint[1][1].b >> 2 ) & 0x00000006 );
-        block.y |= ( ( endPoint[1][1].b << 27 ) & 0x10000000 ) | ( ( endPoint[1][1].b << 18 ) & 0x00040000 );
-        block.z |= ( endPoint[1][0].b >> 3 ) & 0x00000001;*/
-
-        block.x |= ((candidateModeMemory[6] >> 0) & 1) << 0;
-        block.x |= ((candidateModeMemory[6] >> 1) & 1) << 1;
-        block.x |= ((candidateModeMemory[6] >> 2) & 1) << 2;
-        block.x |= ((candidateModeMemory[6] >> 3) & 1) << 3;
-        block.x |= ((candidateModeMemory[6] >> 4) & 1) << 4;
-        block.x |= ((endPoint[0][0].r >> 0) & 1) << 5;
-        block.x |= ((endPoint[0][0].r >> 1) & 1) << 6;
-        block.x |= ((endPoint[0][0].r >> 2) & 1) << 7;
-        block.x |= ((endPoint[0][0].r >> 3) & 1) << 8;
-        block.x |= ((endPoint[0][0].r >> 4) & 1) << 9;
-        block.x |= ((endPoint[0][0].r >> 5) & 1) << 10;
-        block.x |= ((endPoint[0][0].r >> 6) & 1) << 11;
-        block.x |= ((endPoint[0][0].r >> 7) & 1) << 12;
-        block.x |= ((endPoint[1][1].g >> 4) & 1) << 13;
-        block.x |= ((endPoint[1][0].b >> 4) & 1) << 14;
-        block.x |= ((endPoint[0][0].g >> 0) & 1) << 15;
-        block.x |= ((endPoint[0][0].g >> 1) & 1) << 16;
-        block.x |= ((endPoint[0][0].g >> 2) & 1) << 17;
-        block.x |= ((endPoint[0][0].g >> 3) & 1) << 18;
-        block.x |= ((endPoint[0][0].g >> 4) & 1) << 19;
-        block.x |= ((endPoint[0][0].g >> 5) & 1) << 20;
-        block.x |= ((endPoint[0][0].g >> 6) & 1) << 21;
-        block.x |= ((endPoint[0][0].g >> 7) & 1) << 22;
-        block.x |= ((endPoint[1][1].b >> 2) & 1) << 23;
-        block.x |= ((endPoint[1][0].g >> 4) & 1) << 24;
-        block.x |= ((endPoint[0][0].b >> 0) & 1) << 25;
-        block.x |= ((endPoint[0][0].b >> 1) & 1) << 26;
-        block.x |= ((endPoint[0][0].b >> 2) & 1) << 27;
-        block.x |= ((endPoint[0][0].b >> 3) & 1) << 28;
-        block.x |= ((endPoint[0][0].b >> 4) & 1) << 29;
-        block.x |= ((endPoint[0][0].b >> 5) & 1) << 30;
-        block.x |= ((endPoint[0][0].b >> 6) & 1) << 31;
-        block.y |= ((endPoint[0][0].b >> 7) & 1) << 0;
-        block.y |= ((endPoint[1][1].b >> 3) & 1) << 1;
-        block.y |= ((endPoint[1][1].b >> 4) & 1) << 2;
-        block.y |= ((endPoint[0][1].r >> 0) & 1) << 3;
-        block.y |= ((endPoint[0][1].r >> 1) & 1) << 4;
-        block.y |= ((endPoint[0][1].r >> 2) & 1) << 5;
-        block.y |= ((endPoint[0][1].r >> 3) & 1) << 6;
-        block.y |= ((endPoint[0][1].r >> 4) & 1) << 7;
-        block.y |= ((endPoint[0][1].r >> 5) & 1) << 8;
-        block.y |= ((endPoint[1][0].g >> 0) & 1) << 9;
-        block.y |= ((endPoint[1][0].g >> 1) & 1) << 10;
-        block.y |= ((endPoint[1][0].g >> 2) & 1) << 11;
-        block.y |= ((endPoint[1][0].g >> 3) & 1) << 12;
-        block.y |= ((endPoint[0][1].g >> 0) & 1) << 13;
-        block.y |= ((endPoint[0][1].g >> 1) & 1) << 14;
-        block.y |= ((endPoint[0][1].g >> 2) & 1) << 15;
-        block.y |= ((endPoint[0][1].g >> 3) & 1) << 16;
-        block.y |= ((endPoint[0][1].g >> 4) & 1) << 17;
-        block.y |= ((endPoint[1][1].b >> 0) & 1) << 18;
-        block.y |= ((endPoint[1][1].g >> 0) & 1) << 19;
-        block.y |= ((endPoint[1][1].g >> 1) & 1) << 20;
-        block.y |= ((endPoint[1][1].g >> 2) & 1) << 21;
-        block.y |= ((endPoint[1][1].g >> 3) & 1) << 22;
-        block.y |= ((endPoint[0][1].b >> 0) & 1) << 23;
-        block.y |= ((endPoint[0][1].b >> 1) & 1) << 24;
-        block.y |= ((endPoint[0][1].b >> 2) & 1) << 25;
-        block.y |= ((endPoint[0][1].b >> 3) & 1) << 26;
-        block.y |= ((endPoint[0][1].b >> 4) & 1) << 27;
-        block.y |= ((endPoint[1][1].b >> 1) & 1) << 28;
-        block.y |= ((endPoint[1][0].b >> 0) & 1) << 29;
-        block.y |= ((endPoint[1][0].b >> 1) & 1) << 30;
-        block.y |= ((endPoint[1][0].b >> 2) & 1) << 31;
-        block.z |= ((endPoint[1][0].b >> 3) & 1) << 0;
-        block.z |= ((endPoint[1][0].r >> 0) & 1) << 1;
-        block.z |= ((endPoint[1][0].r >> 1) & 1) << 2;
-        block.z |= ((endPoint[1][0].r >> 2) & 1) << 3;
-        block.z |= ((endPoint[1][0].r >> 3) & 1) << 4;
-        block.z |= ((endPoint[1][0].r >> 4) & 1) << 5;
-        block.z |= ((endPoint[1][0].r >> 5) & 1) << 6;
-        block.z |= ((endPoint[1][1].r >> 0) & 1) << 7;
-        block.z |= ((endPoint[1][1].r >> 1) & 1) << 8;
-        block.z |= ((endPoint[1][1].r >> 2) & 1) << 9;
-        block.z |= ((endPoint[1][1].r >> 3) & 1) << 10;
-        block.z |= ((endPoint[1][1].r >> 4) & 1) << 11;
-        block.z |= ((endPoint[1][1].r >> 5) & 1) << 12;
-        block.z |= ((partition_index >> 0) & 1) << 13;
-        block.z |= ((partition_index >> 1) & 1) << 14;
-        block.z |= ((partition_index >> 2) & 1) << 15;
-        block.z |= ((partition_index >> 3) & 1) << 16;
-        block.z |= ((partition_index >> 4) & 1) << 17;
-    }
-    else if ( mode_type == candidateModeFlag[7])
-    {
-        /*block.x = candidateModeMemory[7];
-        block.x |= ( ( endPoint[0][0].r << 5 ) & 0x00001FE0 ) | ( ( endPoint[0][0].g << 15 ) & 0x007F8000 ) | ( ( endPoint[0][0].b << 25 ) & 0xFE000000 );
-        block.y |= ( endPoint[0][0].b >> 7 ) & 0x00000001;
-        block.y |= ( ( endPoint[0][1].r << 3 ) & 0x000000F8 ) | ( ( endPoint[0][1].g << 13 ) & 0x0007E000 ) | ( ( endPoint[0][1].b << 23 ) & 0x0F800000 );
-        block.x |= ( ( endPoint[1][0].g << 20 ) & 0x01000000 ) | ( ( endPoint[1][0].b << 10 ) & 0x00004000 );
-        block.x |= ( ( endPoint[1][0].g << 18 ) & 0x00800000 );
-        block.x |= ( ( endPoint[1][1].b << 13 ) & 0x00002000 );
-        block.yz |= ( endPoint[1][0].gr << uint2(9, 1) ) & uint2(0x00001E00, 0x0000003E);
-        block.yz |= ( endPoint[1][1].gr << uint2(19, 7) ) & uint2(0x00780000, 0x00000F80);
-        block.y |= ( endPoint[1][0].b << 29 ) & 0xE0000000;
-        block.y |= ( ( endPoint[1][1].g >> 4 ) & 0x00000002 ) | ( ( endPoint[1][1].g << 4 ) & 0x00000100 ) | ( ( endPoint[1][1].b >> 2 ) & 0x00000004 );
-        block.y |= ( endPoint[1][1].b << 27 ) & 0x10000000;
-        block.z |= ( endPoint[1][0].b >> 3 ) & 0x00000001;
-        block.z |= ( ( endPoint[1][1].b << 9 ) & 0x00001000 ) | ( ( endPoint[1][1].b << 4 ) & 0x00000040 );*/
-
-        block.x |= ((candidateModeMemory[7] >> 0) & 1) << 0;
-        block.x |= ((candidateModeMemory[7] >> 1) & 1) << 1;
-        block.x |= ((candidateModeMemory[7] >> 2) & 1) << 2;
-        block.x |= ((candidateModeMemory[7] >> 3) & 1) << 3;
-        block.x |= ((candidateModeMemory[7] >> 4) & 1) << 4;
-        block.x |= ((endPoint[0][0].r >> 0) & 1) << 5;
-        block.x |= ((endPoint[0][0].r >> 1) & 1) << 6;
-        block.x |= ((endPoint[0][0].r >> 2) & 1) << 7;
-        block.x |= ((endPoint[0][0].r >> 3) & 1) << 8;
-        block.x |= ((endPoint[0][0].r >> 4) & 1) << 9;
-        block.x |= ((endPoint[0][0].r >> 5) & 1) << 10;
-        block.x |= ((endPoint[0][0].r >> 6) & 1) << 11;
-        block.x |= ((endPoint[0][0].r >> 7) & 1) << 12;
-        block.x |= ((endPoint[1][1].b >> 0) & 1) << 13;
-        block.x |= ((endPoint[1][0].b >> 4) & 1) << 14;
-        block.x |= ((endPoint[0][0].g >> 0) & 1) << 15;
-        block.x |= ((endPoint[0][0].g >> 1) & 1) << 16;
-        block.x |= ((endPoint[0][0].g >> 2) & 1) << 17;
-        block.x |= ((endPoint[0][0].g >> 3) & 1) << 18;
-        block.x |= ((endPoint[0][0].g >> 4) & 1) << 19;
-        block.x |= ((endPoint[0][0].g >> 5) & 1) << 20;
-        block.x |= ((endPoint[0][0].g >> 6) & 1) << 21;
-        block.x |= ((endPoint[0][0].g >> 7) & 1) << 22;
-        block.x |= ((endPoint[1][0].g >> 5) & 1) << 23;
-        block.x |= ((endPoint[1][0].g >> 4) & 1) << 24;
-        block.x |= ((endPoint[0][0].b >> 0) & 1) << 25;
-        block.x |= ((endPoint[0][0].b >> 1) & 1) << 26;
-        block.x |= ((endPoint[0][0].b >> 2) & 1) << 27;
-        block.x |= ((endPoint[0][0].b >> 3) & 1) << 28;
-        block.x |= ((endPoint[0][0].b >> 4) & 1) << 29;
-        block.x |= ((endPoint[0][0].b >> 5) & 1) << 30;
-        block.x |= ((endPoint[0][0].b >> 6) & 1) << 31;
-        block.y |= ((endPoint[0][0].b >> 7) & 1) << 0;
-        block.y |= ((endPoint[1][1].g >> 5) & 1) << 1;
-        block.y |= ((endPoint[1][1].b >> 4) & 1) << 2;
-        block.y |= ((endPoint[0][1].r >> 0) & 1) << 3;
-        block.y |= ((endPoint[0][1].r >> 1) & 1) << 4;
-        block.y |= ((endPoint[0][1].r >> 2) & 1) << 5;
-        block.y |= ((endPoint[0][1].r >> 3) & 1) << 6;
-        block.y |= ((endPoint[0][1].r >> 4) & 1) << 7;
-        block.y |= ((endPoint[1][1].g >> 4) & 1) << 8;
-        block.y |= ((endPoint[1][0].g >> 0) & 1) << 9;
-        block.y |= ((endPoint[1][0].g >> 1) & 1) << 10;
-        block.y |= ((endPoint[1][0].g >> 2) & 1) << 11;
-        block.y |= ((endPoint[1][0].g >> 3) & 1) << 12;
-        block.y |= ((endPoint[0][1].g >> 0) & 1) << 13;
-        block.y |= ((endPoint[0][1].g >> 1) & 1) << 14;
-        block.y |= ((endPoint[0][1].g >> 2) & 1) << 15;
-        block.y |= ((endPoint[0][1].g >> 3) & 1) << 16;
-        block.y |= ((endPoint[0][1].g >> 4) & 1) << 17;
-        block.y |= ((endPoint[0][1].g >> 5) & 1) << 18;
-        block.y |= ((endPoint[1][1].g >> 0) & 1) << 19;
-        block.y |= ((endPoint[1][1].g >> 1) & 1) << 20;
-        block.y |= ((endPoint[1][1].g >> 2) & 1) << 21;
-        block.y |= ((endPoint[1][1].g >> 3) & 1) << 22;
-        block.y |= ((endPoint[0][1].b >> 0) & 1) << 23;
-        block.y |= ((endPoint[0][1].b >> 1) & 1) << 24;
-        block.y |= ((endPoint[0][1].b >> 2) & 1) << 25;
-        block.y |= ((endPoint[0][1].b >> 3) & 1) << 26;
-        block.y |= ((endPoint[0][1].b >> 4) & 1) << 27;
-        block.y |= ((endPoint[1][1].b >> 1) & 1) << 28;
-        block.y |= ((endPoint[1][0].b >> 0) & 1) << 29;
-        block.y |= ((endPoint[1][0].b >> 1) & 1) << 30;
-        block.y |= ((endPoint[1][0].b >> 2) & 1) << 31;
-        block.z |= ((endPoint[1][0].b >> 3) & 1) << 0;
-        block.z |= ((endPoint[1][0].r >> 0) & 1) << 1;
-        block.z |= ((endPoint[1][0].r >> 1) & 1) << 2;
-        block.z |= ((endPoint[1][0].r >> 2) & 1) << 3;
-        block.z |= ((endPoint[1][0].r >> 3) & 1) << 4;
-        block.z |= ((endPoint[1][0].r >> 4) & 1) << 5;
-        block.z |= ((endPoint[1][1].b >> 2) & 1) << 6;
-        block.z |= ((endPoint[1][1].r >> 0) & 1) << 7;
-        block.z |= ((endPoint[1][1].r >> 1) & 1) << 8;
-        block.z |= ((endPoint[1][1].r >> 2) & 1) << 9;
-        block.z |= ((endPoint[1][1].r >> 3) & 1) << 10;
-        block.z |= ((endPoint[1][1].r >> 4) & 1) << 11;
-        block.z |= ((endPoint[1][1].b >> 3) & 1) << 12;
-        block.z |= ((partition_index >> 0) & 1) << 13;
-        block.z |= ((partition_index >> 1) & 1) << 14;
-        block.z |= ((partition_index >> 2) & 1) << 15;
-        block.z |= ((partition_index >> 3) & 1) << 16;
-        block.z |= ((partition_index >> 4) & 1) << 17;
-    }
-    else if ( mode_type == candidateModeFlag[8])
-    {
-        /*block.x = candidateModeMemory[8];
-        block.x |= ( ( endPoint[0][0].r << 5 ) & 0x00001FE0 ) | ( ( endPoint[0][0].g << 15 ) & 0x007F8000 ) | ( ( endPoint[0][0].b << 25 ) & 0xFE000000 );
-        block.y |= ( endPoint[0][0].b >> 7 ) & 0x00000001;
-        block.y |= ( ( endPoint[0][1].r << 3 ) & 0x000000F8 ) | ( ( endPoint[0][1].g << 13 ) & 0x0003E000 ) | ( ( endPoint[0][1].b << 23 ) & 0x1F800000 );
-        block.x |= ( ( endPoint[1][0].g << 20 ) & 0x01000000 ) | ( ( endPoint[1][0].b << 10 ) & 0x00004000 );
-        block.x |= ( ( endPoint[1][0].b << 18 ) & 0x00800000 );
-        block.x |= ( endPoint[1][1].b << 12 ) & 0x00002000;
-        block.y |= ( endPoint[1][0].b << 29 ) & 0xE0000000;
-        block.y |= ( ( endPoint[1][1].g << 4 ) & 0x00000100 ) | ( ( endPoint[1][1].b >> 4 ) & 0x00000002 ) | ( ( endPoint[1][1].b >> 2 ) & 0x00000004 );
-        block.yz |= ( endPoint[1][0].gr << uint2(9, 1) ) & uint2(0x00001E00, 0x0000003E);
-        block.yz |= ( endPoint[1][1].gr << uint2(19, 7) ) & uint2(0x00780000, 0x00000F80);
-        block.y |= ( endPoint[1][1].b << 18 ) & 0x00040000;
-        block.z |= ( endPoint[1][0].b >> 3 ) & 0x00000001;
-        block.z |= ( ( endPoint[1][1].b << 9 ) & 0x00001000 ) | ( ( endPoint[1][1].b << 4 ) & 0x00000040 );*/
-
-        block.x |= ((candidateModeMemory[8] >> 0) & 1) << 0;
-        block.x |= ((candidateModeMemory[8] >> 1) & 1) << 1;
-        block.x |= ((candidateModeMemory[8] >> 2) & 1) << 2;
-        block.x |= ((candidateModeMemory[8] >> 3) & 1) << 3;
-        block.x |= ((candidateModeMemory[8] >> 4) & 1) << 4;
-        block.x |= ((endPoint[0][0].r >> 0) & 1) << 5;
-        block.x |= ((endPoint[0][0].r >> 1) & 1) << 6;
-        block.x |= ((endPoint[0][0].r >> 2) & 1) << 7;
-        block.x |= ((endPoint[0][0].r >> 3) & 1) << 8;
-        block.x |= ((endPoint[0][0].r >> 4) & 1) << 9;
-        block.x |= ((endPoint[0][0].r >> 5) & 1) << 10;
-        block.x |= ((endPoint[0][0].r >> 6) & 1) << 11;
-        block.x |= ((endPoint[0][0].r >> 7) & 1) << 12;
-        block.x |= ((endPoint[1][1].b >> 1) & 1) << 13;
-        block.x |= ((endPoint[1][0].b >> 4) & 1) << 14;
-        block.x |= ((endPoint[0][0].g >> 0) & 1) << 15;
-        block.x |= ((endPoint[0][0].g >> 1) & 1) << 16;
-        block.x |= ((endPoint[0][0].g >> 2) & 1) << 17;
-        block.x |= ((endPoint[0][0].g >> 3) & 1) << 18;
-        block.x |= ((endPoint[0][0].g >> 4) & 1) << 19;
-        block.x |= ((endPoint[0][0].g >> 5) & 1) << 20;
-        block.x |= ((endPoint[0][0].g >> 6) & 1) << 21;
-        block.x |= ((endPoint[0][0].g >> 7) & 1) << 22;
-        block.x |= ((endPoint[1][0].b >> 5) & 1) << 23;
-        block.x |= ((endPoint[1][0].g >> 4) & 1) << 24;
-        block.x |= ((endPoint[0][0].b >> 0) & 1) << 25;
-        block.x |= ((endPoint[0][0].b >> 1) & 1) << 26;
-        block.x |= ((endPoint[0][0].b >> 2) & 1) << 27;
-        block.x |= ((endPoint[0][0].b >> 3) & 1) << 28;
-        block.x |= ((endPoint[0][0].b >> 4) & 1) << 29;
-        block.x |= ((endPoint[0][0].b >> 5) & 1) << 30;
-        block.x |= ((endPoint[0][0].b >> 6) & 1) << 31;
-        block.y |= ((endPoint[0][0].b >> 7) & 1) << 0;
-        block.y |= ((endPoint[1][1].b >> 5) & 1) << 1;
-        block.y |= ((endPoint[1][1].b >> 4) & 1) << 2;
-        block.y |= ((endPoint[0][1].r >> 0) & 1) << 3;
-        block.y |= ((endPoint[0][1].r >> 1) & 1) << 4;
-        block.y |= ((endPoint[0][1].r >> 2) & 1) << 5;
-        block.y |= ((endPoint[0][1].r >> 3) & 1) << 6;
-        block.y |= ((endPoint[0][1].r >> 4) & 1) << 7;
-        block.y |= ((endPoint[1][1].g >> 4) & 1) << 8;
-        block.y |= ((endPoint[1][0].g >> 0) & 1) << 9;
-        block.y |= ((endPoint[1][0].g >> 1) & 1) << 10;
-        block.y |= ((endPoint[1][0].g >> 2) & 1) << 11;
-        block.y |= ((endPoint[1][0].g >> 3) & 1) << 12;
-        block.y |= ((endPoint[0][1].g >> 0) & 1) << 13;
-        block.y |= ((endPoint[0][1].g >> 1) & 1) << 14;
-        block.y |= ((endPoint[0][1].g >> 2) & 1) << 15;
-        block.y |= ((endPoint[0][1].g >> 3) & 1) << 16;
-        block.y |= ((endPoint[0][1].g >> 4) & 1) << 17;
-        block.y |= ((endPoint[1][1].b >> 0) & 1) << 18;
-        block.y |= ((endPoint[1][1].g >> 0) & 1) << 19;
-        block.y |= ((endPoint[1][1].g >> 1) & 1) << 20;
-        block.y |= ((endPoint[1][1].g >> 2) & 1) << 21;
-        block.y |= ((endPoint[1][1].g >> 3) & 1) << 22;
-        block.y |= ((endPoint[0][1].b >> 0) & 1) << 23;
-        block.y |= ((endPoint[0][1].b >> 1) & 1) << 24;
-        block.y |= ((endPoint[0][1].b >> 2) & 1) << 25;
-        block.y |= ((endPoint[0][1].b >> 3) & 1) << 26;
-        block.y |= ((endPoint[0][1].b >> 4) & 1) << 27;
-        block.y |= ((endPoint[0][1].b >> 5) & 1) << 28;
-        block.y |= ((endPoint[1][0].b >> 0) & 1) << 29;
-        block.y |= ((endPoint[1][0].b >> 1) & 1) << 30;
-        block.y |= ((endPoint[1][0].b >> 2) & 1) << 31;
-        block.z |= ((endPoint[1][0].b >> 3) & 1) << 0;
-        block.z |= ((endPoint[1][0].r >> 0) & 1) << 1;
-        block.z |= ((endPoint[1][0].r >> 1) & 1) << 2;
-        block.z |= ((endPoint[1][0].r >> 2) & 1) << 3;
-        block.z |= ((endPoint[1][0].r >> 3) & 1) << 4;
-        block.z |= ((endPoint[1][0].r >> 4) & 1) << 5;
-        block.z |= ((endPoint[1][1].b >> 2) & 1) << 6;
-        block.z |= ((endPoint[1][1].r >> 0) & 1) << 7;
-        block.z |= ((endPoint[1][1].r >> 1) & 1) << 8;
-        block.z |= ((endPoint[1][1].r >> 2) & 1) << 9;
-        block.z |= ((endPoint[1][1].r >> 3) & 1) << 10;
-        block.z |= ((endPoint[1][1].r >> 4) & 1) << 11;
-        block.z |= ((endPoint[1][1].b >> 3) & 1) << 12;
-        block.z |= ((partition_index >> 0) & 1) << 13;
-        block.z |= ((partition_index >> 1) & 1) << 14;
-        block.z |= ((partition_index >> 2) & 1) << 15;
-        block.z |= ((partition_index >> 3) & 1) << 16;
-        block.z |= ((partition_index >> 4) & 1) << 17;
-    }
-    else if ( mode_type == candidateModeFlag[9])
-    {
-        /*block.x = candidateModeMemory[9];
-        block.x |= ( ( endPoint[0][0].r << 5 ) & 0x000007E0 ) | ( ( endPoint[0][0].g << 15 ) & 0x001F8000 ) | ( ( endPoint[0][0].b << 25 ) & 0x7E000000 );
-        block.y |= ( ( endPoint[0][1].r << 3 ) & 0x000001F8 ) | ( ( endPoint[0][1].g << 13 ) & 0x0007E000 ) | ( ( endPoint[0][1].b << 23 ) & 0x1F800000 );
-        block.x |= ( ( endPoint[1][0].g << 16 ) & 0x00200000 ) | ( ( endPoint[1][0].g << 20 ) & 0x01000000 );
-        block.x |= ( ( endPoint[1][0].b << 17 ) & 0x00400000 ) | ( ( endPoint[1][0].b << 10 ) & 0x00004000 );
-        block.x |= ( ( endPoint[1][1].b << 21 ) & 0x00800000 ) | ( ( endPoint[1][1].b << 12 ) & 0x00003000 );
-        block.x |= ( ( endPoint[1][1].g << 26 ) & 0x80000000 ) | ( ( endPoint[1][1].g << 7 ) & 0x00000800 );
-        block.yz |= ( endPoint[1][0].gr << uint2(9, 1) ) & uint2(0x00001E00, 0x0000007E);
-        block.yz |= ( endPoint[1][1].gr << uint2(19, 7) ) & uint2(0x00780000, 0x00001F80);
-        block.y |= ( endPoint[1][0].b << 29 ) & 0xE0000000;
-        block.y |= ( ( endPoint[1][1].b >> 4 ) & 0x00000002 ) | ( ( endPoint[1][1].b >> 2 ) & 0x00000004 ) | ( ( endPoint[1][1].b >> 3 ) & 0x00000001 );
-        block.z |= ( endPoint[1][0].b >> 3 ) & 0x00000001;*/
-
-        block.x |= ((candidateModeMemory[9] >> 0) & 1) << 0;
-        block.x |= ((candidateModeMemory[9] >> 1) & 1) << 1;
-        block.x |= ((candidateModeMemory[9] >> 2) & 1) << 2;
-        block.x |= ((candidateModeMemory[9] >> 3) & 1) << 3;
-        block.x |= ((candidateModeMemory[9] >> 4) & 1) << 4;
-        block.x |= ((endPoint[0][0].r >> 0) & 1) << 5;
-        block.x |= ((endPoint[0][0].r >> 1) & 1) << 6;
-        block.x |= ((endPoint[0][0].r >> 2) & 1) << 7;
-        block.x |= ((endPoint[0][0].r >> 3) & 1) << 8;
-        block.x |= ((endPoint[0][0].r >> 4) & 1) << 9;
-        block.x |= ((endPoint[0][0].r >> 5) & 1) << 10;
-        block.x |= ((endPoint[1][1].g >> 4) & 1) << 11;
-        block.x |= ((endPoint[1][1].b >> 0) & 1) << 12;
-        block.x |= ((endPoint[1][1].b >> 1) & 1) << 13;
-        block.x |= ((endPoint[1][0].b >> 4) & 1) << 14;
-        block.x |= ((endPoint[0][0].g >> 0) & 1) << 15;
-        block.x |= ((endPoint[0][0].g >> 1) & 1) << 16;
-        block.x |= ((endPoint[0][0].g >> 2) & 1) << 17;
-        block.x |= ((endPoint[0][0].g >> 3) & 1) << 18;
-        block.x |= ((endPoint[0][0].g >> 4) & 1) << 19;
-        block.x |= ((endPoint[0][0].g >> 5) & 1) << 20;
-        block.x |= ((endPoint[1][0].g >> 5) & 1) << 21;
-        block.x |= ((endPoint[1][0].b >> 5) & 1) << 22;
-        block.x |= ((endPoint[1][1].b >> 2) & 1) << 23;
-        block.x |= ((endPoint[1][0].g >> 4) & 1) << 24;
-        block.x |= ((endPoint[0][0].b >> 0) & 1) << 25;
-        block.x |= ((endPoint[0][0].b >> 1) & 1) << 26;
-        block.x |= ((endPoint[0][0].b >> 2) & 1) << 27;
-        block.x |= ((endPoint[0][0].b >> 3) & 1) << 28;
-        block.x |= ((endPoint[0][0].b >> 4) & 1) << 29;
-        block.x |= ((endPoint[0][0].b >> 5) & 1) << 30;
-        block.x |= ((endPoint[1][1].g >> 5) & 1) << 31;
-        block.y |= ((endPoint[1][1].b >> 3) & 1) << 0;
-        block.y |= ((endPoint[1][1].b >> 5) & 1) << 1;
-        block.y |= ((endPoint[1][1].b >> 4) & 1) << 2;
-        block.y |= ((endPoint[0][1].r >> 0) & 1) << 3;
-        block.y |= ((endPoint[0][1].r >> 1) & 1) << 4;
-        block.y |= ((endPoint[0][1].r >> 2) & 1) << 5;
-        block.y |= ((endPoint[0][1].r >> 3) & 1) << 6;
-        block.y |= ((endPoint[0][1].r >> 4) & 1) << 7;
-        block.y |= ((endPoint[0][1].r >> 5) & 1) << 8;
-        block.y |= ((endPoint[1][0].g >> 0) & 1) << 9;
-        block.y |= ((endPoint[1][0].g >> 1) & 1) << 10;
-        block.y |= ((endPoint[1][0].g >> 2) & 1) << 11;
-        block.y |= ((endPoint[1][0].g >> 3) & 1) << 12;
-        block.y |= ((endPoint[0][1].g >> 0) & 1) << 13;
-        block.y |= ((endPoint[0][1].g >> 1) & 1) << 14;
-        block.y |= ((endPoint[0][1].g >> 2) & 1) << 15;
-        block.y |= ((endPoint[0][1].g >> 3) & 1) << 16;
-        block.y |= ((endPoint[0][1].g >> 4) & 1) << 17;
-        block.y |= ((endPoint[0][1].g >> 5) & 1) << 18;
-        block.y |= ((endPoint[1][1].g >> 0) & 1) << 19;
-        block.y |= ((endPoint[1][1].g >> 1) & 1) << 20;
-        block.y |= ((endPoint[1][1].g >> 2) & 1) << 21;
-        block.y |= ((endPoint[1][1].g >> 3) & 1) << 22;
-        block.y |= ((endPoint[0][1].b >> 0) & 1) << 23;
-        block.y |= ((endPoint[0][1].b >> 1) & 1) << 24;
-        block.y |= ((endPoint[0][1].b >> 2) & 1) << 25;
-        block.y |= ((endPoint[0][1].b >> 3) & 1) << 26;
-        block.y |= ((endPoint[0][1].b >> 4) & 1) << 27;
-        block.y |= ((endPoint[0][1].b >> 5) & 1) << 28;
-        block.y |= ((endPoint[1][0].b >> 0) & 1) << 29;
-        block.y |= ((endPoint[1][0].b >> 1) & 1) << 30;
-        block.y |= ((endPoint[1][0].b >> 2) & 1) << 31;
-        block.z |= ((endPoint[1][0].b >> 3) & 1) << 0;
-        block.z |= ((endPoint[1][0].r >> 0) & 1) << 1;
-        block.z |= ((endPoint[1][0].r >> 1) & 1) << 2;
-        block.z |= ((endPoint[1][0].r >> 2) & 1) << 3;
-        block.z |= ((endPoint[1][0].r >> 3) & 1) << 4;
-        block.z |= ((endPoint[1][0].r >> 4) & 1) << 5;
-        block.z |= ((endPoint[1][0].r >> 5) & 1) << 6;
-        block.z |= ((endPoint[1][1].r >> 0) & 1) << 7;
-        block.z |= ((endPoint[1][1].r >> 1) & 1) << 8;
-        block.z |= ((endPoint[1][1].r >> 2) & 1) << 9;
-        block.z |= ((endPoint[1][1].r >> 3) & 1) << 10;
-        block.z |= ((endPoint[1][1].r >> 4) & 1) << 11;
-        block.z |= ((endPoint[1][1].r >> 5) & 1) << 12;
-        block.z |= ((partition_index >> 0) & 1) << 13;
-        block.z |= ((partition_index >> 1) & 1) << 14;
-        block.z |= ((partition_index >> 2) & 1) << 15;
-        block.z |= ((partition_index >> 3) & 1) << 16;
-        block.z |= ((partition_index >> 4) & 1) << 17;
-    }
-}
-void block_package( inout uint4 block, int2x3 endPoint, uint mode_type ) // for mode 11 - 14
-{
-    /*block.x = ( ( endPoint[0].r << 5 ) & 0x00007FE0 ) | ( ( endPoint[0].g << 15 ) & 0x01FF8000 ) | ( ( endPoint[0].b << 25 ) & 0xFE000000 );
-    block.y |= ( endPoint[0].b >> 7 ) & 0x00000007;*/
-
-    block.xy = 0;
-    block.z &= 0xFFFFFFFE;
-
-
-    if ( mode_type == candidateModeFlag[10])
-    {
-       /* block.x |= candidateModeMemory[10];
-        block.y |= ( ( endPoint[1].r << 3 ) & 0x00001FF8 ) | ( ( endPoint[1].g << 13 ) & 0x007FE000 ) | ( ( endPoint[1].b << 23 ) & 0xFF800000 );
-        block.z |= ( endPoint[1].b >> 9 ) & 0x00000001;*/
-
-        block.x |= ((candidateModeMemory[10] >> 0) & 1) << 0;
-        block.x |= ((candidateModeMemory[10] >> 1) & 1) << 1;
-        block.x |= ((candidateModeMemory[10] >> 2) & 1) << 2;
-        block.x |= ((candidateModeMemory[10] >> 3) & 1) << 3;
-        block.x |= ((candidateModeMemory[10] >> 4) & 1) << 4;
-        block.x |= ((endPoint[0].r >> 0) & 1) << 5;
-        block.x |= ((endPoint[0].r >> 1) & 1) << 6;
-        block.x |= ((endPoint[0].r >> 2) & 1) << 7;
-        block.x |= ((endPoint[0].r >> 3) & 1) << 8;
-        block.x |= ((endPoint[0].r >> 4) & 1) << 9;
-        block.x |= ((endPoint[0].r >> 5) & 1) << 10;
-        block.x |= ((endPoint[0].r >> 6) & 1) << 11;
-        block.x |= ((endPoint[0].r >> 7) & 1) << 12;
-        block.x |= ((endPoint[0].r >> 8) & 1) << 13;
-        block.x |= ((endPoint[0].r >> 9) & 1) << 14;
-        block.x |= ((endPoint[0].g >> 0) & 1) << 15;
-        block.x |= ((endPoint[0].g >> 1) & 1) << 16;
-        block.x |= ((endPoint[0].g >> 2) & 1) << 17;
-        block.x |= ((endPoint[0].g >> 3) & 1) << 18;
-        block.x |= ((endPoint[0].g >> 4) & 1) << 19;
-        block.x |= ((endPoint[0].g >> 5) & 1) << 20;
-        block.x |= ((endPoint[0].g >> 6) & 1) << 21;
-        block.x |= ((endPoint[0].g >> 7) & 1) << 22;
-        block.x |= ((endPoint[0].g >> 8) & 1) << 23;
-        block.x |= ((endPoint[0].g >> 9) & 1) << 24;
-        block.x |= ((endPoint[0].b >> 0) & 1) << 25;
-        block.x |= ((endPoint[0].b >> 1) & 1) << 26;
-        block.x |= ((endPoint[0].b >> 2) & 1) << 27;
-        block.x |= ((endPoint[0].b >> 3) & 1) << 28;
-        block.x |= ((endPoint[0].b >> 4) & 1) << 29;
-        block.x |= ((endPoint[0].b >> 5) & 1) << 30;
-        block.x |= ((endPoint[0].b >> 6) & 1) << 31;
-        block.y |= ((endPoint[0].b >> 7) & 1) << 0;
-        block.y |= ((endPoint[0].b >> 8) & 1) << 1;
-        block.y |= ((endPoint[0].b >> 9) & 1) << 2;
-        block.y |= ((endPoint[1].r >> 0) & 1) << 3;
-        block.y |= ((endPoint[1].r >> 1) & 1) << 4;
-        block.y |= ((endPoint[1].r >> 2) & 1) << 5;
-        block.y |= ((endPoint[1].r >> 3) & 1) << 6;
-        block.y |= ((endPoint[1].r >> 4) & 1) << 7;
-        block.y |= ((endPoint[1].r >> 5) & 1) << 8;
-        block.y |= ((endPoint[1].r >> 6) & 1) << 9;
-        block.y |= ((endPoint[1].r >> 7) & 1) << 10;
-        block.y |= ((endPoint[1].r >> 8) & 1) << 11;
-        block.y |= ((endPoint[1].r >> 9) & 1) << 12;
-        block.y |= ((endPoint[1].g >> 0) & 1) << 13;
-        block.y |= ((endPoint[1].g >> 1) & 1) << 14;
-        block.y |= ((endPoint[1].g >> 2) & 1) << 15;
-        block.y |= ((endPoint[1].g >> 3) & 1) << 16;
-        block.y |= ((endPoint[1].g >> 4) & 1) << 17;
-        block.y |= ((endPoint[1].g >> 5) & 1) << 18;
-        block.y |= ((endPoint[1].g >> 6) & 1) << 19;
-        block.y |= ((endPoint[1].g >> 7) & 1) << 20;
-        block.y |= ((endPoint[1].g >> 8) & 1) << 21;
-        block.y |= ((endPoint[1].g >> 9) & 1) << 22;
-        block.y |= ((endPoint[1].b >> 0) & 1) << 23;
-        block.y |= ((endPoint[1].b >> 1) & 1) << 24;
-        block.y |= ((endPoint[1].b >> 2) & 1) << 25;
-        block.y |= ((endPoint[1].b >> 3) & 1) << 26;
-        block.y |= ((endPoint[1].b >> 4) & 1) << 27;
-        block.y |= ((endPoint[1].b >> 5) & 1) << 28;
-        block.y |= ((endPoint[1].b >> 6) & 1) << 29;
-        block.y |= ((endPoint[1].b >> 7) & 1) << 30;
-        block.y |= ((endPoint[1].b >> 8) & 1) << 31;
-        block.z |= ((endPoint[1].b >> 9) & 1) << 0;
-    }
-    else if (mode_type == candidateModeFlag[11])
-    {
-        /*block.x |= candidateModeMemory[11];
-        block.y |= ( ( endPoint[0].r << 2 ) & 0x00001000 ) | ( ( endPoint[0].g << 12 ) & 0x00400000 );
-        block.y |= ( ( endPoint[1].r << 3 ) & 0x00000FF8 ) | ( ( endPoint[1].g << 13 ) & 0x003FE000 ) | ( ( endPoint[1].b << 23 ) & 0xFF800000 );
-        block.z |= ( endPoint[0].b >> 10 ) & 0x00000001;*/
-
-        block.x |= ((candidateModeMemory[11] >> 0) & 1) << 0;
-        block.x |= ((candidateModeMemory[11] >> 1) & 1) << 1;
-        block.x |= ((candidateModeMemory[11] >> 2) & 1) << 2;
-        block.x |= ((candidateModeMemory[11] >> 3) & 1) << 3;
-        block.x |= ((candidateModeMemory[11] >> 4) & 1) << 4;
-        block.x |= ((endPoint[0].r >> 0) & 1) << 5;
-        block.x |= ((endPoint[0].r >> 1) & 1) << 6;
-        block.x |= ((endPoint[0].r >> 2) & 1) << 7;
-        block.x |= ((endPoint[0].r >> 3) & 1) << 8;
-        block.x |= ((endPoint[0].r >> 4) & 1) << 9;
-        block.x |= ((endPoint[0].r >> 5) & 1) << 10;
-        block.x |= ((endPoint[0].r >> 6) & 1) << 11;
-        block.x |= ((endPoint[0].r >> 7) & 1) << 12;
-        block.x |= ((endPoint[0].r >> 8) & 1) << 13;
-        block.x |= ((endPoint[0].r >> 9) & 1) << 14;
-        block.x |= ((endPoint[0].g >> 0) & 1) << 15;
-        block.x |= ((endPoint[0].g >> 1) & 1) << 16;
-        block.x |= ((endPoint[0].g >> 2) & 1) << 17;
-        block.x |= ((endPoint[0].g >> 3) & 1) << 18;
-        block.x |= ((endPoint[0].g >> 4) & 1) << 19;
-        block.x |= ((endPoint[0].g >> 5) & 1) << 20;
-        block.x |= ((endPoint[0].g >> 6) & 1) << 21;
-        block.x |= ((endPoint[0].g >> 7) & 1) << 22;
-        block.x |= ((endPoint[0].g >> 8) & 1) << 23;
-        block.x |= ((endPoint[0].g >> 9) & 1) << 24;
-        block.x |= ((endPoint[0].b >> 0) & 1) << 25;
-        block.x |= ((endPoint[0].b >> 1) & 1) << 26;
-        block.x |= ((endPoint[0].b >> 2) & 1) << 27;
-        block.x |= ((endPoint[0].b >> 3) & 1) << 28;
-        block.x |= ((endPoint[0].b >> 4) & 1) << 29;
-        block.x |= ((endPoint[0].b >> 5) & 1) << 30;
-        block.x |= ((endPoint[0].b >> 6) & 1) << 31;
-        block.y |= ((endPoint[0].b >> 7) & 1) << 0;
-        block.y |= ((endPoint[0].b >> 8) & 1) << 1;
-        block.y |= ((endPoint[0].b >> 9) & 1) << 2;
-        block.y |= ((endPoint[1].r >> 0) & 1) << 3;
-        block.y |= ((endPoint[1].r >> 1) & 1) << 4;
-        block.y |= ((endPoint[1].r >> 2) & 1) << 5;
-        block.y |= ((endPoint[1].r >> 3) & 1) << 6;
-        block.y |= ((endPoint[1].r >> 4) & 1) << 7;
-        block.y |= ((endPoint[1].r >> 5) & 1) << 8;
-        block.y |= ((endPoint[1].r >> 6) & 1) << 9;
-        block.y |= ((endPoint[1].r >> 7) & 1) << 10;
-        block.y |= ((endPoint[1].r >> 8) & 1) << 11;
-        block.y |= ((endPoint[0].r >> 10) & 1) << 12;
-        block.y |= ((endPoint[1].g >> 0) & 1) << 13;
-        block.y |= ((endPoint[1].g >> 1) & 1) << 14;
-        block.y |= ((endPoint[1].g >> 2) & 1) << 15;
-        block.y |= ((endPoint[1].g >> 3) & 1) << 16;
-        block.y |= ((endPoint[1].g >> 4) & 1) << 17;
-        block.y |= ((endPoint[1].g >> 5) & 1) << 18;
-        block.y |= ((endPoint[1].g >> 6) & 1) << 19;
-        block.y |= ((endPoint[1].g >> 7) & 1) << 20;
-        block.y |= ((endPoint[1].g >> 8) & 1) << 21;
-        block.y |= ((endPoint[0].g >> 10) & 1) << 22;
-        block.y |= ((endPoint[1].b >> 0) & 1) << 23;
-        block.y |= ((endPoint[1].b >> 1) & 1) << 24;
-        block.y |= ((endPoint[1].b >> 2) & 1) << 25;
-        block.y |= ((endPoint[1].b >> 3) & 1) << 26;
-        block.y |= ((endPoint[1].b >> 4) & 1) << 27;
-        block.y |= ((endPoint[1].b >> 5) & 1) << 28;
-        block.y |= ((endPoint[1].b >> 6) & 1) << 29;
-        block.y |= ((endPoint[1].b >> 7) & 1) << 30;
-        block.y |= ((endPoint[1].b >> 8) & 1) << 31;
-        block.z |= ((endPoint[0].b >> 10) & 1) << 0;
-    }
-    else if (mode_type == candidateModeFlag[12])// violate the spec in  [0].low
-    {
-        /*block.x |= candidateModeMemory[12];
-        block.y |= ( ( endPoint[0].r << 2 ) & 0x00001000 ) | ( ( endPoint[0].g << 12 ) & 0x00400000 );
-        block.y |= ( ( endPoint[0].r << 0 ) & 0x00000800 ) | ( ( endPoint[0].g << 10 ) & 0x00200000 );
-        block.y |= ( endPoint[0].b << 20 ) & 0x80000000;
-        block.y |= ( ( endPoint[1].r << 3 ) & 0x000007F8 ) | ( ( endPoint[1].g << 13 ) & 0x001FE000 ) | ( ( endPoint[1].b << 23 ) & 0x7F800000 );
-        block.z |= ( endPoint[0].b >> 10 ) & 0x00000001;*/
-
-        block.x |= ((candidateModeMemory[12] >> 0) & 1) << 0;
-        block.x |= ((candidateModeMemory[12] >> 1) & 1) << 1;
-        block.x |= ((candidateModeMemory[12] >> 2) & 1) << 2;
-        block.x |= ((candidateModeMemory[12] >> 3) & 1) << 3;
-        block.x |= ((candidateModeMemory[12] >> 4) & 1) << 4;
-        block.x |= ((endPoint[0].r >> 0) & 1) << 5;
-        block.x |= ((endPoint[0].r >> 1) & 1) << 6;
-        block.x |= ((endPoint[0].r >> 2) & 1) << 7;
-        block.x |= ((endPoint[0].r >> 3) & 1) << 8;
-        block.x |= ((endPoint[0].r >> 4) & 1) << 9;
-        block.x |= ((endPoint[0].r >> 5) & 1) << 10;
-        block.x |= ((endPoint[0].r >> 6) & 1) << 11;
-        block.x |= ((endPoint[0].r >> 7) & 1) << 12;
-        block.x |= ((endPoint[0].r >> 8) & 1) << 13;
-        block.x |= ((endPoint[0].r >> 9) & 1) << 14;
-        block.x |= ((endPoint[0].g >> 0) & 1) << 15;
-        block.x |= ((endPoint[0].g >> 1) & 1) << 16;
-        block.x |= ((endPoint[0].g >> 2) & 1) << 17;
-        block.x |= ((endPoint[0].g >> 3) & 1) << 18;
-        block.x |= ((endPoint[0].g >> 4) & 1) << 19;
-        block.x |= ((endPoint[0].g >> 5) & 1) << 20;
-        block.x |= ((endPoint[0].g >> 6) & 1) << 21;
-        block.x |= ((endPoint[0].g >> 7) & 1) << 22;
-        block.x |= ((endPoint[0].g >> 8) & 1) << 23;
-        block.x |= ((endPoint[0].g >> 9) & 1) << 24;
-        block.x |= ((endPoint[0].b >> 0) & 1) << 25;
-        block.x |= ((endPoint[0].b >> 1) & 1) << 26;
-        block.x |= ((endPoint[0].b >> 2) & 1) << 27;
-        block.x |= ((endPoint[0].b >> 3) & 1) << 28;
-        block.x |= ((endPoint[0].b >> 4) & 1) << 29;
-        block.x |= ((endPoint[0].b >> 5) & 1) << 30;
-        block.x |= ((endPoint[0].b >> 6) & 1) << 31;
-        block.y |= ((endPoint[0].b >> 7) & 1) << 0;
-        block.y |= ((endPoint[0].b >> 8) & 1) << 1;
-        block.y |= ((endPoint[0].b >> 9) & 1) << 2;
-        block.y |= ((endPoint[1].r >> 0) & 1) << 3;
-        block.y |= ((endPoint[1].r >> 1) & 1) << 4;
-        block.y |= ((endPoint[1].r >> 2) & 1) << 5;
-        block.y |= ((endPoint[1].r >> 3) & 1) << 6;
-        block.y |= ((endPoint[1].r >> 4) & 1) << 7;
-        block.y |= ((endPoint[1].r >> 5) & 1) << 8;
-        block.y |= ((endPoint[1].r >> 6) & 1) << 9;
-        block.y |= ((endPoint[1].r >> 7) & 1) << 10;
-        block.y |= ((endPoint[0].r >> 11) & 1) << 11;
-        block.y |= ((endPoint[0].r >> 10) & 1) << 12;
-        block.y |= ((endPoint[1].g >> 0) & 1) << 13;
-        block.y |= ((endPoint[1].g >> 1) & 1) << 14;
-        block.y |= ((endPoint[1].g >> 2) & 1) << 15;
-        block.y |= ((endPoint[1].g >> 3) & 1) << 16;
-        block.y |= ((endPoint[1].g >> 4) & 1) << 17;
-        block.y |= ((endPoint[1].g >> 5) & 1) << 18;
-        block.y |= ((endPoint[1].g >> 6) & 1) << 19;
-        block.y |= ((endPoint[1].g >> 7) & 1) << 20;
-        block.y |= ((endPoint[0].g >> 11) & 1) << 21;
-        block.y |= ((endPoint[0].g >> 10) & 1) << 22;
-        block.y |= ((endPoint[1].b >> 0) & 1) << 23;
-        block.y |= ((endPoint[1].b >> 1) & 1) << 24;
-        block.y |= ((endPoint[1].b >> 2) & 1) << 25;
-        block.y |= ((endPoint[1].b >> 3) & 1) << 26;
-        block.y |= ((endPoint[1].b >> 4) & 1) << 27;
-        block.y |= ((endPoint[1].b >> 5) & 1) << 28;
-        block.y |= ((endPoint[1].b >> 6) & 1) << 29;
-        block.y |= ((endPoint[1].b >> 7) & 1) << 30;
-        block.y |= ((endPoint[0].b >> 11) & 1) << 31;
-        block.z |= ((endPoint[0].b >> 10) & 1) << 0;
-    }
-    else if (mode_type == candidateModeFlag[13])
-    {
-        /*block.x |= candidateModeMemory[13];
-        block.y |= ( ( endPoint[0].r >> 8 ) & 0x00000080 );
-        block.y |= ( ( endPoint[0].r >> 6 ) & 0x00000100 );
-        block.y |= ( ( endPoint[0].r >> 4 ) & 0x00000200 );
-        block.y |= ( ( endPoint[0].r >> 2 ) & 0x00000400 );
-        block.y |= ( ( endPoint[0].r >> 0 ) & 0x00000800 );
-        block.y |= ( ( endPoint[0].r << 2 ) & 0x00001000 );
-        block.y |= ( ( endPoint[0].g << 2 ) & 0x00020000 );
-        block.y |= ( ( endPoint[0].g << 4 ) & 0x00040000 );
-        block.y |= ( ( endPoint[0].g << 6 ) & 0x00080000 );
-        block.y |= ( ( endPoint[0].g << 8 ) & 0x00100000 );
-        block.y |= ( ( endPoint[0].g << 10 ) & 0x00200000 );
-        block.y |= ( ( endPoint[0].g << 12 ) & 0x00400000 );
-        block.y |= ( ( endPoint[0].b << 12 ) & 0x08000000 );
-        block.y |= ( ( endPoint[0].b << 14 ) & 0x10000000 );
-        block.y |= ( ( endPoint[0].b << 16 ) & 0x20000000 );
-        block.y |= ( ( endPoint[0].b << 18 ) & 0x40000000 );
-        block.y |= ( ( endPoint[0].b << 20 ) & 0x80000000 );
-        block.y |= ( ( endPoint[1].r << 3 ) & 0x00000078 ) | ( ( endPoint[1].g << 13 ) & 0x0001E000 ) | ( ( endPoint[1].b << 23 ) & 0x07800000 );        
-        block.z |= ( endPoint[0].b >> 10 ) & 0x00000001;*/
-
-        block.x |= ((candidateModeMemory[13] >> 0) & 1) << 0;
-        block.x |= ((candidateModeMemory[13] >> 1) & 1) << 1;
-        block.x |= ((candidateModeMemory[13] >> 2) & 1) << 2;
-        block.x |= ((candidateModeMemory[13] >> 3) & 1) << 3;
-        block.x |= ((candidateModeMemory[13] >> 4) & 1) << 4;
-        block.x |= ((endPoint[0].r >> 0) & 1) << 5;
-        block.x |= ((endPoint[0].r >> 1) & 1) << 6;
-        block.x |= ((endPoint[0].r >> 2) & 1) << 7;
-        block.x |= ((endPoint[0].r >> 3) & 1) << 8;
-        block.x |= ((endPoint[0].r >> 4) & 1) << 9;
-        block.x |= ((endPoint[0].r >> 5) & 1) << 10;
-        block.x |= ((endPoint[0].r >> 6) & 1) << 11;
-        block.x |= ((endPoint[0].r >> 7) & 1) << 12;
-        block.x |= ((endPoint[0].r >> 8) & 1) << 13;
-        block.x |= ((endPoint[0].r >> 9) & 1) << 14;
-        block.x |= ((endPoint[0].g >> 0) & 1) << 15;
-        block.x |= ((endPoint[0].g >> 1) & 1) << 16;
-        block.x |= ((endPoint[0].g >> 2) & 1) << 17;
-        block.x |= ((endPoint[0].g >> 3) & 1) << 18;
-        block.x |= ((endPoint[0].g >> 4) & 1) << 19;
-        block.x |= ((endPoint[0].g >> 5) & 1) << 20;
-        block.x |= ((endPoint[0].g >> 6) & 1) << 21;
-        block.x |= ((endPoint[0].g >> 7) & 1) << 22;
-        block.x |= ((endPoint[0].g >> 8) & 1) << 23;
-        block.x |= ((endPoint[0].g >> 9) & 1) << 24;
-        block.x |= ((endPoint[0].b >> 0) & 1) << 25;
-        block.x |= ((endPoint[0].b >> 1) & 1) << 26;
-        block.x |= ((endPoint[0].b >> 2) & 1) << 27;
-        block.x |= ((endPoint[0].b >> 3) & 1) << 28;
-        block.x |= ((endPoint[0].b >> 4) & 1) << 29;
-        block.x |= ((endPoint[0].b >> 5) & 1) << 30;
-        block.x |= ((endPoint[0].b >> 6) & 1) << 31;
-        block.y |= ((endPoint[0].b >> 7) & 1) << 0;
-        block.y |= ((endPoint[0].b >> 8) & 1) << 1;
-        block.y |= ((endPoint[0].b >> 9) & 1) << 2;
-        block.y |= ((endPoint[1].r >> 0) & 1) << 3;
-        block.y |= ((endPoint[1].r >> 1) & 1) << 4;
-        block.y |= ((endPoint[1].r >> 2) & 1) << 5;
-        block.y |= ((endPoint[1].r >> 3) & 1) << 6;
-        block.y |= ((endPoint[0].r >> 15) & 1) << 7;
-        block.y |= ((endPoint[0].r >> 14) & 1) << 8;
-        block.y |= ((endPoint[0].r >> 13) & 1) << 9;
-        block.y |= ((endPoint[0].r >> 12) & 1) << 10;
-        block.y |= ((endPoint[0].r >> 11) & 1) << 11;
-        block.y |= ((endPoint[0].r >> 10) & 1) << 12;
-        block.y |= ((endPoint[1].g >> 0) & 1) << 13;
-        block.y |= ((endPoint[1].g >> 1) & 1) << 14;
-        block.y |= ((endPoint[1].g >> 2) & 1) << 15;
-        block.y |= ((endPoint[1].g >> 3) & 1) << 16;
-        block.y |= ((endPoint[0].g >> 15) & 1) << 17;
-        block.y |= ((endPoint[0].g >> 14) & 1) << 18;
-        block.y |= ((endPoint[0].g >> 13) & 1) << 19;
-        block.y |= ((endPoint[0].g >> 12) & 1) << 20;
-        block.y |= ((endPoint[0].g >> 11) & 1) << 21;
-        block.y |= ((endPoint[0].g >> 10) & 1) << 22;
-        block.y |= ((endPoint[1].b >> 0) & 1) << 23;
-        block.y |= ((endPoint[1].b >> 1) & 1) << 24;
-        block.y |= ((endPoint[1].b >> 2) & 1) << 25;
-        block.y |= ((endPoint[1].b >> 3) & 1) << 26;
-        block.y |= ((endPoint[0].b >> 15) & 1) << 27;
-        block.y |= ((endPoint[0].b >> 14) & 1) << 28;
-        block.y |= ((endPoint[0].b >> 13) & 1) << 29;
-        block.y |= ((endPoint[0].b >> 12) & 1) << 30;
-        block.y |= ((endPoint[0].b >> 11) & 1) << 31;
-        block.z |= ((endPoint[0].b >> 10) & 1) << 0;
-    }
-}
diff --git a/tests/hlsl/dxsdk/BC6HBC7EncoderCS/Shaders/BC7Encode.hlsl b/tests/hlsl/dxsdk/BC6HBC7EncoderCS/Shaders/BC7Encode.hlsl
deleted file mode 100644
index 6a57c3862..000000000
--- a/tests/hlsl/dxsdk/BC6HBC7EncoderCS/Shaders/BC7Encode.hlsl
+++ /dev/null
@@ -1,1908 +0,0 @@
-//TEST_IGNORE_FILE:
-//--------------------------------------------------------------------------------------
-// File: BC7Encode.hlsl
-//
-// The Compute Shader for BC7 Encoder
-//
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-//#define REF_DEVICE
-
-#define CHAR_LENGTH			8
-#define NCHANNELS			4
-#define	BC7_UNORM			98
-#define MAX_UINT			0xFFFFFFFF
-#define MIN_UINT			0
-
-static const uint candidateSectionBit[64] = //Associated to partition 0-63
-{
-    0xCCCC, 0x8888, 0xEEEE, 0xECC8,
-    0xC880, 0xFEEC, 0xFEC8, 0xEC80,
-    0xC800, 0xFFEC, 0xFE80, 0xE800,
-    0xFFE8, 0xFF00, 0xFFF0, 0xF000,
-    0xF710, 0x008E, 0x7100, 0x08CE,
-    0x008C, 0x7310, 0x3100, 0x8CCE,
-    0x088C, 0x3110, 0x6666, 0x366C,
-    0x17E8, 0x0FF0, 0x718E, 0x399C,
-    0xaaaa, 0xf0f0, 0x5a5a, 0x33cc, 
-    0x3c3c, 0x55aa, 0x9696, 0xa55a, 
-    0x73ce, 0x13c8, 0x324c, 0x3bdc, 
-    0x6996, 0xc33c, 0x9966, 0x660, 
-    0x272, 0x4e4, 0x4e40, 0x2720, 
-    0xc936, 0x936c, 0x39c6, 0x639c, 
-    0x9336, 0x9cc6, 0x817e, 0xe718, 
-    0xccf0, 0xfcc, 0x7744, 0xee22, 
-};
-static const uint candidateSectionBit2[64] = //Associated to partition 64-127
-{
-    0xaa685050, 0x6a5a5040, 0x5a5a4200, 0x5450a0a8,
-    0xa5a50000, 0xa0a05050, 0x5555a0a0, 0x5a5a5050,
-    0xaa550000, 0xaa555500, 0xaaaa5500, 0x90909090,
-    0x94949494, 0xa4a4a4a4, 0xa9a59450, 0x2a0a4250,
-    0xa5945040, 0x0a425054, 0xa5a5a500, 0x55a0a0a0,
-    0xa8a85454, 0x6a6a4040, 0xa4a45000, 0x1a1a0500,
-    0x0050a4a4, 0xaaa59090, 0x14696914, 0x69691400,
-    0xa08585a0, 0xaa821414, 0x50a4a450, 0x6a5a0200,
-    0xa9a58000, 0x5090a0a8, 0xa8a09050, 0x24242424,
-    0x00aa5500, 0x24924924, 0x24499224, 0x50a50a50,
-    0x500aa550, 0xaaaa4444, 0x66660000, 0xa5a0a5a0,
-    0x50a050a0, 0x69286928, 0x44aaaa44, 0x66666600,
-    0xaa444444, 0x54a854a8, 0x95809580, 0x96969600,
-    0xa85454a8, 0x80959580, 0xaa141414, 0x96960000,
-    0xaaaa1414, 0xa05050a0, 0xa0a5a5a0, 0x96000000,
-    0x40804080, 0xa9a8a9a8, 0xaaaaaa44, 0x2a4a5254,
-};
-static const uint2 candidateFixUpIndex1D[128] = 
-{
-    {15, 0},{15, 0},{15, 0},{15, 0},
-    {15, 0},{15, 0},{15, 0},{15, 0},
-    {15, 0},{15, 0},{15, 0},{15, 0},
-    {15, 0},{15, 0},{15, 0},{15, 0},
-    {15, 0},{ 2, 0},{ 8, 0},{ 2, 0},
-    { 2, 0},{ 8, 0},{ 8, 0},{15, 0},
-    { 2, 0},{ 8, 0},{ 2, 0},{ 2, 0},
-    { 8, 0},{ 8, 0},{ 2, 0},{ 2, 0},
-    
-    {15, 0},{15, 0},{ 6, 0},{ 8, 0},
-    { 2, 0},{ 8, 0},{15, 0},{15, 0},
-    { 2, 0},{ 8, 0},{ 2, 0},{ 2, 0},
-    { 2, 0},{15, 0},{15, 0},{ 6, 0},
-    { 6, 0},{ 2, 0},{ 6, 0},{ 8, 0},
-    {15, 0},{15, 0},{ 2, 0},{ 2, 0},
-    {15, 0},{15, 0},{15, 0},{15, 0},
-    {15, 0},{ 2, 0},{ 2, 0},{15, 0},
-    //candidateFixUpIndex1D[i][1], i < 64 should not be used
-    
-    { 3,15},{ 3, 8},{15, 8},{15, 3},
-    { 8,15},{ 3,15},{15, 3},{15, 8},
-    { 8,15},{ 8,15},{ 6,15},{ 6,15},
-    { 6,15},{ 5,15},{ 3,15},{ 3, 8},
-    { 3,15},{ 3, 8},{ 8,15},{15, 3},
-    { 3,15},{ 3, 8},{ 6,15},{10, 8},
-    { 5, 3},{ 8,15},{ 8, 6},{ 6,10},
-    { 8,15},{ 5,15},{15,10},{15, 8},
-    
-    { 8,15},{15, 3},{ 3,15},{ 5,10},
-    { 6,10},{10, 8},{ 8, 9},{15,10},
-    {15, 6},{ 3,15},{15, 8},{ 5,15},
-    {15, 3},{15, 6},{15, 6},{15, 8}, //The Spec doesn't mark the first fixed up index in this row, so I apply 15 for them, and seems correct
-    { 3,15},{15, 3},{ 5,15},{ 5,15},
-    { 5,15},{ 8,15},{ 5,15},{10,15},
-    { 5,15},{10,15},{ 8,15},{13,15},
-    {15, 3},{12,15},{ 3,15},{ 3, 8},
-};
-static const uint2 candidateFixUpIndex1DOrdered[128] = //Same with candidateFixUpIndex1D but order the result when i >= 64
-{
-    {15, 0},{15, 0},{15, 0},{15, 0},
-    {15, 0},{15, 0},{15, 0},{15, 0},
-    {15, 0},{15, 0},{15, 0},{15, 0},
-    {15, 0},{15, 0},{15, 0},{15, 0},
-    {15, 0},{ 2, 0},{ 8, 0},{ 2, 0},
-    { 2, 0},{ 8, 0},{ 8, 0},{15, 0},
-    { 2, 0},{ 8, 0},{ 2, 0},{ 2, 0},
-    { 8, 0},{ 8, 0},{ 2, 0},{ 2, 0},
-    
-    {15, 0},{15, 0},{ 6, 0},{ 8, 0},
-    { 2, 0},{ 8, 0},{15, 0},{15, 0},
-    { 2, 0},{ 8, 0},{ 2, 0},{ 2, 0},
-    { 2, 0},{15, 0},{15, 0},{ 6, 0},
-    { 6, 0},{ 2, 0},{ 6, 0},{ 8, 0},
-    {15, 0},{15, 0},{ 2, 0},{ 2, 0},
-    {15, 0},{15, 0},{15, 0},{15, 0},
-    {15, 0},{ 2, 0},{ 2, 0},{15, 0},
-    //candidateFixUpIndex1DOrdered[i][1], i < 64 should not be used
-    
-    { 3,15},{ 3, 8},{ 8,15},{ 3,15},
-    { 8,15},{ 3,15},{ 3,15},{ 8,15},
-    { 8,15},{ 8,15},{ 6,15},{ 6,15},
-    { 6,15},{ 5,15},{ 3,15},{ 3, 8},
-    { 3,15},{ 3, 8},{ 8,15},{ 3,15},
-    { 3,15},{ 3, 8},{ 6,15},{ 8,10},
-    { 3, 5},{ 8,15},{ 6, 8},{ 6,10},
-    { 8,15},{ 5,15},{10,15},{ 8,15},
-    
-    { 8,15},{ 3,15},{ 3,15},{ 5,10},
-    { 6,10},{ 8,10},{ 8, 9},{10,15},
-    { 6,15},{ 3,15},{ 8,15},{ 5,15},
-    { 3,15},{ 6,15},{ 6,15},{ 8,15}, //The Spec doesn't mark the first fixed up index in this row, so I apply 15 for them, and seems correct
-    { 3,15},{ 3,15},{ 5,15},{ 5,15},
-    { 5,15},{ 8,15},{ 5,15},{10,15},
-    { 5,15},{10,15},{ 8,15},{13,15},
-    { 3,15},{12,15},{ 3,15},{ 3, 8},
-};
-//static const uint4x4 candidateRotation[4] = 
-//{
-//    {1,0,0,0},{0,1,0,0},{0,0,1,0},{0,0,0,1},
-//    {0,0,0,1},{0,1,0,0},{0,0,1,0},{1,0,0,0},
-//    {1,0,0,0},{0,0,0,1},{0,0,1,0},{0,1,0,0},
-//    {1,0,0,0},{0,1,0,0},{0,0,0,1},{0,0,1,0}
-//};
-//static const uint2 candidateIndexPrec[8] = {{3,0},{3,0},{2,0},{2,0},
-//                                            {2,3}, //color index and alpha index can exchange
-//                                            {2,2},{4,4},{2,2}};
-
-static const uint aWeight[3][16] = { {0,  4,  9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64},
-                                    {0,  9, 18, 27, 37, 46, 55, 64,  0,  0,  0,  0,  0,  0,  0,  0},
-                                    {0, 21, 43, 64,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0} };
-
-                                //4 bit index: 0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64
-static const uint aStep[3][64] = {  { 0, 0, 0, 1, 1, 1, 1, 2,
-                                    2, 2, 2, 2, 3, 3, 3, 3,
-                                    4, 4, 4, 4, 5, 5, 5, 5,
-                                    6, 6, 6, 6, 6, 7, 7, 7,
-                                    7, 8, 8, 8, 8, 9, 9, 9,
-                                    9,10,10,10,10,10,11,11,
-                                   11,11,12,12,12,12,13,13,
-                                   13,13,14,14,14,14,15,15 },
-                                //3 bit index: 0, 9, 18, 27, 37, 46, 55, 64
-                                    { 0,0,0,0,0,1,1,1,
-                                    1,1,1,1,1,1,2,2,
-                                    2,2,2,2,2,2,2,3,
-                                    3,3,3,3,3,3,3,3,
-                                    3,4,4,4,4,4,4,4,
-                                    4,4,5,5,5,5,5,5,
-                                    5,5,5,6,6,6,6,6,
-                                    6,6,6,6,7,7,7,7 },
-                                //2 bit index: 0, 21, 43, 64
-                                    { 0,0,0,0,0,0,0,0,
-                                    0,0,0,1,1,1,1,1,
-                                    1,1,1,1,1,1,1,1,
-                                    1,1,1,1,1,1,1,1,
-                                    1,2,2,2,2,2,2,2,
-                                    2,2,2,2,2,2,2,2,
-                                    2,2,2,2,2,2,3,3,
-                                    3,3,3,3,3,3,3,3 } };
-
-cbuffer cbCS : register( b0 )
-{
-    uint g_tex_width;
-    uint g_num_block_x;
-    uint g_format;
-    uint g_mode_id;
-    uint g_start_block_id;
-    uint g_num_total_blocks;
-    float g_alpha_weight;
-};
-
-//Forward declaration
-uint2x4 compress_endpoints0( inout uint2x4 endPoint, uint2 P ); //Mode = 0
-uint2x4 compress_endpoints1( inout uint2x4 endPoint, uint2 P ); //Mode = 1
-uint2x4 compress_endpoints2( inout uint2x4 endPoint ); //Mode = 2
-uint2x4 compress_endpoints3( inout uint2x4 endPoint, uint2 P ); //Mode = 3
-uint2x4 compress_endpoints7( inout uint2x4 endPoint, uint2 P ); //Mode = 7
-uint2x4 compress_endpoints6( inout uint2x4 endPoint, uint2 P ); //Mode = 6
-uint2x4 compress_endpoints4( inout uint2x4 endPoint ); //Mode = 4
-uint2x4 compress_endpoints5( inout uint2x4 endPoint ); //Mode = 5
-
-void block_package0( out uint4 block, uint partition, uint threadBase ); //Mode0
-void block_package1( out uint4 block, uint partition, uint threadBase ); //Mode1
-void block_package2( out uint4 block, uint partition, uint threadBase ); //Mode2
-void block_package3( out uint4 block, uint partition, uint threadBase ); //Mode3
-void block_package4( out uint4 block, uint rotation, uint index_selector, uint threadBase ); //Mode4
-void block_package5( out uint4 block, uint rotation, uint threadBase ); //Mode5
-void block_package6( out uint4 block, uint threadBase ); //Mode6
-void block_package7( out uint4 block, uint partition, uint threadBase ); //Mode7
-
-
-void swap(inout uint4 lhs, inout uint4 rhs)
-{
-    uint4 tmp = lhs;
-    lhs = rhs;
-    rhs = tmp;
-}
-void swap(inout uint3 lhs, inout uint3 rhs)
-{
-    uint3 tmp = lhs;
-    lhs = rhs;
-    rhs = tmp;
-}
-void swap(inout uint lhs, inout uint rhs)
-{
-    uint tmp = lhs;
-    lhs = rhs;
-    rhs = tmp;
-}
-
-uint ComputeError(in uint4 a, in uint4 b)
-{		
-	return dot(a.rgb, b.rgb) + g_alpha_weight * a.a*b.a;
-}
-
-void Ensure_A_Is_Larger( inout uint4 a, inout uint4 b )
-{
-    if ( a.x < b.x )
-        swap( a.x, b.x );
-    if ( a.y < b.y )
-        swap( a.y, b.y );
-    if ( a.z < b.z )
-        swap( a.z, b.z );
-    if ( a.w < b.w )
-        swap( a.w, b.w );
-}
-
-
-Texture2D g_Input : register( t0 ); 
-StructuredBuffer<uint4> g_InBuff : register( t1 );
-
-RWStructuredBuffer<uint4> g_OutBuff : register( u0 );
-
-#define THREAD_GROUP_SIZE	64
-#define BLOCK_SIZE_Y		4
-#define BLOCK_SIZE_X		4
-#define BLOCK_SIZE			(BLOCK_SIZE_Y * BLOCK_SIZE_X)
-
-struct BufferShared
-{
-    uint4 pixel;
-    uint error;
-    uint mode;
-    uint partition;
-    uint index_selector;
-    uint rotation;
-    uint4 endPoint_low;
-    uint4 endPoint_high;
-    uint4 endPoint_low_quantized;
-    uint4 endPoint_high_quantized;
-};
-groupshared BufferShared shared_temp[THREAD_GROUP_SIZE];
-
-[numthreads( THREAD_GROUP_SIZE, 1, 1 )]
-void TryMode456CS( uint GI : SV_GroupIndex, uint3 groupID : SV_GroupID ) // mode 4 5 6 all have 1 subset per block, and fix-up index is always index 0
-{
-    // we process 4 BC blocks per thread group
-    const uint MAX_USED_THREAD = 16;                                                // pixels in a BC (block compressed) block
-    uint BLOCK_IN_GROUP = THREAD_GROUP_SIZE / MAX_USED_THREAD;                      // the number of BC blocks a thread group processes = 64 / 16 = 4
-    uint blockInGroup = GI / MAX_USED_THREAD;                                       // what BC block this thread is on within this thread group
-    uint blockID = g_start_block_id + groupID.x * BLOCK_IN_GROUP + blockInGroup;    // what global BC block this thread is on
-    uint threadBase = blockInGroup * MAX_USED_THREAD;                               // the first id of the pixel in this BC block in this thread group
-    uint threadInBlock = GI - threadBase;                                           // id of the pixel in this BC block
-
-#ifndef REF_DEVICE
-    if (blockID >= g_num_total_blocks)
-    {
-        return;
-    }
-#endif
-
-    uint block_y = blockID / g_num_block_x;
-    uint block_x = blockID - block_y * g_num_block_x;
-    uint base_x = block_x * BLOCK_SIZE_X;
-    uint base_y = block_y * BLOCK_SIZE_Y;
-    
-    if (threadInBlock < 16)
-    {
-        shared_temp[GI].pixel = clamp(uint4(g_Input.Load( uint3( base_x + threadInBlock % 4, base_y + threadInBlock / 4, 0 ) ) * 255), 0, 255);
-
-        shared_temp[GI].endPoint_low = shared_temp[GI].pixel;
-        shared_temp[GI].endPoint_high = shared_temp[GI].pixel;
-    }
-#ifdef REF_DEVICE
-    GroupMemoryBarrierWithGroupSync();
-#endif
-
-    if (threadInBlock < 8)
-    {
-        shared_temp[GI].endPoint_low = min(shared_temp[GI].endPoint_low, shared_temp[GI + 8].endPoint_low);
-        shared_temp[GI].endPoint_high = max(shared_temp[GI].endPoint_high, shared_temp[GI + 8].endPoint_high);
-    }
-#ifdef REF_DEVICE
-    GroupMemoryBarrierWithGroupSync();
-#endif
-    if (threadInBlock < 4)
-    {
-        shared_temp[GI].endPoint_low = min(shared_temp[GI].endPoint_low, shared_temp[GI + 4].endPoint_low);
-        shared_temp[GI].endPoint_high = max(shared_temp[GI].endPoint_high, shared_temp[GI + 4].endPoint_high);
-    }
-#ifdef REF_DEVICE
-    GroupMemoryBarrierWithGroupSync();
-#endif
-    if (threadInBlock < 2)
-    {
-        shared_temp[GI].endPoint_low = min(shared_temp[GI].endPoint_low, shared_temp[GI + 2].endPoint_low);
-        shared_temp[GI].endPoint_high = max(shared_temp[GI].endPoint_high, shared_temp[GI + 2].endPoint_high);
-    }
-#ifdef REF_DEVICE
-    GroupMemoryBarrierWithGroupSync();
-#endif
-    if (threadInBlock < 1)
-    {
-        shared_temp[GI].endPoint_low = min(shared_temp[GI].endPoint_low, shared_temp[GI + 1].endPoint_low);
-        shared_temp[GI].endPoint_high = max(shared_temp[GI].endPoint_high, shared_temp[GI + 1].endPoint_high);
-    }
-#ifdef REF_DEVICE
-    GroupMemoryBarrierWithGroupSync();
-#endif
-
-    uint2x4 endPoint;
-    endPoint[0] = shared_temp[threadBase].endPoint_low;
-    endPoint[1] = shared_temp[threadBase].endPoint_high;
-
-    uint error = 0xFFFFFFFF;
-    uint mode = 0;
-    uint index_selector = 0;
-    uint rotation = 0;
-
-    uint2 indexPrec;
-    if (threadInBlock < 8) // all threads of threadInBlock < 8 will be working on trying out mode 4, since only mode 4 has index selector bit
-    {
-        if (0 == (threadInBlock & 1)) // thread 0, 2, 4, 6
-        {
-            //2 represents 2bit index precision; 1 represents 3bit index precision
-            index_selector = 0;
-            indexPrec = uint2( 2, 1 );
-        }
-        else                          // thread 1, 3, 5, 7
-        {
-            //2 represents 2bit index precision; 1 represents 3bit index precision
-            index_selector = 1;
-            indexPrec = uint2( 1, 2 );
-        }
-    }
-    else
-    {
-         //2 represents 2bit index precision
-        indexPrec = uint2( 2, 2 );
-    }
-
-    uint4 pixel_r;
-    uint color_index;
-    uint alpha_index;
-    int4 span;
-    int2 span_norm_sqr;
-    int2 dotProduct;
-    if (threadInBlock < 12) // Try mode 4 5 in threads 0..11
-    {
-        // mode 4 5 have component rotation
-        if ((threadInBlock < 2) || (8 == threadInBlock))       // rotation = 0 in thread 0, 1
-        {
-            rotation = 0;
-        }
-        else if ((threadInBlock < 4) || (9 == threadInBlock))  // rotation = 1 in thread 2, 3
-        {
-            endPoint[0].ra = endPoint[0].ar;
-            endPoint[1].ra = endPoint[1].ar;
-
-            rotation = 1;
-        }
-        else if ((threadInBlock < 6) || (10 == threadInBlock)) // rotation = 2 in thread 4, 5
-        {
-            endPoint[0].ga = endPoint[0].ag;
-            endPoint[1].ga = endPoint[1].ag;
-
-            rotation = 2;
-        }
-        else if ((threadInBlock < 8) || (11 == threadInBlock)) // rotation = 3 in thread 6, 7
-        {
-            endPoint[0].ba = endPoint[0].ab;
-            endPoint[1].ba = endPoint[1].ab;
-
-            rotation = 3;
-        }
-
-        if (threadInBlock < 8)  // try mode 4 in threads 0..7
-        {
-            // mode 4 thread distribution
-            // Thread           0	1	2	3	4	5	6	7
-            // Rotation	        0	0	1	1	2	2	3	3
-            // Index selector   0	1	0	1	0	1	0	1
-
-            mode = 4;
-            compress_endpoints4( endPoint );
-        }
-        else                    // try mode 5 in threads 8..11
-        {
-            // mode 5 thread distribution
-            // Thread	 8	9  10  11
-            // Rotation	 0	1   2   3
-
-            mode = 5;
-            compress_endpoints5( endPoint );
-        }
-
-        uint4 pixel = shared_temp[threadBase + 0].pixel;
-        if (1 == rotation)
-        {
-            pixel.ra = pixel.ar;
-        }
-        else if (2 == rotation)
-        {
-            pixel.ga = pixel.ag;
-        }
-        else if (3 == rotation)
-        {
-            pixel.ba = pixel.ab;
-        }
-
-        span = endPoint[1] - endPoint[0];
-        span_norm_sqr = uint2( dot( span.rgb, span.rgb ), span.a * span.a );
-        
-        // in mode 4 5 6, end point 0 must be closer to pixel 0 than end point 1, because of the fix-up index is always index 0
-        // TODO: this shouldn't be necessary here in error calculation
-        /*
-        dotProduct = int2( dot( span.rgb, pixel.rgb - endPoint[0].rgb ), span.a * ( pixel.a - endPoint[0].a ) );
-        if ( span_norm_sqr.x > 0 && dotProduct.x > 0 && uint( dotProduct.x * 63.49999 ) > uint( 32 * span_norm_sqr.x ) )
-        {
-            span.rgb = -span.rgb;
-            swap(endPoint[0].rgb, endPoint[1].rgb);
-        }
-        if ( span_norm_sqr.y > 0 && dotProduct.y > 0 && uint( dotProduct.y * 63.49999 ) > uint( 32 * span_norm_sqr.y ) )
-        {
-            span.a = -span.a;
-            swap(endPoint[0].a, endPoint[1].a);
-        }
-        */
-	
-        // should be the same as above
-        dotProduct = int2( dot( pixel.rgb - endPoint[0].rgb, pixel.rgb - endPoint[0].rgb ), dot( pixel.rgb - endPoint[1].rgb, pixel.rgb - endPoint[1].rgb ) );
-        if ( dotProduct.x > dotProduct.y )
-        {
-            span.rgb = -span.rgb;
-            swap(endPoint[0].rgb, endPoint[1].rgb);
-        }
-        dotProduct = int2( dot( pixel.a - endPoint[0].a, pixel.a - endPoint[0].a ), dot( pixel.a - endPoint[1].a, pixel.a - endPoint[1].a ) );
-        if ( dotProduct.x > dotProduct.y )
-        {
-            span.a = -span.a;
-            swap(endPoint[0].a, endPoint[1].a);
-        }
-
-        error = 0;
-        for ( uint i = 0; i < 16; i ++ )
-        {
-            pixel = shared_temp[threadBase + i].pixel;
-            if (1 == rotation)
-            {
-                pixel.ra = pixel.ar;
-            }
-            else if (2 == rotation)
-            {
-                pixel.ga = pixel.ag;
-            }
-            else if (3 == rotation)
-            {
-                pixel.ba = pixel.ab;
-            }
-
-            dotProduct.x = dot( span.rgb, pixel.rgb - endPoint[0].rgb );
-            color_index = ( span_norm_sqr.x <= 0 /*endPoint[0] == endPoint[1]*/ || dotProduct.x <= 0 /*pixel == endPoint[0]*/ ) ? 0
-                : ( ( dotProduct.x < span_norm_sqr.x ) ? aStep[indexPrec.x][ uint( dotProduct.x * 63.49999 / span_norm_sqr.x ) ] : aStep[indexPrec.x][63] );
-            dotProduct.y = dot( span.a, pixel.a - endPoint[0].a );
-            alpha_index = ( span_norm_sqr.y <= 0 || dotProduct.y <= 0 ) ? 0
-                : ( ( dotProduct.y < span_norm_sqr.y ) ? aStep[indexPrec.y][ uint( dotProduct.y * 63.49999 / span_norm_sqr.y ) ] : aStep[indexPrec.y][63] );
-
-            // the same color_index and alpha_index should be used for reconstruction, so this should be left commented out
-            /*if (index_selector)
-            {
-                swap(color_index, alpha_index);
-            }*/
-
-            pixel_r.rgb = ( ( 64 - aWeight[indexPrec.x][color_index] ) * endPoint[0].rgb +
-                            aWeight[indexPrec.x][color_index] * endPoint[1].rgb + 
-                            32 ) >> 6;
-            pixel_r.a = ( ( 64 - aWeight[indexPrec.y][alpha_index] ) * endPoint[0].a + 
-                          aWeight[indexPrec.y][alpha_index] * endPoint[1].a + 
-                          32 ) >> 6;
-
-            Ensure_A_Is_Larger( pixel_r, pixel );
-            pixel_r -= pixel;
-            if (1 == rotation)
-            {
-                pixel_r.ra = pixel_r.ar;
-            }
-            else if (2 == rotation)
-            {
-                pixel_r.ga = pixel_r.ag;
-            }
-            else if (3 == rotation)
-            {
-                pixel_r.ba = pixel_r.ab;
-            }
-            error += ComputeError(pixel_r, pixel_r);
-        }
-    }
-    else if (threadInBlock < 16) // Try mode 6 in threads 12..15, since in mode 4 5 6, only mode 6 has p bit
-    {
-        uint p = threadInBlock - 12;
-
-        compress_endpoints6( endPoint, uint2(p >> 0, p >> 1) & 1 );
-
-        uint4 pixel = shared_temp[threadBase + 0].pixel;
-
-        span = endPoint[1] - endPoint[0];
-        span_norm_sqr = dot( span, span );
-        dotProduct = dot( span, pixel - endPoint[0] );
-        if ( span_norm_sqr.x > 0 && dotProduct.x >= 0 && uint( dotProduct.x * 63.49999 ) > uint( 32 * span_norm_sqr.x ) )
-        {
-            span = -span;
-            swap(endPoint[0], endPoint[1]);
-        }
-            
-        error = 0;
-        for ( uint i = 0; i < 16; i ++ )
-        {
-            pixel = shared_temp[threadBase + i].pixel;
-            
-            dotProduct.x = dot( span, pixel - endPoint[0] );
-            color_index = ( span_norm_sqr.x <= 0 || dotProduct.x <= 0 ) ? 0
-                : ( ( dotProduct.x < span_norm_sqr.x ) ? aStep[0][ uint( dotProduct.x * 63.49999 / span_norm_sqr.x ) ] : aStep[0][63] );
-            
-            pixel_r = ( ( 64 - aWeight[0][color_index] ) * endPoint[0]
-                + aWeight[0][color_index] * endPoint[1] + 32 ) >> 6;
-        
-            Ensure_A_Is_Larger( pixel_r, pixel );
-            pixel_r -= pixel;
-            error += ComputeError(pixel_r, pixel_r);
-        }
-
-        mode = 6;
-        rotation = p;    // Borrow rotation for p
-    }
-
-    shared_temp[GI].error = error;
-    shared_temp[GI].mode = mode;
-    shared_temp[GI].index_selector = index_selector;
-    shared_temp[GI].rotation = rotation;
-
-#ifdef REF_DEVICE
-    GroupMemoryBarrierWithGroupSync();
-#endif
-
-    if (threadInBlock < 8)
-    {
-        if ( shared_temp[GI].error > shared_temp[GI + 8].error )
-        {
-            shared_temp[GI].error = shared_temp[GI + 8].error;
-            shared_temp[GI].mode = shared_temp[GI + 8].mode;
-            shared_temp[GI].index_selector = shared_temp[GI + 8].index_selector;
-            shared_temp[GI].rotation = shared_temp[GI + 8].rotation;
-        }
-    }
-#ifdef REF_DEVICE
-    GroupMemoryBarrierWithGroupSync();
-#endif
-    if (threadInBlock < 4)
-    {
-        if ( shared_temp[GI].error > shared_temp[GI + 4].error )
-        {
-            shared_temp[GI].error = shared_temp[GI + 4].error;
-            shared_temp[GI].mode = shared_temp[GI + 4].mode;
-            shared_temp[GI].index_selector = shared_temp[GI + 4].index_selector;
-            shared_temp[GI].rotation = shared_temp[GI + 4].rotation;
-        }
-    }
-#ifdef REF_DEVICE
-    GroupMemoryBarrierWithGroupSync();
-#endif
-    if (threadInBlock < 2)
-    {
-        if ( shared_temp[GI].error > shared_temp[GI + 2].error )
-        {
-            shared_temp[GI].error = shared_temp[GI + 2].error;
-            shared_temp[GI].mode = shared_temp[GI + 2].mode;
-            shared_temp[GI].index_selector = shared_temp[GI + 2].index_selector;
-            shared_temp[GI].rotation = shared_temp[GI + 2].rotation;
-        }
-    }
-#ifdef REF_DEVICE
-    GroupMemoryBarrierWithGroupSync();
-#endif
-    if (threadInBlock < 1)
-    {
-        if ( shared_temp[GI].error > shared_temp[GI + 1].error )
-        {
-            shared_temp[GI].error = shared_temp[GI + 1].error;
-            shared_temp[GI].mode = shared_temp[GI + 1].mode;
-            shared_temp[GI].index_selector = shared_temp[GI + 1].index_selector;
-            shared_temp[GI].rotation = shared_temp[GI + 1].rotation;
-        }
-
-        g_OutBuff[blockID] = uint4(shared_temp[GI].error, (shared_temp[GI].index_selector << 31) | shared_temp[GI].mode,
-            0, shared_temp[GI].rotation); // rotation is indeed rotation for mode 4 5. for mode 6, rotation is p bit
-    }
-}
-
-[numthreads( THREAD_GROUP_SIZE, 1, 1 )]
-void TryMode137CS( uint GI : SV_GroupIndex, uint3 groupID : SV_GroupID ) // mode 1 3 7 all have 2 subsets per block
-{
-    const uint MAX_USED_THREAD = 64;
-    uint BLOCK_IN_GROUP = THREAD_GROUP_SIZE / MAX_USED_THREAD;
-    uint blockInGroup = GI / MAX_USED_THREAD;
-    uint blockID = g_start_block_id + groupID.x * BLOCK_IN_GROUP + blockInGroup;
-    uint threadBase = blockInGroup * MAX_USED_THREAD;
-    uint threadInBlock = GI - threadBase;
-
-    uint block_y = blockID / g_num_block_x;
-    uint block_x = blockID - block_y * g_num_block_x;
-    uint base_x = block_x * BLOCK_SIZE_X;
-    uint base_y = block_y * BLOCK_SIZE_Y;
-    
-    if (threadInBlock < 16)
-    {
-        shared_temp[GI].pixel = clamp(uint4(g_Input.Load( uint3( base_x + threadInBlock % 4, base_y + threadInBlock / 4, 0 ) ) * 255), 0, 255);
-    }
-    GroupMemoryBarrierWithGroupSync();
-
-    shared_temp[GI].error = 0xFFFFFFFF;
-
-    uint4 pixel_r;
-    uint2x4 endPoint[2];        // endPoint[0..1 for subset id][0..1 for low and high in the subset]
-    uint2x4 endPointBackup[2];
-    uint color_index;
-    if (threadInBlock < 64)
-    {
-        uint partition = threadInBlock;
-
-        endPoint[0][0] = MAX_UINT;
-        endPoint[0][1] = MIN_UINT;
-        endPoint[1][0] = MAX_UINT;
-        endPoint[1][1] = MIN_UINT;
-        uint bits = candidateSectionBit[partition];
-        for ( uint i = 0; i < 16; i ++ )
-        {
-            uint4 pixel = shared_temp[threadBase + i].pixel;
-            if ( (( bits >> i ) & 0x01) == 1 )
-            {
-                endPoint[1][0] = min( endPoint[1][0], pixel );
-                endPoint[1][1] = max( endPoint[1][1], pixel );
-            }
-            else
-            {
-                endPoint[0][0] = min( endPoint[0][0], pixel );
-                endPoint[0][1] = max( endPoint[0][1], pixel );
-            }
-        }
-
-        endPointBackup[0] = endPoint[0];
-        endPointBackup[1] = endPoint[1];
-
-        uint max_p;
-        if (1 == g_mode_id)
-        {
-            // in mode 1, there is only one p bit per subset
-            max_p = 4;
-        }
-        else
-        {
-            // in mode 3 7, there are two p bits per subset, one for each end point
-            max_p = 16;
-        }
-
-        uint rotation = 0;
-        uint error = MAX_UINT;
-        for ( uint p = 0; p < max_p; p ++ )
-        {
-            endPoint[0] = endPointBackup[0];
-            endPoint[1] = endPointBackup[1];
-
-            for ( i = 0; i < 2; i ++ ) // loop through 2 subsets
-            {
-                if (g_mode_id == 1)
-                {
-                    compress_endpoints1( endPoint[i], (p >> i) & 1 );
-                }
-                else if (g_mode_id == 3)
-                {
-                    compress_endpoints3( endPoint[i], uint2(p >> (i * 2 + 0), p >> (i * 2 + 1)) & 1 );
-                }
-                else if (g_mode_id == 7)
-                {
-                    compress_endpoints7( endPoint[i], uint2(p >> (i * 2 + 0), p >> (i * 2 + 1)) & 1 );
-                }
-            }
-
-            int4 span[2];
-            span[0] = endPoint[0][1] - endPoint[0][0];
-            span[1] = endPoint[1][1] - endPoint[1][0];
-
-            if (g_mode_id != 7)
-            {
-                span[0].w = span[1].w = 0;
-            }
-
-            int span_norm_sqr[2];
-            span_norm_sqr[0] = dot( span[0], span[0] );
-            span_norm_sqr[1] = dot( span[1], span[1] );
-
-            // TODO: again, this shouldn't be necessary here in error calculation
-            int dotProduct = dot( span[0], shared_temp[threadBase + 0].pixel - endPoint[0][0] );
-            if ( span_norm_sqr[0] > 0 && dotProduct > 0 && uint( dotProduct * 63.49999 ) > uint( 32 * span_norm_sqr[0] ) )
-            {
-                span[0] = -span[0];
-                swap(endPoint[0][0], endPoint[0][1]);
-            }
-            dotProduct = dot( span[1], shared_temp[threadBase + candidateFixUpIndex1D[partition].x].pixel - endPoint[1][0] );
-            if ( span_norm_sqr[1] > 0 && dotProduct > 0 && uint( dotProduct * 63.49999 ) > uint( 32 * span_norm_sqr[1] ) )
-            {
-                span[1] = -span[1];
-                swap(endPoint[1][0], endPoint[1][1]);
-            }
-
-            uint step_selector;
-            if (g_mode_id != 1)
-            {
-                step_selector = 2;  // mode 3 7 have 2 bit index
-            }
-            else
-            {
-                step_selector = 1;  // mode 1 has 3 bit index
-            }
-
-            uint p_error = 0;            
-            for ( i = 0; i < 16; i ++ )
-            {
-                if (((bits >> i) & 0x01) == 1)
-                {
-                    dotProduct = dot( span[1], shared_temp[threadBase + i].pixel - endPoint[1][0] );
-                    color_index = (span_norm_sqr[1] <= 0 || dotProduct <= 0) ? 0
-                        : ((dotProduct < span_norm_sqr[1]) ? aStep[step_selector][uint(dotProduct * 63.49999 / span_norm_sqr[1])] : aStep[step_selector][63]);
-                }
-                else
-                {
-                    dotProduct = dot( span[0], shared_temp[threadBase + i].pixel - endPoint[0][0] );
-                    color_index = (span_norm_sqr[0] <= 0 || dotProduct <= 0) ? 0
-                        : ((dotProduct < span_norm_sqr[0]) ? aStep[step_selector][uint(dotProduct * 63.49999 / span_norm_sqr[0])] : aStep[step_selector][63]);
-                }
-
-                uint subset_index = (bits >> i) & 0x01;
-
-                pixel_r = ((64 - aWeight[step_selector][color_index]) * endPoint[subset_index][0]
-                    + aWeight[step_selector][color_index] * endPoint[subset_index][1] + 32) >> 6;
-                if (g_mode_id != 7)
-                {
-                    pixel_r.a = 255;
-                }
-
-                uint4 pixel = shared_temp[threadBase + i].pixel;
-                Ensure_A_Is_Larger( pixel_r, pixel );
-                pixel_r -= pixel;
-                p_error += ComputeError(pixel_r, pixel_r);
-            }
-
-            if (p_error < error)
-            {
-                error = p_error;
-                rotation = p;
-            }
-        }
-
-        shared_temp[GI].error = error;
-        shared_temp[GI].mode = g_mode_id;
-        shared_temp[GI].partition = partition;
-        shared_temp[GI].rotation = rotation; // mode 1 3 7 don't have rotation, we use rotation for p bits
-    }
-    GroupMemoryBarrierWithGroupSync();
-
-    if (threadInBlock < 32)
-    {
-        if ( shared_temp[GI].error > shared_temp[GI + 32].error )
-        {
-            shared_temp[GI].error = shared_temp[GI + 32].error;
-            shared_temp[GI].mode = shared_temp[GI + 32].mode;
-            shared_temp[GI].partition = shared_temp[GI + 32].partition;
-            shared_temp[GI].rotation = shared_temp[GI + 32].rotation;
-        }
-    }
-#ifdef REF_DEVICE
-    GroupMemoryBarrierWithGroupSync();
-#endif
-if (threadInBlock < 16)
-    {
-        if ( shared_temp[GI].error > shared_temp[GI + 16].error )
-        {
-            shared_temp[GI].error = shared_temp[GI + 16].error;
-            shared_temp[GI].mode = shared_temp[GI + 16].mode;
-            shared_temp[GI].partition = shared_temp[GI + 16].partition;
-            shared_temp[GI].rotation = shared_temp[GI + 16].rotation;
-        }
-    }
-#ifdef REF_DEVICE
-    GroupMemoryBarrierWithGroupSync();
-#endif
-    if (threadInBlock < 8)
-    {
-        if ( shared_temp[GI].error > shared_temp[GI + 8].error )
-        {
-            shared_temp[GI].error = shared_temp[GI + 8].error;
-            shared_temp[GI].mode = shared_temp[GI + 8].mode;
-            shared_temp[GI].partition = shared_temp[GI + 8].partition;
-            shared_temp[GI].rotation = shared_temp[GI + 8].rotation;
-        }
-    }
-#ifdef REF_DEVICE
-    GroupMemoryBarrierWithGroupSync();
-#endif
-    if (threadInBlock < 4)
-    {
-        if ( shared_temp[GI].error > shared_temp[GI + 4].error )
-        {
-            shared_temp[GI].error = shared_temp[GI + 4].error;
-            shared_temp[GI].mode = shared_temp[GI + 4].mode;
-            shared_temp[GI].partition = shared_temp[GI + 4].partition;
-            shared_temp[GI].rotation = shared_temp[GI + 4].rotation;
-        }
-    }
-#ifdef REF_DEVICE
-    GroupMemoryBarrierWithGroupSync();
-#endif
-    if (threadInBlock < 2)
-    {
-        if ( shared_temp[GI].error > shared_temp[GI + 2].error )
-        {
-            shared_temp[GI].error = shared_temp[GI + 2].error;
-            shared_temp[GI].mode = shared_temp[GI + 2].mode;
-            shared_temp[GI].partition = shared_temp[GI + 2].partition;
-            shared_temp[GI].rotation = shared_temp[GI + 2].rotation;
-        }
-    }
-#ifdef REF_DEVICE
-    GroupMemoryBarrierWithGroupSync();
-#endif
-    if (threadInBlock < 1)
-    {
-        if ( shared_temp[GI].error > shared_temp[GI + 1].error )
-        {
-            shared_temp[GI].error = shared_temp[GI + 1].error;
-            shared_temp[GI].mode = shared_temp[GI + 1].mode;
-            shared_temp[GI].partition = shared_temp[GI + 1].partition;
-            shared_temp[GI].rotation = shared_temp[GI + 1].rotation;
-        }
-
-        if (g_InBuff[blockID].x > shared_temp[GI].error)
-        {
-            g_OutBuff[blockID] = uint4(shared_temp[GI].error, shared_temp[GI].mode, shared_temp[GI].partition, shared_temp[GI].rotation); // mode 1 3 7 don't have rotation, we use rotation for p bits
-        }
-        else
-        {
-            g_OutBuff[blockID] = g_InBuff[blockID];
-        }
-    }
-}
-
-[numthreads( THREAD_GROUP_SIZE, 1, 1 )]
-void TryMode02CS( uint GI : SV_GroupIndex, uint3 groupID : SV_GroupID ) // mode 0 2 have 3 subsets per block
-{
-    const uint MAX_USED_THREAD = 64;
-    uint BLOCK_IN_GROUP = THREAD_GROUP_SIZE / MAX_USED_THREAD;
-    uint blockInGroup = GI / MAX_USED_THREAD;
-    uint blockID = g_start_block_id + groupID.x * BLOCK_IN_GROUP + blockInGroup;
-    uint threadBase = blockInGroup * MAX_USED_THREAD;
-    uint threadInBlock = GI - threadBase;
-
-    uint block_y = blockID / g_num_block_x;
-    uint block_x = blockID - block_y * g_num_block_x;
-    uint base_x = block_x * BLOCK_SIZE_X;
-    uint base_y = block_y * BLOCK_SIZE_Y;
-    
-    if (threadInBlock < 16)
-    {
-        shared_temp[GI].pixel = clamp(uint4(g_Input.Load( uint3( base_x + threadInBlock % 4, base_y + threadInBlock / 4, 0 ) ) * 255), 0, 255);
-    }
-    GroupMemoryBarrierWithGroupSync();
-
-    shared_temp[GI].error = 0xFFFFFFFF;
-
-    uint num_partitions;
-    if (0 == g_mode_id)
-    {
-        num_partitions = 16;
-    }
-    else
-    {
-        num_partitions = 64;
-    }
-
-    uint4 pixel_r;
-    uint2x4 endPoint[3];        // endPoint[0..1 for subset id][0..1 for low and high in the subset]
-    uint2x4 endPointBackup[3];
-    uint color_index[16];
-    if (threadInBlock < num_partitions)
-    {
-        uint partition = threadInBlock + 64;
-
-        endPoint[0][0] = MAX_UINT;
-        endPoint[0][1] = MIN_UINT;
-        endPoint[1][0] = MAX_UINT;
-        endPoint[1][1] = MIN_UINT;
-        endPoint[2][0] = MAX_UINT;
-        endPoint[2][1] = MIN_UINT;
-        uint bits2 = candidateSectionBit2[partition - 64];
-        for ( uint i = 0; i < 16; i ++ )
-        {
-            uint4 pixel = shared_temp[threadBase + i].pixel;
-            uint subset_index = ( bits2 >> ( i * 2 ) ) & 0x03;
-            if ( subset_index == 2 )
-            {
-                endPoint[2][0] = min( endPoint[2][0], pixel );
-                endPoint[2][1] = max( endPoint[2][1], pixel );
-            }
-            else if ( subset_index == 1 )
-            {
-                endPoint[1][0] = min( endPoint[1][0], pixel );
-                endPoint[1][1] = max( endPoint[1][1], pixel );
-            }
-            else
-            {
-                endPoint[0][0] = min( endPoint[0][0], pixel );
-                endPoint[0][1] = max( endPoint[0][1], pixel );
-            }
-        }
-
-        endPointBackup[0] = endPoint[0];
-        endPointBackup[1] = endPoint[1];
-        endPointBackup[2] = endPoint[2];
-
-        uint max_p;
-        if (0 == g_mode_id)
-        {
-            max_p = 64; // changed from 32 to 64
-        }
-        else
-        {
-            max_p = 1;
-        }
-
-        uint rotation = 0;
-        uint error = MAX_UINT;
-        for ( uint p = 0; p < max_p; p ++ )
-        {
-            endPoint[0] = endPointBackup[0];
-            endPoint[1] = endPointBackup[1];
-            endPoint[2] = endPointBackup[2];
-
-            for ( i = 0; i < 3; i ++ )
-            {
-                if (0 == g_mode_id)
-                {
-                    compress_endpoints0( endPoint[i], uint2(p >> (i * 2 + 0), p >> (i * 2 + 1)) & 1 );
-                }
-                else
-                {
-                    compress_endpoints2( endPoint[i] );
-                }
-            }
-
-            uint step_selector = 1 + (2 == g_mode_id);
-
-            int4 span[3];
-            span[0] = endPoint[0][1] - endPoint[0][0];
-            span[1] = endPoint[1][1] - endPoint[1][0];
-            span[2] = endPoint[2][1] - endPoint[2][0];
-            span[0].w = span[1].w = span[2].w = 0;
-            int span_norm_sqr[3];
-            span_norm_sqr[0] = dot( span[0], span[0] );
-            span_norm_sqr[1] = dot( span[1], span[1] );
-            span_norm_sqr[2] = dot( span[2], span[2] );
-
-            // TODO: again, this shouldn't be necessary here in error calculation
-            uint ci[3] = { 0, candidateFixUpIndex1D[partition].x, candidateFixUpIndex1D[partition].y };
-            for (i = 0; i < 3; i ++)
-            {
-                int dotProduct = dot( span[i], shared_temp[threadBase + ci[i]].pixel - endPoint[i][0] );
-                if ( span_norm_sqr[i] > 0 && dotProduct > 0 && uint( dotProduct * 63.49999 ) > uint( 32 * span_norm_sqr[i] ) )
-                {
-                    span[i] = -span[i];
-                    swap(endPoint[i][0], endPoint[i][1]);
-                }
-            }
-
-            uint p_error = 0;
-            for ( i = 0; i < 16; i ++ )
-            {
-                uint subset_index = ( bits2 >> ( i * 2 ) ) & 0x03;
-                if ( subset_index == 2 )
-                {
-                    int dotProduct = dot( span[2], shared_temp[threadBase + i].pixel - endPoint[2][0] );
-                    color_index[i] = ( span_norm_sqr[2] <= 0 || dotProduct <= 0 ) ? 0
-                        : ( ( dotProduct < span_norm_sqr[2] ) ? aStep[step_selector][ uint( dotProduct * 63.49999 / span_norm_sqr[2] ) ] : aStep[step_selector][63] );
-                }
-                else if ( subset_index == 1 )
-                {
-                    int dotProduct = dot( span[1], shared_temp[threadBase + i].pixel - endPoint[1][0] );
-                    color_index[i] = ( span_norm_sqr[1] <= 0 || dotProduct <= 0 ) ? 0
-                        : ( ( dotProduct < span_norm_sqr[1] ) ? aStep[step_selector][ uint( dotProduct * 63.49999 / span_norm_sqr[1] ) ] : aStep[step_selector][63] );
-                }
-                else
-                {
-                    int dotProduct = dot( span[0], shared_temp[threadBase + i].pixel - endPoint[0][0] );
-                    color_index[i] = ( span_norm_sqr[0] <= 0 || dotProduct <= 0 ) ? 0
-                        : ( ( dotProduct < span_norm_sqr[0] ) ? aStep[step_selector][ uint( dotProduct * 63.49999 / span_norm_sqr[0] ) ] : aStep[step_selector][63] );
-                }
-
-                pixel_r = ( ( 64 - aWeight[step_selector][color_index[i]] ) * endPoint[subset_index][0]
-                    + aWeight[step_selector][color_index[i]] * endPoint[subset_index][1] + 32 ) >> 6;
-                pixel_r.a = 255;
-
-                uint4 pixel = shared_temp[threadBase + i].pixel;                
-                Ensure_A_Is_Larger( pixel_r, pixel );
-                pixel_r -= pixel;
-                p_error += ComputeError(pixel_r, pixel_r);
-            }
-
-            if (p_error < error)
-            {
-                error = p_error;
-                rotation = p;    // Borrow rotation for p
-            }
-        }
-
-        shared_temp[GI].error = error;
-        shared_temp[GI].partition = partition;
-        shared_temp[GI].rotation = rotation;
-    }
-    GroupMemoryBarrierWithGroupSync();
-
-    if (threadInBlock < 32)
-    {
-        if ( shared_temp[GI].error > shared_temp[GI + 32].error )
-        {
-            shared_temp[GI].error = shared_temp[GI + 32].error;
-            shared_temp[GI].partition = shared_temp[GI + 32].partition;
-            shared_temp[GI].rotation = shared_temp[GI + 32].rotation;
-        }
-    }
-#ifdef REF_DEVICE
-    GroupMemoryBarrierWithGroupSync();
-#endif
-    if (threadInBlock < 16)
-    {
-        if ( shared_temp[GI].error > shared_temp[GI + 16].error )
-        {
-            shared_temp[GI].error = shared_temp[GI + 16].error;
-            shared_temp[GI].partition = shared_temp[GI + 16].partition;
-            shared_temp[GI].rotation = shared_temp[GI + 16].rotation;
-        }
-    }
-#ifdef REF_DEVICE
-    GroupMemoryBarrierWithGroupSync();
-#endif
-    if (threadInBlock < 8)
-    {
-        if ( shared_temp[GI].error > shared_temp[GI + 8].error )
-        {
-            shared_temp[GI].error = shared_temp[GI + 8].error;
-            shared_temp[GI].partition = shared_temp[GI + 8].partition;
-            shared_temp[GI].rotation = shared_temp[GI + 8].rotation;
-        }
-    }
-#ifdef REF_DEVICE
-    GroupMemoryBarrierWithGroupSync();
-#endif
-    if (threadInBlock < 4)
-    {
-        if ( shared_temp[GI].error > shared_temp[GI + 4].error )
-        {
-            shared_temp[GI].error = shared_temp[GI + 4].error;
-            shared_temp[GI].partition = shared_temp[GI + 4].partition;
-            shared_temp[GI].rotation = shared_temp[GI + 4].rotation;
-        }
-    }
-#ifdef REF_DEVICE
-    GroupMemoryBarrierWithGroupSync();
-#endif
-    if (threadInBlock < 2)
-    {
-        if ( shared_temp[GI].error > shared_temp[GI + 2].error )
-        {
-            shared_temp[GI].error = shared_temp[GI + 2].error;
-            shared_temp[GI].partition = shared_temp[GI + 2].partition;
-            shared_temp[GI].rotation = shared_temp[GI + 2].rotation;
-        }
-    }
-#ifdef REF_DEVICE
-    GroupMemoryBarrierWithGroupSync();
-#endif
-    if (threadInBlock < 1)
-    {
-        if ( shared_temp[GI].error > shared_temp[GI + 1].error )
-        {
-            shared_temp[GI].error = shared_temp[GI + 1].error;
-            shared_temp[GI].partition = shared_temp[GI + 1].partition;
-            shared_temp[GI].rotation = shared_temp[GI + 1].rotation;
-        }
-
-        if (g_InBuff[blockID].x > shared_temp[GI].error)
-        {
-            g_OutBuff[blockID] = uint4(shared_temp[GI].error, g_mode_id, shared_temp[GI].partition, shared_temp[GI].rotation); // rotation is actually p bit for mode 0. for mode 2, rotation is always 0
-        }
-        else
-        {
-            g_OutBuff[blockID] = g_InBuff[blockID];
-        }
-    }
-}
-
-[numthreads( THREAD_GROUP_SIZE, 1, 1 )]
-void EncodeBlockCS(uint GI : SV_GroupIndex, uint3 groupID : SV_GroupID)
-{
-    const uint MAX_USED_THREAD = 16;
-    uint BLOCK_IN_GROUP = THREAD_GROUP_SIZE / MAX_USED_THREAD;
-    uint blockInGroup = GI / MAX_USED_THREAD;
-    uint blockID = g_start_block_id + groupID.x * BLOCK_IN_GROUP + blockInGroup;
-    uint threadBase = blockInGroup * MAX_USED_THREAD;
-    uint threadInBlock = GI - threadBase;
-
-#ifndef REF_DEVICE
-    if (blockID >= g_num_total_blocks)
-    {
-        return;
-    }
-#endif
-
-    uint block_y = blockID / g_num_block_x;
-    uint block_x = blockID - block_y * g_num_block_x;
-    uint base_x = block_x * BLOCK_SIZE_X;
-    uint base_y = block_y * BLOCK_SIZE_Y;
-
-    uint mode = g_InBuff[blockID].y & 0x7FFFFFFF;
-    uint partition = g_InBuff[blockID].z;
-    uint index_selector = (g_InBuff[blockID].y >> 31) & 1;
-    uint rotation = g_InBuff[blockID].w;
-
-    if (threadInBlock < 16)
-    {
-        uint4 pixel = clamp(uint4(g_Input.Load( uint3( base_x + threadInBlock % 4, base_y + threadInBlock / 4, 0 ) ) * 255), 0, 255);
-
-        if ((4 == mode) || (5 == mode))
-        {
-            if (1 == rotation)
-            {
-                pixel.ra = pixel.ar;
-            }
-            else if (2 == rotation)
-            {
-                pixel.ga = pixel.ag;
-            }
-            else if (3 == rotation)
-            {
-                pixel.ba = pixel.ab;
-            }
-        }
-
-        shared_temp[GI].pixel = pixel;
-    }
-#ifdef REF_DEVICE
-    GroupMemoryBarrierWithGroupSync();
-#endif
-
-    uint bits = candidateSectionBit[partition];
-    uint bits2 = candidateSectionBit2[partition - 64];
-
-    uint2x4 ep;
-    uint2x4 ep_quantized;
-    [unroll]
-    for (int ii = 2; ii >= 0; -- ii)
-    {
-        if (threadInBlock < 16)
-        {
-            uint2x4 ep;
-            ep[0] = MAX_UINT;
-            ep[1] = MIN_UINT;
-
-            uint4 pixel = shared_temp[GI].pixel;
-
-            uint subset_index = ( bits >> threadInBlock ) & 0x01;
-            uint subset_index2 = ( bits2 >> ( threadInBlock * 2 ) ) & 0x03;
-            if (0 == ii)
-            {
-                if ((0 == mode) || (2 == mode))
-                {
-                    if (0 == subset_index2)
-                    {
-                        ep[0] = ep[1] = pixel;
-                    }
-                }
-                else if ((1 == mode) || (3 == mode) || (7 == mode))
-                {
-                    if (0 == subset_index)
-                    {
-                        ep[0] = ep[1] = pixel;
-                    }
-                }
-                else if ((4 == mode) || (5 == mode) || (6 == mode))
-                {
-                    ep[0] = ep[1] = pixel;
-                }
-            }
-            else if (1 == ii)
-            {
-                if ((0 == mode) || (2 == mode))
-                {
-                    if (1 == subset_index2)
-                    {
-                        ep[0] = ep[1] = pixel;
-                    }
-                }
-                else if ((1 == mode) || (3 == mode) || (7 == mode))
-                {
-                    if (1 == subset_index)
-                    {
-                        ep[0] = ep[1] = pixel;
-                    }
-                }
-            }
-            else
-            {
-                if ((0 == mode) || (2 == mode))
-                {
-                    if (2 == subset_index2)
-                    {
-                        ep[0] = ep[1] = pixel;
-                    }
-                }
-            }
-
-            shared_temp[GI].endPoint_low = ep[0];
-            shared_temp[GI].endPoint_high = ep[1];
-        }
-#ifdef REF_DEVICE
-        GroupMemoryBarrierWithGroupSync();
-#endif
-
-        if (threadInBlock < 8)
-        {
-            shared_temp[GI].endPoint_low = min(shared_temp[GI].endPoint_low, shared_temp[GI + 8].endPoint_low);
-            shared_temp[GI].endPoint_high = max(shared_temp[GI].endPoint_high, shared_temp[GI + 8].endPoint_high);
-        }
-#ifdef REF_DEVICE
-        GroupMemoryBarrierWithGroupSync();
-#endif
-        if (threadInBlock < 4)
-        {
-            shared_temp[GI].endPoint_low = min(shared_temp[GI].endPoint_low, shared_temp[GI + 4].endPoint_low);
-            shared_temp[GI].endPoint_high = max(shared_temp[GI].endPoint_high, shared_temp[GI + 4].endPoint_high);
-        }
-#ifdef REF_DEVICE
-        GroupMemoryBarrierWithGroupSync();
-#endif
-        if (threadInBlock < 2)
-        {
-            shared_temp[GI].endPoint_low = min(shared_temp[GI].endPoint_low, shared_temp[GI + 2].endPoint_low);
-            shared_temp[GI].endPoint_high = max(shared_temp[GI].endPoint_high, shared_temp[GI + 2].endPoint_high);
-        }
-#ifdef REF_DEVICE
-        GroupMemoryBarrierWithGroupSync();
-#endif
-        if (threadInBlock < 1)
-        {
-            shared_temp[GI].endPoint_low = min(shared_temp[GI].endPoint_low, shared_temp[GI + 1].endPoint_low);
-            shared_temp[GI].endPoint_high = max(shared_temp[GI].endPoint_high, shared_temp[GI + 1].endPoint_high);
-        }
-#ifdef REF_DEVICE
-        GroupMemoryBarrierWithGroupSync();
-#endif
-
-        if (ii == (int)threadInBlock)
-        {
-            ep[0] = shared_temp[threadBase].endPoint_low;
-            ep[1] = shared_temp[threadBase].endPoint_high;
-        }
-    }
-
-    if (threadInBlock < 3)
-    {
-        uint2 P;
-        if (1 == mode)
-        {
-            P = (rotation >> threadInBlock) & 1;
-        }
-        else
-        {
-            P = uint2(rotation >> (threadInBlock * 2 + 0), rotation >> (threadInBlock * 2 + 1)) & 1;
-        }
-
-        if (0 == mode)
-        {
-            ep_quantized = compress_endpoints0( ep, P );
-        }
-        else if (1 == mode)
-        {
-            ep_quantized = compress_endpoints1( ep, P );
-        }
-        else if (2 == mode)
-        {
-            ep_quantized = compress_endpoints2( ep );
-        }
-        else if (3 == mode)
-        {
-            ep_quantized = compress_endpoints3( ep, P );
-        }
-        else if (4 == mode)
-        {
-            ep_quantized = compress_endpoints4( ep );
-        }
-        else if (5 == mode)
-        {
-            ep_quantized = compress_endpoints5( ep );
-        }
-        else if (6 == mode)
-        {
-            ep_quantized = compress_endpoints6( ep, P );
-        }
-        else //if (7 == mode)
-        {
-            ep_quantized = compress_endpoints7( ep, P );
-        }
-
-        int4 span = ep[1] - ep[0];
-        if (mode < 4)
-        {
-            span.w = 0;
-        }
-
-        if ((4 == mode) || (5 == mode))
-        {
-            if (0 == threadInBlock)
-            {
-                int2 span_norm_sqr = uint2( dot( span.rgb, span.rgb ), span.a * span.a );
-                int2 dotProduct = int2( dot( span.rgb, shared_temp[threadBase + 0].pixel.rgb - ep[0].rgb ), span.a * ( shared_temp[threadBase + 0].pixel.a - ep[0].a ) );
-                if ( span_norm_sqr.x > 0 && dotProduct.x > 0 && uint( dotProduct.x * 63.49999 ) > uint( 32 * span_norm_sqr.x ) )
-                {
-                    swap(ep[0].rgb, ep[1].rgb);
-                    swap(ep_quantized[0].rgb, ep_quantized[1].rgb);
-                }
-                if ( span_norm_sqr.y > 0 && dotProduct.y > 0 && uint( dotProduct.y * 63.49999 ) > uint( 32 * span_norm_sqr.y ) )
-                {
-                    swap(ep[0].a, ep[1].a);
-                    swap(ep_quantized[0].a, ep_quantized[1].a);		    
-                }
-            }
-        }
-        else //if ((0 == mode) || (2 == mode) || (1 == mode) || (3 == mode) || (7 == mode) || (6 == mode))
-        {
-            int p;
-            if (0 == threadInBlock)
-            {
-                p = 0;
-            }
-            else if (1 == threadInBlock)
-            {
-                p = candidateFixUpIndex1D[partition].x;
-            }
-            else //if (2 == threadInBlock)
-            {
-                p = candidateFixUpIndex1D[partition].y;
-            }
-
-            int span_norm_sqr = dot( span, span );
-            int dotProduct = dot( span, shared_temp[threadBase + p].pixel - ep[0] );
-            if ( span_norm_sqr > 0 && dotProduct > 0 && uint( dotProduct * 63.49999 ) > uint( 32 * span_norm_sqr ) )
-            {
-                swap(ep[0], ep[1]);
-                swap(ep_quantized[0], ep_quantized[1]);		
-            }
-        }
-
-        shared_temp[GI].endPoint_low = ep[0];
-        shared_temp[GI].endPoint_high = ep[1];
-        shared_temp[GI].endPoint_low_quantized = ep_quantized[0];
-        shared_temp[GI].endPoint_high_quantized = ep_quantized[1];
-    }
-#ifdef REF_DEVICE
-    GroupMemoryBarrierWithGroupSync();
-#endif
-
-    if (threadInBlock < 16)
-    {
-        uint color_index = 0;
-        uint alpha_index = 0;
-
-        uint2x4 ep;
-
-        uint2 indexPrec;
-        if ((0 == mode) || (1 == mode))
-        {
-            indexPrec = 1;
-        }
-        else if (6 == mode)
-        {
-            indexPrec = 0;
-        }
-        else if (4 == mode)
-        {
-            if (0 == index_selector)
-            {
-                indexPrec = uint2(2, 1);
-            }
-            else
-            {
-                indexPrec = uint2(1, 2);
-            }
-        }
-        else
-        {
-            indexPrec = 2;
-        }
-
-        int subset_index;
-        if ((0 == mode) || (2 == mode))
-        {
-            subset_index = (bits2 >> (threadInBlock * 2)) & 0x03;
-        }
-        else if ((1 == mode) || (3 == mode) || (7 == mode))
-        {
-            subset_index = (bits >> threadInBlock) & 0x01;
-        }
-        else
-        {
-            subset_index = 0;
-        }
-
-        ep[0] = shared_temp[threadBase + subset_index].endPoint_low;
-        ep[1] = shared_temp[threadBase + subset_index].endPoint_high;
-
-        int4 span = ep[1] - ep[0];
-        if (mode < 4)
-        {
-            span.w = 0;
-        }
-
-        if ((4 == mode) || (5 == mode))
-        {
-            int2 span_norm_sqr;
-            span_norm_sqr.x = dot( span.rgb, span.rgb );
-            span_norm_sqr.y = span.a * span.a;
-            
-            int dotProduct = dot( span.rgb, shared_temp[threadBase + threadInBlock].pixel.rgb - ep[0].rgb );
-            color_index = ( span_norm_sqr.x <= 0 || dotProduct <= 0 ) ? 0
-                    : ( ( dotProduct < span_norm_sqr.x ) ? aStep[indexPrec.x][ uint( dotProduct * 63.49999 / span_norm_sqr.x ) ] : aStep[indexPrec.x][63] );
-            dotProduct = dot( span.a, shared_temp[threadBase + threadInBlock].pixel.a - ep[0].a );
-            alpha_index = ( span_norm_sqr.y <= 0 || dotProduct <= 0 ) ? 0
-                    : ( ( dotProduct < span_norm_sqr.y ) ? aStep[indexPrec.y][ uint( dotProduct * 63.49999 / span_norm_sqr.y ) ] : aStep[indexPrec.y][63] );
-
-            if (index_selector)
-            {
-                swap(color_index, alpha_index);
-            }
-        }
-        else
-        {
-            int span_norm_sqr = dot( span, span );
-
-            int dotProduct = dot( span, shared_temp[threadBase + threadInBlock].pixel - ep[0] );
-            color_index = ( span_norm_sqr <= 0 || dotProduct <= 0 ) ? 0
-                    : ( ( dotProduct < span_norm_sqr ) ? aStep[indexPrec.x][ uint( dotProduct * 63.49999 / span_norm_sqr ) ] : aStep[indexPrec.x][63] );
-        }
-
-        shared_temp[GI].error = color_index;
-        shared_temp[GI].mode = alpha_index;
-    }
-#ifdef REF_DEVICE
-    GroupMemoryBarrierWithGroupSync();
-#endif
-
-    if (0 == threadInBlock)
-    {
-        uint4 block;
-        if (0 == mode)
-        {
-            block_package0( block, partition, threadBase );
-        }
-        else if (1 == mode)
-        {
-            block_package1( block, partition, threadBase );
-        }
-        else if (2 == mode)
-        {
-            block_package2( block, partition, threadBase );
-        }
-        else if (3 == mode)
-        {
-            block_package3( block, partition, threadBase );
-        }
-        else if (4 == mode)
-        {
-            block_package4( block, rotation, index_selector, threadBase );
-        }
-        else if (5 == mode)
-        {
-            block_package5( block, rotation, threadBase );
-        }
-        else if (6 == mode)
-        {
-            block_package6( block, threadBase );
-        }
-        else //if (7 == mode)
-        {
-            block_package7( block, partition, threadBase );
-        }
-
-        g_OutBuff[blockID] = block;
-    }
-}
-
-//uint4 truncate_and_round( uint4 color, uint bits)
-//{
-//    uint precisionMask = ((1 << bits) - 1) << (8 - bits);
-//    uint precisionHalf = (1 << (7-bits));
-//
-//    uint4 truncated = color & precisionMask; 
-//    uint4 rounded = min(255, color + precisionHalf) & precisionMask;
-//    
-//    uint4 truncated_bak = truncated = truncated | (truncated >> bits);
-//    uint4 rounded_bak = rounded = rounded | (rounded >> bits);
-//
-//    uint4 color_bak = color;
-//    
-//    Ensure_A_Is_Larger( rounded, color );
-//    Ensure_A_Is_Larger( truncated, color_bak );
-//
-//    if (dot(rounded - color, rounded - color) < 
-//        dot(truncated - color_bak, truncated - color_bak))
-//    {
-//        return rounded_bak;
-//    }
-//    else
-//    {
-//        return truncated_bak;
-//    }
-//}
-
-uint4 quantize( uint4 color, uint uPrec )
-{
-    uint4 rnd = min(255, color + (1 << (7 - uPrec)));
-    return rnd >> (8 - uPrec);
-}
-
-uint4 unquantize( uint4 color, uint uPrec )
-{
-    color = color << (8 - uPrec);
-    return color | (color >> uPrec);
-}
-
-uint2x4 compress_endpoints0( inout uint2x4 endPoint, uint2 P )
-{
-    uint2x4 quantized;
-    for ( uint j = 0; j < 2; j ++ )
-    {
-        quantized[j].rgb = quantize(endPoint[j].rgbb, 5).rgb & 0xFFFFFFFE;
-	    quantized[j].rgb |= P[j];
-        quantized[j].a = 0xFF;
-
-        endPoint[j].rgb = unquantize(quantized[j].rgbb, 5).rgb;
-        endPoint[j].a = 0xFF;
-
-        quantized[j] <<= 3;
-    }
-    return quantized;
-}
-uint2x4 compress_endpoints1( inout uint2x4 endPoint, uint2 P )
-{
-    uint2x4 quantized;
-    for ( uint j = 0; j < 2; j ++ )
-    {
-        quantized[j].rgb = quantize(endPoint[j].rgbb, 7).rgb & 0xFFFFFFFE;
-	    quantized[j].rgb |= P[j];
-        quantized[j].a = 0xFF;
-
-        endPoint[j].rgb = unquantize(quantized[j].rgbb, 7).rgb;
-	    endPoint[j].a = 0xFF;
-
-        quantized[j] <<= 1;
-    }
-    return quantized;
-}
-uint2x4 compress_endpoints2( inout uint2x4 endPoint )
-{
-    uint2x4 quantized;
-    for ( uint j = 0; j < 2; j ++ )
-    {
-        quantized[j].rgb = quantize(endPoint[j].rgbb, 5).rgb;
-        quantized[j].a = 0xFF;
-
-        endPoint[j].rgb = unquantize(quantized[j].rgbb, 5).rgb;
-	    endPoint[j].a = 0xFF;    
-
-        quantized[j] <<= 3;
-    }
-    return quantized;
-}
-uint2x4 compress_endpoints3( inout uint2x4 endPoint, uint2 P )
-{
-    uint2x4 quantized;
-    for ( uint j = 0; j < 2; j ++ )
-    {
-        quantized[j].rgb = endPoint[j].rgb & 0xFFFFFFFE;
-	    quantized[j].rgb |= P[j];
-        quantized[j].a = 0xFF;
-        
-        endPoint[j].rgb = quantized[j].rgb;
-        endPoint[j].a = 0xFF;
-    }
-    return quantized;
-}
-uint2x4 compress_endpoints4( inout uint2x4 endPoint )
-{
-    uint2x4 quantized;
-    for ( uint j = 0; j < 2; j ++ )
-    {
-        quantized[j].rgb = quantize(endPoint[j].rgbb, 5).rgb;
-        quantized[j].a = quantize(endPoint[j].a, 6).r;
-        
-        endPoint[j].rgb = unquantize(quantized[j].rgbb, 5).rgb;        
-        endPoint[j].a = unquantize(quantized[j].a, 6).r;
-
-        quantized[j].rgb <<= 3;
-        quantized[j].a <<= 2;
-    }    
-    return quantized;
-}
-uint2x4 compress_endpoints5( inout uint2x4 endPoint )
-{
-    uint2x4 quantized;
-    for ( uint j = 0; j < 2; j ++ )
-    {
-        quantized[j].rgb = quantize(endPoint[j].rgbb, 7).rgb;
-        quantized[j].a = endPoint[j].a;
-
-        endPoint[j].rgb = unquantize(quantized[j].rgbb, 7).rgb;
-        // endPoint[j].a   Alpha is full precision
-
-        quantized[j].rgb <<= 1;
-    }    
-    return quantized;
-}
-uint2x4 compress_endpoints6( inout uint2x4 endPoint, uint2 P )
-{
-    uint2x4 quantized;
-    for ( uint j = 0; j < 2; j ++ )
-    {
-        quantized[j] = endPoint[j] & 0xFFFFFFFE;
-	    quantized[j] |= P[j];
-	        
-        endPoint[j] = quantized[j];
-    }
-    return quantized;
-}
-uint2x4 compress_endpoints7( inout uint2x4 endPoint, uint2 P )
-{
-    uint2x4 quantized;
-    for ( uint j = 0; j < 2; j ++ )
-    {
-        quantized[j] = quantize(endPoint[j], 6) & 0xFFFFFFFE;
-	    quantized[j] |= P[j];
-
-        endPoint[j] = unquantize(quantized[j], 6);
-    }
-    return quantized << 2;
-}
-
-#define get_end_point_l(subset) shared_temp[threadBase + subset].endPoint_low_quantized
-#define get_end_point_h(subset) shared_temp[threadBase + subset].endPoint_high_quantized
-#define get_color_index(index) shared_temp[threadBase + index].error
-#define get_alpha_index(index) shared_temp[threadBase + index].mode
-
-void block_package0( out uint4 block, uint partition, uint threadBase )
-{
-    block.x = 0x01 | ( (partition - 64) << 1 ) 
-            | ( ( get_end_point_l(0).r & 0xF0 ) <<  1 ) | ( ( get_end_point_h(0).r & 0xF0 ) <<  5 ) 
-            | ( ( get_end_point_l(1).r & 0xF0 ) <<  9 ) | ( ( get_end_point_h(1).r & 0xF0 ) << 13 ) 
-            | ( ( get_end_point_l(2).r & 0xF0 ) << 17 ) | ( ( get_end_point_h(2).r & 0xF0 ) << 21 ) 
-            | ( ( get_end_point_l(0).g & 0xF0 ) << 25 );
-    block.y = ( ( get_end_point_l(0).g & 0xF0 ) >>  7 ) | ( ( get_end_point_h(0).g & 0xF0 ) >>  3 ) 
-            | ( ( get_end_point_l(1).g & 0xF0 ) <<  1 ) | ( ( get_end_point_h(1).g & 0xF0 ) <<  5 ) 
-            | ( ( get_end_point_l(2).g & 0xF0 ) <<  9 ) | ( ( get_end_point_h(2).g & 0xF0 ) << 13 ) 
-            | ( ( get_end_point_l(0).b & 0xF0 ) << 17 ) | ( ( get_end_point_h(0).b & 0xF0 ) << 21 )
-            | ( ( get_end_point_l(1).b & 0xF0 ) << 25 );
-    block.z = ( ( get_end_point_l(1).b & 0xF0 ) >>  7 ) | ( ( get_end_point_h(1).b & 0xF0 ) >>  3 ) 
-            | ( ( get_end_point_l(2).b & 0xF0 ) <<  1 ) | ( ( get_end_point_h(2).b & 0xF0 ) <<  5 ) 
-            | ( ( get_end_point_l(0).r & 0x08 ) << 10 ) | ( ( get_end_point_h(0).r & 0x08 ) << 11 ) 
-            | ( ( get_end_point_l(1).r & 0x08 ) << 12 ) | ( ( get_end_point_h(1).r & 0x08 ) << 13 ) 
-            | ( ( get_end_point_l(2).r & 0x08 ) << 14 ) | ( ( get_end_point_h(2).r & 0x08 ) << 15 )
-            | ( get_color_index(0) << 19 );
-    block.w = 0;
-    uint i = 1;
-    for ( ; i <= min( candidateFixUpIndex1DOrdered[partition][0], 4 ); i ++ )
-    {
-        block.z |= get_color_index(i) << ( i * 3 + 18 );
-    }
-    if ( candidateFixUpIndex1DOrdered[partition][0] < 4 ) //i = 4
-    {
-        block.z |= get_color_index(4) << 29;
-        i += 1;
-    }
-    else //i = 5
-    {
-        block.w |= ( get_color_index(4) & 0x04 ) >> 2;
-        for ( ; i <= candidateFixUpIndex1DOrdered[partition][0]; i ++ )
-            block.w |= get_color_index(i) << ( i * 3 - 14 );
-    }
-    for ( ; i <= candidateFixUpIndex1DOrdered[partition][1]; i ++ )
-    {
-        block.w |= get_color_index(i) << ( i * 3 - 15 );
-    }
-    for ( ; i < 16; i ++ )
-    {
-        block.w |= get_color_index(i) << ( i * 3 - 16 );
-    }
-}
-void block_package1( out uint4 block, uint partition, uint threadBase )
-{
-    block.x = 0x02 | ( partition << 2 ) 
-            | ( ( get_end_point_l(0).r & 0xFC ) <<  6 ) | ( ( get_end_point_h(0).r & 0xFC ) << 12 ) 
-            | ( ( get_end_point_l(1).r & 0xFC ) << 18 ) | ( ( get_end_point_h(1).r & 0xFC ) << 24 );
-    block.y = ( ( get_end_point_l(0).g & 0xFC ) >>  2 ) | ( ( get_end_point_h(0).g & 0xFC ) <<  4 ) 
-            | ( ( get_end_point_l(1).g & 0xFC ) << 10 ) | ( ( get_end_point_h(1).g & 0xFC ) << 16 )
-            | ( ( get_end_point_l(0).b & 0xFC ) << 22 ) | ( ( get_end_point_h(0).b & 0xFC ) << 28 );
-    block.z = ( ( get_end_point_h(0).b & 0xFC ) >>  4 ) | ( ( get_end_point_l(1).b & 0xFC ) <<  2 )
-            | ( ( get_end_point_h(1).b & 0xFC ) <<  8 ) 
-            | ( ( get_end_point_l(0).r & 0x02 ) << 15 ) | ( ( get_end_point_l(1).r & 0x02 ) << 16 )
-            | ( get_color_index(0) << 18 );
-    if ( candidateFixUpIndex1DOrdered[partition][0] == 15 )
-    {
-        block.w = (get_color_index(15) << 30) | (get_color_index(14) << 27) | (get_color_index(13) << 24) | (get_color_index(12) << 21) | (get_color_index(11) << 18) | (get_color_index(10) << 15)
-            | (get_color_index(9) << 12) | (get_color_index(8) << 9) | (get_color_index(7) << 6) | (get_color_index(6) << 3) | get_color_index(5);
-        block.z |= (get_color_index(4) << 29) | (get_color_index(3) << 26) | (get_color_index(2) << 23) | (get_color_index(1) << 20) | (get_color_index(0) << 18);
-    }
-    else if ( candidateFixUpIndex1DOrdered[partition][0] == 2 )
-    {
-        block.w = (get_color_index(15) << 29) | (get_color_index(14) << 26) | (get_color_index(13) << 23) | (get_color_index(12) << 20) | (get_color_index(11) << 17) | (get_color_index(10) << 14)
-            | (get_color_index(9) << 11) | (get_color_index(8) << 8) | (get_color_index(7) << 5) | (get_color_index(6) << 2) | (get_color_index(5) >> 1);
-        block.z |= (get_color_index(5) << 31) | (get_color_index(4) << 28) | (get_color_index(3) << 25) | (get_color_index(2) << 23) | (get_color_index(1) << 20) | (get_color_index(0) << 18);
-    }
-    else if ( candidateFixUpIndex1DOrdered[partition][0] == 8 )
-    {
-        block.w = (get_color_index(15) << 29) | (get_color_index(14) << 26) | (get_color_index(13) << 23) | (get_color_index(12) << 20) | (get_color_index(11) << 17) | (get_color_index(10) << 14)
-            | (get_color_index(9) << 11) | (get_color_index(8) << 9) | (get_color_index(7) << 6) | (get_color_index(6) << 3) | get_color_index(5);
-        block.z |= (get_color_index(4) << 29) | (get_color_index(3) << 26) | (get_color_index(2) << 23) | (get_color_index(1) << 20) | (get_color_index(0) << 18);
-    }
-    else //candidateFixUpIndex1DOrdered[partition] == 6
-    {
-        block.w = (get_color_index(15) << 29) | (get_color_index(14) << 26) | (get_color_index(13) << 23) | (get_color_index(12) << 20) | (get_color_index(11) << 17) | (get_color_index(10) << 14)
-            | (get_color_index(9) << 11) | (get_color_index(8) << 8) | (get_color_index(7) << 6) | (get_color_index(6) << 4) | get_color_index(5);
-        block.z |= (get_color_index(4) << 29) | (get_color_index(3) << 26) | (get_color_index(2) << 23) | (get_color_index(1) << 20) | (get_color_index(0) << 18);
-    }
-}
-void block_package2( out uint4 block, uint partition, uint threadBase )
-{
-    block.x = 0x04 | ( (partition - 64) << 3 ) 
-            | ( ( get_end_point_l(0).r & 0xF8 ) <<  6 ) | ( ( get_end_point_h(0).r & 0xF8 ) << 11 ) 
-            | ( ( get_end_point_l(1).r & 0xF8 ) << 16 ) | ( ( get_end_point_h(1).r & 0xF8 ) << 21 ) 
-            | ( ( get_end_point_l(2).r & 0xF8 ) << 26 );
-    block.y = ( ( get_end_point_l(2).r & 0xF8 ) >>  6 ) | ( ( get_end_point_h(2).r & 0xF8 ) >>  1 )
-            | ( ( get_end_point_l(0).g & 0xF8 ) <<  4 ) | ( ( get_end_point_h(0).g & 0xF8 ) <<  9 ) 
-            | ( ( get_end_point_l(1).g & 0xF8 ) << 14 ) | ( ( get_end_point_h(1).g & 0xF8 ) << 19 ) 
-            | ( ( get_end_point_l(2).g & 0xF8 ) << 24 );
-    block.z = ( ( get_end_point_h(2).g & 0xF8 ) >>  3 ) | ( ( get_end_point_l(0).b & 0xF8 ) <<  2 )
-            | ( ( get_end_point_h(0).b & 0xF8 ) <<  7 )	| ( ( get_end_point_l(1).b & 0xF8 ) << 12 )
-            | ( ( get_end_point_h(1).b & 0xF8 ) << 17 ) | ( ( get_end_point_l(2).b & 0xF8 ) << 22 ) 
-            | ( ( get_end_point_h(2).b & 0xF8 ) << 27 );
-    block.w = ( ( get_end_point_h(2).b & 0xF8 ) >>  5 ) 
-            | ( get_color_index(0) << 3 );
-    uint i = 1;
-    for ( ; i <= candidateFixUpIndex1DOrdered[partition][0]; i ++ )
-    {
-        block.w |= get_color_index(i) << ( i * 2 + 2 );
-    }
-    for ( ; i <= candidateFixUpIndex1DOrdered[partition][1]; i ++ )
-    {
-        block.w |= get_color_index(i) << ( i * 2 + 1 );
-    }
-    for ( ; i < 16; i ++ )
-    {
-        block.w |= get_color_index(i) << ( i * 2 );
-    }
-}
-void block_package3( out uint4 block, uint partition, uint threadBase )
-{
-    block.x = 0x08 | ( partition << 4 ) 
-            | ( ( get_end_point_l(0).r & 0xFE ) <<  9 ) | ( ( get_end_point_h(0).r & 0xFE ) << 16 ) 
-            | ( ( get_end_point_l(1).r & 0xFE ) << 23 ) | ( ( get_end_point_h(1).r & 0xFE ) << 30 );
-    block.y = ( ( get_end_point_h(1).r & 0xFE ) >>  2 ) | ( ( get_end_point_l(0).g & 0xFE ) <<  5 )
-            | ( ( get_end_point_h(0).g & 0xFE ) << 12 ) | ( ( get_end_point_l(1).g & 0xFE ) << 19 )
-            | ( ( get_end_point_h(1).g & 0xFE ) << 26 );
-    block.z = ( ( get_end_point_h(1).g & 0xFE ) >>  6 ) | ( ( get_end_point_l(0).b & 0xFE ) <<  1 )
-            | ( ( get_end_point_h(0).b & 0xFE ) <<  8 ) | ( ( get_end_point_l(1).b & 0xFE ) << 15 )
-            | ( ( get_end_point_h(1).b & 0xFE ) << 22 )
-            | ( ( get_end_point_l(0).r & 0x01 ) << 30 ) | ( ( get_end_point_h(0).r & 0x01 ) << 31 );
-    block.w = ( ( get_end_point_l(1).r & 0x01 ) <<  0 ) | ( ( get_end_point_h(1).r & 0x01 ) <<  1 )
-            | ( get_color_index(0) << 2 );
-    uint i = 1;
-    for ( ; i <= candidateFixUpIndex1DOrdered[partition][0]; i ++ )
-    {
-        block.w |= get_color_index(i) << ( i * 2 + 1 );
-    }
-    for ( ; i < 16; i ++ )
-    {
-        block.w |= get_color_index(i) << ( i * 2 );
-    }
-}
-void block_package4( out uint4 block, uint rotation, uint index_selector, uint threadBase )
-{
-    block.x = 0x10 | ( (rotation & 3) << 5 ) | ( (index_selector & 1) << 7 )
-            | ( ( get_end_point_l(0).r & 0xF8 ) <<  5 ) | ( ( get_end_point_h(0).r & 0xF8 ) << 10 )
-            | ( ( get_end_point_l(0).g & 0xF8 ) << 15 ) | ( ( get_end_point_h(0).g & 0xF8 ) << 20 )
-            | ( ( get_end_point_l(0).b & 0xF8 ) << 25 );
-
-    block.y = ( ( get_end_point_l(0).b & 0xF8 ) >>  7 ) | ( ( get_end_point_h(0).b & 0xF8 ) >>  2 )
-            | ( ( get_end_point_l(0).a & 0xFC ) <<  4 ) | ( ( get_end_point_h(0).a & 0xFC ) << 10 )
-            | ( (get_color_index(0) & 1) << 18 ) | ( get_color_index(1) << 19 ) | ( get_color_index(2) << 21 ) | ( get_color_index(3) << 23 ) 
-            | ( get_color_index(4) << 25 ) | ( get_color_index(5) << 27 ) | ( get_color_index(6) << 29 ) | ( get_color_index(7) << 31 );
-
-    block.z = ( get_color_index(7) >>  1 ) | ( get_color_index(8) <<  1 ) | ( get_color_index(9) <<  3 ) | ( get_color_index(10)<<  5 )
-            | ( get_color_index(11)<<  7 ) | ( get_color_index(12)<<  9 ) | ( get_color_index(13)<< 11 ) | ( get_color_index(14)<< 13 )
-            | ( get_color_index(15)<< 15 ) | ( (get_alpha_index(0) & 3) << 17 ) | ( get_alpha_index(1) << 19 ) | ( get_alpha_index(2) << 22 )
-            | ( get_alpha_index(3) << 25 ) | ( get_alpha_index(4) << 28 ) | ( get_alpha_index(5) << 31 );
-
-    block.w = ( get_alpha_index(5) >>  1 ) | ( get_alpha_index(6) <<  2 ) | ( get_alpha_index(7) <<  5 ) | ( get_alpha_index(8) <<  8 ) 
-            | ( get_alpha_index(9) << 11 ) | ( get_alpha_index(10)<< 14 ) | ( get_alpha_index(11)<< 17 ) | ( get_alpha_index(12)<< 20 ) 
-            | ( get_alpha_index(13)<< 23 ) | ( get_alpha_index(14)<< 26 ) | ( get_alpha_index(15)<< 29 );
-}
-void block_package5( out uint4 block, uint rotation, uint threadBase )
-{
-    block.x = 0x20 | ( rotation << 6 )
-            | ( ( get_end_point_l(0).r & 0xFE ) <<  7 ) | ( ( get_end_point_h(0).r & 0xFE ) << 14 )
-            | ( ( get_end_point_l(0).g & 0xFE ) << 21 ) | ( ( get_end_point_h(0).g & 0xFE ) << 28 );
-    block.y = ( ( get_end_point_h(0).g & 0xFE ) >>  4 ) | ( ( get_end_point_l(0).b & 0xFE ) <<  3 )
-            | ( ( get_end_point_h(0).b & 0xFE ) << 10 )	| ( get_end_point_l(0).a << 18 ) | ( get_end_point_h(0).a << 26 );
-    block.z = ( get_end_point_h(0).a >>  6 )
-            | ( get_color_index(0) <<  2 ) | ( get_color_index(1) <<  3 ) | ( get_color_index(2) <<  5 ) | ( get_color_index(3) <<  7 ) 
-            | ( get_color_index(4) <<  9 ) | ( get_color_index(5) << 11 ) | ( get_color_index(6) << 13 ) | ( get_color_index(7) << 15 )
-            | ( get_color_index(8) << 17 ) | ( get_color_index(9) << 19 ) | ( get_color_index(10)<< 21 ) | ( get_color_index(11)<< 23 ) 
-            | ( get_color_index(12)<< 25 ) | ( get_color_index(13)<< 27 ) | ( get_color_index(14)<< 29 ) | ( get_color_index(15)<< 31 );
-    block.w =  ( get_color_index(15)>> 1 ) | ( get_alpha_index(0) <<  1 ) | ( get_alpha_index(1) <<  2 ) | ( get_alpha_index(2) <<  4 )
-            | ( get_alpha_index(3) <<  6 ) | ( get_alpha_index(4) <<  8 ) | ( get_alpha_index(5) << 10 ) | ( get_alpha_index(6) << 12 )
-            | ( get_alpha_index(7) << 14 ) | ( get_alpha_index(8) << 16 ) | ( get_alpha_index(9) << 18 ) | ( get_alpha_index(10)<< 20 ) 
-            | ( get_alpha_index(11)<< 22 ) | ( get_alpha_index(12)<< 24 ) | ( get_alpha_index(13)<< 26 ) | ( get_alpha_index(14)<< 28 )
-            | ( get_alpha_index(15)<< 30 );
-}
-void block_package6( out uint4 block, uint threadBase )
-{
-    block.x = 0x40
-            | ( ( get_end_point_l(0).r & 0xFE ) <<  6 ) | ( ( get_end_point_h(0).r & 0xFE ) << 13 )
-            | ( ( get_end_point_l(0).g & 0xFE ) << 20 ) | ( ( get_end_point_h(0).g & 0xFE ) << 27 );
-    block.y = ( ( get_end_point_h(0).g & 0xFE ) >>  5 ) | ( ( get_end_point_l(0).b & 0xFE ) <<  2 )
-            | ( ( get_end_point_h(0).b & 0xFE ) <<  9 )	| ( ( get_end_point_l(0).a & 0xFE ) << 16 )
-            | ( ( get_end_point_h(0).a & 0xFE ) << 23 )
-            | ( get_end_point_l(0).r & 0x01 ) << 31;
-    block.z = ( get_end_point_h(0).r & 0x01 )
-            | ( get_color_index(0) <<  1 ) | ( get_color_index(1) <<  4 ) | ( get_color_index(2) <<  8 ) | ( get_color_index(3) << 12 ) 
-            | ( get_color_index(4) << 16 ) | ( get_color_index(5) << 20 ) | ( get_color_index(6) << 24 ) | ( get_color_index(7) << 28 );
-    block.w = ( get_color_index(8) <<  0 ) | ( get_color_index(9) <<  4 ) | ( get_color_index(10)<<  8 ) | ( get_color_index(11)<< 12 ) 
-            | ( get_color_index(12)<< 16 ) | ( get_color_index(13)<< 20 ) | ( get_color_index(14)<< 24 ) | ( get_color_index(15)<< 28 );
-}
-void block_package7( out uint4 block, uint partition, uint threadBase )
-{
-    block.x = 0x80 | ( partition << 8 ) 
-            | ( ( get_end_point_l(0).r & 0xF8 ) << 11 ) | ( ( get_end_point_h(0).r & 0xF8 ) << 16 ) 
-            | ( ( get_end_point_l(1).r & 0xF8 ) << 21 ) | ( ( get_end_point_h(1).r & 0xF8 ) << 26 );
-    block.y = ( ( get_end_point_h(1).r & 0xF8 ) >>  6 ) | ( ( get_end_point_l(0).g & 0xF8 ) >>  1 )
-            | ( ( get_end_point_h(0).g & 0xF8 ) <<  4 ) | ( ( get_end_point_l(1).g & 0xF8 ) <<  9 ) 
-            | ( ( get_end_point_h(1).g & 0xF8 ) << 14 )	| ( ( get_end_point_l(0).b & 0xF8 ) << 19 ) 
-            | ( ( get_end_point_h(0).b & 0xF8 ) << 24 );
-    block.z = ( ( get_end_point_l(1).b & 0xF8 ) >>  3 )	| ( ( get_end_point_h(1).b & 0xF8 ) <<  2 ) 
-            | ( ( get_end_point_l(0).a & 0xF8 ) <<  7 ) | ( ( get_end_point_h(0).a & 0xF8 ) << 12 ) 
-            | ( ( get_end_point_l(1).a & 0xF8 ) << 17 ) | ( ( get_end_point_h(1).a & 0xF8 ) << 22 ) 
-            | ( ( get_end_point_l(0).r & 0x04 ) << 28 ) | ( ( get_end_point_h(0).r & 0x04 ) << 29 );
-    block.w = ( ( get_end_point_l(1).r & 0x04 ) >>  2 ) | ( ( get_end_point_h(1).r & 0x04 ) >>  1 )
-            | ( get_color_index(0) <<  2 );
-    uint i = 1;
-    for ( ; i <= candidateFixUpIndex1DOrdered[partition][0]; i ++ )
-    {
-        block.w |= get_color_index(i) << ( i * 2 + 1 );
-    }
-    for ( ; i < 16; i ++ )
-    {
-        block.w |= get_color_index(i) << ( i * 2 );
-    }
-}
-\ No newline at end of file
diff --git a/tests/hlsl/dxsdk/BasicCompute11/BasicCompute11.hlsl b/tests/hlsl/dxsdk/BasicCompute11/BasicCompute11.hlsl
deleted file mode 100644
index 664e92e5d..000000000
--- a/tests/hlsl/dxsdk/BasicCompute11/BasicCompute11.hlsl
+++ /dev/null
@@ -1,72 +0,0 @@
-//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues.
-//TEST:COMPARE_HLSL: -profile cs_4_0 -entry CSMain
-//--------------------------------------------------------------------------------------
-// File: BasicCompute11.hlsl
-//
-// This file contains the Compute Shader to perform array A + array B
-// 
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-#ifdef USE_STRUCTURED_BUFFERS
-
-struct BufType
-{
-    int i;
-    float f;
-#ifdef TEST_DOUBLE
-    double d;
-#endif    
-};
-
-StructuredBuffer<BufType> Buffer0 : register(t0);
-StructuredBuffer<BufType> Buffer1 : register(t1);
-RWStructuredBuffer<BufType> BufferOut : register(u0);
-
-[numthreads(1, 1, 1)]
-void CSMain( uint3 DTid : SV_DispatchThreadID )
-{
-    BufferOut[DTid.x].i = Buffer0[DTid.x].i + Buffer1[DTid.x].i;
-    BufferOut[DTid.x].f = Buffer0[DTid.x].f + Buffer1[DTid.x].f;
-#ifdef TEST_DOUBLE
-    BufferOut[DTid.x].d = Buffer0[DTid.x].d + Buffer1[DTid.x].d;
-#endif 
-}
-
-#else // The following code is for raw buffers
-
-ByteAddressBuffer Buffer0 : register(t0);
-ByteAddressBuffer Buffer1 : register(t1);
-RWByteAddressBuffer BufferOut : register(u0);
-
-[numthreads(1, 1, 1)]
-void CSMain( uint3 DTid : SV_DispatchThreadID )
-{
-#ifdef TEST_DOUBLE
-    int i0 = asint( Buffer0.Load( DTid.x*16 ) );
-    float f0 = asfloat( Buffer0.Load( DTid.x*16+4 ) );
-    double d0 = asdouble( Buffer0.Load( DTid.x*16+8 ), Buffer0.Load( DTid.x*16+12 ) );
-    int i1 = asint( Buffer1.Load( DTid.x*16 ) );
-    float f1 = asfloat( Buffer1.Load( DTid.x*16+4 ) );
-    double d1 = asdouble( Buffer1.Load( DTid.x*16+8 ), Buffer1.Load( DTid.x*16+12 ) );
-    
-    BufferOut.Store( DTid.x*16, asuint(i0 + i1) );
-    BufferOut.Store( DTid.x*16+4, asuint(f0 + f1) );
-    
-    uint dl, dh;
-    asuint( d0 + d1, dl, dh );
-
-    BufferOut.Store( DTid.x*16+8, dl );
-    BufferOut.Store( DTid.x*16+12, dh );
-#else
-    int i0 = asint( Buffer0.Load( DTid.x*8 ) );
-    float f0 = asfloat( Buffer0.Load( DTid.x*8+4 ) );
-    int i1 = asint( Buffer1.Load( DTid.x*8 ) );
-    float f1 = asfloat( Buffer1.Load( DTid.x*8+4 ) );
-    
-    BufferOut.Store( DTid.x*8, asuint(i0 + i1) );
-    BufferOut.Store( DTid.x*8+4, asuint(f0 + f1) );
-#endif // TEST_DOUBLE
-}
-
-#endif // USE_STRUCTURED_BUFFERS
diff --git a/tests/hlsl/dxsdk/BasicHLSL11/BasicHLSL.fx b/tests/hlsl/dxsdk/BasicHLSL11/BasicHLSL.fx
deleted file mode 100644
index bd28f862b..000000000
--- a/tests/hlsl/dxsdk/BasicHLSL11/BasicHLSL.fx
+++ /dev/null
@@ -1,158 +0,0 @@
-//TEST_IGNORE_FILE:
-//--------------------------------------------------------------------------------------
-// File: BasicHLSL.fx
-//
-// The effect file for the BasicHLSL sample.  
-// 
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-
-//--------------------------------------------------------------------------------------
-// Global variables
-//--------------------------------------------------------------------------------------
-float4 g_MaterialAmbientColor;      // Material's ambient color
-float4 g_MaterialDiffuseColor;      // Material's diffuse color
-int g_nNumLights;
-
-float3 g_LightDir;               // Light's direction in world space
-float4 g_LightDiffuse;           // Light's diffuse color
-float4 g_LightAmbient;              // Light's ambient color
-
-texture g_MeshTexture;              // Color texture for mesh
-
-float    g_fTime;                   // App's time in seconds
-float4x4 g_mWorld;                  // World matrix for object
-float4x4 g_mWorldViewProjection;    // World * View * Projection matrix
-
-
-
-//--------------------------------------------------------------------------------------
-// Texture samplers
-//--------------------------------------------------------------------------------------
-sampler MeshTextureSampler = 
-sampler_state
-{
-    Texture = <g_MeshTexture>;
-    MipFilter = LINEAR;
-    MinFilter = LINEAR;
-    MagFilter = LINEAR;
-};
-
-
-//--------------------------------------------------------------------------------------
-// Vertex shader output structure
-//--------------------------------------------------------------------------------------
-struct VS_OUTPUT
-{
-    float4 Position   : POSITION;   // vertex position 
-    float4 Diffuse    : COLOR0;     // vertex diffuse color (note that COLOR0 is clamped from 0..1)
-    float2 TextureUV  : TEXCOORD0;  // vertex texture coords 
-};
-
-
-//--------------------------------------------------------------------------------------
-// This shader computes standard transform and lighting
-//--------------------------------------------------------------------------------------
-VS_OUTPUT RenderSceneVS( float4 vPos : POSITION, 
-                         float3 vNormal : NORMAL,
-                         float2 vTexCoord0 : TEXCOORD0,
-                         uniform int nNumLights,
-                         uniform bool bTexture,
-                         uniform bool bAnimate )
-{
-
-    VS_OUTPUT Output;
-    float3 vNormalWorldSpace;
-   
-    // Transform the position from object space to homogeneous projection space
-    Output.Position = mul(vPos, g_mWorldViewProjection);
-    
-    // Transform the normal from object space to world space    
-    vNormalWorldSpace = normalize(mul(vNormal, (float3x3)g_mWorld)); // normal (world space)
-    
-    // Compute simple directional lighting equation
-    float3 vTotalLightDiffuse = float3(0,0,0);
-    for(int i=0; i<nNumLights; i++ )
-        vTotalLightDiffuse += g_LightDiffuse * max(0,dot(vNormalWorldSpace, g_LightDir));
-        
-    Output.Diffuse.rgb = g_MaterialDiffuseColor * vTotalLightDiffuse + 
-                         g_MaterialAmbientColor * g_LightAmbient;   
-    Output.Diffuse.a = 1.0f; 
-    
-    // Just copy the texture coordinate through
-    if( bTexture ) 
-        Output.TextureUV = vTexCoord0; 
-    else
-        Output.TextureUV = 0; 
-    
-    return Output;    
-}
-
-
-//--------------------------------------------------------------------------------------
-// Pixel shader output structure
-//--------------------------------------------------------------------------------------
-struct PS_OUTPUT
-{
-    float4 RGBColor : COLOR0;  // Pixel color    
-};
-
-
-//--------------------------------------------------------------------------------------
-// This shader outputs the pixel's color by modulating the texture's
-//       color with diffuse material color
-//--------------------------------------------------------------------------------------
-PS_OUTPUT RenderScenePS( VS_OUTPUT In,
-                         uniform bool bTexture ) 
-{ 
-    PS_OUTPUT Output;
-
-    // Lookup mesh texture and modulate it with diffuse
-    if( bTexture )
-        Output.RGBColor = tex2D(MeshTextureSampler, In.TextureUV) * In.Diffuse;
-    else
-        Output.RGBColor = In.Diffuse;
-
-    return Output;
-}
-
-
-//--------------------------------------------------------------------------------------
-// Renders scene to render target
-//--------------------------------------------------------------------------------------
-technique RenderSceneWithTexture1Light
-{
-    pass P0
-    {          
-        VertexShader = compile vs_2_0 RenderSceneVS( 1, true, true );
-        PixelShader  = compile ps_2_0 RenderScenePS( true ); // trivial pixel shader (could use FF instead if desired)
-    }
-}
-
-technique RenderSceneWithTexture2Light
-{
-    pass P0
-    {          
-        VertexShader = compile vs_2_0 RenderSceneVS( 2, true, true );
-        PixelShader  = compile ps_2_0 RenderScenePS( true ); // trivial pixel shader (could use FF instead if desired)
-    }
-}
-
-technique RenderSceneWithTexture3Light
-{
-    pass P0
-    {          
-        VertexShader = compile vs_2_0 RenderSceneVS( 3, true, true );
-        PixelShader  = compile ps_2_0 RenderScenePS( true ); // trivial pixel shader (could use FF instead if desired)
-    }
-}
-
-technique RenderSceneNoTexture
-{
-    pass P0
-    {          
-        VertexShader = compile vs_2_0 RenderSceneVS( 1, false, false );
-        PixelShader  = compile ps_2_0 RenderScenePS( false ); // trivial pixel shader (could use FF instead if desired)
-    }
-}
diff --git a/tests/hlsl/dxsdk/BasicHLSL11/BasicHLSL11_PS.hlsl b/tests/hlsl/dxsdk/BasicHLSL11/BasicHLSL11_PS.hlsl
deleted file mode 100644
index 33ea61b07..000000000
--- a/tests/hlsl/dxsdk/BasicHLSL11/BasicHLSL11_PS.hlsl
+++ /dev/null
@@ -1,60 +0,0 @@
-//TEST:COMPARE_HLSL:-no-mangle -profile ps_4_0 -entry PSMain
-
-#ifndef __SLANG__
-#define cbPerFrame cbPerFrame_0
-#define g_vLightDir g_vLightDir_0
-#define g_fAmbient g_fAmbient_0
-#define g_samLinear g_samLinear_0
-#define g_txDiffuse g_txDiffuse_0
-#endif
-
-//--------------------------------------------------------------------------------------
-// File: BasicHLSL11_PS.hlsl
-//
-// The pixel shader file for the BasicHLSL11 sample.  
-// 
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-//--------------------------------------------------------------------------------------
-// Globals
-//--------------------------------------------------------------------------------------
-cbuffer cbPerObject : register( b0 )
-{
-	float4		g_vObjectColor			;//SLANG: : packoffset( c0 );
-};
-
-cbuffer cbPerFrame : register( b1 )
-{
-	float3		g_vLightDir				;//SLANG: : packoffset( c0 );
-	float		g_fAmbient				;//SLANG: : packoffset( c0.w );
-};
-
-//--------------------------------------------------------------------------------------
-// Textures and Samplers
-//--------------------------------------------------------------------------------------
-Texture2D	g_txDiffuse : register( t0 );
-SamplerState g_samLinear : register( s0 );
-
-//--------------------------------------------------------------------------------------
-// Input / Output structures
-//--------------------------------------------------------------------------------------
-struct PS_INPUT
-{
-	float3 vNormal		: NORMAL;
-	float2 vTexcoord	: TEXCOORD0;
-};
-
-//--------------------------------------------------------------------------------------
-// Pixel Shader
-//--------------------------------------------------------------------------------------
-float4 PSMain( PS_INPUT Input ) : SV_TARGET
-{
-	float4 vDiffuse = g_txDiffuse.Sample( g_samLinear, Input.vTexcoord );
-	
-	float fLighting = saturate( dot( g_vLightDir, Input.vNormal ) );
-	fLighting = max( fLighting, g_fAmbient );
-	
-	return vDiffuse * fLighting;
-}
-
diff --git a/tests/hlsl/dxsdk/BasicHLSL11/BasicHLSL11_VS.hlsl b/tests/hlsl/dxsdk/BasicHLSL11/BasicHLSL11_VS.hlsl
deleted file mode 100644
index a0fb3c9ce..000000000
--- a/tests/hlsl/dxsdk/BasicHLSL11/BasicHLSL11_VS.hlsl
+++ /dev/null
@@ -1,56 +0,0 @@
-//TEST:COMPARE_HLSL: -profile vs_4_0 -entry VSMain
-
-#ifndef __SLANG__
-#define cbPerObject cbPerObject_0
-#define g_mWorldViewProjection g_mWorldViewProjection_0
-#define g_mWorld g_mWorld_0
-#endif
-
-//--------------------------------------------------------------------------------------
-// File: BasicHLSL11_VS.hlsl
-//
-// The vertex shader file for the BasicHLSL11 sample.  
-// 
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-//--------------------------------------------------------------------------------------
-// Globals
-//--------------------------------------------------------------------------------------
-cbuffer cbPerObject : register( b0 )
-{
-	matrix		g_mWorldViewProjection	;//SLANG: : packoffset( c0 );
-	matrix		g_mWorld				;//SLANG: : packoffset( c4 );
-};
-
-//--------------------------------------------------------------------------------------
-// Input / Output structures
-//--------------------------------------------------------------------------------------
-struct VS_INPUT
-{
-	float4 vPosition	: POSITION;
-	float3 vNormal		: NORMAL;
-	float2 vTexcoord	: TEXCOORD0;
-};
-
-struct VS_OUTPUT
-{
-	float3 vNormal		: NORMAL;
-	float2 vTexcoord	: TEXCOORD0;
-	float4 vPosition	: SV_POSITION;
-};
-
-//--------------------------------------------------------------------------------------
-// Vertex Shader
-//--------------------------------------------------------------------------------------
-VS_OUTPUT VSMain( VS_INPUT Input )
-{
-	VS_OUTPUT Output;
-	
-	Output.vPosition = mul( Input.vPosition, g_mWorldViewProjection );
-	Output.vNormal = mul( Input.vNormal, (float3x3)g_mWorld );
-	Output.vTexcoord = Input.vTexcoord;
-	
-	return Output;
-}
-
diff --git a/tests/hlsl/dxsdk/BasicHLSLFX11/BasicHLSLFX11.fx b/tests/hlsl/dxsdk/BasicHLSLFX11/BasicHLSLFX11.fx
deleted file mode 100644
index 1ecc1930a..000000000
--- a/tests/hlsl/dxsdk/BasicHLSLFX11/BasicHLSLFX11.fx
+++ /dev/null
@@ -1,181 +0,0 @@
-//TEST_IGNORE_FILE:
-//--------------------------------------------------------------------------------------
-// File: BasicHLSL11.fx
-//
-// The effect file for the BasicHLSL sample.  
-// 
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-
-//--------------------------------------------------------------------------------------
-// Global variables
-//--------------------------------------------------------------------------------------
-float4 g_MaterialAmbientColor;      // Material's ambient color
-float4 g_MaterialDiffuseColor;      // Material's diffuse color
-int g_nNumLights;
-
-float3 g_LightDir[3];               // Light's direction in world space
-float4 g_LightDiffuse[3];           // Light's diffuse color
-float4 g_LightAmbient;              // Light's ambient color
-
-Texture2D g_MeshTexture;            // Color texture for mesh
-
-float    g_fTime;                   // App's time in seconds
-float4x4 g_mWorld;                  // World matrix for object
-float4x4 g_mWorldViewProjection;    // World * View * Projection matrix
-
-//--------------------------------------------------------------------------------------
-// DepthStates
-//--------------------------------------------------------------------------------------
-DepthStencilState EnableDepth
-{
-    DepthEnable = TRUE;
-    DepthWriteMask = ALL;
-    DepthFunc = LESS_EQUAL;
-};
-
-//--------------------------------------------------------------------------------------
-// Texture samplers
-//--------------------------------------------------------------------------------------
-SamplerState MeshTextureSampler
-{
-    Filter = MIN_MAG_MIP_LINEAR;
-    AddressU = Wrap;
-    AddressV = Wrap;
-};
-
-
-//--------------------------------------------------------------------------------------
-// Vertex shader output structure
-//--------------------------------------------------------------------------------------
-struct VS_OUTPUT
-{
-    float4 Position   : SV_POSITION; // vertex position 
-    float4 Diffuse    : COLOR0;      // vertex diffuse color (note that COLOR0 is clamped from 0..1)
-    float2 TextureUV  : TEXCOORD0;   // vertex texture coords 
-};
-
-
-//--------------------------------------------------------------------------------------
-// This shader computes standard transform and lighting
-//--------------------------------------------------------------------------------------
-VS_OUTPUT RenderSceneVS( float4 vPos : POSITION,
-                         float3 vNormal : NORMAL,
-                         float2 vTexCoord0 : TEXCOORD,
-                         uniform int nNumLights,
-                         uniform bool bTexture,
-                         uniform bool bAnimate )
-{
-    VS_OUTPUT Output;
-    float3 vNormalWorldSpace;
-  
-    float4 vAnimatedPos = vPos;
-    
-    // Animation the vertex based on time and the vertex's object space position
-    if( bAnimate )
-		vAnimatedPos += float4(vNormal, 0) * (sin(g_fTime+5.5)+0.5)*5;
-    
-    // Transform the position from object space to homogeneous projection space
-    Output.Position = mul(vAnimatedPos, g_mWorldViewProjection);
-    
-    // Transform the normal from object space to world space    
-    vNormalWorldSpace = normalize(mul(vNormal, (float3x3)g_mWorld)); // normal (world space)
-    
-    // Compute simple directional lighting equation
-    float3 vTotalLightDiffuse = float3(0,0,0);
-    for(int i=0; i<nNumLights; i++ )
-        vTotalLightDiffuse += g_LightDiffuse[i] * max(0,dot(vNormalWorldSpace, g_LightDir[i]));
-        
-    Output.Diffuse.rgb = g_MaterialDiffuseColor * vTotalLightDiffuse + 
-                         g_MaterialAmbientColor * g_LightAmbient;   
-    Output.Diffuse.a = 1.0f; 
-    
-    // Just copy the texture coordinate through
-    if( bTexture ) 
-        Output.TextureUV = vTexCoord0; 
-    else
-        Output.TextureUV = 0; 
-    
-    return Output;    
-}
-
-
-//--------------------------------------------------------------------------------------
-// Pixel shader output structure
-//--------------------------------------------------------------------------------------
-struct PS_OUTPUT
-{
-    float4 RGBColor : SV_Target;  // Pixel color
-};
-
-
-//--------------------------------------------------------------------------------------
-// This shader outputs the pixel's color by modulating the texture's
-//       color with diffuse material color
-//--------------------------------------------------------------------------------------
-PS_OUTPUT RenderScenePS( VS_OUTPUT In,
-                         uniform bool bTexture ) 
-{ 
-    PS_OUTPUT Output;
-
-    // Lookup mesh texture and modulate it with diffuse
-    if( bTexture )
-        Output.RGBColor = g_MeshTexture.Sample(MeshTextureSampler, In.TextureUV) * In.Diffuse;
-    else
-        Output.RGBColor = In.Diffuse;
-
-    return Output;
-}
-
-
-//--------------------------------------------------------------------------------------
-// Renders scene to render target using D3D11 Techniques
-//--------------------------------------------------------------------------------------
-technique11 RenderSceneWithTexture1Light
-{
-    pass P0
-    {
-        SetVertexShader( CompileShader( vs_4_0_level_9_1, RenderSceneVS( 1, true, true ) ) );
-        SetGeometryShader( NULL );
-        SetPixelShader( CompileShader( ps_4_0_level_9_1, RenderScenePS( true ) ) );
-
-        SetDepthStencilState( EnableDepth, 0 );
-    }
-}
-
-technique11 RenderSceneWithTexture2Light
-{
-    pass P0
-    {          
-        SetVertexShader( CompileShader( vs_4_0_level_9_1, RenderSceneVS( 2, true, true ) ) );
-        SetGeometryShader( NULL );
-        SetPixelShader( CompileShader( ps_4_0_level_9_1, RenderScenePS( true ) ) ); 
-        
-        SetDepthStencilState( EnableDepth, 0 );
-    }
-}
-
-technique11 RenderSceneWithTexture3Light
-{
-    pass P0
-    {          
-        SetVertexShader( CompileShader( vs_4_0_level_9_1, RenderSceneVS( 3, true, true ) ) );
-        SetGeometryShader( NULL );
-        SetPixelShader( CompileShader( ps_4_0_level_9_1, RenderScenePS( true ) ) );
-
-        SetDepthStencilState( EnableDepth, 0 );
-    }
-}
-
-technique11 RenderSceneNoTexture
-{
-    pass P0
-    {          
-        SetVertexShader( CompileShader( vs_4_0_level_9_1, RenderSceneVS( 1, true, true ) ) );
-        SetGeometryShader( NULL );
-        SetPixelShader( CompileShader( ps_4_0_level_9_1, RenderScenePS( false ) ) );
-
-        SetDepthStencilState( EnableDepth, 0 );
-    }
-}
-\ No newline at end of file
diff --git a/tests/hlsl/dxsdk/CascadedShadowMaps11/RenderCascadeScene.hlsl b/tests/hlsl/dxsdk/CascadedShadowMaps11/RenderCascadeScene.hlsl
deleted file mode 100644
index 3224d783c..000000000
--- a/tests/hlsl/dxsdk/CascadedShadowMaps11/RenderCascadeScene.hlsl
+++ /dev/null
@@ -1,506 +0,0 @@
-//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues.
-//TEST:COMPARE_HLSL: -profile vs_4_0 -entry VSMain -profile ps_4_0 -entry PSMain
-//--------------------------------------------------------------------------------------
-// File: RenderCascadeScene.hlsl
-//
-// This is the main shader file.  This shader is compiled with several different flags 
-// to provide different customizations based on user controls.
-// 
-// 
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-//--------------------------------------------------------------------------------------
-// Globals
-//--------------------------------------------------------------------------------------
-
-// This flag uses the derivative information to map the texels in a shadow map to the
-// view space plane of the primitive being rendred.  This depth is then used as the 
-// comparison depth and reduces self shadowing aliases.  This  technique is expensive
-// and is only valid when objects are planer ( such as a ground plane ).
-#ifndef USE_DERIVATIVES_FOR_DEPTH_OFFSET_FLAG
-#define USE_DERIVATIVES_FOR_DEPTH_OFFSET_FLAG 0
-#endif
-
-// This flag enables the shadow to blend between cascades.  This is most useful when the 
-// the shadow maps are small and artifact can be seen between the various cascade layers.
-#ifndef BLEND_BETWEEN_CASCADE_LAYERS_FLAG
-#define BLEND_BETWEEN_CASCADE_LAYERS_FLAG 0
-#endif
-
-// There are two methods for selecting the proper cascade a fragment lies in.  Interval selection
-// compares the depth of the fragment against the frustum's depth partition.
-// Map based selection compares the texture coordinates against the acutal cascade maps.
-// Map based selection gives better coverage.  
-// Interval based selection is easier to extend and understand.
-#ifndef SELECT_CASCADE_BY_INTERVAL_FLAG
-#define SELECT_CASCADE_BY_INTERVAL_FLAG 0
-#endif
-
-// The number of cascades 
-#ifndef CASCADE_COUNT_FLAG
-#define CASCADE_COUNT_FLAG 3
-#endif
-
-
-// Most titles will find that 3-4 cascades with 
-// BLEND_BETWEEN_CASCADE_LAYERS_FLAG, is good for lower end PCs.
-// High end PCs will be able to handle more cascades, and larger blur bands.
-// In some cases such as when large PCF kernels are used, derivative based depth offsets could be used 
-// with larger PCF blur kernels on high end PCs for the ground plane.
-
-cbuffer cbAllShadowData : register( b0 )
-{
-    matrix          m_mWorldViewProjection;
-    matrix          m_mWorld;
-    matrix          m_mWorldView;
-    matrix          m_mShadow;
-    float4          m_vCascadeOffset[8];
-    float4          m_vCascadeScale[8];
-    int             m_nCascadeLevels; // Number of Cascades
-    int             m_iVisualizeCascades; // 1 is to visualize the cascades in different colors. 0 is to just draw the scene
-    int             m_iPCFBlurForLoopStart; // For loop begin value. For a 5x5 Kernal this would be -2.
-    int             m_iPCFBlurForLoopEnd; // For loop end value. For a 5x5 kernel this would be 3.
-
-    // For Map based selection scheme, this keeps the pixels inside of the the valid range.
-    // When there is no boarder, these values are 0 and 1 respectivley.
-    float           m_fMinBorderPadding;     
-    float           m_fMaxBorderPadding;
-    float           m_fShadowBiasFromGUI;  // A shadow map offset to deal with self shadow artifacts.  
-                                           //These artifacts are aggravated by PCF.
-    float           m_fShadowPartitionSize; 
-    float           m_fCascadeBlendArea; // Amount to overlap when blending between cascades.
-    float           m_fTexelSize; 
-    float           m_fNativeTexelSizeInX;
-    float           m_fPaddingForCB3; // Padding variables exist because CBs must be a multiple of 16 bytes.
-    float4          m_fCascadeFrustumsEyeSpaceDepthsFloat[2];  // The values along Z that seperate the cascades.
-    float4          m_fCascadeFrustumsEyeSpaceDepthsFloat4[8];  // the values along Z that separte the cascades.  
-                                                          // Wastefully stored in float4 so they are array indexable. 
-    float3          m_vLightDir;
-    float           m_fPaddingCB4;
-
-};
-
-
-
-//--------------------------------------------------------------------------------------
-// Textures and Samplers
-//--------------------------------------------------------------------------------------
-Texture2D    g_txDiffuse                    : register( t0 );
-Texture2D    g_txShadow                     : register( t5 );
-
-
-SamplerState g_samLinear                    : register( s0 );
-SamplerComparisonState g_samShadow          : register( s5 );
-
-//--------------------------------------------------------------------------------------
-// Input / Output structures
-//--------------------------------------------------------------------------------------
-struct VS_INPUT
-{
-    float4 vPosition                        : POSITION;
-    float3 vNormal                          : NORMAL;
-    float2 vTexcoord                        : TEXCOORD0;
-};
-
-struct VS_OUTPUT
-{
-    float3 vNormal                          : NORMAL;
-    float2 vTexcoord                        : TEXCOORD0;
-    float4 vTexShadow					    : TEXCOORD1;
-    float4 vPosition                        : SV_POSITION;
-    float4 vInterpPos                       : TEXCOORD2; 
-    float  vDepth                           : TEXCOORD3;
-};
-
-//--------------------------------------------------------------------------------------
-// Vertex Shader
-//--------------------------------------------------------------------------------------
-VS_OUTPUT VSMain( VS_INPUT Input )
-{
-    VS_OUTPUT Output;
-
-    Output.vPosition = mul( Input.vPosition, m_mWorldViewProjection );
-    Output.vNormal = mul( Input.vNormal, (float3x3)m_mWorld );
-    Output.vTexcoord = Input.vTexcoord;
-    Output.vInterpPos = Input.vPosition;   
-    Output.vDepth = mul( Input.vPosition, m_mWorldView ).z ; 
-       
-    // Transform the shadow texture coordinates for all the cascades.
-    Output.vTexShadow = mul( Input.vPosition, m_mShadow );
-    return Output;
-    
-}
-
-
-
-static const float4 vCascadeColorsMultiplier[8] = 
-{
-    float4 ( 1.5f, 0.0f, 0.0f, 1.0f ),
-    float4 ( 0.0f, 1.5f, 0.0f, 1.0f ),
-    float4 ( 0.0f, 0.0f, 5.5f, 1.0f ),
-    float4 ( 1.5f, 0.0f, 5.5f, 1.0f ),
-    float4 ( 1.5f, 1.5f, 0.0f, 1.0f ),
-    float4 ( 1.0f, 1.0f, 1.0f, 1.0f ),
-    float4 ( 0.0f, 1.0f, 5.5f, 1.0f ),
-    float4 ( 0.5f, 3.5f, 0.75f, 1.0f )
-};
-
-
-void ComputeCoordinatesTransform( in int iCascadeIndex,
-                                      in float4 InterpolatedPosition ,
-                                      in out float4 vShadowTexCoord , 
-                                      in out float4 vShadowTexCoordViewSpace ) 
-{
-    // Now that we know the correct map, we can transform the world space position of the current fragment                
-    if( SELECT_CASCADE_BY_INTERVAL_FLAG ) 
-    {
-        vShadowTexCoord = vShadowTexCoordViewSpace * m_vCascadeScale[iCascadeIndex];
-        vShadowTexCoord += m_vCascadeOffset[iCascadeIndex];
-    }  
-          
-    vShadowTexCoord.x *= m_fShadowPartitionSize;  // precomputed (float)iCascadeIndex / (float)CASCADE_CNT
-    vShadowTexCoord.x += (m_fShadowPartitionSize * (float)iCascadeIndex ); 
-
-
-} 
-
-
-//--------------------------------------------------------------------------------------
-// This function calculates the screen space depth for shadow space texels
-//--------------------------------------------------------------------------------------
-void CalculateRightAndUpTexelDepthDeltas ( in float3 vShadowTexDDX,
-                                           in float3 vShadowTexDDY,
-                                           out float fUpTextDepthWeight,
-                                           out float fRightTextDepthWeight
- ) {
-        
-    // We use the derivatives in X and Y to create a transformation matrix.  Because these derivives give us the 
-    // transformation from screen space to shadow space, we need the inverse matrix to take us from shadow space 
-    // to screen space.  This new matrix will allow us to map shadow map texels to screen space.  This will allow 
-    // us to find the screen space depth of a corresponding depth pixel.
-    // This is not a perfect solution as it assumes the underlying geometry of the scene is a plane.  A more 
-    // accureate way of finding the actual depth would be to do a deferred rendering approach and actually 
-    //sample the depth.
-    
-    // Using an offset, or using variance shadow maps is a better approach to reducing these artifacts in most cases.
-    
-    float2x2 matScreentoShadow = float2x2( vShadowTexDDX.xy, vShadowTexDDY.xy );
-    float fDeterminant = determinant ( matScreentoShadow );
-    
-    float fInvDeterminant = 1.0f / fDeterminant;
-    
-    float2x2 matShadowToScreen = float2x2 (
-        matScreentoShadow._22 * fInvDeterminant, matScreentoShadow._12 * -fInvDeterminant, 
-        matScreentoShadow._21 * -fInvDeterminant, matScreentoShadow._11 * fInvDeterminant );
-
-    float2 vRightShadowTexelLocation = float2( m_fTexelSize, 0.0f );
-    float2 vUpShadowTexelLocation = float2( 0.0f, m_fTexelSize );  
-    
-    // Transform the right pixel by the shadow space to screen space matrix.
-    float2 vRightTexelDepthRatio = mul( vRightShadowTexelLocation,  matShadowToScreen );
-    float2 vUpTexelDepthRatio = mul( vUpShadowTexelLocation,  matShadowToScreen );
-
-    // We can now caculate how much depth changes when you move up or right in the shadow map.
-    // We use the ratio of change in x and y times the dervivite in X and Y of the screen space 
-    // depth to calculate this change.
-    fUpTextDepthWeight = 
-        vUpTexelDepthRatio.x * vShadowTexDDX.z 
-        + vUpTexelDepthRatio.y * vShadowTexDDY.z;
-    fRightTextDepthWeight = 
-        vRightTexelDepthRatio.x * vShadowTexDDX.z 
-        + vRightTexelDepthRatio.y * vShadowTexDDY.z;
-        
-}
-
-
-//--------------------------------------------------------------------------------------
-// Use PCF to sample the depth map and return a percent lit value.
-//--------------------------------------------------------------------------------------
-void CalculatePCFPercentLit ( in float4 vShadowTexCoord, 
-                              in float fRightTexelDepthDelta, 
-                              in float fUpTexelDepthDelta, 
-                              in float fBlurRowSize,
-                              out float fPercentLit
-                              ) 
-{
-    fPercentLit = 0.0f;
-    // This loop could be unrolled, and texture immediate offsets could be used if the kernel size were fixed.
-    // This would be performance improvment.
-    for( int x = m_iPCFBlurForLoopStart; x < m_iPCFBlurForLoopEnd; ++x ) 
-    {
-        for( int y = m_iPCFBlurForLoopStart; y < m_iPCFBlurForLoopEnd; ++y ) 
-        {
-            float depthcompare = vShadowTexCoord.z;
-            // A very simple solution to the depth bias problems of PCF is to use an offset.
-            // Unfortunately, too much offset can lead to Peter-panning (shadows near the base of object disappear )
-            // Too little offset can lead to shadow acne ( objects that should not be in shadow are partially self shadowed ).
-            depthcompare -= m_fShadowBiasFromGUI;
-            if ( USE_DERIVATIVES_FOR_DEPTH_OFFSET_FLAG ) 
-            {
-                // Add in derivative computed depth scale based on the x and y pixel.
-                depthcompare += fRightTexelDepthDelta * ( (float) x ) + fUpTexelDepthDelta * ( (float) y );
-            }
-            // Compare the transformed pixel depth to the depth read from the map.
-            fPercentLit += g_txShadow.SampleCmpLevelZero( g_samShadow, 
-                float2( 
-                    vShadowTexCoord.x + ( ( (float) x ) * m_fNativeTexelSizeInX ) , 
-                    vShadowTexCoord.y + ( ( (float) y ) * m_fTexelSize ) 
-                    ), 
-                depthcompare );
-        }
-    }
-    fPercentLit /= (float)fBlurRowSize;
-}
-
-//--------------------------------------------------------------------------------------
-// Calculate amount to blend between two cascades and the band where blending will occure.
-//--------------------------------------------------------------------------------------
-void CalculateBlendAmountForInterval ( in int iCurrentCascadeIndex, 
-                                       in out float fPixelDepth, 
-                                       in out float fCurrentPixelsBlendBandLocation,
-                                       out float fBlendBetweenCascadesAmount
-                                       ) 
-{
-
-    // We need to calculate the band of the current shadow map where it will fade into the next cascade.
-    // We can then early out of the expensive PCF for loop.
-    // 
-    float fBlendInterval = m_fCascadeFrustumsEyeSpaceDepthsFloat4[ iCurrentCascadeIndex  ].x;
-    //if( iNextCascadeIndex > 1 ) 
-    int fBlendIntervalbelowIndex = min(0, iCurrentCascadeIndex-1);
-    fPixelDepth -= m_fCascadeFrustumsEyeSpaceDepthsFloat4[ fBlendIntervalbelowIndex ].x;
-    fBlendInterval -= m_fCascadeFrustumsEyeSpaceDepthsFloat4[ fBlendIntervalbelowIndex ].x;
-    
-    // The current pixel's blend band location will be used to determine when we need to blend and by how much.
-    fCurrentPixelsBlendBandLocation = fPixelDepth / fBlendInterval;
-    fCurrentPixelsBlendBandLocation = 1.0f - fCurrentPixelsBlendBandLocation;
-    // The fBlendBetweenCascadesAmount is our location in the blend band.
-    fBlendBetweenCascadesAmount = fCurrentPixelsBlendBandLocation / m_fCascadeBlendArea;
-}
-
-
-
-//--------------------------------------------------------------------------------------
-// Calculate amount to blend between two cascades and the band where blending will occure.
-//--------------------------------------------------------------------------------------
-void CalculateBlendAmountForMap ( in float4 vShadowMapTextureCoord, 
-                                  in out float fCurrentPixelsBlendBandLocation,
-                                  out float fBlendBetweenCascadesAmount ) 
-{
-    // Calcaulte the blend band for the map based selection.
-    float2 distanceToOne = float2 ( 1.0f - vShadowMapTextureCoord.x, 1.0f - vShadowMapTextureCoord.y );
-    fCurrentPixelsBlendBandLocation = min( vShadowMapTextureCoord.x, vShadowMapTextureCoord.y );
-    float fCurrentPixelsBlendBandLocation2 = min( distanceToOne.x, distanceToOne.y );
-    fCurrentPixelsBlendBandLocation = 
-        min( fCurrentPixelsBlendBandLocation, fCurrentPixelsBlendBandLocation2 );
-    fBlendBetweenCascadesAmount = fCurrentPixelsBlendBandLocation / m_fCascadeBlendArea;
-}
-
-//--------------------------------------------------------------------------------------
-// Calculate the shadow based on several options and rende the scene.
-//--------------------------------------------------------------------------------------
-float4 PSMain( VS_OUTPUT Input ) : SV_TARGET
-{
-    float4 vDiffuse = g_txDiffuse.Sample( g_samLinear, Input.vTexcoord );
-    
-    float4 vShadowMapTextureCoord = 0.0f;
-    float4 vShadowMapTextureCoord_blend = 0.0f;
-    
-    float4 vVisualizeCascadeColor = float4(0.0f,0.0f,0.0f,1.0f);
-    
-    float fPercentLit = 0.0f;
-    float fPercentLit_blend = 0.0f;
-
-   
-    float fUpTextDepthWeight=0;
-    float fRightTextDepthWeight=0;
-    float fUpTextDepthWeight_blend=0;
-    float fRightTextDepthWeight_blend=0;
-
-    int iBlurRowSize = m_iPCFBlurForLoopEnd - m_iPCFBlurForLoopStart;
-    iBlurRowSize *= iBlurRowSize;
-    float fBlurRowSize = (float)iBlurRowSize;
-        
-    int iCascadeFound = 0;
-    int iNextCascadeIndex = 1;
-
-    float fCurrentPixelDepth;
-
-    // The interval based selection technique compares the pixel's depth against the frustum's cascade divisions.
-    fCurrentPixelDepth = Input.vDepth;
-    
-    // This for loop is not necessary when the frustum is uniformaly divided and interval based selection is used.
-    // In this case fCurrentPixelDepth could be used as an array lookup into the correct frustum. 
-    int iCurrentCascadeIndex;
-    
-    float4 vShadowMapTextureCoordViewSpace = Input.vTexShadow;
-    if( SELECT_CASCADE_BY_INTERVAL_FLAG ) 
-    {
-        iCurrentCascadeIndex = 0;
-        if ( CASCADE_COUNT_FLAG > 1 ) 
-        {
-            float4 vCurrentPixelDepth = Input.vDepth;
-            float4 fComparison = ( vCurrentPixelDepth > m_fCascadeFrustumsEyeSpaceDepthsFloat[0]);
-            float4 fComparison2 = ( vCurrentPixelDepth > m_fCascadeFrustumsEyeSpaceDepthsFloat[1]);
-            float fIndex = dot( 
-                            float4( CASCADE_COUNT_FLAG > 0,
-                                    CASCADE_COUNT_FLAG > 1, 
-                                    CASCADE_COUNT_FLAG > 2, 
-                                    CASCADE_COUNT_FLAG > 3)
-                            , fComparison )
-                         + dot( 
-                            float4(
-                                    CASCADE_COUNT_FLAG > 4,
-                                    CASCADE_COUNT_FLAG > 5,
-                                    CASCADE_COUNT_FLAG > 6,
-                                    CASCADE_COUNT_FLAG > 7)
-                            , fComparison2 ) ;
-                                    
-            fIndex = min( fIndex, CASCADE_COUNT_FLAG - 1 );
-            iCurrentCascadeIndex = (int)fIndex;
-        }
-    }
-    
-    if ( !SELECT_CASCADE_BY_INTERVAL_FLAG ) 
-    {
-        iCurrentCascadeIndex = 0;
-        if ( CASCADE_COUNT_FLAG  == 1 ) 
-        {
-            vShadowMapTextureCoord = vShadowMapTextureCoordViewSpace * m_vCascadeScale[0];
-            vShadowMapTextureCoord += m_vCascadeOffset[0];
-        }
-        if ( CASCADE_COUNT_FLAG > 1 ) {
-            for( int iCascadeIndex = 0; iCascadeIndex < CASCADE_COUNT_FLAG && iCascadeFound == 0; ++iCascadeIndex ) 
-            {
-                vShadowMapTextureCoord = vShadowMapTextureCoordViewSpace * m_vCascadeScale[iCascadeIndex];
-                vShadowMapTextureCoord += m_vCascadeOffset[iCascadeIndex];
-
-                if ( min( vShadowMapTextureCoord.x, vShadowMapTextureCoord.y ) > m_fMinBorderPadding
-                  && max( vShadowMapTextureCoord.x, vShadowMapTextureCoord.y ) < m_fMaxBorderPadding )
-                { 
-                    iCurrentCascadeIndex = iCascadeIndex;   
-                    iCascadeFound = 1; 
-                }
-            }
-        }
-    }    
-    
-    float4 color = 0;   
-  
-    if( BLEND_BETWEEN_CASCADE_LAYERS_FLAG  ) 
-    {
-        // Repeat text coord calculations for the next cascade. 
-        // The next cascade index is used for blurring between maps.
-        iNextCascadeIndex = min ( CASCADE_COUNT_FLAG - 1, iCurrentCascadeIndex + 1 ); 
-    }            
-
-    float fBlendBetweenCascadesAmount = 1.0f;
-    float fCurrentPixelsBlendBandLocation = 1.0f;
-    
-    if( SELECT_CASCADE_BY_INTERVAL_FLAG ) 
-    {
-        if( BLEND_BETWEEN_CASCADE_LAYERS_FLAG && CASCADE_COUNT_FLAG > 1  ) 
-         {
-            CalculateBlendAmountForInterval ( iCurrentCascadeIndex, fCurrentPixelDepth, 
-                fCurrentPixelsBlendBandLocation, fBlendBetweenCascadesAmount );
-        }   
-    }
-    else 
-    {
-    
-        if( BLEND_BETWEEN_CASCADE_LAYERS_FLAG ) 
-        {
-            CalculateBlendAmountForMap ( vShadowMapTextureCoord, 
-                fCurrentPixelsBlendBandLocation, fBlendBetweenCascadesAmount );
-        }   
-    }
-    
-    float3 vShadowMapTextureCoordDDX;
-    float3 vShadowMapTextureCoordDDY;
-    // The derivatives are used to find the slope of the current plane.
-    // The derivative calculation has to be inside of the loop in order to prevent divergent flow control artifacts.
-    if( USE_DERIVATIVES_FOR_DEPTH_OFFSET_FLAG ) 
-    {
-        vShadowMapTextureCoordDDX = ddx( vShadowMapTextureCoordViewSpace );
-        vShadowMapTextureCoordDDY = ddy( vShadowMapTextureCoordViewSpace );    
-        
-        vShadowMapTextureCoordDDX *= m_vCascadeScale[iCurrentCascadeIndex];
-        vShadowMapTextureCoordDDY *= m_vCascadeScale[iCurrentCascadeIndex];
-    }    
-    
-    ComputeCoordinatesTransform( iCurrentCascadeIndex, 
-                                 Input.vInterpPos, 
-                                 vShadowMapTextureCoord, 
-                                 vShadowMapTextureCoordViewSpace );    
-                                 
-
-    vVisualizeCascadeColor = vCascadeColorsMultiplier[iCurrentCascadeIndex];
-         
-    if( USE_DERIVATIVES_FOR_DEPTH_OFFSET_FLAG ) 
-    {
-         CalculateRightAndUpTexelDepthDeltas ( vShadowMapTextureCoordDDX, vShadowMapTextureCoordDDY,
-                                              fUpTextDepthWeight, fRightTextDepthWeight );
-    }
-    
-    CalculatePCFPercentLit ( vShadowMapTextureCoord, fRightTextDepthWeight, 
-                            fUpTextDepthWeight, fBlurRowSize, fPercentLit );
-                                             
-    if( BLEND_BETWEEN_CASCADE_LAYERS_FLAG && CASCADE_COUNT_FLAG > 1 ) 
-    {
-        if( fCurrentPixelsBlendBandLocation < m_fCascadeBlendArea) 
-        {  // the current pixel is within the blend band.
-    
-            // Repeat text coord calculations for the next cascade. 
-            // The next cascade index is used for blurring between maps.
-            if( !SELECT_CASCADE_BY_INTERVAL_FLAG ) 
-            {
-                vShadowMapTextureCoord_blend = vShadowMapTextureCoordViewSpace * m_vCascadeScale[iNextCascadeIndex];
-                vShadowMapTextureCoord_blend += m_vCascadeOffset[iNextCascadeIndex];
-            }
-            
-            ComputeCoordinatesTransform( iNextCascadeIndex, Input.vInterpPos, 
-                                             vShadowMapTextureCoord_blend, 
-										     vShadowMapTextureCoordViewSpace );  
-       
-        // We repeat the calcuation for the next cascade layer, when blending between maps.
-            if( fCurrentPixelsBlendBandLocation < m_fCascadeBlendArea) 
-            {  // the current pixel is within the blend band.
-                if( USE_DERIVATIVES_FOR_DEPTH_OFFSET_FLAG ) 
-                {
-
-                    CalculateRightAndUpTexelDepthDeltas ( vShadowMapTextureCoordDDX,
-                                                          vShadowMapTextureCoordDDY,
-                                                          fUpTextDepthWeight_blend,
-                                                          fRightTextDepthWeight_blend );
-                }   
-                CalculatePCFPercentLit ( vShadowMapTextureCoord_blend, fRightTextDepthWeight_blend, 
-                                        fUpTextDepthWeight_blend, fBlurRowSize, fPercentLit_blend );
-                fPercentLit = lerp( fPercentLit_blend, fPercentLit, fBlendBetweenCascadesAmount ); 
-                // Blend the two calculated shadows by the blend amount.
-            }   
-        }   
-    }    
-
-    
-    if( !m_iVisualizeCascades ) vVisualizeCascadeColor = float4(1.0f,1.0f,1.0f,1.0f);
-    
-    float3 vLightDir1 = float3( -1.0f, 1.0f, -1.0f ); 
-    float3 vLightDir2 = float3( 1.0f, 1.0f, -1.0f ); 
-    float3 vLightDir3 = float3( 0.0f, -1.0f, 0.0f );
-    float3 vLightDir4 = float3( 1.0f, 1.0f, 1.0f );     
-    // Some ambient-like lighting.
-    float fLighting = 
-                      saturate( dot( vLightDir1 , Input.vNormal ) )*0.05f +
-                      saturate( dot( vLightDir2 , Input.vNormal ) )*0.05f +
-                      saturate( dot( vLightDir3 , Input.vNormal ) )*0.05f +
-                      saturate( dot( vLightDir4 , Input.vNormal ) )*0.05f ;
-    
-    float4 vShadowLighting = fLighting * 0.5f;
-    fLighting += saturate( dot( m_vLightDir , Input.vNormal ) );
-    fLighting = lerp( vShadowLighting, fLighting, fPercentLit );
-    
-    return fLighting * vVisualizeCascadeColor * vDiffuse;
-
-}
-
diff --git a/tests/hlsl/dxsdk/CascadedShadowMaps11/RenderCascadeShadow.hlsl b/tests/hlsl/dxsdk/CascadedShadowMaps11/RenderCascadeShadow.hlsl
deleted file mode 100644
index af9679ada..000000000
--- a/tests/hlsl/dxsdk/CascadedShadowMaps11/RenderCascadeShadow.hlsl
+++ /dev/null
@@ -1,59 +0,0 @@
-//TEST:COMPARE_HLSL: -profile sm_4_0 -entry VSMain -stage vertex -entry VSMainPancake -stage vertex
-
-#ifndef __SLANG__
-#define cbPerObject cbPerObject_0
-#define g_mWorldViewProjection g_mWorldViewProjection_0
-#endif
-
-//--------------------------------------------------------------------------------------
-// File: RenderCascadeShadow.hlsl
-//
-// The shader file for the RenderCascadeScene sample.  
-// 
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-
-//--------------------------------------------------------------------------------------
-// Globals
-//--------------------------------------------------------------------------------------
-cbuffer cbPerObject : register( b0 )
-{
-    matrix        g_mWorldViewProjection    ;//SLANG: : packoffset( c0 );
-};
-
-//--------------------------------------------------------------------------------------
-// Input / Output structures
-//--------------------------------------------------------------------------------------
-struct VS_INPUT
-{
-    float4 vPosition    : POSITION;
-};
-
-struct VS_OUTPUT
-{
-    float4 vPosition    : SV_POSITION;
-};
-
-//--------------------------------------------------------------------------------------
-// Vertex Shader
-//--------------------------------------------------------------------------------------
-VS_OUTPUT VSMain( VS_INPUT Input )
-{
-    VS_OUTPUT Output;
-    
-    // There is nothing special here, just transform and write out the depth.
-    Output.vPosition = mul( Input.vPosition, g_mWorldViewProjection );
-
-    return Output;
-}
-
-
-VS_OUTPUT VSMainPancake( VS_INPUT Input )
-{
-    VS_OUTPUT Output;
-    // after transform move clipped geometry to near plane
-    Output.vPosition = mul( Input.vPosition, g_mWorldViewProjection );
-	//Output.vPosition.z = max( Output.vPosition.z, 0.0f );
-    return Output;
-}
-\ No newline at end of file
diff --git a/tests/hlsl/dxsdk/ComputeShaderSort11/ComputeShaderSort11.hlsl b/tests/hlsl/dxsdk/ComputeShaderSort11/ComputeShaderSort11.hlsl
deleted file mode 100644
index 6e14bc10e..000000000
--- a/tests/hlsl/dxsdk/ComputeShaderSort11/ComputeShaderSort11.hlsl
+++ /dev/null
@@ -1,75 +0,0 @@
-//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues.
-//TEST:COMPARE_HLSL: -profile cs_4_0 -entry BitonicSort -entry MatrixTranspose
-//--------------------------------------------------------------------------------------
-// File: ComputeShaderSort11.hlsl
-//
-// This file contains the compute shaders to perform GPU sorting using DirectX 11.
-// 
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-#define BITONIC_BLOCK_SIZE 512
-
-#define TRANSPOSE_BLOCK_SIZE 16
-
-//--------------------------------------------------------------------------------------
-// Constant Buffers
-//--------------------------------------------------------------------------------------
-cbuffer CB : register( b0 )
-{
-    unsigned int g_iLevel;
-    unsigned int g_iLevelMask;
-    unsigned int g_iWidth;
-    unsigned int g_iHeight;
-};
-
-//--------------------------------------------------------------------------------------
-// Structured Buffers
-//--------------------------------------------------------------------------------------
-StructuredBuffer<unsigned int> Input : register( t0 );
-RWStructuredBuffer<unsigned int> Data : register( u0 );
-
-//--------------------------------------------------------------------------------------
-// Bitonic Sort Compute Shader
-//--------------------------------------------------------------------------------------
-groupshared unsigned int shared_data[BITONIC_BLOCK_SIZE];
-
-[numthreads(BITONIC_BLOCK_SIZE, 1, 1)]
-void BitonicSort( uint3 Gid : SV_GroupID, 
-                  uint3 DTid : SV_DispatchThreadID, 
-                  uint3 GTid : SV_GroupThreadID, 
-                  uint GI : SV_GroupIndex )
-{
-    // Load shared data
-    shared_data[GI] = Data[DTid.x];
-    GroupMemoryBarrierWithGroupSync();
-    
-    // Sort the shared data
-    for (unsigned int j = g_iLevel >> 1 ; j > 0 ; j >>= 1)
-    {
-        unsigned int result = ((shared_data[GI & ~j] <= shared_data[GI | j]) == (bool)(g_iLevelMask & DTid.x))? shared_data[GI ^ j] : shared_data[GI];
-        GroupMemoryBarrierWithGroupSync();
-        shared_data[GI] = result;
-        GroupMemoryBarrierWithGroupSync();
-    }
-    
-    // Store shared data
-    Data[DTid.x] = shared_data[GI];
-}
-
-//--------------------------------------------------------------------------------------
-// Matrix Transpose Compute Shader
-//--------------------------------------------------------------------------------------
-groupshared unsigned int transpose_shared_data[TRANSPOSE_BLOCK_SIZE * TRANSPOSE_BLOCK_SIZE];
-
-[numthreads(TRANSPOSE_BLOCK_SIZE, TRANSPOSE_BLOCK_SIZE, 1)]
-void MatrixTranspose( uint3 Gid : SV_GroupID, 
-                      uint3 DTid : SV_DispatchThreadID, 
-                      uint3 GTid : SV_GroupThreadID, 
-                      uint GI : SV_GroupIndex )
-{
-    transpose_shared_data[GI] = Input[DTid.y * g_iWidth + DTid.x];
-    GroupMemoryBarrierWithGroupSync();
-    uint2 XY = DTid.yx - GTid.yx + GTid.xy;
-    Data[XY.y * g_iHeight + XY.x] = transpose_shared_data[GTid.x * TRANSPOSE_BLOCK_SIZE + GTid.y];
-}
diff --git a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial02/Tutorial02.fx b/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial02/Tutorial02.fx
deleted file mode 100644
index e1fead571..000000000
--- a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial02/Tutorial02.fx
+++ /dev/null
@@ -1,28 +0,0 @@
-//TEST:COMPARE_HLSL: -profile sm_4_0 -entry VS -stage vertex -entry PS -stage fragment
-
-#ifndef __SLANG__
-#define SV_Target SV_TARGET
-#endif
-
-//--------------------------------------------------------------------------------------
-// File: Tutorial02.fx
-//
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-//--------------------------------------------------------------------------------------
-// Vertex Shader
-//--------------------------------------------------------------------------------------
-float4 VS( float4 Pos : POSITION ) : SV_POSITION
-{
-    return Pos;
-}
-
-
-//--------------------------------------------------------------------------------------
-// Pixel Shader
-//--------------------------------------------------------------------------------------
-float4 PS( float4 Pos : SV_POSITION ) : SV_Target
-{
-    return float4( 1.0f, 1.0f, 0.0f, 1.0f );    // Yellow, with Alpha = 1
-}
diff --git a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial02/Tutorial02_PS.hlsl b/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial02/Tutorial02_PS.hlsl
deleted file mode 100644
index 82300c10c..000000000
--- a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial02/Tutorial02_PS.hlsl
+++ /dev/null
@@ -1,3 +0,0 @@
-//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues.
-//TEST:COMPARE_HLSL: -profile ps_4_0 -entry PS
-#include "Tutorial02.fx"
diff --git a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial02/Tutorial02_VS.hlsl b/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial02/Tutorial02_VS.hlsl
deleted file mode 100644
index cdf4f9649..000000000
--- a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial02/Tutorial02_VS.hlsl
+++ /dev/null
@@ -1,3 +0,0 @@
-//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues.
-//TEST:COMPARE_HLSL: -profile vs_4_0 -entry VS
-#include "Tutorial02.fx"
diff --git a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial03/Tutorial03.fx b/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial03/Tutorial03.fx
deleted file mode 100644
index e1fead571..000000000
--- a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial03/Tutorial03.fx
+++ /dev/null
@@ -1,28 +0,0 @@
-//TEST:COMPARE_HLSL: -profile sm_4_0 -entry VS -stage vertex -entry PS -stage fragment
-
-#ifndef __SLANG__
-#define SV_Target SV_TARGET
-#endif
-
-//--------------------------------------------------------------------------------------
-// File: Tutorial02.fx
-//
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-//--------------------------------------------------------------------------------------
-// Vertex Shader
-//--------------------------------------------------------------------------------------
-float4 VS( float4 Pos : POSITION ) : SV_POSITION
-{
-    return Pos;
-}
-
-
-//--------------------------------------------------------------------------------------
-// Pixel Shader
-//--------------------------------------------------------------------------------------
-float4 PS( float4 Pos : SV_POSITION ) : SV_Target
-{
-    return float4( 1.0f, 1.0f, 0.0f, 1.0f );    // Yellow, with Alpha = 1
-}
diff --git a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial03/Tutorial03_PS.hlsl b/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial03/Tutorial03_PS.hlsl
deleted file mode 100644
index 684788198..000000000
--- a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial03/Tutorial03_PS.hlsl
+++ /dev/null
@@ -1,3 +0,0 @@
-//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues.
-//TEST:COMPARE_HLSL: -profile ps_4_0 -entry PS
-#include "Tutorial03.fx"
diff --git a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial03/Tutorial03_VS.hlsl b/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial03/Tutorial03_VS.hlsl
deleted file mode 100644
index 40d9770fc..000000000
--- a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial03/Tutorial03_VS.hlsl
+++ /dev/null
@@ -1,3 +0,0 @@
-//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues.
-//TEST:COMPARE_HLSL: -profile vs_4_0 -entry VS
-#include "Tutorial03.fx"
diff --git a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial04/Tutorial04.fx b/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial04/Tutorial04.fx
deleted file mode 100644
index d311edc5a..000000000
--- a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial04/Tutorial04.fx
+++ /dev/null
@@ -1,46 +0,0 @@
-//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues.
-//TEST:COMPARE_HLSL: -profile vs_4_0 -entry VS -profile ps_4_0 -entry PS
-//--------------------------------------------------------------------------------------
-// File: Tutorial04.fx
-//
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-//--------------------------------------------------------------------------------------
-// Constant Buffer Variables
-//--------------------------------------------------------------------------------------
-cbuffer ConstantBuffer : register( b0 )
-{
-	matrix World;
-	matrix View;
-	matrix Projection;
-}
-
-//--------------------------------------------------------------------------------------
-struct VS_OUTPUT
-{
-    float4 Pos : SV_POSITION;
-    float4 Color : COLOR0;
-};
-
-//--------------------------------------------------------------------------------------
-// Vertex Shader
-//--------------------------------------------------------------------------------------
-VS_OUTPUT VS( float4 Pos : POSITION, float4 Color : COLOR )
-{
-    VS_OUTPUT output = (VS_OUTPUT)0;
-    output.Pos = mul( Pos, World );
-    output.Pos = mul( output.Pos, View );
-    output.Pos = mul( output.Pos, Projection );
-    output.Color = Color;
-    return output;
-}
-
-
-//--------------------------------------------------------------------------------------
-// Pixel Shader
-//--------------------------------------------------------------------------------------
-float4 PS( VS_OUTPUT input ) : SV_Target
-{
-    return input.Color;
-}
diff --git a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial04/Tutorial04_PS.hlsl b/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial04/Tutorial04_PS.hlsl
deleted file mode 100644
index 65c36988f..000000000
--- a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial04/Tutorial04_PS.hlsl
+++ /dev/null
@@ -1,3 +0,0 @@
-//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues.
-//TEST:COMPARE_HLSL: -profile ps_4_0 -entry PS
-#include "Tutorial04.fx"
diff --git a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial04/Tutorial04_VS.hlsl b/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial04/Tutorial04_VS.hlsl
deleted file mode 100644
index 4505c1a98..000000000
--- a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial04/Tutorial04_VS.hlsl
+++ /dev/null
@@ -1,3 +0,0 @@
-//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues.
-//TEST:COMPARE_HLSL: -profile vs_4_0 -entry VS
-#include "Tutorial04.fx"
diff --git a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial05/Tutorial05.fx b/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial05/Tutorial05.fx
deleted file mode 100644
index 5ef5487da..000000000
--- a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial05/Tutorial05.fx
+++ /dev/null
@@ -1,54 +0,0 @@
-//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues.
-//TEST:COMPARE_HLSL: -profile vs_4_0 -entry VS -profile ps_4_0 -entry PS
-//--------------------------------------------------------------------------------------
-// File: Tutorial05.fx
-//
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-//--------------------------------------------------------------------------------------
-// Constant Buffer Variables
-//--------------------------------------------------------------------------------------
-cbuffer ConstantBuffer : register( b0 )
-{
-	matrix World;
-	matrix View;
-	matrix Projection;
-}
-
-//--------------------------------------------------------------------------------------
-struct VS_INPUT
-{
-    float4 Pos : POSITION;
-    float4 Color : COLOR;
-};
-
-struct PS_INPUT
-{
-    float4 Pos : SV_POSITION;
-    float4 Color : COLOR;
-};
-
-
-//--------------------------------------------------------------------------------------
-// Vertex Shader
-//--------------------------------------------------------------------------------------
-PS_INPUT VS( VS_INPUT input )
-{
-    PS_INPUT output = (PS_INPUT)0;
-    output.Pos = mul( input.Pos, World );
-    output.Pos = mul( output.Pos, View );
-    output.Pos = mul( output.Pos, Projection );
-    output.Color = input.Color;
-    
-    return output;
-}
-
-
-//--------------------------------------------------------------------------------------
-// Pixel Shader
-//--------------------------------------------------------------------------------------
-float4 PS( PS_INPUT input) : SV_Target
-{
-    return input.Color;
-}
diff --git a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial05/Tutorial05_PS.hlsl b/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial05/Tutorial05_PS.hlsl
deleted file mode 100644
index 4226d4b47..000000000
--- a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial05/Tutorial05_PS.hlsl
+++ /dev/null
@@ -1,3 +0,0 @@
-//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues.
-//TEST:COMPARE_HLSL: -profile ps_4_0 -entry PS
-#include "Tutorial05.fx"
diff --git a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial05/Tutorial05_VS.hlsl b/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial05/Tutorial05_VS.hlsl
deleted file mode 100644
index 1c2f5519f..000000000
--- a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial05/Tutorial05_VS.hlsl
+++ /dev/null
@@ -1,3 +0,0 @@
-//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues.
-//TEST:COMPARE_HLSL: -profile vs_4_0 -entry VS
-#include "Tutorial05.fx"
diff --git a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial06/Tutorial06.fx b/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial06/Tutorial06.fx
deleted file mode 100644
index 219e96b9f..000000000
--- a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial06/Tutorial06.fx
+++ /dev/null
@@ -1,76 +0,0 @@
-//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues.
-//TEST:COMPARE_HLSL: -profile vs_4_0 -entry VS -profile ps_4_0 -entry PS -entry PSSolid
-//--------------------------------------------------------------------------------------
-// File: Tutorial06.fx
-//
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-
-//--------------------------------------------------------------------------------------
-// Constant Buffer Variables
-//--------------------------------------------------------------------------------------
-cbuffer ConstantBuffer : register( b0 )
-{
-	matrix World;
-	matrix View;
-	matrix Projection;
-	float4 vLightDir[2];
-	float4 vLightColor[2];
-	float4 vOutputColor;
-}
-
-
-//--------------------------------------------------------------------------------------
-struct VS_INPUT
-{
-    float4 Pos : POSITION;
-    float3 Norm : NORMAL;
-};
-
-struct PS_INPUT
-{
-    float4 Pos : SV_POSITION;
-    float3 Norm : TEXCOORD0;
-};
-
-
-//--------------------------------------------------------------------------------------
-// Vertex Shader
-//--------------------------------------------------------------------------------------
-PS_INPUT VS( VS_INPUT input )
-{
-    PS_INPUT output = (PS_INPUT)0;
-    output.Pos = mul( input.Pos, World );
-    output.Pos = mul( output.Pos, View );
-    output.Pos = mul( output.Pos, Projection );
-    output.Norm = mul( float4( input.Norm, 1 ), World ).xyz;
-    
-    return output;
-}
-
-
-//--------------------------------------------------------------------------------------
-// Pixel Shader
-//--------------------------------------------------------------------------------------
-float4 PS( PS_INPUT input) : SV_Target
-{
-    float4 finalColor = 0;
-    
-    //do NdotL lighting for 2 lights
-    for(int i=0; i<2; i++)
-    {
-        finalColor += saturate( dot( (float3)vLightDir[i],input.Norm) * vLightColor[i] );
-    }
-    finalColor.a = 1;
-    return finalColor;
-}
-
-
-//--------------------------------------------------------------------------------------
-// PSSolid - render a solid color
-//--------------------------------------------------------------------------------------
-float4 PSSolid( PS_INPUT input) : SV_Target
-{
-    return vOutputColor;
-}
diff --git a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial06/Tutorial06_PS.hlsl b/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial06/Tutorial06_PS.hlsl
deleted file mode 100644
index 7bd5ece78..000000000
--- a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial06/Tutorial06_PS.hlsl
+++ /dev/null
@@ -1,3 +0,0 @@
-//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues.
-//TEST:COMPARE_HLSL: -profile ps_4_0 -entry PS
-#include "Tutorial06.fx"
diff --git a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial06/Tutorial06_VS.hlsl b/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial06/Tutorial06_VS.hlsl
deleted file mode 100644
index 50fcdbf56..000000000
--- a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial06/Tutorial06_VS.hlsl
+++ /dev/null
@@ -1,3 +0,0 @@
-//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues.
-//TEST:COMPARE_HLSL: -profile vs_4_0 -entry VS
-#include "Tutorial06.fx"
diff --git a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial07/Tutorial07.fx b/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial07/Tutorial07.fx
deleted file mode 100644
index f99aeba1b..000000000
--- a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial07/Tutorial07.fx
+++ /dev/null
@@ -1,67 +0,0 @@
-//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues.
-//TEST:COMPARE_HLSL: -profile vs_4_0 -entry VS -profile ps_4_0 -entry PS
-//--------------------------------------------------------------------------------------
-// File: Tutorial07.fx
-//
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-//--------------------------------------------------------------------------------------
-// Constant Buffer Variables
-//--------------------------------------------------------------------------------------
-Texture2D txDiffuse : register( t0 );
-SamplerState samLinear : register( s0 );
-
-cbuffer cbNeverChanges : register( b0 )
-{
-    matrix View;
-};
-
-cbuffer cbChangeOnResize : register( b1 )
-{
-    matrix Projection;
-};
-
-cbuffer cbChangesEveryFrame : register( b2 )
-{
-    matrix World;
-    float4 vMeshColor;
-};
-
-
-//--------------------------------------------------------------------------------------
-struct VS_INPUT
-{
-    float4 Pos : POSITION;
-    float2 Tex : TEXCOORD0;
-};
-
-struct PS_INPUT
-{
-    float4 Pos : SV_POSITION;
-    float2 Tex : TEXCOORD0;
-};
-
-
-//--------------------------------------------------------------------------------------
-// Vertex Shader
-//--------------------------------------------------------------------------------------
-PS_INPUT VS( VS_INPUT input )
-{
-    PS_INPUT output = (PS_INPUT)0;
-    output.Pos = mul( input.Pos, World );
-    output.Pos = mul( output.Pos, View );
-    output.Pos = mul( output.Pos, Projection );
-    output.Tex = input.Tex;
-    
-    return output;
-}
-
-
-//--------------------------------------------------------------------------------------
-// Pixel Shader
-//--------------------------------------------------------------------------------------
-float4 PS( PS_INPUT input) : SV_Target
-{
-    return txDiffuse.Sample( samLinear, input.Tex ) * vMeshColor;
-}
diff --git a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial07/Tutorial07_PS.hlsl b/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial07/Tutorial07_PS.hlsl
deleted file mode 100644
index f81862efd..000000000
--- a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial07/Tutorial07_PS.hlsl
+++ /dev/null
@@ -1,3 +0,0 @@
-//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues.
-//TEST:COMPARE_HLSL: -profile ps_4_0 -entry PS
-#include "Tutorial07.fx"
diff --git a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial07/Tutorial07_VS.hlsl b/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial07/Tutorial07_VS.hlsl
deleted file mode 100644
index 3ce6baf34..000000000
--- a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial07/Tutorial07_VS.hlsl
+++ /dev/null
@@ -1,3 +0,0 @@
-//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues.
-//TEST:COMPARE_HLSL: -profile vs_4_0 -entry VS
-#include "Tutorial07.fx"
diff --git a/tests/hlsl/dxsdk/Direct3D11TutorialsDXUT/Tutorial08/Tutorial08.fx b/tests/hlsl/dxsdk/Direct3D11TutorialsDXUT/Tutorial08/Tutorial08.fx
deleted file mode 100644
index f3c6a5774..000000000
--- a/tests/hlsl/dxsdk/Direct3D11TutorialsDXUT/Tutorial08/Tutorial08.fx
+++ /dev/null
@@ -1,56 +0,0 @@
-//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues.
-//TEST:COMPARE_HLSL: -profile vs_4_0 -entry VS -profile ps_4_0 -entry PS
-//--------------------------------------------------------------------------------------
-// File: Tutorial08.fx
-//
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-//--------------------------------------------------------------------------------------
-// Constant Buffer Variables
-//--------------------------------------------------------------------------------------
-Texture2D txDiffuse : register( t0 );
-SamplerState samLinear : register( s0 );
-
-cbuffer cbChangesEveryFrame : register( b0 )
-{
-    matrix WorldViewProj;
-    matrix World;
-    float4 vMeshColor;
-};
-
-
-//--------------------------------------------------------------------------------------
-struct VS_INPUT
-{
-    float4 Pos : POSITION;
-    float2 Tex : TEXCOORD;
-};
-
-struct PS_INPUT
-{
-    float4 Pos : SV_POSITION;
-    float2 Tex : TEXCOORD0;
-};
-
-
-//--------------------------------------------------------------------------------------
-// Vertex Shader
-//--------------------------------------------------------------------------------------
-PS_INPUT VS( VS_INPUT input )
-{
-    PS_INPUT output = (PS_INPUT)0;
-    output.Pos = mul( input.Pos, WorldViewProj );
-    output.Tex = input.Tex;
-    
-    return output;
-}
-
-
-//--------------------------------------------------------------------------------------
-// Pixel Shader
-//--------------------------------------------------------------------------------------
-float4 PS( PS_INPUT input) : SV_Target
-{
-    return txDiffuse.Sample( samLinear, input.Tex ) * vMeshColor;
-}
diff --git a/tests/hlsl/dxsdk/Direct3D11TutorialsDXUT/Tutorial09/Tutorial09.fx b/tests/hlsl/dxsdk/Direct3D11TutorialsDXUT/Tutorial09/Tutorial09.fx
deleted file mode 100644
index 2be29fb40..000000000
--- a/tests/hlsl/dxsdk/Direct3D11TutorialsDXUT/Tutorial09/Tutorial09.fx
+++ /dev/null
@@ -1,69 +0,0 @@
-//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues.
-//TEST:COMPARE_HLSL: -profile vs_4_0 -entry VS -profile ps_4_0 -entry PS
-//--------------------------------------------------------------------------------------
-// File: Tutorial09.fx
-//
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-//--------------------------------------------------------------------------------------
-// Constant Buffer Variables
-//--------------------------------------------------------------------------------------
-Texture2D txDiffuse : register( t0 );
-SamplerState samLinear : register( s0 );
-
-cbuffer cbNeverChanges : register( b0 )
-{
-    float3 vLightDir;
-};
-
-cbuffer cbChangesEveryFrame : register( b1 )
-{
-    matrix WorldViewProj;
-    matrix World;
-};
-
-struct VS_INPUT
-{
-    float3 Pos          : POSITION;         //position
-    float3 Norm         : NORMAL;           //normal
-    float2 Tex          : TEXCOORD0;        //texture coordinate
-};
-
-struct PS_INPUT
-{
-    float4 Pos : SV_POSITION;
-    float4 Diffuse : COLOR0;
-    float2 Tex : TEXCOORD1;
-};
-
-
-//--------------------------------------------------------------------------------------
-// Vertex Shader
-//--------------------------------------------------------------------------------------
-PS_INPUT VS( VS_INPUT input )
-{
-    PS_INPUT output = (PS_INPUT)0;
-    output.Pos = mul( float4(input.Pos,1), WorldViewProj );
-    float3 vNormalWorldSpace = normalize( mul( input.Norm, (float3x3)World ) );
-
-    float fLighting = saturate( dot( vNormalWorldSpace, vLightDir ) );
-    output.Diffuse.rgb = fLighting;
-    output.Diffuse.a = 1.0f; 
-
-    output.Tex = input.Tex;
-    
-    return output;
-}
-
-
-//--------------------------------------------------------------------------------------
-// Pixel Shader
-//--------------------------------------------------------------------------------------
-float4 PS( PS_INPUT input) : SV_Target
-{
-    //calculate lighting assuming light color is <1,1,1,1>
-    float4 outputColor = txDiffuse.Sample( samLinear, input.Tex ) * input.Diffuse;
-    outputColor.a = 1;
-    return outputColor;
-}
diff --git a/tests/hlsl/dxsdk/Direct3D11TutorialsDXUT/Tutorial10/Tutorial10.fx b/tests/hlsl/dxsdk/Direct3D11TutorialsDXUT/Tutorial10/Tutorial10.fx
deleted file mode 100644
index 68f53c0b6..000000000
--- a/tests/hlsl/dxsdk/Direct3D11TutorialsDXUT/Tutorial10/Tutorial10.fx
+++ /dev/null
@@ -1,73 +0,0 @@
-//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues.
-//TEST:COMPARE_HLSL: -profile vs_4_0 -entry VS -profile ps_4_0 -entry PS
-//--------------------------------------------------------------------------------------
-// File: Tutorial10.fx
-//
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-//--------------------------------------------------------------------------------------
-// Constant Buffer Variables
-//--------------------------------------------------------------------------------------
-Texture2D txDiffuse : register( t0 );
-SamplerState samLinear : register( s0 );
-
-cbuffer cbNeverChanges : register( b0 )
-{
-    float3 vLightDir;
-};
-
-cbuffer cbChangesEveryFrame : register( b1 )
-{
-    matrix WorldViewProj;
-    matrix World;
-    float Puffiness;
-};
-
-struct VS_INPUT
-{
-    float3 Pos          : POSITION;         //position
-    float3 Norm         : NORMAL;           //normal
-    float2 Tex          : TEXCOORD0;        //texture coordinate
-};
-
-struct PS_INPUT
-{
-    float4 Pos : SV_POSITION;
-    float4 Diffuse : COLOR0;
-    float2 Tex : TEXCOORD1;
-};
-
-
-//--------------------------------------------------------------------------------------
-// Vertex Shader
-//--------------------------------------------------------------------------------------
-PS_INPUT VS( VS_INPUT input )
-{
-    PS_INPUT output = (PS_INPUT)0;
-
-    input.Pos += input.Norm * Puffiness;
-
-    output.Pos = mul( float4(input.Pos,1), WorldViewProj );
-    float3 vNormalWorldSpace = normalize( mul( input.Norm, (float3x3)World ) );
-
-    float fLighting = saturate( dot( vNormalWorldSpace, vLightDir ) );
-    output.Diffuse.rgb = fLighting;
-    output.Diffuse.a = 1.0f; 
-
-    output.Tex = input.Tex;
-    
-    return output;
-}
-
-
-//--------------------------------------------------------------------------------------
-// Pixel Shader
-//--------------------------------------------------------------------------------------
-float4 PS( PS_INPUT input) : SV_Target
-{
-    //calculate lighting assuming light color is <1,1,1,1>
-    float4 outputColor = txDiffuse.Sample( samLinear, input.Tex ) * input.Diffuse;
-    outputColor.a = 1;
-    return outputColor;
-}
diff --git a/tests/hlsl/dxsdk/Direct3D11TutorialsFX11/Tutorial11/Tutorial11.fx b/tests/hlsl/dxsdk/Direct3D11TutorialsFX11/Tutorial11/Tutorial11.fx
deleted file mode 100644
index a647a9079..000000000
--- a/tests/hlsl/dxsdk/Direct3D11TutorialsFX11/Tutorial11/Tutorial11.fx
+++ /dev/null
@@ -1,117 +0,0 @@
-//TEST_IGNORE_FILE:
-//--------------------------------------------------------------------------------------
-// File: Tutorial11.fx
-//
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-
-//--------------------------------------------------------------------------------------
-// Constant Buffer Variables
-//--------------------------------------------------------------------------------------
-Texture2D g_txDiffuse;
-SamplerState samLinear
-{
-    Filter = MIN_MAG_MIP_LINEAR;
-    AddressU = Wrap;
-    AddressV = Wrap;
-};
-
-cbuffer cbConstant
-{
-    float3 vLightDir = float3(-0.577,0.577,-0.577);
-};
-
-cbuffer cbChangesEveryFrame
-{
-    matrix World;
-    matrix View;
-    matrix Projection;
-    float Time;
-};
-
-cbuffer cbUserChanges
-{
-    float Waviness;
-};
-
-struct VS_INPUT
-{
-    float3 Pos          : POSITION;        
-    float3 Norm         : NORMAL;          
-    float2 Tex          : TEXCOORD0;       
-};
-
-struct PS_INPUT
-{
-    float4 Pos : SV_POSITION;
-    float3 Norm : TEXCOORD0;
-    float2 Tex : TEXCOORD1;
-};
-
-//--------------------------------------------------------------------------------------
-// DepthStates
-//--------------------------------------------------------------------------------------
-DepthStencilState EnableDepth
-{
-    DepthEnable = TRUE;
-    DepthWriteMask = ALL;
-    DepthFunc = LESS_EQUAL;
-};
-
-BlendState NoBlending
-{
-    AlphaToCoverageEnable = FALSE;
-    BlendEnable[0] = FALSE;
-};
-
-
-//--------------------------------------------------------------------------------------
-// Vertex Shader
-//--------------------------------------------------------------------------------------
-PS_INPUT VS( VS_INPUT input )
-{
-    PS_INPUT output = (PS_INPUT)0;
-    
-    output.Pos = mul( float4(input.Pos,1), World );
-    
-    output.Pos.x += sin( output.Pos.y*0.1f + Time )*Waviness;
-    
-    output.Pos = mul( output.Pos, View );
-    output.Pos = mul( output.Pos, Projection );
-    output.Norm = mul( input.Norm, World );
-    output.Tex = input.Tex;
-    
-    return output;
-}
-
-
-//--------------------------------------------------------------------------------------
-// Pixel Shader
-//--------------------------------------------------------------------------------------
-float4 PS( PS_INPUT input) : SV_Target
-{
-    // Calculate lighting assuming light color is <1,1,1,1>
-    float fLighting = saturate( dot( input.Norm, vLightDir ) );
-    float4 outputColor = g_txDiffuse.Sample( samLinear, input.Tex ) * fLighting;
-    outputColor.a = 1;
-    return outputColor;
-}
-
-
-//--------------------------------------------------------------------------------------
-// Technique
-//--------------------------------------------------------------------------------------
-technique11 Render
-{
-    pass P0
-    {
-        SetVertexShader( CompileShader( vs_4_0, VS() ) );
-        SetGeometryShader( NULL );
-        SetPixelShader( CompileShader( ps_4_0, PS() ) );        
-
-        SetDepthStencilState( EnableDepth, 0 );
-        SetBlendState( NoBlending, float4( 0.0f, 0.0f, 0.0f, 0.0f ), 0xFFFFFFFF );
-    }
-}
-
diff --git a/tests/hlsl/dxsdk/Direct3D11TutorialsFX11/Tutorial12/Tutorial12.fx b/tests/hlsl/dxsdk/Direct3D11TutorialsFX11/Tutorial12/Tutorial12.fx
deleted file mode 100644
index aae7f9a87..000000000
--- a/tests/hlsl/dxsdk/Direct3D11TutorialsFX11/Tutorial12/Tutorial12.fx
+++ /dev/null
@@ -1,129 +0,0 @@
-//TEST_IGNORE_FILE:
-//
-// Constant Buffer Variables
-//
-
-Texture2D g_txDiffuse;
-SamplerState samLinear
-{
-    Filter = MIN_MAG_MIP_LINEAR;
-    AddressU = Wrap;
-    AddressV = Wrap;
-};
-
-TextureCube g_txEnvMap;
-SamplerState samLinearClamp
-{
-    Filter = MIN_MAG_MIP_LINEAR;
-    AddressU = Clamp;
-    AddressV = Clamp;
-};
-
-cbuffer cbConstant
-{
-    float3 vLightDir = float3(-0.577,0.577,-0.577);
-};
-
-cbuffer cbChangesEveryFrame
-{
-    matrix World;
-    matrix View;
-    matrix Projection;
-    float Time;
-};
-
-cbuffer cbUserChanges
-{
-    float Waviness;
-};
-
-struct VS_INPUT
-{
-    float3 Pos          : POSITION;         //position
-    float3 Norm         : NORMAL;           //normal
-    float2 Tex          : TEXCOORD0;        //texture coordinate
-};
-
-struct PS_INPUT
-{
-    float4 Pos : SV_POSITION;
-    float3 Norm : TEXCOORD0;
-    float2 Tex : TEXCOORD1;
-    float3 ViewR : TEXCOORD2;
-};
-
-//--------------------------------------------------------------------------------------
-// DepthStates
-//--------------------------------------------------------------------------------------
-DepthStencilState EnableDepth
-{
-    DepthEnable = TRUE;
-    DepthWriteMask = ALL;
-    DepthFunc = LESS_EQUAL;
-};
-
-BlendState NoBlending
-{
-    AlphaToCoverageEnable = FALSE;
-    BlendEnable[0] = FALSE;
-};
-
-//
-// Vertex Shader
-//
-PS_INPUT VS( VS_INPUT input )
-{
-    PS_INPUT output = (PS_INPUT)0;
-    
-    output.Pos = mul( float4(input.Pos,1), World );
-    
-    output.Pos.x += sin( output.Pos.y*0.1f + Time )*Waviness;
-    
-    output.Pos = mul( output.Pos, View );
-    output.Pos = mul( output.Pos, Projection );
-    output.Norm = mul( input.Norm, (float3x3)World );
-    output.Tex = input.Tex;
-    
-    // Calculate the reflection vector
-    float3 viewNorm = mul( output.Norm, (float3x3)View );
-    output.ViewR = reflect( viewNorm, float3(0,0,-1.0) );
-    
-    return output;
-}
-
-
-//
-// Pixel Shader
-//
-float4 PS( PS_INPUT input) : SV_Target
-{
-    // Calculate lighting assuming light color is <1,1,1,1>
-    float fLighting = saturate( dot( input.Norm, vLightDir ) );
-   
-    // Load the environment map texture
-    float4 cReflect = g_txEnvMap.Sample( samLinearClamp, input.ViewR );
-    
-    // Load the diffuse texture and multiply by the lighting amount
-    float4 cDiffuse = g_txDiffuse.Sample( samLinear, input.Tex ) * fLighting;
-    
-    // Add diffuse to reflection and go
-    float4 cTotal = cDiffuse + cReflect;
-    cTotal.a = 1;
-    return cTotal;
-}
-
-//
-// Technique
-//
-technique11 Render
-{
-    pass P0
-    {
-        SetVertexShader( CompileShader( vs_4_0, VS() ) );
-        SetGeometryShader( NULL );
-        SetPixelShader( CompileShader( ps_4_0, PS() ) );
-        
-        SetDepthStencilState( EnableDepth, 0 );
-        SetBlendState( NoBlending, float4( 0.0f, 0.0f, 0.0f, 0.0f ), 0xFFFFFFFF );
-    }
-}
diff --git a/tests/hlsl/dxsdk/Direct3D11TutorialsFX11/Tutorial13/Tutorial13.fx b/tests/hlsl/dxsdk/Direct3D11TutorialsFX11/Tutorial13/Tutorial13.fx
deleted file mode 100644
index a6f09ecc7..000000000
--- a/tests/hlsl/dxsdk/Direct3D11TutorialsFX11/Tutorial13/Tutorial13.fx
+++ /dev/null
@@ -1,191 +0,0 @@
-//TEST_IGNORE_FILE:
-//--------------------------------------------------------------------------------------
-// File: Tutorial13.fx
-//
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-
-//--------------------------------------------------------------------------------------
-// Constant Buffer Variables
-//--------------------------------------------------------------------------------------
-Texture2D g_txDiffuse;
-SamplerState samLinear
-{
-    Filter = MIN_MAG_MIP_LINEAR;
-    AddressU = Wrap;
-    AddressV = Wrap;
-};
-
-TextureCube g_txEnvMap;
-SamplerState samLinearClamp
-{
-    Filter = MIN_MAG_MIP_LINEAR;
-    AddressU = Clamp;
-    AddressV = Clamp;
-};
-
-cbuffer cbConstant
-{
-    float3 vLightDir = float3(-0.577,0.577,-0.577);
-};
-
-cbuffer cbChangesEveryFrame
-{
-    matrix World;
-    matrix View;
-    matrix Projection;
-    float Time;
-};
-
-cbuffer cbUserChanges
-{
-    float Explode;
-};
-
-struct VS_INPUT
-{
-    float3 Pos          : POSITION;         
-    float3 Norm         : NORMAL;           
-    float2 Tex          : TEXCOORD0;        
-};
-
-struct GSPS_INPUT
-{
-    float4 Pos : SV_POSITION;
-    float3 Norm : TEXCOORD0;
-    float2 Tex : TEXCOORD1;
-};
-
-//--------------------------------------------------------------------------------------
-// DepthStates
-//--------------------------------------------------------------------------------------
-DepthStencilState EnableDepth
-{
-    DepthEnable = TRUE;
-    DepthWriteMask = ALL;
-    DepthFunc = LESS_EQUAL;
-};
-
-BlendState NoBlending
-{
-    AlphaToCoverageEnable = FALSE;
-    BlendEnable[0] = FALSE;
-};
-
-
-//--------------------------------------------------------------------------------------
-// Vertex Shader
-//--------------------------------------------------------------------------------------
-GSPS_INPUT VS( VS_INPUT input )
-{
-    GSPS_INPUT output = (GSPS_INPUT)0;
-    
-    output.Pos = mul( float4(input.Pos,1), World );
-    output.Norm = mul( input.Norm, (float3x3)World );
-    output.Tex = input.Tex;
-    
-    return output;
-}
-
-
-//--------------------------------------------------------------------------------------
-// Geometry Shader
-//--------------------------------------------------------------------------------------
-[maxvertexcount(12)]
-void GS( triangle GSPS_INPUT input[3], inout TriangleStream<GSPS_INPUT> TriStream )
-{
-    GSPS_INPUT output;
-    
-    //
-    // Calculate the face normal
-    //
-    float3 faceEdgeA = input[1].Pos - input[0].Pos;
-    float3 faceEdgeB = input[2].Pos - input[0].Pos;
-    float3 faceNormal = normalize( cross(faceEdgeA, faceEdgeB) );
-    float3 ExplodeAmt = faceNormal*Explode;
-    
-    //
-    // Calculate the face center
-    //
-    float3 centerPos = (input[0].Pos.xyz + input[1].Pos.xyz + input[2].Pos.xyz)/3.0;
-    float2 centerTex = (input[0].Tex + input[1].Tex + input[2].Tex)/3.0;
-    centerPos += faceNormal*Explode;
-    
-    //
-    // Output the pyramid
-    //
-    for( int i=0; i<3; i++ )
-    {
-        output.Pos = input[i].Pos + float4(ExplodeAmt,0);
-        output.Pos = mul( output.Pos, View );
-        output.Pos = mul( output.Pos, Projection );
-        output.Norm = input[i].Norm;
-        output.Tex = input[i].Tex;
-        TriStream.Append( output );
-        
-        int iNext = (i+1)%3;
-        output.Pos = input[iNext].Pos + float4(ExplodeAmt,0);
-        output.Pos = mul( output.Pos, View );
-        output.Pos = mul( output.Pos, Projection );
-        output.Norm = input[iNext].Norm;
-        output.Tex = input[iNext].Tex;
-        TriStream.Append( output );
-        
-        output.Pos = float4(centerPos,1) + float4(ExplodeAmt,0);
-        output.Pos = mul( output.Pos, View );
-        output.Pos = mul( output.Pos, Projection );
-        output.Norm = faceNormal;
-        output.Tex = centerTex;
-        TriStream.Append( output );
-        
-        TriStream.RestartStrip();
-    }
-    
-    for( int i=2; i>=0; i-- )
-    {
-        output.Pos = input[i].Pos + float4(ExplodeAmt,0);
-        output.Pos = mul( output.Pos, View );
-        output.Pos = mul( output.Pos, Projection );
-        output.Norm = -input[i].Norm;
-        output.Tex = input[i].Tex;
-        TriStream.Append( output );
-    }
-    TriStream.RestartStrip();
-}
-
-
-//--------------------------------------------------------------------------------------
-// Pixel Shader
-//--------------------------------------------------------------------------------------
-float4 PS( GSPS_INPUT input) : SV_Target
-{
-    // Calculate lighting assuming light color is <1,1,1,1>
-    float fLighting = saturate( dot( input.Norm, vLightDir ) );
-    
-    // Load the diffuse texture and multiply by the lighting amount
-    float4 cDiffuse = g_txDiffuse.Sample( samLinear, input.Tex ) * fLighting;
-    cDiffuse.a = 1;
-    
-    // return diffuse
-    return cDiffuse;
-}
-
-
-//--------------------------------------------------------------------------------------
-// Technique
-//--------------------------------------------------------------------------------------
-technique11 Render
-{
-    pass P0
-    {
-        SetVertexShader( CompileShader( vs_4_0, VS() ) );
-        SetGeometryShader( CompileShader( gs_4_0, GS() ) );
-        SetPixelShader( CompileShader( ps_4_0, PS() ) );
-        
-        SetBlendState( NoBlending, float4( 0.0f, 0.0f, 0.0f, 0.0f ), 0xFFFFFFFF );
-        SetDepthStencilState( EnableDepth, 0 );
-    }
-}
-
-
diff --git a/tests/hlsl/dxsdk/Direct3D11TutorialsFX11/Tutorial14/Tutorial14.fx b/tests/hlsl/dxsdk/Direct3D11TutorialsFX11/Tutorial14/Tutorial14.fx
deleted file mode 100644
index b1e45b842..000000000
--- a/tests/hlsl/dxsdk/Direct3D11TutorialsFX11/Tutorial14/Tutorial14.fx
+++ /dev/null
@@ -1,294 +0,0 @@
-//TEST_IGNORE_FILE:
-//--------------------------------------------------------------------------------------
-// File: Tutorial14.fx
-//
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-
-//--------------------------------------------------------------------------------------
-// Constant Buffer Variables
-//--------------------------------------------------------------------------------------
-Texture2D g_txDiffuse;
-SamplerState samLinear
-{
-    Filter = MIN_MAG_MIP_LINEAR;
-    AddressU = Wrap;
-    AddressV = Wrap;
-};
-
-cbuffer cbConstant
-{
-    float3 vLightDir = float3(-0.577,0.577,-0.577);
-};
-
-cbuffer cbChangesEveryFrame
-{
-    matrix World;
-    matrix View;
-    matrix Projection;
-};
-
-struct VS_INPUT
-{
-    float3 Pos          : POSITION;         //position
-    float3 Norm         : NORMAL;           //normal
-    float2 Tex          : TEXCOORD0;        //texture coordinate
-};
-
-struct PS_INPUT
-{
-    float4 Pos : SV_POSITION;
-    float3 Norm : TEXCOORD0;
-    float2 Tex : TEXCOORD1;
-};
-
-struct QUADVS_INPUT
-{
-    float4 Pos : POSITION;
-    float2 Tex : TEXCOORD0;
-};
-
-struct QUADVS_OUTPUT
-{
-    float4 Pos : SV_POSITION;              // Transformed position
-    float2 Tex : TEXCOORD0;
-};
-
-//--------------------------------------------------------------------------------------
-// Blending States
-//--------------------------------------------------------------------------------------
-BlendState NoBlending
-{
-    BlendEnable[0] = FALSE;
-};
-
-BlendState SrcAlphaBlendingAdd
-{
-    BlendEnable[0] = TRUE;
-    SrcBlend = SRC_ALPHA;
-    DestBlend = ONE;
-    BlendOp = ADD;
-    SrcBlendAlpha = ZERO;
-    DestBlendAlpha = ZERO;
-    BlendOpAlpha = ADD;
-    RenderTargetWriteMask[0] = 0x0F;
-};
-
-BlendState SrcAlphaBlendingSub
-{
-    BlendEnable[0] = TRUE;
-    SrcBlend = SRC_ALPHA;
-    DestBlend = ONE;
-    BlendOp = SUBTRACT;
-    SrcBlendAlpha = ZERO;
-    DestBlendAlpha = ZERO;
-    BlendOpAlpha = ADD;
-    RenderTargetWriteMask[0] = 0x0F;
-};
-
-BlendState SrcColorBlendingAdd
-{
-    BlendEnable[0] = TRUE;
-    SrcBlend = SRC_COLOR;
-    DestBlend = ONE;
-    BlendOp = ADD;
-    SrcBlendAlpha = ZERO;
-    DestBlendAlpha = ZERO;
-    BlendOpAlpha = ADD;
-    RenderTargetWriteMask[0] = 0x0F;
-};
-
-BlendState SrcColorBlendingSub
-{
-    BlendEnable[0] = TRUE;
-    SrcBlend = SRC_COLOR;
-    DestBlend = ONE;
-    BlendOp = SUBTRACT;
-    SrcBlendAlpha = ZERO;
-    DestBlendAlpha = ZERO;
-    BlendOpAlpha = ADD;
-    RenderTargetWriteMask[0] = 0x0F;
-};
-
-//--------------------------------------------------------------------------------------
-// Depth/Stencil States
-//--------------------------------------------------------------------------------------
-DepthStencilState RenderWithStencilState
-{
-    DepthEnable = false;
-    DepthWriteMask = ZERO;
-    DepthFunc = Less;
-    
-    // Setup stencil states
-    StencilEnable = true;
-    StencilReadMask = 0xFF;
-    StencilWriteMask = 0x00;
-    
-    FrontFaceStencilFunc = Not_Equal;
-    FrontFaceStencilPass = Keep;
-    FrontFaceStencilFail = Zero;
-    
-    BackFaceStencilFunc = Not_Equal;
-    BackFaceStencilPass = Keep;
-    BackFaceStencilFail = Zero;
-};
-
-
-
-//--------------------------------------------------------------------------------------
-// Scene Vertex Shader
-//--------------------------------------------------------------------------------------
-PS_INPUT VS( VS_INPUT input )
-{
-    PS_INPUT output = (PS_INPUT)0;
-    
-    output.Pos = mul( float4(input.Pos,1), World );
-    output.Pos = mul( output.Pos, View );
-    output.Pos = mul( output.Pos, Projection );
-    output.Norm = mul( input.Norm, World );
-    output.Tex = input.Tex;
-    
-    return output;
-}
-
-//-----------------------------------------------------------------------------
-// Quad Vertex Shaders
-//-----------------------------------------------------------------------------
-QUADVS_OUTPUT QuadVS( QUADVS_INPUT Input )
-{
-    QUADVS_OUTPUT Output;
-    Output.Pos = mul( Input.Pos, World );
-    Output.Pos = mul( Output.Pos, View );
-    Output.Pos = mul( Output.Pos, Projection );
-    Output.Tex = Input.Tex;
-    return Output;
-}
-
-QUADVS_OUTPUT ScreenQuadVS( QUADVS_INPUT Input )
-{
-    QUADVS_OUTPUT Output;
-    Output.Pos = Input.Pos;
-    Output.Tex = Input.Tex;
-    return Output;
-}
-
-//--------------------------------------------------------------------------------------
-// Pixel Shader
-//--------------------------------------------------------------------------------------
-float4 PS( PS_INPUT input) : SV_Target
-{
-    // Calculate lighting assuming light color is <1,1,1,1>
-    float fLighting = saturate( dot( input.Norm, vLightDir ) );
-    float4 outputColor = g_txDiffuse.Sample( samLinear, input.Tex ) * fLighting;
-    outputColor.a = 1;
-    return outputColor;
-}
-
-//--------------------------------------------------------------------------------------
-// Quad Pixel Shader
-//--------------------------------------------------------------------------------------
-float4 QuadPS( QUADVS_OUTPUT input) : SV_Target
-{
-    return g_txDiffuse.Sample( samLinear, input.Tex );
-}
-
-
-//--------------------------------------------------------------------------------------
-// Scene Techniques
-//--------------------------------------------------------------------------------------
-technique11 RenderScene
-{
-    pass P0
-    {
-        SetVertexShader( CompileShader( vs_4_0, VS() ) );
-        SetGeometryShader( NULL );
-        SetPixelShader( CompileShader( ps_4_0, PS() ) );        
-        SetBlendState( NoBlending, float4( 0.0f, 0.0f, 0.0f, 0.0f ), 0xFFFFFFFF );
-    }
-}
-
-//--------------------------------------------------------------------------------------
-// RenderWithStencil - set the depth stencil state inside of the technique
-//--------------------------------------------------------------------------------------
-technique11 RenderWithStencil
-{
-    pass P0
-    {
-        SetVertexShader( CompileShader( vs_4_0, ScreenQuadVS() ) );
-        SetGeometryShader( NULL );
-        SetPixelShader( CompileShader( ps_4_0, QuadPS() ) );     
-           
-        SetBlendState( NoBlending, float4( 0.0f, 0.0f, 0.0f, 0.0f ), 0xFFFFFFFF );
-        SetDepthStencilState( RenderWithStencilState, 0 );
-    }
-}
-
-//--------------------------------------------------------------------------------------
-// Quad Techniques:  Alpha blending state is set inside the technique
-//--------------------------------------------------------------------------------------
-technique11 RenderQuadSolid
-{
-    pass P0
-    {
-        SetVertexShader( CompileShader( vs_4_0, QuadVS() ) );
-        SetGeometryShader( NULL );
-        SetPixelShader( CompileShader( ps_4_0, QuadPS() ) );     
-           
-        SetBlendState( NoBlending, float4( 0.0f, 0.0f, 0.0f, 0.0f ), 0xFFFFFFFF );
-    }
-}
-
-//--------------------------------------------------------------------------------------
-technique11 RenderQuadSrcAlphaAdd
-{
-    pass P0
-    {
-        SetVertexShader( CompileShader( vs_4_0, QuadVS() ) );
-        SetGeometryShader( NULL );
-        SetPixelShader( CompileShader( ps_4_0, QuadPS() ) );   
-             
-        SetBlendState( SrcAlphaBlendingAdd, float4( 0.0f, 0.0f, 0.0f, 0.0f ), 0xFFFFFFFF );
-    }
-}
-
-//--------------------------------------------------------------------------------------
-technique11 RenderQuadSrcAlphaSub
-{
-    pass P0
-    {
-        SetVertexShader( CompileShader( vs_4_0, QuadVS() ) );
-        SetGeometryShader( NULL );
-        SetPixelShader( CompileShader( ps_4_0, QuadPS() ) );   
-             
-        SetBlendState( SrcAlphaBlendingSub, float4( 0.0f, 0.0f, 0.0f, 0.0f ), 0xFFFFFFFF );
-    }
-}
-
-//--------------------------------------------------------------------------------------
-technique11 RenderQuadSrcColorAdd
-{
-    pass P0
-    {
-        SetVertexShader( CompileShader( vs_4_0, QuadVS() ) );
-        SetGeometryShader( NULL );
-        SetPixelShader( CompileShader( ps_4_0, QuadPS() ) );   
-             
-        SetBlendState( SrcColorBlendingAdd, float4( 0.0f, 0.0f, 0.0f, 0.0f ), 0xFFFFFFFF );
-    }
-}
-
-//--------------------------------------------------------------------------------------
-technique11 RenderQuadSrcColorSub
-{
-    pass P0
-    {
-        SetVertexShader( CompileShader( vs_4_0, QuadVS() ) );
-        SetGeometryShader( NULL );
-        SetPixelShader( CompileShader( ps_4_0, QuadPS() ) );   
-             
-        SetBlendState( SrcColorBlendingSub, float4( 0.0f, 0.0f, 0.0f, 0.0f ), 0xFFFFFFFF );
-    }
-}
-
-
diff --git a/tests/hlsl/dxsdk/DynamicShaderLinkage11/DynamicShaderLinkage11_LightPSH.h b/tests/hlsl/dxsdk/DynamicShaderLinkage11/DynamicShaderLinkage11_LightPSH.h
deleted file mode 100644
index b44251829..000000000
--- a/tests/hlsl/dxsdk/DynamicShaderLinkage11/DynamicShaderLinkage11_LightPSH.h
+++ /dev/null
@@ -1,84 +0,0 @@
-//--------------------------------------------------------------------------------------
-// File: DynamicShaderLinkage11_LightPSH.h
-//
-// The pixel shader light header file for the DynamicShaderLinkage11 sample.  
-// 
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-//--------------------------------------------------------------------------------------
-// Interfaces
-//--------------------------------------------------------------------------------------
-interface iBaseLight
-{
-   float3 IlluminateAmbient(float3 vNormal);
-   
-   float3 IlluminateDiffuse(float3 vNormal);
-
-   float3 IlluminateSpecular(float3 vNormal, int specularPower );
-   
-};
-
-//--------------------------------------------------------------------------------------
-// Classes
-//--------------------------------------------------------------------------------------
-class cAmbientLight : iBaseLight
-{
-   float3	m_vLightColor;     
-   bool     m_bEnable;
-   
-   float3 IlluminateAmbient(float3 vNormal);
-      
-   float3 IlluminateDiffuse(float3 vNormal)
-   { 
-      return (float3)0;
-   }
-
-   float3 IlluminateSpecular(float3 vNormal, int specularPower )
-   { 
-      return (float3)0;
-   }
-};
-
-class cHemiAmbientLight : cAmbientLight
-{
-   // inherited float4 m_vLightColor is the SkyColor
-   float4   m_vGroundColor;
-   float4   m_vDirUp;
-
-   float3 IlluminateAmbient(float3 vNormal);
-   
-};
-
-class cDirectionalLight : cAmbientLight
-{
-   // inherited float4 m_vLightColor is the LightColor
-   float4 m_vLightDir;
-   
-   float3 IlluminateDiffuse( float3 vNormal );
-
-   float3 IlluminateSpecular( float3 vNormal, int specularPower );
-
-};
-
-class cOmniLight : cAmbientLight
-{
-   float3	m_vLightPosition;
-   float    radius;   
-   
-   float3 IlluminateDiffuse( float3 vNormal );
-  
-};
-
-class cSpotLight : cAmbientLight
-{
-   float3	m_vLightPosition;
-   float3	m_vLightDir;
-};
-
-class cEnvironmentLight : cAmbientLight
-{
-   float3  IlluminateSpecular( float3 vNormal, int specularPower );  
-};
-
-
diff --git a/tests/hlsl/dxsdk/DynamicShaderLinkage11/DynamicShaderLinkage11_MaterialPSH.h b/tests/hlsl/dxsdk/DynamicShaderLinkage11/DynamicShaderLinkage11_MaterialPSH.h
deleted file mode 100644
index 7f6bc3d22..000000000
--- a/tests/hlsl/dxsdk/DynamicShaderLinkage11/DynamicShaderLinkage11_MaterialPSH.h
+++ /dev/null
@@ -1,103 +0,0 @@
-//--------------------------------------------------------------------------------------
-// File: DynamicShaderLinkage11_MATERIALPSH.h
-//
-// The pixel shader material header file for the DynamicShaderLinkage11 sample.  
-// 
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-//--------------------------------------------------------------------------------------
-// Interfaces
-//--------------------------------------------------------------------------------------
-interface iBaseMaterial
-{
-   float3 GetAmbientColor(float2 vTexcoord);
-   
-   float3 GetDiffuseColor(float2 vTexcoord);
-
-   int GetSpecularPower();
-
-};
-
-//--------------------------------------------------------------------------------------
-// Classes
-//--------------------------------------------------------------------------------------
-class cBaseMaterial : iBaseMaterial
-{
-   float3	m_vColor;     
-   int      m_iSpecPower;
-   
-   float3 GetAmbientColor(float2 vTexcoord)
-   { 
-      return m_vColor;
-   }
-      
-   float3 GetDiffuseColor(float2 vTexcoord)
-   { 
-      return (float3)m_vColor;
-   }
-
-   int GetSpecularPower()
-   { 
-      return m_iSpecPower;
-   }
-   
-};
-
-class cPlasticMaterial : cBaseMaterial
-{  
-
-};
-
-class cPlasticTexturedMaterial : cPlasticMaterial
-{  
-   float3 GetAmbientColor(float2 vTexcoord);
-
-   float3 GetDiffuseColor(float2 vTexcoord);
-
-};
-
-class cPlasticLightingOnlyMaterial : cBaseMaterial
-{  
-   float3 GetAmbientColor(float2 vTexcoord)
-   { 
-      return (float3)1.0f;
-   }
-      
-   float3 GetDiffuseColor(float2 vTexcoord)
-   { 
-      return (float3)1.0f;
-   }
-
-};
-
-class cRoughMaterial : cBaseMaterial
-{
-   int GetSpecularPower()
-   { 
-      return m_iSpecPower;
-   }
-};
-
-class cRoughTexturedMaterial : cRoughMaterial
-{  
-   float3 GetAmbientColor(float2 vTexcoord);
-
-   float3 GetDiffuseColor(float2 vTexcoord);
-
-};
-
-
-class cRoughLightingOnlyMaterial : cRoughMaterial
-{
-   float3 GetAmbientColor(float2 vTexcoord)
-   { 
-      return (float3)1.0f;
-   }
-      
-   float3 GetDiffuseColor(float2 vTexcoord)
-   { 
-      return (float3)1.0f;
-   }
-
-};
diff --git a/tests/hlsl/dxsdk/DynamicShaderLinkage11/DynamicShaderLinkage11_PS.hlsl b/tests/hlsl/dxsdk/DynamicShaderLinkage11/DynamicShaderLinkage11_PS.hlsl
deleted file mode 100644
index 6850ad9cb..000000000
--- a/tests/hlsl/dxsdk/DynamicShaderLinkage11/DynamicShaderLinkage11_PS.hlsl
+++ /dev/null
@@ -1,84 +0,0 @@
-//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues.
-//TEST:COMPARE_HLSL: -profile ps_4_0 -entry PSMain
-//--------------------------------------------------------------------------------------
-// File: DynamicShaderLinkage11.psh
-//
-// The pixel shader header file for the DynamicShaderLinkage11 sample.  
-// 
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-//--------------------------------------------------------------------------------------
-// Header Includes
-//--------------------------------------------------------------------------------------
-#include "DynamicShaderLinkage11_PSBuffers.h"
-
-// Defines for default static permutated setting
-#if defined( STATIC_PERMUTE ) 
-   #define HEMI_AMBIENT //CONST_AMBIENT //HEMI_AMBIENT
-   #define TEXTURE_ENABLE
-   #define SPECULAR_ENABLE
-#endif
-
-//--------------------------------------------------------------------------------------
-// Input / Output structures
-//--------------------------------------------------------------------------------------
-struct PS_INPUT
-{
-	float4 vPosition	: SV_POSITION;
-	float3 vNormal		: NORMAL;
-	float2 vTexcoord	: TEXCOORD0;
-	float4 vMatrix	: TEXCOORD1;	
-};
-
-//--------------------------------------------------------------------------------------
-// Abstract Interface Instances for dyamic linkage / permutation
-//--------------------------------------------------------------------------------------
-#if !defined( STATIC_PERMUTE ) 
-    iBaseLight     g_abstractAmbientLighting;
-    iBaseLight     g_abstractDirectLighting;
-    iBaseLight     g_abstractEnvironmentLighting;
-    iBaseMaterial  g_abstractMaterial;
-#else
-//--------------------------------------------------------------------------------------
-// Concrete Instances for STATIC_PERMUTE - static permutation
-//--------------------------------------------------------------------------------------
-    #if defined( HEMI_AMBIENT ) 
-        #define g_abstractAmbientLighting g_hemiAmbientLight
-    #else  
-        // CONST_AMBIENT
-        #define g_abstractAmbientLighting g_ambientLight
-    #endif
-    #define g_abstractDirectLighting g_directionalLight
-    #define g_abstractEnvironmentLighting g_environmentLight
-    #if defined( TEXTURE_ENABLE )
-        #define g_abstractMaterial g_plasticTexturedMaterial
-    #else    
-        #define g_abstractMaterial g_plasticMaterial
-    #endif
-#endif
-
-//--------------------------------------------------------------------------------------
-// Pixel Shader
-//--------------------------------------------------------------------------------------
-float4 PSMain( PS_INPUT Input ) : SV_TARGET
-{   
-   // Compute the Ambient term
-   float3   Ambient = (float3)0.0f;	
-   Ambient = g_abstractMaterial.GetAmbientColor( Input.vTexcoord ) * g_abstractAmbientLighting.IlluminateAmbient( Input.vNormal );
-
-   // Accumulate the Diffuse contribution  
-   float3   Diffuse = (float3)0.0f;  
-   
-   Diffuse += g_abstractMaterial.GetDiffuseColor( Input.vTexcoord ) * g_abstractDirectLighting.IlluminateDiffuse( Input.vNormal );
-
-   // Compute the Specular contribution
-   float3   Specular = (float3)0.0f;   
-   Specular += g_abstractDirectLighting.IlluminateSpecular( Input.vNormal, g_abstractMaterial.GetSpecularPower() );
-   Specular += g_abstractEnvironmentLighting.IlluminateSpecular( Input.vNormal, g_abstractMaterial.GetSpecularPower() );
-     
-   // Accumulate the lighting with saturation
-   float3 Lighting = saturate( Ambient + Diffuse + Specular );
-     
-   return float4(Lighting,1.0f); 
-}
diff --git a/tests/hlsl/dxsdk/DynamicShaderLinkage11/DynamicShaderLinkage11_PSBuffers.h b/tests/hlsl/dxsdk/DynamicShaderLinkage11/DynamicShaderLinkage11_PSBuffers.h
deleted file mode 100644
index e2263b832..000000000
--- a/tests/hlsl/dxsdk/DynamicShaderLinkage11/DynamicShaderLinkage11_PSBuffers.h
+++ /dev/null
@@ -1,129 +0,0 @@
-//--------------------------------------------------------------------------------------
-// File: DynamicShaderLinkage11_LightPSH.hlsl
-//
-// The pixel shader light source module file for the DynamicShaderLinkage11 sample.  
-// 
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-#include "DynamicShaderLinkage11_LightPSH.h"
-#include "DynamicShaderLinkage11_MaterialPSH.h"
-
-//--------------------------------------------------------------------------------------
-// Constant Buffers
-//--------------------------------------------------------------------------------------
-cbuffer cbPerFrame : register( b0 )
-{
-   cAmbientLight     g_ambientLight;
-   cHemiAmbientLight g_hemiAmbientLight;
-   cDirectionalLight g_directionalLight;
-   cEnvironmentLight g_environmentLight;
-   float4            g_vEyeDir;   
-};
-
-cbuffer cbPerPrimitive : register( b1 )
-{
-   cPlasticMaterial              g_plasticMaterial;
-   cPlasticTexturedMaterial      g_plasticTexturedMaterial;
-   cPlasticLightingOnlyMaterial  g_plasticLightingOnlyMaterial;
-   cRoughMaterial                g_roughMaterial;
-   cRoughTexturedMaterial        g_roughTexturedMaterial;
-   cRoughLightingOnlyMaterial    g_roughLightingOnlyMaterial;
-};
-
-//--------------------------------------------------------------------------------------
-// Textures and Samplers
-//--------------------------------------------------------------------------------------
-Texture2D	   g_txDiffuse : register( t0 );
-Texture2D	   g_txNormalMap : register( t1 );
-TextureCube	   g_txEnvironmentMap : register( t2 );
-
-SamplerState   g_samLinear : register( s0 );
-
-//--------------------------------------------------------------------------------------
-// Lighting Class Methods
-//--------------------------------------------------------------------------------------
-// Ambient Lighting Class Methods
-float3 cAmbientLight::IlluminateAmbient(float3 vNormal)
-{ 
-   return float4( m_vLightColor * m_bEnable, 1.0f);
-}
-
-float3 cHemiAmbientLight::IlluminateAmbient(float3 vNormal)
-{ 
-   float thetha = (dot( vNormal, m_vDirUp ) + 1.0f) / 2.0f;
- 
-   return  lerp( m_vGroundColor, m_vLightColor, thetha) * m_bEnable;
-}
-
-// Directional Light class
-float3 cDirectionalLight::IlluminateDiffuse( float3 vNormal ) 
-{
-   float lambert = saturate(dot( vNormal, m_vLightDir ));
- 	return ((float3)lambert * m_vLightColor * m_bEnable); 
-}
-
-float3 cDirectionalLight::IlluminateSpecular( float3 vNormal, int specularPower ) 
-{ 	
-   float3 H = -normalize(g_vEyeDir) + m_vLightDir;
-   float3 halfAngle = normalize( H );
-   float specular = pow( max(0,dot( halfAngle, normalize(vNormal) )), specularPower );  	
-
- 	return ((float3)specular * m_vLightColor * m_bEnable); 
-}
-
-// Omni Light Class
-float3 cOmniLight::IlluminateDiffuse( float3 vNormal ) 
-{
-   return (float3)0.0f; // TO DO!
-}
-
-// Environment Lighting
-float3 cEnvironmentLight::IlluminateSpecular( float3 vNormal, int specularPower ) 
-{ 	  
-   // compute reflection vector taking into account a cheap fresnel falloff;
-   float3 N = normalize(vNormal); 
-   float3 E = normalize(g_vEyeDir);
-   float3 R = reflect( E, N ); 
-   float fresnel = 1 - dot( -E, N );  	
-   fresnel = (fresnel * fresnel * fresnel );
-
-   float3 specular = g_txEnvironmentMap.Sample( g_samLinear, R ) * fresnel;
-
-   return (specular * (float3)m_bEnable); 
-//   return ((float3)fresnel); 
-
-}
-
-//--------------------------------------------------------------------------------------
-// Material Class Methods
-//--------------------------------------------------------------------------------------
-// Plastic Material Methods
-float3 cPlasticTexturedMaterial::GetAmbientColor(float2 vTexcoord)
-{ 
-   float4 vDiffuse = (float4)1.0f;
-   vDiffuse = g_txDiffuse.Sample( g_samLinear, vTexcoord );  
-   return m_vColor * vDiffuse;
-}
-   
-float3 cPlasticTexturedMaterial::GetDiffuseColor(float2 vTexcoord)
-{ 
-   float4 vDiffuse = (float4)1.0f;
-   vDiffuse = g_txDiffuse.Sample( g_samLinear, vTexcoord );  
-   return m_vColor * vDiffuse;
-}
-
-// Rough Material Methods
-float3 cRoughTexturedMaterial::GetAmbientColor(float2 vTexcoord)
-{ 
-   float4 vDiffuse = (float4)1.0f;
-   vDiffuse = g_txDiffuse.Sample( g_samLinear, vTexcoord );  
-   return m_vColor * vDiffuse;
-}
-   
-float3 cRoughTexturedMaterial::GetDiffuseColor(float2 vTexcoord)
-{ 
-   float4 vDiffuse = (float4)1.0f;
-   vDiffuse = g_txDiffuse.Sample( g_samLinear, vTexcoord );  
-   return m_vColor * vDiffuse;
-}
diff --git a/tests/hlsl/dxsdk/DynamicShaderLinkage11/DynamicShaderLinkage11_VS.hlsl b/tests/hlsl/dxsdk/DynamicShaderLinkage11/DynamicShaderLinkage11_VS.hlsl
deleted file mode 100644
index d47f20c23..000000000
--- a/tests/hlsl/dxsdk/DynamicShaderLinkage11/DynamicShaderLinkage11_VS.hlsl
+++ /dev/null
@@ -1,73 +0,0 @@
-//TEST:COMPARE_HLSL: -profile vs_4_0 -entry VSMain
-
-#ifndef __SLANG__
-#define cbPerObject cbPerObject_0
-#define g_mWorldViewProjection g_mWorldViewProjection_0
-#define g_mWorld g_mWorld_0
-#endif
-
-//--------------------------------------------------------------------------------------
-// File: DynamicShaderLinkage11_VS.hlsl
-//
-// The vertex shader file for the DynamicShaderLinkage11 sample.  
-// 
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-//--------------------------------------------------------------------------------------
-// Globals
-//--------------------------------------------------------------------------------------
-cbuffer cbPerObject : register( b0 )
-{
-	float4x4		g_mWorldViewProjection	;//SLANG: : packoffset( c0 );
-	float4x4		g_mWorld				;//SLANG: : packoffset( c4 );
-};
-
-//--------------------------------------------------------------------------------------
-// Input / Output structures
-//--------------------------------------------------------------------------------------
-struct VS_INPUT
-{
-	float4 vPosition	: POSITION;
-	float3 vNormal		: NORMAL;
-	float2 vTexcoord	: TEXCOORD0;
-};
-
-struct VS_OUTPUT
-{
-	float4 vPosition	: SV_POSITION;
-	float3 vNormal		: NORMAL;
-	float2 vTexcoord0	: TEXCOORD0;
-	float4 vMatrix	    : TEXCOORD1; // DEBUG
-};
-
-//--------------------------------------------------------------------------------------
-// Vertex Shader
-//--------------------------------------------------------------------------------------
-// We aliased signed vectors as a unsigned format. 
-// Need to recover signed values.  The values 1.0 and 2.0
-// are slightly inaccurate here.
-float3 R10G10B10A2_UNORM_TO_R32G32B32_FLOAT( in float3 vVec )
-{
-    vVec *= 2.0f;
-    return vVec >= 1.0f ? ( vVec - 2.0f ) : vVec;
-}
-
-VS_OUTPUT VSMain( VS_INPUT Input )
-{
-
-	VS_OUTPUT   Output;
-	float3      tmpNormal;
-	
-	Output.vPosition =  mul( Input.vPosition, g_mWorldViewProjection );
-	
-	// Expand compressed vectors
-	tmpNormal = R10G10B10A2_UNORM_TO_R32G32B32_FLOAT( Input.vNormal );
-	Output.vNormal = mul( tmpNormal, (float3x3)g_mWorld );
-	
-	Output.vTexcoord0 = Input.vTexcoord;
-
-    Output.vMatrix = (float4)g_mWorld[0]; // DEBUG
-	return Output;
-}
-
diff --git a/tests/hlsl/dxsdk/DynamicShaderLinkageFX11/DynamicShaderLinkageFX11.fx b/tests/hlsl/dxsdk/DynamicShaderLinkageFX11/DynamicShaderLinkageFX11.fx
deleted file mode 100644
index c72b98843..000000000
--- a/tests/hlsl/dxsdk/DynamicShaderLinkageFX11/DynamicShaderLinkageFX11.fx
+++ /dev/null
@@ -1,192 +0,0 @@
-//TEST_IGNORE_FILE:
-//--------------------------------------------------------------------------------------
-// File: DynamicShaderLinkageFX11.fx
-//
-// The effect file for the DynamicShaderLinkageFX11 sample.  
-// 
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-#include "DynamicShaderLinkageFX11_VS.hlsl"
-#include "DynamicShaderLinkageFX11_PS.hlsl"
-
-//
-// Settings for static permutations.
-// All of the pre-5.0 targets need static specialization
-// since they don't support late binding.  The below
-// just selects a single specialization but you could
-// create any number of them, each one representing
-// a new shader with the interfaces compiled out
-// due to the compile-time class references.
-//
-
-#define StaticMaterial         g_plasticTexturedMaterial
-#define StaticAmbientLight     g_ambientLight
-#define StaticDirectLight      g_directionalLight
-#define StaticEnvironmentLight g_environmentLight
-
-technique11 FeatureLevel10
-{
-    pass
-    {
-        SetRasterizerState(g_rasterizerState[g_fillMode]);
-        SetVertexShader(CompileShader(vs_4_0,
-                                      VSMain()));
-        SetPixelShader(CompileShader(ps_4_0,
-                                     PSMainUniform(StaticAmbientLight,
-                                                   StaticDirectLight,
-                                                   StaticEnvironmentLight,
-                                                   StaticMaterial)));
-    }
-}
-
-technique11 FeatureLevel10_1
-{
-    pass
-    {
-        SetRasterizerState(g_rasterizerState[g_fillMode]);
-        SetVertexShader(CompileShader(vs_4_1,
-                                      VSMain()));
-        SetPixelShader(CompileShader(ps_4_1,
-                                     PSMainUniform(StaticAmbientLight,
-                                                   StaticDirectLight,
-                                                   StaticEnvironmentLight,
-                                                   StaticMaterial)));
-    }
-}
-
-//
-// Variables for dynamic shader linkage.
-// There are two variations here for dynamic usage.
-// In the first we use the uniform entry point
-// and pass in global interface variables.  This
-// creates a shader which refers to the global
-// interface variables when running and we can bind
-// concrete instances in our C++ code by using
-// ID3DX11EffectInterfaceVariable::SetClassInstance.
-// This approach works well when you have several
-// independent variations and want to bind them
-// individually in your C++ code, such as the
-// different lighting and material parameters in
-// this sample.
-//
-
-iBaseLight g_abstractAmbientLighting;
-iBaseLight g_abstractDirectLighting;
-iBaseLight g_abstractEnvironmentLighting;
-iBaseMaterial g_abstractMaterial;
-    
-technique11 FeatureLevel11
-{
-    pass
-    {
-        SetRasterizerState(g_rasterizerState[g_fillMode]);
-        SetVertexShader(CompileShader(vs_5_0,
-                                      VSMain()));
-        SetPixelShader(CompileShader(ps_5_0,
-                                     PSMainUniform(g_abstractAmbientLighting,
-                                                   g_abstractDirectLighting,
-                                                   g_abstractEnvironmentLighting,
-                                                   g_abstractMaterial)));
-    }
-}
-
-//
-// In this second variation we use the non-uniform
-// entry point so that we don't have to specify
-// any interfaces when compiling the shader.  We
-// then reuse the compiled shader with different
-// BindInterfaces calls so that all bindings are
-// handled automatically by the effect runtime.
-// Below we have multiple techniques where
-// we've given a concrete binding for the material.
-// Lighting parameters are left as interfaces for
-// binding via effect variables, but could also
-// be specified concretely if the number of variations
-// is manageable.
-// This approach works well for a small number of variations
-// that are known in advance, as you can just list them
-// in your effect and you don't need to do the
-// binding work explicitly in your C++ code.
-//
-
-VertexShader g_NonUniVS = CompileShader(vs_5_0, VSMain());
-PixelShader g_NonUniPS = CompileShader(ps_5_0, PSMainNonUniform());
-
-technique11 FeatureLevel11_g_plasticMaterial
-{
-    pass
-    {
-        SetVertexShader(g_NonUniVS);
-        SetPixelShader(BindInterfaces(g_NonUniPS,
-                                      g_abstractAmbientLighting,
-                                      g_abstractDirectLighting,
-                                      g_abstractEnvironmentLighting,
-                                      g_plasticMaterial));
-    }
-}
-
-technique11 FeatureLevel11_g_plasticTexturedMaterial
-{
-    pass
-    {
-        SetVertexShader(g_NonUniVS);
-        SetPixelShader(BindInterfaces(g_NonUniPS,
-                                      g_abstractAmbientLighting,
-                                      g_abstractDirectLighting,
-                                      g_abstractEnvironmentLighting,
-                                      g_plasticTexturedMaterial));
-    }
-}
-
-technique11 FeatureLevel11_g_plasticLightingOnlyMaterial
-{
-    pass
-    {
-        SetVertexShader(g_NonUniVS);
-        SetPixelShader(BindInterfaces(g_NonUniPS,
-                                      g_abstractAmbientLighting,
-                                      g_abstractDirectLighting,
-                                      g_abstractEnvironmentLighting,
-                                      g_plasticLightingOnlyMaterial));
-    }
-}
-
-technique11 FeatureLevel11_g_roughMaterial
-{
-    pass
-    {
-        SetVertexShader(g_NonUniVS);
-        SetPixelShader(BindInterfaces(g_NonUniPS,
-                                      g_abstractAmbientLighting,
-                                      g_abstractDirectLighting,
-                                      g_abstractEnvironmentLighting,
-                                      g_roughMaterial));
-    }
-}
-
-technique11 FeatureLevel11_g_roughTexturedMaterial
-{
-    pass
-    {
-        SetVertexShader(g_NonUniVS);
-        SetPixelShader(BindInterfaces(g_NonUniPS,
-                                      g_abstractAmbientLighting,
-                                      g_abstractDirectLighting,
-                                      g_abstractEnvironmentLighting,
-                                      g_roughTexturedMaterial));
-    }
-}
-
-technique11 FeatureLevel11_g_roughLightingOnlyMaterial
-{
-    pass
-    {
-        SetVertexShader(g_NonUniVS);
-        SetPixelShader(BindInterfaces(g_NonUniPS,
-                                      g_abstractAmbientLighting,
-                                      g_abstractDirectLighting,
-                                      g_abstractEnvironmentLighting,
-                                      g_roughLightingOnlyMaterial));
-    }
-}
diff --git a/tests/hlsl/dxsdk/DynamicShaderLinkageFX11/DynamicShaderLinkageFX11_LightPSH.h b/tests/hlsl/dxsdk/DynamicShaderLinkageFX11/DynamicShaderLinkageFX11_LightPSH.h
deleted file mode 100644
index 6f9a0f4d8..000000000
--- a/tests/hlsl/dxsdk/DynamicShaderLinkageFX11/DynamicShaderLinkageFX11_LightPSH.h
+++ /dev/null
@@ -1,82 +0,0 @@
-//--------------------------------------------------------------------------------------
-// File: DynamicShaderLinkageFX11_LightPSH.h
-//
-// The pixel shader light header file for the DynamicShaderLinkageFX11 sample.  
-// 
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-//--------------------------------------------------------------------------------------
-// Interfaces
-//--------------------------------------------------------------------------------------
-interface iBaseLight
-{
-   float3 IlluminateAmbient(float3 vNormal);
-   
-   float3 IlluminateDiffuse(float3 vNormal);
-
-   float3 IlluminateSpecular(float3 vNormal, int specularPower );
-   
-};
-
-//--------------------------------------------------------------------------------------
-// Classes
-//--------------------------------------------------------------------------------------
-class cAmbientLight : iBaseLight
-{
-   float3   m_vLightColor;     
-   bool     m_bEnable;
-   
-   float3 IlluminateAmbient(float3 vNormal);
-      
-   float3 IlluminateDiffuse(float3 vNormal)
-   { 
-      return (float3)0;
-   }
-
-   float3 IlluminateSpecular(float3 vNormal, int specularPower )
-   { 
-      return (float3)0;
-   }
-};
-
-class cHemiAmbientLight : cAmbientLight
-{
-   // inherited float4 m_vLightColor is the SkyColor
-   float4   m_vGroundColor;
-   float4   m_vDirUp;
-
-   float3 IlluminateAmbient(float3 vNormal);
-   
-};
-
-class cDirectionalLight : cAmbientLight
-{
-   // inherited float4 m_vLightColor is the LightColor
-   float4 m_vLightDir;
-   
-   float3 IlluminateDiffuse( float3 vNormal );
-
-   float3 IlluminateSpecular( float3 vNormal, int specularPower );
-
-};
-
-class cOmniLight : cAmbientLight
-{
-   float3   m_vLightPosition;
-   float    radius;   
-   
-   float3 IlluminateDiffuse( float3 vNormal );
-  
-};
-
-class cSpotLight : cAmbientLight
-{
-   float3   m_vLightPosition;
-   float3   m_vLightDir;
-};
-
-class cEnvironmentLight : cAmbientLight
-{
-   float3  IlluminateSpecular( float3 vNormal, int specularPower );  
-};
diff --git a/tests/hlsl/dxsdk/DynamicShaderLinkageFX11/DynamicShaderLinkageFX11_MaterialPSH.h b/tests/hlsl/dxsdk/DynamicShaderLinkageFX11/DynamicShaderLinkageFX11_MaterialPSH.h
deleted file mode 100644
index cd54a283d..000000000
--- a/tests/hlsl/dxsdk/DynamicShaderLinkageFX11/DynamicShaderLinkageFX11_MaterialPSH.h
+++ /dev/null
@@ -1,103 +0,0 @@
-//--------------------------------------------------------------------------------------
-// File: DynamicShaderLinkageFX11_MaterialPSH.h
-//
-// The pixel shader material header file for the DynamicShaderLinkageFX11 sample.  
-// 
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-//--------------------------------------------------------------------------------------
-// Interfaces
-//--------------------------------------------------------------------------------------
-interface iBaseMaterial
-{
-   float3 GetAmbientColor(float2 vTexcoord);
-   
-   float3 GetDiffuseColor(float2 vTexcoord);
-
-   int GetSpecularPower();
-
-};
-
-//--------------------------------------------------------------------------------------
-// Classes
-//--------------------------------------------------------------------------------------
-class cBaseMaterial : iBaseMaterial
-{
-   float3   m_vColor;     
-   int      m_iSpecPower;
-   
-   float3 GetAmbientColor(float2 vTexcoord)
-   { 
-      return m_vColor;
-   }
-      
-   float3 GetDiffuseColor(float2 vTexcoord)
-   { 
-      return (float3)m_vColor;
-   }
-
-   int GetSpecularPower()
-   { 
-      return m_iSpecPower;
-   }
-   
-};
-
-class cPlasticMaterial : cBaseMaterial
-{  
-
-};
-
-class cPlasticTexturedMaterial : cPlasticMaterial
-{  
-   float3 GetAmbientColor(float2 vTexcoord);
-
-   float3 GetDiffuseColor(float2 vTexcoord);
-
-};
-
-class cPlasticLightingOnlyMaterial : cBaseMaterial
-{  
-   float3 GetAmbientColor(float2 vTexcoord)
-   { 
-      return (float3)1.0f;
-   }
-      
-   float3 GetDiffuseColor(float2 vTexcoord)
-   { 
-      return (float3)1.0f;
-   }
-
-};
-
-class cRoughMaterial : cBaseMaterial
-{
-   int GetSpecularPower()
-   { 
-      return m_iSpecPower;
-   }
-};
-
-class cRoughTexturedMaterial : cRoughMaterial
-{  
-   float3 GetAmbientColor(float2 vTexcoord);
-
-   float3 GetDiffuseColor(float2 vTexcoord);
-
-};
-
-
-class cRoughLightingOnlyMaterial : cRoughMaterial
-{
-   float3 GetAmbientColor(float2 vTexcoord)
-   { 
-      return (float3)1.0f;
-   }
-      
-   float3 GetDiffuseColor(float2 vTexcoord)
-   { 
-      return (float3)1.0f;
-   }
-
-};
diff --git a/tests/hlsl/dxsdk/DynamicShaderLinkageFX11/DynamicShaderLinkageFX11_PSBuffers.h b/tests/hlsl/dxsdk/DynamicShaderLinkageFX11/DynamicShaderLinkageFX11_PSBuffers.h
deleted file mode 100644
index 3b4c528be..000000000
--- a/tests/hlsl/dxsdk/DynamicShaderLinkageFX11/DynamicShaderLinkageFX11_PSBuffers.h
+++ /dev/null
@@ -1,152 +0,0 @@
-//--------------------------------------------------------------------------------------
-// File: DynamicShaderLinkageFX11_LightPSH.hlsl
-//
-// The pixel shader light source module file for the DynamicShaderLinkageFX11 sample.  
-// 
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-#include "DynamicShaderLinkageFX11_LightPSH.h"
-#include "DynamicShaderLinkageFX11_MaterialPSH.h"
-
-//--------------------------------------------------------------------------------------
-// Constant Buffers
-//--------------------------------------------------------------------------------------
-cbuffer cbPerFrame : register( b0 )
-{
-   cAmbientLight     g_ambientLight;
-   cHemiAmbientLight g_hemiAmbientLight;
-   cDirectionalLight g_directionalLight;
-   cEnvironmentLight g_environmentLight;
-   float4            g_vEyeDir;   
-};
-
-cbuffer cbPerPrimitive : register( b1 )
-{
-   cPlasticMaterial              g_plasticMaterial;
-   cPlasticTexturedMaterial      g_plasticTexturedMaterial;
-   cPlasticLightingOnlyMaterial  g_plasticLightingOnlyMaterial;
-   cRoughMaterial                g_roughMaterial;
-   cRoughTexturedMaterial        g_roughTexturedMaterial;
-   cRoughLightingOnlyMaterial    g_roughLightingOnlyMaterial;
-};
-
-//--------------------------------------------------------------------------------------
-// Textures and Samplers
-//--------------------------------------------------------------------------------------
-Texture2D      g_txDiffuse : register( t0 );
-Texture2D      g_txNormalMap : register( t1 );
-TextureCube    g_txEnvironmentMap : register( t2 );
-
-SamplerState   g_samLinear : register( s0 )
-{
-    Filter = MIN_MAG_MIP_LINEAR;
-    AddressU = WRAP;
-    AddressV = WRAP;
-    AddressW = WRAP;
-};
-
-//--------------------------------------------------------------------------------------
-// Rasterization State
-//--------------------------------------------------------------------------------------
-uint g_fillMode = 0;
-   
-RasterizerState g_rasterizerState[2]
-{
-{
-    FillMode = SOLID;
-    MultisampleEnable = true;
-},
-{
-    FillMode = WIREFRAME;
-    MultisampleEnable = true;
-}
-};
-
-//--------------------------------------------------------------------------------------
-// Lighting Class Methods
-//--------------------------------------------------------------------------------------
-// Ambient Lighting Class Methods
-float3 cAmbientLight::IlluminateAmbient(float3 vNormal)
-{ 
-   return m_vLightColor * m_bEnable;
-}
-
-float3 cHemiAmbientLight::IlluminateAmbient(float3 vNormal)
-{ 
-   float thetha = (dot( vNormal, m_vDirUp.xyz ) + 1.0f) / 2.0f;
- 
-   return  lerp( m_vGroundColor.xyz, m_vLightColor, thetha) * m_bEnable;
-}
-
-// Directional Light class
-float3 cDirectionalLight::IlluminateDiffuse( float3 vNormal ) 
-{
-   float lambert = saturate(dot( vNormal, m_vLightDir.xyz ));
-   return ((float3)lambert * m_vLightColor * m_bEnable);
-}
-
-float3 cDirectionalLight::IlluminateSpecular( float3 vNormal, int specularPower ) 
-{       
-   float3 H = -normalize(g_vEyeDir.xyz) + m_vLightDir.xyz;
-   float3 halfAngle = normalize( H );
-   float specular = pow( max(0,dot( halfAngle, normalize(vNormal) )), specularPower );          
-
-        return ((float3)specular * m_vLightColor * m_bEnable); 
-}
-
-// Omni Light Class
-float3 cOmniLight::IlluminateDiffuse( float3 vNormal ) 
-{
-   return (float3)0.0f; // TO DO!
-}
-
-// Environment Lighting
-float3 cEnvironmentLight::IlluminateSpecular( float3 vNormal, int specularPower ) 
-{         
-   // compute reflection vector taking into account a cheap fresnel falloff;
-   float3 N = normalize(vNormal); 
-   float3 E = normalize(g_vEyeDir.xyz);
-   float3 R = reflect( E, N ); 
-   float fresnel = 1 - dot( -E, N );    
-   fresnel = (fresnel * fresnel * fresnel );
-
-   float3 specular = g_txEnvironmentMap.Sample( g_samLinear, R ).xyz * fresnel;
-
-   return (specular * (float3)m_bEnable); 
-//   return ((float3)fresnel); 
-
-}
-
-//--------------------------------------------------------------------------------------
-// Material Class Methods
-//--------------------------------------------------------------------------------------
-// Plastic Material Methods
-float3 cPlasticTexturedMaterial::GetAmbientColor(float2 vTexcoord)
-{ 
-   float4 vDiffuse = (float4)1.0f;
-   vDiffuse = g_txDiffuse.Sample( g_samLinear, vTexcoord );  
-   return m_vColor * vDiffuse.xyz;
-}
-   
-float3 cPlasticTexturedMaterial::GetDiffuseColor(float2 vTexcoord)
-{ 
-   float4 vDiffuse = (float4)1.0f;
-   vDiffuse = g_txDiffuse.Sample( g_samLinear, vTexcoord );  
-   return m_vColor * vDiffuse.xyz;
-}
-
-// Rough Material Methods
-float3 cRoughTexturedMaterial::GetAmbientColor(float2 vTexcoord)
-{ 
-   float4 vDiffuse = (float4)1.0f;
-   vDiffuse = g_txDiffuse.Sample( g_samLinear, vTexcoord );  
-   return m_vColor * vDiffuse.xyz;
-}
-   
-float3 cRoughTexturedMaterial::GetDiffuseColor(float2 vTexcoord)
-{ 
-   float4 vDiffuse = (float4)1.0f;
-   vDiffuse = g_txDiffuse.Sample( g_samLinear, vTexcoord );  
-   return m_vColor * vDiffuse.xyz;
-}
diff --git a/tests/hlsl/dxsdk/DynamicShaderLinkageFX11/DynamicShaderLinkageFX11_ps.hlsl b/tests/hlsl/dxsdk/DynamicShaderLinkageFX11/DynamicShaderLinkageFX11_ps.hlsl
deleted file mode 100644
index 55d206259..000000000
--- a/tests/hlsl/dxsdk/DynamicShaderLinkageFX11/DynamicShaderLinkageFX11_ps.hlsl
+++ /dev/null
@@ -1,113 +0,0 @@
-//TEST_IGNORE_FILE:
-//--------------------------------------------------------------------------------------
-// File: DynamicShaderLinkageFX11.psh
-//
-// The pixel shader header file for the DynamicShaderLinkageFX11 sample.  
-// 
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-//--------------------------------------------------------------------------------------
-// Header Includes
-//--------------------------------------------------------------------------------------
-#include "DynamicShaderLinkageFX11_PSBuffers.h"
-
-//--------------------------------------------------------------------------------------
-// Input / Output structures
-//--------------------------------------------------------------------------------------
-struct PS_INPUT
-{
-    float4 vPosition    : SV_POSITION;
-    float3 vNormal      : NORMAL;
-    float2 vTexcoord    : TEXCOORD0;
-    float4 vMatrix      : TEXCOORD1;    
-};
-
-//--------------------------------------------------------------------------------------
-// Pixel Shader
-//--------------------------------------------------------------------------------------
-
-// This pixel shader uses several interfaces during its
-// work.  We show three different ways of providing interface
-// bindings for the PS and those have two different
-// entry points so we've separated the base PS code
-// into a worker routine that's called by the entry
-// points.  Normally only one technique would be used
-// and this layering of entry point and worker would
-// not be necessary.
-float4 PSMainWorker( iBaseLight ambientLighting,
-                     iBaseLight directLighting,
-                     iBaseLight environmentLighting,
-                     iBaseMaterial material,
-                     PS_INPUT Input )
-{   
-   // Compute the Ambient term
-   float3   Ambient = (float3)0.0f; 
-   Ambient = material.GetAmbientColor( Input.vTexcoord ) * ambientLighting.IlluminateAmbient( Input.vNormal );
-
-   // Accumulate the Diffuse contribution  
-   float3   Diffuse = (float3)0.0f;  
-   
-   Diffuse += material.GetDiffuseColor( Input.vTexcoord ) * directLighting.IlluminateDiffuse( Input.vNormal );
-
-   // Compute the Specular contribution
-   float3   Specular = (float3)0.0f;   
-   Specular += directLighting.IlluminateSpecular( Input.vNormal, material.GetSpecularPower() );
-   Specular += environmentLighting.IlluminateSpecular( Input.vNormal, material.GetSpecularPower() );
-     
-   // Accumulate the lighting with saturation
-   float3 Lighting = saturate( Ambient + Diffuse + Specular);
-
-   return float4(Lighting,1.0f);
-}
-
-// One way to provide bindings for shaders in Effects 11 is
-// to use uniform interface parameters.  As with non-interface
-// uniform parameters you must specify a value for these
-// parameters in your CompileShader invocations in the effect.
-// You can provide concrete class instances if you want
-// to statically specialize your shaders, such as for targets
-// that don't support abstract interfaces; or you can provide
-// other interfaces that you bind using effect variables.
-// Both are shown in this sample's technique passes.
-float4 PSMainUniform( uniform iBaseLight ambientLighting,
-                      uniform iBaseLight directLighting,
-                      uniform iBaseLight environmentLighting,
-                      uniform iBaseMaterial material,
-                      PS_INPUT Input ) : SV_Target
-{
-    return PSMainWorker(ambientLighting,
-                        directLighting,
-                        environmentLighting,
-                        material,
-                        Input);
-}
-
-// Another way to use Effects 11 with interfaces is
-// to have non-uniform parameters, which then are
-// bound with a BindInterfaces in a technique pass.
-// BindInterfaces gives concrete instances to use
-// with a shader but does not do static specialization,
-// it just saves information for the effect runtime
-// to use when setting up the shader to run.
-// This lets you share a single shader, compiled with
-// interface usage, while still getting the convenience
-// of declaring concrete bindings in the effect and
-// not needed explicit binding in code via effect
-// variable updates.  If you have many different
-// variations it may be simpler to use bindings
-// through effect variables, as then you don't
-// need to list every possible binding set in your
-// techniques.
-float4 PSMainNonUniform( iBaseLight ambientLighting,
-                         iBaseLight directLighting,
-                         iBaseLight environmentLighting,
-                         iBaseMaterial material,
-                         PS_INPUT Input ) : SV_Target
-{
-    return PSMainWorker(ambientLighting,
-                        directLighting,
-                        environmentLighting,
-                        material,
-                        Input);
-}
diff --git a/tests/hlsl/dxsdk/DynamicShaderLinkageFX11/DynamicShaderLinkageFX11_vs.hlsl b/tests/hlsl/dxsdk/DynamicShaderLinkageFX11/DynamicShaderLinkageFX11_vs.hlsl
deleted file mode 100644
index 4791e5786..000000000
--- a/tests/hlsl/dxsdk/DynamicShaderLinkageFX11/DynamicShaderLinkageFX11_vs.hlsl
+++ /dev/null
@@ -1,65 +0,0 @@
-//TEST_IGNORE_FILE:
-//--------------------------------------------------------------------------------------
-// File: DynamicShaderLinkageFX11_VS.hlsl
-//
-// The vertex shader file for the DynamicShaderLinkageFX11 sample.  
-// 
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-//--------------------------------------------------------------------------------------
-// Globals
-//--------------------------------------------------------------------------------------
-cbuffer cbPerObject : register( b0 )
-{
-    float4x4        g_mWorldViewProjection  : packoffset( c0 );
-    float4x4        g_mWorld                : packoffset( c4 );
-};
-
-//--------------------------------------------------------------------------------------
-// Input / Output structures
-//--------------------------------------------------------------------------------------
-struct VS_INPUT
-{
-    float4 vPosition    : POSITION;
-    float3 vNormal      : NORMAL;
-    float2 vTexcoord    : TEXCOORD0;
-};
-
-struct VS_OUTPUT
-{
-    float4 vPosition    : SV_POSITION;
-    float3 vNormal      : NORMAL;
-    float2 vTexcoord0   : TEXCOORD0;
-    float4 vMatrix      : TEXCOORD1; // DEBUG
-};
-
-//--------------------------------------------------------------------------------------
-// Vertex Shader
-//--------------------------------------------------------------------------------------
-// We aliased signed vectors as a unsigned format. 
-// Need to recover signed values.  The values 1.0 and 2.0
-// are slightly inaccurate here.
-float3 R10G10B10A2_UNORM_TO_R32G32B32_FLOAT( in float3 vVec )
-{
-    vVec *= 2.0f;
-    return vVec >= 1.0f ? ( vVec - 2.0f ) : vVec;
-}
-
-VS_OUTPUT VSMain( VS_INPUT Input )
-{
-
-    VS_OUTPUT   Output;
-    float3      tmpNormal;
-    
-    Output.vPosition =  mul( Input.vPosition, g_mWorldViewProjection );
-    
-    // Expand compressed vectors
-    tmpNormal = R10G10B10A2_UNORM_TO_R32G32B32_FLOAT( Input.vNormal );
-    Output.vNormal = mul( tmpNormal, (float3x3)g_mWorld );
-    
-    Output.vTexcoord0 = Input.vTexcoord;
-
-    Output.vMatrix = (float4)g_mWorld[0]; // DEBUG
-    return Output;
-}
diff --git a/tests/hlsl/dxsdk/FixedFuncEMUFX11/FixedFuncEMU.fx b/tests/hlsl/dxsdk/FixedFuncEMUFX11/FixedFuncEMU.fx
deleted file mode 100644
index 699df8655..000000000
--- a/tests/hlsl/dxsdk/FixedFuncEMUFX11/FixedFuncEMU.fx
+++ /dev/null
@@ -1,468 +0,0 @@
-//TEST_IGNORE_FILE:
-// FixedFuncEMU.fx
-// Copyright (c) 2005 Microsoft Corporation. All rights reserved.
-//
-
-struct VSSceneIn
-{
-    float3 pos          : POSITION;         //position of the particle
-    float3 norm         : NORMAL;           //velocity of the particle
-    float2 tex          : TEXTURE0;         //tex coords
-};
-
-struct VSSceneOut
-{
-    float4 pos : SV_Position;               //position
-    float2 tex : TEXTURE0;                  //texture coordinate
-    float3 wPos : TEXTURE1;                 //world space pos
-    float3 wNorm : TEXTURE2;                //world space normal
-    float4 colorD : COLOR0;                 //color for gouraud and flat shading
-    float4 colorS : COLOR1;                 //color for specular
-    float  fogDist : FOGDISTANCE;           //distance used for fog calculations
-    float3 planeDist : SV_ClipDistance0;    //clip distance for 3 planes
-};
-
-struct PSSceneIn
-{
-    float4 pos : SV_Position;               //position
-    float2 tex : TEXTURE0;                  //texture coordinate
-    float3 wPos : TEXTURE1;                 //world space pos
-    float3 wNorm : TEXTURE2;                //world space normal
-    float4 colorD : COLOR0;                 //color for gouraud and flat shading
-    float4 colorS : COLOR1;                 //color for specular
-    float  fogDist : FOGDISTANCE;           //distance used for fog calculations
-};
-
-struct Light
-{
-    float4 Position;
-    float4 Diffuse;
-    float4 Specular;
-    float4 Ambient;
-    float4 Atten;
-};
-
-#define FOGMODE_NONE    0
-#define FOGMODE_LINEAR  1
-#define FOGMODE_EXP     2
-#define FOGMODE_EXP2    3
-#define E 2.71828
-
-cbuffer cbLights
-{
-    float4   g_clipplanes[3];
-    Light    g_lights[8];
-};
-
-cbuffer cbPerFrame
-{
-    float4x4 g_mWorld;
-    float4x4 g_mView;
-    float4x4 g_mProj;
-    float4x4 g_mInvProj;
-    float4x4 g_mLightViewProj;
-};
-
-cbuffer cbPerTechnique
-{
-    bool     g_bEnableLighting = true;
-    bool     g_bEnableClipping = true;
-    bool     g_bPointScaleEnable = false;
-    float    g_pointScaleA;
-    float    g_pointScaleB;
-    float    g_pointScaleC;
-    float    g_pointSize;
-    
-    //fog params
-    int      g_fogMode = FOGMODE_NONE;
-    float    g_fogStart;
-    float    g_fogEnd;
-    float    g_fogDensity;
-    float4   g_fogColor;
-};
-    
-cbuffer cbPerViewChange
-{
-    //viewport params
-    float    g_viewportHeight;
-    float    g_viewportWidth;
-    float    g_nearPlane;
-};
-
-cbuffer cbImmutable
-{
-    float3 g_positions[4] =
-    {
-        float3( -0.5, 0.5, 0 ),
-        float3( 0.5, 0.5, 0 ),
-        float3( -0.5, -0.5, 0 ),
-        float3( 0.5, -0.5, 0 ),
-    };
-};
-
-Texture2D g_txDiffuse;
-Texture2D g_txProjected;
-SamplerState g_samLinear
-{
-    Filter = MIN_MAG_MIP_LINEAR;
-    AddressU = Clamp;
-    AddressV = Clamp;
-};
-
-DepthStencilState DisableDepth
-{
-    DepthEnable = FALSE;
-    DepthWriteMask = ZERO;
-};
-
-DepthStencilState EnableDepth
-{
-    DepthEnable = TRUE;
-    DepthWriteMask = ALL;
-};
-
-struct ColorsOutput
-{
-    float4 Diffuse;
-    float4 Specular;
-};
-
-ColorsOutput CalcLighting( float3 worldNormal, float3 worldPos, float3 cameraPos )
-{
-    ColorsOutput output = (ColorsOutput)0.0;
-    
-    for(int i=0; i<8; i++)
-    {
-        float3 toLight = g_lights[i].Position.xyz - worldPos;
-        float lightDist = length( toLight );
-        float fAtten = 1.0/dot( g_lights[i].Atten, float4(1,lightDist,lightDist*lightDist,0) );
-        float3 lightDir = normalize( toLight );
-        float3 halfAngle = normalize( normalize(-cameraPos) + lightDir );
-        
-        output.Diffuse += max(0,dot( lightDir, worldNormal ) * g_lights[i].Diffuse * fAtten) + g_lights[i].Ambient;
-        output.Specular += max(0,pow( dot( halfAngle, worldNormal ), 64 ) * g_lights[i].Specular * fAtten );
-    }
-    
-    return output;
-}
-
-//
-// VS for emulating fixed function pipeline
-//
-VSSceneOut VSScenemain(VSSceneIn input)
-{
-    VSSceneOut output = (VSSceneOut)0.0;
-
-    //output our final position in clipspace
-    float4 worldPos = mul( float4( input.pos, 1 ), g_mWorld );
-    float4 cameraPos = mul( worldPos, g_mView ); //Save cameraPos for fog calculations
-    output.pos = mul( cameraPos, g_mProj );
-    
-    //save world pos for later
-    output.wPos = worldPos;
-    
-    //save the fog distance for later
-    output.fogDist = cameraPos.z;
-    
-    //find our clipping planes (fixed function clipping is done in world space)
-    if( g_bEnableClipping )
-    {
-        worldPos.w = 1;
-        
-        //calc the distance from the 3 clipping planes
-        output.planeDist.x = dot( worldPos, g_clipplanes[0] );
-        output.planeDist.y = dot( worldPos, g_clipplanes[1] );
-        output.planeDist.z = dot( worldPos, g_clipplanes[2] );
-    }
-    else
-    {
-        output.planeDist.x = 1;
-        output.planeDist.y = 1;
-        output.planeDist.z = 1;
-    }
-    
-    //do gouraud lighting
-    if( g_bEnableLighting )
-    {
-        float3 worldNormal = normalize( mul( input.norm, (float3x3)g_mWorld ) );
-        output.wNorm = worldNormal;
-        ColorsOutput cOut = CalcLighting( worldNormal, worldPos, cameraPos );
-        output.colorD = cOut.Diffuse;
-        output.colorS = cOut.Specular;
-    }
-    else
-    {
-        output.colorD = float4(1,1,1,1);
-    }
-    
-    //propogate texture coordinate
-    output.tex = input.tex;
-    
-    return output;
-}
-
-//
-// VS for rendering in screen space
-//
-PSSceneIn VSScreenSpacemain(VSSceneIn input)
-{
-    PSSceneIn output = (PSSceneIn)0.0;
-
-    //output our final position
-    output.pos.x = (input.pos.x / (g_viewportWidth/2.0)) -1;
-    output.pos.y = -(input.pos.y / (g_viewportHeight/2.0)) +1;
-    output.pos.z = input.pos.z;
-    output.pos.w = 1;
-    
-    //propogate texture coordinate
-    output.tex = input.tex;
-    output.colorD = float4(1,1,1,1);
-    
-    return output;
-}
-
-//
-// GS for flat shaded rendering
-//
-
-[maxvertexcount(3)]
-void GSFlatmain( triangle VSSceneOut input[3], inout TriangleStream<VSSceneOut> FlatTriStream )
-{
-    VSSceneOut output;
-    
-    //
-    // Calculate the face normal
-    //
-    float3 faceEdgeA = input[1].wPos - input[0].wPos;
-    float3 faceEdgeB = input[2].wPos - input[0].wPos;
-
-    //
-    // Cross product
-    //
-    float3 faceNormal = cross(faceEdgeA, faceEdgeB);
-    
-    //
-    //calculate the face center
-    //
-    float3 faceCenter = (input[0].wPos + input[1].wPos + input[2].wPos)/3.0;
-    
-    //find world pos and camera pos
-    float4 worldPos = float4( faceCenter, 1 );
-    float4 cameraPos = mul( worldPos, g_mView );
-    
-    //do shading
-    float3 worldNormal = normalize( faceNormal );
-    ColorsOutput cOut = CalcLighting( worldNormal, worldPos, cameraPos );
-    
-    for(int i=0; i<3; i++)
-    {
-        output = input[i];
-        output.colorD = cOut.Diffuse;
-        output.colorS = cOut.Specular;
-        
-        FlatTriStream.Append( output );
-    }
-    FlatTriStream.RestartStrip();
-}
-
-//
-// GS for point rendering
-//
-[maxvertexcount(12)]
-void GSPointmain( triangle VSSceneOut input[3], inout TriangleStream<VSSceneOut> PointTriStream )
-{
-    VSSceneOut output;
-    
-    //
-    // Calculate the point size
-    //
-    //float fSizeX = (g_pointSize/g_viewportWidth)/4.0;
-    float fSizeY = (g_pointSize/g_viewportHeight)/4.0;
-    float fSizeX = fSizeY;
-    
-    for(int i=0; i<3; i++)
-    {
-        output = input[i];
-    
-        //find world pos and camera pos
-        float4 worldPos = float4(input[i].wPos,1);
-        float4 cameraPos = mul( worldPos, g_mView );
-        
-        //find our size
-        if( g_bPointScaleEnable )
-        {   
-            float dEye = length( cameraPos.xyz );
-            fSizeX = fSizeY = g_viewportHeight * g_pointSize * 
-                    sqrt( 1.0f/( g_pointScaleA + g_pointScaleB*dEye + g_pointScaleC*(dEye*dEye) ) );
-        }
-        
-        //do shading
-        if(g_bEnableLighting)
-        {
-            float3 worldNormal = input[i].wNorm;
-            ColorsOutput cOut = CalcLighting( worldNormal, worldPos, cameraPos );
-        
-            output.colorD = cOut.Diffuse;
-            output.colorS = cOut.Specular;
-        }
-        else
-        {
-            output.colorD = float4(1,1,1,1);
-        }
-        
-        output.tex = input[i].tex;
-        
-        //
-        // Emit two new triangles
-        //
-        for(int i=0; i<4; i++)
-        {
-            float4 outPos = mul( worldPos, g_mView );
-            output.pos = mul( outPos, g_mProj );
-            float zoverNear = (outPos.z)/g_nearPlane;
-            float4 posSize = float4( g_positions[i].x*fSizeX*zoverNear,
-                                     g_positions[i].y*fSizeY*zoverNear,
-                                     0,
-                                     0 );
-            output.pos += posSize;
-            
-            PointTriStream.Append(output);
-        }
-        PointTriStream.RestartStrip();
-    }
-}
-
-//
-// Calculates fog factor based upon distance
-//
-float CalcFogFactor( float d )
-{
-    float fogCoeff = 1.0;
-    
-    if( FOGMODE_LINEAR == g_fogMode )
-    {
-        fogCoeff = (g_fogEnd - d)/(g_fogEnd - g_fogStart);
-    }
-    else if( FOGMODE_EXP == g_fogMode )
-    {
-        fogCoeff = 1.0 / pow( E, d*g_fogDensity );
-    }
-    else if( FOGMODE_EXP2 == g_fogMode )
-    {
-        fogCoeff = 1.0 / pow( E, d*d*g_fogDensity*g_fogDensity );
-    }
-    
-    return clamp( fogCoeff, 0, 1 );
-}
-
-//
-// PS for rendering with clip planes
-//
-float4 PSScenemain(PSSceneIn input) : SV_Target
-{   
-    //calculate the fog factor  
-    float fog = CalcFogFactor( input.fogDist );
-    
-    //calculate the color based off of the normal, textures, etc
-    float4 normalColor = g_txDiffuse.Sample( g_samLinear, input.tex ) * input.colorD + input.colorS;
-    
-    //calculate the color from the projected texture
-    float4 cookieCoord = mul( float4(input.wPos,1), g_mLightViewProj );
-    //since we don't have texldp, we must perform the w divide ourselves befor the texture lookup
-    cookieCoord.xy = 0.5 * cookieCoord.xy / cookieCoord.w + float2( 0.5, 0.5 ); 
-    float4 cookieColor = float4(0,0,0,0);
-    if( cookieCoord.z > 0 )
-        cookieColor = g_txProjected.Sample( g_samLinear, cookieCoord.xy );
-    
-    //for standard light-modulating effects just multiply normalcolor and coookiecolor
-    normalColor += cookieColor;
-    
-    return fog * normalColor + (1.0 - fog)*g_fogColor;
-}
-
-//
-// PS for rendering with alpha test
-//
-float4 PSAlphaTestmain(PSSceneIn input) : SV_Target
-{   
-    float4 color =  g_txDiffuse.Sample( g_samLinear, input.tex ) * input.colorD;
-    if( color.a < 0.5 )
-        discard;
-    return color;
-}
-
-//
-// RenderSceneGouraud - renders gouraud-shaded primitives
-//
-technique10 RenderSceneGouraud
-{
-    pass p0
-    {
-        SetVertexShader( CompileShader( vs_4_0, VSScenemain() ) );
-        SetGeometryShader( NULL );
-        SetPixelShader( CompileShader( ps_4_0, PSScenemain() ) );
-        
-        SetDepthStencilState( EnableDepth, 0 );
-    }  
-}
-
-//
-// RenderSceneFlat - renders flat-shaded primitives
-//
-technique10 RenderSceneFlat
-{
-    pass p0
-    {
-        SetVertexShader( CompileShader( vs_4_0, VSScenemain() ) );
-        SetGeometryShader( CompileShader( gs_4_0, GSFlatmain() ) );
-        SetPixelShader( CompileShader( ps_4_0, PSScenemain() ) );
-        
-        SetDepthStencilState( EnableDepth, 0 );
-    }  
-}
-
-//
-// RenderScenePoint - replaces d3dfill_point
-//
-technique10 RenderScenePoint
-{
-    pass p0
-    {
-        SetVertexShader( CompileShader( vs_4_0, VSScenemain() ) );
-        SetGeometryShader( CompileShader( gs_4_0, GSPointmain() ) );
-        SetPixelShader( CompileShader( ps_4_0, PSScenemain() ) );
-        
-        SetDepthStencilState( EnableDepth, 0 );
-    }  
-}
-
-//
-// RenderScreneSpace - shows how to render something in screenspace
-//
-technique10 RenderScreenSpaceAlphaTest
-{
-    pass p0
-    {
-        SetVertexShader( CompileShader( vs_4_0, VSScreenSpacemain() ) );
-        SetGeometryShader( NULL );
-        SetPixelShader( CompileShader( ps_4_0, PSAlphaTestmain() ) );
-        
-        SetDepthStencilState( DisableDepth, 0 );
-    }  
-}
-
-//
-// RenderScreneSpace - shows how to render something in screenspace
-//
-technique10 RenderTextureOnly
-{
-    pass p0
-    {
-        SetVertexShader( CompileShader( vs_4_0, VSScenemain() ) );
-        SetGeometryShader( NULL );
-        SetPixelShader( CompileShader( ps_4_0, PSScenemain() ) );
-        
-        SetDepthStencilState( EnableDepth, 0 );
-    }  
-}
-
diff --git a/tests/hlsl/dxsdk/FluidCS11/ComputeShaderSort11.hlsl b/tests/hlsl/dxsdk/FluidCS11/ComputeShaderSort11.hlsl
deleted file mode 100644
index 6e14bc10e..000000000
--- a/tests/hlsl/dxsdk/FluidCS11/ComputeShaderSort11.hlsl
+++ /dev/null
@@ -1,75 +0,0 @@
-//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues.
-//TEST:COMPARE_HLSL: -profile cs_4_0 -entry BitonicSort -entry MatrixTranspose
-//--------------------------------------------------------------------------------------
-// File: ComputeShaderSort11.hlsl
-//
-// This file contains the compute shaders to perform GPU sorting using DirectX 11.
-// 
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-#define BITONIC_BLOCK_SIZE 512
-
-#define TRANSPOSE_BLOCK_SIZE 16
-
-//--------------------------------------------------------------------------------------
-// Constant Buffers
-//--------------------------------------------------------------------------------------
-cbuffer CB : register( b0 )
-{
-    unsigned int g_iLevel;
-    unsigned int g_iLevelMask;
-    unsigned int g_iWidth;
-    unsigned int g_iHeight;
-};
-
-//--------------------------------------------------------------------------------------
-// Structured Buffers
-//--------------------------------------------------------------------------------------
-StructuredBuffer<unsigned int> Input : register( t0 );
-RWStructuredBuffer<unsigned int> Data : register( u0 );
-
-//--------------------------------------------------------------------------------------
-// Bitonic Sort Compute Shader
-//--------------------------------------------------------------------------------------
-groupshared unsigned int shared_data[BITONIC_BLOCK_SIZE];
-
-[numthreads(BITONIC_BLOCK_SIZE, 1, 1)]
-void BitonicSort( uint3 Gid : SV_GroupID, 
-                  uint3 DTid : SV_DispatchThreadID, 
-                  uint3 GTid : SV_GroupThreadID, 
-                  uint GI : SV_GroupIndex )
-{
-    // Load shared data
-    shared_data[GI] = Data[DTid.x];
-    GroupMemoryBarrierWithGroupSync();
-    
-    // Sort the shared data
-    for (unsigned int j = g_iLevel >> 1 ; j > 0 ; j >>= 1)
-    {
-        unsigned int result = ((shared_data[GI & ~j] <= shared_data[GI | j]) == (bool)(g_iLevelMask & DTid.x))? shared_data[GI ^ j] : shared_data[GI];
-        GroupMemoryBarrierWithGroupSync();
-        shared_data[GI] = result;
-        GroupMemoryBarrierWithGroupSync();
-    }
-    
-    // Store shared data
-    Data[DTid.x] = shared_data[GI];
-}
-
-//--------------------------------------------------------------------------------------
-// Matrix Transpose Compute Shader
-//--------------------------------------------------------------------------------------
-groupshared unsigned int transpose_shared_data[TRANSPOSE_BLOCK_SIZE * TRANSPOSE_BLOCK_SIZE];
-
-[numthreads(TRANSPOSE_BLOCK_SIZE, TRANSPOSE_BLOCK_SIZE, 1)]
-void MatrixTranspose( uint3 Gid : SV_GroupID, 
-                      uint3 DTid : SV_DispatchThreadID, 
-                      uint3 GTid : SV_GroupThreadID, 
-                      uint GI : SV_GroupIndex )
-{
-    transpose_shared_data[GI] = Input[DTid.y * g_iWidth + DTid.x];
-    GroupMemoryBarrierWithGroupSync();
-    uint2 XY = DTid.yx - GTid.yx + GTid.xy;
-    Data[XY.y * g_iHeight + XY.x] = transpose_shared_data[GTid.x * TRANSPOSE_BLOCK_SIZE + GTid.y];
-}
diff --git a/tests/hlsl/dxsdk/FluidCS11/FluidCS11.hlsl b/tests/hlsl/dxsdk/FluidCS11/FluidCS11.hlsl
deleted file mode 100644
index 8966ea3c1..000000000
--- a/tests/hlsl/dxsdk/FluidCS11/FluidCS11.hlsl
+++ /dev/null
@@ -1,529 +0,0 @@
-//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues.
-//TEST:COMPARE_HLSL: -profile cs_4_0 -entry BuildGridCS -entry ClearGridIndicesCS -entry BuildGridIndicesCS -entry RearrangeParticlesCS -entry DensityCS_Simple -entry DensityCS_Shared -entry DensityCS_Grid -entry ForceCS_Simple -entry ForceCS_Shared -entry ForceCS_Grid -entry IntegrateCS
-//--------------------------------------------------------------------------------------
-// File: FluidCS11.hlsl
-//
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-//--------------------------------------------------------------------------------------
-// Smoothed Particle Hydrodynamics Algorithm Based Upon:
-// Particle-Based Fluid Simulation for Interactive Applications
-// Matthias M�ller
-//--------------------------------------------------------------------------------------
-
-//--------------------------------------------------------------------------------------
-// Optimized Grid Algorithm Based Upon:
-// Broad-Phase Collision Detection with CUDA
-// Scott Le Grand
-//--------------------------------------------------------------------------------------
-
-struct Particle
-{
-    float2 position;
-    float2 velocity;
-};
-
-struct ParticleForces
-{
-    float2 acceleration;
-};
-
-struct ParticleDensity
-{
-    float density;
-};
-
-cbuffer cbSimulationConstants : register( b0 )
-{
-    uint g_iNumParticles;
-    float g_fTimeStep;
-    float g_fSmoothlen;
-    float g_fPressureStiffness;
-    float g_fRestDensity;
-    float g_fDensityCoef;
-    float g_fGradPressureCoef;
-    float g_fLapViscosityCoef;
-    float g_fWallStiffness;
-
-    float4 g_vGravity;
-    float4 g_vGridDim;
-    float3 g_vPlanes[4];
-};
-
-//--------------------------------------------------------------------------------------
-// Fluid Simulation
-//--------------------------------------------------------------------------------------
-
-#define SIMULATION_BLOCK_SIZE 256
-
-//--------------------------------------------------------------------------------------
-// Structured Buffers
-//--------------------------------------------------------------------------------------
-RWStructuredBuffer<Particle> ParticlesRW : register( u0 );
-StructuredBuffer<Particle> ParticlesRO : register( t0 );
-
-RWStructuredBuffer<ParticleDensity> ParticlesDensityRW : register( u0 );
-StructuredBuffer<ParticleDensity> ParticlesDensityRO : register( t1 );
-
-RWStructuredBuffer<ParticleForces> ParticlesForcesRW : register( u0 );
-StructuredBuffer<ParticleForces> ParticlesForcesRO : register( t2 );
-
-RWStructuredBuffer<unsigned int> GridRW : register( u0 );
-StructuredBuffer<unsigned int> GridRO : register( t3 );
-
-RWStructuredBuffer<uint2> GridIndicesRW : register( u0 );
-StructuredBuffer<uint2> GridIndicesRO : register( t4 );
-
-
-//--------------------------------------------------------------------------------------
-// Grid Construction
-//--------------------------------------------------------------------------------------
-
-// For simplicity, this sample uses a 16-bit hash based on the grid cell and
-// a 16-bit particle ID to keep track of the particles while sorting
-// This imposes a limitation of 64K particles and 256x256 grid work
-// You could extended the implementation to support large scenarios by using a uint2
-
-float2 GridCalculateCell(float2 position)
-{
-    return clamp(position * g_vGridDim.xy + g_vGridDim.zw, float2(0, 0), float2(255, 255));
-}
-
-unsigned int GridConstuctKey(uint2 xy)
-{
-    // Bit pack [-----UNUSED-----][----Y---][----X---]
-    //                16-bit         8-bit     8-bit
-    return dot(xy.yx, uint2(256, 1));
-}
-
-unsigned int GridConstuctKeyValuePair(uint2 xy, uint value)
-{
-    // Bit pack [----Y---][----X---][-----VALUE------]
-    //             8-bit     8-bit        16-bit
-    return dot(uint3(xy.yx, value), uint3(256*256*256, 256*256, 1));
-}
-
-unsigned int GridGetKey(unsigned int keyvaluepair)
-{
-    return (keyvaluepair >> 16);
-}
-
-unsigned int GridGetValue(unsigned int keyvaluepair)
-{
-    return (keyvaluepair & 0xFFFF);
-}
-
-
-//--------------------------------------------------------------------------------------
-// Build Grid
-//--------------------------------------------------------------------------------------
-
-[numthreads(SIMULATION_BLOCK_SIZE, 1, 1)]
-void BuildGridCS( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
-{
-    const unsigned int P_ID = DTid.x; // Particle ID to operate on
-    
-    float2 position = ParticlesRO[P_ID].position;
-    float2 grid_xy = GridCalculateCell( position );
-    
-    GridRW[P_ID] = GridConstuctKeyValuePair((uint2)grid_xy, P_ID);
-}
-
-
-//--------------------------------------------------------------------------------------
-// Build Grid Indices
-//--------------------------------------------------------------------------------------
-
-[numthreads(SIMULATION_BLOCK_SIZE, 1, 1)]
-void ClearGridIndicesCS( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
-{
-    GridIndicesRW[DTid.x] = uint2(0, 0);
-}
-
-[numthreads(SIMULATION_BLOCK_SIZE, 1, 1)]
-void BuildGridIndicesCS( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
-{
-    const unsigned int G_ID = DTid.x; // Grid ID to operate on
-    unsigned int G_ID_PREV = (G_ID == 0)? g_iNumParticles : G_ID; G_ID_PREV--;
-    unsigned int G_ID_NEXT = G_ID + 1; if (G_ID_NEXT == g_iNumParticles) { G_ID_NEXT = 0; }
-    
-    unsigned int cell = GridGetKey( GridRO[G_ID] );
-    unsigned int cell_prev = GridGetKey( GridRO[G_ID_PREV] );
-    unsigned int cell_next = GridGetKey( GridRO[G_ID_NEXT] );
-    if (cell != cell_prev)
-    {
-        // I'm the start of a cell
-        GridIndicesRW[cell].x = G_ID;
-    }
-    if (cell != cell_next)
-    {
-        // I'm the end of a cell
-        GridIndicesRW[cell].y = G_ID + 1;
-    }
-}
-
-
-//--------------------------------------------------------------------------------------
-// Rearrange Particles
-//--------------------------------------------------------------------------------------
-
-[numthreads(SIMULATION_BLOCK_SIZE, 1, 1)]
-void RearrangeParticlesCS( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
-{
-    const unsigned int ID = DTid.x; // Particle ID to operate on
-    const unsigned int G_ID = GridGetValue( GridRO[ ID ] );
-    ParticlesRW[ID] = ParticlesRO[ G_ID ];
-}
-
-
-//--------------------------------------------------------------------------------------
-// Density Calculation
-//--------------------------------------------------------------------------------------
-
-float CalculateDensity(float r_sq)
-{
-    const float h_sq = g_fSmoothlen * g_fSmoothlen;
-    // Implements this equation:
-    // W_poly6(r, h) = 315 / (64 * pi * h^9) * (h^2 - r^2)^3
-    // g_fDensityCoef = fParticleMass * 315.0f / (64.0f * PI * fSmoothlen^9)
-    return g_fDensityCoef * (h_sq - r_sq) * (h_sq - r_sq) * (h_sq - r_sq);
-}
-
-
-//--------------------------------------------------------------------------------------
-// Simple N^2 Algorithm
-//--------------------------------------------------------------------------------------
-
-[numthreads(SIMULATION_BLOCK_SIZE, 1, 1)]
-void DensityCS_Simple( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
-{
-    const unsigned int P_ID = DTid.x;
-    const float h_sq = g_fSmoothlen * g_fSmoothlen;
-    float2 P_position = ParticlesRO[P_ID].position;
-    
-    float density = 0;
-    
-    // Calculate the density based on all neighbors
-    for (uint N_ID = 0 ; N_ID < g_iNumParticles ; N_ID++)
-    {
-        float2 N_position = ParticlesRO[N_ID].position;
-        
-        float2 diff = N_position - P_position;
-        float r_sq = dot(diff, diff);
-        if (r_sq < h_sq)
-        {
-            density += CalculateDensity(r_sq);
-        }
-    }
-    
-    ParticlesDensityRW[P_ID].density = density;
-}
-
-
-//--------------------------------------------------------------------------------------
-// Shared Memory Optimized N^2 Algorithm
-//--------------------------------------------------------------------------------------
-
-groupshared float2 density_shared_pos[SIMULATION_BLOCK_SIZE];
-
-[numthreads(SIMULATION_BLOCK_SIZE, 1, 1)]
-void DensityCS_Shared( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
-{
-    const unsigned int P_ID = DTid.x;
-    const float h_sq = g_fSmoothlen * g_fSmoothlen;
-    float2 P_position = ParticlesRO[P_ID].position;
-    
-    float density = 0;
-    
-    // Calculate the density based on all neighbors
-    [loop]
-    for (uint N_block_ID = 0 ; N_block_ID < g_iNumParticles ; N_block_ID += SIMULATION_BLOCK_SIZE)
-    {
-        // Cache a tile of particles unto shared memory to increase IO efficiency
-        density_shared_pos[GI] = ParticlesRO[N_block_ID + GI].position;
-       
-        GroupMemoryBarrierWithGroupSync();        
-
-        for (uint N_tile_ID = 0; N_tile_ID < SIMULATION_BLOCK_SIZE; N_tile_ID++) 
-        {
-            float2 N_position = density_shared_pos[N_tile_ID];
-            
-            float2 diff = N_position - P_position;
-            float r_sq = dot(diff, diff);
-            if (r_sq < h_sq)
-            {
-                density += CalculateDensity(r_sq);
-            }
-        }        
-        
-        GroupMemoryBarrierWithGroupSync();
-    }
-    
-    ParticlesDensityRW[P_ID].density = density;
-}
-
-
-//--------------------------------------------------------------------------------------
-// Optimized Grid + Sort Algorithm
-//--------------------------------------------------------------------------------------
-
-[numthreads(SIMULATION_BLOCK_SIZE, 1, 1)]
-void DensityCS_Grid( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
-{
-    const unsigned int P_ID = DTid.x;
-    const float h_sq = g_fSmoothlen * g_fSmoothlen;
-    float2 P_position = ParticlesRO[P_ID].position;
-    
-    float density = 0;
-    
-    // Calculate the density based on neighbors from the 8 adjacent cells + current cell
-    int2 G_XY = (int2)GridCalculateCell( P_position );
-    for (int Y = max(G_XY.y - 1, 0) ; Y <= min(G_XY.y + 1, 255) ; Y++)
-    {
-        for (int X = max(G_XY.x - 1, 0) ; X <= min(G_XY.x + 1, 255) ; X++)
-        {
-            unsigned int G_CELL = GridConstuctKey(uint2(X, Y));
-            uint2 G_START_END = GridIndicesRO[G_CELL];
-            for (unsigned int N_ID = G_START_END.x ; N_ID < G_START_END.y ; N_ID++)
-            {
-                float2 N_position = ParticlesRO[N_ID].position;
-                
-                float2 diff = N_position - P_position;
-                float r_sq = dot(diff, diff);
-                if (r_sq < h_sq)
-                {
-                    density += CalculateDensity(r_sq);
-                }
-            }
-        }
-    }
-    
-    ParticlesDensityRW[P_ID].density = density;
-}
-
-
-//--------------------------------------------------------------------------------------
-// Force Calculation
-//--------------------------------------------------------------------------------------
-
-float CalculatePressure(float density)
-{
-    // Implements this equation:
-    // Pressure = B * ((rho / rho_0)^y  - 1)
-    return g_fPressureStiffness * max(pow(density / g_fRestDensity, 3) - 1, 0);
-}
-
-float2 CalculateGradPressure(float r, float P_pressure, float N_pressure, float N_density, float2 diff)
-{
-    const float h = g_fSmoothlen;
-    float avg_pressure = 0.5f * (N_pressure + P_pressure);
-    // Implements this equation:
-    // W_spkiey(r, h) = 15 / (pi * h^6) * (h - r)^3
-    // GRAD( W_spikey(r, h) ) = -45 / (pi * h^6) * (h - r)^2
-    // g_fGradPressureCoef = fParticleMass * -45.0f / (PI * fSmoothlen^6)
-    return g_fGradPressureCoef * avg_pressure / N_density * (h - r) * (h - r) / r * (diff);
-}
-
-float2 CalculateLapVelocity(float r, float2 P_velocity, float2 N_velocity, float N_density)
-{
-    const float h = g_fSmoothlen;
-    float2 vel_diff = (N_velocity - P_velocity);
-    // Implements this equation:
-    // W_viscosity(r, h) = 15 / (2 * pi * h^3) * (-r^3 / (2 * h^3) + r^2 / h^2 + h / (2 * r) - 1)
-    // LAPLACIAN( W_viscosity(r, h) ) = 45 / (pi * h^6) * (h - r)
-    // g_fLapViscosityCoef = fParticleMass * fViscosity * 45.0f / (PI * fSmoothlen^6)
-    return g_fLapViscosityCoef / N_density * (h - r) * vel_diff;
-}
-
-
-//--------------------------------------------------------------------------------------
-// Simple N^2 Algorithm
-//--------------------------------------------------------------------------------------
-
-[numthreads(SIMULATION_BLOCK_SIZE, 1, 1)]
-void ForceCS_Simple( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
-{
-    const unsigned int P_ID = DTid.x; // Particle ID to operate on
-    
-    float2 P_position = ParticlesRO[P_ID].position;
-    float2 P_velocity = ParticlesRO[P_ID].velocity;
-    float P_density = ParticlesDensityRO[P_ID].density;
-    float P_pressure = CalculatePressure(P_density);
-    
-    const float h_sq = g_fSmoothlen * g_fSmoothlen;
-    
-    float2 acceleration = float2(0, 0);
-
-    // Calculate the acceleration based on all neighbors
-    for (uint N_ID = 0 ; N_ID < g_iNumParticles ; N_ID++)
-    {
-        float2 N_position = ParticlesRO[N_ID].position;
-        
-        float2 diff = N_position - P_position;
-        float r_sq = dot(diff, diff);
-        if (r_sq < h_sq && P_ID != N_ID)
-        {
-            float2 N_velocity = ParticlesRO[N_ID].velocity;
-            float N_density = ParticlesDensityRO[N_ID].density;
-            float N_pressure = CalculatePressure(N_density);
-            float r = sqrt(r_sq);
-
-            // Pressure Term
-            acceleration += CalculateGradPressure(r, P_pressure, N_pressure, N_density, diff);
-            
-            // Viscosity Term
-            acceleration += CalculateLapVelocity(r, P_velocity, N_velocity, N_density);
-        }
-    }
-    
-    ParticlesForcesRW[P_ID].acceleration = acceleration / P_density;
-}
-
-
-//--------------------------------------------------------------------------------------
-// Shared Memory Optimized N^2 Algorithm
-//--------------------------------------------------------------------------------------
-
-groupshared struct { float2 position; float2 velocity; float density; } force_shared_pos[SIMULATION_BLOCK_SIZE];
-
-[numthreads(SIMULATION_BLOCK_SIZE, 1, 1)]
-void ForceCS_Shared( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
-{
-    const unsigned int P_ID = DTid.x; // Particle ID to operate on
-    
-    float2 P_position = ParticlesRO[P_ID].position;
-    float2 P_velocity = ParticlesRO[P_ID].velocity;
-    float P_density = ParticlesDensityRO[P_ID].density;
-    float P_pressure = CalculatePressure(P_density);
-    
-    const float h_sq = g_fSmoothlen * g_fSmoothlen;
-    
-    float2 acceleration = float2(0, 0);
-
-    // Calculate the acceleration based on all neighbors
-    [loop]
-    for (uint N_block_ID = 0 ; N_block_ID < g_iNumParticles ; N_block_ID += SIMULATION_BLOCK_SIZE)
-    {
-        // Cache a tile of particles unto shared memory to increase IO efficiency
-        force_shared_pos[GI].position = ParticlesRO[N_block_ID + GI].position;
-        force_shared_pos[GI].velocity = ParticlesRO[N_block_ID + GI].velocity;
-        force_shared_pos[GI].density = ParticlesDensityRO[N_block_ID + GI].density;
-       
-        GroupMemoryBarrierWithGroupSync();        
-
-        [loop]
-        for (uint N_tile_ID = 0; N_tile_ID < SIMULATION_BLOCK_SIZE; N_tile_ID++ ) 
-        {
-            uint N_ID = N_block_ID + N_tile_ID;
-            float2 N_position = force_shared_pos[N_tile_ID].position;
-            
-            float2 diff = N_position - P_position;
-            float r_sq = dot(diff, diff);
-            if (r_sq < h_sq && P_ID != N_ID)
-            {
-                float2 N_velocity = force_shared_pos[N_tile_ID].velocity;
-                float N_density = force_shared_pos[N_tile_ID].density;
-                float N_pressure = CalculatePressure(N_density);
-                float r = sqrt(r_sq);
-
-                // Pressure Term
-                acceleration += CalculateGradPressure(r, P_pressure, N_pressure, N_density, diff);
-                
-                // Viscosity Term
-                acceleration += CalculateLapVelocity(r, P_velocity, N_velocity, N_density);
-            }
-        }        
-        
-        GroupMemoryBarrierWithGroupSync();
-    }
-    
-    ParticlesForcesRW[P_ID].acceleration = acceleration / P_density;
-}
-
-
-//--------------------------------------------------------------------------------------
-// Optimized Grid + Sort Algorithm
-//--------------------------------------------------------------------------------------
-
-[numthreads(SIMULATION_BLOCK_SIZE, 1, 1)]
-void ForceCS_Grid( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
-{
-    const unsigned int P_ID = DTid.x; // Particle ID to operate on
-    
-    float2 P_position = ParticlesRO[P_ID].position;
-    float2 P_velocity = ParticlesRO[P_ID].velocity;
-    float P_density = ParticlesDensityRO[P_ID].density;
-    float P_pressure = CalculatePressure(P_density);
-    
-    const float h_sq = g_fSmoothlen * g_fSmoothlen;
-    
-    float2 acceleration = float2(0, 0);
-    
-    // Calculate the acceleration based on neighbors from the 8 adjacent cells + current cell
-    int2 G_XY = (int2)GridCalculateCell( P_position );
-    for (int Y = max(G_XY.y - 1, 0) ; Y <= min(G_XY.y + 1, 255) ; Y++)
-    {
-        for (int X = max(G_XY.x - 1, 0) ; X <= min(G_XY.x + 1, 255) ; X++)
-        {
-            unsigned int G_CELL = GridConstuctKey(uint2(X, Y));
-            uint2 G_START_END = GridIndicesRO[G_CELL];
-            for (unsigned int N_ID = G_START_END.x ; N_ID < G_START_END.y ; N_ID++)
-            {
-                float2 N_position = ParticlesRO[N_ID].position;
-                
-                float2 diff = N_position - P_position;
-                float r_sq = dot(diff, diff);
-                if (r_sq < h_sq && P_ID != N_ID)
-                {
-                    float2 N_velocity = ParticlesRO[N_ID].velocity;
-                    float N_density = ParticlesDensityRO[N_ID].density;
-                    float N_pressure = CalculatePressure(N_density);
-                    float r = sqrt(r_sq);
-
-                    // Pressure Term
-                    acceleration += CalculateGradPressure(r, P_pressure, N_pressure, N_density, diff);
-                    
-                    // Viscosity Term
-                    acceleration += CalculateLapVelocity(r, P_velocity, N_velocity, N_density);
-                }
-            }
-        }
-    }
-
-    ParticlesForcesRW[P_ID].acceleration = acceleration / P_density;
-}
-
-
-//--------------------------------------------------------------------------------------
-// Integration
-//--------------------------------------------------------------------------------------
-
-[numthreads(SIMULATION_BLOCK_SIZE, 1, 1)]
-void IntegrateCS( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
-{
-    const unsigned int P_ID = DTid.x; // Particle ID to operate on
-    
-    float2 position = ParticlesRO[P_ID].position;
-    float2 velocity = ParticlesRO[P_ID].velocity;
-    float2 acceleration = ParticlesForcesRO[P_ID].acceleration;
-    
-    // Apply the forces from the map walls
-    [unroll]
-    for (unsigned int i = 0 ; i < 4 ; i++)
-    {
-        float dist = dot(float3(position, 1), g_vPlanes[i]);
-        acceleration += min(dist, 0) * -g_fWallStiffness * g_vPlanes[i].xy;
-    }
-    
-    // Apply gravity
-    acceleration += g_vGravity.xy;
-    
-    // Integrate
-    velocity += g_fTimeStep * acceleration;
-    position += g_fTimeStep * velocity;
-    
-    // Update
-    ParticlesRW[P_ID].position = position;
-    ParticlesRW[P_ID].velocity = velocity;
-}
diff --git a/tests/hlsl/dxsdk/FluidCS11/FluidRender.hlsl b/tests/hlsl/dxsdk/FluidCS11/FluidRender.hlsl
deleted file mode 100644
index cfd14c2b2..000000000
--- a/tests/hlsl/dxsdk/FluidCS11/FluidRender.hlsl
+++ /dev/null
@@ -1,124 +0,0 @@
-//TEST:COMPARE_HLSL:-no-mangle -profile sm_4_0 -entry ParticleVS -stage vertex -entry ParticleGS -stage geometry -entry ParticlePS -stage pixel
-
-#ifndef __SLANG__
-#define ParticlesRO ParticlesRO_0
-#define ParticleDensityRO ParticleDensityRO_0
-#define cbRenderConstants cbRenderConstants_0
-#define g_mViewProjection g_mViewProjection_0
-#define g_fParticleSize g_fParticleSize_0
-#define density density_0
-#define position position_0
-#define velocity velocity_0
-
-#endif
-
-//--------------------------------------------------------------------------------------
-// File: FluidRender.hlsl
-//
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-//--------------------------------------------------------------------------------------
-// Particle Rendering
-//--------------------------------------------------------------------------------------
-
-struct Particle {
-    float2 position;
-    float2 velocity;
-};
-
-struct ParticleDensity {
-    float density;
-};
-
-StructuredBuffer<Particle> ParticlesRO : register( t0 );
-StructuredBuffer<ParticleDensity> ParticleDensityRO : register( t1 );
-
-cbuffer cbRenderConstants : register( b0 )
-{
-    matrix g_mViewProjection;
-    float g_fParticleSize;
-};
-
-struct VSParticleOut
-{
-    float2 position : POSITION;
-    float4 color : COLOR;
-};
-
-struct GSParticleOut
-{
-    float4 position : SV_Position;
-    float4 color : COLOR;
-    float2 texcoord : TEXCOORD;
-};
-
-
-//--------------------------------------------------------------------------------------
-// Visualization Helper
-//--------------------------------------------------------------------------------------
-
-static const float4 Rainbow[5] = {
-    float4(1, 0, 0, 1), // red
-    float4(1, 1, 0, 1), // orange
-    float4(0, 1, 0, 1), // green
-    float4(0, 1, 1, 1), // teal
-    float4(0, 0, 1, 1), // blue
-};
-
-float4 VisualizeNumber(float n)
-{
-    return lerp( Rainbow[ int(floor(n * 4.0f)) ], Rainbow[ int(ceil(n * 4.0f)) ], frac(n * 4.0f) );
-}
-
-float4 VisualizeNumber(float n, float lower, float upper)
-{
-    return VisualizeNumber( saturate( (n - lower) / (upper - lower) ) );
-}
-
-
-//--------------------------------------------------------------------------------------
-// Vertex Shader
-//--------------------------------------------------------------------------------------
-
-VSParticleOut ParticleVS(uint ID : SV_VERTEXID) 
-{
-    VSParticleOut Out; //  = { { 0, 0 } , { 0, 0, 0, 0 } }; // (VSParticleOut)0;
-    Out.position = ParticlesRO[ID].position;
-    Out.color = VisualizeNumber(ParticleDensityRO[ID].density, 1000.0f, 2000.0f);
-    return Out;
-}
-
-
-//--------------------------------------------------------------------------------------
-// Particle Geometry Shader
-//--------------------------------------------------------------------------------------
-
-static const float2 g_positions[4] = { float2(-1, 1), float2(1, 1), float2(-1, -1), float2(1, -1) };
-static const float2 g_texcoords[4] = { float2(0, 1), float2(1, 1), float2(0, 0), float2(1, 0) };
-
-[maxvertexcount(4)]
-void ParticleGS(point VSParticleOut In[1], inout TriangleStream<GSParticleOut> SpriteStream)
-{
-    [unroll]
-    for (int i = 0; i < 4; i++)
-    {
-        GSParticleOut Out; // = (GSParticleOut)0;
-        float4 position = float4(In[0].position, 0, 1) + g_fParticleSize * float4(g_positions[i], 0, 0);
-        Out.position = mul(position, g_mViewProjection);
-        Out.color = In[0].color;
-        Out.texcoord = g_texcoords[i];
-        SpriteStream.Append(Out);
-    }
-    SpriteStream.RestartStrip();
-}
-
-
-//--------------------------------------------------------------------------------------
-// Pixel Shader
-//--------------------------------------------------------------------------------------
-
-float4 ParticlePS(GSParticleOut In) : SV_TARGET
-{
-    return In.color;
-}
diff --git a/tests/hlsl/dxsdk/HDRToneMappingCS11/BrightPassAndHorizFilterCS.hlsl b/tests/hlsl/dxsdk/HDRToneMappingCS11/BrightPassAndHorizFilterCS.hlsl
deleted file mode 100644
index 3addeca08..000000000
--- a/tests/hlsl/dxsdk/HDRToneMappingCS11/BrightPassAndHorizFilterCS.hlsl
+++ /dev/null
@@ -1,64 +0,0 @@
-//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues.
-//TEST:COMPARE_HLSL: -profile cs_4_0 -entry CSMain
-//--------------------------------------------------------------------------------------
-// File: BrightPassAndHorizFilterCS.hlsl
-//
-// The CS for bright pass and horizontal blur, used in CS path of 
-// HDRToneMappingCS11 sample
-// 
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-static const float  MIDDLE_GRAY = 0.72f;
-static const float  LUM_WHITE = 1.5f;
-static const float  BRIGHT_THRESHOLD = 0.5f;
-
-Texture2D Input : register( t0 ); 
-StructuredBuffer<float> lum : register( t1 );
-RWStructuredBuffer<float4> Result : register( u0 );
-
-cbuffer cb0
-{
-    float4  g_avSampleWeights[15];
-    uint    g_outputwidth;
-    float   g_inverse;
-    int2    g_inputsize;
-}
-
-#define kernelhalf 7
-#define groupthreads 128
-groupshared float4 temp[groupthreads];
-
-[numthreads( groupthreads, 1, 1 )]
-void CSMain( uint3 Gid : SV_GroupID, uint GI : SV_GroupIndex )
-{
-    int2 coord = int2( GI - kernelhalf + (groupthreads - kernelhalf * 2) * Gid.x, Gid.y );
-    coord = coord.xy * 8 + int2(4, 3);
-    coord = clamp( coord, int2(0, 0), int2(g_inputsize.x-1, g_inputsize.y-1) );
-    float4 vColor = Input.Load( int3(coord, 0) );
-
-    float fLum = lum[0]*g_inverse;
-
-    // Bright pass and tone mapping
-    vColor = max( 0.0f, vColor - BRIGHT_THRESHOLD );
-    vColor *= MIDDLE_GRAY / (fLum + 0.001f);
-    vColor *= (1.0f + vColor/LUM_WHITE);
-    vColor /= (1.0f + vColor);
-
-    temp[GI] = vColor;
-
-    GroupMemoryBarrierWithGroupSync();
-
-    // Horizontal blur
-    if ( GI >= kernelhalf && 
-         GI < (groupthreads - kernelhalf) && 
-         ( (Gid.x * (groupthreads - 2 * kernelhalf) + GI - kernelhalf) < g_outputwidth) )
-    {
-        float4 vOut = 0;
-        
-        [unroll]
-        for ( int i = -kernelhalf; i <= kernelhalf; ++i )
-            vOut += temp[GI + i] * g_avSampleWeights[i + kernelhalf];
-
-        Result[GI - kernelhalf + (groupthreads - kernelhalf * 2) * Gid.x + Gid.y * g_outputwidth] = float4(vOut.rgb, 1.0f);
-    }
-}
diff --git a/tests/hlsl/dxsdk/HDRToneMappingCS11/DumpToTexture.hlsl b/tests/hlsl/dxsdk/HDRToneMappingCS11/DumpToTexture.hlsl
deleted file mode 100644
index f2d119eb5..000000000
--- a/tests/hlsl/dxsdk/HDRToneMappingCS11/DumpToTexture.hlsl
+++ /dev/null
@@ -1,29 +0,0 @@
-//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues.
-//TEST:COMPARE_HLSL: -profile ps_4_0 -entry PSDump
-//--------------------------------------------------------------------------------------
-// File: DumpToTexture.hlsl
-//
-// The PS for converting CS output buffer to a texture, used in CS path of 
-// HDRToneMappingCS11 sample
-// 
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-StructuredBuffer<float4> buffer : register( t0 );
-
-struct QuadVS_Output
-{
-    float4 Pos : SV_POSITION;              
-    float2 Tex : TEXCOORD0;
-};
-
-cbuffer cbPS : register( b0 )
-{
-    uint4    g_param;   
-};
-
-float4 PSDump( QuadVS_Output Input ) : SV_TARGET
-{
-    // To calculate the buffer offset, it is natural to use the screen space coordinates,
-    // Input.Pos is the screen space coordinates of the pixel being written 
-    return buffer[ (Input.Pos.x - 0.5) + (Input.Pos.y - 0.5) * g_param.x ];	
-}
diff --git a/tests/hlsl/dxsdk/HDRToneMappingCS11/FilterCS.hlsl b/tests/hlsl/dxsdk/HDRToneMappingCS11/FilterCS.hlsl
deleted file mode 100644
index e21b97e30..000000000
--- a/tests/hlsl/dxsdk/HDRToneMappingCS11/FilterCS.hlsl
+++ /dev/null
@@ -1,73 +0,0 @@
-//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues.
-//TEST:COMPARE_HLSL: -profile cs_4_0 -entry CSVerticalFilter -entry CSHorizFilter
-//--------------------------------------------------------------------------------------
-// File: FilterCS.hlsl
-//
-// The CSs for doing vertical and horizontal blur, used in CS path of 
-// HDRToneMappingCS11 sample
-// 
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-StructuredBuffer<float4> InputBuf : register( t0 );
-Texture2D InputTex : register( t1 ); 
-RWStructuredBuffer<float4> Result : register( u0 );
-
-cbuffer cb0
-{
-    float4  g_avSampleWeights[15];
-    int2    g_outputsize;
-    int2    g_inputsize;
-}
-
-#define kernelhalf 7
-#define groupthreads 128
-groupshared float4 temp[groupthreads];
-
-[numthreads( groupthreads, 1, 1 )]
-void CSVerticalFilter( uint3 Gid : SV_GroupID, uint GI : SV_GroupIndex )
-{
-    int offsety = GI - kernelhalf + (groupthreads - kernelhalf * 2) * Gid.y;
-    offsety = clamp( offsety, 0, g_inputsize.y-1 );
-    int offset = Gid.x + offsety * g_inputsize.x;
-    temp[GI] = InputBuf[offset];
-
-    GroupMemoryBarrierWithGroupSync();
-
-    // Vertical blur
-    if ( GI >= kernelhalf && 
-         GI < (groupthreads - kernelhalf) && 
-         ( (GI - kernelhalf + (groupthreads - kernelhalf * 2) * Gid.y) < g_outputsize.y) )
-    {
-        float4 vOut = 0;
-        
-        [unroll]
-        for ( int i = -kernelhalf; i <= kernelhalf; ++i )
-            vOut += temp[GI + i] * g_avSampleWeights[i + kernelhalf];
-
-        Result[Gid.x + (GI - kernelhalf + (groupthreads - kernelhalf * 2) * Gid.y) * g_outputsize.x] = float4(vOut.rgb, 1.0f);
-    }
-}
-
-[numthreads( groupthreads, 1, 1 )]
-void CSHorizFilter( uint3 Gid : SV_GroupID, uint GI : SV_GroupIndex )
-{
-    int2 coord = int2( GI - kernelhalf + (groupthreads - kernelhalf * 2) * Gid.x, Gid.y );
-    coord = clamp( coord, int2(0, 0), int2(g_inputsize.x-1, g_inputsize.y-1) );
-    temp[GI] = InputTex.Load( int3(coord, 0) );        
-
-    GroupMemoryBarrierWithGroupSync();
-
-    // Horizontal blur
-    if ( GI >= kernelhalf && 
-         GI < (groupthreads - kernelhalf) && 
-         ( (Gid.x * (groupthreads - 2 * kernelhalf) + GI - kernelhalf) < g_outputsize.x) )
-    {
-        float4 vOut = 0;
-        
-        [unroll]
-        for ( int i = -kernelhalf; i <= kernelhalf; ++i )
-            vOut += temp[GI + i] * g_avSampleWeights[i + kernelhalf];
-
-        Result[GI - kernelhalf + (groupthreads - kernelhalf * 2) * Gid.x + Gid.y * g_outputsize.x] = float4(vOut.rgb, 1.0f);
-    }
-}
diff --git a/tests/hlsl/dxsdk/HDRToneMappingCS11/FinalPass.hlsl b/tests/hlsl/dxsdk/HDRToneMappingCS11/FinalPass.hlsl
deleted file mode 100644
index f5a49d2eb..000000000
--- a/tests/hlsl/dxsdk/HDRToneMappingCS11/FinalPass.hlsl
+++ /dev/null
@@ -1,79 +0,0 @@
-//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues.
-//TEST:COMPARE_HLSL: -profile vs_4_0 -entry QuadVS -profile ps_4_0 -entry PSFinalPass -entry PSFinalPassForCPUReduction
-//--------------------------------------------------------------------------------------
-// File: FinalPass.hlsl
-//
-// The PSs for doing tone-mapping based on the input luminance, used in CS path of 
-// HDRToneMappingCS11 sample
-// 
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-struct QuadVS_Input
-{
-    float4 Pos : POSITION;
-    float2 Tex : TEXCOORD0;
-};
-
-struct QuadVS_Output
-{
-    float4 Pos : SV_POSITION;              
-    float2 Tex : TEXCOORD0;
-};
-
-QuadVS_Output QuadVS( QuadVS_Input Input )
-{
-    QuadVS_Output Output;
-    Output.Pos = Input.Pos;
-    Output.Tex = Input.Tex;
-    return Output;
-}
-
-Texture2D<float4> tex : register( t0 );
-StructuredBuffer<float> lum : register( t1 );
-Texture2D<float4> bloom : register( t2 );
-
-SamplerState PointSampler : register (s0);
-SamplerState LinearSampler : register (s1);
-
-
-static const float  MIDDLE_GRAY = 0.72f;
-static const float  LUM_WHITE = 1.5f;
-
-cbuffer cbPS : register( b0 )
-{
-    float4    g_param;   
-};
-
-float4 PSFinalPass( QuadVS_Output Input ) : SV_TARGET
-{
-    float4 vColor = tex.Sample( PointSampler, Input.Tex );
-    float fLum = lum[0]*g_param.x;
-    float3 vBloom = bloom.Sample( LinearSampler, Input.Tex );
-
-    // Tone mapping
-    vColor.rgb *= MIDDLE_GRAY / (fLum + 0.001f);
-    vColor.rgb *= (1.0f + vColor/LUM_WHITE);
-    vColor.rgb /= (1.0f + vColor);
-    
-    vColor.rgb += 0.6f * vBloom;
-    vColor.a = 1.0f;
-
-    return vColor;
-}
-
-float4 PSFinalPassForCPUReduction( QuadVS_Output Input ) : SV_TARGET
-{
-    float4 vColor = tex.Sample( PointSampler, Input.Tex );
-    float fLum = g_param.x;
-    float3 vBloom = bloom.Sample( LinearSampler, Input.Tex );
-
-    // Tone mapping
-    vColor.rgb *= MIDDLE_GRAY / (fLum + 0.001f);
-    vColor.rgb *= (1.0f + vColor/LUM_WHITE);
-    vColor.rgb /= (1.0f + vColor);
-    
-    vColor.rgb += 0.6f * vBloom;
-    vColor.a = 1.0f;
-    
-    return vColor;
-}
diff --git a/tests/hlsl/dxsdk/HDRToneMappingCS11/PSApproach.hlsl b/tests/hlsl/dxsdk/HDRToneMappingCS11/PSApproach.hlsl
deleted file mode 100644
index 3f16b2449..000000000
--- a/tests/hlsl/dxsdk/HDRToneMappingCS11/PSApproach.hlsl
+++ /dev/null
@@ -1,129 +0,0 @@
-//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues.
-//TEST:COMPARE_HLSL: -profile ps_4_0 -entry DownScale2x2_Lum -entry DownScale3x3 -entry FinalPass -entry DownScale3x3_BrightPass -entry Bloom
-//--------------------------------------------------------------------------------------
-// File: PSApproach.hlsl
-//
-// The PSs for doing post-processing, used in PS path of 
-// HDRToneMappingCS11 sample
-// 
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-static const float4 LUM_VECTOR = float4(.299, .587, .114, 0);
-static const float  MIDDLE_GRAY = 0.72f;
-static const float  LUM_WHITE = 1.5f;
-static const float  BRIGHT_THRESHOLD = 0.5f;
-
-SamplerState PointSampler : register (s0);
-SamplerState LinearSampler : register (s1);
-
-struct QuadVS_Output
-{
-    float4 Pos : SV_POSITION;              
-    float2 Tex : TEXCOORD0;
-};
-
-Texture2D s0 : register(t0);
-Texture2D s1 : register(t1);
-Texture2D s2 : register(t2);
-
-float4 DownScale2x2_Lum ( QuadVS_Output Input ) : SV_TARGET
-{    
-    float4 vColor = 0.0f;
-    float  fAvg = 0.0f;
-    
-    for( int y = -1; y < 1; y++ )
-    {
-        for( int x = -1; x < 1; x++ )
-        {
-            // Compute the sum of color values
-            vColor = s0.Sample( PointSampler, Input.Tex, int2(x,y) );                       
-                
-            fAvg += dot( vColor, LUM_VECTOR );
-        }
-    }
-    
-    fAvg /= 4;
-    
-    return float4(fAvg, fAvg, fAvg, 1.0f);
-}
-
-float4 DownScale3x3( QuadVS_Output Input ) : SV_TARGET
-{
-    float fAvg = 0.0f; 
-    float4 vColor;
-    
-    for( int y = -1; y <= 1; y++ )
-    {
-        for( int x = -1; x <= 1; x++ )
-        {
-            // Compute the sum of color values
-            vColor = s0.Sample( PointSampler, Input.Tex, int2(x,y) );
-                        
-            fAvg += vColor.r; 
-        }
-    }
-    
-    // Divide the sum to complete the average
-    fAvg /= 9;
-    
-    return float4(fAvg, fAvg, fAvg, 1.0f);
-}
-
-float4 FinalPass( QuadVS_Output Input ) : SV_TARGET
-{   
-    //float4 vColor = 0;
-    float4 vColor = s0.Sample( PointSampler, Input.Tex );
-    float4 vLum = s1.Sample( PointSampler, float2(0,0) );
-    float3 vBloom = s2.Sample( LinearSampler, Input.Tex );       
-    
-    // Tone mapping
-    vColor.rgb *= MIDDLE_GRAY / (vLum.r + 0.001f);
-    vColor.rgb *= (1.0f + vColor/LUM_WHITE);
-    vColor.rgb /= (1.0f + vColor);
-    
-    vColor.rgb += 0.6f * vBloom;
-    vColor.a = 1.0f;    
-    
-    return vColor;
-}
-
-float4 DownScale3x3_BrightPass( QuadVS_Output Input ) : SV_TARGET
-{   
-    float3 vColor = 0.0f;
-    float4 vLum = s1.Sample( PointSampler, float2(0, 0) );
-    float  fLum = vLum.r;
-
-    vColor = s0.Sample( PointSampler, Input.Tex ).rgb;          
- 
-    // Bright pass and tone mapping
-    vColor = max( 0.0f, vColor - BRIGHT_THRESHOLD );
-    vColor *= MIDDLE_GRAY / (fLum + 0.001f);
-    vColor *= (1.0f + vColor/LUM_WHITE);
-    vColor /= (1.0f + vColor);
-    
-    return float4(vColor, 1.0f);
-}
-
-cbuffer cb0
-{
-    float2 g_avSampleOffsets[15];
-    float4 g_avSampleWeights[15];
-}
-
-float4 Bloom( QuadVS_Output Input ) : SV_TARGET
-{    
-    float4 vSample = 0.0f;
-    float4 vColor = 0.0f;
-    float2 vSamplePosition;
-    
-    for( int iSample = 0; iSample < 15; iSample++ )
-    {
-        // Sample from adjacent points
-        vSamplePosition = Input.Tex + g_avSampleOffsets[iSample];
-        vColor = s0.Sample( PointSampler, vSamplePosition);
-        
-        vSample += g_avSampleWeights[iSample]*vColor;
-    }
-    
-    return vSample;
-}
diff --git a/tests/hlsl/dxsdk/HDRToneMappingCS11/ReduceTo1DCS.hlsl b/tests/hlsl/dxsdk/HDRToneMappingCS11/ReduceTo1DCS.hlsl
deleted file mode 100644
index 1316250d5..000000000
--- a/tests/hlsl/dxsdk/HDRToneMappingCS11/ReduceTo1DCS.hlsl
+++ /dev/null
@@ -1,72 +0,0 @@
-//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues.
-//TEST:COMPARE_HLSL: -profile cs_4_0 -entry CSMain
-//-----------------------------------------------------------------------------
-// File: ReduceTo1DCS.hlsl
-//
-// Desc: Reduce an input Texture2D to a buffer
-// 
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//-----------------------------------------------------------------------------
-Texture2D Input : register( t0 ); 
-RWStructuredBuffer<float> Result : register( u0 );
-
-cbuffer cbCS : register( b0 )
-{
-    uint4    g_param;   // (g_param.x, g_param.y) is the x and y dimensions of the Dispatch call
-                        // (g_param.z, g_param.w) is the size of the above Input Texture2D
-};
-
-//#define CS_FULL_PIXEL_REDUCITON // Defining this or not must be the same as in HDRToneMappingCS11.cpp
-
-#define blocksize 8
-#define blocksizeY 8
-#define groupthreads (blocksize*blocksizeY)
-groupshared float accum[groupthreads];
-
-static const float4 LUM_VECTOR = float4(.299, .587, .114, 0);
-
-[numthreads(blocksize,blocksizeY,1)]
-void CSMain( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
-{    
-    float4 s = 
-#ifdef CS_FULL_PIXEL_REDUCITON
-        Input.Load( uint3(DTid.xy                                                   , 0) )+ 
-        Input.Load( uint3(DTid.xy + uint2(blocksize*g_param.x,                    0), 0) ) +
-        Input.Load( uint3(DTid.xy + uint2(0,                   blocksizeY*g_param.y), 0) ) + 
-        Input.Load( uint3(DTid.xy + uint2(blocksize*g_param.x, blocksizeY*g_param.y), 0) );
-#else
-        Input.Load( uint3((float)DTid.x/81.0f*g_param.z, (float)DTid.y/81.0f*g_param.w, 0) );
-#endif
-        
-    accum[GI] = dot( s, LUM_VECTOR );
-
-    // Parallel reduction algorithm follows 
-    GroupMemoryBarrierWithGroupSync();
-    if ( GI < 32 )
-        accum[GI] += accum[32+GI];
-
-    GroupMemoryBarrierWithGroupSync();
-    if ( GI < 16 )
-        accum[GI] += accum[16+GI];
-
-    GroupMemoryBarrierWithGroupSync();
-    if ( GI < 8 )
-        accum[GI] += accum[8+GI];
-
-    GroupMemoryBarrierWithGroupSync();
-    if ( GI < 4 )
-        accum[GI] += accum[4+GI];
-
-    GroupMemoryBarrierWithGroupSync();
-    if ( GI < 2 )
-        accum[GI] += accum[2+GI];
-
-    GroupMemoryBarrierWithGroupSync();
-    if ( GI < 1 )
-        accum[GI] += accum[1+GI];
-
-    if ( GI == 0 )
-    {                
-        Result[Gid.y*g_param.x+Gid.x] = accum[0];
-    }
-}
diff --git a/tests/hlsl/dxsdk/HDRToneMappingCS11/ReduceToSingleCS.hlsl b/tests/hlsl/dxsdk/HDRToneMappingCS11/ReduceToSingleCS.hlsl
deleted file mode 100644
index 73857a6bb..000000000
--- a/tests/hlsl/dxsdk/HDRToneMappingCS11/ReduceToSingleCS.hlsl
+++ /dev/null
@@ -1,63 +0,0 @@
-//TEST_DISABLED:COMPARE_HLSL:-no-mangle -profile cs_4_0 -entry CSMain
-//-----------------------------------------------------------------------------
-// File: ReduceToSingleCS.hlsl
-//
-// Desc: Reduce an input buffer by a factor of groupthreads
-// 
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//-----------------------------------------------------------------------------
-
-StructuredBuffer<float> Input : register( t0 );
-RWStructuredBuffer<float> Result : register( u0 );
-
-cbuffer cbCS : register( b0 )
-{
-    uint4    g_param;   // g_param.x is the actual elements contained in Input
-                        // g_param.y is the x dimension of the Dispatch call
-};
-
-#define groupthreads 128
-groupshared float accum[groupthreads];
-
-[numthreads(groupthreads,1,1)]
-void CSMain( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
-{
-    if ( DTid.x < g_param.x )
-        accum[GI] = Input[DTid.x];
-    else
-        accum[GI] = 0;
-
-    // Parallel reduction algorithm follows 
-    GroupMemoryBarrierWithGroupSync();
-    if ( GI < 64 )
-        accum[GI] += accum[64+GI];  
-
-    GroupMemoryBarrierWithGroupSync();
-    if ( GI < 32 )    
-        accum[GI] += accum[32+GI];
-
-    GroupMemoryBarrierWithGroupSync();
-    if ( GI < 16 )
-        accum[GI] += accum[16+GI];
-
-    GroupMemoryBarrierWithGroupSync();
-    if ( GI < 8 ) 
-        accum[GI] += accum[8+GI];
-
-    GroupMemoryBarrierWithGroupSync();
-    if ( GI < 4 )
-        accum[GI] += accum[4+GI];
-
-    GroupMemoryBarrierWithGroupSync();
-    if ( GI < 2 )
-        accum[GI] += accum[2+GI];
-
-    GroupMemoryBarrierWithGroupSync();
-    if ( GI < 1 )
-        accum[GI] += accum[1+GI];
-    
-    if ( GI == 0 )
-    {        
-        Result[Gid.x] = accum[0];
-    }
-}
diff --git a/tests/hlsl/dxsdk/HDRToneMappingCS11/skybox11.hlsl b/tests/hlsl/dxsdk/HDRToneMappingCS11/skybox11.hlsl
deleted file mode 100644
index a0e44ba95..000000000
--- a/tests/hlsl/dxsdk/HDRToneMappingCS11/skybox11.hlsl
+++ /dev/null
@@ -1,44 +0,0 @@
-//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues.
-//TEST:COMPARE_HLSL: -profile vs_4_0 -entry SkyboxVS -profile ps_4_0 -entry SkyboxPS
-//-----------------------------------------------------------------------------
-// File: SkyBox11.hlsl
-//
-// Desc: 
-// 
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//-----------------------------------------------------------------------------
-
-cbuffer cbPerObject : register( b0 )
-{
-    row_major matrix    g_mWorldViewProjection	: packoffset( c0 );
-}
-
-TextureCube	g_EnvironmentTexture : register( t0 );
-SamplerState g_sam : register( s0 );
-
-struct SkyboxVS_Input
-{
-    float4 Pos : POSITION;
-};
-
-struct SkyboxVS_Output
-{
-    float4 Pos : SV_POSITION;
-    float3 Tex : TEXCOORD0;
-};
-
-SkyboxVS_Output SkyboxVS( SkyboxVS_Input Input )
-{
-    SkyboxVS_Output Output;
-    
-    Output.Pos = Input.Pos;
-    Output.Tex = normalize( mul(Input.Pos, g_mWorldViewProjection) );
-    
-    return Output;
-}
-
-float4 SkyboxPS( SkyboxVS_Output Input ) : SV_TARGET
-{
-    float4 color = g_EnvironmentTexture.Sample( g_sam, Input.Tex );
-    return color;
-}
diff --git a/tests/hlsl/dxsdk/InstancingFX11/Instancing.fx b/tests/hlsl/dxsdk/InstancingFX11/Instancing.fx
deleted file mode 100644
index 3c8d45078..000000000
--- a/tests/hlsl/dxsdk/InstancingFX11/Instancing.fx
+++ /dev/null
@@ -1,591 +0,0 @@
-//TEST_IGNORE_FILE:
-//--------------------------------------------------------------------------------------
-// File: Instancing.fx
-//
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-//--------------------------------------------------------------------------------------
-// Input and output structures 
-//--------------------------------------------------------------------------------------
-struct VSInstIn
-{
-    float3 pos : POSITION;
-    float3 norm : NORMAL;
-    float2 tex : TEXTURE0;
-    row_major float4x4 mTransform : mTransform;
-};
-
-struct VSSceneIn
-{
-    float3 pos : POSITION;
-    float3 norm : NORMAL;
-    float2 tex : TEXTURE0;
-};
-
-struct VSGrassIn
-{
-    float3 pos : POSITION;
-    float3 norm : NORMAL;
-    float2 tex : TEXTURE0;
-    row_major float4x4 mTransform : mTransform;
-    uint VertexID : SV_VertexID;
-};
-
-struct VSGrassOut
-{
-    float3 pos : POSITION;
-    float3 norm : NORMAL;
-    float2 tex : TEXTURE0;
-    uint VertexID : VERTID;
-};
-
-struct VSQuadIn
-{
-    float3 pos : POSITION;
-    float2 tex : TEXTURE0;
-    row_major float4x4 mTransform : mTransform;
-    float fOcc : fOcc;
-    uint InstanceId : SV_InstanceID;
-};
-
-struct PSSceneIn
-{
-    float4 pos : SV_Position;
-    float2 tex : TEXTURE0;
-    float4 color : COLOR0;
-};
-
-struct PSQuadIn
-{
-    float4 pos : SV_Position;
-    float3 tex : TEXTURE0;
-    float4 color : COLOR0;
-};
-
-//--------------------------------------------------------------------------------------
-// Constant buffers 
-//--------------------------------------------------------------------------------------
-cbuffer crarely
-{
-    float4x4 g_mTreeMatrices[50];
-    uint g_iNumTrees;
-};
-
-cbuffer ceveryframe
-{
-    float4x4 g_mWorldViewProj;
-    float4x4 g_mWorldView;
-};
-
-cbuffer cmultipleperframe
-{
-    float g_GrassWidth;
-    float g_GrassHeight;
-    uint g_iGrassCoverage;
-};
-
-cbuffer cusercontrolled
-{
-    float g_GrassMessiness;
-};
-
-struct light_struct
-{
-    float4 direction;
-    float4 color;
-};
-
-cbuffer cimmutable
-{
-    light_struct g_lights[4] = { 
-                    { float4(0.620275,  0.683659, 0.384537, 1),  float4(0.75, 0.599, 0.405, 1) },		//sun
-                    { float4(0.063288, -0.987444, 0.144735, 1),  float4(0.192, 0.273, 0.275, 1) },		//bottom
-                    { float4(0.23007,   0.785579, -0.574422, 1),  float4(0.300, 0.292, 0.223, 1) },		//highlight
-                    { float4(-0.620275,  -0.683659, -0.384537, 1),  float4(0.0, 0.0, 0.1, 1) }			//blue rim-light
-                    };
-    
-    float4 g_ambient = float4(0.4945,0.465,0.5,1);
-    
-    float g_occDimHeight = 2400.0;	//scalar that tells us how much to darken the tree near the top
-};
-
-cbuffer cgrassblade
-{
-    float3 g_positions[6] =
-    {
-        float3( -1, 0, 0 ),
-        float3( -1, 2, 0 ),
-        float3( 1, 0, 0 ),
-        float3( 1, 2, 0 ),
-        
-        float3( -1, 0, 0 ),
-        float3( -1, 2, 0 ),
-    };
-    float2 g_texcoords[6] = 
-    { 
-        float2(0,1), 
-        float2(0,0),
-        float2(1,1),
-        float2(1,0),
-        
-        float2(0,1),
-        float2(0,0),
-    };
-};
-
-//--------------------------------------------------------------------------------------
-// Textures and Samplers
-//--------------------------------------------------------------------------------------
-Texture2D g_txDiffuse;
-Texture2DArray g_tx2dArray;
-SamplerState g_samLinear
-{
-    Filter = ANISOTROPIC;
-    AddressU = Wrap;
-    AddressV = Wrap;
-};
-
-Texture1D g_txRandom;
-SamplerState g_samPoint
-{
-    Filter = MIN_MAG_MIP_POINT;
-    AddressU = Wrap;
-    AddressV = Wrap;
-};
-
-//--------------------------------------------------------------------------------------
-// State structures
-//--------------------------------------------------------------------------------------
-BlendState QuadAlphaBlendState
-{
-    AlphaToCoverageEnable = TRUE;
-    RenderTargetWriteMask[0] = 0x0F;
-};
-
-RasterizerState EnableMSAA
-{
-    CullMode = BACK;
-    MultisampleEnable = TRUE;
-};
-
-DepthStencilState DisableDepthTestWrite
-{
-    DepthEnable = FALSE;
-    DepthWriteMask = ZERO;
-};
-
-DepthStencilState EnableDepthTestWrite
-{
-    DepthEnable = TRUE;
-    DepthWriteMask = ALL;
-};
-
-BlendState NoBlending
-{
-    AlphaToCoverageEnable = FALSE;
-    BlendEnable[0] = FALSE;
-};
-
-//--------------------------------------------------------------------------------------
-// Sky vertex shader
-//--------------------------------------------------------------------------------------
-PSSceneIn VSSkymain(VSSceneIn input)
-{
-    PSSceneIn output;
-    
-    //
-    // Transform the vert to view-space
-    //
-    float4 v4Position = mul(float4(input.pos, 1), g_mWorldViewProj);
-    output.pos = v4Position;
-    
-    //  
-    // Transfer the rest
-    //
-    output.tex = input.tex;
-    
-    output.color = float4(1,1,1,1);
-    
-    return output;
-}
-
-//--------------------------------------------------------------------------------------
-// CalcLighting helper function.  Calculates lighting from 4 light sources, adds ambient
-// and attenuates for depth.  Used by all techniques for lighting.
-//--------------------------------------------------------------------------------------
-float4 CalcLighting( float3 norm, float depth )
-{
-    float4 color = float4(0,0,0,0);
-    
-    // add the contributions of 4 directional lights
-    [unroll] for( int i=0; i<4; i++ )
-    {
-        color += saturate( dot(g_lights[i].direction,norm) )*g_lights[i].color;
-    }
-    
-    // give some attenuation due to depth
-    float attenuate = depth / 10000.0;
-    float4 attenColor = float4(0.15, 0.2, 0.3, 0);
-    
-    // add it all up plus ambient
-    return (1-attenuate*0.23)*(color + g_ambient) + attenColor*attenuate;
-}
-
-//--------------------------------------------------------------------------------------
-// Instancing vertex shader.  Positions the vertices based upon the matrix stored
-// in the second vertex stream.
-//--------------------------------------------------------------------------------------
-PSSceneIn VSInstmain(VSInstIn input)
-{
-    PSSceneIn output;
-    
-    //
-    // Transform by our Sceneance matrix
-    //
-    float4 InstancePosition = mul(float4(input.pos, 1), input.mTransform);
-    float4 ViewPos = mul(InstancePosition, g_mWorldView );
-    
-    //
-    // Transform the vert to view-space
-    //
-    float4 v4Position = mul(InstancePosition, g_mWorldViewProj);
-    output.pos = v4Position;
-    
-    //  
-    // Transfer the rest
-    //
-    output.tex = input.tex;
-    
-    //
-    // dot the norm with the light dir
-    //
-    float3 norm = mul(input.norm,(float3x3)input.mTransform);
-    output.color = CalcLighting( norm, ViewPos.z );
-    
-    //
-    // Dim the color by how far up the tree we are.  
-    // This is a nice way to fake occlusion of the branches by the leaves.
-    //
-    output.color *= 1.0f - saturate(input.pos.y/g_occDimHeight);
-    
-    
-    return output;
-}
-
-//--------------------------------------------------------------------------------------
-// Quad (leaf) vertex shader.  Instances the quad over multiple leaf positions and 
-// multiple trees.  This demonstrates how to do double instancing.
-//--------------------------------------------------------------------------------------
-PSQuadIn VSQuadmain(VSQuadIn input)
-{
-    PSQuadIn output;
-    
-    // base our leaf texture upon which instance id we are
-    uint iLeaf = input.InstanceId/g_iNumTrees;
-    uint iLeafTex = iLeaf%3;
-    output.tex = float3(input.tex, float(iLeafTex) );
-
-    //
-    // Transform the position by the Instance matrix
-    //
-    int iTree = input.InstanceId - (input.InstanceId/g_iNumTrees)*g_iNumTrees;
-    float4 vInstancePos = mul( float4(input.pos, 1), input.mTransform  );
-    float4 InstancePosition = mul(vInstancePos, g_mTreeMatrices[iTree] );
-    float4 ViewPos = mul(InstancePosition, g_mWorldView );
-        
-    //  
-    // Transform the Instance position to view-space
-    //
-    output.pos = mul(InstancePosition, g_mWorldViewProj);
-    
-    // pack distance from the eye into the color alpha channel
-    output.color = float4(input.fOcc,input.fOcc,input.fOcc,ViewPos.z);
-    
-    return output;
-}
-
-//--------------------------------------------------------------------------------------
-// Grass vertex shader.  Basically a passthrough except for instancing the island base
-// mesh.
-//--------------------------------------------------------------------------------------
-VSGrassOut VSGrassmain(VSGrassIn input)
-{
-    // simple transform into the instance space
-    VSGrassOut output;
-    output.pos = mul(float4(input.pos, 1), input.mTransform);
-    output.norm = mul(input.norm, (float3x3)input.mTransform);
-    output.tex = input.tex;
-    output.VertexID = input.VertexID;
-    
-    return output;
-}
-
-//--------------------------------------------------------------------------------------
-// Quad (leaf) GS.  Calculates the normal and lighting for the leaf.
-//--------------------------------------------------------------------------------------
-[maxvertexcount(3)]
-void GSQuadmain(triangle PSQuadIn input[3], inout TriangleStream<PSQuadIn> QuadStream)
-{
-    PSQuadIn output;
-
-    //
-    // Calculate the face normal
-    //
-    float4 faceNormalA = input[1].pos.xyzw - input[0].pos.xyzw;
-    float4 faceNormalB = input[2].pos.xyzw - input[0].pos.xyzw;
-
-    //
-    // Cross product
-    //
-    float3 faceNormal = cross(faceNormalA, faceNormalB);
-
-    //
-    // Normalize face normal
-    //  
-    faceNormal = normalize(faceNormal);
-
-    //
-    // Dot face normal with some arbitrary light vectors
-    //
-    float4 color1 = CalcLighting( faceNormal, input[0].color.a );
-    color1 *= input[0].color;
-
-    //
-    // Make sure we always have an alpha of 1
-    //  
-    color1.a = 1.0;
-
-    //
-    // Emit out the new tri
-    //
-    for(int i=0; i<3; i++)
-    {
-        output.pos = input[i].pos;
-        output.color = color1;
-        output.tex = input[i].tex;  
-        QuadStream.Append(output);
-    }
-    QuadStream.RestartStrip();
-}
-
-//--------------------------------------------------------------------------------------
-// RandomDir helper.  Samples a random dir out of our 1d random texture.  In this case
-// we use a texture because the offset could be anywhere.  If we were sampling linearly
-// then we would probably just use a buffer and load from that.
-//--------------------------------------------------------------------------------------
-float3 RandomDir(float fOffset)
-{   
-    float tCoord = (fOffset) / 300.0;
-    return g_txRandom.SampleLevel( g_samPoint, tCoord, 0 );
-}
-
-//--------------------------------------------------------------------------------------
-// Helper to determing if a point is within a triangle
-//--------------------------------------------------------------------------------------
-bool IsInTriangle( float3 P, float3 A, float3 B, float3 C )
-{
-    float3 crossA = cross( B-A, P-A );
-    float3 crossB = cross( C-B, P-B );
-    float3 crossC = cross( A-C, P-C );
-    
-    if( dot( crossA, crossB ) > 0 &&
-        dot( crossB, crossC ) > 0 )
-    {
-        return true;
-    }
-    else
-    {
-        return false;
-    }
-}
-
-//--------------------------------------------------------------------------------------
-// Gets a random orientation matrix based upon the RandomDir funciton
-//--------------------------------------------------------------------------------------
-float4x4 GetRandomOrientation( float3 Pos, float3 Norm, float fRandOffset )
-{
-    float3 Tangent = RandomDir(fRandOffset);
-    
-    float3 Bitangent = normalize( cross( Tangent, Norm ) );
-    Tangent = normalize( cross( Bitangent, Norm ) );
-    
-    float4x4 matWorld = { float4( Tangent, 0 ),
-                          float4( Norm, 0 ),
-                          float4( Bitangent, 0 ),
-                          float4( Pos, 1 ) };
-    return matWorld;
-}
-
-//--------------------------------------------------------------------------------------
-// Generates an actual grass blade
-//--------------------------------------------------------------------------------------
-void OutputGrassBlade( VSGrassOut midPoint, inout TriangleStream<PSQuadIn> GrassStream, int iGrassTex )
-{
-    PSQuadIn output;
-    
-    float4x4 mWorld = GetRandomOrientation( midPoint.pos, midPoint.norm, (float)midPoint.VertexID );
-    float4 ViewPos = mul( midPoint.pos, g_mWorldView );
-    
-    float3 grassNorm = midPoint.norm;
-    float4 color1 = CalcLighting( grassNorm, ViewPos.z );
-    
-    for(int v=0; v<6; v++)
-    {
-        float3 pos = g_positions[v];
-        pos.x *= g_GrassWidth;
-        pos.y *= g_GrassHeight;
-        
-        output.pos = mul( float4(pos,1), mWorld );
-        output.pos = mul( output.pos, g_mWorldViewProj );
-        output.tex = float3( g_texcoords[v], iGrassTex );
-        output.color = color1;
-    
-        GrassStream.Append( output );
-    }
-    
-    GrassStream.RestartStrip();
-}
-
-//--------------------------------------------------------------------------------------
-// Midpoint of the three vertices A,B,C
-//--------------------------------------------------------------------------------------
-VSGrassOut CalcMidPoint( VSGrassOut A, VSGrassOut B, VSGrassOut C )
-{
-    VSGrassOut MidPoint;
-    
-    MidPoint.pos = (A.pos + B.pos + C.pos)/3.0f;
-    MidPoint.norm = (A.norm + B.norm + C.norm)/3.0f;
-    MidPoint.tex = (A.tex + B.tex + C.tex)/3.0f;
-    MidPoint.VertexID = A.VertexID + B.VertexID + C.VertexID;
-    
-    return MidPoint;
-}
-
-//--------------------------------------------------------------------------------------
-// The actual grass geometry shader.  This generates grass blades based upon an input
-// mesh (the tops of the islands) and a coverage texture.  Each of the textures channels
-// determines how much of each of the 4 types of grass to place at a particular spot.
-//--------------------------------------------------------------------------------------
-[maxvertexcount(90)]
-void GSGrassmain(triangle VSGrassOut input[3], inout TriangleStream<PSQuadIn> GrassStream )
-{
-    VSGrassOut MidPoint = CalcMidPoint( input[0], input[1], input[2] );
-    
-    float4 CoverageMask = g_tx2dArray.SampleLevel( g_samPoint, float3(MidPoint.tex,4), 0 );
-    float cm[4];
-    cm[0] = CoverageMask.r;
-    cm[1] = CoverageMask.g;
-    cm[2] = CoverageMask.b;
-    cm[3] = CoverageMask.a;
-    
-    for(int g=0; g<4; g++)
-    {
-        float MaxBlades = float(g_iGrassCoverage)*cm[g];
-        for(float i=0; i<MaxBlades; i++)
-        {	
-            float randOffset = g*5 + (i+1);
-            float3 Tan = RandomDir( MidPoint.pos.x + randOffset );
-            float3 Len = normalize( RandomDir( MidPoint.pos.z + randOffset ) );
-            float3 Shift = Len.x*g_GrassMessiness*normalize( cross( Tan, MidPoint.norm ) );
-            VSGrassOut grassPoint = MidPoint;
-            grassPoint.VertexID += randOffset;
-            grassPoint.pos += Shift; 
-                
-            //uncomment this to make the grass strictly conform to the mesh
-            //if( IsInTriangle( grassPoint.pos, input[0].pos, input[1].pos, input[2].pos ) )
-            {
-                OutputGrassBlade( grassPoint, GrassStream, g );
-            }
-        }
-    }
-}
-
-//--------------------------------------------------------------------------------------
-// PS for non-leaf or grass items.
-//--------------------------------------------------------------------------------------
-float4 PSScenemain(PSSceneIn input) : SV_Target
-{
-    float4 color = g_txDiffuse.Sample( g_samLinear, input.tex ) * input.color;
-    return color;
-}
-
-//--------------------------------------------------------------------------------------
-// PS for leaves and grass
-//--------------------------------------------------------------------------------------
-float4 PSQuadmain(PSQuadIn input) : SV_Target
-{
-    float4 color = g_tx2dArray.Sample( g_samLinear, input.tex );
-    color.xyz *= input.color.xyz;
-    return color;
-}
-
-//--------------------------------------------------------------------------------------
-// Render instanced meshes with vertex lighting
-//--------------------------------------------------------------------------------------
-technique10 RenderInstancedVertLighting
-{
-    pass p0
-    {
-        SetVertexShader( CompileShader( vs_4_0, VSInstmain() ) );
-        SetGeometryShader( NULL );
-        SetPixelShader( CompileShader( ps_4_0, PSScenemain() ) );
-        
-        SetBlendState( NoBlending, float4( 0.0f, 0.0f, 0.0f, 0.0f ), 0xFFFFFFFF );
-        SetDepthStencilState( EnableDepthTestWrite, 0 );
-        SetRasterizerState( EnableMSAA );
-    }  
-}
-
-//--------------------------------------------------------------------------------------
-// Skybox
-//--------------------------------------------------------------------------------------
-technique10 RenderSkybox
-{
-    pass p0
-    {
-        SetVertexShader( CompileShader( vs_4_0, VSSkymain() ) );
-        SetGeometryShader( NULL );
-        SetPixelShader( CompileShader( ps_4_0, PSScenemain() ) );
-        
-        SetBlendState( NoBlending, float4( 0.0f, 0.0f, 0.0f, 0.0f ), 0xFFFFFFFF );
-        SetDepthStencilState( DisableDepthTestWrite, 0 );
-        SetRasterizerState( EnableMSAA );
-    }  
-}
-
-//--------------------------------------------------------------------------------------
-// Render leaves
-//--------------------------------------------------------------------------------------
-technique10 RenderQuad
-{
-    pass p0
-    {
-        
-        SetVertexShader( CompileShader( vs_4_0, VSQuadmain() ) );
-        SetGeometryShader( CompileShader( gs_4_0, GSQuadmain() ) );
-        SetPixelShader( CompileShader( ps_4_0, PSQuadmain() ) );
-        
-        SetBlendState( QuadAlphaBlendState, float4( 0.0f, 0.0f, 0.0f, 0.0f ), 0xFFFFFFFF );
-        SetDepthStencilState( EnableDepthTestWrite, 0 );
-        SetRasterizerState( EnableMSAA );
-    }  
-}
-
-//--------------------------------------------------------------------------------------
-// Render grass
-//--------------------------------------------------------------------------------------
-technique10 RenderGrass
-{
-    pass p0
-    {
-        
-        SetVertexShader( CompileShader( vs_4_0, VSGrassmain() ) );
-        SetGeometryShader( CompileShader( gs_4_0, GSGrassmain() ) );
-        SetPixelShader( CompileShader( ps_4_0, PSQuadmain() ) );
-        
-        SetBlendState( QuadAlphaBlendState, float4( 0.0f, 0.0f, 0.0f, 0.0f ), 0xFFFFFFFF );
-        SetDepthStencilState( EnableDepthTestWrite, 0 );
-        SetRasterizerState( EnableMSAA );
-    }  
-}
diff --git a/tests/hlsl/dxsdk/MultithreadedRendering11/MultithreadedRendering11_PS.hlsl b/tests/hlsl/dxsdk/MultithreadedRendering11/MultithreadedRendering11_PS.hlsl
deleted file mode 100644
index 5463f5b92..000000000
--- a/tests/hlsl/dxsdk/MultithreadedRendering11/MultithreadedRendering11_PS.hlsl
+++ /dev/null
@@ -1,202 +0,0 @@
-//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues.
-//TEST:COMPARE_HLSL: -profile ps_4_0 -entry PSMain
-//--------------------------------------------------------------------------------------
-// File: MultithreadedRendering11_PS.hlsl
-//
-// The pixel shader file for the MultithreadedRendering11 sample.  
-// 
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-// Various debug options
-//#define NO_DIFFUSE_MAP
-//#define NO_NORMAL_MAP
-//#define NO_AMBIENT
-//#define NO_DYNAMIC_LIGHTING
-//#define NO_SHADOW_MAP
-
-#define SHADOW_DEPTH_BIAS 0.0005f
-
-//--------------------------------------------------------------------------------------
-// Globals
-//--------------------------------------------------------------------------------------
-static const int g_iNumLights = 4;
-static const int g_iNumShadows = 1; // by convention, the first n lights cast shadows
-
-cbuffer cbPerObject : register( b0 )
-{
-	float4		g_vObjectColor			: packoffset( c0 );
-};
-
-cbuffer cbPerLight : register( b1 )
-{
-    struct LightDataStruct
-    {
-	    matrix		m_mLightViewProj;
-	    float4		m_vLightPos;
-	    float4		m_vLightDir;
-	    float4		m_vLightColor;
-	    float4		m_vFalloffs;    // x = dist end, y = dist range, z = cos angle end, w = cos range
-	} g_LightData[g_iNumLights]         : packoffset( c0 );
-};
-
-cbuffer cbPerScene : register( b2 )
-{
-	float4		g_vMirrorPlane			: packoffset( c0 );
-	float4  	g_vAmbientColor			: packoffset( c1 );
-	float4		g_vTintColor			: packoffset( c2 );
-};
-
-//--------------------------------------------------------------------------------------
-// Textures and Samplers
-//--------------------------------------------------------------------------------------
-Texture2D	        g_txDiffuse                : register( t0 );
-Texture2D	        g_txNormal                 : register( t1 );
-Texture2D           g_txShadow[g_iNumShadows]  : register( t2 );
-
-SamplerState        g_samPointClamp : register( s0 );
-SamplerState        g_samLinearWrap : register( s1 );
-
-//--------------------------------------------------------------------------------------
-// Input / Output structures
-//--------------------------------------------------------------------------------------
-struct PS_INPUT
-{
-	float3 vNormal		: NORMAL;
-	float3 vTangent		: TANGENT;
-	float2 vTexcoord	: TEXCOORD0;
-	float4 vPosWorld	: TEXCOORD1;
-};
-
-//--------------------------------------------------------------------------------------
-// Sample normal map, convert to signed, apply tangent-to-world space transform
-//--------------------------------------------------------------------------------------
-float3 CalcPerPixelNormal( float2 vTexcoord, float3 vVertNormal, float3 vVertTangent )
-{
-	// Compute tangent frame
-	vVertNormal =   normalize( vVertNormal );	
-	vVertTangent =  normalize( vVertTangent );	
-	float3 vVertBinormal = normalize( cross( vVertTangent, vVertNormal ) );	
-	float3x3 mTangentSpaceToWorldSpace = float3x3( vVertTangent, vVertBinormal, vVertNormal ); 
-	
-	// Compute per-pixel normal
-	float3 vBumpNormal = g_txNormal.Sample( g_samLinearWrap, vTexcoord );
-	vBumpNormal = 2.0f * vBumpNormal - 1.0f;
-	
-	return mul( vBumpNormal, mTangentSpaceToWorldSpace );
-}
-
-//--------------------------------------------------------------------------------------
-// Test how much pixel is in shadow, using 2x2 percentage-closer filtering
-//--------------------------------------------------------------------------------------
-float4 CalcUnshadowedAmountPCF2x2( int iShadow, float4 vPosWorld )
-{
-    matrix mLightViewProj = g_LightData[iShadow].m_mLightViewProj;
-    Texture2D txShadow =    g_txShadow[iShadow]; 
-
-    // Compute pixel position in light space
-    float4 vLightSpacePos = mul( vPosWorld, mLightViewProj ); 
-    vLightSpacePos.xyz /= vLightSpacePos.w;
-    
-    // Translate from surface coords to texture coords
-    // Could fold these into the matrix
-    float2 vShadowTexCoord = 0.5f * vLightSpacePos + 0.5f;
-    vShadowTexCoord.y = 1.0f - vShadowTexCoord.y;
-    
-    // Depth bias to avoid pixel self-shadowing
-    float vLightSpaceDepth = vLightSpacePos.z - SHADOW_DEPTH_BIAS;
-    
-    // Find sub-pixel weights
-    float2 vShadowMapDims = float2( 2048.0f, 2048.0f ); // need to keep in sync with .cpp file
-    float4 vSubPixelCoords;
-    vSubPixelCoords.xy = frac( vShadowMapDims * vShadowTexCoord );
-    vSubPixelCoords.zw = 1.0f - vSubPixelCoords;
-    float4 vBilinearWeights = vSubPixelCoords.zxzx * vSubPixelCoords.wwyy;
-
-    // 2x2 percentage closer filtering
-    float2 vTexelUnits = 1.0f / vShadowMapDims;
-    float4 vShadowDepths;
-    vShadowDepths.x = txShadow.Sample( g_samPointClamp, vShadowTexCoord );
-    vShadowDepths.y = txShadow.Sample( g_samPointClamp, vShadowTexCoord + float2( vTexelUnits.x, 0.0f ) );
-    vShadowDepths.z = txShadow.Sample( g_samPointClamp, vShadowTexCoord + float2( 0.0f, vTexelUnits.y ) );
-    vShadowDepths.w = txShadow.Sample( g_samPointClamp, vShadowTexCoord + vTexelUnits );
-    
-    // What weighted fraction of the 4 samples are nearer to the light than this pixel?
-    float4 vShadowTests = ( vShadowDepths >= vLightSpaceDepth ) ? 1.0f : 0.0f;
-    return dot( vBilinearWeights, vShadowTests );
-}
-
-//--------------------------------------------------------------------------------------
-// Diffuse lighting calculation, with angle and distance falloff
-//--------------------------------------------------------------------------------------
-float4 CalcLightingColor( int iLight, float3 vPosWorld, float3 vPerPixelNormal )
-{
-    float3 vLightPos =      g_LightData[iLight].m_vLightPos.xyz; 
-    float3 vLightDir =      g_LightData[iLight].m_vLightDir.xyz;
-    float4 vLightColor =    g_LightData[iLight].m_vLightColor; 
-    float4 vFalloffs =      g_LightData[iLight].m_vFalloffs; 
-    
-    float3 vLightToPixelUnNormalized = vPosWorld - vLightPos;
-    
-    // Dist falloff = 0 at vFalloffs.x, 1 at vFalloffs.x - vFalloffs.y
-    float fDist = length( vLightToPixelUnNormalized );
-    float fDistFalloff = saturate( ( vFalloffs.x - fDist ) / vFalloffs.y );
-    
-    // Normalize from here on
-    float3 vLightToPixelNormalized = vLightToPixelUnNormalized / fDist;
-    
-    // Angle falloff = 0 at vFalloffs.z, 1 at vFalloffs.z - vFalloffs.w
-    float fCosAngle = dot( vLightToPixelNormalized, vLightDir );
-    float fAngleFalloff = saturate( ( fCosAngle - vFalloffs.z ) / vFalloffs.w );
-    
-    // Diffuse contribution
-    float fNDotL = saturate( -dot( vLightToPixelNormalized, vPerPixelNormal ) );
-    
-	return vLightColor * fNDotL * fDistFalloff * fAngleFalloff;
-}
-
-//--------------------------------------------------------------------------------------
-// Pixel Shader
-//--------------------------------------------------------------------------------------
-float4 PSMain( PS_INPUT Input ) : SV_TARGET
-{
-    // Manual clip test, so that objects which are behind the mirror 
-    // don't show up in the mirror.
-    clip( dot( g_vMirrorPlane.xyz, Input.vPosWorld.xyz ) + g_vMirrorPlane.w );
-
-#ifdef NO_DIFFUSE_MAP
-	float4 vDiffuse = 0.5f;
-#else   // #ifdef NO_DIFFUSE_MAP
-	float4 vDiffuse = g_txDiffuse.Sample( g_samLinearWrap, Input.vTexcoord );
-#endif  // #ifdef NO_DIFFUSE_MAP #else
-	
-	// Compute per-pixel normal
-#ifdef NO_NORMAL_MAP
-	float3 vPerPixelNormal = Input.vNormal;
-#else   // #ifdef NO_NORMAL_MAP
-	float3 vPerPixelNormal = CalcPerPixelNormal( Input.vTexcoord, Input.vNormal, Input.vTangent );
-#endif  // #ifdef NO_NORMAL_MAP #else
-
-    // Compute lighting contribution
-#ifdef NO_AMBIENT
-	float4 vTotalLightingColor = 0.0f;
-#else   // #ifdef NO_AMBIENT
-	float4 vTotalLightingColor = g_vAmbientColor;
-#endif  // #ifdef NO_AMBIENT #else
-
-#ifndef NO_DYNAMIC_LIGHTING
-	for ( int iLight = 0; iLight < g_iNumLights; ++iLight )
-	{
-        float4 vLightingColor = CalcLightingColor( iLight, Input.vPosWorld, vPerPixelNormal );
-#ifndef NO_SHADOW_MAP
-	    if ( iLight < g_iNumShadows && any( vLightingColor.xyz ) > 0.0f ) // Don't bother checking shadow map if the pixel is unlit
-	    {
-            vLightingColor *= CalcUnshadowedAmountPCF2x2( iLight, Input.vPosWorld );
-	    }
-#endif  // #ifndef NO_SHADOW_MAP
-	    vTotalLightingColor += vLightingColor;
-	}
-#endif  // #ifndef NO_DYNAMIC_LIGHTING
-
-	return vDiffuse * g_vTintColor * g_vObjectColor * vTotalLightingColor;
-}
diff --git a/tests/hlsl/dxsdk/MultithreadedRendering11/MultithreadedRendering11_VS.hlsl b/tests/hlsl/dxsdk/MultithreadedRendering11/MultithreadedRendering11_VS.hlsl
deleted file mode 100644
index 12fe14ae9..000000000
--- a/tests/hlsl/dxsdk/MultithreadedRendering11/MultithreadedRendering11_VS.hlsl
+++ /dev/null
@@ -1,83 +0,0 @@
-//TEST:COMPARE_HLSL: -profile vs_4_0 -entry VSMain
-
-#ifndef __SLANG__
-#define cbPerObject cbPerObject_0
-#define g_mWorld g_mWorld_0
-#define cbPerScene cbPerScene_0
-#define g_mViewProj g_mViewProj_0
-#endif
-
-//--------------------------------------------------------------------------------------
-// File: MultithreadedRendering11_VS.hlsl
-//
-// The vertex shader file for the MultithreadedRendering11 sample.  
-// 
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-// Various debug options
-//#define UNCOMPRESSED_VERTEX_DATA  // The sdkmesh file contained uncompressed vertex data
-
-//--------------------------------------------------------------------------------------
-// Globals
-//--------------------------------------------------------------------------------------
-cbuffer cbPerObject : register( b0 )
-{
-	matrix		g_mWorld	;//SLANG: : packoffset( c0 );
-};
-cbuffer cbPerScene : register( b1 )
-{
-	matrix		g_mViewProj	;//SLANG: : packoffset( c0 );
-};
-
-//--------------------------------------------------------------------------------------
-// Input / Output structures
-//--------------------------------------------------------------------------------------
-struct VS_INPUT
-{
-	float4 vPosition	: POSITION;
-	float3 vNormal		: NORMAL;
-	float2 vTexcoord	: TEXCOORD0;
-	float3 vTangent		: TANGENT;
-};
-
-struct VS_OUTPUT
-{
-	float3 vNormal		: NORMAL;
-	float3 vTangent		: TANGENT;
-	float2 vTexcoord	: TEXCOORD0;
-	float4 vPosWorld	: TEXCOORD1;
-	float4 vPosition	: SV_POSITION;
-};
-
-// We aliased signed vectors as a unsigned format. 
-// Need to recover signed values.  The values 1.0 and 2.0
-// are slightly inaccurate here.
-float3 R10G10B10A2_UNORM_TO_R32G32B32_FLOAT( in float3 vVec )
-{
-    vVec *= 2.0f;
-    return vVec >= 1.0f ? ( vVec - 2.0f ) : vVec;
-}
-
-//--------------------------------------------------------------------------------------
-// Vertex Shader
-//--------------------------------------------------------------------------------------
-VS_OUTPUT VSMain( VS_INPUT Input )
-{
-	VS_OUTPUT Output;
-	
-#ifndef UNCOMPRESSED_VERTEX_DATA
-	// Expand compressed vectors
-	Input.vNormal = R10G10B10A2_UNORM_TO_R32G32B32_FLOAT( Input.vNormal );
-	Input.vTangent = R10G10B10A2_UNORM_TO_R32G32B32_FLOAT( Input.vTangent );
-#endif  // #ifndef UNCOMPRESSED_VERTEX_DATA
-	
-	Output.vPosWorld = mul( Input.vPosition,    g_mWorld );
-	Output.vPosition = mul( Output.vPosWorld,   g_mViewProj );
-	Output.vNormal   = mul( Input.vNormal,      (float3x3)g_mWorld );
-	Output.vTangent  = mul( Input.vTangent,     (float3x3)g_mWorld );
-	Output.vTexcoord = Input.vTexcoord;
-	
-	return Output;
-}
-
diff --git a/tests/hlsl/dxsdk/NBodyGravityCS11/NBodyGravityCS11.hlsl b/tests/hlsl/dxsdk/NBodyGravityCS11/NBodyGravityCS11.hlsl
deleted file mode 100644
index bac2839db..000000000
--- a/tests/hlsl/dxsdk/NBodyGravityCS11/NBodyGravityCS11.hlsl
+++ /dev/null
@@ -1,103 +0,0 @@
-//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues.
-//TEST:COMPARE_HLSL: -profile cs_4_0 -entry CSMain
-//--------------------------------------------------------------------------------------
-// File: NBodyGravityCS11.hlsl
-//
-// Demonstrates how to use Compute Shader to do n-body gravity computation
-//
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-static float softeningSquared = 0.0012500000*0.0012500000;
-static float g_fG = 6.67300e-11f * 10000.0f;
-static float g_fParticleMass = g_fG*10000.0f * 10000.0f;
-
-#define blocksize 128
-groupshared float4 sharedPos[blocksize];
-
-// Body to body interaction, acceleration of the particle at position bi is updated
-void bodyBodyInteraction(inout float3 ai, float4 bj, float4 bi, float mass, int particles ) 
-{
-    float3 r = bj.xyz - bi.xyz;
-
-    float distSqr = dot(r, r);
-    distSqr += softeningSquared;
-
-    float invDist = 1.0f / sqrt(distSqr);
-	float invDistCube =  invDist * invDist * invDist;
-    
-    float s = mass * invDistCube * particles;
-
-    ai += r * s;    
-}
-
-cbuffer cbCS : register( b0 )
-{
-    uint4   g_param;    // pcbCS->param[0] = MAX_PARTICLES;
-                        // pcbCS->param[1] = dimx;              
-    float4  g_paramf;   // pcbCS->paramf[0] = 0.1f;
-                        // pcbCS->paramf[1] = 1; 
-};
-
-struct PosVelo
-{
-    float4 pos;
-    float4 velo;
-};
-
-StructuredBuffer<PosVelo> oldPosVelo;
-RWStructuredBuffer<PosVelo> newPosVelo;
-
-[numthreads(blocksize, 1, 1)]
-void CSMain( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
-{
-    // Each thread of the CS updates one of the particles
-
-    float4 pos = oldPosVelo[DTid.x].pos;
-    float4 vel = oldPosVelo[DTid.x].velo;
-    float3 accel = 0;
-    float mass = g_fParticleMass;
-
-    // Update current particle using all other particles
-    [loop]
-    for (uint tile = 0; tile < g_param.y; tile++)
-    {
-        // Cache a tile of particles unto shared memory to increase IO efficiency
-        sharedPos[GI] = oldPosVelo[tile * blocksize + GI].pos;
-       
-        GroupMemoryBarrierWithGroupSync();        
-
-        [unroll]
-        for (uint counter = 0; counter < blocksize; counter+=8 ) 
-        {
-            bodyBodyInteraction(accel, sharedPos[counter], pos, mass, 1);
-            bodyBodyInteraction(accel, sharedPos[counter+1], pos, mass, 1);
-            bodyBodyInteraction(accel, sharedPos[counter+2], pos, mass, 1);
-            bodyBodyInteraction(accel, sharedPos[counter+3], pos, mass, 1);
-            bodyBodyInteraction(accel, sharedPos[counter+4], pos, mass, 1);
-            bodyBodyInteraction(accel, sharedPos[counter+5], pos, mass, 1);
-            bodyBodyInteraction(accel, sharedPos[counter+6], pos, mass, 1);
-            bodyBodyInteraction(accel, sharedPos[counter+7], pos, mass, 1);
-        }
-        
-        GroupMemoryBarrierWithGroupSync();
-    }  
-
-    // g_param.x is the number of our particles, however this number might not be an exact multiple of the tile size.
-    // In such cases, out of bound reads occur in the process above, which means there will be 
-    // tooManyParticles "phantom" particles generating false gravity at position (0, 0, 0), so we have to substract them here.
-    // NOTE, out of bound reads always return 0 in CS
-    const uint tooManyParticles = g_param.y * blocksize - g_param.x;
-    bodyBodyInteraction(accel, float4(0, 0, 0, 0), pos, mass, -tooManyParticles);
-
-    // Update the velocity and position of current particle using the acceleration computed above
-    vel.xyz += accel.xyz * g_paramf.x;      //deltaTime;
-    vel.xyz *= g_paramf.y;                  //damping;
-    pos.xyz += vel.xyz * g_paramf.x;        //deltaTime;    
-
-    if ( DTid.x < g_param.x )
-    {
-        newPosVelo[DTid.x].pos = pos;
-        newPosVelo[DTid.x].velo = float4(vel.xyz, length(accel));
-    }
-}
diff --git a/tests/hlsl/dxsdk/NBodyGravityCS11/ParticleDraw.hlsl b/tests/hlsl/dxsdk/NBodyGravityCS11/ParticleDraw.hlsl
deleted file mode 100644
index 7f6292662..000000000
--- a/tests/hlsl/dxsdk/NBodyGravityCS11/ParticleDraw.hlsl
+++ /dev/null
@@ -1,128 +0,0 @@
-//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues.
-//TEST:COMPARE_HLSL: -profile vs_4_0 -entry VSParticleDraw -profile gs_4_0 -entry GSParticleDraw -profile ps_4_0 -entry PSParticleDraw
-//--------------------------------------------------------------------------------------
-// File: ParticleDraw.hlsl
-//
-// Shaders for rendering the particle as point sprite
-//
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-struct VSParticleIn
-{
-    float4  color   : COLOR;
-    uint    id      : SV_VERTEXID;
-};
-
-struct VSParticleDrawOut
-{
-    float3 pos			: POSITION;
-    float4 color		: COLOR;
-};
-
-struct GSParticleDrawOut
-{
-    float2 tex			: TEXCOORD0;
-    float4 color		: COLOR;
-    float4 pos			: SV_POSITION;
-};
-
-struct PSParticleDrawIn
-{
-    float2 tex			: TEXCOORD0;
-    float4 color		: COLOR;
-};
-
-struct PosVelo
-{
-    float4 pos;
-    float4 velo;
-};
-
-Texture2D		            g_txDiffuse;
-StructuredBuffer<PosVelo>   g_bufPosVelo;
-
-
-SamplerState g_samLinear
-{
-    Filter = MIN_MAG_MIP_LINEAR;
-    AddressU = Clamp;
-    AddressV = Clamp;
-};
-
-cbuffer cb0
-{
-    row_major float4x4 g_mWorldViewProj;
-    row_major float4x4 g_mInvView;    
-};
-
-cbuffer cb1
-{
-    static float g_fParticleRad = 10.0f;   
-};
-
-cbuffer cbImmutable
-{
-    static float3 g_positions[4] =
-    {
-        float3( -1, 1, 0 ),
-        float3( 1, 1, 0 ),
-        float3( -1, -1, 0 ),
-        float3( 1, -1, 0 ),
-    };
-    
-    static float2 g_texcoords[4] = 
-    { 
-        float2(0,0), 
-        float2(1,0),
-        float2(0,1),
-        float2(1,1),
-    };
-};
-
-//
-// Vertex shader for drawing the point-sprite particles
-//
-VSParticleDrawOut VSParticleDraw(VSParticleIn input)
-{
-    VSParticleDrawOut output;
-    
-    output.pos = g_bufPosVelo[input.id].pos;
-    
-    float mag = g_bufPosVelo[input.id].velo.w/9;
-    output.color = lerp( float4(1,0.1,0.1,1), input.color, mag );
-    
-    return output;
-}
-
-//
-// GS for rendering point sprite particles.  Takes a point and turns it into 2 tris.
-//
-[maxvertexcount(4)]
-void GSParticleDraw(point VSParticleDrawOut input[1], inout TriangleStream<GSParticleDrawOut> SpriteStream)
-{
-    GSParticleDrawOut output;
-    
-    //
-    // Emit two new triangles
-    //
-    for(int i=0; i<4; i++)
-    {
-        float3 position = g_positions[i] * g_fParticleRad;
-        position = mul( position, (float3x3)g_mInvView ) + input[0].pos;
-        output.pos = mul( float4(position,1.0), g_mWorldViewProj ); 
-
-        output.color = input[0].color;        
-        output.tex = g_texcoords[i];
-        SpriteStream.Append(output);
-    }
-    SpriteStream.RestartStrip();
-}
-
-//
-// PS for drawing particles
-//
-float4 PSParticleDraw(PSParticleDrawIn input) : SV_Target
-{   
-    return g_txDiffuse.Sample( g_samLinear, input.tex ) * input.color;
-}
-\ No newline at end of file
diff --git a/tests/hlsl/dxsdk/OIT11/OIT_CS.hlsl b/tests/hlsl/dxsdk/OIT11/OIT_CS.hlsl
deleted file mode 100644
index 80a1e165e..000000000
--- a/tests/hlsl/dxsdk/OIT11/OIT_CS.hlsl
+++ /dev/null
@@ -1,277 +0,0 @@
-//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues.
-//TEST:COMPARE_HLSL: -profile cs_4_0 -entry VSParticleDraw -profile gs_4_0 -entry GSParticleDraw -profile ps_4_0 -entry PSParticleDraw
-//-----------------------------------------------------------------------------
-// File: OIT_CS.hlsl
-//
-// Desc: Compute shaders for used in the Order Independent Transparency sample.
-// 
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//-----------------------------------------------------------------------------
-// TODO: use structured buffers
-RWBuffer<float>     deepBufferDepth     : register( u0 );
-RWBuffer<uint>      deepBufferColorUINT : register( u1 );
-RWTexture2D<float4> frameBuffer         : register( u2 );
-RWBuffer<uint>      prefixSum           : register( u3 );
-
-Texture2D<uint> fragmentCount : register ( t0 );
-
-cbuffer CB : register( b0 )
-{
-    uint g_nFrameWidth      : packoffset( c0.x );
-    uint g_nFrameHeight     : packoffset( c0.y );
-    uint g_nPassSize        : packoffset( c0.z );
-    uint g_nReserved        : packoffset( c0.w );
-}
-
-#define blocksize 1
-#define groupthreads (blocksize*blocksize)
-groupshared float accum[groupthreads];
-
-// First pass of the prefix sum creation algorithm.  Converts a 2D buffer to a 1D buffer,
-// and sums every other value with the previous value.
-[numthreads(1,1,1)]
-void CreatePrefixSum_Pass0_CS( uint3 nGid : SV_GroupID, uint3 nDTid : SV_DispatchThreadID, uint3 nGTid : SV_GroupThreadID )
-{
-    int nThreadNum = nGid.y*g_nFrameWidth + nGid.x;
-    if( nThreadNum%2 == 0 )
-    {
-        prefixSum[nThreadNum] = fragmentCount[nGid.xy];
-        
-        // Add the Fragment count to the next bin
-        if( (nThreadNum+1) < g_nFrameWidth * g_nFrameHeight )
-        {
-            int2 nextUV;
-            nextUV.x = (nThreadNum+1) % g_nFrameWidth;
-            nextUV.y = (nThreadNum+1) / g_nFrameWidth;
-            prefixSum[ nThreadNum+1 ] = prefixSum[ nThreadNum ] + fragmentCount[ nextUV ];
-        }
-    }
-}
-
-// Second and following passes.  Each pass distributes the sum of the first half of the group
-// to the second half of the group.  There are n/groupsize groups in each pass.
-// Each pass increases the group size until it is the size of the buffer.
-// The resulting buffer holds the prefix sum of all preceding values in each
-// position 
-[numthreads(1,1,1)]
-void CreatePrefixSum_Pass1_CS( uint3 nGid : SV_GroupID, uint3 nDTid : SV_DispatchThreadID, uint3 nGTid : SV_GroupThreadID )
-{
-    int nThreadNum = nGid.x;
-    
-    int nValue = prefixSum[nThreadNum*g_nPassSize + g_nPassSize/2 - 1];
-    for(int i = nThreadNum*g_nPassSize + g_nPassSize/2; i < nThreadNum*g_nPassSize + g_nPassSize && i < g_nFrameWidth*g_nFrameHeight; i++)
-    {
-        prefixSum[i] = prefixSum[i] + nValue;
-    }
-}
-
-#if 1
-
-// Sort the fragments using a bitonic sort, then accumulate the fragments into the final result.
-groupshared int nIndex[32];
-#define NUM_THREADS 8
-[numthreads(1,1,1)]
-void SortAndRenderCS( uint3 nGid : SV_GroupID, uint3 nDTid : SV_DispatchThreadID, uint3 nGTid : SV_GroupThreadID )
-{
-    uint nThreadNum = nGid.y * g_nFrameWidth + nGid.x;
-    
-//    uint r0, r1, r2;
-//    float rd0, rd1, rd2, rd3, rd4, rd5, rd6, rd7;
-
-    uint N = fragmentCount[nDTid.xy];
-    
-    uint N2 = 1 << (int)(ceil(log2(N)));
-
-    float fDepth[32];
-    for(int i = 0; i < N; i++)
-    {
-        nIndex[i] = i;
-        fDepth[i] = deepBufferDepth[ prefixSum[nThreadNum-1] + i ];
-    }
-    for(int i = N; i < N2; i++)
-    {
-        nIndex[i] = i;
-        fDepth[i] = 1.1f;
-    }
-    
-    uint idx = blocksize*nGTid.y + nGTid.x;
-
-    // Bitonic sort
-    for( int k = 2; k <= N2; k = 2*k )
-    {
-        for( int j = k>>1; j > 0 ; j = j>>1 ) 
-        {
-            for( int i = 0; i < N2; i++ ) 
-            {
-//                GroupMemoryBarrierWithGroupSync();
-                //i = idx;
-
-                float di = fDepth[ nIndex[ i ] ];
-                int ixj = i^j;
-                if ( ( ixj ) > i )
-                {
-                    float dixj = fDepth[ nIndex[ ixj ] ];
-                    if ( ( i&k ) == 0 && di > dixj )
-                    { 
-                        int temp = nIndex[ i ];
-                        nIndex[ i ] = nIndex[ ixj ];
-                        nIndex[ ixj ] = temp;
-                    }
-                    if ( ( i&k ) != 0 && di < dixj )
-                    {
-                        int temp = nIndex[ i ];
-                        nIndex[ i ] = nIndex[ ixj ];
-                        nIndex[ ixj ] = temp;
-                    }
-                }
-            }
-        }
-    }
-
-    // Output the final result to the frame buffer
-    if( idx == 0 )
-    {
-
-     /*   
-        // Debug
-        uint color[8];
-        for(int i = 0; i < 8; i++)
-        {
-            color[i] = deepBufferColorUINT[prefixSum[nThreadNum-1] + i];
-        }
-
-        for(int i = 0; i < 8; i++)
-        {
-            deepBufferDepth[nThreadNum*8+i] = fDepth[i];//fDepth[nIndex[i]];
-            deepBufferColorUINT[nThreadNum*8+i] = color[nIndex[i]];
-        }
-     */     
-   
-        // Accumulate fragments into final result
-        float4 result = 0.0f;
-        for( int x = N-1; x >= 0; x-- )
-        {
-            uint bufferValue = deepBufferColorUINT[ prefixSum[nThreadNum-1] + nIndex[ x ] ];
-            float4 color;
-            color.r = ( ( bufferValue >> 0  & 0xFF )) / 255.0f;
-            color.g = ( bufferValue >> 8  & 0xFF ) / 255.0f;
-            color.b = ( bufferValue >> 16 & 0xFF ) / 255.0f;
-            color.a = ( bufferValue >> 24 & 0xFF ) / 255.0f;
-            result = lerp( result, color, color.a );
-        }
-        result.a = 1.0f;
-        frameBuffer[ nGid.xy ] = result;
-    }
-}
-
-#else
-[numthreads(1,1,1)]
-void SortAndRenderCS( uint3 nGid : SV_GroupID, uint3 nDTid : SV_DispatchThreadID, uint3 nGTid : SV_GroupThreadID )
-{
-    uint nThreadNum = nDTid.y * g_nFrameWidth + nDTid.x;
-    float d0 = deepBufferDepth[nThreadNum*8];
-    float d1 = deepBufferDepth[nThreadNum*8+1];
-    float d2 = deepBufferDepth[nThreadNum*8+2];
-    
-    uint s0 = deepBufferColorUINT[nThreadNum*8 + 0]; 
-    uint s1 = deepBufferColorUINT[nThreadNum*8 + 1];
-    uint s2 = deepBufferColorUINT[nThreadNum*8 + 2];
-    
-    uint r0, r1, r2;
-    float rd0, rd1, rd2;
-    if( d0 < d1 && d0 < d2 )
-    {
-        r0 = s0;
-        rd0 = d0;
-        if( d1 < d2 )
-        {
-           r1 = s1;
-           r2 = s2;
-           
-           rd1 = d1;
-           rd2 = d2;
-        }
-        else
-        {
-            r1 = s2;
-            r2 = s1;
-            
-            rd1 = d2;
-            rd2 = d1;
-        } 
-    }
-    else if( d1 < d2 )
-    {
-        r0 = s1;
-        rd0 = d1;
-        if( d0 < d2 )
-        {
-          r1 = s0;
-          r2 = s2;
-          
-          rd1 = d0;
-          rd2 = d2;
-        }
-        else
-        {
-          r1 = s2;
-          r2 = s0;
-          
-          rd1 = d2;
-          rd2 = d0;
-        }
-    }
-    else
-    {
-        r0 = s2;
-        rd0 = d2;
-        if( d1 < d0 )
-        {
-          r1 = s1;
-          r2 = s0;
-          
-          rd1 = d1;
-          rd2 = d0;
-        }
-        else
-        {
-          r1 = s0;
-          r2 = s1;
-          
-          rd1 = d0;
-          rd2 = d1;
-        }
-    }
-    
-    deepBufferDepth[nThreadNum*8] = rd0;
-    deepBufferDepth[nThreadNum*8+1] = rd1;
-    deepBufferDepth[nThreadNum*8+2] = rd2;
-
-    deepBufferColorUINT[nThreadNum*8] = r0;
-    deepBufferColorUINT[nThreadNum*8+1] = r1;
-    deepBufferColorUINT[nThreadNum*8+2] = r2;
-
-    // convert the color to floats
-    float4 color[3];
-    color[0].r = (r0 >> 0  & 0xFF) / 255.0f;
-    color[0].g = (r0 >> 8  & 0xFF) / 255.0f;
-    color[0].b = (r0 >> 16 & 0xFF) / 255.0f;
-    color[0].a = (r0 >> 24 & 0xFF) / 255.0f;
-    
-    color[1].r = (r1 >> 0  & 0xFF) / 255.0f;
-    color[1].g = (r1 >> 8  & 0xFF) / 255.0f;
-    color[1].b = (r1 >> 16 & 0xFF) / 255.0f;
-    color[1].a = (r1 >> 24 & 0xFF) / 255.0f;
-    
-    color[2].r = (r2 >> 0  & 0xFF) / 255.0f;
-    color[2].g = (r2 >> 8  & 0xFF) / 255.0f;
-    color[2].b = (r2 >> 16 & 0xFF) / 255.0f;
-    color[2].a = (r2 >> 24 & 0xFF) / 255.0f;
-    
-    float4 result = lerp(lerp(lerp(0, color[2], color[2].a), color[1], color[1].a), color[0], color[0].a);
-    result.a = 1.0f;
-    
-    frameBuffer[nDTid.xy] = result;
-}
-
-#endif
-\ No newline at end of file
diff --git a/tests/hlsl/dxsdk/OIT11/OIT_PS.hlsl b/tests/hlsl/dxsdk/OIT11/OIT_PS.hlsl
deleted file mode 100644
index 5fae02d62..000000000
--- a/tests/hlsl/dxsdk/OIT11/OIT_PS.hlsl
+++ /dev/null
@@ -1,56 +0,0 @@
-//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues.
-//TEST:COMPARE_HLSL: -profile ps_4_0 -entry FragmentCountPS -entry FillDeepBufferPS
-//-----------------------------------------------------------------------------
-// File: OITPS.hlsl
-//
-// Desc: Pixel shaders used in the Order Independent Transparency sample.
-// 
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//-----------------------------------------------------------------------------
-//TODO: Use structured buffers
-RWTexture2D<uint> fragmentCount     : register( u1 );
-RWBuffer<float>   deepBufferDepth   : register( u2 );
-RWBuffer<uint4>   deepBufferColor   : register( u3 );
-RWBuffer<uint>    prefixSum         : register( u4 );
-
-cbuffer CB : register( b0 )
-{
-    uint g_nFrameWidth      : packoffset( c0.x );
-    uint g_nFrameHeight     : packoffset( c0.y );
-    uint g_nReserved0       : packoffset( c0.z );
-    uint g_nReserved1       : packoffset( c0.w );
-}
-
-struct SceneVS_Output
-{
-    float4 pos   : SV_POSITION;
-    float4 color : COLOR0;
-};
-
-void FragmentCountPS( SceneVS_Output input)
-{
-    // Increments need to be done atomically
-    InterlockedAdd(fragmentCount[input.pos.xy], 1);
-}
-
-void FillDeepBufferPS( SceneVS_Output input )
-{
-    uint x = input.pos.x;
-    uint y = input.pos.y;
-
-    // Atomically allocate space in the deep buffer
-    uint fc;
-    InterlockedAdd(fragmentCount[input.pos.xy], 1, fc);
-
-    uint nPrefixSumPos = y*g_nFrameWidth + x;
-    uint nDeepBufferPos;
-    if( nPrefixSumPos == 0 )
-        nDeepBufferPos = fc;
-    else
-        nDeepBufferPos = prefixSum[nPrefixSumPos-1] + fc;
-
-    // Store fragment data into the allocated space
-    deepBufferDepth[nDeepBufferPos] = input.pos.z;
-    deepBufferColor[nDeepBufferPos] = clamp(input.color, 0, 1)*255;
-}
-
diff --git a/tests/hlsl/dxsdk/OIT11/SceneVS.hlsl b/tests/hlsl/dxsdk/OIT11/SceneVS.hlsl
deleted file mode 100644
index 2254091f6..000000000
--- a/tests/hlsl/dxsdk/OIT11/SceneVS.hlsl
+++ /dev/null
@@ -1,42 +0,0 @@
-//TEST:COMPARE_HLSL: -profile vs_4_0 -entry SceneVS
-
-#ifndef __SLANG__
-#define cbPerObject cbPerObject_0
-#define g_mWorldViewProjection g_mWorldViewProjection_0
-#endif
-
-//-----------------------------------------------------------------------------
-// File: SceneVS.hlsl
-//
-// Desc: Vertex shader for the scene.
-// 
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//-----------------------------------------------------------------------------
-
-
-cbuffer cbPerObject : register( b0 )
-{
-    row_major matrix    g_mWorldViewProjection	;//SLANG: : packoffset( c0 );
-}
-
-struct SceneVS_Input
-{
-    float4 pos   : POSITION;
-    float4 color : COLOR;
-};
-
-struct SceneVS_Output
-{
-    float4 pos   : SV_POSITION;
-    float4 color : COLOR0;
-};
-
-SceneVS_Output SceneVS( SceneVS_Input input )
-{
-    SceneVS_Output output;
-    
-    output.color = input.color;
-    output.pos   = mul(input.pos, g_mWorldViewProjection );
-    
-    return output;
-}
diff --git a/tests/hlsl/dxsdk/README.md b/tests/hlsl/dxsdk/README.md
deleted file mode 100644
index dd0c0fb6b..000000000
--- a/tests/hlsl/dxsdk/README.md
+++ /dev/null
@@ -1,5 +0,0 @@
-DirectX SDK Sample Shaders
-==========================
-
-This directory contains shaders that have shipped as part of the DirectX SDK.
-The licsense terms for these shaders are specificed at the top of the source files.
-\ No newline at end of file
diff --git a/tests/hlsl/dxsdk/SimpleBezier11/SimpleBezier11.hlsl b/tests/hlsl/dxsdk/SimpleBezier11/SimpleBezier11.hlsl
deleted file mode 100644
index d01cd7aa4..000000000
--- a/tests/hlsl/dxsdk/SimpleBezier11/SimpleBezier11.hlsl
+++ /dev/null
@@ -1,233 +0,0 @@
-//TEST_DISABLED:COMPARE_HLSL: -profile vs_4_0 -entry BezierVS -profile hs_5_0 -entry BezierHS -profile ds_5_0 -entry BezierDS -profile ps_4_0 -entry BezierPS -entry SolidColorPS
-
-// Note(Slang): Disabling this test for now because compiling it via IR ends up creating a local variable of the `OutputPatch<...>` type, which we need to get rid of via SSA optimization.
-
-
-//--------------------------------------------------------------------------------------
-// File: SimpleBezier11.hlsl
-//
-// This sample shows an simple implementation of the DirectX 11 Hardware Tessellator
-// for rendering a Bezier Patch.
-//
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-// This allows us to compile the shader with a #define to choose
-// the different partition modes for the hull shader.
-// See the hull shader: [partitioning(BEZIER_HS_PARTITION)]
-// This sample demonstrates "integer", "fractional_even", and "fractional_odd"
-#ifndef BEZIER_HS_PARTITION
-#define BEZIER_HS_PARTITION "integer"
-#endif // BEZIER_HS_PARTITION
-
-// The input patch size.  In this sample, it is 16 control points.
-// This value should match the call to IASetPrimitiveTopology()
-#define INPUT_PATCH_SIZE 16
-
-// The output patch size.  In this sample, it is also 16 control points.
-#define OUTPUT_PATCH_SIZE 16
-
-//--------------------------------------------------------------------------------------
-// Constant Buffers
-//--------------------------------------------------------------------------------------
-cbuffer cbPerFrame : register( b0 )
-{
-    matrix g_mViewProjection;
-    float3 g_vCameraPosWorld;
-    float  g_fTessellationFactor;
-};
-
-//--------------------------------------------------------------------------------------
-// Vertex shader section
-//--------------------------------------------------------------------------------------
-struct VS_CONTROL_POINT_INPUT
-{
-    float3 vPosition        : POSITION;
-};
-
-struct VS_CONTROL_POINT_OUTPUT
-{
-    float3 vPosition        : POSITION;
-};
-
-// This simple vertex shader passes the control points straight through to the
-// hull shader.  In a more complex scene, you might transform the control points
-// or perform skinning at this step.
-
-// The input to the vertex shader comes from the vertex buffer.
-
-// The output from the vertex shader will go into the hull shader.
-
-VS_CONTROL_POINT_OUTPUT BezierVS( VS_CONTROL_POINT_INPUT Input )
-{
-    VS_CONTROL_POINT_OUTPUT Output;
-
-    Output.vPosition = Input.vPosition;
-
-    return Output;
-}
-
-//--------------------------------------------------------------------------------------
-// Constant data function for the BezierHS.  This is executed once per patch.
-//--------------------------------------------------------------------------------------
-struct HS_CONSTANT_DATA_OUTPUT
-{
-    float Edges[4]             : SV_TessFactor;
-    float Inside[2]            : SV_InsideTessFactor;
-};
-
-struct HS_OUTPUT
-{
-    float3 vPosition           : BEZIERPOS;
-};
-
-// This constant hull shader is executed once per patch.  For the simple Mobius strip
-// model, it will be executed 4 times.  In this sample, we set the tessellation factor
-// via SV_TessFactor and SV_InsideTessFactor for each patch.  In a more complex scene,
-// you might calculate a variable tessellation factor based on the camera's distance.
-
-HS_CONSTANT_DATA_OUTPUT BezierConstantHS( InputPatch<VS_CONTROL_POINT_OUTPUT, INPUT_PATCH_SIZE> ip,
-                                          uint PatchID : SV_PrimitiveID )
-{    
-    HS_CONSTANT_DATA_OUTPUT Output;
-
-    float TessAmount = g_fTessellationFactor;
-
-    Output.Edges[0] = Output.Edges[1] = Output.Edges[2] = Output.Edges[3] = TessAmount;
-    Output.Inside[0] = Output.Inside[1] = TessAmount;
-
-    return Output;
-}
-
-// The hull shader is called once per output control point, which is specified with
-// outputcontrolpoints.  For this sample, we take the control points from the vertex
-// shader and pass them directly off to the domain shader.  In a more complex scene,
-// you might perform a basis conversion from the input control points into a Bezier
-// patch, such as the SubD11 Sample.
-
-// The input to the hull shader comes from the vertex shader
-
-// The output from the hull shader will go to the domain shader.
-// The tessellation factor, topology, and partition mode will go to the fixed function
-// tessellator stage to calculate the UVW and domain points.
-
-[domain("quad")]
-[partitioning(BEZIER_HS_PARTITION)]
-[outputtopology("triangle_cw")]
-[outputcontrolpoints(OUTPUT_PATCH_SIZE)]
-[patchconstantfunc("BezierConstantHS")]
-HS_OUTPUT BezierHS( InputPatch<VS_CONTROL_POINT_OUTPUT, INPUT_PATCH_SIZE> p, 
-                    uint i : SV_OutputControlPointID,
-                    uint PatchID : SV_PrimitiveID )
-{
-    HS_OUTPUT Output;
-    Output.vPosition = p[i].vPosition;
-    return Output;
-}
-
-//--------------------------------------------------------------------------------------
-// Bezier evaluation domain shader section
-//--------------------------------------------------------------------------------------
-struct DS_OUTPUT
-{
-    float4 vPosition        : SV_POSITION;
-    float3 vWorldPos        : WORLDPOS;
-    float3 vNormal            : NORMAL;
-};
-
-//--------------------------------------------------------------------------------------
-float4 BernsteinBasis(float t)
-{
-    float invT = 1.0f - t;
-
-    return float4( invT * invT * invT,
-                   3.0f * t * invT * invT,
-                   3.0f * t * t * invT,
-                   t * t * t );
-}
-
-//--------------------------------------------------------------------------------------
-float4 dBernsteinBasis(float t)
-{
-    float invT = 1.0f - t;
-
-    return float4( -3 * invT * invT,
-                   3 * invT * invT - 6 * t * invT,
-                   6 * t * invT - 3 * t * t,
-                   3 * t * t );
-}
-
-//--------------------------------------------------------------------------------------
-float3 EvaluateBezier( const OutputPatch<HS_OUTPUT, OUTPUT_PATCH_SIZE> bezpatch,
-                       float4 BasisU,
-                       float4 BasisV )
-{
-    float3 Value = float3(0,0,0);
-    Value  = BasisV.x * ( bezpatch[0].vPosition * BasisU.x + bezpatch[1].vPosition * BasisU.y + bezpatch[2].vPosition * BasisU.z + bezpatch[3].vPosition * BasisU.w );
-    Value += BasisV.y * ( bezpatch[4].vPosition * BasisU.x + bezpatch[5].vPosition * BasisU.y + bezpatch[6].vPosition * BasisU.z + bezpatch[7].vPosition * BasisU.w );
-    Value += BasisV.z * ( bezpatch[8].vPosition * BasisU.x + bezpatch[9].vPosition * BasisU.y + bezpatch[10].vPosition * BasisU.z + bezpatch[11].vPosition * BasisU.w );
-    Value += BasisV.w * ( bezpatch[12].vPosition * BasisU.x + bezpatch[13].vPosition * BasisU.y + bezpatch[14].vPosition * BasisU.z + bezpatch[15].vPosition * BasisU.w );
-
-    return Value;
-}
-
-// The domain shader is run once per vertex and calculates the final vertex's position
-// and attributes.  It receives the UVW from the fixed function tessellator and the
-// control point outputs from the hull shader.  Since we are using the DirectX 11
-// Tessellation pipeline, it is the domain shader's responsibility to calculate the
-// final SV_POSITION for each vertex.  In this sample, we evaluate the vertex's
-// position using a Bernstein polynomial and the normal is calculated as the cross
-// product of the U and V derivatives.
-
-// The input SV_DomainLocation to the domain shader comes from fixed function
-// tessellator.  And the OutputPatch comes from the hull shader.  From these, you
-// must calculate the final vertex position, color, texcoords, and other attributes.
-
-// The output from the domain shader will be a vertex that will go to the video card's
-// rasterization pipeline and get drawn to the screen.
-
-[domain("quad")]
-DS_OUTPUT BezierDS( HS_CONSTANT_DATA_OUTPUT input, 
-                    float2 UV : SV_DomainLocation,
-                    const OutputPatch<HS_OUTPUT, OUTPUT_PATCH_SIZE> bezpatch )
-{
-    float4 BasisU = BernsteinBasis( UV.x );
-    float4 BasisV = BernsteinBasis( UV.y );
-    float4 dBasisU = dBernsteinBasis( UV.x );
-    float4 dBasisV = dBernsteinBasis( UV.y );
-
-    float3 WorldPos = EvaluateBezier( bezpatch, BasisU, BasisV );
-    float3 Tangent = EvaluateBezier( bezpatch, dBasisU, BasisV );
-    float3 BiTangent = EvaluateBezier( bezpatch, BasisU, dBasisV );
-    float3 Norm = normalize( cross( Tangent, BiTangent ) );
-
-    DS_OUTPUT Output;
-    Output.vPosition = mul( float4(WorldPos,1), g_mViewProjection );
-    Output.vWorldPos = WorldPos;
-    Output.vNormal = Norm;
-
-    return Output;    
-}
-
-//--------------------------------------------------------------------------------------
-// Smooth shading pixel shader section
-//--------------------------------------------------------------------------------------
-
-// The pixel shader works the same as it would in a normal graphics pipeline.
-// In this sample, it performs very simple N dot L lighting.
-
-float4 BezierPS( DS_OUTPUT Input ) : SV_TARGET
-{
-    float3 N = normalize(Input.vNormal);
-    float3 L = normalize(Input.vWorldPos - g_vCameraPosWorld);
-    return abs(dot(N, L)) * float4(1, 0, 0, 1);
-}
-
-//--------------------------------------------------------------------------------------
-// Solid color shading pixel shader (used for wireframe overlay)
-//--------------------------------------------------------------------------------------
-float4 SolidColorPS( DS_OUTPUT Input ) : SV_TARGET
-{
-    // Return a solid green color
-    return float4( 0, 1, 0, 1 );
-}
diff --git a/tests/hlsl/dxsdk/SimpleSample11/SimpleSample.fx b/tests/hlsl/dxsdk/SimpleSample11/SimpleSample.fx
deleted file mode 100644
index 00883ce70..000000000
--- a/tests/hlsl/dxsdk/SimpleSample11/SimpleSample.fx
+++ /dev/null
@@ -1,112 +0,0 @@
-//TEST_IGNORE_FILE:
-//--------------------------------------------------------------------------------------
-// File: SimpleSample.fx
-//
-// The effect file for the SimpleSample sample.  
-// 
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-
-//--------------------------------------------------------------------------------------
-// Global variables
-//--------------------------------------------------------------------------------------
-float4 g_MaterialAmbientColor;      // Material's ambient color
-float4 g_MaterialDiffuseColor;      // Material's diffuse color
-float3 g_LightDir;                  // Light's direction in world space
-float4 g_LightDiffuse;              // Light's diffuse color
-texture g_MeshTexture;              // Color texture for mesh
-
-float    g_fTime;                   // App's time in seconds
-float4x4 g_mWorld;                  // World matrix for object
-float4x4 g_mWorldViewProjection;    // World * View * Projection matrix
-
-
-
-//--------------------------------------------------------------------------------------
-// Texture samplers
-//--------------------------------------------------------------------------------------
-sampler MeshTextureSampler = 
-sampler_state
-{
-    Texture = <g_MeshTexture>;
-    MipFilter = LINEAR;
-    MinFilter = LINEAR;
-    MagFilter = LINEAR;
-};
-
-
-//--------------------------------------------------------------------------------------
-// Vertex shader output structure
-//--------------------------------------------------------------------------------------
-struct VS_OUTPUT
-{
-    float4 Position   : POSITION;   // vertex position 
-    float4 Diffuse    : COLOR0;     // vertex diffuse color (note that COLOR0 is clamped from 0..1)
-    float2 TextureUV  : TEXCOORD0;  // vertex texture coords 
-};
-
-
-//--------------------------------------------------------------------------------------
-// This shader computes standard transform and lighting
-//--------------------------------------------------------------------------------------
-VS_OUTPUT RenderSceneVS( float4 vPos : POSITION, 
-                         float3 vNormal : NORMAL,
-                         float2 vTexCoord0 : TEXCOORD0 )
-{
-    VS_OUTPUT Output;
-    float3 vNormalWorldSpace;
-    
-    // Transform the position from object space to homogeneous projection space
-    Output.Position = mul(vPos, g_mWorldViewProjection);
-    
-    // Transform the normal from object space to world space    
-    vNormalWorldSpace = normalize(mul(vNormal, (float3x3)g_mWorld)); // normal (world space)
-
-    // Calc diffuse color    
-    Output.Diffuse.rgb = g_MaterialDiffuseColor * g_LightDiffuse * max(0,dot(vNormalWorldSpace, g_LightDir)) + 
-                         g_MaterialAmbientColor;   
-    Output.Diffuse.a = 1.0f; 
-    
-    // Just copy the texture coordinate through
-    Output.TextureUV = vTexCoord0; 
-    
-    return Output;    
-}
-
-
-//--------------------------------------------------------------------------------------
-// Pixel shader output structure
-//--------------------------------------------------------------------------------------
-struct PS_OUTPUT
-{
-    float4 RGBColor : COLOR0;  // Pixel color    
-};
-
-
-//--------------------------------------------------------------------------------------
-// This shader outputs the pixel's color by modulating the texture's
-// color with diffuse material color
-//--------------------------------------------------------------------------------------
-PS_OUTPUT RenderScenePS( VS_OUTPUT In ) 
-{ 
-    PS_OUTPUT Output;
-
-    // Lookup mesh texture and modulate it with diffuse
-    Output.RGBColor = tex2D(MeshTextureSampler, In.TextureUV) * In.Diffuse;
-
-    return Output;
-}
-
-
-//--------------------------------------------------------------------------------------
-// Renders scene 
-//--------------------------------------------------------------------------------------
-technique RenderScene
-{
-    pass P0
-    {          
-        VertexShader = compile vs_2_0 RenderSceneVS();
-        PixelShader  = compile ps_2_0 RenderScenePS(); 
-    }
-}
diff --git a/tests/hlsl/dxsdk/SimpleSample11/SimpleSample.hlsl b/tests/hlsl/dxsdk/SimpleSample11/SimpleSample.hlsl
deleted file mode 100644
index 7c688940b..000000000
--- a/tests/hlsl/dxsdk/SimpleSample11/SimpleSample.hlsl
+++ /dev/null
@@ -1,86 +0,0 @@
-//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues.
-//TEST:COMPARE_HLSL: -profile vs_4_0 -entry RenderSceneVS -profile ps_4_0 -entry RenderScenePS
-//--------------------------------------------------------------------------------------
-// File: SimpleSample.hlsl
-//
-// The HLSL file for the SimpleSample sample for the Direct3D 11 device
-// 
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-
-//--------------------------------------------------------------------------------------
-// Constant Buffers
-//--------------------------------------------------------------------------------------
-cbuffer cbPerObject : register( b0 )
-{
-    matrix  g_mWorldViewProjection  : packoffset( c0 );
-    matrix  g_mWorld                : packoffset( c4 );
-    float4  g_MaterialAmbientColor  : packoffset( c8 );
-    float4  g_MaterialDiffuseColor  : packoffset( c9 );
-}
-
-cbuffer cbPerFrame : register( b1 )
-{
-    float3              g_vLightDir             : packoffset( c0 );
-    float               g_fTime                 : packoffset( c0.w );
-    float4              g_LightDiffuse          : packoffset( c1 );
-};
-
-//-----------------------------------------------------------------------------------------
-// Textures and Samplers
-//-----------------------------------------------------------------------------------------
-Texture2D    g_txDiffuse : register( t0 );
-SamplerState g_samLinear : register( s0 );
-
-//--------------------------------------------------------------------------------------
-// shader input/output structure
-//--------------------------------------------------------------------------------------
-struct VS_INPUT
-{
-    float4 Position     : POSITION; // vertex position 
-    float3 Normal       : NORMAL;   // this normal comes in per-vertex
-    float2 TextureUV    : TEXCOORD0;// vertex texture coords 
-};
-
-struct VS_OUTPUT
-{
-    float4 Position     : SV_POSITION; // vertex position 
-    float4 Diffuse      : COLOR0;      // vertex diffuse color (note that COLOR0 is clamped from 0..1)
-    float2 TextureUV    : TEXCOORD0;   // vertex texture coords 
-};
-
-//--------------------------------------------------------------------------------------
-// This shader computes standard transform and lighting
-//--------------------------------------------------------------------------------------
-VS_OUTPUT RenderSceneVS( VS_INPUT input )
-{
-    VS_OUTPUT Output;
-    float3 vNormalWorldSpace;
-    
-    // Transform the position from object space to homogeneous projection space
-    Output.Position = mul( input.Position, g_mWorldViewProjection );
-    
-    // Transform the normal from object space to world space    
-    vNormalWorldSpace = normalize(mul(input.Normal, (float3x3)g_mWorld)); // normal (world space)
-
-    // Calc diffuse color    
-    Output.Diffuse.rgb = g_MaterialDiffuseColor * g_LightDiffuse * max(0,dot(vNormalWorldSpace, g_vLightDir)) + 
-                         g_MaterialAmbientColor;   
-    Output.Diffuse.a = 1.0f; 
-    
-    // Just copy the texture coordinate through
-    Output.TextureUV = input.TextureUV; 
-    
-    return Output;    
-}
-
-//--------------------------------------------------------------------------------------
-// This shader outputs the pixel's color by modulating the texture's
-// color with diffuse material color
-//--------------------------------------------------------------------------------------
-float4 RenderScenePS( VS_OUTPUT In ) : SV_TARGET
-{ 
-    // Lookup mesh texture and modulate it with diffuse
-    return g_txDiffuse.Sample( g_samLinear, In.TextureUV ) * In.Diffuse;
-}
diff --git a/tests/hlsl/dxsdk/SubD11/SubD11.hlsl b/tests/hlsl/dxsdk/SubD11/SubD11.hlsl
deleted file mode 100644
index 839e004e6..000000000
--- a/tests/hlsl/dxsdk/SubD11/SubD11.hlsl
+++ /dev/null
@@ -1,1238 +0,0 @@
-//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues.
-//TEST:COMPARE_HLSL: -profile vs_4_0 -entry PatchSkinningVS -entry MeshSkinningVS -profile hs_5_0 -entry SubDToBezierHS -entry SubDToBezierHS4444 -profile ds_5_0 -entry BezierEvalDS -profile ps_4_0 -entry SmoothPS -entry SolidColorPS
-//--------------------------------------------------------------------------------------
-// File: SubD11.hlsl
-//
-// This file contains functions to convert from a Catmull-Clark subdivision
-// representation to a bicubic patch representation.
-// 
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-//Work-around for an optimization rule problem in the June 2010 HLSL Compiler (9.29.952.3111)
-//see http://support.microsoft.com/kb/2448404
-#if D3DX_VERSION == 0xa2b
-#pragma ruledisable 0x0802405f
-#endif
-
-//--------------------------------------------------------------------------------------
-// A sample extraordinary SubD quad is represented by the following diagram:
-//
-//                        15              Valences:
-//                       /  \               Vertex 0: 5
-//                      /    14             Vertex 1: 4
-//          17---------16   /  \            Vertex 2: 5
-//          | \         |  /    \           Vertex 3: 3
-//          |  \        | /      13
-//          |   \       |/      /         Prefixes:
-//          |    3------2------12           Vertex 0: 9
-//          |    |      |      |            Vertex 1: 12
-//          |    |      |      |            Vertex 2: 16
-//          4----0------1------11           Vertex 3: 18
-//         /    /|      |      |
-//        /    / |      |      |
-//       5    /  8------9------10
-//        \  /  /
-//         6   /
-//          \ /
-//           7
-//
-// Where the quad bounded by vertices 0,1,2,3 represents the actual subd surface of interest
-// The 1-ring neighborhood of the quad is represented by vertices 4 through 17.  The counter-
-// clockwise winding of this 1-ring neighborhood is important, especially when it comes to compute
-// the corner vertices of the bicubic patch that we will use to approximate the subd quad (0,1,2,3).
-// 
-// The resulting bicubic patch fits within the subd quad (0,1,2,3) and has the following control
-// point layout:
-//
-//     12--13--14--15
-//      8---9--10--11
-//      4---5---6---7
-//      0---1---2---3
-//
-// The inner 4 control points of the bicubic patch are a combination of only the vertices (0,1,2,3)
-// of the subd quad.  However, the corner control points for the bicubic patch (0,3,15,12) are actually
-// a much more complex weighting of the subd patch and the 1-ring neighborhood.  In the example above
-// the bicubic control point 0 is actually a weighted combination of subd points 0,1,2,3 and 1-ring
-// neighborhood points 17, 4, 5, 6, 7, 8, and 9.  We can see that the 1-ring neighbor hood is simply
-// walked from the prefix value from the previous corner (corner 3 in this case) to the prefix 
-// prefix value for the current corner.  We add one more vertex on either side of the prefix values
-// and we have all the data necessary to calculate the value for the corner points.
-//
-// The edge control points of the bicubic patch (1,2,13,14,4,8,7,11) are also combinations of their 
-// neighbors, but fortunately each one is only a combination of 6 values and no walk is required.
-//--------------------------------------------------------------------------------------
-
-#define MOD4(x) ((x)&3)
-#ifndef MAX_POINTS
-#define MAX_POINTS 32
-#endif
-#define MAX_BONE_MATRICES 80
-                        
-//--------------------------------------------------------------------------------------
-// Textures
-//--------------------------------------------------------------------------------------
-Texture2D       g_txHeight : register( t0 );           // Height and Bump texture
-Texture2D       g_txDiffuse : register( t1 );          // Diffuse texture
-Texture2D       g_txSpecular : register( t2 );         // Specular texture
-
-//--------------------------------------------------------------------------------------
-// Samplers
-//--------------------------------------------------------------------------------------
-SamplerState g_samLinear : register( s0 );
-SamplerState g_samPoint : register( s0 );
-
-//--------------------------------------------------------------------------------------
-// Constant Buffers
-//--------------------------------------------------------------------------------------
-cbuffer cbTangentStencilConstants : register( b0 )
-{
-    float g_TanM[1024]; // Tangent patch stencils precomputed by the application
-    float g_fCi[16];    // Valence coefficients precomputed by the application
-};
-
-cbuffer cbPerMesh : register( b1 )
-{
-    matrix g_mConstBoneWorld[MAX_BONE_MATRICES];
-};
-
-cbuffer cbPerFrame : register( b2 )
-{
-    matrix g_mViewProjection;
-    float3 g_vCameraPosWorld;
-    float  g_fTessellationFactor;
-    float  g_fDisplacementHeight;
-    float3 g_vSolidColor;
-};
-
-cbuffer cbPerSubset : register( b3 )
-{
-    int g_iPatchStartIndex;
-}
-
-//--------------------------------------------------------------------------------------
-Buffer<uint4>  g_ValencePrefixBuffer : register( t0 );
-
-//--------------------------------------------------------------------------------------
-struct VS_CONTROL_POINT_OUTPUT
-{
-    float3 vPosition		: WORLDPOS;
-    float2 vUV				: TEXCOORD0;
-    float3 vTangent			: TANGENT;
-};
-
-struct BEZIER_CONTROL_POINT
-{
-    float3 vPosition	: BEZIERPOS;
-};
-
-struct PS_INPUT
-{
-    float3 vWorldPos        : POSITION;
-    float3 vNormal			: NORMAL;
-    float2 vUV				: TEXCOORD;
-    float3 vTangent			: TANGENT;
-    float3 vBiTangent		: BITANGENT;
-};
-
-//--------------------------------------------------------------------------------------
-// SubD to Bezier helper functions
-//--------------------------------------------------------------------------------------
-// Helps with getting tangent stencils from the g_TanM constant array
-#define TANM(a,v) ( g_TanM[ Val[v]*64 + (a) ] )
-
-//--------------------------------------------------------------------------------------
-float3 ComputeInteriorVertex( uint index, 
-                              uint Val[4], 
-                              const in InputPatch<VS_CONTROL_POINT_OUTPUT, MAX_POINTS> ip )
-{
-    switch( index )
-    {
-    case 0:
-        return (ip[0].vPosition*Val[0] + ip[1].vPosition*2 +      ip[2].vPosition +        ip[3].vPosition*2)      / (5+Val[0]);
-    case 1:
-        return (ip[0].vPosition*2 +      ip[1].vPosition*Val[1] + ip[2].vPosition*2 +      ip[3].vPosition)        / (5+Val[1]);
-    case 2:
-        return (ip[0].vPosition +        ip[1].vPosition*2 +      ip[2].vPosition*Val[2] + ip[3].vPosition*2)      / (5+Val[2]);
-    case 3:
-        return (ip[0].vPosition*2 +      ip[1].vPosition +        ip[2].vPosition*2 +      ip[3].vPosition*Val[3]) / (5+Val[3]);
-    }
-    
-    return float3(0,0,0);
-}
-
-//--------------------------------------------------------------------------------------
-// Computes the corner vertices of the output UV patch.  The corner vertices are
-// a weighted combination of all points that are "connected" to that corner by an edge.
-// The interior 4 points of the original subd quad are easy to get.  The points in the
-// 1-ring neighborhood around the interior quad are not.
-//
-// Because the valence of that corner could be any number between 3 and 16, we need to
-// walk around the subd patch vertices connected to that point.  This is there the
-// Pref (prefix) values come into play.  Each corner has a prefix value that is the index
-// of the last value around the 1-ring neighborhood that should be used in calculating
-// the coefficient of that corner.  The walk goes from the prefix value of the previous
-// corner to the prefix value of the current corner.
-//--------------------------------------------------------------------------------------
-void ComputeCornerVertex( uint index, 
-                          out float3 CornerB, // Corner for the Bezier patch
-                          out float3 CornerU, // Corner for the tangent patch
-                          out float3 CornerV, // Corner for the bitangent patch
-                          const in InputPatch<VS_CONTROL_POINT_OUTPUT, MAX_POINTS> ip, 
-                          const in uint Val[4], 
-                          const in uint Pref[4] )
-{
-    const float fOWt = 1;
-    const float fEWt = 4;
-
-    // Figure out where to start the walk by using the previous corner's prefix value
-    uint PrefIm1 = 0;
-    uint uStart = 4;
-    if( index )
-    {
-        PrefIm1 = Pref[index-1];
-        uStart = PrefIm1;
-    }
-    
-    // Setup the walk indices
-    uint uTIndexStart = 2 - (index&1);
-    uint uTIndex = uTIndexStart;
-
-    // Calculate the N*N weight for the final value
-    CornerB = (Val[index]*Val[index])*ip[index].vPosition; // n^2 part
-
-    // Zero out the corners
-    CornerU = float4(0,0,0,0);
-    CornerV = float4(0,0,0,0);
-    
-    const uint uV = Val[index]  + ( ( index & 1 ) ? 1 : -1 );
-        
-    // Start the walk with the uStart prefix (the prefix of the corner before us)
-    CornerB += ip[uStart].vPosition * fEWt;
-    CornerU += ip[uStart].vPosition * TANM( uTIndex * 2, index );
-    CornerV += ip[uStart].vPosition * TANM( ( ( uTIndex + uV ) % Val[index] ) * 2, index);
-
-    // Gather all vertices between the previous corner's prefix and our own prefix
-    // We'll do two at a time, since they always come in twos
-    while(uStart < Pref[index]-1) 
-    {
-        ++uStart;
-        CornerB += ip[uStart].vPosition * fOWt;
-        CornerU += ip[uStart].vPosition * TANM( uTIndex * 2 + 1, index );
-        CornerV += ip[uStart].vPosition * TANM( ( ( uTIndex + uV ) % Val[index] ) * 2 + 1, index );
-
-        ++uTIndex;
-        ++uStart;
-        CornerB += ip[uStart].vPosition * fEWt;
-        CornerU += ip[uStart].vPosition * TANM( ( uTIndex % Val[index] ) * 2, index );
-        CornerV += ip[uStart].vPosition * TANM( ( ( uTIndex+uV)%Val[index]) * 2, index );
-    }
-    ++uStart;
-
-    // Add in the last guy and make sure to wrap to the beginning if we're the last corner
-    if (index == 3)
-        uStart = 4; 
-    CornerB += ip[uStart].vPosition * fOWt;
-    CornerU += ip[uStart].vPosition * TANM( ( uTIndex % Val[index] ) * 2 + 1, index );
-    CornerV += ip[uStart].vPosition * TANM( ( ( uTIndex + uV ) % Val[index] ) * 2 + 1, index );
-
-    // Add in the guy before the prefix as well
-    if (index)
-        uStart = PrefIm1-1;
-    else
-        uStart = Pref[3]-1;
-    uTIndex = uTIndexStart-1;
-
-    CornerB += ip[uStart].vPosition * fOWt;
-    CornerU += ip[uStart].vPosition * TANM( ( uTIndex % Val[index] ) * 2 + 1, index );
-    CornerV += ip[uStart].vPosition * TANM( ( ( uTIndex + uV ) % Val[index] ) * 2 + 1, index );
-
-    // We're done with the walk now.  Now we need to add the contributions of the original subd quad.
-    CornerB += ip[MOD4(index+1)].vPosition * fEWt;
-    CornerB += ip[MOD4(index+2)].vPosition * fOWt;
-    CornerB += ip[MOD4(index+3)].vPosition * fEWt;
-    
-    uTIndex = 0 + (index&1)*(Val[index]-1);
-    uStart = MOD4(index+1);
-    CornerU += ip[uStart].vPosition * TANM( ( uTIndex % Val[index] ) * 2, index );
-    CornerV += ip[uStart].vPosition * TANM( ( ( uTIndex + uV ) % Val[index] ) * 2, index );
-    
-    uStart = MOD4(index+2);
-    CornerU += ip[uStart].vPosition * TANM( ( uTIndex % Val[index] ) * 2 + 1, index );
-    CornerV += ip[uStart].vPosition * TANM( ( ( uTIndex + uV ) % Val[index] ) * 2 + 1, index );
-
-    uStart = MOD4(index+3);
-    uTIndex = (uTIndex+1)%Val[index];
-
-    CornerU += ip[uStart].vPosition * TANM( ( uTIndex % Val[index] ) * 2, index );
-    CornerV += ip[uStart].vPosition * TANM( ( ( uTIndex + uV ) % Val[index] ) * 2, index );
-
-    // Normalize the corner weights
-    CornerB *= 1.0f / ( Val[index] * Val[index] + 5 * Val[index] ); // normalize
-
-    // fixup signs from directional derivatives...
-    if( !((index - 1) & 2) ) // 1 and 2
-        CornerU *= -1;
-
-    if( index >= 2 ) // 2 and 3
-        CornerV *= -1;
-}
-
-void ComputeCornerVertex4444( uint index, 
-                          out float3 CornerB, // Corner for the Bezier patch
-                          out float3 CornerU, // Corner for the tangent patch
-                          out float3 CornerV, // Corner for the bitangent patch
-                          const in InputPatch<VS_CONTROL_POINT_OUTPUT, MAX_POINTS> ip, 
-                          const in uint Val[4], 
-                          const in uint Pref[4] )
-{
-    const float fOWt = 1;
-    const float fEWt = 4;
-
-    // Figure out where to start the walk by using the previous corner's prefix value
-    uint PrefIm1 = 0;
-    uint uStart = 4;
-    if( index )
-    {
-        PrefIm1 = Pref[index-1];
-        uStart = PrefIm1;
-    }
-    
-    // Setup the walk indices
-    uint uTIndexStart = 2 - (index&1);
-    uint uTIndex = uTIndexStart;
-
-    // Calculate the N*N weight for the final value
-    CornerB = (Val[index]*Val[index])*ip[index].vPosition; // n^2 part
-
-    // Zero out the corners
-    CornerU = float4(0,0,0,0);
-    CornerV = float4(0,0,0,0);
-    
-    const uint uV = Val[index]  + ( ( index & 1 ) ? 1 : -1 );
-        
-    // Start the walk with the uStart prefix (the prefix of the corner before us)
-    CornerB += ip[uStart].vPosition * fEWt;
-    CornerU += ip[uStart].vPosition * TANM( uTIndex * 2, index );
-    CornerV += ip[uStart].vPosition * TANM( ( ( uTIndex + uV ) % Val[index] ) * 2, index);
-
-    // Gather all vertices between the previous corner's prefix and our own prefix
-    // We'll do two at a time, since they always come in twos
-    while(uStart < Pref[index]-1) 
-    {
-        ++uStart;
-        CornerB += ip[uStart].vPosition * fOWt;
-        CornerU += ip[uStart].vPosition * TANM( uTIndex * 2 + 1, index );
-        CornerV += ip[uStart].vPosition * TANM( ( ( uTIndex + uV ) % Val[index] ) * 2 + 1, index );
-
-        ++uTIndex;
-        ++uStart;
-        CornerB += ip[uStart].vPosition * fEWt;
-        CornerU += ip[uStart].vPosition * TANM( ( uTIndex % Val[index] ) * 2, index );
-        CornerV += ip[uStart].vPosition * TANM( ( ( uTIndex+uV)%Val[index]) * 2, index );
-    }
-    ++uStart;
-
-    // Add in the last guy and make sure to wrap to the beginning if we're the last corner
-    if (index == 3)
-        uStart = 4; 
-    CornerB += ip[uStart].vPosition * fOWt;
-    CornerU += ip[uStart].vPosition * TANM( ( uTIndex % Val[index] ) * 2 + 1, index );
-    CornerV += ip[uStart].vPosition * TANM( ( ( uTIndex + uV ) % Val[index] ) * 2 + 1, index );
-
-    // Add in the guy before the prefix as well
-    if (index)
-        uStart = PrefIm1-1;
-    else
-        uStart = Pref[3]-1;
-    uTIndex = uTIndexStart-1;
-
-    CornerB += ip[uStart].vPosition * fOWt;
-    CornerU += ip[uStart].vPosition * TANM( ( uTIndex % Val[index] ) * 2 + 1, index );
-    CornerV += ip[uStart].vPosition * TANM( ( ( uTIndex + uV ) % Val[index] ) * 2 + 1, index );
-
-    // We're done with the walk now.  Now we need to add the contributions of the original subd quad.
-    CornerB += ip[MOD4(index+1)].vPosition * fEWt;
-    CornerB += ip[MOD4(index+2)].vPosition * fOWt;
-    CornerB += ip[MOD4(index+3)].vPosition * fEWt;
-    
-    uTIndex = 0 + (index&1)*(Val[index]-1);
-    uStart = MOD4(index+1);
-    CornerU += ip[uStart].vPosition * TANM( ( uTIndex % Val[index] ) * 2, index );
-    CornerV += ip[uStart].vPosition * TANM( ( ( uTIndex + uV ) % Val[index] ) * 2, index );
-    
-    uStart = MOD4(index+2);
-    CornerU += ip[uStart].vPosition * TANM( ( uTIndex % Val[index] ) * 2 + 1, index );
-    CornerV += ip[uStart].vPosition * TANM( ( ( uTIndex + uV ) % Val[index] ) * 2 + 1, index );
-
-    uStart = MOD4(index+3);
-    uTIndex = (uTIndex+1)%Val[index];
-
-    CornerU += ip[uStart].vPosition * TANM( ( uTIndex % Val[index] ) * 2, index );
-    CornerV += ip[uStart].vPosition * TANM( ( ( uTIndex + uV ) % Val[index] ) * 2, index );
-
-    // Normalize the corner weights
-    CornerB *= 1.0f / ( Val[index] * Val[index] + 5 * Val[index] ); // normalize
-
-    // fixup signs from directional derivatives...
-    if( !((index - 1) & 2) ) // 1 and 2
-        CornerU *= -1;
-
-    if( index >= 2 ) // 2 and 3
-        CornerV *= -1;
-}
-
-//--------------------------------------------------------------------------------------
-// Computes the edge vertices of the output bicubic patch.  The edge vertices
-// (1,2,4,7,8,11,13,14) are a weighted (by valence) combination of 6 interior and 1-ring
-// neighborhood points.  However, we don't have to do the walk on this one since we
-// don't need all of the neighbor points attached to this vertex.
-//--------------------------------------------------------------------------------------
-float3 ComputeEdgeVertex( in uint index /* 0-7 */, 
-                          const in InputPatch<VS_CONTROL_POINT_OUTPUT, MAX_POINTS> ip, 
-                          const in uint Val[4], 
-                          const in uint Pref[4] )
-{
-    float val1 = 2 * Val[0] + 10;
-    float val2 = 2 * Val[1] + 10;
-    float val13 = 2 * Val[3] + 10;
-    float val14 = 2 * Val[2] + 10;
-    float val4 = val1;
-    float val8 = val13;
-    float val7 = val2;
-    float val11 = val14;
-    
-    float3 vRetVal = float3(0,0,0);
-    switch( index )
-    {
-    // Horizontal
-    case 0:
-        vRetVal = (Val[0]*2*ip[0].vPosition + 4*ip[1].vPosition + ip[2].vPosition + ip[3].vPosition*2 +
-              2*ip[Pref[0]-1].vPosition + ip[Pref[0]].vPosition) / val1;
-        break;
-    case 1:
-        vRetVal = (4*ip[0].vPosition + Val[1]*2*ip[1].vPosition + ip[2].vPosition*2 + ip[3].vPosition +
-              ip[Pref[0]-1].vPosition + 2*ip[Pref[0]].vPosition) / val2;
-        break;
-    case 2:
-        vRetVal = (2*ip[0].vPosition + ip[1].vPosition + 4*ip[2].vPosition + ip[3].vPosition*2*Val[3] +
-               2*ip[Pref[2]].vPosition + ip[Pref[2]-1].vPosition) / val13;
-        break;
-    case 3:
-        vRetVal = (ip[0].vPosition + 2*ip[1].vPosition + Val[2]*2*ip[2].vPosition + ip[3].vPosition*4 +
-               ip[Pref[2]].vPosition + 2*ip[Pref[2]-1].vPosition) / val14;
-        break;
-    // Vertical
-    case 4:
-        vRetVal = (Val[0]*2*ip[0].vPosition + 2*ip[1].vPosition + ip[2].vPosition + ip[3].vPosition*4 +
-              2*ip[4].vPosition + ip[Pref[3]-1].vPosition) / val4;
-        break;
-    case 5:
-        vRetVal = (4*ip[0].vPosition + ip[1].vPosition + 2*ip[2].vPosition + ip[3].vPosition*2*Val[3] +
-              ip[4].vPosition + 2*ip[Pref[3]-1].vPosition) / val8;
-        break;
-    case 6:
-        vRetVal = (2*ip[0].vPosition + Val[1]*2*ip[1].vPosition + 4*ip[2].vPosition + ip[3].vPosition +
-              2*ip[Pref[1]-1].vPosition + ip[Pref[1]].vPosition) / val7;
-        break;
-    case 7:
-        vRetVal = (ip[0].vPosition + 4*ip[1].vPosition + Val[2]*2*ip[2].vPosition + 2*ip[3].vPosition +
-               ip[Pref[1]-1].vPosition + 2*ip[Pref[1]].vPosition) / val11;
-        break;
-    }
-        
-    return vRetVal;
-}
-
-//--------------------------------------------------------------------------------------
-// Helper function
-//--------------------------------------------------------------------------------------
-void BezierRaise(inout float3 pQ[3], out float3 pC[4])
-{
-    pC[0] = pQ[0];
-    pC[3] = pQ[2];
-
-    for( int i=1; i<3; i++ ) 
-    {
-        pC[i] = ( 1.0f / 3.0f ) * ( pQ[i - 1] * i + ( 3.0f - i ) * pQ[i] );
-    }
-}
-
-//--------------------------------------------------------------------------------------
-// Computes the tangent patch from the input bezier patch
-//--------------------------------------------------------------------------------------
-void ComputeTanPatch( const OutputPatch<BEZIER_CONTROL_POINT, 16> bezpatch, 
-                      inout float3 vOut[16], 
-                      in float fCWts[4], 
-                      in float3 vCorner[4], 
-                      in float3 vCornerLocal[4], 
-                      in const uint cX, 
-                      in const uint cY)
-{
-    float3 vQuad[3];
-    float3 vQuadB[3];
-    float3 vCubic[4];
-
-    // boundary edges are really simple...
-    vQuad[0] = vCornerLocal[0];
-    vQuad[2] = vCornerLocal[1];
-    vQuad[1] = 3.0f*(bezpatch[2*cX+0*cY].vPosition-bezpatch[1*cX+0*cY].vPosition);
-
-    BezierRaise(vQuad,vCubic);
-    vOut[1*cX + 0*cY] = vCubic[1];
-    vOut[2*cX + 0*cY] = vCubic[2];
-
-    vQuad[0] = vCornerLocal[2];
-    vQuad[2] = vCornerLocal[3];
-    vQuad[1] = 3.0f*(bezpatch[2*cX+3*cY].vPosition-bezpatch[1*cX+3*cY].vPosition);
-
-    BezierRaise(vQuad,vCubic);
-    vOut[1*cX + 3*cY] = vCubic[1];
-    vOut[2*cX + 3*cY] = vCubic[2];
-
-    // two internal edges - this is where work happens...
-    float3 vA,vB,vC,vD,vE;
-    float fC0,fC1;
-    vQuad[1] = 3.0f*(bezpatch[2*cX+2*cY].vPosition-bezpatch[1*cX+2*cY].vPosition);
-    // also do "second" scan line
-    vQuadB[1] = 3.0f*(bezpatch[2*cX+1*cY].vPosition-bezpatch[1*cX+1*cY].vPosition);
-
-    vD = 3.0f*(bezpatch[1*cX + 2*cY].vPosition - bezpatch[0*cX + 2*cY].vPosition);
-    vE = 3.0f*(bezpatch[1*cX + 1*cY].vPosition - bezpatch[0*cX + 1*cY].vPosition); // used later...
-
-    fC0 = fCWts[3];
-    fC1 = fCWts[0];
-
-    // sign flip
-    vA = -vCorner[3];
-    vB = 3.0f*(bezpatch[0*cX + 1*cY].vPosition - bezpatch[0*cX + 2*cY].vPosition);
-    vC = -vCorner[0];
-
-    vQuad[0] = 1.0f/3.0f*(2.0f*fC0*vB - fC1*vA) + vD;
-    vQuadB[0] = 1.0f/3.0f*(fC0*vC - 2.0f*fC1*vB) + vE;
-
-    // do end of strip - same as before, but stuff is switched around...
-    vC = vCorner[2];
-    vB = 3.0f*(bezpatch[3*cX + 2*cY].vPosition - bezpatch[3*cX + 1*cY].vPosition);
-    vA = vCorner[1];
-
-    vD = 3.0f*(bezpatch[2*cX + 1*cY].vPosition - bezpatch[3*cX + 1*cY].vPosition);
-    vE = 3.0f*(bezpatch[2*cX + 2*cY].vPosition - bezpatch[3*cX + 2*cY].vPosition);
-    
-    fC0 = fCWts[1];
-    fC1 = fCWts[2];
- 
-    vQuadB[2] = 1.0f/3.0f*(2.0f*fC0*vB - fC1*vA) + vD;
-    vQuad[2] = 1.0f/3.0f*(fC0*vC - 2.0f*fC1*vB) + vE;
-
-    vQuadB[2] *= -1.0f;
-    vQuad[2] *= -1.0f;
-
-    BezierRaise(vQuad,vCubic);
-
-    vOut[0*cX + 2*cY] = vCubic[0];
-    vOut[1*cX + 2*cY] = vCubic[1];
-    vOut[2*cX + 2*cY] = vCubic[2];
-    vOut[3*cX + 2*cY] = vCubic[3];
-
-    BezierRaise(vQuadB,vCubic);
-
-    vOut[0*cX + 1*cY] = vCubic[0];
-    vOut[1*cX + 1*cY] = vCubic[1];
-    vOut[2*cX + 1*cY] = vCubic[2];
-    vOut[3*cX + 1*cY] = vCubic[3];
-}
-
-//--------------------------------------------------------------------------------------
-// Skinning vertex shader Section
-//--------------------------------------------------------------------------------------
-struct VS_CONTROL_POINT_INPUT
-{
-    float3 vPosition		: POSITION;
-    float2 vUV				: TEXCOORD0;
-    float3 vTangent			: TANGENT;
-    uint4  vBones			: BONES;
-    float4 vWeights			: WEIGHTS;
-};
-
-VS_CONTROL_POINT_OUTPUT PatchSkinningVS( VS_CONTROL_POINT_INPUT Input )
-{
-    VS_CONTROL_POINT_OUTPUT Output;
-    
-    float4 vInputPos = float4( Input.vPosition, 1 );
-    float4 vWorldPos = float4( 0, 0, 0, 0 );
-    
-    vWorldPos += mul( vInputPos, g_mConstBoneWorld[ Input.vBones.x ] ) * Input.vWeights.x;
-    vWorldPos += mul( vInputPos, g_mConstBoneWorld[ Input.vBones.y ] ) * Input.vWeights.y;
-    vWorldPos += mul( vInputPos, g_mConstBoneWorld[ Input.vBones.z ] ) * Input.vWeights.z;
-    vWorldPos += mul( vInputPos, g_mConstBoneWorld[ Input.vBones.w ] ) * Input.vWeights.w;
-    
-    float3 vWorldTan = float3( 0, 0, 0 );
-    vWorldTan += mul( Input.vTangent, (float3x3)g_mConstBoneWorld[ Input.vBones.x ] ) * Input.vWeights.x;
-    vWorldTan += mul( Input.vTangent, (float3x3)g_mConstBoneWorld[ Input.vBones.y ] ) * Input.vWeights.y;
-    vWorldTan += mul( Input.vTangent, (float3x3)g_mConstBoneWorld[ Input.vBones.z ] ) * Input.vWeights.z;
-    vWorldTan += mul( Input.vTangent, (float3x3)g_mConstBoneWorld[ Input.vBones.w ] ) * Input.vWeights.w;
-    
-    Output.vPosition = vWorldPos;
-    Output.vUV = Input.vUV;
-    Output.vTangent = vWorldTan;
-    
-    return Output;
-}
-
-struct VS_MESH_POINT_INPUT
-{
-    float3 vPosition		: POSITION;
-    float2 vUV				: TEXCOORD0;
-    float3 vNormal			: NORMAL;
-    float3 vTangent			: TANGENT;
-    uint4  vBones			: BONES;
-    float4 vWeights			: WEIGHTS;
-};
-
-struct VS_MESH_POINT_OUTPUT
-{
-    float3 vWorldPos        : POSITION;
-    float3 vNormal			: NORMAL;
-    float2 vUV				: TEXCOORD;
-    float3 vTangent			: TANGENT;
-    float3 vBiTangent		: BITANGENT;
-    
-    float4 vPosition        : SV_POSITION;
-};
-
-VS_MESH_POINT_OUTPUT MeshSkinningVS( VS_MESH_POINT_INPUT Input )
-{
-    VS_MESH_POINT_OUTPUT Output;
-    
-    float4 vInputPos = float4( Input.vPosition, 1 );
-    float4 vWorldPos = float4( 0, 0, 0, 0 );
-    
-    vWorldPos += mul( vInputPos, g_mConstBoneWorld[ Input.vBones.x ] ) * Input.vWeights.x;
-    vWorldPos += mul( vInputPos, g_mConstBoneWorld[ Input.vBones.y ] ) * Input.vWeights.y;
-    vWorldPos += mul( vInputPos, g_mConstBoneWorld[ Input.vBones.z ] ) * Input.vWeights.z;
-    vWorldPos += mul( vInputPos, g_mConstBoneWorld[ Input.vBones.w ] ) * Input.vWeights.w;
-    
-    float3 vWorldTan = float3( 0, 0, 0 );
-    vWorldTan += mul( Input.vTangent, (float3x3)g_mConstBoneWorld[ Input.vBones.x ] ) * Input.vWeights.x;
-    vWorldTan += mul( Input.vTangent, (float3x3)g_mConstBoneWorld[ Input.vBones.y ] ) * Input.vWeights.y;
-    vWorldTan += mul( Input.vTangent, (float3x3)g_mConstBoneWorld[ Input.vBones.z ] ) * Input.vWeights.z;
-    vWorldTan += mul( Input.vTangent, (float3x3)g_mConstBoneWorld[ Input.vBones.w ] ) * Input.vWeights.w;
-    
-    float3 vWorldNormal = float3( 0, 0, 0 );
-    vWorldNormal += mul( Input.vNormal, (float3x3)g_mConstBoneWorld[ Input.vBones.x ] ) * Input.vWeights.x;
-    vWorldNormal += mul( Input.vNormal, (float3x3)g_mConstBoneWorld[ Input.vBones.y ] ) * Input.vWeights.y;
-    vWorldNormal += mul( Input.vNormal, (float3x3)g_mConstBoneWorld[ Input.vBones.z ] ) * Input.vWeights.z;
-    vWorldNormal += mul( Input.vNormal, (float3x3)g_mConstBoneWorld[ Input.vBones.w ] ) * Input.vWeights.w;
-    
-    Output.vWorldPos = vWorldPos.xyz;
-    Output.vPosition = mul( float4( vWorldPos.xyz, 1 ), g_mViewProjection );
-    Output.vUV = Input.vUV;
-    Output.vTangent = vWorldTan;
-    Output.vNormal = vWorldNormal;
-    Output.vBiTangent = cross( vWorldNormal, vWorldTan );
-    
-    return Output;    
-}
-
-//--------------------------------------------------------------------------------------
-// SubD to Bezier hull shader Section
-//--------------------------------------------------------------------------------------
-struct HS_CONSTANT_DATA_OUTPUT
-{
-    float Edges[4]			: SV_TessFactor;
-    float Inside[2]			: SV_InsideTessFactor;
-    
-    float3 vTangent[4]		: TANGENT;
-    float2 vUV[4]			: TEXCOORD;
-    float3 vTanUCorner[4]	: TANUCORNER;
-    float3 vTanVCorner[4]	: TANVCORNER;
-    float4 vCWts			: TANWEIGHTS;
-};
-
-//--------------------------------------------------------------------------------------
-// Load per-patch valence and prefix data
-//--------------------------------------------------------------------------------------
-void LoadValenceAndPrefixData( in uint PatchID, out uint Val[4], out uint Prefixes[4] )
-{
-    PatchID += g_iPatchStartIndex;
-    uint4 ValPack = g_ValencePrefixBuffer.Load( PatchID * 2 );
-    uint4 PrefPack = g_ValencePrefixBuffer.Load( PatchID * 2 + 1 );
-    
-    Val[0] = ValPack.x;
-    Val[1] = ValPack.y;
-    Val[2] = ValPack.z;
-    Val[3] = ValPack.w;
-    
-    Prefixes[0] = PrefPack.x;
-    Prefixes[1] = PrefPack.y;
-    Prefixes[2] = PrefPack.z;
-    Prefixes[3] = PrefPack.w;
-}
-
-
-//--------------------------------------------------------------------------------------
-// Constant data function for the SubDToBezierHS.  This is executed once per patch.
-//--------------------------------------------------------------------------------------
-HS_CONSTANT_DATA_OUTPUT SubDToBezierConstantsHS( InputPatch<VS_CONTROL_POINT_OUTPUT, MAX_POINTS> ip,
-                                                 uint PatchID : SV_PrimitiveID )
-{	
-    HS_CONSTANT_DATA_OUTPUT Output;
-    
-    float TessAmount = g_fTessellationFactor;
-
-    Output.Edges[0] = Output.Edges[1] = Output.Edges[2] = Output.Edges[3] = TessAmount;
-    Output.Inside[0] = Output.Inside[1] = TessAmount;
-    
-    Output.vTangent[0] = ip[0].vTangent;
-    Output.vTangent[1] = ip[1].vTangent;
-    Output.vTangent[2] = ip[2].vTangent;
-    Output.vTangent[3] = ip[3].vTangent;
-    
-    Output.vUV[0] = ip[0].vUV;
-    Output.vUV[1] = ip[1].vUV;
-    Output.vUV[2] = ip[2].vUV;
-    Output.vUV[3] = ip[3].vUV;
-    
-    // Compute part of our tangent patch here
-    uint Val[4];
-    uint Prefixes[4];
-    LoadValenceAndPrefixData( PatchID, Val, Prefixes );
-
-    [unroll]
-    for( int i=0; i<4; i++ )
-    {
-        float3 CornerB, CornerU, CornerV;
-        ComputeCornerVertex( i, CornerB, CornerU, CornerV, ip, Val, Prefixes );
-        Output.vTanUCorner[i] = CornerU;
-        Output.vTanVCorner[i] = CornerV;
-    }
-    
-    float fCWts[4];
-    Output.vCWts.x = g_fCi[ Val[0]-3 ];
-    Output.vCWts.y = g_fCi[ Val[1]-3 ];
-    Output.vCWts.z = g_fCi[ Val[2]-3 ];
-    Output.vCWts.w = g_fCi[ Val[3]-3 ];
-    
-    return Output;
-}
-
-HS_CONSTANT_DATA_OUTPUT SubDToBezierConstantsHS4444( InputPatch<VS_CONTROL_POINT_OUTPUT, MAX_POINTS> ip,
-                                                 uint PatchID : SV_PrimitiveID )
-{	
-    HS_CONSTANT_DATA_OUTPUT Output;
-    
-    float TessAmount = g_fTessellationFactor;
-
-    Output.Edges[0] = Output.Edges[1] = Output.Edges[2] = Output.Edges[3] = TessAmount;
-    Output.Inside[0] = Output.Inside[1] = TessAmount;
-    
-    Output.vTangent[0] = ip[0].vTangent;
-    Output.vTangent[1] = ip[1].vTangent;
-    Output.vTangent[2] = ip[2].vTangent;
-    Output.vTangent[3] = ip[3].vTangent;
-    
-    Output.vUV[0] = ip[0].vUV;
-    Output.vUV[1] = ip[1].vUV;
-    Output.vUV[2] = ip[2].vUV;
-    Output.vUV[3] = ip[3].vUV;
-    
-    // Compute part of our tangent patch here
-    static const uint Val[4] = (uint[4])uint4(4,4,4,4);
-    static const uint Prefixes[4] = (uint[4])uint4(7,10,13,16);
-
-    [unroll]
-    for( int i=0; i<4; i++ )
-    {
-        float3 CornerB, CornerU, CornerV;
-        ComputeCornerVertex4444( i, CornerB, CornerU, CornerV, ip, Val, Prefixes );
-        Output.vTanUCorner[i] = CornerU;
-        Output.vTanVCorner[i] = CornerV;
-    }
-    
-    float fCWts[4];
-    Output.vCWts.x = g_fCi[ Val[0]-3 ];
-    Output.vCWts.y = g_fCi[ Val[1]-3 ];
-    Output.vCWts.z = g_fCi[ Val[2]-3 ];
-    Output.vCWts.w = g_fCi[ Val[3]-3 ];
-    
-    return Output;
-}
-
-
-//--------------------------------------------------------------------------------------
-// HS for SubDToBezier.  This outputcontrolpoints(16) specifies that we will produce
-// 16 control points.  Therefore this function will be invoked 16x, one for each output
-// control point.
-//
-// !! PERFORMANCE NOTE: This hull shader is written for maximum readability, and its
-// performance is not expected to be optimal on D3D11 hardware.  The switch statement
-// below that determines the codepath for each patch control point generates sub-optimal
-// code for parallel execution on the GPU.  A future implementation of this hull shader
-// will combine the 16 codepaths and 3 variants (corner, edge, interior) into one shared
-// codepath; this change is expected to increase performance at the expense of readability.
-//--------------------------------------------------------------------------------------
-[domain("quad")]
-[partitioning("integer")]
-[outputtopology("triangle_cw")]
-[outputcontrolpoints(16)]
-[patchconstantfunc("SubDToBezierConstantsHS")]
-BEZIER_CONTROL_POINT SubDToBezierHS( InputPatch<VS_CONTROL_POINT_OUTPUT, MAX_POINTS> p, 
-                                     uint i : SV_OutputControlPointID,
-                                     uint PatchID : SV_PrimitiveID )
-{
-    // Valences and prefixes are loaded from a buffer
-    uint Val[4];
-    uint Prefixes[4];
-    LoadValenceAndPrefixData( PatchID, Val, Prefixes );
-    
-    float3 CornerB = float3(0,0,0);
-    float3 CornerU = float3(0,0,0);
-    float3 CornerV = float3(0,0,0);
-    
-    BEZIER_CONTROL_POINT Output;
-    Output.vPosition = float3(0,0,0);
-    
-    // !! PERFORMANCE NOTE: As mentioned above, this switch statement generates
-    // inefficient code for the sake of readability.
-    switch( i )
-    {
-    // Interior vertices
-    case 5:
-        Output.vPosition = ComputeInteriorVertex( 0, Val, p );
-        break;
-    case 6:
-        Output.vPosition = ComputeInteriorVertex( 1, Val, p );
-        break;
-    case 10:
-        Output.vPosition = ComputeInteriorVertex( 2, Val, p );
-        break;
-    case 9:
-        Output.vPosition = ComputeInteriorVertex( 3, Val, p );
-        break;
-        
-    // Corner vertices
-    case 0:
-        ComputeCornerVertex( 0, CornerB, CornerU, CornerV, p, Val, Prefixes );
-        Output.vPosition = CornerB;
-        break;
-    case 3:
-        ComputeCornerVertex( 1, CornerB, CornerU, CornerV, p, Val, Prefixes );
-        Output.vPosition = CornerB;
-        break;
-    case 15:
-        ComputeCornerVertex( 2, CornerB, CornerU, CornerV, p, Val, Prefixes );
-        Output.vPosition = CornerB;
-        break;
-    case 12:
-        ComputeCornerVertex( 3, CornerB, CornerU, CornerV, p, Val, Prefixes );
-        Output.vPosition = CornerB;
-        break;
-        
-    // Edge vertices
-    case 1:
-        Output.vPosition = ComputeEdgeVertex( 0, p, Val, Prefixes );
-        break;
-    case 2:
-        Output.vPosition = ComputeEdgeVertex( 1, p, Val, Prefixes );
-        break;
-    case 13:
-        Output.vPosition = ComputeEdgeVertex( 2, p, Val, Prefixes );
-        break;
-    case 14:
-        Output.vPosition = ComputeEdgeVertex( 3, p, Val, Prefixes );
-        break;
-    case 4:
-        Output.vPosition = ComputeEdgeVertex( 4, p, Val, Prefixes );
-        break;
-    case 8:
-        Output.vPosition = ComputeEdgeVertex( 5, p, Val, Prefixes );
-        break;
-    case 7:
-        Output.vPosition = ComputeEdgeVertex( 6, p, Val, Prefixes );
-        break;
-    case 11:
-        Output.vPosition = ComputeEdgeVertex( 7, p, Val, Prefixes );
-        break;
-    }
-    
-    return Output;
-}
-
-//--------------------------------------------------------------------------------------
-// Specialised version for Regular (4,4,4,4) patches, this is much simpler and has less
-// branching compared to the general one above
-//--------------------------------------------------------------------------------------
-[domain("quad")]
-[partitioning("integer")]
-[outputtopology("triangle_cw")]
-[outputcontrolpoints(16)]
-[patchconstantfunc("SubDToBezierConstantsHS4444")]
-BEZIER_CONTROL_POINT SubDToBezierHS4444( InputPatch<VS_CONTROL_POINT_OUTPUT, MAX_POINTS> p, 
-                                     uint i : SV_OutputControlPointID,
-                                     uint PatchID : SV_PrimitiveID )
-{
-    // Valences and prefixes are Constant for this case (4,4,4,4)
-    static const uint Val[4] = (uint[4])uint4(4,4,4,4);
-    static const uint Prefixes[4] = (uint[4])uint4(7,10,13,16);
-    
-    float3 CornerB = float3(0,0,0);
-    float3 CornerU = float3(0,0,0);
-    float3 CornerV = float3(0,0,0);
-    
-    BEZIER_CONTROL_POINT Output;
-    Output.vPosition = float3(0,0,0);
-    
-    // !! PERFORMANCE NOTE: As mentioned above, this switch statement generates
-    // inefficient code for the sake of readability.
-    switch( i )
-    {
-    // Interior vertices
-    case 5:
-        Output.vPosition = ComputeInteriorVertex( 0, Val, p );
-        break;
-    case 6:
-        Output.vPosition = ComputeInteriorVertex( 1, Val, p );
-        break;
-    case 10:
-        Output.vPosition = ComputeInteriorVertex( 2, Val, p );
-        break;
-    case 9:
-        Output.vPosition = ComputeInteriorVertex( 3, Val, p );
-        break;
-        
-    // Corner vertices
-    case 0:
-        ComputeCornerVertex4444( 0, CornerB, CornerU, CornerV, p, Val, Prefixes );
-        Output.vPosition = CornerB;
-        break;
-    case 3:
-        ComputeCornerVertex4444( 1, CornerB, CornerU, CornerV, p, Val, Prefixes );
-        Output.vPosition = CornerB;
-        break;
-    case 15:
-        ComputeCornerVertex4444( 2, CornerB, CornerU, CornerV, p, Val, Prefixes );
-        Output.vPosition = CornerB;
-        break;
-    case 12:
-        ComputeCornerVertex4444( 3, CornerB, CornerU, CornerV, p, Val, Prefixes );
-        Output.vPosition = CornerB;
-        break;
-        
-    // Edge vertices
-    case 1:
-        Output.vPosition = ComputeEdgeVertex( 0, p, Val, Prefixes );
-        break;
-    case 2:
-        Output.vPosition = ComputeEdgeVertex( 1, p, Val, Prefixes );
-        break;
-    case 13:
-        Output.vPosition = ComputeEdgeVertex( 2, p, Val, Prefixes );
-        break;
-    case 14:
-        Output.vPosition = ComputeEdgeVertex( 3, p, Val, Prefixes );
-        break;
-    case 4:
-        Output.vPosition = ComputeEdgeVertex( 4, p, Val, Prefixes );
-        break;
-    case 8:
-        Output.vPosition = ComputeEdgeVertex( 5, p, Val, Prefixes );
-        break;
-    case 7:
-        Output.vPosition = ComputeEdgeVertex( 6, p, Val, Prefixes );
-        break;
-    case 11:
-        Output.vPosition = ComputeEdgeVertex( 7, p, Val, Prefixes );
-        break;
-    }
-    
-    return Output;
-}
-
-
-//--------------------------------------------------------------------------------------
-// Bezier evaluation domain shader section
-//--------------------------------------------------------------------------------------
-struct DS_OUTPUT
-{
-    float3 vWorldPos        : POSITION;
-    float3 vNormal			: NORMAL;
-    float2 vUV				: TEXCOORD;
-    float3 vTangent			: TANGENT;
-    float3 vBiTangent		: BITANGENT;
-    
-    float4 vPosition		: SV_POSITION;
-};
-
-//--------------------------------------------------------------------------------------
-float4 BernsteinBasis(float t)
-{
-    float invT = 1.0f - t;
-
-    return float4( invT * invT * invT,
-                   3.0f * t * invT * invT,
-                   3.0f * t * t * invT,
-                   t * t * t );
-}
-
-//--------------------------------------------------------------------------------------
-float4 dBernsteinBasis(float t)
-{
-    float invT = 1.0f - t;
-
-    return float4( -3 * invT * invT,
-                   3 * invT * invT - 6 * t * invT,
-                   6 * t * invT - 3 * t * t,
-                   3 * t * t );
-}
-
-//--------------------------------------------------------------------------------------
-float3 EvaluateBezier( const OutputPatch<BEZIER_CONTROL_POINT, 16> bezpatch,
-                       float4 BasisU,
-                       float4 BasisV )
-{
-    float3 Value = float3(0,0,0);
-    Value  = BasisV.x * ( bezpatch[0].vPosition * BasisU.x + bezpatch[1].vPosition * BasisU.y + bezpatch[2].vPosition * BasisU.z + bezpatch[3].vPosition * BasisU.w );
-    Value += BasisV.y * ( bezpatch[4].vPosition * BasisU.x + bezpatch[5].vPosition * BasisU.y + bezpatch[6].vPosition * BasisU.z + bezpatch[7].vPosition * BasisU.w );
-    Value += BasisV.z * ( bezpatch[8].vPosition * BasisU.x + bezpatch[9].vPosition * BasisU.y + bezpatch[10].vPosition * BasisU.z + bezpatch[11].vPosition * BasisU.w );
-    Value += BasisV.w * ( bezpatch[12].vPosition * BasisU.x + bezpatch[13].vPosition * BasisU.y + bezpatch[14].vPosition * BasisU.z + bezpatch[15].vPosition * BasisU.w );
-    
-    return Value;
-}
-
-//--------------------------------------------------------------------------------------
-float3 EvaluateBezierTan( const float3 bezpatch[16],
-                          float4 BasisU,
-                          float4 BasisV )
-{
-    float3 Value = float3(0,0,0);
-    Value  = BasisV.x * ( bezpatch[0] * BasisU.x + bezpatch[1] * BasisU.y + bezpatch[2] * BasisU.z + bezpatch[3] * BasisU.w );
-    Value += BasisV.y * ( bezpatch[4] * BasisU.x + bezpatch[5] * BasisU.y + bezpatch[6] * BasisU.z + bezpatch[7] * BasisU.w );
-    Value += BasisV.z * ( bezpatch[8] * BasisU.x + bezpatch[9] * BasisU.y + bezpatch[10] * BasisU.z + bezpatch[11] * BasisU.w );
-    Value += BasisV.w * ( bezpatch[12] * BasisU.x + bezpatch[13] * BasisU.y + bezpatch[14] * BasisU.z + bezpatch[15] * BasisU.w );
-    
-    return Value;
-}
-
-//--------------------------------------------------------------------------------------
-// Compute a two full tangent patches from the Tangent corner data created in the
-// HS constant data function.
-//--------------------------------------------------------------------------------------
-void CreatTangentPatches( in HS_CONSTANT_DATA_OUTPUT input, 
-                        const OutputPatch<BEZIER_CONTROL_POINT, 16> bezpatch,
-                        out float3 TanU[16], 
-                        out float3 TanV[16] )
-{    
-    TanV[0]  = input.vTanVCorner[0];
-    TanV[3]  = input.vTanVCorner[1];
-    TanV[15] = input.vTanVCorner[2];
-    TanV[12] = input.vTanVCorner[3];
-    
-    TanU[0]  = input.vTanUCorner[0];
-    TanU[3]  = input.vTanUCorner[1];
-    TanU[15] = input.vTanUCorner[2];
-    TanU[12] = input.vTanUCorner[3];
-    
-    float fCWts[4];
-    fCWts[0] = input.vCWts.x;
-    fCWts[1] = input.vCWts.y;
-    fCWts[2] = input.vCWts.z;
-    fCWts[3] = input.vCWts.w;
-
-    float3 vCorner[4];
-    float3 vCornerLocal[4];
-    
-    vCorner[0] = TanV[0];
-    vCorner[1] = TanV[3];
-    vCorner[2] = TanV[15];
-    vCorner[3] = TanV[12];
-    vCornerLocal[0] = TanU[0];
-    vCornerLocal[1] = TanU[3];
-    vCornerLocal[2] = TanU[12];
-    vCornerLocal[3] = TanU[15];
-
-    ComputeTanPatch( bezpatch, TanU, fCWts, vCorner, vCornerLocal, 1, 4 );
-
-    fCWts[3] = input.vCWts.y;
-    fCWts[1] = input.vCWts.w;
-
-    vCorner[0] = TanU[0];
-    vCorner[3] = TanU[3];
-    vCorner[2] = TanU[15];
-    vCorner[1] = TanU[12];
-    vCornerLocal[0] = TanV[0];
-    vCornerLocal[1] = TanV[12];
-    vCornerLocal[2] = TanV[3];
-    vCornerLocal[3] = TanV[15];
-
-    ComputeTanPatch( bezpatch, TanV, fCWts, vCorner, vCornerLocal, 4, 1 );
-}
-
-//--------------------------------------------------------------------------------------
-// For each input UV (from the Tessellator), evaluate the Bezier patch at this position.
-//--------------------------------------------------------------------------------------
-[domain("quad")]
-DS_OUTPUT BezierEvalDS( HS_CONSTANT_DATA_OUTPUT input, 
-                        float2 UV : SV_DomainLocation,
-                        const OutputPatch<BEZIER_CONTROL_POINT, 16> bezpatch )
-{
-    float4 BasisU = BernsteinBasis( UV.x );
-    float4 BasisV = BernsteinBasis( UV.y );
-    
-    float3 WorldPos = EvaluateBezier( bezpatch, BasisU, BasisV );
-    
-    float3 TanU[16];
-    float3 TanV[16];
-    CreatTangentPatches( input, bezpatch, TanU, TanV );
-    float3 Tangent = EvaluateBezierTan( TanU, BasisU, BasisV );
-    float3 BiTangent = EvaluateBezierTan( TanV, BasisU, BasisV );
-    
-    // To see what the patch looks like without using the tangent patches to fix the normals, uncomment this section
-    /*
-    float4 dBasisU = dBernsteinBasis( UV.x );
-    float4 dBasisV = dBernsteinBasis( UV.y );
-    Tangent = EvaluateBezier( bezpatch, dBasisU, BasisV );
-    BiTangent = EvaluateBezier( bezpatch, BasisU, dBasisV );
-    */
-    
-    float3 Norm = normalize( cross( Tangent, BiTangent ) );
-
-    DS_OUTPUT Output;
-    Output.vNormal = Norm;
-    
-    // Evalulate the tangent vectors through bilinear interpolation.
-    // These tangents are the texture-space tangents.  They should not be confused with the parametric
-    // tangents that we use to get the normals for the bicubic patch.
-    float3 TextureTanU0 = input.vTangent[0];
-    float3 TextureTanU1 = input.vTangent[1];
-    float3 TextureTanU2 = input.vTangent[2];
-    float3 TextureTanU3 = input.vTangent[3];
-    
-    float3 UVbottom = lerp( TextureTanU0, TextureTanU1, UV.x );
-    float3 UVtop = lerp( TextureTanU3, TextureTanU2, UV.x );
-    float3 Tan = lerp( UVbottom, UVtop, UV.y );
-
-    Output.vTangent = Tan;
-
-    // This is an optimization.  We assume that the UV mapping of the mesh will result in a "relatively" orthogonal
-    // tangent basis.  If we assume this, then we can avoid fetching and bilerping the BiTangent along with the tangent.
-    Output.vBiTangent = cross( Norm, Tan );
-
-    // bilerp the texture coordinates    
-    float2 tex0 = input.vUV[0];
-    float2 tex1 = input.vUV[1];
-    float2 tex2 = input.vUV[2];
-    float2 tex3 = input.vUV[3];
-        
-    float2 bottom = lerp( tex0, tex1, UV.x );
-    float2 top = lerp( tex3, tex2, UV.x );
-    float2 TexUV = lerp( bottom, top, UV.y );
-    Output.vUV = TexUV;
-    
-    if( g_fDisplacementHeight > 0 )
-    {
-        // On this sample displacement can go into or out of the mesh.  This is why we bias the heigh amount.
-        float height = g_fDisplacementHeight * ( g_txHeight.SampleLevel( g_samPoint, TexUV, 0 ).a * 2 - 1 );
-        float3 WorldPosMiddle = Norm * height;
-        WorldPos += WorldPosMiddle;
-    }
-    
-    Output.vPosition = mul( float4(WorldPos,1), g_mViewProjection );
-    Output.vWorldPos = WorldPos;
-    
-    return Output;    
-}
-
-//--------------------------------------------------------------------------------------
-// Smooth shading pixel shader section
-//--------------------------------------------------------------------------------------
-
-float3 safe_normalize( float3 vInput )
-{
-    float len2 = dot( vInput, vInput );
-    if( len2 > 0 )
-    {
-        return vInput * rsqrt( len2 );
-    }
-    return vInput;
-}
-
-static const float g_fSpecularExponent = 32.0f;
-static const float g_fSpecularIntensity = 0.6f;
-static const float g_fNormalMapIntensity = 1.5f;
-
-float2 ComputeDirectionalLight( float3 vWorldPos, float3 vWorldNormal, float3 vDirLightDir )
-{
-    // Result.x is diffuse illumination, Result.y is specular illumination
-    float2 Result = float2( 0, 0 );
-    Result.x = pow( saturate( dot( vWorldNormal, -vDirLightDir ) ), 2 );
-    
-    float3 vPointToCamera = normalize( g_vCameraPosWorld - vWorldPos );
-    float3 vHalfAngle = normalize( vPointToCamera - vDirLightDir );
-    Result.y = pow( saturate( dot( vHalfAngle, vWorldNormal ) ), g_fSpecularExponent );
-    
-    return Result;
-}
-
-float3 ColorGamma( float3 Input )
-{
-    return pow( Input, 2.2f );
-}
-
-float4 SmoothPS( PS_INPUT Input ) : SV_TARGET
-{
-    float4 vNormalMapSampleRaw = g_txHeight.Sample( g_samLinear, Input.vUV );
-    float3 vNormalMapSampleBiased = ( vNormalMapSampleRaw.xyz * 2 ) - 1; 
-    vNormalMapSampleBiased.xy *= g_fNormalMapIntensity;
-    float3 vNormalMapSample = normalize( vNormalMapSampleBiased );
-    
-    float3 vNormal = safe_normalize( Input.vNormal ) * vNormalMapSample.z;
-    vNormal += safe_normalize( Input.vTangent ) * vNormalMapSample.x;
-    vNormal += safe_normalize( Input.vBiTangent ) * vNormalMapSample.y;
-                     
-    //float3 vColor = float3( 1, 1, 1 );
-    float3 vColor = g_txDiffuse.Sample( g_samLinear, Input.vUV ).rgb;
-    float vSpecular = g_txSpecular.Sample( g_samLinear, Input.vUV ).r * g_fSpecularIntensity;
-    
-    const float3 DirLightDirections[4] =
-    {
-        // key light
-        normalize( float3( -63.345150, -58.043934, 27.785097 ) ),
-        // fill light
-        normalize( float3( 23.652107, -17.391443, 54.972504 ) ),
-        // back light 1
-        normalize( float3( 20.470509, -22.939510, -33.929531 ) ),
-        // back light 2
-        normalize( float3( -31.003685, 24.242104, -41.352859 ) ),
-    };
-    
-    const float3 DirLightColors[4] = 
-    {
-        // key light
-        ColorGamma( float3( 1.0f, 0.964f, 0.706f ) * 1.0f ),
-        // fill light
-        ColorGamma( float3( 0.446f, 0.641f, 1.0f ) * 1.0f ),
-        // back light 1
-        ColorGamma( float3( 1.0f, 0.862f, 0.419f ) * 1.0f ),
-        // back light 2
-        ColorGamma( float3( 0.405f, 0.630f, 1.0f ) * 1.0f ),
-    };
-        
-    float3 fLightColor = 0;
-    for( int i = 0; i < 4; ++i )
-    {
-        float2 LightDiffuseSpecular = ComputeDirectionalLight( Input.vWorldPos, vNormal, DirLightDirections[i] );
-        fLightColor += DirLightColors[i] * vColor * LightDiffuseSpecular.x;
-        fLightColor += DirLightColors[i] * LightDiffuseSpecular.y * vSpecular;
-    }
-    
-    return float4( fLightColor, 1 );
-}
-
-//--------------------------------------------------------------------------------------
-// Solid color shading pixel shader (used for wireframe overlay)
-//--------------------------------------------------------------------------------------
-float4 SolidColorPS( PS_INPUT Input ) : SV_TARGET
-{
-    return float4( g_vSolidColor, 1 );
-}
diff --git a/tests/hlsl/dxsdk/VarianceShadows11/2DQuadShaders.hlsl b/tests/hlsl/dxsdk/VarianceShadows11/2DQuadShaders.hlsl
deleted file mode 100644
index 7d9763a79..000000000
--- a/tests/hlsl/dxsdk/VarianceShadows11/2DQuadShaders.hlsl
+++ /dev/null
@@ -1,216 +0,0 @@
-//TEST_DISABLED:COMPARE_HLSL:-no-mangle -profile vs_4_0 -entry VSMain -profile ps_4_0 -entry PSBlurX -entry PSBlurY
-
-//SLANG: This test has been disabled because its semantic correctness
-//around use of compile-time-constant expressions relies on processing
-//the `[unroll]` attribute, and we don't yet support that.
-
-//--------------------------------------------------------------------------------------
-// File: Skinning10.fx
-//
-// The effect file for the Skinning10 sample.  
-// 
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-#ifndef SEPERABLE_BLUR_KERNEL_SIZE
-#define SEPERABLE_BLUR_KERNEL_SIZE 3
-#endif
-
-static const int BLUR_KERNEL_BEGIN = SEPERABLE_BLUR_KERNEL_SIZE / -2; 
-static const int BLUR_KERNEL_END = SEPERABLE_BLUR_KERNEL_SIZE / 2 + 1;
-static const float FLOAT_BLUR_KERNEL_SIZE = (float)SEPERABLE_BLUR_KERNEL_SIZE;
-
-cbuffer cbblurVS : register( b2)
-{
-	int2		g_iWidthHeight			: packoffset( c0 );
-	int		    g_iKernelStart  		: packoffset( c0.z );
-	int		    g_iKernelEnd	        : packoffset( c0.w );
-};
-
-//--------------------------------------------------------------------------------------
-// defines
-//--------------------------------------------------------------------------------------
-
-Texture2DArray g_txShadow : register( t5 );
-SamplerState g_samShadow : register( s5 );
-
-//--------------------------------------------------------------------------------------
-// Input/Output structures
-//--------------------------------------------------------------------------------------
-
-struct PSIn
-{
-    float4      Pos	    : SV_Position;		//Position
-    float2      Tex	    : TEXCOORD;		    //Texture coordinate
-    float2      ITex    : TEXCOORD2;
-};
-
-struct VSIn
-{
-    uint Pos	: SV_VertexID ;
-};
-
-
-PSIn VSMain(VSIn inn)
-{
-    PSIn output;
-
-    output.Pos.y  = -1.0f + (inn.Pos%2) * 2.0f ;
-    output.Pos.x  = -1.0f + (inn.Pos/2) * 2.0f;
-    output.Pos.z = .5;
-    output.Pos.w = 1;
-    output.Tex.x = inn.Pos/2;
-    output.Tex.y = 1.0f - inn.Pos%2;
-    output.ITex.x = (float)(g_iWidthHeight.x * output.Tex.x);
-    output.ITex.y = (float)(g_iWidthHeight.y * output.Tex.y);
-    return output;
-}
-
-//float PSDepth
-
-//------------------------------------------------------------------------------
-// Logarithmic filtering
-//------------------------------------------------------------------------------
-
-float log_conv ( float x0, float X, float y0, float Y )
-{
-    return (X + log(x0 + (y0 * exp(Y - X))));
-}
-
-
-//--------------------------------------------------------------------------------------
-// Pixel shader that performs bump mapping on the final vertex
-//--------------------------------------------------------------------------------------
-float2 PSBlurX(PSIn input) : SV_Target
-{	
-/*
-	float2 centerDistance;
-	if ( input.Tex.x  < .5 ) centerDistance.x = (1.0 - input.Tex.x);
-	else centerDistance.x = input.Tex.x;
-	if ( input.Tex.y  < .5 ) centerDistance.y = (1.0 - input.Tex.y);
-	else centerDistance.y = input.Tex.y;
-	if (centerDistance.x < centerDistance.y) centerDistance.x = centerDistance.y;
-	centerDistance.x -= .2;
-	centerDistance.x *= (1.0f / .8);
-
-    float store_samples[8];
-    int ind = 0;
-    for (int x = g_iKernelStart; x < g_iKernelEnd; ++x) {
-        store_samples[ind] = g_txShadow.Load( int3(input.ITex.x+(float)x * centerDistance.x , input.ITex.y, 0) ).r;
-        ind++;
-    }
-    const float c = (1.f/5.f);    
-    
-    float accum;
-    accum = log_conv( c, store_samples[0], c, store_samples[1] );    
-    
-    ind = 0;
-    for (x = g_iKernelStart - 2; x < g_iKernelEnd; ++x) {
-        ind++;
-        accum += log_conv( 1.0f, accum, c, store_samples[ind] );
-    }
-    float2 rt;
-    rt.x = accum;
-    return rt;
-    */
-    /*
-    float2 dep = 0;
-	float2 centerDistance;
-	if ( input.Tex.x  < .5 ) centerDistance.x = (1.0 - input.Tex.x);
-	else centerDistance.x = input.Tex.x;
-	if ( input.Tex.y  < .5 ) centerDistance.y = (1.0 - input.Tex.y);
-	else centerDistance.y = input.Tex.y;
-	if (centerDistance.x < centerDistance.y) centerDistance.x = centerDistance.y;
-	centerDistance.x -= .2;
-	centerDistance.x *= ( 1.0f / 0.8f );
-
-    for (int x = g_iKernelStart; x < g_iKernelEnd; ++x) {
-        dep += g_txShadow.Load( int3(input.ITex.x+(float)x * centerDistance.x , input.ITex.y, 0) ).rg;
-    }
-    dep /= (g_iKernelEnd - g_iKernelStart);
-    return dep;
-  */  
-  
-    float2 dep=0;
-    [unroll]for ( int x = BLUR_KERNEL_BEGIN; x < BLUR_KERNEL_END; ++x ) {
-        dep += g_txShadow.Sample( g_samShadow,  float3( input.Tex.x, input.Tex.y, 0 ), int2( x,0 ) ).rg;
-    }
-    dep /= FLOAT_BLUR_KERNEL_SIZE;
-    return dep;  
-    
-//    return g_txShadow.Sample(g_samShadow,  float3(input.Tex.x, input.Tex.y, 0) ).rg;
-    
-}
-
-//--------------------------------------------------------------------------------------
-// Pixel shader that performs bump mapping on the final vertex
-//--------------------------------------------------------------------------------------
-float2 PSBlurY(PSIn input) : SV_Target
-{	
-/*
-	float2 centerDistance;
-	if ( input.Tex.x  < .5 ) centerDistance.x = (1.0 - input.Tex.x);
-	else centerDistance.x = input.Tex.x;
-	if ( input.Tex.y  < .5 ) centerDistance.y = (1.0 - input.Tex.y);
-	else centerDistance.y = input.Tex.y;
-	if (centerDistance.x < centerDistance.y) centerDistance.x = centerDistance.y;
-	centerDistance.x -= .2;
-	centerDistance.x *= (1.0f / .8);
-	
-    float store_samples[8];
-    int ind = 0;
-    for (int y = g_iKernelStart; y < g_iKernelEnd; ++y) {
-        store_samples[ind] = g_txShadow.Load( int3(input.ITex.x, input.ITex.y+(float)y * centerDistance.x, 0) ).r;
-    }
-    const float c = (1.f/5.f);    
-    
-    float accum;
-    accum = log_conv( c, store_samples[0], c, store_samples[1] );    
-    
-    ind = 0;
-    for (y = g_iKernelStart; y < g_iKernelEnd; ++y) {
-        ind++;
-        accum += log_conv( 1.0f, accum, c, store_samples[ind] );
-    }
-    float2 rt;
-    rt.x = accum;
-    return rt;
-    */
-    
-    
-    /*    
-    float2 dep = 0;
-
-	float2 centerDistance;
-	if ( input.Tex.x  < .5 ) centerDistance.x = (1.0 - input.Tex.x);
-	else centerDistance.x = input.Tex.x;
-	if ( input.Tex.y  < .5 ) centerDistance.y = (1.0 - input.Tex.y);
-	else centerDistance.y = input.Tex.y;
-	if (centerDistance.x < centerDistance.y) centerDistance.x = centerDistance.y;
-	centerDistance.x -= 0;
-	centerDistance.x *= (1.0f / 1.0f);
-	
-	if (centerDistance.x < centerDistance.y) centerDistance.x = centerDistance.y;
-    for (int y = g_iKernelStart; y < g_iKernelEnd; ++y) {
-        dep += g_txShadow.Load( int3(input.ITex.x, input.ITex.y+(float)y * centerDistance.x, 0) ).rg;
-    }
-    
-    
-    dep /= (g_iKernelEnd - g_iKernelStart);
-    return dep;
-    
-    */
-    
-    
-    float2 dep=0;
-    [unroll]for ( int y = BLUR_KERNEL_BEGIN; y < BLUR_KERNEL_END; ++y ) {
-        dep += g_txShadow.Sample( g_samShadow,  float3( input.Tex.x, input.Tex.y, 0 ), int2( 0,y ) ).rg;
-    }
-    dep /= FLOAT_BLUR_KERNEL_SIZE;
-    return dep;  
-    
-    //return g_txShadow.Sample(g_samShadow,  float3(input.Tex.x, input.Tex.y, 0) ).rg;
-}
-
-
-
diff --git a/tests/hlsl/dxsdk/VarianceShadows11/RenderVarianceScene.hlsl b/tests/hlsl/dxsdk/VarianceShadows11/RenderVarianceScene.hlsl
deleted file mode 100644
index 29c9851d8..000000000
--- a/tests/hlsl/dxsdk/VarianceShadows11/RenderVarianceScene.hlsl
+++ /dev/null
@@ -1,412 +0,0 @@
-//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues.
-//TEST:COMPARE_HLSL: -profile vs_4_0 -entry VSMain -profile ps_4_0 -entry PSBlurX -entry PSBlurY
-//--------------------------------------------------------------------------------------
-// File: RenderCascadeScene.hlsl
-//
-// This is the main shader file.  This shader is compiled with several different flags 
-// to provide different customizations based on user controls.
-// 
-// 
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-//--------------------------------------------------------------------------------------
-// Globals
-//--------------------------------------------------------------------------------------
-
-// This flag enables the shadow to blend between cascades.  This is most useful when the 
-// the shadow maps are small and artifact can be seen between the various cascade layers.
-#ifndef BLEND_BETWEEN_CASCADE_LAYERS_FLAG
-#define BLEND_BETWEEN_CASCADE_LAYERS_FLAG 0
-#endif
-
-// There are two methods for selecting the proper cascade a fragment lies in.  Interval selection
-// compares the depth of the fragment against the frustum's depth partition.
-// Map based selection compares the texture coordinates against the acutal cascade maps.
-// Map based selection gives better coverage.  
-// Interval based selection is easier to extend and understand.
-#ifndef SELECT_CASCADE_BY_INTERVAL_FLAG
-#define SELECT_CASCADE_BY_INTERVAL_FLAG 0
-#endif
-
-// The number of cascades 
-#ifndef CASCADE_COUNT_FLAG
-#define CASCADE_COUNT_FLAG 3
-#endif
-
-
-// Most titles will find that 3-4 cascades with 
-// BLEND_BETWEEN_CASCADE_LAYERS_FLAG, is good for lower end PCs.
-
-cbuffer cbAllShadowData : register( b0 )
-{
-    matrix          m_mWorldViewProjection;
-    matrix          m_mWorld;
-    matrix          m_mWorldView;
-    matrix          m_mShadow;
-    float4          m_vCascadeOffset[8];
-    float4          m_vCascadeScale[8];
-    int             m_nCascadeLevels; // Number of Cascades
-    int             m_iVisualizeCascades; // 1 is to visualize the cascades in different colors. 0 is to just draw the scene
-
-    // For Map based selection scheme, this keeps the pixels inside of the the valid range.
-    // When there is no boarder, these values are 0 and 1 respectivley.
-    float           m_fMinBorderPadding;     
-    float           m_fMaxBorderPadding;
-                                          
-    float           m_fCascadeBlendArea; // Amount to overlap when blending between cascades.
-    float           m_fTexelSize; // Padding variables exist because CBs must be a multiple of 16 bytes.
-    float           m_fNativeTexelSizeInX;
-    float4          m_fCascadeFrustumsEyeSpaceDepthsData[2];  // The values along Z that seperate the cascades.
-    // This code creates an array based pointer that points towards the vectorized input data.
-    // This is the only way to index arbitrary arrays of data.
-    // If the array is used at run time, the compiler will generate code that uses logic to index the correct component.
-
-    static float    m_fCascadeFrustumsEyeSpaceDepths[8] = (float[8])m_fCascadeFrustumsEyeSpaceDepthsData;
-    
-    float3          m_vLightDir;
-    float           m_fPaddingCB4;
-
-};
-
-
-
-//--------------------------------------------------------------------------------------
-// Textures and Samplers
-//--------------------------------------------------------------------------------------
-Texture2D           g_txDiffuse             : register( t0 );
-Texture2DArray      g_txShadow              : register( t5 );
-
-SamplerState g_samLinear                    : register( s0 );
-SamplerState g_samShadow                    : register( s5 );
-
-//--------------------------------------------------------------------------------------
-// Input / Output structures
-//--------------------------------------------------------------------------------------
-struct VS_INPUT
-{
-    float4 vPosition                        : POSITION;
-    float3 vNormal                          : NORMAL;
-    float2 vTexcoord                        : TEXCOORD0;
-};
-
-struct VS_OUTPUT
-{
-    float3 vNormal                          : NORMAL;
-    float2 vTexcoord                        : COLOR0;
-    float4 vTexShadow						: TEXCOORD1;
-    float4 vPosition                        : SV_POSITION;
-    float4 vInterpPos                       : TEXCOORD2;
-    float  vDepth                           : TEXCOORD3;
-};
-
-//--------------------------------------------------------------------------------------
-// Vertex Shader
-//--------------------------------------------------------------------------------------
-VS_OUTPUT VSMain( VS_INPUT Input )
-{
-    VS_OUTPUT Output;
-
-    Output.vPosition = mul( Input.vPosition, m_mWorldViewProjection );
-    Output.vNormal = mul( Input.vNormal, (float3x3)m_mWorld );
-    Output.vTexcoord = Input.vTexcoord;
-    Output.vInterpPos = Input.vPosition;   
-    Output.vDepth = mul( Input.vPosition, m_mWorldView ).z ; 
-       
-    // Transform the shadow texture coordinates for all the cascades.
-    Output.vTexShadow = mul( Input.vPosition, m_mShadow );
-        
-    return Output;
-}
-
-
-
-static const float4 vCascadeColorsMultiplier[8] = 
-{
-    float4 ( 1.5f, 0.0f, 0.0f, 1.0f ),
-    float4 ( 0.0f, 1.5f, 0.0f, 1.0f ),
-    float4 ( 0.0f, 0.0f, 5.5f, 1.0f ),
-    float4 ( 1.5f, 0.0f, 5.5f, 1.0f ),
-    float4 ( 1.5f, 1.5f, 0.0f, 1.0f ),
-    float4 ( 1.0f, 1.0f, 1.0f, 1.0f ),
-    float4 ( 0.0f, 1.0f, 5.5f, 1.0f ),
-    float4 ( 0.5f, 3.5f, 0.75f, 1.0f )
-};
-
-
-void ComputeCoordinatesTransform( in int iCascadeIndex,
-                                  in float4 InterpolatedPosition, 
-                                  in out float4 vShadowTexCoord,
-                                  in out float4 vShadowTexCoordViewSpace ) 
-{
-    // Now that we know the correct map, we can transform the world space position of the current fragment                
-    if( SELECT_CASCADE_BY_INTERVAL_FLAG ) 
-    {
-        vShadowTexCoord = vShadowTexCoordViewSpace * m_vCascadeScale[iCascadeIndex];
-        vShadowTexCoord += m_vCascadeOffset[iCascadeIndex];
-    }  
-    vShadowTexCoord.w = vShadowTexCoord.z; // We put the z value in w so that we can index the texture array with Z.
-    vShadowTexCoord.z = iCascadeIndex;
-    
-} 
-
-//--------------------------------------------------------------------------------------
-// Use PCF to sample the depth map and return a percent lit value.
-//--------------------------------------------------------------------------------------
-void CalculateVarianceShadow ( in float4 vShadowTexCoord, in float4 vShadowMapTextureCoordViewSpace, int iCascade, out float fPercentLit ) 
-{
-    fPercentLit = 0.0f;
-    // This loop could be unrolled, and texture immediate offsets could be used if the kernel size were fixed.
-    // This would be a performance improvment.
-	        
-    float2 mapDepth = 0;
-
-
-    // In orderto pull the derivative out of divergent flow control we calculate the 
-    // derivative off of the view space coordinates an then scale the deriviative.
-    
-    float3 vShadowTexCoordDDX = 
-		ddx(vShadowMapTextureCoordViewSpace );
-    vShadowTexCoordDDX *= m_vCascadeScale[iCascade].xyz; 
-    float3 vShadowTexCoordDDY = 
-		ddy(vShadowMapTextureCoordViewSpace );
-    vShadowTexCoordDDY *= m_vCascadeScale[iCascade].xyz; 
-    
-    mapDepth += g_txShadow.SampleGrad( g_samShadow, vShadowTexCoord.xyz, 
-									   vShadowTexCoordDDX,
-									   vShadowTexCoordDDY);
-    // The sample instruction uses gradients for some filters.
-		        
-    float  fAvgZ  = mapDepth.x; // Filtered z
-    float  fAvgZ2 = mapDepth.y; // Filtered z-squared
-    
-    if ( vShadowTexCoord.w <= fAvgZ ) // We put the z value in w so that we can index the texture array with Z.
-    {
-        fPercentLit = 1;
-	}
-	else 
-	{
-	    float variance = ( fAvgZ2 ) - ( fAvgZ * fAvgZ );
-        variance       = min( 1.0f, max( 0.0f, variance + 0.00001f ) );
-    
-        float mean     = fAvgZ;
-        float d        = vShadowTexCoord.w - mean; // We put the z value in w so that we can index the texture array with Z.
-        float p_max    = variance / ( variance + d*d );
-
-        // To combat light-bleeding, experiment with raising p_max to some power
-        // (Try values from 0.1 to 100.0, if you like.)
-        fPercentLit = pow( p_max, 4 );
-	    
-	}
-    
-}
-
-//--------------------------------------------------------------------------------------
-// Calculate amount to blend between two cascades and the band where blending will occure.
-//--------------------------------------------------------------------------------------
-void CalculateBlendAmountForInterval ( in int iNextCascadeIndex, 
-                                       in out float fPixelDepth, 
-                                       in out float fCurrentPixelsBlendBandLocation,
-                                       out float fBlendBetweenCascadesAmount
-                                       ) 
-{
-
-    // We need to calculate the band of the current shadow map where it will fade into the next cascade.
-    // We can then early out of the expensive PCF for loop.
-    // 
-    float fBlendInterval = m_fCascadeFrustumsEyeSpaceDepths[ iNextCascadeIndex - 1 ];
-    if( iNextCascadeIndex > 1 ) 
-    {
-        fPixelDepth -= m_fCascadeFrustumsEyeSpaceDepths[ iNextCascadeIndex-2 ];
-        fBlendInterval -= m_fCascadeFrustumsEyeSpaceDepths[ iNextCascadeIndex-2 ];
-    } 
-    // The current pixel's blend band location will be used to determine when we need to blend and by how much.
-    fCurrentPixelsBlendBandLocation = fPixelDepth / fBlendInterval;
-    fCurrentPixelsBlendBandLocation = 1.0f - fCurrentPixelsBlendBandLocation;
-    // The fBlendBetweenCascadesAmount is our location in the blend band.
-    fBlendBetweenCascadesAmount = fCurrentPixelsBlendBandLocation / m_fCascadeBlendArea;
-}
-
-
-//--------------------------------------------------------------------------------------
-// Calculate amount to blend between two cascades and the band where blending will occure.
-//--------------------------------------------------------------------------------------
-void CalculateBlendAmountForMap ( in float4 vShadowMapTextureCoord, 
-                                  in out float fCurrentPixelsBlendBandLocation,
-                                  out float fBlendBetweenCascadesAmount ) 
-{
-    // Calcaulte the blend band for the map based selection.
-    float2 distanceToOne = float2 ( 1.0f - vShadowMapTextureCoord.x, 1.0f - vShadowMapTextureCoord.y );
-    fCurrentPixelsBlendBandLocation = min( vShadowMapTextureCoord.x, vShadowMapTextureCoord.y );
-    float fCurrentPixelsBlendBandLocation2 = min( distanceToOne.x, distanceToOne.y );
-    fCurrentPixelsBlendBandLocation = 
-        min( fCurrentPixelsBlendBandLocation, fCurrentPixelsBlendBandLocation2 );
-    fBlendBetweenCascadesAmount = fCurrentPixelsBlendBandLocation / m_fCascadeBlendArea;
-}
-
-//--------------------------------------------------------------------------------------
-// Calculate the shadow based on several options and rende the scene.
-//--------------------------------------------------------------------------------------
-
-float4 PSMain( VS_OUTPUT Input ) : SV_TARGET
-{
-    float4 vDiffuse = g_txDiffuse.Sample( g_samLinear, Input.vTexcoord );
-    
-    
-    float4 vShadowMapTextureCoordViewSpace = 0.0f;
-    float4 vShadowMapTextureCoord = 0.0f;
-    float4 vShadowMapTextureCoord_blend = 0.0f;
-    
-    float4 vVisualizeCascadeColor = float4(0.0f,0.0f,0.0f,1.0f);
-    
-    float fPercentLit = 0.0f;
-    float fPercentLit_blend = 0.0f;
-
-    int iCascadeFound = 0;
-    int iCurrentCascadeIndex=1;
-    int iNextCascadeIndex = 0;
-
-    float fCurrentPixelDepth;
-
-    // The interval based selection technique compares the pixel's depth against the frustum's cascade divisions.
-    fCurrentPixelDepth = Input.vDepth;
-    
-    // This for loop is not necessary when the frustum is uniformaly divided and interval based selection is used.
-    // In this case fCurrentPixelDepth could be used as an array lookup into the correct frustum. 
-    vShadowMapTextureCoordViewSpace = Input.vTexShadow;
-    
-    
-    if( SELECT_CASCADE_BY_INTERVAL_FLAG ) 
-    {
-        iCurrentCascadeIndex = 0;
-        if (CASCADE_COUNT_FLAG > 1 ) 
-        {
-            float4 vCurrentPixelDepth = Input.vDepth;
-            float4 fComparison = ( vCurrentPixelDepth > m_fCascadeFrustumsEyeSpaceDepthsData[0]);
-            float4 fComparison2 = ( vCurrentPixelDepth > m_fCascadeFrustumsEyeSpaceDepthsData[1]);
-            float fIndex = dot( 
-                            float4( CASCADE_COUNT_FLAG > 0,
-                                    CASCADE_COUNT_FLAG > 1, 
-                                    CASCADE_COUNT_FLAG > 2, 
-                                    CASCADE_COUNT_FLAG > 3)
-                            , fComparison )
-                         + dot( 
-                            float4(
-                                    CASCADE_COUNT_FLAG > 4,
-                                    CASCADE_COUNT_FLAG > 5,
-                                    CASCADE_COUNT_FLAG > 6,
-                                    CASCADE_COUNT_FLAG > 7)
-                            , fComparison2 ) ;
-                                    
-            fIndex = min( fIndex, CASCADE_COUNT_FLAG - 1 );
-            iCurrentCascadeIndex = (int)fIndex;
-        }
-    }
-    
-    if ( !SELECT_CASCADE_BY_INTERVAL_FLAG ) 
-    {
-        iCurrentCascadeIndex = 0;
-        if ( CASCADE_COUNT_FLAG == 1 ) 
-        {
-            vShadowMapTextureCoord = vShadowMapTextureCoordViewSpace * m_vCascadeScale[0];
-            vShadowMapTextureCoord += m_vCascadeOffset[0];
-        }
-        if ( CASCADE_COUNT_FLAG > 1 ) {
-            for( int iCascadeIndex = 0; iCascadeIndex < CASCADE_COUNT_FLAG && iCascadeFound == 0; ++iCascadeIndex ) 
-            {
-                vShadowMapTextureCoord = vShadowMapTextureCoordViewSpace * m_vCascadeScale[iCascadeIndex];
-                vShadowMapTextureCoord += m_vCascadeOffset[iCascadeIndex];
-
-                if ( min( vShadowMapTextureCoord.x, vShadowMapTextureCoord.y ) > m_fMinBorderPadding
-                  && max( vShadowMapTextureCoord.x, vShadowMapTextureCoord.y ) < m_fMaxBorderPadding )
-                { 
-                    iCurrentCascadeIndex = iCascadeIndex;   
-                    iCascadeFound = 1; 
-                }
-            }
-        }
-    }    
-    // Found the correct map.
-    vVisualizeCascadeColor = vCascadeColorsMultiplier[iCurrentCascadeIndex];
-    
-    ComputeCoordinatesTransform( iCurrentCascadeIndex, Input.vInterpPos, vShadowMapTextureCoord, vShadowMapTextureCoordViewSpace  );    
-                                             
-    if( BLEND_BETWEEN_CASCADE_LAYERS_FLAG && CASCADE_COUNT_FLAG > 1 ) 
-    {
-        // Repeat text coord calculations for the next cascade. 
-        // The next cascade index is used for blurring between maps.
-        iNextCascadeIndex = min ( CASCADE_COUNT_FLAG - 1, iCurrentCascadeIndex + 1 ); 
-        if( !SELECT_CASCADE_BY_INTERVAL_FLAG ) 
-        {
-            vShadowMapTextureCoord_blend = vShadowMapTextureCoordViewSpace * m_vCascadeScale[iNextCascadeIndex];
-            vShadowMapTextureCoord_blend += m_vCascadeOffset[iNextCascadeIndex];
-        }
-        ComputeCoordinatesTransform( iNextCascadeIndex, Input.vInterpPos, vShadowMapTextureCoord_blend, vShadowMapTextureCoordViewSpace );  
-    }            
-    float fBlendBetweenCascadesAmount = 1.0f;
-    float fCurrentPixelsBlendBandLocation = 1.0f;
-    
-    if( SELECT_CASCADE_BY_INTERVAL_FLAG ) 
-    {
-        if( CASCADE_COUNT_FLAG > 1 && BLEND_BETWEEN_CASCADE_LAYERS_FLAG ) 
-        {
-            CalculateBlendAmountForInterval ( iNextCascadeIndex, fCurrentPixelDepth, 
-                fCurrentPixelsBlendBandLocation, fBlendBetweenCascadesAmount );
-            
-        }   
-    }
-    else 
-    {
-        if( CASCADE_COUNT_FLAG > 1 && BLEND_BETWEEN_CASCADE_LAYERS_FLAG ) 
-        {
-            CalculateBlendAmountForMap ( vShadowMapTextureCoord, 
-                fCurrentPixelsBlendBandLocation, fBlendBetweenCascadesAmount );
-        }   
-    }
-    
-    // Because the Z coordinate specifies the texture array,
-    // the derivative will be 0 when there is no divergence
-    //float fDivergence = abs( ddy( vShadowMapTextureCoord.z ) ) +  abs( ddx( vShadowMapTextureCoord.z ) );
-    CalculateVarianceShadow ( vShadowMapTextureCoord, vShadowMapTextureCoordViewSpace, 
-								iCurrentCascadeIndex, fPercentLit);
-								
-    // We repeat the calcuation for the next cascade layer, when blending between maps.
-    if( BLEND_BETWEEN_CASCADE_LAYERS_FLAG  && CASCADE_COUNT_FLAG > 1 ) 
-    {
-        if( fCurrentPixelsBlendBandLocation < m_fCascadeBlendArea ) 
-        {  // the current pixel is within the blend band.
-
-			// Because the Z coordinate species the texture array,
-			// the derivative will be 0 when there is no divergence
-			float fDivergence = abs( ddy( vShadowMapTextureCoord_blend.z ) ) +  
-				abs( ddx( vShadowMapTextureCoord_blend.z) );
-            CalculateVarianceShadow ( vShadowMapTextureCoord_blend, vShadowMapTextureCoordViewSpace, 
-										iNextCascadeIndex, fPercentLit_blend );
-
-            // Blend the two calculated shadows by the blend amount.
-            fPercentLit = lerp( fPercentLit_blend, fPercentLit, fBlendBetweenCascadesAmount ); 
-
-        }   
-    }    
-  
-    if( !m_iVisualizeCascades ) vVisualizeCascadeColor = float4( 1.0f, 1.0f, 1.0f, 1.0f );
-    
-    float3 vLightDir1 = float3( -1.0f, 1.0f, -1.0f ); 
-    float3 vLightDir2 = float3( 1.0f, 1.0f, -1.0f ); 
-    float3 vLightDir3 = float3( 0.0f, -1.0f, 0.0f );
-    float3 vLightDir4 = float3( 1.0f, 1.0f, 1.0f );     
-    // Some ambient-like lighting.
-    float fLighting = 
-                      saturate( dot( vLightDir1 , Input.vNormal ) )*0.05f +
-                      saturate( dot( vLightDir2 , Input.vNormal ) )*0.05f +
-                      saturate( dot( vLightDir3 , Input.vNormal ) )*0.05f +
-                      saturate( dot( vLightDir4 , Input.vNormal ) )*0.05f ;
-    
-    float4 vShadowLighting = fLighting * 0.5f;
-    fLighting += saturate( dot( m_vLightDir , Input.vNormal ) );
-    fLighting = lerp( vShadowLighting, fLighting, fPercentLit );
-    
-    return fLighting * vVisualizeCascadeColor * vDiffuse;
-
-}
-
diff --git a/tests/hlsl/dxsdk/VarianceShadows11/RenderVarianceShadow.hlsl b/tests/hlsl/dxsdk/VarianceShadows11/RenderVarianceShadow.hlsl
deleted file mode 100644
index 9aec9a55d..000000000
--- a/tests/hlsl/dxsdk/VarianceShadows11/RenderVarianceShadow.hlsl
+++ /dev/null
@@ -1,49 +0,0 @@
-//TEST:COMPARE_HLSL: -profile sm_4_0 -entry VSMain -stage vertex -entry PSMain -stage pixel
-
-#ifndef __SLANG__
-#define cbPerObject cbPerObject_0
-#define g_mWorldViewProjection g_mWorldViewProjection_0
-#endif
-
-//--------------------------------------------------------------------------------------
-// Globals
-//--------------------------------------------------------------------------------------
-cbuffer cbPerObject : register( b0 )
-{
-	matrix		g_mWorldViewProjection	;//SLANG: : packoffset( c0 );
-};
-
-//--------------------------------------------------------------------------------------
-// Input / Output structures
-//--------------------------------------------------------------------------------------
-struct VS_INPUT
-{
-	float4 vPosition	: POSITION;
-};
-
-struct VS_OUTPUT
-{
-	float4 vPosition	: SV_POSITION;
-};
-
-//--------------------------------------------------------------------------------------
-// Vertex Shader
-//--------------------------------------------------------------------------------------
-VS_OUTPUT VSMain( VS_INPUT Input )
-{
-	VS_OUTPUT Output;
-	
-	
-	Output.vPosition = mul( Input.vPosition, g_mWorldViewProjection );
-
-	return Output;
-}
-
-
-float2 PSMain (VS_OUTPUT Input) : SV_TARGET 
-{
-    float2 rt;
-    rt.x = Input.vPosition.z;
-    rt.y = rt.x * rt.x;
-    return rt;
-}
-\ No newline at end of file
diff --git a/tests/hlsl/simple/rw-texture.hlsl b/tests/hlsl/simple/rw-texture.hlsl
index 26916b474..de8e82777 100644
--- a/tests/hlsl/simple/rw-texture.hlsl
+++ b/tests/hlsl/simple/rw-texture.hlsl
@@ -5,7 +5,16 @@
 // Ensure that we implement the `Load` operations on
 // `RWTexture*` types with the correct signature.
 
-#ifndef __SLANG__
+#ifdef __SLANG__
+#define R(X) /**/
+#define BEGIN_CBUFFER(NAME) cbuffer NAME
+#define END_CBUFFER(NAME, REG) /**/
+#define CBUFFER_REF(NAME, FIELD) FIELD
+#else
+#define R(X) : register(X)
+#define BEGIN_CBUFFER(NAME) struct SLANG_ParameterGroup_##NAME
+#define END_CBUFFER(NAME, REG) ; cbuffer NAME : REG { SLANG_ParameterGroup_##NAME NAME; }
+#define CBUFFER_REF(NAME, FIELD) NAME.FIELD
 #define C C_0
 #define SV_Target SV_TARGET
 #define u2 u2_0
@@ -16,19 +25,20 @@
 #endif
 
 
-cbuffer C : register(b0)
+BEGIN_CBUFFER(C)
 {
     uint2 u2;
     uint3 u3;
-};
+}
+END_CBUFFER(C, register(b0))
 
-RWTexture2D<float4>         t2  : register(u1);
-RWTexture2DArray<float4>    t2a : register(u2);
-RWTexture3D<float4>         t3  : register(u3);
+RWTexture2D<float4>         t2  R(u1);
+RWTexture2DArray<float4>    t2a R(u2);
+RWTexture3D<float4>         t3  R(u3);
 
 float4 main() : SV_Target
 {
-    return t2.Load(u2)
-        + t2a.Load(u3)
-        + t3.Load(u3);
+    return t2.Load(CBUFFER_REF(C,u2))
+        + t2a.Load(CBUFFER_REF(C,u3))
+        +  t3.Load(CBUFFER_REF(C,u3));
 }
diff --git a/tests/parser/cast-precedence.hlsl b/tests/parser/cast-precedence.hlsl
index 3383d9912..29793e4a2 100644
--- a/tests/parser/cast-precedence.hlsl
+++ b/tests/parser/cast-precedence.hlsl
@@ -3,20 +3,32 @@
 // Confirm that type-cast expressions parse with
 // the appropriate precedence.
 
-#ifndef __SLANG__
+#ifdef __SLANG__
+#define R(X) /**/
+#define BEGIN_CBUFFER(NAME) cbuffer NAME
+#define END_CBUFFER(NAME, REG) /**/
+#define CBUFFER_REF(NAME, FIELD) FIELD
+#else
+#define R(X) X
+#define BEGIN_CBUFFER(NAME) struct SLANG_ParameterGroup_##NAME
+#define END_CBUFFER(NAME, REG) ; cbuffer NAME : register(REG) { SLANG_ParameterGroup_##NAME NAME; }
+#define CBUFFER_REF(NAME, FIELD) NAME.FIELD
+
 #define C C_0
 #define a a_0
 #define b b_0
 #define SV_Position SV_POSITION
 #endif
 
-cbuffer C : register(b0)
+BEGIN_CBUFFER(C)
 {
 	float a;
 	float b;
-};
+}
+END_CBUFFER(C,b0)
+
 
 float4 main() : SV_Position
 {
-	return (uint) a / b;
+	return (uint) CBUFFER_REF(C,a) / CBUFFER_REF(C,b);
 }
diff --git a/tests/reflection/parameter-block-explicit-space.slang b/tests/reflection/parameter-block-explicit-space.slang
index 5679a1c35..b4d3eff9c 100644
--- a/tests/reflection/parameter-block-explicit-space.slang
+++ b/tests/reflection/parameter-block-explicit-space.slang
@@ -71,7 +71,7 @@ struct A
 {
     float4 au;
 };
-cbuffer _S1         : register(b0, space2)
+cbuffer a            : register(b0, space2)
 { A a; }
 Texture2D       at1  : register(t0, space2);
 Texture2D       at2  : register(t1, space2);
@@ -81,7 +81,7 @@ struct B
 {
     float4      bu;
 };
-cbuffer _S3         : register(b0, space3)
+cbuffer b           : register(b0, space3)
 { B b; }
 Texture2D       bt  : register(t0, space3);
 SamplerState    bs  : register(s0, space3);
diff --git a/tests/rewriter/type-splitting.hlsl b/tests/rewriter/type-splitting.hlsl
index 0826cbf21..850e1b5ad 100644
--- a/tests/rewriter/type-splitting.hlsl
+++ b/tests/rewriter/type-splitting.hlsl
@@ -42,9 +42,14 @@ struct Foo_0
 	float2 u_0;
 };
 
+struct SLANG_ParameterGroup_C_0
+{
+	Foo_0 foo_0;	
+};
+
 cbuffer C_0
 {
-	Foo_0 foo_0;
+	SLANG_ParameterGroup_C_0 C_0;
 }
 
 Texture2D    C_foo_t_0;
@@ -52,7 +57,7 @@ SamplerState C_foo_s_0;
 
 float4 main() : SV_TARGET
 {
-	return C_foo_t_0.Sample(C_foo_s_0, foo_0.u_0);	
+	return C_foo_t_0.Sample(C_foo_s_0, C_0.foo_0.u_0);	
 }
 
 #endif
diff --git a/tests/vkray/anyhit.slang.glsl b/tests/vkray/anyhit.slang.glsl
index 43fd29e01..07789cdbd 100644
--- a/tests/vkray/anyhit.slang.glsl
+++ b/tests/vkray/anyhit.slang.glsl
@@ -10,8 +10,8 @@ struct Params_0
 layout(binding = 0)
 layout(std140) uniform _S1
 {
-    Params_0 gParams_0;
-};
+    Params_0 _data;
+} gParams_0;
 
 layout(binding = 1)
 uniform texture2D gParams_alphaMap_0;
@@ -35,7 +35,7 @@ void main()
 {
     SphereHitAttributes_0 _S4 = _S2;
 
-    if(bool(gParams_0.mode_0))
+    if(bool(gParams_0._data.mode_0))
     {
         float val_0 = textureLod(
             sampler2D(gParams_alphaMap_0, gParams_sampler_0),
diff --git a/tests/vkray/callable-caller.slang.glsl b/tests/vkray/callable-caller.slang.glsl
index 2704e6720..b0d174381 100644
--- a/tests/vkray/callable-caller.slang.glsl
+++ b/tests/vkray/callable-caller.slang.glsl
@@ -4,16 +4,26 @@ layout(row_major) uniform;
 layout(row_major) buffer;
 #extension GL_NV_ray_tracing : require
 
+#define tmp_ubo         _S1
+#define tmp_launchid    _S2
+#define tmp_luanchidf   _S3
+#define tmp_launchsize  _S4
+#define tmp_launchpos   _S5
+#define tmp_shaderidx   _S6
+#define tmp_payload     _S7
+#define tmp_launchid2   _S8
+
 struct SLANG_ParameterGroup_C_0
 {
     uint shaderIndex_0;
 };
 
 layout(binding = 0)
-layout(std140) uniform C_0
+layout(std140)
+uniform tmp_ubo
 {
-    uint shaderIndex_0;
-};
+    SLANG_ParameterGroup_C_0 _data;
+} C_0;
 
 struct MaterialPayload_0
 {
@@ -43,25 +53,25 @@ void main()
     MaterialPayload_0 payload_1;
     payload_1.albedo_0 = vec4(0);
 
-    uvec3 _S1 = gl_LaunchIDNV;
-    vec2 _S2 = vec2(_S1.xy);
+    uvec3 tmp_launchid = gl_LaunchIDNV;
+    vec2 tmp_luanchidf = vec2(tmp_launchid.xy);
 
-    uvec3 _S3 = gl_LaunchSizeNV;
-    vec2 _S4 = _S2 / vec2(_S3.xy);
+    uvec3 tmp_launchsize = gl_LaunchSizeNV;
+    vec2 tmp_launchpos = tmp_luanchidf / vec2(tmp_launchsize.xy);
 
-    payload_1.uv_0 = _S4;
+    payload_1.uv_0 = tmp_launchpos;
 
-    uint _S5 = shaderIndex_0;
+    uint tmp_shaderidx = C_0._data.shaderIndex_0;
 
-    MaterialPayload_0 _S6;
-    _S6 = payload_1;
-    CallShader_0(_S5, _S6);
-    payload_1 = _S6;
+    MaterialPayload_0 tmp_payload;
+    tmp_payload = payload_1;
+    CallShader_0(tmp_shaderidx, tmp_payload);
+    payload_1 = tmp_payload;
 
-    uvec3 _S7 = gl_LaunchIDNV;
+    uvec3 tmp_launchid2 = gl_LaunchIDNV;
     imageStore(
         gImage_0,
-        ivec2(_S7.xy),
+        ivec2(tmp_launchid2.xy),
         payload_1.albedo_0);
     return;
 }
diff --git a/tests/vkray/closesthit.slang.glsl b/tests/vkray/closesthit.slang.glsl
index a056b7809..79fd3afbe 100644
--- a/tests/vkray/closesthit.slang.glsl
+++ b/tests/vkray/closesthit.slang.glsl
@@ -2,50 +2,70 @@
 #version 460
 #extension GL_NV_ray_tracing : require
 
-layout(shaderRecordNV)
-buffer ShaderRecord_0
+#define tmp_shaderrecord    _S1
+#define tmp_colors          _S2
+#define tmp_hitattrs        _S3
+#define tmp_payload         _S4
+#define tmp_localattrs      _S5
+#define tmp_customidx       _S6
+#define tmp_instanceid      _S7
+#define tmp_add_0           _S8
+#define tmp_primid          _S9
+#define tmp_add_1           _S10
+#define tmp_hitkind         _S11
+#define tmp_hitt            _S12
+#define tmp_tmin            _S13
+
+struct SLANG_ParameterGroup_ShaderRecord_0
 {
-    uint shaderRecordID_0;
+    uint shaderRecordID_0;    
 };
 
-layout(std430, binding = 0) buffer _S1
+layout(shaderRecordNV)
+buffer tmp_shaderrecord
 {
-    vec4 colors_0[];
-};
+    SLANG_ParameterGroup_ShaderRecord_0 _data;
+} ShaderRecord_0;
+
+layout(std430, binding = 0) buffer tmp_colors
+{
+    vec4 _data[];
+} colors_0;
  
 struct BuiltInTriangleIntersectionAttributes_0
 {
     vec2 barycentrics_0;
 };
 
-hitAttributeNV BuiltInTriangleIntersectionAttributes_0 _S2;
+
+hitAttributeNV BuiltInTriangleIntersectionAttributes_0 tmp_hitattrs;
 
 struct ReflectionRay_0
 {
     vec4 color_0;
 };
 
-rayPayloadInNV ReflectionRay_0 _S3;
+rayPayloadInNV ReflectionRay_0 tmp_payload;
 
 void main()
 {
-    BuiltInTriangleIntersectionAttributes_0 _S4 = _S2;
+    BuiltInTriangleIntersectionAttributes_0 tmp_localattrs = tmp_hitattrs;
 
-    uint _S5 = gl_InstanceCustomIndexNV;
-    uint _S6 = gl_InstanceID;
+    uint tmp_customidx = gl_InstanceCustomIndexNV;
+    uint tmp_instanceid = gl_InstanceID;
 
-    uint _S7 = _S5 + _S6;
-    uint _S8 = gl_PrimitiveID;
+    uint tmp_add_0 = tmp_customidx + tmp_instanceid;
+    uint tmp_primid = gl_PrimitiveID;
 
-    uint _S9 = _S7 + _S8;
-    uint _S10 = gl_HitKindNV;
+    uint tmp_add_1 = tmp_add_0 + tmp_primid;
+    uint tmp_hitkind = gl_HitKindNV;
 
-    vec4 color_1 = colors_0[_S9 + _S10 + shaderRecordID_0];
+    vec4 color_1 = colors_0._data[tmp_add_1 + tmp_hitkind + ShaderRecord_0._data.shaderRecordID_0];
 
-    float _S11 = gl_HitTNV;
-    float _S12 = gl_RayTminNV;
+    float tmp_hitt = gl_HitTNV;
+    float tmp_tmin = gl_RayTminNV;
 
-    _S3.color_0 = color_1 * (_S11 - _S12);
+    tmp_payload.color_0 = color_1 * (tmp_hitt - tmp_tmin);
 
     return;
 }
diff --git a/tests/vkray/intersection.slang.glsl b/tests/vkray/intersection.slang.glsl
index cfa53efa7..09d7e63a5 100644
--- a/tests/vkray/intersection.slang.glsl
+++ b/tests/vkray/intersection.slang.glsl
@@ -3,19 +3,37 @@
 
 #extension GL_NV_ray_tracing : require
 
+#define tmp_ubo _S1
+#define tmp_reportHit _S2
+#define tmp_origin _S3
+#define tmp_direction _S4
+#define tmp_tmin _S5
+#define tmp_tmax _S6
+#define tmp_ray _S7
+#define tmp_sphere _S8
+#define tmp_thit _S9
+#define tmp_hitattrs _S10
+#define tmp_dithit _S11
+#define tmp_reportresult _S12
+
 struct Sphere_0
 {
     vec3 position_0;
     float radius_0;
 };
 
-layout(binding = 0)
-layout(std140)
-uniform U_0
+struct SLANG_ParameterGroup_U_0
 {
     Sphere_0 gSphere_0;
 };
 
+layout(binding = 0)
+layout(std140)
+uniform tmp_ubo
+{
+    SLANG_ParameterGroup_U_0 _data;
+} U_0;
+
 struct RayDesc_0
 {
     vec3 Origin_0;
@@ -45,40 +63,40 @@ hitAttributeNV SphereHitAttributes_0 a_0;
 bool ReportHit_0(float tHit_1, uint hitKind_0, SphereHitAttributes_0 attributes_0)
 {
     a_0 = attributes_0;
-    bool _S1 = reportIntersectionNV(tHit_1, hitKind_0);
-    return _S1;
+    bool tmp_reportHit = reportIntersectionNV(tHit_1, hitKind_0);
+    return tmp_reportHit;
 }
 
 void main()
 {
     RayDesc_0 ray_1;
-    vec3 _S2 = gl_ObjectRayOriginNV;
 
-    ray_1.Origin_0 = _S2;
-    vec3 _S3 = gl_ObjectRayDirectionNV;
+    vec3 tmp_origin = gl_ObjectRayOriginNV;
+    ray_1.Origin_0 = tmp_origin;
 
-    ray_1.Direction_0 = _S3;
-    float _S4 = gl_RayTminNV;
+    vec3 tmp_direction = gl_ObjectRayDirectionNV;
+    ray_1.Direction_0 = tmp_direction;
 
-    ray_1.TMin_0 = _S4;
-    float _S5 = gl_RayTmaxNV;
+    float tmp_tmin = gl_RayTminNV;
+    ray_1.TMin_0 = tmp_tmin;
 
-    ray_1.TMax_0 = _S5;
+    float tmp_tmax = gl_RayTmaxNV;
+    ray_1.TMax_0 = tmp_tmax;
 
-    RayDesc_0 _S6 = ray_1;
+    RayDesc_0 tmp_ray = ray_1;
 
-    Sphere_0 _S7 = gSphere_0;
+    Sphere_0 tmp_sphere = U_0._data.gSphere_0;
 
-    float _S8;
-    SphereHitAttributes_0 _S9;
-    bool _S10 = rayIntersectsSphere_0(_S6, _S7, _S8, _S9);
+    float tmp_thit;
+    SphereHitAttributes_0 tmp_hitattrs;
+    bool tmp_dithit = rayIntersectsSphere_0(tmp_ray, tmp_sphere, tmp_thit, tmp_hitattrs);
 
-    float tHit_2 = _S8;
-    SphereHitAttributes_0 attrs_1 = _S9;
+    float tHit_2 = tmp_thit;
+    SphereHitAttributes_0 attrs_1 = tmp_hitattrs;
 
-    if(_S10)
+    if(tmp_dithit)
     {
-        bool _S11 = ReportHit_0(tHit_2, (uint((0))), attrs_1);
+        bool tmp_reportresult = ReportHit_0(tHit_2, (uint((0))), attrs_1);
     }
 
     return;
diff --git a/tests/vkray/raygen.slang.glsl b/tests/vkray/raygen.slang.glsl
index 512215a73..f65053ecf 100644
--- a/tests/vkray/raygen.slang.glsl
+++ b/tests/vkray/raygen.slang.glsl
@@ -1,10 +1,46 @@
 //TEST_IGNORE_FILE:
 #version 460
 
+layout(row_major) uniform;
+
 #extension GL_NV_ray_tracing : require
 
 #define TRACING_EPSILON 1e-6
 
+#define tmp_ubo             _S1
+#define tmp_saturate        _S2
+#define tmp_launchID_x      _S3
+#define tmp_add_x           _S4
+#define tmp_launchSize_x    _S5
+#define tmp_div_x           _S6
+#define tmp_launchID_y      _S7
+#define tmp_add_y           _S8
+#define tmp_launchSize_y    _S9
+#define tmp_div_y           _S10
+#define tmp_tex_pos         _S11
+#define tmp_tex_nrm         _S12
+#define tmp_light_invDist   _S13
+#define tmp_trace_A         _S14
+#define tmp_trace_B         _S15
+#define tmp_trace_C         _S16
+#define tmp_trace_D         _S17
+#define tmp_trace_E         _S18
+#define tmp_trace_ray       _S19
+#define tmp_trace_payload   _S20
+#define tmp_cmp             _S21
+#define tmp_color           _S22
+#define tmp_dot             _S23
+#define tmp_sat             _S24
+#define tmp_trace2_A        _S25
+#define tmp_trace2_B        _S26
+#define tmp_trace2_C        _S27
+#define tmp_trace2_D        _S28
+#define tmp_trace2_E        _S29
+#define tmp_trace2_ray      _S30
+#define tmp_trace2_payload  _S31
+#define tmp_storeIdx        _S32
+
+
 layout(binding = 0) uniform texture2D samplerPosition_0;
 layout(binding = 2) uniform sampler sampler_0;
 layout(binding = 1) uniform texture2D samplerNormal_0;
@@ -17,15 +53,20 @@ struct Light_0
 
 #define NUM_LIGHTS 17
 
-layout(binding = 3)
-layout(std140) uniform ubo_0
+struct Uniforms_0
 {
     Light_0 light_0;
     vec4 viewPos_0;
-    layout(row_major) mat4x4 view_0;
-    layout(row_major) mat4x4 model_0;
+    mat4x4 view_0;
+    mat4x4 model_0;
 };
 
+layout(binding = 3)
+layout(std140) uniform tmp_ubo
+{
+    Uniforms_0 _data;
+} ubo_0;
+
 layout(binding = 5) uniform accelerationStructureNV as_0;
 
 struct ShadowRay_0
@@ -79,8 +120,8 @@ void TraceRay_0(
 
 float saturate_0(float x_0)
 {
-    float _S1 = clamp(x_0, float(0), float(1));
-    return _S1;
+    float tmp_saturate = clamp(x_0, float(0), float(1));
+    return tmp_saturate;
 }
 
 void TraceRay_1(
@@ -114,27 +155,28 @@ void main()
 {
     float atten_0;
 
-    uvec3 _S2 = gl_LaunchIDNV;
-    float _S3 = float(_S2.x) + 0.5;
-    uvec3 _S4 = gl_LaunchSizeNV;
-    float _S5 = _S3 / float(_S4.x);
-    uvec3 _S6 = gl_LaunchIDNV;
-    float _S7 = float(_S6.y) + 0.5;
-    uvec3 _S8 = gl_LaunchSizeNV;
-    float _S9 = _S7 / float(_S8.y);
-    vec2 inUV_0 = vec2(_S5, _S9);
+    uvec3 tmp_launchID_x = gl_LaunchIDNV;
+    float tmp_add_x = float(tmp_launchID_x.x) + 0.5;
+    uvec3 tmp_launchSize_x = gl_LaunchSizeNV;
+    float tmp_div_x = tmp_add_x / float(tmp_launchSize_x.x);
+
+    uvec3 tmp_launchID_y = gl_LaunchIDNV;
+    float tmp_add_y = float(tmp_launchID_y.y) + 0.5;
+    uvec3 tmp_launchSize_y = gl_LaunchSizeNV;
+    float tmp_div_y = tmp_add_y / float(tmp_launchSize_y.y);
+    vec2 inUV_0 = vec2(tmp_div_x, tmp_div_y);
     
-    vec4 _S10 = texture(sampler2D(samplerPosition_0, sampler_0), inUV_0);
-    vec3 P_0 = _S10.xyz;
+    vec4 tmp_tex_pos = texture(sampler2D(samplerPosition_0, sampler_0), inUV_0);
+    vec3 P_0 = tmp_tex_pos.xyz;
 
-    vec4 _S11 = texture(sampler2D(samplerNormal_0, sampler_0), inUV_0);
-    vec3 N_0 = _S11.xyz * 2.0 - 1.0;
+    vec4 tmp_tex_nrm = texture(sampler2D(samplerNormal_0, sampler_0), inUV_0);
+    vec3 N_0 = tmp_tex_nrm.xyz * 2.0 - 1.0;
 
-    vec3 lightDelta_0 = light_0.position_0.xyz - P_0;
+    vec3 lightDelta_0 = ubo_0._data.light_0.position_0.xyz - P_0;
     float lightDist_0 = length(lightDelta_0);
     vec3 L_0 = normalize(lightDelta_0);
 
-    float _S12 = 1.0 / (lightDist_0 * lightDist_0);
+    float tmp_light_invDist = 1.0 / (lightDist_0 * lightDist_0);
 
     RayDesc_0 ray_0;
     ray_0.Origin_0 = P_0;
@@ -144,47 +186,47 @@ void main()
 
     ShadowRay_0 shadowRay_0;
     shadowRay_0.hitDistance_0 = float(0);
-    const uint _S13 = uint(1);
-    const uint _S14 = uint(0xFF);
-    const uint _S15 = uint(0);
-    const uint _S16 = uint(0);
-    const uint _S17 = uint(2);
-
-    RayDesc_0 _S18 = ray_0;
-    ShadowRay_0 _S19;
-    _S19 = shadowRay_0;
-    TraceRay_0(as_0, _S13, _S14, _S15, _S16, _S17, _S18, _S19);
-    shadowRay_0 = _S19;
-
-    bool _S20 = shadowRay_0.hitDistance_0 < lightDist_0;
+    const uint tmp_trace_A = uint(1);
+    const uint tmp_trace_B = uint(0xFF);
+    const uint tmp_trace_C = uint(0);
+    const uint tmp_trace_D = uint(0);
+    const uint tmp_trace_E = uint(2);
+
+    RayDesc_0 tmp_trace_ray = ray_0;
+    ShadowRay_0 tmp_trace_payload;
+    tmp_trace_payload = shadowRay_0;
+    TraceRay_0(as_0, tmp_trace_A, tmp_trace_B, tmp_trace_C, tmp_trace_D, tmp_trace_E, tmp_trace_ray, tmp_trace_payload);
+    shadowRay_0 = tmp_trace_payload;
+
+    bool tmp_cmp = shadowRay_0.hitDistance_0 < lightDist_0;
     ReflectionRay_0 reflectionRay_0;
-    if(_S20)
+    if(tmp_cmp)
     {
         atten_0 = (0.00000000000000000000);
     }
     else
     {
-        atten_0 = _S12;
+        atten_0 = tmp_light_invDist;
     }
 
-    vec3 _S21 = light_0.color_0.xyz;
-    float _S22 = dot(N_0, L_0);
-    float _S23 = saturate_0(_S22);
-    vec3 color_2 = (_S21 * _S23) * atten_0;
-
-    const uint _S24 = uint(1);
-    const uint _S25 = uint(255);
-    const uint _S26 = uint(0);
-    const uint _S27 = uint(0);
-    const uint _S28 = uint(2);
-    RayDesc_0 _S29 = ray_0;
-    ReflectionRay_0 _S30;
-    _S30 = reflectionRay_0;
-    TraceRay_1(as_0, _S24, _S25, _S26, _S27, _S28, _S29, _S30);
-
-    vec3 color_3 = color_2 + _S30.color_1;
-
-    uvec3 _S31 = gl_LaunchIDNV;
-    imageStore(outputImage_0, ivec2(uvec2(ivec2(_S31.xy))), vec4(color_3, 1.0));
+    vec3 tmp_color = ubo_0._data.light_0.color_0.xyz;
+    float tmp_dot = dot(N_0, L_0);
+    float tmp_sat = saturate_0(tmp_dot);
+    vec3 color_2 = (tmp_color * tmp_sat) * atten_0;
+
+    const uint tmp_trace2_A = uint(1);
+    const uint tmp_trace2_B = uint(255);
+    const uint tmp_trace2_C = uint(0);
+    const uint tmp_trace2_D = uint(0);
+    const uint tmp_trace2_E = uint(2);
+    RayDesc_0 tmp_trace2_ray = ray_0;
+    ReflectionRay_0 tmp_trace2_payload;
+    tmp_trace2_payload = reflectionRay_0;
+    TraceRay_1(as_0, tmp_trace2_A, tmp_trace2_B, tmp_trace2_C, tmp_trace2_D, tmp_trace2_E, tmp_trace2_ray, tmp_trace2_payload);
+
+    vec3 color_3 = color_2 + tmp_trace2_payload.color_1;
+
+    uvec3 tmp_storeIdx = gl_LaunchIDNV;
+    imageStore(outputImage_0, ivec2(uvec2(ivec2(tmp_storeIdx.xy))), vec4(color_3, 1.0));
     return;
 }