summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--source/slang/check.cpp11
-rw-r--r--source/slang/emit.cpp401
-rw-r--r--source/slang/hlsl.meta.slang72
-rw-r--r--source/slang/hlsl.meta.slang.h72
-rw-r--r--source/slang/ir-inst-defs.h4
-rw-r--r--source/slang/ir.cpp1
-rw-r--r--source/slang/type-layout.cpp11
-rw-r--r--source/slang/type-layout.h7
-rw-r--r--source/slang/vm.cpp66
-rw-r--r--tests/bindings/binding0.hlsl12
-rw-r--r--tests/bindings/binding1.hlsl14
-rw-r--r--tests/bindings/explicit-binding.hlsl24
-rw-r--r--tests/bindings/glsl-parameter-blocks.slang.glsl6
-rw-r--r--tests/bindings/multi-file-extra.hlsl34
-rw-r--r--tests/bindings/multi-file.hlsl34
-rw-r--r--tests/bindings/packoffset.hlsl51
-rw-r--r--tests/bindings/resources-in-cbuffer.hlsl73
-rw-r--r--tests/bugs/gh-103.slang25
-rw-r--r--tests/bugs/gh-172.slang19
-rw-r--r--tests/bugs/gh-333.slang17
-rw-r--r--tests/bugs/split-nested-types.hlsl15
-rw-r--r--tests/bugs/vec-init-list.hlsl22
-rw-r--r--tests/bugs/vk-structured-buffer-binding.hlsl3
-rw-r--r--tests/bugs/vk-structured-buffer-binding.hlsl.expected31
-rw-r--r--tests/bugs/vk-structured-buffer-binding.hlsl.glsl26
-rw-r--r--tests/compute/matrix-layout.hlsl11
-rw-r--r--tests/compute/matrix-layout.hlsl.expected.txt24
-rw-r--r--tests/cross-compile/array-of-buffers.slang32
-rw-r--r--tests/cross-compile/array-of-buffers.slang.glsl57
-rw-r--r--tests/cross-compile/array-of-buffers.slang.hlsl35
-rw-r--r--tests/hlsl/dxsdk/AdaptiveTessellationCS40/Render.hlsl65
-rw-r--r--tests/hlsl/dxsdk/AdaptiveTessellationCS40/ScanCS.hlsl109
-rw-r--r--tests/hlsl/dxsdk/AdaptiveTessellationCS40/TessellatorCS40_EdgeFactorCS.hlsl217
-rw-r--r--tests/hlsl/dxsdk/AdaptiveTessellationCS40/TessellatorCS40_NumVerticesIndicesCS.hlsl56
-rw-r--r--tests/hlsl/dxsdk/AdaptiveTessellationCS40/TessellatorCS40_ScatterIDCS.hlsl45
-rw-r--r--tests/hlsl/dxsdk/AdaptiveTessellationCS40/TessellatorCS40_TessellateIndicesCS.hlsl628
-rw-r--r--tests/hlsl/dxsdk/AdaptiveTessellationCS40/TessellatorCS40_TessellateVerticesCS.hlsl206
-rw-r--r--tests/hlsl/dxsdk/AdaptiveTessellationCS40/TessellatorCS40_common.hlsl411
-rw-r--r--tests/hlsl/dxsdk/AdaptiveTessellationCS40/TessellatorCS40_defines.h9
-rw-r--r--tests/hlsl/dxsdk/BC6HBC7EncoderCS/Shaders/BC6HEncode.hlsl2567
-rw-r--r--tests/hlsl/dxsdk/BC6HBC7EncoderCS/Shaders/BC7Encode.hlsl1908
-rw-r--r--tests/hlsl/dxsdk/BasicCompute11/BasicCompute11.hlsl72
-rw-r--r--tests/hlsl/dxsdk/BasicHLSL11/BasicHLSL.fx158
-rw-r--r--tests/hlsl/dxsdk/BasicHLSL11/BasicHLSL11_PS.hlsl60
-rw-r--r--tests/hlsl/dxsdk/BasicHLSL11/BasicHLSL11_VS.hlsl56
-rw-r--r--tests/hlsl/dxsdk/BasicHLSLFX11/BasicHLSLFX11.fx181
-rw-r--r--tests/hlsl/dxsdk/CascadedShadowMaps11/RenderCascadeScene.hlsl506
-rw-r--r--tests/hlsl/dxsdk/CascadedShadowMaps11/RenderCascadeShadow.hlsl59
-rw-r--r--tests/hlsl/dxsdk/ComputeShaderSort11/ComputeShaderSort11.hlsl75
-rw-r--r--tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial02/Tutorial02.fx28
-rw-r--r--tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial02/Tutorial02_PS.hlsl3
-rw-r--r--tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial02/Tutorial02_VS.hlsl3
-rw-r--r--tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial03/Tutorial03.fx28
-rw-r--r--tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial03/Tutorial03_PS.hlsl3
-rw-r--r--tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial03/Tutorial03_VS.hlsl3
-rw-r--r--tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial04/Tutorial04.fx46
-rw-r--r--tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial04/Tutorial04_PS.hlsl3
-rw-r--r--tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial04/Tutorial04_VS.hlsl3
-rw-r--r--tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial05/Tutorial05.fx54
-rw-r--r--tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial05/Tutorial05_PS.hlsl3
-rw-r--r--tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial05/Tutorial05_VS.hlsl3
-rw-r--r--tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial06/Tutorial06.fx76
-rw-r--r--tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial06/Tutorial06_PS.hlsl3
-rw-r--r--tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial06/Tutorial06_VS.hlsl3
-rw-r--r--tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial07/Tutorial07.fx67
-rw-r--r--tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial07/Tutorial07_PS.hlsl3
-rw-r--r--tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial07/Tutorial07_VS.hlsl3
-rw-r--r--tests/hlsl/dxsdk/Direct3D11TutorialsDXUT/Tutorial08/Tutorial08.fx56
-rw-r--r--tests/hlsl/dxsdk/Direct3D11TutorialsDXUT/Tutorial09/Tutorial09.fx69
-rw-r--r--tests/hlsl/dxsdk/Direct3D11TutorialsDXUT/Tutorial10/Tutorial10.fx73
-rw-r--r--tests/hlsl/dxsdk/Direct3D11TutorialsFX11/Tutorial11/Tutorial11.fx117
-rw-r--r--tests/hlsl/dxsdk/Direct3D11TutorialsFX11/Tutorial12/Tutorial12.fx129
-rw-r--r--tests/hlsl/dxsdk/Direct3D11TutorialsFX11/Tutorial13/Tutorial13.fx191
-rw-r--r--tests/hlsl/dxsdk/Direct3D11TutorialsFX11/Tutorial14/Tutorial14.fx294
-rw-r--r--tests/hlsl/dxsdk/DynamicShaderLinkage11/DynamicShaderLinkage11_LightPSH.h84
-rw-r--r--tests/hlsl/dxsdk/DynamicShaderLinkage11/DynamicShaderLinkage11_MaterialPSH.h103
-rw-r--r--tests/hlsl/dxsdk/DynamicShaderLinkage11/DynamicShaderLinkage11_PS.hlsl84
-rw-r--r--tests/hlsl/dxsdk/DynamicShaderLinkage11/DynamicShaderLinkage11_PSBuffers.h129
-rw-r--r--tests/hlsl/dxsdk/DynamicShaderLinkage11/DynamicShaderLinkage11_VS.hlsl73
-rw-r--r--tests/hlsl/dxsdk/DynamicShaderLinkageFX11/DynamicShaderLinkageFX11.fx192
-rw-r--r--tests/hlsl/dxsdk/DynamicShaderLinkageFX11/DynamicShaderLinkageFX11_LightPSH.h82
-rw-r--r--tests/hlsl/dxsdk/DynamicShaderLinkageFX11/DynamicShaderLinkageFX11_MaterialPSH.h103
-rw-r--r--tests/hlsl/dxsdk/DynamicShaderLinkageFX11/DynamicShaderLinkageFX11_PSBuffers.h152
-rw-r--r--tests/hlsl/dxsdk/DynamicShaderLinkageFX11/DynamicShaderLinkageFX11_ps.hlsl113
-rw-r--r--tests/hlsl/dxsdk/DynamicShaderLinkageFX11/DynamicShaderLinkageFX11_vs.hlsl65
-rw-r--r--tests/hlsl/dxsdk/FixedFuncEMUFX11/FixedFuncEMU.fx468
-rw-r--r--tests/hlsl/dxsdk/FluidCS11/ComputeShaderSort11.hlsl75
-rw-r--r--tests/hlsl/dxsdk/FluidCS11/FluidCS11.hlsl529
-rw-r--r--tests/hlsl/dxsdk/FluidCS11/FluidRender.hlsl124
-rw-r--r--tests/hlsl/dxsdk/HDRToneMappingCS11/BrightPassAndHorizFilterCS.hlsl64
-rw-r--r--tests/hlsl/dxsdk/HDRToneMappingCS11/DumpToTexture.hlsl29
-rw-r--r--tests/hlsl/dxsdk/HDRToneMappingCS11/FilterCS.hlsl73
-rw-r--r--tests/hlsl/dxsdk/HDRToneMappingCS11/FinalPass.hlsl79
-rw-r--r--tests/hlsl/dxsdk/HDRToneMappingCS11/PSApproach.hlsl129
-rw-r--r--tests/hlsl/dxsdk/HDRToneMappingCS11/ReduceTo1DCS.hlsl72
-rw-r--r--tests/hlsl/dxsdk/HDRToneMappingCS11/ReduceToSingleCS.hlsl63
-rw-r--r--tests/hlsl/dxsdk/HDRToneMappingCS11/skybox11.hlsl44
-rw-r--r--tests/hlsl/dxsdk/InstancingFX11/Instancing.fx591
-rw-r--r--tests/hlsl/dxsdk/MultithreadedRendering11/MultithreadedRendering11_PS.hlsl202
-rw-r--r--tests/hlsl/dxsdk/MultithreadedRendering11/MultithreadedRendering11_VS.hlsl83
-rw-r--r--tests/hlsl/dxsdk/NBodyGravityCS11/NBodyGravityCS11.hlsl103
-rw-r--r--tests/hlsl/dxsdk/NBodyGravityCS11/ParticleDraw.hlsl128
-rw-r--r--tests/hlsl/dxsdk/OIT11/OIT_CS.hlsl277
-rw-r--r--tests/hlsl/dxsdk/OIT11/OIT_PS.hlsl56
-rw-r--r--tests/hlsl/dxsdk/OIT11/SceneVS.hlsl42
-rw-r--r--tests/hlsl/dxsdk/README.md5
-rw-r--r--tests/hlsl/dxsdk/SimpleBezier11/SimpleBezier11.hlsl233
-rw-r--r--tests/hlsl/dxsdk/SimpleSample11/SimpleSample.fx112
-rw-r--r--tests/hlsl/dxsdk/SimpleSample11/SimpleSample.hlsl86
-rw-r--r--tests/hlsl/dxsdk/SubD11/SubD11.hlsl1238
-rw-r--r--tests/hlsl/dxsdk/VarianceShadows11/2DQuadShaders.hlsl216
-rw-r--r--tests/hlsl/dxsdk/VarianceShadows11/RenderVarianceScene.hlsl412
-rw-r--r--tests/hlsl/dxsdk/VarianceShadows11/RenderVarianceShadow.hlsl49
-rw-r--r--tests/hlsl/simple/rw-texture.hlsl28
-rw-r--r--tests/parser/cast-precedence.hlsl20
-rw-r--r--tests/reflection/parameter-block-explicit-space.slang4
-rw-r--r--tests/rewriter/type-splitting.hlsl9
-rw-r--r--tests/vkray/anyhit.slang.glsl6
-rw-r--r--tests/vkray/callable-caller.slang.glsl40
-rw-r--r--tests/vkray/closesthit.slang.glsl58
-rw-r--r--tests/vkray/intersection.slang.glsl62
-rw-r--r--tests/vkray/raygen.slang.glsl152
122 files changed, 836 insertions, 16167 deletions
diff --git a/source/slang/check.cpp b/source/slang/check.cpp
index e07bdf156..21e3b894b 100644
--- a/source/slang/check.cpp
+++ b/source/slang/check.cpp
@@ -7922,16 +7922,15 @@ namespace Slang
RefPtr<Expr> expr = inExpr;
for (;;)
{
- auto& type = expr->type;
- if (auto pointerLikeType = type->As<PointerLikeType>())
+ auto baseType = expr->type;
+ if (auto pointerLikeType = baseType->As<PointerLikeType>())
{
- type = QualType(pointerLikeType->elementType);
+ auto elementType = QualType(pointerLikeType->elementType);
+ elementType.IsLeftValue = baseType.IsLeftValue;
auto derefExpr = new DerefExpr();
derefExpr->base = expr;
- derefExpr->type = QualType(pointerLikeType->elementType);
-
- // TODO(tfoley): deal with l-value-ness here
+ derefExpr->type = elementType;
expr = derefExpr;
continue;
diff --git a/source/slang/emit.cpp b/source/slang/emit.cpp
index fb9968232..ba1b2177a 100644
--- a/source/slang/emit.cpp
+++ b/source/slang/emit.cpp
@@ -2383,7 +2383,6 @@ struct EmitVisitor
case kIROp_FieldAddress:
case kIROp_getElementPtr:
case kIROp_Specialize:
- case kIROp_BufferElementRef:
return true;
}
@@ -2536,25 +2535,6 @@ struct EmitVisitor
return true;
}
- bool isDerefBaseImplicit(
- EmitContext* /*context*/,
- IRInst* inst)
- {
- auto type = inst->getDataType();
-
- if(as<IRUniformParameterGroupType>(type) && !as<IRParameterBlockType>(type))
- {
- // TODO: we need to be careful here, because
- // HLSL shader model 6 allows these as explicit
- // types.
- return true;
- }
-
- return false;
- }
-
-
-
void emitIROperand(
EmitContext* ctx,
IRInst* inst,
@@ -3645,13 +3625,16 @@ struct EmitVisitor
IRFieldExtract* fieldExtract = (IRFieldExtract*) inst;
- if (!isDerefBaseImplicit(ctx, fieldExtract->getBase()))
- {
- auto prec = kEOp_Postfix;
- needClose = maybeEmitParens(outerPrec, prec);
+ auto prec = kEOp_Postfix;
+ needClose = maybeEmitParens(outerPrec, prec);
- emitIROperand(ctx, fieldExtract->getBase(), mode, leftSide(outerPrec, prec));
- emit(".");
+ auto base = fieldExtract->getBase();
+ emitIROperand(ctx, base, mode, leftSide(outerPrec, prec));
+ emit(".");
+ if(getTarget(ctx) == CodeGenTarget::GLSL
+ && as<IRUniformParameterGroupType>(base->getDataType()))
+ {
+ emit("_data.");
}
emit(getIRName(fieldExtract->getField()));
}
@@ -3663,15 +3646,17 @@ struct EmitVisitor
IRFieldAddress* ii = (IRFieldAddress*) inst;
- if (!isDerefBaseImplicit(ctx, ii->getBase()))
- {
- auto prec = kEOp_Postfix;
- needClose = maybeEmitParens(outerPrec, prec);
+ auto prec = kEOp_Postfix;
+ needClose = maybeEmitParens(outerPrec, prec);
- emitIROperand(ctx, ii->getBase(), mode, leftSide(outerPrec, prec));
- emit(".");
+ auto base = ii->getBase();
+ emitIROperand(ctx, base, mode, leftSide(outerPrec, prec));
+ emit(".");
+ if(getTarget(ctx) == CodeGenTarget::GLSL
+ && as<IRUniformParameterGroupType>(base->getDataType()))
+ {
+ emit("_data.");
}
-
emit(getIRName(ii->getField()));
}
break;
@@ -3774,7 +3759,15 @@ struct EmitVisitor
break;
case kIROp_Load:
- emitIROperand(ctx, inst->getOperand(0), mode, outerPrec);
+ {
+ auto base = inst->getOperand(0);
+ emitIROperand(ctx, base, mode, outerPrec);
+ if(getTarget(ctx) == CodeGenTarget::GLSL
+ && as<IRUniformParameterGroupType>(base->getDataType()))
+ {
+ emit("._data");
+ }
+ }
break;
case kIROp_Store:
@@ -3794,39 +3787,6 @@ struct EmitVisitor
}
break;
- case kIROp_BufferLoad:
- case kIROp_BufferElementRef:
- {
- auto prec = kEOp_Postfix;
- needClose = maybeEmitParens(outerPrec, prec);
-
- emitIROperand(ctx, inst->getOperand(0), mode, leftSide(outerPrec, prec));
- emit("[");
- emitIROperand(ctx, inst->getOperand(1), mode, kEOp_General);
- emit("]");
- }
- break;
-
- case kIROp_BufferStore:
- {
- auto precAssign = kEOp_Assign;
- needClose = maybeEmitParens(outerPrec, precAssign);
-
- auto outerPrecSubscript = precAssign;
- auto precSubscript = kEOp_Postfix;
- bool needCloseSubscript = maybeEmitParens(outerPrecSubscript, precSubscript);
-
- emitIROperand(ctx, inst->getOperand(0), mode, leftSide(outerPrecSubscript, precSubscript));
- emit("[");
- emitIROperand(ctx, inst->getOperand(1), mode, kEOp_General);
- emit("]");
- maybeCloseParens(needCloseSubscript);
-
- emit(" = ");
- emitIROperand(ctx, inst->getOperand(2), mode, rightSide(outerPrec, precAssign));
- }
- break;
-
case kIROp_GroupMemoryBarrierWithGroupSync:
emit("GroupMemoryBarrierWithGroupSync()");
break;
@@ -5618,63 +5578,19 @@ struct EmitVisitor
}
}
- void emitHLSLParameterBlock(
- EmitContext* ctx,
- IRGlobalVar* varDecl,
- IRParameterBlockType* type)
- {
- emit("cbuffer ");
-
- // Generate a dummy name for the block
- emit("_S");
- Emit(ctx->shared->uniqueIDCounter++);
-
- auto varLayout = getVarLayout(ctx, varDecl);
- SLANG_RELEASE_ASSERT(varLayout);
-
- EmitVarChain blockChain(varLayout);
-
- EmitVarChain containerChain = blockChain;
- EmitVarChain elementChain = blockChain;
-
- auto typeLayout = varLayout->typeLayout;
- if( auto parameterGroupTypeLayout = typeLayout.As<ParameterGroupTypeLayout>() )
- {
- containerChain = EmitVarChain(parameterGroupTypeLayout->containerVarLayout, &blockChain);
- elementChain = EmitVarChain(parameterGroupTypeLayout->elementVarLayout, &blockChain);
-
- typeLayout = parameterGroupTypeLayout->elementVarLayout->getTypeLayout();
- }
-
- emitHLSLRegisterSemantic(LayoutResourceKind::ConstantBuffer, &containerChain);
-
- emit("\n{\n");
- indent();
-
- auto elementType = type->getElementType();
-
-
- emitIRType(ctx, elementType, getIRName(varDecl));
-
- emitHLSLParameterGroupFieldLayoutSemantics(&elementChain);
- emit(";\n");
-
- dedent();
- emit("}\n");
- }
-
void emitHLSLParameterGroup(
EmitContext* ctx,
IRGlobalVar* varDecl,
IRUniformParameterGroupType* type)
{
- if(auto parameterBlockType = as<IRParameterBlockType>(type))
+ if(as<IRTextureBufferType>(type))
{
- emitHLSLParameterBlock(ctx, varDecl, parameterBlockType);
- return;
+ emit("tbuffer ");
+ }
+ else
+ {
+ emit("cbuffer ");
}
-
- emit("cbuffer ");
emit(getIRName(varDecl));
auto varLayout = getVarLayout(ctx, varDecl);
@@ -5701,111 +5617,37 @@ struct EmitVisitor
auto elementType = type->getElementType();
- if(auto structType = as<IRStructType>(elementType))
- {
- auto structTypeLayout = typeLayout.As<StructTypeLayout>();
- SLANG_RELEASE_ASSERT(structTypeLayout);
-
- UInt fieldIndex = 0;
- for(auto ff : structType->getFields())
- {
- // TODO: need a plan to deal with the case where the IR-level
- // `struct` type might not match the high-level type, so that
- // the numbering of fields is different.
- //
- // The right plan is probably to require that the lowering pass
- // create a fresh layout for any type/variable that it splits
- // in this fashion, so that the layout information it attaches
- // can always be assumed to apply to the actual instruciton.
- //
-
- auto fieldLayout = structTypeLayout->fields[fieldIndex++];
-
- auto fieldKey = ff->getKey();
- auto fieldType = ff->getFieldType();
-
- // Fields of `void` type aren't valid in HLSL/GLSL.
- //
- // TODO: legalization should get rid of any fields that have
- // empty, or effectively empty types (e.g., emptry structs
- // should be translated over to `void`).
- if(as<IRVoidType>(fieldType))
- continue;
-
- emitIRVarModifiers(ctx, fieldLayout, fieldKey, fieldType);
-
- emitIRType(ctx, fieldType, getIRName(fieldKey));
-
- emitHLSLParameterGroupFieldLayoutSemantics(fieldLayout, &elementChain);
-
- emit(";\n");
- }
- }
- else
- {
- // TODO: during legalization we should turn `ParameterGroup<X>` where `X`
- // is not a `struct` type into `ParameterGroup<S>` where `S` is defined
- // as something like `struct S { X _; };`
- //
- emit("/* unexpected */");
- }
+ emitIRType(ctx, elementType, getIRName(varDecl));
+ emit(";\n");
dedent();
emit("}\n");
}
- void emitGLSLParameterBlock(
- EmitContext* ctx,
- IRGlobalVar* varDecl,
- IRParameterBlockType* type)
+ void emitArrayBrackets(
+ EmitContext* ctx,
+ IRType* type)
{
- auto varLayout = getVarLayout(ctx, varDecl);
- SLANG_RELEASE_ASSERT(varLayout);
-
- EmitVarChain blockChain(varLayout);
-
- EmitVarChain containerChain = blockChain;
- EmitVarChain elementChain = blockChain;
+ SLANG_UNUSED(ctx);
- auto typeLayout = varLayout->typeLayout;
- if( auto parameterGroupTypeLayout = typeLayout.As<ParameterGroupTypeLayout>() )
+ if(auto arrayType = as<IRArrayType>(type))
{
- containerChain = EmitVarChain(parameterGroupTypeLayout->containerVarLayout, &blockChain);
- elementChain = EmitVarChain(parameterGroupTypeLayout->elementVarLayout, &blockChain);
-
- typeLayout = parameterGroupTypeLayout->elementVarLayout->getTypeLayout();
+ emit("[");
+ EmitVal(arrayType->getElementCount(), kEOp_General);
+ emit("]");
+ }
+ else if(auto unsizedArrayType = as<IRUnsizedArrayType>(type))
+ {
+ emit("[]");
}
-
- emitGLSLLayoutQualifier(LayoutResourceKind::DescriptorTableSlot, &containerChain);
- emit("layout(std140) uniform ");
-
- // Generate a dummy name for the block
- emit("_S");
- Emit(ctx->shared->uniqueIDCounter++);
-
- emit("\n{\n");
- indent();
-
- auto elementType = type->getElementType();
-
- emitIRType(ctx, elementType, getIRName(varDecl));
- emit(";\n");
-
- dedent();
- emit("};\n");
}
+
void emitGLSLParameterGroup(
EmitContext* ctx,
IRGlobalVar* varDecl,
IRUniformParameterGroupType* type)
{
- if(auto parameterBlockType = as<IRParameterBlockType>(type))
- {
- emitGLSLParameterBlock(ctx, varDecl, parameterBlockType);
- return;
- }
-
auto varLayout = getVarLayout(ctx, varDecl);
SLANG_RELEASE_ASSERT(varLayout);
@@ -5814,7 +5656,7 @@ struct EmitVisitor
EmitVarChain containerChain = blockChain;
EmitVarChain elementChain = blockChain;
- auto typeLayout = varLayout->typeLayout;
+ auto typeLayout = varLayout->typeLayout->unwrapArray();
if( auto parameterGroupTypeLayout = typeLayout.As<ParameterGroupTypeLayout>() )
{
containerChain = EmitVarChain(parameterGroupTypeLayout->containerVarLayout, &blockChain);
@@ -5841,71 +5683,28 @@ struct EmitVisitor
emit("layout(std140) uniform ");
}
- emit(getIRName(varDecl));
+ // Generate a dummy name for the block
+ emit("_S");
+ Emit(ctx->shared->uniqueIDCounter++);
emit("\n{\n");
indent();
auto elementType = type->getElementType();
- if(auto structType = as<IRStructType>(elementType))
- {
- auto structTypeLayout = typeLayout.As<StructTypeLayout>();
- SLANG_RELEASE_ASSERT(structTypeLayout);
-
- UInt fieldIndex = 0;
- for(auto ff : structType->getFields())
- {
- // TODO: need a plan to deal with the case where the IR-level
- // `struct` type might not match the high-level type, so that
- // the numbering of fields is different.
- //
- // The right plan is probably to require that the lowering pass
- // create a fresh layout for any type/variable that it splits
- // in this fashion, so that the layout information it attaches
- // can always be assumed to apply to the actual instruciton.
- //
-
- auto fieldLayout = structTypeLayout->fields[fieldIndex++];
-
- auto fieldKey = ff->getKey();
- auto fieldType = ff->getFieldType();
- if(as<IRVoidType>(fieldType))
- continue;
-
- // Note: we will emit matrix-layout modifiers here, but
- // we will refrain from emitting other modifiers that
- // might not be appropriate to the context (e.g., we
- // shouldn't go emitting `uniform` just because these
- // things are uniform...).
- //
- // TODO: we need a more refined set of modifiers that
- // we should allow on fields, because we might end
- // up supporting layout that isn't the default for
- // the given block type (e.g., something other than
- // `std140` for a uniform block).
- //
- emitIRMatrixLayoutModifiers(ctx, fieldLayout);
-
- emitIRType(ctx, fieldType, getIRName(fieldKey));
+ emitIRType(ctx, elementType, "_data");
+ emit(";\n");
-// emitHLSLParameterGroupFieldLayoutSemantics(layout, fieldLayout);
+ dedent();
+ emit("} ");
- emit(";\n");
- }
- }
- else
- {
- emit("/* unexpected */");
- }
+ emit(getIRName(varDecl));
- // TODO: we should consider always giving parameter blocks
- // names when outputting GLSL, since that shouldn't affect
- // the semantics of things, and will reduce the risk of
- // collisions in the global namespace...
+ // If the underlying variable was an array (or array of arrays, etc.)
+ // we need to emit all those array brackets here.
+ emitArrayBrackets(ctx, varDecl->getDataType()->getValueType());
- dedent();
- emit("};\n");
+ emit(";\n");
}
void emitIRParameterGroup(
@@ -6025,19 +5824,14 @@ struct EmitVisitor
auto elementType = structuredBufferType->getElementType();
- emitIRType(ctx, elementType, getIRName(varDecl) + "[]");
+ emitIRType(ctx, elementType, "_data[]");
emit(";\n");
dedent();
- emit("}");
+ emit("} ");
- // TODO: we need to consider the case where the type of the variable is
- // an *array* of structured buffers, in which case we need to declare
- // the block as an array too.
- //
- // The main challenge here is that then the block will have a name,
- // and also the field inside the block will have a name, so that when
- // the user had written `a[i][j]` we now need to emit `a[i].someName[j]`.
+ emit(getIRName(varDecl));
+ emitArrayBrackets(ctx, varDecl->getDataType()->getValueType());
emit(";\n");
}
@@ -6084,20 +5878,13 @@ struct EmitVisitor
emit("\n{\n");
indent();
- emit("uint ");
- emit(getIRName(varDecl));
- emit("[];\n");
+ emit("uint _data[];\n");
dedent();
- emit("}");
+ emit("} ");
- // TODO: we need to consider the case where the type of the variable is
- // an *array* of structured buffers, in which case we need to declare
- // the block as an array too.
- //
- // The main challenge here is that then the block will have a name,
- // and also the field inside the block will have a name, so that when
- // the user had written `a[i][j]` we now need to emit `a[i].someName[j]`.
+ emit(getIRName(varDecl));
+ emitArrayBrackets(ctx, varDecl->getDataType()->getValueType());
emit(";\n");
}
@@ -6129,6 +5916,16 @@ struct EmitVisitor
Emit("}\n");
}
+ // When a global shader parameter represents a "parameter group"
+ // (either a constant buffer or a parameter block with non-resource
+ // data in it), we will prefer to emit it as an ordinary `cbuffer`
+ // declaration or `uniform` block, even when emitting HLSL for
+ // D3D profiles that support the explicit `ConstantBuffer<T>` type.
+ //
+ // Alternatively, we could make this choice based on profile, and
+ // prefer `ConstantBuffer<T>` on profiles that support it and/or when
+ // the input code used that syntax.
+ //
if (auto paramBlockType = as<IRUniformParameterGroupType>(varType))
{
emitIRParameterGroup(
@@ -6140,8 +5937,31 @@ struct EmitVisitor
if(getTarget(ctx) == CodeGenTarget::GLSL)
{
- // When outputting GLSL, we need to transform any declaration of
- // a `*StructuredBuffer<T>` into an ordinary `buffer` declaration.
+ // There are a number of types that are (or can be)
+ // "first-class" in D3D HLSL, but are second-class in GLSL in
+ // that they require explicit global declarations for each value/object,
+ // and don't support declaration as ordinary variables.
+ //
+ // This includes constant buffers (`uniform` blocks) as well as
+ // structured and byte-address buffers (both mapping to `buffer` blocks).
+ //
+ // We intercept these types, and arrays thereof, to produce the required
+ // global declarations. This assumes that earlier "legalization" passes
+ // already performed the work of pulling fields with these types out of
+ // aggregates.
+ //
+ // Note: this also assumes that these types are not used as function
+ // parameters/results, local variables, etc. Additional legalization
+ // steps are required to guarantee these conditions.
+ //
+ if (auto paramBlockType = as<IRUniformParameterGroupType>(unwrapArray(varType)))
+ {
+ emitGLSLParameterGroup(
+ ctx,
+ varDecl,
+ paramBlockType);
+ return;
+ }
if( auto structuredBufferType = as<IRHLSLStructuredBufferTypeBase>(unwrapArray(varType)) )
{
emitIRStructuredBuffer_GLSL(
@@ -6150,9 +5970,6 @@ struct EmitVisitor
structuredBufferType);
return;
}
-
- // When outputting GLSL, we need to transform any declaration of
- // a `*ByteAddressBuffer<T>` into an ordinary `buffer` declaration.
if( auto byteAddressBufferType = as<IRByteAddressBufferTypeBase>(unwrapArray(varType)) )
{
emitIRByteAddressBuffer_GLSL(
@@ -6166,7 +5983,15 @@ struct EmitVisitor
// when outputting GLSL (well, except in the case where they
// actually *require* redeclaration...).
//
- // TODO: can we detect this more robustly?
+ // Note: these won't be variables the user declared explicitly
+ // in their code, but rather variables that we generated as
+ // part of legalizing the varying input/output signature of
+ // an entry point for GL/Vulkan.
+ //
+ // TODO: This could be handled more robustly by attaching an
+ // appropriate decoration to these variables to indicate their
+ // purpose.
+ //
if(getText(varDecl->mangledName).StartsWith("gl_"))
{
// The variable represents an OpenGL system value,
diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang
index 950931fc2..98b50e574 100644
--- a/source/slang/hlsl.meta.slang
+++ b/source/slang/hlsl.meta.slang
@@ -18,26 +18,26 @@ __magic_type(HLSLByteAddressBufferType)
__intrinsic_type($(kIROp_HLSLByteAddressBufferType))
struct ByteAddressBuffer
{
- __target_intrinsic(glsl, "$1 = $0.length()")
+ __target_intrinsic(glsl, "$1 = $0._data.length()")
void GetDimensions(
out uint dim);
- __target_intrinsic(glsl, "$0[$1]")
+ __target_intrinsic(glsl, "$0._data[$1]")
uint Load(int location);
uint Load(int location, out uint status);
- __target_intrinsic(glsl, "uvec2($0[$1], $0[$1+1])")
+ __target_intrinsic(glsl, "uvec2($0._data[$1], $0._data[$1+1])")
uint2 Load2(int location);
uint2 Load2(int location, out uint status);
- __target_intrinsic(glsl, "uvec3($0[$1], $0[$1+1], $0[$1+2])")
+ __target_intrinsic(glsl, "uvec3($0._data[$1], $0._data[$1+1], $0._data[$1+2])")
uint3 Load3(int location);
uint3 Load3(int location, out uint status);
- __target_intrinsic(glsl, "uvec4($0[$1], $0[$1+1], $0[$1+2], $0[$1+3])")
+ __target_intrinsic(glsl, "uvec4($0._data[$1], $0._data[$1+1], $0._data[$1+2], $0._data[$1+3])")
uint4 Load4(int location);
uint4 Load4(int location, out uint status);
@@ -55,7 +55,11 @@ struct StructuredBuffer
T Load(int location);
T Load(int location, out uint status);
- __subscript(uint index) -> T { __intrinsic_op(bufferLoad) get; };
+ __subscript(uint index) -> T
+ {
+ __target_intrinsic(glsl, "$0._data[$1]")
+ get;
+ };
};
__generic<T>
@@ -105,133 +109,133 @@ struct $(item.name)
// Note(tfoley): supports all operations from `ByteAddressBuffer`
// TODO(tfoley): can this be made a sub-type?
- __target_intrinsic(glsl, "$1 = $0.length()")
+ __target_intrinsic(glsl, "$1 = $0._data.length()")
void GetDimensions(
out uint dim);
- __target_intrinsic(glsl, "$0[$1]")
+ __target_intrinsic(glsl, "$0._data[$1]")
uint Load(int location);
uint Load(int location, out uint status);
- __target_intrinsic(glsl, "uvec2($0[$1], $0[$1+4])")
+ __target_intrinsic(glsl, "uvec2($0._data[$1], $0._data[$1+4])")
uint2 Load2(int location);
uint2 Load2(int location, out uint status);
- __target_intrinsic(glsl, "uvec3($0[$1], $0[$1+4], $0[$1+8])")
+ __target_intrinsic(glsl, "uvec3($0._data[$1], $0._data[$1+4], $0._data[$1+8])")
uint3 Load3(int location);
uint3 Load3(int location, out uint status);
- __target_intrinsic(glsl, "uvec4($0[$1], $0[$1+4], $0[$1+8], $0[$1+12])")
+ __target_intrinsic(glsl, "uvec4($0._data[$1], $0._data[$1+4], $0._data[$1+8], $0._data[$1+12])")
uint4 Load4(int location);
uint4 Load4(int location, out uint status);
// Added operations:
- __target_intrinsic(glsl, "($3 = atomicAdd($0[$1], $2))")
+ __target_intrinsic(glsl, "($3 = atomicAdd($0._data[$1], $2))")
void InterlockedAdd(
UINT dest,
UINT value,
out UINT original_value);
- __target_intrinsic(glsl, "atomicAdd($0[$1], $2)")
+ __target_intrinsic(glsl, "atomicAdd($0._data[$1], $2)")
void InterlockedAdd(
UINT dest,
UINT value);
- __target_intrinsic(glsl, "($3 = atomicAnd($0[$1], $2))")
+ __target_intrinsic(glsl, "($3 = atomicAnd($0._data[$1], $2))")
void InterlockedAnd(
UINT dest,
UINT value,
out UINT original_value);
- __target_intrinsic(glsl, "atomicAnd($0[$1], $2)")
+ __target_intrinsic(glsl, "atomicAnd($0._data[$1], $2)")
void InterlockedAnd(
UINT dest,
UINT value);
- __target_intrinsic(glsl, "($4 = atomicCompSwap($0[$1], $2, $3))")
+ __target_intrinsic(glsl, "($4 = atomicCompSwap($0._data[$1], $2, $3))")
void InterlockedCompareExchange(
UINT dest,
UINT compare_value,
UINT value,
out UINT original_value);
- __target_intrinsic(glsl, "atomicCompSwap($0[$1], $2, $3)")
+ __target_intrinsic(glsl, "atomicCompSwap($0._data[$1], $2, $3)")
void InterlockedCompareStore(
UINT dest,
UINT compare_value,
UINT value);
- __target_intrinsic(glsl, "($3 = atomicExchange($0[$1], $2))")
+ __target_intrinsic(glsl, "($3 = atomicExchange($0._data[$1], $2))")
void InterlockedExchange(
UINT dest,
UINT value,
out UINT original_value);
- __target_intrinsic(glsl, "($3 = atomicMax($0[$1], $2))")
+ __target_intrinsic(glsl, "($3 = atomicMax($0._data[$1], $2))")
void InterlockedMax(
UINT dest,
UINT value,
out UINT original_value);
- __target_intrinsic(glsl, "atomicMax($0[$1], $2)")
+ __target_intrinsic(glsl, "atomicMax($0._data[$1], $2)")
void InterlockedMax(
UINT dest,
UINT value);
- __target_intrinsic(glsl, "($3 = atomicMin($0[$1], $2))")
+ __target_intrinsic(glsl, "($3 = atomicMin($0._data[$1], $2))")
void InterlockedMin(
UINT dest,
UINT value,
out UINT original_value);
- __target_intrinsic(glsl, "atomicMin($0[$1], $2)")
+ __target_intrinsic(glsl, "atomicMin($0._data[$1], $2)")
void InterlockedMin(
UINT dest,
UINT value);
- __target_intrinsic(glsl, "($3 = atomicOr($0[$1], $2))")
+ __target_intrinsic(glsl, "($3 = atomicOr($0._data[$1], $2))")
void InterlockedOr(
UINT dest,
UINT value,
out UINT original_value);
- __target_intrinsic(glsl, "atomicOr($0[$1], $2)")
+ __target_intrinsic(glsl, "atomicOr($0._data[$1], $2)")
void InterlockedOr(
UINT dest,
UINT value);
- __target_intrinsic(glsl, "($3 = atomicXor($0[$1], $2))")
+ __target_intrinsic(glsl, "($3 = atomicXor($0._data[$1], $2))")
void InterlockedXor(
UINT dest,
UINT value,
out UINT original_value);
- __target_intrinsic(glsl, "atomicXor($0[$1], $2)")
+ __target_intrinsic(glsl, "atomicXor($0._data[$1], $2)")
void InterlockedXor(
UINT dest,
UINT value);
- __target_intrinsic(glsl, "$0[$1] = $2")
+ __target_intrinsic(glsl, "$0._data[$1] = $2")
void Store(
uint address,
uint value);
- __target_intrinsic(glsl, "$0[$1] = $2.x, $0[$1+4] = $2.y")
+ __target_intrinsic(glsl, "$0._data[$1] = $2.x, $0._data[$1+4] = $2.y")
void Store2(
uint address,
uint2 value);
- __target_intrinsic(glsl, "$0[$1] = $2.x, $0[$1+4] = $2.y, $0[$1+8] = $2.z")
+ __target_intrinsic(glsl, "$0._data[$1] = $2.x, $0._data[$1+4] = $2.y, $0._data[$1+8] = $2.z")
void Store3(
uint address,
uint3 value);
- __target_intrinsic(glsl, "$0[$1] = $2.x, $0[$1+4] = $2.y, $0[$1+8] = $2.z, $0[$1+12] = $2.w")
+ __target_intrinsic(glsl, "$0._data[$1] = $2.x, $0._data[$1+4] = $2.y, $0._data[$1+8] = $2.z, $0._data[$1+12] = $2.w")
void Store4(
uint address,
uint4 value);
@@ -270,11 +274,11 @@ struct $(item.name)
T Load(int location);
T Load(int location, out uint status);
- __subscript(uint index) -> T
- {
- __intrinsic_op(bufferElementRef)
+ __subscript(uint index) -> T
+ {
+ __target_intrinsic(glsl, "$0._data[$1]")
ref;
- }
+ }
};
${{{{
diff --git a/source/slang/hlsl.meta.slang.h b/source/slang/hlsl.meta.slang.h
index 8d908c13d..ea21a0fde 100644
--- a/source/slang/hlsl.meta.slang.h
+++ b/source/slang/hlsl.meta.slang.h
@@ -24,26 +24,26 @@ SLANG_SPLICE(kIROp_HLSLByteAddressBufferType
SLANG_RAW(")\n")
SLANG_RAW("struct ByteAddressBuffer\n")
SLANG_RAW("{\n")
-SLANG_RAW(" __target_intrinsic(glsl, \"$1 = $0.length()\")\n")
+SLANG_RAW(" __target_intrinsic(glsl, \"$1 = $0._data.length()\")\n")
SLANG_RAW(" void GetDimensions(\n")
SLANG_RAW(" out uint dim);\n")
SLANG_RAW("\n")
-SLANG_RAW(" __target_intrinsic(glsl, \"$0[$1]\")\n")
+SLANG_RAW(" __target_intrinsic(glsl, \"$0._data[$1]\")\n")
SLANG_RAW(" uint Load(int location);\n")
SLANG_RAW("\n")
SLANG_RAW(" uint Load(int location, out uint status);\n")
SLANG_RAW("\n")
-SLANG_RAW(" __target_intrinsic(glsl, \"uvec2($0[$1], $0[$1+1])\")\n")
+SLANG_RAW(" __target_intrinsic(glsl, \"uvec2($0._data[$1], $0._data[$1+1])\")\n")
SLANG_RAW(" uint2 Load2(int location);\n")
SLANG_RAW("\n")
SLANG_RAW(" uint2 Load2(int location, out uint status);\n")
SLANG_RAW("\n")
-SLANG_RAW(" __target_intrinsic(glsl, \"uvec3($0[$1], $0[$1+1], $0[$1+2])\")\n")
+SLANG_RAW(" __target_intrinsic(glsl, \"uvec3($0._data[$1], $0._data[$1+1], $0._data[$1+2])\")\n")
SLANG_RAW(" uint3 Load3(int location);\n")
SLANG_RAW("\n")
SLANG_RAW(" uint3 Load3(int location, out uint status);\n")
SLANG_RAW("\n")
-SLANG_RAW(" __target_intrinsic(glsl, \"uvec4($0[$1], $0[$1+1], $0[$1+2], $0[$1+3])\")\n")
+SLANG_RAW(" __target_intrinsic(glsl, \"uvec4($0._data[$1], $0._data[$1+1], $0._data[$1+2], $0._data[$1+3])\")\n")
SLANG_RAW(" uint4 Load4(int location);\n")
SLANG_RAW("\n")
SLANG_RAW(" uint4 Load4(int location, out uint status);\n")
@@ -64,7 +64,11 @@ SLANG_RAW("\n")
SLANG_RAW(" T Load(int location);\n")
SLANG_RAW(" T Load(int location, out uint status);\n")
SLANG_RAW("\n")
-SLANG_RAW(" __subscript(uint index) -> T { __intrinsic_op(bufferLoad) get; };\n")
+SLANG_RAW(" __subscript(uint index) -> T\n")
+SLANG_RAW(" {\n")
+SLANG_RAW(" __target_intrinsic(glsl, \"$0._data[$1]\")\n")
+SLANG_RAW(" get;\n")
+SLANG_RAW(" };\n")
SLANG_RAW("};\n")
SLANG_RAW("\n")
SLANG_RAW("__generic<T>\n")
@@ -132,133 +136,133 @@ SLANG_RAW("{\n")
SLANG_RAW(" // Note(tfoley): supports all operations from `ByteAddressBuffer`\n")
SLANG_RAW(" // TODO(tfoley): can this be made a sub-type?\n")
SLANG_RAW("\n")
-SLANG_RAW(" __target_intrinsic(glsl, \"$1 = $0.length()\")\n")
+SLANG_RAW(" __target_intrinsic(glsl, \"$1 = $0._data.length()\")\n")
SLANG_RAW(" void GetDimensions(\n")
SLANG_RAW(" out uint dim);\n")
SLANG_RAW("\n")
-SLANG_RAW(" __target_intrinsic(glsl, \"$0[$1]\")\n")
+SLANG_RAW(" __target_intrinsic(glsl, \"$0._data[$1]\")\n")
SLANG_RAW(" uint Load(int location);\n")
SLANG_RAW("\n")
SLANG_RAW(" uint Load(int location, out uint status);\n")
SLANG_RAW("\n")
-SLANG_RAW(" __target_intrinsic(glsl, \"uvec2($0[$1], $0[$1+4])\")\n")
+SLANG_RAW(" __target_intrinsic(glsl, \"uvec2($0._data[$1], $0._data[$1+4])\")\n")
SLANG_RAW(" uint2 Load2(int location);\n")
SLANG_RAW("\n")
SLANG_RAW(" uint2 Load2(int location, out uint status);\n")
SLANG_RAW("\n")
-SLANG_RAW(" __target_intrinsic(glsl, \"uvec3($0[$1], $0[$1+4], $0[$1+8])\")\n")
+SLANG_RAW(" __target_intrinsic(glsl, \"uvec3($0._data[$1], $0._data[$1+4], $0._data[$1+8])\")\n")
SLANG_RAW(" uint3 Load3(int location);\n")
SLANG_RAW("\n")
SLANG_RAW(" uint3 Load3(int location, out uint status);\n")
SLANG_RAW("\n")
-SLANG_RAW(" __target_intrinsic(glsl, \"uvec4($0[$1], $0[$1+4], $0[$1+8], $0[$1+12])\")\n")
+SLANG_RAW(" __target_intrinsic(glsl, \"uvec4($0._data[$1], $0._data[$1+4], $0._data[$1+8], $0._data[$1+12])\")\n")
SLANG_RAW(" uint4 Load4(int location);\n")
SLANG_RAW("\n")
SLANG_RAW(" uint4 Load4(int location, out uint status);\n")
SLANG_RAW("\n")
SLANG_RAW(" // Added operations:\n")
SLANG_RAW("\n")
-SLANG_RAW(" __target_intrinsic(glsl, \"($3 = atomicAdd($0[$1], $2))\")\n")
+SLANG_RAW(" __target_intrinsic(glsl, \"($3 = atomicAdd($0._data[$1], $2))\")\n")
SLANG_RAW(" void InterlockedAdd(\n")
SLANG_RAW(" UINT dest,\n")
SLANG_RAW(" UINT value,\n")
SLANG_RAW(" out UINT original_value);\n")
SLANG_RAW("\n")
-SLANG_RAW(" __target_intrinsic(glsl, \"atomicAdd($0[$1], $2)\")\n")
+SLANG_RAW(" __target_intrinsic(glsl, \"atomicAdd($0._data[$1], $2)\")\n")
SLANG_RAW(" void InterlockedAdd(\n")
SLANG_RAW(" UINT dest,\n")
SLANG_RAW(" UINT value);\n")
SLANG_RAW("\n")
-SLANG_RAW(" __target_intrinsic(glsl, \"($3 = atomicAnd($0[$1], $2))\")\n")
+SLANG_RAW(" __target_intrinsic(glsl, \"($3 = atomicAnd($0._data[$1], $2))\")\n")
SLANG_RAW(" void InterlockedAnd(\n")
SLANG_RAW(" UINT dest,\n")
SLANG_RAW(" UINT value,\n")
SLANG_RAW(" out UINT original_value);\n")
SLANG_RAW("\n")
-SLANG_RAW(" __target_intrinsic(glsl, \"atomicAnd($0[$1], $2)\")\n")
+SLANG_RAW(" __target_intrinsic(glsl, \"atomicAnd($0._data[$1], $2)\")\n")
SLANG_RAW(" void InterlockedAnd(\n")
SLANG_RAW(" UINT dest,\n")
SLANG_RAW(" UINT value);\n")
SLANG_RAW("\n")
-SLANG_RAW(" __target_intrinsic(glsl, \"($4 = atomicCompSwap($0[$1], $2, $3))\")\n")
+SLANG_RAW(" __target_intrinsic(glsl, \"($4 = atomicCompSwap($0._data[$1], $2, $3))\")\n")
SLANG_RAW(" void InterlockedCompareExchange(\n")
SLANG_RAW(" UINT dest,\n")
SLANG_RAW(" UINT compare_value,\n")
SLANG_RAW(" UINT value,\n")
SLANG_RAW(" out UINT original_value);\n")
SLANG_RAW("\n")
-SLANG_RAW(" __target_intrinsic(glsl, \"atomicCompSwap($0[$1], $2, $3)\")\n")
+SLANG_RAW(" __target_intrinsic(glsl, \"atomicCompSwap($0._data[$1], $2, $3)\")\n")
SLANG_RAW(" void InterlockedCompareStore(\n")
SLANG_RAW(" UINT dest,\n")
SLANG_RAW(" UINT compare_value,\n")
SLANG_RAW(" UINT value);\n")
SLANG_RAW("\n")
-SLANG_RAW(" __target_intrinsic(glsl, \"($3 = atomicExchange($0[$1], $2))\")\n")
+SLANG_RAW(" __target_intrinsic(glsl, \"($3 = atomicExchange($0._data[$1], $2))\")\n")
SLANG_RAW(" void InterlockedExchange(\n")
SLANG_RAW(" UINT dest,\n")
SLANG_RAW(" UINT value,\n")
SLANG_RAW(" out UINT original_value);\n")
SLANG_RAW("\n")
-SLANG_RAW(" __target_intrinsic(glsl, \"($3 = atomicMax($0[$1], $2))\")\n")
+SLANG_RAW(" __target_intrinsic(glsl, \"($3 = atomicMax($0._data[$1], $2))\")\n")
SLANG_RAW(" void InterlockedMax(\n")
SLANG_RAW(" UINT dest,\n")
SLANG_RAW(" UINT value,\n")
SLANG_RAW(" out UINT original_value);\n")
SLANG_RAW("\n")
-SLANG_RAW(" __target_intrinsic(glsl, \"atomicMax($0[$1], $2)\")\n")
+SLANG_RAW(" __target_intrinsic(glsl, \"atomicMax($0._data[$1], $2)\")\n")
SLANG_RAW(" void InterlockedMax(\n")
SLANG_RAW(" UINT dest,\n")
SLANG_RAW(" UINT value);\n")
SLANG_RAW("\n")
-SLANG_RAW(" __target_intrinsic(glsl, \"($3 = atomicMin($0[$1], $2))\")\n")
+SLANG_RAW(" __target_intrinsic(glsl, \"($3 = atomicMin($0._data[$1], $2))\")\n")
SLANG_RAW(" void InterlockedMin(\n")
SLANG_RAW(" UINT dest,\n")
SLANG_RAW(" UINT value,\n")
SLANG_RAW(" out UINT original_value);\n")
SLANG_RAW("\n")
-SLANG_RAW(" __target_intrinsic(glsl, \"atomicMin($0[$1], $2)\")\n")
+SLANG_RAW(" __target_intrinsic(glsl, \"atomicMin($0._data[$1], $2)\")\n")
SLANG_RAW(" void InterlockedMin(\n")
SLANG_RAW(" UINT dest,\n")
SLANG_RAW(" UINT value);\n")
SLANG_RAW("\n")
-SLANG_RAW(" __target_intrinsic(glsl, \"($3 = atomicOr($0[$1], $2))\")\n")
+SLANG_RAW(" __target_intrinsic(glsl, \"($3 = atomicOr($0._data[$1], $2))\")\n")
SLANG_RAW(" void InterlockedOr(\n")
SLANG_RAW(" UINT dest,\n")
SLANG_RAW(" UINT value,\n")
SLANG_RAW(" out UINT original_value);\n")
SLANG_RAW("\n")
-SLANG_RAW(" __target_intrinsic(glsl, \"atomicOr($0[$1], $2)\")\n")
+SLANG_RAW(" __target_intrinsic(glsl, \"atomicOr($0._data[$1], $2)\")\n")
SLANG_RAW(" void InterlockedOr(\n")
SLANG_RAW(" UINT dest,\n")
SLANG_RAW(" UINT value);\n")
SLANG_RAW("\n")
-SLANG_RAW(" __target_intrinsic(glsl, \"($3 = atomicXor($0[$1], $2))\")\n")
+SLANG_RAW(" __target_intrinsic(glsl, \"($3 = atomicXor($0._data[$1], $2))\")\n")
SLANG_RAW(" void InterlockedXor(\n")
SLANG_RAW(" UINT dest,\n")
SLANG_RAW(" UINT value,\n")
SLANG_RAW(" out UINT original_value);\n")
SLANG_RAW("\n")
-SLANG_RAW(" __target_intrinsic(glsl, \"atomicXor($0[$1], $2)\")\n")
+SLANG_RAW(" __target_intrinsic(glsl, \"atomicXor($0._data[$1], $2)\")\n")
SLANG_RAW(" void InterlockedXor(\n")
SLANG_RAW(" UINT dest,\n")
SLANG_RAW(" UINT value);\n")
SLANG_RAW("\n")
-SLANG_RAW(" __target_intrinsic(glsl, \"$0[$1] = $2\")\n")
+SLANG_RAW(" __target_intrinsic(glsl, \"$0._data[$1] = $2\")\n")
SLANG_RAW(" void Store(\n")
SLANG_RAW(" uint address,\n")
SLANG_RAW(" uint value);\n")
SLANG_RAW("\n")
-SLANG_RAW(" __target_intrinsic(glsl, \"$0[$1] = $2.x, $0[$1+4] = $2.y\")\n")
+SLANG_RAW(" __target_intrinsic(glsl, \"$0._data[$1] = $2.x, $0._data[$1+4] = $2.y\")\n")
SLANG_RAW(" void Store2(\n")
SLANG_RAW(" uint address,\n")
SLANG_RAW(" uint2 value);\n")
SLANG_RAW("\n")
-SLANG_RAW(" __target_intrinsic(glsl, \"$0[$1] = $2.x, $0[$1+4] = $2.y, $0[$1+8] = $2.z\")\n")
+SLANG_RAW(" __target_intrinsic(glsl, \"$0._data[$1] = $2.x, $0._data[$1+4] = $2.y, $0._data[$1+8] = $2.z\")\n")
SLANG_RAW(" void Store3(\n")
SLANG_RAW(" uint address,\n")
SLANG_RAW(" uint3 value);\n")
SLANG_RAW("\n")
-SLANG_RAW(" __target_intrinsic(glsl, \"$0[$1] = $2.x, $0[$1+4] = $2.y, $0[$1+8] = $2.z, $0[$1+12] = $2.w\")\n")
+SLANG_RAW(" __target_intrinsic(glsl, \"$0._data[$1] = $2.x, $0._data[$1+4] = $2.y, $0._data[$1+8] = $2.z, $0._data[$1+12] = $2.w\")\n")
SLANG_RAW(" void Store4(\n")
SLANG_RAW(" uint address,\n")
SLANG_RAW(" uint4 value);\n")
@@ -306,11 +310,11 @@ SLANG_RAW("\n")
SLANG_RAW(" T Load(int location);\n")
SLANG_RAW(" T Load(int location, out uint status);\n")
SLANG_RAW("\n")
-SLANG_RAW("\t__subscript(uint index) -> T\n")
-SLANG_RAW("\t{\n")
-SLANG_RAW(" __intrinsic_op(bufferElementRef)\n")
+SLANG_RAW(" __subscript(uint index) -> T\n")
+SLANG_RAW(" {\n")
+SLANG_RAW(" __target_intrinsic(glsl, \"$0._data[$1]\")\n")
SLANG_RAW(" ref;\n")
-SLANG_RAW("\t}\n")
+SLANG_RAW(" }\n")
SLANG_RAW("};\n")
SLANG_RAW("\n")
diff --git a/source/slang/ir-inst-defs.h b/source/slang/ir-inst-defs.h
index 8f997cbe2..09c11ed16 100644
--- a/source/slang/ir-inst-defs.h
+++ b/source/slang/ir-inst-defs.h
@@ -217,10 +217,6 @@ INST(Var, var, 0, 0)
INST(Load, load, 1, 0)
INST(Store, store, 2, 0)
-INST(BufferLoad, bufferLoad, 2, 0)
-INST(BufferStore, bufferStore, 3, 0)
-INST(BufferElementRef, bufferElementRef, 2, 0)
-
INST(FieldExtract, get_field, 2, 0)
INST(FieldAddress, get_field_addr, 2, 0)
diff --git a/source/slang/ir.cpp b/source/slang/ir.cpp
index 599b02ea7..2f16f4ebc 100644
--- a/source/slang/ir.cpp
+++ b/source/slang/ir.cpp
@@ -3576,7 +3576,6 @@ namespace Slang
case kIROp_makeArray:
case kIROp_makeStruct:
case kIROp_Load: // We are ignoring the possibility of loads from bad addresses, or `volatile` loads
- case kIROp_BufferLoad:
case kIROp_FieldExtract:
case kIROp_FieldAddress:
case kIROp_getElement:
diff --git a/source/slang/type-layout.cpp b/source/slang/type-layout.cpp
index 8fc48fe4f..2d21d7aef 100644
--- a/source/slang/type-layout.cpp
+++ b/source/slang/type-layout.cpp
@@ -2276,6 +2276,17 @@ RefPtr<TypeLayout> CreateTypeLayout(
return typeLayout;
}
+RefPtr<TypeLayout> TypeLayout::unwrapArray()
+{
+ TypeLayout* typeLayout = this;
+
+ while(auto arrayTypeLayout = dynamic_cast<ArrayTypeLayout*>(typeLayout))
+ typeLayout = arrayTypeLayout->elementTypeLayout;
+
+ return typeLayout;
+}
+
+
RefPtr<GlobalGenericParamDecl> GenericParamTypeLayout::getGlobalGenericParamDecl()
{
auto declRefType = type->AsDeclRefType();
diff --git a/source/slang/type-layout.h b/source/slang/type-layout.h
index fa874cb80..6f6dad055 100644
--- a/source/slang/type-layout.h
+++ b/source/slang/type-layout.h
@@ -354,6 +354,13 @@ public:
info.count = count;
addResourceUsage(info);
}
+
+ /// "Unwrap" any layers of array-ness from this type layout.
+ ///
+ /// If this is an `ArrayTypeLayout`, returns the result of unwrapping the element type layout.
+ /// Otherwise, returns this type layout.
+ ///
+ RefPtr<TypeLayout> unwrapArray();
};
typedef unsigned int VarLayoutFlags;
diff --git a/source/slang/vm.cpp b/source/slang/vm.cpp
index fa59a741b..0f79c763b 100644
--- a/source/slang/vm.cpp
+++ b/source/slang/vm.cpp
@@ -846,72 +846,6 @@ void resumeThread(
}
break;
- case kIROp_BufferLoad:
- {
- VMType type = decodeType(frame, &ip);
- UInt argCount = decodeUInt(&ip);
- void* argPtrs[16] = { 0 };
- for( UInt aa = 0; aa < argCount; ++aa )
- {
- void* argPtr = decodeOperandPtr<void>(frame, &ip);
- argPtrs[aa] = argPtr;
- }
-
- void* dest = decodeOperandPtr<void>(frame, &ip);
-
- char* bufferData = *(char**)argPtrs[0];
- uint32_t index = *(uint32_t*)argPtrs[1];
-
- auto size = type.getSize();
- char* elementData = bufferData + index*size;
- memcpy(dest, elementData, size);
- }
- break;
-
- case kIROp_BufferStore:
- {
- VMType resultType = decodeType(frame, &ip);
- /*UInt argCount = */decodeUInt(&ip);
-
- char* bufferData = decodeOperand<char*>(frame, &ip);
- uint32_t index = decodeOperand<uint32_t>(frame, &ip);
-
- auto srcPtrAndType = decodeOperandPtrAndType(frame, &ip);
- void* srcPtr = srcPtrAndType.ptr;
- VMType type = srcPtrAndType.type;
-
- auto size = type.getSize();
- char* elementData = bufferData + index*size;
- memcpy(elementData, srcPtr, size);
- }
- break;
-
- case kIROp_BufferElementRef:
- {
- VMType ptrType = decodeType(frame, &ip);
- VMType type = ((VMPtrTypeImpl*)ptrType.getImpl())->base;
-
- UInt argCount = decodeUInt(&ip);
- void* argPtrs[16] = { 0 };
- for( UInt aa = 0; aa < argCount; ++aa )
- {
- void* argPtr = decodeOperandPtr<void>(frame, &ip);
- argPtrs[aa] = argPtr;
- }
-
- void* dest = decodeOperandPtr<void>(frame, &ip);
-
- char* bufferData = *(char**)argPtrs[0];
- uint32_t index = *(uint32_t*)argPtrs[1];
-
- auto size = type.getSize();
- char* elementData = bufferData + index*size;
-
- *(void**)dest = elementData;
- }
- break;
-
-
case kIROp_Call:
{
VMType type = decodeType(frame, &ip);
diff --git a/tests/bindings/binding0.hlsl b/tests/bindings/binding0.hlsl
index 85f17e940..5516b0135 100644
--- a/tests/bindings/binding0.hlsl
+++ b/tests/bindings/binding0.hlsl
@@ -6,8 +6,15 @@
#ifdef __SLANG__
#define R(X) /**/
+#define BEGIN_CBUFFER(NAME) cbuffer NAME
+#define END_CBUFFER(NAME, REG) /**/
+#define CBUFFER_REF(NAME, FIELD) FIELD
+
#else
#define R(X) X
+#define BEGIN_CBUFFER(NAME) struct SLANG_ParameterGroup_##NAME
+#define END_CBUFFER(NAME, REG) ; cbuffer NAME : REG { SLANG_ParameterGroup_##NAME NAME; }
+#define CBUFFER_REF(NAME, FIELD) NAME.FIELD
#define C C_0
#define t t_0
@@ -22,12 +29,13 @@ float4 use(Texture2D t, SamplerState s) { return t.Sample(s, 0.0); }
Texture2D t R(: register(t0));
SamplerState s R(: register(s0));
-cbuffer C R(: register(b0))
+BEGIN_CBUFFER(C)
{
float c;
}
+END_CBUFFER(C, register(b0))
float4 main() : SV_TARGET
{
- return use(t,s) + use(c);
+ return use(t,s) + use(CBUFFER_REF(C,c));
} \ No newline at end of file
diff --git a/tests/bindings/binding1.hlsl b/tests/bindings/binding1.hlsl
index 8709c31c6..47ab22bb9 100644
--- a/tests/bindings/binding1.hlsl
+++ b/tests/bindings/binding1.hlsl
@@ -13,8 +13,14 @@
#ifdef __SLANG__
#define R(X) /**/
+#define BEGIN_CBUFFER(NAME) cbuffer NAME
+#define END_CBUFFER(NAME, REG) /**/
+#define CBUFFER_REF(NAME, FIELD) FIELD
#else
#define R(X) X
+#define BEGIN_CBUFFER(NAME) struct SLANG_ParameterGroup_##NAME
+#define END_CBUFFER(NAME, REG) ; cbuffer NAME : REG { SLANG_ParameterGroup_##NAME NAME; }
+#define CBUFFER_REF(NAME, FIELD) NAME.FIELD
#define tB tB_0
#define sB sB_0
@@ -32,17 +38,19 @@ Texture2D tB R(: register(t1));
SamplerState sA R(: register(s0));
SamplerState sB R(: register(s1));
-cbuffer C0 R(: register(b0))
+BEGIN_CBUFFER(C0)
{
float c0;
}
+END_CBUFFER(C0, register(b0))
-cbuffer C1 R(: register(b1))
+BEGIN_CBUFFER(C1)
{
float c1;
}
+END_CBUFFER(C1, register(b1))
float4 main() : SV_TARGET
{
- return use(tB,sB) + use(c1);
+ return use(tB,sB) + use(CBUFFER_REF(C1,c1));
} \ No newline at end of file
diff --git a/tests/bindings/explicit-binding.hlsl b/tests/bindings/explicit-binding.hlsl
index 9c38cdee0..420eafec1 100644
--- a/tests/bindings/explicit-binding.hlsl
+++ b/tests/bindings/explicit-binding.hlsl
@@ -5,8 +5,16 @@
#ifdef __SLANG__
#define R(X) /**/
+#define BEGIN_CBUFFER(NAME) cbuffer NAME
+#define BEGIN_CBUFFER_R(NAME, REG) cbuffer NAME : REG
+#define END_CBUFFER(NAME, REG) /**/
+#define CBUFFER_REF(NAME, FIELD) FIELD
#else
#define R(X) X
+#define BEGIN_CBUFFER(NAME) struct SLANG_ParameterGroup_##NAME
+#define BEGIN_CBUFFER_R(NAME, REG) BEGIN_CBUFFER(NAME)
+#define END_CBUFFER(NAME, REG) ; cbuffer NAME : REG { SLANG_ParameterGroup_##NAME NAME; }
+#define CBUFFER_REF(NAME, FIELD) NAME.FIELD
#define CA CA_0
#define ca ca_0
@@ -49,25 +57,29 @@ SamplerState sc : register(s1);
// and even to make things non-contiguous. Here we bind
// the third constnat buffer to register `b9`
//
-cbuffer CA R(: register(b0))
+BEGIN_CBUFFER(CA)
{
float ca;
}
+END_CBUFFER(CA, register(b0))
+
//
-cbuffer CB R(: register(b1))
+BEGIN_CBUFFER(CB)
{
float cb;
}
+END_CBUFFER(CB, register(b1))
//
-cbuffer CC : register(b9)
+BEGIN_CBUFFER_R(CC, register(b9))
{
float cc;
}
+END_CBUFFER(CC, register(b9))
float4 main() : SV_TARGET
{
// Go ahead and use everything in this case:
- return use(ta, sa) + use(ca)
- + use(tb, sb) + use(cb)
- + use(tc, sc) + use(cc);
+ return use(ta, sa) + use(CBUFFER_REF(CA,ca))
+ + use(tb, sb) + use(CBUFFER_REF(CB,cb))
+ + use(tc, sc) + use(CBUFFER_REF(CC,cc));
} \ No newline at end of file
diff --git a/tests/bindings/glsl-parameter-blocks.slang.glsl b/tests/bindings/glsl-parameter-blocks.slang.glsl
index 3ade8bb6b..a27fbb3db 100644
--- a/tests/bindings/glsl-parameter-blocks.slang.glsl
+++ b/tests/bindings/glsl-parameter-blocks.slang.glsl
@@ -25,8 +25,8 @@ struct Test
layout(binding = 0)
uniform ParameterBlock_gTest
{
- Test gTest;
-};
+ Test _data;
+} gTest;
layout(binding = 1)
uniform texture2D gTest_t;
@@ -42,7 +42,7 @@ in vec2 uv;
void main()
{
- vec4 temp_a = gTest.a;
+ vec4 temp_a = gTest._data.a;
vec4 temp_sample = texture(sampler2D(gTest_t, gTest_s), uv);
diff --git a/tests/bindings/multi-file-extra.hlsl b/tests/bindings/multi-file-extra.hlsl
index 92227d54a..fe8766dcd 100644
--- a/tests/bindings/multi-file-extra.hlsl
+++ b/tests/bindings/multi-file-extra.hlsl
@@ -7,8 +7,14 @@
#ifdef __SLANG__
#define R(X) /**/
+#define BEGIN_CBUFFER(NAME) cbuffer NAME
+#define END_CBUFFER(NAME, REG) /**/
+#define CBUFFER_REF(NAME, FIELD) FIELD
#else
#define R(X) X
+#define BEGIN_CBUFFER(NAME) struct SLANG_ParameterGroup_##NAME
+#define END_CBUFFER(NAME, REG) ; cbuffer NAME : REG { SLANG_ParameterGroup_##NAME NAME; }
+#define CBUFFER_REF(NAME, FIELD) NAME.FIELD
#define sharedC sharedC_0
#define sharedCA sharedCA_0
@@ -50,13 +56,15 @@ float4 use(Texture2D t, SamplerState s) { return t.Sample(s, 0.0); }
// Start with some parameters that will appear in both shaders
Texture2D sharedT R(: register(t0));
SamplerState sharedS R(: register(s0));
-cbuffer sharedC R(: register(b0))
+
+BEGIN_CBUFFER(sharedC)
{
- float3 sharedCA R(: packoffset(c0));
- float sharedCB R(: packoffset(c0.w));
- float3 sharedCC R(: packoffset(c1));
- float2 sharedCD R(: packoffset(c2));
+ float3 sharedCA;
+ float sharedCB;
+ float3 sharedCC;
+ float2 sharedCD;
}
+END_CBUFFER(sharedC, register(b0))
// Then some parameters specific to this shader.
// These will be placed *after* the ones from the main file,
@@ -65,13 +73,15 @@ cbuffer sharedC R(: register(b0))
Texture2D fragmentT R(: register(t4));
SamplerState fragmentS R(: register(s2));
-cbuffer fragmentC R(: register(b2))
+
+BEGIN_CBUFFER(fragmentC)
{
- float3 fragmentCA R(: packoffset(c0));
- float fragmentCB R(: packoffset(c0.w));
- float3 fragmentCC R(: packoffset(c1));
- float2 fragmentCD R(: packoffset(c2));
+ float3 fragmentCA;
+ float fragmentCB;
+ float3 fragmentCC;
+ float2 fragmentCD;
}
+END_CBUFFER(fragmentC, register(b2))
// And end with some shared parameters again
Texture2D sharedTV R(: register(t2));
@@ -82,9 +92,9 @@ float4 main() : SV_TARGET
{
// Go ahead and use everything here, just to make sure things got placed correctly
return use(sharedT, sharedS)
- + use(sharedCD)
+ + use(CBUFFER_REF(sharedC,sharedCD))
+ use(fragmentT, fragmentS)
- + use(fragmentCD)
+ + use(CBUFFER_REF(fragmentC, fragmentCD))
+ use(sharedTF, sharedS)
;
} \ No newline at end of file
diff --git a/tests/bindings/multi-file.hlsl b/tests/bindings/multi-file.hlsl
index 6269c703e..8c719bbcf 100644
--- a/tests/bindings/multi-file.hlsl
+++ b/tests/bindings/multi-file.hlsl
@@ -8,8 +8,14 @@
#ifdef __SLANG__
#define R(X) /**/
+#define BEGIN_CBUFFER(NAME) cbuffer NAME
+#define END_CBUFFER(NAME, REG) /**/
+#define CBUFFER_REF(NAME, FIELD) FIELD
#else
#define R(X) X
+#define BEGIN_CBUFFER(NAME) struct SLANG_ParameterGroup_##NAME
+#define END_CBUFFER(NAME, REG) ; cbuffer NAME : REG { SLANG_ParameterGroup_##NAME NAME; }
+#define CBUFFER_REF(NAME, FIELD) NAME.FIELD
#define sharedC sharedC_0
#define sharedCA sharedCA_0
@@ -55,13 +61,15 @@ float4 use(Texture2D t, SamplerState s)
// Start with some parameters that will appear in both shaders
Texture2D sharedT R(: register(t0));
SamplerState sharedS R(: register(s0));
-cbuffer sharedC R(: register(b0))
+
+BEGIN_CBUFFER(sharedC)
{
- float3 sharedCA R(: packoffset(c0));
- float sharedCB R(: packoffset(c0.w));
- float3 sharedCC R(: packoffset(c1));
- float2 sharedCD R(: packoffset(c2));
+ float3 sharedCA;
+ float sharedCB;
+ float3 sharedCC;
+ float2 sharedCD;
}
+END_CBUFFER(sharedC, register(b0))
// Then some parameters specific to this shader
// (these will get placed before the ones in the `extra` file,
@@ -69,13 +77,15 @@ cbuffer sharedC R(: register(b0))
Texture2D vertexT R(: register(t1));
SamplerState vertexS R(: register(s1));
-cbuffer vertexC R(: register(b1))
+
+BEGIN_CBUFFER(vertexC)
{
- float3 vertexCA R(: packoffset(c0));
- float vertexCB R(: packoffset(c0.w));
- float3 vertexCC R(: packoffset(c1));
- float2 vertexCD R(: packoffset(c2));
+ float3 vertexCA;
+ float vertexCB;
+ float3 vertexCC;
+ float2 vertexCD;
}
+END_CBUFFER(vertexC, register(b1))
// And end with some shared parameters again
Texture2D sharedTV R(: register(t2));
@@ -86,9 +96,9 @@ float4 main() : SV_POSITION
{
// Go ahead and use everything here, just to make sure things got placed correctly
return use(sharedT, sharedS)
- + use(sharedCD)
+ + use(CBUFFER_REF(sharedC, sharedCD))
+ use(vertexT, vertexS)
- + use(vertexCD)
+ + use(CBUFFER_REF(vertexC, vertexCD))
+ use(sharedTV, vertexS)
;
} \ No newline at end of file
diff --git a/tests/bindings/packoffset.hlsl b/tests/bindings/packoffset.hlsl
deleted file mode 100644
index 81913d672..000000000
--- a/tests/bindings/packoffset.hlsl
+++ /dev/null
@@ -1,51 +0,0 @@
-//TEST:COMPARE_HLSL:-no-mangle -profile ps_4_0 -entry main
-
-// Let's make sure we generate correct output in cases
-// where there are non-trivial `packoffset`s needed
-
-#ifdef __SLANG__
-#define R(X) /**/
-#else
-#define R(X) X
-
-#define CA CA_0
-#define ca ca_0
-#define cb cb_0
-#define cc cc_0
-#define cd cd_0
-#define ce ce_0
-
-#define ta CA_ta_0
-#define sa CA_sa_0
-
-#endif
-
-float4 use(float val) { return val; };
-float4 use(float2 val) { return float4(val,0.0,0.0); };
-float4 use(float3 val) { return float4(val,0.0); };
-float4 use(float4 val) { return val; };
-float4 use(Texture2D t, SamplerState s) { return t.Sample(s, 0.0); }
-
-cbuffer CA R(: register(b0))
-{
- float4 ca R(: packoffset(c0));
- float3 cb R(: packoffset(c1.x));
- float cc R(: packoffset(c1.w));
- float2 cd R(: packoffset(c2.x));
- float2 ce R(: packoffset(c2.z));
-
- Texture2D ta R(: register(t0));
- SamplerState sa R(: register(s0));
-}
-
-float4 main() : SV_TARGET
-{
- // Go ahead and use everything in this case:
- return use(ta, sa)
- + use(ca)
- + use(cb)
- + use(cc)
- + use(cd)
- + use(ce)
- ;
-} \ No newline at end of file
diff --git a/tests/bindings/resources-in-cbuffer.hlsl b/tests/bindings/resources-in-cbuffer.hlsl
index 4d3d381d9..71eaf40aa 100644
--- a/tests/bindings/resources-in-cbuffer.hlsl
+++ b/tests/bindings/resources-in-cbuffer.hlsl
@@ -2,12 +2,20 @@
// Confirm that resources inside constant buffers get correct locations,
// including the case where there are *multiple* constant buffers
-// with reosurces.
+// with resources.
#ifdef __SLANG__
#define R(X) /**/
+#define BEGIN_CBUFFER(NAME) cbuffer NAME {
+#define MID_CBUFFER(NAME) /**/
+#define END_CBUFFER(NAME, REG) /**/ }
+#define CBUFFER_REF(NAME, FIELD) FIELD
#else
#define R(X) X
+#define BEGIN_CBUFFER(NAME) struct SLANG_ParameterGroup_##NAME {
+#define MID_CBUFFER(NAME) };
+#define END_CBUFFER(NAME, REG) cbuffer NAME : REG { SLANG_ParameterGroup_##NAME NAME; }
+#define CBUFFER_REF(NAME, FIELD) NAME.FIELD
#define CA CA_0
#define caa caa_0
@@ -46,43 +54,52 @@ float4 use(float3 val) { return float4(val,0.0); };
float4 use(float4 val) { return val; };
float4 use(Texture2D t, SamplerState s) { return t.Sample(s, 0.0); }
-cbuffer CA R(: register(b0))
-{
- float4 caa R(: packoffset(c0));
- float3 cab R(: packoffset(c1.x));
- float cac R(: packoffset(c1.w));
- float2 cad R(: packoffset(c2.x));
- float2 cae R(: packoffset(c2.z));
+BEGIN_CBUFFER(CA)
+
+ float4 caa;
+ float3 cab;
+ float cac;
+ float2 cad;
+ float2 cae;
+
+MID_CBUFFER(CA)
Texture2D ta R(: register(t0));
SamplerState sa R(: register(s0));
-}
-cbuffer CB R(: register(b1))
-{
- float4 cba R(: packoffset(c0));
- float3 cbb R(: packoffset(c1.x));
- float cbc R(: packoffset(c1.w));
- float2 cbd R(: packoffset(c2.x));
- float2 cbe R(: packoffset(c2.z));
+END_CBUFFER(CA, register(b0))
+
+BEGIN_CBUFFER(CB)
+
+ float4 cba;
+ float3 cbb;
+ float cbc;
+ float2 cbd;
+ float2 cbe;
+
+MID_CBUFFER(CB)
Texture2D tbx R(: register(t1));
Texture2D tby R(: register(t2));
SamplerState sb R(: register(s1));
-}
-cbuffer CC R(: register(b2))
-{
- float4 cca R(: packoffset(c0));
- float3 ccb R(: packoffset(c1.x));
- float ccc R(: packoffset(c1.w));
- float2 ccd R(: packoffset(c2.x));
- float2 cce R(: packoffset(c2.z));
+END_CBUFFER(CB, register(b1))
+
+BEGIN_CBUFFER(CC)
+
+ float4 cca;
+ float3 ccb;
+ float ccc;
+ float2 ccd;
+ float2 cce;
+
+MID_CBUFFER(CC)
Texture2D tc R(: register(t3));
SamplerState scx R(: register(s2));
SamplerState scy R(: register(s3));
-}
+
+END_CBUFFER(CC, register(b2))
float4 main() : SV_TARGET
{
@@ -91,8 +108,8 @@ float4 main() : SV_TARGET
+ use(tbx, sb)
+ use(tby, scx)
+ use(tc, scy)
- + use(cae)
- + use(cbe)
- + use(cce)
+ + use(CBUFFER_REF(CA, cae))
+ + use(CBUFFER_REF(CB, cbe))
+ + use(CBUFFER_REF(CC, cce))
;
} \ No newline at end of file
diff --git a/tests/bugs/gh-103.slang b/tests/bugs/gh-103.slang
index 65e71837b..4bad20b20 100644
--- a/tests/bugs/gh-103.slang
+++ b/tests/bugs/gh-103.slang
@@ -2,7 +2,15 @@
// Ensure that matrix-times-scalar works
-#ifndef __SLANG__
+#ifdef __SLANG__
+#define BEGIN_CBUFFER(NAME) cbuffer NAME
+#define END_CBUFFER(NAME, REG) /**/
+#define CBUFFER_REF(NAME, FIELD) FIELD
+#else
+#define BEGIN_CBUFFER(NAME) struct SLANG_ParameterGroup_##NAME
+#define END_CBUFFER(NAME, REG) ; cbuffer NAME : REG { SLANG_ParameterGroup_##NAME NAME; }
+#define CBUFFER_REF(NAME, FIELD) NAME.FIELD
+
#define C C_0
#define a a_0
#define b b_0
@@ -10,16 +18,19 @@
float4x4 doIt(float4x4 a, float b)
{
- return a * b;
+ return a * b;
}
-cbuffer C
+BEGIN_CBUFFER(C)
{
- float4x4 a;
- float b;
-};
+ float4x4 a;
+ float b;
+}
+END_CBUFFER(C,register(b0))
float4 main() : SV_TARGET
{
- return doIt(a, b)[0];
+ return doIt(
+ CBUFFER_REF(C,a),
+ CBUFFER_REF(C,b))[0];
}
diff --git a/tests/bugs/gh-172.slang b/tests/bugs/gh-172.slang
index dd5f4d47a..6d9e69503 100644
--- a/tests/bugs/gh-172.slang
+++ b/tests/bugs/gh-172.slang
@@ -8,22 +8,27 @@
cbuffer C
{
- Texture2D t0, t1;
- SamplerState s;
- float2 uv;
+ Texture2D t0, t1;
+ SamplerState s;
+ float2 uv;
};
float4 main() : SV_Target
{
- return t0.Sample(s, uv)
+ return t0.Sample(s, uv)
+ t1.Sample(s, uv);
}
#else
+struct SLANG_ParameterGroup_C_0
+{
+ float2 uv_0;
+};
+
cbuffer C_0 : register(b0)
{
- float2 uv_0;
+ SLANG_ParameterGroup_C_0 C_0;
};
Texture2D C_t0_0 : register(t0);
@@ -32,8 +37,8 @@ SamplerState C_s_0 : register(s0);
float4 main() : SV_TARGET
{
- return C_t0_0.Sample(C_s_0, uv_0)
- + C_t1_0.Sample(C_s_0, uv_0);
+ return C_t0_0.Sample(C_s_0, C_0.uv_0)
+ + C_t1_0.Sample(C_s_0, C_0.uv_0);
}
#endif
diff --git a/tests/bugs/gh-333.slang b/tests/bugs/gh-333.slang
index 2a23f7751..a1e3ea20d 100644
--- a/tests/bugs/gh-333.slang
+++ b/tests/bugs/gh-333.slang
@@ -2,7 +2,15 @@
// Ensure declaration order in output is correct
-#ifndef __SLANG__
+#ifdef __SLANG__
+#define BEGIN_CBUFFER(NAME) cbuffer NAME
+#define END_CBUFFER(NAME, REG) /**/
+#define CBUFFER_REF(NAME, FIELD) FIELD
+#else
+#define BEGIN_CBUFFER(NAME) struct SLANG_ParameterGroup_##NAME
+#define END_CBUFFER(NAME, REG) ; cbuffer NAME : REG { SLANG_ParameterGroup_##NAME NAME; }
+#define CBUFFER_REF(NAME, FIELD) NAME.FIELD
+
#define A A_0
#define x x_0
#define B B_0
@@ -23,13 +31,14 @@ struct B
Texture2D t;
};
-cbuffer C
+BEGIN_CBUFFER(C)
{
A a;
B b;
-};
+}
+END_CBUFFER(C,register(b0))
float4 main() : SV_TARGET
{
- return a.x;
+ return CBUFFER_REF(C,a).x;
}
diff --git a/tests/bugs/split-nested-types.hlsl b/tests/bugs/split-nested-types.hlsl
index 577f64a75..2bfea49c2 100644
--- a/tests/bugs/split-nested-types.hlsl
+++ b/tests/bugs/split-nested-types.hlsl
@@ -1,9 +1,19 @@
//TEST:COMPARE_HLSL:-no-mangle -profile ps_5_0
#ifdef __SLANG__
+
+#define BEGIN_CBUFFER(NAME) cbuffer NAME
+#define END_CBUFFER(NAME, REG) /**/
+#define CBUFFER_REF(NAME, FIELD) FIELD
+
import split_nested_types;
+
#else
+#define BEGIN_CBUFFER(NAME) struct SLANG_ParameterGroup_##NAME
+#define END_CBUFFER(NAME, REG) ; cbuffer NAME : REG { SLANG_ParameterGroup_##NAME NAME; }
+#define CBUFFER_REF(NAME, FIELD) NAME.FIELD
+
#define A A_0
#define x x_0
@@ -31,12 +41,13 @@ struct M
#endif
-cbuffer C
+BEGIN_CBUFFER(C)
{
M m;
}
+END_CBUFFER(C,register(b0))
float4 main() : SV_TARGET
{
- return m.b.y;
+ return CBUFFER_REF(C,m).b.y;
}
diff --git a/tests/bugs/vec-init-list.hlsl b/tests/bugs/vec-init-list.hlsl
index 2f82a96b0..fe8cba09f 100644
--- a/tests/bugs/vec-init-list.hlsl
+++ b/tests/bugs/vec-init-list.hlsl
@@ -2,7 +2,14 @@
// Check handling of initializer list for vector
-#ifndef __SLANG__
+#ifdef __SLANG__
+#define BEGIN_CBUFFER(NAME) cbuffer NAME
+#define END_CBUFFER(NAME, REG) /**/
+#define CBUFFER_REF(NAME, FIELD) FIELD
+#else
+#define BEGIN_CBUFFER(NAME) struct SLANG_ParameterGroup_##NAME
+#define END_CBUFFER(NAME, REG) ; cbuffer NAME : REG { SLANG_ParameterGroup_##NAME NAME; }
+#define CBUFFER_REF(NAME, FIELD) NAME.FIELD
#define C C_0
#define a a_0
@@ -10,10 +17,11 @@
#endif
-cbuffer C : register(b0)
+BEGIN_CBUFFER(C)
{
- float4 a;
-};
+ float4 a;
+}
+END_CBUFFER(C, register(b0))
float w0(float x) { return x; }
float w1(float x) { return x; }
@@ -22,6 +30,10 @@ float w3(float x) { return x; }
float4 main() : SV_Position
{
- float4 wx = { w0(a.x), w1(a.x), w2(a.x), w3(a.x), };
+ float4 wx = {
+ w0(CBUFFER_REF(C,a).x),
+ w1(CBUFFER_REF(C,a).x),
+ w2(CBUFFER_REF(C,a).x),
+ w3(CBUFFER_REF(C,a).x), };
return wx;
}
diff --git a/tests/bugs/vk-structured-buffer-binding.hlsl b/tests/bugs/vk-structured-buffer-binding.hlsl
index cdfeb19ab..3ef1bcc8c 100644
--- a/tests/bugs/vk-structured-buffer-binding.hlsl
+++ b/tests/bugs/vk-structured-buffer-binding.hlsl
@@ -1,5 +1,4 @@
-//TEST:SIMPLE: -profile ps_4_0 -entry main -target glsl
-///////TEST:REFLECTION:-profile ps_4_0 -target spirv
+//TEST:CROSS_COMPILE:-profile ps_4_0 -entry main -target spirv-assembly
[[vk::binding(3, 4)]]
RWStructuredBuffer<uint> gDoneGroups : register(u3);
diff --git a/tests/bugs/vk-structured-buffer-binding.hlsl.expected b/tests/bugs/vk-structured-buffer-binding.hlsl.expected
deleted file mode 100644
index fc1cb93ea..000000000
--- a/tests/bugs/vk-structured-buffer-binding.hlsl.expected
+++ /dev/null
@@ -1,31 +0,0 @@
-result code = 0
-standard error = {
-}
-standard output = {
-#version 450
-layout(row_major) uniform;
-layout(row_major) buffer;
-
-#line 5 0
-layout(std430, binding = 3, set = 4) buffer _S1 {
- uint gDoneGroups_0[];
-};
-
-#line 7
-layout(location = 0)
-out vec4 _S2;
-
-
-#line 7
-layout(location = 0)
-in vec3 _S3;
-
-
-#line 7
-void main()
-{
- _S2 = vec4(gDoneGroups_0[uint(int(_S3.z))]);
- return;
-}
-
-}
diff --git a/tests/bugs/vk-structured-buffer-binding.hlsl.glsl b/tests/bugs/vk-structured-buffer-binding.hlsl.glsl
new file mode 100644
index 000000000..7298ea594
--- /dev/null
+++ b/tests/bugs/vk-structured-buffer-binding.hlsl.glsl
@@ -0,0 +1,26 @@
+// vk-structured-buffer-binding.hlsl.glsl
+//TEST_IGNORE_FILE:
+
+#version 450
+
+#define gDoneGroups gDoneGroups_0
+#define uv _S3
+#define SV_Target _S2
+
+layout(std430, binding = 3, set = 4)
+buffer _S1
+{
+ uint _data[];
+} gDoneGroups;
+
+layout(location = 0)
+out vec4 SV_Target;
+
+layout(location = 0)
+in vec3 uv;
+
+void main()
+{
+ SV_Target = vec4(gDoneGroups._data[uint(int(uv.z))]);
+ return;
+}
diff --git a/tests/compute/matrix-layout.hlsl b/tests/compute/matrix-layout.hlsl
index ad456d8be..034ac512c 100644
--- a/tests/compute/matrix-layout.hlsl
+++ b/tests/compute/matrix-layout.hlsl
@@ -32,7 +32,14 @@ cbuffer C0
//TEST_INPUT:cbuffer(data=[1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24]):dxbinding(1),glbinding(1)
cbuffer C1
{
- column_major
+
+// Note: support for the explicit `row_major` and `column_major` modifiers is being
+// disabled for now, since our current Vulkan output strategy cannot possibly match the
+// semantics of these modifiers in D3D. Once we do a more complete implementation of
+// matrix layout (see GitHub issue #695) we can add a directed test for all the
+// corner cases of explicit matrix layout.
+//
+// column_major
int3x4 cc;
int dd;
};
@@ -47,7 +54,7 @@ int test(int val)
int a = s.a[val / 4][val % 3];
int b = s.b;
- int c = cc[val / 4][val % 4];
+ int c = cc[val / 4][val % 3];
int d = dd;
return ((a*N + b) * N + c) * N + d;
diff --git a/tests/compute/matrix-layout.hlsl.expected.txt b/tests/compute/matrix-layout.hlsl.expected.txt
index cb8e2cae7..3b67fe0cb 100644
--- a/tests/compute/matrix-layout.hlsl.expected.txt
+++ b/tests/compute/matrix-layout.hlsl.expected.txt
@@ -1,12 +1,12 @@
-10D0111
-20D0511
-30D0911
-10D0D11
-60D0211
-70D0611
-50D0A11
-60D0E11
-B0D0311
-90D0711
-A0D0B11
-B0D0F11
+10D010D
+20D020D
+30D030D
+10D010D
+60D060D
+70D070D
+50D050D
+60D060D
+B0D0B0D
+90D090D
+A0D0A0D
+B0D0B0D
diff --git a/tests/cross-compile/array-of-buffers.slang b/tests/cross-compile/array-of-buffers.slang
new file mode 100644
index 000000000..de87e6d9d
--- /dev/null
+++ b/tests/cross-compile/array-of-buffers.slang
@@ -0,0 +1,32 @@
+// array-of-buffers.slang
+
+//TEST:CROSS_COMPILE:-target spirv-assembly -entry main -stage fragment
+//TEST:CROSS_COMPILE:-target dxil-assembly -entry main -stage fragment -profile sm_6_0
+
+// This test ensures that we cross-compile arrays of structured/constant
+// buffers into appropriate GLSL, where these are not first-class types.
+//
+// Note that this test does *not* currently test the case of passing
+// a structured or constant buffer into a subroutine, which requires
+// further work.
+
+struct S { float4 f; };
+
+cbuffer C
+{
+ uint index;
+}
+
+ConstantBuffer<S> cb [3];
+StructuredBuffer<S> sb1[4];
+RWStructuredBuffer<float4> sb2[5];
+ByteAddressBuffer bb [6];
+
+float4 main() : SV_Target
+{
+ return cb [index] .f
+ + sb1[index][index].f
+ + sb2[index][index]
+ + float4(bb[index].Load(index*4));
+}
+
diff --git a/tests/cross-compile/array-of-buffers.slang.glsl b/tests/cross-compile/array-of-buffers.slang.glsl
new file mode 100644
index 000000000..d3aa5082f
--- /dev/null
+++ b/tests/cross-compile/array-of-buffers.slang.glsl
@@ -0,0 +1,57 @@
+//TEST_IGNORE_FILE:
+#version 450
+
+struct SLANG_ParameterGroup_C_0
+{
+ uint index_0;
+};
+
+layout(binding = 0)
+layout(std140) uniform _S1
+{
+ SLANG_ParameterGroup_C_0 _data;
+} C_0;
+
+struct S_0
+{
+ vec4 f_0;
+};
+
+layout(binding = 1)
+layout(std140) uniform _S2
+{
+ S_0 _data;
+} cb_0[3];
+
+
+layout(std430, binding = 2)
+buffer _S3 {
+ S_0 _data[];
+} sb1_0[4];
+
+layout(std430, binding = 3)
+buffer _S4 {
+ vec4 _data[];
+} sb2_0[5];
+
+layout(std430, binding = 4)
+buffer _S5
+{
+ uint _data[];
+} bb_0[6];
+
+layout(location = 0)
+out vec4 _S6;
+
+void main()
+{
+ vec4 _S7 = cb_0[C_0._data.index_0]._data.f_0;
+
+ S_0 _S8 = sb1_0[C_0._data.index_0]._data[C_0._data.index_0];
+
+ vec4 _S9 = _S7 + _S8.f_0;
+ vec4 _S10 = _S9 + sb2_0[C_0._data.index_0]._data[C_0._data.index_0];
+ uint _S11 = bb_0[C_0._data.index_0]._data[int(C_0._data.index_0 * uint(4))];
+ _S6 = _S10 + vec4(_S11);
+ return;
+}
diff --git a/tests/cross-compile/array-of-buffers.slang.hlsl b/tests/cross-compile/array-of-buffers.slang.hlsl
new file mode 100644
index 000000000..129993edc
--- /dev/null
+++ b/tests/cross-compile/array-of-buffers.slang.hlsl
@@ -0,0 +1,35 @@
+//TEST_IGNORE_FILE:
+
+struct SLANG_ParameterGroup_C_0
+{
+ uint index_0;
+};
+
+cbuffer C_0 : register(b0)
+{
+ SLANG_ParameterGroup_C_0 C_0;
+}
+
+struct S_0
+{
+ float4 f_0;
+};
+
+ConstantBuffer<S_0> cb_0 [3] : register(b1);
+StructuredBuffer<S_0> sb1_0[4] : register(t0);
+RWStructuredBuffer<float4> sb2_0[5] : register(u1);
+ByteAddressBuffer bb_0[6] : register(t4);
+
+float4 main() : SV_TARGET
+{
+ float4 _S1 = cb_0[C_0.index_0].f_0;
+
+ S_0 _S2 = sb1_0[C_0.index_0][C_0.index_0];
+
+ float4 _S3 = _S1 + _S2.f_0;
+ float4 _S4 = _S3 + sb2_0[C_0.index_0][C_0.index_0];
+ uint _S5 = bb_0[C_0.index_0].Load(
+ (int) (C_0.index_0 * (uint) 4));
+
+ return _S4 + (float4) _S5;
+}
diff --git a/tests/hlsl/dxsdk/AdaptiveTessellationCS40/Render.hlsl b/tests/hlsl/dxsdk/AdaptiveTessellationCS40/Render.hlsl
deleted file mode 100644
index c6b4ac197..000000000
--- a/tests/hlsl/dxsdk/AdaptiveTessellationCS40/Render.hlsl
+++ /dev/null
@@ -1,65 +0,0 @@
-//TEST(smoke):COMPARE_HLSL:-no-mangle -profile sm_4_0 -entry RenderBaseVS -stage vertex -entry RenderPS -stage fragment
-
-#ifndef __SLANG__
-#define cbPerObject cbPerObject_0
-#define g_mWorldViewProjection g_mWorldViewProjection_0
-#endif
-
-
-//--------------------------------------------------------------------------------------
-// File: Render.hlsl
-//
-// The shaders for rendering tessellated mesh and base mesh
-//
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-cbuffer cbPerObject : register( b0 )
-{
- row_major matrix g_mWorldViewProjection ;//SLANG: : packoffset( c0 );
-}
-
-// The tessellated vertex structure
-struct TessedVertex
-{
- uint BaseTriID; // Which triangle of the base mesh this tessellated vertex belongs to?
- float2 bc; // Barycentric coordinates with regard to the base triangle
-};
-Buffer<float4> g_base_vb_buffer : register(t0); // Base mesh vertex buffer
-StructuredBuffer<TessedVertex> g_TessedVertices : register(t1); // Tessellated mesh vertex buffer
-
-float4 bary_centric(float4 v1, float4 v2, float4 v3, float2 bc)
-{
- return (1 - bc.x - bc.y) * v1 + bc.x * v2 + bc.y * v3;
-}
-
-float4 RenderVS( uint vertid : SV_VertexID ) : SV_POSITION
-{
- TessedVertex input = g_TessedVertices[vertid];
-
- // Get the positions of the three vertices of the base triangle
- float4 v[3];
- [unroll]
- for (int i = 0; i < 3; ++ i)
- {
- uint vert_id = input.BaseTriID * 3 + i;
- v[i] = g_base_vb_buffer[vert_id];
- }
-
- // Calculate the position of this tessellated vertex from barycentric coordinates and then project it
- return mul(bary_centric(v[0], v[1], v[2], input.bc), g_mWorldViewProjection);
-}
-
-struct BaseVertex
-{
- float4 pos : POSITION;
-};
-
-float4 RenderBaseVS( BaseVertex input ) : SV_POSITION
-{
- return mul( input.pos, g_mWorldViewProjection );
-}
-
-float4 RenderPS() : SV_TARGET
-{
- return float4( 1.0f, 1.0f, 0.0f, 1.0f );
-} \ No newline at end of file
diff --git a/tests/hlsl/dxsdk/AdaptiveTessellationCS40/ScanCS.hlsl b/tests/hlsl/dxsdk/AdaptiveTessellationCS40/ScanCS.hlsl
deleted file mode 100644
index a4472179f..000000000
--- a/tests/hlsl/dxsdk/AdaptiveTessellationCS40/ScanCS.hlsl
+++ /dev/null
@@ -1,109 +0,0 @@
-//TEST_DISABLED:COMPARE_HLSL:-no-mangle -profile cs_4_0 -entry CSScanInBucket -entry CSScanBucketResult -entry CSScanAddBucketResult
-//--------------------------------------------------------------------------------------
-// File: ScanCS.hlsl
-//
-// A simple inclusive prefix sum(scan) implemented in CS4.0,
-// using a typical up sweep and down sweep scheme
-//
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-StructuredBuffer<uint2> Input : register( t0 ); // Change uint2 here if scan other types, and
-RWStructuredBuffer<uint2> Result : register( u0 ); // also here
-
-#define groupthreads 128
-groupshared uint4 bucket[groupthreads]; // Change uint4 to the "type x2" if scan other types, e.g.
- // if scan uint2, then put uint4 here,
- // if scan float, then put float2 here
-
-void CSScan( uint3 DTid, uint GI, uint2 x ) // Change the type of x here if scan other types
-{
- // since CS40 can only support one shared memory for one shader, we use .xy and .zw as ping-ponging buffers
- // if scan a single element type like int, search and replace all .xy to .x and .zw to .y below
- bucket[GI].xy = x;
- bucket[GI].zw = 0;
-
- // Up sweep
- [unroll]
- for ( uint stride = 2; stride <= groupthreads; stride <<= 1 )
- {
- GroupMemoryBarrierWithGroupSync();
-
- if ( (GI & (stride - 1)) == (stride - 1) )
- {
- bucket[GI].xy += bucket[GI - stride/2].xy;
- }
- }
-
- if ( GI == (groupthreads - 1) )
- {
- bucket[GI].xy = 0;
- }
-
- // Down sweep
- bool n = true;
- [unroll]
- for ( stride = groupthreads / 2; stride >= 1; stride >>= 1 )
- {
- GroupMemoryBarrierWithGroupSync();
-
- uint a = stride - 1;
- uint b = stride | a;
-
- if ( n ) // ping-pong between passes
- {
- if ( ( GI & b) == b )
- {
- bucket[GI].zw = bucket[GI-stride].xy + bucket[GI].xy;
- } else
- if ( (GI & a) == a )
- {
- bucket[GI].zw = bucket[GI+stride].xy;
- } else
- {
- bucket[GI].zw = bucket[GI].xy;
- }
- } else
- {
- if ( ( GI & b) == b )
- {
- bucket[GI].xy = bucket[GI-stride].zw + bucket[GI].zw;
- } else
- if ( (GI & a) == a )
- {
- bucket[GI].xy = bucket[GI+stride].zw;
- } else
- {
- bucket[GI].xy = bucket[GI].zw;
- }
- }
-
- n = !n;
- }
-
- Result[DTid.x] = bucket[GI].zw + x;
-}
-
-// scan in each bucket
-[numthreads( groupthreads, 1, 1 )]
-void CSScanInBucket( uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI: SV_GroupIndex )
-{
- uint2 x = Input[DTid.x]; // Change the type of x here if scan other types
- CSScan( DTid, GI, x );
-}
-
-// record and scan the sum of each bucket
-[numthreads( groupthreads, 1, 1 )]
-void CSScanBucketResult( uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI: SV_GroupIndex )
-{
- uint2 x = Input[DTid.x*groupthreads - 1]; // Change the type of x here if scan other types
- CSScan( DTid, GI, x );
-}
-
-StructuredBuffer<uint2> Input1 : register( t1 );
-
-// add the bucket scanned result to each bucket to get the final result
-[numthreads( groupthreads, 1, 1 )]
-void CSScanAddBucketResult( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI: SV_GroupIndex )
-{
- Result[DTid.x] = Input[DTid.x] + Input1[Gid.x];
-}
diff --git a/tests/hlsl/dxsdk/AdaptiveTessellationCS40/TessellatorCS40_EdgeFactorCS.hlsl b/tests/hlsl/dxsdk/AdaptiveTessellationCS40/TessellatorCS40_EdgeFactorCS.hlsl
deleted file mode 100644
index 1bd204efc..000000000
--- a/tests/hlsl/dxsdk/AdaptiveTessellationCS40/TessellatorCS40_EdgeFactorCS.hlsl
+++ /dev/null
@@ -1,217 +0,0 @@
-//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues.
-//TEST:COMPARE_HLSL: -profile cs_4_0 -entry CSEdgeFactor
-//--------------------------------------------------------------------------------------
-// File: TessellatorCS40_EdgeFactorCS.hlsl
-//
-// The CS to compute edge tessellation factor acoording to current world, view, projection matrix
-//
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-// http://jgt.akpeters.com/papers/akeninemoller01/tribox.html
-bool planeBoxOverlap(float3 normal, float d, float3 maxbox)
-{
- float3 vmin = maxbox, vmax = maxbox;
- [unroll]
- for (int q = 0;q <= 2; ++ q)
- {
- if (normal[q] > 0.0f)
- {
- vmin[q] *= -1;
- }
- else
- {
- vmax[q] *= -1;
- }
- }
- if (dot(normal, vmin) + d > 0.0f)
- {
- return false;
- }
- if (dot(normal, vmax) + d >= 0.0f)
- {
- return true;
- }
-
- return false;
-}
-
-/*======================== X-tests ========================*/
-bool AXISTEST_X01(float3 v0, float3 v2, float3 boxhalfsize, float2 ab, float2 fab)
-{
- float p0 = ab.x * v0.y - ab.y * v0.z;
- float p2 = ab.x * v2.y - ab.y * v2.z;
- float min_v = min(p0, p2);
- float max_v = max(p0, p2);
- float rad = dot(fab, boxhalfsize.yz);
- return (min_v < rad) && (max_v > -rad);
-}
-
-bool AXISTEST_X2(float3 v0, float3 v1, float3 boxhalfsize, float2 ab, float2 fab)
-{
- float p0 = ab.x * v0.y - ab.y * v0.z;
- float p1 = ab.x * v1.y - ab.y * v1.z;
- float min_v = min(p0, p1);
- float max_v = max(p0, p1);
- float rad = dot(fab, boxhalfsize.yz);
- return (min_v < rad) && (max_v > -rad);
-}
-
-/*======================== Y-tests ========================*/
-bool AXISTEST_Y02(float3 v0, float3 v2, float3 boxhalfsize, float2 ab, float2 fab)
-{
- float p0 = -ab.x * v0.x + ab.y * v0.z;
- float p2 = -ab.x * v2.x + ab.y * v2.z;
- float min_v = min(p0, p2);
- float max_v = max(p0, p2);
- float rad = dot(fab, boxhalfsize.xz);
- return (min_v < rad) && (max_v > -rad);
-}
-
-bool AXISTEST_Y1(float3 v0, float3 v1, float3 boxhalfsize, float2 ab, float2 fab)
-{
- float p0 = -ab.x * v0.x + ab.y * v0.z;
- float p1 = -ab.x * v1.x + ab.y * v1.z;
- float min_v = min(p0, p1);
- float max_v = max(p0, p1);
- float rad = dot(fab, boxhalfsize.xz);
- return (min_v < rad) && (max_v > -rad);
-}
-
-/*======================== Z-tests ========================*/
-bool AXISTEST_Z12(float3 v1, float3 v2, float3 boxhalfsize, float2 ab, float2 fab)
-{
- float p1 = ab.x * v1.x - ab.y * v1.y;
- float p2 = ab.x * v2.x - ab.y * v2.y;
- float min_v = min(p1, p2);
- float max_v = max(p1, p2);
- float rad = dot(fab, boxhalfsize.xy);
- return (min_v < rad) && (max_v > -rad);
-}
-
-bool AXISTEST_Z0(float3 v0, float3 v1, float3 boxhalfsize, float2 ab, float2 fab)
-{
- float p0 = ab.x * v0.x - ab.y * v0.y;
- float p1 = ab.x * v1.x - ab.y * v1.y;
- float min_v = min(p0, p1);
- float max_v = max(p0, p1);
- float rad = dot(fab, boxhalfsize.xy);
- return (min_v < rad) && (max_v > -rad);
-}
-
-bool triBoxOverlap(float3 boxcenter,float3 boxhalfsize,float3 triverts0, float3 triverts1, float3 triverts2)
-{
- /* use separating axis theorem to test overlap between triangle and box */
- /* need to test for overlap in these directions: */
- /* 1) the {x,y,z}-directions (actually, since we use the AABB of the triangle */
- /* we do not even need to test these) */
- /* 2) normal of the triangle */
- /* 3) crossproduct(edge from tri, {x,y,z}-directin) */
- /* this gives 3x3=9 more tests */
-
- /* This is the fastest branch on Sun */
- /* move everything so that the boxcenter is in (0,0,0) */
- float3 v0 = triverts0 - boxcenter;
- float3 v1 = triverts1 - boxcenter;
- float3 v2 = triverts2 - boxcenter;
-
- /* compute triangle edges */
- float3 e0 = v1 - v0; /* tri edge 0 */
- float3 e1 = v2 - v1; /* tri edge 1 */
- float3 e2 = v0 - v2; /* tri edge 2 */
-
- /* Bullet 3: */
- /* test the 9 tests first (this was faster) */
- float3 fe = abs(e0);
- if (!AXISTEST_X01(v0, v2, boxhalfsize, e0.zy, fe.zy)
- || !AXISTEST_Y02(v0, v2, boxhalfsize, e0.zx, fe.zx)
- || !AXISTEST_Z12(v1, v2, boxhalfsize, e0.yx, fe.yx))
- {
- return false;
- }
-
- fe = abs(e1);
- if (!AXISTEST_X01(v0, v2, boxhalfsize, e1.zy, fe.zy)
- || !AXISTEST_Y02(v0, v2, boxhalfsize, e1.zx, fe.zx)
- || !AXISTEST_Z0(v0, v1, boxhalfsize, e1.yx, fe.yx))
- {
- return false;
- }
-
- fe = abs(e2);
- if (!AXISTEST_X2(v0, v1, boxhalfsize, e2.zy, fe.zy)
- || !AXISTEST_Y1(v0, v1, boxhalfsize, e2.zx, fe.zx)
- || !AXISTEST_Z12(v1, v2, boxhalfsize, e2.yx, fe.yx))
- {
- return false;
- }
-
- /* Bullet 1: */
- /* first test overlap in the {x,y,z}-directions */
- /* find min, max of the triangle each direction, and test for overlap in */
- /* that direction -- this is equivalent to testing a minimal AABB around */
- /* the triangle against the AABB */
-
- float3 min_v = min(min(v0, v1), v2);
- float3 max_v = max(max(v0, v1), v2);
- if ((min_v.x > boxhalfsize.x || max_v.x < -boxhalfsize.x)
- || (min_v.y > boxhalfsize.y || max_v.y < -boxhalfsize.y)
- || (min_v.z > boxhalfsize.z || max_v.z < -boxhalfsize.z))
- {
- return false;
- }
-
- /* Bullet 2: */
- /* test if the box intersects the plane of the triangle */
- /* compute plane equation of triangle: normal*x+d=0 */
- float3 normal = cross(e0, e1);
- float d = -dot(normal, v0); /* plane eq: normal.x+d=0 */
- if (!planeBoxOverlap(normal, d, boxhalfsize))
- {
- return false;
- }
-
- return true; /* box and triangle overlaps */
-}
-
-
-Buffer<float4> InputVertices : register(t0);
-RWStructuredBuffer<float4> EdgeFactorBufOut : register(u0);
-
-cbuffer cb
-{
- row_major matrix g_matWVP;
- float2 g_tess_edge_length_scale;
- int num_triangles;
- float dummy;
-}
-
-[numthreads(128, 1, 1)]
-void CSEdgeFactor( uint3 DTid : SV_DispatchThreadID )
-{
- if (DTid.x < num_triangles)
- {
- float4 p0 = mul(InputVertices[DTid.x*3+0], g_matWVP);
- float4 p1 = mul(InputVertices[DTid.x*3+1], g_matWVP);
- float4 p2 = mul(InputVertices[DTid.x*3+2], g_matWVP);
- p0 = p0 / p0.w;
- p1 = p1 / p1.w;
- p2 = p2 / p2.w;
-
- float4 factor;
- // Only triangles which are completely inside or intersect with the view frustum are taken into account
- if ( triBoxOverlap( float3(0, 0, 0.5), float3(1.02, 1.02, 0.52), p0.xyz, p1.xyz, p2.xyz ) )
- {
- factor.x = length((p0.xy - p2.xy) * g_tess_edge_length_scale);
- factor.y = length((p1.xy - p0.xy) * g_tess_edge_length_scale);
- factor.z = length((p2.xy - p1.xy) * g_tess_edge_length_scale);
- factor.w = min(min(factor.x, factor.y), factor.z);
- factor = clamp(factor, 0, 9);
- } else
- {
- factor = 0;
- }
-
- EdgeFactorBufOut[DTid.x] = factor;
- }
-}
diff --git a/tests/hlsl/dxsdk/AdaptiveTessellationCS40/TessellatorCS40_NumVerticesIndicesCS.hlsl b/tests/hlsl/dxsdk/AdaptiveTessellationCS40/TessellatorCS40_NumVerticesIndicesCS.hlsl
deleted file mode 100644
index 672996589..000000000
--- a/tests/hlsl/dxsdk/AdaptiveTessellationCS40/TessellatorCS40_NumVerticesIndicesCS.hlsl
+++ /dev/null
@@ -1,56 +0,0 @@
-//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues.
-//TEST:COMPARE_HLSL: -profile cs_4_0 -entry CSNumVerticesIndices
-//--------------------------------------------------------------------------------------
-// File: TessellatorCS40_NumVerticesIndicesCS.hlsl
-//
-// The CS to compute number of vertices and triangles to be generated from edge tessellation factor
-//
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-#include "TessellatorCS40_common.hlsl"
-
-StructuredBuffer<float4> InputEdgeFactor : register(t0);
-RWStructuredBuffer<uint2> NumVerticesIndicesOut : register(u0);
-
-cbuffer cbCS : register(b1)
-{
- uint4 g_param;
-}
-
-[numthreads(128, 1, 1)]
-void CSNumVerticesIndices( uint3 DTid : SV_DispatchThreadID )
-{
- if (DTid.x < g_param.x)
- {
- float4 edge_factor = InputEdgeFactor[DTid.x];
-
- PROCESSED_TESS_FACTORS_TRI processedTessFactors;
- int num_points = TriProcessTessFactors(edge_factor, processedTessFactors, g_partitioning);
-
- int num_index;
- if (0 == num_points)
- {
- num_index = 0;
- }
- else if (3 == num_points)
- {
- num_index = 4;
- }
- else
- {
- int numRings = ((processedTessFactors.numPointsForOutsideInside.w + 1) / 2); // +1 is so even tess includes the center point, which we want to now
-
- int4 outsideInsideHalfTessFactor = int4(ceil(processedTessFactors.outsideInsideHalfTessFactor));
- uint3 n = NumStitchTransition(outsideInsideHalfTessFactor, processedTessFactors.outsideInsideTessFactorParity);
- num_index = n.x + n.y + n.z;
- num_index += TotalNumStitchRegular(true, DIAGONALS_MIRRORED, processedTessFactors.numPointsForOutsideInside.w, numRings - 1) * 3;
- if( processedTessFactors.outsideInsideTessFactorParity.w == TESSELLATOR_PARITY_ODD )
- {
- num_index += 4;
- }
- }
-
- NumVerticesIndicesOut[DTid.x] = uint2(num_points, num_index);
- }
-}
diff --git a/tests/hlsl/dxsdk/AdaptiveTessellationCS40/TessellatorCS40_ScatterIDCS.hlsl b/tests/hlsl/dxsdk/AdaptiveTessellationCS40/TessellatorCS40_ScatterIDCS.hlsl
deleted file mode 100644
index f6f9081da..000000000
--- a/tests/hlsl/dxsdk/AdaptiveTessellationCS40/TessellatorCS40_ScatterIDCS.hlsl
+++ /dev/null
@@ -1,45 +0,0 @@
-//TEST_DISABLED:COMPARE_HLSL:-no-mangle -profile cs_4_0 -entry CSScatterVertexTriIDIndexID -entry CSScatterIndexTriIDIndexID
-//--------------------------------------------------------------------------------------
-// File: TessellatorCS40_ScatterIDCS.hlsl
-//
-// The CS to scatter vertex ID and triangle ID
-//
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-StructuredBuffer<uint2> InputScanned : register(t0);
-RWStructuredBuffer<uint2> TriIDIndexIDOut : register(u0);
-
-cbuffer cbCS : register(b1)
-{
- uint4 g_param;
-}
-
-[numthreads(128, 1, 1)]
-void CSScatterVertexTriIDIndexID( uint3 DTid : SV_DispatchThreadID )
-{
- if (DTid.x < g_param.x)
- {
- uint start = InputScanned[DTid.x-1].x;
- uint end = InputScanned[DTid.x].x;
-
- for ( uint i = start; i < end; ++i )
- {
- TriIDIndexIDOut[i] = uint2(DTid.x, i - start);
- }
- }
-}
-
-[numthreads(128, 1, 1)]
-void CSScatterIndexTriIDIndexID( uint3 DTid : SV_DispatchThreadID )
-{
- if (DTid.x < g_param.x)
- {
- uint start = InputScanned[DTid.x-1].y;
- uint end = InputScanned[DTid.x].y;
-
- for ( uint i = start; i < end; ++i )
- {
- TriIDIndexIDOut[i] = uint2(DTid.x, i - start);
- }
- }
-}
diff --git a/tests/hlsl/dxsdk/AdaptiveTessellationCS40/TessellatorCS40_TessellateIndicesCS.hlsl b/tests/hlsl/dxsdk/AdaptiveTessellationCS40/TessellatorCS40_TessellateIndicesCS.hlsl
deleted file mode 100644
index 8c0a5b63b..000000000
--- a/tests/hlsl/dxsdk/AdaptiveTessellationCS40/TessellatorCS40_TessellateIndicesCS.hlsl
+++ /dev/null
@@ -1,628 +0,0 @@
-//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues.
-//TEST:COMPARE_HLSL: -profile cs_4_0 -entry CSTessellationIndices
-//--------------------------------------------------------------------------------------
-// File: TessellatorCS40_TessellateIndicesCS.hlsl
-//
-// The CS to tessellate indices
-//
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-#include "TessellatorCS40_common.hlsl"
-
-StructuredBuffer<uint2> InputTriIDIndexID : register(t0);
-StructuredBuffer<float4> InputEdgeFactor : register(t1);
-StructuredBuffer<uint2> InputScanned : register(t2);
-
-RWByteAddressBuffer TessedIndicesOut : register(u0);
-
-cbuffer cbCS : register(b1)
-{
- uint4 g_param;
-}
-
-
-int TransformIndex1(int index, int vertices_base)
-{
- return vertices_base + index;
-}
-
-int TransformIndex2(int index, int vertices_base, INDEX_PATCH_CONTEXT IndexPatchContext)
-{
- if( index >= IndexPatchContext.outsidePointIndexPatchBase ) // assumed remapped outide indices are > remapped inside vertices
- {
- if( index == IndexPatchContext.outsidePointIndexBadValue )
- {
- index = IndexPatchContext.outsidePointIndexReplacementValue;
- }
- else
- {
- index += IndexPatchContext.outsidePointIndexDeltaToRealValue;
- }
- }
- else
- {
- if( index == IndexPatchContext.insidePointIndexBadValue )
- {
- index = IndexPatchContext.insidePointIndexReplacementValue;
- }
- else
- {
- index += IndexPatchContext.insidePointIndexDeltaToRealValue;
- }
- }
-
- return vertices_base + index;
-}
-
-
-int AStitchRegular(bool bTrapezoid, int diagonals,
- uint numInsideEdgePoints,
- int2 outsideInsideEdgePointBaseOffset,
- int i)
-{
- if (bTrapezoid)
- {
- ++ outsideInsideEdgePointBaseOffset.x;
- }
-
- int pt;
-
- if ((i < 4) && bTrapezoid)
- {
- if (i < 2)
- {
- pt = outsideInsideEdgePointBaseOffset.x - 1 + i;
- }
- else if (i == 2)
- {
- pt = outsideInsideEdgePointBaseOffset.y;
- }
- else
- {
- pt = -1;
- }
- }
-
- int index = i;
- if (bTrapezoid)
- {
- index -= 4;
- }
-
- if (index >= 0)
- {
- uint uindex = (uint)index;
-
- switch( diagonals )
- {
- case DIAGONALS_INSIDE_TO_OUTSIDE:
- if (uindex < 5 * numInsideEdgePoints - 5)
- {
- uint p = uindex / 5;
- uint r = uindex - p * 5;
- if (r < 2)
- {
- pt = outsideInsideEdgePointBaseOffset.x + p + r;
- }
- else if (r < 4)
- {
- pt = outsideInsideEdgePointBaseOffset.y + p + r;
- }
- else
- {
- pt = -1;
- }
- }
- else
- {
- int r = i - (4 + 5 * numInsideEdgePoints - 5);
- if (r < 2)
- {
- pt = outsideInsideEdgePointBaseOffset.x + numInsideEdgePoints - 1 + r;
- }
- else if (r == 2)
- {
- pt = outsideInsideEdgePointBaseOffset.y + numInsideEdgePoints - 1;
- }
- else
- {
- pt = -1;
- }
- }
- break;
-
- case DIAGONALS_INSIDE_TO_OUTSIDE_EXCEPT_MIDDLE: // Assumes ODD tessellation
- if (uindex < (numInsideEdgePoints / 2 - 1) * 5)
- {
- // First half
- uint p = uindex / 5;
- uint r = uindex - p * 5;
- if (r < 2)
- {
- pt = outsideInsideEdgePointBaseOffset.x + p + r;
- }
- else if (r < 4)
- {
- pt = outsideInsideEdgePointBaseOffset.y + p;
- }
- else
- {
- pt = -1;
- }
- }
- else if (uindex < (numInsideEdgePoints / 2 - 1) * 5 + 8)
- {
- // Middle
- uint r = uindex - (numInsideEdgePoints / 2 - 1) * 5;
- if (0 == r)
- {
- pt = outsideInsideEdgePointBaseOffset.x + numInsideEdgePoints / 2 - 1;
- }
- else if (r < 3)
- {
- pt = outsideInsideEdgePointBaseOffset.y + numInsideEdgePoints / 2 - 1 + (2 - r);
- }
- else if (r == 3)
- {
- pt = -1;
- }
- else if (r < 6)
- {
- pt = outsideInsideEdgePointBaseOffset.x + numInsideEdgePoints / 2 - 1 + (r - 4);
- }
- else if (r == 6)
- {
- pt = outsideInsideEdgePointBaseOffset.y + numInsideEdgePoints / 2 - 1 + 1;
- }
- else if (r == 7)
- {
- pt = -1;
- }
- }
- //else if (uindex < (numInsideEdgePoints/2-1) * 5 + 8 + (numInsideEdgePoints - numInsideEdgePoints/2 - 1) * 5)
- else if (uindex < numInsideEdgePoints * 5 - 2)
- {
- // Second half
- uint p = (uindex - (numInsideEdgePoints / 2 - 1) * 5 + 8) / 5 + numInsideEdgePoints / 2 + 1;
- uint r = uindex - (numInsideEdgePoints / 2 - 1) * 5 + 8 - (p - (numInsideEdgePoints / 2 + 1)) * 5;
- if (r < 2)
- {
- pt = outsideInsideEdgePointBaseOffset.x + p - 1 + r;
- }
- else if (r < 4)
- {
- pt = outsideInsideEdgePointBaseOffset.y + p - 1 + r;
- }
- else
- {
- pt = -1;
- }
- }
- else
- {
- //int r = i - (4 + (numInsideEdgePoints/2-1) * 5 + 8 + (numInsideEdgePoints - numInsideEdgePoints/2 - 1) * 5);
- int r = i - (numInsideEdgePoints * 5 + 2);
- if (r < 2)
- {
- pt = outsideInsideEdgePointBaseOffset.x + numInsideEdgePoints - 1 + r;
- }
- else if (r == 2)
- {
- pt = outsideInsideEdgePointBaseOffset.y + numInsideEdgePoints - 1;
- }
- else
- {
- pt = -1;
- }
- }
- break;
-
- case DIAGONALS_MIRRORED:
- if (uindex < (numInsideEdgePoints / 2 + 1) * 2)
- {
- uint p = uindex / 2;
- uint r = uindex - p * 2;
- if (0 == r)
- {
- pt = outsideInsideEdgePointBaseOffset.y + p;
- }
- else
- {
- pt = outsideInsideEdgePointBaseOffset.x + p;
- }
- }
- else if (uindex == (numInsideEdgePoints / 2 + 1) * 2)
- {
- pt = -1;
- }
- else if (uindex == (numInsideEdgePoints / 2 + 1) * 2 + 1)
- {
- pt = outsideInsideEdgePointBaseOffset.x + numInsideEdgePoints / 2;
- }
- //else if (uindex < (numInsideEdgePoints / 2 + 1) * 2 + 2 + (numInsideEdgePoints - numInsideEdgePoints / 2) * 2)
- else if (uindex < numInsideEdgePoints * 2 + 4)
- {
- uint p = (uindex - ((numInsideEdgePoints / 2 + 1) * 2 + 2)) / 2 + numInsideEdgePoints / 2;
- uint r = uindex - ((numInsideEdgePoints / 2 + 1) * 2 + 2) - (p - numInsideEdgePoints / 2) * 2;
- if (0 == r)
- {
- pt = outsideInsideEdgePointBaseOffset.x + p;
- }
- else
- {
- pt = outsideInsideEdgePointBaseOffset.y + p;
- }
- }
- //else if (uindex == (numInsideEdgePoints / 2 + 1) * 2 + 2 + (numInsideEdgePoints - numInsideEdgePoints / 2) * 2)
- else if (uindex == numInsideEdgePoints * 2 + 4)
- {
- pt = -1;
- }
- else
- {
- //int r = i - (4 + (numInsideEdgePoints / 2 + 1) * 2 + 2 + (numInsideEdgePoints - numInsideEdgePoints / 2) * 2 + 1);
- uint r = i - (numInsideEdgePoints * 2 + 9);
- if (r < 2)
- {
- pt = outsideInsideEdgePointBaseOffset.x + numInsideEdgePoints - 1 + r;
- }
- else if (r == 2)
- {
- pt = outsideInsideEdgePointBaseOffset.y + numInsideEdgePoints - 1;
- }
- else
- {
- pt = -1;
- }
- }
- break;
- }
- }
-
- return pt;
-}
-
-int AStitchTransition(int2 outsideInsideEdgePointBaseOffset, int2 outsideInsideNumHalfTessFactorPoints,
- int2 outsideInsideEdgeTessFactorParity,
- uint i)
-{
- outsideInsideNumHalfTessFactorPoints -= (TESSELLATOR_PARITY_ODD == outsideInsideEdgeTessFactorParity);
-
- uint2 out_in_first_half = uint2(outsidePointIndex[outsideInsideNumHalfTessFactorPoints.x][MAX_FACTOR / 2 + 1].y, insidePointIndex[outsideInsideNumHalfTessFactorPoints.y][MAX_FACTOR / 2 + 1].y) * 4;
-
- uint3 out_in_middle = 0;
- if ((outsideInsideEdgeTessFactorParity.y != outsideInsideEdgeTessFactorParity.x) || (outsideInsideEdgeTessFactorParity.y == TESSELLATOR_PARITY_ODD))
- {
- if (outsideInsideEdgeTessFactorParity.y == outsideInsideEdgeTessFactorParity.x)
- {
- // Quad in the middle
- out_in_middle.z = 5;
- out_in_middle.xy = 1;
- }
- else if (TESSELLATOR_PARITY_EVEN == outsideInsideEdgeTessFactorParity.y)
- {
- // Triangle pointing inside
- out_in_middle.z = 4;
- out_in_middle.x = 1;
- }
- else
- {
- // Triangle pointing outside
- out_in_middle.z = 4;
- out_in_middle.y = 1;
- }
- }
-
-
- int pt = -1;
-
- if (i < out_in_first_half.y)
- {
- // Advance inside
-
- uint p = i / 4;
- uint r = i - p * 4;
- p = insidePointIndex[outsideInsideNumHalfTessFactorPoints.y][p].z;
- if ((0 == r) || (2 == r))
- {
- pt = outsideInsideEdgePointBaseOffset.y + insidePointIndex[outsideInsideNumHalfTessFactorPoints.y][p].y + r / 2;
- }
- else if (1 == r)
- {
- pt = outsideInsideEdgePointBaseOffset.x + outsidePointIndex[outsideInsideNumHalfTessFactorPoints.x][p].y;
- }
- }
- else
- {
- i -= out_in_first_half.y;
-
- if (i < out_in_first_half.x)
- {
- // Advance outside
-
- uint p = i / 4;
- uint r = i - p * 4;
- p = outsidePointIndex[outsideInsideNumHalfTessFactorPoints.x][p].z;
- if (r < 2)
- {
- pt = outsideInsideEdgePointBaseOffset.x + outsidePointIndex[outsideInsideNumHalfTessFactorPoints.x][p].y + r;
- }
- else if (r == 2)
- {
- pt = outsideInsideEdgePointBaseOffset.y + insidePointIndex[outsideInsideNumHalfTessFactorPoints.y][p].y;
- if (insidePointIndex[outsideInsideNumHalfTessFactorPoints.y][p].x)
- {
- ++ pt;
- }
- }
- }
- else
- {
- i -= out_in_first_half.x;
-
- if (i < out_in_middle.z)
- {
- uint r = i;
- if (outsideInsideEdgeTessFactorParity.y == outsideInsideEdgeTessFactorParity.x)
- {
- // Quad in the middle
- if ((0 == r) || (2 == r))
- {
- pt = outsideInsideEdgePointBaseOffset.y + out_in_first_half.y / 4 + (2 == r);//r / 2;
- }
- else if ((1 == r) || (3 == r))
- {
- pt = outsideInsideEdgePointBaseOffset.x + out_in_first_half.x / 4 + (3 == r);//(r - 1) / 2;
- }
- }
- else if (TESSELLATOR_PARITY_EVEN == outsideInsideEdgeTessFactorParity.y)
- {
- // Triangle pointing inside
- if (r == 0)
- {
- pt = outsideInsideEdgePointBaseOffset.y + out_in_first_half.y / 4;
- }
- else if (r < 3)
- {
- pt = outsideInsideEdgePointBaseOffset.x + out_in_first_half.x / 4 + r - 1;
- }
- }
- else
- {
- // Triangle pointing outside
- if ((0 == r) || (2 == r))
- {
- pt = outsideInsideEdgePointBaseOffset.y + out_in_first_half.y / 4 + (2 == r);//r / 2;
- }
- else if (1 == r)
- {
- pt = outsideInsideEdgePointBaseOffset.x + out_in_first_half.x / 4;
- }
- }
- }
- else
- {
- i -= out_in_middle.z;
-
- if (i < out_in_first_half.x)
- {
- // Advance outside
-
- uint p = i / 4;
- uint r = i - p * 4;
- p = outsidePointIndex[outsideInsideNumHalfTessFactorPoints.x][p].z;
- if (r < 2)
- {
- pt = outsideInsideEdgePointBaseOffset.x + out_in_first_half.x / 4 + out_in_middle.x + (outsidePointIndex[outsideInsideNumHalfTessFactorPoints.x][MAX_FACTOR / 2 + 1].y - outsidePointIndex[outsideInsideNumHalfTessFactorPoints.x][p + 1].y) + r;
- }
- else if (r == 2)
- {
- pt = outsideInsideEdgePointBaseOffset.y + out_in_first_half.y / 4 + out_in_middle.y + (insidePointIndex[outsideInsideNumHalfTessFactorPoints.y][MAX_FACTOR / 2 + 1].y - insidePointIndex[outsideInsideNumHalfTessFactorPoints.y][p + 1].y);
- }
- }
- else
- {
- // Advance inside
-
- i -= out_in_first_half.x;
-
- uint p = i / 4;
- uint r = i - p * 4;
- p = insidePointIndex[outsideInsideNumHalfTessFactorPoints.y][p].w;
- if ((0 == r) || (2 == r))
- {
- pt = outsideInsideEdgePointBaseOffset.y + out_in_first_half.y / 4 + out_in_middle.y
- + (insidePointIndex[outsideInsideNumHalfTessFactorPoints.y][MAX_FACTOR / 2 + 1].y - insidePointIndex[outsideInsideNumHalfTessFactorPoints.y][p + 1].y) + (2 == r);//r / 2;
- }
- else if (1 == r)
- {
- pt = outsideInsideEdgePointBaseOffset.x + out_in_first_half.x / 4 + out_in_middle.x
- + (outsidePointIndex[outsideInsideNumHalfTessFactorPoints.x][MAX_FACTOR / 2 + 1].y - outsidePointIndex[outsideInsideNumHalfTessFactorPoints.x][p + 1].y);
- if (outsidePointIndex[outsideInsideNumHalfTessFactorPoints.x][p].x)
- {
- ++ pt;
- }
- }
- }
- }
- }
- }
-
- return pt;
-}
-
-[numthreads(128, 1, 1)]
-void CSTessellationIndices( uint3 DTid : SV_DispatchThreadID, uint3 Gid : SV_GroupID, uint GI : SV_GroupIndex )
-{
- uint id = DTid.x;
- //uint id = Gid.x * 128 + GI; // Workaround for some CS4x preview drivers
-
- if ( id < g_param.x )
- {
- uint tri_id = InputTriIDIndexID[id].x;
- uint index_id = InputTriIDIndexID[id].y;
- uint base_vertex = InputScanned[tri_id-1].x;
-
- float4 outside_inside_factor = InputEdgeFactor[tri_id];
-
- PROCESSED_TESS_FACTORS_TRI processedTessFactors;
- int num_points = TriProcessTessFactors(outside_inside_factor, processedTessFactors, g_partitioning);
-
- uint tessed_indices;
- if (3 == num_points)
- {
- if (index_id < 3)
- {
- tessed_indices = TransformIndex1(index_id, base_vertex);
- }
- else
- {
- tessed_indices = -1;
- }
- }
- else
- {
- // Generate primitives for all the concentric rings, one side at a time for each ring
- static const int startRing = 1;
- int numRings = ((processedTessFactors.numPointsForOutsideInside.w + 1) / 2); // +1 is so even tess includes the center point, which we want to now
-
- int4 outsideInsideHalfTessFactor = int4(ceil(processedTessFactors.outsideInsideHalfTessFactor));
- uint3 num = NumStitchTransition(outsideInsideHalfTessFactor, processedTessFactors.outsideInsideTessFactorParity);
- num.y += num.x;
- num.z += num.y;
- uint num_index = num.z;
- num_index += TotalNumStitchRegular(true, DIAGONALS_MIRRORED, processedTessFactors.numPointsForOutsideInside.w, numRings - 1) * 3;
- if( processedTessFactors.outsideInsideTessFactorParity.w == TESSELLATOR_PARITY_ODD )
- {
- num_index += 4;
- }
-
- int pt;
-
- if (index_id < num.x)
- {
- int numPointsForInsideEdge = processedTessFactors.numPointsForOutsideInside.w - 2 * startRing;
-
- pt = AStitchTransition(int2(0, processedTessFactors.insideEdgePointBaseOffset),
- outsideInsideHalfTessFactor.xw,
- processedTessFactors.outsideInsideTessFactorParity.xw,
- index_id);
- if (pt != -1)
- {
- pt = TransformIndex1(pt, base_vertex);
- }
- }
- else if (index_id < num.y)
- {
- int numPointsForInsideEdge = processedTessFactors.numPointsForOutsideInside.w - 2 * startRing;
-
- pt = AStitchTransition(
- int2(processedTessFactors.numPointsForOutsideInside.x - 1, processedTessFactors.insideEdgePointBaseOffset + numPointsForInsideEdge - 1),
- outsideInsideHalfTessFactor.yw,
- processedTessFactors.outsideInsideTessFactorParity.yw,
- index_id - num.x);
- if (pt != -1)
- {
- pt = TransformIndex1(pt, base_vertex);
- }
- }
- else if (index_id < num.z)
- {
- int numPointsForInsideEdge = processedTessFactors.numPointsForOutsideInside.w - 2 * startRing;
-
- INDEX_PATCH_CONTEXT IndexPatchContext;
- IndexPatchContext.insidePointIndexDeltaToRealValue = processedTessFactors.insideEdgePointBaseOffset + 2 * (numPointsForInsideEdge - 1);
- IndexPatchContext.insidePointIndexBadValue = numPointsForInsideEdge - 1;
- IndexPatchContext.insidePointIndexReplacementValue = processedTessFactors.insideEdgePointBaseOffset;
- IndexPatchContext.outsidePointIndexPatchBase = IndexPatchContext.insidePointIndexBadValue+1; // past inside patched index range
- IndexPatchContext.outsidePointIndexDeltaToRealValue = processedTessFactors.numPointsForOutsideInside.x + processedTessFactors.numPointsForOutsideInside.y - 2
- - IndexPatchContext.outsidePointIndexPatchBase;
- IndexPatchContext.outsidePointIndexBadValue = IndexPatchContext.outsidePointIndexPatchBase
- + processedTessFactors.numPointsForOutsideInside.z - 1;
- IndexPatchContext.outsidePointIndexReplacementValue = 0;
-
- pt = AStitchTransition(int2(numPointsForInsideEdge, 0),
- outsideInsideHalfTessFactor.zw,
- processedTessFactors.outsideInsideTessFactorParity.zw,
- index_id - num.y);
- if (pt != -1)
- {
- pt = TransformIndex2(pt, base_vertex, IndexPatchContext);
- }
- }
- else
- {
- if ((processedTessFactors.outsideInsideTessFactorParity.w == TESSELLATOR_PARITY_ODD) && (index_id >= num_index - 4))
- {
- int outsideEdgePointBaseOffset = processedTessFactors.insideEdgePointBaseOffset
- + ((processedTessFactors.numPointsForOutsideInside.w + 1) - (numRings + startRing)) * (numRings - startRing - 1) * 3;
-
- if (index_id - (num_index - 4) != 3)
- {
- pt = TransformIndex1(outsideEdgePointBaseOffset + index_id - (num_index - 4), base_vertex);
- }
- else
- {
- pt = -1;
- }
- }
- else
- {
- int ring = GetRingFromIndexStitchRegular(true, DIAGONALS_MIRRORED, processedTessFactors.numPointsForOutsideInside.w, index_id - num.z);
-
- int tn = TotalNumStitchRegular(true, DIAGONALS_MIRRORED, processedTessFactors.numPointsForOutsideInside.w, ring - 1) * 3;
- int n = NumStitchRegular(true, DIAGONALS_MIRRORED, processedTessFactors.numPointsForOutsideInside.w - 2 * ring);
-
- int edge = (index_id - num.z - tn) / n;
- int index = (index_id - num.z - tn) - edge * n;
-
- int2 outsideInsideEdgePointBaseOffset = processedTessFactors.insideEdgePointBaseOffset
- + int2(0, 3 * (processedTessFactors.numPointsForOutsideInside.w - 3))
- + ((processedTessFactors.numPointsForOutsideInside.w - (ring + startRing)) + int2(1, -1)) * (ring - startRing - 1) * 3;
-
- int numPointsForInsideEdge = processedTessFactors.numPointsForOutsideInside.w - 2 * ring;
- int numLastPointsForInsideEdge = numPointsForInsideEdge + 2;
-
- if (edge < 2)
- {
- pt = AStitchRegular(true, DIAGONALS_MIRRORED,
- numPointsForInsideEdge,
- outsideInsideEdgePointBaseOffset + (int2(numLastPointsForInsideEdge, numPointsForInsideEdge) - 1) * edge,
- index);
- if (pt != -1)
- {
- pt = TransformIndex1(pt, base_vertex);
- }
- }
- else
- {
- INDEX_PATCH_CONTEXT IndexPatchContext;
- IndexPatchContext.insidePointIndexDeltaToRealValue = outsideInsideEdgePointBaseOffset.y + (numPointsForInsideEdge - 1) * 2;
- IndexPatchContext.insidePointIndexBadValue = numPointsForInsideEdge - 1;
- IndexPatchContext.insidePointIndexReplacementValue = outsideInsideEdgePointBaseOffset.y;
- IndexPatchContext.outsidePointIndexPatchBase = IndexPatchContext.insidePointIndexBadValue+1; // past inside patched index range
- IndexPatchContext.outsidePointIndexDeltaToRealValue = outsideInsideEdgePointBaseOffset.x + (numLastPointsForInsideEdge - 1) * 2
- - IndexPatchContext.outsidePointIndexPatchBase;
- IndexPatchContext.outsidePointIndexBadValue = IndexPatchContext.outsidePointIndexPatchBase
- + numLastPointsForInsideEdge - 1;
- IndexPatchContext.outsidePointIndexReplacementValue = outsideInsideEdgePointBaseOffset.x;
-
- pt = AStitchRegular(true, DIAGONALS_MIRRORED,
- numPointsForInsideEdge,
- int2(numPointsForInsideEdge, 0),
- index);
- if (pt != -1)
- {
- pt = TransformIndex2(pt, base_vertex, IndexPatchContext);
- }
- }
- }
- }
-
- tessed_indices = pt;
- }
-
- TessedIndicesOut.Store(id*4, tessed_indices);
- }
-}
diff --git a/tests/hlsl/dxsdk/AdaptiveTessellationCS40/TessellatorCS40_TessellateVerticesCS.hlsl b/tests/hlsl/dxsdk/AdaptiveTessellationCS40/TessellatorCS40_TessellateVerticesCS.hlsl
deleted file mode 100644
index e1f6b9ec3..000000000
--- a/tests/hlsl/dxsdk/AdaptiveTessellationCS40/TessellatorCS40_TessellateVerticesCS.hlsl
+++ /dev/null
@@ -1,206 +0,0 @@
-//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues.
-//TEST:COMPARE_HLSL: -profile cs_4_0 -entry CSTessellationVertices
-//--------------------------------------------------------------------------------------
-// File: TessellatorCS40_TessellateVerticesCS.hlsl
-//
-// The CS to tessellate vertices
-//
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-#include "TessellatorCS40_common.hlsl"
-
-StructuredBuffer<uint2> InputTriIDIndexID : register(t0);
-StructuredBuffer<float4> InputEdgeFactor : register(t1);
-
-struct TessedVertex
-{
- uint BaseTriID;
- float2 bc;
-};
-RWStructuredBuffer<TessedVertex> TessedVerticesOut : register(u0);
-
-cbuffer cbCS : register(b1)
-{
- uint4 g_param;
-}
-
-void PlacePointIn1D(PROCESSED_TESS_FACTORS_TRI processedTessFactors, int ctx_index, int pt, out float location, int parity)
-{
- int numHalfTessFactorPoints = int(ceil(processedTessFactors.outsideInsideHalfTessFactor[ctx_index]));
-
- bool bFlip;
- if( pt >= numHalfTessFactorPoints )
- {
- pt = (numHalfTessFactorPoints << 1) - pt;
- if( TESSELLATOR_PARITY_ODD == parity )
- {
- pt -= 1;
- }
- bFlip = true;
- }
- else
- {
- bFlip = false;
- }
-
- if( pt == numHalfTessFactorPoints )
- {
- location = 0.5f;
- }
- else
- {
- unsigned int indexOnCeilHalfTessFactor = pt;
- unsigned int indexOnFloorHalfTessFactor = indexOnCeilHalfTessFactor;
- if( pt > processedTessFactors.outsideInsideSplitPointOnFloorHalfTessFactor[ctx_index] )
- {
- indexOnFloorHalfTessFactor -= 1;
- }
- float locationOnFloorHalfTessFactor = indexOnFloorHalfTessFactor * processedTessFactors.outsideInsideInvNumSegmentsOnFloorTessFactor[ctx_index];
- float locationOnCeilHalfTessFactor = indexOnCeilHalfTessFactor * processedTessFactors.outsideInsideInvNumSegmentsOnCeilTessFactor[ctx_index];
-
- location = lerp(locationOnFloorHalfTessFactor, locationOnCeilHalfTessFactor, frac(processedTessFactors.outsideInsideHalfTessFactor[ctx_index]));
-
- if( bFlip )
- {
- location = 1.0f - location;
- }
- }
-}
-
-[numthreads(128, 1, 1)]
-void CSTessellationVertices( uint3 DTid : SV_DispatchThreadID, uint3 Gid : SV_GroupID, uint GI : SV_GroupIndex )
-{
- uint id = DTid.x;
- //uint id = Gid.x * 128 + GI; // Workaround for some CS4x preview drivers
-
- if ( id < g_param.x )
- {
- uint tri_id = InputTriIDIndexID[id].x;
- uint vert_id = InputTriIDIndexID[id].y;
-
- float4 outside_inside_factor = InputEdgeFactor[tri_id];
-
- PROCESSED_TESS_FACTORS_TRI processedTessFactors;
- int num_points = TriProcessTessFactors(outside_inside_factor, processedTessFactors, g_partitioning);
-
- float2 uv;
- if (3 == num_points)
- {
- if (0 == vert_id)
- {
- uv = float2(0, 1);
- }
- else if (1 == vert_id)
- {
- uv = float2(0, 0);
- }
- else
- {
- uv = float2(1, 0);
- }
- }
- else
- {
- if (vert_id < processedTessFactors.insideEdgePointBaseOffset)
- {
- // Generate exterior ring edge points, clockwise starting from point V (VW, the U==0 edge)
-
- int edge;
- if (vert_id < processedTessFactors.numPointsForOutsideInside.x - 1)
- {
- edge = 0;
- }
- else
- {
- vert_id -= processedTessFactors.numPointsForOutsideInside.x - 1;
- if (vert_id < processedTessFactors.numPointsForOutsideInside.y - 1)
- {
- edge = 1;
- }
- else
- {
- vert_id -= processedTessFactors.numPointsForOutsideInside.y - 1;
- edge = 2;
- }
- }
-
- int p = vert_id;
- int endPoint = processedTessFactors.numPointsForOutsideInside[edge] - 1;
- float param;
- int q = (edge & 0x1) ? p : endPoint - p; // whether to reverse point order given we are defining V or U (W implicit):
- // edge0, VW, has V decreasing, so reverse 1D points below
- // edge1, WU, has U increasing, so don't reverse 1D points below
- // edge2, UV, has U decreasing, so reverse 1D points below
- PlacePointIn1D(processedTessFactors, edge,q,param, processedTessFactors.outsideInsideTessFactorParity[edge]);
- if (0 == edge)
- {
- uv = float2(0, param);
- }
- else if (1 == edge)
- {
- uv = float2(param, 0);
- }
- else
- {
- uv = float2(param, 1 - param);
- }
- }
- else
- {
- // Generate interior ring points, clockwise spiralling in
-
- uint index = vert_id - processedTessFactors.insideEdgePointBaseOffset;
- uint ring = 1 + (((3 * processedTessFactors.numPointsForOutsideInside.w - 6) - sqrt(sqr(3 * processedTessFactors.numPointsForOutsideInside.w - 6) - 4 * 3 * index)) + 0.001f) / 6;
- index -= 3 * (processedTessFactors.numPointsForOutsideInside.w - ring - 1) * (ring - 1);
-
- uint startPoint = ring;
- uint endPoint = processedTessFactors.numPointsForOutsideInside.w - 1 - startPoint;
- if (index < 3 * (endPoint - startPoint))
- {
- uint edge = index / (endPoint - startPoint);
- uint p = index - edge * (endPoint - startPoint) + startPoint;
-
- int perpendicularAxisPoint = startPoint;
- float perpParam;
- PlacePointIn1D(processedTessFactors, 3, perpendicularAxisPoint, perpParam, processedTessFactors.outsideInsideTessFactorParity.w);
- perpParam = perpParam * 2 / 3;
-
- float param;
- int q = (edge & 0x1) ? p : endPoint - (p - startPoint); // whether to reverse point given we are defining V or U (W implicit):
- // edge0, VW, has V decreasing, so reverse 1D points below
- // edge1, WU, has U increasing, so don't reverse 1D points below
- // edge2, UV, has U decreasing, so reverse 1D points below
- PlacePointIn1D(processedTessFactors, 3, q,param, processedTessFactors.outsideInsideTessFactorParity.w);
- // edge0 VW, has perpendicular parameter U constant
- // edge1 WU, has perpendicular parameter V constant
- // edge2 UV, has perpendicular parameter W constant
- const unsigned int deriv = 2; // reciprocal is the rate of change of edge-parallel parameters as they are pushed into the triangle
- if (0 == edge)
- {
- uv = float2(perpParam, param - perpParam / deriv);
- }
- else if (1 == edge)
- {
- uv = float2(param - perpParam / deriv, perpParam);
- }
- else
- {
- uv = float2(param - perpParam / deriv, 1 - (param - perpParam / deriv + perpParam));
- }
- }
- else
- {
- if( processedTessFactors.outsideInsideTessFactorParity.w != TESSELLATOR_PARITY_ODD )
- {
- // Last point is the point at the center.
- uv = 1 / 3.0f;
- }
- }
- }
- }
-
- TessedVerticesOut[id].BaseTriID = tri_id;
- TessedVerticesOut[id].bc = uv;
- }
-}
diff --git a/tests/hlsl/dxsdk/AdaptiveTessellationCS40/TessellatorCS40_common.hlsl b/tests/hlsl/dxsdk/AdaptiveTessellationCS40/TessellatorCS40_common.hlsl
deleted file mode 100644
index 309044cdb..000000000
--- a/tests/hlsl/dxsdk/AdaptiveTessellationCS40/TessellatorCS40_common.hlsl
+++ /dev/null
@@ -1,411 +0,0 @@
-//TEST_IGNORE_FILE:
-//--------------------------------------------------------------------------------------
-// File: TessellatorCS40_common.hlsl
-//
-// The common utils included by other shaders in the sample
-//
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-#include "TessellatorCS40_defines.h"
-
-cbuffer cbNeverChanges : register(b0)
-{
- uint4 insidePointIndex[MAX_FACTOR / 2 + 1][MAX_FACTOR / 2 + 2];
- uint4 outsidePointIndex[MAX_FACTOR / 2 + 1][MAX_FACTOR / 2 + 2];
-}
-
-#define D3D11_TESSELLATOR_MAX_EVEN_TESSELLATION_FACTOR ( 64 )
-#define D3D11_TESSELLATOR_MAX_ODD_TESSELLATION_FACTOR ( 63 )
-#define D3D11_TESSELLATOR_MIN_EVEN_TESSELLATION_FACTOR ( 2 )
-#define D3D11_TESSELLATOR_MIN_ODD_TESSELLATION_FACTOR ( 1 )
-
-#define D3D11_TESSELLATOR_PARTITIONING_INTEGER ( 0 )
-#define D3D11_TESSELLATOR_PARTITIONING_POW2 ( 1 )
-#define D3D11_TESSELLATOR_PARTITIONING_FRACTIONAL_ODD ( 2 )
-#define D3D11_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN ( 3 )
-
-#define TESSELLATOR_PARITY_EVEN ( 0 )
-#define TESSELLATOR_PARITY_ODD ( 1 )
-
-#define EPSILON 1e-6f
-#define MIN_ODD_TESSFACTOR_PLUS_HALF_EPSILON (D3D11_TESSELLATOR_MIN_ODD_TESSELLATION_FACTOR + EPSILON/2)
-
-#define DIAGONALS_INSIDE_TO_OUTSIDE ( 0 )
-#define DIAGONALS_INSIDE_TO_OUTSIDE_EXCEPT_MIDDLE ( 1 )
-#define DIAGONALS_MIRRORED ( 2 )
-
-
-// This is moved to macro defines at shader compile time, so that the partitioning mode can be changed during runtime
-//#define g_partitioning (D3D11_TESSELLATOR_PARTITIONING_POW2)
-
-
-struct PROCESSED_TESS_FACTORS_TRI
-{
- float4 outsideInsideTessFactor;
- int4 outsideInsideTessFactorParity;
-
- float4 outsideInsideInvNumSegmentsOnFloorTessFactor;
- float4 outsideInsideInvNumSegmentsOnCeilTessFactor;
- float4 outsideInsideHalfTessFactor;
- int4 outsideInsideSplitPointOnFloorHalfTessFactor;
-
- // Stuff below is specific to the traversal order
- uint4 numPointsForOutsideInside;
- uint insideEdgePointBaseOffset;
-};
-
-struct INDEX_PATCH_CONTEXT
-{
- int insidePointIndexDeltaToRealValue;
- int insidePointIndexBadValue;
- int insidePointIndexReplacementValue;
- int outsidePointIndexPatchBase;
- int outsidePointIndexDeltaToRealValue;
- int outsidePointIndexBadValue;
- int outsidePointIndexReplacementValue;
-};
-
-bool4 isEven(float4 input)
-{
- return (((uint4)input) & 1) ? false : true;
-}
-
-uint RemoveMSB(uint val)
-{
- int check;
- if( val <= 0x0000ffff )
- {
- check = ( val <= 0x000000ff ) ? 0x00000080 : 0x00008000;
- }
- else
- {
- check = ( val <= 0x00ffffff ) ? 0x00800000 : 0x80000000;
- }
- for (int i = 0; i < 8; i++, check >>= 1)
- {
- if( val & check )
- {
- return (val & ~check);
- }
- }
- return 0;
-}
-
-uint4 NumPointsForTessFactor(float4 tessFactor, int4 parity)
-{
- return TESSELLATOR_PARITY_ODD == parity ? uint4(ceil(0.5f + tessFactor / 2)) * 2 : uint4(ceil(tessFactor / 2)) * 2 + 1;
-}
-
-void ComputeTessFactorContext(float4 tessFactor, int4 parity,
- out float4 invNumSegmentsOnFloorTessFactor,
- out float4 invNumSegmentsOnCeilTessFactor,
- out float4 halfTessFactor,
- out int4 splitPointOnFloorHalfTessFactor)
-{
- halfTessFactor = tessFactor / 2;
-
- halfTessFactor += 0.5 * ((TESSELLATOR_PARITY_ODD == parity) | (0.5f == halfTessFactor));
-
- float4 floorHalfTessFactor = floor(halfTessFactor);
- float4 ceilHalfTessFactor = ceil(halfTessFactor);
- int4 numHalfTessFactorPoints = int4(ceilHalfTessFactor);
-
- for (int index = 0; index < 4; ++ index)
- {
- if( ceilHalfTessFactor[index] == floorHalfTessFactor[index] )
- {
- splitPointOnFloorHalfTessFactor[index] = /*pick value to cause this to be ignored*/ numHalfTessFactorPoints[index]+1;
- }
- else if( TESSELLATOR_PARITY_ODD == parity[index] )
- {
- if( floorHalfTessFactor[index] == 1 )
- {
- splitPointOnFloorHalfTessFactor[index] = 0;
- }
- else
- {
- splitPointOnFloorHalfTessFactor[index] = (RemoveMSB(int(floorHalfTessFactor[index]) - 1) << 1) + 1;
- }
- }
- else
- {
- splitPointOnFloorHalfTessFactor[index] = (RemoveMSB(int(floorHalfTessFactor[index])) << 1) + 1;
- }
- }
-
- int4 numFloorSegments = int4(floorHalfTessFactor * 2);
- int4 numCeilSegments = int4(ceilHalfTessFactor * 2);
- int4 s = (TESSELLATOR_PARITY_ODD == parity);
- numFloorSegments -= s;
- numCeilSegments -= s;
- invNumSegmentsOnFloorTessFactor = 1.0f / numFloorSegments;
- invNumSegmentsOnCeilTessFactor = 1.0f / numCeilSegments;
-}
-
-int TriProcessTessFactors( inout float4 tessFactor,
- out PROCESSED_TESS_FACTORS_TRI processedTessFactors,
- int partitioning )
-{
- processedTessFactors = (PROCESSED_TESS_FACTORS_TRI)0;
-
- int parity = TESSELLATOR_PARITY_EVEN;
- switch( partitioning )
- {
- case D3D11_TESSELLATOR_PARTITIONING_INTEGER:
- default:
- break;
- case D3D11_TESSELLATOR_PARTITIONING_FRACTIONAL_ODD:
- parity = TESSELLATOR_PARITY_ODD;
- break;
- case D3D11_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN:
- parity = TESSELLATOR_PARITY_EVEN;
- break;
- }
-
- // Is the patch culled?
- if( !(tessFactor.x > 0) || // NaN will pass
- !(tessFactor.y > 0) ||
- !(tessFactor.z > 0) )
- {
- return 0;
- }
-
- // Clamp edge TessFactors
- float lowerBound, upperBound;
- switch(partitioning)
- {
- case D3D11_TESSELLATOR_PARTITIONING_INTEGER:
- case D3D11_TESSELLATOR_PARTITIONING_POW2: // don't care about pow2 distinction for validation, just treat as integer
- default:
- lowerBound = D3D11_TESSELLATOR_MIN_ODD_TESSELLATION_FACTOR;
- upperBound = D3D11_TESSELLATOR_MAX_EVEN_TESSELLATION_FACTOR;
- break;
-
- case D3D11_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN:
- lowerBound = D3D11_TESSELLATOR_MIN_EVEN_TESSELLATION_FACTOR;
- upperBound = D3D11_TESSELLATOR_MAX_EVEN_TESSELLATION_FACTOR;
- break;
-
- case D3D11_TESSELLATOR_PARTITIONING_FRACTIONAL_ODD:
- lowerBound = D3D11_TESSELLATOR_MIN_ODD_TESSELLATION_FACTOR;
- upperBound = D3D11_TESSELLATOR_MAX_ODD_TESSELLATION_FACTOR;
- break;
- }
-
- tessFactor.xyz = min( upperBound, max( lowerBound, tessFactor.xyz ) );
-
- // Clamp inside TessFactors
- if(D3D11_TESSELLATOR_PARTITIONING_FRACTIONAL_ODD == partitioning)
- {
- if( (tessFactor.x > MIN_ODD_TESSFACTOR_PLUS_HALF_EPSILON) ||
- (tessFactor.y > MIN_ODD_TESSFACTOR_PLUS_HALF_EPSILON) ||
- (tessFactor.z > MIN_ODD_TESSFACTOR_PLUS_HALF_EPSILON))
- // Don't need the same check for insideTessFactor for tri patches,
- // since there is only one insideTessFactor, as opposed to quad
- // patches which have 2 insideTessFactors.
- {
- // Force picture frame
- lowerBound = D3D11_TESSELLATOR_MIN_ODD_TESSELLATION_FACTOR + EPSILON;
- }
- }
-
- tessFactor.w = min( upperBound, max( lowerBound, tessFactor.w ) );
- // Note the above clamps map NaN to lowerBound
-
- if (partitioning == D3D11_TESSELLATOR_PARTITIONING_INTEGER)
- {
- tessFactor = ceil(tessFactor);
- }
- else if (partitioning == D3D11_TESSELLATOR_PARTITIONING_POW2)
- {
- static const int exponentMask = 0x7f800000;
- static const int mantissaMask = 0x007fffff;
- static const int exponentLSB = 0x00800000;
-
- int4 bits = asint(tessFactor);
- tessFactor = bits & mantissaMask ? asfloat((bits & exponentMask) + exponentLSB) : tessFactor;
- }
-
- // Process tessFactors
- if ((partitioning == D3D11_TESSELLATOR_PARTITIONING_INTEGER)|| (partitioning == D3D11_TESSELLATOR_PARTITIONING_POW2))
- {
- bool4 e = isEven(tessFactor);
- processedTessFactors.outsideInsideTessFactorParity.xyz = e.xyz ? TESSELLATOR_PARITY_EVEN : TESSELLATOR_PARITY_ODD;
- processedTessFactors.outsideInsideTessFactorParity.w = (e.w || (1 == tessFactor.w)) ? TESSELLATOR_PARITY_EVEN : TESSELLATOR_PARITY_ODD;
- }
- else
- {
- processedTessFactors.outsideInsideTessFactorParity = parity;
- }
-
- processedTessFactors.outsideInsideTessFactor = tessFactor;
-
- if (((partitioning == D3D11_TESSELLATOR_PARTITIONING_INTEGER)|| (partitioning == D3D11_TESSELLATOR_PARTITIONING_POW2)) || (parity == TESSELLATOR_PARITY_ODD))
- {
- // Special case if all TessFactors are 1
- if( (1 == processedTessFactors.outsideInsideTessFactor.x) &&
- (1 == processedTessFactors.outsideInsideTessFactor.y) &&
- (1 == processedTessFactors.outsideInsideTessFactor.z) &&
- (1 == processedTessFactors.outsideInsideTessFactor.w) )
- {
- return 3;
- }
- }
-
- // Compute per-TessFactor metadata
- ComputeTessFactorContext(processedTessFactors.outsideInsideTessFactor, processedTessFactors.outsideInsideTessFactorParity,
- processedTessFactors.outsideInsideInvNumSegmentsOnFloorTessFactor,
- processedTessFactors.outsideInsideInvNumSegmentsOnCeilTessFactor,
- processedTessFactors.outsideInsideHalfTessFactor,
- processedTessFactors.outsideInsideSplitPointOnFloorHalfTessFactor);
-
- // Compute some initial data.
-
- // outside edge offsets and storage
- processedTessFactors.numPointsForOutsideInside = NumPointsForTessFactor(processedTessFactors.outsideInsideTessFactor, processedTessFactors.outsideInsideTessFactorParity);
- int NumPoints = processedTessFactors.numPointsForOutsideInside.x + processedTessFactors.numPointsForOutsideInside.y + processedTessFactors.numPointsForOutsideInside.z - 3;
-
- // inside edge offsets
- {
- uint pointCountMin = (processedTessFactors.outsideInsideTessFactorParity.w == TESSELLATOR_PARITY_ODD) ? 4 : 3;
- // max() allows degenerate transition regions when inside TessFactor == 1
- processedTessFactors.numPointsForOutsideInside.w = max(pointCountMin, processedTessFactors.numPointsForOutsideInside.w);
- }
-
- processedTessFactors.insideEdgePointBaseOffset = NumPoints;
-
- // inside storage, including interior edges above
- {
- int numInteriorRings = (processedTessFactors.numPointsForOutsideInside.w >> 1) - 1;
- int numInteriorPoints;
- if( processedTessFactors.outsideInsideTessFactorParity.w == TESSELLATOR_PARITY_ODD )
- {
- numInteriorPoints = 3*(numInteriorRings*(numInteriorRings+1) - numInteriorRings);
- }
- else
- {
- numInteriorPoints = 3*(numInteriorRings*(numInteriorRings+1)) + 1;
- }
- NumPoints += numInteriorPoints;
- }
-
- return NumPoints;
-}
-
-int NumStitchRegular(bool bTrapezoid, int diagonals, int numInsideEdgePoints)
-{
- int num_index = 0;
-
- if( bTrapezoid )
- {
- num_index += 8;
- }
- switch( diagonals )
- {
- case DIAGONALS_INSIDE_TO_OUTSIDE:
- // Diagonals pointing from inside edge forward towards outside edge
- num_index += 5 * numInsideEdgePoints - 5;
- break;
-
- case DIAGONALS_INSIDE_TO_OUTSIDE_EXCEPT_MIDDLE: // Assumes ODD tessellation
- // Diagonals pointing from outside edge forward towards inside edge
- num_index += 5 * numInsideEdgePoints - 2;
- break;
-
- case DIAGONALS_MIRRORED:
- num_index += 2 * numInsideEdgePoints + 5;
- break;
- }
-
- return num_index;
-}
-
-uint TotalNumStitchRegular(bool bTrapezoid, int diagonals,
- int numPointsForInsideTessFactor, int ring)
-{
- uint num_index = 0;
-
- if( bTrapezoid )
- {
- num_index += 8 * (ring - 1);
- }
- switch( diagonals )
- {
- case DIAGONALS_INSIDE_TO_OUTSIDE:
- // Diagonals pointing from inside edge forward towards outside edge
- num_index += (5 * numPointsForInsideTessFactor - 35 - 5 * ring) * (ring - 1);
- break;
-
- case DIAGONALS_INSIDE_TO_OUTSIDE_EXCEPT_MIDDLE: // Assumes ODD tessellation
- // Diagonals pointing from outside edge forward towards inside edge
- num_index += (5 * numPointsForInsideTessFactor - 12 - 5 * ring) * (ring - 1);
- break;
-
- case DIAGONALS_MIRRORED:
- num_index += (2 * numPointsForInsideTessFactor + 1 - 2 * ring) * (ring - 1);
- break;
- }
-
- return num_index;
-}
-
-int sqr(int x)
-{
- return x * x;
-}
-
-int GetRingFromIndexStitchRegular(bool bTrapezoid, int diagonals, int numPointsForInsideTessFactor, int index)
-{
- int t = 0;
- if (bTrapezoid)
- {
- t = 8;
- }
-
- switch( diagonals )
- {
- case DIAGONALS_INSIDE_TO_OUTSIDE:
- t = (5 * numPointsForInsideTessFactor - (35 - t)) * 3;
- return 1 + uint((t + 15) - sqrt(sqr(t + 15) - 4 * 15 * (t + index)) + 0.001f) / 30;
-
- case DIAGONALS_INSIDE_TO_OUTSIDE_EXCEPT_MIDDLE:
- t = (5 * numPointsForInsideTessFactor - (12 - t)) * 3;
- return 1 + uint((t + 15) - sqrt(sqr(t + 15) - 4 * 15 * (t + index)) + 0.001f) / 30;
-
- case DIAGONALS_MIRRORED:
- t = ((t + 1) + 2 * numPointsForInsideTessFactor) * 3;
- return 1 + uint((t + 6) - sqrt(sqr(t + 6) - 4 * 6 * (t + index)) + 0.001f) / 12;
-
- default:
- return -1;
- }
-}
-
-uint3 NumStitchTransition(int4 outsideInsideNumHalfTessFactorPoints,
- int4 outsideInsideEdgeTessFactorParity)
-{
- outsideInsideNumHalfTessFactorPoints -= (TESSELLATOR_PARITY_ODD == outsideInsideEdgeTessFactorParity);
-
- uint3 num_index = insidePointIndex[outsideInsideNumHalfTessFactorPoints.w][MAX_FACTOR / 2 + 1].y * 8;
-
- [unroll]
- for (int edge = 0; edge < 3; ++ edge)
- {
- num_index[edge] += outsidePointIndex[outsideInsideNumHalfTessFactorPoints[edge]][MAX_FACTOR / 2 + 1].y * 8;
-
- if( (outsideInsideEdgeTessFactorParity.w != outsideInsideEdgeTessFactorParity[edge]) || (outsideInsideEdgeTessFactorParity.w == TESSELLATOR_PARITY_ODD))
- {
- if( outsideInsideEdgeTessFactorParity.w == outsideInsideEdgeTessFactorParity[edge] )
- {
- num_index[edge] += 5;
- }
- else
- {
- num_index[edge] += 4;
- }
- }
- }
-
- return num_index;
-}
diff --git a/tests/hlsl/dxsdk/AdaptiveTessellationCS40/TessellatorCS40_defines.h b/tests/hlsl/dxsdk/AdaptiveTessellationCS40/TessellatorCS40_defines.h
deleted file mode 100644
index 6b4382393..000000000
--- a/tests/hlsl/dxsdk/AdaptiveTessellationCS40/TessellatorCS40_defines.h
+++ /dev/null
@@ -1,9 +0,0 @@
-//--------------------------------------------------------------------------------------
-// File: TessellatorCS40_defines.h
-//
-// This file defines common constants which are included by both CPU code and shader code
-//
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-#define MAX_FACTOR 16
diff --git a/tests/hlsl/dxsdk/BC6HBC7EncoderCS/Shaders/BC6HEncode.hlsl b/tests/hlsl/dxsdk/BC6HBC7EncoderCS/Shaders/BC6HEncode.hlsl
deleted file mode 100644
index 1e40c80ef..000000000
--- a/tests/hlsl/dxsdk/BC6HBC7EncoderCS/Shaders/BC6HEncode.hlsl
+++ /dev/null
@@ -1,2567 +0,0 @@
-//TEST_IGNORE_FILE:
-//--------------------------------------------------------------------------------------
-// File: BC6HEncode.hlsl
-//
-// The Compute Shader for BC6H Encoder
-//
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-//#define REF_DEVICE
-
-#define UINTLENGTH 32
-#define NCHANNELS 3
-#define SIGNED_F16 96
-#define UNSIGNED_F16 95
-#define MAX_FLOAT asfloat(0x7F7FFFFF)
-#define MIN_FLOAT asfloat(0xFF7FFFFF)
-#define MAX_INT asint(0x7FFFFFFF)
-#define MIN_INT asint(0x80000000)
-
-cbuffer cbCS : register( b0 )
-{
- uint g_tex_width;
- uint g_num_block_x;
- uint g_format; //either SIGNED_F16 for DXGI_FORMAT_BC6H_SF16 or UNSIGNED_F16 for DXGI_FORMAT_BC6H_UF16
- uint g_mode_id;
- uint g_start_block_id;
- uint g_num_total_blocks;
-};
-
-static const uint candidateModeMemory[14] = { 0x00, 0x01, 0x02, 0x06, 0x0A, 0x0E, 0x12, 0x16, 0x1A, 0x1E, 0x03, 0x07, 0x0B, 0x0F };
-static const uint candidateModeFlag[14] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14 };
-static const bool candidateModeTransformed[14] = { true, true, true, true, true, true, true, true, true, false, false, true, true, true };
-static const uint4 candidateModePrec[14] = { uint4(10,5,5,5), uint4(7,6,6,6),
- uint4(11,5,4,4), uint4(11,4,5,4), uint4(11,4,4,5), uint4(9,5,5,5),
- uint4(8,6,5,5), uint4(8,5,6,5), uint4(8,5,5,6), uint4(6,6,6,6),
- uint4(10,10,10,10), uint4(11,9,9,9), uint4(12,8,8,8), uint4(16,4,4,4) };
-
-/*static const uint4x4 candidateSection[32] =
-{
- {0,0,1,1, 0,0,1,1, 0,0,1,1, 0,0,1,1}, {0,0,0,1, 0,0,0,1, 0,0,0,1, 0,0,0,1}, {0,1,1,1, 0,1,1,1, 0,1,1,1, 0,1,1,1}, {0,0,0,1, 0,0,1,1, 0,0,1,1, 0,1,1,1},
- {0,0,0,0, 0,0,0,1, 0,0,0,1, 0,0,1,1}, {0,0,1,1, 0,1,1,1, 0,1,1,1, 1,1,1,1}, {0,0,0,1, 0,0,1,1, 0,1,1,1, 1,1,1,1}, {0,0,0,0, 0,0,0,1, 0,0,1,1, 0,1,1,1},
- {0,0,0,0, 0,0,0,0, 0,0,0,1, 0,0,1,1}, {0,0,1,1, 0,1,1,1, 1,1,1,1, 1,1,1,1}, {0,0,0,0, 0,0,0,1, 0,1,1,1, 1,1,1,1}, {0,0,0,0, 0,0,0,0, 0,0,0,1, 0,1,1,1},
- {0,0,0,1, 0,1,1,1, 1,1,1,1, 1,1,1,1}, {0,0,0,0, 0,0,0,0, 1,1,1,1, 1,1,1,1}, {0,0,0,0, 1,1,1,1, 1,1,1,1, 1,1,1,1}, {0,0,0,0, 0,0,0,0, 0,0,0,0, 1,1,1,1},
- {0,0,0,0, 1,0,0,0, 1,1,1,0, 1,1,1,1}, {0,1,1,1, 0,0,0,1, 0,0,0,0, 0,0,0,0}, {0,0,0,0, 0,0,0,0, 1,0,0,0, 1,1,1,0}, {0,1,1,1, 0,0,1,1, 0,0,0,1, 0,0,0,0},
- {0,0,1,1, 0,0,0,1, 0,0,0,0, 0,0,0,0}, {0,0,0,0, 1,0,0,0, 1,1,0,0, 1,1,1,0}, {0,0,0,0, 0,0,0,0, 1,0,0,0, 1,1,0,0}, {0,1,1,1, 0,0,1,1, 0,0,1,1, 0,0,0,1},
- {0,0,1,1, 0,0,0,1, 0,0,0,1, 0,0,0,0}, {0,0,0,0, 1,0,0,0, 1,0,0,0, 1,1,0,0}, {0,1,1,0, 0,1,1,0, 0,1,1,0, 0,1,1,0}, {0,0,1,1, 0,1,1,0, 0,1,1,0, 1,1,0,0},
- {0,0,0,1, 0,1,1,1, 1,1,1,0, 1,0,0,0}, {0,0,0,0, 1,1,1,1, 1,1,1,1, 0,0,0,0}, {0,1,1,1, 0,0,0,1, 1,0,0,0, 1,1,1,0}, {0,0,1,1, 1,0,0,1, 1,0,0,1, 1,1,0,0}
-};*/
-
-static const uint candidateSectionBit[32] =
-{
- 0xCCCC, 0x8888, 0xEEEE, 0xECC8,
- 0xC880, 0xFEEC, 0xFEC8, 0xEC80,
- 0xC800, 0xFFEC, 0xFE80, 0xE800,
- 0xFFE8, 0xFF00, 0xFFF0, 0xF000,
- 0xF710, 0x008E, 0x7100, 0x08CE,
- 0x008C, 0x7310, 0x3100, 0x8CCE,
- 0x088C, 0x3110, 0x6666, 0x366C,
- 0x17E8, 0x0FF0, 0x718E, 0x399C
-};
-
-static const uint candidateFixUpIndex1D[32] =
-{
- 15,15,15,15,
- 15,15,15,15,
- 15,15,15,15,
- 15,15,15,15,
- 15, 2, 8, 2,
- 2, 8, 8,15,
- 2, 8, 2, 2,
- 8, 8, 2, 2
-};
-
-//0, 9, 18, 27, 37, 46, 55, 64
-static const uint aStep1[64] = {0,0,0,0,0,1,1,1,
- 1,1,1,1,1,1,2,2,
- 2,2,2,2,2,2,2,3,
- 3,3,3,3,3,3,3,3,
- 3,4,4,4,4,4,4,4,
- 4,4,5,5,5,5,5,5,
- 5,5,5,6,6,6,6,6,
- 6,6,6,6,7,7,7,7};
-
-//0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64
-static const uint aStep2[64] = { 0, 0, 0, 1, 1, 1, 1, 2,
- 2, 2, 2, 2, 3, 3, 3, 3,
- 4, 4, 4, 4, 5, 5, 5, 5,
- 6, 6, 6, 6, 6, 7, 7, 7,
- 7, 8, 8, 8, 8, 9, 9, 9,
- 9,10,10,10,10,10,11,11,
- 11,11,12,12,12,12,13,13,
- 13,13,14,14,14,14,15,15};
-
-static const float3 RGB2LUM = float3(0.2126f, 0.7152f, 0.0722f);
-
-#define THREAD_GROUP_SIZE 64
-#define BLOCK_SIZE_Y 4
-#define BLOCK_SIZE_X 4
-#define BLOCK_SIZE (BLOCK_SIZE_Y * BLOCK_SIZE_X)
-
-
-//Forward declaration
-uint3 float2half( float3 pixel_f );
-int3 start_quantize( uint3 pixel_h );
-void quantize( inout int2x3 endPoint, uint prec );
-void finish_quantize_0( inout int bBadQuantize, inout int2x3 endPoint, uint4 prec, bool transformed );
-void finish_quantize_1( inout int bBadQuantize, inout int2x3 endPoint, uint4 prec, bool transformed );
-void finish_quantize( out bool bBadQuantize, inout int2x3 endPoint, uint4 prec, bool transformed );
-
-void start_unquantize( inout int2x3 endPoint[2], uint4 prec, bool transformed );
-void start_unquantize( inout int2x3 endPoint, uint4 prec, bool transformed );
-void unquantize( inout int2x3 color, uint prec );
-uint3 finish_unquantize( int3 color );
-void generate_palette_unquantized8( out uint3 palette, int3 low, int3 high, int i );
-void generate_palette_unquantized16( out uint3 palette, int3 low, int3 high, int i );
-float3 half2float(uint3 color_h );
-
-void block_package( inout uint4 block, int2x3 endPoint[2], uint mode_type, uint partition_index );
-void block_package( inout uint4 block, int2x3 endPoint, uint mode_type );
-
-void swap(inout int3 lhs, inout int3 rhs)
-{
- int3 tmp = lhs;
- lhs = rhs;
- rhs = tmp;
-}
-
-Texture2D<float4> g_Input : register( t0 );
-StructuredBuffer<uint4> g_InBuff : register( t1 );
-
-RWStructuredBuffer<uint4> g_OutBuff : register( u0 );
-
-struct SharedData
-{
- float3 pixel;
- int3 pixel_ph;
- float3 pixel_hr;
- float pixel_lum;
- float error;
- uint best_mode;
- uint best_partition;
- int3 endPoint_low;
- int3 endPoint_high;
- float endPoint_lum_low;
- float endPoint_lum_high;
-};
-
-groupshared SharedData shared_temp[THREAD_GROUP_SIZE];
-
-[numthreads( THREAD_GROUP_SIZE, 1, 1 )]
-void TryModeG10CS( uint GI : SV_GroupIndex, uint3 groupID : SV_GroupID )
-{
- const uint MAX_USED_THREAD = 16;
- uint BLOCK_IN_GROUP = THREAD_GROUP_SIZE / MAX_USED_THREAD;
- uint blockInGroup = GI / MAX_USED_THREAD;
- uint blockID = g_start_block_id + groupID.x * BLOCK_IN_GROUP + blockInGroup;
- uint threadBase = blockInGroup * MAX_USED_THREAD;
- uint threadInBlock = GI - threadBase;
-
-#ifndef REF_DEVICE
- if (blockID >= g_num_total_blocks)
- {
- return;
- }
-#endif
-
- uint block_y = blockID / g_num_block_x;
- uint block_x = blockID - block_y * g_num_block_x;
- uint base_x = block_x * BLOCK_SIZE_X;
- uint base_y = block_y * BLOCK_SIZE_Y;
-
- if (threadInBlock < 16)
- {
- shared_temp[GI].pixel = g_Input.Load( uint3( base_x + threadInBlock % 4, base_y + threadInBlock / 4, 0 ) ).rgb;
- uint3 pixel_h = float2half( shared_temp[GI].pixel );
- shared_temp[GI].pixel_hr = half2float(pixel_h);
- shared_temp[GI].pixel_lum = dot(shared_temp[GI].pixel_hr, RGB2LUM);
- shared_temp[GI].pixel_ph = start_quantize( pixel_h );
-
- shared_temp[GI].endPoint_low = shared_temp[GI].pixel_ph;
- shared_temp[GI].endPoint_high = shared_temp[GI].pixel_ph;
- shared_temp[GI].endPoint_lum_low = shared_temp[GI].pixel_lum;
- shared_temp[GI].endPoint_lum_high = shared_temp[GI].pixel_lum;
- }
-#ifdef REF_DEVICE
- GroupMemoryBarrierWithGroupSync();
-#endif
-
- if (threadInBlock < 8)
- {
- if (shared_temp[GI].endPoint_lum_low > shared_temp[GI + 8].endPoint_lum_low)
- {
- shared_temp[GI].endPoint_low = shared_temp[GI + 8].endPoint_low;
- shared_temp[GI].endPoint_lum_low = shared_temp[GI + 8].endPoint_lum_low;
- }
- if (shared_temp[GI].endPoint_lum_high < shared_temp[GI + 8].endPoint_lum_high)
- {
- shared_temp[GI].endPoint_high = shared_temp[GI + 8].endPoint_high;
- shared_temp[GI].endPoint_lum_high = shared_temp[GI + 8].endPoint_lum_high;
- }
- }
-#ifdef REF_DEVICE
- GroupMemoryBarrierWithGroupSync();
-#endif
- if (threadInBlock < 4)
- {
- if (shared_temp[GI].endPoint_lum_low > shared_temp[GI + 4].endPoint_lum_low)
- {
- shared_temp[GI].endPoint_low = shared_temp[GI + 4].endPoint_low;
- shared_temp[GI].endPoint_lum_low = shared_temp[GI + 4].endPoint_lum_low;
- }
- if (shared_temp[GI].endPoint_lum_high < shared_temp[GI + 4].endPoint_lum_high)
- {
- shared_temp[GI].endPoint_high = shared_temp[GI + 4].endPoint_high;
- shared_temp[GI].endPoint_lum_high = shared_temp[GI + 4].endPoint_lum_high;
- }
- }
-#ifdef REF_DEVICE
- GroupMemoryBarrierWithGroupSync();
-#endif
- if (threadInBlock < 2)
- {
- if (shared_temp[GI].endPoint_lum_low > shared_temp[GI + 2].endPoint_lum_low)
- {
- shared_temp[GI].endPoint_low = shared_temp[GI + 2].endPoint_low;
- shared_temp[GI].endPoint_lum_low = shared_temp[GI + 2].endPoint_lum_low;
- }
- if (shared_temp[GI].endPoint_lum_high < shared_temp[GI + 2].endPoint_lum_high)
- {
- shared_temp[GI].endPoint_high = shared_temp[GI + 2].endPoint_high;
- shared_temp[GI].endPoint_lum_high = shared_temp[GI + 2].endPoint_lum_high;
- }
- }
-#ifdef REF_DEVICE
- GroupMemoryBarrierWithGroupSync();
-#endif
- if (threadInBlock < 1)
- {
- if (shared_temp[GI].endPoint_lum_low > shared_temp[GI + 1].endPoint_lum_low)
- {
- shared_temp[GI].endPoint_low = shared_temp[GI + 1].endPoint_low;
- shared_temp[GI].endPoint_lum_low = shared_temp[GI + 1].endPoint_lum_low;
- }
- if (shared_temp[GI].endPoint_lum_high < shared_temp[GI + 1].endPoint_lum_high)
- {
- shared_temp[GI].endPoint_high = shared_temp[GI + 1].endPoint_high;
- shared_temp[GI].endPoint_lum_high = shared_temp[GI + 1].endPoint_lum_high;
- }
- }
-#ifdef REF_DEVICE
- GroupMemoryBarrierWithGroupSync();
-#endif
-
- //ergod mode_type 11:14
- if ( threadInBlock == 0 )
- {
- int2x3 endPoint;
- // find_axis
- endPoint[0] = shared_temp[threadBase + 0].endPoint_low;
- endPoint[1] = shared_temp[threadBase + 0].endPoint_high;
-
- //compute_index
- float3 span = endPoint[1] - endPoint[0];// fixed a bug in v0.2
- float span_norm_sqr = dot( span, span );// fixed a bug in v0.2
- float dotProduct = dot( span, shared_temp[threadBase + 0].pixel_ph - endPoint[0] );// fixed a bug in v0.2
- if ( span_norm_sqr > 0 && dotProduct >= 0 && uint( dotProduct * 63.49999 / span_norm_sqr ) > 32 )
- {
- swap(endPoint[0], endPoint[1]);
-
- shared_temp[GI].endPoint_low = endPoint[0];
- shared_temp[GI].endPoint_high = endPoint[1];
- }
- }
-#ifdef REF_DEVICE
- GroupMemoryBarrierWithGroupSync();
-#endif
-
- if (threadInBlock < 4)
- {
- int2x3 endPoint;
- endPoint[0] = shared_temp[threadBase + 0].endPoint_low;
- endPoint[1] = shared_temp[threadBase + 0].endPoint_high;
-
- float3 span = endPoint[1] - endPoint[0];
- float span_norm_sqr = dot( span, span );
-
- uint4 prec = candidateModePrec[threadInBlock + 10];
- int2x3 endPoint_q = endPoint;
- quantize( endPoint_q, prec.x );
-
- bool transformed = candidateModeTransformed[threadInBlock + 10];
- if (transformed)
- {
- endPoint_q[1] -= endPoint_q[0];
- }
-
- bool bBadQuantize;
- finish_quantize( bBadQuantize, endPoint_q, prec, transformed );
-
- start_unquantize( endPoint_q, prec, transformed );
-
- unquantize( endPoint_q, prec.x );
-
- float error = 0;
- [loop]for ( uint j = 0; j < 16; j ++ )
- {
- float dotProduct = dot( span, shared_temp[threadBase + j].pixel_ph - endPoint[0] );// fixed a bug in v0.2
- uint index = ( span_norm_sqr <= 0 || dotProduct <= 0 ) ? 0
- : ( ( dotProduct < span_norm_sqr ) ? aStep2[ uint( dotProduct * 63.49999 / span_norm_sqr ) ] : aStep2[63] );
-
- uint3 pixel_rh;
- generate_palette_unquantized16( pixel_rh, endPoint_q[0], endPoint_q[1], index );
- float3 pixel_r = half2float( pixel_rh );
- pixel_r -= shared_temp[threadBase + j].pixel_hr;
- error += dot(pixel_r, pixel_r);
- }
- if ( bBadQuantize )
- error = 1e20f;
-
- shared_temp[GI].error = error;
- shared_temp[GI].best_mode = candidateModeFlag[threadInBlock + 10];
- }
-#ifdef REF_DEVICE
- GroupMemoryBarrierWithGroupSync();
-#endif
-
- if (threadInBlock < 2)
- {
- if ( shared_temp[GI].error > shared_temp[GI + 2].error )
- {
- shared_temp[GI].error = shared_temp[GI + 2].error;
- shared_temp[GI].best_mode = shared_temp[GI + 2].best_mode;
- }
- }
-#ifdef REF_DEVICE
- GroupMemoryBarrierWithGroupSync();
-#endif
- if (threadInBlock < 1)
- {
- if ( shared_temp[GI].error > shared_temp[GI + 1].error )
- {
- shared_temp[GI].error = shared_temp[GI + 1].error;
- shared_temp[GI].best_mode = shared_temp[GI + 1].best_mode;
- }
-
- g_OutBuff[blockID] = uint4(asuint(shared_temp[GI].error), shared_temp[GI].best_mode, 0, 0);
- }
-}
-
-[numthreads( THREAD_GROUP_SIZE, 1, 1 )]
-void TryModeLE10CS( uint GI : SV_GroupIndex, uint3 groupID : SV_GroupID )
-{
- const uint MAX_USED_THREAD = 32;
- uint BLOCK_IN_GROUP = THREAD_GROUP_SIZE / MAX_USED_THREAD;
- uint blockInGroup = GI / MAX_USED_THREAD;
- uint blockID = g_start_block_id + groupID.x * BLOCK_IN_GROUP + blockInGroup;
- uint threadBase = blockInGroup * MAX_USED_THREAD;
- uint threadInBlock = GI - threadBase;
-
-#ifndef REF_DEVICE
- if (blockID >= g_num_total_blocks)
- {
- return;
- }
-
- if (asfloat(g_InBuff[blockID].x) < 1e-6f)
- {
- g_OutBuff[blockID] = g_InBuff[blockID];
- return;
- }
-#endif
-
- uint block_y = blockID / g_num_block_x;
- uint block_x = blockID - block_y * g_num_block_x;
- uint base_x = block_x * BLOCK_SIZE_X;
- uint base_y = block_y * BLOCK_SIZE_Y;
-
- if (threadInBlock < 16)
- {
- shared_temp[GI].pixel = g_Input.Load( uint3( base_x + threadInBlock % 4, base_y + threadInBlock / 4, 0 ) ).rgb;
- uint3 pixel_h = float2half( shared_temp[GI].pixel );
- shared_temp[GI].pixel_hr = half2float(pixel_h);
- shared_temp[GI].pixel_lum = dot(shared_temp[GI].pixel_hr, RGB2LUM);
- shared_temp[GI].pixel_ph = start_quantize( pixel_h );
- }
-#ifdef REF_DEVICE
- GroupMemoryBarrierWithGroupSync();
-#endif
-
- //ergod mode_type 1:10
- if (threadInBlock < 32)
- {
- // find_axis
- int2x3 endPoint[2];
- endPoint[0][0] = MAX_INT;
- endPoint[0][1] = MIN_INT;
- endPoint[1][0] = MAX_INT;
- endPoint[1][1] = MIN_INT;
-
- float2 endPoint_lum[2];
- endPoint_lum[0][0] = MAX_FLOAT;
- endPoint_lum[0][1] = MIN_FLOAT;
- endPoint_lum[1][0] = MAX_FLOAT;
- endPoint_lum[1][1] = MIN_FLOAT;
-
- uint bit = candidateSectionBit[threadInBlock];
- for ( uint i = 0; i < 16; i ++ )
- {
- int3 pixel_ph = shared_temp[threadBase + i].pixel_ph;
- float pixel_lum = shared_temp[threadBase + i].pixel_lum;
- if ( (bit >> i) & 1 ) //It gets error when using "candidateSection" as "endPoint_ph" index
- {
- if (endPoint_lum[1][0] > pixel_lum)
- {
- endPoint[1][0] = pixel_ph;
- endPoint_lum[1][0] = pixel_lum;
- }
- if (endPoint_lum[1][1] < pixel_lum)
- {
- endPoint[1][1] = pixel_ph;
- endPoint_lum[1][1] = pixel_lum;
- }
- }
- else
- {
- if (endPoint_lum[0][0] > pixel_lum)
- {
- endPoint[0][0] = pixel_ph;
- endPoint_lum[0][0] = pixel_lum;
- }
- if (endPoint_lum[0][1] < pixel_lum)
- {
- endPoint[0][1] = pixel_ph;
- endPoint_lum[0][1] = pixel_lum;
- }
- }
- }
-
- //compute_index
- float3 span[2];// fixed a bug in v0.2
- float span_norm_sqr[2];// fixed a bug in v0.2
- [unroll]
- for (uint p = 0; p < 2; ++ p)
- {
- span[p] = endPoint[p][1] - endPoint[p][0];
- span_norm_sqr[p] = dot( span[p], span[p] );
-
- float dotProduct = dot( span[p], shared_temp[threadBase + (0 == p ? 0 : candidateFixUpIndex1D[threadInBlock])].pixel_ph - endPoint[p][0] );// fixed a bug in v0.2
- if ( span_norm_sqr[p] > 0 && dotProduct >= 0 && uint( dotProduct * 63.49999 / span_norm_sqr[p] ) > 32 )
- {
- span[p] = -span[p];
- swap(endPoint[p][0], endPoint[p][1]);
- }
- }
-
- uint4 prec = candidateModePrec[g_mode_id];
- int2x3 endPoint_q[2] = endPoint;
- quantize( endPoint_q[0], prec.x );
- quantize( endPoint_q[1], prec.x );
-
- bool transformed = candidateModeTransformed[g_mode_id];
- if (transformed)
- {
- endPoint_q[0][1] -= endPoint_q[0][0];
- endPoint_q[1][0] -= endPoint_q[0][0];
- endPoint_q[1][1] -= endPoint_q[0][0];
- }
-
- int bBadQuantize = 0;
- finish_quantize_0( bBadQuantize, endPoint_q[0], prec, transformed );
- finish_quantize_1( bBadQuantize, endPoint_q[1], prec, transformed );
-
- start_unquantize( endPoint_q, prec, transformed );
-
- unquantize( endPoint_q[0], prec.x );
- unquantize( endPoint_q[1], prec.x );
-
- float error = 0;
- for ( uint j = 0; j < 16; j ++ )
- {
- uint3 pixel_rh;
- if ((bit >> j) & 1)
- {
- float dotProduct = dot( span[1], shared_temp[threadBase + j].pixel_ph - endPoint[1][0] );// fixed a bug in v0.2
- uint index = ( span_norm_sqr[1] <= 0 || dotProduct <= 0 ) ? 0
- : ( ( dotProduct < span_norm_sqr[1] ) ? aStep1[ uint( dotProduct * 63.49999 / span_norm_sqr[1] ) ] : aStep1[63] );
- generate_palette_unquantized8( pixel_rh, endPoint_q[1][0], endPoint_q[1][1], index );
- }
- else
- {
- float dotProduct = dot( span[0], shared_temp[threadBase + j].pixel_ph - endPoint[0][0] );// fixed a bug in v0.2
- uint index = ( span_norm_sqr[0] <= 0 || dotProduct <= 0 ) ? 0
- : ( ( dotProduct < span_norm_sqr[0] ) ? aStep1[ uint( dotProduct * 63.49999 / span_norm_sqr[0] ) ] : aStep1[63] );
- generate_palette_unquantized8( pixel_rh, endPoint_q[0][0], endPoint_q[0][1], index );
- }
-
- float3 pixel_r = half2float( pixel_rh );
- pixel_r -= shared_temp[threadBase + j].pixel_hr;
- error += dot(pixel_r, pixel_r);
- }
- if ( bBadQuantize )
- error = 1e20f;
-
- shared_temp[GI].error = error;
- shared_temp[GI].best_mode = candidateModeFlag[g_mode_id];
- shared_temp[GI].best_partition = threadInBlock;
- }
-#ifdef REF_DEVICE
- GroupMemoryBarrierWithGroupSync();
-#endif
-
- if (threadInBlock < 16)
- {
- if ( shared_temp[GI].error > shared_temp[GI + 16].error )
- {
- shared_temp[GI].error = shared_temp[GI + 16].error;
- shared_temp[GI].best_mode = shared_temp[GI + 16].best_mode;
- shared_temp[GI].best_partition = shared_temp[GI + 16].best_partition;
- }
- }
-#ifdef REF_DEVICE
- GroupMemoryBarrierWithGroupSync();
-#endif
- if (threadInBlock < 8)
- {
- if ( shared_temp[GI].error > shared_temp[GI + 8].error )
- {
- shared_temp[GI].error = shared_temp[GI + 8].error;
- shared_temp[GI].best_mode = shared_temp[GI + 8].best_mode;
- shared_temp[GI].best_partition = shared_temp[GI + 8].best_partition;
- }
- }
-#ifdef REF_DEVICE
- GroupMemoryBarrierWithGroupSync();
-#endif
- if (threadInBlock < 4)
- {
- if ( shared_temp[GI].error > shared_temp[GI + 4].error )
- {
- shared_temp[GI].error = shared_temp[GI + 4].error;
- shared_temp[GI].best_mode = shared_temp[GI + 4].best_mode;
- shared_temp[GI].best_partition = shared_temp[GI + 4].best_partition;
- }
- }
-#ifdef REF_DEVICE
- GroupMemoryBarrierWithGroupSync();
-#endif
- if (threadInBlock < 2)
- {
- if ( shared_temp[GI].error > shared_temp[GI + 2].error )
- {
- shared_temp[GI].error = shared_temp[GI + 2].error;
- shared_temp[GI].best_mode = shared_temp[GI + 2].best_mode;
- shared_temp[GI].best_partition = shared_temp[GI + 2].best_partition;
- }
- }
-#ifdef REF_DEVICE
- GroupMemoryBarrierWithGroupSync();
-#endif
- if (threadInBlock < 1)
- {
- if ( shared_temp[GI].error > shared_temp[GI + 1].error )
- {
- shared_temp[GI].error = shared_temp[GI + 1].error;
- shared_temp[GI].best_mode = shared_temp[GI + 1].best_mode;
- shared_temp[GI].best_partition = shared_temp[GI + 1].best_partition;
- }
-
- if (asfloat(g_InBuff[blockID].x) > shared_temp[GI].error)
- {
- g_OutBuff[blockID] = uint4(asuint(shared_temp[GI].error), shared_temp[GI].best_mode, shared_temp[GI].best_partition, 0);
- }
- else
- {
- g_OutBuff[blockID] = g_InBuff[blockID];
- }
- }
-}
-
-[numthreads( THREAD_GROUP_SIZE, 1, 1 )]
-void EncodeBlockCS(uint GI : SV_GroupIndex, uint3 groupID : SV_GroupID)
-{
- const uint MAX_USED_THREAD = 32;
- uint BLOCK_IN_GROUP = THREAD_GROUP_SIZE / MAX_USED_THREAD;
- uint blockInGroup = GI / MAX_USED_THREAD;
- uint blockID = g_start_block_id + groupID.x * BLOCK_IN_GROUP + blockInGroup;
- uint threadBase = blockInGroup * MAX_USED_THREAD;
- uint threadInBlock = GI - threadBase;
-
-#ifndef REF_DEVICE
- if (blockID >= g_num_total_blocks)
- {
- return;
- }
-#endif
-
- uint block_y = blockID / g_num_block_x;
- uint block_x = blockID - block_y * g_num_block_x;
- uint base_x = block_x * BLOCK_SIZE_X;
- uint base_y = block_y * BLOCK_SIZE_Y;
-
- if (threadInBlock < 16)
- {
- shared_temp[GI].pixel = g_Input.Load( uint3( base_x + threadInBlock % 4, base_y + threadInBlock / 4, 0 ) ).rgb;
- shared_temp[GI].pixel_lum = dot(shared_temp[GI].pixel, RGB2LUM);
- uint3 pixel_h = float2half( shared_temp[GI].pixel );
- shared_temp[GI].pixel_ph = start_quantize( pixel_h );
- }
-#ifdef REF_DEVICE
- GroupMemoryBarrierWithGroupSync();
-#endif
-
- uint best_mode = g_InBuff[blockID].y;
- uint best_partition = g_InBuff[blockID].z;
-
- uint4 block = 0;
-
- if (threadInBlock < 32)
- {
- int2x3 endPoint;
- endPoint[0] = MAX_INT;
- endPoint[1] = MIN_INT;
-
- float2 endPoint_lum;
- endPoint_lum[0] = MAX_FLOAT;
- endPoint_lum[1] = MIN_FLOAT;
-
- int2 endPoint_lum_index;
- endPoint_lum_index[0] = -1;
- endPoint_lum_index[1] = -1;
-
- int3 pixel_ph = shared_temp[threadBase + (threadInBlock & 0xF)].pixel_ph;
- float pixel_lum = shared_temp[threadBase + (threadInBlock & 0xF)].pixel_lum;
- if (threadInBlock < 16)
- {
- if (best_mode > 10)
- {
- endPoint[0] = endPoint[1] = pixel_ph;
- endPoint_lum[0] = endPoint_lum[1] = pixel_lum;
- }
- else
- {
- uint bits = candidateSectionBit[best_partition];
- if (0 == ((bits >> threadInBlock) & 1))
- {
- endPoint[0] = endPoint[1] = pixel_ph;
- endPoint_lum[0] = endPoint_lum[1] = pixel_lum;
- }
- }
- }
- else
- {
- if (best_mode <= 10)
- {
- uint bits = candidateSectionBit[best_partition];
- if (1 == ((bits >> (threadInBlock & 0xF)) & 1))
- {
- endPoint[0] = endPoint[1] = pixel_ph;
- endPoint_lum[0] = endPoint_lum[1] = pixel_lum;
- }
- }
- }
-
- shared_temp[GI].endPoint_low = endPoint[0];
- shared_temp[GI].endPoint_high = endPoint[1];
-
- shared_temp[GI].endPoint_lum_low = endPoint_lum[0];
- shared_temp[GI].endPoint_lum_high = endPoint_lum[1];
- }
-#ifdef REF_DEVICE
- GroupMemoryBarrierWithGroupSync();
-#endif
- if ((threadInBlock & 0xF) < 8)
- {
- if (shared_temp[GI].endPoint_lum_low > shared_temp[GI + 8].endPoint_lum_low)
- {
- shared_temp[GI].endPoint_low = shared_temp[GI + 8].endPoint_low;
- shared_temp[GI].endPoint_lum_low = shared_temp[GI + 8].endPoint_lum_low;
- }
- if (shared_temp[GI].endPoint_lum_high < shared_temp[GI + 8].endPoint_lum_high)
- {
- shared_temp[GI].endPoint_high = shared_temp[GI + 8].endPoint_high;
- shared_temp[GI].endPoint_lum_high = shared_temp[GI + 8].endPoint_lum_high;
- }
- }
-#ifdef REF_DEVICE
- GroupMemoryBarrierWithGroupSync();
-#endif
- if ((threadInBlock & 0xF) < 4)
- {
- if (shared_temp[GI].endPoint_lum_low > shared_temp[GI + 4].endPoint_lum_low)
- {
- shared_temp[GI].endPoint_low = shared_temp[GI + 4].endPoint_low;
- shared_temp[GI].endPoint_lum_low = shared_temp[GI + 4].endPoint_lum_low;
- }
- if (shared_temp[GI].endPoint_lum_high < shared_temp[GI + 4].endPoint_lum_high)
- {
- shared_temp[GI].endPoint_high = shared_temp[GI + 4].endPoint_high;
- shared_temp[GI].endPoint_lum_high = shared_temp[GI + 4].endPoint_lum_high;
- }
- }
-#ifdef REF_DEVICE
- GroupMemoryBarrierWithGroupSync();
-#endif
- if ((threadInBlock & 0xF) < 2)
- {
- if (shared_temp[GI].endPoint_lum_low > shared_temp[GI + 2].endPoint_lum_low)
- {
- shared_temp[GI].endPoint_low = shared_temp[GI + 2].endPoint_low;
- shared_temp[GI].endPoint_lum_low = shared_temp[GI + 2].endPoint_lum_low;
- }
- if (shared_temp[GI].endPoint_lum_high < shared_temp[GI + 2].endPoint_lum_high)
- {
- shared_temp[GI].endPoint_high = shared_temp[GI + 2].endPoint_high;
- shared_temp[GI].endPoint_lum_high = shared_temp[GI + 2].endPoint_lum_high;
- }
- }
-#ifdef REF_DEVICE
- GroupMemoryBarrierWithGroupSync();
-#endif
- if ((threadInBlock & 0xF) < 1)
- {
- if (shared_temp[GI].endPoint_lum_low > shared_temp[GI + 1].endPoint_lum_low)
- {
- shared_temp[GI].endPoint_low = shared_temp[GI + 1].endPoint_low;
- shared_temp[GI].endPoint_lum_low = shared_temp[GI + 1].endPoint_lum_low;
- }
- if (shared_temp[GI].endPoint_lum_high < shared_temp[GI + 1].endPoint_lum_high)
- {
- shared_temp[GI].endPoint_high = shared_temp[GI + 1].endPoint_high;
- shared_temp[GI].endPoint_lum_high = shared_temp[GI + 1].endPoint_lum_high;
- }
- }
-#ifdef REF_DEVICE
- GroupMemoryBarrierWithGroupSync();
-#endif
-
- if (threadInBlock < 2)
- {
- // find_axis
- int2x3 endPoint;
- endPoint[0] = shared_temp[threadBase + threadInBlock * 16].endPoint_low;
- endPoint[1] = shared_temp[threadBase + threadInBlock * 16].endPoint_high;
-
- uint fixup = 0;
- if ((1 == threadInBlock) && (best_mode <= 10))
- {
- fixup = candidateFixUpIndex1D[best_partition];
- }
-
- float3 span = endPoint[1] - endPoint[0];
- float span_norm_sqr = dot( span, span );
- float dotProduct = dot( span, shared_temp[threadBase + fixup].pixel_ph - endPoint[0] );
- if ( span_norm_sqr > 0 && dotProduct >= 0 && uint( dotProduct * 63.49999 / span_norm_sqr ) > 32 )
- {
- swap(endPoint[0], endPoint[1]);
- }
-
- shared_temp[GI].endPoint_low = endPoint[0];
- shared_temp[GI].endPoint_high = endPoint[1];
- }
-#ifdef REF_DEVICE
- GroupMemoryBarrierWithGroupSync();
-#endif
-
- if (threadInBlock < 16)
- {
- uint bits;
- if (best_mode > 10)
- {
- bits = 0;
- }
- else
- {
- bits = candidateSectionBit[best_partition];
- }
-
- float3 span;
- float dotProduct;
- if ((bits >> threadInBlock) & 1)
- {
- span = shared_temp[threadBase + 1].endPoint_high - shared_temp[threadBase + 1].endPoint_low;
- dotProduct = dot( span, shared_temp[threadBase + threadInBlock].pixel_ph - shared_temp[threadBase + 1].endPoint_low );
- }
- else
- {
- span = shared_temp[threadBase + 0].endPoint_high - shared_temp[threadBase + 0].endPoint_low;
- dotProduct = dot( span, shared_temp[threadBase + threadInBlock].pixel_ph - shared_temp[threadBase + 0].endPoint_low );
- }
- float span_norm_sqr = dot( span, span );
-
- if (best_mode > 10)
- {
- uint index = ( span_norm_sqr <= 0 || dotProduct <= 0 ) ? 0
- : ( ( dotProduct < span_norm_sqr ) ? aStep2[ uint( dotProduct * 63.49999 / span_norm_sqr ) ] : aStep2[63] );
- if (threadInBlock == 0)
- {
- block.z |= index << 1;
- }
- else if (threadInBlock < 8)
- {
- block.z |= index << (threadInBlock * 4);
- }
- else
- {
- block.w |= index << ((threadInBlock - 8) * 4);
- }
- }
- else
- {
- uint index = ( span_norm_sqr <= 0 || dotProduct <= 0 ) ? 0
- : ( ( dotProduct < span_norm_sqr ) ? aStep1[ uint( dotProduct * 63.49999 / span_norm_sqr ) ] : aStep1[63] );
-
- uint fixup = candidateFixUpIndex1D[best_partition];
- int2 offset = int2((fixup != 2), (fixup == 15));
-
- if (threadInBlock == 0)
- {
- block.z |= index << 18;
- }
- else if (threadInBlock < 3)
- {
- block.z |= index << (20 + (threadInBlock - 1) * 3);
- }
- else if (threadInBlock < 5)
- {
- block.z |= index << (25 + (threadInBlock - 3) * 3 + offset.x);
- }
- else if (threadInBlock == 5)
- {
- block.w |= index >> !offset.x;
- if (!offset.x)
- {
- block.z |= index << 31;
- }
- }
- else if (threadInBlock < 9)
- {
- block.w |= index << (2 + (threadInBlock - 6) * 3 + offset.x);
- }
- else
- {
- block.w |= index << (11 + (threadInBlock - 9) * 3 + offset.y);
- }
- }
-
- shared_temp[GI].pixel_hr.xy = asfloat(block.zw);
- }
-#ifdef REF_DEVICE
- GroupMemoryBarrierWithGroupSync();
-#endif
- if (threadInBlock < 8)
- {
- shared_temp[GI].pixel_hr.xy = asfloat(asuint(shared_temp[GI].pixel_hr.xy) | asuint(shared_temp[GI + 8].pixel_hr.xy));
- }
-#ifdef REF_DEVICE
- GroupMemoryBarrierWithGroupSync();
-#endif
- if (threadInBlock < 4)
- {
- shared_temp[GI].pixel_hr.xy = asfloat(asuint(shared_temp[GI].pixel_hr.xy) | asuint(shared_temp[GI + 4].pixel_hr.xy));
- }
-#ifdef REF_DEVICE
- GroupMemoryBarrierWithGroupSync();
-#endif
- if (threadInBlock < 2)
- {
- shared_temp[GI].pixel_hr.xy = asfloat(asuint(shared_temp[GI].pixel_hr.xy) | asuint(shared_temp[GI + 2].pixel_hr.xy));
- }
-#ifdef REF_DEVICE
- GroupMemoryBarrierWithGroupSync();
-#endif
- if (threadInBlock < 1)
- {
- shared_temp[GI].pixel_hr.xy = asfloat(asuint(shared_temp[GI].pixel_hr.xy) | asuint(shared_temp[GI + 1].pixel_hr.xy));
-
- block.zw = asuint(shared_temp[GI].pixel_hr.xy);
- }
-#ifdef REF_DEVICE
- GroupMemoryBarrierWithGroupSync();
-#endif
-
- bool transformed = candidateModeTransformed[best_mode - 1];
- uint4 prec = candidateModePrec[best_mode - 1];
- if (threadInBlock == 2)
- {
- int2x3 endPoint_q;
- endPoint_q[0] = shared_temp[threadBase + 0].endPoint_low;
- endPoint_q[1] = shared_temp[threadBase + 0].endPoint_high;
-
- quantize( endPoint_q, prec.x );
- if (transformed)
- {
- endPoint_q[1] -= endPoint_q[0];
- }
-
- shared_temp[GI].endPoint_low = endPoint_q[0];
- shared_temp[GI].endPoint_high = endPoint_q[1];
- }
-#ifdef REF_DEVICE
- GroupMemoryBarrierWithGroupSync();
-#endif
- if (threadInBlock == 3)
- {
- int3 ep0 = shared_temp[threadBase + 2].endPoint_low;
- int2x3 endPoint_q;
- endPoint_q[0] = shared_temp[threadBase + 1].endPoint_low;
- endPoint_q[1] = shared_temp[threadBase + 1].endPoint_high;
-
- if (best_mode <= 10)
- {
- quantize( endPoint_q, prec.x );
- if (transformed)
- {
- endPoint_q[0] -= ep0;
- endPoint_q[1] -= ep0;
- }
-
- shared_temp[GI].endPoint_low = endPoint_q[0];
- shared_temp[GI].endPoint_high = endPoint_q[1];
- }
- }
-#ifdef REF_DEVICE
- GroupMemoryBarrierWithGroupSync();
-#endif
-
- if (threadInBlock < 2)
- {
- int2x3 endPoint_q;
- endPoint_q[0] = shared_temp[threadBase + threadInBlock + 2].endPoint_low;
- endPoint_q[1] = shared_temp[threadBase + threadInBlock + 2].endPoint_high;
-
- int bBadQuantize = 0;
- if (threadInBlock == 0)
- {
- if (best_mode > 10)
- {
- finish_quantize( bBadQuantize, endPoint_q, prec, transformed );
- }
- else
- {
- finish_quantize_0( bBadQuantize, endPoint_q, prec, transformed );
- }
- }
- else // if (threadInBlock == 1)
- {
- if (best_mode <= 10)
- {
- finish_quantize_1( bBadQuantize, endPoint_q, prec, transformed );
- }
- }
-
- shared_temp[GI].endPoint_low = endPoint_q[0];
- shared_temp[GI].endPoint_high = endPoint_q[1];
- }
-#ifdef REF_DEVICE
- GroupMemoryBarrierWithGroupSync();
-#endif
-
- if ( threadInBlock == 0 )
- {
- int2x3 endPoint_q[2];
- endPoint_q[0][0] = shared_temp[threadBase + 0].endPoint_low;
- endPoint_q[0][1] = shared_temp[threadBase + 0].endPoint_high;
- endPoint_q[1][0] = shared_temp[threadBase + 1].endPoint_low;
- endPoint_q[1][1] = shared_temp[threadBase + 1].endPoint_high;
-
- if ( best_mode > 10 )
- {
- block_package( block, endPoint_q[0], best_mode );
- }
- else
- {
- block_package( block, endPoint_q, best_mode, best_partition );
- }
-
- g_OutBuff[blockID] = block;
- }
-}
-
-uint float2half1( float f )
-{
- uint Result;
-
- uint IValue = asuint(f);
- uint Sign = (IValue & 0x80000000U) >> 16U;
- IValue = IValue & 0x7FFFFFFFU;
-
- if (IValue > 0x47FFEFFFU)
- {
- // The number is too large to be represented as a half. Saturate to infinity.
- Result = 0x7FFFU;
- }
- else
- {
- if (IValue < 0x38800000U)
- {
- // The number is too small to be represented as a normalized half.
- // Convert it to a denormalized value.
- uint Shift = 113U - (IValue >> 23U);
- IValue = (0x800000U | (IValue & 0x7FFFFFU)) >> Shift;
- }
- else
- {
- // Rebias the exponent to represent the value as a normalized half.
- IValue += 0xC8000000U;
- }
-
- Result = ((IValue + 0x0FFFU + ((IValue >> 13U) & 1U)) >> 13U)&0x7FFFU;
- }
- return (Result|Sign);
-}
-
-uint3 float2half( float3 endPoint_f )
-{
- //uint3 sign = asuint(endPoint_f) & 0x80000000;
- //uint3 expo = asuint(endPoint_f) & 0x7F800000;
- //uint3 base = asuint(endPoint_f) & 0x007FFFFF;
- //return ( expo < 0x33800000 ) ? 0
- // //0x33800000 indicating 2^-24, which is minimal denormalized number that half can present
- // : ( ( expo < 0x38800000 ) ? ( sign >> 16 ) | ( ( base + 0x00800000 ) >> ( 23 - ( ( expo - 0x33800000 ) >> 23 ) ) )//fixed a bug in v0.2
- // //0x38800000 indicating 2^-14, which is minimal normalized number that half can present, so need to use denormalized half presentation
- // : ( ( expo == 0x7F800000 || expo > 0x47000000 ) ? ( ( sign >> 16 ) | 0x7bff )
- // // treat NaN as INF, treat INF (including NaN) as the maximum/minimum number that half can present
- // // 0x47000000 indicating 2^15, which is maximum exponent that half can present, so cut to 0x7bff which is the maximum half number
- // : ( ( sign >> 16 ) | ( ( ( expo - 0x38000000 ) | base ) >> 13 ) ) ) );
-
-
- return uint3( float2half1( endPoint_f.x ), float2half1( endPoint_f.y ), float2half1( endPoint_f.z ) );
-}
-// Rescales half bit patterns to the integer range used before quantization.
-//
-// pixel_h: one half bit pattern per component.
-// Returns: for UNSIGNED_F16, (pixel_h << 6) / 31. Otherwise the low 15 bits
-// are scaled by 32/31 and the result is negated when pixel_h >= 0x8000
-// (sign bit set). The bit patterns 0x7bff and 0xfbff are special-cased to
-// 0x7fff and 0xffff8001 (-0x7fff) respectively.
-int3 start_quantize( uint3 pixel_h )
-{
- if ( g_format == UNSIGNED_F16 )
- {
- return asint( ( pixel_h << 6 ) / 31 );
- }
- else
- {
- // The second arm only sees values >= 0x8000, so its special case has to
- // test the sign-bit-set pattern 0xfbff; comparing against 0x7bff there
- // could never match.
- return ( pixel_h < 0x8000 ) ? ( ( pixel_h == 0x7bff ) ? 0x7fff : asint( ( pixel_h << 5 ) / 31 ) )// fixed a bug in v0.2
- : ( ( pixel_h == 0xfbff ) ? 0xffff8001 : -asint( ( ( 0x00007fff & pixel_h ) << 5 ) / 31 ) );// fixed a bug in v0.2
- }
-}
-void quantize( inout int2x3 endPoint, uint prec ) // Reduces every component of endPoint to prec bits, in place; the mapping depends on g_format.
-{
- int iprec = asint( prec );
- if ( g_format == UNSIGNED_F16 )
- {
- endPoint = ( ( iprec >= 15 ) | ( endPoint == 0 ) ) ? endPoint // unchanged when prec >= 15 or the component is 0
- : ( ( endPoint == asint(0xFFFF) ) ? ( ( 1 << iprec ) - 1 ) // 0xFFFF maps to the largest prec-bit value
- : ( ( ( endPoint << iprec ) + asint(0x0000) ) >> 16 ) ); // otherwise (value << prec) >> 16; the added constant is 0
- }
- else
- {
- endPoint = ( ( iprec >= 16 ) | ( endPoint == 0 ) ) ? endPoint // unchanged when prec >= 16 or the component is 0
- : ( ( endPoint >= 0 ) ? ( ( endPoint == asint(0x7FFF) ) ? ( ( 1 << ( iprec - 1 ) ) - 1 ) : ( ( ( endPoint << ( iprec - 1 ) ) + asint(0x0000) ) >> 15 ) ) // non-negative: 0x7FFF maps to the largest (prec-1)-bit value, else (value << (prec-1)) >> 15
- : ( ( -endPoint == asint(0x7FFF) ) ? -( ( 1 << ( iprec - 1 ) ) - 1 ) : -( ( ( -endPoint << ( iprec - 1 ) ) + asint(0x0000) ) >> 15 ) ) ); // negative: same mapping applied to the magnitude, then negated
- }
-}
-void finish_quantize_0( inout int bBadQuantize, inout int2x3 endPoint, uint4 prec, bool transformed ) // Range-checks and masks quantized endpoints in place; ORs any range failure into bBadQuantize.
-{
- if ( transformed )
- {
- bool3 bBadComponent = ( endPoint[1] >= 0 ) ? ( endPoint[1] >= ( 1 << ( prec.yzw - 1 ) ) ) // non-negative component is bad when it is >= 2^(prec-1)
- : ( -endPoint[1] > ( 1 << ( prec.yzw - 1 ) ) ); // negative component is bad when its magnitude is > 2^(prec-1)
- bBadQuantize |= any(bBadComponent); // accumulates; an earlier failure is never cleared here
-
- endPoint[0] = endPoint[0] & ( ( 1 << prec.x ) - 1 ); // row 0 keeps only its low prec.x bits
- endPoint[1] = ( endPoint[1] >= 0 ) ? ( ( endPoint[1] >= ( 1 << ( prec.yzw - 1 ) ) ) ? ( ( 1 << ( prec.yzw - 1 ) ) - 1 ) : endPoint[1] ) // non-negative: out-of-range values become 2^(prec-1) - 1
- : ( ( -endPoint[1] > ( 1 << ( prec.yzw - 1 ) ) ) ? ( 1 << ( prec.yzw - 1 ) ) : ( endPoint[1] & ( ( 1 << prec.yzw ) - 1 ) ) ); // negative: out-of-range values become 2^(prec-1), others keep their low prec bits
- }
- else
- {
- endPoint &= ( ( 1 << prec.x ) - 1 ); // not transformed: every component keeps only its low prec.x bits
- }
-}
-void finish_quantize_1( inout int bBadQuantize, inout int2x3 endPoint, uint4 prec, bool transformed ) // Like finish_quantize_0, but when transformed both rows are range-checked and clamped against prec.yzw.
-{
- if ( transformed )
- {
- bool2x3 bBadComponent;
- bBadComponent[0] = ( endPoint[0] >= 0 ) ? ( endPoint[0] >= ( 1 << ( prec.yzw - 1 ) ) ) // non-negative component is bad when it is >= 2^(prec-1)
- : ( -endPoint[0] > ( 1 << ( prec.yzw - 1 ) ) ); // negative component is bad when its magnitude is > 2^(prec-1)
- bBadComponent[1] = ( endPoint[1] >= 0 ) ? ( endPoint[1] >= ( 1 << ( prec.yzw - 1 ) ) ) // same test for row 1
- : ( -endPoint[1] > ( 1 << ( prec.yzw - 1 ) ) );
- bBadQuantize |= any(bBadComponent); // accumulates; an earlier failure is never cleared here
-
- endPoint[0] = ( endPoint[0] >= 0 ) ? ( ( endPoint[0] >= ( 1 << ( prec.yzw - 1 ) ) ) ? ( ( 1 << ( prec.yzw - 1 ) ) - 1 ) : endPoint[0] ) // non-negative: out-of-range values become 2^(prec-1) - 1
- : ( ( -endPoint[0] > ( 1 << ( prec.yzw - 1 ) ) ) ? ( 1 << ( prec.yzw - 1 ) ) : ( endPoint[0] & ( ( 1 << prec.yzw ) - 1 ) ) ); // negative: out-of-range values become 2^(prec-1), others keep their low prec bits
- endPoint[1] = ( endPoint[1] >= 0 ) ? ( ( endPoint[1] >= ( 1 << ( prec.yzw - 1 ) ) ) ? ( ( 1 << ( prec.yzw - 1 ) ) - 1 ) : endPoint[1] ) // same clamp for row 1
- : ( ( -endPoint[1] > ( 1 << ( prec.yzw - 1 ) ) ) ? ( 1 << ( prec.yzw - 1 ) ) : ( endPoint[1] & ( ( 1 << prec.yzw ) - 1 ) ) );
- }
- else
- {
- endPoint &= ( ( 1 << prec.x ) - 1 ); // not transformed: every component keeps only its low prec.x bits
- }
-}
-void finish_quantize( out bool bBadQuantize, inout int2x3 endPoint, uint4 prec, bool transformed ) // Range-checks and masks quantized endpoints in place; bBadQuantize is overwritten (not accumulated).
-{
- if ( transformed )
- {
- bool3 bBadComponent;
- bBadComponent = ( endPoint[1] >= 0 ) ? ( endPoint[1] >= ( 1 << ( prec.yzw - 1 ) ) ) // non-negative component is bad when it is >= 2^(prec-1)
- : ( -endPoint[1] > ( 1 << ( prec.yzw - 1 ) ) ); // negative component is bad when its magnitude is > 2^(prec-1)
- bBadQuantize = any( bBadComponent );
-
- endPoint[0] = endPoint[0] & ( ( 1 << prec.x ) - 1 ); // row 0 keeps only its low prec.x bits
- endPoint[1] = ( endPoint[1] >= 0 ) ? ( ( endPoint[1] >= ( 1 << ( prec.yzw - 1 ) ) ) ? ( ( 1 << ( prec.yzw - 1 ) ) - 1 ) : endPoint[1] ) // non-negative: out-of-range values become 2^(prec-1) - 1
- : ( ( -endPoint[1] > ( 1 << ( prec.yzw - 1 ) ) ) ? ( 1 << ( prec.yzw - 1 ) ) : ( endPoint[1] & ( ( 1 << prec.yzw ) - 1 ) ) ); // negative: out-of-range values become 2^(prec-1), others keep their low prec bits
- }
- else
- {
- endPoint &= ( ( 1 << prec.x ) - 1 ); // not transformed: every component keeps only its low prec.x bits
-
- bBadQuantize = 0; // the untransformed path never reports a bad quantization
- }
-}
-
-void SIGN_EXTEND( uint3 prec, inout int3 color ) // Sign-extends each component of color, in place, from a prec-bit two's-complement field.
-{
- uint3 p = 1 << (prec - 1); // bit mask of the field's sign bit, per component
- color = (color & p) ? (color & (p - 1)) - p : color; // sign bit set: keep the bits below it and subtract p; otherwise unchanged
-}
-
-// Sign-extends a single endpoint pair in place: row 0 from prec.x bits when
-// g_format is SIGNED_F16, and row 1 from prec.yzw bits when g_format is
-// SIGNED_F16 or the endpoints are transformed.
-void sign_extend( bool transformed, uint4 prec, inout int2x3 endPoint )
-{
- const bool signedFormat = ( g_format == SIGNED_F16 );
-
- if ( signedFormat )
- {
- SIGN_EXTEND( prec.x, endPoint[0] );
- }
-
- if ( signedFormat || transformed )
- {
- SIGN_EXTEND( prec.yzw, endPoint[1] );
- }
-}
-
-// Sign-extends two endpoint pairs in place: endPoint[0][0] from prec.x bits
-// when g_format is SIGNED_F16, and the other three rows from prec.yzw bits
-// when g_format is SIGNED_F16 or the endpoints are transformed.
-void sign_extend( bool transformed, uint4 prec, inout int2x3 endPoint[2] )
-{
- const bool signedFormat = ( g_format == SIGNED_F16 );
-
- if ( signedFormat )
- {
- SIGN_EXTEND( prec.x, endPoint[0][0] );
- }
-
- if ( signedFormat || transformed )
- {
- SIGN_EXTEND( prec.yzw, endPoint[0][1] );
- SIGN_EXTEND( prec.yzw, endPoint[1][0] );
- SIGN_EXTEND( prec.yzw, endPoint[1][1] );
- }
-}
-// Prepares two endpoint pairs for unquantization, in place: sign-extends them
-// and, when transformed, adds endPoint[0][0] to each of the other three rows.
-void start_unquantize( inout int2x3 endPoint[2], uint4 prec, bool transformed )
-{
- sign_extend( transformed, prec, endPoint );
-
- if ( !transformed )
- {
- return;
- }
-
- // endPoint[0][0] is not modified below, so it can be read once up front.
- const int3 anchor = endPoint[0][0];
- endPoint[0][1] = endPoint[0][1] + anchor;
- endPoint[1][0] = endPoint[1][0] + anchor;
- endPoint[1][1] = endPoint[1][1] + anchor;
-}
-// Prepares one endpoint pair for unquantization, in place: sign-extends it
-// and, when transformed, adds row 0 to row 1.
-void start_unquantize( inout int2x3 endPoint, uint4 prec, bool transformed )
-{
- sign_extend( transformed, prec, endPoint );
-
- if ( transformed )
- {
- endPoint[1] = endPoint[1] + endPoint[0];
- }
-}
-void unquantize( inout int2x3 color, uint prec ) // Expands prec-bit quantized components back to the 16-bit working range, in place; the mapping depends on g_format.
-{
- int iprec = asint( prec );
- if (g_format == UNSIGNED_F16 )
- {
- if (prec < 15) // values are left untouched when prec >= 15
- {
- color = (color != 0) ? (color == ((1 << iprec) - 1) ? 0xFFFF : (((color << 16) + 0x8000) >> iprec)) : color; // 0 stays 0; the largest prec-bit value maps to 0xFFFF; otherwise ((v << 16) + 0x8000) >> prec
- }
- }
- else
- {
- if (prec < 16) // values are left untouched when prec >= 16
- {
- uint2x3 s = color >= 0 ? 0 : 1; // 1 where the component is negative
- color = abs(color); // work on magnitudes; the sign is restored below
- color = (color != 0) ? (color >= ((1 << (iprec - 1)) - 1) ? 0x7FFF : (((color << 15) + 0x4000) >> (iprec - 1))) : color; // 0 stays 0; magnitudes >= 2^(prec-1) - 1 map to 0x7FFF; otherwise ((v << 15) + 0x4000) >> (prec - 1)
- color = s > 0 ? -color : color; // re-apply the recorded sign
- }
- }
-}
-uint3 finish_unquantize( int3 color ) // Scales an unquantized color by 31/64 (UNSIGNED_F16) or 31/32 (otherwise) and returns the resulting bits as uint3.
-{
- if ( g_format == UNSIGNED_F16 )
- color = ( color * 31 ) >> 6;
- else
- {
- color = ( color < 0 ) ? -( ( -color * 31 ) >> 5 ) : ( color * 31 ) >> 5; // scale the magnitude, keeping the sign
- color = ( color < 0 ) ? ( ( -color ) | 0x8000 ) : color; // negative values become magnitude with bit 15 set
- }
- return asuint(color);
-}
-// Writes entry i of an 8-entry palette: low and high are blended with the
-// i-th weight (out of 64, rounded) and passed through finish_unquantize.
-void generate_palette_unquantized8( out uint3 palette, int3 low, int3 high, int i )
-{
- static const int kWeights8[] = {0, 9, 18, 27, 37, 46, 55, 64};
-
- const int w = kWeights8[i];
- int3 blended = low * ( 64 - w ) + high * w + 32;
- palette = finish_unquantize( blended >> 6 );
-}
-// Writes entry i of a 16-entry palette: low and high are blended with the
-// i-th weight (out of 64, rounded) and passed through finish_unquantize.
-void generate_palette_unquantized16( out uint3 palette, int3 low, int3 high, int i )
-{
- static const int kWeights16[] = {0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64};
-
- const int w = kWeights16[i];
- int3 blended = low * ( 64 - w ) + high * w + 32;
- palette = finish_unquantize( blended >> 6 );
-}
-
-float half2float1( uint Value ) // Expands a 16-bit half bit pattern (low 16 bits of Value) to a 32-bit float.
-{
- uint Mantissa = (uint)(Value & 0x03FF); // low 10 bits
-
- uint Exponent;
- if ((Value & 0x7C00) != 0) // The value is normalized
- {
- Exponent = (uint)((Value >> 10) & 0x1F); // an all-ones exponent (0x1F) is not special-cased here
- }
- else if (Mantissa != 0) // The value is denormalized
- {
- // Normalize the value in the resulting float
- Exponent = 1;
-
- do
- {
- Exponent--; // unsigned: may wrap below 0; the +112 below brings it back into range
- Mantissa <<= 1;
- } while ((Mantissa & 0x0400) == 0); // stop once the leading 1 reaches bit 10
-
- Mantissa &= 0x03FF; // drop that leading 1
- }
- else // The value is zero
- {
- Exponent = (uint)(-112); // cancels the +112 below so the float exponent field is 0
- }
-
- uint Result = ((Value & 0x8000) << 16) | // Sign
- ((Exponent + 112) << 23) | // Exponent
- (Mantissa << 13); // Mantissa
-
- return asfloat(Result);
-}
-
-// Expands three half bit patterns to a float3 by running half2float1 on the
-// x, y and z components independently.
-float3 half2float(uint3 color_h )
-{
- float3 expanded;
- expanded.x = half2float1( color_h.x );
- expanded.y = half2float1( color_h.y );
- expanded.z = half2float1( color_h.z );
- return expanded;
-}
-
-void block_package( inout uint4 block, int2x3 endPoint[2], uint mode_type, uint partition_index ) // for mode 1 - 10
-{
- block.xy = 0;
- block.z &= 0xFFFC0000;
-
- //block.z |= (partition_index & 0x1f) << 13;
-
- if ( mode_type == candidateModeFlag[0])
- {
- /*block.x = candidateModeMemory[0];
- block.x |= ( ( endPoint[0][0].r << 5 ) & 0x00007FE0 ) | ( ( endPoint[0][0].g << 15 ) & 0x01FF8000 ) | ( ( endPoint[0][0].b << 25 ) & 0xFE000000 );
- block.x |= ( endPoint[1][0].g >> 2 ) & 0x00000004;
- block.x |= ( endPoint[1][0].b >> 1 ) & 0x00000008;
- block.x |= endPoint[1][1].b & 0x00000010;
- block.y |= ( ( endPoint[0][0].b >> 7 ) & 0x00000007 );
- block.y |= ( ( endPoint[0][1].r << 3 ) & 0x000000F8 ) | ( ( endPoint[0][1].g << 13 ) & 0x0003E000 ) | ( ( endPoint[0][1].b << 23 ) & 0x0F800000 );
- block.yz |= ( endPoint[1][0].gr << uint2(9, 1) ) & uint2(0x00001E00, 0x0000003E);
- block.y |= ( endPoint[1][0].b << 29 ) & 0xE0000000;
- block.y |= ( ( endPoint[1][1].g << 4 ) & 0x00000100 );
- block.yz |= ( endPoint[1][1].gr << uint2(19, 7) ) & uint2(0x00780000, 0x00000F80);
- block.yz |= ( ( endPoint[1][1].b << uint2(27, 9) ) & uint2(0x10000000, 0x00001000) ) | ( ( endPoint[1][1].b << uint2(18, 4) ) & uint2(0x00040000, 0x00000040) );
- block.z |= ( endPoint[1][0].b >> 3 ) & 0x00000001;*/
-
- block.x |= ((candidateModeMemory[0] >> 0) & 1) << 0;
- block.x |= ((candidateModeMemory[0] >> 1) & 1) << 1;
- block.x |= ((endPoint[1][0].g >> 4) & 1) << 2;
- block.x |= ((endPoint[1][0].b >> 4) & 1) << 3;
- block.x |= ((endPoint[1][1].b >> 4) & 1) << 4;
- block.x |= ((endPoint[0][0].r >> 0) & 1) << 5;
- block.x |= ((endPoint[0][0].r >> 1) & 1) << 6;
- block.x |= ((endPoint[0][0].r >> 2) & 1) << 7;
- block.x |= ((endPoint[0][0].r >> 3) & 1) << 8;
- block.x |= ((endPoint[0][0].r >> 4) & 1) << 9;
- block.x |= ((endPoint[0][0].r >> 5) & 1) << 10;
- block.x |= ((endPoint[0][0].r >> 6) & 1) << 11;
- block.x |= ((endPoint[0][0].r >> 7) & 1) << 12;
- block.x |= ((endPoint[0][0].r >> 8) & 1) << 13;
- block.x |= ((endPoint[0][0].r >> 9) & 1) << 14;
- block.x |= ((endPoint[0][0].g >> 0) & 1) << 15;
- block.x |= ((endPoint[0][0].g >> 1) & 1) << 16;
- block.x |= ((endPoint[0][0].g >> 2) & 1) << 17;
- block.x |= ((endPoint[0][0].g >> 3) & 1) << 18;
- block.x |= ((endPoint[0][0].g >> 4) & 1) << 19;
- block.x |= ((endPoint[0][0].g >> 5) & 1) << 20;
- block.x |= ((endPoint[0][0].g >> 6) & 1) << 21;
- block.x |= ((endPoint[0][0].g >> 7) & 1) << 22;
- block.x |= ((endPoint[0][0].g >> 8) & 1) << 23;
- block.x |= ((endPoint[0][0].g >> 9) & 1) << 24;
- block.x |= ((endPoint[0][0].b >> 0) & 1) << 25;
- block.x |= ((endPoint[0][0].b >> 1) & 1) << 26;
- block.x |= ((endPoint[0][0].b >> 2) & 1) << 27;
- block.x |= ((endPoint[0][0].b >> 3) & 1) << 28;
- block.x |= ((endPoint[0][0].b >> 4) & 1) << 29;
- block.x |= ((endPoint[0][0].b >> 5) & 1) << 30;
- block.x |= ((endPoint[0][0].b >> 6) & 1) << 31;
- block.y |= ((endPoint[0][0].b >> 7) & 1) << 0;
- block.y |= ((endPoint[0][0].b >> 8) & 1) << 1;
- block.y |= ((endPoint[0][0].b >> 9) & 1) << 2;
- block.y |= ((endPoint[0][1].r >> 0) & 1) << 3;
- block.y |= ((endPoint[0][1].r >> 1) & 1) << 4;
- block.y |= ((endPoint[0][1].r >> 2) & 1) << 5;
- block.y |= ((endPoint[0][1].r >> 3) & 1) << 6;
- block.y |= ((endPoint[0][1].r >> 4) & 1) << 7;
- block.y |= ((endPoint[1][1].g >> 4) & 1) << 8;
- block.y |= ((endPoint[1][0].g >> 0) & 1) << 9;
- block.y |= ((endPoint[1][0].g >> 1) & 1) << 10;
- block.y |= ((endPoint[1][0].g >> 2) & 1) << 11;
- block.y |= ((endPoint[1][0].g >> 3) & 1) << 12;
- block.y |= ((endPoint[0][1].g >> 0) & 1) << 13;
- block.y |= ((endPoint[0][1].g >> 1) & 1) << 14;
- block.y |= ((endPoint[0][1].g >> 2) & 1) << 15;
- block.y |= ((endPoint[0][1].g >> 3) & 1) << 16;
- block.y |= ((endPoint[0][1].g >> 4) & 1) << 17;
- block.y |= ((endPoint[1][1].b >> 0) & 1) << 18;
- block.y |= ((endPoint[1][1].g >> 0) & 1) << 19;
- block.y |= ((endPoint[1][1].g >> 1) & 1) << 20;
- block.y |= ((endPoint[1][1].g >> 2) & 1) << 21;
- block.y |= ((endPoint[1][1].g >> 3) & 1) << 22;
- block.y |= ((endPoint[0][1].b >> 0) & 1) << 23;
- block.y |= ((endPoint[0][1].b >> 1) & 1) << 24;
- block.y |= ((endPoint[0][1].b >> 2) & 1) << 25;
- block.y |= ((endPoint[0][1].b >> 3) & 1) << 26;
- block.y |= ((endPoint[0][1].b >> 4) & 1) << 27;
- block.y |= ((endPoint[1][1].b >> 1) & 1) << 28;
- block.y |= ((endPoint[1][0].b >> 0) & 1) << 29;
- block.y |= ((endPoint[1][0].b >> 1) & 1) << 30;
- block.y |= ((endPoint[1][0].b >> 2) & 1) << 31;
- block.z |= ((endPoint[1][0].b >> 3) & 1) << 0;
- block.z |= ((endPoint[1][0].r >> 0) & 1) << 1;
- block.z |= ((endPoint[1][0].r >> 1) & 1) << 2;
- block.z |= ((endPoint[1][0].r >> 2) & 1) << 3;
- block.z |= ((endPoint[1][0].r >> 3) & 1) << 4;
- block.z |= ((endPoint[1][0].r >> 4) & 1) << 5;
- block.z |= ((endPoint[1][1].b >> 2) & 1) << 6;
- block.z |= ((endPoint[1][1].r >> 0) & 1) << 7;
- block.z |= ((endPoint[1][1].r >> 1) & 1) << 8;
- block.z |= ((endPoint[1][1].r >> 2) & 1) << 9;
- block.z |= ((endPoint[1][1].r >> 3) & 1) << 10;
- block.z |= ((endPoint[1][1].r >> 4) & 1) << 11;
- block.z |= ((endPoint[1][1].b >> 3) & 1) << 12;
- block.z |= ((partition_index >> 0) & 1) << 13;
- block.z |= ((partition_index >> 1) & 1) << 14;
- block.z |= ((partition_index >> 2) & 1) << 15;
- block.z |= ((partition_index >> 3) & 1) << 16;
- block.z |= ((partition_index >> 4) & 1) << 17;
- }
- else if ( mode_type == candidateModeFlag[1])
- {
- /*block.x = candidateModeMemory[1];
- block.x |= ( ( endPoint[0][0].r << 5 ) & 0x00000FE0 ) | ( ( endPoint[0][0].g << 15 ) & 0x003F8000 ) | ( ( endPoint[0][0].b << 25 ) & 0xFE000000 );
- block.y |= ( ( endPoint[0][1].r << 3 ) & 0x000001F8 ) | ( ( endPoint[0][1].g << 13 ) & 0x0007E000 ) | ( ( endPoint[0][1].b << 23 ) & 0x1F800000 );
- block.x |= ( ( endPoint[1][0].g >> 3 ) & 0x00000004 ) | ( ( endPoint[1][0].g << 20 ) & 0x01000000 );
- block.x |= ( endPoint[1][1].g >> 1 ) & 0x00000018;
- block.x |= ( ( endPoint[1][1].b << 21 ) & 0x00800000 ) | ( ( endPoint[1][1].b << 12 ) & 0x00003000 );
- block.x |= ( ( endPoint[1][0].b << 17 ) & 0x00400000 ) | ( ( endPoint[1][0].b << 10 ) & 0x00004000 );
- block.yz |= ( endPoint[1][0].gr << uint2(9, 1) ) & uint2(0x00001E00, 0x0000007E);
- block.y |= ( endPoint[1][0].b << 29 ) & 0xE0000000;
- block.yz |= ( endPoint[1][1].gr << uint2(19, 7) ) & uint2(0x00780000, 0x00001F80);
- block.y |= ( ( endPoint[1][1].b >> 4 ) & 0x00000002 ) | ( ( endPoint[1][1].b >> 2 ) & 0x00000004 ) | ( ( endPoint[1][1].b >> 3 ) & 0x00000001 );
- block.z |= ( endPoint[1][0].b >> 3 ) & 0x00000001;*/
-
- block.x |= ((candidateModeMemory[1] >> 0) & 1) << 0;
- block.x |= ((candidateModeMemory[1] >> 1) & 1) << 1;
- block.x |= ((endPoint[1][0].g >> 5) & 1) << 2;
- block.x |= ((endPoint[1][1].g >> 4) & 1) << 3;
- block.x |= ((endPoint[1][1].g >> 5) & 1) << 4;
- block.x |= ((endPoint[0][0].r >> 0) & 1) << 5;
- block.x |= ((endPoint[0][0].r >> 1) & 1) << 6;
- block.x |= ((endPoint[0][0].r >> 2) & 1) << 7;
- block.x |= ((endPoint[0][0].r >> 3) & 1) << 8;
- block.x |= ((endPoint[0][0].r >> 4) & 1) << 9;
- block.x |= ((endPoint[0][0].r >> 5) & 1) << 10;
- block.x |= ((endPoint[0][0].r >> 6) & 1) << 11;
- block.x |= ((endPoint[1][1].b >> 0) & 1) << 12;
- block.x |= ((endPoint[1][1].b >> 1) & 1) << 13;
- block.x |= ((endPoint[1][0].b >> 4) & 1) << 14;
- block.x |= ((endPoint[0][0].g >> 0) & 1) << 15;
- block.x |= ((endPoint[0][0].g >> 1) & 1) << 16;
- block.x |= ((endPoint[0][0].g >> 2) & 1) << 17;
- block.x |= ((endPoint[0][0].g >> 3) & 1) << 18;
- block.x |= ((endPoint[0][0].g >> 4) & 1) << 19;
- block.x |= ((endPoint[0][0].g >> 5) & 1) << 20;
- block.x |= ((endPoint[0][0].g >> 6) & 1) << 21;
- block.x |= ((endPoint[1][0].b >> 5) & 1) << 22;
- block.x |= ((endPoint[1][1].b >> 2) & 1) << 23;
- block.x |= ((endPoint[1][0].g >> 4) & 1) << 24;
- block.x |= ((endPoint[0][0].b >> 0) & 1) << 25;
- block.x |= ((endPoint[0][0].b >> 1) & 1) << 26;
- block.x |= ((endPoint[0][0].b >> 2) & 1) << 27;
- block.x |= ((endPoint[0][0].b >> 3) & 1) << 28;
- block.x |= ((endPoint[0][0].b >> 4) & 1) << 29;
- block.x |= ((endPoint[0][0].b >> 5) & 1) << 30;
- block.x |= ((endPoint[0][0].b >> 6) & 1) << 31;
- block.y |= ((endPoint[1][1].b >> 3) & 1) << 0;
- block.y |= ((endPoint[1][1].b >> 5) & 1) << 1;
- block.y |= ((endPoint[1][1].b >> 4) & 1) << 2;
- block.y |= ((endPoint[0][1].r >> 0) & 1) << 3;
- block.y |= ((endPoint[0][1].r >> 1) & 1) << 4;
- block.y |= ((endPoint[0][1].r >> 2) & 1) << 5;
- block.y |= ((endPoint[0][1].r >> 3) & 1) << 6;
- block.y |= ((endPoint[0][1].r >> 4) & 1) << 7;
- block.y |= ((endPoint[0][1].r >> 5) & 1) << 8;
- block.y |= ((endPoint[1][0].g >> 0) & 1) << 9;
- block.y |= ((endPoint[1][0].g >> 1) & 1) << 10;
- block.y |= ((endPoint[1][0].g >> 2) & 1) << 11;
- block.y |= ((endPoint[1][0].g >> 3) & 1) << 12;
- block.y |= ((endPoint[0][1].g >> 0) & 1) << 13;
- block.y |= ((endPoint[0][1].g >> 1) & 1) << 14;
- block.y |= ((endPoint[0][1].g >> 2) & 1) << 15;
- block.y |= ((endPoint[0][1].g >> 3) & 1) << 16;
- block.y |= ((endPoint[0][1].g >> 4) & 1) << 17;
- block.y |= ((endPoint[0][1].g >> 5) & 1) << 18;
- block.y |= ((endPoint[1][1].g >> 0) & 1) << 19;
- block.y |= ((endPoint[1][1].g >> 1) & 1) << 20;
- block.y |= ((endPoint[1][1].g >> 2) & 1) << 21;
- block.y |= ((endPoint[1][1].g >> 3) & 1) << 22;
- block.y |= ((endPoint[0][1].b >> 0) & 1) << 23;
- block.y |= ((endPoint[0][1].b >> 1) & 1) << 24;
- block.y |= ((endPoint[0][1].b >> 2) & 1) << 25;
- block.y |= ((endPoint[0][1].b >> 3) & 1) << 26;
- block.y |= ((endPoint[0][1].b >> 4) & 1) << 27;
- block.y |= ((endPoint[0][1].b >> 5) & 1) << 28;
- block.y |= ((endPoint[1][0].b >> 0) & 1) << 29;
- block.y |= ((endPoint[1][0].b >> 1) & 1) << 30;
- block.y |= ((endPoint[1][0].b >> 2) & 1) << 31;
- block.z |= ((endPoint[1][0].b >> 3) & 1) << 0;
- block.z |= ((endPoint[1][0].r >> 0) & 1) << 1;
- block.z |= ((endPoint[1][0].r >> 1) & 1) << 2;
- block.z |= ((endPoint[1][0].r >> 2) & 1) << 3;
- block.z |= ((endPoint[1][0].r >> 3) & 1) << 4;
- block.z |= ((endPoint[1][0].r >> 4) & 1) << 5;
- block.z |= ((endPoint[1][0].r >> 5) & 1) << 6;
- block.z |= ((endPoint[1][1].r >> 0) & 1) << 7;
- block.z |= ((endPoint[1][1].r >> 1) & 1) << 8;
- block.z |= ((endPoint[1][1].r >> 2) & 1) << 9;
- block.z |= ((endPoint[1][1].r >> 3) & 1) << 10;
- block.z |= ((endPoint[1][1].r >> 4) & 1) << 11;
- block.z |= ((endPoint[1][1].r >> 5) & 1) << 12;
- block.z |= ((partition_index >> 0) & 1) << 13;
- block.z |= ((partition_index >> 1) & 1) << 14;
- block.z |= ((partition_index >> 2) & 1) << 15;
- block.z |= ((partition_index >> 3) & 1) << 16;
- block.z |= ((partition_index >> 4) & 1) << 17;
- }
- else if ( mode_type == candidateModeFlag[2])
- {
- /*block.x = candidateModeMemory[2];
- block.x |= ( ( endPoint[0][0].r << 5 ) & 0x00007FE0 ) | ( ( endPoint[0][0].g << 15 ) & 0x01FF8000 ) | ( ( endPoint[0][0].b << 25 ) & 0xFE000000 );
- block.y |= ( endPoint[0][0].r >> 2 ) & 0x00000100;
- block.y |= ( endPoint[0][0].g << 7 ) & 0x00020000;
- block.y |= ( ( endPoint[0][0].b << 17 ) & 0x08000000 ) | ( ( endPoint[0][0].b >> 7 ) & 0x00000007 );
- block.y |= ( ( endPoint[0][1].r << 3 ) & 0x000000F8 ) | ( ( endPoint[0][1].g << 13 ) & 0x0001E000 ) | ( ( endPoint[0][1].b << 23 ) & 0x07800000 );
- block.yz |= ( endPoint[1][0].gr << uint2(9, 1) ) & uint2(0x00001E00, 0x0000003E);
- block.y |= ( endPoint[1][0].b << 29 ) & 0xE0000000;
- block.yz |= ( endPoint[1][1].gr << uint2(19, 7) ) & uint2(0x00780000, 0x00000F80);
- block.yz |= ( ( endPoint[1][1].b << uint2(27, 9) ) & uint2(0x10000000, 0x00001000) ) | ( ( endPoint[1][1].b << uint2(18, 4) ) & uint2(0x00040000, 0x00000040) );
- block.z |= ( endPoint[1][0].b >> 3 ) & 0x00000001;*/
-
- block.x |= ((candidateModeMemory[2] >> 0) & 1) << 0;
- block.x |= ((candidateModeMemory[2] >> 1) & 1) << 1;
- block.x |= ((candidateModeMemory[2] >> 2) & 1) << 2;
- block.x |= ((candidateModeMemory[2] >> 3) & 1) << 3;
- block.x |= ((candidateModeMemory[2] >> 4) & 1) << 4;
- block.x |= ((endPoint[0][0].r >> 0) & 1) << 5;
- block.x |= ((endPoint[0][0].r >> 1) & 1) << 6;
- block.x |= ((endPoint[0][0].r >> 2) & 1) << 7;
- block.x |= ((endPoint[0][0].r >> 3) & 1) << 8;
- block.x |= ((endPoint[0][0].r >> 4) & 1) << 9;
- block.x |= ((endPoint[0][0].r >> 5) & 1) << 10;
- block.x |= ((endPoint[0][0].r >> 6) & 1) << 11;
- block.x |= ((endPoint[0][0].r >> 7) & 1) << 12;
- block.x |= ((endPoint[0][0].r >> 8) & 1) << 13;
- block.x |= ((endPoint[0][0].r >> 9) & 1) << 14;
- block.x |= ((endPoint[0][0].g >> 0) & 1) << 15;
- block.x |= ((endPoint[0][0].g >> 1) & 1) << 16;
- block.x |= ((endPoint[0][0].g >> 2) & 1) << 17;
- block.x |= ((endPoint[0][0].g >> 3) & 1) << 18;
- block.x |= ((endPoint[0][0].g >> 4) & 1) << 19;
- block.x |= ((endPoint[0][0].g >> 5) & 1) << 20;
- block.x |= ((endPoint[0][0].g >> 6) & 1) << 21;
- block.x |= ((endPoint[0][0].g >> 7) & 1) << 22;
- block.x |= ((endPoint[0][0].g >> 8) & 1) << 23;
- block.x |= ((endPoint[0][0].g >> 9) & 1) << 24;
- block.x |= ((endPoint[0][0].b >> 0) & 1) << 25;
- block.x |= ((endPoint[0][0].b >> 1) & 1) << 26;
- block.x |= ((endPoint[0][0].b >> 2) & 1) << 27;
- block.x |= ((endPoint[0][0].b >> 3) & 1) << 28;
- block.x |= ((endPoint[0][0].b >> 4) & 1) << 29;
- block.x |= ((endPoint[0][0].b >> 5) & 1) << 30;
- block.x |= ((endPoint[0][0].b >> 6) & 1) << 31;
- block.y |= ((endPoint[0][0].b >> 7) & 1) << 0;
- block.y |= ((endPoint[0][0].b >> 8) & 1) << 1;
- block.y |= ((endPoint[0][0].b >> 9) & 1) << 2;
- block.y |= ((endPoint[0][1].r >> 0) & 1) << 3;
- block.y |= ((endPoint[0][1].r >> 1) & 1) << 4;
- block.y |= ((endPoint[0][1].r >> 2) & 1) << 5;
- block.y |= ((endPoint[0][1].r >> 3) & 1) << 6;
- block.y |= ((endPoint[0][1].r >> 4) & 1) << 7;
- block.y |= ((endPoint[0][0].r >> 10) & 1) << 8;
- block.y |= ((endPoint[1][0].g >> 0) & 1) << 9;
- block.y |= ((endPoint[1][0].g >> 1) & 1) << 10;
- block.y |= ((endPoint[1][0].g >> 2) & 1) << 11;
- block.y |= ((endPoint[1][0].g >> 3) & 1) << 12;
- block.y |= ((endPoint[0][1].g >> 0) & 1) << 13;
- block.y |= ((endPoint[0][1].g >> 1) & 1) << 14;
- block.y |= ((endPoint[0][1].g >> 2) & 1) << 15;
- block.y |= ((endPoint[0][1].g >> 3) & 1) << 16;
- block.y |= ((endPoint[0][0].g >> 10) & 1) << 17;
- block.y |= ((endPoint[1][1].b >> 0) & 1) << 18;
- block.y |= ((endPoint[1][1].g >> 0) & 1) << 19;
- block.y |= ((endPoint[1][1].g >> 1) & 1) << 20;
- block.y |= ((endPoint[1][1].g >> 2) & 1) << 21;
- block.y |= ((endPoint[1][1].g >> 3) & 1) << 22;
- block.y |= ((endPoint[0][1].b >> 0) & 1) << 23;
- block.y |= ((endPoint[0][1].b >> 1) & 1) << 24;
- block.y |= ((endPoint[0][1].b >> 2) & 1) << 25;
- block.y |= ((endPoint[0][1].b >> 3) & 1) << 26;
- block.y |= ((endPoint[0][0].b >> 10) & 1) << 27;
- block.y |= ((endPoint[1][1].b >> 1) & 1) << 28;
- block.y |= ((endPoint[1][0].b >> 0) & 1) << 29;
- block.y |= ((endPoint[1][0].b >> 1) & 1) << 30;
- block.y |= ((endPoint[1][0].b >> 2) & 1) << 31;
- block.z |= ((endPoint[1][0].b >> 3) & 1) << 0;
- block.z |= ((endPoint[1][0].r >> 0) & 1) << 1;
- block.z |= ((endPoint[1][0].r >> 1) & 1) << 2;
- block.z |= ((endPoint[1][0].r >> 2) & 1) << 3;
- block.z |= ((endPoint[1][0].r >> 3) & 1) << 4;
- block.z |= ((endPoint[1][0].r >> 4) & 1) << 5;
- block.z |= ((endPoint[1][1].b >> 2) & 1) << 6;
- block.z |= ((endPoint[1][1].r >> 0) & 1) << 7;
- block.z |= ((endPoint[1][1].r >> 1) & 1) << 8;
- block.z |= ((endPoint[1][1].r >> 2) & 1) << 9;
- block.z |= ((endPoint[1][1].r >> 3) & 1) << 10;
- block.z |= ((endPoint[1][1].r >> 4) & 1) << 11;
- block.z |= ((endPoint[1][1].b >> 3) & 1) << 12;
- block.z |= ((partition_index >> 0) & 1) << 13;
- block.z |= ((partition_index >> 1) & 1) << 14;
- block.z |= ((partition_index >> 2) & 1) << 15;
- block.z |= ((partition_index >> 3) & 1) << 16;
- block.z |= ((partition_index >> 4) & 1) << 17;
- }
- else if ( mode_type == candidateModeFlag[3])
- {
- /*block.x = candidateModeMemory[3];
- block.x |= ( ( endPoint[0][0].r << 5 ) & 0x00007FE0 ) | ( ( endPoint[0][0].g << 15 ) & 0x01FF8000 ) | ( ( endPoint[0][0].b << 25 ) & 0xFE000000 );
- block.y |= ( endPoint[0][0].r >> 3 ) & 0x00000080;
- block.y |= ( endPoint[0][0].g << 8 ) & 0x00040000;
- block.y |= ( ( endPoint[0][0].b << 17 ) & 0x08000000 ) | ( ( endPoint[0][0].b >> 7 ) & 0x00000007 );
- block.y |= ( ( endPoint[0][1].r << 3 ) & 0x00000078 ) | ( ( endPoint[0][1].g << 13 ) & 0x0003E000 ) | ( ( endPoint[0][1].b << 23 ) & 0x07800000 );
- block.yz |= ( endPoint[1][0].gr << uint2(9, 1) ) & uint2(0x00001E00, 0x0000001E);
- block.y |= ( endPoint[1][0].b << 29 ) & 0xE0000000;
- block.y |= ( ( endPoint[1][1].g << 4 ) & 0x00000100 );
- block.yz |= ( endPoint[1][1].gr << uint2(19, 7) ) & uint2(0x00780000, 0x00000780);
- block.yz |= ( endPoint[1][1].b << uint2(27, 9) ) & uint2(0x10000000, 0x00001000);
- block.z |= ( ( endPoint[1][0].g << 7 ) & 0x00000800 );
- block.z |= ( endPoint[1][0].b >> 3 ) & 0x00000001;
- block.z |= ( endPoint[1][1].b << 4 ) & 0x00000040;
- block.z |= ( endPoint[1][1].b << 5 ) & 0x00000020;*/
-
- block.x |= ((candidateModeMemory[3] >> 0) & 1) << 0;
- block.x |= ((candidateModeMemory[3] >> 1) & 1) << 1;
- block.x |= ((candidateModeMemory[3] >> 2) & 1) << 2;
- block.x |= ((candidateModeMemory[3] >> 3) & 1) << 3;
- block.x |= ((candidateModeMemory[3] >> 4) & 1) << 4;
- block.x |= ((endPoint[0][0].r >> 0) & 1) << 5;
- block.x |= ((endPoint[0][0].r >> 1) & 1) << 6;
- block.x |= ((endPoint[0][0].r >> 2) & 1) << 7;
- block.x |= ((endPoint[0][0].r >> 3) & 1) << 8;
- block.x |= ((endPoint[0][0].r >> 4) & 1) << 9;
- block.x |= ((endPoint[0][0].r >> 5) & 1) << 10;
- block.x |= ((endPoint[0][0].r >> 6) & 1) << 11;
- block.x |= ((endPoint[0][0].r >> 7) & 1) << 12;
- block.x |= ((endPoint[0][0].r >> 8) & 1) << 13;
- block.x |= ((endPoint[0][0].r >> 9) & 1) << 14;
- block.x |= ((endPoint[0][0].g >> 0) & 1) << 15;
- block.x |= ((endPoint[0][0].g >> 1) & 1) << 16;
- block.x |= ((endPoint[0][0].g >> 2) & 1) << 17;
- block.x |= ((endPoint[0][0].g >> 3) & 1) << 18;
- block.x |= ((endPoint[0][0].g >> 4) & 1) << 19;
- block.x |= ((endPoint[0][0].g >> 5) & 1) << 20;
- block.x |= ((endPoint[0][0].g >> 6) & 1) << 21;
- block.x |= ((endPoint[0][0].g >> 7) & 1) << 22;
- block.x |= ((endPoint[0][0].g >> 8) & 1) << 23;
- block.x |= ((endPoint[0][0].g >> 9) & 1) << 24;
- block.x |= ((endPoint[0][0].b >> 0) & 1) << 25;
- block.x |= ((endPoint[0][0].b >> 1) & 1) << 26;
- block.x |= ((endPoint[0][0].b >> 2) & 1) << 27;
- block.x |= ((endPoint[0][0].b >> 3) & 1) << 28;
- block.x |= ((endPoint[0][0].b >> 4) & 1) << 29;
- block.x |= ((endPoint[0][0].b >> 5) & 1) << 30;
- block.x |= ((endPoint[0][0].b >> 6) & 1) << 31;
- block.y |= ((endPoint[0][0].b >> 7) & 1) << 0;
- block.y |= ((endPoint[0][0].b >> 8) & 1) << 1;
- block.y |= ((endPoint[0][0].b >> 9) & 1) << 2;
- block.y |= ((endPoint[0][1].r >> 0) & 1) << 3;
- block.y |= ((endPoint[0][1].r >> 1) & 1) << 4;
- block.y |= ((endPoint[0][1].r >> 2) & 1) << 5;
- block.y |= ((endPoint[0][1].r >> 3) & 1) << 6;
- block.y |= ((endPoint[0][0].r >> 10) & 1) << 7;
- block.y |= ((endPoint[1][1].g >> 4) & 1) << 8;
- block.y |= ((endPoint[1][0].g >> 0) & 1) << 9;
- block.y |= ((endPoint[1][0].g >> 1) & 1) << 10;
- block.y |= ((endPoint[1][0].g >> 2) & 1) << 11;
- block.y |= ((endPoint[1][0].g >> 3) & 1) << 12;
- block.y |= ((endPoint[0][1].g >> 0) & 1) << 13;
- block.y |= ((endPoint[0][1].g >> 1) & 1) << 14;
- block.y |= ((endPoint[0][1].g >> 2) & 1) << 15;
- block.y |= ((endPoint[0][1].g >> 3) & 1) << 16;
- block.y |= ((endPoint[0][1].g >> 4) & 1) << 17;
- block.y |= ((endPoint[0][0].g >> 10) & 1) << 18;
- block.y |= ((endPoint[1][1].g >> 0) & 1) << 19;
- block.y |= ((endPoint[1][1].g >> 1) & 1) << 20;
- block.y |= ((endPoint[1][1].g >> 2) & 1) << 21;
- block.y |= ((endPoint[1][1].g >> 3) & 1) << 22;
- block.y |= ((endPoint[0][1].b >> 0) & 1) << 23;
- block.y |= ((endPoint[0][1].b >> 1) & 1) << 24;
- block.y |= ((endPoint[0][1].b >> 2) & 1) << 25;
- block.y |= ((endPoint[0][1].b >> 3) & 1) << 26;
- block.y |= ((endPoint[0][0].b >> 10) & 1) << 27;
- block.y |= ((endPoint[1][1].b >> 1) & 1) << 28;
- block.y |= ((endPoint[1][0].b >> 0) & 1) << 29;
- block.y |= ((endPoint[1][0].b >> 1) & 1) << 30;
- block.y |= ((endPoint[1][0].b >> 2) & 1) << 31;
- block.z |= ((endPoint[1][0].b >> 3) & 1) << 0;
- block.z |= ((endPoint[1][0].r >> 0) & 1) << 1;
- block.z |= ((endPoint[1][0].r >> 1) & 1) << 2;
- block.z |= ((endPoint[1][0].r >> 2) & 1) << 3;
- block.z |= ((endPoint[1][0].r >> 3) & 1) << 4;
- block.z |= ((endPoint[1][1].b >> 0) & 1) << 5;
- block.z |= ((endPoint[1][1].b >> 2) & 1) << 6;
- block.z |= ((endPoint[1][1].r >> 0) & 1) << 7;
- block.z |= ((endPoint[1][1].r >> 1) & 1) << 8;
- block.z |= ((endPoint[1][1].r >> 2) & 1) << 9;
- block.z |= ((endPoint[1][1].r >> 3) & 1) << 10;
- block.z |= ((endPoint[1][0].g >> 4) & 1) << 11;
- block.z |= ((endPoint[1][1].b >> 3) & 1) << 12;
- block.z |= ((partition_index >> 0) & 1) << 13;
- block.z |= ((partition_index >> 1) & 1) << 14;
- block.z |= ((partition_index >> 2) & 1) << 15;
- block.z |= ((partition_index >> 3) & 1) << 16;
- block.z |= ((partition_index >> 4) & 1) << 17;
- }
- else if ( mode_type == candidateModeFlag[4])
- {
- /*block.x = candidateModeMemory[4];
- block.x |= ( ( endPoint[0][0].r << 5 ) & 0x00007FE0 ) | ( ( endPoint[0][0].g << 15 ) & 0x01FF8000 ) | ( ( endPoint[0][0].b << 25 ) & 0xFE000000 );
- block.y |= ( endPoint[0][0].r >> 3 ) & 0x00000080;
- block.y |= ( endPoint[0][0].g << 7 ) & 0x00020000;
- block.y |= ( ( endPoint[0][0].b << 18 ) & 0x10000000 ) | ( ( endPoint[0][0].b >> 7 ) & 0x00000007 );
- block.y |= ( ( endPoint[0][1].r << 3 ) & 0x00000078 ) | ( ( endPoint[0][1].g << 13 ) & 0x0001E000 ) | ( ( endPoint[0][1].b << 23 ) & 0x0F800000 );
- block.y |= ( ( endPoint[1][0].g << 9 ) & 0x00001E00 ) | ( ( endPoint[1][0].b << 4 ) & 0x00000100 );
- block.y |= ( endPoint[1][0].b << 29 ) & 0xE0000000;
- block.yz |= ( endPoint[1][1].gr << uint2(19, 7) ) & uint2(0x00780000, 0x00000780);
- block.yz |= ( endPoint[1][1].b << uint2(18, 4) ) & uint2(0x00040000, 0x00000060);
- block.z |= ( endPoint[1][0].r << 1 ) & 0x0000001E;
- block.z |= ( endPoint[1][0].b >> 3 ) & 0x00000001;
- block.z |= ( ( endPoint[1][1].b << 7 ) & 0x00000800 ) | ( ( endPoint[1][1].b << 9 ) & 0x00001000 );*/
-
- block.x |= ((candidateModeMemory[4] >> 0) & 1) << 0;
- block.x |= ((candidateModeMemory[4] >> 1) & 1) << 1;
- block.x |= ((candidateModeMemory[4] >> 2) & 1) << 2;
- block.x |= ((candidateModeMemory[4] >> 3) & 1) << 3;
- block.x |= ((candidateModeMemory[4] >> 4) & 1) << 4;
- block.x |= ((endPoint[0][0].r >> 0) & 1) << 5;
- block.x |= ((endPoint[0][0].r >> 1) & 1) << 6;
- block.x |= ((endPoint[0][0].r >> 2) & 1) << 7;
- block.x |= ((endPoint[0][0].r >> 3) & 1) << 8;
- block.x |= ((endPoint[0][0].r >> 4) & 1) << 9;
- block.x |= ((endPoint[0][0].r >> 5) & 1) << 10;
- block.x |= ((endPoint[0][0].r >> 6) & 1) << 11;
- block.x |= ((endPoint[0][0].r >> 7) & 1) << 12;
- block.x |= ((endPoint[0][0].r >> 8) & 1) << 13;
- block.x |= ((endPoint[0][0].r >> 9) & 1) << 14;
- block.x |= ((endPoint[0][0].g >> 0) & 1) << 15;
- block.x |= ((endPoint[0][0].g >> 1) & 1) << 16;
- block.x |= ((endPoint[0][0].g >> 2) & 1) << 17;
- block.x |= ((endPoint[0][0].g >> 3) & 1) << 18;
- block.x |= ((endPoint[0][0].g >> 4) & 1) << 19;
- block.x |= ((endPoint[0][0].g >> 5) & 1) << 20;
- block.x |= ((endPoint[0][0].g >> 6) & 1) << 21;
- block.x |= ((endPoint[0][0].g >> 7) & 1) << 22;
- block.x |= ((endPoint[0][0].g >> 8) & 1) << 23;
- block.x |= ((endPoint[0][0].g >> 9) & 1) << 24;
- block.x |= ((endPoint[0][0].b >> 0) & 1) << 25;
- block.x |= ((endPoint[0][0].b >> 1) & 1) << 26;
- block.x |= ((endPoint[0][0].b >> 2) & 1) << 27;
- block.x |= ((endPoint[0][0].b >> 3) & 1) << 28;
- block.x |= ((endPoint[0][0].b >> 4) & 1) << 29;
- block.x |= ((endPoint[0][0].b >> 5) & 1) << 30;
- block.x |= ((endPoint[0][0].b >> 6) & 1) << 31;
- block.y |= ((endPoint[0][0].b >> 7) & 1) << 0;
- block.y |= ((endPoint[0][0].b >> 8) & 1) << 1;
- block.y |= ((endPoint[0][0].b >> 9) & 1) << 2;
- block.y |= ((endPoint[0][1].r >> 0) & 1) << 3;
- block.y |= ((endPoint[0][1].r >> 1) & 1) << 4;
- block.y |= ((endPoint[0][1].r >> 2) & 1) << 5;
- block.y |= ((endPoint[0][1].r >> 3) & 1) << 6;
- block.y |= ((endPoint[0][0].r >> 10) & 1) << 7;
- block.y |= ((endPoint[1][0].b >> 4) & 1) << 8;
- block.y |= ((endPoint[1][0].g >> 0) & 1) << 9;
- block.y |= ((endPoint[1][0].g >> 1) & 1) << 10;
- block.y |= ((endPoint[1][0].g >> 2) & 1) << 11;
- block.y |= ((endPoint[1][0].g >> 3) & 1) << 12;
- block.y |= ((endPoint[0][1].g >> 0) & 1) << 13;
- block.y |= ((endPoint[0][1].g >> 1) & 1) << 14;
- block.y |= ((endPoint[0][1].g >> 2) & 1) << 15;
- block.y |= ((endPoint[0][1].g >> 3) & 1) << 16;
- block.y |= ((endPoint[0][0].g >> 10) & 1) << 17;
- block.y |= ((endPoint[1][1].b >> 0) & 1) << 18;
- block.y |= ((endPoint[1][1].g >> 0) & 1) << 19;
- block.y |= ((endPoint[1][1].g >> 1) & 1) << 20;
- block.y |= ((endPoint[1][1].g >> 2) & 1) << 21;
- block.y |= ((endPoint[1][1].g >> 3) & 1) << 22;
- block.y |= ((endPoint[0][1].b >> 0) & 1) << 23;
- block.y |= ((endPoint[0][1].b >> 1) & 1) << 24;
- block.y |= ((endPoint[0][1].b >> 2) & 1) << 25;
- block.y |= ((endPoint[0][1].b >> 3) & 1) << 26;
- block.y |= ((endPoint[0][1].b >> 4) & 1) << 27;
- block.y |= ((endPoint[0][0].b >> 10) & 1) << 28;
- block.y |= ((endPoint[1][0].b >> 0) & 1) << 29;
- block.y |= ((endPoint[1][0].b >> 1) & 1) << 30;
- block.y |= ((endPoint[1][0].b >> 2) & 1) << 31;
- block.z |= ((endPoint[1][0].b >> 3) & 1) << 0;
- block.z |= ((endPoint[1][0].r >> 0) & 1) << 1;
- block.z |= ((endPoint[1][0].r >> 1) & 1) << 2;
- block.z |= ((endPoint[1][0].r >> 2) & 1) << 3;
- block.z |= ((endPoint[1][0].r >> 3) & 1) << 4;
- block.z |= ((endPoint[1][1].b >> 1) & 1) << 5;
- block.z |= ((endPoint[1][1].b >> 2) & 1) << 6;
- block.z |= ((endPoint[1][1].r >> 0) & 1) << 7;
- block.z |= ((endPoint[1][1].r >> 1) & 1) << 8;
- block.z |= ((endPoint[1][1].r >> 2) & 1) << 9;
- block.z |= ((endPoint[1][1].r >> 3) & 1) << 10;
- block.z |= ((endPoint[1][1].b >> 4) & 1) << 11;
- block.z |= ((endPoint[1][1].b >> 3) & 1) << 12;
- block.z |= ((partition_index >> 0) & 1) << 13;
- block.z |= ((partition_index >> 1) & 1) << 14;
- block.z |= ((partition_index >> 2) & 1) << 15;
- block.z |= ((partition_index >> 3) & 1) << 16;
- block.z |= ((partition_index >> 4) & 1) << 17;
- }
- else if ( mode_type == candidateModeFlag[5])
- {
- /*block.x = candidateModeMemory[5];
- block.x |= ( ( endPoint[0][0].r << 5 ) & 0x00003FE0 ) | ( ( endPoint[0][0].g << 15 ) & 0x00FF8000 ) | ( ( endPoint[0][0].b << 25 ) & 0xFE000000);
- block.y |= ( endPoint[0][0].b >> 7 ) & 0x00000003;
- block.y |= ( ( endPoint[0][1].r << 3 ) & 0x000000F8 ) | ( ( endPoint[0][1].g << 13 ) & 0x0003E000 ) | ( ( endPoint[0][1].b << 23 ) & 0x0F800000 );
- block.x |= ( ( endPoint[1][0].g << 20 ) & 0x01000000 ) | ( ( endPoint[1][0].b << 10 ) & 0x00004000 );
- block.yz |= ( endPoint[1][0].gr << uint2(9, 1) ) & uint2(0x00001E00, 0x0000003E);
- block.y |= ( endPoint[1][0].b << 29 ) & 0xE0000000;
- block.y |= ( ( endPoint[1][1].g << 4 ) & 0x00000100 ) | ( ( endPoint[1][1].b >> 2 ) & 0x00000004 );
- block.y |= ( ( endPoint[1][1].b << 27 ) & 0x10000000 );
- block.yz |= ( endPoint[1][1].gr << uint2(19, 7) ) & uint2(0x00780000, 0x00000F80);
- block.yz |= ( endPoint[1][1].b << uint2(18, 4) ) & uint2(0x00040000, 0x00000040);
- block.z |= ( endPoint[1][0].b >> 3 ) & 0x00000001;
- block.z |= ( ( endPoint[1][1].b << 9 ) & 0x00001000 );*/
-
- block.x |= ((candidateModeMemory[5] >> 0) & 1) << 0;
- block.x |= ((candidateModeMemory[5] >> 1) & 1) << 1;
- block.x |= ((candidateModeMemory[5] >> 2) & 1) << 2;
- block.x |= ((candidateModeMemory[5] >> 3) & 1) << 3;
- block.x |= ((candidateModeMemory[5] >> 4) & 1) << 4;
- block.x |= ((endPoint[0][0].r >> 0) & 1) << 5;
- block.x |= ((endPoint[0][0].r >> 1) & 1) << 6;
- block.x |= ((endPoint[0][0].r >> 2) & 1) << 7;
- block.x |= ((endPoint[0][0].r >> 3) & 1) << 8;
- block.x |= ((endPoint[0][0].r >> 4) & 1) << 9;
- block.x |= ((endPoint[0][0].r >> 5) & 1) << 10;
- block.x |= ((endPoint[0][0].r >> 6) & 1) << 11;
- block.x |= ((endPoint[0][0].r >> 7) & 1) << 12;
- block.x |= ((endPoint[0][0].r >> 8) & 1) << 13;
- block.x |= ((endPoint[1][0].b >> 4) & 1) << 14;
- block.x |= ((endPoint[0][0].g >> 0) & 1) << 15;
- block.x |= ((endPoint[0][0].g >> 1) & 1) << 16;
- block.x |= ((endPoint[0][0].g >> 2) & 1) << 17;
- block.x |= ((endPoint[0][0].g >> 3) & 1) << 18;
- block.x |= ((endPoint[0][0].g >> 4) & 1) << 19;
- block.x |= ((endPoint[0][0].g >> 5) & 1) << 20;
- block.x |= ((endPoint[0][0].g >> 6) & 1) << 21;
- block.x |= ((endPoint[0][0].g >> 7) & 1) << 22;
- block.x |= ((endPoint[0][0].g >> 8) & 1) << 23;
- block.x |= ((endPoint[1][0].g >> 4) & 1) << 24;
- block.x |= ((endPoint[0][0].b >> 0) & 1) << 25;
- block.x |= ((endPoint[0][0].b >> 1) & 1) << 26;
- block.x |= ((endPoint[0][0].b >> 2) & 1) << 27;
- block.x |= ((endPoint[0][0].b >> 3) & 1) << 28;
- block.x |= ((endPoint[0][0].b >> 4) & 1) << 29;
- block.x |= ((endPoint[0][0].b >> 5) & 1) << 30;
- block.x |= ((endPoint[0][0].b >> 6) & 1) << 31;
- block.y |= ((endPoint[0][0].b >> 7) & 1) << 0;
- block.y |= ((endPoint[0][0].b >> 8) & 1) << 1;
- block.y |= ((endPoint[1][1].b >> 4) & 1) << 2;
- block.y |= ((endPoint[0][1].r >> 0) & 1) << 3;
- block.y |= ((endPoint[0][1].r >> 1) & 1) << 4;
- block.y |= ((endPoint[0][1].r >> 2) & 1) << 5;
- block.y |= ((endPoint[0][1].r >> 3) & 1) << 6;
- block.y |= ((endPoint[0][1].r >> 4) & 1) << 7;
- block.y |= ((endPoint[1][1].g >> 4) & 1) << 8;
- block.y |= ((endPoint[1][0].g >> 0) & 1) << 9;
- block.y |= ((endPoint[1][0].g >> 1) & 1) << 10;
- block.y |= ((endPoint[1][0].g >> 2) & 1) << 11;
- block.y |= ((endPoint[1][0].g >> 3) & 1) << 12;
- block.y |= ((endPoint[0][1].g >> 0) & 1) << 13;
- block.y |= ((endPoint[0][1].g >> 1) & 1) << 14;
- block.y |= ((endPoint[0][1].g >> 2) & 1) << 15;
- block.y |= ((endPoint[0][1].g >> 3) & 1) << 16;
- block.y |= ((endPoint[0][1].g >> 4) & 1) << 17;
- block.y |= ((endPoint[1][1].b >> 0) & 1) << 18;
- block.y |= ((endPoint[1][1].g >> 0) & 1) << 19;
- block.y |= ((endPoint[1][1].g >> 1) & 1) << 20;
- block.y |= ((endPoint[1][1].g >> 2) & 1) << 21;
- block.y |= ((endPoint[1][1].g >> 3) & 1) << 22;
- block.y |= ((endPoint[0][1].b >> 0) & 1) << 23;
- block.y |= ((endPoint[0][1].b >> 1) & 1) << 24;
- block.y |= ((endPoint[0][1].b >> 2) & 1) << 25;
- block.y |= ((endPoint[0][1].b >> 3) & 1) << 26;
- block.y |= ((endPoint[0][1].b >> 4) & 1) << 27;
- block.y |= ((endPoint[1][1].b >> 1) & 1) << 28;
- block.y |= ((endPoint[1][0].b >> 0) & 1) << 29;
- block.y |= ((endPoint[1][0].b >> 1) & 1) << 30;
- block.y |= ((endPoint[1][0].b >> 2) & 1) << 31;
- block.z |= ((endPoint[1][0].b >> 3) & 1) << 0;
- block.z |= ((endPoint[1][0].r >> 0) & 1) << 1;
- block.z |= ((endPoint[1][0].r >> 1) & 1) << 2;
- block.z |= ((endPoint[1][0].r >> 2) & 1) << 3;
- block.z |= ((endPoint[1][0].r >> 3) & 1) << 4;
- block.z |= ((endPoint[1][0].r >> 4) & 1) << 5;
- block.z |= ((endPoint[1][1].b >> 2) & 1) << 6;
- block.z |= ((endPoint[1][1].r >> 0) & 1) << 7;
- block.z |= ((endPoint[1][1].r >> 1) & 1) << 8;
- block.z |= ((endPoint[1][1].r >> 2) & 1) << 9;
- block.z |= ((endPoint[1][1].r >> 3) & 1) << 10;
- block.z |= ((endPoint[1][1].r >> 4) & 1) << 11;
- block.z |= ((endPoint[1][1].b >> 3) & 1) << 12;
- block.z |= ((partition_index >> 0) & 1) << 13;
- block.z |= ((partition_index >> 1) & 1) << 14;
- block.z |= ((partition_index >> 2) & 1) << 15;
- block.z |= ((partition_index >> 3) & 1) << 16;
- block.z |= ((partition_index >> 4) & 1) << 17;
- }
- else if ( mode_type == candidateModeFlag[6])
- {
- /*block.x = candidateModeMemory[6];
- block.x |= ( ( endPoint[0][0].r << 5 ) & 0x00001FE0 ) | ( ( endPoint[0][0].g << 15 ) & 0x007F8000 ) | ( ( endPoint[0][0].b << 25 ) & 0xFE000000 );
- block.y |= ( endPoint[0][0].b >> 7 ) & 0x00000001;
- block.y |= ( ( endPoint[0][1].r << 3 ) & 0x000001F8 ) | ( ( endPoint[0][1].g << 13 ) & 0x0003E000 ) | ( ( endPoint[0][1].b << 23 ) & 0x0F800000 );
- block.x |= ( ( endPoint[1][0].g << 20 ) & 0x01000000 ) | ( ( endPoint[1][0].b << 10 ) & 0x00004000);
- block.x |= ( ( endPoint[1][1].g << 9 ) & 0x00002000 ) | ( ( endPoint[1][1].b << 21 ) & 0x00800000);
- block.yz |= ( endPoint[1][0].gr << uint2(9, 1) ) & uint2(0x00001E00, 0x0000007E);
- block.y |= ( endPoint[1][0].b << 29 ) & 0xE0000000;
- block.yz |= ( endPoint[1][1].gr << uint2(19, 7) ) & uint2(0x00780000, 0x00001F80);
- block.y |= ( ( endPoint[1][1].b >> 2 ) & 0x00000006 );
- block.y |= ( ( endPoint[1][1].b << 27 ) & 0x10000000 ) | ( ( endPoint[1][1].b << 18 ) & 0x00040000 );
- block.z |= ( endPoint[1][0].b >> 3 ) & 0x00000001;*/
-
- block.x |= ((candidateModeMemory[6] >> 0) & 1) << 0;
- block.x |= ((candidateModeMemory[6] >> 1) & 1) << 1;
- block.x |= ((candidateModeMemory[6] >> 2) & 1) << 2;
- block.x |= ((candidateModeMemory[6] >> 3) & 1) << 3;
- block.x |= ((candidateModeMemory[6] >> 4) & 1) << 4;
- block.x |= ((endPoint[0][0].r >> 0) & 1) << 5;
- block.x |= ((endPoint[0][0].r >> 1) & 1) << 6;
- block.x |= ((endPoint[0][0].r >> 2) & 1) << 7;
- block.x |= ((endPoint[0][0].r >> 3) & 1) << 8;
- block.x |= ((endPoint[0][0].r >> 4) & 1) << 9;
- block.x |= ((endPoint[0][0].r >> 5) & 1) << 10;
- block.x |= ((endPoint[0][0].r >> 6) & 1) << 11;
- block.x |= ((endPoint[0][0].r >> 7) & 1) << 12;
- block.x |= ((endPoint[1][1].g >> 4) & 1) << 13;
- block.x |= ((endPoint[1][0].b >> 4) & 1) << 14;
- block.x |= ((endPoint[0][0].g >> 0) & 1) << 15;
- block.x |= ((endPoint[0][0].g >> 1) & 1) << 16;
- block.x |= ((endPoint[0][0].g >> 2) & 1) << 17;
- block.x |= ((endPoint[0][0].g >> 3) & 1) << 18;
- block.x |= ((endPoint[0][0].g >> 4) & 1) << 19;
- block.x |= ((endPoint[0][0].g >> 5) & 1) << 20;
- block.x |= ((endPoint[0][0].g >> 6) & 1) << 21;
- block.x |= ((endPoint[0][0].g >> 7) & 1) << 22;
- block.x |= ((endPoint[1][1].b >> 2) & 1) << 23;
- block.x |= ((endPoint[1][0].g >> 4) & 1) << 24;
- block.x |= ((endPoint[0][0].b >> 0) & 1) << 25;
- block.x |= ((endPoint[0][0].b >> 1) & 1) << 26;
- block.x |= ((endPoint[0][0].b >> 2) & 1) << 27;
- block.x |= ((endPoint[0][0].b >> 3) & 1) << 28;
- block.x |= ((endPoint[0][0].b >> 4) & 1) << 29;
- block.x |= ((endPoint[0][0].b >> 5) & 1) << 30;
- block.x |= ((endPoint[0][0].b >> 6) & 1) << 31;
- block.y |= ((endPoint[0][0].b >> 7) & 1) << 0;
- block.y |= ((endPoint[1][1].b >> 3) & 1) << 1;
- block.y |= ((endPoint[1][1].b >> 4) & 1) << 2;
- block.y |= ((endPoint[0][1].r >> 0) & 1) << 3;
- block.y |= ((endPoint[0][1].r >> 1) & 1) << 4;
- block.y |= ((endPoint[0][1].r >> 2) & 1) << 5;
- block.y |= ((endPoint[0][1].r >> 3) & 1) << 6;
- block.y |= ((endPoint[0][1].r >> 4) & 1) << 7;
- block.y |= ((endPoint[0][1].r >> 5) & 1) << 8;
- block.y |= ((endPoint[1][0].g >> 0) & 1) << 9;
- block.y |= ((endPoint[1][0].g >> 1) & 1) << 10;
- block.y |= ((endPoint[1][0].g >> 2) & 1) << 11;
- block.y |= ((endPoint[1][0].g >> 3) & 1) << 12;
- block.y |= ((endPoint[0][1].g >> 0) & 1) << 13;
- block.y |= ((endPoint[0][1].g >> 1) & 1) << 14;
- block.y |= ((endPoint[0][1].g >> 2) & 1) << 15;
- block.y |= ((endPoint[0][1].g >> 3) & 1) << 16;
- block.y |= ((endPoint[0][1].g >> 4) & 1) << 17;
- block.y |= ((endPoint[1][1].b >> 0) & 1) << 18;
- block.y |= ((endPoint[1][1].g >> 0) & 1) << 19;
- block.y |= ((endPoint[1][1].g >> 1) & 1) << 20;
- block.y |= ((endPoint[1][1].g >> 2) & 1) << 21;
- block.y |= ((endPoint[1][1].g >> 3) & 1) << 22;
- block.y |= ((endPoint[0][1].b >> 0) & 1) << 23;
- block.y |= ((endPoint[0][1].b >> 1) & 1) << 24;
- block.y |= ((endPoint[0][1].b >> 2) & 1) << 25;
- block.y |= ((endPoint[0][1].b >> 3) & 1) << 26;
- block.y |= ((endPoint[0][1].b >> 4) & 1) << 27;
- block.y |= ((endPoint[1][1].b >> 1) & 1) << 28;
- block.y |= ((endPoint[1][0].b >> 0) & 1) << 29;
- block.y |= ((endPoint[1][0].b >> 1) & 1) << 30;
- block.y |= ((endPoint[1][0].b >> 2) & 1) << 31;
- block.z |= ((endPoint[1][0].b >> 3) & 1) << 0;
- block.z |= ((endPoint[1][0].r >> 0) & 1) << 1;
- block.z |= ((endPoint[1][0].r >> 1) & 1) << 2;
- block.z |= ((endPoint[1][0].r >> 2) & 1) << 3;
- block.z |= ((endPoint[1][0].r >> 3) & 1) << 4;
- block.z |= ((endPoint[1][0].r >> 4) & 1) << 5;
- block.z |= ((endPoint[1][0].r >> 5) & 1) << 6;
- block.z |= ((endPoint[1][1].r >> 0) & 1) << 7;
- block.z |= ((endPoint[1][1].r >> 1) & 1) << 8;
- block.z |= ((endPoint[1][1].r >> 2) & 1) << 9;
- block.z |= ((endPoint[1][1].r >> 3) & 1) << 10;
- block.z |= ((endPoint[1][1].r >> 4) & 1) << 11;
- block.z |= ((endPoint[1][1].r >> 5) & 1) << 12;
- block.z |= ((partition_index >> 0) & 1) << 13;
- block.z |= ((partition_index >> 1) & 1) << 14;
- block.z |= ((partition_index >> 2) & 1) << 15;
- block.z |= ((partition_index >> 3) & 1) << 16;
- block.z |= ((partition_index >> 4) & 1) << 17;
- }
- else if ( mode_type == candidateModeFlag[7])
- {
- /*block.x = candidateModeMemory[7];
- block.x |= ( ( endPoint[0][0].r << 5 ) & 0x00001FE0 ) | ( ( endPoint[0][0].g << 15 ) & 0x007F8000 ) | ( ( endPoint[0][0].b << 25 ) & 0xFE000000 );
- block.y |= ( endPoint[0][0].b >> 7 ) & 0x00000001;
- block.y |= ( ( endPoint[0][1].r << 3 ) & 0x000000F8 ) | ( ( endPoint[0][1].g << 13 ) & 0x0007E000 ) | ( ( endPoint[0][1].b << 23 ) & 0x0F800000 );
- block.x |= ( ( endPoint[1][0].g << 20 ) & 0x01000000 ) | ( ( endPoint[1][0].b << 10 ) & 0x00004000 );
- block.x |= ( ( endPoint[1][0].g << 18 ) & 0x00800000 );
- block.x |= ( ( endPoint[1][1].b << 13 ) & 0x00002000 );
- block.yz |= ( endPoint[1][0].gr << uint2(9, 1) ) & uint2(0x00001E00, 0x0000003E);
- block.yz |= ( endPoint[1][1].gr << uint2(19, 7) ) & uint2(0x00780000, 0x00000F80);
- block.y |= ( endPoint[1][0].b << 29 ) & 0xE0000000;
- block.y |= ( ( endPoint[1][1].g >> 4 ) & 0x00000002 ) | ( ( endPoint[1][1].g << 4 ) & 0x00000100 ) | ( ( endPoint[1][1].b >> 2 ) & 0x00000004 );
- block.y |= ( endPoint[1][1].b << 27 ) & 0x10000000;
- block.z |= ( endPoint[1][0].b >> 3 ) & 0x00000001;
- block.z |= ( ( endPoint[1][1].b << 9 ) & 0x00001000 ) | ( ( endPoint[1][1].b << 4 ) & 0x00000040 );*/
-
- block.x |= ((candidateModeMemory[7] >> 0) & 1) << 0;
- block.x |= ((candidateModeMemory[7] >> 1) & 1) << 1;
- block.x |= ((candidateModeMemory[7] >> 2) & 1) << 2;
- block.x |= ((candidateModeMemory[7] >> 3) & 1) << 3;
- block.x |= ((candidateModeMemory[7] >> 4) & 1) << 4;
- block.x |= ((endPoint[0][0].r >> 0) & 1) << 5;
- block.x |= ((endPoint[0][0].r >> 1) & 1) << 6;
- block.x |= ((endPoint[0][0].r >> 2) & 1) << 7;
- block.x |= ((endPoint[0][0].r >> 3) & 1) << 8;
- block.x |= ((endPoint[0][0].r >> 4) & 1) << 9;
- block.x |= ((endPoint[0][0].r >> 5) & 1) << 10;
- block.x |= ((endPoint[0][0].r >> 6) & 1) << 11;
- block.x |= ((endPoint[0][0].r >> 7) & 1) << 12;
- block.x |= ((endPoint[1][1].b >> 0) & 1) << 13;
- block.x |= ((endPoint[1][0].b >> 4) & 1) << 14;
- block.x |= ((endPoint[0][0].g >> 0) & 1) << 15;
- block.x |= ((endPoint[0][0].g >> 1) & 1) << 16;
- block.x |= ((endPoint[0][0].g >> 2) & 1) << 17;
- block.x |= ((endPoint[0][0].g >> 3) & 1) << 18;
- block.x |= ((endPoint[0][0].g >> 4) & 1) << 19;
- block.x |= ((endPoint[0][0].g >> 5) & 1) << 20;
- block.x |= ((endPoint[0][0].g >> 6) & 1) << 21;
- block.x |= ((endPoint[0][0].g >> 7) & 1) << 22;
- block.x |= ((endPoint[1][0].g >> 5) & 1) << 23;
- block.x |= ((endPoint[1][0].g >> 4) & 1) << 24;
- block.x |= ((endPoint[0][0].b >> 0) & 1) << 25;
- block.x |= ((endPoint[0][0].b >> 1) & 1) << 26;
- block.x |= ((endPoint[0][0].b >> 2) & 1) << 27;
- block.x |= ((endPoint[0][0].b >> 3) & 1) << 28;
- block.x |= ((endPoint[0][0].b >> 4) & 1) << 29;
- block.x |= ((endPoint[0][0].b >> 5) & 1) << 30;
- block.x |= ((endPoint[0][0].b >> 6) & 1) << 31;
- block.y |= ((endPoint[0][0].b >> 7) & 1) << 0;
- block.y |= ((endPoint[1][1].g >> 5) & 1) << 1;
- block.y |= ((endPoint[1][1].b >> 4) & 1) << 2;
- block.y |= ((endPoint[0][1].r >> 0) & 1) << 3;
- block.y |= ((endPoint[0][1].r >> 1) & 1) << 4;
- block.y |= ((endPoint[0][1].r >> 2) & 1) << 5;
- block.y |= ((endPoint[0][1].r >> 3) & 1) << 6;
- block.y |= ((endPoint[0][1].r >> 4) & 1) << 7;
- block.y |= ((endPoint[1][1].g >> 4) & 1) << 8;
- block.y |= ((endPoint[1][0].g >> 0) & 1) << 9;
- block.y |= ((endPoint[1][0].g >> 1) & 1) << 10;
- block.y |= ((endPoint[1][0].g >> 2) & 1) << 11;
- block.y |= ((endPoint[1][0].g >> 3) & 1) << 12;
- block.y |= ((endPoint[0][1].g >> 0) & 1) << 13;
- block.y |= ((endPoint[0][1].g >> 1) & 1) << 14;
- block.y |= ((endPoint[0][1].g >> 2) & 1) << 15;
- block.y |= ((endPoint[0][1].g >> 3) & 1) << 16;
- block.y |= ((endPoint[0][1].g >> 4) & 1) << 17;
- block.y |= ((endPoint[0][1].g >> 5) & 1) << 18;
- block.y |= ((endPoint[1][1].g >> 0) & 1) << 19;
- block.y |= ((endPoint[1][1].g >> 1) & 1) << 20;
- block.y |= ((endPoint[1][1].g >> 2) & 1) << 21;
- block.y |= ((endPoint[1][1].g >> 3) & 1) << 22;
- block.y |= ((endPoint[0][1].b >> 0) & 1) << 23;
- block.y |= ((endPoint[0][1].b >> 1) & 1) << 24;
- block.y |= ((endPoint[0][1].b >> 2) & 1) << 25;
- block.y |= ((endPoint[0][1].b >> 3) & 1) << 26;
- block.y |= ((endPoint[0][1].b >> 4) & 1) << 27;
- block.y |= ((endPoint[1][1].b >> 1) & 1) << 28;
- block.y |= ((endPoint[1][0].b >> 0) & 1) << 29;
- block.y |= ((endPoint[1][0].b >> 1) & 1) << 30;
- block.y |= ((endPoint[1][0].b >> 2) & 1) << 31;
- block.z |= ((endPoint[1][0].b >> 3) & 1) << 0;
- block.z |= ((endPoint[1][0].r >> 0) & 1) << 1;
- block.z |= ((endPoint[1][0].r >> 1) & 1) << 2;
- block.z |= ((endPoint[1][0].r >> 2) & 1) << 3;
- block.z |= ((endPoint[1][0].r >> 3) & 1) << 4;
- block.z |= ((endPoint[1][0].r >> 4) & 1) << 5;
- block.z |= ((endPoint[1][1].b >> 2) & 1) << 6;
- block.z |= ((endPoint[1][1].r >> 0) & 1) << 7;
- block.z |= ((endPoint[1][1].r >> 1) & 1) << 8;
- block.z |= ((endPoint[1][1].r >> 2) & 1) << 9;
- block.z |= ((endPoint[1][1].r >> 3) & 1) << 10;
- block.z |= ((endPoint[1][1].r >> 4) & 1) << 11;
- block.z |= ((endPoint[1][1].b >> 3) & 1) << 12;
- block.z |= ((partition_index >> 0) & 1) << 13;
- block.z |= ((partition_index >> 1) & 1) << 14;
- block.z |= ((partition_index >> 2) & 1) << 15;
- block.z |= ((partition_index >> 3) & 1) << 16;
- block.z |= ((partition_index >> 4) & 1) << 17;
- }
- else if ( mode_type == candidateModeFlag[8])
- {
- /*block.x = candidateModeMemory[8];
- block.x |= ( ( endPoint[0][0].r << 5 ) & 0x00001FE0 ) | ( ( endPoint[0][0].g << 15 ) & 0x007F8000 ) | ( ( endPoint[0][0].b << 25 ) & 0xFE000000 );
- block.y |= ( endPoint[0][0].b >> 7 ) & 0x00000001;
- block.y |= ( ( endPoint[0][1].r << 3 ) & 0x000000F8 ) | ( ( endPoint[0][1].g << 13 ) & 0x0003E000 ) | ( ( endPoint[0][1].b << 23 ) & 0x1F800000 );
- block.x |= ( ( endPoint[1][0].g << 20 ) & 0x01000000 ) | ( ( endPoint[1][0].b << 10 ) & 0x00004000 );
- block.x |= ( ( endPoint[1][0].b << 18 ) & 0x00800000 );
- block.x |= ( endPoint[1][1].b << 12 ) & 0x00002000;
- block.y |= ( endPoint[1][0].b << 29 ) & 0xE0000000;
- block.y |= ( ( endPoint[1][1].g << 4 ) & 0x00000100 ) | ( ( endPoint[1][1].b >> 4 ) & 0x00000002 ) | ( ( endPoint[1][1].b >> 2 ) & 0x00000004 );
- block.yz |= ( endPoint[1][0].gr << uint2(9, 1) ) & uint2(0x00001E00, 0x0000003E);
- block.yz |= ( endPoint[1][1].gr << uint2(19, 7) ) & uint2(0x00780000, 0x00000F80);
- block.y |= ( endPoint[1][1].b << 18 ) & 0x00040000;
- block.z |= ( endPoint[1][0].b >> 3 ) & 0x00000001;
- block.z |= ( ( endPoint[1][1].b << 9 ) & 0x00001000 ) | ( ( endPoint[1][1].b << 4 ) & 0x00000040 );*/
-
- block.x |= ((candidateModeMemory[8] >> 0) & 1) << 0;
- block.x |= ((candidateModeMemory[8] >> 1) & 1) << 1;
- block.x |= ((candidateModeMemory[8] >> 2) & 1) << 2;
- block.x |= ((candidateModeMemory[8] >> 3) & 1) << 3;
- block.x |= ((candidateModeMemory[8] >> 4) & 1) << 4;
- block.x |= ((endPoint[0][0].r >> 0) & 1) << 5;
- block.x |= ((endPoint[0][0].r >> 1) & 1) << 6;
- block.x |= ((endPoint[0][0].r >> 2) & 1) << 7;
- block.x |= ((endPoint[0][0].r >> 3) & 1) << 8;
- block.x |= ((endPoint[0][0].r >> 4) & 1) << 9;
- block.x |= ((endPoint[0][0].r >> 5) & 1) << 10;
- block.x |= ((endPoint[0][0].r >> 6) & 1) << 11;
- block.x |= ((endPoint[0][0].r >> 7) & 1) << 12;
- block.x |= ((endPoint[1][1].b >> 1) & 1) << 13;
- block.x |= ((endPoint[1][0].b >> 4) & 1) << 14;
- block.x |= ((endPoint[0][0].g >> 0) & 1) << 15;
- block.x |= ((endPoint[0][0].g >> 1) & 1) << 16;
- block.x |= ((endPoint[0][0].g >> 2) & 1) << 17;
- block.x |= ((endPoint[0][0].g >> 3) & 1) << 18;
- block.x |= ((endPoint[0][0].g >> 4) & 1) << 19;
- block.x |= ((endPoint[0][0].g >> 5) & 1) << 20;
- block.x |= ((endPoint[0][0].g >> 6) & 1) << 21;
- block.x |= ((endPoint[0][0].g >> 7) & 1) << 22;
- block.x |= ((endPoint[1][0].b >> 5) & 1) << 23;
- block.x |= ((endPoint[1][0].g >> 4) & 1) << 24;
- block.x |= ((endPoint[0][0].b >> 0) & 1) << 25;
- block.x |= ((endPoint[0][0].b >> 1) & 1) << 26;
- block.x |= ((endPoint[0][0].b >> 2) & 1) << 27;
- block.x |= ((endPoint[0][0].b >> 3) & 1) << 28;
- block.x |= ((endPoint[0][0].b >> 4) & 1) << 29;
- block.x |= ((endPoint[0][0].b >> 5) & 1) << 30;
- block.x |= ((endPoint[0][0].b >> 6) & 1) << 31;
- block.y |= ((endPoint[0][0].b >> 7) & 1) << 0;
- block.y |= ((endPoint[1][1].b >> 5) & 1) << 1;
- block.y |= ((endPoint[1][1].b >> 4) & 1) << 2;
- block.y |= ((endPoint[0][1].r >> 0) & 1) << 3;
- block.y |= ((endPoint[0][1].r >> 1) & 1) << 4;
- block.y |= ((endPoint[0][1].r >> 2) & 1) << 5;
- block.y |= ((endPoint[0][1].r >> 3) & 1) << 6;
- block.y |= ((endPoint[0][1].r >> 4) & 1) << 7;
- block.y |= ((endPoint[1][1].g >> 4) & 1) << 8;
- block.y |= ((endPoint[1][0].g >> 0) & 1) << 9;
- block.y |= ((endPoint[1][0].g >> 1) & 1) << 10;
- block.y |= ((endPoint[1][0].g >> 2) & 1) << 11;
- block.y |= ((endPoint[1][0].g >> 3) & 1) << 12;
- block.y |= ((endPoint[0][1].g >> 0) & 1) << 13;
- block.y |= ((endPoint[0][1].g >> 1) & 1) << 14;
- block.y |= ((endPoint[0][1].g >> 2) & 1) << 15;
- block.y |= ((endPoint[0][1].g >> 3) & 1) << 16;
- block.y |= ((endPoint[0][1].g >> 4) & 1) << 17;
- block.y |= ((endPoint[1][1].b >> 0) & 1) << 18;
- block.y |= ((endPoint[1][1].g >> 0) & 1) << 19;
- block.y |= ((endPoint[1][1].g >> 1) & 1) << 20;
- block.y |= ((endPoint[1][1].g >> 2) & 1) << 21;
- block.y |= ((endPoint[1][1].g >> 3) & 1) << 22;
- block.y |= ((endPoint[0][1].b >> 0) & 1) << 23;
- block.y |= ((endPoint[0][1].b >> 1) & 1) << 24;
- block.y |= ((endPoint[0][1].b >> 2) & 1) << 25;
- block.y |= ((endPoint[0][1].b >> 3) & 1) << 26;
- block.y |= ((endPoint[0][1].b >> 4) & 1) << 27;
- block.y |= ((endPoint[0][1].b >> 5) & 1) << 28;
- block.y |= ((endPoint[1][0].b >> 0) & 1) << 29;
- block.y |= ((endPoint[1][0].b >> 1) & 1) << 30;
- block.y |= ((endPoint[1][0].b >> 2) & 1) << 31;
- block.z |= ((endPoint[1][0].b >> 3) & 1) << 0;
- block.z |= ((endPoint[1][0].r >> 0) & 1) << 1;
- block.z |= ((endPoint[1][0].r >> 1) & 1) << 2;
- block.z |= ((endPoint[1][0].r >> 2) & 1) << 3;
- block.z |= ((endPoint[1][0].r >> 3) & 1) << 4;
- block.z |= ((endPoint[1][0].r >> 4) & 1) << 5;
- block.z |= ((endPoint[1][1].b >> 2) & 1) << 6;
- block.z |= ((endPoint[1][1].r >> 0) & 1) << 7;
- block.z |= ((endPoint[1][1].r >> 1) & 1) << 8;
- block.z |= ((endPoint[1][1].r >> 2) & 1) << 9;
- block.z |= ((endPoint[1][1].r >> 3) & 1) << 10;
- block.z |= ((endPoint[1][1].r >> 4) & 1) << 11;
- block.z |= ((endPoint[1][1].b >> 3) & 1) << 12;
- block.z |= ((partition_index >> 0) & 1) << 13;
- block.z |= ((partition_index >> 1) & 1) << 14;
- block.z |= ((partition_index >> 2) & 1) << 15;
- block.z |= ((partition_index >> 3) & 1) << 16;
- block.z |= ((partition_index >> 4) & 1) << 17;
- }
- else if ( mode_type == candidateModeFlag[9])
- {
- /*block.x = candidateModeMemory[9];
- block.x |= ( ( endPoint[0][0].r << 5 ) & 0x000007E0 ) | ( ( endPoint[0][0].g << 15 ) & 0x001F8000 ) | ( ( endPoint[0][0].b << 25 ) & 0x7E000000 );
- block.y |= ( ( endPoint[0][1].r << 3 ) & 0x000001F8 ) | ( ( endPoint[0][1].g << 13 ) & 0x0007E000 ) | ( ( endPoint[0][1].b << 23 ) & 0x1F800000 );
- block.x |= ( ( endPoint[1][0].g << 16 ) & 0x00200000 ) | ( ( endPoint[1][0].g << 20 ) & 0x01000000 );
- block.x |= ( ( endPoint[1][0].b << 17 ) & 0x00400000 ) | ( ( endPoint[1][0].b << 10 ) & 0x00004000 );
- block.x |= ( ( endPoint[1][1].b << 21 ) & 0x00800000 ) | ( ( endPoint[1][1].b << 12 ) & 0x00003000 );
- block.x |= ( ( endPoint[1][1].g << 26 ) & 0x80000000 ) | ( ( endPoint[1][1].g << 7 ) & 0x00000800 );
- block.yz |= ( endPoint[1][0].gr << uint2(9, 1) ) & uint2(0x00001E00, 0x0000007E);
- block.yz |= ( endPoint[1][1].gr << uint2(19, 7) ) & uint2(0x00780000, 0x00001F80);
- block.y |= ( endPoint[1][0].b << 29 ) & 0xE0000000;
- block.y |= ( ( endPoint[1][1].b >> 4 ) & 0x00000002 ) | ( ( endPoint[1][1].b >> 2 ) & 0x00000004 ) | ( ( endPoint[1][1].b >> 3 ) & 0x00000001 );
- block.z |= ( endPoint[1][0].b >> 3 ) & 0x00000001;*/
-
- block.x |= ((candidateModeMemory[9] >> 0) & 1) << 0;
- block.x |= ((candidateModeMemory[9] >> 1) & 1) << 1;
- block.x |= ((candidateModeMemory[9] >> 2) & 1) << 2;
- block.x |= ((candidateModeMemory[9] >> 3) & 1) << 3;
- block.x |= ((candidateModeMemory[9] >> 4) & 1) << 4;
- block.x |= ((endPoint[0][0].r >> 0) & 1) << 5;
- block.x |= ((endPoint[0][0].r >> 1) & 1) << 6;
- block.x |= ((endPoint[0][0].r >> 2) & 1) << 7;
- block.x |= ((endPoint[0][0].r >> 3) & 1) << 8;
- block.x |= ((endPoint[0][0].r >> 4) & 1) << 9;
- block.x |= ((endPoint[0][0].r >> 5) & 1) << 10;
- block.x |= ((endPoint[1][1].g >> 4) & 1) << 11;
- block.x |= ((endPoint[1][1].b >> 0) & 1) << 12;
- block.x |= ((endPoint[1][1].b >> 1) & 1) << 13;
- block.x |= ((endPoint[1][0].b >> 4) & 1) << 14;
- block.x |= ((endPoint[0][0].g >> 0) & 1) << 15;
- block.x |= ((endPoint[0][0].g >> 1) & 1) << 16;
- block.x |= ((endPoint[0][0].g >> 2) & 1) << 17;
- block.x |= ((endPoint[0][0].g >> 3) & 1) << 18;
- block.x |= ((endPoint[0][0].g >> 4) & 1) << 19;
- block.x |= ((endPoint[0][0].g >> 5) & 1) << 20;
- block.x |= ((endPoint[1][0].g >> 5) & 1) << 21;
- block.x |= ((endPoint[1][0].b >> 5) & 1) << 22;
- block.x |= ((endPoint[1][1].b >> 2) & 1) << 23;
- block.x |= ((endPoint[1][0].g >> 4) & 1) << 24;
- block.x |= ((endPoint[0][0].b >> 0) & 1) << 25;
- block.x |= ((endPoint[0][0].b >> 1) & 1) << 26;
- block.x |= ((endPoint[0][0].b >> 2) & 1) << 27;
- block.x |= ((endPoint[0][0].b >> 3) & 1) << 28;
- block.x |= ((endPoint[0][0].b >> 4) & 1) << 29;
- block.x |= ((endPoint[0][0].b >> 5) & 1) << 30;
- block.x |= ((endPoint[1][1].g >> 5) & 1) << 31;
- block.y |= ((endPoint[1][1].b >> 3) & 1) << 0;
- block.y |= ((endPoint[1][1].b >> 5) & 1) << 1;
- block.y |= ((endPoint[1][1].b >> 4) & 1) << 2;
- block.y |= ((endPoint[0][1].r >> 0) & 1) << 3;
- block.y |= ((endPoint[0][1].r >> 1) & 1) << 4;
- block.y |= ((endPoint[0][1].r >> 2) & 1) << 5;
- block.y |= ((endPoint[0][1].r >> 3) & 1) << 6;
- block.y |= ((endPoint[0][1].r >> 4) & 1) << 7;
- block.y |= ((endPoint[0][1].r >> 5) & 1) << 8;
- block.y |= ((endPoint[1][0].g >> 0) & 1) << 9;
- block.y |= ((endPoint[1][0].g >> 1) & 1) << 10;
- block.y |= ((endPoint[1][0].g >> 2) & 1) << 11;
- block.y |= ((endPoint[1][0].g >> 3) & 1) << 12;
- block.y |= ((endPoint[0][1].g >> 0) & 1) << 13;
- block.y |= ((endPoint[0][1].g >> 1) & 1) << 14;
- block.y |= ((endPoint[0][1].g >> 2) & 1) << 15;
- block.y |= ((endPoint[0][1].g >> 3) & 1) << 16;
- block.y |= ((endPoint[0][1].g >> 4) & 1) << 17;
- block.y |= ((endPoint[0][1].g >> 5) & 1) << 18;
- block.y |= ((endPoint[1][1].g >> 0) & 1) << 19;
- block.y |= ((endPoint[1][1].g >> 1) & 1) << 20;
- block.y |= ((endPoint[1][1].g >> 2) & 1) << 21;
- block.y |= ((endPoint[1][1].g >> 3) & 1) << 22;
- block.y |= ((endPoint[0][1].b >> 0) & 1) << 23;
- block.y |= ((endPoint[0][1].b >> 1) & 1) << 24;
- block.y |= ((endPoint[0][1].b >> 2) & 1) << 25;
- block.y |= ((endPoint[0][1].b >> 3) & 1) << 26;
- block.y |= ((endPoint[0][1].b >> 4) & 1) << 27;
- block.y |= ((endPoint[0][1].b >> 5) & 1) << 28;
- block.y |= ((endPoint[1][0].b >> 0) & 1) << 29;
- block.y |= ((endPoint[1][0].b >> 1) & 1) << 30;
- block.y |= ((endPoint[1][0].b >> 2) & 1) << 31;
- block.z |= ((endPoint[1][0].b >> 3) & 1) << 0;
- block.z |= ((endPoint[1][0].r >> 0) & 1) << 1;
- block.z |= ((endPoint[1][0].r >> 1) & 1) << 2;
- block.z |= ((endPoint[1][0].r >> 2) & 1) << 3;
- block.z |= ((endPoint[1][0].r >> 3) & 1) << 4;
- block.z |= ((endPoint[1][0].r >> 4) & 1) << 5;
- block.z |= ((endPoint[1][0].r >> 5) & 1) << 6;
- block.z |= ((endPoint[1][1].r >> 0) & 1) << 7;
- block.z |= ((endPoint[1][1].r >> 1) & 1) << 8;
- block.z |= ((endPoint[1][1].r >> 2) & 1) << 9;
- block.z |= ((endPoint[1][1].r >> 3) & 1) << 10;
- block.z |= ((endPoint[1][1].r >> 4) & 1) << 11;
- block.z |= ((endPoint[1][1].r >> 5) & 1) << 12;
- block.z |= ((partition_index >> 0) & 1) << 13;
- block.z |= ((partition_index >> 1) & 1) << 14;
- block.z |= ((partition_index >> 2) & 1) << 15;
- block.z |= ((partition_index >> 3) & 1) << 16;
- block.z |= ((partition_index >> 4) & 1) << 17;
- }
-}
-void block_package( inout uint4 block, int2x3 endPoint, uint mode_type ) // for mode 11 - 14
-{
- /*block.x = ( ( endPoint[0].r << 5 ) & 0x00007FE0 ) | ( ( endPoint[0].g << 15 ) & 0x01FF8000 ) | ( ( endPoint[0].b << 25 ) & 0xFE000000 );
- block.y |= ( endPoint[0].b >> 7 ) & 0x00000007;*/
-
- block.xy = 0;
- block.z &= 0xFFFFFFFE;
-
-
- if ( mode_type == candidateModeFlag[10])
- {
- /* block.x |= candidateModeMemory[10];
- block.y |= ( ( endPoint[1].r << 3 ) & 0x00001FF8 ) | ( ( endPoint[1].g << 13 ) & 0x007FE000 ) | ( ( endPoint[1].b << 23 ) & 0xFF800000 );
- block.z |= ( endPoint[1].b >> 9 ) & 0x00000001;*/
-
- block.x |= ((candidateModeMemory[10] >> 0) & 1) << 0;
- block.x |= ((candidateModeMemory[10] >> 1) & 1) << 1;
- block.x |= ((candidateModeMemory[10] >> 2) & 1) << 2;
- block.x |= ((candidateModeMemory[10] >> 3) & 1) << 3;
- block.x |= ((candidateModeMemory[10] >> 4) & 1) << 4;
- block.x |= ((endPoint[0].r >> 0) & 1) << 5;
- block.x |= ((endPoint[0].r >> 1) & 1) << 6;
- block.x |= ((endPoint[0].r >> 2) & 1) << 7;
- block.x |= ((endPoint[0].r >> 3) & 1) << 8;
- block.x |= ((endPoint[0].r >> 4) & 1) << 9;
- block.x |= ((endPoint[0].r >> 5) & 1) << 10;
- block.x |= ((endPoint[0].r >> 6) & 1) << 11;
- block.x |= ((endPoint[0].r >> 7) & 1) << 12;
- block.x |= ((endPoint[0].r >> 8) & 1) << 13;
- block.x |= ((endPoint[0].r >> 9) & 1) << 14;
- block.x |= ((endPoint[0].g >> 0) & 1) << 15;
- block.x |= ((endPoint[0].g >> 1) & 1) << 16;
- block.x |= ((endPoint[0].g >> 2) & 1) << 17;
- block.x |= ((endPoint[0].g >> 3) & 1) << 18;
- block.x |= ((endPoint[0].g >> 4) & 1) << 19;
- block.x |= ((endPoint[0].g >> 5) & 1) << 20;
- block.x |= ((endPoint[0].g >> 6) & 1) << 21;
- block.x |= ((endPoint[0].g >> 7) & 1) << 22;
- block.x |= ((endPoint[0].g >> 8) & 1) << 23;
- block.x |= ((endPoint[0].g >> 9) & 1) << 24;
- block.x |= ((endPoint[0].b >> 0) & 1) << 25;
- block.x |= ((endPoint[0].b >> 1) & 1) << 26;
- block.x |= ((endPoint[0].b >> 2) & 1) << 27;
- block.x |= ((endPoint[0].b >> 3) & 1) << 28;
- block.x |= ((endPoint[0].b >> 4) & 1) << 29;
- block.x |= ((endPoint[0].b >> 5) & 1) << 30;
- block.x |= ((endPoint[0].b >> 6) & 1) << 31;
- block.y |= ((endPoint[0].b >> 7) & 1) << 0;
- block.y |= ((endPoint[0].b >> 8) & 1) << 1;
- block.y |= ((endPoint[0].b >> 9) & 1) << 2;
- block.y |= ((endPoint[1].r >> 0) & 1) << 3;
- block.y |= ((endPoint[1].r >> 1) & 1) << 4;
- block.y |= ((endPoint[1].r >> 2) & 1) << 5;
- block.y |= ((endPoint[1].r >> 3) & 1) << 6;
- block.y |= ((endPoint[1].r >> 4) & 1) << 7;
- block.y |= ((endPoint[1].r >> 5) & 1) << 8;
- block.y |= ((endPoint[1].r >> 6) & 1) << 9;
- block.y |= ((endPoint[1].r >> 7) & 1) << 10;
- block.y |= ((endPoint[1].r >> 8) & 1) << 11;
- block.y |= ((endPoint[1].r >> 9) & 1) << 12;
- block.y |= ((endPoint[1].g >> 0) & 1) << 13;
- block.y |= ((endPoint[1].g >> 1) & 1) << 14;
- block.y |= ((endPoint[1].g >> 2) & 1) << 15;
- block.y |= ((endPoint[1].g >> 3) & 1) << 16;
- block.y |= ((endPoint[1].g >> 4) & 1) << 17;
- block.y |= ((endPoint[1].g >> 5) & 1) << 18;
- block.y |= ((endPoint[1].g >> 6) & 1) << 19;
- block.y |= ((endPoint[1].g >> 7) & 1) << 20;
- block.y |= ((endPoint[1].g >> 8) & 1) << 21;
- block.y |= ((endPoint[1].g >> 9) & 1) << 22;
- block.y |= ((endPoint[1].b >> 0) & 1) << 23;
- block.y |= ((endPoint[1].b >> 1) & 1) << 24;
- block.y |= ((endPoint[1].b >> 2) & 1) << 25;
- block.y |= ((endPoint[1].b >> 3) & 1) << 26;
- block.y |= ((endPoint[1].b >> 4) & 1) << 27;
- block.y |= ((endPoint[1].b >> 5) & 1) << 28;
- block.y |= ((endPoint[1].b >> 6) & 1) << 29;
- block.y |= ((endPoint[1].b >> 7) & 1) << 30;
- block.y |= ((endPoint[1].b >> 8) & 1) << 31;
- block.z |= ((endPoint[1].b >> 9) & 1) << 0;
- }
- else if (mode_type == candidateModeFlag[11])
- {
- /*block.x |= candidateModeMemory[11];
- block.y |= ( ( endPoint[0].r << 2 ) & 0x00001000 ) | ( ( endPoint[0].g << 12 ) & 0x00400000 );
- block.y |= ( ( endPoint[1].r << 3 ) & 0x00000FF8 ) | ( ( endPoint[1].g << 13 ) & 0x003FE000 ) | ( ( endPoint[1].b << 23 ) & 0xFF800000 );
- block.z |= ( endPoint[0].b >> 10 ) & 0x00000001;*/
-
- block.x |= ((candidateModeMemory[11] >> 0) & 1) << 0;
- block.x |= ((candidateModeMemory[11] >> 1) & 1) << 1;
- block.x |= ((candidateModeMemory[11] >> 2) & 1) << 2;
- block.x |= ((candidateModeMemory[11] >> 3) & 1) << 3;
- block.x |= ((candidateModeMemory[11] >> 4) & 1) << 4;
- block.x |= ((endPoint[0].r >> 0) & 1) << 5;
- block.x |= ((endPoint[0].r >> 1) & 1) << 6;
- block.x |= ((endPoint[0].r >> 2) & 1) << 7;
- block.x |= ((endPoint[0].r >> 3) & 1) << 8;
- block.x |= ((endPoint[0].r >> 4) & 1) << 9;
- block.x |= ((endPoint[0].r >> 5) & 1) << 10;
- block.x |= ((endPoint[0].r >> 6) & 1) << 11;
- block.x |= ((endPoint[0].r >> 7) & 1) << 12;
- block.x |= ((endPoint[0].r >> 8) & 1) << 13;
- block.x |= ((endPoint[0].r >> 9) & 1) << 14;
- block.x |= ((endPoint[0].g >> 0) & 1) << 15;
- block.x |= ((endPoint[0].g >> 1) & 1) << 16;
- block.x |= ((endPoint[0].g >> 2) & 1) << 17;
- block.x |= ((endPoint[0].g >> 3) & 1) << 18;
- block.x |= ((endPoint[0].g >> 4) & 1) << 19;
- block.x |= ((endPoint[0].g >> 5) & 1) << 20;
- block.x |= ((endPoint[0].g >> 6) & 1) << 21;
- block.x |= ((endPoint[0].g >> 7) & 1) << 22;
- block.x |= ((endPoint[0].g >> 8) & 1) << 23;
- block.x |= ((endPoint[0].g >> 9) & 1) << 24;
- block.x |= ((endPoint[0].b >> 0) & 1) << 25;
- block.x |= ((endPoint[0].b >> 1) & 1) << 26;
- block.x |= ((endPoint[0].b >> 2) & 1) << 27;
- block.x |= ((endPoint[0].b >> 3) & 1) << 28;
- block.x |= ((endPoint[0].b >> 4) & 1) << 29;
- block.x |= ((endPoint[0].b >> 5) & 1) << 30;
- block.x |= ((endPoint[0].b >> 6) & 1) << 31;
- block.y |= ((endPoint[0].b >> 7) & 1) << 0;
- block.y |= ((endPoint[0].b >> 8) & 1) << 1;
- block.y |= ((endPoint[0].b >> 9) & 1) << 2;
- block.y |= ((endPoint[1].r >> 0) & 1) << 3;
- block.y |= ((endPoint[1].r >> 1) & 1) << 4;
- block.y |= ((endPoint[1].r >> 2) & 1) << 5;
- block.y |= ((endPoint[1].r >> 3) & 1) << 6;
- block.y |= ((endPoint[1].r >> 4) & 1) << 7;
- block.y |= ((endPoint[1].r >> 5) & 1) << 8;
- block.y |= ((endPoint[1].r >> 6) & 1) << 9;
- block.y |= ((endPoint[1].r >> 7) & 1) << 10;
- block.y |= ((endPoint[1].r >> 8) & 1) << 11;
- block.y |= ((endPoint[0].r >> 10) & 1) << 12;
- block.y |= ((endPoint[1].g >> 0) & 1) << 13;
- block.y |= ((endPoint[1].g >> 1) & 1) << 14;
- block.y |= ((endPoint[1].g >> 2) & 1) << 15;
- block.y |= ((endPoint[1].g >> 3) & 1) << 16;
- block.y |= ((endPoint[1].g >> 4) & 1) << 17;
- block.y |= ((endPoint[1].g >> 5) & 1) << 18;
- block.y |= ((endPoint[1].g >> 6) & 1) << 19;
- block.y |= ((endPoint[1].g >> 7) & 1) << 20;
- block.y |= ((endPoint[1].g >> 8) & 1) << 21;
- block.y |= ((endPoint[0].g >> 10) & 1) << 22;
- block.y |= ((endPoint[1].b >> 0) & 1) << 23;
- block.y |= ((endPoint[1].b >> 1) & 1) << 24;
- block.y |= ((endPoint[1].b >> 2) & 1) << 25;
- block.y |= ((endPoint[1].b >> 3) & 1) << 26;
- block.y |= ((endPoint[1].b >> 4) & 1) << 27;
- block.y |= ((endPoint[1].b >> 5) & 1) << 28;
- block.y |= ((endPoint[1].b >> 6) & 1) << 29;
- block.y |= ((endPoint[1].b >> 7) & 1) << 30;
- block.y |= ((endPoint[1].b >> 8) & 1) << 31;
- block.z |= ((endPoint[0].b >> 10) & 1) << 0;
- }
- else if (mode_type == candidateModeFlag[12])// violate the spec in [0].low
- {
- /*block.x |= candidateModeMemory[12];
- block.y |= ( ( endPoint[0].r << 2 ) & 0x00001000 ) | ( ( endPoint[0].g << 12 ) & 0x00400000 );
- block.y |= ( ( endPoint[0].r << 0 ) & 0x00000800 ) | ( ( endPoint[0].g << 10 ) & 0x00200000 );
- block.y |= ( endPoint[0].b << 20 ) & 0x80000000;
- block.y |= ( ( endPoint[1].r << 3 ) & 0x000007F8 ) | ( ( endPoint[1].g << 13 ) & 0x001FE000 ) | ( ( endPoint[1].b << 23 ) & 0x7F800000 );
- block.z |= ( endPoint[0].b >> 10 ) & 0x00000001;*/
-
- block.x |= ((candidateModeMemory[12] >> 0) & 1) << 0;
- block.x |= ((candidateModeMemory[12] >> 1) & 1) << 1;
- block.x |= ((candidateModeMemory[12] >> 2) & 1) << 2;
- block.x |= ((candidateModeMemory[12] >> 3) & 1) << 3;
- block.x |= ((candidateModeMemory[12] >> 4) & 1) << 4;
- block.x |= ((endPoint[0].r >> 0) & 1) << 5;
- block.x |= ((endPoint[0].r >> 1) & 1) << 6;
- block.x |= ((endPoint[0].r >> 2) & 1) << 7;
- block.x |= ((endPoint[0].r >> 3) & 1) << 8;
- block.x |= ((endPoint[0].r >> 4) & 1) << 9;
- block.x |= ((endPoint[0].r >> 5) & 1) << 10;
- block.x |= ((endPoint[0].r >> 6) & 1) << 11;
- block.x |= ((endPoint[0].r >> 7) & 1) << 12;
- block.x |= ((endPoint[0].r >> 8) & 1) << 13;
- block.x |= ((endPoint[0].r >> 9) & 1) << 14;
- block.x |= ((endPoint[0].g >> 0) & 1) << 15;
- block.x |= ((endPoint[0].g >> 1) & 1) << 16;
- block.x |= ((endPoint[0].g >> 2) & 1) << 17;
- block.x |= ((endPoint[0].g >> 3) & 1) << 18;
- block.x |= ((endPoint[0].g >> 4) & 1) << 19;
- block.x |= ((endPoint[0].g >> 5) & 1) << 20;
- block.x |= ((endPoint[0].g >> 6) & 1) << 21;
- block.x |= ((endPoint[0].g >> 7) & 1) << 22;
- block.x |= ((endPoint[0].g >> 8) & 1) << 23;
- block.x |= ((endPoint[0].g >> 9) & 1) << 24;
- block.x |= ((endPoint[0].b >> 0) & 1) << 25;
- block.x |= ((endPoint[0].b >> 1) & 1) << 26;
- block.x |= ((endPoint[0].b >> 2) & 1) << 27;
- block.x |= ((endPoint[0].b >> 3) & 1) << 28;
- block.x |= ((endPoint[0].b >> 4) & 1) << 29;
- block.x |= ((endPoint[0].b >> 5) & 1) << 30;
- block.x |= ((endPoint[0].b >> 6) & 1) << 31;
- block.y |= ((endPoint[0].b >> 7) & 1) << 0;
- block.y |= ((endPoint[0].b >> 8) & 1) << 1;
- block.y |= ((endPoint[0].b >> 9) & 1) << 2;
- block.y |= ((endPoint[1].r >> 0) & 1) << 3;
- block.y |= ((endPoint[1].r >> 1) & 1) << 4;
- block.y |= ((endPoint[1].r >> 2) & 1) << 5;
- block.y |= ((endPoint[1].r >> 3) & 1) << 6;
- block.y |= ((endPoint[1].r >> 4) & 1) << 7;
- block.y |= ((endPoint[1].r >> 5) & 1) << 8;
- block.y |= ((endPoint[1].r >> 6) & 1) << 9;
- block.y |= ((endPoint[1].r >> 7) & 1) << 10;
- block.y |= ((endPoint[0].r >> 11) & 1) << 11;
- block.y |= ((endPoint[0].r >> 10) & 1) << 12;
- block.y |= ((endPoint[1].g >> 0) & 1) << 13;
- block.y |= ((endPoint[1].g >> 1) & 1) << 14;
- block.y |= ((endPoint[1].g >> 2) & 1) << 15;
- block.y |= ((endPoint[1].g >> 3) & 1) << 16;
- block.y |= ((endPoint[1].g >> 4) & 1) << 17;
- block.y |= ((endPoint[1].g >> 5) & 1) << 18;
- block.y |= ((endPoint[1].g >> 6) & 1) << 19;
- block.y |= ((endPoint[1].g >> 7) & 1) << 20;
- block.y |= ((endPoint[0].g >> 11) & 1) << 21;
- block.y |= ((endPoint[0].g >> 10) & 1) << 22;
- block.y |= ((endPoint[1].b >> 0) & 1) << 23;
- block.y |= ((endPoint[1].b >> 1) & 1) << 24;
- block.y |= ((endPoint[1].b >> 2) & 1) << 25;
- block.y |= ((endPoint[1].b >> 3) & 1) << 26;
- block.y |= ((endPoint[1].b >> 4) & 1) << 27;
- block.y |= ((endPoint[1].b >> 5) & 1) << 28;
- block.y |= ((endPoint[1].b >> 6) & 1) << 29;
- block.y |= ((endPoint[1].b >> 7) & 1) << 30;
- block.y |= ((endPoint[0].b >> 11) & 1) << 31;
- block.z |= ((endPoint[0].b >> 10) & 1) << 0;
- }
- else if (mode_type == candidateModeFlag[13])
- {
- /*block.x |= candidateModeMemory[13];
- block.y |= ( ( endPoint[0].r >> 8 ) & 0x00000080 );
- block.y |= ( ( endPoint[0].r >> 6 ) & 0x00000100 );
- block.y |= ( ( endPoint[0].r >> 4 ) & 0x00000200 );
- block.y |= ( ( endPoint[0].r >> 2 ) & 0x00000400 );
- block.y |= ( ( endPoint[0].r >> 0 ) & 0x00000800 );
- block.y |= ( ( endPoint[0].r << 2 ) & 0x00001000 );
- block.y |= ( ( endPoint[0].g << 2 ) & 0x00020000 );
- block.y |= ( ( endPoint[0].g << 4 ) & 0x00040000 );
- block.y |= ( ( endPoint[0].g << 6 ) & 0x00080000 );
- block.y |= ( ( endPoint[0].g << 8 ) & 0x00100000 );
- block.y |= ( ( endPoint[0].g << 10 ) & 0x00200000 );
- block.y |= ( ( endPoint[0].g << 12 ) & 0x00400000 );
- block.y |= ( ( endPoint[0].b << 12 ) & 0x08000000 );
- block.y |= ( ( endPoint[0].b << 14 ) & 0x10000000 );
- block.y |= ( ( endPoint[0].b << 16 ) & 0x20000000 );
- block.y |= ( ( endPoint[0].b << 18 ) & 0x40000000 );
- block.y |= ( ( endPoint[0].b << 20 ) & 0x80000000 );
- block.y |= ( ( endPoint[1].r << 3 ) & 0x00000078 ) | ( ( endPoint[1].g << 13 ) & 0x0001E000 ) | ( ( endPoint[1].b << 23 ) & 0x07800000 );
- block.z |= ( endPoint[0].b >> 10 ) & 0x00000001;*/
-
- block.x |= ((candidateModeMemory[13] >> 0) & 1) << 0;
- block.x |= ((candidateModeMemory[13] >> 1) & 1) << 1;
- block.x |= ((candidateModeMemory[13] >> 2) & 1) << 2;
- block.x |= ((candidateModeMemory[13] >> 3) & 1) << 3;
- block.x |= ((candidateModeMemory[13] >> 4) & 1) << 4;
- block.x |= ((endPoint[0].r >> 0) & 1) << 5;
- block.x |= ((endPoint[0].r >> 1) & 1) << 6;
- block.x |= ((endPoint[0].r >> 2) & 1) << 7;
- block.x |= ((endPoint[0].r >> 3) & 1) << 8;
- block.x |= ((endPoint[0].r >> 4) & 1) << 9;
- block.x |= ((endPoint[0].r >> 5) & 1) << 10;
- block.x |= ((endPoint[0].r >> 6) & 1) << 11;
- block.x |= ((endPoint[0].r >> 7) & 1) << 12;
- block.x |= ((endPoint[0].r >> 8) & 1) << 13;
- block.x |= ((endPoint[0].r >> 9) & 1) << 14;
- block.x |= ((endPoint[0].g >> 0) & 1) << 15;
- block.x |= ((endPoint[0].g >> 1) & 1) << 16;
- block.x |= ((endPoint[0].g >> 2) & 1) << 17;
- block.x |= ((endPoint[0].g >> 3) & 1) << 18;
- block.x |= ((endPoint[0].g >> 4) & 1) << 19;
- block.x |= ((endPoint[0].g >> 5) & 1) << 20;
- block.x |= ((endPoint[0].g >> 6) & 1) << 21;
- block.x |= ((endPoint[0].g >> 7) & 1) << 22;
- block.x |= ((endPoint[0].g >> 8) & 1) << 23;
- block.x |= ((endPoint[0].g >> 9) & 1) << 24;
- block.x |= ((endPoint[0].b >> 0) & 1) << 25;
- block.x |= ((endPoint[0].b >> 1) & 1) << 26;
- block.x |= ((endPoint[0].b >> 2) & 1) << 27;
- block.x |= ((endPoint[0].b >> 3) & 1) << 28;
- block.x |= ((endPoint[0].b >> 4) & 1) << 29;
- block.x |= ((endPoint[0].b >> 5) & 1) << 30;
- block.x |= ((endPoint[0].b >> 6) & 1) << 31;
- block.y |= ((endPoint[0].b >> 7) & 1) << 0;
- block.y |= ((endPoint[0].b >> 8) & 1) << 1;
- block.y |= ((endPoint[0].b >> 9) & 1) << 2;
- block.y |= ((endPoint[1].r >> 0) & 1) << 3;
- block.y |= ((endPoint[1].r >> 1) & 1) << 4;
- block.y |= ((endPoint[1].r >> 2) & 1) << 5;
- block.y |= ((endPoint[1].r >> 3) & 1) << 6;
- block.y |= ((endPoint[0].r >> 15) & 1) << 7;
- block.y |= ((endPoint[0].r >> 14) & 1) << 8;
- block.y |= ((endPoint[0].r >> 13) & 1) << 9;
- block.y |= ((endPoint[0].r >> 12) & 1) << 10;
- block.y |= ((endPoint[0].r >> 11) & 1) << 11;
- block.y |= ((endPoint[0].r >> 10) & 1) << 12;
- block.y |= ((endPoint[1].g >> 0) & 1) << 13;
- block.y |= ((endPoint[1].g >> 1) & 1) << 14;
- block.y |= ((endPoint[1].g >> 2) & 1) << 15;
- block.y |= ((endPoint[1].g >> 3) & 1) << 16;
- block.y |= ((endPoint[0].g >> 15) & 1) << 17;
- block.y |= ((endPoint[0].g >> 14) & 1) << 18;
- block.y |= ((endPoint[0].g >> 13) & 1) << 19;
- block.y |= ((endPoint[0].g >> 12) & 1) << 20;
- block.y |= ((endPoint[0].g >> 11) & 1) << 21;
- block.y |= ((endPoint[0].g >> 10) & 1) << 22;
- block.y |= ((endPoint[1].b >> 0) & 1) << 23;
- block.y |= ((endPoint[1].b >> 1) & 1) << 24;
- block.y |= ((endPoint[1].b >> 2) & 1) << 25;
- block.y |= ((endPoint[1].b >> 3) & 1) << 26;
- block.y |= ((endPoint[0].b >> 15) & 1) << 27;
- block.y |= ((endPoint[0].b >> 14) & 1) << 28;
- block.y |= ((endPoint[0].b >> 13) & 1) << 29;
- block.y |= ((endPoint[0].b >> 12) & 1) << 30;
- block.y |= ((endPoint[0].b >> 11) & 1) << 31;
- block.z |= ((endPoint[0].b >> 10) & 1) << 0;
- }
-}
diff --git a/tests/hlsl/dxsdk/BC6HBC7EncoderCS/Shaders/BC7Encode.hlsl b/tests/hlsl/dxsdk/BC6HBC7EncoderCS/Shaders/BC7Encode.hlsl
deleted file mode 100644
index 6a57c3862..000000000
--- a/tests/hlsl/dxsdk/BC6HBC7EncoderCS/Shaders/BC7Encode.hlsl
+++ /dev/null
@@ -1,1908 +0,0 @@
-//TEST_IGNORE_FILE:
-//--------------------------------------------------------------------------------------
-// File: BC7Encode.hlsl
-//
-// The Compute Shader for BC7 Encoder
-//
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-//#define REF_DEVICE
-
-#define CHAR_LENGTH 8
-#define NCHANNELS 4
-#define BC7_UNORM 98
-#define MAX_UINT 0xFFFFFFFF
-#define MIN_UINT 0
-
-static const uint candidateSectionBit[64] = //Partition masks for partitions 0-63 (two subsets): bit i set means pixel i belongs to subset 1, clear means subset 0
-{
- 0xCCCC, 0x8888, 0xEEEE, 0xECC8,
- 0xC880, 0xFEEC, 0xFEC8, 0xEC80,
- 0xC800, 0xFFEC, 0xFE80, 0xE800,
- 0xFFE8, 0xFF00, 0xFFF0, 0xF000,
- 0xF710, 0x008E, 0x7100, 0x08CE,
- 0x008C, 0x7310, 0x3100, 0x8CCE,
- 0x088C, 0x3110, 0x6666, 0x366C,
- 0x17E8, 0x0FF0, 0x718E, 0x399C,
- 0xaaaa, 0xf0f0, 0x5a5a, 0x33cc,
- 0x3c3c, 0x55aa, 0x9696, 0xa55a,
- 0x73ce, 0x13c8, 0x324c, 0x3bdc,
- 0x6996, 0xc33c, 0x9966, 0x660,
- 0x272, 0x4e4, 0x4e40, 0x2720,
- 0xc936, 0x936c, 0x39c6, 0x639c,
- 0x9336, 0x9cc6, 0x817e, 0xe718,
- 0xccf0, 0xfcc, 0x7744, 0xee22,
-};
-static const uint candidateSectionBit2[64] = //Partition masks associated with partitions 64-127. NOTE(review): presumably 2 bits per pixel selecting one of three subsets - the code that reads this table is not nearby; confirm
-{
- 0xaa685050, 0x6a5a5040, 0x5a5a4200, 0x5450a0a8,
- 0xa5a50000, 0xa0a05050, 0x5555a0a0, 0x5a5a5050,
- 0xaa550000, 0xaa555500, 0xaaaa5500, 0x90909090,
- 0x94949494, 0xa4a4a4a4, 0xa9a59450, 0x2a0a4250,
- 0xa5945040, 0x0a425054, 0xa5a5a500, 0x55a0a0a0,
- 0xa8a85454, 0x6a6a4040, 0xa4a45000, 0x1a1a0500,
- 0x0050a4a4, 0xaaa59090, 0x14696914, 0x69691400,
- 0xa08585a0, 0xaa821414, 0x50a4a450, 0x6a5a0200,
- 0xa9a58000, 0x5090a0a8, 0xa8a09050, 0x24242424,
- 0x00aa5500, 0x24924924, 0x24499224, 0x50a50a50,
- 0x500aa550, 0xaaaa4444, 0x66660000, 0xa5a0a5a0,
- 0x50a050a0, 0x69286928, 0x44aaaa44, 0x66666600,
- 0xaa444444, 0x54a854a8, 0x95809580, 0x96969600,
- 0xa85454a8, 0x80959580, 0xaa141414, 0x96960000,
- 0xaaaa1414, 0xa05050a0, 0xa0a5a5a0, 0x96000000,
- 0x40804080, 0xa9a8a9a8, 0xaaaaaa44, 0x2a4a5254,
-};
-static const uint2 candidateFixUpIndex1D[128] = //Fix-up index pair for each of the 128 partitions; only the first component is meaningful for i < 64
-{
- {15, 0},{15, 0},{15, 0},{15, 0},
- {15, 0},{15, 0},{15, 0},{15, 0},
- {15, 0},{15, 0},{15, 0},{15, 0},
- {15, 0},{15, 0},{15, 0},{15, 0},
- {15, 0},{ 2, 0},{ 8, 0},{ 2, 0},
- { 2, 0},{ 8, 0},{ 8, 0},{15, 0},
- { 2, 0},{ 8, 0},{ 2, 0},{ 2, 0},
- { 8, 0},{ 8, 0},{ 2, 0},{ 2, 0},
-
- {15, 0},{15, 0},{ 6, 0},{ 8, 0},
- { 2, 0},{ 8, 0},{15, 0},{15, 0},
- { 2, 0},{ 8, 0},{ 2, 0},{ 2, 0},
- { 2, 0},{15, 0},{15, 0},{ 6, 0},
- { 6, 0},{ 2, 0},{ 6, 0},{ 8, 0},
- {15, 0},{15, 0},{ 2, 0},{ 2, 0},
- {15, 0},{15, 0},{15, 0},{15, 0},
- {15, 0},{ 2, 0},{ 2, 0},{15, 0},
- //For i < 64 the second component, candidateFixUpIndex1D[i][1], is a placeholder and should not be used
-
- { 3,15},{ 3, 8},{15, 8},{15, 3},
- { 8,15},{ 3,15},{15, 3},{15, 8},
- { 8,15},{ 8,15},{ 6,15},{ 6,15},
- { 6,15},{ 5,15},{ 3,15},{ 3, 8},
- { 3,15},{ 3, 8},{ 8,15},{15, 3},
- { 3,15},{ 3, 8},{ 6,15},{10, 8},
- { 5, 3},{ 8,15},{ 8, 6},{ 6,10},
- { 8,15},{ 5,15},{15,10},{15, 8},
-
- { 8,15},{15, 3},{ 3,15},{ 5,10},
- { 6,10},{10, 8},{ 8, 9},{15,10},
- {15, 6},{ 3,15},{15, 8},{ 5,15},
- {15, 3},{15, 6},{15, 6},{15, 8}, //The spec does not mark the first fix-up index for the entries in this row, so 15 is used for them; this appears to be correct
- { 3,15},{15, 3},{ 5,15},{ 5,15},
- { 5,15},{ 8,15},{ 5,15},{10,15},
- { 5,15},{10,15},{ 8,15},{13,15},
- {15, 3},{12,15},{ 3,15},{ 3, 8},
-};
-static const uint2 candidateFixUpIndex1DOrdered[128] = //Same as candidateFixUpIndex1D, except that for i >= 64 each pair is stored in ascending order
-{
- {15, 0},{15, 0},{15, 0},{15, 0},
- {15, 0},{15, 0},{15, 0},{15, 0},
- {15, 0},{15, 0},{15, 0},{15, 0},
- {15, 0},{15, 0},{15, 0},{15, 0},
- {15, 0},{ 2, 0},{ 8, 0},{ 2, 0},
- { 2, 0},{ 8, 0},{ 8, 0},{15, 0},
- { 2, 0},{ 8, 0},{ 2, 0},{ 2, 0},
- { 8, 0},{ 8, 0},{ 2, 0},{ 2, 0},
-
- {15, 0},{15, 0},{ 6, 0},{ 8, 0},
- { 2, 0},{ 8, 0},{15, 0},{15, 0},
- { 2, 0},{ 8, 0},{ 2, 0},{ 2, 0},
- { 2, 0},{15, 0},{15, 0},{ 6, 0},
- { 6, 0},{ 2, 0},{ 6, 0},{ 8, 0},
- {15, 0},{15, 0},{ 2, 0},{ 2, 0},
- {15, 0},{15, 0},{15, 0},{15, 0},
- {15, 0},{ 2, 0},{ 2, 0},{15, 0},
- //For i < 64 the second component, candidateFixUpIndex1DOrdered[i][1], is a placeholder and should not be used
-
- { 3,15},{ 3, 8},{ 8,15},{ 3,15},
- { 8,15},{ 3,15},{ 3,15},{ 8,15},
- { 8,15},{ 8,15},{ 6,15},{ 6,15},
- { 6,15},{ 5,15},{ 3,15},{ 3, 8},
- { 3,15},{ 3, 8},{ 8,15},{ 3,15},
- { 3,15},{ 3, 8},{ 6,15},{ 8,10},
- { 3, 5},{ 8,15},{ 6, 8},{ 6,10},
- { 8,15},{ 5,15},{10,15},{ 8,15},
-
- { 8,15},{ 3,15},{ 3,15},{ 5,10},
- { 6,10},{ 8,10},{ 8, 9},{10,15},
- { 6,15},{ 3,15},{ 8,15},{ 5,15},
- { 3,15},{ 6,15},{ 6,15},{ 8,15}, //The spec does not mark the first fix-up index for the entries in this row, so 15 is used for them; this appears to be correct
- { 3,15},{ 3,15},{ 5,15},{ 5,15},
- { 5,15},{ 8,15},{ 5,15},{10,15},
- { 5,15},{10,15},{ 8,15},{13,15},
- { 3,15},{12,15},{ 3,15},{ 3, 8},
-};
-//static const uint4x4 candidateRotation[4] =
-//{
-// {1,0,0,0},{0,1,0,0},{0,0,1,0},{0,0,0,1},
-// {0,0,0,1},{0,1,0,0},{0,0,1,0},{1,0,0,0},
-// {1,0,0,0},{0,0,0,1},{0,0,1,0},{0,1,0,0},
-// {1,0,0,0},{0,1,0,0},{0,0,0,1},{0,0,1,0}
-//};
-//static const uint2 candidateIndexPrec[8] = {{3,0},{3,0},{2,0},{2,0},
-// {2,3}, //color index and alpha index can exchange
-// {2,2},{4,4},{2,2}};
-
-static const uint aWeight[3][16] = { {0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64}, // row 0: interpolation weights (out of 64) for 4 bit indices
- {0, 9, 18, 27, 37, 46, 55, 64, 0, 0, 0, 0, 0, 0, 0, 0}, // row 1: weights for 3 bit indices; the trailing zeros are padding
- {0, 21, 43, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} }; // row 2: weights for 2 bit indices; the trailing zeros are padding
-
- //aStep[row][q] gives the index selected for a position q in 0..63 along the end-point span (q is computed by the callers as dot * 63.49999 / span_norm_sqr). Row 0, 4 bit index, weights: 0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64
-static const uint aStep[3][64] = { { 0, 0, 0, 1, 1, 1, 1, 2,
- 2, 2, 2, 2, 3, 3, 3, 3,
- 4, 4, 4, 4, 5, 5, 5, 5,
- 6, 6, 6, 6, 6, 7, 7, 7,
- 7, 8, 8, 8, 8, 9, 9, 9,
- 9,10,10,10,10,10,11,11,
- 11,11,12,12,12,12,13,13,
- 13,13,14,14,14,14,15,15 },
- //Row 1, 3 bit index, weights: 0, 9, 18, 27, 37, 46, 55, 64
- { 0,0,0,0,0,1,1,1,
- 1,1,1,1,1,1,2,2,
- 2,2,2,2,2,2,2,3,
- 3,3,3,3,3,3,3,3,
- 3,4,4,4,4,4,4,4,
- 4,4,5,5,5,5,5,5,
- 5,5,5,6,6,6,6,6,
- 6,6,6,6,7,7,7,7 },
- //Row 2, 2 bit index, weights: 0, 21, 43, 64
- { 0,0,0,0,0,0,0,0,
- 0,0,0,1,1,1,1,1,
- 1,1,1,1,1,1,1,1,
- 1,1,1,1,1,1,1,1,
- 1,2,2,2,2,2,2,2,
- 2,2,2,2,2,2,2,2,
- 2,2,2,2,2,2,3,3,
- 3,3,3,3,3,3,3,3 } };
-
-cbuffer cbCS : register( b0 ) // Constants read by the encoder passes
-{
- uint g_tex_width; // NOTE(review): presumably the input texture width - not read by the nearby passes; confirm
- uint g_num_block_x; // number of blocks per row; used to split a block id into block x / block y
- uint g_format; // NOTE(review): presumably the target format (cf. BC7_UNORM) - not read by the nearby passes; confirm
- uint g_mode_id; // mode being tried; TryMode137CS compares it against 1, 3 and 7
- uint g_start_block_id; // added to the per-group block index to form the global block id
- uint g_num_total_blocks; // TryMode456CS returns early for block ids >= this value (unless REF_DEVICE is defined)
- float g_alpha_weight; // weight applied to the alpha term in ComputeError
-};
-
-//Forward declarations of the per-mode end-point compression helpers; endPoint is an inout parameter, and P is passed a pair of p bits by the callers
-uint2x4 compress_endpoints0( inout uint2x4 endPoint, uint2 P ); //Mode = 0
-uint2x4 compress_endpoints1( inout uint2x4 endPoint, uint2 P ); //Mode = 1
-uint2x4 compress_endpoints2( inout uint2x4 endPoint ); //Mode = 2
-uint2x4 compress_endpoints3( inout uint2x4 endPoint, uint2 P ); //Mode = 3
-uint2x4 compress_endpoints7( inout uint2x4 endPoint, uint2 P ); //Mode = 7
-uint2x4 compress_endpoints6( inout uint2x4 endPoint, uint2 P ); //Mode = 6
-uint2x4 compress_endpoints4( inout uint2x4 endPoint ); //Mode = 4
-uint2x4 compress_endpoints5( inout uint2x4 endPoint ); //Mode = 5
-//Forward declarations of the per-mode block packaging helpers; each one writes its result to the out parameter block
-void block_package0( out uint4 block, uint partition, uint threadBase ); //Mode0
-void block_package1( out uint4 block, uint partition, uint threadBase ); //Mode1
-void block_package2( out uint4 block, uint partition, uint threadBase ); //Mode2
-void block_package3( out uint4 block, uint partition, uint threadBase ); //Mode3
-void block_package4( out uint4 block, uint rotation, uint index_selector, uint threadBase ); //Mode4
-void block_package5( out uint4 block, uint rotation, uint threadBase ); //Mode5
-void block_package6( out uint4 block, uint threadBase ); //Mode6
-void block_package7( out uint4 block, uint partition, uint threadBase ); //Mode7
-
-
-void swap(inout uint4 lhs, inout uint4 rhs) // Exchanges the two uint4 values in place
-{
- uint4 tmp = lhs;
- lhs = rhs;
- rhs = tmp;
-}
-void swap(inout uint3 lhs, inout uint3 rhs) // Exchanges the two uint3 values in place
-{
- uint3 tmp = lhs;
- lhs = rhs;
- rhs = tmp;
-}
-void swap(inout uint lhs, inout uint rhs) // Exchanges the two scalar uint values in place
-{
- uint tmp = lhs;
- lhs = rhs;
- rhs = tmp;
-}
-
-uint ComputeError(in uint4 a, in uint4 b) // Returns dot(a.rgb, b.rgb) plus the alpha product scaled by g_alpha_weight, as a uint; the callers pass the same per-channel difference for a and b, which yields a weighted squared error
-{
- return dot(a.rgb, b.rgb) + g_alpha_weight * a.a*b.a; // the alpha term is computed in float (g_alpha_weight is a float) before conversion to the uint return type
-}
-
-void Ensure_A_Is_Larger( inout uint4 a, inout uint4 b ) // Swaps a and b per component so that a >= b in every component; callers then compute a - b on unsigned values
-{
- if ( a.x < b.x )
- swap( a.x, b.x );
- if ( a.y < b.y )
- swap( a.y, b.y );
- if ( a.z < b.z )
- swap( a.z, b.z );
- if ( a.w < b.w )
- swap( a.w, b.w );
-}
-
-
-Texture2D g_Input : register( t0 ); // source texture; the passes Load() one texel per pixel and scale it by 255
-StructuredBuffer<uint4> g_InBuff : register( t1 ); // NOTE(review): presumably results of an earlier pass - not read by TryMode456CS; confirm
-
-RWStructuredBuffer<uint4> g_OutBuff : register( u0 ); // one uint4 result per block, indexed by block id
-// Thread-group and block dimensions: 64 threads per group, 4x4 pixels per block
-#define THREAD_GROUP_SIZE 64
-#define BLOCK_SIZE_Y 4
-#define BLOCK_SIZE_X 4
-#define BLOCK_SIZE (BLOCK_SIZE_Y * BLOCK_SIZE_X)
-
-struct BufferShared // Per-thread scratch record; shared_temp below holds one per thread in groupshared storage
-{
- uint4 pixel; // texel scaled by 255 and clamped to 0..255 per channel
- uint error; // candidate error; the passes keep the minimum while reducing across threads
- uint mode; // mode that produced error
- uint partition;
- uint index_selector; // index selector tried by the mode 4 threads
- uint rotation; // component rotation for modes 4 and 5; TryMode456CS also stores the p value here for mode 6
- uint4 endPoint_low; // running per-channel minimum during the end-point reduction
- uint4 endPoint_high; // running per-channel maximum during the end-point reduction
- uint4 endPoint_low_quantized;
- uint4 endPoint_high_quantized;
-};
-groupshared BufferShared shared_temp[THREAD_GROUP_SIZE]; // one entry per thread of the group
-
-[numthreads( THREAD_GROUP_SIZE, 1, 1 )]
-void TryMode456CS( uint GI : SV_GroupIndex, uint3 groupID : SV_GroupID ) // Tries modes 4, 5 and 6 on each block and writes the lowest error with its mode, index selector and rotation to g_OutBuff[blockID]. Modes 4 5 6 all have 1 subset per block, and the fix-up index is always index 0
-{
- // we process 4 BC blocks per thread group
- const uint MAX_USED_THREAD = 16; // pixels in a BC (block compressed) block
- uint BLOCK_IN_GROUP = THREAD_GROUP_SIZE / MAX_USED_THREAD; // the number of BC blocks a thread group processes = 64 / 16 = 4
- uint blockInGroup = GI / MAX_USED_THREAD; // what BC block this thread is on within this thread group
- uint blockID = g_start_block_id + groupID.x * BLOCK_IN_GROUP + blockInGroup; // what global BC block this thread is on
- uint threadBase = blockInGroup * MAX_USED_THREAD; // the first id of the pixel in this BC block in this thread group
- uint threadInBlock = GI - threadBase; // id of the pixel in this BC block
-
-#ifndef REF_DEVICE
- if (blockID >= g_num_total_blocks)
- {
- return;
- }
-#endif
-
- uint block_y = blockID / g_num_block_x;
- uint block_x = blockID - block_y * g_num_block_x;
- uint base_x = block_x * BLOCK_SIZE_X;
- uint base_y = block_y * BLOCK_SIZE_Y;
-// Load this thread's texel (scaled to 0..255) and seed the min/max end-point reduction with it
- if (threadInBlock < 16)
- {
- shared_temp[GI].pixel = clamp(uint4(g_Input.Load( uint3( base_x + threadInBlock % 4, base_y + threadInBlock / 4, 0 ) ) * 255), 0, 255);
-
- shared_temp[GI].endPoint_low = shared_temp[GI].pixel;
- shared_temp[GI].endPoint_high = shared_temp[GI].pixel;
- }
-#ifdef REF_DEVICE
- GroupMemoryBarrierWithGroupSync();
-#endif
-
- if (threadInBlock < 8)
- {
- shared_temp[GI].endPoint_low = min(shared_temp[GI].endPoint_low, shared_temp[GI + 8].endPoint_low);
- shared_temp[GI].endPoint_high = max(shared_temp[GI].endPoint_high, shared_temp[GI + 8].endPoint_high);
- }
-#ifdef REF_DEVICE
- GroupMemoryBarrierWithGroupSync();
-#endif
- if (threadInBlock < 4)
- {
- shared_temp[GI].endPoint_low = min(shared_temp[GI].endPoint_low, shared_temp[GI + 4].endPoint_low);
- shared_temp[GI].endPoint_high = max(shared_temp[GI].endPoint_high, shared_temp[GI + 4].endPoint_high);
- }
-#ifdef REF_DEVICE
- GroupMemoryBarrierWithGroupSync();
-#endif
- if (threadInBlock < 2)
- {
- shared_temp[GI].endPoint_low = min(shared_temp[GI].endPoint_low, shared_temp[GI + 2].endPoint_low);
- shared_temp[GI].endPoint_high = max(shared_temp[GI].endPoint_high, shared_temp[GI + 2].endPoint_high);
- }
-#ifdef REF_DEVICE
- GroupMemoryBarrierWithGroupSync();
-#endif
- if (threadInBlock < 1)
- {
- shared_temp[GI].endPoint_low = min(shared_temp[GI].endPoint_low, shared_temp[GI + 1].endPoint_low);
- shared_temp[GI].endPoint_high = max(shared_temp[GI].endPoint_high, shared_temp[GI + 1].endPoint_high);
- }
-#ifdef REF_DEVICE
- GroupMemoryBarrierWithGroupSync();
-#endif
-// Every thread of the block now reads the reduced min/max from shared_temp[threadBase]. NOTE(review): the barriers above are compiled only when REF_DEVICE is defined; otherwise this presumably relies on the 16 threads of a block executing in lock-step - confirm
- uint2x4 endPoint;
- endPoint[0] = shared_temp[threadBase].endPoint_low;
- endPoint[1] = shared_temp[threadBase].endPoint_high;
-
- uint error = 0xFFFFFFFF;
- uint mode = 0;
- uint index_selector = 0;
- uint rotation = 0;
-
- uint2 indexPrec;
- if (threadInBlock < 8) // all threads of threadInBlock < 8 will be working on trying out mode 4, since only mode 4 has index selector bit
- {
- if (0 == (threadInBlock & 1)) // thread 0, 2, 4, 6
- {
- //2 represents 2bit index precision; 1 represents 3bit index precision
- index_selector = 0;
- indexPrec = uint2( 2, 1 );
- }
- else // thread 1, 3, 5, 7
- {
- //2 represents 2bit index precision; 1 represents 3bit index precision
- index_selector = 1;
- indexPrec = uint2( 1, 2 );
- }
- }
- else
- {
- //2 represents 2bit index precision
- indexPrec = uint2( 2, 2 );
- }
-
- uint4 pixel_r;
- uint color_index;
- uint alpha_index;
- int4 span;
- int2 span_norm_sqr;
- int2 dotProduct;
- if (threadInBlock < 12) // Try mode 4 5 in threads 0..11
- {
- // mode 4 5 have component rotation
- if ((threadInBlock < 2) || (8 == threadInBlock)) // rotation = 0 in threads 0, 1 (mode 4) and 8 (mode 5)
- {
- rotation = 0;
- }
- else if ((threadInBlock < 4) || (9 == threadInBlock)) // rotation = 1 in threads 2, 3 (mode 4) and 9 (mode 5)
- {
- endPoint[0].ra = endPoint[0].ar;
- endPoint[1].ra = endPoint[1].ar;
-
- rotation = 1;
- }
- else if ((threadInBlock < 6) || (10 == threadInBlock)) // rotation = 2 in threads 4, 5 (mode 4) and 10 (mode 5)
- {
- endPoint[0].ga = endPoint[0].ag;
- endPoint[1].ga = endPoint[1].ag;
-
- rotation = 2;
- }
- else if ((threadInBlock < 8) || (11 == threadInBlock)) // rotation = 3 in threads 6, 7 (mode 4) and 11 (mode 5)
- {
- endPoint[0].ba = endPoint[0].ab;
- endPoint[1].ba = endPoint[1].ab;
-
- rotation = 3;
- }
-
- if (threadInBlock < 8) // try mode 4 in threads 0..7
- {
- // mode 4 thread distribution
- // Thread 0 1 2 3 4 5 6 7
- // Rotation 0 0 1 1 2 2 3 3
- // Index selector 0 1 0 1 0 1 0 1
-
- mode = 4;
- compress_endpoints4( endPoint );
- }
- else // try mode 5 in threads 8..11
- {
- // mode 5 thread distribution
- // Thread 8 9 10 11
- // Rotation 0 1 2 3
-
- mode = 5;
- compress_endpoints5( endPoint );
- }
-
- uint4 pixel = shared_temp[threadBase + 0].pixel;
- if (1 == rotation)
- {
- pixel.ra = pixel.ar;
- }
- else if (2 == rotation)
- {
- pixel.ga = pixel.ag;
- }
- else if (3 == rotation)
- {
- pixel.ba = pixel.ab;
- }
-
- span = endPoint[1] - endPoint[0];
- span_norm_sqr = uint2( dot( span.rgb, span.rgb ), span.a * span.a );
-
- // in mode 4 5 6, end point 0 must be closer to pixel 0 than end point 1, because the fix-up index is always index 0
- // TODO: this shouldn't be necessary here in error calculation
- /*
- dotProduct = int2( dot( span.rgb, pixel.rgb - endPoint[0].rgb ), span.a * ( pixel.a - endPoint[0].a ) );
- if ( span_norm_sqr.x > 0 && dotProduct.x > 0 && uint( dotProduct.x * 63.49999 ) > uint( 32 * span_norm_sqr.x ) )
- {
- span.rgb = -span.rgb;
- swap(endPoint[0].rgb, endPoint[1].rgb);
- }
- if ( span_norm_sqr.y > 0 && dotProduct.y > 0 && uint( dotProduct.y * 63.49999 ) > uint( 32 * span_norm_sqr.y ) )
- {
- span.a = -span.a;
- swap(endPoint[0].a, endPoint[1].a);
- }
- */
-
- // should be the same as above
- dotProduct = int2( dot( pixel.rgb - endPoint[0].rgb, pixel.rgb - endPoint[0].rgb ), dot( pixel.rgb - endPoint[1].rgb, pixel.rgb - endPoint[1].rgb ) );
- if ( dotProduct.x > dotProduct.y )
- {
- span.rgb = -span.rgb;
- swap(endPoint[0].rgb, endPoint[1].rgb);
- }
- dotProduct = int2( dot( pixel.a - endPoint[0].a, pixel.a - endPoint[0].a ), dot( pixel.a - endPoint[1].a, pixel.a - endPoint[1].a ) );
- if ( dotProduct.x > dotProduct.y )
- {
- span.a = -span.a;
- swap(endPoint[0].a, endPoint[1].a);
- }
-// Accumulate the reconstruction error over all 16 pixels of the block
- error = 0;
- for ( uint i = 0; i < 16; i ++ )
- {
- pixel = shared_temp[threadBase + i].pixel;
- if (1 == rotation)
- {
- pixel.ra = pixel.ar;
- }
- else if (2 == rotation)
- {
- pixel.ga = pixel.ag;
- }
- else if (3 == rotation)
- {
- pixel.ba = pixel.ab;
- }
-
- dotProduct.x = dot( span.rgb, pixel.rgb - endPoint[0].rgb );
- color_index = ( span_norm_sqr.x <= 0 /*endPoint[0] == endPoint[1]*/ || dotProduct.x <= 0 /*pixel == endPoint[0]*/ ) ? 0
- : ( ( dotProduct.x < span_norm_sqr.x ) ? aStep[indexPrec.x][ uint( dotProduct.x * 63.49999 / span_norm_sqr.x ) ] : aStep[indexPrec.x][63] );
- dotProduct.y = dot( span.a, pixel.a - endPoint[0].a );
- alpha_index = ( span_norm_sqr.y <= 0 || dotProduct.y <= 0 ) ? 0
- : ( ( dotProduct.y < span_norm_sqr.y ) ? aStep[indexPrec.y][ uint( dotProduct.y * 63.49999 / span_norm_sqr.y ) ] : aStep[indexPrec.y][63] );
-
- // the same color_index and alpha_index should be used for reconstruction, so this should be left commented out
- /*if (index_selector)
- {
- swap(color_index, alpha_index);
- }*/
-
- pixel_r.rgb = ( ( 64 - aWeight[indexPrec.x][color_index] ) * endPoint[0].rgb +
- aWeight[indexPrec.x][color_index] * endPoint[1].rgb +
- 32 ) >> 6;
- pixel_r.a = ( ( 64 - aWeight[indexPrec.y][alpha_index] ) * endPoint[0].a +
- aWeight[indexPrec.y][alpha_index] * endPoint[1].a +
- 32 ) >> 6;
-
- Ensure_A_Is_Larger( pixel_r, pixel );
- pixel_r -= pixel;
- if (1 == rotation)
- {
- pixel_r.ra = pixel_r.ar;
- }
- else if (2 == rotation)
- {
- pixel_r.ga = pixel_r.ag;
- }
- else if (3 == rotation)
- {
- pixel_r.ba = pixel_r.ab;
- }
- error += ComputeError(pixel_r, pixel_r);
- }
- }
- else if (threadInBlock < 16) // Try mode 6 in threads 12..15, since in mode 4 5 6, only mode 6 has p bit
- {
- uint p = threadInBlock - 12;
-
- compress_endpoints6( endPoint, uint2(p >> 0, p >> 1) & 1 );
-
- uint4 pixel = shared_temp[threadBase + 0].pixel;
-
- span = endPoint[1] - endPoint[0];
- span_norm_sqr = dot( span, span );
- dotProduct = dot( span, pixel - endPoint[0] );
- if ( span_norm_sqr.x > 0 && dotProduct.x >= 0 && uint( dotProduct.x * 63.49999 ) > uint( 32 * span_norm_sqr.x ) )
- {
- span = -span;
- swap(endPoint[0], endPoint[1]);
- }
-
- error = 0;
- for ( uint i = 0; i < 16; i ++ )
- {
- pixel = shared_temp[threadBase + i].pixel;
-
- dotProduct.x = dot( span, pixel - endPoint[0] );
- color_index = ( span_norm_sqr.x <= 0 || dotProduct.x <= 0 ) ? 0
- : ( ( dotProduct.x < span_norm_sqr.x ) ? aStep[0][ uint( dotProduct.x * 63.49999 / span_norm_sqr.x ) ] : aStep[0][63] );
-
- pixel_r = ( ( 64 - aWeight[0][color_index] ) * endPoint[0]
- + aWeight[0][color_index] * endPoint[1] + 32 ) >> 6;
-
- Ensure_A_Is_Larger( pixel_r, pixel );
- pixel_r -= pixel;
- error += ComputeError(pixel_r, pixel_r);
- }
-
- mode = 6;
- rotation = p; // Borrow rotation for p
- }
-// Publish this thread's candidate so the reduction below can keep the one with the smallest error
- shared_temp[GI].error = error;
- shared_temp[GI].mode = mode;
- shared_temp[GI].index_selector = index_selector;
- shared_temp[GI].rotation = rotation;
-
-#ifdef REF_DEVICE
- GroupMemoryBarrierWithGroupSync();
-#endif
-// Min-error reduction over the 16 candidates of the block, halving the active thread count at each step
- if (threadInBlock < 8)
- {
- if ( shared_temp[GI].error > shared_temp[GI + 8].error )
- {
- shared_temp[GI].error = shared_temp[GI + 8].error;
- shared_temp[GI].mode = shared_temp[GI + 8].mode;
- shared_temp[GI].index_selector = shared_temp[GI + 8].index_selector;
- shared_temp[GI].rotation = shared_temp[GI + 8].rotation;
- }
- }
-#ifdef REF_DEVICE
- GroupMemoryBarrierWithGroupSync();
-#endif
- if (threadInBlock < 4)
- {
- if ( shared_temp[GI].error > shared_temp[GI + 4].error )
- {
- shared_temp[GI].error = shared_temp[GI + 4].error;
- shared_temp[GI].mode = shared_temp[GI + 4].mode;
- shared_temp[GI].index_selector = shared_temp[GI + 4].index_selector;
- shared_temp[GI].rotation = shared_temp[GI + 4].rotation;
- }
- }
-#ifdef REF_DEVICE
- GroupMemoryBarrierWithGroupSync();
-#endif
- if (threadInBlock < 2)
- {
- if ( shared_temp[GI].error > shared_temp[GI + 2].error )
- {
- shared_temp[GI].error = shared_temp[GI + 2].error;
- shared_temp[GI].mode = shared_temp[GI + 2].mode;
- shared_temp[GI].index_selector = shared_temp[GI + 2].index_selector;
- shared_temp[GI].rotation = shared_temp[GI + 2].rotation;
- }
- }
-#ifdef REF_DEVICE
- GroupMemoryBarrierWithGroupSync();
-#endif
- if (threadInBlock < 1)
- {
- if ( shared_temp[GI].error > shared_temp[GI + 1].error )
- {
- shared_temp[GI].error = shared_temp[GI + 1].error;
- shared_temp[GI].mode = shared_temp[GI + 1].mode;
- shared_temp[GI].index_selector = shared_temp[GI + 1].index_selector;
- shared_temp[GI].rotation = shared_temp[GI + 1].rotation;
- }
-// Thread 0 of the block writes the winner: x = error, y = mode with index_selector in bit 31, z = 0, w = rotation
- g_OutBuff[blockID] = uint4(shared_temp[GI].error, (shared_temp[GI].index_selector << 31) | shared_temp[GI].mode,
- 0, shared_temp[GI].rotation); // rotation is indeed rotation for mode 4 5. for mode 6, rotation is p bit
- }
-}
-
-[numthreads( THREAD_GROUP_SIZE, 1, 1 )]
-void TryMode137CS( uint GI : SV_GroupIndex, uint3 groupID : SV_GroupID ) // mode 1 3 7 all have 2 subsets per block
-{
- const uint MAX_USED_THREAD = 64;
- uint BLOCK_IN_GROUP = THREAD_GROUP_SIZE / MAX_USED_THREAD;
- uint blockInGroup = GI / MAX_USED_THREAD;
- uint blockID = g_start_block_id + groupID.x * BLOCK_IN_GROUP + blockInGroup;
- uint threadBase = blockInGroup * MAX_USED_THREAD;
- uint threadInBlock = GI - threadBase;
-
- uint block_y = blockID / g_num_block_x;
- uint block_x = blockID - block_y * g_num_block_x;
- uint base_x = block_x * BLOCK_SIZE_X;
- uint base_y = block_y * BLOCK_SIZE_Y;
-
- if (threadInBlock < 16)
- {
- shared_temp[GI].pixel = clamp(uint4(g_Input.Load( uint3( base_x + threadInBlock % 4, base_y + threadInBlock / 4, 0 ) ) * 255), 0, 255);
- }
- GroupMemoryBarrierWithGroupSync();
-
- shared_temp[GI].error = 0xFFFFFFFF;
-
- uint4 pixel_r;
- uint2x4 endPoint[2]; // endPoint[0..1 for subset id][0..1 for low and high in the subset]
- uint2x4 endPointBackup[2];
- uint color_index;
- if (threadInBlock < 64)
- {
- uint partition = threadInBlock;
-
- endPoint[0][0] = MAX_UINT;
- endPoint[0][1] = MIN_UINT;
- endPoint[1][0] = MAX_UINT;
- endPoint[1][1] = MIN_UINT;
- uint bits = candidateSectionBit[partition];
- for ( uint i = 0; i < 16; i ++ )
- {
- uint4 pixel = shared_temp[threadBase + i].pixel;
- if ( (( bits >> i ) & 0x01) == 1 )
- {
- endPoint[1][0] = min( endPoint[1][0], pixel );
- endPoint[1][1] = max( endPoint[1][1], pixel );
- }
- else
- {
- endPoint[0][0] = min( endPoint[0][0], pixel );
- endPoint[0][1] = max( endPoint[0][1], pixel );
- }
- }
-
- endPointBackup[0] = endPoint[0];
- endPointBackup[1] = endPoint[1];
-
- uint max_p;
- if (1 == g_mode_id)
- {
- // in mode 1, there is only one p bit per subset
- max_p = 4;
- }
- else
- {
- // in mode 3 7, there are two p bits per subset, one for each end point
- max_p = 16;
- }
-
- uint rotation = 0;
- uint error = MAX_UINT;
- for ( uint p = 0; p < max_p; p ++ )
- {
- endPoint[0] = endPointBackup[0];
- endPoint[1] = endPointBackup[1];
-
- for ( i = 0; i < 2; i ++ ) // loop through 2 subsets
- {
- if (g_mode_id == 1)
- {
- compress_endpoints1( endPoint[i], (p >> i) & 1 );
- }
- else if (g_mode_id == 3)
- {
- compress_endpoints3( endPoint[i], uint2(p >> (i * 2 + 0), p >> (i * 2 + 1)) & 1 );
- }
- else if (g_mode_id == 7)
- {
- compress_endpoints7( endPoint[i], uint2(p >> (i * 2 + 0), p >> (i * 2 + 1)) & 1 );
- }
- }
-
- int4 span[2];
- span[0] = endPoint[0][1] - endPoint[0][0];
- span[1] = endPoint[1][1] - endPoint[1][0];
-
- if (g_mode_id != 7)
- {
- span[0].w = span[1].w = 0;
- }
-
- int span_norm_sqr[2];
- span_norm_sqr[0] = dot( span[0], span[0] );
- span_norm_sqr[1] = dot( span[1], span[1] );
-
- // TODO: again, this shouldn't be necessary here in error calculation
- int dotProduct = dot( span[0], shared_temp[threadBase + 0].pixel - endPoint[0][0] );
- if ( span_norm_sqr[0] > 0 && dotProduct > 0 && uint( dotProduct * 63.49999 ) > uint( 32 * span_norm_sqr[0] ) )
- {
- span[0] = -span[0];
- swap(endPoint[0][0], endPoint[0][1]);
- }
- dotProduct = dot( span[1], shared_temp[threadBase + candidateFixUpIndex1D[partition].x].pixel - endPoint[1][0] );
- if ( span_norm_sqr[1] > 0 && dotProduct > 0 && uint( dotProduct * 63.49999 ) > uint( 32 * span_norm_sqr[1] ) )
- {
- span[1] = -span[1];
- swap(endPoint[1][0], endPoint[1][1]);
- }
-
- uint step_selector;
- if (g_mode_id != 1)
- {
- step_selector = 2; // mode 3 7 have 2 bit index
- }
- else
- {
- step_selector = 1; // mode 1 has 3 bit index
- }
-
- uint p_error = 0;
- for ( i = 0; i < 16; i ++ )
- {
- if (((bits >> i) & 0x01) == 1)
- {
- dotProduct = dot( span[1], shared_temp[threadBase + i].pixel - endPoint[1][0] );
- color_index = (span_norm_sqr[1] <= 0 || dotProduct <= 0) ? 0
- : ((dotProduct < span_norm_sqr[1]) ? aStep[step_selector][uint(dotProduct * 63.49999 / span_norm_sqr[1])] : aStep[step_selector][63]);
- }
- else
- {
- dotProduct = dot( span[0], shared_temp[threadBase + i].pixel - endPoint[0][0] );
- color_index = (span_norm_sqr[0] <= 0 || dotProduct <= 0) ? 0
- : ((dotProduct < span_norm_sqr[0]) ? aStep[step_selector][uint(dotProduct * 63.49999 / span_norm_sqr[0])] : aStep[step_selector][63]);
- }
-
- uint subset_index = (bits >> i) & 0x01;
-
- pixel_r = ((64 - aWeight[step_selector][color_index]) * endPoint[subset_index][0]
- + aWeight[step_selector][color_index] * endPoint[subset_index][1] + 32) >> 6;
- if (g_mode_id != 7)
- {
- pixel_r.a = 255;
- }
-
- uint4 pixel = shared_temp[threadBase + i].pixel;
- Ensure_A_Is_Larger( pixel_r, pixel );
- pixel_r -= pixel;
- p_error += ComputeError(pixel_r, pixel_r);
- }
-
- if (p_error < error)
- {
- error = p_error;
- rotation = p;
- }
- }
-
- shared_temp[GI].error = error;
- shared_temp[GI].mode = g_mode_id;
- shared_temp[GI].partition = partition;
- shared_temp[GI].rotation = rotation; // mode 1 3 7 don't have rotation, we use rotation for p bits
- }
- GroupMemoryBarrierWithGroupSync();
-
- if (threadInBlock < 32)
- {
- if ( shared_temp[GI].error > shared_temp[GI + 32].error )
- {
- shared_temp[GI].error = shared_temp[GI + 32].error;
- shared_temp[GI].mode = shared_temp[GI + 32].mode;
- shared_temp[GI].partition = shared_temp[GI + 32].partition;
- shared_temp[GI].rotation = shared_temp[GI + 32].rotation;
- }
- }
-#ifdef REF_DEVICE
- GroupMemoryBarrierWithGroupSync();
-#endif
-if (threadInBlock < 16)
- {
- if ( shared_temp[GI].error > shared_temp[GI + 16].error )
- {
- shared_temp[GI].error = shared_temp[GI + 16].error;
- shared_temp[GI].mode = shared_temp[GI + 16].mode;
- shared_temp[GI].partition = shared_temp[GI + 16].partition;
- shared_temp[GI].rotation = shared_temp[GI + 16].rotation;
- }
- }
-#ifdef REF_DEVICE
- GroupMemoryBarrierWithGroupSync();
-#endif
- if (threadInBlock < 8)
- {
- if ( shared_temp[GI].error > shared_temp[GI + 8].error )
- {
- shared_temp[GI].error = shared_temp[GI + 8].error;
- shared_temp[GI].mode = shared_temp[GI + 8].mode;
- shared_temp[GI].partition = shared_temp[GI + 8].partition;
- shared_temp[GI].rotation = shared_temp[GI + 8].rotation;
- }
- }
-#ifdef REF_DEVICE
- GroupMemoryBarrierWithGroupSync();
-#endif
- if (threadInBlock < 4)
- {
- if ( shared_temp[GI].error > shared_temp[GI + 4].error )
- {
- shared_temp[GI].error = shared_temp[GI + 4].error;
- shared_temp[GI].mode = shared_temp[GI + 4].mode;
- shared_temp[GI].partition = shared_temp[GI + 4].partition;
- shared_temp[GI].rotation = shared_temp[GI + 4].rotation;
- }
- }
-#ifdef REF_DEVICE
- GroupMemoryBarrierWithGroupSync();
-#endif
- if (threadInBlock < 2)
- {
- if ( shared_temp[GI].error > shared_temp[GI + 2].error )
- {
- shared_temp[GI].error = shared_temp[GI + 2].error;
- shared_temp[GI].mode = shared_temp[GI + 2].mode;
- shared_temp[GI].partition = shared_temp[GI + 2].partition;
- shared_temp[GI].rotation = shared_temp[GI + 2].rotation;
- }
- }
-#ifdef REF_DEVICE
- GroupMemoryBarrierWithGroupSync();
-#endif
- if (threadInBlock < 1)
- {
- if ( shared_temp[GI].error > shared_temp[GI + 1].error )
- {
- shared_temp[GI].error = shared_temp[GI + 1].error;
- shared_temp[GI].mode = shared_temp[GI + 1].mode;
- shared_temp[GI].partition = shared_temp[GI + 1].partition;
- shared_temp[GI].rotation = shared_temp[GI + 1].rotation;
- }
-
- if (g_InBuff[blockID].x > shared_temp[GI].error)
- {
- g_OutBuff[blockID] = uint4(shared_temp[GI].error, shared_temp[GI].mode, shared_temp[GI].partition, shared_temp[GI].rotation); // mode 1 3 7 don't have rotation, we use rotation for p bits
- }
- else
- {
- g_OutBuff[blockID] = g_InBuff[blockID];
- }
- }
-}
-
-[numthreads( THREAD_GROUP_SIZE, 1, 1 )]
-void TryMode02CS( uint GI : SV_GroupIndex, uint3 groupID : SV_GroupID ) // mode 0 2 have 3 subsets per block
-{
- const uint MAX_USED_THREAD = 64;
- uint BLOCK_IN_GROUP = THREAD_GROUP_SIZE / MAX_USED_THREAD;
- uint blockInGroup = GI / MAX_USED_THREAD;
- uint blockID = g_start_block_id + groupID.x * BLOCK_IN_GROUP + blockInGroup;
- uint threadBase = blockInGroup * MAX_USED_THREAD;
- uint threadInBlock = GI - threadBase;
-
- uint block_y = blockID / g_num_block_x;
- uint block_x = blockID - block_y * g_num_block_x;
- uint base_x = block_x * BLOCK_SIZE_X;
- uint base_y = block_y * BLOCK_SIZE_Y;
-
- if (threadInBlock < 16)
- {
- shared_temp[GI].pixel = clamp(uint4(g_Input.Load( uint3( base_x + threadInBlock % 4, base_y + threadInBlock / 4, 0 ) ) * 255), 0, 255);
- }
- GroupMemoryBarrierWithGroupSync();
-
- shared_temp[GI].error = 0xFFFFFFFF;
-
- uint num_partitions;
- if (0 == g_mode_id)
- {
- num_partitions = 16;
- }
- else
- {
- num_partitions = 64;
- }
-
- uint4 pixel_r;
- uint2x4 endPoint[3]; // endPoint[0..1 for subset id][0..1 for low and high in the subset]
- uint2x4 endPointBackup[3];
- uint color_index[16];
- if (threadInBlock < num_partitions)
- {
- uint partition = threadInBlock + 64;
-
- endPoint[0][0] = MAX_UINT;
- endPoint[0][1] = MIN_UINT;
- endPoint[1][0] = MAX_UINT;
- endPoint[1][1] = MIN_UINT;
- endPoint[2][0] = MAX_UINT;
- endPoint[2][1] = MIN_UINT;
- uint bits2 = candidateSectionBit2[partition - 64];
- for ( uint i = 0; i < 16; i ++ )
- {
- uint4 pixel = shared_temp[threadBase + i].pixel;
- uint subset_index = ( bits2 >> ( i * 2 ) ) & 0x03;
- if ( subset_index == 2 )
- {
- endPoint[2][0] = min( endPoint[2][0], pixel );
- endPoint[2][1] = max( endPoint[2][1], pixel );
- }
- else if ( subset_index == 1 )
- {
- endPoint[1][0] = min( endPoint[1][0], pixel );
- endPoint[1][1] = max( endPoint[1][1], pixel );
- }
- else
- {
- endPoint[0][0] = min( endPoint[0][0], pixel );
- endPoint[0][1] = max( endPoint[0][1], pixel );
- }
- }
-
- endPointBackup[0] = endPoint[0];
- endPointBackup[1] = endPoint[1];
- endPointBackup[2] = endPoint[2];
-
- uint max_p;
- if (0 == g_mode_id)
- {
- max_p = 64; // changed from 32 to 64
- }
- else
- {
- max_p = 1;
- }
-
- uint rotation = 0;
- uint error = MAX_UINT;
- for ( uint p = 0; p < max_p; p ++ )
- {
- endPoint[0] = endPointBackup[0];
- endPoint[1] = endPointBackup[1];
- endPoint[2] = endPointBackup[2];
-
- for ( i = 0; i < 3; i ++ )
- {
- if (0 == g_mode_id)
- {
- compress_endpoints0( endPoint[i], uint2(p >> (i * 2 + 0), p >> (i * 2 + 1)) & 1 );
- }
- else
- {
- compress_endpoints2( endPoint[i] );
- }
- }
-
- uint step_selector = 1 + (2 == g_mode_id);
-
- int4 span[3];
- span[0] = endPoint[0][1] - endPoint[0][0];
- span[1] = endPoint[1][1] - endPoint[1][0];
- span[2] = endPoint[2][1] - endPoint[2][0];
- span[0].w = span[1].w = span[2].w = 0;
- int span_norm_sqr[3];
- span_norm_sqr[0] = dot( span[0], span[0] );
- span_norm_sqr[1] = dot( span[1], span[1] );
- span_norm_sqr[2] = dot( span[2], span[2] );
-
- // TODO: again, this shouldn't be necessary here in error calculation
- uint ci[3] = { 0, candidateFixUpIndex1D[partition].x, candidateFixUpIndex1D[partition].y };
- for (i = 0; i < 3; i ++)
- {
- int dotProduct = dot( span[i], shared_temp[threadBase + ci[i]].pixel - endPoint[i][0] );
- if ( span_norm_sqr[i] > 0 && dotProduct > 0 && uint( dotProduct * 63.49999 ) > uint( 32 * span_norm_sqr[i] ) )
- {
- span[i] = -span[i];
- swap(endPoint[i][0], endPoint[i][1]);
- }
- }
-
- uint p_error = 0;
- for ( i = 0; i < 16; i ++ )
- {
- uint subset_index = ( bits2 >> ( i * 2 ) ) & 0x03;
- if ( subset_index == 2 )
- {
- int dotProduct = dot( span[2], shared_temp[threadBase + i].pixel - endPoint[2][0] );
- color_index[i] = ( span_norm_sqr[2] <= 0 || dotProduct <= 0 ) ? 0
- : ( ( dotProduct < span_norm_sqr[2] ) ? aStep[step_selector][ uint( dotProduct * 63.49999 / span_norm_sqr[2] ) ] : aStep[step_selector][63] );
- }
- else if ( subset_index == 1 )
- {
- int dotProduct = dot( span[1], shared_temp[threadBase + i].pixel - endPoint[1][0] );
- color_index[i] = ( span_norm_sqr[1] <= 0 || dotProduct <= 0 ) ? 0
- : ( ( dotProduct < span_norm_sqr[1] ) ? aStep[step_selector][ uint( dotProduct * 63.49999 / span_norm_sqr[1] ) ] : aStep[step_selector][63] );
- }
- else
- {
- int dotProduct = dot( span[0], shared_temp[threadBase + i].pixel - endPoint[0][0] );
- color_index[i] = ( span_norm_sqr[0] <= 0 || dotProduct <= 0 ) ? 0
- : ( ( dotProduct < span_norm_sqr[0] ) ? aStep[step_selector][ uint( dotProduct * 63.49999 / span_norm_sqr[0] ) ] : aStep[step_selector][63] );
- }
-
- pixel_r = ( ( 64 - aWeight[step_selector][color_index[i]] ) * endPoint[subset_index][0]
- + aWeight[step_selector][color_index[i]] * endPoint[subset_index][1] + 32 ) >> 6;
- pixel_r.a = 255;
-
- uint4 pixel = shared_temp[threadBase + i].pixel;
- Ensure_A_Is_Larger( pixel_r, pixel );
- pixel_r -= pixel;
- p_error += ComputeError(pixel_r, pixel_r);
- }
-
- if (p_error < error)
- {
- error = p_error;
- rotation = p; // Borrow rotation for p
- }
- }
-
- shared_temp[GI].error = error;
- shared_temp[GI].partition = partition;
- shared_temp[GI].rotation = rotation;
- }
- GroupMemoryBarrierWithGroupSync();
-
- if (threadInBlock < 32)
- {
- if ( shared_temp[GI].error > shared_temp[GI + 32].error )
- {
- shared_temp[GI].error = shared_temp[GI + 32].error;
- shared_temp[GI].partition = shared_temp[GI + 32].partition;
- shared_temp[GI].rotation = shared_temp[GI + 32].rotation;
- }
- }
-#ifdef REF_DEVICE
- GroupMemoryBarrierWithGroupSync();
-#endif
- if (threadInBlock < 16)
- {
- if ( shared_temp[GI].error > shared_temp[GI + 16].error )
- {
- shared_temp[GI].error = shared_temp[GI + 16].error;
- shared_temp[GI].partition = shared_temp[GI + 16].partition;
- shared_temp[GI].rotation = shared_temp[GI + 16].rotation;
- }
- }
-#ifdef REF_DEVICE
- GroupMemoryBarrierWithGroupSync();
-#endif
- if (threadInBlock < 8)
- {
- if ( shared_temp[GI].error > shared_temp[GI + 8].error )
- {
- shared_temp[GI].error = shared_temp[GI + 8].error;
- shared_temp[GI].partition = shared_temp[GI + 8].partition;
- shared_temp[GI].rotation = shared_temp[GI + 8].rotation;
- }
- }
-#ifdef REF_DEVICE
- GroupMemoryBarrierWithGroupSync();
-#endif
- if (threadInBlock < 4)
- {
- if ( shared_temp[GI].error > shared_temp[GI + 4].error )
- {
- shared_temp[GI].error = shared_temp[GI + 4].error;
- shared_temp[GI].partition = shared_temp[GI + 4].partition;
- shared_temp[GI].rotation = shared_temp[GI + 4].rotation;
- }
- }
-#ifdef REF_DEVICE
- GroupMemoryBarrierWithGroupSync();
-#endif
- if (threadInBlock < 2)
- {
- if ( shared_temp[GI].error > shared_temp[GI + 2].error )
- {
- shared_temp[GI].error = shared_temp[GI + 2].error;
- shared_temp[GI].partition = shared_temp[GI + 2].partition;
- shared_temp[GI].rotation = shared_temp[GI + 2].rotation;
- }
- }
-#ifdef REF_DEVICE
- GroupMemoryBarrierWithGroupSync();
-#endif
- if (threadInBlock < 1)
- {
- if ( shared_temp[GI].error > shared_temp[GI + 1].error )
- {
- shared_temp[GI].error = shared_temp[GI + 1].error;
- shared_temp[GI].partition = shared_temp[GI + 1].partition;
- shared_temp[GI].rotation = shared_temp[GI + 1].rotation;
- }
-
- if (g_InBuff[blockID].x > shared_temp[GI].error)
- {
- g_OutBuff[blockID] = uint4(shared_temp[GI].error, g_mode_id, shared_temp[GI].partition, shared_temp[GI].rotation); // rotation is actually p bit for mode 0. for mode 2, rotation is always 0
- }
- else
- {
- g_OutBuff[blockID] = g_InBuff[blockID];
- }
- }
-}
-
-[numthreads( THREAD_GROUP_SIZE, 1, 1 )]
-void EncodeBlockCS(uint GI : SV_GroupIndex, uint3 groupID : SV_GroupID)
-{
- const uint MAX_USED_THREAD = 16;
- uint BLOCK_IN_GROUP = THREAD_GROUP_SIZE / MAX_USED_THREAD;
- uint blockInGroup = GI / MAX_USED_THREAD;
- uint blockID = g_start_block_id + groupID.x * BLOCK_IN_GROUP + blockInGroup;
- uint threadBase = blockInGroup * MAX_USED_THREAD;
- uint threadInBlock = GI - threadBase;
-
-#ifndef REF_DEVICE
- if (blockID >= g_num_total_blocks)
- {
- return;
- }
-#endif
-
- uint block_y = blockID / g_num_block_x;
- uint block_x = blockID - block_y * g_num_block_x;
- uint base_x = block_x * BLOCK_SIZE_X;
- uint base_y = block_y * BLOCK_SIZE_Y;
-
- uint mode = g_InBuff[blockID].y & 0x7FFFFFFF;
- uint partition = g_InBuff[blockID].z;
- uint index_selector = (g_InBuff[blockID].y >> 31) & 1;
- uint rotation = g_InBuff[blockID].w;
-
- if (threadInBlock < 16)
- {
- uint4 pixel = clamp(uint4(g_Input.Load( uint3( base_x + threadInBlock % 4, base_y + threadInBlock / 4, 0 ) ) * 255), 0, 255);
-
- if ((4 == mode) || (5 == mode))
- {
- if (1 == rotation)
- {
- pixel.ra = pixel.ar;
- }
- else if (2 == rotation)
- {
- pixel.ga = pixel.ag;
- }
- else if (3 == rotation)
- {
- pixel.ba = pixel.ab;
- }
- }
-
- shared_temp[GI].pixel = pixel;
- }
-#ifdef REF_DEVICE
- GroupMemoryBarrierWithGroupSync();
-#endif
-
- uint bits = candidateSectionBit[partition];
- uint bits2 = candidateSectionBit2[partition - 64];
-
- uint2x4 ep;
- uint2x4 ep_quantized;
- [unroll]
- for (int ii = 2; ii >= 0; -- ii)
- {
- if (threadInBlock < 16)
- {
- uint2x4 ep;
- ep[0] = MAX_UINT;
- ep[1] = MIN_UINT;
-
- uint4 pixel = shared_temp[GI].pixel;
-
- uint subset_index = ( bits >> threadInBlock ) & 0x01;
- uint subset_index2 = ( bits2 >> ( threadInBlock * 2 ) ) & 0x03;
- if (0 == ii)
- {
- if ((0 == mode) || (2 == mode))
- {
- if (0 == subset_index2)
- {
- ep[0] = ep[1] = pixel;
- }
- }
- else if ((1 == mode) || (3 == mode) || (7 == mode))
- {
- if (0 == subset_index)
- {
- ep[0] = ep[1] = pixel;
- }
- }
- else if ((4 == mode) || (5 == mode) || (6 == mode))
- {
- ep[0] = ep[1] = pixel;
- }
- }
- else if (1 == ii)
- {
- if ((0 == mode) || (2 == mode))
- {
- if (1 == subset_index2)
- {
- ep[0] = ep[1] = pixel;
- }
- }
- else if ((1 == mode) || (3 == mode) || (7 == mode))
- {
- if (1 == subset_index)
- {
- ep[0] = ep[1] = pixel;
- }
- }
- }
- else
- {
- if ((0 == mode) || (2 == mode))
- {
- if (2 == subset_index2)
- {
- ep[0] = ep[1] = pixel;
- }
- }
- }
-
- shared_temp[GI].endPoint_low = ep[0];
- shared_temp[GI].endPoint_high = ep[1];
- }
-#ifdef REF_DEVICE
- GroupMemoryBarrierWithGroupSync();
-#endif
-
- if (threadInBlock < 8)
- {
- shared_temp[GI].endPoint_low = min(shared_temp[GI].endPoint_low, shared_temp[GI + 8].endPoint_low);
- shared_temp[GI].endPoint_high = max(shared_temp[GI].endPoint_high, shared_temp[GI + 8].endPoint_high);
- }
-#ifdef REF_DEVICE
- GroupMemoryBarrierWithGroupSync();
-#endif
- if (threadInBlock < 4)
- {
- shared_temp[GI].endPoint_low = min(shared_temp[GI].endPoint_low, shared_temp[GI + 4].endPoint_low);
- shared_temp[GI].endPoint_high = max(shared_temp[GI].endPoint_high, shared_temp[GI + 4].endPoint_high);
- }
-#ifdef REF_DEVICE
- GroupMemoryBarrierWithGroupSync();
-#endif
- if (threadInBlock < 2)
- {
- shared_temp[GI].endPoint_low = min(shared_temp[GI].endPoint_low, shared_temp[GI + 2].endPoint_low);
- shared_temp[GI].endPoint_high = max(shared_temp[GI].endPoint_high, shared_temp[GI + 2].endPoint_high);
- }
-#ifdef REF_DEVICE
- GroupMemoryBarrierWithGroupSync();
-#endif
- if (threadInBlock < 1)
- {
- shared_temp[GI].endPoint_low = min(shared_temp[GI].endPoint_low, shared_temp[GI + 1].endPoint_low);
- shared_temp[GI].endPoint_high = max(shared_temp[GI].endPoint_high, shared_temp[GI + 1].endPoint_high);
- }
-#ifdef REF_DEVICE
- GroupMemoryBarrierWithGroupSync();
-#endif
-
- if (ii == (int)threadInBlock)
- {
- ep[0] = shared_temp[threadBase].endPoint_low;
- ep[1] = shared_temp[threadBase].endPoint_high;
- }
- }
-
- if (threadInBlock < 3)
- {
- uint2 P;
- if (1 == mode)
- {
- P = (rotation >> threadInBlock) & 1;
- }
- else
- {
- P = uint2(rotation >> (threadInBlock * 2 + 0), rotation >> (threadInBlock * 2 + 1)) & 1;
- }
-
- if (0 == mode)
- {
- ep_quantized = compress_endpoints0( ep, P );
- }
- else if (1 == mode)
- {
- ep_quantized = compress_endpoints1( ep, P );
- }
- else if (2 == mode)
- {
- ep_quantized = compress_endpoints2( ep );
- }
- else if (3 == mode)
- {
- ep_quantized = compress_endpoints3( ep, P );
- }
- else if (4 == mode)
- {
- ep_quantized = compress_endpoints4( ep );
- }
- else if (5 == mode)
- {
- ep_quantized = compress_endpoints5( ep );
- }
- else if (6 == mode)
- {
- ep_quantized = compress_endpoints6( ep, P );
- }
- else //if (7 == mode)
- {
- ep_quantized = compress_endpoints7( ep, P );
- }
-
- int4 span = ep[1] - ep[0];
- if (mode < 4)
- {
- span.w = 0;
- }
-
- if ((4 == mode) || (5 == mode))
- {
- if (0 == threadInBlock)
- {
- int2 span_norm_sqr = uint2( dot( span.rgb, span.rgb ), span.a * span.a );
- int2 dotProduct = int2( dot( span.rgb, shared_temp[threadBase + 0].pixel.rgb - ep[0].rgb ), span.a * ( shared_temp[threadBase + 0].pixel.a - ep[0].a ) );
- if ( span_norm_sqr.x > 0 && dotProduct.x > 0 && uint( dotProduct.x * 63.49999 ) > uint( 32 * span_norm_sqr.x ) )
- {
- swap(ep[0].rgb, ep[1].rgb);
- swap(ep_quantized[0].rgb, ep_quantized[1].rgb);
- }
- if ( span_norm_sqr.y > 0 && dotProduct.y > 0 && uint( dotProduct.y * 63.49999 ) > uint( 32 * span_norm_sqr.y ) )
- {
- swap(ep[0].a, ep[1].a);
- swap(ep_quantized[0].a, ep_quantized[1].a);
- }
- }
- }
- else //if ((0 == mode) || (2 == mode) || (1 == mode) || (3 == mode) || (7 == mode) || (6 == mode))
- {
- int p;
- if (0 == threadInBlock)
- {
- p = 0;
- }
- else if (1 == threadInBlock)
- {
- p = candidateFixUpIndex1D[partition].x;
- }
- else //if (2 == threadInBlock)
- {
- p = candidateFixUpIndex1D[partition].y;
- }
-
- int span_norm_sqr = dot( span, span );
- int dotProduct = dot( span, shared_temp[threadBase + p].pixel - ep[0] );
- if ( span_norm_sqr > 0 && dotProduct > 0 && uint( dotProduct * 63.49999 ) > uint( 32 * span_norm_sqr ) )
- {
- swap(ep[0], ep[1]);
- swap(ep_quantized[0], ep_quantized[1]);
- }
- }
-
- shared_temp[GI].endPoint_low = ep[0];
- shared_temp[GI].endPoint_high = ep[1];
- shared_temp[GI].endPoint_low_quantized = ep_quantized[0];
- shared_temp[GI].endPoint_high_quantized = ep_quantized[1];
- }
-#ifdef REF_DEVICE
- GroupMemoryBarrierWithGroupSync();
-#endif
-
- if (threadInBlock < 16)
- {
- uint color_index = 0;
- uint alpha_index = 0;
-
- uint2x4 ep;
-
- uint2 indexPrec;
- if ((0 == mode) || (1 == mode))
- {
- indexPrec = 1;
- }
- else if (6 == mode)
- {
- indexPrec = 0;
- }
- else if (4 == mode)
- {
- if (0 == index_selector)
- {
- indexPrec = uint2(2, 1);
- }
- else
- {
- indexPrec = uint2(1, 2);
- }
- }
- else
- {
- indexPrec = 2;
- }
-
- int subset_index;
- if ((0 == mode) || (2 == mode))
- {
- subset_index = (bits2 >> (threadInBlock * 2)) & 0x03;
- }
- else if ((1 == mode) || (3 == mode) || (7 == mode))
- {
- subset_index = (bits >> threadInBlock) & 0x01;
- }
- else
- {
- subset_index = 0;
- }
-
- ep[0] = shared_temp[threadBase + subset_index].endPoint_low;
- ep[1] = shared_temp[threadBase + subset_index].endPoint_high;
-
- int4 span = ep[1] - ep[0];
- if (mode < 4)
- {
- span.w = 0;
- }
-
- if ((4 == mode) || (5 == mode))
- {
- int2 span_norm_sqr;
- span_norm_sqr.x = dot( span.rgb, span.rgb );
- span_norm_sqr.y = span.a * span.a;
-
- int dotProduct = dot( span.rgb, shared_temp[threadBase + threadInBlock].pixel.rgb - ep[0].rgb );
- color_index = ( span_norm_sqr.x <= 0 || dotProduct <= 0 ) ? 0
- : ( ( dotProduct < span_norm_sqr.x ) ? aStep[indexPrec.x][ uint( dotProduct * 63.49999 / span_norm_sqr.x ) ] : aStep[indexPrec.x][63] );
- dotProduct = dot( span.a, shared_temp[threadBase + threadInBlock].pixel.a - ep[0].a );
- alpha_index = ( span_norm_sqr.y <= 0 || dotProduct <= 0 ) ? 0
- : ( ( dotProduct < span_norm_sqr.y ) ? aStep[indexPrec.y][ uint( dotProduct * 63.49999 / span_norm_sqr.y ) ] : aStep[indexPrec.y][63] );
-
- if (index_selector)
- {
- swap(color_index, alpha_index);
- }
- }
- else
- {
- int span_norm_sqr = dot( span, span );
-
- int dotProduct = dot( span, shared_temp[threadBase + threadInBlock].pixel - ep[0] );
- color_index = ( span_norm_sqr <= 0 || dotProduct <= 0 ) ? 0
- : ( ( dotProduct < span_norm_sqr ) ? aStep[indexPrec.x][ uint( dotProduct * 63.49999 / span_norm_sqr ) ] : aStep[indexPrec.x][63] );
- }
-
- shared_temp[GI].error = color_index;
- shared_temp[GI].mode = alpha_index;
- }
-#ifdef REF_DEVICE
- GroupMemoryBarrierWithGroupSync();
-#endif
-
- if (0 == threadInBlock)
- {
- uint4 block;
- if (0 == mode)
- {
- block_package0( block, partition, threadBase );
- }
- else if (1 == mode)
- {
- block_package1( block, partition, threadBase );
- }
- else if (2 == mode)
- {
- block_package2( block, partition, threadBase );
- }
- else if (3 == mode)
- {
- block_package3( block, partition, threadBase );
- }
- else if (4 == mode)
- {
- block_package4( block, rotation, index_selector, threadBase );
- }
- else if (5 == mode)
- {
- block_package5( block, rotation, threadBase );
- }
- else if (6 == mode)
- {
- block_package6( block, threadBase );
- }
- else //if (7 == mode)
- {
- block_package7( block, partition, threadBase );
- }
-
- g_OutBuff[blockID] = block;
- }
-}
-
-//uint4 truncate_and_round( uint4 color, uint bits)
-//{
-// uint precisionMask = ((1 << bits) - 1) << (8 - bits);
-// uint precisionHalf = (1 << (7-bits));
-//
-// uint4 truncated = color & precisionMask;
-// uint4 rounded = min(255, color + precisionHalf) & precisionMask;
-//
-// uint4 truncated_bak = truncated = truncated | (truncated >> bits);
-// uint4 rounded_bak = rounded = rounded | (rounded >> bits);
-//
-// uint4 color_bak = color;
-//
-// Ensure_A_Is_Larger( rounded, color );
-// Ensure_A_Is_Larger( truncated, color_bak );
-//
-// if (dot(rounded - color, rounded - color) <
-// dot(truncated - color_bak, truncated - color_bak))
-// {
-// return rounded_bak;
-// }
-// else
-// {
-// return truncated_bak;
-// }
-//}
-
-uint4 quantize( uint4 color, uint uPrec )
-{
- uint4 rnd = min(255, color + (1 << (7 - uPrec)));
- return rnd >> (8 - uPrec);
-}
-
-uint4 unquantize( uint4 color, uint uPrec )
-{
- color = color << (8 - uPrec);
- return color | (color >> uPrec);
-}
-
-uint2x4 compress_endpoints0( inout uint2x4 endPoint, uint2 P )
-{
- uint2x4 quantized;
- for ( uint j = 0; j < 2; j ++ )
- {
- quantized[j].rgb = quantize(endPoint[j].rgbb, 5).rgb & 0xFFFFFFFE;
- quantized[j].rgb |= P[j];
- quantized[j].a = 0xFF;
-
- endPoint[j].rgb = unquantize(quantized[j].rgbb, 5).rgb;
- endPoint[j].a = 0xFF;
-
- quantized[j] <<= 3;
- }
- return quantized;
-}
-uint2x4 compress_endpoints1( inout uint2x4 endPoint, uint2 P )
-{
- uint2x4 quantized;
- for ( uint j = 0; j < 2; j ++ )
- {
- quantized[j].rgb = quantize(endPoint[j].rgbb, 7).rgb & 0xFFFFFFFE;
- quantized[j].rgb |= P[j];
- quantized[j].a = 0xFF;
-
- endPoint[j].rgb = unquantize(quantized[j].rgbb, 7).rgb;
- endPoint[j].a = 0xFF;
-
- quantized[j] <<= 1;
- }
- return quantized;
-}
-uint2x4 compress_endpoints2( inout uint2x4 endPoint )
-{
- uint2x4 quantized;
- for ( uint j = 0; j < 2; j ++ )
- {
- quantized[j].rgb = quantize(endPoint[j].rgbb, 5).rgb;
- quantized[j].a = 0xFF;
-
- endPoint[j].rgb = unquantize(quantized[j].rgbb, 5).rgb;
- endPoint[j].a = 0xFF;
-
- quantized[j] <<= 3;
- }
- return quantized;
-}
-uint2x4 compress_endpoints3( inout uint2x4 endPoint, uint2 P )
-{
- uint2x4 quantized;
- for ( uint j = 0; j < 2; j ++ )
- {
- quantized[j].rgb = endPoint[j].rgb & 0xFFFFFFFE;
- quantized[j].rgb |= P[j];
- quantized[j].a = 0xFF;
-
- endPoint[j].rgb = quantized[j].rgb;
- endPoint[j].a = 0xFF;
- }
- return quantized;
-}
-uint2x4 compress_endpoints4( inout uint2x4 endPoint )
-{
- uint2x4 quantized;
- for ( uint j = 0; j < 2; j ++ )
- {
- quantized[j].rgb = quantize(endPoint[j].rgbb, 5).rgb;
- quantized[j].a = quantize(endPoint[j].a, 6).r;
-
- endPoint[j].rgb = unquantize(quantized[j].rgbb, 5).rgb;
- endPoint[j].a = unquantize(quantized[j].a, 6).r;
-
- quantized[j].rgb <<= 3;
- quantized[j].a <<= 2;
- }
- return quantized;
-}
-uint2x4 compress_endpoints5( inout uint2x4 endPoint )
-{
- uint2x4 quantized;
- for ( uint j = 0; j < 2; j ++ )
- {
- quantized[j].rgb = quantize(endPoint[j].rgbb, 7).rgb;
- quantized[j].a = endPoint[j].a;
-
- endPoint[j].rgb = unquantize(quantized[j].rgbb, 7).rgb;
- // endPoint[j].a Alpha is full precision
-
- quantized[j].rgb <<= 1;
- }
- return quantized;
-}
-uint2x4 compress_endpoints6( inout uint2x4 endPoint, uint2 P )
-{
- uint2x4 quantized;
- for ( uint j = 0; j < 2; j ++ )
- {
- quantized[j] = endPoint[j] & 0xFFFFFFFE;
- quantized[j] |= P[j];
-
- endPoint[j] = quantized[j];
- }
- return quantized;
-}
-uint2x4 compress_endpoints7( inout uint2x4 endPoint, uint2 P )
-{
- uint2x4 quantized;
- for ( uint j = 0; j < 2; j ++ )
- {
- quantized[j] = quantize(endPoint[j], 6) & 0xFFFFFFFE;
- quantized[j] |= P[j];
-
- endPoint[j] = unquantize(quantized[j], 6);
- }
- return quantized << 2;
-}
-
-#define get_end_point_l(subset) shared_temp[threadBase + subset].endPoint_low_quantized
-#define get_end_point_h(subset) shared_temp[threadBase + subset].endPoint_high_quantized
-#define get_color_index(index) shared_temp[threadBase + index].error
-#define get_alpha_index(index) shared_temp[threadBase + index].mode
-
-void block_package0( out uint4 block, uint partition, uint threadBase )
-{
- block.x = 0x01 | ( (partition - 64) << 1 )
- | ( ( get_end_point_l(0).r & 0xF0 ) << 1 ) | ( ( get_end_point_h(0).r & 0xF0 ) << 5 )
- | ( ( get_end_point_l(1).r & 0xF0 ) << 9 ) | ( ( get_end_point_h(1).r & 0xF0 ) << 13 )
- | ( ( get_end_point_l(2).r & 0xF0 ) << 17 ) | ( ( get_end_point_h(2).r & 0xF0 ) << 21 )
- | ( ( get_end_point_l(0).g & 0xF0 ) << 25 );
- block.y = ( ( get_end_point_l(0).g & 0xF0 ) >> 7 ) | ( ( get_end_point_h(0).g & 0xF0 ) >> 3 )
- | ( ( get_end_point_l(1).g & 0xF0 ) << 1 ) | ( ( get_end_point_h(1).g & 0xF0 ) << 5 )
- | ( ( get_end_point_l(2).g & 0xF0 ) << 9 ) | ( ( get_end_point_h(2).g & 0xF0 ) << 13 )
- | ( ( get_end_point_l(0).b & 0xF0 ) << 17 ) | ( ( get_end_point_h(0).b & 0xF0 ) << 21 )
- | ( ( get_end_point_l(1).b & 0xF0 ) << 25 );
- block.z = ( ( get_end_point_l(1).b & 0xF0 ) >> 7 ) | ( ( get_end_point_h(1).b & 0xF0 ) >> 3 )
- | ( ( get_end_point_l(2).b & 0xF0 ) << 1 ) | ( ( get_end_point_h(2).b & 0xF0 ) << 5 )
- | ( ( get_end_point_l(0).r & 0x08 ) << 10 ) | ( ( get_end_point_h(0).r & 0x08 ) << 11 )
- | ( ( get_end_point_l(1).r & 0x08 ) << 12 ) | ( ( get_end_point_h(1).r & 0x08 ) << 13 )
- | ( ( get_end_point_l(2).r & 0x08 ) << 14 ) | ( ( get_end_point_h(2).r & 0x08 ) << 15 )
- | ( get_color_index(0) << 19 );
- block.w = 0;
- uint i = 1;
- for ( ; i <= min( candidateFixUpIndex1DOrdered[partition][0], 4 ); i ++ )
- {
- block.z |= get_color_index(i) << ( i * 3 + 18 );
- }
- if ( candidateFixUpIndex1DOrdered[partition][0] < 4 ) //i = 4
- {
- block.z |= get_color_index(4) << 29;
- i += 1;
- }
- else //i = 5
- {
- block.w |= ( get_color_index(4) & 0x04 ) >> 2;
- for ( ; i <= candidateFixUpIndex1DOrdered[partition][0]; i ++ )
- block.w |= get_color_index(i) << ( i * 3 - 14 );
- }
- for ( ; i <= candidateFixUpIndex1DOrdered[partition][1]; i ++ )
- {
- block.w |= get_color_index(i) << ( i * 3 - 15 );
- }
- for ( ; i < 16; i ++ )
- {
- block.w |= get_color_index(i) << ( i * 3 - 16 );
- }
-}
-void block_package1( out uint4 block, uint partition, uint threadBase )
-{
- block.x = 0x02 | ( partition << 2 )
- | ( ( get_end_point_l(0).r & 0xFC ) << 6 ) | ( ( get_end_point_h(0).r & 0xFC ) << 12 )
- | ( ( get_end_point_l(1).r & 0xFC ) << 18 ) | ( ( get_end_point_h(1).r & 0xFC ) << 24 );
- block.y = ( ( get_end_point_l(0).g & 0xFC ) >> 2 ) | ( ( get_end_point_h(0).g & 0xFC ) << 4 )
- | ( ( get_end_point_l(1).g & 0xFC ) << 10 ) | ( ( get_end_point_h(1).g & 0xFC ) << 16 )
- | ( ( get_end_point_l(0).b & 0xFC ) << 22 ) | ( ( get_end_point_h(0).b & 0xFC ) << 28 );
- block.z = ( ( get_end_point_h(0).b & 0xFC ) >> 4 ) | ( ( get_end_point_l(1).b & 0xFC ) << 2 )
- | ( ( get_end_point_h(1).b & 0xFC ) << 8 )
- | ( ( get_end_point_l(0).r & 0x02 ) << 15 ) | ( ( get_end_point_l(1).r & 0x02 ) << 16 )
- | ( get_color_index(0) << 18 );
- if ( candidateFixUpIndex1DOrdered[partition][0] == 15 )
- {
- block.w = (get_color_index(15) << 30) | (get_color_index(14) << 27) | (get_color_index(13) << 24) | (get_color_index(12) << 21) | (get_color_index(11) << 18) | (get_color_index(10) << 15)
- | (get_color_index(9) << 12) | (get_color_index(8) << 9) | (get_color_index(7) << 6) | (get_color_index(6) << 3) | get_color_index(5);
- block.z |= (get_color_index(4) << 29) | (get_color_index(3) << 26) | (get_color_index(2) << 23) | (get_color_index(1) << 20) | (get_color_index(0) << 18);
- }
- else if ( candidateFixUpIndex1DOrdered[partition][0] == 2 )
- {
- block.w = (get_color_index(15) << 29) | (get_color_index(14) << 26) | (get_color_index(13) << 23) | (get_color_index(12) << 20) | (get_color_index(11) << 17) | (get_color_index(10) << 14)
- | (get_color_index(9) << 11) | (get_color_index(8) << 8) | (get_color_index(7) << 5) | (get_color_index(6) << 2) | (get_color_index(5) >> 1);
- block.z |= (get_color_index(5) << 31) | (get_color_index(4) << 28) | (get_color_index(3) << 25) | (get_color_index(2) << 23) | (get_color_index(1) << 20) | (get_color_index(0) << 18);
- }
- else if ( candidateFixUpIndex1DOrdered[partition][0] == 8 )
- {
- block.w = (get_color_index(15) << 29) | (get_color_index(14) << 26) | (get_color_index(13) << 23) | (get_color_index(12) << 20) | (get_color_index(11) << 17) | (get_color_index(10) << 14)
- | (get_color_index(9) << 11) | (get_color_index(8) << 9) | (get_color_index(7) << 6) | (get_color_index(6) << 3) | get_color_index(5);
- block.z |= (get_color_index(4) << 29) | (get_color_index(3) << 26) | (get_color_index(2) << 23) | (get_color_index(1) << 20) | (get_color_index(0) << 18);
- }
- else //candidateFixUpIndex1DOrdered[partition] == 6
- {
- block.w = (get_color_index(15) << 29) | (get_color_index(14) << 26) | (get_color_index(13) << 23) | (get_color_index(12) << 20) | (get_color_index(11) << 17) | (get_color_index(10) << 14)
- | (get_color_index(9) << 11) | (get_color_index(8) << 8) | (get_color_index(7) << 6) | (get_color_index(6) << 4) | get_color_index(5);
- block.z |= (get_color_index(4) << 29) | (get_color_index(3) << 26) | (get_color_index(2) << 23) | (get_color_index(1) << 20) | (get_color_index(0) << 18);
- }
-}
-void block_package2( out uint4 block, uint partition, uint threadBase )
-{
- block.x = 0x04 | ( (partition - 64) << 3 )
- | ( ( get_end_point_l(0).r & 0xF8 ) << 6 ) | ( ( get_end_point_h(0).r & 0xF8 ) << 11 )
- | ( ( get_end_point_l(1).r & 0xF8 ) << 16 ) | ( ( get_end_point_h(1).r & 0xF8 ) << 21 )
- | ( ( get_end_point_l(2).r & 0xF8 ) << 26 );
- block.y = ( ( get_end_point_l(2).r & 0xF8 ) >> 6 ) | ( ( get_end_point_h(2).r & 0xF8 ) >> 1 )
- | ( ( get_end_point_l(0).g & 0xF8 ) << 4 ) | ( ( get_end_point_h(0).g & 0xF8 ) << 9 )
- | ( ( get_end_point_l(1).g & 0xF8 ) << 14 ) | ( ( get_end_point_h(1).g & 0xF8 ) << 19 )
- | ( ( get_end_point_l(2).g & 0xF8 ) << 24 );
- block.z = ( ( get_end_point_h(2).g & 0xF8 ) >> 3 ) | ( ( get_end_point_l(0).b & 0xF8 ) << 2 )
- | ( ( get_end_point_h(0).b & 0xF8 ) << 7 ) | ( ( get_end_point_l(1).b & 0xF8 ) << 12 )
- | ( ( get_end_point_h(1).b & 0xF8 ) << 17 ) | ( ( get_end_point_l(2).b & 0xF8 ) << 22 )
- | ( ( get_end_point_h(2).b & 0xF8 ) << 27 );
- block.w = ( ( get_end_point_h(2).b & 0xF8 ) >> 5 )
- | ( get_color_index(0) << 3 );
- uint i = 1;
- for ( ; i <= candidateFixUpIndex1DOrdered[partition][0]; i ++ )
- {
- block.w |= get_color_index(i) << ( i * 2 + 2 );
- }
- for ( ; i <= candidateFixUpIndex1DOrdered[partition][1]; i ++ )
- {
- block.w |= get_color_index(i) << ( i * 2 + 1 );
- }
- for ( ; i < 16; i ++ )
- {
- block.w |= get_color_index(i) << ( i * 2 );
- }
-}
-void block_package3( out uint4 block, uint partition, uint threadBase )
-{
- block.x = 0x08 | ( partition << 4 )
- | ( ( get_end_point_l(0).r & 0xFE ) << 9 ) | ( ( get_end_point_h(0).r & 0xFE ) << 16 )
- | ( ( get_end_point_l(1).r & 0xFE ) << 23 ) | ( ( get_end_point_h(1).r & 0xFE ) << 30 );
- block.y = ( ( get_end_point_h(1).r & 0xFE ) >> 2 ) | ( ( get_end_point_l(0).g & 0xFE ) << 5 )
- | ( ( get_end_point_h(0).g & 0xFE ) << 12 ) | ( ( get_end_point_l(1).g & 0xFE ) << 19 )
- | ( ( get_end_point_h(1).g & 0xFE ) << 26 );
- block.z = ( ( get_end_point_h(1).g & 0xFE ) >> 6 ) | ( ( get_end_point_l(0).b & 0xFE ) << 1 )
- | ( ( get_end_point_h(0).b & 0xFE ) << 8 ) | ( ( get_end_point_l(1).b & 0xFE ) << 15 )
- | ( ( get_end_point_h(1).b & 0xFE ) << 22 )
- | ( ( get_end_point_l(0).r & 0x01 ) << 30 ) | ( ( get_end_point_h(0).r & 0x01 ) << 31 );
- block.w = ( ( get_end_point_l(1).r & 0x01 ) << 0 ) | ( ( get_end_point_h(1).r & 0x01 ) << 1 )
- | ( get_color_index(0) << 2 );
- uint i = 1;
- for ( ; i <= candidateFixUpIndex1DOrdered[partition][0]; i ++ )
- {
- block.w |= get_color_index(i) << ( i * 2 + 1 );
- }
- for ( ; i < 16; i ++ )
- {
- block.w |= get_color_index(i) << ( i * 2 );
- }
-}
-void block_package4( out uint4 block, uint rotation, uint index_selector, uint threadBase )
-{
- block.x = 0x10 | ( (rotation & 3) << 5 ) | ( (index_selector & 1) << 7 )
- | ( ( get_end_point_l(0).r & 0xF8 ) << 5 ) | ( ( get_end_point_h(0).r & 0xF8 ) << 10 )
- | ( ( get_end_point_l(0).g & 0xF8 ) << 15 ) | ( ( get_end_point_h(0).g & 0xF8 ) << 20 )
- | ( ( get_end_point_l(0).b & 0xF8 ) << 25 );
-
- block.y = ( ( get_end_point_l(0).b & 0xF8 ) >> 7 ) | ( ( get_end_point_h(0).b & 0xF8 ) >> 2 )
- | ( ( get_end_point_l(0).a & 0xFC ) << 4 ) | ( ( get_end_point_h(0).a & 0xFC ) << 10 )
- | ( (get_color_index(0) & 1) << 18 ) | ( get_color_index(1) << 19 ) | ( get_color_index(2) << 21 ) | ( get_color_index(3) << 23 )
- | ( get_color_index(4) << 25 ) | ( get_color_index(5) << 27 ) | ( get_color_index(6) << 29 ) | ( get_color_index(7) << 31 );
-
- block.z = ( get_color_index(7) >> 1 ) | ( get_color_index(8) << 1 ) | ( get_color_index(9) << 3 ) | ( get_color_index(10)<< 5 )
- | ( get_color_index(11)<< 7 ) | ( get_color_index(12)<< 9 ) | ( get_color_index(13)<< 11 ) | ( get_color_index(14)<< 13 )
- | ( get_color_index(15)<< 15 ) | ( (get_alpha_index(0) & 3) << 17 ) | ( get_alpha_index(1) << 19 ) | ( get_alpha_index(2) << 22 )
- | ( get_alpha_index(3) << 25 ) | ( get_alpha_index(4) << 28 ) | ( get_alpha_index(5) << 31 );
-
- block.w = ( get_alpha_index(5) >> 1 ) | ( get_alpha_index(6) << 2 ) | ( get_alpha_index(7) << 5 ) | ( get_alpha_index(8) << 8 )
- | ( get_alpha_index(9) << 11 ) | ( get_alpha_index(10)<< 14 ) | ( get_alpha_index(11)<< 17 ) | ( get_alpha_index(12)<< 20 )
- | ( get_alpha_index(13)<< 23 ) | ( get_alpha_index(14)<< 26 ) | ( get_alpha_index(15)<< 29 );
-}
-void block_package5( out uint4 block, uint rotation, uint threadBase )
-{
- block.x = 0x20 | ( rotation << 6 )
- | ( ( get_end_point_l(0).r & 0xFE ) << 7 ) | ( ( get_end_point_h(0).r & 0xFE ) << 14 )
- | ( ( get_end_point_l(0).g & 0xFE ) << 21 ) | ( ( get_end_point_h(0).g & 0xFE ) << 28 );
- block.y = ( ( get_end_point_h(0).g & 0xFE ) >> 4 ) | ( ( get_end_point_l(0).b & 0xFE ) << 3 )
- | ( ( get_end_point_h(0).b & 0xFE ) << 10 ) | ( get_end_point_l(0).a << 18 ) | ( get_end_point_h(0).a << 26 );
- block.z = ( get_end_point_h(0).a >> 6 )
- | ( get_color_index(0) << 2 ) | ( get_color_index(1) << 3 ) | ( get_color_index(2) << 5 ) | ( get_color_index(3) << 7 )
- | ( get_color_index(4) << 9 ) | ( get_color_index(5) << 11 ) | ( get_color_index(6) << 13 ) | ( get_color_index(7) << 15 )
- | ( get_color_index(8) << 17 ) | ( get_color_index(9) << 19 ) | ( get_color_index(10)<< 21 ) | ( get_color_index(11)<< 23 )
- | ( get_color_index(12)<< 25 ) | ( get_color_index(13)<< 27 ) | ( get_color_index(14)<< 29 ) | ( get_color_index(15)<< 31 );
- block.w = ( get_color_index(15)>> 1 ) | ( get_alpha_index(0) << 1 ) | ( get_alpha_index(1) << 2 ) | ( get_alpha_index(2) << 4 )
- | ( get_alpha_index(3) << 6 ) | ( get_alpha_index(4) << 8 ) | ( get_alpha_index(5) << 10 ) | ( get_alpha_index(6) << 12 )
- | ( get_alpha_index(7) << 14 ) | ( get_alpha_index(8) << 16 ) | ( get_alpha_index(9) << 18 ) | ( get_alpha_index(10)<< 20 )
- | ( get_alpha_index(11)<< 22 ) | ( get_alpha_index(12)<< 24 ) | ( get_alpha_index(13)<< 26 ) | ( get_alpha_index(14)<< 28 )
- | ( get_alpha_index(15)<< 30 );
-}
-void block_package6( out uint4 block, uint threadBase )
-{
- block.x = 0x40
- | ( ( get_end_point_l(0).r & 0xFE ) << 6 ) | ( ( get_end_point_h(0).r & 0xFE ) << 13 )
- | ( ( get_end_point_l(0).g & 0xFE ) << 20 ) | ( ( get_end_point_h(0).g & 0xFE ) << 27 );
- block.y = ( ( get_end_point_h(0).g & 0xFE ) >> 5 ) | ( ( get_end_point_l(0).b & 0xFE ) << 2 )
- | ( ( get_end_point_h(0).b & 0xFE ) << 9 ) | ( ( get_end_point_l(0).a & 0xFE ) << 16 )
- | ( ( get_end_point_h(0).a & 0xFE ) << 23 )
- | ( get_end_point_l(0).r & 0x01 ) << 31;
- block.z = ( get_end_point_h(0).r & 0x01 )
- | ( get_color_index(0) << 1 ) | ( get_color_index(1) << 4 ) | ( get_color_index(2) << 8 ) | ( get_color_index(3) << 12 )
- | ( get_color_index(4) << 16 ) | ( get_color_index(5) << 20 ) | ( get_color_index(6) << 24 ) | ( get_color_index(7) << 28 );
- block.w = ( get_color_index(8) << 0 ) | ( get_color_index(9) << 4 ) | ( get_color_index(10)<< 8 ) | ( get_color_index(11)<< 12 )
- | ( get_color_index(12)<< 16 ) | ( get_color_index(13)<< 20 ) | ( get_color_index(14)<< 24 ) | ( get_color_index(15)<< 28 );
-}
-void block_package7( out uint4 block, uint partition, uint threadBase )
-{
- block.x = 0x80 | ( partition << 8 )
- | ( ( get_end_point_l(0).r & 0xF8 ) << 11 ) | ( ( get_end_point_h(0).r & 0xF8 ) << 16 )
- | ( ( get_end_point_l(1).r & 0xF8 ) << 21 ) | ( ( get_end_point_h(1).r & 0xF8 ) << 26 );
- block.y = ( ( get_end_point_h(1).r & 0xF8 ) >> 6 ) | ( ( get_end_point_l(0).g & 0xF8 ) >> 1 )
- | ( ( get_end_point_h(0).g & 0xF8 ) << 4 ) | ( ( get_end_point_l(1).g & 0xF8 ) << 9 )
- | ( ( get_end_point_h(1).g & 0xF8 ) << 14 ) | ( ( get_end_point_l(0).b & 0xF8 ) << 19 )
- | ( ( get_end_point_h(0).b & 0xF8 ) << 24 );
- block.z = ( ( get_end_point_l(1).b & 0xF8 ) >> 3 ) | ( ( get_end_point_h(1).b & 0xF8 ) << 2 )
- | ( ( get_end_point_l(0).a & 0xF8 ) << 7 ) | ( ( get_end_point_h(0).a & 0xF8 ) << 12 )
- | ( ( get_end_point_l(1).a & 0xF8 ) << 17 ) | ( ( get_end_point_h(1).a & 0xF8 ) << 22 )
- | ( ( get_end_point_l(0).r & 0x04 ) << 28 ) | ( ( get_end_point_h(0).r & 0x04 ) << 29 );
- block.w = ( ( get_end_point_l(1).r & 0x04 ) >> 2 ) | ( ( get_end_point_h(1).r & 0x04 ) >> 1 )
- | ( get_color_index(0) << 2 );
- uint i = 1;
- for ( ; i <= candidateFixUpIndex1DOrdered[partition][0]; i ++ )
- {
- block.w |= get_color_index(i) << ( i * 2 + 1 );
- }
- for ( ; i < 16; i ++ )
- {
- block.w |= get_color_index(i) << ( i * 2 );
- }
-} \ No newline at end of file
diff --git a/tests/hlsl/dxsdk/BasicCompute11/BasicCompute11.hlsl b/tests/hlsl/dxsdk/BasicCompute11/BasicCompute11.hlsl
deleted file mode 100644
index 664e92e5d..000000000
--- a/tests/hlsl/dxsdk/BasicCompute11/BasicCompute11.hlsl
+++ /dev/null
@@ -1,72 +0,0 @@
-//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues.
-//TEST:COMPARE_HLSL: -profile cs_4_0 -entry CSMain
-//--------------------------------------------------------------------------------------
-// File: BasicCompute11.hlsl
-//
-// This file contains the Compute Shader to perform array A + array B
-//
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-#ifdef USE_STRUCTURED_BUFFERS
-
-struct BufType
-{
- int i;
- float f;
-#ifdef TEST_DOUBLE
- double d;
-#endif
-};
-
-StructuredBuffer<BufType> Buffer0 : register(t0);
-StructuredBuffer<BufType> Buffer1 : register(t1);
-RWStructuredBuffer<BufType> BufferOut : register(u0);
-
-[numthreads(1, 1, 1)]
-void CSMain( uint3 DTid : SV_DispatchThreadID )
-{
- BufferOut[DTid.x].i = Buffer0[DTid.x].i + Buffer1[DTid.x].i;
- BufferOut[DTid.x].f = Buffer0[DTid.x].f + Buffer1[DTid.x].f;
-#ifdef TEST_DOUBLE
- BufferOut[DTid.x].d = Buffer0[DTid.x].d + Buffer1[DTid.x].d;
-#endif
-}
-
-#else // The following code is for raw buffers
-
-ByteAddressBuffer Buffer0 : register(t0);
-ByteAddressBuffer Buffer1 : register(t1);
-RWByteAddressBuffer BufferOut : register(u0);
-
-[numthreads(1, 1, 1)]
-void CSMain( uint3 DTid : SV_DispatchThreadID )
-{
-#ifdef TEST_DOUBLE
- int i0 = asint( Buffer0.Load( DTid.x*16 ) );
- float f0 = asfloat( Buffer0.Load( DTid.x*16+4 ) );
- double d0 = asdouble( Buffer0.Load( DTid.x*16+8 ), Buffer0.Load( DTid.x*16+12 ) );
- int i1 = asint( Buffer1.Load( DTid.x*16 ) );
- float f1 = asfloat( Buffer1.Load( DTid.x*16+4 ) );
- double d1 = asdouble( Buffer1.Load( DTid.x*16+8 ), Buffer1.Load( DTid.x*16+12 ) );
-
- BufferOut.Store( DTid.x*16, asuint(i0 + i1) );
- BufferOut.Store( DTid.x*16+4, asuint(f0 + f1) );
-
- uint dl, dh;
- asuint( d0 + d1, dl, dh );
-
- BufferOut.Store( DTid.x*16+8, dl );
- BufferOut.Store( DTid.x*16+12, dh );
-#else
- int i0 = asint( Buffer0.Load( DTid.x*8 ) );
- float f0 = asfloat( Buffer0.Load( DTid.x*8+4 ) );
- int i1 = asint( Buffer1.Load( DTid.x*8 ) );
- float f1 = asfloat( Buffer1.Load( DTid.x*8+4 ) );
-
- BufferOut.Store( DTid.x*8, asuint(i0 + i1) );
- BufferOut.Store( DTid.x*8+4, asuint(f0 + f1) );
-#endif // TEST_DOUBLE
-}
-
-#endif // USE_STRUCTURED_BUFFERS
diff --git a/tests/hlsl/dxsdk/BasicHLSL11/BasicHLSL.fx b/tests/hlsl/dxsdk/BasicHLSL11/BasicHLSL.fx
deleted file mode 100644
index bd28f862b..000000000
--- a/tests/hlsl/dxsdk/BasicHLSL11/BasicHLSL.fx
+++ /dev/null
@@ -1,158 +0,0 @@
-//TEST_IGNORE_FILE:
-//--------------------------------------------------------------------------------------
-// File: BasicHLSL.fx
-//
-// The effect file for the BasicHLSL sample.
-//
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-
-//--------------------------------------------------------------------------------------
-// Global variables
-//--------------------------------------------------------------------------------------
-float4 g_MaterialAmbientColor; // Material's ambient color
-float4 g_MaterialDiffuseColor; // Material's diffuse color
-int g_nNumLights;
-
-float3 g_LightDir; // Light's direction in world space
-float4 g_LightDiffuse; // Light's diffuse color
-float4 g_LightAmbient; // Light's ambient color
-
-texture g_MeshTexture; // Color texture for mesh
-
-float g_fTime; // App's time in seconds
-float4x4 g_mWorld; // World matrix for object
-float4x4 g_mWorldViewProjection; // World * View * Projection matrix
-
-
-
-//--------------------------------------------------------------------------------------
-// Texture samplers
-//--------------------------------------------------------------------------------------
-sampler MeshTextureSampler =
-sampler_state
-{
- Texture = <g_MeshTexture>;
- MipFilter = LINEAR;
- MinFilter = LINEAR;
- MagFilter = LINEAR;
-};
-
-
-//--------------------------------------------------------------------------------------
-// Vertex shader output structure
-//--------------------------------------------------------------------------------------
-struct VS_OUTPUT
-{
- float4 Position : POSITION; // vertex position
- float4 Diffuse : COLOR0; // vertex diffuse color (note that COLOR0 is clamped from 0..1)
- float2 TextureUV : TEXCOORD0; // vertex texture coords
-};
-
-
-//--------------------------------------------------------------------------------------
-// This shader computes standard transform and lighting
-//--------------------------------------------------------------------------------------
-VS_OUTPUT RenderSceneVS( float4 vPos : POSITION,
- float3 vNormal : NORMAL,
- float2 vTexCoord0 : TEXCOORD0,
- uniform int nNumLights,
- uniform bool bTexture,
- uniform bool bAnimate )
-{
-
- VS_OUTPUT Output;
- float3 vNormalWorldSpace;
-
- // Transform the position from object space to homogeneous projection space
- Output.Position = mul(vPos, g_mWorldViewProjection);
-
- // Transform the normal from object space to world space
- vNormalWorldSpace = normalize(mul(vNormal, (float3x3)g_mWorld)); // normal (world space)
-
- // Compute simple directional lighting equation
- float3 vTotalLightDiffuse = float3(0,0,0);
- for(int i=0; i<nNumLights; i++ )
- vTotalLightDiffuse += g_LightDiffuse * max(0,dot(vNormalWorldSpace, g_LightDir));
-
- Output.Diffuse.rgb = g_MaterialDiffuseColor * vTotalLightDiffuse +
- g_MaterialAmbientColor * g_LightAmbient;
- Output.Diffuse.a = 1.0f;
-
- // Just copy the texture coordinate through
- if( bTexture )
- Output.TextureUV = vTexCoord0;
- else
- Output.TextureUV = 0;
-
- return Output;
-}
-
-
-//--------------------------------------------------------------------------------------
-// Pixel shader output structure
-//--------------------------------------------------------------------------------------
-struct PS_OUTPUT
-{
- float4 RGBColor : COLOR0; // Pixel color
-};
-
-
-//--------------------------------------------------------------------------------------
-// This shader outputs the pixel's color by modulating the texture's
-// color with diffuse material color
-//--------------------------------------------------------------------------------------
-PS_OUTPUT RenderScenePS( VS_OUTPUT In,
- uniform bool bTexture )
-{
- PS_OUTPUT Output;
-
- // Lookup mesh texture and modulate it with diffuse
- if( bTexture )
- Output.RGBColor = tex2D(MeshTextureSampler, In.TextureUV) * In.Diffuse;
- else
- Output.RGBColor = In.Diffuse;
-
- return Output;
-}
-
-
-//--------------------------------------------------------------------------------------
-// Renders scene to render target
-//--------------------------------------------------------------------------------------
-technique RenderSceneWithTexture1Light
-{
- pass P0
- {
- VertexShader = compile vs_2_0 RenderSceneVS( 1, true, true );
- PixelShader = compile ps_2_0 RenderScenePS( true ); // trivial pixel shader (could use FF instead if desired)
- }
-}
-
-technique RenderSceneWithTexture2Light
-{
- pass P0
- {
- VertexShader = compile vs_2_0 RenderSceneVS( 2, true, true );
- PixelShader = compile ps_2_0 RenderScenePS( true ); // trivial pixel shader (could use FF instead if desired)
- }
-}
-
-technique RenderSceneWithTexture3Light
-{
- pass P0
- {
- VertexShader = compile vs_2_0 RenderSceneVS( 3, true, true );
- PixelShader = compile ps_2_0 RenderScenePS( true ); // trivial pixel shader (could use FF instead if desired)
- }
-}
-
-technique RenderSceneNoTexture
-{
- pass P0
- {
- VertexShader = compile vs_2_0 RenderSceneVS( 1, false, false );
- PixelShader = compile ps_2_0 RenderScenePS( false ); // trivial pixel shader (could use FF instead if desired)
- }
-}
diff --git a/tests/hlsl/dxsdk/BasicHLSL11/BasicHLSL11_PS.hlsl b/tests/hlsl/dxsdk/BasicHLSL11/BasicHLSL11_PS.hlsl
deleted file mode 100644
index 33ea61b07..000000000
--- a/tests/hlsl/dxsdk/BasicHLSL11/BasicHLSL11_PS.hlsl
+++ /dev/null
@@ -1,60 +0,0 @@
-//TEST:COMPARE_HLSL:-no-mangle -profile ps_4_0 -entry PSMain
-
-#ifndef __SLANG__
-#define cbPerFrame cbPerFrame_0
-#define g_vLightDir g_vLightDir_0
-#define g_fAmbient g_fAmbient_0
-#define g_samLinear g_samLinear_0
-#define g_txDiffuse g_txDiffuse_0
-#endif
-
-//--------------------------------------------------------------------------------------
-// File: BasicHLSL11_PS.hlsl
-//
-// The pixel shader file for the BasicHLSL11 sample.
-//
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-//--------------------------------------------------------------------------------------
-// Globals
-//--------------------------------------------------------------------------------------
-cbuffer cbPerObject : register( b0 )
-{
- float4 g_vObjectColor ;//SLANG: : packoffset( c0 );
-};
-
-cbuffer cbPerFrame : register( b1 )
-{
- float3 g_vLightDir ;//SLANG: : packoffset( c0 );
- float g_fAmbient ;//SLANG: : packoffset( c0.w );
-};
-
-//--------------------------------------------------------------------------------------
-// Textures and Samplers
-//--------------------------------------------------------------------------------------
-Texture2D g_txDiffuse : register( t0 );
-SamplerState g_samLinear : register( s0 );
-
-//--------------------------------------------------------------------------------------
-// Input / Output structures
-//--------------------------------------------------------------------------------------
-struct PS_INPUT
-{
- float3 vNormal : NORMAL;
- float2 vTexcoord : TEXCOORD0;
-};
-
-//--------------------------------------------------------------------------------------
-// Pixel Shader
-//--------------------------------------------------------------------------------------
-float4 PSMain( PS_INPUT Input ) : SV_TARGET
-{
- float4 vDiffuse = g_txDiffuse.Sample( g_samLinear, Input.vTexcoord );
-
- float fLighting = saturate( dot( g_vLightDir, Input.vNormal ) );
- fLighting = max( fLighting, g_fAmbient );
-
- return vDiffuse * fLighting;
-}
-
diff --git a/tests/hlsl/dxsdk/BasicHLSL11/BasicHLSL11_VS.hlsl b/tests/hlsl/dxsdk/BasicHLSL11/BasicHLSL11_VS.hlsl
deleted file mode 100644
index a0fb3c9ce..000000000
--- a/tests/hlsl/dxsdk/BasicHLSL11/BasicHLSL11_VS.hlsl
+++ /dev/null
@@ -1,56 +0,0 @@
-//TEST:COMPARE_HLSL: -profile vs_4_0 -entry VSMain
-
-#ifndef __SLANG__
-#define cbPerObject cbPerObject_0
-#define g_mWorldViewProjection g_mWorldViewProjection_0
-#define g_mWorld g_mWorld_0
-#endif
-
-//--------------------------------------------------------------------------------------
-// File: BasicHLSL11_VS.hlsl
-//
-// The vertex shader file for the BasicHLSL11 sample.
-//
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-//--------------------------------------------------------------------------------------
-// Globals
-//--------------------------------------------------------------------------------------
-cbuffer cbPerObject : register( b0 )
-{
- matrix g_mWorldViewProjection ;//SLANG: : packoffset( c0 );
- matrix g_mWorld ;//SLANG: : packoffset( c4 );
-};
-
-//--------------------------------------------------------------------------------------
-// Input / Output structures
-//--------------------------------------------------------------------------------------
-struct VS_INPUT
-{
- float4 vPosition : POSITION;
- float3 vNormal : NORMAL;
- float2 vTexcoord : TEXCOORD0;
-};
-
-struct VS_OUTPUT
-{
- float3 vNormal : NORMAL;
- float2 vTexcoord : TEXCOORD0;
- float4 vPosition : SV_POSITION;
-};
-
-//--------------------------------------------------------------------------------------
-// Vertex Shader
-//--------------------------------------------------------------------------------------
-VS_OUTPUT VSMain( VS_INPUT Input )
-{
- VS_OUTPUT Output;
-
- Output.vPosition = mul( Input.vPosition, g_mWorldViewProjection );
- Output.vNormal = mul( Input.vNormal, (float3x3)g_mWorld );
- Output.vTexcoord = Input.vTexcoord;
-
- return Output;
-}
-
diff --git a/tests/hlsl/dxsdk/BasicHLSLFX11/BasicHLSLFX11.fx b/tests/hlsl/dxsdk/BasicHLSLFX11/BasicHLSLFX11.fx
deleted file mode 100644
index 1ecc1930a..000000000
--- a/tests/hlsl/dxsdk/BasicHLSLFX11/BasicHLSLFX11.fx
+++ /dev/null
@@ -1,181 +0,0 @@
-//TEST_IGNORE_FILE:
-//--------------------------------------------------------------------------------------
-// File: BasicHLSL11.fx
-//
-// The effect file for the BasicHLSL sample.
-//
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-
-//--------------------------------------------------------------------------------------
-// Global variables
-//--------------------------------------------------------------------------------------
-float4 g_MaterialAmbientColor; // Material's ambient color
-float4 g_MaterialDiffuseColor; // Material's diffuse color
-int g_nNumLights;
-
-float3 g_LightDir[3]; // Light's direction in world space
-float4 g_LightDiffuse[3]; // Light's diffuse color
-float4 g_LightAmbient; // Light's ambient color
-
-Texture2D g_MeshTexture; // Color texture for mesh
-
-float g_fTime; // App's time in seconds
-float4x4 g_mWorld; // World matrix for object
-float4x4 g_mWorldViewProjection; // World * View * Projection matrix
-
-//--------------------------------------------------------------------------------------
-// DepthStates
-//--------------------------------------------------------------------------------------
-DepthStencilState EnableDepth
-{
- DepthEnable = TRUE;
- DepthWriteMask = ALL;
- DepthFunc = LESS_EQUAL;
-};
-
-//--------------------------------------------------------------------------------------
-// Texture samplers
-//--------------------------------------------------------------------------------------
-SamplerState MeshTextureSampler
-{
- Filter = MIN_MAG_MIP_LINEAR;
- AddressU = Wrap;
- AddressV = Wrap;
-};
-
-
-//--------------------------------------------------------------------------------------
-// Vertex shader output structure
-//--------------------------------------------------------------------------------------
-struct VS_OUTPUT
-{
- float4 Position : SV_POSITION; // vertex position
- float4 Diffuse : COLOR0; // vertex diffuse color (note that COLOR0 is clamped from 0..1)
- float2 TextureUV : TEXCOORD0; // vertex texture coords
-};
-
-
-//--------------------------------------------------------------------------------------
-// This shader computes standard transform and lighting
-//--------------------------------------------------------------------------------------
-VS_OUTPUT RenderSceneVS( float4 vPos : POSITION,
- float3 vNormal : NORMAL,
- float2 vTexCoord0 : TEXCOORD,
- uniform int nNumLights,
- uniform bool bTexture,
- uniform bool bAnimate )
-{
- VS_OUTPUT Output;
- float3 vNormalWorldSpace;
-
- float4 vAnimatedPos = vPos;
-
- // Animation the vertex based on time and the vertex's object space position
- if( bAnimate )
- vAnimatedPos += float4(vNormal, 0) * (sin(g_fTime+5.5)+0.5)*5;
-
- // Transform the position from object space to homogeneous projection space
- Output.Position = mul(vAnimatedPos, g_mWorldViewProjection);
-
- // Transform the normal from object space to world space
- vNormalWorldSpace = normalize(mul(vNormal, (float3x3)g_mWorld)); // normal (world space)
-
- // Compute simple directional lighting equation
- float3 vTotalLightDiffuse = float3(0,0,0);
- for(int i=0; i<nNumLights; i++ )
- vTotalLightDiffuse += g_LightDiffuse[i] * max(0,dot(vNormalWorldSpace, g_LightDir[i]));
-
- Output.Diffuse.rgb = g_MaterialDiffuseColor * vTotalLightDiffuse +
- g_MaterialAmbientColor * g_LightAmbient;
- Output.Diffuse.a = 1.0f;
-
- // Just copy the texture coordinate through
- if( bTexture )
- Output.TextureUV = vTexCoord0;
- else
- Output.TextureUV = 0;
-
- return Output;
-}
-
-
-//--------------------------------------------------------------------------------------
-// Pixel shader output structure
-//--------------------------------------------------------------------------------------
-struct PS_OUTPUT
-{
- float4 RGBColor : SV_Target; // Pixel color
-};
-
-
-//--------------------------------------------------------------------------------------
-// This shader outputs the pixel's color by modulating the texture's
-// color with diffuse material color
-//--------------------------------------------------------------------------------------
-PS_OUTPUT RenderScenePS( VS_OUTPUT In,
- uniform bool bTexture )
-{
- PS_OUTPUT Output;
-
- // Lookup mesh texture and modulate it with diffuse
- if( bTexture )
- Output.RGBColor = g_MeshTexture.Sample(MeshTextureSampler, In.TextureUV) * In.Diffuse;
- else
- Output.RGBColor = In.Diffuse;
-
- return Output;
-}
-
-
-//--------------------------------------------------------------------------------------
-// Renders scene to render target using D3D11 Techniques
-//--------------------------------------------------------------------------------------
-technique11 RenderSceneWithTexture1Light
-{
- pass P0
- {
- SetVertexShader( CompileShader( vs_4_0_level_9_1, RenderSceneVS( 1, true, true ) ) );
- SetGeometryShader( NULL );
- SetPixelShader( CompileShader( ps_4_0_level_9_1, RenderScenePS( true ) ) );
-
- SetDepthStencilState( EnableDepth, 0 );
- }
-}
-
-technique11 RenderSceneWithTexture2Light
-{
- pass P0
- {
- SetVertexShader( CompileShader( vs_4_0_level_9_1, RenderSceneVS( 2, true, true ) ) );
- SetGeometryShader( NULL );
- SetPixelShader( CompileShader( ps_4_0_level_9_1, RenderScenePS( true ) ) );
-
- SetDepthStencilState( EnableDepth, 0 );
- }
-}
-
-technique11 RenderSceneWithTexture3Light
-{
- pass P0
- {
- SetVertexShader( CompileShader( vs_4_0_level_9_1, RenderSceneVS( 3, true, true ) ) );
- SetGeometryShader( NULL );
- SetPixelShader( CompileShader( ps_4_0_level_9_1, RenderScenePS( true ) ) );
-
- SetDepthStencilState( EnableDepth, 0 );
- }
-}
-
-technique11 RenderSceneNoTexture
-{
- pass P0
- {
- SetVertexShader( CompileShader( vs_4_0_level_9_1, RenderSceneVS( 1, true, true ) ) );
- SetGeometryShader( NULL );
- SetPixelShader( CompileShader( ps_4_0_level_9_1, RenderScenePS( false ) ) );
-
- SetDepthStencilState( EnableDepth, 0 );
- }
-} \ No newline at end of file
diff --git a/tests/hlsl/dxsdk/CascadedShadowMaps11/RenderCascadeScene.hlsl b/tests/hlsl/dxsdk/CascadedShadowMaps11/RenderCascadeScene.hlsl
deleted file mode 100644
index 3224d783c..000000000
--- a/tests/hlsl/dxsdk/CascadedShadowMaps11/RenderCascadeScene.hlsl
+++ /dev/null
@@ -1,506 +0,0 @@
-//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues.
-//TEST:COMPARE_HLSL: -profile vs_4_0 -entry VSMain -profile ps_4_0 -entry PSMain
-//--------------------------------------------------------------------------------------
-// File: RenderCascadeScene.hlsl
-//
-// This is the main shader file. This shader is compiled with several different flags
-// to provide different customizations based on user controls.
-//
-//
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-//--------------------------------------------------------------------------------------
-// Globals
-//--------------------------------------------------------------------------------------
-
-// This flag uses the derivative information to map the texels in a shadow map to the
-// view space plane of the primitive being rendred. This depth is then used as the
-// comparison depth and reduces self shadowing aliases. This technique is expensive
-// and is only valid when objects are planer ( such as a ground plane ).
-#ifndef USE_DERIVATIVES_FOR_DEPTH_OFFSET_FLAG
-#define USE_DERIVATIVES_FOR_DEPTH_OFFSET_FLAG 0
-#endif
-
-// This flag enables the shadow to blend between cascades. This is most useful when the
-// the shadow maps are small and artifact can be seen between the various cascade layers.
-#ifndef BLEND_BETWEEN_CASCADE_LAYERS_FLAG
-#define BLEND_BETWEEN_CASCADE_LAYERS_FLAG 0
-#endif
-
-// There are two methods for selecting the proper cascade a fragment lies in. Interval selection
-// compares the depth of the fragment against the frustum's depth partition.
-// Map based selection compares the texture coordinates against the acutal cascade maps.
-// Map based selection gives better coverage.
-// Interval based selection is easier to extend and understand.
-#ifndef SELECT_CASCADE_BY_INTERVAL_FLAG
-#define SELECT_CASCADE_BY_INTERVAL_FLAG 0
-#endif
-
-// The number of cascades
-#ifndef CASCADE_COUNT_FLAG
-#define CASCADE_COUNT_FLAG 3
-#endif
-
-
-// Most titles will find that 3-4 cascades with
-// BLEND_BETWEEN_CASCADE_LAYERS_FLAG, is good for lower end PCs.
-// High end PCs will be able to handle more cascades, and larger blur bands.
-// In some cases such as when large PCF kernels are used, derivative based depth offsets could be used
-// with larger PCF blur kernels on high end PCs for the ground plane.
-
-cbuffer cbAllShadowData : register( b0 )
-{
- matrix m_mWorldViewProjection;
- matrix m_mWorld;
- matrix m_mWorldView;
- matrix m_mShadow;
- float4 m_vCascadeOffset[8];
- float4 m_vCascadeScale[8];
- int m_nCascadeLevels; // Number of Cascades
- int m_iVisualizeCascades; // 1 is to visualize the cascades in different colors. 0 is to just draw the scene
- int m_iPCFBlurForLoopStart; // For loop begin value. For a 5x5 Kernal this would be -2.
- int m_iPCFBlurForLoopEnd; // For loop end value. For a 5x5 kernel this would be 3.
-
- // For Map based selection scheme, this keeps the pixels inside of the the valid range.
- // When there is no boarder, these values are 0 and 1 respectivley.
- float m_fMinBorderPadding;
- float m_fMaxBorderPadding;
- float m_fShadowBiasFromGUI; // A shadow map offset to deal with self shadow artifacts.
- //These artifacts are aggravated by PCF.
- float m_fShadowPartitionSize;
- float m_fCascadeBlendArea; // Amount to overlap when blending between cascades.
- float m_fTexelSize;
- float m_fNativeTexelSizeInX;
- float m_fPaddingForCB3; // Padding variables exist because CBs must be a multiple of 16 bytes.
- float4 m_fCascadeFrustumsEyeSpaceDepthsFloat[2]; // The values along Z that seperate the cascades.
- float4 m_fCascadeFrustumsEyeSpaceDepthsFloat4[8]; // the values along Z that separte the cascades.
- // Wastefully stored in float4 so they are array indexable.
- float3 m_vLightDir;
- float m_fPaddingCB4;
-
-};
-
-
-
-//--------------------------------------------------------------------------------------
-// Textures and Samplers
-//--------------------------------------------------------------------------------------
-Texture2D g_txDiffuse : register( t0 );
-Texture2D g_txShadow : register( t5 );
-
-
-SamplerState g_samLinear : register( s0 );
-SamplerComparisonState g_samShadow : register( s5 );
-
-//--------------------------------------------------------------------------------------
-// Input / Output structures
-//--------------------------------------------------------------------------------------
-struct VS_INPUT
-{
- float4 vPosition : POSITION;
- float3 vNormal : NORMAL;
- float2 vTexcoord : TEXCOORD0;
-};
-
-struct VS_OUTPUT
-{
- float3 vNormal : NORMAL;
- float2 vTexcoord : TEXCOORD0;
- float4 vTexShadow : TEXCOORD1;
- float4 vPosition : SV_POSITION;
- float4 vInterpPos : TEXCOORD2;
- float vDepth : TEXCOORD3;
-};
-
-//--------------------------------------------------------------------------------------
-// Vertex Shader
-//--------------------------------------------------------------------------------------
-VS_OUTPUT VSMain( VS_INPUT Input )
-{
- VS_OUTPUT Output;
-
- Output.vPosition = mul( Input.vPosition, m_mWorldViewProjection );
- Output.vNormal = mul( Input.vNormal, (float3x3)m_mWorld );
- Output.vTexcoord = Input.vTexcoord;
- Output.vInterpPos = Input.vPosition;
- Output.vDepth = mul( Input.vPosition, m_mWorldView ).z ;
-
- // Transform the shadow texture coordinates for all the cascades.
- Output.vTexShadow = mul( Input.vPosition, m_mShadow );
- return Output;
-
-}
-
-
-
-static const float4 vCascadeColorsMultiplier[8] =
-{
- float4 ( 1.5f, 0.0f, 0.0f, 1.0f ),
- float4 ( 0.0f, 1.5f, 0.0f, 1.0f ),
- float4 ( 0.0f, 0.0f, 5.5f, 1.0f ),
- float4 ( 1.5f, 0.0f, 5.5f, 1.0f ),
- float4 ( 1.5f, 1.5f, 0.0f, 1.0f ),
- float4 ( 1.0f, 1.0f, 1.0f, 1.0f ),
- float4 ( 0.0f, 1.0f, 5.5f, 1.0f ),
- float4 ( 0.5f, 3.5f, 0.75f, 1.0f )
-};
-
-
-void ComputeCoordinatesTransform( in int iCascadeIndex,
- in float4 InterpolatedPosition ,
- in out float4 vShadowTexCoord ,
- in out float4 vShadowTexCoordViewSpace )
-{
- // Now that we know the correct map, we can transform the world space position of the current fragment
- if( SELECT_CASCADE_BY_INTERVAL_FLAG )
- {
- vShadowTexCoord = vShadowTexCoordViewSpace * m_vCascadeScale[iCascadeIndex];
- vShadowTexCoord += m_vCascadeOffset[iCascadeIndex];
- }
-
- vShadowTexCoord.x *= m_fShadowPartitionSize; // precomputed (float)iCascadeIndex / (float)CASCADE_CNT
- vShadowTexCoord.x += (m_fShadowPartitionSize * (float)iCascadeIndex );
-
-
-}
-
-
-//--------------------------------------------------------------------------------------
-// This function calculates the screen space depth for shadow space texels
-//--------------------------------------------------------------------------------------
-void CalculateRightAndUpTexelDepthDeltas ( in float3 vShadowTexDDX,
- in float3 vShadowTexDDY,
- out float fUpTextDepthWeight,
- out float fRightTextDepthWeight
- ) {
-
- // We use the derivatives in X and Y to create a transformation matrix. Because these derivives give us the
- // transformation from screen space to shadow space, we need the inverse matrix to take us from shadow space
- // to screen space. This new matrix will allow us to map shadow map texels to screen space. This will allow
- // us to find the screen space depth of a corresponding depth pixel.
- // This is not a perfect solution as it assumes the underlying geometry of the scene is a plane. A more
- // accureate way of finding the actual depth would be to do a deferred rendering approach and actually
- //sample the depth.
-
- // Using an offset, or using variance shadow maps is a better approach to reducing these artifacts in most cases.
-
- float2x2 matScreentoShadow = float2x2( vShadowTexDDX.xy, vShadowTexDDY.xy );
- float fDeterminant = determinant ( matScreentoShadow );
-
- float fInvDeterminant = 1.0f / fDeterminant;
-
- float2x2 matShadowToScreen = float2x2 (
- matScreentoShadow._22 * fInvDeterminant, matScreentoShadow._12 * -fInvDeterminant,
- matScreentoShadow._21 * -fInvDeterminant, matScreentoShadow._11 * fInvDeterminant );
-
- float2 vRightShadowTexelLocation = float2( m_fTexelSize, 0.0f );
- float2 vUpShadowTexelLocation = float2( 0.0f, m_fTexelSize );
-
- // Transform the right pixel by the shadow space to screen space matrix.
- float2 vRightTexelDepthRatio = mul( vRightShadowTexelLocation, matShadowToScreen );
- float2 vUpTexelDepthRatio = mul( vUpShadowTexelLocation, matShadowToScreen );
-
- // We can now caculate how much depth changes when you move up or right in the shadow map.
- // We use the ratio of change in x and y times the dervivite in X and Y of the screen space
- // depth to calculate this change.
- fUpTextDepthWeight =
- vUpTexelDepthRatio.x * vShadowTexDDX.z
- + vUpTexelDepthRatio.y * vShadowTexDDY.z;
- fRightTextDepthWeight =
- vRightTexelDepthRatio.x * vShadowTexDDX.z
- + vRightTexelDepthRatio.y * vShadowTexDDY.z;
-
-}
-
-
-//--------------------------------------------------------------------------------------
-// Use PCF to sample the depth map and return a percent lit value.
-//--------------------------------------------------------------------------------------
-void CalculatePCFPercentLit ( in float4 vShadowTexCoord,
- in float fRightTexelDepthDelta,
- in float fUpTexelDepthDelta,
- in float fBlurRowSize,
- out float fPercentLit
- )
-{
- fPercentLit = 0.0f;
- // This loop could be unrolled, and texture immediate offsets could be used if the kernel size were fixed.
- // This would be performance improvment.
- for( int x = m_iPCFBlurForLoopStart; x < m_iPCFBlurForLoopEnd; ++x )
- {
- for( int y = m_iPCFBlurForLoopStart; y < m_iPCFBlurForLoopEnd; ++y )
- {
- float depthcompare = vShadowTexCoord.z;
- // A very simple solution to the depth bias problems of PCF is to use an offset.
- // Unfortunately, too much offset can lead to Peter-panning (shadows near the base of object disappear )
- // Too little offset can lead to shadow acne ( objects that should not be in shadow are partially self shadowed ).
- depthcompare -= m_fShadowBiasFromGUI;
- if ( USE_DERIVATIVES_FOR_DEPTH_OFFSET_FLAG )
- {
- // Add in derivative computed depth scale based on the x and y pixel.
- depthcompare += fRightTexelDepthDelta * ( (float) x ) + fUpTexelDepthDelta * ( (float) y );
- }
- // Compare the transformed pixel depth to the depth read from the map.
- fPercentLit += g_txShadow.SampleCmpLevelZero( g_samShadow,
- float2(
- vShadowTexCoord.x + ( ( (float) x ) * m_fNativeTexelSizeInX ) ,
- vShadowTexCoord.y + ( ( (float) y ) * m_fTexelSize )
- ),
- depthcompare );
- }
- }
- fPercentLit /= (float)fBlurRowSize;
-}
-
-//--------------------------------------------------------------------------------------
-// Calculate amount to blend between two cascades and the band where blending will occure.
-//--------------------------------------------------------------------------------------
-void CalculateBlendAmountForInterval ( in int iCurrentCascadeIndex,
- in out float fPixelDepth,
- in out float fCurrentPixelsBlendBandLocation,
- out float fBlendBetweenCascadesAmount
- )
-{
-
- // We need to calculate the band of the current shadow map where it will fade into the next cascade.
- // We can then early out of the expensive PCF for loop.
- //
- float fBlendInterval = m_fCascadeFrustumsEyeSpaceDepthsFloat4[ iCurrentCascadeIndex ].x;
- //if( iNextCascadeIndex > 1 )
- int fBlendIntervalbelowIndex = min(0, iCurrentCascadeIndex-1);
- fPixelDepth -= m_fCascadeFrustumsEyeSpaceDepthsFloat4[ fBlendIntervalbelowIndex ].x;
- fBlendInterval -= m_fCascadeFrustumsEyeSpaceDepthsFloat4[ fBlendIntervalbelowIndex ].x;
-
- // The current pixel's blend band location will be used to determine when we need to blend and by how much.
- fCurrentPixelsBlendBandLocation = fPixelDepth / fBlendInterval;
- fCurrentPixelsBlendBandLocation = 1.0f - fCurrentPixelsBlendBandLocation;
- // The fBlendBetweenCascadesAmount is our location in the blend band.
- fBlendBetweenCascadesAmount = fCurrentPixelsBlendBandLocation / m_fCascadeBlendArea;
-}
-
-
-
-//--------------------------------------------------------------------------------------
-// Calculate amount to blend between two cascades and the band where blending will occure.
-//--------------------------------------------------------------------------------------
-void CalculateBlendAmountForMap ( in float4 vShadowMapTextureCoord,
- in out float fCurrentPixelsBlendBandLocation,
- out float fBlendBetweenCascadesAmount )
-{
- // Calcaulte the blend band for the map based selection.
- float2 distanceToOne = float2 ( 1.0f - vShadowMapTextureCoord.x, 1.0f - vShadowMapTextureCoord.y );
- fCurrentPixelsBlendBandLocation = min( vShadowMapTextureCoord.x, vShadowMapTextureCoord.y );
- float fCurrentPixelsBlendBandLocation2 = min( distanceToOne.x, distanceToOne.y );
- fCurrentPixelsBlendBandLocation =
- min( fCurrentPixelsBlendBandLocation, fCurrentPixelsBlendBandLocation2 );
- fBlendBetweenCascadesAmount = fCurrentPixelsBlendBandLocation / m_fCascadeBlendArea;
-}
-
-//--------------------------------------------------------------------------------------
-// Calculate the shadow based on several options and rende the scene.
-//--------------------------------------------------------------------------------------
-float4 PSMain( VS_OUTPUT Input ) : SV_TARGET
-{
- float4 vDiffuse = g_txDiffuse.Sample( g_samLinear, Input.vTexcoord );
-
- float4 vShadowMapTextureCoord = 0.0f;
- float4 vShadowMapTextureCoord_blend = 0.0f;
-
- float4 vVisualizeCascadeColor = float4(0.0f,0.0f,0.0f,1.0f);
-
- float fPercentLit = 0.0f;
- float fPercentLit_blend = 0.0f;
-
-
- float fUpTextDepthWeight=0;
- float fRightTextDepthWeight=0;
- float fUpTextDepthWeight_blend=0;
- float fRightTextDepthWeight_blend=0;
-
- int iBlurRowSize = m_iPCFBlurForLoopEnd - m_iPCFBlurForLoopStart;
- iBlurRowSize *= iBlurRowSize;
- float fBlurRowSize = (float)iBlurRowSize;
-
- int iCascadeFound = 0;
- int iNextCascadeIndex = 1;
-
- float fCurrentPixelDepth;
-
- // The interval based selection technique compares the pixel's depth against the frustum's cascade divisions.
- fCurrentPixelDepth = Input.vDepth;
-
- // This for loop is not necessary when the frustum is uniformaly divided and interval based selection is used.
- // In this case fCurrentPixelDepth could be used as an array lookup into the correct frustum.
- int iCurrentCascadeIndex;
-
- float4 vShadowMapTextureCoordViewSpace = Input.vTexShadow;
- if( SELECT_CASCADE_BY_INTERVAL_FLAG )
- {
- iCurrentCascadeIndex = 0;
- if ( CASCADE_COUNT_FLAG > 1 )
- {
- float4 vCurrentPixelDepth = Input.vDepth;
- float4 fComparison = ( vCurrentPixelDepth > m_fCascadeFrustumsEyeSpaceDepthsFloat[0]);
- float4 fComparison2 = ( vCurrentPixelDepth > m_fCascadeFrustumsEyeSpaceDepthsFloat[1]);
- float fIndex = dot(
- float4( CASCADE_COUNT_FLAG > 0,
- CASCADE_COUNT_FLAG > 1,
- CASCADE_COUNT_FLAG > 2,
- CASCADE_COUNT_FLAG > 3)
- , fComparison )
- + dot(
- float4(
- CASCADE_COUNT_FLAG > 4,
- CASCADE_COUNT_FLAG > 5,
- CASCADE_COUNT_FLAG > 6,
- CASCADE_COUNT_FLAG > 7)
- , fComparison2 ) ;
-
- fIndex = min( fIndex, CASCADE_COUNT_FLAG - 1 );
- iCurrentCascadeIndex = (int)fIndex;
- }
- }
-
- if ( !SELECT_CASCADE_BY_INTERVAL_FLAG )
- {
- iCurrentCascadeIndex = 0;
- if ( CASCADE_COUNT_FLAG == 1 )
- {
- vShadowMapTextureCoord = vShadowMapTextureCoordViewSpace * m_vCascadeScale[0];
- vShadowMapTextureCoord += m_vCascadeOffset[0];
- }
- if ( CASCADE_COUNT_FLAG > 1 ) {
- for( int iCascadeIndex = 0; iCascadeIndex < CASCADE_COUNT_FLAG && iCascadeFound == 0; ++iCascadeIndex )
- {
- vShadowMapTextureCoord = vShadowMapTextureCoordViewSpace * m_vCascadeScale[iCascadeIndex];
- vShadowMapTextureCoord += m_vCascadeOffset[iCascadeIndex];
-
- if ( min( vShadowMapTextureCoord.x, vShadowMapTextureCoord.y ) > m_fMinBorderPadding
- && max( vShadowMapTextureCoord.x, vShadowMapTextureCoord.y ) < m_fMaxBorderPadding )
- {
- iCurrentCascadeIndex = iCascadeIndex;
- iCascadeFound = 1;
- }
- }
- }
- }
-
- float4 color = 0;
-
- if( BLEND_BETWEEN_CASCADE_LAYERS_FLAG )
- {
- // Repeat text coord calculations for the next cascade.
- // The next cascade index is used for blurring between maps.
- iNextCascadeIndex = min ( CASCADE_COUNT_FLAG - 1, iCurrentCascadeIndex + 1 );
- }
-
- float fBlendBetweenCascadesAmount = 1.0f;
- float fCurrentPixelsBlendBandLocation = 1.0f;
-
- if( SELECT_CASCADE_BY_INTERVAL_FLAG )
- {
- if( BLEND_BETWEEN_CASCADE_LAYERS_FLAG && CASCADE_COUNT_FLAG > 1 )
- {
- CalculateBlendAmountForInterval ( iCurrentCascadeIndex, fCurrentPixelDepth,
- fCurrentPixelsBlendBandLocation, fBlendBetweenCascadesAmount );
- }
- }
- else
- {
-
- if( BLEND_BETWEEN_CASCADE_LAYERS_FLAG )
- {
- CalculateBlendAmountForMap ( vShadowMapTextureCoord,
- fCurrentPixelsBlendBandLocation, fBlendBetweenCascadesAmount );
- }
- }
-
- float3 vShadowMapTextureCoordDDX;
- float3 vShadowMapTextureCoordDDY;
- // The derivatives are used to find the slope of the current plane.
- // The derivative calculation has to be inside of the loop in order to prevent divergent flow control artifacts.
- if( USE_DERIVATIVES_FOR_DEPTH_OFFSET_FLAG )
- {
- vShadowMapTextureCoordDDX = ddx( vShadowMapTextureCoordViewSpace );
- vShadowMapTextureCoordDDY = ddy( vShadowMapTextureCoordViewSpace );
-
- vShadowMapTextureCoordDDX *= m_vCascadeScale[iCurrentCascadeIndex];
- vShadowMapTextureCoordDDY *= m_vCascadeScale[iCurrentCascadeIndex];
- }
-
- ComputeCoordinatesTransform( iCurrentCascadeIndex,
- Input.vInterpPos,
- vShadowMapTextureCoord,
- vShadowMapTextureCoordViewSpace );
-
-
- vVisualizeCascadeColor = vCascadeColorsMultiplier[iCurrentCascadeIndex];
-
- if( USE_DERIVATIVES_FOR_DEPTH_OFFSET_FLAG )
- {
- CalculateRightAndUpTexelDepthDeltas ( vShadowMapTextureCoordDDX, vShadowMapTextureCoordDDY,
- fUpTextDepthWeight, fRightTextDepthWeight );
- }
-
- CalculatePCFPercentLit ( vShadowMapTextureCoord, fRightTextDepthWeight,
- fUpTextDepthWeight, fBlurRowSize, fPercentLit );
-
- if( BLEND_BETWEEN_CASCADE_LAYERS_FLAG && CASCADE_COUNT_FLAG > 1 )
- {
- if( fCurrentPixelsBlendBandLocation < m_fCascadeBlendArea)
- { // the current pixel is within the blend band.
-
- // Repeat text coord calculations for the next cascade.
- // The next cascade index is used for blurring between maps.
- if( !SELECT_CASCADE_BY_INTERVAL_FLAG )
- {
- vShadowMapTextureCoord_blend = vShadowMapTextureCoordViewSpace * m_vCascadeScale[iNextCascadeIndex];
- vShadowMapTextureCoord_blend += m_vCascadeOffset[iNextCascadeIndex];
- }
-
- ComputeCoordinatesTransform( iNextCascadeIndex, Input.vInterpPos,
- vShadowMapTextureCoord_blend,
- vShadowMapTextureCoordViewSpace );
-
- // We repeat the calcuation for the next cascade layer, when blending between maps.
- if( fCurrentPixelsBlendBandLocation < m_fCascadeBlendArea)
- { // the current pixel is within the blend band.
- if( USE_DERIVATIVES_FOR_DEPTH_OFFSET_FLAG )
- {
-
- CalculateRightAndUpTexelDepthDeltas ( vShadowMapTextureCoordDDX,
- vShadowMapTextureCoordDDY,
- fUpTextDepthWeight_blend,
- fRightTextDepthWeight_blend );
- }
- CalculatePCFPercentLit ( vShadowMapTextureCoord_blend, fRightTextDepthWeight_blend,
- fUpTextDepthWeight_blend, fBlurRowSize, fPercentLit_blend );
- fPercentLit = lerp( fPercentLit_blend, fPercentLit, fBlendBetweenCascadesAmount );
- // Blend the two calculated shadows by the blend amount.
- }
- }
- }
-
-
- if( !m_iVisualizeCascades ) vVisualizeCascadeColor = float4(1.0f,1.0f,1.0f,1.0f);
-
- float3 vLightDir1 = float3( -1.0f, 1.0f, -1.0f );
- float3 vLightDir2 = float3( 1.0f, 1.0f, -1.0f );
- float3 vLightDir3 = float3( 0.0f, -1.0f, 0.0f );
- float3 vLightDir4 = float3( 1.0f, 1.0f, 1.0f );
- // Some ambient-like lighting.
- float fLighting =
- saturate( dot( vLightDir1 , Input.vNormal ) )*0.05f +
- saturate( dot( vLightDir2 , Input.vNormal ) )*0.05f +
- saturate( dot( vLightDir3 , Input.vNormal ) )*0.05f +
- saturate( dot( vLightDir4 , Input.vNormal ) )*0.05f ;
-
- float4 vShadowLighting = fLighting * 0.5f;
- fLighting += saturate( dot( m_vLightDir , Input.vNormal ) );
- fLighting = lerp( vShadowLighting, fLighting, fPercentLit );
-
- return fLighting * vVisualizeCascadeColor * vDiffuse;
-
-}
-
diff --git a/tests/hlsl/dxsdk/CascadedShadowMaps11/RenderCascadeShadow.hlsl b/tests/hlsl/dxsdk/CascadedShadowMaps11/RenderCascadeShadow.hlsl
deleted file mode 100644
index af9679ada..000000000
--- a/tests/hlsl/dxsdk/CascadedShadowMaps11/RenderCascadeShadow.hlsl
+++ /dev/null
@@ -1,59 +0,0 @@
-//TEST:COMPARE_HLSL: -profile sm_4_0 -entry VSMain -stage vertex -entry VSMainPancake -stage vertex
-
-#ifndef __SLANG__
-#define cbPerObject cbPerObject_0
-#define g_mWorldViewProjection g_mWorldViewProjection_0
-#endif
-
-//--------------------------------------------------------------------------------------
-// File: RenderCascadeShadow.hlsl
-//
-// The shader file for the RenderCascadeScene sample.
-//
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-
-//--------------------------------------------------------------------------------------
-// Globals
-//--------------------------------------------------------------------------------------
-cbuffer cbPerObject : register( b0 )
-{
- matrix g_mWorldViewProjection ;//SLANG: : packoffset( c0 );
-};
-
-//--------------------------------------------------------------------------------------
-// Input / Output structures
-//--------------------------------------------------------------------------------------
-struct VS_INPUT
-{
- float4 vPosition : POSITION;
-};
-
-struct VS_OUTPUT
-{
- float4 vPosition : SV_POSITION;
-};
-
-//--------------------------------------------------------------------------------------
-// Vertex Shader
-//--------------------------------------------------------------------------------------
-VS_OUTPUT VSMain( VS_INPUT Input )
-{
- VS_OUTPUT Output;
-
- // There is nothing special here, just transform and write out the depth.
- Output.vPosition = mul( Input.vPosition, g_mWorldViewProjection );
-
- return Output;
-}
-
-
-VS_OUTPUT VSMainPancake( VS_INPUT Input )
-{
- VS_OUTPUT Output;
- // after transform move clipped geometry to near plane
- Output.vPosition = mul( Input.vPosition, g_mWorldViewProjection );
- //Output.vPosition.z = max( Output.vPosition.z, 0.0f );
- return Output;
-} \ No newline at end of file
diff --git a/tests/hlsl/dxsdk/ComputeShaderSort11/ComputeShaderSort11.hlsl b/tests/hlsl/dxsdk/ComputeShaderSort11/ComputeShaderSort11.hlsl
deleted file mode 100644
index 6e14bc10e..000000000
--- a/tests/hlsl/dxsdk/ComputeShaderSort11/ComputeShaderSort11.hlsl
+++ /dev/null
@@ -1,75 +0,0 @@
-//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues.
-//TEST:COMPARE_HLSL: -profile cs_4_0 -entry BitonicSort -entry MatrixTranspose
-//--------------------------------------------------------------------------------------
-// File: ComputeShaderSort11.hlsl
-//
-// This file contains the compute shaders to perform GPU sorting using DirectX 11.
-//
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-#define BITONIC_BLOCK_SIZE 512
-
-#define TRANSPOSE_BLOCK_SIZE 16
-
-//--------------------------------------------------------------------------------------
-// Constant Buffers
-//--------------------------------------------------------------------------------------
-cbuffer CB : register( b0 )
-{
- unsigned int g_iLevel;
- unsigned int g_iLevelMask;
- unsigned int g_iWidth;
- unsigned int g_iHeight;
-};
-
-//--------------------------------------------------------------------------------------
-// Structured Buffers
-//--------------------------------------------------------------------------------------
-StructuredBuffer<unsigned int> Input : register( t0 );
-RWStructuredBuffer<unsigned int> Data : register( u0 );
-
-//--------------------------------------------------------------------------------------
-// Bitonic Sort Compute Shader
-//--------------------------------------------------------------------------------------
-groupshared unsigned int shared_data[BITONIC_BLOCK_SIZE];
-
-[numthreads(BITONIC_BLOCK_SIZE, 1, 1)]
-void BitonicSort( uint3 Gid : SV_GroupID,
- uint3 DTid : SV_DispatchThreadID,
- uint3 GTid : SV_GroupThreadID,
- uint GI : SV_GroupIndex )
-{
- // Load shared data
- shared_data[GI] = Data[DTid.x];
- GroupMemoryBarrierWithGroupSync();
-
- // Sort the shared data
- for (unsigned int j = g_iLevel >> 1 ; j > 0 ; j >>= 1)
- {
- unsigned int result = ((shared_data[GI & ~j] <= shared_data[GI | j]) == (bool)(g_iLevelMask & DTid.x))? shared_data[GI ^ j] : shared_data[GI];
- GroupMemoryBarrierWithGroupSync();
- shared_data[GI] = result;
- GroupMemoryBarrierWithGroupSync();
- }
-
- // Store shared data
- Data[DTid.x] = shared_data[GI];
-}
-
-//--------------------------------------------------------------------------------------
-// Matrix Transpose Compute Shader
-//--------------------------------------------------------------------------------------
-groupshared unsigned int transpose_shared_data[TRANSPOSE_BLOCK_SIZE * TRANSPOSE_BLOCK_SIZE];
-
-[numthreads(TRANSPOSE_BLOCK_SIZE, TRANSPOSE_BLOCK_SIZE, 1)]
-void MatrixTranspose( uint3 Gid : SV_GroupID,
- uint3 DTid : SV_DispatchThreadID,
- uint3 GTid : SV_GroupThreadID,
- uint GI : SV_GroupIndex )
-{
- transpose_shared_data[GI] = Input[DTid.y * g_iWidth + DTid.x];
- GroupMemoryBarrierWithGroupSync();
- uint2 XY = DTid.yx - GTid.yx + GTid.xy;
- Data[XY.y * g_iHeight + XY.x] = transpose_shared_data[GTid.x * TRANSPOSE_BLOCK_SIZE + GTid.y];
-}
diff --git a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial02/Tutorial02.fx b/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial02/Tutorial02.fx
deleted file mode 100644
index e1fead571..000000000
--- a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial02/Tutorial02.fx
+++ /dev/null
@@ -1,28 +0,0 @@
-//TEST:COMPARE_HLSL: -profile sm_4_0 -entry VS -stage vertex -entry PS -stage fragment
-
-#ifndef __SLANG__
-#define SV_Target SV_TARGET
-#endif
-
-//--------------------------------------------------------------------------------------
-// File: Tutorial02.fx
-//
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-//--------------------------------------------------------------------------------------
-// Vertex Shader
-//--------------------------------------------------------------------------------------
-float4 VS( float4 Pos : POSITION ) : SV_POSITION
-{
- return Pos;
-}
-
-
-//--------------------------------------------------------------------------------------
-// Pixel Shader
-//--------------------------------------------------------------------------------------
-float4 PS( float4 Pos : SV_POSITION ) : SV_Target
-{
- return float4( 1.0f, 1.0f, 0.0f, 1.0f ); // Yellow, with Alpha = 1
-}
diff --git a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial02/Tutorial02_PS.hlsl b/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial02/Tutorial02_PS.hlsl
deleted file mode 100644
index 82300c10c..000000000
--- a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial02/Tutorial02_PS.hlsl
+++ /dev/null
@@ -1,3 +0,0 @@
-//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues.
-//TEST:COMPARE_HLSL: -profile ps_4_0 -entry PS
-#include "Tutorial02.fx"
diff --git a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial02/Tutorial02_VS.hlsl b/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial02/Tutorial02_VS.hlsl
deleted file mode 100644
index cdf4f9649..000000000
--- a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial02/Tutorial02_VS.hlsl
+++ /dev/null
@@ -1,3 +0,0 @@
-//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues.
-//TEST:COMPARE_HLSL: -profile vs_4_0 -entry VS
-#include "Tutorial02.fx"
diff --git a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial03/Tutorial03.fx b/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial03/Tutorial03.fx
deleted file mode 100644
index e1fead571..000000000
--- a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial03/Tutorial03.fx
+++ /dev/null
@@ -1,28 +0,0 @@
-//TEST:COMPARE_HLSL: -profile sm_4_0 -entry VS -stage vertex -entry PS -stage fragment
-
-#ifndef __SLANG__
-#define SV_Target SV_TARGET
-#endif
-
-//--------------------------------------------------------------------------------------
-// File: Tutorial02.fx
-//
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-//--------------------------------------------------------------------------------------
-// Vertex Shader
-//--------------------------------------------------------------------------------------
-float4 VS( float4 Pos : POSITION ) : SV_POSITION
-{
- return Pos;
-}
-
-
-//--------------------------------------------------------------------------------------
-// Pixel Shader
-//--------------------------------------------------------------------------------------
-float4 PS( float4 Pos : SV_POSITION ) : SV_Target
-{
- return float4( 1.0f, 1.0f, 0.0f, 1.0f ); // Yellow, with Alpha = 1
-}
diff --git a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial03/Tutorial03_PS.hlsl b/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial03/Tutorial03_PS.hlsl
deleted file mode 100644
index 684788198..000000000
--- a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial03/Tutorial03_PS.hlsl
+++ /dev/null
@@ -1,3 +0,0 @@
-//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues.
-//TEST:COMPARE_HLSL: -profile ps_4_0 -entry PS
-#include "Tutorial03.fx"
diff --git a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial03/Tutorial03_VS.hlsl b/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial03/Tutorial03_VS.hlsl
deleted file mode 100644
index 40d9770fc..000000000
--- a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial03/Tutorial03_VS.hlsl
+++ /dev/null
@@ -1,3 +0,0 @@
-//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues.
-//TEST:COMPARE_HLSL: -profile vs_4_0 -entry VS
-#include "Tutorial03.fx"
diff --git a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial04/Tutorial04.fx b/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial04/Tutorial04.fx
deleted file mode 100644
index d311edc5a..000000000
--- a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial04/Tutorial04.fx
+++ /dev/null
@@ -1,46 +0,0 @@
-//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues.
-//TEST:COMPARE_HLSL: -profile vs_4_0 -entry VS -profile ps_4_0 -entry PS
-//--------------------------------------------------------------------------------------
-// File: Tutorial04.fx
-//
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-//--------------------------------------------------------------------------------------
-// Constant Buffer Variables
-//--------------------------------------------------------------------------------------
-cbuffer ConstantBuffer : register( b0 )
-{
- matrix World;
- matrix View;
- matrix Projection;
-}
-
-//--------------------------------------------------------------------------------------
-struct VS_OUTPUT
-{
- float4 Pos : SV_POSITION;
- float4 Color : COLOR0;
-};
-
-//--------------------------------------------------------------------------------------
-// Vertex Shader
-//--------------------------------------------------------------------------------------
-VS_OUTPUT VS( float4 Pos : POSITION, float4 Color : COLOR )
-{
- VS_OUTPUT output = (VS_OUTPUT)0;
- output.Pos = mul( Pos, World );
- output.Pos = mul( output.Pos, View );
- output.Pos = mul( output.Pos, Projection );
- output.Color = Color;
- return output;
-}
-
-
-//--------------------------------------------------------------------------------------
-// Pixel Shader
-//--------------------------------------------------------------------------------------
-float4 PS( VS_OUTPUT input ) : SV_Target
-{
- return input.Color;
-}
diff --git a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial04/Tutorial04_PS.hlsl b/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial04/Tutorial04_PS.hlsl
deleted file mode 100644
index 65c36988f..000000000
--- a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial04/Tutorial04_PS.hlsl
+++ /dev/null
@@ -1,3 +0,0 @@
-//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues.
-//TEST:COMPARE_HLSL: -profile ps_4_0 -entry PS
-#include "Tutorial04.fx"
diff --git a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial04/Tutorial04_VS.hlsl b/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial04/Tutorial04_VS.hlsl
deleted file mode 100644
index 4505c1a98..000000000
--- a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial04/Tutorial04_VS.hlsl
+++ /dev/null
@@ -1,3 +0,0 @@
-//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues.
-//TEST:COMPARE_HLSL: -profile vs_4_0 -entry VS
-#include "Tutorial04.fx"
diff --git a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial05/Tutorial05.fx b/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial05/Tutorial05.fx
deleted file mode 100644
index 5ef5487da..000000000
--- a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial05/Tutorial05.fx
+++ /dev/null
@@ -1,54 +0,0 @@
-//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues.
-//TEST:COMPARE_HLSL: -profile vs_4_0 -entry VS -profile ps_4_0 -entry PS
-//--------------------------------------------------------------------------------------
-// File: Tutorial05.fx
-//
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-//--------------------------------------------------------------------------------------
-// Constant Buffer Variables
-//--------------------------------------------------------------------------------------
-cbuffer ConstantBuffer : register( b0 )
-{
- matrix World;
- matrix View;
- matrix Projection;
-}
-
-//--------------------------------------------------------------------------------------
-struct VS_INPUT
-{
- float4 Pos : POSITION;
- float4 Color : COLOR;
-};
-
-struct PS_INPUT
-{
- float4 Pos : SV_POSITION;
- float4 Color : COLOR;
-};
-
-
-//--------------------------------------------------------------------------------------
-// Vertex Shader
-//--------------------------------------------------------------------------------------
-PS_INPUT VS( VS_INPUT input )
-{
- PS_INPUT output = (PS_INPUT)0;
- output.Pos = mul( input.Pos, World );
- output.Pos = mul( output.Pos, View );
- output.Pos = mul( output.Pos, Projection );
- output.Color = input.Color;
-
- return output;
-}
-
-
-//--------------------------------------------------------------------------------------
-// Pixel Shader
-//--------------------------------------------------------------------------------------
-float4 PS( PS_INPUT input) : SV_Target
-{
- return input.Color;
-}
diff --git a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial05/Tutorial05_PS.hlsl b/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial05/Tutorial05_PS.hlsl
deleted file mode 100644
index 4226d4b47..000000000
--- a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial05/Tutorial05_PS.hlsl
+++ /dev/null
@@ -1,3 +0,0 @@
-//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues.
-//TEST:COMPARE_HLSL: -profile ps_4_0 -entry PS
-#include "Tutorial05.fx"
diff --git a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial05/Tutorial05_VS.hlsl b/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial05/Tutorial05_VS.hlsl
deleted file mode 100644
index 1c2f5519f..000000000
--- a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial05/Tutorial05_VS.hlsl
+++ /dev/null
@@ -1,3 +0,0 @@
-//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues.
-//TEST:COMPARE_HLSL: -profile vs_4_0 -entry VS
-#include "Tutorial05.fx"
diff --git a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial06/Tutorial06.fx b/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial06/Tutorial06.fx
deleted file mode 100644
index 219e96b9f..000000000
--- a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial06/Tutorial06.fx
+++ /dev/null
@@ -1,76 +0,0 @@
-//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues.
-//TEST:COMPARE_HLSL: -profile vs_4_0 -entry VS -profile ps_4_0 -entry PS -entry PSSolid
-//--------------------------------------------------------------------------------------
-// File: Tutorial06.fx
-//
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-
-//--------------------------------------------------------------------------------------
-// Constant Buffer Variables
-//--------------------------------------------------------------------------------------
-cbuffer ConstantBuffer : register( b0 )
-{
- matrix World;
- matrix View;
- matrix Projection;
- float4 vLightDir[2];
- float4 vLightColor[2];
- float4 vOutputColor;
-}
-
-
-//--------------------------------------------------------------------------------------
-struct VS_INPUT
-{
- float4 Pos : POSITION;
- float3 Norm : NORMAL;
-};
-
-struct PS_INPUT
-{
- float4 Pos : SV_POSITION;
- float3 Norm : TEXCOORD0;
-};
-
-
-//--------------------------------------------------------------------------------------
-// Vertex Shader
-//--------------------------------------------------------------------------------------
-PS_INPUT VS( VS_INPUT input )
-{
- PS_INPUT output = (PS_INPUT)0;
- output.Pos = mul( input.Pos, World );
- output.Pos = mul( output.Pos, View );
- output.Pos = mul( output.Pos, Projection );
- output.Norm = mul( float4( input.Norm, 1 ), World ).xyz;
-
- return output;
-}
-
-
-//--------------------------------------------------------------------------------------
-// Pixel Shader
-//--------------------------------------------------------------------------------------
-float4 PS( PS_INPUT input) : SV_Target
-{
- float4 finalColor = 0;
-
- //do NdotL lighting for 2 lights
- for(int i=0; i<2; i++)
- {
- finalColor += saturate( dot( (float3)vLightDir[i],input.Norm) * vLightColor[i] );
- }
- finalColor.a = 1;
- return finalColor;
-}
-
-
-//--------------------------------------------------------------------------------------
-// PSSolid - render a solid color
-//--------------------------------------------------------------------------------------
-float4 PSSolid( PS_INPUT input) : SV_Target
-{
- return vOutputColor;
-}
diff --git a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial06/Tutorial06_PS.hlsl b/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial06/Tutorial06_PS.hlsl
deleted file mode 100644
index 7bd5ece78..000000000
--- a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial06/Tutorial06_PS.hlsl
+++ /dev/null
@@ -1,3 +0,0 @@
-//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues.
-//TEST:COMPARE_HLSL: -profile ps_4_0 -entry PS
-#include "Tutorial06.fx"
diff --git a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial06/Tutorial06_VS.hlsl b/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial06/Tutorial06_VS.hlsl
deleted file mode 100644
index 50fcdbf56..000000000
--- a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial06/Tutorial06_VS.hlsl
+++ /dev/null
@@ -1,3 +0,0 @@
-//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues.
-//TEST:COMPARE_HLSL: -profile vs_4_0 -entry VS
-#include "Tutorial06.fx"
diff --git a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial07/Tutorial07.fx b/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial07/Tutorial07.fx
deleted file mode 100644
index f99aeba1b..000000000
--- a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial07/Tutorial07.fx
+++ /dev/null
@@ -1,67 +0,0 @@
-//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues.
-//TEST:COMPARE_HLSL: -profile vs_4_0 -entry VS -profile ps_4_0 -entry PS
-//--------------------------------------------------------------------------------------
-// File: Tutorial07.fx
-//
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-//--------------------------------------------------------------------------------------
-// Constant Buffer Variables
-//--------------------------------------------------------------------------------------
-Texture2D txDiffuse : register( t0 );
-SamplerState samLinear : register( s0 );
-
-cbuffer cbNeverChanges : register( b0 )
-{
- matrix View;
-};
-
-cbuffer cbChangeOnResize : register( b1 )
-{
- matrix Projection;
-};
-
-cbuffer cbChangesEveryFrame : register( b2 )
-{
- matrix World;
- float4 vMeshColor;
-};
-
-
-//--------------------------------------------------------------------------------------
-struct VS_INPUT
-{
- float4 Pos : POSITION;
- float2 Tex : TEXCOORD0;
-};
-
-struct PS_INPUT
-{
- float4 Pos : SV_POSITION;
- float2 Tex : TEXCOORD0;
-};
-
-
-//--------------------------------------------------------------------------------------
-// Vertex Shader
-//--------------------------------------------------------------------------------------
-PS_INPUT VS( VS_INPUT input )
-{
- PS_INPUT output = (PS_INPUT)0;
- output.Pos = mul( input.Pos, World );
- output.Pos = mul( output.Pos, View );
- output.Pos = mul( output.Pos, Projection );
- output.Tex = input.Tex;
-
- return output;
-}
-
-
-//--------------------------------------------------------------------------------------
-// Pixel Shader
-//--------------------------------------------------------------------------------------
-float4 PS( PS_INPUT input) : SV_Target
-{
- return txDiffuse.Sample( samLinear, input.Tex ) * vMeshColor;
-}
diff --git a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial07/Tutorial07_PS.hlsl b/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial07/Tutorial07_PS.hlsl
deleted file mode 100644
index f81862efd..000000000
--- a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial07/Tutorial07_PS.hlsl
+++ /dev/null
@@ -1,3 +0,0 @@
-//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues.
-//TEST:COMPARE_HLSL: -profile ps_4_0 -entry PS
-#include "Tutorial07.fx"
diff --git a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial07/Tutorial07_VS.hlsl b/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial07/Tutorial07_VS.hlsl
deleted file mode 100644
index 3ce6baf34..000000000
--- a/tests/hlsl/dxsdk/Direct3D11Tutorials/Tutorial07/Tutorial07_VS.hlsl
+++ /dev/null
@@ -1,3 +0,0 @@
-//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues.
-//TEST:COMPARE_HLSL: -profile vs_4_0 -entry VS
-#include "Tutorial07.fx"
diff --git a/tests/hlsl/dxsdk/Direct3D11TutorialsDXUT/Tutorial08/Tutorial08.fx b/tests/hlsl/dxsdk/Direct3D11TutorialsDXUT/Tutorial08/Tutorial08.fx
deleted file mode 100644
index f3c6a5774..000000000
--- a/tests/hlsl/dxsdk/Direct3D11TutorialsDXUT/Tutorial08/Tutorial08.fx
+++ /dev/null
@@ -1,56 +0,0 @@
-//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues.
-//TEST:COMPARE_HLSL: -profile vs_4_0 -entry VS -profile ps_4_0 -entry PS
-//--------------------------------------------------------------------------------------
-// File: Tutorial08.fx
-//
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-//--------------------------------------------------------------------------------------
-// Constant Buffer Variables
-//--------------------------------------------------------------------------------------
-Texture2D txDiffuse : register( t0 );
-SamplerState samLinear : register( s0 );
-
-cbuffer cbChangesEveryFrame : register( b0 )
-{
- matrix WorldViewProj;
- matrix World;
- float4 vMeshColor;
-};
-
-
-//--------------------------------------------------------------------------------------
-struct VS_INPUT
-{
- float4 Pos : POSITION;
- float2 Tex : TEXCOORD;
-};
-
-struct PS_INPUT
-{
- float4 Pos : SV_POSITION;
- float2 Tex : TEXCOORD0;
-};
-
-
-//--------------------------------------------------------------------------------------
-// Vertex Shader
-//--------------------------------------------------------------------------------------
-PS_INPUT VS( VS_INPUT input )
-{
- PS_INPUT output = (PS_INPUT)0;
- output.Pos = mul( input.Pos, WorldViewProj );
- output.Tex = input.Tex;
-
- return output;
-}
-
-
-//--------------------------------------------------------------------------------------
-// Pixel Shader
-//--------------------------------------------------------------------------------------
-float4 PS( PS_INPUT input) : SV_Target
-{
- return txDiffuse.Sample( samLinear, input.Tex ) * vMeshColor;
-}
diff --git a/tests/hlsl/dxsdk/Direct3D11TutorialsDXUT/Tutorial09/Tutorial09.fx b/tests/hlsl/dxsdk/Direct3D11TutorialsDXUT/Tutorial09/Tutorial09.fx
deleted file mode 100644
index 2be29fb40..000000000
--- a/tests/hlsl/dxsdk/Direct3D11TutorialsDXUT/Tutorial09/Tutorial09.fx
+++ /dev/null
@@ -1,69 +0,0 @@
-//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues.
-//TEST:COMPARE_HLSL: -profile vs_4_0 -entry VS -profile ps_4_0 -entry PS
-//--------------------------------------------------------------------------------------
-// File: Tutorial09.fx
-//
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-//--------------------------------------------------------------------------------------
-// Constant Buffer Variables
-//--------------------------------------------------------------------------------------
-Texture2D txDiffuse : register( t0 );
-SamplerState samLinear : register( s0 );
-
-cbuffer cbNeverChanges : register( b0 )
-{
- float3 vLightDir;
-};
-
-cbuffer cbChangesEveryFrame : register( b1 )
-{
- matrix WorldViewProj;
- matrix World;
-};
-
-struct VS_INPUT
-{
- float3 Pos : POSITION; //position
- float3 Norm : NORMAL; //normal
- float2 Tex : TEXCOORD0; //texture coordinate
-};
-
-struct PS_INPUT
-{
- float4 Pos : SV_POSITION;
- float4 Diffuse : COLOR0;
- float2 Tex : TEXCOORD1;
-};
-
-
-//--------------------------------------------------------------------------------------
-// Vertex Shader
-//--------------------------------------------------------------------------------------
-PS_INPUT VS( VS_INPUT input )
-{
- PS_INPUT output = (PS_INPUT)0;
- output.Pos = mul( float4(input.Pos,1), WorldViewProj );
- float3 vNormalWorldSpace = normalize( mul( input.Norm, (float3x3)World ) );
-
- float fLighting = saturate( dot( vNormalWorldSpace, vLightDir ) );
- output.Diffuse.rgb = fLighting;
- output.Diffuse.a = 1.0f;
-
- output.Tex = input.Tex;
-
- return output;
-}
-
-
-//--------------------------------------------------------------------------------------
-// Pixel Shader
-//--------------------------------------------------------------------------------------
-float4 PS( PS_INPUT input) : SV_Target
-{
- //calculate lighting assuming light color is <1,1,1,1>
- float4 outputColor = txDiffuse.Sample( samLinear, input.Tex ) * input.Diffuse;
- outputColor.a = 1;
- return outputColor;
-}
diff --git a/tests/hlsl/dxsdk/Direct3D11TutorialsDXUT/Tutorial10/Tutorial10.fx b/tests/hlsl/dxsdk/Direct3D11TutorialsDXUT/Tutorial10/Tutorial10.fx
deleted file mode 100644
index 68f53c0b6..000000000
--- a/tests/hlsl/dxsdk/Direct3D11TutorialsDXUT/Tutorial10/Tutorial10.fx
+++ /dev/null
@@ -1,73 +0,0 @@
-//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues.
-//TEST:COMPARE_HLSL: -profile vs_4_0 -entry VS -profile ps_4_0 -entry PS
-//--------------------------------------------------------------------------------------
-// File: Tutorial10.fx
-//
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-//--------------------------------------------------------------------------------------
-// Constant Buffer Variables
-//--------------------------------------------------------------------------------------
-Texture2D txDiffuse : register( t0 );
-SamplerState samLinear : register( s0 );
-
-cbuffer cbNeverChanges : register( b0 )
-{
- float3 vLightDir;
-};
-
-cbuffer cbChangesEveryFrame : register( b1 )
-{
- matrix WorldViewProj;
- matrix World;
- float Puffiness;
-};
-
-struct VS_INPUT
-{
- float3 Pos : POSITION; //position
- float3 Norm : NORMAL; //normal
- float2 Tex : TEXCOORD0; //texture coordinate
-};
-
-struct PS_INPUT
-{
- float4 Pos : SV_POSITION;
- float4 Diffuse : COLOR0;
- float2 Tex : TEXCOORD1;
-};
-
-
-//--------------------------------------------------------------------------------------
-// Vertex Shader
-//--------------------------------------------------------------------------------------
-PS_INPUT VS( VS_INPUT input )
-{
- PS_INPUT output = (PS_INPUT)0;
-
- input.Pos += input.Norm * Puffiness;
-
- output.Pos = mul( float4(input.Pos,1), WorldViewProj );
- float3 vNormalWorldSpace = normalize( mul( input.Norm, (float3x3)World ) );
-
- float fLighting = saturate( dot( vNormalWorldSpace, vLightDir ) );
- output.Diffuse.rgb = fLighting;
- output.Diffuse.a = 1.0f;
-
- output.Tex = input.Tex;
-
- return output;
-}
-
-
-//--------------------------------------------------------------------------------------
-// Pixel Shader
-//--------------------------------------------------------------------------------------
-float4 PS( PS_INPUT input) : SV_Target
-{
- //calculate lighting assuming light color is <1,1,1,1>
- float4 outputColor = txDiffuse.Sample( samLinear, input.Tex ) * input.Diffuse;
- outputColor.a = 1;
- return outputColor;
-}
diff --git a/tests/hlsl/dxsdk/Direct3D11TutorialsFX11/Tutorial11/Tutorial11.fx b/tests/hlsl/dxsdk/Direct3D11TutorialsFX11/Tutorial11/Tutorial11.fx
deleted file mode 100644
index a647a9079..000000000
--- a/tests/hlsl/dxsdk/Direct3D11TutorialsFX11/Tutorial11/Tutorial11.fx
+++ /dev/null
@@ -1,117 +0,0 @@
-//TEST_IGNORE_FILE:
-//--------------------------------------------------------------------------------------
-// File: Tutorial11.fx
-//
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-
-//--------------------------------------------------------------------------------------
-// Constant Buffer Variables
-//--------------------------------------------------------------------------------------
-Texture2D g_txDiffuse;
-SamplerState samLinear
-{
- Filter = MIN_MAG_MIP_LINEAR;
- AddressU = Wrap;
- AddressV = Wrap;
-};
-
-cbuffer cbConstant
-{
- float3 vLightDir = float3(-0.577,0.577,-0.577);
-};
-
-cbuffer cbChangesEveryFrame
-{
- matrix World;
- matrix View;
- matrix Projection;
- float Time;
-};
-
-cbuffer cbUserChanges
-{
- float Waviness;
-};
-
-struct VS_INPUT
-{
- float3 Pos : POSITION;
- float3 Norm : NORMAL;
- float2 Tex : TEXCOORD0;
-};
-
-struct PS_INPUT
-{
- float4 Pos : SV_POSITION;
- float3 Norm : TEXCOORD0;
- float2 Tex : TEXCOORD1;
-};
-
-//--------------------------------------------------------------------------------------
-// DepthStates
-//--------------------------------------------------------------------------------------
-DepthStencilState EnableDepth
-{
- DepthEnable = TRUE;
- DepthWriteMask = ALL;
- DepthFunc = LESS_EQUAL;
-};
-
-BlendState NoBlending
-{
- AlphaToCoverageEnable = FALSE;
- BlendEnable[0] = FALSE;
-};
-
-
-//--------------------------------------------------------------------------------------
-// Vertex Shader
-//--------------------------------------------------------------------------------------
-PS_INPUT VS( VS_INPUT input )
-{
- PS_INPUT output = (PS_INPUT)0;
-
- output.Pos = mul( float4(input.Pos,1), World );
-
- output.Pos.x += sin( output.Pos.y*0.1f + Time )*Waviness;
-
- output.Pos = mul( output.Pos, View );
- output.Pos = mul( output.Pos, Projection );
- output.Norm = mul( input.Norm, World );
- output.Tex = input.Tex;
-
- return output;
-}
-
-
-//--------------------------------------------------------------------------------------
-// Pixel Shader
-//--------------------------------------------------------------------------------------
-float4 PS( PS_INPUT input) : SV_Target
-{
- // Calculate lighting assuming light color is <1,1,1,1>
- float fLighting = saturate( dot( input.Norm, vLightDir ) );
- float4 outputColor = g_txDiffuse.Sample( samLinear, input.Tex ) * fLighting;
- outputColor.a = 1;
- return outputColor;
-}
-
-
-//--------------------------------------------------------------------------------------
-// Technique
-//--------------------------------------------------------------------------------------
-technique11 Render
-{
- pass P0
- {
- SetVertexShader( CompileShader( vs_4_0, VS() ) );
- SetGeometryShader( NULL );
- SetPixelShader( CompileShader( ps_4_0, PS() ) );
-
- SetDepthStencilState( EnableDepth, 0 );
- SetBlendState( NoBlending, float4( 0.0f, 0.0f, 0.0f, 0.0f ), 0xFFFFFFFF );
- }
-}
-
diff --git a/tests/hlsl/dxsdk/Direct3D11TutorialsFX11/Tutorial12/Tutorial12.fx b/tests/hlsl/dxsdk/Direct3D11TutorialsFX11/Tutorial12/Tutorial12.fx
deleted file mode 100644
index aae7f9a87..000000000
--- a/tests/hlsl/dxsdk/Direct3D11TutorialsFX11/Tutorial12/Tutorial12.fx
+++ /dev/null
@@ -1,129 +0,0 @@
-//TEST_IGNORE_FILE:
-//
-// Constant Buffer Variables
-//
-
-Texture2D g_txDiffuse;
-SamplerState samLinear
-{
- Filter = MIN_MAG_MIP_LINEAR;
- AddressU = Wrap;
- AddressV = Wrap;
-};
-
-TextureCube g_txEnvMap;
-SamplerState samLinearClamp
-{
- Filter = MIN_MAG_MIP_LINEAR;
- AddressU = Clamp;
- AddressV = Clamp;
-};
-
-cbuffer cbConstant
-{
- float3 vLightDir = float3(-0.577,0.577,-0.577);
-};
-
-cbuffer cbChangesEveryFrame
-{
- matrix World;
- matrix View;
- matrix Projection;
- float Time;
-};
-
-cbuffer cbUserChanges
-{
- float Waviness;
-};
-
-struct VS_INPUT
-{
- float3 Pos : POSITION; //position
- float3 Norm : NORMAL; //normal
- float2 Tex : TEXCOORD0; //texture coordinate
-};
-
-struct PS_INPUT
-{
- float4 Pos : SV_POSITION;
- float3 Norm : TEXCOORD0;
- float2 Tex : TEXCOORD1;
- float3 ViewR : TEXCOORD2;
-};
-
-//--------------------------------------------------------------------------------------
-// DepthStates
-//--------------------------------------------------------------------------------------
-DepthStencilState EnableDepth
-{
- DepthEnable = TRUE;
- DepthWriteMask = ALL;
- DepthFunc = LESS_EQUAL;
-};
-
-BlendState NoBlending
-{
- AlphaToCoverageEnable = FALSE;
- BlendEnable[0] = FALSE;
-};
-
-//
-// Vertex Shader
-//
-PS_INPUT VS( VS_INPUT input )
-{
- PS_INPUT output = (PS_INPUT)0;
-
- output.Pos = mul( float4(input.Pos,1), World );
-
- output.Pos.x += sin( output.Pos.y*0.1f + Time )*Waviness;
-
- output.Pos = mul( output.Pos, View );
- output.Pos = mul( output.Pos, Projection );
- output.Norm = mul( input.Norm, (float3x3)World );
- output.Tex = input.Tex;
-
- // Calculate the reflection vector
- float3 viewNorm = mul( output.Norm, (float3x3)View );
- output.ViewR = reflect( viewNorm, float3(0,0,-1.0) );
-
- return output;
-}
-
-
-//
-// Pixel Shader
-//
-float4 PS( PS_INPUT input) : SV_Target
-{
- // Calculate lighting assuming light color is <1,1,1,1>
- float fLighting = saturate( dot( input.Norm, vLightDir ) );
-
- // Load the environment map texture
- float4 cReflect = g_txEnvMap.Sample( samLinearClamp, input.ViewR );
-
- // Load the diffuse texture and multiply by the lighting amount
- float4 cDiffuse = g_txDiffuse.Sample( samLinear, input.Tex ) * fLighting;
-
- // Add diffuse to reflection and go
- float4 cTotal = cDiffuse + cReflect;
- cTotal.a = 1;
- return cTotal;
-}
-
-//
-// Technique
-//
-technique11 Render
-{
- pass P0
- {
- SetVertexShader( CompileShader( vs_4_0, VS() ) );
- SetGeometryShader( NULL );
- SetPixelShader( CompileShader( ps_4_0, PS() ) );
-
- SetDepthStencilState( EnableDepth, 0 );
- SetBlendState( NoBlending, float4( 0.0f, 0.0f, 0.0f, 0.0f ), 0xFFFFFFFF );
- }
-}
diff --git a/tests/hlsl/dxsdk/Direct3D11TutorialsFX11/Tutorial13/Tutorial13.fx b/tests/hlsl/dxsdk/Direct3D11TutorialsFX11/Tutorial13/Tutorial13.fx
deleted file mode 100644
index a6f09ecc7..000000000
--- a/tests/hlsl/dxsdk/Direct3D11TutorialsFX11/Tutorial13/Tutorial13.fx
+++ /dev/null
@@ -1,191 +0,0 @@
-//TEST_IGNORE_FILE:
-//--------------------------------------------------------------------------------------
-// File: Tutorial13.fx
-//
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-
-//--------------------------------------------------------------------------------------
-// Constant Buffer Variables
-//--------------------------------------------------------------------------------------
-Texture2D g_txDiffuse;
-SamplerState samLinear
-{
- Filter = MIN_MAG_MIP_LINEAR;
- AddressU = Wrap;
- AddressV = Wrap;
-};
-
-TextureCube g_txEnvMap;
-SamplerState samLinearClamp
-{
- Filter = MIN_MAG_MIP_LINEAR;
- AddressU = Clamp;
- AddressV = Clamp;
-};
-
-cbuffer cbConstant
-{
- float3 vLightDir = float3(-0.577,0.577,-0.577);
-};
-
-cbuffer cbChangesEveryFrame
-{
- matrix World;
- matrix View;
- matrix Projection;
- float Time;
-};
-
-cbuffer cbUserChanges
-{
- float Explode;
-};
-
-struct VS_INPUT
-{
- float3 Pos : POSITION;
- float3 Norm : NORMAL;
- float2 Tex : TEXCOORD0;
-};
-
-struct GSPS_INPUT
-{
- float4 Pos : SV_POSITION;
- float3 Norm : TEXCOORD0;
- float2 Tex : TEXCOORD1;
-};
-
-//--------------------------------------------------------------------------------------
-// DepthStates
-//--------------------------------------------------------------------------------------
-DepthStencilState EnableDepth
-{
- DepthEnable = TRUE;
- DepthWriteMask = ALL;
- DepthFunc = LESS_EQUAL;
-};
-
-BlendState NoBlending
-{
- AlphaToCoverageEnable = FALSE;
- BlendEnable[0] = FALSE;
-};
-
-
-//--------------------------------------------------------------------------------------
-// Vertex Shader
-//--------------------------------------------------------------------------------------
-GSPS_INPUT VS( VS_INPUT input )
-{
- GSPS_INPUT output = (GSPS_INPUT)0;
-
- output.Pos = mul( float4(input.Pos,1), World );
- output.Norm = mul( input.Norm, (float3x3)World );
- output.Tex = input.Tex;
-
- return output;
-}
-
-
-//--------------------------------------------------------------------------------------
-// Geometry Shader
-//--------------------------------------------------------------------------------------
-[maxvertexcount(12)]
-void GS( triangle GSPS_INPUT input[3], inout TriangleStream<GSPS_INPUT> TriStream )
-{
- GSPS_INPUT output;
-
- //
- // Calculate the face normal
- //
- float3 faceEdgeA = input[1].Pos - input[0].Pos;
- float3 faceEdgeB = input[2].Pos - input[0].Pos;
- float3 faceNormal = normalize( cross(faceEdgeA, faceEdgeB) );
- float3 ExplodeAmt = faceNormal*Explode;
-
- //
- // Calculate the face center
- //
- float3 centerPos = (input[0].Pos.xyz + input[1].Pos.xyz + input[2].Pos.xyz)/3.0;
- float2 centerTex = (input[0].Tex + input[1].Tex + input[2].Tex)/3.0;
- centerPos += faceNormal*Explode;
-
- //
- // Output the pyramid
- //
- for( int i=0; i<3; i++ )
- {
- output.Pos = input[i].Pos + float4(ExplodeAmt,0);
- output.Pos = mul( output.Pos, View );
- output.Pos = mul( output.Pos, Projection );
- output.Norm = input[i].Norm;
- output.Tex = input[i].Tex;
- TriStream.Append( output );
-
- int iNext = (i+1)%3;
- output.Pos = input[iNext].Pos + float4(ExplodeAmt,0);
- output.Pos = mul( output.Pos, View );
- output.Pos = mul( output.Pos, Projection );
- output.Norm = input[iNext].Norm;
- output.Tex = input[iNext].Tex;
- TriStream.Append( output );
-
- output.Pos = float4(centerPos,1) + float4(ExplodeAmt,0);
- output.Pos = mul( output.Pos, View );
- output.Pos = mul( output.Pos, Projection );
- output.Norm = faceNormal;
- output.Tex = centerTex;
- TriStream.Append( output );
-
- TriStream.RestartStrip();
- }
-
- for( int i=2; i>=0; i-- )
- {
- output.Pos = input[i].Pos + float4(ExplodeAmt,0);
- output.Pos = mul( output.Pos, View );
- output.Pos = mul( output.Pos, Projection );
- output.Norm = -input[i].Norm;
- output.Tex = input[i].Tex;
- TriStream.Append( output );
- }
- TriStream.RestartStrip();
-}
-
-
-//--------------------------------------------------------------------------------------
-// Pixel Shader
-//--------------------------------------------------------------------------------------
-float4 PS( GSPS_INPUT input) : SV_Target
-{
- // Calculate lighting assuming light color is <1,1,1,1>
- float fLighting = saturate( dot( input.Norm, vLightDir ) );
-
- // Load the diffuse texture and multiply by the lighting amount
- float4 cDiffuse = g_txDiffuse.Sample( samLinear, input.Tex ) * fLighting;
- cDiffuse.a = 1;
-
- // return diffuse
- return cDiffuse;
-}
-
-
-//--------------------------------------------------------------------------------------
-// Technique
-//--------------------------------------------------------------------------------------
-technique11 Render
-{
- pass P0
- {
- SetVertexShader( CompileShader( vs_4_0, VS() ) );
- SetGeometryShader( CompileShader( gs_4_0, GS() ) );
- SetPixelShader( CompileShader( ps_4_0, PS() ) );
-
- SetBlendState( NoBlending, float4( 0.0f, 0.0f, 0.0f, 0.0f ), 0xFFFFFFFF );
- SetDepthStencilState( EnableDepth, 0 );
- }
-}
-
-
diff --git a/tests/hlsl/dxsdk/Direct3D11TutorialsFX11/Tutorial14/Tutorial14.fx b/tests/hlsl/dxsdk/Direct3D11TutorialsFX11/Tutorial14/Tutorial14.fx
deleted file mode 100644
index b1e45b842..000000000
--- a/tests/hlsl/dxsdk/Direct3D11TutorialsFX11/Tutorial14/Tutorial14.fx
+++ /dev/null
@@ -1,294 +0,0 @@
-//TEST_IGNORE_FILE:
-//--------------------------------------------------------------------------------------
-// File: Tutorial14.fx
-//
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-
-//--------------------------------------------------------------------------------------
-// Constant Buffer Variables
-//--------------------------------------------------------------------------------------
-Texture2D g_txDiffuse;
-SamplerState samLinear
-{
- Filter = MIN_MAG_MIP_LINEAR;
- AddressU = Wrap;
- AddressV = Wrap;
-};
-
-cbuffer cbConstant
-{
- float3 vLightDir = float3(-0.577,0.577,-0.577);
-};
-
-cbuffer cbChangesEveryFrame
-{
- matrix World;
- matrix View;
- matrix Projection;
-};
-
-struct VS_INPUT
-{
- float3 Pos : POSITION; //position
- float3 Norm : NORMAL; //normal
- float2 Tex : TEXCOORD0; //texture coordinate
-};
-
-struct PS_INPUT
-{
- float4 Pos : SV_POSITION;
- float3 Norm : TEXCOORD0;
- float2 Tex : TEXCOORD1;
-};
-
-struct QUADVS_INPUT
-{
- float4 Pos : POSITION;
- float2 Tex : TEXCOORD0;
-};
-
-struct QUADVS_OUTPUT
-{
- float4 Pos : SV_POSITION; // Transformed position
- float2 Tex : TEXCOORD0;
-};
-
-//--------------------------------------------------------------------------------------
-// Blending States
-//--------------------------------------------------------------------------------------
-BlendState NoBlending
-{
- BlendEnable[0] = FALSE;
-};
-
-BlendState SrcAlphaBlendingAdd
-{
- BlendEnable[0] = TRUE;
- SrcBlend = SRC_ALPHA;
- DestBlend = ONE;
- BlendOp = ADD;
- SrcBlendAlpha = ZERO;
- DestBlendAlpha = ZERO;
- BlendOpAlpha = ADD;
- RenderTargetWriteMask[0] = 0x0F;
-};
-
-BlendState SrcAlphaBlendingSub
-{
- BlendEnable[0] = TRUE;
- SrcBlend = SRC_ALPHA;
- DestBlend = ONE;
- BlendOp = SUBTRACT;
- SrcBlendAlpha = ZERO;
- DestBlendAlpha = ZERO;
- BlendOpAlpha = ADD;
- RenderTargetWriteMask[0] = 0x0F;
-};
-
-BlendState SrcColorBlendingAdd
-{
- BlendEnable[0] = TRUE;
- SrcBlend = SRC_COLOR;
- DestBlend = ONE;
- BlendOp = ADD;
- SrcBlendAlpha = ZERO;
- DestBlendAlpha = ZERO;
- BlendOpAlpha = ADD;
- RenderTargetWriteMask[0] = 0x0F;
-};
-
-BlendState SrcColorBlendingSub
-{
- BlendEnable[0] = TRUE;
- SrcBlend = SRC_COLOR;
- DestBlend = ONE;
- BlendOp = SUBTRACT;
- SrcBlendAlpha = ZERO;
- DestBlendAlpha = ZERO;
- BlendOpAlpha = ADD;
- RenderTargetWriteMask[0] = 0x0F;
-};
-
-//--------------------------------------------------------------------------------------
-// Depth/Stencil States
-//--------------------------------------------------------------------------------------
-DepthStencilState RenderWithStencilState
-{
- DepthEnable = false;
- DepthWriteMask = ZERO;
- DepthFunc = Less;
-
- // Setup stencil states
- StencilEnable = true;
- StencilReadMask = 0xFF;
- StencilWriteMask = 0x00;
-
- FrontFaceStencilFunc = Not_Equal;
- FrontFaceStencilPass = Keep;
- FrontFaceStencilFail = Zero;
-
- BackFaceStencilFunc = Not_Equal;
- BackFaceStencilPass = Keep;
- BackFaceStencilFail = Zero;
-};
-
-
-
-//--------------------------------------------------------------------------------------
-// Scene Vertex Shader
-//--------------------------------------------------------------------------------------
-PS_INPUT VS( VS_INPUT input )
-{
- PS_INPUT output = (PS_INPUT)0;
-
- output.Pos = mul( float4(input.Pos,1), World );
- output.Pos = mul( output.Pos, View );
- output.Pos = mul( output.Pos, Projection );
- output.Norm = mul( input.Norm, World );
- output.Tex = input.Tex;
-
- return output;
-}
-
-//-----------------------------------------------------------------------------
-// Quad Vertex Shaders
-//-----------------------------------------------------------------------------
-QUADVS_OUTPUT QuadVS( QUADVS_INPUT Input )
-{
- QUADVS_OUTPUT Output;
- Output.Pos = mul( Input.Pos, World );
- Output.Pos = mul( Output.Pos, View );
- Output.Pos = mul( Output.Pos, Projection );
- Output.Tex = Input.Tex;
- return Output;
-}
-
-QUADVS_OUTPUT ScreenQuadVS( QUADVS_INPUT Input )
-{
- QUADVS_OUTPUT Output;
- Output.Pos = Input.Pos;
- Output.Tex = Input.Tex;
- return Output;
-}
-
-//--------------------------------------------------------------------------------------
-// Pixel Shader
-//--------------------------------------------------------------------------------------
-float4 PS( PS_INPUT input) : SV_Target
-{
- // Calculate lighting assuming light color is <1,1,1,1>
- float fLighting = saturate( dot( input.Norm, vLightDir ) );
- float4 outputColor = g_txDiffuse.Sample( samLinear, input.Tex ) * fLighting;
- outputColor.a = 1;
- return outputColor;
-}
-
-//--------------------------------------------------------------------------------------
-// Quad Pixel Shader
-//--------------------------------------------------------------------------------------
-float4 QuadPS( QUADVS_OUTPUT input) : SV_Target
-{
- return g_txDiffuse.Sample( samLinear, input.Tex );
-}
-
-
-//--------------------------------------------------------------------------------------
-// Scene Techniques
-//--------------------------------------------------------------------------------------
-technique11 RenderScene
-{
- pass P0
- {
- SetVertexShader( CompileShader( vs_4_0, VS() ) );
- SetGeometryShader( NULL );
- SetPixelShader( CompileShader( ps_4_0, PS() ) );
- SetBlendState( NoBlending, float4( 0.0f, 0.0f, 0.0f, 0.0f ), 0xFFFFFFFF );
- }
-}
-
-//--------------------------------------------------------------------------------------
-// RenderWithStencil - set the depth stencil state inside of the technique
-//--------------------------------------------------------------------------------------
-technique11 RenderWithStencil
-{
- pass P0
- {
- SetVertexShader( CompileShader( vs_4_0, ScreenQuadVS() ) );
- SetGeometryShader( NULL );
- SetPixelShader( CompileShader( ps_4_0, QuadPS() ) );
-
- SetBlendState( NoBlending, float4( 0.0f, 0.0f, 0.0f, 0.0f ), 0xFFFFFFFF );
- SetDepthStencilState( RenderWithStencilState, 0 );
- }
-}
-
-//--------------------------------------------------------------------------------------
-// Quad Techniques: Alpha blending state is set inside the technique
-//--------------------------------------------------------------------------------------
-technique11 RenderQuadSolid
-{
- pass P0
- {
- SetVertexShader( CompileShader( vs_4_0, QuadVS() ) );
- SetGeometryShader( NULL );
- SetPixelShader( CompileShader( ps_4_0, QuadPS() ) );
-
- SetBlendState( NoBlending, float4( 0.0f, 0.0f, 0.0f, 0.0f ), 0xFFFFFFFF );
- }
-}
-
-//--------------------------------------------------------------------------------------
-technique11 RenderQuadSrcAlphaAdd
-{
- pass P0
- {
- SetVertexShader( CompileShader( vs_4_0, QuadVS() ) );
- SetGeometryShader( NULL );
- SetPixelShader( CompileShader( ps_4_0, QuadPS() ) );
-
- SetBlendState( SrcAlphaBlendingAdd, float4( 0.0f, 0.0f, 0.0f, 0.0f ), 0xFFFFFFFF );
- }
-}
-
-//--------------------------------------------------------------------------------------
-technique11 RenderQuadSrcAlphaSub
-{
- pass P0
- {
- SetVertexShader( CompileShader( vs_4_0, QuadVS() ) );
- SetGeometryShader( NULL );
- SetPixelShader( CompileShader( ps_4_0, QuadPS() ) );
-
- SetBlendState( SrcAlphaBlendingSub, float4( 0.0f, 0.0f, 0.0f, 0.0f ), 0xFFFFFFFF );
- }
-}
-
-//--------------------------------------------------------------------------------------
-technique11 RenderQuadSrcColorAdd
-{
- pass P0
- {
- SetVertexShader( CompileShader( vs_4_0, QuadVS() ) );
- SetGeometryShader( NULL );
- SetPixelShader( CompileShader( ps_4_0, QuadPS() ) );
-
- SetBlendState( SrcColorBlendingAdd, float4( 0.0f, 0.0f, 0.0f, 0.0f ), 0xFFFFFFFF );
- }
-}
-
-//--------------------------------------------------------------------------------------
-technique11 RenderQuadSrcColorSub
-{
- pass P0
- {
- SetVertexShader( CompileShader( vs_4_0, QuadVS() ) );
- SetGeometryShader( NULL );
- SetPixelShader( CompileShader( ps_4_0, QuadPS() ) );
-
- SetBlendState( SrcColorBlendingSub, float4( 0.0f, 0.0f, 0.0f, 0.0f ), 0xFFFFFFFF );
- }
-}
-
-
diff --git a/tests/hlsl/dxsdk/DynamicShaderLinkage11/DynamicShaderLinkage11_LightPSH.h b/tests/hlsl/dxsdk/DynamicShaderLinkage11/DynamicShaderLinkage11_LightPSH.h
deleted file mode 100644
index b44251829..000000000
--- a/tests/hlsl/dxsdk/DynamicShaderLinkage11/DynamicShaderLinkage11_LightPSH.h
+++ /dev/null
@@ -1,84 +0,0 @@
-//--------------------------------------------------------------------------------------
-// File: DynamicShaderLinkage11_LightPSH.h
-//
-// The pixel shader light header file for the DynamicShaderLinkage11 sample.
-//
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-//--------------------------------------------------------------------------------------
-// Interfaces
-//--------------------------------------------------------------------------------------
-interface iBaseLight
-{
- float3 IlluminateAmbient(float3 vNormal);
-
- float3 IlluminateDiffuse(float3 vNormal);
-
- float3 IlluminateSpecular(float3 vNormal, int specularPower );
-
-};
-
-//--------------------------------------------------------------------------------------
-// Classes
-//--------------------------------------------------------------------------------------
-class cAmbientLight : iBaseLight
-{
- float3 m_vLightColor;
- bool m_bEnable;
-
- float3 IlluminateAmbient(float3 vNormal);
-
- float3 IlluminateDiffuse(float3 vNormal)
- {
- return (float3)0;
- }
-
- float3 IlluminateSpecular(float3 vNormal, int specularPower )
- {
- return (float3)0;
- }
-};
-
-class cHemiAmbientLight : cAmbientLight
-{
- // inherited float4 m_vLightColor is the SkyColor
- float4 m_vGroundColor;
- float4 m_vDirUp;
-
- float3 IlluminateAmbient(float3 vNormal);
-
-};
-
-class cDirectionalLight : cAmbientLight
-{
- // inherited float4 m_vLightColor is the LightColor
- float4 m_vLightDir;
-
- float3 IlluminateDiffuse( float3 vNormal );
-
- float3 IlluminateSpecular( float3 vNormal, int specularPower );
-
-};
-
-class cOmniLight : cAmbientLight
-{
- float3 m_vLightPosition;
- float radius;
-
- float3 IlluminateDiffuse( float3 vNormal );
-
-};
-
-class cSpotLight : cAmbientLight
-{
- float3 m_vLightPosition;
- float3 m_vLightDir;
-};
-
-class cEnvironmentLight : cAmbientLight
-{
- float3 IlluminateSpecular( float3 vNormal, int specularPower );
-};
-
-
diff --git a/tests/hlsl/dxsdk/DynamicShaderLinkage11/DynamicShaderLinkage11_MaterialPSH.h b/tests/hlsl/dxsdk/DynamicShaderLinkage11/DynamicShaderLinkage11_MaterialPSH.h
deleted file mode 100644
index 7f6bc3d22..000000000
--- a/tests/hlsl/dxsdk/DynamicShaderLinkage11/DynamicShaderLinkage11_MaterialPSH.h
+++ /dev/null
@@ -1,103 +0,0 @@
-//--------------------------------------------------------------------------------------
-// File: DynamicShaderLinkage11_MATERIALPSH.h
-//
-// The pixel shader material header file for the DynamicShaderLinkage11 sample.
-//
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-//--------------------------------------------------------------------------------------
-// Interfaces
-//--------------------------------------------------------------------------------------
-interface iBaseMaterial
-{
- float3 GetAmbientColor(float2 vTexcoord);
-
- float3 GetDiffuseColor(float2 vTexcoord);
-
- int GetSpecularPower();
-
-};
-
-//--------------------------------------------------------------------------------------
-// Classes
-//--------------------------------------------------------------------------------------
-class cBaseMaterial : iBaseMaterial
-{
- float3 m_vColor;
- int m_iSpecPower;
-
- float3 GetAmbientColor(float2 vTexcoord)
- {
- return m_vColor;
- }
-
- float3 GetDiffuseColor(float2 vTexcoord)
- {
- return (float3)m_vColor;
- }
-
- int GetSpecularPower()
- {
- return m_iSpecPower;
- }
-
-};
-
-class cPlasticMaterial : cBaseMaterial
-{
-
-};
-
-class cPlasticTexturedMaterial : cPlasticMaterial
-{
- float3 GetAmbientColor(float2 vTexcoord);
-
- float3 GetDiffuseColor(float2 vTexcoord);
-
-};
-
-class cPlasticLightingOnlyMaterial : cBaseMaterial
-{
- float3 GetAmbientColor(float2 vTexcoord)
- {
- return (float3)1.0f;
- }
-
- float3 GetDiffuseColor(float2 vTexcoord)
- {
- return (float3)1.0f;
- }
-
-};
-
-class cRoughMaterial : cBaseMaterial
-{
- int GetSpecularPower()
- {
- return m_iSpecPower;
- }
-};
-
-class cRoughTexturedMaterial : cRoughMaterial
-{
- float3 GetAmbientColor(float2 vTexcoord);
-
- float3 GetDiffuseColor(float2 vTexcoord);
-
-};
-
-
-class cRoughLightingOnlyMaterial : cRoughMaterial
-{
- float3 GetAmbientColor(float2 vTexcoord)
- {
- return (float3)1.0f;
- }
-
- float3 GetDiffuseColor(float2 vTexcoord)
- {
- return (float3)1.0f;
- }
-
-};
diff --git a/tests/hlsl/dxsdk/DynamicShaderLinkage11/DynamicShaderLinkage11_PS.hlsl b/tests/hlsl/dxsdk/DynamicShaderLinkage11/DynamicShaderLinkage11_PS.hlsl
deleted file mode 100644
index 6850ad9cb..000000000
--- a/tests/hlsl/dxsdk/DynamicShaderLinkage11/DynamicShaderLinkage11_PS.hlsl
+++ /dev/null
@@ -1,84 +0,0 @@
-//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues.
-//TEST:COMPARE_HLSL: -profile ps_4_0 -entry PSMain
-//--------------------------------------------------------------------------------------
-// File: DynamicShaderLinkage11.psh
-//
-// The pixel shader header file for the DynamicShaderLinkage11 sample.
-//
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-//--------------------------------------------------------------------------------------
-// Header Includes
-//--------------------------------------------------------------------------------------
-#include "DynamicShaderLinkage11_PSBuffers.h"
-
-// Defines for default static permutated setting
-#if defined( STATIC_PERMUTE )
- #define HEMI_AMBIENT //CONST_AMBIENT //HEMI_AMBIENT
- #define TEXTURE_ENABLE
- #define SPECULAR_ENABLE
-#endif
-
-//--------------------------------------------------------------------------------------
-// Input / Output structures
-//--------------------------------------------------------------------------------------
-struct PS_INPUT
-{
- float4 vPosition : SV_POSITION;
- float3 vNormal : NORMAL;
- float2 vTexcoord : TEXCOORD0;
- float4 vMatrix : TEXCOORD1;
-};
-
-//--------------------------------------------------------------------------------------
-// Abstract Interface Instances for dyamic linkage / permutation
-//--------------------------------------------------------------------------------------
-#if !defined( STATIC_PERMUTE )
- iBaseLight g_abstractAmbientLighting;
- iBaseLight g_abstractDirectLighting;
- iBaseLight g_abstractEnvironmentLighting;
- iBaseMaterial g_abstractMaterial;
-#else
-//--------------------------------------------------------------------------------------
-// Concrete Instances for STATIC_PERMUTE - static permutation
-//--------------------------------------------------------------------------------------
- #if defined( HEMI_AMBIENT )
- #define g_abstractAmbientLighting g_hemiAmbientLight
- #else
- // CONST_AMBIENT
- #define g_abstractAmbientLighting g_ambientLight
- #endif
- #define g_abstractDirectLighting g_directionalLight
- #define g_abstractEnvironmentLighting g_environmentLight
- #if defined( TEXTURE_ENABLE )
- #define g_abstractMaterial g_plasticTexturedMaterial
- #else
- #define g_abstractMaterial g_plasticMaterial
- #endif
-#endif
-
-//--------------------------------------------------------------------------------------
-// Pixel Shader
-//--------------------------------------------------------------------------------------
-float4 PSMain( PS_INPUT Input ) : SV_TARGET
-{
- // Compute the Ambient term
- float3 Ambient = (float3)0.0f;
- Ambient = g_abstractMaterial.GetAmbientColor( Input.vTexcoord ) * g_abstractAmbientLighting.IlluminateAmbient( Input.vNormal );
-
- // Accumulate the Diffuse contribution
- float3 Diffuse = (float3)0.0f;
-
- Diffuse += g_abstractMaterial.GetDiffuseColor( Input.vTexcoord ) * g_abstractDirectLighting.IlluminateDiffuse( Input.vNormal );
-
- // Compute the Specular contribution
- float3 Specular = (float3)0.0f;
- Specular += g_abstractDirectLighting.IlluminateSpecular( Input.vNormal, g_abstractMaterial.GetSpecularPower() );
- Specular += g_abstractEnvironmentLighting.IlluminateSpecular( Input.vNormal, g_abstractMaterial.GetSpecularPower() );
-
- // Accumulate the lighting with saturation
- float3 Lighting = saturate( Ambient + Diffuse + Specular );
-
- return float4(Lighting,1.0f);
-}
diff --git a/tests/hlsl/dxsdk/DynamicShaderLinkage11/DynamicShaderLinkage11_PSBuffers.h b/tests/hlsl/dxsdk/DynamicShaderLinkage11/DynamicShaderLinkage11_PSBuffers.h
deleted file mode 100644
index e2263b832..000000000
--- a/tests/hlsl/dxsdk/DynamicShaderLinkage11/DynamicShaderLinkage11_PSBuffers.h
+++ /dev/null
@@ -1,129 +0,0 @@
-//--------------------------------------------------------------------------------------
-// File: DynamicShaderLinkage11_LightPSH.hlsl
-//
-// The pixel shader light source module file for the DynamicShaderLinkage11 sample.
-//
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-#include "DynamicShaderLinkage11_LightPSH.h"
-#include "DynamicShaderLinkage11_MaterialPSH.h"
-
-//--------------------------------------------------------------------------------------
-// Constant Buffers
-//--------------------------------------------------------------------------------------
-cbuffer cbPerFrame : register( b0 )
-{
- cAmbientLight g_ambientLight;
- cHemiAmbientLight g_hemiAmbientLight;
- cDirectionalLight g_directionalLight;
- cEnvironmentLight g_environmentLight;
- float4 g_vEyeDir;
-};
-
-cbuffer cbPerPrimitive : register( b1 )
-{
- cPlasticMaterial g_plasticMaterial;
- cPlasticTexturedMaterial g_plasticTexturedMaterial;
- cPlasticLightingOnlyMaterial g_plasticLightingOnlyMaterial;
- cRoughMaterial g_roughMaterial;
- cRoughTexturedMaterial g_roughTexturedMaterial;
- cRoughLightingOnlyMaterial g_roughLightingOnlyMaterial;
-};
-
-//--------------------------------------------------------------------------------------
-// Textures and Samplers
-//--------------------------------------------------------------------------------------
-Texture2D g_txDiffuse : register( t0 );
-Texture2D g_txNormalMap : register( t1 );
-TextureCube g_txEnvironmentMap : register( t2 );
-
-SamplerState g_samLinear : register( s0 );
-
-//--------------------------------------------------------------------------------------
-// Lighting Class Methods
-//--------------------------------------------------------------------------------------
-// Ambient Lighting Class Methods
-float3 cAmbientLight::IlluminateAmbient(float3 vNormal)
-{
- return float4( m_vLightColor * m_bEnable, 1.0f);
-}
-
-float3 cHemiAmbientLight::IlluminateAmbient(float3 vNormal)
-{
- float thetha = (dot( vNormal, m_vDirUp ) + 1.0f) / 2.0f;
-
- return lerp( m_vGroundColor, m_vLightColor, thetha) * m_bEnable;
-}
-
-// Directional Light class
-float3 cDirectionalLight::IlluminateDiffuse( float3 vNormal )
-{
- float lambert = saturate(dot( vNormal, m_vLightDir ));
- return ((float3)lambert * m_vLightColor * m_bEnable);
-}
-
-float3 cDirectionalLight::IlluminateSpecular( float3 vNormal, int specularPower )
-{
- float3 H = -normalize(g_vEyeDir) + m_vLightDir;
- float3 halfAngle = normalize( H );
- float specular = pow( max(0,dot( halfAngle, normalize(vNormal) )), specularPower );
-
- return ((float3)specular * m_vLightColor * m_bEnable);
-}
-
-// Omni Light Class
-float3 cOmniLight::IlluminateDiffuse( float3 vNormal )
-{
- return (float3)0.0f; // TO DO!
-}
-
-// Environment Lighting
-float3 cEnvironmentLight::IlluminateSpecular( float3 vNormal, int specularPower )
-{
- // compute reflection vector taking into account a cheap fresnel falloff;
- float3 N = normalize(vNormal);
- float3 E = normalize(g_vEyeDir);
- float3 R = reflect( E, N );
- float fresnel = 1 - dot( -E, N );
- fresnel = (fresnel * fresnel * fresnel );
-
- float3 specular = g_txEnvironmentMap.Sample( g_samLinear, R ) * fresnel;
-
- return (specular * (float3)m_bEnable);
-// return ((float3)fresnel);
-
-}
-
-//--------------------------------------------------------------------------------------
-// Material Class Methods
-//--------------------------------------------------------------------------------------
-// Plastic Material Methods
-float3 cPlasticTexturedMaterial::GetAmbientColor(float2 vTexcoord)
-{
- float4 vDiffuse = (float4)1.0f;
- vDiffuse = g_txDiffuse.Sample( g_samLinear, vTexcoord );
- return m_vColor * vDiffuse;
-}
-
-float3 cPlasticTexturedMaterial::GetDiffuseColor(float2 vTexcoord)
-{
- float4 vDiffuse = (float4)1.0f;
- vDiffuse = g_txDiffuse.Sample( g_samLinear, vTexcoord );
- return m_vColor * vDiffuse;
-}
-
-// Rough Material Methods
-float3 cRoughTexturedMaterial::GetAmbientColor(float2 vTexcoord)
-{
- float4 vDiffuse = (float4)1.0f;
- vDiffuse = g_txDiffuse.Sample( g_samLinear, vTexcoord );
- return m_vColor * vDiffuse;
-}
-
-float3 cRoughTexturedMaterial::GetDiffuseColor(float2 vTexcoord)
-{
- float4 vDiffuse = (float4)1.0f;
- vDiffuse = g_txDiffuse.Sample( g_samLinear, vTexcoord );
- return m_vColor * vDiffuse;
-}
diff --git a/tests/hlsl/dxsdk/DynamicShaderLinkage11/DynamicShaderLinkage11_VS.hlsl b/tests/hlsl/dxsdk/DynamicShaderLinkage11/DynamicShaderLinkage11_VS.hlsl
deleted file mode 100644
index d47f20c23..000000000
--- a/tests/hlsl/dxsdk/DynamicShaderLinkage11/DynamicShaderLinkage11_VS.hlsl
+++ /dev/null
@@ -1,73 +0,0 @@
-//TEST:COMPARE_HLSL: -profile vs_4_0 -entry VSMain
-
-#ifndef __SLANG__
-#define cbPerObject cbPerObject_0
-#define g_mWorldViewProjection g_mWorldViewProjection_0
-#define g_mWorld g_mWorld_0
-#endif
-
-//--------------------------------------------------------------------------------------
-// File: DynamicShaderLinkage11_VS.hlsl
-//
-// The vertex shader file for the DynamicShaderLinkage11 sample.
-//
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-//--------------------------------------------------------------------------------------
-// Globals
-//--------------------------------------------------------------------------------------
-cbuffer cbPerObject : register( b0 )
-{
- float4x4 g_mWorldViewProjection ;//SLANG: : packoffset( c0 );
- float4x4 g_mWorld ;//SLANG: : packoffset( c4 );
-};
-
-//--------------------------------------------------------------------------------------
-// Input / Output structures
-//--------------------------------------------------------------------------------------
-struct VS_INPUT
-{
- float4 vPosition : POSITION;
- float3 vNormal : NORMAL;
- float2 vTexcoord : TEXCOORD0;
-};
-
-struct VS_OUTPUT
-{
- float4 vPosition : SV_POSITION;
- float3 vNormal : NORMAL;
- float2 vTexcoord0 : TEXCOORD0;
- float4 vMatrix : TEXCOORD1; // DEBUG
-};
-
-//--------------------------------------------------------------------------------------
-// Vertex Shader
-//--------------------------------------------------------------------------------------
-// We aliased signed vectors as a unsigned format.
-// Need to recover signed values. The values 1.0 and 2.0
-// are slightly inaccurate here.
-float3 R10G10B10A2_UNORM_TO_R32G32B32_FLOAT( in float3 vVec )
-{
- vVec *= 2.0f;
- return vVec >= 1.0f ? ( vVec - 2.0f ) : vVec;
-}
-
-VS_OUTPUT VSMain( VS_INPUT Input )
-{
-
- VS_OUTPUT Output;
- float3 tmpNormal;
-
- Output.vPosition = mul( Input.vPosition, g_mWorldViewProjection );
-
- // Expand compressed vectors
- tmpNormal = R10G10B10A2_UNORM_TO_R32G32B32_FLOAT( Input.vNormal );
- Output.vNormal = mul( tmpNormal, (float3x3)g_mWorld );
-
- Output.vTexcoord0 = Input.vTexcoord;
-
- Output.vMatrix = (float4)g_mWorld[0]; // DEBUG
- return Output;
-}
-
diff --git a/tests/hlsl/dxsdk/DynamicShaderLinkageFX11/DynamicShaderLinkageFX11.fx b/tests/hlsl/dxsdk/DynamicShaderLinkageFX11/DynamicShaderLinkageFX11.fx
deleted file mode 100644
index c72b98843..000000000
--- a/tests/hlsl/dxsdk/DynamicShaderLinkageFX11/DynamicShaderLinkageFX11.fx
+++ /dev/null
@@ -1,192 +0,0 @@
-//TEST_IGNORE_FILE:
-//--------------------------------------------------------------------------------------
-// File: DynamicShaderLinkageFX11.fx
-//
-// The effect file for the DynamicShaderLinkageFX11 sample.
-//
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-#include "DynamicShaderLinkageFX11_VS.hlsl"
-#include "DynamicShaderLinkageFX11_PS.hlsl"
-
-//
-// Settings for static permutations.
-// All of the pre-5.0 targets need static specialization
-// since they don't support late binding. The below
-// just selects a single specialization but you could
-// create any number of them, each one representing
-// a new shader with the interfaces compiled out
-// due to the compile-time class references.
-//
-
-#define StaticMaterial g_plasticTexturedMaterial
-#define StaticAmbientLight g_ambientLight
-#define StaticDirectLight g_directionalLight
-#define StaticEnvironmentLight g_environmentLight
-
-technique11 FeatureLevel10
-{
- pass
- {
- SetRasterizerState(g_rasterizerState[g_fillMode]);
- SetVertexShader(CompileShader(vs_4_0,
- VSMain()));
- SetPixelShader(CompileShader(ps_4_0,
- PSMainUniform(StaticAmbientLight,
- StaticDirectLight,
- StaticEnvironmentLight,
- StaticMaterial)));
- }
-}
-
-technique11 FeatureLevel10_1
-{
- pass
- {
- SetRasterizerState(g_rasterizerState[g_fillMode]);
- SetVertexShader(CompileShader(vs_4_1,
- VSMain()));
- SetPixelShader(CompileShader(ps_4_1,
- PSMainUniform(StaticAmbientLight,
- StaticDirectLight,
- StaticEnvironmentLight,
- StaticMaterial)));
- }
-}
-
-//
-// Variables for dynamic shader linkage.
-// There are two variations here for dynamic usage.
-// In the first we use the uniform entry point
-// and pass in global interface variables. This
-// creates a shader which refers to the global
-// interface variables when running and we can bind
-// concrete instances in our C++ code by using
-// ID3DX11EffectInterfaceVariable::SetClassInstance.
-// This approach works well when you have several
-// independent variations and want to bind them
-// individually in your C++ code, such as the
-// different lighting and material parameters in
-// this sample.
-//
-
-iBaseLight g_abstractAmbientLighting;
-iBaseLight g_abstractDirectLighting;
-iBaseLight g_abstractEnvironmentLighting;
-iBaseMaterial g_abstractMaterial;
-
-technique11 FeatureLevel11
-{
- pass
- {
- SetRasterizerState(g_rasterizerState[g_fillMode]);
- SetVertexShader(CompileShader(vs_5_0,
- VSMain()));
- SetPixelShader(CompileShader(ps_5_0,
- PSMainUniform(g_abstractAmbientLighting,
- g_abstractDirectLighting,
- g_abstractEnvironmentLighting,
- g_abstractMaterial)));
- }
-}
-
-//
-// In this second variation we use the non-uniform
-// entry point so that we don't have to specify
-// any interfaces when compiling the shader. We
-// then reuse the compiled shader with different
-// BindInterfaces calls so that all bindings are
-// handled automatically by the effect runtime.
-// Below we have multiple techniques where
-// we've given a concrete binding for the material.
-// Lighting parameters are left as interfaces for
-// binding via effect variables, but could also
-// be specified concretely if the number of variations
-// is manageable.
-// This approach works well for a small number of variations
-// that are known in advance, as you can just list them
-// in your effect and you don't need to do the
-// binding work explicitly in your C++ code.
-//
-
-VertexShader g_NonUniVS = CompileShader(vs_5_0, VSMain());
-PixelShader g_NonUniPS = CompileShader(ps_5_0, PSMainNonUniform());
-
-technique11 FeatureLevel11_g_plasticMaterial
-{
- pass
- {
- SetVertexShader(g_NonUniVS);
- SetPixelShader(BindInterfaces(g_NonUniPS,
- g_abstractAmbientLighting,
- g_abstractDirectLighting,
- g_abstractEnvironmentLighting,
- g_plasticMaterial));
- }
-}
-
-technique11 FeatureLevel11_g_plasticTexturedMaterial
-{
- pass
- {
- SetVertexShader(g_NonUniVS);
- SetPixelShader(BindInterfaces(g_NonUniPS,
- g_abstractAmbientLighting,
- g_abstractDirectLighting,
- g_abstractEnvironmentLighting,
- g_plasticTexturedMaterial));
- }
-}
-
-technique11 FeatureLevel11_g_plasticLightingOnlyMaterial
-{
- pass
- {
- SetVertexShader(g_NonUniVS);
- SetPixelShader(BindInterfaces(g_NonUniPS,
- g_abstractAmbientLighting,
- g_abstractDirectLighting,
- g_abstractEnvironmentLighting,
- g_plasticLightingOnlyMaterial));
- }
-}
-
-technique11 FeatureLevel11_g_roughMaterial
-{
- pass
- {
- SetVertexShader(g_NonUniVS);
- SetPixelShader(BindInterfaces(g_NonUniPS,
- g_abstractAmbientLighting,
- g_abstractDirectLighting,
- g_abstractEnvironmentLighting,
- g_roughMaterial));
- }
-}
-
-technique11 FeatureLevel11_g_roughTexturedMaterial
-{
- pass
- {
- SetVertexShader(g_NonUniVS);
- SetPixelShader(BindInterfaces(g_NonUniPS,
- g_abstractAmbientLighting,
- g_abstractDirectLighting,
- g_abstractEnvironmentLighting,
- g_roughTexturedMaterial));
- }
-}
-
-technique11 FeatureLevel11_g_roughLightingOnlyMaterial
-{
- pass
- {
- SetVertexShader(g_NonUniVS);
- SetPixelShader(BindInterfaces(g_NonUniPS,
- g_abstractAmbientLighting,
- g_abstractDirectLighting,
- g_abstractEnvironmentLighting,
- g_roughLightingOnlyMaterial));
- }
-}
diff --git a/tests/hlsl/dxsdk/DynamicShaderLinkageFX11/DynamicShaderLinkageFX11_LightPSH.h b/tests/hlsl/dxsdk/DynamicShaderLinkageFX11/DynamicShaderLinkageFX11_LightPSH.h
deleted file mode 100644
index 6f9a0f4d8..000000000
--- a/tests/hlsl/dxsdk/DynamicShaderLinkageFX11/DynamicShaderLinkageFX11_LightPSH.h
+++ /dev/null
@@ -1,82 +0,0 @@
-//--------------------------------------------------------------------------------------
-// File: DynamicShaderLinkageFX11_LightPSH.h
-//
-// The pixel shader light header file for the DynamicShaderLinkageFX11 sample.
-//
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-//--------------------------------------------------------------------------------------
-// Interfaces
-//--------------------------------------------------------------------------------------
-interface iBaseLight
-{
- float3 IlluminateAmbient(float3 vNormal);
-
- float3 IlluminateDiffuse(float3 vNormal);
-
- float3 IlluminateSpecular(float3 vNormal, int specularPower );
-
-};
-
-//--------------------------------------------------------------------------------------
-// Classes
-//--------------------------------------------------------------------------------------
-class cAmbientLight : iBaseLight
-{
- float3 m_vLightColor;
- bool m_bEnable;
-
- float3 IlluminateAmbient(float3 vNormal);
-
- float3 IlluminateDiffuse(float3 vNormal)
- {
- return (float3)0;
- }
-
- float3 IlluminateSpecular(float3 vNormal, int specularPower )
- {
- return (float3)0;
- }
-};
-
-class cHemiAmbientLight : cAmbientLight
-{
- // inherited float4 m_vLightColor is the SkyColor
- float4 m_vGroundColor;
- float4 m_vDirUp;
-
- float3 IlluminateAmbient(float3 vNormal);
-
-};
-
-class cDirectionalLight : cAmbientLight
-{
- // inherited float4 m_vLightColor is the LightColor
- float4 m_vLightDir;
-
- float3 IlluminateDiffuse( float3 vNormal );
-
- float3 IlluminateSpecular( float3 vNormal, int specularPower );
-
-};
-
-class cOmniLight : cAmbientLight
-{
- float3 m_vLightPosition;
- float radius;
-
- float3 IlluminateDiffuse( float3 vNormal );
-
-};
-
-class cSpotLight : cAmbientLight
-{
- float3 m_vLightPosition;
- float3 m_vLightDir;
-};
-
-class cEnvironmentLight : cAmbientLight
-{
- float3 IlluminateSpecular( float3 vNormal, int specularPower );
-};
diff --git a/tests/hlsl/dxsdk/DynamicShaderLinkageFX11/DynamicShaderLinkageFX11_MaterialPSH.h b/tests/hlsl/dxsdk/DynamicShaderLinkageFX11/DynamicShaderLinkageFX11_MaterialPSH.h
deleted file mode 100644
index cd54a283d..000000000
--- a/tests/hlsl/dxsdk/DynamicShaderLinkageFX11/DynamicShaderLinkageFX11_MaterialPSH.h
+++ /dev/null
@@ -1,103 +0,0 @@
-//--------------------------------------------------------------------------------------
-// File: DynamicShaderLinkageFX11_MaterialPSH.h
-//
-// The pixel shader material header file for the DynamicShaderLinkageFX11 sample.
-//
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-//--------------------------------------------------------------------------------------
-// Interfaces
-//--------------------------------------------------------------------------------------
-interface iBaseMaterial
-{
- float3 GetAmbientColor(float2 vTexcoord);
-
- float3 GetDiffuseColor(float2 vTexcoord);
-
- int GetSpecularPower();
-
-};
-
-//--------------------------------------------------------------------------------------
-// Classes
-//--------------------------------------------------------------------------------------
-class cBaseMaterial : iBaseMaterial
-{
- float3 m_vColor;
- int m_iSpecPower;
-
- float3 GetAmbientColor(float2 vTexcoord)
- {
- return m_vColor;
- }
-
- float3 GetDiffuseColor(float2 vTexcoord)
- {
- return (float3)m_vColor;
- }
-
- int GetSpecularPower()
- {
- return m_iSpecPower;
- }
-
-};
-
-class cPlasticMaterial : cBaseMaterial
-{
-
-};
-
-class cPlasticTexturedMaterial : cPlasticMaterial
-{
- float3 GetAmbientColor(float2 vTexcoord);
-
- float3 GetDiffuseColor(float2 vTexcoord);
-
-};
-
-class cPlasticLightingOnlyMaterial : cBaseMaterial
-{
- float3 GetAmbientColor(float2 vTexcoord)
- {
- return (float3)1.0f;
- }
-
- float3 GetDiffuseColor(float2 vTexcoord)
- {
- return (float3)1.0f;
- }
-
-};
-
-class cRoughMaterial : cBaseMaterial
-{
- int GetSpecularPower()
- {
- return m_iSpecPower;
- }
-};
-
-class cRoughTexturedMaterial : cRoughMaterial
-{
- float3 GetAmbientColor(float2 vTexcoord);
-
- float3 GetDiffuseColor(float2 vTexcoord);
-
-};
-
-
-class cRoughLightingOnlyMaterial : cRoughMaterial
-{
- float3 GetAmbientColor(float2 vTexcoord)
- {
- return (float3)1.0f;
- }
-
- float3 GetDiffuseColor(float2 vTexcoord)
- {
- return (float3)1.0f;
- }
-
-};
diff --git a/tests/hlsl/dxsdk/DynamicShaderLinkageFX11/DynamicShaderLinkageFX11_PSBuffers.h b/tests/hlsl/dxsdk/DynamicShaderLinkageFX11/DynamicShaderLinkageFX11_PSBuffers.h
deleted file mode 100644
index 3b4c528be..000000000
--- a/tests/hlsl/dxsdk/DynamicShaderLinkageFX11/DynamicShaderLinkageFX11_PSBuffers.h
+++ /dev/null
@@ -1,152 +0,0 @@
-//--------------------------------------------------------------------------------------
-// File: DynamicShaderLinkageFX11_LightPSH.hlsl
-//
-// The pixel shader light source module file for the DynamicShaderLinkageFX11 sample.
-//
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-#include "DynamicShaderLinkageFX11_LightPSH.h"
-#include "DynamicShaderLinkageFX11_MaterialPSH.h"
-
-//--------------------------------------------------------------------------------------
-// Constant Buffers
-//--------------------------------------------------------------------------------------
-cbuffer cbPerFrame : register( b0 )
-{
- cAmbientLight g_ambientLight;
- cHemiAmbientLight g_hemiAmbientLight;
- cDirectionalLight g_directionalLight;
- cEnvironmentLight g_environmentLight;
- float4 g_vEyeDir;
-};
-
-cbuffer cbPerPrimitive : register( b1 )
-{
- cPlasticMaterial g_plasticMaterial;
- cPlasticTexturedMaterial g_plasticTexturedMaterial;
- cPlasticLightingOnlyMaterial g_plasticLightingOnlyMaterial;
- cRoughMaterial g_roughMaterial;
- cRoughTexturedMaterial g_roughTexturedMaterial;
- cRoughLightingOnlyMaterial g_roughLightingOnlyMaterial;
-};
-
-//--------------------------------------------------------------------------------------
-// Textures and Samplers
-//--------------------------------------------------------------------------------------
-Texture2D g_txDiffuse : register( t0 );
-Texture2D g_txNormalMap : register( t1 );
-TextureCube g_txEnvironmentMap : register( t2 );
-
-SamplerState g_samLinear : register( s0 )
-{
- Filter = MIN_MAG_MIP_LINEAR;
- AddressU = WRAP;
- AddressV = WRAP;
- AddressW = WRAP;
-};
-
-//--------------------------------------------------------------------------------------
-// Rasterization State
-//--------------------------------------------------------------------------------------
-uint g_fillMode = 0;
-
-RasterizerState g_rasterizerState[2]
-{
-{
- FillMode = SOLID;
- MultisampleEnable = true;
-},
-{
- FillMode = WIREFRAME;
- MultisampleEnable = true;
-}
-};
-
-//--------------------------------------------------------------------------------------
-// Lighting Class Methods
-//--------------------------------------------------------------------------------------
-// Ambient Lighting Class Methods
-float3 cAmbientLight::IlluminateAmbient(float3 vNormal)
-{
- return m_vLightColor * m_bEnable;
-}
-
-float3 cHemiAmbientLight::IlluminateAmbient(float3 vNormal)
-{
- float thetha = (dot( vNormal, m_vDirUp.xyz ) + 1.0f) / 2.0f;
-
- return lerp( m_vGroundColor.xyz, m_vLightColor, thetha) * m_bEnable;
-}
-
-// Directional Light class
-float3 cDirectionalLight::IlluminateDiffuse( float3 vNormal )
-{
- float lambert = saturate(dot( vNormal, m_vLightDir.xyz ));
- return ((float3)lambert * m_vLightColor * m_bEnable);
-}
-
-float3 cDirectionalLight::IlluminateSpecular( float3 vNormal, int specularPower )
-{
- float3 H = -normalize(g_vEyeDir.xyz) + m_vLightDir.xyz;
- float3 halfAngle = normalize( H );
- float specular = pow( max(0,dot( halfAngle, normalize(vNormal) )), specularPower );
-
- return ((float3)specular * m_vLightColor * m_bEnable);
-}
-
-// Omni Light Class
-float3 cOmniLight::IlluminateDiffuse( float3 vNormal )
-{
- return (float3)0.0f; // TO DO!
-}
-
-// Environment Lighting
-float3 cEnvironmentLight::IlluminateSpecular( float3 vNormal, int specularPower )
-{
- // compute reflection vector taking into account a cheap fresnel falloff;
- float3 N = normalize(vNormal);
- float3 E = normalize(g_vEyeDir.xyz);
- float3 R = reflect( E, N );
- float fresnel = 1 - dot( -E, N );
- fresnel = (fresnel * fresnel * fresnel );
-
- float3 specular = g_txEnvironmentMap.Sample( g_samLinear, R ).xyz * fresnel;
-
- return (specular * (float3)m_bEnable);
-// return ((float3)fresnel);
-
-}
-
-//--------------------------------------------------------------------------------------
-// Material Class Methods
-//--------------------------------------------------------------------------------------
-// Plastic Material Methods
-float3 cPlasticTexturedMaterial::GetAmbientColor(float2 vTexcoord)
-{
- float4 vDiffuse = (float4)1.0f;
- vDiffuse = g_txDiffuse.Sample( g_samLinear, vTexcoord );
- return m_vColor * vDiffuse.xyz;
-}
-
-float3 cPlasticTexturedMaterial::GetDiffuseColor(float2 vTexcoord)
-{
- float4 vDiffuse = (float4)1.0f;
- vDiffuse = g_txDiffuse.Sample( g_samLinear, vTexcoord );
- return m_vColor * vDiffuse.xyz;
-}
-
-// Rough Material Methods
-float3 cRoughTexturedMaterial::GetAmbientColor(float2 vTexcoord)
-{
- float4 vDiffuse = (float4)1.0f;
- vDiffuse = g_txDiffuse.Sample( g_samLinear, vTexcoord );
- return m_vColor * vDiffuse.xyz;
-}
-
-float3 cRoughTexturedMaterial::GetDiffuseColor(float2 vTexcoord)
-{
- float4 vDiffuse = (float4)1.0f;
- vDiffuse = g_txDiffuse.Sample( g_samLinear, vTexcoord );
- return m_vColor * vDiffuse.xyz;
-}
diff --git a/tests/hlsl/dxsdk/DynamicShaderLinkageFX11/DynamicShaderLinkageFX11_ps.hlsl b/tests/hlsl/dxsdk/DynamicShaderLinkageFX11/DynamicShaderLinkageFX11_ps.hlsl
deleted file mode 100644
index 55d206259..000000000
--- a/tests/hlsl/dxsdk/DynamicShaderLinkageFX11/DynamicShaderLinkageFX11_ps.hlsl
+++ /dev/null
@@ -1,113 +0,0 @@
-//TEST_IGNORE_FILE:
-//--------------------------------------------------------------------------------------
-// File: DynamicShaderLinkageFX11.psh
-//
-// The pixel shader header file for the DynamicShaderLinkageFX11 sample.
-//
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-//--------------------------------------------------------------------------------------
-// Header Includes
-//--------------------------------------------------------------------------------------
-#include "DynamicShaderLinkageFX11_PSBuffers.h"
-
-//--------------------------------------------------------------------------------------
-// Input / Output structures
-//--------------------------------------------------------------------------------------
-struct PS_INPUT
-{
- float4 vPosition : SV_POSITION;
- float3 vNormal : NORMAL;
- float2 vTexcoord : TEXCOORD0;
- float4 vMatrix : TEXCOORD1;
-};
-
-//--------------------------------------------------------------------------------------
-// Pixel Shader
-//--------------------------------------------------------------------------------------
-
-// This pixel shader uses several interfaces during its
-// work. We show three different ways of providing interface
-// bindings for the PS and those have two different
-// entry points so we've separated the base PS code
-// into a worker routine that's called by the entry
-// points. Normally only one technique would be used
-// and this layering of entry point and worker would
-// not be necessary.
-float4 PSMainWorker( iBaseLight ambientLighting,
- iBaseLight directLighting,
- iBaseLight environmentLighting,
- iBaseMaterial material,
- PS_INPUT Input )
-{
- // Compute the Ambient term
- float3 Ambient = (float3)0.0f;
- Ambient = material.GetAmbientColor( Input.vTexcoord ) * ambientLighting.IlluminateAmbient( Input.vNormal );
-
- // Accumulate the Diffuse contribution
- float3 Diffuse = (float3)0.0f;
-
- Diffuse += material.GetDiffuseColor( Input.vTexcoord ) * directLighting.IlluminateDiffuse( Input.vNormal );
-
- // Compute the Specular contribution
- float3 Specular = (float3)0.0f;
- Specular += directLighting.IlluminateSpecular( Input.vNormal, material.GetSpecularPower() );
- Specular += environmentLighting.IlluminateSpecular( Input.vNormal, material.GetSpecularPower() );
-
- // Accumulate the lighting with saturation
- float3 Lighting = saturate( Ambient + Diffuse + Specular);
-
- return float4(Lighting,1.0f);
-}
-
-// One way to provide bindings for shaders in Effects 11 is
-// to use uniform interface parameters. As with non-interface
-// uniform parameters you must specify a value for these
-// parameters in your CompileShader invocations in the effect.
-// You can provide concrete class instances if you want
-// to statically specialize your shaders, such as for targets
-// that don't support abstract interfaces; or you can provide
-// other interfaces that you bind using effect variables.
-// Both are shown in this sample's technique passes.
-float4 PSMainUniform( uniform iBaseLight ambientLighting,
- uniform iBaseLight directLighting,
- uniform iBaseLight environmentLighting,
- uniform iBaseMaterial material,
- PS_INPUT Input ) : SV_Target
-{
- return PSMainWorker(ambientLighting,
- directLighting,
- environmentLighting,
- material,
- Input);
-}
-
-// Another way to use Effects 11 with interfaces is
-// to have non-uniform parameters, which then are
-// bound with a BindInterfaces in a technique pass.
-// BindInterfaces gives concrete instances to use
-// with a shader but does not do static specialization,
-// it just saves information for the effect runtime
-// to use when setting up the shader to run.
-// This lets you share a single shader, compiled with
-// interface usage, while still getting the convenience
-// of declaring concrete bindings in the effect and
-// not needed explicit binding in code via effect
-// variable updates. If you have many different
-// variations it may be simpler to use bindings
-// through effect variables, as then you don't
-// need to list every possible binding set in your
-// techniques.
-float4 PSMainNonUniform( iBaseLight ambientLighting,
- iBaseLight directLighting,
- iBaseLight environmentLighting,
- iBaseMaterial material,
- PS_INPUT Input ) : SV_Target
-{
- return PSMainWorker(ambientLighting,
- directLighting,
- environmentLighting,
- material,
- Input);
-}
diff --git a/tests/hlsl/dxsdk/DynamicShaderLinkageFX11/DynamicShaderLinkageFX11_vs.hlsl b/tests/hlsl/dxsdk/DynamicShaderLinkageFX11/DynamicShaderLinkageFX11_vs.hlsl
deleted file mode 100644
index 4791e5786..000000000
--- a/tests/hlsl/dxsdk/DynamicShaderLinkageFX11/DynamicShaderLinkageFX11_vs.hlsl
+++ /dev/null
@@ -1,65 +0,0 @@
-//TEST_IGNORE_FILE:
-//--------------------------------------------------------------------------------------
-// File: DynamicShaderLinkageFX11_VS.hlsl
-//
-// The vertex shader file for the DynamicShaderLinkageFX11 sample.
-//
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-//--------------------------------------------------------------------------------------
-// Globals
-//--------------------------------------------------------------------------------------
-cbuffer cbPerObject : register( b0 )
-{
- float4x4 g_mWorldViewProjection : packoffset( c0 );
- float4x4 g_mWorld : packoffset( c4 );
-};
-
-//--------------------------------------------------------------------------------------
-// Input / Output structures
-//--------------------------------------------------------------------------------------
-struct VS_INPUT
-{
- float4 vPosition : POSITION;
- float3 vNormal : NORMAL;
- float2 vTexcoord : TEXCOORD0;
-};
-
-struct VS_OUTPUT
-{
- float4 vPosition : SV_POSITION;
- float3 vNormal : NORMAL;
- float2 vTexcoord0 : TEXCOORD0;
- float4 vMatrix : TEXCOORD1; // DEBUG
-};
-
-//--------------------------------------------------------------------------------------
-// Vertex Shader
-//--------------------------------------------------------------------------------------
-// We aliased signed vectors as a unsigned format.
-// Need to recover signed values. The values 1.0 and 2.0
-// are slightly inaccurate here.
-float3 R10G10B10A2_UNORM_TO_R32G32B32_FLOAT( in float3 vVec )
-{
- vVec *= 2.0f;
- return vVec >= 1.0f ? ( vVec - 2.0f ) : vVec;
-}
-
-VS_OUTPUT VSMain( VS_INPUT Input )
-{
-
- VS_OUTPUT Output;
- float3 tmpNormal;
-
- Output.vPosition = mul( Input.vPosition, g_mWorldViewProjection );
-
- // Expand compressed vectors
- tmpNormal = R10G10B10A2_UNORM_TO_R32G32B32_FLOAT( Input.vNormal );
- Output.vNormal = mul( tmpNormal, (float3x3)g_mWorld );
-
- Output.vTexcoord0 = Input.vTexcoord;
-
- Output.vMatrix = (float4)g_mWorld[0]; // DEBUG
- return Output;
-}
diff --git a/tests/hlsl/dxsdk/FixedFuncEMUFX11/FixedFuncEMU.fx b/tests/hlsl/dxsdk/FixedFuncEMUFX11/FixedFuncEMU.fx
deleted file mode 100644
index 699df8655..000000000
--- a/tests/hlsl/dxsdk/FixedFuncEMUFX11/FixedFuncEMU.fx
+++ /dev/null
@@ -1,468 +0,0 @@
-//TEST_IGNORE_FILE:
-// FixedFuncEMU.fx
-// Copyright (c) 2005 Microsoft Corporation. All rights reserved.
-//
-
-struct VSSceneIn
-{
- float3 pos : POSITION; //position of the particle
- float3 norm : NORMAL; //velocity of the particle
- float2 tex : TEXTURE0; //tex coords
-};
-
-struct VSSceneOut
-{
- float4 pos : SV_Position; //position
- float2 tex : TEXTURE0; //texture coordinate
- float3 wPos : TEXTURE1; //world space pos
- float3 wNorm : TEXTURE2; //world space normal
- float4 colorD : COLOR0; //color for gouraud and flat shading
- float4 colorS : COLOR1; //color for specular
- float fogDist : FOGDISTANCE; //distance used for fog calculations
- float3 planeDist : SV_ClipDistance0; //clip distance for 3 planes
-};
-
-struct PSSceneIn
-{
- float4 pos : SV_Position; //position
- float2 tex : TEXTURE0; //texture coordinate
- float3 wPos : TEXTURE1; //world space pos
- float3 wNorm : TEXTURE2; //world space normal
- float4 colorD : COLOR0; //color for gouraud and flat shading
- float4 colorS : COLOR1; //color for specular
- float fogDist : FOGDISTANCE; //distance used for fog calculations
-};
-
-struct Light
-{
- float4 Position;
- float4 Diffuse;
- float4 Specular;
- float4 Ambient;
- float4 Atten;
-};
-
-#define FOGMODE_NONE 0
-#define FOGMODE_LINEAR 1
-#define FOGMODE_EXP 2
-#define FOGMODE_EXP2 3
-#define E 2.71828
-
-cbuffer cbLights
-{
- float4 g_clipplanes[3];
- Light g_lights[8];
-};
-
-cbuffer cbPerFrame
-{
- float4x4 g_mWorld;
- float4x4 g_mView;
- float4x4 g_mProj;
- float4x4 g_mInvProj;
- float4x4 g_mLightViewProj;
-};
-
-cbuffer cbPerTechnique
-{
- bool g_bEnableLighting = true;
- bool g_bEnableClipping = true;
- bool g_bPointScaleEnable = false;
- float g_pointScaleA;
- float g_pointScaleB;
- float g_pointScaleC;
- float g_pointSize;
-
- //fog params
- int g_fogMode = FOGMODE_NONE;
- float g_fogStart;
- float g_fogEnd;
- float g_fogDensity;
- float4 g_fogColor;
-};
-
-cbuffer cbPerViewChange
-{
- //viewport params
- float g_viewportHeight;
- float g_viewportWidth;
- float g_nearPlane;
-};
-
-cbuffer cbImmutable
-{
- float3 g_positions[4] =
- {
- float3( -0.5, 0.5, 0 ),
- float3( 0.5, 0.5, 0 ),
- float3( -0.5, -0.5, 0 ),
- float3( 0.5, -0.5, 0 ),
- };
-};
-
-Texture2D g_txDiffuse;
-Texture2D g_txProjected;
-SamplerState g_samLinear
-{
- Filter = MIN_MAG_MIP_LINEAR;
- AddressU = Clamp;
- AddressV = Clamp;
-};
-
-DepthStencilState DisableDepth
-{
- DepthEnable = FALSE;
- DepthWriteMask = ZERO;
-};
-
-DepthStencilState EnableDepth
-{
- DepthEnable = TRUE;
- DepthWriteMask = ALL;
-};
-
-struct ColorsOutput
-{
- float4 Diffuse;
- float4 Specular;
-};
-
-ColorsOutput CalcLighting( float3 worldNormal, float3 worldPos, float3 cameraPos )
-{
- ColorsOutput output = (ColorsOutput)0.0;
-
- for(int i=0; i<8; i++)
- {
- float3 toLight = g_lights[i].Position.xyz - worldPos;
- float lightDist = length( toLight );
- float fAtten = 1.0/dot( g_lights[i].Atten, float4(1,lightDist,lightDist*lightDist,0) );
- float3 lightDir = normalize( toLight );
- float3 halfAngle = normalize( normalize(-cameraPos) + lightDir );
-
- output.Diffuse += max(0,dot( lightDir, worldNormal ) * g_lights[i].Diffuse * fAtten) + g_lights[i].Ambient;
- output.Specular += max(0,pow( dot( halfAngle, worldNormal ), 64 ) * g_lights[i].Specular * fAtten );
- }
-
- return output;
-}
-
-//
-// VS for emulating fixed function pipeline
-//
-VSSceneOut VSScenemain(VSSceneIn input)
-{
- VSSceneOut output = (VSSceneOut)0.0;
-
- //output our final position in clipspace
- float4 worldPos = mul( float4( input.pos, 1 ), g_mWorld );
- float4 cameraPos = mul( worldPos, g_mView ); //Save cameraPos for fog calculations
- output.pos = mul( cameraPos, g_mProj );
-
- //save world pos for later
- output.wPos = worldPos;
-
- //save the fog distance for later
- output.fogDist = cameraPos.z;
-
- //find our clipping planes (fixed function clipping is done in world space)
- if( g_bEnableClipping )
- {
- worldPos.w = 1;
-
- //calc the distance from the 3 clipping planes
- output.planeDist.x = dot( worldPos, g_clipplanes[0] );
- output.planeDist.y = dot( worldPos, g_clipplanes[1] );
- output.planeDist.z = dot( worldPos, g_clipplanes[2] );
- }
- else
- {
- output.planeDist.x = 1;
- output.planeDist.y = 1;
- output.planeDist.z = 1;
- }
-
- //do gouraud lighting
- if( g_bEnableLighting )
- {
- float3 worldNormal = normalize( mul( input.norm, (float3x3)g_mWorld ) );
- output.wNorm = worldNormal;
- ColorsOutput cOut = CalcLighting( worldNormal, worldPos, cameraPos );
- output.colorD = cOut.Diffuse;
- output.colorS = cOut.Specular;
- }
- else
- {
- output.colorD = float4(1,1,1,1);
- }
-
- //propogate texture coordinate
- output.tex = input.tex;
-
- return output;
-}
-
-//
-// VS for rendering in screen space
-//
-PSSceneIn VSScreenSpacemain(VSSceneIn input)
-{
- PSSceneIn output = (PSSceneIn)0.0;
-
- //output our final position
- output.pos.x = (input.pos.x / (g_viewportWidth/2.0)) -1;
- output.pos.y = -(input.pos.y / (g_viewportHeight/2.0)) +1;
- output.pos.z = input.pos.z;
- output.pos.w = 1;
-
- //propogate texture coordinate
- output.tex = input.tex;
- output.colorD = float4(1,1,1,1);
-
- return output;
-}
-
-//
-// GS for flat shaded rendering
-//
-
-[maxvertexcount(3)]
-void GSFlatmain( triangle VSSceneOut input[3], inout TriangleStream<VSSceneOut> FlatTriStream )
-{
- VSSceneOut output;
-
- //
- // Calculate the face normal
- //
- float3 faceEdgeA = input[1].wPos - input[0].wPos;
- float3 faceEdgeB = input[2].wPos - input[0].wPos;
-
- //
- // Cross product
- //
- float3 faceNormal = cross(faceEdgeA, faceEdgeB);
-
- //
- //calculate the face center
- //
- float3 faceCenter = (input[0].wPos + input[1].wPos + input[2].wPos)/3.0;
-
- //find world pos and camera pos
- float4 worldPos = float4( faceCenter, 1 );
- float4 cameraPos = mul( worldPos, g_mView );
-
- //do shading
- float3 worldNormal = normalize( faceNormal );
- ColorsOutput cOut = CalcLighting( worldNormal, worldPos, cameraPos );
-
- for(int i=0; i<3; i++)
- {
- output = input[i];
- output.colorD = cOut.Diffuse;
- output.colorS = cOut.Specular;
-
- FlatTriStream.Append( output );
- }
- FlatTriStream.RestartStrip();
-}
-
-//
-// GS for point rendering
-//
-[maxvertexcount(12)]
-void GSPointmain( triangle VSSceneOut input[3], inout TriangleStream<VSSceneOut> PointTriStream )
-{
- VSSceneOut output;
-
- //
- // Calculate the point size
- //
- //float fSizeX = (g_pointSize/g_viewportWidth)/4.0;
- float fSizeY = (g_pointSize/g_viewportHeight)/4.0;
- float fSizeX = fSizeY;
-
- for(int i=0; i<3; i++)
- {
- output = input[i];
-
- //find world pos and camera pos
- float4 worldPos = float4(input[i].wPos,1);
- float4 cameraPos = mul( worldPos, g_mView );
-
- //find our size
- if( g_bPointScaleEnable )
- {
- float dEye = length( cameraPos.xyz );
- fSizeX = fSizeY = g_viewportHeight * g_pointSize *
- sqrt( 1.0f/( g_pointScaleA + g_pointScaleB*dEye + g_pointScaleC*(dEye*dEye) ) );
- }
-
- //do shading
- if(g_bEnableLighting)
- {
- float3 worldNormal = input[i].wNorm;
- ColorsOutput cOut = CalcLighting( worldNormal, worldPos, cameraPos );
-
- output.colorD = cOut.Diffuse;
- output.colorS = cOut.Specular;
- }
- else
- {
- output.colorD = float4(1,1,1,1);
- }
-
- output.tex = input[i].tex;
-
- //
- // Emit two new triangles
- //
- for(int i=0; i<4; i++)
- {
- float4 outPos = mul( worldPos, g_mView );
- output.pos = mul( outPos, g_mProj );
- float zoverNear = (outPos.z)/g_nearPlane;
- float4 posSize = float4( g_positions[i].x*fSizeX*zoverNear,
- g_positions[i].y*fSizeY*zoverNear,
- 0,
- 0 );
- output.pos += posSize;
-
- PointTriStream.Append(output);
- }
- PointTriStream.RestartStrip();
- }
-}
-
-//
-// Calculates fog factor based upon distance
-//
-float CalcFogFactor( float d )
-{
- float fogCoeff = 1.0;
-
- if( FOGMODE_LINEAR == g_fogMode )
- {
- fogCoeff = (g_fogEnd - d)/(g_fogEnd - g_fogStart);
- }
- else if( FOGMODE_EXP == g_fogMode )
- {
- fogCoeff = 1.0 / pow( E, d*g_fogDensity );
- }
- else if( FOGMODE_EXP2 == g_fogMode )
- {
- fogCoeff = 1.0 / pow( E, d*d*g_fogDensity*g_fogDensity );
- }
-
- return clamp( fogCoeff, 0, 1 );
-}
-
-//
-// PS for rendering with clip planes
-//
-float4 PSScenemain(PSSceneIn input) : SV_Target
-{
- //calculate the fog factor
- float fog = CalcFogFactor( input.fogDist );
-
- //calculate the color based off of the normal, textures, etc
- float4 normalColor = g_txDiffuse.Sample( g_samLinear, input.tex ) * input.colorD + input.colorS;
-
- //calculate the color from the projected texture
- float4 cookieCoord = mul( float4(input.wPos,1), g_mLightViewProj );
- //since we don't have texldp, we must perform the w divide ourselves befor the texture lookup
- cookieCoord.xy = 0.5 * cookieCoord.xy / cookieCoord.w + float2( 0.5, 0.5 );
- float4 cookieColor = float4(0,0,0,0);
- if( cookieCoord.z > 0 )
- cookieColor = g_txProjected.Sample( g_samLinear, cookieCoord.xy );
-
- //for standard light-modulating effects just multiply normalcolor and coookiecolor
- normalColor += cookieColor;
-
- return fog * normalColor + (1.0 - fog)*g_fogColor;
-}
-
-//
-// PS for rendering with alpha test
-//
-float4 PSAlphaTestmain(PSSceneIn input) : SV_Target
-{
- float4 color = g_txDiffuse.Sample( g_samLinear, input.tex ) * input.colorD;
- if( color.a < 0.5 )
- discard;
- return color;
-}
-
-//
-// RenderSceneGouraud - renders gouraud-shaded primitives
-//
-technique10 RenderSceneGouraud
-{
- pass p0
- {
- SetVertexShader( CompileShader( vs_4_0, VSScenemain() ) );
- SetGeometryShader( NULL );
- SetPixelShader( CompileShader( ps_4_0, PSScenemain() ) );
-
- SetDepthStencilState( EnableDepth, 0 );
- }
-}
-
-//
-// RenderSceneFlat - renders flat-shaded primitives
-//
-technique10 RenderSceneFlat
-{
- pass p0
- {
- SetVertexShader( CompileShader( vs_4_0, VSScenemain() ) );
- SetGeometryShader( CompileShader( gs_4_0, GSFlatmain() ) );
- SetPixelShader( CompileShader( ps_4_0, PSScenemain() ) );
-
- SetDepthStencilState( EnableDepth, 0 );
- }
-}
-
-//
-// RenderScenePoint - replaces d3dfill_point
-//
-technique10 RenderScenePoint
-{
- pass p0
- {
- SetVertexShader( CompileShader( vs_4_0, VSScenemain() ) );
- SetGeometryShader( CompileShader( gs_4_0, GSPointmain() ) );
- SetPixelShader( CompileShader( ps_4_0, PSScenemain() ) );
-
- SetDepthStencilState( EnableDepth, 0 );
- }
-}
-
-//
-// RenderScreneSpace - shows how to render something in screenspace
-//
-technique10 RenderScreenSpaceAlphaTest
-{
- pass p0
- {
- SetVertexShader( CompileShader( vs_4_0, VSScreenSpacemain() ) );
- SetGeometryShader( NULL );
- SetPixelShader( CompileShader( ps_4_0, PSAlphaTestmain() ) );
-
- SetDepthStencilState( DisableDepth, 0 );
- }
-}
-
-//
-// RenderScreneSpace - shows how to render something in screenspace
-//
-technique10 RenderTextureOnly
-{
- pass p0
- {
- SetVertexShader( CompileShader( vs_4_0, VSScenemain() ) );
- SetGeometryShader( NULL );
- SetPixelShader( CompileShader( ps_4_0, PSScenemain() ) );
-
- SetDepthStencilState( EnableDepth, 0 );
- }
-}
-
diff --git a/tests/hlsl/dxsdk/FluidCS11/ComputeShaderSort11.hlsl b/tests/hlsl/dxsdk/FluidCS11/ComputeShaderSort11.hlsl
deleted file mode 100644
index 6e14bc10e..000000000
--- a/tests/hlsl/dxsdk/FluidCS11/ComputeShaderSort11.hlsl
+++ /dev/null
@@ -1,75 +0,0 @@
-//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues.
-//TEST:COMPARE_HLSL: -profile cs_4_0 -entry BitonicSort -entry MatrixTranspose
-//--------------------------------------------------------------------------------------
-// File: ComputeShaderSort11.hlsl
-//
-// This file contains the compute shaders to perform GPU sorting using DirectX 11.
-//
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-#define BITONIC_BLOCK_SIZE 512
-
-#define TRANSPOSE_BLOCK_SIZE 16
-
-//--------------------------------------------------------------------------------------
-// Constant Buffers
-//--------------------------------------------------------------------------------------
-cbuffer CB : register( b0 )
-{
- unsigned int g_iLevel;
- unsigned int g_iLevelMask;
- unsigned int g_iWidth;
- unsigned int g_iHeight;
-};
-
-//--------------------------------------------------------------------------------------
-// Structured Buffers
-//--------------------------------------------------------------------------------------
-StructuredBuffer<unsigned int> Input : register( t0 );
-RWStructuredBuffer<unsigned int> Data : register( u0 );
-
-//--------------------------------------------------------------------------------------
-// Bitonic Sort Compute Shader
-//--------------------------------------------------------------------------------------
-groupshared unsigned int shared_data[BITONIC_BLOCK_SIZE];
-
-[numthreads(BITONIC_BLOCK_SIZE, 1, 1)]
-void BitonicSort( uint3 Gid : SV_GroupID,
- uint3 DTid : SV_DispatchThreadID,
- uint3 GTid : SV_GroupThreadID,
- uint GI : SV_GroupIndex )
-{
- // Load shared data
- shared_data[GI] = Data[DTid.x];
- GroupMemoryBarrierWithGroupSync();
-
- // Sort the shared data
- for (unsigned int j = g_iLevel >> 1 ; j > 0 ; j >>= 1)
- {
- unsigned int result = ((shared_data[GI & ~j] <= shared_data[GI | j]) == (bool)(g_iLevelMask & DTid.x))? shared_data[GI ^ j] : shared_data[GI];
- GroupMemoryBarrierWithGroupSync();
- shared_data[GI] = result;
- GroupMemoryBarrierWithGroupSync();
- }
-
- // Store shared data
- Data[DTid.x] = shared_data[GI];
-}
-
-//--------------------------------------------------------------------------------------
-// Matrix Transpose Compute Shader
-//--------------------------------------------------------------------------------------
-groupshared unsigned int transpose_shared_data[TRANSPOSE_BLOCK_SIZE * TRANSPOSE_BLOCK_SIZE];
-
-[numthreads(TRANSPOSE_BLOCK_SIZE, TRANSPOSE_BLOCK_SIZE, 1)]
-void MatrixTranspose( uint3 Gid : SV_GroupID,
- uint3 DTid : SV_DispatchThreadID,
- uint3 GTid : SV_GroupThreadID,
- uint GI : SV_GroupIndex )
-{
- transpose_shared_data[GI] = Input[DTid.y * g_iWidth + DTid.x];
- GroupMemoryBarrierWithGroupSync();
- uint2 XY = DTid.yx - GTid.yx + GTid.xy;
- Data[XY.y * g_iHeight + XY.x] = transpose_shared_data[GTid.x * TRANSPOSE_BLOCK_SIZE + GTid.y];
-}
diff --git a/tests/hlsl/dxsdk/FluidCS11/FluidCS11.hlsl b/tests/hlsl/dxsdk/FluidCS11/FluidCS11.hlsl
deleted file mode 100644
index 8966ea3c1..000000000
--- a/tests/hlsl/dxsdk/FluidCS11/FluidCS11.hlsl
+++ /dev/null
@@ -1,529 +0,0 @@
-//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues.
-//TEST:COMPARE_HLSL: -profile cs_4_0 -entry BuildGridCS -entry ClearGridIndicesCS -entry BuildGridIndicesCS -entry RearrangeParticlesCS -entry DensityCS_Simple -entry DensityCS_Shared -entry DensityCS_Grid -entry ForceCS_Simple -entry ForceCS_Shared -entry ForceCS_Grid -entry IntegrateCS
-//--------------------------------------------------------------------------------------
-// File: FluidCS11.hlsl
-//
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-//--------------------------------------------------------------------------------------
-// Smoothed Particle Hydrodynamics Algorithm Based Upon:
-// Particle-Based Fluid Simulation for Interactive Applications
-// Matthias Müller
-//--------------------------------------------------------------------------------------
-
-//--------------------------------------------------------------------------------------
-// Optimized Grid Algorithm Based Upon:
-// Broad-Phase Collision Detection with CUDA
-// Scott Le Grand
-//--------------------------------------------------------------------------------------
-
-struct Particle
-{
- float2 position;
- float2 velocity;
-};
-
-struct ParticleForces
-{
- float2 acceleration;
-};
-
-struct ParticleDensity
-{
- float density;
-};
-
-cbuffer cbSimulationConstants : register( b0 )
-{
- uint g_iNumParticles;
- float g_fTimeStep;
- float g_fSmoothlen;
- float g_fPressureStiffness;
- float g_fRestDensity;
- float g_fDensityCoef;
- float g_fGradPressureCoef;
- float g_fLapViscosityCoef;
- float g_fWallStiffness;
-
- float4 g_vGravity;
- float4 g_vGridDim;
- float3 g_vPlanes[4];
-};
-
-//--------------------------------------------------------------------------------------
-// Fluid Simulation
-//--------------------------------------------------------------------------------------
-
-#define SIMULATION_BLOCK_SIZE 256
-
-//--------------------------------------------------------------------------------------
-// Structured Buffers
-//--------------------------------------------------------------------------------------
-RWStructuredBuffer<Particle> ParticlesRW : register( u0 );
-StructuredBuffer<Particle> ParticlesRO : register( t0 );
-
-RWStructuredBuffer<ParticleDensity> ParticlesDensityRW : register( u0 );
-StructuredBuffer<ParticleDensity> ParticlesDensityRO : register( t1 );
-
-RWStructuredBuffer<ParticleForces> ParticlesForcesRW : register( u0 );
-StructuredBuffer<ParticleForces> ParticlesForcesRO : register( t2 );
-
-RWStructuredBuffer<unsigned int> GridRW : register( u0 );
-StructuredBuffer<unsigned int> GridRO : register( t3 );
-
-RWStructuredBuffer<uint2> GridIndicesRW : register( u0 );
-StructuredBuffer<uint2> GridIndicesRO : register( t4 );
-
-
-//--------------------------------------------------------------------------------------
-// Grid Construction
-//--------------------------------------------------------------------------------------
-
-// For simplicity, this sample uses a 16-bit hash based on the grid cell and
-// a 16-bit particle ID to keep track of the particles while sorting
-// This imposes a limitation of 64K particles and 256x256 grid work
-// You could extended the implementation to support large scenarios by using a uint2
-
-float2 GridCalculateCell(float2 position)
-{
- return clamp(position * g_vGridDim.xy + g_vGridDim.zw, float2(0, 0), float2(255, 255));
-}
-
-unsigned int GridConstuctKey(uint2 xy)
-{
- // Bit pack [-----UNUSED-----][----Y---][----X---]
- // 16-bit 8-bit 8-bit
- return dot(xy.yx, uint2(256, 1));
-}
-
-unsigned int GridConstuctKeyValuePair(uint2 xy, uint value)
-{
- // Bit pack [----Y---][----X---][-----VALUE------]
- // 8-bit 8-bit 16-bit
- return dot(uint3(xy.yx, value), uint3(256*256*256, 256*256, 1));
-}
-
-unsigned int GridGetKey(unsigned int keyvaluepair)
-{
- return (keyvaluepair >> 16);
-}
-
-unsigned int GridGetValue(unsigned int keyvaluepair)
-{
- return (keyvaluepair & 0xFFFF);
-}
-
-
-//--------------------------------------------------------------------------------------
-// Build Grid
-//--------------------------------------------------------------------------------------
-
-[numthreads(SIMULATION_BLOCK_SIZE, 1, 1)]
-void BuildGridCS( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
-{
- const unsigned int P_ID = DTid.x; // Particle ID to operate on
-
- float2 position = ParticlesRO[P_ID].position;
- float2 grid_xy = GridCalculateCell( position );
-
- GridRW[P_ID] = GridConstuctKeyValuePair((uint2)grid_xy, P_ID);
-}
-
-
-//--------------------------------------------------------------------------------------
-// Build Grid Indices
-//--------------------------------------------------------------------------------------
-
-[numthreads(SIMULATION_BLOCK_SIZE, 1, 1)]
-void ClearGridIndicesCS( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
-{
- GridIndicesRW[DTid.x] = uint2(0, 0);
-}
-
-[numthreads(SIMULATION_BLOCK_SIZE, 1, 1)]
-void BuildGridIndicesCS( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
-{
- const unsigned int G_ID = DTid.x; // Grid ID to operate on
- unsigned int G_ID_PREV = (G_ID == 0)? g_iNumParticles : G_ID; G_ID_PREV--;
- unsigned int G_ID_NEXT = G_ID + 1; if (G_ID_NEXT == g_iNumParticles) { G_ID_NEXT = 0; }
-
- unsigned int cell = GridGetKey( GridRO[G_ID] );
- unsigned int cell_prev = GridGetKey( GridRO[G_ID_PREV] );
- unsigned int cell_next = GridGetKey( GridRO[G_ID_NEXT] );
- if (cell != cell_prev)
- {
- // I'm the start of a cell
- GridIndicesRW[cell].x = G_ID;
- }
- if (cell != cell_next)
- {
- // I'm the end of a cell
- GridIndicesRW[cell].y = G_ID + 1;
- }
-}
-
-
-//--------------------------------------------------------------------------------------
-// Rearrange Particles
-//--------------------------------------------------------------------------------------
-
-[numthreads(SIMULATION_BLOCK_SIZE, 1, 1)]
-void RearrangeParticlesCS( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
-{
- const unsigned int ID = DTid.x; // Particle ID to operate on
- const unsigned int G_ID = GridGetValue( GridRO[ ID ] );
- ParticlesRW[ID] = ParticlesRO[ G_ID ];
-}
-
-
-//--------------------------------------------------------------------------------------
-// Density Calculation
-//--------------------------------------------------------------------------------------
-
-float CalculateDensity(float r_sq)
-{
- const float h_sq = g_fSmoothlen * g_fSmoothlen;
- // Implements this equation:
- // W_poly6(r, h) = 315 / (64 * pi * h^9) * (h^2 - r^2)^3
- // g_fDensityCoef = fParticleMass * 315.0f / (64.0f * PI * fSmoothlen^9)
- return g_fDensityCoef * (h_sq - r_sq) * (h_sq - r_sq) * (h_sq - r_sq);
-}
-
-
-//--------------------------------------------------------------------------------------
-// Simple N^2 Algorithm
-//--------------------------------------------------------------------------------------
-
-[numthreads(SIMULATION_BLOCK_SIZE, 1, 1)]
-void DensityCS_Simple( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
-{
- const unsigned int P_ID = DTid.x;
- const float h_sq = g_fSmoothlen * g_fSmoothlen;
- float2 P_position = ParticlesRO[P_ID].position;
-
- float density = 0;
-
- // Calculate the density based on all neighbors
- for (uint N_ID = 0 ; N_ID < g_iNumParticles ; N_ID++)
- {
- float2 N_position = ParticlesRO[N_ID].position;
-
- float2 diff = N_position - P_position;
- float r_sq = dot(diff, diff);
- if (r_sq < h_sq)
- {
- density += CalculateDensity(r_sq);
- }
- }
-
- ParticlesDensityRW[P_ID].density = density;
-}
-
-
-//--------------------------------------------------------------------------------------
-// Shared Memory Optimized N^2 Algorithm
-//--------------------------------------------------------------------------------------
-
-groupshared float2 density_shared_pos[SIMULATION_BLOCK_SIZE];
-
-[numthreads(SIMULATION_BLOCK_SIZE, 1, 1)]
-void DensityCS_Shared( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
-{
- const unsigned int P_ID = DTid.x;
- const float h_sq = g_fSmoothlen * g_fSmoothlen;
- float2 P_position = ParticlesRO[P_ID].position;
-
- float density = 0;
-
- // Calculate the density based on all neighbors
- [loop]
- for (uint N_block_ID = 0 ; N_block_ID < g_iNumParticles ; N_block_ID += SIMULATION_BLOCK_SIZE)
- {
- // Cache a tile of particles unto shared memory to increase IO efficiency
- density_shared_pos[GI] = ParticlesRO[N_block_ID + GI].position;
-
- GroupMemoryBarrierWithGroupSync();
-
- for (uint N_tile_ID = 0; N_tile_ID < SIMULATION_BLOCK_SIZE; N_tile_ID++)
- {
- float2 N_position = density_shared_pos[N_tile_ID];
-
- float2 diff = N_position - P_position;
- float r_sq = dot(diff, diff);
- if (r_sq < h_sq)
- {
- density += CalculateDensity(r_sq);
- }
- }
-
- GroupMemoryBarrierWithGroupSync();
- }
-
- ParticlesDensityRW[P_ID].density = density;
-}
-
-
-//--------------------------------------------------------------------------------------
-// Optimized Grid + Sort Algorithm
-//--------------------------------------------------------------------------------------
-
-[numthreads(SIMULATION_BLOCK_SIZE, 1, 1)]
-void DensityCS_Grid( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
-{
- const unsigned int P_ID = DTid.x;
- const float h_sq = g_fSmoothlen * g_fSmoothlen;
- float2 P_position = ParticlesRO[P_ID].position;
-
- float density = 0;
-
- // Calculate the density based on neighbors from the 8 adjacent cells + current cell
- int2 G_XY = (int2)GridCalculateCell( P_position );
- for (int Y = max(G_XY.y - 1, 0) ; Y <= min(G_XY.y + 1, 255) ; Y++)
- {
- for (int X = max(G_XY.x - 1, 0) ; X <= min(G_XY.x + 1, 255) ; X++)
- {
- unsigned int G_CELL = GridConstuctKey(uint2(X, Y));
- uint2 G_START_END = GridIndicesRO[G_CELL];
- for (unsigned int N_ID = G_START_END.x ; N_ID < G_START_END.y ; N_ID++)
- {
- float2 N_position = ParticlesRO[N_ID].position;
-
- float2 diff = N_position - P_position;
- float r_sq = dot(diff, diff);
- if (r_sq < h_sq)
- {
- density += CalculateDensity(r_sq);
- }
- }
- }
- }
-
- ParticlesDensityRW[P_ID].density = density;
-}
-
-
-//--------------------------------------------------------------------------------------
-// Force Calculation
-//--------------------------------------------------------------------------------------
-
-float CalculatePressure(float density)
-{
- // Implements this equation:
- // Pressure = B * ((rho / rho_0)^y - 1)
- return g_fPressureStiffness * max(pow(density / g_fRestDensity, 3) - 1, 0);
-}
-
-float2 CalculateGradPressure(float r, float P_pressure, float N_pressure, float N_density, float2 diff)
-{
- const float h = g_fSmoothlen;
- float avg_pressure = 0.5f * (N_pressure + P_pressure);
- // Implements this equation:
- // W_spkiey(r, h) = 15 / (pi * h^6) * (h - r)^3
- // GRAD( W_spikey(r, h) ) = -45 / (pi * h^6) * (h - r)^2
- // g_fGradPressureCoef = fParticleMass * -45.0f / (PI * fSmoothlen^6)
- return g_fGradPressureCoef * avg_pressure / N_density * (h - r) * (h - r) / r * (diff);
-}
-
-float2 CalculateLapVelocity(float r, float2 P_velocity, float2 N_velocity, float N_density)
-{
- const float h = g_fSmoothlen;
- float2 vel_diff = (N_velocity - P_velocity);
- // Implements this equation:
- // W_viscosity(r, h) = 15 / (2 * pi * h^3) * (-r^3 / (2 * h^3) + r^2 / h^2 + h / (2 * r) - 1)
- // LAPLACIAN( W_viscosity(r, h) ) = 45 / (pi * h^6) * (h - r)
- // g_fLapViscosityCoef = fParticleMass * fViscosity * 45.0f / (PI * fSmoothlen^6)
- return g_fLapViscosityCoef / N_density * (h - r) * vel_diff;
-}
-
-
-//--------------------------------------------------------------------------------------
-// Simple N^2 Algorithm
-//--------------------------------------------------------------------------------------
-
-[numthreads(SIMULATION_BLOCK_SIZE, 1, 1)]
-void ForceCS_Simple( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
-{
- const unsigned int P_ID = DTid.x; // Particle ID to operate on
-
- float2 P_position = ParticlesRO[P_ID].position;
- float2 P_velocity = ParticlesRO[P_ID].velocity;
- float P_density = ParticlesDensityRO[P_ID].density;
- float P_pressure = CalculatePressure(P_density);
-
- const float h_sq = g_fSmoothlen * g_fSmoothlen;
-
- float2 acceleration = float2(0, 0);
-
- // Calculate the acceleration based on all neighbors
- for (uint N_ID = 0 ; N_ID < g_iNumParticles ; N_ID++)
- {
- float2 N_position = ParticlesRO[N_ID].position;
-
- float2 diff = N_position - P_position;
- float r_sq = dot(diff, diff);
- if (r_sq < h_sq && P_ID != N_ID)
- {
- float2 N_velocity = ParticlesRO[N_ID].velocity;
- float N_density = ParticlesDensityRO[N_ID].density;
- float N_pressure = CalculatePressure(N_density);
- float r = sqrt(r_sq);
-
- // Pressure Term
- acceleration += CalculateGradPressure(r, P_pressure, N_pressure, N_density, diff);
-
- // Viscosity Term
- acceleration += CalculateLapVelocity(r, P_velocity, N_velocity, N_density);
- }
- }
-
- ParticlesForcesRW[P_ID].acceleration = acceleration / P_density;
-}
-
-
-//--------------------------------------------------------------------------------------
-// Shared Memory Optimized N^2 Algorithm
-//--------------------------------------------------------------------------------------
-
-groupshared struct { float2 position; float2 velocity; float density; } force_shared_pos[SIMULATION_BLOCK_SIZE];
-
-[numthreads(SIMULATION_BLOCK_SIZE, 1, 1)]
-void ForceCS_Shared( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
-{
- const unsigned int P_ID = DTid.x; // Particle ID to operate on
-
- float2 P_position = ParticlesRO[P_ID].position;
- float2 P_velocity = ParticlesRO[P_ID].velocity;
- float P_density = ParticlesDensityRO[P_ID].density;
- float P_pressure = CalculatePressure(P_density);
-
- const float h_sq = g_fSmoothlen * g_fSmoothlen;
-
- float2 acceleration = float2(0, 0);
-
- // Calculate the acceleration based on all neighbors
- [loop]
- for (uint N_block_ID = 0 ; N_block_ID < g_iNumParticles ; N_block_ID += SIMULATION_BLOCK_SIZE)
- {
- // Cache a tile of particles unto shared memory to increase IO efficiency
- force_shared_pos[GI].position = ParticlesRO[N_block_ID + GI].position;
- force_shared_pos[GI].velocity = ParticlesRO[N_block_ID + GI].velocity;
- force_shared_pos[GI].density = ParticlesDensityRO[N_block_ID + GI].density;
-
- GroupMemoryBarrierWithGroupSync();
-
- [loop]
- for (uint N_tile_ID = 0; N_tile_ID < SIMULATION_BLOCK_SIZE; N_tile_ID++ )
- {
- uint N_ID = N_block_ID + N_tile_ID;
- float2 N_position = force_shared_pos[N_tile_ID].position;
-
- float2 diff = N_position - P_position;
- float r_sq = dot(diff, diff);
- if (r_sq < h_sq && P_ID != N_ID)
- {
- float2 N_velocity = force_shared_pos[N_tile_ID].velocity;
- float N_density = force_shared_pos[N_tile_ID].density;
- float N_pressure = CalculatePressure(N_density);
- float r = sqrt(r_sq);
-
- // Pressure Term
- acceleration += CalculateGradPressure(r, P_pressure, N_pressure, N_density, diff);
-
- // Viscosity Term
- acceleration += CalculateLapVelocity(r, P_velocity, N_velocity, N_density);
- }
- }
-
- GroupMemoryBarrierWithGroupSync();
- }
-
- ParticlesForcesRW[P_ID].acceleration = acceleration / P_density;
-}
-
-
-//--------------------------------------------------------------------------------------
-// Optimized Grid + Sort Algorithm
-//--------------------------------------------------------------------------------------
-
-[numthreads(SIMULATION_BLOCK_SIZE, 1, 1)]
-void ForceCS_Grid( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
-{
- const unsigned int P_ID = DTid.x; // Particle ID to operate on
-
- float2 P_position = ParticlesRO[P_ID].position;
- float2 P_velocity = ParticlesRO[P_ID].velocity;
- float P_density = ParticlesDensityRO[P_ID].density;
- float P_pressure = CalculatePressure(P_density);
-
- const float h_sq = g_fSmoothlen * g_fSmoothlen;
-
- float2 acceleration = float2(0, 0);
-
- // Calculate the acceleration based on neighbors from the 8 adjacent cells + current cell
- int2 G_XY = (int2)GridCalculateCell( P_position );
- for (int Y = max(G_XY.y - 1, 0) ; Y <= min(G_XY.y + 1, 255) ; Y++)
- {
- for (int X = max(G_XY.x - 1, 0) ; X <= min(G_XY.x + 1, 255) ; X++)
- {
- unsigned int G_CELL = GridConstuctKey(uint2(X, Y));
- uint2 G_START_END = GridIndicesRO[G_CELL];
- for (unsigned int N_ID = G_START_END.x ; N_ID < G_START_END.y ; N_ID++)
- {
- float2 N_position = ParticlesRO[N_ID].position;
-
- float2 diff = N_position - P_position;
- float r_sq = dot(diff, diff);
- if (r_sq < h_sq && P_ID != N_ID)
- {
- float2 N_velocity = ParticlesRO[N_ID].velocity;
- float N_density = ParticlesDensityRO[N_ID].density;
- float N_pressure = CalculatePressure(N_density);
- float r = sqrt(r_sq);
-
- // Pressure Term
- acceleration += CalculateGradPressure(r, P_pressure, N_pressure, N_density, diff);
-
- // Viscosity Term
- acceleration += CalculateLapVelocity(r, P_velocity, N_velocity, N_density);
- }
- }
- }
- }
-
- ParticlesForcesRW[P_ID].acceleration = acceleration / P_density;
-}
-
-
-//--------------------------------------------------------------------------------------
-// Integration
-//--------------------------------------------------------------------------------------
-
-[numthreads(SIMULATION_BLOCK_SIZE, 1, 1)]
-void IntegrateCS( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
-{
- const unsigned int P_ID = DTid.x; // Particle ID to operate on
-
- float2 position = ParticlesRO[P_ID].position;
- float2 velocity = ParticlesRO[P_ID].velocity;
- float2 acceleration = ParticlesForcesRO[P_ID].acceleration;
-
- // Apply the forces from the map walls
- [unroll]
- for (unsigned int i = 0 ; i < 4 ; i++)
- {
- float dist = dot(float3(position, 1), g_vPlanes[i]);
- acceleration += min(dist, 0) * -g_fWallStiffness * g_vPlanes[i].xy;
- }
-
- // Apply gravity
- acceleration += g_vGravity.xy;
-
- // Integrate
- velocity += g_fTimeStep * acceleration;
- position += g_fTimeStep * velocity;
-
- // Update
- ParticlesRW[P_ID].position = position;
- ParticlesRW[P_ID].velocity = velocity;
-}
diff --git a/tests/hlsl/dxsdk/FluidCS11/FluidRender.hlsl b/tests/hlsl/dxsdk/FluidCS11/FluidRender.hlsl
deleted file mode 100644
index cfd14c2b2..000000000
--- a/tests/hlsl/dxsdk/FluidCS11/FluidRender.hlsl
+++ /dev/null
@@ -1,124 +0,0 @@
-//TEST:COMPARE_HLSL:-no-mangle -profile sm_4_0 -entry ParticleVS -stage vertex -entry ParticleGS -stage geometry -entry ParticlePS -stage pixel
-
-#ifndef __SLANG__
-#define ParticlesRO ParticlesRO_0
-#define ParticleDensityRO ParticleDensityRO_0
-#define cbRenderConstants cbRenderConstants_0
-#define g_mViewProjection g_mViewProjection_0
-#define g_fParticleSize g_fParticleSize_0
-#define density density_0
-#define position position_0
-#define velocity velocity_0
-
-#endif
-
-//--------------------------------------------------------------------------------------
-// File: FluidRender.hlsl
-//
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-//--------------------------------------------------------------------------------------
-// Particle Rendering
-//--------------------------------------------------------------------------------------
-
-struct Particle {
- float2 position;
- float2 velocity;
-};
-
-struct ParticleDensity {
- float density;
-};
-
-StructuredBuffer<Particle> ParticlesRO : register( t0 );
-StructuredBuffer<ParticleDensity> ParticleDensityRO : register( t1 );
-
-cbuffer cbRenderConstants : register( b0 )
-{
- matrix g_mViewProjection;
- float g_fParticleSize;
-};
-
-struct VSParticleOut
-{
- float2 position : POSITION;
- float4 color : COLOR;
-};
-
-struct GSParticleOut
-{
- float4 position : SV_Position;
- float4 color : COLOR;
- float2 texcoord : TEXCOORD;
-};
-
-
-//--------------------------------------------------------------------------------------
-// Visualization Helper
-//--------------------------------------------------------------------------------------
-
-static const float4 Rainbow[5] = {
- float4(1, 0, 0, 1), // red
- float4(1, 1, 0, 1), // orange
- float4(0, 1, 0, 1), // green
- float4(0, 1, 1, 1), // teal
- float4(0, 0, 1, 1), // blue
-};
-
-float4 VisualizeNumber(float n)
-{
- return lerp( Rainbow[ int(floor(n * 4.0f)) ], Rainbow[ int(ceil(n * 4.0f)) ], frac(n * 4.0f) );
-}
-
-float4 VisualizeNumber(float n, float lower, float upper)
-{
- return VisualizeNumber( saturate( (n - lower) / (upper - lower) ) );
-}
-
-
-//--------------------------------------------------------------------------------------
-// Vertex Shader
-//--------------------------------------------------------------------------------------
-
-VSParticleOut ParticleVS(uint ID : SV_VERTEXID)
-{
- VSParticleOut Out; // = { { 0, 0 } , { 0, 0, 0, 0 } }; // (VSParticleOut)0;
- Out.position = ParticlesRO[ID].position;
- Out.color = VisualizeNumber(ParticleDensityRO[ID].density, 1000.0f, 2000.0f);
- return Out;
-}
-
-
-//--------------------------------------------------------------------------------------
-// Particle Geometry Shader
-//--------------------------------------------------------------------------------------
-
-static const float2 g_positions[4] = { float2(-1, 1), float2(1, 1), float2(-1, -1), float2(1, -1) };
-static const float2 g_texcoords[4] = { float2(0, 1), float2(1, 1), float2(0, 0), float2(1, 0) };
-
-[maxvertexcount(4)]
-void ParticleGS(point VSParticleOut In[1], inout TriangleStream<GSParticleOut> SpriteStream)
-{
- [unroll]
- for (int i = 0; i < 4; i++)
- {
- GSParticleOut Out; // = (GSParticleOut)0;
- float4 position = float4(In[0].position, 0, 1) + g_fParticleSize * float4(g_positions[i], 0, 0);
- Out.position = mul(position, g_mViewProjection);
- Out.color = In[0].color;
- Out.texcoord = g_texcoords[i];
- SpriteStream.Append(Out);
- }
- SpriteStream.RestartStrip();
-}
-
-
-//--------------------------------------------------------------------------------------
-// Pixel Shader
-//--------------------------------------------------------------------------------------
-
-float4 ParticlePS(GSParticleOut In) : SV_TARGET
-{
- return In.color;
-}
diff --git a/tests/hlsl/dxsdk/HDRToneMappingCS11/BrightPassAndHorizFilterCS.hlsl b/tests/hlsl/dxsdk/HDRToneMappingCS11/BrightPassAndHorizFilterCS.hlsl
deleted file mode 100644
index 3addeca08..000000000
--- a/tests/hlsl/dxsdk/HDRToneMappingCS11/BrightPassAndHorizFilterCS.hlsl
+++ /dev/null
@@ -1,64 +0,0 @@
-//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues.
-//TEST:COMPARE_HLSL: -profile cs_4_0 -entry CSMain
-//--------------------------------------------------------------------------------------
-// File: BrightPassAndHorizFilterCS.hlsl
-//
-// The CS for bright pass and horizontal blur, used in CS path of
-// HDRToneMappingCS11 sample
-//
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-static const float MIDDLE_GRAY = 0.72f;
-static const float LUM_WHITE = 1.5f;
-static const float BRIGHT_THRESHOLD = 0.5f;
-
-Texture2D Input : register( t0 );
-StructuredBuffer<float> lum : register( t1 );
-RWStructuredBuffer<float4> Result : register( u0 );
-
-cbuffer cb0
-{
- float4 g_avSampleWeights[15];
- uint g_outputwidth;
- float g_inverse;
- int2 g_inputsize;
-}
-
-#define kernelhalf 7
-#define groupthreads 128
-groupshared float4 temp[groupthreads];
-
-[numthreads( groupthreads, 1, 1 )]
-void CSMain( uint3 Gid : SV_GroupID, uint GI : SV_GroupIndex )
-{
- int2 coord = int2( GI - kernelhalf + (groupthreads - kernelhalf * 2) * Gid.x, Gid.y );
- coord = coord.xy * 8 + int2(4, 3);
- coord = clamp( coord, int2(0, 0), int2(g_inputsize.x-1, g_inputsize.y-1) );
- float4 vColor = Input.Load( int3(coord, 0) );
-
- float fLum = lum[0]*g_inverse;
-
- // Bright pass and tone mapping
- vColor = max( 0.0f, vColor - BRIGHT_THRESHOLD );
- vColor *= MIDDLE_GRAY / (fLum + 0.001f);
- vColor *= (1.0f + vColor/LUM_WHITE);
- vColor /= (1.0f + vColor);
-
- temp[GI] = vColor;
-
- GroupMemoryBarrierWithGroupSync();
-
- // Horizontal blur
- if ( GI >= kernelhalf &&
- GI < (groupthreads - kernelhalf) &&
- ( (Gid.x * (groupthreads - 2 * kernelhalf) + GI - kernelhalf) < g_outputwidth) )
- {
- float4 vOut = 0;
-
- [unroll]
- for ( int i = -kernelhalf; i <= kernelhalf; ++i )
- vOut += temp[GI + i] * g_avSampleWeights[i + kernelhalf];
-
- Result[GI - kernelhalf + (groupthreads - kernelhalf * 2) * Gid.x + Gid.y * g_outputwidth] = float4(vOut.rgb, 1.0f);
- }
-}
diff --git a/tests/hlsl/dxsdk/HDRToneMappingCS11/DumpToTexture.hlsl b/tests/hlsl/dxsdk/HDRToneMappingCS11/DumpToTexture.hlsl
deleted file mode 100644
index f2d119eb5..000000000
--- a/tests/hlsl/dxsdk/HDRToneMappingCS11/DumpToTexture.hlsl
+++ /dev/null
@@ -1,29 +0,0 @@
-//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues.
-//TEST:COMPARE_HLSL: -profile ps_4_0 -entry PSDump
-//--------------------------------------------------------------------------------------
-// File: DumpToTexture.hlsl
-//
-// The PS for converting CS output buffer to a texture, used in CS path of
-// HDRToneMappingCS11 sample
-//
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-StructuredBuffer<float4> buffer : register( t0 );
-
-struct QuadVS_Output
-{
- float4 Pos : SV_POSITION;
- float2 Tex : TEXCOORD0;
-};
-
-cbuffer cbPS : register( b0 )
-{
- uint4 g_param;
-};
-
-float4 PSDump( QuadVS_Output Input ) : SV_TARGET
-{
- // To calculate the buffer offset, it is natural to use the screen space coordinates,
- // Input.Pos is the screen space coordinates of the pixel being written
- return buffer[ (Input.Pos.x - 0.5) + (Input.Pos.y - 0.5) * g_param.x ];
-}
diff --git a/tests/hlsl/dxsdk/HDRToneMappingCS11/FilterCS.hlsl b/tests/hlsl/dxsdk/HDRToneMappingCS11/FilterCS.hlsl
deleted file mode 100644
index e21b97e30..000000000
--- a/tests/hlsl/dxsdk/HDRToneMappingCS11/FilterCS.hlsl
+++ /dev/null
@@ -1,73 +0,0 @@
-//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues.
-//TEST:COMPARE_HLSL: -profile cs_4_0 -entry CSVerticalFilter -entry CSHorizFilter
-//--------------------------------------------------------------------------------------
-// File: FilterCS.hlsl
-//
-// The CSs for doing vertical and horizontal blur, used in CS path of
-// HDRToneMappingCS11 sample
-//
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-StructuredBuffer<float4> InputBuf : register( t0 );
-Texture2D InputTex : register( t1 );
-RWStructuredBuffer<float4> Result : register( u0 );
-
-cbuffer cb0
-{
- float4 g_avSampleWeights[15];
- int2 g_outputsize;
- int2 g_inputsize;
-}
-
-#define kernelhalf 7
-#define groupthreads 128
-groupshared float4 temp[groupthreads];
-
-[numthreads( groupthreads, 1, 1 )]
-void CSVerticalFilter( uint3 Gid : SV_GroupID, uint GI : SV_GroupIndex )
-{
- int offsety = GI - kernelhalf + (groupthreads - kernelhalf * 2) * Gid.y;
- offsety = clamp( offsety, 0, g_inputsize.y-1 );
- int offset = Gid.x + offsety * g_inputsize.x;
- temp[GI] = InputBuf[offset];
-
- GroupMemoryBarrierWithGroupSync();
-
- // Vertical blur
- if ( GI >= kernelhalf &&
- GI < (groupthreads - kernelhalf) &&
- ( (GI - kernelhalf + (groupthreads - kernelhalf * 2) * Gid.y) < g_outputsize.y) )
- {
- float4 vOut = 0;
-
- [unroll]
- for ( int i = -kernelhalf; i <= kernelhalf; ++i )
- vOut += temp[GI + i] * g_avSampleWeights[i + kernelhalf];
-
- Result[Gid.x + (GI - kernelhalf + (groupthreads - kernelhalf * 2) * Gid.y) * g_outputsize.x] = float4(vOut.rgb, 1.0f);
- }
-}
-
-[numthreads( groupthreads, 1, 1 )]
-void CSHorizFilter( uint3 Gid : SV_GroupID, uint GI : SV_GroupIndex )
-{
- int2 coord = int2( GI - kernelhalf + (groupthreads - kernelhalf * 2) * Gid.x, Gid.y );
- coord = clamp( coord, int2(0, 0), int2(g_inputsize.x-1, g_inputsize.y-1) );
- temp[GI] = InputTex.Load( int3(coord, 0) );
-
- GroupMemoryBarrierWithGroupSync();
-
- // Horizontal blur
- if ( GI >= kernelhalf &&
- GI < (groupthreads - kernelhalf) &&
- ( (Gid.x * (groupthreads - 2 * kernelhalf) + GI - kernelhalf) < g_outputsize.x) )
- {
- float4 vOut = 0;
-
- [unroll]
- for ( int i = -kernelhalf; i <= kernelhalf; ++i )
- vOut += temp[GI + i] * g_avSampleWeights[i + kernelhalf];
-
- Result[GI - kernelhalf + (groupthreads - kernelhalf * 2) * Gid.x + Gid.y * g_outputsize.x] = float4(vOut.rgb, 1.0f);
- }
-}
diff --git a/tests/hlsl/dxsdk/HDRToneMappingCS11/FinalPass.hlsl b/tests/hlsl/dxsdk/HDRToneMappingCS11/FinalPass.hlsl
deleted file mode 100644
index f5a49d2eb..000000000
--- a/tests/hlsl/dxsdk/HDRToneMappingCS11/FinalPass.hlsl
+++ /dev/null
@@ -1,79 +0,0 @@
-//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues.
-//TEST:COMPARE_HLSL: -profile vs_4_0 -entry QuadVS -profile ps_4_0 -entry PSFinalPass -entry PSFinalPassForCPUReduction
-//--------------------------------------------------------------------------------------
-// File: FinalPass.hlsl
-//
-// The PSs for doing tone-mapping based on the input luminance, used in CS path of
-// HDRToneMappingCS11 sample
-//
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-struct QuadVS_Input
-{
- float4 Pos : POSITION;
- float2 Tex : TEXCOORD0;
-};
-
-struct QuadVS_Output
-{
- float4 Pos : SV_POSITION;
- float2 Tex : TEXCOORD0;
-};
-
-QuadVS_Output QuadVS( QuadVS_Input Input )
-{
- QuadVS_Output Output;
- Output.Pos = Input.Pos;
- Output.Tex = Input.Tex;
- return Output;
-}
-
-Texture2D<float4> tex : register( t0 );
-StructuredBuffer<float> lum : register( t1 );
-Texture2D<float4> bloom : register( t2 );
-
-SamplerState PointSampler : register (s0);
-SamplerState LinearSampler : register (s1);
-
-
-static const float MIDDLE_GRAY = 0.72f;
-static const float LUM_WHITE = 1.5f;
-
-cbuffer cbPS : register( b0 )
-{
- float4 g_param;
-};
-
-float4 PSFinalPass( QuadVS_Output Input ) : SV_TARGET
-{
- float4 vColor = tex.Sample( PointSampler, Input.Tex );
- float fLum = lum[0]*g_param.x;
- float3 vBloom = bloom.Sample( LinearSampler, Input.Tex );
-
- // Tone mapping
- vColor.rgb *= MIDDLE_GRAY / (fLum + 0.001f);
- vColor.rgb *= (1.0f + vColor/LUM_WHITE);
- vColor.rgb /= (1.0f + vColor);
-
- vColor.rgb += 0.6f * vBloom;
- vColor.a = 1.0f;
-
- return vColor;
-}
-
-float4 PSFinalPassForCPUReduction( QuadVS_Output Input ) : SV_TARGET
-{
- float4 vColor = tex.Sample( PointSampler, Input.Tex );
- float fLum = g_param.x;
- float3 vBloom = bloom.Sample( LinearSampler, Input.Tex );
-
- // Tone mapping
- vColor.rgb *= MIDDLE_GRAY / (fLum + 0.001f);
- vColor.rgb *= (1.0f + vColor/LUM_WHITE);
- vColor.rgb /= (1.0f + vColor);
-
- vColor.rgb += 0.6f * vBloom;
- vColor.a = 1.0f;
-
- return vColor;
-}
diff --git a/tests/hlsl/dxsdk/HDRToneMappingCS11/PSApproach.hlsl b/tests/hlsl/dxsdk/HDRToneMappingCS11/PSApproach.hlsl
deleted file mode 100644
index 3f16b2449..000000000
--- a/tests/hlsl/dxsdk/HDRToneMappingCS11/PSApproach.hlsl
+++ /dev/null
@@ -1,129 +0,0 @@
-//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues.
-//TEST:COMPARE_HLSL: -profile ps_4_0 -entry DownScale2x2_Lum -entry DownScale3x3 -entry FinalPass -entry DownScale3x3_BrightPass -entry Bloom
-//--------------------------------------------------------------------------------------
-// File: PSApproach.hlsl
-//
-// The PSs for doing post-processing, used in PS path of
-// HDRToneMappingCS11 sample
-//
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-static const float4 LUM_VECTOR = float4(.299, .587, .114, 0);
-static const float MIDDLE_GRAY = 0.72f;
-static const float LUM_WHITE = 1.5f;
-static const float BRIGHT_THRESHOLD = 0.5f;
-
-SamplerState PointSampler : register (s0);
-SamplerState LinearSampler : register (s1);
-
-struct QuadVS_Output
-{
- float4 Pos : SV_POSITION;
- float2 Tex : TEXCOORD0;
-};
-
-Texture2D s0 : register(t0);
-Texture2D s1 : register(t1);
-Texture2D s2 : register(t2);
-
-float4 DownScale2x2_Lum ( QuadVS_Output Input ) : SV_TARGET
-{
- float4 vColor = 0.0f;
- float fAvg = 0.0f;
-
- for( int y = -1; y < 1; y++ )
- {
- for( int x = -1; x < 1; x++ )
- {
- // Compute the sum of color values
- vColor = s0.Sample( PointSampler, Input.Tex, int2(x,y) );
-
- fAvg += dot( vColor, LUM_VECTOR );
- }
- }
-
- fAvg /= 4;
-
- return float4(fAvg, fAvg, fAvg, 1.0f);
-}
-
-float4 DownScale3x3( QuadVS_Output Input ) : SV_TARGET
-{
- float fAvg = 0.0f;
- float4 vColor;
-
- for( int y = -1; y <= 1; y++ )
- {
- for( int x = -1; x <= 1; x++ )
- {
- // Compute the sum of color values
- vColor = s0.Sample( PointSampler, Input.Tex, int2(x,y) );
-
- fAvg += vColor.r;
- }
- }
-
- // Divide the sum to complete the average
- fAvg /= 9;
-
- return float4(fAvg, fAvg, fAvg, 1.0f);
-}
-
-float4 FinalPass( QuadVS_Output Input ) : SV_TARGET
-{
- //float4 vColor = 0;
- float4 vColor = s0.Sample( PointSampler, Input.Tex );
- float4 vLum = s1.Sample( PointSampler, float2(0,0) );
- float3 vBloom = s2.Sample( LinearSampler, Input.Tex );
-
- // Tone mapping
- vColor.rgb *= MIDDLE_GRAY / (vLum.r + 0.001f);
- vColor.rgb *= (1.0f + vColor/LUM_WHITE);
- vColor.rgb /= (1.0f + vColor);
-
- vColor.rgb += 0.6f * vBloom;
- vColor.a = 1.0f;
-
- return vColor;
-}
-
-float4 DownScale3x3_BrightPass( QuadVS_Output Input ) : SV_TARGET
-{
- float3 vColor = 0.0f;
- float4 vLum = s1.Sample( PointSampler, float2(0, 0) );
- float fLum = vLum.r;
-
- vColor = s0.Sample( PointSampler, Input.Tex ).rgb;
-
- // Bright pass and tone mapping
- vColor = max( 0.0f, vColor - BRIGHT_THRESHOLD );
- vColor *= MIDDLE_GRAY / (fLum + 0.001f);
- vColor *= (1.0f + vColor/LUM_WHITE);
- vColor /= (1.0f + vColor);
-
- return float4(vColor, 1.0f);
-}
-
-cbuffer cb0
-{
- float2 g_avSampleOffsets[15];
- float4 g_avSampleWeights[15];
-}
-
-float4 Bloom( QuadVS_Output Input ) : SV_TARGET
-{
- float4 vSample = 0.0f;
- float4 vColor = 0.0f;
- float2 vSamplePosition;
-
- for( int iSample = 0; iSample < 15; iSample++ )
- {
- // Sample from adjacent points
- vSamplePosition = Input.Tex + g_avSampleOffsets[iSample];
- vColor = s0.Sample( PointSampler, vSamplePosition);
-
- vSample += g_avSampleWeights[iSample]*vColor;
- }
-
- return vSample;
-}
diff --git a/tests/hlsl/dxsdk/HDRToneMappingCS11/ReduceTo1DCS.hlsl b/tests/hlsl/dxsdk/HDRToneMappingCS11/ReduceTo1DCS.hlsl
deleted file mode 100644
index 1316250d5..000000000
--- a/tests/hlsl/dxsdk/HDRToneMappingCS11/ReduceTo1DCS.hlsl
+++ /dev/null
@@ -1,72 +0,0 @@
-//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues.
-//TEST:COMPARE_HLSL: -profile cs_4_0 -entry CSMain
-//-----------------------------------------------------------------------------
-// File: ReduceTo1DCS.hlsl
-//
-// Desc: Reduce an input Texture2D to a buffer
-//
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//-----------------------------------------------------------------------------
-Texture2D Input : register( t0 );
-RWStructuredBuffer<float> Result : register( u0 );
-
-cbuffer cbCS : register( b0 )
-{
- uint4 g_param; // (g_param.x, g_param.y) is the x and y dimensions of the Dispatch call
- // (g_param.z, g_param.w) is the size of the above Input Texture2D
-};
-
-//#define CS_FULL_PIXEL_REDUCITON // Defining this or not must be the same as in HDRToneMappingCS11.cpp
-
-#define blocksize 8
-#define blocksizeY 8
-#define groupthreads (blocksize*blocksizeY)
-groupshared float accum[groupthreads];
-
-static const float4 LUM_VECTOR = float4(.299, .587, .114, 0);
-
-[numthreads(blocksize,blocksizeY,1)]
-void CSMain( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
-{
- float4 s =
-#ifdef CS_FULL_PIXEL_REDUCITON
- Input.Load( uint3(DTid.xy , 0) )+
- Input.Load( uint3(DTid.xy + uint2(blocksize*g_param.x, 0), 0) ) +
- Input.Load( uint3(DTid.xy + uint2(0, blocksizeY*g_param.y), 0) ) +
- Input.Load( uint3(DTid.xy + uint2(blocksize*g_param.x, blocksizeY*g_param.y), 0) );
-#else
- Input.Load( uint3((float)DTid.x/81.0f*g_param.z, (float)DTid.y/81.0f*g_param.w, 0) );
-#endif
-
- accum[GI] = dot( s, LUM_VECTOR );
-
- // Parallel reduction algorithm follows
- GroupMemoryBarrierWithGroupSync();
- if ( GI < 32 )
- accum[GI] += accum[32+GI];
-
- GroupMemoryBarrierWithGroupSync();
- if ( GI < 16 )
- accum[GI] += accum[16+GI];
-
- GroupMemoryBarrierWithGroupSync();
- if ( GI < 8 )
- accum[GI] += accum[8+GI];
-
- GroupMemoryBarrierWithGroupSync();
- if ( GI < 4 )
- accum[GI] += accum[4+GI];
-
- GroupMemoryBarrierWithGroupSync();
- if ( GI < 2 )
- accum[GI] += accum[2+GI];
-
- GroupMemoryBarrierWithGroupSync();
- if ( GI < 1 )
- accum[GI] += accum[1+GI];
-
- if ( GI == 0 )
- {
- Result[Gid.y*g_param.x+Gid.x] = accum[0];
- }
-}
diff --git a/tests/hlsl/dxsdk/HDRToneMappingCS11/ReduceToSingleCS.hlsl b/tests/hlsl/dxsdk/HDRToneMappingCS11/ReduceToSingleCS.hlsl
deleted file mode 100644
index 73857a6bb..000000000
--- a/tests/hlsl/dxsdk/HDRToneMappingCS11/ReduceToSingleCS.hlsl
+++ /dev/null
@@ -1,63 +0,0 @@
-//TEST_DISABLED:COMPARE_HLSL:-no-mangle -profile cs_4_0 -entry CSMain
-//-----------------------------------------------------------------------------
-// File: ReduceToSingleCS.hlsl
-//
-// Desc: Reduce an input buffer by a factor of groupthreads
-//
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//-----------------------------------------------------------------------------
-
-StructuredBuffer<float> Input : register( t0 );
-RWStructuredBuffer<float> Result : register( u0 );
-
-cbuffer cbCS : register( b0 )
-{
- uint4 g_param; // g_param.x is the actual elements contained in Input
- // g_param.y is the x dimension of the Dispatch call
-};
-
-#define groupthreads 128
-groupshared float accum[groupthreads];
-
-[numthreads(groupthreads,1,1)]
-void CSMain( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
-{
- if ( DTid.x < g_param.x )
- accum[GI] = Input[DTid.x];
- else
- accum[GI] = 0;
-
- // Parallel reduction algorithm follows
- GroupMemoryBarrierWithGroupSync();
- if ( GI < 64 )
- accum[GI] += accum[64+GI];
-
- GroupMemoryBarrierWithGroupSync();
- if ( GI < 32 )
- accum[GI] += accum[32+GI];
-
- GroupMemoryBarrierWithGroupSync();
- if ( GI < 16 )
- accum[GI] += accum[16+GI];
-
- GroupMemoryBarrierWithGroupSync();
- if ( GI < 8 )
- accum[GI] += accum[8+GI];
-
- GroupMemoryBarrierWithGroupSync();
- if ( GI < 4 )
- accum[GI] += accum[4+GI];
-
- GroupMemoryBarrierWithGroupSync();
- if ( GI < 2 )
- accum[GI] += accum[2+GI];
-
- GroupMemoryBarrierWithGroupSync();
- if ( GI < 1 )
- accum[GI] += accum[1+GI];
-
- if ( GI == 0 )
- {
- Result[Gid.x] = accum[0];
- }
-}
diff --git a/tests/hlsl/dxsdk/HDRToneMappingCS11/skybox11.hlsl b/tests/hlsl/dxsdk/HDRToneMappingCS11/skybox11.hlsl
deleted file mode 100644
index a0e44ba95..000000000
--- a/tests/hlsl/dxsdk/HDRToneMappingCS11/skybox11.hlsl
+++ /dev/null
@@ -1,44 +0,0 @@
-//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues.
-//TEST:COMPARE_HLSL: -profile vs_4_0 -entry SkyboxVS -profile ps_4_0 -entry SkyboxPS
-//-----------------------------------------------------------------------------
-// File: SkyBox11.hlsl
-//
-// Desc:
-//
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//-----------------------------------------------------------------------------
-
-cbuffer cbPerObject : register( b0 )
-{
- row_major matrix g_mWorldViewProjection : packoffset( c0 );
-}
-
-TextureCube g_EnvironmentTexture : register( t0 );
-SamplerState g_sam : register( s0 );
-
-struct SkyboxVS_Input
-{
- float4 Pos : POSITION;
-};
-
-struct SkyboxVS_Output
-{
- float4 Pos : SV_POSITION;
- float3 Tex : TEXCOORD0;
-};
-
-SkyboxVS_Output SkyboxVS( SkyboxVS_Input Input )
-{
- SkyboxVS_Output Output;
-
- Output.Pos = Input.Pos;
- Output.Tex = normalize( mul(Input.Pos, g_mWorldViewProjection) );
-
- return Output;
-}
-
-float4 SkyboxPS( SkyboxVS_Output Input ) : SV_TARGET
-{
- float4 color = g_EnvironmentTexture.Sample( g_sam, Input.Tex );
- return color;
-}
diff --git a/tests/hlsl/dxsdk/InstancingFX11/Instancing.fx b/tests/hlsl/dxsdk/InstancingFX11/Instancing.fx
deleted file mode 100644
index 3c8d45078..000000000
--- a/tests/hlsl/dxsdk/InstancingFX11/Instancing.fx
+++ /dev/null
@@ -1,591 +0,0 @@
-//TEST_IGNORE_FILE:
-//--------------------------------------------------------------------------------------
-// File: Instancing.fx
-//
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-//--------------------------------------------------------------------------------------
-// Input and output structures
-//--------------------------------------------------------------------------------------
-struct VSInstIn
-{
- float3 pos : POSITION;
- float3 norm : NORMAL;
- float2 tex : TEXTURE0;
- row_major float4x4 mTransform : mTransform;
-};
-
-struct VSSceneIn
-{
- float3 pos : POSITION;
- float3 norm : NORMAL;
- float2 tex : TEXTURE0;
-};
-
-struct VSGrassIn
-{
- float3 pos : POSITION;
- float3 norm : NORMAL;
- float2 tex : TEXTURE0;
- row_major float4x4 mTransform : mTransform;
- uint VertexID : SV_VertexID;
-};
-
-struct VSGrassOut
-{
- float3 pos : POSITION;
- float3 norm : NORMAL;
- float2 tex : TEXTURE0;
- uint VertexID : VERTID;
-};
-
-struct VSQuadIn
-{
- float3 pos : POSITION;
- float2 tex : TEXTURE0;
- row_major float4x4 mTransform : mTransform;
- float fOcc : fOcc;
- uint InstanceId : SV_InstanceID;
-};
-
-struct PSSceneIn
-{
- float4 pos : SV_Position;
- float2 tex : TEXTURE0;
- float4 color : COLOR0;
-};
-
-struct PSQuadIn
-{
- float4 pos : SV_Position;
- float3 tex : TEXTURE0;
- float4 color : COLOR0;
-};
-
-//--------------------------------------------------------------------------------------
-// Constant buffers
-//--------------------------------------------------------------------------------------
-cbuffer crarely
-{
- float4x4 g_mTreeMatrices[50];
- uint g_iNumTrees;
-};
-
-cbuffer ceveryframe
-{
- float4x4 g_mWorldViewProj;
- float4x4 g_mWorldView;
-};
-
-cbuffer cmultipleperframe
-{
- float g_GrassWidth;
- float g_GrassHeight;
- uint g_iGrassCoverage;
-};
-
-cbuffer cusercontrolled
-{
- float g_GrassMessiness;
-};
-
-struct light_struct
-{
- float4 direction;
- float4 color;
-};
-
-cbuffer cimmutable
-{
- light_struct g_lights[4] = {
- { float4(0.620275, 0.683659, 0.384537, 1), float4(0.75, 0.599, 0.405, 1) }, //sun
- { float4(0.063288, -0.987444, 0.144735, 1), float4(0.192, 0.273, 0.275, 1) }, //bottom
- { float4(0.23007, 0.785579, -0.574422, 1), float4(0.300, 0.292, 0.223, 1) }, //highlight
- { float4(-0.620275, -0.683659, -0.384537, 1), float4(0.0, 0.0, 0.1, 1) } //blue rim-light
- };
-
- float4 g_ambient = float4(0.4945,0.465,0.5,1);
-
- float g_occDimHeight = 2400.0; //scalar that tells us how much to darken the tree near the top
-};
-
-cbuffer cgrassblade
-{
- float3 g_positions[6] =
- {
- float3( -1, 0, 0 ),
- float3( -1, 2, 0 ),
- float3( 1, 0, 0 ),
- float3( 1, 2, 0 ),
-
- float3( -1, 0, 0 ),
- float3( -1, 2, 0 ),
- };
- float2 g_texcoords[6] =
- {
- float2(0,1),
- float2(0,0),
- float2(1,1),
- float2(1,0),
-
- float2(0,1),
- float2(0,0),
- };
-};
-
-//--------------------------------------------------------------------------------------
-// Textures and Samplers
-//--------------------------------------------------------------------------------------
-Texture2D g_txDiffuse;
-Texture2DArray g_tx2dArray;
-SamplerState g_samLinear
-{
- Filter = ANISOTROPIC;
- AddressU = Wrap;
- AddressV = Wrap;
-};
-
-Texture1D g_txRandom;
-SamplerState g_samPoint
-{
- Filter = MIN_MAG_MIP_POINT;
- AddressU = Wrap;
- AddressV = Wrap;
-};
-
-//--------------------------------------------------------------------------------------
-// State structures
-//--------------------------------------------------------------------------------------
-BlendState QuadAlphaBlendState
-{
- AlphaToCoverageEnable = TRUE;
- RenderTargetWriteMask[0] = 0x0F;
-};
-
-RasterizerState EnableMSAA
-{
- CullMode = BACK;
- MultisampleEnable = TRUE;
-};
-
-DepthStencilState DisableDepthTestWrite
-{
- DepthEnable = FALSE;
- DepthWriteMask = ZERO;
-};
-
-DepthStencilState EnableDepthTestWrite
-{
- DepthEnable = TRUE;
- DepthWriteMask = ALL;
-};
-
-BlendState NoBlending
-{
- AlphaToCoverageEnable = FALSE;
- BlendEnable[0] = FALSE;
-};
-
-//--------------------------------------------------------------------------------------
-// Sky vertex shader
-//--------------------------------------------------------------------------------------
-PSSceneIn VSSkymain(VSSceneIn input)
-{
- PSSceneIn output;
-
- //
- // Transform the vert to view-space
- //
- float4 v4Position = mul(float4(input.pos, 1), g_mWorldViewProj);
- output.pos = v4Position;
-
- //
- // Transfer the rest
- //
- output.tex = input.tex;
-
- output.color = float4(1,1,1,1);
-
- return output;
-}
-
-//--------------------------------------------------------------------------------------
-// CalcLighting helper function. Calculates lighting from 4 light sources, adds ambient
-// and attenuates for depth. Used by all techniques for lighting.
-//--------------------------------------------------------------------------------------
-float4 CalcLighting( float3 norm, float depth )
-{
- float4 color = float4(0,0,0,0);
-
- // add the contributions of 4 directional lights
- [unroll] for( int i=0; i<4; i++ )
- {
- color += saturate( dot(g_lights[i].direction,norm) )*g_lights[i].color;
- }
-
- // give some attenuation due to depth
- float attenuate = depth / 10000.0;
- float4 attenColor = float4(0.15, 0.2, 0.3, 0);
-
- // add it all up plus ambient
- return (1-attenuate*0.23)*(color + g_ambient) + attenColor*attenuate;
-}
-
-//--------------------------------------------------------------------------------------
-// Instancing vertex shader. Positions the vertices based upon the matrix stored
-// in the second vertex stream.
-//--------------------------------------------------------------------------------------
-PSSceneIn VSInstmain(VSInstIn input)
-{
- PSSceneIn output;
-
- //
- // Transform by our Sceneance matrix
- //
- float4 InstancePosition = mul(float4(input.pos, 1), input.mTransform);
- float4 ViewPos = mul(InstancePosition, g_mWorldView );
-
- //
- // Transform the vert to view-space
- //
- float4 v4Position = mul(InstancePosition, g_mWorldViewProj);
- output.pos = v4Position;
-
- //
- // Transfer the rest
- //
- output.tex = input.tex;
-
- //
- // dot the norm with the light dir
- //
- float3 norm = mul(input.norm,(float3x3)input.mTransform);
- output.color = CalcLighting( norm, ViewPos.z );
-
- //
- // Dim the color by how far up the tree we are.
- // This is a nice way to fake occlusion of the branches by the leaves.
- //
- output.color *= 1.0f - saturate(input.pos.y/g_occDimHeight);
-
-
- return output;
-}
-
-//--------------------------------------------------------------------------------------
-// Quad (leaf) vertex shader. Instances the quad over multiple leaf positions and
-// multiple trees. This demonstrates how to do double instancing.
-//--------------------------------------------------------------------------------------
-PSQuadIn VSQuadmain(VSQuadIn input)
-{
- PSQuadIn output;
-
- // base our leaf texture upon which instance id we are
- uint iLeaf = input.InstanceId/g_iNumTrees;
- uint iLeafTex = iLeaf%3;
- output.tex = float3(input.tex, float(iLeafTex) );
-
- //
- // Transform the position by the Instance matrix
- //
- int iTree = input.InstanceId - (input.InstanceId/g_iNumTrees)*g_iNumTrees;
- float4 vInstancePos = mul( float4(input.pos, 1), input.mTransform );
- float4 InstancePosition = mul(vInstancePos, g_mTreeMatrices[iTree] );
- float4 ViewPos = mul(InstancePosition, g_mWorldView );
-
- //
- // Transform the Instance position to view-space
- //
- output.pos = mul(InstancePosition, g_mWorldViewProj);
-
- // pack distance from the eye into the color alpha channel
- output.color = float4(input.fOcc,input.fOcc,input.fOcc,ViewPos.z);
-
- return output;
-}
-
-//--------------------------------------------------------------------------------------
-// Grass vertex shader. Basically a passthrough except for instancing the island base
-// mesh.
-//--------------------------------------------------------------------------------------
-VSGrassOut VSGrassmain(VSGrassIn input)
-{
- // simple transform into the instance space
- VSGrassOut output;
- output.pos = mul(float4(input.pos, 1), input.mTransform);
- output.norm = mul(input.norm, (float3x3)input.mTransform);
- output.tex = input.tex;
- output.VertexID = input.VertexID;
-
- return output;
-}
-
-//--------------------------------------------------------------------------------------
-// Quad (leaf) GS. Calculates the normal and lighting for the leaf.
-//--------------------------------------------------------------------------------------
-[maxvertexcount(3)]
-void GSQuadmain(triangle PSQuadIn input[3], inout TriangleStream<PSQuadIn> QuadStream)
-{
- PSQuadIn output;
-
- //
- // Calculate the face normal
- //
- float4 faceNormalA = input[1].pos.xyzw - input[0].pos.xyzw;
- float4 faceNormalB = input[2].pos.xyzw - input[0].pos.xyzw;
-
- //
- // Cross product
- //
- float3 faceNormal = cross(faceNormalA, faceNormalB);
-
- //
- // Normalize face normal
- //
- faceNormal = normalize(faceNormal);
-
- //
- // Dot face normal with some arbitrary light vectors
- //
- float4 color1 = CalcLighting( faceNormal, input[0].color.a );
- color1 *= input[0].color;
-
- //
- // Make sure we always have an alpha of 1
- //
- color1.a = 1.0;
-
- //
- // Emit out the new tri
- //
- for(int i=0; i<3; i++)
- {
- output.pos = input[i].pos;
- output.color = color1;
- output.tex = input[i].tex;
- QuadStream.Append(output);
- }
- QuadStream.RestartStrip();
-}
-
-//--------------------------------------------------------------------------------------
-// RandomDir helper. Samples a random dir out of our 1d random texture. In this case
-// we use a texture because the offset could be anywhere. If we were sampling linearly
-// then we would probably just use a buffer and load from that.
-//--------------------------------------------------------------------------------------
-float3 RandomDir(float fOffset)
-{
- float tCoord = (fOffset) / 300.0;
- return g_txRandom.SampleLevel( g_samPoint, tCoord, 0 );
-}
-
-//--------------------------------------------------------------------------------------
-// Helper to determing if a point is within a triangle
-//--------------------------------------------------------------------------------------
-bool IsInTriangle( float3 P, float3 A, float3 B, float3 C )
-{
- float3 crossA = cross( B-A, P-A );
- float3 crossB = cross( C-B, P-B );
- float3 crossC = cross( A-C, P-C );
-
- if( dot( crossA, crossB ) > 0 &&
- dot( crossB, crossC ) > 0 )
- {
- return true;
- }
- else
- {
- return false;
- }
-}
-
-//--------------------------------------------------------------------------------------
-// Gets a random orientation matrix based upon the RandomDir funciton
-//--------------------------------------------------------------------------------------
-float4x4 GetRandomOrientation( float3 Pos, float3 Norm, float fRandOffset )
-{
- float3 Tangent = RandomDir(fRandOffset);
-
- float3 Bitangent = normalize( cross( Tangent, Norm ) );
- Tangent = normalize( cross( Bitangent, Norm ) );
-
- float4x4 matWorld = { float4( Tangent, 0 ),
- float4( Norm, 0 ),
- float4( Bitangent, 0 ),
- float4( Pos, 1 ) };
- return matWorld;
-}
-
-//--------------------------------------------------------------------------------------
-// Generates an actual grass blade
-//--------------------------------------------------------------------------------------
-void OutputGrassBlade( VSGrassOut midPoint, inout TriangleStream<PSQuadIn> GrassStream, int iGrassTex )
-{
- PSQuadIn output;
-
- float4x4 mWorld = GetRandomOrientation( midPoint.pos, midPoint.norm, (float)midPoint.VertexID );
- float4 ViewPos = mul( midPoint.pos, g_mWorldView );
-
- float3 grassNorm = midPoint.norm;
- float4 color1 = CalcLighting( grassNorm, ViewPos.z );
-
- for(int v=0; v<6; v++)
- {
- float3 pos = g_positions[v];
- pos.x *= g_GrassWidth;
- pos.y *= g_GrassHeight;
-
- output.pos = mul( float4(pos,1), mWorld );
- output.pos = mul( output.pos, g_mWorldViewProj );
- output.tex = float3( g_texcoords[v], iGrassTex );
- output.color = color1;
-
- GrassStream.Append( output );
- }
-
- GrassStream.RestartStrip();
-}
-
-//--------------------------------------------------------------------------------------
-// Midpoint of the three vertices A,B,C
-//--------------------------------------------------------------------------------------
-VSGrassOut CalcMidPoint( VSGrassOut A, VSGrassOut B, VSGrassOut C )
-{
- VSGrassOut MidPoint;
-
- MidPoint.pos = (A.pos + B.pos + C.pos)/3.0f;
- MidPoint.norm = (A.norm + B.norm + C.norm)/3.0f;
- MidPoint.tex = (A.tex + B.tex + C.tex)/3.0f;
- MidPoint.VertexID = A.VertexID + B.VertexID + C.VertexID;
-
- return MidPoint;
-}
-
-//--------------------------------------------------------------------------------------
-// The actual grass geometry shader. This generates grass blades based upon an input
-// mesh (the tops of the islands) and a coverage texture. Each of the textures channels
-// determines how much of each of the 4 types of grass to place at a particular spot.
-//--------------------------------------------------------------------------------------
-[maxvertexcount(90)]
-void GSGrassmain(triangle VSGrassOut input[3], inout TriangleStream<PSQuadIn> GrassStream )
-{
- VSGrassOut MidPoint = CalcMidPoint( input[0], input[1], input[2] );
-
- float4 CoverageMask = g_tx2dArray.SampleLevel( g_samPoint, float3(MidPoint.tex,4), 0 );
- float cm[4];
- cm[0] = CoverageMask.r;
- cm[1] = CoverageMask.g;
- cm[2] = CoverageMask.b;
- cm[3] = CoverageMask.a;
-
- for(int g=0; g<4; g++)
- {
- float MaxBlades = float(g_iGrassCoverage)*cm[g];
- for(float i=0; i<MaxBlades; i++)
- {
- float randOffset = g*5 + (i+1);
- float3 Tan = RandomDir( MidPoint.pos.x + randOffset );
- float3 Len = normalize( RandomDir( MidPoint.pos.z + randOffset ) );
- float3 Shift = Len.x*g_GrassMessiness*normalize( cross( Tan, MidPoint.norm ) );
- VSGrassOut grassPoint = MidPoint;
- grassPoint.VertexID += randOffset;
- grassPoint.pos += Shift;
-
- //uncomment this to make the grass strictly conform to the mesh
- //if( IsInTriangle( grassPoint.pos, input[0].pos, input[1].pos, input[2].pos ) )
- {
- OutputGrassBlade( grassPoint, GrassStream, g );
- }
- }
- }
-}
-
-//--------------------------------------------------------------------------------------
-// PS for non-leaf or grass items.
-//--------------------------------------------------------------------------------------
-float4 PSScenemain(PSSceneIn input) : SV_Target
-{
- float4 color = g_txDiffuse.Sample( g_samLinear, input.tex ) * input.color;
- return color;
-}
-
-//--------------------------------------------------------------------------------------
-// PS for leaves and grass
-//--------------------------------------------------------------------------------------
-float4 PSQuadmain(PSQuadIn input) : SV_Target
-{
- float4 color = g_tx2dArray.Sample( g_samLinear, input.tex );
- color.xyz *= input.color.xyz;
- return color;
-}
-
-//--------------------------------------------------------------------------------------
-// Render instanced meshes with vertex lighting
-//--------------------------------------------------------------------------------------
-technique10 RenderInstancedVertLighting
-{
- pass p0
- {
- SetVertexShader( CompileShader( vs_4_0, VSInstmain() ) );
- SetGeometryShader( NULL );
- SetPixelShader( CompileShader( ps_4_0, PSScenemain() ) );
-
- SetBlendState( NoBlending, float4( 0.0f, 0.0f, 0.0f, 0.0f ), 0xFFFFFFFF );
- SetDepthStencilState( EnableDepthTestWrite, 0 );
- SetRasterizerState( EnableMSAA );
- }
-}
-
-//--------------------------------------------------------------------------------------
-// Skybox
-//--------------------------------------------------------------------------------------
-technique10 RenderSkybox
-{
- pass p0
- {
- SetVertexShader( CompileShader( vs_4_0, VSSkymain() ) );
- SetGeometryShader( NULL );
- SetPixelShader( CompileShader( ps_4_0, PSScenemain() ) );
-
- SetBlendState( NoBlending, float4( 0.0f, 0.0f, 0.0f, 0.0f ), 0xFFFFFFFF );
- SetDepthStencilState( DisableDepthTestWrite, 0 );
- SetRasterizerState( EnableMSAA );
- }
-}
-
-//--------------------------------------------------------------------------------------
-// Render leaves
-//--------------------------------------------------------------------------------------
-technique10 RenderQuad
-{
- pass p0
- {
-
- SetVertexShader( CompileShader( vs_4_0, VSQuadmain() ) );
- SetGeometryShader( CompileShader( gs_4_0, GSQuadmain() ) );
- SetPixelShader( CompileShader( ps_4_0, PSQuadmain() ) );
-
- SetBlendState( QuadAlphaBlendState, float4( 0.0f, 0.0f, 0.0f, 0.0f ), 0xFFFFFFFF );
- SetDepthStencilState( EnableDepthTestWrite, 0 );
- SetRasterizerState( EnableMSAA );
- }
-}
-
-//--------------------------------------------------------------------------------------
-// Render grass
-//--------------------------------------------------------------------------------------
-technique10 RenderGrass
-{
- pass p0
- {
-
- SetVertexShader( CompileShader( vs_4_0, VSGrassmain() ) );
- SetGeometryShader( CompileShader( gs_4_0, GSGrassmain() ) );
- SetPixelShader( CompileShader( ps_4_0, PSQuadmain() ) );
-
- SetBlendState( QuadAlphaBlendState, float4( 0.0f, 0.0f, 0.0f, 0.0f ), 0xFFFFFFFF );
- SetDepthStencilState( EnableDepthTestWrite, 0 );
- SetRasterizerState( EnableMSAA );
- }
-}
diff --git a/tests/hlsl/dxsdk/MultithreadedRendering11/MultithreadedRendering11_PS.hlsl b/tests/hlsl/dxsdk/MultithreadedRendering11/MultithreadedRendering11_PS.hlsl
deleted file mode 100644
index 5463f5b92..000000000
--- a/tests/hlsl/dxsdk/MultithreadedRendering11/MultithreadedRendering11_PS.hlsl
+++ /dev/null
@@ -1,202 +0,0 @@
-//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues.
-//TEST:COMPARE_HLSL: -profile ps_4_0 -entry PSMain
-//--------------------------------------------------------------------------------------
-// File: MultithreadedRendering11_PS.hlsl
-//
-// The pixel shader file for the MultithreadedRendering11 sample.
-//
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-// Various debug options
-//#define NO_DIFFUSE_MAP
-//#define NO_NORMAL_MAP
-//#define NO_AMBIENT
-//#define NO_DYNAMIC_LIGHTING
-//#define NO_SHADOW_MAP
-
-#define SHADOW_DEPTH_BIAS 0.0005f
-
-//--------------------------------------------------------------------------------------
-// Globals
-//--------------------------------------------------------------------------------------
-static const int g_iNumLights = 4;
-static const int g_iNumShadows = 1; // by convention, the first n lights cast shadows
-
-cbuffer cbPerObject : register( b0 )
-{
- float4 g_vObjectColor : packoffset( c0 );
-};
-
-cbuffer cbPerLight : register( b1 )
-{
- struct LightDataStruct
- {
- matrix m_mLightViewProj;
- float4 m_vLightPos;
- float4 m_vLightDir;
- float4 m_vLightColor;
- float4 m_vFalloffs; // x = dist end, y = dist range, z = cos angle end, w = cos range
- } g_LightData[g_iNumLights] : packoffset( c0 );
-};
-
-cbuffer cbPerScene : register( b2 )
-{
- float4 g_vMirrorPlane : packoffset( c0 );
- float4 g_vAmbientColor : packoffset( c1 );
- float4 g_vTintColor : packoffset( c2 );
-};
-
-//--------------------------------------------------------------------------------------
-// Textures and Samplers
-//--------------------------------------------------------------------------------------
-Texture2D g_txDiffuse : register( t0 );
-Texture2D g_txNormal : register( t1 );
-Texture2D g_txShadow[g_iNumShadows] : register( t2 );
-
-SamplerState g_samPointClamp : register( s0 );
-SamplerState g_samLinearWrap : register( s1 );
-
-//--------------------------------------------------------------------------------------
-// Input / Output structures
-//--------------------------------------------------------------------------------------
-struct PS_INPUT
-{
- float3 vNormal : NORMAL;
- float3 vTangent : TANGENT;
- float2 vTexcoord : TEXCOORD0;
- float4 vPosWorld : TEXCOORD1;
-};
-
-//--------------------------------------------------------------------------------------
-// Sample normal map, convert to signed, apply tangent-to-world space transform
-//--------------------------------------------------------------------------------------
-float3 CalcPerPixelNormal( float2 vTexcoord, float3 vVertNormal, float3 vVertTangent )
-{
- // Compute tangent frame
- vVertNormal = normalize( vVertNormal );
- vVertTangent = normalize( vVertTangent );
- float3 vVertBinormal = normalize( cross( vVertTangent, vVertNormal ) );
- float3x3 mTangentSpaceToWorldSpace = float3x3( vVertTangent, vVertBinormal, vVertNormal );
-
- // Compute per-pixel normal
- float3 vBumpNormal = g_txNormal.Sample( g_samLinearWrap, vTexcoord );
- vBumpNormal = 2.0f * vBumpNormal - 1.0f;
-
- return mul( vBumpNormal, mTangentSpaceToWorldSpace );
-}
-
-//--------------------------------------------------------------------------------------
-// Test how much pixel is in shadow, using 2x2 percentage-closer filtering
-//--------------------------------------------------------------------------------------
-float4 CalcUnshadowedAmountPCF2x2( int iShadow, float4 vPosWorld )
-{
- matrix mLightViewProj = g_LightData[iShadow].m_mLightViewProj;
- Texture2D txShadow = g_txShadow[iShadow];
-
- // Compute pixel position in light space
- float4 vLightSpacePos = mul( vPosWorld, mLightViewProj );
- vLightSpacePos.xyz /= vLightSpacePos.w;
-
- // Translate from surface coords to texture coords
- // Could fold these into the matrix
- float2 vShadowTexCoord = 0.5f * vLightSpacePos + 0.5f;
- vShadowTexCoord.y = 1.0f - vShadowTexCoord.y;
-
- // Depth bias to avoid pixel self-shadowing
- float vLightSpaceDepth = vLightSpacePos.z - SHADOW_DEPTH_BIAS;
-
- // Find sub-pixel weights
- float2 vShadowMapDims = float2( 2048.0f, 2048.0f ); // need to keep in sync with .cpp file
- float4 vSubPixelCoords;
- vSubPixelCoords.xy = frac( vShadowMapDims * vShadowTexCoord );
- vSubPixelCoords.zw = 1.0f - vSubPixelCoords;
- float4 vBilinearWeights = vSubPixelCoords.zxzx * vSubPixelCoords.wwyy;
-
- // 2x2 percentage closer filtering
- float2 vTexelUnits = 1.0f / vShadowMapDims;
- float4 vShadowDepths;
- vShadowDepths.x = txShadow.Sample( g_samPointClamp, vShadowTexCoord );
- vShadowDepths.y = txShadow.Sample( g_samPointClamp, vShadowTexCoord + float2( vTexelUnits.x, 0.0f ) );
- vShadowDepths.z = txShadow.Sample( g_samPointClamp, vShadowTexCoord + float2( 0.0f, vTexelUnits.y ) );
- vShadowDepths.w = txShadow.Sample( g_samPointClamp, vShadowTexCoord + vTexelUnits );
-
- // What weighted fraction of the 4 samples are nearer to the light than this pixel?
- float4 vShadowTests = ( vShadowDepths >= vLightSpaceDepth ) ? 1.0f : 0.0f;
- return dot( vBilinearWeights, vShadowTests );
-}
-
-//--------------------------------------------------------------------------------------
-// Diffuse lighting calculation, with angle and distance falloff
-//--------------------------------------------------------------------------------------
-float4 CalcLightingColor( int iLight, float3 vPosWorld, float3 vPerPixelNormal )
-{
- float3 vLightPos = g_LightData[iLight].m_vLightPos.xyz;
- float3 vLightDir = g_LightData[iLight].m_vLightDir.xyz;
- float4 vLightColor = g_LightData[iLight].m_vLightColor;
- float4 vFalloffs = g_LightData[iLight].m_vFalloffs;
-
- float3 vLightToPixelUnNormalized = vPosWorld - vLightPos;
-
- // Dist falloff = 0 at vFalloffs.x, 1 at vFalloffs.x - vFalloffs.y
- float fDist = length( vLightToPixelUnNormalized );
- float fDistFalloff = saturate( ( vFalloffs.x - fDist ) / vFalloffs.y );
-
- // Normalize from here on
- float3 vLightToPixelNormalized = vLightToPixelUnNormalized / fDist;
-
- // Angle falloff = 0 at vFalloffs.z, 1 at vFalloffs.z - vFalloffs.w
- float fCosAngle = dot( vLightToPixelNormalized, vLightDir );
- float fAngleFalloff = saturate( ( fCosAngle - vFalloffs.z ) / vFalloffs.w );
-
- // Diffuse contribution
- float fNDotL = saturate( -dot( vLightToPixelNormalized, vPerPixelNormal ) );
-
- return vLightColor * fNDotL * fDistFalloff * fAngleFalloff;
-}
-
-//--------------------------------------------------------------------------------------
-// Pixel Shader
-//--------------------------------------------------------------------------------------
-float4 PSMain( PS_INPUT Input ) : SV_TARGET
-{
- // Manual clip test, so that objects which are behind the mirror
- // don't show up in the mirror.
- clip( dot( g_vMirrorPlane.xyz, Input.vPosWorld.xyz ) + g_vMirrorPlane.w );
-
-#ifdef NO_DIFFUSE_MAP
- float4 vDiffuse = 0.5f;
-#else // #ifdef NO_DIFFUSE_MAP
- float4 vDiffuse = g_txDiffuse.Sample( g_samLinearWrap, Input.vTexcoord );
-#endif // #ifdef NO_DIFFUSE_MAP #else
-
- // Compute per-pixel normal
-#ifdef NO_NORMAL_MAP
- float3 vPerPixelNormal = Input.vNormal;
-#else // #ifdef NO_NORMAL_MAP
- float3 vPerPixelNormal = CalcPerPixelNormal( Input.vTexcoord, Input.vNormal, Input.vTangent );
-#endif // #ifdef NO_NORMAL_MAP #else
-
- // Compute lighting contribution
-#ifdef NO_AMBIENT
- float4 vTotalLightingColor = 0.0f;
-#else // #ifdef NO_AMBIENT
- float4 vTotalLightingColor = g_vAmbientColor;
-#endif // #ifdef NO_AMBIENT #else
-
-#ifndef NO_DYNAMIC_LIGHTING
- for ( int iLight = 0; iLight < g_iNumLights; ++iLight )
- {
- float4 vLightingColor = CalcLightingColor( iLight, Input.vPosWorld, vPerPixelNormal );
-#ifndef NO_SHADOW_MAP
- if ( iLight < g_iNumShadows && any( vLightingColor.xyz ) > 0.0f ) // Don't bother checking shadow map if the pixel is unlit
- {
- vLightingColor *= CalcUnshadowedAmountPCF2x2( iLight, Input.vPosWorld );
- }
-#endif // #ifndef NO_SHADOW_MAP
- vTotalLightingColor += vLightingColor;
- }
-#endif // #ifndef NO_DYNAMIC_LIGHTING
-
- return vDiffuse * g_vTintColor * g_vObjectColor * vTotalLightingColor;
-}
diff --git a/tests/hlsl/dxsdk/MultithreadedRendering11/MultithreadedRendering11_VS.hlsl b/tests/hlsl/dxsdk/MultithreadedRendering11/MultithreadedRendering11_VS.hlsl
deleted file mode 100644
index 12fe14ae9..000000000
--- a/tests/hlsl/dxsdk/MultithreadedRendering11/MultithreadedRendering11_VS.hlsl
+++ /dev/null
@@ -1,83 +0,0 @@
-//TEST:COMPARE_HLSL: -profile vs_4_0 -entry VSMain
-
-#ifndef __SLANG__
-#define cbPerObject cbPerObject_0
-#define g_mWorld g_mWorld_0
-#define cbPerScene cbPerScene_0
-#define g_mViewProj g_mViewProj_0
-#endif
-
-//--------------------------------------------------------------------------------------
-// File: MultithreadedRendering11_VS.hlsl
-//
-// The vertex shader file for the MultithreadedRendering11 sample.
-//
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-// Various debug options
-//#define UNCOMPRESSED_VERTEX_DATA // The sdkmesh file contained uncompressed vertex data
-
-//--------------------------------------------------------------------------------------
-// Globals
-//--------------------------------------------------------------------------------------
-cbuffer cbPerObject : register( b0 )
-{
- matrix g_mWorld ;//SLANG: : packoffset( c0 );
-};
-cbuffer cbPerScene : register( b1 )
-{
- matrix g_mViewProj ;//SLANG: : packoffset( c0 );
-};
-
-//--------------------------------------------------------------------------------------
-// Input / Output structures
-//--------------------------------------------------------------------------------------
-struct VS_INPUT
-{
- float4 vPosition : POSITION;
- float3 vNormal : NORMAL;
- float2 vTexcoord : TEXCOORD0;
- float3 vTangent : TANGENT;
-};
-
-struct VS_OUTPUT
-{
- float3 vNormal : NORMAL;
- float3 vTangent : TANGENT;
- float2 vTexcoord : TEXCOORD0;
- float4 vPosWorld : TEXCOORD1;
- float4 vPosition : SV_POSITION;
-};
-
-// We aliased signed vectors as a unsigned format.
-// Need to recover signed values. The values 1.0 and 2.0
-// are slightly inaccurate here.
-float3 R10G10B10A2_UNORM_TO_R32G32B32_FLOAT( in float3 vVec )
-{
- vVec *= 2.0f;
- return vVec >= 1.0f ? ( vVec - 2.0f ) : vVec;
-}
-
-//--------------------------------------------------------------------------------------
-// Vertex Shader
-//--------------------------------------------------------------------------------------
-VS_OUTPUT VSMain( VS_INPUT Input )
-{
- VS_OUTPUT Output;
-
-#ifndef UNCOMPRESSED_VERTEX_DATA
- // Expand compressed vectors
- Input.vNormal = R10G10B10A2_UNORM_TO_R32G32B32_FLOAT( Input.vNormal );
- Input.vTangent = R10G10B10A2_UNORM_TO_R32G32B32_FLOAT( Input.vTangent );
-#endif // #ifndef UNCOMPRESSED_VERTEX_DATA
-
- Output.vPosWorld = mul( Input.vPosition, g_mWorld );
- Output.vPosition = mul( Output.vPosWorld, g_mViewProj );
- Output.vNormal = mul( Input.vNormal, (float3x3)g_mWorld );
- Output.vTangent = mul( Input.vTangent, (float3x3)g_mWorld );
- Output.vTexcoord = Input.vTexcoord;
-
- return Output;
-}
-
diff --git a/tests/hlsl/dxsdk/NBodyGravityCS11/NBodyGravityCS11.hlsl b/tests/hlsl/dxsdk/NBodyGravityCS11/NBodyGravityCS11.hlsl
deleted file mode 100644
index bac2839db..000000000
--- a/tests/hlsl/dxsdk/NBodyGravityCS11/NBodyGravityCS11.hlsl
+++ /dev/null
@@ -1,103 +0,0 @@
-//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues.
-//TEST:COMPARE_HLSL: -profile cs_4_0 -entry CSMain
-//--------------------------------------------------------------------------------------
-// File: NBodyGravityCS11.hlsl
-//
-// Demonstrates how to use Compute Shader to do n-body gravity computation
-//
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-static float softeningSquared = 0.0012500000*0.0012500000;
-static float g_fG = 6.67300e-11f * 10000.0f;
-static float g_fParticleMass = g_fG*10000.0f * 10000.0f;
-
-#define blocksize 128
-groupshared float4 sharedPos[blocksize];
-
-// Body to body interaction, acceleration of the particle at position bi is updated
-void bodyBodyInteraction(inout float3 ai, float4 bj, float4 bi, float mass, int particles )
-{
- float3 r = bj.xyz - bi.xyz;
-
- float distSqr = dot(r, r);
- distSqr += softeningSquared;
-
- float invDist = 1.0f / sqrt(distSqr);
- float invDistCube = invDist * invDist * invDist;
-
- float s = mass * invDistCube * particles;
-
- ai += r * s;
-}
-
-cbuffer cbCS : register( b0 )
-{
- uint4 g_param; // pcbCS->param[0] = MAX_PARTICLES;
- // pcbCS->param[1] = dimx;
- float4 g_paramf; // pcbCS->paramf[0] = 0.1f;
- // pcbCS->paramf[1] = 1;
-};
-
-struct PosVelo
-{
- float4 pos;
- float4 velo;
-};
-
-StructuredBuffer<PosVelo> oldPosVelo;
-RWStructuredBuffer<PosVelo> newPosVelo;
-
-[numthreads(blocksize, 1, 1)]
-void CSMain( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
-{
- // Each thread of the CS updates one of the particles
-
- float4 pos = oldPosVelo[DTid.x].pos;
- float4 vel = oldPosVelo[DTid.x].velo;
- float3 accel = 0;
- float mass = g_fParticleMass;
-
- // Update current particle using all other particles
- [loop]
- for (uint tile = 0; tile < g_param.y; tile++)
- {
- // Cache a tile of particles unto shared memory to increase IO efficiency
- sharedPos[GI] = oldPosVelo[tile * blocksize + GI].pos;
-
- GroupMemoryBarrierWithGroupSync();
-
- [unroll]
- for (uint counter = 0; counter < blocksize; counter+=8 )
- {
- bodyBodyInteraction(accel, sharedPos[counter], pos, mass, 1);
- bodyBodyInteraction(accel, sharedPos[counter+1], pos, mass, 1);
- bodyBodyInteraction(accel, sharedPos[counter+2], pos, mass, 1);
- bodyBodyInteraction(accel, sharedPos[counter+3], pos, mass, 1);
- bodyBodyInteraction(accel, sharedPos[counter+4], pos, mass, 1);
- bodyBodyInteraction(accel, sharedPos[counter+5], pos, mass, 1);
- bodyBodyInteraction(accel, sharedPos[counter+6], pos, mass, 1);
- bodyBodyInteraction(accel, sharedPos[counter+7], pos, mass, 1);
- }
-
- GroupMemoryBarrierWithGroupSync();
- }
-
- // g_param.x is the number of our particles, however this number might not be an exact multiple of the tile size.
- // In such cases, out of bound reads occur in the process above, which means there will be
- // tooManyParticles "phantom" particles generating false gravity at position (0, 0, 0), so we have to substract them here.
- // NOTE, out of bound reads always return 0 in CS
- const uint tooManyParticles = g_param.y * blocksize - g_param.x;
- bodyBodyInteraction(accel, float4(0, 0, 0, 0), pos, mass, -tooManyParticles);
-
- // Update the velocity and position of current particle using the acceleration computed above
- vel.xyz += accel.xyz * g_paramf.x; //deltaTime;
- vel.xyz *= g_paramf.y; //damping;
- pos.xyz += vel.xyz * g_paramf.x; //deltaTime;
-
- if ( DTid.x < g_param.x )
- {
- newPosVelo[DTid.x].pos = pos;
- newPosVelo[DTid.x].velo = float4(vel.xyz, length(accel));
- }
-}
diff --git a/tests/hlsl/dxsdk/NBodyGravityCS11/ParticleDraw.hlsl b/tests/hlsl/dxsdk/NBodyGravityCS11/ParticleDraw.hlsl
deleted file mode 100644
index 7f6292662..000000000
--- a/tests/hlsl/dxsdk/NBodyGravityCS11/ParticleDraw.hlsl
+++ /dev/null
@@ -1,128 +0,0 @@
-//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues.
-//TEST:COMPARE_HLSL: -profile vs_4_0 -entry VSParticleDraw -profile gs_4_0 -entry GSParticleDraw -profile ps_4_0 -entry PSParticleDraw
-//--------------------------------------------------------------------------------------
-// File: ParticleDraw.hlsl
-//
-// Shaders for rendering the particle as point sprite
-//
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-struct VSParticleIn
-{
- float4 color : COLOR;
- uint id : SV_VERTEXID;
-};
-
-struct VSParticleDrawOut
-{
- float3 pos : POSITION;
- float4 color : COLOR;
-};
-
-struct GSParticleDrawOut
-{
- float2 tex : TEXCOORD0;
- float4 color : COLOR;
- float4 pos : SV_POSITION;
-};
-
-struct PSParticleDrawIn
-{
- float2 tex : TEXCOORD0;
- float4 color : COLOR;
-};
-
-struct PosVelo
-{
- float4 pos;
- float4 velo;
-};
-
-Texture2D g_txDiffuse;
-StructuredBuffer<PosVelo> g_bufPosVelo;
-
-
-SamplerState g_samLinear
-{
- Filter = MIN_MAG_MIP_LINEAR;
- AddressU = Clamp;
- AddressV = Clamp;
-};
-
-cbuffer cb0
-{
- row_major float4x4 g_mWorldViewProj;
- row_major float4x4 g_mInvView;
-};
-
-cbuffer cb1
-{
- static float g_fParticleRad = 10.0f;
-};
-
-cbuffer cbImmutable
-{
- static float3 g_positions[4] =
- {
- float3( -1, 1, 0 ),
- float3( 1, 1, 0 ),
- float3( -1, -1, 0 ),
- float3( 1, -1, 0 ),
- };
-
- static float2 g_texcoords[4] =
- {
- float2(0,0),
- float2(1,0),
- float2(0,1),
- float2(1,1),
- };
-};
-
-//
-// Vertex shader for drawing the point-sprite particles
-//
-VSParticleDrawOut VSParticleDraw(VSParticleIn input)
-{
- VSParticleDrawOut output;
-
- output.pos = g_bufPosVelo[input.id].pos;
-
- float mag = g_bufPosVelo[input.id].velo.w/9;
- output.color = lerp( float4(1,0.1,0.1,1), input.color, mag );
-
- return output;
-}
-
-//
-// GS for rendering point sprite particles. Takes a point and turns it into 2 tris.
-//
-[maxvertexcount(4)]
-void GSParticleDraw(point VSParticleDrawOut input[1], inout TriangleStream<GSParticleDrawOut> SpriteStream)
-{
- GSParticleDrawOut output;
-
- //
- // Emit two new triangles
- //
- for(int i=0; i<4; i++)
- {
- float3 position = g_positions[i] * g_fParticleRad;
- position = mul( position, (float3x3)g_mInvView ) + input[0].pos;
- output.pos = mul( float4(position,1.0), g_mWorldViewProj );
-
- output.color = input[0].color;
- output.tex = g_texcoords[i];
- SpriteStream.Append(output);
- }
- SpriteStream.RestartStrip();
-}
-
-//
-// PS for drawing particles
-//
-float4 PSParticleDraw(PSParticleDrawIn input) : SV_Target
-{
- return g_txDiffuse.Sample( g_samLinear, input.tex ) * input.color;
-} \ No newline at end of file
diff --git a/tests/hlsl/dxsdk/OIT11/OIT_CS.hlsl b/tests/hlsl/dxsdk/OIT11/OIT_CS.hlsl
deleted file mode 100644
index 80a1e165e..000000000
--- a/tests/hlsl/dxsdk/OIT11/OIT_CS.hlsl
+++ /dev/null
@@ -1,277 +0,0 @@
-//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues.
-//TEST:COMPARE_HLSL: -profile cs_4_0 -entry VSParticleDraw -profile gs_4_0 -entry GSParticleDraw -profile ps_4_0 -entry PSParticleDraw
-//-----------------------------------------------------------------------------
-// File: OIT_CS.hlsl
-//
-// Desc: Compute shaders for used in the Order Independent Transparency sample.
-//
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//-----------------------------------------------------------------------------
-// TODO: use structured buffers
-RWBuffer<float> deepBufferDepth : register( u0 );
-RWBuffer<uint> deepBufferColorUINT : register( u1 );
-RWTexture2D<float4> frameBuffer : register( u2 );
-RWBuffer<uint> prefixSum : register( u3 );
-
-Texture2D<uint> fragmentCount : register ( t0 );
-
-cbuffer CB : register( b0 )
-{
- uint g_nFrameWidth : packoffset( c0.x );
- uint g_nFrameHeight : packoffset( c0.y );
- uint g_nPassSize : packoffset( c0.z );
- uint g_nReserved : packoffset( c0.w );
-}
-
-#define blocksize 1
-#define groupthreads (blocksize*blocksize)
-groupshared float accum[groupthreads];
-
-// First pass of the prefix sum creation algorithm. Converts a 2D buffer to a 1D buffer,
-// and sums every other value with the previous value.
-[numthreads(1,1,1)]
-void CreatePrefixSum_Pass0_CS( uint3 nGid : SV_GroupID, uint3 nDTid : SV_DispatchThreadID, uint3 nGTid : SV_GroupThreadID )
-{
- int nThreadNum = nGid.y*g_nFrameWidth + nGid.x;
- if( nThreadNum%2 == 0 )
- {
- prefixSum[nThreadNum] = fragmentCount[nGid.xy];
-
- // Add the Fragment count to the next bin
- if( (nThreadNum+1) < g_nFrameWidth * g_nFrameHeight )
- {
- int2 nextUV;
- nextUV.x = (nThreadNum+1) % g_nFrameWidth;
- nextUV.y = (nThreadNum+1) / g_nFrameWidth;
- prefixSum[ nThreadNum+1 ] = prefixSum[ nThreadNum ] + fragmentCount[ nextUV ];
- }
- }
-}
-
-// Second and following passes. Each pass distributes the sum of the first half of the group
-// to the second half of the group. There are n/groupsize groups in each pass.
-// Each pass increases the group size until it is the size of the buffer.
-// The resulting buffer holds the prefix sum of all preceding values in each
-// position
-[numthreads(1,1,1)]
-void CreatePrefixSum_Pass1_CS( uint3 nGid : SV_GroupID, uint3 nDTid : SV_DispatchThreadID, uint3 nGTid : SV_GroupThreadID )
-{
- int nThreadNum = nGid.x;
-
- int nValue = prefixSum[nThreadNum*g_nPassSize + g_nPassSize/2 - 1];
- for(int i = nThreadNum*g_nPassSize + g_nPassSize/2; i < nThreadNum*g_nPassSize + g_nPassSize && i < g_nFrameWidth*g_nFrameHeight; i++)
- {
- prefixSum[i] = prefixSum[i] + nValue;
- }
-}
-
-#if 1
-
-// Sort the fragments using a bitonic sort, then accumulate the fragments into the final result.
-groupshared int nIndex[32];
-#define NUM_THREADS 8
-[numthreads(1,1,1)]
-void SortAndRenderCS( uint3 nGid : SV_GroupID, uint3 nDTid : SV_DispatchThreadID, uint3 nGTid : SV_GroupThreadID )
-{
- uint nThreadNum = nGid.y * g_nFrameWidth + nGid.x;
-
-// uint r0, r1, r2;
-// float rd0, rd1, rd2, rd3, rd4, rd5, rd6, rd7;
-
- uint N = fragmentCount[nDTid.xy];
-
- uint N2 = 1 << (int)(ceil(log2(N)));
-
- float fDepth[32];
- for(int i = 0; i < N; i++)
- {
- nIndex[i] = i;
- fDepth[i] = deepBufferDepth[ prefixSum[nThreadNum-1] + i ];
- }
- for(int i = N; i < N2; i++)
- {
- nIndex[i] = i;
- fDepth[i] = 1.1f;
- }
-
- uint idx = blocksize*nGTid.y + nGTid.x;
-
- // Bitonic sort
- for( int k = 2; k <= N2; k = 2*k )
- {
- for( int j = k>>1; j > 0 ; j = j>>1 )
- {
- for( int i = 0; i < N2; i++ )
- {
-// GroupMemoryBarrierWithGroupSync();
- //i = idx;
-
- float di = fDepth[ nIndex[ i ] ];
- int ixj = i^j;
- if ( ( ixj ) > i )
- {
- float dixj = fDepth[ nIndex[ ixj ] ];
- if ( ( i&k ) == 0 && di > dixj )
- {
- int temp = nIndex[ i ];
- nIndex[ i ] = nIndex[ ixj ];
- nIndex[ ixj ] = temp;
- }
- if ( ( i&k ) != 0 && di < dixj )
- {
- int temp = nIndex[ i ];
- nIndex[ i ] = nIndex[ ixj ];
- nIndex[ ixj ] = temp;
- }
- }
- }
- }
- }
-
- // Output the final result to the frame buffer
- if( idx == 0 )
- {
-
- /*
- // Debug
- uint color[8];
- for(int i = 0; i < 8; i++)
- {
- color[i] = deepBufferColorUINT[prefixSum[nThreadNum-1] + i];
- }
-
- for(int i = 0; i < 8; i++)
- {
- deepBufferDepth[nThreadNum*8+i] = fDepth[i];//fDepth[nIndex[i]];
- deepBufferColorUINT[nThreadNum*8+i] = color[nIndex[i]];
- }
- */
-
- // Accumulate fragments into final result
- float4 result = 0.0f;
- for( int x = N-1; x >= 0; x-- )
- {
- uint bufferValue = deepBufferColorUINT[ prefixSum[nThreadNum-1] + nIndex[ x ] ];
- float4 color;
- color.r = ( ( bufferValue >> 0 & 0xFF )) / 255.0f;
- color.g = ( bufferValue >> 8 & 0xFF ) / 255.0f;
- color.b = ( bufferValue >> 16 & 0xFF ) / 255.0f;
- color.a = ( bufferValue >> 24 & 0xFF ) / 255.0f;
- result = lerp( result, color, color.a );
- }
- result.a = 1.0f;
- frameBuffer[ nGid.xy ] = result;
- }
-}
-
-#else
-[numthreads(1,1,1)]
-void SortAndRenderCS( uint3 nGid : SV_GroupID, uint3 nDTid : SV_DispatchThreadID, uint3 nGTid : SV_GroupThreadID )
-{
- uint nThreadNum = nDTid.y * g_nFrameWidth + nDTid.x;
- float d0 = deepBufferDepth[nThreadNum*8];
- float d1 = deepBufferDepth[nThreadNum*8+1];
- float d2 = deepBufferDepth[nThreadNum*8+2];
-
- uint s0 = deepBufferColorUINT[nThreadNum*8 + 0];
- uint s1 = deepBufferColorUINT[nThreadNum*8 + 1];
- uint s2 = deepBufferColorUINT[nThreadNum*8 + 2];
-
- uint r0, r1, r2;
- float rd0, rd1, rd2;
- if( d0 < d1 && d0 < d2 )
- {
- r0 = s0;
- rd0 = d0;
- if( d1 < d2 )
- {
- r1 = s1;
- r2 = s2;
-
- rd1 = d1;
- rd2 = d2;
- }
- else
- {
- r1 = s2;
- r2 = s1;
-
- rd1 = d2;
- rd2 = d1;
- }
- }
- else if( d1 < d2 )
- {
- r0 = s1;
- rd0 = d1;
- if( d0 < d2 )
- {
- r1 = s0;
- r2 = s2;
-
- rd1 = d0;
- rd2 = d2;
- }
- else
- {
- r1 = s2;
- r2 = s0;
-
- rd1 = d2;
- rd2 = d0;
- }
- }
- else
- {
- r0 = s2;
- rd0 = d2;
- if( d1 < d0 )
- {
- r1 = s1;
- r2 = s0;
-
- rd1 = d1;
- rd2 = d0;
- }
- else
- {
- r1 = s0;
- r2 = s1;
-
- rd1 = d0;
- rd2 = d1;
- }
- }
-
- deepBufferDepth[nThreadNum*8] = rd0;
- deepBufferDepth[nThreadNum*8+1] = rd1;
- deepBufferDepth[nThreadNum*8+2] = rd2;
-
- deepBufferColorUINT[nThreadNum*8] = r0;
- deepBufferColorUINT[nThreadNum*8+1] = r1;
- deepBufferColorUINT[nThreadNum*8+2] = r2;
-
- // convert the color to floats
- float4 color[3];
- color[0].r = (r0 >> 0 & 0xFF) / 255.0f;
- color[0].g = (r0 >> 8 & 0xFF) / 255.0f;
- color[0].b = (r0 >> 16 & 0xFF) / 255.0f;
- color[0].a = (r0 >> 24 & 0xFF) / 255.0f;
-
- color[1].r = (r1 >> 0 & 0xFF) / 255.0f;
- color[1].g = (r1 >> 8 & 0xFF) / 255.0f;
- color[1].b = (r1 >> 16 & 0xFF) / 255.0f;
- color[1].a = (r1 >> 24 & 0xFF) / 255.0f;
-
- color[2].r = (r2 >> 0 & 0xFF) / 255.0f;
- color[2].g = (r2 >> 8 & 0xFF) / 255.0f;
- color[2].b = (r2 >> 16 & 0xFF) / 255.0f;
- color[2].a = (r2 >> 24 & 0xFF) / 255.0f;
-
- float4 result = lerp(lerp(lerp(0, color[2], color[2].a), color[1], color[1].a), color[0], color[0].a);
- result.a = 1.0f;
-
- frameBuffer[nDTid.xy] = result;
-}
-
-#endif \ No newline at end of file
diff --git a/tests/hlsl/dxsdk/OIT11/OIT_PS.hlsl b/tests/hlsl/dxsdk/OIT11/OIT_PS.hlsl
deleted file mode 100644
index 5fae02d62..000000000
--- a/tests/hlsl/dxsdk/OIT11/OIT_PS.hlsl
+++ /dev/null
@@ -1,56 +0,0 @@
-//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues.
-//TEST:COMPARE_HLSL: -profile ps_4_0 -entry FragmentCountPS -entry FillDeepBufferPS
-//-----------------------------------------------------------------------------
-// File: OITPS.hlsl
-//
-// Desc: Pixel shaders used in the Order Independent Transparency sample.
-//
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//-----------------------------------------------------------------------------
-//TODO: Use structured buffers
-RWTexture2D<uint> fragmentCount : register( u1 );
-RWBuffer<float> deepBufferDepth : register( u2 );
-RWBuffer<uint4> deepBufferColor : register( u3 );
-RWBuffer<uint> prefixSum : register( u4 );
-
-cbuffer CB : register( b0 )
-{
- uint g_nFrameWidth : packoffset( c0.x );
- uint g_nFrameHeight : packoffset( c0.y );
- uint g_nReserved0 : packoffset( c0.z );
- uint g_nReserved1 : packoffset( c0.w );
-}
-
-struct SceneVS_Output
-{
- float4 pos : SV_POSITION;
- float4 color : COLOR0;
-};
-
-void FragmentCountPS( SceneVS_Output input)
-{
- // Increments need to be done atomically
- InterlockedAdd(fragmentCount[input.pos.xy], 1);
-}
-
-void FillDeepBufferPS( SceneVS_Output input )
-{
- uint x = input.pos.x;
- uint y = input.pos.y;
-
- // Atomically allocate space in the deep buffer
- uint fc;
- InterlockedAdd(fragmentCount[input.pos.xy], 1, fc);
-
- uint nPrefixSumPos = y*g_nFrameWidth + x;
- uint nDeepBufferPos;
- if( nPrefixSumPos == 0 )
- nDeepBufferPos = fc;
- else
- nDeepBufferPos = prefixSum[nPrefixSumPos-1] + fc;
-
- // Store fragment data into the allocated space
- deepBufferDepth[nDeepBufferPos] = input.pos.z;
- deepBufferColor[nDeepBufferPos] = clamp(input.color, 0, 1)*255;
-}
-
diff --git a/tests/hlsl/dxsdk/OIT11/SceneVS.hlsl b/tests/hlsl/dxsdk/OIT11/SceneVS.hlsl
deleted file mode 100644
index 2254091f6..000000000
--- a/tests/hlsl/dxsdk/OIT11/SceneVS.hlsl
+++ /dev/null
@@ -1,42 +0,0 @@
-//TEST:COMPARE_HLSL: -profile vs_4_0 -entry SceneVS
-
-#ifndef __SLANG__
-#define cbPerObject cbPerObject_0
-#define g_mWorldViewProjection g_mWorldViewProjection_0
-#endif
-
-//-----------------------------------------------------------------------------
-// File: SceneVS.hlsl
-//
-// Desc: Vertex shader for the scene.
-//
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//-----------------------------------------------------------------------------
-
-
-cbuffer cbPerObject : register( b0 )
-{
- row_major matrix g_mWorldViewProjection ;//SLANG: : packoffset( c0 );
-}
-
-struct SceneVS_Input
-{
- float4 pos : POSITION;
- float4 color : COLOR;
-};
-
-struct SceneVS_Output
-{
- float4 pos : SV_POSITION;
- float4 color : COLOR0;
-};
-
-SceneVS_Output SceneVS( SceneVS_Input input )
-{
- SceneVS_Output output;
-
- output.color = input.color;
- output.pos = mul(input.pos, g_mWorldViewProjection );
-
- return output;
-}
diff --git a/tests/hlsl/dxsdk/README.md b/tests/hlsl/dxsdk/README.md
deleted file mode 100644
index dd0c0fb6b..000000000
--- a/tests/hlsl/dxsdk/README.md
+++ /dev/null
@@ -1,5 +0,0 @@
-DirectX SDK Sample Shaders
-==========================
-
-This directory contains shaders that have shipped as part of the DirectX SDK.
-The licsense terms for these shaders are specificed at the top of the source files. \ No newline at end of file
diff --git a/tests/hlsl/dxsdk/SimpleBezier11/SimpleBezier11.hlsl b/tests/hlsl/dxsdk/SimpleBezier11/SimpleBezier11.hlsl
deleted file mode 100644
index d01cd7aa4..000000000
--- a/tests/hlsl/dxsdk/SimpleBezier11/SimpleBezier11.hlsl
+++ /dev/null
@@ -1,233 +0,0 @@
-//TEST_DISABLED:COMPARE_HLSL: -profile vs_4_0 -entry BezierVS -profile hs_5_0 -entry BezierHS -profile ds_5_0 -entry BezierDS -profile ps_4_0 -entry BezierPS -entry SolidColorPS
-
-// Note(Slang): Disabling this test for now because compiling it via IR ends up creating a local variable of the `OutputPatch<...>` type, which we need to get rid of via SSA optimization.
-
-
-//--------------------------------------------------------------------------------------
-// File: SimpleBezier11.hlsl
-//
-// This sample shows an simple implementation of the DirectX 11 Hardware Tessellator
-// for rendering a Bezier Patch.
-//
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-// This allows us to compile the shader with a #define to choose
-// the different partition modes for the hull shader.
-// See the hull shader: [partitioning(BEZIER_HS_PARTITION)]
-// This sample demonstrates "integer", "fractional_even", and "fractional_odd"
-#ifndef BEZIER_HS_PARTITION
-#define BEZIER_HS_PARTITION "integer"
-#endif // BEZIER_HS_PARTITION
-
-// The input patch size. In this sample, it is 16 control points.
-// This value should match the call to IASetPrimitiveTopology()
-#define INPUT_PATCH_SIZE 16
-
-// The output patch size. In this sample, it is also 16 control points.
-#define OUTPUT_PATCH_SIZE 16
-
-//--------------------------------------------------------------------------------------
-// Constant Buffers
-//--------------------------------------------------------------------------------------
-cbuffer cbPerFrame : register( b0 )
-{
- matrix g_mViewProjection;
- float3 g_vCameraPosWorld;
- float g_fTessellationFactor;
-};
-
-//--------------------------------------------------------------------------------------
-// Vertex shader section
-//--------------------------------------------------------------------------------------
-struct VS_CONTROL_POINT_INPUT
-{
- float3 vPosition : POSITION;
-};
-
-struct VS_CONTROL_POINT_OUTPUT
-{
- float3 vPosition : POSITION;
-};
-
-// This simple vertex shader passes the control points straight through to the
-// hull shader. In a more complex scene, you might transform the control points
-// or perform skinning at this step.
-
-// The input to the vertex shader comes from the vertex buffer.
-
-// The output from the vertex shader will go into the hull shader.
-
-VS_CONTROL_POINT_OUTPUT BezierVS( VS_CONTROL_POINT_INPUT Input )
-{
- VS_CONTROL_POINT_OUTPUT Output;
-
- Output.vPosition = Input.vPosition;
-
- return Output;
-}
-
-//--------------------------------------------------------------------------------------
-// Constant data function for the BezierHS. This is executed once per patch.
-//--------------------------------------------------------------------------------------
-struct HS_CONSTANT_DATA_OUTPUT
-{
- float Edges[4] : SV_TessFactor;
- float Inside[2] : SV_InsideTessFactor;
-};
-
-struct HS_OUTPUT
-{
- float3 vPosition : BEZIERPOS;
-};
-
-// This constant hull shader is executed once per patch. For the simple Mobius strip
-// model, it will be executed 4 times. In this sample, we set the tessellation factor
-// via SV_TessFactor and SV_InsideTessFactor for each patch. In a more complex scene,
-// you might calculate a variable tessellation factor based on the camera's distance.
-
-HS_CONSTANT_DATA_OUTPUT BezierConstantHS( InputPatch<VS_CONTROL_POINT_OUTPUT, INPUT_PATCH_SIZE> ip,
- uint PatchID : SV_PrimitiveID )
-{
- HS_CONSTANT_DATA_OUTPUT Output;
-
- float TessAmount = g_fTessellationFactor;
-
- Output.Edges[0] = Output.Edges[1] = Output.Edges[2] = Output.Edges[3] = TessAmount;
- Output.Inside[0] = Output.Inside[1] = TessAmount;
-
- return Output;
-}
-
-// The hull shader is called once per output control point, which is specified with
-// outputcontrolpoints. For this sample, we take the control points from the vertex
-// shader and pass them directly off to the domain shader. In a more complex scene,
-// you might perform a basis conversion from the input control points into a Bezier
-// patch, such as the SubD11 Sample.
-
-// The input to the hull shader comes from the vertex shader
-
-// The output from the hull shader will go to the domain shader.
-// The tessellation factor, topology, and partition mode will go to the fixed function
-// tessellator stage to calculate the UVW and domain points.
-
-[domain("quad")]
-[partitioning(BEZIER_HS_PARTITION)]
-[outputtopology("triangle_cw")]
-[outputcontrolpoints(OUTPUT_PATCH_SIZE)]
-[patchconstantfunc("BezierConstantHS")]
-HS_OUTPUT BezierHS( InputPatch<VS_CONTROL_POINT_OUTPUT, INPUT_PATCH_SIZE> p,
- uint i : SV_OutputControlPointID,
- uint PatchID : SV_PrimitiveID )
-{
- HS_OUTPUT Output;
- Output.vPosition = p[i].vPosition;
- return Output;
-}
-
-//--------------------------------------------------------------------------------------
-// Bezier evaluation domain shader section
-//--------------------------------------------------------------------------------------
-struct DS_OUTPUT
-{
- float4 vPosition : SV_POSITION;
- float3 vWorldPos : WORLDPOS;
- float3 vNormal : NORMAL;
-};
-
-//--------------------------------------------------------------------------------------
-float4 BernsteinBasis(float t)
-{
- float invT = 1.0f - t;
-
- return float4( invT * invT * invT,
- 3.0f * t * invT * invT,
- 3.0f * t * t * invT,
- t * t * t );
-}
-
-//--------------------------------------------------------------------------------------
-float4 dBernsteinBasis(float t)
-{
- float invT = 1.0f - t;
-
- return float4( -3 * invT * invT,
- 3 * invT * invT - 6 * t * invT,
- 6 * t * invT - 3 * t * t,
- 3 * t * t );
-}
-
-//--------------------------------------------------------------------------------------
-float3 EvaluateBezier( const OutputPatch<HS_OUTPUT, OUTPUT_PATCH_SIZE> bezpatch,
- float4 BasisU,
- float4 BasisV )
-{
- float3 Value = float3(0,0,0);
- Value = BasisV.x * ( bezpatch[0].vPosition * BasisU.x + bezpatch[1].vPosition * BasisU.y + bezpatch[2].vPosition * BasisU.z + bezpatch[3].vPosition * BasisU.w );
- Value += BasisV.y * ( bezpatch[4].vPosition * BasisU.x + bezpatch[5].vPosition * BasisU.y + bezpatch[6].vPosition * BasisU.z + bezpatch[7].vPosition * BasisU.w );
- Value += BasisV.z * ( bezpatch[8].vPosition * BasisU.x + bezpatch[9].vPosition * BasisU.y + bezpatch[10].vPosition * BasisU.z + bezpatch[11].vPosition * BasisU.w );
- Value += BasisV.w * ( bezpatch[12].vPosition * BasisU.x + bezpatch[13].vPosition * BasisU.y + bezpatch[14].vPosition * BasisU.z + bezpatch[15].vPosition * BasisU.w );
-
- return Value;
-}
-
-// The domain shader is run once per vertex and calculates the final vertex's position
-// and attributes. It receives the UVW from the fixed function tessellator and the
-// control point outputs from the hull shader. Since we are using the DirectX 11
-// Tessellation pipeline, it is the domain shader's responsibility to calculate the
-// final SV_POSITION for each vertex. In this sample, we evaluate the vertex's
-// position using a Bernstein polynomial and the normal is calculated as the cross
-// product of the U and V derivatives.
-
-// The input SV_DomainLocation to the domain shader comes from fixed function
-// tessellator. And the OutputPatch comes from the hull shader. From these, you
-// must calculate the final vertex position, color, texcoords, and other attributes.
-
-// The output from the domain shader will be a vertex that will go to the video card's
-// rasterization pipeline and get drawn to the screen.
-
-[domain("quad")]
-DS_OUTPUT BezierDS( HS_CONSTANT_DATA_OUTPUT input,
- float2 UV : SV_DomainLocation,
- const OutputPatch<HS_OUTPUT, OUTPUT_PATCH_SIZE> bezpatch )
-{
- float4 BasisU = BernsteinBasis( UV.x );
- float4 BasisV = BernsteinBasis( UV.y );
- float4 dBasisU = dBernsteinBasis( UV.x );
- float4 dBasisV = dBernsteinBasis( UV.y );
-
- float3 WorldPos = EvaluateBezier( bezpatch, BasisU, BasisV );
- float3 Tangent = EvaluateBezier( bezpatch, dBasisU, BasisV );
- float3 BiTangent = EvaluateBezier( bezpatch, BasisU, dBasisV );
- float3 Norm = normalize( cross( Tangent, BiTangent ) );
-
- DS_OUTPUT Output;
- Output.vPosition = mul( float4(WorldPos,1), g_mViewProjection );
- Output.vWorldPos = WorldPos;
- Output.vNormal = Norm;
-
- return Output;
-}
-
-//--------------------------------------------------------------------------------------
-// Smooth shading pixel shader section
-//--------------------------------------------------------------------------------------
-
-// The pixel shader works the same as it would in a normal graphics pipeline.
-// In this sample, it performs very simple N dot L lighting.
-
-float4 BezierPS( DS_OUTPUT Input ) : SV_TARGET
-{
- float3 N = normalize(Input.vNormal);
- float3 L = normalize(Input.vWorldPos - g_vCameraPosWorld);
- return abs(dot(N, L)) * float4(1, 0, 0, 1);
-}
-
-//--------------------------------------------------------------------------------------
-// Solid color shading pixel shader (used for wireframe overlay)
-//--------------------------------------------------------------------------------------
-float4 SolidColorPS( DS_OUTPUT Input ) : SV_TARGET
-{
- // Return a solid green color
- return float4( 0, 1, 0, 1 );
-}
diff --git a/tests/hlsl/dxsdk/SimpleSample11/SimpleSample.fx b/tests/hlsl/dxsdk/SimpleSample11/SimpleSample.fx
deleted file mode 100644
index 00883ce70..000000000
--- a/tests/hlsl/dxsdk/SimpleSample11/SimpleSample.fx
+++ /dev/null
@@ -1,112 +0,0 @@
-//TEST_IGNORE_FILE:
-//--------------------------------------------------------------------------------------
-// File: SimpleSample.fx
-//
-// The effect file for the SimpleSample sample.
-//
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-
-//--------------------------------------------------------------------------------------
-// Global variables
-//--------------------------------------------------------------------------------------
-float4 g_MaterialAmbientColor; // Material's ambient color
-float4 g_MaterialDiffuseColor; // Material's diffuse color
-float3 g_LightDir; // Light's direction in world space
-float4 g_LightDiffuse; // Light's diffuse color
-texture g_MeshTexture; // Color texture for mesh
-
-float g_fTime; // App's time in seconds
-float4x4 g_mWorld; // World matrix for object
-float4x4 g_mWorldViewProjection; // World * View * Projection matrix
-
-
-
-//--------------------------------------------------------------------------------------
-// Texture samplers
-//--------------------------------------------------------------------------------------
-sampler MeshTextureSampler =
-sampler_state
-{
- Texture = <g_MeshTexture>;
- MipFilter = LINEAR;
- MinFilter = LINEAR;
- MagFilter = LINEAR;
-};
-
-
-//--------------------------------------------------------------------------------------
-// Vertex shader output structure
-//--------------------------------------------------------------------------------------
-struct VS_OUTPUT
-{
- float4 Position : POSITION; // vertex position
- float4 Diffuse : COLOR0; // vertex diffuse color (note that COLOR0 is clamped from 0..1)
- float2 TextureUV : TEXCOORD0; // vertex texture coords
-};
-
-
-//--------------------------------------------------------------------------------------
-// This shader computes standard transform and lighting
-//--------------------------------------------------------------------------------------
-VS_OUTPUT RenderSceneVS( float4 vPos : POSITION,
- float3 vNormal : NORMAL,
- float2 vTexCoord0 : TEXCOORD0 )
-{
- VS_OUTPUT Output;
- float3 vNormalWorldSpace;
-
- // Transform the position from object space to homogeneous projection space
- Output.Position = mul(vPos, g_mWorldViewProjection);
-
- // Transform the normal from object space to world space
- vNormalWorldSpace = normalize(mul(vNormal, (float3x3)g_mWorld)); // normal (world space)
-
- // Calc diffuse color
- Output.Diffuse.rgb = g_MaterialDiffuseColor * g_LightDiffuse * max(0,dot(vNormalWorldSpace, g_LightDir)) +
- g_MaterialAmbientColor;
- Output.Diffuse.a = 1.0f;
-
- // Just copy the texture coordinate through
- Output.TextureUV = vTexCoord0;
-
- return Output;
-}
-
-
-//--------------------------------------------------------------------------------------
-// Pixel shader output structure
-//--------------------------------------------------------------------------------------
-struct PS_OUTPUT
-{
- float4 RGBColor : COLOR0; // Pixel color
-};
-
-
-//--------------------------------------------------------------------------------------
-// This shader outputs the pixel's color by modulating the texture's
-// color with diffuse material color
-//--------------------------------------------------------------------------------------
-PS_OUTPUT RenderScenePS( VS_OUTPUT In )
-{
- PS_OUTPUT Output;
-
- // Lookup mesh texture and modulate it with diffuse
- Output.RGBColor = tex2D(MeshTextureSampler, In.TextureUV) * In.Diffuse;
-
- return Output;
-}
-
-
-//--------------------------------------------------------------------------------------
-// Renders scene
-//--------------------------------------------------------------------------------------
-technique RenderScene
-{
- pass P0
- {
- VertexShader = compile vs_2_0 RenderSceneVS();
- PixelShader = compile ps_2_0 RenderScenePS();
- }
-}
diff --git a/tests/hlsl/dxsdk/SimpleSample11/SimpleSample.hlsl b/tests/hlsl/dxsdk/SimpleSample11/SimpleSample.hlsl
deleted file mode 100644
index 7c688940b..000000000
--- a/tests/hlsl/dxsdk/SimpleSample11/SimpleSample.hlsl
+++ /dev/null
@@ -1,86 +0,0 @@
-//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues.
-//TEST:COMPARE_HLSL: -profile vs_4_0 -entry RenderSceneVS -profile ps_4_0 -entry RenderScenePS
-//--------------------------------------------------------------------------------------
-// File: SimpleSample.hlsl
-//
-// The HLSL file for the SimpleSample sample for the Direct3D 11 device
-//
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-
-//--------------------------------------------------------------------------------------
-// Constant Buffers
-//--------------------------------------------------------------------------------------
-cbuffer cbPerObject : register( b0 )
-{
- matrix g_mWorldViewProjection : packoffset( c0 );
- matrix g_mWorld : packoffset( c4 );
- float4 g_MaterialAmbientColor : packoffset( c8 );
- float4 g_MaterialDiffuseColor : packoffset( c9 );
-}
-
-cbuffer cbPerFrame : register( b1 )
-{
- float3 g_vLightDir : packoffset( c0 );
- float g_fTime : packoffset( c0.w );
- float4 g_LightDiffuse : packoffset( c1 );
-};
-
-//-----------------------------------------------------------------------------------------
-// Textures and Samplers
-//-----------------------------------------------------------------------------------------
-Texture2D g_txDiffuse : register( t0 );
-SamplerState g_samLinear : register( s0 );
-
-//--------------------------------------------------------------------------------------
-// shader input/output structure
-//--------------------------------------------------------------------------------------
-struct VS_INPUT
-{
- float4 Position : POSITION; // vertex position
- float3 Normal : NORMAL; // this normal comes in per-vertex
- float2 TextureUV : TEXCOORD0;// vertex texture coords
-};
-
-struct VS_OUTPUT
-{
- float4 Position : SV_POSITION; // vertex position
- float4 Diffuse : COLOR0; // vertex diffuse color (note that COLOR0 is clamped from 0..1)
- float2 TextureUV : TEXCOORD0; // vertex texture coords
-};
-
-//--------------------------------------------------------------------------------------
-// This shader computes standard transform and lighting
-//--------------------------------------------------------------------------------------
-VS_OUTPUT RenderSceneVS( VS_INPUT input )
-{
- VS_OUTPUT Output;
- float3 vNormalWorldSpace;
-
- // Transform the position from object space to homogeneous projection space
- Output.Position = mul( input.Position, g_mWorldViewProjection );
-
- // Transform the normal from object space to world space
- vNormalWorldSpace = normalize(mul(input.Normal, (float3x3)g_mWorld)); // normal (world space)
-
- // Calc diffuse color
- Output.Diffuse.rgb = g_MaterialDiffuseColor * g_LightDiffuse * max(0,dot(vNormalWorldSpace, g_vLightDir)) +
- g_MaterialAmbientColor;
- Output.Diffuse.a = 1.0f;
-
- // Just copy the texture coordinate through
- Output.TextureUV = input.TextureUV;
-
- return Output;
-}
-
-//--------------------------------------------------------------------------------------
-// This shader outputs the pixel's color by modulating the texture's
-// color with diffuse material color
-//--------------------------------------------------------------------------------------
-float4 RenderScenePS( VS_OUTPUT In ) : SV_TARGET
-{
- // Lookup mesh texture and modulate it with diffuse
- return g_txDiffuse.Sample( g_samLinear, In.TextureUV ) * In.Diffuse;
-}
diff --git a/tests/hlsl/dxsdk/SubD11/SubD11.hlsl b/tests/hlsl/dxsdk/SubD11/SubD11.hlsl
deleted file mode 100644
index 839e004e6..000000000
--- a/tests/hlsl/dxsdk/SubD11/SubD11.hlsl
+++ /dev/null
@@ -1,1238 +0,0 @@
-//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues.
-//TEST:COMPARE_HLSL: -profile vs_4_0 -entry PatchSkinningVS -entry MeshSkinningVS -profile hs_5_0 -entry SubDToBezierHS -entry SubDToBezierHS4444 -profile ds_5_0 -entry BezierEvalDS -profile ps_4_0 -entry SmoothPS -entry SolidColorPS
-//--------------------------------------------------------------------------------------
-// File: SubD11.hlsl
-//
-// This file contains functions to convert from a Catmull-Clark subdivision
-// representation to a bicubic patch representation.
-//
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-//Work-around for an optimization rule problem in the June 2010 HLSL Compiler (9.29.952.3111)
-//see http://support.microsoft.com/kb/2448404
-#if D3DX_VERSION == 0xa2b
-#pragma ruledisable 0x0802405f
-#endif
-
-//--------------------------------------------------------------------------------------
-// A sample extraordinary SubD quad is represented by the following diagram:
-//
-// 15 Valences:
-// / \ Vertex 0: 5
-// / 14 Vertex 1: 4
-// 17---------16 / \ Vertex 2: 5
-// | \ | / \ Vertex 3: 3
-// | \ | / 13
-// | \ |/ / Prefixes:
-// | 3------2------12 Vertex 0: 9
-// | | | | Vertex 1: 12
-// | | | | Vertex 2: 16
-// 4----0------1------11 Vertex 3: 18
-// / /| | |
-// / / | | |
-// 5 / 8------9------10
-// \ / /
-// 6 /
-// \ /
-// 7
-//
-// Where the quad bounded by vertices 0,1,2,3 represents the actual subd surface of interest
-// The 1-ring neighborhood of the quad is represented by vertices 4 through 17. The counter-
-// clockwise winding of this 1-ring neighborhood is important, especially when it comes to compute
-// the corner vertices of the bicubic patch that we will use to approximate the subd quad (0,1,2,3).
-//
-// The resulting bicubic patch fits within the subd quad (0,1,2,3) and has the following control
-// point layout:
-//
-// 12--13--14--15
-// 8---9--10--11
-// 4---5---6---7
-// 0---1---2---3
-//
-// The inner 4 control points of the bicubic patch are a combination of only the vertices (0,1,2,3)
-// of the subd quad. However, the corner control points for the bicubic patch (0,3,15,12) are actually
-// a much more complex weighting of the subd patch and the 1-ring neighborhood. In the example above
-// the bicubic control point 0 is actually a weighted combination of subd points 0,1,2,3 and 1-ring
-// neighborhood points 17, 4, 5, 6, 7, 8, and 9. We can see that the 1-ring neighbor hood is simply
-// walked from the prefix value from the previous corner (corner 3 in this case) to the prefix
-// prefix value for the current corner. We add one more vertex on either side of the prefix values
-// and we have all the data necessary to calculate the value for the corner points.
-//
-// The edge control points of the bicubic patch (1,2,13,14,4,8,7,11) are also combinations of their
-// neighbors, but fortunately each one is only a combination of 6 values and no walk is required.
-//--------------------------------------------------------------------------------------
-
-#define MOD4(x) ((x)&3)
-#ifndef MAX_POINTS
-#define MAX_POINTS 32
-#endif
-#define MAX_BONE_MATRICES 80
-
-//--------------------------------------------------------------------------------------
-// Textures
-//--------------------------------------------------------------------------------------
-Texture2D g_txHeight : register( t0 ); // Height and Bump texture
-Texture2D g_txDiffuse : register( t1 ); // Diffuse texture
-Texture2D g_txSpecular : register( t2 ); // Specular texture
-
-//--------------------------------------------------------------------------------------
-// Samplers
-//--------------------------------------------------------------------------------------
-SamplerState g_samLinear : register( s0 );
-SamplerState g_samPoint : register( s0 );
-
-//--------------------------------------------------------------------------------------
-// Constant Buffers
-//--------------------------------------------------------------------------------------
-cbuffer cbTangentStencilConstants : register( b0 )
-{
- float g_TanM[1024]; // Tangent patch stencils precomputed by the application
- float g_fCi[16]; // Valence coefficients precomputed by the application
-};
-
-cbuffer cbPerMesh : register( b1 )
-{
- matrix g_mConstBoneWorld[MAX_BONE_MATRICES];
-};
-
-cbuffer cbPerFrame : register( b2 )
-{
- matrix g_mViewProjection;
- float3 g_vCameraPosWorld;
- float g_fTessellationFactor;
- float g_fDisplacementHeight;
- float3 g_vSolidColor;
-};
-
-cbuffer cbPerSubset : register( b3 )
-{
- int g_iPatchStartIndex;
-}
-
-//--------------------------------------------------------------------------------------
-Buffer<uint4> g_ValencePrefixBuffer : register( t0 );
-
-//--------------------------------------------------------------------------------------
-struct VS_CONTROL_POINT_OUTPUT
-{
- float3 vPosition : WORLDPOS;
- float2 vUV : TEXCOORD0;
- float3 vTangent : TANGENT;
-};
-
-struct BEZIER_CONTROL_POINT
-{
- float3 vPosition : BEZIERPOS;
-};
-
-struct PS_INPUT
-{
- float3 vWorldPos : POSITION;
- float3 vNormal : NORMAL;
- float2 vUV : TEXCOORD;
- float3 vTangent : TANGENT;
- float3 vBiTangent : BITANGENT;
-};
-
-//--------------------------------------------------------------------------------------
-// SubD to Bezier helper functions
-//--------------------------------------------------------------------------------------
-// Helps with getting tangent stencils from the g_TanM constant array
-#define TANM(a,v) ( g_TanM[ Val[v]*64 + (a) ] )
-
-//--------------------------------------------------------------------------------------
-float3 ComputeInteriorVertex( uint index,
- uint Val[4],
- const in InputPatch<VS_CONTROL_POINT_OUTPUT, MAX_POINTS> ip )
-{
- switch( index )
- {
- case 0:
- return (ip[0].vPosition*Val[0] + ip[1].vPosition*2 + ip[2].vPosition + ip[3].vPosition*2) / (5+Val[0]);
- case 1:
- return (ip[0].vPosition*2 + ip[1].vPosition*Val[1] + ip[2].vPosition*2 + ip[3].vPosition) / (5+Val[1]);
- case 2:
- return (ip[0].vPosition + ip[1].vPosition*2 + ip[2].vPosition*Val[2] + ip[3].vPosition*2) / (5+Val[2]);
- case 3:
- return (ip[0].vPosition*2 + ip[1].vPosition + ip[2].vPosition*2 + ip[3].vPosition*Val[3]) / (5+Val[3]);
- }
-
- return float3(0,0,0);
-}
-
-//--------------------------------------------------------------------------------------
-// Computes the corner vertices of the output UV patch. The corner vertices are
-// a weighted combination of all points that are "connected" to that corner by an edge.
-// The interior 4 points of the original subd quad are easy to get. The points in the
-// 1-ring neighborhood around the interior quad are not.
-//
-// Because the valence of that corner could be any number between 3 and 16, we need to
-// walk around the subd patch vertices connected to that point. This is there the
-// Pref (prefix) values come into play. Each corner has a prefix value that is the index
-// of the last value around the 1-ring neighborhood that should be used in calculating
-// the coefficient of that corner. The walk goes from the prefix value of the previous
-// corner to the prefix value of the current corner.
-//--------------------------------------------------------------------------------------
-void ComputeCornerVertex( uint index,
- out float3 CornerB, // Corner for the Bezier patch
- out float3 CornerU, // Corner for the tangent patch
- out float3 CornerV, // Corner for the bitangent patch
- const in InputPatch<VS_CONTROL_POINT_OUTPUT, MAX_POINTS> ip,
- const in uint Val[4],
- const in uint Pref[4] )
-{
- const float fOWt = 1;
- const float fEWt = 4;
-
- // Figure out where to start the walk by using the previous corner's prefix value
- uint PrefIm1 = 0;
- uint uStart = 4;
- if( index )
- {
- PrefIm1 = Pref[index-1];
- uStart = PrefIm1;
- }
-
- // Setup the walk indices
- uint uTIndexStart = 2 - (index&1);
- uint uTIndex = uTIndexStart;
-
- // Calculate the N*N weight for the final value
- CornerB = (Val[index]*Val[index])*ip[index].vPosition; // n^2 part
-
- // Zero out the corners
- CornerU = float4(0,0,0,0);
- CornerV = float4(0,0,0,0);
-
- const uint uV = Val[index] + ( ( index & 1 ) ? 1 : -1 );
-
- // Start the walk with the uStart prefix (the prefix of the corner before us)
- CornerB += ip[uStart].vPosition * fEWt;
- CornerU += ip[uStart].vPosition * TANM( uTIndex * 2, index );
- CornerV += ip[uStart].vPosition * TANM( ( ( uTIndex + uV ) % Val[index] ) * 2, index);
-
- // Gather all vertices between the previous corner's prefix and our own prefix
- // We'll do two at a time, since they always come in twos
- while(uStart < Pref[index]-1)
- {
- ++uStart;
- CornerB += ip[uStart].vPosition * fOWt;
- CornerU += ip[uStart].vPosition * TANM( uTIndex * 2 + 1, index );
- CornerV += ip[uStart].vPosition * TANM( ( ( uTIndex + uV ) % Val[index] ) * 2 + 1, index );
-
- ++uTIndex;
- ++uStart;
- CornerB += ip[uStart].vPosition * fEWt;
- CornerU += ip[uStart].vPosition * TANM( ( uTIndex % Val[index] ) * 2, index );
- CornerV += ip[uStart].vPosition * TANM( ( ( uTIndex+uV)%Val[index]) * 2, index );
- }
- ++uStart;
-
- // Add in the last guy and make sure to wrap to the beginning if we're the last corner
- if (index == 3)
- uStart = 4;
- CornerB += ip[uStart].vPosition * fOWt;
- CornerU += ip[uStart].vPosition * TANM( ( uTIndex % Val[index] ) * 2 + 1, index );
- CornerV += ip[uStart].vPosition * TANM( ( ( uTIndex + uV ) % Val[index] ) * 2 + 1, index );
-
- // Add in the guy before the prefix as well
- if (index)
- uStart = PrefIm1-1;
- else
- uStart = Pref[3]-1;
- uTIndex = uTIndexStart-1;
-
- CornerB += ip[uStart].vPosition * fOWt;
- CornerU += ip[uStart].vPosition * TANM( ( uTIndex % Val[index] ) * 2 + 1, index );
- CornerV += ip[uStart].vPosition * TANM( ( ( uTIndex + uV ) % Val[index] ) * 2 + 1, index );
-
- // We're done with the walk now. Now we need to add the contributions of the original subd quad.
- CornerB += ip[MOD4(index+1)].vPosition * fEWt;
- CornerB += ip[MOD4(index+2)].vPosition * fOWt;
- CornerB += ip[MOD4(index+3)].vPosition * fEWt;
-
- uTIndex = 0 + (index&1)*(Val[index]-1);
- uStart = MOD4(index+1);
- CornerU += ip[uStart].vPosition * TANM( ( uTIndex % Val[index] ) * 2, index );
- CornerV += ip[uStart].vPosition * TANM( ( ( uTIndex + uV ) % Val[index] ) * 2, index );
-
- uStart = MOD4(index+2);
- CornerU += ip[uStart].vPosition * TANM( ( uTIndex % Val[index] ) * 2 + 1, index );
- CornerV += ip[uStart].vPosition * TANM( ( ( uTIndex + uV ) % Val[index] ) * 2 + 1, index );
-
- uStart = MOD4(index+3);
- uTIndex = (uTIndex+1)%Val[index];
-
- CornerU += ip[uStart].vPosition * TANM( ( uTIndex % Val[index] ) * 2, index );
- CornerV += ip[uStart].vPosition * TANM( ( ( uTIndex + uV ) % Val[index] ) * 2, index );
-
- // Normalize the corner weights
- CornerB *= 1.0f / ( Val[index] * Val[index] + 5 * Val[index] ); // normalize
-
- // fixup signs from directional derivatives...
- if( !((index - 1) & 2) ) // 1 and 2
- CornerU *= -1;
-
- if( index >= 2 ) // 2 and 3
- CornerV *= -1;
-}
-
-void ComputeCornerVertex4444( uint index,
- out float3 CornerB, // Corner for the Bezier patch
- out float3 CornerU, // Corner for the tangent patch
- out float3 CornerV, // Corner for the bitangent patch
- const in InputPatch<VS_CONTROL_POINT_OUTPUT, MAX_POINTS> ip,
- const in uint Val[4],
- const in uint Pref[4] )
-{
- const float fOWt = 1;
- const float fEWt = 4;
-
- // Figure out where to start the walk by using the previous corner's prefix value
- uint PrefIm1 = 0;
- uint uStart = 4;
- if( index )
- {
- PrefIm1 = Pref[index-1];
- uStart = PrefIm1;
- }
-
- // Setup the walk indices
- uint uTIndexStart = 2 - (index&1);
- uint uTIndex = uTIndexStart;
-
- // Calculate the N*N weight for the final value
- CornerB = (Val[index]*Val[index])*ip[index].vPosition; // n^2 part
-
- // Zero out the corners
- CornerU = float4(0,0,0,0);
- CornerV = float4(0,0,0,0);
-
- const uint uV = Val[index] + ( ( index & 1 ) ? 1 : -1 );
-
- // Start the walk with the uStart prefix (the prefix of the corner before us)
- CornerB += ip[uStart].vPosition * fEWt;
- CornerU += ip[uStart].vPosition * TANM( uTIndex * 2, index );
- CornerV += ip[uStart].vPosition * TANM( ( ( uTIndex + uV ) % Val[index] ) * 2, index);
-
- // Gather all vertices between the previous corner's prefix and our own prefix
- // We'll do two at a time, since they always come in twos
- while(uStart < Pref[index]-1)
- {
- ++uStart;
- CornerB += ip[uStart].vPosition * fOWt;
- CornerU += ip[uStart].vPosition * TANM( uTIndex * 2 + 1, index );
- CornerV += ip[uStart].vPosition * TANM( ( ( uTIndex + uV ) % Val[index] ) * 2 + 1, index );
-
- ++uTIndex;
- ++uStart;
- CornerB += ip[uStart].vPosition * fEWt;
- CornerU += ip[uStart].vPosition * TANM( ( uTIndex % Val[index] ) * 2, index );
- CornerV += ip[uStart].vPosition * TANM( ( ( uTIndex+uV)%Val[index]) * 2, index );
- }
- ++uStart;
-
- // Add in the last guy and make sure to wrap to the beginning if we're the last corner
- if (index == 3)
- uStart = 4;
- CornerB += ip[uStart].vPosition * fOWt;
- CornerU += ip[uStart].vPosition * TANM( ( uTIndex % Val[index] ) * 2 + 1, index );
- CornerV += ip[uStart].vPosition * TANM( ( ( uTIndex + uV ) % Val[index] ) * 2 + 1, index );
-
- // Add in the guy before the prefix as well
- if (index)
- uStart = PrefIm1-1;
- else
- uStart = Pref[3]-1;
- uTIndex = uTIndexStart-1;
-
- CornerB += ip[uStart].vPosition * fOWt;
- CornerU += ip[uStart].vPosition * TANM( ( uTIndex % Val[index] ) * 2 + 1, index );
- CornerV += ip[uStart].vPosition * TANM( ( ( uTIndex + uV ) % Val[index] ) * 2 + 1, index );
-
- // We're done with the walk now. Now we need to add the contributions of the original subd quad.
- CornerB += ip[MOD4(index+1)].vPosition * fEWt;
- CornerB += ip[MOD4(index+2)].vPosition * fOWt;
- CornerB += ip[MOD4(index+3)].vPosition * fEWt;
-
- uTIndex = 0 + (index&1)*(Val[index]-1);
- uStart = MOD4(index+1);
- CornerU += ip[uStart].vPosition * TANM( ( uTIndex % Val[index] ) * 2, index );
- CornerV += ip[uStart].vPosition * TANM( ( ( uTIndex + uV ) % Val[index] ) * 2, index );
-
- uStart = MOD4(index+2);
- CornerU += ip[uStart].vPosition * TANM( ( uTIndex % Val[index] ) * 2 + 1, index );
- CornerV += ip[uStart].vPosition * TANM( ( ( uTIndex + uV ) % Val[index] ) * 2 + 1, index );
-
- uStart = MOD4(index+3);
- uTIndex = (uTIndex+1)%Val[index];
-
- CornerU += ip[uStart].vPosition * TANM( ( uTIndex % Val[index] ) * 2, index );
- CornerV += ip[uStart].vPosition * TANM( ( ( uTIndex + uV ) % Val[index] ) * 2, index );
-
- // Normalize the corner weights
- CornerB *= 1.0f / ( Val[index] * Val[index] + 5 * Val[index] ); // normalize
-
- // fixup signs from directional derivatives...
- if( !((index - 1) & 2) ) // 1 and 2
- CornerU *= -1;
-
- if( index >= 2 ) // 2 and 3
- CornerV *= -1;
-}
-
-//--------------------------------------------------------------------------------------
-// Computes the edge vertices of the output bicubic patch. The edge vertices
-// (1,2,4,7,8,11,13,14) are a weighted (by valence) combination of 6 interior and 1-ring
-// neighborhood points. However, we don't have to do the walk on this one since we
-// don't need all of the neighbor points attached to this vertex.
-//--------------------------------------------------------------------------------------
-float3 ComputeEdgeVertex( in uint index /* 0-7 */,
- const in InputPatch<VS_CONTROL_POINT_OUTPUT, MAX_POINTS> ip,
- const in uint Val[4],
- const in uint Pref[4] )
-{
- float val1 = 2 * Val[0] + 10;
- float val2 = 2 * Val[1] + 10;
- float val13 = 2 * Val[3] + 10;
- float val14 = 2 * Val[2] + 10;
- float val4 = val1;
- float val8 = val13;
- float val7 = val2;
- float val11 = val14;
-
- float3 vRetVal = float3(0,0,0);
- switch( index )
- {
- // Horizontal
- case 0:
- vRetVal = (Val[0]*2*ip[0].vPosition + 4*ip[1].vPosition + ip[2].vPosition + ip[3].vPosition*2 +
- 2*ip[Pref[0]-1].vPosition + ip[Pref[0]].vPosition) / val1;
- break;
- case 1:
- vRetVal = (4*ip[0].vPosition + Val[1]*2*ip[1].vPosition + ip[2].vPosition*2 + ip[3].vPosition +
- ip[Pref[0]-1].vPosition + 2*ip[Pref[0]].vPosition) / val2;
- break;
- case 2:
- vRetVal = (2*ip[0].vPosition + ip[1].vPosition + 4*ip[2].vPosition + ip[3].vPosition*2*Val[3] +
- 2*ip[Pref[2]].vPosition + ip[Pref[2]-1].vPosition) / val13;
- break;
- case 3:
- vRetVal = (ip[0].vPosition + 2*ip[1].vPosition + Val[2]*2*ip[2].vPosition + ip[3].vPosition*4 +
- ip[Pref[2]].vPosition + 2*ip[Pref[2]-1].vPosition) / val14;
- break;
- // Vertical
- case 4:
- vRetVal = (Val[0]*2*ip[0].vPosition + 2*ip[1].vPosition + ip[2].vPosition + ip[3].vPosition*4 +
- 2*ip[4].vPosition + ip[Pref[3]-1].vPosition) / val4;
- break;
- case 5:
- vRetVal = (4*ip[0].vPosition + ip[1].vPosition + 2*ip[2].vPosition + ip[3].vPosition*2*Val[3] +
- ip[4].vPosition + 2*ip[Pref[3]-1].vPosition) / val8;
- break;
- case 6:
- vRetVal = (2*ip[0].vPosition + Val[1]*2*ip[1].vPosition + 4*ip[2].vPosition + ip[3].vPosition +
- 2*ip[Pref[1]-1].vPosition + ip[Pref[1]].vPosition) / val7;
- break;
- case 7:
- vRetVal = (ip[0].vPosition + 4*ip[1].vPosition + Val[2]*2*ip[2].vPosition + 2*ip[3].vPosition +
- ip[Pref[1]-1].vPosition + 2*ip[Pref[1]].vPosition) / val11;
- break;
- }
-
- return vRetVal;
-}
-
-//--------------------------------------------------------------------------------------
-// Helper function
-//--------------------------------------------------------------------------------------
-void BezierRaise(inout float3 pQ[3], out float3 pC[4])
-{
- pC[0] = pQ[0];
- pC[3] = pQ[2];
-
- for( int i=1; i<3; i++ )
- {
- pC[i] = ( 1.0f / 3.0f ) * ( pQ[i - 1] * i + ( 3.0f - i ) * pQ[i] );
- }
-}
-
-//--------------------------------------------------------------------------------------
-// Computes the tangent patch from the input bezier patch
-//--------------------------------------------------------------------------------------
-void ComputeTanPatch( const OutputPatch<BEZIER_CONTROL_POINT, 16> bezpatch,
- inout float3 vOut[16],
- in float fCWts[4],
- in float3 vCorner[4],
- in float3 vCornerLocal[4],
- in const uint cX,
- in const uint cY)
-{
- float3 vQuad[3];
- float3 vQuadB[3];
- float3 vCubic[4];
-
- // boundary edges are really simple...
- vQuad[0] = vCornerLocal[0];
- vQuad[2] = vCornerLocal[1];
- vQuad[1] = 3.0f*(bezpatch[2*cX+0*cY].vPosition-bezpatch[1*cX+0*cY].vPosition);
-
- BezierRaise(vQuad,vCubic);
- vOut[1*cX + 0*cY] = vCubic[1];
- vOut[2*cX + 0*cY] = vCubic[2];
-
- vQuad[0] = vCornerLocal[2];
- vQuad[2] = vCornerLocal[3];
- vQuad[1] = 3.0f*(bezpatch[2*cX+3*cY].vPosition-bezpatch[1*cX+3*cY].vPosition);
-
- BezierRaise(vQuad,vCubic);
- vOut[1*cX + 3*cY] = vCubic[1];
- vOut[2*cX + 3*cY] = vCubic[2];
-
- // two internal edges - this is where work happens...
- float3 vA,vB,vC,vD,vE;
- float fC0,fC1;
- vQuad[1] = 3.0f*(bezpatch[2*cX+2*cY].vPosition-bezpatch[1*cX+2*cY].vPosition);
- // also do "second" scan line
- vQuadB[1] = 3.0f*(bezpatch[2*cX+1*cY].vPosition-bezpatch[1*cX+1*cY].vPosition);
-
- vD = 3.0f*(bezpatch[1*cX + 2*cY].vPosition - bezpatch[0*cX + 2*cY].vPosition);
- vE = 3.0f*(bezpatch[1*cX + 1*cY].vPosition - bezpatch[0*cX + 1*cY].vPosition); // used later...
-
- fC0 = fCWts[3];
- fC1 = fCWts[0];
-
- // sign flip
- vA = -vCorner[3];
- vB = 3.0f*(bezpatch[0*cX + 1*cY].vPosition - bezpatch[0*cX + 2*cY].vPosition);
- vC = -vCorner[0];
-
- vQuad[0] = 1.0f/3.0f*(2.0f*fC0*vB - fC1*vA) + vD;
- vQuadB[0] = 1.0f/3.0f*(fC0*vC - 2.0f*fC1*vB) + vE;
-
- // do end of strip - same as before, but stuff is switched around...
- vC = vCorner[2];
- vB = 3.0f*(bezpatch[3*cX + 2*cY].vPosition - bezpatch[3*cX + 1*cY].vPosition);
- vA = vCorner[1];
-
- vD = 3.0f*(bezpatch[2*cX + 1*cY].vPosition - bezpatch[3*cX + 1*cY].vPosition);
- vE = 3.0f*(bezpatch[2*cX + 2*cY].vPosition - bezpatch[3*cX + 2*cY].vPosition);
-
- fC0 = fCWts[1];
- fC1 = fCWts[2];
-
- vQuadB[2] = 1.0f/3.0f*(2.0f*fC0*vB - fC1*vA) + vD;
- vQuad[2] = 1.0f/3.0f*(fC0*vC - 2.0f*fC1*vB) + vE;
-
- vQuadB[2] *= -1.0f;
- vQuad[2] *= -1.0f;
-
- BezierRaise(vQuad,vCubic);
-
- vOut[0*cX + 2*cY] = vCubic[0];
- vOut[1*cX + 2*cY] = vCubic[1];
- vOut[2*cX + 2*cY] = vCubic[2];
- vOut[3*cX + 2*cY] = vCubic[3];
-
- BezierRaise(vQuadB,vCubic);
-
- vOut[0*cX + 1*cY] = vCubic[0];
- vOut[1*cX + 1*cY] = vCubic[1];
- vOut[2*cX + 1*cY] = vCubic[2];
- vOut[3*cX + 1*cY] = vCubic[3];
-}
-
-//--------------------------------------------------------------------------------------
-// Skinning vertex shader Section
-//--------------------------------------------------------------------------------------
-struct VS_CONTROL_POINT_INPUT
-{
- float3 vPosition : POSITION;
- float2 vUV : TEXCOORD0;
- float3 vTangent : TANGENT;
- uint4 vBones : BONES;
- float4 vWeights : WEIGHTS;
-};
-
-VS_CONTROL_POINT_OUTPUT PatchSkinningVS( VS_CONTROL_POINT_INPUT Input )
-{
- VS_CONTROL_POINT_OUTPUT Output;
-
- float4 vInputPos = float4( Input.vPosition, 1 );
- float4 vWorldPos = float4( 0, 0, 0, 0 );
-
- vWorldPos += mul( vInputPos, g_mConstBoneWorld[ Input.vBones.x ] ) * Input.vWeights.x;
- vWorldPos += mul( vInputPos, g_mConstBoneWorld[ Input.vBones.y ] ) * Input.vWeights.y;
- vWorldPos += mul( vInputPos, g_mConstBoneWorld[ Input.vBones.z ] ) * Input.vWeights.z;
- vWorldPos += mul( vInputPos, g_mConstBoneWorld[ Input.vBones.w ] ) * Input.vWeights.w;
-
- float3 vWorldTan = float3( 0, 0, 0 );
- vWorldTan += mul( Input.vTangent, (float3x3)g_mConstBoneWorld[ Input.vBones.x ] ) * Input.vWeights.x;
- vWorldTan += mul( Input.vTangent, (float3x3)g_mConstBoneWorld[ Input.vBones.y ] ) * Input.vWeights.y;
- vWorldTan += mul( Input.vTangent, (float3x3)g_mConstBoneWorld[ Input.vBones.z ] ) * Input.vWeights.z;
- vWorldTan += mul( Input.vTangent, (float3x3)g_mConstBoneWorld[ Input.vBones.w ] ) * Input.vWeights.w;
-
- Output.vPosition = vWorldPos;
- Output.vUV = Input.vUV;
- Output.vTangent = vWorldTan;
-
- return Output;
-}
-
-struct VS_MESH_POINT_INPUT
-{
- float3 vPosition : POSITION;
- float2 vUV : TEXCOORD0;
- float3 vNormal : NORMAL;
- float3 vTangent : TANGENT;
- uint4 vBones : BONES;
- float4 vWeights : WEIGHTS;
-};
-
-struct VS_MESH_POINT_OUTPUT
-{
- float3 vWorldPos : POSITION;
- float3 vNormal : NORMAL;
- float2 vUV : TEXCOORD;
- float3 vTangent : TANGENT;
- float3 vBiTangent : BITANGENT;
-
- float4 vPosition : SV_POSITION;
-};
-
-VS_MESH_POINT_OUTPUT MeshSkinningVS( VS_MESH_POINT_INPUT Input )
-{
- VS_MESH_POINT_OUTPUT Output;
-
- float4 vInputPos = float4( Input.vPosition, 1 );
- float4 vWorldPos = float4( 0, 0, 0, 0 );
-
- vWorldPos += mul( vInputPos, g_mConstBoneWorld[ Input.vBones.x ] ) * Input.vWeights.x;
- vWorldPos += mul( vInputPos, g_mConstBoneWorld[ Input.vBones.y ] ) * Input.vWeights.y;
- vWorldPos += mul( vInputPos, g_mConstBoneWorld[ Input.vBones.z ] ) * Input.vWeights.z;
- vWorldPos += mul( vInputPos, g_mConstBoneWorld[ Input.vBones.w ] ) * Input.vWeights.w;
-
- float3 vWorldTan = float3( 0, 0, 0 );
- vWorldTan += mul( Input.vTangent, (float3x3)g_mConstBoneWorld[ Input.vBones.x ] ) * Input.vWeights.x;
- vWorldTan += mul( Input.vTangent, (float3x3)g_mConstBoneWorld[ Input.vBones.y ] ) * Input.vWeights.y;
- vWorldTan += mul( Input.vTangent, (float3x3)g_mConstBoneWorld[ Input.vBones.z ] ) * Input.vWeights.z;
- vWorldTan += mul( Input.vTangent, (float3x3)g_mConstBoneWorld[ Input.vBones.w ] ) * Input.vWeights.w;
-
- float3 vWorldNormal = float3( 0, 0, 0 );
- vWorldNormal += mul( Input.vNormal, (float3x3)g_mConstBoneWorld[ Input.vBones.x ] ) * Input.vWeights.x;
- vWorldNormal += mul( Input.vNormal, (float3x3)g_mConstBoneWorld[ Input.vBones.y ] ) * Input.vWeights.y;
- vWorldNormal += mul( Input.vNormal, (float3x3)g_mConstBoneWorld[ Input.vBones.z ] ) * Input.vWeights.z;
- vWorldNormal += mul( Input.vNormal, (float3x3)g_mConstBoneWorld[ Input.vBones.w ] ) * Input.vWeights.w;
-
- Output.vWorldPos = vWorldPos.xyz;
- Output.vPosition = mul( float4( vWorldPos.xyz, 1 ), g_mViewProjection );
- Output.vUV = Input.vUV;
- Output.vTangent = vWorldTan;
- Output.vNormal = vWorldNormal;
- Output.vBiTangent = cross( vWorldNormal, vWorldTan );
-
- return Output;
-}
-
-//--------------------------------------------------------------------------------------
-// SubD to Bezier hull shader Section
-//--------------------------------------------------------------------------------------
-struct HS_CONSTANT_DATA_OUTPUT
-{
- float Edges[4] : SV_TessFactor;
- float Inside[2] : SV_InsideTessFactor;
-
- float3 vTangent[4] : TANGENT;
- float2 vUV[4] : TEXCOORD;
- float3 vTanUCorner[4] : TANUCORNER;
- float3 vTanVCorner[4] : TANVCORNER;
- float4 vCWts : TANWEIGHTS;
-};
-
-//--------------------------------------------------------------------------------------
-// Load per-patch valence and prefix data
-//--------------------------------------------------------------------------------------
-void LoadValenceAndPrefixData( in uint PatchID, out uint Val[4], out uint Prefixes[4] )
-{
- PatchID += g_iPatchStartIndex;
- uint4 ValPack = g_ValencePrefixBuffer.Load( PatchID * 2 );
- uint4 PrefPack = g_ValencePrefixBuffer.Load( PatchID * 2 + 1 );
-
- Val[0] = ValPack.x;
- Val[1] = ValPack.y;
- Val[2] = ValPack.z;
- Val[3] = ValPack.w;
-
- Prefixes[0] = PrefPack.x;
- Prefixes[1] = PrefPack.y;
- Prefixes[2] = PrefPack.z;
- Prefixes[3] = PrefPack.w;
-}
-
-
-//--------------------------------------------------------------------------------------
-// Constant data function for the SubDToBezierHS. This is executed once per patch.
-//--------------------------------------------------------------------------------------
-HS_CONSTANT_DATA_OUTPUT SubDToBezierConstantsHS( InputPatch<VS_CONTROL_POINT_OUTPUT, MAX_POINTS> ip,
- uint PatchID : SV_PrimitiveID )
-{
- HS_CONSTANT_DATA_OUTPUT Output;
-
- float TessAmount = g_fTessellationFactor;
-
- Output.Edges[0] = Output.Edges[1] = Output.Edges[2] = Output.Edges[3] = TessAmount;
- Output.Inside[0] = Output.Inside[1] = TessAmount;
-
- Output.vTangent[0] = ip[0].vTangent;
- Output.vTangent[1] = ip[1].vTangent;
- Output.vTangent[2] = ip[2].vTangent;
- Output.vTangent[3] = ip[3].vTangent;
-
- Output.vUV[0] = ip[0].vUV;
- Output.vUV[1] = ip[1].vUV;
- Output.vUV[2] = ip[2].vUV;
- Output.vUV[3] = ip[3].vUV;
-
- // Compute part of our tangent patch here
- uint Val[4];
- uint Prefixes[4];
- LoadValenceAndPrefixData( PatchID, Val, Prefixes );
-
- [unroll]
- for( int i=0; i<4; i++ )
- {
- float3 CornerB, CornerU, CornerV;
- ComputeCornerVertex( i, CornerB, CornerU, CornerV, ip, Val, Prefixes );
- Output.vTanUCorner[i] = CornerU;
- Output.vTanVCorner[i] = CornerV;
- }
-
- float fCWts[4];
- Output.vCWts.x = g_fCi[ Val[0]-3 ];
- Output.vCWts.y = g_fCi[ Val[1]-3 ];
- Output.vCWts.z = g_fCi[ Val[2]-3 ];
- Output.vCWts.w = g_fCi[ Val[3]-3 ];
-
- return Output;
-}
-
-HS_CONSTANT_DATA_OUTPUT SubDToBezierConstantsHS4444( InputPatch<VS_CONTROL_POINT_OUTPUT, MAX_POINTS> ip,
- uint PatchID : SV_PrimitiveID )
-{
- HS_CONSTANT_DATA_OUTPUT Output;
-
- float TessAmount = g_fTessellationFactor;
-
- Output.Edges[0] = Output.Edges[1] = Output.Edges[2] = Output.Edges[3] = TessAmount;
- Output.Inside[0] = Output.Inside[1] = TessAmount;
-
- Output.vTangent[0] = ip[0].vTangent;
- Output.vTangent[1] = ip[1].vTangent;
- Output.vTangent[2] = ip[2].vTangent;
- Output.vTangent[3] = ip[3].vTangent;
-
- Output.vUV[0] = ip[0].vUV;
- Output.vUV[1] = ip[1].vUV;
- Output.vUV[2] = ip[2].vUV;
- Output.vUV[3] = ip[3].vUV;
-
- // Compute part of our tangent patch here
- static const uint Val[4] = (uint[4])uint4(4,4,4,4);
- static const uint Prefixes[4] = (uint[4])uint4(7,10,13,16);
-
- [unroll]
- for( int i=0; i<4; i++ )
- {
- float3 CornerB, CornerU, CornerV;
- ComputeCornerVertex4444( i, CornerB, CornerU, CornerV, ip, Val, Prefixes );
- Output.vTanUCorner[i] = CornerU;
- Output.vTanVCorner[i] = CornerV;
- }
-
- float fCWts[4];
- Output.vCWts.x = g_fCi[ Val[0]-3 ];
- Output.vCWts.y = g_fCi[ Val[1]-3 ];
- Output.vCWts.z = g_fCi[ Val[2]-3 ];
- Output.vCWts.w = g_fCi[ Val[3]-3 ];
-
- return Output;
-}
-
-
-//--------------------------------------------------------------------------------------
-// HS for SubDToBezier. This outputcontrolpoints(16) specifies that we will produce
-// 16 control points. Therefore this function will be invoked 16x, one for each output
-// control point.
-//
-// !! PERFORMANCE NOTE: This hull shader is written for maximum readability, and its
-// performance is not expected to be optimal on D3D11 hardware. The switch statement
-// below that determines the codepath for each patch control point generates sub-optimal
-// code for parallel execution on the GPU. A future implementation of this hull shader
-// will combine the 16 codepaths and 3 variants (corner, edge, interior) into one shared
-// codepath; this change is expected to increase performance at the expense of readability.
-//--------------------------------------------------------------------------------------
-[domain("quad")]
-[partitioning("integer")]
-[outputtopology("triangle_cw")]
-[outputcontrolpoints(16)]
-[patchconstantfunc("SubDToBezierConstantsHS")]
-BEZIER_CONTROL_POINT SubDToBezierHS( InputPatch<VS_CONTROL_POINT_OUTPUT, MAX_POINTS> p,
- uint i : SV_OutputControlPointID,
- uint PatchID : SV_PrimitiveID )
-{
- // Valences and prefixes are loaded from a buffer
- uint Val[4];
- uint Prefixes[4];
- LoadValenceAndPrefixData( PatchID, Val, Prefixes );
-
- float3 CornerB = float3(0,0,0);
- float3 CornerU = float3(0,0,0);
- float3 CornerV = float3(0,0,0);
-
- BEZIER_CONTROL_POINT Output;
- Output.vPosition = float3(0,0,0);
-
- // !! PERFORMANCE NOTE: As mentioned above, this switch statement generates
- // inefficient code for the sake of readability.
- switch( i )
- {
- // Interior vertices
- case 5:
- Output.vPosition = ComputeInteriorVertex( 0, Val, p );
- break;
- case 6:
- Output.vPosition = ComputeInteriorVertex( 1, Val, p );
- break;
- case 10:
- Output.vPosition = ComputeInteriorVertex( 2, Val, p );
- break;
- case 9:
- Output.vPosition = ComputeInteriorVertex( 3, Val, p );
- break;
-
- // Corner vertices
- case 0:
- ComputeCornerVertex( 0, CornerB, CornerU, CornerV, p, Val, Prefixes );
- Output.vPosition = CornerB;
- break;
- case 3:
- ComputeCornerVertex( 1, CornerB, CornerU, CornerV, p, Val, Prefixes );
- Output.vPosition = CornerB;
- break;
- case 15:
- ComputeCornerVertex( 2, CornerB, CornerU, CornerV, p, Val, Prefixes );
- Output.vPosition = CornerB;
- break;
- case 12:
- ComputeCornerVertex( 3, CornerB, CornerU, CornerV, p, Val, Prefixes );
- Output.vPosition = CornerB;
- break;
-
- // Edge vertices
- case 1:
- Output.vPosition = ComputeEdgeVertex( 0, p, Val, Prefixes );
- break;
- case 2:
- Output.vPosition = ComputeEdgeVertex( 1, p, Val, Prefixes );
- break;
- case 13:
- Output.vPosition = ComputeEdgeVertex( 2, p, Val, Prefixes );
- break;
- case 14:
- Output.vPosition = ComputeEdgeVertex( 3, p, Val, Prefixes );
- break;
- case 4:
- Output.vPosition = ComputeEdgeVertex( 4, p, Val, Prefixes );
- break;
- case 8:
- Output.vPosition = ComputeEdgeVertex( 5, p, Val, Prefixes );
- break;
- case 7:
- Output.vPosition = ComputeEdgeVertex( 6, p, Val, Prefixes );
- break;
- case 11:
- Output.vPosition = ComputeEdgeVertex( 7, p, Val, Prefixes );
- break;
- }
-
- return Output;
-}
-
-//--------------------------------------------------------------------------------------
-// Specialised version for Regular (4,4,4,4) patches, this is much simpler and has less
-// branching compared to the general one above
-//--------------------------------------------------------------------------------------
-[domain("quad")]
-[partitioning("integer")]
-[outputtopology("triangle_cw")]
-[outputcontrolpoints(16)]
-[patchconstantfunc("SubDToBezierConstantsHS4444")]
-BEZIER_CONTROL_POINT SubDToBezierHS4444( InputPatch<VS_CONTROL_POINT_OUTPUT, MAX_POINTS> p,
- uint i : SV_OutputControlPointID,
- uint PatchID : SV_PrimitiveID )
-{
- // Valences and prefixes are Constant for this case (4,4,4,4)
- static const uint Val[4] = (uint[4])uint4(4,4,4,4);
- static const uint Prefixes[4] = (uint[4])uint4(7,10,13,16);
-
- float3 CornerB = float3(0,0,0);
- float3 CornerU = float3(0,0,0);
- float3 CornerV = float3(0,0,0);
-
- BEZIER_CONTROL_POINT Output;
- Output.vPosition = float3(0,0,0);
-
- // !! PERFORMANCE NOTE: As mentioned above, this switch statement generates
- // inefficient code for the sake of readability.
- switch( i )
- {
- // Interior vertices
- case 5:
- Output.vPosition = ComputeInteriorVertex( 0, Val, p );
- break;
- case 6:
- Output.vPosition = ComputeInteriorVertex( 1, Val, p );
- break;
- case 10:
- Output.vPosition = ComputeInteriorVertex( 2, Val, p );
- break;
- case 9:
- Output.vPosition = ComputeInteriorVertex( 3, Val, p );
- break;
-
- // Corner vertices
- case 0:
- ComputeCornerVertex4444( 0, CornerB, CornerU, CornerV, p, Val, Prefixes );
- Output.vPosition = CornerB;
- break;
- case 3:
- ComputeCornerVertex4444( 1, CornerB, CornerU, CornerV, p, Val, Prefixes );
- Output.vPosition = CornerB;
- break;
- case 15:
- ComputeCornerVertex4444( 2, CornerB, CornerU, CornerV, p, Val, Prefixes );
- Output.vPosition = CornerB;
- break;
- case 12:
- ComputeCornerVertex4444( 3, CornerB, CornerU, CornerV, p, Val, Prefixes );
- Output.vPosition = CornerB;
- break;
-
- // Edge vertices
- case 1:
- Output.vPosition = ComputeEdgeVertex( 0, p, Val, Prefixes );
- break;
- case 2:
- Output.vPosition = ComputeEdgeVertex( 1, p, Val, Prefixes );
- break;
- case 13:
- Output.vPosition = ComputeEdgeVertex( 2, p, Val, Prefixes );
- break;
- case 14:
- Output.vPosition = ComputeEdgeVertex( 3, p, Val, Prefixes );
- break;
- case 4:
- Output.vPosition = ComputeEdgeVertex( 4, p, Val, Prefixes );
- break;
- case 8:
- Output.vPosition = ComputeEdgeVertex( 5, p, Val, Prefixes );
- break;
- case 7:
- Output.vPosition = ComputeEdgeVertex( 6, p, Val, Prefixes );
- break;
- case 11:
- Output.vPosition = ComputeEdgeVertex( 7, p, Val, Prefixes );
- break;
- }
-
- return Output;
-}
-
-
-//--------------------------------------------------------------------------------------
-// Bezier evaluation domain shader section
-//--------------------------------------------------------------------------------------
-struct DS_OUTPUT
-{
- float3 vWorldPos : POSITION;
- float3 vNormal : NORMAL;
- float2 vUV : TEXCOORD;
- float3 vTangent : TANGENT;
- float3 vBiTangent : BITANGENT;
-
- float4 vPosition : SV_POSITION;
-};
-
-//--------------------------------------------------------------------------------------
-float4 BernsteinBasis(float t)
-{
- float invT = 1.0f - t;
-
- return float4( invT * invT * invT,
- 3.0f * t * invT * invT,
- 3.0f * t * t * invT,
- t * t * t );
-}
-
-//--------------------------------------------------------------------------------------
-float4 dBernsteinBasis(float t)
-{
- float invT = 1.0f - t;
-
- return float4( -3 * invT * invT,
- 3 * invT * invT - 6 * t * invT,
- 6 * t * invT - 3 * t * t,
- 3 * t * t );
-}
-
-//--------------------------------------------------------------------------------------
-float3 EvaluateBezier( const OutputPatch<BEZIER_CONTROL_POINT, 16> bezpatch,
- float4 BasisU,
- float4 BasisV )
-{
- float3 Value = float3(0,0,0);
- Value = BasisV.x * ( bezpatch[0].vPosition * BasisU.x + bezpatch[1].vPosition * BasisU.y + bezpatch[2].vPosition * BasisU.z + bezpatch[3].vPosition * BasisU.w );
- Value += BasisV.y * ( bezpatch[4].vPosition * BasisU.x + bezpatch[5].vPosition * BasisU.y + bezpatch[6].vPosition * BasisU.z + bezpatch[7].vPosition * BasisU.w );
- Value += BasisV.z * ( bezpatch[8].vPosition * BasisU.x + bezpatch[9].vPosition * BasisU.y + bezpatch[10].vPosition * BasisU.z + bezpatch[11].vPosition * BasisU.w );
- Value += BasisV.w * ( bezpatch[12].vPosition * BasisU.x + bezpatch[13].vPosition * BasisU.y + bezpatch[14].vPosition * BasisU.z + bezpatch[15].vPosition * BasisU.w );
-
- return Value;
-}
-
-//--------------------------------------------------------------------------------------
-float3 EvaluateBezierTan( const float3 bezpatch[16],
- float4 BasisU,
- float4 BasisV )
-{
- float3 Value = float3(0,0,0);
- Value = BasisV.x * ( bezpatch[0] * BasisU.x + bezpatch[1] * BasisU.y + bezpatch[2] * BasisU.z + bezpatch[3] * BasisU.w );
- Value += BasisV.y * ( bezpatch[4] * BasisU.x + bezpatch[5] * BasisU.y + bezpatch[6] * BasisU.z + bezpatch[7] * BasisU.w );
- Value += BasisV.z * ( bezpatch[8] * BasisU.x + bezpatch[9] * BasisU.y + bezpatch[10] * BasisU.z + bezpatch[11] * BasisU.w );
- Value += BasisV.w * ( bezpatch[12] * BasisU.x + bezpatch[13] * BasisU.y + bezpatch[14] * BasisU.z + bezpatch[15] * BasisU.w );
-
- return Value;
-}
-
-//--------------------------------------------------------------------------------------
-// Compute a two full tangent patches from the Tangent corner data created in the
-// HS constant data function.
-//--------------------------------------------------------------------------------------
-void CreatTangentPatches( in HS_CONSTANT_DATA_OUTPUT input,
- const OutputPatch<BEZIER_CONTROL_POINT, 16> bezpatch,
- out float3 TanU[16],
- out float3 TanV[16] )
-{
- TanV[0] = input.vTanVCorner[0];
- TanV[3] = input.vTanVCorner[1];
- TanV[15] = input.vTanVCorner[2];
- TanV[12] = input.vTanVCorner[3];
-
- TanU[0] = input.vTanUCorner[0];
- TanU[3] = input.vTanUCorner[1];
- TanU[15] = input.vTanUCorner[2];
- TanU[12] = input.vTanUCorner[3];
-
- float fCWts[4];
- fCWts[0] = input.vCWts.x;
- fCWts[1] = input.vCWts.y;
- fCWts[2] = input.vCWts.z;
- fCWts[3] = input.vCWts.w;
-
- float3 vCorner[4];
- float3 vCornerLocal[4];
-
- vCorner[0] = TanV[0];
- vCorner[1] = TanV[3];
- vCorner[2] = TanV[15];
- vCorner[3] = TanV[12];
- vCornerLocal[0] = TanU[0];
- vCornerLocal[1] = TanU[3];
- vCornerLocal[2] = TanU[12];
- vCornerLocal[3] = TanU[15];
-
- ComputeTanPatch( bezpatch, TanU, fCWts, vCorner, vCornerLocal, 1, 4 );
-
- fCWts[3] = input.vCWts.y;
- fCWts[1] = input.vCWts.w;
-
- vCorner[0] = TanU[0];
- vCorner[3] = TanU[3];
- vCorner[2] = TanU[15];
- vCorner[1] = TanU[12];
- vCornerLocal[0] = TanV[0];
- vCornerLocal[1] = TanV[12];
- vCornerLocal[2] = TanV[3];
- vCornerLocal[3] = TanV[15];
-
- ComputeTanPatch( bezpatch, TanV, fCWts, vCorner, vCornerLocal, 4, 1 );
-}
-
-//--------------------------------------------------------------------------------------
-// For each input UV (from the Tessellator), evaluate the Bezier patch at this position.
-//--------------------------------------------------------------------------------------
-[domain("quad")]
-DS_OUTPUT BezierEvalDS( HS_CONSTANT_DATA_OUTPUT input,
- float2 UV : SV_DomainLocation,
- const OutputPatch<BEZIER_CONTROL_POINT, 16> bezpatch )
-{
- float4 BasisU = BernsteinBasis( UV.x );
- float4 BasisV = BernsteinBasis( UV.y );
-
- float3 WorldPos = EvaluateBezier( bezpatch, BasisU, BasisV );
-
- float3 TanU[16];
- float3 TanV[16];
- CreatTangentPatches( input, bezpatch, TanU, TanV );
- float3 Tangent = EvaluateBezierTan( TanU, BasisU, BasisV );
- float3 BiTangent = EvaluateBezierTan( TanV, BasisU, BasisV );
-
- // To see what the patch looks like without using the tangent patches to fix the normals, uncomment this section
- /*
- float4 dBasisU = dBernsteinBasis( UV.x );
- float4 dBasisV = dBernsteinBasis( UV.y );
- Tangent = EvaluateBezier( bezpatch, dBasisU, BasisV );
- BiTangent = EvaluateBezier( bezpatch, BasisU, dBasisV );
- */
-
- float3 Norm = normalize( cross( Tangent, BiTangent ) );
-
- DS_OUTPUT Output;
- Output.vNormal = Norm;
-
- // Evalulate the tangent vectors through bilinear interpolation.
- // These tangents are the texture-space tangents. They should not be confused with the parametric
- // tangents that we use to get the normals for the bicubic patch.
- float3 TextureTanU0 = input.vTangent[0];
- float3 TextureTanU1 = input.vTangent[1];
- float3 TextureTanU2 = input.vTangent[2];
- float3 TextureTanU3 = input.vTangent[3];
-
- float3 UVbottom = lerp( TextureTanU0, TextureTanU1, UV.x );
- float3 UVtop = lerp( TextureTanU3, TextureTanU2, UV.x );
- float3 Tan = lerp( UVbottom, UVtop, UV.y );
-
- Output.vTangent = Tan;
-
- // This is an optimization. We assume that the UV mapping of the mesh will result in a "relatively" orthogonal
- // tangent basis. If we assume this, then we can avoid fetching and bilerping the BiTangent along with the tangent.
- Output.vBiTangent = cross( Norm, Tan );
-
- // bilerp the texture coordinates
- float2 tex0 = input.vUV[0];
- float2 tex1 = input.vUV[1];
- float2 tex2 = input.vUV[2];
- float2 tex3 = input.vUV[3];
-
- float2 bottom = lerp( tex0, tex1, UV.x );
- float2 top = lerp( tex3, tex2, UV.x );
- float2 TexUV = lerp( bottom, top, UV.y );
- Output.vUV = TexUV;
-
- if( g_fDisplacementHeight > 0 )
- {
- // On this sample displacement can go into or out of the mesh. This is why we bias the heigh amount.
- float height = g_fDisplacementHeight * ( g_txHeight.SampleLevel( g_samPoint, TexUV, 0 ).a * 2 - 1 );
- float3 WorldPosMiddle = Norm * height;
- WorldPos += WorldPosMiddle;
- }
-
- Output.vPosition = mul( float4(WorldPos,1), g_mViewProjection );
- Output.vWorldPos = WorldPos;
-
- return Output;
-}
-
-//--------------------------------------------------------------------------------------
-// Smooth shading pixel shader section
-//--------------------------------------------------------------------------------------
-
-float3 safe_normalize( float3 vInput )
-{
- float len2 = dot( vInput, vInput );
- if( len2 > 0 )
- {
- return vInput * rsqrt( len2 );
- }
- return vInput;
-}
-
-static const float g_fSpecularExponent = 32.0f;
-static const float g_fSpecularIntensity = 0.6f;
-static const float g_fNormalMapIntensity = 1.5f;
-
-float2 ComputeDirectionalLight( float3 vWorldPos, float3 vWorldNormal, float3 vDirLightDir )
-{
- // Result.x is diffuse illumination, Result.y is specular illumination
- float2 Result = float2( 0, 0 );
- Result.x = pow( saturate( dot( vWorldNormal, -vDirLightDir ) ), 2 );
-
- float3 vPointToCamera = normalize( g_vCameraPosWorld - vWorldPos );
- float3 vHalfAngle = normalize( vPointToCamera - vDirLightDir );
- Result.y = pow( saturate( dot( vHalfAngle, vWorldNormal ) ), g_fSpecularExponent );
-
- return Result;
-}
-
-float3 ColorGamma( float3 Input )
-{
- return pow( Input, 2.2f );
-}
-
-float4 SmoothPS( PS_INPUT Input ) : SV_TARGET
-{
- float4 vNormalMapSampleRaw = g_txHeight.Sample( g_samLinear, Input.vUV );
- float3 vNormalMapSampleBiased = ( vNormalMapSampleRaw.xyz * 2 ) - 1;
- vNormalMapSampleBiased.xy *= g_fNormalMapIntensity;
- float3 vNormalMapSample = normalize( vNormalMapSampleBiased );
-
- float3 vNormal = safe_normalize( Input.vNormal ) * vNormalMapSample.z;
- vNormal += safe_normalize( Input.vTangent ) * vNormalMapSample.x;
- vNormal += safe_normalize( Input.vBiTangent ) * vNormalMapSample.y;
-
- //float3 vColor = float3( 1, 1, 1 );
- float3 vColor = g_txDiffuse.Sample( g_samLinear, Input.vUV ).rgb;
- float vSpecular = g_txSpecular.Sample( g_samLinear, Input.vUV ).r * g_fSpecularIntensity;
-
- const float3 DirLightDirections[4] =
- {
- // key light
- normalize( float3( -63.345150, -58.043934, 27.785097 ) ),
- // fill light
- normalize( float3( 23.652107, -17.391443, 54.972504 ) ),
- // back light 1
- normalize( float3( 20.470509, -22.939510, -33.929531 ) ),
- // back light 2
- normalize( float3( -31.003685, 24.242104, -41.352859 ) ),
- };
-
- const float3 DirLightColors[4] =
- {
- // key light
- ColorGamma( float3( 1.0f, 0.964f, 0.706f ) * 1.0f ),
- // fill light
- ColorGamma( float3( 0.446f, 0.641f, 1.0f ) * 1.0f ),
- // back light 1
- ColorGamma( float3( 1.0f, 0.862f, 0.419f ) * 1.0f ),
- // back light 2
- ColorGamma( float3( 0.405f, 0.630f, 1.0f ) * 1.0f ),
- };
-
- float3 fLightColor = 0;
- for( int i = 0; i < 4; ++i )
- {
- float2 LightDiffuseSpecular = ComputeDirectionalLight( Input.vWorldPos, vNormal, DirLightDirections[i] );
- fLightColor += DirLightColors[i] * vColor * LightDiffuseSpecular.x;
- fLightColor += DirLightColors[i] * LightDiffuseSpecular.y * vSpecular;
- }
-
- return float4( fLightColor, 1 );
-}
-
-//--------------------------------------------------------------------------------------
-// Solid color shading pixel shader (used for wireframe overlay)
-//--------------------------------------------------------------------------------------
-float4 SolidColorPS( PS_INPUT Input ) : SV_TARGET
-{
- return float4( g_vSolidColor, 1 );
-}
diff --git a/tests/hlsl/dxsdk/VarianceShadows11/2DQuadShaders.hlsl b/tests/hlsl/dxsdk/VarianceShadows11/2DQuadShaders.hlsl
deleted file mode 100644
index 7d9763a79..000000000
--- a/tests/hlsl/dxsdk/VarianceShadows11/2DQuadShaders.hlsl
+++ /dev/null
@@ -1,216 +0,0 @@
-//TEST_DISABLED:COMPARE_HLSL:-no-mangle -profile vs_4_0 -entry VSMain -profile ps_4_0 -entry PSBlurX -entry PSBlurY
-
-//SLANG: This test has been disabled because its semantic correctness
-//around use of compile-time-constant expressions relies on processing
-//the `[unroll]` attribute, and we don't yet support that.
-
-//--------------------------------------------------------------------------------------
-// File: Skinning10.fx
-//
-// The effect file for the Skinning10 sample.
-//
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-#ifndef SEPERABLE_BLUR_KERNEL_SIZE
-#define SEPERABLE_BLUR_KERNEL_SIZE 3
-#endif
-
-static const int BLUR_KERNEL_BEGIN = SEPERABLE_BLUR_KERNEL_SIZE / -2;
-static const int BLUR_KERNEL_END = SEPERABLE_BLUR_KERNEL_SIZE / 2 + 1;
-static const float FLOAT_BLUR_KERNEL_SIZE = (float)SEPERABLE_BLUR_KERNEL_SIZE;
-
-cbuffer cbblurVS : register( b2)
-{
- int2 g_iWidthHeight : packoffset( c0 );
- int g_iKernelStart : packoffset( c0.z );
- int g_iKernelEnd : packoffset( c0.w );
-};
-
-//--------------------------------------------------------------------------------------
-// defines
-//--------------------------------------------------------------------------------------
-
-Texture2DArray g_txShadow : register( t5 );
-SamplerState g_samShadow : register( s5 );
-
-//--------------------------------------------------------------------------------------
-// Input/Output structures
-//--------------------------------------------------------------------------------------
-
-struct PSIn
-{
- float4 Pos : SV_Position; //Position
- float2 Tex : TEXCOORD; //Texture coordinate
- float2 ITex : TEXCOORD2;
-};
-
-struct VSIn
-{
- uint Pos : SV_VertexID ;
-};
-
-
-PSIn VSMain(VSIn inn)
-{
- PSIn output;
-
- output.Pos.y = -1.0f + (inn.Pos%2) * 2.0f ;
- output.Pos.x = -1.0f + (inn.Pos/2) * 2.0f;
- output.Pos.z = .5;
- output.Pos.w = 1;
- output.Tex.x = inn.Pos/2;
- output.Tex.y = 1.0f - inn.Pos%2;
- output.ITex.x = (float)(g_iWidthHeight.x * output.Tex.x);
- output.ITex.y = (float)(g_iWidthHeight.y * output.Tex.y);
- return output;
-}
-
-//float PSDepth
-
-//------------------------------------------------------------------------------
-// Logarithmic filtering
-//------------------------------------------------------------------------------
-
-float log_conv ( float x0, float X, float y0, float Y )
-{
- return (X + log(x0 + (y0 * exp(Y - X))));
-}
-
-
-//--------------------------------------------------------------------------------------
-// Pixel shader that performs bump mapping on the final vertex
-//--------------------------------------------------------------------------------------
-float2 PSBlurX(PSIn input) : SV_Target
-{
-/*
- float2 centerDistance;
- if ( input.Tex.x < .5 ) centerDistance.x = (1.0 - input.Tex.x);
- else centerDistance.x = input.Tex.x;
- if ( input.Tex.y < .5 ) centerDistance.y = (1.0 - input.Tex.y);
- else centerDistance.y = input.Tex.y;
- if (centerDistance.x < centerDistance.y) centerDistance.x = centerDistance.y;
- centerDistance.x -= .2;
- centerDistance.x *= (1.0f / .8);
-
- float store_samples[8];
- int ind = 0;
- for (int x = g_iKernelStart; x < g_iKernelEnd; ++x) {
- store_samples[ind] = g_txShadow.Load( int3(input.ITex.x+(float)x * centerDistance.x , input.ITex.y, 0) ).r;
- ind++;
- }
- const float c = (1.f/5.f);
-
- float accum;
- accum = log_conv( c, store_samples[0], c, store_samples[1] );
-
- ind = 0;
- for (x = g_iKernelStart - 2; x < g_iKernelEnd; ++x) {
- ind++;
- accum += log_conv( 1.0f, accum, c, store_samples[ind] );
- }
- float2 rt;
- rt.x = accum;
- return rt;
- */
- /*
- float2 dep = 0;
- float2 centerDistance;
- if ( input.Tex.x < .5 ) centerDistance.x = (1.0 - input.Tex.x);
- else centerDistance.x = input.Tex.x;
- if ( input.Tex.y < .5 ) centerDistance.y = (1.0 - input.Tex.y);
- else centerDistance.y = input.Tex.y;
- if (centerDistance.x < centerDistance.y) centerDistance.x = centerDistance.y;
- centerDistance.x -= .2;
- centerDistance.x *= ( 1.0f / 0.8f );
-
- for (int x = g_iKernelStart; x < g_iKernelEnd; ++x) {
- dep += g_txShadow.Load( int3(input.ITex.x+(float)x * centerDistance.x , input.ITex.y, 0) ).rg;
- }
- dep /= (g_iKernelEnd - g_iKernelStart);
- return dep;
- */
-
- float2 dep=0;
- [unroll]for ( int x = BLUR_KERNEL_BEGIN; x < BLUR_KERNEL_END; ++x ) {
- dep += g_txShadow.Sample( g_samShadow, float3( input.Tex.x, input.Tex.y, 0 ), int2( x,0 ) ).rg;
- }
- dep /= FLOAT_BLUR_KERNEL_SIZE;
- return dep;
-
-// return g_txShadow.Sample(g_samShadow, float3(input.Tex.x, input.Tex.y, 0) ).rg;
-
-}
-
-//--------------------------------------------------------------------------------------
-// Pixel shader that performs bump mapping on the final vertex
-//--------------------------------------------------------------------------------------
-float2 PSBlurY(PSIn input) : SV_Target
-{
-/*
- float2 centerDistance;
- if ( input.Tex.x < .5 ) centerDistance.x = (1.0 - input.Tex.x);
- else centerDistance.x = input.Tex.x;
- if ( input.Tex.y < .5 ) centerDistance.y = (1.0 - input.Tex.y);
- else centerDistance.y = input.Tex.y;
- if (centerDistance.x < centerDistance.y) centerDistance.x = centerDistance.y;
- centerDistance.x -= .2;
- centerDistance.x *= (1.0f / .8);
-
- float store_samples[8];
- int ind = 0;
- for (int y = g_iKernelStart; y < g_iKernelEnd; ++y) {
- store_samples[ind] = g_txShadow.Load( int3(input.ITex.x, input.ITex.y+(float)y * centerDistance.x, 0) ).r;
- }
- const float c = (1.f/5.f);
-
- float accum;
- accum = log_conv( c, store_samples[0], c, store_samples[1] );
-
- ind = 0;
- for (y = g_iKernelStart; y < g_iKernelEnd; ++y) {
- ind++;
- accum += log_conv( 1.0f, accum, c, store_samples[ind] );
- }
- float2 rt;
- rt.x = accum;
- return rt;
- */
-
-
- /*
- float2 dep = 0;
-
- float2 centerDistance;
- if ( input.Tex.x < .5 ) centerDistance.x = (1.0 - input.Tex.x);
- else centerDistance.x = input.Tex.x;
- if ( input.Tex.y < .5 ) centerDistance.y = (1.0 - input.Tex.y);
- else centerDistance.y = input.Tex.y;
- if (centerDistance.x < centerDistance.y) centerDistance.x = centerDistance.y;
- centerDistance.x -= 0;
- centerDistance.x *= (1.0f / 1.0f);
-
- if (centerDistance.x < centerDistance.y) centerDistance.x = centerDistance.y;
- for (int y = g_iKernelStart; y < g_iKernelEnd; ++y) {
- dep += g_txShadow.Load( int3(input.ITex.x, input.ITex.y+(float)y * centerDistance.x, 0) ).rg;
- }
-
-
- dep /= (g_iKernelEnd - g_iKernelStart);
- return dep;
-
- */
-
-
- float2 dep=0;
- [unroll]for ( int y = BLUR_KERNEL_BEGIN; y < BLUR_KERNEL_END; ++y ) {
- dep += g_txShadow.Sample( g_samShadow, float3( input.Tex.x, input.Tex.y, 0 ), int2( 0,y ) ).rg;
- }
- dep /= FLOAT_BLUR_KERNEL_SIZE;
- return dep;
-
- //return g_txShadow.Sample(g_samShadow, float3(input.Tex.x, input.Tex.y, 0) ).rg;
-}
-
-
-
diff --git a/tests/hlsl/dxsdk/VarianceShadows11/RenderVarianceScene.hlsl b/tests/hlsl/dxsdk/VarianceShadows11/RenderVarianceScene.hlsl
deleted file mode 100644
index 29c9851d8..000000000
--- a/tests/hlsl/dxsdk/VarianceShadows11/RenderVarianceScene.hlsl
+++ /dev/null
@@ -1,412 +0,0 @@
-//TEST_IGNORE_FILE: Currently failing due to Slang compiler issues.
-//TEST:COMPARE_HLSL: -profile vs_4_0 -entry VSMain -profile ps_4_0 -entry PSBlurX -entry PSBlurY
-//--------------------------------------------------------------------------------------
-// File: RenderCascadeScene.hlsl
-//
-// This is the main shader file. This shader is compiled with several different flags
-// to provide different customizations based on user controls.
-//
-//
-// Copyright (c) Microsoft Corporation. All rights reserved.
-//--------------------------------------------------------------------------------------
-
-//--------------------------------------------------------------------------------------
-// Globals
-//--------------------------------------------------------------------------------------
-
-// This flag enables the shadow to blend between cascades. This is most useful when the
-// the shadow maps are small and artifact can be seen between the various cascade layers.
-#ifndef BLEND_BETWEEN_CASCADE_LAYERS_FLAG
-#define BLEND_BETWEEN_CASCADE_LAYERS_FLAG 0
-#endif
-
-// There are two methods for selecting the proper cascade a fragment lies in. Interval selection
-// compares the depth of the fragment against the frustum's depth partition.
-// Map based selection compares the texture coordinates against the acutal cascade maps.
-// Map based selection gives better coverage.
-// Interval based selection is easier to extend and understand.
-#ifndef SELECT_CASCADE_BY_INTERVAL_FLAG
-#define SELECT_CASCADE_BY_INTERVAL_FLAG 0
-#endif
-
-// The number of cascades
-#ifndef CASCADE_COUNT_FLAG
-#define CASCADE_COUNT_FLAG 3
-#endif
-
-
-// Most titles will find that 3-4 cascades with
-// BLEND_BETWEEN_CASCADE_LAYERS_FLAG, is good for lower end PCs.
-
-cbuffer cbAllShadowData : register( b0 )
-{
- matrix m_mWorldViewProjection;
- matrix m_mWorld;
- matrix m_mWorldView;
- matrix m_mShadow;
- float4 m_vCascadeOffset[8];
- float4 m_vCascadeScale[8];
- int m_nCascadeLevels; // Number of Cascades
- int m_iVisualizeCascades; // 1 is to visualize the cascades in different colors. 0 is to just draw the scene
-
- // For Map based selection scheme, this keeps the pixels inside of the the valid range.
- // When there is no boarder, these values are 0 and 1 respectivley.
- float m_fMinBorderPadding;
- float m_fMaxBorderPadding;
-
- float m_fCascadeBlendArea; // Amount to overlap when blending between cascades.
- float m_fTexelSize; // Padding variables exist because CBs must be a multiple of 16 bytes.
- float m_fNativeTexelSizeInX;
- float4 m_fCascadeFrustumsEyeSpaceDepthsData[2]; // The values along Z that seperate the cascades.
- // This code creates an array based pointer that points towards the vectorized input data.
- // This is the only way to index arbitrary arrays of data.
- // If the array is used at run time, the compiler will generate code that uses logic to index the correct component.
-
- static float m_fCascadeFrustumsEyeSpaceDepths[8] = (float[8])m_fCascadeFrustumsEyeSpaceDepthsData;
-
- float3 m_vLightDir;
- float m_fPaddingCB4;
-
-};
-
-
-
-//--------------------------------------------------------------------------------------
-// Textures and Samplers
-//--------------------------------------------------------------------------------------
-Texture2D g_txDiffuse : register( t0 );
-Texture2DArray g_txShadow : register( t5 );
-
-SamplerState g_samLinear : register( s0 );
-SamplerState g_samShadow : register( s5 );
-
-//--------------------------------------------------------------------------------------
-// Input / Output structures
-//--------------------------------------------------------------------------------------
-struct VS_INPUT
-{
- float4 vPosition : POSITION;
- float3 vNormal : NORMAL;
- float2 vTexcoord : TEXCOORD0;
-};
-
-struct VS_OUTPUT
-{
- float3 vNormal : NORMAL;
- float2 vTexcoord : COLOR0;
- float4 vTexShadow : TEXCOORD1;
- float4 vPosition : SV_POSITION;
- float4 vInterpPos : TEXCOORD2;
- float vDepth : TEXCOORD3;
-};
-
-//--------------------------------------------------------------------------------------
-// Vertex Shader
-//--------------------------------------------------------------------------------------
-VS_OUTPUT VSMain( VS_INPUT Input )
-{
- VS_OUTPUT Output;
-
- Output.vPosition = mul( Input.vPosition, m_mWorldViewProjection );
- Output.vNormal = mul( Input.vNormal, (float3x3)m_mWorld );
- Output.vTexcoord = Input.vTexcoord;
- Output.vInterpPos = Input.vPosition;
- Output.vDepth = mul( Input.vPosition, m_mWorldView ).z ;
-
- // Transform the shadow texture coordinates for all the cascades.
- Output.vTexShadow = mul( Input.vPosition, m_mShadow );
-
- return Output;
-}
-
-
-
-static const float4 vCascadeColorsMultiplier[8] =
-{
- float4 ( 1.5f, 0.0f, 0.0f, 1.0f ),
- float4 ( 0.0f, 1.5f, 0.0f, 1.0f ),
- float4 ( 0.0f, 0.0f, 5.5f, 1.0f ),
- float4 ( 1.5f, 0.0f, 5.5f, 1.0f ),
- float4 ( 1.5f, 1.5f, 0.0f, 1.0f ),
- float4 ( 1.0f, 1.0f, 1.0f, 1.0f ),
- float4 ( 0.0f, 1.0f, 5.5f, 1.0f ),
- float4 ( 0.5f, 3.5f, 0.75f, 1.0f )
-};
-
-
-void ComputeCoordinatesTransform( in int iCascadeIndex,
- in float4 InterpolatedPosition,
- in out float4 vShadowTexCoord,
- in out float4 vShadowTexCoordViewSpace )
-{
- // Now that we know the correct map, we can transform the world space position of the current fragment
- if( SELECT_CASCADE_BY_INTERVAL_FLAG )
- {
- vShadowTexCoord = vShadowTexCoordViewSpace * m_vCascadeScale[iCascadeIndex];
- vShadowTexCoord += m_vCascadeOffset[iCascadeIndex];
- }
- vShadowTexCoord.w = vShadowTexCoord.z; // We put the z value in w so that we can index the texture array with Z.
- vShadowTexCoord.z = iCascadeIndex;
-
-}
-
-//--------------------------------------------------------------------------------------
-// Use PCF to sample the depth map and return a percent lit value.
-//--------------------------------------------------------------------------------------
-void CalculateVarianceShadow ( in float4 vShadowTexCoord, in float4 vShadowMapTextureCoordViewSpace, int iCascade, out float fPercentLit )
-{
- fPercentLit = 0.0f;
- // This loop could be unrolled, and texture immediate offsets could be used if the kernel size were fixed.
- // This would be a performance improvment.
-
- float2 mapDepth = 0;
-
-
- // In orderto pull the derivative out of divergent flow control we calculate the
- // derivative off of the view space coordinates an then scale the deriviative.
-
- float3 vShadowTexCoordDDX =
- ddx(vShadowMapTextureCoordViewSpace );
- vShadowTexCoordDDX *= m_vCascadeScale[iCascade].xyz;
- float3 vShadowTexCoordDDY =
- ddy(vShadowMapTextureCoordViewSpace );
- vShadowTexCoordDDY *= m_vCascadeScale[iCascade].xyz;
-
- mapDepth += g_txShadow.SampleGrad( g_samShadow, vShadowTexCoord.xyz,
- vShadowTexCoordDDX,
- vShadowTexCoordDDY);
- // The sample instruction uses gradients for some filters.
-
- float fAvgZ = mapDepth.x; // Filtered z
- float fAvgZ2 = mapDepth.y; // Filtered z-squared
-
- if ( vShadowTexCoord.w <= fAvgZ ) // We put the z value in w so that we can index the texture array with Z.
- {
- fPercentLit = 1;
- }
- else
- {
- float variance = ( fAvgZ2 ) - ( fAvgZ * fAvgZ );
- variance = min( 1.0f, max( 0.0f, variance + 0.00001f ) );
-
- float mean = fAvgZ;
- float d = vShadowTexCoord.w - mean; // We put the z value in w so that we can index the texture array with Z.
- float p_max = variance / ( variance + d*d );
-
- // To combat light-bleeding, experiment with raising p_max to some power
- // (Try values from 0.1 to 100.0, if you like.)
- fPercentLit = pow( p_max, 4 );
-
- }
-
-}
-
-//--------------------------------------------------------------------------------------
-// Calculate amount to blend between two cascades and the band where blending will occure.
-//--------------------------------------------------------------------------------------
-void CalculateBlendAmountForInterval ( in int iNextCascadeIndex,
- in out float fPixelDepth,
- in out float fCurrentPixelsBlendBandLocation,
- out float fBlendBetweenCascadesAmount
- )
-{
-
- // We need to calculate the band of the current shadow map where it will fade into the next cascade.
- // We can then early out of the expensive PCF for loop.
- //
- float fBlendInterval = m_fCascadeFrustumsEyeSpaceDepths[ iNextCascadeIndex - 1 ];
- if( iNextCascadeIndex > 1 )
- {
- fPixelDepth -= m_fCascadeFrustumsEyeSpaceDepths[ iNextCascadeIndex-2 ];
- fBlendInterval -= m_fCascadeFrustumsEyeSpaceDepths[ iNextCascadeIndex-2 ];
- }
- // The current pixel's blend band location will be used to determine when we need to blend and by how much.
- fCurrentPixelsBlendBandLocation = fPixelDepth / fBlendInterval;
- fCurrentPixelsBlendBandLocation = 1.0f - fCurrentPixelsBlendBandLocation;
- // The fBlendBetweenCascadesAmount is our location in the blend band.
- fBlendBetweenCascadesAmount = fCurrentPixelsBlendBandLocation / m_fCascadeBlendArea;
-}
-
-
-//--------------------------------------------------------------------------------------
-// Calculate amount to blend between two cascades and the band where blending will occure.
-//--------------------------------------------------------------------------------------
-void CalculateBlendAmountForMap ( in float4 vShadowMapTextureCoord,
- in out float fCurrentPixelsBlendBandLocation,
- out float fBlendBetweenCascadesAmount )
-{
- // Calcaulte the blend band for the map based selection.
- float2 distanceToOne = float2 ( 1.0f - vShadowMapTextureCoord.x, 1.0f - vShadowMapTextureCoord.y );
- fCurrentPixelsBlendBandLocation = min( vShadowMapTextureCoord.x, vShadowMapTextureCoord.y );
- float fCurrentPixelsBlendBandLocation2 = min( distanceToOne.x, distanceToOne.y );
- fCurrentPixelsBlendBandLocation =
- min( fCurrentPixelsBlendBandLocation, fCurrentPixelsBlendBandLocation2 );
- fBlendBetweenCascadesAmount = fCurrentPixelsBlendBandLocation / m_fCascadeBlendArea;
-}
-
-//--------------------------------------------------------------------------------------
-// Calculate the shadow based on several options and rende the scene.
-//--------------------------------------------------------------------------------------
-
-float4 PSMain( VS_OUTPUT Input ) : SV_TARGET
-{
- float4 vDiffuse = g_txDiffuse.Sample( g_samLinear, Input.vTexcoord );
-
-
- float4 vShadowMapTextureCoordViewSpace = 0.0f;
- float4 vShadowMapTextureCoord = 0.0f;
- float4 vShadowMapTextureCoord_blend = 0.0f;
-
- float4 vVisualizeCascadeColor = float4(0.0f,0.0f,0.0f,1.0f);
-
- float fPercentLit = 0.0f;
- float fPercentLit_blend = 0.0f;
-
- int iCascadeFound = 0;
- int iCurrentCascadeIndex=1;
- int iNextCascadeIndex = 0;
-
- float fCurrentPixelDepth;
-
- // The interval based selection technique compares the pixel's depth against the frustum's cascade divisions.
- fCurrentPixelDepth = Input.vDepth;
-
- // This for loop is not necessary when the frustum is uniformaly divided and interval based selection is used.
- // In this case fCurrentPixelDepth could be used as an array lookup into the correct frustum.
- vShadowMapTextureCoordViewSpace = Input.vTexShadow;
-
-
- if( SELECT_CASCADE_BY_INTERVAL_FLAG )
- {
- iCurrentCascadeIndex = 0;
- if (CASCADE_COUNT_FLAG > 1 )
- {
- float4 vCurrentPixelDepth = Input.vDepth;
- float4 fComparison = ( vCurrentPixelDepth > m_fCascadeFrustumsEyeSpaceDepthsData[0]);
- float4 fComparison2 = ( vCurrentPixelDepth > m_fCascadeFrustumsEyeSpaceDepthsData[1]);
- float fIndex = dot(
- float4( CASCADE_COUNT_FLAG > 0,
- CASCADE_COUNT_FLAG > 1,
- CASCADE_COUNT_FLAG > 2,
- CASCADE_COUNT_FLAG > 3)
- , fComparison )
- + dot(
- float4(
- CASCADE_COUNT_FLAG > 4,
- CASCADE_COUNT_FLAG > 5,
- CASCADE_COUNT_FLAG > 6,
- CASCADE_COUNT_FLAG > 7)
- , fComparison2 ) ;
-
- fIndex = min( fIndex, CASCADE_COUNT_FLAG - 1 );
- iCurrentCascadeIndex = (int)fIndex;
- }
- }
-
- if ( !SELECT_CASCADE_BY_INTERVAL_FLAG )
- {
- iCurrentCascadeIndex = 0;
- if ( CASCADE_COUNT_FLAG == 1 )
- {
- vShadowMapTextureCoord = vShadowMapTextureCoordViewSpace * m_vCascadeScale[0];
- vShadowMapTextureCoord += m_vCascadeOffset[0];
- }
- if ( CASCADE_COUNT_FLAG > 1 ) {
- for( int iCascadeIndex = 0; iCascadeIndex < CASCADE_COUNT_FLAG && iCascadeFound == 0; ++iCascadeIndex )
- {
- vShadowMapTextureCoord = vShadowMapTextureCoordViewSpace * m_vCascadeScale[iCascadeIndex];
- vShadowMapTextureCoord += m_vCascadeOffset[iCascadeIndex];
-
- if ( min( vShadowMapTextureCoord.x, vShadowMapTextureCoord.y ) > m_fMinBorderPadding
- && max( vShadowMapTextureCoord.x, vShadowMapTextureCoord.y ) < m_fMaxBorderPadding )
- {
- iCurrentCascadeIndex = iCascadeIndex;
- iCascadeFound = 1;
- }
- }
- }
- }
- // Found the correct map.
- vVisualizeCascadeColor = vCascadeColorsMultiplier[iCurrentCascadeIndex];
-
- ComputeCoordinatesTransform( iCurrentCascadeIndex, Input.vInterpPos, vShadowMapTextureCoord, vShadowMapTextureCoordViewSpace );
-
- if( BLEND_BETWEEN_CASCADE_LAYERS_FLAG && CASCADE_COUNT_FLAG > 1 )
- {
- // Repeat text coord calculations for the next cascade.
- // The next cascade index is used for blurring between maps.
- iNextCascadeIndex = min ( CASCADE_COUNT_FLAG - 1, iCurrentCascadeIndex + 1 );
- if( !SELECT_CASCADE_BY_INTERVAL_FLAG )
- {
- vShadowMapTextureCoord_blend = vShadowMapTextureCoordViewSpace * m_vCascadeScale[iNextCascadeIndex];
- vShadowMapTextureCoord_blend += m_vCascadeOffset[iNextCascadeIndex];
- }
- ComputeCoordinatesTransform( iNextCascadeIndex, Input.vInterpPos, vShadowMapTextureCoord_blend, vShadowMapTextureCoordViewSpace );
- }
- float fBlendBetweenCascadesAmount = 1.0f;
- float fCurrentPixelsBlendBandLocation = 1.0f;
-
- if( SELECT_CASCADE_BY_INTERVAL_FLAG )
- {
- if( CASCADE_COUNT_FLAG > 1 && BLEND_BETWEEN_CASCADE_LAYERS_FLAG )
- {
- CalculateBlendAmountForInterval ( iNextCascadeIndex, fCurrentPixelDepth,
- fCurrentPixelsBlendBandLocation, fBlendBetweenCascadesAmount );
-
- }
- }
- else
- {
- if( CASCADE_COUNT_FLAG > 1 && BLEND_BETWEEN_CASCADE_LAYERS_FLAG )
- {
- CalculateBlendAmountForMap ( vShadowMapTextureCoord,
- fCurrentPixelsBlendBandLocation, fBlendBetweenCascadesAmount );
- }
- }
-
- // Because the Z coordinate specifies the texture array,
- // the derivative will be 0 when there is no divergence
- //float fDivergence = abs( ddy( vShadowMapTextureCoord.z ) ) + abs( ddx( vShadowMapTextureCoord.z ) );
- CalculateVarianceShadow ( vShadowMapTextureCoord, vShadowMapTextureCoordViewSpace,
- iCurrentCascadeIndex, fPercentLit);
-
- // We repeat the calcuation for the next cascade layer, when blending between maps.
- if( BLEND_BETWEEN_CASCADE_LAYERS_FLAG && CASCADE_COUNT_FLAG > 1 )
- {
- if( fCurrentPixelsBlendBandLocation < m_fCascadeBlendArea )
- { // the current pixel is within the blend band.
-
- // Because the Z coordinate species the texture array,
- // the derivative will be 0 when there is no divergence
- float fDivergence = abs( ddy( vShadowMapTextureCoord_blend.z ) ) +
- abs( ddx( vShadowMapTextureCoord_blend.z) );
- CalculateVarianceShadow ( vShadowMapTextureCoord_blend, vShadowMapTextureCoordViewSpace,
- iNextCascadeIndex, fPercentLit_blend );
-
- // Blend the two calculated shadows by the blend amount.
- fPercentLit = lerp( fPercentLit_blend, fPercentLit, fBlendBetweenCascadesAmount );
-
- }
- }
-
- if( !m_iVisualizeCascades ) vVisualizeCascadeColor = float4( 1.0f, 1.0f, 1.0f, 1.0f );
-
- float3 vLightDir1 = float3( -1.0f, 1.0f, -1.0f );
- float3 vLightDir2 = float3( 1.0f, 1.0f, -1.0f );
- float3 vLightDir3 = float3( 0.0f, -1.0f, 0.0f );
- float3 vLightDir4 = float3( 1.0f, 1.0f, 1.0f );
- // Some ambient-like lighting.
- float fLighting =
- saturate( dot( vLightDir1 , Input.vNormal ) )*0.05f +
- saturate( dot( vLightDir2 , Input.vNormal ) )*0.05f +
- saturate( dot( vLightDir3 , Input.vNormal ) )*0.05f +
- saturate( dot( vLightDir4 , Input.vNormal ) )*0.05f ;
-
- float4 vShadowLighting = fLighting * 0.5f;
- fLighting += saturate( dot( m_vLightDir , Input.vNormal ) );
- fLighting = lerp( vShadowLighting, fLighting, fPercentLit );
-
- return fLighting * vVisualizeCascadeColor * vDiffuse;
-
-}
-
diff --git a/tests/hlsl/dxsdk/VarianceShadows11/RenderVarianceShadow.hlsl b/tests/hlsl/dxsdk/VarianceShadows11/RenderVarianceShadow.hlsl
deleted file mode 100644
index 9aec9a55d..000000000
--- a/tests/hlsl/dxsdk/VarianceShadows11/RenderVarianceShadow.hlsl
+++ /dev/null
@@ -1,49 +0,0 @@
-//TEST:COMPARE_HLSL: -profile sm_4_0 -entry VSMain -stage vertex -entry PSMain -stage pixel
-
-#ifndef __SLANG__
-#define cbPerObject cbPerObject_0
-#define g_mWorldViewProjection g_mWorldViewProjection_0
-#endif
-
-//--------------------------------------------------------------------------------------
-// Globals
-//--------------------------------------------------------------------------------------
-cbuffer cbPerObject : register( b0 )
-{
- matrix g_mWorldViewProjection ;//SLANG: : packoffset( c0 );
-};
-
-//--------------------------------------------------------------------------------------
-// Input / Output structures
-//--------------------------------------------------------------------------------------
-struct VS_INPUT
-{
- float4 vPosition : POSITION;
-};
-
-struct VS_OUTPUT
-{
- float4 vPosition : SV_POSITION;
-};
-
-//--------------------------------------------------------------------------------------
-// Vertex Shader
-//--------------------------------------------------------------------------------------
-VS_OUTPUT VSMain( VS_INPUT Input )
-{
- VS_OUTPUT Output;
-
-
- Output.vPosition = mul( Input.vPosition, g_mWorldViewProjection );
-
- return Output;
-}
-
-
-float2 PSMain (VS_OUTPUT Input) : SV_TARGET
-{
- float2 rt;
- rt.x = Input.vPosition.z;
- rt.y = rt.x * rt.x;
- return rt;
-} \ No newline at end of file
diff --git a/tests/hlsl/simple/rw-texture.hlsl b/tests/hlsl/simple/rw-texture.hlsl
index 26916b474..de8e82777 100644
--- a/tests/hlsl/simple/rw-texture.hlsl
+++ b/tests/hlsl/simple/rw-texture.hlsl
@@ -5,7 +5,16 @@
// Ensure that we implement the `Load` operations on
// `RWTexture*` types with the correct signature.
-#ifndef __SLANG__
+#ifdef __SLANG__
+#define R(X) /**/
+#define BEGIN_CBUFFER(NAME) cbuffer NAME
+#define END_CBUFFER(NAME, REG) /**/
+#define CBUFFER_REF(NAME, FIELD) FIELD
+#else
+#define R(X) : register(X)
+#define BEGIN_CBUFFER(NAME) struct SLANG_ParameterGroup_##NAME
+#define END_CBUFFER(NAME, REG) ; cbuffer NAME : REG { SLANG_ParameterGroup_##NAME NAME; }
+#define CBUFFER_REF(NAME, FIELD) NAME.FIELD
#define C C_0
#define SV_Target SV_TARGET
#define u2 u2_0
@@ -16,19 +25,20 @@
#endif
-cbuffer C : register(b0)
+BEGIN_CBUFFER(C)
{
uint2 u2;
uint3 u3;
-};
+}
+END_CBUFFER(C, register(b0))
-RWTexture2D<float4> t2 : register(u1);
-RWTexture2DArray<float4> t2a : register(u2);
-RWTexture3D<float4> t3 : register(u3);
+RWTexture2D<float4> t2 R(u1);
+RWTexture2DArray<float4> t2a R(u2);
+RWTexture3D<float4> t3 R(u3);
float4 main() : SV_Target
{
- return t2.Load(u2)
- + t2a.Load(u3)
- + t3.Load(u3);
+ return t2.Load(CBUFFER_REF(C,u2))
+ + t2a.Load(CBUFFER_REF(C,u3))
+ + t3.Load(CBUFFER_REF(C,u3));
}
diff --git a/tests/parser/cast-precedence.hlsl b/tests/parser/cast-precedence.hlsl
index 3383d9912..29793e4a2 100644
--- a/tests/parser/cast-precedence.hlsl
+++ b/tests/parser/cast-precedence.hlsl
@@ -3,20 +3,32 @@
// Confirm that type-cast expressions parse with
// the appropriate precedence.
-#ifndef __SLANG__
+#ifdef __SLANG__
+#define R(X) /**/
+#define BEGIN_CBUFFER(NAME) cbuffer NAME
+#define END_CBUFFER(NAME, REG) /**/
+#define CBUFFER_REF(NAME, FIELD) FIELD
+#else
+#define R(X) X
+#define BEGIN_CBUFFER(NAME) struct SLANG_ParameterGroup_##NAME
+#define END_CBUFFER(NAME, REG) ; cbuffer NAME : register(REG) { SLANG_ParameterGroup_##NAME NAME; }
+#define CBUFFER_REF(NAME, FIELD) NAME.FIELD
+
#define C C_0
#define a a_0
#define b b_0
#define SV_Position SV_POSITION
#endif
-cbuffer C : register(b0)
+BEGIN_CBUFFER(C)
{
float a;
float b;
-};
+}
+END_CBUFFER(C,b0)
+
float4 main() : SV_Position
{
- return (uint) a / b;
+ return (uint) CBUFFER_REF(C,a) / CBUFFER_REF(C,b);
}
diff --git a/tests/reflection/parameter-block-explicit-space.slang b/tests/reflection/parameter-block-explicit-space.slang
index 5679a1c35..b4d3eff9c 100644
--- a/tests/reflection/parameter-block-explicit-space.slang
+++ b/tests/reflection/parameter-block-explicit-space.slang
@@ -71,7 +71,7 @@ struct A
{
float4 au;
};
-cbuffer _S1 : register(b0, space2)
+cbuffer a : register(b0, space2)
{ A a; }
Texture2D at1 : register(t0, space2);
Texture2D at2 : register(t1, space2);
@@ -81,7 +81,7 @@ struct B
{
float4 bu;
};
-cbuffer _S3 : register(b0, space3)
+cbuffer b : register(b0, space3)
{ B b; }
Texture2D bt : register(t0, space3);
SamplerState bs : register(s0, space3);
diff --git a/tests/rewriter/type-splitting.hlsl b/tests/rewriter/type-splitting.hlsl
index 0826cbf21..850e1b5ad 100644
--- a/tests/rewriter/type-splitting.hlsl
+++ b/tests/rewriter/type-splitting.hlsl
@@ -42,9 +42,14 @@ struct Foo_0
float2 u_0;
};
+struct SLANG_ParameterGroup_C_0
+{
+ Foo_0 foo_0;
+};
+
cbuffer C_0
{
- Foo_0 foo_0;
+ SLANG_ParameterGroup_C_0 C_0;
}
Texture2D C_foo_t_0;
@@ -52,7 +57,7 @@ SamplerState C_foo_s_0;
float4 main() : SV_TARGET
{
- return C_foo_t_0.Sample(C_foo_s_0, foo_0.u_0);
+ return C_foo_t_0.Sample(C_foo_s_0, C_0.foo_0.u_0);
}
#endif
diff --git a/tests/vkray/anyhit.slang.glsl b/tests/vkray/anyhit.slang.glsl
index 43fd29e01..07789cdbd 100644
--- a/tests/vkray/anyhit.slang.glsl
+++ b/tests/vkray/anyhit.slang.glsl
@@ -10,8 +10,8 @@ struct Params_0
layout(binding = 0)
layout(std140) uniform _S1
{
- Params_0 gParams_0;
-};
+ Params_0 _data;
+} gParams_0;
layout(binding = 1)
uniform texture2D gParams_alphaMap_0;
@@ -35,7 +35,7 @@ void main()
{
SphereHitAttributes_0 _S4 = _S2;
- if(bool(gParams_0.mode_0))
+ if(bool(gParams_0._data.mode_0))
{
float val_0 = textureLod(
sampler2D(gParams_alphaMap_0, gParams_sampler_0),
diff --git a/tests/vkray/callable-caller.slang.glsl b/tests/vkray/callable-caller.slang.glsl
index 2704e6720..b0d174381 100644
--- a/tests/vkray/callable-caller.slang.glsl
+++ b/tests/vkray/callable-caller.slang.glsl
@@ -4,16 +4,26 @@ layout(row_major) uniform;
layout(row_major) buffer;
#extension GL_NV_ray_tracing : require
+#define tmp_ubo _S1
+#define tmp_launchid _S2
+#define tmp_luanchidf _S3
+#define tmp_launchsize _S4
+#define tmp_launchpos _S5
+#define tmp_shaderidx _S6
+#define tmp_payload _S7
+#define tmp_launchid2 _S8
+
struct SLANG_ParameterGroup_C_0
{
uint shaderIndex_0;
};
layout(binding = 0)
-layout(std140) uniform C_0
+layout(std140)
+uniform tmp_ubo
{
- uint shaderIndex_0;
-};
+ SLANG_ParameterGroup_C_0 _data;
+} C_0;
struct MaterialPayload_0
{
@@ -43,25 +53,25 @@ void main()
MaterialPayload_0 payload_1;
payload_1.albedo_0 = vec4(0);
- uvec3 _S1 = gl_LaunchIDNV;
- vec2 _S2 = vec2(_S1.xy);
+ uvec3 tmp_launchid = gl_LaunchIDNV;
+ vec2 tmp_luanchidf = vec2(tmp_launchid.xy);
- uvec3 _S3 = gl_LaunchSizeNV;
- vec2 _S4 = _S2 / vec2(_S3.xy);
+ uvec3 tmp_launchsize = gl_LaunchSizeNV;
+ vec2 tmp_launchpos = tmp_luanchidf / vec2(tmp_launchsize.xy);
- payload_1.uv_0 = _S4;
+ payload_1.uv_0 = tmp_launchpos;
- uint _S5 = shaderIndex_0;
+ uint tmp_shaderidx = C_0._data.shaderIndex_0;
- MaterialPayload_0 _S6;
- _S6 = payload_1;
- CallShader_0(_S5, _S6);
- payload_1 = _S6;
+ MaterialPayload_0 tmp_payload;
+ tmp_payload = payload_1;
+ CallShader_0(tmp_shaderidx, tmp_payload);
+ payload_1 = tmp_payload;
- uvec3 _S7 = gl_LaunchIDNV;
+ uvec3 tmp_launchid2 = gl_LaunchIDNV;
imageStore(
gImage_0,
- ivec2(_S7.xy),
+ ivec2(tmp_launchid2.xy),
payload_1.albedo_0);
return;
}
diff --git a/tests/vkray/closesthit.slang.glsl b/tests/vkray/closesthit.slang.glsl
index a056b7809..79fd3afbe 100644
--- a/tests/vkray/closesthit.slang.glsl
+++ b/tests/vkray/closesthit.slang.glsl
@@ -2,50 +2,70 @@
#version 460
#extension GL_NV_ray_tracing : require
-layout(shaderRecordNV)
-buffer ShaderRecord_0
+#define tmp_shaderrecord _S1
+#define tmp_colors _S2
+#define tmp_hitattrs _S3
+#define tmp_payload _S4
+#define tmp_localattrs _S5
+#define tmp_customidx _S6
+#define tmp_instanceid _S7
+#define tmp_add_0 _S8
+#define tmp_primid _S9
+#define tmp_add_1 _S10
+#define tmp_hitkind _S11
+#define tmp_hitt _S12
+#define tmp_tmin _S13
+
+struct SLANG_ParameterGroup_ShaderRecord_0
{
- uint shaderRecordID_0;
+ uint shaderRecordID_0;
};
-layout(std430, binding = 0) buffer _S1
+layout(shaderRecordNV)
+buffer tmp_shaderrecord
{
- vec4 colors_0[];
-};
+ SLANG_ParameterGroup_ShaderRecord_0 _data;
+} ShaderRecord_0;
+
+layout(std430, binding = 0) buffer tmp_colors
+{
+ vec4 _data[];
+} colors_0;
struct BuiltInTriangleIntersectionAttributes_0
{
vec2 barycentrics_0;
};
-hitAttributeNV BuiltInTriangleIntersectionAttributes_0 _S2;
+
+hitAttributeNV BuiltInTriangleIntersectionAttributes_0 tmp_hitattrs;
struct ReflectionRay_0
{
vec4 color_0;
};
-rayPayloadInNV ReflectionRay_0 _S3;
+rayPayloadInNV ReflectionRay_0 tmp_payload;
void main()
{
- BuiltInTriangleIntersectionAttributes_0 _S4 = _S2;
+ BuiltInTriangleIntersectionAttributes_0 tmp_localattrs = tmp_hitattrs;
- uint _S5 = gl_InstanceCustomIndexNV;
- uint _S6 = gl_InstanceID;
+ uint tmp_customidx = gl_InstanceCustomIndexNV;
+ uint tmp_instanceid = gl_InstanceID;
- uint _S7 = _S5 + _S6;
- uint _S8 = gl_PrimitiveID;
+ uint tmp_add_0 = tmp_customidx + tmp_instanceid;
+ uint tmp_primid = gl_PrimitiveID;
- uint _S9 = _S7 + _S8;
- uint _S10 = gl_HitKindNV;
+ uint tmp_add_1 = tmp_add_0 + tmp_primid;
+ uint tmp_hitkind = gl_HitKindNV;
- vec4 color_1 = colors_0[_S9 + _S10 + shaderRecordID_0];
+ vec4 color_1 = colors_0._data[tmp_add_1 + tmp_hitkind + ShaderRecord_0._data.shaderRecordID_0];
- float _S11 = gl_HitTNV;
- float _S12 = gl_RayTminNV;
+ float tmp_hitt = gl_HitTNV;
+ float tmp_tmin = gl_RayTminNV;
- _S3.color_0 = color_1 * (_S11 - _S12);
+ tmp_payload.color_0 = color_1 * (tmp_hitt - tmp_tmin);
return;
}
diff --git a/tests/vkray/intersection.slang.glsl b/tests/vkray/intersection.slang.glsl
index cfa53efa7..09d7e63a5 100644
--- a/tests/vkray/intersection.slang.glsl
+++ b/tests/vkray/intersection.slang.glsl
@@ -3,19 +3,37 @@
#extension GL_NV_ray_tracing : require
+#define tmp_ubo _S1
+#define tmp_reportHit _S2
+#define tmp_origin _S3
+#define tmp_direction _S4
+#define tmp_tmin _S5
+#define tmp_tmax _S6
+#define tmp_ray _S7
+#define tmp_sphere _S8
+#define tmp_thit _S9
+#define tmp_hitattrs _S10
+#define tmp_dithit _S11
+#define tmp_reportresult _S12
+
struct Sphere_0
{
vec3 position_0;
float radius_0;
};
-layout(binding = 0)
-layout(std140)
-uniform U_0
+struct SLANG_ParameterGroup_U_0
{
Sphere_0 gSphere_0;
};
+layout(binding = 0)
+layout(std140)
+uniform tmp_ubo
+{
+ SLANG_ParameterGroup_U_0 _data;
+} U_0;
+
struct RayDesc_0
{
vec3 Origin_0;
@@ -45,40 +63,40 @@ hitAttributeNV SphereHitAttributes_0 a_0;
bool ReportHit_0(float tHit_1, uint hitKind_0, SphereHitAttributes_0 attributes_0)
{
a_0 = attributes_0;
- bool _S1 = reportIntersectionNV(tHit_1, hitKind_0);
- return _S1;
+ bool tmp_reportHit = reportIntersectionNV(tHit_1, hitKind_0);
+ return tmp_reportHit;
}
void main()
{
RayDesc_0 ray_1;
- vec3 _S2 = gl_ObjectRayOriginNV;
- ray_1.Origin_0 = _S2;
- vec3 _S3 = gl_ObjectRayDirectionNV;
+ vec3 tmp_origin = gl_ObjectRayOriginNV;
+ ray_1.Origin_0 = tmp_origin;
- ray_1.Direction_0 = _S3;
- float _S4 = gl_RayTminNV;
+ vec3 tmp_direction = gl_ObjectRayDirectionNV;
+ ray_1.Direction_0 = tmp_direction;
- ray_1.TMin_0 = _S4;
- float _S5 = gl_RayTmaxNV;
+ float tmp_tmin = gl_RayTminNV;
+ ray_1.TMin_0 = tmp_tmin;
- ray_1.TMax_0 = _S5;
+ float tmp_tmax = gl_RayTmaxNV;
+ ray_1.TMax_0 = tmp_tmax;
- RayDesc_0 _S6 = ray_1;
+ RayDesc_0 tmp_ray = ray_1;
- Sphere_0 _S7 = gSphere_0;
+ Sphere_0 tmp_sphere = U_0._data.gSphere_0;
- float _S8;
- SphereHitAttributes_0 _S9;
- bool _S10 = rayIntersectsSphere_0(_S6, _S7, _S8, _S9);
+ float tmp_thit;
+ SphereHitAttributes_0 tmp_hitattrs;
+ bool tmp_dithit = rayIntersectsSphere_0(tmp_ray, tmp_sphere, tmp_thit, tmp_hitattrs);
- float tHit_2 = _S8;
- SphereHitAttributes_0 attrs_1 = _S9;
+ float tHit_2 = tmp_thit;
+ SphereHitAttributes_0 attrs_1 = tmp_hitattrs;
- if(_S10)
+ if(tmp_dithit)
{
- bool _S11 = ReportHit_0(tHit_2, (uint((0))), attrs_1);
+ bool tmp_reportresult = ReportHit_0(tHit_2, (uint((0))), attrs_1);
}
return;
diff --git a/tests/vkray/raygen.slang.glsl b/tests/vkray/raygen.slang.glsl
index 512215a73..f65053ecf 100644
--- a/tests/vkray/raygen.slang.glsl
+++ b/tests/vkray/raygen.slang.glsl
@@ -1,10 +1,46 @@
//TEST_IGNORE_FILE:
#version 460
+layout(row_major) uniform;
+
#extension GL_NV_ray_tracing : require
#define TRACING_EPSILON 1e-6
+#define tmp_ubo _S1
+#define tmp_saturate _S2
+#define tmp_launchID_x _S3
+#define tmp_add_x _S4
+#define tmp_launchSize_x _S5
+#define tmp_div_x _S6
+#define tmp_launchID_y _S7
+#define tmp_add_y _S8
+#define tmp_launchSize_y _S9
+#define tmp_div_y _S10
+#define tmp_tex_pos _S11
+#define tmp_tex_nrm _S12
+#define tmp_light_invDist _S13
+#define tmp_trace_A _S14
+#define tmp_trace_B _S15
+#define tmp_trace_C _S16
+#define tmp_trace_D _S17
+#define tmp_trace_E _S18
+#define tmp_trace_ray _S19
+#define tmp_trace_payload _S20
+#define tmp_cmp _S21
+#define tmp_color _S22
+#define tmp_dot _S23
+#define tmp_sat _S24
+#define tmp_trace2_A _S25
+#define tmp_trace2_B _S26
+#define tmp_trace2_C _S27
+#define tmp_trace2_D _S28
+#define tmp_trace2_E _S29
+#define tmp_trace2_ray _S30
+#define tmp_trace2_payload _S31
+#define tmp_storeIdx _S32
+
+
layout(binding = 0) uniform texture2D samplerPosition_0;
layout(binding = 2) uniform sampler sampler_0;
layout(binding = 1) uniform texture2D samplerNormal_0;
@@ -17,15 +53,20 @@ struct Light_0
#define NUM_LIGHTS 17
-layout(binding = 3)
-layout(std140) uniform ubo_0
+struct Uniforms_0
{
Light_0 light_0;
vec4 viewPos_0;
- layout(row_major) mat4x4 view_0;
- layout(row_major) mat4x4 model_0;
+ mat4x4 view_0;
+ mat4x4 model_0;
};
+layout(binding = 3)
+layout(std140) uniform tmp_ubo
+{
+ Uniforms_0 _data;
+} ubo_0;
+
layout(binding = 5) uniform accelerationStructureNV as_0;
struct ShadowRay_0
@@ -79,8 +120,8 @@ void TraceRay_0(
float saturate_0(float x_0)
{
- float _S1 = clamp(x_0, float(0), float(1));
- return _S1;
+ float tmp_saturate = clamp(x_0, float(0), float(1));
+ return tmp_saturate;
}
void TraceRay_1(
@@ -114,27 +155,28 @@ void main()
{
float atten_0;
- uvec3 _S2 = gl_LaunchIDNV;
- float _S3 = float(_S2.x) + 0.5;
- uvec3 _S4 = gl_LaunchSizeNV;
- float _S5 = _S3 / float(_S4.x);
- uvec3 _S6 = gl_LaunchIDNV;
- float _S7 = float(_S6.y) + 0.5;
- uvec3 _S8 = gl_LaunchSizeNV;
- float _S9 = _S7 / float(_S8.y);
- vec2 inUV_0 = vec2(_S5, _S9);
+ uvec3 tmp_launchID_x = gl_LaunchIDNV;
+ float tmp_add_x = float(tmp_launchID_x.x) + 0.5;
+ uvec3 tmp_launchSize_x = gl_LaunchSizeNV;
+ float tmp_div_x = tmp_add_x / float(tmp_launchSize_x.x);
+
+ uvec3 tmp_launchID_y = gl_LaunchIDNV;
+ float tmp_add_y = float(tmp_launchID_y.y) + 0.5;
+ uvec3 tmp_launchSize_y = gl_LaunchSizeNV;
+ float tmp_div_y = tmp_add_y / float(tmp_launchSize_y.y);
+ vec2 inUV_0 = vec2(tmp_div_x, tmp_div_y);
- vec4 _S10 = texture(sampler2D(samplerPosition_0, sampler_0), inUV_0);
- vec3 P_0 = _S10.xyz;
+ vec4 tmp_tex_pos = texture(sampler2D(samplerPosition_0, sampler_0), inUV_0);
+ vec3 P_0 = tmp_tex_pos.xyz;
- vec4 _S11 = texture(sampler2D(samplerNormal_0, sampler_0), inUV_0);
- vec3 N_0 = _S11.xyz * 2.0 - 1.0;
+ vec4 tmp_tex_nrm = texture(sampler2D(samplerNormal_0, sampler_0), inUV_0);
+ vec3 N_0 = tmp_tex_nrm.xyz * 2.0 - 1.0;
- vec3 lightDelta_0 = light_0.position_0.xyz - P_0;
+ vec3 lightDelta_0 = ubo_0._data.light_0.position_0.xyz - P_0;
float lightDist_0 = length(lightDelta_0);
vec3 L_0 = normalize(lightDelta_0);
- float _S12 = 1.0 / (lightDist_0 * lightDist_0);
+ float tmp_light_invDist = 1.0 / (lightDist_0 * lightDist_0);
RayDesc_0 ray_0;
ray_0.Origin_0 = P_0;
@@ -144,47 +186,47 @@ void main()
ShadowRay_0 shadowRay_0;
shadowRay_0.hitDistance_0 = float(0);
- const uint _S13 = uint(1);
- const uint _S14 = uint(0xFF);
- const uint _S15 = uint(0);
- const uint _S16 = uint(0);
- const uint _S17 = uint(2);
-
- RayDesc_0 _S18 = ray_0;
- ShadowRay_0 _S19;
- _S19 = shadowRay_0;
- TraceRay_0(as_0, _S13, _S14, _S15, _S16, _S17, _S18, _S19);
- shadowRay_0 = _S19;
-
- bool _S20 = shadowRay_0.hitDistance_0 < lightDist_0;
+ const uint tmp_trace_A = uint(1);
+ const uint tmp_trace_B = uint(0xFF);
+ const uint tmp_trace_C = uint(0);
+ const uint tmp_trace_D = uint(0);
+ const uint tmp_trace_E = uint(2);
+
+ RayDesc_0 tmp_trace_ray = ray_0;
+ ShadowRay_0 tmp_trace_payload;
+ tmp_trace_payload = shadowRay_0;
+ TraceRay_0(as_0, tmp_trace_A, tmp_trace_B, tmp_trace_C, tmp_trace_D, tmp_trace_E, tmp_trace_ray, tmp_trace_payload);
+ shadowRay_0 = tmp_trace_payload;
+
+ bool tmp_cmp = shadowRay_0.hitDistance_0 < lightDist_0;
ReflectionRay_0 reflectionRay_0;
- if(_S20)
+ if(tmp_cmp)
{
atten_0 = (0.00000000000000000000);
}
else
{
- atten_0 = _S12;
+ atten_0 = tmp_light_invDist;
}
- vec3 _S21 = light_0.color_0.xyz;
- float _S22 = dot(N_0, L_0);
- float _S23 = saturate_0(_S22);
- vec3 color_2 = (_S21 * _S23) * atten_0;
-
- const uint _S24 = uint(1);
- const uint _S25 = uint(255);
- const uint _S26 = uint(0);
- const uint _S27 = uint(0);
- const uint _S28 = uint(2);
- RayDesc_0 _S29 = ray_0;
- ReflectionRay_0 _S30;
- _S30 = reflectionRay_0;
- TraceRay_1(as_0, _S24, _S25, _S26, _S27, _S28, _S29, _S30);
-
- vec3 color_3 = color_2 + _S30.color_1;
-
- uvec3 _S31 = gl_LaunchIDNV;
- imageStore(outputImage_0, ivec2(uvec2(ivec2(_S31.xy))), vec4(color_3, 1.0));
+ vec3 tmp_color = ubo_0._data.light_0.color_0.xyz;
+ float tmp_dot = dot(N_0, L_0);
+ float tmp_sat = saturate_0(tmp_dot);
+ vec3 color_2 = (tmp_color * tmp_sat) * atten_0;
+
+ const uint tmp_trace2_A = uint(1);
+ const uint tmp_trace2_B = uint(255);
+ const uint tmp_trace2_C = uint(0);
+ const uint tmp_trace2_D = uint(0);
+ const uint tmp_trace2_E = uint(2);
+ RayDesc_0 tmp_trace2_ray = ray_0;
+ ReflectionRay_0 tmp_trace2_payload;
+ tmp_trace2_payload = reflectionRay_0;
+ TraceRay_1(as_0, tmp_trace2_A, tmp_trace2_B, tmp_trace2_C, tmp_trace2_D, tmp_trace2_E, tmp_trace2_ray, tmp_trace2_payload);
+
+ vec3 color_3 = color_2 + tmp_trace2_payload.color_1;
+
+ uvec3 tmp_storeIdx = gl_LaunchIDNV;
+ imageStore(outputImage_0, ivec2(uvec2(ivec2(tmp_storeIdx.xy))), vec4(color_3, 1.0));
return;
}