diff options
| author | Tim Foley <tfoley@nvidia.com> | 2017-06-09 11:34:21 -0700 |
|---|---|---|
| committer | Tim Foley <tfoley@nvidia.com> | 2017-06-09 13:44:59 -0700 |
| commit | fcf83dbf9effab3bd98bad2b83b2468b7eb05cfd (patch) | |
| tree | 41047c94883b86ec085a81597391ce3ef557cd43 /tests/hlsl/dxsdk/ComputeShaderSort11 | |
| parent | 52e8d4b9a27ab0060f874c3a63ab531847be35c0 (diff) | |
Initial import of code.
Diffstat (limited to 'tests/hlsl/dxsdk/ComputeShaderSort11')
| -rw-r--r-- | tests/hlsl/dxsdk/ComputeShaderSort11/ComputeShaderSort11.hlsl | 75 |
1 files changed, 75 insertions, 0 deletions
//TEST_IGNORE_FILE: Currently failing due to Spire compiler issues.
//TEST:COMPARE_HLSL: -target dxbc-assembly -profile cs_4_0 -entry BitonicSort -entry MatrixTranspose
//--------------------------------------------------------------------------------------
// File: ComputeShaderSort11.hlsl
//
// This file contains the compute shaders to perform GPU sorting using DirectX 11.
//
// Copyright (c) Microsoft Corporation. All rights reserved.
//--------------------------------------------------------------------------------------

// Thread-group width of the BitonicSort entry point and size of its shared array.
#define BITONIC_BLOCK_SIZE 512

// Edge length of the square thread group / shared tile used by MatrixTranspose.
#define TRANSPOSE_BLOCK_SIZE 16

//--------------------------------------------------------------------------------------
// Constant Buffers
//--------------------------------------------------------------------------------------
cbuffer CB : register( b0 )
{
    unsigned int g_iLevel;      // BitonicSort: the first compare distance is g_iLevel >> 1
    unsigned int g_iLevelMask;  // BitonicSort: ANDed with DTid.x to pick the compare direction
    unsigned int g_iWidth;      // MatrixTranspose: row pitch used when reading Input
    unsigned int g_iHeight;     // MatrixTranspose: row pitch used when writing Data
};

//--------------------------------------------------------------------------------------
// Structured Buffers
//--------------------------------------------------------------------------------------
StructuredBuffer<unsigned int>   Input : register( t0 );  // read only; source of MatrixTranspose
RWStructuredBuffer<unsigned int> Data  : register( u0 );  // read/write; sorted in place, transpose target

//--------------------------------------------------------------------------------------
// Bitonic Sort Compute Shader
//
// Each thread copies one element of Data into group shared memory, takes part in the
// compare/exchange passes for compare distances g_iLevel/2, g_iLevel/4, ..., 1, and then
// writes its slot back to the same position in Data.
//--------------------------------------------------------------------------------------
groupshared unsigned int shared_data[BITONIC_BLOCK_SIZE];

[numthreads(BITONIC_BLOCK_SIZE, 1, 1)]
void BitonicSort( uint3 Gid  : SV_GroupID,
                  uint3 DTid : SV_DispatchThreadID,
                  uint3 GTid : SV_GroupThreadID,
                  uint  GI   : SV_GroupIndex )
{
    // Stage this thread's element in group shared memory.
    shared_data[GI] = Data[DTid.x];
    GroupMemoryBarrierWithGroupSync();

    // The direction flag depends only on the thread's global index and the constant
    // buffer, so it is the same for every pass of the loop below.
    const bool directionBit = (g_iLevelMask & DTid.x) != 0;

    // One compare/exchange pass per compare distance.
    for (uint stride = g_iLevel >> 1; stride > 0; stride >>= 1)
    {
        // The pair being compared is this slot with the stride bit cleared / set.
        const uint lowerValue  = shared_data[GI & ~stride];
        const uint upperValue  = shared_data[GI | stride];
        const bool lowerIsLeq  = (lowerValue <= upperValue);

        // Take the partner's value when the comparison result equals the direction
        // flag; otherwise keep the value already in this thread's slot.
        const uint selected = (lowerIsLeq == directionBit) ? shared_data[GI ^ stride]
                                                           : shared_data[GI];

        // Every thread must finish reading before any thread overwrites its slot...
        GroupMemoryBarrierWithGroupSync();
        shared_data[GI] = selected;
        // ...and every write must land before the next pass reads again.
        GroupMemoryBarrierWithGroupSync();
    }

    // Write the result back to where this thread loaded from.
    Data[DTid.x] = shared_data[GI];
}

//--------------------------------------------------------------------------------------
// Matrix Transpose Compute Shader
//
// Each thread group stages a TRANSPOSE_BLOCK_SIZE x TRANSPOSE_BLOCK_SIZE tile of Input in
// group shared memory and writes it to Data with rows and columns exchanged.
//--------------------------------------------------------------------------------------
groupshared unsigned int transpose_shared_data[TRANSPOSE_BLOCK_SIZE * TRANSPOSE_BLOCK_SIZE];

[numthreads(TRANSPOSE_BLOCK_SIZE, TRANSPOSE_BLOCK_SIZE, 1)]
void MatrixTranspose( uint3 Gid  : SV_GroupID,
                      uint3 DTid : SV_DispatchThreadID,
                      uint3 GTid : SV_GroupThreadID,
                      uint  GI   : SV_GroupIndex )
{
    // Stage the element at (DTid.x, DTid.y) of Input, which is addressed with a row
    // pitch of g_iWidth, in this thread's slot of the shared tile.
    const uint sourceIndex = DTid.y * g_iWidth + DTid.x;
    transpose_shared_data[GI] = Input[sourceIndex];
    GroupMemoryBarrierWithGroupSync();

    // DTid - GTid is the origin of this group's tile; swizzling it to .yx gives the
    // tile's origin in the output. The thread's own offset inside the tile is then
    // added back unswapped.
    const uint2 swappedTileOrigin = DTid.yx - GTid.yx;
    const uint2 target            = swappedTileOrigin + GTid.xy;

    // Read the tile with x and y exchanged, so the element written at `target` is the
    // transposed one. Data is addressed with a row pitch of g_iHeight.
    const uint tileIndex = GTid.x * TRANSPOSE_BLOCK_SIZE + GTid.y;
    Data[target.y * g_iHeight + target.x] = transpose_shared_data[tileIndex];
}
