summaryrefslogtreecommitdiff
path: root/tests/hlsl/dxsdk/HDRToneMappingCS11/ReduceToSingleCS.hlsl
diff options
context:
space:
mode:
author Tim Foley <tfoley@nvidia.com> 2017-06-09 11:34:21 -0700
committer Tim Foley <tfoley@nvidia.com> 2017-06-09 13:44:59 -0700
commit fcf83dbf9effab3bd98bad2b83b2468b7eb05cfd (patch)
tree 41047c94883b86ec085a81597391ce3ef557cd43 /tests/hlsl/dxsdk/HDRToneMappingCS11/ReduceToSingleCS.hlsl
parent 52e8d4b9a27ab0060f874c3a63ab531847be35c0 (diff)
Initial import of code.
Diffstat (limited to 'tests/hlsl/dxsdk/HDRToneMappingCS11/ReduceToSingleCS.hlsl')
-rw-r--r-- tests/hlsl/dxsdk/HDRToneMappingCS11/ReduceToSingleCS.hlsl 63
1 files changed, 63 insertions, 0 deletions
diff --git a/tests/hlsl/dxsdk/HDRToneMappingCS11/ReduceToSingleCS.hlsl b/tests/hlsl/dxsdk/HDRToneMappingCS11/ReduceToSingleCS.hlsl
new file mode 100644
index 000000000..cf506283e
--- /dev/null
+++ b/tests/hlsl/dxsdk/HDRToneMappingCS11/ReduceToSingleCS.hlsl
@@ -0,0 +1,63 @@
+//TEST:COMPARE_HLSL: -target dxbc-assembly -profile cs_4_0 -entry CSMain
+//-----------------------------------------------------------------------------
+// File: ReduceToSingleCS.hlsl
+//
+// Desc: Reduce an input buffer by a factor of groupthreads
+//
+// Copyright (c) Microsoft Corporation. All rights reserved.
+//-----------------------------------------------------------------------------
+
+StructuredBuffer<float> Input : register( t0 ); // read-only source values; CSMain reads Input[DTid.x]
+RWStructuredBuffer<float> Result : register( u0 ); // output; thread 0 of each group writes that group's sum to Result[Gid.x]
+
+cbuffer cbCS : register( b0 )
+{
+ uint4 g_param; // g_param.x is the number of valid elements in Input; CSMain treats indices >= g_param.x as 0
+ // g_param.y is the x dimension of the Dispatch call (not read by CSMain)
+};
+
+#define groupthreads 128 // threads per group along x; used for numthreads and for the size of accum
+groupshared float accum[groupthreads]; // group-shared scratch: one float slot per thread, indexed by SV_GroupIndex
+
+// Sums this thread group's slice of Input and writes the total to Result[Gid.x].
+//
+// Each thread loads one element into the groupshared array accum; the group
+// then folds accum in halves until accum[0] holds the sum.
+//
+//   Gid  - group ID; Gid.x selects the Result element this group writes.
+//   DTid - dispatch-wide thread ID; DTid.x is the Input element this thread loads.
+//   GTid - thread ID within the group; not used by this shader.
+//   GI   - flattened index within the group; this thread's slot in accum.
+//
+// The fold below halves the live range each step, so groupthreads must be a
+// power of two.
+[numthreads(groupthreads,1,1)]
+void CSMain( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
+{
+    // Threads past the end of the data (DTid.x >= g_param.x) contribute 0.
+    if ( DTid.x < g_param.x )
+        accum[GI] = Input[DTid.x];
+    else
+        accum[GI] = 0;
+
+    // Parallel reduction: strides run groupthreads/2, ..., 2, 1
+    // (64, 32, ..., 1 for 128 threads). Deriving them from groupthreads keeps
+    // the #define as the only place the group size is written down.
+    // [unroll] asks the compiler to expand the loop into straight-line steps.
+    [unroll]
+    for ( uint stride = groupthreads / 2; stride > 0; stride >>= 1 )
+    {
+        // The barrier orders the previous step's writes (or the initial load)
+        // before any thread reads its partner's slot.
+        GroupMemoryBarrierWithGroupSync();
+        if ( GI < stride )
+            accum[GI] += accum[stride + GI];
+    }
+
+    // Thread 0 performed the final add itself, so it can read accum[0]
+    // without another barrier.
+    if ( GI == 0 )
+    {
+        Result[Gid.x] = accum[0];
+    }
+}