blob: 32eb41f1b172cbf207071aa7c3bd6512f4c359fc (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
|
//TEST_DISABLED:SIMPLE:-dump-ir -profile cs_5_0 -entry main
// Note: disabling this test for now because
// the actual IR that gets dumped is not very
// stable with code generation changes going on,
// and we already have more significant tests
// that stress the IR functionality.
//
// We should consider removing this test, or
// else work to ensure that "canonical" IR
// output is more consistent.
// Thread-group width along X. Also used as the element count of the
// groupshared array `s` and as the upper bound of the stride loop in main.
#define GROUP_THREAD_COUNT 64
// Source buffer: main reads one float4 from it, indexed by dispatchThreadID.
StructuredBuffer<float4> input;
// Destination buffer: main writes one float4 to it, indexed by dispatchThreadID.
RWStructuredBuffer<float4> output;
// Group-shared scratch array with GROUP_THREAD_COUNT float4 slots,
// indexed by groupThreadID in main.
groupshared float4 s[GROUP_THREAD_COUNT];
// Compute-shader entry point; the thread group is GROUP_THREAD_COUNT x 1 x 1.
//
// Each thread copies one element of `input` into the groupshared array `s`,
// then runs accumulate passes with stride = 1, 2, 4, ... (doubling while
// stride < GROUP_THREAD_COUNT), and finally writes s[0] to its own slot
// in `output`.
//
// Parameters:
//   dispatchThreadID - SV_DispatchThreadID; index into `input` and `output`.
//   groupThreadID    - SV_GroupThreadID; index into `s`.
//   groupID          - bound to SV_GroupIndex; not referenced in the body.
[numthreads(GROUP_THREAD_COUNT, 1, 1)]
void main(
uint dispatchThreadID : SV_DispatchThreadID,
uint groupThreadID : SV_GroupThreadID,
uint groupID : SV_GroupIndex )
{
// The algorithm carried out below is bogus.
// Load this thread's input element into its groupshared slot.
s[groupThreadID] = input[dispatchThreadID];
// Accumulate passes: the stride doubles each iteration (1, 2, 4, ...).
for(uint stride = 1; stride < GROUP_THREAD_COUNT; stride <<= 1)
{
// Group-wide barrier before each pass touches `s`.
GroupMemoryBarrierWithGroupSync();
// NOTE(review): both operands are uint, so when groupThreadID < stride the
// subtraction wraps around and the index falls outside `s` -- confirm
// whether that is acceptable for this test.
// NOTE(review): there is no barrier between this read of another thread's
// slot and that thread's own write to it in the same pass -- looks like a
// read/write race; confirm.
s[groupThreadID] += s[groupThreadID - stride];
}
// Group-wide barrier after the final pass, before s[0] is read.
GroupMemoryBarrierWithGroupSync();
// Every thread writes the value of slot 0 to its own output element.
output[dispatchThreadID] = s[0];
}
|