summary | refs | log | tree | commit | diff
path: root/tests/hlsl-intrinsic/wave-multi/wave-multi-prefix-bitwise.slang
blob: a56a8abeb425cc4422517f50094d641056452c0c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
//TEST_CATEGORY(wave, compute)
//TEST:COMPARE_COMPUTE_EX(filecheck-buffer=CHECK):-vk -compute -shaderobj -emit-spirv-directly
//TEST:COMPARE_COMPUTE_EX(filecheck-buffer=CHECK):-vk -compute -shaderobj -emit-spirv-via-glsl
//TEST:COMPARE_COMPUTE_EX(filecheck-buffer=CHECK):-slang -compute -dx12 -profile sm_6_5 -shaderobj
//TEST:COMPARE_COMPUTE_EX(filecheck-buffer=CHECK):-cuda -compute  -shaderobj -xslang -DCUDA

//TEST:COMPARE_COMPUTE_EX(filecheck-buffer=CHECK):-vk -compute -shaderobj -emit-spirv-directly -xslang -DUSE_GLSL_SYNTAX -allow-glsl

// Result buffer written by computeMain: each invocation stores its pass/fail flag at its own
// dispatch index. The input directive below seeds it with 32 zeroed 4-byte entries.
//TEST_INPUT:ubuffer(data=[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0], stride=4):out,name outputBuffer
RWStructuredBuffer<uint> outputBuffer;

// Neutral op names used by the checks in this file. They expand to the HLSL-style
// WaveMultiPrefix* intrinsics by default, or to the subgroupPartitioned*NV spellings when
// USE_GLSL_SYNTAX is defined.
#ifndef USE_GLSL_SYNTAX

// Bitwise and.
#define __partitionedInclusiveAnd WaveMultiPrefixInclusiveBitAnd
#define __partitionedExclusiveAnd WaveMultiPrefixExclusiveBitAnd
// Bitwise or.
#define __partitionedInclusiveOr WaveMultiPrefixInclusiveBitOr
#define __partitionedExclusiveOr WaveMultiPrefixExclusiveBitOr
// Bitwise xor.
#define __partitionedInclusiveXor WaveMultiPrefixInclusiveBitXor
#define __partitionedExclusiveXor WaveMultiPrefixExclusiveBitXor

#else

// Bitwise and.
#define __partitionedInclusiveAnd subgroupPartitionedInclusiveAndNV
#define __partitionedExclusiveAnd subgroupPartitionedExclusiveAndNV
// Bitwise or.
#define __partitionedInclusiveOr subgroupPartitionedInclusiveOrNV
#define __partitionedExclusiveOr subgroupPartitionedExclusiveOrNV
// Bitwise xor.
#define __partitionedInclusiveXor subgroupPartitionedInclusiveXorNV
#define __partitionedExclusiveXor subgroupPartitionedExclusiveXorNV

#endif


// Per-invocation inputs and expected results. computeMain assigns them before running the
// checks; test1Bitwise/testVBitwise read them and convert them to the type under test.

// Input to the prefix-and ops (also the expected inclusive-and result).
static uint gAndValue = 0;
// Expected result of the exclusive prefix-and.
static uint gAndResultExclusive = 0;
// Input to the prefix-or ops (also the expected inclusive-or result).
static uint gOrValue = 0;
// NOTE(review): not read or written anywhere else in the visible source - possibly a leftover.
static uint gOrResult = 0;
// Input to the prefix-xor ops.
static uint gXorValue = 0;
// Expected result of the inclusive prefix-xor.
static uint gXorResultInclusive = 0;
// Expected result of the exclusive prefix-xor.
static uint gXorResultExclusive = 0;

// Scalar check: runs all six partitioned prefix ops (inclusive/exclusive and, or, xor) on the
// per-invocation inputs converted to T, using `mask` as the partition, and returns true only
// when every result equals its expected value.
__generic<T : __BuiltinLogicalType>
bool test1Bitwise(uint4 mask)
{
    // Inputs for this invocation, converted to the type under test.
    let andIn = T(gAndValue);
    let orIn = T(gOrValue);
    let xorIn = T(gXorValue);

    // Prefix and: the inclusive result is expected to equal the input itself.
    let andInclusiveOk = __partitionedInclusiveAnd(andIn, mask) == andIn;
    let andExclusiveOk = __partitionedExclusiveAnd(andIn, mask) == T(gAndResultExclusive);

    // Prefix or: the inclusive result is expected to equal the input, the exclusive one zero.
    let orInclusiveOk = __partitionedInclusiveOr(orIn, mask) == orIn;
    let orExclusiveOk = __partitionedExclusiveOr(orIn, mask) == T(0);

    // Prefix xor: both expectations come from the globals.
    let xorInclusiveOk = __partitionedInclusiveXor(xorIn, mask) == T(gXorResultInclusive);
    let xorExclusiveOk = __partitionedExclusiveXor(xorIn, mask) == T(gXorResultExclusive);

    return andInclusiveOk & andExclusiveOk
        & orInclusiveOk & orExclusiveOk
        & xorInclusiveOk & xorExclusiveOk;
}

// Vector check: same as the scalar variant, but every input and expected value is splatted
// into a vector<T, N> and a check passes only when all components match.
__generic<T : __BuiltinLogicalType, let N : int>
bool testVBitwise(uint4 mask) {
    typealias VecT = vector<T, N>;

    // Inputs for this invocation, broadcast to every component.
    let andIn = VecT(T(gAndValue));
    let orIn = VecT(T(gOrValue));
    let xorIn = VecT(T(gXorValue));

    // Prefix and: the inclusive result is expected to equal the input itself.
    let andInclusiveOk = all(__partitionedInclusiveAnd(andIn, mask) == andIn);
    let andExclusiveOk = all(__partitionedExclusiveAnd(andIn, mask) == VecT(T(gAndResultExclusive)));

    // Prefix or: the inclusive result is expected to equal the input, the exclusive one zero.
    let orInclusiveOk = all(__partitionedInclusiveOr(orIn, mask) == orIn);
    let orExclusiveOk = all(__partitionedExclusiveOr(orIn, mask) == VecT(T(0)));

    // Prefix xor: both expectations come from the globals.
    let xorInclusiveOk = all(__partitionedInclusiveXor(xorIn, mask) == VecT(T(gXorResultInclusive)));
    let xorExclusiveOk = all(__partitionedExclusiveXor(xorIn, mask) == VecT(T(gXorResultExclusive)));

    return andInclusiveOk & andExclusiveOk
        & orInclusiveOk & orExclusiveOk
        & xorInclusiveOk & xorExclusiveOk;
}

// Runs the scalar check and the 2-, 3- and 4-component vector checks for every integer type
// enabled for the current build, and returns true only when all of them pass.
bool testBitwise(uint4 mask)
{
    // Results are folded in with bitwise `&`, one statement per check, so every check is
    // evaluated regardless of earlier failures.
    bool allPassed = true;

    // 32-bit signed.
    allPassed = allPassed & test1Bitwise<int>(mask);
    allPassed = allPassed & testVBitwise<int, 2>(mask);
    allPassed = allPassed & testVBitwise<int, 3>(mask);
    allPassed = allPassed & testVBitwise<int, 4>(mask);

    // 32-bit unsigned.
    allPassed = allPassed & test1Bitwise<uint>(mask);
    allPassed = allPassed & testVBitwise<uint, 2>(mask);
    allPassed = allPassed & testVBitwise<uint, 3>(mask);
    allPassed = allPassed & testVBitwise<uint, 4>(mask);

    // TODO: the bool variants fail SPIRV validation and should be fixed.
    // SPIRV's ops do not directly accept/return bool.
    // allPassed = allPassed & test1Bitwise<bool>(mask);
    // allPassed = allPassed & testVBitwise<bool, 2>(mask);
    // allPassed = allPassed & testVBitwise<bool, 3>(mask);
    // allPassed = allPassed & testVBitwise<bool, 4>(mask);

    // 8-bit types, only when VK is defined.
    // NOTE(review): none of the command lines visible at the top of this file pass -DVK;
    // confirm the harness defines it, otherwise these cases are never compiled.
#if defined(VK)
    allPassed = allPassed & test1Bitwise<int8_t>(mask);
    allPassed = allPassed & testVBitwise<int8_t, 2>(mask);
    allPassed = allPassed & testVBitwise<int8_t, 3>(mask);
    allPassed = allPassed & testVBitwise<int8_t, 4>(mask);
    allPassed = allPassed & test1Bitwise<uint8_t>(mask);
    allPassed = allPassed & testVBitwise<uint8_t, 2>(mask);
    allPassed = allPassed & testVBitwise<uint8_t, 3>(mask);
    allPassed = allPassed & testVBitwise<uint8_t, 4>(mask);
#endif

    // 16- and 64-bit types, skipped when CUDA is defined.
#if !defined(CUDA)
    allPassed = allPassed & test1Bitwise<int16_t>(mask);
    allPassed = allPassed & testVBitwise<int16_t, 2>(mask);
    allPassed = allPassed & testVBitwise<int16_t, 3>(mask);
    allPassed = allPassed & testVBitwise<int16_t, 4>(mask);
    allPassed = allPassed & test1Bitwise<int64_t>(mask);
    allPassed = allPassed & testVBitwise<int64_t, 2>(mask);
    allPassed = allPassed & testVBitwise<int64_t, 3>(mask);
    allPassed = allPassed & testVBitwise<int64_t, 4>(mask);
    allPassed = allPassed & test1Bitwise<uint16_t>(mask);
    allPassed = allPassed & testVBitwise<uint16_t, 2>(mask);
    allPassed = allPassed & testVBitwise<uint16_t, 3>(mask);
    allPassed = allPassed & testVBitwise<uint16_t, 4>(mask);
    allPassed = allPassed & test1Bitwise<uint64_t>(mask);
    allPassed = allPassed & testVBitwise<uint64_t, 2>(mask);
    allPassed = allPassed & testVBitwise<uint64_t, 3>(mask);
    allPassed = allPassed & testVBitwise<uint64_t, 4>(mask);
#endif

    return allPassed;
}

// Entry point, dispatched with 32 invocations per thread group. Each invocation selects its
// partition mask, fills in the per-invocation inputs and expected values (the g* globals),
// runs every check, and stores 1 (pass) or 0 (fail) at its own index in outputBuffer.
[numthreads(32, 1, 1)]
[shader("compute")]
void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID)
{
    let index = dispatchThreadID.x;

    // Two partitions (assumes all 32 invocations share one wave - TODO confirm):
    //   first partition  = invocations 0..14  (mask bits 0-14)
    //   second partition = invocations 15..31 (mask bits 15-31)
    let isSecondGroup = index >= 15;
    let mask = isSecondGroup ? uint4(0xFFFF8000, 0, 0, 0) : uint4(0x00007FFF, 0, 0, 0);
    let isLastInPartition = (index == 14) || (index == 31);
    let isFirstInPartition = (index == 0) || (index == 15);

    //
    // Prefix and.
    // - In both partitions the input is 1, except for the last invocation of each partition,
    //   whose input is 0.
    // - Inclusive result: 1, except for the last invocation of each partition (0). This is the
    //   same as the input, which is what the checks compare against.
    // - Exclusive result: the first invocation of each partition gets ~0 (the identity);
    //   every other invocation gets 1.
    gAndValue = isLastInPartition ? uint(0) : uint(1);
    gAndResultExclusive = isFirstInPartition ? uint(~0) : uint(1);

    //
    // Prefix or.
    // - In both partitions the input is 0, except for the last invocation of each partition,
    //   whose input is 1.
    // - Inclusive result: 0, except for the last invocation of each partition (1). This is the
    //   same as the input, which is what the checks compare against.
    // - Exclusive result: always 0.
    gOrValue = isLastInPartition ? uint(1) : uint(0);

    //
    // Prefix xor.
    // - First partition: every input is 1. The inclusive result alternates 1, 0, 1, ...
    //   starting at 1 for index 0; the exclusive result is the opposite, starting at 0.
    // - Second partition: every input is 0, so all results are 0.
    gXorValue = isSecondGroup ? uint(0) : uint(1);
    gXorResultInclusive = (isSecondGroup || (index % 2 != 0)) ? uint(0) : uint(1);
    gXorResultExclusive = isSecondGroup ? uint(0) : (uint(1) - gXorResultInclusive);

    let passed = testBitwise(mask);

    // CHECK-COUNT-32: 1
    outputBuffer[index] = uint(passed);
}