blob: 18e3938865f6a4fbe873665039dd4c49fa368a63 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
|
// ggml_compute_forward_diag_mask_inf_f32
RWBuffer<float> result: register( u0 );
cbuffer Constants: register( b0 )
{
uint4 elements: packoffset( c0 );
uint4 strides: packoffset( c1 );
uint n_past : packoffset( c2.x );
}
static const float negativeInfinity = asfloat( 0xff800000 );
[numthreads( 32, 1, 1 )]
void main( uint3 group: SV_GroupID, uint thread : SV_GroupIndex )
{
const uint k = group.y;
const uint j = group.x;
// Start of the row
uint rdi = k * strides[ 2 ] + j * strides[ 1 ];
// End of the row
const uint rdiEnd = rdi + elements[ 0 ] * strides[ 0 ];
// First index to write in this thread
rdi += ( n_past + j + thread + 1 ) * strides[ 0 ];
// Index increment
const uint rdiInc = 32 * strides[ 0 ];
for( ; rdi < rdiEnd; rdi += rdiInc )
result[ rdi ] = negativeInfinity;
}
|