blob: 376ef1f80ddc8481a876144120b8247bbc1fb377 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
|
//TEST:SIMPLE(filecheck=CUDA): -stage compute -entry computeMain -target cuda -line-directive-mode none
//TEST:SIMPLE(filecheck=PTX): -stage compute -entry computeMain -target cuda
// Shader parameters for this test: two read-only input buffers, one
// writable output buffer, and an index buffer that selects which input
// buffer is sampled on each loop iteration of the entry point.
struct Data {
// Array of two read-only float buffers; fetch() selects one by its first argument.
StructuredBuffer<float> input[2];
// Destination buffer written by the compute entry point.
RWStructuredBuffer<float> output;
// Not read anywhere in the visible code.
// NOTE(review): presumably the number of valid entries in `input` - confirm.
uint input_tensor_count;
// Per-iteration selector values; each element is passed to fetch() as `buffer`.
StructuredBuffer<uint> index_buffer;
// Loop bound used by the entry point when walking `index_buffer`.
uint index_count;
// The three check lines below are FileCheck patterns matched against the
// generated cuda source. They expect fetch() to be emitted as a __device__
// function taking two ints whose first statement reads `input` through the
// global parameter pointer, so keep the body of fetch() a single indexed read.
// CUDA: __device__ float Data_fetch{{.*}}(int {{.*}}, int {{.*}})
// CUDA-NEXT: {
// CUDA-NEXT: globalParams{{.*}}->data{{.*}}->input{{.*}}[{{.*}}]
// Returns element `index` of the input buffer selected by `buffer`.
// No bounds checking is performed on either argument here.
float fetch(int buffer, int index)
{
return input[buffer][index];
}
};
// Global parameter block instance; the compute entry point reaches every
// Data field and the fetch() method through this variable.
ParameterBlock<Data> data;
// PTX: computeMain
[shader("compute")]
[numthreads(8, 8, 1)]
void computeMain(uint3 tid: SV_DispatchThreadID)
{
    // Flattened element position for this thread; the same value is used
    // for every input read and for the final output write.
    uint element = tid.x * 1024 + tid.y;

    // Accumulate one sample per index-buffer entry. Each entry selects
    // which input buffer the sample is taken from.
    float sum = 0.0;
    for (int slot = 0; slot < data.index_count; ++slot)
    {
        sum += data.fetch(data.index_buffer[slot], element);
    }

    data.output[element] = sum;
}
|