1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
|
result code = 0
standard error = {
ir_global_var @_SV01s : Ptr<@ThreadGroup vector<float,4>[64]>;
ir_global_var @_SV05input : Ptr<StructuredBuffer<vector<float,4>>>;
ir_func @_S031GroupMemoryBarrierWithGroupSyncp0pV : () -> void;
ir_global_var @_SV06output : Ptr<RWStructuredBuffer<vector<float,4>>>;
ir_func @_S04mainp3puuuV : (uint, uint, uint) -> void
{
block %1(
param %2 : uint,
param %3 : uint,
param %4 : uint):
let %5 : Ptr<uint> = var()
store(%5, %2)
let %6 : Ptr<uint> = var()
store(%6, %3)
let %7 : Ptr<uint> = var()
store(%7, %4)
let %8 : uint = load(%6)
let %9 : Ptr<vector<float,4>> = getElementPtr(@_SV01s, %8)
let %10 : StructuredBuffer<vector<float,4>> = load(@_SV05input)
let %11 : uint = load(%5)
let %12 : vector<float,4> = bufferLoad(%10, %11)
store(%9, %12)
let %13 : Ptr<uint> = var()
let %14 : uint = construct(1)
store(%13, %14)
loop(%15, %16, %17)
block %15:
let %18 : uint = load(%13)
let %19 : uint = construct(64)
let %20 : bool = cmpLT(%18, %19)
loopTest(%20, %21, %16)
block %21:
call(@_S031GroupMemoryBarrierWithGroupSyncp0pV)
let %22 : uint = load(%6)
let %23 : Ptr<vector<float,4>> = getElementPtr(@_SV01s, %22)
let %24 : Ptr<vector<float,4>> = var()
let %25 : vector<float,4> = load(%23)
store(%24, %25)
let %26 : uint = load(%6)
let %27 : uint = load(%13)
let %28 : uint = sub(%26, %27)
let %29 : Ptr<vector<float,4>> = getElementPtr(@_SV01s, %28)
let %30 : vector<float,4> = load(%29)
let %31 : vector<float,4> = load(%24)
let %32 : vector<float,4> = add(%31, %30)
store(%24, %32)
let %33 : vector<float,4> = load(%24)
store(%23, %33)
unconditionalBranch(%17)
block %17:
let %34 : Ptr<uint> = var()
let %35 : uint = load(%13)
store(%34, %35)
let %36 : uint = construct(1)
let %37 : uint = load(%34)
let %38 : uint = shl(%37, %36)
store(%34, %38)
let %39 : uint = load(%34)
store(%13, %39)
unconditionalBranch(%15)
block %16:
call(@_S031GroupMemoryBarrierWithGroupSyncp0pV)
let %40 : RWStructuredBuffer<vector<float,4>> = load(@_SV06output)
let %41 : uint = load(%5)
let %42 : Ptr<vector<float,4>> = getElementPtr(@_SV01s, 0)
let %43 : vector<float,4> = load(%42)
bufferStore(%40, %41, %43)
return_void()
}
}
standard output = {
}
|