summaryrefslogtreecommitdiffstats
path: root/tests
diff options
context:
space:
mode:
authorTheresa Foley <10618364+tangent-vector@users.noreply.github.com>2022-05-10 07:18:03 -0700
committerGitHub <noreply@github.com>2022-05-10 10:18:03 -0400
commit8c540f216f9fe9366bbe57732063607b41344b9f (patch)
treece5ea4ee23ef5b2c12e133b04c79d5efcdf70dbc /tests
parent7a9bc08f3548fefeb54b907a5de301b90435f04a (diff)
Use IR pass to eliminate phi nodes (#2226)
* Use IR pass to eliminate phi nodes "Phi nodes" are one of the key contrivances that makes SSA (Static Single Assignment) form work. Because SSA is so great for compiler IRs, we kind of need to deal with phi nodes, but they also get in the way because they don't have a direct analog in most lower-level machine ISAs or execution models, nor in most of the high-level languages a transpiler wants to emit. As a result a compiler like ours needs to be able to eliminate the phi nodes from a program as part of generating output code. (For any clever people noting that SPIR-V supports phi nodes directly: yes, it does. It doesn't need to and it probably *shouldn't*. Anybody involved in the decision-making knows my reasoning, and anybody else should feel free to ask me if they want the lecture. Anyway...) The basic idea of eliminating phi nodes is simple enough. We replace each phi node with a temporary variable. Uses of the phi use values loaded from the temporary. The operation of the phi itself (assigning a value based on the branch taken) amounts to an assignment into the temporary. Previously, the Slang compiler dealt with phi nodes very late in the process of generating code: in the middle of emitting strings of source code in a high-level language like HLSL or GLSL. Doing the work that late in compilation has two big drawbacks: 1. Our ability to emit clean and/or optimal code is limited because we may not be able to make certain changes to the IR, or because we cannot make use of additional information like a dominator tree that might be available at other points in compilation. 2. Any other IR passes that relate to temporary variables won't be able to see the variables that we generate for phi nodes. This could raise issues with correctness (e.g., if we want to compute live-range information for *all* temporary variables), or performance (we have no way to run additional IR optimization passes after phis are eliminated). 
This change addresses these problems by making the elimination of phi nodes an explicit IR pass. Additional optimizations can easily be run after this pass (although we'd need to be careful not to run passes that could end up introducing new phis). The pass makes use of the information available to it to try to produce code that will emit to "clean" HLSL/GLSL. The core of the pass is in `slang-ir-eliminate-phis.cpp`, and is heavily commented, so I won't describe the approach in detail here. There are two related issues that came up, though: First, it turned out that our emit logic for local variables (`IRVar` instructions) wasn't using the function we'd defined named `emitVar()`. One worrying consequence of that oversight was that the `precise` modifier would impact generated HLSL/GLSL for variables that turned into SSA values (including phi nodes), but *not* for local variables that had not been SSA'd (or that had been SSA'd and then de-SSA'd). This change also fixes that bug; it is unclear how widespread the impact of the original issue might be. Second, generating explicit IR temporaries for phi nodes exposed a pre-existing bug in the `slang-ir-restructure-scoping` pass. That pass basically detects cases where we have an instruction `I` with a use `U` such that the use follows the rules of SSA form ("def dominates use," meaning `I` dominates `U`), but does not follow the more restrictive scoping rules of high-level-language output (where a value computed "inside" a loop is not automatically visible to code outside the loop just because it dominates that code). That pass did not correctly account for the case where `I` was a temporary variable. It seems that case could not arise before now because we didn't have any passes that would move `var`, `load`, or `store` operations out of the basic block they started in. The fix for that pass was relatively simple, and will make the whole thing more robust in case we add more aggressive optimizations later. 
* fixup: expected test output
Diffstat (limited to 'tests')
-rw-r--r--tests/bugs/gh-841.slang.glsl11
-rw-r--r--tests/compute/unbounded-array-of-array-syntax.slang.glsl17
-rw-r--r--tests/cross-compile/geometry-shader.slang.glsl6
-rw-r--r--tests/cross-compile/half-conversion.slang.glsl11
-rw-r--r--tests/cross-compile/precise-keyword.slang.glsl8
-rw-r--r--tests/experimental/liveness/liveness.slang.expected66
6 files changed, 68 insertions, 51 deletions
diff --git a/tests/bugs/gh-841.slang.glsl b/tests/bugs/gh-841.slang.glsl
index 8dd37274a..da23c33f1 100644
--- a/tests/bugs/gh-841.slang.glsl
+++ b/tests/bugs/gh-841.slang.glsl
@@ -18,19 +18,18 @@ struct RasterVertex_0
void main()
{
- vec4 result_0;
RasterVertex_0 _S4 = RasterVertex_0(_S2, _S3);
- vec4 result_1 = _S4.c_0;
+ vec4 result_0 = _S4.c_0;
+ vec4 result_1;
if(bool(_S4.u_0 & uint(1)))
{
- vec4 result_2 = result_1 + 1.0;
- result_0 = result_2;
+ result_1 = result_0 + 1.0;
}
else
{
- result_0 = result_1;
+ result_1 = result_0;
}
- _S1 = result_0;
+ _S1 = result_1;
return;
}
diff --git a/tests/compute/unbounded-array-of-array-syntax.slang.glsl b/tests/compute/unbounded-array-of-array-syntax.slang.glsl
index a275b4599..6ee5d1c6b 100644
--- a/tests/compute/unbounded-array-of-array-syntax.slang.glsl
+++ b/tests/compute/unbounded-array-of-array-syntax.slang.glsl
@@ -15,27 +15,24 @@ layout(std430, binding = 0) buffer _S2 {
layout(local_size_x = 8, local_size_y = 1, local_size_z = 1) in;
void main()
{
- int innerIndex_0;
-
int index_0 = int(gl_GlobalInvocationID.x);
-
- int innerIndex_1 = index_0 & 3;
+ int innerIndex_0 = index_0 & 3;
uint bufferCount_0;
uint bufferStride_0;
(bufferCount_0) = (g_aoa_0[nonuniformEXT(index_0 >> 2)])._data.length();
(bufferStride_0) = 0;
- if(uint(innerIndex_1) >= bufferCount_0)
+ int innerIndex_1;
+ if(uint(innerIndex_0) >= bufferCount_0)
{
- int _S3 = int(bufferCount_0 - uint(1));
- innerIndex_0 = _S3;
+ innerIndex_1 = int(bufferCount_0 - uint(1));
}
else
{
- innerIndex_0 = innerIndex_1;
+ innerIndex_1 = innerIndex_0;
}
- uint _S4 = uint(innerIndex_0);
- ((outputBuffer_0)._data[(uint(index_0))]) = ((g_aoa_0[nonuniformEXT(index_0 >> 2)])._data[(_S4)]);
+ uint _S3 = uint(innerIndex_1);
+ ((outputBuffer_0)._data[(uint(index_0))]) = ((g_aoa_0[nonuniformEXT(index_0 >> 2)])._data[(_S3)]);
return;
}
diff --git a/tests/cross-compile/geometry-shader.slang.glsl b/tests/cross-compile/geometry-shader.slang.glsl
index 55e1691a9..5a8db5b09 100644
--- a/tests/cross-compile/geometry-shader.slang.glsl
+++ b/tests/cross-compile/geometry-shader.slang.glsl
@@ -50,8 +50,6 @@ layout(triangle_strip) out;
void main()
{
- int ii_0;
-
uint _S6 = uint(gl_PrimitiveIDIn);
// TODO: Having to make this copy to transpose things is unfortunate.
@@ -66,6 +64,7 @@ void main()
CoarseVertex_0(input_position[2], input_color[2], input_id[2])
};
+ int ii_0;
ii_0 = 0;
for(;;)
{
@@ -91,8 +90,7 @@ void main()
EmitVertex();
- int ii_1 = ii_0 + 1;
- ii_0 = ii_1;
+ ii_0 = ii_0 + 1;
}
return;
diff --git a/tests/cross-compile/half-conversion.slang.glsl b/tests/cross-compile/half-conversion.slang.glsl
index 3b7b740e4..58d20b4fc 100644
--- a/tests/cross-compile/half-conversion.slang.glsl
+++ b/tests/cross-compile/half-conversion.slang.glsl
@@ -16,8 +16,8 @@ layout(std140) uniform _S1
vec4 f16tof32_0(uvec4 value_0)
{
- int i_0;
vec4 result_0;
+ int i_0;
i_0 = 0;
for(;;)
{
@@ -25,18 +25,17 @@ vec4 f16tof32_0(uvec4 value_0)
float _S2 = (unpackHalf2x16((value_0[i_0])).x);
result_0[i_0] = _S2;
- int _S3 = i_0 + int(1);
- i_0 = _S3;
+ i_0 = i_0 + int(1);
}
return result_0;
}
layout(location = 0)
-out vec4 _S4;
+out vec4 _S3;
void main()
{
- vec4 _S5 = f16tof32_0(C_0._data.u_0);
- _S4 = _S5;
+ vec4 _S4 = f16tof32_0(C_0._data.u_0);
+ _S3 = _S4;
return;
}
diff --git a/tests/cross-compile/precise-keyword.slang.glsl b/tests/cross-compile/precise-keyword.slang.glsl
index 1aabaa1b3..17fed739e 100644
--- a/tests/cross-compile/precise-keyword.slang.glsl
+++ b/tests/cross-compile/precise-keyword.slang.glsl
@@ -15,14 +15,12 @@ void main()
if(_S2.x > float(0))
{
- float _S3 = _S2.x * _S2.y + _S2.x;
- z_0 = _S3;
+ z_0 = _S2.x * _S2.y + _S2.x;
}
else
{
- float _S4 = _S2.y * _S2.x + _S2.y;
- z_0 = _S4;
+ z_0 = _S2.y * _S2.x + _S2.y;
}
_S1 = vec4(z_0);
return;
-} \ No newline at end of file
+}
diff --git a/tests/experimental/liveness/liveness.slang.expected b/tests/experimental/liveness/liveness.slang.expected
index 7cb39861c..ce0298799 100644
--- a/tests/experimental/liveness/liveness.slang.expected
+++ b/tests/experimental/liveness/liveness.slang.expected
@@ -10,13 +10,21 @@ layout(row_major) buffer;
#line 24 0
int someSlowFunc_0(int a_0)
{
+ uint _S1 = uint(a_0);
+
+#line 26
int i_0;
+
+#line 26
uint v_0;
#line 26
- uint _S1 = uint(a_0);
i_0 = 0;
+
+#line 26
v_0 = _S1;
+
+#line 26
for(;;)
{
@@ -33,13 +41,16 @@ int someSlowFunc_0(int a_0)
uint _S2 = uint(int(bool(v_0 >> 1) || bool(v_0 << 31)) * i_0);
#line 27
- int i_1 = i_0 + 1;
+ i_0 = i_0 + 1;
#line 27
- i_0 = i_1;
v_0 = _S2;
+
+#line 27
}
+
+
return int(v_0);
}
@@ -103,22 +114,28 @@ void livenessEnd_0(spirv_by_reference SomeStruct_0 _0, int _1);
layout(local_size_x = 4, local_size_y = 1, local_size_z = 1) in;
void main()
{
- int i_2;
- int res_0;
- SomeStruct_0 u_0;
#line 48
int index_0 = int(gl_GlobalInvocationID.x);
+#line 48
+ int i_1;
+#line 48
+ int res_0;
- i_2 = 0;
+#line 48
+ i_1 = 0;
+
+#line 48
res_0 = index_0;
+
+#line 48
for(;;)
{
-#line 52
- if(i_2 < index_0)
+
+ if(i_1 < index_0)
{
}
else
@@ -146,41 +163,49 @@ void main()
const int _S7[100] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
#line 58
- SomeStruct_0 u_1 = { 0, 0, _S7 };
+ SomeStruct_0 u_0 = { 0, 0, _S7 };
+
+#line 58
+ SomeStruct_0 u_1;
if(bool(v_1 & 256))
{
s_3.x_0 = ((anotherBuffer_0)._data[(uint(v_1 & 3))]);
t_0.x_0 = ((anotherBuffer_0)._data[(uint(v_1 & 3))]);
-#line 60
- u_0 = u_1;
+#line 63
+ u_1 = u_0;
+
+#line 63
}
else
{
-#line 68
+
SomeStruct_0 x_1;
#line 68
livenessStart_0(x_1, 0);
#line 68
- x_1 = u_1;
+ x_1 = u_0;
x_1.x_0 = ((anotherBuffer_0)._data[(uint(v_1 & 3))]) + 1;
SomeStruct_0 _S8 = x_1;
#line 70
livenessEnd_0(x_1, 0);
-#line 60
- u_0 = _S8;
+#line 70
+ u_1 = _S8;
+
+#line 70
}
-#line 74
+
+
s_3.c_0[index_0 & 7] = s_3.c_0[index_0 & 7] + 1;
- int _S9 = s_3.x_0 + t_0.x_0 + u_0.x_0;
+ int _S9 = s_3.x_0 + t_0.x_0 + u_1.x_0;
#line 76
int _S10 = doThing_0(t_0);
@@ -207,11 +232,12 @@ void main()
int res_1 = res_0 + (_S13 + _S14);
#line 52
- int i_3 = i_2 + 1;
+ i_1 = i_1 + 1;
#line 52
- i_2 = i_3;
res_0 = res_1;
+
+#line 52
}
#line 79