summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTim Foley <tfoleyNV@users.noreply.github.com>2018-05-04 12:01:30 -0700
committerGitHub <noreply@github.com>2018-05-04 12:01:30 -0700
commit07a59b623e44348caf06d65b2578b51d86ba0af5 (patch)
treedcb07f728cc1050bd36f7c323c3a1f2cc9ba6681
parentee47232fc17f31ef2bd95ca480372216a79def56 (diff)
Allow more complex compound expressions when emitting from IR (#552)
The emit logic already had an idea of when an instruction should be "folded" it its use site(s), and this change just expands on that logic to try to be more aggressive. The basic idea is that instead of outputting this: ```hlsl float4 _S3 = a_0 + b_0; float4 _S4 = c_0 * _S3; d_0 = _S4; ``` we can hopefully output something like this: ```hlsl d_0 = c_0 * (a_0 + b_0); ``` The way this works is that after dealing with the various special cases that decide an instruction `I` must/cannot be folded in, we look and see if it has the following properites: * `I` has no side effects * `I` has a single user, `U` * `I` and `U` are in the same block (and `I` comes before `U` in that block) * for every instruction `X` between `I` and `U` (exclusive), `X` has no side effects If all of these conditions are true, then `I` can be folded in as a sub-expression when we emit `U`. This change doesn't affect most of our test output, but there is still a single test with SPIR-V output that we compare against a GLSL baseline, and so that baseline had to be modified to match the GLSL we now generate. Similar to #547, this change is not meant to provide a complete solution, but rather to take a concrete but low-risk step toward improving our output. Opportunities to improve the results further include: * We can/should ensure that when outputting sub-expressions we keep extra parentheses to a minimum. The old logic for emitting from an AST had support for "unparsing" expressions with minimal parentheses, and we should try to do the same. This can be error-prone, because omitting parentheses can lead to silent failures, so it must be done carefully. * We could try to be more aggressive about detecting what operations might have side effects. The most interesting case is function calls, where we should try to check if the callee is a function known to be side-effect-free. We could start by annotating most builtin functions with an attribute/decoration that indicates freedom from side effects. Deriving this attribute for user functions could be interesting, but we'd have to be careful since "nontermination" is technically a side effect. * We could try to be more aggressive about determining what side effects in instructions `X` are "safe" for the instruction `I` to move across. For example, if `I` is a load from variable `a` and `X` is a store to variable `b`, then that would seem to be safe. This starts to get into issues of instruction scheduling, though, and that is probably beyond what we want Slang to be doing.
-rw-r--r--source/slang/emit.cpp99
-rw-r--r--source/slang/ir.cpp72
-rw-r--r--source/slang/ir.h11
-rw-r--r--tests/bindings/glsl-parameter-blocks.slang.glsl14
4 files changed, 181 insertions, 15 deletions
diff --git a/source/slang/emit.cpp b/source/slang/emit.cpp
index 4ed7dd5a7..c86634c02 100644
--- a/source/slang/emit.cpp
+++ b/source/slang/emit.cpp
@@ -2213,21 +2213,43 @@ struct EmitVisitor
IRInst* inst,
IREmitMode mode)
{
- // Certain opcodes should always be folded in
+ // Certain opcodes should never/always be folded in
switch( inst->op )
{
default:
break;
+ // Never fold these in, because they represent declarations
+ //
case kIROp_Var:
case kIROp_GlobalVar:
case kIROp_GlobalConstant:
case kIROp_Param:
return false;
+ // HACK: don't fold these in because we currently lower
+ // them to initializer lists, which aren't allowed in
+ // general expression contexts.
+ //
+ case kIROp_makeStruct:
+ case kIROp_makeArray:
+ return false;
+
+ // Always fold these in, because they are trivial
+ //
case kIROp_IntLit:
case kIROp_FloatLit:
case kIROp_boolConst:
+ return true;
+
+ // Always fold these in, because their results
+ // cannot be represented in the type system of
+ // our current targets.
+ //
+ // TODO: when we add C/C++ as an optional target,
+ // we could consider lowering insts that result
+ // in pointers directly.
+ //
case kIROp_FieldAddress:
case kIROp_getElementPtr:
case kIROp_Specialize:
@@ -2239,11 +2261,13 @@ struct EmitVisitor
if (mode == IREmitMode::GlobalConstant)
return true;
- // Certain *types* will usually want to be folded in,
- // because they aren't allowed as types for temporary
- // variables.
+ // Instructions with specific result *types* will usually
+ // want to be folded in, because they aren't allowed as types
+ // for temporary variables.
auto type = inst->getDataType();
+ // First we unwrap any layers of pointer-ness and array-ness
+ // from the types to get at the underlying data type.
while (auto ptrType = as<IRPtrTypeBase>(type))
{
type = ptrType->getValueType();
@@ -2253,6 +2277,15 @@ struct EmitVisitor
type = ptrType->getElementType();
}
+ // First we check for uniform parameter groups,
+ // because a `cbuffer` or GLSL `uniform` block
+ // does not have a first-class type that we can
+ // pass around.
+ //
+ // TODO: We need to ensure that type legalization
+ // cleans up cases where we use a parameter group
+ // or parameter block type as a function parameter...
+ //
if(as<IRUniformParameterGroupType>(type))
{
// TODO: we need to be careful here, because
@@ -2260,6 +2293,12 @@ struct EmitVisitor
// types.
return true;
}
+ //
+ // The stream-output and patch types need to be handled
+ // too, because they are not really first class (especially
+ // not in GLSL, but they also seem to confuse the HLSL
+ // compiler when they get used as temporaries).
+ //
else if (as<IRHLSLStreamOutputType>(type))
{
return true;
@@ -2289,8 +2328,56 @@ struct EmitVisitor
}
}
- // By default we will *not* fold things into their use sites.
- return false;
+ // Having dealt with all of the cases where we *must* fold things
+ // above, we can now deal with the more general cases where we
+ // *should not* fold things.
+
+ // Don't fold somethin with no users:
+ if(!inst->hasUses())
+ return false;
+
+ // Don't fold something that has multiple users:
+ if(inst->hasMoreThanOneUse())
+ return false;
+
+ // Don't fold something that might have side effects:
+ if(inst->mightHaveSideEffects())
+ return false;
+
+ // Okay, at this point we know our instruction must have a single use.
+ auto use = inst->firstUse;
+ SLANG_ASSERT(use);
+ SLANG_ASSERT(!use->nextUse);
+
+ auto user = use->getUser();
+
+ // We'd like to figure out if it is safe to fold our instruction into `user`
+
+ // First, let's make sure they are in the same block/parent:
+ if(inst->getParent() != user->getParent())
+ return false;
+
+ // Now let's look at all the instructions between this instruction
+ // and the user. If any of them might have side effects, then lets
+ // bail out now.
+ for(auto ii = inst->getNextInst(); ii != user; ii = ii->getNextInst())
+ {
+ if(!ii)
+ {
+ // We somehow reached the end of the block without finding
+ // the user, which doesn't make sense if uses dominate
+ // defs. Let's just play it safe and bail out.
+ return false;
+ }
+
+ if(ii->mightHaveSideEffects())
+ return false;
+ }
+
+ // Okay, if we reach this point then the user comes later in
+ // the same block, and there are no instructions with side
+ // effects in between, so it seems safe to fold things in.
+ return true;
}
bool isDerefBaseImplicit(
diff --git a/source/slang/ir.cpp b/source/slang/ir.cpp
index f8ca54639..a7cc342ae 100644
--- a/source/slang/ir.cpp
+++ b/source/slang/ir.cpp
@@ -3071,6 +3071,78 @@ namespace Slang
deallocate();
}
+ bool IRInst::mightHaveSideEffects()
+ {
+ // TODO: We should drive this based on flags specified
+ // in `ir-inst-defs.h` isntead of hard-coding things here,
+ // but this is good enough for now if we are conservative:
+
+ if(as<IRType>(this))
+ return false;
+
+ if(as<IRGlobalValue>(this))
+ return false;
+
+ if(as<IRConstant>(this))
+ return false;
+
+ switch(op)
+ {
+ // By default, assume that we might have side effects,
+ // to safely cover all the instructions we haven't had time to think about.
+ default:
+ return true;
+
+ case kIROp_Call:
+ // This is the most interesting.
+ return true;
+
+ case kIROp_Nop:
+ case kIROp_Specialize:
+ case kIROp_lookup_interface_method:
+ case kIROp_Construct:
+ case kIROp_makeVector:
+ case kIROp_makeMatrix:
+ case kIROp_makeArray:
+ case kIROp_makeStruct:
+ case kIROp_Load: // We are ignoring the possibility of loads from bad addresses, or `volatile` loads
+ case kIROp_BufferLoad:
+ case kIROp_FieldExtract:
+ case kIROp_FieldAddress:
+ case kIROp_getElement:
+ case kIROp_getElementPtr:
+ case kIROp_constructVectorFromScalar:
+ case kIROp_swizzle:
+ case kIROp_swizzleSet: // Doesn't actually "set" anything - just returns the resulting vector
+ case kIROp_Add:
+ case kIROp_Sub:
+ case kIROp_Mul:
+ //case kIROp_Div: // TODO: We could split out integer vs. floating-point div/mod and assume the floating-point cases have no side effects
+ //case kIROp_Mod:
+ case kIROp_Lsh:
+ case kIROp_Rsh:
+ case kIROp_Eql:
+ case kIROp_Neq:
+ case kIROp_Greater:
+ case kIROp_Less:
+ case kIROp_Geq:
+ case kIROp_Leq:
+ case kIROp_BitAnd:
+ case kIROp_BitXor:
+ case kIROp_BitOr:
+ case kIROp_And:
+ case kIROp_Or:
+ case kIROp_Neg:
+ case kIROp_Not:
+ case kIROp_BitNot:
+ case kIROp_Select:
+ case kIROp_Dot:
+ case kIROp_Mul_Vector_Matrix:
+ case kIROp_Mul_Matrix_Vector:
+ case kIROp_Mul_Matrix_Matrix:
+ return false;
+ }
+ }
//
// Legalization of entry points for GLSL:
diff --git a/source/slang/ir.h b/source/slang/ir.h
index 5d9948b70..af08b9491 100644
--- a/source/slang/ir.h
+++ b/source/slang/ir.h
@@ -284,6 +284,17 @@ struct IRInst : public IRObject
// for those values.
void removeArguments();
+ /// Does this instruction have any uses?
+ bool hasUses() const { return firstUse != nullptr; }
+
+ /// Does this instructiomn have more than one use?
+ bool hasMoreThanOneUse() const { return firstUse != nullptr && firstUse->nextUse != nullptr; }
+
+ /// It is possible that this instruction has side effects?
+ ///
+ /// This is a conservative test, and will return `true` if an exact answer can't be determined.
+ bool mightHaveSideEffects();
+
// RTTI support
static bool isaImpl(IROp) { return true; }
};
diff --git a/tests/bindings/glsl-parameter-blocks.slang.glsl b/tests/bindings/glsl-parameter-blocks.slang.glsl
index 67aca71dc..01f49279a 100644
--- a/tests/bindings/glsl-parameter-blocks.slang.glsl
+++ b/tests/bindings/glsl-parameter-blocks.slang.glsl
@@ -13,10 +13,9 @@
#define main_result _S2
#define uv _S3
-#define temp_uv _S4
-#define temp_a _S5
-#define temp_sample _S6
-#define temp_add _S7
+#define temp_a _S4
+#define temp_sample _S5
+#define temp_add _S2
struct Test
{
@@ -43,14 +42,11 @@ in vec2 uv;
void main()
{
- vec2 temp_uv = uv;
-
vec4 temp_a = gTest.a;
- vec4 temp_sample = texture(sampler2D(gTest_t, gTest_s), temp_uv);
+ vec4 temp_sample = texture(sampler2D(gTest_t, gTest_s), uv);
- vec4 temp_add = temp_a + temp_sample;
- main_result = temp_add;
+ main_result = temp_a + temp_sample;
return;
}