summaryrefslogtreecommitdiff
path: root/source/slang/slang-emit.cpp
diff options
context:
space:
mode:
author Yong He <yonghe@outlook.com> 2022-02-25 20:49:31 -0800
committer GitHub <noreply@github.com> 2022-02-25 20:49:31 -0800
commit c31577953d5041c82375c22d847c2eba06106c58 (patch)
tree bc685a8b63fc13cb85d160ae13df950056ca6e91 /source/slang/slang-emit.cpp
parent 8990d270e3a0c01b1f7abbf4f79556c5ef82a096 (diff)
Improved SCCP, inlining and resource specialization passes, legalize `ImageSubscript` for GLSL (#2146)
Diffstat (limited to 'source/slang/slang-emit.cpp')
-rw-r--r-- source/slang/slang-emit.cpp 74
1 files changed, 34 insertions, 40 deletions
diff --git a/source/slang/slang-emit.cpp b/source/slang/slang-emit.cpp
index 378732fb3..a0ac30857 100644
--- a/source/slang/slang-emit.cpp
+++ b/source/slang/slang-emit.cpp
@@ -26,11 +26,13 @@
#include "slang-ir-optix-entry-point-uniforms.h"
#include "slang-ir-restructure.h"
#include "slang-ir-restructure-scoping.h"
+#include "slang-ir-sccp.h"
#include "slang-ir-specialize.h"
#include "slang-ir-specialize-arrays.h"
#include "slang-ir-specialize-buffer-load-arg.h"
#include "slang-ir-specialize-resources.h"
#include "slang-ir-ssa.h"
+#include "slang-ir-ssa-simplification.h"
#include "slang-ir-strip-witness-tables.h"
#include "slang-ir-synthesize-active-mask.h"
#include "slang-ir-union.h"
@@ -324,10 +326,13 @@ Result linkAndOptimizeIR(
specializeModule(irModule);
dumpIRIfEnabled(compileRequest, irModule, "AFTER-SPECIALIZE");
+ applySparseConditionalConstantPropagation(irModule);
eliminateDeadCode(irModule);
lowerReinterpret(targetRequest, irModule, sink);
+ validateIRModuleIfEnabled(compileRequest, irModule);
+
// For targets that support dynamic dispatch, we need to lower the
// generics / interface types to ordinary functions and types using
// function pointers.
@@ -359,10 +364,7 @@ Result linkAndOptimizeIR(
// up downstream passes like type legalization, so we
// will run a DCE pass to clean up after the specialization.
//
- // TODO: Are there other cleanup optimizations we should
- // apply at this point?
- //
- eliminateDeadCode(irModule);
+ simplifyIR(irModule);
#if 0
dumpIRIfEnabled(compileRequest, irModule, "AFTER DCE");
#endif
@@ -435,7 +437,7 @@ Result linkAndOptimizeIR(
// to see if we can clean up any temporaries created by legalization.
// (e.g., things that used to be aggregated might now be split up,
// so that we can work with the individual fields).
- constructSSA(irModule);
+ simplifyIR(irModule);
#if 0
dumpIRIfEnabled(compileRequest, irModule, "AFTER SSA");
@@ -450,36 +452,12 @@ Result linkAndOptimizeIR(
// Many of our targets place restrictions on how certain
// resource types can be used, so that having them as
// function parameters, results, etc. is invalid.
- // To clean this up, we apply two kinds of specialization:
- //
- // * Specalize call sites based on the actual resources
- // that a called function will return/output.
- //
- // * Specialize called functions based on teh actual resources
- // passed ass input at specific call sites.
- //
- // Because the legalization may depend on what target
- // we are compiling for (certain things might be okay
- // for D3D targets that are not okay for Vulkan), we
- // pass down the target request along with the IR.
- //
- specializeResourceOutputs(compileRequest, targetRequest, irModule);
- //
- // After specialization of function outputs, we may find that there
- // are cases where opaque-typed local variables can now be eliminated
- // and turned into SSA temporaries. Such optimization may enable
- // the following passes to "see" and specialize more cases.
- //
- // TODO: We should consider whether there are cases that will require
- // iterating the passes as given here in order to achieve a fully
- // specialized result. If that is the case, we might consider implementing
- // a single combined pass that makes all of the relevant changes and
- // iterates to convergence.
- //
- constructSSA(irModule);
- //
+ // We clean up the usages of resource values here.
+ specializeResourceUsage(compileRequest, targetRequest, irModule);
specializeFuncsForBufferLoadArgs(compileRequest, targetRequest, irModule);
- specializeResourceParameters(compileRequest, targetRequest, irModule);
+
+ //
+ simplifyIR(irModule);
// For GLSL targets, we also want to specialize calls to functions that
// take array parameters if possible, to avoid performance issues on
@@ -487,6 +465,7 @@ Result linkAndOptimizeIR(
if (isKhronosTarget(targetRequest))
{
specializeArrayParameters(compileRequest, targetRequest, irModule);
+ simplifyIR(irModule);
}
#if 0
@@ -675,6 +654,17 @@ Result linkAndOptimizeIR(
break;
}
+ // Legalize `ImageSubscript` for GLSL.
+ switch (target)
+ {
+ case CodeGenTarget::GLSL:
+ {
+ legalizeImageSubscriptForGLSL(irModule);
+ }
+ break;
+ default:
+ break;
+ }
switch( target )
{
@@ -712,11 +702,16 @@ Result linkAndOptimizeIR(
// functions, so there might still be invalid code in
// our IR module.
//
- // To clean up the code, we will apply a fairly general
- // dead-code-elimination (DCE) pass that only retains
- // whatever code is "live."
+ // We run IR simplification passes again to clean things up.
//
- eliminateDeadCode(irModule);
+ simplifyIR(irModule);
+
+ if (isKhronosTarget(targetRequest))
+ {
+ // As a fallback, if the above specialization steps failed to remove resource type parameters, we will
+ // inline the functions in question to make sure we can produce valid GLSL.
+ performGLSLResourceReturnFunctionInlining(irModule);
+ }
#if 0
dumpIRIfEnabled(compileRequest, irModule, "AFTER DCE");
#endif
@@ -725,8 +720,7 @@ Result linkAndOptimizeIR(
// Lower all bit_cast operations on complex types into leaf-level
// bit_cast on basic types.
lowerBitCast(targetRequest, irModule);
- eliminateDeadCode(irModule);
-
+ simplifyIR(irModule);
// We include one final step to (optionally) dump the IR and validate
// it after all of the optimization passes are complete. This should