diff options
| author | Yong He <yonghe@outlook.com> | 2022-02-25 20:49:31 -0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2022-02-25 20:49:31 -0800 |
| commit | c31577953d5041c82375c22d847c2eba06106c58 (patch) | |
| tree | bc685a8b63fc13cb85d160ae13df950056ca6e91 /source/slang/slang-emit.cpp | |
| parent | 8990d270e3a0c01b1f7abbf4f79556c5ef82a096 (diff) | |
Improved SCCP, inlining and resource specialization passes, legalize `ImageSubscript` for GLSL (#2146)
Diffstat (limited to 'source/slang/slang-emit.cpp')
| -rw-r--r-- | source/slang/slang-emit.cpp | 74 |
1 files changed, 34 insertions, 40 deletions
diff --git a/source/slang/slang-emit.cpp b/source/slang/slang-emit.cpp index 378732fb3..a0ac30857 100644 --- a/source/slang/slang-emit.cpp +++ b/source/slang/slang-emit.cpp @@ -26,11 +26,13 @@ #include "slang-ir-optix-entry-point-uniforms.h" #include "slang-ir-restructure.h" #include "slang-ir-restructure-scoping.h" +#include "slang-ir-sccp.h" #include "slang-ir-specialize.h" #include "slang-ir-specialize-arrays.h" #include "slang-ir-specialize-buffer-load-arg.h" #include "slang-ir-specialize-resources.h" #include "slang-ir-ssa.h" +#include "slang-ir-ssa-simplification.h" #include "slang-ir-strip-witness-tables.h" #include "slang-ir-synthesize-active-mask.h" #include "slang-ir-union.h" @@ -324,10 +326,13 @@ Result linkAndOptimizeIR( specializeModule(irModule); dumpIRIfEnabled(compileRequest, irModule, "AFTER-SPECIALIZE"); + applySparseConditionalConstantPropagation(irModule); eliminateDeadCode(irModule); lowerReinterpret(targetRequest, irModule, sink); + validateIRModuleIfEnabled(compileRequest, irModule); + // For targets that supports dynamic dispatch, we need to lower the // generics / interface types to ordinary functions and types using // function pointers. @@ -359,10 +364,7 @@ Result linkAndOptimizeIR( // up downstream passes like type legalization, so we // will run a DCE pass to clean up after the specialization. // - // TODO: Are there other cleanup optimizations we should - // apply at this point? - // - eliminateDeadCode(irModule); + simplifyIR(irModule); #if 0 dumpIRIfEnabled(compileRequest, irModule, "AFTER DCE"); #endif @@ -435,7 +437,7 @@ Result linkAndOptimizeIR( // to see if we can clean up any temporaries created by legalization. // (e.g., things that used to be aggregated might now be split up, // so that we can work with the individual fields). - constructSSA(irModule); + simplifyIR(irModule); #if 0 dumpIRIfEnabled(compileRequest, irModule, "AFTER SSA"); @@ -450,36 +452,12 @@ Result linkAndOptimizeIR( // Many of our targets place restrictions on how certain // resource types can be used, so that having them as // function parameters, reults, etc. is invalid. - // To clean this up, we apply two kinds of specialization: - // - // * Specalize call sites based on the actual resources - // that a called function will return/output. - // - // * Specialize called functions based on teh actual resources - // passed ass input at specific call sites. - // - // Because the legalization may depend on what target - // we are compiling for (certain things might be okay - // for D3D targets that are not okay for Vulkan), we - // pass down the target request along with the IR. - // - specializeResourceOutputs(compileRequest, targetRequest, irModule); - // - // After specialization of function outputs, we may find that there - // are cases where opaque-typed local variables can now be eliminated - // and turned into SSA temporaries. Such optimization may enable - // the following passes to "see" and specialize more cases. - // - // TODO: We should consider whether there are cases that will require - // iterating the passes as given here in order to achieve a fully - // specialized result. If that is the case, we might consider implementing - // a single combined pass that makes all of the relevant changes and - // iterates to convergence. - // - constructSSA(irModule); - // + // We clean up the usages of resource values here. + specializeResourceUsage(compileRequest, targetRequest, irModule); specializeFuncsForBufferLoadArgs(compileRequest, targetRequest, irModule); - specializeResourceParameters(compileRequest, targetRequest, irModule); + + // + simplifyIR(irModule); // For GLSL targets, we also want to specialize calls to functions that // takes array parameters if possible, to avoid performance issues on @@ -487,6 +465,7 @@ Result linkAndOptimizeIR( if (isKhronosTarget(targetRequest)) { specializeArrayParameters(compileRequest, targetRequest, irModule); + simplifyIR(irModule); } #if 0 @@ -675,6 +654,17 @@ Result linkAndOptimizeIR( break; } + // Legalize `ImageSubscript` for GLSL. + switch (target) + { + case CodeGenTarget::GLSL: + { + legalizeImageSubscriptForGLSL(irModule); + } + break; + default: + break; + } switch( target ) { @@ -712,11 +702,16 @@ Result linkAndOptimizeIR( // functions, so there might still be invalid code in // our IR module. // - // To clean up the code, we will apply a fairly general - // dead-code-elimination (DCE) pass that only retains - // whatever code is "live." + // We run IR simplification passes again to clean things up. // - eliminateDeadCode(irModule); + simplifyIR(irModule); + + if (isKhronosTarget(targetRequest)) + { + // As a fallback, if the above specialization steps failed to remove resource type parameters, we will + // inline the functions in question to make sure we can produce valid GLSL. + performGLSLResourceReturnFunctionInlining(irModule); + } #if 0 dumpIRIfEnabled(compileRequest, irModule, "AFTER DCE"); #endif @@ -725,8 +720,7 @@ Result linkAndOptimizeIR( // Lower all bit_cast operations on complex types into leaf-level // bit_cast on basic types. lowerBitCast(targetRequest, irModule); - eliminateDeadCode(irModule); - + simplifyIR(irModule); // We include one final step to (optionally) dump the IR and validate // it after all of the optimization passes are complete. This should |
