summaryrefslogtreecommitdiff
path: root/source/slang/slang-emit.cpp
diff options
context:
space:
mode:
authorYong He <yonghe@outlook.com>2023-10-04 11:20:35 -0700
committerGitHub <noreply@github.com>2023-10-04 11:20:35 -0700
commitac886fd3e329a9599ed1ac7a6d8b26ca5821046c (patch)
tree87bcafb3985775f9d90303d6a4239eb743164407 /source/slang/slang-emit.cpp
parentd87493a46c00be37b820a473c0827bbb865eb222 (diff)
SPIRV compiler performance fixes. (#3258)
* SPIRV compiler performance fixes. * Cleanup. * update project files * Cleanup debug code. * Make redundancy removal non-recursive. --------- Co-authored-by: Yong He <yhe@nvidia.com>
Diffstat (limited to 'source/slang/slang-emit.cpp')
-rw-r--r--source/slang/slang-emit.cpp39
1 files changed, 28 insertions, 11 deletions
diff --git a/source/slang/slang-emit.cpp b/source/slang/slang-emit.cpp
index 6a49e9842..bbf6885a8 100644
--- a/source/slang/slang-emit.cpp
+++ b/source/slang/slang-emit.cpp
@@ -10,6 +10,7 @@
#include "slang-ir-byte-address-legalize.h"
#include "slang-ir-collect-global-uniforms.h"
#include "slang-ir-cleanup-void.h"
+#include "slang-ir-composite-reg-to-mem.h"
#include "slang-ir-dce.h"
#include "slang-ir-diff-call.h"
#include "slang-ir-autodiff.h"
@@ -360,7 +361,7 @@ Result linkAndOptimizeIR(
// Lower all the LValue implict casts (used for out/inout/ref scenarios)
lowerLValueCast(targetRequest, irModule);
- simplifyIR(irModule, sink);
+ simplifyIR(irModule, IRSimplificationOptions::getDefault(), sink);
// Fill in default matrix layout into matrix types that left layout unspecified.
specializeMatrixLayout(codeGenContext->getTargetReq(), irModule);
@@ -472,7 +473,7 @@ Result linkAndOptimizeIR(
validateIRModuleIfEnabled(codeGenContext, irModule);
- simplifyIR(irModule, sink);
+ simplifyIR(irModule, IRSimplificationOptions::getFast(), sink);
if (!ArtifactDescUtil::isCpuLikeTarget(artifactDesc))
{
@@ -501,7 +502,7 @@ Result linkAndOptimizeIR(
// up downstream passes like type legalization, so we
// will run a DCE pass to clean up after the specialization.
//
- simplifyIR(irModule, sink);
+ simplifyIR(irModule, IRSimplificationOptions::getDefault(), sink);
validateIRModuleIfEnabled(codeGenContext, irModule);
@@ -591,7 +592,7 @@ Result linkAndOptimizeIR(
// to see if we can clean up any temporaries created by legalization.
// (e.g., things that used to be aggregated might now be split up,
// so that we can work with the individual fields).
- simplifyIR(irModule, sink);
+ simplifyIR(irModule, IRSimplificationOptions::getFast(), sink);
#if 0
dumpIRIfEnabled(codeGenContext, irModule, "AFTER SSA");
@@ -924,12 +925,20 @@ Result linkAndOptimizeIR(
// bit_cast on basic types.
lowerBitCast(targetRequest, irModule);
- eliminateMultiLevelBreak(irModule);
if (isKhronosTarget(targetRequest) && targetRequest->shouldEmitSPIRVDirectly())
- performIntrinsicFunctionFunctionInlining(irModule);
+ {
+ //performIntrinsicFunctionFunctionInlining(irModule);
+ performSpirvInlining(irModule);
+ eliminateDeadCode(irModule);
+ }
+ eliminateMultiLevelBreak(irModule);
- simplifyIR(irModule, sink);
+ {
+ IRSimplificationOptions simplificationOptions = IRSimplificationOptions::getFast();
+ simplificationOptions.cfgOptions.removeTrivialSingleIterationLoops = true;
+ simplifyIR(irModule, IRSimplificationOptions::getFast(), sink);
+ }
// As a late step, we need to take the SSA-form IR and move things *out*
// of SSA form, by eliminating all "phi nodes" (block parameters) and
@@ -956,7 +965,13 @@ Result linkAndOptimizeIR(
}
// We only want to accumulate locations if liveness tracking is enabled.
- eliminatePhis(livenessMode, irModule);
+ PhiEliminationOptions phiEliminationOptions;
+ if (isKhronosTarget(targetRequest) && targetRequest->shouldEmitSPIRVDirectly())
+ {
+ phiEliminationOptions.eliminateCompositeTypedPhiOnly = false;
+ phiEliminationOptions.useRegisterAllocation = true;
+ }
+ eliminatePhis(livenessMode, irModule, phiEliminationOptions);
#if 0
dumpIRIfEnabled(codeGenContext, irModule, "PHIS ELIMINATED");
#endif
@@ -1000,7 +1015,7 @@ Result linkAndOptimizeIR(
}
// Run a final round of simplifications to clean up unused things after phi-elimination.
- simplifyNonSSAIR(irModule);
+ simplifyNonSSAIR(irModule, IRSimplificationOptions::getFast());
// We include one final step to (optionally) dump the IR and validate
// it after all of the optimization passes are complete. This should
@@ -1263,15 +1278,17 @@ SlangResult emitSPIRVForEntryPointsDirectly(
List<uint8_t> spirv, outSpirv;
emitSPIRVFromIR(codeGenContext, irModule, irEntryPoints, spirv);
+#if 0
String optErr;
if (SLANG_FAILED(optimizeSPIRV(spirv, optErr, outSpirv)))
{
codeGenContext->getSink()->diagnose(SourceLoc(), Diagnostics::spirvOptFailed, optErr);
- outSpirv = _Move(spirv);
+ spirv = _Move(outSpirv);
}
+#endif
auto artifact = ArtifactUtil::createArtifactForCompileTarget(asExternal(codeGenContext->getTargetFormat()));
- artifact->addRepresentationUnknown(ListBlob::moveCreate(outSpirv));
+ artifact->addRepresentationUnknown(ListBlob::moveCreate(spirv));
ArtifactUtil::addAssociated(artifact, linkedIR.metadata);