summaryrefslogtreecommitdiffstats
path: root/source/slang/slang-ir-inline.cpp
diff options
context:
space:
mode:
authorEllie Hermaszewska <ellieh@nvidia.com>2024-10-29 14:49:26 +0800
committerGitHub <noreply@github.com>2024-10-29 14:49:26 +0800
commitf65d756bff8d4c5cbc15bd0322a2ae8e6b896a21 (patch)
treeea1d61342cd29368e19135000ec2948813096205 /source/slang/slang-ir-inline.cpp
parenta729c15e9dce9f5116a38afc66329ab2ca4cea54 (diff)
format
* format * Minor test fixes * enable checking cpp format in ci
Diffstat (limited to 'source/slang/slang-ir-inline.cpp')
-rw-r--r--source/slang/slang-ir-inline.cpp276
1 files changed, 137 insertions, 139 deletions
diff --git a/source/slang/slang-ir-inline.cpp b/source/slang/slang-ir-inline.cpp
index d4906759e..1490446d5 100644
--- a/source/slang/slang-ir-inline.cpp
+++ b/source/slang/slang-ir-inline.cpp
@@ -1,9 +1,8 @@
// slang-ir-inline.cpp
#include "slang-ir-inline.h"
-#include "slang-ir-ssa-simplification.h"
-
#include "../core/slang-performance-profiler.h"
+#include "slang-ir-ssa-simplification.h"
// This file provides general facilities for inlining function calls.
//
@@ -17,31 +16,28 @@
// on user-supplied hints, or on optimization criteria like performance and
// code size.
-#include "slang-ir.h"
#include "slang-ir-clone.h"
#include "slang-ir-insts.h"
+#include "slang-ir.h"
namespace Slang
{
- /// Base type for inlining passes, providing shared/common functionality
+/// Base type for inlining passes, providing shared/common functionality
struct InliningPassBase
{
- /// The module that we are optimizing/transforming
+ /// The module that we are optimizing/transforming
IRModule* m_module = nullptr;
HashSet<IRInst*>* m_modifiedFuncs = nullptr;
- /// Initialize an inlining pass to operate on the given `module`
+ /// Initialize an inlining pass to operate on the given `module`
InliningPassBase(IRModule* module)
: m_module(module)
{
}
- /// Consider all the call sites in the module for inlining
- bool considerAllCallSites()
- {
- return considerAllCallSitesRec(m_module->getModuleInst());
- }
+ /// Consider all the call sites in the module for inlining
+ bool considerAllCallSites() { return considerAllCallSitesRec(m_module->getModuleInst()); }
bool considerCallSiteInFunc(IRFunc* func)
{
@@ -77,12 +73,12 @@ struct InliningPassBase
return result;
}
- /// Consider all call sites at or under `inst` for inlining
+ /// Consider all call sites at or under `inst` for inlining
bool considerAllCallSitesRec(IRInst* inst)
{
bool changed = false;
- if( auto func = as<IRFunc>(inst) )
+ if (auto func = as<IRFunc>(inst))
{
changed = considerCallSiteInFunc(func);
}
@@ -108,32 +104,34 @@ struct InliningPassBase
// to package it all up in a `struct` that can be re-used when
// we actually get around to inlining a call site.
- /// Information about a call site to be inlined
+ /// Information about a call site to be inlined
struct CallSiteInfo
{
- /// The call instruction.
+ /// The call instruction.
IRCall* call = nullptr;
- /// The function being called.
- ///
- /// For an inlinable call, this must be non-null and a valid function *definition* (with a body) for inlining to proceed.
+ /// The function being called.
+ ///
+ /// For an inlinable call, this must be non-null and a valid function *definition* (with a
+ /// body) for inlining to proceed.
IRFunc* callee = nullptr;
- /// The specialization of the function, if any.
- ///
- /// For an inlineable call, this must be non-null if the function is generic, but may be null otherwise.
+ /// The specialization of the function, if any.
+ ///
+ /// For an inlineable call, this must be non-null if the function is generic, but may be
+ /// null otherwise.
IRSpecialize* specialize = nullptr;
- /// The generic being specialized.
- ///
- /// For an inlineable call, this must be be non-null if `specialize` is non-null.
+ /// The generic being specialized.
+ ///
+ /// For an inlineable call, this must be non-null if `specialize` is non-null.
IRGeneric* generic = nullptr;
};
// With `CallSiteInfo` defined, we can now understand the
// basic process of considering a call site for inlining.
- /// Consider the given `call` site, and possibly inline it.
+ /// Consider the given `call` site, and possibly inline it.
bool considerCallSite(IRCall* call)
{
// We start by checking if inlining would even be possible,
@@ -144,7 +142,7 @@ struct InliningPassBase
// to consider and we bail out.
//
CallSiteInfo callSite;
- if(!canInline(call, callSite))
+ if (!canInline(call, callSite))
return false;
// If we've decided that we *can* inline the given call
@@ -152,7 +150,7 @@ struct InliningPassBase
// for when we should inline may vary by subclass,
// so `shouldInline` is a virtual method.
//
- if(!shouldInline(callSite))
+ if (!shouldInline(callSite))
return false;
// Finally, if we both *can* and *should* inline the
@@ -173,7 +171,7 @@ struct InliningPassBase
// here for the benefit of passes that might implement their
// own logic for deciding what to inline, bypassing `considerCallSite`.
- /// Determine whether `callSite` should be inlined.
+ /// Determine whether `callSite` should be inlined.
virtual bool shouldInline(CallSiteInfo const& callSite)
{
SLANG_UNUSED(callSite);
@@ -193,7 +191,8 @@ struct InliningPassBase
return false;
}
- /// Determine whether `call` can be inlined, and if so write information about it to `outCallSite`
+ /// Determine whether `call` can be inlined, and if so write information about it to
+ /// `outCallSite`
bool canInline(IRCall* call, CallSiteInfo& outCallSite)
{
// We can start by writing the `call` instruction into our `CallSiteInfo`.
@@ -207,7 +206,7 @@ struct InliningPassBase
// If the callee is a `specialize` instruction, then we
// want to look at what is being specialized instead.
//
- if( auto specialize = as<IRSpecialize>(callee) )
+ if (auto specialize = as<IRSpecialize>(callee))
{
// If the `specialize` is applied to something other
// than a `generic` instruction, then we can't
@@ -215,7 +214,7 @@ struct InliningPassBase
// call to a generic method in an interface.
//
IRGeneric* generic = findSpecializedGeneric(specialize);
- if(!generic)
+ if (!generic)
return false;
// If we have a `generic` instruction, then we
@@ -228,7 +227,7 @@ struct InliningPassBase
// yields, then inlining isn't possible.
//
callee = findGenericReturnVal(generic);
- if(!callee)
+ if (!callee)
return false;
// If we decide to inline this call, then the information
@@ -245,7 +244,7 @@ struct InliningPassBase
// If it is not, then inlining isn't possible.
//
auto calleeFunc = as<IRFunc>(callee);
- if(!calleeFunc)
+ if (!calleeFunc)
return false;
//
// If the callee *is* a function, then we can update
@@ -257,8 +256,7 @@ struct InliningPassBase
{
switch (decor->getOp())
{
- case kIROp_IntrinsicOpDecoration:
- return true;
+ case kIROp_IntrinsicOpDecoration: return true;
}
}
@@ -274,7 +272,7 @@ struct InliningPassBase
// a call site if the callee function is a full definition
// in the IR (not just a declaration).
//
- if(!isDefinition(calleeFunc))
+ if (!isDefinition(calleeFunc))
return false;
// We cannot inline a call inside an `IRExpand`.
@@ -291,7 +289,7 @@ struct InliningPassBase
return true;
}
- /// Inline the given `callSite`, which is assumed to have been validated
+ /// Inline the given `callSite`, which is assumed to have been validated
void inlineCallSite(CallSiteInfo const& callSite)
{
// Information about the call site, including
@@ -331,7 +329,11 @@ struct InliningPassBase
}
else
{
- auto newCall = builder.emitIntrinsicInst(call->getFullType(), op, args.getCount(), args.getBuffer());
+ auto newCall = builder.emitIntrinsicInst(
+ call->getFullType(),
+ op,
+ args.getCount(),
+ args.getBuffer());
call->replaceUsesWith(newCall);
}
call->removeAndDeallocate();
@@ -342,7 +344,7 @@ struct InliningPassBase
// need to include the substitution of generic parameters
// with their argument values in our cloning.
//
- if( auto specialize = callSite.specialize )
+ if (auto specialize = callSite.specialize)
{
auto generic = callSite.generic;
@@ -350,7 +352,7 @@ struct InliningPassBase
// generic parameters to the matching arguments.
//
Int argCounter = 0;
- for( auto param : generic->getParams() )
+ for (auto param : generic->getParams())
{
SLANG_ASSERT(argCounter < (Int)specialize->getArgCount());
auto arg = specialize->getArg(argCounter++);
@@ -367,16 +369,16 @@ struct InliningPassBase
auto body = generic->getFirstBlock();
SLANG_ASSERT(!body->getNextBlock()); // All IR generics should have a single block.
- for( auto inst : body->getChildren() )
+ for (auto inst : body->getChildren())
{
- if( inst == callee )
+ if (inst == callee)
{
// We don't want to create a clone of the callee
// function at the call site, since it would
// immediately become dead code when we inline
// its body.
}
- else if(as<IRReturn>(inst))
+ else if (as<IRReturn>(inst))
{
// We also don't want to clone any `return`
// instruction in the generic, since that is
@@ -407,7 +409,7 @@ struct InliningPassBase
// matching argument at the call site.
//
Int argCounter = 0;
- for(auto param : callee->getParams())
+ for (auto param : callee->getParams())
{
SLANG_ASSERT(argCounter < (Int)call->getArgCount());
auto arg = call->getArg(argCounter++);
@@ -416,31 +418,34 @@ struct InliningPassBase
SLANG_ASSERT(argCounter == (Int)call->getArgCount());
}
- inlineFuncBody(callSite, &env, &builder);
+ inlineFuncBody(callSite, &env, &builder);
}
- // When instructions are cloned, with cloneInst no sourceLoc information is copied over by default.
- // Here we attempt some policy about copying sourceLocs when inlining.
- //
- // An assumption here is that [__unsafeForceInlineEarly] will not be in user code (when we have more
- // general inlining this will not follow).
- //
- // Therefore we probably *don't* want to copy sourceLoc from the original definition in the core module because
- //
- // * That won't be much use to the user (they can't easily see the core module code currently for example)
- // * That the definitions in the core module are currently 'mundane' and largely exist to flesh out language features - such that
- // their being in the core module would likely be surprising to users
- //
- // That being the case, we actually copy the call sites sourceLoc if it's defined, and only fall back
- // onto the originating loc, if that's not defined.
- //
- // We *could* vary behavior if we knew if the function was defined in the core module. There doesn't appear
- // to be a decoration for this.
- // We could find out by looking at the source loc and checking if it's in the range of the core module - this would actually be
- // a fast and easy but to do properly this way you'd want a way to mark that source range that would also work across
- // serialization.
- //
- // For now this punts on this, and just assumes [__unsafeForceInlineEarly] is not in user code.
+ // When instructions are cloned, with cloneInst no sourceLoc information is copied over by
+ // default. Here we attempt some policy about copying sourceLocs when inlining.
+ //
+ // An assumption here is that [__unsafeForceInlineEarly] will not be in user code (when we have
+ // more general inlining this will not follow).
+ //
+ // Therefore we probably *don't* want to copy sourceLoc from the original definition in the core
+ // module because
+ //
+ // * That won't be much use to the user (they can't easily see the core module code currently
+ // for example)
+ // * That the definitions in the core module are currently 'mundane' and largely exist to flesh
+ // out language features - such that
+ // their being in the core module would likely be surprising to users
+ //
+ // That being the case, we actually copy the call site's sourceLoc if it's defined, and only fall
+ // back onto the originating loc, if that's not defined.
+ //
+ // We *could* vary behavior if we knew if the function was defined in the core module. There
+ // doesn't appear to be a decoration for this. We could find out by looking at the source loc
+ // and checking if it's in the range of the core module - this would actually be fast and
+ // easy, but to do it properly this way you'd want a way to mark that source range that would
+ // also work across serialization.
+ //
+ // For now this punts on this, and just assumes [__unsafeForceInlineEarly] is not in user code.
static void _setSourceLoc(IRInst* clonedInst, IRInst* srcInst, CallSiteInfo const& callSite)
{
SourceLoc sourceLoc;
@@ -458,21 +463,24 @@ struct InliningPassBase
clonedInst->sourceLoc = sourceLoc;
}
- static IRInst* _cloneInstWithSourceLoc(CallSiteInfo const& callSite,
- IRCloneEnv* env,
- IRBuilder* builder,
- IRInst* inst)
+ static IRInst* _cloneInstWithSourceLoc(
+ CallSiteInfo const& callSite,
+ IRCloneEnv* env,
+ IRBuilder* builder,
+ IRInst* inst)
{
IRInst* clonedInst = cloneInst(env, builder, inst);
_setSourceLoc(clonedInst, inst, callSite);
return clonedInst;
}
- /// Inline the body of the callee for `callSite`, for a callee that has only
- /// a single basic block.
- ///
+ /// Inline the body of the callee for `callSite`, for a callee that has only
+ /// a single basic block.
+ ///
void inlineSingleBlockFuncBody(
- CallSiteInfo const& callSite, IRCloneEnv* env, IRBuilder* builder)
+ CallSiteInfo const& callSite,
+ IRCloneEnv* env,
+ IRBuilder* builder)
{
auto call = callSite.call;
auto callee = callSite.callee;
@@ -537,9 +545,8 @@ struct InliningPassBase
call->removeAndDeallocate();
}
- /// Inline the body of the callee for `callSite`.
- void inlineFuncBody(
- CallSiteInfo const& callSite, IRCloneEnv* env, IRBuilder* builder)
+ /// Inline the body of the callee for `callSite`.
+ void inlineFuncBody(CallSiteInfo const& callSite, IRCloneEnv* env, IRBuilder* builder)
{
auto call = callSite.call;
auto callee = callSite.callee;
@@ -551,7 +558,7 @@ struct InliningPassBase
//
auto firstBlock = callee->getFirstBlock();
SLANG_ASSERT(firstBlock);
- if(!firstBlock->getNextBlock() && as<IRReturn>(firstBlock->getTerminator()))
+ if (!firstBlock->getNextBlock() && as<IRReturn>(firstBlock->getTerminator()))
{
inlineSingleBlockFuncBody(callSite, env, builder);
return;
@@ -663,8 +670,8 @@ struct InliningPassBase
//
{
auto returnedValue = findCloneForOperand(env, inst->getOperand(0));
- auto returnBranch = builder->emitBranch(
- afterBlock, returnValParam ? 1 : 0, &returnedValue);
+ auto returnBranch =
+ builder->emitBranch(afterBlock, returnValParam ? 1 : 0, &returnedValue);
_setSourceLoc(returnBranch, inst, callSite);
}
break;
@@ -692,24 +699,24 @@ struct InliningPassBase
//
call->removeAndDeallocate();
}
-
};
- /// An inlining pass that inlines calls to `[unsafeForceInlineEarly]` functions
+/// An inlining pass that inlines calls to `[unsafeForceInlineEarly]` functions
struct MandatoryEarlyInliningPass : InliningPassBase
{
typedef InliningPassBase Super;
MandatoryEarlyInliningPass(IRModule* module)
: Super(module)
- {}
+ {
+ }
bool shouldInline(CallSiteInfo const& info)
{
if (info.callee->findDecoration<IRIntrinsicOpDecoration>())
return true;
- if(info.callee->findDecoration<IRUnsafeForceInlineEarlyDecoration>())
+ if (info.callee->findDecoration<IRUnsafeForceInlineEarlyDecoration>())
return true;
return false;
}
@@ -725,7 +732,8 @@ bool performMandatoryEarlyInlining(IRModule* module, HashSet<IRInst*>* modifiedF
return pass.considerAllCallSites();
}
-namespace { // anonymous
+namespace
+{ // anonymous
// Inlines calls that involve String types
struct TypeInliningPass : InliningPassBase
@@ -734,30 +742,31 @@ struct TypeInliningPass : InliningPassBase
TypeInliningPass(IRModule* module)
: Super(module)
- {}
+ {
+ }
bool doesTypeRequireInline(IRType* type, IRFunc* callee)
{
// TODO(JS):
// I guess there is a question here about what type around string requires
- // inlining.
+ // inlining.
// For example if we had an array of strings etc.
// For now we just consider just basic string types.
const auto op = type->getOp();
switch (op)
{
- case kIROp_RefType:
+ case kIROp_RefType:
{
- if(callee->findDecoration<IRNoRefInlineDecoration>())
+ if (callee->findDecoration<IRNoRefInlineDecoration>())
return false;
return true;
}
- case kIROp_StringType:
- case kIROp_NativeStringType:
+ case kIROp_StringType:
+ case kIROp_NativeStringType:
{
return true;
}
- default: break;
+ default: break;
}
return false;
@@ -785,25 +794,25 @@ struct TypeInliningPass : InliningPassBase
}
};
-} // anonymous
+} // namespace
Result performTypeInlining(IRModule* module, DiagnosticSink* sink)
{
SLANG_UNUSED(sink);
- // TODO(JS):
- // This is perhaps not as efficient as might be desirable.
+ // TODO(JS):
+ // This is perhaps not as efficient as might be desirable.
// A more optimized version might not need to pass over all of the module
- // to find new call sites.
+ // to find new call sites.
//
// Another problem here is recursion. Right now Slang compiler doesn't accept recursive input,
- // but the Slang language is supposed to support recursion on targets that support it.
+ // but the Slang language is supposed to support recursion on targets that support it.
// There are GPU targets that allow recursion such as CUDA.
//
- // Another approach would be (when enabled) when inlining occurs, would be instead of continuing
- // *after*, to start the checks/inlining from where the inline took place.
- //
- while(true)
+ // Another approach (when enabled) would be, when inlining occurs, to restart the
+ // checks/inlining from where the inline took place instead of continuing *after* it.
+ //
+ while (true)
{
TypeInliningPass pass(module);
if (pass.considerAllCallSites())
@@ -811,12 +820,11 @@ Result performTypeInlining(IRModule* module, DiagnosticSink* sink)
// If there was a change try inlining again
continue;
}
-
+
// Done.
break;
}
-
return SLANG_OK;
}
@@ -827,12 +835,13 @@ struct ForceInliningPass : InliningPassBase
ForceInliningPass(IRModule* module)
: Super(module)
- {}
+ {
+ }
bool shouldInline(CallSiteInfo const& info)
{
if (info.callee->findDecoration<IRForceInlineDecoration>() ||
- info.callee->findDecoration<IRUnsafeForceInlineEarlyDecoration>()||
+ info.callee->findDecoration<IRUnsafeForceInlineEarlyDecoration>() ||
info.callee->findDecoration<IRIntrinsicOpDecoration>())
return true;
return false;
@@ -859,7 +868,8 @@ struct PreAutoDiffForceInliningPass : InliningPassBase
PreAutoDiffForceInliningPass(IRModule* module)
: Super(module)
- {}
+ {
+ }
Dictionary<IRInst*, bool> m_funcCanInline;
@@ -875,15 +885,10 @@ struct PreAutoDiffForceInliningPass : InliningPassBase
switch (decor->getOp())
{
case kIROp_UnsafeForceInlineEarlyDecoration:
- case kIROp_IntrinsicOpDecoration:
- return true;
- case kIROp_ForceInlineDecoration:
- hasForceInline = true;
- break;
+ case kIROp_IntrinsicOpDecoration: return true;
+ case kIROp_ForceInlineDecoration: hasForceInline = true; break;
case kIROp_UserDefinedBackwardDerivativeDecoration:
- case kIROp_ForwardDerivativeDecoration:
- hasUserDefinedDerivative = true;
- break;
+ case kIROp_ForwardDerivativeDecoration: hasUserDefinedDerivative = true; break;
}
}
if (!hasForceInline || hasUserDefinedDerivative)
@@ -903,14 +908,10 @@ struct PreAutoDiffForceInliningPass : InliningPassBase
case kIROp_ForwardDifferentiate:
case kIROp_BackwardDifferentiate:
case kIROp_BackwardDifferentiatePrimal:
- case kIROp_BackwardDifferentiatePropagate:
- canInline = false;
- goto end;
-
+ case kIROp_BackwardDifferentiatePropagate: canInline = false; goto end;
+
// Also avoid inlining functions with inline-asm instructions.
- case kIROp_SPIRVAsm:
- canInline = false;
- goto end;
+ case kIROp_SPIRVAsm: canInline = false; goto end;
}
}
}
@@ -932,19 +933,20 @@ bool performPreAutoDiffForceInlining(IRModule* module)
return pass.considerAllCallSitesRec(module->getModuleInst());
}
- // Defined in slang-ir-specialize-resource.cpp
+// Defined in slang-ir-specialize-resource.cpp
bool isResourceType(IRType* type);
bool isIllegalGLSLParameterType(IRType* type);
- /// An inlining pass that inlines calls functions that returns resources.
- /// This is needed for glsl targets.
+/// An inlining pass that inlines calls to functions that return resources.
+/// This is needed for glsl targets.
struct GLSLResourceReturnFunctionInliningPass : InliningPassBase
{
typedef InliningPassBase Super;
GLSLResourceReturnFunctionInliningPass(IRModule* module)
: Super(module)
- {}
+ {
+ }
bool shouldInline(CallSiteInfo const& info)
{
@@ -985,7 +987,8 @@ struct IntrinsicFunctionInliningPass : InliningPassBase
IntrinsicFunctionInliningPass(IRModule* module)
: Super(module)
- {}
+ {
+ }
bool shouldInline(CallSiteInfo const& info)
{
@@ -999,20 +1002,17 @@ struct IntrinsicFunctionInliningPass : InliningPassBase
// If a function body has only asm blocks + trivial insts (load/store),
// this is considered as a pure asm function, and we can inline it.
bool hasSpvAsm = false;
- for (auto inst = func->getFirstBlock()->getFirstOrdinaryInst(); inst != returnInst; inst = inst->getNextInst())
+ for (auto inst = func->getFirstBlock()->getFirstOrdinaryInst(); inst != returnInst;
+ inst = inst->getNextInst())
{
switch (inst->getOp())
{
case kIROp_SPIRVAsmOperandInst:
- case kIROp_SPIRVAsm:
- hasSpvAsm = true;
- continue;
+ case kIROp_SPIRVAsm: hasSpvAsm = true; continue;
case kIROp_Load:
case kIROp_swizzle:
- case kIROp_Store:
- continue;
- default:
- return false;
+ case kIROp_Store: continue;
+ default: return false;
}
}
return hasSpvAsm;
@@ -1036,12 +1036,10 @@ struct CustomInliningPass : InliningPassBase
CustomInliningPass(IRModule* module)
: Super(module)
- {}
-
- bool shouldInline(CallSiteInfo const&)
{
- return true;
}
+
+ bool shouldInline(CallSiteInfo const&) { return true; }
};
bool inlineCall(IRCall* call)