From 4edc72e4dea47cf549b4e28940e3509a5ab61439 Mon Sep 17 00:00:00 2001 From: Yong He Date: Tue, 14 May 2024 18:01:31 -0700 Subject: Remove use of `G0` and `__target_intrinsic` in stdlib. (#4170) * Remove use of `G0` and `__target_intrinsic` in stdlib. * Fix. * Fix calling intrinsic in global scope. --- source/slang/core.meta.slang | 19 +- source/slang/diff.meta.slang | 246 ++++++++++++++++----- source/slang/hlsl.meta.slang | 31 ++- source/slang/slang-ast-stmt.h | 2 + source/slang/slang-check-impl.h | 2 +- source/slang/slang-check-stmt.cpp | 7 + source/slang/slang-emit-c-like.cpp | 25 ++- source/slang/slang-emit-c-like.h | 7 +- source/slang/slang-emit-cpp.cpp | 3 +- source/slang/slang-emit-cpp.h | 2 +- source/slang/slang-emit-cuda.cpp | 4 +- source/slang/slang-emit-cuda.h | 2 +- source/slang/slang-emit-glsl.cpp | 3 +- source/slang/slang-intrinsic-expand.cpp | 72 ++++-- source/slang/slang-intrinsic-expand.h | 3 +- source/slang/slang-ir-insts.h | 2 +- source/slang/slang-ir-lower-generic-call.cpp | 3 +- source/slang/slang-ir-lower-generic-function.cpp | 3 +- source/slang/slang-ir-specialize-function-call.cpp | 3 +- source/slang/slang-ir-specialize-resources.cpp | 3 +- source/slang/slang-ir-spirv-legalize.cpp | 1 + source/slang/slang-ir.cpp | 6 +- source/slang/slang-lower-to-ir.cpp | 18 +- source/slang/slang-parser.cpp | 6 + 24 files changed, 363 insertions(+), 110 deletions(-) (limited to 'source/slang') diff --git a/source/slang/core.meta.slang b/source/slang/core.meta.slang index 63bc2571b..bf69eb9ad 100644 --- a/source/slang/core.meta.slang +++ b/source/slang/core.meta.slang @@ -2003,11 +2003,12 @@ int __SyntaxError() /// For downstream compilers that allow sizeof/alignof/offsetof /// Can't be called in the C/C++ style. Need to use __size_of() as opposed to sizeof(some_type). __generic -__target_intrinsic(cpp, "sizeof($G0)") -__target_intrinsic(cuda, "sizeof($G0)") [__readNone] [require(cpp_cuda)] -int __sizeOf(); +int __sizeOf() +{ + __intrinsic_asm "sizeof($[0])", T; +} __generic [__readNone] @@ -2022,11 +2023,17 @@ int __sizeOf(T v) } __generic -__target_intrinsic(cuda, "SLANG_ALIGN_OF($G0)") -__target_intrinsic(cpp, "SLANG_ALIGN_OF($G0)") [__readNone] [require(cpp_cuda)] -int __alignOf(); +int __alignOf() +{ + __target_switch + { + case cuda : + case cpp : + __intrinsic_asm "SLANG_ALIGN_OF($[0])", T; + } +} __generic [__readNone] diff --git a/source/slang/diff.meta.slang b/source/slang/diff.meta.slang index 769630d50..c912e026c 100644 --- a/source/slang/diff.meta.slang +++ b/source/slang/diff.meta.slang @@ -55,81 +55,171 @@ __magic_type(TensorViewType) __intrinsic_type($(kIROp_TensorViewType)) struct TensorView { - __target_intrinsic(cuda, "$0.data_ptr<$G0>()") [__NoSideEffect] [require(cuda)] - Ptr data_ptr(); + Ptr data_ptr() + { + __target_switch + { + case cuda: + __intrinsic_asm "$0.data_ptr<$[0]>()", T; + } + } - __target_intrinsic(cuda, "$0.data_ptr_at<$G0>($1)") [__NoSideEffect] [require(cuda)] - Ptr data_ptr_at(uint index); + Ptr data_ptr_at(uint index) + { + __target_switch + { + case cuda: + __intrinsic_asm "$0.data_ptr_at<$[0]>($1)", T; + } + } - __generic - __target_intrinsic(cuda, "$0.data_ptr_at<$G0>($1)") + __generic [__NoSideEffect] [require(cuda)] - Ptr data_ptr_at(vector index); + Ptr data_ptr_at(vector index) + { + __target_switch + { + case cuda: + __intrinsic_asm "$0.data_ptr_at<$[0]>($1)", T; + } + } __implicit_conversion($(kConversionCost_ImplicitDereference)) __intrinsic_op($(kIROp_TorchTensorGetView)) __init(TorchTensor t); - __target_intrinsic(cuda, "$0.load<$G0>($1)") [__NoSideEffect] [require(cuda)] - T load(uint x); + T load(uint x) + { + __target_switch + { + case cuda: + __intrinsic_asm "$0.load<$[0]>($1)", T; + } + } - __target_intrinsic(cuda, "$0.load<$G0>($1, $2)") [__NoSideEffect] [require(cuda)] - T load(uint x, uint y); + T load(uint x, uint y) + { + __target_switch + { + case cuda: + __intrinsic_asm "$0.load<$[0]>($1, $2)", T; + } + } - __target_intrinsic(cuda, "$0.load<$G0>($1, $2, $3)") [__NoSideEffect] [require(cuda)] - T load(uint x, uint y, uint z); + T load(uint x, uint y, uint z) + { + __target_switch + { + case cuda: + __intrinsic_asm "$0.load<$[0]>($1, $2, $3)", T; + } + } - __target_intrinsic(cuda, "$0.load<$G0>($1, $2, $3, $4)") [__NoSideEffect] [require(cuda)] - T load(uint x, uint y, uint z, uint w); + T load(uint x, uint y, uint z, uint w) + { + __target_switch + { + case cuda: + __intrinsic_asm "$0.load<$[0]>($1, $2, $3, $4)", T; + } + } - __target_intrinsic(cuda, "$0.load<$G0>($1, $2, $3, $4, $5)") [__NoSideEffect] [require(cuda)] - T load(uint i0, uint i1, uint i2, uint i3, uint i4); + T load(uint i0, uint i1, uint i2, uint i3, uint i4) + { + __target_switch + { + case cuda: + __intrinsic_asm "$0.load<$[0]>($1, $2, $3, $4, $5)", T; + } + } __generic - __target_intrinsic(cuda, "$0.load<$TR>($1)") [__NoSideEffect] [require(cuda)] - T load(vector index); + T load(vector index) + { + __target_switch + { + case cuda: + __intrinsic_asm "$0.load<$TR>($1)"; + } + } - __target_intrinsic(cuda, "$0.store<$G0>($1, $2)") [require(cuda)] - void store(uint x, T val); + void store(uint x, T val) + { + __target_switch + { + case cuda: + __intrinsic_asm "$0.store<$T2>($1, $2)"; + } + } - __target_intrinsic(cuda, "$0.store<$G0>($1, $2, $3)") [require(cuda)] - void store(uint x, uint y, T val); + void store(uint x, uint y, T val) + { + __target_switch + { + case cuda: + __intrinsic_asm "$0.store<$T3>($1, $2, $3)"; + } + } - __target_intrinsic(cuda, "$0.store<$G0>($1, $2, $3, $4)") [require(cuda)] - void store(uint x, uint y, uint z, T val); + void store(uint x, uint y, uint z, T val) + { + __target_switch + { + case cuda: + __intrinsic_asm "$0.store<$T4>($1, $2, $3, $4)"; + } + } - __target_intrinsic(cuda, "$0.store<$G0>($1, $2, $3, $4, $5)") [require(cuda)] - void store(uint x, uint y, uint z, uint w, T val); + void store(uint x, uint y, uint z, uint w, T val) + { + __target_switch + { + case cuda: + __intrinsic_asm "$0.store<$T5>($1, $2, $3, $4, $5)"; + } + } - __target_intrinsic(cuda, "$0.store<$G0>($1, $2, $3, $4, $5, $6)") [require(cuda)] - void store(uint i0, uint i1, uint i2, uint i3, uint i4, T val); + void store(uint i0, uint i1, uint i2, uint i3, uint i4, T val) + { + __target_switch + { + case cuda: + __intrinsic_asm "$0.store<$T6>($1, $2, $3, $4, $5, $6)"; + } + } __generic - __target_intrinsic(cuda, "$0.store<$T2>($1, $2)") [require(cuda)] - void store(vector index, T val); + void store(vector index, T val) + { + __target_switch + { + case cuda: + __intrinsic_asm "$0.store<$T2>($1, $2)"; + } + } [require(cuda)] void InterlockedAdd(uint index, T val, out T oldVal) @@ -184,66 +274,114 @@ struct TensorView { [ForceInline] [__NoSideEffect] get { return load(index); } [ForceInline] set { store(index, newValue); } - - __target_intrinsic(cuda, "$0.load<$G0>($1)") + [__NoSideEffect] - ref; + ref + { + __target_switch + { + case cuda: + __intrinsic_asm "$0.load<$[0]>($1)", T; + } + } } __subscript(uint i1, uint i2) -> T { [ForceInline] [__NoSideEffect] get { return load(i1, i2); } [ForceInline] set { store(i1, i2, newValue); } - __target_intrinsic(cuda, "$0.load<$G0>($1, $2)") [__NoSideEffect] - ref; + ref + { + __target_switch + { + case cuda: + __intrinsic_asm "$0.load<$[0]>($1, $2)", T; + } + } } __subscript(uint2 i) -> T { [ForceInline] [__NoSideEffect] get { return load(i.x, i.y); } [ForceInline] set { store(i.x, i.y, newValue); } - __target_intrinsic(cuda, "$0.load<$G0>($1.x, $1.y)") [__NoSideEffect] - ref; + ref + { + __target_switch + { + case cuda: + __intrinsic_asm "$0.load<$[0]>($1.x, $1.y)", T; + } + } } __subscript(uint i1, uint i2, uint i3) -> T { [ForceInline] [__NoSideEffect] get { return load(i1, i2, i3); } [ForceInline] set { store(i1, i2, i3, newValue); } - __target_intrinsic(cuda, "$0.load<$G0>($1, $2, $3)") [__NoSideEffect] - ref; + ref + { + __target_switch + { + case cuda: + __intrinsic_asm "$0.load<$[0]>($1, $2, $3)", T; + } + } } __subscript(uint3 i) -> T { [ForceInline] [__NoSideEffect] get { return load(i.x, i.y, i.z); } [ForceInline] set { store(i.x, i.y, i.z, newValue); } - __target_intrinsic(cuda, "$0.load<$G0>($1.x, $1.y, $1.z)") [__NoSideEffect] - ref; + ref + { + __target_switch + { + case cuda: + __intrinsic_asm "$0.load<$[0]>($1.x, $1.y, $1.z)", T; + } + } } __subscript(uint i1, uint i2, uint i3, uint i4) -> T { [ForceInline] [__NoSideEffect] get { return load(i1, i2, i3, i4); } [ForceInline] set { store(i1, i2, i3, i4, newValue); } - __target_intrinsic(cuda, "$0.load<$G0>($1, $2, $3, $4)") [__NoSideEffect] - ref; + ref + { + __target_switch + { + case cuda: + __intrinsic_asm "$0.load<$[0]>($1, $2, $3, $4)", T; + } + } } __subscript(uint4 i) -> T { [__NoSideEffect][ForceInline] get { return load(i.x, i.y, i.z, i.w); } [ForceInline] set { store(i.x, i.y, i.z, i.w, newValue); } - __target_intrinsic(cuda, "$0.load<$G0>($1.x, $1.y, $1.z, $1.w)") [__NoSideEffect] - ref; + ref + { + __target_switch + { + case cuda: + __intrinsic_asm "$0.load<$[0]>($1.x, $1.y, $1.z, $1.w)", T; + } + } } __subscript(uint i1, uint i2, uint i3, uint i4, uint i5) -> T { [ForceInline] [__NoSideEffect] get { return load(i1, i2, i3, i4, i5); } [ForceInline] set { store(i1, i2, i3, i4, i5, newValue); } - __target_intrinsic(cuda, "$0.load<$G0>($1, $2, $3, $4, $5)") [__NoSideEffect] - ref; + ref + { + __target_switch + { + case cuda: + __intrinsic_asm "$0.load<$[0]>($1, $2, $3, $4, $5)", T; + } + } } } @@ -882,12 +1020,18 @@ struct TorchTensor } } - __target_intrinsic(cpp, "$0.data_ptr<$G0>()") - __target_intrinsic(cuda, "$0.data_ptr<$G0>()") [__readNone] [CudaHost] [require(cpp_cuda)] - Ptr data_ptr(); + Ptr data_ptr() + { + __target_switch + { + case cpp: + case cuda: + __intrinsic_asm "$0.data_ptr<$[0]>()", T; + } + } __intrinsic_op($(kIROp_AllocateTorchTensor)) [CudaHost] diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang index 92b68c3e6..f318eb79e 100644 --- a/source/slang/hlsl.meta.slang +++ b/source/slang/hlsl.meta.slang @@ -9894,15 +9894,32 @@ vector fmax3(vector x, vector y, vector z) // minimum __generic -__target_intrinsic(hlsl) -__target_intrinsic(glsl) -__target_intrinsic(metal) -__target_intrinsic(cuda, "$P_min($0, $1)") -__target_intrinsic(cpp, "$P_min($0, $1)") -__target_intrinsic(spirv, "OpExtInst resultType resultId glsl450 fus(FMin, UMin, SMin) _0 _1") [__readNone] [require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] -T min(T x, T y); +T min(T x, T y) +{ + __target_switch + { + case hlsl: + case glsl: + case metal: + __intrinsic_asm "min"; + case cuda: + case cpp: + __intrinsic_asm "$P_min($0, $1)"; + case spirv: + { + if (__isSignedInt()) + return spirv_asm { + result:$$T = OpExtInst glsl450 SMin $x $y + }; + else + return spirv_asm { + result:$$T = OpExtInst glsl450 UMin $x $y + }; + } + } +} __generic [__readNone] diff --git a/source/slang/slang-ast-stmt.h b/source/slang/slang-ast-stmt.h index afa606456..0342cdc50 100644 --- a/source/slang/slang-ast-stmt.h +++ b/source/slang/slang-ast-stmt.h @@ -114,6 +114,8 @@ class IntrinsicAsmStmt : public Stmt SLANG_AST_CLASS(IntrinsicAsmStmt) String asmText; + + List args; }; // A statement that is expected to appear lexically nested inside diff --git a/source/slang/slang-check-impl.h b/source/slang/slang-check-impl.h index fc87c680b..20139b4e4 100644 --- a/source/slang/slang-check-impl.h +++ b/source/slang/slang-check-impl.h @@ -2734,7 +2734,7 @@ namespace Slang void visitTargetCaseStmt(TargetCaseStmt* stmt); - void visitIntrinsicAsmStmt(IntrinsicAsmStmt*) {} + void visitIntrinsicAsmStmt(IntrinsicAsmStmt*); void visitDefaultStmt(DefaultStmt* stmt); diff --git a/source/slang/slang-check-stmt.cpp b/source/slang/slang-check-stmt.cpp index 2af8f7d08..89ec82e48 100644 --- a/source/slang/slang-check-stmt.cpp +++ b/source/slang/slang-check-stmt.cpp @@ -355,6 +355,13 @@ namespace Slang subContext.checkStmt(stmt->body); } + void SemanticsStmtVisitor::visitIntrinsicAsmStmt(IntrinsicAsmStmt* stmt) + { + WithOuterStmt subContext(this, stmt); + for (auto& arg : stmt->args) + arg = subContext.CheckExpr(arg); + } + void SemanticsStmtVisitor::visitDefaultStmt(DefaultStmt* stmt) { auto switchStmt = FindOuterStmt(); diff --git a/source/slang/slang-emit-c-like.cpp b/source/slang/slang-emit-c-like.cpp index 19a7930f6..7551f4da9 100644 --- a/source/slang/slang-emit-c-like.cpp +++ b/source/slang/slang-emit-c-like.cpp @@ -1064,7 +1064,8 @@ String CLikeSourceEmitter::generateName(IRInst* inst) // that should be emitted as a target intrinsic, // then use that name instead. UnownedStringSlice intrinsicDef; - if(findTargetIntrinsicDefinition(inst, intrinsicDef)) + IRInst* intrinsicInst = nullptr; + if(findTargetIntrinsicDefinition(inst, intrinsicDef, intrinsicInst)) { return String(intrinsicDef); } @@ -1536,7 +1537,8 @@ bool CLikeSourceEmitter::shouldFoldInstIntoUseSites(IRInst* inst) // parameter. This is not indicated into the call, and can lead to output code computes something multiple // times as it is folding into the expression of the the target intrinsic, which we don't want. UnownedStringSlice intrinsicDef; - if (findTargetIntrinsicDefinition(funcValue, intrinsicDef)) + IRInst* intrinsicInst; + if (findTargetIntrinsicDefinition(funcValue, intrinsicDef, intrinsicInst)) { // Find the index of the original instruction, to see if it's multiply used. IRUse* args = callInst->getArgs(); @@ -1854,14 +1856,15 @@ IRTargetIntrinsicDecoration* CLikeSourceEmitter::_findBestTargetIntrinsicDecorat } -void CLikeSourceEmitter::emitIntrinsicCallExpr(IRCall* inst, UnownedStringSlice intrinsicDefinition, EmitOpInfo const& inOuterPrec) +void CLikeSourceEmitter::emitIntrinsicCallExpr(IRCall* inst, UnownedStringSlice intrinsicDefinition, IRInst* intrinsicInst, EmitOpInfo const& inOuterPrec) { - emitIntrinsicCallExprImpl(inst, intrinsicDefinition, inOuterPrec); + emitIntrinsicCallExprImpl(inst, intrinsicDefinition, intrinsicInst, inOuterPrec); } void CLikeSourceEmitter::emitIntrinsicCallExprImpl( IRCall* inst, UnownedStringSlice intrinsicDefinition, + IRInst* intrinsicInst, EmitOpInfo const& inOuterPrec) { auto outerPrec = inOuterPrec; @@ -1939,7 +1942,7 @@ void CLikeSourceEmitter::emitIntrinsicCallExprImpl( else { IntrinsicExpandContext context(this); - context.emit(inst, args, argCount, name); + context.emit(inst, args, argCount, name, intrinsicInst); } } @@ -1985,9 +1988,9 @@ void CLikeSourceEmitter::emitComInterfaceCallExpr(IRCall* inst, EmitOpInfo const maybeCloseParens(needClose); } -bool CLikeSourceEmitter::findTargetIntrinsicDefinition(IRInst* callee, UnownedStringSlice& outDefinition) +bool CLikeSourceEmitter::findTargetIntrinsicDefinition(IRInst* callee, UnownedStringSlice& outDefinition, IRInst*& outInst) { - return Slang::findTargetIntrinsicDefinition(callee, getTargetCaps(), outDefinition); + return Slang::findTargetIntrinsicDefinition(callee, getTargetCaps(), outDefinition, outInst); } void CLikeSourceEmitter::emitCallExpr(IRCall* inst, EmitOpInfo outerPrec) @@ -2024,8 +2027,9 @@ void CLikeSourceEmitter::emitCallExpr(IRCall* inst, EmitOpInfo outerPrec) // We want to detect any call to an intrinsic operation, // that we can emit it directly without mangling, etc. UnownedStringSlice intrinsicDefinition; + IRInst* intrinsicInst; auto resolvedFunc = getResolvedInstForDecorations(funcValue); - if (findTargetIntrinsicDefinition(resolvedFunc, intrinsicDefinition)) + if (findTargetIntrinsicDefinition(resolvedFunc, intrinsicDefinition, intrinsicInst)) { // Make sure we register all required preludes for emit. if (auto func = as(resolvedFunc)) @@ -2043,7 +2047,7 @@ void CLikeSourceEmitter::emitCallExpr(IRCall* inst, EmitOpInfo outerPrec) } } } - emitIntrinsicCallExpr(inst, intrinsicDefinition, outerPrec); + emitIntrinsicCallExpr(inst, intrinsicDefinition, intrinsicInst, outerPrec); } else { @@ -3529,7 +3533,8 @@ bool CLikeSourceEmitter::isTargetIntrinsic(IRInst* inst) // target intrinsic for the current compilation target. // UnownedStringSlice intrinsicDef; - return findTargetIntrinsicDefinition(inst, intrinsicDef); + IRInst* intrinsicInst; + return findTargetIntrinsicDefinition(inst, intrinsicDef, intrinsicInst); } bool shouldWrapInExternCBlock(IRFunc* func) diff --git a/source/slang/slang-emit-c-like.h b/source/slang/slang-emit-c-like.h index 450770238..ab0b2c3e4 100644 --- a/source/slang/slang-emit-c-like.h +++ b/source/slang/slang-emit-c-like.h @@ -316,8 +316,8 @@ public: IRTargetIntrinsicDecoration* _findBestTargetIntrinsicDecoration(IRInst* inst); // Find the definition of a target intrinsic either from __target_intrinsic decoration, or from - // a genericAsm inst in the function body. - bool findTargetIntrinsicDefinition(IRInst* callee, UnownedStringSlice& outDefinition); + // a genericAsm inst in the function body. `outInst` is the decoration or the genericAsm inst. + bool findTargetIntrinsicDefinition(IRInst* callee, UnownedStringSlice& outDefinition, IRInst*& outInst); // Check if the string being used to define a target intrinsic // is an "ordinary" name, such that we can simply emit a call @@ -329,6 +329,7 @@ public: void emitIntrinsicCallExpr( IRCall* inst, UnownedStringSlice intrinsicDefinition, + IRInst* intrinsicInst, EmitOpInfo const& inOuterPrec); void emitCallExpr(IRCall* inst, EmitOpInfo outerPrec); @@ -495,7 +496,7 @@ public: virtual void emitVarExpr(IRInst* inst, EmitOpInfo const& outerPrec); virtual void emitOperandImpl(IRInst* inst, EmitOpInfo const& outerPrec); virtual void emitParamTypeImpl(IRType* type, String const& name); - virtual void emitIntrinsicCallExprImpl(IRCall* inst, UnownedStringSlice intrinsicDefinition, EmitOpInfo const& inOuterPrec); + virtual void emitIntrinsicCallExprImpl(IRCall* inst, UnownedStringSlice intrinsicDefinition, IRInst* intrinsicInst, EmitOpInfo const& inOuterPrec); virtual void emitFunctionPreambleImpl(IRInst* inst) { SLANG_UNUSED(inst); } virtual void emitLoopControlDecorationImpl(IRLoopControlDecoration* decl) { SLANG_UNUSED(decl); } virtual void emitIfDecorationsImpl(IRIfElse* ifInst) { SLANG_UNUSED(ifInst); } diff --git a/source/slang/slang-emit-cpp.cpp b/source/slang/slang-emit-cpp.cpp index 7e327cab4..bcb9ed9da 100644 --- a/source/slang/slang-emit-cpp.cpp +++ b/source/slang/slang-emit-cpp.cpp @@ -1112,6 +1112,7 @@ void CPPSourceEmitter::_emitType(IRType* type, DeclaratorInfo* declarator) void CPPSourceEmitter::emitIntrinsicCallExprImpl( IRCall* inst, UnownedStringSlice intrinsicDefinition, + IRInst* intrinsicInst, EmitOpInfo const& inOuterPrec) { // TODO: Much of this logic duplicates code that is already @@ -1176,7 +1177,7 @@ void CPPSourceEmitter::emitIntrinsicCallExprImpl( } // Use default impl (which will do intrinsic special macro expansion as necessary) - return Super::emitIntrinsicCallExprImpl(inst, intrinsicDefinition, inOuterPrec); + return Super::emitIntrinsicCallExprImpl(inst, intrinsicDefinition, intrinsicInst, inOuterPrec); } void CPPSourceEmitter::emitLoopControlDecorationImpl(IRLoopControlDecoration* decl) diff --git a/source/slang/slang-emit-cpp.h b/source/slang/slang-emit-cpp.h index cfd3d278d..90db780bd 100644 --- a/source/slang/slang-emit-cpp.h +++ b/source/slang/slang-emit-cpp.h @@ -68,7 +68,7 @@ protected: void emitComInterface(IRInterfaceType* interfaceType); virtual void emitRTTIObject(IRRTTIObject* rttiObject) SLANG_OVERRIDE; virtual bool tryEmitGlobalParamImpl(IRGlobalParam* varDecl, IRType* varType) SLANG_OVERRIDE; - virtual void emitIntrinsicCallExprImpl(IRCall* inst, UnownedStringSlice intrinsicDefinition, EmitOpInfo const& inOuterPrec) SLANG_OVERRIDE; + virtual void emitIntrinsicCallExprImpl(IRCall* inst, UnownedStringSlice intrinsicDefinition, IRInst* intrinsicInst, EmitOpInfo const& inOuterPrec) SLANG_OVERRIDE; virtual void emitLoopControlDecorationImpl(IRLoopControlDecoration* decl) SLANG_OVERRIDE; virtual void emitFuncDecorationsImpl(IRFunc* func) SLANG_OVERRIDE; virtual void emitVarDecorationsImpl(IRInst* var) SLANG_OVERRIDE; diff --git a/source/slang/slang-emit-cuda.cpp b/source/slang/slang-emit-cuda.cpp index 633b065c7..2417a64ec 100644 --- a/source/slang/slang-emit-cuda.cpp +++ b/source/slang/slang-emit-cuda.cpp @@ -432,7 +432,7 @@ void CUDASourceEmitter::_emitInitializerList(IRType* elementType, IRUse* operand m_writer->emit("\n}"); } -void CUDASourceEmitter::emitIntrinsicCallExprImpl(IRCall* inst, UnownedStringSlice intrinsicDefinition, EmitOpInfo const& inOuterPrec) +void CUDASourceEmitter::emitIntrinsicCallExprImpl(IRCall* inst, UnownedStringSlice intrinsicDefinition, IRInst* intrinsicInst, EmitOpInfo const& inOuterPrec) { // This works around the problem, where some intrinsics that require the "half" type enabled don't use the half/float16_t type. // For example `f16tof32` can operate on float16_t *and* uint. If the input is uint, although we are @@ -442,7 +442,7 @@ void CUDASourceEmitter::emitIntrinsicCallExprImpl(IRCall* inst, UnownedStringSli m_extensionTracker->requireBaseType(BaseType::Half); } - Super::emitIntrinsicCallExprImpl(inst, intrinsicDefinition, inOuterPrec); + Super::emitIntrinsicCallExprImpl(inst, intrinsicDefinition, intrinsicInst, inOuterPrec); } bool CUDASourceEmitter::tryEmitInstStmtImpl(IRInst* inst) diff --git a/source/slang/slang-emit-cuda.h b/source/slang/slang-emit-cuda.h index 097d7b741..13a497343 100644 --- a/source/slang/slang-emit-cuda.h +++ b/source/slang/slang-emit-cuda.h @@ -91,7 +91,7 @@ protected: virtual bool tryEmitGlobalParamImpl(IRGlobalParam* varDecl, IRType* varType) SLANG_OVERRIDE; virtual bool tryEmitInstExprImpl(IRInst* inst, const EmitOpInfo& inOuterPrec) SLANG_OVERRIDE; virtual bool tryEmitInstStmtImpl(IRInst* inst) SLANG_OVERRIDE; - virtual void emitIntrinsicCallExprImpl(IRCall* inst, UnownedStringSlice intrinsicDefinition, EmitOpInfo const& inOuterPrec) SLANG_OVERRIDE; + virtual void emitIntrinsicCallExprImpl(IRCall* inst, UnownedStringSlice intrinsicDefinition, IRInst* intrinsicInst, EmitOpInfo const& inOuterPrec) SLANG_OVERRIDE; virtual void emitModuleImpl(IRModule* module, DiagnosticSink* sink) SLANG_OVERRIDE; diff --git a/source/slang/slang-emit-glsl.cpp b/source/slang/slang-emit-glsl.cpp index f9fa90d2f..5cf508876 100644 --- a/source/slang/slang-emit-glsl.cpp +++ b/source/slang/slang-emit-glsl.cpp @@ -2592,7 +2592,8 @@ void GLSLSourceEmitter::emitSimpleTypeImpl(IRType* type) auto decorated = getResolvedInstForDecorations(type); UnownedStringSlice intrinsicDef; - if (findTargetIntrinsicDefinition(decorated, intrinsicDef)) + IRInst* intrinsicInst; + if (findTargetIntrinsicDefinition(decorated, intrinsicDef, intrinsicInst)) { m_writer->emit(intrinsicDef); return; diff --git a/source/slang/slang-intrinsic-expand.cpp b/source/slang/slang-intrinsic-expand.cpp index b96c2657d..08f21ff1d 100644 --- a/source/slang/slang-intrinsic-expand.cpp +++ b/source/slang/slang-intrinsic-expand.cpp @@ -3,15 +3,22 @@ #include "slang-emit-cuda.h" #include "slang-ir-util.h" +#include "../core/slang-char-util.h" namespace Slang { -void IntrinsicExpandContext::emit(IRCall* inst, IRUse* args, Int argCount, const UnownedStringSlice& intrinsicText) +void IntrinsicExpandContext::emit( + IRCall* inst, + IRUse* args, + Int argCount, + const UnownedStringSlice& intrinsicText, + IRInst* intrinsicInst) { m_args = args; m_argCount = argCount; m_text = intrinsicText; m_callInst = inst; + m_intrinsicInst = intrinsicInst; const auto returnType = inst->getDataType(); @@ -213,6 +220,22 @@ static bool _isResourceWrite(IRCall* call) return returnType && (as(returnType) != nullptr); } +static Index parseNumber(const char*& cursor, const char* end) +{ + char d = *cursor; + SLANG_RELEASE_ASSERT(CharUtil::isDigit(d)); + Index n = 0; + while (CharUtil::isDigit(d)) + { + n = n * 10 + (d - '0'); + cursor++; + if (cursor == end) + break; + d = *cursor; + } + return n; +} + const char* IntrinsicExpandContext::_emitSpecial(const char* cursor) { const char*const end = m_text.end(); @@ -224,24 +247,13 @@ const char* IntrinsicExpandContext::_emitSpecial(const char* cursor) SLANG_RELEASE_ASSERT(cursor < end); char d = *cursor++; + auto parseNat = [&]() -> Index + { + return parseNumber(cursor, end); + }; // Takes the first character of the number, parses the rest and returns the // total value. - auto isDigit = [](char c){ return c >= '0' && c <= '9'; }; - auto parseNat = [&](){ - char d = *cursor; - SLANG_RELEASE_ASSERT(isDigit(d)); - Index n = 0; - while(isDigit(d)) - { - n = n * 10 + (d - '0'); - cursor++; - if(cursor == end) - break; - d = *cursor; - } - return n; - }; switch (d) { @@ -840,7 +852,33 @@ const char* IntrinsicExpandContext::_emitSpecial(const char* cursor) } break; } - + case '[': + { + Index argIndex = parseNat(); + auto arg = m_intrinsicInst->getOperand((UInt)(1 + argIndex)); + if (!arg->getDataType()) + { + m_emitter->emitSimpleType((IRType*)arg); + } + else + { + switch (arg->getDataType()->getOp()) + { + case kIROp_TypeKind: + case kIROp_TypeType: + m_emitter->emitType((IRType*)arg); + break; + default: + m_emitter->emitOperand( + m_intrinsicInst->getOperand((UInt)(1 + argIndex)), + getInfo(EmitOp::General)); + break; + } + } + SLANG_ASSERT(*cursor == ']'); + cursor++; + break; + } default: SLANG_UNEXPECTED("bad format in intrinsic definition"); break; diff --git a/source/slang/slang-intrinsic-expand.h b/source/slang/slang-intrinsic-expand.h index cf072e3f4..3fb51b0f6 100644 --- a/source/slang/slang-intrinsic-expand.h +++ b/source/slang/slang-intrinsic-expand.h @@ -17,7 +17,7 @@ struct IntrinsicExpandContext { } - void emit(IRCall* inst, IRUse* args, Int argCount, const UnownedStringSlice& intrinsicText); + void emit(IRCall* inst, IRUse* args, Int argCount, const UnownedStringSlice& intrinsicText, IRInst* intirnsicInst); protected: const char* _emitSpecial(const char* cursor); @@ -25,6 +25,7 @@ protected: SourceWriter* m_writer; UnownedStringSlice m_text; IRCall* m_callInst; + IRInst* m_intrinsicInst = nullptr; IRUse* m_args = nullptr; Int m_argCount = 0; Index m_openParenCount = 0; diff --git a/source/slang/slang-ir-insts.h b/source/slang/slang-ir-insts.h index f0613dfa5..5a9fa9a32 100644 --- a/source/slang/slang-ir-insts.h +++ b/source/slang/slang-ir-insts.h @@ -4949,7 +4949,7 @@ IRTargetSpecificDecoration* findBestTargetDecoration( IRInst* val, CapabilityName targetCapabilityAtom); -bool findTargetIntrinsicDefinition(IRInst* callee, CapabilitySet const& targetCaps, UnownedStringSlice& outDefinition); +bool findTargetIntrinsicDefinition(IRInst* callee, CapabilitySet const& targetCaps, UnownedStringSlice& outDefinition, IRInst*& outInst); inline IRTargetIntrinsicDecoration* findBestTargetIntrinsicDecoration( IRInst* inInst, diff --git a/source/slang/slang-ir-lower-generic-call.cpp b/source/slang/slang-ir-lower-generic-call.cpp index e67d97a48..c34f243b5 100644 --- a/source/slang/slang-ir-lower-generic-call.cpp +++ b/source/slang/slang-ir-lower-generic-call.cpp @@ -242,8 +242,9 @@ namespace Slang // Don't process intrinsic functions. UnownedStringSlice intrinsicDef; + IRInst* intrinsicInst; if (findTargetIntrinsicDefinition(getResolvedInstForDecorations(loweredFunc), - sharedContext->targetProgram->getTargetReq()->getTargetCaps(), intrinsicDef)) + sharedContext->targetProgram->getTargetReq()->getTargetCaps(), intrinsicDef, intrinsicInst)) return; // All callees should have already been lowered in lower-generic-functions pass. diff --git a/source/slang/slang-ir-lower-generic-function.cpp b/source/slang/slang-ir-lower-generic-function.cpp index 47dc6cc79..cb6555a7e 100644 --- a/source/slang/slang-ir-lower-generic-function.cpp +++ b/source/slang/slang-ir-lower-generic-function.cpp @@ -45,7 +45,8 @@ namespace Slang SLANG_ASSERT(func); // Do not lower intrinsic functions. UnownedStringSlice intrinsicDef; - if (!func->isDefinition() || findTargetIntrinsicDefinition(func, sharedContext->targetProgram->getTargetReq()->getTargetCaps(), intrinsicDef)) + IRInst* intrinsicInst; + if (!func->isDefinition() || findTargetIntrinsicDefinition(func, sharedContext->targetProgram->getTargetReq()->getTargetCaps(), intrinsicDef, intrinsicInst)) { sharedContext->loweredGenericFunctions[genericValue] = genericValue; return genericValue; diff --git a/source/slang/slang-ir-specialize-function-call.cpp b/source/slang/slang-ir-specialize-function-call.cpp index 7e5d9b59c..7cf1516b8 100644 --- a/source/slang/slang-ir-specialize-function-call.cpp +++ b/source/slang/slang-ir-specialize-function-call.cpp @@ -172,7 +172,8 @@ struct FunctionParameterSpecializationContext if(!func->isDefinition()) return false; UnownedStringSlice def; - if (findTargetIntrinsicDefinition(func, codeGenContext->getTargetReq()->getTargetCaps(), def)) + IRInst* intrinsicInst; + if (findTargetIntrinsicDefinition(func, codeGenContext->getTargetReq()->getTargetCaps(), def, intrinsicInst)) return false; // With the basic checks out of the way, there are // two conditions we care about: diff --git a/source/slang/slang-ir-specialize-resources.cpp b/source/slang/slang-ir-specialize-resources.cpp index 93a964d95..e1c1788cb 100644 --- a/source/slang/slang-ir-specialize-resources.cpp +++ b/source/slang/slang-ir-specialize-resources.cpp @@ -291,7 +291,8 @@ struct ResourceOutputSpecializationPass if(!func->isDefinition()) return false; UnownedStringSlice def; - if (findTargetIntrinsicDefinition(func, targetRequest->getTargetCaps(), def)) + IRInst* intrinsicInst; + if (findTargetIntrinsicDefinition(func, targetRequest->getTargetCaps(), def, intrinsicInst)) return false; // If any of the parameters of the function are `out` diff --git a/source/slang/slang-ir-spirv-legalize.cpp b/source/slang/slang-ir-spirv-legalize.cpp index e863f279e..d2a6bd557 100644 --- a/source/slang/slang-ir-spirv-legalize.cpp +++ b/source/slang/slang-ir-spirv-legalize.cpp @@ -1745,6 +1745,7 @@ struct SPIRVLegalizationContext : public SourceEmitterBase case kIROp_Neq: case kIROp_Eql: case kIROp_Call: + case kIROp_SPIRVAsm: return true; default: return false; diff --git a/source/slang/slang-ir.cpp b/source/slang/slang-ir.cpp index 11f8cdb87..b53bfc9d1 100644 --- a/source/slang/slang-ir.cpp +++ b/source/slang/slang-ir.cpp @@ -7387,6 +7387,8 @@ namespace Slang case BaseType::UInt16: case BaseType::UInt: case BaseType::UInt64: + case BaseType::IntPtr: + case BaseType::UIntPtr: return true; default: return false; @@ -8305,11 +8307,12 @@ namespace Slang IRInst* val, CapabilityName targetCapabilityAtom); - bool findTargetIntrinsicDefinition(IRInst* callee, CapabilitySet const& targetCaps, UnownedStringSlice& outDefinition) + bool findTargetIntrinsicDefinition(IRInst* callee, CapabilitySet const& targetCaps, UnownedStringSlice& outDefinition, IRInst*& outInst) { if (auto decor = findBestTargetIntrinsicDecoration(callee, targetCaps)) { outDefinition = decor->getDefinition(); + outInst = decor; return true; } auto func = as(callee); @@ -8320,6 +8323,7 @@ namespace Slang if (auto genAsm = as(block->getTerminator())) { outDefinition = genAsm->getAsm(); + outInst = genAsm; return true; } } diff --git a/source/slang/slang-lower-to-ir.cpp b/source/slang/slang-lower-to-ir.cpp index 9de1833a0..debe5078d 100644 --- a/source/slang/slang-lower-to-ir.cpp +++ b/source/slang/slang-lower-to-ir.cpp @@ -6422,8 +6422,22 @@ struct StmtLoweringVisitor : StmtVisitor void visitIntrinsicAsmStmt(IntrinsicAsmStmt* stmt) { auto builder = getBuilder(); - IRInst* arg = builder->getStringValue(stmt->asmText.getUnownedSlice()); - builder->emitIntrinsicInst(nullptr, kIROp_GenericAsm, 1, &arg); + ShortList args; + args.add(builder->getStringValue(stmt->asmText.getUnownedSlice())); + for (auto argExpr : stmt->args) + { + if (auto typetype = as(argExpr->type)) + { + auto type = lowerType(context, typetype->getType()); + args.add(type); + } + else + { + auto argVal = lowerRValueExpr(context, argExpr); + args.add(argVal.val); + } + } + builder->emitIntrinsicInst(nullptr, kIROp_GenericAsm, args.getCount(), args.getArrayView().getBuffer()); } void visitSwitchStmt(SwitchStmt* stmt) diff --git a/source/slang/slang-parser.cpp b/source/slang/slang-parser.cpp index d90e541f8..6f7b0b991 100644 --- a/source/slang/slang-parser.cpp +++ b/source/slang/slang-parser.cpp @@ -5075,6 +5075,12 @@ namespace Slang parser->ReadToken(); stmt->asmText = getStringLiteralTokenValue(parser->ReadToken(TokenType::StringLiteral)); + + while (AdvanceIf(parser, TokenType::Comma)) + { + stmt->args.add(parser->ParseArgExpr()); + } + parser->ReadToken(TokenType::Semicolon); return stmt; } -- cgit v1.2.3