diff options
| author | Yong He <yonghe@outlook.com> | 2023-08-30 11:45:47 -0700 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2023-08-30 11:45:47 -0700 |
| commit | bb15f5b494b20e957127f0ffa6040c94349da0d0 (patch) | |
| tree | b060f2b5751cc56c5f1fd8f7b37434e56b6b4e5b /source | |
| parent | 019f702e24d2d1d6ecf53d71f87776a83db96608 (diff) | |
Fix memory barrier intrinsics. (#3166)
* Fix memory barrier intrinsics.
Make them produce the same SPIR-V code as DXC.
* Fix.
* Add a FileCheck barrier test for the SPIR-V backend.
* Fix glsl intrinsic definition.
* Fix intrinsics.
* Fix intrinsics.
* Fix.
* Fix.
---------
Co-authored-by: Yong He <yhe@nvidia.com>
Diffstat (limited to 'source')
| -rw-r--r-- | source/slang/hlsl.meta.slang | 141 | ||||
| -rw-r--r-- | source/slang/slang-emit.cpp | 3 | ||||
| -rw-r--r-- | source/slang/slang-ir-inline.cpp | 44 | ||||
| -rw-r--r-- | source/slang/slang-ir-inline.h | 3 | ||||
| -rw-r--r-- | source/slang/slang-lower-to-ir.cpp | 4 |
5 files changed, 143 insertions, 52 deletions
diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang index fd668f73a..e230ee6b0 100644 --- a/source/slang/hlsl.meta.slang +++ b/source/slang/hlsl.meta.slang @@ -1076,14 +1076,36 @@ bool all(matrix<T,N,M> x) } // Barrier for writes to all memory spaces (HLSL SM 5.0) -__target_intrinsic(glsl, "memoryBarrier(), groupMemoryBarrier(), memoryBarrierImage(), memoryBarrierBuffer()") -__target_intrinsic(cuda, "__threadfence()") -void AllMemoryBarrier(); +__glsl_extension(GL_KHR_memory_scope_semantics) +void AllMemoryBarrier() +{ + __target_switch + { + case hlsl: __intrinsic_asm "AllMemoryBarrier"; + case glsl: __intrinsic_asm "memoryBarrier(gl_ScopeDevice, (gl_StorageSemanticsShared|gl_StorageSemanticsImage|gl_StorageSemanticsBuffer), gl_SemanticsAcquireRelease)"; + case cuda: __intrinsic_asm "__threadfence()"; + case spirv: spirv_asm + { + OpMemoryBarrier Device AcquireRelease|UniformMemory|WorkgroupMemory|ImageMemory; + }; + } +} // Thread-group sync and barrier for writes to all memory spaces (HLSL SM 5.0) -__target_intrinsic(glsl, "memoryBarrier(), groupMemoryBarrier(), memoryBarrierImage(), memoryBarrierBuffer(), barrier()") -__target_intrinsic(cuda, "__syncthreads()") -void AllMemoryBarrierWithGroupSync(); +__glsl_extension(GL_KHR_memory_scope_semantics) +void AllMemoryBarrierWithGroupSync() +{ + __target_switch + { + case hlsl: __intrinsic_asm "AllMemoryBarrierWithGroupSync"; + case glsl: __intrinsic_asm "controlBarrier(gl_ScopeWorkgroup, gl_ScopeDevice, (gl_StorageSemanticsShared|gl_StorageSemanticsImage|gl_StorageSemanticsBuffer), gl_SemanticsAcquireRelease)"; + case cuda: __intrinsic_asm "__syncthreads()"; + case spirv: spirv_asm + { + OpControlBarrier Workgroup Device AcquireRelease|UniformMemory|WorkgroupMemory|ImageMemory; + }; + } +} // Test if any components is non-zero (HLSL SM 1.0) @@ -1993,13 +2015,35 @@ __target_intrinsic(spirv, "OpExtInst resultType resultId glsl450 Determinant _0" T determinant(matrix<T,N,N> m); // Barrier for 
device memory -__target_intrinsic(glsl, "memoryBarrier(), memoryBarrierImage(), memoryBarrierBuffer()") -__target_intrinsic(cuda, "__threadfence()") -void DeviceMemoryBarrier(); +__glsl_extension(GL_KHR_memory_scope_semantics) +void DeviceMemoryBarrier() +{ + __target_switch + { + case hlsl: __intrinsic_asm "DeviceMemoryBarrier"; + case glsl: __intrinsic_asm "memoryBarrier(gl_ScopeDevice, (gl_StorageSemanticsImage|gl_StorageSemanticsBuffer), gl_SemanticsAcquireRelease)"; + case cuda: __intrinsic_asm "__threadfence()"; + case spirv: spirv_asm + { + OpMemoryBarrier Device AcquireRelease|UniformMemory|ImageMemory; + }; + } +} -__target_intrinsic(glsl, "memoryBarrier(), memoryBarrierImage(), memoryBarrierBuffer(), barrier()") -__target_intrinsic(glsl, "__syncthreads()") -void DeviceMemoryBarrierWithGroupSync(); +__glsl_extension(GL_KHR_memory_scope_semantics) +void DeviceMemoryBarrierWithGroupSync() +{ + __target_switch + { + case hlsl: __intrinsic_asm "DeviceMemoryBarrierWithGroupSync"; + case glsl: __intrinsic_asm "controlBarrier(gl_ScopeWorkgroup, gl_ScopeDevice, (gl_StorageSemanticsImage|gl_StorageSemanticsBuffer), gl_SemanticsAcquireRelease)"; + case cuda: __intrinsic_asm "__syncthreads()"; + case spirv: spirv_asm + { + OpControlBarrier Workgroup Device AcquireRelease|UniformMemory|ImageMemory; + }; + } +} // Vector distance @@ -2604,34 +2648,51 @@ uint GetRenderTargetSampleCount(); float2 GetRenderTargetSamplePosition(int Index); // Group memory barrier -__target_intrinsic(glsl, "groupMemoryBarrier") -__target_intrinsic(cuda, "__threadfence_block") -__target_intrinsic(spirv, "OpMemoryBarrier const(int,ScopeWorkgroup)" - "const(int, MemorySemanticsAcquireReleaseMask" - "| MemorySemanticsUniformMemoryMask" - "| MemorySemanticsImageMemoryMask" - "| MemorySemanticsAtomicCounterMemoryMask" - "| MemorySemanticsWorkgroupMemoryMask)") -void GroupMemoryBarrier(); - -__target_intrinsic(glsl, "subgroupBarrier") -__target_intrinsic(spirv, "OpControlBarrier 
const(int,ScopeSubgroup) const(int,ScopeSubgroup)" - "const(int, MemorySemanticsAcquireReleaseMask" - "| MemorySemanticsUniformMemoryMask" - "| MemorySemanticsImageMemoryMask" - "| MemorySemanticsAtomicCounterMemoryMask" - "| MemorySemanticsWorkgroupMemoryMask)") -void __subgroupBarrier(); - -__target_intrinsic(glsl, "groupMemoryBarrier(), barrier()") -__target_intrinsic(cuda, "__syncthreads()") -__target_intrinsic(spirv, "OpControlBarrier const(int,ScopeWorkgroup) const(int, ScopeWorkgroup)" - "const(int, MemorySemanticsAcquireReleaseMask" - "| MemorySemanticsUniformMemoryMask" - "| MemorySemanticsImageMemoryMask" - "| MemorySemanticsAtomicCounterMemoryMask" - "| MemorySemanticsWorkgroupMemoryMask)") -void GroupMemoryBarrierWithGroupSync(); +__glsl_extension(GL_KHR_memory_scope_semantics) +void GroupMemoryBarrier() +{ + __target_switch + { + case glsl: __intrinsic_asm "memoryBarrier(gl_ScopeWorkgroup, gl_StorageSemanticsShared, gl_SemanticsAcquireRelease)"; + case hlsl: __intrinsic_asm "GroupMemoryBarrier"; + case cuda: __intrinsic_asm "__threadfence_block"; + case spirv: + spirv_asm + { + OpMemoryBarrier Workgroup AcquireRelease|WorkgroupMemory + }; + } +} + +void __subgroupBarrier() +{ + __target_switch + { + case glsl: __intrinsic_asm "subgroupBarrier"; + case hlsl: __intrinsic_asm "GroupMemoryBarrierWithGroupSync"; + case cuda: __intrinsic_asm "__syncthreads()"; + case spirv: + spirv_asm + { + OpControlBarrier Subgroup Subgroup AcquireRelease|WorkgroupMemory|ImageMemory|UniformMemory + }; + } +} + +void GroupMemoryBarrierWithGroupSync() +{ + __target_switch + { + case glsl: __intrinsic_asm "barrier"; + case hlsl: __intrinsic_asm "GroupMemoryBarrierWithGroupSync"; + case cuda: __intrinsic_asm "__syncthreads()"; + case spirv: + spirv_asm + { + OpControlBarrier Workgroup Workgroup AcquireRelease|WorkgroupMemory + }; + } +} // Atomics diff --git a/source/slang/slang-emit.cpp b/source/slang/slang-emit.cpp index 8ee641acc..8dc0d2983 100644 --- 
a/source/slang/slang-emit.cpp +++ b/source/slang/slang-emit.cpp @@ -899,6 +899,9 @@ Result linkAndOptimizeIR( eliminateMultiLevelBreak(irModule); + if (isKhronosTarget(targetRequest) && targetRequest->shouldEmitSPIRVDirectly()) + performIntrinsicFunctionFunctionInlining(irModule); + simplifyIR(irModule, sink); // As a late step, we need to take the SSA-form IR and move things *out* diff --git a/source/slang/slang-ir-inline.cpp b/source/slang/slang-ir-inline.cpp index e171e2dd3..4308d16f0 100644 --- a/source/slang/slang-ir-inline.cpp +++ b/source/slang/slang-ir-inline.cpp @@ -246,13 +246,6 @@ struct InliningPassBase // outCallSite.callee = calleeFunc; - if (callee->findDecoration<IRIntrinsicOpDecoration>()) - return true; - - // We cannot inline a function that is defined by a generic asm inst. - if (hasGenericAsmInst(callee)) - return false; - for (auto decor : callee->getDecorations()) { switch (decor->getOp()) @@ -260,13 +253,15 @@ struct InliningPassBase case kIROp_IntrinsicOpDecoration: return true; case kIROp_RequireSPIRVCapabilityDecoration: - case kIROp_RequireSPIRVVersionDecoration: - case kIROp_RequireGLSLExtensionDecoration: - case kIROp_RequireGLSLVersionDecoration: + // Don't inline a function with spirv capability decoration to avoid losing it. return false; } } + // We cannot inline a function that is defined by a generic asm inst. + if (hasGenericAsmInst(callee)) + return false; + // At this point the `CallSiteInfo` is complete and // could be used for inlining, but we have additional // checks to make. 
@@ -883,6 +878,35 @@ void performGLSLResourceReturnFunctionInlining(IRModule* module) } } +struct IntrinsicFunctionInliningPass : InliningPassBase +{ + typedef InliningPassBase Super; + + IntrinsicFunctionInliningPass(IRModule* module) + : Super(module) + {} + + bool shouldInline(CallSiteInfo const& info) + { + auto func = as<IRFunc>(getResolvedInstForDecorations(info.callee)); + if (!func) + return false; + if (func->findDecorationImpl(kIROp_RequireSPIRVCapabilityDecoration)) + return false; + auto returnInst = as<IRReturn>(func->getFirstBlock()->getTerminator()); + if (!returnInst) + return false; + auto firstInst = as<IRSPIRVAsm>(func->getFirstBlock()->getFirstOrdinaryInst()); + return returnInst->getVal() == firstInst; + } +}; + +void performIntrinsicFunctionFunctionInlining(IRModule* module) +{ + IntrinsicFunctionInliningPass pass(module); + pass.considerAllCallSites(); +} + struct CustomInliningPass : InliningPassBase { typedef InliningPassBase Super; diff --git a/source/slang/slang-ir-inline.h b/source/slang/slang-ir-inline.h index 61a411d32..6eb3a1bb1 100644 --- a/source/slang/slang-ir-inline.h +++ b/source/slang/slang-ir-inline.h @@ -25,6 +25,9 @@ namespace Slang /// Inline calls to functions that returns a resource/sampler via either return value or output parameter. void performGLSLResourceReturnFunctionInlining(IRModule* module); + /// Inline simple intrinsic functions whose definition is a single asm block. + void performIntrinsicFunctionFunctionInlining(IRModule* module); + /// Inline a specific call. 
bool inlineCall(IRCall* call); } diff --git a/source/slang/slang-lower-to-ir.cpp b/source/slang/slang-lower-to-ir.cpp index 596d09184..bbd494ee9 100644 --- a/source/slang/slang-lower-to-ir.cpp +++ b/source/slang/slang-lower-to-ir.cpp @@ -3289,9 +3289,9 @@ struct ExprLoweringVisitorBase : ExprVisitor<Derived, LoweredValInfo> case SPIRVAsmOperand::NamedValue: { const auto v = operand.knownValue; - const auto i = builder->getIntValue(builder->getIntType(), v); + const auto i = builder->getIntValue(builder->getUIntType(), v); if(operand.wrapInId) - return builder->emitSPIRVAsmOperandEnum(i, builder->getIntType()); + return builder->emitSPIRVAsmOperandEnum(i, builder->getUIntType()); else return builder->emitSPIRVAsmOperandEnum(i); } |
