summaryrefslogtreecommitdiffstats
path: root/source/slang
diff options
context:
space:
mode:
author Yong He <yonghe@outlook.com> 2023-08-30 11:45:47 -0700
committer GitHub <noreply@github.com> 2023-08-30 11:45:47 -0700
commit bb15f5b494b20e957127f0ffa6040c94349da0d0 (patch)
tree b060f2b5751cc56c5f1fd8f7b37434e56b6b4e5b /source/slang
parent 019f702e24d2d1d6ecf53d71f87776a83db96608 (diff)
Fix memory barrier intrinsics. (#3166)
* Fix memory barrier intrinsics. Makes them produce the same spirv code as dxc.
* Fix.
* filecheck barrier test for spirv backend.
* Fix glsl intrinsic definition.
* Fix intrinsics.
* Fix intrinsics.
* Fix.
* Fix.
---------
Co-authored-by: Yong He <yhe@nvidia.com>
Diffstat (limited to 'source/slang')
-rw-r--r-- source/slang/hlsl.meta.slang 141
-rw-r--r-- source/slang/slang-emit.cpp 3
-rw-r--r-- source/slang/slang-ir-inline.cpp 44
-rw-r--r-- source/slang/slang-ir-inline.h 3
-rw-r--r-- source/slang/slang-lower-to-ir.cpp 4
5 files changed, 143 insertions, 52 deletions
diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang
index fd668f73a..e230ee6b0 100644
--- a/source/slang/hlsl.meta.slang
+++ b/source/slang/hlsl.meta.slang
@@ -1076,14 +1076,36 @@ bool all(matrix<T,N,M> x)
}
// Barrier for writes to all memory spaces (HLSL SM 5.0)
-__target_intrinsic(glsl, "memoryBarrier(), groupMemoryBarrier(), memoryBarrierImage(), memoryBarrierBuffer()")
-__target_intrinsic(cuda, "__threadfence()")
-void AllMemoryBarrier();
+__glsl_extension(GL_KHR_memory_scope_semantics) // presumably needed for the gl_Scope*/gl_StorageSemantics*/gl_Semantics* names used by the glsl case
+void AllMemoryBarrier() // memory-only barrier (no thread sync); each target gets its own intrinsic text below
+{
+ __target_switch // exactly one case is selected per code-generation target
+ {
+ case hlsl: __intrinsic_asm "AllMemoryBarrier"; // HLSL: forwards to the builtin of the same name
+ case glsl: __intrinsic_asm "memoryBarrier(gl_ScopeDevice, (gl_StorageSemanticsShared|gl_StorageSemanticsImage|gl_StorageSemanticsBuffer), gl_SemanticsAcquireRelease)"; // device scope; shared + image + buffer storage; acquire-release
+ case cuda: __intrinsic_asm "__threadfence()"; // CUDA: same mapping the removed __target_intrinsic used
+ case spirv: spirv_asm // Device scope, AcquireRelease over uniform, workgroup and image memory
+ {
+ OpMemoryBarrier Device AcquireRelease|UniformMemory|WorkgroupMemory|ImageMemory;
+ };
+ }
+}
// Thread-group sync and barrier for writes to all memory spaces (HLSL SM 5.0)
-__target_intrinsic(glsl, "memoryBarrier(), groupMemoryBarrier(), memoryBarrierImage(), memoryBarrierBuffer(), barrier()")
-__target_intrinsic(cuda, "__syncthreads()")
-void AllMemoryBarrierWithGroupSync();
+__glsl_extension(GL_KHR_memory_scope_semantics) // presumably needed for the gl_Scope*/gl_StorageSemantics*/gl_Semantics* names used by the glsl case
+void AllMemoryBarrierWithGroupSync() // same memory classes as AllMemoryBarrier, expressed as a control (execution) barrier
+{
+ __target_switch // exactly one case is selected per code-generation target
+ {
+ case hlsl: __intrinsic_asm "AllMemoryBarrierWithGroupSync"; // HLSL: forwards to the builtin of the same name
+ case glsl: __intrinsic_asm "controlBarrier(gl_ScopeWorkgroup, gl_ScopeDevice, (gl_StorageSemanticsShared|gl_StorageSemanticsImage|gl_StorageSemanticsBuffer), gl_SemanticsAcquireRelease)"; // workgroup + device scopes; shared + image + buffer storage
+ case cuda: __intrinsic_asm "__syncthreads()"; // CUDA: same mapping the removed __target_intrinsic used
+ case spirv: spirv_asm // scopes Workgroup then Device; AcquireRelease over uniform, workgroup and image memory
+ {
+ OpControlBarrier Workgroup Device AcquireRelease|UniformMemory|WorkgroupMemory|ImageMemory;
+ };
+ }
+}
// Test if any components is non-zero (HLSL SM 1.0)
@@ -1993,13 +2015,35 @@ __target_intrinsic(spirv, "OpExtInst resultType resultId glsl450 Determinant _0"
T determinant(matrix<T,N,N> m);
// Barrier for device memory
-__target_intrinsic(glsl, "memoryBarrier(), memoryBarrierImage(), memoryBarrierBuffer()")
-__target_intrinsic(cuda, "__threadfence()")
-void DeviceMemoryBarrier();
+__glsl_extension(GL_KHR_memory_scope_semantics) // presumably needed for the gl_Scope*/gl_StorageSemantics*/gl_Semantics* names used by the glsl case
+void DeviceMemoryBarrier() // memory-only barrier; unlike AllMemoryBarrier the glsl/spirv cases omit shared/workgroup memory
+{
+ __target_switch // exactly one case is selected per code-generation target
+ {
+ case hlsl: __intrinsic_asm "DeviceMemoryBarrier"; // HLSL: forwards to the builtin of the same name
+ case glsl: __intrinsic_asm "memoryBarrier(gl_ScopeDevice, (gl_StorageSemanticsImage|gl_StorageSemanticsBuffer), gl_SemanticsAcquireRelease)"; // device scope; image + buffer storage only
+ case cuda: __intrinsic_asm "__threadfence()"; // CUDA: same mapping the removed __target_intrinsic used
+ case spirv: spirv_asm // Device scope, AcquireRelease over uniform and image memory
+ {
+ OpMemoryBarrier Device AcquireRelease|UniformMemory|ImageMemory;
+ };
+ }
+}
-__target_intrinsic(glsl, "memoryBarrier(), memoryBarrierImage(), memoryBarrierBuffer(), barrier()")
-__target_intrinsic(glsl, "__syncthreads()")
-void DeviceMemoryBarrierWithGroupSync();
+__glsl_extension(GL_KHR_memory_scope_semantics) // presumably needed for the gl_Scope*/gl_StorageSemantics*/gl_Semantics* names used by the glsl case
+void DeviceMemoryBarrierWithGroupSync() // same memory classes as DeviceMemoryBarrier, expressed as a control (execution) barrier
+{
+ __target_switch // exactly one case is selected per code-generation target
+ {
+ case hlsl: __intrinsic_asm "DeviceMemoryBarrierWithGroupSync"; // HLSL: forwards to the builtin of the same name
+ case glsl: __intrinsic_asm "controlBarrier(gl_ScopeWorkgroup, gl_ScopeDevice, (gl_StorageSemanticsImage|gl_StorageSemanticsBuffer), gl_SemanticsAcquireRelease)"; // workgroup + device scopes; image + buffer storage only
+ case cuda: __intrinsic_asm "__syncthreads()"; // the removed declaration tagged this string as glsl; it is now the cuda case
+ case spirv: spirv_asm // scopes Workgroup then Device; AcquireRelease over uniform and image memory
+ {
+ OpControlBarrier Workgroup Device AcquireRelease|UniformMemory|ImageMemory;
+ };
+ }
+}
// Vector distance
@@ -2604,34 +2648,51 @@ uint GetRenderTargetSampleCount();
float2 GetRenderTargetSamplePosition(int Index);
// Group memory barrier
-__target_intrinsic(glsl, "groupMemoryBarrier")
-__target_intrinsic(cuda, "__threadfence_block")
-__target_intrinsic(spirv, "OpMemoryBarrier const(int,ScopeWorkgroup)"
- "const(int, MemorySemanticsAcquireReleaseMask"
- "| MemorySemanticsUniformMemoryMask"
- "| MemorySemanticsImageMemoryMask"
- "| MemorySemanticsAtomicCounterMemoryMask"
- "| MemorySemanticsWorkgroupMemoryMask)")
-void GroupMemoryBarrier();
-
-__target_intrinsic(glsl, "subgroupBarrier")
-__target_intrinsic(spirv, "OpControlBarrier const(int,ScopeSubgroup) const(int,ScopeSubgroup)"
- "const(int, MemorySemanticsAcquireReleaseMask"
- "| MemorySemanticsUniformMemoryMask"
- "| MemorySemanticsImageMemoryMask"
- "| MemorySemanticsAtomicCounterMemoryMask"
- "| MemorySemanticsWorkgroupMemoryMask)")
-void __subgroupBarrier();
-
-__target_intrinsic(glsl, "groupMemoryBarrier(), barrier()")
-__target_intrinsic(cuda, "__syncthreads()")
-__target_intrinsic(spirv, "OpControlBarrier const(int,ScopeWorkgroup) const(int, ScopeWorkgroup)"
- "const(int, MemorySemanticsAcquireReleaseMask"
- "| MemorySemanticsUniformMemoryMask"
- "| MemorySemanticsImageMemoryMask"
- "| MemorySemanticsAtomicCounterMemoryMask"
- "| MemorySemanticsWorkgroupMemoryMask)")
-void GroupMemoryBarrierWithGroupSync();
+__glsl_extension(GL_KHR_memory_scope_semantics) // presumably needed for the gl_Scope*/gl_StorageSemantics*/gl_Semantics* names used by the glsl case
+void GroupMemoryBarrier() // memory-only barrier restricted to workgroup scope and shared/workgroup memory
+{
+ __target_switch // exactly one case is selected per code-generation target
+ {
+ case glsl: __intrinsic_asm "memoryBarrier(gl_ScopeWorkgroup, gl_StorageSemanticsShared, gl_SemanticsAcquireRelease)"; // workgroup scope; shared storage only
+ case hlsl: __intrinsic_asm "GroupMemoryBarrier"; // HLSL: forwards to the builtin of the same name
+ case cuda: __intrinsic_asm "__threadfence_block"; // NOTE(review): no "()" here, unlike the other cuda cases in this file - confirm the bare-name form is intended
+ case spirv:
+ spirv_asm // Workgroup scope, AcquireRelease over workgroup memory only (the removed version also listed uniform/image/atomic-counter memory)
+ {
+ OpMemoryBarrier Workgroup AcquireRelease|WorkgroupMemory
+ };
+ }
+}
+
+void __subgroupBarrier() // subgroup-scoped execution barrier for glsl/spirv; other targets use the substitutes below
+{
+ __target_switch // exactly one case is selected per code-generation target
+ {
+ case glsl: __intrinsic_asm "subgroupBarrier"; // GLSL: same builtin name the removed __target_intrinsic used
+ case hlsl: __intrinsic_asm "GroupMemoryBarrierWithGroupSync"; // NOTE(review): this is the group-wide intrinsic, wider than subgroup scope - confirm intended
+ case cuda: __intrinsic_asm "__syncthreads()"; // NOTE(review): same group-wide substitution as the hlsl case - confirm intended
+ case spirv:
+ spirv_asm // Subgroup for both scope operands; AcquireRelease over workgroup, image and uniform memory
+ {
+ OpControlBarrier Subgroup Subgroup AcquireRelease|WorkgroupMemory|ImageMemory|UniformMemory
+ };
+ }
+}
+
+void GroupMemoryBarrierWithGroupSync() // workgroup-scoped execution barrier covering shared/workgroup memory
+{
+ __target_switch // exactly one case is selected per code-generation target
+ {
+ case glsl: __intrinsic_asm "barrier"; // the removed version emitted "groupMemoryBarrier(), barrier()"; only barrier remains
+ case hlsl: __intrinsic_asm "GroupMemoryBarrierWithGroupSync"; // HLSL: forwards to the builtin of the same name
+ case cuda: __intrinsic_asm "__syncthreads()"; // CUDA: same mapping the removed __target_intrinsic used
+ case spirv:
+ spirv_asm // Workgroup for both scope operands; AcquireRelease over workgroup memory only
+ {
+ OpControlBarrier Workgroup Workgroup AcquireRelease|WorkgroupMemory
+ };
+ }
+}
// Atomics
diff --git a/source/slang/slang-emit.cpp b/source/slang/slang-emit.cpp
index 8ee641acc..8dc0d2983 100644
--- a/source/slang/slang-emit.cpp
+++ b/source/slang/slang-emit.cpp
@@ -899,6 +899,9 @@ Result linkAndOptimizeIR(
eliminateMultiLevelBreak(irModule);
+ if (isKhronosTarget(targetRequest) && targetRequest->shouldEmitSPIRVDirectly())
+ performIntrinsicFunctionFunctionInlining(irModule);
+
simplifyIR(irModule, sink);
// As a late step, we need to take the SSA-form IR and move things *out*
diff --git a/source/slang/slang-ir-inline.cpp b/source/slang/slang-ir-inline.cpp
index e171e2dd3..4308d16f0 100644
--- a/source/slang/slang-ir-inline.cpp
+++ b/source/slang/slang-ir-inline.cpp
@@ -246,13 +246,6 @@ struct InliningPassBase
//
outCallSite.callee = calleeFunc;
- if (callee->findDecoration<IRIntrinsicOpDecoration>())
- return true;
-
- // We cannot inline a function that is defined by a generic asm inst.
- if (hasGenericAsmInst(callee))
- return false;
-
for (auto decor : callee->getDecorations())
{
switch (decor->getOp())
@@ -260,13 +253,15 @@ struct InliningPassBase
case kIROp_IntrinsicOpDecoration:
return true;
case kIROp_RequireSPIRVCapabilityDecoration:
- case kIROp_RequireSPIRVVersionDecoration:
- case kIROp_RequireGLSLExtensionDecoration:
- case kIROp_RequireGLSLVersionDecoration:
+ // Don't inline a function with spirv capability decoration to avoid losing it.
return false;
}
}
+ // We cannot inline a function that is defined by a generic asm inst.
+ if (hasGenericAsmInst(callee))
+ return false;
+
// At this point the `CallSiteInfo` is complete and
// could be used for inlining, but we have additional
// checks to make.
@@ -883,6 +878,35 @@ void performGLSLResourceReturnFunctionInlining(IRModule* module)
}
}
+/// Inlining pass that only accepts call sites whose callee is a function
+/// consisting of a SPIR-V asm block whose result is returned directly.
+struct IntrinsicFunctionInliningPass : InliningPassBase
+{
+ typedef InliningPassBase Super;
+
+ IntrinsicFunctionInliningPass(IRModule* module)
+ : Super(module)
+ {}
+
+ /// Decide whether the call site described by `info` should be inlined.
+ ///
+ /// Returns true only when the resolved callee is an `IRFunc` that
+ /// - carries no `RequireSPIRVCapability` decoration,
+ /// - has a first block terminated by an `IRReturn`, and
+ /// - returns exactly the `IRSPIRVAsm` that is the first ordinary
+ /// instruction of that block.
+ bool shouldInline(CallSiteInfo const& info)
+ {
+ auto func = as<IRFunc>(getResolvedInstForDecorations(info.callee));
+ if (!func)
+ return false;
+ // Inlining would lose the capability requirement attached to the callee.
+ if (func->findDecorationImpl(kIROp_RequireSPIRVCapabilityDecoration))
+ return false;
+ // Guard against a callee with no blocks (presumably a declaration
+ // without a body) instead of dereferencing a null block.
+ auto firstBlock = func->getFirstBlock();
+ if (!firstBlock)
+ return false;
+ auto returnInst = as<IRReturn>(firstBlock->getTerminator());
+ if (!returnInst)
+ return false;
+ // A failed cast yields null; reject it explicitly so that a null
+ // return value can never compare equal to it.
+ auto firstInst = as<IRSPIRVAsm>(firstBlock->getFirstOrdinaryInst());
+ if (!firstInst)
+ return false;
+ return returnInst->getVal() == firstInst;
+ }
+};
+
+/// Build an IntrinsicFunctionInliningPass for `module` and have it
+/// consider all call sites.
+void performIntrinsicFunctionFunctionInlining(IRModule* module)
+{
+ IntrinsicFunctionInliningPass(module).considerAllCallSites();
+}
+
struct CustomInliningPass : InliningPassBase
{
typedef InliningPassBase Super;
diff --git a/source/slang/slang-ir-inline.h b/source/slang/slang-ir-inline.h
index 61a411d32..6eb3a1bb1 100644
--- a/source/slang/slang-ir-inline.h
+++ b/source/slang/slang-ir-inline.h
@@ -25,6 +25,9 @@ namespace Slang
/// Inline calls to functions that returns a resource/sampler via either return value or output parameter.
void performGLSLResourceReturnFunctionInlining(IRModule* module);
+ /// Inline calls to intrinsic functions whose body is a single SPIR-V asm block returned directly; callees that require a SPIR-V capability are skipped.
+ void performIntrinsicFunctionFunctionInlining(IRModule* module);
+
/// Inline a specific call.
bool inlineCall(IRCall* call);
}
diff --git a/source/slang/slang-lower-to-ir.cpp b/source/slang/slang-lower-to-ir.cpp
index 596d09184..bbd494ee9 100644
--- a/source/slang/slang-lower-to-ir.cpp
+++ b/source/slang/slang-lower-to-ir.cpp
@@ -3289,9 +3289,9 @@ struct ExprLoweringVisitorBase : ExprVisitor<Derived, LoweredValInfo>
case SPIRVAsmOperand::NamedValue:
{
const auto v = operand.knownValue;
- const auto i = builder->getIntValue(builder->getIntType(), v);
+ const auto i = builder->getIntValue(builder->getUIntType(), v);
if(operand.wrapInId)
- return builder->emitSPIRVAsmOperandEnum(i, builder->getIntType());
+ return builder->emitSPIRVAsmOperandEnum(i, builder->getUIntType());
else
return builder->emitSPIRVAsmOperandEnum(i);
}