summaryrefslogtreecommitdiffstats
path: root/source
diff options
context:
space:
mode:
authorYong He <yonghe@outlook.com>2024-03-07 13:19:44 -0800
committerGitHub <noreply@github.com>2024-03-07 13:19:44 -0800
commit240727db40552180446c1f14acc371f690db10e4 (patch)
tree45853db6e7405d37ab0ceafb8c1d5afb029de511 /source
parentaf108c72894fdfb18438bd1c0cfb452b625cb6a6 (diff)
Uniformity analysis. (#3704)
* Uniformity analysis. * Add [NonUniformReturn] decorations to some hlsl intrinsic functions.
Diffstat (limited to 'source')
-rw-r--r--source/core/slang-uint-set.cpp2
-rw-r--r--source/core/slang-uint-set.h2
-rw-r--r--source/slang/core.meta.slang6
-rw-r--r--source/slang/hlsl.meta.slang46
-rw-r--r--source/slang/slang-ast-modifier.h11
-rw-r--r--source/slang/slang-check-modifier.cpp1
-rw-r--r--source/slang/slang-diagnostic-defs.h4
-rw-r--r--source/slang/slang-emit.cpp8
-rw-r--r--source/slang/slang-ir-entry-point-uniforms.cpp2
-rw-r--r--source/slang/slang-ir-inst-defs.h7
-rw-r--r--source/slang/slang-ir-insts.h7
-rw-r--r--source/slang/slang-ir-uniformity.cpp474
-rw-r--r--source/slang/slang-ir-uniformity.h10
-rw-r--r--source/slang/slang-lower-to-ir.cpp17
-rw-r--r--source/slang/slang-options.cpp2
-rw-r--r--source/slang/slang-parser.cpp1
16 files changed, 591 insertions, 9 deletions
diff --git a/source/core/slang-uint-set.cpp b/source/core/slang-uint-set.cpp
index 02e142706..e973cbc3a 100644
--- a/source/core/slang-uint-set.cpp
+++ b/source/core/slang-uint-set.cpp
@@ -27,7 +27,7 @@ UIntSet& UIntSet::operator=(const UIntSet& other)
return *this;
}
-HashCode UIntSet::getHashCode()
+HashCode UIntSet::getHashCode() const
{
int rs = 0;
for (auto val : m_buffer)
diff --git a/source/core/slang-uint-set.h b/source/core/slang-uint-set.h
index 4912ae504..0f2165bab 100644
--- a/source/core/slang-uint-set.h
+++ b/source/core/slang-uint-set.h
@@ -26,7 +26,7 @@ public:
UIntSet& operator=(UIntSet&& other);
UIntSet& operator=(const UIntSet& other);
- HashCode getHashCode();
+ HashCode getHashCode() const;
/// Return the count of all bits directly represented
Int getCount() const { return Int(m_buffer.getCount()) * kElementSize; }
diff --git a/source/slang/core.meta.slang b/source/slang/core.meta.slang
index 3131224b1..54a82c4f0 100644
--- a/source/slang/core.meta.slang
+++ b/source/slang/core.meta.slang
@@ -2308,7 +2308,8 @@ int __naturalStrideOf()
return __naturalStrideOf_impl(__declVal<T>());
}
-
+__intrinsic_op($(kIROp_TreatAsDynamicUniform)) // Maps this function to the TreatAsDynamicUniform IR op.
+T asDynamicUniform<T>(T v); // Uniformity validation does not propagate non-uniformity through this op and later replaces it with `v`.
// Binding Attributes
@@ -2555,3 +2556,6 @@ attribute_syntax [PreferCheckpoint] : PreferCheckpointAttribute;
__attributeTarget(DeclBase)
attribute_syntax [KnownBuiltin(name : String)] : KnownBuiltinAttribute;
+
+__attributeTarget(FunctionDeclBase)
+attribute_syntax [NonUniformReturn] : NonDynamicUniformAttribute;
diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang
index 5c97de525..f220a68a5 100644
--- a/source/slang/hlsl.meta.slang
+++ b/source/slang/hlsl.meta.slang
@@ -6423,6 +6423,7 @@ uint NonUniformResourceIndex(uint index)
__glsl_extension(GL_EXT_nonuniform_qualifier)
[__readNone]
[ForceInline]
+[NonUniformReturn]
int NonUniformResourceIndex(int index)
{
__target_switch
@@ -8730,6 +8731,7 @@ bool WaveActiveAnyTrue(bool condition)
__glsl_extension(GL_KHR_shader_subgroup_ballot)
__spirv_version(1.3)
+[NonUniformReturn]
uint4 WaveActiveBallot(bool condition)
{
__target_switch
@@ -8757,6 +8759,7 @@ uint WaveActiveCountBits(bool value)
__glsl_extension(GL_KHR_shader_subgroup_basic)
__spirv_version(1.3)
+[NonUniformReturn]
uint WaveGetLaneCount()
{
__target_switch
@@ -8775,6 +8778,7 @@ uint WaveGetLaneCount()
__glsl_extension(GL_KHR_shader_subgroup_basic)
__spirv_version(1.3)
+[NonUniformReturn]
uint WaveGetLaneIndex()
{
__target_switch
@@ -8793,6 +8797,7 @@ uint WaveGetLaneIndex()
__glsl_extension(GL_KHR_shader_subgroup_basic)
__spirv_version(1.3)
+[NonUniformReturn]
bool WaveIsFirstLane()
{
__target_switch
@@ -9802,6 +9807,7 @@ void AcceptHitAndEndSearch()
// 10.4.1 - Ray Dispatch System Values
+[NonUniformReturn]
uint3 DispatchRaysIndex()
{
__target_switch
@@ -9832,6 +9838,7 @@ uint3 DispatchRaysDimensions()
// 10.4.2 - Ray System Values
+[NonUniformReturn]
float3 WorldRayOrigin()
{
__target_switch
@@ -9846,6 +9853,7 @@ float3 WorldRayOrigin()
}
}
+[NonUniformReturn]
float3 WorldRayDirection()
{
__target_switch
@@ -9860,6 +9868,7 @@ float3 WorldRayDirection()
}
}
+[NonUniformReturn]
float RayTMin()
{
__target_switch
@@ -9884,6 +9893,7 @@ float RayTMin()
// we should simply provide two overloads here, specialized
// to the appropriate Vulkan stages.
//
+[NonUniformReturn]
float RayTCurrent()
{
__target_switch
@@ -9914,6 +9924,7 @@ uint RayFlags()
// 10.4.3 - Primitive/Object Space System Values
+[NonUniformReturn]
uint InstanceIndex()
{
__target_switch
@@ -9928,6 +9939,7 @@ uint InstanceIndex()
}
}
+[NonUniformReturn]
uint InstanceID()
{
__target_switch
@@ -9942,6 +9954,7 @@ uint InstanceID()
}
}
+[NonUniformReturn]
uint PrimitiveIndex()
{
__target_switch
@@ -9956,6 +9969,7 @@ uint PrimitiveIndex()
}
}
+[NonUniformReturn]
float3 ObjectRayOrigin()
{
__target_switch
@@ -9970,6 +9984,7 @@ float3 ObjectRayOrigin()
}
}
+[NonUniformReturn]
float3 ObjectRayDirection()
{
__target_switch
@@ -9986,6 +10001,7 @@ float3 ObjectRayDirection()
// TODO: optix has an optixGetObjectToWorldTransformMatrix function that returns 12
// floats by reference.
+[NonUniformReturn]
float3x4 ObjectToWorld3x4()
{
__target_switch
@@ -10000,6 +10016,7 @@ float3x4 ObjectToWorld3x4()
}
}
+[NonUniformReturn]
float3x4 WorldToObject3x4()
{
__target_switch
@@ -10014,6 +10031,7 @@ float3x4 WorldToObject3x4()
}
}
+[NonUniformReturn]
float4x3 ObjectToWorld4x3()
{
__target_switch
@@ -10027,6 +10045,7 @@ float4x3 ObjectToWorld4x3()
}
}
+[NonUniformReturn]
float4x3 WorldToObject4x3()
{
__target_switch
@@ -10047,6 +10066,7 @@ float4x3 WorldToObject4x3()
__glsl_version(460)
__glsl_extension(GL_NV_ray_tracing_motion_blur)
__glsl_extension(GL_EXT_ray_tracing)
+[NonUniformReturn]
float RayCurrentTime()
{
__target_switch
@@ -10069,10 +10089,11 @@ float RayCurrentTime()
// declarations, so that users can know they aren't coding
// against the final spec?
//
-float3x4 ObjectToWorld() { return ObjectToWorld3x4(); }
-float3x4 WorldToObject() { return WorldToObject3x4(); }
+[NonUniformReturn] float3x4 ObjectToWorld() { return ObjectToWorld3x4(); }
+[NonUniformReturn] float3x4 WorldToObject() { return WorldToObject3x4(); }
// 10.4.4 - Hit Specific System values
+[NonUniformReturn]
uint HitKind()
{
__target_switch
@@ -10270,6 +10291,7 @@ extension __TextureImpl<T,__Shape2D, 1, 0, 0, $(kStdlibResourceAccessFeedback),
// Get the index of the geometry that was hit in an intersection, any-hit, or closest-hit shader
__glsl_extension(GL_EXT_ray_tracing)
+[NonUniformReturn]
uint GeometryIndex()
{
__target_switch
@@ -10532,6 +10554,7 @@ struct RayQuery <let rayFlagsGeneric : RAY_FLAG = RAY_FLAG_NONE>
__glsl_extension(GL_EXT_ray_query)
__glsl_version(460)
[__NoSideEffect]
+ [NonUniformReturn]
CANDIDATE_TYPE CandidateType()
{
__target_switch
@@ -10550,6 +10573,7 @@ struct RayQuery <let rayFlagsGeneric : RAY_FLAG = RAY_FLAG_NONE>
__glsl_extension(GL_EXT_ray_query)
__glsl_version(460)
[__NoSideEffect]
+ [NonUniformReturn]
COMMITTED_STATUS CommittedStatus()
{
__target_switch
@@ -10568,6 +10592,7 @@ struct RayQuery <let rayFlagsGeneric : RAY_FLAG = RAY_FLAG_NONE>
__glsl_extension(GL_EXT_ray_query)
__glsl_version(460)
[__NoSideEffect]
+ [NonUniformReturn]
bool CandidateProceduralPrimitiveNonOpaque()
{
__target_switch
@@ -10587,6 +10612,7 @@ struct RayQuery <let rayFlagsGeneric : RAY_FLAG = RAY_FLAG_NONE>
__glsl_extension(GL_EXT_ray_query)
__glsl_version(460)
[__NoSideEffect]
+ [NonUniformReturn]
float CandidateTriangleRayT()
{
__target_switch
@@ -10604,6 +10630,7 @@ struct RayQuery <let rayFlagsGeneric : RAY_FLAG = RAY_FLAG_NONE>
__glsl_extension(GL_EXT_ray_query)
__glsl_version(460)
[__NoSideEffect]
+ [NonUniformReturn]
float CommittedRayT()
{
__target_switch
@@ -10675,6 +10702,7 @@ ${{{{
__glsl_extension(GL_EXT_ray_query)
__glsl_version(460)
[__NoSideEffect]
+ [NonUniformReturn]
float3x4 $(ccName)$(matName)3x4()
{
__target_switch
@@ -10693,6 +10721,7 @@ ${{{{
__glsl_extension(GL_EXT_ray_query)
__glsl_version(460)
[__readNone]
+ [NonUniformReturn]
float4x3 $(ccName)$(matName)4x3()
{
__target_switch
@@ -10734,6 +10763,7 @@ ${{{{
__glsl_extension(GL_EXT_ray_query)
__glsl_version(460)
[__NoSideEffect]
+ [NonUniformReturn]
$(method.type) $(ccName)$(method.hlslName)()
{
__target_switch
@@ -10757,6 +10787,7 @@ ${{{{
__glsl_extension(GL_EXT_ray_query)
__glsl_version(460)
[__NoSideEffect]
+ [NonUniformReturn]
uint RayFlags()
{
__target_switch
@@ -10773,6 +10804,7 @@ ${{{{
__glsl_extension(GL_EXT_ray_query)
__glsl_version(460)
[__NoSideEffect]
+ [NonUniformReturn]
float3 WorldRayOrigin()
{
__target_switch
@@ -10789,6 +10821,7 @@ ${{{{
__glsl_extension(GL_EXT_ray_query)
__glsl_version(460)
[__NoSideEffect]
+ [NonUniformReturn]
float3 WorldRayDirection()
{
__target_switch
@@ -10805,6 +10838,7 @@ ${{{{
__glsl_extension(GL_EXT_ray_query)
__glsl_version(460)
[__NoSideEffect]
+ [NonUniformReturn]
float RayTMin()
{
__target_switch
@@ -12082,6 +12116,7 @@ void debugBreak();
[__requiresNVAPI]
__glsl_extension(GL_EXT_shader_realtime_clock)
[require(shaderclock)]
+[NonUniformReturn]
uint getRealtimeClockLow()
{
__target_switch
@@ -12100,12 +12135,14 @@ uint getRealtimeClockLow()
}
__target_intrinsic(cpp, "std::chrono::high_resolution_clock::now().time_since_epoch().count()")
-__target_intrinsic(cuda, "clock64")
+ __target_intrinsic(cuda, "clock64")
+[NonUniformReturn]
int64_t __cudaCppGetRealtimeClock();
[__requiresNVAPI]
__glsl_extension(GL_EXT_shader_realtime_clock)
[require(shaderclock)]
+[NonUniformReturn]
uint2 getRealtimeClock()
{
__target_switch
@@ -12134,14 +12171,17 @@ uint2 getRealtimeClock()
__target_intrinsic(cuda, "(threadIdx)")
[__readNone]
+[NonUniformReturn]
uint3 cudaThreadIdx();
__target_intrinsic(cuda, "(blockIdx)")
[__readNone]
+[NonUniformReturn]
uint3 cudaBlockIdx();
__target_intrinsic(cuda, "(blockDim)")
[__readNone]
+[NonUniformReturn]
uint3 cudaBlockDim();
//
diff --git a/source/slang/slang-ast-modifier.h b/source/slang/slang-ast-modifier.h
index c5d18f4f8..8e5120fad 100644
--- a/source/slang/slang-ast-modifier.h
+++ b/source/slang/slang-ast-modifier.h
@@ -634,6 +634,10 @@ class AttributeUsageAttribute : public Attribute
SyntaxClass<NodeBase> targetSyntaxClass;
};
+class NonDynamicUniformAttribute : public Attribute // AST class behind the `[NonUniformReturn]` attribute syntax.
+{
+ SLANG_AST_CLASS(NonDynamicUniformAttribute)
+};
class RequireCapabilityAttribute : public Attribute
{
@@ -1482,7 +1486,7 @@ class GLSLPatchModifier : public SimpleModifier
//
class BitFieldModifier : public Modifier
{
- SLANG_ABSTRACT_AST_CLASS(BitFieldModifier)
+ SLANG_AST_CLASS(BitFieldModifier)
IntegerLiteralValue width;
@@ -1491,4 +1495,9 @@ class BitFieldModifier : public Modifier
DeclRef<VarDecl> backingDeclRef;
};
+class DynamicUniformModifier : public Modifier // AST class for the `dynamic_uniform` modifier keyword.
+{
+ SLANG_AST_CLASS(DynamicUniformModifier)
+};
+
} // namespace Slang
diff --git a/source/slang/slang-check-modifier.cpp b/source/slang/slang-check-modifier.cpp
index 2d9107431..0f49891d0 100644
--- a/source/slang/slang-check-modifier.cpp
+++ b/source/slang/slang-check-modifier.cpp
@@ -1083,6 +1083,7 @@ namespace Slang
case ASTNodeType::HLSLCentroidModifier:
case ASTNodeType::PerVertexModifier:
case ASTNodeType::HLSLUniformModifier:
+ case ASTNodeType::DynamicUniformModifier:
return (as<VarDeclBase>(decl) && (isGlobalDecl(decl) || as<StructDecl>(getParentDecl(decl)))) || as<ParamDecl>(decl);
case ASTNodeType::HLSLSemantic:
diff --git a/source/slang/slang-diagnostic-defs.h b/source/slang/slang-diagnostic-defs.h
index 2150be5ad..8264d256b 100644
--- a/source/slang/slang-diagnostic-defs.h
+++ b/source/slang/slang-diagnostic-defs.h
@@ -716,6 +716,10 @@ DIAGNOSTIC(41904, Error, unableToAlignOf, "alignof could not be performed for ty
DIAGNOSTIC(42001, Error, invalidUseOfTorchTensorTypeInDeviceFunc, "invalid use of TorchTensor type in device/kernel functions. use `TensorView` instead.")
DIAGNOSTIC(45001, Error, unresolvedSymbol, "unresolved external symbol '$0'.")
+
+DIAGNOSTIC(41201, Warning, expectDynamicUniformArgument, "argument for '$0' is not a dynamic uniform.") // Emitted by uniformity validation for a non-uniform argument bound to a dynamic-uniform parameter.
+DIAGNOSTIC(41201, Warning, expectDynamicUniformValue, "value stored at this location must be dynamic uniform.") // NOTE(review): reuses ID 41201 from the diagnostic above — confirm the shared ID is intentional.
+
//
// 5xxxx - Target code generation.
//
diff --git a/source/slang/slang-emit.cpp b/source/slang/slang-emit.cpp
index 2217bc143..1d72fd233 100644
--- a/source/slang/slang-emit.cpp
+++ b/source/slang/slang-emit.cpp
@@ -71,6 +71,7 @@
#include "slang-ir-string-hash.h"
#include "slang-ir-simplify-for-emit.h"
#include "slang-ir-pytorch-cpp-binding.h"
+#include "slang-ir-uniformity.h"
#include "slang-ir-vk-invert-y.h"
#include "slang-legalize-types.h"
#include "slang-lower-to-ir.h"
@@ -371,6 +372,13 @@ Result linkAndOptimizeIR(
simplifyIR(targetProgram, irModule, IRSimplificationOptions::getDefault(), sink);
+ if (targetProgram->getOptionSet().getBoolOption(CompilerOptionName::ValidateUniformity))
+ {
+ validateUniformity(irModule, sink);
+ if (sink->getErrorCount() != 0)
+ return SLANG_FAIL;
+ }
+
// Fill in default matrix layout into matrix types that left layout unspecified.
specializeMatrixLayout(codeGenContext->getTargetProgram(), irModule);
diff --git a/source/slang/slang-ir-entry-point-uniforms.cpp b/source/slang/slang-ir-entry-point-uniforms.cpp
index 518f6ae2c..a269ec171 100644
--- a/source/slang/slang-ir-entry-point-uniforms.cpp
+++ b/source/slang/slang-ir-entry-point-uniforms.cpp
@@ -161,6 +161,8 @@ bool isVaryingParameter(IRTypeLayout* typeLayout)
bool isVaryingParameter(IRVarLayout* varLayout)
{
+ if (!varLayout) // A missing layout is reported as "not varying" rather than dereferenced.
+ return false;
return isVaryingParameter(varLayout->getTypeLayout());
}
diff --git a/source/slang/slang-ir-inst-defs.h b/source/slang/slang-ir-inst-defs.h
index 656b3d320..4865aa0b5 100644
--- a/source/slang/slang-ir-inst-defs.h
+++ b/source/slang/slang-ir-inst-defs.h
@@ -838,6 +838,9 @@ INST(HighLevelDeclDecoration, highLevelDecl, 1, 0)
// Marks a type to be non copyable, causing SSA pass to skip turning variables of the the type into SSA values.
INST(NonCopyableTypeDecoration, nonCopyable, 0, 0)
+ // Marks a value to be dynamically uniform.
+ INST(DynamicUniformDecoration, DynamicUniform, 0, 0)
+
/// A call to the decorated function should always be folded into its use site.
INST(AlwaysFoldIntoUseSiteDecoration, alwaysFold, 0, 0)
@@ -951,6 +954,9 @@ INST(HighLevelDeclDecoration, highLevelDecl, 1, 0)
INST_RANGE(CheckpointHintDecoration, PreferCheckpointDecoration, PreferRecomputeDecoration)
+ /// Marks a function whose return value is never dynamic uniform.
+ INST(NonDynamicUniformReturnDecoration, NonDynamicUniformReturnDecoration, 0, 0)
+
/// Marks a class type as a COM interface implementation, which enables
/// the witness table to be easily picked up by emit.
INST(COMWitnessDecoration, COMWitnessDecoration, 1, 0)
@@ -1023,6 +1029,7 @@ INST(CastPtrToInt, CastPtrToInt, 1, 0)
INST(CastIntToPtr, CastIntToPtr, 1, 0)
INST(CastToVoid, castToVoid, 1, 0)
INST(PtrCast, PtrCast, 1, 0)
+INST(TreatAsDynamicUniform, TreatAsDynamicUniform, 1, 0)
INST(SizeOf, sizeOf, 1, 0)
INST(AlignOf, alignOf, 1, 0)
diff --git a/source/slang/slang-ir-insts.h b/source/slang/slang-ir-insts.h
index a80f7fb29..92a9d473f 100644
--- a/source/slang/slang-ir-insts.h
+++ b/source/slang/slang-ir-insts.h
@@ -685,6 +685,8 @@ struct IROffsetDecoration : IRDecoration
IRIntegerValue getOffset() { return getOffsetOperand()->getValue(); }
};
+IR_SIMPLE_DECORATION(DynamicUniformDecoration)
+
struct IRBuiltinDecoration : IRDecoration
{
enum
@@ -4619,6 +4621,11 @@ public:
addDecoration(value, kIROp_NonCopyableTypeDecoration);
}
+ void addDynamicUniformDecoration(IRInst* value) // Attaches a DynamicUniformDecoration to `value`.
+ {
+ addDecoration(value, kIROp_DynamicUniformDecoration);
+ }
+
/// Add a decoration that indicates that the given `inst` depends on the given `dependency`.
///
/// This decoration can be used to ensure that a value that an instruction
diff --git a/source/slang/slang-ir-uniformity.cpp b/source/slang/slang-ir-uniformity.cpp
new file mode 100644
index 000000000..9c463f530
--- /dev/null
+++ b/source/slang/slang-ir-uniformity.cpp
@@ -0,0 +1,474 @@
+#include "slang-ir-uniformity.h"
+
+#include "slang-ir.h"
+#include "slang-ir-insts.h"
+#include "slang-ir-util.h"
+#include "slang-ir-dominators.h"
+
+namespace Slang
+{
+ struct ValidateUniformityContext
+ {
+ IRModule* module;
+ DiagnosticSink* sink;
+
+ HashSet<IRInst*> nonUniformInsts;
+ ValidateUniformityContext* parentContext = nullptr;
+ IRCall* call = nullptr;
+ IRFunc* currentCallee = nullptr;
+
+        /// Returns true when `inst` is recorded as non-uniform in this context or in
+        /// any context reachable through the `parentContext` chain.
+        bool isInstNonUniform(IRInst* inst)
+        {
+            for (ValidateUniformityContext* scope = this; scope != nullptr; scope = scope->parentContext)
+            {
+                if (scope->nonUniformInsts.contains(inst))
+                    return true;
+            }
+            return false;
+        }
+
+        /// Cache key: a function paired with the set of its parameter indices that
+        /// are non-uniform at a call site.
+        struct FunctionNonUniformInfoKey
+        {
+            IRFunc* func;
+            UIntSet nonUniformParams;
+
+            /// Keys are equal when both the function and the parameter set match.
+            bool operator==(const FunctionNonUniformInfoKey& other) const
+            {
+                if (func != other.func)
+                    return false;
+                return nonUniformParams == other.nonUniformParams;
+            }
+
+            /// Combines the hash of the function pointer with the hash of the parameter set.
+            HashCode getHashCode() const
+            {
+                const auto funcHash = Slang::getHashCode(func);
+                const auto paramsHash = nonUniformParams.getHashCode();
+                return combineHash(funcHash, paramsHash);
+            }
+        };
+
+        struct FunctionNonUniformInfo // Summary produced by analyzing one function under one FunctionNonUniformInfoKey.
+        {
+            UIntSet nonUniformParams; // Starts as the key's parameter set; parameters found non-uniform during analysis are added.
+            bool isResultNonUniform = false; // Set when the function's return value is found to be non-uniform.
+        };
+
+        Dictionary<FunctionNonUniformInfoKey, FunctionNonUniformInfo> functionNonUniformInfos; // Cache read and filled by getFunctionNonUniformInfo.
+
+        /// Walks the immediate-dominator chain of the block that contains `inst` and
+        /// calls `f` on each dominating block, except:
+        ///   - blocks ending in an unconditional branch, and
+        ///   - blocks ending in an if-else / switch / loop whose after-block or
+        ///     break block dominates the block containing `inst`.
+        /// Does nothing when `inst` is not directly parented by a block.
+        template<typename F>
+        void traverseControlDependentBlocks(IRDominatorTree* dom, IRInst* inst, const F& f)
+        {
+            auto block = as<IRBlock>(inst->getParent());
+            if (!block)
+                return;
+            for (auto ancestor = dom->getImmediateDominator(block); ancestor; ancestor = dom->getImmediateDominator(ancestor))
+            {
+                auto terminator = ancestor->getTerminator();
+                if (as<IRUnconditionalBranch>(terminator))
+                    continue;
+
+                // Find the block where control re-converges for the structured terminators.
+                bool hasMergeBlock = true;
+                IRBlock* mergeBlock = nullptr;
+                if (auto ifElse = as<IRIfElse>(terminator))
+                    mergeBlock = ifElse->getAfterBlock();
+                else if (auto switchInst = as<IRSwitch>(terminator))
+                    mergeBlock = switchInst->getBreakLabel();
+                else if (auto loopInst = as<IRLoop>(terminator))
+                    mergeBlock = loopInst->getBreakBlock();
+                else
+                    hasMergeBlock = false;
+
+                if (hasMergeBlock && dom->dominates(mergeBlock, block))
+                    continue;
+                f(ancestor);
+            }
+        }
+
+        /// Returns the uniformity summary for calling `key.func` with the parameters in
+        /// `key.nonUniformParams` being non-uniform, computing and caching it on first use.
+        ///
+        /// Returns nullptr when no summary can be produced: the function is already being
+        /// analyzed somewhere up the context chain (recursion), or one of its blocks ends
+        /// in a GenericAsm terminator. Callers are expected to fall back to their default rule.
+        ///
+        /// `callInst` is only recorded on the sub-context that performs the analysis.
+        FunctionNonUniformInfo* getFunctionNonUniformInfo(IRCall* callInst, const FunctionNonUniformInfoKey& key)
+        {
+            if (auto rs = functionNonUniformInfos.tryGetValue(key))
+                return rs;
+
+            // Is the function already being analyzed? If so exit early to avoid infinite recursion.
+            for (auto context = this; context; context = context->parentContext)
+            {
+                if (context->currentCallee == key.func)
+                    return nullptr;
+            }
+
+            // If the function body has target intrinsic, we can't analyze it, and we
+            // will use the fallback behavior (result is non-uniform if any of its arguments are non-uniform).
+            for (auto block : key.func->getBlocks())
+            {
+                if (as<IRGenericAsm>(block->getTerminator()))
+                    return nullptr;
+            }
+
+            // NOTE(review): the sub-context starts with its own empty `functionNonUniformInfos`,
+            // so summaries computed for nested calls are not stored in this context's cache —
+            // confirm whether that is intended.
+            ValidateUniformityContext subContext;
+            subContext.module = module;
+            subContext.sink = sink;
+            subContext.parentContext = this;
+
+            // Seed the analysis with the parameters the key says are non-uniform.
+            List<IRInst*> workList;
+            Index paramIndex = 0;
+            for (auto param : key.func->getParams())
+            {
+                if (key.nonUniformParams.contains(UInt(paramIndex)))
+                {
+                    subContext.nonUniformInsts.add(param);
+                    workList.add(param);
+                }
+                paramIndex++;
+            }
+            subContext.call = callInst;
+            subContext.currentCallee = key.func;
+            subContext.propagateNonUniform(key.func, workList);
+
+            // Report back every parameter that ended up non-uniform, including ones
+            // that became non-uniform during propagation.
+            FunctionNonUniformInfo info;
+            info.nonUniformParams = key.nonUniformParams;
+            paramIndex = 0;
+            for (auto param : key.func->getParams())
+            {
+                if (subContext.nonUniformInsts.contains(param))
+                {
+                    info.nonUniformParams.add(UInt(paramIndex));
+                }
+                paramIndex++;
+            }
+
+            // If the function has [NonUniformReturn] attribute,
+            // treat its return value as non uniform.
+            if (key.func->findDecorationImpl(kIROp_NonDynamicUniformReturnDecoration))
+            {
+                info.isResultNonUniform = true;
+            }
+            else
+            {
+                // The return value is non-uniform if any value used in an IRReturn is
+                // non-uniform, or if the return insts are control-dependent on non-uniform
+                // values.
+                for (auto bb : key.func->getBlocks())
+                {
+                    if (auto ret = as<IRReturn>(bb->getTerminator()))
+                    {
+                        if (subContext.isInstNonUniform(ret->getVal()) || subContext.isInstNonUniform(ret))
+                        {
+                            info.isResultNonUniform = true;
+                            break;
+                        }
+                    }
+                }
+            }
+            functionNonUniformInfos[key] = info;
+            return functionNonUniformInfos.tryGetValue(key);
+        }
+
+        /// Follows `addr` through FieldAddress / GetElementPtr / GetOffsetPtr bases.
+        /// Returns true when a field key on the way carries DynamicUniformDecoration, or
+        /// when the chain reaches a Param/Var carrying it. Any other instruction kind
+        /// (or a null address) yields false.
+        bool isDynamicUniformLocation(IRInst* addr)
+        {
+            for (IRInst* cursor = addr; cursor != nullptr;)
+            {
+                switch (cursor->getOp())
+                {
+                case kIROp_Param:
+                case kIROp_Var:
+                    return cursor->findDecoration<IRDynamicUniformDecoration>() != nullptr;
+                case kIROp_FieldAddress:
+                    {
+                        auto fieldAddr = as<IRFieldAddress>(cursor);
+                        if (fieldAddr->getField()->findDecoration<IRDynamicUniformDecoration>())
+                            return true;
+                        cursor = fieldAddr->getBase();
+                    }
+                    break;
+                case kIROp_GetElementPtr:
+                    cursor = as<IRGetElementPtr>(cursor)->getBase();
+                    break;
+                case kIROp_GetOffsetPtr:
+                    cursor = cursor->getOperand(0);
+                    break;
+                default:
+                    return false;
+                }
+            }
+            return false;
+        }
+
+        /// Propagates non-uniformity through the body of `root`.
+        ///
+        /// Starting points are the instructions in `workList` (the callers in this file
+        /// record them in `nonUniformInsts` before calling) plus every call in `root` whose
+        /// resolved callee carries NonDynamicUniformReturnDecoration. Every instruction
+        /// reached is added to `nonUniformInsts`.
+        ///
+        /// Warnings are reported to `sink` when a non-uniform value is stored to a
+        /// dynamic-uniform location or passed for a dynamic-uniform parameter.
+        /// `workList` is empty when this function returns.
+        void propagateNonUniform(IRFunc* root, List<IRInst*>& workList)
+        {
+            // NOTE(review): these pooled containers are not handed back to the pool in
+            // this function — confirm whether that is intended.
+            List<IRInst*>& nextWorkList = *module->getContainerPool().getList<IRInst>();
+            HashSet<IRInst*>& workListSet = *module->getContainerPool().getHashSet<IRInst>();
+
+            // Marks `inst` non-uniform and queues it for the next round, at most once.
+            auto addToWorkList = [&](IRInst* inst)
+            {
+                if (workListSet.add(inst))
+                {
+                    nonUniformInsts.add(inst);
+                    nextWorkList.add(inst);
+                }
+            };
+
+            // Go through the children first to identify initial non-uniform insts.
+            for (auto block : root->getBlocks())
+            {
+                for (auto inst = block->getFirstInst(); inst; inst = inst->getNextInst())
+                {
+                    switch (inst->getOp())
+                    {
+                    case kIROp_Call:
+                        {
+                            auto callInst = as<IRCall>(inst);
+                            auto callee = getResolvedInstForDecorations(callInst->getCallee());
+                            if (callee->findDecorationImpl(kIROp_NonDynamicUniformReturnDecoration))
+                            {
+                                addToWorkList(inst);
+                            }
+                            break;
+                        }
+                    }
+                }
+            }
+
+            auto dom = module->findOrCreateDominatorTree(root);
+
+            // Marks side effects in code controlled by a non-uniform branch as non-uniform:
+            // store destinations, return insts, and pointer-typed call arguments.
+            // NOTE(review): only the blocks returned by getProperlyDominatedBlocks are
+            // scanned — confirm whether `dependentBlock` itself should be scanned as well.
+            auto visitControlDependentBlock = [&](IRBlock* dependentBlock)
+            {
+                if (!dependentBlock)
+                    return;
+                for (auto block : dom->getProperlyDominatedBlocks(dependentBlock))
+                {
+                    for (auto inst = block->getFirstInst(); inst; inst = inst->getNextInst())
+                    {
+                        switch (inst->getOp())
+                        {
+                        case kIROp_Store:
+                        case kIROp_SwizzledStore:
+                            addToWorkList(inst->getOperand(0));
+                            break;
+                        case kIROp_Return:
+                            addToWorkList(inst);
+                            break;
+                        case kIROp_Call:
+                            {
+                                auto call = as<IRCall>(inst);
+                                for (UInt i = 0; i < call->getArgCount(); i++)
+                                {
+                                    // Test the argument's type, not the argument value itself.
+                                    if (as<IRPtrTypeBase>(call->getArg(i)->getDataType()))
+                                        addToWorkList(call->getArg(i));
+                                }
+                            }
+                            break;
+                        }
+                    }
+                }
+            };
+
+            // Run at least one round: the seed calls found above were queued on
+            // `nextWorkList`, so they must be picked up even when the caller passed an
+            // empty `workList`.
+            do
+            {
+                for (Index i = 0; i < workList.getCount(); i++)
+                {
+                    auto inst = workList[i];
+                    for (auto use = inst->firstUse; use; use = use->nextUse)
+                    {
+                        auto user = use->getUser();
+                        if (as<IRAttr>(user))
+                            continue;
+                        if (as<IRDecoration>(user))
+                            continue;
+                        switch (user->getOp())
+                        {
+                        case kIROp_TreatAsDynamicUniform:
+                            // Explicit opt-out: do not propagate through this op.
+                            continue;
+                        case kIROp_FieldAddress:
+                            {
+                                if (isDynamicUniformLocation(user))
+                                    continue;
+                                break;
+                            }
+                        case kIROp_FieldExtract:
+                            {
+                                if (as<IRFieldExtract>(user)->findDecoration<IRDynamicUniformDecoration>())
+                                    continue;
+                                break;
+                            }
+                        case kIROp_SwizzledStore:
+                        case kIROp_Store:
+                            {
+                                // Only react when the non-uniform inst is operand 1 of the store.
+                                if (use == user->getOperands() + 1)
+                                {
+                                    auto ptr = user->getOperand(0);
+                                    addToWorkList(ptr);
+                                    if (isDynamicUniformLocation(ptr))
+                                    {
+                                        sink->diagnose(user->sourceLoc, Diagnostics::expectDynamicUniformValue, ptr);
+                                    }
+                                    else
+                                    {
+                                        // Conservatively treat the entire composite at root addr as non-uniform.
+                                        auto addrRoot = getRootAddr(ptr);
+                                        addToWorkList(addrRoot);
+                                    }
+                                }
+                                break;
+                            }
+                        case kIROp_ifElse:
+                            {
+                                auto ifElse = as<IRIfElse>(user);
+                                visitControlDependentBlock(ifElse->getTrueBlock());
+                                visitControlDependentBlock(ifElse->getFalseBlock());
+                                break;
+                            }
+                        case kIROp_Switch:
+                            {
+                                auto switchInst = as<IRSwitch>(user);
+                                for (UInt c = 0; c < switchInst->getCaseCount(); c++)
+                                    visitControlDependentBlock(switchInst->getCaseLabel(c));
+                                visitControlDependentBlock(switchInst->getDefaultLabel());
+                                break;
+                            }
+                        case kIROp_Call:
+                            {
+                                auto callInst = as<IRCall>(user);
+                                auto callee = getResolvedInstForDecorations(callInst->getCallee());
+                                if (auto func = as<IRFunc>(callee))
+                                {
+                                    if (func->getFirstBlock())
+                                    {
+                                        // Build the key from the arguments currently known to be non-uniform.
+                                        FunctionNonUniformInfoKey key;
+                                        key.func = func;
+                                        for (UInt argi = 0; argi < callInst->getArgCount(); argi++)
+                                        {
+                                            if (nonUniformInsts.contains(callInst->getArg(argi)))
+                                            {
+                                                auto param = getParamAt(func->getFirstBlock(), argi);
+                                                if (param->findDecoration<IRDynamicUniformDecoration>())
+                                                {
+                                                    sink->diagnose(callInst->sourceLoc, Diagnostics::expectDynamicUniformArgument, param);
+                                                }
+                                                else
+                                                {
+                                                    // Record the argument index, not the outer worklist index.
+                                                    key.nonUniformParams.add(argi);
+                                                }
+                                            }
+                                        }
+                                        if (auto funcInfo = getFunctionNonUniformInfo(callInst, key))
+                                        {
+                                            for (UInt argi = 0; argi < callInst->getArgCount(); argi++)
+                                            {
+                                                if (funcInfo->nonUniformParams.contains(argi))
+                                                {
+                                                    addToWorkList(callInst->getArg(argi));
+                                                }
+                                            }
+                                            // The result does not depend on any single argument, so
+                                            // check it once outside the per-argument loop.
+                                            if (funcInfo->isResultNonUniform)
+                                            {
+                                                addToWorkList(callInst);
+                                            }
+                                            // NOTE(review): this `break` leaves the switch and falls into the
+                                            // unconditional addToWorkList(user) below, so the call is marked
+                                            // non-uniform even when isResultNonUniform is false — confirm
+                                            // whether `continue` was intended here.
+                                            break;
+                                        }
+                                    }
+                                }
+                                // The default behavior for calls is that the result is non-uniform if
+                                // any of its arguments are non-uniform.
+                                bool isNonUniformCall = callee->findDecorationImpl(kIROp_NonDynamicUniformReturnDecoration) != nullptr;
+                                if (!isNonUniformCall)
+                                {
+                                    for (UInt argi = 0; argi < callInst->getArgCount(); argi++)
+                                    {
+                                        if (nonUniformInsts.contains(callInst->getArg(argi)))
+                                        {
+                                            isNonUniformCall = true;
+                                            break;
+                                        }
+                                    }
+                                }
+                                if (isNonUniformCall)
+                                {
+                                    addToWorkList(callInst);
+                                    for (UInt argi = 0; argi < callInst->getArgCount(); argi++)
+                                    {
+                                        if (as<IRPtrTypeBase>(callInst->getArg(argi)->getDataType()))
+                                        {
+                                            addToWorkList(callInst->getArg(argi));
+                                            // Conservatively treat the entire composite at root addr as non-uniform.
+                                            auto addrRoot = getRootAddr(callInst->getArg(argi));
+                                            addToWorkList(addrRoot);
+                                        }
+                                    }
+                                }
+                                break;
+                            }
+                        default:
+                            break;
+                        }
+                        addToWorkList(user);
+                    }
+                }
+                workList.swapWith(nextWorkList);
+                nextWorkList.clear();
+            } while (workList.getCount());
+        }
+
+        /// Runs the analysis over the whole module.
+        ///
+        /// Global values with code whose resolved inst carries
+        /// NonDynamicUniformReturnDecoration are recorded as non-uniform. For each
+        /// entry-point function, parameters that are varying or have a system-value
+        /// semantic attribute are used as starting points for propagateNonUniform.
+        /// Finally all TreatAsDynamicUniform insts are removed from the module.
+        void analyzeModule()
+        {
+            List<IRInst*>& workList = *module->getContainerPool().getList<IRInst>();
+            for (auto globalInst : module->getGlobalInsts())
+            {
+                if (auto code = as<IRGlobalValueWithCode>(globalInst))
+                {
+                    auto func = getResolvedInstForDecorations(code);
+                    if (func->findDecorationImpl(kIROp_NonDynamicUniformReturnDecoration))
+                    {
+                        nonUniformInsts.add(code);
+                    }
+                }
+                if (globalInst->findDecoration<IREntryPointDecoration>())
+                {
+                    auto func = as<IRFunc>(globalInst);
+                    if (!func)
+                        continue;
+                    for (auto param : func->getParams())
+                    {
+                        auto varLayout = findVarLayout(param);
+                        // isVaryingParameter accepts a null layout, so guard the direct
+                        // dereference as well instead of crashing on layout-less params.
+                        if (isVaryingParameter(varLayout) ||
+                            (varLayout && varLayout->findAttr<IRSystemValueSemanticAttr>()))
+                        {
+                            nonUniformInsts.add(param);
+                            workList.add(param);
+                        }
+                    }
+                    currentCallee = func;
+                    call = nullptr;
+                    propagateNonUniform(func, workList);
+                }
+            }
+            workList.clear();
+
+            eliminateAsDynamicUniformInst();
+        }
+
+        /// Removes every TreatAsDynamicUniform inst reachable from the module inst,
+        /// redirecting its uses to its first operand. Children of a removed inst are
+        /// not visited.
+        void eliminateAsDynamicUniformInst()
+        {
+            List<IRInst*>& pending = *module->getContainerPool().getList<IRInst>();
+            pending.add(module->getModuleInst());
+            for (Index cursor = 0; cursor < pending.getCount(); cursor++)
+            {
+                IRInst* current = pending[cursor];
+                if (current->getOp() != kIROp_TreatAsDynamicUniform)
+                {
+                    // Not the op being removed: queue its children for inspection.
+                    for (auto child = current->getFirstChild(); child; child = child->getNextInst())
+                        pending.add(child);
+                    continue;
+                }
+
+                auto wrappedValue = current->getOperand(0);
+                current->replaceUsesWith(wrappedValue);
+                current->removeAndDeallocate();
+            }
+        }
+ };
+
+    /// Pass entry point: builds a root analysis context for `module` that reports
+    /// to `sink`, then runs the module-wide analysis.
+    void validateUniformity(IRModule* module, DiagnosticSink* sink)
+    {
+        ValidateUniformityContext rootContext;
+        rootContext.sink = sink;
+        rootContext.module = module;
+        rootContext.analyzeModule();
+    }
+}
diff --git a/source/slang/slang-ir-uniformity.h b/source/slang/slang-ir-uniformity.h
new file mode 100644
index 000000000..db3c89bd6
--- /dev/null
+++ b/source/slang/slang-ir-uniformity.h
@@ -0,0 +1,10 @@
+// slang-ir-uniformity.h
+#pragma once
+
+namespace Slang
+{
+ struct IRModule;
+ class DiagnosticSink;
+
+    void validateUniformity(IRModule* module, DiagnosticSink* sink); // Runs uniformity validation on `module`, reporting diagnostics to `sink`.
+}
diff --git a/source/slang/slang-lower-to-ir.cpp b/source/slang/slang-lower-to-ir.cpp
index 753b21589..146af452f 100644
--- a/source/slang/slang-lower-to-ir.cpp
+++ b/source/slang/slang-lower-to-ir.cpp
@@ -2187,6 +2187,10 @@ void addVarDecorations(
{
builder->addSemanticDecoration(inst, hlslSemantic->name.getContent());
}
+ else if (auto dynamicUniform = as<DynamicUniformModifier>(mod))
+ {
+ builder->addDynamicUniformDecoration(inst);
+ }
// TODO: what are other modifiers we need to propagate through?
}
if(auto t = composeGetters<IRMeshOutputType>(inst->getFullType(), &IROutTypeBase::getValueType))
@@ -8376,9 +8380,16 @@ struct DeclLoweringVisitor : DeclVisitor<DeclLoweringVisitor, LoweredValInfo>
fieldKey,
fieldType);
- if (auto packOffsetModifier = fieldDecl->findModifier<HLSLPackOffsetSemantic>())
+ for (auto mod : fieldDecl->modifiers)
{
- lowerPackOffsetModifier(fieldKey, packOffsetModifier);
+ if (auto packOffsetModifier = as<HLSLPackOffsetSemantic>(mod))
+ {
+ lowerPackOffsetModifier(fieldKey, packOffsetModifier);
+ }
+ else if (auto dynamicUniformModifer = as<DynamicUniformModifier>(mod))
+ {
+ subBuilder->addDynamicUniformDecoration(fieldKey);
+ }
}
}
@@ -9669,6 +9680,8 @@ struct DeclLoweringVisitor : DeclVisitor<DeclLoweringVisitor, LoweredValInfo>
getBuilder()->addRequireSPIRVVersionDecoration(irFunc, spvVersion->version);
else if (auto cudasmVersion = as<RequiredCUDASMVersionModifier>(modifier))
getBuilder()->addRequireCUDASMVersionDecoration(irFunc, cudasmVersion->version);
+ else if (auto nonUniform= as<NonDynamicUniformAttribute>(modifier))
+ getBuilder()->addDecoration(irFunc, kIROp_NonDynamicUniformReturnDecoration);
}
if (!isInline)
diff --git a/source/slang/slang-options.cpp b/source/slang/slang-options.cpp
index 6b56d5600..13984e530 100644
--- a/source/slang/slang-options.cpp
+++ b/source/slang/slang-options.cpp
@@ -516,6 +516,7 @@ void initCommandOptions(CommandOptions& options)
"Set the filesystem hook to use for a compile request."},
{ OptionKind::Heterogeneous, "-heterogeneous", nullptr, "Output heterogeneity-related code." },
{ OptionKind::NoMangle, "-no-mangle", nullptr, "Do as little mangling of names as possible." },
+ { OptionKind::ValidateUniformity, "-validate-uniformity", nullptr, "Perform uniformity validation analysis." },
{ OptionKind::AllowGLSL, "-allow-glsl", nullptr, "Enable GLSL as an input language." },
};
_addOptions(makeConstArrayView(experimentalOpts), options);
@@ -1664,6 +1665,7 @@ SlangResult OptionsParser::_parse(
switch (optionKind)
{
case OptionKind::NoMangle:
+ case OptionKind::ValidateUniformity:
case OptionKind::AllowGLSL:
case OptionKind::EmitIr:
case OptionKind::DumpIntermediates:
diff --git a/source/slang/slang-parser.cpp b/source/slang/slang-parser.cpp
index 3f1ed3f76..ae40234bc 100644
--- a/source/slang/slang-parser.cpp
+++ b/source/slang/slang-parser.cpp
@@ -8025,6 +8025,7 @@ namespace Slang
_makeParseModifier("uniform", HLSLUniformModifier::kReflectClassInfo),
_makeParseModifier("volatile", HLSLVolatileModifier::kReflectClassInfo),
_makeParseModifier("export", HLSLExportModifier::kReflectClassInfo),
+ _makeParseModifier("dynamic_uniform", DynamicUniformModifier::kReflectClassInfo),
// Modifiers for geometry shader input
_makeParseModifier("point", HLSLPointModifier::kReflectClassInfo),