summary | refs | log | tree | commit | diff
path: root/source/slang
diff options
context:
space:
mode:
author: Darren Wihandi <65404740+fairywreath@users.noreply.github.com> 2025-02-28 13:11:26 -0500
committer: GitHub <noreply@github.com> 2025-02-28 10:11:26 -0800
commit: 66984eb856454d0a372e3b30643823af18612067 (patch)
tree: cf9283fc3cb15bd83ae476f00e7785658adaa77e /source/slang
parent: e4b960024972420dfc96a758bfc35e8fcbf28273 (diff)
Add WaveGetLane* support for Metal and WGSL (#6371)
* support WaveGetLane* for WGSL and Metal
* update test and glsl support
* address review comments and fix metal test
* add missing pragma guard
* update test
* Revert "update test"
  This reverts commit f2b97e91c29de154190710580c343bd0764aedbb.
* update failing glsl metal test and added new test
* make hlsl and glsl outputs similar
* update test
* disable tests for Metal and cleanup
* comment fix
* add expected failures
* correct expected failures list
* remove expected failure
* add tests to expected failure

---------

Co-authored-by: Yong He <yonghe@outlook.com>
Diffstat (limited to 'source/slang')
-rw-r--r-- source/slang/core.meta.slang | 4
-rw-r--r-- source/slang/glsl.meta.slang | 56
-rw-r--r-- source/slang/hlsl.meta.slang | 88
-rw-r--r-- source/slang/slang-core-module-textures.cpp | 2
-rw-r--r-- source/slang/slang-emit-c-like.cpp | 9
-rw-r--r-- source/slang/slang-emit-c-like.h | 2
-rw-r--r-- source/slang/slang-emit-glsl.cpp | 2
-rw-r--r-- source/slang/slang-emit-wgsl.cpp | 5
-rw-r--r-- source/slang/slang-emit-wgsl.h | 2
-rw-r--r-- source/slang/slang-emit.cpp | 10
-rw-r--r-- source/slang/slang-ir-call-graph.h | 3
-rw-r--r-- source/slang/slang-ir-inst-defs.h | 4
-rw-r--r-- source/slang/slang-ir-insts.h | 4
-rw-r--r-- source/slang/slang-ir-legalize-varying-params.cpp | 29
-rw-r--r-- source/slang/slang-ir-legalize-varying-params.h | 2
-rw-r--r-- source/slang/slang-ir-translate-global-varying-var.cpp (renamed from source/slang/slang-ir-translate-glsl-global-var.cpp) | 8
-rw-r--r-- source/slang/slang-ir-translate-global-varying-var.h | 14
-rw-r--r-- source/slang/slang-ir-translate-glsl-global-var.h | 17
18 files changed, 160 insertions, 101 deletions
diff --git a/source/slang/core.meta.slang b/source/slang/core.meta.slang
index da1b47e13..e2fb8bbf2 100644
--- a/source/slang/core.meta.slang
+++ b/source/slang/core.meta.slang
@@ -440,8 +440,8 @@ attribute_syntax [Differentiable(order:int = 0)] : BackwardDifferentiableAttribu
__intrinsic_op($(kIROp_RequirePrelude))
void __requirePrelude(constexpr String preludeText);
-__intrinsic_op($(kIROp_RequireGLSLExtension))
-void __requireGLSLExtension(constexpr String preludeText);
+__intrinsic_op($(kIROp_RequireTargetExtension))
+void __requireTargetExtension(constexpr String preludeText);
/// @experimetal
/// Perform a compile-time condition check and emit a compile-time error if the condition is false.
diff --git a/source/slang/glsl.meta.slang b/source/slang/glsl.meta.slang
index eed6cc690..2a89f2b66 100644
--- a/source/slang/glsl.meta.slang
+++ b/source/slang/glsl.meta.slang
@@ -4296,7 +4296,7 @@ __generic<T : __BuiltinType>
case glsl:
{
if (__type_equals<T, float>())
- __requireGLSLExtension("GL_EXT_shader_atomic_float");
+ __requireTargetExtension("GL_EXT_shader_atomic_float");
}
case spirv:
if (__type_equals<T, float>())
@@ -4318,7 +4318,7 @@ __generic<T : __BuiltinType>
case glsl:
{
if (__type_equals<T, float>())
- __requireGLSLExtension("GL_EXT_shader_atomic_float2");
+ __requireTargetExtension("GL_EXT_shader_atomic_float2");
}
case spirv:
if (__type_equals<T, float>())
@@ -4758,7 +4758,7 @@ void requireGLSLExtForRayTracingBuiltin()
__target_switch
{
case glsl:
- __requireGLSLExtension("GL_EXT_ray_tracing");
+ __requireTargetExtension("GL_EXT_ray_tracing");
__intrinsic_asm "";
default:
return;
@@ -6304,22 +6304,22 @@ public void traceRayMotionNV(
__generic<T : __BuiltinType>
[ForceInline]
void typeRequireChecks_shader_subgroup_GLSL() {
- // the following is a seperate function call, since else the `__requireGLSLExtension` and associated __intrinsic_asm is ignored if the calling function also calls an __intrinsic_asm
+ // the following is a seperate function call, since else the `__requireTargetExtension` and associated __intrinsic_asm is ignored if the calling function also calls an __intrinsic_asm
__target_switch
{
case glsl:
if (__type_equals<T, half>()
|| __type_equals<T, float16_t>()
- ) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16");
+ ) __requireTargetExtension("GL_EXT_shader_subgroup_extended_types_float16");
else if (__type_equals<T, uint8_t>()
|| __type_equals<T, int8_t>()
- ) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_int8");
+ ) __requireTargetExtension("GL_EXT_shader_subgroup_extended_types_int8");
else if (__type_equals<T, uint16_t>()
|| __type_equals<T, int16_t>()
- ) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_int16");
+ ) __requireTargetExtension("GL_EXT_shader_subgroup_extended_types_int16");
else if (__type_equals<T, uint64_t>()
|| __type_equals<T, int64_t>()
- ) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_int64");
+ ) __requireTargetExtension("GL_EXT_shader_subgroup_extended_types_int64");
__intrinsic_asm "";
}
@@ -6327,7 +6327,7 @@ void typeRequireChecks_shader_subgroup_GLSL() {
__generic<T : __BuiltinType>
void shader_subgroup_preamble() {
- // checks needed for shader_subgroup functions; __requireGLSLExtension does not work
+ // checks needed for shader_subgroup functions; __requireTargetExtension does not work
// (does not add the ext specified correctly to the compile output; using extended type
// will result in error for using the type)
__target_switch
@@ -6347,14 +6347,14 @@ void requireGLSLExtForSubgroupBasicBuiltin() {
__target_switch
{
case glsl:
- __requireGLSLExtension("GL_KHR_shader_subgroup_basic");
+ __requireTargetExtension("GL_KHR_shader_subgroup_basic");
__intrinsic_asm "";
default:
return;
}
}
-[require(cpp_cuda_glsl_hlsl_spirv_wgsl, subgroup_basic)]
+[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, subgroup_basic)]
void setupExtForSubgroupBasicBuiltIn() {
__target_switch
{
@@ -6371,7 +6371,7 @@ void requireGLSLExtForSubgroupBallotBuiltin() {
__target_switch
{
case glsl:
- __requireGLSLExtension("GL_KHR_shader_subgroup_ballot");
+ __requireTargetExtension("GL_KHR_shader_subgroup_ballot");
__intrinsic_asm "";
default:
return;
@@ -6429,7 +6429,8 @@ public property uint gl_SubgroupID
public property uint gl_SubgroupSize
{
- [require(cpp_cuda_glsl_hlsl_spirv_wgsl, subgroup_basic)]
+ [ForceInline]
+ [require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, subgroup_basic)]
get {
setupExtForSubgroupBasicBuiltIn();
return WaveGetLaneCount();
@@ -6438,7 +6439,8 @@ public property uint gl_SubgroupSize
public property uint gl_SubgroupInvocationID
{
- [require(cpp_cuda_glsl_hlsl_spirv_wgsl, subgroup_basic)]
+ [ForceInline]
+ [require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, subgroup_basic)]
get {
setupExtForSubgroupBasicBuiltIn();
return WaveGetLaneIndex();
@@ -8388,8 +8390,8 @@ void typeRequireChecks_atomic_using_float0_tier()
{
case glsl:
{
- if (__type_equals<T, uint64_t>() || __type_equals<T, int64_t>())
- __requireGLSLExtension("GL_EXT_shader_atomic_int64");
+ if (__type_equals<T, uint64_t>() || __type_equals<T, int64_t>())
+ __requireTargetExtension("GL_EXT_shader_atomic_int64");
}
case spirv:
return;
@@ -8405,16 +8407,16 @@ void typeRequireChecks_atomic_using_float1_tier()
case glsl:
{
if (__type_equals<T, float>())
- __requireGLSLExtension("GL_EXT_shader_atomic_float");
+ __requireTargetExtension("GL_EXT_shader_atomic_float");
else if (__type_equals<T, half>() || __type_equals<T, float16_t>())
{
- __requireGLSLExtension("GL_EXT_shader_atomic_float2");
- __requireGLSLExtension("GL_EXT_shader_explicit_arithmetic_types");
+ __requireTargetExtension("GL_EXT_shader_atomic_float2");
+ __requireTargetExtension("GL_EXT_shader_explicit_arithmetic_types");
}
else if (__type_equals<T, double>())
- __requireGLSLExtension("GL_EXT_shader_atomic_float");
+ __requireTargetExtension("GL_EXT_shader_atomic_float");
else if (__type_equals<T, uint64_t>() || __type_equals<T, int64_t>())
- __requireGLSLExtension("GL_EXT_shader_atomic_int64");
+ __requireTargetExtension("GL_EXT_shader_atomic_int64");
}
case spirv:
return;
@@ -8430,16 +8432,16 @@ void typeRequireChecks_atomic_using_float2_tier()
case glsl:
{
if (__type_equals<T, float>())
- __requireGLSLExtension("GL_EXT_shader_atomic_float2");
+ __requireTargetExtension("GL_EXT_shader_atomic_float2");
else if (__type_equals<T, half>() || __type_equals<T, float16_t>())
{
- __requireGLSLExtension("GL_EXT_shader_atomic_float2");
- __requireGLSLExtension("GL_EXT_shader_explicit_arithmetic_types");
+ __requireTargetExtension("GL_EXT_shader_atomic_float2");
+ __requireTargetExtension("GL_EXT_shader_explicit_arithmetic_types");
}
else if (__type_equals<T, double>())
- __requireGLSLExtension("GL_EXT_shader_atomic_float2");
- else if (__type_equals<T, uint64_t>() || __type_equals<T, int64_t>())
- __requireGLSLExtension("GL_EXT_shader_atomic_int64");
+ __requireTargetExtension("GL_EXT_shader_atomic_float2");
+ else if (__type_equals<T, uint64_t>() || __type_equals<T, int64_t>())
+ __requireTargetExtension("GL_EXT_shader_atomic_int64");
}
case spirv:
return;
diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang
index a2b685b69..c9f3fb533 100644
--- a/source/slang/hlsl.meta.slang
+++ b/source/slang/hlsl.meta.slang
@@ -3,8 +3,14 @@
typedef uint UINT;
-__intrinsic_op($(kIROp_RequireGLSLExtension))
-void __requireGLSLExtension(String extensionName);
+__intrinsic_op($(kIROp_RequireTargetExtension))
+void __requireTargetExtension(constexpr String extensionName);
+
+/// Built-in values or system value semantics represented as in/out global variables.
+/// This allows the built-ins to be arbitrarily used from a global scope without being
+/// explicitly passed as entry point parameters.
+in uint __builtinWaveLaneIndex : SV_WaveLaneIndex;
+in uint __builtinWaveLaneCount : SV_WaveLaneCount;
//@public:
/// Represents an interface for buffer data layout.
@@ -3505,7 +3511,7 @@ extension _Texture<T,Shape,isArray,0,sampleCount,0,isShadow,isCombined,format>
__intrinsic_asm "<invalid intrinsics>";
case glsl:
if (isCombined == 0)
- __requireGLSLExtension("GL_EXT_samplerless_texture_functions");
+ __requireTargetExtension("GL_EXT_samplerless_texture_functions");
__intrinsic_asm "$ctexelFetch($0, ($1).$w1b, ($1).$w1e)$z";
case spirv:
const int lodLoc = Shape.dimensions+isArray;
@@ -3569,7 +3575,7 @@ extension _Texture<T,Shape,isArray,0,sampleCount,0,isShadow,isCombined,format>
__intrinsic_asm ".Load";
case glsl:
if (isCombined == 0)
- __requireGLSLExtension("GL_EXT_samplerless_texture_functions");
+ __requireTargetExtension("GL_EXT_samplerless_texture_functions");
__intrinsic_asm "$ctexelFetchOffset($0, ($1).$w1b, ($1).$w1e, ($2))$z";
case spirv:
const int lodLoc = Shape.dimensions+isArray;
@@ -3625,7 +3631,7 @@ extension _Texture<T,Shape,isArray,0,sampleCount,0,isShadow,isCombined,format>
return Load(__makeVector(location, 0));
case glsl:
if (isCombined == 0)
- __requireGLSLExtension("GL_EXT_samplerless_texture_functions");
+ __requireTargetExtension("GL_EXT_samplerless_texture_functions");
return Load(__makeVector(location, 0));
case spirv:
@@ -3702,7 +3708,7 @@ extension _Texture<T,Shape,isArray,1,sampleCount,0,isShadow,isCombined,format>
__intrinsic_asm "<Not supported>";
case glsl:
if (isCombined == 0)
- __requireGLSLExtension("GL_EXT_samplerless_texture_functions");
+ __requireTargetExtension("GL_EXT_samplerless_texture_functions");
__intrinsic_asm "$ctexelFetch($0, $1, ($2))$z";
case spirv:
if (isCombined != 0)
@@ -3752,7 +3758,7 @@ extension _Texture<T,Shape,isArray,1,sampleCount,0,isShadow,isCombined,format>
__intrinsic_asm ".Load";
case glsl:
if (isCombined == 0)
- __requireGLSLExtension("GL_EXT_samplerless_texture_functions");
+ __requireTargetExtension("GL_EXT_samplerless_texture_functions");
__intrinsic_asm "$ctexelFetchOffset($0, $1, ($2), ($3))$z";
case spirv:
if (isCombined != 0)
@@ -3807,7 +3813,7 @@ extension _Texture<T,Shape,isArray,1,sampleCount,0,isShadow,isCombined,format>
return Load(location, 0);
case glsl:
if (isCombined == 0)
- __requireGLSLExtension("GL_EXT_samplerless_texture_functions");
+ __requireTargetExtension("GL_EXT_samplerless_texture_functions");
return Load(location, 0);
}
}
@@ -3830,7 +3836,7 @@ extension _Texture<T,Shape,isArray,1,sampleCount,0,isShadow,isCombined,format>
return Load(location, sampleIndex);
case glsl:
if (isCombined == 0)
- __requireGLSLExtension("GL_EXT_samplerless_texture_functions");
+ __requireTargetExtension("GL_EXT_samplerless_texture_functions");
return Load(location, sampleIndex);
}
}
@@ -13913,7 +13919,7 @@ T WaveMaskSum(WaveMask mask, T expr)
__target_switch
{
case glsl:
- if (__isHalf<T>()) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16");
+ if (__isHalf<T>()) __requireTargetExtension("GL_EXT_shader_subgroup_extended_types_float16");
__intrinsic_asm "subgroupAdd($1)";
case cuda: __intrinsic_asm "_waveSum($0, $1)";
case hlsl: __intrinsic_asm "WaveActiveSum($1)";
@@ -13940,7 +13946,7 @@ vector<T,N> WaveMaskSum(WaveMask mask, vector<T,N> expr)
__target_switch
{
case glsl:
- if (__isHalf<T>()) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16");
+ if (__isHalf<T>()) __requireTargetExtension("GL_EXT_shader_subgroup_extended_types_float16");
__intrinsic_asm "subgroupAdd($1)";
case cuda: __intrinsic_asm "_waveSumMultiple($0, $1)";
case hlsl: __intrinsic_asm "WaveActiveSum($1)";
@@ -13979,7 +13985,7 @@ bool WaveMaskAllEqual(WaveMask mask, T value)
__target_switch
{
case glsl:
- if (__isHalf<T>()) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16");
+ if (__isHalf<T>()) __requireTargetExtension("GL_EXT_shader_subgroup_extended_types_float16");
__intrinsic_asm "subgroupAllEqual($1)";
case hlsl:
__intrinsic_asm "WaveActiveAllEqual($1)";
@@ -14003,7 +14009,7 @@ bool WaveMaskAllEqual(WaveMask mask, vector<T,N> value)
__target_switch
{
case glsl:
- if (__isHalf<T>()) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16");
+ if (__isHalf<T>()) __requireTargetExtension("GL_EXT_shader_subgroup_extended_types_float16");
__intrinsic_asm "subgroupAllEqual($1)";
case hlsl:
__intrinsic_asm "WaveActiveAllEqual($1)";
@@ -14040,7 +14046,7 @@ T WaveMaskPrefixProduct(WaveMask mask, T expr)
__target_switch
{
case glsl:
- if (__isHalf<T>()) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16");
+ if (__isHalf<T>()) __requireTargetExtension("GL_EXT_shader_subgroup_extended_types_float16");
__intrinsic_asm "subgroupExclusiveMul($1)";
case cuda: __intrinsic_asm "_wavePrefixProduct($0, $1)";
case hlsl: __intrinsic_asm "WavePrefixProduct($1)";
@@ -14067,7 +14073,7 @@ vector<T,N> WaveMaskPrefixProduct(WaveMask mask, vector<T,N> expr)
__target_switch
{
case glsl:
- if (__isHalf<T>()) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16");
+ if (__isHalf<T>()) __requireTargetExtension("GL_EXT_shader_subgroup_extended_types_float16");
__intrinsic_asm "subgroupExclusiveMul($1)";
case cuda: __intrinsic_asm "_wavePrefixProductMultiple($0, $1)";
case hlsl: __intrinsic_asm "WavePrefixProduct($1)";
@@ -14105,7 +14111,7 @@ T WaveMaskPrefixSum(WaveMask mask, T expr)
__target_switch
{
case glsl:
- if (__isHalf<T>()) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16");
+ if (__isHalf<T>()) __requireTargetExtension("GL_EXT_shader_subgroup_extended_types_float16");
__intrinsic_asm "subgroupExclusiveAdd($1)";
case cuda: __intrinsic_asm "_wavePrefixSum($0, $1)";
case hlsl: __intrinsic_asm "WavePrefixSum($1)";
@@ -14133,7 +14139,7 @@ vector<T,N> WaveMaskPrefixSum(WaveMask mask, vector<T,N> expr)
__target_switch
{
case glsl:
- if (__isHalf<T>()) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16");
+ if (__isHalf<T>()) __requireTargetExtension("GL_EXT_shader_subgroup_extended_types_float16");
__intrinsic_asm "subgroupExclusiveAdd($1)";
case cuda: __intrinsic_asm "_wavePrefixSumMultiple($0, $1)";
case hlsl: __intrinsic_asm "WavePrefixSum($1)";
@@ -14761,7 +14767,7 @@ T WaveActive$(opName.hlslName)(T expr)
__target_switch
{
case glsl:
- if (__isHalf<T>()) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16");
+ if (__isHalf<T>()) __requireTargetExtension("GL_EXT_shader_subgroup_extended_types_float16");
__intrinsic_asm "subgroup$(opName.glslName)($0)";
case hlsl: __intrinsic_asm "WaveActive$(opName.hlslName)";
case metal: __intrinsic_asm "simd_$(opName.metalName)";
@@ -14796,7 +14802,7 @@ vector<T,N> WaveActive$(opName.hlslName)(vector<T,N> expr)
__target_switch
{
case glsl:
- if (__isHalf<T>()) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16");
+ if (__isHalf<T>()) __requireTargetExtension("GL_EXT_shader_subgroup_extended_types_float16");
__intrinsic_asm "subgroup$(opName.glslName)($0)";
case hlsl: __intrinsic_asm "WaveActive$(opName.hlslName)";
case metal: __intrinsic_asm "simd_$(opName.metalName)";
@@ -15018,7 +15024,8 @@ uint WaveActiveCountBits(bool value)
__glsl_extension(GL_KHR_shader_subgroup_basic)
__spirv_version(1.3)
[NonUniformReturn]
-[require(cuda_glsl_hlsl_spirv, subgroup_basic)]
+[ForceInline]
+[require(cuda_glsl_hlsl_metal_spirv_wgsl, subgroup_basic)]
uint WaveGetLaneCount()
{
__target_switch
@@ -15032,6 +15039,11 @@ uint WaveGetLaneCount()
OpCapability GroupNonUniform;
result:$$uint = OpLoad builtin(SubgroupSize:uint)
};
+ case metal:
+ return __builtinWaveLaneCount;
+ case wgsl:
+ __requireTargetExtension("subgroups");
+ return __builtinWaveLaneCount;
}
}
@@ -15039,7 +15051,8 @@ uint WaveGetLaneCount()
__glsl_extension(GL_KHR_shader_subgroup_basic)
__spirv_version(1.3)
[NonUniformReturn]
-[require(cuda_glsl_hlsl_spirv, subgroup_basic)]
+[ForceInline]
+[require(cuda_glsl_hlsl_metal_spirv_wgsl, subgroup_basic)]
uint WaveGetLaneIndex()
{
__target_switch
@@ -15053,6 +15066,11 @@ uint WaveGetLaneIndex()
OpCapability GroupNonUniform;
result:$$uint = OpLoad builtin(SubgroupLocalInvocationId:uint)
};
+ case metal:
+ return __builtinWaveLaneIndex;
+ case wgsl:
+ __requireTargetExtension("subgroups");
+ return __builtinWaveLaneIndex;
}
}
@@ -15122,7 +15140,7 @@ T WavePrefixProduct(T expr)
__target_switch
{
case glsl:
- if (__isHalf<T>()) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16");
+ if (__isHalf<T>()) __requireTargetExtension("GL_EXT_shader_subgroup_extended_types_float16");
__intrinsic_asm "subgroupExclusiveMul($0)";
case hlsl: __intrinsic_asm "WavePrefixProduct";
case metal: __intrinsic_asm "simd_prefix_exclusive_product";
@@ -15158,7 +15176,7 @@ vector<T,N> WavePrefixProduct(vector<T,N> expr)
__target_switch
{
case glsl:
- if (__isHalf<T>()) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16");
+ if (__isHalf<T>()) __requireTargetExtension("GL_EXT_shader_subgroup_extended_types_float16");
__intrinsic_asm "subgroupExclusiveMul($0)";
case hlsl: __intrinsic_asm "WavePrefixProduct";
case metal: __intrinsic_asm "simd_prefix_exclusive_product";
@@ -15209,7 +15227,7 @@ T WavePrefixSum(T expr)
__target_switch
{
case glsl:
- if (__isHalf<T>()) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16");
+ if (__isHalf<T>()) __requireTargetExtension("GL_EXT_shader_subgroup_extended_types_float16");
__intrinsic_asm "subgroupExclusiveAdd($0)";
case hlsl: __intrinsic_asm "WavePrefixSum";
case metal: __intrinsic_asm "simd_prefix_exclusive_sum";
@@ -15241,7 +15259,7 @@ vector<T,N> WavePrefixSum(vector<T,N> expr)
__target_switch
{
case glsl:
- if (__isHalf<T>()) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16");
+ if (__isHalf<T>()) __requireTargetExtension("GL_EXT_shader_subgroup_extended_types_float16");
__intrinsic_asm "subgroupExclusiveAdd($0)";
case hlsl: __intrinsic_asm "WavePrefixSum";
case metal: __intrinsic_asm "simd_prefix_exclusive_sum";
@@ -15292,7 +15310,7 @@ T WaveReadLaneFirst(T expr)
__target_switch
{
case glsl:
- if (__isHalf<T>()) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16");
+ if (__isHalf<T>()) __requireTargetExtension("GL_EXT_shader_subgroup_extended_types_float16");
__intrinsic_asm "subgroupBroadcastFirst($0)";
case hlsl: __intrinsic_asm "WaveReadLaneFirst";
case metal: __intrinsic_asm "simd_broadcast_first";
@@ -15314,7 +15332,7 @@ vector<T,N> WaveReadLaneFirst(vector<T,N> expr)
__target_switch
{
case glsl:
- if (__isHalf<T>()) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16");
+ if (__isHalf<T>()) __requireTargetExtension("GL_EXT_shader_subgroup_extended_types_float16");
__intrinsic_asm "subgroupBroadcastFirst($0)";
case hlsl: __intrinsic_asm "WaveReadLaneFirst";
case metal: __intrinsic_asm "simd_broadcast_first";
@@ -15360,7 +15378,7 @@ T WaveBroadcastLaneAt(T value, constexpr int lane)
__target_switch
{
case glsl:
- if (__isHalf<T>()) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16");
+ if (__isHalf<T>()) __requireTargetExtension("GL_EXT_shader_subgroup_extended_types_float16");
__intrinsic_asm "subgroupBroadcast($0, $1)";
case hlsl: __intrinsic_asm "WaveReadLaneAt";
case metal: __intrinsic_asm "simd_broadcast($0, ushort($1))";
@@ -15384,7 +15402,7 @@ vector<T,N> WaveBroadcastLaneAt(vector<T,N> value, constexpr int lane)
__target_switch
{
case glsl:
- if (__isHalf<T>()) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16");
+ if (__isHalf<T>()) __requireTargetExtension("GL_EXT_shader_subgroup_extended_types_float16");
__intrinsic_asm "subgroupBroadcast($0, $1)";
case hlsl: __intrinsic_asm "WaveReadLaneAt";
case metal: __intrinsic_asm "simd_broadcast($0, ushort($1))";
@@ -15426,7 +15444,7 @@ T WaveReadLaneAt(T value, int lane)
__target_switch
{
case glsl:
- if (__isHalf<T>()) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16");
+ if (__isHalf<T>()) __requireTargetExtension("GL_EXT_shader_subgroup_extended_types_float16");
__intrinsic_asm "subgroupShuffle($0, $1)";
case hlsl: __intrinsic_asm "WaveReadLaneAt";
case metal: __intrinsic_asm "simd_shuffle($0, ushort($1))";
@@ -15449,7 +15467,7 @@ vector<T,N> WaveReadLaneAt(vector<T,N> value, int lane)
__target_switch
{
case glsl:
- if (__isHalf<T>()) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16");
+ if (__isHalf<T>()) __requireTargetExtension("GL_EXT_shader_subgroup_extended_types_float16");
__intrinsic_asm "subgroupShuffle($0, $1)";
case hlsl: __intrinsic_asm "WaveReadLaneAt";
case metal: __intrinsic_asm "simd_shuffle($0, ushort($1))";
@@ -15492,7 +15510,7 @@ T WaveShuffle(T value, int lane)
__target_switch
{
case glsl:
- if (__isHalf<T>()) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16");
+ if (__isHalf<T>()) __requireTargetExtension("GL_EXT_shader_subgroup_extended_types_float16");
__intrinsic_asm "subgroupShuffle($0, $1)";
case hlsl: __intrinsic_asm "WaveReadLaneAt";
case metal: __intrinsic_asm "simd_shuffle($0, ushort($1))";
@@ -15516,7 +15534,7 @@ vector<T,N> WaveShuffle(vector<T,N> value, int lane)
__target_switch
{
case glsl:
- if (__isHalf<T>()) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16");
+ if (__isHalf<T>()) __requireTargetExtension("GL_EXT_shader_subgroup_extended_types_float16");
__intrinsic_asm "subgroupShuffle($0, $1)";
case hlsl: __intrinsic_asm "WaveReadLaneAt";
case metal: __intrinsic_asm "simd_shuffle($0, ushort($1))";
@@ -16158,7 +16176,7 @@ extension _Texture<T, __ShapeBuffer, 0, 0, 0, $(aa), 0, 0, format>
{
case hlsl: __intrinsic_asm ".GetDimensions";
case glsl:
- __requireGLSLExtension("GL_EXT_samplerless_texture_functions");
+ __requireTargetExtension("GL_EXT_samplerless_texture_functions");
__intrinsic_asm "($1 = $(glslTextureSizeFunc)($0))";
case metal: __intrinsic_asm "(*($1) = $0.get_width())";
case spirv:
@@ -16178,7 +16196,7 @@ extension _Texture<T, __ShapeBuffer, 0, 0, 0, $(aa), 0, 0, format>
case hlsl: __intrinsic_asm ".Load";
case metal: __intrinsic_asm "$c$0.read(uint($1))$z";
case glsl:
- __requireGLSLExtension("GL_EXT_samplerless_texture_functions");
+ __requireTargetExtension("GL_EXT_samplerless_texture_functions");
__intrinsic_asm "$(glslLoadFuncName)($0, $1)$z";
case spirv: return spirv_asm {
%sampled:__sampledType(T) = $(spvLoadInstName) $this $location;
diff --git a/source/slang/slang-core-module-textures.cpp b/source/slang/slang-core-module-textures.cpp
index 22c1fc63f..f703a8a3b 100644
--- a/source/slang/slang-core-module-textures.cpp
+++ b/source/slang/slang-core-module-textures.cpp
@@ -439,7 +439,7 @@ void TextureTypeInfo::writeGetDimensionFunctions()
}
};
glsl << "if (isCombined == 0) { "
- "__requireGLSLExtension(\"GL_EXT_samplerless_texture_functions\"); }\n";
+ "__requireTargetExtension(\"GL_EXT_samplerless_texture_functions\"); }\n";
glsl << "if (access == " << kCoreModule_ResourceAccessReadOnly
<< ") __intrinsic_asm \"";
emitIntrinsic(toSlice("textureSize"), !isMultisample);
diff --git a/source/slang/slang-emit-c-like.cpp b/source/slang/slang-emit-c-like.cpp
index 946e9c429..1c48d98ef 100644
--- a/source/slang/slang-emit-c-like.cpp
+++ b/source/slang/slang-emit-c-like.cpp
@@ -3061,10 +3061,6 @@ void CLikeSourceEmitter::defaultEmitInstExpr(IRInst* inst, const EmitOpInfo& inO
m_requiredPreludes.add(preludeTextInst);
break;
}
- case kIROp_RequireGLSLExtension:
- {
- break; // should already have set requirement; case covered for empty intrinsic block
- }
case kIROp_RequireComputeDerivative:
{
break; // should already have been parsed and used.
@@ -3074,6 +3070,11 @@ void CLikeSourceEmitter::defaultEmitInstExpr(IRInst* inst, const EmitOpInfo& inO
emitOperand(as<IRGlobalValueRef>(inst)->getOperand(0), getInfo(EmitOp::General));
break;
}
+ case kIROp_RequireTargetExtension:
+ {
+ emitRequireExtension(as<IRRequireTargetExtension>(inst));
+ break;
+ }
default:
diagnoseUnhandledInst(inst);
break;
diff --git a/source/slang/slang-emit-c-like.h b/source/slang/slang-emit-c-like.h
index 6fe7f5d34..ca915ab2d 100644
--- a/source/slang/slang-emit-c-like.h
+++ b/source/slang/slang-emit-c-like.h
@@ -678,6 +678,8 @@ protected:
void _emitCallArgList(IRCall* call, int startingOperandIndex = 1);
virtual void emitCallArg(IRInst* arg);
+ virtual void emitRequireExtension(IRRequireTargetExtension* inst) { SLANG_UNUSED(inst); }
+
String _generateUniqueName(const UnownedStringSlice& slice);
// Sort witnessTable entries according to the order defined in the witnessed interface type.
diff --git a/source/slang/slang-emit-glsl.cpp b/source/slang/slang-emit-glsl.cpp
index 776c539b4..696830bf2 100644
--- a/source/slang/slang-emit-glsl.cpp
+++ b/source/slang/slang-emit-glsl.cpp
@@ -30,7 +30,7 @@ void GLSLSourceEmitter::_beforeComputeEmitProcessInstruction(
IRInst* inst,
IRBuilder& builder)
{
- if (auto requireGLSLExt = as<IRRequireGLSLExtension>(inst))
+ if (auto requireGLSLExt = as<IRRequireTargetExtension>(inst))
{
_requireGLSLExtension(requireGLSLExt->getExtensionName());
return;
diff --git a/source/slang/slang-emit-wgsl.cpp b/source/slang/slang-emit-wgsl.cpp
index 13c79e9ac..7c83b194d 100644
--- a/source/slang/slang-emit-wgsl.cpp
+++ b/source/slang/slang-emit-wgsl.cpp
@@ -1696,4 +1696,9 @@ void WGSLSourceEmitter::handleRequiredCapabilitiesImpl(IRInst* inst)
}
}
+void WGSLSourceEmitter::emitRequireExtension(IRRequireTargetExtension* inst)
+{
+ _requireExtension(inst->getExtensionName());
+}
+
} // namespace Slang
diff --git a/source/slang/slang-emit-wgsl.h b/source/slang/slang-emit-wgsl.h
index 441933b57..a29f39a1d 100644
--- a/source/slang/slang-emit-wgsl.h
+++ b/source/slang/slang-emit-wgsl.h
@@ -57,6 +57,8 @@ public:
EmitOpInfo const& inOuterPrec) SLANG_OVERRIDE;
virtual void emitGlobalParamDefaultVal(IRGlobalParam* varDecl) SLANG_OVERRIDE;
+ virtual void emitRequireExtension(IRRequireTargetExtension* inst) SLANG_OVERRIDE;
+
virtual void handleRequiredCapabilitiesImpl(IRInst* inst) SLANG_OVERRIDE;
void emit(const AddressSpace addressSpace);
diff --git a/source/slang/slang-emit.cpp b/source/slang/slang-emit.cpp
index 847c5b55c..ddb4ea67a 100644
--- a/source/slang/slang-emit.cpp
+++ b/source/slang/slang-emit.cpp
@@ -100,7 +100,7 @@
#include "slang-ir-strip-default-construct.h"
#include "slang-ir-strip-legalization-insts.h"
#include "slang-ir-synthesize-active-mask.h"
-#include "slang-ir-translate-glsl-global-var.h"
+#include "slang-ir-translate-global-varying-var.h"
#include "slang-ir-uniformity.h"
#include "slang-ir-user-type-hint.h"
#include "slang-ir-validate.h"
@@ -318,7 +318,7 @@ struct RequiredLoweringPassSet
bool bindingQuery;
bool meshOutput;
bool higherOrderFunc;
- bool glslGlobalVar;
+ bool globalVaryingVar;
bool glslSSBO;
bool byteAddressBuffer;
bool dynamicResource;
@@ -422,7 +422,7 @@ void calcRequiredLoweringPassSet(
case kIROp_GlobalInputDecoration:
case kIROp_GlobalOutputDecoration:
case kIROp_GetWorkGroupSize:
- result.glslGlobalVar = true;
+ result.globalVaryingVar = true;
break;
case kIROp_BindExistentialSlotsDecoration:
result.bindExistential = true;
@@ -667,8 +667,8 @@ Result linkAndOptimizeIR(
if (!isKhronosTarget(targetRequest) && requiredLoweringPassSet.glslSSBO)
lowerGLSLShaderStorageBufferObjectsToStructuredBuffers(irModule, sink);
- if (requiredLoweringPassSet.glslGlobalVar)
- translateGLSLGlobalVar(codeGenContext, irModule);
+ if (requiredLoweringPassSet.globalVaryingVar)
+ translateGlobalVaryingVar(codeGenContext, irModule);
if (requiredLoweringPassSet.resolveVaryingInputRef)
resolveVaryingInputRef(irModule);
diff --git a/source/slang/slang-ir-call-graph.h b/source/slang/slang-ir-call-graph.h
index 4ee642356..b7290ef79 100644
--- a/source/slang/slang-ir-call-graph.h
+++ b/source/slang/slang-ir-call-graph.h
@@ -1,3 +1,6 @@
+// slang-ir-call-graph.h
+#pragma once
+
#include "slang-ir-clone.h"
#include "slang-ir-insts.h"
diff --git a/source/slang/slang-ir-inst-defs.h b/source/slang/slang-ir-inst-defs.h
index 3e2872cb7..714ba146d 100644
--- a/source/slang/slang-ir-inst-defs.h
+++ b/source/slang/slang-ir-inst-defs.h
@@ -407,7 +407,7 @@ INST(WitnessTableEntry, witness_table_entry, 2, 0)
INST(InterfaceRequirementEntry, interface_req_entry, 2, GLOBAL)
// An inst to represent the workgroup size of the calling entry point.
-// We will materialize this inst during `translateGLSLGlobalVar`.
+// We will materialize this inst during `translateGlobalVaryingVar`.
INST(GetWorkGroupSize, GetWorkGroupSize, 0, HOISTABLE)
// An inst that returns the current stage of the calling entry point.
@@ -666,7 +666,7 @@ INST_RANGE(TerminatorInst, Return, Unreachable)
INST(discard, discard, 0, 0)
INST(RequirePrelude, RequirePrelude, 1, 0)
-INST(RequireGLSLExtension, RequireGLSLExtension, 1, 0)
+INST(RequireTargetExtension, RequireTargetExtension, 1, 0)
INST(RequireComputeDerivative, RequireComputeDerivative, 0, 0)
INST(StaticAssert, StaticAssert, 2, 0)
INST(Printf, Printf, 1, 0)
diff --git a/source/slang/slang-ir-insts.h b/source/slang/slang-ir-insts.h
index 4efb7d671..5231592ca 100644
--- a/source/slang/slang-ir-insts.h
+++ b/source/slang/slang-ir-insts.h
@@ -3506,9 +3506,9 @@ struct IRRequirePrelude : IRInst
UnownedStringSlice getPrelude() { return as<IRStringLit>(getOperand(0))->getStringSlice(); }
};
-struct IRRequireGLSLExtension : IRInst
+struct IRRequireTargetExtension : IRInst
{
- IR_LEAF_ISA(RequireGLSLExtension)
+ IR_LEAF_ISA(RequireTargetExtension)
UnownedStringSlice getExtensionName()
{
return as<IRStringLit>(getOperand(0))->getStringSlice();
diff --git a/source/slang/slang-ir-legalize-varying-params.cpp b/source/slang/slang-ir-legalize-varying-params.cpp
index 3b65ee59a..e744969db 100644
--- a/source/slang/slang-ir-legalize-varying-params.cpp
+++ b/source/slang/slang-ir-legalize-varying-params.cpp
@@ -3228,6 +3228,20 @@ protected:
result.permittedTypes.add(builder.getBasicType(BaseType::UInt));
break;
}
+ case SystemValueSemanticName::WaveLaneCount:
+ {
+ result.systemValueName = toSlice("threads_per_simdgroup");
+ result.permittedTypes.add(builder.getUIntType());
+ result.permittedTypes.add(builder.getUInt16Type());
+ break;
+ }
+ case SystemValueSemanticName::WaveLaneIndex:
+ {
+ result.systemValueName = toSlice("thread_index_in_simdgroup");
+ result.permittedTypes.add(builder.getUIntType());
+ result.permittedTypes.add(builder.getUInt16Type());
+ break;
+ }
default:
m_sink->diagnose(
parentVar,
@@ -3845,6 +3859,20 @@ protected:
break;
}
+ case SystemValueSemanticName::WaveLaneCount:
+ {
+ result.systemValueName = toSlice("subgroup_size");
+ result.permittedTypes.add(builder.getUIntType());
+ break;
+ }
+
+ case SystemValueSemanticName::WaveLaneIndex:
+ {
+ result.systemValueName = toSlice("subgroup_invocation_id");
+ result.permittedTypes.add(builder.getUIntType());
+ break;
+ }
+
case SystemValueSemanticName::ViewID:
case SystemValueSemanticName::ViewportArrayIndex:
case SystemValueSemanticName::StartVertexLocation:
@@ -3853,7 +3881,6 @@ protected:
result.isUnsupported = true;
break;
}
-
default:
{
m_sink->diagnose(
diff --git a/source/slang/slang-ir-legalize-varying-params.h b/source/slang/slang-ir-legalize-varying-params.h
index e742f3093..0a7c3be8e 100644
--- a/source/slang/slang-ir-legalize-varying-params.h
+++ b/source/slang/slang-ir-legalize-varying-params.h
@@ -68,6 +68,8 @@ void depointerizeInputParams(IRFunc* entryPoint);
M(Target, SV_Target) \
M(StartVertexLocation, SV_StartVertexLocation) \
M(StartInstanceLocation, SV_StartInstanceLocation) \
+ M(WaveLaneCount, SV_WaveLaneCount) \
+ M(WaveLaneIndex, SV_WaveLaneIndex) \
/* end */
/// A known system-value semantic name that can be applied to a parameter
diff --git a/source/slang/slang-ir-translate-glsl-global-var.cpp b/source/slang/slang-ir-translate-global-varying-var.cpp
index 80ed3c3e4..80f5c42c3 100644
--- a/source/slang/slang-ir-translate-glsl-global-var.cpp
+++ b/source/slang/slang-ir-translate-global-varying-var.cpp
@@ -1,4 +1,4 @@
-#include "slang-ir-translate-glsl-global-var.h"
+#include "slang-ir-translate-global-varying-var.h"
#include "slang-ir-call-graph.h"
#include "slang-ir-insts.h"
@@ -152,8 +152,8 @@ struct GlobalVarTranslationContext
builder.getPtrType(kIROp_ConstRefType, inputStructType, AddressSpace::Input));
builder.addLayoutDecoration(inputParam, paramLayout);
- // Initialize all global variables.
- for (Index i = 0; i < inputVars.getCount(); i++)
+ // Initialize all global variables; iterate in reverse because each initialization is inserted before the first ordinary inst, which yields struct member declaration order.
+ for (Index i = inputVars.getCount() - 1; i >= 0; i--)
{
auto input = inputVars[i];
setInsertBeforeOrdinaryInst(&builder, firstBlock->getFirstOrdinaryInst());
@@ -373,7 +373,7 @@ struct GlobalVarTranslationContext
}
};
-void translateGLSLGlobalVar(CodeGenContext* context, IRModule* module)
+void translateGlobalVaryingVar(CodeGenContext* context, IRModule* module)
{
GlobalVarTranslationContext ctx;
ctx.context = context;
diff --git a/source/slang/slang-ir-translate-global-varying-var.h b/source/slang/slang-ir-translate-global-varying-var.h
new file mode 100644
index 000000000..f97683700
--- /dev/null
+++ b/source/slang/slang-ir-translate-global-varying-var.h
@@ -0,0 +1,14 @@
+// slang-ir-translate-global-varying-var.h
+#pragma once
+
+namespace Slang
+{
+
+struct IRModule;
+struct CodeGenContext;
+
+/// Translate GLSL-flavored global in/out variables into
+/// entry point parameters with system value semantics.
+void translateGlobalVaryingVar(CodeGenContext* context, IRModule* module);
+
+} // namespace Slang
diff --git a/source/slang/slang-ir-translate-glsl-global-var.h b/source/slang/slang-ir-translate-glsl-global-var.h
deleted file mode 100644
index 5821ba5c5..000000000
--- a/source/slang/slang-ir-translate-glsl-global-var.h
+++ /dev/null
@@ -1,17 +0,0 @@
-// slang-ir-translate-glsl-global-var.h
-#ifndef SLANG_IR_TRANSLATE_GLSL_GLOBAL_VAR_H
-#define SLANG_IR_TRANSLATE_GLSL_GLOBAL_VAR_H
-
-namespace Slang
-{
-
-struct IRModule;
-struct CodeGenContext;
-
-/// Translate global in/out variables defined in GLSL-flavored code
-/// into entry point parameters with system value semantics.
-void translateGLSLGlobalVar(CodeGenContext* context, IRModule* module);
-
-} // namespace Slang
-
-#endif // SLANG_IR_TRANSLATE_GLSL_GLOBAL_VAR_H