summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDarren Wihandi <65404740+fairywreath@users.noreply.github.com>2025-02-10 19:40:39 -0500
committerGitHub <noreply@github.com>2025-02-10 16:40:39 -0800
commit133bd259c00984c6a01869f71951a7feb919463a (patch)
treea69f1a6b3caff0ac4d958453fde6176ab3c66c91
parentf761ab0586353da67bf7b3ae395ad7b090cd904f (diff)
Add support for Metal subgroup/simd operations (#6247)
* initial work for metal subgroups
* add glsl intrinsics
* enable wave tests
* enable glsl subgroup tests, glsl barrier fixes
* minor fixes
* fix incorrect test target
* disable some glsl functional tests
* disable failing glsl test

---------

Co-authored-by: Yong He <yonghe@outlook.com>
-rw-r--r--source/slang/glsl.meta.slang137
-rw-r--r--source/slang/hlsl.meta.slang249
-rw-r--r--source/slang/slang-capabilities.capdef10
-rw-r--r--tests/glsl-intrinsic/shader-subgroup/shader-subgroup-arithmetic_Exclusive.slang39
-rw-r--r--tests/glsl-intrinsic/shader-subgroup/shader-subgroup-arithmetic_Inclusive.slang40
-rw-r--r--tests/glsl-intrinsic/shader-subgroup/shader-subgroup-arithmetic_None.slang50
-rw-r--r--tests/glsl-intrinsic/shader-subgroup/shader-subgroup-ballot.slang29
-rw-r--r--tests/glsl-intrinsic/shader-subgroup/shader-subgroup-basic.slang1
-rw-r--r--tests/glsl-intrinsic/shader-subgroup/shader-subgroup-shuffle-relative.slang25
-rw-r--r--tests/glsl-intrinsic/shader-subgroup/shader-subgroup-shuffle.slang38
-rw-r--r--tests/glsl-intrinsic/shader-subgroup/shader-subgroup-vote.slang5
-rw-r--r--tests/hlsl-intrinsic/wave-active-product.slang3
-rw-r--r--tests/hlsl-intrinsic/wave-broadcast-lane-at-vk.slang3
-rw-r--r--tests/hlsl-intrinsic/wave-diverge.slang3
-rw-r--r--tests/hlsl-intrinsic/wave-is-first-lane.slang3
-rw-r--r--tests/hlsl-intrinsic/wave-prefix-product.slang3
-rw-r--r--tests/hlsl-intrinsic/wave-prefix-sum-fp16.slang4
-rw-r--r--tests/hlsl-intrinsic/wave-prefix-sum.slang3
-rw-r--r--tests/hlsl-intrinsic/wave-read-lane-at-vk.slang3
-rw-r--r--tests/hlsl-intrinsic/wave-shuffle-vk.slang3
-rw-r--r--tests/hlsl-intrinsic/wave-vector.slang3
-rw-r--r--tests/hlsl-intrinsic/wave.slang1
22 files changed, 399 insertions, 256 deletions
diff --git a/source/slang/glsl.meta.slang b/source/slang/glsl.meta.slang
index 0bad3c681..6f0ca1bf3 100644
--- a/source/slang/glsl.meta.slang
+++ b/source/slang/glsl.meta.slang
@@ -6525,7 +6525,7 @@ public property uvec4 gl_SubgroupLtMask
__spirv_version(1.3)
__glsl_extension(GL_KHR_shader_subgroup_basic)
[ForceInline]
-[require(cuda_glsl_hlsl_spirv, subgroup_basic)]
+[require(cuda_glsl_hlsl_metal_spirv, subgroup_basic)]
public void subgroupBarrier()
{
__target_switch
@@ -6536,6 +6536,8 @@ public void subgroupBarrier()
__intrinsic_asm "AllMemoryBarrierWithGroupSync()";
case glsl:
__intrinsic_asm "subgroupBarrier()";
+ case metal:
+ __intrinsic_asm "simdgroup_barrier(mem_flags::mem_none)";
case spirv:
spirv_asm {
OpCapability Shader;
@@ -6548,7 +6550,7 @@ public void subgroupBarrier()
__spirv_version(1.3)
__glsl_extension(GL_KHR_shader_subgroup_basic)
[ForceInline]
-[require(cuda_glsl_hlsl_spirv, subgroup_basic)]
+[require(cuda_glsl_hlsl_metal_spirv, subgroup_basic)]
public void subgroupMemoryBarrier()
{
__target_switch
@@ -6559,6 +6561,8 @@ public void subgroupMemoryBarrier()
__intrinsic_asm "AllMemoryBarrier()";
case glsl:
__intrinsic_asm "subgroupMemoryBarrier()";
+ case metal:
+ __intrinsic_asm "simdgroup_barrier(mem_flags::mem_device)";
case spirv:
spirv_asm {
OpCapability Shader;
@@ -6571,7 +6575,7 @@ public void subgroupMemoryBarrier()
__spirv_version(1.3)
__glsl_extension(GL_KHR_shader_subgroup_basic)
[ForceInline]
-[require(cuda_glsl_hlsl_spirv, subgroup_basic)]
+[require(cuda_glsl_hlsl_metal_spirv, subgroup_basic)]
public void subgroupMemoryBarrierBuffer()
{
// the following implementation is NOT the same as DeviceMemoryBarrier
@@ -6584,6 +6588,8 @@ public void subgroupMemoryBarrierBuffer()
__intrinsic_asm "DeviceMemoryBarrier()";
case glsl:
__intrinsic_asm "subgroupMemoryBarrierBuffer()";
+ case metal:
+ __intrinsic_asm "simdgroup_barrier(mem_flags::mem_device)";
case spirv:
spirv_asm {
OpCapability Shader;
@@ -6596,7 +6602,7 @@ public void subgroupMemoryBarrierBuffer()
__spirv_version(1.3)
__glsl_extension(GL_KHR_shader_subgroup_basic)
[ForceInline]
-[require(cuda_glsl_hlsl_spirv, subgroup_basic)]
+[require(cuda_glsl_hlsl_metal_spirv, subgroup_basic)]
public void subgroupMemoryBarrierImage()
{
__target_switch
@@ -6607,6 +6613,8 @@ public void subgroupMemoryBarrierImage()
__intrinsic_asm "DeviceMemoryBarrier()";
case glsl:
__intrinsic_asm "subgroupMemoryBarrierImage()";
+ case metal:
+ __intrinsic_asm "simdgroup_barrier(mem_flags::mem_texture)";
case spirv:
spirv_asm {
OpMemoryBarrier Subgroup AcquireRelease|ImageMemory
@@ -6618,7 +6626,7 @@ public void subgroupMemoryBarrierImage()
__spirv_version(1.3)
__glsl_extension(GL_KHR_shader_subgroup_basic)
[ForceInline]
-[require(cuda_glsl_hlsl_spirv, subgroup_basic)]
+[require(cuda_glsl_hlsl_metal_spirv, subgroup_basic)]
public void subgroupMemoryBarrierShared()
{
__target_switch
@@ -6629,6 +6637,8 @@ public void subgroupMemoryBarrierShared()
__intrinsic_asm "GroupMemoryBarrier()";
case glsl:
__intrinsic_asm "subgroupMemoryBarrierShared()";
+ case metal:
+ __intrinsic_asm "simdgroup_barrier(mem_flags::mem_threadgroup)";
case spirv:
spirv_asm {
// SubgroupMemory triggers vulkan validation layer error;
@@ -6642,17 +6652,14 @@ public void subgroupMemoryBarrierShared()
__spirv_version(1.3)
__glsl_extension(GL_KHR_shader_subgroup_basic)
[ForceInline]
-[require(cuda_glsl_hlsl_spirv_wgsl, subgroup_basic)]
+[require(cuda_glsl_hlsl_metal_spirv_wgsl, subgroup_basic)]
public bool subgroupElect()
{
__target_switch
{
case cuda:
__intrinsic_asm "( (__activemask() & (__activemask()*-1)) == _getLaneId())";
- case glsl:
- case spirv:
- case hlsl:
- case wgsl:
+ default:
return WaveIsFirstLane();
}
@@ -6663,7 +6670,7 @@ public bool subgroupElect()
__spirv_version(1.3)
__glsl_extension(GL_KHR_shader_subgroup_vote)
[ForceInline]
-[require(cuda_glsl_hlsl_spirv_wgsl, subgroup_vote)]
+[require(cuda_glsl_hlsl_metal_spirv_wgsl, subgroup_vote)]
public bool subgroupAll(bool value)
{
return WaveActiveAllTrue(value);
@@ -6672,7 +6679,7 @@ public bool subgroupAll(bool value)
__spirv_version(1.3)
__glsl_extension(GL_KHR_shader_subgroup_vote)
[ForceInline]
-[require(cuda_glsl_hlsl_spirv_wgsl, subgroup_vote)]
+[require(cuda_glsl_hlsl_metal_spirv_wgsl, subgroup_vote)]
public bool subgroupAny(bool value)
{
return WaveActiveAnyTrue(value);
@@ -6706,7 +6713,7 @@ __generic<T : __BuiltinArithmeticType>
__spirv_version(1.3)
__glsl_extension(GL_KHR_shader_subgroup_arithmetic)
[ForceInline]
-[require(cuda_glsl_hlsl_spirv_wgsl, subgroup_arithmetic)]
+[require(cuda_glsl_hlsl_metal_spirv_wgsl, subgroup_arithmetic)]
public T subgroupAdd(T value)
{
shader_subgroup_preamble<T>();
@@ -6717,7 +6724,7 @@ __generic<T : __BuiltinArithmeticType>
__spirv_version(1.3)
__glsl_extension(GL_KHR_shader_subgroup_arithmetic)
[ForceInline]
-[require(cuda_glsl_hlsl_spirv_wgsl, subgroup_arithmetic)]
+[require(cuda_glsl_hlsl_metal_spirv_wgsl, subgroup_arithmetic)]
public T subgroupMul(T value)
{
shader_subgroup_preamble<T>();
@@ -6728,7 +6735,7 @@ __generic<T : __BuiltinArithmeticType>
__spirv_version(1.3)
__glsl_extension(GL_KHR_shader_subgroup_arithmetic)
[ForceInline]
-[require(cuda_glsl_hlsl_spirv_wgsl, subgroup_arithmetic)]
+[require(cuda_glsl_hlsl_metal_spirv_wgsl, subgroup_arithmetic)]
public T subgroupMin(T value)
{
shader_subgroup_preamble<T>();
@@ -6739,7 +6746,7 @@ __generic<T : __BuiltinArithmeticType>
__spirv_version(1.3)
__glsl_extension(GL_KHR_shader_subgroup_arithmetic)
[ForceInline]
-[require(cuda_glsl_hlsl_spirv_wgsl, subgroup_arithmetic)]
+[require(cuda_glsl_hlsl_metal_spirv_wgsl, subgroup_arithmetic)]
public T subgroupMax(T value)
{
shader_subgroup_preamble<T>();
@@ -6751,7 +6758,7 @@ __spirv_version(1.3)
__glsl_extension(GL_KHR_shader_subgroup_arithmetic)
__wgsl_extension(subgroups)
[ForceInline]
-[require(glsl_spirv_wgsl, subgroup_arithmetic)]
+[require(glsl_metal_spirv_wgsl, subgroup_arithmetic)]
public T subgroupAnd(T value)
{
shader_subgroup_preamble<T>();
@@ -6760,6 +6767,8 @@ public T subgroupAnd(T value)
case glsl:
case wgsl:
__intrinsic_asm "subgroupAnd($0)";
+ case metal:
+ __intrinsic_asm "simd_and";
case spirv:
if (__isBool<T>()) {
return spirv_asm {
@@ -6781,15 +6790,17 @@ __spirv_version(1.3)
__glsl_extension(GL_KHR_shader_subgroup_arithmetic)
__wgsl_extension(subgroups)
[ForceInline]
-[require(glsl_spirv_wgsl, subgroup_arithmetic)]
+[require(glsl_metal_spirv_wgsl, subgroup_arithmetic)]
public T subgroupOr(T value)
{
shader_subgroup_preamble<T>();
__target_switch
{
- case glsl:
+ case glsl:
case wgsl:
__intrinsic_asm "subgroupOr($0)";
+ case metal:
+ __intrinsic_asm "simd_or";
case spirv:
if (__isBool<T>()) {
return spirv_asm {
@@ -6811,7 +6822,7 @@ __spirv_version(1.3)
__glsl_extension(GL_KHR_shader_subgroup_arithmetic)
__wgsl_extension(subgroups)
[ForceInline]
-[require(glsl_spirv_wgsl, subgroup_arithmetic)]
+[require(glsl_metal_spirv_wgsl, subgroup_arithmetic)]
public T subgroupXor(T value)
{
shader_subgroup_preamble<T>();
@@ -6820,6 +6831,8 @@ public T subgroupXor(T value)
case glsl:
case wgsl:
__intrinsic_asm "subgroupXor($0)";
+ case metal:
+ __intrinsic_asm "simd_xor";
case spirv:
if (__isBool<T>()) {
return spirv_asm {
@@ -6841,7 +6854,7 @@ __spirv_version(1.3)
__glsl_extension(GL_KHR_shader_subgroup_arithmetic)
__wgsl_extension(subgroups)
[ForceInline]
-[require(glsl_spirv_wgsl, subgroup_arithmetic)]
+[require(glsl_metal_spirv_wgsl, subgroup_arithmetic)]
public T subgroupInclusiveAdd(T value)
{
shader_subgroup_preamble<T>();
@@ -6850,6 +6863,8 @@ public T subgroupInclusiveAdd(T value)
case glsl:
case wgsl:
__intrinsic_asm "subgroupInclusiveAdd($0)";
+ case metal:
+ __intrinsic_asm "simd_prefix_inclusive_sum";
case spirv:
if (__isFloat<T>())
return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformFAdd $$T result Subgroup InclusiveScan $value};
@@ -6864,7 +6879,7 @@ __spirv_version(1.3)
__glsl_extension(GL_KHR_shader_subgroup_arithmetic)
__wgsl_extension(subgroups)
[ForceInline]
-[require(glsl_spirv_wgsl, subgroup_arithmetic)]
+[require(glsl_metal_spirv_wgsl, subgroup_arithmetic)]
public T subgroupInclusiveMul(T value)
{
shader_subgroup_preamble<T>();
@@ -6873,6 +6888,8 @@ public T subgroupInclusiveMul(T value)
case glsl:
case wgsl:
__intrinsic_asm "subgroupInclusiveMul($0)";
+ case metal:
+ __intrinsic_asm "simd_prefix_inclusive_product";
case spirv:
if (__isFloat<T>())
return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformFMul $$T result Subgroup InclusiveScan $value};
@@ -7005,7 +7022,7 @@ __generic<T : __BuiltinArithmeticType>
__spirv_version(1.3)
__glsl_extension(GL_KHR_shader_subgroup_arithmetic)
[ForceInline]
-[require(cuda_glsl_hlsl_spirv_wgsl, subgroup_arithmetic)]
+[require(cuda_glsl_hlsl_metal_spirv_wgsl, subgroup_arithmetic)]
public T subgroupExclusiveAdd(T value)
{
shader_subgroup_preamble<T>();
@@ -7017,7 +7034,7 @@ __generic<T : __BuiltinArithmeticType>
__spirv_version(1.3)
__glsl_extension(GL_KHR_shader_subgroup_arithmetic)
[ForceInline]
-[require(cuda_glsl_hlsl_spirv_wgsl, subgroup_arithmetic)]
+[require(cuda_glsl_hlsl_metal_spirv_wgsl, subgroup_arithmetic)]
public T subgroupExclusiveMul(T value)
{
shader_subgroup_preamble<T>();
@@ -7128,7 +7145,7 @@ __generic<T : __BuiltinArithmeticType, let N : int>
__spirv_version(1.3)
__glsl_extension(GL_KHR_shader_subgroup_arithmetic)
[ForceInline]
-[require(cuda_glsl_hlsl_spirv_wgsl, subgroup_arithmetic)]
+[require(cuda_glsl_hlsl_metal_spirv_wgsl, subgroup_arithmetic)]
public vector<T,N> subgroupAdd(vector<T,N> value)
{
shader_subgroup_preamble<T>();
@@ -7139,7 +7156,7 @@ __generic<T : __BuiltinArithmeticType, let N : int>
__spirv_version(1.3)
__glsl_extension(GL_KHR_shader_subgroup_arithmetic)
[ForceInline]
-[require(cuda_glsl_hlsl_spirv_wgsl, subgroup_arithmetic)]
+[require(cuda_glsl_hlsl_metal_spirv_wgsl, subgroup_arithmetic)]
public vector<T,N> subgroupMul(vector<T,N> value)
{
shader_subgroup_preamble<T>();
@@ -7150,7 +7167,7 @@ __generic<T : __BuiltinArithmeticType, let N : int>
__spirv_version(1.3)
__glsl_extension(GL_KHR_shader_subgroup_arithmetic)
[ForceInline]
-[require(cuda_glsl_hlsl_spirv_wgsl, subgroup_arithmetic)]
+[require(cuda_glsl_hlsl_metal_spirv_wgsl, subgroup_arithmetic)]
public vector<T,N> subgroupMin(vector<T,N> value)
{
shader_subgroup_preamble<T>();
@@ -7161,7 +7178,7 @@ __generic<T : __BuiltinArithmeticType, let N : int>
__spirv_version(1.3)
__glsl_extension(GL_KHR_shader_subgroup_arithmetic)
[ForceInline]
-[require(cuda_glsl_hlsl_spirv_wgsl, subgroup_arithmetic)]
+[require(cuda_glsl_hlsl_metal_spirv_wgsl, subgroup_arithmetic)]
public vector<T,N> subgroupMax(vector<T,N> value)
{
shader_subgroup_preamble<T>();
@@ -7173,7 +7190,7 @@ __spirv_version(1.3)
__glsl_extension(GL_KHR_shader_subgroup_arithmetic)
__wgsl_extension(subgroups)
[ForceInline]
-[require(glsl_spirv_wgsl, subgroup_arithmetic)]
+[require(glsl_metal_spirv_wgsl, subgroup_arithmetic)]
public vector<T,N> subgroupAnd(vector<T,N> value)
{
shader_subgroup_preamble<T>();
@@ -7181,8 +7198,10 @@ public vector<T,N> subgroupAnd(vector<T,N> value)
{
case glsl:
case wgsl:
- // TODO: Bool inputs are invalid for WGSL, cast them to int or don't allow them to compile.
+ // TODO: Bool inputs are invalid for Metal and WGSL, cast them to int or don't allow them to compile.
__intrinsic_asm "subgroupAnd($0)";
+ case metal:
+ __intrinsic_asm "simd_and";
case spirv:
if (__isBool<T>()) {
return spirv_asm {
@@ -7205,7 +7224,7 @@ __spirv_version(1.3)
__glsl_extension(GL_KHR_shader_subgroup_arithmetic)
__wgsl_extension(subgroups)
[ForceInline]
-[require(glsl_spirv_wgsl, subgroup_arithmetic)]
+[require(glsl_metal_spirv_wgsl, subgroup_arithmetic)]
public vector<T,N> subgroupOr(vector<T,N> value)
{
shader_subgroup_preamble<T>();
@@ -7214,6 +7233,8 @@ public vector<T,N> subgroupOr(vector<T,N> value)
case glsl:
case wgsl:
__intrinsic_asm "subgroupOr($0)";
+ case metal:
+ __intrinsic_asm "simd_or";
case spirv:
if (__isBool<T>()) {
return spirv_asm {
@@ -7236,7 +7257,7 @@ __spirv_version(1.3)
__glsl_extension(GL_KHR_shader_subgroup_arithmetic)
__wgsl_extension(subgroups)
[ForceInline]
-[require(glsl_spirv_wgsl, subgroup_arithmetic)]
+[require(glsl_metal_spirv_wgsl, subgroup_arithmetic)]
public vector<T,N> subgroupXor(vector<T,N> value)
{
shader_subgroup_preamble<T>();
@@ -7245,6 +7266,8 @@ public vector<T,N> subgroupXor(vector<T,N> value)
case glsl:
case wgsl:
__intrinsic_asm "subgroupXor($0)";
+ case metal:
+ __intrinsic_asm "simd_xor";
case spirv:
if (__isBool<T>()) {
return spirv_asm {
@@ -7266,7 +7289,7 @@ __spirv_version(1.3)
__glsl_extension(GL_KHR_shader_subgroup_arithmetic)
__wgsl_extension(subgroups)
[ForceInline]
-[require(glsl_spirv_wgsl, subgroup_arithmetic)]
+[require(glsl_metal_spirv_wgsl, subgroup_arithmetic)]
public vector<T,N> subgroupInclusiveAdd(vector<T,N> value)
{
shader_subgroup_preamble<T>();
@@ -7275,6 +7298,8 @@ public vector<T,N> subgroupInclusiveAdd(vector<T,N> value)
case glsl:
case wgsl:
__intrinsic_asm "subgroupInclusiveAdd($0)";
+ case metal:
+ __intrinsic_asm "simd_prefix_inclusive_sum";
case spirv:
if (__isFloat<T>())
return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformFAdd $$vector<T,N> result Subgroup InclusiveScan $value};
@@ -7289,7 +7314,7 @@ __spirv_version(1.3)
__glsl_extension(GL_KHR_shader_subgroup_arithmetic)
__wgsl_extension(subgroups)
[ForceInline]
-[require(glsl_spirv_wgsl, subgroup_arithmetic)]
+[require(glsl_metal_spirv_wgsl, subgroup_arithmetic)]
public vector<T,N> subgroupInclusiveMul(vector<T,N> value)
{
shader_subgroup_preamble<T>();
@@ -7298,6 +7323,8 @@ public vector<T,N> subgroupInclusiveMul(vector<T,N> value)
case glsl:
case wgsl:
__intrinsic_asm "subgroupInclusiveMul($0)";
+ case metal:
+ __intrinsic_asm "simd_prefix_inclusive_product";
case spirv:
if (__isFloat<T>())
return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformFMul $$vector<T,N> result Subgroup InclusiveScan $value};
@@ -7411,7 +7438,7 @@ __generic<T : __BuiltinArithmeticType, let N : int>
__spirv_version(1.3)
__glsl_extension(GL_KHR_shader_subgroup_arithmetic)
[ForceInline]
-[require(cuda_glsl_hlsl_spirv_wgsl, subgroup_arithmetic)]
+[require(cuda_glsl_hlsl_metal_spirv_wgsl, subgroup_arithmetic)]
public vector<T,N> subgroupExclusiveAdd(vector<T,N> value)
{
shader_subgroup_preamble<T>();
@@ -7423,7 +7450,7 @@ __generic<T : __BuiltinArithmeticType, let N : int>
__spirv_version(1.3)
__glsl_extension(GL_KHR_shader_subgroup_arithmetic)
[ForceInline]
-[require(cuda_glsl_hlsl_spirv_wgsl, subgroup_arithmetic)]
+[require(cuda_glsl_hlsl_metal_spirv_wgsl, subgroup_arithmetic)]
public vector<T,N> subgroupExclusiveMul(vector<T,N> value)
{
shader_subgroup_preamble<T>();
@@ -7533,7 +7560,7 @@ __generic<T : __BuiltinType>
__spirv_version(1.3)
__glsl_extension(GL_KHR_shader_subgroup_ballot)
[ForceInline]
-[require(cuda_glsl_hlsl_spirv_wgsl, subgroup_ballot)]
+[require(cuda_glsl_hlsl_metal_spirv_wgsl, subgroup_ballot)]
public T subgroupBroadcast(T value, uint id)
{
shader_subgroup_preamble<T>();
@@ -7551,7 +7578,7 @@ __generic<T : __BuiltinType, let N : int>
__spirv_version(1.3)
__glsl_extension(GL_KHR_shader_subgroup_ballot)
[ForceInline]
-[require(cuda_glsl_hlsl_spirv_wgsl, subgroup_ballot)]
+[require(cuda_glsl_hlsl_metal_spirv_wgsl, subgroup_ballot)]
public vector<T,N> subgroupBroadcast(vector<T,N> value, uint id)
{
shader_subgroup_preamble<T>();
@@ -7569,7 +7596,7 @@ __generic<T : __BuiltinType>
__spirv_version(1.3)
__glsl_extension(GL_KHR_shader_subgroup_ballot)
[ForceInline]
-[require(cuda_glsl_hlsl_spirv_wgsl, subgroup_ballot)]
+[require(cuda_glsl_hlsl_metal_spirv_wgsl, subgroup_ballot)]
public T subgroupBroadcastFirst(T value)
{
shader_subgroup_preamble<T>();
@@ -7580,7 +7607,7 @@ __generic<T : __BuiltinType, let N : int>
__spirv_version(1.3)
__glsl_extension(GL_KHR_shader_subgroup_ballot)
[ForceInline]
-[require(cuda_glsl_hlsl_spirv_wgsl, subgroup_ballot)]
+[require(cuda_glsl_hlsl_metal_spirv_wgsl, subgroup_ballot)]
public vector<T,N> subgroupBroadcastFirst(vector<T,N> value)
{
shader_subgroup_preamble<T>();
@@ -7591,7 +7618,7 @@ public vector<T,N> subgroupBroadcastFirst(vector<T,N> value)
__spirv_version(1.3)
__glsl_extension(GL_KHR_shader_subgroup_ballot)
[ForceInline]
-[require(cuda_glsl_hlsl_spirv_wgsl, subgroup_ballot)]
+[require(cuda_glsl_hlsl_metal_spirv_wgsl, subgroup_ballot)]
public uvec4 subgroupBallot(bool value)
{
return WaveActiveBallot(value);
@@ -7772,7 +7799,7 @@ __generic<T : __BuiltinType>
__spirv_version(1.3)
__glsl_extension(GL_KHR_shader_subgroup_shuffle)
[ForceInline]
-[require(cuda_glsl_hlsl_spirv_wgsl, subgroup_shuffle)]
+[require(cuda_glsl_hlsl_metal_spirv_wgsl, subgroup_shuffle)]
public T subgroupShuffle(T value, uint index)
{
shader_subgroup_preamble<T>();
@@ -7783,7 +7810,7 @@ __generic<T : __BuiltinType>
__spirv_version(1.3)
__glsl_extension(GL_KHR_shader_subgroup_shuffle)
__wgsl_extension(subgroups)
-[require(glsl_spirv_wgsl, subgroup_shuffle)]
+[require(glsl_metal_spirv_wgsl, subgroup_shuffle)]
[ForceInline] public T subgroupShuffleXor(T value, uint mask)
{
shader_subgroup_preamble<T>();
@@ -7792,6 +7819,8 @@ __wgsl_extension(subgroups)
case glsl:
case wgsl:
__intrinsic_asm "subgroupShuffleXor($0,$1)";
+ case metal:
+ __intrinsic_asm "simd_shuffle_xor($0, ushort($1))";
case spirv:
return spirv_asm {
OpCapability GroupNonUniformBallot;
@@ -7804,7 +7833,7 @@ __generic<T : __BuiltinType, let N : int>
__spirv_version(1.3)
__glsl_extension(GL_KHR_shader_subgroup_shuffle)
[ForceInline]
-[require(cuda_glsl_hlsl_spirv_wgsl, subgroup_shuffle)]
+[require(cuda_glsl_hlsl_metal_spirv_wgsl, subgroup_shuffle)]
public vector<T,N> subgroupShuffle(vector<T,N> value, uint index)
{
shader_subgroup_preamble<T>();
@@ -7816,7 +7845,7 @@ __spirv_version(1.3)
__glsl_extension(GL_KHR_shader_subgroup_shuffle)
__wgsl_extension(subgroups)
[ForceInline]
-[require(glsl_spirv_wgsl, subgroup_shuffle)]
+[require(glsl_metal_spirv_wgsl, subgroup_shuffle)]
public vector<T,N> subgroupShuffleXor(vector<T,N> value, uint mask)
{
shader_subgroup_preamble<T>();
@@ -7825,6 +7854,8 @@ public vector<T,N> subgroupShuffleXor(vector<T,N> value, uint mask)
case glsl:
case wgsl:
__intrinsic_asm "subgroupShuffleXor($0,$1)";
+ case metal:
+ __intrinsic_asm "simd_shuffle_xor($0, ushort($1))";
case spirv:
return spirv_asm {
OpCapability GroupNonUniformBallot;
@@ -7841,7 +7872,7 @@ __spirv_version(1.3)
__glsl_extension(GL_KHR_shader_subgroup_shuffle_relative)
__wgsl_extension(subgroups)
[ForceInline]
-[require(glsl_spirv_wgsl, subgroup_shufflerelative)]
+[require(glsl_metal_spirv_wgsl, subgroup_shufflerelative)]
public T subgroupShuffleUp(T value, uint delta)
{
shader_subgroup_preamble<T>();
@@ -7850,6 +7881,8 @@ public T subgroupShuffleUp(T value, uint delta)
case glsl:
case wgsl:
__intrinsic_asm "subgroupShuffleUp($0, $1)";
+ case metal:
+ __intrinsic_asm "simd_shuffle_up($0, ushort($1))";
case spirv:
return spirv_asm {
OpCapability GroupNonUniformShuffleRelative;
@@ -7863,7 +7896,7 @@ __spirv_version(1.3)
__glsl_extension(GL_KHR_shader_subgroup_shuffle_relative)
__wgsl_extension(subgroups)
[ForceInline]
-[require(glsl_spirv_wgsl, subgroup_shufflerelative)]
+[require(glsl_metal_spirv_wgsl, subgroup_shufflerelative)]
public T subgroupShuffleDown(T value, uint delta)
{
shader_subgroup_preamble<T>();
@@ -7872,6 +7905,8 @@ public T subgroupShuffleDown(T value, uint delta)
case glsl:
case wgsl:
__intrinsic_asm "subgroupShuffleDown($0, $1)";
+ case metal:
+ __intrinsic_asm "simd_shuffle_down($0, ushort($1))";
case spirv:
return spirv_asm {
OpCapability GroupNonUniformShuffleRelative;
@@ -7886,7 +7921,7 @@ __spirv_version(1.3)
__glsl_extension(GL_KHR_shader_subgroup_shuffle_relative)
__wgsl_extension(subgroups)
[ForceInline]
-[require(glsl_spirv_wgsl, subgroup_shufflerelative)]
+[require(glsl_metal_spirv_wgsl, subgroup_shufflerelative)]
public vector<T,N> subgroupShuffleUp(vector<T,N> value, uint delta)
{
shader_subgroup_preamble<T>();
@@ -7895,6 +7930,8 @@ public vector<T,N> subgroupShuffleUp(vector<T,N> value, uint delta)
case glsl:
case wgsl:
__intrinsic_asm "subgroupShuffleUp($0, $1)";
+ case metal:
+ __intrinsic_asm "simd_shuffle_up($0, ushort($1))";
case spirv:
return spirv_asm {
OpCapability GroupNonUniformShuffleRelative;
@@ -7908,7 +7945,7 @@ __spirv_version(1.3)
__glsl_extension(GL_KHR_shader_subgroup_shuffle_relative)
__wgsl_extension(subgroups)
[ForceInline]
-[require(glsl_spirv_wgsl, subgroup_shufflerelative)]
+[require(glsl_metal_spirv_wgsl, subgroup_shufflerelative)]
public vector<T,N> subgroupShuffleDown(vector<T,N> value, uint delta)
{
shader_subgroup_preamble<T>();
@@ -7917,6 +7954,8 @@ public vector<T,N> subgroupShuffleDown(vector<T,N> value, uint delta)
case glsl:
case wgsl:
__intrinsic_asm "subgroupShuffleDown($0, $1)";
+ case metal:
+ __intrinsic_asm "simd_shuffle_down($0, ushort($1))";
case spirv:
return spirv_asm {
OpCapability GroupNonUniformShuffleRelative;
diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang
index 491f0ef4d..884621960 100644
--- a/source/slang/hlsl.meta.slang
+++ b/source/slang/hlsl.meta.slang
@@ -9782,7 +9782,7 @@ void __subgroupBarrier()
case glsl: __intrinsic_asm "subgroupBarrier";
case hlsl: __intrinsic_asm "GroupMemoryBarrierWithGroupSync";
case cuda: __intrinsic_asm "__syncthreads()";
- case metal: __intrinsic_asm "threadgroup_barrier(mem_flags::mem_threadgroup)";
+ case metal: __intrinsic_asm "simdgroup_barrier(mem_flags::mem_none)"; // MSL names the no-fence flag `mem_none`; `mem_flags::none` does not exist
case spirv:
spirv_asm
{
@@ -14423,7 +14423,7 @@ matrix<T,N,M> WaveMaskPrefixBitXor(WaveMask mask, matrix<T,N,M> expr)
__generic<T : __BuiltinType>
__glsl_extension(GL_KHR_shader_subgroup_quad)
__spirv_version(1.3)
-[require(glsl_hlsl_spirv, subgroup_quad)]
+[require(glsl_hlsl_metal_spirv, subgroup_quad)]
T QuadReadLaneAt(T sourceValue, uint quadLaneID)
{
__target_switch
@@ -14432,6 +14432,9 @@ T QuadReadLaneAt(T sourceValue, uint quadLaneID)
__intrinsic_asm "QuadReadLaneAt";
case glsl:
__intrinsic_asm "subgroupQuadBroadcast";
+ case metal:
+ // TODO: Need to add intrinsics to access Metal and WGSL's broadcast variant where lane is const for all threads.
+ __intrinsic_asm "quad_shuffle($0, ushort($1))";
case spirv:
return spirv_asm {
OpCapability GroupNonUniformQuad;
@@ -14442,7 +14445,7 @@ T QuadReadLaneAt(T sourceValue, uint quadLaneID)
__generic<T : __BuiltinType, let N : int>
__glsl_extension(GL_KHR_shader_subgroup_quad)
__spirv_version(1.3)
-[require(glsl_hlsl_spirv, subgroup_quad)]
+[require(glsl_hlsl_metal_spirv, subgroup_quad)]
vector<T,N> QuadReadLaneAt(vector<T,N> sourceValue, uint quadLaneID)
{
__target_switch
@@ -14451,6 +14454,8 @@ vector<T,N> QuadReadLaneAt(vector<T,N> sourceValue, uint quadLaneID)
__intrinsic_asm "QuadReadLaneAt";
case glsl:
__intrinsic_asm "subgroupQuadBroadcast";
+ case metal:
+ __intrinsic_asm "quad_shuffle($0, ushort($1))";
case spirv:
return spirv_asm {
OpCapability GroupNonUniformQuad;
@@ -14598,8 +14603,8 @@ __generic<T : __BuiltinType, let N : int, let M : int> matrix<T,N,M> QuadReadAcr
// WaveActiveBitAnd, WaveActiveBitOr, WaveActiveBitXor
${{{{
-struct WaveActiveBitOpEntry { const char* hlslName; const char* glslName; const char* spirvName; };
-const WaveActiveBitOpEntry kWaveActiveBitOpEntries[] = {{"BitAnd", "And", "BitwiseAnd"}, {"BitOr", "Or", "BitwiseOr"}, {"BitXor", "Xor", "BitwiseXor"}};
+struct WaveActiveBitOpEntry { const char* hlslName; const char* glslName; const char* spirvName; const char* metalName; };
+const WaveActiveBitOpEntry kWaveActiveBitOpEntries[] = {{"BitAnd", "And", "BitwiseAnd", "and"}, {"BitOr", "Or", "BitwiseOr", "or"}, {"BitXor", "Xor", "BitwiseXor", "xor"}};
for (auto opName : kWaveActiveBitOpEntries) {
}}}}
/// @category wave Wave and quad functions
@@ -14607,7 +14612,7 @@ __generic<T : __BuiltinIntegerType>
__glsl_extension(GL_KHR_shader_subgroup_arithmetic)
__spirv_version(1.3)
__wgsl_extension(subgroups)
-[require(cuda_glsl_hlsl_spirv_wgsl, subgroup_arithmetic)]
+[require(cuda_glsl_hlsl_metal_spirv_wgsl, subgroup_arithmetic)]
T WaveActive$(opName.hlslName)(T expr)
{
__target_switch
@@ -14615,7 +14620,10 @@ T WaveActive$(opName.hlslName)(T expr)
case glsl:
case wgsl:
__intrinsic_asm "subgroup$(opName.glslName)";
- case hlsl: __intrinsic_asm "WaveActive$(opName.hlslName)";
+ case hlsl:
+ __intrinsic_asm "WaveActive$(opName.hlslName)";
+ case metal:
+ __intrinsic_asm "simd_$(opName.metalName)";
case spirv:
return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniform$(opName.spirvName) $$T result Subgroup Reduce $expr};
case cuda:
@@ -14627,7 +14635,7 @@ __generic<T : __BuiltinIntegerType, let N : int>
__glsl_extension(GL_KHR_shader_subgroup_arithmetic)
__spirv_version(1.3)
__wgsl_extension(subgroups)
-[require(cuda_glsl_hlsl_spirv_wgsl, subgroup_arithmetic)]
+[require(cuda_glsl_hlsl_metal_spirv_wgsl, subgroup_arithmetic)]
vector<T, N> WaveActive$(opName.hlslName)(vector<T, N> expr)
{
__target_switch
@@ -14635,7 +14643,10 @@ vector<T, N> WaveActive$(opName.hlslName)(vector<T, N> expr)
case glsl:
case wgsl:
__intrinsic_asm "subgroup$(opName.glslName)";
- case hlsl: __intrinsic_asm "WaveActive$(opName.hlslName)";
+ case hlsl:
+ __intrinsic_asm "WaveActive$(opName.hlslName)";
+ case metal:
+ __intrinsic_asm "simd_$(opName.metalName)";
case spirv:
return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniform$(opName.spirvName) $$vector<T, N> result Subgroup Reduce $expr};
case cuda:
@@ -14644,22 +14655,21 @@ vector<T, N> WaveActive$(opName.hlslName)(vector<T, N> expr)
}
__generic<T : __BuiltinIntegerType, let N : int, let M : int>
-[require(cuda_glsl_hlsl_spirv_wgsl, subgroup_arithmetic)]
+[require(cuda_glsl_hlsl_metal_spirv_wgsl, subgroup_arithmetic)]
matrix<T, N, M> WaveActive$(opName.hlslName)(matrix<T, N, M> expr)
{
__target_switch
{
- case hlsl: __intrinsic_asm "WaveActive$(opName.hlslName)";
- case glsl:
- case spirv:
- case wgsl:
+ case cuda:
+ return WaveMask$(opName.hlslName)(WaveGetActiveMask(), expr);
+ case hlsl:
+ __intrinsic_asm "WaveActive$(opName.hlslName)";
+ default:
matrix<T,N,M> result;
[ForceUnroll]
for (int i = 0; i < N; ++i)
result[i] = WaveActive$(opName.hlslName)(expr[i]);
return result;
- case cuda:
- return WaveMask$(opName.hlslName)(WaveGetActiveMask(), expr);
}
}
${{{{
@@ -14668,32 +14678,36 @@ ${{{{
// WaveActiveMin/Max
${{{{
-const char* kWaveActiveMinMaxNames[] = {"Min", "Max"};
-for (const char* opName : kWaveActiveMinMaxNames) {
+struct WaveActiveMinMaxEntry { const char* name; const char* metalName; };
+const WaveActiveMinMaxEntry kWaveActiveMinMaxNames[] = {{"Min", "min"}, {"Max", "max"}};
+for (const auto opName : kWaveActiveMinMaxNames) {
}}}}
/// @category wave
__generic<T : __BuiltinArithmeticType>
__glsl_extension(GL_KHR_shader_subgroup_arithmetic)
__spirv_version(1.3)
__wgsl_extension(subgroups)
-[require(cuda_glsl_hlsl_spirv_wgsl, subgroup_arithmetic)]
-T WaveActive$(opName)(T expr)
+[require(cuda_glsl_hlsl_metal_spirv_wgsl, subgroup_arithmetic)]
+T WaveActive$(opName.name)(T expr)
{
__target_switch
{
- case glsl:
+ case glsl:
case wgsl:
- __intrinsic_asm "subgroup$(opName)";
- case hlsl: __intrinsic_asm "WaveActive$(opName)";
+ __intrinsic_asm "subgroup$(opName.name)";
+ case hlsl:
+ __intrinsic_asm "WaveActive$(opName.name)";
+ case metal:
+ __intrinsic_asm "simd_$(opName.metalName)";
case spirv:
if (__isFloat<T>())
- return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformF$(opName) $$T result Subgroup Reduce $expr};
+ return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformF$(opName.name) $$T result Subgroup Reduce $expr};
else if (__isUnsignedInt<T>())
- return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformU$(opName) $$T result Subgroup Reduce $expr};
+ return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformU$(opName.name) $$T result Subgroup Reduce $expr};
else
- return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformS$(opName) $$T result Subgroup Reduce $expr};
+ return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformS$(opName.name) $$T result Subgroup Reduce $expr};
case cuda:
- return WaveMask$(opName)(WaveGetActiveMask(), expr);
+ return WaveMask$(opName.name)(WaveGetActiveMask(), expr);
}
}
@@ -14701,44 +14715,46 @@ __generic<T : __BuiltinArithmeticType, let N : int>
__glsl_extension(GL_KHR_shader_subgroup_arithmetic)
__spirv_version(1.3)
__wgsl_extension(subgroups)
-[require(cuda_glsl_hlsl_spirv_wgsl, subgroup_arithmetic)]
-vector<T, N> WaveActive$(opName)(vector<T, N> expr)
+[require(cuda_glsl_hlsl_metal_spirv_wgsl, subgroup_arithmetic)]
+vector<T, N> WaveActive$(opName.name)(vector<T, N> expr)
{
__target_switch
{
- case glsl:
+ case glsl:
case wgsl:
- __intrinsic_asm "subgroup$(opName)";
- case hlsl: __intrinsic_asm "WaveActive$(opName)";
+ __intrinsic_asm "subgroup$(opName.name)";
+ case hlsl:
+ __intrinsic_asm "WaveActive$(opName.name)";
+ case metal:
+ __intrinsic_asm "simd_$(opName.metalName)";
case spirv:
if (__isFloat<T>())
- return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformF$(opName) $$vector<T, N> result Subgroup Reduce $expr};
+ return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformF$(opName.name) $$vector<T, N> result Subgroup Reduce $expr};
else if (__isUnsignedInt<T>())
- return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformU$(opName) $$vector<T, N> result Subgroup Reduce $expr};
+ return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformU$(opName.name) $$vector<T, N> result Subgroup Reduce $expr};
else
- return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformS$(opName) $$vector<T, N> result Subgroup Reduce $expr};
+ return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformS$(opName.name) $$vector<T, N> result Subgroup Reduce $expr};
case cuda:
- return WaveMask$(opName)(WaveGetActiveMask(), expr);
+ return WaveMask$(opName.name)(WaveGetActiveMask(), expr);
}
}
__generic<T : __BuiltinArithmeticType, let N : int, let M : int>
-[require(cuda_glsl_hlsl_spirv_wgsl, subgroup_arithmetic)]
-matrix<T, N, M> WaveActive$(opName)(matrix<T, N, M> expr)
+[require(cuda_glsl_hlsl_metal_spirv_wgsl, subgroup_arithmetic)]
+matrix<T, N, M> WaveActive$(opName.name)(matrix<T, N, M> expr)
{
__target_switch
{
- case hlsl: __intrinsic_asm "WaveActive$(opName)";
- case glsl:
- case spirv:
- case wgsl:
+ case cuda:
+ return WaveMask$(opName.name)(WaveGetActiveMask(), expr);
+ case hlsl:
+ __intrinsic_asm "WaveActive$(opName.name)";
+ default:
matrix<T, N, M> result;
[ForceUnroll]
for (int i = 0; i < N; ++i)
- result[i] = WaveActive$(opName)(expr[i]);
+ result[i] = WaveActive$(opName.name)(expr[i]);
return result;
- case cuda:
- return WaveMask$(opName)(WaveGetActiveMask(), expr);
}
}
@@ -14748,8 +14764,8 @@ ${{{{
// WaveActiveProduct/Sum
${{{{
-struct WaveActiveProductSumEntry { const char* hlslName; const char* glslName; };
-const WaveActiveProductSumEntry kWaveActivProductSumNames[] = {{"Product", "Mul"}, {"Sum", "Add"}};
+struct WaveActiveProductSumEntry { const char* hlslName; const char* glslName; const char* metalName; };
+const WaveActiveProductSumEntry kWaveActivProductSumNames[] = {{"Product", "Mul", "product"}, {"Sum", "Add", "sum"}};
for (auto opName : kWaveActivProductSumNames) {
}}}}
/// @category wave
@@ -14757,7 +14773,7 @@ __generic<T : __BuiltinArithmeticType>
__glsl_extension(GL_KHR_shader_subgroup_arithmetic)
__spirv_version(1.3)
__wgsl_extension(subgroups)
-[require(cuda_glsl_hlsl_spirv_wgsl, subgroup_arithmetic)]
+[require(cuda_glsl_hlsl_metal_spirv_wgsl, subgroup_arithmetic)]
T WaveActive$(opName.hlslName)(T expr)
{
__target_switch
@@ -14766,6 +14782,7 @@ T WaveActive$(opName.hlslName)(T expr)
if (__isHalf<T>()) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16");
__intrinsic_asm "subgroup$(opName.glslName)($0)";
case hlsl: __intrinsic_asm "WaveActive$(opName.hlslName)";
+ case metal: __intrinsic_asm "simd_$(opName.metalName)";
case spirv:
if (__isFloat<T>())
return spirv_asm {
@@ -14791,7 +14808,7 @@ __generic<T : __BuiltinArithmeticType, let N : int>
__glsl_extension(GL_KHR_shader_subgroup_arithmetic)
__spirv_version(1.3)
__wgsl_extension(subgroups)
-[require(cuda_glsl_hlsl_spirv_wgsl, subgroup_arithmetic)]
+[require(cuda_glsl_hlsl_metal_spirv_wgsl, subgroup_arithmetic)]
vector<T,N> WaveActive$(opName.hlslName)(vector<T,N> expr)
{
__target_switch
@@ -14800,6 +14817,7 @@ vector<T,N> WaveActive$(opName.hlslName)(vector<T,N> expr)
if (__isHalf<T>()) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16");
__intrinsic_asm "subgroup$(opName.glslName)($0)";
case hlsl: __intrinsic_asm "WaveActive$(opName.hlslName)";
+ case metal: __intrinsic_asm "simd_$(opName.metalName)";
case spirv:
if (__isFloat<T>())
return spirv_asm {
@@ -14822,27 +14840,27 @@ vector<T,N> WaveActive$(opName.hlslName)(vector<T,N> expr)
}
__generic<T : __BuiltinArithmeticType, let N : int, let M : int>
-[require(cuda_glsl_hlsl_spirv_wgsl, subgroup_arithmetic)]
+[require(cuda_glsl_hlsl_metal_spirv_wgsl, subgroup_arithmetic)]
matrix<T, N, M> WaveActive$(opName.hlslName)(matrix<T, N, M> expr)
{
__target_switch
{
- case hlsl: __intrinsic_asm "WaveActive$(opName.hlslName)";
- case glsl:
- case spirv:
- case wgsl:
+ case cuda: // CUDA: forward to the WaveMask variant with the current active mask.
+ return WaveMask$(opName.hlslName)(WaveGetActiveMask(), expr);
+ case hlsl: // HLSL: emit the same-named WaveActive intrinsic.
+ __intrinsic_asm "WaveActive$(opName.hlslName)";
+ default: // Every other target: apply the operation to each expr[i], i in [0, N), separately.
matrix<T, N, M> result;
[ForceUnroll]
for (int i = 0; i < N; ++i)
result[i] = WaveActive$(opName.hlslName)(expr[i]);
return result;
- case cuda:
- return WaveMask$(opName.hlslName)(WaveGetActiveMask(), expr);
}
}
${{{{
} // WaveActiveProduct/WaveActiveProductSum.
}}}}
+
/// @category wave
__generic<T : __BuiltinType>
__glsl_extension(GL_KHR_shader_subgroup_vote)
@@ -14906,7 +14924,7 @@ bool WaveActiveAllEqual(matrix<T, N, M> value)
__glsl_extension(GL_KHR_shader_subgroup_vote)
__spirv_version(1.3)
__wgsl_extension(subgroups)
-[require(cuda_glsl_hlsl_spirv_wgsl, subgroup_vote)]
+[require(cuda_glsl_hlsl_metal_spirv_wgsl, subgroup_vote)]
bool WaveActiveAllTrue(bool condition)
{
__target_switch
@@ -14914,7 +14932,10 @@ bool WaveActiveAllTrue(bool condition)
case glsl:
case wgsl:
__intrinsic_asm "subgroupAll";
- case hlsl: __intrinsic_asm "WaveActiveAllTrue($0)";
+ case hlsl:
+ __intrinsic_asm "WaveActiveAllTrue($0)";
+ case metal:
+ __intrinsic_asm "simd_all";
case spirv:
return spirv_asm
{
@@ -14930,7 +14951,7 @@ bool WaveActiveAllTrue(bool condition)
__glsl_extension(GL_KHR_shader_subgroup_vote)
__spirv_version(1.3)
__wgsl_extension(subgroups)
-[require(cuda_glsl_hlsl_spirv_wgsl, subgroup_vote)]
+[require(cuda_glsl_hlsl_metal_spirv_wgsl, subgroup_vote)]
bool WaveActiveAnyTrue(bool condition)
{
__target_switch
@@ -14940,6 +14961,8 @@ bool WaveActiveAnyTrue(bool condition)
__intrinsic_asm "subgroupAny";
case hlsl:
__intrinsic_asm "WaveActiveAnyTrue($0)";
+ case metal:
+ __intrinsic_asm "simd_any";
case spirv:
return spirv_asm
{
@@ -14951,12 +14974,28 @@ bool WaveActiveAnyTrue(bool condition)
}
}
+
+//@hidden:
+[ForceInline]
+uint64_t __metal_simd_ballot(bool expr) // Helper called from the Metal case of WaveActiveBallot.
+{
+ __intrinsic_asm "uint64_t(simd_ballot($0))"; // Emits simd_ballot on the argument and converts the result to uint64_t.
+}
+
+[ForceInline]
+uint4 __metal_simd_vote_mask_to_uint4(uint64_t mask) // Repacks a 64-bit mask into the uint4 shape that WaveActiveBallot returns.
+{
+ return uint4(uint(mask & 0xFFFFFFFF), uint(mask >> 32), 0, 0); // x = low 32 bits, y = high 32 bits, z and w are always 0.
+}
+
+//@public:
+
/// @category wave
__glsl_extension(GL_KHR_shader_subgroup_ballot)
__spirv_version(1.3)
__wgsl_extension(subgroups)
[NonUniformReturn]
-[require(cuda_glsl_hlsl_spirv_wgsl, subgroup_ballot)]
+[require(cuda_glsl_hlsl_metal_spirv_wgsl, subgroup_ballot)]
uint4 WaveActiveBallot(bool condition)
{
__target_switch
@@ -14966,6 +15005,7 @@ uint4 WaveActiveBallot(bool condition)
__intrinsic_asm "subgroupBallot";
case hlsl:
__intrinsic_asm "WaveActiveBallot";
+ case metal: return __metal_simd_vote_mask_to_uint4(__metal_simd_ballot(condition));
case spirv:
return spirv_asm
{
@@ -15039,13 +15079,14 @@ __glsl_extension(GL_KHR_shader_subgroup_basic)
__spirv_version(1.3)
__wgsl_extension(subgroups)
[NonUniformReturn]
-[require(cuda_glsl_hlsl_spirv_wgsl, subgroup_basic)]
+[require(cuda_glsl_hlsl_metal_spirv_wgsl, subgroup_basic)]
bool WaveIsFirstLane()
{
__target_switch
{
case glsl: __intrinsic_asm "subgroupElect()";
case hlsl: __intrinsic_asm "WaveIsFirstLane()";
+ case metal: __intrinsic_asm "simd_is_first";
case spirv:
return spirv_asm
{
@@ -15093,7 +15134,7 @@ __generic<T : __BuiltinArithmeticType>
__glsl_extension(GL_KHR_shader_subgroup_arithmetic)
__spirv_version(1.3)
__wgsl_extension(subgroups)
-[require(cuda_glsl_hlsl_spirv_wgsl, subgroup_arithmetic)]
+[require(cuda_glsl_hlsl_metal_spirv_wgsl, subgroup_arithmetic)]
T WavePrefixProduct(T expr)
{
__target_switch
@@ -15102,6 +15143,7 @@ T WavePrefixProduct(T expr)
if (__isHalf<T>()) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16");
__intrinsic_asm "subgroupExclusiveMul($0)";
case hlsl: __intrinsic_asm "WavePrefixProduct";
+ case metal: __intrinsic_asm "simd_prefix_exclusive_product";
case spirv:
if (__isFloat<T>())
return spirv_asm {
@@ -15128,7 +15170,7 @@ __generic<T : __BuiltinArithmeticType, let N : int>
__glsl_extension(GL_KHR_shader_subgroup_arithmetic)
__spirv_version(1.3)
__wgsl_extension(subgroups)
-[require(cuda_glsl_hlsl_spirv_wgsl, subgroup_arithmetic)]
+[require(cuda_glsl_hlsl_metal_spirv_wgsl, subgroup_arithmetic)]
vector<T,N> WavePrefixProduct(vector<T,N> expr)
{
__target_switch
@@ -15137,6 +15179,7 @@ vector<T,N> WavePrefixProduct(vector<T,N> expr)
if (__isHalf<T>()) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16");
__intrinsic_asm "subgroupExclusiveMul($0)";
case hlsl: __intrinsic_asm "WavePrefixProduct";
+ case metal: __intrinsic_asm "simd_prefix_exclusive_product";
case spirv:
if (__isFloat<T>())
return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformFMul $$vector<T,N> result Subgroup ExclusiveScan $expr};
@@ -15161,16 +15204,15 @@ matrix<T, N, M> WavePrefixProduct(matrix<T, N, M> expr)
{
__target_switch
{
- case hlsl: __intrinsic_asm "WavePrefixProduct";
- case glsl:
- case spirv:
- case wgsl:
+ case cuda:
+ return WaveMaskPrefixProduct(WaveGetActiveMask(), expr);
+ case hlsl:
+ __intrinsic_asm "WavePrefixProduct";
+ default:
matrix<T, N, M> result;
for (int i = 0; i < N; ++i)
result[i] = WavePrefixProduct(expr[i]);
return result;
- case cuda:
- return WaveMaskPrefixProduct(WaveGetActiveMask(), expr);
}
}
@@ -15179,7 +15221,7 @@ __generic<T : __BuiltinArithmeticType>
__glsl_extension(GL_KHR_shader_subgroup_arithmetic)
__spirv_version(1.3)
__wgsl_extension(subgroups)
-[require(cuda_glsl_hlsl_spirv_wgsl, subgroup_arithmetic)]
+[require(cuda_glsl_hlsl_metal_spirv_wgsl, subgroup_arithmetic)]
T WavePrefixSum(T expr)
{
__target_switch
@@ -15188,6 +15230,7 @@ T WavePrefixSum(T expr)
if (__isHalf<T>()) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16");
__intrinsic_asm "subgroupExclusiveAdd($0)";
case hlsl: __intrinsic_asm "WavePrefixSum";
+ case metal: __intrinsic_asm "simd_prefix_exclusive_sum";
case spirv:
if (__isFloat<T>())
return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformFAdd $$T result Subgroup ExclusiveScan $expr};
@@ -15210,7 +15253,7 @@ __generic<T : __BuiltinArithmeticType, let N : int>
__glsl_extension(GL_KHR_shader_subgroup_arithmetic)
__spirv_version(1.3)
__wgsl_extension(subgroups)
-[require(cuda_glsl_hlsl_spirv_wgsl, subgroup_arithmetic)]
+[require(cuda_glsl_hlsl_metal_spirv_wgsl, subgroup_arithmetic)]
vector<T,N> WavePrefixSum(vector<T,N> expr)
{
__target_switch
@@ -15219,6 +15262,7 @@ vector<T,N> WavePrefixSum(vector<T,N> expr)
if (__isHalf<T>()) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16");
__intrinsic_asm "subgroupExclusiveAdd($0)";
case hlsl: __intrinsic_asm "WavePrefixSum";
+ case metal: __intrinsic_asm "simd_prefix_exclusive_sum";
case spirv:
if (__isFloat<T>())
return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformFAdd $$vector<T,N> result Subgroup ExclusiveScan $expr};
@@ -15238,21 +15282,20 @@ vector<T,N> WavePrefixSum(vector<T,N> expr)
}
__generic<T : __BuiltinArithmeticType, let N : int, let M : int>
-[require(cuda_glsl_hlsl_spirv_wgsl, subgroup_arithmetic)]
+[require(cuda_glsl_hlsl_metal_spirv_wgsl, subgroup_arithmetic)]
matrix<T,N,M> WavePrefixSum(matrix<T,N,M> expr)
{
__target_switch
{
- case hlsl: __intrinsic_asm "WavePrefixSum";
- case glsl:
- case spirv:
- case wgsl:
+ case cuda: // CUDA: forward to WaveMaskPrefixSum with the current active mask.
+ return WaveMaskPrefixSum(WaveGetActiveMask(), expr);
+ case hlsl: // HLSL: emit the WavePrefixSum intrinsic.
+ __intrinsic_asm "WavePrefixSum";
+ default: // Every other target: call WavePrefixSum on each expr[i], i in [0, N), separately.
matrix<T, N, M> result;
for (int i = 0; i < N; ++i)
result[i] = WavePrefixSum(expr[i]);
return result;
- case cuda:
- return WaveMaskPrefixSum(WaveGetActiveMask(), expr);
}
}
@@ -15261,7 +15304,7 @@ __generic<T : __BuiltinType>
__glsl_extension(GL_KHR_shader_subgroup_ballot)
__spirv_version(1.3)
__wgsl_extension(subgroups)
-[require(cuda_glsl_hlsl_spirv_wgsl, subgroup_ballot)]
+[require(cuda_glsl_hlsl_metal_spirv_wgsl, subgroup_ballot)]
T WaveReadLaneFirst(T expr)
{
__target_switch
@@ -15270,6 +15313,7 @@ T WaveReadLaneFirst(T expr)
if (__isHalf<T>()) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16");
__intrinsic_asm "subgroupBroadcastFirst($0)";
case hlsl: __intrinsic_asm "WaveReadLaneFirst";
+ case metal: __intrinsic_asm "simd_broadcast_first";
case spirv:
return spirv_asm {OpCapability GroupNonUniformBallot; OpGroupNonUniformBroadcastFirst $$T result Subgroup $expr};
case wgsl: __intrinsic_asm "subgroupBroadcastFirst";
@@ -15282,7 +15326,7 @@ __generic<T : __BuiltinType, let N : int>
__glsl_extension(GL_KHR_shader_subgroup_ballot)
__spirv_version(1.3)
__wgsl_extension(subgroups)
-[require(cuda_glsl_hlsl_spirv_wgsl, subgroup_ballot)]
+[require(cuda_glsl_hlsl_metal_spirv_wgsl, subgroup_ballot)]
vector<T,N> WaveReadLaneFirst(vector<T,N> expr)
{
__target_switch
@@ -15291,6 +15335,7 @@ vector<T,N> WaveReadLaneFirst(vector<T,N> expr)
if (__isHalf<T>()) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16");
__intrinsic_asm "subgroupBroadcastFirst($0)";
case hlsl: __intrinsic_asm "WaveReadLaneFirst";
+ case metal: __intrinsic_asm "simd_broadcast_first";
case spirv:
return spirv_asm {OpCapability GroupNonUniformBallot; OpGroupNonUniformBroadcastFirst $$vector<T,N> result Subgroup $expr};
case wgsl: __intrinsic_asm "subgroupBroadcastFirst";
@@ -15300,21 +15345,19 @@ vector<T,N> WaveReadLaneFirst(vector<T,N> expr)
}
__generic<T : __BuiltinType, let N : int, let M : int>
-[require(cuda_glsl_hlsl_spirv_wgsl, subgroup_ballot)]
+[require(cuda_glsl_hlsl_metal_spirv_wgsl, subgroup_ballot)]
matrix<T,N,M> WaveReadLaneFirst(matrix<T,N,M> expr)
{
__target_switch
{
+ case cuda: // CUDA: forward to WaveMaskReadLaneFirst with the current active mask.
+ return WaveMaskReadLaneFirst(WaveGetActiveMask(), expr);
case hlsl: __intrinsic_asm "WaveReadLaneFirst";
- case glsl:
- case spirv:
- case wgsl:
+ default: // Every other target: call WaveReadLaneFirst on each expr[i], i in [0, N), separately.
matrix<T, N, M> result;
for (int i = 0; i < N; ++i)
result[i] = WaveReadLaneFirst(expr[i]);
return result;
- case cuda:
- return WaveMaskReadLaneFirst(WaveGetActiveMask(), expr);
}
}
@@ -15329,7 +15372,7 @@ __generic<T : __BuiltinType>
__glsl_extension(GL_KHR_shader_subgroup_ballot)
__spirv_version(1.3)
__wgsl_extension(subgroups)
-[require(cuda_glsl_hlsl_spirv_wgsl, subgroup_ballot)]
+[require(cuda_glsl_hlsl_metal_spirv_wgsl, subgroup_ballot)]
T WaveBroadcastLaneAt(T value, constexpr int lane)
{
__target_switch
@@ -15338,6 +15381,7 @@ T WaveBroadcastLaneAt(T value, constexpr int lane)
if (__isHalf<T>()) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16");
__intrinsic_asm "subgroupBroadcast($0, $1)";
case hlsl: __intrinsic_asm "WaveReadLaneAt";
+ case metal: __intrinsic_asm "simd_broadcast($0, ushort($1))";
case spirv:
let ulane = uint(lane);
return spirv_asm {OpCapability GroupNonUniformBallot; OpGroupNonUniformBroadcast $$T result Subgroup $value $ulane};
@@ -15352,7 +15396,7 @@ __generic<T : __BuiltinType, let N : int>
__glsl_extension(GL_KHR_shader_subgroup_ballot)
__spirv_version(1.3)
__wgsl_extension(subgroups)
-[require(cuda_glsl_hlsl_spirv_wgsl, subgroup_ballot)]
+[require(cuda_glsl_hlsl_metal_spirv_wgsl, subgroup_ballot)]
vector<T,N> WaveBroadcastLaneAt(vector<T,N> value, constexpr int lane)
{
__target_switch
@@ -15361,6 +15405,7 @@ vector<T,N> WaveBroadcastLaneAt(vector<T,N> value, constexpr int lane)
if (__isHalf<T>()) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16");
__intrinsic_asm "subgroupBroadcast($0, $1)";
case hlsl: __intrinsic_asm "WaveReadLaneAt";
+ case metal: __intrinsic_asm "simd_broadcast($0, ushort($1))";
case spirv:
let ulane = uint(lane);
return spirv_asm {OpCapability GroupNonUniformBallot; OpGroupNonUniformBroadcast $$vector<T,N> result Subgroup $value $ulane};
@@ -15371,22 +15416,18 @@ vector<T,N> WaveBroadcastLaneAt(vector<T,N> value, constexpr int lane)
}
__generic<T : __BuiltinType, let N : int, let M : int>
-[require(cuda_glsl_hlsl_spirv_wgsl, subgroup_ballot)]
+[require(cuda_glsl_hlsl_metal_spirv_wgsl, subgroup_ballot)]
matrix<T, N, M> WaveBroadcastLaneAt(matrix<T, N, M> value, constexpr int lane)
{
__target_switch
{
case cuda: __intrinsic_asm "_waveShuffleMultiple(_getActiveMask(), $0, $1)";
case hlsl: __intrinsic_asm "WaveReadLaneAt";
- case glsl:
- case spirv:
- case wgsl:
+ default: // Every other target: call WaveBroadcastLaneAt on each value[i], i in [0, N), with the same lane.
matrix<T, N, M> result;
for (int i = 0; i < N; ++i)
result[i] = WaveBroadcastLaneAt(value[i], lane);
return result;
- case cuda:
- return WaveMaskBroadcastLaneAt(WaveGetActiveMask(), value, lane);
}
}
@@ -15397,7 +15438,7 @@ __generic<T : __BuiltinType>
__glsl_extension(GL_KHR_shader_subgroup_shuffle)
__spirv_version(1.3)
__wgsl_extension(subgroups)
-[require(cuda_glsl_hlsl_spirv_wgsl, subgroup_shuffle)]
+[require(cuda_glsl_hlsl_metal_spirv_wgsl, subgroup_shuffle)]
T WaveReadLaneAt(T value, int lane)
{
__target_switch
@@ -15406,6 +15447,7 @@ T WaveReadLaneAt(T value, int lane)
if (__isHalf<T>()) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16");
__intrinsic_asm "subgroupShuffle($0, $1)";
case hlsl: __intrinsic_asm "WaveReadLaneAt";
+ case metal: __intrinsic_asm "simd_shuffle($0, ushort($1))";
case spirv:
let ulane = uint(lane);
return spirv_asm {OpCapability GroupNonUniformShuffle; OpGroupNonUniformShuffle $$T result Subgroup $value $ulane};
@@ -15419,7 +15461,7 @@ __generic<T : __BuiltinType, let N : int>
__glsl_extension(GL_KHR_shader_subgroup_shuffle)
__spirv_version(1.3)
__wgsl_extension(subgroups)
-[require(cuda_glsl_hlsl_spirv_wgsl, subgroup_shuffle)]
+[require(cuda_glsl_hlsl_metal_spirv_wgsl, subgroup_shuffle)]
vector<T,N> WaveReadLaneAt(vector<T,N> value, int lane)
{
__target_switch
@@ -15428,6 +15470,7 @@ vector<T,N> WaveReadLaneAt(vector<T,N> value, int lane)
if (__isHalf<T>()) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16");
__intrinsic_asm "subgroupShuffle($0, $1)";
case hlsl: __intrinsic_asm "WaveReadLaneAt";
+ case metal: __intrinsic_asm "simd_shuffle($0, ushort($1))";
case spirv:
let ulane = uint(lane);
return spirv_asm {OpCapability GroupNonUniformShuffle; OpGroupNonUniformShuffle $$vector<T,N> result Subgroup $value $ulane};
@@ -15438,22 +15481,18 @@ vector<T,N> WaveReadLaneAt(vector<T,N> value, int lane)
}
__generic<T : __BuiltinType, let N : int, let M : int>
-[require(cuda_glsl_hlsl_spirv_wgsl, subgroup_shuffle)]
+[require(cuda_glsl_hlsl_metal_spirv_wgsl, subgroup_shuffle)]
matrix<T, N, M> WaveReadLaneAt(matrix<T, N, M> value, int lane)
{
__target_switch
{
case cuda: __intrinsic_asm "_waveShuffleMultiple(_getActiveMask(), $0, $1)";
case hlsl: __intrinsic_asm "WaveReadLaneAt";
- case glsl:
- case spirv:
- case wgsl:
+ default: // Every other target: call WaveReadLaneAt on each value[i], i in [0, N), with the same lane.
matrix<T,N,M> result;
for (int i = 0; i < N; ++i)
result[i] = WaveReadLaneAt(value[i], lane);
return result;
- case cuda:
- return WaveMaskReadLaneAt(WaveGetActiveMask(), value, lane);
}
}
@@ -15465,7 +15504,7 @@ __generic<T : __BuiltinType>
__glsl_extension(GL_KHR_shader_subgroup_shuffle)
__spirv_version(1.3)
__wgsl_extension(subgroups)
-[require(cuda_glsl_hlsl_spirv_wgsl, subgroup_shuffle)]
+[require(cuda_glsl_hlsl_metal_spirv_wgsl, subgroup_shuffle)]
T WaveShuffle(T value, int lane)
{
__target_switch
@@ -15474,6 +15513,7 @@ T WaveShuffle(T value, int lane)
if (__isHalf<T>()) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16");
__intrinsic_asm "subgroupShuffle($0, $1)";
case hlsl: __intrinsic_asm "WaveReadLaneAt";
+ case metal: __intrinsic_asm "simd_shuffle($0, ushort($1))";
case spirv:
let ulane = uint(lane);
return spirv_asm {OpCapability GroupNonUniformShuffle; OpGroupNonUniformShuffle $$T result Subgroup $value $ulane};
@@ -15488,7 +15528,7 @@ __generic<T : __BuiltinType, let N : int>
__glsl_extension(GL_KHR_shader_subgroup_shuffle)
__spirv_version(1.3)
__wgsl_extension(subgroups)
-[require(cuda_glsl_hlsl_spirv_wgsl, subgroup_shuffle)]
+[require(cuda_glsl_hlsl_metal_spirv_wgsl, subgroup_shuffle)]
vector<T,N> WaveShuffle(vector<T,N> value, int lane)
{
__target_switch
@@ -15497,6 +15537,7 @@ vector<T,N> WaveShuffle(vector<T,N> value, int lane)
if (__isHalf<T>()) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16");
__intrinsic_asm "subgroupShuffle($0, $1)";
case hlsl: __intrinsic_asm "WaveReadLaneAt";
+ case metal: __intrinsic_asm "simd_shuffle($0, ushort($1))";
case spirv:
let ulane = uint(lane);
return spirv_asm {OpCapability GroupNonUniformShuffle; OpGroupNonUniformShuffle $$vector<T,N> result Subgroup $value $ulane};
diff --git a/source/slang/slang-capabilities.capdef b/source/slang/slang-capabilities.capdef
index f98be0e32..130439fe1 100644
--- a/source/slang/slang-capabilities.capdef
+++ b/source/slang/slang-capabilities.capdef
@@ -1939,10 +1939,11 @@ alias shader5_sm_5_0 = GL_ARB_gpu_shader5 | sm_5_0_version;
/// Capabilities required to use GLSL-style subgroup operations 'subgroup_basic'
/// [Compound]
-alias subgroup_basic = GL_KHR_shader_subgroup_basic
+alias subgroup_basic = GL_KHR_shader_subgroup_basic
| _sm_6_0
| _cuda_sm_7_0
| wgsl
+ | metal
;
/// Capabilities required to use GLSL-style subgroup operations 'subgroup_ballot'
/// [Compound]
@@ -1951,6 +1952,7 @@ alias subgroup_ballot = spirv_1_0 + GL_KHR_shader_subgroup_ballot
| _sm_6_0 + shader5_sm_5_0
| _cuda_sm_7_0 + shader5_sm_5_0
| wgsl
+ | metal
;
/// Capabilities required to use GLSL-style subgroup operations 'subgroup_ballot_activemask'
/// [Compound]
@@ -1966,6 +1968,7 @@ alias subgroup_basic_ballot = glsl + GL_KHR_shader_subgroup_basic + subgroup_bal
| hlsl + subgroup_ballot
| cuda + subgroup_ballot
| wgsl
+ | metal
;
/// Capabilities required to use GLSL-style subgroup operations 'subgroup_vote'
/// [Compound]
@@ -1973,6 +1976,7 @@ alias subgroup_vote = GL_KHR_shader_subgroup_vote
| _sm_6_0
| _cuda_sm_7_0
| wgsl
+ | metal
;
/// Capabilities required to use GLSL-style subgroup operations 'subgroup_vote'
/// [Compound]
@@ -1983,6 +1987,7 @@ alias subgroup_arithmetic = GL_KHR_shader_subgroup_arithmetic
| _sm_6_0
| _cuda_sm_7_0
| wgsl
+ | metal
;
/// Capabilities required to use GLSL-style subgroup operations 'subgroup_shuffle'
@@ -1991,6 +1996,7 @@ alias subgroup_shuffle = GL_KHR_shader_subgroup_shuffle
| _sm_6_0
| _cuda_sm_7_0
| wgsl
+ | metal
;
/// Capabilities required to use GLSL-style subgroup operations 'subgroup_shuffle_relative'
/// [Compound]
@@ -1998,6 +2004,7 @@ alias subgroup_shufflerelative = GL_KHR_shader_subgroup_shuffle_relative
| _sm_6_0
| _cuda_sm_7_0
| wgsl
+ | metal
;
/// Capabilities required to use GLSL-style subgroup operations 'subgroup_clustered'
/// [Compound]
@@ -2008,6 +2015,7 @@ alias subgroup_quad = GL_KHR_shader_subgroup_quad
| _sm_6_0
| _cuda_sm_7_0
| wgsl
+ | metal
;
/// Capabilities required to use GLSL-style subgroup operations 'subgroup_partitioned'
/// [Compound]
diff --git a/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-arithmetic_Exclusive.slang b/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-arithmetic_Exclusive.slang
index d44a29c14..0a0fcade5 100644
--- a/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-arithmetic_Exclusive.slang
+++ b/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-arithmetic_Exclusive.slang
@@ -10,6 +10,9 @@
//TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=BUF):-vk -compute -entry computeMain -allow-glsl -emit-spirv-directly
//TEST(compute):COMPARE_COMPUTE(filecheck-buffer=BUF):-wgpu -compute -entry computeMain -allow-glsl -xslang -DWGPU
+// Not testing because CI runners may not support Metal's intrinsics.
+//DISABLE_TEST(compute):COMPARE_COMPUTE(filecheck-buffer=BUF):-metal -compute -entry computeMain -allow-glsl -xslang -DMETAL
+
#version 430
#if 1 \
@@ -100,8 +103,8 @@ bool test1Arithmetic() {
& subgroupExclusiveAdd(T(1)) == T(3)
& subgroupExclusiveMul(T(1)) == T(1)
- // WGSL does not support exclusive min/max.
-#if !defined(WGPU)
+ // WGSL and Metal do not support exclusive min/max.
+#if !defined(WGPU) && !defined(METAL)
& subgroupExclusiveMin(T(1)) == T(1)
& subgroupExclusiveMax(T(1)) == T(1)
#endif
@@ -115,8 +118,8 @@ bool testVArithmetic() {
& subgroupExclusiveAdd(gvec(T(1))) == gvec(T(3))
& subgroupExclusiveMul(gvec(T(1))) == gvec(T(1))
- // WGSL does not support exclusive min/max.
-#if !defined(WGPU)
+ // WGSL and Metal do not support exclusive min/max.
+#if !defined(WGPU) && !defined(METAL)
& subgroupExclusiveMin(gvec(T(1))) == gvec(T(1))
& subgroupExclusiveMax(gvec(T(1))) == gvec(T(1))
#endif
@@ -142,8 +145,8 @@ bool testArithmetic() {
& testVArithmetic<uint, 3>()
& testVArithmetic<uint, 4>()
- // Disabled on WGPU as these built-in types are not supported as of time of writing.
-#if !defined (WGPU)
+ // Disabled on WGPU and Metal as these built-in types are not supported as of time of writing.
+#if !defined(WGPU) && !defined(METAL)
& test1Arithmetic<double>() // WARNING: intel GPU's lack FP64 support
& testVArithmetic<double, 2>()
& testVArithmetic<double, 3>()
@@ -152,10 +155,6 @@ bool testArithmetic() {
& testVArithmetic<int8_t, 2>()
& testVArithmetic<int8_t, 3>()
& testVArithmetic<int8_t, 4>()
- & test1Arithmetic<int16_t>()
- & testVArithmetic<int16_t, 2>()
- & testVArithmetic<int16_t, 3>()
- & testVArithmetic<int16_t, 4>()
& test1Arithmetic<int64_t>()
& testVArithmetic<int64_t, 2>()
& testVArithmetic<int64_t, 3>()
@@ -164,15 +163,23 @@ bool testArithmetic() {
& testVArithmetic<uint8_t, 2>()
& testVArithmetic<uint8_t, 3>()
& testVArithmetic<uint8_t, 4>()
- & test1Arithmetic<uint16_t>()
- & testVArithmetic<uint16_t, 2>()
- & testVArithmetic<uint16_t, 3>()
- & testVArithmetic<uint16_t, 4>()
& test1Arithmetic<uint64_t>()
& testVArithmetic<uint64_t, 2>()
& testVArithmetic<uint64_t, 3>()
& testVArithmetic<uint64_t, 4>()
#endif
+
+ // Disabled on WGPU as these built-in types are not supported as of time of writing.
+#if !defined (WGPU)
+ & test1Arithmetic<int16_t>()
+ & testVArithmetic<int16_t, 2>()
+ & testVArithmetic<int16_t, 3>()
+ & testVArithmetic<int16_t, 4>()
+ & test1Arithmetic<uint16_t>()
+ & testVArithmetic<uint16_t, 2>()
+ & testVArithmetic<uint16_t, 3>()
+ & testVArithmetic<uint16_t, 4>()
+#endif
;
}
@@ -180,8 +187,8 @@ void computeMain()
{
bool res0 = true
- // WGSL does not support bitwise exclusive intrinsics.
-#if !defined(WGPU)
+ // WGSL and Metal do not support bitwise exclusive intrinsics.
+#if !defined(WGPU) && !defined(METAL)
& testLogical()
#endif
;
diff --git a/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-arithmetic_Inclusive.slang b/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-arithmetic_Inclusive.slang
index 0c94d4c90..58c7d5aaa 100644
--- a/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-arithmetic_Inclusive.slang
+++ b/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-arithmetic_Inclusive.slang
@@ -10,6 +10,9 @@
//TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=BUF):-vk -compute -entry computeMain -allow-glsl -emit-spirv-directly
//TEST(compute):COMPARE_COMPUTE(filecheck-buffer=BUF):-wgpu -compute -entry computeMain -allow-glsl -xslang -DWGPU
+// Not testing because CI runners may not support Metal's intrinsics.
+//DISABLE_TEST(compute):COMPARE_COMPUTE(filecheck-buffer=BUF):-metal -compute -entry computeMain -allow-glsl -xslang -DMETAL
+
#version 430
#if 1 \
@@ -100,8 +103,8 @@ bool test1Arithmetic() {
& subgroupInclusiveAdd(T(1)) == T(4)
& subgroupInclusiveMul(T(1)) == T(1)
- // WGSL does not support inclusive min/max
-#if !defined(WGPU)
+ // WGSL and Metal do not support inclusive min/max
+#if !defined(WGPU) && !defined(METAL)
& subgroupInclusiveMin(T(1)) == T(1)
& subgroupInclusiveMax(T(1)) == T(1)
#endif
@@ -115,8 +118,8 @@ bool testVArithmetic() {
& subgroupInclusiveAdd(gvec(T(1))) == gvec(T(4))
// & subgroupInclusiveMul(gvec(T(1))) == gvec(T(1))
- // WGSL does not support inclusive min/max
-#if !defined(WGPU)
+ // WGSL and Metal do not support inclusive min/max
+#if !defined(WGPU) && !defined(METAL)
& subgroupInclusiveMin(gvec(T(1))) == gvec(T(1))
& subgroupInclusiveMax(gvec(T(1))) == gvec(T(1))
#endif
@@ -142,20 +145,16 @@ bool testArithmetic() {
// & testVArithmetic<uint, 3>()
// & testVArithmetic<uint, 4>()
- // Disabled on WGPU as these built-in types are not supported as of time of writing.
-#if !defined (WGPU)
+ // Disabled on WGPU and Metal as these built-in types are not supported as of time of writing.
+#if !defined(WGPU) && !defined(METAL)
& test1Arithmetic<double>() // WARNING: intel GPU's lack FP64 support
& testVArithmetic<double, 2>()
& testVArithmetic<double, 3>()
& testVArithmetic<double, 4>()
- & test1Arithmetic<uint8_t>()
+ & test1Arithmetic<uint8_t>()
& testVArithmetic<uint8_t, 2>()
& testVArithmetic<uint8_t, 3>()
& testVArithmetic<uint8_t, 4>()
- & test1Arithmetic<uint16_t>()
- & testVArithmetic<uint16_t, 2>()
- & testVArithmetic<uint16_t, 3>()
- & testVArithmetic<uint16_t, 4>()
& test1Arithmetic<uint64_t>()
& testVArithmetic<uint64_t, 2>()
& testVArithmetic<uint64_t, 3>()
@@ -164,16 +163,23 @@ bool testArithmetic() {
& testVArithmetic<int8_t, 2>()
& testVArithmetic<int8_t, 3>()
& testVArithmetic<int8_t, 4>()
- & test1Arithmetic<int16_t>()
- & testVArithmetic<int16_t, 2>()
- & testVArithmetic<int16_t, 3>()
- & testVArithmetic<int16_t, 4>()
& test1Arithmetic<int64_t>()
& testVArithmetic<int64_t, 2>()
& testVArithmetic<int64_t, 3>()
& testVArithmetic<int64_t, 4>()
#endif
+ // Disabled on WGPU as these built-in types are not supported as of time of writing.
+#if !defined (WGPU)
+ & test1Arithmetic<uint16_t>()
+ & testVArithmetic<uint16_t, 2>()
+ & testVArithmetic<uint16_t, 3>()
+ & testVArithmetic<uint16_t, 4>()
+ & test1Arithmetic<int16_t>()
+ & testVArithmetic<int16_t, 2>()
+ & testVArithmetic<int16_t, 3>()
+ & testVArithmetic<int16_t, 4>()
+#endif
;
}
@@ -181,8 +187,8 @@ void computeMain()
{
bool res0 = true
- // WGSL does not support bitwise inclusive intrinsics.
-#if !defined(WGPU)
+ // WGSL and Metal do not support bitwise inclusive intrinsics.
+#if !defined(WGPU) && !defined(METAL)
& testLogical()
#endif
;
diff --git a/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-arithmetic_None.slang b/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-arithmetic_None.slang
index e502e3608..bb6316a59 100644
--- a/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-arithmetic_None.slang
+++ b/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-arithmetic_None.slang
@@ -10,6 +10,9 @@
//TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=BUF):-vk -compute -entry computeMain -allow-glsl -emit-spirv-directly
//TEST(compute):COMPARE_COMPUTE(filecheck-buffer=BUF):-wgpu -compute -entry computeMain -allow-glsl -xslang -DWGPU
+// Not testing because CI runners may not support Metal's intrinsics.
+//DISABLE_TEST(compute):COMPARE_COMPUTE(filecheck-buffer=BUF):-metal -compute -entry computeMain -allow-glsl -xslang -DMETAL
+
#version 430
#if 1 \
@@ -64,16 +67,12 @@ bool testLogical() {
& testVLogical<uint, 3>()
& testVLogical<uint, 4>()
- // Disabled on WGPU as these built-in types are not supported as of time of writing.
-#if !defined (WGPU)
+ // Disabled on WGPU and Metal as these built-in types are not supported as of time of writing.
+#if !defined(WGPU) && !defined(METAL)
& test1Logical<int8_t>()
& testVLogical<int8_t, 2>()
& testVLogical<int8_t, 3>()
& testVLogical<int8_t, 4>()
- & test1Logical<int16_t>()
- & testVLogical<int16_t, 2>()
- & testVLogical<int16_t, 3>()
- & testVLogical<int16_t, 4>()
& test1Logical<int64_t>()
& testVLogical<int64_t, 2>()
& testVLogical<int64_t, 3>()
@@ -82,10 +81,6 @@ bool testLogical() {
& testVLogical<uint8_t, 2>()
& testVLogical<uint8_t, 3>()
& testVLogical<uint8_t, 4>()
- & test1Logical<uint16_t>()
- & testVLogical<uint16_t, 2>()
- & testVLogical<uint16_t, 3>()
- & testVLogical<uint16_t, 4>()
& test1Logical<uint64_t>()
& testVLogical<uint64_t, 2>()
& testVLogical<uint64_t, 3>()
@@ -95,6 +90,18 @@ bool testLogical() {
& testVLogical<bool, 3>()
& testVLogical<bool, 4>()
#endif
+
+ // Disabled on WGPU as these built-in types are not supported as of time of writing.
+#if !defined (WGPU)
+ & test1Logical<int16_t>()
+ & testVLogical<int16_t, 2>()
+ & testVLogical<int16_t, 3>()
+ & testVLogical<int16_t, 4>()
+ & test1Logical<uint16_t>()
+ & testVLogical<uint16_t, 2>()
+ & testVLogical<uint16_t, 3>()
+ & testVLogical<uint16_t, 4>()
+#endif
;
}
@@ -138,8 +145,8 @@ bool testArithmetic() {
& testVArithmetic<uint, 3>()
& testVArithmetic<uint, 4>()
- // Disabled on WGPU as these built-in types are not supported as of time of writing.
-#if !defined (WGPU)
+ // Disabled on WGPU and Metal as these built-in types are not supported as of time of writing.
+#if !defined(WGPU) && !defined(METAL)
& test1Arithmetic<double>() // WARNING: intel GPU's lack FP64 support
& testVArithmetic<double, 2>()
& testVArithmetic<double, 3>()
@@ -148,10 +155,6 @@ bool testArithmetic() {
& testVArithmetic<int8_t, 2>()
& testVArithmetic<int8_t, 3>()
& testVArithmetic<int8_t, 4>()
- & test1Arithmetic<int16_t>()
- & testVArithmetic<int16_t, 2>()
- & testVArithmetic<int16_t, 3>()
- & testVArithmetic<int16_t, 4>()
& test1Arithmetic<int64_t>()
& testVArithmetic<int64_t, 2>()
& testVArithmetic<int64_t, 3>()
@@ -160,14 +163,21 @@ bool testArithmetic() {
& testVArithmetic<uint8_t, 2>()
& testVArithmetic<uint8_t, 3>()
& testVArithmetic<uint8_t, 4>()
+ & test1Arithmetic<uint64_t>()
+ & testVArithmetic<uint64_t, 2>()
+ & testVArithmetic<uint64_t, 3>()
+ & testVArithmetic<uint64_t, 4>()
+#endif
+ // Disabled on WGPU as these built-in types are not supported as of time of writing.
+#if !defined (WGPU)
+ & test1Arithmetic<int16_t>()
+ & testVArithmetic<int16_t, 2>()
+ & testVArithmetic<int16_t, 3>()
+ & testVArithmetic<int16_t, 4>()
& test1Arithmetic<uint16_t>()
& testVArithmetic<uint16_t, 2>()
& testVArithmetic<uint16_t, 3>()
& testVArithmetic<uint16_t, 4>()
- & test1Arithmetic<uint64_t>()
- & testVArithmetic<uint64_t, 2>()
- & testVArithmetic<uint64_t, 3>()
- & testVArithmetic<uint64_t, 4>()
#endif
;
}
diff --git a/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-ballot.slang b/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-ballot.slang
index d1ed4cc78..04f1b935a 100644
--- a/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-ballot.slang
+++ b/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-ballot.slang
@@ -11,6 +11,9 @@
//TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=BUF):-vk -compute -entry computeMain -allow-glsl -emit-spirv-directly
//TEST(compute):COMPARE_COMPUTE(filecheck-buffer=BUF):-wgpu -compute -entry computeMain -allow-glsl -xslang -DWGPU
+// Not testing because CI runners may not support Metal's intrinsics.
+//DISABLE_TEST(compute):COMPARE_COMPUTE(filecheck-buffer=BUF):-metal -compute -entry computeMain -allow-glsl -xslang -DMETAL
+
#version 430
// breaks on Nvidia GPU by returning 0 which is trivially wrong (works on Intel Iris Xe)
@@ -76,8 +79,8 @@ bool testBroadcastX() {
& testVBroadcastX<uint, 3>()
& testVBroadcastX<uint, 4>()
- // Disabled on WGPU as these built-in types are not supported as of time of writing.
-#if !defined(WGPU)
+ // Disabled on WGPU and Metal as these built-in types are not supported as of time of writing.
+#if !defined(WGPU) && !defined(METAL)
& test1BroadcastX<double>() // WARNING: intel GPU's lack FP64 support
& testVBroadcastX<double, 2>()
& testVBroadcastX<double, 3>()
@@ -86,10 +89,6 @@ bool testBroadcastX() {
& testVBroadcastX<int8_t, 2>()
& testVBroadcastX<int8_t, 3>()
& testVBroadcastX<int8_t, 4>()
- & test1BroadcastX<int16_t>()
- & testVBroadcastX<int16_t, 2>()
- & testVBroadcastX<int16_t, 3>()
- & testVBroadcastX<int16_t, 4>()
& test1BroadcastX<int64_t>()
& testVBroadcastX<int64_t, 2>()
& testVBroadcastX<int64_t, 3>()
@@ -98,10 +97,6 @@ bool testBroadcastX() {
& testVBroadcastX<uint8_t, 2>()
& testVBroadcastX<uint8_t, 3>()
& testVBroadcastX<uint8_t, 4>()
- & test1BroadcastX<uint16_t>()
- & testVBroadcastX<uint16_t, 2>()
- & testVBroadcastX<uint16_t, 3>()
- & testVBroadcastX<uint16_t, 4>()
& test1BroadcastX<uint64_t>()
& testVBroadcastX<uint64_t, 2>()
& testVBroadcastX<uint64_t, 3>()
@@ -111,6 +106,18 @@ bool testBroadcastX() {
& testVBroadcastX<bool, 3>()
& testVBroadcastX<bool, 4>()
#endif
+
+ // Disabled on WGPU only: the 16-bit integer types below are not supported there as of the time of writing.
+#if !defined(WGPU)
+ & test1BroadcastX<int16_t>()
+ & testVBroadcastX<int16_t, 2>()
+ & testVBroadcastX<int16_t, 3>()
+ & testVBroadcastX<int16_t, 4>()
+ & test1BroadcastX<uint16_t>()
+ & testVBroadcastX<uint16_t, 2>()
+ & testVBroadcastX<uint16_t, 3>()
+ & testVBroadcastX<uint16_t, 4>()
+#endif
;
}
@@ -118,7 +125,7 @@ bool testBallot() {
return true
& (subgroupBallot(true).x == 0xFFFFFFFF)
-#if !defined(WGPU)
+#if !defined(WGPU) && !defined(METAL)
& (subgroupInverseBallot(uvec4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF)) == true)
& (subgroupBallotBitExtract(uvec4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF), 0) == true)
& (subgroupBallotBitCount(uvec4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF)) == 32)
diff --git a/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-basic.slang b/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-basic.slang
index b862d289c..834b4c5cd 100644
--- a/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-basic.slang
+++ b/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-basic.slang
@@ -10,6 +10,7 @@
//TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=BUF):-vk -compute -entry computeMain -allow-glsl
//TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=BUF):-vk -compute -entry computeMain -allow-glsl -emit-spirv-directly
//TEST(compute):COMPARE_COMPUTE(filecheck-buffer=BUF):-wgpu -compute -entry computeMain -allow-glsl -xslang -DWGPU
+//TEST(compute):COMPARE_COMPUTE(filecheck-buffer=BUF):-metal -compute -entry computeMain -allow-glsl
#version 430
diff --git a/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-shuffle-relative.slang b/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-shuffle-relative.slang
index 5290ddfae..f9abfd8e5 100644
--- a/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-shuffle-relative.slang
+++ b/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-shuffle-relative.slang
@@ -11,6 +11,7 @@
//TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=BUF):-vk -compute -entry computeMain -allow-glsl
//TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=BUF):-vk -compute -entry computeMain -allow-glsl -emit-spirv-directly
//TEST(compute):COMPARE_COMPUTE(filecheck-buffer=BUF):-wgpu -compute -entry computeMain -allow-glsl -xslang -DWGPU
+//TEST(compute):COMPARE_COMPUTE(filecheck-buffer=BUF):-metal -compute -entry computeMain -allow-glsl -xslang -DMETAL
#version 430
@@ -74,8 +75,8 @@ bool testShuffleX() {
& testVShuffleX<uint, 3>()
& testVShuffleX<uint, 4>()
- // Disabled on WGPU as these built-in types are not supported as of time of writing.
-#if !defined(WGPU)
+// Disabled on WGPU and Metal as these built-in types are not supported as of time of writing.
+#if !defined(WGPU) && !defined(METAL)
& test1ShuffleX<double>() // WARNING: intel GPU's lack FP64 support
& testVShuffleX<double, 2>()
& testVShuffleX<double, 3>()
@@ -84,10 +85,6 @@ bool testShuffleX() {
& testVShuffleX<int8_t, 2>()
& testVShuffleX<int8_t, 3>()
& testVShuffleX<int8_t, 4>()
- & test1ShuffleX<int16_t>()
- & testVShuffleX<int16_t, 2>()
- & testVShuffleX<int16_t, 3>()
- & testVShuffleX<int16_t, 4>()
& test1ShuffleX<int64_t>()
& testVShuffleX<int64_t, 2>()
& testVShuffleX<int64_t, 3>()
@@ -96,10 +93,6 @@ bool testShuffleX() {
& testVShuffleX<uint8_t, 2>()
& testVShuffleX<uint8_t, 3>()
& testVShuffleX<uint8_t, 4>()
- & test1ShuffleX<uint16_t>()
- & testVShuffleX<uint16_t, 2>()
- & testVShuffleX<uint16_t, 3>()
- & testVShuffleX<uint16_t, 4>()
& test1ShuffleX<uint64_t>()
& testVShuffleX<uint64_t, 2>()
& testVShuffleX<uint64_t, 3>()
@@ -109,6 +102,18 @@ bool testShuffleX() {
& testVShuffleX<bool, 3>()
& testVShuffleX<bool, 4>()
#endif
+
+ // Disabled on WGPU only: the 16-bit integer types below are not supported there as of the time of writing.
+#if !defined(WGPU)
+ & test1ShuffleX<int16_t>()
+ & testVShuffleX<int16_t, 2>()
+ & testVShuffleX<int16_t, 3>()
+ & testVShuffleX<int16_t, 4>()
+ & test1ShuffleX<uint16_t>()
+ & testVShuffleX<uint16_t, 2>()
+ & testVShuffleX<uint16_t, 3>()
+ & testVShuffleX<uint16_t, 4>()
+#endif
;
}
diff --git a/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-shuffle.slang b/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-shuffle.slang
index ea9b8c120..62af93f3e 100644
--- a/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-shuffle.slang
+++ b/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-shuffle.slang
@@ -11,6 +11,7 @@
//TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=BUF):-vk -compute -entry computeMain -allow-glsl
//TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=BUF):-vk -compute -entry computeMain -allow-glsl -emit-spirv-directly
//TEST(compute):COMPARE_COMPUTE(filecheck-buffer=BUF):-wgpu -compute -entry computeMain -allow-glsl -xslang -DWGPU
+//TEST(compute):COMPARE_COMPUTE(filecheck-buffer=BUF):-metal -compute -entry computeMain -allow-glsl -xslang -DMETAL
#version 430
@@ -89,9 +90,9 @@ bool testShuffleX() {
& testVShuffleX<uint, 2>()
& testVShuffleX<uint, 3>()
& testVShuffleX<uint, 4>()
-
- // Disabled on WGPU as these built-in types are not supported as of time of writing.
-#if !defined(WGPU)
+
+ // Disabled on WGPU and Metal as these built-in types are not supported as of time of writing.
+#if !defined(WGPU) && !defined(METAL)
& test1ShuffleX<double>() // WARNING: intel GPU's lack FP64 support
& testVShuffleX<double, 2>()
& testVShuffleX<double, 3>()
@@ -100,30 +101,33 @@ bool testShuffleX() {
& testVShuffleX<int8_t, 2>()
& testVShuffleX<int8_t, 3>()
& testVShuffleX<int8_t, 4>()
- & test1ShuffleX<int16_t>()
- & testVShuffleX<int16_t, 2>()
- & testVShuffleX<int16_t, 3>()
- & testVShuffleX<int16_t, 4>()
- & test1ShuffleX<int64_t>()
+ & test1ShuffleX<int64_t>()
& testVShuffleX<int64_t, 2>()
& testVShuffleX<int64_t, 3>()
& testVShuffleX<int64_t, 4>()
- & test1ShuffleX<uint8_t>()
+ & test1ShuffleX<uint8_t>()
& testVShuffleX<uint8_t, 2>()
& testVShuffleX<uint8_t, 3>()
& testVShuffleX<uint8_t, 4>()
- & test1ShuffleX<uint16_t>()
- & testVShuffleX<uint16_t, 2>()
- & testVShuffleX<uint16_t, 3>()
- & testVShuffleX<uint16_t, 4>()
- & test1ShuffleX<uint64_t>()
- & testVShuffleX<uint64_t, 2>()
- & testVShuffleX<uint64_t, 3>()
- & testVShuffleX<uint64_t, 4>()
& test1ShuffleX<bool>()
& testVShuffleX<bool, 2>()
& testVShuffleX<bool, 3>()
& testVShuffleX<bool, 4>()
+ & test1ShuffleX<uint64_t>()
+ & testVShuffleX<uint64_t, 2>()
+ & testVShuffleX<uint64_t, 3>()
+ & testVShuffleX<uint64_t, 4>()
+#endif
+ // Disabled on WGPU only: the 16-bit integer types below are not supported there as of the time of writing.
+#if !defined(WGPU)
+ & test1ShuffleX<int16_t>()
+ & testVShuffleX<int16_t, 2>()
+ & testVShuffleX<int16_t, 3>()
+ & testVShuffleX<int16_t, 4>()
+ & test1ShuffleX<uint16_t>()
+ & testVShuffleX<uint16_t, 2>()
+ & testVShuffleX<uint16_t, 3>()
+ & testVShuffleX<uint16_t, 4>()
#endif
;
}
diff --git a/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-vote.slang b/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-vote.slang
index 3f356e647..c0b6e3788 100644
--- a/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-vote.slang
+++ b/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-vote.slang
@@ -11,6 +11,9 @@
//TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=BUF):-vk -compute -entry computeMain -allow-glsl -emit-spirv-directly
//TEST(compute):COMPARE_COMPUTE(filecheck-buffer=BUF):-wgpu -compute -entry computeMain -allow-glsl -xslang -DWGPU
+// Not testing because CI runners may not support Metal's intrinsics.
+//DISABLE_TEST(compute):COMPARE_COMPUTE(filecheck-buffer=BUF):-metal -compute -entry computeMain -allow-glsl -xslang -DMETAL
+
#version 430
//TEST_INPUT:ubuffer(data=[9], stride=4):name=inputBuffer
@@ -168,7 +171,7 @@ void computeMain()
outputBuffer.data[4] = 1;
// All equal intrinsic is not supported on WGSL as of time of writing.
-#if !defined(WGPU)
+#if !defined(WGPU) && !defined(METAL)
if (testAllEqual()) {
subgroupBarrier();
outputBuffer.data[4] = 2;
diff --git a/tests/hlsl-intrinsic/wave-active-product.slang b/tests/hlsl-intrinsic/wave-active-product.slang
index 1a17f88e9..a15cbfc6d 100644
--- a/tests/hlsl-intrinsic/wave-active-product.slang
+++ b/tests/hlsl-intrinsic/wave-active-product.slang
@@ -5,6 +5,7 @@
//TEST(vulkan):COMPARE_COMPUTE_EX:-vk -compute -shaderobj -render-feature hardware-device
//TEST:COMPARE_COMPUTE_EX:-cuda -compute -render-features cuda_sm_7_0 -shaderobj
//TEST:COMPARE_COMPUTE_EX:-wgpu -compute -shaderobj
+//TEST:COMPARE_COMPUTE_EX:-metal -compute -shaderobj
//TEST_INPUT:ubuffer(data=[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0], stride=4):out,name outputBuffer
RWStructuredBuffer<int> outputBuffer;
@@ -30,4 +31,4 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID)
outputBuffer[idx] = WaveActiveProduct((idx & 3) + 1);
#endif
-} \ No newline at end of file
+}
diff --git a/tests/hlsl-intrinsic/wave-broadcast-lane-at-vk.slang b/tests/hlsl-intrinsic/wave-broadcast-lane-at-vk.slang
index e51fdb3f9..b0cff08a9 100644
--- a/tests/hlsl-intrinsic/wave-broadcast-lane-at-vk.slang
+++ b/tests/hlsl-intrinsic/wave-broadcast-lane-at-vk.slang
@@ -2,6 +2,7 @@
//TEST:COMPARE_COMPUTE_EX:-slang -compute -dx12 -use-dxil -profile cs_6_0 -shaderobj
//TEST(vulkan):COMPARE_COMPUTE_EX:-vk -compute -shaderobj
//TEST:COMPARE_COMPUTE_EX:-wgpu -compute -shaderobj
+//TEST:COMPARE_COMPUTE_EX:-metal -compute -shaderobj
//TEST_INPUT:ubuffer(data=[0 0 0 0], stride=4):out,name outputBuffer
RWStructuredBuffer<int> outputBuffer;
@@ -27,4 +28,4 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID)
}
outputBuffer[idx] = value;
-} \ No newline at end of file
+}
diff --git a/tests/hlsl-intrinsic/wave-diverge.slang b/tests/hlsl-intrinsic/wave-diverge.slang
index 56e9c1841..a18e99f58 100644
--- a/tests/hlsl-intrinsic/wave-diverge.slang
+++ b/tests/hlsl-intrinsic/wave-diverge.slang
@@ -5,6 +5,7 @@
//TEST(vulkan):COMPARE_COMPUTE_EX:-vk -compute -shaderobj
//TEST:COMPARE_COMPUTE_EX:-cuda -compute -render-features cuda_sm_7_0 -shaderobj
//TEST:COMPARE_COMPUTE_EX:-wgpu -compute -shaderobj
+//TEST:COMPARE_COMPUTE_EX:-metal -compute -shaderobj
//TEST_INPUT:ubuffer(data=[0 0 0 0], stride=4):out,name outputBuffer
RWStructuredBuffer<int> outputBuffer;
@@ -25,4 +26,4 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID)
value = WaveActiveMin(idx + 1);
outputBuffer[idx] = value;
-} \ No newline at end of file
+}
diff --git a/tests/hlsl-intrinsic/wave-is-first-lane.slang b/tests/hlsl-intrinsic/wave-is-first-lane.slang
index 03dcab507..220a5758b 100644
--- a/tests/hlsl-intrinsic/wave-is-first-lane.slang
+++ b/tests/hlsl-intrinsic/wave-is-first-lane.slang
@@ -5,6 +5,7 @@
//TEST(vulkan):COMPARE_COMPUTE_EX:-vk -compute -shaderobj -render-feature hardware-device
//TEST:COMPARE_COMPUTE_EX:-cuda -compute -render-features cuda_sm_7_0 -shaderobj
//TEST:COMPARE_COMPUTE_EX:-wgpu -compute -shaderobj
+//TEST:COMPARE_COMPUTE_EX:-metal -compute -shaderobj
//TEST_INPUT:ubuffer(data=[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0], stride=4):out,name outputBuffer
RWStructuredBuffer<int> outputBuffer;
@@ -23,4 +24,4 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID)
int value = 0;
outputBuffer[idx] = WaveIsFirstLane();
-} \ No newline at end of file
+}
diff --git a/tests/hlsl-intrinsic/wave-prefix-product.slang b/tests/hlsl-intrinsic/wave-prefix-product.slang
index dfd11a654..774f5996e 100644
--- a/tests/hlsl-intrinsic/wave-prefix-product.slang
+++ b/tests/hlsl-intrinsic/wave-prefix-product.slang
@@ -5,6 +5,7 @@
//TEST(vulkan):COMPARE_COMPUTE_EX:-vk -compute -shaderobj -render-feature hardware-device
//TEST:COMPARE_COMPUTE_EX:-cuda -compute -render-features cuda_sm_7_0 -shaderobj
//TEST:COMPARE_COMPUTE_EX:-wgpu -compute -shaderobj
+//TEST:COMPARE_COMPUTE_EX:-metal -compute -shaderobj
//TEST_INPUT:ubuffer(data=[0 0 0 0 0 0 0 0], stride=4):out,name outputBuffer
RWStructuredBuffer<int> outputBuffer;
@@ -23,4 +24,4 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID)
outputBuffer[idx] = r0 + (r2 << 16);
-} \ No newline at end of file
+}
diff --git a/tests/hlsl-intrinsic/wave-prefix-sum-fp16.slang b/tests/hlsl-intrinsic/wave-prefix-sum-fp16.slang
index dc8cfa5bf..03fa39da8 100644
--- a/tests/hlsl-intrinsic/wave-prefix-sum-fp16.slang
+++ b/tests/hlsl-intrinsic/wave-prefix-sum-fp16.slang
@@ -1,6 +1,7 @@
//TEST:SIMPLE(filecheck=CHECK_SPV):-target spirv -entry computeMain -stage compute -emit-spirv-directly
//TEST:SIMPLE(filecheck=CHECK_SPV):-target spirv -entry computeMain -stage compute
//TEST:SIMPLE(filecheck=CHECK_WGSL):-target wgsl -entry computeMain -stage compute
+//TEST:SIMPLE(filecheck=CHECK_METAL):-target metal -entry computeMain -stage compute
//TEST_INPUT:ubuffer(data=[0 0 0 0 0 0 0 0], stride=4):out,name outputBuffer
RWStructuredBuffer<int> outputBuffer;
@@ -14,7 +15,8 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID)
// CHECK_SPV: OpGroupNonUniformFAdd
// CHECK_WGSL: subgroupExclusiveAdd
+ // CHECK_METAL: simd_prefix_exclusive_sum
float2 r1 = WavePrefixSum(v1);
outputBuffer[idx] = (int)r1.x;
-} \ No newline at end of file
+}
diff --git a/tests/hlsl-intrinsic/wave-prefix-sum.slang b/tests/hlsl-intrinsic/wave-prefix-sum.slang
index ab3480646..4f7c2912d 100644
--- a/tests/hlsl-intrinsic/wave-prefix-sum.slang
+++ b/tests/hlsl-intrinsic/wave-prefix-sum.slang
@@ -5,6 +5,7 @@
//TEST(vulkan):COMPARE_COMPUTE_EX:-vk -compute -shaderobj -render-feature hardware-device
//TEST:COMPARE_COMPUTE_EX:-cuda -compute -render-features cuda_sm_7_0 -shaderobj
//TEST:COMPARE_COMPUTE_EX:-wgpu -compute -shaderobj
+//TEST:COMPARE_COMPUTE_EX:-metal -compute -shaderobj
//TEST_INPUT:ubuffer(data=[0 0 0 0 0 0 0 0], stride=4):out,name outputBuffer
RWStructuredBuffer<int> outputBuffer;
@@ -22,4 +23,4 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID)
int r2 = int(r1.x) + int(r1.y) - idx;
outputBuffer[idx] = r0 + (r2 << 16);
-} \ No newline at end of file
+}
diff --git a/tests/hlsl-intrinsic/wave-read-lane-at-vk.slang b/tests/hlsl-intrinsic/wave-read-lane-at-vk.slang
index 4f8a27a74..89ea47415 100644
--- a/tests/hlsl-intrinsic/wave-read-lane-at-vk.slang
+++ b/tests/hlsl-intrinsic/wave-read-lane-at-vk.slang
@@ -4,6 +4,7 @@
//TEST:COMPARE_COMPUTE_EX:-slang -compute -dx12 -use-dxil -profile cs_6_0 -shaderobj -render-feature hardware-device
//TEST(vulkan):COMPARE_COMPUTE_EX:-vk -compute -shaderobj
//TEST:COMPARE_COMPUTE_EX:-wgpu -compute -shaderobj
+//TEST:COMPARE_COMPUTE_EX:-metal -compute -shaderobj
//TEST_INPUT:ubuffer(data=[0 0 0 0], stride=4):out,name outputBuffer
RWStructuredBuffer<int> outputBuffer;
@@ -42,4 +43,4 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID)
}
outputBuffer[idx] = value;
-} \ No newline at end of file
+}
diff --git a/tests/hlsl-intrinsic/wave-shuffle-vk.slang b/tests/hlsl-intrinsic/wave-shuffle-vk.slang
index 980a8e3b4..fe8defa23 100644
--- a/tests/hlsl-intrinsic/wave-shuffle-vk.slang
+++ b/tests/hlsl-intrinsic/wave-shuffle-vk.slang
@@ -6,6 +6,7 @@
//TEST(vulkan):COMPARE_COMPUTE_EX:-vk -compute -shaderobj
//TEST:COMPARE_COMPUTE_EX:-cuda -compute -render-features cuda_sm_7_0 -shaderobj
//TEST:COMPARE_COMPUTE_EX:-wgpu -compute -shaderobj
+//TEST:COMPARE_COMPUTE_EX:-metal -compute -shaderobj
//TEST_INPUT:ubuffer(data=[0 0 0 0], stride=4):out,name outputBuffer
RWStructuredBuffer<int> outputBuffer;
@@ -31,4 +32,4 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID)
}
outputBuffer[idx] = value;
-} \ No newline at end of file
+}
diff --git a/tests/hlsl-intrinsic/wave-vector.slang b/tests/hlsl-intrinsic/wave-vector.slang
index d4d99b776..f786794ec 100644
--- a/tests/hlsl-intrinsic/wave-vector.slang
+++ b/tests/hlsl-intrinsic/wave-vector.slang
@@ -5,6 +5,7 @@
//TEST(vulkan):COMPARE_COMPUTE_EX:-vk -compute -shaderobj -render-feature hardware-device
//TEST:COMPARE_COMPUTE_EX:-cuda -compute -render-features cuda_sm_7_0 -shaderobj
//TEST:COMPARE_COMPUTE_EX:-wgpu -compute -shaderobj
+//TEST:COMPARE_COMPUTE_EX:-metal -compute -shaderobj
//TEST_INPUT:ubuffer(data=[0 0 0 0 0 0 0 0], stride=4):out,name outputBuffer
RWStructuredBuffer<int> outputBuffer;
@@ -28,4 +29,4 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID)
int2 r = r0 + int2(r1) + r2 + r3 + r4;
outputBuffer[idx] = r.x + r.y;
-} \ No newline at end of file
+}
diff --git a/tests/hlsl-intrinsic/wave.slang b/tests/hlsl-intrinsic/wave.slang
index c15233e9c..f7e52b887 100644
--- a/tests/hlsl-intrinsic/wave.slang
+++ b/tests/hlsl-intrinsic/wave.slang
@@ -5,6 +5,7 @@
//TEST(vulkan):COMPARE_COMPUTE_EX:-vk -compute -shaderobj
//TEST:COMPARE_COMPUTE_EX:-cuda -compute -render-features cuda_sm_7_0 -shaderobj
//TEST:COMPARE_COMPUTE_EX:-wgpu -compute -shaderobj
+//TEST:COMPARE_COMPUTE_EX:-metal -compute -shaderobj
//TEST_INPUT:ubuffer(data=[0 0 0 0], stride=4):out,name outputBuffer
RWStructuredBuffer<int> outputBuffer;