diff options
16 files changed, 3389 insertions, 127 deletions
diff --git a/source/slang/glsl.meta.slang b/source/slang/glsl.meta.slang index 8403d1391..824b3e3f3 100644 --- a/source/slang/glsl.meta.slang +++ b/source/slang/glsl.meta.slang @@ -2825,21 +2825,1794 @@ public uint rayQueryGetIntersectionTypeEXT(rayQueryEXT q, bool committed) return 0; } +// TODO: implementation of built-in variables; proper tests; these are stubs +// likely related to the following issue since GLSL adds new +// 'system' variables: https://github.com/shader-slang/slang/issues/411 -// -// Subgroup -// +__generic<T : __BuiltinType> +[ForceInline] +void typeRequireChecks_shader_subgroup_GLSL() { + // the following is a separate function call, since otherwise the `__requireGLSLExtension` and associated __intrinsic_asm is ignored if the calling function also calls an __intrinsic_asm + __target_switch + { + case glsl: + if (__type_equals<T, half>() + || __type_equals<T, float16_t>() + ) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16"); + else if (__type_equals<T, uint8_t>() + || __type_equals<T, int8_t>() + ) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_int8"); + else if (__type_equals<T, uint16_t>() + || __type_equals<T, int16_t>() + ) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_int16"); + else if (__type_equals<T, uint64_t>() + || __type_equals<T, int64_t>() + ) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_int64"); + + __intrinsic_asm ""; + } +} + +__generic<T : __BuiltinType> +void shader_subgroup_preamble() { + // checks needed for shader_subgroup functions; __requireGLSLExtension does not work + // (does not add the ext specified correctly to the compile output; using extended type + // will result in error for using the type) + __target_switch + { + case glsl: + typeRequireChecks_shader_subgroup_GLSL<T>(); + case spirv: + return; + } + +} + +// GL_KHR_shader_subgroup_basic Built-in Variables + +void requireGLSLExtForSubgroupBasicBuiltin() { + __target_switch + { + case glsl: + 
__requireGLSLExtension("GL_KHR_shader_subgroup_basic"); + __intrinsic_asm ""; + } +} + +__spirv_version(1.3) +void setupExtForSubgroupBasicBuiltIn() { + __target_switch + { + case glsl: + requireGLSLExtForSubgroupBasicBuiltin(); + case spirv: + return; + } +} + +void requireGLSLExtForSubgroupBallotBuiltin() { + __target_switch + { + case glsl: + __requireGLSLExtension("GL_KHR_shader_subgroup_ballot"); + __intrinsic_asm ""; + } +} + +__spirv_version(1.3) +void setupExtForSubgroupBallotBuiltIn() { + __target_switch + { + case glsl: + requireGLSLExtForSubgroupBallotBuiltin(); + case spirv: + return; + } +} + +[require(glsl)] +[require(spirv)] +public property uint gl_NumSubgroups { + + get { + setupExtForSubgroupBasicBuiltIn(); + __target_switch + { + case glsl: + __intrinsic_asm "(gl_NumSubgroups)"; + case spirv: + return spirv_asm { + OpCapability GroupNonUniform; + result:$$uint = OpLoad builtin(NumSubgroups:uint); + }; + } + + } +} + +[require(glsl)] +[require(spirv)] +public property uint gl_SubgroupID +{ + get { + setupExtForSubgroupBasicBuiltIn(); + __target_switch + { + case glsl: + __intrinsic_asm "(gl_SubgroupID)"; + case spirv: + return spirv_asm { + OpCapability GroupNonUniform; + result:$$uint = OpLoad builtin(SubgroupId:uint); + }; + } + } +} + +[require(glsl)] +[require(spirv)] +public property uint gl_SubgroupSize +{ + get { + setupExtForSubgroupBasicBuiltIn(); + return WaveGetLaneCount(); + } +} + +[require(glsl)] +[require(spirv)] +public property uint gl_SubgroupInvocationID +{ + get { + setupExtForSubgroupBasicBuiltIn(); + return WaveGetLaneIndex(); + } +} + +[require(glsl)] +[require(spirv)] +public property uvec4 gl_SubgroupEqMask +{ + get { + setupExtForSubgroupBasicBuiltIn(); + setupExtForSubgroupBallotBuiltIn(); + __target_switch + { + case glsl: + __intrinsic_asm "(gl_SubgroupEqMask)"; + case spirv: + return spirv_asm { + OpCapability GroupNonUniformBallot; + result:$$uvec4 = OpLoad builtin(SubgroupEqMask:uvec4); + }; + } + } +} + 
+[require(glsl)] +[require(spirv)] +public property uvec4 gl_SubgroupGeMask +{ + get { + setupExtForSubgroupBasicBuiltIn(); + setupExtForSubgroupBallotBuiltIn(); + __target_switch + { + case glsl: + __intrinsic_asm "(gl_SubgroupGeMask)"; + case spirv: + return spirv_asm { + OpCapability GroupNonUniformBallot; + result:$$uvec4 = OpLoad builtin(SubgroupGeMask:uvec4); + }; + } + } +} + +[require(glsl)] +[require(spirv)] +public property uvec4 gl_SubgroupGtMask +{ + get { + setupExtForSubgroupBasicBuiltIn(); + setupExtForSubgroupBallotBuiltIn(); + __target_switch + { + case glsl: + __intrinsic_asm "(gl_SubgroupGtMask)"; + case spirv: + return spirv_asm { + OpCapability GroupNonUniformBallot; + result:$$uvec4 = OpLoad builtin(SubgroupGtMask:uvec4); + }; + } + } +} + +[require(glsl)] +[require(spirv)] +public property uvec4 gl_SubgroupLeMask +{ + get { + setupExtForSubgroupBasicBuiltIn(); + setupExtForSubgroupBallotBuiltIn(); + __target_switch + { + case glsl: + __intrinsic_asm "(gl_SubgroupLeMask)"; + case spirv: + return spirv_asm { + OpCapability GroupNonUniformBallot; + result:$$uvec4 = OpLoad builtin(SubgroupLeMask:uvec4); + }; + } + } +} + +[require(glsl)] +[require(spirv)] +public property uvec4 gl_SubgroupLtMask +{ + get { + setupExtForSubgroupBasicBuiltIn(); + setupExtForSubgroupBallotBuiltIn(); + __target_switch + { + case glsl: + __intrinsic_asm "(gl_SubgroupLtMask)"; + case spirv: + return spirv_asm { + OpCapability GroupNonUniformBallot; + result:$$uvec4 = OpLoad builtin(SubgroupLtMask:uvec4); + }; + } + } +} + +// GL_KHR_shader_subgroup_basic + +__glsl_extension(GL_KHR_shader_subgroup_basic) [require(glsl)] +__spirv_version(1.3) [require(spirv)] +[ForceInline] public void subgroupBarrier() +{ + __target_switch + { + case cuda: + __intrinsic_asm "__syncwarp()"; + case hlsl: + __intrinsic_asm "AllMemoryBarrierWithGroupSync()"; + case glsl: + __intrinsic_asm "subgroupBarrier()"; + case spirv: + spirv_asm { + OpCapability Shader; + OpControlBarrier Subgroup 
Subgroup AcquireRelease|SubgroupMemory|ImageMemory|UniformMemory + }; + + } +} + +__glsl_extension(GL_KHR_shader_subgroup_basic) [require(glsl)] +__spirv_version(1.3) [require(spirv)] +[ForceInline] public void subgroupMemoryBarrier() +{ + __target_switch + { + case cuda: + __intrinsic_asm "__threadfence_block()"; + case hlsl: + __intrinsic_asm "AllMemoryBarrier()"; + case glsl: + __intrinsic_asm "subgroupMemoryBarrier()"; + case spirv: + spirv_asm { + OpCapability Shader; + OpMemoryBarrier Subgroup AcquireRelease|SubgroupMemory|ImageMemory|UniformMemory + }; + + } +} + +__glsl_extension(GL_KHR_shader_subgroup_basic) [require(glsl)] +__spirv_version(1.3) [require(spirv)] +[ForceInline] public void subgroupMemoryBarrierBuffer() +{ + // the following implementation is NOT the same as DeviceMemoryBarrier + // HLSL lacks the same granularity of blocking on subgroup memory within a subgroup + __target_switch + { + case cuda: + __intrinsic_asm "__threadfence_block()"; + case hlsl: + __intrinsic_asm "DeviceMemoryBarrier()"; + case glsl: + __intrinsic_asm "subgroupMemoryBarrierBuffer()"; + case spirv: + spirv_asm { + OpCapability Shader; + OpMemoryBarrier Subgroup AcquireRelease|UniformMemory + }; + + } +} + +__glsl_extension(GL_KHR_shader_subgroup_basic) [require(glsl)] +__spirv_version(1.3) [require(spirv)] +[ForceInline] public void subgroupMemoryBarrierImage() +{ + __target_switch + { + case cuda: + __intrinsic_asm "__threadfence_block()"; + case hlsl: + __intrinsic_asm "DeviceMemoryBarrier()"; + case glsl: + __intrinsic_asm "subgroupMemoryBarrierImage()"; + case spirv: + spirv_asm { + OpMemoryBarrier Subgroup AcquireRelease|ImageMemory + }; + + } +} + +__glsl_extension(GL_KHR_shader_subgroup_basic) [require(glsl)] +__spirv_version(1.3) [require(spirv)] +[ForceInline] public void subgroupMemoryBarrierShared() +{ + __target_switch + { + case cuda: + __intrinsic_asm "__threadfence_block()"; + case hlsl: + __intrinsic_asm "GroupMemoryBarrier()"; + case glsl: + 
__intrinsic_asm "subgroupMemoryBarrierShared()"; + case spirv: + spirv_asm { + // SubgroupMemory triggers vulkan validation layer error; + // WorkgroupMemory is the next level of granularity + OpMemoryBarrier Subgroup AcquireRelease|WorkgroupMemory + }; + + } +} + +// subgroupElect: true for the active invocation with the lowest lane index +// (forwards to WaveIsFirstLane() on every target except CUDA). +__glsl_extension(GL_KHR_shader_subgroup_basic) [require(glsl)] +__spirv_version(1.3) [require(spirv)] +[ForceInline] public bool subgroupElect() +{ + __target_switch + { + case cuda: + // The elected lane is the index of the lowest set bit of the active mask. + // __ffs() returns that position 1-based; `mask & -mask` would yield a bit mask (1 << lane), + // which can never equal the lane index itself. + __intrinsic_asm "((__ffs(__activemask()) - 1) == _getLaneId())"; + case glsl: + case spirv: + case hlsl: + return WaveIsFirstLane(); + + } +} + +// GL_KHR_shader_subgroup_vote + +__glsl_extension(GL_KHR_shader_subgroup_vote) [require(glsl)] +__spirv_version(1.3) [require(spirv)] +[ForceInline] public bool subgroupAll(bool value) +{ + + return WaveActiveAllTrue(value); + +} + +__glsl_extension(GL_KHR_shader_subgroup_vote) [require(glsl)] +__spirv_version(1.3) [require(spirv)] +[ForceInline] public bool subgroupAny(bool value) +{ + return WaveActiveAnyTrue(value); + +} + +__generic<T : __BuiltinType> +__glsl_extension(GL_KHR_shader_subgroup_vote) [require(glsl)] +__spirv_version(1.3) [require(spirv)] +[ForceInline] public bool subgroupAllEqual(T value) +{ + shader_subgroup_preamble<T>(); + return WaveActiveAllEqual(value); +} + +__generic<T : __BuiltinType, let N : int> +__glsl_extension(GL_KHR_shader_subgroup_vote) [require(glsl)] +__spirv_version(1.3) [require(spirv)] +[ForceInline] public bool subgroupAllEqual(vector<T,N> value) +{ + shader_subgroup_preamble<T>(); + return WaveActiveAllEqual(value); +} + +// GL_KHR_shader_subgroup_arithmetic + +__generic<T : __BuiltinArithmeticType> +__glsl_extension(GL_KHR_shader_subgroup_arithmetic) [require(glsl)] +__spirv_version(1.3) [require(spirv)] +[ForceInline] public T subgroupAdd(T value) +{ + shader_subgroup_preamble<T>(); + return WaveActiveSum(value); +} + +__generic<T : __BuiltinArithmeticType> +__glsl_extension(GL_KHR_shader_subgroup_arithmetic) [require(glsl)] 
+__spirv_version(1.3) [require(spirv)] +[ForceInline] public T subgroupMul(T value) +{ + shader_subgroup_preamble<T>(); + return WaveActiveProduct(value); +} + +__generic<T : __BuiltinArithmeticType> +__glsl_extension(GL_KHR_shader_subgroup_arithmetic) [require(glsl)] +__spirv_version(1.3) [require(spirv)] +[ForceInline] public T subgroupMin(T value) +{ + shader_subgroup_preamble<T>(); + return WaveActiveMin(value); +} + +__generic<T : __BuiltinArithmeticType> +__glsl_extension(GL_KHR_shader_subgroup_arithmetic) [require(glsl)] +__spirv_version(1.3) [require(spirv)] +[ForceInline] public T subgroupMax(T value) +{ + shader_subgroup_preamble<T>(); + return WaveActiveMax(value); +} + +__generic<T : __BuiltinLogicalType> +__glsl_extension(GL_KHR_shader_subgroup_arithmetic) [require(glsl)] +__spirv_version(1.3) [require(spirv)] +[ForceInline] public T subgroupAnd(T value) +{ + shader_subgroup_preamble<T>(); + __target_switch + { + case glsl: __intrinsic_asm "subgroupAnd($0)"; + case spirv: + if (__isBool<T>()) { + return spirv_asm { + OpCapability GroupNonUniformArithmetic; + OpGroupNonUniformLogicalAnd $$T result Subgroup 0 $value + }; + } + else { + return spirv_asm { + OpCapability GroupNonUniformArithmetic; + OpGroupNonUniformBitwiseAnd $$T result Subgroup 0 $value + }; + } + } +} + +__generic<T : __BuiltinLogicalType> +__glsl_extension(GL_KHR_shader_subgroup_arithmetic) [require(glsl)] +__spirv_version(1.3) [require(spirv)] +[ForceInline] public T subgroupOr(T value) +{ + shader_subgroup_preamble<T>(); + __target_switch + { + case glsl: __intrinsic_asm "subgroupOr($0)"; + case spirv: + if (__isBool<T>()) { + return spirv_asm { + OpCapability GroupNonUniformArithmetic; + OpGroupNonUniformLogicalOr $$T result Subgroup 0 $value + }; + } + else { + return spirv_asm { + OpCapability GroupNonUniformArithmetic; + OpGroupNonUniformBitwiseOr $$T result Subgroup 0 $value + }; + } + } +} + +__generic<T : __BuiltinLogicalType> 
+__glsl_extension(GL_KHR_shader_subgroup_arithmetic) [require(glsl)] +__spirv_version(1.3) [require(spirv)] +[ForceInline] public T subgroupXor(T value) +{ + shader_subgroup_preamble<T>(); + __target_switch + { + case glsl: __intrinsic_asm "subgroupXor($0)"; + case spirv: + if (__isBool<T>()) { + return spirv_asm { + OpCapability GroupNonUniformArithmetic; + OpGroupNonUniformLogicalXor $$T result Subgroup 0 $value + }; + } + else { + return spirv_asm { + OpCapability GroupNonUniformArithmetic; + OpGroupNonUniformBitwiseXor $$T result Subgroup 0 $value + }; + } + } +} + +__generic<T : __BuiltinArithmeticType> +__glsl_extension(GL_KHR_shader_subgroup_arithmetic) [require(glsl)] +__spirv_version(1.3) [require(spirv)] +[ForceInline] public T subgroupInclusiveAdd(T value) +{ + shader_subgroup_preamble<T>(); + __target_switch + { + case glsl: + __intrinsic_asm "subgroupInclusiveAdd($0)"; + case spirv: + if (__isFloat<T>()) + return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformFAdd $$T result Subgroup InclusiveScan $value}; + else if (__isInt<T>()) + return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformIAdd $$T result Subgroup InclusiveScan $value}; + else return value; + } +} + +__generic<T : __BuiltinArithmeticType> +__glsl_extension(GL_KHR_shader_subgroup_arithmetic) [require(glsl)] +__spirv_version(1.3) [require(spirv)] +[ForceInline] public T subgroupInclusiveMul(T value) +{ + shader_subgroup_preamble<T>(); + __target_switch + { + case glsl: + __intrinsic_asm "subgroupInclusiveMul($0)"; + case spirv: + if (__isFloat<T>()) + return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformFMul $$T result Subgroup InclusiveScan $value}; + else if (__isInt<T>()) + return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformIMul $$T result Subgroup InclusiveScan $value}; + else return value; + } +} + +__generic<T : __BuiltinArithmeticType> +__glsl_extension(GL_KHR_shader_subgroup_arithmetic) 
[require(glsl)] +__spirv_version(1.3) [require(spirv)] +[ForceInline] public T subgroupInclusiveMin(T value) +{ + shader_subgroup_preamble<T>(); + __target_switch + { + case glsl: + __intrinsic_asm "subgroupInclusiveMin($0)"; + case spirv: + if (__isFloat<T>()) + return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformFMin $$T result Subgroup InclusiveScan $value}; + else if (__isSignedInt<T>()) + return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformSMin $$T result Subgroup InclusiveScan $value}; + else if (__isUnsignedInt<T>()) + return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformUMin $$T result Subgroup InclusiveScan $value}; + else return value; + } +} + +__generic<T : __BuiltinArithmeticType> +__glsl_extension(GL_KHR_shader_subgroup_arithmetic) [require(glsl)] +__spirv_version(1.3) [require(spirv)] +[ForceInline] public T subgroupInclusiveMax(T value) +{ + shader_subgroup_preamble<T>(); + __target_switch + { + case glsl: + __intrinsic_asm "subgroupInclusiveMax($0)"; + case spirv: + if (__isFloat<T>()) + return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformFMax $$T result Subgroup InclusiveScan $value}; + else if (__isSignedInt<T>()) + return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformSMax $$T result Subgroup InclusiveScan $value}; + else if (__isUnsignedInt<T>()) + return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformUMax $$T result Subgroup InclusiveScan $value}; + else return value; + } +} + +__generic<T : __BuiltinLogicalType> +__glsl_extension(GL_KHR_shader_subgroup_arithmetic) [require(glsl)] +__spirv_version(1.3) [require(spirv)] +[ForceInline] public T subgroupInclusiveAnd(T value) +{ + shader_subgroup_preamble<T>(); + __target_switch + { + case glsl: __intrinsic_asm "subgroupInclusiveAnd($0)"; + case spirv: + if (__isBool<T>()) { + return spirv_asm { + OpCapability GroupNonUniformArithmetic; + OpGroupNonUniformLogicalAnd 
$$T result Subgroup InclusiveScan $value + }; + } + else { + return spirv_asm { + OpCapability GroupNonUniformArithmetic; + OpGroupNonUniformBitwiseAnd $$T result Subgroup InclusiveScan $value + }; + } + } +} + +__generic<T : __BuiltinLogicalType> +__glsl_extension(GL_KHR_shader_subgroup_arithmetic) [require(glsl)] +__spirv_version(1.3) [require(spirv)] +[ForceInline] public T subgroupInclusiveOr(T value) +{ + shader_subgroup_preamble<T>(); + __target_switch + { + case glsl: __intrinsic_asm "subgroupInclusiveOr($0)"; + case spirv: + if (__isBool<T>()) { + return spirv_asm { + OpCapability GroupNonUniformArithmetic; + OpGroupNonUniformLogicalOr $$T result Subgroup InclusiveScan $value + }; + } + else { + return spirv_asm { + OpCapability GroupNonUniformArithmetic; + OpGroupNonUniformBitwiseOr $$T result Subgroup InclusiveScan $value + }; + } + } +} -__glsl_extension(KHR_shader_subgroup) -__glsl_version(450) -public void subgroupBarrier() +__generic<T : __BuiltinLogicalType> +__glsl_extension(GL_KHR_shader_subgroup_arithmetic) [require(glsl)] +__spirv_version(1.3) [require(spirv)] +[ForceInline] public T subgroupInclusiveXor(T value) { - //__subgroupBarrier(); + shader_subgroup_preamble<T>(); + __target_switch + { + case glsl: + __intrinsic_asm "subgroupInclusiveXor($0)"; + case spirv: + if (__isBool<T>()) return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformLogicalXor $$T result Subgroup InclusiveScan $value}; + else return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformBitwiseXor $$T result Subgroup InclusiveScan $value}; + } + return T(0); } -__glsl_extension(KHR_shader_subgroup) -__glsl_version(450) -public void subgroupMemoryBarrier() +__generic<T : __BuiltinArithmeticType> +__glsl_extension(GL_KHR_shader_subgroup_arithmetic) [require(glsl)] +__spirv_version(1.3) [require(spirv)] +[ForceInline] public T subgroupExclusiveAdd(T value) { + shader_subgroup_preamble<T>(); + return WavePrefixSum(value); } + +__generic<T : 
__BuiltinArithmeticType> +__glsl_extension(GL_KHR_shader_subgroup_arithmetic) [require(glsl)] +__spirv_version(1.3) [require(spirv)] +[ForceInline] public T subgroupExclusiveMul(T value) +{ + shader_subgroup_preamble<T>(); + return WavePrefixProduct(value); +} + +__generic<T : __BuiltinArithmeticType> +__glsl_extension(GL_KHR_shader_subgroup_arithmetic) [require(glsl)] +__spirv_version(1.3) [require(spirv)] +[ForceInline] public T subgroupExclusiveMin(T value) +{ + shader_subgroup_preamble<T>(); + __target_switch + { + case glsl: + __intrinsic_asm "subgroupExclusiveMin($0)"; + case spirv: + if (__isFloat<T>()) + return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformFMin $$T result Subgroup ExclusiveScan $value}; + else if (__isSignedInt<T>()) + return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformSMin $$T result Subgroup ExclusiveScan $value}; + else if (__isUnsignedInt<T>()) + return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformUMin $$T result Subgroup ExclusiveScan $value}; + else return value; + } +} + +__generic<T : __BuiltinArithmeticType> +__glsl_extension(GL_KHR_shader_subgroup_arithmetic) [require(glsl)] +__spirv_version(1.3) [require(spirv)] +[ForceInline] public T subgroupExclusiveMax(T value) +{ + shader_subgroup_preamble<T>(); + __target_switch + { + case glsl: + __intrinsic_asm "subgroupExclusiveMax($0)"; + case spirv: + if (__isFloat<T>()) + return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformFMax $$T result Subgroup ExclusiveScan $value}; + else if (__isSignedInt<T>()) + return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformSMax $$T result Subgroup ExclusiveScan $value}; + else if (__isUnsignedInt<T>()) + return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformUMax $$T result Subgroup ExclusiveScan $value}; + else return value; + } +} + +__generic<T : __BuiltinLogicalType> 
+__glsl_extension(GL_KHR_shader_subgroup_arithmetic) [require(glsl)] +__spirv_version(1.3) [require(spirv)] +[ForceInline] public T subgroupExclusiveAnd(T value) +{ + shader_subgroup_preamble<T>(); + __target_switch + { + case glsl: __intrinsic_asm "subgroupExclusiveAnd($0)"; + case spirv: + if (__isBool<T>()) return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformLogicalAnd $$T result Subgroup ExclusiveScan $value}; + else return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformBitwiseAnd $$T result Subgroup ExclusiveScan $value}; + } +} + +__generic<T : __BuiltinLogicalType> +__glsl_extension(GL_KHR_shader_subgroup_arithmetic) [require(glsl)] +__spirv_version(1.3) [require(spirv)] +[ForceInline] public T subgroupExclusiveOr(T value) +{ + shader_subgroup_preamble<T>(); + __target_switch + { + case glsl: __intrinsic_asm "subgroupExclusiveOr($0)"; + case spirv: + if (__isBool<T>()) return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformLogicalOr $$T result Subgroup ExclusiveScan $value}; + else return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformBitwiseOr $$T result Subgroup ExclusiveScan $value}; + } +} + +__generic<T : __BuiltinLogicalType> +__glsl_extension(GL_KHR_shader_subgroup_arithmetic) [require(glsl)] +__spirv_version(1.3) [require(spirv)] +[ForceInline] public T subgroupExclusiveXor(T value) +{ + shader_subgroup_preamble<T>(); + __target_switch + { + case glsl: __intrinsic_asm "subgroupExclusiveXor($0)"; + case spirv: + if (__isBool<T>()) return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformLogicalXor $$T result Subgroup ExclusiveScan $value}; + else return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformBitwiseXor $$T result Subgroup ExclusiveScan $value}; + } +} + +// GL_KHR_shader_subgroup_arithmetic (vector<T,N> overloads) +// note: this is a separate section because it is so huge that the only reasonable way to implement this is to just regex replace 
code + +__generic<T : __BuiltinArithmeticType, let N : int> +__glsl_extension(GL_KHR_shader_subgroup_arithmetic) [require(glsl)] +__spirv_version(1.3) [require(spirv)] +[ForceInline] public vector<T,N> subgroupAdd(vector<T,N> value) +{ + shader_subgroup_preamble<T>(); + return WaveActiveSum(value); +} + +__generic<T : __BuiltinArithmeticType, let N : int> +__glsl_extension(GL_KHR_shader_subgroup_arithmetic) [require(glsl)] +__spirv_version(1.3) [require(spirv)] +[ForceInline] public vector<T,N> subgroupMul(vector<T,N> value) +{ + shader_subgroup_preamble<T>(); + return WaveActiveProduct(value); +} + +__generic<T : __BuiltinArithmeticType, let N : int> +__glsl_extension(GL_KHR_shader_subgroup_arithmetic) [require(glsl)] +__spirv_version(1.3) [require(spirv)] +[ForceInline] public vector<T,N> subgroupMin(vector<T,N> value) +{ + shader_subgroup_preamble<T>(); + return WaveActiveMin(value); +} + +__generic<T : __BuiltinArithmeticType, let N : int> +__glsl_extension(GL_KHR_shader_subgroup_arithmetic) [require(glsl)] +__spirv_version(1.3) [require(spirv)] +[ForceInline] public vector<T,N> subgroupMax(vector<T,N> value) +{ + shader_subgroup_preamble<T>(); + return WaveActiveMax(value); +} + +__generic<T : __BuiltinLogicalType, let N : int> +__glsl_extension(GL_KHR_shader_subgroup_arithmetic) [require(glsl)] +__spirv_version(1.3) [require(spirv)] +[ForceInline] public vector<T,N> subgroupAnd(vector<T,N> value) +{ + shader_subgroup_preamble<T>(); + __target_switch + { + case glsl: __intrinsic_asm "subgroupAnd($0)"; + case spirv: + if (__isBool<T>()) { + return spirv_asm { + OpCapability GroupNonUniformArithmetic; + OpGroupNonUniformLogicalAnd $$vector<T,N> result Subgroup 0 $value + }; + } + else { + return spirv_asm { + OpCapability GroupNonUniformArithmetic; + OpGroupNonUniformBitwiseAnd $$vector<T,N> result Subgroup 0 $value + }; + } + + } +} + +__generic<T : __BuiltinLogicalType, let N : int> +__glsl_extension(GL_KHR_shader_subgroup_arithmetic) [require(glsl)] 
+__spirv_version(1.3) [require(spirv)] +[ForceInline] public vector<T,N> subgroupOr(vector<T,N> value) +{ + shader_subgroup_preamble<T>(); + __target_switch + { + case glsl: __intrinsic_asm "subgroupOr($0)"; + case spirv: + if (__isBool<T>()) { + return spirv_asm { + OpCapability GroupNonUniformArithmetic; + OpGroupNonUniformLogicalOr $$vector<T,N> result Subgroup 0 $value + }; + } + else { + return spirv_asm { + OpCapability GroupNonUniformArithmetic; + OpGroupNonUniformBitwiseOr $$vector<T,N> result Subgroup 0 $value + }; + } + + } +} + +__generic<T : __BuiltinLogicalType, let N : int> +__glsl_extension(GL_KHR_shader_subgroup_arithmetic) [require(glsl)] +__spirv_version(1.3) [require(spirv)] +[ForceInline] public vector<T,N> subgroupXor(vector<T,N> value) +{ + shader_subgroup_preamble<T>(); + __target_switch + { + case glsl: __intrinsic_asm "subgroupXor($0)"; + case spirv: + if (__isBool<T>()) { + return spirv_asm { + OpCapability GroupNonUniformArithmetic; + OpGroupNonUniformLogicalXor $$vector<T,N> result Subgroup 0 $value + }; + } + else { + return spirv_asm { + OpCapability GroupNonUniformArithmetic; + OpGroupNonUniformBitwiseXor $$vector<T,N> result Subgroup 0 $value + }; + } + } +} + +__generic<T : __BuiltinArithmeticType, let N : int> +__glsl_extension(GL_KHR_shader_subgroup_arithmetic) [require(glsl)] +__spirv_version(1.3) [require(spirv)] +[ForceInline] public vector<T,N> subgroupInclusiveAdd(vector<T,N> value) +{ + shader_subgroup_preamble<T>(); + __target_switch + { + case glsl: + __intrinsic_asm "subgroupInclusiveAdd($0)"; + case spirv: + if (__isFloat<T>()) + return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformFAdd $$vector<T,N> result Subgroup InclusiveScan $value}; + else if (__isInt<T>()) + return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformIAdd $$vector<T,N> result Subgroup InclusiveScan $value}; + else return value; + } +} + +__generic<T : __BuiltinArithmeticType, let N : int> 
+__glsl_extension(GL_KHR_shader_subgroup_arithmetic) [require(glsl)] +__spirv_version(1.3) [require(spirv)] +[ForceInline] public vector<T,N> subgroupInclusiveMul(vector<T,N> value) +{ + shader_subgroup_preamble<T>(); + __target_switch + { + case glsl: + __intrinsic_asm "subgroupInclusiveMul($0)"; + case spirv: + if (__isFloat<T>()) + return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformFMul $$vector<T,N> result Subgroup InclusiveScan $value}; + else if (__isInt<T>()) + return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformIMul $$vector<T,N> result Subgroup InclusiveScan $value}; + else return value; + } +} + +__generic<T : __BuiltinArithmeticType, let N : int> +__glsl_extension(GL_KHR_shader_subgroup_arithmetic) [require(glsl)] +__spirv_version(1.3) [require(spirv)] +[ForceInline] public vector<T,N> subgroupInclusiveMin(vector<T,N> value) +{ + shader_subgroup_preamble<T>(); + __target_switch + { + case glsl: + __intrinsic_asm "subgroupInclusiveMin($0)"; + case spirv: + if (__isFloat<T>()) + return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformFMin $$vector<T,N> result Subgroup InclusiveScan $value}; + else if (__isSignedInt<T>()) + return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformSMin $$vector<T,N> result Subgroup InclusiveScan $value}; + else if (__isUnsignedInt<T>()) + return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformUMin $$vector<T,N> result Subgroup InclusiveScan $value}; + else return value; + } +} + +__generic<T : __BuiltinArithmeticType, let N : int> +__glsl_extension(GL_KHR_shader_subgroup_arithmetic) [require(glsl)] +__spirv_version(1.3) [require(spirv)] +[ForceInline] public vector<T,N> subgroupInclusiveMax(vector<T,N> value) +{ + shader_subgroup_preamble<T>(); + __target_switch + { + case glsl: + __intrinsic_asm "subgroupInclusiveMax($0)"; + case spirv: + if (__isFloat<T>()) + return spirv_asm {OpCapability GroupNonUniformArithmetic; 
OpGroupNonUniformFMax $$vector<T,N> result Subgroup InclusiveScan $value}; + else if (__isSignedInt<T>()) + return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformSMax $$vector<T,N> result Subgroup InclusiveScan $value}; + else if (__isUnsignedInt<T>()) + return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformUMax $$vector<T,N> result Subgroup InclusiveScan $value}; + else return value; + } +} + +__generic<T : __BuiltinLogicalType, let N : int> +__glsl_extension(GL_KHR_shader_subgroup_arithmetic) [require(glsl)] +__spirv_version(1.3) [require(spirv)] +[ForceInline] public vector<T,N> subgroupInclusiveAnd(vector<T,N> value) +{ + shader_subgroup_preamble<T>(); + __target_switch + { + case glsl: + __intrinsic_asm "subgroupInclusiveAnd($0)"; + case spirv: + if (__isBool<T>()) return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformLogicalAnd $$vector<T,N> result Subgroup InclusiveScan $value}; + else return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformBitwiseAnd $$vector<T,N> result Subgroup InclusiveScan $value}; + } +} + +__generic<T : __BuiltinLogicalType, let N : int> +__glsl_extension(GL_KHR_shader_subgroup_arithmetic) [require(glsl)] +__spirv_version(1.3) [require(spirv)] +[ForceInline] public vector<T,N> subgroupInclusiveOr(vector<T,N> value) +{ + shader_subgroup_preamble<T>(); + __target_switch + { + case glsl: + __intrinsic_asm "subgroupInclusiveOr($0)"; + case spirv: + if (__isBool<T>()) return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformLogicalOr $$vector<T,N> result Subgroup InclusiveScan $value}; + else return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformBitwiseOr $$vector<T,N> result Subgroup InclusiveScan $value}; + } +} + +__generic<T : __BuiltinLogicalType, let N : int> +__glsl_extension(GL_KHR_shader_subgroup_arithmetic) [require(glsl)] +__spirv_version(1.3) [require(spirv)] +[ForceInline] public vector<T,N> 
subgroupInclusiveXor(vector<T,N> value) +{ + shader_subgroup_preamble<T>(); + __target_switch + { + case glsl: + __intrinsic_asm "subgroupInclusiveXor($0)"; + case spirv: + if (__isBool<T>()) return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformLogicalXor $$vector<T,N> result Subgroup InclusiveScan $value}; + else return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformBitwiseXor $$vector<T,N> result Subgroup InclusiveScan $value}; + } +} + +__generic<T : __BuiltinArithmeticType, let N : int> +__glsl_extension(GL_KHR_shader_subgroup_arithmetic) [require(glsl)] +__spirv_version(1.3) [require(spirv)] +[ForceInline] public vector<T,N> subgroupExclusiveAdd(vector<T,N> value) +{ + shader_subgroup_preamble<T>(); + return WavePrefixSum(value); +} + + +__generic<T : __BuiltinArithmeticType, let N : int> +__glsl_extension(GL_KHR_shader_subgroup_arithmetic) [require(glsl)] +__spirv_version(1.3) [require(spirv)] +[ForceInline] public vector<T,N> subgroupExclusiveMul(vector<T,N> value) +{ + shader_subgroup_preamble<T>(); + return WavePrefixProduct(value); +} + +__generic<T : __BuiltinArithmeticType, let N : int> +__glsl_extension(GL_KHR_shader_subgroup_arithmetic) [require(glsl)] +__spirv_version(1.3) [require(spirv)] +[ForceInline] public vector<T,N> subgroupExclusiveMin(vector<T,N> value) +{ + shader_subgroup_preamble<T>(); + __target_switch + { + case glsl: + __intrinsic_asm "subgroupExclusiveMin($0)"; + case spirv: + if (__isFloat<T>()) + return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformFMin $$vector<T,N> result Subgroup ExclusiveScan $value}; + else if (__isSignedInt<T>()) + return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformSMin $$vector<T,N> result Subgroup ExclusiveScan $value}; + else if (__isUnsignedInt<T>()) + return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformUMin $$vector<T,N> result Subgroup ExclusiveScan $value}; + else return value; + } +} + 
+__generic<T : __BuiltinArithmeticType, let N : int> +__glsl_extension(GL_KHR_shader_subgroup_arithmetic) [require(glsl)] +__spirv_version(1.3) [require(spirv)] +[ForceInline] public vector<T,N> subgroupExclusiveMax(vector<T,N> value) +{ + shader_subgroup_preamble<T>(); + __target_switch + { + case glsl: + __intrinsic_asm "subgroupExclusiveMax($0)"; + case spirv: + if (__isFloat<T>()) + return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformFMax $$vector<T,N> result Subgroup ExclusiveScan $value}; + else if (__isSignedInt<T>()) + return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformSMax $$vector<T,N> result Subgroup ExclusiveScan $value}; + else if (__isUnsignedInt<T>()) + return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformUMax $$vector<T,N> result Subgroup ExclusiveScan $value}; + else return value; + } +} + +__generic<T : __BuiltinLogicalType, let N : int> +__glsl_extension(GL_KHR_shader_subgroup_arithmetic) [require(glsl)] +__spirv_version(1.3) [require(spirv)] +[ForceInline] public vector<T,N> subgroupExclusiveAnd(vector<T,N> value) +{ + shader_subgroup_preamble<T>(); + __target_switch + { + case glsl: __intrinsic_asm "subgroupExclusiveAnd($0)"; + case spirv: + if (__isBool<T>()) return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformLogicalAnd $$vector<T,N> result Subgroup ExclusiveScan $value}; + else return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformBitwiseAnd $$vector<T,N> result Subgroup ExclusiveScan $value}; + } +} + +__generic<T : __BuiltinLogicalType, let N : int> +__glsl_extension(GL_KHR_shader_subgroup_arithmetic) [require(glsl)] +__spirv_version(1.3) [require(spirv)] +[ForceInline] public vector<T,N> subgroupExclusiveOr(vector<T,N> value) +{ + shader_subgroup_preamble<T>(); + __target_switch + { + case glsl: __intrinsic_asm "subgroupExclusiveOr($0)"; + case spirv: + if (__isBool<T>()) return spirv_asm {OpCapability 
GroupNonUniformArithmetic; OpGroupNonUniformLogicalOr $$vector<T,N> result Subgroup ExclusiveScan $value}; + else return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformBitwiseOr $$vector<T,N> result Subgroup ExclusiveScan $value}; + } +} + +__generic<T : __BuiltinLogicalType, let N : int> +__glsl_extension(GL_KHR_shader_subgroup_arithmetic) [require(glsl)] +__spirv_version(1.3) [require(spirv)] +[ForceInline] public vector<T,N> subgroupExclusiveXor(vector<T,N> value) +{ + shader_subgroup_preamble<T>(); + __target_switch + { + case glsl: __intrinsic_asm "subgroupExclusiveXor($0)"; + case spirv: + if (__isBool<T>()) return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformLogicalXor $$vector<T,N> result Subgroup ExclusiveScan $value}; + else return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformBitwiseXor $$vector<T,N> result Subgroup ExclusiveScan $value}; + } +} + +// GL_KHR_shader_subgroup_ballot + +__generic<T : __BuiltinType> +__glsl_extension(GL_KHR_shader_subgroup_ballot) [require(glsl)] +__spirv_version(1.3) [require(spirv)] +[ForceInline] public T subgroupBroadcast(T value, uint id) +{ + shader_subgroup_preamble<T>(); + return WaveMaskBroadcastLaneAt(WaveGetActiveMask(), value, id); +} + +__generic<T : __BuiltinType, let N : int> +__glsl_extension(GL_KHR_shader_subgroup_ballot) [require(glsl)] +__spirv_version(1.3) [require(spirv)] +[ForceInline] public vector<T,N> subgroupBroadcast(vector<T,N> value, uint id) +{ + shader_subgroup_preamble<T>(); + return WaveMaskBroadcastLaneAt(WaveGetActiveMask(), value, id); +} + +__generic<T : __BuiltinType> +__glsl_extension(GL_KHR_shader_subgroup_ballot) [require(glsl)] +__spirv_version(1.3) [require(spirv)] +[ForceInline] public T subgroupBroadcastFirst(T value) +{ + shader_subgroup_preamble<T>(); + return WaveMaskReadLaneFirst(WaveGetActiveMask(), value); +} + +__generic<T : __BuiltinType, let N : int> +__glsl_extension(GL_KHR_shader_subgroup_ballot) 
[require(glsl)] +__spirv_version(1.3) [require(spirv)] +[ForceInline] public vector<T,N> subgroupBroadcastFirst(vector<T,N> value) +{ + shader_subgroup_preamble<T>(); + return WaveMaskReadLaneFirst(WaveGetActiveMask(), value); +} + +// WaveMaskBallot is not the same; it force trunc's +__glsl_extension(GL_KHR_shader_subgroup_ballot) [require(glsl)] +__spirv_version(1.3) [require(spirv)] +[ForceInline] public uvec4 subgroupBallot(bool value) +{ + return WaveActiveBallot(value); +} + +// subgroupInverseBallot: tests the bit of the ballot `value` that belongs to the calling lane. +// logic for HLSL and CUDA which lack InverseBallot +// CUDA: works exclusively with 32 lanes, therefore only need comp x +// HLSL:{ +// 1. index into comp I want: index = trunc(float(lane)*(1/32)) +// 2. (value[index] >> lane) & 1 +// note: 1/32 will be converted to multiplication +// we do 1/32 since 1 uint stores 32 threads +// note 2: we have a waveLaneCount check because based on wave lane count we can determine if we can do a +// fast path or slow path (know index is 0 or non 0) +// } +__glsl_extension(GL_KHR_shader_subgroup_ballot) [require(glsl)] +__spirv_version(1.3) [require(spirv)] +[ForceInline] public bool subgroupInverseBallot(uvec4 value) +{ + __target_switch + { + case cuda: + // only 32 lanes, so only component x is inspected + __intrinsic_asm "(($0).x >> (_getLaneId()) & 1)"; + case hlsl: + // much like _WaveCountBits, but here we hope that we hit case 0; we can then avoid the expensive logic + const uint waveLaneCount = WaveGetLaneCount(); + switch ((waveLaneCount - 1) / 32) + { + case 0: + // fast path: only component 0 is inspected; the emitted expression must have balanced parentheses + __intrinsic_asm "((($0)[0] >> WaveGetLaneIndex()) & 1)"; + case 1: + case 2: + case 3: + // slow path: select the component with lane/32 (0.03125f == 1/32) + __intrinsic_asm "((($0)[uint(float(WaveGetLaneIndex())*0.03125f)] >> WaveGetLaneIndex()) & 1)"; + } + case glsl: + __intrinsic_asm "subgroupInverseBallot($0)"; + case spirv: + return spirv_asm { + OpCapability GroupNonUniformBallot; + OpGroupNonUniformInverseBallot $$bool result Subgroup $value + }; + } + return false; +} + +// same logic as subgroupInverseBallot +__glsl_extension(GL_KHR_shader_subgroup_ballot) [require(glsl)] 
+__spirv_version(1.3) [require(spirv)] +[ForceInline] public bool subgroupBallotBitExtract(uvec4 value, uint index) +{ + // Tests bit `index` of the ballot `value`: shift that bit down to position 0 and mask it, + // instead of and-ing the ballot word with the index itself. + __target_switch + { + case cuda: + // only component x is inspected on this path + __intrinsic_asm "(((($0).x >> ($1)) & 1) != 0)"; + case hlsl: + const uint waveLaneCount = WaveGetLaneCount(); + switch ((waveLaneCount - 1) / 32) + { + case 0: + // fast path: the bit is looked up in component 0 + __intrinsic_asm "(((($0)[0] >> ($1)) & 1) != 0)"; + case 1: + case 2: + case 3: + // slow path: component = index/32 (0.03125f == 1/32), bit within the component = index & 31 + __intrinsic_asm "(((($0)[uint(float($1)*0.03125f)] >> (($1) & 31)) & 1) != 0)"; + } + case glsl: + __intrinsic_asm "subgroupBallotBitExtract($0, $1)"; + case spirv: + return spirv_asm { + OpCapability GroupNonUniformBallot; + OpGroupNonUniformBallotBitExtract $$bool result Subgroup $value $index + }; + } + return false; +} + + +// the count is only supposed to use uvec4 values within bottom bits of subgroup launched, not a simple countbits +__glsl_extension(GL_KHR_shader_subgroup_ballot) [require(glsl)] +__spirv_version(1.3) [require(spirv)] +[ForceInline] public uint subgroupBallotBitCount(uvec4 value) +{ + __target_switch + { + case glsl: + __intrinsic_asm "subgroupBallotBitCount($0)"; + case spirv: + return spirv_asm { + OpCapability GroupNonUniformBallot; + OpGroupNonUniformBallotBitCount $$uint result Subgroup Reduce $value + }; + } +} + +__glsl_extension(GL_KHR_shader_subgroup_ballot) [require(glsl)] +__spirv_version(1.3) [require(spirv)] +[ForceInline] public uint subgroupBallotInclusiveBitCount(uvec4 value) +{ + __target_switch + { + case glsl: + __intrinsic_asm "subgroupBallotInclusiveBitCount($0)"; + case spirv: + return spirv_asm { + OpCapability GroupNonUniformBallot; + OpGroupNonUniformBallotBitCount $$uint result Subgroup InclusiveScan $value + }; + } +} + +__glsl_extension(GL_KHR_shader_subgroup_ballot) [require(glsl)] +__spirv_version(1.3) [require(spirv)] +[ForceInline] public uint subgroupBallotExclusiveBitCount(uvec4 value) +{ + __target_switch + { + case glsl: + __intrinsic_asm "subgroupBallotExclusiveBitCount($0)"; + case spirv: + return spirv_asm { + OpCapability GroupNonUniformBallot; + 
OpGroupNonUniformBallotBitCount $$uint result Subgroup ExclusiveScan $value + }; + } +} + +__glsl_extension(GL_KHR_shader_subgroup_ballot) [require(glsl)] +__spirv_version(1.3) [require(spirv)] +[ForceInline] public uint subgroupBallotFindLSB(uvec4 value) +{ + __target_switch + { + case glsl: + __intrinsic_asm "subgroupBallotFindLSB($0)"; + case spirv: + return spirv_asm { + OpCapability GroupNonUniformBallot; + OpGroupNonUniformBallotFindLSB $$uint result Subgroup $value + }; + } +} + +__glsl_extension(GL_KHR_shader_subgroup_ballot) [require(glsl)] +__spirv_version(1.3) [require(spirv)] +[ForceInline] public uint subgroupBallotFindMSB(uvec4 value) +{ + __target_switch + { + case glsl: + __intrinsic_asm "subgroupBallotFindMSB($0)"; + case spirv: + return spirv_asm { + OpCapability GroupNonUniformBallot; + OpGroupNonUniformBallotFindMSB $$uint result Subgroup $value + }; + } +} + +// GL_KHR_shader_subgroup_shuffle + +__generic<T : __BuiltinType> +__glsl_extension(GL_KHR_shader_subgroup_shuffle) [require(glsl)] +__spirv_version(1.3) [require(spirv)] +[ForceInline] public T subgroupShuffle(T value, uint index) +{ + shader_subgroup_preamble<T>(); + return WaveShuffle(value, index); +} + +// Scalar subgroupShuffleXor: forwards `value` and `mask` to the GLSL builtin or to OpGroupNonUniformShuffleXor. +__generic<T : __BuiltinType> +__glsl_extension(GL_KHR_shader_subgroup_shuffle) [require(glsl)] +__spirv_version(1.3) [require(spirv)] +[ForceInline] public T subgroupShuffleXor(T value, uint mask) +{ + shader_subgroup_preamble<T>(); + __target_switch + { + case glsl: + __intrinsic_asm "subgroupShuffleXor($0,$1)"; + case spirv: + // OpGroupNonUniformShuffleXor is gated by the GroupNonUniformShuffle capability, not GroupNonUniformBallot + return spirv_asm { + OpCapability GroupNonUniformShuffle; + OpGroupNonUniformShuffleXor $$T result Subgroup $value $mask + }; + } +} + +__generic<T : __BuiltinType, let N : int> +__glsl_extension(GL_KHR_shader_subgroup_shuffle) [require(glsl)] +__spirv_version(1.3) [require(spirv)] +[ForceInline] public vector<T,N> subgroupShuffle(vector<T,N> value, uint index) +{ + shader_subgroup_preamble<T>(); + return WaveShuffle(value, index); +} + +__generic<T : 
__BuiltinType, let N : int> +__glsl_extension(GL_KHR_shader_subgroup_shuffle) [require(glsl)] +__spirv_version(1.3) [require(spirv)] +[ForceInline] public vector<T,N> subgroupShuffleXor(vector<T,N> value, uint mask) +{ + // Vector subgroupShuffleXor: forwards `value` and `mask` to the GLSL builtin or to OpGroupNonUniformShuffleXor. + shader_subgroup_preamble<T>(); + __target_switch + { + case glsl: + __intrinsic_asm "subgroupShuffleXor($0,$1)"; + case spirv: + // OpGroupNonUniformShuffleXor is gated by the GroupNonUniformShuffle capability, not GroupNonUniformBallot + return spirv_asm { + OpCapability GroupNonUniformShuffle; + OpGroupNonUniformShuffleXor $$vector<T,N> result Subgroup $value $mask + }; + } +} + + +// GL_KHR_shader_subgroup_shuffle_relative + +__generic<T : __BuiltinType> +__glsl_extension(GL_KHR_shader_subgroup_shuffle_relative) [require(glsl)] +__spirv_version(1.3) [require(spirv)] +[ForceInline] public T subgroupShuffleUp(T value, uint delta) +{ + shader_subgroup_preamble<T>(); + __target_switch + { + case glsl: + __intrinsic_asm "subgroupShuffleUp($0, $1)"; + case spirv: + return spirv_asm { + OpCapability GroupNonUniformShuffleRelative; + OpGroupNonUniformShuffleUp $$T result Subgroup $value $delta + }; + } +} + +__generic<T : __BuiltinType> +__glsl_extension(GL_KHR_shader_subgroup_shuffle_relative) [require(glsl)] +__spirv_version(1.3) [require(spirv)] +[ForceInline] public T subgroupShuffleDown(T value, uint delta) +{ + shader_subgroup_preamble<T>(); + __target_switch + { + case glsl: + __intrinsic_asm "subgroupShuffleDown($0, $1)"; + case spirv: + return spirv_asm { + OpCapability GroupNonUniformShuffleRelative; + OpGroupNonUniformShuffleDown $$T result Subgroup $value $delta + }; + } +} + + +__generic<T : __BuiltinType, let N : int> +__glsl_extension(GL_KHR_shader_subgroup_shuffle_relative) [require(glsl)] +__spirv_version(1.3) [require(spirv)] +[ForceInline] public vector<T,N> subgroupShuffleUp(vector<T,N> value, uint delta) +{ + shader_subgroup_preamble<T>(); + __target_switch + { + case glsl: + __intrinsic_asm "subgroupShuffleUp($0, $1)"; + case spirv: + return spirv_asm { + OpCapability GroupNonUniformShuffleRelative; + OpGroupNonUniformShuffleUp 
$$vector<T,N> result Subgroup $value $delta + }; + } +} + +__generic<T : __BuiltinType, let N : int> +__glsl_extension(GL_KHR_shader_subgroup_shuffle_relative) [require(glsl)] +__spirv_version(1.3) [require(spirv)] +[ForceInline] public vector<T,N> subgroupShuffleDown(vector<T,N> value, uint delta) +{ + shader_subgroup_preamble<T>(); + __target_switch + { + case glsl: + __intrinsic_asm "subgroupShuffleDown($0, $1)"; + case spirv: + return spirv_asm { + OpCapability GroupNonUniformShuffleRelative; + OpGroupNonUniformShuffleDown $$vector<T,N> result Subgroup $value $delta + }; + } +} +// GL_KHR_shader_subgroup_clustered + +__generic<T : __BuiltinArithmeticType> +__glsl_extension(GL_KHR_shader_subgroup_clustered) [require(glsl)] +__spirv_version(1.3) [require(spirv)] +[ForceInline] public T subgroupClusteredAdd(T value, uint clusterSize) +{ + shader_subgroup_preamble<T>(); + __target_switch + { + case glsl: + __intrinsic_asm "subgroupClusteredAdd($0, $1)"; + case spirv: + if (__isFloat<T>()) + return spirv_asm {OpCapability GroupNonUniformArithmetic; OpCapability GroupNonUniformClustered; OpGroupNonUniformFAdd $$T result Subgroup ClusteredReduce $value $clusterSize}; + else if (__isInt<T>()) + return spirv_asm {OpCapability GroupNonUniformArithmetic; OpCapability GroupNonUniformClustered; OpGroupNonUniformIAdd $$T result Subgroup ClusteredReduce $value $clusterSize}; + else return value; + } +} + +__generic<T : __BuiltinArithmeticType> +__glsl_extension(GL_KHR_shader_subgroup_clustered) [require(glsl)] +__spirv_version(1.3) [require(spirv)] +[ForceInline] public T subgroupClusteredMul(T value, uint clusterSize) +{ + shader_subgroup_preamble<T>(); + __target_switch + { + case glsl: + __intrinsic_asm "subgroupClusteredMul($0, $1)"; + case spirv: + if (__isFloat<T>()) + return spirv_asm {OpCapability GroupNonUniformArithmetic; OpCapability GroupNonUniformClustered; OpGroupNonUniformFMul $$T result Subgroup ClusteredReduce $value $clusterSize}; + else if (__isInt<T>()) + 
return spirv_asm {OpCapability GroupNonUniformArithmetic; OpCapability GroupNonUniformClustered; OpGroupNonUniformIMul $$T result Subgroup ClusteredReduce $value $clusterSize}; + else return value; + } +} + +__generic<T : __BuiltinArithmeticType> +__glsl_extension(GL_KHR_shader_subgroup_clustered) [require(glsl)] +__spirv_version(1.3) [require(spirv)] +[ForceInline] public T subgroupClusteredMin(T value, uint clusterSize) +{ + shader_subgroup_preamble<T>(); + __target_switch + { + case glsl: + __intrinsic_asm "subgroupClusteredMin($0, $1)"; + case spirv: + if (__isFloat<T>()) + return spirv_asm {OpCapability GroupNonUniformArithmetic; OpCapability GroupNonUniformClustered; OpGroupNonUniformFMin $$T result Subgroup ClusteredReduce $value $clusterSize}; + else if (__isSignedInt<T>()) + return spirv_asm {OpCapability GroupNonUniformArithmetic; OpCapability GroupNonUniformClustered; OpGroupNonUniformSMin $$T result Subgroup ClusteredReduce $value $clusterSize}; + else if (__isUnsignedInt<T>()) + return spirv_asm {OpCapability GroupNonUniformArithmetic; OpCapability GroupNonUniformClustered; OpGroupNonUniformUMin $$T result Subgroup ClusteredReduce $value $clusterSize}; + else return value; + } +} + +__generic<T : __BuiltinArithmeticType> +__glsl_extension(GL_KHR_shader_subgroup_clustered) [require(glsl)] +__spirv_version(1.3) [require(spirv)] +[ForceInline] public T subgroupClusteredMax(T value, uint clusterSize) +{ + shader_subgroup_preamble<T>(); + __target_switch + { + case glsl: + __intrinsic_asm "subgroupClusteredMax($0, $1)"; + case spirv: + if (__isFloat<T>()) + return spirv_asm {OpCapability GroupNonUniformArithmetic; OpCapability GroupNonUniformClustered; OpGroupNonUniformFMax $$T result Subgroup ClusteredReduce $value $clusterSize}; + else if (__isSignedInt<T>()) + return spirv_asm {OpCapability GroupNonUniformArithmetic; OpCapability GroupNonUniformClustered; OpGroupNonUniformSMax $$T result Subgroup ClusteredReduce $value $clusterSize}; + else if 
(__isUnsignedInt<T>()) + return spirv_asm {OpCapability GroupNonUniformArithmetic; OpCapability GroupNonUniformClustered; OpGroupNonUniformUMax $$T result Subgroup ClusteredReduce $value $clusterSize}; + else return value; + } +} + +__generic<T : __BuiltinLogicalType> +__glsl_extension(GL_KHR_shader_subgroup_clustered) [require(glsl)] +__spirv_version(1.3) [require(spirv)] +[ForceInline] public T subgroupClusteredAnd(T value, uint clusterSize) +{ + shader_subgroup_preamble<T>(); + __target_switch + { + case glsl: + __intrinsic_asm "subgroupClusteredAnd($0, $1)"; + case spirv: + if (__isBool<T>()) return spirv_asm {OpCapability GroupNonUniformArithmetic; OpCapability GroupNonUniformClustered; OpGroupNonUniformLogicalAnd $$T result Subgroup ClusteredReduce $value $clusterSize}; + else return spirv_asm {OpCapability GroupNonUniformArithmetic; OpCapability GroupNonUniformClustered; OpGroupNonUniformBitwiseAnd $$T result Subgroup ClusteredReduce $value $clusterSize}; + } +} + +__generic<T : __BuiltinLogicalType> +__glsl_extension(GL_KHR_shader_subgroup_clustered) [require(glsl)] +__spirv_version(1.3) [require(spirv)] +[ForceInline] public T subgroupClusteredOr(T value, uint clusterSize) +{ + shader_subgroup_preamble<T>(); + __target_switch + { + case glsl: + __intrinsic_asm "subgroupClusteredOr($0, $1)"; + case spirv: + if (__isBool<T>()) return spirv_asm {OpCapability GroupNonUniformArithmetic; OpCapability GroupNonUniformClustered; OpGroupNonUniformLogicalOr $$T result Subgroup ClusteredReduce $value $clusterSize}; + else return spirv_asm {OpCapability GroupNonUniformArithmetic; OpCapability GroupNonUniformClustered; OpGroupNonUniformBitwiseOr $$T result Subgroup ClusteredReduce $value $clusterSize}; + } +} + + + +__generic<T : __BuiltinLogicalType> +__glsl_extension(GL_KHR_shader_subgroup_clustered) [require(glsl)] +__spirv_version(1.3) [require(spirv)] +[ForceInline] public T subgroupClusteredXor(T value, uint clusterSize) +{ + shader_subgroup_preamble<T>(); + 
__target_switch + { + case glsl: + __intrinsic_asm "subgroupClusteredXor($0, $1)"; + case spirv: + if (__isBool<T>()) return spirv_asm {OpCapability GroupNonUniformArithmetic; OpCapability GroupNonUniformClustered; OpGroupNonUniformLogicalXor $$T result Subgroup ClusteredReduce $value $clusterSize}; + else return spirv_asm {OpCapability GroupNonUniformArithmetic; OpCapability GroupNonUniformClustered; OpGroupNonUniformBitwiseXor $$T result Subgroup ClusteredReduce $value $clusterSize}; + } +} + + + +__generic<T : __BuiltinArithmeticType, let N : int> +__glsl_extension(GL_KHR_shader_subgroup_clustered) [require(glsl)] +__spirv_version(1.3) [require(spirv)] +[ForceInline] public vector<T,N> subgroupClusteredAdd(vector<T,N> value, uint clusterSize) +{ + shader_subgroup_preamble<T>(); + __target_switch + { + case glsl: + __intrinsic_asm "subgroupClusteredAdd($0, $1)"; + case spirv: + if (__isFloat<T>()) + return spirv_asm {OpCapability GroupNonUniformArithmetic; OpCapability GroupNonUniformClustered; + OpGroupNonUniformFAdd $$vector<T,N> result Subgroup ClusteredReduce $value $clusterSize}; + else if (__isInt<T>()) + return spirv_asm {OpCapability GroupNonUniformArithmetic; OpCapability GroupNonUniformClustered; OpGroupNonUniformIAdd $$vector<T,N> result Subgroup ClusteredReduce $value $clusterSize}; + else return value; + } +} + +__generic<T : __BuiltinArithmeticType, let N : int> +__glsl_extension(GL_KHR_shader_subgroup_clustered) [require(glsl)] +__spirv_version(1.3) [require(spirv)] +[ForceInline] public vector<T,N> subgroupClusteredMul(vector<T,N> value, uint clusterSize) +{ + shader_subgroup_preamble<T>(); + __target_switch + { + case glsl: + __intrinsic_asm "subgroupClusteredMul($0, $1)"; + case spirv: + if (__isFloat<T>()) + return spirv_asm {OpCapability GroupNonUniformArithmetic; OpCapability GroupNonUniformClustered; OpGroupNonUniformFMul $$vector<T,N> result Subgroup ClusteredReduce $value $clusterSize}; + else if (__isInt<T>()) + return spirv_asm 
{OpCapability GroupNonUniformArithmetic; OpCapability GroupNonUniformClustered; OpGroupNonUniformIMul $$vector<T,N> result Subgroup ClusteredReduce $value $clusterSize}; + else return value; + } +} + +__generic<T : __BuiltinArithmeticType, let N : int> +__glsl_extension(GL_KHR_shader_subgroup_clustered) [require(glsl)] +__spirv_version(1.3) [require(spirv)] +[ForceInline] public vector<T,N> subgroupClusteredMin(vector<T,N> value, uint clusterSize) +{ + shader_subgroup_preamble<T>(); + __target_switch + { + case glsl: + __intrinsic_asm "subgroupClusteredMin($0, $1)"; + case spirv: + if (__isFloat<T>()) + return spirv_asm {OpCapability GroupNonUniformArithmetic; OpCapability GroupNonUniformClustered; OpGroupNonUniformFMin $$vector<T,N> result Subgroup ClusteredReduce $value $clusterSize}; + else if (__isSignedInt<T>()) + return spirv_asm {OpCapability GroupNonUniformArithmetic; OpCapability GroupNonUniformClustered; OpGroupNonUniformSMin $$vector<T,N> result Subgroup ClusteredReduce $value $clusterSize}; + else if (__isUnsignedInt<T>()) + return spirv_asm {OpCapability GroupNonUniformArithmetic; OpCapability GroupNonUniformClustered; OpGroupNonUniformUMin $$vector<T,N> result Subgroup ClusteredReduce $value $clusterSize}; + else return value; + } +} + +__generic<T : __BuiltinArithmeticType, let N : int> +__glsl_extension(GL_KHR_shader_subgroup_clustered) [require(glsl)] +__spirv_version(1.3) [require(spirv)] +[ForceInline] public vector<T,N> subgroupClusteredMax(vector<T,N> value, uint clusterSize) +{ + shader_subgroup_preamble<T>(); + __target_switch + { + case glsl: + __intrinsic_asm "subgroupClusteredMax($0, $1)"; + case spirv: + if (__isFloat<T>()) + return spirv_asm {OpCapability GroupNonUniformArithmetic; OpCapability GroupNonUniformClustered; OpGroupNonUniformFMax $$vector<T,N> result Subgroup ClusteredReduce $value $clusterSize}; + else if (__isSignedInt<T>()) + return spirv_asm {OpCapability GroupNonUniformArithmetic; OpCapability GroupNonUniformClustered; 
OpGroupNonUniformSMax $$vector<T,N> result Subgroup ClusteredReduce $value $clusterSize}; + else if (__isUnsignedInt<T>()) + return spirv_asm {OpCapability GroupNonUniformArithmetic; OpCapability GroupNonUniformClustered; OpGroupNonUniformUMax $$vector<T,N> result Subgroup ClusteredReduce $value $clusterSize}; + else return value; + } +} + +__generic<T : __BuiltinLogicalType, let N : int> +__glsl_extension(GL_KHR_shader_subgroup_clustered) [require(glsl)] +__spirv_version(1.3) [require(spirv)] +[ForceInline] public vector<T,N> subgroupClusteredAnd(vector<T,N> value, uint clusterSize) +{ + shader_subgroup_preamble<T>(); + __target_switch + { + case glsl: + __intrinsic_asm "subgroupClusteredAnd($0, $1)"; + case spirv: + if (__isBool<T>()) return spirv_asm {OpCapability GroupNonUniformArithmetic; OpCapability GroupNonUniformClustered; OpGroupNonUniformLogicalAnd $$vector<T,N> result Subgroup ClusteredReduce $value $clusterSize}; + else return spirv_asm {OpCapability GroupNonUniformArithmetic; OpCapability GroupNonUniformClustered; OpGroupNonUniformBitwiseAnd $$vector<T,N> result Subgroup ClusteredReduce $value $clusterSize}; + } +} + +__generic<T : __BuiltinLogicalType, let N : int> +__glsl_extension(GL_KHR_shader_subgroup_clustered) [require(glsl)] +__spirv_version(1.3) [require(spirv)] +[ForceInline] public vector<T,N> subgroupClusteredOr(vector<T,N> value, uint clusterSize) +{ + shader_subgroup_preamble<T>(); + __target_switch + { + case glsl: + __intrinsic_asm "subgroupClusteredOr($0, $1)"; + case spirv: + if (__isBool<T>()) return spirv_asm {OpCapability GroupNonUniformArithmetic; OpCapability GroupNonUniformClustered; OpGroupNonUniformLogicalOr $$vector<T,N> result Subgroup ClusteredReduce $value $clusterSize}; + else return spirv_asm {OpCapability GroupNonUniformArithmetic; OpCapability GroupNonUniformClustered; OpGroupNonUniformBitwiseOr $$vector<T,N> result Subgroup ClusteredReduce $value $clusterSize}; + } +} + +__generic<T : __BuiltinLogicalType, let N : 
int> +__glsl_extension(GL_KHR_shader_subgroup_clustered) [require(glsl)] +__spirv_version(1.3) [require(spirv)] +[ForceInline] public vector<T,N> subgroupClusteredXor(vector<T,N> value, uint clusterSize) +{ + shader_subgroup_preamble<T>(); + __target_switch + { + case glsl: + __intrinsic_asm "subgroupClusteredXor($0, $1)"; + case spirv: + if (__isBool<T>()) return spirv_asm {OpCapability GroupNonUniformArithmetic; OpCapability GroupNonUniformClustered; OpGroupNonUniformLogicalXor $$vector<T,N> result Subgroup ClusteredReduce $value $clusterSize}; + else return spirv_asm {OpCapability GroupNonUniformArithmetic; OpCapability GroupNonUniformClustered; OpGroupNonUniformBitwiseXor $$vector<T,N> result Subgroup ClusteredReduce $value $clusterSize}; + } +} + +// GL_KHR_shader_subgroup_quad + +__generic<T : __BuiltinType> +__glsl_extension(GL_KHR_shader_subgroup_quad) [require(glsl)] +__spirv_version(1.3) [require(spirv)] +[ForceInline] public T subgroupQuadBroadcast(T value, uint id) +{ + shader_subgroup_preamble<T>(); + return QuadReadLaneAt(value, id); +} + +__generic<T : __BuiltinType> +__glsl_extension(GL_KHR_shader_subgroup_quad) [require(glsl)] +__spirv_version(1.3) [require(spirv)] +[ForceInline] public T subgroupQuadSwapHorizontal(T value) +{ + shader_subgroup_preamble<T>(); + return QuadReadAcrossX(value); +} + +__generic<T : __BuiltinType> +__glsl_extension(GL_KHR_shader_subgroup_quad) [require(glsl)] +__spirv_version(1.3) [require(spirv)] +[ForceInline] public T subgroupQuadSwapVertical(T value) +{ + shader_subgroup_preamble<T>(); + return QuadReadAcrossY(value); +} + +__generic<T : __BuiltinType> +__glsl_extension(GL_KHR_shader_subgroup_quad) [require(glsl)] +__spirv_version(1.3) [require(spirv)] +[ForceInline] public T subgroupQuadSwapDiagonal(T value) +{ + shader_subgroup_preamble<T>(); + return QuadReadAcrossDiagonal(value); +} + + +__generic<T : __BuiltinType, let N : int> +__glsl_extension(GL_KHR_shader_subgroup_quad) [require(glsl)] +__spirv_version(1.3) 
[require(spirv)] +[ForceInline] public vector<T,N> subgroupQuadBroadcast(vector<T,N> value, uint id) +{ + shader_subgroup_preamble<T>(); + return QuadReadLaneAt(value, id); +} + +__generic<T : __BuiltinType, let N : int> +__glsl_extension(GL_KHR_shader_subgroup_quad) [require(glsl)] +__spirv_version(1.3) [require(spirv)] +[ForceInline] public vector<T,N> subgroupQuadSwapHorizontal(vector<T,N> value) +{ + shader_subgroup_preamble<T>(); + return QuadReadAcrossX(value); +} + +__generic<T : __BuiltinType, let N : int> +__glsl_extension(GL_KHR_shader_subgroup_quad) [require(glsl)] +__spirv_version(1.3) [require(spirv)] +[ForceInline] public vector<T,N> subgroupQuadSwapVertical(vector<T,N> value) +{ + shader_subgroup_preamble<T>(); + return QuadReadAcrossY(value); +} + +__generic<T : __BuiltinType, let N : int> +__glsl_extension(GL_KHR_shader_subgroup_quad) [require(glsl)] +__spirv_version(1.3) [require(spirv)] +[ForceInline] public vector<T,N> subgroupQuadSwapDiagonal(vector<T,N> value) +{ + shader_subgroup_preamble<T>(); + return QuadReadAcrossDiagonal(value); +}
\ No newline at end of file diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang index 156ecc194..84ba11cad 100644 --- a/source/slang/hlsl.meta.slang +++ b/source/slang/hlsl.meta.slang @@ -7807,19 +7807,14 @@ T WaveMaskProduct(WaveMask mask, T expr) case spirv: if (__isFloat<T>()) return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformFMul $$T result Subgroup 0 $expr}; - else if (__isSignedInt<T>()) + else if (__isInt<T>()) { return spirv_asm { OpCapability GroupNonUniformArithmetic; - // TODO: use the correct integer width - OpBitcast $$uint %uvalue $expr; - OpGroupNonUniformIMul $$uint %mulResult Subgroup 0 %uvalue; - OpBitcast $$T result %mulResult + OpGroupNonUniformIMul $$T result Subgroup 0 $expr; }; } - else if (__isUnsignedInt<T>()) - return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformIMul $$T result Subgroup 0 $expr}; else return expr; } } @@ -7837,19 +7832,14 @@ vector<T,N> WaveMaskProduct(WaveMask mask, vector<T,N> expr) case spirv: if (__isFloat<T>()) return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformFMul $$vector<T,N> result Subgroup 0 $expr}; - else if (__isSignedInt<T>()) + else if (__isInt<T>()) { return spirv_asm { OpCapability GroupNonUniformArithmetic; - // TODO: use the correct integer width - OpBitcast $$vector<uint,N> %uvalue $expr; - OpGroupNonUniformIMul $$vector<uint,N> %mulResult Subgroup 0 %uvalue; - OpBitcast $$vector<T,N> result %mulResult + OpGroupNonUniformIMul $$vector<T,N> result Subgroup 0 $expr; }; } - else if (__isUnsignedInt<T>()) - return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformIMul $$vector<T,N> result Subgroup 0 $expr}; else return expr; } } @@ -7877,19 +7867,14 @@ T WaveMaskSum(WaveMask mask, T expr) case spirv: if (__isFloat<T>()) return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformFAdd $$T result Subgroup 0 $expr}; - else if (__isSignedInt<T>()) + else if (__isInt<T>()) { return 
spirv_asm { OpCapability GroupNonUniformArithmetic; - // TODO: use the correct integer width - OpBitcast $$uint %uvalue $expr; - OpGroupNonUniformIAdd $$uint %mulResult Subgroup 0 %uvalue; - OpBitcast $$T result %mulResult + OpGroupNonUniformIAdd $$T result Subgroup 0 $expr; }; } - else if (__isUnsignedInt<T>()) - return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformIAdd $$T result Subgroup 0 $expr}; else return expr; } } @@ -7908,19 +7893,14 @@ vector<T,N> WaveMaskSum(WaveMask mask, vector<T,N> expr) case spirv: if (__isFloat<T>()) return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformFAdd $$vector<T,N> result Subgroup 0 $expr}; - else if (__isSignedInt<T>()) + else if (__isInt<T>()) { return spirv_asm { OpCapability GroupNonUniformArithmetic; - // TODO: use the correct integer width - OpBitcast $$vector<uint,N> %uvalue $expr; - OpGroupNonUniformIAdd $$vector<uint,N> %mulResult Subgroup 0 %uvalue; - OpBitcast $$vector<T,N> result %mulResult + OpGroupNonUniformIAdd $$vector<T,N> result Subgroup 0 $expr; }; } - else if (__isUnsignedInt<T>()) - return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformIAdd $$vector<T,N> result Subgroup 0 $expr}; else return expr; } } @@ -8002,19 +7982,14 @@ T WaveMaskPrefixProduct(WaveMask mask, T expr) case spirv: if (__isFloat<T>()) return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformFMul $$T result Subgroup ExclusiveScan $expr}; - else if (__isSignedInt<T>()) + else if (__isInt<T>()) { return spirv_asm { OpCapability GroupNonUniformArithmetic; - // TODO: use the correct integer width - OpBitcast $$uint %uvalue $expr; - OpGroupNonUniformIMul $$uint %mulResult Subgroup ExclusiveScan %uvalue; - OpBitcast $$T result %mulResult + OpGroupNonUniformIMul $$T result Subgroup ExclusiveScan $expr; }; } - else if (__isUnsignedInt<T>()) - return spirv_asm {OpGroupNonUniformIMul $$T result Subgroup ExclusiveScan $expr}; else return expr; } } @@ -8033,19 +8008,14 
@@ vector<T,N> WaveMaskPrefixProduct(WaveMask mask, vector<T,N> expr) case spirv: if (__isFloat<T>()) return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformFMul $$vector<T,N> result Subgroup ExclusiveScan $expr}; - else if (__isSignedInt<T>()) + else if (__isInt<T>()) { return spirv_asm { OpCapability GroupNonUniformArithmetic; - // TODO: use the correct integer width - OpBitcast $$vector<uint,N> %uvalue $expr; - OpGroupNonUniformIMul $$vector<uint,N> %mulResult Subgroup ExclusiveScan %uvalue; - OpBitcast $$vector<T,N> result %mulResult + OpGroupNonUniformIMul $$vector<T,N> result Subgroup ExclusiveScan $expr; }; } - else if (__isUnsignedInt<T>()) - return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformIMul $$vector<T,N> result Subgroup ExclusiveScan $expr}; else return expr; } } @@ -8069,19 +8039,14 @@ T WaveMaskPrefixSum(WaveMask mask, T expr) case spirv: if (__isFloat<T>()) return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformFAdd $$T result Subgroup ExclusiveScan $expr}; - else if (__isSignedInt<T>()) + else if (__isInt<T>()) { return spirv_asm { OpCapability GroupNonUniformArithmetic; - // TODO: use the correct integer width - %uvalue:$$uint = OpBitcast $expr; - %mulResult:$$uint = OpGroupNonUniformIAdd Subgroup ExclusiveScan %uvalue; - result:$$T = OpBitcast %mulResult + result:$$T = OpGroupNonUniformIAdd Subgroup ExclusiveScan $expr; }; } - else if (__isUnsignedInt<T>()) - return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformIAdd $$T result Subgroup ExclusiveScan $expr}; else return expr; } } @@ -8101,19 +8066,14 @@ vector<T,N> WaveMaskPrefixSum(WaveMask mask, vector<T,N> expr) case spirv: if (__isFloat<T>()) return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformFAdd $$vector<T,N> result Subgroup ExclusiveScan $expr}; - else if (__isSignedInt<T>()) + else if (__isInt<T>()) { return spirv_asm { OpCapability GroupNonUniformArithmetic; - // TODO: use the 
correct integer width - %uvalue: $$vector<uint,N> = OpBitcast $expr; - %mulResult: $$vector<uint,N> = OpGroupNonUniformIAdd Subgroup ExclusiveScan %uvalue; - result: $$vector<T,N> = OpBitcast %mulResult + result:$$vector<T,N> = OpGroupNonUniformIAdd Subgroup ExclusiveScan $expr; }; } - else if (__isUnsignedInt<T>()) - return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformIAdd $$vector<T,N> result Subgroup ExclusiveScan $expr}; else return expr; } } @@ -8612,23 +8572,14 @@ T WaveActive$(opName.hlslName)(T expr) OpCapability GroupNonUniformArithmetic; OpGroupNonUniformF$(opName.glslName) $$T result Subgroup 0 $expr }; - else if (__isSignedInt<T>()) + else if (__isInt<T>()) { return spirv_asm { OpCapability GroupNonUniformArithmetic; - // TODO: use the correct integer width - OpBitcast $$uint %uvalue $expr; - OpGroupNonUniformI$(opName.glslName) $$uint %mulResult Subgroup 0 %uvalue; - OpBitcast $$T result %mulResult + OpGroupNonUniformI$(opName.glslName) $$T result Subgroup 0 $expr; }; } - else if (__isUnsignedInt<T>()) - return spirv_asm - { - OpCapability GroupNonUniformArithmetic; - OpGroupNonUniformI$(opName.glslName) $$T result Subgroup 0 $expr - }; else return expr; default: return WaveMask$(opName.hlslName)(WaveGetActiveMask(), expr); @@ -8653,23 +8604,14 @@ vector<T,N> WaveActive$(opName.hlslName)(vector<T,N> expr) OpCapability GroupNonUniformArithmetic; OpGroupNonUniformF$(opName.glslName) $$vector<T,N> result Subgroup 0 $expr }; - else if (__isSignedInt<T>()) + else if (__isInt<T>()) { return spirv_asm { OpCapability GroupNonUniformArithmetic; - // TODO: use the correct integer width - OpBitcast $$vector<uint,N> %uvalue $expr; - OpGroupNonUniformI$(opName.glslName) $$vector<uint,N> %$(opName.glslName)Result Subgroup 0 %uvalue; - OpBitcast $$vector<T,N> result %$(opName.glslName)Result + OpGroupNonUniformI$(opName.glslName) $$vector<T,N> result Subgroup 0 $expr; }; } - else if (__isUnsignedInt<T>()) - return spirv_asm - { - OpCapability 
GroupNonUniformArithmetic; - OpGroupNonUniformI$(opName.glslName) $$vector<T,N> result Subgroup 0 $expr - }; else return expr; default: return WaveMask$(opName.hlslName)(WaveGetActiveMask(), expr); @@ -8909,19 +8851,14 @@ T WavePrefixProduct(T expr) OpCapability GroupNonUniformArithmetic; OpGroupNonUniformFMul $$T result Subgroup ExclusiveScan $expr }; - else if (__isSignedInt<T>()) + else if (__isInt<T>()) { return spirv_asm { OpCapability GroupNonUniformArithmetic; - // TODO: use the correct integer width - OpBitcast $$uint %uvalue $expr; - OpGroupNonUniformIMul $$uint %mulResult Subgroup ExclusiveScan %uvalue; - OpBitcast $$T result %mulResult + OpGroupNonUniformIMul $$T result Subgroup ExclusiveScan $expr; }; } - else if (__isUnsignedInt<T>()) - return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformIMul $$T result Subgroup ExclusiveScan $expr}; else return expr; default: return WaveMaskPrefixProduct(WaveGetActiveMask(), expr); @@ -8943,19 +8880,14 @@ vector<T,N> WavePrefixProduct(vector<T,N> expr) case spirv: if (__isFloat<T>()) return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformFMul $$vector<T,N> result Subgroup ExclusiveScan $expr}; - else if (__isSignedInt<T>()) + else if (__isInt<T>()) { return spirv_asm { OpCapability GroupNonUniformArithmetic; - // TODO: use the correct integer width - OpBitcast $$vector<uint,N> %uvalue $expr; - OpGroupNonUniformIMul $$vector<uint,N> %mulResult Subgroup ExclusiveScan %uvalue; - OpBitcast $$vector<T,N> result %mulResult + OpGroupNonUniformIMul $$vector<T,N> result Subgroup ExclusiveScan $expr; }; } - else if (__isUnsignedInt<T>()) - return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformIMul $$vector<T,N> result Subgroup ExclusiveScan $expr}; else return expr; default: return WaveMaskPrefixProduct(WaveGetActiveMask(), expr); @@ -8983,19 +8915,14 @@ T WavePrefixSum(T expr) case spirv: if (__isFloat<T>()) return spirv_asm {OpCapability 
GroupNonUniformArithmetic; OpGroupNonUniformFAdd $$T result Subgroup ExclusiveScan $expr}; - else if (__isSignedInt<T>()) + else if (__isInt<T>()) { return spirv_asm { OpCapability GroupNonUniformArithmetic; - // TODO: use the correct integer width - %uvalue:$$uint = OpBitcast $expr; - %mulResult:$$uint = OpGroupNonUniformIAdd Subgroup ExclusiveScan %uvalue; - result:$$T = OpBitcast %mulResult + result:$$T = OpGroupNonUniformIAdd Subgroup ExclusiveScan $expr; }; } - else if (__isUnsignedInt<T>()) - return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformIAdd $$T result Subgroup ExclusiveScan $expr}; else return expr; default: return WaveMaskPrefixSum(WaveGetActiveMask(), expr); @@ -9016,19 +8943,14 @@ vector<T,N> WavePrefixSum(vector<T,N> expr) case spirv: if (__isFloat<T>()) return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformFAdd $$vector<T,N> result Subgroup ExclusiveScan $expr}; - else if (__isSignedInt<T>()) + else if (__isInt<T>()) { return spirv_asm { OpCapability GroupNonUniformArithmetic; - // TODO: use the correct integer width - %uvalue:$$vector<uint,N> = OpBitcast $expr; - %mulResult:$$vector<uint,N> = OpGroupNonUniformIAdd Subgroup ExclusiveScan %uvalue; - result:$$vector<T,N> = OpBitcast %mulResult + result:$$vector<T,N> = OpGroupNonUniformIAdd Subgroup ExclusiveScan $expr; }; } - else if (__isUnsignedInt<T>()) - return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformIAdd $$vector<T,N> result Subgroup ExclusiveScan $expr}; else return expr; default: return WaveMaskPrefixSum(WaveGetActiveMask(), expr); @@ -11036,6 +10958,7 @@ struct HitObject let tmin = Ray.TMin; let tmax = Ray.TMax; spirv_asm { + OpCapability ShaderInvocationReorderNV; OpHitObjectTraceRayNV /**/ &__return_val /**/ $AccelerationStructure @@ -11781,7 +11704,7 @@ struct HitObject } } - /// Returns the attributes of a hit. Valid if the hit object represents a hit or a miss. + /// Returns the attributes of a hit. 
Valid if the hit object represents a hit or a miss. [ForceInline] attr_t GetAttributes<attr_t>() { @@ -13164,6 +13087,14 @@ struct ConstBufferPointer } } + + + __subscript(int index) -> T + { + [ForceInline] + get {return ConstBufferPointer<T>.fromUInt(toUInt() + __naturalStrideOf<T>() * index).get(); } + } + __glsl_version(450) __glsl_extension(GL_EXT_shader_explicit_arithmetic_types_int64) __glsl_extension(GL_EXT_buffer_reference) @@ -13215,10 +13146,4 @@ struct ConstBufferPointer }; } } - - __subscript(int index)->T - { - [ForceInline] - get { return ConstBufferPointer<T>.fromUInt(toUInt() + __naturalStrideOf<T>() * index).get(); } - } } diff --git a/source/slang/slang-ast-dump.cpp b/source/slang/slang-ast-dump.cpp index ccd9b9ee7..3bb83f80b 100644 --- a/source/slang/slang-ast-dump.cpp +++ b/source/slang/slang-ast-dump.cpp @@ -666,6 +666,9 @@ struct ASTDumpContext case SPIRVAsmOperand::SlangImmediateValue: m_writer->emit("!"); break; + case SPIRVAsmOperand::BuiltinVar: + m_writer->emit("builtin"); + break; default: SLANG_UNREACHABLE("Unhandled case in ast dump for SPIRVAsmOperand"); } diff --git a/source/slang/slang-lower-to-ir.cpp b/source/slang/slang-lower-to-ir.cpp index 416a6671b..1359e1242 100644 --- a/source/slang/slang-lower-to-ir.cpp +++ b/source/slang/slang-lower-to-ir.cpp @@ -3714,10 +3714,19 @@ struct ExprLoweringVisitorBase : public ExprVisitor<Derived, LoweredValInfo> LoweredValInfo visitVarExpr(VarExpr* expr) { + auto lowerTypeOfExpr = lowerType(context, expr->type); + auto declRef = expr->declRef; + if (auto propertyDeclRef = declRef.as<PropertyDecl>()) + { + // A reference to a property is a special case, because + // we must translate the reference to the property + // into a reference to one of its accessors. 
+ return lowerStorageReference(context, lowerTypeOfExpr, propertyDeclRef, LoweredValInfo(), 0, nullptr); + } LoweredValInfo info = emitDeclRef( context, - expr->declRef, - lowerType(context, expr->type)); + declRef, + lowerTypeOfExpr); return info; } diff --git a/tests/glsl-intrinsic/intrinsic-texture.slang b/tests/glsl-intrinsic/intrinsic-texture.slang index 3b42be715..591ced099 100644 --- a/tests/glsl-intrinsic/intrinsic-texture.slang +++ b/tests/glsl-intrinsic/intrinsic-texture.slang @@ -6,8 +6,8 @@ //TEST:SIMPLE(filecheck=CHECK_CUDA): -allow-glsl -stage fragment -entry computeMain -target cuda // Disabling following targets because they are currently causing compile errors. -//T-EST:SIMPLE(filecheck=CHECK_HLSL): -allow-glsl -stage fragment -entry computeMain -target hlsl -//T-EST:SIMPLE(filecheck=CHECK_CPP): -allow-glsl -stage fragment -entry computeMain -target cpp +//DISABLE_TEST:SIMPLE(filecheck=CHECK_HLSL): -allow-glsl -stage fragment -entry computeMain -target hlsl +//DISABLE_TEST:SIMPLE(filecheck=CHECK_CPP): -allow-glsl -stage fragment -entry computeMain -target cpp // "Offset" family of texture functions in GLSL requires offset parameter to be a constant value. // It appears that slangc removes the constant-ness of constant values. 
diff --git a/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-arithmetic_Exclusive.slang b/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-arithmetic_Exclusive.slang new file mode 100644 index 000000000..7bfc4d886 --- /dev/null +++ b/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-arithmetic_Exclusive.slang @@ -0,0 +1,191 @@ +//TEST:SIMPLE(filecheck=CHECK_GLSL): -allow-glsl -stage compute -entry computeMain -target glsl -DTARGET_GLSL +//TEST:SIMPLE(filecheck=CHECK_SPV): -allow-glsl -stage compute -entry computeMain -target spirv -emit-spirv-directly -DTARGET_SPIRV +//TEST:SIMPLE(filecheck=CHECK_HLSL): -allow-glsl -stage compute -entry computeMain -target hlsl -DTARGET_HLSL +//TEST:SIMPLE(filecheck=CHECK_CUDA): -allow-glsl -stage compute -entry computeMain -target cuda -DTARGET_CUDA + +// not testing cpp due to missing impl +//DISABLE_TEST:SIMPLE(filecheck=CHECK_CPP): -allow-glsl -stage compute -entry computeMain -target cpp -DTARGET_CPP + +//TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=BUF):-vk -compute -entry computeMain -allow-glsl +//TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=BUF):-vk -compute -entry computeMain -allow-glsl -emit-spirv-directly +#version 430 + +#if 1 \ + && !defined(TARGET_HLSL) \ + && !defined(TARGET_CUDA) +// hlsl does not treat boolean types with subgroup.* as a logical operator +// cuda is missing an implementation +#define TEST_when_logical_operators_are_implemented +#endif + +//TEST_INPUT:ubuffer(data=[0 0], stride=4):out,name=outputBuffer +buffer MyBlockName2 +{ + uint data[]; +} outputBuffer; + +#define local_size_x_v 4 +layout(local_size_x = local_size_x_v) in; + +__generic<T : __BuiltinLogicalType> +bool test1Logical() { + return true +#if defined(TEST_when_logical_operators_are_implemented) + && subgroupExclusiveAnd(T(1)) == T(1) + && subgroupExclusiveOr(T(1)) == T(1) + && subgroupExclusiveXor(T(1)) == T(1) +#endif // #if defined(TEST_when_logical_operators_are_implemented) + ; +} + +__generic<T : 
__BuiltinLogicalType, let N : int> +bool testVLogical() { + typealias gvec = vector<T, N>; + + return true +#if defined(TEST_when_logical_operators_are_implemented) + && subgroupExclusiveAnd(gvec(T(1))) == gvec(T(1)) + && subgroupExclusiveOr(gvec(T(1))) == gvec(T(1)) + && subgroupExclusiveXor(gvec(T(1))) == gvec(T(1)) +#endif // #if defined(TEST_when_logical_operators_are_implemented) + ; +} + +bool testLogical() { + return true + && test1Logical<int>() + && testVLogical<int, 2>() + && testVLogical<int, 3>() + && testVLogical<int, 4>() + && test1Logical<int8_t>() + && testVLogical<int8_t, 2>() + && testVLogical<int8_t, 3>() + && testVLogical<int8_t, 4>() + && test1Logical<int16_t>() + && testVLogical<int16_t, 2>() + && testVLogical<int16_t, 3>() + && testVLogical<int16_t, 4>() + && test1Logical<int64_t>() + && testVLogical<int64_t, 2>() + && testVLogical<int64_t, 3>() + && testVLogical<int64_t, 4>() + && test1Logical<uint>() + && testVLogical<uint, 2>() + && testVLogical<uint, 3>() + && testVLogical<uint, 4>() + && test1Logical<uint8_t>() + && testVLogical<uint8_t, 2>() + && testVLogical<uint8_t, 3>() + && testVLogical<uint8_t, 4>() + && test1Logical<uint16_t>() + && testVLogical<uint16_t, 2>() + && testVLogical<uint16_t, 3>() + && testVLogical<uint16_t, 4>() + && test1Logical<uint64_t>() + && testVLogical<uint64_t, 2>() + && testVLogical<uint64_t, 3>() + && testVLogical<uint64_t, 4>() + && test1Logical<bool>() + && testVLogical<bool, 2>() + && testVLogical<bool, 3>() + && testVLogical<bool, 4>() + ; +} + +__generic<T : __BuiltinArithmeticType> +bool test1Arithmetic() { + return true + && subgroupExclusiveAdd(T(1)) == T(3) + && subgroupExclusiveMul(T(1)) == T(1) + && subgroupExclusiveMin(T(1)) == T(1) + && subgroupExclusiveMax(T(1)) == T(1) + ; +} +__generic<T : __BuiltinArithmeticType, let N : int> +bool testVArithmetic() { + typealias gvec = vector<T, N>; + + return true + && subgroupExclusiveAdd(gvec(T(1))) == gvec(T(3)) + && subgroupExclusiveMul(gvec(T(1))) == 
gvec(T(1)) + && subgroupExclusiveMin(gvec(T(1))) == gvec(T(1)) + && subgroupExclusiveMax(gvec(T(1))) == gvec(T(1)) + ; +} + +bool testArithmetic() { + return true + && test1Arithmetic<float>() + && testVArithmetic<float, 2>() + && testVArithmetic<float, 3>() + && testVArithmetic<float, 4>() + && test1Arithmetic<double>() // WARNING: intel GPU's lack FP64 support + && testVArithmetic<double, 2>() + && testVArithmetic<double, 3>() + && testVArithmetic<double, 4>() + && test1Arithmetic<half>() + && testVArithmetic<half, 2>() + && testVArithmetic<half, 3>() + && testVArithmetic<half, 4>() + && test1Arithmetic<int>() + && testVArithmetic<int, 2>() + && testVArithmetic<int, 3>() + && testVArithmetic<int, 4>() + && test1Arithmetic<int8_t>() + && testVArithmetic<int8_t, 2>() + && testVArithmetic<int8_t, 3>() + && testVArithmetic<int8_t, 4>() + && test1Arithmetic<int16_t>() + && testVArithmetic<int16_t, 2>() + && testVArithmetic<int16_t, 3>() + && testVArithmetic<int16_t, 4>() + && test1Arithmetic<int64_t>() + && testVArithmetic<int64_t, 2>() + && testVArithmetic<int64_t, 3>() + && testVArithmetic<int64_t, 4>() + && test1Arithmetic<uint>() + && testVArithmetic<uint, 2>() + && testVArithmetic<uint, 3>() + && testVArithmetic<uint, 4>() + && test1Arithmetic<uint8_t>() + && testVArithmetic<uint8_t, 2>() + && testVArithmetic<uint8_t, 3>() + && testVArithmetic<uint8_t, 4>() + && test1Arithmetic<uint16_t>() + && testVArithmetic<uint16_t, 2>() + && testVArithmetic<uint16_t, 3>() + && testVArithmetic<uint16_t, 4>() + && test1Arithmetic<uint64_t>() + && testVArithmetic<uint64_t, 2>() + && testVArithmetic<uint64_t, 3>() + && testVArithmetic<uint64_t, 4>() + ; +} + +void computeMain() +{ + + bool res0 = true + && testLogical() + ; + + bool res1 = true + && testArithmetic() + ; + + if (gl_LocalInvocationID.x == 3) { + // seperate so if there is an erroneous error the "major" + // tests are issolated into 2 branches without polluting the + // file with a bunch of individual test values + 
outputBuffer.data[0] = res0; + outputBuffer.data[1] = res1; + } + + // CHECK_GLSL: void main( + // CHECK_SPV: OpEntryPoint + // CHECK_HLSL: void computeMain( + // CHECK_CUDA: void computeMain( + // CHECK_CPP: void _computeMain( + // BUF: 1 + // BUF-NEXT: 1 +} diff --git a/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-arithmetic_Inclusive.slang b/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-arithmetic_Inclusive.slang new file mode 100644 index 000000000..09c6bdbdf --- /dev/null +++ b/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-arithmetic_Inclusive.slang @@ -0,0 +1,191 @@ +//TEST:SIMPLE(filecheck=CHECK_GLSL): -allow-glsl -stage compute -entry computeMain -target glsl -DTARGET_GLSL +//TEST:SIMPLE(filecheck=CHECK_SPV): -allow-glsl -stage compute -entry computeMain -target spirv -emit-spirv-directly -DTARGET_SPIRV +//TEST:SIMPLE(filecheck=CHECK_HLSL): -allow-glsl -stage compute -entry computeMain -target hlsl -DTARGET_HLSL +//TEST:SIMPLE(filecheck=CHECK_CUDA): -allow-glsl -stage compute -entry computeMain -target cuda -DTARGET_CUDA + +// not testing cpp due to missing impl +//DISABLE_TEST:SIMPLE(filecheck=CHECK_CPP): -allow-glsl -stage compute -entry computeMain -target cpp -DTARGET_CPP + +//TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=BUF):-vk -compute -entry computeMain -allow-glsl +//TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=BUF):-vk -compute -entry computeMain -allow-glsl -emit-spirv-directly +#version 430 + +#if 1 \ + && !defined(TARGET_HLSL) \ + && !defined(TARGET_CUDA) +// hlsl does not treat boolean types with subgroup.* as a logical operator +// cuda is missing an implementation +#define TEST_when_logical_operators_are_implemented +#endif + +//TEST_INPUT:ubuffer(data=[0 0], stride=4):out,name=outputBuffer +buffer MyBlockName2 +{ + uint data[]; +} outputBuffer; + +#define local_size_x_v 4 +layout(local_size_x = local_size_x_v) in; + +__generic<T : __BuiltinLogicalType> +bool test1Logical() { + return true +#if 
defined(TEST_when_logical_operators_are_implemented) + && subgroupInclusiveAnd(T(1)) == T(1) + && subgroupInclusiveOr(T(1)) == T(1) + && subgroupInclusiveXor(T(1)) == T(0) +#endif // #if defined(TEST_when_logical_operators_are_implemented) + ; +} + +__generic<T : __BuiltinLogicalType, let N : int> +bool testVLogical() { + typealias gvec = vector<T, N>; + + return true +#if defined(TEST_when_logical_operators_are_implemented) + && subgroupInclusiveAnd(gvec(T(1))) == gvec(T(1)) + && subgroupInclusiveOr(gvec(T(1))) == gvec(T(1)) + && subgroupInclusiveXor(gvec(T(1))) == gvec(T(0)) +#endif // #if defined(TEST_when_logical_operators_are_implemented) + ; +} + +bool testLogical() { + return true + && test1Logical<int>() + && testVLogical<int, 2>() + && testVLogical<int, 3>() + && testVLogical<int, 4>() + && test1Logical<int8_t>() + && testVLogical<int8_t, 2>() + && testVLogical<int8_t, 3>() + && testVLogical<int8_t, 4>() + && test1Logical<int16_t>() + && testVLogical<int16_t, 2>() + && testVLogical<int16_t, 3>() + && testVLogical<int16_t, 4>() + && test1Logical<int64_t>() + && testVLogical<int64_t, 2>() + && testVLogical<int64_t, 3>() + && testVLogical<int64_t, 4>() + && test1Logical<uint>() + && testVLogical<uint, 2>() + && testVLogical<uint, 3>() + && testVLogical<uint, 4>() + && test1Logical<uint8_t>() + && testVLogical<uint8_t, 2>() + && testVLogical<uint8_t, 3>() + && testVLogical<uint8_t, 4>() + && test1Logical<uint16_t>() + && testVLogical<uint16_t, 2>() + && testVLogical<uint16_t, 3>() + && testVLogical<uint16_t, 4>() + && test1Logical<uint64_t>() + && testVLogical<uint64_t, 2>() + && testVLogical<uint64_t, 3>() + && testVLogical<uint64_t, 4>() + && test1Logical<bool>() + && testVLogical<bool, 2>() + && testVLogical<bool, 3>() + && testVLogical<bool, 4>() + ; +} + +__generic<T : __BuiltinArithmeticType> +bool test1Arithmetic() { + return true + && subgroupInclusiveAdd(T(1)) == T(4) + && subgroupInclusiveMul(T(1)) == T(1) + && subgroupInclusiveMin(T(1)) == T(1) + && 
subgroupInclusiveMax(T(1)) == T(1) + ; +} +__generic<T : __BuiltinArithmeticType, let N : int> +bool testVArithmetic() { + typealias gvec = vector<T, N>; + + return true + && subgroupInclusiveAdd(gvec(T(1))) == gvec(T(4)) + && subgroupInclusiveMul(gvec(T(1))) == gvec(T(1)) + && subgroupInclusiveMin(gvec(T(1))) == gvec(T(1)) + && subgroupInclusiveMax(gvec(T(1))) == gvec(T(1)) + ; +} + +bool testArithmetic() { + return true + && test1Arithmetic<float>() + && testVArithmetic<float, 2>() + && testVArithmetic<float, 3>() + && testVArithmetic<float, 4>() + && test1Arithmetic<double>() // WARNING: intel GPU's lack FP64 support + && testVArithmetic<double, 2>() + && testVArithmetic<double, 3>() + && testVArithmetic<double, 4>() + && test1Arithmetic<half>() + && testVArithmetic<half, 2>() + && testVArithmetic<half, 3>() + && testVArithmetic<half, 4>() + && test1Arithmetic<int>() + && testVArithmetic<int, 2>() + && testVArithmetic<int, 3>() + && testVArithmetic<int, 4>() + && test1Arithmetic<int8_t>() + && testVArithmetic<int8_t, 2>() + && testVArithmetic<int8_t, 3>() + && testVArithmetic<int8_t, 4>() + && test1Arithmetic<int16_t>() + && testVArithmetic<int16_t, 2>() + && testVArithmetic<int16_t, 3>() + && testVArithmetic<int16_t, 4>() + && test1Arithmetic<int64_t>() + && testVArithmetic<int64_t, 2>() + && testVArithmetic<int64_t, 3>() + && testVArithmetic<int64_t, 4>() + && test1Arithmetic<uint>() + && testVArithmetic<uint, 2>() + && testVArithmetic<uint, 3>() + && testVArithmetic<uint, 4>() + && test1Arithmetic<uint8_t>() + && testVArithmetic<uint8_t, 2>() + && testVArithmetic<uint8_t, 3>() + && testVArithmetic<uint8_t, 4>() + && test1Arithmetic<uint16_t>() + && testVArithmetic<uint16_t, 2>() + && testVArithmetic<uint16_t, 3>() + && testVArithmetic<uint16_t, 4>() + && test1Arithmetic<uint64_t>() + && testVArithmetic<uint64_t, 2>() + && testVArithmetic<uint64_t, 3>() + && testVArithmetic<uint64_t, 4>() + ; +} + +void computeMain() +{ + + bool res0 = true + && testLogical() 
+ ; + + bool res1 = true + && testArithmetic() + ; + + if (gl_LocalInvocationID.x == 3) { + // seperate so if there is an erroneous error the "major" + // tests are issolated into 2 branches without polluting the + // file with a bunch of individual test values + outputBuffer.data[0] = res0; + outputBuffer.data[1] = res1; + } + + // CHECK_GLSL: void main( + // CHECK_SPV: OpEntryPoint + // CHECK_HLSL: void computeMain( + // CHECK_CUDA: void computeMain( + // CHECK_CPP: void _computeMain( + // BUF: 1 + // BUF-NEXT: 1 +} diff --git a/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-arithmetic_None.slang b/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-arithmetic_None.slang new file mode 100644 index 000000000..5300e6796 --- /dev/null +++ b/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-arithmetic_None.slang @@ -0,0 +1,191 @@ +//TEST:SIMPLE(filecheck=CHECK_GLSL): -allow-glsl -stage compute -entry computeMain -target glsl -DTARGET_GLSL +//TEST:SIMPLE(filecheck=CHECK_SPV): -allow-glsl -stage compute -entry computeMain -target spirv -emit-spirv-directly -DTARGET_SPIRV +//TEST:SIMPLE(filecheck=CHECK_HLSL): -allow-glsl -stage compute -entry computeMain -target hlsl -DTARGET_HLSL +//TEST:SIMPLE(filecheck=CHECK_CUDA): -allow-glsl -stage compute -entry computeMain -target cuda -DTARGET_CUDA + +// not testing cpp due to missing impl +//DISABLE_TEST:SIMPLE(filecheck=CHECK_CPP): -allow-glsl -stage compute -entry computeMain -target cpp -DTARGET_CPP + +//TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=BUF):-vk -compute -entry computeMain -allow-glsl +//TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=BUF):-vk -compute -entry computeMain -allow-glsl -emit-spirv-directly +#version 430 + +#if 1 \ + && !defined(TARGET_HLSL) \ + && !defined(TARGET_CUDA) +// hlsl does not treat boolean types with subgroup.* as a logical operator +// cuda is missing an implementation +#define TEST_when_logical_operators_are_implemented +#endif + +//TEST_INPUT:ubuffer(data=[0 
0], stride=4):out,name=outputBuffer +buffer MyBlockName2 +{ + uint data[]; +} outputBuffer; + +#define local_size_x_v 4 +layout(local_size_x = local_size_x_v) in; + +__generic<T : __BuiltinLogicalType> +bool test1Logical() { + return true +#if defined(TEST_when_logical_operators_are_implemented) + && subgroupAnd(T(1)) == T(1) + && subgroupOr(T(1)) == T(1) + && subgroupXor(T(1)) == T(0) +#endif // #if defined(TEST_when_logical_operators_are_implemented) + ; +} + +__generic<T : __BuiltinLogicalType, let N : int> +bool testVLogical() { + typealias gvec = vector<T, N>; + + return true +#if defined(TEST_when_logical_operators_are_implemented) + && subgroupAnd(gvec(T(1))) == gvec(T(1)) + && subgroupOr(gvec(T(1))) == gvec(T(1)) + && subgroupXor(gvec(T(1))) == gvec(T(0)) +#endif // #if defined(TEST_when_logical_operators_are_implemented) + ; +} + +bool testLogical() { + return true + && test1Logical<int>() + && testVLogical<int, 2>() + && testVLogical<int, 3>() + && testVLogical<int, 4>() + && test1Logical<int8_t>() + && testVLogical<int8_t, 2>() + && testVLogical<int8_t, 3>() + && testVLogical<int8_t, 4>() + && test1Logical<int16_t>() + && testVLogical<int16_t, 2>() + && testVLogical<int16_t, 3>() + && testVLogical<int16_t, 4>() + && test1Logical<int64_t>() + && testVLogical<int64_t, 2>() + && testVLogical<int64_t, 3>() + && testVLogical<int64_t, 4>() + && test1Logical<uint>() + && testVLogical<uint, 2>() + && testVLogical<uint, 3>() + && testVLogical<uint, 4>() + && test1Logical<uint8_t>() + && testVLogical<uint8_t, 2>() + && testVLogical<uint8_t, 3>() + && testVLogical<uint8_t, 4>() + && test1Logical<uint16_t>() + && testVLogical<uint16_t, 2>() + && testVLogical<uint16_t, 3>() + && testVLogical<uint16_t, 4>() + && test1Logical<uint64_t>() + && testVLogical<uint64_t, 2>() + && testVLogical<uint64_t, 3>() + && testVLogical<uint64_t, 4>() + && test1Logical<bool>() + && testVLogical<bool, 2>() + && testVLogical<bool, 3>() + && testVLogical<bool, 4>() + ; +} + +__generic<T : 
__BuiltinArithmeticType> +bool test1Arithmetic() { + return true + && subgroupAdd(T(1)) == T(local_size_x_v) // 32 + && subgroupMul(T(1)) == T(1) + && subgroupMin(T(1)) == T(1) + && subgroupMax(T(1)) == T(1) + ; +} +__generic<T : __BuiltinArithmeticType, let N : int> +bool testVArithmetic() { + typealias gvec = vector<T, N>; + + return true + && subgroupAdd(gvec(T(1))) == gvec(T(local_size_x_v)) // 32 + && subgroupMul(gvec(T(1))) == gvec(T(1)) + && subgroupMin(gvec(T(1))) == gvec(T(1)) + && subgroupMax(gvec(T(1))) == gvec(T(1)) + ; +} + +bool testArithmetic() { + return true + && test1Arithmetic<float>() + && testVArithmetic<float, 2>() + && testVArithmetic<float, 3>() + && testVArithmetic<float, 4>() + && test1Arithmetic<double>() // WARNING: intel GPU's lack FP64 support + && testVArithmetic<double, 2>() + && testVArithmetic<double, 3>() + && testVArithmetic<double, 4>() + && test1Arithmetic<half>() + && testVArithmetic<half, 2>() + && testVArithmetic<half, 3>() + && testVArithmetic<half, 4>() + && test1Arithmetic<int>() + && testVArithmetic<int, 2>() + && testVArithmetic<int, 3>() + && testVArithmetic<int, 4>() + && test1Arithmetic<int8_t>() + && testVArithmetic<int8_t, 2>() + && testVArithmetic<int8_t, 3>() + && testVArithmetic<int8_t, 4>() + && test1Arithmetic<int16_t>() + && testVArithmetic<int16_t, 2>() + && testVArithmetic<int16_t, 3>() + && testVArithmetic<int16_t, 4>() + && test1Arithmetic<int64_t>() + && testVArithmetic<int64_t, 2>() + && testVArithmetic<int64_t, 3>() + && testVArithmetic<int64_t, 4>() + && test1Arithmetic<uint>() + && testVArithmetic<uint, 2>() + && testVArithmetic<uint, 3>() + && testVArithmetic<uint, 4>() + && test1Arithmetic<uint8_t>() + && testVArithmetic<uint8_t, 2>() + && testVArithmetic<uint8_t, 3>() + && testVArithmetic<uint8_t, 4>() + && test1Arithmetic<uint16_t>() + && testVArithmetic<uint16_t, 2>() + && testVArithmetic<uint16_t, 3>() + && testVArithmetic<uint16_t, 4>() + && test1Arithmetic<uint64_t>() + && 
testVArithmetic<uint64_t, 2>() + && testVArithmetic<uint64_t, 3>() + && testVArithmetic<uint64_t, 4>() + ; +} + +void computeMain() +{ + + bool res0 = true + && testLogical() + ; + + bool res1 = true + && testArithmetic() + ; + + if (gl_LocalInvocationID.x == 3) { + // seperate so if there is an erroneous error the "major" + // tests are issolated into 2 branches without polluting the + // file with a bunch of individual test values + outputBuffer.data[0] = res0; + outputBuffer.data[1] = res1; + } + + // CHECK_GLSL: void main( + // CHECK_SPV: OpEntryPoint + // CHECK_HLSL: void computeMain( + // CHECK_CUDA: void computeMain( + // CHECK_CPP: void _computeMain( + // BUF: 1 + // BUF-NEXT: 1 +} diff --git a/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-ballot.slang b/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-ballot.slang new file mode 100644 index 000000000..8bbd60689 --- /dev/null +++ b/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-ballot.slang @@ -0,0 +1,142 @@ +//TEST:SIMPLE(filecheck=CHECK_GLSL): -allow-glsl -stage compute -entry computeMain -target glsl +//TEST:SIMPLE(filecheck=CHECK_SPV): -allow-glsl -stage compute -entry computeMain -target spirv -emit-spirv-directly +//TEST:SIMPLE(filecheck=CHECK_HLSL): -allow-glsl -stage compute -entry computeMain -target hlsl -DTARGET_HLSL + +// not testing cuda due to missing impl +//DISABLE_TEST:SIMPLE(filecheck=CHECK_CUDA): -allow-glsl -stage compute -entry computeMain -target cuda -DTARGET_CUDA +// not testing cpp due to missing impl +//DISABLE_TEST:SIMPLE(filecheck=CHECK_CPP): -allow-glsl -stage compute -entry computeMain -target cpp -DTARGET_CPP + +//TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=BUF):-vk -compute -entry computeMain -allow-glsl +//TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=BUF):-vk -compute -entry computeMain -allow-glsl -emit-spirv-directly +#version 430 + +// breaks on Nvidia GPU by returning 0 which is trivially wrong (works on Intel Iris Xe) +//#define 
TEST_when_glsl_subgroupBallotExclusiveBitCount_is_not_bugged + +//TEST_INPUT:ubuffer(data=[0 0], stride=4):out,name=outputBuffer +buffer MyBlockName2 +{ + uint data[]; +} outputBuffer; + +layout(local_size_x = 32) in; + +__generic<T : __BuiltinLogicalType> +bool test1BroadcastX() { + return true + && subgroupBroadcast(T(1), 0) == T(1) + && subgroupBroadcastFirst(T(1)) == T(1) + ; +} +__generic<T : __BuiltinLogicalType, let N : int> +bool testVBroadcastX() { + typealias gvec = vector<T, N>; + + return true + && subgroupBroadcast(gvec(T(1)), 0) == gvec(T(1)) + && subgroupBroadcastFirst(gvec(T(1))) == gvec(T(1)) + ; +} + +__generic<T : __BuiltinFloatingPointType> +bool test1BroadcastX() { + return true + && subgroupBroadcast(T(1), 0) == T(1) + && subgroupBroadcastFirst(T(1)) == T(1) + ; +} +__generic<T : __BuiltinFloatingPointType, let N : int> +bool testVBroadcastX() { + typealias gvec = vector<T, N>; + + return true + && subgroupBroadcast(gvec(T(1)), 0) == gvec(T(1)) + && subgroupBroadcastFirst(gvec(T(1))) == gvec(T(1)) + ; +} +bool testBroadcastX() { + return true + && test1BroadcastX<float>() + && testVBroadcastX<float, 2>() + && testVBroadcastX<float, 3>() + && testVBroadcastX<float, 4>() + && test1BroadcastX<double>() // WARNING: intel GPU's lack FP64 support + && testVBroadcastX<double, 2>() + && testVBroadcastX<double, 3>() + && testVBroadcastX<double, 4>() + && test1BroadcastX<half>() + && testVBroadcastX<half, 2>() + && testVBroadcastX<half, 3>() + && testVBroadcastX<half, 4>() + && test1BroadcastX<int>() + && testVBroadcastX<int, 2>() + && testVBroadcastX<int, 3>() + && testVBroadcastX<int, 4>() + && test1BroadcastX<int8_t>() + && testVBroadcastX<int8_t, 2>() + && testVBroadcastX<int8_t, 3>() + && testVBroadcastX<int8_t, 4>() + && test1BroadcastX<int16_t>() + && testVBroadcastX<int16_t, 2>() + && testVBroadcastX<int16_t, 3>() + && testVBroadcastX<int16_t, 4>() + && test1BroadcastX<int64_t>() + && testVBroadcastX<int64_t, 2>() + && testVBroadcastX<int64_t, 
3>() + && testVBroadcastX<int64_t, 4>() + && test1BroadcastX<uint>() + && testVBroadcastX<uint, 2>() + && testVBroadcastX<uint, 3>() + && testVBroadcastX<uint, 4>() + && test1BroadcastX<uint8_t>() + && testVBroadcastX<uint8_t, 2>() + && testVBroadcastX<uint8_t, 3>() + && testVBroadcastX<uint8_t, 4>() + && test1BroadcastX<uint16_t>() + && testVBroadcastX<uint16_t, 2>() + && testVBroadcastX<uint16_t, 3>() + && testVBroadcastX<uint16_t, 4>() + && test1BroadcastX<uint64_t>() + && testVBroadcastX<uint64_t, 2>() + && testVBroadcastX<uint64_t, 3>() + && testVBroadcastX<uint64_t, 4>() + && test1BroadcastX<bool>() + && testVBroadcastX<bool, 2>() + && testVBroadcastX<bool, 3>() + && testVBroadcastX<bool, 4>() + ; +} + +bool testBallot() { + return true + && (subgroupBallot(true).x == 0xFFFFFFFF) + && (subgroupInverseBallot(uvec4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF)) == true) + && (subgroupBallotBitExtract(uvec4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF), 0) == true) + && (subgroupBallotBitCount(uvec4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF)) == 32) + && (subgroupBallotInclusiveBitCount(uvec4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF)) != 0) +#ifdef TEST_when_glsl_subgroupBallotExclusiveBitCount_is_not_bugged + && (subgroupBallotExclusiveBitCount(uvec4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF)) != 0) +#endif + && (subgroupBallotFindLSB(uvec4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF)) == 0) + && (subgroupBallotFindMSB(uvec4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF)) == 31) + ; +} + +void computeMain() +{ + outputBuffer.data[0] = true + && testBroadcastX() + ; + outputBuffer.data[1] = true + && testBallot() + ; + + // CHECK_GLSL: void main( + // CHECK_SPV: OpEntryPoint + // CHECK_HLSL: void computeMain( + // CHECK_CUDA: void computeMain( + // CHECK_CPP: void _computeMain( + // BUF: 1 + // BUF-NEXT: 1 +} diff --git a/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-basic.slang 
b/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-basic.slang new file mode 100644 index 000000000..82f2dc8e2 --- /dev/null +++ b/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-basic.slang @@ -0,0 +1,66 @@ +//TEST:SIMPLE(filecheck=CHECK_GLSL): -allow-glsl -stage compute -entry computeMain -target glsl +//TEST:SIMPLE(filecheck=CHECK_SPV): -allow-glsl -stage compute -entry computeMain -target spirv -emit-spirv-directly +//TEST:SIMPLE(filecheck=CHECK_HLSL): -allow-glsl -stage compute -entry computeMain -target hlsl -DTARGET_HLSL + +// not testing cuda due to missing impl +//DISABLE_TEST:SIMPLE(filecheck=CHECK_CUDA): -allow-glsl -stage compute -entry computeMain -target cuda -DTARGET_CUDA +// not testing cpp due to missing impl +//DISABLE_TEST:SIMPLE(filecheck=CHECK_CPP): -allow-glsl -stage compute -entry computeMain -target cpp -DTARGET_CPP + +//TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=BUF):-vk -compute -entry computeMain -allow-glsl +//TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=BUF):-vk -compute -entry computeMain -allow-glsl -emit-spirv-directly +#version 430 + +//TEST_INPUT:ubuffer(data=[0 0 0 0 0], stride=4):out,name=outputBuffer +buffer MyBlockName2 +{ + uint data[]; +} outputBuffer; + +layout(local_size_x = 32) in; + +shared uint shareMem; + +void computeMain() +{ + // TODO: subgroupMemoryBarrierImage() (image memory) is not exercised by this test + // each barrier gets its own separate step (and its own output slot) because the test is about concurrency + + shareMem = 100; + subgroupMemoryBarrierShared(); + outputBuffer.data[0] = 1; + subgroupBarrier(); + outputBuffer.data[0] = 2; + subgroupBarrier(); + + outputBuffer.data[1] = 1; + subgroupMemoryBarrier(); + outputBuffer.data[1] = 2; + subgroupBarrier(); + + outputBuffer.data[2] = 1; + subgroupMemoryBarrierBuffer(); + outputBuffer.data[2] = 2; + subgroupBarrier(); + + shareMem = 2; + subgroupMemoryBarrierShared(); + outputBuffer.data[3] = shareMem; + subgroupBarrier(); + + if (subgroupElect()) { + outputBuffer.data[4] = 
gl_GlobalInvocationID.x + 2; + } + + // CHECK_GLSL: void main( + // CHECK_SPV: OpEntryPoint + // CHECK_HLSL: void computeMain( + // CHECK_CUDA: void computeMain( + // CHECK_CPP: void _computeMain( + + // BUF: 2 + // BUF-NEXT: 2 + // BUF-NEXT: 2 + // BUF-NEXT: 2 + // BUF-NEXT: 2 +} diff --git a/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-builtin-variables.slang b/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-builtin-variables.slang new file mode 100644 index 000000000..21b533178 --- /dev/null +++ b/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-builtin-variables.slang @@ -0,0 +1,44 @@ +//TEST:SIMPLE(filecheck=CHECK_GLSL): -allow-glsl -stage compute -entry computeMain -target glsl +//TEST:SIMPLE(filecheck=CHECK_SPV): -allow-glsl -stage compute -entry computeMain -target spirv -emit-spirv-directly + +// missing implementation of most builtin values due to non-trivial translation +//DISABLE_TEST:SIMPLE(filecheck=CHECK_HLSL): -allow-glsl -stage compute -entry computeMain -target hlsl -DTARGET_HLSL +// missing implementation of most builtin values due to non-trivial translation +//DISABLE_TEST:SIMPLE(filecheck=CHECK_CUDA): -allow-glsl -stage compute -entry computeMain -target cuda -DTARGET_CUDA +//missing implementation of system (varying?) 
values +//DISABLE_TEST:SIMPLE(filecheck=CHECK_CPP): -allow-glsl -stage compute -entry computeMain -target cpp -DTARGET_CPP + +//TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=BUF):-vk -compute -entry computeMain -allow-glsl +//TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=BUF):-vk -compute -entry computeMain -allow-glsl -emit-spirv-directly +#version 430 + +//TEST_INPUT:ubuffer(data=[0], stride=4):out,name=outputBuffer +buffer MyBlockName2 +{ + uint data[]; +} outputBuffer; + +layout(local_size_x = 32) in; + +void computeMain() +{ + if (gl_GlobalInvocationID.x == 3) { + outputBuffer.data[0] = true + && gl_NumSubgroups == 1 + && gl_SubgroupID == 0 // a single subgroup is expected (gl_NumSubgroups == 1) and subgroup IDs are 0-based + && gl_SubgroupSize == 32 + && gl_SubgroupInvocationID == 3 + && gl_SubgroupEqMask == uvec4(0b1000,0,0,0) + && gl_SubgroupGeMask == uvec4(0xFFFFFFF8,0,0,0) + && gl_SubgroupGtMask == uvec4(0xFFFFFFF0,0,0,0) + && gl_SubgroupLeMask == uvec4(0b1111,0,0,0) + && gl_SubgroupLtMask == uvec4(0b111,0,0,0) + ; + } + // CHECK_GLSL: void main( + // CHECK_SPV: OpEntryPoint + // CHECK_HLSL: void computeMain( + // CHECK_CUDA: void computeMain( + // CHECK_CPP: void _computeMain( + // BUF: 1 +} diff --git a/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-clustered.slang b/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-clustered.slang new file mode 100644 index 000000000..9e9b089d2 --- /dev/null +++ b/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-clustered.slang @@ -0,0 +1,171 @@ +//TEST:SIMPLE(filecheck=CHECK_GLSL): -allow-glsl -stage compute -entry computeMain -target glsl +//TEST:SIMPLE(filecheck=CHECK_SPV): -allow-glsl -stage compute -entry computeMain -target spirv -emit-spirv-directly + +// not testing hlsl due to missing impl +//DISABLE_TEST:SIMPLE(filecheck=CHECK_HLSL): -allow-glsl -stage compute -entry computeMain -target hlsl -DTARGET_HLSL +// not testing cuda due to missing impl +//DISABLE_TEST:SIMPLE(filecheck=CHECK_CUDA): -allow-glsl -stage compute -entry 
computeMain -target cuda -DTARGET_CUDA +// not testing cpp due to missing impl +//DISABLE_TEST:SIMPLE(filecheck=CHECK_CPP): -allow-glsl -stage compute -entry computeMain -target cpp -DTARGET_CPP + +//TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=BUF):-vk -compute -entry computeMain -allow-glsl +//TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=BUF):-vk -compute -entry computeMain -allow-glsl -emit-spirv-directly +#version 430 + +//TEST_INPUT:ubuffer(data=[0 0], stride=4):out,name=outputBuffer +buffer MyBlockName2 +{ + uint data[]; +} outputBuffer; + +layout(local_size_x = 32) in; + +__generic<T : __BuiltinLogicalType> +bool test1Logical() { + return true + && subgroupClusteredAnd(T(1), 1) == T(1) + && subgroupClusteredOr(T(1), 1) == T(1) + && subgroupClusteredXor(T(1), 1) == T(1) + ; +} + +__generic<T : __BuiltinLogicalType, let N : int> +bool testVLogical() { + typealias gvec = vector<T, N>; + + return true + && subgroupClusteredAnd(gvec(T(1)), 1) == gvec(T(1)) + && subgroupClusteredOr(gvec(T(1)), 1) == gvec(T(1)) + && subgroupClusteredXor(gvec(T(1)), 1) == gvec(T(1)) + ; +} + +bool testLogical() { + return true + && test1Logical<int>() + && testVLogical<int, 2>() + && testVLogical<int, 3>() + && testVLogical<int, 4>() + && test1Logical<int8_t>() + && testVLogical<int8_t, 2>() + && testVLogical<int8_t, 3>() + && testVLogical<int8_t, 4>() + && test1Logical<int16_t>() + && testVLogical<int16_t, 2>() + && testVLogical<int16_t, 3>() + && testVLogical<int16_t, 4>() + && test1Logical<int64_t>() + && testVLogical<int64_t, 2>() + && testVLogical<int64_t, 3>() + && testVLogical<int64_t, 4>() + && test1Logical<uint>() + && testVLogical<uint, 2>() + && testVLogical<uint, 3>() + && testVLogical<uint, 4>() + && test1Logical<uint8_t>() + && testVLogical<uint8_t, 2>() + && testVLogical<uint8_t, 3>() + && testVLogical<uint8_t, 4>() + && test1Logical<uint16_t>() + && testVLogical<uint16_t, 2>() + && testVLogical<uint16_t, 3>() + && testVLogical<uint16_t, 4>() + && 
test1Logical<uint64_t>() + && testVLogical<uint64_t, 2>() + && testVLogical<uint64_t, 3>() + && testVLogical<uint64_t, 4>() + && test1Logical<bool>() + && testVLogical<bool, 2>() + && testVLogical<bool, 3>() + && testVLogical<bool, 4>() + ; +} + +__generic<T : __BuiltinArithmeticType> +bool test1Arithmetic() { + return true + && subgroupClusteredAdd(T(1), 1) == T(1) + && subgroupClusteredMul(T(1), 1) == T(1) + && subgroupClusteredMin(T(1), 1) == T(1) + && subgroupClusteredMax(T(1), 1) == T(1) + ; +} + +__generic<T : __BuiltinArithmeticType, let N : int> +bool testVArithmetic() { + typealias gvec = vector<T, N>; + + return true + && subgroupClusteredAdd(gvec(T(1)), 1) == gvec(T(1)) + && subgroupClusteredMul(gvec(T(1)), 1) == gvec(T(1)) + && subgroupClusteredMin(gvec(T(1)), 1) == gvec(T(1)) + && subgroupClusteredMax(gvec(T(1)), 1) == gvec(T(1)) + ; +} + +bool testArithmetic() { + return true + && test1Arithmetic<float>() + && testVArithmetic<float, 2>() + && testVArithmetic<float, 3>() + && testVArithmetic<float, 4>() + && test1Arithmetic<double>() // WARNING: intel GPU's lack FP64 support + && testVArithmetic<double, 2>() + && testVArithmetic<double, 3>() + && testVArithmetic<double, 4>() + && test1Arithmetic<half>() + && testVArithmetic<half, 2>() + && testVArithmetic<half, 3>() + && testVArithmetic<half, 4>() + && test1Arithmetic<int>() + && testVArithmetic<int, 2>() + && testVArithmetic<int, 3>() + && testVArithmetic<int, 4>() + && test1Arithmetic<int8_t>() + && testVArithmetic<int8_t, 2>() + && testVArithmetic<int8_t, 3>() + && testVArithmetic<int8_t, 4>() + && test1Arithmetic<int16_t>() + && testVArithmetic<int16_t, 2>() + && testVArithmetic<int16_t, 3>() + && testVArithmetic<int16_t, 4>() + && test1Arithmetic<int64_t>() + && testVArithmetic<int64_t, 2>() + && testVArithmetic<int64_t, 3>() + && testVArithmetic<int64_t, 4>() + && test1Arithmetic<uint>() + && testVArithmetic<uint, 2>() + && testVArithmetic<uint, 3>() + && testVArithmetic<uint, 4>() + && 
test1Arithmetic<uint8_t>() + && testVArithmetic<uint8_t, 2>() + && testVArithmetic<uint8_t, 3>() + && testVArithmetic<uint8_t, 4>() + && test1Arithmetic<uint16_t>() + && testVArithmetic<uint16_t, 2>() + && testVArithmetic<uint16_t, 3>() + && testVArithmetic<uint16_t, 4>() + && test1Arithmetic<uint64_t>() + && testVArithmetic<uint64_t, 2>() + && testVArithmetic<uint64_t, 3>() + && testVArithmetic<uint64_t, 4>() + ; +} + +void computeMain() +{ + outputBuffer.data[0] = true + && testLogical() + ; + outputBuffer.data[1] = true + && testArithmetic() + ; + + // CHECK_GLSL: void main( + // CHECK_SPV: OpEntryPoint + // CHECK_HLSL: void computeMain( + // CHECK_CUDA: void computeMain( + // CHECK_CPP: void _computeMain( + // BUF: 1 + // BUF-NEXT: 1 +} diff --git a/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-quad.slang b/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-quad.slang new file mode 100644 index 000000000..5ed6398b2 --- /dev/null +++ b/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-quad.slang @@ -0,0 +1,129 @@ +//TEST:SIMPLE(filecheck=CHECK_GLSL): -allow-glsl -stage compute -entry computeMain -target glsl +//TEST:SIMPLE(filecheck=CHECK_SPV): -allow-glsl -stage compute -entry computeMain -target spirv -emit-spirv-directly +//TEST:SIMPLE(filecheck=CHECK_HLSL): -allow-glsl -stage compute -entry computeMain -target hlsl -DTARGET_HLSL + +// not testing cuda due to missing impl +//DISABLE_TEST:SIMPLE(filecheck=CHECK_CUDA): -allow-glsl -stage compute -entry computeMain -target cuda -DTARGET_CUDA +// not testing cpp due to missing impl +//DISABLE_TEST:SIMPLE(filecheck=CHECK_CPP): -allow-glsl -stage compute -entry computeMain -target cpp -DTARGET_CPP + +//TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=BUF):-vk -compute -entry computeMain -allow-glsl +//TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=BUF):-vk -compute -entry computeMain -allow-glsl -emit-spirv-directly +#version 430 + +//TEST_INPUT:ubuffer(data=[0], 
stride=4):out,name=outputBuffer +buffer MyBlockName2 +{ + uint data[]; +} outputBuffer; + +layout(local_size_x = 4) in; + +__generic<T : __BuiltinLogicalType> +bool test1QuadX() { + return true + && subgroupQuadSwapHorizontal(T(2)) == T(2) + && subgroupQuadSwapVertical(T(2)) == T(2) + && subgroupQuadSwapDiagonal(T(3)) == T(3) + && subgroupQuadBroadcast(T(1), 1) == T(1) + ; +} +__generic<T : __BuiltinLogicalType, let N : int> +bool testVQuadX() { + typealias gvec = vector<T, N>; + + return true + && subgroupQuadSwapHorizontal(gvec(T(2))) == gvec(T(2)) + && subgroupQuadSwapVertical(gvec(T(2))) == gvec(T(2)) + && subgroupQuadSwapDiagonal(gvec(T(3))) == gvec(T(3)) + && subgroupQuadBroadcast(gvec(T(1)), 1) == gvec(T(1)) + ; +} + +__generic<T : __BuiltinFloatingPointType> +bool test1QuadX() { + return true + && subgroupQuadSwapHorizontal(T(2)) == T(2) + && subgroupQuadSwapVertical(T(2)) == T(2) + && subgroupQuadSwapDiagonal(T(3)) == T(3) + && subgroupQuadBroadcast(T(1), 1) == T(1) + ; +} +__generic<T : __BuiltinFloatingPointType, let N : int> +bool testVQuadX() { + typealias gvec = vector<T, N>; + + return true + && subgroupQuadSwapHorizontal(gvec(T(2))) == gvec(T(2)) + && subgroupQuadSwapVertical(gvec(T(2))) == gvec(T(2)) + && subgroupQuadSwapDiagonal(gvec(T(3))) == gvec(T(3)) + && subgroupQuadBroadcast(gvec(T(1)), 1) == gvec(T(1)) + ; +} +bool testQuadSwapX() { + return true + && test1QuadX<float>() + && testVQuadX<float, 2>() + && testVQuadX<float, 3>() + && testVQuadX<float, 4>() + && test1QuadX<double>() // WARNING: intel GPU's lack FP64 support + && testVQuadX<double, 2>() + && testVQuadX<double, 3>() + && testVQuadX<double, 4>() + && test1QuadX<half>() + && testVQuadX<half, 2>() + && testVQuadX<half, 3>() + && testVQuadX<half, 4>() + && test1QuadX<int>() + && testVQuadX<int, 2>() + && testVQuadX<int, 3>() + && testVQuadX<int, 4>() + && test1QuadX<int8_t>() + && testVQuadX<int8_t, 2>() + && testVQuadX<int8_t, 3>() + && testVQuadX<int8_t, 4>() + && 
test1QuadX<int16_t>() + && testVQuadX<int16_t, 2>() + && testVQuadX<int16_t, 3>() + && testVQuadX<int16_t, 4>() + && test1QuadX<int64_t>() + && testVQuadX<int64_t, 2>() + && testVQuadX<int64_t, 3>() + && testVQuadX<int64_t, 4>() + && test1QuadX<uint>() + && testVQuadX<uint, 2>() + && testVQuadX<uint, 3>() + && testVQuadX<uint, 4>() + && test1QuadX<uint8_t>() + && testVQuadX<uint8_t, 2>() + && testVQuadX<uint8_t, 3>() + && testVQuadX<uint8_t, 4>() + && test1QuadX<uint16_t>() + && testVQuadX<uint16_t, 2>() + && testVQuadX<uint16_t, 3>() + && testVQuadX<uint16_t, 4>() + && test1QuadX<uint64_t>() + && testVQuadX<uint64_t, 2>() + && testVQuadX<uint64_t, 3>() + && testVQuadX<uint64_t, 4>() + && test1QuadX<bool>() + && testVQuadX<bool, 2>() + && testVQuadX<bool, 3>() + && testVQuadX<bool, 4>() + ; +} + +void computeMain() +{ + + outputBuffer.data[0] = true + && testQuadSwapX() + ; + + // CHECK_GLSL: void main( + // CHECK_SPV: OpEntryPoint + // CHECK_HLSL: void computeMain( + // CHECK_CUDA: void computeMain( + // CHECK_CPP: void _computeMain( + // BUF: 1 +} diff --git a/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-shuffle-relative.slang b/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-shuffle-relative.slang new file mode 100644 index 000000000..0e187c568 --- /dev/null +++ b/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-shuffle-relative.slang @@ -0,0 +1,121 @@ +//TEST:SIMPLE(filecheck=CHECK_GLSL): -allow-glsl -stage compute -entry computeMain -target glsl +//TEST:SIMPLE(filecheck=CHECK_SPV): -allow-glsl -stage compute -entry computeMain -target spirv -emit-spirv-directly + +// not testing hlsl due to missing impl +//DISABLE_TEST:SIMPLE(filecheck=CHECK_HLSL): -allow-glsl -stage compute -entry computeMain -target hlsl -DTARGET_HLSL +// not testing cuda due to missing impl +//DISABLE_TEST:SIMPLE(filecheck=CHECK_CUDA): -allow-glsl -stage compute -entry computeMain -target cuda -DTARGET_CUDA +// not testing cpp due to missing impl 
+//DISABLE_TEST:SIMPLE(filecheck=CHECK_CPP): -allow-glsl -stage compute -entry computeMain -target cpp -DTARGET_CPP + +//TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=BUF):-vk -compute -entry computeMain -allow-glsl +//TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=BUF):-vk -compute -entry computeMain -allow-glsl -emit-spirv-directly +#version 430 + +//TEST_INPUT:ubuffer(data=[0], stride=4):out,name=outputBuffer +buffer MyBlockName2 +{ + uint data[]; +} outputBuffer; + +layout(local_size_x = 32) in; + +__generic<T : __BuiltinLogicalType> +bool test1ShuffleX() { + return true + && subgroupShuffleUp(T(1), 1) == T(1) + && subgroupShuffleDown(T(1), 1) == T(1) + ; +} +__generic<T : __BuiltinLogicalType, let N : int> +bool testVShuffleX() { + typealias gvec = vector<T, N>; + + return true + && subgroupShuffleUp(gvec(T(1)), 1) == gvec(T(1)) + && subgroupShuffleDown(gvec(T(1)), 1) == gvec(T(1)) + ; +} + +__generic<T : __BuiltinFloatingPointType> +bool test1ShuffleX() { + return true + && subgroupShuffleUp(T(1), 1) == T(1) + && subgroupShuffleDown(T(1), 1) == T(1) + ; +} +__generic<T : __BuiltinFloatingPointType, let N : int> +bool testVShuffleX() { + typealias gvec = vector<T, N>; + + return true + && subgroupShuffleUp(gvec(T(1)), 1) == gvec(T(1)) + && subgroupShuffleDown(gvec(T(1)), 1) == gvec(T(1)) + ; +} +bool testShuffleX() { + return true + && test1ShuffleX<float>() + && testVShuffleX<float, 2>() + && testVShuffleX<float, 3>() + && testVShuffleX<float, 4>() + && test1ShuffleX<double>() // WARNING: Intel GPUs lack FP64 support + && testVShuffleX<double, 2>() + && testVShuffleX<double, 3>() + && testVShuffleX<double, 4>() + && test1ShuffleX<half>() + && testVShuffleX<half, 2>() + && testVShuffleX<half, 3>() + && testVShuffleX<half, 4>() + && test1ShuffleX<int>() + && testVShuffleX<int, 2>() + && testVShuffleX<int, 3>() + && testVShuffleX<int, 4>() + && test1ShuffleX<int8_t>() + && testVShuffleX<int8_t, 2>() + && testVShuffleX<int8_t, 3>() + && 
testVShuffleX<int8_t, 4>() + && test1ShuffleX<int16_t>() + && testVShuffleX<int16_t, 2>() + && testVShuffleX<int16_t, 3>() + && testVShuffleX<int16_t, 4>() + && test1ShuffleX<int64_t>() + && testVShuffleX<int64_t, 2>() + && testVShuffleX<int64_t, 3>() + && testVShuffleX<int64_t, 4>() + && test1ShuffleX<uint>() + && testVShuffleX<uint, 2>() + && testVShuffleX<uint, 3>() + && testVShuffleX<uint, 4>() + && test1ShuffleX<uint8_t>() + && testVShuffleX<uint8_t, 2>() + && testVShuffleX<uint8_t, 3>() + && testVShuffleX<uint8_t, 4>() + && test1ShuffleX<uint16_t>() + && testVShuffleX<uint16_t, 2>() + && testVShuffleX<uint16_t, 3>() + && testVShuffleX<uint16_t, 4>() + && test1ShuffleX<uint64_t>() + && testVShuffleX<uint64_t, 2>() + && testVShuffleX<uint64_t, 3>() + && testVShuffleX<uint64_t, 4>() + && test1ShuffleX<bool>() + && testVShuffleX<bool, 2>() + && testVShuffleX<bool, 3>() + && testVShuffleX<bool, 4>() + ; +} + +void computeMain() +{ + // shuffle-up on the first lane and shuffle-down on the last lane read out-of-range lanes (undefined result), so all lanes run the test but only interior lane 1 reports + bool res = testShuffleX(); + if (gl_SubgroupInvocationID == 1) { outputBuffer.data[0] = res; } + + // CHECK_GLSL: void main( + // CHECK_SPV: OpEntryPoint + // CHECK_HLSL: void computeMain( + // CHECK_CUDA: void computeMain( + // CHECK_CPP: void _computeMain( + // BUF: 1 +} diff --git a/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-shuffle.slang b/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-shuffle.slang new file mode 100644 index 000000000..5dca1a588 --- /dev/null +++ b/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-shuffle.slang @@ -0,0 +1,139 @@ +//TEST:SIMPLE(filecheck=CHECK_GLSL): -allow-glsl -stage compute -entry computeMain -target glsl +//TEST:SIMPLE(filecheck=CHECK_SPV): -allow-glsl -stage compute -entry computeMain -target spirv -emit-spirv-directly + +// not testing hlsl due to missing impl +//DISABLE_TEST:SIMPLE(filecheck=CHECK_HLSL): -allow-glsl -stage compute -entry computeMain -target hlsl -DTARGET_HLSL +// not testing cuda due to missing impl +//DISABLE_TEST:SIMPLE(filecheck=CHECK_CUDA): -allow-glsl -stage compute -entry computeMain -target 
cuda -DTARGET_CUDA +// not testing cpp due to missing impl +//DISABLE_TEST:SIMPLE(filecheck=CHECK_CPP): -allow-glsl -stage compute -entry computeMain -target cpp -DTARGET_CPP + +//TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=BUF):-vk -compute -entry computeMain -allow-glsl +//TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=BUF):-vk -compute -entry computeMain -allow-glsl -emit-spirv-directly +#version 430 + +#if 1 \ + && !defined(TARGET_HLSL) \ + && !defined(TARGET_CUDA) +// hlsl is missing a subgroupShuffleXor implementation +// cuda is missing a subgroupShuffleXor implementation +#define TEST_when_subgroupShuffleXor_is_implemented +#endif + +//TEST_INPUT:ubuffer(data=[0], stride=4):out,name=outputBuffer +buffer MyBlockName2 +{ + uint data[]; +} outputBuffer; + +layout(local_size_x = 32) in; + +__generic<T : __BuiltinLogicalType> +bool test1ShuffleX() { + return true + && subgroupShuffle(T(1), 1) == T(1) +#ifdef TEST_when_subgroupShuffleXor_is_implemented + && subgroupShuffleXor(T(1), 1) == T(1) +#endif // #ifdef TEST_when_subgroupShuffleXor_is_implemented + ; +} +__generic<T : __BuiltinLogicalType, let N : int> +bool testVShuffleX() { + typealias gvec = vector<T, N>; + + return true + && subgroupShuffle(gvec(T(1)), 1) == gvec(T(1)) +#ifdef TEST_when_subgroupShuffleXor_is_implemented + && subgroupShuffleXor(gvec(T(1)), 1) == gvec(T(1)) +#endif // #ifdef TEST_when_subgroupShuffleXor_is_implemented + ; +} + +__generic<T : __BuiltinFloatingPointType> +bool test1ShuffleX() { + return true + && subgroupShuffle(T(1), 1) == T(1) +#ifdef TEST_when_subgroupShuffleXor_is_implemented + && subgroupShuffleXor(T(1), 1) == T(1) +#endif // #ifdef TEST_when_subgroupShuffleXor_is_implemented + ; +} +__generic<T : __BuiltinFloatingPointType, let N : int> +bool testVShuffleX() { + typealias gvec = vector<T, N>; + + return true + && subgroupShuffle(gvec(T(1)), 1) == gvec(T(1)) +#ifdef TEST_when_subgroupShuffleXor_is_implemented + && subgroupShuffleXor(gvec(T(1)), 1) == gvec(T(1)) +#endif // #ifdef 
TEST_when_subgroupShuffleXor_is_implemented + ; +} +bool testShuffleX() { + return true + && test1ShuffleX<float>() + && testVShuffleX<float, 2>() + && testVShuffleX<float, 3>() + && testVShuffleX<float, 4>() + && test1ShuffleX<double>() // WARNING: Intel GPUs lack FP64 support + && testVShuffleX<double, 2>() + && testVShuffleX<double, 3>() + && testVShuffleX<double, 4>() + && test1ShuffleX<half>() + && testVShuffleX<half, 2>() + && testVShuffleX<half, 3>() + && testVShuffleX<half, 4>() + && test1ShuffleX<int>() + && testVShuffleX<int, 2>() + && testVShuffleX<int, 3>() + && testVShuffleX<int, 4>() + && test1ShuffleX<int8_t>() + && testVShuffleX<int8_t, 2>() + && testVShuffleX<int8_t, 3>() + && testVShuffleX<int8_t, 4>() + && test1ShuffleX<int16_t>() + && testVShuffleX<int16_t, 2>() + && testVShuffleX<int16_t, 3>() + && testVShuffleX<int16_t, 4>() + && test1ShuffleX<int64_t>() + && testVShuffleX<int64_t, 2>() + && testVShuffleX<int64_t, 3>() + && testVShuffleX<int64_t, 4>() + && test1ShuffleX<uint>() + && testVShuffleX<uint, 2>() + && testVShuffleX<uint, 3>() + && testVShuffleX<uint, 4>() + && test1ShuffleX<uint8_t>() + && testVShuffleX<uint8_t, 2>() + && testVShuffleX<uint8_t, 3>() + && testVShuffleX<uint8_t, 4>() + && test1ShuffleX<uint16_t>() + && testVShuffleX<uint16_t, 2>() + && testVShuffleX<uint16_t, 3>() + && testVShuffleX<uint16_t, 4>() + && test1ShuffleX<uint64_t>() + && testVShuffleX<uint64_t, 2>() + && testVShuffleX<uint64_t, 3>() + && testVShuffleX<uint64_t, 4>() + && test1ShuffleX<bool>() + && testVShuffleX<bool, 2>() + && testVShuffleX<bool, 3>() + && testVShuffleX<bool, 4>() + ; +} + + +void computeMain() +{ + + outputBuffer.data[0] = true + && testShuffleX() + ; + + // CHECK_GLSL: void main( + // CHECK_SPV: OpEntryPoint + // CHECK_HLSL: void computeMain( + // CHECK_CUDA: void computeMain( + // CHECK_CPP: void _computeMain( + // BUF: 1 +} diff --git a/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-vote.slang 
b/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-vote.slang new file mode 100644 index 000000000..bcd4aeb56 --- /dev/null +++ b/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-vote.slang @@ -0,0 +1,167 @@ +//TEST:SIMPLE(filecheck=CHECK_GLSL): -allow-glsl -stage compute -entry computeMain -target glsl +//TEST:SIMPLE(filecheck=CHECK_SPV): -allow-glsl -stage compute -entry computeMain -target spirv -emit-spirv-directly +//TEST:SIMPLE(filecheck=CHECK_HLSL): -allow-glsl -stage compute -entry computeMain -target hlsl -DTARGET_HLSL + +// not testing cuda due to missing impl +//DISABLE_TEST:SIMPLE(filecheck=CHECK_CUDA): -allow-glsl -stage compute -entry computeMain -target cuda -DTARGET_CUDA +// not testing cpp due to missing impl +//DISABLE_TEST:SIMPLE(filecheck=CHECK_CPP): -allow-glsl -stage compute -entry computeMain -target cpp -DTARGET_CPP + +//TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=BUF):-vk -compute -entry computeMain -allow-glsl +//TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=BUF):-vk -compute -entry computeMain -allow-glsl -emit-spirv-directly +#version 430 + +//TEST_INPUT:ubuffer(data=[9], stride=4):name=inputBuffer +buffer MyBlockName +{ + uint data[]; +} inputBuffer; + +//TEST_INPUT:ubuffer(data=[0 0 0 0 0], stride=4):out,name=outputBuffer +buffer MyBlockName2 +{ + uint data[]; +} outputBuffer; + +layout(local_size_x = 32) in; + +__generic<T : __BuiltinLogicalType> +bool test1AllEqual() { + return true + && subgroupAllEqual(T(1)) == true + && subgroupAllEqual(T(gl_GlobalInvocationID.x)) == false + ; +} +__generic<T : __BuiltinLogicalType, let N : int> +bool testVAllEqual() { + typealias gvec = vector<T, N>; + + return true + && subgroupAllEqual(gvec(T(1))) == true + && subgroupAllEqual(gvec(T(gl_GlobalInvocationID.x))) == false + ; +} + +__generic<T : __BuiltinFloatingPointType> +bool test1AllEqual() { + return true + && subgroupAllEqual(T(1)) == true + && subgroupAllEqual(T(gl_GlobalInvocationID.x)) == false + ; +} 
+__generic<T : __BuiltinFloatingPointType, let N : int> +bool testVAllEqual() { + typealias gvec = vector<T, N>; + + return true + && subgroupAllEqual(gvec(T(1))) == true + && subgroupAllEqual(gvec(T(gl_GlobalInvocationID.x))) == false + ; +} +bool testAllEqual() { + return true + && test1AllEqual<float>() + && testVAllEqual<float, 2>() + && testVAllEqual<float, 3>() + && testVAllEqual<float, 4>() + && test1AllEqual<double>() // WARNING: Intel GPUs lack FP64 support + && testVAllEqual<double, 2>() + && testVAllEqual<double, 3>() + && testVAllEqual<double, 4>() + && test1AllEqual<half>() + && testVAllEqual<half, 2>() + && testVAllEqual<half, 3>() + && testVAllEqual<half, 4>() + && test1AllEqual<int>() + && testVAllEqual<int, 2>() + && testVAllEqual<int, 3>() + && testVAllEqual<int, 4>() + && test1AllEqual<int8_t>() + && testVAllEqual<int8_t, 2>() + && testVAllEqual<int8_t, 3>() + && testVAllEqual<int8_t, 4>() + && test1AllEqual<int16_t>() + && testVAllEqual<int16_t, 2>() + && testVAllEqual<int16_t, 3>() + && testVAllEqual<int16_t, 4>() + && test1AllEqual<int64_t>() + && testVAllEqual<int64_t, 2>() + && testVAllEqual<int64_t, 3>() + && testVAllEqual<int64_t, 4>() + && test1AllEqual<uint>() + && testVAllEqual<uint, 2>() + && testVAllEqual<uint, 3>() + && testVAllEqual<uint, 4>() + && test1AllEqual<uint8_t>() + && testVAllEqual<uint8_t, 2>() + && testVAllEqual<uint8_t, 3>() + && testVAllEqual<uint8_t, 4>() + && test1AllEqual<uint16_t>() + && testVAllEqual<uint16_t, 2>() + && testVAllEqual<uint16_t, 3>() + && testVAllEqual<uint16_t, 4>() + && test1AllEqual<uint64_t>() + && testVAllEqual<uint64_t, 2>() + && testVAllEqual<uint64_t, 3>() + && testVAllEqual<uint64_t, 4>() + && test1AllEqual<bool>() + && testVAllEqual<bool, 2>() + && testVAllEqual<bool, 3>() + && testVAllEqual<bool, 4>() + ; +} + +void computeMain() +{ + // separate tests (one output slot each), since each one exercises concurrent subgroup behaviour + + // only the invocation whose global ID equals inputBuffer.data[0] votes true; subgroupAny must report true + outputBuffer.data[0] = 1; + bool t1 = inputBuffer.data[0] == 
gl_GlobalInvocationID.x; + if (subgroupAny(t1)) { + subgroupBarrier(); + outputBuffer.data[0] = 2; + } + + // every invocation votes false; subgroupAny must report false + outputBuffer.data[1] = 1; + t1 = false; + if (!subgroupAny(t1)) { + subgroupBarrier(); + outputBuffer.data[1] = 2; + } + + // every invocation votes true; subgroupAll must report true + outputBuffer.data[2] = 1; + t1 = true; + if (subgroupAll(t1)) { + subgroupBarrier(); + outputBuffer.data[2] = 2; + } + + // every invocation votes false; subgroupAll must report false + outputBuffer.data[3] = 1; + t1 = false; + if (!subgroupAll(t1)) { + subgroupBarrier(); + outputBuffer.data[3] = 2; + } + + outputBuffer.data[4] = 1; + + if (testAllEqual()) { + subgroupBarrier(); + outputBuffer.data[4] = 2; + } + + // CHECK_GLSL: void main( + // CHECK_SPV: OpEntryPoint + // CHECK_HLSL: void computeMain( + // CHECK_CUDA: void computeMain( + // CHECK_CPP: void _computeMain( + // BUF: 2 + // BUF-NEXT: 2 + // BUF-NEXT: 2 + // BUF-NEXT: 2 + // BUF-NEXT: 2 +} |
