diff options
| author | Yong He <yonghe@outlook.com> | 2024-02-02 22:04:40 -0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-02-02 22:04:40 -0800 |
| commit | c15e7ade4e27e1649d5b98f5854e9e52bb9e60ae (patch) | |
| tree | 22082fda85b2b25eec36da8c4505de7b6cb987fc /source/slang/hlsl.meta.slang | |
| parent | a67cb0609587c230746b52567ff5775cab215220 (diff) | |
Atomics+Wave ops intrinsics fixes. (#3542)
* Fix atomics intrinsics, increase kMaxDescriptorSets.
* Add SPIRVASM to known non-differentiable insts.
* Support fp16 wave ops when targeting glsl.
* Fixes.
* Fix vk validation errors.
* Fix.
* Add to allowed failures.
Diffstat (limited to 'source/slang/hlsl.meta.slang')
| -rw-r--r-- | source/slang/hlsl.meta.slang | 114 |
1 file changed, 84 insertions, 30 deletions
diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang index 2900d6ea0..0b60bda0d 100644 --- a/source/slang/hlsl.meta.slang +++ b/source/slang/hlsl.meta.slang @@ -1778,8 +1778,8 @@ float __atomicAdd(__ref float value, float amount) } __glsl_version(430) -__glsl_extension(GL_EXT_shader_atomic_float2) -half __atomicAdd(__ref half value, half amount) +__glsl_extension(GL_NV_shader_atomic_fp16_vector) +half2 __atomicAdd(__ref half2 value, half2 amount) { __target_switch { @@ -1787,9 +1787,9 @@ half __atomicAdd(__ref half value, half amount) case spirv: return spirv_asm { - OpExtension "SPV_EXT_shader_atomic_float16_add"; - OpCapability AtomicFloat16AddEXT; - result:$$half = OpAtomicFAddEXT &value Device None $amount + OpExtension "SPV_EXT_shader_atomic_float_add"; + OpCapability AtomicFloat32AddEXT; + result:$$half2 = OpAtomicFAddEXT &value Device None $amount }; } } @@ -2337,7 +2337,7 @@ ${{{{ __target_switch { case hlsl: - __intrinsic_asm "NvInterlockedAddFp32($0, $1, $2))"; + __intrinsic_asm "NvInterlockedAddFp16x2($0, $1, $2))"; } } @@ -2364,8 +2364,15 @@ ${{{{ case glsl: case spirv: { - let buf = __getEquivalentStructuredBuffer<half>(this); - originalValue = __atomicAdd(buf[byteAddress / 2], value); + let buf = __getEquivalentStructuredBuffer<half2>(this); + if ((byteAddress & 2) == 0) + { + originalValue = __atomicAdd(buf[byteAddress/4], half2(value, half(0.0))).x; + } + else + { + originalValue = __atomicAdd(buf[byteAddress/4], half2(half(0.0), value)).y; + } return; } } @@ -7555,6 +7562,9 @@ __target_intrinsic(cuda, "_waveProductMultiple($0, $1)") __target_intrinsic(hlsl, "WaveActiveProduct($1)") matrix<T,N,M> WaveMaskProduct(WaveMask mask, matrix<T,N,M> expr); +__intrinsic_op($(kIROp_RequireGLSLExtension)) +void __requireGLSLExtension(String extensionName); + __generic<T : __BuiltinArithmeticType> __glsl_extension(GL_KHR_shader_subgroup_arithmetic) __spirv_version(1.3) @@ -7562,7 +7572,9 @@ T WaveMaskSum(WaveMask mask, T expr) { 
__target_switch { - case glsl: __intrinsic_asm "subgroupAdd($1)"; + case glsl: + if (__isHalf<T>()) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16"); + __intrinsic_asm "subgroupAdd($1)"; case cuda: __intrinsic_asm "_waveSum($0, $1)"; case hlsl: __intrinsic_asm "WaveActiveSum($1)"; case spirv: @@ -7591,7 +7603,9 @@ vector<T,N> WaveMaskSum(WaveMask mask, vector<T,N> expr) { __target_switch { - case glsl: __intrinsic_asm "subgroupAdd($1)"; + case glsl: + if (__isHalf<T>()) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16"); + __intrinsic_asm "subgroupAdd($1)"; case cuda: __intrinsic_asm "_waveSumMultiple($0, $1)"; case hlsl: __intrinsic_asm "WaveActiveSum($1)"; case spirv: @@ -7627,6 +7641,7 @@ bool WaveMaskAllEqual(WaveMask mask, T value) __target_switch { case glsl: + if (__isHalf<T>()) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16"); __intrinsic_asm "subgroupAllEqual($1)"; case hlsl: __intrinsic_asm "WaveActiveAllEqual($1)"; @@ -7651,6 +7666,7 @@ bool WaveMaskAllEqual(WaveMask mask, vector<T,N> value) __target_switch { case glsl: + if (__isHalf<T>()) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16"); __intrinsic_asm "subgroupAllEqual($1)"; case hlsl: __intrinsic_asm "WaveActiveAllEqual($1)"; @@ -7681,7 +7697,9 @@ T WaveMaskPrefixProduct(WaveMask mask, T expr) { __target_switch { - case glsl: __intrinsic_asm "subgroupExclusiveMul($1)"; + case glsl: + if (__isHalf<T>()) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16"); + __intrinsic_asm "subgroupExclusiveMul($1)"; case cuda: __intrinsic_asm "_wavePrefixProduct($0, $1)"; case hlsl: __intrinsic_asm "WavePrefixProduct($1)"; case spirv: @@ -7710,7 +7728,9 @@ vector<T,N> WaveMaskPrefixProduct(WaveMask mask, vector<T,N> expr) { __target_switch { - case glsl: __intrinsic_asm "subgroupExclusiveMul($1)"; + case glsl: + if (__isHalf<T>()) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16"); + 
__intrinsic_asm "subgroupExclusiveMul($1)"; case cuda: __intrinsic_asm "_wavePrefixProductMultiple($0, $1)"; case hlsl: __intrinsic_asm "WavePrefixProduct($1)"; case spirv: @@ -7744,7 +7764,9 @@ T WaveMaskPrefixSum(WaveMask mask, T expr) { __target_switch { - case glsl: __intrinsic_asm "subgroupExclusiveAdd($1)"; + case glsl: + if (__isHalf<T>()) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16"); + __intrinsic_asm "subgroupExclusiveAdd($1)"; case cuda: __intrinsic_asm "_wavePrefixSum($0, $1)"; case hlsl: __intrinsic_asm "WavePrefixSum($1)"; case spirv: @@ -7774,7 +7796,9 @@ vector<T,N> WaveMaskPrefixSum(WaveMask mask, vector<T,N> expr) { __target_switch { - case glsl: __intrinsic_asm "subgroupExclusiveAdd($1)"; + case glsl: + if (__isHalf<T>()) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16"); + __intrinsic_asm "subgroupExclusiveAdd($1)"; case cuda: __intrinsic_asm "_wavePrefixSumMultiple($0, $1)"; case hlsl: __intrinsic_asm "WavePrefixSum($1)"; case spirv: @@ -8281,7 +8305,9 @@ T WaveActive$(opName.hlslName)(T expr) { __target_switch { - case glsl: __intrinsic_asm "subgroup$(opName.glslName)($0)"; + case glsl: + if (__isHalf<T>()) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16"); + __intrinsic_asm "subgroup$(opName.glslName)($0)"; case hlsl: __intrinsic_asm "WaveActive$(opName.hlslName)"; case spirv: if (__isFloat<T>()) @@ -8320,7 +8346,9 @@ vector<T,N> WaveActive$(opName.hlslName)(vector<T,N> expr) { __target_switch { - case glsl: __intrinsic_asm "subgroup$(opName.glslName)($0)"; + case glsl: + if (__isHalf<T>()) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16"); + __intrinsic_asm "subgroup$(opName.glslName)($0)"; case hlsl: __intrinsic_asm "WaveActive$(opName.hlslName)"; case spirv: if (__isFloat<T>()) @@ -8574,7 +8602,9 @@ T WavePrefixProduct(T expr) { __target_switch { - case glsl: __intrinsic_asm "subgroupExclusiveMul($0)"; + case glsl: + if (__isHalf<T>()) 
__requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16"); + __intrinsic_asm "subgroupExclusiveMul($0)"; case hlsl: __intrinsic_asm "WavePrefixProduct"; case spirv: if (__isFloat<T>()) @@ -8609,7 +8639,9 @@ vector<T,N> WavePrefixProduct(vector<T,N> expr) { __target_switch { - case glsl: __intrinsic_asm "subgroupExclusiveMul($0)"; + case glsl: + if (__isHalf<T>()) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16"); + __intrinsic_asm "subgroupExclusiveMul($0)"; case hlsl: __intrinsic_asm "WavePrefixProduct"; case spirv: if (__isFloat<T>()) @@ -8647,7 +8679,9 @@ T WavePrefixSum(T expr) { __target_switch { - case glsl: __intrinsic_asm "subgroupExclusiveAdd($0)"; + case glsl: + if (__isHalf<T>()) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16"); + __intrinsic_asm "subgroupExclusiveAdd($0)"; case hlsl: __intrinsic_asm "WavePrefixSum"; case spirv: if (__isFloat<T>()) @@ -8678,7 +8712,9 @@ vector<T,N> WavePrefixSum(vector<T,N> expr) { __target_switch { - case glsl: __intrinsic_asm "subgroupExclusiveAdd($0)"; + case glsl: + if (__isHalf<T>()) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16"); + __intrinsic_asm "subgroupExclusiveAdd($0)"; case hlsl: __intrinsic_asm "WavePrefixSum"; case spirv: if (__isFloat<T>()) @@ -8716,7 +8752,9 @@ T WaveReadLaneFirst(T expr) { __target_switch { - case glsl: __intrinsic_asm "subgroupBroadcastFirst($0)"; + case glsl: + if (__isHalf<T>()) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16"); + __intrinsic_asm "subgroupBroadcastFirst($0)"; case hlsl: __intrinsic_asm "WaveReadLaneFirst"; case spirv: return spirv_asm {OpCapability GroupNonUniformBallot; OpGroupNonUniformBroadcastFirst $$T result Subgroup $expr}; @@ -8732,7 +8770,9 @@ vector<T,N> WaveReadLaneFirst(vector<T,N> expr) { __target_switch { - case glsl: __intrinsic_asm "subgroupBroadcastFirst($0)"; + case glsl: + if (__isHalf<T>()) 
__requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16"); + __intrinsic_asm "subgroupBroadcastFirst($0)"; case hlsl: __intrinsic_asm "WaveReadLaneFirst"; case spirv: return spirv_asm {OpCapability GroupNonUniformBallot; OpGroupNonUniformBroadcastFirst $$vector<T,N> result Subgroup $expr}; @@ -8761,7 +8801,9 @@ T WaveBroadcastLaneAt(T value, constexpr int lane) { __target_switch { - case glsl: __intrinsic_asm "subgroupBroadcast($0, $1)"; + case glsl: + if (__isHalf<T>()) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16"); + __intrinsic_asm "subgroupBroadcast($0, $1)"; case hlsl: __intrinsic_asm "WaveReadLaneAt"; case spirv: let ulane = uint(lane); @@ -8778,7 +8820,9 @@ vector<T,N> WaveBroadcastLaneAt(vector<T,N> value, constexpr int lane) { __target_switch { - case glsl: __intrinsic_asm "subgroupBroadcast($0, $1)"; + case glsl: + if (__isHalf<T>()) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16"); + __intrinsic_asm "subgroupBroadcast($0, $1)"; case hlsl: __intrinsic_asm "WaveReadLaneAt"; case spirv: let ulane = uint(lane); @@ -8805,7 +8849,9 @@ T WaveReadLaneAt(T value, int lane) { __target_switch { - case glsl: __intrinsic_asm "subgroupShuffle($0, $1)"; + case glsl: + if (__isHalf<T>()) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16"); + __intrinsic_asm "subgroupShuffle($0, $1)"; case hlsl: __intrinsic_asm "WaveReadLaneAt"; case spirv: let ulane = uint(lane); @@ -8822,7 +8868,9 @@ vector<T,N> WaveReadLaneAt(vector<T,N> value, int lane) { __target_switch { - case glsl: __intrinsic_asm "subgroupShuffle($0, $1)"; + case glsl: + if (__isHalf<T>()) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16"); + __intrinsic_asm "subgroupShuffle($0, $1)"; case hlsl: __intrinsic_asm "WaveReadLaneAt"; case spirv: let ulane = uint(lane); @@ -8850,7 +8898,9 @@ T WaveShuffle(T value, int lane) { __target_switch { - case glsl: __intrinsic_asm "subgroupShuffle($0, $1)"; + case glsl: + if 
(__isHalf<T>()) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16"); + __intrinsic_asm "subgroupShuffle($0, $1)"; case hlsl: __intrinsic_asm "WaveReadLaneAt"; case spirv: let ulane = uint(lane); @@ -8867,7 +8917,9 @@ vector<T,N> WaveShuffle(vector<T,N> value, int lane) { __target_switch { - case glsl: __intrinsic_asm "subgroupShuffle($0, $1)"; + case glsl: + if (__isHalf<T>()) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16"); + __intrinsic_asm "subgroupShuffle($0, $1)"; case hlsl: __intrinsic_asm "WaveReadLaneAt"; case spirv: let ulane = uint(lane); @@ -8890,7 +8942,8 @@ uint WavePrefixCountBits(bool value) { __target_switch { - case glsl: __intrinsic_asm "subgroupBallotExclusiveBitCount(subgroupBallot($0))"; + case glsl: + __intrinsic_asm "subgroupBallotExclusiveBitCount(subgroupBallot($0))"; case hlsl: __intrinsic_asm "WavePrefixCountBits($0)"; case spirv: return spirv_asm @@ -8910,7 +8963,8 @@ uint4 WaveGetConvergedMulti() { __target_switch { - case glsl: __intrinsic_asm "subgroupBallot(true)"; + case glsl: + __intrinsic_asm "subgroupBallot(true)"; case hlsl: __intrinsic_asm "WaveActiveBallot(true)"; case cuda: __intrinsic_asm "make_uint4(__activemask(), 0, 0, 0)"; case spirv: |
