summary | refs | log | tree | commit | diff
path: root/source/slang/hlsl.meta.slang
diff options
context:
space:
mode:
author: Yong He <yonghe@outlook.com> 2024-02-02 22:04:40 -0800
committer: GitHub <noreply@github.com> 2024-02-02 22:04:40 -0800
commit: c15e7ade4e27e1649d5b98f5854e9e52bb9e60ae (patch)
tree: 22082fda85b2b25eec36da8c4505de7b6cb987fc /source/slang/hlsl.meta.slang
parent: a67cb0609587c230746b52567ff5775cab215220 (diff)
Atomics+Wave ops intrinsics fixes. (#3542)
* Fix atomics intrinsics, increase kMaxDescriptorSets.
* Add SPIRVASM to known non-differentiable insts.
* Support fp16 wave ops when targeting glsl.
* Fixes.
* Fix vk validation errors.
* Fix.
* Add to allowed failures.
Diffstat (limited to 'source/slang/hlsl.meta.slang')
-rw-r--r-- source/slang/hlsl.meta.slang 114
1 files changed, 84 insertions, 30 deletions
diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang
index 2900d6ea0..0b60bda0d 100644
--- a/source/slang/hlsl.meta.slang
+++ b/source/slang/hlsl.meta.slang
@@ -1778,8 +1778,8 @@ float __atomicAdd(__ref float value, float amount)
}
__glsl_version(430)
-__glsl_extension(GL_EXT_shader_atomic_float2)
-half __atomicAdd(__ref half value, half amount)
+__glsl_extension(GL_NV_shader_atomic_fp16_vector)
+half2 __atomicAdd(__ref half2 value, half2 amount)
{
__target_switch
{
@@ -1787,9 +1787,9 @@ half __atomicAdd(__ref half value, half amount)
case spirv:
return spirv_asm
{
- OpExtension "SPV_EXT_shader_atomic_float16_add";
- OpCapability AtomicFloat16AddEXT;
- result:$$half = OpAtomicFAddEXT &value Device None $amount
+ OpExtension "SPV_EXT_shader_atomic_float_add";
+ OpCapability AtomicFloat32AddEXT;
+ result:$$half2 = OpAtomicFAddEXT &value Device None $amount
};
}
}
@@ -2337,7 +2337,7 @@ ${{{{
__target_switch
{
case hlsl:
- __intrinsic_asm "NvInterlockedAddFp32($0, $1, $2))";
+ __intrinsic_asm "NvInterlockedAddFp16x2($0, $1, $2))";
}
}
@@ -2364,8 +2364,15 @@ ${{{{
case glsl:
case spirv:
{
- let buf = __getEquivalentStructuredBuffer<half>(this);
- originalValue = __atomicAdd(buf[byteAddress / 2], value);
+ let buf = __getEquivalentStructuredBuffer<half2>(this);
+ if ((byteAddress & 2) == 0)
+ {
+ originalValue = __atomicAdd(buf[byteAddress/4], half2(value, half(0.0))).x;
+ }
+ else
+ {
+ originalValue = __atomicAdd(buf[byteAddress/4], half2(half(0.0), value)).y;
+ }
return;
}
}
@@ -7555,6 +7562,9 @@ __target_intrinsic(cuda, "_waveProductMultiple($0, $1)")
__target_intrinsic(hlsl, "WaveActiveProduct($1)")
matrix<T,N,M> WaveMaskProduct(WaveMask mask, matrix<T,N,M> expr);
+__intrinsic_op($(kIROp_RequireGLSLExtension))
+void __requireGLSLExtension(String extensionName);
+
__generic<T : __BuiltinArithmeticType>
__glsl_extension(GL_KHR_shader_subgroup_arithmetic)
__spirv_version(1.3)
@@ -7562,7 +7572,9 @@ T WaveMaskSum(WaveMask mask, T expr)
{
__target_switch
{
- case glsl: __intrinsic_asm "subgroupAdd($1)";
+ case glsl:
+ if (__isHalf<T>()) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16");
+ __intrinsic_asm "subgroupAdd($1)";
case cuda: __intrinsic_asm "_waveSum($0, $1)";
case hlsl: __intrinsic_asm "WaveActiveSum($1)";
case spirv:
@@ -7591,7 +7603,9 @@ vector<T,N> WaveMaskSum(WaveMask mask, vector<T,N> expr)
{
__target_switch
{
- case glsl: __intrinsic_asm "subgroupAdd($1)";
+ case glsl:
+ if (__isHalf<T>()) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16");
+ __intrinsic_asm "subgroupAdd($1)";
case cuda: __intrinsic_asm "_waveSumMultiple($0, $1)";
case hlsl: __intrinsic_asm "WaveActiveSum($1)";
case spirv:
@@ -7627,6 +7641,7 @@ bool WaveMaskAllEqual(WaveMask mask, T value)
__target_switch
{
case glsl:
+ if (__isHalf<T>()) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16");
__intrinsic_asm "subgroupAllEqual($1)";
case hlsl:
__intrinsic_asm "WaveActiveAllEqual($1)";
@@ -7651,6 +7666,7 @@ bool WaveMaskAllEqual(WaveMask mask, vector<T,N> value)
__target_switch
{
case glsl:
+ if (__isHalf<T>()) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16");
__intrinsic_asm "subgroupAllEqual($1)";
case hlsl:
__intrinsic_asm "WaveActiveAllEqual($1)";
@@ -7681,7 +7697,9 @@ T WaveMaskPrefixProduct(WaveMask mask, T expr)
{
__target_switch
{
- case glsl: __intrinsic_asm "subgroupExclusiveMul($1)";
+ case glsl:
+ if (__isHalf<T>()) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16");
+ __intrinsic_asm "subgroupExclusiveMul($1)";
case cuda: __intrinsic_asm "_wavePrefixProduct($0, $1)";
case hlsl: __intrinsic_asm "WavePrefixProduct($1)";
case spirv:
@@ -7710,7 +7728,9 @@ vector<T,N> WaveMaskPrefixProduct(WaveMask mask, vector<T,N> expr)
{
__target_switch
{
- case glsl: __intrinsic_asm "subgroupExclusiveMul($1)";
+ case glsl:
+ if (__isHalf<T>()) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16");
+ __intrinsic_asm "subgroupExclusiveMul($1)";
case cuda: __intrinsic_asm "_wavePrefixProductMultiple($0, $1)";
case hlsl: __intrinsic_asm "WavePrefixProduct($1)";
case spirv:
@@ -7744,7 +7764,9 @@ T WaveMaskPrefixSum(WaveMask mask, T expr)
{
__target_switch
{
- case glsl: __intrinsic_asm "subgroupExclusiveAdd($1)";
+ case glsl:
+ if (__isHalf<T>()) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16");
+ __intrinsic_asm "subgroupExclusiveAdd($1)";
case cuda: __intrinsic_asm "_wavePrefixSum($0, $1)";
case hlsl: __intrinsic_asm "WavePrefixSum($1)";
case spirv:
@@ -7774,7 +7796,9 @@ vector<T,N> WaveMaskPrefixSum(WaveMask mask, vector<T,N> expr)
{
__target_switch
{
- case glsl: __intrinsic_asm "subgroupExclusiveAdd($1)";
+ case glsl:
+ if (__isHalf<T>()) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16");
+ __intrinsic_asm "subgroupExclusiveAdd($1)";
case cuda: __intrinsic_asm "_wavePrefixSumMultiple($0, $1)";
case hlsl: __intrinsic_asm "WavePrefixSum($1)";
case spirv:
@@ -8281,7 +8305,9 @@ T WaveActive$(opName.hlslName)(T expr)
{
__target_switch
{
- case glsl: __intrinsic_asm "subgroup$(opName.glslName)($0)";
+ case glsl:
+ if (__isHalf<T>()) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16");
+ __intrinsic_asm "subgroup$(opName.glslName)($0)";
case hlsl: __intrinsic_asm "WaveActive$(opName.hlslName)";
case spirv:
if (__isFloat<T>())
@@ -8320,7 +8346,9 @@ vector<T,N> WaveActive$(opName.hlslName)(vector<T,N> expr)
{
__target_switch
{
- case glsl: __intrinsic_asm "subgroup$(opName.glslName)($0)";
+ case glsl:
+ if (__isHalf<T>()) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16");
+ __intrinsic_asm "subgroup$(opName.glslName)($0)";
case hlsl: __intrinsic_asm "WaveActive$(opName.hlslName)";
case spirv:
if (__isFloat<T>())
@@ -8574,7 +8602,9 @@ T WavePrefixProduct(T expr)
{
__target_switch
{
- case glsl: __intrinsic_asm "subgroupExclusiveMul($0)";
+ case glsl:
+ if (__isHalf<T>()) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16");
+ __intrinsic_asm "subgroupExclusiveMul($0)";
case hlsl: __intrinsic_asm "WavePrefixProduct";
case spirv:
if (__isFloat<T>())
@@ -8609,7 +8639,9 @@ vector<T,N> WavePrefixProduct(vector<T,N> expr)
{
__target_switch
{
- case glsl: __intrinsic_asm "subgroupExclusiveMul($0)";
+ case glsl:
+ if (__isHalf<T>()) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16");
+ __intrinsic_asm "subgroupExclusiveMul($0)";
case hlsl: __intrinsic_asm "WavePrefixProduct";
case spirv:
if (__isFloat<T>())
@@ -8647,7 +8679,9 @@ T WavePrefixSum(T expr)
{
__target_switch
{
- case glsl: __intrinsic_asm "subgroupExclusiveAdd($0)";
+ case glsl:
+ if (__isHalf<T>()) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16");
+ __intrinsic_asm "subgroupExclusiveAdd($0)";
case hlsl: __intrinsic_asm "WavePrefixSum";
case spirv:
if (__isFloat<T>())
@@ -8678,7 +8712,9 @@ vector<T,N> WavePrefixSum(vector<T,N> expr)
{
__target_switch
{
- case glsl: __intrinsic_asm "subgroupExclusiveAdd($0)";
+ case glsl:
+ if (__isHalf<T>()) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16");
+ __intrinsic_asm "subgroupExclusiveAdd($0)";
case hlsl: __intrinsic_asm "WavePrefixSum";
case spirv:
if (__isFloat<T>())
@@ -8716,7 +8752,9 @@ T WaveReadLaneFirst(T expr)
{
__target_switch
{
- case glsl: __intrinsic_asm "subgroupBroadcastFirst($0)";
+ case glsl:
+ if (__isHalf<T>()) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16");
+ __intrinsic_asm "subgroupBroadcastFirst($0)";
case hlsl: __intrinsic_asm "WaveReadLaneFirst";
case spirv:
return spirv_asm {OpCapability GroupNonUniformBallot; OpGroupNonUniformBroadcastFirst $$T result Subgroup $expr};
@@ -8732,7 +8770,9 @@ vector<T,N> WaveReadLaneFirst(vector<T,N> expr)
{
__target_switch
{
- case glsl: __intrinsic_asm "subgroupBroadcastFirst($0)";
+ case glsl:
+ if (__isHalf<T>()) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16");
+ __intrinsic_asm "subgroupBroadcastFirst($0)";
case hlsl: __intrinsic_asm "WaveReadLaneFirst";
case spirv:
return spirv_asm {OpCapability GroupNonUniformBallot; OpGroupNonUniformBroadcastFirst $$vector<T,N> result Subgroup $expr};
@@ -8761,7 +8801,9 @@ T WaveBroadcastLaneAt(T value, constexpr int lane)
{
__target_switch
{
- case glsl: __intrinsic_asm "subgroupBroadcast($0, $1)";
+ case glsl:
+ if (__isHalf<T>()) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16");
+ __intrinsic_asm "subgroupBroadcast($0, $1)";
case hlsl: __intrinsic_asm "WaveReadLaneAt";
case spirv:
let ulane = uint(lane);
@@ -8778,7 +8820,9 @@ vector<T,N> WaveBroadcastLaneAt(vector<T,N> value, constexpr int lane)
{
__target_switch
{
- case glsl: __intrinsic_asm "subgroupBroadcast($0, $1)";
+ case glsl:
+ if (__isHalf<T>()) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16");
+ __intrinsic_asm "subgroupBroadcast($0, $1)";
case hlsl: __intrinsic_asm "WaveReadLaneAt";
case spirv:
let ulane = uint(lane);
@@ -8805,7 +8849,9 @@ T WaveReadLaneAt(T value, int lane)
{
__target_switch
{
- case glsl: __intrinsic_asm "subgroupShuffle($0, $1)";
+ case glsl:
+ if (__isHalf<T>()) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16");
+ __intrinsic_asm "subgroupShuffle($0, $1)";
case hlsl: __intrinsic_asm "WaveReadLaneAt";
case spirv:
let ulane = uint(lane);
@@ -8822,7 +8868,9 @@ vector<T,N> WaveReadLaneAt(vector<T,N> value, int lane)
{
__target_switch
{
- case glsl: __intrinsic_asm "subgroupShuffle($0, $1)";
+ case glsl:
+ if (__isHalf<T>()) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16");
+ __intrinsic_asm "subgroupShuffle($0, $1)";
case hlsl: __intrinsic_asm "WaveReadLaneAt";
case spirv:
let ulane = uint(lane);
@@ -8850,7 +8898,9 @@ T WaveShuffle(T value, int lane)
{
__target_switch
{
- case glsl: __intrinsic_asm "subgroupShuffle($0, $1)";
+ case glsl:
+ if (__isHalf<T>()) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16");
+ __intrinsic_asm "subgroupShuffle($0, $1)";
case hlsl: __intrinsic_asm "WaveReadLaneAt";
case spirv:
let ulane = uint(lane);
@@ -8867,7 +8917,9 @@ vector<T,N> WaveShuffle(vector<T,N> value, int lane)
{
__target_switch
{
- case glsl: __intrinsic_asm "subgroupShuffle($0, $1)";
+ case glsl:
+ if (__isHalf<T>()) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16");
+ __intrinsic_asm "subgroupShuffle($0, $1)";
case hlsl: __intrinsic_asm "WaveReadLaneAt";
case spirv:
let ulane = uint(lane);
@@ -8890,7 +8942,8 @@ uint WavePrefixCountBits(bool value)
{
__target_switch
{
- case glsl: __intrinsic_asm "subgroupBallotExclusiveBitCount(subgroupBallot($0))";
+ case glsl:
+ __intrinsic_asm "subgroupBallotExclusiveBitCount(subgroupBallot($0))";
case hlsl: __intrinsic_asm "WavePrefixCountBits($0)";
case spirv:
return spirv_asm
@@ -8910,7 +8963,8 @@ uint4 WaveGetConvergedMulti()
{
__target_switch
{
- case glsl: __intrinsic_asm "subgroupBallot(true)";
+ case glsl:
+ __intrinsic_asm "subgroupBallot(true)";
case hlsl: __intrinsic_asm "WaveActiveBallot(true)";
case cuda: __intrinsic_asm "make_uint4(__activemask(), 0, 0, 0)";
case spirv: