From b94a12b91086ea004d9b78fa8a14fd4726af9e76 Mon Sep 17 00:00:00 2001 From: jsmall-nvidia Date: Fri, 6 Mar 2020 15:46:35 -0500 Subject: Wave intrinsics for Vector and Matrix types (#1262) * Update slang-binaries to version with SPIR-V version support. * Support vec and matrix Wave intrinsics on vk. Added wave-vector.slang test Add wave-diverge.slang test Add support for more wave intrinsics to vk. * Test out Wave intrinsic support for matrices. * Remove matrix glsl intrinsics -> not available. Fix some typo. --- source/slang/hlsl.meta.slang | 154 +++++++++++++++----- source/slang/hlsl.meta.slang.h | 156 ++++++++++++++++----- tests/hlsl-intrinsic/wave-active-product.slang | 2 +- tests/hlsl-intrinsic/wave-diverge.slang | 26 ++++ .../hlsl-intrinsic/wave-diverge.slang.expected.txt | 4 + tests/hlsl-intrinsic/wave-matrix.slang | 37 +++++ .../hlsl-intrinsic/wave-matrix.slang.expected.txt | 8 ++ tests/hlsl-intrinsic/wave-vector.slang | 29 ++++ .../hlsl-intrinsic/wave-vector.slang.expected.txt | 8 ++ tests/hlsl-intrinsic/wave.slang | 2 +- 10 files changed, 353 insertions(+), 73 deletions(-) create mode 100644 tests/hlsl-intrinsic/wave-diverge.slang create mode 100644 tests/hlsl-intrinsic/wave-diverge.slang.expected.txt create mode 100644 tests/hlsl-intrinsic/wave-matrix.slang create mode 100644 tests/hlsl-intrinsic/wave-matrix.slang.expected.txt create mode 100644 tests/hlsl-intrinsic/wave-vector.slang create mode 100644 tests/hlsl-intrinsic/wave-vector.slang.expected.txt diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang index b020ef4d4..572b64b21 100644 --- a/source/slang/hlsl.meta.slang +++ b/source/slang/hlsl.meta.slang @@ -2230,8 +2230,13 @@ __spirv_version(1.3) __target_intrinsic(glsl, "subgroupAnd($0)") __target_intrinsic(cuda, "_waveAnd(__activemask(), $0)") T WaveActiveBitAnd(T expr); -__generic vector WaveActiveBitAnd(vector expr); -__generic matrix WaveActiveBitAnd(matrix expr); +__generic
+__glsl_extension(GL_KHR_shader_subgroup_arithmetic) +__spirv_version(1.3) +__target_intrinsic(glsl, "subgroupAnd($0)") +vector WaveActiveBitAnd(vector expr); +__generic +matrix WaveActiveBitAnd(matrix expr); __generic __glsl_extension(GL_KHR_shader_subgroup_arithmetic) @@ -2239,8 +2244,13 @@ __spirv_version(1.3) __target_intrinsic(glsl, "subgroupOr($0)") __target_intrinsic(cuda, "_waveOr(__activemask(), $0)") T WaveActiveBitOr(T expr); -__generic vector WaveActiveBitOr(vector expr); -__generic matrix WaveActiveBitOr(matrix expr); +__generic +__glsl_extension(GL_KHR_shader_subgroup_arithmetic) +__spirv_version(1.3) +__target_intrinsic(glsl, "subgroupOr($0)") +vector WaveActiveBitOr(vector expr); +__generic +matrix WaveActiveBitOr(matrix expr); __generic __glsl_extension(GL_KHR_shader_subgroup_arithmetic) @@ -2248,8 +2258,13 @@ __spirv_version(1.3) __target_intrinsic(glsl, "subgroupXor($0)") __target_intrinsic(cuda, "_waveXor(__activemask(), $0)") T WaveActiveBitXor(T expr); -__generic vector WaveActiveBitXor(vector expr); -__generic matrix WaveActiveBitXor(matrix expr); +__generic +__glsl_extension(GL_KHR_shader_subgroup_arithmetic) +__spirv_version(1.3) +__target_intrinsic(glsl, "subgroupXor($0)") +vector WaveActiveBitXor(vector expr); +__generic +matrix WaveActiveBitXor(matrix expr); __generic __glsl_extension(GL_KHR_shader_subgroup_arithmetic) @@ -2257,8 +2272,13 @@ __spirv_version(1.3) __target_intrinsic(glsl, "subgroupMax($0)") __target_intrinsic(cuda, "_waveMax(__activemask(), $0)") T WaveActiveMax(T expr); -__generic vector WaveActiveMax(vector expr); -__generic matrix WaveActiveMax(matrix expr); +__generic +__glsl_extension(GL_KHR_shader_subgroup_arithmetic) +__spirv_version(1.3) +__target_intrinsic(glsl, "subgroupMax($0)") +vector WaveActiveMax(vector expr); +__generic +matrix WaveActiveMax(matrix expr); __generic __glsl_extension(GL_KHR_shader_subgroup_arithmetic) @@ -2266,8 +2286,13 @@ __spirv_version(1.3) __target_intrinsic(glsl, "subgroupMin($0)") 
__target_intrinsic(cuda, "_waveMin(__activemask(), $0)") T WaveActiveMin(T expr); -__generic vector WaveActiveMin(vector expr); -__generic matrix WaveActiveMin(matrix expr); +__generic +__glsl_extension(GL_KHR_shader_subgroup_arithmetic) +__spirv_version(1.3) +__target_intrinsic(glsl, "subgroupMin($0)") +vector WaveActiveMin(vector expr); +__generic +matrix WaveActiveMin(matrix expr); __generic __glsl_extension(GL_KHR_shader_subgroup_arithmetic) @@ -2275,8 +2300,13 @@ __spirv_version(1.3) __target_intrinsic(glsl, "subgroupMul($0)") __target_intrinsic(cuda, "_waveProduct(__activemask(), $0)") T WaveActiveProduct(T expr); -__generic vector WaveActiveProduct(vector expr); -__generic matrix WaveActiveProduct(matrix expr); +__generic +__glsl_extension(GL_KHR_shader_subgroup_arithmetic) +__spirv_version(1.3) +__target_intrinsic(glsl, "subgroupMul($0)") +vector WaveActiveProduct(vector expr); +__generic +matrix WaveActiveProduct(matrix expr); __generic __glsl_extension(GL_KHR_shader_subgroup_arithmetic) @@ -2284,8 +2314,13 @@ __spirv_version(1.3) __target_intrinsic(glsl, "subgroupAdd($0)") __target_intrinsic(cuda, "_waveSum(__activemask(), $0)") T WaveActiveSum(T expr); -__generic vector WaveActiveSum(vector expr); -__generic matrix WaveActiveSum(matrix expr); +__generic +__glsl_extension(GL_KHR_shader_subgroup_arithmetic) +__spirv_version(1.3) +__target_intrinsic(glsl, "subgroupAdd($0)") +vector WaveActiveSum(vector expr); +__generic +matrix WaveActiveSum(matrix expr); __generic __glsl_extension(GL_KHR_shader_subgroup_vote) @@ -2293,8 +2328,13 @@ __spirv_version(1.3) __target_intrinsic(glsl, "subgroupAllEqual($0)") __target_intrinsic(cuda, "_waveAllEqual(__activemask(), $0)") bool WaveActiveAllEqual(T value); -__generic vector WaveActiveAllEqual(vector value); -__generic matrix WaveActiveAllEqual(matrix value); +__generic +__glsl_extension(GL_KHR_shader_subgroup_vote) +__spirv_version(1.3) +__target_intrinsic(glsl, "subgroupAllEqual($0)") +vector 
WaveActiveAllEqual(vector value); +__generic +matrix WaveActiveAllEqual(matrix value); __generic uint4 WaveMatch(T value); __generic uint4 WaveMatch(vector value); @@ -2357,34 +2397,69 @@ __glsl_extension(GL_KHR_shader_subgroup_arithmetic) __spirv_version(1.3) __target_intrinsic(glsl, "subgroupExcusiveMul($0)") T WavePrefixProduct(T expr); -__generic vector WavePrefixProduct(vector expr); -__generic matrix WavePrefixProduct(matrix expr); +__generic +__glsl_extension(GL_KHR_shader_subgroup_arithmetic) +__spirv_version(1.3) +__target_intrinsic(glsl, "subgroupExcusiveMul($0)") +vector WavePrefixProduct(vector expr); +__generic +matrix WavePrefixProduct(matrix expr); __generic __glsl_extension(GL_KHR_shader_subgroup_arithmetic) __spirv_version(1.3) __target_intrinsic(glsl, "subgroupExcusiveAdd($0)") T WavePrefixSum(T expr); -__generic vector WavePrefixSum(vector expr); -__generic matrix WavePrefixSum(matrix expr); +__generic +__glsl_extension(GL_KHR_shader_subgroup_arithmetic) +__spirv_version(1.3) +__target_intrinsic(glsl, "subgroupExcusiveAdd($0)") +vector WavePrefixSum(vector expr); +__generic +matrix WavePrefixSum(matrix expr); -__generic T WaveMultiPrefixBitAnd(T expr); -__generic vector WaveMultiPrefixBitAnd(vector expr); -__generic matrix WaveMultiPrefixBitAnd(matrix expr); +__generic +__glsl_extension(GL_KHR_shader_subgroup_arithmetic) +__spirv_version(1.3) +__target_intrinsic(glsl, "subgroupExcusiveAnd($0)") +T WaveMultiPrefixBitAnd(T expr); +__glsl_extension(GL_KHR_shader_subgroup_arithmetic) +__spirv_version(1.3) +__target_intrinsic(glsl, "subgroupExcusiveAnd($0)") +__generic +vector WaveMultiPrefixBitAnd(vector expr); +__generic +matrix WaveMultiPrefixBitAnd(matrix expr); -__generic T WaveMultiPrefixBitOr(T expr); -__generic vector WaveMultiPrefixBitOr(vector expr); -__generic matrix WaveMultiPrefixBitOr(matrix expr); +__generic +__glsl_extension(GL_KHR_shader_subgroup_arithmetic) +__spirv_version(1.3) +__target_intrinsic(glsl, "subgroupExcusiveOr($0)") +T 
WaveMultiPrefixBitOr(T expr); +__generic +__glsl_extension(GL_KHR_shader_subgroup_arithmetic) +__spirv_version(1.3) +__target_intrinsic(glsl, "subgroupExcusiveOr($0)") +vector WaveMultiPrefixBitOr(vector expr); +__generic +matrix WaveMultiPrefixBitOr(matrix expr); __generic +__glsl_extension(GL_KHR_shader_subgroup_arithmetic) +__spirv_version(1.3) +__target_intrinsic(glsl, "subgroupExcusiveXor($0)") T WaveMultiPrefixBitXor(T expr); -__generic vector WaveMultiPrefixBitXor(vector expr); -__generic matrix WaveMultiPrefixBitXor(matrix expr); +__generic +__glsl_extension(GL_KHR_shader_subgroup_arithmetic) +__spirv_version(1.3) +__target_intrinsic(glsl, "subgroupExcusiveXor($0)") +vector WaveMultiPrefixBitXor(vector expr); +__generic +matrix WaveMultiPrefixBitXor(matrix expr); -// TODO(JS): This takes uvec4 parameter on GLSL __glsl_extension(GL_KHR_shader_subgroup_ballot) __spirv_version(1.3) -__target_intrinsic(glsl, "subgroupBallotExclusiveBitCount($0)") +__target_intrinsic(glsl, "subgroupBallotExclusiveBitCount(subgroupBallot($0))") __target_intrinsic(cuda, "__popc(__ballot_sync(__activemask(), $0) & _getLaneLtMask())") uint WavePrefixCountBits(bool value); @@ -2401,11 +2476,16 @@ __generic matrix W __generic __glsl_extension(GL_KHR_shader_subgroup_ballot) __spirv_version(1.3) -__target_intrinsic(glsl, "subgoupBroadcastFirst($0)") +__target_intrinsic(glsl, "subgroupBroadcastFirst($0)") __target_intrinsic(cuda, "_waveReadFirst($0)") T WaveReadLaneFirst(T expr); -__generic vector WaveReadLaneFirst(vector expr); -__generic matrix WaveReadLaneFirst(matrix expr); +__generic +__glsl_extension(GL_KHR_shader_subgroup_ballot) +__spirv_version(1.3) +__target_intrinsic(glsl, "subgroupBroadcastFirst($0)") +vector WaveReadLaneFirst(vector expr); +__generic +matrix WaveReadLaneFirst(matrix expr); __generic __glsl_extension(GL_KHR_shader_subgroup_ballot) @@ -2413,8 +2493,12 @@ __spirv_version(1.3) __target_intrinsic(glsl, "subgroupBroadcast($0, $1)") __target_intrinsic(cuda, 
"__shfl_sync(SLANG_CUDA_WARP_MASK, $0, $1)") T WaveReadLaneAt(T value, int lane); -__generic vector WaveReadLaneAt(vector value, int lane); -__generic matrix WaveReadLaneAt(matrix value, int lane); +__generic +__spirv_version(1.3) +__target_intrinsic(glsl, "subgroupBroadcast($0, $1)") +vector WaveReadLaneAt(vector value, int lane); +__generic +matrix WaveReadLaneAt(matrix value, int lane); // `typedef`s to help with the fact that HLSL has been sorta-kinda case insensitive at various points diff --git a/source/slang/hlsl.meta.slang.h b/source/slang/hlsl.meta.slang.h index 677fa48f1..216f45bbe 100644 --- a/source/slang/hlsl.meta.slang.h +++ b/source/slang/hlsl.meta.slang.h @@ -2306,8 +2306,13 @@ SLANG_RAW("__spirv_version(1.3)\n") SLANG_RAW("__target_intrinsic(glsl, \"subgroupAnd($0)\")\n") SLANG_RAW("__target_intrinsic(cuda, \"_waveAnd(__activemask(), $0)\")\n") SLANG_RAW("T WaveActiveBitAnd(T expr);\n") -SLANG_RAW("__generic vector WaveActiveBitAnd(vector expr);\n") -SLANG_RAW("__generic matrix WaveActiveBitAnd(matrix expr);\n") +SLANG_RAW("__generic\n") +SLANG_RAW("__glsl_extension(GL_KHR_shader_subgroup_arithmetic)\n") +SLANG_RAW("__spirv_version(1.3)\n") +SLANG_RAW("__target_intrinsic(glsl, \"subgroupAnd($0)\")\n") +SLANG_RAW("vector WaveActiveBitAnd(vector expr);\n") +SLANG_RAW("__generic\n") +SLANG_RAW("matrix WaveActiveBitAnd(matrix expr);\n") SLANG_RAW("\n") SLANG_RAW("__generic\n") SLANG_RAW("__glsl_extension(GL_KHR_shader_subgroup_arithmetic)\n") @@ -2315,8 +2320,13 @@ SLANG_RAW("__spirv_version(1.3)\n") SLANG_RAW("__target_intrinsic(glsl, \"subgroupOr($0)\")\n") SLANG_RAW("__target_intrinsic(cuda, \"_waveOr(__activemask(), $0)\")\n") SLANG_RAW("T WaveActiveBitOr(T expr);\n") -SLANG_RAW("__generic vector WaveActiveBitOr(vector expr);\n") -SLANG_RAW("__generic matrix WaveActiveBitOr(matrix expr);\n") +SLANG_RAW("__generic\n") +SLANG_RAW("__glsl_extension(GL_KHR_shader_subgroup_arithmetic)\n") +SLANG_RAW("__spirv_version(1.3)\n") 
+SLANG_RAW("__target_intrinsic(glsl, \"subgroupOr($0)\")\n") +SLANG_RAW("vector WaveActiveBitOr(vector expr);\n") +SLANG_RAW("__generic\n") +SLANG_RAW("matrix WaveActiveBitOr(matrix expr);\n") SLANG_RAW("\n") SLANG_RAW("__generic\n") SLANG_RAW("__glsl_extension(GL_KHR_shader_subgroup_arithmetic)\n") @@ -2324,8 +2334,13 @@ SLANG_RAW("__spirv_version(1.3)\n") SLANG_RAW("__target_intrinsic(glsl, \"subgroupXor($0)\")\n") SLANG_RAW("__target_intrinsic(cuda, \"_waveXor(__activemask(), $0)\")\n") SLANG_RAW("T WaveActiveBitXor(T expr);\n") -SLANG_RAW("__generic vector WaveActiveBitXor(vector expr);\n") -SLANG_RAW("__generic matrix WaveActiveBitXor(matrix expr);\n") +SLANG_RAW("__generic \n") +SLANG_RAW("__glsl_extension(GL_KHR_shader_subgroup_arithmetic)\n") +SLANG_RAW("__spirv_version(1.3)\n") +SLANG_RAW("__target_intrinsic(glsl, \"subgroupXor($0)\")\n") +SLANG_RAW("vector WaveActiveBitXor(vector expr);\n") +SLANG_RAW("__generic\n") +SLANG_RAW("matrix WaveActiveBitXor(matrix expr);\n") SLANG_RAW("\n") SLANG_RAW("__generic\n") SLANG_RAW("__glsl_extension(GL_KHR_shader_subgroup_arithmetic)\n") @@ -2333,8 +2348,13 @@ SLANG_RAW("__spirv_version(1.3)\n") SLANG_RAW("__target_intrinsic(glsl, \"subgroupMax($0)\")\n") SLANG_RAW("__target_intrinsic(cuda, \"_waveMax(__activemask(), $0)\")\n") SLANG_RAW("T WaveActiveMax(T expr);\n") -SLANG_RAW("__generic vector WaveActiveMax(vector expr);\n") -SLANG_RAW("__generic matrix WaveActiveMax(matrix expr);\n") +SLANG_RAW("__generic\n") +SLANG_RAW("__glsl_extension(GL_KHR_shader_subgroup_arithmetic)\n") +SLANG_RAW("__spirv_version(1.3)\n") +SLANG_RAW("__target_intrinsic(glsl, \"subgroupMax($0)\")\n") +SLANG_RAW("vector WaveActiveMax(vector expr);\n") +SLANG_RAW("__generic\n") +SLANG_RAW("matrix WaveActiveMax(matrix expr);\n") SLANG_RAW("\n") SLANG_RAW("__generic\n") SLANG_RAW("__glsl_extension(GL_KHR_shader_subgroup_arithmetic)\n") @@ -2342,8 +2362,13 @@ SLANG_RAW("__spirv_version(1.3)\n") SLANG_RAW("__target_intrinsic(glsl, 
\"subgroupMin($0)\")\n") SLANG_RAW("__target_intrinsic(cuda, \"_waveMin(__activemask(), $0)\")\n") SLANG_RAW("T WaveActiveMin(T expr);\n") -SLANG_RAW("__generic vector WaveActiveMin(vector expr);\n") -SLANG_RAW("__generic matrix WaveActiveMin(matrix expr);\n") +SLANG_RAW("__generic\n") +SLANG_RAW("__glsl_extension(GL_KHR_shader_subgroup_arithmetic)\n") +SLANG_RAW("__spirv_version(1.3)\n") +SLANG_RAW("__target_intrinsic(glsl, \"subgroupMin($0)\")\n") +SLANG_RAW("vector WaveActiveMin(vector expr);\n") +SLANG_RAW("__generic\n") +SLANG_RAW("matrix WaveActiveMin(matrix expr);\n") SLANG_RAW("\n") SLANG_RAW("__generic\n") SLANG_RAW("__glsl_extension(GL_KHR_shader_subgroup_arithmetic)\n") @@ -2351,8 +2376,13 @@ SLANG_RAW("__spirv_version(1.3)\n") SLANG_RAW("__target_intrinsic(glsl, \"subgroupMul($0)\")\n") SLANG_RAW("__target_intrinsic(cuda, \"_waveProduct(__activemask(), $0)\")\n") SLANG_RAW("T WaveActiveProduct(T expr);\n") -SLANG_RAW("__generic vector WaveActiveProduct(vector expr);\n") -SLANG_RAW("__generic matrix WaveActiveProduct(matrix expr);\n") +SLANG_RAW("__generic\n") +SLANG_RAW("__glsl_extension(GL_KHR_shader_subgroup_arithmetic)\n") +SLANG_RAW("__spirv_version(1.3)\n") +SLANG_RAW("__target_intrinsic(glsl, \"subgroupMul($0)\")\n") +SLANG_RAW("vector WaveActiveProduct(vector expr);\n") +SLANG_RAW("__generic\n") +SLANG_RAW("matrix WaveActiveProduct(matrix expr);\n") SLANG_RAW("\n") SLANG_RAW("__generic\n") SLANG_RAW("__glsl_extension(GL_KHR_shader_subgroup_arithmetic)\n") @@ -2360,8 +2390,13 @@ SLANG_RAW("__spirv_version(1.3)\n") SLANG_RAW("__target_intrinsic(glsl, \"subgroupAdd($0)\")\n") SLANG_RAW("__target_intrinsic(cuda, \"_waveSum(__activemask(), $0)\")\n") SLANG_RAW("T WaveActiveSum(T expr);\n") -SLANG_RAW("__generic vector WaveActiveSum(vector expr);\n") -SLANG_RAW("__generic matrix WaveActiveSum(matrix expr);\n") +SLANG_RAW("__generic\n") +SLANG_RAW("__glsl_extension(GL_KHR_shader_subgroup_arithmetic)\n") +SLANG_RAW("__spirv_version(1.3)\n") 
+SLANG_RAW("__target_intrinsic(glsl, \"subgroupAdd($0)\")\n") +SLANG_RAW("vector WaveActiveSum(vector expr);\n") +SLANG_RAW("__generic\n") +SLANG_RAW("matrix WaveActiveSum(matrix expr);\n") SLANG_RAW("\n") SLANG_RAW("__generic\n") SLANG_RAW("__glsl_extension(GL_KHR_shader_subgroup_vote)\n") @@ -2369,8 +2404,13 @@ SLANG_RAW("__spirv_version(1.3)\n") SLANG_RAW("__target_intrinsic(glsl, \"subgroupAllEqual($0)\")\n") SLANG_RAW("__target_intrinsic(cuda, \"_waveAllEqual(__activemask(), $0)\")\n") SLANG_RAW("bool WaveActiveAllEqual(T value);\n") -SLANG_RAW("__generic vector WaveActiveAllEqual(vector value);\n") -SLANG_RAW("__generic matrix WaveActiveAllEqual(matrix value);\n") +SLANG_RAW("__generic \n") +SLANG_RAW("__glsl_extension(GL_KHR_shader_subgroup_vote)\n") +SLANG_RAW("__spirv_version(1.3)\n") +SLANG_RAW("__target_intrinsic(glsl, \"subgroupAllEqual($0)\")\n") +SLANG_RAW("vector WaveActiveAllEqual(vector value);\n") +SLANG_RAW("__generic\n") +SLANG_RAW("matrix WaveActiveAllEqual(matrix value);\n") SLANG_RAW("\n") SLANG_RAW("__generic uint4 WaveMatch(T value);\n") SLANG_RAW("__generic uint4 WaveMatch(vector value);\n") @@ -2433,34 +2473,69 @@ SLANG_RAW("__glsl_extension(GL_KHR_shader_subgroup_arithmetic)\n") SLANG_RAW("__spirv_version(1.3)\n") SLANG_RAW("__target_intrinsic(glsl, \"subgroupExcusiveMul($0)\")\n") SLANG_RAW("T WavePrefixProduct(T expr);\n") -SLANG_RAW("__generic vector WavePrefixProduct(vector expr);\n") -SLANG_RAW("__generic matrix WavePrefixProduct(matrix expr);\n") +SLANG_RAW("__generic\n") +SLANG_RAW("__glsl_extension(GL_KHR_shader_subgroup_arithmetic)\n") +SLANG_RAW("__spirv_version(1.3)\n") +SLANG_RAW("__target_intrinsic(glsl, \"subgroupExcusiveMul($0)\")\n") +SLANG_RAW("vector WavePrefixProduct(vector expr);\n") +SLANG_RAW("__generic\n") +SLANG_RAW("matrix WavePrefixProduct(matrix expr);\n") SLANG_RAW("\n") SLANG_RAW("__generic\n") SLANG_RAW("__glsl_extension(GL_KHR_shader_subgroup_arithmetic)\n") SLANG_RAW("__spirv_version(1.3)\n") 
SLANG_RAW("__target_intrinsic(glsl, \"subgroupExcusiveAdd($0)\")\n") SLANG_RAW("T WavePrefixSum(T expr);\n") -SLANG_RAW("__generic vector WavePrefixSum(vector expr);\n") -SLANG_RAW("__generic matrix WavePrefixSum(matrix expr);\n") +SLANG_RAW("__generic\n") +SLANG_RAW("__glsl_extension(GL_KHR_shader_subgroup_arithmetic)\n") +SLANG_RAW("__spirv_version(1.3)\n") +SLANG_RAW("__target_intrinsic(glsl, \"subgroupExcusiveAdd($0)\")\n") +SLANG_RAW("vector WavePrefixSum(vector expr);\n") +SLANG_RAW("__generic\n") +SLANG_RAW("matrix WavePrefixSum(matrix expr);\n") SLANG_RAW("\n") -SLANG_RAW("__generic T WaveMultiPrefixBitAnd(T expr);\n") -SLANG_RAW("__generic vector WaveMultiPrefixBitAnd(vector expr);\n") -SLANG_RAW("__generic matrix WaveMultiPrefixBitAnd(matrix expr);\n") +SLANG_RAW("__generic\n") +SLANG_RAW("__glsl_extension(GL_KHR_shader_subgroup_arithmetic)\n") +SLANG_RAW("__spirv_version(1.3)\n") +SLANG_RAW("__target_intrinsic(glsl, \"subgroupExcusiveAnd($0)\")\n") +SLANG_RAW("T WaveMultiPrefixBitAnd(T expr);\n") +SLANG_RAW("__glsl_extension(GL_KHR_shader_subgroup_arithmetic)\n") +SLANG_RAW("__spirv_version(1.3)\n") +SLANG_RAW("__target_intrinsic(glsl, \"subgroupExcusiveAnd($0)\")\n") +SLANG_RAW("__generic\n") +SLANG_RAW("vector WaveMultiPrefixBitAnd(vector expr);\n") +SLANG_RAW("__generic\n") +SLANG_RAW("matrix WaveMultiPrefixBitAnd(matrix expr);\n") SLANG_RAW("\n") -SLANG_RAW("__generic T WaveMultiPrefixBitOr(T expr);\n") -SLANG_RAW("__generic vector WaveMultiPrefixBitOr(vector expr);\n") -SLANG_RAW("__generic matrix WaveMultiPrefixBitOr(matrix expr);\n") +SLANG_RAW("__generic\n") +SLANG_RAW("__glsl_extension(GL_KHR_shader_subgroup_arithmetic)\n") +SLANG_RAW("__spirv_version(1.3)\n") +SLANG_RAW("__target_intrinsic(glsl, \"subgroupExcusiveOr($0)\")\n") +SLANG_RAW("T WaveMultiPrefixBitOr(T expr);\n") +SLANG_RAW("__generic\n") +SLANG_RAW("__glsl_extension(GL_KHR_shader_subgroup_arithmetic)\n") +SLANG_RAW("__spirv_version(1.3)\n") +SLANG_RAW("__target_intrinsic(glsl, 
\"subgroupExcusiveOr($0)\")\n") +SLANG_RAW("vector WaveMultiPrefixBitOr(vector expr);\n") +SLANG_RAW("__generic\n") +SLANG_RAW("matrix WaveMultiPrefixBitOr(matrix expr);\n") SLANG_RAW("\n") SLANG_RAW("__generic\n") +SLANG_RAW("__glsl_extension(GL_KHR_shader_subgroup_arithmetic)\n") +SLANG_RAW("__spirv_version(1.3)\n") +SLANG_RAW("__target_intrinsic(glsl, \"subgroupExcusiveXor($0)\")\n") SLANG_RAW("T WaveMultiPrefixBitXor(T expr);\n") -SLANG_RAW("__generic vector WaveMultiPrefixBitXor(vector expr);\n") -SLANG_RAW("__generic matrix WaveMultiPrefixBitXor(matrix expr);\n") +SLANG_RAW("__generic\n") +SLANG_RAW("__glsl_extension(GL_KHR_shader_subgroup_arithmetic)\n") +SLANG_RAW("__spirv_version(1.3)\n") +SLANG_RAW("__target_intrinsic(glsl, \"subgroupExcusiveXor($0)\")\n") +SLANG_RAW("vector WaveMultiPrefixBitXor(vector expr);\n") +SLANG_RAW("__generic\n") +SLANG_RAW("matrix WaveMultiPrefixBitXor(matrix expr);\n") SLANG_RAW("\n") -SLANG_RAW("// TODO(JS): This takes uvec4 parameter on GLSL\n") SLANG_RAW("__glsl_extension(GL_KHR_shader_subgroup_ballot)\n") SLANG_RAW("__spirv_version(1.3)\n") -SLANG_RAW("__target_intrinsic(glsl, \"subgroupBallotExclusiveBitCount($0)\")\n") +SLANG_RAW("__target_intrinsic(glsl, \"subgroupBallotExclusiveBitCount(subgroupBallot($0))\")\n") SLANG_RAW("__target_intrinsic(cuda, \"__popc(__ballot_sync(__activemask(), $0) & _getLaneLtMask())\")\n") SLANG_RAW("uint WavePrefixCountBits(bool value);\n") SLANG_RAW("\n") @@ -2477,11 +2552,16 @@ SLANG_RAW("\n") SLANG_RAW("__generic\n") SLANG_RAW("__glsl_extension(GL_KHR_shader_subgroup_ballot)\n") SLANG_RAW("__spirv_version(1.3)\n") -SLANG_RAW("__target_intrinsic(glsl, \"subgoupBroadcastFirst($0)\")\n") +SLANG_RAW("__target_intrinsic(glsl, \"subgroupBroadcastFirst($0)\")\n") SLANG_RAW("__target_intrinsic(cuda, \"_waveReadFirst($0)\")\n") SLANG_RAW("T WaveReadLaneFirst(T expr);\n") -SLANG_RAW("__generic vector WaveReadLaneFirst(vector expr);\n") -SLANG_RAW("__generic matrix WaveReadLaneFirst(matrix 
expr);\n") +SLANG_RAW("__generic\n") +SLANG_RAW("__glsl_extension(GL_KHR_shader_subgroup_ballot)\n") +SLANG_RAW("__spirv_version(1.3)\n") +SLANG_RAW("__target_intrinsic(glsl, \"subgroupBroadcastFirst($0)\")\n") +SLANG_RAW("vector WaveReadLaneFirst(vector expr);\n") +SLANG_RAW("__generic\n") +SLANG_RAW("matrix WaveReadLaneFirst(matrix expr);\n") SLANG_RAW("\n") SLANG_RAW("__generic\n") SLANG_RAW("__glsl_extension(GL_KHR_shader_subgroup_ballot)\n") @@ -2489,8 +2569,12 @@ SLANG_RAW("__spirv_version(1.3)\n") SLANG_RAW("__target_intrinsic(glsl, \"subgroupBroadcast($0, $1)\")\n") SLANG_RAW("__target_intrinsic(cuda, \"__shfl_sync(SLANG_CUDA_WARP_MASK, $0, $1)\")\n") SLANG_RAW("T WaveReadLaneAt(T value, int lane);\n") -SLANG_RAW("__generic vector WaveReadLaneAt(vector value, int lane);\n") -SLANG_RAW("__generic matrix WaveReadLaneAt(matrix value, int lane);\n") +SLANG_RAW("__generic\n") +SLANG_RAW("__spirv_version(1.3)\n") +SLANG_RAW("__target_intrinsic(glsl, \"subgroupBroadcast($0, $1)\")\n") +SLANG_RAW("vector WaveReadLaneAt(vector value, int lane);\n") +SLANG_RAW("__generic\n") +SLANG_RAW("matrix WaveReadLaneAt(matrix value, int lane);\n") SLANG_RAW("\n") SLANG_RAW("\n") SLANG_RAW("// `typedef`s to help with the fact that HLSL has been sorta-kinda case insensitive at various points\n") @@ -2547,7 +2631,7 @@ for (int aa = 0; aa < kBaseBufferAccessLevelCount; ++aa) sb << "};\n"; } -SLANG_RAW("#line 2474 \"hlsl.meta.slang\"") +SLANG_RAW("#line 2558 \"hlsl.meta.slang\"") SLANG_RAW("\n") SLANG_RAW("\n") SLANG_RAW("\n") diff --git a/tests/hlsl-intrinsic/wave-active-product.slang b/tests/hlsl-intrinsic/wave-active-product.slang index 351df1635..ca3fdcb77 100644 --- a/tests/hlsl-intrinsic/wave-active-product.slang +++ b/tests/hlsl-intrinsic/wave-active-product.slang @@ -2,7 +2,7 @@ //DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-slang -compute //TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 -use-dxil -profile cs_6_0 //TEST(compute, vulkan):COMPARE_COMPUTE_EX:-vk -compute 
-//TEST(compute, vulkan):COMPARE_COMPUTE_EX:-cuda -compute +//TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute //TEST_INPUT:ubuffer(data=[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0], stride=4):out,name outputBuffer RWStructuredBuffer outputBuffer; diff --git a/tests/hlsl-intrinsic/wave-diverge.slang b/tests/hlsl-intrinsic/wave-diverge.slang new file mode 100644 index 000000000..ab83a1553 --- /dev/null +++ b/tests/hlsl-intrinsic/wave-diverge.slang @@ -0,0 +1,26 @@ +//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-cpu -compute +//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-slang -compute +//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 -use-dxil -profile cs_6_0 +//TEST(compute, vulkan):COMPARE_COMPUTE_EX:-vk -compute +//TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute + +//TEST_INPUT:ubuffer(data=[0 0 0 0], stride=4):out,name outputBuffer +RWStructuredBuffer outputBuffer; + +[numthreads(4, 1, 1)] +void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) +{ + int idx = int(dispatchThreadID.x); + + int value = 0; + + if (idx == 2) + { + // diverge + return; + } + + value = WaveActiveMin(idx + 1); + + outputBuffer[idx] = value; +} \ No newline at end of file diff --git a/tests/hlsl-intrinsic/wave-diverge.slang.expected.txt b/tests/hlsl-intrinsic/wave-diverge.slang.expected.txt new file mode 100644 index 000000000..68b8a88e2 --- /dev/null +++ b/tests/hlsl-intrinsic/wave-diverge.slang.expected.txt @@ -0,0 +1,4 @@ +1 +1 +0 +1 diff --git a/tests/hlsl-intrinsic/wave-matrix.slang b/tests/hlsl-intrinsic/wave-matrix.slang new file mode 100644 index 000000000..022182164 --- /dev/null +++ b/tests/hlsl-intrinsic/wave-matrix.slang @@ -0,0 +1,37 @@ +//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-cpu -compute +//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-slang -compute +//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 -use-dxil -profile cs_6_0 +//DISABLE_TEST(compute, vulkan):COMPARE_COMPUTE_EX:-vk -compute +//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute + 
+//TEST_INPUT:ubuffer(data=[0 0 0 0 0 0 0 0], stride=4):out,name outputBuffer +RWStructuredBuffer outputBuffer; + +[numthreads(8, 1, 1)] +void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) +{ + const int idx = int(dispatchThreadID.x); + + // NOTE! dxc only supports bit ops on uint and associated types NOT int + // Also GLSL does not have built in support for int matrices. So we'll just try with float for now + // GLSL does not support matrix types for Wave like intrinsics + + matrix v0 = matrix(idx + 1, idx + 2, idx + 3, idx + 4); + matrix v1 = matrix(v0) + matrix(1, 1, 1, 1); + + + matrix uv0 = matrix(v0[0][0], v0[0][1], v0[1][0], v0[0][1]); + + matrix r0 = WaveActiveSum(v0); + matrix r1 = WaveActiveSum(v1); + matrix r2 = WaveActiveBitXor(uv0); + matrix r3 = WaveActiveBitOr(uv0); + matrix r4 = WaveActiveBitAnd(uv0); + + matrix r5 = r2 + r3 + r4; + matrix r6 = matrix(r5[0][0], r5[0][1], r5[1][0], r5[1][1]); + + matrix r = r0 + matrix(r1) + r6; + + outputBuffer[idx] = r[0][0] + r[0][1] + r[1][0] + r[1][1]; +} \ No newline at end of file diff --git a/tests/hlsl-intrinsic/wave-matrix.slang.expected.txt b/tests/hlsl-intrinsic/wave-matrix.slang.expected.txt new file mode 100644 index 000000000..23f9285c3 --- /dev/null +++ b/tests/hlsl-intrinsic/wave-matrix.slang.expected.txt @@ -0,0 +1,8 @@ +1EC +1EC +1EC +1EC +1EC +1EC +1EC +1EC diff --git a/tests/hlsl-intrinsic/wave-vector.slang b/tests/hlsl-intrinsic/wave-vector.slang new file mode 100644 index 000000000..808f0c5f6 --- /dev/null +++ b/tests/hlsl-intrinsic/wave-vector.slang @@ -0,0 +1,29 @@ +//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-cpu -compute +//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-slang -compute +//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 -use-dxil -profile cs_6_0 +//TEST(compute, vulkan):COMPARE_COMPUTE_EX:-vk -compute +//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute + +//TEST_INPUT:ubuffer(data=[0 0 0 0 0 0 0 0], stride=4):out,name outputBuffer +RWStructuredBuffer 
outputBuffer; + +[numthreads(8, 1, 1)] +void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) +{ + const int idx = int(dispatchThreadID.x); + + int2 v0 = int2(idx + 1, idx + 2); + float2 v1 = float2(idx + 2, idx + 3); + // NOTE! dxc only supports bit ops on uint and associated types NOT int + uint2 uv0 = v0; + + int2 r0 = WaveActiveSum(v0); + float2 r1 = WaveActiveSum(v1); + int2 r2 = WaveActiveBitXor(uv0); + int2 r3 = WaveActiveBitOr(uv0); + int2 r4 = WaveActiveBitAnd(uv0); + + int2 r = r0 + int2(r1) + r2 + r3 + r4; + + outputBuffer[idx] = r.x + r.y; +} \ No newline at end of file diff --git a/tests/hlsl-intrinsic/wave-vector.slang.expected.txt b/tests/hlsl-intrinsic/wave-vector.slang.expected.txt new file mode 100644 index 000000000..eb6984bb6 --- /dev/null +++ b/tests/hlsl-intrinsic/wave-vector.slang.expected.txt @@ -0,0 +1,8 @@ +D6 +D6 +D6 +D6 +D6 +D6 +D6 +D6 diff --git a/tests/hlsl-intrinsic/wave.slang b/tests/hlsl-intrinsic/wave.slang index 9fc9dc26d..d8273080c 100644 --- a/tests/hlsl-intrinsic/wave.slang +++ b/tests/hlsl-intrinsic/wave.slang @@ -2,7 +2,7 @@ //DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-slang -compute //TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 -use-dxil -profile cs_6_0 //TEST(compute, vulkan):COMPARE_COMPUTE_EX:-vk -compute -//TEST(compute, vulkan):COMPARE_COMPUTE_EX:-cuda -compute +//TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute //TEST_INPUT:ubuffer(data=[0 0 0 0], stride=4):out,name outputBuffer RWStructuredBuffer outputBuffer; -- cgit v1.2.3