From a10d9cd8767e88a064719d71cc97144ba8b112d1 Mon Sep 17 00:00:00 2001 From: jsmall-nvidia Date: Tue, 10 Mar 2020 12:31:25 -0400 Subject: WIP Prefix Sum for CUDA (#1268) * Fix some typos. * Add wave-prefix-sum.slang test * First pass at implementing prefixSum. * Small improvements to prefixSum CUDA. * Small improvement to prefix sum. * Enable prefix sum in stdlib. --- source/slang/hlsl.meta.slang | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) (limited to 'source') diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang index 4b717d540..b43cd009f 100644 --- a/source/slang/hlsl.meta.slang +++ b/source/slang/hlsl.meta.slang @@ -2497,12 +2497,12 @@ bool WaveIsFirstLane(); __generic __glsl_extension(GL_KHR_shader_subgroup_arithmetic) __spirv_version(1.3) -__target_intrinsic(glsl, "subgroupExcusiveMul($0)") +__target_intrinsic(glsl, "subgroupExclusiveMul($0)") T WavePrefixProduct(T expr); __generic __glsl_extension(GL_KHR_shader_subgroup_arithmetic) __spirv_version(1.3) -__target_intrinsic(glsl, "subgroupExcusiveMul($0)") +__target_intrinsic(glsl, "subgroupExclusiveMul($0)") vector WavePrefixProduct(vector expr); __generic matrix WavePrefixProduct(matrix expr); @@ -2510,12 +2510,13 @@ matrix WavePrefixProduct(matrix expr); __generic __glsl_extension(GL_KHR_shader_subgroup_arithmetic) __spirv_version(1.3) -__target_intrinsic(glsl, "subgroupExcusiveAdd($0)") +__target_intrinsic(glsl, "subgroupExclusiveAdd($0)") +__target_intrinsic(cuda, "_wavePrefixSum($0)") T WavePrefixSum(T expr); __generic __glsl_extension(GL_KHR_shader_subgroup_arithmetic) __spirv_version(1.3) -__target_intrinsic(glsl, "subgroupExcusiveAdd($0)") +__target_intrinsic(glsl, "subgroupExclusiveAdd($0)") vector WavePrefixSum(vector expr); __generic matrix WavePrefixSum(matrix expr); @@ -2523,11 +2524,11 @@ matrix WavePrefixSum(matrix expr); __generic __glsl_extension(GL_KHR_shader_subgroup_arithmetic) __spirv_version(1.3) -__target_intrinsic(glsl, 
"subgroupExcusiveAnd($0)") +__target_intrinsic(glsl, "subgroupExclusiveAnd($0)") T WaveMultiPrefixBitAnd(T expr); __glsl_extension(GL_KHR_shader_subgroup_arithmetic) __spirv_version(1.3) -__target_intrinsic(glsl, "subgroupExcusiveAnd($0)") +__target_intrinsic(glsl, "subgroupExclusiveAnd($0)") __generic vector WaveMultiPrefixBitAnd(vector expr); __generic @@ -2536,12 +2537,12 @@ matrix WaveMultiPrefixBitAnd(matrix expr); __generic __glsl_extension(GL_KHR_shader_subgroup_arithmetic) __spirv_version(1.3) -__target_intrinsic(glsl, "subgroupExcusiveOr($0)") +__target_intrinsic(glsl, "subgroupExclusiveOr($0)") T WaveMultiPrefixBitOr(T expr); __generic __glsl_extension(GL_KHR_shader_subgroup_arithmetic) __spirv_version(1.3) -__target_intrinsic(glsl, "subgroupExcusiveOr($0)") +__target_intrinsic(glsl, "subgroupExclusiveOr($0)") vector WaveMultiPrefixBitOr(vector expr); __generic matrix WaveMultiPrefixBitOr(matrix expr); @@ -2549,12 +2550,12 @@ matrix WaveMultiPrefixBitOr(matrix expr); __generic __glsl_extension(GL_KHR_shader_subgroup_arithmetic) __spirv_version(1.3) -__target_intrinsic(glsl, "subgroupExcusiveXor($0)") +__target_intrinsic(glsl, "subgroupExclusiveXor($0)") T WaveMultiPrefixBitXor(T expr); __generic __glsl_extension(GL_KHR_shader_subgroup_arithmetic) __spirv_version(1.3) -__target_intrinsic(glsl, "subgroupExcusiveXor($0)") +__target_intrinsic(glsl, "subgroupExclusiveXor($0)") vector WaveMultiPrefixBitXor(vector expr); __generic matrix WaveMultiPrefixBitXor(matrix expr); -- cgit v1.2.3