diff options
| author | jsmall-nvidia <jsmall@nvidia.com> | 2020-03-12 15:47:44 -0400 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2020-03-12 15:47:44 -0400 |
| commit | c1743a52c814377198ec8ee6a22f4487278c57be (patch) | |
| tree | 2cf06644a28a12cbf217ec33f990a2a3cd787264 /source | |
| parent | 69f7d288313eb238bfb42943694dfcd9bb911d3e (diff) | |
Vector & Matrix Prefix Sum & Product (#1272)
* Implement matrix and vector versions of prefixSum and prefix product.
* Add a comment on how the code is organized, noting where it seems it could be more performant.
Diffstat (limited to 'source')
| -rw-r--r-- | source/slang/hlsl.meta.slang | 4 |
1 file changed, 4 insertions, 0 deletions
```diff
diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang
index 03496ccc8..2b556c10b 100644
--- a/source/slang/hlsl.meta.slang
+++ b/source/slang/hlsl.meta.slang
@@ -2677,8 +2677,10 @@ __generic<T : __BuiltinArithmeticType, let N : int>
 __glsl_extension(GL_KHR_shader_subgroup_arithmetic)
 __spirv_version(1.3)
 __target_intrinsic(glsl, "subgroupExclusiveMul($0)")
+__target_intrinsic(cuda, "_wavePrefixProductMultiple($0)")
 vector<T,N> WavePrefixProduct(vector<T,N> expr);
 __generic<T : __BuiltinArithmeticType, let N : int, let M : int>
+__target_intrinsic(cuda, "_wavePrefixProductMultiple($0)")
 matrix<T,N,M> WavePrefixProduct(matrix<T,N,M> expr);
 
 __generic<T : __BuiltinArithmeticType>
@@ -2691,8 +2693,10 @@ __generic<T : __BuiltinArithmeticType, let N : int>
 __glsl_extension(GL_KHR_shader_subgroup_arithmetic)
 __spirv_version(1.3)
 __target_intrinsic(glsl, "subgroupExclusiveAdd($0)")
+__target_intrinsic(cuda, "_wavePrefixSumMultiple($0)")
 vector<T,N> WavePrefixSum(vector<T,N> expr);
 __generic<T : __BuiltinArithmeticType, let N : int, let M : int>
+__target_intrinsic(cuda, "_wavePrefixSumMultiple($0)")
 matrix<T,N,M> WavePrefixSum(matrix<T,N,M> expr);
 
 __generic<T : __BuiltinType>
```
