summary | refs | log | tree | commit | diff | stats
path: root/source
diff options
context:
space:
mode:
author: jsmall-nvidia <jsmall@nvidia.com> 2020-03-12 15:47:44 -0400
committer: GitHub <noreply@github.com> 2020-03-12 15:47:44 -0400
commit: c1743a52c814377198ec8ee6a22f4487278c57be (patch)
tree: 2cf06644a28a12cbf217ec33f990a2a3cd787264 /source
parent: 69f7d288313eb238bfb42943694dfcd9bb911d3e (diff)
Vector & Matrix Prefix Sum & Product (#1272)
* Implement matrix and vector versions of prefix sum and prefix product.
* Add comments on how the code is organized and where it seems it could be more performant.
Diffstat (limited to 'source')
-rw-r--r-- source/slang/hlsl.meta.slang 4
1 file changed, 4 insertions, 0 deletions
diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang
index 03496ccc8..2b556c10b 100644
--- a/source/slang/hlsl.meta.slang
+++ b/source/slang/hlsl.meta.slang
@@ -2677,8 +2677,10 @@ __generic<T : __BuiltinArithmeticType, let N : int>
__glsl_extension(GL_KHR_shader_subgroup_arithmetic)
__spirv_version(1.3)
__target_intrinsic(glsl, "subgroupExclusiveMul($0)")
+__target_intrinsic(cuda, "_wavePrefixProductMultiple($0)")
vector<T,N> WavePrefixProduct(vector<T,N> expr);
__generic<T : __BuiltinArithmeticType, let N : int, let M : int>
+__target_intrinsic(cuda, "_wavePrefixProductMultiple($0)")
matrix<T,N,M> WavePrefixProduct(matrix<T,N,M> expr);
__generic<T : __BuiltinArithmeticType>
@@ -2691,8 +2693,10 @@ __generic<T : __BuiltinArithmeticType, let N : int>
__glsl_extension(GL_KHR_shader_subgroup_arithmetic)
__spirv_version(1.3)
__target_intrinsic(glsl, "subgroupExclusiveAdd($0)")
+__target_intrinsic(cuda, "_wavePrefixSumMultiple($0)")
vector<T,N> WavePrefixSum(vector<T,N> expr);
__generic<T : __BuiltinArithmeticType, let N : int, let M : int>
+__target_intrinsic(cuda, "_wavePrefixSumMultiple($0)")
matrix<T,N,M> WavePrefixSum(matrix<T,N,M> expr);
__generic<T : __BuiltinType>