diff options
| author | Harsh Aggarwal (NVIDIA) <haaggarwal@nvidia.com> | 2025-08-20 14:41:06 +0530 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-08-20 09:11:06 +0000 |
| commit | e0c20a076f2ec84586b6508664df4f59273c6aaf (patch) | |
| tree | ae629eb56413f1ffd1d269ffe447471c07aa8137 /source | |
| parent | e4a7129b84692ddc3c586f0d0dde95e80e173ed8 (diff) | |
Updated CUDA support to enable batch 3 tests (#8219)
Enable CUDA support for batch 3 tests
- Enhanced wave operations with exclusive support
- Added proper identity values for min/max operations
- Fixed intrinsic name mapping issues
- Updated test configurations
Co-authored-by: Ellie Hermaszewska <ellieh@nvidia.com>
Diffstat (limited to 'source')
| -rw-r--r-- | source/slang/hlsl.meta.slang | 16 |
1 file changed, 11 insertions, 5 deletions
diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang index 0d5b8cb1f..d2e98529b 100644 --- a/source/slang/hlsl.meta.slang +++ b/source/slang/hlsl.meta.slang @@ -8578,7 +8578,7 @@ matrix<T, N, M> degrees(matrix<T, N, M> x) __generic<T : __BuiltinFloatingPointType, let N : int> [__readNone] [PreferCheckpoint] -[require(glsl_hlsl_metal_spirv_wgsl)] +[require(cuda_glsl_hlsl_metal_spirv_wgsl)] T determinant(matrix<T,N,N> m) { __target_switch @@ -8590,6 +8590,7 @@ T determinant(matrix<T,N,N> m) OpExtInst $$T result glsl450 Determinant $m }; case wgsl: __intrinsic_asm "determinant"; + case cuda: default: static_assert(N >= 1 && N <= 4, "determinant is only implemented up to 4x4 matrices"); if (N == 1) @@ -8629,13 +8630,14 @@ T determinant(matrix<T,N,N> m) /// @category math __generic<T : __BuiltinIntegerType, let N : int> [__readNone] -[require(glsl_hlsl_metal_spirv_wgsl)] +[require(cuda_glsl_hlsl_metal_spirv_wgsl)] T determinant(matrix<T,N,N> m) { __target_switch { case hlsl: __intrinsic_asm "determinant"; - // GLSL, WGSL, and SPIR-V don't support integer determinants for lowered matrices, so we need to implement it manually + // GLSL, WGSL, SPIR-V, and CUDA don't support integer determinants for lowered matrices, so we need to implement it manually + case cuda: default: static_assert(N >= 1 && N <= 4, "determinant is only implemented up to 4x4 matrices"); if (N == 1) @@ -16680,7 +16682,7 @@ for (auto opName : kWaveMultiPrefixMinMaxNames) { __generic<T : __BuiltinArithmeticType> __spirv_version(1.3) [ForceInline] -[require(glsl_spirv, subgroup_partitioned)] +[require(cuda_glsl_spirv, subgroup_partitioned)] T WaveMultiPrefix$(opName.name)(T value, uint4 mask) { __shaderSubgroupPartitionedPreamble<T>(); @@ -16688,6 +16690,8 @@ T WaveMultiPrefix$(opName.name)(T value, uint4 mask) { case glsl: __intrinsic_asm "subgroupPartitioned$(opName.glslName)NV"; + case cuda: + __intrinsic_asm "_wavePrefix$(opName.name)(($1).x, $0)"; case spirv: { if (__isFloat<T>()) 
@@ -16703,7 +16707,7 @@ T WaveMultiPrefix$(opName.name)(T value, uint4 mask) __generic<T : __BuiltinArithmeticType, let N : int> __spirv_version(1.3) [ForceInline] -[require(glsl_spirv, subgroup_partitioned)] +[require(cuda_glsl_spirv, subgroup_partitioned)] vector<T, N> WaveMultiPrefix$(opName.name)(vector<T, N> value, uint4 mask) { __shaderSubgroupPartitionedPreamble<T>(); @@ -16711,6 +16715,8 @@ vector<T, N> WaveMultiPrefix$(opName.name)(vector<T, N> value, uint4 mask) { case glsl: __intrinsic_asm "subgroupPartitioned$(opName.glslName)NV"; + case cuda: + __intrinsic_asm "_wavePrefix$(opName.name)Multiple(($1).x, $0)"; case spirv: { if (__isFloat<T>()) |
