From e0c20a076f2ec84586b6508664df4f59273c6aaf Mon Sep 17 00:00:00 2001 From: "Harsh Aggarwal (NVIDIA)" Date: Wed, 20 Aug 2025 14:41:06 +0530 Subject: Updated support to enable batch3 (#8219) Enable CUDA support for batch 3 tests - Enhanced wave operations with exclusive support - Added proper identity values for min/max operations - Fixed intrinsic name mapping issues - Updated test configurations Co-authored-by: Ellie Hermaszewska --- source/slang/hlsl.meta.slang | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) (limited to 'source') diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang index 0d5b8cb1f..d2e98529b 100644 --- a/source/slang/hlsl.meta.slang +++ b/source/slang/hlsl.meta.slang @@ -8578,7 +8578,7 @@ matrix degrees(matrix x) __generic [__readNone] [PreferCheckpoint] -[require(glsl_hlsl_metal_spirv_wgsl)] +[require(cuda_glsl_hlsl_metal_spirv_wgsl)] T determinant(matrix m) { __target_switch @@ -8590,6 +8590,7 @@ T determinant(matrix m) OpExtInst $$T result glsl450 Determinant $m }; case wgsl: __intrinsic_asm "determinant"; + case cuda: default: static_assert(N >= 1 && N <= 4, "determinant is only implemented up to 4x4 matrices"); if (N == 1) @@ -8629,13 +8630,14 @@ T determinant(matrix m) /// @category math __generic [__readNone] -[require(glsl_hlsl_metal_spirv_wgsl)] +[require(cuda_glsl_hlsl_metal_spirv_wgsl)] T determinant(matrix m) { __target_switch { case hlsl: __intrinsic_asm "determinant"; - // GLSL, WGSL, and SPIR-V don't support integer determinants for lowered matrices, so we need to implement it manually + // GLSL, WGSL, SPIR-V, and CUDA don't support integer determinants for lowered matrices, so we need to implement it manually + case cuda: default: static_assert(N >= 1 && N <= 4, "determinant is only implemented up to 4x4 matrices"); if (N == 1) @@ -16680,7 +16682,7 @@ for (auto opName : kWaveMultiPrefixMinMaxNames) { __generic __spirv_version(1.3) [ForceInline] -[require(glsl_spirv, subgroup_partitioned)] +[require(cuda_glsl_spirv, subgroup_partitioned)] T WaveMultiPrefix$(opName.name)(T value, uint4 mask) { __shaderSubgroupPartitionedPreamble(); @@ -16688,6 +16690,8 @@ T WaveMultiPrefix$(opName.name)(T value, uint4 mask) { case glsl: __intrinsic_asm "subgroupPartitioned$(opName.glslName)NV"; + case cuda: + __intrinsic_asm "_wavePrefix$(opName.name)(($1).x, $0)"; case spirv: { if (__isFloat()) @@ -16703,7 +16707,7 @@ T WaveMultiPrefix$(opName.name)(T value, uint4 mask) __generic __spirv_version(1.3) [ForceInline] -[require(glsl_spirv, subgroup_partitioned)] +[require(cuda_glsl_spirv, subgroup_partitioned)] vector WaveMultiPrefix$(opName.name)(vector value, uint4 mask) { __shaderSubgroupPartitionedPreamble(); @@ -16711,6 +16715,8 @@ vector WaveMultiPrefix$(opName.name)(vector value, uint4 mask) { case glsl: __intrinsic_asm "subgroupPartitioned$(opName.glslName)NV"; + case cuda: + __intrinsic_asm "_wavePrefix$(opName.name)Multiple(($1).x, $0)"; case spirv: { if (__isFloat()) -- cgit v1.2.3