summary | refs | log | tree | commit | diff | stats
path: root/source
diff options
context:
space:
mode:
author: Harsh Aggarwal (NVIDIA) <haaggarwal@nvidia.com> 2025-08-20 14:41:06 +0530
committer: GitHub <noreply@github.com> 2025-08-20 09:11:06 +0000
commit: e0c20a076f2ec84586b6508664df4f59273c6aaf (patch)
tree: ae629eb56413f1ffd1d269ffe447471c07aa8137 /source
parent: e4a7129b84692ddc3c586f0d0dde95e80e173ed8 (diff)
Updated support to enable batch3 (#8219)
Enable CUDA support for batch 3 tests

- Enhanced wave operations with exclusive support
- Added proper identity values for min/max operations
- Fixed intrinsic name mapping issues
- Updated test configurations

Co-authored-by: Ellie Hermaszewska <ellieh@nvidia.com>
Diffstat (limited to 'source')
-rw-r--r-- source/slang/hlsl.meta.slang 16
1 file changed, 11 insertions, 5 deletions
diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang
index 0d5b8cb1f..d2e98529b 100644
--- a/source/slang/hlsl.meta.slang
+++ b/source/slang/hlsl.meta.slang
@@ -8578,7 +8578,7 @@ matrix<T, N, M> degrees(matrix<T, N, M> x)
__generic<T : __BuiltinFloatingPointType, let N : int>
[__readNone]
[PreferCheckpoint]
-[require(glsl_hlsl_metal_spirv_wgsl)]
+[require(cuda_glsl_hlsl_metal_spirv_wgsl)]
T determinant(matrix<T,N,N> m)
{
__target_switch
@@ -8590,6 +8590,7 @@ T determinant(matrix<T,N,N> m)
OpExtInst $$T result glsl450 Determinant $m
};
case wgsl: __intrinsic_asm "determinant";
+ case cuda:
default:
static_assert(N >= 1 && N <= 4, "determinant is only implemented up to 4x4 matrices");
if (N == 1)
@@ -8629,13 +8630,14 @@ T determinant(matrix<T,N,N> m)
/// @category math
__generic<T : __BuiltinIntegerType, let N : int>
[__readNone]
-[require(glsl_hlsl_metal_spirv_wgsl)]
+[require(cuda_glsl_hlsl_metal_spirv_wgsl)]
T determinant(matrix<T,N,N> m)
{
__target_switch
{
case hlsl: __intrinsic_asm "determinant";
- // GLSL, WGSL, and SPIR-V don't support integer determinants for lowered matrices, so we need to implement it manually
+ // GLSL, WGSL, SPIR-V, and CUDA don't support integer determinants for lowered matrices, so we need to implement it manually
+ case cuda:
default:
static_assert(N >= 1 && N <= 4, "determinant is only implemented up to 4x4 matrices");
if (N == 1)
@@ -16680,7 +16682,7 @@ for (auto opName : kWaveMultiPrefixMinMaxNames) {
__generic<T : __BuiltinArithmeticType>
__spirv_version(1.3)
[ForceInline]
-[require(glsl_spirv, subgroup_partitioned)]
+[require(cuda_glsl_spirv, subgroup_partitioned)]
T WaveMultiPrefix$(opName.name)(T value, uint4 mask)
{
__shaderSubgroupPartitionedPreamble<T>();
@@ -16688,6 +16690,8 @@ T WaveMultiPrefix$(opName.name)(T value, uint4 mask)
{
case glsl:
__intrinsic_asm "subgroupPartitioned$(opName.glslName)NV";
+ case cuda:
+ __intrinsic_asm "_wavePrefix$(opName.name)(($1).x, $0)";
case spirv:
{
if (__isFloat<T>())
@@ -16703,7 +16707,7 @@ T WaveMultiPrefix$(opName.name)(T value, uint4 mask)
__generic<T : __BuiltinArithmeticType, let N : int>
__spirv_version(1.3)
[ForceInline]
-[require(glsl_spirv, subgroup_partitioned)]
+[require(cuda_glsl_spirv, subgroup_partitioned)]
vector<T, N> WaveMultiPrefix$(opName.name)(vector<T, N> value, uint4 mask)
{
__shaderSubgroupPartitionedPreamble<T>();
@@ -16711,6 +16715,8 @@ vector<T, N> WaveMultiPrefix$(opName.name)(vector<T, N> value, uint4 mask)
{
case glsl:
__intrinsic_asm "subgroupPartitioned$(opName.glslName)NV";
+ case cuda:
+ __intrinsic_asm "_wavePrefix$(opName.name)Multiple(($1).x, $0)";
case spirv:
{
if (__isFloat<T>())