From e0c20a076f2ec84586b6508664df4f59273c6aaf Mon Sep 17 00:00:00 2001 From: "Harsh Aggarwal (NVIDIA)" Date: Wed, 20 Aug 2025 14:41:06 +0530 Subject: Updated support to enable batch3 (#8219) Enable CUDA support for batch 3 tests - Enhanced wave operations with exclusive support - Added proper identity values for min/max operations - Fixed intrinsic name mapping issues - Updated test configurations Co-authored-by: Ellie Hermaszewska --- prelude/slang-cuda-prelude.h | 295 ++++++++++++++++++++- source/slang/hlsl.meta.slang | 16 +- tests/autodiff-dstdlib/determinant.slang | 1 + .../byte-address-half-atomics.slang | 1 + tests/hlsl-intrinsic/debug-break.slang | 5 +- .../scalar-double-vk-intrinsic.slang | 5 +- tests/hlsl-intrinsic/wave-active-count-bits.slang | 3 +- .../hlsl-intrinsic/wave-broadcast-lane-at-vk.slang | 1 + tests/hlsl-intrinsic/wave-is-first-lane.slang | 2 +- .../wave-multi/wave-multi-prefix-max.slang | 2 +- .../wave-multi/wave-multi-prefix-min.slang | 2 +- .../wave-multi-prefix-scalar-functional.slang | 2 +- tests/hlsl-intrinsic/wave-prefix-product.slang | 2 +- tests/hlsl-intrinsic/wave-prefix-sum.slang | 2 +- tests/hlsl-intrinsic/wave-vector.slang | 2 +- 15 files changed, 321 insertions(+), 20 deletions(-) diff --git a/prelude/slang-cuda-prelude.h b/prelude/slang-cuda-prelude.h index 9df2727f6..44afd71b9 100644 --- a/prelude/slang-cuda-prelude.h +++ b/prelude/slang-cuda-prelude.h @@ -2661,17 +2661,67 @@ struct WaveOpMul template struct WaveOpMax { - __inline__ __device__ static T getInitial(T a) { return a; } + __inline__ __device__ static T getInitial(T a, bool exclusive = false); __inline__ __device__ static T doOp(T a, T b) { return a > b ? a : b; } }; template struct WaveOpMin { - __inline__ __device__ static T getInitial(T a) { return a; } + __inline__ __device__ static T getInitial(T a, bool exclusive = false); __inline__ __device__ static T doOp(T a, T b) { return a < b ? a : b; } }; +// Compact specializations using macro for getInitial +#define SLANG_WAVE_MIN_SPEC(T, EXCL_VAL) \ + template<> \ + __inline__ __device__ T WaveOpMin::getInitial(T a, bool exclusive) \ + { \ + return exclusive ? (EXCL_VAL) : a; \ + } + +#define SLANG_WAVE_MAX_SPEC(T, EXCL_VAL) \ + template<> \ + __inline__ __device__ T WaveOpMax::getInitial(T a, bool exclusive) \ + { \ + return exclusive ? (EXCL_VAL) : a; \ + } + +// Min specializations (exclusive identity = max value) +SLANG_WAVE_MIN_SPEC(float, SLANG_INFINITY) +SLANG_WAVE_MIN_SPEC(double, SLANG_INFINITY) +SLANG_WAVE_MIN_SPEC(int, 0x7FFFFFFF) +SLANG_WAVE_MIN_SPEC(uint, 0xFFFFFFFF) +SLANG_WAVE_MIN_SPEC(char, (char)0x7F) +SLANG_WAVE_MIN_SPEC(int8_t, (int8_t)0x7F) +SLANG_WAVE_MIN_SPEC(uint8_t, (uint8_t)0xFF) +SLANG_WAVE_MIN_SPEC(int16_t, (int16_t)0x7FFF) +SLANG_WAVE_MIN_SPEC(uint16_t, (uint16_t)0xFFFF) +SLANG_WAVE_MIN_SPEC(int64_t, 0x7FFFFFFFFFFFFFFFLL) +SLANG_WAVE_MIN_SPEC(uint64_t, 0xFFFFFFFFFFFFFFFFULL) +#if SLANG_CUDA_ENABLE_HALF +SLANG_WAVE_MIN_SPEC(__half, __ushort_as_half(0x7BFF)) +#endif + +// Max specializations (exclusive identity = min value) +SLANG_WAVE_MAX_SPEC(float, -SLANG_INFINITY) +SLANG_WAVE_MAX_SPEC(double, -SLANG_INFINITY) +SLANG_WAVE_MAX_SPEC(int, (int)0x80000000) +SLANG_WAVE_MAX_SPEC(uint, 0) +SLANG_WAVE_MAX_SPEC(char, (char)0x80) +SLANG_WAVE_MAX_SPEC(int8_t, (int8_t)0x80) +SLANG_WAVE_MAX_SPEC(uint8_t, 0) +SLANG_WAVE_MAX_SPEC(int16_t, (int16_t)0x8000) +SLANG_WAVE_MAX_SPEC(uint16_t, 0) +SLANG_WAVE_MAX_SPEC(int64_t, (int64_t)0x8000000000000000LL) +SLANG_WAVE_MAX_SPEC(uint64_t, 0) +#if SLANG_CUDA_ENABLE_HALF +SLANG_WAVE_MAX_SPEC(__half, __ushort_as_half(0xFBFF)) +#endif + +#undef SLANG_WAVE_MIN_SPEC +#undef SLANG_WAVE_MAX_SPEC + template struct ElementTypeTrait; @@ -2706,6 +2756,33 @@ struct ElementTypeTrait { typedef int64_t Type; }; +template<> +struct ElementTypeTrait +{ + typedef char Type; +}; +template<> +struct ElementTypeTrait +{ + typedef uchar Type; +}; +template<> +struct ElementTypeTrait +{ + typedef short Type; +}; +template<> +struct ElementTypeTrait +{ + typedef ushort Type; +}; +#if SLANG_CUDA_ENABLE_HALF +template<> +struct ElementTypeTrait<__half> +{ + typedef __half Type; +}; +#endif // Vector template<> @@ -2792,6 +2869,115 @@ struct ElementTypeTrait typedef double Type; }; +// Additional vector types +template<> +struct ElementTypeTrait +{ + typedef char Type; +}; +template<> +struct ElementTypeTrait +{ + typedef char Type; +}; +template<> +struct ElementTypeTrait +{ + typedef char Type; +}; +template<> +struct ElementTypeTrait +{ + typedef uchar Type; +}; +template<> +struct ElementTypeTrait +{ + typedef uchar Type; +}; +template<> +struct ElementTypeTrait +{ + typedef uchar Type; +}; +template<> +struct ElementTypeTrait +{ + typedef short Type; +}; +template<> +struct ElementTypeTrait +{ + typedef short Type; +}; +template<> +struct ElementTypeTrait +{ + typedef short Type; +}; +template<> +struct ElementTypeTrait +{ + typedef ushort Type; +}; +template<> +struct ElementTypeTrait +{ + typedef ushort Type; +}; +template<> +struct ElementTypeTrait +{ + typedef ushort Type; +}; +template<> +struct ElementTypeTrait +{ + typedef int64_t Type; +}; +template<> +struct ElementTypeTrait +{ + typedef int64_t Type; +}; +template<> +struct ElementTypeTrait +{ + typedef int64_t Type; +}; +template<> +struct ElementTypeTrait +{ + typedef uint64_t Type; +}; +template<> +struct ElementTypeTrait +{ + typedef uint64_t Type; +}; +template<> +struct ElementTypeTrait +{ + typedef uint64_t Type; +}; +#if SLANG_CUDA_ENABLE_HALF +template<> +struct ElementTypeTrait<__half2> +{ + typedef __half Type; +}; +template<> +struct ElementTypeTrait<__half3> +{ + typedef __half Type; +}; +template<> +struct ElementTypeTrait<__half4> +{ + typedef __half Type; +}; +#endif + // Matrix template struct ElementTypeTrait> @@ -3430,6 +3616,111 @@ __inline__ __device__ T _wavePrefixAndMultiple(WarpMask mask, T val) return val; } +template +__inline__ __device__ T _wavePrefixMin(WarpMask mask, T val) +{ + return _wavePrefixScalar, T>(mask, val); +} + +template +__inline__ __device__ T _wavePrefixMax(WarpMask mask, T val) +{ + return _wavePrefixScalar, T>(mask, val); +} + +template +__inline__ __device__ T _wavePrefixMinMultiple(WarpMask mask, T val) +{ + typedef typename ElementTypeTrait::Type ElemType; + _wavePrefixMultiple, ElemType, sizeof(T) / sizeof(ElemType)>( + mask, + (ElemType*)&val); + return val; +} + +template +__inline__ __device__ T _wavePrefixMaxMultiple(WarpMask mask, T val) +{ + typedef typename ElementTypeTrait::Type ElemType; + _wavePrefixMultiple, ElemType, sizeof(T) / sizeof(ElemType)>( + mask, + (ElemType*)&val); + return val; +} + +// Wrapper structures for exclusive operations that use the overloaded getInitial method +template +struct WaveOpExclusiveMin +{ + __inline__ __device__ static T getInitial(T a) { return WaveOpMin::getInitial(a, true); } + __inline__ __device__ static T doOp(T a, T b) { return WaveOpMin::doOp(a, b); } +}; + +template +struct WaveOpExclusiveMax +{ + __inline__ __device__ static T getInitial(T a) { return WaveOpMax::getInitial(a, true); } + __inline__ __device__ static T doOp(T a, T b) { return WaveOpMax::doOp(a, b); } +}; + +// Inclusive prefix min/max functions (for WaveMultiPrefixInclusive*) +template +__inline__ __device__ T _wavePrefixInclusiveMin(WarpMask mask, T val) +{ + return _wavePrefixMin(mask, val); +} + +template +__inline__ __device__ T _wavePrefixInclusiveMax(WarpMask mask, T val) +{ + return _wavePrefixMax(mask, val); +} + +template +__inline__ __device__ T _wavePrefixInclusiveMinMultiple(WarpMask mask, T val) +{ + return _wavePrefixMinMultiple(mask, val); +} + +template +__inline__ __device__ T _wavePrefixInclusiveMaxMultiple(WarpMask mask, T val) +{ + return _wavePrefixMaxMultiple(mask, val); +} + +// Explicit exclusive prefix min/max functions (for WaveMultiPrefixExclusive*) +template +__inline__ __device__ T _wavePrefixExclusiveMin(WarpMask mask, T val) +{ + return _wavePrefixScalar, T>(mask, val); +} + +template +__inline__ __device__ T _wavePrefixExclusiveMax(WarpMask mask, T val) +{ + return _wavePrefixScalar, T>(mask, val); +} + +template +__inline__ __device__ T _wavePrefixExclusiveMinMultiple(WarpMask mask, T val) +{ + typedef typename ElementTypeTrait::Type ElemType; + _wavePrefixMultiple, ElemType, sizeof(T) / sizeof(ElemType)>( + mask, + (ElemType*)&val); + return val; +} + +template +__inline__ __device__ T _wavePrefixExclusiveMaxMultiple(WarpMask mask, T val) +{ + typedef typename ElementTypeTrait::Type ElemType; + _wavePrefixMultiple, ElemType, sizeof(T) / sizeof(ElemType)>( + mask, + (ElemType*)&val); + return val; +} + template __inline__ __device__ uint4 _waveMatchScalar(WarpMask mask, T val) { diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang index 0d5b8cb1f..d2e98529b 100644 --- a/source/slang/hlsl.meta.slang +++ b/source/slang/hlsl.meta.slang @@ -8578,7 +8578,7 @@ matrix degrees(matrix x) __generic [__readNone] [PreferCheckpoint] -[require(glsl_hlsl_metal_spirv_wgsl)] +[require(cuda_glsl_hlsl_metal_spirv_wgsl)] T determinant(matrix m) { __target_switch @@ -8590,6 +8590,7 @@ T determinant(matrix m) OpExtInst $$T result glsl450 Determinant $m }; case wgsl: __intrinsic_asm "determinant"; + case cuda: default: static_assert(N >= 1 && N <= 4, "determinant is only implemented up to 4x4 matrices"); if (N == 1) @@ -8629,13 +8630,14 @@ T determinant(matrix m) /// @category math __generic [__readNone] -[require(glsl_hlsl_metal_spirv_wgsl)] +[require(cuda_glsl_hlsl_metal_spirv_wgsl)] T determinant(matrix m) { __target_switch { case hlsl: __intrinsic_asm "determinant"; - // GLSL, WGSL, and SPIR-V don't support integer determinants for lowered matrices, so we need to implement it manually + // GLSL, WGSL, SPIR-V, and CUDA don't support integer determinants for lowered matrices, so we need to implement it manually + case cuda: default: static_assert(N >= 1 && N <= 4, "determinant is only implemented up to 4x4 matrices"); if (N == 1) @@ -16680,7 +16682,7 @@ for (auto opName : kWaveMultiPrefixMinMaxNames) { __generic __spirv_version(1.3) [ForceInline] -[require(glsl_spirv, subgroup_partitioned)] +[require(cuda_glsl_spirv, subgroup_partitioned)] T WaveMultiPrefix$(opName.name)(T value, uint4 mask) { __shaderSubgroupPartitionedPreamble(); @@ -16688,6 +16690,8 @@ T WaveMultiPrefix$(opName.name)(T value, uint4 mask) { case glsl: __intrinsic_asm "subgroupPartitioned$(opName.glslName)NV"; + case cuda: + __intrinsic_asm "_wavePrefix$(opName.name)(($1).x, $0)"; case spirv: { if (__isFloat()) @@ -16703,7 +16707,7 @@ T WaveMultiPrefix$(opName.name)(T value, uint4 mask) __generic __spirv_version(1.3) [ForceInline] -[require(glsl_spirv, subgroup_partitioned)] +[require(cuda_glsl_spirv, subgroup_partitioned)] vector WaveMultiPrefix$(opName.name)(vector value, uint4 mask) { __shaderSubgroupPartitionedPreamble(); @@ -16711,6 +16715,8 @@ vector WaveMultiPrefix$(opName.name)(vector value, uint4 mask) { case glsl: __intrinsic_asm "subgroupPartitioned$(opName.glslName)NV"; + case cuda: + __intrinsic_asm "_wavePrefix$(opName.name)Multiple(($1).x, $0)"; case spirv: { if (__isFloat()) diff --git a/tests/autodiff-dstdlib/determinant.slang b/tests/autodiff-dstdlib/determinant.slang index d2e699551..f73d3cdfa 100644 --- a/tests/autodiff-dstdlib/determinant.slang +++ b/tests/autodiff-dstdlib/determinant.slang @@ -1,5 +1,6 @@ //TEST(compute, vulkan):COMPARE_COMPUTE_EX:-vk -compute -shaderobj -output-using-type //TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -shaderobj -output-using-type +//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -shaderobj -output-using-type -cuda //TEST_INPUT:ubuffer(data=[0 0 0 0], stride=4):out,name=outputBuffer RWStructuredBuffer outputBuffer; diff --git a/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics.slang b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics.slang index f53d38d74..d23a675b2 100644 --- a/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics.slang +++ b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics.slang @@ -3,6 +3,7 @@ // Disabled because validation layer doesn't like vector atomics, although nv driver does allow it. //DISABLED_TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=CHECK): -vk -compute -profile cs_6_2 -render-features half -shaderobj -emit-spirv-directly -output-using-type +//TEST(compute):COMPARE_COMPUTE(filecheck-buffer=CHECK): -cuda -compute -profile cs_6_2 -render-features half -shaderobj -emit-spirv-directly -output-using-type //TEST:SIMPLE(filecheck=SPIRV):-target spirv -entry computeMain -stage compute -emit-spirv-directly -skip-spirv-validation //TEST:SIMPLE(filecheck=HLSL):-target hlsl -entry computeMain -profile cs_6_3 //TEST_INPUT:set tmpBuffer = ubuffer(data=[0 0 0 0], stride=4) diff --git a/tests/hlsl-intrinsic/debug-break.slang b/tests/hlsl-intrinsic/debug-break.slang index c16139f72..8fd4567dc 100644 --- a/tests/hlsl-intrinsic/debug-break.slang +++ b/tests/hlsl-intrinsic/debug-break.slang @@ -1,7 +1,7 @@ //TEST:SIMPLE(filecheck=CHECK_GLSL):-stage compute -entry computeMain -target glsl -line-directive-mode none // We can't enable because output source includes path to prelude. //DISABLE_TEST:SIMPLE:-stage compute -entry computeMain -target cpp -line-directive-mode none -//DISABLE_TEST:SIMPLE:-stage compute -entry computeMain -target cuda -line-directive-mode none +//TEST:SIMPLE(filecheck=CHECK_CUDA):-stage compute -entry computeMain -target cuda -line-directive-mode none // Not currently supported on HLSL //DISABLE_TEST:SIMPLE:-stage compute -entry computeMain -target hlsl -line-directive-mode none // With `slang-llvm` this will crash, but the call stack isn't really usable. @@ -15,6 +15,7 @@ // CHECK_GLSL: void main // CHECK_GLSL: glslDebugBreak{{.*}}(); +// CHECK_CUDA:__brkpt() RWStructuredBuffer outputBuffer; [numthreads(4, 1, 1)] @@ -25,4 +26,4 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) debugBreak(); outputBuffer[idx] = idx; -} \ No newline at end of file +} diff --git a/tests/hlsl-intrinsic/scalar-double-vk-intrinsic.slang b/tests/hlsl-intrinsic/scalar-double-vk-intrinsic.slang index 128c1c1cd..042c0b62e 100644 --- a/tests/hlsl-intrinsic/scalar-double-vk-intrinsic.slang +++ b/tests/hlsl-intrinsic/scalar-double-vk-intrinsic.slang @@ -3,8 +3,7 @@ //TEST(compute):COMPARE_COMPUTE_EX:-cpu -compute -output-using-type -shaderobj //TEST(compute, vulkan):COMPARE_COMPUTE_EX:-vk -compute -output-using-type -shaderobj -render-feature double -// We don't want to run a cuda test here... -//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute -shaderobj +//TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute -shaderobj -output-using-type //TEST_INPUT:ubuffer(data=[0 0 0 0 0 0 0 0], stride=4):out,name outputBuffer RWStructuredBuffer outputBuffer; @@ -54,4 +53,4 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) } outputBuffer[idx] = ft; -} \ No newline at end of file +} diff --git a/tests/hlsl-intrinsic/wave-active-count-bits.slang b/tests/hlsl-intrinsic/wave-active-count-bits.slang index 105b95a6f..a7aa48687 100644 --- a/tests/hlsl-intrinsic/wave-active-count-bits.slang +++ b/tests/hlsl-intrinsic/wave-active-count-bits.slang @@ -2,6 +2,7 @@ //DISABLE_TEST:COMPARE_COMPUTE_EX:-cpu -compute -shaderobj //DISABLE_TEST:COMPARE_COMPUTE_EX:-slang -compute -shaderobj //TEST:COMPARE_COMPUTE_EX:-slang -compute -dx12 -use-dxil -profile cs_6_0 -shaderobj -render-feature hardware-device +//TEST:COMPARE_COMPUTE_EX:-slang -compute -cuda -profile cs_6_0 -shaderobj -render-feature hardware-device //TEST(vulkan):COMPARE_COMPUTE_EX:-vk -compute -shaderobj -render-feature hardware-device //TEST:COMPARE_COMPUTE_EX:-cuda -compute -render-features cuda_sm_7_0 -shaderobj @@ -14,4 +15,4 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) int idx = int(dispatchThreadID.x); outputBuffer[idx] = int(WaveActiveCountBits(bool(idx & 5))); -} \ No newline at end of file +} diff --git a/tests/hlsl-intrinsic/wave-broadcast-lane-at-vk.slang b/tests/hlsl-intrinsic/wave-broadcast-lane-at-vk.slang index b0cff08a9..4a32ab736 100644 --- a/tests/hlsl-intrinsic/wave-broadcast-lane-at-vk.slang +++ b/tests/hlsl-intrinsic/wave-broadcast-lane-at-vk.slang @@ -1,5 +1,6 @@ //TEST_CATEGORY(wave, compute) //TEST:COMPARE_COMPUTE_EX:-slang -compute -dx12 -use-dxil -profile cs_6_0 -shaderobj +//TEST:COMPARE_COMPUTE_EX:-slang -compute -cuda -profile cs_6_0 -shaderobj //TEST(vulkan):COMPARE_COMPUTE_EX:-vk -compute -shaderobj //TEST:COMPARE_COMPUTE_EX:-wgpu -compute -shaderobj //TEST:COMPARE_COMPUTE_EX:-metal -compute -shaderobj diff --git a/tests/hlsl-intrinsic/wave-is-first-lane.slang b/tests/hlsl-intrinsic/wave-is-first-lane.slang index 220a5758b..ae986c7b3 100644 --- a/tests/hlsl-intrinsic/wave-is-first-lane.slang +++ b/tests/hlsl-intrinsic/wave-is-first-lane.slang @@ -3,7 +3,7 @@ //DISABLE_TEST:COMPARE_COMPUTE_EX:-slang -compute -shaderobj //TEST:COMPARE_COMPUTE_EX:-slang -compute -dx12 -use-dxil -profile cs_6_0 -shaderobj -render-feature hardware-device //TEST(vulkan):COMPARE_COMPUTE_EX:-vk -compute -shaderobj -render-feature hardware-device -//TEST:COMPARE_COMPUTE_EX:-cuda -compute -render-features cuda_sm_7_0 -shaderobj +//TEST:COMPARE_COMPUTE_EX:-cuda -compute -capability cuda_sm_7_0 -shaderobj //TEST:COMPARE_COMPUTE_EX:-wgpu -compute -shaderobj //TEST:COMPARE_COMPUTE_EX:-metal -compute -shaderobj diff --git a/tests/hlsl-intrinsic/wave-multi/wave-multi-prefix-max.slang b/tests/hlsl-intrinsic/wave-multi/wave-multi-prefix-max.slang index 654fd6130..0aca11f38 100644 --- a/tests/hlsl-intrinsic/wave-multi/wave-multi-prefix-max.slang +++ b/tests/hlsl-intrinsic/wave-multi/wave-multi-prefix-max.slang @@ -1,7 +1,7 @@ //TEST_CATEGORY(wave, compute) //TEST:COMPARE_COMPUTE_EX(filecheck-buffer=CHECK):-vk -compute -shaderobj -emit-spirv-directly //TEST:COMPARE_COMPUTE_EX(filecheck-buffer=CHECK):-vk -compute -shaderobj -emit-spirv-via-glsl - +//TEST:COMPARE_COMPUTE_EX(filecheck-buffer=CHECK):-cuda -capability cuda_sm_7_0 -compute -shaderobj //TEST:COMPARE_COMPUTE_EX(filecheck-buffer=CHECK):-vk -compute -shaderobj -emit-spirv-directly -xslang -DUSE_GLSL_SYNTAX -allow-glsl //TEST_INPUT:ubuffer(data=[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0], stride=4):out,name outputBuffer diff --git a/tests/hlsl-intrinsic/wave-multi/wave-multi-prefix-min.slang b/tests/hlsl-intrinsic/wave-multi/wave-multi-prefix-min.slang index 68e1e9c05..321b99a0e 100644 --- a/tests/hlsl-intrinsic/wave-multi/wave-multi-prefix-min.slang +++ b/tests/hlsl-intrinsic/wave-multi/wave-multi-prefix-min.slang @@ -1,7 +1,7 @@ //TEST_CATEGORY(wave, compute) //TEST:COMPARE_COMPUTE_EX(filecheck-buffer=CHECK):-vk -compute -shaderobj -emit-spirv-directly //TEST:COMPARE_COMPUTE_EX(filecheck-buffer=CHECK):-vk -compute -shaderobj -emit-spirv-via-glsl - +//TEST:COMPARE_COMPUTE_EX(filecheck-buffer=CHECK):-cuda -capability cuda_sm_7_0 -compute -shaderobj //TEST:COMPARE_COMPUTE_EX(filecheck-buffer=CHECK):-vk -compute -shaderobj -emit-spirv-directly -xslang -DUSE_GLSL_SYNTAX -allow-glsl //TEST_INPUT:ubuffer(data=[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0], stride=4):out,name outputBuffer diff --git a/tests/hlsl-intrinsic/wave-multi/wave-multi-prefix-scalar-functional.slang b/tests/hlsl-intrinsic/wave-multi/wave-multi-prefix-scalar-functional.slang index 5de34b20a..67367f264 100644 --- a/tests/hlsl-intrinsic/wave-multi/wave-multi-prefix-scalar-functional.slang +++ b/tests/hlsl-intrinsic/wave-multi/wave-multi-prefix-scalar-functional.slang @@ -4,7 +4,7 @@ //TEST:COMPARE_COMPUTE_EX:-slang -compute -dx12 -use-dxil -profile sm_6_5 -shaderobj //TEST:COMPARE_COMPUTE_EX:-vk -compute -shaderobj -//TEST:COMPARE_COMPUTE_EX:-cuda -compute -render-features cuda_sm_7_0 -shaderobj +//TEST:COMPARE_COMPUTE_EX:-cuda -compute -capability cuda_sm_7_0 -shaderobj //TEST_INPUT:ubuffer(data=[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0], stride=4):out,name outputBuffer RWStructuredBuffer outputBuffer; diff --git a/tests/hlsl-intrinsic/wave-prefix-product.slang b/tests/hlsl-intrinsic/wave-prefix-product.slang index 774f5996e..f8c0ed57a 100644 --- a/tests/hlsl-intrinsic/wave-prefix-product.slang +++ b/tests/hlsl-intrinsic/wave-prefix-product.slang @@ -3,7 +3,7 @@ //DISABLE_TEST:COMPARE_COMPUTE_EX:-slang -compute -shaderobj //TEST:COMPARE_COMPUTE_EX:-slang -compute -dx12 -use-dxil -profile cs_6_0 -shaderobj -render-feature hardware-device //TEST(vulkan):COMPARE_COMPUTE_EX:-vk -compute -shaderobj -render-feature hardware-device -//TEST:COMPARE_COMPUTE_EX:-cuda -compute -render-features cuda_sm_7_0 -shaderobj +//TEST:COMPARE_COMPUTE_EX:-cuda -compute -capability cuda_sm_7_0 -shaderobj //TEST:COMPARE_COMPUTE_EX:-wgpu -compute -shaderobj //TEST:COMPARE_COMPUTE_EX:-metal -compute -shaderobj diff --git a/tests/hlsl-intrinsic/wave-prefix-sum.slang b/tests/hlsl-intrinsic/wave-prefix-sum.slang index 4f7c2912d..8a092e20d 100644 --- a/tests/hlsl-intrinsic/wave-prefix-sum.slang +++ b/tests/hlsl-intrinsic/wave-prefix-sum.slang @@ -3,7 +3,7 @@ //DISABLE_TEST:COMPARE_COMPUTE_EX:-slang -compute -shaderobj //TEST:COMPARE_COMPUTE_EX:-slang -compute -dx12 -use-dxil -profile cs_6_0 -shaderobj -render-feature hardware-device //TEST(vulkan):COMPARE_COMPUTE_EX:-vk -compute -shaderobj -render-feature hardware-device -//TEST:COMPARE_COMPUTE_EX:-cuda -compute -render-features cuda_sm_7_0 -shaderobj +//TEST:COMPARE_COMPUTE_EX:-cuda -compute -capability cuda_sm_7_0 -shaderobj //TEST:COMPARE_COMPUTE_EX:-wgpu -compute -shaderobj //TEST:COMPARE_COMPUTE_EX:-metal -compute -shaderobj diff --git a/tests/hlsl-intrinsic/wave-vector.slang b/tests/hlsl-intrinsic/wave-vector.slang index f786794ec..4c3b7ccb6 100644 --- a/tests/hlsl-intrinsic/wave-vector.slang +++ b/tests/hlsl-intrinsic/wave-vector.slang @@ -3,7 +3,7 @@ //DISABLE_TEST:COMPARE_COMPUTE_EX:-slang -compute -shaderobj //TEST:COMPARE_COMPUTE_EX:-slang -compute -dx12 -use-dxil -profile cs_6_0 -shaderobj -render-feature hardware-device //TEST(vulkan):COMPARE_COMPUTE_EX:-vk -compute -shaderobj -render-feature hardware-device -//TEST:COMPARE_COMPUTE_EX:-cuda -compute -render-features cuda_sm_7_0 -shaderobj +//TEST:COMPARE_COMPUTE_EX:-cuda -compute -capability cuda_sm_7_0 -shaderobj //TEST:COMPARE_COMPUTE_EX:-wgpu -compute -shaderobj //TEST:COMPARE_COMPUTE_EX:-metal -compute -shaderobj -- cgit v1.2.3